nobypass [PPC/POWERNV]
Disable IOMMU bypass, using IOMMU for PCI devices.
+ iommu.strict= [ARM64] Configure TLB invalidation behaviour
+ Format: { "0" | "1" }
+ 0 - Lazy mode.
+ Request that DMA unmap operations use deferred
+ invalidation of hardware TLBs, for increased
+ throughput at the cost of reduced device isolation.
+ Will fall back to strict mode if not supported by
+ the relevant IOMMU driver.
+ 1 - Strict mode (default).
+ DMA unmap operations invalidate IOMMU hardware TLBs
+ synchronously.
+
iommu.passthrough=
[ARM64] Configure DMA to bypass the IOMMU by default.
Format: { "0" | "1" }
seconds. Use this parameter to check at some
other rate. 0 disables periodic checking.
- memtest= [KNL,X86,ARM] Enable memtest
+ memtest= [KNL,X86,ARM,PPC] Enable memtest
Format: <integer>
default : 0 <disable>
Specifies the number of memtest passes to be
...
}
-
-* CPU node:
-
-The Alpine platform includes cortex-a15 cores.
-enable-method: must be "al,alpine-smp" to allow smp [1]
-
-Example:
-
-cpus {
- #address-cells = <1>;
- #size-cells = <0>;
- enable-method = "al,alpine-smp";
-
- cpu@0 {
- compatible = "arm,cortex-a15";
- device_type = "cpu";
- reg = <0>;
- };
-
- cpu@1 {
- compatible = "arm,cortex-a15";
- device_type = "cpu";
- reg = <1>;
- };
-
- cpu@2 {
- compatible = "arm,cortex-a15";
- device_type = "cpu";
- reg = <2>;
- };
-
- cpu@3 {
- compatible = "arm,cortex-a15";
- device_type = "cpu";
- reg = <3>;
- };
-};
-
-
-* Alpine CPU resume registers
-
-The CPU resume register are used to define required resume address after
-reset.
-
-Properties:
-- compatible : Should contain "al,alpine-cpu-resume".
-- reg : Offset and length of the register set for the device
-
-Example:
-
-cpu_resume {
- compatible = "al,alpine-cpu-resume";
- reg = <0xfbff5ed0 0x30>;
-};
-
-* Alpine System-Fabric Service Registers
-
-The System-Fabric Service Registers allow various operation on CPU and
-system fabric, like powering CPUs off.
-
-Properties:
-- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
-- reg : Offset and length of the register set for the device
-
-Example:
-
-nb_service {
- compatible = "al,alpine-sysfabric-service", "syscon";
- reg = <0xfb070000 0x10000>;
-};
-
-[1] arm/cpu-enable-method/al,alpine-smp
- "atmel,samv71q19"
- "atmel,samv71q20"
- "atmel,samv71q21"
-
-Chipid required properties:
-- compatible: Should be "atmel,sama5d2-chipid"
-- reg : Should contain registers location and length
-
-PIT Timer required properties:
-- compatible: Should be "atmel,at91sam9260-pit"
-- reg: Should contain registers location and length
-- interrupts: Should contain interrupt for the PIT which is the IRQ line
- shared across all System Controller members.
-
-System Timer (ST) required properties:
-- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
-- reg: Should contain registers location and length
-- interrupts: Should contain interrupt for the ST which is the IRQ line
- shared across all System Controller members.
-- clocks: phandle to input clock.
-Its subnodes can be:
-- watchdog: compatible should be "atmel,at91rm9200-wdt"
-
-RSTC Reset Controller required properties:
-- compatible: Should be "atmel,<chip>-rstc".
- <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-Example:
-
- rstc@fffffd00 {
- compatible = "atmel,at91sam9260-rstc";
- reg = <0xfffffd00 0x10>;
- clocks = <&clk32k>;
- };
-
-RAMC SDRAM/DDR Controller required properties:
-- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
- "atmel,at91sam9260-sdramc",
- "atmel,at91sam9g45-ddramc",
- "atmel,sama5d3-ddramc",
-- reg: Should contain registers location and length
-
-Examples:
-
- ramc0: ramc@ffffe800 {
- compatible = "atmel,at91sam9g45-ddramc";
- reg = <0xffffe800 0x200>;
- };
-
-SHDWC Shutdown Controller
-
-required properties:
-- compatible: Should be "atmel,<chip>-shdwc".
- <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
-- reg: Should contain registers location and length
-- clocks: phandle to input clock.
-
-optional properties:
-- atmel,wakeup-mode: String, operation mode of the wakeup mode.
- Supported values are: "none", "high", "low", "any".
-- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
-
-optional at91sam9260 properties:
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9rl properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
-
-optional at91sam9x5 properties:
-- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
-
-Example:
-
- shdwc@fffffd10 {
- compatible = "atmel,at91sam9260-shdwc";
- reg = <0xfffffd10 0x10>;
- clocks = <&clk32k>;
- };
-
-SHDWC SAMA5D2-Compatible Shutdown Controller
-
-1) shdwc node
-
-required properties:
-- compatible: should be "atmel,sama5d2-shdwc".
-- reg: should contain registers location and length
-- clocks: phandle to input clock.
-- #address-cells: should be one. The cell is the wake-up input index.
-- #size-cells: should be zero.
-
-optional properties:
-
-- debounce-delay-us: minimum wake-up inputs debouncer period in
- microseconds. It's usually a board-related property.
-- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
-
-The node contains child nodes for each wake-up input that the platform uses.
-
-2) input nodes
-
-Wake-up input nodes are usually described in the "board" part of the Device
-Tree. Note also that input 0 is linked to the wake-up pin and is frequently
-used.
-
-Required properties:
-- reg: should contain the wake-up input index [0 - 15].
-
-Optional properties:
-- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
- by the child, forces the wake-up of the core power supply on a high level.
- The default is to be active low.
-
-Example:
-
-On the SoC side:
- shdwc@f8048010 {
- compatible = "atmel,sama5d2-shdwc";
- reg = <0xf8048010 0x10>;
- clocks = <&clk32k>;
- #address-cells = <1>;
- #size-cells = <0>;
- atmel,wakeup-rtc-timer;
- };
-
-On the board side:
- shdwc@f8048010 {
- debounce-delay-us = <976>;
-
- input@0 {
- reg = <0>;
- };
-
- input@1 {
- reg = <1>;
- atmel,wakeup-active-high;
- };
- };
-
-Special Function Registers (SFR)
-
-Special Function Registers (SFR) manage specific aspects of the integrated
-memory, bridge implementations, processor and other functionality not controlled
-elsewhere.
-
-required properties:
-- compatible: Should be "atmel,<chip>-sfr", "syscon" or
- "atmel,<chip>-sfrbu", "syscon"
- <chip> can be "sama5d3", "sama5d4" or "sama5d2".
-- reg: Should contain registers location and length
-
- sfr@f0038000 {
- compatible = "atmel,sama5d3-sfr", "syscon";
- reg = <0xf0038000 0x60>;
- };
-
-Security Module (SECUMOD)
-
-The Security Module macrocell provides all necessary secure functions to avoid
-voltage, temperature, frequency and mechanical attacks on the chip. It also
-embeds secure memories that can be scrambled
-
-required properties:
-- compatible: Should be "atmel,<chip>-secumod", "syscon".
- <chip> can be "sama5d2".
-- reg: Should contain registers location and length
-
- secumod@fc040000 {
- compatible = "atmel,sama5d2-secumod", "syscon";
- reg = <0xfc040000 0x100>;
- };
--- /dev/null
+Atmel system registers
+
+Chipid required properties:
+- compatible: Should be "atmel,sama5d2-chipid"
+- reg : Should contain registers location and length
+
+PIT Timer required properties:
+- compatible: Should be "atmel,at91sam9260-pit"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the PIT which is the IRQ line
+ shared across all System Controller members.
+
+System Timer (ST) required properties:
+- compatible: Should be "atmel,at91rm9200-st", "syscon", "simple-mfd"
+- reg: Should contain registers location and length
+- interrupts: Should contain interrupt for the ST which is the IRQ line
+ shared across all System Controller members.
+- clocks: phandle to input clock.
+Its subnodes can be:
+- watchdog: compatible should be "atmel,at91rm9200-wdt"
+
+RSTC Reset Controller required properties:
+- compatible: Should be "atmel,<chip>-rstc".
+ <chip> can be "at91sam9260" or "at91sam9g45" or "sama5d3"
+- reg: Should contain registers location and length
+- clocks: phandle to input clock.
+
+Example:
+
+ rstc@fffffd00 {
+ compatible = "atmel,at91sam9260-rstc";
+ reg = <0xfffffd00 0x10>;
+ clocks = <&clk32k>;
+ };
+
+RAMC SDRAM/DDR Controller required properties:
+- compatible: Should be "atmel,at91rm9200-sdramc", "syscon"
+ "atmel,at91sam9260-sdramc",
+ "atmel,at91sam9g45-ddramc",
+ "atmel,sama5d3-ddramc",
+- reg: Should contain registers location and length
+
+Examples:
+
+ ramc0: ramc@ffffe800 {
+ compatible = "atmel,at91sam9g45-ddramc";
+ reg = <0xffffe800 0x200>;
+ };
+
+SHDWC Shutdown Controller
+
+required properties:
+- compatible: Should be "atmel,<chip>-shdwc".
+ <chip> can be "at91sam9260", "at91sam9rl" or "at91sam9x5".
+- reg: Should contain registers location and length
+- clocks: phandle to input clock.
+
+optional properties:
+- atmel,wakeup-mode: String, operation mode of the wakeup mode.
+ Supported values are: "none", "high", "low", "any".
+- atmel,wakeup-counter: Counter on Wake-up 0 (between 0x0 and 0xf).
+
+optional at91sam9260 properties:
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9rl properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+- atmel,wakeup-rtt-timer: boolean to enable Real-time Timer Wake-up.
+
+optional at91sam9x5 properties:
+- atmel,wakeup-rtc-timer: boolean to enable Real-time Clock Wake-up.
+
+Example:
+
+ shdwc@fffffd10 {
+ compatible = "atmel,at91sam9260-shdwc";
+ reg = <0xfffffd10 0x10>;
+ clocks = <&clk32k>;
+ };
+
+SHDWC SAMA5D2-Compatible Shutdown Controller
+
+1) shdwc node
+
+required properties:
+- compatible: should be "atmel,sama5d2-shdwc".
+- reg: should contain registers location and length
+- clocks: phandle to input clock.
+- #address-cells: should be one. The cell is the wake-up input index.
+- #size-cells: should be zero.
+
+optional properties:
+
+- debounce-delay-us: minimum wake-up inputs debouncer period in
+ microseconds. It's usually a board-related property.
+- atmel,wakeup-rtc-timer: boolean to enable Real-Time Clock wake-up.
+
+The node contains child nodes for each wake-up input that the platform uses.
+
+2) input nodes
+
+Wake-up input nodes are usually described in the "board" part of the Device
+Tree. Note also that input 0 is linked to the wake-up pin and is frequently
+used.
+
+Required properties:
+- reg: should contain the wake-up input index [0 - 15].
+
+Optional properties:
+- atmel,wakeup-active-high: boolean, the corresponding wake-up input described
+ by the child, forces the wake-up of the core power supply on a high level.
+ The default is to be active low.
+
+Example:
+
+On the SoC side:
+ shdwc@f8048010 {
+ compatible = "atmel,sama5d2-shdwc";
+ reg = <0xf8048010 0x10>;
+ clocks = <&clk32k>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ atmel,wakeup-rtc-timer;
+ };
+
+On the board side:
+ shdwc@f8048010 {
+ debounce-delay-us = <976>;
+
+ input@0 {
+ reg = <0>;
+ };
+
+ input@1 {
+ reg = <1>;
+ atmel,wakeup-active-high;
+ };
+ };
+
+Special Function Registers (SFR)
+
+Special Function Registers (SFR) manage specific aspects of the integrated
+memory, bridge implementations, processor and other functionality not controlled
+elsewhere.
+
+required properties:
+- compatible: Should be "atmel,<chip>-sfr", "syscon" or
+ "atmel,<chip>-sfrbu", "syscon"
+ <chip> can be "sama5d3", "sama5d4" or "sama5d2".
+- reg: Should contain registers location and length
+
+ sfr@f0038000 {
+ compatible = "atmel,sama5d3-sfr", "syscon";
+ reg = <0xf0038000 0x60>;
+ };
+
+Security Module (SECUMOD)
+
+The Security Module macrocell provides all necessary secure functions to avoid
+voltage, temperature, frequency and mechanical attacks on the chip. It also
+embeds secure memories that can be scrambled
+
+required properties:
+- compatible: Should be "atmel,<chip>-secumod", "syscon".
+ <chip> can be "sama5d2".
+- reg: Should contain registers location and length
+
+ secumod@fc040000 {
+ compatible = "atmel,sama5d2-secumod", "syscon";
+ reg = <0xfc040000 0x100>;
+ };
Note:
This enable method requires valid nodes compatible with
-"al,alpine-cpu-resume" and "al,alpine-nb-service"[1].
+"al,alpine-cpu-resume" and "al,alpine-nb-service".
+
+
+* Alpine CPU resume registers
+
+The CPU resume register are used to define required resume address after
+reset.
+
+Properties:
+- compatible : Should contain "al,alpine-cpu-resume".
+- reg : Offset and length of the register set for the device
+
+
+* Alpine System-Fabric Service Registers
+
+The System-Fabric Service Registers allow various operation on CPU and
+system fabric, like powering CPUs off.
+
+Properties:
+- compatible : Should contain "al,alpine-sysfabric-service" and "syscon".
+- reg : Offset and length of the register set for the device
+
Example:
};
};
---
-[1] arm/al,alpine.txt
+cpu_resume {
+ compatible = "al,alpine-cpu-resume";
+ reg = <0xfbff5ed0 0x30>;
+};
+
+nb_service {
+ compatible = "al,alpine-sysfabric-service", "syscon";
+ reg = <0xfb070000 0x10000>;
+};
Usage: optional
Value type: <prop-encoded-array>
Definition: A u32 value that represents the running time dynamic
- power coefficient in units of mW/MHz/uV^2. The
+ power coefficient in units of uW/MHz/V^2. The
coefficient can either be calculated from power
measurements or derived by analysis.
Pdyn = dynamic-power-coefficient * V^2 * f
- where voltage is in uV, frequency is in MHz.
+ where voltage is in V, frequency is in MHz.
Example 1 (dual-cluster big.LITTLE system 32-bit):
--- /dev/null
+Freescale DCFG
+
+DCFG is the device configuration unit, that provides general purpose
+configuration and status for the device. Such as setting the secondary
+core start address and release the secondary core from holdoff and startup.
+
+Required properties:
+ - compatible: Should contain a chip-specific compatible string,
+ Chip-specific strings are of the form "fsl,<chip>-dcfg",
+ The following <chip>s are known to be supported:
+ ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+
+ - reg : should contain base address and length of DCFG memory-mapped registers
+
+Example:
+ dcfg: dcfg@1ee0000 {
+ compatible = "fsl,ls1021a-dcfg";
+ reg = <0x0 0x1ee0000 0x0 0x10000>;
+ };
--- /dev/null
+Freescale SCFG
+
+SCFG is the supplemental configuration unit, that provides SoC specific
+configuration and status registers for the chip. Such as getting PEX port
+status.
+
+Required properties:
+ - compatible: Should contain a chip-specific compatible string,
+ Chip-specific strings are of the form "fsl,<chip>-scfg",
+ The following <chip>s are known to be supported:
+ ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
+
+ - reg: should contain base address and length of SCFG memory-mapped registers
+
+Example:
+ scfg: scfg@1570000 {
+ compatible = "fsl,ls1021a-scfg";
+ reg = <0x0 0x1570000 0x0 0x10000>;
+ };
Required root node compatible properties:
- compatible = "fsl,ls1021a";
-Freescale SoC-specific Device Tree Bindings
--------------------------------------------
-
-Freescale SCFG
- SCFG is the supplemental configuration unit, that provides SoC specific
-configuration and status registers for the chip. Such as getting PEX port
-status.
- Required properties:
- - compatible: Should contain a chip-specific compatible string,
- Chip-specific strings are of the form "fsl,<chip>-scfg",
- The following <chip>s are known to be supported:
- ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
- - reg: should contain base address and length of SCFG memory-mapped registers
-
-Example:
- scfg: scfg@1570000 {
- compatible = "fsl,ls1021a-scfg";
- reg = <0x0 0x1570000 0x0 0x10000>;
- };
-
-Freescale DCFG
- DCFG is the device configuration unit, that provides general purpose
-configuration and status for the device. Such as setting the secondary
-core start address and release the secondary core from holdoff and startup.
- Required properties:
- - compatible: Should contain a chip-specific compatible string,
- Chip-specific strings are of the form "fsl,<chip>-dcfg",
- The following <chip>s are known to be supported:
- ls1012a, ls1021a, ls1043a, ls1046a, ls2080a.
-
- - reg : should contain base address and length of DCFG memory-mapped registers
-
-Example:
- dcfg: dcfg@1ee0000 {
- compatible = "fsl,ls1021a-dcfg";
- reg = <0x0 0x1ee0000 0x0 0x10000>;
- };
-
Freescale ARMv8 based Layerscape SoC family Device Tree Bindings
----------------------------------------------------------------
secure- bindings only need to be used where both the Secure and Normal
world views need to be described in a single device tree.
-Valid Secure world properties:
+Valid Secure world properties
+-----------------------------
- secure-status : specifies whether the device is present and usable
in the secure world. The combination of this with "status" allows
status = "disabled"; secure-status = "okay"; /* S-only */
status = "disabled"; /* disabled in both */
status = "disabled"; secure-status = "disabled"; /* disabled in both */
+
+The secure-chosen node
+----------------------
+
+Similar to the /chosen node which serves as a place for passing data
+between firmware and the operating system, the /secure-chosen node may
+be used to pass data to the Secure OS. Only the properties defined
+below may appear in the /secure-chosen node.
+
+- stdout-path : specifies the device to be used by the Secure OS for
+ its console output. The syntax is the same as for /chosen/stdout-path.
+ If the /secure-chosen node exists but the stdout-path property is not
+ present, the Secure OS should not perform any console output. If
+ /secure-chosen does not exist, the Secure OS should use the value of
+ /chosen/stdout-path instead (that is, use the same device as the
+ Normal world OS).
--- /dev/null
+ZTE sysctrl Registers
+
+Registers for 'zte,zx296702' SoC:
+
+System management required properties:
+ - compatible = "zte,sysctrl"
+
+Low power management required properties:
+ - compatible = "zte,zx296702-pcu"
+
+Bus matrix required properties:
+ - compatible = "zte,zx-bus-matrix"
+
+
+Registers for 'zte,zx296718' SoC:
+
+System management required properties:
+ - compatible = "zte,zx296718-aon-sysctrl"
+ - compatible = "zte,zx296718-sysctrl"
+
+Example:
+aon_sysctrl: aon-sysctrl@116000 {
+ compatible = "zte,zx296718-aon-sysctrl", "syscon";
+ reg = <0x116000 0x1000>;
+};
+
+sysctrl: sysctrl@1463000 {
+ compatible = "zte,zx296718-sysctrl", "syscon";
+ reg = <0x1463000 0x1000>;
+};
ZTE platforms device tree bindings
----------------------------------------
+---------------------------------------
- ZX296702 board:
Required root node properties:
- compatible = "zte,zx296702-ad1", "zte,zx296702"
-System management required properties:
- - compatible = "zte,sysctrl"
-
-Low power management required properties:
- - compatible = "zte,zx296702-pcu"
-
-Bus matrix required properties:
- - compatible = "zte,zx-bus-matrix"
-
-
---------------------------------------
- ZX296718 SoC:
Required root node properties:
ZX296718 EVB board:
- "zte,zx296718-evb"
-
-System management required properties:
- - compatible = "zte,zx296718-aon-sysctrl"
- - compatible = "zte,zx296718-sysctrl"
-
-Example:
-aon_sysctrl: aon-sysctrl@116000 {
- compatible = "zte,zx296718-aon-sysctrl", "syscon";
- reg = <0x116000 0x1000>;
-};
-
-sysctrl: sysctrl@1463000 {
- compatible = "zte,zx296718-sysctrl", "syscon";
- reg = <0x1463000 0x1000>;
-};
Example:
-p1_sec_a: crypto@400,d2000000 {
+p1_sec_a: crypto@400d2000000 {
compatible = "hisilicon,hip07-sec";
reg = <0x400 0xd0000000 0x0 0x10000
0x400 0xd2000000 0x0 0x10000
firmware-name = "base.rbf";
fpga-bridge@4400 {
- compatible = "altr,freeze-bridge";
+ compatible = "altr,freeze-bridge-controller";
reg = <0x4400 0x10>;
fpga_region1: fpga-region1 {
};
fpga-bridge@4420 {
- compatible = "altr,freeze-bridge";
+ compatible = "altr,freeze-bridge-controller";
reg = <0x4420 0x10>;
fpga_region2: fpga-region2 {
used by the device. I2C core will assign "irq" interrupt (or the very first
interrupt if not using interrupt names) as primary interrupt for the slave.
-Alternatively, devices supporting SMbus Host Notify, and connected to
+Alternatively, devices supporting SMBus Host Notify, and connected to
adapters that support this feature, may use "host-notify" property. I2C
core will create a virtual interrupt for Host Notify and assign it as
primary interrupt for the slave.
- "renesas,ipmmu-r8a73a4" for the R8A73A4 (R-Mobile APE6) IPMMU.
- "renesas,ipmmu-r8a7743" for the R8A7743 (RZ/G1M) IPMMU.
+ - "renesas,ipmmu-r8a7744" for the R8A7744 (RZ/G1N) IPMMU.
- "renesas,ipmmu-r8a7745" for the R8A7745 (RZ/G1E) IPMMU.
- "renesas,ipmmu-r8a7790" for the R8A7790 (R-Car H2) IPMMU.
- "renesas,ipmmu-r8a7791" for the R8A7791 (R-Car M2-W) IPMMU.
Also see child specific device properties:
Regulator - ../regulator/arizona-regulator.txt
Extcon - ../extcon/extcon-arizona.txt
- Sound - ../sound/arizona.txt
+ Sound - ../sound/wlf,arizona.txt
Example:
such as network interfaces, crypto accelerator instances, L2 switches,
etc.
+For an overview of the DPAA2 architecture and fsl-mc bus see:
+Documentation/networking/dpaa2/overview.rst
+
+As described in the above overview, all DPAA2 objects in a DPRC share the
+same hardware "isolation context" and a 10-bit value called an ICID
+(isolation context id) is expressed by the hardware to identify
+the requester.
+
+The generic 'iommus' property is insufficient to describe the relationship
+between ICIDs and IOMMUs, so an iommu-map property is used to define
+the set of possible ICIDs under a root DPRC and how they map to
+an IOMMU.
+
+For generic IOMMU bindings, see
+Documentation/devicetree/bindings/iommu/iommu.txt.
+
+For arm-smmu binding, see:
+Documentation/devicetree/bindings/iommu/arm,smmu.txt.
+
Required properties:
- compatible
Value type: <phandle>
Definition: Specifies the phandle to the PHY device node associated
with the this dpmac.
+Optional properties:
+
+- iommu-map: Maps an ICID to an IOMMU and associated iommu-specifier
+ data.
+
+ The property is an arbitrary number of tuples of
+ (icid-base,iommu,iommu-base,length).
+
+ Any ICID i in the interval [icid-base, icid-base + length) is
+ associated with the listed IOMMU, with the iommu-specifier
+ (i - icid-base + iommu-base).
Example:
+ smmu: iommu@5000000 {
+ compatible = "arm,mmu-500";
+ #iommu-cells = <1>;
+ stream-match-mask = <0x7C00>;
+ ...
+ };
+
fsl_mc: fsl-mc@80c000000 {
compatible = "fsl,qoriq-mc";
reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */
<0x00000000 0x08340000 0 0x40000>; /* MC control reg */
msi-parent = <&its>;
+ /* define map for ICIDs 23-64 */
+ iommu-map = <23 &smmu 23 41>;
#address-cells = <3>;
#size-cells = <1>;
--- /dev/null
+* Liebherr's BK4 controller external SPI
+
+A device which handles data acquisition from compatible industrial
+peripherals.
+The SPI is used for data and management purposes in both master and
+slave modes.
+
+Required properties:
+
+- compatible : Should be "lwn,bk4"
+
+Required SPI properties:
+
+- reg : Should be address of the device chip select within
+ the controller.
+
+- spi-max-frequency : Maximum SPI clocking speed of device in Hz, should be
+ 30MHz at most for the Liebherr's BK4 external bus.
+
+Example:
+
+spidev0: spi@0 {
+ compatible = "lwn,bk4";
+ spi-max-frequency = <30000000>;
+ reg = <0>;
+};
Required properties:
- compatible: "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC.
+ "renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC.
"renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC.
"renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC.
"renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC.
Required properties:
- compatible: "renesas,usb-phy-r8a7743" if the device is a part of R8A7743 SoC.
+ "renesas,usb-phy-r8a7744" if the device is a part of R8A7744 SoC.
"renesas,usb-phy-r8a7745" if the device is a part of R8A7745 SoC.
"renesas,usb-phy-r8a7790" if the device is a part of R8A7790 SoC.
"renesas,usb-phy-r8a7791" if the device is a part of R8A7791 SoC.
controller binding usage.
Required properties:
-- compatible: Should be "fsl,imx7-src", "syscon"
+- compatible: Should be "fsl,imx7d-src", "syscon"
- reg: should be register base and length as documented in the
datasheet
- interrupts: Should contain SRC interrupt
Required properties:
- compatible: Should contain "qcom,spmi-temp-alarm".
-- reg: Specifies the SPMI address and length of the controller's
- registers.
+- reg: Specifies the SPMI address.
- interrupts: PMIC temperature alarm interrupt.
- #thermal-sensor-cells: Should be 0. See thermal.txt for a description.
pm8941_temp: thermal-alarm@2400 {
compatible = "qcom,spmi-temp-alarm";
- reg = <0x2400 0x100>;
+ reg = <0x2400>;
interrupts = <0 0x24 0 IRQ_TYPE_EDGE_RISING>;
#thermal-sensor-cells = <0>;
thermal-sensors = <&pm8941_temp>;
trips {
- passive {
- temperature = <1050000>;
+ stage1 {
+ temperature = <105000>;
hysteresis = <2000>;
type = "passive";
};
- alert {
+ stage2 {
temperature = <125000>;
hysteresis = <2000>;
- type = "hot";
- };
- crit {
- temperature = <145000>;
- hysteresis = <2000>;
type = "critical";
};
};
* Thermal Monitoring Unit (TMU) on Freescale QorIQ SoCs
Required properties:
-- compatible : Must include "fsl,qoriq-tmu". The version of the device is
- determined by the TMU IP Block Revision Register (IPBRR0) at
- offset 0x0BF8.
+- compatible : Must include "fsl,qoriq-tmu" or "fsl,imx8mq-tmu". The
+ version of the device is determined by the TMU IP Block Revision
+ Register (IPBRR0) at offset 0x0BF8.
Table of correspondences between IPBRR0 values and example chips:
Value Device
---------- -----
Required properties:
- compatible : "renesas,<soctype>-thermal",
Examples with soctypes are:
+ - "renesas,r8a774a1-thermal" (RZ/G2M)
- "renesas,r8a7795-thermal" (R-Car H3)
- "renesas,r8a7796-thermal" (R-Car M3-W)
- "renesas,r8a77965-thermal" (R-Car M3-N)
+ - "renesas,r8a77980-thermal" (R-Car V3H)
- reg : Address ranges of the thermal registers. Each sensor
needs one address range. Sorting must be done in
increasing order according to datasheet, i.e.
Optional properties:
-- interrupts : interrupts routed to the TSC (3 for H3, M3-W and M3-N)
+- interrupts : interrupts routed to the TSC (3 for H3, M3-W, M3-N,
+ and V3H)
- power-domain : Must contain a reference to the power domain. This
property is mandatory if the thermal sensor instance
is part of a controllable power domain.
- compatible : "renesas,thermal-<soctype>",
"renesas,rcar-gen2-thermal" (with thermal-zone) or
"renesas,rcar-thermal" (without thermal-zone) as
- fallback except R-Car D3.
+ fallback except R-Car V3M/D3.
Examples with soctypes are:
- "renesas,thermal-r8a73a4" (R-Mobile APE6)
- "renesas,thermal-r8a7743" (RZ/G1M)
+ - "renesas,thermal-r8a7744" (RZ/G1N)
- "renesas,thermal-r8a7779" (R-Car H1)
- "renesas,thermal-r8a7790" (R-Car H2)
- "renesas,thermal-r8a7791" (R-Car M2-W)
- "renesas,thermal-r8a7792" (R-Car V2H)
- "renesas,thermal-r8a7793" (R-Car M2-N)
+ - "renesas,thermal-r8a77970" (R-Car V3M)
- "renesas,thermal-r8a77995" (R-Car D3)
- reg : Address range of the thermal registers.
The 1st reg will be recognized as common register
Option properties:
- interrupts : If present should contain 3 interrupts for
- R-Car D3 or 1 interrupt otherwise.
+ R-Car V3M/D3 or 1 interrupt otherwise.
Example (non interrupt support):
--- /dev/null
+Binding for Thermal Sensor for STMicroelectronics STM32 series of SoCs.
+
+On STM32 SoCs, the Digital Temperature Sensor (DTS) is in charge of managing an
+analog block which delivers a frequency depending on the internal SoC's
+temperature. By using a reference frequency, DTS is able to provide a sample
+number which can be translated into a temperature by the user.
+
+DTS provides interrupt notification mechanism by threshold. This mechanism
+offers two temperature trip points: passive and critical. The first is intended
+for passive cooling notification while the second is used for over-temperature
+reset.
+
+Required parameters:
+-------------------
+
+compatible: Should be "st,stm32-thermal"
+reg: This should be the physical base address and length of the
+ sensor's registers.
+clocks: Phandle of the clock used by the thermal sensor.
+ See: Documentation/devicetree/bindings/clock/clock-bindings.txt
+clock-names: Should be "pclk" for register access clock and reference clock.
+ See: Documentation/devicetree/bindings/resource-names.txt
+#thermal-sensor-cells: Should be 0. See ./thermal.txt for a description.
+interrupts: Standard way to define interrupt number.
+
+Example:
+
+ thermal-zones {
+ cpu_thermal: cpu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <0>;
+
+ thermal-sensors = <&thermal>;
+
+ trips {
+ cpu_alert1: cpu-alert1 {
+ temperature = <85000>;
+ hysteresis = <0>;
+ type = "passive";
+ };
+
+ cpu-crit: cpu-crit {
+ temperature = <120000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ };
+ };
+ };
+
+ thermal: thermal@50028000 {
+ compatible = "st,stm32-thermal";
+ reg = <0x50028000 0x100>;
+ clocks = <&rcc TMPSENS>;
+ clock-names = "pclk";
+ #thermal-sensor-cells = <0>;
+ interrupts = <GIC_SPI 147 IRQ_TYPE_LEVEL_HIGH>;
+ };
Elem size: one cell the sensors listed in the thermal-sensors property.
Elem type: signed Coefficients defaults to 1, in case this property
is not specified. A simple linear polynomial is used:
- Z = c0 * x0 + c1 + x1 + ... + c(n-1) * x(n-1) + cn.
+ Z = c0 * x0 + c1 * x1 + ... + c(n-1) * x(n-1) + cn.
The coefficients are ordered and they match with sensors
by means of sensor ID. Additional coefficients are
- "renesas,r8a73a4-cmt1" for the 48-bit CMT1 device included in r8a73a4.
- "renesas,r8a7743-cmt0" for the 32-bit CMT0 device included in r8a7743.
- "renesas,r8a7743-cmt1" for the 48-bit CMT1 device included in r8a7743.
+ - "renesas,r8a7744-cmt0" for the 32-bit CMT0 device included in r8a7744.
+ - "renesas,r8a7744-cmt1" for the 48-bit CMT1 device included in r8a7744.
- "renesas,r8a7745-cmt0" for the 32-bit CMT0 device included in r8a7745.
- "renesas,r8a7745-cmt1" for the 48-bit CMT1 device included in r8a7745.
- "renesas,r8a7790-cmt0" for the 32-bit CMT0 device included in r8a7790.
Required Properties:
- compatible: must be one or more of the following:
- - "renesas,r7s72100-ostm" for the r7s72100 OSTM
+ - "renesas,r7s72100-ostm" for the R7S72100 (RZ/A1) OSTM
+ - "renesas,r7s9210-ostm" for the R7S9210 (RZ/A2) OSTM
- "renesas,ostm" for any OSTM
This is a fallback for the above renesas,*-ostm entries
exar Exar Corporation
excito Excito
ezchip EZchip Semiconductor
+facebook Facebook
fairphone Fairphone B.V.
faraday Faraday Technology Corporation
fastrax Fastrax Oy
okaya Okaya Electric America, Inc.
oki Oki Electric Industry Co., Ltd.
olimex OLIMEX Ltd.
+olpc One Laptop Per Child
onion Onion Corporation
onnn ON Semiconductor Corp.
ontat On Tat Industrial Company
version.
Examples with soctypes are:
- "renesas,r8a7743-wdt" (RZ/G1M)
+ - "renesas,r8a7744-wdt" (RZ/G1N)
- "renesas,r8a7745-wdt" (RZ/G1E)
- "renesas,r8a774a1-wdt" (RZ/G2M)
- "renesas,r8a7790-wdt" (R-Car H2)
# Carefully list dependencies so we do not try to build scripts twice
# in parallel
PHONY += scripts
-scripts: scripts_basic asm-generic gcc-plugins $(autoksyms_h)
+scripts: scripts_basic scripts_dtc asm-generic gcc-plugins $(autoksyms_h)
$(Q)$(MAKE) $(build)=$(@)
# Things we need to do before we recursively start building the kernel
$(srctree)/tools/testing/selftests/*/config
+$(Q)$(MAKE) -f $(srctree)/Makefile olddefconfig
+# ---------------------------------------------------------------------------
+# Devicetree files
+
+ifneq ($(wildcard $(srctree)/arch/$(SRCARCH)/boot/dts/),)
+dtstree := arch/$(SRCARCH)/boot/dts
+endif
+
+ifneq ($(dtstree),)
+
+%.dtb: prepare3 scripts_dtc
+ $(Q)$(MAKE) $(build)=$(dtstree) $(dtstree)/$@
+
+PHONY += dtbs dtbs_install
+dtbs: prepare3 scripts_dtc
+ $(Q)$(MAKE) $(build)=$(dtstree)
+
+dtbs_install:
+ $(Q)$(MAKE) $(dtbinst)=$(dtstree)
+
+ifdef CONFIG_OF_EARLY_FLATTREE
+all: dtbs
+endif
+
+endif
+
+PHONY += scripts_dtc
+scripts_dtc: scripts_basic
+ $(Q)$(MAKE) $(build)=scripts/dtc
+
# ---------------------------------------------------------------------------
# Modules
@echo ' kselftest-merge - Merge all the config dependencies of kselftest to existing'
@echo ' .config.'
@echo ''
+ @$(if $(dtstree), \
+ echo 'Devicetree:'; \
+ echo '* dtbs - Build device tree blobs for enabled boards'; \
+ echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)'; \
+ echo '')
+
@echo 'Userspace tools targets:'
@echo ' use "make tools/help"'
@echo ' or "cd tools; make help"'
$(boot_targets): vmlinux
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-%.dtb %.dtb.S %.dtb.o: scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-dtbs: scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts
-
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
KBUILD_IMAGE := $(boot)/zImage
endif
-# Build the DT binary blobs if we have OF configured
-ifeq ($(CONFIG_USE_OF),y)
-KBUILD_DTBS := dtbs
-endif
-
-all: $(notdir $(KBUILD_IMAGE)) $(KBUILD_DTBS)
+all: $(notdir $(KBUILD_IMAGE))
archheaders:
$(INSTALL_TARGETS):
$(Q)$(MAKE) $(build)=$(boot) MACHINE=$(MACHINE) $@
-%.dtb: | scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts MACHINE=$(MACHINE) $(boot)/dts/$@
-
-PHONY += dtbs dtbs_install
-
-dtbs: prepare scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts
-
-dtbs_install:
- $(Q)$(MAKE) $(dtbinst)=$(boot)/dts
-
PHONY += vdso_install
vdso_install:
ifeq ($(CONFIG_VDSO),y)
echo ' uImage - U-Boot wrapped zImage'
echo ' bootpImage - Combined zImage and initial RAM disk'
echo ' (supply initrd image via make variable INITRD=<path>)'
- echo '* dtbs - Build device tree blobs for enabled boards'
- echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)'
echo ' install - Install uncompressed kernel'
echo ' zinstall - Install compressed kernel'
echo ' uinstall - Install U-Boot wrapped compressed kernel'
#include <linux/string.h>
#include <asm/byteorder.h>
+#define INT_MAX ((int)(~0U>>1))
+
typedef __be16 fdt16_t;
typedef __be32 fdt32_t;
typedef __be64 fdt64_t;
if (!cpus)
return;
- for_each_child_of_node(cpus, cpu) {
+ for_each_of_cpu_node(cpu) {
const __be32 *cell;
int prop_bytes;
u32 hwid;
- if (of_node_cmp(cpu->type, "cpu"))
- continue;
-
pr_debug(" * %pOF...\n", cpu);
/*
* A device tree containing CPU nodes with missing "reg"
__cpu_capacity = kcalloc(nr_cpu_ids, sizeof(*__cpu_capacity),
GFP_NOWAIT);
- cn = of_find_node_by_path("/cpus");
- if (!cn) {
- pr_err("No CPU information found in DT\n");
- return;
- }
-
for_each_possible_cpu(cpu) {
const u32 *rate;
int len;
void __iomem *p;
u32 bar;
static int once;
- struct device_node *np, *cpus;
+ struct device_node *np;
bool has_a7 = false;
bool has_a15 = false;
struct resource res;
if (once++)
return;
- cpus = of_find_node_by_path("/cpus");
- if (!cpus)
- return;
-
- for_each_child_of_node(cpus, np) {
+ for_each_of_cpu_node(np) {
if (of_device_is_compatible(np, "arm,cortex-a15"))
has_a15 = true;
else if (of_device_is_compatible(np, "arm,cortex-a7"))
const struct of_device_id *id;
/* PM domains containing CPUs */
- for_each_node_by_type(np, "cpu")
+ for_each_of_cpu_node(np)
add_special_pd(np, PD_CPU);
/* PM domain containing console */
void __init shmobile_init_delay(void)
{
- struct device_node *np, *cpus;
+ struct device_node *np;
u32 max_freq = 0;
- cpus = of_find_node_by_path("/cpus");
- if (!cpus)
- return;
-
- for_each_child_of_node(cpus, np) {
+ for_each_of_cpu_node(np) {
u32 freq;
if (!of_property_read_u32(np, "clock-frequency", &freq))
max_freq = max(max_freq, freq);
}
- of_node_put(cpus);
-
if (!max_freq)
return;
select ARCH_CLOCKSOURCE_DATA
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_DMA_COHERENT_TO_PFN
+ select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FAST_MULTIPLIER
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_STRICT_MODULE_RWX
+ select ARCH_HAS_SYNC_DMA_FOR_DEVICE
+ select ARCH_HAS_SYNC_DMA_FOR_CPU
select ARCH_HAS_SYSCALL_WRAPPER
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAVE_NMI_SAFE_CMPXCHG
# Default target when executing plain make
boot := arch/arm64/boot
KBUILD_IMAGE := $(boot)/Image.gz
-KBUILD_DTBS := dtbs
-all: Image.gz $(KBUILD_DTBS)
+all: Image.gz
Image: vmlinux
zinstall install:
$(Q)$(MAKE) $(build)=$(boot) $@
-%.dtb: scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-PHONY += dtbs dtbs_install
-
-dtbs: prepare scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts
-
-dtbs_install:
- $(Q)$(MAKE) $(dtbinst)=$(boot)/dts
-
PHONY += vdso_install
vdso_install:
$(Q)$(MAKE) $(build)=arch/arm64/kernel/vdso $@
# We use MRPROPER_FILES and CLEAN_FILES now
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
- $(Q)$(MAKE) $(clean)=$(boot)/dts
# We need to generate vdso-offsets.h before compiling certain files in kernel/.
# In order to do that, we should use the archprepare target, but we can't since
define archhelp
echo '* Image.gz - Compressed kernel image (arch/$(ARCH)/boot/Image.gz)'
echo ' Image - Uncompressed kernel image (arch/$(ARCH)/boot/Image)'
- echo '* dtbs - Build device tree blobs for enabled boards'
- echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)'
echo ' install - Install uncompressed kernel'
echo ' zinstall - Install compressed kernel'
echo ' Install using (your) ~/bin/installkernel or'
#address-cells = <2>;
#size-cells = <2>;
ranges;
+ dma-ranges = <0x0 0x0 0x0 0x0 0x10000 0x00000000>;
clockgen: clocking@1300000 {
compatible = "fsl,ls2080a-clockgen";
reg = <0x00000008 0x0c000000 0 0x40>, /* MC portal base */
<0x00000000 0x08340000 0 0x40000>; /* MC control reg */
msi-parent = <&its>;
+ iommu-map = <0 &smmu 0 0>; /* This is fixed-up by u-boot */
+ dma-coherent;
#address-cells = <3>;
#size-cells = <1>;
compatible = "arm,mmu-500";
reg = <0 0x5000000 0 0x800000>;
#global-interrupts = <12>;
+ #iommu-cells = <1>;
+ stream-match-mask = <0x7C00>;
+ dma-coherent;
interrupts = <0 13 4>, /* global secure fault */
<0 14 4>, /* combined secure interrupt */
<0 15 4>, /* global non-secure fault */
<0 204 4>, <0 205 4>,
<0 206 4>, <0 207 4>,
<0 208 4>, <0 209 4>;
- mmu-masters = <&fsl_mc 0x300 0>;
};
dspi: dspi@2100000 {
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif
- bool dma_coherent;
};
struct pdev_archdata {
#define arch_teardown_dma_ops arch_teardown_dma_ops
#endif
-/* do not use this function in a driver */
+/*
+ * Do not use this function in a driver, it is only provided for
+ * arch/arm/mm/xen.c, which is used by arm64 as well.
+ */
static inline bool is_device_dma_coherent(struct device *dev)
{
- return dev->archdata.dma_coherent;
+ return dev->dma_coherent;
}
#endif /* __KERNEL__ */
{
struct device_node *dn;
- for_each_node_by_type(dn, "cpu") {
+ for_each_of_cpu_node(dn) {
u64 hwid = of_get_cpu_mpidr(dn);
if (hwid == INVALID_HWID)
#include <linux/slab.h>
#include <linux/genalloc.h>
#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
#include <linux/dma-contiguous.h>
#include <linux/vmalloc.h>
#include <linux/swiotlb.h>
#include <asm/cacheflush.h>
-static int swiotlb __ro_after_init;
-
-static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot,
- bool coherent)
-{
- if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE))
- return pgprot_writecombine(prot);
- return prot;
-}
-
static struct gen_pool *atomic_pool __ro_after_init;
#define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K
return 1;
}
-static void *__dma_alloc(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flags,
- unsigned long attrs)
+void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
+ gfp_t flags, unsigned long attrs)
{
struct page *page;
void *ptr, *coherent_ptr;
- bool coherent = is_device_dma_coherent(dev);
- pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false);
+ pgprot_t prot = pgprot_writecombine(PAGE_KERNEL);
size = PAGE_ALIGN(size);
- if (!coherent && !gfpflags_allow_blocking(flags)) {
+ if (!gfpflags_allow_blocking(flags)) {
struct page *page = NULL;
void *addr = __alloc_from_pool(size, &page, flags);
return addr;
}
- ptr = swiotlb_alloc(dev, size, dma_handle, flags, attrs);
+ ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs);
if (!ptr)
goto no_mem;
- /* no need for non-cacheable mapping if coherent */
- if (coherent)
- return ptr;
-
/* remove any dirty cache lines on the kernel alias */
__dma_flush_area(ptr, size);
return coherent_ptr;
no_map:
- swiotlb_free(dev, size, ptr, *dma_handle, attrs);
+ dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs);
no_mem:
return NULL;
}
-static void __dma_free(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
+void arch_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, unsigned long attrs)
{
- void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle));
+ if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) {
+ void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle));
- size = PAGE_ALIGN(size);
-
- if (!is_device_dma_coherent(dev)) {
- if (__free_from_pool(vaddr, size))
- return;
vunmap(vaddr);
+ dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs);
}
- swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs);
}
-static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction dir,
- unsigned long attrs)
+long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr,
+ dma_addr_t dma_addr)
{
- dma_addr_t dev_addr;
-
- dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs);
- if (!is_device_dma_coherent(dev) &&
- (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
-
- return dev_addr;
-}
-
-
-static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- if (!is_device_dma_coherent(dev) &&
- (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
- swiotlb_unmap_page(dev, dev_addr, size, dir, attrs);
+ return __phys_to_pfn(dma_to_phys(dev, dma_addr));
}
-static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
- int nelems, enum dma_data_direction dir,
- unsigned long attrs)
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+ unsigned long attrs)
{
- struct scatterlist *sg;
- int i, ret;
-
- ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs);
- if (!is_device_dma_coherent(dev) &&
- (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- for_each_sg(sgl, sg, ret, i)
- __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
- sg->length, dir);
-
- return ret;
-}
-
-static void __swiotlb_unmap_sg_attrs(struct device *dev,
- struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- if (!is_device_dma_coherent(dev) &&
- (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- for_each_sg(sgl, sg, nelems, i)
- __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
- sg->length, dir);
- swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs);
+ if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE))
+ return pgprot_writecombine(prot);
+ return prot;
}
-static void __swiotlb_sync_single_for_cpu(struct device *dev,
- dma_addr_t dev_addr, size_t size,
- enum dma_data_direction dir)
+void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- if (!is_device_dma_coherent(dev))
- __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
- swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir);
+ __dma_map_area(phys_to_virt(paddr), size, dir);
}
-static void __swiotlb_sync_single_for_device(struct device *dev,
- dma_addr_t dev_addr, size_t size,
- enum dma_data_direction dir)
+void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr,
+ size_t size, enum dma_data_direction dir)
{
- swiotlb_sync_single_for_device(dev, dev_addr, size, dir);
- if (!is_device_dma_coherent(dev))
- __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir);
+ __dma_unmap_area(phys_to_virt(paddr), size, dir);
}
-static void __swiotlb_sync_sg_for_cpu(struct device *dev,
- struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir)
+static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
+ struct page *page, size_t size)
{
- struct scatterlist *sg;
- int i;
-
- if (!is_device_dma_coherent(dev))
- for_each_sg(sgl, sg, nelems, i)
- __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
- sg->length, dir);
- swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir);
-}
+ int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
-static void __swiotlb_sync_sg_for_device(struct device *dev,
- struct scatterlist *sgl, int nelems,
- enum dma_data_direction dir)
-{
- struct scatterlist *sg;
- int i;
+ if (!ret)
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
- swiotlb_sync_sg_for_device(dev, sgl, nelems, dir);
- if (!is_device_dma_coherent(dev))
- for_each_sg(sgl, sg, nelems, i)
- __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)),
- sg->length, dir);
+ return ret;
}
static int __swiotlb_mmap_pfn(struct vm_area_struct *vma,
return ret;
}
-static int __swiotlb_mmap(struct device *dev,
- struct vm_area_struct *vma,
- void *cpu_addr, dma_addr_t dma_addr, size_t size,
- unsigned long attrs)
-{
- int ret;
- unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT;
-
- vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
- is_device_dma_coherent(dev));
-
- if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
- return ret;
-
- return __swiotlb_mmap_pfn(vma, pfn, size);
-}
-
-static int __swiotlb_get_sgtable_page(struct sg_table *sgt,
- struct page *page, size_t size)
-{
- int ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
-
- if (!ret)
- sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
-
- return ret;
-}
-
-static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt,
- void *cpu_addr, dma_addr_t handle, size_t size,
- unsigned long attrs)
-{
- struct page *page = phys_to_page(dma_to_phys(dev, handle));
-
- return __swiotlb_get_sgtable_page(sgt, page, size);
-}
-
-static int __swiotlb_dma_supported(struct device *hwdev, u64 mask)
-{
- if (swiotlb)
- return swiotlb_dma_supported(hwdev, mask);
- return 1;
-}
-
-static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr)
-{
- if (swiotlb)
- return swiotlb_dma_mapping_error(hwdev, addr);
- return 0;
-}
-
-static const struct dma_map_ops arm64_swiotlb_dma_ops = {
- .alloc = __dma_alloc,
- .free = __dma_free,
- .mmap = __swiotlb_mmap,
- .get_sgtable = __swiotlb_get_sgtable,
- .map_page = __swiotlb_map_page,
- .unmap_page = __swiotlb_unmap_page,
- .map_sg = __swiotlb_map_sg_attrs,
- .unmap_sg = __swiotlb_unmap_sg_attrs,
- .sync_single_for_cpu = __swiotlb_sync_single_for_cpu,
- .sync_single_for_device = __swiotlb_sync_single_for_device,
- .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu,
- .sync_sg_for_device = __swiotlb_sync_sg_for_device,
- .dma_supported = __swiotlb_dma_supported,
- .mapping_error = __swiotlb_dma_mapping_error,
-};
-
static int __init atomic_pool_init(void)
{
pgprot_t prot = __pgprot(PROT_NORMAL_NC);
static int __init arm64_dma_init(void)
{
- if (swiotlb_force == SWIOTLB_FORCE ||
- max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
- swiotlb = 1;
-
WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(),
TAINT_CPU_OUT_OF_SPEC,
"ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)",
dma_addr_t *handle, gfp_t gfp,
unsigned long attrs)
{
- bool coherent = is_device_dma_coherent(dev);
+ bool coherent = dev_is_dma_coherent(dev);
int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
size_t iosize = size;
void *addr;
addr = NULL;
}
} else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) {
- pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+ pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
struct page *page;
page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT,
size >> PAGE_SHIFT);
}
} else {
- pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent);
+ pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs);
struct page **pages;
pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot,
struct vm_struct *area;
int ret;
- vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot,
- is_device_dma_coherent(dev));
+ vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs);
if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret))
return ret;
{
phys_addr_t phys;
- if (is_device_dma_coherent(dev))
+ if (dev_is_dma_coherent(dev))
return;
- phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
- __dma_unmap_area(phys_to_virt(phys), size, dir);
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
+ arch_sync_dma_for_cpu(dev, phys, size, dir);
}
static void __iommu_sync_single_for_device(struct device *dev,
{
phys_addr_t phys;
- if (is_device_dma_coherent(dev))
+ if (dev_is_dma_coherent(dev))
return;
- phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr);
- __dma_map_area(phys_to_virt(phys), size, dir);
+ phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dev_addr);
+ arch_sync_dma_for_device(dev, phys, size, dir);
}
static dma_addr_t __iommu_map_page(struct device *dev, struct page *page,
enum dma_data_direction dir,
unsigned long attrs)
{
- bool coherent = is_device_dma_coherent(dev);
+ bool coherent = dev_is_dma_coherent(dev);
int prot = dma_info_to_prot(dir, coherent, attrs);
dma_addr_t dev_addr = iommu_dma_map_page(dev, page, offset, size, prot);
- if (!iommu_dma_mapping_error(dev, dev_addr) &&
- (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
- __iommu_sync_single_for_device(dev, dev_addr, size, dir);
+ if (!coherent && !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ !iommu_dma_mapping_error(dev, dev_addr))
+ __dma_map_area(page_address(page) + offset, size, dir);
return dev_addr;
}
struct scatterlist *sg;
int i;
- if (is_device_dma_coherent(dev))
+ if (dev_is_dma_coherent(dev))
return;
for_each_sg(sgl, sg, nelems, i)
- __dma_unmap_area(sg_virt(sg), sg->length, dir);
+ arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir);
}
static void __iommu_sync_sg_for_device(struct device *dev,
struct scatterlist *sg;
int i;
- if (is_device_dma_coherent(dev))
+ if (dev_is_dma_coherent(dev))
return;
for_each_sg(sgl, sg, nelems, i)
- __dma_map_area(sg_virt(sg), sg->length, dir);
+ arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir);
}
static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl,
int nelems, enum dma_data_direction dir,
unsigned long attrs)
{
- bool coherent = is_device_dma_coherent(dev);
+ bool coherent = dev_is_dma_coherent(dev);
if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
__iommu_sync_sg_for_device(dev, sgl, nelems, dir);
const struct iommu_ops *iommu, bool coherent)
{
if (!dev->dma_ops)
- dev->dma_ops = &arm64_swiotlb_dma_ops;
+ dev->dma_ops = &swiotlb_dma_ops;
- dev->archdata.dma_coherent = coherent;
+ dev->dma_coherent = coherent;
__iommu_setup_dma_ops(dev, dma_base, size, iommu);
#ifdef CONFIG_XEN
DTB:=$(subst dtbImage.,,$(filter dtbImage.%, $(MAKECMDGOALS)))
export DTB
-ifneq ($(DTB),)
core-y += $(boot)/dts/
-endif
# With make 3.82 we cannot mix normal and wildcard targets
DTC_FLAGS ?= -p 1024
+dtb-$(CONFIG_SOC_TMS320C6455) += dsk6455.dtb
+dtb-$(CONFIG_SOC_TMS320C6457) += evmc6457.dtb
+dtb-$(CONFIG_SOC_TMS320C6472) += evmc6472.dtb
+dtb-$(CONFIG_SOC_TMS320C6474) += evmc6474.dtb
+dtb-$(CONFIG_SOC_TMS320C6678) += evmc6678.dtb
+
ifneq ($(DTB),)
-obj-y += linked_dtb.o
+obj-y += $(DTB).dtb.o
endif
-
-quiet_cmd_cp = CP $< $@$2
- cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
-
-# Generate builtin.dtb from $(DTB).dtb
-$(obj)/builtin.dtb: $(obj)/$(DTB).dtb
- $(call if_changed,cp)
-
-$(obj)/linked_dtb.o: $(obj)/builtin.dtb
+++ /dev/null
-.section __fdt_blob,"a"
-.incbin "arch/c6x/boot/dts/builtin.dtb"
extern char _vectors_end[];
extern char _data_lma[];
-extern char _fdt_start[], _fdt_end[];
#endif /* _ASM_C6X_SECTIONS_H */
unsigned long core_khz;
u64 tmp;
struct cpuinfo_c6x *p;
- struct device_node *node, *np;
+ struct device_node *node;
p = &per_cpu(cpu_data, smp_processor_id());
p->core_id = get_coreid();
- node = of_find_node_by_name(NULL, "cpus");
- if (node) {
- for_each_child_of_node(node, np)
- if (!strcmp("cpu", np->name))
- ++c6x_num_cores;
- of_node_put(node);
- }
+ for_each_of_cpu_node(node)
+ ++c6x_num_cores;
node = of_find_node_by_name(NULL, "soc");
if (node) {
notrace void __init machine_init(unsigned long dt_ptr)
{
void *dtb = __va(dt_ptr);
- void *fdt = _fdt_start;
+ void *fdt = __dtb_start;
/* interrupts must be masked */
set_creg(IER, 2);
memory_end >> PAGE_SHIFT);
memblock_reserve(memory_start, bootmap_size);
- unflatten_device_tree();
+ unflatten_and_copy_device_tree();
c6x_cache_init();
*(.switch)
}
- . = ALIGN (8) ;
- __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET)
- {
- _fdt_start = . ; /* place for fdt blob */
- *(__fdt_blob) ; /* Any link-placed DTB */
- BYTE(0); /* section always has contents */
- . = _fdt_start + 0x4000; /* Pad up to 16kbyte */
- _fdt_end = . ;
- }
-
_etext = .;
/*
endif
core-y += arch/$(ARCH)/kernel/ arch/$(ARCH)/mm/
-ifneq '$(CONFIG_H8300_BUILTIN_DTB)' '""'
-core-y += arch/h8300/boot/dts/
-endif
+core-y += arch/$(ARCH)/boot/dts/
libs-y += arch/$(ARCH)/lib/
boot := arch/h8300/boot
-%.dtb %.dtb.S %.dtb.o: | scripts
- $(Q)$(MAKE) $(build)=arch/h8300/boot/dts arch/h8300/boot/dts/$@
-
-PHONY += dtbs
-dtbs: scripts
- $(Q)$(MAKE) $(build)=arch/h8300/boot/dts
-
archmrproper:
archclean:
static void (*rom_reset)(void);
#ifdef CONFIG_ADB_CUDA
-static time64_t cuda_read_time(void)
-{
- struct adb_request req;
- time64_t time;
-
- if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
- return 0;
- while (!req.complete)
- cuda_poll();
-
- time = (u32)((req.reply[3] << 24) | (req.reply[4] << 16) |
- (req.reply[5] << 8) | req.reply[6]);
-
- return time - RTC_OFFSET;
-}
-
-static void cuda_write_time(time64_t time)
-{
- struct adb_request req;
- u32 data = lower_32_bits(time + RTC_OFFSET);
-
- if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
- (data >> 24) & 0xFF, (data >> 16) & 0xFF,
- (data >> 8) & 0xFF, data & 0xFF) < 0)
- return;
- while (!req.complete)
- cuda_poll();
-}
-
static __u8 cuda_read_pram(int offset)
{
struct adb_request req;
#endif /* CONFIG_ADB_CUDA */
#ifdef CONFIG_ADB_PMU
-static time64_t pmu_read_time(void)
-{
- struct adb_request req;
- time64_t time;
-
- if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
- return 0;
- pmu_wait_complete(&req);
-
- time = (u32)((req.reply[0] << 24) | (req.reply[1] << 16) |
- (req.reply[2] << 8) | req.reply[3]);
-
- return time - RTC_OFFSET;
-}
-
-static void pmu_write_time(time64_t time)
-{
- struct adb_request req;
- u32 data = lower_32_bits(time + RTC_OFFSET);
-
- if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
- (data >> 24) & 0xFF, (data >> 16) & 0xFF,
- (data >> 8) & 0xFF, data & 0xFF) < 0)
- return;
- pmu_wait_complete(&req);
-}
-
static __u8 pmu_read_pram(int offset)
{
struct adb_request req;
* is basically any machine with Mac II-style ADB.
*/
-static void via_write_time(time64_t time)
+static void via_set_rtc_time(struct rtc_time *tm)
{
union {
__u8 cdata[4];
__u32 idata;
} data;
__u8 temp;
+ time64_t time;
+
+ time = mktime64(tm->tm_year + 1900, tm->tm_mon + 1, tm->tm_mday,
+ tm->tm_hour, tm->tm_min, tm->tm_sec);
/* Clear the write protect bit */
#ifdef CONFIG_ADB_CUDA
case MAC_ADB_EGRET:
case MAC_ADB_CUDA:
- now = cuda_read_time();
+ now = cuda_get_time();
break;
#endif
#ifdef CONFIG_ADB_PMU
case MAC_ADB_PB2:
- now = pmu_read_time();
+ now = pmu_get_time();
break;
#endif
default:
__func__, t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
t->tm_hour, t->tm_min, t->tm_sec);
- now = mktime64(t->tm_year + 1900, t->tm_mon + 1, t->tm_mday,
- t->tm_hour, t->tm_min, t->tm_sec);
-
switch (macintosh_config->adb_type) {
case MAC_ADB_IOP:
case MAC_ADB_II:
case MAC_ADB_PB1:
- via_write_time(now);
+ via_set_rtc_time(t);
break;
#ifdef CONFIG_ADB_CUDA
case MAC_ADB_EGRET:
case MAC_ADB_CUDA:
- cuda_write_time(now);
+ cuda_set_rtc_time(t);
break;
#endif
#ifdef CONFIG_ADB_PMU
case MAC_ADB_PB2:
- pmu_write_time(now);
+ pmu_set_rtc_time(t);
break;
#endif
default:
# Are we making a simpleImage.<boardname> target? If so, crack out the boardname
DTB:=$(subst simpleImage.,,$(filter simpleImage.%, $(MAKECMDGOALS)))
-ifneq ($(DTB),)
- core-y += $(boot)/dts/
-endif
+core-y += $(boot)/dts/
# defines filename extension depending memory management type
ifeq ($(CONFIG_MMU),)
# SPDX-License-Identifier: GPL-2.0
#
+dtb-y := system.dtb
+
+ifneq ($(DTB),)
obj-y += linked_dtb.o
# Ensure system.dtb exists
$(obj)/system.dtb: $(obj)/$(DTB).dtb
$(call if_changed,cp)
endif
+endif
quiet_cmd_cp = CP $< $@$2
cmd_cp = cat $< >$@$2 || (rm -f $@ && echo false)
void __init setup_cpuinfo(void)
{
- cpu = (struct device_node *) of_find_node_by_type(NULL, "cpu");
+ cpu = of_get_cpu_node(0, NULL);
if (!cpu)
- pr_err("You don't have cpu!!!\n");
+ pr_err("You don't have cpu or are missing cpu reg property!!!\n");
pr_info("%s: initialising\n", __func__);
if (cpuinfo.mmu_privins)
pr_warn("%s: Stream instructions enabled"
" - USERSPACE CAN LOCK THIS KERNEL!\n", __func__);
+
+ of_node_put(cpu);
}
void __init setup_cpuinfo_clk(void)
select GENERIC_CLOCKEVENTS
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
+ select GENERIC_IOMAP
select GENERIC_IRQ_PROBE
select GENERIC_IRQ_SHOW
select GENERIC_LIB_ASHLDI3
select GENERIC_LIB_CMPDI2
select GENERIC_LIB_LSHRDI3
select GENERIC_LIB_UCMPDI2
- select GENERIC_PCI_IOMAP
select GENERIC_SCHED_CLOCK if !CAVIUM_OCTEON_SOC
select GENERIC_SMP_IDLE_THREAD
select GENERIC_TIME_VSYSCALL
select RTC_LIB if !MACH_LOONGSON64
select SYSCTL_EXCEPTION_TRACE
select VIRT_TO_BUS
+ select NO_BOOTMEM
menu "Machine selection"
select USB_UHCI_BIG_ENDIAN_DESC if CPU_BIG_ENDIAN
select USB_UHCI_BIG_ENDIAN_MMIO if CPU_BIG_ENDIAN
select USE_OF
+ select UHI_BOOT
help
Select this to build a kernel which aims to support multiple boards,
generally using a flattened device tree passed from the bootloader
config GENERIC_CSUM
bool
+ default y if !CPU_HAS_LOAD_STORE_LR
config GENERIC_ISA_DMA
bool
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_HAS_LOAD_STORE_LR
select WEAK_ORDERING
select WEAK_REORDERING_BEYOND_LLSC
select MIPS_PGD_C0_CONTEXT
bool "MIPS32 Release 1"
depends on SYS_HAS_CPU_MIPS32_R1
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
help
bool "MIPS32 Release 2"
depends on SYS_HAS_CPU_MIPS32_R2
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_MSA
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_MSA
- select GENERIC_CSUM
select HAVE_KVM
select MIPS_O32_FP64_SUPPORT
help
bool "MIPS64 Release 1"
depends on SYS_HAS_CPU_MIPS64_R1
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
bool "MIPS64 Release 2"
depends on SYS_HAS_CPU_MIPS64_R2
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_MSA
- select GENERIC_CSUM
select MIPS_O32_FP64_SUPPORT if 32BIT || MIPS32_O32
select HAVE_KVM
help
bool "R3000"
depends on SYS_HAS_CPU_R3000
select CPU_HAS_WB
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
help
bool "R39XX"
depends on SYS_HAS_CPU_TX39XX
select CPU_SUPPORTS_32BIT_KERNEL
+ select CPU_HAS_LOAD_STORE_LR
config CPU_VR41XX
bool "R41xx"
depends on SYS_HAS_CPU_VR41XX
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
+ select CPU_HAS_LOAD_STORE_LR
help
The options selects support for the NEC VR4100 series of processors.
Only choose this option if you have one of these processors as a
depends on SYS_HAS_CPU_R4300
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
+ select CPU_HAS_LOAD_STORE_LR
help
MIPS Technologies R4300-series processors.
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_HAS_LOAD_STORE_LR
help
MIPS Technologies R4000-series processors other than 4300, including
the R4000, R4400, R4600, and 4700.
bool "R49XX"
depends on SYS_HAS_CPU_TX49XX
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_HAS_LOAD_STORE_LR
help
MIPS Technologies R5000-series processors other than the Nevada.
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_HAS_LOAD_STORE_LR
config CPU_R5500
bool "R5500"
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_HAS_LOAD_STORE_LR
help
NEC VR5500 and VR5500A series processors implement 64-bit MIPS IV
instruction set.
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HUGEPAGES
+ select CPU_HAS_LOAD_STORE_LR
help
QED / PMC-Sierra RM52xx-series ("Nevada") processors.
bool "R8000"
depends on SYS_HAS_CPU_R8000
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_64BIT_KERNEL
help
MIPS Technologies R8000 processors. Note these processors are
bool "R10000"
depends on SYS_HAS_CPU_R10000
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
bool "RM7000"
depends on SYS_HAS_CPU_RM7000
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
config CPU_SB1
bool "SB1"
depends on SYS_HAS_CPU_SB1
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
bool "Cavium Octeon processor"
depends on SYS_HAS_CPU_CAVIUM_OCTEON
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_64BIT_KERNEL
select WEAK_ORDERING
select CPU_SUPPORTS_HIGHMEM
select WEAK_ORDERING
select CPU_SUPPORTS_HIGHMEM
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_CPUFREQ
select MIPS_EXTERNAL_TIMER
help
config CPU_XLR
bool "Netlogic XLR SoC"
depends on SYS_HAS_CPU_XLR
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_64BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select WEAK_ORDERING
select WEAK_REORDERING_BEYOND_LLSC
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_MIPSR2
select CPU_SUPPORTS_HUGEPAGES
select MIPS_ASID_BITS_VARIABLE
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_HUGEPAGES
select ARCH_HAS_PHYS_TO_DMA
+ select CPU_HAS_LOAD_STORE_LR
config CPU_LOONGSON1
bool
select CPU_MIPS32
select CPU_MIPSR1
select CPU_HAS_PREFETCH
+ select CPU_HAS_LOAD_STORE_LR
select CPU_SUPPORTS_32BIT_KERNEL
select CPU_SUPPORTS_HIGHMEM
select CPU_SUPPORTS_CPUFREQ
config CPU_HAS_RIXI
bool
+config CPU_HAS_LOAD_STORE_LR
+ bool
+ help
+ CPU has support for unaligned load and store instructions:
+ LWL, LWR, SWL, SWR (Load/store word left/right).
+ LDL, LDR, SDL, SDR (Load/store doubleword left/right, for 64bit systems).
+
#
# Vectored interrupt mode is an R2 feature
#
select OF_EARLY_FLATTREE
select IRQ_DOMAIN
+config UHI_BOOT
+ bool
+
config BUILTIN_DTB
bool
#
archscripts: scripts_basic
+ $(Q)$(MAKE) $(build)=arch/mips/tools elf-entry
$(Q)$(MAKE) $(build)=arch/mips/boot/tools relocs
KBUILD_DEFCONFIG := 32r2el_defconfig
cflags-$(toolchain-xpa) += -DTOOLCHAIN_SUPPORTS_XPA
toolchain-crc := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mcrc)
cflags-$(toolchain-crc) += -DTOOLCHAIN_SUPPORTS_CRC
+toolchain-dsp := $(call cc-option-yn,$(mips-cflags) -Wa$(comma)-mdsp)
+cflags-$(toolchain-dsp) += -DTOOLCHAIN_SUPPORTS_DSP
#
# Firmware support
load-y = $(CONFIG_PHYSICAL_START)
endif
-# Sign-extend the entry point to 64 bits if retrieved as a 32-bit number.
-entry-y = $(shell $(OBJDUMP) -f vmlinux 2>/dev/null \
- | sed -n '/^start address / { \
- s/^.* //; \
- s/0x\([0-7].......\)$$/0x00000000\1/; \
- s/0x\(........\)$$/0xffffffff\1/; p }')
-
+entry-y = $(shell $(objtree)/arch/mips/tools/elf-entry vmlinux)
cflags-y += -I$(srctree)/arch/mips/include/asm/mach-generic
drivers-$(CONFIG_PCI) += arch/mips/pci/
CLEAN_FILES += vmlinux.32 vmlinux.64
# device-trees
-core-$(CONFIG_BUILTIN_DTB) += arch/mips/boot/dts/
-
-%.dtb %.dtb.S %.dtb.o: | scripts
- $(Q)$(MAKE) $(build)=arch/mips/boot/dts arch/mips/boot/dts/$@
-
-PHONY += dtbs
-dtbs: scripts
- $(Q)$(MAKE) $(build)=arch/mips/boot/dts
-
-PHONY += dtbs_install
-dtbs_install:
- $(Q)$(MAKE) $(dtbinst)=arch/mips/boot/dts
+core-y += arch/mips/boot/dts/
archprepare:
ifdef CONFIG_MIPS32_N32
echo ' uImage.lzma - U-Boot image (lzma)'
echo ' uImage.lzo - U-Boot image (lzo)'
echo ' uzImage.bin - U-Boot image (self-extracting)'
- echo ' dtbs - Device-tree blobs for enabled boards'
- echo ' dtbs_install - Install dtbs to $(INSTALL_DTBS_PATH)'
echo
echo ' These will be default as appropriate for a configured platform.'
echo
#include <bcm47xx_board.h>
#include <bcm47xx.h>
-static void __init bcm47xx_workarounds_netgear_wnr3500l(void)
+static void __init bcm47xx_workarounds_enable_usb_power(int usb_power)
{
- const int usb_power = 12;
int err;
err = gpio_request_one(usb_power, GPIOF_OUT_INIT_HIGH, "usb_power");
switch (board) {
case BCM47XX_BOARD_NETGEAR_WNR3500L:
- bcm47xx_workarounds_netgear_wnr3500l();
+ bcm47xx_workarounds_enable_usb_power(12);
+ break;
+ case BCM47XX_BOARD_NETGEAR_WNDR3400_V3:
+ bcm47xx_workarounds_enable_usb_power(21);
break;
default:
/* No workaround(s) needed */
mips_hpt_frequency = freq;
}
-extern const char __appended_dtb;
-
void __init plat_mem_setup(void)
{
void *dtb;
ioport_resource.start = 0;
ioport_resource.end = ~0;
-#ifdef CONFIG_MIPS_ELF_APPENDED_DTB
- if (!fdt_check_header(&__appended_dtb))
- dtb = (void *)&__appended_dtb;
- else
-#endif
/* intended to somewhat resemble ARM; see Documentation/arm/Booting */
if (fw_arg0 == 0 && fw_arg1 == 0xffffffff)
dtb = phys_to_virt(fw_arg2);
- else if (fw_passed_dtb) /* UHI interface */
+ else if (fw_passed_dtb) /* UHI interface or appended dtb */
dtb = (void *)fw_passed_dtb;
else if (__dtb_start != __dtb_end)
dtb = (void *)__dtb_start;
};
};
- biu@1F800000 {
+ biu@1f800000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "lantiq,biu", "simple-bus";
- reg = <0x1F800000 0x800000>;
- ranges = <0x0 0x1F800000 0x7FFFFF>;
+ reg = <0x1f800000 0x800000>;
+ ranges = <0x0 0x1f800000 0x7fffff>;
icu0: icu@80200 {
#interrupt-cells = <1>;
reg = <0x80200 0x120>;
};
- watchdog@803F0 {
+ watchdog@803f0 {
compatible = "lantiq,wdt";
- reg = <0x803F0 0x10>;
+ reg = <0x803f0 0x10>;
};
};
- sram@1F000000 {
+ sram@1f000000 {
#address-cells = <1>;
#size-cells = <1>;
compatible = "lantiq,sram";
- reg = <0x1F000000 0x800000>;
- ranges = <0x0 0x1F000000 0x7FFFFF>;
+ reg = <0x1f000000 0x800000>;
+ ranges = <0x0 0x1f000000 0x7fffff>;
eiu0: eiu@101000 {
#interrupt-cells = <1>;
#address-cells = <1>;
#size-cells = <1>;
compatible = "lantiq,fpi", "simple-bus";
- ranges = <0x0 0x10000000 0xEEFFFFF>;
- reg = <0x10000000 0xEF00000>;
+ ranges = <0x0 0x10000000 0xeefffff>;
+ reg = <0x10000000 0xef00000>;
- gptu@E100A00 {
+ gptu@e100a00 {
compatible = "lantiq,gptu-xway";
- reg = <0xE100A00 0x100>;
+ reg = <0xe100a00 0x100>;
};
- serial@E100C00 {
+ serial@e100c00 {
compatible = "lantiq,asc";
- reg = <0xE100C00 0x400>;
+ reg = <0xe100c00 0x400>;
interrupt-parent = <&icu0>;
interrupts = <112 113 114>;
};
- dma0: dma@E104100 {
+ dma0: dma@e104100 {
compatible = "lantiq,dma-xway";
- reg = <0xE104100 0x800>;
+ reg = <0xe104100 0x800>;
};
- ebu0: ebu@E105300 {
+ ebu0: ebu@e105300 {
compatible = "lantiq,ebu-xway";
- reg = <0xE105300 0x100>;
+ reg = <0xe105300 0x100>;
};
- pci0: pci@E105400 {
+ pci0: pci@e105400 {
#address-cells = <3>;
#size-cells = <2>;
#interrupt-cells = <1>;
compatible = "lantiq,pci-xway";
bus-range = <0x0 0x0>;
ranges = <0x2000000 0 0x8000000 0x8000000 0 0x2000000 /* pci memory */
- 0x1000000 0 0x00000000 0xAE00000 0 0x200000>; /* io space */
+ 0x1000000 0 0x00000000 0xae00000 0 0x200000>; /* io space */
reg = <0x7000000 0x8000 /* config space */
- 0xE105400 0x400>; /* pci bridge */
+ 0xe105400 0x400>; /* pci bridge */
};
};
};
};
};
- gpio: pinmux@E100B10 {
+ gpio: pinmux@e100b10 {
compatible = "lantiq,danube-pinctrl";
pinctrl-names = "default";
pinctrl-0 = <&state_default>;
#gpio-cells = <2>;
gpio-controller;
- reg = <0xE100B10 0xA0>;
+ reg = <0xe100b10 0xa0>;
state_default: pinmux {
stp {
};
};
- etop@E180000 {
+ etop@e180000 {
compatible = "lantiq,etop-xway";
- reg = <0xE180000 0x40000>;
+ reg = <0xe180000 0x40000>;
interrupt-parent = <&icu0>;
interrupts = <73 78>;
phy-mode = "rmii";
mac-address = [ 00 11 22 33 44 55 ];
};
- stp0: stp@E100BB0 {
+ stp0: stp@e100bb0 {
#gpio-cells = <2>;
compatible = "lantiq,gpio-stp-xway";
gpio-controller;
- reg = <0xE100BB0 0x40>;
+ reg = <0xe100bb0 0x40>;
lantiq,shadow = <0xfff>;
lantiq,groups = <0x3>;
};
- pci@E105400 {
+ pci@e105400 {
lantiq,bus-clock = <33333333>;
interrupt-map-mask = <0xf800 0x0 0x0 0x7>;
interrupt-map = <
-dtb-$(CONFIG_MSCC_OCELOT) += ocelot_pcb123.dtb
+dtb-$(CONFIG_MSCC_OCELOT) += ocelot_pcb123.dtb ocelot_pcb120.dtb
obj-$(CONFIG_BUILTIN_DTB) += $(addsuffix .o, $(dtb-y))
status = "disabled";
};
+ i2c: i2c@100400 {
+ compatible = "mscc,ocelot-i2c", "snps,designware-i2c";
+ pinctrl-0 = <&i2c_pins>;
+ pinctrl-names = "default";
+ reg = <0x100400 0x100>, <0x198 0x8>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ interrupts = <8>;
+ clocks = <&ahb_clk>;
+
+ status = "disabled";
+ };
+
uart2: serial@100800 {
pinctrl-0 = <&uart2_pins>;
pinctrl-names = "default";
interrupts = <13>;
#interrupt-cells = <2>;
+ i2c_pins: i2c-pins {
+ pins = "GPIO_16", "GPIO_17";
+ function = "twi";
+ };
+
uart_pins: uart-pins {
pins = "GPIO_6", "GPIO_7";
function = "uart";
pins = "GPIO_14", "GPIO_15";
function = "miim1";
};
+
};
mdio0: mdio@107009c {
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2017 Microsemi Corporation */
+
+/dts-v1/;
+
+#include <dt-bindings/interrupt-controller/irq.h>
+#include <dt-bindings/phy/phy-ocelot-serdes.h>
+#include "ocelot.dtsi"
+
+/ {
+ compatible = "mscc,ocelot-pcb120", "mscc,ocelot";
+
+ chosen {
+ stdout-path = "serial0:115200n8";
+ };
+
+ memory@0 {
+ device_type = "memory";
+ reg = <0x0 0x0e000000>;
+ };
+};
+
+&gpio {
+ phy_int_pins: phy_int_pins {
+ pins = "GPIO_4";
+ function = "gpio";
+ };
+};
+
+&mdio0 {
+ status = "okay";
+};
+
+&mdio1 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&miim1>, <&phy_int_pins>;
+
+ phy7: ethernet-phy@0 {
+ reg = <0>;
+ interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&gpio>;
+ };
+ phy6: ethernet-phy@1 {
+ reg = <1>;
+ interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&gpio>;
+ };
+ phy5: ethernet-phy@2 {
+ reg = <2>;
+ interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&gpio>;
+ };
+ phy4: ethernet-phy@3 {
+ reg = <3>;
+ interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-parent = <&gpio>;
+ };
+};
+
+&port0 {
+ phy-handle = <&phy0>;
+};
+
+&port1 {
+ phy-handle = <&phy1>;
+};
+
+&port2 {
+ phy-handle = <&phy2>;
+};
+
+&port3 {
+ phy-handle = <&phy3>;
+};
+
+&port4 {
+ phy-handle = <&phy7>;
+ phy-mode = "sgmii";
+ phys = <&serdes 4 SERDES1G(2)>;
+};
+
+&port5 {
+ phy-handle = <&phy4>;
+ phy-mode = "sgmii";
+ phys = <&serdes 5 SERDES1G(5)>;
+};
+
+&port6 {
+ phy-handle = <&phy6>;
+ phy-mode = "sgmii";
+ phys = <&serdes 6 SERDES1G(3)>;
+};
+
+&port9 {
+ phy-handle = <&phy5>;
+ phy-mode = "sgmii";
+ phys = <&serdes 9 SERDES1G(4)>;
+};
+
+&uart0 {
+ status = "okay";
+};
+
+&uart2 {
+ status = "okay";
+};
};
};
+&i2c {
+ clock-frequency = <100000>;
+ i2c-sda-hold-time-ns = <300>;
+ status = "okay";
+};
+
&mdio0 {
status = "okay";
};
type = IRQ_TYPE_LEVEL_LOW;
break;
default:
- pr_err("Error: (%s) Invalid irq trigger specification: %x\n",
- node->name,
+ pr_err("Error: (%pOFn) Invalid irq trigger specification: %x\n",
+ node,
trigger);
type = IRQ_TYPE_LEVEL_LOW;
break;
parent_irq = irq_of_parse_and_map(ciu_node, 0);
if (!parent_irq) {
- pr_err("ERROR: Couldn't acquire parent_irq for %s\n",
- ciu_node->name);
+ pr_err("ERROR: Couldn't acquire parent_irq for %pOFn\n",
+ ciu_node);
return -EINVAL;
}
addr = of_get_address(ciu_node, 0, NULL, NULL);
if (!addr) {
- pr_err("ERROR: Couldn't acquire reg(0) %s\n", ciu_node->name);
+ pr_err("ERROR: Couldn't acquire reg(0) %pOFn\n", ciu_node);
return -EINVAL;
}
host_data->raw_reg = (u64)phys_to_virt(
addr = of_get_address(ciu_node, 1, NULL, NULL);
if (!addr) {
- pr_err("ERROR: Couldn't acquire reg(1) %s\n", ciu_node->name);
+ pr_err("ERROR: Couldn't acquire reg(1) %pOFn\n", ciu_node);
return -EINVAL;
}
host_data->en_reg = (u64)phys_to_virt(
r = of_property_read_u32(ciu_node, "cavium,max-bits", &val);
if (r) {
- pr_err("ERROR: Couldn't read cavium,max-bits from %s\n",
- ciu_node->name);
+ pr_err("ERROR: Couldn't read cavium,max-bits from %pOFn\n",
+ ciu_node);
return r;
}
host_data->max_bits = val;
bool do_prune;
bool fill_mac;
-#ifdef CONFIG_MIPS_ELF_APPENDED_DTB
- if (!fdt_check_header(&__appended_dtb)) {
- fdt = &__appended_dtb;
+ if (fw_passed_dtb) {
+ fdt = (void *)fw_passed_dtb;
do_prune = false;
fill_mac = true;
pr_info("Using appended Device Tree.\n");
- } else
-#endif
- if (octeon_bootinfo->minor_version >= 3 && octeon_bootinfo->fdt_addr) {
+ } else if (octeon_bootinfo->minor_version >= 3 && octeon_bootinfo->fdt_addr) {
fdt = phys_to_virt(octeon_bootinfo->fdt_addr);
if (fdt_check_header(fdt))
panic("Corrupt Device Tree passed to kernel.");
#include <linux/sched/task_stack.h>
#include <linux/init.h>
#include <linux/export.h>
+#include <linux/kexec.h>
#include <asm/mmu_context.h>
#include <asm/time.h>
.cpu_disable = octeon_cpu_disable,
.cpu_die = octeon_cpu_die,
#endif
+#ifdef CONFIG_KEXEC
+ .kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
+#endif
};
static irqreturn_t octeon_78xx_reched_interrupt(int irq, void *dev_id)
.cpu_disable = octeon_cpu_disable,
.cpu_die = octeon_cpu_die,
#endif
+#ifdef CONFIG_KEXEC
+ .kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
+#endif
};
void __init octeon_setup_smp(void)
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
-CONFIG_GPIO_SYSFS=y
+CONFIG_NETDEVICES=y
+CONFIG_MSCC_OCELOT_SWITCH=y
+CONFIG_MSCC_OCELOT_SWITCH_OCELOT=y
+CONFIG_MDIO_MSCC_MIIM=y
+CONFIG_MICROSEMI_PHY=y
CONFIG_I2C=y
CONFIG_I2C_CHARDEV=y
CONFIG_I2C_MUX=y
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
CONFIG_SPI=y
CONFIG_SPI_BITBANG=y
CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
CONFIG_SPI_SPIDEV=y
+CONFIG_GPIO_SYSFS=y
+
CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_OCELOT_RESET=y
Enable this to include the FDT for the MIPSfpga platform
from Imagination Technologies in the FIT kernel image.
-config FIT_IMAGE_FDT_OCELOT_PCB123
- bool "Include FDT for Microsemi Ocelot PCB123"
+config FIT_IMAGE_FDT_OCELOT
+ bool "Include FDT for Microsemi Ocelot development platforms"
select MSCC_OCELOT
help
- Enable this to include the FDT for the Ocelot PCB123 platform
+ Enable this to include the FDT for the Ocelot development platforms
from Microsemi in the FIT kernel image.
This requires u-boot on the platform.
obj-$(CONFIG_YAMON_DT_SHIM) += yamon-dt.o
obj-$(CONFIG_LEGACY_BOARD_SEAD3) += board-sead3.o
obj-$(CONFIG_LEGACY_BOARD_OCELOT) += board-ocelot.o
-obj-$(CONFIG_KEXEC) += kexec.o
obj-$(CONFIG_VIRT_BOARD_RANCHU) += board-ranchu.o
its-y := vmlinux.its.S
its-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += board-boston.its.S
its-$(CONFIG_FIT_IMAGE_FDT_NI169445) += board-ni169445.its.S
-its-$(CONFIG_FIT_IMAGE_FDT_OCELOT_PCB123) += board-ocelot_pcb123.its.S
+its-$(CONFIG_FIT_IMAGE_FDT_OCELOT) += board-ocelot.its.S
its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/ {
+ images {
+ fdt@ocelot_pcb123 {
+ description = "MSCC Ocelot PCB123 Device Tree";
+ data = /incbin/("boot/dts/mscc/ocelot_pcb123.dtb");
+ type = "flat_dt";
+ arch = "mips";
+ compression = "none";
+ hash@0 {
+ algo = "sha1";
+ };
+ };
+
+ fdt@ocelot_pcb120 {
+ description = "MSCC Ocelot PCB120 Device Tree";
+ data = /incbin/("boot/dts/mscc/ocelot_pcb120.dtb");
+ type = "flat_dt";
+ arch = "mips";
+ compression = "none";
+ hash@0 {
+ algo = "sha1";
+ };
+ };
+ };
+
+ configurations {
+ conf@ocelot_pcb123 {
+ description = "Ocelot Linux kernel";
+ kernel = "kernel@0";
+ fdt = "fdt@ocelot_pcb123";
+ };
+
+ conf@ocelot_pcb120 {
+ description = "Ocelot Linux kernel";
+ kernel = "kernel@0";
+ fdt = "fdt@ocelot_pcb120";
+ };
+ };
+};
+++ /dev/null
-/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
-/ {
- images {
- fdt@ocelot_pcb123 {
- description = "MSCC Ocelot PCB123 Device Tree";
- data = /incbin/("boot/dts/mscc/ocelot_pcb123.dtb");
- type = "flat_dt";
- arch = "mips";
- compression = "none";
- hash@0 {
- algo = "sha1";
- };
- };
- };
-
- configurations {
- conf@ocelot_pcb123 {
- description = "Ocelot Linux kernel";
- kernel = "kernel@0";
- fdt = "fdt@ocelot_pcb123";
- };
- };
-};
+++ /dev/null
-/*
- * Copyright (C) 2016 Imagination Technologies
- * Author: Marcin Nowakowski <marcin.nowakowski@mips.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 2 of the License, or (at your
- * option) any later version.
- */
-
-#include <linux/kexec.h>
-#include <linux/libfdt.h>
-#include <linux/uaccess.h>
-
-static int generic_kexec_prepare(struct kimage *image)
-{
- int i;
-
- for (i = 0; i < image->nr_segments; i++) {
- struct fdt_header fdt;
-
- if (image->segment[i].memsz <= sizeof(fdt))
- continue;
-
- if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
- continue;
-
- if (fdt_check_header(&fdt))
- continue;
-
- kexec_args[0] = -2;
- kexec_args[1] = (unsigned long)
- phys_to_virt((unsigned long)image->segment[i].mem);
- break;
- }
- return 0;
-}
-
-static int __init register_generic_kexec(void)
-{
- _machine_kexec_prepare = generic_kexec_prepare;
- return 0;
-}
-arch_initcall(register_generic_kexec);
/* Kernel variants */
#define kernel_cache(op, base) "cache " op ", " base "\n"
+#define kernel_pref(hint, base) "pref " hint ", " base "\n"
#define kernel_ll(reg, addr) "ll " reg ", " addr "\n"
#define kernel_sc(reg, addr) "sc " reg ", " addr "\n"
#define kernel_lw(reg, addr) "lw " reg ", " addr "\n"
" .set pop\n"
#define user_cache(op, base) __BUILD_EVA_INSN("cachee", op, base)
+#define user_pref(hint, base) __BUILD_EVA_INSN("prefe", hint, base)
#define user_ll(reg, addr) __BUILD_EVA_INSN("lle", reg, addr)
#define user_sc(reg, addr) __BUILD_EVA_INSN("sce", reg, addr)
#define user_lw(reg, addr) __BUILD_EVA_INSN("lwe", reg, addr)
#else
#define user_cache(op, base) kernel_cache(op, base)
+#define user_pref(hint, base) kernel_pref(hint, base)
#define user_ll(reg, addr) kernel_ll(reg, addr)
#define user_sc(reg, addr) kernel_sc(reg, addr)
#define user_lw(reg, addr) kernel_lw(reg, addr)
#else /* __ASSEMBLY__ */
#define kernel_cache(op, base) cache op, base
+#define kernel_pref(hint, base) pref hint, base
#define kernel_ll(reg, addr) ll reg, addr
#define kernel_sc(reg, addr) sc reg, addr
#define kernel_lw(reg, addr) lw reg, addr
.set pop;
#define user_cache(op, base) __BUILD_EVA_INSN(cachee, op, base)
+#define user_pref(hint, base) __BUILD_EVA_INSN(prefe, hint, base)
#define user_ll(reg, addr) __BUILD_EVA_INSN(lle, reg, addr)
#define user_sc(reg, addr) __BUILD_EVA_INSN(sce, reg, addr)
#define user_lw(reg, addr) __BUILD_EVA_INSN(lwe, reg, addr)
#else
#define user_cache(op, base) kernel_cache(op, base)
+#define user_pref(hint, base) kernel_pref(hint, base)
#define user_ll(reg, addr) kernel_ll(reg, addr)
#define user_sc(reg, addr) kernel_sc(reg, addr)
#define user_lw(reg, addr) kernel_lw(reg, addr)
#include <asm/sgidefs.h>
#include <asm/asm-eva.h>
-#ifndef CAT
-#ifdef __STDC__
-#define __CAT(str1, str2) str1##str2
-#else
-#define __CAT(str1, str2) str1/**/str2
-#endif
-#define CAT(str1, str2) __CAT(str1, str2)
-#endif
-
-/*
- * PIC specific declarations
- * Not used for the kernel but here seems to be the right place.
- */
-#ifdef __PIC__
-#define CPRESTORE(register) \
- .cprestore register
-#define CPADD(register) \
- .cpadd register
-#define CPLOAD(register) \
- .cpload register
-#else
-#define CPRESTORE(register)
-#define CPADD(register)
-#define CPLOAD(register)
-#endif
-
/*
* LEAF - declare leaf routine
*/
8: .asciiz msg; \
.popsection;
-/*
- * Build text tables
- */
-#define TTABLE(string) \
- .pushsection .text; \
- .word 1f; \
- .popsection \
- .pushsection .data; \
-1: .asciiz string; \
- .popsection
-
-/*
- * MIPS IV pref instruction.
- * Use with .set noreorder only!
- *
- * MIPS IV implementations are free to treat this as a nop. The R5000
- * is one of them. So we should have an option not to use this instruction.
- */
-#ifdef CONFIG_CPU_HAS_PREFETCH
-
-#define PREF(hint,addr) \
- .set push; \
- .set arch=r5000; \
- pref hint, addr; \
- .set pop
-
-#define PREFE(hint, addr) \
- .set push; \
- .set mips0; \
- .set eva; \
- prefe hint, addr; \
- .set pop
-
-#define PREFX(hint,addr) \
- .set push; \
- .set arch=r5000; \
- prefx hint, addr; \
- .set pop
-
-#else /* !CONFIG_CPU_HAS_PREFETCH */
-
-#define PREF(hint, addr)
-#define PREFE(hint, addr)
-#define PREFX(hint, addr)
-
-#endif /* !CONFIG_CPU_HAS_PREFETCH */
-
-/*
- * MIPS ISA IV/V movn/movz instructions and equivalents for older CPUs.
- */
-#if (_MIPS_ISA == _MIPS_ISA_MIPS1)
-#define MOVN(rd, rs, rt) \
- .set push; \
- .set reorder; \
- beqz rt, 9f; \
- move rd, rs; \
- .set pop; \
-9:
-#define MOVZ(rd, rs, rt) \
- .set push; \
- .set reorder; \
- bnez rt, 9f; \
- move rd, rs; \
- .set pop; \
-9:
-#endif /* _MIPS_ISA == _MIPS_ISA_MIPS1 */
-#if (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS3)
-#define MOVN(rd, rs, rt) \
- .set push; \
- .set noreorder; \
- bnezl rt, 9f; \
- move rd, rs; \
- .set pop; \
-9:
-#define MOVZ(rd, rs, rt) \
- .set push; \
- .set noreorder; \
- beqzl rt, 9f; \
- move rd, rs; \
- .set pop; \
-9:
-#endif /* (_MIPS_ISA == _MIPS_ISA_MIPS2) || (_MIPS_ISA == _MIPS_ISA_MIPS3) */
-#if (_MIPS_ISA == _MIPS_ISA_MIPS4 ) || (_MIPS_ISA == _MIPS_ISA_MIPS5) || \
- (_MIPS_ISA == _MIPS_ISA_MIPS32) || (_MIPS_ISA == _MIPS_ISA_MIPS64)
-#define MOVN(rd, rs, rt) \
- movn rd, rs, rt
-#define MOVZ(rd, rs, rt) \
- movz rd, rs, rt
-#endif /* MIPS IV, MIPS V, MIPS32 or MIPS64 */
-
/*
* Stack alignment
*/
#include <linux/irqflags.h>
#include <asm/addrspace.h>
+#include <asm/barrier.h>
#include <asm/bug.h>
#include <asm/byteorder.h>
#include <asm/cpu.h>
#include <ioremap.h>
#include <mangle-port.h>
-/*
- * Slowdown I/O port space accesses for antique hardware.
- */
-#undef CONF_SLOWDOWN_IO
-
/*
* Raw operations are never swapped in software. OTOH values that raw
* operations are working on may or may not have been swapped by the bus
# define __raw_ioswabq(a, x) (x)
# define ____raw_ioswabq(a, x) (x)
+# define __relaxed_ioswabb ioswabb
+# define __relaxed_ioswabw ioswabw
+# define __relaxed_ioswabl ioswabl
+# define __relaxed_ioswabq ioswabq
+
/* ioswab[bwlq], __mem_ioswab[bwlq] are defined in mangle-port.h */
#define IO_SPACE_LIMIT 0xffff
}
/*
- * Thanks to James van Artsdalen for a better timing-fix than
- * the two short jumps: using outb's to a nonexistent port seems
- * to guarantee better timings even on fast machines.
- *
- * On the other hand, I'd like to be sure of a non-existent port:
- * I feel a bit unsafe about using 0x80 (should be safe, though)
- *
- * Linus
- *
+ * Provide the necessary definitions for generic iomap. We make use of
+ * mips_io_port_base for iomap(), but we don't reserve any low addresses for
+ * use with I/O ports.
*/
-#define __SLOW_DOWN_IO \
- __asm__ __volatile__( \
- "sb\t$0,0x80(%0)" \
- : : "r" (mips_io_port_base));
+#define HAVE_ARCH_PIO_SIZE
+#define PIO_OFFSET mips_io_port_base
+#define PIO_MASK IO_SPACE_LIMIT
+#define PIO_RESERVED 0x0UL
-#ifdef CONF_SLOWDOWN_IO
-#ifdef REALLY_SLOW_IO
-#define SLOW_DOWN_IO { __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; __SLOW_DOWN_IO; }
-#else
-#define SLOW_DOWN_IO __SLOW_DOWN_IO
-#endif
-#else
-#define SLOW_DOWN_IO
-#endif
+/*
+ * Enforce in-order execution of data I/O. In the MIPS architecture
+ * these are equivalent to corresponding platform-specific memory
+ * barriers defined in <asm/barrier.h>. API pinched from PowerPC,
+ * with sync additionally defined.
+ */
+#define iobarrier_rw() mb()
+#define iobarrier_r() rmb()
+#define iobarrier_w() wmb()
+#define iobarrier_sync() iob()
+
+/* Some callers use this older API instead. */
+#define mmiowb() iobarrier_w()
/*
* virt_to_phys - map virtual addresses to physical
extern void __iomem * __ioremap(phys_addr_t offset, phys_addr_t size, unsigned long flags);
extern void __iounmap(const volatile void __iomem *addr);
-#ifndef CONFIG_PCI
-struct pci_dev;
-static inline void pci_iounmap(struct pci_dev *dev, void __iomem *addr) {}
-#endif
-
static inline void __iomem * __ioremap_mode(phys_addr_t offset, unsigned long size,
unsigned long flags)
{
#undef __IS_KSEG1
}
-#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_LOONGSON3_ENHANCEMENT)
+#if defined(CONFIG_CPU_CAVIUM_OCTEON) || defined(CONFIG_CPU_LOONGSON3)
#define war_io_reorder_wmb() wmb()
#else
#define war_io_reorder_wmb() barrier()
#endif
-#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, irq) \
+#define __BUILD_MEMORY_SINGLE(pfx, bwlq, type, barrier, relax, irq) \
\
static inline void pfx##write##bwlq(type val, \
volatile void __iomem *mem) \
volatile type *__mem; \
type __val; \
\
- war_io_reorder_wmb(); \
+ if (barrier) \
+ iobarrier_rw(); \
+ else \
+ war_io_reorder_wmb(); \
\
__mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \
\
\
__mem = (void *)__swizzle_addr_##bwlq((unsigned long)(mem)); \
\
+ if (barrier) \
+ iobarrier_rw(); \
+ \
if (sizeof(type) != sizeof(u64) || sizeof(u64) == sizeof(long)) \
__val = *__mem; \
else if (cpu_has_64bits) { \
} \
\
/* prevent prefetching of coherent DMA data prematurely */ \
- rmb(); \
+ if (!relax) \
+ rmb(); \
return pfx##ioswab##bwlq(__mem, __val); \
}
-#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, p, slow) \
+#define __BUILD_IOPORT_SINGLE(pfx, bwlq, type, barrier, relax, p) \
\
static inline void pfx##out##bwlq##p(type val, unsigned long port) \
{ \
volatile type *__addr; \
type __val; \
\
- war_io_reorder_wmb(); \
+ if (barrier) \
+ iobarrier_rw(); \
+ else \
+ war_io_reorder_wmb(); \
\
__addr = (void *)__swizzle_addr_##bwlq(mips_io_port_base + port); \
\
BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long)); \
\
*__addr = __val; \
- slow; \
} \
\
static inline type pfx##in##bwlq##p(unsigned long port) \
\
BUILD_BUG_ON(sizeof(type) > sizeof(unsigned long)); \
\
+ if (barrier) \
+ iobarrier_rw(); \
+ \
__val = *__addr; \
- slow; \
\
/* prevent prefetching of coherent DMA data prematurely */ \
- rmb(); \
+ if (!relax) \
+ rmb(); \
return pfx##ioswab##bwlq(__addr, __val); \
}
-#define __BUILD_MEMORY_PFX(bus, bwlq, type) \
+#define __BUILD_MEMORY_PFX(bus, bwlq, type, relax) \
\
-__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1)
+__BUILD_MEMORY_SINGLE(bus, bwlq, type, 1, relax, 1)
#define BUILDIO_MEM(bwlq, type) \
\
-__BUILD_MEMORY_PFX(__raw_, bwlq, type) \
-__BUILD_MEMORY_PFX(, bwlq, type) \
-__BUILD_MEMORY_PFX(__mem_, bwlq, type) \
+__BUILD_MEMORY_PFX(__raw_, bwlq, type, 0) \
+__BUILD_MEMORY_PFX(__relaxed_, bwlq, type, 1) \
+__BUILD_MEMORY_PFX(__mem_, bwlq, type, 0) \
+__BUILD_MEMORY_PFX(, bwlq, type, 0)
BUILDIO_MEM(b, u8)
BUILDIO_MEM(w, u16)
BUILDIO_MEM(q, u64)
#define __BUILD_IOPORT_PFX(bus, bwlq, type) \
- __BUILD_IOPORT_SINGLE(bus, bwlq, type, ,) \
- __BUILD_IOPORT_SINGLE(bus, bwlq, type, _p, SLOW_DOWN_IO)
+ __BUILD_IOPORT_SINGLE(bus, bwlq, type, 1, 0,) \
+ __BUILD_IOPORT_SINGLE(bus, bwlq, type, 1, 0, _p)
#define BUILDIO_IOPORT(bwlq, type) \
__BUILD_IOPORT_PFX(, bwlq, type) \
#define __BUILDIO(bwlq, type) \
\
-__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 0)
+__BUILD_MEMORY_SINGLE(____raw_, bwlq, type, 1, 0, 0)
__BUILDIO(q, u64)
-#define readb_relaxed readb
-#define readw_relaxed readw
-#define readl_relaxed readl
-#define readq_relaxed readq
+#define readb_relaxed __relaxed_readb
+#define readw_relaxed __relaxed_readw
+#define readl_relaxed __relaxed_readl
+#define readq_relaxed __relaxed_readq
-#define writeb_relaxed writeb
-#define writew_relaxed writew
-#define writel_relaxed writel
-#define writeq_relaxed writeq
+#define writeb_relaxed __relaxed_writeb
+#define writew_relaxed __relaxed_writew
+#define writel_relaxed __relaxed_writel
+#define writeq_relaxed __relaxed_writeq
#define readb_be(addr) \
__raw_readb((__force unsigned *)(addr))
BUILDSTRING(q, u64)
#endif
-
-#ifdef CONFIG_CPU_CAVIUM_OCTEON
-#define mmiowb() wmb()
-#else
-/* Depends on MIPS II instruction set */
-#define mmiowb() asm volatile ("sync" ::: "memory")
-#endif
-
static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count)
{
memset((void __force *) addr, val, count);
#include <asm/stacktrace.h>
/* Maximum physical address we can use pages from */
-#define KEXEC_SOURCE_MEMORY_LIMIT (0x20000000)
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
/* Maximum address we can reach in physical address mode */
-#define KEXEC_DESTINATION_MEMORY_LIMIT (0x20000000)
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
/* Maximum address we can use for the control code buffer */
-#define KEXEC_CONTROL_MEMORY_LIMIT (0x20000000)
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
/* Reserve 3*4096 bytes for board-specific info */
#define KEXEC_CONTROL_PAGE_SIZE (4096 + 3*4096)
extern int (*_machine_kexec_prepare)(struct kimage *);
extern void (*_machine_kexec_shutdown)(void);
extern void (*_machine_crash_shutdown)(struct pt_regs *regs);
-extern void default_machine_crash_shutdown(struct pt_regs *regs);
+void default_machine_crash_shutdown(struct pt_regs *regs);
+void kexec_nonboot_cpu_jump(void);
+void kexec_reboot(void);
#ifdef CONFIG_SMP
extern const unsigned char kexec_smp_wait[];
extern unsigned long secondary_kexec_args[4];
-extern void (*relocated_kexec_smp_wait) (void *);
extern atomic_t kexec_ready_to_reboot;
extern void (*_crash_smp_send_stop)(void);
#endif
#define MIPS_CPU_IRQ_BASE 56
#define LOONGSON_UART_IRQ (MIPS_CPU_IRQ_BASE + 2) /* UART */
-#define LOONGSON_HT1_IRQ (MIPS_CPU_IRQ_BASE + 3) /* HT1 */
+#define LOONGSON_BRIDGE_IRQ (MIPS_CPU_IRQ_BASE + 3) /* CASCADE */
#define LOONGSON_TIMER_IRQ (MIPS_CPU_IRQ_BASE + 7) /* CPU Timer */
#define LOONGSON_HT1_CFG_BASE loongson_sysconf.ht_control_base
#ifndef __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H
#define __ASM_MACH_LOONGSON64_KERNEL_ENTRY_H
+#include <asm/cpu.h>
+
/*
* Override macros used in arch/mips/kernel/head.S.
*/
mfc0 t0, CP0_PAGEGRAIN
or t0, (0x1 << 29)
mtc0 t0, CP0_PAGEGRAIN
-#ifdef CONFIG_LOONGSON3_ENHANCEMENT
/* Enable STFill Buffer */
+ mfc0 t0, CP0_PRID
+ andi t0, (PRID_IMP_MASK | PRID_REV_MASK)
+ slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2)
+ bnez t0, 1f
mfc0 t0, CP0_CONFIG6
or t0, 0x100
mtc0 t0, CP0_CONFIG6
-#endif
+1:
_ehb
.set pop
#endif
mfc0 t0, CP0_PAGEGRAIN
or t0, (0x1 << 29)
mtc0 t0, CP0_PAGEGRAIN
-#ifdef CONFIG_LOONGSON3_ENHANCEMENT
/* Enable STFill Buffer */
+ mfc0 t0, CP0_PRID
+ andi t0, (PRID_IMP_MASK | PRID_REV_MASK)
+ slti t0, (PRID_IMP_LOONGSON_64 | PRID_REV_LOONGSON3A_R2)
+ bnez t0, 1f
mfc0 t0, CP0_CONFIG6
or t0, 0x100
mtc0 t0, CP0_CONFIG6
-#endif
+1:
_ehb
.set pop
#endif
_write_32bit_cp1_register(dest, val, )
#endif
-#ifdef HAVE_AS_DSP
+#ifdef TOOLCHAIN_SUPPORTS_DSP
#define rddsp(mask) \
({ \
unsigned int __dspctl; \
\
__asm__ __volatile__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" rddsp %0, %x1 \n" \
" .set pop \n" \
do { \
__asm__ __volatile__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" wrdsp %0, %x1 \n" \
" .set pop \n" \
long mflo0; \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mflo %0, $ac0 \n" \
" .set pop \n" \
long mflo1; \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mflo %0, $ac1 \n" \
" .set pop \n" \
long mflo2; \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mflo %0, $ac2 \n" \
" .set pop \n" \
long mflo3; \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mflo %0, $ac3 \n" \
" .set pop \n" \
long mfhi0; \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mfhi %0, $ac0 \n" \
" .set pop \n" \
long mfhi1; \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mfhi %0, $ac1 \n" \
" .set pop \n" \
long mfhi2; \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mfhi %0, $ac2 \n" \
" .set pop \n" \
long mfhi3; \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mfhi %0, $ac3 \n" \
" .set pop \n" \
({ \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mtlo %0, $ac0 \n" \
" .set pop \n" \
({ \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mtlo %0, $ac1 \n" \
" .set pop \n" \
({ \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mtlo %0, $ac2 \n" \
" .set pop \n" \
({ \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mtlo %0, $ac3 \n" \
" .set pop \n" \
({ \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mthi %0, $ac0 \n" \
" .set pop \n" \
({ \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mthi %0, $ac1 \n" \
" .set pop \n" \
({ \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mthi %0, $ac2 \n" \
" .set pop \n" \
({ \
__asm__( \
" .set push \n" \
+ " .set " MIPS_ISA_LEVEL " \n" \
" .set dsp \n" \
" mthi %0, $ac3 \n" \
" .set pop \n" \
#endif
-#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_256M)
+#define VDSO_RANDOMIZE_SIZE (TASK_IS_32BIT_ADDR ? SZ_1M : SZ_64M)
extern unsigned long mips_stack_top(void);
#define STACK_TOP mips_stack_top()
: \
: "i" (op), "R" (*(unsigned char *)(addr)))
-#ifdef CONFIG_MIPS_MT
-
-#define __iflush_prologue \
- unsigned long redundance; \
- extern int mt_n_iflushes; \
- for (redundance = 0; redundance < mt_n_iflushes; redundance++) {
-
-#define __iflush_epilogue \
- }
-
-#define __dflush_prologue \
- unsigned long redundance; \
- extern int mt_n_dflushes; \
- for (redundance = 0; redundance < mt_n_dflushes; redundance++) {
-
-#define __dflush_epilogue \
- }
-
-#define __inv_dflush_prologue __dflush_prologue
-#define __inv_dflush_epilogue __dflush_epilogue
-#define __sflush_prologue {
-#define __sflush_epilogue }
-#define __inv_sflush_prologue __sflush_prologue
-#define __inv_sflush_epilogue __sflush_epilogue
-
-#else /* CONFIG_MIPS_MT */
-
-#define __iflush_prologue {
-#define __iflush_epilogue }
-#define __dflush_prologue {
-#define __dflush_epilogue }
-#define __inv_dflush_prologue {
-#define __inv_dflush_epilogue }
-#define __sflush_prologue {
-#define __sflush_epilogue }
-#define __inv_sflush_prologue {
-#define __inv_sflush_epilogue }
-
-#endif /* CONFIG_MIPS_MT */
-
static inline void flush_icache_line_indexed(unsigned long addr)
{
- __iflush_prologue
cache_op(Index_Invalidate_I, addr);
- __iflush_epilogue
}
static inline void flush_dcache_line_indexed(unsigned long addr)
{
- __dflush_prologue
cache_op(Index_Writeback_Inv_D, addr);
- __dflush_epilogue
}
static inline void flush_scache_line_indexed(unsigned long addr)
static inline void flush_icache_line(unsigned long addr)
{
- __iflush_prologue
switch (boot_cpu_type()) {
case CPU_LOONGSON2:
cache_op(Hit_Invalidate_I_Loongson2, addr);
cache_op(Hit_Invalidate_I, addr);
break;
}
- __iflush_epilogue
}
static inline void flush_dcache_line(unsigned long addr)
{
- __dflush_prologue
cache_op(Hit_Writeback_Inv_D, addr);
- __dflush_epilogue
}
static inline void invalidate_dcache_line(unsigned long addr)
{
- __dflush_prologue
cache_op(Hit_Invalidate_D, addr);
- __dflush_epilogue
}
static inline void invalidate_scache_line(unsigned long addr)
current_cpu_data.desc.waybit; \
unsigned long ws, addr; \
\
- __##pfx##flush_prologue \
- \
for (ws = 0; ws < ws_end; ws += ws_inc) \
for (addr = start; addr < end; addr += lsize * 32) \
cache##lsize##_unroll32(addr|ws, indexop); \
- \
- __##pfx##flush_epilogue \
} \
\
static inline void extra##blast_##pfx##cache##lsize##_page(unsigned long page) \
unsigned long start = page; \
unsigned long end = page + PAGE_SIZE; \
\
- __##pfx##flush_prologue \
- \
do { \
cache##lsize##_unroll32(start, hitop); \
start += lsize * 32; \
} while (start < end); \
- \
- __##pfx##flush_epilogue \
} \
\
static inline void extra##blast_##pfx##cache##lsize##_page_indexed(unsigned long page) \
current_cpu_data.desc.waybit; \
unsigned long ws, addr; \
\
- __##pfx##flush_prologue \
- \
for (ws = 0; ws < ws_end; ws += ws_inc) \
for (addr = start; addr < end; addr += lsize * 32) \
cache##lsize##_unroll32(addr|ws, indexop); \
- \
- __##pfx##flush_epilogue \
}
__BUILD_BLAST_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D, 16, )
unsigned long start = page; \
unsigned long end = page + PAGE_SIZE; \
\
- __##pfx##flush_prologue \
- \
do { \
cache##lsize##_unroll32_user(start, hitop); \
start += lsize * 32; \
} while (start < end); \
- \
- __##pfx##flush_epilogue \
}
__BUILD_BLAST_USER_CACHE(d, dcache, Index_Writeback_Inv_D, Hit_Writeback_Inv_D,
unsigned long addr = start & ~(lsize - 1); \
unsigned long aend = (end - 1) & ~(lsize - 1); \
\
- __##pfx##flush_prologue \
- \
while (1) { \
prot##cache_op(hitop, addr); \
if (addr == aend) \
break; \
addr += lsize; \
} \
- \
- __##pfx##flush_epilogue \
}
#ifndef CONFIG_EVA
unsigned long addr = start & ~(lsize - 1); \
unsigned long aend = (end - 1) & ~(lsize - 1); \
\
- __##pfx##flush_prologue \
- \
if (!uaccess_kernel()) { \
while (1) { \
protected_cachee_op(hitop, addr); \
} \
\
} \
- __##pfx##flush_epilogue \
}
__BUILD_PROT_BLAST_CACHE_RANGE(d, dcache, Hit_Writeback_Inv_D)
int (*cpu_disable)(void);
void (*cpu_die)(unsigned int cpu);
#endif
+#ifdef CONFIG_KEXEC
+ void (*kexec_nonboot_cpu)(void);
+#endif
};
extern void register_smp_ops(const struct plat_smp_ops *ops);
extern void play_dead(void);
#endif
+#ifdef CONFIG_KEXEC
+static inline void kexec_nonboot_cpu(void)
+{
+ extern const struct plat_smp_ops *mp_ops; /* private */
+
+ return mp_ops->kexec_nonboot_cpu();
+}
+
+static inline void *kexec_nonboot_cpu_func(void)
+{
+ extern const struct plat_smp_ops *mp_ops; /* private */
+
+ return mp_ops->kexec_nonboot_cpu;
+}
+#endif
+
/*
* This function will set up the necessary IPIs for Linux to communicate
* with the CPUs in mask.
obj-$(CONFIG_CPU_PM) += pm.o
obj-$(CONFIG_MIPS_CPS_PM) += pm-cps.o
-#
-# DSP ASE supported for MIPS32 or MIPS64 Release 2 cores only. It is not
-# safe to unconditionnaly use the assembler -mdsp / -mdspr2 switches
-# here because the compiler may use DSP ASE instructions (such as lwx) in
-# code paths where we cannot check that the CPU we are running on supports it.
-# Proper abstraction using HAVE_AS_DSP and macros is done in
-# arch/mips/include/asm/mipsregs.h.
-#
-ifeq ($(CONFIG_CPU_MIPSR2), y)
-CFLAGS_DSP = -DHAVE_AS_DSP
-
-CFLAGS_signal.o = $(CFLAGS_DSP)
-CFLAGS_signal32.o = $(CFLAGS_DSP)
-CFLAGS_process.o = $(CFLAGS_DSP)
-CFLAGS_branch.o = $(CFLAGS_DSP)
-CFLAGS_ptrace.o = $(CFLAGS_DSP)
-endif
-
CPPFLAGS_vmlinux.lds := $(KBUILD_CFLAGS)
if (!cpu_online(cpu))
return;
+ /* We won't be sent IPIs any more. */
+ set_cpu_online(cpu, false);
+
local_irq_disable();
if (!cpumask_test_cpu(cpu, &cpus_in_crash))
crash_save_cpu(regs, cpu);
while (!atomic_read(&kexec_ready_to_reboot))
cpu_relax();
- relocated_kexec_smp_wait(NULL);
+
+ kexec_reboot();
+
/* NOTREACHED */
}
*/
FEXPORT(__kernel_entry)
j kernel_entry
-#endif
+#endif /* CONFIG_BOOT_RAW */
__REF
0:
#ifdef CONFIG_USE_OF
-#ifdef CONFIG_MIPS_RAW_APPENDED_DTB
+#if defined(CONFIG_MIPS_RAW_APPENDED_DTB) || \
+ defined(CONFIG_MIPS_ELF_APPENDED_DTB)
+
PTR_LA t2, __appended_dtb
#ifdef CONFIG_CPU_BIG_ENDIAN
li t1, 0xd00dfeed
-#else
+#else /* !CONFIG_CPU_BIG_ENDIAN */
li t1, 0xedfe0dd0
-#endif
+#endif /* !CONFIG_CPU_BIG_ENDIAN */
lw t0, (t2)
beq t0, t1, dtb_found
-#endif
+#endif /* CONFIG_MIPS_RAW_APPENDED_DTB || CONFIG_MIPS_ELF_APPENDED_DTB */
li t1, -2
move t2, a1
beq a0, t1, dtb_found
li t2, 0
dtb_found:
-#endif
+#endif /* CONFIG_USE_OF */
PTR_LA t0, __bss_start # clear .bss
LONG_S zero, (t0)
PTR_LA t1, __bss_stop - LONGSIZE
* newly sync'd icache.
*/
jr.hb v0
-#else
+#else /* !CONFIG_RELOCATABLE */
j start_kernel
-#endif
+#endif /* !CONFIG_RELOCATABLE */
END(kernel_entry)
#ifdef CONFIG_SMP
#include <linux/kexec.h>
#include <linux/mm.h>
#include <linux/delay.h>
+#include <linux/libfdt.h>
#include <asm/cacheflush.h>
#include <asm/page.h>
extern unsigned long kexec_start_address;
extern unsigned long kexec_indirection_page;
-int (*_machine_kexec_prepare)(struct kimage *) = NULL;
-void (*_machine_kexec_shutdown)(void) = NULL;
-void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
+static unsigned long reboot_code_buffer;
+
#ifdef CONFIG_SMP
-void (*relocated_kexec_smp_wait) (void *);
+static void (*relocated_kexec_smp_wait)(void *);
+
atomic_t kexec_ready_to_reboot = ATOMIC_INIT(0);
void (*_crash_smp_send_stop)(void) = NULL;
#endif
+void (*_machine_kexec_shutdown)(void) = NULL;
+void (*_machine_crash_shutdown)(struct pt_regs *regs) = NULL;
+
static void kexec_image_info(const struct kimage *kimage)
{
unsigned long i;
}
}
+#ifdef CONFIG_UHI_BOOT
+
+static int uhi_machine_kexec_prepare(struct kimage *kimage)
+{
+ int i;
+
+ /*
+ * In case DTB file is not passed to the new kernel, a flat device
+ * tree will be created by kexec tool. It holds modified command
+ * line for the new kernel.
+ */
+ for (i = 0; i < kimage->nr_segments; i++) {
+ struct fdt_header fdt;
+
+ if (kimage->segment[i].memsz <= sizeof(fdt))
+ continue;
+
+ if (copy_from_user(&fdt, kimage->segment[i].buf, sizeof(fdt)))
+ continue;
+
+ if (fdt_check_header(&fdt))
+ continue;
+
+ kexec_args[0] = -2;
+ kexec_args[1] = (unsigned long)
+ phys_to_virt((unsigned long)kimage->segment[i].mem);
+ break;
+ }
+
+ return 0;
+}
+
+int (*_machine_kexec_prepare)(struct kimage *) = uhi_machine_kexec_prepare;
+
+#else
+
+int (*_machine_kexec_prepare)(struct kimage *) = NULL;
+
+#endif /* CONFIG_UHI_BOOT */
+
int
machine_kexec_prepare(struct kimage *kimage)
{
+#ifdef CONFIG_SMP
+ if (!kexec_nonboot_cpu_func())
+ return -EINVAL;
+#endif
+
kexec_image_info(kimage);
if (_machine_kexec_prepare)
return _machine_kexec_prepare(kimage);
+
return 0;
}
{
}
+#ifdef CONFIG_SMP
+static void kexec_shutdown_secondary(void *param)
+{
+ int cpu = smp_processor_id();
+
+ if (!cpu_online(cpu))
+ return;
+
+ /* We won't be sent IPIs any more. */
+ set_cpu_online(cpu, false);
+
+ local_irq_disable();
+ while (!atomic_read(&kexec_ready_to_reboot))
+ cpu_relax();
+
+ kexec_reboot();
+
+ /* NOTREACHED */
+}
+#endif
+
void
machine_shutdown(void)
{
if (_machine_kexec_shutdown)
_machine_kexec_shutdown();
+
+#ifdef CONFIG_SMP
+ smp_call_function(kexec_shutdown_secondary, NULL, 0);
+
+ while (num_online_cpus() > 1) {
+ cpu_relax();
+ mdelay(1);
+ }
+#endif
}
void
default_machine_crash_shutdown(regs);
}
-typedef void (*noretfun_t)(void) __noreturn;
+#ifdef CONFIG_SMP
+void kexec_nonboot_cpu_jump(void)
+{
+ local_flush_icache_range((unsigned long)relocated_kexec_smp_wait,
+ reboot_code_buffer + relocate_new_kernel_size);
+
+ relocated_kexec_smp_wait(NULL);
+}
+#endif
+
+void kexec_reboot(void)
+{
+ void (*do_kexec)(void) __noreturn;
+
+ /*
+ * We know we were online, and there will be no incoming IPIs at
+ * this point. Mark online again before rebooting so that the crash
+ * analysis tool will see us correctly.
+ */
+ set_cpu_online(smp_processor_id(), true);
+
+ /* Ensure remote CPUs observe that we're online before rebooting. */
+ smp_mb__after_atomic();
+
+#ifdef CONFIG_SMP
+ if (smp_processor_id() > 0) {
+ /*
+ * Instead of cpu_relax() or wait, this is needed for kexec
+ * smp reboot. Kdump usually doesn't require an smp new
+ * kernel, but kexec may do.
+ */
+ kexec_nonboot_cpu();
+
+ /* NOTREACHED */
+ }
+#endif
+
+ /*
+ * Make sure we get correct instructions written by the
+ * machine_kexec() CPU.
+ */
+ local_flush_icache_range(reboot_code_buffer,
+ reboot_code_buffer + relocate_new_kernel_size);
+
+ do_kexec = (void *)reboot_code_buffer;
+ do_kexec();
+}
void
machine_kexec(struct kimage *image)
{
- unsigned long reboot_code_buffer;
unsigned long entry;
unsigned long *ptr;
*ptr = (unsigned long) phys_to_virt(*ptr);
}
+ /* Mark offline BEFORE disabling local irq. */
+ set_cpu_online(smp_processor_id(), false);
+
/*
* we do not want to be bothered.
*/
printk("Will call new kernel at %08lx\n", image->start);
printk("Bye ...\n");
+ /* Make reboot code buffer available to the boot CPU. */
__flush_cache_all();
#ifdef CONFIG_SMP
/* All secondary cpus now may jump to kexec_wait cycle */
smp_wmb();
atomic_set(&kexec_ready_to_reboot, 1);
#endif
- ((noretfun_t) reboot_code_buffer)();
+ kexec_reboot();
}
}
__setup("config7=", config7_set);
-/* Experimental cache flush control parameters that should go away some day */
-int mt_protiflush;
-int mt_protdflush;
-int mt_n_iflushes = 1;
-int mt_n_dflushes = 1;
-
-static int __init set_protiflush(char *s)
-{
- mt_protiflush = 1;
- return 1;
-}
-__setup("protiflush", set_protiflush);
-
-static int __init set_protdflush(char *s)
-{
- mt_protdflush = 1;
- return 1;
-}
-__setup("protdflush", set_protdflush);
-
-static int __init niflush(char *s)
-{
- get_option(&s, &mt_n_iflushes);
- return 1;
-}
-__setup("niflush=", niflush);
-
-static int __init ndflush(char *s)
-{
- get_option(&s, &mt_n_dflushes);
- return 1;
-}
-__setup("ndflush=", ndflush);
-
static unsigned int itc_base;
static int __init set_itc_base(char *str)
printk("Config7: 0x%08x\n", read_c0_config7());
}
- /* Report Cache management debug options */
- if (mt_protiflush)
- printk("I-cache flushes single-threaded\n");
- if (mt_protdflush)
- printk("D-cache flushes single-threaded\n");
- if (mt_n_iflushes != 1)
- printk("I-Cache Flushes Repeated %d times\n", mt_n_iflushes);
- if (mt_n_dflushes != 1)
- printk("D-Cache Flushes Repeated %d times\n", mt_n_dflushes);
-
if (itc_base != 0) {
/*
* Configure ITC mapping. This code is very
}
}
-/*
- * Function to protect cache flushes from concurrent execution
- * depends on MP software model chosen.
- */
-
-void mt_cflush_lockdown(void)
-{
- /* FILL IN VSMP and AP/SP VERSIONS HERE */
-}
-
-void mt_cflush_release(void)
-{
- /* FILL IN VSMP and AP/SP VERSIONS HERE */
-}
-
struct class *mt_class;
static int __init mt_init(void)
break;
type = (*r >> 24) & 0xff;
- loc_orig = (void *)(kbase_old + ((*r & 0x00ffffff) << 2));
+ loc_orig = kbase_old + ((*r & 0x00ffffff) << 2);
loc_new = RELOCATED(loc_orig);
if (reloc_handlers_rel[type] == NULL) {
maybe_bswap_initrd();
- reserve_bootmem(__pa(initrd_start), size, BOOTMEM_DEFAULT);
+ memblock_reserve(__pa(initrd_start), size);
initrd_below_start_ok = 1;
pr_info("Initial ramdisk at: 0x%lx (%lu bytes)\n",
#else /* !CONFIG_SGI_IP27 */
-static unsigned long __init bootmap_bytes(unsigned long pages)
-{
- unsigned long bytes = DIV_ROUND_UP(pages, 8);
-
- return ALIGN(bytes, sizeof(long));
-}
-
static void __init bootmem_init(void)
{
unsigned long reserved_end;
- unsigned long mapstart = ~0UL;
- unsigned long bootmap_size;
phys_addr_t ramstart = PHYS_ADDR_MAX;
- bool bootmap_valid = false;
int i;
/*
init_initrd();
reserved_end = (unsigned long) PFN_UP(__pa_symbol(&_end));
+ memblock_reserve(PHYS_OFFSET, reserved_end << PAGE_SHIFT);
+
/*
* max_low_pfn is not a number of pages. The number of pages
* of the system is given by 'max_low_pfn - min_low_pfn'.
if (initrd_end && end <= (unsigned long)PFN_UP(__pa(initrd_end)))
continue;
#endif
- if (start >= mapstart)
- continue;
- mapstart = max(reserved_end, start);
}
if (min_low_pfn >= max_low_pfn)
/*
* Reserve any memory between the start of RAM and PHYS_OFFSET
*/
- if (ramstart > PHYS_OFFSET)
+ if (ramstart > PHYS_OFFSET) {
add_memory_region(PHYS_OFFSET, ramstart - PHYS_OFFSET,
BOOT_MEM_RESERVED);
+ memblock_reserve(PHYS_OFFSET, ramstart - PHYS_OFFSET);
+ }
if (min_low_pfn > ARCH_PFN_OFFSET) {
pr_info("Wasting %lu bytes for tracking %lu unused pages\n",
max_low_pfn = PFN_DOWN(HIGHMEM_START);
}
-#ifdef CONFIG_BLK_DEV_INITRD
- /*
- * mapstart should be after initrd_end
- */
- if (initrd_end)
- mapstart = max(mapstart, (unsigned long)PFN_UP(__pa(initrd_end)));
-#endif
-
- /*
- * check that mapstart doesn't overlap with any of
- * memory regions that have been reserved through eg. DTB
- */
- bootmap_size = bootmap_bytes(max_low_pfn - min_low_pfn);
-
- bootmap_valid = memory_region_available(PFN_PHYS(mapstart),
- bootmap_size);
- for (i = 0; i < boot_mem_map.nr_map && !bootmap_valid; i++) {
- unsigned long mapstart_addr;
-
- switch (boot_mem_map.map[i].type) {
- case BOOT_MEM_RESERVED:
- mapstart_addr = PFN_ALIGN(boot_mem_map.map[i].addr +
- boot_mem_map.map[i].size);
- if (PHYS_PFN(mapstart_addr) < mapstart)
- break;
-
- bootmap_valid = memory_region_available(mapstart_addr,
- bootmap_size);
- if (bootmap_valid)
- mapstart = PHYS_PFN(mapstart_addr);
- break;
- default:
- break;
- }
- }
-
- if (!bootmap_valid)
- panic("No memory area to place a bootmap bitmap");
-
- /*
- * Initialize the boot-time allocator with low memory only.
- */
- if (bootmap_size != init_bootmem_node(NODE_DATA(0), mapstart,
- min_low_pfn, max_low_pfn))
- panic("Unexpected memory size required for bootmap");
-
for (i = 0; i < boot_mem_map.nr_map; i++) {
unsigned long start, end;
default:
/* Not usable memory */
if (start > min_low_pfn && end < max_low_pfn)
- reserve_bootmem(boot_mem_map.map[i].addr,
- boot_mem_map.map[i].size,
- BOOTMEM_DEFAULT);
+ memblock_reserve(boot_mem_map.map[i].addr,
+ boot_mem_map.map[i].size);
+
continue;
}
size = end - start;
/* Register lowmem ranges */
- free_bootmem(PFN_PHYS(start), size << PAGE_SHIFT);
memory_present(0, start, end);
}
- /*
- * Reserve the bootmap memory.
- */
- reserve_bootmem(PFN_PHYS(mapstart), bootmap_size, BOOTMEM_DEFAULT);
-
#ifdef CONFIG_RELOCATABLE
/*
* The kernel reserves all memory below its _end symbol as bootmem,
#endif /* CONFIG_SGI_IP27 */
-/*
- * arch_mem_init - initialize memory management subsystem
- *
- * o plat_mem_setup() detects the memory configuration and will record detected
- * memory areas using add_memory_region.
- *
- * At this stage the memory configuration of the system is known to the
- * kernel but generic memory management system is still entirely uninitialized.
- *
- * o bootmem_init()
- * o sparse_init()
- * o paging_init()
- * o dma_contiguous_reserve()
- *
- * At this stage the bootmem allocator is ready to use.
- *
- * NOTE: historically plat_mem_setup did the entire platform initialization.
- * This was rather impractical because it meant plat_mem_setup had to
- * get away without any kind of memory allocator. To keep old code from
- * breaking plat_setup was just renamed to plat_mem_setup and a second platform
- * initialization hook for anything else was introduced.
- */
-
static int usermem __initdata;
static int __init early_parse_mem(char *p)
#define BUILTIN_EXTEND_WITH_PROM \
IS_ENABLED(CONFIG_MIPS_CMDLINE_BUILTIN_EXTEND)
+/*
+ * arch_mem_init - initialize memory management subsystem
+ *
+ * o plat_mem_setup() detects the memory configuration and will record detected
+ * memory areas using add_memory_region.
+ *
+ * At this stage the memory configuration of the system is known to the
+ * kernel but generic memory management system is still entirely uninitialized.
+ *
+ * o bootmem_init()
+ * o sparse_init()
+ * o paging_init()
+ * o dma_contiguous_reserve()
+ *
+ * At this stage the bootmem allocator is ready to use.
+ *
+ * NOTE: historically plat_mem_setup did the entire platform initialization.
+ * This was rather impractical because it meant plat_mem_setup had to
+ * get away without any kind of memory allocator. To keep old code from
+ * breaking plat_setup was just renamed to plat_mem_setup and a second platform
+ * initialization hook for anything else was introduced.
+ */
static void __init arch_mem_init(char **cmdline_p)
{
struct memblock_region *reg;
early_init_fdt_scan_reserved_mem();
bootmem_init();
+
+ /*
+ * Prevent memblock from allocating high memory.
+ * This cannot be done before max_low_pfn is detected, so up
+ * to this point is possible to only reserve physical memory
+ * with memblock_reserve; memblock_virt_alloc* can be used
+ * only after this point
+ */
+ memblock_set_current_limit(PFN_PHYS(max_low_pfn));
+
#ifdef CONFIG_PROC_VMCORE
if (setup_elfcorehdr && setup_elfcorehdr_size) {
printk(KERN_INFO "kdump reserved memory at %lx-%lx\n",
setup_elfcorehdr, setup_elfcorehdr_size);
- reserve_bootmem(setup_elfcorehdr, setup_elfcorehdr_size,
- BOOTMEM_DEFAULT);
+ memblock_reserve(setup_elfcorehdr, setup_elfcorehdr_size);
}
#endif
mips_parse_crashkernel();
#ifdef CONFIG_KEXEC
if (crashk_res.start != crashk_res.end)
- reserve_bootmem(crashk_res.start,
- crashk_res.end - crashk_res.start + 1,
- BOOTMEM_DEFAULT);
+ memblock_reserve(crashk_res.start,
+ crashk_res.end - crashk_res.start + 1);
#endif
device_tree_init();
sparse_init();
/* Tell bootmem about cma reserved memblock section */
for_each_memblock(reserved, reg)
if (reg->size != 0)
- reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT);
+ memblock_reserve(reg->base, reg->size);
reserve_bootmem_region(__pa_symbol(&__nosave_begin),
__pa_symbol(&__nosave_end)); /* Reserve for hibernation */
#include <linux/linkage.h>
#include <linux/bug.h>
#include <linux/kernel.h>
+#include <linux/kexec.h>
#include <asm/time.h>
#include <asm/pgtable.h>
.cpu_disable = bmips_cpu_disable,
.cpu_die = bmips_cpu_die,
#endif
+#ifdef CONFIG_KEXEC
+ .kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
+#endif
};
const struct plat_smp_ops bmips5000_smp_ops = {
.cpu_disable = bmips_cpu_disable,
.cpu_die = bmips_cpu_die,
#endif
+#ifdef CONFIG_KEXEC
+ .kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
+#endif
};
#endif /* CONFIG_SMP */
local_irq_enable();
}
+#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_KEXEC)
+
+enum cpu_death {
+ CPU_DEATH_HALT,
+ CPU_DEATH_POWER,
+};
+
+static void cps_shutdown_this_cpu(enum cpu_death death)
+{
+ unsigned int cpu, core, vpe_id;
+
+ cpu = smp_processor_id();
+ core = cpu_core(&cpu_data[cpu]);
+
+ if (death == CPU_DEATH_HALT) {
+ vpe_id = cpu_vpe_id(&cpu_data[cpu]);
+
+ pr_debug("Halting core %d VP%d\n", core, vpe_id);
+ if (cpu_has_mipsmt) {
+ /* Halt this TC */
+ write_c0_tchalt(TCHALT_H);
+ instruction_hazard();
+ } else if (cpu_has_vp) {
+ write_cpc_cl_vp_stop(1 << vpe_id);
+
+ /* Ensure that the VP_STOP register is written */
+ wmb();
+ }
+ } else {
+ pr_debug("Gating power to core %d\n", core);
+ /* Power down the core */
+ cps_pm_enter_state(CPS_PM_POWER_GATED);
+ }
+}
+
+#ifdef CONFIG_KEXEC
+
+static void cps_kexec_nonboot_cpu(void)
+{
+ if (cpu_has_mipsmt || cpu_has_vp)
+ cps_shutdown_this_cpu(CPU_DEATH_HALT);
+ else
+ cps_shutdown_this_cpu(CPU_DEATH_POWER);
+}
+
+#endif /* CONFIG_KEXEC */
+
+#endif /* CONFIG_HOTPLUG_CPU || CONFIG_KEXEC */
+
#ifdef CONFIG_HOTPLUG_CPU
static int cps_cpu_disable(void)
}
static unsigned cpu_death_sibling;
-static enum {
- CPU_DEATH_HALT,
- CPU_DEATH_POWER,
-} cpu_death;
+static enum cpu_death cpu_death;
void play_dead(void)
{
- unsigned int cpu, core, vpe_id;
+ unsigned int cpu;
local_irq_disable();
idle_task_exit();
cpu = smp_processor_id();
- core = cpu_core(&cpu_data[cpu]);
cpu_death = CPU_DEATH_POWER;
pr_debug("CPU%d going offline\n", cpu);
/* This CPU has chosen its way out */
(void)cpu_report_death();
- if (cpu_death == CPU_DEATH_HALT) {
- vpe_id = cpu_vpe_id(&cpu_data[cpu]);
-
- pr_debug("Halting core %d VP%d\n", core, vpe_id);
- if (cpu_has_mipsmt) {
- /* Halt this TC */
- write_c0_tchalt(TCHALT_H);
- instruction_hazard();
- } else if (cpu_has_vp) {
- write_cpc_cl_vp_stop(1 << vpe_id);
-
- /* Ensure that the VP_STOP register is written */
- wmb();
- }
- } else {
- pr_debug("Gating power to core %d\n", core);
- /* Power down the core */
- cps_pm_enter_state(CPS_PM_POWER_GATED);
- }
+ cps_shutdown_this_cpu(cpu_death);
/* This should never be reached */
panic("Failed to offline CPU %u", cpu);
.cpu_disable = cps_cpu_disable,
.cpu_die = cps_cpu_die,
#endif
+#ifdef CONFIG_KEXEC
+ .kexec_nonboot_cpu = cps_kexec_nonboot_cpu,
+#endif
};
bool mips_cps_smp_in_use(void)
#include <linux/spinlock.h>
#include <linux/kallsyms.h>
#include <linux/bootmem.h>
+#include <linux/memblock.h>
#include <linux/interrupt.h>
#include <linux/ptrace.h>
#include <linux/kgdb.h>
*/
void show_regs(struct pt_regs *regs)
{
- __show_regs((struct pt_regs *)regs);
+ __show_regs(regs);
dump_stack();
}
unsigned long size = 0x200 + VECTORSPACING*64;
phys_addr_t ebase_pa;
+ memblock_set_bottom_up(true);
ebase = (unsigned long)
__alloc_bootmem(size, 1 << fls(size), 0);
+ memblock_set_bottom_up(false);
/*
* Try to ensure ebase resides in KSeg0 if possible.
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
#define _LoadW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else
-/* MIPSR6 has no lwl instruction */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without lwl instruction */
#define _LoadW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define _LoadHWU(addr, value, res, type) \
do { \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
#define _LoadWU(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else
-/* MIPSR6 has not lwl and ldl instructions */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without lwl and ldl instructions */
#define _LoadWU(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define _StoreHW(addr, value, res, type) \
: "r" (value), "r" (addr), "i" (-EFAULT));\
} while(0)
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
#define _StoreW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else
-/* MIPSR6 has no swl and sdl instructions */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define _StoreW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "memory"); \
} while(0)
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#else /* __BIG_ENDIAN */
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
#define _LoadW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else
-/* MIPSR6 has no lwl instruction */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without lwl instruction */
#define _LoadW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define _LoadHWU(addr, value, res, type) \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
#define _LoadWU(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else
-/* MIPSR6 has not lwl and ldl instructions */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without lwl and ldl instructions */
#define _LoadWU(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "=&r" (value), "=r" (res) \
: "r" (addr), "i" (-EFAULT)); \
} while(0)
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define _StoreHW(addr, value, res, type) \
do { \
: "r" (value), "r" (addr), "i" (-EFAULT));\
} while(0)
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
#define _StoreW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "r" (value), "r" (addr), "i" (-EFAULT)); \
} while(0)
-#else
-/* MIPSR6 has no swl and sdl instructions */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
+/* For CPUs without swl and sdl instructions */
#define _StoreW(addr, value, res, type) \
do { \
__asm__ __volatile__ ( \
: "memory"); \
} while(0)
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#endif
#define LoadHWU(addr, value, res) _LoadHWU(addr, value, res, kernel)
mips-atomic.o strncpy_user.o \
strnlen_user.o uncached.o
-obj-y += iomap.o iomap_copy.o
+obj-y += iomap_copy.o
obj-$(CONFIG_PCI) += iomap-pci.o
lib-$(CONFIG_GENERIC_CSUM) := $(filter-out csum_partial.o, $(lib-y))
}
#endif /* CONFIG_PCI_DRIVERS_LEGACY */
-
-void pci_iounmap(struct pci_dev *dev, void __iomem * addr)
-{
- iounmap(addr);
-}
-
-EXPORT_SYMBOL(pci_iounmap);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Implement the default iomap interfaces
- *
- * (C) Copyright 2004 Linus Torvalds
- * (C) Copyright 2006 Ralf Baechle <ralf@linux-mips.org>
- * (C) Copyright 2007 MIPS Technologies, Inc.
- * written by Ralf Baechle <ralf@linux-mips.org>
- */
-#include <linux/export.h>
-#include <asm/io.h>
-
-/*
- * Read/write from/to an (offsettable) iomem cookie. It might be a PIO
- * access or a MMIO access, these functions don't care. The info is
- * encoded in the hardware mapping set up by the mapping functions
- * (or the cookie itself, depending on implementation and hw).
- *
- * The generic routines don't assume any hardware mappings, and just
- * encode the PIO/MMIO as part of the cookie. They coldly assume that
- * the MMIO IO mappings are not in the low address range.
- *
- * Architectures for which this is not true can't use this generic
- * implementation and should do their own copy.
- */
-
-#define PIO_MASK 0x0ffffUL
-
-unsigned int ioread8(void __iomem *addr)
-{
- return readb(addr);
-}
-
-EXPORT_SYMBOL(ioread8);
-
-unsigned int ioread16(void __iomem *addr)
-{
- return readw(addr);
-}
-
-EXPORT_SYMBOL(ioread16);
-
-unsigned int ioread16be(void __iomem *addr)
-{
- return be16_to_cpu(__raw_readw(addr));
-}
-
-EXPORT_SYMBOL(ioread16be);
-
-unsigned int ioread32(void __iomem *addr)
-{
- return readl(addr);
-}
-
-EXPORT_SYMBOL(ioread32);
-
-unsigned int ioread32be(void __iomem *addr)
-{
- return be32_to_cpu(__raw_readl(addr));
-}
-
-EXPORT_SYMBOL(ioread32be);
-
-void iowrite8(u8 val, void __iomem *addr)
-{
- writeb(val, addr);
-}
-
-EXPORT_SYMBOL(iowrite8);
-
-void iowrite16(u16 val, void __iomem *addr)
-{
- writew(val, addr);
-}
-
-EXPORT_SYMBOL(iowrite16);
-
-void iowrite16be(u16 val, void __iomem *addr)
-{
- __raw_writew(cpu_to_be16(val), addr);
-}
-
-EXPORT_SYMBOL(iowrite16be);
-
-void iowrite32(u32 val, void __iomem *addr)
-{
- writel(val, addr);
-}
-
-EXPORT_SYMBOL(iowrite32);
-
-void iowrite32be(u32 val, void __iomem *addr)
-{
- __raw_writel(cpu_to_be32(val), addr);
-}
-
-EXPORT_SYMBOL(iowrite32be);
-
-/*
- * These are the "repeat MMIO read/write" functions.
- * Note the "__mem" accesses, since we want to convert
- * to CPU byte order if the host bus happens to not match the
- * endianness of PCI/ISA (see mach-generic/mangle-port.h).
- */
-static inline void mmio_insb(void __iomem *addr, u8 *dst, int count)
-{
- while (--count >= 0) {
- u8 data = __mem_readb(addr);
- *dst = data;
- dst++;
- }
-}
-
-static inline void mmio_insw(void __iomem *addr, u16 *dst, int count)
-{
- while (--count >= 0) {
- u16 data = __mem_readw(addr);
- *dst = data;
- dst++;
- }
-}
-
-static inline void mmio_insl(void __iomem *addr, u32 *dst, int count)
-{
- while (--count >= 0) {
- u32 data = __mem_readl(addr);
- *dst = data;
- dst++;
- }
-}
-
-static inline void mmio_outsb(void __iomem *addr, const u8 *src, int count)
-{
- while (--count >= 0) {
- __mem_writeb(*src, addr);
- src++;
- }
-}
-
-static inline void mmio_outsw(void __iomem *addr, const u16 *src, int count)
-{
- while (--count >= 0) {
- __mem_writew(*src, addr);
- src++;
- }
-}
-
-static inline void mmio_outsl(void __iomem *addr, const u32 *src, int count)
-{
- while (--count >= 0) {
- __mem_writel(*src, addr);
- src++;
- }
-}
-
-void ioread8_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- mmio_insb(addr, dst, count);
-}
-
-EXPORT_SYMBOL(ioread8_rep);
-
-void ioread16_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- mmio_insw(addr, dst, count);
-}
-
-EXPORT_SYMBOL(ioread16_rep);
-
-void ioread32_rep(void __iomem *addr, void *dst, unsigned long count)
-{
- mmio_insl(addr, dst, count);
-}
-
-EXPORT_SYMBOL(ioread32_rep);
-
-void iowrite8_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- mmio_outsb(addr, src, count);
-}
-
-EXPORT_SYMBOL(iowrite8_rep);
-
-void iowrite16_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- mmio_outsw(addr, src, count);
-}
-
-EXPORT_SYMBOL(iowrite16_rep);
-
-void iowrite32_rep(void __iomem *addr, const void *src, unsigned long count)
-{
- mmio_outsl(addr, src, count);
-}
-
-EXPORT_SYMBOL(iowrite32_rep);
-
-/*
- * Create a virtual mapping cookie for an IO port range
- *
- * This uses the same mapping are as the in/out family which has to be setup
- * by the platform initialization code.
- *
- * Just to make matters somewhat more interesting on MIPS systems with
- * multiple host bridge each will have it's own ioport address space.
- */
-static void __iomem *ioport_map_legacy(unsigned long port, unsigned int nr)
-{
- return (void __iomem *) (mips_io_port_base + port);
-}
-
-void __iomem *ioport_map(unsigned long port, unsigned int nr)
-{
- if (port > PIO_MASK)
- return NULL;
-
- return ioport_map_legacy(port, nr);
-}
-
-EXPORT_SYMBOL(ioport_map);
-
-void ioport_unmap(void __iomem *addr)
-{
- /* Nothing to do */
-}
-
-EXPORT_SYMBOL(ioport_unmap);
#define LOADB(reg, addr, handler) EXC(lb, LD_INSN, reg, addr, handler)
#define STOREB(reg, addr, handler) EXC(sb, ST_INSN, reg, addr, handler)
-#define _PREF(hint, addr, type) \
+#ifdef CONFIG_CPU_HAS_PREFETCH
+# define _PREF(hint, addr, type) \
.if \mode == LEGACY_MODE; \
- PREF(hint, addr); \
+ kernel_pref(hint, addr); \
.else; \
.if ((\from == USEROP) && (type == SRC_PREFETCH)) || \
((\to == USEROP) && (type == DST_PREFETCH)); \
* used later on. Therefore use $v1. \
*/ \
.set at=v1; \
- PREFE(hint, addr); \
+ user_pref(hint, addr); \
.set noat; \
.else; \
- PREF(hint, addr); \
+ kernel_pref(hint, addr); \
.endif; \
.endif
+#else
+# define _PREF(hint, addr, type)
+#endif
#define PREFS(hint, addr) _PREF(hint, addr, SRC_PREFETCH)
#define PREFD(hint, addr) _PREF(hint, addr, DST_PREFETCH)
and t0, src, ADDRMASK
PREFS( 0, 2*32(src) )
PREFD( 1, 2*32(dst) )
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
bnez t1, .Ldst_unaligned\@
nop
bnez t0, .Lsrc_unaligned_dst_aligned\@
bne rem, len, 1b
.set noreorder
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
/*
* src and dst are aligned, need to copy rem bytes (rem < NBYTES)
* A loop would do only a byte at a time with possible branch
bne len, rem, 1b
.set noreorder
-#endif /* !CONFIG_CPU_MIPSR6 */
+#endif /* CONFIG_CPU_HAS_LOAD_STORE_LR */
.Lcopy_bytes_checklen\@:
beqz len, .Ldone\@
nop
jr ra
nop
-#ifdef CONFIG_CPU_MIPSR6
+#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR
.Lcopy_unaligned_bytes\@:
1:
COPY_BYTE(0)
ADD src, src, 8
b 1b
ADD dst, dst, 8
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
.if __memcpy == 1
END(memcpy)
.set __memcpy, 0
#endif
.endm
- .set noreorder
.align 5
/*
.endif
sltiu t0, a2, STORSIZE /* very small region? */
+ .set noreorder
bnez t0, .Lsmall_memset\@
andi t0, a0, STORMASK /* aligned? */
+ .set reorder
#ifdef CONFIG_CPU_MICROMIPS
move t8, a1 /* used by 'swp' instruction */
move t9, a1
#endif
+ .set noreorder
#ifndef CONFIG_CPU_DADDI_WORKAROUNDS
beqz t0, 1f
PTR_SUBU t0, STORSIZE /* alignment in bytes */
PTR_SUBU t0, AT /* alignment in bytes */
.set at
#endif
+ .set reorder
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_L, a1, (a0), .Lfirst_fixup\@) /* make word/dword aligned */
PTR_SUBU a0, t0 /* long align ptr */
PTR_ADDU a2, t0 /* correct size */
-#else /* CONFIG_CPU_MIPSR6 */
+#else /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
#define STORE_BYTE(N) \
EX(sb, a1, N(a0), .Lbyte_fixup\@); \
+ .set noreorder; \
beqz t0, 0f; \
- PTR_ADDU t0, 1;
+ PTR_ADDU t0, 1; \
+ .set reorder;
PTR_ADDU a2, t0 /* correct size */
PTR_ADDU t0, 1
ori a0, STORMASK
xori a0, STORMASK
PTR_ADDIU a0, STORSIZE
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
1: ori t1, a2, 0x3f /* # of full blocks */
xori t1, 0x3f
+ andi t0, a2, 0x40-STORSIZE
beqz t1, .Lmemset_partial\@ /* no block to fill */
- andi t0, a2, 0x40-STORSIZE
PTR_ADDU t1, a0 /* end address */
- .set reorder
1: PTR_ADDIU a0, 64
R10KCBARRIER(0(ra))
f_fill64 a0, -64, FILL64RG, .Lfwd_fixup\@, \mode
bne t1, a0, 1b
- .set noreorder
.Lmemset_partial\@:
R10KCBARRIER(0(ra))
PTR_SUBU t1, AT
.set at
#endif
+ PTR_ADDU a0, t0 /* dest ptr */
jr t1
- PTR_ADDU a0, t0 /* dest ptr */
- .set push
- .set noreorder
- .set nomacro
/* ... but first do longs ... */
f_fill64 a0, -64, FILL64RG, .Lpartial_fixup\@, \mode
-2: .set pop
- andi a2, STORMASK /* At most one long to go */
+2: andi a2, STORMASK /* At most one long to go */
+ .set noreorder
beqz a2, 1f
-#ifndef CONFIG_CPU_MIPSR6
+#ifdef CONFIG_CPU_HAS_LOAD_STORE_LR
PTR_ADDU a0, a2 /* What's left */
+ .set reorder
R10KCBARRIER(0(ra))
#ifdef __MIPSEB__
EX(LONG_S_R, a1, -1(a0), .Llast_fixup\@)
#endif
#else
PTR_SUBU t0, $0, a2
+ .set reorder
move a2, zero /* No remaining longs */
PTR_ADDIU t0, 1
STORE_BYTE(0)
#endif
0:
#endif
-1: jr ra
- move a2, zero
+1: move a2, zero
+ jr ra
.Lsmall_memset\@:
+ PTR_ADDU t1, a0, a2
beqz a2, 2f
- PTR_ADDU t1, a0, a2
1: PTR_ADDIU a0, 1 /* fill bytewise */
R10KCBARRIER(0(ra))
+ .set noreorder
bne t1, a0, 1b
EX(sb, a1, -1(a0), .Lsmall_fixup\@)
+ .set reorder
-2: jr ra /* done */
- move a2, zero
+2: move a2, zero
+ jr ra /* done */
.if __memset == 1
END(memset)
.set __memset, 0
.hidden __memset
.endif
-#ifdef CONFIG_CPU_MIPSR6
+#ifndef CONFIG_CPU_HAS_LOAD_STORE_LR
.Lbyte_fixup\@:
/*
* unset_bytes = (#bytes - (#unaligned bytes)) - (-#unaligned bytes remaining + 1) + 1
* a2 = a2 - t0 + 1
*/
PTR_SUBU a2, t0
+ PTR_ADDIU a2, 1
jr ra
- PTR_ADDIU a2, 1
-#endif /* CONFIG_CPU_MIPSR6 */
+#endif /* !CONFIG_CPU_HAS_LOAD_STORE_LR */
.Lfirst_fixup\@:
/* unset_bytes already in a2 */
jr ra
- nop
.Lfwd_fixup\@:
/*
andi a2, 0x3f
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, t1
+ LONG_SUBU a2, t0
jr ra
- LONG_SUBU a2, t0
.Lpartial_fixup\@:
/*
andi a2, STORMASK
LONG_L t0, THREAD_BUADDR(t0)
LONG_ADDU a2, a0
+ LONG_SUBU a2, t0
jr ra
- LONG_SUBU a2, t0
.Llast_fixup\@:
/* unset_bytes already in a2 */
jr ra
- nop
.Lsmall_fixup\@:
/*
* unset_bytes = end_addr - current_addr + 1
* a2 = t1 - a0 + 1
*/
- .set reorder
PTR_SUBU a2, t1, a0
PTR_ADDIU a2, 1
jr ra
- .set noreorder
.endm
LEAF(memset)
EXPORT_SYMBOL(memset)
+ move v0, a0 /* result */
beqz a1, 1f
- move v0, a0 /* result */
andi a1, 0xff /* spread fillword */
LONG_SLL t1, a1, 8
obj-y += setup.o init.o cmdline.o env.o time.o reset.o irq.o \
bonito-irq.o mem.o machtype.o platform.o serial.o
obj-$(CONFIG_PCI) += pci.o
-obj-$(CONFIG_CPU_LOONGSON2) += dma.o
#
# Serial port support
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/dma-direct.h>
-
-dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
-{
- return paddr | 0x80000000;
-}
-
-phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
-{
-#if defined(CONFIG_CPU_LOONGSON2F) && defined(CONFIG_64BIT)
- if (dma_addr > 0x8fffffff)
- return dma_addr;
- return dma_addr & 0x0fffffff;
-#else
- return dma_addr & 0x7fffffff;
-#endif
-}
# Makefile for Lemote Fuloong2e mini-PC board.
#
-obj-y += irq.o reset.o
+obj-y += irq.o reset.o dma.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/dma-direct.h>
+
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr | 0x80000000;
+}
+
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+{
+ return dma_addr & 0x7fffffff;
+}
# Makefile for lemote loongson2f family machines
#
-obj-y += clock.o machtype.o irq.o reset.o ec_kb3310b.o
+obj-y += clock.o machtype.o irq.o reset.o dma.o ec_kb3310b.o
#
# Suspend Support
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/dma-direct.h>
+
+dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr)
+{
+ return paddr | 0x80000000;
+}
+
+phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr)
+{
+ if (dma_addr > 0x8fffffff)
+ return dma_addr;
+ return dma_addr & 0x0fffffff;
+}
}
}
-static struct irqaction cascade_irqaction = {
- .handler = no_action,
- .flags = IRQF_NO_SUSPEND,
- .name = "cascade",
-};
-
-static inline void mask_loongson_irq(struct irq_data *d)
-{
- clear_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
- irq_disable_hazard();
-
- /* Workaround: UART IRQ may deliver to any core */
- if (d->irq == LOONGSON_UART_IRQ) {
- int cpu = smp_processor_id();
- int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node;
- int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node;
- u64 intenclr_addr = smp_group[node_id] |
- (u64)(&LOONGSON_INT_ROUTER_INTENCLR);
- u64 introuter_lpc_addr = smp_group[node_id] |
- (u64)(&LOONGSON_INT_ROUTER_LPC);
-
- *(volatile u32 *)intenclr_addr = 1 << 10;
- *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<<core_id);
- }
-}
-
-static inline void unmask_loongson_irq(struct irq_data *d)
-{
- /* Workaround: UART IRQ may deliver to any core */
- if (d->irq == LOONGSON_UART_IRQ) {
- int cpu = smp_processor_id();
- int node_id = cpu_logical_map(cpu) / loongson_sysconf.cores_per_node;
- int core_id = cpu_logical_map(cpu) % loongson_sysconf.cores_per_node;
- u64 intenset_addr = smp_group[node_id] |
- (u64)(&LOONGSON_INT_ROUTER_INTENSET);
- u64 introuter_lpc_addr = smp_group[node_id] |
- (u64)(&LOONGSON_INT_ROUTER_LPC);
-
- *(volatile u32 *)intenset_addr = 1 << 10;
- *(volatile u8 *)introuter_lpc_addr = 0x10 + (1<<core_id);
- }
-
- set_c0_status(0x100 << (d->irq - MIPS_CPU_IRQ_BASE));
- irq_enable_hazard();
-}
+static inline void mask_loongson_irq(struct irq_data *d) { }
+static inline void unmask_loongson_irq(struct irq_data *d) { }
/* For MIPS IRQs which shared by all cores */
static struct irq_chip loongson_irq_chip = {
chip->irq_set_affinity = plat_set_irq_affinity;
irq_set_chip_and_handler(LOONGSON_UART_IRQ,
- &loongson_irq_chip, handle_level_irq);
-
- /* setup HT1 irq */
- setup_irq(LOONGSON_HT1_IRQ, &cascade_irqaction);
+ &loongson_irq_chip, handle_percpu_irq);
+ irq_set_chip_and_handler(LOONGSON_BRIDGE_IRQ,
+ &loongson_irq_chip, handle_percpu_irq);
- set_c0_status(STATUSF_IP2 | STATUSF_IP6);
+ set_c0_status(STATUSF_IP2 | STATUSF_IP3 | STATUSF_IP6);
}
#ifdef CONFIG_HOTPLUG_CPU
static void __init node_mem_init(unsigned int node)
{
- unsigned long bootmap_size;
unsigned long node_addrspace_offset;
- unsigned long start_pfn, end_pfn, freepfn;
+ unsigned long start_pfn, end_pfn;
node_addrspace_offset = nid_to_addroffset(node);
pr_info("Node%d's addrspace_offset is 0x%lx\n",
node, node_addrspace_offset);
get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
- freepfn = start_pfn;
- if (node == 0)
- freepfn = PFN_UP(__pa_symbol(&_end)); /* kernel end address */
- pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx, freepfn=0x%lx\n",
- node, start_pfn, end_pfn, freepfn);
+ pr_info("Node%d: start_pfn=0x%lx, end_pfn=0x%lx\n",
+ node, start_pfn, end_pfn);
__node_data[node] = prealloc__node_data + node;
- NODE_DATA(node)->bdata = &bootmem_node_data[node];
NODE_DATA(node)->node_start_pfn = start_pfn;
NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
- bootmap_size = init_bootmem_node(NODE_DATA(node), freepfn,
- start_pfn, end_pfn);
free_bootmem_with_active_regions(node, end_pfn);
- if (node == 0) /* used by finalize_initrd() */
+
+ if (node == 0) {
+ /* kernel end address */
+ unsigned long kernel_end_pfn = PFN_UP(__pa_symbol(&_end));
+
+ /* used by finalize_initrd() */
max_low_pfn = end_pfn;
- /* This is reserved for the kernel and bdata->node_bootmem_map */
- reserve_bootmem_node(NODE_DATA(node), start_pfn << PAGE_SHIFT,
- ((freepfn - start_pfn) << PAGE_SHIFT) + bootmap_size,
- BOOTMEM_DEFAULT);
+ /* Reserve the kernel text/data/bss */
+ memblock_reserve(start_pfn << PAGE_SHIFT,
+ ((kernel_end_pfn - start_pfn) << PAGE_SHIFT));
- if (node == 0 && node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT)) {
/* Reserve 0xfe000000~0xffffffff for RS780E integrated GPU */
- reserve_bootmem_node(NODE_DATA(node),
- (node_addrspace_offset | 0xfe000000),
- 32 << 20, BOOTMEM_DEFAULT);
+ if (node_end_pfn(0) >= (0xffffffff >> PAGE_SHIFT))
+ memblock_reserve((node_addrspace_offset | 0xfe000000),
+ 32 << 20);
}
sparse_memory_present_with_active_regions(node);
#include <linux/sched/task_stack.h>
#include <linux/smp.h>
#include <linux/cpufreq.h>
+#include <linux/kexec.h>
#include <asm/processor.h>
#include <asm/time.h>
#include <asm/clock.h>
write_c0_compare(read_c0_count() + mips_hpt_frequency/HZ);
local_irq_enable();
loongson3_ipi_write64(0,
- (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0));
+ ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0);
pr_info("CPU#%d finished, CP0_ST=%x\n",
smp_processor_id(), read_c0_status());
}
cpu, startargs[0], startargs[1], startargs[2]);
loongson3_ipi_write64(startargs[3],
- (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x18));
+ ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x18);
loongson3_ipi_write64(startargs[2],
- (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x10));
+ ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x10);
loongson3_ipi_write64(startargs[1],
- (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x8));
+ ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x8);
loongson3_ipi_write64(startargs[0],
- (void *)(ipi_mailbox_buf[cpu_logical_map(cpu)]+0x0));
+ ipi_mailbox_buf[cpu_logical_map(cpu)] + 0x0);
return 0;
}
.cpu_disable = loongson3_cpu_disable,
.cpu_die = loongson3_cpu_die,
#endif
+#ifdef CONFIG_KEXEC
+ .kexec_nonboot_cpu = kexec_nonboot_cpu_jump,
+#endif
};
#include <linux/kcore.h>
#include <linux/initrd.h>
-#include <asm/asm-offsets.h>
#include <asm/bootinfo.h>
#include <asm/cachectl.h>
#include <asm/cpu.h>
#endif
/*
- * gcc 3.3 and older have trouble determining that PTRS_PER_PGD and PGD_ORDER
- * are constants. So we use the variants from asm-offset.h until that gcc
- * will officially be retired.
- *
* Align swapper_pg_dir in to 64K, allows its address to be loaded
* with a single LUI instruction in the TLB handlers. If we used
* __aligned(64K), its size would get rounded up to the alignment
* size, and waste space. So we place it in its own section and align
* it in the linker script.
*/
-pgd_t swapper_pg_dir[_PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
+pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
#ifndef __PAGETABLE_PUD_FOLDED
pud_t invalid_pud_table[PTRS_PER_PUD] __page_aligned_bss;
#endif
/* we need a hack to get the PIC's SoC chip id */
ret = of_address_to_resource(node, 0, &res);
if (ret < 0) {
- pr_err("PIC %s: reg property not found!\n", node->name);
+ pr_err("PIC %pOFn: reg property not found!\n", node);
return -EINVAL;
}
break;
}
if (socid == NLM_NR_NODES) {
- pr_err("PIC %s: Node mapping for bus %d not found!\n",
- node->name, bus);
+ pr_err("PIC %pOFn: Node mapping for bus %d not found!\n",
+ node, bus);
return -EINVAL;
}
} else {
socid = (res.start >> 18) & 0x3;
if (!nlm_node_present(socid)) {
- pr_err("PIC %s: node %d does not exist!\n",
- node->name, socid);
+ pr_err("PIC %pOFn: node %d does not exist!\n",
+ node, socid);
return -EINVAL;
}
}
if (!nlm_node_present(socid)) {
- pr_err("PIC %s: node %d does not exist!\n", node->name, socid);
+ pr_err("PIC %pOFn: node %d does not exist!\n", node, socid);
return -EINVAL;
}
nlm_irq_to_xirq(socid, PIC_IRQ_BASE), PIC_IRQ_BASE,
&xlp_pic_irq_domain_ops, NULL);
if (xlp_pic_domain == NULL) {
- pr_err("PIC %s: Creating legacy domain failed!\n", node->name);
+ pr_err("PIC %pOFn: Creating legacy domain failed!\n", node);
return -EINVAL;
}
pr_info("Node %d: IRQ domain created for PIC@%pR\n", socid, &res);
int where, u32 *data)
{
unsigned char busnum = bus->number;
- u_int64_t addr, type;
- void *addrp;
- int device = PCI_SLOT(devfn);
int function = PCI_FUNC(devfn);
+ int device = PCI_SLOT(devfn);
int reg = where & ~3;
+ void *addrp;
+ u64 addr;
+
+ if (where < PCI_CFG_SPACE_SIZE) { /* standard config */
+ addr = (busnum << 16) | (device << 11) | (function << 8) | reg;
+ if (busnum == 0) {
+ if (device > 31)
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE | addr);
+ } else {
+ addrp = (void *)TO_UNCAC(HT1LO_PCICFG_BASE_TP1 | addr);
+ }
+ } else if (where < PCI_CFG_SPACE_EXP_SIZE) { /* extended config */
+ struct pci_dev *rootdev;
+
+ rootdev = pci_get_domain_bus_and_slot(0, 0, 0);
+ if (!rootdev)
+ return PCIBIOS_DEVICE_NOT_FOUND;
- addr = (busnum << 16) | (device << 11) | (function << 8) | reg;
- if (busnum == 0) {
- if (device > 31)
+ addr = pci_resource_start(rootdev, 3);
+ if (!addr)
return PCIBIOS_DEVICE_NOT_FOUND;
- addrp = (void *)(TO_UNCAC(HT1LO_PCICFG_BASE) | (addr & 0xffff));
- type = 0;
+ addr |= busnum << 20 | device << 15 | function << 12 | reg;
+ addrp = (void *)TO_UNCAC(addr);
} else {
- addrp = (void *)(TO_UNCAC(HT1LO_PCICFG_BASE_TP1) | (addr));
- type = 0x10000;
+ return PCIBIOS_DEVICE_NOT_FOUND;
}
if (access_type == PCI_ACCESS_WRITE)
if (pci_has_flag(PCI_PROBE_ONLY)) {
pci_bus_claim_resources(bus);
} else {
+ struct pci_bus *child;
+
pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
+ list_for_each_entry(child, &bus->children, node)
+ pcie_bus_configure_settings(child);
}
pci_bus_add_devices(bus);
}
rt2880_pci_write_u32(PCI_BASE_ADDRESS_0, 0x08000000);
(void) rt2880_pci_read_u32(PCI_BASE_ADDRESS_0);
+ rt2880_pci_controller.of_node = pdev->dev.of_node;
+
register_pci_controller(&rt2880_pci_controller);
return 0;
}
* "D" for device-mode. If it works for Ethernet, why not USB...
* -- hammtrev, 2007/03/22
*/
- snprintf((char *)&envstr[0], sizeof(envstr), "usbmode");
+ snprintf(&envstr[0], sizeof(envstr), "usbmode");
/* set default host mode */
val = 1;
/* get environment string */
- strp = prom_getenv((char *)&envstr[0]);
+ strp = prom_getenv(&envstr[0]);
if (strp) {
/* compare string */
if (!strcmp(strp, "device"))
systick.dev.min_delta_ticks = 0x3;
systick.dev.irq = irq_of_parse_and_map(np, 0);
if (!systick.dev.irq) {
- pr_err("%s: request_irq failed", np->name);
+ pr_err("%pOFn: request_irq failed", np);
return -EINVAL;
}
clockevents_register_device(&systick.dev);
- pr_info("%s: running - mult: %d, shift: %d\n",
- np->name, systick.dev.mult, systick.dev.shift);
+ pr_info("%pOFn: running - mult: %d, shift: %d\n",
+ np, systick.dev.mult, systick.dev.shift);
return 0;
}
pdev = of_find_device_by_node(np);
if (!pdev) {
- pr_err("%s: failed to lookup pdev\n", np->name);
+ pr_err("%pOFn: failed to lookup pdev\n", np);
return -EINVAL;
}
static struct rt2880_pmx_func rt3352_lna_func[] = { FUNC("lna", 0, 36, 2) };
static struct rt2880_pmx_func rt3352_pa_func[] = { FUNC("pa", 0, 38, 2) };
static struct rt2880_pmx_func rt3352_led_func[] = { FUNC("led", 0, 40, 5) };
+static struct rt2880_pmx_func rt3352_cs1_func[] = {
+ FUNC("spi_cs1", 0, 45, 1),
+ FUNC("wdg_cs1", 1, 45, 1),
+};
static struct rt2880_pmx_group rt3050_pinmux_data[] = {
GRP("i2c", i2c_func, 1, RT305X_GPIO_MODE_I2C),
GRP("lna", rt3352_lna_func, 1, RT3352_GPIO_MODE_LNA),
GRP("pa", rt3352_pa_func, 1, RT3352_GPIO_MODE_PA),
GRP("led", rt3352_led_func, 1, RT5350_GPIO_MODE_PHY_LED),
+ GRP("spi_cs1", rt3352_cs1_func, 2, RT5350_GPIO_MODE_SPI_CS1),
{ 0 }
};
die_if_kernel("Oops", regs);
force_sig(SIGBUS, current);
} else if (debug_be_interrupt)
- show_regs((struct pt_regs *)regs);
+ show_regs(regs);
}
static int ip28_be_handler(struct pt_regs *regs, int is_fixup)
{
unsigned long slot_firstpfn = slot_getbasepfn(node, 0);
unsigned long slot_freepfn = node_getfirstfree(node);
- unsigned long bootmap_size;
unsigned long start_pfn, end_pfn;
get_pfn_range_for_nid(node, &start_pfn, &end_pfn);
__node_data[node] = __va(slot_freepfn << PAGE_SHIFT);
memset(__node_data[node], 0, PAGE_SIZE);
- NODE_DATA(node)->bdata = &bootmem_node_data[node];
NODE_DATA(node)->node_start_pfn = start_pfn;
NODE_DATA(node)->node_spanned_pages = end_pfn - start_pfn;
slot_freepfn += PFN_UP(sizeof(struct pglist_data) +
sizeof(struct hub_data));
- bootmap_size = init_bootmem_node(NODE_DATA(node), slot_freepfn,
- start_pfn, end_pfn);
free_bootmem_with_active_regions(node, end_pfn);
- reserve_bootmem_node(NODE_DATA(node), slot_firstpfn << PAGE_SHIFT,
- ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT) + bootmap_size,
- BOOTMEM_DEFAULT);
+
+ memblock_reserve(slot_firstpfn << PAGE_SHIFT,
+ ((slot_freepfn - slot_firstpfn) << PAGE_SHIFT));
+
sparse_memory_present_with_active_regions(node);
}
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+hostprogs-y := elf-entry
+PHONY += elf-entry
+elf-entry: $(obj)/elf-entry
+ @:
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <byteswap.h>
+#include <elf.h>
+#include <endian.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef be32toh
+/* If libc provides [bl]e{32,64}toh() then we'll use them */
+#elif BYTE_ORDER == LITTLE_ENDIAN
+# define be32toh(x) bswap_32(x)
+# define le32toh(x) (x)
+# define be64toh(x) bswap_64(x)
+# define le64toh(x) (x)
+#elif BYTE_ORDER == BIG_ENDIAN
+# define be32toh(x) (x)
+# define le32toh(x) bswap_32(x)
+# define be64toh(x) (x)
+# define le64toh(x) bswap_64(x)
+#endif
+
+__attribute__((noreturn))
+static void die(const char *msg)
+{
+ fputs(msg, stderr);
+ exit(EXIT_FAILURE);
+}
+
+int main(int argc, const char *argv[])
+{
+ uint64_t entry;
+ size_t nread;
+ FILE *file;
+ union {
+ Elf32_Ehdr ehdr32;
+ Elf64_Ehdr ehdr64;
+ } hdr;
+
+ if (argc != 2)
+ die("Usage: elf-entry <elf-file>\n");
+
+ file = fopen(argv[1], "r");
+ if (!file) {
+ perror("Unable to open input file");
+ return EXIT_FAILURE;
+ }
+
+ nread = fread(&hdr, 1, sizeof(hdr), file);
+ if (nread != sizeof(hdr)) {
+ perror("Unable to read input file");
+ return EXIT_FAILURE;
+ }
+
+ if (memcmp(hdr.ehdr32.e_ident, ELFMAG, SELFMAG))
+ die("Input is not an ELF\n");
+
+ switch (hdr.ehdr32.e_ident[EI_CLASS]) {
+ case ELFCLASS32:
+ switch (hdr.ehdr32.e_ident[EI_DATA]) {
+ case ELFDATA2LSB:
+ entry = le32toh(hdr.ehdr32.e_entry);
+ break;
+ case ELFDATA2MSB:
+ entry = be32toh(hdr.ehdr32.e_entry);
+ break;
+ default:
+ die("Invalid ELF encoding\n");
+ }
+
+ /* Sign extend to form a canonical address */
+ entry = (int64_t)(int32_t)entry;
+ break;
+
+ case ELFCLASS64:
+ switch (hdr.ehdr32.e_ident[EI_DATA]) {
+ case ELFDATA2LSB:
+ entry = le64toh(hdr.ehdr64.e_entry);
+ break;
+ case ELFDATA2MSB:
+ entry = be64toh(hdr.ehdr64.e_entry);
+ break;
+ default:
+ die("Invalid ELF encoding\n");
+ }
+ break;
+
+ default:
+ die("Invalid ELF class\n");
+ }
+
+ printf("0x%016" PRIx64 "\n", entry);
+ return EXIT_SUCCESS;
+}
goto exit_put;
err = sysfs_create_bin_file(&dev->dev.kobj, &dev->bindata_attr);
if (err) {
- device_unregister(&dev->dev);
iounmap(dev->base);
- kfree(dev);
+ device_unregister(&dev->dev);
}
return;
exit_put:
+ iounmap(dev->base);
put_device(&dev->dev);
- return;
}
endif
boot := arch/nds32/boot
-core-$(BUILTIN_DTB) += $(boot)/dts/
+core-y += $(boot)/dts/
.PHONY: FORCE
PHONY += $(BOOT_TARGETS) install
KBUILD_IMAGE := $(nios2-boot)/vmImage
-ifneq ($(CONFIG_NIOS2_DTB_SOURCE),"")
- core-y += $(nios2-boot)/
-endif
+core-y += $(nios2-boot)/dts/
all: vmImage
archclean:
$(Q)$(MAKE) $(clean)=$(nios2-boot)
-%.dtb: | scripts
- $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
-
-dtbs:
- $(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
-
$(BOOT_TARGETS): vmlinux
$(Q)$(MAKE) $(build)=$(nios2-boot) $(nios2-boot)/$@
echo ' (your) ~/bin/$(INSTALLKERNEL) or'
echo ' (distribution) /sbin/$(INSTALLKERNEL) or'
echo ' install to $$(INSTALL_PATH)'
- echo ' dtbs - Build device tree blobs for enabled boards'
endef
$(obj)/compressed/vmlinux: $(obj)/vmlinux.gz FORCE
$(Q)$(MAKE) $(build)=$(obj)/compressed $@
-# Rule to build device tree blobs
-DTB_SRC := $(patsubst "%",%,$(CONFIG_NIOS2_DTB_SOURCE))
-
-# Make sure the generated dtb gets removed during clean
-extra-$(CONFIG_NIOS2_DTB_SOURCE_BOOL) += system.dtb
-
-$(obj)/system.dtb: $(DTB_SRC) FORCE
- $(call cmd,dtc)
-
-# Ensure system.dtb exists
-$(obj)/linked_dtb.o: $(obj)/system.dtb
-
-obj-$(CONFIG_NIOS2_DTB_SOURCE_BOOL) += linked_dtb.o
-
-targets += $(dtb-y)
-
-# Rule to build device tree blobs with make command
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
- $(call if_changed_dep,dtc)
-
-$(obj)/dtbs: $(addprefix $(obj)/, $(dtb-y))
-
install:
sh $(srctree)/$(src)/install.sh $(KERNELRELEASE) $(BOOTIMAGE) System.map "$(INSTALL_PATH)"
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := $(patsubst "%.dts",%.dtb.o,$(CONFIG_NIOS2_DTB_SOURCE))
+
+dtstree := $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
+++ /dev/null
-/*
- * Copyright (C) 2011 Thomas Chou <thomas@wytron.com.tw>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
- */
-.section .dtb.init.rodata,"a"
-.incbin "arch/nios2/boot/system.dtb"
const char *str;
int len;
- cpu = of_find_node_by_type(NULL, "cpu");
+ cpu = of_get_cpu_node(0, NULL);
if (!cpu)
panic("%s: No CPU found in devicetree!\n", __func__);
cpuinfo.reset_addr = fcpu(cpu, "altr,reset-addr");
cpuinfo.exception_addr = fcpu(cpu, "altr,exception-addr");
cpuinfo.fast_tlb_miss_exc_addr = fcpu(cpu, "altr,fast-tlb-miss-addr");
+
+ of_node_put(cpu);
}
#ifdef CONFIG_PROC_FS
{
*base = of_iomap(np, 0);
if (!*base) {
- pr_crit("Unable to map reg for %s\n", np->name);
+ pr_crit("Unable to map reg for %pOFn\n", np);
return -ENXIO;
}
if (of_property_read_u32(np, "clock-frequency", freq)) {
- pr_crit("Unable to get %s clock frequency\n", np->name);
+ pr_crit("Unable to get %pOFn clock frequency\n", np);
return -EINVAL;
}
{
u32 hwid;
struct device_node *cpun;
- struct device_node *cpus = of_find_node_by_path("/cpus");
- for_each_available_child_of_node(cpus, cpun) {
+ for_each_of_cpu_node(cpun) {
if (of_property_read_u32(cpun, "reg", &hwid))
continue;
if (hwid == cpu)
--- /dev/null
+subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+
+obj-y += kernel/
+obj-y += mm/
+obj-y += lib/
+obj-y += sysdev/
+obj-y += platforms/
+obj-y += math-emu/
+obj-y += crypto/
+obj-y += net/
+
+obj-$(CONFIG_XMON) += xmon/
+obj-$(CONFIG_KVM) += kvm/
+
+obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_FILE) += purgatory/
select ARCH_HAS_PMEM_API if PPC64
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_MEMBARRIER_CALLBACKS
- select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE
+ select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC64
select ARCH_HAS_SG_CHAIN
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_CBPF_JIT if !PPC64
+ select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
+ select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_EBPF_JIT if PPC64
select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU)
select HAVE_FTRACE_MCOUNT_RECORD
+ select HAVE_FUNCTION_ERROR_INJECTION
select HAVE_FUNCTION_GRAPH_TRACER
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
config PPC_UDBG_16550
bool
- default n
config GENERIC_TBSYNC
bool
default y if PPC32 && SMP
- default n
config AUDIT_ARCH
bool
bool
help
Used to allow a board to specify it wants an ePAPR compliant wrapper.
- default n
config DEFAULT_UIMAGE
bool
help
Used to allow a board to specify it wants a uImage built by default
- default n
config ARCH_HIBERNATION_POSSIBLE
bool
config PPC_DCR_NATIVE
bool
- default n
config PPC_DCR_MMIO
bool
- default n
config PPC_DCR
bool
bool
depends on PCI
depends on PPC64 # not supported on 32 bits yet
- default n
config ARCH_SUPPORTS_DEBUG_PAGEALLOC
depends on PPC32 || PPC_BOOK3S_64
depends on SMP
select ALTIVEC
select VSX
- default n
---help---
Support user-mode Transactional Memory on POWERPC.
config LD_HEAD_STUB_CATCH
bool "Reserve 256 bytes to cope with linker stubs in HEAD text" if EXPERT
depends on PPC64
- default n
help
Very large kernels can cause linker branch stubs to be generated by
code in head_64.S, which moves the head text sections out of their
config RELOCATABLE_TEST
bool "Test relocatable kernel"
depends on (PPC64 && RELOCATABLE)
- default n
help
This runs the relocatable kernel at the address it was initially
loaded at, which tends to be non-zero and therefore test the
config PPC_COPRO_BASE
bool
- default n
config SCHED_SMT
bool "SMT (Hyperthreading) scheduler support"
bool
depends on PCI
default y if 40x || 44x
- default n
config EISA
bool
config HAS_RAPIDIO
bool
- default n
config RAPIDIO
tristate "RapidIO support"
config NONSTATIC_KERNEL
bool
- default n
menu "Advanced setup"
depends on PPC32
config PPC_DISABLE_WERROR
bool "Don't build arch/powerpc code with -Werror"
- default n
help
This option tells the compiler NOT to build the code under
arch/powerpc with the -Werror flag (which means warnings
config CODE_PATCHING_SELFTEST
bool "Run self-tests of the code-patching code"
depends on DEBUG_KERNEL
- default n
config JUMP_LABEL_FEATURE_CHECKS
bool "Enable use of jump label for cpu/mmu_has_feature()"
config JUMP_LABEL_FEATURE_CHECK_DEBUG
bool "Do extra check on feature fixup calls"
depends on DEBUG_KERNEL && JUMP_LABEL_FEATURE_CHECKS
- default n
help
This tries to catch incorrect usage of cpu_has_feature() and
mmu_has_feature() in the code.
config FTR_FIXUP_SELFTEST
bool "Run self-tests of the feature-fixup code"
depends on DEBUG_KERNEL
- default n
config MSI_BITMAP_SELFTEST
bool "Run self-tests of the MSI bitmap code"
depends on DEBUG_KERNEL
- default n
config PPC_IRQ_SOFT_MASK_DEBUG
bool "Include extra checks for powerpc irq soft masking"
- default n
config XMON
bool "Include xmon kernel debugger"
KBUILD_ARFLAGS += --target=elf$(BITS)-$(GNUTARGET)
endif
+cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard=tls
+ifdef CONFIG_PPC64
+cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard-reg=r13
+else
+cflags-$(CONFIG_STACKPROTECTOR) += -mstack-protector-guard-reg=r2
+endif
+
LDFLAGS_vmlinux-y := -Bstatic
LDFLAGS_vmlinux-$(CONFIG_RELOCATABLE) := -pie
LDFLAGS_vmlinux := $(LDFLAGS_vmlinux-y)
CFLAGS-$(CONFIG_GENERIC_CPU) += -mcpu=powerpc64
endif
+ifdef CONFIG_FUNCTION_TRACER
+CC_FLAGS_FTRACE := -pg
ifdef CONFIG_MPROFILE_KERNEL
- CC_FLAGS_FTRACE := -pg -mprofile-kernel
+CC_FLAGS_FTRACE += -mprofile-kernel
+endif
+# Work around gcc code-gen bugs with -pg / -fno-omit-frame-pointer in gcc <= 4.8
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=44199
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52828
+ifneq ($(cc-name),clang)
+CC_FLAGS_FTRACE += $(call cc-ifversion, -lt, 0409, -mno-sched-epilog)
+endif
endif
CFLAGS-$(CONFIG_TARGET_CPU_BOOL) += $(call cc-option,-mcpu=$(CONFIG_TARGET_CPU))
KBUILD_CFLAGS += -mcpu=powerpc
endif
-# Work around a gcc code-gen bug with -fno-omit-frame-pointer.
-ifdef CONFIG_FUNCTION_TRACER
-KBUILD_CFLAGS += -mno-sched-epilog
-endif
-
cpu-as-$(CONFIG_4xx) += -Wa,-m405
cpu-as-$(CONFIG_ALTIVEC) += $(call as-option,-Wa$(comma)-maltivec)
cpu-as-$(CONFIG_E200) += -Wa,-me200
cpu-as-$(CONFIG_E500) += -Wa,-me500
-cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4
+
+# When using '-many -mpower4' gas will first try and find a matching power4
+# mnemonic and failing that it will allow any valid mnemonic that GAS knows
+# about. GCC will pass -many to GAS when assembling, clang does not.
+cpu-as-$(CONFIG_PPC_BOOK3S_64) += -Wa,-mpower4 -Wa,-many
cpu-as-$(CONFIG_PPC_E500MC) += $(call as-option,-Wa$(comma)-me500mc)
KBUILD_AFLAGS += $(cpu-as-y)
head-$(CONFIG_ALTIVEC) += arch/powerpc/kernel/vector.o
head-$(CONFIG_PPC_OF_BOOT_TRAMPOLINE) += arch/powerpc/kernel/prom_init.o
-core-y += arch/powerpc/kernel/ \
- arch/powerpc/mm/ \
- arch/powerpc/lib/ \
- arch/powerpc/sysdev/ \
- arch/powerpc/platforms/ \
- arch/powerpc/math-emu/ \
- arch/powerpc/crypto/ \
- arch/powerpc/net/
-core-$(CONFIG_XMON) += arch/powerpc/xmon/
-core-$(CONFIG_KVM) += arch/powerpc/kvm/
-core-$(CONFIG_PERF_EVENTS) += arch/powerpc/perf/
-core-$(CONFIG_KEXEC_FILE) += arch/powerpc/purgatory/
+# See arch/powerpc/Kbuild for content of core part of the kernel
+core-y += arch/powerpc/
drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
bootwrapper_install:
$(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
-%.dtb: scripts
- $(Q)$(MAKE) $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
-
# Used to create 'merged defconfigs'
# To use it $(call) it with the first argument as the base defconfig
# and the second argument as a space separated list of .config files to merge,
archprepare: checkbin
-# Use the file '.tmp_gas_check' for binutils tests, as gas won't output
-# to stdout and these checks are run even on install targets.
-TOUT := .tmp_gas_check
+ifdef CONFIG_STACKPROTECTOR
+prepare: stack_protector_prepare
+
+stack_protector_prepare: prepare0
+ifdef CONFIG_PPC64
+ $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "PACA_CANARY") print $$3;}' include/generated/asm-offsets.h))
+else
+ $(eval KBUILD_CFLAGS += -mstack-protector-guard-offset=$(shell awk '{if ($$2 == "TASK_CANARY") print $$3;}' include/generated/asm-offsets.h))
+endif
+endif
-# Check gcc and binutils versions:
-# - gcc-3.4 and binutils-2.14 are a fatal combination
-# - Require gcc 4.0 or above on 64-bit
-# - gcc-4.2.0 has issues compiling modules on 64-bit
+# Check toolchain versions:
+# - gcc-4.6 is the minimum kernel-wide version so nothing required.
checkbin:
- @if test "$(cc-name)" != "clang" \
- && test "$(cc-version)" = "0304" ; then \
- if ! /bin/echo mftb 5 | $(AS) -v -mppc -many -o $(TOUT) >/dev/null 2>&1 ; then \
- echo -n '*** ${VERSION}.${PATCHLEVEL} kernels no longer build '; \
- echo 'correctly with gcc-3.4 and your version of binutils.'; \
- echo '*** Please upgrade your binutils or downgrade your gcc'; \
- false; \
- fi ; \
- fi
- @if test "$(cc-name)" != "clang" \
- && test "$(cc-version)" -lt "0400" \
- && test "x${CONFIG_PPC64}" = "xy" ; then \
- echo -n "Sorry, GCC v4.0 or above is required to build " ; \
- echo "the 64-bit powerpc kernel." ; \
- false ; \
- fi
- @if test "$(cc-name)" != "clang" \
- && test "$(cc-fullversion)" = "040200" \
- && test "x${CONFIG_MODULES}${CONFIG_PPC64}" = "xyy" ; then \
- echo -n '*** GCC-4.2.0 cannot compile the 64-bit powerpc ' ; \
- echo 'kernel with modules enabled.' ; \
- echo -n '*** Please use a different GCC version or ' ; \
- echo 'disable kernel modules' ; \
- false ; \
- fi
@if test "x${CONFIG_CPU_LITTLE_ENDIAN}" = "xy" \
&& $(LD) --version | head -1 | grep ' 2\.24$$' >/dev/null ; then \
echo -n '*** binutils 2.24 miscompiles weak symbols ' ; \
echo -n '*** Please use a different binutils version.' ; \
false ; \
fi
-
-
-CLEAN_FILES += $(TOUT)
-
fdt_wip.c
libfdt.h
libfdt_internal.h
+autoconf.h
endif
BOOTCFLAGS := -Wall -Wundef -Wstrict-prototypes -Wno-trigraphs \
- -fno-strict-aliasing -Os -msoft-float -pipe \
- -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
+ -fno-strict-aliasing -O2 -msoft-float -mno-altivec -mno-vsx \
+ -pipe -fomit-frame-pointer -fno-builtin -fPIC -nostdinc \
-D$(compress-y)
ifdef CONFIG_PPC64_BOOT_WRAPPER
$(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S
$(Q)cp $< $@
+$(obj)/serial.c: $(obj)/autoconf.h
+
+$(obj)/autoconf.h: $(obj)/%: $(objtree)/include/generated/%
+ $(Q)cp $< $@
+
clean-files := $(zlib-) $(zlibheader-) $(zliblinuxheader-) \
$(zlib-decomp-) $(libfdt) $(libfdtheader) \
- empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
+ autoconf.h empty.c zImage.coff.lds zImage.ps3.lds zImage.lds
quiet_cmd_bootcc = BOOTCC $@
cmd_bootcc = $(BOOTCC) -Wp,-MD,$(depfile) $(BOOTCFLAGS) -c -o $@ $<
dtbImage.adder875-redboot
# Board ports in arch/powerpc/platform/52xx/Kconfig
-image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200 lite5200.dtb
-image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200b lite5200b.dtb
-image-$(CONFIG_PPC_MEDIA5200) += cuImage.media5200 media5200.dtb
+image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200
+image-$(CONFIG_PPC_LITE5200) += cuImage.lite5200b
+image-$(CONFIG_PPC_MEDIA5200) += cuImage.media5200
# Board ports in arch/powerpc/platform/82xx/Kconfig
image-$(CONFIG_MPC8272_ADS) += cuImage.mpc8272ads
$(call if_changed,wrap,$(subst $(obj)/zImage.,,$@))
# dtbImage% - a dtbImage is a zImage with an embedded device tree blob
-$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
- $(call if_changed,wrap,$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/dtbImage.initrd.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+ $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/%.dtb FORCE
- $(call if_changed,wrap,$*,,$(obj)/$*.dtb)
+$(obj)/dtbImage.%: vmlinux $(wrapperbits) $(obj)/dts/%.dtb FORCE
+ $(call if_changed,wrap,$*,,$(obj)/dts/$*.dtb)
# This cannot be in the root of $(src) as the zImage rule always adds a $(obj)
# prefix
$(obj)/uImage: vmlinux $(wrapperbits) FORCE
$(call if_changed,wrap,uboot)
-$(obj)/uImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
- $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
-
-$(obj)/uImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
- $(call if_changed,wrap,uboot-$*,,$(obj)/$*.dtb)
+$(obj)/uImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/cuImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
- $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/uImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,uboot-$*,,$(obj)/dts/$*.dtb)
-$(obj)/cuImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
- $(call if_changed,wrap,cuboot-$*,,$(obj)/$*.dtb)
+$(obj)/cuImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/simpleImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
- $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/cuImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,cuboot-$*,,$(obj)/dts/$*.dtb)
-$(obj)/simpleImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
- $(call if_changed,wrap,simpleboot-$*,,$(obj)/$*.dtb)
+$(obj)/simpleImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-$(obj)/treeImage.initrd.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
- $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb,$(obj)/ramdisk.image.gz)
+$(obj)/simpleImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,simpleboot-$*,,$(obj)/dts/$*.dtb)
-$(obj)/treeImage.%: vmlinux $(obj)/%.dtb $(wrapperbits) FORCE
- $(call if_changed,wrap,treeboot-$*,,$(obj)/$*.dtb)
+$(obj)/treeImage.initrd.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb,$(obj)/ramdisk.image.gz)
-# Rule to build device tree blobs
-$(obj)/%.dtb: $(src)/dts/%.dts FORCE
- $(call if_changed_dep,dtc)
+$(obj)/treeImage.%: vmlinux $(obj)/dts/%.dtb $(wrapperbits) FORCE
+ $(call if_changed,wrap,treeboot-$*,,$(obj)/dts/$*.dtb)
-$(obj)/%.dtb: $(src)/dts/fsl/%.dts FORCE
- $(call if_changed_dep,dtc)
+# Needed for the above targets to work with dts/fsl/ files
+$(obj)/dts/%.dtb: $(obj)/dts/fsl/%.dtb
+ @cp $< $@
# If there isn't a platform selected then just strip the vmlinux.
ifeq (,$(image-y))
p_pstack: .long _platform_stack_top
#endif
- .weak _zimage_start
.globl _zimage_start
+ /* Clang appears to require the .weak directive to be after the symbol
+ * is defined. See https://bugs.llvm.org/show_bug.cgi?id=38921 */
+ .weak _zimage_start
_zimage_start:
.globl _zimage_start_lib
_zimage_start_lib:
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+
+subdir-y += fsl
+
+dtstree := $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+
+dtstree := $(srctree)/$(src)
+dtb-$(CONFIG_OF_ALL_DTBS) := $(patsubst $(dtstree)/%.dts,%.dtb, $(wildcard $(dtstree)/*.dts))
#include <types.h>
#include <string.h>
+#define INT_MAX ((int)(~0U>>1))
+
#include "of.h"
typedef unsigned long uintptr_t;
#include <libfdt.h>
#include "../include/asm/opal-api.h"
-#ifdef CONFIG_PPC64_BOOT_WRAPPER
-
/* Global OPAL struct used by opal-call.S */
struct opal {
u64 base;
return 0;
}
-#else
-int opal_console_init(void *devp, struct serial_console_data *scdp)
-{
- return -1;
-}
-#endif /* __powerpc64__ */
#include "stdio.h"
#include "io.h"
#include "ops.h"
+#include "autoconf.h"
static int serial_open(void)
{
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
# CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
CONFIG_CRYPTO_ECB=m
CONFIG_CRYPTO_PCBC=m
# CONFIG_CRYPTO_HW is not set
+CONFIG_PRINTK_TIME=y
# CONFIG_PPC_PSERIES is not set
# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_IDLE=y
CONFIG_HZ_100=y
CONFIG_BINFMT_MISC=m
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
CONFIG_PPC_IBM_CELL_BLADE=y
CONFIG_RTAS_FLASH=m
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_POWERSAVE=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_FREQ_PMAC64=y
CONFIG_HZ_100=y
CONFIG_BINFMT_MISC=m
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_LZO=m
+CONFIG_PRINTK_TIME=y
CONFIG_KVM_BOOK3S_64=m
CONFIG_KVM_BOOK3S_64_HV=m
CONFIG_VHOST_NET=m
+CONFIG_PRINTK_TIME=y
CONFIG_VSX=y
CONFIG_NR_CPUS=2048
CONFIG_CPU_LITTLE_ENDIAN=y
+CONFIG_KERNEL_XZ=y
# CONFIG_SWAP is not set
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
# CONFIG_CROSS_MEMORY_ATTACH is not set
CONFIG_NO_HZ=y
CONFIG_HIGH_RES_TIMERS=y
-CONFIG_TASKSTATS=y
-CONFIG_TASK_DELAY_ACCT=y
-CONFIG_TASK_XACCT=y
-CONFIG_TASK_IO_ACCOUNTING=y
+# CONFIG_CPU_ISOLATION is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_LOG_BUF_SHIFT=20
-CONFIG_RELAY=y
CONFIG_BLK_DEV_INITRD=y
# CONFIG_RD_GZIP is not set
# CONFIG_RD_BZIP2 is not set
# CONFIG_RD_LZO is not set
# CONFIG_RD_LZ4 is not set
CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+CONFIG_EXPERT=y
+# CONFIG_SGETMASK_SYSCALL is not set
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_SHMEM is not set
+# CONFIG_AIO is not set
CONFIG_PERF_EVENTS=y
# CONFIG_COMPAT_BRK is not set
+CONFIG_SLAB_FREELIST_HARDENED=y
CONFIG_JUMP_LABEL=y
CONFIG_STRICT_KERNEL_RWX=y
CONFIG_MODULES=y
CONFIG_MODULE_SIG_SHA512=y
CONFIG_PARTITION_ADVANCED=y
# CONFIG_IOSCHED_DEADLINE is not set
+# CONFIG_PPC_VAS is not set
# CONFIG_PPC_PSERIES is not set
+# CONFIG_PPC_OF_BOOT_TRAMPOLINE is not set
CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
CONFIG_CPU_IDLE=y
CONFIG_HZ_100=y
CONFIG_PPC_64K_PAGES=y
CONFIG_SCHED_SMT=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=tty0 console=hvc0 powersave=off"
+CONFIG_CMDLINE="console=tty0 console=hvc0 ipr.fast_reboot=1 quiet"
# CONFIG_SECCOMP is not set
+# CONFIG_PPC_MEM_KEYS is not set
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
# CONFIG_INET_XFRM_MODE_TUNNEL is not set
# CONFIG_INET_XFRM_MODE_BEET is not set
-# CONFIG_IPV6 is not set
CONFIG_DNS_RESOLVER=y
# CONFIG_WIRELESS is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_BLK_DEV_RAM_SIZE=65536
CONFIG_VIRTIO_BLK=m
CONFIG_BLK_DEV_NVME=m
-CONFIG_EEPROM_AT24=y
+CONFIG_NVME_MULTIPATH=y
+CONFIG_EEPROM_AT24=m
# CONFIG_CXL is not set
+# CONFIG_OCXL is not set
CONFIG_BLK_DEV_SD=m
CONFIG_BLK_DEV_SR=m
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_SCSI_CXGB3_ISCSI=m
CONFIG_SCSI_CXGB4_ISCSI=m
CONFIG_SCSI_BNX2_ISCSI=m
-CONFIG_BE2ISCSI=m
CONFIG_SCSI_AACRAID=m
CONFIG_MEGARAID_NEWGEN=y
CONFIG_MEGARAID_MM=m
CONFIG_SCSI_DH=y
CONFIG_SCSI_DH_ALUA=m
CONFIG_ATA=y
-CONFIG_SATA_AHCI=y
+CONFIG_SATA_AHCI=m
# CONFIG_ATA_SFF is not set
CONFIG_MD=y
CONFIG_BLK_DEV_MD=m
CONFIG_DM_MIRROR=m
CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
+# CONFIG_NET_VENDOR_3COM is not set
+# CONFIG_NET_VENDOR_ADAPTEC is not set
+# CONFIG_NET_VENDOR_AGERE is not set
+# CONFIG_NET_VENDOR_ALACRITECH is not set
CONFIG_ACENIC=m
CONFIG_ACENIC_OMIT_TIGON_I=y
-CONFIG_TIGON3=y
+# CONFIG_NET_VENDOR_AMAZON is not set
+# CONFIG_NET_VENDOR_AMD is not set
+# CONFIG_NET_VENDOR_AQUANTIA is not set
+# CONFIG_NET_VENDOR_ARC is not set
+# CONFIG_NET_VENDOR_ATHEROS is not set
+CONFIG_TIGON3=m
CONFIG_BNX2X=m
-CONFIG_CHELSIO_T1=y
+# CONFIG_NET_VENDOR_BROCADE is not set
+# CONFIG_NET_CADENCE is not set
+# CONFIG_NET_VENDOR_CAVIUM is not set
+CONFIG_CHELSIO_T1=m
+# CONFIG_NET_VENDOR_CISCO is not set
+# CONFIG_NET_VENDOR_CORTINA is not set
+# CONFIG_NET_VENDOR_DEC is not set
+# CONFIG_NET_VENDOR_DLINK is not set
CONFIG_BE2NET=m
-CONFIG_S2IO=m
-CONFIG_E100=m
+# CONFIG_NET_VENDOR_EZCHIP is not set
+# CONFIG_NET_VENDOR_HP is not set
+# CONFIG_NET_VENDOR_HUAWEI is not set
CONFIG_E1000=m
-CONFIG_E1000E=m
+CONFIG_IGB=m
CONFIG_IXGB=m
CONFIG_IXGBE=m
+CONFIG_I40E=m
+CONFIG_S2IO=m
+# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
+# CONFIG_MLX4_CORE_GEN2 is not set
CONFIG_MLX5_CORE=m
-CONFIG_MLX5_CORE_EN=y
+# CONFIG_NET_VENDOR_MICREL is not set
CONFIG_MYRI10GE=m
+# CONFIG_NET_VENDOR_NATSEMI is not set
+# CONFIG_NET_VENDOR_NETRONOME is not set
+# CONFIG_NET_VENDOR_NI is not set
+# CONFIG_NET_VENDOR_NVIDIA is not set
+# CONFIG_NET_VENDOR_OKI is not set
+# CONFIG_NET_PACKET_ENGINE is not set
CONFIG_QLGE=m
CONFIG_NETXEN_NIC=m
+# CONFIG_NET_VENDOR_QUALCOMM is not set
+# CONFIG_NET_VENDOR_RDC is not set
+# CONFIG_NET_VENDOR_REALTEK is not set
+# CONFIG_NET_VENDOR_RENESAS is not set
+# CONFIG_NET_VENDOR_ROCKER is not set
+# CONFIG_NET_VENDOR_SAMSUNG is not set
+# CONFIG_NET_VENDOR_SEEQ is not set
CONFIG_SFC=m
+# CONFIG_NET_VENDOR_SILAN is not set
+# CONFIG_NET_VENDOR_SIS is not set
+# CONFIG_NET_VENDOR_SMSC is not set
+# CONFIG_NET_VENDOR_SOCIONEXT is not set
+# CONFIG_NET_VENDOR_STMICRO is not set
+# CONFIG_NET_VENDOR_SUN is not set
+# CONFIG_NET_VENDOR_SYNOPSYS is not set
+# CONFIG_NET_VENDOR_TEHUTI is not set
+# CONFIG_NET_VENDOR_TI is not set
+# CONFIG_NET_VENDOR_VIA is not set
+# CONFIG_NET_VENDOR_WIZNET is not set
+# CONFIG_NET_VENDOR_XILINX is not set
+CONFIG_PHYLIB=y
# CONFIG_USB_NET_DRIVERS is not set
# CONFIG_WLAN is not set
CONFIG_INPUT_EVDEV=y
CONFIG_IPMI_HANDLER=y
CONFIG_IPMI_DEVICE_INTERFACE=y
CONFIG_IPMI_POWERNV=y
+CONFIG_IPMI_WATCHDOG=y
CONFIG_HW_RANDOM=y
+CONFIG_TCG_TPM=y
CONFIG_TCG_TIS_I2C_NUVOTON=y
+CONFIG_I2C=y
# CONFIG_I2C_COMPAT is not set
CONFIG_I2C_CHARDEV=y
# CONFIG_I2C_HELPER_AUTO is not set
-CONFIG_DRM=y
-CONFIG_DRM_RADEON=y
+CONFIG_I2C_ALGOBIT=y
+CONFIG_I2C_OPAL=m
+CONFIG_PPS=y
+CONFIG_SENSORS_IBMPOWERNV=m
+CONFIG_DRM=m
CONFIG_DRM_AST=m
+CONFIG_FB=y
CONFIG_FIRMWARE_EDID=y
-CONFIG_FB_MODE_HELPERS=y
-CONFIG_FB_OF=y
-CONFIG_FB_MATROX=y
-CONFIG_FB_MATROX_MILLENIUM=y
-CONFIG_FB_MATROX_MYSTIQUE=y
-CONFIG_FB_MATROX_G=y
-# CONFIG_LCD_CLASS_DEVICE is not set
-# CONFIG_BACKLIGHT_GENERIC is not set
# CONFIG_VGA_CONSOLE is not set
+CONFIG_FRAMEBUFFER_CONSOLE=y
CONFIG_LOGO=y
# CONFIG_LOGO_LINUX_MONO is not set
# CONFIG_LOGO_LINUX_VGA16 is not set
+CONFIG_HID_GENERIC=m
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CHICONY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_ITE=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_LOGITECH=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
CONFIG_USB_HIDDEV=y
-CONFIG_USB=y
-CONFIG_USB_MON=y
-CONFIG_USB_XHCI_HCD=y
-CONFIG_USB_EHCI_HCD=y
+CONFIG_USB=m
+CONFIG_USB_XHCI_HCD=m
+CONFIG_USB_EHCI_HCD=m
# CONFIG_USB_EHCI_HCD_PPC_OF is not set
-CONFIG_USB_OHCI_HCD=y
-CONFIG_USB_STORAGE=y
+CONFIG_USB_OHCI_HCD=m
+CONFIG_USB_STORAGE=m
CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_OPAL=m
CONFIG_RTC_DRV_GENERIC=m
CONFIG_VIRT_DRIVERS=y
-CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_PCI=m
# CONFIG_IOMMU_SUPPORT is not set
CONFIG_EXT4_FS=m
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_PROC_KCORE=y
-CONFIG_TMPFS=y
-CONFIG_TMPFS_POSIX_ACL=y
# CONFIG_MISC_FILESYSTEMS is not set
# CONFIG_NETWORK_FILESYSTEMS is not set
+CONFIG_NLS=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ASCII=y
CONFIG_CRC16=y
CONFIG_CRC_ITU_T=y
CONFIG_LIBCRC32C=y
+# CONFIG_XZ_DEC_X86 is not set
+# CONFIG_XZ_DEC_IA64 is not set
+# CONFIG_XZ_DEC_ARM is not set
+# CONFIG_XZ_DEC_ARMTHUMB is not set
+# CONFIG_XZ_DEC_SPARC is not set
CONFIG_PRINTK_TIME=y
CONFIG_MAGIC_SYSRQ=y
-CONFIG_DEBUG_KERNEL=y
CONFIG_DEBUG_STACKOVERFLOW=y
CONFIG_SOFTLOCKUP_DETECTOR=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
CONFIG_HARDLOCKUP_DETECTOR=y
CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
-CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
CONFIG_WQ_WATCHDOG=y
-CONFIG_SCHEDSTATS=y
+# CONFIG_SCHED_DEBUG is not set
# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_XMON=y
CONFIG_XMON_DEFAULT=y
-CONFIG_SECURITY=y
-CONFIG_IMA=y
-CONFIG_EVM=y
+CONFIG_ENCRYPTED_KEYS=y
# CONFIG_CRYPTO_ECHAINIV is not set
-CONFIG_CRYPTO_ECB=y
-CONFIG_CRYPTO_CMAC=y
-CONFIG_CRYPTO_MD4=y
-CONFIG_CRYPTO_ARC4=y
-CONFIG_CRYPTO_DES=y
# CONFIG_CRYPTO_HW is not set
/* Accumulated cputime values to flush on ticks*/
unsigned long utime;
unsigned long stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
unsigned long utime_scaled;
unsigned long stime_scaled;
+#endif
unsigned long gtime;
unsigned long hardirq_time;
unsigned long softirq_time;
/* Internal counters */
unsigned long starttime; /* TB value snapshot */
unsigned long starttime_user; /* TB value on exit to usermode */
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
unsigned long startspurr; /* SPURR value snapshot */
unsigned long utime_sspurr; /* ->user_time when ->startspurr set */
+#endif
};
#endif
void alignment_exception(struct pt_regs *regs);
void slb_miss_bad_addr(struct pt_regs *regs);
void StackOverflow(struct pt_regs *regs);
-void nonrecoverable_exception(struct pt_regs *regs);
void kernel_fp_unavailable_exception(struct pt_regs *regs);
void altivec_unavailable_exception(struct pt_regs *regs);
void vsx_unavailable_exception(struct pt_regs *regs);
void system_reset_exception(struct pt_regs *regs);
void machine_check_exception(struct pt_regs *regs);
void emulation_assist_interrupt(struct pt_regs *regs);
+long do_slb_fault(struct pt_regs *regs, unsigned long ea);
+void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err);
/* signals, syscalls and interrupts */
long sys_swapcontext(struct ucontext __user *old_ctx,
#include <asm/book3s/32/hash.h>
/* And here we include common definitions */
-#include <asm/pte-common.h>
+
+#define _PAGE_KERNEL_RO 0
+#define _PAGE_KERNEL_ROX 0
+#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW)
+
+#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
+
+#ifndef __ASSEMBLY__
+
+static inline bool pte_user(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_USER;
+}
+#endif /* __ASSEMBLY__ */
+
+/*
+ * Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as _PAGE_SHIFT here (ie, naturally aligned).
+ * Platform who don't just pre-define the value so we don't override it here.
+ */
+#define PTE_RPN_SHIFT (PAGE_SHIFT)
+
+/*
+ * The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs.
+ */
+#ifdef CONFIG_PTE_64BIT
+#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1))
+#else
+#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+#endif
+
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HASHPTE | _PAGE_DIRTY | \
+ _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+
+/*
+ * Permission masks used to generate the __P and __S table.
+ *
+ * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
+ *
+ * Write permissions imply read permissions for now.
+ */
+#define PAGE_NONE __pgprot(_PAGE_BASE)
+#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER)
+
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+ _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/*
+ * Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
+ defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP (PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
#define PTE_INDEX_SIZE PTE_SHIFT
#define PMD_INDEX_SIZE 0
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
+ pte_update(ptep, _PAGE_RW, 0);
}
static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
int psize)
{
unsigned long set = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
- unsigned long clr = ~pte_val(entry) & _PAGE_RO;
+ (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW);
- pte_update(ptep, clr, set);
+ pte_update(ptep, 0, set);
flush_tlb_page(vma, address);
}
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
/* Generic accessors to PTE bits */
static inline int pte_write(pte_t pte) { return !!(pte_val(pte) & _PAGE_RW);}
static inline int pte_young(pte_t pte) { return !!(pte_val(pte) & _PAGE_ACCESSED); }
static inline int pte_special(pte_t pte) { return !!(pte_val(pte) & _PAGE_SPECIAL); }
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+static inline bool pte_exec(pte_t pte) { return true; }
static inline int pte_present(pte_t pte)
{
return pte_val(pte) & _PAGE_PRESENT;
}
+static inline bool pte_hw_valid(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_PRESENT;
+}
+
+static inline bool pte_hashpte(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_HASHPTE);
+}
+
+static inline bool pte_ci(pte_t pte)
+{
+ return !!(pte_val(pte) & _PAGE_NO_CACHE);
+}
+
/*
* We only find page table entry in the last level
* Hence no need for other accessors
#define pte_access_permitted pte_access_permitted
static inline bool pte_access_permitted(pte_t pte, bool write)
{
- unsigned long pteval = pte_val(pte);
/*
* A read-only access is controlled by _PAGE_USER bit.
* We have _PAGE_READ set for WRITE and EXECUTE
*/
- unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_USER;
-
- if (write)
- need_pte_bits |= _PAGE_WRITE;
+ if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
+ return false;
- if ((pteval & need_pte_bits) != need_pte_bits)
+ if (write && !pte_write(pte))
return false;
return true;
return __pte(pte_val(pte) & ~_PAGE_RW);
}
+static inline pte_t pte_exprotect(pte_t pte)
+{
+ return pte;
+}
+
static inline pte_t pte_mkclean(pte_t pte)
{
return __pte(pte_val(pte) & ~_PAGE_DIRTY);
return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
}
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return pte;
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+ return pte;
+}
+
static inline pte_t pte_mkwrite(pte_t pte)
{
return __pte(pte_val(pte) | _PAGE_RW);
return pte;
}
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_USER);
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_USER);
+}
+
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
* if it is not a pte and have hugepd shift mask
* set, then it is a hugepd directory pointer
*/
- if (!(hpdval & _PAGE_PTE) &&
+ if (!(hpdval & _PAGE_PTE) && (hpdval & _PAGE_PRESENT) &&
((hpdval & HUGEPD_SHIFT_MASK) != 0))
return true;
return false;
#include <asm/book3s/64/hash-4k.h>
#endif
+/* Bits to set in a PMD/PUD/PGD entry valid bit*/
+#define HASH_PMD_VAL_BITS (0x8000000000000000UL)
+#define HASH_PUD_VAL_BITS (0x8000000000000000UL)
+#define HASH_PGD_VAL_BITS (0x8000000000000000UL)
+
/*
* Size of EA range mapped by our pagetables.
*/
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-extern int hash__map_kernel_page(unsigned long ea, unsigned long pa,
- unsigned long flags);
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
extern int __meminit hash__vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys);
}
#endif
+/* hugepd entry valid bit */
+#define HUGEPD_VAL_BITS (0x8000000000000000UL)
+
#endif
* SLB
*/
-#define SLB_NUM_BOLTED 3
+#define SLB_NUM_BOLTED 2
#define SLB_CACHE_ENTRIES 8
#define SLB_MIN_SIZE 32
extern void pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_pages);
extern void demote_segment_4k(struct mm_struct *mm, unsigned long addr);
+extern void hash__setup_new_exec(void);
+
#ifdef CONFIG_PPC_PSERIES
void hpte_init_pseries(void);
#else
extern void hpte_init_native(void);
+struct slb_entry {
+ u64 esid;
+ u64 vsid;
+};
+
extern void slb_initialize(void);
-extern void slb_flush_and_rebolt(void);
+void slb_flush_and_restore_bolted(void);
void slb_flush_all_realmode(void);
void __slb_restore_bolted_realmode(void);
void slb_restore_bolted_realmode(void);
+void slb_save_contents(struct slb_entry *slb_ptr);
+void slb_dump_contents(struct slb_entry *slb_ptr);
extern void slb_vmalloc_update(void);
extern void slb_set_size(u16 size);
* from mmu context id and effective segment id of the address.
*
* For user processes max context id is limited to MAX_USER_CONTEXT.
-
- * For kernel space, we use context ids 1-4 to map addresses as below:
- * NOTE: each context only support 64TB now.
- * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ]
+ * more details in get_user_context
+ *
+ * For kernel space get_kernel_context
*
* The proto-VSIDs are then scrambled into real VSIDs with the
* multiplicative hash:
#define ESID_BITS_MASK ((1 << ESID_BITS) - 1)
#define ESID_BITS_1T_MASK ((1 << ESID_BITS_1T) - 1)
+/*
+ * Now certain config support MAX_PHYSMEM more than 512TB. Hence we will need
+ * to use more than one context for linear mapping the kernel.
+ * For vmalloc and memmap, we use just one context with 512TB. With 64 byte
+ * struct page size, we need ony 32 TB in memmap for 2PB (51 bits (MAX_PHYSMEM_BITS)).
+ */
+#if (MAX_PHYSMEM_BITS > MAX_EA_BITS_PER_CONTEXT)
+#define MAX_KERNEL_CTX_CNT (1UL << (MAX_PHYSMEM_BITS - MAX_EA_BITS_PER_CONTEXT))
+#else
+#define MAX_KERNEL_CTX_CNT 1
+#endif
+
+#define MAX_VMALLOC_CTX_CNT 1
+#define MAX_MEMMAP_CTX_CNT 1
+
/*
* 256MB segment
* The proto-VSID space has 2^(CONTEX_BITS + ESID_BITS) - 1 segments
* We also need to avoid the last segment of the last context, because that
* would give a protovsid of 0x1fffffffff. That will result in a VSID 0
* because of the modulo operation in vsid scramble.
+ *
+ * We add one extra context to MIN_USER_CONTEXT so that we can map kernel
+ * context easily. The +1 is to map the unused 0xe region mapping.
*/
#define MAX_USER_CONTEXT ((ASM_CONST(1) << CONTEXT_BITS) - 2)
-#define MIN_USER_CONTEXT (5)
-
-/* Would be nice to use KERNEL_REGION_ID here */
-#define KERNEL_REGION_CONTEXT_OFFSET (0xc - 1)
+#define MIN_USER_CONTEXT (MAX_KERNEL_CTX_CNT + MAX_VMALLOC_CTX_CNT + \
+ MAX_MEMMAP_CTX_CNT + 2)
/*
* For platforms that support on 65bit VA we limit the context bits
return vsid_scramble(protovsid, VSID_MULTIPLIER_1T, vsid_bits);
}
+/*
+ * For kernel space, we use context ids as below
+ * below. Range is 512TB per context.
+ *
+ * 0x00001 - [ 0xc000000000000000 - 0xc001ffffffffffff]
+ * 0x00002 - [ 0xc002000000000000 - 0xc003ffffffffffff]
+ * 0x00003 - [ 0xc004000000000000 - 0xc005ffffffffffff]
+ * 0x00004 - [ 0xc006000000000000 - 0xc007ffffffffffff]
+
+ * 0x00005 - [ 0xd000000000000000 - 0xd001ffffffffffff ]
+ * 0x00006 - Not used - Can map 0xe000000000000000 range.
+ * 0x00007 - [ 0xf000000000000000 - 0xf001ffffffffffff ]
+ *
+ * So we can compute the context from the region (top nibble) by
+ * subtracting 11, or 0xc - 1.
+ */
+static inline unsigned long get_kernel_context(unsigned long ea)
+{
+ unsigned long region_id = REGION_ID(ea);
+ unsigned long ctx;
+ /*
+ * For linear mapping we do support multiple context
+ */
+ if (region_id == KERNEL_REGION_ID) {
+ /*
+ * We already verified ea to be not beyond the addr limit.
+ */
+ ctx = 1 + ((ea & ~REGION_MASK) >> MAX_EA_BITS_PER_CONTEXT);
+ } else
+ ctx = (region_id - 0xc) + MAX_KERNEL_CTX_CNT;
+ return ctx;
+}
+
/*
* This is only valid for addresses >= PAGE_OFFSET
*/
if (!is_kernel_addr(ea))
return 0;
- /*
- * For kernel space, we use context ids 1-4 to map the address space as
- * below:
- *
- * 0x00001 - [ 0xc000000000000000 - 0xc0003fffffffffff ]
- * 0x00002 - [ 0xd000000000000000 - 0xd0003fffffffffff ]
- * 0x00003 - [ 0xe000000000000000 - 0xe0003fffffffffff ]
- * 0x00004 - [ 0xf000000000000000 - 0xf0003fffffffffff ]
- *
- * So we can compute the context from the region (top nibble) by
- * subtracting 11, or 0xc - 1.
- */
- context = (ea >> 60) - KERNEL_REGION_CONTEXT_OFFSET;
-
+ context = get_kernel_context(ea);
return get_vsid(context, ea, ssize);
}
static inline void radix_init_pseries(void) { };
#endif
-static inline int get_ea_context(mm_context_t *ctx, unsigned long ea)
+static inline int get_user_context(mm_context_t *ctx, unsigned long ea)
{
int index = ea >> MAX_EA_BITS_PER_CONTEXT;
static inline unsigned long get_user_vsid(mm_context_t *ctx,
unsigned long ea, int ssize)
{
- unsigned long context = get_ea_context(ctx, ea);
+ unsigned long context = get_user_context(ctx, ea);
return get_vsid(context, ea, ssize);
}
*
* Defined in such a way that we can optimize away code block at build time
* if CONFIG_HUGETLB_PAGE=n.
+ *
+ * returns true for pmd migration entries, THP, devmap, hugetlb
+ * But compile time dependent on CONFIG_HUGETLB_PAGE
*/
static inline int pmd_huge(pmd_t pmd)
{
*/
#define _PAGE_BIT_SWAP_TYPE 0
-#define _PAGE_NA 0
-#define _PAGE_RO 0
-#define _PAGE_USER 0
-
#define _PAGE_EXEC 0x00001 /* execute permission */
#define _PAGE_WRITE 0x00002 /* write access allowed */
#define _PAGE_READ 0x00004 /* read access allowed */
#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_READ)
#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | \
_PAGE_RW | _PAGE_EXEC)
-/*
- * No page size encoding in the linux PTE
- */
-#define _PAGE_PSIZE 0
/*
* _PAGE_CHG_MASK masks of bits that are to be preserved across
* pgprot changes
#define H_PTE_PKEY (H_PTE_PKEY_BIT0 | H_PTE_PKEY_BIT1 | H_PTE_PKEY_BIT2 | \
H_PTE_PKEY_BIT3 | H_PTE_PKEY_BIT4)
-/*
- * Mask of bits returned by pte_pgprot()
- */
-#define PAGE_PROT_BITS (_PAGE_SAO | _PAGE_NON_IDEMPOTENT | _PAGE_TOLERANT | \
- H_PAGE_4K_PFN | _PAGE_PRIVILEGED | _PAGE_ACCESSED | \
- _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_EXEC | \
- _PAGE_SOFT_DIRTY | H_PTE_PKEY)
/*
* We define 2 sets of base prot bits, one for basic pages (ie,
* cacheable kernel and user pages) and one for non cacheable
* pages. We always set _PAGE_COHERENT when SMP is enabled or
* the processor might need it for DMA coherency.
*/
-#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
#define _PAGE_BASE (_PAGE_BASE_NC)
/* Permission masks used to generate the __P and __S table,
* Write permissions imply read permissions for now (we could make write-only
* pages on BookE but we don't bother for now). Execute permission control is
* possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
*/
#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_PRIVILEGED)
#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_RW)
#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_READ)
#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_EXEC)
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
/* Permission masks used for kernel mappings */
#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_SPECIAL));
}
-static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+static inline bool pte_exec(pte_t pte)
+{
+ return !!(pte_raw(pte) & cpu_to_be64(_PAGE_EXEC));
+}
+
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline bool pte_soft_dirty(pte_t pte)
static inline pte_t pte_mksoft_dirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SOFT_DIRTY));
}
static inline pte_t pte_clear_soft_dirty(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SOFT_DIRTY));
}
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
*/
VM_BUG_ON((pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_RWX | _PAGE_PRIVILEGED)) !=
cpu_to_be64(_PAGE_PRESENT | _PAGE_PRIVILEGED));
- return __pte(pte_val(pte) & ~_PAGE_PRIVILEGED);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
}
#define pte_clear_savedwrite pte_clear_savedwrite
* Used by KSM subsystem to make a protnone pte readonly.
*/
VM_BUG_ON(!pte_protnone(pte));
- return __pte(pte_val(pte) | _PAGE_PRIVILEGED);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
}
#else
#define pte_clear_savedwrite pte_clear_savedwrite
static inline pte_t pte_clear_savedwrite(pte_t pte)
{
VM_WARN_ON(1);
- return __pte(pte_val(pte) & ~_PAGE_WRITE);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
}
#endif /* CONFIG_NUMA_BALANCING */
return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID));
}
+static inline bool pte_hw_valid(pte_t pte)
+{
+ return !!(pte_raw(pte) & cpu_to_be64(_PAGE_PRESENT));
+}
+
#ifdef CONFIG_PPC_MEM_KEYS
extern bool arch_pte_access_permitted(u64 pte, bool write, bool execute);
#else
}
#endif /* CONFIG_PPC_MEM_KEYS */
+static inline bool pte_user(pte_t pte)
+{
+ return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
#define pte_access_permitted pte_access_permitted
static inline bool pte_access_permitted(pte_t pte, bool write)
{
- unsigned long pteval = pte_val(pte);
- /* Also check for pte_user */
- unsigned long clear_pte_bits = _PAGE_PRIVILEGED;
/*
* _PAGE_READ is needed for any access and will be
* cleared for PROT_NONE
*/
- unsigned long need_pte_bits = _PAGE_PRESENT | _PAGE_READ;
-
- if (write)
- need_pte_bits |= _PAGE_WRITE;
-
- if ((pteval & need_pte_bits) != need_pte_bits)
+ if (!pte_present(pte) || !pte_user(pte) || !pte_read(pte))
return false;
- if ((pteval & clear_pte_bits) == clear_pte_bits)
+ if (write && !pte_write(pte))
return false;
return arch_pte_access_permitted(pte_val(pte), write, 0);
{
if (unlikely(pte_savedwrite(pte)))
return pte_clear_savedwrite(pte);
- return __pte(pte_val(pte) & ~_PAGE_WRITE);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_WRITE));
+}
+
+static inline pte_t pte_exprotect(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_EXEC));
}
static inline pte_t pte_mkclean(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_DIRTY);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_DIRTY));
}
static inline pte_t pte_mkold(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_ACCESSED));
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_EXEC));
+}
+
+static inline pte_t pte_mkpte(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
}
static inline pte_t pte_mkwrite(pte_t pte)
/*
* write implies read, hence set both
*/
- return __pte(pte_val(pte) | _PAGE_RW);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_RW));
}
static inline pte_t pte_mkdirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_DIRTY | _PAGE_SOFT_DIRTY));
}
static inline pte_t pte_mkyoung(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_ACCESSED);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_ACCESSED));
}
static inline pte_t pte_mkspecial(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SPECIAL);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL));
}
static inline pte_t pte_mkhuge(pte_t pte)
static inline pte_t pte_mkdevmap(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SPECIAL|_PAGE_DEVMAP);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SPECIAL | _PAGE_DEVMAP));
+}
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_PRIVILEGED));
}
/*
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
/* FIXME!! check whether this need to be a conditional */
- return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
-}
-
-static inline bool pte_user(pte_t pte)
-{
- return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
+ return __pte_raw((pte_raw(pte) & cpu_to_be64(_PAGE_CHG_MASK)) |
+ cpu_to_be64(pgprot_val(newprot)));
}
/* Encode and de-code a swap entry */
*/
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) & ~_PAGE_PTE })
#define __swp_entry_to_pte(x) __pte((x).val | _PAGE_PTE)
+#define __pmd_to_swp_entry(pmd) (__pte_to_swp_entry(pmd_pte(pmd)))
+#define __swp_entry_to_pmd(x) (pte_pmd(__swp_entry_to_pte(x)))
#ifdef CONFIG_MEM_SOFT_DIRTY
#define _PAGE_SWP_SOFT_DIRTY (1UL << (SWP_TYPE_BITS + _PAGE_BIT_SWAP_TYPE))
#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY
static inline pte_t pte_swp_mksoft_dirty(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_SWP_SOFT_DIRTY));
}
static inline bool pte_swp_soft_dirty(pte_t pte)
static inline pte_t pte_swp_clear_soft_dirty(pte_t pte)
{
- return __pte(pte_val(pte) & ~_PAGE_SWP_SOFT_DIRTY);
+ return __pte_raw(pte_raw(pte) & cpu_to_be64(~_PAGE_SWP_SOFT_DIRTY));
}
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
*/
static inline bool pte_ci(pte_t pte)
{
- unsigned long pte_v = pte_val(pte);
+ __be64 pte_v = pte_raw(pte);
- if (((pte_v & _PAGE_CACHE_CTL) == _PAGE_TOLERANT) ||
- ((pte_v & _PAGE_CACHE_CTL) == _PAGE_NON_IDEMPOTENT))
+ if (((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_TOLERANT)) ||
+ ((pte_v & cpu_to_be64(_PAGE_CACHE_CTL)) == cpu_to_be64(_PAGE_NON_IDEMPOTENT)))
return true;
return false;
}
static inline int pmd_present(pmd_t pmd)
{
+ /*
+ * A pmd is considerent present if _PAGE_PRESENT is set.
+ * We also need to consider the pmd present which is marked
+ * invalid during a split. Hence we look for _PAGE_INVALID
+ * if we find _PAGE_PRESENT cleared.
+ */
+ if (pmd_raw(pmd) & cpu_to_be64(_PAGE_PRESENT | _PAGE_INVALID))
+ return true;
- return !pmd_none(pmd);
+ return false;
}
static inline int pmd_bad(pmd_t pmd)
static inline int pud_present(pud_t pud)
{
- return !pud_none(pud);
+ return (pud_raw(pud) & cpu_to_be64(_PAGE_PRESENT));
}
extern struct page *pud_page(pud_t pud);
static inline int pgd_present(pgd_t pgd)
{
- return !pgd_none(pgd);
+ return (pgd_raw(pgd) & cpu_to_be64(_PAGE_PRESENT));
}
static inline pte_t pgd_pte(pgd_t pgd)
#define pgd_ERROR(e) \
pr_err("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
-static inline int map_kernel_page(unsigned long ea, unsigned long pa,
- unsigned long flags)
+static inline int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
if (radix_enabled()) {
#if defined(CONFIG_PPC_RADIX_MMU) && defined(DEBUG_VM)
unsigned long page_size = 1 << mmu_psize_defs[mmu_io_psize].shift;
WARN((page_size != PAGE_SIZE), "I/O page size != PAGE_SIZE");
#endif
- return radix__map_kernel_page(ea, pa, __pgprot(flags), PAGE_SIZE);
+ return radix__map_kernel_page(ea, pa, prot, PAGE_SIZE);
}
- return hash__map_kernel_page(ea, pa, flags);
+ return hash__map_kernel_page(ea, pa, prot);
}
static inline int __meminit vmemmap_create_mapping(unsigned long start,
#define pmd_soft_dirty(pmd) pte_soft_dirty(pmd_pte(pmd))
#define pmd_mksoft_dirty(pmd) pte_pmd(pte_mksoft_dirty(pmd_pte(pmd)))
#define pmd_clear_soft_dirty(pmd) pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd)))
+
+#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION
+#define pmd_swp_mksoft_dirty(pmd) pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd)))
+#define pmd_swp_soft_dirty(pmd) pte_swp_soft_dirty(pmd_pte(pmd))
+#define pmd_swp_clear_soft_dirty(pmd) pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd)))
+#endif
#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */
#ifdef CONFIG_NUMA_BALANCING
return hash__pmd_hugepage_update(mm, addr, pmdp, clr, set);
}
+/*
+ * returns true for pmd migration entries, THP, devmap, hugetlb
+ * But compile time dependent on THP config
+ */
static inline int pmd_large(pmd_t pmd)
{
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
pmd_hugepage_update(mm, addr, pmdp, 0, _PAGE_PRIVILEGED);
}
+/*
+ * Only returns true for a THP. False for pmd migration entry.
+ * We also need to return true when we come across a pte that
+ * in between a thp split. While splitting THP, we mark the pmd
+ * invalid (pmdp_invalidate()) before we set it with pte page
+ * address. A pmd_trans_huge() check against a pmd entry during that time
+ * should return true.
+ * We should not call this on a hugetlb entry. We should check for HugeTLB
+ * entry using vma->vm_flags
+ * The page table walk rule is explained in Documentation/vm/transhuge.rst
+ */
static inline int pmd_trans_huge(pmd_t pmd)
{
+ if (!pmd_present(pmd))
+ return false;
+
if (radix_enabled())
return radix__pmd_trans_huge(pmd);
return hash__pmd_trans_huge(pmd);
extern int threads_per_core;
extern int threads_per_subcore;
extern int threads_shift;
+extern bool has_big_cores;
extern cpumask_t threads_core_mask;
#else
#define threads_per_core 1
#define threads_per_subcore 1
#define threads_shift 0
+#define has_big_cores 0
#define threads_core_mask (*get_cpu_mask(0))
#endif
struct cpu_accounting_data *acct0 = get_accounting(prev);
acct->starttime = acct0->starttime;
- acct->startspurr = acct0->startspurr;
}
#endif
void (*func)(struct drmem_lmb *, const __be32 **));
#endif
+static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
+{
+ lmb->aa_index = 0xffffffff;
+}
+
#endif /* _ASM_POWERPC_LMB_H */
#define EEH_VALID_PE_ZERO 0x10 /* PE#0 is valid */
#define EEH_ENABLE_IO_FOR_LOG 0x20 /* Enable IO for log */
#define EEH_EARLY_DUMP_LOG 0x40 /* Dump log immediately */
-#define EEH_POSTPONED_PROBE 0x80 /* Powernv may postpone device probe */
/*
* Delay for PE reset, all in ms
atomic_t pass_dev_cnt; /* Count of passed through devs */
struct eeh_pe *parent; /* Parent PE */
void *data; /* PE auxillary data */
- struct list_head child_list; /* Link PE to the child list */
- struct list_head edevs; /* Link list of EEH devices */
- struct list_head child; /* Child PEs */
+ struct list_head child_list; /* List of PEs below this PE */
+ struct list_head child; /* Memb. child_list/eeh_phb_pe */
+ struct list_head edevs; /* List of eeh_dev in this PE */
};
#define eeh_pe_for_each_dev(pe, edev, tmp) \
- list_for_each_entry_safe(edev, tmp, &pe->edevs, list)
+ list_for_each_entry_safe(edev, tmp, &pe->edevs, entry)
#define eeh_for_each_pe(root, pe) \
for (pe = root; pe; pe = eeh_pe_next(pe, root))
int aer_cap; /* Saved AER capability */
int af_cap; /* Saved AF capability */
struct eeh_pe *pe; /* Associated PE */
- struct list_head list; /* Form link list in the PE */
- struct list_head rmv_list; /* Record the removed edevs */
+ struct list_head entry; /* Membership in eeh_pe.edevs */
+ struct list_head rmv_entry; /* Membership in rmv_list */
struct pci_dn *pdn; /* Associated PCI device node */
struct pci_dev *pdev; /* Associated PCI device */
bool in_error; /* Error flag for edev */
struct pci_dev *physfn; /* Associated SRIOV PF */
- struct pci_bus *bus; /* PCI bus for partial hotplug */
};
static inline struct pci_dn *eeh_dev_to_pdn(struct eeh_dev *edev)
void* (*probe)(struct pci_dn *pdn, void *data);
int (*set_option)(struct eeh_pe *pe, int option);
int (*get_pe_addr)(struct eeh_pe *pe);
- int (*get_state)(struct eeh_pe *pe, int *state);
+ int (*get_state)(struct eeh_pe *pe, int *delay);
int (*reset)(struct eeh_pe *pe, int option);
- int (*wait_state)(struct eeh_pe *pe, int max_wait);
int (*get_log)(struct eeh_pe *pe, int severity, char *drv_log, unsigned long len);
int (*configure_bridge)(struct eeh_pe *pe);
int (*err_inject)(struct eeh_pe *pe, int type, int func,
static inline bool eeh_enabled(void)
{
- if (eeh_has_flag(EEH_FORCE_DISABLED) ||
- !eeh_has_flag(EEH_ENABLED))
- return false;
-
- return true;
+ return eeh_has_flag(EEH_ENABLED) && !eeh_has_flag(EEH_FORCE_DISABLED);
}
static inline void eeh_serialize_lock(unsigned long *flags)
typedef void *(*eeh_pe_traverse_func)(struct eeh_pe *pe, void *flag);
void eeh_set_pe_aux_size(int size);
int eeh_phb_pe_create(struct pci_controller *phb);
+int eeh_wait_state(struct eeh_pe *pe, int max_wait);
struct eeh_pe *eeh_phb_pe_get(struct pci_controller *phb);
struct eeh_pe *eeh_pe_next(struct eeh_pe *pe, struct eeh_pe *root);
struct eeh_pe *eeh_pe_get(struct pci_controller *phb,
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _ASM_ERROR_INJECTION_H
+#define _ASM_ERROR_INJECTION_H
+
+#include <linux/compiler.h>
+#include <linux/linkage.h>
+#include <asm/ptrace.h>
+#include <asm-generic/error-injection.h>
+
+void override_function_with_return(struct pt_regs *regs);
+
+#endif /* _ASM_ERROR_INJECTION_H */
*/
#define MAX_MCE_DEPTH 4
-/*
- * EX_LR is only used in EXSLB and where it does not overlap with EX_DAR
- * EX_CCR similarly with DSISR, but being 4 byte registers there is a hole
- * in the save area so it's not necessary to overlap them. Could be used
- * for future savings though if another 4 byte register was to be saved.
- */
-#define EX_LR EX_DAR
-
/*
* EX_R3 is only used by the bad_stack handler. bad_stack reloads and
* saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
* PPR save/restore macros used in exceptions_64s.S
* Used for P7 or later processors
*/
-#define SAVE_PPR(area, ra, rb) \
+#define SAVE_PPR(area, ra) \
BEGIN_FTR_SECTION_NESTED(940) \
- ld ra,PACACURRENT(r13); \
- ld rb,area+EX_PPR(r13); /* Read PPR from paca */ \
- std rb,TASKTHREADPPR(ra); \
+ ld ra,area+EX_PPR(r13); /* Read PPR from paca */ \
+ std ra,_PPR(r1); \
END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
#define RESTORE_PPR_PACA(area, ra) \
3: EXCEPTION_PROLOG_COMMON_1(); \
beq 4f; /* if from kernel mode */ \
ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \
- SAVE_PPR(area, r9, r10); \
+ SAVE_PPR(area, r9); \
4: EXCEPTION_PROLOG_COMMON_2(area) \
EXCEPTION_PROLOG_COMMON_3(n) \
ACCOUNT_STOLEN_TIME
#define FW_FEATURE_PRRN ASM_CONST(0x0000000200000000)
#define FW_FEATURE_DRMEM_V2 ASM_CONST(0x0000000400000000)
#define FW_FEATURE_DRC_INFO ASM_CONST(0x0000000800000000)
+#define FW_FEATURE_BLOCK_REMOVE ASM_CONST(0x0000001000000000)
+#define FW_FEATURE_PAPR_SCM ASM_CONST(0x0000002000000000)
#ifndef __ASSEMBLY__
FW_FEATURE_SET_MODE | FW_FEATURE_BEST_ENERGY |
FW_FEATURE_TYPE1_AFFINITY | FW_FEATURE_PRRN |
FW_FEATURE_HPT_RESIZE | FW_FEATURE_DRMEM_V2 |
- FW_FEATURE_DRC_INFO,
+ FW_FEATURE_DRC_INFO | FW_FEATURE_BLOCK_REMOVE |
+ FW_FEATURE_PAPR_SCM,
FW_FEATURE_PSERIES_ALWAYS = 0,
FW_FEATURE_POWERNV_POSSIBLE = FW_FEATURE_OPAL,
FW_FEATURE_POWERNV_ALWAYS = 0,
static inline void __set_fixmap(enum fixed_addresses idx,
phys_addr_t phys, pgprot_t flags)
{
- map_kernel_page(fix_to_virt(idx), phys, pgprot_val(flags));
+ map_kernel_page(fix_to_virt(idx), phys, flags);
}
#endif /* !__ASSEMBLY__ */
#define H_COP 0x304
#define H_GET_MPP_X 0x314
#define H_SET_MODE 0x31C
+#define H_BLOCK_REMOVE 0x328
#define H_CLEAR_HPT 0x358
#define H_REQUEST_VMC 0x360
#define H_RESIZE_HPT_PREPARE 0x36C
#define H_INT_ESB 0x3C8
#define H_INT_SYNC 0x3CC
#define H_INT_RESET 0x3D0
-#define MAX_HCALL_OPCODE H_INT_RESET
+#define H_SCM_READ_METADATA 0x3E4
+#define H_SCM_WRITE_METADATA 0x3E8
+#define H_SCM_BIND_MEM 0x3EC
+#define H_SCM_UNBIND_MEM 0x3F0
+#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
+#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
+#define H_SCM_MEM_QUERY 0x3FC
+#define H_SCM_BLOCK_CLEAR 0x400
+#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR
/* H_VIOCTL functions */
#define H_GET_VIOA_DUMP_SIZE 0x01
#ifdef __KERNEL__
#define ARCH_HAS_IOREMAP_WC
+#ifdef CONFIG_PPC32
+#define ARCH_HAS_IOREMAP_WT
+#endif
/*
* This program is free software; you can redistribute it and/or
#define IO_SET_SYNC_FLAG()
#endif
-/* gcc 4.0 and older doesn't have 'Z' constraint */
-#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ == 0)
-#define DEF_MMIO_IN_X(name, size, insn) \
-static inline u##size name(const volatile u##size __iomem *addr) \
-{ \
- u##size ret; \
- __asm__ __volatile__("sync;"#insn" %0,0,%1;twi 0,%0,0;isync" \
- : "=r" (ret) : "r" (addr), "m" (*addr) : "memory"); \
- return ret; \
-}
-
-#define DEF_MMIO_OUT_X(name, size, insn) \
-static inline void name(volatile u##size __iomem *addr, u##size val) \
-{ \
- __asm__ __volatile__("sync;"#insn" %1,0,%2" \
- : "=m" (*addr) : "r" (val), "r" (addr) : "memory"); \
- IO_SET_SYNC_FLAG(); \
-}
-#else /* newer gcc */
#define DEF_MMIO_IN_X(name, size, insn) \
static inline u##size name(const volatile u##size __iomem *addr) \
{ \
: "=Z" (*addr) : "r" (val) : "memory"); \
IO_SET_SYNC_FLAG(); \
}
-#endif
#define DEF_MMIO_IN_D(name, size, insn) \
static inline u##size name(const volatile u##size __iomem *addr) \
*
* * ioremap_wc enables write combining
*
+ * * ioremap_wt enables write through
+ *
+ * * ioremap_coherent maps coherent cached memory
+ *
* * iounmap undoes such a mapping and can be hooked
*
* * __ioremap_at (and the pending __iounmap_at) are low level functions to
extern void __iomem *ioremap_prot(phys_addr_t address, unsigned long size,
unsigned long flags);
extern void __iomem *ioremap_wc(phys_addr_t address, unsigned long size);
+void __iomem *ioremap_wt(phys_addr_t address, unsigned long size);
+void __iomem *ioremap_coherent(phys_addr_t address, unsigned long size);
#define ioremap_nocache(addr, size) ioremap((addr), (size))
#define ioremap_uc(addr, size) ioremap((addr), (size))
#define ioremap_cache(addr, size) \
extern void __iomem *__ioremap(phys_addr_t, unsigned long size,
unsigned long flags);
extern void __iomem *__ioremap_caller(phys_addr_t, unsigned long size,
- unsigned long flags, void *caller);
+ pgprot_t prot, void *caller);
extern void __iounmap(volatile void __iomem *addr);
extern void __iomem * __ioremap_at(phys_addr_t pa, void *ea,
- unsigned long size, unsigned long flags);
+ unsigned long size, pgprot_t prot);
extern void __iounmap_at(void *ea, unsigned long size);
/*
#define BREAK_INSTR_SIZE 4
#define BUFMAX ((NUMREGBYTES * 2) + 512)
#define OUTBUFMAX ((NUMREGBYTES * 2) + 512)
+
+#define BREAK_INSTR 0x7d821008 /* twge r2, r2 */
+
static inline void arch_kgdb_breakpoint(void)
{
- asm(".long 0x7d821008"); /* twge r2, r2 */
+ asm(stringify_in_c(.long BREAK_INSTR));
}
#define CACHE_FLUSH_IS_SAFE 1
#define DBG_MAX_REG_NUM 70
char *name;
#ifdef CONFIG_PPC64
void __iomem * (*ioremap)(phys_addr_t addr, unsigned long size,
- unsigned long flags, void *caller);
+ pgprot_t prot, void *caller);
void (*iounmap)(volatile void __iomem *token);
#ifdef CONFIG_PM
/* Early exception handlers called in realmode */
int (*hmi_exception_early)(struct pt_regs *regs);
+ long (*machine_check_early)(struct pt_regs *regs);
/* Called during machine check exception to retrive fixup address. */
bool (*mce_check_early_recovery)(struct pt_regs *regs);
extern void machine_check_queue_event(void);
extern void machine_check_print_event_info(struct machine_check_event *evt,
bool user_mode);
+#ifdef CONFIG_PPC_BOOK3S_64
+void flush_and_reload_slb(void);
+#endif /* CONFIG_PPC_BOOK3S_64 */
#endif /* __ASM_PPC64_MCE_H__ */
*/
#define MMU_PAGE_COUNT 16
+/*
+ * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
+ * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
+ * page_to_nid does a page->section->node lookup
+ * Hence only increase for VMEMMAP. Further depending on SPARSEMEM_EXTREME reduce
+ * memory requirements with large number of sections.
+ * 51 bits is the max physical real address on POWER9
+ */
+#if defined(CONFIG_SPARSEMEM_VMEMMAP) && defined(CONFIG_SPARSEMEM_EXTREME) && \
+ defined (CONFIG_PPC_64K_PAGES)
+#define MAX_PHYSMEM_BITS 51
+#else
+#define MAX_PHYSMEM_BITS 46
+#endif
+
#ifdef CONFIG_PPC_BOOK3S_64
#include <asm/book3s/64/mmu.h>
#else /* CONFIG_PPC_BOOK3S_64 */
{
int context_id;
- context_id = get_ea_context(&mm->context, ea);
+ context_id = get_user_context(&mm->context, ea);
if (!context_id)
return true;
return false;
#define MPIC_REGSET_TSI108 MPIC_REGSET(1) /* Tsi108/109 PIC */
/* Get the version of primary MPIC */
+#ifdef CONFIG_MPIC
extern u32 fsl_mpic_primary_get_version(void);
+#else
+static inline u32 fsl_mpic_primary_get_version(void)
+{
+ return 0;
+}
+#endif
/* Allocate the controller structure and setup the linux irq descs
* for the range if interrupts passed in. No HW initialization is
#include <asm/nohash/32/pte-8xx.h>
#endif
-/* And here we include common definitions */
-#include <asm/pte-common.h>
+/*
+ * Location of the PFN in the PTE. Most 32-bit platforms use the same
+ * as _PAGE_SHIFT here (ie, naturally aligned).
+ * Platform who don't just pre-define the value so we don't override it here.
+ */
+#ifndef PTE_RPN_SHIFT
+#define PTE_RPN_SHIFT (PAGE_SHIFT)
+#endif
+
+/*
+ * The mask covered by the RPN must be a ULL on 32-bit platforms with
+ * 64-bit PTEs.
+ */
+#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
+#define PTE_RPN_MASK (~((1ULL << PTE_RPN_SHIFT) - 1))
+#else
+#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+#endif
+
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
#ifndef __ASSEMBLY__
#define pte_clear(mm, addr, ptep) \
do { pte_update(ptep, ~0, 0); } while (0)
+#ifndef pte_mkwrite
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_RW);
+}
+#endif
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+#ifndef pte_wrprotect
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+#endif
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+
#define pmd_none(pmd) (!pmd_val(pmd))
#define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
#define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK)
static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr,
pte_t *ptep)
{
- pte_update(ptep, (_PAGE_RW | _PAGE_HWWRITE), _PAGE_RO);
+ unsigned long clr = ~pte_val(pte_wrprotect(__pte(~0)));
+ unsigned long set = pte_val(pte_wrprotect(__pte(0)));
+
+ pte_update(ptep, clr, set);
}
static inline void __ptep_set_access_flags(struct vm_area_struct *vma,
unsigned long address,
int psize)
{
- unsigned long set = pte_val(entry) &
- (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
- unsigned long clr = ~pte_val(entry) & (_PAGE_RO | _PAGE_NA);
+ pte_t pte_set = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(0)))));
+ pte_t pte_clr = pte_mkyoung(pte_mkdirty(pte_mkwrite(pte_mkexec(__pte(~0)))));
+ unsigned long set = pte_val(entry) & pte_val(pte_set);
+ unsigned long clr = ~pte_val(entry) & ~pte_val(pte_clr);
pte_update(ptep, clr, set);
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val(pte) >> 3 })
#define __swp_entry_to_pte(x) ((pte_t) { (x).val << 3 })
-int map_kernel_page(unsigned long va, phys_addr_t pa, int flags);
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot);
#endif /* !__ASSEMBLY__ */
#define _PAGE_EXEC 0x200 /* hardware: EX permission */
#define _PAGE_ACCESSED 0x400 /* software: R: page referenced */
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE 0
+
+/* cache related flags non existing on 40x */
+#define _PAGE_COHERENT 0
+
+#define _PAGE_KERNEL_RO 0
+#define _PAGE_KERNEL_ROX _PAGE_EXEC
+#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE)
+#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_HWWRITE | _PAGE_EXEC)
+
#define _PMD_PRESENT 0x400 /* PMD points to page of PTEs */
+#define _PMD_PRESENT_MASK _PMD_PRESENT
#define _PMD_BAD 0x802
#define _PMD_SIZE_4M 0x0c0
#define _PMD_SIZE_16M 0x0e0
+#define _PMD_USER 0
+
+#define _PTE_NONE_MASK 0
/* Until my rework is finished, 40x still needs atomic PTE updates */
#define PTE_ATOMIC_UPDATES 1
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#define _PAGE_BASE (_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE __pgprot(_PAGE_BASE)
+#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#ifndef __ASSEMBLY__
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE));
+}
+
+#define pte_wrprotect pte_wrprotect
+
+static inline pte_t pte_mkclean(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
+}
+
+#define pte_mkclean pte_mkclean
+#endif
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_NOHASH_32_PTE_40x_H */
#define _PAGE_NO_CACHE 0x00000400 /* H: I bit */
#define _PAGE_WRITETHRU 0x00000800 /* H: W bit */
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE 0
+
+#define _PAGE_KERNEL_RO 0
+#define _PAGE_KERNEL_ROX _PAGE_EXEC
+#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
/* TODO: Add large page lowmem mapping support */
#define _PMD_PRESENT 0
#define _PMD_PRESENT_MASK (PAGE_MASK)
#define _PMD_BAD (~PAGE_MASK)
+#define _PMD_USER 0
/* ERPN in a PTE never gets cleared, ignore it */
#define _PTE_NONE_MASK 0xffffffff00000000ULL
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE (_PAGE_BASE_NC)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE __pgprot(_PAGE_BASE)
+#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_NOHASH_32_PTE_44x_H */
*/
/* Definitions for 8xx embedded chips. */
-#define _PAGE_PRESENT 0x0001 /* Page is valid */
-#define _PAGE_NO_CACHE 0x0002 /* I: cache inhibit */
-#define _PAGE_PRIVILEGED 0x0004 /* No ASID (context) compare */
-#define _PAGE_HUGE 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
+#define _PAGE_PRESENT 0x0001 /* V: Page is valid */
+#define _PAGE_NO_CACHE 0x0002 /* CI: cache inhibit */
+#define _PAGE_SH 0x0004 /* SH: No ASID (context) compare */
+#define _PAGE_SPS 0x0008 /* SPS: Small Page Size (1 if 16k, 512k or 8M)*/
#define _PAGE_DIRTY 0x0100 /* C: page changed */
/* These 4 software bits must be masked out when the L2 entry is loaded
#define _PAGE_NA 0x0200 /* Supervisor NA, User no access */
#define _PAGE_RO 0x0600 /* Supervisor RO, User no access */
+/* cache related flags non existing on 8xx */
+#define _PAGE_COHERENT 0
+#define _PAGE_WRITETHRU 0
+
+#define _PAGE_KERNEL_RO (_PAGE_SH | _PAGE_RO)
+#define _PAGE_KERNEL_ROX (_PAGE_SH | _PAGE_RO | _PAGE_EXEC)
+#define _PAGE_KERNEL_RW (_PAGE_SH | _PAGE_DIRTY)
+#define _PAGE_KERNEL_RWX (_PAGE_SH | _PAGE_DIRTY | _PAGE_EXEC)
+
#define _PMD_PRESENT 0x0001
+#define _PMD_PRESENT_MASK _PMD_PRESENT
#define _PMD_BAD 0x0fd0
#define _PMD_PAGE_MASK 0x000c
#define _PMD_PAGE_8M 0x000c
#define _PMD_PAGE_512K 0x0004
#define _PMD_USER 0x0020 /* APG 1 */
+#define _PTE_NONE_MASK 0
+
/* Until my rework is finished, 8xx still needs atomic PTE updates */
#define PTE_ATOMIC_UPDATES 1
#ifdef CONFIG_PPC_16K_PAGES
-#define _PAGE_PSIZE _PAGE_HUGE
+#define _PAGE_PSIZE _PAGE_SPS
+#else
+#define _PAGE_PSIZE 0
+#endif
+
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#define _PAGE_BASE (_PAGE_BASE_NC)
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA)
+#define PAGE_SHARED __pgprot(_PAGE_BASE)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_EXEC)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_RO)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_RO)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_RO | _PAGE_EXEC)
+
+#ifndef __ASSEMBLY__
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_RO);
+}
+
+#define pte_wrprotect pte_wrprotect
+
+static inline int pte_write(pte_t pte)
+{
+ return !(pte_val(pte) & _PAGE_RO);
+}
+
+#define pte_write pte_write
+
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_RO);
+}
+
+#define pte_mkwrite pte_mkwrite
+
+static inline bool pte_user(pte_t pte)
+{
+ return !(pte_val(pte) & _PAGE_SH);
+}
+
+#define pte_user pte_user
+
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SH);
+}
+
+#define pte_mkprivileged pte_mkprivileged
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_SH);
+}
+
+#define pte_mkuser pte_mkuser
+
+static inline pte_t pte_mkhuge(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_SPS);
+}
+
+#define pte_mkhuge pte_mkhuge
#endif
#endif /* __KERNEL__ */
#define _PAGE_WRITETHRU 0x00400 /* H: W bit */
#define _PAGE_SPECIAL 0x00800 /* S: Special page */
+#define _PAGE_KERNEL_RO 0
+#define _PAGE_KERNEL_ROX _PAGE_EXEC
+#define _PAGE_KERNEL_RW (_PAGE_DIRTY | _PAGE_RW)
+#define _PAGE_KERNEL_RWX (_PAGE_DIRTY | _PAGE_RW | _PAGE_EXEC)
+
+/* No page size encoding in the linux PTE */
+#define _PAGE_PSIZE 0
+
#define _PMD_PRESENT 0
#define _PMD_PRESENT_MASK (PAGE_MASK)
#define _PMD_BAD (~PAGE_MASK)
+#define _PMD_USER 0
+
+#define _PTE_NONE_MASK 0
#define PTE_WIMGE_SHIFT (6)
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED)
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE (_PAGE_BASE_NC)
+#endif
+
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE __pgprot(_PAGE_BASE)
+#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_NOHASH_32_PTE_FSL_BOOKE_H */
* Include the PTE bits definitions
*/
#include <asm/nohash/pte-book3e.h>
-#include <asm/pte-common.h>
+
+#define _PAGE_SAO 0
+
+#define PTE_RPN_MASK (~((1UL << PTE_RPN_SHIFT) - 1))
+
+/*
+ * _PAGE_CHG_MASK masks of bits that are to be preserved across
+ * pgprot changes.
+ */
+#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_SPECIAL)
+
+#define H_PAGE_4K_PFN 0
#ifndef __ASSEMBLY__
/* pte_clear moved to later in this file */
+static inline pte_t pte_mkwrite(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_RW);
+}
+
+static inline pte_t pte_mkdirty(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_DIRTY);
+}
+
+static inline pte_t pte_mkyoung(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_ACCESSED);
+}
+
+static inline pte_t pte_wrprotect(pte_t pte)
+{
+ return __pte(pte_val(pte) & ~_PAGE_RW);
+}
+
+static inline pte_t pte_mkexec(pte_t pte)
+{
+ return __pte(pte_val(pte) | _PAGE_EXEC);
+}
+
#define PMD_BAD_BITS (PTE_TABLE_SIZE-1)
#define PUD_BAD_BITS (PMD_TABLE_SIZE-1)
#define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) })
#define __swp_entry_to_pte(x) __pte((x).val)
-extern int map_kernel_page(unsigned long ea, unsigned long pa,
- unsigned long flags);
+int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot);
extern int __meminit vmemmap_create_mapping(unsigned long start,
unsigned long page_size,
unsigned long phys);
#include <asm/nohash/32/pgtable.h>
#endif
+/* Permission masks used for kernel mappings */
+#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
+#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | _PAGE_NO_CACHE)
+#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
+ _PAGE_NO_CACHE | _PAGE_GUARDED)
+#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
+#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
+#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
+
+/*
+ * Protection used for kernel text. We want the debuggers to be able to
+ * set breakpoints anywhere, so don't write protect the kernel text
+ * on platforms where such control is possible.
+ */
+#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
+ defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
+#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
+#else
+#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
+#endif
+
+/* Make modules code happy. We don't set RO yet */
+#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
+
+/* Advertise special mapping type for AGP */
+#define PAGE_AGP (PAGE_KERNEL_NC)
+#define HAVE_PAGE_AGP
+
#ifndef __ASSEMBLY__
/* Generic accessors to PTE bits */
+#ifndef pte_write
static inline int pte_write(pte_t pte)
{
- return (pte_val(pte) & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO;
+ return pte_val(pte) & _PAGE_RW;
}
+#endif
static inline int pte_read(pte_t pte) { return 1; }
static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; }
static inline int pte_none(pte_t pte) { return (pte_val(pte) & ~_PTE_NONE_MASK) == 0; }
-static inline pgprot_t pte_pgprot(pte_t pte) { return __pgprot(pte_val(pte) & PAGE_PROT_BITS); }
+static inline bool pte_hashpte(pte_t pte) { return false; }
+static inline bool pte_ci(pte_t pte) { return pte_val(pte) & _PAGE_NO_CACHE; }
+static inline bool pte_exec(pte_t pte) { return pte_val(pte) & _PAGE_EXEC; }
#ifdef CONFIG_NUMA_BALANCING
/*
*/
static inline int pte_protnone(pte_t pte)
{
- return (pte_val(pte) &
- (_PAGE_PRESENT | _PAGE_USER)) == _PAGE_PRESENT;
+ return pte_present(pte) && !pte_user(pte);
}
static inline int pmd_protnone(pmd_t pmd)
return pte_val(pte) & _PAGE_PRESENT;
}
+static inline bool pte_hw_valid(pte_t pte)
+{
+ return pte_val(pte) & _PAGE_PRESENT;
+}
+
+/*
+ * Don't just check for any non zero bits in __PAGE_USER, since for book3e
+ * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
+ * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too.
+ */
+#ifndef pte_user
+static inline bool pte_user(pte_t pte)
+{
+ return (pte_val(pte) & _PAGE_USER) == _PAGE_USER;
+}
+#endif
+
/*
* We only find page table entry in the last level
* Hence no need for other accessors
return pte_val(pte) >> PTE_RPN_SHIFT; }
/* Generic modifiers for PTE bits */
-static inline pte_t pte_wrprotect(pte_t pte)
+static inline pte_t pte_exprotect(pte_t pte)
{
- pte_basic_t ptev;
-
- ptev = pte_val(pte) & ~(_PAGE_RW | _PAGE_HWWRITE);
- ptev |= _PAGE_RO;
- return __pte(ptev);
+ return __pte(pte_val(pte) & ~_PAGE_EXEC);
}
+#ifndef pte_mkclean
static inline pte_t pte_mkclean(pte_t pte)
{
- return __pte(pte_val(pte) & ~(_PAGE_DIRTY | _PAGE_HWWRITE));
+ return __pte(pte_val(pte) & ~_PAGE_DIRTY);
}
+#endif
static inline pte_t pte_mkold(pte_t pte)
{
return __pte(pte_val(pte) & ~_PAGE_ACCESSED);
}
-static inline pte_t pte_mkwrite(pte_t pte)
+static inline pte_t pte_mkpte(pte_t pte)
{
- pte_basic_t ptev;
-
- ptev = pte_val(pte) & ~_PAGE_RO;
- ptev |= _PAGE_RW;
- return __pte(ptev);
+ return pte;
}
-static inline pte_t pte_mkdirty(pte_t pte)
+static inline pte_t pte_mkspecial(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_DIRTY);
+ return __pte(pte_val(pte) | _PAGE_SPECIAL);
}
-static inline pte_t pte_mkyoung(pte_t pte)
+#ifndef pte_mkhuge
+static inline pte_t pte_mkhuge(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_ACCESSED);
+ return __pte(pte_val(pte));
}
+#endif
-static inline pte_t pte_mkspecial(pte_t pte)
+#ifndef pte_mkprivileged
+static inline pte_t pte_mkprivileged(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_SPECIAL);
+ return __pte(pte_val(pte) & ~_PAGE_USER);
}
+#endif
-static inline pte_t pte_mkhuge(pte_t pte)
+#ifndef pte_mkuser
+static inline pte_t pte_mkuser(pte_t pte)
{
- return __pte(pte_val(pte) | _PAGE_HUGE);
+ return __pte(pte_val(pte) | _PAGE_USER);
}
+#endif
static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{
#if _PAGE_WRITETHRU != 0
#define pgprot_cached_wthru(prot) (__pgprot((pgprot_val(prot) & ~_PAGE_CACHE_CTL) | \
_PAGE_COHERENT | _PAGE_WRITETHRU))
+#else
+#define pgprot_cached_wthru(prot) pgprot_noncached(prot)
#endif
#define pgprot_cached_noncoherent(prot) \
#define _PMD_PRESENT 0
#define _PMD_PRESENT_MASK (PAGE_MASK)
#define _PMD_BAD (~PAGE_MASK)
+#define _PMD_USER 0
+#else
+#define _PTE_NONE_MASK 0
+#endif
+
+/*
+ * We define 2 sets of base prot bits, one for basic pages (ie,
+ * cacheable kernel and user pages) and one for non cacheable
+ * pages. We always set _PAGE_COHERENT when SMP is enabled or
+ * the processor might need it for DMA coherency.
+ */
+#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
+#if defined(CONFIG_SMP)
+#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
+#else
+#define _PAGE_BASE (_PAGE_BASE_NC)
#endif
+/* Permission masks used to generate the __P and __S table */
+#define PAGE_NONE __pgprot(_PAGE_BASE)
+#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
+#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | _PAGE_EXEC)
+#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER)
+#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_EXEC)
+
+#ifndef __ASSEMBLY__
+static inline pte_t pte_mkprivileged(pte_t pte)
+{
+ return __pte((pte_val(pte) & ~_PAGE_USER) | _PAGE_PRIVILEGED);
+}
+
+#define pte_mkprivileged pte_mkprivileged
+
+static inline pte_t pte_mkuser(pte_t pte)
+{
+ return __pte((pte_val(pte) & ~_PAGE_PRIVILEGED) | _PAGE_USER);
+}
+
+#define pte_mkuser pte_mkuser
+#endif /* __ASSEMBLY__ */
+
#endif /* __KERNEL__ */
#endif /* _ASM_POWERPC_NOHASH_PTE_BOOK3E_H */
enum {
OPAL_REBOOT_NORMAL = 0,
OPAL_REBOOT_PLATFORM_ERROR = 1,
+ OPAL_REBOOT_FULL_IPL = 2,
};
/* Argument to OPAL_PCI_TCE_KILL */
* on the linear mapping */
/* SLB related definitions */
u16 vmalloc_sllp;
- u16 slb_cache_ptr;
+ u8 slb_cache_ptr;
+ u8 stab_rr; /* stab/slb round-robin counter */
+#ifdef CONFIG_DEBUG_VM
+ u8 in_kernel_slb_handler;
+#endif
+ u32 slb_used_bitmap; /* Bitmaps for first 32 SLB entries. */
+ u32 slb_kern_bitmap;
u32 slb_cache[SLB_CACHE_ENTRIES];
#endif /* CONFIG_PPC_BOOK3S_64 */
*/
struct task_struct *__current; /* Pointer to current */
u64 kstack; /* Saved Kernel stack addr */
- u64 stab_rr; /* stab/slb round-robin counter */
u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */
u64 saved_msr; /* MSR saved here by enter_rtas */
u16 trap_save; /* Used when bad stack is encountered */
#ifdef CONFIG_PPC_PSERIES
u8 *mce_data_buf; /* buffer to hold per cpu rtas errlog */
#endif /* CONFIG_PPC_PSERIES */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Capture SLB related old contents in MCE handler. */
+ struct slb_entry *mce_faulty_slbs;
+ u16 slb_save_cache_ptr;
+#endif /* CONFIG_PPC_BOOK3S_64 */
+#ifdef CONFIG_STACKPROTECTOR
+ unsigned long canary;
+#endif
} ____cacheline_aligned;
extern void copy_mm_to_paca(struct mm_struct *mm);
#include <asm/nohash/pgtable.h>
#endif /* !CONFIG_PPC_BOOK3S */
+/* Note due to the way vm flags are laid out, the bits are XWR */
+#define __P000 PAGE_NONE
+#define __P001 PAGE_READONLY
+#define __P010 PAGE_COPY
+#define __P011 PAGE_COPY
+#define __P100 PAGE_READONLY_X
+#define __P101 PAGE_READONLY_X
+#define __P110 PAGE_COPY_X
+#define __P111 PAGE_COPY_X
+
+#define __S000 PAGE_NONE
+#define __S001 PAGE_READONLY
+#define __S010 PAGE_SHARED
+#define __S011 PAGE_SHARED
+#define __S100 PAGE_READONLY_X
+#define __S101 PAGE_READONLY_X
+#define __S110 PAGE_SHARED_X
+#define __S111 PAGE_SHARED_X
+
#ifndef __ASSEMBLY__
#include <asm/tlbflush.h>
/* Keep these as a macros to avoid include dependency mess */
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define mk_pte(page, pgprot) pfn_pte(page_to_pfn(page), (pgprot))
+/*
+ * Select all bits except the pfn
+ */
+static inline pgprot_t pte_pgprot(pte_t pte)
+{
+ unsigned long pte_flags;
+
+ pte_flags = pte_val(pte) & ~PTE_RPN_MASK;
+ return __pgprot(pte_flags);
+}
/*
* ZERO_PAGE is a global shared page that is always zero: used
int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
void eeh_pe_state_mark(struct eeh_pe *pe, int state);
+void eeh_pe_mark_isolated(struct eeh_pe *pe);
void eeh_pe_state_clear(struct eeh_pe *pe, int state);
void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state);
void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
/* Default SMT priority is set to 3. Use 11- 13bits to save priority. */
#define PPR_PRIORITY 3
#ifdef __ASSEMBLY__
-#define INIT_PPR (PPR_PRIORITY << 50)
+#define DEFAULT_PPR (PPR_PRIORITY << 50)
#else
-#define INIT_PPR ((u64)PPR_PRIORITY << 50)
+#define DEFAULT_PPR ((u64)PPR_PRIORITY << 50)
#endif /* __ASSEMBLY__ */
#endif /* CONFIG_PPC64 */
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
struct arch_hw_breakpoint hw_brk; /* info on the hardware breakpoint */
unsigned long trap_nr; /* last trap # on this thread */
+ u8 load_slb; /* Ages out SLB preload cache entries */
u8 load_fp;
#ifdef CONFIG_ALTIVEC
u8 load_vec;
* onwards.
*/
int dscr_inherit;
- unsigned long ppr; /* used to save/restore SMT priority */
unsigned long tidr;
#endif
#ifdef CONFIG_PPC_BOOK3S_64
.regs = (struct pt_regs *)INIT_SP - 1, /* XXX bogus, I think */ \
.addr_limit = KERNEL_DS, \
.fpexc_mode = 0, \
- .ppr = INIT_PPR, \
.fscr = FSCR_TAR | FSCR_EBB \
}
#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Included from asm/pgtable-*.h only ! */
-
-/*
- * Some bits are only used on some cpu families... Make sure that all
- * the undefined gets a sensible default
- */
-#ifndef _PAGE_HASHPTE
-#define _PAGE_HASHPTE 0
-#endif
-#ifndef _PAGE_HWWRITE
-#define _PAGE_HWWRITE 0
-#endif
-#ifndef _PAGE_EXEC
-#define _PAGE_EXEC 0
-#endif
-#ifndef _PAGE_ENDIAN
-#define _PAGE_ENDIAN 0
-#endif
-#ifndef _PAGE_COHERENT
-#define _PAGE_COHERENT 0
-#endif
-#ifndef _PAGE_WRITETHRU
-#define _PAGE_WRITETHRU 0
-#endif
-#ifndef _PAGE_4K_PFN
-#define _PAGE_4K_PFN 0
-#endif
-#ifndef _PAGE_SAO
-#define _PAGE_SAO 0
-#endif
-#ifndef _PAGE_PSIZE
-#define _PAGE_PSIZE 0
-#endif
-/* _PAGE_RO and _PAGE_RW shall not be defined at the same time */
-#ifndef _PAGE_RO
-#define _PAGE_RO 0
-#else
-#define _PAGE_RW 0
-#endif
-
-#ifndef _PAGE_PTE
-#define _PAGE_PTE 0
-#endif
-/* At least one of _PAGE_PRIVILEGED or _PAGE_USER must be defined */
-#ifndef _PAGE_PRIVILEGED
-#define _PAGE_PRIVILEGED 0
-#else
-#ifndef _PAGE_USER
-#define _PAGE_USER 0
-#endif
-#endif
-#ifndef _PAGE_NA
-#define _PAGE_NA 0
-#endif
-#ifndef _PAGE_HUGE
-#define _PAGE_HUGE 0
-#endif
-
-#ifndef _PMD_PRESENT_MASK
-#define _PMD_PRESENT_MASK _PMD_PRESENT
-#endif
-#ifndef _PMD_USER
-#define _PMD_USER 0
-#endif
-#ifndef _PAGE_KERNEL_RO
-#define _PAGE_KERNEL_RO (_PAGE_PRIVILEGED | _PAGE_RO)
-#endif
-#ifndef _PAGE_KERNEL_ROX
-#define _PAGE_KERNEL_ROX (_PAGE_PRIVILEGED | _PAGE_RO | _PAGE_EXEC)
-#endif
-#ifndef _PAGE_KERNEL_RW
-#define _PAGE_KERNEL_RW (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
- _PAGE_HWWRITE)
-#endif
-#ifndef _PAGE_KERNEL_RWX
-#define _PAGE_KERNEL_RWX (_PAGE_PRIVILEGED | _PAGE_DIRTY | _PAGE_RW | \
- _PAGE_HWWRITE | _PAGE_EXEC)
-#endif
-#ifndef _PAGE_HPTEFLAGS
-#define _PAGE_HPTEFLAGS _PAGE_HASHPTE
-#endif
-#ifndef _PTE_NONE_MASK
-#define _PTE_NONE_MASK _PAGE_HPTEFLAGS
-#endif
-
-#ifndef __ASSEMBLY__
-
-/*
- * Don't just check for any non zero bits in __PAGE_USER, since for book3e
- * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
- * _PAGE_USER. Need to explicitly match _PAGE_BAP_UR bit in that case too.
- */
-static inline bool pte_user(pte_t pte)
-{
- return (pte_val(pte) & (_PAGE_USER | _PAGE_PRIVILEGED)) == _PAGE_USER;
-}
-#endif /* __ASSEMBLY__ */
-
-/* Location of the PFN in the PTE. Most 32-bit platforms use the same
- * as _PAGE_SHIFT here (ie, naturally aligned).
- * Platform who don't just pre-define the value so we don't override it here
- */
-#ifndef PTE_RPN_SHIFT
-#define PTE_RPN_SHIFT (PAGE_SHIFT)
-#endif
-
-/* The mask covered by the RPN must be a ULL on 32-bit platforms with
- * 64-bit PTEs
- */
-#if defined(CONFIG_PPC32) && defined(CONFIG_PTE_64BIT)
-#define PTE_RPN_MASK (~((1ULL<<PTE_RPN_SHIFT)-1))
-#else
-#define PTE_RPN_MASK (~((1UL<<PTE_RPN_SHIFT)-1))
-#endif
-
-/* _PAGE_CHG_MASK masks of bits that are to be preserved across
- * pgprot changes
- */
-#define _PAGE_CHG_MASK (PTE_RPN_MASK | _PAGE_HPTEFLAGS | _PAGE_DIRTY | \
- _PAGE_ACCESSED | _PAGE_SPECIAL)
-
-/* Mask of bits returned by pte_pgprot() */
-#define PAGE_PROT_BITS (_PAGE_GUARDED | _PAGE_COHERENT | _PAGE_NO_CACHE | \
- _PAGE_WRITETHRU | _PAGE_ENDIAN | _PAGE_4K_PFN | \
- _PAGE_USER | _PAGE_ACCESSED | _PAGE_RO | _PAGE_NA | \
- _PAGE_PRIVILEGED | \
- _PAGE_RW | _PAGE_HWWRITE | _PAGE_DIRTY | _PAGE_EXEC)
-
-/*
- * We define 2 sets of base prot bits, one for basic pages (ie,
- * cacheable kernel and user pages) and one for non cacheable
- * pages. We always set _PAGE_COHERENT when SMP is enabled or
- * the processor might need it for DMA coherency.
- */
-#define _PAGE_BASE_NC (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_PSIZE)
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_STD_MMU) || \
- defined(CONFIG_PPC_E500MC)
-#define _PAGE_BASE (_PAGE_BASE_NC | _PAGE_COHERENT)
-#else
-#define _PAGE_BASE (_PAGE_BASE_NC)
-#endif
-
-/* Permission masks used to generate the __P and __S table,
- *
- * Note:__pgprot is defined in arch/powerpc/include/asm/page.h
- *
- * Write permissions imply read permissions for now (we could make write-only
- * pages on BookE but we don't bother for now). Execute permission control is
- * possible on platforms that define _PAGE_EXEC
- *
- * Note due to the way vm flags are laid out, the bits are XWR
- */
-#define PAGE_NONE __pgprot(_PAGE_BASE | _PAGE_NA)
-#define PAGE_SHARED __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW)
-#define PAGE_SHARED_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RW | \
- _PAGE_EXEC)
-#define PAGE_COPY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO)
-#define PAGE_COPY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
- _PAGE_EXEC)
-#define PAGE_READONLY __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO)
-#define PAGE_READONLY_X __pgprot(_PAGE_BASE | _PAGE_USER | _PAGE_RO | \
- _PAGE_EXEC)
-
-#define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY_X
-#define __P101 PAGE_READONLY_X
-#define __P110 PAGE_COPY_X
-#define __P111 PAGE_COPY_X
-
-#define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY_X
-#define __S101 PAGE_READONLY_X
-#define __S110 PAGE_SHARED_X
-#define __S111 PAGE_SHARED_X
-
-/* Permission masks used for kernel mappings */
-#define PAGE_KERNEL __pgprot(_PAGE_BASE | _PAGE_KERNEL_RW)
-#define PAGE_KERNEL_NC __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
- _PAGE_NO_CACHE)
-#define PAGE_KERNEL_NCG __pgprot(_PAGE_BASE_NC | _PAGE_KERNEL_RW | \
- _PAGE_NO_CACHE | _PAGE_GUARDED)
-#define PAGE_KERNEL_X __pgprot(_PAGE_BASE | _PAGE_KERNEL_RWX)
-#define PAGE_KERNEL_RO __pgprot(_PAGE_BASE | _PAGE_KERNEL_RO)
-#define PAGE_KERNEL_ROX __pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
-
-/* Protection used for kernel text. We want the debuggers to be able to
- * set breakpoints anywhere, so don't write protect the kernel text
- * on platforms where such control is possible.
- */
-#if defined(CONFIG_KGDB) || defined(CONFIG_XMON) || defined(CONFIG_BDI_SWITCH) ||\
- defined(CONFIG_KPROBES) || defined(CONFIG_DYNAMIC_FTRACE)
-#define PAGE_KERNEL_TEXT PAGE_KERNEL_X
-#else
-#define PAGE_KERNEL_TEXT PAGE_KERNEL_ROX
-#endif
-
-/* Make modules code happy. We don't set RO yet */
-#define PAGE_KERNEL_EXEC PAGE_KERNEL_X
-
-/* Advertise special mapping type for AGP */
-#define PAGE_AGP (PAGE_KERNEL_NC)
-#define HAVE_PAGE_AGP
-
-#ifndef _PAGE_READ
-/* if not defined, we should not find _PAGE_WRITE too */
-#define _PAGE_READ 0
-#define _PAGE_WRITE _PAGE_RW
-#endif
-
-#ifndef H_PAGE_4K_PFN
-#define H_PAGE_4K_PFN 0
-#endif
#include <uapi/asm/ptrace.h>
#include <asm/asm-const.h>
+#ifndef __ASSEMBLY__
+struct pt_regs
+{
+ union {
+ struct user_pt_regs user_regs;
+ struct {
+ unsigned long gpr[32];
+ unsigned long nip;
+ unsigned long msr;
+ unsigned long orig_gpr3;
+ unsigned long ctr;
+ unsigned long link;
+ unsigned long xer;
+ unsigned long ccr;
+#ifdef CONFIG_PPC64
+ unsigned long softe;
+#else
+ unsigned long mq;
+#endif
+ unsigned long trap;
+ unsigned long dar;
+ unsigned long dsisr;
+ unsigned long result;
+ };
+ };
+
+#ifdef CONFIG_PPC64
+ unsigned long ppr;
+#endif
+};
+#endif
#ifdef __powerpc64__
return -regs->gpr[3];
}
+static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
+{
+ regs->gpr[3] = rc;
+}
+
#ifdef __powerpc64__
#define user_mode(regs) ((((regs)->msr) >> MSR_PR_LG) & 0x1)
#else
#define MSR_TS_S __MASK(MSR_TS_S_LG) /* Transaction Suspended */
#define MSR_TS_T __MASK(MSR_TS_T_LG) /* Transaction Transactional */
#define MSR_TS_MASK (MSR_TS_T | MSR_TS_S) /* Transaction State bits */
-#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
#define MSR_TM_RESV(x) (((x) & MSR_TS_MASK) == MSR_TS_MASK) /* Reserved */
#define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T)
#define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S)
+#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
+#define MSR_TM_ACTIVE(x) (((x) & MSR_TS_MASK) != 0) /* Transaction active? */
+#else
+#define MSR_TM_ACTIVE(x) 0
+#endif
+
#if defined(CONFIG_PPC_BOOK3S_64)
#define MSR_64BIT MSR_SF
#define RTAS_TYPE_INFO 0xE2
#define RTAS_TYPE_DEALLOC 0xE3
#define RTAS_TYPE_DUMP 0xE4
+#define RTAS_TYPE_HOTPLUG 0xE5
/* I don't add PowerMGM events right now, this is a different topic */
#define RTAS_TYPE_PMGM_POWER_SW_ON 0x60
#define RTAS_TYPE_PMGM_POWER_SW_OFF 0x61
return (elog->byte1 & 0x18) >> 3;
}
+static inline
+void rtas_set_disposition_recovered(struct rtas_error_log *elog)
+{
+ elog->byte1 &= ~0x18;
+ elog->byte1 |= (RTAS_DISP_FULLY_RECOVERED << 3);
+}
+
static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog)
{
return (elog->byte1 & 0x04) >> 2;
}
+static inline uint8_t rtas_error_initiator(const struct rtas_error_log *elog)
+{
+ return (elog->byte2 & 0xf0) >> 4;
+}
+
#define rtas_error_type(x) ((x)->byte3)
static inline
#define PSERIES_ELOG_SECT_ID_CALL_HOME (('C' << 8) | 'H')
#define PSERIES_ELOG_SECT_ID_USER_DEF (('U' << 8) | 'D')
#define PSERIES_ELOG_SECT_ID_HOTPLUG (('H' << 8) | 'P')
+#define PSERIES_ELOG_SECT_ID_MCE (('M' << 8) | 'C')
/* Vendor specific Platform Event Log Format, Version 6, section header */
struct pseries_errorlog {
#define PSERIES_HP_ELOG_RESOURCE_MEM 2
#define PSERIES_HP_ELOG_RESOURCE_SLOT 3
#define PSERIES_HP_ELOG_RESOURCE_PHB 4
+#define PSERIES_HP_ELOG_RESOURCE_PMEM 6
#define PSERIES_HP_ELOG_ACTION_ADD 1
#define PSERIES_HP_ELOG_ACTION_REMOVE 2
unsigned long len, unsigned int psize);
void slice_init_new_context_exec(struct mm_struct *mm);
+void slice_setup_new_exec(void);
#endif /* __ASSEMBLY__ */
DECLARE_PER_CPU(cpumask_var_t, cpu_sibling_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DECLARE_PER_CPU(cpumask_var_t, cpu_core_map);
+DECLARE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
static inline struct cpumask *cpu_sibling_mask(int cpu)
{
return per_cpu(cpu_l2_cache_map, cpu);
}
+static inline struct cpumask *cpu_smallcore_mask(int cpu)
+{
+ return per_cpu(cpu_smallcore_map, cpu);
+}
+
extern int cpu_to_core_id(int cpu);
/* Since OpenPIC has only 4 IPIs, we use slightly different message numbers.
return cpumask_of(cpu);
}
+static inline const struct cpumask *cpu_smallcore_mask(int cpu)
+{
+ return cpumask_of(cpu);
+}
+
#endif /* CONFIG_SMP */
#ifdef CONFIG_PPC64
* MAX_PHYSMEM_BITS 2^N: how much memory we can have in that space
*/
#define SECTION_SIZE_BITS 24
-/*
- * If we store section details in page->flags we can't increase the MAX_PHYSMEM_BITS
- * if we increase SECTIONS_WIDTH we will not store node details in page->flags and
- * page_to_nid does a page->section->node lookup
- * Hence only increase for VMEMMAP.
- */
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-#define MAX_PHYSMEM_BITS 47
-#else
-#define MAX_PHYSMEM_BITS 46
-#endif
#endif /* CONFIG_SPARSEMEM */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * GCC stack protector support.
+ *
+ */
+
+#ifndef _ASM_STACKPROTECTOR_H
+#define _ASM_STACKPROTECTOR_H
+
+#include <linux/random.h>
+#include <linux/version.h>
+#include <asm/reg.h>
+#include <asm/current.h>
+#include <asm/paca.h>
+
+/*
+ * Initialize the stackprotector canary value.
+ *
+ * NOTE: this must only be called from functions that never return,
+ * and it must always be inlined.
+ */
+static __always_inline void boot_init_stack_canary(void)
+{
+ unsigned long canary;
+
+ /* Try to get a semi random initial value. */
+ canary = get_random_canary();
+ canary ^= mftb();
+ canary ^= LINUX_VERSION_CODE;
+ canary &= CANARY_MASK;
+
+ current->stack_canary = canary;
+#ifdef CONFIG_PPC64
+ get_paca()->canary = canary;
+#endif
+}
+
+#endif /* _ASM_STACKPROTECTOR_H */
#include <asm/page.h>
#include <asm/accounting.h>
+#define SLB_PRELOAD_NR 16U
/*
* low level task data.
*/
#if defined(CONFIG_VIRT_CPU_ACCOUNTING_NATIVE) && defined(CONFIG_PPC32)
struct cpu_accounting_data accounting;
#endif
+ unsigned char slb_preload_nr;
+ unsigned char slb_preload_tail;
+ u32 slb_preload_esid[SLB_PRELOAD_NR];
+
/* low level flags - has atomic operations done on it */
unsigned long flags ____cacheline_aligned_in_smp;
};
}
extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
+
+#ifdef CONFIG_PPC_BOOK3S_64
+void arch_setup_new_exec(void);
+#define arch_setup_new_exec arch_setup_new_exec
+#endif
+
#endif /* __ASSEMBLY__ */
/*
#define TIF_SIGPENDING 1 /* signal pending */
#define TIF_NEED_RESCHED 2 /* rescheduling necessary */
#define TIF_FSCHECK 3 /* Check FS is USER_DS on return */
-#define TIF_32BIT 4 /* 32 bit binary */
+#define TIF_SYSCALL_EMU 4 /* syscall emulation active */
#define TIF_RESTORE_TM 5 /* need to restore TM FP/VEC/VSX */
#define TIF_PATCH_PENDING 6 /* pending live patching update */
#define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */
#define TIF_ELF2ABI 18 /* function descriptors must die! */
#endif
#define TIF_POLLING_NRFLAG 19 /* true if poll_idle() is polling TIF_NEED_RESCHED */
+#define TIF_32BIT 20 /* 32 bit binary */
/* as above, but as bit values */
#define _TIF_SYSCALL_TRACE (1<<TIF_SYSCALL_TRACE)
#define _TIF_EMULATE_STACK_STORE (1<<TIF_EMULATE_STACK_STORE)
#define _TIF_NOHZ (1<<TIF_NOHZ)
#define _TIF_FSCHECK (1<<TIF_FSCHECK)
+#define _TIF_SYSCALL_EMU (1<<TIF_SYSCALL_EMU)
#define _TIF_SYSCALL_DOTRACE (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SECCOMP | _TIF_SYSCALL_TRACEPOINT | \
- _TIF_NOHZ)
+ _TIF_NOHZ | _TIF_SYSCALL_EMU)
#define _TIF_USER_WORK_MASK (_TIF_SIGPENDING | _TIF_NEED_RESCHED | \
_TIF_NOTIFY_RESUME | _TIF_UPROBE | \
__entry->r)
);
+TRACE_EVENT(tlbia,
+
+ TP_PROTO(unsigned long id),
+ TP_ARGS(id),
+ TP_STRUCT__entry(
+ __field(unsigned long, id)
+ ),
+
+ TP_fast_assign(
+ __entry->id = id;
+ ),
+
+ TP_printk("ctx.id=0x%lx", __entry->id)
+);
+
#endif /* _TRACE_POWERPC_H */
#undef TRACE_INCLUDE_PATH
({ \
long __gu_err; \
__long_type(*(ptr)) __gu_val; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
if (!is_kernel_addr((unsigned long)__gu_addr)) \
might_fault(); \
({ \
long __gu_err = -EFAULT; \
__long_type(*(ptr)) __gu_val = 0; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
might_fault(); \
if (access_ok(VERIFY_READ, __gu_addr, (size))) { \
barrier_nospec(); \
({ \
long __gu_err; \
__long_type(*(ptr)) __gu_val; \
- const __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
+ __typeof__(*(ptr)) __user *__gu_addr = (ptr); \
__chk_user_ptr(ptr); \
barrier_nospec(); \
__get_user_size(__gu_val, __gu_addr, (size), __gu_err); \
* to write an integer number of pages.
*/
struct user {
- struct pt_regs regs; /* entire machine state */
+ struct user_pt_regs regs; /* entire machine state */
size_t u_tsize; /* text size (pages) */
size_t u_dsize; /* data size (pages) */
size_t u_ssize; /* stack size (pages) */
#ifndef __ASSEMBLY__
-struct pt_regs {
+#ifdef __KERNEL__
+struct user_pt_regs
+#else
+struct pt_regs
+#endif
+{
unsigned long gpr[32];
unsigned long nip;
unsigned long msr;
#define PTRACE_GETVSRREGS 0x1b
#define PTRACE_SETVSRREGS 0x1c
+/* Syscall emulation defines */
+#define PTRACE_SYSEMU 0x1d
+#define PTRACE_SYSEMU_SINGLESTEP 0x1e
+
/*
* Get or set a debug register. The first 16 are DABR registers and the
* second 16 are IABR registers.
#endif
unsigned long handler;
unsigned long oldmask;
- struct pt_regs __user *regs;
+#ifdef __KERNEL__
+ struct user_pt_regs __user *regs;
+#else
+ struct pt_regs *regs;
+#endif
#ifdef __powerpc64__
elf_gregset_t gp_regs;
elf_fpregset_t fp_regs;
CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"'
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+# Disable clang warning for using setjmp without setjmp.h header
+CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header)
ifdef CONFIG_PPC64
CFLAGS_prom_init.o += $(NO_MINIMAL_TOC)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
-CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_prom_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_btext.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
-CFLAGS_REMOVE_prom.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cputable.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_btext.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_prom.o = $(CC_FLAGS_FTRACE)
endif
obj-y := cputable.o ptrace.o syscalls.o \
{
OFFSET(THREAD, task_struct, thread);
OFFSET(MM, task_struct, mm);
+#ifdef CONFIG_STACKPROTECTOR
+ OFFSET(TASK_CANARY, task_struct, stack_canary);
+#ifdef CONFIG_PPC64
+ OFFSET(PACA_CANARY, paca_struct, canary);
+#endif
+#endif
OFFSET(MMCONTEXTID, mm_struct, context.id);
#ifdef CONFIG_PPC64
DEFINE(SIGSEGV, SIGSEGV);
DEFINE(NMI_MASK, NMI_MASK);
- OFFSET(TASKTHREADPPR, task_struct, thread.ppr);
#else
OFFSET(THREAD_INFO, task_struct, stack);
DEFINE(THREAD_INFO_GAP, _ALIGN_UP(sizeof(struct thread_info), 16));
OFFSET(PACAKSAVE, paca_struct, kstack);
OFFSET(PACACURRENT, paca_struct, __current);
OFFSET(PACASAVEDMSR, paca_struct, saved_msr);
- OFFSET(PACASTABRR, paca_struct, stab_rr);
OFFSET(PACAR1, paca_struct, saved_r1);
OFFSET(PACATOC, paca_struct, kernel_toc);
OFFSET(PACAKBASE, paca_struct, kernelbase);
#ifdef CONFIG_PPC_BOOK3S_64
OFFSET(PACASLBCACHE, paca_struct, slb_cache);
OFFSET(PACASLBCACHEPTR, paca_struct, slb_cache_ptr);
+ OFFSET(PACASTABRR, paca_struct, stab_rr);
OFFSET(PACAVMALLOCSLLP, paca_struct, vmalloc_sllp);
#ifdef CONFIG_PPC_MM_SLICES
OFFSET(MMUPSIZESLLP, mmu_psize_def, sllp);
/* Interrupt register frame */
DEFINE(INT_FRAME_SIZE, STACK_INT_FRAME_SIZE);
DEFINE(SWITCH_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs));
-#ifdef CONFIG_PPC64
- /* Create extra stack space for SRR0 and SRR1 when calling prom/rtas. */
- DEFINE(PROM_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
- DEFINE(RTAS_FRAME_SIZE, STACK_FRAME_OVERHEAD + sizeof(struct pt_regs) + 16);
-#endif /* CONFIG_PPC64 */
STACK_PT_REGS_OFFSET(GPR0, gpr[0]);
STACK_PT_REGS_OFFSET(GPR1, gpr[1]);
STACK_PT_REGS_OFFSET(GPR2, gpr[2]);
STACK_PT_REGS_OFFSET(_ESR, dsisr);
#else /* CONFIG_PPC64 */
STACK_PT_REGS_OFFSET(SOFTE, softe);
-
- /* These _only_ to be used with {PROM,RTAS}_FRAME_SIZE!!! */
- DEFINE(_SRR0, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs));
- DEFINE(_SRR1, STACK_FRAME_OVERHEAD+sizeof(struct pt_regs)+8);
+ STACK_PT_REGS_OFFSET(_PPR, ppr);
#endif /* CONFIG_PPC64 */
#if defined(CONFIG_PPC32)
offset = ((unsigned long) dispDeviceBase) - base;
size = dispDeviceRowBytes * dispDeviceRect[3] + offset
+ dispDeviceRect[0];
- vbase = __ioremap(base, size, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
+ vbase = ioremap_wc(base, size);
if (!vbase)
return;
logicalDisplayBase = vbase + offset;
#include <linux/percpu.h>
#include <linux/slab.h>
#include <asm/prom.h>
+#include <asm/cputhreads.h>
+#include <asm/smp.h>
#include "cacheinfo.h"
static struct kobj_attribute cache_level_attr =
__ATTR(level, 0444, level_show, NULL);
+static unsigned int index_dir_to_cpu(struct cache_index_dir *index)
+{
+ struct kobject *index_dir_kobj = &index->kobj;
+ struct kobject *cache_dir_kobj = index_dir_kobj->parent;
+ struct kobject *cpu_dev_kobj = cache_dir_kobj->parent;
+ struct device *dev = kobj_to_dev(cpu_dev_kobj);
+
+ return dev->id;
+}
+
+/*
+ * On big-core systems, each core has two groups of CPUs each of which
+ * has its own L1-cache. The thread-siblings which share l1-cache with
+ * @cpu can be obtained via cpu_smallcore_mask().
+ */
+static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct cache *cache)
+{
+ if (cache->level == 1)
+ return cpu_smallcore_mask(cpu);
+
+ return &cache->shared_cpu_map;
+}
+
static ssize_t shared_cpu_map_show(struct kobject *k, struct kobj_attribute *attr, char *buf)
{
struct cache_index_dir *index;
struct cache *cache;
- int ret;
+ const struct cpumask *mask;
+ int ret, cpu;
index = kobj_to_cache_index_dir(k);
cache = index->cache;
+ if (has_big_cores) {
+ cpu = index_dir_to_cpu(index);
+ mask = get_big_core_shared_cpu_map(cpu, cache);
+ } else {
+ mask = &cache->shared_cpu_map;
+ }
+
ret = scnprintf(buf, PAGE_SIZE - 1, "%*pb\n",
- cpumask_pr_args(&cache->shared_cpu_map));
+ cpumask_pr_args(mask));
buf[ret++] = '\n';
buf[ret] = '\0';
return ret;
vaddr = __va(paddr);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
} else {
- vaddr = __ioremap(paddr, PAGE_SIZE, 0);
+ vaddr = ioremap_cache(paddr, PAGE_SIZE);
csize = copy_oldmem_vaddr(vaddr, buf, csize, offset, userbuf);
iounmap(vaddr);
}
*
*/
-#include <linux/dma-mapping.h>
+#include <linux/dma-direct.h>
#include <linux/memblock.h>
#include <linux/pfn.h>
#include <linux/of_platform.h>
.sync_single_for_device = swiotlb_sync_single_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
.sync_sg_for_device = swiotlb_sync_sg_for_device,
- .mapping_error = swiotlb_dma_mapping_error,
+ .mapping_error = dma_direct_mapping_error,
.get_required_mask = swiotlb_powerpc_get_required,
};
int n = 0, l = 0;
char buffer[128];
+ if (!pdn) {
+ pr_warn("EEH: Note: No error log for absent device.\n");
+ return 0;
+ }
+
n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
pdn->phb->global_number, pdn->busno,
PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
}
/* Isolate the PHB and send event */
- eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(phb_pe);
eeh_serialize_unlock(flags);
pr_err("EEH: PHB#%x failure detected, location: %s\n",
* with other functions on this device, and functions under
* bridges.
*/
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
/* Most EEH events are due to device driver bugs. Having
/* Check if the request is finished successfully */
if (active_flag) {
- rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
+ rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (rc < 0)
return rc;
eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
break;
case pcie_hot_reset:
- eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
eeh_ops->reset(pe, EEH_RESET_HOT);
break;
case pcie_warm_reset:
- eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
+ eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
if (!(pe->type & EEH_PE_VF))
break;
/* Wait until the PE is in a functioning state */
- state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if (eeh_state_active(state))
- break;
-
+ state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
if (state < 0) {
pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
__func__, pe->phb->global_number, pe->addr);
ret = -ENOTRECOVERABLE;
break;
}
+ if (eeh_state_active(state))
+ break;
/* Set error in case this is our last attempt */
ret = -EIO;
pdn = hose->pci_data;
traverse_pci_dn(pdn, eeh_ops->probe, NULL);
}
+ if (eeh_enabled())
+ pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
+ else
+ pr_info("EEH: No capable adapters found\n");
+
}
/**
eeh_dev_phb_init_dynamic(hose);
/* Initialize EEH event */
- ret = eeh_event_init();
- if (ret)
- return ret;
-
- eeh_probe_devices();
-
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else if (!eeh_has_flag(EEH_POSTPONED_PROBE))
- pr_info("EEH: No capable adapters found\n");
-
- return ret;
+ return eeh_event_init();
}
core_initcall_sync(eeh_init);
/* Associate EEH device with OF node */
pdn->edev = edev;
edev->pdn = pdn;
- INIT_LIST_HEAD(&edev->list);
- INIT_LIST_HEAD(&edev->rmv_list);
return edev;
}
#include <asm/rtas.h>
struct eeh_rmv_data {
- struct list_head edev_list;
- int removed;
+ struct list_head removed_vf_list;
+ int removed_dev_count;
};
static int eeh_result_priority(enum pci_ers_result result)
struct pci_driver *driver;
enum pci_ers_result new_result;
+ if (!edev->pdev) {
+ eeh_edev_info(edev, "no device");
+ return;
+ }
device_lock(&edev->pdev->dev);
if (eeh_edev_actionable(edev)) {
driver = eeh_pcid_get(edev->pdev);
* EEH device is created.
*/
if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
- if (list_is_last(&edev->list, &edev->pe->edevs))
+ if (list_is_last(&edev->entry, &edev->pe->edevs))
eeh_pe_restore_bars(edev->pe);
return NULL;
return rc;
}
-static void *eeh_add_virt_device(void *data, void *userdata)
+static void *eeh_add_virt_device(struct eeh_dev *edev)
{
struct pci_driver *driver;
- struct eeh_dev *edev = (struct eeh_dev *)data;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
struct pci_driver *driver;
struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
- int *removed = rmv_data ? &rmv_data->removed : NULL;
/*
* Actually, we should remove the PCI bridges as well.
if (eeh_dev_removed(edev))
return NULL;
- if (removed) {
+ if (rmv_data) {
if (eeh_pe_passed(edev->pe))
return NULL;
driver = eeh_pcid_get(dev);
/* Remove it from PCI subsystem */
pr_debug("EEH: Removing %s without EEH sensitive driver\n",
pci_name(dev));
- edev->bus = dev->bus;
edev->mode |= EEH_DEV_DISCONNECTED;
- if (removed)
- (*removed)++;
+ if (rmv_data)
+ rmv_data->removed_dev_count++;
if (edev->physfn) {
#ifdef CONFIG_PCI_IOV
pdn->pe_number = IODA_INVALID_PE;
#endif
if (rmv_data)
- list_add(&edev->rmv_list, &rmv_data->edev_list);
+ list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
} else {
pci_lock_rescan_remove();
pci_stop_and_remove_bus_device(dev);
* the device up before the scripts have taken it down,
* potentially weird things happen.
*/
- if (!driver_eeh_aware || rmv_data->removed) {
+ if (!driver_eeh_aware || rmv_data->removed_dev_count) {
pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
(driver_eeh_aware ? "partial" : "complete"));
ssleep(5);
* PE. We should disconnect it so the binding can be
* rebuilt when adding PCI devices.
*/
- edev = list_first_entry(&pe->edevs, struct eeh_dev, list);
+ edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
if (pe->type & EEH_PE_VF) {
- eeh_add_virt_device(edev, NULL);
+ eeh_add_virt_device(edev);
} else {
if (!driver_eeh_aware)
eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
struct eeh_pe *tmp_pe;
int rc = 0;
enum pci_ers_result result = PCI_ERS_RESULT_NONE;
- struct eeh_rmv_data rmv_data = {LIST_HEAD_INIT(rmv_data.edev_list), 0};
+ struct eeh_rmv_data rmv_data =
+ {LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};
bus = eeh_pe_bus_get(pe);
if (!bus) {
pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
pe->phb->global_number, pe->addr,
pe->freeze_count);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
}
- pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
- pe->freeze_count, eeh_max_freezes);
/* Walk the various device drivers attached to this slot through
* a reset sequence, giving each an opportunity to do what it needs
* the error. Override the result if necessary to have partially
* hotplug for this case.
*/
- pr_info("EEH: Notify device drivers to shutdown\n");
- eeh_set_channel_state(pe, pci_channel_io_frozen);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(IO frozen)", pe, eeh_report_error,
- &result);
- if ((pe->type & EEH_PE_PHB) &&
- result != PCI_ERS_RESULT_NONE &&
- result != PCI_ERS_RESULT_NEED_RESET)
- result = PCI_ERS_RESULT_NEED_RESET;
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
+ pe->freeze_count, eeh_max_freezes);
+ pr_info("EEH: Notify device drivers to shutdown\n");
+ eeh_set_channel_state(pe, pci_channel_io_frozen);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(IO frozen)", pe,
+ eeh_report_error, &result);
+ if ((pe->type & EEH_PE_PHB) &&
+ result != PCI_ERS_RESULT_NONE &&
+ result != PCI_ERS_RESULT_NEED_RESET)
+ result = PCI_ERS_RESULT_NEED_RESET;
+ }
/* Get the current PCI slot state. This can take a long time,
* sometimes over 300 seconds for certain systems.
*/
- rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
- if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
- pr_warn("EEH: Permanent failure\n");
- goto hard_fail;
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
+ if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
+ pr_warn("EEH: Permanent failure\n");
+ result = PCI_ERS_RESULT_DISCONNECT;
+ }
}
/* Since rtas may enable MMIO when posting the error log,
* don't post the error log until after all dev drivers
* have been informed.
*/
- pr_info("EEH: Collect temporary log\n");
- eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+ if (result != PCI_ERS_RESULT_DISCONNECT) {
+ pr_info("EEH: Collect temporary log\n");
+ eeh_slot_error_detail(pe, EEH_LOG_TEMP);
+ }
/* If all device drivers were EEH-unaware, then shut
* down all of the device drivers, and hope they
if (rc) {
pr_warn("%s: Unable to reset, err=%d\n",
__func__, rc);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
}
}
pr_info("EEH: Enable I/O for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
- if (rc < 0)
- goto hard_fail;
- if (rc) {
+ if (rc < 0) {
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
pr_info("EEH: Notify device drivers to resume I/O\n");
pr_info("EEH: Enabled DMA for affected devices\n");
rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
- if (rc < 0)
- goto hard_fail;
- if (rc) {
+ if (rc < 0) {
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else if (rc) {
result = PCI_ERS_RESULT_NEED_RESET;
} else {
/*
}
}
- /* If any device has a hard failure, then shut off everything. */
- if (result == PCI_ERS_RESULT_DISCONNECT) {
- pr_warn("EEH: Device driver gave up\n");
- goto hard_fail;
- }
-
/* If any device called out for a reset, then reset the slot */
if (result == PCI_ERS_RESULT_NEED_RESET) {
pr_info("EEH: Reset without hotplug activity\n");
if (rc) {
pr_warn("%s: Cannot reset, err=%d\n",
__func__, rc);
- goto hard_fail;
+ result = PCI_ERS_RESULT_DISCONNECT;
+ } else {
+ result = PCI_ERS_RESULT_NONE;
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("slot_reset", pe, eeh_report_reset,
+ &result);
}
-
- pr_info("EEH: Notify device drivers "
- "the completion of reset\n");
- result = PCI_ERS_RESULT_NONE;
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("slot_reset", pe, eeh_report_reset, &result);
- }
-
- /* All devices should claim they have recovered by now. */
- if ((result != PCI_ERS_RESULT_RECOVERED) &&
- (result != PCI_ERS_RESULT_NONE)) {
- pr_warn("EEH: Not recovered\n");
- goto hard_fail;
- }
-
- /*
- * For those hot removed VFs, we should add back them after PF get
- * recovered properly.
- */
- list_for_each_entry_safe(edev, tmp, &rmv_data.edev_list, rmv_list) {
- eeh_add_virt_device(edev, NULL);
- list_del(&edev->rmv_list);
}
- /* Tell all device drivers that they can resume operations */
- pr_info("EEH: Notify device driver to resume\n");
- eeh_set_channel_state(pe, pci_channel_io_normal);
- eeh_set_irq_state(pe, true);
- eeh_pe_report("resume", pe, eeh_report_resume, NULL);
- eeh_for_each_pe(pe, tmp_pe) {
- eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
- edev->mode &= ~EEH_DEV_NO_HANDLER;
- edev->in_error = false;
+ if ((result == PCI_ERS_RESULT_RECOVERED) ||
+ (result == PCI_ERS_RESULT_NONE)) {
+ /*
+ * For those hot removed VFs, we should add back them after PF
+ * get recovered properly.
+ */
+ list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
+ rmv_entry) {
+ eeh_add_virt_device(edev);
+ list_del(&edev->rmv_entry);
}
- }
- pr_info("EEH: Recovery successful.\n");
- goto final;
+ /* Tell all device drivers that they can resume operations */
+ pr_info("EEH: Notify device driver to resume\n");
+ eeh_set_channel_state(pe, pci_channel_io_normal);
+ eeh_set_irq_state(pe, true);
+ eeh_pe_report("resume", pe, eeh_report_resume, NULL);
+ eeh_for_each_pe(pe, tmp_pe) {
+ eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
+ edev->mode &= ~EEH_DEV_NO_HANDLER;
+ edev->in_error = false;
+ }
+ }
-hard_fail:
- /*
- * About 90% of all real-life EEH failures in the field
- * are due to poorly seated PCI cards. Only 10% or so are
- * due to actual, failed cards.
- */
- pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
- "Please try reseating or replacing it\n",
- pe->phb->global_number, pe->addr);
+ pr_info("EEH: Recovery successful.\n");
+ } else {
+ /*
+ * About 90% of all real-life EEH failures in the field
+ * are due to poorly seated PCI cards. Only 10% or so are
+ * due to actual, failed cards.
+ */
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
+ "Please try reseating or replacing it\n",
+ pe->phb->global_number, pe->addr);
- eeh_slot_error_detail(pe, EEH_LOG_PERM);
+ eeh_slot_error_detail(pe, EEH_LOG_PERM);
- /* Notify all devices that they're about to go down. */
- eeh_set_channel_state(pe, pci_channel_io_perm_failure);
- eeh_set_irq_state(pe, false);
- eeh_pe_report("error_detected(permanent failure)", pe,
- eeh_report_failure, NULL);
+ /* Notify all devices that they're about to go down. */
+ eeh_set_channel_state(pe, pci_channel_io_perm_failure);
+ eeh_set_irq_state(pe, false);
+ eeh_pe_report("error_detected(permanent failure)", pe,
+ eeh_report_failure, NULL);
- /* Mark the PE to be removed permanently */
- eeh_pe_state_mark(pe, EEH_PE_REMOVED);
+ /* Mark the PE to be removed permanently */
+ eeh_pe_state_mark(pe, EEH_PE_REMOVED);
- /*
- * Shut down the device drivers for good. We mark
- * all removed devices correctly to avoid access
- * the their PCI config any more.
- */
- if (pe->type & EEH_PE_VF) {
- eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- } else {
- eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
- eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ /*
+ * Shut down the device drivers for good. We mark
+ * all removed devices correctly to avoid access
+ * the their PCI config any more.
+ */
+ if (pe->type & EEH_PE_VF) {
+ eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
+ } else {
+ eeh_pe_state_clear(pe, EEH_PE_PRI_BUS);
+ eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
- pci_lock_rescan_remove();
- pci_hp_remove_devices(bus);
- pci_unlock_rescan_remove();
- /* The passed PE should no longer be used */
- return;
+ pci_lock_rescan_remove();
+ pci_hp_remove_devices(bus);
+ pci_unlock_rescan_remove();
+ /* The passed PE should no longer be used */
+ return;
+ }
}
-final:
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
}
phb_pe = eeh_phb_pe_get(hose);
if (!phb_pe) continue;
- eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(phb_pe);
}
eeh_serialize_unlock(flags);
/* Purge all events of the PHB */
eeh_remove_event(pe, true);
- if (rc == EEH_NEXT_ERR_DEAD_PHB)
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
- else
- eeh_pe_state_mark(pe,
- EEH_PE_ISOLATED | EEH_PE_RECOVERING);
+ if (rc != EEH_NEXT_ERR_DEAD_PHB)
+ eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
pe->type = type;
pe->phb = phb;
INIT_LIST_HEAD(&pe->child_list);
- INIT_LIST_HEAD(&pe->child);
INIT_LIST_HEAD(&pe->edevs);
pe->data = (void *)pe + ALIGN(sizeof(struct eeh_pe),
return 0;
}
+/**
+ * eeh_wait_state - Wait for PE state
+ * @pe: EEH PE
+ * @max_wait: maximal period in millisecond
+ *
+ * Wait for the state of associated PE. It might take some time
+ * to retrieve the PE's state.
+ */
+int eeh_wait_state(struct eeh_pe *pe, int max_wait)
+{
+ int ret;
+ int mwait;
+
+ /*
+ * According to PAPR, the state of PE might be temporarily
+ * unavailable. Under the circumstance, we have to wait
+ * for indicated time determined by firmware. The maximal
+ * wait time is 5 minutes, which is acquired from the original
+ * EEH implementation. Also, the original implementation
+ * also defined the minimal wait time as 1 second.
+ */
+#define EEH_STATE_MIN_WAIT_TIME (1000)
+#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
+
+ while (1) {
+ ret = eeh_ops->get_state(pe, &mwait);
+
+ if (ret != EEH_STATE_UNAVAILABLE)
+ return ret;
+
+ if (max_wait <= 0) {
+ pr_warn("%s: Timeout when getting PE's state (%d)\n",
+ __func__, max_wait);
+ return EEH_STATE_NOT_SUPPORT;
+ }
+
+ if (mwait < EEH_STATE_MIN_WAIT_TIME) {
+ pr_warn("%s: Firmware returned bad wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MIN_WAIT_TIME;
+ } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
+ pr_warn("%s: Firmware returned too long wait value %d\n",
+ __func__, mwait);
+ mwait = EEH_STATE_MAX_WAIT_TIME;
+ }
+
+ msleep(min(mwait, max_wait));
+ max_wait -= mwait;
+ }
+}
+
/**
* eeh_phb_pe_get - Retrieve PHB PE based on the given PHB
* @phb: PCI controller
edev->pe = pe;
/* Put the edev to PE */
- list_add_tail(&edev->list, &pe->edevs);
+ list_add_tail(&edev->entry, &pe->edevs);
pr_debug("EEH: Add %04x:%02x:%02x.%01x to Bus PE#%x\n",
pdn->phb->global_number,
pdn->busno,
pe->addr);
return 0;
} else if (pe && (pe->type & EEH_PE_INVALID)) {
- list_add_tail(&edev->list, &pe->edevs);
+ list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
/*
* We're running to here because of PCI hotplug caused by
while (parent) {
if (!(parent->type & EEH_PE_INVALID))
break;
- parent->type &= ~(EEH_PE_INVALID | EEH_PE_KEEP);
+ parent->type &= ~EEH_PE_INVALID;
parent = parent->parent;
}
* link the EEH device accordingly.
*/
list_add_tail(&pe->child, &parent->child_list);
- list_add_tail(&edev->list, &pe->edevs);
+ list_add_tail(&edev->entry, &pe->edevs);
edev->pe = pe;
pr_debug("EEH: Add %04x:%02x:%02x.%01x to "
"Device PE#%x, Parent PE#%x\n",
int cnt;
struct pci_dn *pdn = eeh_dev_to_pdn(edev);
- if (!edev->pe) {
+ pe = eeh_dev_to_pe(edev);
+ if (!pe) {
pr_debug("%s: No PE found for device %04x:%02x:%02x.%01x\n",
__func__, pdn->phb->global_number,
pdn->busno,
}
/* Remove the EEH device */
- pe = eeh_dev_to_pe(edev);
edev->pe = NULL;
- list_del(&edev->list);
+ list_del(&edev->entry);
/*
* Check if the parent PE includes any EEH devices.
}
/**
- * __eeh_pe_state_mark - Mark the state for the PE
- * @data: EEH PE
- * @flag: state
+ * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * @pe: EEH PE
*
- * The function is used to mark the indicated state for the given
- * PE. Also, the associated PCI devices will be put into IO frozen
- * state as well.
+ * EEH error affects the current PE and its child PEs. The function
+ * is used to mark appropriate state for the affected PEs and the
+ * associated devices.
*/
-static void *__eeh_pe_state_mark(struct eeh_pe *pe, void *flag)
+void eeh_pe_state_mark(struct eeh_pe *root, int state)
{
- int state = *((int *)flag);
- struct eeh_dev *edev, *tmp;
- struct pci_dev *pdev;
-
- /* Keep the state of permanently removed PE intact */
- if (pe->state & EEH_PE_REMOVED)
- return NULL;
-
- pe->state |= state;
-
- /* Offline PCI devices if applicable */
- if (!(state & EEH_PE_ISOLATED))
- return NULL;
-
- eeh_pe_for_each_dev(pe, edev, tmp) {
- pdev = eeh_dev_to_pci_dev(edev);
- if (pdev)
- pdev->error_state = pci_channel_io_frozen;
- }
-
- /* Block PCI config access if required */
- if (pe->state & EEH_PE_CFG_RESTRICTED)
- pe->state |= EEH_PE_CFG_BLOCKED;
+ struct eeh_pe *pe;
- return NULL;
+ eeh_for_each_pe(root, pe)
+ if (!(pe->state & EEH_PE_REMOVED))
+ pe->state |= state;
}
+EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
/**
- * eeh_pe_state_mark - Mark specified state for PE and its associated device
+ * eeh_pe_mark_isolated
* @pe: EEH PE
*
- * EEH error affects the current PE and its child PEs. The function
- * is used to mark appropriate state for the affected PEs and the
- * associated devices.
+ * Record that a PE has been isolated by marking the PE and it's children as
+ * EEH_PE_ISOLATED (and EEH_PE_CFG_BLOCKED, if required) and their PCI devices
+ * as pci_channel_io_frozen.
*/
-void eeh_pe_state_mark(struct eeh_pe *pe, int state)
+void eeh_pe_mark_isolated(struct eeh_pe *root)
{
- eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
+ struct eeh_pe *pe;
+ struct eeh_dev *edev;
+ struct pci_dev *pdev;
+
+ eeh_pe_state_mark(root, EEH_PE_ISOLATED);
+ eeh_for_each_pe(root, pe) {
+ list_for_each_entry(edev, &pe->edevs, entry) {
+ pdev = eeh_dev_to_pci_dev(edev);
+ if (pdev)
+ pdev->error_state = pci_channel_io_frozen;
+ }
+ /* Block PCI config access if required */
+ if (pe->state & EEH_PE_CFG_RESTRICTED)
+ pe->state |= EEH_PE_CFG_BLOCKED;
+ }
}
-EXPORT_SYMBOL_GPL(eeh_pe_state_mark);
+EXPORT_SYMBOL_GPL(eeh_pe_mark_isolated);
static void *__eeh_pe_dev_mode_mark(struct eeh_dev *edev, void *flag)
{
eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
}
-/**
- * eeh_pe_state_mark_with_cfg - Mark PE state with unblocked config space
- * @pe: PE
- * @state: PE state to be set
- *
- * Set specified flag to PE and its child PEs. The PCI config space
- * of some PEs is blocked automatically when EEH_PE_ISOLATED is set,
- * which isn't needed in some situations. The function allows to set
- * the specified flag to indicated PEs without blocking their PCI
- * config space.
- */
-void eeh_pe_state_mark_with_cfg(struct eeh_pe *pe, int state)
-{
- eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
- if (!(state & EEH_PE_ISOLATED))
- return;
-
- /* Clear EEH_PE_CFG_BLOCKED, which might be set just now */
- state = EEH_PE_CFG_BLOCKED;
- eeh_pe_traverse(pe, __eeh_pe_state_clear, &state);
-}
-
/*
* Some PCI bridges (e.g. PLX bridges) have primary/secondary
* buses assigned explicitly by firmware, and we probably have
return pe->bus;
/* Retrieve the parent PCI bus of first (top) PCI device */
- edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
pdev = eeh_dev_to_pci_dev(edev);
if (pdev)
return pdev->bus;
lis r10,MSR_KERNEL@h
ori r10,r10,MSR_KERNEL@l
bl transfer_to_handler_full
- .long nonrecoverable_exception
+ .long unrecoverable_exception
.long ret_from_except
#endif
rlwinm r3,r3,0,0,30
stw r3,_TRAP(r1)
4: addi r3,r1,STACK_FRAME_OVERHEAD
- bl nonrecoverable_exception
+ bl unrecoverable_exception
/* shouldn't return */
b 4b
* based on caller's run-mode / personality.
*/
ld r11,SYS_CALL_TABLE@toc(2)
- andi. r10,r10,_TIF_32BIT
+ andis. r10,r10,_TIF_32BIT@h
beq 15f
addi r11,r11,8 /* use 32-bit syscall entries */
clrldi r3,r3,32
4: /* Anything else left to do? */
BEGIN_FTR_SECTION
- lis r3,INIT_PPR@highest /* Set thread.ppr = 3 */
- ld r10,PACACURRENT(r13)
+ lis r3,DEFAULT_PPR@highest /* Set default PPR */
sldi r3,r3,32 /* bits 11-13 are used for ppr */
- std r3,TASKTHREADPPR(r10)
+ std r3,_PPR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
andi. r0,r9,(_TIF_SYSCALL_DOTRACE|_TIF_SINGLESTEP)
addi r6,r4,-THREAD /* Convert THREAD to 'current' */
std r6,PACACURRENT(r13) /* Set new 'current' */
+#if defined(CONFIG_STACKPROTECTOR)
+ ld r6, TASK_CANARY(r6)
+ std r6, PACA_CANARY(r13)
+#endif
ld r8,KSP(r4) /* new stack pointer */
#ifdef CONFIG_PPC_BOOK3S_64
isync
slbie r6
+BEGIN_FTR_SECTION
slbie r6 /* Workaround POWER5 < DD2.1 issue */
+END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
slbmte r7,r0
isync
2:
andi. r0,r3,MSR_RI
beq- .Lunrecov_restore
- /* Load PPR from thread struct before we clear MSR:RI */
-BEGIN_FTR_SECTION
- ld r2,PACACURRENT(r13)
- ld r2,TASKTHREADPPR(r2)
-END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
-
/*
* Clear RI before restoring r13. If we are returning to
* userspace and we take an exception after restoring r13,
andi. r0,r3,MSR_PR
beq 1f
BEGIN_FTR_SECTION
- mtspr SPRN_PPR,r2 /* Restore PPR */
+ /* Restore PPR */
+ ld r2,_PPR(r1)
+ mtspr SPRN_PPR,r2
END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
ACCOUNT_CPU_USER_EXIT(r13, r2, r4)
REST_GPR(13, r1)
_GLOBAL(enter_rtas)
mflr r0
std r0,16(r1)
- stdu r1,-RTAS_FRAME_SIZE(r1) /* Save SP and create stack space. */
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space. */
/* Because RTAS is running in 32b mode, it clobbers the high order half
* of all registers that it saves. We therefore save those registers
ld r8,_DSISR(r1)
mtdsisr r8
- addi r1,r1,RTAS_FRAME_SIZE /* Unstack our frame */
+ addi r1,r1,SWITCH_FRAME_SIZE /* Unstack our frame */
ld r0,16(r1) /* get return address */
mtlr r0
_GLOBAL(enter_prom)
mflr r0
std r0,16(r1)
- stdu r1,-PROM_FRAME_SIZE(r1) /* Save SP and create stack space */
+ stdu r1,-SWITCH_FRAME_SIZE(r1) /* Save SP and create stack space */
/* Because PROM is running in 32b mode, it clobbers the high order half
* of all registers that it saves. We therefore save those registers
REST_10GPRS(22, r1)
ld r4,_CCR(r1)
mtcr r4
-
- addi r1,r1,PROM_FRAME_SIZE
+
+ addi r1,r1,SWITCH_FRAME_SIZE
ld r0,16(r1)
mtlr r0
blr
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
BEGIN_FTR_SECTION
- b machine_check_powernv_early
+ b machine_check_common_early
FTR_SECTION_ELSE
b machine_check_pSeries_0
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
EXC_REAL_END(machine_check, 0x200, 0x100)
EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_powernv_early)
-BEGIN_FTR_SECTION
+TRAMP_REAL_BEGIN(machine_check_common_early)
EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
/*
* Register contents:
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr r11 /* get MSR value */
+BEGIN_FTR_SECTION
ori r11,r11,MSR_ME /* turn on ME bit */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
ori r11,r11,MSR_RI /* turn on RI bit */
LOAD_HANDLER(r12, machine_check_handle_early)
1: mtspr SPRN_SRR0,r12
andc r11,r11,r10 /* Turn off MSR_ME */
b 1b
b . /* prevent speculative execution */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
TRAMP_REAL_BEGIN(machine_check_pSeries)
.globl machine_check_fwnmi
machine_check_fwnmi:
SET_SCRATCH0(r13) /* save r13 */
EXCEPTION_PROLOG_0(PACA_EXMC)
+BEGIN_FTR_SECTION
+ b machine_check_common_early
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
machine_check_pSeries_0:
EXCEPTION_PROLOG_1(PACA_EXMC, KVMTEST_PR, 0x200)
/*
bl machine_check_early
std r3,RESULT(r1) /* Save result */
ld r12,_MSR(r1)
+BEGIN_FTR_SECTION
+ b 4f
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
#ifdef CONFIG_PPC_P7_NAP
/*
*/
rldicl. r11,r12,4,63 /* See if MC hit while in HV mode. */
beq 5f
- andi. r11,r12,MSR_PR /* See if coming from user. */
+4: andi. r11,r12,MSR_PR /* See if coming from user. */
bne 9f /* continue in V mode if we are. */
5:
#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+BEGIN_FTR_SECTION
/*
* We are coming from kernel context. Check if we are coming from
* guest. if yes, then we can continue. We will fall through
lbz r11,HSTATE_IN_GUEST(r13)
cmpwi r11,0 /* Check if coming from guest */
bne 9f /* continue if we are. */
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
#endif
/*
* At this point we are not sure about what context we come from.
cmpdi r3,0 /* see if we handled MCE successfully */
beq 1b /* if !handled then panic */
+BEGIN_FTR_SECTION
/*
* Return from MC interrupt.
* Queue up the MCE event so that we can log it later, while
bl machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
RFI_TO_USER_OR_KERNEL
+FTR_SECTION_ELSE
+ /*
+ * pSeries: Return from MC interrupt. Before that stay on emergency
+ * stack and call machine_check_exception to log the MCE event.
+ */
+ LOAD_HANDLER(r10,mce_return)
+ mtspr SPRN_SRR0,r10
+ ld r10,PACAKMSR(r13)
+ mtspr SPRN_SRR1,r10
+ RFI_TO_KERNEL
+ b .
+ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
9:
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
- b machine_check_pSeries
+ SET_SCRATCH0(r13) /* save r13 */
+ EXCEPTION_PROLOG_0(PACA_EXMC)
+ b machine_check_pSeries_0
EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */
bl unrecoverable_exception
b 1b
+EXC_COMMON_BEGIN(mce_return)
+ /* Invoke machine_check_exception to print MCE event and return. */
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl machine_check_exception
+ MACHINE_CHECK_HANDLER_WINDUP
+ RFI_TO_KERNEL
+ b .
EXC_REAL(data_access, 0x300, 0x80)
EXC_VIRT(data_access, 0x4300, 0x80, 0x300)
EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x380)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_DAR
- mfspr r11,SPRN_SRR1
- crset 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, KVMTEST_PR, 0x380);
EXC_REAL_END(data_access_slb, 0x380, 0x80)
EXC_VIRT_BEGIN(data_access_slb, 0x4380, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x380)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_DAR
- mfspr r11,SPRN_SRR1
- crset 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_RELON_PROLOG(PACA_EXSLB, data_access_slb_common, EXC_STD, NOTEST, 0x380);
EXC_VIRT_END(data_access_slb, 0x4380, 0x80)
+
TRAMP_KVM_SKIP(PACA_EXSLB, 0x380)
+EXC_COMMON_BEGIN(data_access_slb_common)
+ mfspr r10,SPRN_DAR
+ std r10,PACA_EXSLB+EX_DAR(r13)
+ EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
+ ld r4,PACA_EXSLB+EX_DAR(r13)
+ std r4,_DAR(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_slb_fault
+ cmpdi r3,0
+ bne- 1f
+ b fast_exception_return
+1: /* Error case */
+ std r3,RESULT(r1)
+ bl save_nvgprs
+ RECONCILE_IRQ_STATE(r10, r11)
+ ld r4,_DAR(r1)
+ ld r5,RESULT(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_bad_slb_fault
+ b ret_from_except
+
EXC_REAL(instruction_access, 0x400, 0x80)
EXC_VIRT(instruction_access, 0x4400, 0x80, 0x400)
EXC_REAL_BEGIN(instruction_access_slb, 0x480, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, KVMTEST_PR, 0x480)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r11,SPRN_SRR1
- crclr 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, KVMTEST_PR, 0x480);
EXC_REAL_END(instruction_access_slb, 0x480, 0x80)
EXC_VIRT_BEGIN(instruction_access_slb, 0x4480, 0x80)
- SET_SCRATCH0(r13)
- EXCEPTION_PROLOG_0(PACA_EXSLB)
- EXCEPTION_PROLOG_1(PACA_EXSLB, NOTEST, 0x480)
- mr r12,r3 /* save r3 */
- mfspr r3,SPRN_SRR0 /* SRR0 is faulting address */
- mfspr r11,SPRN_SRR1
- crclr 4*cr6+eq
- BRANCH_TO_COMMON(r10, slb_miss_common)
+EXCEPTION_RELON_PROLOG(PACA_EXSLB, instruction_access_slb_common, EXC_STD, NOTEST, 0x480);
EXC_VIRT_END(instruction_access_slb, 0x4480, 0x80)
-TRAMP_KVM(PACA_EXSLB, 0x480)
-
-
-/*
- * This handler is used by the 0x380 and 0x480 SLB miss interrupts, as well as
- * the virtual mode 0x4380 and 0x4480 interrupts if AIL is enabled.
- */
-EXC_COMMON_BEGIN(slb_miss_common)
- /*
- * r13 points to the PACA, r9 contains the saved CR,
- * r12 contains the saved r3,
- * r11 contain the saved SRR1, SRR0 is still ready for return
- * r3 has the faulting address
- * r9 - r13 are saved in paca->exslb.
- * cr6.eq is set for a D-SLB miss, clear for a I-SLB miss
- * We assume we aren't going to take any exceptions during this
- * procedure.
- */
- mflr r10
- stw r9,PACA_EXSLB+EX_CCR(r13) /* save CR in exc. frame */
- std r10,PACA_EXSLB+EX_LR(r13) /* save LR */
-
- andi. r9,r11,MSR_PR // Check for exception from userspace
- cmpdi cr4,r9,MSR_PR // And save the result in CR4 for later
-
- /*
- * Test MSR_RI before calling slb_allocate_realmode, because the
- * MSR in r11 gets clobbered. However we still want to allocate
- * SLB in case MSR_RI=0, to minimise the risk of getting stuck in
- * recursive SLB faults. So use cr5 for this, which is preserved.
- */
- andi. r11,r11,MSR_RI /* check for unrecoverable exception */
- cmpdi cr5,r11,MSR_RI
-
- crset 4*cr0+eq
-#ifdef CONFIG_PPC_BOOK3S_64
-BEGIN_MMU_FTR_SECTION
- bl slb_allocate
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_TYPE_RADIX)
-#endif
-
- ld r10,PACA_EXSLB+EX_LR(r13)
- lwz r9,PACA_EXSLB+EX_CCR(r13) /* get saved CR */
- mtlr r10
-
- /*
- * Large address, check whether we have to allocate new contexts.
- */
- beq- 8f
- bne- cr5,2f /* if unrecoverable exception, oops */
-
- /* All done -- return from exception. */
-
- bne cr4,1f /* returning to kernel */
-
- mtcrf 0x80,r9
- mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
- mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
- mtcrf 0x02,r9 /* I/D indication is in cr6 */
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-
- RESTORE_CTR(r9, PACA_EXSLB)
- RESTORE_PPR_PACA(PACA_EXSLB, r9)
- mr r3,r12
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- RFI_TO_USER
- b . /* prevent speculative execution */
-1:
- mtcrf 0x80,r9
- mtcrf 0x08,r9 /* MSR[PR] indication is in cr4 */
- mtcrf 0x04,r9 /* MSR[RI] indication is in cr5 */
- mtcrf 0x02,r9 /* I/D indication is in cr6 */
- mtcrf 0x01,r9 /* slb_allocate uses cr0 and cr7 */
-
- RESTORE_CTR(r9, PACA_EXSLB)
- RESTORE_PPR_PACA(PACA_EXSLB, r9)
- mr r3,r12
- ld r9,PACA_EXSLB+EX_R9(r13)
- ld r10,PACA_EXSLB+EX_R10(r13)
- ld r11,PACA_EXSLB+EX_R11(r13)
- ld r12,PACA_EXSLB+EX_R12(r13)
- ld r13,PACA_EXSLB+EX_R13(r13)
- RFI_TO_KERNEL
- b . /* prevent speculative execution */
-
-
-2: std r3,PACA_EXSLB+EX_DAR(r13)
- mr r3,r12
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10,unrecov_slb)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
-
-8: std r3,PACA_EXSLB+EX_DAR(r13)
- mr r3,r12
- mfspr r11,SPRN_SRR0
- mfspr r12,SPRN_SRR1
- LOAD_HANDLER(r10, large_addr_slb)
- mtspr SPRN_SRR0,r10
- ld r10,PACAKMSR(r13)
- mtspr SPRN_SRR1,r10
- RFI_TO_KERNEL
- b .
+TRAMP_KVM(PACA_EXSLB, 0x480)
-EXC_COMMON_BEGIN(unrecov_slb)
- EXCEPTION_PROLOG_COMMON(0x4100, PACA_EXSLB)
- RECONCILE_IRQ_STATE(r10, r11)
+EXC_COMMON_BEGIN(instruction_access_slb_common)
+ EXCEPTION_PROLOG_COMMON(0x480, PACA_EXSLB)
+ ld r4,_NIP(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_slb_fault
+ cmpdi r3,0
+ bne- 1f
+ b fast_exception_return
+1: /* Error case */
+ std r3,RESULT(r1)
bl save_nvgprs
-1: addi r3,r1,STACK_FRAME_OVERHEAD
- bl unrecoverable_exception
- b 1b
-
-EXC_COMMON_BEGIN(large_addr_slb)
- EXCEPTION_PROLOG_COMMON(0x380, PACA_EXSLB)
RECONCILE_IRQ_STATE(r10, r11)
- ld r3, PACA_EXSLB+EX_DAR(r13)
- std r3, _DAR(r1)
- beq cr6, 2f
- li r10, 0x481 /* fix trap number for I-SLB miss */
- std r10, _TRAP(r1)
-2: bl save_nvgprs
- addi r3, r1, STACK_FRAME_OVERHEAD
- bl slb_miss_large_addr
+ ld r4,_NIP(r1)
+ ld r5,RESULT(r1)
+ addi r3,r1,STACK_FRAME_OVERHEAD
+ bl do_bad_slb_fault
b ret_from_except
+
EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
.globl hardware_interrupt_hv;
hardware_interrupt_hv:
break;
case 1:
if (fw_dump.dump_registered == 1) {
- ret = -EEXIST;
- goto unlock_out;
+ /* Un-register Firmware-assisted dump */
+ fadump_unregister_dump(&fdm);
}
/* Register Firmware-assisted dump */
ret = register_fadump();
mtspr SPRN_MD_TWC, r10
mfspr r10, SPRN_IMMR /* Get current IMMR */
rlwinm r10, r10, 0, 0xfff80000 /* Get 512 kbytes boundary */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT | _PAGE_NO_CACHE
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, MD_PS8MEG | MD_SVALID | M_APG2
mtspr SPRN_MD_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MD_RPN, r10 /* Update TLB entry */
li r11, MI_PS8MEG | MI_SVALID | M_APG2
mtspr SPRN_MI_TWC, r11
rlwinm r10, r10, 0, 0x0f800000 /* 8xx supports max 256Mb RAM */
- ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY | \
+ ori r10, r10, 0xf0 | MI_SPS16K | _PAGE_SH | _PAGE_DIRTY | \
_PAGE_PRESENT
mtspr SPRN_MI_RPN, r10 /* Update TLB entry */
#ifdef CONFIG_PPC_INDIRECT_MMIO
static void __iomem *iowa_ioremap(phys_addr_t addr, unsigned long size,
- unsigned long flags, void *caller)
+ pgprot_t prot, void *caller)
{
struct iowa_bus *bus;
- void __iomem *res = __ioremap_caller(addr, size, flags, caller);
+ void __iomem *res = __ioremap_caller(addr, size, prot, caller);
int busno;
bus = iowa_pci_find(0, (unsigned long)addr);
vaddr = page_address(page) + offset;
uaddr = (unsigned long)vaddr;
- npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
if (tbl) {
+ npages = iommu_num_pages(uaddr, size, IOMMU_PAGE_SIZE(tbl));
align = 0;
if (tbl->it_page_shift < PAGE_SHIFT && size >= PAGE_SIZE &&
((unsigned long)vaddr & ~PAGE_MASK) == 0)
size = 0x10000;
__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- size, pgprot_val(pgprot_noncached(__pgprot(0))));
+ size, pgprot_noncached(PAGE_KERNEL));
return;
inval_range:
printk(KERN_ERR "no ISA IO ranges or unexpected isa range, "
"mapping 64k\n");
__ioremap_at(phb_io_base_phys, (void *)ISA_IO_BASE,
- 0x10000, pgprot_val(pgprot_noncached(__pgprot(0))));
+ 0x10000, pgprot_noncached(PAGE_KERNEL));
}
*/
isa_io_base = ISA_IO_BASE;
__ioremap_at(pbase, (void *)ISA_IO_BASE,
- size, pgprot_val(pgprot_noncached(__pgprot(0))));
+ size, pgprot_noncached(PAGE_KERNEL));
pr_debug("ISA: Non-PCI bridge is %pOF\n", np);
}
#include <asm/processor.h>
#include <asm/machdep.h>
#include <asm/debug.h>
+#include <asm/code-patching.h>
#include <linux/slab.h>
/*
if (kgdb_handle_exception(1, SIGTRAP, 0, regs) != 0)
return 0;
- if (*(u32 *) (regs->nip) == *(u32 *) (&arch_kgdb_ops.gdb_bpt_instr))
+ if (*(u32 *)regs->nip == BREAK_INSTR)
regs->nip += BREAK_INSTR_SIZE;
return 1;
return -1;
}
+int kgdb_arch_set_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned int instr;
+ unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+
+ err = probe_kernel_address(addr, instr);
+ if (err)
+ return err;
+
+ err = patch_instruction(addr, BREAK_INSTR);
+ if (err)
+ return -EFAULT;
+
+ *(unsigned int *)bpt->saved_instr = instr;
+
+ return 0;
+}
+
+int kgdb_arch_remove_breakpoint(struct kgdb_bkpt *bpt)
+{
+ int err;
+ unsigned int instr = *(unsigned int *)bpt->saved_instr;
+ unsigned int *addr = (unsigned int *)bpt->bpt_addr;
+
+ err = patch_instruction(addr, instr);
+ if (err)
+ return -EFAULT;
+
+ return 0;
+}
+
/*
* Global data
*/
-struct kgdb_arch arch_kgdb_ops = {
-#ifdef __LITTLE_ENDIAN__
- .gdb_bpt_instr = {0x08, 0x10, 0x82, 0x7d},
-#else
- .gdb_bpt_instr = {0x7d, 0x82, 0x10, 0x08},
-#endif
-};
+struct kgdb_arch arch_kgdb_ops;
static int kgdb_not_implemented(struct pt_regs *regs)
{
{
long handled = 0;
- __this_cpu_inc(irq_stat.mce_exceptions);
-
- if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
- handled = cur_cpu_spec->machine_check_early(regs);
+ /*
+ * See if platform is capable of handling machine check.
+ */
+ if (ppc_md.machine_check_early)
+ handled = ppc_md.machine_check_early(regs);
return handled;
}
/* flush SLBs and reload */
#ifdef CONFIG_PPC_BOOK3S_64
-static void flush_and_reload_slb(void)
+void flush_and_reload_slb(void)
{
/* Invalidate all SLBs */
slb_flush_all_realmode();
static void flush_erat(void)
{
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+ flush_and_reload_slb();
+ return;
+ }
+#endif
+ /* PPC_INVALIDATE_ERAT can only be used on ISA v3 and newer */
asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
(void *)sect->sh_addr + sect->sh_size);
#endif /* CONFIG_PPC64 */
+#ifdef PPC64_ELF_ABI_v1
+ sect = find_section(hdr, sechdrs, ".opd");
+ if (sect != NULL) {
+ me->arch.start_opd = sect->sh_addr;
+ me->arch.end_opd = sect->sh_addr + sect->sh_size;
+ }
+#endif /* PPC64_ELF_ABI_v1 */
+
#ifdef CONFIG_PPC_BARRIER_NOSPEC
sect = find_section(hdr, sechdrs, "__spec_barrier_fixup");
if (sect != NULL)
else if (strcmp(secstrings+sechdrs[i].sh_name,"__versions")==0)
dedotify_versions((void *)hdr + sechdrs[i].sh_offset,
sechdrs[i].sh_size);
- else if (!strcmp(secstrings + sechdrs[i].sh_name, ".opd")) {
- me->arch.start_opd = sechdrs[i].sh_addr;
- me->arch.end_opd = sechdrs[i].sh_addr +
- sechdrs[i].sh_size;
- }
/* We don't handle .init for the moment: rename to _init */
while ((p = strstr(secstrings + sechdrs[i].sh_name, ".init")))
case R_PPC64_REL32:
/* 32 bits relative (used by relative exception tables) */
- *(u32 *)location = value - (unsigned long)location;
+ /* Convert value to relative */
+ value -= (unsigned long)location;
+ if (value + 0x80000000 > 0xffffffff) {
+ pr_err("%s: REL32 %li out of range!\n",
+ me->name, (long int)value);
+ return -ENOEXEC;
+ }
+ *(u32 *)location = value;
break;
case R_PPC64_TOCSAVE:
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/export.h>
-#include <linux/syscalls.h>
#include <asm/processor.h>
#include <asm/io.h>
/* Establish the mapping */
if (__ioremap_at(phys_page, area->addr, size_page,
- pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)
+ pgprot_noncached(PAGE_KERNEL)) == NULL)
return -ENOMEM;
/* Fixup hose IO resource */
#include <linux/uaccess.h>
#include <linux/elf-randomize.h>
#include <linux/pkeys.h>
+#include <linux/seq_buf.h>
#include <asm/pgtable.h>
#include <asm/io.h>
#include <asm/livepatch.h>
#include <asm/cpu_has_feature.h>
#include <asm/asm-prototypes.h>
+#include <asm/stacktrace.h>
#include <linux/kprobes.h>
#include <linux/kdebug.h>
}
}
-static inline bool msr_tm_active(unsigned long msr)
-{
- return MSR_TM_ACTIVE(msr);
-}
-
static bool tm_active_with_fp(struct task_struct *tsk)
{
- return msr_tm_active(tsk->thread.regs->msr) &&
+ return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
(tsk->thread.ckpt_regs.msr & MSR_FP);
}
static bool tm_active_with_altivec(struct task_struct *tsk)
{
- return msr_tm_active(tsk->thread.regs->msr) &&
+ return MSR_TM_ACTIVE(tsk->thread.regs->msr) &&
(tsk->thread.ckpt_regs.msr & MSR_VEC);
}
#else
-static inline bool msr_tm_active(unsigned long msr) { return false; }
static inline void check_if_tm_restore_required(struct task_struct *tsk) { }
static inline bool tm_active_with_fp(struct task_struct *tsk) { return false; }
static inline bool tm_active_with_altivec(struct task_struct *tsk) { return false; }
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_fpu(current);
}
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_altivec(current);
}
* giveup as this would save to the 'live' structure not the
* checkpointed structure.
*/
- if(!msr_tm_active(cpumsr) && msr_tm_active(current->thread.regs->msr))
+ if (!MSR_TM_ACTIVE(cpumsr) &&
+ MSR_TM_ACTIVE(current->thread.regs->msr))
return;
__giveup_vsx(current);
}
{
unsigned long msr;
- if (!msr_tm_active(regs->msr) &&
+ if (!MSR_TM_ACTIVE(regs->msr) &&
!current->thread.load_fp && !loadvec(current->thread))
return;
return last;
}
-static int instructions_to_print = 16;
+#define NR_INSN_TO_PRINT 16
static void show_instructions(struct pt_regs *regs)
{
int i;
- unsigned long pc = regs->nip - (instructions_to_print * 3 / 4 *
- sizeof(int));
+ unsigned long pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
printk("Instruction dump:");
- for (i = 0; i < instructions_to_print; i++) {
+ for (i = 0; i < NR_INSN_TO_PRINT; i++) {
int instr;
if (!(i % 8))
#endif
if (!__kernel_text_address(pc) ||
- probe_kernel_address((unsigned int __user *)pc, instr)) {
+ probe_kernel_address((const void *)pc, instr)) {
pr_cont("XXXXXXXX ");
} else {
if (regs->nip == pc)
void show_user_instructions(struct pt_regs *regs)
{
unsigned long pc;
- int i;
+ int n = NR_INSN_TO_PRINT;
+ struct seq_buf s;
+ char buf[96]; /* enough for 8 times 9 + 2 chars */
- pc = regs->nip - (instructions_to_print * 3 / 4 * sizeof(int));
+ pc = regs->nip - (NR_INSN_TO_PRINT * 3 / 4 * sizeof(int));
/*
* Make sure the NIP points at userspace, not kernel text/data or
* elsewhere.
*/
- if (!__access_ok(pc, instructions_to_print * sizeof(int), USER_DS)) {
+ if (!__access_ok(pc, NR_INSN_TO_PRINT * sizeof(int), USER_DS)) {
pr_info("%s[%d]: Bad NIP, not dumping instructions.\n",
current->comm, current->pid);
return;
}
- pr_info("%s[%d]: code: ", current->comm, current->pid);
+ seq_buf_init(&s, buf, sizeof(buf));
- for (i = 0; i < instructions_to_print; i++) {
- int instr;
+ while (n) {
+ int i;
- if (!(i % 8) && (i > 0)) {
- pr_cont("\n");
- pr_info("%s[%d]: code: ", current->comm, current->pid);
- }
+ seq_buf_clear(&s);
- if (probe_kernel_address((unsigned int __user *)pc, instr)) {
- pr_cont("XXXXXXXX ");
- } else {
- if (regs->nip == pc)
- pr_cont("<%08x> ", instr);
- else
- pr_cont("%08x ", instr);
+ for (i = 0; i < 8 && n; i++, n--, pc += sizeof(int)) {
+ int instr;
+
+ if (probe_kernel_address((const void *)pc, instr)) {
+ seq_buf_printf(&s, "XXXXXXXX ");
+ continue;
+ }
+ seq_buf_printf(&s, regs->nip == pc ? "<%08x> " : "%08x ", instr);
}
- pc += sizeof(int);
+ if (!seq_buf_has_overflowed(&s))
+ pr_info("%s[%d]: code: %s\n", current->comm,
+ current->pid, s.buffer);
}
-
- pr_cont("\n");
}
struct regbit {
#endif /* CONFIG_HAVE_HW_BREAKPOINT */
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void arch_setup_new_exec(void)
+{
+ if (radix_enabled())
+ return;
+ hash__setup_new_exec();
+}
+#endif
+
int set_thread_uses_vas(void)
{
#ifdef CONFIG_PPC_BOOK3S_64
p->thread.dscr = mfspr(SPRN_DSCR);
}
if (cpu_has_feature(CPU_FTR_HAS_PPR))
- p->thread.ppr = INIT_PPR;
+ childregs->ppr = DEFAULT_PPR;
p->thread.tidr = 0;
#endif
return 0;
}
+void preload_new_slb_context(unsigned long start, unsigned long sp);
+
/*
* Set up a thread for executing a new program
*/
{
#ifdef CONFIG_PPC64
unsigned long load_addr = regs->gpr[2]; /* saved by ELF_PLAT_INIT */
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ preload_new_slb_context(start, sp);
+#endif
#endif
/*
#ifdef CONFIG_VSX
current->thread.used_vsr = 0;
#endif
+ current->thread.load_slb = 0;
current->thread.load_fp = 0;
memset(¤t->thread.fp_state, 0, sizeof(current->thread.fp_state));
current->thread.fp_save_area = NULL;
#include <asm/btext.h>
#include <asm/sections.h>
#include <asm/machdep.h>
-#include <asm/opal.h>
#include <asm/asm-prototypes.h>
#include <linux/linux_logo.h>
+/* All of prom_init bss lives here */
+#define __prombss __section(.bss.prominit)
+
/*
* Eventually bump that one up
*/
#define OF_WORKAROUNDS 0
#else
#define OF_WORKAROUNDS of_workarounds
-int of_workarounds;
+static int of_workarounds __prombss;
#endif
#define OF_WA_CLAIM 1 /* do phys/virt claim separately, then map */
unsigned long size, unsigned long offset);
/* prom structure */
-static struct prom_t __initdata prom;
+static struct prom_t __prombss prom;
-static unsigned long prom_entry __initdata;
+static unsigned long __prombss prom_entry;
#define PROM_SCRATCH_SIZE 256
-static char __initdata of_stdout_device[256];
-static char __initdata prom_scratch[PROM_SCRATCH_SIZE];
+static char __prombss of_stdout_device[256];
+static char __prombss prom_scratch[PROM_SCRATCH_SIZE];
-static unsigned long __initdata dt_header_start;
-static unsigned long __initdata dt_struct_start, dt_struct_end;
-static unsigned long __initdata dt_string_start, dt_string_end;
+static unsigned long __prombss dt_header_start;
+static unsigned long __prombss dt_struct_start, dt_struct_end;
+static unsigned long __prombss dt_string_start, dt_string_end;
-static unsigned long __initdata prom_initrd_start, prom_initrd_end;
+static unsigned long __prombss prom_initrd_start, prom_initrd_end;
#ifdef CONFIG_PPC64
-static int __initdata prom_iommu_force_on;
-static int __initdata prom_iommu_off;
-static unsigned long __initdata prom_tce_alloc_start;
-static unsigned long __initdata prom_tce_alloc_end;
+static int __prombss prom_iommu_force_on;
+static int __prombss prom_iommu_off;
+static unsigned long __prombss prom_tce_alloc_start;
+static unsigned long __prombss prom_tce_alloc_end;
#endif
-static bool prom_radix_disable __initdata = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
+#ifdef CONFIG_PPC_PSERIES
+static bool __prombss prom_radix_disable;
+#endif
struct platform_support {
bool hash_mmu;
#define PLATFORM_LPAR 0x0001
#define PLATFORM_POWERMAC 0x0400
#define PLATFORM_GENERIC 0x0500
-#define PLATFORM_OPAL 0x0600
-static int __initdata of_platform;
+static int __prombss of_platform;
-static char __initdata prom_cmd_line[COMMAND_LINE_SIZE];
+static char __prombss prom_cmd_line[COMMAND_LINE_SIZE];
-static unsigned long __initdata prom_memory_limit;
+static unsigned long __prombss prom_memory_limit;
-static unsigned long __initdata alloc_top;
-static unsigned long __initdata alloc_top_high;
-static unsigned long __initdata alloc_bottom;
-static unsigned long __initdata rmo_top;
-static unsigned long __initdata ram_top;
+static unsigned long __prombss alloc_top;
+static unsigned long __prombss alloc_top_high;
+static unsigned long __prombss alloc_bottom;
+static unsigned long __prombss rmo_top;
+static unsigned long __prombss ram_top;
-static struct mem_map_entry __initdata mem_reserve_map[MEM_RESERVE_MAP_SIZE];
-static int __initdata mem_reserve_cnt;
+static struct mem_map_entry __prombss mem_reserve_map[MEM_RESERVE_MAP_SIZE];
+static int __prombss mem_reserve_cnt;
-static cell_t __initdata regbuf[1024];
+static cell_t __prombss regbuf[1024];
-static bool rtas_has_query_cpu_stopped;
+static bool __prombss rtas_has_query_cpu_stopped;
/*
static char *tohex(unsigned int x)
{
- static char digits[] = "0123456789abcdef";
- static char result[9];
+ static const char digits[] __initconst = "0123456789abcdef";
+ static char result[9] __prombss;
int i;
result[8] = 0;
#endif
}
+#ifdef CONFIG_PPC_PSERIES
+ prom_radix_disable = !IS_ENABLED(CONFIG_PPC_RADIX_MMU_DEFAULT);
opt = strstr(prom_cmd_line, "disable_radix");
if (opt) {
opt += 13;
}
if (prom_radix_disable)
prom_debug("Radix disabled from cmdline\n");
+#endif /* CONFIG_PPC_PSERIES */
}
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
/*
* The architecture vector has an array of PVR mask/value pairs,
* followed by # option vectors - 1, followed by the option vectors.
struct option_vector6 vec6;
} __packed;
-struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
+static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
.pvrs = {
{
.mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
},
};
+static struct ibm_arch_vec __prombss ibm_architecture_vec ____cacheline_aligned;
+
/* Old method - ELF header with PT_NOTE sections only works on BE */
#ifdef __BIG_ENDIAN__
-static struct fake_elf {
+static const struct fake_elf {
Elf32_Ehdr elfhdr;
Elf32_Phdr phdr[2];
struct chrpnote {
u32 ignore_me;
} rpadesc;
} rpanote;
-} fake_elf = {
+} fake_elf __initconst = {
.elfhdr = {
.e_ident = { 0x7f, 'E', 'L', 'F',
ELFCLASS32, ELFDATA2MSB, EV_CURRENT },
};
int prop_len = prom_getproplen(prom.chosen,
"ibm,arch-vec-5-platform-support");
+
+ /* First copy the architecture vec template */
+ ibm_architecture_vec = ibm_architecture_vec_template;
+
if (prop_len > 1) {
int i;
- u8 vec[prop_len];
+ u8 vec[8];
prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n",
prop_len);
+ if (prop_len > sizeof(vec))
+ prom_printf("WARNING: ibm,arch-vec-5-platform-support longer than expected (len: %d)\n",
+ prop_len);
prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support",
&vec, sizeof(vec));
- for (i = 0; i < prop_len; i += 2) {
+ for (i = 0; i < sizeof(vec); i += 2) {
prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2
, vec[i]
, vec[i + 1]);
}
#endif /* __BIG_ENDIAN__ */
}
-#endif /* #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) */
+#endif /* CONFIG_PPC_PSERIES */
/*
* Memory allocation strategy... our layout is normally:
}
}
-#ifdef CONFIG_PPC_POWERNV
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
-static u64 __initdata prom_opal_base;
-static u64 __initdata prom_opal_entry;
-#endif
-
-/*
- * Allocate room for and instantiate OPAL
- */
-static void __init prom_instantiate_opal(void)
-{
- phandle opal_node;
- ihandle opal_inst;
- u64 base, entry;
- u64 size = 0, align = 0x10000;
- __be64 val64;
- u32 rets[2];
-
- prom_debug("prom_instantiate_opal: start...\n");
-
- opal_node = call_prom("finddevice", 1, 1, ADDR("/ibm,opal"));
- prom_debug("opal_node: %x\n", opal_node);
- if (!PHANDLE_VALID(opal_node))
- return;
-
- val64 = 0;
- prom_getprop(opal_node, "opal-runtime-size", &val64, sizeof(val64));
- size = be64_to_cpu(val64);
- if (size == 0)
- return;
- val64 = 0;
- prom_getprop(opal_node, "opal-runtime-alignment", &val64,sizeof(val64));
- align = be64_to_cpu(val64);
-
- base = alloc_down(size, align, 0);
- if (base == 0) {
- prom_printf("OPAL allocation failed !\n");
- return;
- }
-
- opal_inst = call_prom("open", 1, 1, ADDR("/ibm,opal"));
- if (!IHANDLE_VALID(opal_inst)) {
- prom_printf("opening opal package failed (%x)\n", opal_inst);
- return;
- }
-
- prom_printf("instantiating opal at 0x%llx...", base);
-
- if (call_prom_ret("call-method", 4, 3, rets,
- ADDR("load-opal-runtime"),
- opal_inst,
- base >> 32, base & 0xffffffff) != 0
- || (rets[0] == 0 && rets[1] == 0)) {
- prom_printf(" failed\n");
- return;
- }
- entry = (((u64)rets[0]) << 32) | rets[1];
-
- prom_printf(" done\n");
-
- reserve_mem(base, size);
-
- prom_debug("opal base = 0x%llx\n", base);
- prom_debug("opal align = 0x%llx\n", align);
- prom_debug("opal entry = 0x%llx\n", entry);
- prom_debug("opal size = 0x%llx\n", size);
-
- prom_setprop(opal_node, "/ibm,opal", "opal-base-address",
- &base, sizeof(base));
- prom_setprop(opal_node, "/ibm,opal", "opal-entry-address",
- &entry, sizeof(entry));
-
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
- prom_opal_base = base;
- prom_opal_entry = entry;
-#endif
- prom_debug("prom_instantiate_opal: end...\n");
-}
-
-#endif /* CONFIG_PPC_POWERNV */
-
/*
* Allocate room for and instantiate RTAS
*/
}
}
#ifdef CONFIG_PPC64
- /* Try to detect OPAL */
- if (PHANDLE_VALID(call_prom("finddevice", 1, 1, ADDR("/ibm,opal"))))
- return PLATFORM_OPAL;
-
/* Try to figure out if it's an IBM pSeries or any other
* PAPR compliant platform. We assume it is if :
* - /device_type is "chrp" (please, do NOT use that for future
ihandle ih;
int i;
- static unsigned char default_colors[] = {
+ static const unsigned char default_colors[] __initconst = {
0x00, 0x00, 0x00,
0x00, 0x00, 0xaa,
0x00, 0xaa, 0x00,
char *namep, *prev_name, *sstart, *p, *ep, *lp, *path;
unsigned long soff;
unsigned char *valp;
- static char pname[MAX_PROPERTY_NAME];
+ static char pname[MAX_PROPERTY_NAME] __prombss;
int l, room, has_phandle = 0;
dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end);
has_phandle = 1;
}
- /* Add a "linux,phandle" property if no "phandle" property already
- * existed (can happen with OPAL)
- */
+ /* Add a "phandle" property if none already exist */
if (!has_phandle) {
- soff = dt_find_string("linux,phandle");
+ soff = dt_find_string("phandle");
if (soff == 0)
- prom_printf("WARNING: Can't find string index for"
- " <linux-phandle> node %s\n", path);
+ prom_printf("WARNING: Can't find string index for <phandle> node %s\n", path);
else {
dt_push_token(OF_DT_PROP, mem_start, mem_end);
dt_push_token(4, mem_start, mem_end);
dt_string_start = mem_start;
mem_start += 4; /* hole */
- /* Add "linux,phandle" in there, we'll need it */
+ /* Add "phandle" in there, we'll need it */
namep = make_room(&mem_start, &mem_end, 16, 1);
- strcpy(namep, "linux,phandle");
+ strcpy(namep, "phandle");
mem_start = (unsigned long)namep + strlen(namep) + 1;
/* Build string array */
*/
early_cmdline_parse();
-#if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV)
+#ifdef CONFIG_PPC_PSERIES
/*
* On pSeries, inform the firmware about our capabilities
*/
* On non-powermacs, try to instantiate RTAS. PowerMacs don't
* have a usable RTAS implementation.
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_instantiate_rtas();
-#ifdef CONFIG_PPC_POWERNV
- if (of_platform == PLATFORM_OPAL)
- prom_instantiate_opal();
-#endif /* CONFIG_PPC_POWERNV */
-
#ifdef CONFIG_PPC64
/* instantiate sml */
prom_instantiate_sml();
*
* (This must be done after instanciating RTAS)
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_hold_cpus();
/*
/*
* in case stdin is USB and still active on IBM machines...
* Unfortunately quiesce crashes on some powermacs if we have
- * closed stdin already (in particular the powerbook 101). It
- * appears that the OPAL version of OFW doesn't like it either.
+ * closed stdin already (in particular the powerbook 101).
*/
- if (of_platform != PLATFORM_POWERMAC &&
- of_platform != PLATFORM_OPAL)
+ if (of_platform != PLATFORM_POWERMAC)
prom_close_stdin();
/*
hdr = dt_header_start;
/* Don't print anything after quiesce under OPAL, it crashes OFW */
- if (of_platform != PLATFORM_OPAL) {
- prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
- prom_debug("->dt_header_start=0x%lx\n", hdr);
- }
+ prom_printf("Booting Linux via __start() @ 0x%lx ...\n", kbase);
+ prom_debug("->dt_header_start=0x%lx\n", hdr);
#ifdef CONFIG_PPC32
reloc_got2(-offset);
unreloc_toc();
#endif
-#ifdef CONFIG_PPC_EARLY_DEBUG_OPAL
- /* OPAL early debug gets the OPAL base & entry in r8 and r9 */
- __start(hdr, kbase, 0, 0, 0,
- prom_opal_base, prom_opal_entry);
-#else
__start(hdr, kbase, 0, 0, 0, 0, 0);
-#endif
return 0;
}
ERROR=0
+function check_section()
+{
+ file=$1
+ section=$2
+ size=$(objdump -h -j $section $file 2>/dev/null | awk "\$2 == \"$section\" {print \$3}")
+ size=${size:-0}
+ if [ $size -ne 0 ]; then
+ ERROR=1
+ echo "Error: Section $section not empty in prom_init.c" >&2
+ fi
+}
+
for UNDEF in $($NM -u $OBJ | awk '{print $2}')
do
# On 64-bit nm gives us the function descriptors, which have
fi
done
+check_section $OBJ .data
+check_section $OBJ .bss
+check_section $OBJ .init.data
+
exit $ERROR
}
#endif
- if (regno < (sizeof(struct pt_regs) / sizeof(unsigned long))) {
+ if (regno < (sizeof(struct user_pt_regs) / sizeof(unsigned long))) {
*data = ((unsigned long *)task->thread.regs)[regno];
return 0;
}
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.regs->orig_gpr3,
offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct pt_regs));
+ sizeof(struct user_pt_regs));
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct pt_regs), -1);
+ sizeof(struct user_pt_regs), -1);
return ret;
}
ret = user_regset_copyout(&pos, &count, &kbuf, &ubuf,
&target->thread.ckpt_regs.orig_gpr3,
offsetof(struct pt_regs, orig_gpr3),
- sizeof(struct pt_regs));
+ sizeof(struct user_pt_regs));
if (!ret)
ret = user_regset_copyout_zero(&pos, &count, &kbuf, &ubuf,
- sizeof(struct pt_regs), -1);
+ sizeof(struct user_pt_regs), -1);
return ret;
}
void *kbuf, void __user *ubuf)
{
return user_regset_copyout(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
+ &target->thread.regs->ppr, 0, sizeof(u64));
}
static int ppr_set(struct task_struct *target,
const void *kbuf, const void __user *ubuf)
{
return user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.ppr, 0, sizeof(u64));
+ &target->thread.regs->ppr, 0, sizeof(u64));
}
static int dscr_get(struct task_struct *target,
{
/* make sure the single step bit is not set. */
user_disable_single_step(child);
+ clear_tsk_thread_flag(child, TIF_SYSCALL_EMU);
}
#ifdef CONFIG_PPC_ADV_DEBUG_REGS
case PTRACE_GETREGS: /* Get all pt_regs from the child. */
return copy_regset_to_user(child, &user_ppc_native_view,
REGSET_GPR,
- 0, sizeof(struct pt_regs),
+ 0, sizeof(struct user_pt_regs),
datavp);
#ifdef CONFIG_PPC64
case PTRACE_SETREGS: /* Set all gp regs in the child. */
return copy_regset_from_user(child, &user_ppc_native_view,
REGSET_GPR,
- 0, sizeof(struct pt_regs),
+ 0, sizeof(struct user_pt_regs),
datavp);
case PTRACE_GETFPREGS: /* Get the child FPU state (FPR0...31 + FPSCR) */
{
user_exit();
+ if (test_thread_flag(TIF_SYSCALL_EMU)) {
+ ptrace_report_syscall(regs);
+ /*
+ * Returning -1 will skip the syscall execution. We want to
+ * avoid clobbering any register also, thus, not 'gotoing'
+ * skip label.
+ */
+ return -1;
+ }
+
/*
* The tracer may decide to abort the syscall, if so tracehook
* will return !0. Note that the tracer may also just change
user_enter();
}
+
+void __init pt_regs_check(void)
+{
+ BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
+ offsetof(struct user_pt_regs, gpr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, nip) !=
+ offsetof(struct user_pt_regs, nip));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, msr) !=
+ offsetof(struct user_pt_regs, msr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, orig_gpr3) !=
+ offsetof(struct user_pt_regs, orig_gpr3));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ctr) !=
+ offsetof(struct user_pt_regs, ctr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, link) !=
+ offsetof(struct user_pt_regs, link));
+ BUILD_BUG_ON(offsetof(struct pt_regs, xer) !=
+ offsetof(struct user_pt_regs, xer));
+ BUILD_BUG_ON(offsetof(struct pt_regs, ccr) !=
+ offsetof(struct user_pt_regs, ccr));
+#ifdef __powerpc64__
+ BUILD_BUG_ON(offsetof(struct pt_regs, softe) !=
+ offsetof(struct user_pt_regs, softe));
+#else
+ BUILD_BUG_ON(offsetof(struct pt_regs, mq) !=
+ offsetof(struct user_pt_regs, mq));
+#endif
+ BUILD_BUG_ON(offsetof(struct pt_regs, trap) !=
+ offsetof(struct user_pt_regs, trap));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dar) !=
+ offsetof(struct user_pt_regs, dar));
+ BUILD_BUG_ON(offsetof(struct pt_regs, dsisr) !=
+ offsetof(struct user_pt_regs, dsisr));
+ BUILD_BUG_ON(offsetof(struct pt_regs, result) !=
+ offsetof(struct user_pt_regs, result));
+
+ BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+}
goto out;
}
- stop_topology_update();
+ cpu_hotplug_disable();
+
+ /* Check if we raced with a CPU-Offline Operation */
+ if (unlikely(!cpumask_equal(cpu_present_mask, cpu_online_mask))) {
+ pr_err("%s: Raced against a concurrent CPU-Offline\n",
+ __func__);
+ atomic_set(&data.error, -EBUSY);
+ goto out_hotplug_enable;
+ }
/* Call function on all CPUs. One of us will make the
* rtas call
if (atomic_read(&data.error) != 0)
printk(KERN_ERR "Error doing global join\n");
- start_topology_update();
+out_hotplug_enable:
+ cpu_hotplug_enable();
/* Take down CPUs not online prior to suspend */
cpuret = rtas_offline_cpus_mask(offline_mask);
return "Dump Notification Event";
case RTAS_TYPE_PRRN:
return "Platform Resource Reassignment Event";
+ case RTAS_TYPE_HOTPLUG:
+ return "Hotplug Event";
}
return rtas_type[0];
} else {
struct rtas_error_log *errlog = (struct rtas_error_log *)buf;
- printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n",
- error_log_cnt, rtas_event_type(rtas_error_type(errlog)),
+ printk(RTAS_DEBUG "event: %d, Type: %s (%d), Severity: %d\n",
+ error_log_cnt,
+ rtas_event_type(rtas_error_type(errlog)),
+ rtas_error_type(errlog),
rtas_error_severity(errlog));
}
}
}
#ifdef CONFIG_PPC_PSERIES
-static s32 prrn_update_scope;
-
-static void prrn_work_fn(struct work_struct *work)
+static void handle_prrn_event(s32 scope)
{
/*
* For PRRN, we must pass the negative of the scope value in
* the RTAS event.
*/
- pseries_devicetree_update(-prrn_update_scope);
+ pseries_devicetree_update(-scope);
numa_update_cpu_topology(false);
}
-static DECLARE_WORK(prrn_work, prrn_work_fn);
-
-static void prrn_schedule_update(u32 scope)
-{
- flush_work(&prrn_work);
- prrn_update_scope = scope;
- schedule_work(&prrn_work);
-}
-
static void handle_rtas_event(const struct rtas_error_log *log)
{
if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled())
/* For PRRN Events the extended log length is used to denote
* the scope for calling rtas update-nodes.
*/
- prrn_schedule_update(rtas_error_extended_log_length(log));
+ handle_prrn_event(rtas_error_extended_log_length(log));
}
#else
#include <linux/serial_8250.h>
#include <linux/percpu.h>
#include <linux/memblock.h>
+#include <linux/bootmem.h>
#include <linux/of_platform.h>
#include <linux/hugetlb.h>
#include <asm/debugfs.h>
initmem_init();
+ early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
+
#ifdef CONFIG_DUMMY_CONSOLE
conswitchp = &dummy_con;
#endif
}
/*
- * Fixup HFSCR:TM based on CPU features. The bit is set by our
- * early asm init because at that point we haven't updated our
- * CPU features from firmware and device-tree. Here we have,
- * so let's do it.
+ * Set HFSCR:TM based on CPU features:
+ * In the special case of TM no suspend (P9N DD2.1), Linux is
+ * told TM is off via the dt-ftrs but told to (partially) use
+ * it via OPAL_REINIT_CPUS_TM_SUSPEND_DISABLED. So HFSCR[TM]
+ * will be off from dt-ftrs but we need to turn it on for the
+ * no suspend case.
*/
- if (cpu_has_feature(CPU_FTR_HVMODE) && !cpu_has_feature(CPU_FTR_TM_COMP))
- mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
+ if (cpu_has_feature(CPU_FTR_HVMODE)) {
+ if (cpu_has_feature(CPU_FTR_TM_COMP))
+ mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) | HFSCR_TM);
+ else
+ mtspr(SPRN_HFSCR, mfspr(SPRN_HFSCR) & ~HFSCR_TM);
+ }
/* Set IR and DR in PACA MSR */
get_paca()->kernel_msr = MSR_KERNEL;
#include <linux/topology.h>
#include <linux/profile.h>
#include <linux/processor.h>
+#include <linux/random.h>
+#include <linux/stackprotector.h>
#include <asm/ptrace.h>
#include <linux/atomic.h>
#endif
struct thread_info *secondary_ti;
+bool has_big_cores;
DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
+DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_l2_cache_map);
DEFINE_PER_CPU(cpumask_var_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_l2_cache_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);
+EXPORT_SYMBOL_GPL(has_big_cores);
+
+#define MAX_THREAD_LIST_SIZE 8
+#define THREAD_GROUP_SHARE_L1 1
+struct thread_groups {
+ unsigned int property;
+ unsigned int nr_groups;
+ unsigned int threads_per_group;
+ unsigned int thread_list[MAX_THREAD_LIST_SIZE];
+};
+
+/*
+ * On big-cores system, cpu_l1_cache_map for each CPU corresponds to
+ * the set its siblings that share the L1-cache.
+ */
+DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
/* SMP operations for this machine */
struct smp_ops_t *smp_ops;
}
#endif
+/*
+ * parse_thread_groups: Parses the "ibm,thread-groups" device tree
+ * property for the CPU device node @dn and stores
+ * the parsed output in the thread_groups
+ * structure @tg if the ibm,thread-groups[0]
+ * matches @property.
+ *
+ * @dn: The device node of the CPU device.
+ * @tg: Pointer to a thread group structure into which the parsed
+ * output of "ibm,thread-groups" is stored.
+ * @property: The property of the thread-group that the caller is
+ * interested in.
+ *
+ * ibm,thread-groups[0..N-1] array defines which group of threads in
+ * the CPU-device node can be grouped together based on the property.
+ *
+ * ibm,thread-groups[0] tells us the property based on which the
+ * threads are being grouped together. If this value is 1, it implies
+ * that the threads in the same group share L1, translation cache.
+ *
+ * ibm,thread-groups[1] tells us how many such thread groups exist.
+ *
+ * ibm,thread-groups[2] tells us the number of threads in each such
+ * group.
+ *
+ * ibm,thread-groups[3..N-1] is the list of threads identified by
+ * "ibm,ppc-interrupt-server#s" arranged as per their membership in
+ * the grouping.
+ *
+ * Example: If ibm,thread-groups = [1,2,4,5,6,7,8,9,10,11,12] it
+ * implies that there are 2 groups of 4 threads each, where each group
+ * of threads share L1, translation cache.
+ *
+ * The "ibm,ppc-interrupt-server#s" of the first group is {5,6,7,8}
+ * and the "ibm,ppc-interrupt-server#s" of the second group is {9, 10,
+ * 11, 12} structure
+ *
+ * Returns 0 on success, -EINVAL if the property does not exist,
+ * -ENODATA if property does not have a value, and -EOVERFLOW if the
+ * property data isn't large enough.
+ */
+static int parse_thread_groups(struct device_node *dn,
+ struct thread_groups *tg,
+ unsigned int property)
+{
+ int i;
+ u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
+ u32 *thread_list;
+ size_t total_threads;
+ int ret;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array, 3);
+ if (ret)
+ return ret;
+
+ tg->property = thread_group_array[0];
+ tg->nr_groups = thread_group_array[1];
+ tg->threads_per_group = thread_group_array[2];
+ if (tg->property != property ||
+ tg->nr_groups < 1 ||
+ tg->threads_per_group < 1)
+ return -ENODATA;
+
+ total_threads = tg->nr_groups * tg->threads_per_group;
+
+ ret = of_property_read_u32_array(dn, "ibm,thread-groups",
+ thread_group_array,
+ 3 + total_threads);
+ if (ret)
+ return ret;
+
+ thread_list = &thread_group_array[3];
+
+ for (i = 0 ; i < total_threads; i++)
+ tg->thread_list[i] = thread_list[i];
+
+ return 0;
+}
+
+/*
+ * get_cpu_thread_group_start : Searches the thread group in tg->thread_list
+ * that @cpu belongs to.
+ *
+ * @cpu : The logical CPU whose thread group is being searched.
+ * @tg : The thread-group structure of the CPU node which @cpu belongs
+ * to.
+ *
+ * Returns the index to tg->thread_list that points to the the start
+ * of the thread_group that @cpu belongs to.
+ *
+ * Returns -1 if cpu doesn't belong to any of the groups pointed to by
+ * tg->thread_list.
+ */
+static int get_cpu_thread_group_start(int cpu, struct thread_groups *tg)
+{
+ int hw_cpu_id = get_hard_smp_processor_id(cpu);
+ int i, j;
+
+ for (i = 0; i < tg->nr_groups; i++) {
+ int group_start = i * tg->threads_per_group;
+
+ for (j = 0; j < tg->threads_per_group; j++) {
+ int idx = group_start + j;
+
+ if (tg->thread_list[idx] == hw_cpu_id)
+ return group_start;
+ }
+ }
+
+ return -1;
+}
+
+static int init_cpu_l1_cache_map(int cpu)
+
+{
+ struct device_node *dn = of_get_cpu_node(cpu, NULL);
+ struct thread_groups tg = {.property = 0,
+ .nr_groups = 0,
+ .threads_per_group = 0};
+ int first_thread = cpu_first_thread_sibling(cpu);
+ int i, cpu_group_start = -1, err = 0;
+
+ if (!dn)
+ return -ENODATA;
+
+ err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
+ if (err)
+ goto out;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+
+ cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
+
+ if (unlikely(cpu_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ int i_group_start = get_cpu_thread_group_start(i, &tg);
+
+ if (unlikely(i_group_start == -1)) {
+ WARN_ON_ONCE(1);
+ err = -ENODATA;
+ goto out;
+ }
+
+ if (i_group_start == cpu_group_start)
+ cpumask_set_cpu(i, per_cpu(cpu_l1_cache_map, cpu));
+ }
+
+out:
+ of_node_put(dn);
+ return err;
+}
+
+static int init_big_cores(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ int err = init_cpu_l1_cache_map(cpu);
+
+ if (err)
+ return err;
+
+ zalloc_cpumask_var_node(&per_cpu(cpu_smallcore_map, cpu),
+ GFP_KERNEL,
+ cpu_to_node(cpu));
+ }
+
+ has_big_cores = true;
+ return 0;
+}
+
void __init smp_prepare_cpus(unsigned int max_cpus)
{
unsigned int cpu;
cpumask_set_cpu(boot_cpuid, cpu_l2_cache_mask(boot_cpuid));
cpumask_set_cpu(boot_cpuid, cpu_core_mask(boot_cpuid));
+ init_big_cores();
+ if (has_big_cores) {
+ cpumask_set_cpu(boot_cpuid,
+ cpu_smallcore_mask(boot_cpuid));
+ }
+
if (smp_ops && smp_ops->probe)
smp_ops->probe();
}
set_cpus_unrelated(cpu, i, cpu_core_mask);
set_cpus_unrelated(cpu, i, cpu_l2_cache_mask);
set_cpus_unrelated(cpu, i, cpu_sibling_mask);
+ if (has_big_cores)
+ set_cpus_unrelated(cpu, i, cpu_smallcore_mask);
}
}
#endif
+static inline void add_cpu_to_smallcore_masks(int cpu)
+{
+ struct cpumask *this_l1_cache_map = per_cpu(cpu_l1_cache_map, cpu);
+ int i, first_thread = cpu_first_thread_sibling(cpu);
+
+ if (!has_big_cores)
+ return;
+
+ cpumask_set_cpu(cpu, cpu_smallcore_mask(cpu));
+
+ for (i = first_thread; i < first_thread + threads_per_core; i++) {
+ if (cpu_online(i) && cpumask_test_cpu(i, this_l1_cache_map))
+ set_cpus_related(i, cpu, cpu_smallcore_mask);
+ }
+}
+
static void add_cpu_to_masks(int cpu)
{
int first_thread = cpu_first_thread_sibling(cpu);
if (cpu_online(i))
set_cpus_related(i, cpu, cpu_sibling_mask);
+ add_cpu_to_smallcore_masks(cpu);
/*
* Copy the thread sibling mask into the cache sibling mask
* and mark any CPUs that share an L2 with this CPU.
void start_secondary(void *unused)
{
unsigned int cpu = smp_processor_id();
+ struct cpumask *(*sibling_mask)(int) = cpu_sibling_mask;
mmgrab(&init_mm);
current->active_mm = &init_mm;
/* Update topology CPU masks */
add_cpu_to_masks(cpu);
+ if (has_big_cores)
+ sibling_mask = cpu_smallcore_mask;
/*
* Check for any shared caches. Note that this must be done on a
* per-core basis because one core in the pair might be disabled.
*/
- if (!cpumask_equal(cpu_l2_cache_mask(cpu), cpu_sibling_mask(cpu)))
+ if (!cpumask_equal(cpu_l2_cache_mask(cpu), sibling_mask(cpu)))
shared_caches = true;
set_numa_node(numa_cpu_lookup_table[cpu]);
notify_cpu_starting(cpu);
set_cpu_online(cpu, true);
+ boot_init_stack_canary();
+
local_irq_enable();
/* We can enable ftrace for secondary cpus now */
return cpu_l2_cache_mask(cpu);
}
+#ifdef CONFIG_SCHED_SMT
+static const struct cpumask *smallcore_smt_mask(int cpu)
+{
+ return cpu_smallcore_mask(cpu);
+}
+#endif
+
static struct sched_domain_topology_level power9_topology[] = {
#ifdef CONFIG_SCHED_SMT
{ cpu_smt_mask, powerpc_smt_flags, SD_INIT_NAME(SMT) },
shared_proc_topology_init();
dump_numa_cpu_topology();
+#ifdef CONFIG_SCHED_SMT
+ if (has_big_cores) {
+ pr_info("Using small cores at SMT level\n");
+ power9_topology[0].mask = smallcore_smt_mask;
+ powerpc_topology[0].mask = smallcore_smt_mask;
+ }
+#endif
/*
* If any CPU detects that it's sharing a cache with another CPU then
* use the deeper topology that is aware of this sharing.
addi r1,r1,-128
#ifdef CONFIG_PPC_BOOK3S_64
- bl slb_flush_and_rebolt
+ bl slb_flush_and_restore_bolted
#endif
bl do_after_copyback
addi r1,r1,128
.rating = 200,
.irq = 0,
.set_next_event = decrementer_set_next_event,
+ .set_state_oneshot_stopped = decrementer_shutdown,
.set_state_shutdown = decrementer_shutdown,
.tick_resume = decrementer_shutdown,
.features = CLOCK_EVT_FEAT_ONESHOT |
* Read the SPURR on systems that have it, otherwise the PURR,
* or if that doesn't exist return the timebase value passed in.
*/
-static unsigned long read_spurr(unsigned long tb)
+static inline unsigned long read_spurr(unsigned long tb)
{
if (cpu_has_feature(CPU_FTR_SPURR))
return mfspr(SPRN_SPURR);
* Account time for a transition between system, hard irq
* or soft irq state.
*/
-static unsigned long vtime_delta(struct task_struct *tsk,
- unsigned long *stime_scaled,
- unsigned long *steal_time)
+static unsigned long vtime_delta_scaled(struct cpu_accounting_data *acct,
+ unsigned long now, unsigned long stime)
{
- unsigned long now, nowscaled, deltascaled;
- unsigned long stime;
+ unsigned long stime_scaled = 0;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ unsigned long nowscaled, deltascaled;
unsigned long utime, utime_scaled;
- struct cpu_accounting_data *acct = get_accounting(tsk);
- WARN_ON_ONCE(!irqs_disabled());
-
- now = mftb();
nowscaled = read_spurr(now);
- stime = now - acct->starttime;
- acct->starttime = now;
deltascaled = nowscaled - acct->startspurr;
acct->startspurr = nowscaled;
-
- *steal_time = calculate_stolen_time(now);
-
utime = acct->utime - acct->utime_sspurr;
acct->utime_sspurr = acct->utime;
* the user ticks get saved up in paca->user_time_scaled to be
* used by account_process_tick.
*/
- *stime_scaled = stime;
+ stime_scaled = stime;
utime_scaled = utime;
if (deltascaled != stime + utime) {
if (utime) {
- *stime_scaled = deltascaled * stime / (stime + utime);
- utime_scaled = deltascaled - *stime_scaled;
+ stime_scaled = deltascaled * stime / (stime + utime);
+ utime_scaled = deltascaled - stime_scaled;
} else {
- *stime_scaled = deltascaled;
+ stime_scaled = deltascaled;
}
}
acct->utime_scaled += utime_scaled;
+#endif
+
+ return stime_scaled;
+}
+
+static unsigned long vtime_delta(struct task_struct *tsk,
+ unsigned long *stime_scaled,
+ unsigned long *steal_time)
+{
+ unsigned long now, stime;
+ struct cpu_accounting_data *acct = get_accounting(tsk);
+
+ WARN_ON_ONCE(!irqs_disabled());
+
+ now = mftb();
+ stime = now - acct->starttime;
+ acct->starttime = now;
+
+ *stime_scaled = vtime_delta_scaled(acct, now, stime);
+
+ *steal_time = calculate_stolen_time(now);
return stime;
}
if ((tsk->flags & PF_VCPU) && !irq_count()) {
acct->gtime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->utime_scaled += stime_scaled;
+#endif
} else {
if (hardirq_count())
acct->hardirq_time += stime;
else
acct->stime += stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
acct->stime_scaled += stime_scaled;
+#endif
}
}
EXPORT_SYMBOL_GPL(vtime_account_system);
acct->idle_time += stime + steal_time;
}
+static void vtime_flush_scaled(struct task_struct *tsk,
+ struct cpu_accounting_data *acct)
+{
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ if (acct->utime_scaled)
+ tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
+ if (acct->stime_scaled)
+ tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
+
+ acct->utime_scaled = 0;
+ acct->utime_sspurr = 0;
+ acct->stime_scaled = 0;
+#endif
+}
+
/*
* Account the whole cputime accumulated in the paca
* Must be called with interrupts disabled.
if (acct->utime)
account_user_time(tsk, cputime_to_nsecs(acct->utime));
- if (acct->utime_scaled)
- tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled);
-
if (acct->gtime)
account_guest_time(tsk, cputime_to_nsecs(acct->gtime));
- if (acct->steal_time)
+ if (IS_ENABLED(CONFIG_PPC_SPLPAR) && acct->steal_time) {
account_steal_time(cputime_to_nsecs(acct->steal_time));
+ acct->steal_time = 0;
+ }
if (acct->idle_time)
account_idle_time(cputime_to_nsecs(acct->idle_time));
if (acct->stime)
account_system_index_time(tsk, cputime_to_nsecs(acct->stime),
CPUTIME_SYSTEM);
- if (acct->stime_scaled)
- tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled);
if (acct->hardirq_time)
account_system_index_time(tsk, cputime_to_nsecs(acct->hardirq_time),
account_system_index_time(tsk, cputime_to_nsecs(acct->softirq_time),
CPUTIME_SOFTIRQ);
+ vtime_flush_scaled(tsk, acct);
+
acct->utime = 0;
- acct->utime_scaled = 0;
- acct->utime_sspurr = 0;
acct->gtime = 0;
- acct->steal_time = 0;
acct->idle_time = 0;
acct->stime = 0;
- acct->stime_scaled = 0;
acct->hardirq_time = 0;
acct->softirq_time = 0;
}
*dec = decrementer_clockevent;
dec->cpumask = cpumask_of(cpu);
+ clockevents_config_and_register(dec, ppc_tb_freq, 2, decrementer_max);
+
printk_once(KERN_DEBUG "clockevent: %s mult[%x] shift[%d] cpu[%d]\n",
dec->name, dec->mult, dec->shift, cpu);
- clockevents_register_device(dec);
+ /* Set values for KVM, see kvm_emulate_dec() */
+ decrementer_clockevent.mult = dec->mult;
+ decrementer_clockevent.shift = dec->shift;
}
static void enable_large_decrementer(void)
static void __init init_decrementer_clockevent(void)
{
- int cpu = smp_processor_id();
-
- clockevents_calc_mult_shift(&decrementer_clockevent, ppc_tb_freq, 4);
-
- decrementer_clockevent.max_delta_ns =
- clockevent_delta2ns(decrementer_max, &decrementer_clockevent);
- decrementer_clockevent.max_delta_ticks = decrementer_max;
- decrementer_clockevent.min_delta_ns =
- clockevent_delta2ns(2, &decrementer_clockevent);
- decrementer_clockevent.min_delta_ticks = 2;
-
- register_decrementer_clockevent(cpu);
+ register_decrementer_clockevent(smp_processor_id());
}
void secondary_cpu_time_init(void)
blr
EXPORT_SYMBOL_GPL(tm_abort);
-/* void tm_reclaim(struct thread_struct *thread,
+/*
+ * void tm_reclaim(struct thread_struct *thread,
* uint8_t cause)
*
* - Performs a full reclaim. This destroys outstanding
- * transactions and updates thread->regs.tm_ckpt_* with the
- * original checkpointed state. Note that thread->regs is
- * unchanged.
+ * transactions and updates thread.ckpt_regs, thread.ckfp_state and
+ * thread.ckvr_state with the original checkpointed state. Note that
+ * thread->regs is unchanged.
*
* Purpose is to both abort transactions of, and preserve the state of,
* a transactions at a context switch. We preserve/restore both sets of process
*/
TRECLAIM(R4) /* Cause in r4 */
- /* ******************** GPRs ******************** */
- /* Stash the checkpointed r13 away in the scratch SPR and get the real
- * paca
+ /*
+ * ******************** GPRs ********************
+ * Stash the checkpointed r13 in the scratch SPR and get the real paca.
*/
SET_SCRATCH0(r13)
GET_PACA(r13)
- /* Stash the checkpointed r1 away in paca tm_scratch and get the real
- * stack pointer back
+ /*
+ * Stash the checkpointed r1 away in paca->tm_scratch and get the real
+ * stack pointer back into r1.
*/
std r1, PACATMSCRATCH(r13)
ld r1, PACAR1(r13)
addi r7, r12, PT_CKPT_REGS /* Thread's ckpt_regs */
- /* Make r7 look like an exception frame so that we
- * can use the neat GPRx(n) macros. r7 is NOT a pt_regs ptr!
+ /*
+ * Make r7 look like an exception frame so that we can use the neat
+ * GPRx(n) macros. r7 is NOT a pt_regs ptr!
*/
subi r7, r7, STACK_FRAME_OVERHEAD
/* Sync the userland GPRs 2-12, 14-31 to thread->regs: */
SAVE_GPR(0, r7) /* user r0 */
- SAVE_GPR(2, r7) /* user r2 */
+ SAVE_GPR(2, r7) /* user r2 */
SAVE_4GPRS(3, r7) /* user r3-r6 */
SAVE_GPR(8, r7) /* user r8 */
SAVE_GPR(9, r7) /* user r9 */
/* ******************** NIP ******************** */
mfspr r3, SPRN_TFHAR
std r3, _NIP(r7) /* Returns to failhandler */
- /* The checkpointed NIP is ignored when rescheduling/rechkpting,
+ /*
+ * The checkpointed NIP is ignored when rescheduling/rechkpting,
* but is used in signal return to 'wind back' to the abort handler.
*/
std r3, THREAD_TM_TAR(r12)
std r4, THREAD_TM_DSCR(r12)
- /* MSR and flags: We don't change CRs, and we don't need to alter
- * MSR.
+ /*
+ * MSR and flags: We don't change CRs, and we don't need to alter MSR.
*/
- /* ******************** FPR/VR/VSRs ************
+ /*
+ * ******************** FPR/VR/VSRs ************
* After reclaiming, capture the checkpointed FPRs/VRs.
*
* We enabled VEC/FP/VSX in the msr above, so we can execute these
/* Altivec (VEC/VMX/VR)*/
addi r7, r3, THREAD_CKVRSTATE
- SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 transact vr state */
+ SAVE_32VRS(0, r6, r7) /* r6 scratch, r7 ckvr_state */
mfvscr v0
li r6, VRSTATE_VSCR
stvx v0, r7, r6
/* Floating Point (FP) */
addi r7, r3, THREAD_CKFPSTATE
- SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 transact fp state */
+ SAVE_32FPRS_VSRS(0, R6, R7) /* r6 scratch, r7 ckfp_state */
mffs fr0
stfd fr0,FPSTATE_FPSCR(r7)
- /* TM regs, incl TEXASR -- these live in thread_struct. Note they've
+ /*
+ * TM regs, incl TEXASR -- these live in thread_struct. Note they've
* been updated by the treclaim, to explain to userland the failure
* cause (aborted).
*/
blr
- /*
+ /*
* void __tm_recheckpoint(struct thread_struct *thread)
* - Restore the checkpointed register state saved by tm_reclaim
* when we switch_to a process.
std r2, STK_GOT(r1)
stdu r1, -TM_FRAME_SIZE(r1)
- /* We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
+ /*
+ * We've a struct pt_regs at [r1+STACK_FRAME_OVERHEAD].
* This is used for backing up the NVGPRs:
*/
SAVE_NVGPRS(r1)
addi r7, r3, PT_CKPT_REGS /* Thread's ckpt_regs */
- /* Make r7 look like an exception frame so that we
- * can use the neat GPRx(n) macros. r7 is now NOT a pt_regs ptr!
+ /*
+ * Make r7 look like an exception frame so that we can use the neat
+ * GPRx(n) macros. r7 is now NOT a pt_regs ptr!
*/
subi r7, r7, STACK_FRAME_OVERHEAD
REST_NVGPRS(r7) /* GPR14-31 */
- /* Load up PPR and DSCR here so we don't run with user values for long
- */
+ /* Load up PPR and DSCR here so we don't run with user values for long */
mtspr SPRN_DSCR, r5
mtspr SPRN_PPR, r6
- /* Do final sanity check on TEXASR to make sure FS is set. Do this
+ /*
+ * Do final sanity check on TEXASR to make sure FS is set. Do this
* here before we load up the userspace r1 so any bugs we hit will get
- * a call chain */
+ * a call chain.
+ */
mfspr r5, SPRN_TEXASR
srdi r5, r5, 16
li r6, (TEXASR_FS)@h
1: tdeqi r6, 0
EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0
- /* Do final sanity check on MSR to make sure we are not transactional
- * or suspended
+ /*
+ * Do final sanity check on MSR to make sure we are not transactional
+ * or suspended.
*/
mfmsr r6
li r5, (MSR_TS_MASK)@higher
REST_GPR(6, r7)
/*
- * Store r1 and r5 on the stack so that we can access them
- * after we clear MSR RI.
+ * Store r1 and r5 on the stack so that we can access them after we
+ * clear MSR RI.
*/
REST_GPR(5, r7)
HMT_MEDIUM
- /* Our transactional state has now changed.
+ /*
+ * Our transactional state has now changed.
*
* Now just get out of here. Transactional (current) state will be
* updated once restore is called on the return path in the _switch-ed
# Makefile for the powerpc trace subsystem
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
ifdef CONFIG_FUNCTION_TRACER
# do not trace tracer code
-CFLAGS_REMOVE_ftrace.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
endif
obj32-$(CONFIG_FUNCTION_TRACER) += ftrace_32.o
#ifdef CONFIG_DYNAMIC_FTRACE
+
+/*
+ * We generally only have a single long_branch tramp and at most 2 or 3 plt
+ * tramps generated. But, we don't use the plt tramps currently. We also allot
+ * 2 tramps after .text and .init.text. So, we only end up with around 3 usable
+ * tramps in total. Set aside 8 just to be sure.
+ */
+#define NUM_FTRACE_TRAMPS 8
+static unsigned long ftrace_tramps[NUM_FTRACE_TRAMPS];
+
static unsigned int
ftrace_call_replace(unsigned long ip, unsigned long addr, int link)
{
return create_branch((unsigned int *)ip, addr, 0);
}
-#ifdef CONFIG_MODULES
-
static int is_bl_op(unsigned int op)
{
return (op & 0xfc000003) == 0x48000001;
}
+static int is_b_op(unsigned int op)
+{
+ return (op & 0xfc000003) == 0x48000000;
+}
+
static unsigned long find_bl_target(unsigned long ip, unsigned int op)
{
static int offset;
return ip + (long)offset;
}
+#ifdef CONFIG_MODULES
#ifdef CONFIG_PPC64
static int
__ftrace_make_nop(struct module *mod,
#endif /* PPC64 */
#endif /* CONFIG_MODULES */
+static unsigned long find_ftrace_tramp(unsigned long ip)
+{
+ int i;
+
+ /*
+ * We have the compiler generated long_branch tramps at the end
+ * and we prefer those
+ */
+ for (i = NUM_FTRACE_TRAMPS - 1; i >= 0; i--)
+ if (!ftrace_tramps[i])
+ continue;
+ else if (create_branch((void *)ip, ftrace_tramps[i], 0))
+ return ftrace_tramps[i];
+
+ return 0;
+}
+
+static int add_ftrace_tramp(unsigned long tramp)
+{
+ int i;
+
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i]) {
+ ftrace_tramps[i] = tramp;
+ return 0;
+ }
+
+ return -1;
+}
+
+/*
+ * If this is a compiler generated long_branch trampoline (essentially, a
+ * trampoline that has a branch to _mcount()), we re-write the branch to
+ * instead go to ftrace_[regs_]caller() and note down the location of this
+ * trampoline.
+ */
+static int setup_mcount_compiler_tramp(unsigned long tramp)
+{
+ int i, op;
+ unsigned long ptr;
+ static unsigned long ftrace_plt_tramps[NUM_FTRACE_TRAMPS];
+
+ /* Is this a known long jump tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_tramps[i])
+ break;
+ else if (ftrace_tramps[i] == tramp)
+ return 0;
+
+ /* Is this a known plt tramp? */
+ for (i = 0; i < NUM_FTRACE_TRAMPS; i++)
+ if (!ftrace_plt_tramps[i])
+ break;
+ else if (ftrace_plt_tramps[i] == tramp)
+ return -1;
+
+ /* New trampoline -- read where this goes */
+ if (probe_kernel_read(&op, (void *)tramp, sizeof(int))) {
+ pr_debug("Fetching opcode failed.\n");
+ return -1;
+ }
+
+ /* Is this a 24 bit branch? */
+ if (!is_b_op(op)) {
+ pr_debug("Trampoline is not a long branch tramp.\n");
+ return -1;
+ }
+
+ /* lets find where the pointer goes */
+ ptr = find_bl_target(tramp, op);
+
+ if (ptr != ppc_global_function_entry((void *)_mcount)) {
+ pr_debug("Trampoline target %p is not _mcount\n", (void *)ptr);
+ return -1;
+ }
+
+ /* Let's re-write the tramp to go to ftrace_[regs_]caller */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ ptr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+ ptr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+ if (!create_branch((void *)tramp, ptr, 0)) {
+ pr_debug("%ps is not reachable from existing mcount tramp\n",
+ (void *)ptr);
+ return -1;
+ }
+
+ if (patch_branch((unsigned int *)tramp, ptr, 0)) {
+ pr_debug("REL24 out of range!\n");
+ return -1;
+ }
+
+ if (add_ftrace_tramp(tramp)) {
+ pr_debug("No tramp locations left\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int __ftrace_make_nop_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long tramp, ip = rec->ip;
+ unsigned int op;
+
+ /* Read where this goes */
+ if (probe_kernel_read(&op, (void *)ip, sizeof(int))) {
+ pr_err("Fetching opcode failed.\n");
+ return -EFAULT;
+ }
+
+ /* Make sure that that this is still a 24bit jump */
+ if (!is_bl_op(op)) {
+ pr_err("Not expected bl: opcode is %x\n", op);
+ return -EINVAL;
+ }
+
+ /* Let's find where the pointer goes */
+ tramp = find_bl_target(ip, op);
+
+ pr_devel("ip:%lx jumps to %lx", ip, tramp);
+
+ if (setup_mcount_compiler_tramp(tramp)) {
+ /* Are other trampolines reachable? */
+ if (!find_ftrace_tramp(ip)) {
+ pr_err("No ftrace trampolines reachable from %ps\n",
+ (void *)ip);
+ return -EINVAL;
+ }
+ }
+
+ if (patch_instruction((unsigned int *)ip, PPC_INST_NOP)) {
+ pr_err("Patching NOP failed.\n");
+ return -EPERM;
+ }
+
+ return 0;
+}
+
int ftrace_make_nop(struct module *mod,
struct dyn_ftrace *rec, unsigned long addr)
{
old = ftrace_call_replace(ip, addr, 1);
new = PPC_INST_NOP;
return ftrace_modify_code(ip, old, new);
- }
+ } else if (core_kernel_text(ip))
+ return __ftrace_make_nop_kernel(rec, addr);
#ifdef CONFIG_MODULES
/*
#endif /* CONFIG_PPC64 */
#endif /* CONFIG_MODULES */
+static int __ftrace_make_call_kernel(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned int op;
+ void *ip = (void *)rec->ip;
+ unsigned long tramp, entry, ptr;
+
+ /* Make sure we're being asked to patch branch to a known ftrace addr */
+ entry = ppc_global_function_entry((void *)ftrace_caller);
+ ptr = ppc_global_function_entry((void *)addr);
+
+ if (ptr != entry) {
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ entry = ppc_global_function_entry((void *)ftrace_regs_caller);
+ if (ptr != entry) {
+#endif
+ pr_err("Unknown ftrace addr to patch: %ps\n", (void *)ptr);
+ return -EINVAL;
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ }
+#endif
+ }
+
+ /* Make sure we have a nop */
+ if (probe_kernel_read(&op, ip, sizeof(op))) {
+ pr_err("Unable to read ftrace location %p\n", ip);
+ return -EFAULT;
+ }
+
+ if (op != PPC_INST_NOP) {
+ pr_err("Unexpected call sequence at %p: %x\n", ip, op);
+ return -EINVAL;
+ }
+
+ tramp = find_ftrace_tramp((unsigned long)ip);
+ if (!tramp) {
+ pr_err("No ftrace trampolines reachable from %ps\n", ip);
+ return -EINVAL;
+ }
+
+ if (patch_branch(ip, tramp, BRANCH_SET_LINK)) {
+ pr_err("Error patching branch to ftrace tramp!\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
{
unsigned long ip = rec->ip;
old = PPC_INST_NOP;
new = ftrace_call_replace(ip, addr, 1);
return ftrace_modify_code(ip, old, new);
- }
+ } else if (core_kernel_text(ip))
+ return __ftrace_make_call_kernel(rec, addr);
#ifdef CONFIG_MODULES
/*
old = ftrace_call_replace(ip, old_addr, 1);
new = ftrace_call_replace(ip, addr, 1);
return ftrace_modify_code(ip, old, new);
+ } else if (core_kernel_text(ip)) {
+ /*
+ * We always patch out of range locations to go to the regs
+ * variant, so there is nothing to do here
+ */
+ return 0;
}
#ifdef CONFIG_MODULES
ftrace_modify_all_code(command);
}
+#ifdef CONFIG_PPC64
+#define PACATOC offsetof(struct paca_struct, kernel_toc)
+
+#define PPC_LO(v) ((v) & 0xffff)
+#define PPC_HI(v) (((v) >> 16) & 0xffff)
+#define PPC_HA(v) PPC_HI ((v) + 0x8000)
+
+extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[];
+
+int __init ftrace_dyn_arch_init(void)
+{
+ int i;
+ unsigned int *tramp[] = { ftrace_tramp_text, ftrace_tramp_init };
+ u32 stub_insns[] = {
+ 0xe98d0000 | PACATOC, /* ld r12,PACATOC(r13) */
+ 0x3d8c0000, /* addis r12,r12,<high> */
+ 0x398c0000, /* addi r12,r12,<low> */
+ 0x7d8903a6, /* mtctr r12 */
+ 0x4e800420, /* bctr */
+ };
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+ unsigned long addr = ppc_global_function_entry((void *)ftrace_regs_caller);
+#else
+ unsigned long addr = ppc_global_function_entry((void *)ftrace_caller);
+#endif
+ long reladdr = addr - kernel_toc_addr();
+
+ if (reladdr > 0x7FFFFFFF || reladdr < -(0x80000000L)) {
+ pr_err("Address of %ps out of range of kernel_toc.\n",
+ (void *)addr);
+ return -1;
+ }
+
+ for (i = 0; i < 2; i++) {
+ memcpy(tramp[i], stub_insns, sizeof(stub_insns));
+ tramp[i][1] |= PPC_HA(reladdr);
+ tramp[i][2] |= PPC_LO(reladdr);
+ add_ftrace_tramp((unsigned long)tramp[i]);
+ }
+
+ return 0;
+}
+#else
int __init ftrace_dyn_arch_init(void)
{
return 0;
}
+#endif
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
#include <asm/ppc-opcode.h>
#include <asm/export.h>
+.pushsection ".tramp.ftrace.text","aw",@progbits;
+.globl ftrace_tramp_text
+ftrace_tramp_text:
+ .space 64
+.popsection
+
+.pushsection ".tramp.ftrace.init","aw",@progbits;
+.globl ftrace_tramp_init
+ftrace_tramp_init:
+ .space 64
+.popsection
+
_GLOBAL(mcount)
_GLOBAL(_mcount)
EXPORT_SYMBOL(_mcount)
mdelay(MSEC_PER_SEC);
}
- if (in_interrupt())
- panic("Fatal exception in interrupt");
if (panic_on_oops)
panic("Fatal exception");
do_exit(signr);
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_ICPERR) {
- printk("Instruction Cache Parity Error\n");
+ pr_cont("Instruction Cache Parity Error\n");
/*
* This is recoverable by invalidating the i-cache.
}
if (reason & MCSR_DCPERR_MC) {
- printk("Data Cache Parity Error\n");
+ pr_cont("Data Cache Parity Error\n");
/*
* In write shadow mode we auto-recover from the error, but it
}
if (reason & MCSR_L2MMU_MHIT) {
- printk("Hit on multiple TLB entries\n");
+ pr_cont("Hit on multiple TLB entries\n");
recoverable = 0;
}
if (reason & MCSR_NMI)
- printk("Non-maskable interrupt\n");
+ pr_cont("Non-maskable interrupt\n");
if (reason & MCSR_IF) {
- printk("Instruction Fetch Error Report\n");
+ pr_cont("Instruction Fetch Error Report\n");
recoverable = 0;
}
if (reason & MCSR_LD) {
- printk("Load Error Report\n");
+ pr_cont("Load Error Report\n");
recoverable = 0;
}
if (reason & MCSR_ST) {
- printk("Store Error Report\n");
+ pr_cont("Store Error Report\n");
recoverable = 0;
}
if (reason & MCSR_LDG) {
- printk("Guarded Load Error Report\n");
+ pr_cont("Guarded Load Error Report\n");
recoverable = 0;
}
if (reason & MCSR_TLBSYNC)
- printk("Simultaneous tlbsync operations\n");
+ pr_cont("Simultaneous tlbsync operations\n");
if (reason & MCSR_BSL2_ERR) {
- printk("Level 2 Cache Error\n");
+ pr_cont("Level 2 Cache Error\n");
recoverable = 0;
}
addr = mfspr(SPRN_MCAR);
addr |= (u64)mfspr(SPRN_MCARU) << 32;
- printk("Machine Check %s Address: %#llx\n",
+ pr_cont("Machine Check %s Address: %#llx\n",
reason & MCSR_MEA ? "Effective" : "Physical", addr);
}
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_ICPERR)
- printk("Instruction Cache Parity Error\n");
+ pr_cont("Instruction Cache Parity Error\n");
if (reason & MCSR_DCP_PERR)
- printk("Data Cache Push Parity Error\n");
+ pr_cont("Data Cache Push Parity Error\n");
if (reason & MCSR_DCPERR)
- printk("Data Cache Parity Error\n");
+ pr_cont("Data Cache Parity Error\n");
if (reason & MCSR_BUS_IAERR)
- printk("Bus - Instruction Address Error\n");
+ pr_cont("Bus - Instruction Address Error\n");
if (reason & MCSR_BUS_RAERR)
- printk("Bus - Read Address Error\n");
+ pr_cont("Bus - Read Address Error\n");
if (reason & MCSR_BUS_WAERR)
- printk("Bus - Write Address Error\n");
+ pr_cont("Bus - Write Address Error\n");
if (reason & MCSR_BUS_IBERR)
- printk("Bus - Instruction Data Error\n");
+ pr_cont("Bus - Instruction Data Error\n");
if (reason & MCSR_BUS_RBERR)
- printk("Bus - Read Data Bus Error\n");
+ pr_cont("Bus - Read Data Bus Error\n");
if (reason & MCSR_BUS_WBERR)
- printk("Bus - Write Data Bus Error\n");
+ pr_cont("Bus - Write Data Bus Error\n");
if (reason & MCSR_BUS_IPERR)
- printk("Bus - Instruction Parity Error\n");
+ pr_cont("Bus - Instruction Parity Error\n");
if (reason & MCSR_BUS_RPERR)
- printk("Bus - Read Parity Error\n");
+ pr_cont("Bus - Read Parity Error\n");
return 0;
}
printk("Caused by (from MCSR=%lx): ", reason);
if (reason & MCSR_MCP)
- printk("Machine Check Signal\n");
+ pr_cont("Machine Check Signal\n");
if (reason & MCSR_CP_PERR)
- printk("Cache Push Parity Error\n");
+ pr_cont("Cache Push Parity Error\n");
if (reason & MCSR_CPERR)
- printk("Cache Parity Error\n");
+ pr_cont("Cache Parity Error\n");
if (reason & MCSR_EXCP_ERR)
- printk("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
+ pr_cont("ISI, ITLB, or Bus Error on first instruction fetch for an exception handler\n");
if (reason & MCSR_BUS_IRERR)
- printk("Bus - Read Bus Error on instruction fetch\n");
+ pr_cont("Bus - Read Bus Error on instruction fetch\n");
if (reason & MCSR_BUS_DRERR)
- printk("Bus - Read Bus Error on data load\n");
+ pr_cont("Bus - Read Bus Error on data load\n");
if (reason & MCSR_BUS_WRERR)
- printk("Bus - Write Bus Error on buffered store or cache line push\n");
+ pr_cont("Bus - Write Bus Error on buffered store or cache line push\n");
return 0;
}
printk("Caused by (from SRR1=%lx): ", reason);
switch (reason & 0x601F0000) {
case 0x80000:
- printk("Machine check signal\n");
+ pr_cont("Machine check signal\n");
break;
case 0: /* for 601 */
case 0x40000:
case 0x140000: /* 7450 MSS error and TEA */
- printk("Transfer error ack signal\n");
+ pr_cont("Transfer error ack signal\n");
break;
case 0x20000:
- printk("Data parity error signal\n");
+ pr_cont("Data parity error signal\n");
break;
case 0x10000:
- printk("Address parity error signal\n");
+ pr_cont("Address parity error signal\n");
break;
case 0x20000000:
- printk("L1 Data Cache error\n");
+ pr_cont("L1 Data Cache error\n");
break;
case 0x40000000:
- printk("L1 Instruction Cache error\n");
+ pr_cont("L1 Instruction Cache error\n");
break;
case 0x00100000:
- printk("L2 data cache parity error\n");
+ pr_cont("L2 data cache parity error\n");
break;
default:
- printk("Unknown values in msr\n");
+ pr_cont("Unknown values in msr\n");
}
return 0;
}
if (!nested)
nmi_enter();
- /* 64s accounts the mce in machine_check_early when in HVMODE */
- if (!IS_ENABLED(CONFIG_PPC_BOOK3S_64) || !cpu_has_feature(CPU_FTR_HVMODE))
- __this_cpu_inc(irq_stat.mce_exceptions);
+ __this_cpu_inc(irq_stat.mce_exceptions);
add_taint(TAINT_MACHINE_CHECK, LOCKDEP_NOW_UNRELIABLE);
if (check_io_access(regs))
goto bail;
- die("Machine check", regs, SIGBUS);
-
/* Must die if the interrupt is not recoverable */
if (!(regs->msr & MSR_RI))
nmi_panic(regs, "Unrecoverable Machine check");
+ if (!nested)
+ nmi_exit();
+
+ die("Machine check", regs, SIGBUS);
+
+ return;
+
bail:
if (!nested)
nmi_exit();
goto bail;
} else {
printk(KERN_EMERG "Unexpected TM Bad Thing exception "
- "at %lx (msr 0x%x)\n", regs->nip, reason);
+ "at %lx (msr 0x%lx)\n", regs->nip, regs->msr);
die("Unrecoverable exception", regs, SIGABRT);
}
}
panic("kernel stack overflow");
}
-void nonrecoverable_exception(struct pt_regs *regs)
-{
- printk(KERN_ERR "Non-recoverable exception at PC=%lx MSR=%lx\n",
- regs->nip, regs->msr);
- debugger(regs);
- die("nonrecoverable exception", regs, SIGKILL);
-}
-
void kernel_fp_unavailable_exception(struct pt_regs *regs)
{
enum ctx_state prev_state = exception_enter();
* checkpointed FP registers need to be loaded.
*/
tm_reclaim_current(TM_CAUSE_FAC_UNAV);
- /* Reclaim didn't save out any FPRs to transact_fprs. */
+
+ /*
+ * Reclaim initially saved out bogus (lazy) FPRs to ckfp_state, and
+ * then it was overwrite by the thr->fp_state by tm_reclaim_thread().
+ *
+ * At this point, ck{fp,vr}_state contains the exact values we want to
+ * recheckpoint.
+ */
/* Enable FP for the task: */
current->thread.load_fp = 1;
- /* This loads and recheckpoints the FP registers from
- * thread.fpr[]. They will remain in registers after the
- * checkpoint so we don't need to reload them after.
- * If VMX is in use, the VRs now hold checkpointed values,
- * so we don't want to load the VRs from the thread_struct.
+ /*
+ * Recheckpoint all the checkpointed ckpt, ck{fp, vr}_state registers.
*/
tm_recheckpoint(¤t->thread);
}
*/
void unrecoverable_exception(struct pt_regs *regs)
{
- printk(KERN_EMERG "Unrecoverable exception %lx at %lx\n",
- regs->trap, regs->nip);
+ pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
+ regs->trap, regs->nip, regs->msr);
die("Unrecoverable exception", regs, SIGABRT);
}
NOKPROBE_SYMBOL(unrecoverable_exception);
mtlr r0
addi r3, r3, __kernel_datapage_offset-data_page_branch
lwz r0,0(r3)
+ .cfi_restore lr
add r3,r0,r3
blr
.cfi_endproc
*/
99:
li r0,__NR_clock_gettime
+ .cfi_restore lr
sc
blr
.cfi_endproc
mtlr r0
addi r3, r3, __kernel_datapage_offset-data_page_branch
lwz r0,0(r3)
+ .cfi_restore lr
add r3,r0,r3
blr
.cfi_endproc
*/
99:
li r0,__NR_clock_gettime
+ .cfi_restore lr
sc
blr
.cfi_endproc
#else
#define PROVIDE32(x) PROVIDE(x)
#endif
+
+#define BSS_FIRST_SECTIONS *(.bss.prominit)
+
#include <asm/page.h>
#include <asm-generic/vmlinux.lds.h>
#include <asm/cache.h>
#endif
/* careful! __ftr_alt_* sections need to be close to .text */
*(.text.hot TEXT_MAIN .text.fixup .text.unlikely .fixup __ftr_alt_* .ref.text);
+#ifdef CONFIG_PPC64
+ *(.tramp.ftrace.text);
+#endif
SCHED_TEXT
CPUIDLE_TEXT
LOCK_TEXT
*/
. = ALIGN(STRICT_ALIGN_SIZE);
__init_begin = .;
- INIT_TEXT_SECTION(PAGE_SIZE) :kernel
+ . = ALIGN(PAGE_SIZE);
+ .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) {
+ _sinittext = .;
+ INIT_TEXT
+ _einittext = .;
+#ifdef CONFIG_PPC64
+ *(.tramp.ftrace.init);
+#endif
+ } :kernel
/* .exit.text is discarded at runtime, not link time,
* to deal with references from __bug_table
# Makefile for Kernel-based Virtual Machine module
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
ccflags-y := -Ivirt/kvm -Iarch/powerpc/kvm
KVM := ../../../virt/kvm
# Makefile for ppc-specific library files..
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
obj-$(CONFIG_PPC32) += div64.o copy_32.o crtsavres.o strlen_32.o
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
# See corresponding test in arch/powerpc/Makefile
# 64-bit linker creates .sfpr on demand for final link (vmlinux),
# so it is only needed for modules, and only for older linkers which
else
pfn = __pa_symbol(addr) >> PAGE_SHIFT;
- err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT),
- pgprot_val(PAGE_KERNEL));
+ err = map_kernel_page(text_poke_addr, (pfn << PAGE_SHIFT), PAGE_KERNEL);
pr_devel("Mapped addr %lx with pfn %lx:%d\n", text_poke_addr, pfn, err);
if (err)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+#include <linux/uaccess.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+ /*
+ * Emulate 'blr'. 'regs' represents the state on entry of a predefined
+ * function in the kernel/module, captured on a kprobe. We don't need
+ * to worry about 32-bit userspace on a 64-bit kernel.
+ */
+ regs->nip = regs->link;
+}
+NOKPROBE_SYMBOL(override_function_with_return);
.Lms: PPC_MTOCRF(1,r0)
mr r6,r3
blt cr1,8f
- beq+ 3f /* if already 8-byte aligned */
+ beq 3f /* if already 8-byte aligned */
subf r5,r0,r5
bf 31,1f
stb r4,0(r6)
addi r6,r6,8
8: cmpwi r5,0
PPC_MTOCRF(1,r5)
- beqlr+
+ beqlr
bf 29,9f
stw r4,0(r6)
addi r6,r6,4
/* PIN up to the 3 first 8Mb after IMMR in DTLB table */
#ifdef CONFIG_PIN_TLB_DATA
unsigned long ctr = mfspr(SPRN_MD_CTR) & 0xfe000000;
- unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_PRIVILEGED | _PAGE_DIRTY;
+ unsigned long flags = 0xf0 | MD_SPS16K | _PAGE_SH | _PAGE_DIRTY;
#ifdef CONFIG_PIN_TLB_IMMR
int i = 29;
#else
{
unsigned long p = PHYS_IMMR_BASE;
unsigned long v = VIRT_IMMR_BASE;
- unsigned long f = pgprot_val(PAGE_KERNEL_NCG);
int offset;
for (offset = 0; offset < IMMR_SIZE; offset += PAGE_SIZE)
- map_kernel_page(v + offset, p + offset, f);
+ map_kernel_page(v + offset, p + offset, PAGE_KERNEL_NCG);
}
/* Address of instructions to patch */
# Makefile for the linux ppc-specific parts of the memory manager.
#
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
+
obj-y := fault.o mem.o pgtable.o mmap.o \
init_$(BITS).o pgtable_$(BITS).o \
init-common.o mmu_context.o drmem.o
obj-$(CONFIG_PPC_BOOK3E) += tlb_low_$(BITS)e.o
hash64-$(CONFIG_PPC_NATIVE) := hash_native_64.o
obj-$(CONFIG_PPC_BOOK3E_64) += pgtable-book3e.o
-obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb_low.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
+obj-$(CONFIG_PPC_BOOK3S_64) += pgtable-hash64.o hash_utils_64.o slb.o $(hash64-y) mmu_context_book3s64.o pgtable-book3s64.o
obj-$(CONFIG_PPC_RADIX_MMU) += pgtable-radix.o tlb-radix.o
obj-$(CONFIG_PPC_STD_MMU_32) += ppc_mmu_32.o hash_low_32.o mmu_context_hash32.o
obj-$(CONFIG_PPC_STD_MMU) += tlb_hash$(BITS).o
obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o
obj-$(CONFIG_SPAPR_TCE_IOMMU) += mmu_context_iommu.o
obj-$(CONFIG_PPC_PTDUMP) += dump_linuxpagetables.o
+ifdef CONFIG_PPC_PTDUMP
+obj-$(CONFIG_4xx) += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_8xx) += dump_linuxpagetables-8xx.o
+obj-$(CONFIG_PPC_BOOK3E_MMU) += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_BOOK3S_32) += dump_linuxpagetables-generic.o
+obj-$(CONFIG_PPC_BOOK3S_64) += dump_linuxpagetables-book3s64.o
+endif
obj-$(CONFIG_PPC_HTDUMP) += dump_hashpagetable.o
obj-$(CONFIG_PPC_MEM_KEYS) += pkeys.o
do {
SetPageReserved(page);
map_kernel_page(vaddr, page_to_phys(page),
- pgprot_val(pgprot_noncached(PAGE_KERNEL)));
+ pgprot_noncached(PAGE_KERNEL));
page++;
vaddr += PAGE_SIZE;
} while (size -= PAGE_SIZE);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <asm/pgtable.h>
+
+#include "dump_linuxpagetables.h"
+
+static const struct flag_info flag_array[] = {
+ {
+ .mask = _PAGE_SH,
+ .val = 0,
+ .set = "user",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_RO | _PAGE_NA,
+ .val = 0,
+ .set = "rw",
+ }, {
+ .mask = _PAGE_RO | _PAGE_NA,
+ .val = _PAGE_RO,
+ .set = "r ",
+ }, {
+ .mask = _PAGE_RO | _PAGE_NA,
+ .val = _PAGE_NA,
+ .set = " ",
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = " X ",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "present",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_GUARDED,
+ .val = _PAGE_GUARDED,
+ .set = "guarded",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "dirty",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "accessed",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_NO_CACHE,
+ .val = _PAGE_NO_CACHE,
+ .set = "no cache",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_SPECIAL,
+ .val = _PAGE_SPECIAL,
+ .set = "special",
+ }
+};
+
+struct pgtable_level pg_level[5] = {
+ {
+ }, { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pud */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pmd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pte */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ },
+};
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <asm/pgtable.h>
+
+#include "dump_linuxpagetables.h"
+
+static const struct flag_info flag_array[] = {
+ {
+ .mask = _PAGE_PRIVILEGED,
+ .val = 0,
+ .set = "user",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_READ,
+ .val = _PAGE_READ,
+ .set = "r",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_WRITE,
+ .val = _PAGE_WRITE,
+ .set = "w",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = " X ",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PTE,
+ .val = _PAGE_PTE,
+ .set = "pte",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "valid",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_PRESENT | _PAGE_INVALID,
+ .val = 0,
+ .set = " ",
+ .clear = "present",
+ }, {
+ .mask = H_PAGE_HASHPTE,
+ .val = H_PAGE_HASHPTE,
+ .set = "hpte",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "dirty",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "accessed",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_NON_IDEMPOTENT,
+ .val = _PAGE_NON_IDEMPOTENT,
+ .set = "non-idempotent",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_TOLERANT,
+ .val = _PAGE_TOLERANT,
+ .set = "tolerant",
+ .clear = " ",
+ }, {
+ .mask = H_PAGE_BUSY,
+ .val = H_PAGE_BUSY,
+ .set = "busy",
+ }, {
+#ifdef CONFIG_PPC_64K_PAGES
+ .mask = H_PAGE_COMBO,
+ .val = H_PAGE_COMBO,
+ .set = "combo",
+ }, {
+ .mask = H_PAGE_4K_PFN,
+ .val = H_PAGE_4K_PFN,
+ .set = "4K_pfn",
+ }, {
+#else /* CONFIG_PPC_64K_PAGES */
+ .mask = H_PAGE_F_GIX,
+ .val = H_PAGE_F_GIX,
+ .set = "f_gix",
+ .is_val = true,
+ .shift = H_PAGE_F_GIX_SHIFT,
+ }, {
+ .mask = H_PAGE_F_SECOND,
+ .val = H_PAGE_F_SECOND,
+ .set = "f_second",
+ }, {
+#endif /* CONFIG_PPC_64K_PAGES */
+ .mask = _PAGE_SPECIAL,
+ .val = _PAGE_SPECIAL,
+ .set = "special",
+ }
+};
+
+struct pgtable_level pg_level[5] = {
+ {
+ }, { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pud */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pmd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pte */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ },
+};
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * From split of dump_linuxpagetables.c
+ * Copyright 2016, Rashmica Gupta, IBM Corp.
+ *
+ */
+#include <linux/kernel.h>
+#include <asm/pgtable.h>
+
+#include "dump_linuxpagetables.h"
+
+static const struct flag_info flag_array[] = {
+ {
+ .mask = _PAGE_USER,
+ .val = _PAGE_USER,
+ .set = "user",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_RW,
+ .val = _PAGE_RW,
+ .set = "rw",
+ .clear = "r ",
+ }, {
+#ifndef CONFIG_PPC_BOOK3S_32
+ .mask = _PAGE_EXEC,
+ .val = _PAGE_EXEC,
+ .set = " X ",
+ .clear = " ",
+ }, {
+#endif
+ .mask = _PAGE_PRESENT,
+ .val = _PAGE_PRESENT,
+ .set = "present",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_GUARDED,
+ .val = _PAGE_GUARDED,
+ .set = "guarded",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_DIRTY,
+ .val = _PAGE_DIRTY,
+ .set = "dirty",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_ACCESSED,
+ .val = _PAGE_ACCESSED,
+ .set = "accessed",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_WRITETHRU,
+ .val = _PAGE_WRITETHRU,
+ .set = "write through",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_NO_CACHE,
+ .val = _PAGE_NO_CACHE,
+ .set = "no cache",
+ .clear = " ",
+ }, {
+ .mask = _PAGE_SPECIAL,
+ .val = _PAGE_SPECIAL,
+ .set = "special",
+ }
+};
+
+struct pgtable_level pg_level[5] = {
+ {
+ }, { /* pgd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pud */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pmd */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ }, { /* pte */
+ .flag = flag_array,
+ .num = ARRAY_SIZE(flag_array),
+ },
+};
#include <asm/page.h>
#include <asm/pgalloc.h>
+#include "dump_linuxpagetables.h"
+
#ifdef CONFIG_PPC32
#define KERN_VIRT_START 0
#endif
{ -1, NULL },
};
-struct flag_info {
- u64 mask;
- u64 val;
- const char *set;
- const char *clear;
- bool is_val;
- int shift;
-};
-
-static const struct flag_info flag_array[] = {
- {
- .mask = _PAGE_USER | _PAGE_PRIVILEGED,
- .val = _PAGE_USER,
- .set = "user",
- .clear = " ",
- }, {
- .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
- .val = _PAGE_RW,
- .set = "rw",
- }, {
- .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
- .val = _PAGE_RO,
- .set = "ro",
- }, {
-#if _PAGE_NA != 0
- .mask = _PAGE_RW | _PAGE_RO | _PAGE_NA,
- .val = _PAGE_RO,
- .set = "na",
- }, {
-#endif
- .mask = _PAGE_EXEC,
- .val = _PAGE_EXEC,
- .set = " X ",
- .clear = " ",
- }, {
- .mask = _PAGE_PTE,
- .val = _PAGE_PTE,
- .set = "pte",
- .clear = " ",
- }, {
- .mask = _PAGE_PRESENT,
- .val = _PAGE_PRESENT,
- .set = "present",
- .clear = " ",
- }, {
-#ifdef CONFIG_PPC_BOOK3S_64
- .mask = H_PAGE_HASHPTE,
- .val = H_PAGE_HASHPTE,
-#else
- .mask = _PAGE_HASHPTE,
- .val = _PAGE_HASHPTE,
-#endif
- .set = "hpte",
- .clear = " ",
- }, {
-#ifndef CONFIG_PPC_BOOK3S_64
- .mask = _PAGE_GUARDED,
- .val = _PAGE_GUARDED,
- .set = "guarded",
- .clear = " ",
- }, {
-#endif
- .mask = _PAGE_DIRTY,
- .val = _PAGE_DIRTY,
- .set = "dirty",
- .clear = " ",
- }, {
- .mask = _PAGE_ACCESSED,
- .val = _PAGE_ACCESSED,
- .set = "accessed",
- .clear = " ",
- }, {
-#ifndef CONFIG_PPC_BOOK3S_64
- .mask = _PAGE_WRITETHRU,
- .val = _PAGE_WRITETHRU,
- .set = "write through",
- .clear = " ",
- }, {
-#endif
-#ifndef CONFIG_PPC_BOOK3S_64
- .mask = _PAGE_NO_CACHE,
- .val = _PAGE_NO_CACHE,
- .set = "no cache",
- .clear = " ",
- }, {
-#else
- .mask = _PAGE_NON_IDEMPOTENT,
- .val = _PAGE_NON_IDEMPOTENT,
- .set = "non-idempotent",
- .clear = " ",
- }, {
- .mask = _PAGE_TOLERANT,
- .val = _PAGE_TOLERANT,
- .set = "tolerant",
- .clear = " ",
- }, {
-#endif
-#ifdef CONFIG_PPC_BOOK3S_64
- .mask = H_PAGE_BUSY,
- .val = H_PAGE_BUSY,
- .set = "busy",
- }, {
-#ifdef CONFIG_PPC_64K_PAGES
- .mask = H_PAGE_COMBO,
- .val = H_PAGE_COMBO,
- .set = "combo",
- }, {
- .mask = H_PAGE_4K_PFN,
- .val = H_PAGE_4K_PFN,
- .set = "4K_pfn",
- }, {
-#else /* CONFIG_PPC_64K_PAGES */
- .mask = H_PAGE_F_GIX,
- .val = H_PAGE_F_GIX,
- .set = "f_gix",
- .is_val = true,
- .shift = H_PAGE_F_GIX_SHIFT,
- }, {
- .mask = H_PAGE_F_SECOND,
- .val = H_PAGE_F_SECOND,
- .set = "f_second",
- }, {
-#endif /* CONFIG_PPC_64K_PAGES */
-#endif
- .mask = _PAGE_SPECIAL,
- .val = _PAGE_SPECIAL,
- .set = "special",
- }
-};
-
-struct pgtable_level {
- const struct flag_info *flag;
- size_t num;
- u64 mask;
-};
-
-static struct pgtable_level pg_level[] = {
- {
- }, { /* pgd */
- .flag = flag_array,
- .num = ARRAY_SIZE(flag_array),
- }, { /* pud */
- .flag = flag_array,
- .num = ARRAY_SIZE(flag_array),
- }, { /* pmd */
- .flag = flag_array,
- .num = ARRAY_SIZE(flag_array),
- }, { /* pte */
- .flag = flag_array,
- .num = ARRAY_SIZE(flag_array),
- },
-};
-
static void dump_flag_info(struct pg_state *st, const struct flag_info
*flag, u64 pte, int num)
{
unsigned int i;
unsigned long addr;
+ addr = st->start_address;
+
/*
* Traverse the linux pagetable structure and dump pages that are in
* the hash pagetable.
*/
- for (i = 0; i < PTRS_PER_PGD; i++, pgd++) {
- addr = KERN_VIRT_START + i * PGDIR_SIZE;
+ for (i = 0; i < PTRS_PER_PGD; i++, pgd++, addr += PGDIR_SIZE) {
if (!pgd_none(*pgd) && !pgd_huge(*pgd))
/* pgd exists */
walk_pud(st, pgd, addr);
{
struct pg_state st = {
.seq = m,
- .start_address = KERN_VIRT_START,
.marker = address_markers,
};
+
+ if (radix_enabled())
+ st.start_address = PAGE_OFFSET;
+ else
+ st.start_address = KERN_VIRT_START;
+
/* Traverse kernel page tables */
walk_pagetables(&st);
note_page(&st, 0, 0, 0);
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <linux/types.h>
+
+struct flag_info {
+ u64 mask;
+ u64 val;
+ const char *set;
+ const char *clear;
+ bool is_val;
+ int shift;
+};
+
+struct pgtable_level {
+ const struct flag_info *flag;
+ size_t num;
+ u64 mask;
+};
+
+extern struct pgtable_level pg_level[5];
tlbiel_hash_set_isa300(0, is, 0, 2, 1);
asm volatile("ptesync": : :"memory");
+
+ asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
void hash__tlbiel_all(unsigned int action)
tlbiel_all_isa206(POWER7_TLB_SETS, is);
else
WARN(1, "%s called on pre-POWER7 CPU\n", __func__);
-
- asm volatile(PPC_INVALIDATE_ERAT "; isync" : : :"memory");
}
static inline unsigned long ___tlbie(unsigned long vpn, int psize,
* 4k use hugepd format, so for hash set then to
* zero
*/
- __pmd_val_bits = 0;
- __pud_val_bits = 0;
- __pgd_val_bits = 0;
+ __pmd_val_bits = HASH_PMD_VAL_BITS;
+ __pud_val_bits = HASH_PUD_VAL_BITS;
+ __pgd_val_bits = HASH_PGD_VAL_BITS;
__kernel_virt_start = H_KERN_VIRT_START;
__kernel_virt_size = H_KERN_VIRT_SIZE;
if ((get_paca_psize(addr) != MMU_PAGE_4K) && (current->mm == mm)) {
copy_mm_to_paca(mm);
- slb_flush_and_rebolt();
+ slb_flush_and_restore_bolted();
}
}
#endif /* CONFIG_PPC_64K_PAGES */
if (user_region) {
if (psize != get_paca_psize(ea)) {
copy_mm_to_paca(mm);
- slb_flush_and_rebolt();
+ slb_flush_and_restore_bolted();
}
} else if (get_paca()->vmalloc_sllp !=
mmu_psize_defs[mmu_vmalloc_psize].sllp) {
#endif
void hash_preload(struct mm_struct *mm, unsigned long ea,
- unsigned long access, unsigned long trap)
+ bool is_exec, unsigned long trap)
{
int hugepage_shift;
unsigned long vsid;
pte_t *ptep;
unsigned long flags;
int rc, ssize, update_flags = 0;
+ unsigned long access = _PAGE_PRESENT | _PAGE_READ | (is_exec ? _PAGE_EXEC : 0);
BUG_ON(REGION_ID(ea) != USER_REGION_ID);
new_pmd |= _PAGE_DIRTY;
} while (!pmd_xchg(pmdp, __pmd(old_pmd), __pmd(new_pmd)));
+ /*
+ * Make sure this is thp or devmap entry
+ */
+ if (!(old_pmd & (H_PAGE_THP_HUGE | _PAGE_DEVMAP)))
+ return 0;
+
rflags = htab_convert_pte_flags(new_pmd);
#if 0
new_pte |= _PAGE_DIRTY;
} while(!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
+ /* Make sure this is a hugetlb entry */
+ if (old_pte & (H_PAGE_THP_HUGE | _PAGE_DEVMAP))
+ return 0;
+
rflags = htab_convert_pte_flags(new_pte);
if (unlikely(mmu_psize == MMU_PAGE_16G))
offset = PTRS_PER_PUD;
#include <linux/moduleparam.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/kmemleak.h>
#include <asm/pgtable.h>
#include <asm/pgalloc.h>
#include <asm/tlb.h>
break;
else {
#ifdef CONFIG_PPC_BOOK3S_64
- *hpdp = __hugepd(__pa(new) |
+ *hpdp = __hugepd(__pa(new) | HUGEPD_VAL_BITS |
(shift_to_mmu_psize(pshift) << 2));
#elif defined(CONFIG_PPC_8xx)
*hpdp = __hugepd(__pa(new) | _PMD_USER |
for (i = i - 1 ; i >= 0; i--, hpdp--)
*hpdp = __hugepd(0);
kmem_cache_free(cachep, new);
+ } else {
+ kmemleak_ignore(new);
}
spin_unlock(ptl);
return 0;
ret_pte = (pte_t *) pmdp;
goto out;
}
-
- if (pmd_huge(pmd)) {
+ /*
+ * pmd_large check below will handle the swap pmd pte
+ * we need to do both the check because they are config
+ * dependent.
+ */
+ if (pmd_huge(pmd) || pmd_large(pmd)) {
ret_pte = (pte_t *) pmdp;
goto out;
} else if (is_hugepd(__hugepd(pmd_val(pmd))))
unsigned long end = __fix_to_virt(FIX_HOLE);
for (; v < end; v += PAGE_SIZE)
- map_kernel_page(v, 0, 0); /* XXX gross */
+ map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
#endif
#ifdef CONFIG_HIGHMEM
- map_kernel_page(PKMAP_BASE, 0, 0); /* XXX gross */
+ map_kernel_page(PKMAP_BASE, 0, __pgprot(0)); /* XXX gross */
pkmap_page_table = virt_to_kpte(PKMAP_BASE);
kmap_pte = virt_to_kpte(__fix_to_virt(FIX_KMAP_BEGIN));
* We don't need to worry about _PAGE_PRESENT here because we are
* called with either mm->page_table_lock held or ptl lock held
*/
- unsigned long access, trap;
+ unsigned long trap;
+ bool is_exec;
if (radix_enabled()) {
prefetch((void *)address);
trap = current->thread.regs ? TRAP(current->thread.regs) : 0UL;
switch (trap) {
case 0x300:
- access = 0UL;
+ is_exec = false;
break;
case 0x400:
- access = _PAGE_EXEC;
+ is_exec = true;
break;
default:
return;
}
- hash_preload(vma->vm_mm, address, access, trap);
+ hash_preload(vma->vm_mm, address, is_exec, trap);
#endif /* CONFIG_PPC_STD_MMU */
#if (defined(CONFIG_PPC_BOOK3E_64) || defined(CONFIG_PPC_FSL_BOOK3E)) \
&& defined(CONFIG_HUGETLB_PAGE)
}
EXPORT_SYMBOL_GPL(hash__alloc_context_id);
+void slb_setup_new_exec(void);
+
static int hash__init_new_context(struct mm_struct *mm)
{
int index;
return index;
}
+void hash__setup_new_exec(void)
+{
+ slice_setup_new_exec();
+
+ slb_setup_new_exec();
+}
+
static int radix__init_new_context(struct mm_struct *mm)
{
unsigned long rts_field;
#include <asm/mmu.h>
#ifdef CONFIG_PPC_MMU_NOHASH
+#include <asm/trace.h>
/*
* On 40x and 8xx, we directly inline tlbia and tlbivax
static inline void _tlbil_all(void)
{
asm volatile ("sync; tlbia; isync" : : : "memory");
+ trace_tlbia(MMU_NO_CONTEXT);
}
static inline void _tlbil_pid(unsigned int pid)
{
asm volatile ("sync; tlbia; isync" : : : "memory");
+ trace_tlbia(pid);
}
#define _tlbil_pid_noind(pid) _tlbil_pid(pid)
unsigned int tsize, unsigned int ind)
{
asm volatile ("tlbie %0; sync" : : "r" (address) : "memory");
+ trace_tlbie(0, 0, address, pid, 0, 0, 0);
}
#elif defined(CONFIG_PPC_BOOK3E)
extern void _tlbil_va(unsigned long address, unsigned int pid,
#else /* CONFIG_PPC_MMU_NOHASH */
extern void hash_preload(struct mm_struct *mm, unsigned long ea,
- unsigned long access, unsigned long trap);
+ bool is_exec, unsigned long trap);
extern void _tlbie(unsigned long address);
}
}
+ pr_info("Starting topology update%s%s\n",
+ (prrn_enabled ? " prrn_enabled" : ""),
+ (vphn_enabled ? " vphn_enabled" : ""));
+
return rc;
}
rc = del_timer_sync(&topology_timer);
}
+ pr_info("Stopping topology update\n");
+
return rc;
}
* thus must have the low bits clear
*/
for (i = 0; i < page_size; i += PAGE_SIZE)
- BUG_ON(map_kernel_page(start + i, phys, flags));
+ BUG_ON(map_kernel_page(start + i, phys, __pgprot(flags)));
return 0;
}
* map_kernel_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
-int map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+int map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
pgd_t *pgdp;
pud_t *pudp;
ptep = pte_alloc_kernel(pmdp, ea);
if (!ptep)
return -ENOMEM;
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
} else {
pgdp = pgd_offset_k(ea);
#ifndef __PAGETABLE_PUD_FOLDED
pmd_populate_kernel(&init_mm, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, ea);
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
}
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
smp_wmb();
return 0;
pmd_t *pmdp, pmd_t pmd)
{
#ifdef CONFIG_DEBUG_VM
- WARN_ON(pte_present(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
+ /*
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
+ */
+
+ WARN_ON(pte_hw_valid(pmd_pte(*pmdp)) && !pte_protnone(pmd_pte(*pmdp)));
assert_spin_locked(pmd_lockptr(mm, pmdp));
- WARN_ON(!(pmd_trans_huge(pmd) || pmd_devmap(pmd)));
+ WARN_ON(!(pmd_large(pmd) || pmd_devmap(pmd)));
#endif
trace_hugepage_set_pmd(addr, pmd_val(pmd));
return set_pte_at(mm, addr, pmdp_ptep(pmdp), pmd_pte(pmd));
{
unsigned long old_pmd;
- old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, 0);
+ old_pmd = pmd_hugepage_update(vma->vm_mm, address, pmdp, _PAGE_PRESENT, _PAGE_INVALID);
flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
/*
* This ensures that generic code that rely on IRQ disabling
* map_kernel_page adds an entry to the ioremap page table
* and adds an entry to the HPT, possibly bolting it
*/
-int hash__map_kernel_page(unsigned long ea, unsigned long pa, unsigned long flags)
+int hash__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t prot)
{
pgd_t *pgdp;
pud_t *pudp;
ptep = pte_alloc_kernel(pmdp, ea);
if (!ptep)
return -ENOMEM;
- set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
+ set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, prot));
} else {
/*
* If the mm subsystem is not fully up, we cannot create a
* entry in the hardware page table.
*
*/
- if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, flags,
+ if (htab_bolt_mapping(ea, ea + PAGE_SIZE, pa, pgprot_val(prot),
mmu_io_psize, mmu_kernel_ssize)) {
printk(KERN_ERR "Failed to do bolted mapping IO "
"memory at %016lx !\n", pa);
}
#endif /* CONFIG_STRICT_KERNEL_RWX */
-static inline void __meminit print_mapping(unsigned long start,
- unsigned long end,
- unsigned long size)
+static inline void __meminit
+print_mapping(unsigned long start, unsigned long end, unsigned long size, bool exec)
{
char buf[10];
string_get_size(size, 1, STRING_UNITS_2, buf, sizeof(buf));
- pr_info("Mapped 0x%016lx-0x%016lx with %s pages\n", start, end, buf);
+ pr_info("Mapped 0x%016lx-0x%016lx with %s pages%s\n", start, end, buf,
+ exec ? " (exec)" : "");
+}
+
+static unsigned long next_boundary(unsigned long addr, unsigned long end)
+{
+#ifdef CONFIG_STRICT_KERNEL_RWX
+ if (addr < __pa_symbol(__init_begin))
+ return __pa_symbol(__init_begin);
+#endif
+ return end;
}
static int __meminit create_physical_mapping(unsigned long start,
int nid)
{
unsigned long vaddr, addr, mapping_size = 0;
+ bool prev_exec, exec = false;
pgprot_t prot;
- unsigned long max_mapping_size;
-#ifdef CONFIG_STRICT_KERNEL_RWX
- int split_text_mapping = 1;
-#else
- int split_text_mapping = 0;
-#endif
int psize;
start = _ALIGN_UP(start, PAGE_SIZE);
unsigned long gap, previous_size;
int rc;
- gap = end - addr;
+ gap = next_boundary(addr, end) - addr;
previous_size = mapping_size;
- max_mapping_size = PUD_SIZE;
+ prev_exec = exec;
-retry:
if (IS_ALIGNED(addr, PUD_SIZE) && gap >= PUD_SIZE &&
- mmu_psize_defs[MMU_PAGE_1G].shift &&
- PUD_SIZE <= max_mapping_size) {
+ mmu_psize_defs[MMU_PAGE_1G].shift) {
mapping_size = PUD_SIZE;
psize = MMU_PAGE_1G;
} else if (IS_ALIGNED(addr, PMD_SIZE) && gap >= PMD_SIZE &&
psize = mmu_virtual_psize;
}
- if (split_text_mapping && (mapping_size == PUD_SIZE) &&
- (addr <= __pa_symbol(__init_begin)) &&
- (addr + mapping_size) >= __pa_symbol(_stext)) {
- max_mapping_size = PMD_SIZE;
- goto retry;
- }
-
- if (split_text_mapping && (mapping_size == PMD_SIZE) &&
- (addr <= __pa_symbol(__init_begin)) &&
- (addr + mapping_size) >= __pa_symbol(_stext)) {
- mapping_size = PAGE_SIZE;
- psize = mmu_virtual_psize;
- }
-
- if (mapping_size != previous_size) {
- print_mapping(start, addr, previous_size);
- start = addr;
- }
-
vaddr = (unsigned long)__va(addr);
if (overlaps_kernel_text(vaddr, vaddr + mapping_size) ||
- overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size))
+ overlaps_interrupt_vector_text(vaddr, vaddr + mapping_size)) {
prot = PAGE_KERNEL_X;
- else
+ exec = true;
+ } else {
prot = PAGE_KERNEL;
+ exec = false;
+ }
+
+ if (mapping_size != previous_size || exec != prev_exec) {
+ print_mapping(start, addr, previous_size, prev_exec);
+ start = addr;
+ }
rc = __map_kernel_page(vaddr, addr, prot, mapping_size, nid, start, end);
if (rc)
update_page_count(psize, 1);
}
- print_mapping(start, addr, mapping_size);
+ print_mapping(start, addr, mapping_size, exec);
return 0;
}
static inline int pte_looks_normal(pte_t pte)
{
-#if defined(CONFIG_PPC_BOOK3S_64)
- if ((pte_val(pte) & (_PAGE_PRESENT | _PAGE_SPECIAL)) == _PAGE_PRESENT) {
+ if (pte_present(pte) && !pte_special(pte)) {
if (pte_ci(pte))
return 0;
if (pte_user(pte))
return 1;
}
return 0;
-#else
- return (pte_val(pte) &
- (_PAGE_PRESENT | _PAGE_SPECIAL | _PAGE_NO_CACHE | _PAGE_USER |
- _PAGE_PRIVILEGED)) ==
- (_PAGE_PRESENT | _PAGE_USER);
-#endif
}
static struct page *maybe_pte_to_page(pte_t pte)
return page;
}
-#if defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0
+#ifdef CONFIG_PPC_BOOK3S
/* Server-style MMU handles coherency when hashing if HW exec permission
* is supposed per page (currently 64-bit only). If not, then, we always
return pte;
}
-#else /* defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0 */
+#else /* CONFIG_PPC_BOOK3S */
/* Embedded type MMU with HW exec support. This is a bit more complicated
* as we don't have two bits to spare for _PAGE_EXEC and _PAGE_HWEXEC so
struct page *pg;
/* No exec permission in the first place, move on */
- if (!(pte_val(pte) & _PAGE_EXEC) || !pte_looks_normal(pte))
+ if (!pte_exec(pte) || !pte_looks_normal(pte))
return pte;
/* If you set _PAGE_EXEC on weird pages you're on your own */
}
/* Else, we filter out _PAGE_EXEC */
- return __pte(pte_val(pte) & ~_PAGE_EXEC);
+ return pte_exprotect(pte);
}
static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
* if necessary. Also if _PAGE_EXEC is already set, same deal,
* we just bail out
*/
- if (dirty || (pte_val(pte) & _PAGE_EXEC) || !is_exec_fault())
+ if (dirty || pte_exec(pte) || !is_exec_fault())
return pte;
#ifdef CONFIG_DEBUG_VM
set_bit(PG_arch_1, &pg->flags);
bail:
- return __pte(pte_val(pte) | _PAGE_EXEC);
+ return pte_mkexec(pte);
}
-#endif /* !(defined(CONFIG_PPC_STD_MMU) || _PAGE_EXEC == 0) */
+#endif /* CONFIG_PPC_BOOK3S */
/*
* set_pte stores a linux PTE into the linux page table.
pte_t pte)
{
/*
- * When handling numa faults, we already have the pte marked
- * _PAGE_PRESENT, but we can be sure that it is not in hpte.
- * Hence we can use set_pte_at for them.
+ * Make sure hardware valid bit is not set. We don't do
+ * tlb flush for this update.
*/
- VM_WARN_ON(pte_present(*ptep) && !pte_protnone(*ptep));
+ VM_WARN_ON(pte_hw_valid(*ptep) && !pte_protnone(*ptep));
/* Add the pte bit when trying to set a pte */
- pte = __pte(pte_val(pte) | _PAGE_PTE);
+ pte = pte_mkpte(pte);
/* Note: mm->context.id might not yet have been assigned as
* this context might not have been activated yet when this
void __iomem *
ioremap(phys_addr_t addr, unsigned long size)
{
- return __ioremap_caller(addr, size, _PAGE_NO_CACHE | _PAGE_GUARDED,
- __builtin_return_address(0));
+ pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
+
+ return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap);
void __iomem *
ioremap_wc(phys_addr_t addr, unsigned long size)
{
- return __ioremap_caller(addr, size, _PAGE_NO_CACHE,
- __builtin_return_address(0));
+ pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
+
+ return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_wc);
+void __iomem *
+ioremap_wt(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached_wthru(PAGE_KERNEL);
+
+ return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_wt);
+
+void __iomem *
+ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached(PAGE_KERNEL);
+
+ return __ioremap_caller(addr, size, prot, __builtin_return_address(0));
+}
+EXPORT_SYMBOL(ioremap_coherent);
+
void __iomem *
ioremap_prot(phys_addr_t addr, unsigned long size, unsigned long flags)
{
+ pte_t pte = __pte(flags);
+
/* writeable implies dirty for kernel addresses */
- if ((flags & (_PAGE_RW | _PAGE_RO)) != _PAGE_RO)
- flags |= _PAGE_DIRTY | _PAGE_HWWRITE;
+ if (pte_write(pte))
+ pte = pte_mkdirty(pte);
/* we don't want to let _PAGE_USER and _PAGE_EXEC leak out */
- flags &= ~(_PAGE_USER | _PAGE_EXEC);
- flags |= _PAGE_PRIVILEGED;
+ pte = pte_exprotect(pte);
+ pte = pte_mkprivileged(pte);
- return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+ return __ioremap_caller(addr, size, pte_pgprot(pte), __builtin_return_address(0));
}
EXPORT_SYMBOL(ioremap_prot);
void __iomem *
__ioremap(phys_addr_t addr, unsigned long size, unsigned long flags)
{
- return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+ return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
}
void __iomem *
-__ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
- void *caller)
+__ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *caller)
{
unsigned long v, i;
phys_addr_t p;
int err;
- /* Make sure we have the base flags */
- if ((flags & _PAGE_PRESENT) == 0)
- flags |= pgprot_val(PAGE_KERNEL);
-
- /* Non-cacheable page cannot be coherent */
- if (flags & _PAGE_NO_CACHE)
- flags &= ~_PAGE_COHERENT;
-
/*
* Choose an address to map it to.
* Once the vmalloc system is running, we use it.
err = 0;
for (i = 0; i < size && err == 0; i += PAGE_SIZE)
- err = map_kernel_page(v+i, p+i, flags);
+ err = map_kernel_page(v + i, p + i, prot);
if (err) {
if (slab_is_available())
vunmap((void *)v);
}
EXPORT_SYMBOL(iounmap);
-int map_kernel_page(unsigned long va, phys_addr_t pa, int flags)
+int map_kernel_page(unsigned long va, phys_addr_t pa, pgprot_t prot)
{
pmd_t *pd;
pte_t *pg;
/* The PTE should never be already set nor present in the
* hash table
*/
- BUG_ON((pte_val(*pg) & (_PAGE_PRESENT | _PAGE_HASHPTE)) &&
- flags);
- set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT,
- __pgprot(flags)));
+ BUG_ON((pte_present(*pg) | pte_hashpte(*pg)) && pgprot_val(prot));
+ set_pte_at(&init_mm, va, pg, pfn_pte(pa >> PAGE_SHIFT, prot));
}
smp_wmb();
return err;
*/
static void __init __mapin_ram_chunk(unsigned long offset, unsigned long top)
{
- unsigned long v, s, f;
+ unsigned long v, s;
phys_addr_t p;
int ktext;
for (; s < top; s += PAGE_SIZE) {
ktext = ((char *)v >= _stext && (char *)v < etext) ||
((char *)v >= _sinittext && (char *)v < _einittext);
- f = ktext ? pgprot_val(PAGE_KERNEL_TEXT) : pgprot_val(PAGE_KERNEL);
- map_kernel_page(v, p, f);
+ map_kernel_page(v, p, ktext ? PAGE_KERNEL_TEXT : PAGE_KERNEL);
#ifdef CONFIG_PPC_STD_MMU_32
if (ktext)
- hash_preload(&init_mm, v, 0, 0x300);
+ hash_preload(&init_mm, v, false, 0x300);
#endif
v += PAGE_SIZE;
p += PAGE_SIZE;
* __ioremap_at - Low level function to establish the page tables
* for an IO mapping
*/
-void __iomem * __ioremap_at(phys_addr_t pa, void *ea, unsigned long size,
- unsigned long flags)
+void __iomem *__ioremap_at(phys_addr_t pa, void *ea, unsigned long size, pgprot_t prot)
{
unsigned long i;
- /* Make sure we have the base flags */
- if ((flags & _PAGE_PRESENT) == 0)
- flags |= pgprot_val(PAGE_KERNEL);
-
/* We don't support the 4K PFN hack with ioremap */
- if (flags & H_PAGE_4K_PFN)
+ if (pgprot_val(prot) & H_PAGE_4K_PFN)
return NULL;
WARN_ON(pa & ~PAGE_MASK);
WARN_ON(size & ~PAGE_MASK);
for (i = 0; i < size; i += PAGE_SIZE)
- if (map_kernel_page((unsigned long)ea+i, pa+i, flags))
+ if (map_kernel_page((unsigned long)ea + i, pa + i, prot))
return NULL;
return (void __iomem *)ea;
}
void __iomem * __ioremap_caller(phys_addr_t addr, unsigned long size,
- unsigned long flags, void *caller)
+ pgprot_t prot, void *caller)
{
phys_addr_t paligned;
void __iomem *ret;
return NULL;
area->phys_addr = paligned;
- ret = __ioremap_at(paligned, area->addr, size, flags);
+ ret = __ioremap_at(paligned, area->addr, size, prot);
if (!ret)
vunmap(area->addr);
} else {
- ret = __ioremap_at(paligned, (void *)ioremap_bot, size, flags);
+ ret = __ioremap_at(paligned, (void *)ioremap_bot, size, prot);
if (ret)
ioremap_bot += size;
}
void __iomem * __ioremap(phys_addr_t addr, unsigned long size,
unsigned long flags)
{
- return __ioremap_caller(addr, size, flags, __builtin_return_address(0));
+ return __ioremap_caller(addr, size, __pgprot(flags), __builtin_return_address(0));
}
void __iomem * ioremap(phys_addr_t addr, unsigned long size)
{
- unsigned long flags = pgprot_val(pgprot_noncached(__pgprot(0)));
+ pgprot_t prot = pgprot_noncached(PAGE_KERNEL);
void *caller = __builtin_return_address(0);
if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, flags, caller);
- return __ioremap_caller(addr, size, flags, caller);
+ return ppc_md.ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
}
void __iomem * ioremap_wc(phys_addr_t addr, unsigned long size)
{
- unsigned long flags = pgprot_val(pgprot_noncached_wc(__pgprot(0)));
+ pgprot_t prot = pgprot_noncached_wc(PAGE_KERNEL);
+ void *caller = __builtin_return_address(0);
+
+ if (ppc_md.ioremap)
+ return ppc_md.ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
+}
+
+void __iomem *ioremap_coherent(phys_addr_t addr, unsigned long size)
+{
+ pgprot_t prot = pgprot_cached(PAGE_KERNEL);
void *caller = __builtin_return_address(0);
if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, flags, caller);
- return __ioremap_caller(addr, size, flags, caller);
+ return ppc_md.ioremap(addr, size, prot, caller);
+ return __ioremap_caller(addr, size, prot, caller);
}
void __iomem * ioremap_prot(phys_addr_t addr, unsigned long size,
unsigned long flags)
{
+ pte_t pte = __pte(flags);
void *caller = __builtin_return_address(0);
/* writeable implies dirty for kernel addresses */
- if (flags & _PAGE_WRITE)
- flags |= _PAGE_DIRTY;
+ if (pte_write(pte))
+ pte = pte_mkdirty(pte);
/* we don't want to let _PAGE_EXEC leak out */
- flags &= ~_PAGE_EXEC;
+ pte = pte_exprotect(pte);
/*
* Force kernel mapping.
*/
- flags &= ~_PAGE_USER;
- flags |= _PAGE_PRIVILEGED;
+ pte = pte_mkprivileged(pte);
if (ppc_md.ioremap)
- return ppc_md.ioremap(addr, size, flags, caller);
- return __ioremap_caller(addr, size, flags, caller);
+ return ppc_md.ioremap(addr, size, pte_pgprot(pte), caller);
+ return __ioremap_caller(addr, size, pte_pgprot(pte), caller);
}
*/
struct page *pmd_page(pmd_t pmd)
{
- if (pmd_trans_huge(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
+ if (pmd_large(pmd) || pmd_huge(pmd) || pmd_devmap(pmd))
return pte_page(pmd_pte(pmd));
return virt_to_page(pmd_page_vaddr(pmd));
}
* Preload a translation in the hash table
*/
void hash_preload(struct mm_struct *mm, unsigned long ea,
- unsigned long access, unsigned long trap)
+ bool is_exec, unsigned long trap)
{
pmd_t *pmd;
* 2 of the License, or (at your option) any later version.
*/
+#include <asm/asm-prototypes.h>
#include <asm/pgtable.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
enum slb_index {
LINEAR_INDEX = 0, /* Kernel linear map (0xc000000000000000) */
- VMALLOC_INDEX = 1, /* Kernel virtual map (0xd000000000000000) */
- KSTACK_INDEX = 2, /* Kernel stack map */
+ KSTACK_INDEX = 1, /* Kernel stack map */
};
-extern void slb_allocate(unsigned long ea);
+static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
#define slb_esid_mask(ssize) \
(((ssize) == MMU_SEGSIZE_256M)? ESID_MASK: ESID_MASK_1T)
return (ea & slb_esid_mask(ssize)) | SLB_ESID_V | index;
}
-static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
+static inline unsigned long __mk_vsid_data(unsigned long vsid, int ssize,
unsigned long flags)
{
- return (get_kernel_vsid(ea, ssize) << slb_vsid_shift(ssize)) | flags |
+ return (vsid << slb_vsid_shift(ssize)) | flags |
((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
}
+static inline unsigned long mk_vsid_data(unsigned long ea, int ssize,
+ unsigned long flags)
+{
+ return __mk_vsid_data(get_kernel_vsid(ea, ssize), ssize, flags);
+}
+
+static void assert_slb_exists(unsigned long ea)
+{
+#ifdef CONFIG_DEBUG_VM
+ unsigned long tmp;
+
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
+ WARN_ON(tmp == 0);
+#endif
+}
+
+static void assert_slb_notexists(unsigned long ea)
+{
+#ifdef CONFIG_DEBUG_VM
+ unsigned long tmp;
+
+ WARN_ON_ONCE(mfmsr() & MSR_EE);
+
+ asm volatile("slbfee. %0, %1" : "=r"(tmp) : "r"(ea) : "cr0");
+ WARN_ON(tmp != 0);
+#endif
+}
+
static inline void slb_shadow_update(unsigned long ea, int ssize,
unsigned long flags,
enum slb_index index)
*/
slb_shadow_update(ea, ssize, flags, index);
+ assert_slb_notexists(ea);
asm volatile("slbmte %0,%1" :
: "r" (mk_vsid_data(ea, ssize, flags)),
"r" (mk_esid_data(ea, ssize, index))
: "r" (be64_to_cpu(p->save_area[index].vsid)),
"r" (be64_to_cpu(p->save_area[index].esid)));
}
+
+ assert_slb_exists(local_paca->kstack);
}
/*
* Insert the bolted entries into an empty SLB.
- * This is not the same as rebolt because the bolted segments are not
- * changed, just loaded from the shadow area.
*/
void slb_restore_bolted_realmode(void)
{
__slb_restore_bolted_realmode();
get_paca()->slb_cache_ptr = 0;
+
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+ get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}
/*
*/
void slb_flush_all_realmode(void)
{
- /*
- * This flushes all SLB entries including 0, so it must be realmode.
- */
asm volatile("slbmte %0,%0; slbia" : : "r" (0));
}
-static void __slb_flush_and_rebolt(void)
+/*
+ * This flushes non-bolted entries, it can be run in virtual mode. Must
+ * be called with interrupts disabled.
+ */
+void slb_flush_and_restore_bolted(void)
{
- /* If you change this make sure you change SLB_NUM_BOLTED
- * and PR KVM appropriately too. */
- unsigned long linear_llp, vmalloc_llp, lflags, vflags;
- unsigned long ksp_esid_data, ksp_vsid_data;
+ struct slb_shadow *p = get_slb_shadow();
- linear_llp = mmu_psize_defs[mmu_linear_psize].sllp;
- vmalloc_llp = mmu_psize_defs[mmu_vmalloc_psize].sllp;
- lflags = SLB_VSID_KERNEL | linear_llp;
- vflags = SLB_VSID_KERNEL | vmalloc_llp;
+ BUILD_BUG_ON(SLB_NUM_BOLTED != 2);
- ksp_esid_data = mk_esid_data(get_paca()->kstack, mmu_kernel_ssize, KSTACK_INDEX);
- if ((ksp_esid_data & ~0xfffffffUL) <= PAGE_OFFSET) {
- ksp_esid_data &= ~SLB_ESID_V;
- ksp_vsid_data = 0;
- slb_shadow_clear(KSTACK_INDEX);
- } else {
- /* Update stack entry; others don't change */
- slb_shadow_update(get_paca()->kstack, mmu_kernel_ssize, lflags, KSTACK_INDEX);
- ksp_vsid_data =
- be64_to_cpu(get_slb_shadow()->save_area[KSTACK_INDEX].vsid);
- }
+ WARN_ON(!irqs_disabled());
+
+ /*
+ * We can't take a PMU exception in the following code, so hard
+ * disable interrupts.
+ */
+ hard_irq_disable();
- /* We need to do this all in asm, so we're sure we don't touch
- * the stack between the slbia and rebolting it. */
asm volatile("isync\n"
"slbia\n"
- /* Slot 1 - first VMALLOC segment */
- "slbmte %0,%1\n"
- /* Slot 2 - kernel stack */
- "slbmte %2,%3\n"
- "isync"
- :: "r"(mk_vsid_data(VMALLOC_START, mmu_kernel_ssize, vflags)),
- "r"(mk_esid_data(VMALLOC_START, mmu_kernel_ssize, VMALLOC_INDEX)),
- "r"(ksp_vsid_data),
- "r"(ksp_esid_data)
+ "slbmte %0, %1\n"
+ "isync\n"
+ :: "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].vsid)),
+ "r" (be64_to_cpu(p->save_area[KSTACK_INDEX].esid))
: "memory");
+ assert_slb_exists(get_paca()->kstack);
+
+ get_paca()->slb_cache_ptr = 0;
+
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+ get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
}
-void slb_flush_and_rebolt(void)
+void slb_save_contents(struct slb_entry *slb_ptr)
{
+ int i;
+ unsigned long e, v;
- WARN_ON(!irqs_disabled());
+ /* Save slb_cache_ptr value. */
+ get_paca()->slb_save_cache_ptr = get_paca()->slb_cache_ptr;
+
+ if (!slb_ptr)
+ return;
+
+ for (i = 0; i < mmu_slb_size; i++) {
+ asm volatile("slbmfee %0,%1" : "=r" (e) : "r" (i));
+ asm volatile("slbmfev %0,%1" : "=r" (v) : "r" (i));
+ slb_ptr->esid = e;
+ slb_ptr->vsid = v;
+ slb_ptr++;
+ }
+}
+
+void slb_dump_contents(struct slb_entry *slb_ptr)
+{
+ int i, n;
+ unsigned long e, v;
+ unsigned long llp;
+
+ if (!slb_ptr)
+ return;
+
+ pr_err("SLB contents of cpu 0x%x\n", smp_processor_id());
+ pr_err("Last SLB entry inserted at slot %d\n", get_paca()->stab_rr);
+
+ for (i = 0; i < mmu_slb_size; i++) {
+ e = slb_ptr->esid;
+ v = slb_ptr->vsid;
+ slb_ptr++;
+
+ if (!e && !v)
+ continue;
+
+ pr_err("%02d %016lx %016lx\n", i, e, v);
+
+ if (!(e & SLB_ESID_V)) {
+ pr_err("\n");
+ continue;
+ }
+ llp = v & SLB_VSID_LLP;
+ if (v & SLB_VSID_B_1T) {
+ pr_err(" 1T ESID=%9lx VSID=%13lx LLP:%3lx\n",
+ GET_ESID_1T(e),
+ (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT_1T, llp);
+ } else {
+ pr_err(" 256M ESID=%9lx VSID=%13lx LLP:%3lx\n",
+ GET_ESID(e),
+ (v & ~SLB_VSID_B) >> SLB_VSID_SHIFT, llp);
+ }
+ }
+ pr_err("----------------------------------\n");
+
+ /* Dump slb cache entires as well. */
+ pr_err("SLB cache ptr value = %d\n", get_paca()->slb_save_cache_ptr);
+ pr_err("Valid SLB cache entries:\n");
+ n = min_t(int, get_paca()->slb_save_cache_ptr, SLB_CACHE_ENTRIES);
+ for (i = 0; i < n; i++)
+ pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
+ pr_err("Rest of SLB cache entries:\n");
+ for (i = n; i < SLB_CACHE_ENTRIES; i++)
+ pr_err("%02d EA[0-35]=%9x\n", i, get_paca()->slb_cache[i]);
+}
+void slb_vmalloc_update(void)
+{
/*
- * We can't take a PMU exception in the following code, so hard
- * disable interrupts.
+ * vmalloc is not bolted, so just have to flush non-bolted.
*/
- hard_irq_disable();
+ slb_flush_and_restore_bolted();
+}
- __slb_flush_and_rebolt();
- get_paca()->slb_cache_ptr = 0;
+static bool preload_hit(struct thread_info *ti, unsigned long esid)
+{
+ unsigned char i;
+
+ for (i = 0; i < ti->slb_preload_nr; i++) {
+ unsigned char idx;
+
+ idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
+ if (esid == ti->slb_preload_esid[idx])
+ return true;
+ }
+ return false;
}
-void slb_vmalloc_update(void)
+static bool preload_add(struct thread_info *ti, unsigned long ea)
{
- unsigned long vflags;
+ unsigned char idx;
+ unsigned long esid;
+
+ if (mmu_has_feature(MMU_FTR_1T_SEGMENT)) {
+ /* EAs are stored >> 28 so 256MB segments don't need clearing */
+ if (ea & ESID_MASK_1T)
+ ea &= ESID_MASK_1T;
+ }
+
+ esid = ea >> SID_SHIFT;
- vflags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmalloc_psize].sllp;
- slb_shadow_update(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX);
- slb_flush_and_rebolt();
+ if (preload_hit(ti, esid))
+ return false;
+
+ idx = (ti->slb_preload_tail + ti->slb_preload_nr) % SLB_PRELOAD_NR;
+ ti->slb_preload_esid[idx] = esid;
+ if (ti->slb_preload_nr == SLB_PRELOAD_NR)
+ ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
+ else
+ ti->slb_preload_nr++;
+
+ return true;
}
-/* Helper function to compare esids. There are four cases to handle.
- * 1. The system is not 1T segment size capable. Use the GET_ESID compare.
- * 2. The system is 1T capable, both addresses are < 1T, use the GET_ESID compare.
- * 3. The system is 1T capable, only one of the two addresses is > 1T. This is not a match.
- * 4. The system is 1T capable, both addresses are > 1T, use the GET_ESID_1T macro to compare.
- */
-static inline int esids_match(unsigned long addr1, unsigned long addr2)
+static void preload_age(struct thread_info *ti)
{
- int esid_1t_count;
+ if (!ti->slb_preload_nr)
+ return;
+ ti->slb_preload_nr--;
+ ti->slb_preload_tail = (ti->slb_preload_tail + 1) % SLB_PRELOAD_NR;
+}
- /* System is not 1T segment size capable. */
- if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
- return (GET_ESID(addr1) == GET_ESID(addr2));
+void slb_setup_new_exec(void)
+{
+ struct thread_info *ti = current_thread_info();
+ struct mm_struct *mm = current->mm;
+ unsigned long exec = 0x10000000;
- esid_1t_count = (((addr1 >> SID_SHIFT_1T) != 0) +
- ((addr2 >> SID_SHIFT_1T) != 0));
+ WARN_ON(irqs_disabled());
- /* both addresses are < 1T */
- if (esid_1t_count == 0)
- return (GET_ESID(addr1) == GET_ESID(addr2));
+ /*
+ * preload cache can only be used to determine whether a SLB
+ * entry exists if it does not start to overflow.
+ */
+ if (ti->slb_preload_nr + 2 > SLB_PRELOAD_NR)
+ return;
- /* One address < 1T, the other > 1T. Not a match */
- if (esid_1t_count == 1)
- return 0;
+ hard_irq_disable();
- /* Both addresses are > 1T. */
- return (GET_ESID_1T(addr1) == GET_ESID_1T(addr2));
+ /*
+ * We have no good place to clear the slb preload cache on exec,
+ * flush_thread is about the earliest arch hook but that happens
+ * after we switch to the mm and have aleady preloaded the SLBEs.
+ *
+ * For the most part that's probably okay to use entries from the
+ * previous exec, they will age out if unused. It may turn out to
+ * be an advantage to clear the cache before switching to it,
+ * however.
+ */
+
+ /*
+ * preload some userspace segments into the SLB.
+ * Almost all 32 and 64bit PowerPC executables are linked at
+ * 0x10000000 so it makes sense to preload this segment.
+ */
+ if (!is_kernel_addr(exec)) {
+ if (preload_add(ti, exec))
+ slb_allocate_user(mm, exec);
+ }
+
+ /* Libraries and mmaps. */
+ if (!is_kernel_addr(mm->mmap_base)) {
+ if (preload_add(ti, mm->mmap_base))
+ slb_allocate_user(mm, mm->mmap_base);
+ }
+
+ /* see switch_slb */
+ asm volatile("isync" : : : "memory");
+
+ local_irq_enable();
}
+void preload_new_slb_context(unsigned long start, unsigned long sp)
+{
+ struct thread_info *ti = current_thread_info();
+ struct mm_struct *mm = current->mm;
+ unsigned long heap = mm->start_brk;
+
+ WARN_ON(irqs_disabled());
+
+ /* see above */
+ if (ti->slb_preload_nr + 3 > SLB_PRELOAD_NR)
+ return;
+
+ hard_irq_disable();
+
+ /* Userspace entry address. */
+ if (!is_kernel_addr(start)) {
+ if (preload_add(ti, start))
+ slb_allocate_user(mm, start);
+ }
+
+ /* Top of stack, grows down. */
+ if (!is_kernel_addr(sp)) {
+ if (preload_add(ti, sp))
+ slb_allocate_user(mm, sp);
+ }
+
+ /* Bottom of heap, grows up. */
+ if (heap && !is_kernel_addr(heap)) {
+ if (preload_add(ti, heap))
+ slb_allocate_user(mm, heap);
+ }
+
+ /* see switch_slb */
+ asm volatile("isync" : : : "memory");
+
+ local_irq_enable();
+}
+
+
/* Flush all user entries from the segment table of the current processor. */
void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
{
- unsigned long offset;
- unsigned long slbie_data = 0;
- unsigned long pc = KSTK_EIP(tsk);
- unsigned long stack = KSTK_ESP(tsk);
- unsigned long exec_base;
+ struct thread_info *ti = task_thread_info(tsk);
+ unsigned char i;
/*
* We need interrupts hard-disabled here, not just soft-disabled,
* which would update the slb_cache/slb_cache_ptr fields in the PACA.
*/
hard_irq_disable();
- offset = get_paca()->slb_cache_ptr;
- if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
- offset <= SLB_CACHE_ENTRIES) {
- int i;
- asm volatile("isync" : : : "memory");
- for (i = 0; i < offset; i++) {
- slbie_data = (unsigned long)get_paca()->slb_cache[i]
- << SID_SHIFT; /* EA */
- slbie_data |= user_segment_size(slbie_data)
- << SLBIE_SSIZE_SHIFT;
- slbie_data |= SLBIE_C; /* C set for user addresses */
- asm volatile("slbie %0" : : "r" (slbie_data));
- }
- asm volatile("isync" : : : "memory");
+ asm volatile("isync" : : : "memory");
+ if (cpu_has_feature(CPU_FTR_ARCH_300)) {
+ /*
+ * SLBIA IH=3 invalidates all Class=1 SLBEs and their
+ * associated lookaside structures, which matches what
+ * switch_slb wants. So ARCH_300 does not use the slb
+ * cache.
+ */
+ asm volatile(PPC_SLBIA(3));
} else {
- __slb_flush_and_rebolt();
- }
+ unsigned long offset = get_paca()->slb_cache_ptr;
+
+ if (!mmu_has_feature(MMU_FTR_NO_SLBIE_B) &&
+ offset <= SLB_CACHE_ENTRIES) {
+ unsigned long slbie_data = 0;
+
+ for (i = 0; i < offset; i++) {
+ unsigned long ea;
+
+ ea = (unsigned long)
+ get_paca()->slb_cache[i] << SID_SHIFT;
+ /*
+ * Could assert_slb_exists here, but hypervisor
+ * or machine check could have come in and
+ * removed the entry at this point.
+ */
+
+ slbie_data = ea;
+ slbie_data |= user_segment_size(slbie_data)
+ << SLBIE_SSIZE_SHIFT;
+ slbie_data |= SLBIE_C; /* user slbs have C=1 */
+ asm volatile("slbie %0" : : "r" (slbie_data));
+ }
+
+ /* Workaround POWER5 < DD2.1 issue */
+ if (!cpu_has_feature(CPU_FTR_ARCH_207S) && offset == 1)
+ asm volatile("slbie %0" : : "r" (slbie_data));
+
+ } else {
+ struct slb_shadow *p = get_slb_shadow();
+ unsigned long ksp_esid_data =
+ be64_to_cpu(p->save_area[KSTACK_INDEX].esid);
+ unsigned long ksp_vsid_data =
+ be64_to_cpu(p->save_area[KSTACK_INDEX].vsid);
+
+ asm volatile(PPC_SLBIA(1) "\n"
+ "slbmte %0,%1\n"
+ "isync"
+ :: "r"(ksp_vsid_data),
+ "r"(ksp_esid_data));
+
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+ }
- /* Workaround POWER5 < DD2.1 issue */
- if (offset == 1 || offset > SLB_CACHE_ENTRIES)
- asm volatile("slbie %0" : : "r" (slbie_data));
+ get_paca()->slb_cache_ptr = 0;
+ }
+ get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
- get_paca()->slb_cache_ptr = 0;
copy_mm_to_paca(mm);
/*
- * preload some userspace segments into the SLB.
- * Almost all 32 and 64bit PowerPC executables are linked at
- * 0x10000000 so it makes sense to preload this segment.
+ * We gradually age out SLBs after a number of context switches to
+ * reduce reload overhead of unused entries (like we do with FP/VEC
+ * reload). Each time we wrap 256 switches, take an entry out of the
+ * SLB preload cache.
*/
- exec_base = 0x10000000;
-
- if (is_kernel_addr(pc) || is_kernel_addr(stack) ||
- is_kernel_addr(exec_base))
- return;
+ tsk->thread.load_slb++;
+ if (!tsk->thread.load_slb) {
+ unsigned long pc = KSTK_EIP(tsk);
- slb_allocate(pc);
+ preload_age(ti);
+ preload_add(ti, pc);
+ }
- if (!esids_match(pc, stack))
- slb_allocate(stack);
+ for (i = 0; i < ti->slb_preload_nr; i++) {
+ unsigned char idx;
+ unsigned long ea;
- if (!esids_match(pc, exec_base) &&
- !esids_match(stack, exec_base))
- slb_allocate(exec_base);
-}
+ idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
+ ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
-static inline void patch_slb_encoding(unsigned int *insn_addr,
- unsigned int immed)
-{
+ slb_allocate_user(mm, ea);
+ }
/*
- * This function patches either an li or a cmpldi instruction with
- * a new immediate value. This relies on the fact that both li
- * (which is actually addi) and cmpldi both take a 16-bit immediate
- * value, and it is situated in the same location in the instruction,
- * ie. bits 16-31 (Big endian bit order) or the lower 16 bits.
- * The signedness of the immediate operand differs between the two
- * instructions however this code is only ever patching a small value,
- * much less than 1 << 15, so we can get away with it.
- * To patch the value we read the existing instruction, clear the
- * immediate value, and or in our new value, then write the instruction
- * back.
+ * Synchronize slbmte preloads with possible subsequent user memory
+ * address accesses by the kernel (user mode won't happen until
+ * rfid, which is safe).
*/
- unsigned int insn = (*insn_addr & 0xffff0000) | immed;
- patch_instruction(insn_addr, insn);
+ asm volatile("isync" : : : "memory");
}
-extern u32 slb_miss_kernel_load_linear[];
-extern u32 slb_miss_kernel_load_io[];
-extern u32 slb_compare_rr_to_size[];
-extern u32 slb_miss_kernel_load_vmemmap[];
-
void slb_set_size(u16 size)
{
- if (mmu_slb_size == size)
- return;
-
mmu_slb_size = size;
- patch_slb_encoding(slb_compare_rr_to_size, mmu_slb_size);
}
void slb_initialize(void)
{
unsigned long linear_llp, vmalloc_llp, io_llp;
- unsigned long lflags, vflags;
+ unsigned long lflags;
static int slb_encoding_inited;
#ifdef CONFIG_SPARSEMEM_VMEMMAP
unsigned long vmemmap_llp;
#endif
if (!slb_encoding_inited) {
slb_encoding_inited = 1;
- patch_slb_encoding(slb_miss_kernel_load_linear,
- SLB_VSID_KERNEL | linear_llp);
- patch_slb_encoding(slb_miss_kernel_load_io,
- SLB_VSID_KERNEL | io_llp);
- patch_slb_encoding(slb_compare_rr_to_size,
- mmu_slb_size);
-
pr_devel("SLB: linear LLP = %04lx\n", linear_llp);
pr_devel("SLB: io LLP = %04lx\n", io_llp);
-
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- patch_slb_encoding(slb_miss_kernel_load_vmemmap,
- SLB_VSID_KERNEL | vmemmap_llp);
pr_devel("SLB: vmemmap LLP = %04lx\n", vmemmap_llp);
#endif
}
- get_paca()->stab_rr = SLB_NUM_BOLTED;
+ get_paca()->stab_rr = SLB_NUM_BOLTED - 1;
+ get_paca()->slb_kern_bitmap = (1U << SLB_NUM_BOLTED) - 1;
+ get_paca()->slb_used_bitmap = get_paca()->slb_kern_bitmap;
lflags = SLB_VSID_KERNEL | linear_llp;
- vflags = SLB_VSID_KERNEL | vmalloc_llp;
/* Invalidate the entire SLB (even entry 0) & all the ERATS */
asm volatile("isync":::"memory");
asm volatile("slbmte %0,%0"::"r" (0) : "memory");
asm volatile("isync; slbia; isync":::"memory");
create_shadowed_slbe(PAGE_OFFSET, mmu_kernel_ssize, lflags, LINEAR_INDEX);
- create_shadowed_slbe(VMALLOC_START, mmu_kernel_ssize, vflags, VMALLOC_INDEX);
/* For the boot cpu, we're running on the stack in init_thread_union,
* which is in the first segment of the linear mapping, and also
asm volatile("isync":::"memory");
}
-static void insert_slb_entry(unsigned long vsid, unsigned long ea,
- int bpsize, int ssize)
+static void slb_cache_update(unsigned long esid_data)
{
- unsigned long flags, vsid_data, esid_data;
- enum slb_index index;
int slb_cache_index;
- /*
- * We are irq disabled, hence should be safe to access PACA.
- */
- VM_WARN_ON(!irqs_disabled());
-
- /*
- * We can't take a PMU exception in the following code, so hard
- * disable interrupts.
- */
- hard_irq_disable();
-
- index = get_paca()->stab_rr;
-
- /*
- * simple round-robin replacement of slb starting at SLB_NUM_BOLTED.
- */
- if (index < (mmu_slb_size - 1))
- index++;
- else
- index = SLB_NUM_BOLTED;
-
- get_paca()->stab_rr = index;
-
- flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
- vsid_data = (vsid << slb_vsid_shift(ssize)) | flags |
- ((unsigned long) ssize << SLB_VSID_SSIZE_SHIFT);
- esid_data = mk_esid_data(ea, ssize, index);
-
- /*
- * No need for an isync before or after this slbmte. The exception
- * we enter with and the rfid we exit with are context synchronizing.
- * Also we only handle user segments here.
- */
- asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data)
- : "memory");
+ if (cpu_has_feature(CPU_FTR_ARCH_300))
+ return; /* ISAv3.0B and later does not use slb_cache */
/*
* Now update slb cache entries
*/
- slb_cache_index = get_paca()->slb_cache_ptr;
+ slb_cache_index = local_paca->slb_cache_ptr;
if (slb_cache_index < SLB_CACHE_ENTRIES) {
/*
* We have space in slb cache for optimized switch_slb().
* Top 36 bits from esid_data as per ISA
*/
- get_paca()->slb_cache[slb_cache_index++] = esid_data >> 28;
- get_paca()->slb_cache_ptr++;
+ local_paca->slb_cache[slb_cache_index++] = esid_data >> 28;
+ local_paca->slb_cache_ptr++;
} else {
/*
* Our cache is full and the current cache content strictly
* doesn't indicate the active SLB conents. Bump the ptr
* so that switch_slb() will ignore the cache.
*/
- get_paca()->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
+ local_paca->slb_cache_ptr = SLB_CACHE_ENTRIES + 1;
}
}
-static void handle_multi_context_slb_miss(int context_id, unsigned long ea)
+static enum slb_index alloc_slb_index(bool kernel)
{
- struct mm_struct *mm = current->mm;
- unsigned long vsid;
- int bpsize;
+ enum slb_index index;
/*
- * We are always above 1TB, hence use high user segment size.
+ * The allocation bitmaps can become out of synch with the SLB
+ * when the _switch code does slbie when bolting a new stack
+ * segment and it must not be anywhere else in the SLB. This leaves
+ * a kernel allocated entry that is unused in the SLB. With very
+ * large systems or small segment sizes, the bitmaps could slowly
+ * fill with these entries. They will eventually be cleared out
+ * by the round robin allocator in that case, so it's probably not
+ * worth accounting for.
*/
- vsid = get_vsid(context_id, ea, mmu_highuser_ssize);
- bpsize = get_slice_psize(mm, ea);
- insert_slb_entry(vsid, ea, bpsize, mmu_highuser_ssize);
+
+ /*
+ * SLBs beyond 32 entries are allocated with stab_rr only
+ * POWER7/8/9 have 32 SLB entries, this could be expanded if a
+ * future CPU has more.
+ */
+ if (local_paca->slb_used_bitmap != U32_MAX) {
+ index = ffz(local_paca->slb_used_bitmap);
+ local_paca->slb_used_bitmap |= 1U << index;
+ if (kernel)
+ local_paca->slb_kern_bitmap |= 1U << index;
+ } else {
+ /* round-robin replacement of slb starting at SLB_NUM_BOLTED. */
+ index = local_paca->stab_rr;
+ if (index < (mmu_slb_size - 1))
+ index++;
+ else
+ index = SLB_NUM_BOLTED;
+ local_paca->stab_rr = index;
+ if (index < 32) {
+ if (kernel)
+ local_paca->slb_kern_bitmap |= 1U << index;
+ else
+ local_paca->slb_kern_bitmap &= ~(1U << index);
+ }
+ }
+ BUG_ON(index < SLB_NUM_BOLTED);
+
+ return index;
}
-void slb_miss_large_addr(struct pt_regs *regs)
+static long slb_insert_entry(unsigned long ea, unsigned long context,
+ unsigned long flags, int ssize, bool kernel)
{
- enum ctx_state prev_state = exception_enter();
- unsigned long ea = regs->dar;
- int context;
+ unsigned long vsid;
+ unsigned long vsid_data, esid_data;
+ enum slb_index index;
- if (REGION_ID(ea) != USER_REGION_ID)
- goto slb_bad_addr;
+ vsid = get_vsid(context, ea, ssize);
+ if (!vsid)
+ return -EFAULT;
/*
- * Are we beyound what the page table layout supports ?
+ * There must not be a kernel SLB fault in alloc_slb_index or before
+ * slbmte here or the allocation bitmaps could get out of whack with
+ * the SLB.
+ *
+ * User SLB faults or preloads take this path which might get inlined
+ * into the caller, so add compiler barriers here to ensure unsafe
+ * memory accesses do not come between.
*/
- if ((ea & ~REGION_MASK) >= H_PGTABLE_RANGE)
- goto slb_bad_addr;
+ barrier();
- /* Lower address should have been handled by asm code */
- if (ea < (1UL << MAX_EA_BITS_PER_CONTEXT))
- goto slb_bad_addr;
+ index = alloc_slb_index(kernel);
+
+ vsid_data = __mk_vsid_data(vsid, ssize, flags);
+ esid_data = mk_esid_data(ea, ssize, index);
+
+ /*
+ * No need for an isync before or after this slbmte. The exception
+ * we enter with and the rfid we exit with are context synchronizing.
+ * User preloads should add isync afterwards in case the kernel
+ * accesses user memory before it returns to userspace with rfid.
+ */
+ assert_slb_notexists(ea);
+ asm volatile("slbmte %0, %1" : : "r" (vsid_data), "r" (esid_data));
+
+ barrier();
+
+ if (!kernel)
+ slb_cache_update(esid_data);
+
+ return 0;
+}
+
+static long slb_allocate_kernel(unsigned long ea, unsigned long id)
+{
+ unsigned long context;
+ unsigned long flags;
+ int ssize;
+
+ if (id == KERNEL_REGION_ID) {
+
+ /* We only support upto MAX_PHYSMEM_BITS */
+ if ((ea & ~REGION_MASK) > (1UL << MAX_PHYSMEM_BITS))
+ return -EFAULT;
+
+ flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_linear_psize].sllp;
+
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ } else if (id == VMEMMAP_REGION_ID) {
+
+ if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+ return -EFAULT;
+
+ flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_vmemmap_psize].sllp;
+#endif
+ } else if (id == VMALLOC_REGION_ID) {
+
+ if ((ea & ~REGION_MASK) >= (1ULL << MAX_EA_BITS_PER_CONTEXT))
+ return -EFAULT;
+
+ if (ea < H_VMALLOC_END)
+ flags = get_paca()->vmalloc_sllp;
+ else
+ flags = SLB_VSID_KERNEL | mmu_psize_defs[mmu_io_psize].sllp;
+ } else {
+ return -EFAULT;
+ }
+
+ ssize = MMU_SEGSIZE_1T;
+ if (!mmu_has_feature(MMU_FTR_1T_SEGMENT))
+ ssize = MMU_SEGSIZE_256M;
+
+ context = get_kernel_context(ea);
+ return slb_insert_entry(ea, context, flags, ssize, true);
+}
+
+static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
+{
+ unsigned long context;
+ unsigned long flags;
+ int bpsize;
+ int ssize;
/*
* consider this as bad access if we take a SLB miss
* on an address above addr limit.
*/
- if (ea >= current->mm->context.slb_addr_limit)
- goto slb_bad_addr;
+ if (ea >= mm->context.slb_addr_limit)
+ return -EFAULT;
- context = get_ea_context(¤t->mm->context, ea);
+ context = get_user_context(&mm->context, ea);
if (!context)
- goto slb_bad_addr;
+ return -EFAULT;
+
+ if (unlikely(ea >= H_PGTABLE_RANGE)) {
+ WARN_ON(1);
+ return -EFAULT;
+ }
- handle_multi_context_slb_miss(context, ea);
- exception_exit(prev_state);
- return;
+ ssize = user_segment_size(ea);
-slb_bad_addr:
- if (user_mode(regs))
- _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
- else
- bad_page_fault(regs, ea, SIGSEGV);
- exception_exit(prev_state);
+ bpsize = get_slice_psize(mm, ea);
+ flags = SLB_VSID_USER | mmu_psize_defs[bpsize].sllp;
+
+ return slb_insert_entry(ea, context, flags, ssize, false);
+}
+
+long do_slb_fault(struct pt_regs *regs, unsigned long ea)
+{
+ unsigned long id = REGION_ID(ea);
+
+ /* IRQs are not reconciled here, so can't check irqs_disabled */
+ VM_WARN_ON(mfmsr() & MSR_EE);
+
+ if (unlikely(!(regs->msr & MSR_RI)))
+ return -EINVAL;
+
+ /*
+ * SLB kernel faults must be very careful not to touch anything
+ * that is not bolted. E.g., PACA and global variables are okay,
+ * mm->context stuff is not.
+ *
+ * SLB user faults can access all of kernel memory, but must be
+ * careful not to touch things like IRQ state because it is not
+ * "reconciled" here. The difficulty is that we must use
+ * fast_exception_return to return from kernel SLB faults without
+ * looking at possible non-bolted memory. We could test user vs
+ * kernel faults in the interrupt handler asm and do a full fault,
+ * reconcile, ret_from_except for user faults which would make them
+ * first class kernel code. But for performance it's probably nicer
+ * if they go via fast_exception_return too.
+ */
+ if (id >= KERNEL_REGION_ID) {
+ long err;
+#ifdef CONFIG_DEBUG_VM
+ /* Catch recursive kernel SLB faults. */
+ BUG_ON(local_paca->in_kernel_slb_handler);
+ local_paca->in_kernel_slb_handler = 1;
+#endif
+ err = slb_allocate_kernel(ea, id);
+#ifdef CONFIG_DEBUG_VM
+ local_paca->in_kernel_slb_handler = 0;
+#endif
+ return err;
+ } else {
+ struct mm_struct *mm = current->mm;
+ long err;
+
+ if (unlikely(!mm))
+ return -EFAULT;
+
+ err = slb_allocate_user(mm, ea);
+ if (!err)
+ preload_add(current_thread_info(), ea);
+
+ return err;
+ }
+}
+
+void do_bad_slb_fault(struct pt_regs *regs, unsigned long ea, long err)
+{
+ if (err == -EFAULT) {
+ if (user_mode(regs))
+ _exception(SIGSEGV, regs, SEGV_BNDERR, ea);
+ else
+ bad_page_fault(regs, ea, SIGSEGV);
+ } else if (err == -EINVAL) {
+ unrecoverable_exception(regs);
+ } else {
+ BUG();
+ }
}
+++ /dev/null
-/*
- * Low-level SLB routines
- *
- * Copyright (C) 2004 David Gibson <dwg@au.ibm.com>, IBM
- *
- * Based on earlier C version:
- * Dave Engebretsen and Mike Corrigan {engebret|mikejc}@us.ibm.com
- * Copyright (c) 2001 Dave Engebretsen
- * Copyright (C) 2002 Anton Blanchard <anton@au.ibm.com>, IBM
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <asm/processor.h>
-#include <asm/ppc_asm.h>
-#include <asm/asm-offsets.h>
-#include <asm/cputable.h>
-#include <asm/page.h>
-#include <asm/mmu.h>
-#include <asm/pgtable.h>
-#include <asm/firmware.h>
-#include <asm/feature-fixups.h>
-
-/*
- * This macro generates asm code to compute the VSID scramble
- * function. Used in slb_allocate() and do_stab_bolted. The function
- * computed is: (protovsid*VSID_MULTIPLIER) % VSID_MODULUS
- *
- * rt = register containing the proto-VSID and into which the
- * VSID will be stored
- * rx = scratch register (clobbered)
- * rf = flags
- *
- * - rt and rx must be different registers
- * - The answer will end up in the low VSID_BITS bits of rt. The higher
- * bits may contain other garbage, so you may need to mask the
- * result.
- */
-#define ASM_VSID_SCRAMBLE(rt, rx, rf, size) \
- lis rx,VSID_MULTIPLIER_##size@h; \
- ori rx,rx,VSID_MULTIPLIER_##size@l; \
- mulld rt,rt,rx; /* rt = rt * MULTIPLIER */ \
-/* \
- * powermac get slb fault before feature fixup, so make 65 bit part \
- * the default part of feature fixup \
- */ \
-BEGIN_MMU_FTR_SECTION \
- srdi rx,rt,VSID_BITS_65_##size; \
- clrldi rt,rt,(64-VSID_BITS_65_##size); \
- add rt,rt,rx; \
- addi rx,rt,1; \
- srdi rx,rx,VSID_BITS_65_##size; \
- add rt,rt,rx; \
- rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_65_##size)); \
-MMU_FTR_SECTION_ELSE \
- srdi rx,rt,VSID_BITS_##size; \
- clrldi rt,rt,(64-VSID_BITS_##size); \
- add rt,rt,rx; /* add high and low bits */ \
- addi rx,rt,1; \
- srdi rx,rx,VSID_BITS_##size; /* extract 2^VSID_BITS bit */ \
- add rt,rt,rx; \
- rldimi rf,rt,SLB_VSID_SHIFT_##size,(64 - (SLB_VSID_SHIFT_##size + VSID_BITS_##size)); \
-ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_68_BIT_VA)
-
-
-/* void slb_allocate(unsigned long ea);
- *
- * Create an SLB entry for the given EA (user or kernel).
- * r3 = faulting address, r13 = PACA
- * r9, r10, r11 are clobbered by this function
- * r3 is preserved.
- * No other registers are examined or changed.
- */
-_GLOBAL(slb_allocate)
- /*
- * Check if the address falls within the range of the first context, or
- * if we may need to handle multi context. For the first context we
- * allocate the slb entry via the fast path below. For large address we
- * branch out to C-code and see if additional contexts have been
- * allocated.
- * The test here is:
- * (ea & ~REGION_MASK) >= (1ull << MAX_EA_BITS_PER_CONTEXT)
- */
- rldicr. r9,r3,4,(63 - MAX_EA_BITS_PER_CONTEXT - 4)
- bne- 8f
-
- srdi r9,r3,60 /* get region */
- srdi r10,r3,SID_SHIFT /* get esid */
- cmpldi cr7,r9,0xc /* cmp PAGE_OFFSET for later use */
-
- /* r3 = address, r10 = esid, cr7 = <> PAGE_OFFSET */
- blt cr7,0f /* user or kernel? */
-
- /* Check if hitting the linear mapping or some other kernel space
- */
- bne cr7,1f
-
- /* Linear mapping encoding bits, the "li" instruction below will
- * be patched by the kernel at boot
- */
-.globl slb_miss_kernel_load_linear
-slb_miss_kernel_load_linear:
- li r11,0
- /*
- * context = (ea >> 60) - (0xc - 1)
- * r9 = region id.
- */
- subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET
-
-BEGIN_FTR_SECTION
- b .Lslb_finish_load
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
- b .Lslb_finish_load_1T
-
-1:
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
- cmpldi cr0,r9,0xf
- bne 1f
-/* Check virtual memmap region. To be patched at kernel boot */
-.globl slb_miss_kernel_load_vmemmap
-slb_miss_kernel_load_vmemmap:
- li r11,0
- b 6f
-1:
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
-
- /*
- * r10 contains the ESID, which is the original faulting EA shifted
- * right by 28 bits. We need to compare that with (H_VMALLOC_END >> 28)
- * which is 0xd00038000. That can't be used as an immediate, even if we
- * ignored the 0xd, so we have to load it into a register, and we only
- * have one register free. So we must load all of (H_VMALLOC_END >> 28)
- * into a register and compare ESID against that.
- */
- lis r11,(H_VMALLOC_END >> 32)@h // r11 = 0xffffffffd0000000
- ori r11,r11,(H_VMALLOC_END >> 32)@l // r11 = 0xffffffffd0003800
- // Rotate left 4, then mask with 0xffffffff0
- rldic r11,r11,4,28 // r11 = 0xd00038000
- cmpld r10,r11 // if r10 >= r11
- bge 5f // goto io_mapping
-
- /*
- * vmalloc mapping gets the encoding from the PACA as the mapping
- * can be demoted from 64K -> 4K dynamically on some machines.
- */
- lhz r11,PACAVMALLOCSLLP(r13)
- b 6f
-5:
- /* IO mapping */
-.globl slb_miss_kernel_load_io
-slb_miss_kernel_load_io:
- li r11,0
-6:
- /*
- * context = (ea >> 60) - (0xc - 1)
- * r9 = region id.
- */
- subi r9,r9,KERNEL_REGION_CONTEXT_OFFSET
-
-BEGIN_FTR_SECTION
- b .Lslb_finish_load
-END_MMU_FTR_SECTION_IFCLR(MMU_FTR_1T_SEGMENT)
- b .Lslb_finish_load_1T
-
-0: /*
- * For userspace addresses, make sure this is region 0.
- */
- cmpdi r9, 0
- bne- 8f
- /*
- * user space make sure we are within the allowed limit
- */
- ld r11,PACA_SLB_ADDR_LIMIT(r13)
- cmpld r3,r11
- bge- 8f
-
- /* when using slices, we extract the psize off the slice bitmaps
- * and then we need to get the sllp encoding off the mmu_psize_defs
- * array.
- *
- * XXX This is a bit inefficient especially for the normal case,
- * so we should try to implement a fast path for the standard page
- * size using the old sllp value so we avoid the array. We cannot
- * really do dynamic patching unfortunately as processes might flip
- * between 4k and 64k standard page size
- */
-#ifdef CONFIG_PPC_MM_SLICES
- /* r10 have esid */
- cmpldi r10,16
- /* below SLICE_LOW_TOP */
- blt 5f
- /*
- * Handle hpsizes,
- * r9 is get_paca()->context.high_slices_psize[index], r11 is mask_index
- */
- srdi r11,r10,(SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT + 1) /* index */
- addi r9,r11,PACAHIGHSLICEPSIZE
- lbzx r9,r13,r9 /* r9 is hpsizes[r11] */
- /* r11 = (r10 >> (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)) & 0x1 */
- rldicl r11,r10,(64 - (SLICE_HIGH_SHIFT - SLICE_LOW_SHIFT)),63
- b 6f
-
-5:
- /*
- * Handle lpsizes
- * r9 is get_paca()->context.low_slices_psize[index], r11 is mask_index
- */
- srdi r11,r10,1 /* index */
- addi r9,r11,PACALOWSLICESPSIZE
- lbzx r9,r13,r9 /* r9 is lpsizes[r11] */
- rldicl r11,r10,0,63 /* r11 = r10 & 0x1 */
-6:
- sldi r11,r11,2 /* index * 4 */
- /* Extract the psize and multiply to get an array offset */
- srd r9,r9,r11
- andi. r9,r9,0xf
- mulli r9,r9,MMUPSIZEDEFSIZE
-
- /* Now get to the array and obtain the sllp
- */
- ld r11,PACATOC(r13)
- ld r11,mmu_psize_defs@got(r11)
- add r11,r11,r9
- ld r11,MMUPSIZESLLP(r11)
- ori r11,r11,SLB_VSID_USER
-#else
- /* paca context sllp already contains the SLB_VSID_USER bits */
- lhz r11,PACACONTEXTSLLP(r13)
-#endif /* CONFIG_PPC_MM_SLICES */
-
- ld r9,PACACONTEXTID(r13)
-BEGIN_FTR_SECTION
- cmpldi r10,0x1000
- bge .Lslb_finish_load_1T
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_1T_SEGMENT)
- b .Lslb_finish_load
-
-8: /* invalid EA - return an error indication */
- crset 4*cr0+eq /* indicate failure */
- blr
-
-/*
- * Finish loading of an SLB entry and return
- *
- * r3 = EA, r9 = context, r10 = ESID, r11 = flags, clobbers r9, cr7 = <> PAGE_OFFSET
- */
-.Lslb_finish_load:
- rldimi r10,r9,ESID_BITS,0
- ASM_VSID_SCRAMBLE(r10,r9,r11,256M)
- /* r3 = EA, r11 = VSID data */
- /*
- * Find a slot, round robin. Previously we tried to find a
- * free slot first but that took too long. Unfortunately we
- * dont have any LRU information to help us choose a slot.
- */
-
- mr r9,r3
-
- /* slb_finish_load_1T continues here. r9=EA with non-ESID bits clear */
-7: ld r10,PACASTABRR(r13)
- addi r10,r10,1
- /* This gets soft patched on boot. */
-.globl slb_compare_rr_to_size
-slb_compare_rr_to_size:
- cmpldi r10,0
-
- blt+ 4f
- li r10,SLB_NUM_BOLTED
-
-4:
- std r10,PACASTABRR(r13)
-
-3:
- rldimi r9,r10,0,36 /* r9 = EA[0:35] | entry */
- oris r10,r9,SLB_ESID_V@h /* r10 = r9 | SLB_ESID_V */
-
- /* r9 = ESID data, r11 = VSID data */
-
- /*
- * No need for an isync before or after this slbmte. The exception
- * we enter with and the rfid we exit with are context synchronizing.
- */
- slbmte r11,r10
-
- /* we're done for kernel addresses */
- crclr 4*cr0+eq /* set result to "success" */
- bgelr cr7
-
- /* Update the slb cache */
- lhz r9,PACASLBCACHEPTR(r13) /* offset = paca->slb_cache_ptr */
- cmpldi r9,SLB_CACHE_ENTRIES
- bge 1f
-
- /* still room in the slb cache */
- sldi r11,r9,2 /* r11 = offset * sizeof(u32) */
- srdi r10,r10,28 /* get the 36 bits of the ESID */
- add r11,r11,r13 /* r11 = (u32 *)paca + offset */
- stw r10,PACASLBCACHE(r11) /* paca->slb_cache[offset] = esid */
- addi r9,r9,1 /* offset++ */
- b 2f
-1: /* offset >= SLB_CACHE_ENTRIES */
- li r9,SLB_CACHE_ENTRIES+1
-2:
- sth r9,PACASLBCACHEPTR(r13) /* paca->slb_cache_ptr = offset */
- crclr 4*cr0+eq /* set result to "success" */
- blr
-
-/*
- * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return.
- *
- * r3 = EA, r9 = context, r10 = ESID(256MB), r11 = flags, clobbers r9
- */
-.Lslb_finish_load_1T:
- srdi r10,r10,(SID_SHIFT_1T - SID_SHIFT) /* get 1T ESID */
- rldimi r10,r9,ESID_BITS_1T,0
- ASM_VSID_SCRAMBLE(r10,r9,r11,1T)
-
- li r10,MMU_SEGSIZE_1T
- rldimi r11,r10,SLB_VSID_SSIZE_SHIFT,0 /* insert segment size */
-
- /* r3 = EA, r11 = VSID data */
- clrrdi r9,r3,SID_SHIFT_1T /* clear out non-ESID bits */
- b 7b
-
-
-_ASM_NOKPROBE_SYMBOL(slb_allocate)
-_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_linear)
-_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_io)
-_ASM_NOKPROBE_SYMBOL(slb_compare_rr_to_size)
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
-_ASM_NOKPROBE_SYMBOL(slb_miss_kernel_load_vmemmap)
-#endif
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/hugetlb.h>
+#include <linux/sched/mm.h>
#include <asm/mman.h>
#include <asm/mmu.h>
#include <asm/copro.h>
#endif
+static inline bool slice_addr_is_low(unsigned long addr)
+{
+ u64 tmp = (u64)addr;
+
+ return tmp < SLICE_LOW_TOP;
+}
+
static void slice_range_to_mask(unsigned long start, unsigned long len,
struct slice_mask *ret)
{
if (SLICE_NUM_HIGH)
bitmap_zero(ret->high_slices, SLICE_NUM_HIGH);
- if (start < SLICE_LOW_TOP) {
+ if (slice_addr_is_low(start)) {
unsigned long mend = min(end,
(unsigned long)(SLICE_LOW_TOP - 1));
- (1u << GET_LOW_SLICE_INDEX(start));
}
- if ((start + len) > SLICE_LOW_TOP) {
+ if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
if (!slice_low_has_vma(mm, i))
ret->low_slices |= 1u << i;
- if (high_limit <= SLICE_LOW_TOP)
+ if (slice_addr_is_low(high_limit - 1))
return;
for (i = 0; i < GET_HIGH_SLICE_INDEX(high_limit); i++)
unsigned long end = start + len - 1;
u64 low_slices = 0;
- if (start < SLICE_LOW_TOP) {
+ if (slice_addr_is_low(start)) {
unsigned long mend = min(end,
(unsigned long)(SLICE_LOW_TOP - 1));
if ((low_slices & available->low_slices) != low_slices)
return false;
- if (SLICE_NUM_HIGH && ((start + len) > SLICE_LOW_TOP)) {
+ if (SLICE_NUM_HIGH && !slice_addr_is_low(end)) {
unsigned long start_index = GET_HIGH_SLICE_INDEX(start);
unsigned long align_end = ALIGN(end, (1UL << SLICE_HIGH_SHIFT));
unsigned long count = GET_HIGH_SLICE_INDEX(align_end) - start_index;
copy_mm_to_paca(current->active_mm);
local_irq_save(flags);
- slb_flush_and_rebolt();
+ slb_flush_and_restore_bolted();
local_irq_restore(flags);
#endif
}
int end, unsigned long *boundary_addr)
{
unsigned long slice;
- if (addr < SLICE_LOW_TOP) {
+ if (slice_addr_is_low(addr)) {
slice = GET_LOW_SLICE_INDEX(addr);
*boundary_addr = (slice + end) << SLICE_LOW_SHIFT;
return !!(available->low_slices & (1u << slice));
VM_BUG_ON(radix_enabled());
- if (addr < SLICE_LOW_TOP) {
+ if (slice_addr_is_low(addr)) {
psizes = mm->context.low_slices_psize;
index = GET_LOW_SLICE_INDEX(addr);
} else {
bitmap_fill(mask->high_slices, SLICE_NUM_HIGH);
}
+#ifdef CONFIG_PPC_BOOK3S_64
+void slice_setup_new_exec(void)
+{
+ struct mm_struct *mm = current->mm;
+
+ slice_dbg("slice_setup_new_exec(mm=%p)\n", mm);
+
+ if (!is_32bit_task())
+ return;
+
+ mm->context.slb_addr_limit = DEFAULT_MAP_WINDOW;
+}
+#endif
+
void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
unsigned long len, unsigned int psize)
{
__tlbiel_lpid_guest(lpid, set, RIC_FLUSH_TLB);
asm volatile("ptesync": : :"memory");
+ asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
}
goto local;
}
_tlbie_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
- goto local;
} else {
local:
_tlbiel_va_range(addr, end, pid, PAGE_SIZE, mmu_virtual_psize, true);
for (psize = 0; psize < MMU_PAGE_COUNT; ++psize) {
struct mmu_psize_def *def = &mmu_psize_defs[psize];
+ if (!def->shift)
+ continue;
+
if (tlb1ps & (1U << (def->shift - 10))) {
def->flags |= MMU_PAGE_SIZE_DIRECT;
# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-$(CONFIG_PERF_EVENTS) += callchain.o perf_regs.o
if (ret)
goto err_free_cpuhp_mem;
- pr_info("%s performance monitor hardware support registered\n",
+ pr_debug("%s performance monitor hardware support registered\n",
pmu_ptr->pmu.name);
return 0;
case 6:
if (psel == 0x64)
return pmc >= 3;
+ break;
case 8:
return unit == 0xd;
}
config ACADIA
bool "Acadia"
depends on 40x
- default n
select PPC40x_SIMPLE
select 405EZ
help
config EP405
bool "EP405/EP405PC"
depends on 40x
- default n
select 405GP
select PCI
help
config HOTFOOT
bool "Hotfoot"
depends on 40x
- default n
select PPC40x_SIMPLE
select PCI
help
config KILAUEA
bool "Kilauea"
depends on 40x
- default n
select 405EX
select PPC40x_SIMPLE
select PPC4xx_PCI_EXPRESS
config MAKALU
bool "Makalu"
depends on 40x
- default n
select 405EX
select PCI
select PPC4xx_PCI_EXPRESS
config XILINX_VIRTEX_GENERIC_BOARD
bool "Generic Xilinx Virtex board"
depends on 40x
- default n
select XILINX_VIRTEX_II_PRO
select XILINX_VIRTEX_4_FX
select XILINX_INTC
config OBS600
bool "OpenBlockS 600"
depends on 40x
- default n
select 405EX
select PPC40x_SIMPLE
help
config PPC40x_SIMPLE
bool "Simple PowerPC 40x board support"
depends on 40x
- default n
help
This option enables the simple PowerPC 40x platform support.
config APM8018X
bool "APM8018X"
depends on 40x
- default n
select PPC40x_SIMPLE
help
This option enables support for the AppliedMicro APM8018X evaluation
config PPC_47x
bool "Support for 47x variant"
depends on 44x
- default n
select MPIC
help
This option enables support for the 47x family of processors and is
config BAMBOO
bool "Bamboo"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440EP
select PCI
config BLUESTONE
bool "Bluestone"
depends on 44x
- default n
select PPC44x_SIMPLE
select APM821xx
select PCI_MSI
config SAM440EP
bool "Sam440ep"
depends on 44x
- default n
select 440EP
select PCI
help
config SEQUOIA
bool "Sequoia"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440EPX
help
config TAISHAN
bool "Taishan"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440GX
select PCI
config KATMAI
bool "Katmai"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440SPe
select PCI
config RAINIER
bool "Rainier"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440GRX
select PCI
config WARP
bool "PIKA Warp"
depends on 44x
- default n
select 440EP
help
This option enables support for the PIKA Warp(tm) Appliance. The Warp
config ARCHES
bool "Arches"
depends on 44x
- default n
select PPC44x_SIMPLE
select 460EX # Odd since it uses 460GT but the effects are the same
select PCI
config CANYONLANDS
bool "Canyonlands"
depends on 44x
- default n
select 460EX
select PCI
select PPC4xx_PCI_EXPRESS
config GLACIER
bool "Glacier"
depends on 44x
- default n
select PPC44x_SIMPLE
select 460EX # Odd since it uses 460GT but the effects are the same
select PCI
config REDWOOD
bool "Redwood"
depends on 44x
- default n
select PPC44x_SIMPLE
select 460SX
select PCI
config EIGER
bool "Eiger"
depends on 44x
- default n
select PPC44x_SIMPLE
select 460SX
select PCI
config YOSEMITE
bool "Yosemite"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440EP
select PCI
config ISS4xx
bool "ISS 4xx Simulator"
depends on (44x || 40x)
- default n
select 405GP if 40x
select 440GP if 44x && !PPC_47x
select PPC_FPU
config CURRITUCK
bool "IBM Currituck (476fpe) Support"
depends on PPC_47x
- default n
select SWIOTLB
select 476FPE
select PPC4xx_PCI_EXPRESS
config FSP2
bool "IBM FSP2 (476fpe) Support"
depends on PPC_47x
- default n
select 476FPE
select IBM_EMAC_EMAC4 if IBM_EMAC
select IBM_EMAC_RGMII if IBM_EMAC
config AKEBONO
bool "IBM Akebono (476gtr) Support"
depends on PPC_47x
- default n
select SWIOTLB
select 476FPE
select PPC4xx_PCI_EXPRESS
config ICON
bool "Icon"
depends on 44x
- default n
select PPC44x_SIMPLE
select 440SPe
select PCI
config XILINX_VIRTEX440_GENERIC_BOARD
bool "Generic Xilinx Virtex 5 FXT board support"
depends on 44x
- default n
select XILINX_VIRTEX_5_FXT
select XILINX_INTC
help
config PPC44x_SIMPLE
bool "Simple PowerPC 44x board support"
depends on 44x
- default n
help
This option enables the simple PowerPC 44x platform support.
for_each_compatible_node(np, NULL, compat) {
irq = irq_of_parse_and_map(np, 0);
if (irq == NO_IRQ) {
- pr_err("device tree node %s is missing a interrupt",
- np->name);
+ pr_err("device tree node %pOFn is missing a interrupt",
+ np);
return;
}
rc = request_irq(irq, errirq_handler, 0, np->name, np);
if (rc) {
- pr_err("fsp_of_probe: request_irq failed: np=%s rc=%d",
- np->full_name, rc);
+ pr_err("fsp_of_probe: request_irq failed: np=%pOF rc=%d",
+ np, rc);
return;
}
}
int len;
struct resource rsrc;
- int ioflags;
ocm = ocm_get_node(count);
/* ioremap the non-cached region */
if (ocm->nc.memtotal) {
- ioflags = _PAGE_NO_CACHE | _PAGE_GUARDED | _PAGE_EXEC;
ocm->nc.virt = __ioremap(ocm->nc.phys, ocm->nc.memtotal,
- ioflags);
+ _PAGE_EXEC | PAGE_KERNEL_NCG);
if (!ocm->nc.virt) {
printk(KERN_ERR
/* ioremap the cached region */
if (ocm->c.memtotal) {
- ioflags = _PAGE_EXEC;
ocm->c.virt = __ioremap(ocm->c.phys, ocm->c.memtotal,
- ioflags);
+ _PAGE_EXEC | PAGE_KERNEL);
if (!ocm->c.virt) {
printk(KERN_ERR
u32 reset_type = DBCR0_RST_SYSTEM;
const u32 *prop;
- np = of_find_node_by_type(NULL, "cpu");
+ np = of_get_cpu_node(0, NULL);
if (np) {
prop = of_get_property(np, "reset-type", NULL);
config PQ2ADS
bool
- default n
config 8260
bool
/* Map the spin table */
if (ioremappable)
- spin_table = ioremap_prot(*cpu_rel_addr,
- sizeof(struct epapr_spin_table), _PAGE_COHERENT);
+ spin_table = ioremap_coherent(*cpu_rel_addr,
+ sizeof(struct epapr_spin_table));
else
spin_table = phys_to_virt(*cpu_rel_addr);
int found = 0;
/* The cpu node should have timebase and clock frequency properties */
- cpu = of_find_node_by_type(NULL, "cpu");
+ cpu = of_get_cpu_node(0, NULL);
if (cpu) {
fp = of_get_property(cpu, name, NULL);
* we have to enable the timebase). The decrementer interrupt
* is wired into the vector table, nothing to do here for that.
*/
- cpu = of_find_node_by_type(NULL, "cpu");
+ cpu = of_get_cpu_node(0, NULL);
virq= irq_of_parse_and_map(cpu, 0);
+ of_node_put(cpu);
irq = virq_to_hw(virq);
sys_tmr2 = immr_map(im_sit);
pr_err("Machine check in kernel mode.\n");
pr_err("Caused by (from SRR1=%lx): ", reason);
if (reason & 0x40000000)
- pr_err("Fetch error at address %lx\n", regs->nip);
+ pr_cont("Fetch error at address %lx\n", regs->nip);
else
- pr_err("Data access error at address %lx\n", regs->dar);
+ pr_cont("Data access error at address %lx\n", regs->dar);
#ifdef CONFIG_PCI
/* the qspan pci read routines can cause machine checks -- Cort
config KVM_GUEST
bool "KVM Guest support"
- default n
select EPAPR_PARAVIRT
---help---
This option enables various optimizations for running under the KVM
config EPAPR_PARAVIRT
bool "ePAPR para-virtualization support"
- default n
help
Enables ePAPR para-virtualization support for guests.
config UDBG_RTAS_CONSOLE
bool "RTAS based debug console"
depends on PPC_RTAS
- default n
config PPC_SMP_MUXED_IPI
bool
config IPIC
bool
- default n
config MPIC
bool
- default n
config MPIC_TIMER
bool "MPIC Global Timer"
depends on MPIC && FSL_SOC
- default n
help
The MPIC global timer is a hardware timer inside the
Freescale PIC complying with OpenPIC standard. When the
config FSL_MPIC_TIMER_WAKEUP
tristate "Freescale MPIC global timer wakeup driver"
depends on FSL_SOC && MPIC_TIMER && PM
- default n
help
The driver provides a way to wake up the system by MPIC
timer.
config PPC_EPAPR_HV_PIC
bool
- default n
select EPAPR_PARAVIRT
config MPIC_WEIRD
bool
- default n
config MPIC_MSGR
bool "MPIC message register support"
depends on MPIC
- default n
help
Enables support for the MPIC message registers. These
registers are used for inter-processor communication.
config PPC_I8259
bool
- default n
config U3_DART
bool
depends on PPC64
- default n
config PPC_RTAS
bool
- default n
config RTAS_ERROR_LOGGING
bool
depends on PPC_RTAS
- default n
config PPC_RTAS_DAEMON
bool
depends on PPC_RTAS
- default n
config RTAS_PROC
bool "Proc interface to RTAS"
config MMIO_NVRAM
bool
- default n
config MPIC_U3_HT_IRQS
bool
- default n
config MPIC_BROKEN_REGREAD
bool
config PPC_MPC106
bool
- default n
config PPC_970_NAP
bool
- default n
config PPC_P7_NAP
bool
- default n
config PPC_INDIRECT_PIO
bool
config FSL_ULI1575
bool
- default n
select GENERIC_ISA_DMA
help
Supports for the ULI1575 PCIe south bridge that exists on some
# SPDX-License-Identifier: GPL-2.0
config PPC64
bool "64-bit kernel"
- default n
select ZLIB_DEFLATE
help
This option selects whether a 32-bit or a 64-bit kernel
select PPC_HAVE_PMU_SUPPORT
select SYS_SUPPORTS_HUGETLBFS
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
bool
default y if PPC_BOOK3S_64
default y if PPC_8xx && HUGETLB_PAGE
- default n
config PPC_HAVE_PMU_SUPPORT
bool
config FORCE_SMP
# Allow platforms to force SMP=y by selecting this
bool
- default n
select SMP
config SMP
config PPC_DOORBELL
bool
- default n
endmenu
# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
obj-$(CONFIG_FSL_ULI1575) += fsl_uli1575.o
obj-$(CONFIG_PPC_PMAC) += powermac/
# SPDX-License-Identifier: GPL-2.0
config PPC_CELL
bool
- default n
config PPC_CELL_COMMON
bool
select IBM_EMAC_RGMII if IBM_EMAC
select IBM_EMAC_ZMII if IBM_EMAC #test only
select IBM_EMAC_TAH if IBM_EMAC #test only
- default n
config PPC_IBM_CELL_BLADE
bool "IBM Cell Blade"
config SPU_BASE
bool
- default n
select PPC_COPRO_BASE
config CBE_RAS
static int __init spu_map_interrupts(struct spu *spu, struct device_node *np)
{
- struct of_phandle_args oirq;
- int ret;
int i;
for (i=0; i < 3; i++) {
- ret = of_irq_parse_one(np, i, &oirq);
- if (ret) {
- pr_debug("spu_new: failed to get irq %d\n", i);
- goto err;
- }
- ret = -EINVAL;
- pr_debug(" irq %d no 0x%x on %pOF\n", i, oirq.args[0],
- oirq.np);
- spu->irqs[i] = irq_create_of_mapping(&oirq);
- if (!spu->irqs[i]) {
- pr_debug("spu_new: failed to map it !\n");
+ spu->irqs[i] = irq_of_parse_and_map(np, i);
+ if (!spu->irqs[i])
goto err;
- }
}
return 0;
err:
- pr_debug("failed to map irq %x for spu %s\n", *oirq.args,
- spu->name);
+ pr_debug("failed to map irq %x for spu %s\n", i, spu->name);
for (; i >= 0; i--) {
if (spu->irqs[i])
irq_dispose_mapping(spu->irqs[i]);
}
- return ret;
+ return -EINVAL;
}
static int spu_map_resource(struct spu *spu, int nr,
for_each_node_by_type(node, "spe") {
ret = fn(node);
if (ret) {
- printk(KERN_WARNING "%s: Error initializing %s\n",
- __func__, node->name);
+ printk(KERN_WARNING "%s: Error initializing %pOFn\n",
+ __func__, node);
of_node_put(node);
break;
}
}
error = of_address_to_resource(np, 0, &res);
if (error) {
- pr_err("no valid reg found for %s\n", np->name);
+ pr_err("no valid reg found for %pOFn\n", np);
goto out_put;
}
select PPC_RTAS
select MMIO_NVRAM
select ATA_NONSTANDARD if ATA
- default n
help
This option enables support for the Maple 970FX Evaluation Board.
For more information, refer to <http://www.970eval.com>
config PPC_PASEMI
depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN
bool "PA Semi SoC-based platforms"
- default n
select MPIC
select PCI
select PPC_UDBG_16550
res.start = 0xfd800000;
res.end = res.start + 0x1000;
}
- dma_status = __ioremap(res.start, resource_size(&res), 0);
+ dma_status = ioremap_cache(res.start, resource_size(&res));
pci_dev_put(iob_pdev);
for (i = 0; i < MAX_TXCH; i++)
# SPDX-License-Identifier: GPL-2.0
CFLAGS_bootx_init.o += -fPIC
+CFLAGS_bootx_init.o += $(call cc-option, -fno-stack-protector)
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
-CFLAGS_REMOVE_bootx_init.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_bootx_init.o = $(CC_FLAGS_FTRACE)
endif
obj-y += pic.o setup.o time.o feature.o pci.o \
unsigned long flags;
struct macio_chip *macio;
struct device_node *np;
- struct device_node *cpus;
const int dflt_reset_lines[] = { KL_GPIO_RESET_CPU0,
KL_GPIO_RESET_CPU1,
KL_GPIO_RESET_CPU2,
if (macio->type != macio_keylargo)
return -ENODEV;
- cpus = of_find_node_by_path("/cpus");
- if (cpus == NULL)
- return -ENODEV;
- for (np = cpus->child; np != NULL; np = np->sibling) {
+ for_each_of_cpu_node(np) {
const u32 *num = of_get_property(np, "reg", NULL);
const u32 *rst = of_get_property(np, "soft-reset", NULL);
if (num == NULL || rst == NULL)
break;
}
}
- of_node_put(cpus);
if (np == NULL || reset_io == 0)
reset_io = dflt_reset_lines[param];
unsigned long flags;
struct macio_chip *macio;
struct device_node *np;
- struct device_node *cpus;
macio = &macio_chips[0];
if (macio->type != macio_keylargo2 && macio->type != macio_shasta)
return -ENODEV;
- cpus = of_find_node_by_path("/cpus");
- if (cpus == NULL)
- return -ENODEV;
- for (np = cpus->child; np != NULL; np = np->sibling) {
+ for_each_of_cpu_node(np) {
const u32 *num = of_get_property(np, "reg", NULL);
const u32 *rst = of_get_property(np, "soft-reset", NULL);
if (num == NULL || rst == NULL)
break;
}
}
- of_node_put(cpus);
if (np == NULL || reset_io == 0)
return -ENODEV;
* supposed to be set when not supported, but I'm not very confident
* that all Apple OF revs did it properly, I do it the paranoid way.
*/
- while (uninorth_base && uninorth_rev > 3) {
- struct device_node *cpus = of_find_node_by_path("/cpus");
+ if (uninorth_base && uninorth_rev > 3) {
struct device_node *np;
- if (!cpus || !cpus->child) {
- printk(KERN_WARNING "Can't find CPU(s) in device tree !\n");
- of_node_put(cpus);
- break;
- }
- np = cpus->child;
- /* Nap mode not supported on SMP */
- if (np->sibling) {
- of_node_put(cpus);
- break;
- }
- /* Nap mode not supported if flush-on-lock property is present */
- if (of_get_property(np, "flush-on-lock", NULL)) {
- of_node_put(cpus);
- break;
+ for_each_of_cpu_node(np) {
+ int cpu_count = 1;
+
+ /* Nap mode not supported on SMP */
+ if (of_get_property(np, "flush-on-lock", NULL) ||
+ (cpu_count > 1)) {
+ powersave_nap = 0;
+ of_node_put(np);
+ break;
+ }
+
+ cpu_count++;
+ powersave_nap = 1;
}
- of_node_put(cpus);
- powersave_nap = 1;
- printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
- break;
}
+ if (powersave_nap)
+ printk(KERN_DEBUG "Processor NAP mode on idle enabled.\n");
/* On CPUs that support it (750FX), lowspeed by default during
* NAP mode
{
/* Checks "l2cr-value" property in the registry */
if (cpu_has_feature(CPU_FTR_L2CR)) {
- struct device_node *np = of_find_node_by_name(NULL, "cpus");
- if (!np)
- np = of_find_node_by_type(NULL, "cpu");
- if (np) {
+ struct device_node *np;
+
+ for_each_of_cpu_node(np) {
const unsigned int *l2cr =
of_get_property(np, "l2cr-value", NULL);
if (l2cr) {
_set_L2CR(ppc_override_l2cr_value);
}
of_node_put(np);
+ break;
}
}
/* Set loops_per_jiffy to a half-way reasonable value,
for use until calibrate_delay gets called. */
loops_per_jiffy = 50000000 / HZ;
- cpu = of_find_node_by_type(NULL, "cpu");
- if (cpu != NULL) {
+
+ for_each_of_cpu_node(cpu) {
fp = of_get_property(cpu, "clock-frequency", NULL);
if (fp != NULL) {
if (pvr >= 0x30 && pvr < 0x80)
else
/* 601, 603, etc. */
loops_per_jiffy = *fp / (2 * HZ);
+ of_node_put(cpu);
+ break;
}
- of_node_put(cpu);
}
/* See if newworld or oldworld */
#define DBG(x...)
#endif
-/*
- * Offset between Unix time (1970-based) and Mac time (1904-based). Cuda and PMU
- * times wrap in 2040. If we need to handle later times, the read_time functions
- * need to be changed to interpret wrapped times as post-2040.
- */
-#define RTC_OFFSET 2082844800
-
/*
* Calibrate the decrementer frequency with the VIA timer 1.
*/
return delta;
}
-#ifdef CONFIG_ADB_CUDA
-static time64_t cuda_get_time(void)
-{
- struct adb_request req;
- time64_t now;
-
- if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
- return 0;
- while (!req.complete)
- cuda_poll();
- if (req.reply_len != 7)
- printk(KERN_ERR "cuda_get_time: got %d byte reply\n",
- req.reply_len);
- now = (u32)((req.reply[3] << 24) + (req.reply[4] << 16) +
- (req.reply[5] << 8) + req.reply[6]);
- /* it's either after year 2040, or the RTC has gone backwards */
- WARN_ON(now < RTC_OFFSET);
-
- return now - RTC_OFFSET;
-}
-
-#define cuda_get_rtc_time(tm) rtc_time64_to_tm(cuda_get_time(), (tm))
-
-static int cuda_set_rtc_time(struct rtc_time *tm)
-{
- u32 nowtime;
- struct adb_request req;
-
- nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
- if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
- nowtime >> 24, nowtime >> 16, nowtime >> 8,
- nowtime) < 0)
- return -ENXIO;
- while (!req.complete)
- cuda_poll();
- if ((req.reply_len != 3) && (req.reply_len != 7))
- printk(KERN_ERR "cuda_set_rtc_time: got %d byte reply\n",
- req.reply_len);
- return 0;
-}
-
-#else
-#define cuda_get_time() 0
-#define cuda_get_rtc_time(tm)
-#define cuda_set_rtc_time(tm) 0
-#endif
-
-#ifdef CONFIG_ADB_PMU
-static time64_t pmu_get_time(void)
-{
- struct adb_request req;
- time64_t now;
-
- if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
- return 0;
- pmu_wait_complete(&req);
- if (req.reply_len != 4)
- printk(KERN_ERR "pmu_get_time: got %d byte reply from PMU\n",
- req.reply_len);
- now = (u32)((req.reply[0] << 24) + (req.reply[1] << 16) +
- (req.reply[2] << 8) + req.reply[3]);
-
- /* it's either after year 2040, or the RTC has gone backwards */
- WARN_ON(now < RTC_OFFSET);
-
- return now - RTC_OFFSET;
-}
-
-#define pmu_get_rtc_time(tm) rtc_time64_to_tm(pmu_get_time(), (tm))
-
-static int pmu_set_rtc_time(struct rtc_time *tm)
-{
- u32 nowtime;
- struct adb_request req;
-
- nowtime = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
- if (pmu_request(&req, NULL, 5, PMU_SET_RTC, nowtime >> 24,
- nowtime >> 16, nowtime >> 8, nowtime) < 0)
- return -ENXIO;
- pmu_wait_complete(&req);
- if (req.reply_len != 0)
- printk(KERN_ERR "pmu_set_rtc_time: %d byte reply from PMU\n",
- req.reply_len);
- return 0;
-}
-
-#else
-#define pmu_get_time() 0
-#define pmu_get_rtc_time(tm)
-#define pmu_set_rtc_time(tm) 0
-#endif
-
#ifdef CONFIG_PMAC_SMU
static time64_t smu_get_time(void)
{
return 0;
return rtc_tm_to_time64(&tm);
}
-
-#else
-#define smu_get_time() 0
-#define smu_get_rtc_time(tm, spin)
-#define smu_set_rtc_time(tm, spin) 0
#endif
/* Can't be __init, it's called when suspending and resuming */
{
/* Get the time from the RTC, used only at boot time */
switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
case SYS_CTRLER_CUDA:
return cuda_get_time();
+#endif
+#ifdef CONFIG_ADB_PMU
case SYS_CTRLER_PMU:
return pmu_get_time();
+#endif
+#ifdef CONFIG_PMAC_SMU
case SYS_CTRLER_SMU:
return smu_get_time();
+#endif
default:
return 0;
}
{
/* Get the time from the RTC, used only at boot time */
switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
case SYS_CTRLER_CUDA:
- cuda_get_rtc_time(tm);
+ rtc_time64_to_tm(cuda_get_time(), tm);
break;
+#endif
+#ifdef CONFIG_ADB_PMU
case SYS_CTRLER_PMU:
- pmu_get_rtc_time(tm);
+ rtc_time64_to_tm(pmu_get_time(), tm);
break;
+#endif
+#ifdef CONFIG_PMAC_SMU
case SYS_CTRLER_SMU:
smu_get_rtc_time(tm, 1);
break;
+#endif
default:
;
}
int pmac_set_rtc_time(struct rtc_time *tm)
{
switch (sys_ctrler) {
+#ifdef CONFIG_ADB_CUDA
case SYS_CTRLER_CUDA:
return cuda_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_ADB_PMU
case SYS_CTRLER_PMU:
return pmu_set_rtc_time(tm);
+#endif
+#ifdef CONFIG_PMAC_SMU
case SYS_CTRLER_SMU:
return smu_set_rtc_time(tm, 1);
+#endif
default:
return -ENODEV;
}
select PPC_SCOM
select ARCH_RANDOM
select CPU_FREQ
- select CPU_FREQ_GOV_PERFORMANCE
- select CPU_FREQ_GOV_POWERSAVE
- select CPU_FREQ_GOV_USERSPACE
- select CPU_FREQ_GOV_ONDEMAND
- select CPU_FREQ_GOV_CONSERVATIVE
select PPC_DOORBELL
select MMU_NOTIFIER
select FORCE_SMP
config PPC_MEMTRACE
bool "Enable removal of RAM from kernel mappings for tracing"
depends on PPC_POWERNV && MEMORY_HOTREMOVE
- default n
help
Enabling this option allows for the removal of memory (RAM)
from the kernel mappings to be used for hardware tracing.
eeh_probe_devices();
eeh_addr_cache_build();
- if (eeh_has_flag(EEH_POSTPONED_PROBE)) {
- eeh_clear_flag(EEH_POSTPONED_PROBE);
- if (eeh_enabled())
- pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
- else
- pr_info("EEH: No capable adapters found\n");
- }
-
/* Register OPAL event notifier */
eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR));
if (eeh_event_irq < 0) {
if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA)
return NULL;
- /* Skip if we haven't probed yet */
- if (phb->ioda.pe_rmap[config_addr] == IODA_INVALID_PE) {
- eeh_add_flag(EEH_POSTPONED_PROBE);
- return NULL;
- }
-
/* Initialize eeh device */
edev->class_code = pdn->class_code;
edev->mode &= 0xFFFFFF00;
EEH_STATE_MMIO_ENABLED |
EEH_STATE_DMA_ENABLED);
} else if (!(pe->state & EEH_PE_ISOLATED)) {
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
pnv_eeh_get_phb_diag(pe);
if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
if (phb->freeze_pe)
phb->freeze_pe(phb, pe->addr);
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
pnv_eeh_get_phb_diag(pe);
if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
int ret;
/* The VF PE should have only one child device */
- edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, list);
+ edev = list_first_entry_or_null(&pe->edevs, struct eeh_dev, entry);
pdn = eeh_dev_to_pdn(edev);
if (!pdn)
return -ENXIO;
return pnv_eeh_bridge_reset(bus->self, option);
}
-/**
- * pnv_eeh_wait_state - Wait for PE state
- * @pe: EEH PE
- * @max_wait: maximal period in millisecond
- *
- * Wait for the state of associated PE. It might take some time
- * to retrieve the PE's state.
- */
-static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait)
-{
- int ret;
- int mwait;
-
- while (1) {
- ret = pnv_eeh_get_state(pe, &mwait);
-
- /*
- * If the PE's state is temporarily unavailable,
- * we have to wait for the specified time. Otherwise,
- * the PE's state will be returned immediately.
- */
- if (ret != EEH_STATE_UNAVAILABLE)
- return ret;
-
- if (max_wait <= 0) {
- pr_warn("%s: Timeout getting PE#%x's state (%d)\n",
- __func__, pe->addr, max_wait);
- return EEH_STATE_NOT_SUPPORT;
- }
-
- max_wait -= mwait;
- msleep(mwait);
- }
-
- return EEH_STATE_NOT_SUPPORT;
-}
-
/**
* pnv_eeh_get_log - Retrieve error log
* @pe: EEH PE
if ((ret == EEH_NEXT_ERR_FROZEN_PE ||
ret == EEH_NEXT_ERR_FENCED_PHB) &&
!((*pe)->state & EEH_PE_ISOLATED)) {
- eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(*pe);
pnv_eeh_get_phb_diag(*pe);
if (eeh_has_flag(EEH_EARLY_DUMP_LOG))
}
/* We possibly migrate to another PE */
- eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(*pe);
}
/*
.get_pe_addr = pnv_eeh_get_pe_addr,
.get_state = pnv_eeh_get_state,
.reset = pnv_eeh_reset,
- .wait_state = pnv_eeh_wait_state,
.get_log = pnv_eeh_get_log,
.configure_bridge = pnv_eeh_configure_bridge,
.err_inject = pnv_eeh_err_inject,
walk_memory_range(start_pfn, end_pfn, (void *)MEM_OFFLINE,
change_memblock_state);
- lock_device_hotplug();
- remove_memory(nid, start_pfn << PAGE_SHIFT, nr_pages << PAGE_SHIFT);
- unlock_device_hotplug();
return true;
}
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
- u64 start_pfn, end_pfn, nr_pages;
+ u64 start_pfn, end_pfn, nr_pages, pfn;
u64 base_pfn;
+ u64 bytes = memory_block_size_bytes();
if (!node_spanned_pages(nid))
return 0;
end_pfn = round_down(end_pfn - nr_pages, nr_pages);
for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
- if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true)
+ if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
+ /*
+ * Remove memory in memory block size chunks so that
+ * iomem resources are always split to the same size and
+ * we never try to remove memory that spans two iomem
+ * resources.
+ */
+ lock_device_hotplug();
+ end_pfn = base_pfn + nr_pages;
+ for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
+ remove_memory(nid, pfn << PAGE_SHIFT, bytes);
+ }
+ unlock_device_hotplug();
return base_pfn << PAGE_SHIFT;
+ }
}
return 0;
#include <linux/pci.h>
#include <linux/memblock.h>
#include <linux/iommu.h>
-#include <linux/debugfs.h>
+#include <linux/sizes.h>
#include <asm/debugfs.h>
#include <asm/tlb.h>
*/
static DEFINE_SPINLOCK(npu_context_lock);
-/*
- * When an address shootdown range exceeds this threshold we invalidate the
- * entire TLB on the GPU for the given PID rather than each specific address in
- * the range.
- */
-static uint64_t atsd_threshold = 2 * 1024 * 1024;
-static struct dentry *atsd_threshold_dentry;
-
/*
* Other types of TCE cache invalidation are not functional in the
* hardware.
}
/* MMIO ATSD register offsets */
-#define XTS_ATSD_AVA 1
-#define XTS_ATSD_STAT 2
-
-static void mmio_launch_invalidate(struct mmio_atsd_reg *mmio_atsd_reg,
- unsigned long launch, unsigned long va)
-{
- struct npu *npu = mmio_atsd_reg->npu;
- int reg = mmio_atsd_reg->reg;
-
- __raw_writeq_be(va, npu->mmio_atsd_regs[reg] + XTS_ATSD_AVA);
- eieio();
- __raw_writeq_be(launch, npu->mmio_atsd_regs[reg]);
-}
+#define XTS_ATSD_LAUNCH 0
+#define XTS_ATSD_AVA 1
+#define XTS_ATSD_STAT 2
-static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
- unsigned long pid, bool flush)
+static unsigned long get_atsd_launch_val(unsigned long pid, unsigned long psize)
{
- int i;
- unsigned long launch;
-
- for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
- continue;
+ unsigned long launch = 0;
- /* IS set to invalidate matching PID */
- launch = PPC_BIT(12);
-
- /* PRS set to process-scoped */
- launch |= PPC_BIT(13);
+ if (psize == MMU_PAGE_COUNT) {
+ /* IS set to invalidate entire matching PID */
+ launch |= PPC_BIT(12);
+ } else {
+ /* AP set to invalidate region of psize */
+ launch |= (u64)mmu_get_ap(psize) << PPC_BITLSHIFT(17);
+ }
- /* AP */
- launch |= (u64)
- mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+ /* PRS set to process-scoped */
+ launch |= PPC_BIT(13);
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
+ /* PID */
+ launch |= pid << PPC_BITLSHIFT(38);
- /* No flush */
- launch |= !flush << PPC_BITLSHIFT(39);
+ /* Leave "No flush" (bit 39) 0 so every ATSD performs a flush */
- /* Invalidating the entire process doesn't use a va */
- mmio_launch_invalidate(&mmio_atsd_reg[i], launch, 0);
- }
+ return launch;
}
-static void mmio_invalidate_va(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
- unsigned long va, unsigned long pid, bool flush)
+static void mmio_atsd_regs_write(struct mmio_atsd_reg
+ mmio_atsd_reg[NV_MAX_NPUS], unsigned long offset,
+ unsigned long val)
{
- int i;
- unsigned long launch;
+ struct npu *npu;
+ int i, reg;
for (i = 0; i <= max_npu2_index; i++) {
- if (mmio_atsd_reg[i].reg < 0)
+ reg = mmio_atsd_reg[i].reg;
+ if (reg < 0)
continue;
- /* IS set to invalidate target VA */
- launch = 0;
+ npu = mmio_atsd_reg[i].npu;
+ __raw_writeq_be(val, npu->mmio_atsd_regs[reg] + offset);
+ }
+}
+
+static void mmio_invalidate_pid(struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS],
+ unsigned long pid)
+{
+ unsigned long launch = get_atsd_launch_val(pid, MMU_PAGE_COUNT);
- /* PRS set to process scoped */
- launch |= PPC_BIT(13);
+ /* Invalidating the entire process doesn't use a va */
+ mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
+}
- /* AP */
- launch |= (u64)
- mmu_get_ap(mmu_virtual_psize) << PPC_BITLSHIFT(17);
+static void mmio_invalidate_range(struct mmio_atsd_reg
+ mmio_atsd_reg[NV_MAX_NPUS], unsigned long pid,
+ unsigned long start, unsigned long psize)
+{
+ unsigned long launch = get_atsd_launch_val(pid, psize);
- /* PID */
- launch |= pid << PPC_BITLSHIFT(38);
+ /* Write all VAs first */
+ mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_AVA, start);
- /* No flush */
- launch |= !flush << PPC_BITLSHIFT(39);
+ /* Issue one barrier for all address writes */
+ eieio();
- mmio_launch_invalidate(&mmio_atsd_reg[i], launch, va);
- }
+ /* Launch */
+ mmio_atsd_regs_write(mmio_atsd_reg, XTS_ATSD_LAUNCH, launch);
}
#define mn_to_npu_context(x) container_of(x, struct npu_context, mn)
}
/*
- * Invalidate either a single address or an entire PID depending on
- * the value of va.
+ * Invalidate a virtual address range
*/
-static void mmio_invalidate(struct npu_context *npu_context, int va,
- unsigned long address, bool flush)
+static void mmio_invalidate(struct npu_context *npu_context,
+ unsigned long start, unsigned long size)
{
struct mmio_atsd_reg mmio_atsd_reg[NV_MAX_NPUS];
unsigned long pid = npu_context->mm->context.id;
+ unsigned long atsd_start = 0;
+ unsigned long end = start + size - 1;
+ int atsd_psize = MMU_PAGE_COUNT;
+
+ /*
+ * Convert the input range into one of the supported sizes. If the range
+ * doesn't fit, use the next larger supported size. Invalidation latency
+ * is high, so over-invalidation is preferred to issuing multiple
+ * invalidates.
+ *
+ * A 4K page size isn't supported by NPU/GPU ATS, so that case is
+ * ignored.
+ */
+ if (size == SZ_64K) {
+ atsd_start = start;
+ atsd_psize = MMU_PAGE_64K;
+ } else if (ALIGN_DOWN(start, SZ_2M) == ALIGN_DOWN(end, SZ_2M)) {
+ atsd_start = ALIGN_DOWN(start, SZ_2M);
+ atsd_psize = MMU_PAGE_2M;
+ } else if (ALIGN_DOWN(start, SZ_1G) == ALIGN_DOWN(end, SZ_1G)) {
+ atsd_start = ALIGN_DOWN(start, SZ_1G);
+ atsd_psize = MMU_PAGE_1G;
+ }
if (npu_context->nmmu_flush)
/*
* an invalidate.
*/
acquire_atsd_reg(npu_context, mmio_atsd_reg);
- if (va)
- mmio_invalidate_va(mmio_atsd_reg, address, pid, flush);
+
+ if (atsd_psize == MMU_PAGE_COUNT)
+ mmio_invalidate_pid(mmio_atsd_reg, pid);
else
- mmio_invalidate_pid(mmio_atsd_reg, pid, flush);
+ mmio_invalidate_range(mmio_atsd_reg, pid, atsd_start,
+ atsd_psize);
mmio_invalidate_wait(mmio_atsd_reg);
- if (flush) {
- /*
- * The GPU requires two flush ATSDs to ensure all entries have
- * been flushed. We use PID 0 as it will never be used for a
- * process on the GPU.
- */
- mmio_invalidate_pid(mmio_atsd_reg, 0, true);
- mmio_invalidate_wait(mmio_atsd_reg);
- mmio_invalidate_pid(mmio_atsd_reg, 0, true);
- mmio_invalidate_wait(mmio_atsd_reg);
- }
+
+ /*
+ * The GPU requires two flush ATSDs to ensure all entries have been
+ * flushed. We use PID 0 as it will never be used for a process on the
+ * GPU.
+ */
+ mmio_invalidate_pid(mmio_atsd_reg, 0);
+ mmio_invalidate_wait(mmio_atsd_reg);
+ mmio_invalidate_pid(mmio_atsd_reg, 0);
+ mmio_invalidate_wait(mmio_atsd_reg);
+
release_atsd_reg(mmio_atsd_reg);
}
* There should be no more translation requests for this PID, but we
* need to ensure any entries for it are removed from the TLB.
*/
- mmio_invalidate(npu_context, 0, 0, true);
+ mmio_invalidate(npu_context, 0, ~0UL);
}
static void pnv_npu2_mn_change_pte(struct mmu_notifier *mn,
pte_t pte)
{
struct npu_context *npu_context = mn_to_npu_context(mn);
-
- mmio_invalidate(npu_context, 1, address, true);
+ mmio_invalidate(npu_context, address, PAGE_SIZE);
}
static void pnv_npu2_mn_invalidate_range(struct mmu_notifier *mn,
unsigned long start, unsigned long end)
{
struct npu_context *npu_context = mn_to_npu_context(mn);
- unsigned long address;
-
- if (end - start > atsd_threshold) {
- /*
- * Just invalidate the entire PID if the address range is too
- * large.
- */
- mmio_invalidate(npu_context, 0, 0, true);
- } else {
- for (address = start; address < end; address += PAGE_SIZE)
- mmio_invalidate(npu_context, 1, address, false);
-
- /* Do the flush only on the final addess == end */
- mmio_invalidate(npu_context, 1, address, true);
- }
+ mmio_invalidate(npu_context, start, end - start);
}
static const struct mmu_notifier_ops nv_nmmu_notifier_ops = {
static int npu_index;
uint64_t rc = 0;
- if (!atsd_threshold_dentry) {
- atsd_threshold_dentry = debugfs_create_x64("atsd_threshold",
- 0600, powerpc_debugfs_root, &atsd_threshold);
- }
-
phb->npu.nmmu_flush =
of_property_read_bool(phb->hose->dn, "ibm,nmmu-flush");
for_each_child_of_node(phb->hose->dn, dn) {
}
j = 0;
- pcaps[i].pg.name = node->name;
+ pcaps[i].pg.name = kasprintf(GFP_KERNEL, "%pOFn", node);
if (has_min) {
powercap_add_attr(min, "powercap-min",
&pcaps[i].pattrs[j]);
while (--i >= 0) {
kfree(pcaps[i].pattrs);
kfree(pcaps[i].pg.attrs);
+ kfree(pcaps[i].pg.name);
}
kobject_put(powercap_kobj);
out_pcaps:
}
if (!of_property_read_u32(node, "ibm,chip-id", &chipid))
- sprintf(sgs[i].name, "%s%d", node->name, chipid);
+ sprintf(sgs[i].name, "%pOFn%d", node, chipid);
else
- sprintf(sgs[i].name, "%s", node->name);
+ sprintf(sgs[i].name, "%pOFn", node);
sgs[i].sg.name = sgs[i].name;
if (add_attr_group(ops, len, &sgs[i], sgid)) {
count = of_property_count_strings(sysparam, "param-name");
if (count < 0) {
pr_err("SYSPARAM: No string found of property param-name in "
- "the node %s\n", sysparam->name);
+ "the node %pOFn\n", sysparam);
goto out_param_buf;
}
return recovered;
}
-void pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
+void __noreturn pnv_platform_error_reboot(struct pt_regs *regs, const char *msg)
{
panic_flush_kmsg_start();
static void __noreturn pnv_restart(char *cmd)
{
- long rc = OPAL_BUSY;
+ long rc;
pnv_prepare_going_down();
- while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
- rc = opal_cec_reboot();
- if (rc == OPAL_BUSY_EVENT)
- opal_poll_events(NULL);
+ do {
+ if (!cmd)
+ rc = opal_cec_reboot();
+ else if (strcmp(cmd, "full") == 0)
+ rc = opal_cec_reboot2(OPAL_REBOOT_FULL_IPL, NULL);
else
+ rc = OPAL_UNSUPPORTED;
+
+ if (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) {
+ /* Opal is busy wait for some time and retry */
+ opal_poll_events(NULL);
mdelay(10);
- }
+
+ } else if (cmd && rc) {
+ /* Unknown error while issuing reboot */
+ if (rc == OPAL_UNSUPPORTED)
+ pr_err("Unsupported '%s' reboot.\n", cmd);
+ else
+ pr_err("Unable to issue '%s' reboot. Err=%ld\n",
+ cmd, rc);
+ pr_info("Forcing a cec-reboot\n");
+ cmd = NULL;
+ rc = OPAL_BUSY;
+
+ } else if (rc != OPAL_SUCCESS) {
+ /* Unknown error while issuing cec-reboot */
+ pr_err("Unable to reboot. Err=%ld\n", rc);
+ }
+
+ } while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT);
+
for (;;)
opal_poll_events(NULL);
}
return ret_freq;
}
+static long pnv_machine_check_early(struct pt_regs *regs)
+{
+ long handled = 0;
+
+ if (cur_cpu_spec && cur_cpu_spec->machine_check_early)
+ handled = cur_cpu_spec->machine_check_early(regs);
+
+ return handled;
+}
+
define_machine(powernv) {
.name = "PowerNV",
.probe = pnv_probe,
.machine_shutdown = pnv_shutdown,
.power_save = NULL,
.calibrate_decr = generic_calibrate_decr,
+ .machine_check_early = pnv_machine_check_early,
#ifdef CONFIG_KEXEC_CORE
.kexec_cpu_down = pnv_kexec_cpu_down,
#endif
config PS3_DYNAMIC_DMA
depends on PPC_PS3
bool "PS3 Platform dynamic DMA page table management"
- default n
help
This option will enable kernel support to take advantage of the
per device dynamic DMA page table management provided by the Cell
config PS3_REPOSITORY_WRITE
bool "PS3 Repository write support" if PS3_ADVANCED
depends on PPC_PS3
- default n
help
Enables support for writing to the PS3 System Repository.
db_set_64(db, &os_area_db_id_rtc_diff, saved_params.rtc_diff);
count = os_area_flash_write(db, sizeof(struct os_area_db), pos);
- if (count < sizeof(struct os_area_db)) {
+ if (count < 0 || count < sizeof(struct os_area_db)) {
pr_debug("%s: os_area_flash_write failed %zd\n", __func__,
count);
error = count < 0 ? count : -EIO;
goto fail_ioremap;
}
- spu->local_store = (__force void *)ioremap_prot(spu->local_store_phys,
- LS_SIZE, pgprot_val(pgprot_noncached_wc(__pgprot(0))));
+ spu->local_store = (__force void *)ioremap_wc(spu->local_store_phys, LS_SIZE);
if (!spu->local_store) {
pr_debug("%s:%d: ioremap local_store failed\n",
config PPC_SPLPAR
depends on PPC_PSERIES
bool "Support for shared-processor logical partitions"
- default n
help
Enabling this option will make the kernel run more efficiently
on logically-partitioned pSeries systems which use shared
bool "Support for shared-memory logical partitions"
depends on PPC_PSERIES
select LPARCFG
- default n
help
Select this option to enable shared memory partition support.
With this option a system running in an LPAR can be given more
bool "Support for GX bus based adapters"
help
Bus device driver for GX bus based adapters.
+
+config PAPR_SCM
+ depends on PPC_PSERIES && MEMORY_HOTPLUG
+ select LIBNVDIMM
+ tristate "Support for the PAPR Storage Class Memory interface"
+ help
+ Enable access to hypervisor provided storage class memory.
obj-$(CONFIG_PSERIES_ENERGY) += pseries_energy.o
obj-$(CONFIG_HOTPLUG_CPU) += hotplug-cpu.o
-obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o
+obj-$(CONFIG_MEMORY_HOTPLUG) += hotplug-memory.o pmem.o
obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o
obj-$(CONFIG_HVCS) += hvcserver.o
obj-$(CONFIG_LPARCFG) += lparcfg.o
obj-$(CONFIG_IBMVIO) += vio.o
obj-$(CONFIG_IBMEBUS) += ibmebus.o
+obj-$(CONFIG_PAPR_SCM) += papr_scm.o
ifdef CONFIG_PPC_PSERIES
obj-$(CONFIG_SUSPEND) += suspend.o
struct pseries_hp_work {
struct work_struct work;
struct pseries_hp_errorlog *errlog;
- struct completion *hp_completion;
- int *rc;
};
struct cc_workarea {
return 0;
}
-static int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_elog)
{
int rc;
case PSERIES_HP_ELOG_RESOURCE_CPU:
rc = dlpar_cpu(hp_elog);
break;
+ case PSERIES_HP_ELOG_RESOURCE_PMEM:
+ rc = dlpar_hp_pmem(hp_elog);
+ break;
+
default:
pr_warn_ratelimited("Invalid resource (%d) specified\n",
hp_elog->resource);
struct pseries_hp_work *hp_work =
container_of(work, struct pseries_hp_work, work);
- if (hp_work->rc)
- *(hp_work->rc) = handle_dlpar_errorlog(hp_work->errlog);
- else
- handle_dlpar_errorlog(hp_work->errlog);
-
- if (hp_work->hp_completion)
- complete(hp_work->hp_completion);
+ handle_dlpar_errorlog(hp_work->errlog);
kfree(hp_work->errlog);
kfree((void *)work);
}
-void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
- struct completion *hotplug_done, int *rc)
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog)
{
struct pseries_hp_work *work;
struct pseries_hp_errorlog *hp_errlog_copy;
if (work) {
INIT_WORK((struct work_struct *)work, pseries_hp_work_fn);
work->errlog = hp_errlog_copy;
- work->hp_completion = hotplug_done;
- work->rc = rc;
queue_work(pseries_hp_wq, (struct work_struct *)work);
} else {
- *rc = -ENOMEM;
kfree(hp_errlog_copy);
- complete(hotplug_done);
}
}
static ssize_t dlpar_store(struct class *class, struct class_attribute *attr,
const char *buf, size_t count)
{
- struct pseries_hp_errorlog *hp_elog;
- struct completion hotplug_done;
+ struct pseries_hp_errorlog hp_elog;
char *argbuf;
char *args;
int rc;
args = argbuf = kstrdup(buf, GFP_KERNEL);
- hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
- if (!hp_elog || !argbuf) {
+ if (!argbuf) {
pr_info("Could not allocate resources for DLPAR operation\n");
kfree(argbuf);
- kfree(hp_elog);
return -ENOMEM;
}
* Parse out the request from the user, this will be in the form:
* <resource> <action> <id_type> <id>
*/
- rc = dlpar_parse_resource(&args, hp_elog);
+ rc = dlpar_parse_resource(&args, &hp_elog);
if (rc)
goto dlpar_store_out;
- rc = dlpar_parse_action(&args, hp_elog);
+ rc = dlpar_parse_action(&args, &hp_elog);
if (rc)
goto dlpar_store_out;
- rc = dlpar_parse_id_type(&args, hp_elog);
+ rc = dlpar_parse_id_type(&args, &hp_elog);
if (rc)
goto dlpar_store_out;
- init_completion(&hotplug_done);
- queue_hotplug_event(hp_elog, &hotplug_done, &rc);
- wait_for_completion(&hotplug_done);
+ rc = handle_dlpar_errorlog(&hp_elog);
dlpar_store_out:
kfree(argbuf);
- kfree(hp_elog);
if (rc)
pr_err("Could not handle DLPAR request \"%s\"\n", buf);
/* Register our dtl buffer with the hypervisor. The HV expects the
* buffer size to be passed in the second word of the buffer */
- ((u32 *)dtl->buf)[1] = DISPATCH_LOG_BYTES;
+ ((u32 *)dtl->buf)[1] = cpu_to_be32(DISPATCH_LOG_BYTES);
hwcpu = get_hard_smp_processor_id(dtl->cpu);
addr = __pa(dtl->buf);
static u64 dtl_current_index(struct dtl *dtl)
{
- return lppaca_of(dtl->cpu).dtl_idx;
+ return be64_to_cpu(lppaca_of(dtl->cpu).dtl_idx);
}
#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
/**
* pseries_eeh_get_state - Retrieve PE state
* @pe: EEH PE
- * @state: return value
+ * @delay: suggested time to wait if state is unavailable
*
* Retrieve the state of the specified PE. On RTAS compliant
* pseries platform, there already has one dedicated RTAS function
* RTAS calls for the purpose, we need to try the new one and back
* to the old one if the new one couldn't work properly.
*/
-static int pseries_eeh_get_state(struct eeh_pe *pe, int *state)
+static int pseries_eeh_get_state(struct eeh_pe *pe, int *delay)
{
int config_addr;
int ret;
break;
case 5:
if (rets[2]) {
- if (state) *state = rets[2];
+ if (delay)
+ *delay = rets[2];
result = EEH_STATE_UNAVAILABLE;
} else {
result = EEH_STATE_NOT_SUPPORT;
return ret;
}
-/**
- * pseries_eeh_wait_state - Wait for PE state
- * @pe: EEH PE
- * @max_wait: maximal period in millisecond
- *
- * Wait for the state of associated PE. It might take some time
- * to retrieve the PE's state.
- */
-static int pseries_eeh_wait_state(struct eeh_pe *pe, int max_wait)
-{
- int ret;
- int mwait;
-
- /*
- * According to PAPR, the state of PE might be temporarily
- * unavailable. Under the circumstance, we have to wait
- * for indicated time determined by firmware. The maximal
- * wait time is 5 minutes, which is acquired from the original
- * EEH implementation. Also, the original implementation
- * also defined the minimal wait time as 1 second.
- */
-#define EEH_STATE_MIN_WAIT_TIME (1000)
-#define EEH_STATE_MAX_WAIT_TIME (300 * 1000)
-
- while (1) {
- ret = pseries_eeh_get_state(pe, &mwait);
-
- /*
- * If the PE's state is temporarily unavailable,
- * we have to wait for the specified time. Otherwise,
- * the PE's state will be returned immediately.
- */
- if (ret != EEH_STATE_UNAVAILABLE)
- return ret;
-
- if (max_wait <= 0) {
- pr_warn("%s: Timeout when getting PE's state (%d)\n",
- __func__, max_wait);
- return EEH_STATE_NOT_SUPPORT;
- }
-
- if (mwait <= 0) {
- pr_warn("%s: Firmware returned bad wait value %d\n",
- __func__, mwait);
- mwait = EEH_STATE_MIN_WAIT_TIME;
- } else if (mwait > EEH_STATE_MAX_WAIT_TIME) {
- pr_warn("%s: Firmware returned too long wait value %d\n",
- __func__, mwait);
- mwait = EEH_STATE_MAX_WAIT_TIME;
- }
-
- max_wait -= mwait;
- msleep(mwait);
- }
-
- return EEH_STATE_NOT_SUPPORT;
-}
-
/**
* pseries_eeh_get_log - Retrieve error log
* @pe: EEH PE
.get_pe_addr = pseries_eeh_get_pe_addr,
.get_state = pseries_eeh_get_state,
.reset = pseries_eeh_reset,
- .wait_state = pseries_eeh_wait_state,
.get_log = pseries_eeh_get_log,
.configure_bridge = pseries_eeh_configure_bridge,
.err_inject = NULL,
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
-#include <asm/prom.h>
+#include <linux/interrupt.h>
+#include <linux/of_irq.h>
#include "pseries.h"
irq_handler_t handler,
const char *name)
{
- int i, index, count = 0;
- struct of_phandle_args oirq;
- unsigned int virqs[16];
+ int i, virq, rc;
- /* First try to do a proper OF tree parsing */
- for (index = 0; of_irq_parse_one(np, index, &oirq) == 0;
- index++) {
- if (count > 15)
- break;
- virqs[count] = irq_create_of_mapping(&oirq);
- if (!virqs[count]) {
- pr_err("event-sources: Unable to allocate "
- "interrupt number for %pOF\n",
- np);
- WARN_ON(1);
- } else {
- count++;
- }
- }
+ for (i = 0; i < 16; i++) {
+ virq = of_irq_get(np, i);
+ if (virq < 0)
+ return;
+ if (WARN(!virq, "event-sources: Unable to allocate "
+ "interrupt number for %pOF\n", np))
+ continue;
- /* Now request them */
- for (i = 0; i < count; i++) {
- if (request_irq(virqs[i], handler, 0, name, NULL)) {
- pr_err("event-sources: Unable to request interrupt "
- "%d for %pOF\n", virqs[i], np);
- WARN_ON(1);
+ rc = request_irq(virq, handler, 0, name, NULL);
+ if (WARN(rc, "event-sources: Unable to request interrupt %d for %pOF\n",
+ virq, np))
return;
- }
}
}
-
{FW_FEATURE_SET_MODE, "hcall-set-mode"},
{FW_FEATURE_BEST_ENERGY, "hcall-best-energy-1*"},
{FW_FEATURE_HPT_RESIZE, "hcall-hpt-resize"},
+ {FW_FEATURE_BLOCK_REMOVE, "hcall-block-remove"},
+ {FW_FEATURE_PAPR_SCM, "hcall-scm"},
};
/* Build up the firmware features bitmask using the contents of
if (cpumask_empty(tmp)) {
printk(KERN_ERR "Unable to find space in cpu_present_mask for"
- " processor %s with %d thread(s)\n", np->name,
+ " processor %pOFn with %d thread(s)\n", np,
nthreads);
goto out_unlock;
}
if (rc) {
saved_rc = rc;
- pr_warn("Failed to attach node %s, rc: %d, drc index: %x\n",
- dn->name, rc, drc_index);
+ pr_warn("Failed to attach node %pOFn, rc: %d, drc index: %x\n",
+ dn, rc, drc_index);
rc = dlpar_release_drc(drc_index);
if (!rc)
rc = dlpar_online_cpu(dn);
if (rc) {
saved_rc = rc;
- pr_warn("Failed to online cpu %s, rc: %d, drc index: %x\n",
- dn->name, rc, drc_index);
+ pr_warn("Failed to online cpu %pOFn, rc: %d, drc index: %x\n",
+ dn, rc, drc_index);
rc = dlpar_detach_node(dn);
if (!rc)
return saved_rc;
}
- pr_debug("Successfully added CPU %s, drc index: %x\n", dn->name,
+ pr_debug("Successfully added CPU %pOFn, drc index: %x\n", dn,
drc_index);
return rc;
}
{
int rc;
- pr_debug("Attempting to remove CPU %s, drc index: %x\n",
- dn->name, drc_index);
+ pr_debug("Attempting to remove CPU %pOFn, drc index: %x\n",
+ dn, drc_index);
rc = dlpar_offline_cpu(dn);
if (rc) {
- pr_warn("Failed to offline CPU %s, rc: %d\n", dn->name, rc);
+ pr_warn("Failed to offline CPU %pOFn, rc: %d\n", dn, rc);
return -EINVAL;
}
rc = dlpar_release_drc(drc_index);
if (rc) {
- pr_warn("Failed to release drc (%x) for CPU %s, rc: %d\n",
- drc_index, dn->name, rc);
+ pr_warn("Failed to release drc (%x) for CPU %pOFn, rc: %d\n",
+ drc_index, dn, rc);
dlpar_online_cpu(dn);
return rc;
}
if (rc) {
int saved_rc = rc;
- pr_warn("Failed to detach CPU %s, rc: %d", dn->name, rc);
+ pr_warn("Failed to detach CPU %pOFn, rc: %d", dn, rc);
rc = dlpar_acquire_drc(drc_index);
if (!rc)
rc = of_property_read_u32(dn, "ibm,my-drc-index",
&cpu_drcs[cpus_found - 1]);
if (rc) {
- pr_warn("Error occurred getting drc-index for %s\n",
- dn->name);
+ pr_warn("Error occurred getting drc-index for %pOFn\n",
+ dn);
of_node_put(dn);
return -1;
}
return new_prop;
}
-static u32 find_aa_index(struct device_node *dr_node,
- struct property *ala_prop, const u32 *lmb_assoc)
+static bool find_aa_index(struct device_node *dr_node,
+ struct property *ala_prop,
+ const u32 *lmb_assoc, u32 *aa_index)
{
- u32 *assoc_arrays;
- u32 aa_index;
+ u32 *assoc_arrays, new_prop_size;
+ struct property *new_prop;
int aa_arrays, aa_array_entries, aa_array_sz;
int i, index;
aa_array_entries = be32_to_cpu(assoc_arrays[1]);
aa_array_sz = aa_array_entries * sizeof(u32);
- aa_index = -1;
for (i = 0; i < aa_arrays; i++) {
index = (i * aa_array_entries) + 2;
if (memcmp(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz))
continue;
- aa_index = i;
- break;
+ *aa_index = i;
+ return true;
}
- if (aa_index == -1) {
- struct property *new_prop;
- u32 new_prop_size;
-
- new_prop_size = ala_prop->length + aa_array_sz;
- new_prop = dlpar_clone_property(ala_prop, new_prop_size);
- if (!new_prop)
- return -1;
-
- assoc_arrays = new_prop->value;
+ new_prop_size = ala_prop->length + aa_array_sz;
+ new_prop = dlpar_clone_property(ala_prop, new_prop_size);
+ if (!new_prop)
+ return false;
- /* increment the number of entries in the lookup array */
- assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
+ assoc_arrays = new_prop->value;
- /* copy the new associativity into the lookup array */
- index = aa_arrays * aa_array_entries + 2;
- memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
+ /* increment the number of entries in the lookup array */
+ assoc_arrays[0] = cpu_to_be32(aa_arrays + 1);
- of_update_property(dr_node, new_prop);
+ /* copy the new associativity into the lookup array */
+ index = aa_arrays * aa_array_entries + 2;
+ memcpy(&assoc_arrays[index], &lmb_assoc[1], aa_array_sz);
- /*
- * The associativity lookup array index for this lmb is
- * number of entries - 1 since we added its associativity
- * to the end of the lookup array.
- */
- aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
- }
+ of_update_property(dr_node, new_prop);
- return aa_index;
+ /*
+ * The associativity lookup array index for this lmb is
+ * number of entries - 1 since we added its associativity
+ * to the end of the lookup array.
+ */
+ *aa_index = be32_to_cpu(assoc_arrays[0]) - 1;
+ return true;
}
-static u32 lookup_lmb_associativity_index(struct drmem_lmb *lmb)
+static int update_lmb_associativity_index(struct drmem_lmb *lmb)
{
struct device_node *parent, *lmb_node, *dr_node;
struct property *ala_prop;
const u32 *lmb_assoc;
u32 aa_index;
+ bool found;
parent = of_find_node_by_path("/");
if (!parent)
return -ENODEV;
}
- aa_index = find_aa_index(dr_node, ala_prop, lmb_assoc);
+ found = find_aa_index(dr_node, ala_prop, lmb_assoc, &aa_index);
dlpar_free_cc_nodes(lmb_node);
- return aa_index;
-}
-static int dlpar_add_device_tree_lmb(struct drmem_lmb *lmb)
-{
- int rc, aa_index;
-
- lmb->flags |= DRCONF_MEM_ASSIGNED;
-
- aa_index = lookup_lmb_associativity_index(lmb);
- if (aa_index < 0) {
- pr_err("Couldn't find associativity index for drc index %x\n",
- lmb->drc_index);
- return aa_index;
+ if (!found) {
+ pr_err("Could not find LMB associativity\n");
+ return -1;
}
lmb->aa_index = aa_index;
-
- rtas_hp_event = true;
- rc = drmem_update_dt();
- rtas_hp_event = false;
-
- return rc;
-}
-
-static int dlpar_remove_device_tree_lmb(struct drmem_lmb *lmb)
-{
- int rc;
-
- lmb->flags &= ~DRCONF_MEM_ASSIGNED;
- lmb->aa_index = 0xffffffff;
-
- rtas_hp_event = true;
- rc = drmem_update_dt();
- rtas_hp_event = false;
-
- return rc;
+ return 0;
}
static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
/* Update memory regions for memory remove */
memblock_remove(lmb->base_addr, block_sz);
- dlpar_remove_device_tree_lmb(lmb);
+ invalidate_lmb_associativity_index(lmb);
+ lmb->flags &= ~DRCONF_MEM_ASSIGNED;
+
return 0;
}
if (lmb->flags & DRCONF_MEM_ASSIGNED)
return -EINVAL;
- rc = dlpar_add_device_tree_lmb(lmb);
+ rc = update_lmb_associativity_index(lmb);
if (rc) {
- pr_err("Couldn't update device tree for drc index %x\n",
- lmb->drc_index);
dlpar_release_drc(lmb->drc_index);
return rc;
}
/* Add the memory */
rc = add_memory(nid, lmb->base_addr, block_sz);
if (rc) {
- dlpar_remove_device_tree_lmb(lmb);
+ invalidate_lmb_associativity_index(lmb);
return rc;
}
rc = dlpar_online_lmb(lmb);
if (rc) {
remove_memory(nid, lmb->base_addr, block_sz);
- dlpar_remove_device_tree_lmb(lmb);
+ invalidate_lmb_associativity_index(lmb);
} else {
lmb->flags |= DRCONF_MEM_ASSIGNED;
}
break;
}
+ if (!rc) {
+ rtas_hp_event = true;
+ rc = drmem_update_dt();
+ rtas_hp_event = false;
+ }
+
unlock_device_hotplug();
return rc;
}
struct platform_device *ofdev;
ofdev = to_platform_device(dev);
- return sprintf(buf, "%s\n", ofdev->dev.of_node->name);
+ return sprintf(buf, "%pOFn\n", ofdev->dev.of_node);
}
static DEVICE_ATTR_RO(name);
#include <asm/kexec.h>
#include <asm/fadump.h>
#include <asm/asm-prototypes.h>
+#include <asm/debugfs.h>
#include "pseries.h"
BUG_ON(lpar_rc != H_SUCCESS);
}
+
+/*
+ * As defined in the PAPR's section 14.5.4.1.8
+ * The control mask doesn't include the returned reference and change bit from
+ * the processed PTE.
+ */
+#define HBLKR_AVPN 0x0100000000000000UL
+#define HBLKR_CTRL_MASK 0xf800000000000000UL
+#define HBLKR_CTRL_SUCCESS 0x8000000000000000UL
+#define HBLKR_CTRL_ERRNOTFOUND 0x8800000000000000UL
+#define HBLKR_CTRL_ERRBUSY 0xa000000000000000UL
+
+/**
+ * H_BLOCK_REMOVE caller.
+ * @idx should point to the latest @param entry set with a PTEX.
+ * If PTE cannot be processed because another CPUs has already locked that
+ * group, those entries are put back in @param starting at index 1.
+ * If entries has to be retried and @retry_busy is set to true, these entries
+ * are retried until success. If @retry_busy is set to false, the returned
+ * is the number of entries yet to process.
+ */
+static unsigned long call_block_remove(unsigned long idx, unsigned long *param,
+ bool retry_busy)
+{
+ unsigned long i, rc, new_idx;
+ unsigned long retbuf[PLPAR_HCALL9_BUFSIZE];
+
+ if (idx < 2) {
+ pr_warn("Unexpected empty call to H_BLOCK_REMOVE");
+ return 0;
+ }
+again:
+ new_idx = 0;
+ if (idx > PLPAR_HCALL9_BUFSIZE) {
+ pr_err("Too many PTEs (%lu) for H_BLOCK_REMOVE", idx);
+ idx = PLPAR_HCALL9_BUFSIZE;
+ } else if (idx < PLPAR_HCALL9_BUFSIZE)
+ param[idx] = HBR_END;
+
+ rc = plpar_hcall9(H_BLOCK_REMOVE, retbuf,
+ param[0], /* AVA */
+ param[1], param[2], param[3], param[4], /* TS0-7 */
+ param[5], param[6], param[7], param[8]);
+ if (rc == H_SUCCESS)
+ return 0;
+
+ BUG_ON(rc != H_PARTIAL);
+
+ /* Check that the unprocessed entries were 'not found' or 'busy' */
+ for (i = 0; i < idx-1; i++) {
+ unsigned long ctrl = retbuf[i] & HBLKR_CTRL_MASK;
+
+ if (ctrl == HBLKR_CTRL_ERRBUSY) {
+ param[++new_idx] = param[i+1];
+ continue;
+ }
+
+ BUG_ON(ctrl != HBLKR_CTRL_SUCCESS
+ && ctrl != HBLKR_CTRL_ERRNOTFOUND);
+ }
+
+ /*
+ * If there were entries found busy, retry these entries if requested,
+ * of if all the entries have to be retried.
+ */
+ if (new_idx && (retry_busy || new_idx == (PLPAR_HCALL9_BUFSIZE-1))) {
+ idx = new_idx + 1;
+ goto again;
+ }
+
+ return new_idx;
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/*
* Limit iterations holding pSeries_lpar_tlbie_lock to 3. We also need
*/
#define PPC64_HUGE_HPTE_BATCH 12
-static void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
- unsigned long *vpn, int count,
- int psize, int ssize)
+static void hugepage_block_invalidate(unsigned long *slot, unsigned long *vpn,
+ int count, int psize, int ssize)
{
unsigned long param[PLPAR_HCALL9_BUFSIZE];
- int i = 0, pix = 0, rc;
- unsigned long flags = 0;
- int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+ unsigned long shift, current_vpgb, vpgb;
+ int i, pix = 0;
- if (lock_tlbie)
- spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+ shift = mmu_psize_defs[psize].shift;
+
+ for (i = 0; i < count; i++) {
+ /*
+ * Shifting 3 bits more on the right to get a
+ * 8 pages aligned virtual addresse.
+ */
+ vpgb = (vpn[i] >> (shift - VPN_SHIFT + 3));
+ if (!pix || vpgb != current_vpgb) {
+ /*
+ * Need to start a new 8 pages block, flush
+ * the current one if needed.
+ */
+ if (pix)
+ (void)call_block_remove(pix, param, true);
+ current_vpgb = vpgb;
+ param[0] = hpte_encode_avpn(vpn[i], psize, ssize);
+ pix = 1;
+ }
+
+ param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot[i];
+ if (pix == PLPAR_HCALL9_BUFSIZE) {
+ pix = call_block_remove(pix, param, false);
+ /*
+ * pix = 0 means that all the entries were
+ * removed, we can start a new block.
+ * Otherwise, this means that there are entries
+ * to retry, and pix points to latest one, so
+ * we should increment it and try to continue
+ * the same block.
+ */
+ if (pix)
+ pix++;
+ }
+ }
+ if (pix)
+ (void)call_block_remove(pix, param, true);
+}
+
+static void hugepage_bulk_invalidate(unsigned long *slot, unsigned long *vpn,
+ int count, int psize, int ssize)
+{
+ unsigned long param[PLPAR_HCALL9_BUFSIZE];
+ int i = 0, pix = 0, rc;
for (i = 0; i < count; i++) {
param[6], param[7]);
BUG_ON(rc != H_SUCCESS);
}
+}
+
+static inline void __pSeries_lpar_hugepage_invalidate(unsigned long *slot,
+ unsigned long *vpn,
+ int count, int psize,
+ int ssize)
+{
+ unsigned long flags = 0;
+ int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
+
+ if (lock_tlbie)
+ spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+
+ if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE))
+ hugepage_block_invalidate(slot, vpn, count, psize, ssize);
+ else
+ hugepage_bulk_invalidate(slot, vpn, count, psize, ssize);
if (lock_tlbie)
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
return 0;
}
+
+static inline unsigned long compute_slot(real_pte_t pte,
+ unsigned long vpn,
+ unsigned long index,
+ unsigned long shift,
+ int ssize)
+{
+ unsigned long slot, hash, hidx;
+
+ hash = hpt_hash(vpn, shift, ssize);
+ hidx = __rpte_to_hidx(pte, index);
+ if (hidx & _PTEIDX_SECONDARY)
+ hash = ~hash;
+ slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+ slot += hidx & _PTEIDX_GROUP_IX;
+ return slot;
+}
+
+/**
+ * The hcall H_BLOCK_REMOVE implies that the virtual pages to processed are
+ * "all within the same naturally aligned 8 page virtual address block".
+ */
+static void do_block_remove(unsigned long number, struct ppc64_tlb_batch *batch,
+ unsigned long *param)
+{
+ unsigned long vpn;
+ unsigned long i, pix = 0;
+ unsigned long index, shift, slot, current_vpgb, vpgb;
+ real_pte_t pte;
+ int psize, ssize;
+
+ psize = batch->psize;
+ ssize = batch->ssize;
+
+ for (i = 0; i < number; i++) {
+ vpn = batch->vpn[i];
+ pte = batch->pte[i];
+ pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
+ /*
+ * Shifting 3 bits more on the right to get a
+ * 8 pages aligned virtual addresse.
+ */
+ vpgb = (vpn >> (shift - VPN_SHIFT + 3));
+ if (!pix || vpgb != current_vpgb) {
+ /*
+ * Need to start a new 8 pages block, flush
+ * the current one if needed.
+ */
+ if (pix)
+ (void)call_block_remove(pix, param,
+ true);
+ current_vpgb = vpgb;
+ param[0] = hpte_encode_avpn(vpn, psize,
+ ssize);
+ pix = 1;
+ }
+
+ slot = compute_slot(pte, vpn, index, shift, ssize);
+ param[pix++] = HBR_REQUEST | HBLKR_AVPN | slot;
+
+ if (pix == PLPAR_HCALL9_BUFSIZE) {
+ pix = call_block_remove(pix, param, false);
+ /*
+ * pix = 0 means that all the entries were
+ * removed, we can start a new block.
+ * Otherwise, this means that there are entries
+ * to retry, and pix points to latest one, so
+ * we should increment it and try to continue
+ * the same block.
+ */
+ if (pix)
+ pix++;
+ }
+ } pte_iterate_hashed_end();
+ }
+
+ if (pix)
+ (void)call_block_remove(pix, param, true);
+}
+
/*
* Take a spinlock around flushes to avoid bouncing the hypervisor tlbie
* lock.
struct ppc64_tlb_batch *batch = this_cpu_ptr(&ppc64_tlb_batch);
int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE);
unsigned long param[PLPAR_HCALL9_BUFSIZE];
- unsigned long hash, index, shift, hidx, slot;
+ unsigned long index, shift, slot;
real_pte_t pte;
int psize, ssize;
if (lock_tlbie)
spin_lock_irqsave(&pSeries_lpar_tlbie_lock, flags);
+ if (firmware_has_feature(FW_FEATURE_BLOCK_REMOVE)) {
+ do_block_remove(number, batch, param);
+ goto out;
+ }
+
psize = batch->psize;
ssize = batch->ssize;
pix = 0;
vpn = batch->vpn[i];
pte = batch->pte[i];
pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) {
- hash = hpt_hash(vpn, shift, ssize);
- hidx = __rpte_to_hidx(pte, index);
- if (hidx & _PTEIDX_SECONDARY)
- hash = ~hash;
- slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
- slot += hidx & _PTEIDX_GROUP_IX;
+ slot = compute_slot(pte, vpn, index, shift, ssize);
if (!firmware_has_feature(FW_FEATURE_BULK_REMOVE)) {
/*
* lpar doesn't use the passed actual page size
BUG_ON(rc != H_SUCCESS);
}
+out:
if (lock_tlbie)
spin_unlock_irqrestore(&pSeries_lpar_tlbie_lock, flags);
}
return 0;
}
machine_device_initcall(pseries, reserve_vrma_context_id);
+
+#ifdef CONFIG_DEBUG_FS
+/* debugfs file interface for vpa data */
+static ssize_t vpa_file_read(struct file *filp, char __user *buf, size_t len,
+ loff_t *pos)
+{
+ int cpu = (long)filp->private_data;
+ struct lppaca *lppaca = &lppaca_of(cpu);
+
+ return simple_read_from_buffer(buf, len, pos, lppaca,
+ sizeof(struct lppaca));
+}
+
+static const struct file_operations vpa_fops = {
+ .open = simple_open,
+ .read = vpa_file_read,
+ .llseek = default_llseek,
+};
+
+static int __init vpa_debugfs_init(void)
+{
+ char name[16];
+ long i;
+ static struct dentry *vpa_dir;
+
+ if (!firmware_has_feature(FW_FEATURE_SPLPAR))
+ return 0;
+
+ vpa_dir = debugfs_create_dir("vpa", powerpc_debugfs_root);
+ if (!vpa_dir) {
+ pr_warn("%s: can't create vpa root dir\n", __func__);
+ return -ENOMEM;
+ }
+
+ /* set up the per-cpu vpa file*/
+ for_each_possible_cpu(i) {
+ struct dentry *d;
+
+ sprintf(name, "cpu-%ld", i);
+
+ d = debugfs_create_file(name, 0400, vpa_dir, (void *)i,
+ &vpa_fops);
+ if (!d) {
+ pr_warn("%s: can't create per-cpu vpa file\n",
+ __func__);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+machine_arch_initcall(pseries, vpa_debugfs_init);
+#endif /* CONFIG_DEBUG_FS */
static ssize_t lparcfg_write(struct file *file, const char __user * buf,
size_t count, loff_t * off)
{
- int kbuf_sz = 64;
- char kbuf[kbuf_sz];
+ char kbuf[64];
char *tmp;
u64 new_entitled, *new_entitled_ptr = &new_entitled;
u8 new_weight, *new_weight_ptr = &new_weight;
if (!firmware_has_feature(FW_FEATURE_SPLPAR))
return -EINVAL;
- if (count > kbuf_sz)
+ if (count > sizeof(kbuf))
return -EINVAL;
if (copy_from_user(kbuf, buf, count))
static void prrn_update_node(__be32 phandle)
{
- struct pseries_hp_errorlog *hp_elog;
+ struct pseries_hp_errorlog hp_elog;
struct device_node *dn;
/*
return;
}
- hp_elog = kzalloc(sizeof(*hp_elog), GFP_KERNEL);
- if(!hp_elog)
- return;
-
- hp_elog->resource = PSERIES_HP_ELOG_RESOURCE_MEM;
- hp_elog->action = PSERIES_HP_ELOG_ACTION_READD;
- hp_elog->id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
- hp_elog->_drc_u.drc_index = phandle;
-
- queue_hotplug_event(hp_elog, NULL, NULL);
+ hp_elog.resource = PSERIES_HP_ELOG_RESOURCE_MEM;
+ hp_elog.action = PSERIES_HP_ELOG_ACTION_READD;
+ hp_elog.id_type = PSERIES_HP_ELOG_ID_DRC_INDEX;
+ hp_elog._drc_u.drc_index = phandle;
- kfree(hp_elog);
+ handle_dlpar_errorlog(&hp_elog);
}
int pseries_devicetree_update(s32 scope)
if (rc)
return rc;
+ stop_topology_update();
+
do {
rc = rtas_ibm_suspend_me(streamid);
if (rc == -EAGAIN)
return rc;
post_mobility_fixup();
+
+ start_topology_update();
+
return count;
}
/* Get the top level device in the PE */
edev = pdn_to_eeh_dev(PCI_DN(dn));
if (edev->pe)
- edev = list_first_entry(&edev->pe->edevs, struct eeh_dev, list);
+ edev = list_first_entry(&edev->pe->edevs, struct eeh_dev,
+ entry);
dn = pci_device_to_OF_node(edev->pdev);
if (!dn)
return NULL;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#define pr_fmt(fmt) "papr-scm: " fmt
+
+#include <linux/of.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+#include <linux/ndctl.h>
+#include <linux/sched.h>
+#include <linux/libnvdimm.h>
+#include <linux/platform_device.h>
+
+#include <asm/plpar_wrappers.h>
+
+#define BIND_ANY_ADDR (~0ul)
+
+#define PAPR_SCM_DIMM_CMD_MASK \
+ ((1ul << ND_CMD_GET_CONFIG_SIZE) | \
+ (1ul << ND_CMD_GET_CONFIG_DATA) | \
+ (1ul << ND_CMD_SET_CONFIG_DATA))
+
+struct papr_scm_priv {
+ struct platform_device *pdev;
+ struct device_node *dn;
+ uint32_t drc_index;
+ uint64_t blocks;
+ uint64_t block_size;
+ int metadata_size;
+
+ uint64_t bound_addr;
+
+ struct nvdimm_bus_descriptor bus_desc;
+ struct nvdimm_bus *bus;
+ struct nvdimm *nvdimm;
+ struct resource res;
+ struct nd_region *region;
+ struct nd_interleave_set nd_set;
+};
+
+static int drc_pmem_bind(struct papr_scm_priv *p)
+{
+ unsigned long ret[PLPAR_HCALL_BUFSIZE];
+ uint64_t rc, token;
+
+ /*
+ * When the hypervisor cannot map all the requested memory in a single
+ * hcall it returns H_BUSY and we call again with the token until
+ * we get H_SUCCESS. Aborting the retry loop before getting H_SUCCESS
+ * leave the system in an undefined state, so we wait.
+ */
+ token = 0;
+
+ do {
+ rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0,
+ p->blocks, BIND_ANY_ADDR, token);
+ token = be64_to_cpu(ret[0]);
+ cond_resched();
+ } while (rc == H_BUSY);
+
+ if (rc) {
+ dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
+ return -ENXIO;
+ }
+
+ p->bound_addr = be64_to_cpu(ret[1]);
+
+ dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res);
+
+ return 0;
+}
+
+static int drc_pmem_unbind(struct papr_scm_priv *p)
+{
+ unsigned long ret[PLPAR_HCALL_BUFSIZE];
+ uint64_t rc, token;
+
+ token = 0;
+
+ /* NB: unbind has the same retry requirements mentioned above */
+ do {
+ rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
+ p->bound_addr, p->blocks, token);
+ token = be64_to_cpu(ret);
+ cond_resched();
+ } while (rc == H_BUSY);
+
+ if (rc)
+ dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+
+ return !!rc;
+}
+
+static int papr_scm_meta_get(struct papr_scm_priv *p,
+ struct nd_cmd_get_config_data_hdr *hdr)
+{
+ unsigned long data[PLPAR_HCALL_BUFSIZE];
+ int64_t ret;
+
+ if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
+ return -EINVAL;
+
+ ret = plpar_hcall(H_SCM_READ_METADATA, data, p->drc_index,
+ hdr->in_offset, 1);
+
+ if (ret == H_PARAMETER) /* bad DRC index */
+ return -ENODEV;
+ if (ret)
+ return -EINVAL; /* other invalid parameter */
+
+ hdr->out_buf[0] = data[0] & 0xff;
+
+ return 0;
+}
+
+static int papr_scm_meta_set(struct papr_scm_priv *p,
+ struct nd_cmd_set_config_hdr *hdr)
+{
+ int64_t ret;
+
+ if (hdr->in_offset >= p->metadata_size || hdr->in_length != 1)
+ return -EINVAL;
+
+ ret = plpar_hcall_norets(H_SCM_WRITE_METADATA,
+ p->drc_index, hdr->in_offset, hdr->in_buf[0], 1);
+
+ if (ret == H_PARAMETER) /* bad DRC index */
+ return -ENODEV;
+ if (ret)
+ return -EINVAL; /* other invalid parameter */
+
+ return 0;
+}
+
+int papr_scm_ndctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
+ unsigned int cmd, void *buf, unsigned int buf_len, int *cmd_rc)
+{
+ struct nd_cmd_get_config_size *get_size_hdr;
+ struct papr_scm_priv *p;
+
+ /* Only dimm-specific calls are supported atm */
+ if (!nvdimm)
+ return -EINVAL;
+
+ p = nvdimm_provider_data(nvdimm);
+
+ switch (cmd) {
+ case ND_CMD_GET_CONFIG_SIZE:
+ get_size_hdr = buf;
+
+ get_size_hdr->status = 0;
+ get_size_hdr->max_xfer = 1;
+ get_size_hdr->config_size = p->metadata_size;
+ *cmd_rc = 0;
+ break;
+
+ case ND_CMD_GET_CONFIG_DATA:
+ *cmd_rc = papr_scm_meta_get(p, buf);
+ break;
+
+ case ND_CMD_SET_CONFIG_DATA:
+ *cmd_rc = papr_scm_meta_set(p, buf);
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ dev_dbg(&p->pdev->dev, "returned with cmd_rc = %d\n", *cmd_rc);
+
+ return 0;
+}
+
+static const struct attribute_group *region_attr_groups[] = {
+ &nd_region_attribute_group,
+ &nd_device_attribute_group,
+ &nd_mapping_attribute_group,
+ &nd_numa_attribute_group,
+ NULL,
+};
+
+static const struct attribute_group *bus_attr_groups[] = {
+ &nvdimm_bus_attribute_group,
+ NULL,
+};
+
+static const struct attribute_group *papr_scm_dimm_groups[] = {
+ &nvdimm_attribute_group,
+ &nd_device_attribute_group,
+ NULL,
+};
+
+static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
+{
+ struct device *dev = &p->pdev->dev;
+ struct nd_mapping_desc mapping;
+ struct nd_region_desc ndr_desc;
+ unsigned long dimm_flags;
+
+ p->bus_desc.ndctl = papr_scm_ndctl;
+ p->bus_desc.module = THIS_MODULE;
+ p->bus_desc.of_node = p->pdev->dev.of_node;
+ p->bus_desc.attr_groups = bus_attr_groups;
+ p->bus_desc.provider_name = kstrdup(p->pdev->name, GFP_KERNEL);
+
+ if (!p->bus_desc.provider_name)
+ return -ENOMEM;
+
+ p->bus = nvdimm_bus_register(NULL, &p->bus_desc);
+ if (!p->bus) {
+ dev_err(dev, "Error creating nvdimm bus %pOF\n", p->dn);
+ return -ENXIO;
+ }
+
+ dimm_flags = 0;
+ set_bit(NDD_ALIASING, &dimm_flags);
+
+ p->nvdimm = nvdimm_create(p->bus, p, papr_scm_dimm_groups,
+ dimm_flags, PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
+ if (!p->nvdimm) {
+ dev_err(dev, "Error creating DIMM object for %pOF\n", p->dn);
+ goto err;
+ }
+
+ /* now add the region */
+
+ memset(&mapping, 0, sizeof(mapping));
+ mapping.nvdimm = p->nvdimm;
+ mapping.start = 0;
+ mapping.size = p->blocks * p->block_size; // XXX: potential overflow?
+
+ memset(&ndr_desc, 0, sizeof(ndr_desc));
+ ndr_desc.attr_groups = region_attr_groups;
+ ndr_desc.numa_node = dev_to_node(&p->pdev->dev);
+ ndr_desc.res = &p->res;
+ ndr_desc.of_node = p->dn;
+ ndr_desc.provider_data = p;
+ ndr_desc.mapping = &mapping;
+ ndr_desc.num_mappings = 1;
+ ndr_desc.nd_set = &p->nd_set;
+ set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+
+ p->region = nvdimm_pmem_region_create(p->bus, &ndr_desc);
+ if (!p->region) {
+ dev_err(dev, "Error registering region %pR from %pOF\n",
+ ndr_desc.res, p->dn);
+ goto err;
+ }
+
+ return 0;
+
+err: nvdimm_bus_unregister(p->bus);
+ kfree(p->bus_desc.provider_name);
+ return -ENXIO;
+}
+
+static int papr_scm_probe(struct platform_device *pdev)
+{
+ uint32_t drc_index, metadata_size, unit_cap[2];
+ struct device_node *dn = pdev->dev.of_node;
+ struct papr_scm_priv *p;
+ int rc;
+
+ /* check we have all the required DT properties */
+ if (of_property_read_u32(dn, "ibm,my-drc-index", &drc_index)) {
+ dev_err(&pdev->dev, "%pOF: missing drc-index!\n", dn);
+ return -ENODEV;
+ }
+
+ if (of_property_read_u32_array(dn, "ibm,unit-capacity", unit_cap, 2)) {
+ dev_err(&pdev->dev, "%pOF: missing unit-capacity!\n", dn);
+ return -ENODEV;
+ }
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ /* optional DT properties */
+ of_property_read_u32(dn, "ibm,metadata-size", &metadata_size);
+
+ p->dn = dn;
+ p->drc_index = drc_index;
+ p->block_size = unit_cap[0];
+ p->blocks = unit_cap[1];
+
+ /* might be zero */
+ p->metadata_size = metadata_size;
+ p->pdev = pdev;
+
+ /* request the hypervisor to bind this region to somewhere in memory */
+ rc = drc_pmem_bind(p);
+ if (rc)
+ goto err;
+
+ /* setup the resource for the newly bound range */
+ p->res.start = p->bound_addr;
+ p->res.end = p->bound_addr + p->blocks * p->block_size;
+ p->res.name = pdev->name;
+ p->res.flags = IORESOURCE_MEM;
+
+ rc = papr_scm_nvdimm_init(p);
+ if (rc)
+ goto err2;
+
+ platform_set_drvdata(pdev, p);
+
+ return 0;
+
+err2: drc_pmem_unbind(p);
+err: kfree(p);
+ return rc;
+}
+
+static int papr_scm_remove(struct platform_device *pdev)
+{
+ struct papr_scm_priv *p = platform_get_drvdata(pdev);
+
+ nvdimm_bus_unregister(p->bus);
+ drc_pmem_unbind(p);
+ kfree(p);
+
+ return 0;
+}
+
+static const struct of_device_id papr_scm_match[] = {
+ { .compatible = "ibm,pmemory" },
+ { },
+};
+
+static struct platform_driver papr_scm_driver = {
+ .probe = papr_scm_probe,
+ .remove = papr_scm_remove,
+ .driver = {
+ .name = "papr_scm",
+ .owner = THIS_MODULE,
+ .of_match_table = papr_scm_match,
+ },
+};
+
+module_platform_driver(papr_scm_driver);
+MODULE_DEVICE_TABLE(of, papr_scm_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
{
pSeries_request_regions();
+ eeh_probe_devices();
eeh_addr_cache_build();
#ifdef CONFIG_PCI_IOV
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Handles hot and cold plug of persistent memory regions on pseries.
+ */
+
+#define pr_fmt(fmt) "pseries-pmem: " fmt
+
+#include <linux/kernel.h>
+#include <linux/interrupt.h>
+#include <linux/delay.h>
+#include <linux/sched.h> /* for idle_task_exit */
+#include <linux/sched/hotplug.h>
+#include <linux/cpu.h>
+#include <linux/of.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <asm/prom.h>
+#include <asm/rtas.h>
+#include <asm/firmware.h>
+#include <asm/machdep.h>
+#include <asm/vdso_datapage.h>
+#include <asm/plpar_wrappers.h>
+#include <asm/topology.h>
+
+#include "pseries.h"
+#include "offline_states.h"
+
+static struct device_node *pmem_node;
+
+static ssize_t pmem_drc_add_node(u32 drc_index)
+{
+ struct device_node *dn;
+ int rc;
+
+ pr_debug("Attempting to add pmem node, drc index: %x\n", drc_index);
+
+ rc = dlpar_acquire_drc(drc_index);
+ if (rc) {
+ pr_err("Failed to acquire DRC, rc: %d, drc index: %x\n",
+ rc, drc_index);
+ return -EINVAL;
+ }
+
+ dn = dlpar_configure_connector(cpu_to_be32(drc_index), pmem_node);
+ if (!dn) {
+ pr_err("configure-connector failed for drc %x\n", drc_index);
+ dlpar_release_drc(drc_index);
+ return -EINVAL;
+ }
+
+ /* NB: The of reconfig notifier creates platform device from the node */
+ rc = dlpar_attach_node(dn, pmem_node);
+ if (rc) {
+ pr_err("Failed to attach node %s, rc: %d, drc index: %x\n",
+ dn->name, rc, drc_index);
+
+ if (dlpar_release_drc(drc_index))
+ dlpar_free_cc_nodes(dn);
+
+ return rc;
+ }
+
+ pr_info("Successfully added %pOF, drc index: %x\n", dn, drc_index);
+
+ return 0;
+}
+
+static ssize_t pmem_drc_remove_node(u32 drc_index)
+{
+ struct device_node *dn;
+ uint32_t index;
+ int rc;
+
+ for_each_child_of_node(pmem_node, dn) {
+ if (of_property_read_u32(dn, "ibm,my-drc-index", &index))
+ continue;
+ if (index == drc_index)
+ break;
+ }
+
+ if (!dn) {
+ pr_err("Attempting to remove unused DRC index %x\n", drc_index);
+ return -ENODEV;
+ }
+
+ pr_debug("Attempting to remove %pOF, drc index: %x\n", dn, drc_index);
+
+ /* * NB: tears down the ibm,pmemory device as a side-effect */
+ rc = dlpar_detach_node(dn);
+ if (rc)
+ return rc;
+
+ rc = dlpar_release_drc(drc_index);
+ if (rc) {
+ pr_err("Failed to release drc (%x) for CPU %s, rc: %d\n",
+ drc_index, dn->name, rc);
+ dlpar_attach_node(dn, pmem_node);
+ return rc;
+ }
+
+ pr_info("Successfully removed PMEM with drc index: %x\n", drc_index);
+
+ return 0;
+}
+
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+ u32 count, drc_index;
+ int rc;
+
+ /* slim chance, but we might get a hotplug event while booting */
+ if (!pmem_node)
+ pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
+ if (!pmem_node) {
+ pr_err("Hotplug event for a pmem device, but none exists\n");
+ return -ENODEV;
+ }
+
+ if (hp_elog->id_type != PSERIES_HP_ELOG_ID_DRC_INDEX) {
+ pr_err("Unsupported hotplug event type %d\n",
+ hp_elog->id_type);
+ return -EINVAL;
+ }
+
+ count = hp_elog->_drc_u.drc_count;
+ drc_index = hp_elog->_drc_u.drc_index;
+
+ lock_device_hotplug();
+
+ if (hp_elog->action == PSERIES_HP_ELOG_ACTION_ADD) {
+ rc = pmem_drc_add_node(drc_index);
+ } else if (hp_elog->action == PSERIES_HP_ELOG_ACTION_REMOVE) {
+ rc = pmem_drc_remove_node(drc_index);
+ } else {
+ pr_err("Unsupported hotplug action (%d)\n", hp_elog->action);
+ rc = -EINVAL;
+ }
+
+ unlock_device_hotplug();
+ return rc;
+}
+
+const struct of_device_id drc_pmem_match[] = {
+ { .type = "ibm,persistent-memory", },
+ {}
+};
+
+static int pseries_pmem_init(void)
+{
+ pmem_node = of_find_node_by_type(NULL, "ibm,persistent-memory");
+ if (!pmem_node)
+ return 0;
+
+ /*
+ * The generic OF bus probe/populate handles creating platform devices
+ * from the child (ibm,pmemory) nodes. The generic code registers an of
+ * reconfig notifier to handle the hot-add/remove cases too.
+ */
+ of_platform_bus_probe(pmem_node, drc_pmem_match, NULL);
+
+ return 0;
+}
+machine_arch_initcall(pseries, pseries_pmem_init);
extern int pSeries_system_reset_exception(struct pt_regs *regs);
extern int pSeries_machine_check_exception(struct pt_regs *regs);
+extern long pseries_machine_check_realmode(struct pt_regs *regs);
#ifdef CONFIG_SMP
extern void smp_init_pseries(void);
extern int dlpar_acquire_drc(u32 drc_index);
extern int dlpar_release_drc(u32 drc_index);
-void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog,
- struct completion *hotplug_done, int *rc);
+void queue_hotplug_event(struct pseries_hp_errorlog *hp_errlog);
+int handle_dlpar_errorlog(struct pseries_hp_errorlog *hp_errlog);
+
#ifdef CONFIG_MEMORY_HOTPLUG
int dlpar_memory(struct pseries_hp_errorlog *hp_elog);
+int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog);
#else
static inline int dlpar_memory(struct pseries_hp_errorlog *hp_elog)
{
return -EOPNOTSUPP;
}
+static inline int dlpar_hp_pmem(struct pseries_hp_errorlog *hp_elog)
+{
+ return -EOPNOTSUPP;
+}
#endif
#ifdef CONFIG_HOTPLUG_CPU
#include <asm/machdep.h>
#include <asm/rtas.h>
#include <asm/firmware.h>
+#include <asm/mce.h>
#include "pseries.h"
static irqreturn_t ras_epow_interrupt(int irq, void *dev_id);
static irqreturn_t ras_error_interrupt(int irq, void *dev_id);
+/* RTAS pseries MCE errorlog section. */
+struct pseries_mc_errorlog {
+ __be32 fru_id;
+ __be32 proc_id;
+ u8 error_type;
+ /*
+ * sub_err_type (1 byte). Bit fields depends on error_type
+ *
+ * MSB0
+ * |
+ * V
+ * 01234567
+ * XXXXXXXX
+ *
+ * For error_type == MC_ERROR_TYPE_UE
+ * XXXXXXXX
+ * X 1: Permanent or Transient UE.
+ * X 1: Effective address provided.
+ * X 1: Logical address provided.
+ * XX 2: Reserved.
+ * XXX 3: Type of UE error.
+ *
+ * For error_type != MC_ERROR_TYPE_UE
+ * XXXXXXXX
+ * X 1: Effective address provided.
+ * XXXXX 5: Reserved.
+ * XX 2: Type of SLB/ERAT/TLB error.
+ */
+ u8 sub_err_type;
+ u8 reserved_1[6];
+ __be64 effective_address;
+ __be64 logical_address;
+} __packed;
+
+/* RTAS pseries MCE error types */
+#define MC_ERROR_TYPE_UE 0x00
+#define MC_ERROR_TYPE_SLB 0x01
+#define MC_ERROR_TYPE_ERAT 0x02
+#define MC_ERROR_TYPE_TLB 0x04
+#define MC_ERROR_TYPE_D_CACHE 0x05
+#define MC_ERROR_TYPE_I_CACHE 0x07
+
+/* RTAS pseries MCE error sub types */
+#define MC_ERROR_UE_INDETERMINATE 0
+#define MC_ERROR_UE_IFETCH 1
+#define MC_ERROR_UE_PAGE_TABLE_WALK_IFETCH 2
+#define MC_ERROR_UE_LOAD_STORE 3
+#define MC_ERROR_UE_PAGE_TABLE_WALK_LOAD_STORE 4
+
+#define MC_ERROR_SLB_PARITY 0
+#define MC_ERROR_SLB_MULTIHIT 1
+#define MC_ERROR_SLB_INDETERMINATE 2
+
+#define MC_ERROR_ERAT_PARITY 1
+#define MC_ERROR_ERAT_MULTIHIT 2
+#define MC_ERROR_ERAT_INDETERMINATE 3
+
+#define MC_ERROR_TLB_PARITY 1
+#define MC_ERROR_TLB_MULTIHIT 2
+#define MC_ERROR_TLB_INDETERMINATE 3
+
+static inline u8 rtas_mc_error_sub_type(const struct pseries_mc_errorlog *mlog)
+{
+ switch (mlog->error_type) {
+ case MC_ERROR_TYPE_UE:
+ return (mlog->sub_err_type & 0x07);
+ case MC_ERROR_TYPE_SLB:
+ case MC_ERROR_TYPE_ERAT:
+ case MC_ERROR_TYPE_TLB:
+ return (mlog->sub_err_type & 0x03);
+ default:
+ return 0;
+ }
+}
+
+static
+inline u64 rtas_mc_get_effective_addr(const struct pseries_mc_errorlog *mlog)
+{
+ __be64 addr = 0;
+
+ switch (mlog->error_type) {
+ case MC_ERROR_TYPE_UE:
+ if (mlog->sub_err_type & 0x40)
+ addr = mlog->effective_address;
+ break;
+ case MC_ERROR_TYPE_SLB:
+ case MC_ERROR_TYPE_ERAT:
+ case MC_ERROR_TYPE_TLB:
+ if (mlog->sub_err_type & 0x80)
+ addr = mlog->effective_address;
+ default:
+ break;
+ }
+ return be64_to_cpu(addr);
+}
/*
* Enable the hotplug interrupt late because processing them may touch other
* hotplug events on the ras_log_buf to be handled by rtas_errd.
*/
if (hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_MEM ||
- hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU)
- queue_hotplug_event(hp_elog, NULL, NULL);
+ hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_CPU ||
+ hp_elog->resource == PSERIES_HP_ELOG_RESOURCE_PMEM)
+ queue_hotplug_event(hp_elog);
else
log_error(ras_log_buf, ERR_TYPE_RTAS_LOG, 0);
return 0; /* need to perform reset */
}
+#define VAL_TO_STRING(ar, val) \
+ (((val) < ARRAY_SIZE(ar)) ? ar[(val)] : "Unknown")
+
+static void pseries_print_mce_info(struct pt_regs *regs,
+ struct rtas_error_log *errp)
+{
+ const char *level, *sevstr;
+ struct pseries_errorlog *pseries_log;
+ struct pseries_mc_errorlog *mce_log;
+ u8 error_type, err_sub_type;
+ u64 addr;
+ u8 initiator = rtas_error_initiator(errp);
+ int disposition = rtas_error_disposition(errp);
+
+ static const char * const initiators[] = {
+ "Unknown",
+ "CPU",
+ "PCI",
+ "ISA",
+ "Memory",
+ "Power Mgmt",
+ };
+ static const char * const mc_err_types[] = {
+ "UE",
+ "SLB",
+ "ERAT",
+ "TLB",
+ "D-Cache",
+ "Unknown",
+ "I-Cache",
+ };
+ static const char * const mc_ue_types[] = {
+ "Indeterminate",
+ "Instruction fetch",
+ "Page table walk ifetch",
+ "Load/Store",
+ "Page table walk Load/Store",
+ };
+
+ /* SLB sub errors valid values are 0x0, 0x1, 0x2 */
+ static const char * const mc_slb_types[] = {
+ "Parity",
+ "Multihit",
+ "Indeterminate",
+ };
+
+ /* TLB and ERAT sub errors valid values are 0x1, 0x2, 0x3 */
+ static const char * const mc_soft_types[] = {
+ "Unknown",
+ "Parity",
+ "Multihit",
+ "Indeterminate",
+ };
+
+ if (!rtas_error_extended(errp)) {
+ pr_err("Machine check interrupt: Missing extended error log\n");
+ return;
+ }
+
+ pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+ if (pseries_log == NULL)
+ return;
+
+ mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+
+ error_type = mce_log->error_type;
+ err_sub_type = rtas_mc_error_sub_type(mce_log);
+
+ switch (rtas_error_severity(errp)) {
+ case RTAS_SEVERITY_NO_ERROR:
+ level = KERN_INFO;
+ sevstr = "Harmless";
+ break;
+ case RTAS_SEVERITY_WARNING:
+ level = KERN_WARNING;
+ sevstr = "";
+ break;
+ case RTAS_SEVERITY_ERROR:
+ case RTAS_SEVERITY_ERROR_SYNC:
+ level = KERN_ERR;
+ sevstr = "Severe";
+ break;
+ case RTAS_SEVERITY_FATAL:
+ default:
+ level = KERN_ERR;
+ sevstr = "Fatal";
+ break;
+ }
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Display faulty slb contents for SLB errors. */
+ if (error_type == MC_ERROR_TYPE_SLB)
+ slb_dump_contents(local_paca->mce_faulty_slbs);
+#endif
+
+ printk("%s%s Machine check interrupt [%s]\n", level, sevstr,
+ disposition == RTAS_DISP_FULLY_RECOVERED ?
+ "Recovered" : "Not recovered");
+ if (user_mode(regs)) {
+ printk("%s NIP: [%016lx] PID: %d Comm: %s\n", level,
+ regs->nip, current->pid, current->comm);
+ } else {
+ printk("%s NIP [%016lx]: %pS\n", level, regs->nip,
+ (void *)regs->nip);
+ }
+ printk("%s Initiator: %s\n", level,
+ VAL_TO_STRING(initiators, initiator));
+
+ switch (error_type) {
+ case MC_ERROR_TYPE_UE:
+ printk("%s Error type: %s [%s]\n", level,
+ VAL_TO_STRING(mc_err_types, error_type),
+ VAL_TO_STRING(mc_ue_types, err_sub_type));
+ break;
+ case MC_ERROR_TYPE_SLB:
+ printk("%s Error type: %s [%s]\n", level,
+ VAL_TO_STRING(mc_err_types, error_type),
+ VAL_TO_STRING(mc_slb_types, err_sub_type));
+ break;
+ case MC_ERROR_TYPE_ERAT:
+ case MC_ERROR_TYPE_TLB:
+ printk("%s Error type: %s [%s]\n", level,
+ VAL_TO_STRING(mc_err_types, error_type),
+ VAL_TO_STRING(mc_soft_types, err_sub_type));
+ break;
+ default:
+ printk("%s Error type: %s\n", level,
+ VAL_TO_STRING(mc_err_types, error_type));
+ break;
+ }
+
+ addr = rtas_mc_get_effective_addr(mce_log);
+ if (addr)
+ printk("%s Effective address: %016llx\n", level, addr);
+}
+
+static int mce_handle_error(struct rtas_error_log *errp)
+{
+ struct pseries_errorlog *pseries_log;
+ struct pseries_mc_errorlog *mce_log;
+ int disposition = rtas_error_disposition(errp);
+ u8 error_type;
+
+ if (!rtas_error_extended(errp))
+ goto out;
+
+ pseries_log = get_pseries_errorlog(errp, PSERIES_ELOG_SECT_ID_MCE);
+ if (pseries_log == NULL)
+ goto out;
+
+ mce_log = (struct pseries_mc_errorlog *)pseries_log->data;
+ error_type = mce_log->error_type;
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ if (disposition == RTAS_DISP_NOT_RECOVERED) {
+ switch (error_type) {
+ case MC_ERROR_TYPE_SLB:
+ case MC_ERROR_TYPE_ERAT:
+ /*
+ * Store the old slb content in paca before flushing.
+ * Print this when we go to virtual mode.
+ * There are chances that we may hit MCE again if there
+ * is a parity error on the SLB entry we trying to read
+ * for saving. Hence limit the slb saving to single
+ * level of recursion.
+ */
+ if (local_paca->in_mce == 1)
+ slb_save_contents(local_paca->mce_faulty_slbs);
+ flush_and_reload_slb();
+ disposition = RTAS_DISP_FULLY_RECOVERED;
+ rtas_set_disposition_recovered(errp);
+ break;
+ default:
+ break;
+ }
+ }
+#endif
+
+out:
+ return disposition;
+}
+
/*
* Process MCE rtas errlog event.
*/
int recovered = 0;
int disposition = rtas_error_disposition(err);
+ pseries_print_mce_info(regs, err);
+
if (!(regs->msr & MSR_RI)) {
/* If MSR_RI isn't set, we cannot recover */
+ pr_err("Machine check interrupt unrecoverable: MSR(RI=0)\n");
recovered = 0;
} else if (disposition == RTAS_DISP_FULLY_RECOVERED) {
struct rtas_error_log *errp;
if (fwnmi_active) {
- errp = fwnmi_get_errinfo(regs);
fwnmi_release_errinfo();
+ errp = fwnmi_get_errlog();
if (errp && recover_mce(regs, errp))
return 1;
}
return 0;
}
+
+long pseries_machine_check_realmode(struct pt_regs *regs)
+{
+ struct rtas_error_log *errp;
+ int disposition;
+
+ if (fwnmi_active) {
+ errp = fwnmi_get_errinfo(regs);
+ /*
+ * Call to fwnmi_release_errinfo() in real mode causes kernel
+ * to panic. Hence we will call it as soon as we go into
+ * virtual mode.
+ */
+ disposition = mce_handle_error(errp);
+ if (disposition == RTAS_DISP_FULLY_RECOVERED)
+ return 1;
+ }
+
+ return 0;
+}
u8 *mce_data_buf;
unsigned int i;
int nr_cpus = num_possible_cpus();
+#ifdef CONFIG_PPC_BOOK3S_64
+ struct slb_entry *slb_ptr;
+ size_t size;
+#endif
int ibm_nmi_register = rtas_token("ibm,nmi-register");
if (ibm_nmi_register == RTAS_UNKNOWN_SERVICE)
paca_ptrs[i]->mce_data_buf = mce_data_buf +
(RTAS_ERROR_LOG_MAX * i);
}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+ /* Allocate per cpu slb area to save old slb contents during MCE */
+ size = sizeof(struct slb_entry) * mmu_slb_size * nr_cpus;
+ slb_ptr = __va(memblock_alloc_base(size, sizeof(struct slb_entry),
+ ppc64_rma_size));
+ for_each_possible_cpu(i)
+ paca_ptrs[i]->mce_faulty_slbs = slb_ptr + (mmu_slb_size * i);
+#endif
}
static void pseries_8259_cascade(struct irq_desc *desc)
.calibrate_decr = generic_calibrate_decr,
.progress = rtas_progress,
.system_reset_exception = pSeries_system_reset_exception,
+ .machine_check_early = pseries_machine_check_realmode,
.machine_check_exception = pSeries_machine_check_exception,
#ifdef CONFIG_KEXEC_CORE
.machine_kexec = pSeries_machine_kexec,
struct device_node *parent_node;
const __be32 *prop;
enum vio_dev_family family;
- const char *of_node_name = of_node->name ? of_node->name : "<unknown>";
/*
* Determine if this node is a under the /vdevice node or under the
else if (!strcmp(parent_node->type, "vdevice"))
family = VDEVICE;
else {
- pr_warn("%s: parent(%pOF) of %s not recognized.\n",
+ pr_warn("%s: parent(%pOF) of %pOFn not recognized.\n",
__func__,
parent_node,
- of_node_name);
+ of_node);
of_node_put(parent_node);
return NULL;
}
of_node_put(parent_node);
} else {
- pr_warn("%s: could not determine the parent of node %s.\n",
- __func__, of_node_name);
+ pr_warn("%s: could not determine the parent of node %pOFn.\n",
+ __func__, of_node);
return NULL;
}
if (family == PFO) {
if (of_get_property(of_node, "interrupt-controller", NULL)) {
- pr_debug("%s: Skipping the interrupt controller %s.\n",
- __func__, of_node_name);
+ pr_debug("%s: Skipping the interrupt controller %pOFn.\n",
+ __func__, of_node);
return NULL;
}
}
if (of_node->type != NULL)
viodev->type = of_node->type;
else {
- pr_warn("%s: node %s is missing the 'device_type' "
- "property.\n", __func__, of_node_name);
+ pr_warn("%s: node %pOFn is missing the 'device_type' "
+ "property.\n", __func__, of_node);
goto out;
}
prop = of_get_property(of_node, "reg", NULL);
if (prop == NULL) {
- pr_warn("%s: node %s missing 'reg'\n",
- __func__, of_node_name);
+ pr_warn("%s: node %pOFn missing 'reg'\n",
+ __func__, of_node);
goto out;
}
unit_address = of_read_number(prop, 1);
if (prop != NULL)
viodev->resource_id = of_read_number(prop, 1);
- dev_set_name(&viodev->dev, "%s", of_node_name);
- viodev->type = of_node_name;
+ dev_set_name(&viodev->dev, "%pOFn", of_node);
+ viodev->type = dev_name(&viodev->dev);
viodev->irq = 0;
}
snprintf(kobj_name, sizeof(kobj_name), "%x",
(uint32_t)of_read_number(prop, 1));
} else if (!strcmp(dev_type, "ibm,platform-facilities"))
- snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name);
+ snprintf(kobj_name, sizeof(kobj_name), "%pOFn", vnode);
else
return NULL;
config PPC4xx_PCI_EXPRESS
bool
depends on PCI && 4xx
- default n
config PPC4xx_HSTA_MSI
bool
depends on PCI_MSI
depends on PCI && 4xx
- default n
config PPC4xx_MSI
bool
depends on PCI_MSI
depends on PCI && 4xx
- default n
config PPC_MSI_BITMAP
bool
config SCOM_DEBUGFS
bool "Expose SCOM controllers via debugfs"
depends on PPC_SCOM && DEBUG_FS
- default n
config GE_FPGA
bool
- default n
config FSL_CORENET_RCPM
bool
# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
obj-$(CONFIG_PPC_EARLY_DEBUG_MEMCONS) += udbg_memcons.o
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
-
obj-$(CONFIG_PPC_XICS) += xics/
obj-$(CONFIG_PPC_XIVE) += xive/
goto out_free;
}
- cache_sram->base_virt = ioremap_prot(cache_sram->base_phys,
- cache_sram->size, _PAGE_COHERENT | PAGE_KERNEL);
+ cache_sram->base_virt = ioremap_coherent(cache_sram->base_phys,
+ cache_sram->size);
if (!cache_sram->base_virt) {
- dev_err(&dev->dev, "%pOF: ioremap_prot failed\n",
- dev->dev.of_node);
+ dev_err(&dev->dev, "%pOF: ioremap_coherent failed\n",
+ dev->dev.of_node);
ret = -ENOMEM;
goto out_release;
}
u32 ipic_get_mcp_status(void)
{
- return ipic_read(primary_ipic->regs, IPIC_SERSR);
+ return primary_ipic ? ipic_read(primary_ipic->regs, IPIC_SERSR) : 0;
}
void ipic_clear_mcp_status(u32 mask)
# SPDX-License-Identifier: GPL-2.0
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-y += xics-common.o
obj-$(CONFIG_PPC_ICP_NATIVE) += icp-native.o
# SPDX-License-Identifier: GPL-2.0
config PPC_XIVE
bool
- default n
select PPC_SMP_MUXED_IPI
select HARDIRQS_SW_RESEND
config PPC_XIVE_NATIVE
bool
- default n
select PPC_XIVE
depends on PPC_POWERNV
config PPC_XIVE_SPAPR
bool
- default n
select PPC_XIVE
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
obj-y += common.o
obj-$(CONFIG_PPC_XIVE_NATIVE) += native.o
{
struct xive_cpu *xc = __this_cpu_read(xive_cpu);
- DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
- d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
-
/* Handle possible race with unplug and drop stale IPIs */
if (!xc)
return;
+
+ DBG_VERBOSE("IPI eoi: irq=%d [0x%lx] (HW IRQ 0x%x) pending=%02x\n",
+ d->irq, irqd_to_hwirq(d), xc->hw_ipi, xc->pending_prio);
+
xive_do_source_eoi(xc->hw_ipi, &xc->ipi_data);
xive_do_queue_eoi(xc);
}
#ifdef CONFIG_SMP
static int xive_native_get_ipi(unsigned int cpu, struct xive_cpu *xc)
{
- struct device_node *np;
- unsigned int chip_id;
s64 irq;
- /* Find the chip ID */
- np = of_get_cpu_node(cpu, NULL);
- if (np) {
- if (of_property_read_u32(np, "ibm,chip-id", &chip_id) < 0)
- chip_id = 0;
- }
-
/* Allocate an IPI and populate info about it */
for (;;) {
- irq = opal_xive_allocate_irq(chip_id);
+ irq = opal_xive_allocate_irq(xc->chip_id);
if (irq == OPAL_BUSY) {
msleep(OPAL_BUSY_DELAY_MS);
continue;
# SPDX-License-Identifier: GPL-2.0
# Makefile for xmon
-subdir-ccflags-$(CONFIG_PPC_WERROR) := -Werror
+# Disable clang warning for using setjmp without setjmp.h header
+subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
GCOV_PROFILE := n
UBSAN_SANITIZE := n
# Disable ftrace for the entire directory
ORIG_CFLAGS := $(KBUILD_CFLAGS)
-KBUILD_CFLAGS = $(subst -mno-sched-epilog,,$(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)))
+KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS))
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
DUMP(p, cpu_start, "%#-*x");
DUMP(p, kexec_state, "%#-*x");
#ifdef CONFIG_PPC_BOOK3S_64
- for (i = 0; i < SLB_NUM_BOLTED; i++) {
- u64 esid, vsid;
+ if (!early_radix_enabled()) {
+ for (i = 0; i < SLB_NUM_BOLTED; i++) {
+ u64 esid, vsid;
- if (!p->slb_shadow_ptr)
- continue;
+ if (!p->slb_shadow_ptr)
+ continue;
+
+ esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid);
+ vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid);
- esid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].esid);
- vsid = be64_to_cpu(p->slb_shadow_ptr->save_area[i].vsid);
+ if (esid || vsid) {
+ printf(" %-*s[%d] = 0x%016llx 0x%016llx\n",
+ 22, "slb_shadow", i, esid, vsid);
+ }
+ }
+ DUMP(p, vmalloc_sllp, "%#-*x");
+ DUMP(p, stab_rr, "%#-*x");
+ DUMP(p, slb_used_bitmap, "%#-*x");
+ DUMP(p, slb_kern_bitmap, "%#-*x");
- if (esid || vsid) {
- printf(" %-*s[%d] = 0x%016llx 0x%016llx\n",
- 22, "slb_shadow", i, esid, vsid);
+ if (!early_cpu_has_feature(CPU_FTR_ARCH_300)) {
+ DUMP(p, slb_cache_ptr, "%#-*x");
+ for (i = 0; i < SLB_CACHE_ENTRIES; i++)
+ printf(" %-*s[%d] = 0x%016x\n",
+ 22, "slb_cache", i, p->slb_cache[i]);
}
}
- DUMP(p, vmalloc_sllp, "%#-*x");
- DUMP(p, slb_cache_ptr, "%#-*x");
- for (i = 0; i < SLB_CACHE_ENTRIES; i++)
- printf(" %-*s[%d] = 0x%016x\n",
- 22, "slb_cache", i, p->slb_cache[i]);
DUMP(p, rfi_flush_fallback_area, "%-*px");
#endif
DUMP(p, __current, "%-*px");
DUMP(p, kstack, "%#-*llx");
printf(" %-*s = 0x%016llx\n", 25, "kstack_base", p->kstack & ~(THREAD_SIZE - 1));
- DUMP(p, stab_rr, "%#-*llx");
+#ifdef CONFIG_STACKPROTECTOR
+ DUMP(p, canary, "%#-*lx");
+#endif
DUMP(p, saved_r1, "%#-*llx");
DUMP(p, trap_save, "%#-*x");
DUMP(p, irq_soft_mask, "%#-*x");
DUMP(p, accounting.utime, "%#-*lx");
DUMP(p, accounting.stime, "%#-*lx");
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
DUMP(p, accounting.utime_scaled, "%#-*lx");
+#endif
DUMP(p, accounting.starttime, "%#-*lx");
DUMP(p, accounting.starttime_user, "%#-*lx");
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
DUMP(p, accounting.startspurr, "%#-*lx");
DUMP(p, accounting.utime_sspurr, "%#-*lx");
+#endif
DUMP(p, accounting.steal_time, "%#-*lx");
#undef DUMP
#ifdef CONFIG_PPC_BOOK3S_64
void format_pte(void *ptep, unsigned long pte)
{
+ pte_t entry = __pte(pte);
+
printf("ptep @ 0x%016lx = 0x%016lx\n", (unsigned long)ptep, pte);
printf("Maps physical address = 0x%016lx\n", pte & PTE_RPN_MASK);
printf("Flags = %s%s%s%s%s\n",
- (pte & _PAGE_ACCESSED) ? "Accessed " : "",
- (pte & _PAGE_DIRTY) ? "Dirty " : "",
- (pte & _PAGE_READ) ? "Read " : "",
- (pte & _PAGE_WRITE) ? "Write " : "",
- (pte & _PAGE_EXEC) ? "Exec " : "");
+ pte_young(entry) ? "Accessed " : "",
+ pte_dirty(entry) ? "Dirty " : "",
+ pte_read(entry) ? "Read " : "",
+ pte_write(entry) ? "Write " : "",
+ pte_exec(entry) ? "Exec " : "");
}
static void show_pte(unsigned long addr)
init_cpu_possible(cpumask_of(0));
- for_each_node_by_type(np, "cpu") {
+ for_each_of_cpu_node(np) {
const __be32 *cell = of_get_property(np, "reg", NULL);
u64 id = -1;
if (cell) id = of_read_number(cell, of_n_addr_cells(np));
return val;
}
-#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+#define xchg(ptr,x) \
+({ __typeof__(*(ptr)) __ret; \
+ __ret = (__typeof__(*(ptr))) \
+ __xchg((unsigned long)(x), (ptr), sizeof(*(ptr))); \
+ __ret; \
+})
void __xchg_called_with_bad_pointer(void);
#include <linux/atomic.h>
#include <linux/irqdomain.h>
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 2
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
-
#define of_compat_cmp(s1, s2, l) strncmp((s1), (s2), (l))
#define of_prop_cmp(s1, s2) strcasecmp((s1), (s2))
#define of_node_cmp(s1, s2) strcmp((s1), (s2))
} while(0)
void synchronize_user_stack(void);
-void fault_in_user_windows(void);
+struct pt_regs;
+void fault_in_user_windows(struct pt_regs *);
#endif /* __SPARC64_SWITCH_TO_64_H */
#include <linux/sysrq.h>
#include <linux/nmi.h>
#include <linux/context_tracking.h>
+#include <linux/signal.h>
#include <linux/uaccess.h>
#include <asm/page.h>
force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current);
}
-void fault_in_user_windows(void)
+static const char uwfault32[] = KERN_INFO \
+ "%s[%d]: bad register window fault: SP %08lx (orig_sp %08lx) TPC %08lx O7 %08lx\n";
+static const char uwfault64[] = KERN_INFO \
+ "%s[%d]: bad register window fault: SP %016lx (orig_sp %016lx) TPC %08lx O7 %016lx\n";
+
+void fault_in_user_windows(struct pt_regs *regs)
{
struct thread_info *t = current_thread_info();
unsigned long window;
do {
struct reg_window *rwin = &t->reg_window[window];
int winsize = sizeof(struct reg_window);
- unsigned long sp;
+ unsigned long sp, orig_sp;
- sp = t->rwbuf_stkptrs[window];
+ orig_sp = sp = t->rwbuf_stkptrs[window];
if (test_thread_64bit_stack(sp))
sp += STACK_BIAS;
stack_unaligned(sp);
if (unlikely(copy_to_user((char __user *)sp,
- rwin, winsize)))
+ rwin, winsize))) {
+ if (show_unhandled_signals)
+ printk_ratelimited(is_compat_task() ?
+ uwfault32 : uwfault64,
+ current->comm, current->pid,
+ sp, orig_sp,
+ regs->tpc,
+ regs->u_regs[UREG_I7]);
goto barf;
+ }
} while (window--);
}
set_thread_wsaved(0);
barf:
set_thread_wsaved(window + 1);
- user_exit();
- do_exit(SIGILL);
+ force_sig(SIGSEGV, current);
}
asmlinkage long sparc_do_fork(unsigned long clone_flags,
wrpr %g0, RTRAP_PSTATE_IRQOFF, %pstate
__handle_user_windows:
+ add %sp, PTREGS_OFF, %o0
call fault_in_user_windows
661: wrpr %g0, RTRAP_PSTATE, %pstate
/* If userspace is using ADI, it could potentially pass
get_sigframe(ksig, regs, sigframe_size);
if (invalid_frame_pointer(sf, sigframe_size)) {
- do_exit(SIGILL);
+ if (show_unhandled_signals)
+ pr_info("%s[%d] bad frame in setup_frame32: %08lx TPC %08lx O7 %08lx\n",
+ current->comm, current->pid, (unsigned long)sf,
+ regs->tpc, regs->u_regs[UREG_I7]);
+ force_sigsegv(ksig->sig, current);
return -EINVAL;
}
get_sigframe(ksig, regs, sigframe_size);
if (invalid_frame_pointer(sf, sigframe_size)) {
- do_exit(SIGILL);
+ if (show_unhandled_signals)
+ pr_info("%s[%d] bad frame in setup_rt_frame32: %08lx TPC %08lx O7 %08lx\n",
+ current->comm, current->pid, (unsigned long)sf,
+ regs->tpc, regs->u_regs[UREG_I7]);
+ force_sigsegv(ksig->sig, current);
return -EINVAL;
}
get_sigframe(ksig, regs, sf_size);
if (invalid_frame_pointer (sf)) {
- do_exit(SIGILL); /* won't return, actually */
+ if (show_unhandled_signals)
+ pr_info("%s[%d] bad frame in setup_rt_frame: %016lx TPC %016lx O7 %016lx\n",
+ current->comm, current->pid, (unsigned long)sf,
+ regs->tpc, regs->u_regs[UREG_I7]);
+ force_sigsegv(ksig->sig, current);
return -EINVAL;
}
}
return numa_latency[from][to];
}
+EXPORT_SYMBOL(__node_distance);
static int __init find_best_numa_node_for_mlgroup(struct mdesc_mlgroup *grp)
{
#ifdef CONFIG_IRQ_REMAP
+extern raw_spinlock_t irq_2_ir_lock;
+
extern bool irq_remapping_cap(enum irq_remap_cap cap);
extern void set_irq_remapping_broken(void);
extern int irq_remapping_prepare(void);
int ret;
version = GET_APIC_VERSION(apic_read(APIC_LVR));
- for_each_node_by_type(dn, "cpu") {
+ for_each_of_cpu_node(dn) {
ret = of_property_read_u32(dn, "reg", &apic_id);
if (ret < 0) {
pr_warn("%pOF: missing local APIC ID\n", dn);
head-y := arch/xtensa/kernel/head.o
core-y += arch/xtensa/kernel/ arch/xtensa/mm/
core-y += $(buildvar) $(buildplf)
+core-y += arch/xtensa/boot/dts/
libs-y += arch/xtensa/lib/ $(LIBGCC)
drivers-$(CONFIG_OPROFILE) += arch/xtensa/oprofile/
-ifneq ($(CONFIG_BUILTIN_DTB),"")
-core-$(CONFIG_OF) += arch/xtensa/boot/dts/
-endif
-
boot := arch/xtensa/boot
all Image zImage uImage: vmlinux
$(Q)$(MAKE) $(build)=$(boot) $@
-%.dtb:
- $(Q)$(MAKE) $(build)=$(boot)/dts $(boot)/dts/$@
-
-dtbs: scripts
- $(Q)$(MAKE) $(build)=$(boot)/dts
-
define archhelp
@echo '* Image - Kernel ELF image with reset vector'
@echo '* zImage - Compressed kernel image (arch/xtensa/boot/images/zImage.*)'
@echo '* uImage - U-Boot wrapped image'
- @echo ' dtbs - Build device tree blobs for enabled boards'
endef
u32 freq;
if (!base) {
- pr_err("%s: invalid address\n", np->name);
+ pr_err("%pOFn: invalid address\n", np);
return;
}
clk = clk_register_fixed_rate(NULL, np->name, NULL, 0, freq);
if (IS_ERR(clk)) {
- pr_err("%s: clk registration failed\n", np->name);
+ pr_err("%pOFn: clk registration failed\n", np);
return;
}
if (of_clk_add_provider(np, of_clk_src_simple_get, clk)) {
- pr_err("%s: clk provider registration failed\n", np->name);
+ pr_err("%pOFn: clk provider registration failed\n", np);
return;
}
}
* queue, remove the entity from its old weight counter (if
* there is a counter associated with the entity).
*/
- if (prev_weight != new_weight) {
- if (bfqq) {
- root = &bfqd->queue_weights_tree;
- __bfq_weights_tree_remove(bfqd, bfqq, root);
- } else
- bfqd->num_active_groups--;
+ if (prev_weight != new_weight && bfqq) {
+ root = &bfqd->queue_weights_tree;
+ __bfq_weights_tree_remove(bfqd, bfqq, root);
}
entity->weight = new_weight;
/*
* Add the entity, if it is not a weight-raised queue,
* to the counter associated with its new weight.
*/
- if (prev_weight != new_weight) {
- if (bfqq && bfqq->wr_coeff == 1) {
- /* If we get here, root has been initialized. */
- bfq_weights_tree_add(bfqd, bfqq, root);
- } else
- bfqd->num_active_groups++;
+ if (prev_weight != new_weight && bfqq && bfqq->wr_coeff == 1) {
+ /* If we get here, root has been initialized. */
+ bfq_weights_tree_add(bfqd, bfqq, root);
}
new_st->wsum += entity->weight;
if (!q->limits.max_write_same_sectors)
goto not_supported;
break;
- case REQ_OP_ZONE_REPORT:
case REQ_OP_ZONE_RESET:
if (!blk_queue_is_zoned(q))
goto not_supported;
#include "blk.h"
-static struct bio *next_bio(struct bio *bio, unsigned int nr_pages,
- gfp_t gfp)
+struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
{
struct bio *new = bio_alloc(gfp, nr_pages);
end_sect = sector + req_sects;
- bio = next_bio(bio, 0, gfp_mask);
+ bio = blk_next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, op, 0);
max_write_same_sectors = UINT_MAX >> 9;
while (nr_sects) {
- bio = next_bio(bio, 1, gfp_mask);
+ bio = blk_next_bio(bio, 1, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio->bi_vcnt = 1;
return -EOPNOTSUPP;
while (nr_sects) {
- bio = next_bio(bio, 0, gfp_mask);
+ bio = blk_next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio->bi_opf = REQ_OP_WRITE_ZEROES;
return -EPERM;
while (nr_sects != 0) {
- bio = next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
- gfp_mask);
+ bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
+ gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
REQ_OP_NAME(WRITE),
REQ_OP_NAME(FLUSH),
REQ_OP_NAME(DISCARD),
- REQ_OP_NAME(ZONE_REPORT),
REQ_OP_NAME(SECURE_ERASE),
REQ_OP_NAME(ZONE_RESET),
REQ_OP_NAME(WRITE_SAME),
rq_qos_throttle(q, bio, NULL);
- trace_block_getrq(q, bio, bio->bi_opf);
-
rq = blk_mq_get_request(q, bio, bio->bi_opf, &data);
if (unlikely(!rq)) {
rq_qos_cleanup(q, bio);
return BLK_QC_T_NONE;
}
+ trace_block_getrq(q, bio, bio->bi_opf);
+
rq_qos_track(q, rq, bio);
cookie = request_to_qc_t(data.hctx, rq);
}
}
+static ssize_t queue_nr_zones_show(struct request_queue *q, char *page)
+{
+ return queue_var_show(blk_queue_nr_zones(q), page);
+}
+
static ssize_t queue_nomerges_show(struct request_queue *q, char *page)
{
return queue_var_show((blk_queue_nomerges(q) << 1) |
.show = queue_zoned_show,
};
+static struct queue_sysfs_entry queue_nr_zones_entry = {
+ .attr = {.name = "nr_zones", .mode = 0444 },
+ .show = queue_nr_zones_show,
+};
+
static struct queue_sysfs_entry queue_nomerges_entry = {
.attr = {.name = "nomerges", .mode = 0644 },
.show = queue_nomerges_show,
&queue_write_zeroes_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_zoned_entry.attr,
+ &queue_nr_zones_entry.attr,
&queue_nomerges_entry.attr,
&queue_rq_affinity_entry.attr,
&queue_iostats_entry.attr,
if (q->queue_tags)
__blk_queue_free_tags(q);
+ blk_queue_free_zone_bitmaps(q);
+
if (!q->mq_ops) {
if (q->exit_rq_fn)
q->exit_rq_fn(q, q->fq->flush_rq);
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>
+#include <linux/blk-mq.h>
+
+#include "blk.h"
static inline sector_t blk_zone_start(struct request_queue *q,
sector_t sector)
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
+static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
+ sector_t nr_sectors)
+{
+ unsigned long zone_sectors = blk_queue_zone_sectors(q);
+
+ return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
+}
+
+/**
+ * blkdev_nr_zones - Get number of zones
+ * @bdev: Target block device
+ *
+ * Description:
+ * Return the total number of zones of a zoned block device.
+ * For a regular block device, the number of zones is always 0.
+ */
+unsigned int blkdev_nr_zones(struct block_device *bdev)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (!blk_queue_is_zoned(q))
+ return 0;
+
+ return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
+}
+EXPORT_SYMBOL_GPL(blkdev_nr_zones);
+
/*
- * Check that a zone report belongs to the partition.
- * If yes, fix its start sector and write pointer, copy it in the
- * zone information array and return true. Return false otherwise.
+ * Check that a zone report belongs to this partition, and if yes, fix its start
+ * sector and write pointer and return true. Return false otherwise.
*/
-static bool blkdev_report_zone(struct block_device *bdev,
- struct blk_zone *rep,
- struct blk_zone *zone)
+static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep)
{
sector_t offset = get_start_sect(bdev);
rep->wp = rep->start + rep->len;
else
rep->wp -= offset;
- memcpy(zone, rep, sizeof(struct blk_zone));
-
return true;
}
+static int blk_report_zones(struct gendisk *disk, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask)
+{
+ struct request_queue *q = disk->queue;
+ unsigned int z = 0, n, nrz = *nr_zones;
+ sector_t capacity = get_capacity(disk);
+ int ret;
+
+ while (z < nrz && sector < capacity) {
+ n = nrz - z;
+ ret = disk->fops->report_zones(disk, sector, &zones[z], &n,
+ gfp_mask);
+ if (ret)
+ return ret;
+ if (!n)
+ break;
+ sector += blk_queue_zone_sectors(q) * n;
+ z += n;
+ }
+
+ WARN_ON(z > *nr_zones);
+ *nr_zones = z;
+
+ return 0;
+}
+
/**
* blkdev_report_zones - Get zones information
* @bdev: Target block device
* requested by @nr_zones. The number of zones actually reported is
* returned in @nr_zones.
*/
-int blkdev_report_zones(struct block_device *bdev,
- sector_t sector,
- struct blk_zone *zones,
- unsigned int *nr_zones,
+int blkdev_report_zones(struct block_device *bdev, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
gfp_t gfp_mask)
{
struct request_queue *q = bdev_get_queue(bdev);
- struct blk_zone_report_hdr *hdr;
- unsigned int nrz = *nr_zones;
- struct page *page;
- unsigned int nr_rep;
- size_t rep_bytes;
- unsigned int nr_pages;
- struct bio *bio;
- struct bio_vec *bv;
- unsigned int i, n, nz;
- unsigned int ofst;
- void *addr;
+ unsigned int i, nrz;
int ret;
- if (!q)
- return -ENXIO;
-
if (!blk_queue_is_zoned(q))
return -EOPNOTSUPP;
- if (!nrz)
- return 0;
-
- if (sector > bdev->bd_part->nr_sects) {
- *nr_zones = 0;
- return 0;
- }
-
/*
- * The zone report has a header. So make room for it in the
- * payload. Also make sure that the report fits in a single BIO
- * that will not be split down the stack.
+ * A block device that advertized itself as zoned must have a
+ * report_zones method. If it does not have one defined, the device
+ * driver has a bug. So warn about that.
*/
- rep_bytes = sizeof(struct blk_zone_report_hdr) +
- sizeof(struct blk_zone) * nrz;
- rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
- if (rep_bytes > (queue_max_sectors(q) << 9))
- rep_bytes = queue_max_sectors(q) << 9;
-
- nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
- rep_bytes >> PAGE_SHIFT);
- nr_pages = min_t(unsigned int, nr_pages,
- queue_max_segments(q));
-
- bio = bio_alloc(gfp_mask, nr_pages);
- if (!bio)
- return -ENOMEM;
-
- bio_set_dev(bio, bdev);
- bio->bi_iter.bi_sector = blk_zone_start(q, sector);
- bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);
+ if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones))
+ return -EOPNOTSUPP;
- for (i = 0; i < nr_pages; i++) {
- page = alloc_page(gfp_mask);
- if (!page) {
- ret = -ENOMEM;
- goto out;
- }
- if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
- __free_page(page);
- break;
- }
+ if (!*nr_zones || sector >= bdev->bd_part->nr_sects) {
+ *nr_zones = 0;
+ return 0;
}
- if (i == 0)
- ret = -ENOMEM;
- else
- ret = submit_bio_wait(bio);
+ nrz = min(*nr_zones,
+ __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector));
+ ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector,
+ zones, &nrz, gfp_mask);
if (ret)
- goto out;
-
- /*
- * Process the report result: skip the header and go through the
- * reported zones to fixup and fixup the zone information for
- * partitions. At the same time, return the zone information into
- * the zone array.
- */
- n = 0;
- nz = 0;
- nr_rep = 0;
- bio_for_each_segment_all(bv, bio, i) {
-
- if (!bv->bv_page)
- break;
-
- addr = kmap_atomic(bv->bv_page);
-
- /* Get header in the first page */
- ofst = 0;
- if (!nr_rep) {
- hdr = addr;
- nr_rep = hdr->nr_zones;
- ofst = sizeof(struct blk_zone_report_hdr);
- }
-
- /* Fixup and report zones */
- while (ofst < bv->bv_len &&
- n < nr_rep && nz < nrz) {
- if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
- nz++;
- ofst += sizeof(struct blk_zone);
- n++;
- }
-
- kunmap_atomic(addr);
+ return ret;
- if (n >= nr_rep || nz >= nrz)
+ for (i = 0; i < nrz; i++) {
+ if (!blkdev_report_zone(bdev, zones))
break;
-
+ zones++;
}
- *nr_zones = nz;
-out:
- bio_for_each_segment_all(bv, bio, i)
- __free_page(bv->bv_page);
- bio_put(bio);
+ *nr_zones = i;
- return ret;
+ return 0;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
struct request_queue *q = bdev_get_queue(bdev);
sector_t zone_sectors;
sector_t end_sector = sector + nr_sectors;
- struct bio *bio;
+ struct bio *bio = NULL;
+ struct blk_plug plug;
int ret;
- if (!q)
- return -ENXIO;
-
if (!blk_queue_is_zoned(q))
return -EOPNOTSUPP;
- if (end_sector > bdev->bd_part->nr_sects)
+ if (bdev_read_only(bdev))
+ return -EPERM;
+
+ if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
/* Out of range */
return -EINVAL;
end_sector != bdev->bd_part->nr_sects)
return -EINVAL;
+ blk_start_plug(&plug);
while (sector < end_sector) {
- bio = bio_alloc(gfp_mask, 0);
+ bio = blk_next_bio(bio, 0, gfp_mask);
bio->bi_iter.bi_sector = sector;
bio_set_dev(bio, bdev);
bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);
- ret = submit_bio_wait(bio);
- bio_put(bio);
-
- if (ret)
- return ret;
-
sector += zone_sectors;
/* This may take a while, so be nice to others */
}
- return 0;
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+
+ blk_finish_plug(&plug);
+
+ return ret;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
if (!rep.nr_zones)
return -EINVAL;
- if (rep.nr_zones > INT_MAX / sizeof(struct blk_zone))
- return -ERANGE;
+ rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);
zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
GFP_KERNEL | __GFP_ZERO);
return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
GFP_KERNEL);
}
+
+static inline unsigned long *blk_alloc_zone_bitmap(int node,
+ unsigned int nr_zones)
+{
+ return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
+ GFP_NOIO, node);
+}
+
+/*
+ * Allocate an array of struct blk_zone to get nr_zones zone information.
+ * The allocated array may be smaller than nr_zones.
+ */
+static struct blk_zone *blk_alloc_zones(int node, unsigned int *nr_zones)
+{
+ size_t size = *nr_zones * sizeof(struct blk_zone);
+ struct page *page;
+ int order;
+
+ for (order = get_order(size); order > 0; order--) {
+ page = alloc_pages_node(node, GFP_NOIO | __GFP_ZERO, order);
+ if (page) {
+ *nr_zones = min_t(unsigned int, *nr_zones,
+ (PAGE_SIZE << order) / sizeof(struct blk_zone));
+ return page_address(page);
+ }
+ }
+
+ return NULL;
+}
+
+void blk_queue_free_zone_bitmaps(struct request_queue *q)
+{
+ kfree(q->seq_zones_bitmap);
+ q->seq_zones_bitmap = NULL;
+ kfree(q->seq_zones_wlock);
+ q->seq_zones_wlock = NULL;
+}
+
+/**
+ * blk_revalidate_disk_zones - (re)allocate and initialize zone bitmaps
+ * @disk: Target disk
+ *
+ * Helper function for low-level device drivers to (re) allocate and initialize
+ * a disk request queue zone bitmaps. This functions should normally be called
+ * within the disk ->revalidate method. For BIO based queues, no zone bitmap
+ * is allocated.
+ */
+int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+ struct request_queue *q = disk->queue;
+ unsigned int nr_zones = __blkdev_nr_zones(q, get_capacity(disk));
+ unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
+ unsigned int i, rep_nr_zones = 0, z = 0, nrz;
+ struct blk_zone *zones = NULL;
+ sector_t sector = 0;
+ int ret = 0;
+
+ /*
+ * BIO based queues do not use a scheduler so only q->nr_zones
+ * needs to be updated so that the sysfs exposed value is correct.
+ */
+ if (!queue_is_rq_based(q)) {
+ q->nr_zones = nr_zones;
+ return 0;
+ }
+
+ if (!blk_queue_is_zoned(q) || !nr_zones) {
+ nr_zones = 0;
+ goto update;
+ }
+
+ /* Allocate bitmaps */
+ ret = -ENOMEM;
+ seq_zones_wlock = blk_alloc_zone_bitmap(q->node, nr_zones);
+ if (!seq_zones_wlock)
+ goto out;
+ seq_zones_bitmap = blk_alloc_zone_bitmap(q->node, nr_zones);
+ if (!seq_zones_bitmap)
+ goto out;
+
+ /* Get zone information and initialize seq_zones_bitmap */
+ rep_nr_zones = nr_zones;
+ zones = blk_alloc_zones(q->node, &rep_nr_zones);
+ if (!zones)
+ goto out;
+
+ while (z < nr_zones) {
+ nrz = min(nr_zones - z, rep_nr_zones);
+ ret = blk_report_zones(disk, sector, zones, &nrz, GFP_NOIO);
+ if (ret)
+ goto out;
+ if (!nrz)
+ break;
+ for (i = 0; i < nrz; i++) {
+ if (zones[i].type != BLK_ZONE_TYPE_CONVENTIONAL)
+ set_bit(z, seq_zones_bitmap);
+ z++;
+ }
+ sector += nrz * blk_queue_zone_sectors(q);
+ }
+
+ if (WARN_ON(z != nr_zones)) {
+ ret = -EIO;
+ goto out;
+ }
+
+update:
+ /*
+ * Install the new bitmaps, making sure the queue is stopped and
+ * all I/Os are completed (i.e. a scheduler is not referencing the
+ * bitmaps).
+ */
+ blk_mq_freeze_queue(q);
+ q->nr_zones = nr_zones;
+ swap(q->seq_zones_wlock, seq_zones_wlock);
+ swap(q->seq_zones_bitmap, seq_zones_bitmap);
+ blk_mq_unfreeze_queue(q);
+
+out:
+ free_pages((unsigned long)zones,
+ get_order(rep_nr_zones * sizeof(struct blk_zone)));
+ kfree(seq_zones_wlock);
+ kfree(seq_zones_bitmap);
+
+ if (ret) {
+ pr_warn("%s: failed to revalidate zones\n", disk->disk_name);
+ blk_mq_freeze_queue(q);
+ blk_queue_free_zone_bitmaps(q);
+ blk_mq_unfreeze_queue(q);
+ }
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(blk_revalidate_disk_zones);
+
static inline int blk_iolatency_init(struct request_queue *q) { return 0; }
#endif
+struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp);
+
+#ifdef CONFIG_BLK_DEV_ZONED
+void blk_queue_free_zone_bitmaps(struct request_queue *q);
+#else
+static inline void blk_queue_free_zone_bitmaps(struct request_queue *q) {}
+#endif
+
#endif /* BLK_INTERNAL_H */
return blkdev_report_zones_ioctl(bdev, mode, cmd, arg);
case BLKRESETZONE:
return blkdev_reset_zones_ioctl(bdev, mode, cmd, arg);
+ case BLKGETZONESZ:
+ return put_uint(arg, bdev_zone_sectors(bdev));
+ case BLKGETNRZONES:
+ return put_uint(arg, blkdev_nr_zones(bdev));
case HDIO_GETGEO:
return blkdev_getgeo(bdev, argp);
case BLKRAGET:
/* These specific Samsung models/firmware-revs do not handle LPM well */
{ "SAMSUNG MZMPC128HBFU-000MV", "CXM14M1Q", ATA_HORKAGE_NOLPM, },
{ "SAMSUNG SSD PM830 mSATA *", "CXM13D1Q", ATA_HORKAGE_NOLPM, },
+ { "SAMSUNG MZ7TD256HAFV-000L9", "DXT02L5Q", ATA_HORKAGE_NOLPM, },
/* devices that don't properly handle queued TRIM commands */
{ "Micron_M500IT_*", "MU01", ATA_HORKAGE_NO_NCQ_TRIM |
#ifdef CONFIG_BLK_DEV_ZONED
int null_zone_init(struct nullb_device *dev);
void null_zone_exit(struct nullb_device *dev);
-blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio);
+int null_zone_report(struct gendisk *disk, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask);
void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
unsigned int nr_sectors);
void null_zone_reset(struct nullb_cmd *cmd, sector_t sector);
return -EINVAL;
}
static inline void null_zone_exit(struct nullb_device *dev) {}
-static inline blk_status_t null_zone_report(struct nullb *nullb,
- struct bio *bio)
+static inline int null_zone_report(struct gendisk *disk, sector_t sector,
+ struct blk_zone *zones,
+ unsigned int *nr_zones, gfp_t gfp_mask)
{
- return BLK_STS_NOTSUPP;
+ return -EOPNOTSUPP;
}
static inline void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
unsigned int nr_sectors)
blk_mq_start_stopped_hw_queues(q, true);
}
-static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd)
-{
- struct nullb_device *dev = cmd->nq->dev;
-
- if (dev->queue_mode == NULL_Q_BIO) {
- if (bio_op(cmd->bio) == REQ_OP_ZONE_REPORT) {
- cmd->error = null_zone_report(nullb, cmd->bio);
- return true;
- }
- } else {
- if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) {
- cmd->error = null_zone_report(nullb, cmd->rq->bio);
- return true;
- }
- }
-
- return false;
-}
-
static blk_status_t null_handle_cmd(struct nullb_cmd *cmd)
{
struct nullb_device *dev = cmd->nq->dev;
struct nullb *nullb = dev->nullb;
int err = 0;
- if (cmd_report_zone(nullb, cmd))
- goto out;
-
if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) {
struct request *rq = cmd->rq;
.owner = THIS_MODULE,
.open = null_open,
.release = null_release,
+ .report_zones = null_zone_report,
};
static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq)
disk->queue = nullb->q;
strncpy(disk->disk_name, nullb->disk_name, DISK_NAME_LEN);
+ if (nullb->dev->zoned) {
+ int ret = blk_revalidate_disk_zones(disk);
+
+ if (ret != 0)
+ return ret;
+ }
+
add_disk(disk);
return 0;
}
kvfree(dev->zones);
}
-static void null_zone_fill_bio(struct nullb_device *dev, struct bio *bio,
- unsigned int zno, unsigned int nr_zones)
+int null_zone_report(struct gendisk *disk, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask)
{
- struct blk_zone_report_hdr *hdr = NULL;
- struct bio_vec bvec;
- struct bvec_iter iter;
- void *addr;
- unsigned int zones_to_cpy;
-
- bio_for_each_segment(bvec, bio, iter) {
- addr = kmap_atomic(bvec.bv_page);
-
- zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone);
-
- if (!hdr) {
- hdr = (struct blk_zone_report_hdr *)addr;
- hdr->nr_zones = nr_zones;
- zones_to_cpy--;
- addr += sizeof(struct blk_zone_report_hdr);
- }
-
- zones_to_cpy = min_t(unsigned int, zones_to_cpy, nr_zones);
-
- memcpy(addr, &dev->zones[zno],
- zones_to_cpy * sizeof(struct blk_zone));
-
- kunmap_atomic(addr);
+ struct nullb *nullb = disk->private_data;
+ struct nullb_device *dev = nullb->dev;
+ unsigned int zno, nrz = 0;
- nr_zones -= zones_to_cpy;
- zno += zones_to_cpy;
+ if (!dev->zoned)
+ /* Not a zoned null device */
+ return -EOPNOTSUPP;
- if (!nr_zones)
- break;
+ zno = null_zone_no(dev, sector);
+ if (zno < dev->nr_zones) {
+ nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno);
+ memcpy(zones, &dev->zones[zno], nrz * sizeof(struct blk_zone));
}
-}
-blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio)
-{
- struct nullb_device *dev = nullb->dev;
- unsigned int zno = null_zone_no(dev, bio->bi_iter.bi_sector);
- unsigned int nr_zones = dev->nr_zones - zno;
- unsigned int max_zones;
+ *nr_zones = nrz;
- max_zones = (bio->bi_iter.bi_size / sizeof(struct blk_zone)) - 1;
- nr_zones = min_t(unsigned int, nr_zones, max_zones);
- null_zone_fill_bio(nullb->dev, bio, zno, nr_zones);
-
- return BLK_STS_OK;
+ return 0;
}
void null_zone_write(struct nullb_cmd *cmd, sector_t sector,
goto err_out;
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (rc)
- dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (rc) {
dev_err(&pdev->dev, "DMA mask error %d\n", rc);
goto err_out_regions;
goto err_out;
rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
if (rc)
- dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ rc = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (rc) {
dev_err(&pdev->dev, "DMA mask error %d\n", rc);
goto err_out_regions;
dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
+ if (!info)
+ return 0;
+
blkif_free(info, 0);
mutex_lock(&info->mutex);
vfree(vmalloc (size));
}
- vaddr = (unsigned long) __ioremap (paddr, size,
- _PAGE_WRITETHRU);
+ vaddr = (unsigned long)ioremap_wt(paddr, size);
#else
vaddr = (unsigned long)z_remap_nocache_nonser(paddr, size);
return 0;
}
+static int fsl_mc_dma_configure(struct device *dev)
+{
+ struct device *dma_dev = dev;
+
+ while (dev_is_fsl_mc(dma_dev))
+ dma_dev = dma_dev->parent;
+
+ return of_dma_configure(dev, dma_dev->of_node, 0);
+}
+
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
.name = "fsl-mc",
.match = fsl_mc_bus_match,
.uevent = fsl_mc_bus_uevent,
+ .dma_configure = fsl_mc_dma_configure,
.dev_groups = fsl_mc_dev_groups,
};
EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
mc_dev->icid = parent_mc_dev->icid;
mc_dev->dma_mask = FSL_MC_DEFAULT_DMA_MASK;
mc_dev->dev.dma_mask = &mc_dev->dma_mask;
+ mc_dev->dev.coherent_dma_mask = mc_dev->dma_mask;
dev_set_msi_domain(&mc_dev->dev,
dev_get_msi_domain(&parent_mc_dev->dev));
}
goto error_cleanup_dev;
}
- /* Objects are coherent, unless 'no shareability' flag set. */
- if (!(obj_desc->flags & FSL_MC_OBJ_FLAG_NO_MEM_SHAREABILITY))
- arch_setup_dma_ops(&mc_dev->dev, 0, 0, NULL, true);
-
/*
* The device-specific probe callback will get invoked by device_add()
*/
*ranges_start = of_get_property(mc_node, "ranges", &ranges_len);
if (!(*ranges_start) || !ranges_len) {
dev_warn(dev,
- "missing or empty ranges property for device tree node '%s'\n",
- mc_node->name);
+ "missing or empty ranges property for device tree node '%pOFn'\n",
+ mc_node);
return 0;
}
tuple_len = range_tuple_cell_count * sizeof(__be32);
if (ranges_len % tuple_len != 0) {
- dev_err(dev, "malformed ranges property '%s'\n", mc_node->name);
+ dev_err(dev, "malformed ranges property '%pOFn'\n", mc_node);
return -EINVAL;
}
tuple_len = (*cell_count) * sizeof(__be32);
if (ranges_len % tuple_len) {
- pr_warn("malformed ranges entry '%s'\n", node->name);
+ pr_warn("malformed ranges entry '%pOFn'\n", node);
return -EINVAL;
}
return 0;
static int gdrom_get_last_session(struct cdrom_device_info *cd_info,
struct cdrom_multisession *ms_info)
{
- int fentry, lentry, track, data, tocuse, err;
+ int fentry, lentry, track, data, err;
+
if (!gd.toc)
return -ENOMEM;
- tocuse = 1;
+
/* Check if GD-ROM */
err = gdrom_readtoc_cmd(gd.toc, 1);
/* Not a GD-ROM so check if standard CD-ROM */
if (err) {
- tocuse = 0;
err = gdrom_readtoc_cmd(gd.toc, 0);
if (err) {
pr_info("Could not get CD table of contents\n");
gd.gdrom_rq = blk_mq_init_sq_queue(&gd.tag_set, &gdrom_mq_ops, 1,
BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING);
if (IS_ERR(gd.gdrom_rq)) {
- rc = PTR_ERR(gd.gdrom_rq);
+ err = PTR_ERR(gd.gdrom_rq);
gd.gdrom_rq = NULL;
goto probe_fail_requestq;
}
pr_warn("%s: pmu-dfs base register not set, dynamic frequency scaling not available\n",
__func__);
- for_each_node_by_type(dn, "cpu")
+ for_each_of_cpu_node(dn)
ncpus++;
cpuclk = kcalloc(ncpus, sizeof(*cpuclk), GFP_KERNEL);
if (WARN_ON(!clks))
goto clks_out;
- for_each_node_by_type(dn, "cpu") {
+ for_each_of_cpu_node(dn) {
struct clk_init_data init;
struct clk *clk;
char *clk_name = kzalloc(5, GFP_KERNEL);
/******************** CPU err device********************************/
static u32 cpc925_cpu_mask_disabled(void)
{
- struct device_node *cpus;
- struct device_node *cpunode = NULL;
+ struct device_node *cpunode;
static u32 mask = 0;
/* use cached value if available */
mask = APIMASK_ADI0 | APIMASK_ADI1;
- cpus = of_find_node_by_path("/cpus");
- if (cpus == NULL) {
- cpc925_printk(KERN_DEBUG, "No /cpus node !\n");
- return 0;
- }
-
- while ((cpunode = of_get_next_child(cpus, cpunode)) != NULL) {
+ for_each_of_cpu_node(cpunode) {
const u32 *reg = of_get_property(cpunode, "reg", NULL);
-
- if (strcmp(cpunode->type, "cpu")) {
- cpc925_printk(KERN_ERR, "Not a cpu node in /cpus: %s\n", cpunode->name);
- continue;
- }
-
if (reg == NULL || *reg > 2) {
cpc925_printk(KERN_ERR, "Bad reg value at %pOF\n", cpunode);
continue;
"Assuming PI id is equal to CPU MPIC id!\n");
}
- of_node_put(cpunode);
- of_node_put(cpus);
-
return mask;
}
scpi_pd->domain = i;
scpi_pd->ops = scpi_ops;
- sprintf(scpi_pd->name, "%s.%d", np->name, i);
+ sprintf(scpi_pd->name, "%pOFn.%d", np, i);
scpi_pd->genpd.name = scpi_pd->name;
scpi_pd->genpd.power_off = scpi_pd_power_off;
scpi_pd->genpd.power_on = scpi_pd_power_on;
}
/* The CEC module handles HDMI hotplug detection */
- cec_np = of_find_compatible_node(np->parent, NULL,
- "mediatek,mt8173-cec");
+ cec_np = of_get_compatible_child(np->parent, "mediatek,mt8173-cec");
if (!cec_np) {
dev_err(dev, "Failed to find CEC node\n");
return -EINVAL;
if (!cec_pdev) {
dev_err(hdmi->dev, "Waiting for CEC device %pOF\n",
cec_np);
+ of_node_put(cec_np);
return -EPROBE_DEFER;
}
+ of_node_put(cec_np);
hdmi->cec_dev = &cec_pdev->dev;
/*
struct device_node *child, *node;
int ret;
- node = of_find_compatible_node(dev->of_node, NULL,
- "qcom,gpu-pwrlevels");
+ node = of_get_compatible_child(dev->of_node, "qcom,gpu-pwrlevels");
if (!node) {
dev_err(dev, "Could not find the GPU powerlevels\n");
return -ENXIO;
dev_pm_opp_add(dev, val, 0);
}
+ of_node_put(node);
+
return 0;
}
and include PCI device scope covered by these DMA
remapping devices.
+config INTEL_IOMMU_DEBUGFS
+ bool "Export Intel IOMMU internals in Debugfs"
+ depends on INTEL_IOMMU && IOMMU_DEBUGFS
+ help
+ !!!WARNING!!!
+
+ DO NOT ENABLE THIS OPTION UNLESS YOU REALLY KNOW WHAT YOU ARE DOING!!!
+
+ Expose Intel IOMMU internals in Debugfs.
+
+ This option is -NOT- intended for production environments, and should
+ only be enabled for debugging Intel IOMMU.
+
config INTEL_IOMMU_SVM
bool "Support for Shared Virtual Memory with Intel IOMMU"
depends on INTEL_IOMMU && X86
obj-$(CONFIG_ARM_SMMU_V3) += arm-smmu-v3.o
obj-$(CONFIG_DMAR_TABLE) += dmar.o
obj-$(CONFIG_INTEL_IOMMU) += intel-iommu.o intel-pasid.o
+obj-$(CONFIG_INTEL_IOMMU_DEBUGFS) += intel-iommu-debugfs.o
obj-$(CONFIG_INTEL_IOMMU_SVM) += intel-svm.o
obj-$(CONFIG_IPMMU_VMSA) += ipmmu-vmsa.o
obj-$(CONFIG_IRQ_REMAP) += intel_irq_remapping.o irq_remapping.o
return (irq_remapping_enabled == 1);
case IOMMU_CAP_NOEXEC:
return false;
+ default:
+ break;
}
return false;
NULL,
};
-static int iommu_init_pci(struct amd_iommu *iommu)
+static int __init iommu_init_pci(struct amd_iommu *iommu)
{
int cap_ptr = iommu->cap_ptr;
u32 range, misc, low, high;
+// SPDX-License-Identifier: GPL-2.0
/*
* IOMMU API for ARM architected SMMUv3 implementations.
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program. If not, see <http://www.gnu.org/licenses/>.
- *
* Copyright (C) 2015 ARM Limited
*
* Author: Will Deacon <will.deacon@arm.com>
int gerr_irq;
int combined_irq;
- atomic_t sync_nr;
+ u32 sync_nr;
+ u8 prev_cmd_opcode;
unsigned long ias; /* IPA */
unsigned long oas; /* PA */
struct mutex init_mutex; /* Protects smmu pointer */
struct io_pgtable_ops *pgtbl_ops;
+ bool non_strict;
enum arm_smmu_domain_stage stage;
union {
}
/*
- * Wait for the SMMU to consume items. If drain is true, wait until the queue
+ * Wait for the SMMU to consume items. If sync is true, wait until the queue
* is empty. Otherwise, wait until there is at least one free slot.
*/
static int queue_poll_cons(struct arm_smmu_queue *q, bool sync, bool wfe)
struct arm_smmu_queue *q = &smmu->cmdq.q;
bool wfe = !!(smmu->features & ARM_SMMU_FEAT_SEV);
+ smmu->prev_cmd_opcode = FIELD_GET(CMDQ_0_OP, cmd[0]);
+
while (queue_insert_raw(q, cmd) == -ENOSPC) {
if (queue_poll_cons(q, false, wfe))
dev_err_ratelimited(smmu->dev, "CMDQ timeout\n");
struct arm_smmu_cmdq_ent ent = {
.opcode = CMDQ_OP_CMD_SYNC,
.sync = {
- .msidata = atomic_inc_return_relaxed(&smmu->sync_nr),
.msiaddr = virt_to_phys(&smmu->sync_count),
},
};
- arm_smmu_cmdq_build_cmd(cmd, &ent);
-
spin_lock_irqsave(&smmu->cmdq.lock, flags);
- arm_smmu_cmdq_insert_cmd(smmu, cmd);
+
+ /* Piggy-back on the previous command if it's a SYNC */
+ if (smmu->prev_cmd_opcode == CMDQ_OP_CMD_SYNC) {
+ ent.sync.msidata = smmu->sync_nr;
+ } else {
+ ent.sync.msidata = ++smmu->sync_nr;
+ arm_smmu_cmdq_build_cmd(cmd, &ent);
+ arm_smmu_cmdq_insert_cmd(smmu, cmd);
+ }
+
spin_unlock_irqrestore(&smmu->cmdq.lock, flags);
return __arm_smmu_sync_poll_msi(smmu, ent.sync.msidata);
}
/* IO_PGTABLE API */
-static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu)
-{
- arm_smmu_cmdq_issue_sync(smmu);
-}
-
static void arm_smmu_tlb_sync(void *cookie)
{
struct arm_smmu_domain *smmu_domain = cookie;
- __arm_smmu_tlb_sync(smmu_domain->smmu);
+
+ arm_smmu_cmdq_issue_sync(smmu_domain->smmu);
}
static void arm_smmu_tlb_inv_context(void *cookie)
cmd.tlbi.vmid = smmu_domain->s2_cfg.vmid;
}
+ /*
+ * NOTE: when io-pgtable is in non-strict mode, we may get here with
+ * PTEs previously cleared by unmaps on the current CPU not yet visible
+ * to the SMMU. We are relying on the DSB implicit in queue_inc_prod()
+ * to guarantee those are observed before the TLBI. Do be careful, 007.
+ */
arm_smmu_cmdq_issue_cmd(smmu, &cmd);
- __arm_smmu_tlb_sync(smmu);
+ arm_smmu_cmdq_issue_sync(smmu);
}
static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size,
if (smmu->features & ARM_SMMU_FEAT_COHERENCY)
pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+ if (smmu_domain->non_strict)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops)
return -ENOMEM;
return ops->unmap(ops, iova, size);
}
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (smmu_domain->smmu)
+ arm_smmu_tlb_inv_context(smmu_domain);
+}
+
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu;
if (smmu)
- __arm_smmu_tlb_sync(smmu);
+ arm_smmu_cmdq_issue_sync(smmu);
}
static phys_addr_t
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
- return 0;
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = smmu_domain->non_strict;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
default:
- return -ENODEV;
+ return -EINVAL;
}
}
int ret = 0;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
mutex_lock(&smmu_domain->init_mutex);
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- if (smmu_domain->smmu) {
- ret = -EPERM;
- goto out_unlock;
+ switch (domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ if (smmu_domain->smmu) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (*(int *)data)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch(attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ smmu_domain->non_strict = *(int *)data;
+ break;
+ default:
+ ret = -ENODEV;
}
-
- if (*(int *)data)
- smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
break;
default:
- ret = -ENODEV;
+ ret = -EINVAL;
}
out_unlock:
.attach_dev = arm_smmu_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
- .flush_iotlb_all = arm_smmu_iotlb_sync,
+ .flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
{
int ret;
- atomic_set(&smmu->sync_nr, 0);
ret = arm_smmu_init_queues(smmu);
if (ret)
return ret;
irq = smmu->combined_irq;
if (irq) {
/*
- * Cavium ThunderX2 implementation doesn't not support unique
- * irq lines. Use single irq line for all the SMMUv3 interrupts.
+ * Cavium ThunderX2 implementation doesn't support unique irq
+ * lines. Use a single irq line for all the SMMUv3 interrupts.
*/
ret = devm_request_threaded_irq(smmu->dev, irq,
arm_smmu_combined_irq_handler,
#include <linux/spinlock.h>
#include <linux/amba/bus.h>
+#include <linux/fsl/mc.h>
#include "io-pgtable.h"
#include "arm-smmu-regs.h"
const struct iommu_gather_ops *tlb_ops;
struct arm_smmu_cfg cfg;
enum arm_smmu_domain_stage stage;
+ bool non_strict;
struct mutex init_mutex; /* Protects smmu pointer */
spinlock_t cb_lock; /* Serialises ATS1* ops and TLB syncs */
struct iommu_domain domain;
struct arm_smmu_cfg *cfg = &smmu_domain->cfg;
void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
- writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
+ /*
+ * NOTE: this is not a relaxed write; it needs to guarantee that PTEs
+ * cleared by the current CPU are visible to the SMMU before the TLBI.
+ */
+ writel(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID);
arm_smmu_tlb_sync_context(cookie);
}
struct arm_smmu_device *smmu = smmu_domain->smmu;
void __iomem *base = ARM_SMMU_GR0(smmu);
- writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
+ /* NOTE: see above */
+ writel(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
arm_smmu_tlb_sync_global(smmu);
}
bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS;
void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx);
+ if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ wmb();
+
if (stage1) {
reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA;
struct arm_smmu_domain *smmu_domain = cookie;
void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu);
+ if (smmu_domain->smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
+ wmb();
+
writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID);
}
if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK)
pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_NO_DMA;
+ if (smmu_domain->non_strict)
+ pgtbl_cfg.quirks |= IO_PGTABLE_QUIRK_NON_STRICT;
+
smmu_domain->smmu = smmu;
pgtbl_ops = alloc_io_pgtable_ops(fmt, &pgtbl_cfg, smmu_domain);
if (!pgtbl_ops) {
return ops->unmap(ops, iova, size);
}
+static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain)
+{
+ struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
+
+ if (smmu_domain->tlb_ops)
+ smmu_domain->tlb_ops->tlb_flush_all(smmu_domain);
+}
+
static void arm_smmu_iotlb_sync(struct iommu_domain *domain)
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
if (dev_is_pci(dev))
group = pci_device_group(dev);
+ else if (dev_is_fsl_mc(dev))
+ group = fsl_mc_device_group(dev);
else
group = generic_device_group(dev);
{
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
- return 0;
+ switch(domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED);
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ *(int *)data = smmu_domain->non_strict;
+ return 0;
+ default:
+ return -ENODEV;
+ }
+ break;
default:
- return -ENODEV;
+ return -EINVAL;
}
}
int ret = 0;
struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain);
- if (domain->type != IOMMU_DOMAIN_UNMANAGED)
- return -EINVAL;
-
mutex_lock(&smmu_domain->init_mutex);
- switch (attr) {
- case DOMAIN_ATTR_NESTING:
- if (smmu_domain->smmu) {
- ret = -EPERM;
- goto out_unlock;
+ switch(domain->type) {
+ case IOMMU_DOMAIN_UNMANAGED:
+ switch (attr) {
+ case DOMAIN_ATTR_NESTING:
+ if (smmu_domain->smmu) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ if (*(int *)data)
+ smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
+ else
+ smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
+ break;
+ default:
+ ret = -ENODEV;
+ }
+ break;
+ case IOMMU_DOMAIN_DMA:
+ switch (attr) {
+ case DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE:
+ smmu_domain->non_strict = *(int *)data;
+ break;
+ default:
+ ret = -ENODEV;
}
-
- if (*(int *)data)
- smmu_domain->stage = ARM_SMMU_DOMAIN_NESTED;
- else
- smmu_domain->stage = ARM_SMMU_DOMAIN_S1;
-
break;
default:
- ret = -ENODEV;
+ ret = -EINVAL;
}
-
out_unlock:
mutex_unlock(&smmu_domain->init_mutex);
return ret;
.attach_dev = arm_smmu_attach_dev,
.map = arm_smmu_map,
.unmap = arm_smmu_unmap,
- .flush_iotlb_all = arm_smmu_iotlb_sync,
+ .flush_iotlb_all = arm_smmu_flush_iotlb_all,
.iotlb_sync = arm_smmu_iotlb_sync,
.iova_to_phys = arm_smmu_iova_to_phys,
.add_device = arm_smmu_add_device,
bus_set_iommu(&pci_bus_type, &arm_smmu_ops);
}
#endif
+#ifdef CONFIG_FSL_MC_BUS
+ if (!iommu_present(&fsl_mc_bus_type))
+ bus_set_iommu(&fsl_mc_bus_type, &arm_smmu_ops);
+#endif
}
static int arm_smmu_device_probe(struct platform_device *pdev)
};
struct list_head msi_page_list;
spinlock_t msi_lock;
+
+ /* Domain for flush queue callback; NULL if flush queue not in use */
+ struct iommu_domain *fq_domain;
};
static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie)
return ret;
}
+static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
+{
+ struct iommu_dma_cookie *cookie;
+ struct iommu_domain *domain;
+
+ cookie = container_of(iovad, struct iommu_dma_cookie, iovad);
+ domain = cookie->fq_domain;
+ /*
+ * The IOMMU driver supporting DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE
+ * implies that ops->flush_iotlb_all must be non-NULL.
+ */
+ domain->ops->flush_iotlb_all(domain);
+}
+
/**
* iommu_dma_init_domain - Initialise a DMA mapping domain
* @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
unsigned long order, base_pfn, end_pfn;
+ int attr;
if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE)
return -EINVAL;
}
init_iova_domain(iovad, 1UL << order, base_pfn);
+
+ if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
+ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
+ cookie->fq_domain = domain;
+ init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, NULL);
+ }
+
if (!dev)
return 0;
/* The MSI case is only ever cleaning up its most recent allocation */
if (cookie->type == IOMMU_DMA_MSI_COOKIE)
cookie->msi_iova -= size;
+ else if (cookie->fq_domain) /* non-strict mode */
+ queue_iova(iovad, iova_pfn(iovad, iova),
+ size >> iova_shift(iovad), 0);
else
free_iova_fast(iovad, iova_pfn(iovad, iova),
size >> iova_shift(iovad));
dma_addr -= iova_off;
size = iova_align(iovad, size + iova_off);
- WARN_ON(iommu_unmap(domain, dma_addr, size) != size);
+ WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size);
+ if (!cookie->fq_domain)
+ iommu_tlb_sync(domain);
iommu_dma_free_iova(cookie, dma_addr, size);
}
void iommu_dma_free(struct device *dev, struct page **pages, size_t size,
dma_addr_t *handle)
{
- __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size);
+ __iommu_dma_unmap(iommu_get_dma_domain(dev), *handle, size);
__iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
*handle = IOMMU_MAPPING_ERROR;
}
unsigned long attrs, int prot, dma_addr_t *handle,
void (*flush_page)(struct device *, const void *, phys_addr_t))
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
struct page **pages;
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
- size_t size, int prot)
+ size_t size, int prot, struct iommu_domain *domain)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
size_t iova_off = 0;
dma_addr_t iova;
dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size, int prot)
{
- return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot);
+ return __iommu_dma_map(dev, page_to_phys(page) + offset, size, prot,
+ iommu_get_dma_domain(dev));
}
void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size,
enum dma_data_direction dir, unsigned long attrs)
{
- __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
+ __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size);
}
/*
int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg,
int nents, int prot)
{
- struct iommu_domain *domain = iommu_get_domain_for_dev(dev);
+ struct iommu_domain *domain = iommu_get_dma_domain(dev);
struct iommu_dma_cookie *cookie = domain->iova_cookie;
struct iova_domain *iovad = &cookie->iovad;
struct scatterlist *s, *prev = NULL;
sg = tmp;
}
end = sg_dma_address(sg) + sg_dma_len(sg);
- __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start);
+ __iommu_dma_unmap(iommu_get_dma_domain(dev), start, end - start);
}
dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
return __iommu_dma_map(dev, phys, size,
- dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO);
+ dma_info_to_prot(dir, false, attrs) | IOMMU_MMIO,
+ iommu_get_dma_domain(dev));
}
void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle,
size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size);
+ __iommu_dma_unmap(iommu_get_dma_domain(dev), handle, size);
}
int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr)
if (!msi_page)
return NULL;
- iova = __iommu_dma_map(dev, msi_addr, size, prot);
+ iova = __iommu_dma_map(dev, msi_addr, size, prot, domain);
if (iommu_dma_mapping_error(dev, iova))
goto out_free_page;
return ~(u32)0;
}
- for_each_node_by_type(node, "cpu") {
+ for_each_of_cpu_node(node) {
prop = of_get_property(node, "reg", &len);
for (i = 0; i < len / sizeof(u32); i++) {
if (be32_to_cpup(&prop[i]) == vcpu) {
return 0;
}
+static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
+{
+ struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&dma_domain->domain_lock, flags);
+ /* Ensure domain is inactive i.e. DMA should be disabled for the domain */
+ if (dma_domain->enabled) {
+ pr_debug("Can't set geometry attributes as domain is active\n");
+ spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+ return -EBUSY;
+ }
+
+ /* Ensure that the geometry has been set for the domain */
+ if (!dma_domain->geom_size) {
+ pr_debug("Please configure geometry before setting the number of windows\n");
+ spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+ return -EINVAL;
+ }
+
+ /*
+ * Ensure we have valid window count i.e. it should be less than
+ * maximum permissible limit and should be a power of two.
+ */
+ if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
+ pr_debug("Invalid window count\n");
+ spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+ return -EINVAL;
+ }
+
+ ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
+ w_count > 1 ? w_count : 0);
+ if (!ret) {
+ kfree(dma_domain->win_arr);
+ dma_domain->win_arr = kcalloc(w_count,
+ sizeof(*dma_domain->win_arr),
+ GFP_ATOMIC);
+ if (!dma_domain->win_arr) {
+ spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+ return -ENOMEM;
+ }
+ dma_domain->win_cnt = w_count;
+ }
+ spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
+
+ return ret;
+}
+
static int fsl_pamu_set_domain_attr(struct iommu_domain *domain,
enum iommu_attr attr_type, void *data)
{
case DOMAIN_ATTR_FSL_PAMU_ENABLE:
ret = configure_domain_dma_state(dma_domain, *(int *)data);
break;
+ case DOMAIN_ATTR_WINDOWS:
+ ret = fsl_pamu_set_windows(domain, *(u32 *)data);
+ break;
default:
pr_debug("Unsupported attribute type\n");
ret = -EINVAL;
case DOMAIN_ATTR_FSL_PAMUV1:
*(int *)data = DOMAIN_ATTR_FSL_PAMUV1;
break;
+ case DOMAIN_ATTR_WINDOWS:
+ *(u32 *)data = dma_domain->win_cnt;
+ break;
default:
pr_debug("Unsupported attribute type\n");
ret = -EINVAL;
static struct iommu_group *get_pci_device_group(struct pci_dev *pdev)
{
struct pci_controller *pci_ctl;
- bool pci_endpt_partioning;
+ bool pci_endpt_partitioning;
struct iommu_group *group = NULL;
pci_ctl = pci_bus_to_host(pdev->bus);
- pci_endpt_partioning = check_pci_ctl_endpt_part(pci_ctl);
+ pci_endpt_partitioning = check_pci_ctl_endpt_part(pci_ctl);
/* We can partition PCIe devices so assign device group to the device */
- if (pci_endpt_partioning) {
+ if (pci_endpt_partitioning) {
group = pci_device_group(&pdev->dev);
/*
iommu_group_remove_device(dev);
}
-static int fsl_pamu_set_windows(struct iommu_domain *domain, u32 w_count)
-{
- struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
- unsigned long flags;
- int ret;
-
- spin_lock_irqsave(&dma_domain->domain_lock, flags);
- /* Ensure domain is inactive i.e. DMA should be disabled for the domain */
- if (dma_domain->enabled) {
- pr_debug("Can't set geometry attributes as domain is active\n");
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
- return -EBUSY;
- }
-
- /* Ensure that the geometry has been set for the domain */
- if (!dma_domain->geom_size) {
- pr_debug("Please configure geometry before setting the number of windows\n");
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
- return -EINVAL;
- }
-
- /*
- * Ensure we have valid window count i.e. it should be less than
- * maximum permissible limit and should be a power of two.
- */
- if (w_count > pamu_get_max_subwin_cnt() || !is_power_of_2(w_count)) {
- pr_debug("Invalid window count\n");
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
- return -EINVAL;
- }
-
- ret = pamu_set_domain_geometry(dma_domain, &domain->geometry,
- w_count > 1 ? w_count : 0);
- if (!ret) {
- kfree(dma_domain->win_arr);
- dma_domain->win_arr = kcalloc(w_count,
- sizeof(*dma_domain->win_arr),
- GFP_ATOMIC);
- if (!dma_domain->win_arr) {
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
- return -ENOMEM;
- }
- dma_domain->win_cnt = w_count;
- }
- spin_unlock_irqrestore(&dma_domain->domain_lock, flags);
-
- return ret;
-}
-
-static u32 fsl_pamu_get_windows(struct iommu_domain *domain)
-{
- struct fsl_dma_domain *dma_domain = to_fsl_dma_domain(domain);
-
- return dma_domain->win_cnt;
-}
-
static const struct iommu_ops fsl_pamu_ops = {
.capable = fsl_pamu_capable,
.domain_alloc = fsl_pamu_domain_alloc,
.detach_dev = fsl_pamu_detach_device,
.domain_window_enable = fsl_pamu_window_enable,
.domain_window_disable = fsl_pamu_window_disable,
- .domain_get_windows = fsl_pamu_get_windows,
- .domain_set_windows = fsl_pamu_set_windows,
.iova_to_phys = fsl_pamu_iova_to_phys,
.domain_set_attr = fsl_pamu_set_domain_attr,
.domain_get_attr = fsl_pamu_get_domain_attr,
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright © 2018 Intel Corporation.
+ *
+ * Authors: Gayatri Kammela <gayatri.kammela@intel.com>
+ * Sohil Mehta <sohil.mehta@intel.com>
+ * Jacob Pan <jacob.jun.pan@linux.intel.com>
+ */
+
+#include <linux/debugfs.h>
+#include <linux/dmar.h>
+#include <linux/intel-iommu.h>
+#include <linux/pci.h>
+
+#include <asm/irq_remapping.h>
+
+struct iommu_regset {
+ int offset;
+ const char *regs;
+};
+
+#define IOMMU_REGSET_ENTRY(_reg_) \
+ { DMAR_##_reg_##_REG, __stringify(_reg_) }
+static const struct iommu_regset iommu_regs[] = {
+ IOMMU_REGSET_ENTRY(VER),
+ IOMMU_REGSET_ENTRY(CAP),
+ IOMMU_REGSET_ENTRY(ECAP),
+ IOMMU_REGSET_ENTRY(GCMD),
+ IOMMU_REGSET_ENTRY(GSTS),
+ IOMMU_REGSET_ENTRY(RTADDR),
+ IOMMU_REGSET_ENTRY(CCMD),
+ IOMMU_REGSET_ENTRY(FSTS),
+ IOMMU_REGSET_ENTRY(FECTL),
+ IOMMU_REGSET_ENTRY(FEDATA),
+ IOMMU_REGSET_ENTRY(FEADDR),
+ IOMMU_REGSET_ENTRY(FEUADDR),
+ IOMMU_REGSET_ENTRY(AFLOG),
+ IOMMU_REGSET_ENTRY(PMEN),
+ IOMMU_REGSET_ENTRY(PLMBASE),
+ IOMMU_REGSET_ENTRY(PLMLIMIT),
+ IOMMU_REGSET_ENTRY(PHMBASE),
+ IOMMU_REGSET_ENTRY(PHMLIMIT),
+ IOMMU_REGSET_ENTRY(IQH),
+ IOMMU_REGSET_ENTRY(IQT),
+ IOMMU_REGSET_ENTRY(IQA),
+ IOMMU_REGSET_ENTRY(ICS),
+ IOMMU_REGSET_ENTRY(IRTA),
+ IOMMU_REGSET_ENTRY(PQH),
+ IOMMU_REGSET_ENTRY(PQT),
+ IOMMU_REGSET_ENTRY(PQA),
+ IOMMU_REGSET_ENTRY(PRS),
+ IOMMU_REGSET_ENTRY(PECTL),
+ IOMMU_REGSET_ENTRY(PEDATA),
+ IOMMU_REGSET_ENTRY(PEADDR),
+ IOMMU_REGSET_ENTRY(PEUADDR),
+ IOMMU_REGSET_ENTRY(MTRRCAP),
+ IOMMU_REGSET_ENTRY(MTRRDEF),
+ IOMMU_REGSET_ENTRY(MTRR_FIX64K_00000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX16K_80000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX16K_A0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_C0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_C8000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_D0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_D8000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_E0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_E8000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_F0000),
+ IOMMU_REGSET_ENTRY(MTRR_FIX4K_F8000),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE0),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK0),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE1),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK1),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE2),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK2),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE3),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK3),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE4),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK4),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE5),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK5),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE6),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK6),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE7),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK7),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE8),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK8),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSBASE9),
+ IOMMU_REGSET_ENTRY(MTRR_PHYSMASK9),
+ IOMMU_REGSET_ENTRY(VCCAP),
+ IOMMU_REGSET_ENTRY(VCMD),
+ IOMMU_REGSET_ENTRY(VCRSP),
+};
+
+static int iommu_regset_show(struct seq_file *m, void *unused)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ unsigned long flag;
+ int i, ret = 0;
+ u64 value;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (!drhd->reg_base_addr) {
+ seq_puts(m, "IOMMU: Invalid base address\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ seq_printf(m, "IOMMU: %s Register Base Address: %llx\n",
+ iommu->name, drhd->reg_base_addr);
+ seq_puts(m, "Name\t\t\tOffset\t\tContents\n");
+ /*
+ * Publish the contents of the 64-bit hardware registers
+ * by adding the offset to the pointer (virtual address).
+ */
+ raw_spin_lock_irqsave(&iommu->register_lock, flag);
+ for (i = 0 ; i < ARRAY_SIZE(iommu_regs); i++) {
+ value = dmar_readq(iommu->reg + iommu_regs[i].offset);
+ seq_printf(m, "%-16s\t0x%02x\t\t0x%016llx\n",
+ iommu_regs[i].regs, iommu_regs[i].offset,
+ value);
+ }
+ raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
+ seq_putc(m, '\n');
+ }
+out:
+ rcu_read_unlock();
+
+ return ret;
+}
+DEFINE_SHOW_ATTRIBUTE(iommu_regset);
+
+static void ctx_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu,
+ int bus)
+{
+ struct context_entry *context;
+ int devfn;
+
+ seq_printf(m, " Context Table Entries for Bus: %d\n", bus);
+ seq_puts(m, " Entry\tB:D.F\tHigh\tLow\n");
+
+ for (devfn = 0; devfn < 256; devfn++) {
+ context = iommu_context_addr(iommu, bus, devfn, 0);
+ if (!context)
+ return;
+
+ if (!context_present(context))
+ continue;
+
+ seq_printf(m, " %-5d\t%02x:%02x.%x\t%-6llx\t%llx\n", devfn,
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
+ context[0].hi, context[0].lo);
+ }
+}
+
+static void root_tbl_entry_show(struct seq_file *m, struct intel_iommu *iommu)
+{
+ unsigned long flags;
+ int bus;
+
+ spin_lock_irqsave(&iommu->lock, flags);
+ seq_printf(m, "IOMMU %s: Root Table Address:%llx\n", iommu->name,
+ (u64)virt_to_phys(iommu->root_entry));
+ seq_puts(m, "Root Table Entries:\n");
+
+ for (bus = 0; bus < 256; bus++) {
+ if (!(iommu->root_entry[bus].lo & 1))
+ continue;
+
+ seq_printf(m, " Bus: %d H: %llx L: %llx\n", bus,
+ iommu->root_entry[bus].hi,
+ iommu->root_entry[bus].lo);
+
+ ctx_tbl_entry_show(m, iommu, bus);
+ seq_putc(m, '\n');
+ }
+ spin_unlock_irqrestore(&iommu->lock, flags);
+}
+
+static int dmar_translation_struct_show(struct seq_file *m, void *unused)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ root_tbl_entry_show(m, iommu);
+ seq_putc(m, '\n');
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(dmar_translation_struct);
+
+#ifdef CONFIG_IRQ_REMAP
+static void ir_tbl_remap_entry_show(struct seq_file *m,
+ struct intel_iommu *iommu)
+{
+ struct irte *ri_entry;
+ unsigned long flags;
+ int idx;
+
+ seq_puts(m, " Entry SrcID DstID Vct IRTE_high\t\tIRTE_low\n");
+
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+ for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) {
+ ri_entry = &iommu->ir_table->base[idx];
+ if (!ri_entry->present || ri_entry->p_pst)
+ continue;
+
+ seq_printf(m, " %-5d %02x:%02x.%01x %08x %02x %016llx\t%016llx\n",
+ idx, PCI_BUS_NUM(ri_entry->sid),
+ PCI_SLOT(ri_entry->sid), PCI_FUNC(ri_entry->sid),
+ ri_entry->dest_id, ri_entry->vector,
+ ri_entry->high, ri_entry->low);
+ }
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+}
+
+static void ir_tbl_posted_entry_show(struct seq_file *m,
+ struct intel_iommu *iommu)
+{
+ struct irte *pi_entry;
+ unsigned long flags;
+ int idx;
+
+ seq_puts(m, " Entry SrcID PDA_high PDA_low Vct IRTE_high\t\tIRTE_low\n");
+
+ raw_spin_lock_irqsave(&irq_2_ir_lock, flags);
+ for (idx = 0; idx < INTR_REMAP_TABLE_ENTRIES; idx++) {
+ pi_entry = &iommu->ir_table->base[idx];
+ if (!pi_entry->present || !pi_entry->p_pst)
+ continue;
+
+ seq_printf(m, " %-5d %02x:%02x.%01x %08x %08x %02x %016llx\t%016llx\n",
+ idx, PCI_BUS_NUM(pi_entry->sid),
+ PCI_SLOT(pi_entry->sid), PCI_FUNC(pi_entry->sid),
+ pi_entry->pda_h, pi_entry->pda_l << 6,
+ pi_entry->vector, pi_entry->high,
+ pi_entry->low);
+ }
+ raw_spin_unlock_irqrestore(&irq_2_ir_lock, flags);
+}
+
+/*
+ * For active IOMMUs go through the Interrupt remapping
+ * table and print valid entries in a table format for
+ * Remapped and Posted Interrupts.
+ */
+static int ir_translation_struct_show(struct seq_file *m, void *unused)
+{
+ struct dmar_drhd_unit *drhd;
+ struct intel_iommu *iommu;
+ u64 irta;
+
+ rcu_read_lock();
+ for_each_active_iommu(iommu, drhd) {
+ if (!ecap_ir_support(iommu->ecap))
+ continue;
+
+ seq_printf(m, "Remapped Interrupt supported on IOMMU: %s\n",
+ iommu->name);
+
+ if (iommu->ir_table) {
+ irta = virt_to_phys(iommu->ir_table->base);
+ seq_printf(m, " IR table address:%llx\n", irta);
+ ir_tbl_remap_entry_show(m, iommu);
+ } else {
+ seq_puts(m, "Interrupt Remapping is not enabled\n");
+ }
+ seq_putc(m, '\n');
+ }
+
+ seq_puts(m, "****\n\n");
+
+ for_each_active_iommu(iommu, drhd) {
+ if (!cap_pi_support(iommu->cap))
+ continue;
+
+ seq_printf(m, "Posted Interrupt supported on IOMMU: %s\n",
+ iommu->name);
+
+ if (iommu->ir_table) {
+ irta = virt_to_phys(iommu->ir_table->base);
+ seq_printf(m, " IR table address:%llx\n", irta);
+ ir_tbl_posted_entry_show(m, iommu);
+ } else {
+ seq_puts(m, "Interrupt Remapping is not enabled\n");
+ }
+ seq_putc(m, '\n');
+ }
+ rcu_read_unlock();
+
+ return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(ir_translation_struct);
+#endif
+
+void __init intel_iommu_debugfs_init(void)
+{
+ struct dentry *intel_iommu_debug = debugfs_create_dir("intel",
+ iommu_debugfs_dir);
+
+ debugfs_create_file("iommu_regset", 0444, intel_iommu_debug, NULL,
+ &iommu_regset_fops);
+ debugfs_create_file("dmar_translation_struct", 0444, intel_iommu_debug,
+ NULL, &dmar_translation_struct_fops);
+#ifdef CONFIG_IRQ_REMAP
+ debugfs_create_file("ir_translation_struct", 0444, intel_iommu_debug,
+ NULL, &ir_translation_struct_fops);
+#endif
+}
static int force_on = 0;
int intel_iommu_tboot_noforce;
-/*
- * 0: Present
- * 1-11: Reserved
- * 12-63: Context Ptr (12 - (haw-1))
- * 64-127: Reserved
- */
-struct root_entry {
- u64 lo;
- u64 hi;
-};
#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
/*
return re->hi & VTD_PAGE_MASK;
}
-/*
- * low 64 bits:
- * 0: present
- * 1: fault processing disable
- * 2-3: translation type
- * 12-63: address space root
- * high 64 bits:
- * 0-2: address width
- * 3-6: aval
- * 8-23: domain id
- */
-struct context_entry {
- u64 lo;
- u64 hi;
-};
static inline void context_clear_pasid_enable(struct context_entry *context)
{
return (context->lo & 1);
}
-static inline bool context_present(struct context_entry *context)
+bool context_present(struct context_entry *context)
{
return context_pasid_enabled(context) ?
__context_present(context) :
domain->iommu_superpage = domain_update_iommu_superpage(NULL);
}
-static inline struct context_entry *iommu_context_addr(struct intel_iommu *iommu,
- u8 bus, u8 devfn, int alloc)
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ u8 devfn, int alloc)
{
struct root_entry *root = &iommu->root_entry[bus];
struct context_entry *context;
cpuhp_setup_state(CPUHP_IOMMU_INTEL_DEAD, "iommu/intel:dead", NULL,
intel_iommu_cpu_dead);
intel_iommu_enabled = 1;
+ intel_iommu_debugfs_init();
return 0;
* in single-threaded environment with interrupt disabled, so no need to tabke
* the dmar_global_lock.
*/
-static DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
+DEFINE_RAW_SPINLOCK(irq_2_ir_lock);
static const struct irq_domain_ops intel_ir_domain_ops;
static void iommu_disable_irq_remapping(struct intel_iommu *iommu);
}
io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+ io_pgtable_tlb_sync(&data->iop);
return size;
}
io_pgtable_tlb_sync(iop);
ptep = iopte_deref(pte[i], lvl);
__arm_v7s_free_table(ptep, lvl + 1, data);
+ } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+ /*
+ * Order the PTE update against queueing the IOVA, to
+ * guarantee that a flush callback from a different CPU
+ * has observed it before the TLBIALL can be issued.
+ */
+ smp_wmb();
} else {
io_pgtable_tlb_add_flush(iop, iova, blk_size,
blk_size, true);
IO_PGTABLE_QUIRK_NO_PERMS |
IO_PGTABLE_QUIRK_TLBI_ON_MAP |
IO_PGTABLE_QUIRK_ARM_MTK_4GB |
- IO_PGTABLE_QUIRK_NO_DMA))
+ IO_PGTABLE_QUIRK_NO_DMA |
+ IO_PGTABLE_QUIRK_NON_STRICT))
return NULL;
/* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */
return 0;
tablep = iopte_deref(pte, data);
+ } else if (unmap_idx >= 0) {
+ io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
+ io_pgtable_tlb_sync(&data->iop);
+ return size;
}
- if (unmap_idx < 0)
- return __arm_lpae_unmap(data, iova, size, lvl, tablep);
-
- io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true);
- return size;
+ return __arm_lpae_unmap(data, iova, size, lvl, tablep);
}
static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data,
io_pgtable_tlb_sync(iop);
ptep = iopte_deref(pte, data);
__arm_lpae_free_pgtable(data, lvl + 1, ptep);
+ } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) {
+ /*
+ * Order the PTE update against queueing the IOVA, to
+ * guarantee that a flush callback from a different CPU
+ * has observed it before the TLBIALL can be issued.
+ */
+ smp_wmb();
} else {
io_pgtable_tlb_add_flush(iop, iova, size, size, true);
}
u64 reg;
struct arm_lpae_io_pgtable *data;
- if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA))
+ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA |
+ IO_PGTABLE_QUIRK_NON_STRICT))
return NULL;
data = arm_lpae_alloc_pgtable(cfg);
struct arm_lpae_io_pgtable *data;
/* The NS quirk doesn't apply at stage 2 */
- if (cfg->quirks & ~IO_PGTABLE_QUIRK_NO_DMA)
+ if (cfg->quirks & ~(IO_PGTABLE_QUIRK_NO_DMA |
+ IO_PGTABLE_QUIRK_NON_STRICT))
return NULL;
data = arm_lpae_alloc_pgtable(cfg);
* be accessed by a fully cache-coherent IOMMU or CPU (e.g. for a
* software-emulated IOMMU), such that pagetable updates need not
* be treated as explicit DMA data.
+ *
+ * IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
+ * on unmap, for DMA domains using the flush queue mechanism for
+ * delayed invalidation.
*/
#define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
#define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
#define IO_PGTABLE_QUIRK_TLBI_ON_MAP BIT(2)
#define IO_PGTABLE_QUIRK_ARM_MTK_4GB BIT(3)
#define IO_PGTABLE_QUIRK_NO_DMA BIT(4)
+ #define IO_PGTABLE_QUIRK_NON_STRICT BIT(5)
unsigned long quirks;
unsigned long pgsize_bitmap;
unsigned int ias;
#include <linux/pci.h>
#include <linux/bitops.h>
#include <linux/property.h>
+#include <linux/fsl/mc.h>
#include <trace/events/iommu.h>
static struct kset *iommu_group_kset;
#else
static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA;
#endif
+static bool iommu_dma_strict __read_mostly = true;
struct iommu_callback_data {
const struct iommu_ops *ops;
}
early_param("iommu.passthrough", iommu_set_def_domain_type);
+static int __init iommu_dma_setup(char *str)
+{
+ return kstrtobool(str, &iommu_dma_strict);
+}
+early_param("iommu.strict", iommu_dma_setup);
+
static ssize_t iommu_group_attr_show(struct kobject *kobj,
struct attribute *__attr, char *buf)
{
return iommu_group_alloc();
}
+/* Get the IOMMU group for device on fsl-mc bus */
+struct iommu_group *fsl_mc_device_group(struct device *dev)
+{
+ struct device *cont_dev = fsl_mc_cont_dev(dev);
+ struct iommu_group *group;
+
+ group = iommu_group_get(cont_dev);
+ if (!group)
+ group = iommu_group_alloc();
+ return group;
+}
+
/**
* iommu_group_get_for_dev - Find or create the IOMMU group for a device
* @dev: target device
group->default_domain = dom;
if (!group->domain)
group->domain = dom;
+
+ if (dom && !iommu_dma_strict) {
+ int attr = 1;
+ iommu_domain_set_attr(dom,
+ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+ &attr);
+ }
}
ret = iommu_group_add_device(group, dev);
EXPORT_SYMBOL_GPL(iommu_get_domain_for_dev);
/*
- * IOMMU groups are really the natrual working unit of the IOMMU, but
+ * For IOMMU_DOMAIN_DMA implementations which already provide their own
+ * guarantees that the group and its default domain are valid and correct.
+ */
+struct iommu_domain *iommu_get_dma_domain(struct device *dev)
+{
+ return dev->iommu_group->default_domain;
+}
+
+/*
+ * IOMMU groups are really the natural working unit of the IOMMU, but
* the IOMMU API works on domains and devices. Bridge that gap by
* iterating over the devices in a group. Ideally we'd have a single
* device which represents the requestor ID of the group, but we also
struct iommu_domain_geometry *geometry;
bool *paging;
int ret = 0;
- u32 *count;
switch (attr) {
case DOMAIN_ATTR_GEOMETRY:
case DOMAIN_ATTR_PAGING:
paging = data;
*paging = (domain->pgsize_bitmap != 0UL);
- break;
- case DOMAIN_ATTR_WINDOWS:
- count = data;
-
- if (domain->ops->domain_get_windows != NULL)
- *count = domain->ops->domain_get_windows(domain);
- else
- ret = -ENODEV;
-
break;
default:
if (!domain->ops->domain_get_attr)
enum iommu_attr attr, void *data)
{
int ret = 0;
- u32 *count;
switch (attr) {
- case DOMAIN_ATTR_WINDOWS:
- count = data;
-
- if (domain->ops->domain_set_windows != NULL)
- ret = domain->ops->domain_set_windows(domain, *count);
- else
- ret = -ENODEV;
-
- break;
default:
if (domain->ops->domain_set_attr == NULL)
return -EINVAL;
iovad->granule = granule;
iovad->start_pfn = start_pfn;
iovad->dma_32bit_pfn = 1UL << (32 - iova_shift(iovad));
+ iovad->max32_alloc_size = iovad->dma_32bit_pfn;
iovad->flush_cb = NULL;
iovad->fq = NULL;
iovad->anchor.pfn_lo = iovad->anchor.pfn_hi = IOVA_ANCHOR;
cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
if (free->pfn_hi < iovad->dma_32bit_pfn &&
- free->pfn_lo >= cached_iova->pfn_lo)
+ free->pfn_lo >= cached_iova->pfn_lo) {
iovad->cached32_node = rb_next(&free->node);
+ iovad->max32_alloc_size = iovad->dma_32bit_pfn;
+ }
cached_iova = rb_entry(iovad->cached_node, struct iova, node);
if (free->pfn_lo >= cached_iova->pfn_lo)
/* Walk the tree backwards */
spin_lock_irqsave(&iovad->iova_rbtree_lock, flags);
+ if (limit_pfn <= iovad->dma_32bit_pfn &&
+ size >= iovad->max32_alloc_size)
+ goto iova32_full;
+
curr = __get_cached_rbnode(iovad, limit_pfn);
curr_iova = rb_entry(curr, struct iova, node);
do {
curr_iova = rb_entry(curr, struct iova, node);
} while (curr && new_pfn <= curr_iova->pfn_hi);
- if (limit_pfn < size || new_pfn < iovad->start_pfn) {
- spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
- return -ENOMEM;
- }
+ if (limit_pfn < size || new_pfn < iovad->start_pfn)
+ goto iova32_full;
/* pfn_lo will point to size aligned address if size_aligned is set */
new->pfn_lo = new_pfn;
__cached_rbnode_insert_update(iovad, new);
spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
-
-
return 0;
+
+iova32_full:
+ iovad->max32_alloc_size = size;
+ spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags);
+ return -ENOMEM;
}
static struct kmem_cache *iova_cache;
+// SPDX-License-Identifier: GPL-2.0
/*
* IPMMU VMSA
*
* Copyright (C) 2014 Renesas Electronics Corporation
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; version 2 of the License.
*/
#include <linux/bitmap.h>
#include <linux/of_iommu.h>
#include <linux/of_pci.h>
#include <linux/slab.h>
+#include <linux/fsl/mc.h>
#define NO_IOMMU 1
struct of_phandle_args iommu_spec = { .args_count = 1 };
int err;
- err = of_pci_map_rid(info->np, alias, "iommu-map",
- "iommu-map-mask", &iommu_spec.np,
- iommu_spec.args);
+ err = of_map_rid(info->np, alias, "iommu-map", "iommu-map-mask",
+ &iommu_spec.np, iommu_spec.args);
if (err)
return err == -ENODEV ? NO_IOMMU : err;
return err;
}
+static int of_fsl_mc_iommu_init(struct fsl_mc_device *mc_dev,
+ struct device_node *master_np)
+{
+ struct of_phandle_args iommu_spec = { .args_count = 1 };
+ int err;
+
+ err = of_map_rid(master_np, mc_dev->icid, "iommu-map",
+ "iommu-map-mask", &iommu_spec.np,
+ iommu_spec.args);
+ if (err)
+ return err == -ENODEV ? NO_IOMMU : err;
+
+ err = of_iommu_xlate(&mc_dev->dev, &iommu_spec);
+ of_node_put(iommu_spec.np);
+ return err;
+}
+
const struct iommu_ops *of_iommu_configure(struct device *dev,
struct device_node *master_np)
{
err = pci_for_each_dma_alias(to_pci_dev(dev),
of_pci_iommu_init, &info);
+ } else if (dev_is_fsl_mc(dev)) {
+ err = of_fsl_mc_iommu_init(to_fsl_mc_device(dev), master_np);
} else {
struct of_phandle_args iommu_spec;
int idx = 0;
#include <linux/init.h>
#include <linux/proc_fs.h>
-#include <asm/macintosh.h>
-#include <asm/macints.h>
+#include <asm/macintosh.h>
+#include <asm/macints.h>
#include <asm/mac_iop.h>
#include <asm/mac_oss.h>
#include <asm/adb_iop.h>
-#include <linux/adb.h>
+#include <linux/adb.h>
/*#define DEBUG_ADB_IOP*/
#endif
static enum adb_iop_state {
- idle,
- sending,
- awaiting_reply
+ idle,
+ sending,
+ awaiting_reply
} adb_iop_state;
static void adb_iop_start(void);
{
req->complete = 1;
current_req = req->next;
- if (req->done) (*req->done)(req);
+ if (req->done)
+ (*req->done)(req);
adb_iop_state = state;
}
static void adb_iop_listen(struct iop_msg *msg)
{
- struct adb_iopmsg *amsg = (struct adb_iopmsg *) msg->message;
+ struct adb_iopmsg *amsg = (struct adb_iopmsg *)msg->message;
struct adb_request *req;
unsigned long flags;
#ifdef DEBUG_ADB_IOP
#ifdef DEBUG_ADB_IOP
printk("adb_iop_listen %p: rcvd packet, %d bytes: %02X %02X", req,
- (uint) amsg->count + 2, (uint) amsg->flags, (uint) amsg->cmd);
+ (uint)amsg->count + 2, (uint)amsg->flags, (uint)amsg->cmd);
for (i = 0; i < amsg->count; i++)
- printk(" %02X", (uint) amsg->data[i]);
+ printk(" %02X", (uint)amsg->data[i]);
printk("\n");
#endif
/* get the packet to send */
req = current_req;
- if (!req) return;
+ if (!req)
+ return;
local_irq_save(flags);
#ifdef DEBUG_ADB_IOP
printk("adb_iop_start %p: sending packet, %d bytes:", req, req->nbytes);
- for (i = 0 ; i < req->nbytes ; i++)
- printk(" %02X", (uint) req->data[i]);
+ for (i = 0; i < req->nbytes; i++)
+ printk(" %02X", (uint)req->data[i]);
printk("\n");
#endif
/* Now send it. The IOP manager will call adb_iop_complete */
/* when the packet has been sent. */
- iop_send_message(ADB_IOP, ADB_CHAN, req,
- sizeof(amsg), (__u8 *) &amsg, adb_iop_complete);
+ iop_send_message(ADB_IOP, ADB_CHAN, req, sizeof(amsg), (__u8 *)&amsg,
+ adb_iop_complete);
}
int adb_iop_probe(void)
{
- if (!iop_ism_present) return -ENODEV;
+ if (!iop_ism_present)
+ return -ENODEV;
return 0;
}
int adb_iop_init(void)
{
- printk("adb: IOP ISM driver v0.4 for Unified ADB.\n");
+ pr_info("adb: IOP ISM driver v0.4 for Unified ADB\n");
iop_listen(ADB_IOP, ADB_CHAN, adb_iop_listen, "ADB");
return 0;
}
int err;
err = adb_iop_write(req);
- if (err) return err;
+ if (err)
+ return err;
if (sync) {
- while (!req->complete) adb_iop_poll();
+ while (!req->complete)
+ adb_iop_poll();
}
return 0;
}
}
local_irq_restore(flags);
- if (adb_iop_state == idle) adb_iop_start();
+
+ if (adb_iop_state == idle)
+ adb_iop_start();
return 0;
}
void adb_iop_poll(void)
{
- if (adb_iop_state == idle) adb_iop_start();
+ if (adb_iop_state == idle)
+ adb_iop_start();
iop_ism_irq_poll(ADB_IOP);
}
}
/* Now fill in the handler_id field of the adb_handler entries. */
- pr_debug("adb devices:\n");
for (i = 1; i < 16; i++) {
if (adb_handler[i].original_address == 0)
continue;
adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
(i << 4) | 0xf);
adb_handler[i].handler_id = req.reply[2];
- pr_debug(" [%d]: %d %x\n", i, adb_handler[i].original_address,
- adb_handler[i].handler_id);
+ printk(KERN_DEBUG "adb device [%d]: %d 0x%X\n", i,
+ adb_handler[i].original_address,
+ adb_handler[i].handler_id);
devmask |= 1 << i;
}
return devmask;
mutex_lock(&adb_handler_mutex);
ret = try_handler_change(address, new_id);
mutex_unlock(&adb_handler_mutex);
+ if (ret)
+ pr_debug("adb handler change: [%d] 0x%X\n", address, new_id);
return ret;
}
EXPORT_SYMBOL(adb_try_handler_change);
struct input_dev *input_dev;
int err;
int i;
+ char *keyboard_type;
if (adbhid[id]) {
pr_err("Trying to reregister ADB HID on ID %d\n", id);
memcpy(hid->keycode, adb_to_linux_keycodes, sizeof(adb_to_linux_keycodes));
- pr_info("Detected ADB keyboard, type ");
switch (original_handler_id) {
default:
- pr_cont("<unknown>.\n");
+ keyboard_type = "<unknown>";
input_dev->id.version = ADB_KEYBOARD_UNKNOWN;
break;
case 0x01: case 0x02: case 0x03: case 0x06: case 0x08:
case 0x0C: case 0x10: case 0x18: case 0x1B: case 0x1C:
case 0xC0: case 0xC3: case 0xC6:
- pr_cont("ANSI.\n");
+ keyboard_type = "ANSI";
input_dev->id.version = ADB_KEYBOARD_ANSI;
break;
case 0x04: case 0x05: case 0x07: case 0x09: case 0x0D:
case 0x11: case 0x14: case 0x19: case 0x1D: case 0xC1:
case 0xC4: case 0xC7:
- pr_cont("ISO, swapping keys.\n");
+ keyboard_type = "ISO, swapping keys";
input_dev->id.version = ADB_KEYBOARD_ISO;
i = hid->keycode[10];
hid->keycode[10] = hid->keycode[50];
case 0x12: case 0x15: case 0x16: case 0x17: case 0x1A:
case 0x1E: case 0xC2: case 0xC5: case 0xC8: case 0xC9:
- pr_cont("JIS.\n");
+ keyboard_type = "JIS";
input_dev->id.version = ADB_KEYBOARD_JIS;
break;
}
+ pr_info("Detected ADB keyboard, type %s.\n", keyboard_type);
for (i = 0; i < 128; i++)
if (hid->keycode[i])
->get it to send separate codes for left and right shift,
control, option keys */
#if 0 /* handler 5 doesn't send separate codes for R modifiers */
- if (adb_try_handler_change(id, 5))
- printk("ADB keyboard at %d, handler set to 5\n", id);
- else
+ if (!adb_try_handler_change(id, 5))
#endif
- if (adb_try_handler_change(id, 3))
- printk("ADB keyboard at %d, handler set to 3\n", id);
- else
- printk("ADB keyboard at %d, handler 1\n", id);
+ adb_try_handler_change(id, 3);
adb_get_infos(id, &default_id, &cur_handler_id);
+ printk(KERN_DEBUG "ADB keyboard at %d has handler 0x%X\n",
+ id, cur_handler_id);
reg |= adbhid_input_reregister(id, default_id, org_handler_id,
cur_handler_id, 0);
}
for (i = 0; i < mouse_ids.nids; i++) {
int id = mouse_ids.id[i];
int mouse_kind;
+ char *desc = "standard";
adb_get_infos(id, &default_id, &org_handler_id);
if (adb_try_handler_change(id, 4)) {
- printk("ADB mouse at %d, handler set to 4", id);
mouse_kind = ADBMOUSE_EXTENDED;
}
else if (adb_try_handler_change(id, 0x2F)) {
- printk("ADB mouse at %d, handler set to 0x2F", id);
mouse_kind = ADBMOUSE_MICROSPEED;
}
else if (adb_try_handler_change(id, 0x42)) {
- printk("ADB mouse at %d, handler set to 0x42", id);
mouse_kind = ADBMOUSE_TRACKBALLPRO;
}
else if (adb_try_handler_change(id, 0x66)) {
- printk("ADB mouse at %d, handler set to 0x66", id);
mouse_kind = ADBMOUSE_MICROSPEED;
}
else if (adb_try_handler_change(id, 0x5F)) {
- printk("ADB mouse at %d, handler set to 0x5F", id);
mouse_kind = ADBMOUSE_MICROSPEED;
}
else if (adb_try_handler_change(id, 3)) {
- printk("ADB mouse at %d, handler set to 3", id);
mouse_kind = ADBMOUSE_MS_A3;
}
else if (adb_try_handler_change(id, 2)) {
- printk("ADB mouse at %d, handler set to 2", id);
mouse_kind = ADBMOUSE_STANDARD_200;
}
else {
- printk("ADB mouse at %d, handler 1", id);
mouse_kind = ADBMOUSE_STANDARD_100;
}
if ((mouse_kind == ADBMOUSE_TRACKBALLPRO)
|| (mouse_kind == ADBMOUSE_MICROSPEED)) {
+ desc = "Microspeed/MacPoint or compatible";
init_microspeed(id);
} else if (mouse_kind == ADBMOUSE_MS_A3) {
+ desc = "Mouse Systems A3 Mouse or compatible";
init_ms_a3(id);
} else if (mouse_kind == ADBMOUSE_EXTENDED) {
+ desc = "extended";
/*
* Register 1 is usually used for device
* identification. Here, we try to identify
(req.reply[1] == 0x9a) && ((req.reply[2] == 0x21)
|| (req.reply[2] == 0x20))) {
mouse_kind = ADBMOUSE_TRACKBALL;
+ desc = "trackman/mouseman";
init_trackball(id);
}
else if ((req.reply_len >= 4) &&
(req.reply[1] == 0x74) && (req.reply[2] == 0x70) &&
(req.reply[3] == 0x61) && (req.reply[4] == 0x64)) {
mouse_kind = ADBMOUSE_TRACKPAD;
+ desc = "trackpad";
init_trackpad(id);
}
else if ((req.reply_len >= 4) &&
(req.reply[1] == 0x4b) && (req.reply[2] == 0x4d) &&
(req.reply[3] == 0x4c) && (req.reply[4] == 0x31)) {
mouse_kind = ADBMOUSE_TURBOMOUSE5;
+ desc = "TurboMouse 5";
init_turbomouse(id);
}
else if ((req.reply_len == 9) &&
(req.reply[1] == 0x4b) && (req.reply[2] == 0x4f) &&
(req.reply[3] == 0x49) && (req.reply[4] == 0x54)) {
if (adb_try_handler_change(id, 0x42)) {
- pr_cont("\nADB MacAlly 2-button mouse at %d, handler set to 0x42", id);
mouse_kind = ADBMOUSE_MACALLY2;
+ desc = "MacAlly 2-button";
}
}
}
- pr_cont("\n");
adb_get_infos(id, &default_id, &cur_handler_id);
+ printk(KERN_DEBUG "ADB mouse (%s) at %d has handler 0x%X\n",
+ desc, id, cur_handler_id);
reg |= adbhid_input_reregister(id, default_id, org_handler_id,
cur_handler_id, mouse_kind);
}
struct adb_request req;
unsigned char r1_buffer[8];
- pr_cont(" (trackpad)");
-
adb_request(&req, NULL, ADBREQ_SYNC | ADBREQ_REPLY, 1,
ADB_READREG(id,1));
if (req.reply_len < 8)
- pr_cont("bad length for reg. 1\n");
+ pr_err("%s: bad length for reg. 1\n", __func__);
else
{
memcpy(r1_buffer, &req.reply[1], 8);
{
struct adb_request req;
- pr_cont(" (trackman/mouseman)");
-
adb_request(&req, NULL, ADBREQ_SYNC, 3,
ADB_WRITEREG(id,1), 00,0x81);
{
struct adb_request req;
- pr_cont(" (TurboMouse 5)");
-
adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(id));
adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(3));
{
struct adb_request req;
- pr_cont(" (Microspeed/MacPoint or compatible)");
-
adb_request(&req, NULL, ADBREQ_SYNC, 1, ADB_FLUSH(id));
/* This will initialize mice using the Microspeed, MacPoint and
{
struct adb_request req;
- pr_cont(" (Mouse Systems A3 Mouse, or compatible)");
adb_request(&req, NULL, ADBREQ_SYNC, 3,
ADB_WRITEREG(id, 0x2),
0x00,
struct macio_dev *in_bay,
struct resource *parent_res)
{
+ char name[MAX_NODE_NAME_SIZE + 1];
struct macio_dev *dev;
const u32 *reg;
-
+
if (np == NULL)
return NULL;
#endif
/* MacIO itself has a different reg, we use it's PCI base */
+ snprintf(name, sizeof(name), "%pOFn", np);
if (np == chip->of_node) {
dev_set_name(&dev->ofdev.dev, "%1d.%08x:%.*s",
chip->lbus.index,
#else
0, /* NuBus may want to do something better here */
#endif
- MAX_NODE_NAME_SIZE, np->name);
+ MAX_NODE_NAME_SIZE, name);
} else {
reg = of_get_property(np, "reg", NULL);
dev_set_name(&dev->ofdev.dev, "%1d.%08x:%.*s",
chip->lbus.index,
- reg ? *reg : 0, MAX_NODE_NAME_SIZE, np->name);
+ reg ? *reg : 0, MAX_NODE_NAME_SIZE, name);
}
/* Setup interrupts & resources */
static DEVICE_ATTR_RO(modalias);
static DEVICE_ATTR_RO(devspec);
-macio_config_of_attr (name, "%s\n");
+static ssize_t name_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sprintf(buf, "%pOFn\n", dev->of_node);
+}
+static DEVICE_ATTR_RO(name);
+
macio_config_of_attr (type, "%s\n");
static struct attribute *macio_dev_attrs[] = {
buf, nb, false);
}
}
+
+/* Offset between Unix time (1970-based) and Mac time (1904-based) */
+#define RTC_OFFSET 2082844800
+
+time64_t cuda_get_time(void)
+{
+ struct adb_request req;
+ u32 now;
+
+ if (cuda_request(&req, NULL, 2, CUDA_PACKET, CUDA_GET_TIME) < 0)
+ return 0;
+ while (!req.complete)
+ cuda_poll();
+ if (req.reply_len != 7)
+ pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+ now = (req.reply[3] << 24) + (req.reply[4] << 16) +
+ (req.reply[5] << 8) + req.reply[6];
+ return (time64_t)now - RTC_OFFSET;
+}
+
+int cuda_set_rtc_time(struct rtc_time *tm)
+{
+ u32 now;
+ struct adb_request req;
+
+ now = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
+ if (cuda_request(&req, NULL, 6, CUDA_PACKET, CUDA_SET_TIME,
+ now >> 24, now >> 16, now >> 8, now) < 0)
+ return -ENXIO;
+ while (!req.complete)
+ cuda_poll();
+ if ((req.reply_len != 3) && (req.reply_len != 7))
+ pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+ return 0;
+}
*
* 1999-08-02 (jmt) - Initial rewrite for Unified ADB.
* 2000-03-29 Tony Mantler <tonym@mac.linux-m68k.org>
- * - Big overhaul, should actually work now.
+ * - Big overhaul, should actually work now.
* 2006-12-31 Finn Thain - Another overhaul.
*
* Suggested reading:
* Apple's "ADB Analyzer" bus sniffer is invaluable:
* ftp://ftp.apple.com/developer/Tool_Chest/Devices_-_Hardware/Apple_Desktop_Bus/
*/
-
+
#include <stdarg.h>
#include <linux/types.h>
#include <linux/errno.h>
#define ST_ODD 0x20 /* ADB state: odd data byte */
#define ST_IDLE 0x30 /* ADB state: idle, nothing to send */
-static int macii_init_via(void);
+static int macii_init_via(void);
static void macii_start(void);
static irqreturn_t macii_interrupt(int irq, void *arg);
static void macii_queue_poll(void);
static int command_byte; /* the most recent command byte transmitted */
static int autopoll_devs; /* bits set are device addresses to be polled */
-/* Sanity check for request queue. Doesn't check for cycles. */
-static int request_is_queued(struct adb_request *req) {
- struct adb_request *cur;
- unsigned long flags;
- local_irq_save(flags);
- cur = current_req;
- while (cur) {
- if (cur == req) {
- local_irq_restore(flags);
- return 1;
- }
- cur = cur->next;
- }
- local_irq_restore(flags);
- return 0;
-}
-
/* Check for MacII style ADB */
static int macii_probe(void)
{
- if (macintosh_config->adb_type != MAC_ADB_II) return -ENODEV;
+ if (macintosh_config->adb_type != MAC_ADB_II)
+ return -ENODEV;
via = via1;
- printk("adb: Mac II ADB Driver v1.0 for Unified ADB\n");
+ pr_info("adb: Mac II ADB Driver v1.0 for Unified ADB\n");
return 0;
}
{
unsigned long flags;
int err;
-
+
local_irq_save(flags);
-
+
err = macii_init_via();
- if (err) goto out;
+ if (err)
+ goto out;
err = request_irq(IRQ_MAC_ADB, macii_interrupt, 0, "ADB",
macii_interrupt);
- if (err) goto out;
+ if (err)
+ goto out;
macii_state = idle;
out:
return err;
}
-/* initialize the hardware */
+/* initialize the hardware */
static int macii_init_via(void)
{
unsigned char x;
/* Set up state: idle */
via[B] |= ST_IDLE;
- last_status = via[B] & (ST_MASK|CTLR_IRQ);
+ last_status = via[B] & (ST_MASK | CTLR_IRQ);
/* Shift register on input */
via[ACR] = (via[ACR] & ~SR_CTRL) | SR_EXT;
int next_device;
static struct adb_request req;
- if (!autopoll_devs) return;
+ if (!autopoll_devs)
+ return;
device_mask = (1 << (((command_byte & 0xF0) >> 4) + 1)) - 1;
if (autopoll_devs & ~device_mask)
else
next_device = ffs(autopoll_devs) - 1;
- BUG_ON(request_is_queued(&req));
-
- adb_request(&req, NULL, ADBREQ_NOSEND, 1,
- ADB_READREG(next_device, 0));
+ adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_READREG(next_device, 0));
req.sent = 0;
req.complete = 0;
static int macii_send_request(struct adb_request *req, int sync)
{
int err;
- unsigned long flags;
- BUG_ON(request_is_queued(req));
-
- local_irq_save(flags);
err = macii_write(req);
- local_irq_restore(flags);
+ if (err)
+ return err;
- if (!err && sync) {
- while (!req->complete) {
+ if (sync)
+ while (!req->complete)
macii_poll();
- }
- BUG_ON(request_is_queued(req));
- }
- return err;
+ return 0;
}
/* Send an ADB request (append to request queue) */
static int macii_write(struct adb_request *req)
{
+ unsigned long flags;
+
if (req->nbytes < 2 || req->data[0] != ADB_PACKET || req->nbytes > 15) {
req->complete = 1;
return -EINVAL;
}
-
+
req->next = NULL;
req->sent = 0;
req->complete = 0;
req->reply_len = 0;
+ local_irq_save(flags);
+
if (current_req != NULL) {
last_req->next = req;
last_req = req;
} else {
current_req = req;
last_req = req;
- if (macii_state == idle) macii_start();
+ if (macii_state == idle)
+ macii_start();
}
+
+ local_irq_restore(flags);
+
return 0;
}
/* bit 1 == device 1, and so on. */
autopoll_devs = devs & 0xFFFE;
- if (!autopoll_devs) return 0;
+ if (!autopoll_devs)
+ return 0;
local_irq_save(flags);
return err;
}
-static inline int need_autopoll(void) {
+static inline int need_autopoll(void)
+{
/* Was the last command Talk Reg 0
* and is the target on the autopoll list?
*/
/* Prod the chip without interrupts */
static void macii_poll(void)
{
- disable_irq(IRQ_MAC_ADB);
macii_interrupt(0, NULL);
- enable_irq(IRQ_MAC_ADB);
}
/* Reset the bus */
static int macii_reset_bus(void)
{
static struct adb_request req;
-
- if (request_is_queued(&req))
- return 0;
/* Command = 0, Address = ignored */
- adb_request(&req, NULL, 0, 1, ADB_BUSRESET);
+ adb_request(&req, NULL, ADBREQ_NOSEND, 1, ADB_BUSRESET);
+ macii_send_request(&req, 1);
/* Don't want any more requests during the Global Reset low time. */
udelay(3000);
req = current_req;
- BUG_ON(req == NULL);
-
- BUG_ON(macii_state != idle);
-
/* Now send it. Be careful though, that first byte of the request
* is actually ADB_PACKET; the real data begins at index 1!
* And req->nbytes is the number of bytes of real data plus one.
* to be activity on the ADB bus. The chip will poll to achieve this.
*
* The basic ADB state machine was left unchanged from the original MacII code
- * by Alan Cox, which was based on the CUDA driver for PowerMac.
+ * by Alan Cox, which was based on the CUDA driver for PowerMac.
* The syntax of the ADB status lines is totally different on MacII,
* though. MacII uses the states Command -> Even -> Odd -> Even ->...-> Idle
* for sending and Idle -> Even -> Odd -> Even ->...-> Idle for receiving.
static irqreturn_t macii_interrupt(int irq, void *arg)
{
int x;
- static int entered;
struct adb_request *req;
+ unsigned long flags;
+
+ local_irq_save(flags);
if (!arg) {
/* Clear the SR IRQ flag when polling. */
if (via[IFR] & SR_INT)
via[IFR] = SR_INT;
- else
+ else {
+ local_irq_restore(flags);
return IRQ_NONE;
+ }
}
- BUG_ON(entered++);
-
last_status = status;
- status = via[B] & (ST_MASK|CTLR_IRQ);
+ status = via[B] & (ST_MASK | CTLR_IRQ);
switch (macii_state) {
- case idle:
- if (reading_reply) {
- reply_ptr = current_req->reply;
- } else {
- BUG_ON(current_req != NULL);
- reply_ptr = reply_buf;
- }
+ case idle:
+ if (reading_reply) {
+ reply_ptr = current_req->reply;
+ } else {
+ WARN_ON(current_req);
+ reply_ptr = reply_buf;
+ }
- x = via[SR];
+ x = via[SR];
- if ((status & CTLR_IRQ) && (x == 0xFF)) {
- /* Bus timeout without SRQ sequence:
- * data is "FF" while CTLR_IRQ is "H"
- */
- reply_len = 0;
- srq_asserted = 0;
- macii_state = read_done;
- } else {
- macii_state = reading;
- *reply_ptr = x;
- reply_len = 1;
- }
+ if ((status & CTLR_IRQ) && (x == 0xFF)) {
+ /* Bus timeout without SRQ sequence:
+ * data is "FF" while CTLR_IRQ is "H"
+ */
+ reply_len = 0;
+ srq_asserted = 0;
+ macii_state = read_done;
+ } else {
+ macii_state = reading;
+ *reply_ptr = x;
+ reply_len = 1;
+ }
- /* set ADB state = even for first data byte */
- via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
- break;
+ /* set ADB state = even for first data byte */
+ via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
+ break;
- case sending:
- req = current_req;
- if (data_index >= req->nbytes) {
- req->sent = 1;
- macii_state = idle;
-
- if (req->reply_expected) {
- reading_reply = 1;
- } else {
- req->complete = 1;
- current_req = req->next;
- if (req->done) (*req->done)(req);
-
- if (current_req)
- macii_start();
- else
- if (need_autopoll())
- macii_autopoll(autopoll_devs);
- }
+ case sending:
+ req = current_req;
+ if (data_index >= req->nbytes) {
+ req->sent = 1;
+ macii_state = idle;
- if (macii_state == idle) {
- /* reset to shift in */
- via[ACR] &= ~SR_OUT;
- x = via[SR];
- /* set ADB state idle - might get SRQ */
- via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
- }
+ if (req->reply_expected) {
+ reading_reply = 1;
} else {
- via[SR] = req->data[data_index++];
-
- if ( (via[B] & ST_MASK) == ST_CMD ) {
- /* just sent the command byte, set to EVEN */
- via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
- } else {
- /* invert state bits, toggle ODD/EVEN */
- via[B] ^= ST_MASK;
- }
- }
- break;
-
- case reading:
- x = via[SR];
- BUG_ON((status & ST_MASK) == ST_CMD ||
- (status & ST_MASK) == ST_IDLE);
-
- /* Bus timeout with SRQ sequence:
- * data is "XX FF" while CTLR_IRQ is "L L"
- * End of packet without SRQ sequence:
- * data is "XX...YY 00" while CTLR_IRQ is "L...H L"
- * End of packet SRQ sequence:
- * data is "XX...YY 00" while CTLR_IRQ is "L...L L"
- * (where XX is the first response byte and
- * YY is the last byte of valid response data.)
- */
+ req->complete = 1;
+ current_req = req->next;
+ if (req->done)
+ (*req->done)(req);
- srq_asserted = 0;
- if (!(status & CTLR_IRQ)) {
- if (x == 0xFF) {
- if (!(last_status & CTLR_IRQ)) {
- macii_state = read_done;
- reply_len = 0;
- srq_asserted = 1;
- }
- } else if (x == 0x00) {
- macii_state = read_done;
- if (!(last_status & CTLR_IRQ))
- srq_asserted = 1;
- }
+ if (current_req)
+ macii_start();
+ else if (need_autopoll())
+ macii_autopoll(autopoll_devs);
}
- if (macii_state == reading) {
- BUG_ON(reply_len > 15);
- reply_ptr++;
- *reply_ptr = x;
- reply_len++;
+ if (macii_state == idle) {
+ /* reset to shift in */
+ via[ACR] &= ~SR_OUT;
+ x = via[SR];
+ /* set ADB state idle - might get SRQ */
+ via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
}
+ } else {
+ via[SR] = req->data[data_index++];
- /* invert state bits, toggle ODD/EVEN */
- via[B] ^= ST_MASK;
- break;
+ if ((via[B] & ST_MASK) == ST_CMD) {
+ /* just sent the command byte, set to EVEN */
+ via[B] = (via[B] & ~ST_MASK) | ST_EVEN;
+ } else {
+ /* invert state bits, toggle ODD/EVEN */
+ via[B] ^= ST_MASK;
+ }
+ }
+ break;
- case read_done:
- x = via[SR];
+ case reading:
+ x = via[SR];
+ WARN_ON((status & ST_MASK) == ST_CMD ||
+ (status & ST_MASK) == ST_IDLE);
+
+ /* Bus timeout with SRQ sequence:
+ * data is "XX FF" while CTLR_IRQ is "L L"
+ * End of packet without SRQ sequence:
+ * data is "XX...YY 00" while CTLR_IRQ is "L...H L"
+ * End of packet SRQ sequence:
+ * data is "XX...YY 00" while CTLR_IRQ is "L...L L"
+ * (where XX is the first response byte and
+ * YY is the last byte of valid response data.)
+ */
- if (reading_reply) {
- reading_reply = 0;
- req = current_req;
- req->reply_len = reply_len;
- req->complete = 1;
- current_req = req->next;
- if (req->done) (*req->done)(req);
- } else if (reply_len && autopoll_devs)
- adb_input(reply_buf, reply_len, 0);
+ srq_asserted = 0;
+ if (!(status & CTLR_IRQ)) {
+ if (x == 0xFF) {
+ if (!(last_status & CTLR_IRQ)) {
+ macii_state = read_done;
+ reply_len = 0;
+ srq_asserted = 1;
+ }
+ } else if (x == 0x00) {
+ macii_state = read_done;
+ if (!(last_status & CTLR_IRQ))
+ srq_asserted = 1;
+ }
+ }
- macii_state = idle;
+ if (macii_state == reading &&
+ reply_len < ARRAY_SIZE(reply_buf)) {
+ reply_ptr++;
+ *reply_ptr = x;
+ reply_len++;
+ }
- /* SRQ seen before, initiate poll now */
- if (srq_asserted)
- macii_queue_poll();
+ /* invert state bits, toggle ODD/EVEN */
+ via[B] ^= ST_MASK;
+ break;
- if (current_req)
- macii_start();
- else
- if (need_autopoll())
- macii_autopoll(autopoll_devs);
+ case read_done:
+ x = via[SR];
- if (macii_state == idle)
- via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
- break;
+ if (reading_reply) {
+ reading_reply = 0;
+ req = current_req;
+ req->reply_len = reply_len;
+ req->complete = 1;
+ current_req = req->next;
+ if (req->done)
+ (*req->done)(req);
+ } else if (reply_len && autopoll_devs)
+ adb_input(reply_buf, reply_len, 0);
+
+ macii_state = idle;
+
+ /* SRQ seen before, initiate poll now */
+ if (srq_asserted)
+ macii_queue_poll();
+
+ if (current_req)
+ macii_start();
+ else if (need_autopoll())
+ macii_autopoll(autopoll_devs);
+
+ if (macii_state == idle)
+ via[B] = (via[B] & ~ST_MASK) | ST_IDLE;
+ break;
- default:
+ default:
break;
}
- entered--;
+ local_irq_restore(flags);
return IRQ_HANDLED;
}
pmu_wait_complete(&req);
}
+/* Offset between Unix time (1970-based) and Mac time (1904-based) */
+#define RTC_OFFSET 2082844800
+
+time64_t pmu_get_time(void)
+{
+ struct adb_request req;
+ u32 now;
+
+ if (pmu_request(&req, NULL, 1, PMU_READ_RTC) < 0)
+ return 0;
+ pmu_wait_complete(&req);
+ if (req.reply_len != 4)
+ pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+ now = (req.reply[0] << 24) + (req.reply[1] << 16) +
+ (req.reply[2] << 8) + req.reply[3];
+ return (time64_t)now - RTC_OFFSET;
+}
+
+int pmu_set_rtc_time(struct rtc_time *tm)
+{
+ u32 now;
+ struct adb_request req;
+
+ now = lower_32_bits(rtc_tm_to_time64(tm) + RTC_OFFSET);
+ if (pmu_request(&req, NULL, 5, PMU_SET_RTC,
+ now >> 24, now >> 16, now >> 8, now) < 0)
+ return -ENXIO;
+ pmu_wait_complete(&req);
+ if (req.reply_len != 0)
+ pr_err("%s: got %d byte reply\n", __func__, req.reply_len);
+ return 0;
+}
+
void
pmu_restart(void)
{
fct = smu_fan_create(fan, 0);
if (fct == NULL) {
printk(KERN_WARNING "windfarm: Failed to create SMU "
- "RPM fan %s\n", fan->name);
+ "RPM fan %pOFn\n", fan);
continue;
}
list_add(&fct->link, &smu_fans);
fct = smu_fan_create(fan, 1);
if (fct == NULL) {
printk(KERN_WARNING "windfarm: Failed to create SMU "
- "PWM fan %s\n", fan->name);
+ "PWM fan %pOFn\n", fan);
continue;
}
list_add(&fct->link, &smu_fans);
#define VERSION "1.0"
-#define DEBUG
-
-#ifdef DEBUG
-#define DBG(args...) printk(args)
-#else
-#define DBG(args...) do { } while(0)
-#endif
-
/* If the cache is older than 800ms we'll refetch it */
#define MAX_AGE msecs_to_jiffies(800)
buf[i+2] = data[3];
buf[i+3] = data[2];
}
-#ifdef DEBUG
- DBG(KERN_DEBUG "sat %d partition %x:", sat_id, id);
- for (i = 0; i < len; ++i)
- DBG(" %x", buf[i]);
- DBG("\n");
-#endif
+ printk(KERN_DEBUG "sat %d partition %x:", sat_id, id);
+ print_hex_dump(KERN_DEBUG, " ", DUMP_PREFIX_OFFSET,
+ 16, 1, buf, len, false);
if (size)
*size = len;
return (struct smu_sdbp_header *) buf;
if (err < 0)
return err;
sat->last_read = jiffies;
+
#ifdef LOTSA_DEBUG
{
int i;
- DBG(KERN_DEBUG "wf_sat_get: data is");
- for (i = 0; i < 16; ++i)
- DBG(" %.2x", sat->cache[i]);
- DBG("\n");
+ printk(KERN_DEBUG "wf_sat_get: data is");
+ print_hex_dump(KERN_DEBUG, " ", DUMP_PREFIX_OFFSET,
+ 16, 1, sat->cache, 16, false);
}
#endif
return 0;
If unsure, say N.
-config DM_MQ_DEFAULT
- bool "request-based DM: use blk-mq I/O path by default"
- depends on BLK_DEV_DM
- ---help---
- This option enables the blk-mq based I/O path for request-based
- DM devices by default. With the option the dm_mod.use_blk_mq
- module/boot option defaults to Y, without it to N, but it can
- still be overriden either way.
-
- If unsure say N.
-
config DM_DEBUG
bool "Device mapper debugging support"
depends on BLK_DEV_DM
struct policy_work work;
struct entry *e;
- if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed)))
+ if (WARN_ON_ONCE(!mq->migrations_allowed))
return;
e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true);
struct dm_stats stats;
- struct kthread_worker kworker;
- struct task_struct *kworker_task;
-
- /* for request-based merge heuristic in dm_request_fn() */
- unsigned seq_rq_merge_deadline_usecs;
- int last_rq_rw;
- sector_t last_rq_pos;
- ktime_t last_rq_start_time;
-
/* for blk-mq request-based DM support */
struct blk_mq_tag_set *tag_set;
- bool use_blk_mq:1;
bool init_tio_pdu:1;
struct srcu_struct io_barrier;
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct crypt_config *cc;
+ const char *devname = dm_table_device_name(ti->table);
int key_size;
unsigned int align_mask;
unsigned long long tmpll;
}
ret = -ENOMEM;
- cc->io_queue = alloc_workqueue("kcryptd_io", WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+ cc->io_queue = alloc_workqueue("kcryptd_io/%s",
+ WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+ 1, devname);
if (!cc->io_queue) {
ti->error = "Couldn't create kcryptd io queue";
goto bad;
}
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
- cc->crypt_queue = alloc_workqueue("kcryptd", WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM, 1);
+ cc->crypt_queue = alloc_workqueue("kcryptd/%s",
+ WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+ 1, devname);
else
- cc->crypt_queue = alloc_workqueue("kcryptd",
+ cc->crypt_queue = alloc_workqueue("kcryptd/%s",
WQ_HIGHPRI | WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
- num_online_cpus());
+ num_online_cpus(), devname);
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
goto bad;
spin_lock_init(&cc->write_thread_lock);
cc->write_tree = RB_ROOT;
- cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write");
+ cc->write_thread = kthread_create(dmcrypt_write, cc, "dmcrypt_write/%s", devname);
if (IS_ERR(cc->write_thread)) {
ret = PTR_ERR(cc->write_thread);
cc->write_thread = NULL;
if (bio_op(bio) == REQ_OP_ZONE_RESET)
goto map_bio;
- /* We need to remap reported zones, so remember the BIO iter */
- if (bio_op(bio) == REQ_OP_ZONE_REPORT)
- goto map_bio;
-
/* Are we alive ? */
elapsed = (jiffies - fc->start_time) / HZ;
if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
if (bio_op(bio) == REQ_OP_ZONE_RESET)
return DM_ENDIO_DONE;
- if (bio_op(bio) == REQ_OP_ZONE_REPORT) {
- dm_remap_zone_report(ti, bio, fc->start);
- return DM_ENDIO_DONE;
- }
-
if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc)) {
return 0;
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static int flakey_report_zones(struct dm_target *ti, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask)
+{
+ struct flakey_c *fc = ti->private;
+ int ret;
+
+ /* Do report and remap it */
+ ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector),
+ zones, nr_zones, gfp_mask);
+ if (ret != 0)
+ return ret;
+
+ if (*nr_zones)
+ dm_remap_zone_report(ti, fc->start, zones, nr_zones);
+ return 0;
+}
+#endif
+
static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data)
{
struct flakey_c *fc = ti->private;
.version = {1, 5, 0},
#ifdef CONFIG_BLK_DEV_ZONED
.features = DM_TARGET_ZONED_HM,
+ .report_zones = flakey_report_zones,
#endif
.module = THIS_MODULE,
.ctr = flakey_ctr,
}
static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel,
- int ioctl_flags,
- struct dm_ioctl **param, int *param_flags)
+ int ioctl_flags, struct dm_ioctl **param, int *param_flags)
{
struct dm_ioctl *dmi;
int secure_data;
*param_flags |= DM_PARAMS_MALLOC;
- if (copy_from_user(dmi, user, param_kernel->data_size))
- goto bad;
+ /* Copy from param_kernel (which was already copied from user) */
+ memcpy(dmi, param_kernel, minimum_data_size);
-data_copied:
- /*
- * Abort if something changed the ioctl data while it was being copied.
- */
- if (dmi->data_size != param_kernel->data_size) {
- DMERR("rejecting ioctl: data size modified while processing parameters");
+ if (copy_from_user(&dmi->data, (char __user *)user + minimum_data_size,
+ param_kernel->data_size - minimum_data_size))
goto bad;
- }
-
+data_copied:
/* Wipe the user buffer so we do not return it to userspace */
if (secure_data && clear_user(user, param_kernel->data_size))
goto bad;
return DM_MAPIO_REMAPPED;
}
-#ifdef CONFIG_BLK_DEV_ZONED
-static int linear_end_io(struct dm_target *ti, struct bio *bio,
- blk_status_t *error)
-{
- struct linear_c *lc = ti->private;
-
- if (!*error && bio_op(bio) == REQ_OP_ZONE_REPORT)
- dm_remap_zone_report(ti, bio, lc->start);
-
- return DM_ENDIO_DONE;
-}
-#endif
-
static void linear_status(struct dm_target *ti, status_type_t type,
unsigned status_flags, char *result, unsigned maxlen)
{
return 0;
}
+#ifdef CONFIG_BLK_DEV_ZONED
+static int linear_report_zones(struct dm_target *ti, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask)
+{
+ struct linear_c *lc = (struct linear_c *) ti->private;
+ int ret;
+
+ /* Do report and remap it */
+ ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector),
+ zones, nr_zones, gfp_mask);
+ if (ret != 0)
+ return ret;
+
+ if (*nr_zones)
+ dm_remap_zone_report(ti, lc->start, zones, nr_zones);
+ return 0;
+}
+#endif
+
static int linear_iterate_devices(struct dm_target *ti,
iterate_devices_callout_fn fn, void *data)
{
.name = "linear",
.version = {1, 4, 0},
#ifdef CONFIG_BLK_DEV_ZONED
- .end_io = linear_end_io,
.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM,
+ .report_zones = linear_report_zones,
#else
.features = DM_TARGET_PASSES_INTEGRITY,
#endif
static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
{
if (m->queue_mode == DM_TYPE_NONE) {
- /*
- * Default to request-based.
- */
- if (dm_use_blk_mq(dm_table_get_md(ti->table)))
- m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
- else
- m->queue_mode = DM_TYPE_REQUEST_BASED;
-
+ m->queue_mode = DM_TYPE_REQUEST_BASED;
} else if (m->queue_mode == DM_TYPE_BIO_BASED) {
INIT_WORK(&m->process_queued_bios, process_queued_bios);
/*
* get the queue busy feedback (via BLK_STS_RESOURCE),
* otherwise I/O merging can suffer.
*/
- if (q->mq_ops)
- return DM_MAPIO_REQUEUE;
- else
- return DM_MAPIO_DELAY_REQUEUE;
+ return DM_MAPIO_REQUEUE;
}
clone->bio = clone->biotail = NULL;
clone->rq_disk = bdev->bd_disk;
static void process_queued_io_list(struct multipath *m)
{
- if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED)
+ if (m->queue_mode == DM_TYPE_REQUEST_BASED)
dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table));
else if (m->queue_mode == DM_TYPE_BIO_BASED)
queue_work(kmultipathd, &m->process_queued_bios);
if (!strcasecmp(queue_mode_name, "bio"))
m->queue_mode = DM_TYPE_BIO_BASED;
- else if (!strcasecmp(queue_mode_name, "rq"))
+ else if (!strcasecmp(queue_mode_name, "rq") ||
+ !strcasecmp(queue_mode_name, "mq"))
m->queue_mode = DM_TYPE_REQUEST_BASED;
- else if (!strcasecmp(queue_mode_name, "mq"))
- m->queue_mode = DM_TYPE_MQ_REQUEST_BASED;
else {
ti->error = "Unknown 'queue_mode' requested";
r = -EINVAL;
case DM_TYPE_BIO_BASED:
DMEMIT("queue_mode bio ");
break;
- case DM_TYPE_MQ_REQUEST_BASED:
- DMEMIT("queue_mode mq ");
- break;
default:
WARN_ON_ONCE(true);
break;
/* no paths available, for blk-mq: rely on IO mapping to delay requeue */
if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
- return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED);
+ return (m->queue_mode != DM_TYPE_REQUEST_BASED);
/* Guess which priority_group will be used at next mapping time */
pg = READ_ONCE(m->current_pg);
}
/* Enable bitmap creation for RAID levels != 0 */
- mddev->bitmap_info.offset = rt_is_raid0(rs->raid_type) ? 0 : to_sector(4096);
+ mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096);
mddev->bitmap_info.default_offset = mddev->bitmap_info.offset;
if (!test_and_clear_bit(FirstUse, &rdev->flags)) {
#define RESERVED_REQUEST_BASED_IOS 256
static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS;
-static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT);
-
-bool dm_use_blk_mq_default(void)
-{
- return use_blk_mq;
-}
-
-bool dm_use_blk_mq(struct mapped_device *md)
-{
- return md->use_blk_mq;
-}
-EXPORT_SYMBOL_GPL(dm_use_blk_mq);
-
unsigned dm_get_reserved_rq_based_ios(void)
{
return __dm_get_module_param(&reserved_rq_based_ios,
return queue_is_rq_based(md->queue);
}
-static void dm_old_start_queue(struct request_queue *q)
-{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- if (blk_queue_stopped(q))
- blk_start_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static void dm_mq_start_queue(struct request_queue *q)
+void dm_start_queue(struct request_queue *q)
{
blk_mq_unquiesce_queue(q);
blk_mq_kick_requeue_list(q);
}
-void dm_start_queue(struct request_queue *q)
-{
- if (!q->mq_ops)
- dm_old_start_queue(q);
- else
- dm_mq_start_queue(q);
-}
-
-static void dm_old_stop_queue(struct request_queue *q)
-{
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- if (!blk_queue_stopped(q))
- blk_stop_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
-static void dm_mq_stop_queue(struct request_queue *q)
+void dm_stop_queue(struct request_queue *q)
{
if (blk_mq_queue_stopped(q))
return;
blk_mq_quiesce_queue(q);
}
-void dm_stop_queue(struct request_queue *q)
-{
- if (!q->mq_ops)
- dm_old_stop_queue(q);
- else
- dm_mq_stop_queue(q);
-}
-
/*
* Partial completion handling for request-based dm
*/
*/
static void rq_completed(struct mapped_device *md, int rw, bool run_queue)
{
- struct request_queue *q = md->queue;
- unsigned long flags;
-
atomic_dec(&md->pending[rw]);
/* nudge anyone waiting on suspend queue */
if (!md_in_flight(md))
wake_up(&md->wait);
- /*
- * Run this off this callpath, as drivers could invoke end_io while
- * inside their request_fn (and holding the queue lock). Calling
- * back into ->request_fn() could deadlock attempting to grab the
- * queue lock again.
- */
- if (!q->mq_ops && run_queue) {
- spin_lock_irqsave(q->queue_lock, flags);
- blk_run_queue_async(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
-
/*
* dm_put() must be at the end of this function. See the comment above
*/
tio->ti->type->release_clone_rq(clone);
rq_end_stats(md, rq);
- if (!rq->q->mq_ops)
- blk_end_request_all(rq, error);
- else
- blk_mq_end_request(rq, error);
+ blk_mq_end_request(rq, error);
rq_completed(md, rw, true);
}
-/*
- * Requeue the original request of a clone.
- */
-static void dm_old_requeue_request(struct request *rq, unsigned long delay_ms)
-{
- struct request_queue *q = rq->q;
- unsigned long flags;
-
- spin_lock_irqsave(q->queue_lock, flags);
- blk_requeue_request(q, rq);
- blk_delay_queue(q, delay_ms);
- spin_unlock_irqrestore(q->queue_lock, flags);
-}
-
static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs)
{
blk_mq_delay_kick_requeue_list(q, msecs);
tio->ti->type->release_clone_rq(tio->clone);
}
- if (!rq->q->mq_ops)
- dm_old_requeue_request(rq, delay_ms);
- else
- dm_mq_delay_requeue_request(rq, delay_ms);
-
+ dm_mq_delay_requeue_request(rq, delay_ms);
rq_completed(md, rw, false);
}
rq_end_stats(md, rq);
rw = rq_data_dir(rq);
- if (!rq->q->mq_ops)
- blk_end_request_all(rq, tio->error);
- else
- blk_mq_end_request(rq, tio->error);
+ blk_mq_end_request(rq, tio->error);
rq_completed(md, rw, false);
return;
}
struct dm_rq_target_io *tio = tio_from_request(rq);
tio->error = error;
- if (!rq->q->mq_ops)
- blk_complete_request(rq);
- else
- blk_mq_complete_request(rq);
+ blk_mq_complete_request(rq);
}
/*
* Complete the not-mapped clone and the original request with the error status
* through softirq context.
* Target's rq_end_io() function isn't called.
- * This may be used when the target's map_rq() or clone_and_map_rq() functions fail.
+ * This may be used when the target's clone_and_map_rq() function fails.
*/
static void dm_kill_unmapped_request(struct request *rq, blk_status_t error)
{
dm_complete_request(rq, error);
}
-/*
- * Called with the clone's queue lock held (in the case of .request_fn)
- */
static void end_clone_request(struct request *clone, blk_status_t error)
{
struct dm_rq_target_io *tio = clone->end_io_data;
- /*
- * Actual request completion is done in a softirq context which doesn't
- * hold the clone's queue lock. Otherwise, deadlock could occur because:
- * - another request may be submitted by the upper level driver
- * of the stacking during the completion
- * - the submission which requires queue lock may be done
- * against this clone's queue
- */
dm_complete_request(tio->orig, error);
}
return 0;
}
-static void map_tio_request(struct kthread_work *work);
-
static void init_tio(struct dm_rq_target_io *tio, struct request *rq,
struct mapped_device *md)
{
*/
if (!md->init_tio_pdu)
memset(&tio->info, 0, sizeof(tio->info));
- if (md->kworker_task)
- kthread_init_work(&tio->work, map_tio_request);
}
/*
blk_rq_unprep_clone(clone);
tio->ti->type->release_clone_rq(clone);
tio->clone = NULL;
- if (!rq->q->mq_ops)
- r = DM_MAPIO_DELAY_REQUEUE;
- else
- r = DM_MAPIO_REQUEUE;
+ r = DM_MAPIO_REQUEUE;
goto check_again;
}
break;
return r;
}
+/* DEPRECATED: previously used for request-based merge heuristic in dm_request_fn() */
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
+{
+ return sprintf(buf, "%u\n", 0);
+}
+
+ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
+ const char *buf, size_t count)
+{
+ return count;
+}
+
static void dm_start_request(struct mapped_device *md, struct request *orig)
{
- if (!orig->q->mq_ops)
- blk_start_request(orig);
- else
- blk_mq_start_request(orig);
+ blk_mq_start_request(orig);
atomic_inc(&md->pending[rq_data_dir(orig)]);
- if (md->seq_rq_merge_deadline_usecs) {
- md->last_rq_pos = rq_end_sector(orig);
- md->last_rq_rw = rq_data_dir(orig);
- md->last_rq_start_time = ktime_get();
- }
-
if (unlikely(dm_stats_used(&md->stats))) {
struct dm_rq_target_io *tio = tio_from_request(orig);
tio->duration_jiffies = jiffies;
dm_get(md);
}
-static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq)
+static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
+ unsigned int hctx_idx, unsigned int numa_node)
{
+ struct mapped_device *md = set->driver_data;
struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq);
/*
return 0;
}
-static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp)
-{
- return __dm_rq_init_rq(q->rq_alloc_data, rq);
-}
-
-static void map_tio_request(struct kthread_work *work)
-{
- struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work);
-
- if (map_request(tio) == DM_MAPIO_REQUEUE)
- dm_requeue_original_request(tio, false);
-}
-
-ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf)
-{
- return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs);
-}
-
-#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000
-
-ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md,
- const char *buf, size_t count)
-{
- unsigned deadline;
-
- if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED)
- return count;
-
- if (kstrtouint(buf, 10, &deadline))
- return -EINVAL;
-
- if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS)
- deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS;
-
- md->seq_rq_merge_deadline_usecs = deadline;
-
- return count;
-}
-
-static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md)
-{
- ktime_t kt_deadline;
-
- if (!md->seq_rq_merge_deadline_usecs)
- return false;
-
- kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC);
- kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline);
-
- return !ktime_after(ktime_get(), kt_deadline);
-}
-
-/*
- * q->request_fn for old request-based dm.
- * Called with the queue lock held.
- */
-static void dm_old_request_fn(struct request_queue *q)
-{
- struct mapped_device *md = q->queuedata;
- struct dm_target *ti = md->immutable_target;
- struct request *rq;
- struct dm_rq_target_io *tio;
- sector_t pos = 0;
-
- if (unlikely(!ti)) {
- int srcu_idx;
- struct dm_table *map = dm_get_live_table(md, &srcu_idx);
-
- if (unlikely(!map)) {
- dm_put_live_table(md, srcu_idx);
- return;
- }
- ti = dm_table_find_target(map, pos);
- dm_put_live_table(md, srcu_idx);
- }
-
- /*
- * For suspend, check blk_queue_stopped() and increment
- * ->pending within a single queue_lock not to increment the
- * number of in-flight I/Os after the queue is stopped in
- * dm_suspend().
- */
- while (!blk_queue_stopped(q)) {
- rq = blk_peek_request(q);
- if (!rq)
- return;
-
- /* always use block 0 to find the target for flushes for now */
- pos = 0;
- if (req_op(rq) != REQ_OP_FLUSH)
- pos = blk_rq_pos(rq);
-
- if ((dm_old_request_peeked_before_merge_deadline(md) &&
- md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) &&
- md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) ||
- (ti->type->busy && ti->type->busy(ti))) {
- blk_delay_queue(q, 10);
- return;
- }
-
- dm_start_request(md, rq);
-
- tio = tio_from_request(rq);
- init_tio(tio, rq, md);
- /* Establish tio->ti before queuing work (map_tio_request) */
- tio->ti = ti;
- kthread_queue_work(&md->kworker, &tio->work);
- BUG_ON(!irqs_disabled());
- }
-}
-
-/*
- * Fully initialize a .request_fn request-based queue.
- */
-int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t)
-{
- struct dm_target *immutable_tgt;
-
- /* Fully initialize the queue */
- md->queue->cmd_size = sizeof(struct dm_rq_target_io);
- md->queue->rq_alloc_data = md;
- md->queue->request_fn = dm_old_request_fn;
- md->queue->init_rq_fn = dm_rq_init_rq;
-
- immutable_tgt = dm_table_get_immutable_target(t);
- if (immutable_tgt && immutable_tgt->per_io_data_size) {
- /* any target-specific per-io data is immediately after the tio */
- md->queue->cmd_size += immutable_tgt->per_io_data_size;
- md->init_tio_pdu = true;
- }
- if (blk_init_allocated_queue(md->queue) < 0)
- return -EINVAL;
-
- /* disable dm_old_request_fn's merge heuristic by default */
- md->seq_rq_merge_deadline_usecs = 0;
-
- blk_queue_softirq_done(md->queue, dm_softirq_done);
-
- /* Initialize the request-based DM worker thread */
- kthread_init_worker(&md->kworker);
- md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker,
- "kdmwork-%s", dm_device_name(md));
- if (IS_ERR(md->kworker_task)) {
- int error = PTR_ERR(md->kworker_task);
- md->kworker_task = NULL;
- return error;
- }
-
- return 0;
-}
-
-static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
- unsigned int hctx_idx, unsigned int numa_node)
-{
- return __dm_rq_init_rq(set->driver_data, rq);
-}
-
static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct dm_target *immutable_tgt;
int err;
- if (!dm_table_all_blk_mq_devices(t)) {
- DMERR("request-based dm-mq may only be stacked on blk-mq device(s)");
- return -EINVAL;
- }
-
md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id);
if (!md->tag_set)
return -ENOMEM;
module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools");
+/* Unused, but preserved for userspace compatibility */
+static bool use_blk_mq = true;
module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices");
struct bio clone;
};
-bool dm_use_blk_mq_default(void);
-bool dm_use_blk_mq(struct mapped_device *md);
-
-int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t);
int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t);
void dm_mq_cleanup_mapped_device(struct mapped_device *md);
static ssize_t dm_attr_use_blk_mq_show(struct mapped_device *md, char *buf)
{
- sprintf(buf, "%d\n", dm_use_blk_mq(md));
+ /* Purely for userspace compatibility */
+ sprintf(buf, "%d\n", true);
return strlen(buf);
}
bool integrity_supported:1;
bool singleton:1;
- bool all_blk_mq:1;
unsigned integrity_added:1;
/*
static bool __table_type_request_based(enum dm_queue_mode table_type)
{
- return (table_type == DM_TYPE_REQUEST_BASED ||
- table_type == DM_TYPE_MQ_REQUEST_BASED);
+ return table_type == DM_TYPE_REQUEST_BASED;
}
void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type)
BUG_ON(!request_based); /* No targets in this table */
- /*
- * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by
- * having a compatible target use dm_table_set_type.
- */
t->type = DM_TYPE_REQUEST_BASED;
verify_rq_based:
int srcu_idx;
struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx);
- /* inherit live table's type and all_blk_mq */
- if (live_table) {
+ /* inherit live table's type */
+ if (live_table)
t->type = live_table->type;
- t->all_blk_mq = live_table->all_blk_mq;
- }
dm_put_live_table(t->md, srcu_idx);
return 0;
}
DMERR("table load rejected: including non-request-stackable devices");
return -EINVAL;
}
- if (v.sq_count && v.mq_count) {
+ if (v.sq_count > 0) {
DMERR("table load rejected: not all devices are blk-mq request-stackable");
return -EINVAL;
}
- t->all_blk_mq = v.mq_count > 0;
-
- if (!t->all_blk_mq &&
- (t->type == DM_TYPE_MQ_REQUEST_BASED || t->type == DM_TYPE_NVME_BIO_BASED)) {
- DMERR("table load rejected: all devices are not blk-mq request-stackable");
- return -EINVAL;
- }
return 0;
}
return __table_type_request_based(dm_table_get_type(t));
}
-bool dm_table_all_blk_mq_devices(struct dm_table *t)
-{
- return t->all_blk_mq;
-}
-
static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md)
{
enum dm_queue_mode type = dm_table_get_type(t);
*/
if (blk_queue_add_random(q) && dm_table_all_devices_attribute(t, device_is_not_random))
blk_queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q);
+
+ /*
+ * For a zoned target, the number of zones should be updated for the
+ * correct value to be exposed in sysfs queue/nr_zones. For a BIO based
+ * target, this is all that is needed. For a request based target, the
+ * queue zone bitmaps must also be updated.
+ * Use blk_revalidate_disk_zones() to handle this.
+ */
+ if (blk_queue_is_zoned(q))
+ blk_revalidate_disk_zones(t->md->disk);
}
unsigned int dm_table_get_num_targets(struct dm_table *t)
}
EXPORT_SYMBOL(dm_table_get_md);
+const char *dm_table_device_name(struct dm_table *t)
+{
+ return dm_device_name(t->md);
+}
+EXPORT_SYMBOL_GPL(dm_table_device_name);
+
void dm_table_run_md_queue_async(struct dm_table *t)
{
struct mapped_device *md;
struct request_queue *queue;
- unsigned long flags;
if (!dm_table_request_based(t))
return;
md = dm_table_get_md(t);
queue = dm_get_md_queue(md);
- if (queue) {
- if (queue->mq_ops)
- blk_mq_run_hw_queues(queue, true);
- else {
- spin_lock_irqsave(queue->queue_lock, flags);
- blk_run_queue_async(queue);
- spin_unlock_irqrestore(queue->queue_lock, flags);
- }
- }
+ if (queue)
+ blk_mq_run_hw_queues(queue, true);
}
EXPORT_SYMBOL(dm_table_run_md_queue_async);
* Ensures the thin is not destroyed until the worker has finished
* iterating the active_thins list.
*/
- atomic_t refcount;
+ refcount_t refcount;
struct completion can_destroy;
};
*--------------------------------------------------------------*/
static void thin_get(struct thin_c *tc)
{
- atomic_inc(&tc->refcount);
+ refcount_inc(&tc->refcount);
}
static void thin_put(struct thin_c *tc)
{
- if (atomic_dec_and_test(&tc->refcount))
+ if (refcount_dec_and_test(&tc->refcount))
complete(&tc->can_destroy);
}
r = -EINVAL;
goto bad;
}
- atomic_set(&tc->refcount, 1);
+ refcount_set(&tc->refcount, 1);
init_completion(&tc->can_destroy);
list_add_tail_rcu(&tc->list, &tc->pool->active_thins);
spin_unlock_irqrestore(&tc->pool->lock, flags);
static struct wc_memory_entry *memory_entry(struct dm_writecache *wc, struct wc_entry *e)
{
- if (is_power_of_2(sizeof(struct wc_entry)) && 0)
- return &sb(wc)->entries[e - wc->entries];
- else
- return &sb(wc)->entries[e->index];
+ return &sb(wc)->entries[e->index];
}
static void *memory_data(struct dm_writecache *wc, struct wc_entry *e)
struct rb_node node;
struct list_head link;
sector_t no;
- atomic_t ref;
+ unsigned int ref;
unsigned long state;
struct page *page;
void *data;
RB_CLEAR_NODE(&mblk->node);
INIT_LIST_HEAD(&mblk->link);
- atomic_set(&mblk->ref, 0);
+ mblk->ref = 0;
mblk->state = 0;
mblk->no = mblk_no;
mblk->data = page_address(mblk->page);
}
/*
- * Lookup a metadata block in the rbtree.
+ * Lookup a metadata block in the rbtree. If the block is found, increment
+ * its reference count.
*/
-static struct dmz_mblock *dmz_lookup_mblock(struct dmz_metadata *zmd,
- sector_t mblk_no)
+static struct dmz_mblock *dmz_get_mblock_fast(struct dmz_metadata *zmd,
+ sector_t mblk_no)
{
struct rb_root *root = &zmd->mblk_rbtree;
struct rb_node *node = root->rb_node;
while (node) {
mblk = container_of(node, struct dmz_mblock, node);
- if (mblk->no == mblk_no)
+ if (mblk->no == mblk_no) {
+ /*
+ * If this is the first reference to the block,
+ * remove it from the LRU list.
+ */
+ mblk->ref++;
+ if (mblk->ref == 1 &&
+ !test_bit(DMZ_META_DIRTY, &mblk->state))
+ list_del_init(&mblk->link);
return mblk;
+ }
node = (mblk->no < mblk_no) ? node->rb_left : node->rb_right;
}
}
/*
- * Read a metadata block from disk.
+ * Read an uncached metadata block from disk and add it to the cache.
*/
-static struct dmz_mblock *dmz_fetch_mblock(struct dmz_metadata *zmd,
- sector_t mblk_no)
+static struct dmz_mblock *dmz_get_mblock_slow(struct dmz_metadata *zmd,
+ sector_t mblk_no)
{
- struct dmz_mblock *mblk;
+ struct dmz_mblock *mblk, *m;
sector_t block = zmd->sb[zmd->mblk_primary].block + mblk_no;
struct bio *bio;
- /* Get block and insert it */
+ /* Get a new block and a BIO to read it */
mblk = dmz_alloc_mblock(zmd, mblk_no);
if (!mblk)
return NULL;
- spin_lock(&zmd->mblk_lock);
- atomic_inc(&mblk->ref);
- set_bit(DMZ_META_READING, &mblk->state);
- dmz_insert_mblock(zmd, mblk);
- spin_unlock(&zmd->mblk_lock);
-
bio = bio_alloc(GFP_NOIO, 1);
if (!bio) {
dmz_free_mblock(zmd, mblk);
return NULL;
}
+ spin_lock(&zmd->mblk_lock);
+
+ /*
+ * Make sure that another context did not start reading
+ * the block already.
+ */
+ m = dmz_get_mblock_fast(zmd, mblk_no);
+ if (m) {
+ spin_unlock(&zmd->mblk_lock);
+ dmz_free_mblock(zmd, mblk);
+ bio_put(bio);
+ return m;
+ }
+
+ mblk->ref++;
+ set_bit(DMZ_META_READING, &mblk->state);
+ dmz_insert_mblock(zmd, mblk);
+
+ spin_unlock(&zmd->mblk_lock);
+
+ /* Submit read BIO */
bio->bi_iter.bi_sector = dmz_blk2sect(block);
bio_set_dev(bio, zmd->dev->bdev);
bio->bi_private = mblk;
spin_lock(&zmd->mblk_lock);
- if (atomic_dec_and_test(&mblk->ref)) {
+ mblk->ref--;
+ if (mblk->ref == 0) {
if (test_bit(DMZ_META_ERROR, &mblk->state)) {
rb_erase(&mblk->node, &zmd->mblk_rbtree);
dmz_free_mblock(zmd, mblk);
/* Check rbtree */
spin_lock(&zmd->mblk_lock);
- mblk = dmz_lookup_mblock(zmd, mblk_no);
- if (mblk) {
- /* Cache hit: remove block from LRU list */
- if (atomic_inc_return(&mblk->ref) == 1 &&
- !test_bit(DMZ_META_DIRTY, &mblk->state))
- list_del_init(&mblk->link);
- }
+ mblk = dmz_get_mblock_fast(zmd, mblk_no);
spin_unlock(&zmd->mblk_lock);
if (!mblk) {
/* Cache miss: read the block from disk */
- mblk = dmz_fetch_mblock(zmd, mblk_no);
+ mblk = dmz_get_mblock_slow(zmd, mblk_no);
if (!mblk)
return ERR_PTR(-ENOMEM);
}
spin_lock(&zmd->mblk_lock);
clear_bit(DMZ_META_DIRTY, &mblk->state);
- if (atomic_read(&mblk->ref) == 0)
+ if (mblk->ref == 0)
list_add_tail(&mblk->link, &zmd->mblk_lru_list);
spin_unlock(&zmd->mblk_lock);
}
mblk = list_first_entry(&zmd->mblk_dirty_list,
struct dmz_mblock, link);
dmz_dev_warn(zmd->dev, "mblock %llu still in dirty list (ref %u)",
- (u64)mblk->no, atomic_read(&mblk->ref));
+ (u64)mblk->no, mblk->ref);
list_del_init(&mblk->link);
rb_erase(&mblk->node, &zmd->mblk_rbtree);
dmz_free_mblock(zmd, mblk);
root = &zmd->mblk_rbtree;
rbtree_postorder_for_each_entry_safe(mblk, next, root, node) {
dmz_dev_warn(zmd->dev, "mblock %llu ref %u still in rbtree",
- (u64)mblk->no, atomic_read(&mblk->ref));
- atomic_set(&mblk->ref, 0);
+ (u64)mblk->no, mblk->ref);
+ mblk->ref = 0;
dmz_free_mblock(zmd, mblk);
}
struct dmz_target *target;
struct dm_zone *zone;
struct bio *bio;
- atomic_t ref;
+ refcount_t ref;
blk_status_t status;
};
*/
struct dm_chunk_work {
struct work_struct work;
- atomic_t refcount;
+ refcount_t refcount;
struct dmz_target *target;
unsigned int chunk;
struct bio_list bio_list;
if (nr_blocks == dmz_bio_blocks(bio)) {
/* Setup and submit the BIO */
bio->bi_iter.bi_sector = sector;
- atomic_inc(&bioctx->ref);
+ refcount_inc(&bioctx->ref);
generic_make_request(bio);
return 0;
}
bio_advance(bio, clone->bi_iter.bi_size);
/* Submit the clone */
- atomic_inc(&bioctx->ref);
+ refcount_inc(&bioctx->ref);
generic_make_request(clone);
return 0;
/* Setup and submit the BIO */
bio_set_dev(bio, dmz->dev->bdev);
bio->bi_iter.bi_sector = dmz_start_sect(dmz->metadata, zone) + dmz_blk2sect(chunk_block);
- atomic_inc(&bioctx->ref);
+ refcount_inc(&bioctx->ref);
generic_make_request(bio);
if (dmz_is_seq(zone))
*/
static inline void dmz_get_chunk_work(struct dm_chunk_work *cw)
{
- atomic_inc(&cw->refcount);
+ refcount_inc(&cw->refcount);
}
/*
*/
static void dmz_put_chunk_work(struct dm_chunk_work *cw)
{
- if (atomic_dec_and_test(&cw->refcount)) {
+ if (refcount_dec_and_test(&cw->refcount)) {
WARN_ON(!bio_list_empty(&cw->bio_list));
radix_tree_delete(&cw->target->chunk_rxtree, cw->chunk);
kfree(cw);
goto out;
INIT_WORK(&cw->work, dmz_chunk_work);
- atomic_set(&cw->refcount, 0);
+ refcount_set(&cw->refcount, 0);
cw->target = dmz;
cw->chunk = chunk;
bio_list_init(&cw->bio_list);
bioctx->target = dmz;
bioctx->zone = NULL;
bioctx->bio = bio;
- atomic_set(&bioctx->ref, 1);
+ refcount_set(&bioctx->ref, 1);
bioctx->status = BLK_STS_OK;
/* Set the BIO pending in the flush list */
if (bioctx->status == BLK_STS_OK && *error)
bioctx->status = *error;
- if (!atomic_dec_and_test(&bioctx->ref))
+ if (!refcount_dec_and_test(&bioctx->ref))
return DM_ENDIO_INCOMPLETE;
/* Done */
dev->zone_nr_blocks = dmz_sect2blk(dev->zone_nr_sectors);
dev->zone_nr_blocks_shift = ilog2(dev->zone_nr_blocks);
- dev->nr_zones = (dev->capacity + dev->zone_nr_sectors - 1)
- >> dev->zone_nr_sectors_shift;
+ dev->nr_zones = blkdev_nr_zones(dev->bdev);
dmz->dev = dev;
return dm_get_geometry(md, geo);
}
+static int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask)
+{
+#ifdef CONFIG_BLK_DEV_ZONED
+ struct mapped_device *md = disk->private_data;
+ struct dm_target *tgt;
+ struct dm_table *map;
+ int srcu_idx, ret;
+
+ if (dm_suspended_md(md))
+ return -EAGAIN;
+
+ map = dm_get_live_table(md, &srcu_idx);
+ if (!map)
+ return -EIO;
+
+ tgt = dm_table_find_target(map, sector);
+ if (!dm_target_is_valid(tgt)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ /*
+ * If we are executing this, we already know that the block device
+ * is a zoned device and so each target should have support for that
+ * type of drive. A missing report_zones method means that the target
+ * driver has a problem.
+ */
+ if (WARN_ON(!tgt->type->report_zones)) {
+ ret = -EIO;
+ goto out;
+ }
+
+ /*
+ * blkdev_report_zones() will loop and call this again to cover all the
+ * zones of the target, eventually moving on to the next target.
+ * So there is no need to loop here trying to fill the entire array
+ * of zones.
+ */
+ ret = tgt->type->report_zones(tgt, sector, zones,
+ nr_zones, gfp_mask);
+
+out:
+ dm_put_live_table(md, srcu_idx);
+ return ret;
+#else
+ return -ENOTSUPP;
+#endif
+}
+
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
struct block_device **bdev)
__acquires(md->io_barrier)
EXPORT_SYMBOL_GPL(dm_accept_partial_bio);
/*
- * The zone descriptors obtained with a zone report indicate zone positions
- * within the target backing device, regardless of that device is a partition
- * and regardless of the target mapping start sector on the device or partition.
- * The zone descriptors start sector and write pointer position must be adjusted
- * to match their relative position within the dm device.
- * A target may call dm_remap_zone_report() after completion of a
- * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the
- * backing device.
+ * The zone descriptors obtained with a zone report indicate
+ * zone positions within the underlying device of the target. The zone
+ * descriptors must be remapped to match their position within the dm device.
+ * The caller target should obtain the zones information using
+ * blkdev_report_zones() to ensure that remapping for partition offset is
+ * already handled.
*/
-void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start)
+void dm_remap_zone_report(struct dm_target *ti, sector_t start,
+ struct blk_zone *zones, unsigned int *nr_zones)
{
#ifdef CONFIG_BLK_DEV_ZONED
- struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone);
- struct bio *report_bio = tio->io->orig_bio;
- struct blk_zone_report_hdr *hdr = NULL;
struct blk_zone *zone;
- unsigned int nr_rep = 0;
- unsigned int ofst;
- sector_t part_offset;
- struct bio_vec bvec;
- struct bvec_iter iter;
- void *addr;
-
- if (bio->bi_status)
- return;
-
- /*
- * bio sector was incremented by the request size on completion. Taking
- * into account the original request sector, the target start offset on
- * the backing device and the target mapping offset (ti->begin), the
- * start sector of the backing device. The partition offset is always 0
- * if the target uses a whole device.
- */
- part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio));
+ unsigned int nrz = *nr_zones;
+ int i;
/*
- * Remap the start sector of the reported zones. For sequential zones,
- * also remap the write pointer position.
+ * Remap the start sector and write pointer position of the zones in
+ * the array. Since we may have obtained from the target underlying
+ * device more zones that the target size, also adjust the number
+ * of zones.
*/
- bio_for_each_segment(bvec, report_bio, iter) {
- addr = kmap_atomic(bvec.bv_page);
-
- /* Remember the report header in the first page */
- if (!hdr) {
- hdr = addr;
- ofst = sizeof(struct blk_zone_report_hdr);
- } else
- ofst = 0;
-
- /* Set zones start sector */
- while (hdr->nr_zones && ofst < bvec.bv_len) {
- zone = addr + ofst;
- zone->start -= part_offset;
- if (zone->start >= start + ti->len) {
- hdr->nr_zones = 0;
- break;
- }
- zone->start = zone->start + ti->begin - start;
- if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) {
- if (zone->cond == BLK_ZONE_COND_FULL)
- zone->wp = zone->start + zone->len;
- else if (zone->cond == BLK_ZONE_COND_EMPTY)
- zone->wp = zone->start;
- else
- zone->wp = zone->wp + ti->begin - start - part_offset;
- }
- ofst += sizeof(struct blk_zone);
- hdr->nr_zones--;
- nr_rep++;
+ for (i = 0; i < nrz; i++) {
+ zone = zones + i;
+ if (zone->start >= start + ti->len) {
+ memset(zone, 0, sizeof(struct blk_zone) * (nrz - i));
+ break;
}
- if (addr != hdr)
- kunmap_atomic(addr);
-
- if (!hdr->nr_zones)
- break;
- }
+ zone->start = zone->start + ti->begin - start;
+ if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL)
+ continue;
- if (hdr) {
- hdr->nr_zones = nr_rep;
- kunmap_atomic(hdr);
+ if (zone->cond == BLK_ZONE_COND_FULL)
+ zone->wp = zone->start + zone->len;
+ else if (zone->cond == BLK_ZONE_COND_EMPTY)
+ zone->wp = zone->start;
+ else
+ zone->wp = zone->wp + ti->begin - start;
}
- bio_advance(report_bio, report_bio->bi_iter.bi_size);
-
+ *nr_zones = i;
#else /* !CONFIG_BLK_DEV_ZONED */
- bio->bi_status = BLK_STS_NOTSUPP;
+ *nr_zones = 0;
#endif
}
EXPORT_SYMBOL_GPL(dm_remap_zone_report);
return r;
}
- if (bio_op(bio) != REQ_OP_ZONE_REPORT)
- bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
+ bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector));
clone->bi_iter.bi_size = to_bytes(len);
if (unlikely(bio_integrity(bio) != NULL))
*/
static int __split_and_process_non_flush(struct clone_info *ci)
{
- struct bio *bio = ci->bio;
struct dm_target *ti;
unsigned len;
int r;
if (unlikely(__process_abnormal_io(ci, ti, &r)))
return r;
- if (bio_op(bio) == REQ_OP_ZONE_REPORT)
- len = ci->sector_count;
- else
- len = min_t(sector_t, max_io_len(ci->sector, ti),
- ci->sector_count);
+ len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
r = __clone_and_map_data_bio(ci, ti, ci->sector, &len);
if (r < 0)
* We take a clone of the original to store in
* ci.io->orig_bio to be used by end_io_acct() and
* for dec_pending to use for completion handling.
- * As this path is not used for REQ_OP_ZONE_REPORT,
- * the usage of io->orig_bio in dm_remap_zone_report()
- * won't be affected by this reassignment.
*/
struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count,
GFP_NOIO, &md->queue->bio_split);
* Defend against IO still getting in during teardown
* - as was seen for a time with nvme-fcloop
*/
- if (unlikely(WARN_ON_ONCE(!ti || !dm_target_is_valid(ti)))) {
+ if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) {
error = -EIO;
goto out;
}
static void dm_init_normal_md_queue(struct mapped_device *md)
{
- md->use_blk_mq = false;
-
/*
* Initialize aspects of queue that aren't relevant for blk-mq
*/
{
if (md->wq)
destroy_workqueue(md->wq);
- if (md->kworker_task)
- kthread_stop(md->kworker_task);
bioset_exit(&md->bs);
bioset_exit(&md->io_bs);
goto bad_io_barrier;
md->numa_node_id = numa_node_id;
- md->use_blk_mq = dm_use_blk_mq_default();
md->init_tio_pdu = false;
md->type = DM_TYPE_NONE;
mutex_init(&md->suspend_lock);
INIT_WORK(&md->work, dm_wq_work);
init_waitqueue_head(&md->eventq);
init_completion(&md->kobj_holder.completion);
- md->kworker_task = NULL;
md->disk->major = _major;
md->disk->first_minor = minor;
switch (type) {
case DM_TYPE_REQUEST_BASED:
- dm_init_normal_md_queue(md);
- r = dm_old_init_request_queue(md, t);
- if (r) {
- DMERR("Cannot initialize queue for request-based mapped device");
- return r;
- }
- break;
- case DM_TYPE_MQ_REQUEST_BASED:
r = dm_mq_init_request_queue(md, t);
if (r) {
DMERR("Cannot initialize queue for request-based dm-mq mapped device");
blk_set_queue_dying(md->queue);
- if (dm_request_based(md) && md->kworker_task)
- kthread_flush_worker(&md->kworker);
-
/*
* Take suspend_lock so that presuspend and postsuspend methods
* do not race with internal suspend.
* Stop md->queue before flushing md->wq in case request-based
* dm defers requests to md->wq from md->queue.
*/
- if (dm_request_based(md)) {
+ if (dm_request_based(md))
dm_stop_queue(md->queue);
- if (md->kworker_task)
- kthread_flush_worker(&md->kworker);
- }
flush_workqueue(md->wq);
goto out;
break;
case DM_TYPE_REQUEST_BASED:
- case DM_TYPE_MQ_REQUEST_BASED:
pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size);
front_pad = offsetof(struct dm_rq_clone_bio_info, clone);
/* per_io_data_size is used for blk-mq pdu at queue allocation */
.release = dm_blk_close,
.ioctl = dm_blk_ioctl,
.getgeo = dm_blk_getgeo,
+ .report_zones = dm_blk_report_zones,
.pr_ops = &dm_pr_ops,
.owner = THIS_MODULE
};
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
-bool dm_table_all_blk_mq_devices(struct dm_table *t);
void dm_table_free_md_mempools(struct dm_table *t);
struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
goto out;
}
if (mddev->pers) {
- mddev->pers->quiesce(mddev, 1);
+ mddev_suspend(mddev);
md_bitmap_destroy(mddev);
- mddev->pers->quiesce(mddev, 0);
+ mddev_resume(mddev);
}
mddev->bitmap_info.offset = 0;
if (mddev->bitmap_info.file) {
mddev->bitmap_info.offset = offset;
if (mddev->pers) {
struct bitmap *bitmap;
- mddev->pers->quiesce(mddev, 1);
bitmap = md_bitmap_create(mddev, -1);
+ mddev_suspend(mddev);
if (IS_ERR(bitmap))
rv = PTR_ERR(bitmap);
else {
if (rv)
mddev->bitmap_info.offset = 0;
}
- mddev->pers->quiesce(mddev, 0);
if (rv) {
md_bitmap_destroy(mddev);
+ mddev_resume(mddev);
goto out;
}
+ mddev_resume(mddev);
}
}
}
int mode;
};
-struct suspend_info {
- int slot;
- sector_t lo;
- sector_t hi;
- struct list_head list;
-};
-
struct resync_info {
__le64 lo;
__le64 hi;
struct dlm_lock_resource **other_bitmap_lockres;
struct dlm_lock_resource *resync_lockres;
struct list_head suspend_list;
+
spinlock_t suspend_lock;
+ /* record the region which write should be suspended */
+ sector_t suspend_lo;
+ sector_t suspend_hi;
+ int suspend_from; /* the slot which broadcast suspend_lo/hi */
+
struct md_thread *recovery_thread;
unsigned long recovery_map;
/* communication loc resources */
RE_ADD,
BITMAP_NEEDS_SYNC,
CHANGE_CAPACITY,
+ BITMAP_RESIZE,
};
struct cluster_msg {
ri->hi = cpu_to_le64(hi);
}
-static struct suspend_info *read_resync_info(struct mddev *mddev, struct dlm_lock_resource *lockres)
+static int read_resync_info(struct mddev *mddev,
+ struct dlm_lock_resource *lockres)
{
struct resync_info ri;
- struct suspend_info *s = NULL;
- sector_t hi = 0;
+ struct md_cluster_info *cinfo = mddev->cluster_info;
+ int ret = 0;
dlm_lock_sync(lockres, DLM_LOCK_CR);
memcpy(&ri, lockres->lksb.sb_lvbptr, sizeof(struct resync_info));
- hi = le64_to_cpu(ri.hi);
- if (hi > 0) {
- s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
- if (!s)
- goto out;
- s->hi = hi;
- s->lo = le64_to_cpu(ri.lo);
+ if (le64_to_cpu(ri.hi) > 0) {
+ cinfo->suspend_hi = le64_to_cpu(ri.hi);
+ cinfo->suspend_lo = le64_to_cpu(ri.lo);
+ ret = 1;
}
dlm_unlock_sync(lockres);
-out:
- return s;
+ return ret;
}
static void recover_bitmaps(struct md_thread *thread)
struct dlm_lock_resource *bm_lockres;
char str[64];
int slot, ret;
- struct suspend_info *s, *tmp;
sector_t lo, hi;
while (cinfo->recovery_map) {
/* Clear suspend_area associated with the bitmap */
spin_lock_irq(&cinfo->suspend_lock);
- list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
- if (slot == s->slot) {
- list_del(&s->list);
- kfree(s);
- }
+ cinfo->suspend_hi = 0;
+ cinfo->suspend_lo = 0;
+ cinfo->suspend_from = -1;
spin_unlock_irq(&cinfo->suspend_lock);
+ /* Kick off a reshape if needed */
+ if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
+ test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
+ mddev->reshape_position != MaxSector)
+ md_wakeup_thread(mddev->sync_thread);
+
if (hi > 0) {
if (lo < mddev->recovery_cp)
mddev->recovery_cp = lo;
}
}
-static void __remove_suspend_info(struct md_cluster_info *cinfo, int slot)
-{
- struct suspend_info *s, *tmp;
-
- list_for_each_entry_safe(s, tmp, &cinfo->suspend_list, list)
- if (slot == s->slot) {
- list_del(&s->list);
- kfree(s);
- break;
- }
-}
-
static void remove_suspend_info(struct mddev *mddev, int slot)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
mddev->pers->quiesce(mddev, 1);
spin_lock_irq(&cinfo->suspend_lock);
- __remove_suspend_info(cinfo, slot);
+ cinfo->suspend_hi = 0;
+ cinfo->suspend_lo = 0;
spin_unlock_irq(&cinfo->suspend_lock);
mddev->pers->quiesce(mddev, 0);
}
-
static void process_suspend_info(struct mddev *mddev,
int slot, sector_t lo, sector_t hi)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
- struct suspend_info *s;
+ struct mdp_superblock_1 *sb = NULL;
+ struct md_rdev *rdev;
if (!hi) {
/*
return;
}
+ rdev_for_each(rdev, mddev)
+ if (rdev->raid_disk > -1 && !test_bit(Faulty, &rdev->flags)) {
+ sb = page_address(rdev->sb_page);
+ break;
+ }
+
/*
* The bitmaps are not same for different nodes
* if RESYNCING is happening in one node, then
* sync_low/hi is used to record the region which
* arrived in the previous RESYNCING message,
*
- * Call bitmap_sync_with_cluster to clear
- * NEEDED_MASK and set RESYNC_MASK since
- * resync thread is running in another node,
- * so we don't need to do the resync again
- * with the same section */
- md_bitmap_sync_with_cluster(mddev, cinfo->sync_low, cinfo->sync_hi, lo, hi);
+ * Call md_bitmap_sync_with_cluster to clear NEEDED_MASK
+ * and set RESYNC_MASK since resync thread is running
+ * in another node, so we don't need to do the resync
+ * again with the same section.
+ *
+ * Skip md_bitmap_sync_with_cluster in case reshape
+ * happening, because reshaping region is small and
+ * we don't want to trigger lots of WARN.
+ */
+ if (sb && !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE))
+ md_bitmap_sync_with_cluster(mddev, cinfo->sync_low,
+ cinfo->sync_hi, lo, hi);
cinfo->sync_low = lo;
cinfo->sync_hi = hi;
- s = kzalloc(sizeof(struct suspend_info), GFP_KERNEL);
- if (!s)
- return;
- s->slot = slot;
- s->lo = lo;
- s->hi = hi;
mddev->pers->quiesce(mddev, 1);
spin_lock_irq(&cinfo->suspend_lock);
- /* Remove existing entry (if exists) before adding */
- __remove_suspend_info(cinfo, slot);
- list_add(&s->list, &cinfo->suspend_list);
+ cinfo->suspend_from = slot;
+ cinfo->suspend_lo = lo;
+ cinfo->suspend_hi = hi;
spin_unlock_irq(&cinfo->suspend_lock);
mddev->pers->quiesce(mddev, 0);
}
case BITMAP_NEEDS_SYNC:
__recover_slot(mddev, le32_to_cpu(msg->slot));
break;
+ case BITMAP_RESIZE:
+ if (le64_to_cpu(msg->high) != mddev->pers->size(mddev, 0, 0))
+ ret = md_bitmap_resize(mddev->bitmap,
+ le64_to_cpu(msg->high), 0, 0);
+ break;
default:
ret = -1;
pr_warn("%s:%d Received unknown message from %d\n",
struct md_cluster_info *cinfo = mddev->cluster_info;
int i, ret = 0;
struct dlm_lock_resource *bm_lockres;
- struct suspend_info *s;
char str[64];
sector_t lo, hi;
bm_lockres->flags |= DLM_LKF_NOQUEUE;
ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
if (ret == -EAGAIN) {
- s = read_resync_info(mddev, bm_lockres);
- if (s) {
+ if (read_resync_info(mddev, bm_lockres)) {
pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
__func__, __LINE__,
- (unsigned long long) s->lo,
- (unsigned long long) s->hi, i);
- spin_lock_irq(&cinfo->suspend_lock);
- s->slot = i;
- list_add(&s->list, &cinfo->suspend_list);
- spin_unlock_irq(&cinfo->suspend_lock);
+ (unsigned long long) cinfo->suspend_lo,
+ (unsigned long long) cinfo->suspend_hi,
+ i);
+ cinfo->suspend_from = i;
}
ret = 0;
lockres_free(bm_lockres);
if (!cinfo)
return 0;
- /* BITMAP_NEEDS_SYNC message should be sent when node
+ /*
+ * BITMAP_NEEDS_SYNC message should be sent when node
* is leaving the cluster with dirty bitmap, also we
- * can only deliver it when dlm connection is available */
- if (cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector)
+ * can only deliver it when dlm connection is available.
+ *
+ * Also, we should send BITMAP_NEEDS_SYNC message in
+ * case reshaping is interrupted.
+ */
+ if ((cinfo->slot_number > 0 && mddev->recovery_cp != MaxSector) ||
+ (mddev->reshape_position != MaxSector &&
+ test_bit(MD_CLOSING, &mddev->flags)))
resync_bitmap(mddev);
set_bit(MD_CLUSTER_HOLDING_MUTEX_FOR_RECVD, &cinfo->state);
unlock_comm(cinfo);
}
+static int update_bitmap_size(struct mddev *mddev, sector_t size)
+{
+ struct md_cluster_info *cinfo = mddev->cluster_info;
+ struct cluster_msg cmsg = {0};
+ int ret;
+
+ cmsg.type = cpu_to_le32(BITMAP_RESIZE);
+ cmsg.high = cpu_to_le64(size);
+ ret = sendmsg(cinfo, &cmsg, 0);
+ if (ret)
+ pr_err("%s:%d: failed to send BITMAP_RESIZE message (%d)\n",
+ __func__, __LINE__, ret);
+ return ret;
+}
+
+static int resize_bitmaps(struct mddev *mddev, sector_t newsize, sector_t oldsize)
+{
+ struct bitmap_counts *counts;
+ char str[64];
+ struct dlm_lock_resource *bm_lockres;
+ struct bitmap *bitmap = mddev->bitmap;
+ unsigned long my_pages = bitmap->counts.pages;
+ int i, rv;
+
+ /*
+ * We need to ensure all the nodes can grow to a larger
+ * bitmap size before make the reshaping.
+ */
+ rv = update_bitmap_size(mddev, newsize);
+ if (rv)
+ return rv;
+
+ for (i = 0; i < mddev->bitmap_info.nodes; i++) {
+ if (i == md_cluster_ops->slot_number(mddev))
+ continue;
+
+ bitmap = get_bitmap_from_slot(mddev, i);
+ if (IS_ERR(bitmap)) {
+ pr_err("can't get bitmap from slot %d\n", i);
+ goto out;
+ }
+ counts = &bitmap->counts;
+
+ /*
+ * If we can hold the bitmap lock of one node then
+ * the slot is not occupied, update the pages.
+ */
+ snprintf(str, 64, "bitmap%04d", i);
+ bm_lockres = lockres_init(mddev, str, NULL, 1);
+ if (!bm_lockres) {
+ pr_err("Cannot initialize %s lock\n", str);
+ goto out;
+ }
+ bm_lockres->flags |= DLM_LKF_NOQUEUE;
+ rv = dlm_lock_sync(bm_lockres, DLM_LOCK_PW);
+ if (!rv)
+ counts->pages = my_pages;
+ lockres_free(bm_lockres);
+
+ if (my_pages != counts->pages)
+ /*
+ * Let's revert the bitmap size if one node
+ * can't resize bitmap
+ */
+ goto out;
+ }
+
+ return 0;
+out:
+ md_bitmap_free(bitmap);
+ update_bitmap_size(mddev, oldsize);
+ return -1;
+}
+
/*
* return 0 if all the bitmaps have the same sync_size
*/
return dlm_lock_sync_interruptible(cinfo->resync_lockres, DLM_LOCK_EX, mddev);
}
+static void resync_info_get(struct mddev *mddev, sector_t *lo, sector_t *hi)
+{
+ struct md_cluster_info *cinfo = mddev->cluster_info;
+
+ spin_lock_irq(&cinfo->suspend_lock);
+ *lo = cinfo->suspend_lo;
+ *hi = cinfo->suspend_hi;
+ spin_unlock_irq(&cinfo->suspend_lock);
+}
+
static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi)
{
struct md_cluster_info *cinfo = mddev->cluster_info;
{
struct md_cluster_info *cinfo = mddev->cluster_info;
int ret = 0;
- struct suspend_info *s;
if ((direction == READ) &&
test_bit(MD_CLUSTER_SUSPEND_READ_BALANCING, &cinfo->state))
return 1;
spin_lock_irq(&cinfo->suspend_lock);
- if (list_empty(&cinfo->suspend_list))
- goto out;
- list_for_each_entry(s, &cinfo->suspend_list, list)
- if (hi > s->lo && lo < s->hi) {
- ret = 1;
- break;
- }
-out:
+ if (hi > cinfo->suspend_lo && lo < cinfo->suspend_hi)
+ ret = 1;
spin_unlock_irq(&cinfo->suspend_lock);
return ret;
}
.resync_start = resync_start,
.resync_finish = resync_finish,
.resync_info_update = resync_info_update,
+ .resync_info_get = resync_info_get,
.metadata_update_start = metadata_update_start,
.metadata_update_finish = metadata_update_finish,
.metadata_update_cancel = metadata_update_cancel,
.remove_disk = remove_disk,
.load_bitmaps = load_bitmaps,
.gather_bitmaps = gather_bitmaps,
+ .resize_bitmaps = resize_bitmaps,
.lock_all_bitmaps = lock_all_bitmaps,
.unlock_all_bitmaps = unlock_all_bitmaps,
.update_size = update_size,
int (*leave)(struct mddev *mddev);
int (*slot_number)(struct mddev *mddev);
int (*resync_info_update)(struct mddev *mddev, sector_t lo, sector_t hi);
+ void (*resync_info_get)(struct mddev *mddev, sector_t *lo, sector_t *hi);
int (*metadata_update_start)(struct mddev *mddev);
int (*metadata_update_finish)(struct mddev *mddev);
void (*metadata_update_cancel)(struct mddev *mddev);
int (*remove_disk)(struct mddev *mddev, struct md_rdev *rdev);
void (*load_bitmaps)(struct mddev *mddev, int total_slots);
int (*gather_bitmaps)(struct md_rdev *rdev);
+ int (*resize_bitmaps)(struct mddev *mddev, sector_t newsize, sector_t oldsize);
int (*lock_all_bitmaps)(struct mddev *mddev);
void (*unlock_all_bitmaps)(struct mddev *mddev);
void (*update_size)(struct mddev *mddev, sector_t old_dev_sectors);
rdev_dec_pending(rdev, mddev);
if (atomic_dec_and_test(&fi->flush_pending)) {
- if (bio->bi_iter.bi_size == 0)
+ if (bio->bi_iter.bi_size == 0) {
/* an empty barrier - all done */
bio_endio(bio);
- else {
+ mempool_free(fi, mddev->flush_pool);
+ } else {
INIT_WORK(&fi->flush_work, submit_flushes);
queue_work(md_wq, &fi->flush_work);
}
rcu_read_unlock();
if (atomic_dec_and_test(&fi->flush_pending)) {
- if (bio->bi_iter.bi_size == 0)
+ if (bio->bi_iter.bi_size == 0) {
/* an empty barrier - all done */
bio_endio(bio);
- else {
+ mempool_free(fi, mddev->flush_pool);
+ } else {
INIT_WORK(&fi->flush_work, submit_flushes);
queue_work(md_wq, &fi->flush_work);
}
mddev->to_remove = &md_redundancy_group;
module_put(pers->owner);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-}
-
-void md_stop(struct mddev *mddev)
-{
- /* stop the array and free an attached data structures.
- * This is called from dm-raid
- */
- __md_stop(mddev);
if (mddev->flush_bio_pool) {
mempool_destroy(mddev->flush_bio_pool);
mddev->flush_bio_pool = NULL;
mempool_destroy(mddev->flush_pool);
mddev->flush_pool = NULL;
}
+}
+
+void md_stop(struct mddev *mddev)
+{
+ /* stop the array and free an attached data structures.
+ * This is called from dm-raid
+ */
+ __md_stop(mddev);
bioset_exit(&mddev->bio_set);
bioset_exit(&mddev->sync_set);
}
else if (!mddev->bitmap)
j = mddev->recovery_cp;
- } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
+ } else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
max_sectors = mddev->resync_max_sectors;
- else {
+ /*
+ * If the original node aborts reshaping then we continue the
+ * reshaping, so set j again to avoid restart reshape from the
+ * first beginning
+ */
+ if (mddev_is_clustered(mddev) &&
+ mddev->reshape_position != MaxSector)
+ j = mddev->reshape_position;
+ } else {
/* recovery follows the physical size of devices */
max_sectors = mddev->dev_sectors;
j = MaxSector;
mddev_lock_nointr(mddev);
md_set_array_sectors(mddev, mddev->pers->size(mddev, 0, 0));
mddev_unlock(mddev);
- set_capacity(mddev->gendisk, mddev->array_sectors);
- revalidate_disk(mddev->gendisk);
+ if (!mddev_is_clustered(mddev)) {
+ set_capacity(mddev->gendisk, mddev->array_sectors);
+ revalidate_disk(mddev->gendisk);
+ }
}
spin_lock(&mddev->lock);
*/
void md_check_recovery(struct mddev *mddev)
{
+ if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
+ /* Write superblock - thread that called mddev_suspend()
+ * holds reconfig_mutex for us.
+ */
+ set_bit(MD_UPDATING_SB, &mddev->flags);
+ smp_mb__after_atomic();
+ if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
+ md_update_sb(mddev, 0);
+ clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
+ wake_up(&mddev->sb_wait);
+ }
+
if (mddev->suspended)
return;
unlock:
wake_up(&mddev->sb_wait);
mddev_unlock(mddev);
- } else if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags) && mddev->sb_flags) {
- /* Write superblock - thread that called mddev_suspend()
- * holds reconfig_mutex for us.
- */
- set_bit(MD_UPDATING_SB, &mddev->flags);
- smp_mb__after_atomic();
- if (test_bit(MD_ALLOW_SB_UPDATE, &mddev->flags))
- md_update_sb(mddev, 0);
- clear_bit_unlock(MD_UPDATING_SB, &mddev->flags);
- wake_up(&mddev->sb_wait);
}
}
EXPORT_SYMBOL(md_check_recovery);
void md_reap_sync_thread(struct mddev *mddev)
{
struct md_rdev *rdev;
+ sector_t old_dev_sectors = mddev->dev_sectors;
+ bool is_reshaped = false;
/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
}
}
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
- mddev->pers->finish_reshape)
+ mddev->pers->finish_reshape) {
mddev->pers->finish_reshape(mddev);
+ if (mddev_is_clustered(mddev))
+ is_reshaped = true;
+ }
/* If array is no-longer degraded, then any saved_raid_disk
* information must be scrapped.
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+ /*
+ * We call md_cluster_ops->update_size here because sync_size could
+ * be changed by md_update_sb, and MD_RECOVERY_RESHAPE is cleared,
+ * so it is time to update size across cluster.
+ */
+ if (mddev_is_clustered(mddev) && is_reshaped
+ && !test_bit(MD_CLOSING, &mddev->flags))
+ md_cluster_ops->update_size(mddev, old_dev_sectors);
wake_up(&resync_wait);
/* flag recovery needed just to double check */
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
}
if (role != rdev2->raid_disk) {
- /* got activated */
- if (rdev2->raid_disk == -1 && role != 0xffff) {
+ /*
+ * got activated except reshape is happening.
+ */
+ if (rdev2->raid_disk == -1 && role != 0xffff &&
+ !(le32_to_cpu(sb->feature_map) &
+ MD_FEATURE_RESHAPE_ACTIVE)) {
rdev2->saved_raid_disk = role;
ret = remove_and_add_spares(mddev, rdev2);
pr_info("Activated spare: %s\n",
if (mddev->raid_disks != le32_to_cpu(sb->raid_disks))
update_raid_disks(mddev, le32_to_cpu(sb->raid_disks));
+ /*
+ * Since mddev->delta_disks has already updated in update_raid_disks,
+ * so it is time to check reshape.
+ */
+ if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
+ (le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
+ /*
+ * reshape is happening in the remote node, we need to
+ * update reshape_position and call start_reshape.
+ */
+ mddev->reshape_position = sb->reshape_position;
+ if (mddev->pers->update_reshape_pos)
+ mddev->pers->update_reshape_pos(mddev);
+ if (mddev->pers->start_reshape)
+ mddev->pers->start_reshape(mddev);
+ } else if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery) &&
+ mddev->reshape_position != MaxSector &&
+ !(le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) {
+ /* reshape is just done in another node. */
+ mddev->reshape_position = MaxSector;
+ if (mddev->pers->update_reshape_pos)
+ mddev->pers->update_reshape_pos(mddev);
+ }
+
/* Finally set the event to be up to date */
mddev->events = le64_to_cpu(sb->events);
}
int (*check_reshape) (struct mddev *mddev);
int (*start_reshape) (struct mddev *mddev);
void (*finish_reshape) (struct mddev *mddev);
+ void (*update_reshape_pos) (struct mddev *mddev);
/* quiesce suspends or resumes internal processing.
* 1 - stop new actions and wait for action io to complete
* 0 - return to normal behaviour
*/
if (rdev->saved_raid_disk >= 0 &&
rdev->saved_raid_disk >= first &&
+ rdev->saved_raid_disk < conf->raid_disks &&
conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
first = last = rdev->saved_raid_disk;
#include <linux/seq_file.h>
#include <linux/ratelimit.h>
#include <linux/kthread.h>
+#include <linux/raid/md_p.h>
#include <trace/events/block.h>
#include "md.h"
#include "raid10.h"
first = last = rdev->raid_disk;
if (rdev->saved_raid_disk >= first &&
+ rdev->saved_raid_disk < conf->geo.raid_disks &&
conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
mirror = rdev->saved_raid_disk;
else
sector_t sect;
int must_sync;
int any_working;
+ int need_recover = 0;
+ int need_replace = 0;
struct raid10_info *mirror = &conf->mirrors[i];
struct md_rdev *mrdev, *mreplace;
mrdev = rcu_dereference(mirror->rdev);
mreplace = rcu_dereference(mirror->replacement);
- if ((mrdev == NULL ||
- test_bit(Faulty, &mrdev->flags) ||
- test_bit(In_sync, &mrdev->flags)) &&
- (mreplace == NULL ||
- test_bit(Faulty, &mreplace->flags))) {
+ if (mrdev != NULL &&
+ !test_bit(Faulty, &mrdev->flags) &&
+ !test_bit(In_sync, &mrdev->flags))
+ need_recover = 1;
+ if (mreplace != NULL &&
+ !test_bit(Faulty, &mreplace->flags))
+ need_replace = 1;
+
+ if (!need_recover && !need_replace) {
rcu_read_unlock();
continue;
}
r10_bio->devs[1].devnum = i;
r10_bio->devs[1].addr = to_addr;
- if (!test_bit(In_sync, &mrdev->flags)) {
+ if (need_recover) {
bio = r10_bio->devs[1].bio;
bio->bi_next = biolist;
biolist = bio;
bio = r10_bio->devs[1].repl_bio;
if (bio)
bio->bi_end_io = NULL;
- /* Note: if mreplace != NULL, then bio
+ /* Note: if need_replace, then bio
* cannot be NULL as r10buf_pool_alloc will
* have allocated it.
- * So the second test here is pointless.
- * But it keeps semantic-checkers happy, and
- * this comment keeps human reviewers
- * happy.
*/
- if (mreplace == NULL || bio == NULL ||
- test_bit(Faulty, &mreplace->flags))
+ if (!need_replace)
break;
bio->bi_next = biolist;
biolist = bio;
spin_unlock_irq(&conf->device_lock);
if (mddev->delta_disks && mddev->bitmap) {
- ret = md_bitmap_resize(mddev->bitmap,
- raid10_size(mddev, 0, conf->geo.raid_disks),
- 0, 0);
+ struct mdp_superblock_1 *sb = NULL;
+ sector_t oldsize, newsize;
+
+ oldsize = raid10_size(mddev, 0, 0);
+ newsize = raid10_size(mddev, 0, conf->geo.raid_disks);
+
+ if (!mddev_is_clustered(mddev)) {
+ ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
+ if (ret)
+ goto abort;
+ else
+ goto out;
+ }
+
+ rdev_for_each(rdev, mddev) {
+ if (rdev->raid_disk > -1 &&
+ !test_bit(Faulty, &rdev->flags))
+ sb = page_address(rdev->sb_page);
+ }
+
+ /*
+ * some node is already performing reshape, and no need to
+ * call md_bitmap_resize again since it should be called when
+ * receiving BITMAP_RESIZE msg
+ */
+ if ((sb && (le32_to_cpu(sb->feature_map) &
+ MD_FEATURE_RESHAPE_ACTIVE)) || (oldsize == newsize))
+ goto out;
+
+ ret = md_bitmap_resize(mddev->bitmap, newsize, 0, 0);
if (ret)
goto abort;
+
+ ret = md_cluster_ops->resize_bitmaps(mddev, newsize, oldsize);
+ if (ret) {
+ md_bitmap_resize(mddev->bitmap, oldsize, 0, 0);
+ goto abort;
+ }
}
+out:
if (mddev->delta_disks > 0) {
rdev_for_each(rdev, mddev)
if (rdev->raid_disk < 0 &&
r10_bio->master_bio = read_bio;
r10_bio->read_slot = r10_bio->devs[r10_bio->read_slot].devnum;
+ /*
+ * Broadcast RESYNC message to other nodes, so all nodes would not
+ * write to the region to avoid conflict.
+ */
+ if (mddev_is_clustered(mddev) && conf->cluster_sync_high <= sector_nr) {
+ struct mdp_superblock_1 *sb = NULL;
+ int sb_reshape_pos = 0;
+
+ conf->cluster_sync_low = sector_nr;
+ conf->cluster_sync_high = sector_nr + CLUSTER_RESYNC_WINDOW_SECTORS;
+ sb = page_address(rdev->sb_page);
+ if (sb) {
+ sb_reshape_pos = le64_to_cpu(sb->reshape_position);
+ /*
+ * Set cluster_sync_low again if next address for array
+ * reshape is less than cluster_sync_low. Since we can't
+ * update cluster_sync_low until it has finished reshape.
+ */
+ if (sb_reshape_pos < conf->cluster_sync_low)
+ conf->cluster_sync_low = sb_reshape_pos;
+ }
+
+ md_cluster_ops->resync_info_update(mddev, conf->cluster_sync_low,
+ conf->cluster_sync_high);
+ }
+
/* Now find the locations in the new layout */
__raid10_find_phys(&conf->geo, r10_bio);
conf->fullsync = 0;
}
+static void raid10_update_reshape_pos(struct mddev *mddev)
+{
+ struct r10conf *conf = mddev->private;
+ sector_t lo, hi;
+
+ md_cluster_ops->resync_info_get(mddev, &lo, &hi);
+ if (((mddev->reshape_position <= hi) && (mddev->reshape_position >= lo))
+ || mddev->reshape_position == MaxSector)
+ conf->reshape_progress = mddev->reshape_position;
+ else
+ WARN_ON_ONCE(1);
+}
+
static int handle_reshape_read_error(struct mddev *mddev,
struct r10bio *r10_bio)
{
.check_reshape = raid10_check_reshape,
.start_reshape = raid10_start_reshape,
.finish_reshape = raid10_finish_reshape,
+ .update_reshape_pos = raid10_update_reshape_pos,
.congested = raid10_congested,
};
set_bit(MD_HAS_JOURNAL, &conf->mddev->flags);
return 0;
- rcu_assign_pointer(conf->log, NULL);
- md_unregister_thread(&log->reclaim_thread);
reclaim_thread:
mempool_exit(&log->meta_pool);
out_mempool:
pr_debug("raid456: error called\n");
spin_lock_irqsave(&conf->device_lock, flags);
+
+ if (test_bit(In_sync, &rdev->flags) &&
+ mddev->degraded == conf->max_degraded) {
+ /*
+ * Don't allow to achieve failed state
+ * Don't try to recover this device
+ */
+ conf->recovery_disabled = mddev->recovery_disabled;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
+ return;
+ }
+
set_bit(Faulty, &rdev->flags);
clear_bit(In_sync, &rdev->flags);
mddev->degraded = raid5_calc_degraded(conf);
u32 val;
int rc, templ_major, templ_minor, len;
- pci_write_config_word(dev, fn->dvsec_afu_info_pos, afu_idx);
+ pci_write_config_byte(dev,
+ fn->dvsec_afu_info_pos + OCXL_DVSEC_AFU_INFO_AFU_IDX,
+ afu_idx);
rc = read_afu_info(dev, fn, OCXL_DVSEC_TEMPL_VERSION, &val);
if (rc)
return rc;
#include <linux/tcp.h>
#include <linux/if_vlan.h>
#include <linux/interrupt.h>
-#include <net/busy_poll.h>
#include <linux/clk.h>
#include <linux/if_ether.h>
#include <linux/net_tstamp.h>
#include <net/tcp.h>
#include <net/ipv6.h>
#include <net/ip6_checksum.h>
-#include <net/busy_poll.h>
#include <linux/prefetch.h>
#include "bnx2x_cmn.h"
#include "bnx2x_init.h"
if (!compat)
return NULL;
- priv->mdio_dn = of_find_compatible_node(dn, NULL, compat);
+ priv->mdio_dn = of_get_compatible_child(dn, compat);
kfree(compat);
if (!priv->mdio_dn) {
dev_err(kdev, "unable to find MDIO bus node\n");
padlen = 0;
/* No room for FCS, need to reallocate skb. */
else
- padlen = ETH_FCS_LEN - tailroom;
+ padlen = ETH_FCS_LEN;
} else {
/* Add room for FCS. */
padlen += ETH_FCS_LEN;
reset_level = HNAE3_FUNC_RESET;
}
- err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3;
- if (err_sts) {
- hclge_log_error(dev, hw_err_lst3, err_sts);
- reset_level = HNAE3_FUNC_RESET;
+ if (cmd == HCLGE_PPP_CMD0_INT_CMD) {
+ err_sts = (le32_to_cpu(desc[0].data[4]) >> 8) & 0x3;
+ if (err_sts) {
+ hclge_log_error(dev, hw_err_lst3, err_sts);
+ reset_level = HNAE3_FUNC_RESET;
+ }
}
/* clear PPP INT */
/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/prefetch.h>
-#include <net/busy_poll.h>
#include <linux/bpf_trace.h>
#include <net/xdp.h>
#include "i40e.h"
/* Copyright(c) 2013 - 2018 Intel Corporation. */
#include <linux/prefetch.h>
-#include <net/busy_poll.h>
#include "iavf.h"
#include "iavf_trace.h"
return 0;
}
+/**
+ * ice_dev_onetime_setup - Temporary HW/FW workarounds
+ * @hw: pointer to the HW structure
+ *
+ * This function provides temporary workarounds for certain issues
+ * that are expected to be fixed in the HW/FW.
+ */
+void ice_dev_onetime_setup(struct ice_hw *hw)
+{
+ /* configure Rx - set non pxe mode */
+ wr32(hw, GLLAN_RCTL_0, 0x1);
+
+#define MBX_PF_VT_PFALLOC 0x00231E80
+ /* set VFs per PF */
+ wr32(hw, MBX_PF_VT_PFALLOC, rd32(hw, PF_VT_PFALLOC_HIF));
+}
+
/**
* ice_clear_pf_cfg - Clear PF configuration
* @hw: pointer to the hardware structure
*
* Get Link Status (0x607). Returns the link status of the adapter.
*/
-enum ice_status
+static enum ice_status
ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
struct ice_link_status *link, struct ice_sq_cd *cd)
{
if (status)
goto err_unroll_sched;
+ ice_dev_onetime_setup(hw);
+
/* Get MAC information */
/* A single port can report up to two (LAN and WoL) addresses */
mac_buf = devm_kcalloc(ice_hw_to_dev(hw), 2,
if (!status)
ice_parse_caps(hw, buf, le32_to_cpu(cmd->count), opc);
else if (hw->adminq.sq_last_status == ICE_AQ_RC_ENOMEM)
- *cap_count =
- DIV_ROUND_UP(le16_to_cpu(desc.datalen),
- sizeof(struct ice_aqc_list_caps_elem));
+ *cap_count = le32_to_cpu(cmd->count);
return status;
}
return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd);
}
-/**
- * ice_aq_set_event_mask
- * @hw: pointer to the hw struct
- * @port_num: port number of the physical function
- * @mask: event mask to be set
- * @cd: pointer to command details structure or NULL
- *
- * Set event mask (0x0613)
- */
-enum ice_status
-ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
- struct ice_sq_cd *cd)
-{
- struct ice_aqc_set_event_mask *cmd;
- struct ice_aq_desc desc;
-
- cmd = &desc.params.set_event_mask;
-
- ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_event_mask);
-
- cmd->lport_num = port_num;
-
- cmd->event_mask = cpu_to_le16(mask);
-
- return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
-}
-
/**
* __ice_aq_get_set_rss_lut
* @hw: pointer to the hardware structure
struct ice_sq_cd *cd);
void ice_clear_pxe_mode(struct ice_hw *hw);
enum ice_status ice_get_caps(struct ice_hw *hw);
+
+void ice_dev_onetime_setup(struct ice_hw *hw);
+
enum ice_status
ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
u32 rxq_index);
ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
struct ice_sq_cd *cd);
enum ice_status
-ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
- struct ice_link_status *link, struct ice_sq_cd *cd);
-enum ice_status
-ice_aq_set_event_mask(struct ice_hw *hw, u8 port_num, u16 mask,
- struct ice_sq_cd *cd);
-enum ice_status
ice_dis_vsi_txq(struct ice_port_info *pi, u8 num_queues, u16 *q_ids,
u32 *q_teids, enum ice_disq_rst_src rst_src, u16 vmvf_num,
struct ice_sq_cd *cmd_details);
/* Defines that help manage the driver vs FW API checks.
* Take a look at ice_aq_ver_check in ice_controlq.c for actual usage.
- *
*/
#define EXP_FW_API_VER_BRANCH 0x00
-#define EXP_FW_API_VER_MAJOR 0x00
-#define EXP_FW_API_VER_MINOR 0x01
+#define EXP_FW_API_VER_MAJOR 0x01
+#define EXP_FW_API_VER_MINOR 0x03
/* Different control queue types: These are mainly for SW consumption. */
enum ice_ctl_q {
/* Device IDs */
/* Intel(R) Ethernet Controller E810-C for backplane */
-#define ICE_DEV_ID_C810_BACKPLANE 0x1591
+#define ICE_DEV_ID_E810C_BACKPLANE 0x1591
/* Intel(R) Ethernet Controller E810-C for QSFP */
-#define ICE_DEV_ID_C810_QSFP 0x1592
+#define ICE_DEV_ID_E810C_QSFP 0x1592
/* Intel(R) Ethernet Controller E810-C for SFP */
-#define ICE_DEV_ID_C810_SFP 0x1593
+#define ICE_DEV_ID_E810C_SFP 0x1593
#endif /* _ICE_DEVIDS_H_ */
#define VPINT_ALLOC_LAST_S 12
#define VPINT_ALLOC_LAST_M ICE_M(0x7FF, 12)
#define VPINT_ALLOC_VALID_M BIT(31)
+#define VPINT_ALLOC_PCI(_VF) (0x0009D000 + ((_VF) * 4))
+#define VPINT_ALLOC_PCI_FIRST_S 0
+#define VPINT_ALLOC_PCI_FIRST_M ICE_M(0x7FF, 0)
+#define VPINT_ALLOC_PCI_LAST_S 12
+#define VPINT_ALLOC_PCI_LAST_M ICE_M(0x7FF, 12)
+#define VPINT_ALLOC_PCI_VALID_M BIT(31)
+#define GLLAN_RCTL_0 0x002941F8
#define QRX_CONTEXT(_i, _QRX) (0x00280000 + ((_i) * 8192 + (_QRX) * 4))
#define QRX_CTRL(_QRX) (0x00120000 + ((_QRX) * 4))
#define QRX_CTRL_MAX_INDEX 2047
#define GLV_UPRCL(_i) (0x003B2000 + ((_i) * 8))
#define GLV_UPTCH(_i) (0x0030A004 + ((_i) * 8))
#define GLV_UPTCL(_i) (0x0030A000 + ((_i) * 8))
+#define PF_VT_PFALLOC_HIF 0x0009DD80
#define VSIQF_HKEY_MAX_INDEX 12
#define VSIQF_HLUT_MAX_INDEX 15
#define VFINT_DYN_CTLN(_i) (0x00003800 + ((_i) * 4))
* @irq: interrupt number
* @data: pointer to a q_vector
*/
-irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
+static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data)
{
struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
vsi->hw_base_vector = 0;
ice_vsi_clear_rings(vsi);
ice_vsi_free_arrays(vsi, false);
+ ice_dev_onetime_setup(&vsi->back->hw);
ice_vsi_set_num_qs(vsi);
/* Initialize VSI struct elements and create VSI in FW */
int ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena);
-irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data);
#endif /* !_ICE_LIB_H_ */
}
}
-/**
- * ice_watchdog_subtask - periodic tasks not using event driven scheduling
- * @pf: board private structure
- */
-static void ice_watchdog_subtask(struct ice_pf *pf)
-{
- int i;
-
- /* if interface is down do nothing */
- if (test_bit(__ICE_DOWN, pf->state) ||
- test_bit(__ICE_CFG_BUSY, pf->state))
- return;
-
- /* make sure we don't do these things too often */
- if (time_before(jiffies,
- pf->serv_tmr_prev + pf->serv_tmr_period))
- return;
-
- pf->serv_tmr_prev = jiffies;
-
- /* Update the stats for active netdevs so the network stack
- * can look at updated numbers whenever it cares to
- */
- ice_update_pf_stats(pf);
- for (i = 0; i < pf->num_alloc_vsi; i++)
- if (pf->vsi[i] && pf->vsi[i]->netdev)
- ice_update_vsi_stats(pf->vsi[i]);
-}
-
/**
* ice_print_link_msg - print link up or down message
* @vsi: the VSI whose link status is being queried
speed, fc);
}
-/**
- * ice_init_link_events - enable/initialize link events
- * @pi: pointer to the port_info instance
- *
- * Returns -EIO on failure, 0 on success
- */
-static int ice_init_link_events(struct ice_port_info *pi)
-{
- u16 mask;
-
- mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA |
- ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL));
-
- if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) {
- dev_dbg(ice_hw_to_dev(pi->hw),
- "Failed to set link event mask for port %d\n",
- pi->lport);
- return -EIO;
- }
-
- if (ice_aq_get_link_info(pi, true, NULL, NULL)) {
- dev_dbg(ice_hw_to_dev(pi->hw),
- "Failed to enable link events for port %d\n",
- pi->lport);
- return -EIO;
- }
-
- return 0;
-}
-
/**
* ice_vsi_link_event - update the vsi's netdev
* @vsi: the vsi on which the link event occurred
}
/**
- * ice_handle_link_event - handle link event via ARQ
- * @pf: pf that the link event is associated with
- *
- * Return -EINVAL if port_info is null
- * Return status on succes
+ * ice_watchdog_subtask - periodic tasks not using event driven scheduling
+ * @pf: board private structure
*/
-static int ice_handle_link_event(struct ice_pf *pf)
+static void ice_watchdog_subtask(struct ice_pf *pf)
{
- struct ice_port_info *port_info;
- int status;
+ int i;
- port_info = pf->hw.port_info;
- if (!port_info)
- return -EINVAL;
+ /* if interface is down do nothing */
+ if (test_bit(__ICE_DOWN, pf->state) ||
+ test_bit(__ICE_CFG_BUSY, pf->state))
+ return;
- status = ice_link_event(pf, port_info);
- if (status)
- dev_dbg(&pf->pdev->dev,
- "Could not process link event, error %d\n", status);
+ /* make sure we don't do these things too often */
+ if (time_before(jiffies,
+ pf->serv_tmr_prev + pf->serv_tmr_period))
+ return;
- return status;
+ pf->serv_tmr_prev = jiffies;
+
+ if (ice_link_event(pf, pf->hw.port_info))
+ dev_dbg(&pf->pdev->dev, "ice_link_event failed\n");
+
+ /* Update the stats for active netdevs so the network stack
+ * can look at updated numbers whenever it cares to
+ */
+ ice_update_pf_stats(pf);
+ for (i = 0; i < pf->num_alloc_vsi; i++)
+ if (pf->vsi[i] && pf->vsi[i]->netdev)
+ ice_update_vsi_stats(pf->vsi[i]);
}
/**
opcode = le16_to_cpu(event.desc.opcode);
switch (opcode) {
- case ice_aqc_opc_get_link_status:
- if (ice_handle_link_event(pf))
- dev_err(&pf->pdev->dev,
- "Could not handle link event\n");
- break;
case ice_mbx_opc_send_msg_to_pf:
ice_vc_process_vf_msg(pf, &event);
break;
/* since everything is good, start the service timer */
mod_timer(&pf->serv_tmr, round_jiffies(jiffies + pf->serv_tmr_period));
- err = ice_init_link_events(pf->hw.port_info);
- if (err) {
- dev_err(&pdev->dev, "ice_init_link_events failed: %d\n", err);
- goto err_alloc_sw_unroll;
- }
-
return 0;
err_alloc_sw_unroll:
* Class, Class Mask, private data (not used) }
*/
static const struct pci_device_id ice_pci_tbl[] = {
- { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_BACKPLANE), 0 },
- { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_QSFP), 0 },
- { PCI_VDEVICE(INTEL, ICE_DEV_ID_C810_SFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_BACKPLANE), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E810C_SFP), 0 },
/* required last entry */
{ 0, }
};
vsi = pf->vsi[vf->lan_vsi_idx];
wr32(hw, VPINT_ALLOC(vf->vf_id), 0);
+ wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), 0);
first = vf->first_vector_idx;
last = first + pf->num_vf_msix - 1;
VPINT_ALLOC_VALID_M);
wr32(hw, VPINT_ALLOC(vf->vf_id), reg);
+ reg = (((first << VPINT_ALLOC_PCI_FIRST_S) & VPINT_ALLOC_PCI_FIRST_M) |
+ ((last << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M) |
+ VPINT_ALLOC_PCI_VALID_M);
+ wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg);
/* map the interrupts to its functions */
for (v = first; v <= last; v++) {
reg = (((abs_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
wr32(hw, GLINT_VECT2FUNC(v), reg);
}
+ /* set regardless of mapping mode */
+ wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M);
+
/* VF Tx queues allocation */
if (vsi->tx_mapping_mode == ICE_VSI_MAP_CONTIG) {
- wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id),
- VPLAN_TXQ_MAPENA_TX_ENA_M);
/* set the VF PF Tx queue range
* VFNUMQ value should be set to (number of queues - 1). A value
* of 0 means 1 queue and a value of 255 means 256 queues
"Scattered mode for VF Tx queues is not yet implemented\n");
}
+ /* set regardless of mapping mode */
+ wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id), VPLAN_RXQ_MAPENA_RX_ENA_M);
+
/* VF Rx queues allocation */
if (vsi->rx_mapping_mode == ICE_VSI_MAP_CONTIG) {
- wr32(hw, VPLAN_RXQ_MAPENA(vf->vf_id),
- VPLAN_RXQ_MAPENA_RX_ENA_M);
/* set the VF PF Rx queue range
* VFNUMQ value should be set to (number of queues - 1). A value
* of 0 means 1 queue and a value of 255 means 256 queues
#include "ixgbe_ipsec.h"
#include <net/xdp.h>
-#include <net/busy_poll.h>
/* common prefix used by pr_<> macros */
#undef pr_fmt
sizeof(struct nix_cq_ctx_s));
else if (req->ctype == NIX_AQ_CTYPE_RSS)
memcpy(&rsp->rss, ctx,
- sizeof(struct nix_cq_ctx_s));
+ sizeof(struct nix_rsse_s));
else if (req->ctype == NIX_AQ_CTYPE_MCE)
memcpy(&rsp->mce, ctx,
sizeof(struct nix_rx_mce_s));
return 0;
/* Add a new one to the list, at the tail */
- mce = kzalloc(sizeof(*mce), GFP_KERNEL);
+ mce = kzalloc(sizeof(*mce), GFP_ATOMIC);
if (!mce)
return -ENOMEM;
mce->idx = idx;
#include <linux/slab.h>
#include <linux/hash.h>
#include <net/ip.h>
-#include <net/busy_poll.h>
#include <net/vxlan.h>
#include <net/devlink.h>
*
*/
-#include <net/busy_poll.h>
#include <linux/bpf.h>
#include <linux/bpf_trace.h>
#include <linux/mlx4/cq.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/tcp.h>
-#include <net/busy_poll.h>
#include <net/ip6_checksum.h>
#include <net/page_pool.h>
#include <net/inet_ecn.h>
config NET_VENDOR_MICROCHIP
bool "Microchip devices"
default y
- depends on SPI
---help---
If you have a network (Ethernet) card belonging to this class, say Y.
#include <net/tcp.h>
#include <asm/byteorder.h>
#include <asm/processor.h>
-#include <net/busy_poll.h>
#include "myri10ge_mcp.h"
#include "myri10ge_mcp_gen_header.h"
static bool rtl_wol_pll_power_down(struct rtl8169_private *tp)
{
- if (!netif_running(tp->dev) || !__rtl8169_get_wol(tp))
+ struct phy_device *phydev;
+
+ if (!__rtl8169_get_wol(tp))
return false;
- phy_speed_down(tp->dev->phydev, false);
+ /* phydev may not be attached to netdevice */
+ phydev = mdiobus_get_phy(tp->mii_bus, 0);
+
+ phy_speed_down(phydev, false);
rtl_wol_suspend_quirk(tp);
return true;
return -ENODEV;
}
- mdio_internal = of_find_compatible_node(mdio_mux, NULL,
+ mdio_internal = of_get_compatible_child(mdio_mux,
"allwinner,sun8i-h3-mdio-internal");
+ of_node_put(mdio_mux);
if (!mdio_internal) {
dev_err(priv->device, "Cannot get internal_mdio node\n");
return -ENODEV;
gmac->rst_ephy = of_reset_control_get_exclusive(iphynode, NULL);
if (IS_ERR(gmac->rst_ephy)) {
ret = PTR_ERR(gmac->rst_ephy);
- if (ret == -EPROBE_DEFER)
+ if (ret == -EPROBE_DEFER) {
+ of_node_put(iphynode);
+ of_node_put(mdio_internal);
return ret;
+ }
continue;
}
dev_info(priv->device, "Found internal PHY node\n");
+ of_node_put(iphynode);
+ of_node_put(mdio_internal);
return 0;
}
+
+ of_node_put(mdio_internal);
return -ENODEV;
}
.name = "Generic 10G PHY",
.soft_reset = gen10g_no_soft_reset,
.config_init = gen10g_config_init,
- .features = 0,
+ .features = PHY_10GBIT_FEATURES,
.config_aneg = gen10g_config_aneg,
.read_status = gen10g_read_status,
.suspend = gen10g_suspend,
struct device_node *matched_node;
int ret;
- matched_node = of_find_compatible_node(node, NULL, "marvell,nfc-uart");
+ matched_node = of_get_compatible_child(node, "marvell,nfc-uart");
if (!matched_node) {
- matched_node = of_find_compatible_node(node, NULL,
- "mrvl,nfc-uart");
+ matched_node = of_get_compatible_child(node, "mrvl,nfc-uart");
if (!matched_node)
return -ENODEV;
}
return (strlen(name) == len) && (strncmp(node_name, name, len) == 0);
}
+EXPORT_SYMBOL(of_node_name_eq);
bool of_node_name_prefix(const struct device_node *np, const char *prefix)
{
return strncmp(kbasename(np->full_name), prefix, strlen(prefix)) == 0;
}
+EXPORT_SYMBOL(of_node_name_prefix);
int of_n_addr_cells(struct device_node *np)
{
ac = of_n_addr_cells(cpun);
cell = of_get_property(cpun, prop_name, &prop_len);
+ if (!cell && !ac && arch_match_cpu_phys_id(cpu, 0))
+ return true;
if (!cell || !ac)
return false;
prop_len /= sizeof(*cell) * ac;
{
struct device_node *cpun;
- for_each_node_by_type(cpun, "cpu") {
+ for_each_of_cpu_node(cpun) {
if (arch_find_n_match_cpu_physical_id(cpun, cpu, thread))
return cpun;
}
}
EXPORT_SYMBOL(of_get_next_available_child);
+/**
+ * of_get_next_cpu_node - Iterate on cpu nodes
+ * @prev: previous child of the /cpus node, or NULL to get first
+ *
+ * Returns a cpu node pointer with refcount incremented, use of_node_put()
+ * on it when done. Returns NULL when prev is the last child. Decrements
+ * the refcount of prev.
+ */
+struct device_node *of_get_next_cpu_node(struct device_node *prev)
+{
+ struct device_node *next = NULL;
+ unsigned long flags;
+ struct device_node *node;
+
+ if (!prev)
+ node = of_find_node_by_path("/cpus");
+
+ raw_spin_lock_irqsave(&devtree_lock, flags);
+ if (prev)
+ next = prev->sibling;
+ else if (node) {
+ next = node->child;
+ of_node_put(node);
+ }
+ for (; next; next = next->sibling) {
+ if (!(of_node_name_eq(next, "cpu") ||
+ (next->type && !of_node_cmp(next->type, "cpu"))))
+ continue;
+ if (!__of_device_is_available(next))
+ continue;
+ if (of_node_get(next))
+ break;
+ }
+ of_node_put(prev);
+ raw_spin_unlock_irqrestore(&devtree_lock, flags);
+ return next;
+}
+EXPORT_SYMBOL(of_get_next_cpu_node);
+
/**
* of_get_compatible_child - Find compatible child node
* @parent: parent node
/* OF on pmac has nodes instead of properties named "l2-cache"
* beneath CPU nodes.
*/
- if (!strcmp(np->type, "cpu"))
+ if (IS_ENABLED(CONFIG_PPC_PMAC) && !strcmp(np->type, "cpu"))
for_each_child_of_node(np, child)
if (!strcmp(child->type, "cache"))
return child;
return cache_level;
}
+
+/**
+ * of_map_rid - Translate a requester ID through a downstream mapping.
+ * @np: root complex device node.
+ * @rid: device requester ID to map.
+ * @map_name: property name of the map to use.
+ * @map_mask_name: optional property name of the mask to use.
+ * @target: optional pointer to a target device node.
+ * @id_out: optional pointer to receive the translated ID.
+ *
+ * Given a device requester ID, look up the appropriate implementation-defined
+ * platform ID and/or the target device which receives transactions on that
+ * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
+ * @id_out may be NULL if only the other is required. If @target points to
+ * a non-NULL device node pointer, only entries targeting that node will be
+ * matched; if it points to a NULL value, it will receive the device node of
+ * the first matching target phandle, with a reference held.
+ *
+ * Return: 0 on success or a standard error code on failure.
+ */
+int of_map_rid(struct device_node *np, u32 rid,
+ const char *map_name, const char *map_mask_name,
+ struct device_node **target, u32 *id_out)
+{
+ u32 map_mask, masked_rid;
+ int map_len;
+ const __be32 *map = NULL;
+
+ if (!np || !map_name || (!target && !id_out))
+ return -EINVAL;
+
+ map = of_get_property(np, map_name, &map_len);
+ if (!map) {
+ if (target)
+ return -ENODEV;
+ /* Otherwise, no map implies no translation */
+ *id_out = rid;
+ return 0;
+ }
+
+ if (!map_len || map_len % (4 * sizeof(*map))) {
+ pr_err("%pOF: Error: Bad %s length: %d\n", np,
+ map_name, map_len);
+ return -EINVAL;
+ }
+
+ /* The default is to select all bits. */
+ map_mask = 0xffffffff;
+
+ /*
+ * Can be overridden by "{iommu,msi}-map-mask" property.
+ * If of_property_read_u32() fails, the default is used.
+ */
+ if (map_mask_name)
+ of_property_read_u32(np, map_mask_name, &map_mask);
+
+ masked_rid = map_mask & rid;
+ for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
+ struct device_node *phandle_node;
+ u32 rid_base = be32_to_cpup(map + 0);
+ u32 phandle = be32_to_cpup(map + 1);
+ u32 out_base = be32_to_cpup(map + 2);
+ u32 rid_len = be32_to_cpup(map + 3);
+
+ if (rid_base & ~map_mask) {
+ pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
+ np, map_name, map_name,
+ map_mask, rid_base);
+ return -EFAULT;
+ }
+
+ if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
+ continue;
+
+ phandle_node = of_find_node_by_phandle(phandle);
+ if (!phandle_node)
+ return -ENODEV;
+
+ if (target) {
+ if (*target)
+ of_node_put(phandle_node);
+ else
+ *target = phandle_node;
+
+ if (*target != phandle_node)
+ continue;
+ }
+
+ if (id_out)
+ *id_out = masked_rid - rid_base + out_base;
+
+ pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
+ np, map_name, map_mask, rid_base, out_base,
+ rid_len, rid, masked_rid - rid_base + out_base);
+ return 0;
+ }
+
+ pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
+ np, map_name, rid, target && *target ? *target : NULL);
+ return -EFAULT;
+}
+EXPORT_SYMBOL_GPL(of_map_rid);
return -ENODEV;
/* Name & Type */
- csize = snprintf(str, len, "of:N%sT%s", dev->of_node->name,
+ /* %p eats all alphanum characters, so %c must be used here */
+ csize = snprintf(str, len, "of:N%pOFn%c%s", dev->of_node, 'T',
dev->of_node->type);
tsize = csize;
len -= csize;
if ((!dev) || (!dev->of_node))
return;
- add_uevent_var(env, "OF_NAME=%s", dev->of_node->name);
+ add_uevent_var(env, "OF_NAME=%pOFn", dev->of_node);
add_uevent_var(env, "OF_FULLNAME=%pOF", dev->of_node);
if (dev->of_node->type && strcmp("<NULL>", dev->of_node->type) != 0)
add_uevent_var(env, "OF_TYPE=%s", dev->of_node->type);
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_irq.h>
-#include <linux/of_pci.h>
#include <linux/string.h>
#include <linux/slab.h>
* "msi-map" property.
*/
for (parent_dev = dev; parent_dev; parent_dev = parent_dev->parent)
- if (!of_pci_map_rid(parent_dev->of_node, rid_in, "msi-map",
- "msi-map-mask", np, &rid_out))
+ if (!of_map_rid(parent_dev->of_node, rid_in, "msi-map",
+ "msi-map-mask", np, &rid_out))
break;
return rid_out;
}
return rc;
}
- dev_dbg(&mdio->dev, "registered phy %s at address %i\n",
- child->name, addr);
+ dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n",
+ child, addr);
return 0;
}
return rc;
}
- dev_dbg(&mdio->dev, "registered mdio device %s at address %i\n",
- child->name, addr);
+ dev_dbg(&mdio->dev, "registered mdio device %pOFn at address %i\n",
+ child, addr);
return 0;
}
continue;
/* be noisy to encourage people to set reg property */
- dev_info(&mdio->dev, "scan phy %s at address %i\n",
- child->name, addr);
+ dev_info(&mdio->dev, "scan phy %pOFn at address %i\n",
+ child, addr);
if (of_mdiobus_child_is_phy(child)) {
rc = of_mdiobus_register_phy(mdio, child, addr);
{
u32 nid;
int r;
- struct device_node *cpus;
- struct device_node *np = NULL;
-
- cpus = of_find_node_by_path("/cpus");
- if (!cpus)
- return;
-
- for_each_child_of_node(cpus, np) {
- /* Skip things that are not CPUs */
- if (of_node_cmp(np->type, "cpu") != 0)
- continue;
+ struct device_node *np;
+ for_each_of_cpu_node(np) {
r = of_property_read_u32(np, "numa-node-id", &nid);
if (r)
continue;
else
node_set(nid, numa_nodes_parsed);
}
-
- of_node_put(cpus);
}
static int __init of_numa_parse_memory_nodes(void)
np = of_get_next_parent(np);
}
if (np && r)
- pr_warn("Invalid \"numa-node-id\" property in node %s\n",
- np->name);
+ pr_warn("Invalid \"numa-node-id\" property in node %pOFn\n",
+ np);
of_node_put(np);
/*
char stem[0];
};
+#if defined(CONFIG_SPARC)
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 2
+#else
+#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
+#endif
+
+#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
+
extern struct mutex of_mutex;
extern struct list_head aliases_lookup;
extern struct kset *of_kset;
for_each_child_of_node(overlay_node, child) {
ret = add_changeset_node(ovcs, target_node, child);
if (ret) {
- pr_debug("Failed to apply node @%pOF/%s, err=%d\n",
- target_node, child->name, ret);
+ pr_debug("Failed to apply node @%pOF/%pOFn, err=%d\n",
+ target_node, child, ret);
of_node_put(child);
return ret;
}
*/
reg = of_get_property(node, "reg", NULL);
if (reg && (addr = of_translate_address(node, reg)) != OF_BAD_ADDR) {
- dev_set_name(dev, dev_name(dev) ? "%llx.%s:%s" : "%llx.%s",
- (unsigned long long)addr, node->name,
+ dev_set_name(dev, dev_name(dev) ? "%llx.%pOFn:%s" : "%llx.%pOFn",
+ (unsigned long long)addr, node,
dev_name(dev));
return;
}
WARN_ON(rc);
}
if (of_irq_to_resource_table(np, res, num_irq) != num_irq)
- pr_debug("not all legacy IRQ resources mapped for %s\n",
- np->name);
+ pr_debug("not all legacy IRQ resources mapped for %pOFn\n",
+ np);
}
dev->dev.of_node = of_node_get(np);
#size-cells = <0>;
reg = <0>;
- test-mux-dev {
- reg = <32>;
+ test-mux-dev@20 {
+ reg = <0x20>;
compatible = "unittest-i2c-dev";
status = "okay";
};
#size-cells = <0>;
reg = <0>;
- test-mux-dev {
- reg = <32>;
+ test-mux-dev@20 {
+ reg = <0x20>;
compatible = "unittest-i2c-dev";
status = "okay";
};
for_each_child_of_node(np, child) {
if (child->parent != np) {
- pr_err("Child node %s links to wrong parent %s\n",
- child->name, np->name);
+ pr_err("Child node %pOFn links to wrong parent %pOFn\n",
+ child, np);
rc = -EINVAL;
goto put_child;
}
of_unittest_printf_one(np, "%pOF", full_name);
of_unittest_printf_one(np, "%pOFf", full_name);
+ of_unittest_printf_one(np, "%pOFn", "dev");
+ of_unittest_printf_one(np, "%2pOFn", "dev");
+ of_unittest_printf_one(np, "%5pOFn", " dev");
+ of_unittest_printf_one(np, "%pOFnc", "dev:test-sub-device");
of_unittest_printf_one(np, "%pOFp", phandle_str);
of_unittest_printf_one(np, "%pOFP", "dev@100");
of_unittest_printf_one(np, "ABC %pOFP ABC", "ABC dev@100 ABC");
for_each_child_of_node(np, child) {
for_each_child_of_node(child, grandchild)
unittest(of_find_device_by_node(grandchild),
- "Could not create device for node '%s'\n",
- grandchild->name);
+ "Could not create device for node '%pOFn'\n",
+ grandchild);
}
of_platform_depopulate(&test_bus->dev);
for_each_child_of_node(np, child) {
for_each_child_of_node(child, grandchild)
unittest(!of_find_device_by_node(grandchild),
- "device didn't get destroyed '%s'\n",
- grandchild->name);
+ "device didn't get destroyed '%pOFn'\n",
+ grandchild);
}
platform_device_unregister(test_bus);
}
}
- for (np = overlay_base_root->child; np; np = np->sibling) {
- if (of_get_child_by_name(of_root, np->name)) {
- unittest(0, "illegal node name in overlay_base %s",
- np->name);
- return;
+ for_each_child_of_node(overlay_base_root, np) {
+ struct device_node *base_child;
+ for_each_child_of_node(of_root, base_child) {
+ if (!strcmp(np->full_name, base_child->full_name)) {
+ unittest(0, "illegal node name in overlay_base %pOFn",
+ np);
+ return;
+ }
}
}
pe = edev ? edev->pe : NULL;
if (pe) {
eeh_serialize_lock(&flags);
- eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
+ eeh_pe_mark_isolated(pe);
eeh_serialize_unlock(flags);
eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
}
EXPORT_SYMBOL_GPL(devm_of_pci_get_host_bridge_resources);
#endif /* CONFIG_OF_ADDRESS */
-/**
- * of_pci_map_rid - Translate a requester ID through a downstream mapping.
- * @np: root complex device node.
- * @rid: PCI requester ID to map.
- * @map_name: property name of the map to use.
- * @map_mask_name: optional property name of the mask to use.
- * @target: optional pointer to a target device node.
- * @id_out: optional pointer to receive the translated ID.
- *
- * Given a PCI requester ID, look up the appropriate implementation-defined
- * platform ID and/or the target device which receives transactions on that
- * ID, as per the "iommu-map" and "msi-map" bindings. Either of @target or
- * @id_out may be NULL if only the other is required. If @target points to
- * a non-NULL device node pointer, only entries targeting that node will be
- * matched; if it points to a NULL value, it will receive the device node of
- * the first matching target phandle, with a reference held.
- *
- * Return: 0 on success or a standard error code on failure.
- */
-int of_pci_map_rid(struct device_node *np, u32 rid,
- const char *map_name, const char *map_mask_name,
- struct device_node **target, u32 *id_out)
-{
- u32 map_mask, masked_rid;
- int map_len;
- const __be32 *map = NULL;
-
- if (!np || !map_name || (!target && !id_out))
- return -EINVAL;
-
- map = of_get_property(np, map_name, &map_len);
- if (!map) {
- if (target)
- return -ENODEV;
- /* Otherwise, no map implies no translation */
- *id_out = rid;
- return 0;
- }
-
- if (!map_len || map_len % (4 * sizeof(*map))) {
- pr_err("%pOF: Error: Bad %s length: %d\n", np,
- map_name, map_len);
- return -EINVAL;
- }
-
- /* The default is to select all bits. */
- map_mask = 0xffffffff;
-
- /*
- * Can be overridden by "{iommu,msi}-map-mask" property.
- * If of_property_read_u32() fails, the default is used.
- */
- if (map_mask_name)
- of_property_read_u32(np, map_mask_name, &map_mask);
-
- masked_rid = map_mask & rid;
- for ( ; map_len > 0; map_len -= 4 * sizeof(*map), map += 4) {
- struct device_node *phandle_node;
- u32 rid_base = be32_to_cpup(map + 0);
- u32 phandle = be32_to_cpup(map + 1);
- u32 out_base = be32_to_cpup(map + 2);
- u32 rid_len = be32_to_cpup(map + 3);
-
- if (rid_base & ~map_mask) {
- pr_err("%pOF: Invalid %s translation - %s-mask (0x%x) ignores rid-base (0x%x)\n",
- np, map_name, map_name,
- map_mask, rid_base);
- return -EFAULT;
- }
-
- if (masked_rid < rid_base || masked_rid >= rid_base + rid_len)
- continue;
-
- phandle_node = of_find_node_by_phandle(phandle);
- if (!phandle_node)
- return -ENODEV;
-
- if (target) {
- if (*target)
- of_node_put(phandle_node);
- else
- *target = phandle_node;
-
- if (*target != phandle_node)
- continue;
- }
-
- if (id_out)
- *id_out = masked_rid - rid_base + out_base;
-
- pr_debug("%pOF: %s, using mask %08x, rid-base: %08x, out-base: %08x, length: %08x, rid: %08x -> %08x\n",
- np, map_name, map_mask, rid_base, out_base,
- rid_len, rid, masked_rid - rid_base + out_base);
- return 0;
- }
-
- pr_err("%pOF: Invalid %s translation - no match for rid 0x%x on %pOF\n",
- np, map_name, rid, target && *target ? *target : NULL);
- return -EFAULT;
-}
-
#if IS_ENABLED(CONFIG_OF_IRQ)
/**
* of_irq_parse_pci - Resolve the interrupt for a PCI device
if (!cf->mem_base || !cf->io_virt || !cf->gpio_base ||
(__ioremap_at(io.start, cf->io_virt, cf->io_size,
- pgprot_val(pgprot_noncached(__pgprot(0)))) == NULL)) {
+ pgprot_noncached(PAGE_KERNEL)) == NULL)) {
dev_err(device, "can't ioremap ranges\n");
status = -ENOMEM;
goto fail1;
if (bci->dev->of_node) {
struct device_node *phynode;
- phynode = of_find_compatible_node(bci->dev->of_node->parent,
- NULL, "ti,twl4030-usb");
+ phynode = of_get_compatible_child(bci->dev->of_node->parent,
+ "ti,twl4030-usb");
if (phynode) {
bci->usb_nb.notifier_call = twl4030_bci_usb_ncb;
bci->transceiver = devm_usb_get_phy_by_node(
bci->dev, phynode, &bci->usb_nb);
+ of_node_put(phynode);
if (IS_ERR(bci->transceiver)) {
ret = PTR_ERR(bci->transceiver);
if (ret == -EPROBE_DEFER)
case REQ_OP_READ:
case REQ_OP_WRITE:
return sd_setup_read_write_cmnd(cmd);
- case REQ_OP_ZONE_REPORT:
- return sd_zbc_setup_report_cmnd(cmd);
case REQ_OP_ZONE_RESET:
return sd_zbc_setup_reset_cmnd(cmd);
default:
.check_events = sd_check_events,
.revalidate_disk = sd_revalidate_disk,
.unlock_native_capacity = sd_unlock_native_capacity,
+ .report_zones = sd_zbc_report_zones,
.pr_ops = &sd_pr_ops,
};
scsi_set_resid(SCpnt, blk_rq_bytes(req));
}
break;
- case REQ_OP_ZONE_REPORT:
- if (!result) {
- good_bytes = scsi_bufflen(SCpnt)
- - scsi_get_resid(SCpnt);
- scsi_set_resid(SCpnt, 0);
- } else {
- good_bytes = 0;
- scsi_set_resid(SCpnt, blk_rq_bytes(req));
- }
- break;
default:
/*
* In case of bogus fw or device, we could end up having
del_gendisk(sdkp->disk);
sd_shutdown(dev);
- sd_zbc_remove(sdkp);
-
free_opal_dev(sdkp->opal_dev);
blk_register_region(devt, SD_MINORS, NULL,
#ifdef CONFIG_BLK_DEV_ZONED
u32 nr_zones;
u32 zone_blocks;
- u32 zone_shift;
u32 zones_optimal_open;
u32 zones_optimal_nonseq;
u32 zones_max_open;
#ifdef CONFIG_BLK_DEV_ZONED
extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer);
-extern void sd_zbc_remove(struct scsi_disk *sdkp);
extern void sd_zbc_print_zones(struct scsi_disk *sdkp);
-extern int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd);
extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd);
extern void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes,
struct scsi_sense_hdr *sshdr);
+extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask);
#else /* CONFIG_BLK_DEV_ZONED */
return 0;
}
-static inline void sd_zbc_remove(struct scsi_disk *sdkp) {}
-
static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {}
-static inline int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd)
-{
- return BLKPREP_INVALID;
-}
-
static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd)
{
return BLKPREP_INVALID;
unsigned int good_bytes,
struct scsi_sense_hdr *sshdr) {}
+#define sd_zbc_report_zones NULL
+
#endif /* CONFIG_BLK_DEV_ZONED */
#endif /* _SCSI_DISK_H */
}
/**
- * sd_zbc_report_zones - Issue a REPORT ZONES scsi command.
+ * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
* @sdkp: The target disk
* @buf: Buffer to use for the reply
* @buflen: the buffer size
* @lba: Start LBA of the report
+ * @partial: Do partial report
*
* For internal use during device validation.
+ * Using partial=true can significantly speed up execution of a report zones
+ * command because the disk does not have to count all possible report matching
+ * zones and will only report the count of zones fitting in the command reply
+ * buffer.
*/
-static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
- unsigned int buflen, sector_t lba)
+static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf,
+ unsigned int buflen, sector_t lba,
+ bool partial)
{
struct scsi_device *sdp = sdkp->device;
const int timeout = sdp->request_queue->rq_timeout;
cmd[1] = ZI_REPORT_ZONES;
put_unaligned_be64(lba, &cmd[2]);
put_unaligned_be32(buflen, &cmd[10]);
+ if (partial)
+ cmd[14] = ZBC_REPORT_ZONE_PARTIAL;
memset(buf, 0, buflen);
result = scsi_execute_req(sdp, cmd, DMA_FROM_DEVICE,
}
/**
- * sd_zbc_setup_report_cmnd - Prepare a REPORT ZONES scsi command
- * @cmd: The command to setup
+ * sd_zbc_report_zones - Disk report zones operation.
+ * @disk: The target disk
+ * @sector: Start 512B sector of the report
+ * @zones: Array of zone descriptors
+ * @nr_zones: Number of descriptors in the array
+ * @gfp_mask: Memory allocation mask
*
- * Call in sd_init_command() for a REQ_OP_ZONE_REPORT request.
+ * Execute a report zones command on the target disk.
*/
-int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd)
+int sd_zbc_report_zones(struct gendisk *disk, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask)
{
- struct request *rq = cmd->request;
- struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
- sector_t lba, sector = blk_rq_pos(rq);
- unsigned int nr_bytes = blk_rq_bytes(rq);
- int ret;
-
- WARN_ON(nr_bytes == 0);
+ struct scsi_disk *sdkp = scsi_disk(disk);
+ unsigned int i, buflen, nrz = *nr_zones;
+ unsigned char *buf;
+ size_t offset = 0;
+ int ret = 0;
if (!sd_is_zoned(sdkp))
/* Not a zoned device */
- return BLKPREP_KILL;
-
- ret = scsi_init_io(cmd);
- if (ret != BLKPREP_OK)
- return ret;
-
- cmd->cmd_len = 16;
- memset(cmd->cmnd, 0, cmd->cmd_len);
- cmd->cmnd[0] = ZBC_IN;
- cmd->cmnd[1] = ZI_REPORT_ZONES;
- lba = sectors_to_logical(sdkp->device, sector);
- put_unaligned_be64(lba, &cmd->cmnd[2]);
- put_unaligned_be32(nr_bytes, &cmd->cmnd[10]);
- /* Do partial report for speeding things up */
- cmd->cmnd[14] = ZBC_REPORT_ZONE_PARTIAL;
-
- cmd->sc_data_direction = DMA_FROM_DEVICE;
- cmd->sdb.length = nr_bytes;
- cmd->transfersize = sdkp->device->sector_size;
- cmd->allowed = 0;
-
- return BLKPREP_OK;
-}
-
-/**
- * sd_zbc_report_zones_complete - Process a REPORT ZONES scsi command reply.
- * @scmd: The completed report zones command
- * @good_bytes: reply size in bytes
- *
- * Convert all reported zone descriptors to struct blk_zone. The conversion
- * is done in-place, directly in the request specified sg buffer.
- */
-static void sd_zbc_report_zones_complete(struct scsi_cmnd *scmd,
- unsigned int good_bytes)
-{
- struct request *rq = scmd->request;
- struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
- struct sg_mapping_iter miter;
- struct blk_zone_report_hdr hdr;
- struct blk_zone zone;
- unsigned int offset, bytes = 0;
- unsigned long flags;
- u8 *buf;
-
- if (good_bytes < 64)
- return;
+ return -EOPNOTSUPP;
- memset(&hdr, 0, sizeof(struct blk_zone_report_hdr));
-
- sg_miter_start(&miter, scsi_sglist(scmd), scsi_sg_count(scmd),
- SG_MITER_TO_SG | SG_MITER_ATOMIC);
-
- local_irq_save(flags);
- while (sg_miter_next(&miter) && bytes < good_bytes) {
+ /*
+ * Get a reply buffer for the number of requested zones plus a header.
+ * For ATA, buffers must be aligned to 512B.
+ */
+ buflen = roundup((nrz + 1) * 64, 512);
+ buf = kmalloc(buflen, gfp_mask);
+ if (!buf)
+ return -ENOMEM;
- buf = miter.addr;
- offset = 0;
+ ret = sd_zbc_do_report_zones(sdkp, buf, buflen,
+ sectors_to_logical(sdkp->device, sector), true);
+ if (ret)
+ goto out_free_buf;
- if (bytes == 0) {
- /* Set the report header */
- hdr.nr_zones = min_t(unsigned int,
- (good_bytes - 64) / 64,
- get_unaligned_be32(&buf[0]) / 64);
- memcpy(buf, &hdr, sizeof(struct blk_zone_report_hdr));
- offset += 64;
- bytes += 64;
- }
+ nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64);
+ for (i = 0; i < nrz; i++) {
+ offset += 64;
+ sd_zbc_parse_report(sdkp, buf + offset, zones);
+ zones++;
+ }
- /* Parse zone descriptors */
- while (offset < miter.length && hdr.nr_zones) {
- WARN_ON(offset > miter.length);
- buf = miter.addr + offset;
- sd_zbc_parse_report(sdkp, buf, &zone);
- memcpy(buf, &zone, sizeof(struct blk_zone));
- offset += 64;
- bytes += 64;
- hdr.nr_zones--;
- }
+ *nr_zones = nrz;
- if (!hdr.nr_zones)
- break;
+out_free_buf:
+ kfree(buf);
- }
- sg_miter_stop(&miter);
- local_irq_restore(flags);
+ return ret;
}
/**
case REQ_OP_WRITE_ZEROES:
case REQ_OP_WRITE_SAME:
break;
-
- case REQ_OP_ZONE_REPORT:
-
- if (!result)
- sd_zbc_report_zones_complete(cmd, good_bytes);
- break;
-
}
}
/**
- * sd_zbc_read_zoned_characteristics - Read zoned block device characteristics
+ * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
* @sdkp: Target disk
* @buf: Buffer where to store the VPD page data
*
- * Read VPD page B6.
+ * Read VPD page B6, get information and check that reads are unconstrained.
*/
-static int sd_zbc_read_zoned_characteristics(struct scsi_disk *sdkp,
- unsigned char *buf)
+static int sd_zbc_check_zoned_characteristics(struct scsi_disk *sdkp,
+ unsigned char *buf)
{
if (scsi_get_vpd_page(sdkp->device, 0xb6, buf, 64)) {
sd_printk(KERN_NOTICE, sdkp,
- "Unconstrained-read check failed\n");
+ "Read zoned characteristics VPD page failed\n");
return -ENODEV;
}
sdkp->zones_max_open = get_unaligned_be32(&buf[16]);
}
- return 0;
-}
-
-/**
- * sd_zbc_check_capacity - Check reported capacity.
- * @sdkp: Target disk
- * @buf: Buffer to use for commands
- *
- * ZBC drive may report only the capacity of the first conventional zones at
- * LBA 0. This is indicated by the RC_BASIS field of the read capacity reply.
- * Check this here. If the disk reported only its conventional zones capacity,
- * get the total capacity by doing a report zones.
- */
-static int sd_zbc_check_capacity(struct scsi_disk *sdkp, unsigned char *buf)
-{
- sector_t lba;
- int ret;
-
- if (sdkp->rc_basis != 0)
- return 0;
-
- /* Do a report zone to get the maximum LBA to check capacity */
- ret = sd_zbc_report_zones(sdkp, buf, SD_BUF_SIZE, 0);
- if (ret)
- return ret;
-
- /* The max_lba field is the capacity of this device */
- lba = get_unaligned_be64(&buf[8]);
- if (lba + 1 == sdkp->capacity)
- return 0;
-
- if (sdkp->first_scan)
- sd_printk(KERN_WARNING, sdkp,
- "Changing capacity from %llu to max LBA+1 %llu\n",
- (unsigned long long)sdkp->capacity,
- (unsigned long long)lba + 1);
- sdkp->capacity = lba + 1;
+ /*
+ * Check for unconstrained reads: host-managed devices with
+ * constrained reads (drives failing read after write pointer)
+ * are not supported.
+ */
+ if (!sdkp->urswrz) {
+ if (sdkp->first_scan)
+ sd_printk(KERN_NOTICE, sdkp,
+ "constrained reads devices are not supported\n");
+ return -ENODEV;
+ }
return 0;
}
#define SD_ZBC_BUF_SIZE 131072U
/**
- * sd_zbc_check_zone_size - Check the device zone sizes
+ * sd_zbc_check_zones - Check the device capacity and zone sizes
* @sdkp: Target disk
*
- * Check that all zones of the device are equal. The last zone can however
- * be smaller. The zone size must also be a power of two number of LBAs.
+ * Check that the device capacity as reported by READ CAPACITY matches the
+ * max_lba value (plus one)of the report zones command reply. Also check that
+ * all zones of the device have an equal size, only allowing the last zone of
+ * the disk to have a smaller size (runt zone). The zone size must also be a
+ * power of two.
*
* Returns the zone size in number of blocks upon success or an error code
* upon failure.
*/
-static s64 sd_zbc_check_zone_size(struct scsi_disk *sdkp)
+static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks)
{
u64 zone_blocks = 0;
- sector_t block = 0;
+ sector_t max_lba, block = 0;
unsigned char *buf;
unsigned char *rec;
unsigned int buf_len;
unsigned int list_length;
- s64 ret;
+ int ret;
u8 same;
/* Get a buffer */
if (!buf)
return -ENOMEM;
- /* Do a report zone to get the same field */
- ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0);
+ /* Do a report zone to get max_lba and the same field */
+ ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false);
if (ret)
goto out_free;
+ if (sdkp->rc_basis == 0) {
+ /* The max_lba field is the capacity of this device */
+ max_lba = get_unaligned_be64(&buf[8]);
+ if (sdkp->capacity != max_lba + 1) {
+ if (sdkp->first_scan)
+ sd_printk(KERN_WARNING, sdkp,
+ "Changing capacity from %llu to max LBA+1 %llu\n",
+ (unsigned long long)sdkp->capacity,
+ (unsigned long long)max_lba + 1);
+ sdkp->capacity = max_lba + 1;
+ }
+ }
+
+ /*
+ * Check same field: for any value other than 0, we know that all zones
+ * have the same size.
+ */
same = buf[4] & 0x0f;
if (same > 0) {
rec = &buf[64];
}
if (block < sdkp->capacity) {
- ret = sd_zbc_report_zones(sdkp, buf,
- SD_ZBC_BUF_SIZE, block);
+ ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
+ block, true);
if (ret)
goto out_free;
}
if (sdkp->first_scan)
sd_printk(KERN_NOTICE, sdkp,
"Zone size too large\n");
- ret = -ENODEV;
+ ret = -EFBIG;
} else {
- ret = zone_blocks;
+ *zblocks = zone_blocks;
+ ret = 0;
}
out_free:
return ret;
}
-/**
- * sd_zbc_alloc_zone_bitmap - Allocate a zone bitmap (one bit per zone).
- * @nr_zones: Number of zones to allocate space for.
- * @numa_node: NUMA node to allocate the memory from.
- */
-static inline unsigned long *
-sd_zbc_alloc_zone_bitmap(u32 nr_zones, int numa_node)
-{
- return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
- GFP_KERNEL, numa_node);
-}
-
-/**
- * sd_zbc_get_seq_zones - Parse report zones reply to identify sequential zones
- * @sdkp: disk used
- * @buf: report reply buffer
- * @buflen: length of @buf
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
- * @seq_zones_bitmap: bitmap of sequential zones to set
- *
- * Parse reported zone descriptors in @buf to identify sequential zones and
- * set the reported zone bit in @seq_zones_bitmap accordingly.
- * Since read-only and offline zones cannot be written, do not
- * mark them as sequential in the bitmap.
- * Return the LBA after the last zone reported.
- */
-static sector_t sd_zbc_get_seq_zones(struct scsi_disk *sdkp, unsigned char *buf,
- unsigned int buflen, u32 zone_shift,
- unsigned long *seq_zones_bitmap)
-{
- sector_t lba, next_lba = sdkp->capacity;
- unsigned int buf_len, list_length;
- unsigned char *rec;
- u8 type, cond;
-
- list_length = get_unaligned_be32(&buf[0]) + 64;
- buf_len = min(list_length, buflen);
- rec = buf + 64;
-
- while (rec < buf + buf_len) {
- type = rec[0] & 0x0f;
- cond = (rec[1] >> 4) & 0xf;
- lba = get_unaligned_be64(&rec[16]);
- if (type != ZBC_ZONE_TYPE_CONV &&
- cond != ZBC_ZONE_COND_READONLY &&
- cond != ZBC_ZONE_COND_OFFLINE)
- set_bit(lba >> zone_shift, seq_zones_bitmap);
- next_lba = lba + get_unaligned_be64(&rec[8]);
- rec += 64;
- }
-
- return next_lba;
-}
-
-/**
- * sd_zbc_setup_seq_zones_bitmap - Initialize a seq zone bitmap.
- * @sdkp: target disk
- * @zone_shift: logarithm base 2 of the number of blocks in a zone
- * @nr_zones: number of zones to set up a seq zone bitmap for
- *
- * Allocate a zone bitmap and initialize it by identifying sequential zones.
- */
-static unsigned long *
-sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift,
- u32 nr_zones)
-{
- struct request_queue *q = sdkp->disk->queue;
- unsigned long *seq_zones_bitmap;
- sector_t lba = 0;
- unsigned char *buf;
- int ret = -ENOMEM;
-
- seq_zones_bitmap = sd_zbc_alloc_zone_bitmap(nr_zones, q->node);
- if (!seq_zones_bitmap)
- return ERR_PTR(-ENOMEM);
-
- buf = kmalloc(SD_ZBC_BUF_SIZE, GFP_KERNEL);
- if (!buf)
- goto out;
-
- while (lba < sdkp->capacity) {
- ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, lba);
- if (ret)
- goto out;
- lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE,
- zone_shift, seq_zones_bitmap);
- }
-
- if (lba != sdkp->capacity) {
- /* Something went wrong */
- ret = -EIO;
- }
-
-out:
- kfree(buf);
- if (ret) {
- kfree(seq_zones_bitmap);
- return ERR_PTR(ret);
- }
- return seq_zones_bitmap;
-}
-
-static void sd_zbc_cleanup(struct scsi_disk *sdkp)
-{
- struct request_queue *q = sdkp->disk->queue;
-
- kfree(q->seq_zones_bitmap);
- q->seq_zones_bitmap = NULL;
-
- kfree(q->seq_zones_wlock);
- q->seq_zones_wlock = NULL;
-
- q->nr_zones = 0;
-}
-
-static int sd_zbc_setup(struct scsi_disk *sdkp, u32 zone_blocks)
-{
- struct request_queue *q = sdkp->disk->queue;
- u32 zone_shift = ilog2(zone_blocks);
- u32 nr_zones;
- int ret;
-
- /* chunk_sectors indicates the zone size */
- blk_queue_chunk_sectors(q,
- logical_to_sectors(sdkp->device, zone_blocks));
- nr_zones = round_up(sdkp->capacity, zone_blocks) >> zone_shift;
-
- /*
- * Initialize the device request queue information if the number
- * of zones changed.
- */
- if (nr_zones != sdkp->nr_zones || nr_zones != q->nr_zones) {
- unsigned long *seq_zones_wlock = NULL, *seq_zones_bitmap = NULL;
- size_t zone_bitmap_size;
-
- if (nr_zones) {
- seq_zones_wlock = sd_zbc_alloc_zone_bitmap(nr_zones,
- q->node);
- if (!seq_zones_wlock) {
- ret = -ENOMEM;
- goto err;
- }
-
- seq_zones_bitmap = sd_zbc_setup_seq_zones_bitmap(sdkp,
- zone_shift, nr_zones);
- if (IS_ERR(seq_zones_bitmap)) {
- ret = PTR_ERR(seq_zones_bitmap);
- kfree(seq_zones_wlock);
- goto err;
- }
- }
- zone_bitmap_size = BITS_TO_LONGS(nr_zones) *
- sizeof(unsigned long);
- blk_mq_freeze_queue(q);
- if (q->nr_zones != nr_zones) {
- /* READ16/WRITE16 is mandatory for ZBC disks */
- sdkp->device->use_16_for_rw = 1;
- sdkp->device->use_10_for_rw = 0;
-
- sdkp->zone_blocks = zone_blocks;
- sdkp->zone_shift = zone_shift;
- sdkp->nr_zones = nr_zones;
- q->nr_zones = nr_zones;
- swap(q->seq_zones_wlock, seq_zones_wlock);
- swap(q->seq_zones_bitmap, seq_zones_bitmap);
- } else if (memcmp(q->seq_zones_bitmap, seq_zones_bitmap,
- zone_bitmap_size) != 0) {
- memcpy(q->seq_zones_bitmap, seq_zones_bitmap,
- zone_bitmap_size);
- }
- blk_mq_unfreeze_queue(q);
- kfree(seq_zones_wlock);
- kfree(seq_zones_bitmap);
- }
-
- return 0;
-
-err:
- sd_zbc_cleanup(sdkp);
- return ret;
-}
-
int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buf)
{
- int64_t zone_blocks;
+ struct gendisk *disk = sdkp->disk;
+ unsigned int nr_zones;
+ u32 zone_blocks;
int ret;
if (!sd_is_zoned(sdkp))
*/
return 0;
- /* Get zoned block device characteristics */
- ret = sd_zbc_read_zoned_characteristics(sdkp, buf);
- if (ret)
- goto err;
-
- /*
- * Check for unconstrained reads: host-managed devices with
- * constrained reads (drives failing read after write pointer)
- * are not supported.
- */
- if (!sdkp->urswrz) {
- if (sdkp->first_scan)
- sd_printk(KERN_NOTICE, sdkp,
- "constrained reads devices are not supported\n");
- ret = -ENODEV;
- goto err;
- }
-
- /* Check capacity */
- ret = sd_zbc_check_capacity(sdkp, buf);
+ /* Check zoned block device characteristics (unconstrained reads) */
+ ret = sd_zbc_check_zoned_characteristics(sdkp, buf);
if (ret)
goto err;
* Check zone size: only devices with a constant zone size (except
* an eventual last runt zone) that is a power of 2 are supported.
*/
- zone_blocks = sd_zbc_check_zone_size(sdkp);
- ret = -EFBIG;
- if (zone_blocks != (u32)zone_blocks)
- goto err;
- ret = zone_blocks;
- if (ret < 0)
+ ret = sd_zbc_check_zones(sdkp, &zone_blocks);
+ if (ret != 0)
goto err;
/* The drive satisfies the kernel restrictions: set it up */
- ret = sd_zbc_setup(sdkp, zone_blocks);
- if (ret)
- goto err;
+ blk_queue_chunk_sectors(sdkp->disk->queue,
+ logical_to_sectors(sdkp->device, zone_blocks));
+ nr_zones = round_up(sdkp->capacity, zone_blocks) >> ilog2(zone_blocks);
+
+ /* READ16/WRITE16 is mandatory for ZBC disks */
+ sdkp->device->use_16_for_rw = 1;
+ sdkp->device->use_10_for_rw = 0;
+
+ /*
+ * If something changed, revalidate the disk zone bitmaps once we have
+ * the capacity, that is on the second revalidate execution during disk
+ * scan and always during normal revalidate.
+ */
+ if (sdkp->first_scan)
+ return 0;
+ if (sdkp->zone_blocks != zone_blocks ||
+ sdkp->nr_zones != nr_zones ||
+ disk->queue->nr_zones != nr_zones) {
+ ret = blk_revalidate_disk_zones(disk);
+ if (ret != 0)
+ goto err;
+ sdkp->zone_blocks = zone_blocks;
+ sdkp->nr_zones = nr_zones;
+ }
return 0;
err:
sdkp->capacity = 0;
- sd_zbc_cleanup(sdkp);
return ret;
}
-void sd_zbc_remove(struct scsi_disk *sdkp)
-{
- sd_zbc_cleanup(sdkp);
-}
-
void sd_zbc_print_zones(struct scsi_disk *sdkp)
{
if (!sd_is_zoned(sdkp) || !sdkp->capacity)
domains_node = of_get_child_by_name(np_pmu, "domains");
if (!domains_node) {
- pr_err("%s: failed to find domains sub-node\n", np_pmu->name);
+ pr_err("%pOFn: failed to find domains sub-node\n", np_pmu);
return 0;
}
pmu->pmc_base = of_iomap(pmu->of_node, 0);
pmu->pmu_base = of_iomap(pmu->of_node, 1);
if (!pmu->pmc_base || !pmu->pmu_base) {
- pr_err("%s: failed to map PMU\n", np_pmu->name);
+ pr_err("%pOFn: failed to map PMU\n", np_pmu);
iounmap(pmu->pmu_base);
iounmap(pmu->pmc_base);
kfree(pmu);
break;
domain->pmu = pmu;
- domain->base.name = kstrdup(np->name, GFP_KERNEL);
+ domain->base.name = kasprintf(GFP_KERNEL, "%pOFn", np);
if (!domain->base.name) {
kfree(domain);
break;
/* Loss of the interrupt controller is not a fatal error. */
parent_irq = irq_of_parse_and_map(pmu->of_node, 0);
if (!parent_irq) {
- pr_err("%s: no interrupt specified\n", np_pmu->name);
+ pr_err("%pOFn: no interrupt specified\n", np_pmu);
} else {
ret = dove_init_pmu_irq(pmu, parent_irq);
if (ret)
static int zero_priv_mem(phys_addr_t addr, size_t sz)
{
/* map as cacheable, non-guarded */
- void __iomem *tmpp = ioremap_prot(addr, sz, 0);
+ void __iomem *tmpp = ioremap_cache(addr, sz);
if (!tmpp)
return -ENOMEM;
pdev = of_find_device_by_node(np2);
if (!pdev) {
- pr_err("%s: failed to lookup pdev\n", np2->name);
+ pr_err("%pOFn: failed to lookup pdev\n", np2);
of_node_put(np2);
return -EINVAL;
}
pdev = of_find_device_by_node(np2);
if (!pdev) {
ret = -EINVAL;
- pr_err("%s: failed to lookup pdev\n", np2->name);
+ pr_err("%pOFn: failed to lookup pdev\n", np2);
of_node_put(np2);
goto err_miss_siram_property;
}
adev->domain_id = id->domain_id;
adev->version = id->svc_version;
if (np)
- strncpy(adev->name, np->name, APR_NAME_SIZE);
+ snprintf(adev->name, APR_NAME_SIZE, "%pOFn", np);
else
strncpy(adev->name, id->name, APR_NAME_SIZE);
error = of_property_read_u32(node, "reg", &id);
if (error) {
dev_err(pmu->dev,
- "%s: failed to retrieve domain id (reg): %d\n",
- node->name, error);
+ "%pOFn: failed to retrieve domain id (reg): %d\n",
+ node, error);
return -EINVAL;
}
if (id >= pmu->info->num_domains) {
- dev_err(pmu->dev, "%s: invalid domain id %d\n",
- node->name, id);
+ dev_err(pmu->dev, "%pOFn: invalid domain id %d\n",
+ node, id);
return -EINVAL;
}
pd_info = &pmu->info->domain_info[id];
if (!pd_info) {
- dev_err(pmu->dev, "%s: undefined domain id %d\n",
- node->name, id);
+ dev_err(pmu->dev, "%pOFn: undefined domain id %d\n",
+ node, id);
return -EINVAL;
}
if (!pd->clks)
return -ENOMEM;
} else {
- dev_dbg(pmu->dev, "%s: doesn't have clocks: %d\n",
- node->name, pd->num_clks);
+ dev_dbg(pmu->dev, "%pOFn: doesn't have clocks: %d\n",
+ node, pd->num_clks);
pd->num_clks = 0;
}
if (IS_ERR(pd->clks[i].clk)) {
error = PTR_ERR(pd->clks[i].clk);
dev_err(pmu->dev,
- "%s: failed to get clk at index %d: %d\n",
- node->name, i, error);
+ "%pOFn: failed to get clk at index %d: %d\n",
+ node, i, error);
return error;
}
}
error = rockchip_pd_power(pd, true);
if (error) {
dev_err(pmu->dev,
- "failed to power on domain '%s': %d\n",
- node->name, error);
+ "failed to power on domain '%pOFn': %d\n",
+ node, error);
goto err_unprepare_clocks;
}
error = of_property_read_u32(parent, "reg", &idx);
if (error) {
dev_err(pmu->dev,
- "%s: failed to retrieve domain id (reg): %d\n",
- parent->name, error);
+ "%pOFn: failed to retrieve domain id (reg): %d\n",
+ parent, error);
goto err_out;
}
parent_domain = pmu->genpd_data.domains[idx];
error = rockchip_pm_add_one_domain(pmu, np);
if (error) {
- dev_err(pmu->dev, "failed to handle node %s: %d\n",
- np->name, error);
+ dev_err(pmu->dev, "failed to handle node %pOFn: %d\n",
+ np, error);
goto err_out;
}
error = of_property_read_u32(np, "reg", &idx);
if (error) {
dev_err(pmu->dev,
- "%s: failed to retrieve domain id (reg): %d\n",
- np->name, error);
+ "%pOFn: failed to retrieve domain id (reg): %d\n",
+ np, error);
goto err_out;
}
child_domain = pmu->genpd_data.domains[idx];
for_each_available_child_of_node(np, node) {
error = rockchip_pm_add_one_domain(pmu, node);
if (error) {
- dev_err(dev, "failed to handle node %s: %d\n",
- node->name, error);
+ dev_err(dev, "failed to handle node %pOFn: %d\n",
+ node, error);
of_node_put(node);
goto err_out;
}
error = rockchip_pm_add_subdomain(pmu, node);
if (error < 0) {
- dev_err(dev, "failed to handle subdomain node %s: %d\n",
- node->name, error);
+ dev_err(dev, "failed to handle subdomain node %pOFn: %d\n",
+ node, error);
of_node_put(node);
goto err_out;
}
id = tegra_powergate_lookup(pmc, np->name);
if (id < 0) {
- pr_err("powergate lookup failed for %s: %d\n", np->name, id);
+ pr_err("powergate lookup failed for %pOFn: %d\n", np, id);
goto free_mem;
}
err = tegra_powergate_of_get_clks(pg, np);
if (err < 0) {
- pr_err("failed to get clocks for %s: %d\n", np->name, err);
+ pr_err("failed to get clocks for %pOFn: %d\n", np, err);
goto set_available;
}
err = tegra_powergate_of_get_resets(pg, np, off);
if (err < 0) {
- pr_err("failed to get resets for %s: %d\n", np->name, err);
+ pr_err("failed to get resets for %pOFn: %d\n", np, err);
goto remove_clks;
}
err = pm_genpd_init(&pg->genpd, NULL, off);
if (err < 0) {
- pr_err("failed to initialise PM domain %s: %d\n", np->name,
+ pr_err("failed to initialise PM domain %pOFn: %d\n", np,
err);
goto remove_resets;
}
err = of_genpd_add_provider_simple(np, &pg->genpd);
if (err < 0) {
- pr_err("failed to add PM domain provider for %s: %d\n",
- np->name, err);
+ pr_err("failed to add PM domain provider for %pOFn: %d\n",
+ np, err);
goto remove_genpd;
}
ret = of_address_to_resource(node, index, &res);
if (ret) {
- dev_err(dev, "Can't translate of node(%s) address for index(%d)\n",
- node->name, index);
+ dev_err(dev, "Can't translate of node(%pOFn) address for index(%d)\n",
+ node, index);
return ERR_PTR(ret);
}
regs = devm_ioremap_resource(kdev->dev, &res);
if (IS_ERR(regs))
- dev_err(dev, "Failed to map register base for index(%d) node(%s)\n",
- index, node->name);
+ dev_err(dev, "Failed to map register base for index(%d) node(%pOFn)\n",
+ index, node);
if (_size)
*_size = resource_size(&res);
ret = of_address_to_resource(node, index, &res);
if (ret) {
- dev_err(kdev->dev, "Can't translate of node(%s) address for index(%d)\n",
- node->name, index);
+ dev_err(kdev->dev, "Can't translate of node(%pOFn) address for index(%d)\n",
+ node, index);
return ERR_PTR(ret);
}
regs = devm_ioremap_resource(kdev->dev, &res);
if (IS_ERR(regs))
- dev_err(kdev->dev, "Failed to map register base for index(%d) node(%s)\n",
- index, node->name);
+ dev_err(kdev->dev, "Failed to map register base for index(%d) node(%pOFn)\n",
+ index, node);
return regs;
}
* TURBOchannel bus services.
*
* Copyright (c) Harald Koerfgen, 1998
- * Copyright (c) 2001, 2003, 2005, 2006 Maciej W. Rozycki
+ * Copyright (c) 2001, 2003, 2005, 2006, 2018 Maciej W. Rozycki
* Copyright (c) 2005 James Simmons
*
* This file is subject to the terms and conditions of the GNU
* directory of this archive for more details.
*/
#include <linux/compiler.h>
+#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/init.h>
#include <linux/ioport.h>
tdev->dev.bus = &tc_bus_type;
tdev->slot = slot;
+ /* TURBOchannel has 34-bit DMA addressing (16GiB space). */
+ tdev->dma_mask = DMA_BIT_MASK(34);
+ tdev->dev.dma_mask = &tdev->dma_mask;
+ tdev->dev.coherent_dma_mask = DMA_BIT_MASK(34);
+
for (i = 0; i < 8; i++) {
tdev->firmware[i] =
readb(module + offset + TC_FIRM_VER + 4 * i);
endmenu
menu "STMicroelectronics thermal drivers"
-depends on ARCH_STI && OF
+depends on (ARCH_STI || ARCH_STM32) && OF
source "drivers/thermal/st/Kconfig"
endmenu
obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/
obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o
obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o
-obj-$(CONFIG_ST_THERMAL) += st/
+obj-y += st/
obj-$(CONFIG_QCOM_TSENS) += qcom/
obj-y += tegra/
obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o
/* First memory region points towards the status register */
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (IS_ERR(res))
- return PTR_ERR(res);
+ if (!res)
+ return -EIO;
/*
* Edit the resource start address and length to map over all the
THERMAL_EVENT_UNSPECIFIED);
delay = msecs_to_jiffies(thermal->zone->passive_delay);
- schedule_delayed_work(&thermal->work, delay);
+ queue_delayed_work(system_freezable_wq, &thermal->work, delay);
return;
}
struct da9062_thermal *thermal = data;
disable_irq_nosync(thermal->irq);
- schedule_delayed_work(&thermal->work, 0);
+ queue_delayed_work(system_freezable_wq, &thermal->work, 0);
return IRQ_HANDLED;
}
#define HI3660_TEMP_STEP (205)
#define HI3660_TEMP_LAG (4000)
-#define HI6220_DEFAULT_SENSOR 2
-#define HI3660_DEFAULT_SENSOR 1
+#define HI6220_CLUSTER0_SENSOR 2
+#define HI6220_CLUSTER1_SENSOR 1
+
+#define HI3660_LITTLE_SENSOR 0
+#define HI3660_BIG_SENSOR 1
+#define HI3660_G3D_SENSOR 2
+#define HI3660_MODEM_SENSOR 3
+
+struct hisi_thermal_data;
struct hisi_thermal_sensor {
+ struct hisi_thermal_data *data;
struct thermal_zone_device *tzd;
+ const char *irq_name;
uint32_t id;
uint32_t thres_temp;
};
+struct hisi_thermal_ops {
+ int (*get_temp)(struct hisi_thermal_sensor *sensor);
+ int (*enable_sensor)(struct hisi_thermal_sensor *sensor);
+ int (*disable_sensor)(struct hisi_thermal_sensor *sensor);
+ int (*irq_handler)(struct hisi_thermal_sensor *sensor);
+ int (*probe)(struct hisi_thermal_data *data);
+};
+
struct hisi_thermal_data {
- int (*get_temp)(struct hisi_thermal_data *data);
- int (*enable_sensor)(struct hisi_thermal_data *data);
- int (*disable_sensor)(struct hisi_thermal_data *data);
- int (*irq_handler)(struct hisi_thermal_data *data);
+ const struct hisi_thermal_ops *ops;
+ struct hisi_thermal_sensor *sensor;
struct platform_device *pdev;
struct clk *clk;
- struct hisi_thermal_sensor sensor;
void __iomem *regs;
- int irq;
+ int nr_sensors;
};
/*
(value << 4), addr + HI6220_TEMP0_CFG);
}
-static int hi6220_thermal_irq_handler(struct hisi_thermal_data *data)
+static int hi6220_thermal_irq_handler(struct hisi_thermal_sensor *sensor)
{
+ struct hisi_thermal_data *data = sensor->data;
+
hi6220_thermal_alarm_clear(data->regs, 1);
return 0;
}
-static int hi3660_thermal_irq_handler(struct hisi_thermal_data *data)
+static int hi3660_thermal_irq_handler(struct hisi_thermal_sensor *sensor)
{
- hi3660_thermal_alarm_clear(data->regs, data->sensor.id, 1);
+ struct hisi_thermal_data *data = sensor->data;
+
+ hi3660_thermal_alarm_clear(data->regs, sensor->id, 1);
return 0;
}
-static int hi6220_thermal_get_temp(struct hisi_thermal_data *data)
+static int hi6220_thermal_get_temp(struct hisi_thermal_sensor *sensor)
{
+ struct hisi_thermal_data *data = sensor->data;
+
return hi6220_thermal_get_temperature(data->regs);
}
-static int hi3660_thermal_get_temp(struct hisi_thermal_data *data)
+static int hi3660_thermal_get_temp(struct hisi_thermal_sensor *sensor)
{
- return hi3660_thermal_get_temperature(data->regs, data->sensor.id);
+ struct hisi_thermal_data *data = sensor->data;
+
+ return hi3660_thermal_get_temperature(data->regs, sensor->id);
}
-static int hi6220_thermal_disable_sensor(struct hisi_thermal_data *data)
+static int hi6220_thermal_disable_sensor(struct hisi_thermal_sensor *sensor)
{
+ struct hisi_thermal_data *data = sensor->data;
+
/* disable sensor module */
hi6220_thermal_enable(data->regs, 0);
hi6220_thermal_alarm_enable(data->regs, 0);
return 0;
}
-static int hi3660_thermal_disable_sensor(struct hisi_thermal_data *data)
+static int hi3660_thermal_disable_sensor(struct hisi_thermal_sensor *sensor)
{
+ struct hisi_thermal_data *data = sensor->data;
+
/* disable sensor module */
- hi3660_thermal_alarm_enable(data->regs, data->sensor.id, 0);
+ hi3660_thermal_alarm_enable(data->regs, sensor->id, 0);
return 0;
}
-static int hi6220_thermal_enable_sensor(struct hisi_thermal_data *data)
+static int hi6220_thermal_enable_sensor(struct hisi_thermal_sensor *sensor)
{
- struct hisi_thermal_sensor *sensor = &data->sensor;
+ struct hisi_thermal_data *data = sensor->data;
int ret;
/* enable clock for tsensor */
return 0;
}
-static int hi3660_thermal_enable_sensor(struct hisi_thermal_data *data)
+static int hi3660_thermal_enable_sensor(struct hisi_thermal_sensor *sensor)
{
unsigned int value;
- struct hisi_thermal_sensor *sensor = &data->sensor;
+ struct hisi_thermal_data *data = sensor->data;
/* disable interrupt */
hi3660_thermal_alarm_enable(data->regs, sensor->id, 0);
{
struct platform_device *pdev = data->pdev;
struct device *dev = &pdev->dev;
- struct resource *res;
int ret;
- data->get_temp = hi6220_thermal_get_temp;
- data->enable_sensor = hi6220_thermal_enable_sensor;
- data->disable_sensor = hi6220_thermal_disable_sensor;
- data->irq_handler = hi6220_thermal_irq_handler;
-
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- data->regs = devm_ioremap_resource(dev, res);
- if (IS_ERR(data->regs)) {
- dev_err(dev, "failed to get io address\n");
- return PTR_ERR(data->regs);
- }
-
data->clk = devm_clk_get(dev, "thermal_clk");
if (IS_ERR(data->clk)) {
ret = PTR_ERR(data->clk);
return ret;
}
- data->irq = platform_get_irq(pdev, 0);
- if (data->irq < 0)
- return data->irq;
+ data->sensor = devm_kzalloc(dev, sizeof(*data->sensor), GFP_KERNEL);
+ if (!data->sensor)
+ return -ENOMEM;
- data->sensor.id = HI6220_DEFAULT_SENSOR;
+ data->sensor[0].id = HI6220_CLUSTER0_SENSOR;
+ data->sensor[0].irq_name = "tsensor_intr";
+ data->sensor[0].data = data;
+ data->nr_sensors = 1;
return 0;
}
{
struct platform_device *pdev = data->pdev;
struct device *dev = &pdev->dev;
- struct resource *res;
- data->get_temp = hi3660_thermal_get_temp;
- data->enable_sensor = hi3660_thermal_enable_sensor;
- data->disable_sensor = hi3660_thermal_disable_sensor;
- data->irq_handler = hi3660_thermal_irq_handler;
+ data->nr_sensors = 2;
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- data->regs = devm_ioremap_resource(dev, res);
- if (IS_ERR(data->regs)) {
- dev_err(dev, "failed to get io address\n");
- return PTR_ERR(data->regs);
- }
+ data->sensor = devm_kzalloc(dev, sizeof(*data->sensor) *
+ data->nr_sensors, GFP_KERNEL);
+ if (!data->sensor)
+ return -ENOMEM;
- data->irq = platform_get_irq(pdev, 0);
- if (data->irq < 0)
- return data->irq;
+ data->sensor[0].id = HI3660_BIG_SENSOR;
+ data->sensor[0].irq_name = "tsensor_a73";
+ data->sensor[0].data = data;
- data->sensor.id = HI3660_DEFAULT_SENSOR;
+ data->sensor[1].id = HI3660_LITTLE_SENSOR;
+ data->sensor[1].irq_name = "tsensor_a53";
+ data->sensor[1].data = data;
return 0;
}
static int hisi_thermal_get_temp(void *__data, int *temp)
{
- struct hisi_thermal_data *data = __data;
- struct hisi_thermal_sensor *sensor = &data->sensor;
+ struct hisi_thermal_sensor *sensor = __data;
+ struct hisi_thermal_data *data = sensor->data;
- *temp = data->get_temp(data);
+ *temp = data->ops->get_temp(sensor);
- dev_dbg(&data->pdev->dev, "id=%d, temp=%d, thres=%d\n",
- sensor->id, *temp, sensor->thres_temp);
+ dev_dbg(&data->pdev->dev, "tzd=%p, id=%d, temp=%d, thres=%d\n",
+ sensor->tzd, sensor->id, *temp, sensor->thres_temp);
return 0;
}
static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev)
{
- struct hisi_thermal_data *data = dev;
- struct hisi_thermal_sensor *sensor = &data->sensor;
+ struct hisi_thermal_sensor *sensor = dev;
+ struct hisi_thermal_data *data = sensor->data;
int temp = 0;
- data->irq_handler(data);
+ data->ops->irq_handler(sensor);
- hisi_thermal_get_temp(data, &temp);
+ hisi_thermal_get_temp(sensor, &temp);
if (temp >= sensor->thres_temp) {
- dev_crit(&data->pdev->dev, "THERMAL ALARM: %d > %d\n",
- temp, sensor->thres_temp);
+ dev_crit(&data->pdev->dev,
+ "sensor <%d> THERMAL ALARM: %d > %d\n",
+ sensor->id, temp, sensor->thres_temp);
- thermal_zone_device_update(data->sensor.tzd,
+ thermal_zone_device_update(sensor->tzd,
THERMAL_EVENT_UNSPECIFIED);
} else {
- dev_crit(&data->pdev->dev, "THERMAL ALARM stopped: %d < %d\n",
- temp, sensor->thres_temp);
+ dev_crit(&data->pdev->dev,
+ "sensor <%d> THERMAL ALARM stopped: %d < %d\n",
+ sensor->id, temp, sensor->thres_temp);
}
return IRQ_HANDLED;
}
static int hisi_thermal_register_sensor(struct platform_device *pdev,
- struct hisi_thermal_data *data,
struct hisi_thermal_sensor *sensor)
{
int ret, i;
const struct thermal_trip *trip;
sensor->tzd = devm_thermal_zone_of_sensor_register(&pdev->dev,
- sensor->id, data,
+ sensor->id, sensor,
&hisi_of_thermal_ops);
if (IS_ERR(sensor->tzd)) {
ret = PTR_ERR(sensor->tzd);
return 0;
}
+static const struct hisi_thermal_ops hi6220_ops = {
+ .get_temp = hi6220_thermal_get_temp,
+ .enable_sensor = hi6220_thermal_enable_sensor,
+ .disable_sensor = hi6220_thermal_disable_sensor,
+ .irq_handler = hi6220_thermal_irq_handler,
+ .probe = hi6220_thermal_probe,
+};
+
+static const struct hisi_thermal_ops hi3660_ops = {
+ .get_temp = hi3660_thermal_get_temp,
+ .enable_sensor = hi3660_thermal_enable_sensor,
+ .disable_sensor = hi3660_thermal_disable_sensor,
+ .irq_handler = hi3660_thermal_irq_handler,
+ .probe = hi3660_thermal_probe,
+};
+
static const struct of_device_id of_hisi_thermal_match[] = {
{
.compatible = "hisilicon,tsensor",
- .data = hi6220_thermal_probe
+ .data = &hi6220_ops,
},
{
.compatible = "hisilicon,hi3660-tsensor",
- .data = hi3660_thermal_probe
+ .data = &hi3660_ops,
},
{ /* end */ }
};
static int hisi_thermal_probe(struct platform_device *pdev)
{
struct hisi_thermal_data *data;
- int (*platform_probe)(struct hisi_thermal_data *);
struct device *dev = &pdev->dev;
- int ret;
+ struct resource *res;
+ int i, ret;
data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
if (!data)
data->pdev = pdev;
platform_set_drvdata(pdev, data);
+ data->ops = of_device_get_match_data(dev);
- platform_probe = of_device_get_match_data(dev);
- if (!platform_probe) {
- dev_err(dev, "failed to get probe func\n");
- return -EINVAL;
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ data->regs = devm_ioremap_resource(dev, res);
+ if (IS_ERR(data->regs)) {
+ dev_err(dev, "failed to get io address\n");
+ return PTR_ERR(data->regs);
}
- ret = platform_probe(data);
+ ret = data->ops->probe(data);
if (ret)
return ret;
- ret = hisi_thermal_register_sensor(pdev, data,
- &data->sensor);
- if (ret) {
- dev_err(dev, "failed to register thermal sensor: %d\n", ret);
- return ret;
- }
+ for (i = 0; i < data->nr_sensors; i++) {
+ struct hisi_thermal_sensor *sensor = &data->sensor[i];
- ret = data->enable_sensor(data);
- if (ret) {
- dev_err(dev, "Failed to setup the sensor: %d\n", ret);
- return ret;
- }
+ ret = hisi_thermal_register_sensor(pdev, sensor);
+ if (ret) {
+ dev_err(dev, "failed to register thermal sensor: %d\n",
+ ret);
+ return ret;
+ }
+
+ ret = platform_get_irq_byname(pdev, sensor->irq_name);
+ if (ret < 0)
+ return ret;
- if (data->irq) {
- ret = devm_request_threaded_irq(dev, data->irq, NULL,
- hisi_thermal_alarm_irq_thread,
- IRQF_ONESHOT, "hisi_thermal", data);
+ ret = devm_request_threaded_irq(dev, ret, NULL,
+ hisi_thermal_alarm_irq_thread,
+ IRQF_ONESHOT, sensor->irq_name,
+ sensor);
if (ret < 0) {
- dev_err(dev, "failed to request alarm irq: %d\n", ret);
+ dev_err(dev, "Failed to request alarm irq: %d\n", ret);
return ret;
}
- }
- hisi_thermal_toggle_sensor(&data->sensor, true);
+ ret = data->ops->enable_sensor(sensor);
+ if (ret) {
+ dev_err(dev, "Failed to setup the sensor: %d\n", ret);
+ return ret;
+ }
+
+ hisi_thermal_toggle_sensor(sensor, true);
+ }
return 0;
}
static int hisi_thermal_remove(struct platform_device *pdev)
{
struct hisi_thermal_data *data = platform_get_drvdata(pdev);
- struct hisi_thermal_sensor *sensor = &data->sensor;
+ int i;
- hisi_thermal_toggle_sensor(sensor, false);
+ for (i = 0; i < data->nr_sensors; i++) {
+ struct hisi_thermal_sensor *sensor = &data->sensor[i];
- data->disable_sensor(data);
+ hisi_thermal_toggle_sensor(sensor, false);
+ data->ops->disable_sensor(sensor);
+ }
return 0;
}
static int hisi_thermal_suspend(struct device *dev)
{
struct hisi_thermal_data *data = dev_get_drvdata(dev);
+ int i;
- data->disable_sensor(data);
+ for (i = 0; i < data->nr_sensors; i++)
+ data->ops->disable_sensor(&data->sensor[i]);
return 0;
}
static int hisi_thermal_resume(struct device *dev)
{
struct hisi_thermal_data *data = dev_get_drvdata(dev);
+ int i, ret = 0;
+
+ for (i = 0; i < data->nr_sensors; i++)
+ ret |= data->ops->enable_sensor(&data->sensor[i]);
- return data->enable_sensor(data);
+ return ret;
}
#endif
} else {
ret = imx_init_from_tempmon_data(pdev);
if (ret) {
- dev_err(&pdev->dev, "failed to init from from fsl,tempmon-data\n");
+ dev_err(&pdev->dev, "failed to init from fsl,tempmon-data\n");
return ret;
}
}
if (ret != -EPROBE_DEFER)
dev_err(&pdev->dev,
"failed to get thermal clk: %d\n", ret);
- cpufreq_cooling_unregister(data->cdev);
- cpufreq_cpu_put(data->policy);
- return ret;
+ goto cpufreq_put;
}
/*
ret = clk_prepare_enable(data->thermal_clk);
if (ret) {
dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret);
- cpufreq_cooling_unregister(data->cdev);
- cpufreq_cpu_put(data->policy);
- return ret;
+ goto cpufreq_put;
}
data->tz = thermal_zone_device_register("imx_thermal_zone",
ret = PTR_ERR(data->tz);
dev_err(&pdev->dev,
"failed to register thermal zone device %d\n", ret);
- clk_disable_unprepare(data->thermal_clk);
- cpufreq_cooling_unregister(data->cdev);
- cpufreq_cpu_put(data->policy);
- return ret;
+ goto clk_disable;
}
dev_info(&pdev->dev, "%s CPU temperature grade - max:%dC"
0, "imx_thermal", data);
if (ret < 0) {
dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret);
- clk_disable_unprepare(data->thermal_clk);
- thermal_zone_device_unregister(data->tz);
- cpufreq_cooling_unregister(data->cdev);
- cpufreq_cpu_put(data->policy);
- return ret;
+ goto thermal_zone_unregister;
}
return 0;
+
+thermal_zone_unregister:
+ thermal_zone_device_unregister(data->tz);
+clk_disable:
+ clk_disable_unprepare(data->thermal_clk);
+cpufreq_put:
+ cpufreq_cooling_unregister(data->cdev);
+ cpufreq_cpu_put(data->policy);
+
+ return ret;
}
static int imx_thermal_remove(struct platform_device *pdev)
/*** Private data structures to represent thermal device tree data ***/
/**
- * struct __thermal_bind_param - a match between trip and cooling device
+ * struct __thermal_cooling_bind_param - a cooling device for a trip point
* @cooling_device: a pointer to identify the referred cooling device
- * @trip_id: the trip point index
- * @usage: the percentage (from 0 to 100) of cooling contribution
* @min: minimum cooling state used at this trip point
* @max: maximum cooling state used at this trip point
*/
-struct __thermal_bind_params {
+struct __thermal_cooling_bind_param {
struct device_node *cooling_device;
- unsigned int trip_id;
- unsigned int usage;
unsigned long min;
unsigned long max;
};
+/**
+ * struct __thermal_bind_param - a match between trip and cooling device
+ * @tcbp: a pointer to an array of cooling devices
+ * @count: number of elements in array
+ * @trip_id: the trip point index
+ * @usage: the percentage (from 0 to 100) of cooling contribution
+ */
+
+struct __thermal_bind_params {
+ struct __thermal_cooling_bind_param *tcbp;
+ unsigned int count;
+ unsigned int trip_id;
+ unsigned int usage;
+};
+
/**
* struct __thermal_zone - internal representation of a thermal zone
* @mode: current thermal zone device mode (enabled/disabled)
struct thermal_cooling_device *cdev)
{
struct __thermal_zone *data = thermal->devdata;
- int i;
+ struct __thermal_bind_params *tbp;
+ struct __thermal_cooling_bind_param *tcbp;
+ int i, j;
if (!data || IS_ERR(data))
return -ENODEV;
/* find where to bind */
for (i = 0; i < data->num_tbps; i++) {
- struct __thermal_bind_params *tbp = data->tbps + i;
+ tbp = data->tbps + i;
- if (tbp->cooling_device == cdev->np) {
- int ret;
+ for (j = 0; j < tbp->count; j++) {
+ tcbp = tbp->tcbp + j;
- ret = thermal_zone_bind_cooling_device(thermal,
+ if (tcbp->cooling_device == cdev->np) {
+ int ret;
+
+ ret = thermal_zone_bind_cooling_device(thermal,
tbp->trip_id, cdev,
- tbp->max,
- tbp->min,
+ tcbp->max,
+ tcbp->min,
tbp->usage);
- if (ret)
- return ret;
+ if (ret)
+ return ret;
+ }
}
}
struct thermal_cooling_device *cdev)
{
struct __thermal_zone *data = thermal->devdata;
- int i;
+ struct __thermal_bind_params *tbp;
+ struct __thermal_cooling_bind_param *tcbp;
+ int i, j;
if (!data || IS_ERR(data))
return -ENODEV;
/* find where to unbind */
for (i = 0; i < data->num_tbps; i++) {
- struct __thermal_bind_params *tbp = data->tbps + i;
+ tbp = data->tbps + i;
+
+ for (j = 0; j < tbp->count; j++) {
+ tcbp = tbp->tcbp + j;
- if (tbp->cooling_device == cdev->np) {
- int ret;
+ if (tcbp->cooling_device == cdev->np) {
+ int ret;
- ret = thermal_zone_unbind_cooling_device(thermal,
- tbp->trip_id, cdev);
- if (ret)
- return ret;
+ ret = thermal_zone_unbind_cooling_device(thermal,
+ tbp->trip_id, cdev);
+ if (ret)
+ return ret;
+ }
}
}
if (sensor_specs.args_count >= 1) {
id = sensor_specs.args[0];
WARN(sensor_specs.args_count > 1,
- "%s: too many cells in sensor specifier %d\n",
- sensor_specs.np->name, sensor_specs.args_count);
+ "%pOFn: too many cells in sensor specifier %d\n",
+ sensor_specs.np, sensor_specs.args_count);
} else {
id = 0;
}
int ntrips)
{
struct of_phandle_args cooling_spec;
+ struct __thermal_cooling_bind_param *__tcbp;
struct device_node *trip;
- int ret, i;
+ int ret, i, count;
u32 prop;
/* Default weight. Usage is optional */
goto end;
}
- ret = of_parse_phandle_with_args(np, "cooling-device", "#cooling-cells",
- 0, &cooling_spec);
- if (ret < 0) {
- pr_err("missing cooling_device property\n");
+ count = of_count_phandle_with_args(np, "cooling-device",
+ "#cooling-cells");
+ if (!count) {
+ pr_err("Add a cooling_device property with at least one device\n");
goto end;
}
- __tbp->cooling_device = cooling_spec.np;
- if (cooling_spec.args_count >= 2) { /* at least min and max */
- __tbp->min = cooling_spec.args[0];
- __tbp->max = cooling_spec.args[1];
- } else {
- pr_err("wrong reference to cooling device, missing limits\n");
+
+ __tcbp = kcalloc(count, sizeof(*__tcbp), GFP_KERNEL);
+ if (!__tcbp)
+ goto end;
+
+ for (i = 0; i < count; i++) {
+ ret = of_parse_phandle_with_args(np, "cooling-device",
+ "#cooling-cells", i, &cooling_spec);
+ if (ret < 0) {
+ pr_err("Invalid cooling-device entry\n");
+ goto free_tcbp;
+ }
+
+ __tcbp[i].cooling_device = cooling_spec.np;
+
+ if (cooling_spec.args_count >= 2) { /* at least min and max */
+ __tcbp[i].min = cooling_spec.args[0];
+ __tcbp[i].max = cooling_spec.args[1];
+ } else {
+ pr_err("wrong reference to cooling device, missing limits\n");
+ }
}
+ __tbp->tcbp = __tcbp;
+ __tbp->count = count;
+
+ goto end;
+
+free_tcbp:
+ for (i = i - 1; i >= 0; i--)
+ of_node_put(__tcbp[i].cooling_device);
+ kfree(__tcbp);
end:
of_node_put(trip);
return tz;
free_tbps:
- for (i = i - 1; i >= 0; i--)
- of_node_put(tz->tbps[i].cooling_device);
+ for (i = i - 1; i >= 0; i--) {
+ struct __thermal_bind_params *tbp = tz->tbps + i;
+ int j;
+
+ for (j = 0; j < tbp->count; j++)
+ of_node_put(tbp->tcbp[j].cooling_device);
+
+ kfree(tbp->tcbp);
+ }
+
kfree(tz->tbps);
free_trips:
for (i = 0; i < tz->ntrips; i++)
static inline void of_thermal_free_zone(struct __thermal_zone *tz)
{
- int i;
+ struct __thermal_bind_params *tbp;
+ int i, j;
+
+ for (i = 0; i < tz->num_tbps; i++) {
+ tbp = tz->tbps + i;
+
+ for (j = 0; j < tbp->count; j++)
+ of_node_put(tbp->tcbp[j].cooling_device);
+
+ kfree(tbp->tcbp);
+ }
- for (i = 0; i < tz->num_tbps; i++)
- of_node_put(tz->tbps[i].cooling_device);
kfree(tz->tbps);
for (i = 0; i < tz->ntrips; i++)
of_node_put(tz->trips[i].np);
tz = thermal_of_build_thermal_zone(child);
if (IS_ERR(tz)) {
- pr_err("failed to build thermal zone %s: %ld\n",
- child->name,
+ pr_err("failed to build thermal zone %pOFn: %ld\n",
+ child,
PTR_ERR(tz));
continue;
}
tz->passive_delay,
tz->polling_delay);
if (IS_ERR(zone)) {
- pr_err("Failed to build %s zone %ld\n", child->name,
+ pr_err("Failed to build %pOFn zone %ld\n", child,
PTR_ERR(zone));
kfree(tzp);
kfree(ops);
#include <linux/regmap.h>
#include <linux/thermal.h>
+#include "thermal_core.h"
+
#define QPNP_TM_REG_TYPE 0x04
#define QPNP_TM_REG_SUBTYPE 0x05
#define QPNP_TM_REG_STATUS 0x08
#define STATUS_GEN2_STATE_MASK GENMASK(6, 4)
#define STATUS_GEN2_STATE_SHIFT 4
-#define SHUTDOWN_CTRL1_OVERRIDE_MASK GENMASK(7, 6)
+#define SHUTDOWN_CTRL1_OVERRIDE_S2 BIT(6)
#define SHUTDOWN_CTRL1_THRESHOLD_MASK GENMASK(1, 0)
+#define SHUTDOWN_CTRL1_RATE_25HZ BIT(3)
+
#define ALARM_CTRL_FORCE_ENABLE BIT(7)
/*
#define TEMP_THRESH_STEP 5000 /* Threshold step: 5 C */
#define THRESH_MIN 0
+#define THRESH_MAX 3
+
+/* Stage 2 Threshold Min: 125 C */
+#define STAGE2_THRESHOLD_MIN 125000
+/* Stage 2 Threshold Max: 140 C */
+#define STAGE2_THRESHOLD_MAX 140000
/* Temperature in Milli Celsius reported during stage 0 if no ADC is present */
#define DEFAULT_TEMP 37000
struct qpnp_tm_chip {
struct regmap *map;
+ struct device *dev;
struct thermal_zone_device *tz_dev;
unsigned int subtype;
long temp;
unsigned int stage;
unsigned int prev_stage;
unsigned int base;
+ /* protects .thresh, .stage and chip registers */
+ struct mutex lock;
+ bool initialized;
+
struct iio_channel *adc;
};
unsigned int stage, stage_new, stage_old;
int ret;
+ WARN_ON(!mutex_is_locked(&chip->lock));
+
ret = qpnp_tm_get_temp_stage(chip);
if (ret < 0)
return ret;
if (!temp)
return -EINVAL;
+ if (!chip->initialized) {
+ *temp = DEFAULT_TEMP;
+ return 0;
+ }
+
if (!chip->adc) {
+ mutex_lock(&chip->lock);
ret = qpnp_tm_update_temp_no_adc(chip);
+ mutex_unlock(&chip->lock);
if (ret < 0)
return ret;
} else {
return 0;
}
+static int qpnp_tm_update_critical_trip_temp(struct qpnp_tm_chip *chip,
+ int temp)
+{
+ u8 reg;
+ bool disable_s2_shutdown = false;
+
+ WARN_ON(!mutex_is_locked(&chip->lock));
+
+ /*
+ * Default: S2 and S3 shutdown enabled, thresholds at
+ * 105C/125C/145C, monitoring at 25Hz
+ */
+ reg = SHUTDOWN_CTRL1_RATE_25HZ;
+
+ if (temp == THERMAL_TEMP_INVALID ||
+ temp < STAGE2_THRESHOLD_MIN) {
+ chip->thresh = THRESH_MIN;
+ goto skip;
+ }
+
+ if (temp <= STAGE2_THRESHOLD_MAX) {
+ chip->thresh = THRESH_MAX -
+ ((STAGE2_THRESHOLD_MAX - temp) /
+ TEMP_THRESH_STEP);
+ disable_s2_shutdown = true;
+ } else {
+ chip->thresh = THRESH_MAX;
+
+ if (chip->adc)
+ disable_s2_shutdown = true;
+ else
+ dev_warn(chip->dev,
+ "No ADC is configured and critical temperature is above the maximum stage 2 threshold of 140 C! Configuring stage 2 shutdown at 140 C.\n");
+ }
+
+skip:
+ reg |= chip->thresh;
+ if (disable_s2_shutdown)
+ reg |= SHUTDOWN_CTRL1_OVERRIDE_S2;
+
+ return qpnp_tm_write(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, reg);
+}
+
+static int qpnp_tm_set_trip_temp(void *data, int trip, int temp)
+{
+ struct qpnp_tm_chip *chip = data;
+ const struct thermal_trip *trip_points;
+ int ret;
+
+ trip_points = of_thermal_get_trip_points(chip->tz_dev);
+ if (!trip_points)
+ return -EINVAL;
+
+ if (trip_points[trip].type != THERMAL_TRIP_CRITICAL)
+ return 0;
+
+ mutex_lock(&chip->lock);
+ ret = qpnp_tm_update_critical_trip_temp(chip, temp);
+ mutex_unlock(&chip->lock);
+
+ return ret;
+}
+
static const struct thermal_zone_of_device_ops qpnp_tm_sensor_ops = {
.get_temp = qpnp_tm_get_temp,
+ .set_trip_temp = qpnp_tm_set_trip_temp,
};
static irqreturn_t qpnp_tm_isr(int irq, void *data)
return IRQ_HANDLED;
}
+static int qpnp_tm_get_critical_trip_temp(struct qpnp_tm_chip *chip)
+{
+ int ntrips;
+ const struct thermal_trip *trips;
+ int i;
+
+ ntrips = of_thermal_get_ntrips(chip->tz_dev);
+ if (ntrips <= 0)
+ return THERMAL_TEMP_INVALID;
+
+ trips = of_thermal_get_trip_points(chip->tz_dev);
+ if (!trips)
+ return THERMAL_TEMP_INVALID;
+
+ for (i = 0; i < ntrips; i++) {
+ if (of_thermal_is_trip_valid(chip->tz_dev, i) &&
+ trips[i].type == THERMAL_TRIP_CRITICAL)
+ return trips[i].temperature;
+ }
+
+ return THERMAL_TEMP_INVALID;
+}
+
/*
* This function initializes the internal temp value based on only the
* current thermal stage and threshold. Setup threshold control and
unsigned int stage;
int ret;
u8 reg = 0;
+ int crit_temp;
+
+ mutex_lock(&chip->lock);
ret = qpnp_tm_read(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, ®);
if (ret < 0)
- return ret;
+ goto out;
chip->thresh = reg & SHUTDOWN_CTRL1_THRESHOLD_MASK;
chip->temp = DEFAULT_TEMP;
ret = qpnp_tm_get_temp_stage(chip);
if (ret < 0)
- return ret;
+ goto out;
chip->stage = ret;
stage = chip->subtype == QPNP_TM_SUBTYPE_GEN1
(stage - 1) * TEMP_STAGE_STEP +
TEMP_THRESH_MIN;
- /*
- * Set threshold and disable software override of stage 2 and 3
- * shutdowns.
- */
- chip->thresh = THRESH_MIN;
- reg &= ~(SHUTDOWN_CTRL1_OVERRIDE_MASK | SHUTDOWN_CTRL1_THRESHOLD_MASK);
- reg |= chip->thresh & SHUTDOWN_CTRL1_THRESHOLD_MASK;
- ret = qpnp_tm_write(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, reg);
+ crit_temp = qpnp_tm_get_critical_trip_temp(chip);
+ ret = qpnp_tm_update_critical_trip_temp(chip, crit_temp);
if (ret < 0)
- return ret;
+ goto out;
/* Enable the thermal alarm PMIC module in always-on mode. */
reg = ALARM_CTRL_FORCE_ENABLE;
ret = qpnp_tm_write(chip, QPNP_TM_REG_ALARM_CTRL, reg);
+ chip->initialized = true;
+
+out:
+ mutex_unlock(&chip->lock);
return ret;
}
return -ENOMEM;
dev_set_drvdata(&pdev->dev, chip);
+ chip->dev = &pdev->dev;
+
+ mutex_init(&chip->lock);
chip->map = dev_get_regmap(pdev->dev.parent, NULL);
if (!chip->map)
chip->subtype = subtype;
+ /*
+ * Register the sensor before initializing the hardware to be able to
+ * read the trip points. get_temp() returns the default temperature
+ * before the hardware initialization is completed.
+ */
+ chip->tz_dev = devm_thermal_zone_of_sensor_register(
+ &pdev->dev, 0, chip, &qpnp_tm_sensor_ops);
+ if (IS_ERR(chip->tz_dev)) {
+ dev_err(&pdev->dev, "failed to register sensor\n");
+ return PTR_ERR(chip->tz_dev);
+ }
+
ret = qpnp_tm_init(chip);
if (ret < 0) {
dev_err(&pdev->dev, "init failed\n");
if (ret < 0)
return ret;
- chip->tz_dev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0, chip,
- &qpnp_tm_sensor_ops);
- if (IS_ERR(chip->tz_dev)) {
- dev_err(&pdev->dev, "failed to register sensor\n");
- return PTR_ERR(chip->tz_dev);
- }
+ thermal_zone_device_update(chip->tz_dev, THERMAL_EVENT_UNSPECIFIED);
return 0;
}
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/platform_device.h>
const struct tsens_data data_8916 = {
.num_sensors = 5,
.ops = &ops_8916,
+ .reg_offsets = { [SROT_CTRL_OFFSET] = 0x0 },
.hw_ids = (unsigned int []){0, 1, 2, 4, 5 },
};
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/platform_device.h>
{
int ret;
unsigned int mask;
- struct regmap *map = tmdev->map;
+ struct regmap *map = tmdev->tm_map;
ret = regmap_read(map, THRESHOLD_ADDR, &tmdev->ctx.threshold);
if (ret)
static int resume_8960(struct tsens_device *tmdev)
{
int ret;
- struct regmap *map = tmdev->map;
+ struct regmap *map = tmdev->tm_map;
ret = regmap_update_bits(map, CNTL_ADDR, SW_RST, SW_RST);
if (ret)
int ret;
u32 reg, mask;
- ret = regmap_read(tmdev->map, CNTL_ADDR, ®);
+ ret = regmap_read(tmdev->tm_map, CNTL_ADDR, ®);
if (ret)
return ret;
mask = BIT(id + SENSOR0_SHIFT);
- ret = regmap_write(tmdev->map, CNTL_ADDR, reg | SW_RST);
+ ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg | SW_RST);
if (ret)
return ret;
else
reg |= mask | SLP_CLK_ENA_8660 | EN;
- ret = regmap_write(tmdev->map, CNTL_ADDR, reg);
+ ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg);
if (ret)
return ret;
mask <<= SENSOR0_SHIFT;
mask |= EN;
- ret = regmap_read(tmdev->map, CNTL_ADDR, ®_cntl);
+ ret = regmap_read(tmdev->tm_map, CNTL_ADDR, ®_cntl);
if (ret)
return;
else
reg_cntl &= ~SLP_CLK_ENA_8660;
- regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+ regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
}
static int init_8960(struct tsens_device *tmdev)
int ret, i;
u32 reg_cntl;
- tmdev->map = dev_get_regmap(tmdev->dev, NULL);
- if (!tmdev->map)
+ tmdev->tm_map = dev_get_regmap(tmdev->dev, NULL);
+ if (!tmdev->tm_map)
return -ENODEV;
/*
}
reg_cntl = SW_RST;
- ret = regmap_update_bits(tmdev->map, CNTL_ADDR, SW_RST, reg_cntl);
+ ret = regmap_update_bits(tmdev->tm_map, CNTL_ADDR, SW_RST, reg_cntl);
if (ret)
return ret;
if (tmdev->num_sensors > 1) {
reg_cntl |= SLP_CLK_ENA | (MEASURE_PERIOD << 18);
reg_cntl &= ~SW_RST;
- ret = regmap_update_bits(tmdev->map, CONFIG_ADDR,
+ ret = regmap_update_bits(tmdev->tm_map, CONFIG_ADDR,
CONFIG_MASK, CONFIG);
} else {
reg_cntl |= SLP_CLK_ENA_8660 | (MEASURE_PERIOD << 16);
}
reg_cntl |= GENMASK(tmdev->num_sensors - 1, 0) << SENSOR0_SHIFT;
- ret = regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+ ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
if (ret)
return ret;
reg_cntl |= EN;
- ret = regmap_write(tmdev->map, CNTL_ADDR, reg_cntl);
+ ret = regmap_write(tmdev->tm_map, CNTL_ADDR, reg_cntl);
if (ret)
return ret;
timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
do {
- ret = regmap_read(tmdev->map, INT_STATUS_ADDR, &trdy);
+ ret = regmap_read(tmdev->tm_map, INT_STATUS_ADDR, &trdy);
if (ret)
return ret;
if (!(trdy & TRDY_MASK))
continue;
- ret = regmap_read(tmdev->map, s->status, &code);
+ ret = regmap_read(tmdev->tm_map, s->status, &code);
if (ret)
return ret;
*temp = code_to_mdegC(code, s);
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/platform_device.h>
const struct tsens_data data_8974 = {
.num_sensors = 11,
.ops = &ops_8974,
+ .reg_offsets = { [SROT_CTRL_OFFSET] = 0x0 },
};
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/err.h>
#include <linux/regmap.h>
#include "tsens.h"
-#define S0_ST_ADDR 0x1030
+/* SROT */
+#define TSENS_EN BIT(0)
+
+/* TM */
+#define STATUS_OFFSET 0x30
#define SN_ADDR_OFFSET 0x4
#define SN_ST_TEMP_MASK 0x3ff
#define CAL_DEGC_PT1 30
unsigned int status_reg;
int last_temp = 0, ret;
- status_reg = S0_ST_ADDR + s->hw_id * SN_ADDR_OFFSET;
- ret = regmap_read(tmdev->map, status_reg, &code);
+ status_reg = tmdev->tm_offset + STATUS_OFFSET + s->hw_id * SN_ADDR_OFFSET;
+ ret = regmap_read(tmdev->tm_map, status_reg, &code);
if (ret)
return ret;
last_temp = code & SN_ST_TEMP_MASK;
int __init init_common(struct tsens_device *tmdev)
{
- void __iomem *base;
+ void __iomem *tm_base, *srot_base;
struct resource *res;
+ u32 code;
+ int ret;
struct platform_device *op = of_find_device_by_node(tmdev->dev->of_node);
+ u16 ctrl_offset = tmdev->reg_offsets[SROT_CTRL_OFFSET];
if (!op)
return -EINVAL;
- /* The driver only uses the TM register address space for now */
if (op->num_resources > 1) {
+ /* DT with separate SROT and TM address space */
tmdev->tm_offset = 0;
+ res = platform_get_resource(op, IORESOURCE_MEM, 1);
+ srot_base = devm_ioremap_resource(&op->dev, res);
+ if (IS_ERR(srot_base))
+ return PTR_ERR(srot_base);
+
+ tmdev->srot_map = devm_regmap_init_mmio(tmdev->dev,
+ srot_base, &tsens_config);
+ if (IS_ERR(tmdev->srot_map))
+ return PTR_ERR(tmdev->srot_map);
+
} else {
/* old DTs where SROT and TM were in a contiguous 2K block */
tmdev->tm_offset = 0x1000;
}
res = platform_get_resource(op, IORESOURCE_MEM, 0);
- base = devm_ioremap_resource(&op->dev, res);
- if (IS_ERR(base))
- return PTR_ERR(base);
-
- tmdev->map = devm_regmap_init_mmio(tmdev->dev, base, &tsens_config);
- if (IS_ERR(tmdev->map))
- return PTR_ERR(tmdev->map);
+ tm_base = devm_ioremap_resource(&op->dev, res);
+ if (IS_ERR(tm_base))
+ return PTR_ERR(tm_base);
+
+ tmdev->tm_map = devm_regmap_init_mmio(tmdev->dev, tm_base, &tsens_config);
+ if (IS_ERR(tmdev->tm_map))
+ return PTR_ERR(tmdev->tm_map);
+
+ if (tmdev->srot_map) {
+ ret = regmap_read(tmdev->srot_map, ctrl_offset, &code);
+ if (ret)
+ return ret;
+ if (!(code & TSENS_EN)) {
+ dev_err(tmdev->dev, "tsens device is not enabled\n");
+ return -ENODEV;
+ }
+ }
return 0;
}
int ret;
status_reg = tmdev->tm_offset + STATUS_OFFSET + s->hw_id * 4;
- ret = regmap_read(tmdev->map, status_reg, &code);
+ ret = regmap_read(tmdev->tm_map, status_reg, &code);
if (ret)
return ret;
last_temp = code & LAST_TEMP_MASK;
goto done;
/* Try a second time */
- ret = regmap_read(tmdev->map, status_reg, &code);
+ ret = regmap_read(tmdev->tm_map, status_reg, &code);
if (ret)
return ret;
if (code & STATUS_VALID_BIT) {
}
/* Try a third/last time */
- ret = regmap_read(tmdev->map, status_reg, &code);
+ ret = regmap_read(tmdev->tm_map, status_reg, &code);
if (ret)
return ret;
if (code & STATUS_VALID_BIT) {
const struct tsens_data data_tsens_v2 = {
.ops = &ops_generic_v2,
+ .reg_offsets = { [SROT_CTRL_OFFSET] = 0x4 },
};
/* Kept around for backward compatibility with old msm8996.dtsi */
const struct tsens_data data_8996 = {
.num_sensors = 13,
.ops = &ops_generic_v2,
+ .reg_offsets = { [SROT_CTRL_OFFSET] = 0x4 },
};
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#include <linux/err.h>
{
int i;
struct thermal_zone_device *tzd;
- u32 *hw_id, n = tmdev->num_sensors;
-
- hw_id = devm_kcalloc(tmdev->dev, n, sizeof(u32), GFP_KERNEL);
- if (!hw_id)
- return -ENOMEM;
for (i = 0; i < tmdev->num_sensors; i++) {
tmdev->sensor[i].tmdev = tmdev;
else
tmdev->sensor[i].hw_id = i;
}
+ for (i = 0; i < REG_ARRAY_SIZE; i++) {
+ tmdev->reg_offsets[i] = data->reg_offsets[i];
+ }
if (!tmdev->ops || !tmdev->ops->init || !tmdev->ops->get_temp)
return -EINVAL;
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2015, The Linux Foundation. All rights reserved.
- *
- * This software is licensed under the terms of the GNU General Public
- * License version 2, as published by the Free Software Foundation, and
- * may be copied, distributed, and modified under those terms.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
*/
+
#ifndef __QCOM_TSENS_H__
#define __QCOM_TSENS_H__
int (*get_trend)(struct tsens_device *, int, enum thermal_trend *);
};
+enum reg_list {
+ SROT_CTRL_OFFSET,
+
+ REG_ARRAY_SIZE,
+};
+
/**
* struct tsens_data - tsens instance specific data
* @num_sensors: Max number of sensors supported by platform
* @ops: operations the tsens instance supports
* @hw_ids: Subset of sensors ids supported by platform, if not the first n
+ * @reg_offsets: Register offsets for commonly used registers
*/
struct tsens_data {
const u32 num_sensors;
const struct tsens_ops *ops;
+ const u16 reg_offsets[REG_ARRAY_SIZE];
unsigned int *hw_ids;
};
struct tsens_device {
struct device *dev;
u32 num_sensors;
- struct regmap *map;
+ struct regmap *tm_map;
+ struct regmap *srot_map;
u32 tm_offset;
+ u16 reg_offsets[REG_ARRAY_SIZE];
struct tsens_context ctx;
const struct tsens_ops *ops;
struct tsens_sensor sensor[0];
if (sensor_specs.args_count >= 1) {
id = sensor_specs.args[0];
WARN(sensor_specs.args_count > 1,
- "%s: too many cells in sensor specifier %d\n",
- sensor_specs.np->name, sensor_specs.args_count);
+ "%pOFn: too many cells in sensor specifier %d\n",
+ sensor_specs.np, sensor_specs.args_count);
} else {
id = 0;
}
static const struct of_device_id qoriq_tmu_match[] = {
{ .compatible = "fsl,qoriq-tmu", },
+ { .compatible = "fsl,imx8mq-tmu", },
{},
};
MODULE_DEVICE_TABLE(of, qoriq_tmu_match);
}
static const struct of_device_id rcar_gen3_thermal_dt_ids[] = {
+ { .compatible = "renesas,r8a774a1-thermal", },
{ .compatible = "renesas,r8a7795-thermal", },
{ .compatible = "renesas,r8a7796-thermal", },
{ .compatible = "renesas,r8a77965-thermal", },
+ { .compatible = "renesas,r8a77980-thermal", },
{},
};
MODULE_DEVICE_TABLE(of, rcar_gen3_thermal_dt_ids);
.compatible = "renesas,rcar-gen2-thermal",
.data = &rcar_gen2_thermal,
},
+ {
+ .compatible = "renesas,thermal-r8a77970",
+ .data = &rcar_gen3_thermal,
+ },
{
.compatible = "renesas,thermal-r8a77995",
.data = &rcar_gen3_thermal,
rcar_thermal_for_each_priv(priv, common) {
if (rcar_thermal_had_changed(priv, status)) {
rcar_thermal_irq_disable(priv);
- schedule_delayed_work(&priv->work,
- msecs_to_jiffies(300));
+ queue_delayed_work(system_freezable_wq, &priv->work,
+ msecs_to_jiffies(300));
}
}
rcar_thermal_for_each_priv(priv, common) {
rcar_thermal_irq_disable(priv);
+ cancel_delayed_work_sync(&priv->work);
if (priv->chip->use_of_thermal)
thermal_remove_hwmon_sysfs(priv->zone);
else
pm_runtime_get_sync(dev);
for (i = 0; i < chip->nirqs; i++) {
- irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
+ irq = platform_get_resource(pdev, IORESOURCE_IRQ, i);
if (!irq)
continue;
if (!common->base) {
+#
+# STMicroelectronics thermal drivers configuration
+#
+
config ST_THERMAL
tristate "Thermal sensors on STMicroelectronics STi series of SoCs"
help
config ST_THERMAL_MEMMAP
select ST_THERMAL
tristate "STi series memory mapped access based thermal sensors"
+
+config STM32_THERMAL
+ tristate "Thermal framework support on STMicroelectronics STM32 series of SoCs"
+ depends on MACH_STM32MP157
+ default y
+ help
+ Support for thermal framework on STMicroelectronics STM32 series of
+ SoCs. This thermal driver allows to access to general thermal framework
+ functionalities and to acces to SoC sensor functionalities. This
+ configuration is fully dependent of MACH_STM32MP157.
obj-$(CONFIG_ST_THERMAL) := st_thermal.o
obj-$(CONFIG_ST_THERMAL_SYSCFG) += st_thermal_syscfg.o
obj-$(CONFIG_ST_THERMAL_MEMMAP) += st_thermal_memmap.o
+obj-$(CONFIG_STM32_THERMAL) := stm_thermal.o
\ No newline at end of file
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) STMicroelectronics 2018 - All Rights Reserved
+ * Author: David Hernandez Sanchez <david.hernandezsanchez@st.com> for
+ * STMicroelectronics.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_address.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/thermal.h>
+
+#include "../thermal_core.h"
+#include "../thermal_hwmon.h"
+
+/* DTS register offsets */
+#define DTS_CFGR1_OFFSET 0x0
+#define DTS_T0VALR1_OFFSET 0x8
+#define DTS_RAMPVALR_OFFSET 0X10
+#define DTS_ITR1_OFFSET 0x14
+#define DTS_DR_OFFSET 0x1C
+#define DTS_SR_OFFSET 0x20
+#define DTS_ITENR_OFFSET 0x24
+#define DTS_CIFR_OFFSET 0x28
+
+/* DTS_CFGR1 register mask definitions */
+#define HSREF_CLK_DIV_MASK GENMASK(30, 24)
+#define TS1_SMP_TIME_MASK GENMASK(19, 16)
+#define TS1_INTRIG_SEL_MASK GENMASK(11, 8)
+
+/* DTS_T0VALR1 register mask definitions */
+#define TS1_T0_MASK GENMASK(17, 16)
+#define TS1_FMT0_MASK GENMASK(15, 0)
+
+/* DTS_RAMPVALR register mask definitions */
+#define TS1_RAMP_COEFF_MASK GENMASK(15, 0)
+
+/* DTS_ITR1 register mask definitions */
+#define TS1_HITTHD_MASK GENMASK(31, 16)
+#define TS1_LITTHD_MASK GENMASK(15, 0)
+
+/* DTS_DR register mask definitions */
+#define TS1_MFREQ_MASK GENMASK(15, 0)
+
+/* Less significant bit position definitions */
+#define TS1_T0_POS 16
+#define TS1_SMP_TIME_POS 16
+#define TS1_HITTHD_POS 16
+#define HSREF_CLK_DIV_POS 24
+
+/* DTS_CFGR1 bit definitions */
+#define TS1_EN BIT(0)
+#define TS1_START BIT(4)
+#define REFCLK_SEL BIT(20)
+#define REFCLK_LSE REFCLK_SEL
+#define Q_MEAS_OPT BIT(21)
+#define CALIBRATION_CONTROL Q_MEAS_OPT
+
+/* DTS_SR bit definitions */
+#define TS_RDY BIT(15)
+/* Bit definitions below are common for DTS_SR, DTS_ITENR and DTS_CIFR */
+#define HIGH_THRESHOLD BIT(2)
+#define LOW_THRESHOLD BIT(1)
+
+/* Constants */
+#define ADJUST 100
+#define ONE_MHZ 1000000
+#define POLL_TIMEOUT 5000
+#define STARTUP_TIME 40
+#define TS1_T0_VAL0 30
+#define TS1_T0_VAL1 130
+#define NO_HW_TRIG 0
+
+/* The Thermal Framework expects millidegrees */
+#define mcelsius(temp) ((temp) * 1000)
+
+/* The Sensor expects oC degrees */
+#define celsius(temp) ((temp) / 1000)
+
+struct stm_thermal_sensor {
+ struct device *dev;
+ struct thermal_zone_device *th_dev;
+ enum thermal_device_mode mode;
+ struct clk *clk;
+ int high_temp;
+ int low_temp;
+ int temp_critical;
+ int temp_passive;
+ unsigned int low_temp_enabled;
+ int num_trips;
+ int irq;
+ unsigned int irq_enabled;
+ void __iomem *base;
+ int t0, fmt0, ramp_coeff;
+};
+
+static irqreturn_t stm_thermal_alarm_irq(int irq, void *sdata)
+{
+ struct stm_thermal_sensor *sensor = sdata;
+
+ disable_irq_nosync(irq);
+ sensor->irq_enabled = false;
+
+ return IRQ_WAKE_THREAD;
+}
+
+static irqreturn_t stm_thermal_alarm_irq_thread(int irq, void *sdata)
+{
+ u32 value;
+ struct stm_thermal_sensor *sensor = sdata;
+
+ /* read IT reason in SR and clear flags */
+ value = readl_relaxed(sensor->base + DTS_SR_OFFSET);
+
+ if ((value & LOW_THRESHOLD) == LOW_THRESHOLD)
+ writel_relaxed(LOW_THRESHOLD, sensor->base + DTS_CIFR_OFFSET);
+
+ if ((value & HIGH_THRESHOLD) == HIGH_THRESHOLD)
+ writel_relaxed(HIGH_THRESHOLD, sensor->base + DTS_CIFR_OFFSET);
+
+ thermal_zone_device_update(sensor->th_dev, THERMAL_EVENT_UNSPECIFIED);
+
+ return IRQ_HANDLED;
+}
+
+static int stm_sensor_power_on(struct stm_thermal_sensor *sensor)
+{
+ int ret;
+ u32 value;
+
+ /* Enable sensor */
+ value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+ value |= TS1_EN;
+ writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+ /*
+ * The DTS block can be enabled by setting TSx_EN bit in
+ * DTS_CFGRx register. It requires a startup time of
+ * 40μs. Use 5 ms as arbitrary timeout.
+ */
+ ret = readl_poll_timeout(sensor->base + DTS_SR_OFFSET,
+ value, (value & TS_RDY),
+ STARTUP_TIME, POLL_TIMEOUT);
+ if (ret)
+ return ret;
+
+ /* Start continuous measuring */
+ value = readl_relaxed(sensor->base +
+ DTS_CFGR1_OFFSET);
+ value |= TS1_START;
+ writel_relaxed(value, sensor->base +
+ DTS_CFGR1_OFFSET);
+
+ return 0;
+}
+
+static int stm_sensor_power_off(struct stm_thermal_sensor *sensor)
+{
+ u32 value;
+
+ /* Stop measuring */
+ value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+ value &= ~TS1_START;
+ writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+ /* Ensure stop is taken into account */
+ usleep_range(STARTUP_TIME, POLL_TIMEOUT);
+
+ /* Disable sensor */
+ value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+ value &= ~TS1_EN;
+ writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+ /* Ensure disable is taken into account */
+ return readl_poll_timeout(sensor->base + DTS_SR_OFFSET, value,
+ !(value & TS_RDY),
+ STARTUP_TIME, POLL_TIMEOUT);
+}
+
+static int stm_thermal_calibration(struct stm_thermal_sensor *sensor)
+{
+ u32 value, clk_freq;
+ u32 prescaler;
+
+ /* Figure out prescaler value for PCLK during calibration */
+ clk_freq = clk_get_rate(sensor->clk);
+ if (!clk_freq)
+ return -EINVAL;
+
+ prescaler = 0;
+ clk_freq /= ONE_MHZ;
+ if (clk_freq) {
+ while (prescaler <= clk_freq)
+ prescaler++;
+ }
+
+ value = readl_relaxed(sensor->base + DTS_CFGR1_OFFSET);
+
+ /* Clear prescaler */
+ value &= ~HSREF_CLK_DIV_MASK;
+
+ /* Set prescaler. pclk_freq/prescaler < 1MHz */
+ value |= (prescaler << HSREF_CLK_DIV_POS);
+
+ /* Select PCLK as reference clock */
+ value &= ~REFCLK_SEL;
+
+ /* Set maximal sampling time for better precision */
+ value |= TS1_SMP_TIME_MASK;
+
+ /* Measure with calibration */
+ value &= ~CALIBRATION_CONTROL;
+
+ /* select trigger */
+ value &= ~TS1_INTRIG_SEL_MASK;
+ value |= NO_HW_TRIG;
+
+ writel_relaxed(value, sensor->base + DTS_CFGR1_OFFSET);
+
+ return 0;
+}
+
+/* Fill in DTS structure with factory sensor values */
+static int stm_thermal_read_factory_settings(struct stm_thermal_sensor *sensor)
+{
+ /* Retrieve engineering calibration temperature */
+ sensor->t0 = readl_relaxed(sensor->base + DTS_T0VALR1_OFFSET) &
+ TS1_T0_MASK;
+ if (!sensor->t0)
+ sensor->t0 = TS1_T0_VAL0;
+ else
+ sensor->t0 = TS1_T0_VAL1;
+
+ /* Retrieve fmt0 and put it on Hz */
+ sensor->fmt0 = ADJUST * readl_relaxed(sensor->base + DTS_T0VALR1_OFFSET)
+ & TS1_FMT0_MASK;
+
+ /* Retrieve ramp coefficient */
+ sensor->ramp_coeff = readl_relaxed(sensor->base + DTS_RAMPVALR_OFFSET) &
+ TS1_RAMP_COEFF_MASK;
+
+ if (!sensor->fmt0 || !sensor->ramp_coeff) {
+ dev_err(sensor->dev, "%s: wrong setting\n", __func__);
+ return -EINVAL;
+ }
+
+ dev_dbg(sensor->dev, "%s: T0 = %doC, FMT0 = %dHz, RAMP_COEFF = %dHz/oC",
+ __func__, sensor->t0, sensor->fmt0, sensor->ramp_coeff);
+
+ return 0;
+}
+
+static int stm_thermal_calculate_threshold(struct stm_thermal_sensor *sensor,
+ int temp, u32 *th)
+{
+ int freqM;
+ u32 sampling_time;
+
+ /* Retrieve the number of periods to sample */
+ sampling_time = (readl_relaxed(sensor->base + DTS_CFGR1_OFFSET) &
+ TS1_SMP_TIME_MASK) >> TS1_SMP_TIME_POS;
+
+ /* Figure out the CLK_PTAT frequency for a given temperature */
+ freqM = ((temp - sensor->t0) * sensor->ramp_coeff)
+ + sensor->fmt0;
+
+ dev_dbg(sensor->dev, "%s: freqM for threshold = %d Hz",
+ __func__, freqM);
+
+ /* Figure out the threshold sample number */
+ *th = clk_get_rate(sensor->clk);
+ if (!*th)
+ return -EINVAL;
+
+ *th = *th / freqM;
+
+ *th *= sampling_time;
+
+ return 0;
+}
+
+static int stm_thermal_set_threshold(struct stm_thermal_sensor *sensor)
+{
+ u32 value, th;
+ int ret;
+
+ value = readl_relaxed(sensor->base + DTS_ITR1_OFFSET);
+
+ /* Erase threshold content */
+ value &= ~(TS1_LITTHD_MASK | TS1_HITTHD_MASK);
+
+ /* Retrieve the sample threshold number th for a given temperature */
+ ret = stm_thermal_calculate_threshold(sensor, sensor->high_temp, &th);
+ if (ret)
+ return ret;
+
+ value |= th & TS1_LITTHD_MASK;
+
+ if (sensor->low_temp_enabled) {
+ /* Retrieve the sample threshold */
+ ret = stm_thermal_calculate_threshold(sensor, sensor->low_temp,
+ &th);
+ if (ret)
+ return ret;
+
+ value |= (TS1_HITTHD_MASK & (th << TS1_HITTHD_POS));
+ }
+
+ /* Write value on the Low interrupt threshold */
+ writel_relaxed(value, sensor->base + DTS_ITR1_OFFSET);
+
+ return 0;
+}
+
+/* Disable temperature interrupt */
+static int stm_disable_irq(struct stm_thermal_sensor *sensor)
+{
+ u32 value;
+
+ /* Disable IT generation for low and high thresholds */
+ value = readl_relaxed(sensor->base + DTS_ITENR_OFFSET);
+ writel_relaxed(value & ~(LOW_THRESHOLD | HIGH_THRESHOLD),
+ sensor->base + DTS_ITENR_OFFSET);
+
+ dev_dbg(sensor->dev, "%s: IT disabled on sensor side", __func__);
+
+ return 0;
+}
+
+/* Enable temperature interrupt */
+static int stm_enable_irq(struct stm_thermal_sensor *sensor)
+{
+ u32 value;
+
+ /*
+ * Code below enables High temperature threshold using a low threshold
+ * sampling value
+ */
+
+ /* Make sure LOW_THRESHOLD IT is clear before enabling */
+ writel_relaxed(LOW_THRESHOLD, sensor->base + DTS_CIFR_OFFSET);
+
+ /* Enable IT generation for low threshold */
+ value = readl_relaxed(sensor->base + DTS_ITENR_OFFSET);
+ value |= LOW_THRESHOLD;
+
+ /* Enable the low temperature threshold if needed */
+ if (sensor->low_temp_enabled) {
+ /* Make sure HIGH_THRESHOLD IT is clear before enabling */
+ writel_relaxed(HIGH_THRESHOLD, sensor->base + DTS_CIFR_OFFSET);
+
+ /* Enable IT generation for high threshold */
+ value |= HIGH_THRESHOLD;
+ }
+
+ /* Enable thresholds */
+ writel_relaxed(value, sensor->base + DTS_ITENR_OFFSET);
+
+ dev_dbg(sensor->dev, "%s: IT enabled on sensor side", __func__);
+
+ return 0;
+}
+
+static int stm_thermal_update_threshold(struct stm_thermal_sensor *sensor)
+{
+ int ret;
+
+ sensor->mode = THERMAL_DEVICE_DISABLED;
+
+ ret = stm_sensor_power_off(sensor);
+ if (ret)
+ return ret;
+
+ ret = stm_disable_irq(sensor);
+ if (ret)
+ return ret;
+
+ ret = stm_thermal_set_threshold(sensor);
+ if (ret)
+ return ret;
+
+ ret = stm_enable_irq(sensor);
+ if (ret)
+ return ret;
+
+ ret = stm_sensor_power_on(sensor);
+ if (ret)
+ return ret;
+
+ sensor->mode = THERMAL_DEVICE_ENABLED;
+
+ return 0;
+}
+
+/* Callback to get temperature from HW */
+static int stm_thermal_get_temp(void *data, int *temp)
+{
+ struct stm_thermal_sensor *sensor = data;
+ u32 sampling_time;
+ int freqM, ret;
+
+ if (sensor->mode != THERMAL_DEVICE_ENABLED)
+ return -EAGAIN;
+
+ /* Retrieve the number of samples */
+ ret = readl_poll_timeout(sensor->base + DTS_DR_OFFSET, freqM,
+ (freqM & TS1_MFREQ_MASK), STARTUP_TIME,
+ POLL_TIMEOUT);
+
+ if (ret)
+ return ret;
+
+ if (!freqM)
+ return -ENODATA;
+
+ /* Retrieve the number of periods sampled */
+ sampling_time = (readl_relaxed(sensor->base + DTS_CFGR1_OFFSET) &
+ TS1_SMP_TIME_MASK) >> TS1_SMP_TIME_POS;
+
+ /* Figure out the number of samples per period */
+ freqM /= sampling_time;
+
+ /* Figure out the CLK_PTAT frequency */
+ freqM = clk_get_rate(sensor->clk) / freqM;
+ if (!freqM)
+ return -EINVAL;
+
+ dev_dbg(sensor->dev, "%s: freqM=%d\n", __func__, freqM);
+
+ /* Figure out the temperature in mili celsius */
+ *temp = mcelsius(sensor->t0 + ((freqM - sensor->fmt0) /
+ sensor->ramp_coeff));
+
+ dev_dbg(sensor->dev, "%s: temperature = %d millicelsius",
+ __func__, *temp);
+
+ /* Update thresholds */
+ if (sensor->num_trips > 1) {
+ /* Update alarm threshold value to next higher trip point */
+ if (sensor->high_temp == sensor->temp_passive &&
+ celsius(*temp) >= sensor->temp_passive) {
+ sensor->high_temp = sensor->temp_critical;
+ sensor->low_temp = sensor->temp_passive;
+ sensor->low_temp_enabled = true;
+ ret = stm_thermal_update_threshold(sensor);
+ if (ret)
+ return ret;
+ }
+
+ if (sensor->high_temp == sensor->temp_critical &&
+ celsius(*temp) < sensor->temp_passive) {
+ sensor->high_temp = sensor->temp_passive;
+ sensor->low_temp_enabled = false;
+ ret = stm_thermal_update_threshold(sensor);
+ if (ret)
+ return ret;
+ }
+
+ /*
+ * Re-enable alarm IRQ if temperature below critical
+ * temperature
+ */
+ if (!sensor->irq_enabled &&
+ (celsius(*temp) < sensor->temp_critical)) {
+ sensor->irq_enabled = true;
+ enable_irq(sensor->irq);
+ }
+ }
+
+ return 0;
+}
+
+/* Registers DTS irq to be visible by GIC */
+static int stm_register_irq(struct stm_thermal_sensor *sensor)
+{
+ struct device *dev = sensor->dev;
+ struct platform_device *pdev = to_platform_device(dev);
+ int ret;
+
+ sensor->irq = platform_get_irq(pdev, 0);
+ if (sensor->irq < 0) {
+ dev_err(dev, "%s: Unable to find IRQ\n", __func__);
+ return sensor->irq;
+ }
+
+ ret = devm_request_threaded_irq(dev, sensor->irq,
+ stm_thermal_alarm_irq,
+ stm_thermal_alarm_irq_thread,
+ IRQF_ONESHOT,
+ dev->driver->name, sensor);
+ if (ret) {
+ dev_err(dev, "%s: Failed to register IRQ %d\n", __func__,
+ sensor->irq);
+ return ret;
+ }
+
+ sensor->irq_enabled = true;
+
+ dev_dbg(dev, "%s: thermal IRQ registered", __func__);
+
+ return 0;
+}
+
+static int stm_thermal_sensor_off(struct stm_thermal_sensor *sensor)
+{
+ int ret;
+
+ ret = stm_sensor_power_off(sensor);
+ if (ret)
+ return ret;
+
+ clk_disable_unprepare(sensor->clk);
+
+ return 0;
+}
+
+static int stm_thermal_prepare(struct stm_thermal_sensor *sensor)
+{
+ int ret;
+ struct device *dev = sensor->dev;
+
+ ret = clk_prepare_enable(sensor->clk);
+ if (ret)
+ return ret;
+
+ ret = stm_thermal_calibration(sensor);
+ if (ret)
+ goto thermal_unprepare;
+
+ /* Set threshold(s) for IRQ */
+ ret = stm_thermal_set_threshold(sensor);
+ if (ret)
+ goto thermal_unprepare;
+
+ ret = stm_enable_irq(sensor);
+ if (ret)
+ goto thermal_unprepare;
+
+ ret = stm_sensor_power_on(sensor);
+ if (ret) {
+ dev_err(dev, "%s: failed to power on sensor\n", __func__);
+ goto irq_disable;
+ }
+
+ return 0;
+
+irq_disable:
+ stm_disable_irq(sensor);
+
+thermal_unprepare:
+ clk_disable_unprepare(sensor->clk);
+
+ return ret;
+}
+
+#ifdef CONFIG_PM_SLEEP
+static int stm_thermal_suspend(struct device *dev)
+{
+ int ret;
+ struct platform_device *pdev = to_platform_device(dev);
+ struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+ ret = stm_thermal_sensor_off(sensor);
+ if (ret)
+ return ret;
+
+ sensor->mode = THERMAL_DEVICE_DISABLED;
+
+ return 0;
+}
+
+static int stm_thermal_resume(struct device *dev)
+{
+ int ret;
+ struct platform_device *pdev = to_platform_device(dev);
+ struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+ ret = stm_thermal_prepare(sensor);
+ if (ret)
+ return ret;
+
+ sensor->mode = THERMAL_DEVICE_ENABLED;
+
+ return 0;
+}
+#endif /* CONFIG_PM_SLEEP */
+
+SIMPLE_DEV_PM_OPS(stm_thermal_pm_ops, stm_thermal_suspend, stm_thermal_resume);
+
+static const struct thermal_zone_of_device_ops stm_tz_ops = {
+ .get_temp = stm_thermal_get_temp,
+};
+
+static const struct of_device_id stm_thermal_of_match[] = {
+ { .compatible = "st,stm32-thermal"},
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, stm_thermal_of_match);
+
+static int stm_thermal_probe(struct platform_device *pdev)
+{
+ struct stm_thermal_sensor *sensor;
+ struct resource *res;
+ const struct thermal_trip *trip;
+ void __iomem *base;
+ int ret, i;
+
+ if (!pdev->dev.of_node) {
+ dev_err(&pdev->dev, "%s: device tree node not found\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ sensor = devm_kzalloc(&pdev->dev, sizeof(*sensor), GFP_KERNEL);
+ if (!sensor)
+ return -ENOMEM;
+
+ platform_set_drvdata(pdev, sensor);
+
+ sensor->dev = &pdev->dev;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(base))
+ return PTR_ERR(base);
+
+ /* Populate sensor */
+ sensor->base = base;
+
+ ret = stm_thermal_read_factory_settings(sensor);
+ if (ret)
+ return ret;
+
+ sensor->clk = devm_clk_get(&pdev->dev, "pclk");
+ if (IS_ERR(sensor->clk)) {
+ dev_err(&pdev->dev, "%s: failed to fetch PCLK clock\n",
+ __func__);
+ return PTR_ERR(sensor->clk);
+ }
+
+ /* Register IRQ into GIC */
+ ret = stm_register_irq(sensor);
+ if (ret)
+ return ret;
+
+ sensor->th_dev = devm_thermal_zone_of_sensor_register(&pdev->dev, 0,
+ sensor,
+ &stm_tz_ops);
+
+ if (IS_ERR(sensor->th_dev)) {
+ dev_err(&pdev->dev, "%s: thermal zone sensor registering KO\n",
+ __func__);
+ ret = PTR_ERR(sensor->th_dev);
+ return ret;
+ }
+
+ if (!sensor->th_dev->ops->get_crit_temp) {
+ /* Critical point must be provided */
+ ret = -EINVAL;
+ goto err_tz;
+ }
+
+ ret = sensor->th_dev->ops->get_crit_temp(sensor->th_dev,
+ &sensor->temp_critical);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Not able to read critical_temp: %d\n", ret);
+ goto err_tz;
+ }
+
+ sensor->temp_critical = celsius(sensor->temp_critical);
+
+ /* Set thresholds for IRQ */
+ sensor->high_temp = sensor->temp_critical;
+
+ trip = of_thermal_get_trip_points(sensor->th_dev);
+ sensor->num_trips = of_thermal_get_ntrips(sensor->th_dev);
+
+ /* Find out passive temperature if it exists */
+ for (i = (sensor->num_trips - 1); i >= 0; i--) {
+ if (trip[i].type == THERMAL_TRIP_PASSIVE) {
+ sensor->temp_passive = celsius(trip[i].temperature);
+ /* Update high temperature threshold */
+ sensor->high_temp = sensor->temp_passive;
+ }
+ }
+
+ /*
+ * Ensure low_temp_enabled flag is disabled.
+ * By disabling low_temp_enabled, low threshold IT will not be
+ * configured neither enabled because it is not needed as high
+ * threshold is set on the lowest temperature trip point after
+ * probe.
+ */
+ sensor->low_temp_enabled = false;
+
+ /* Configure and enable HW sensor */
+ ret = stm_thermal_prepare(sensor);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Not able to enable sensor: %d\n", ret);
+ goto err_tz;
+ }
+
+ /*
+ * Thermal_zone doesn't enable hwmon as default,
+ * enable it here
+ */
+ sensor->th_dev->tzp->no_hwmon = false;
+ ret = thermal_add_hwmon_sysfs(sensor->th_dev);
+ if (ret)
+ goto err_tz;
+
+ sensor->mode = THERMAL_DEVICE_ENABLED;
+
+ dev_info(&pdev->dev, "%s: Driver initialized successfully\n",
+ __func__);
+
+ return 0;
+
+err_tz:
+ thermal_zone_of_sensor_unregister(&pdev->dev, sensor->th_dev);
+ return ret;
+}
+
+static int stm_thermal_remove(struct platform_device *pdev)
+{
+ struct stm_thermal_sensor *sensor = platform_get_drvdata(pdev);
+
+ stm_thermal_sensor_off(sensor);
+ thermal_remove_hwmon_sysfs(sensor->th_dev);
+ thermal_zone_of_sensor_unregister(&pdev->dev, sensor->th_dev);
+
+ return 0;
+}
+
+static struct platform_driver stm_thermal_driver = {
+ .driver = {
+ .name = "stm_thermal",
+ .pm = &stm_thermal_pm_ops,
+ .of_match_table = stm_thermal_of_match,
+ },
+ .probe = stm_thermal_probe,
+ .remove = stm_thermal_remove,
+};
+module_platform_driver(stm_thermal_driver);
+
+MODULE_DESCRIPTION("STMicroelectronics STM32 Thermal Sensor Driver");
+MODULE_AUTHOR("David Hernandez Sanchez <david.hernandezsanchez@st.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:stm_thermal");
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/console.h>
-#include <asm/io.h>
#ifdef CONFIG_PMAC_BACKLIGHT
#include <asm/backlight.h>
#endif /* CONFIG_PMAC_BACKLIGHT */
#ifdef CONFIG_PPC
- p->screen_base = __ioremap(addr, 0x200000, _PAGE_NO_CACHE);
+ p->screen_base = ioremap_wc(addr, 0x200000);
#else
p->screen_base = ioremap(addr, 0x200000);
#endif
#include <linux/nvram.h>
#include <linux/adb.h>
#include <linux/cuda.h>
-#include <asm/io.h>
#include <asm/prom.h>
-#include <asm/pgtable.h>
#include <asm/btext.h>
#include "macmodes.h"
goto error_out;
}
/* map at most 8MB for the frame buffer */
- p->frame_buffer = __ioremap(p->frame_buffer_phys, 0x800000,
- _PAGE_WRITETHRU);
+ p->frame_buffer = ioremap_wt(p->frame_buffer_phys, 0x800000);
if (!p->control_regs_phys ||
!request_mem_region(p->control_regs_phys, p->control_regs_size,
pr_info("Freescale Display Interface Unit (DIU) framebuffer driver\n");
#ifdef CONFIG_NOT_COHERENT_CACHE
- np = of_find_node_by_type(NULL, "cpu");
+ np = of_get_cpu_node(0, NULL);
if (!np) {
pr_err("fsl-diu-fb: can't find 'cpu' device node\n");
return -ENODEV;
#include <linux/nvram.h>
#include <linux/of_device.h>
#include <linux/of_platform.h>
-#include <asm/io.h>
#include <asm/prom.h>
-#include <asm/pgtable.h>
#include "macmodes.h"
#include "platinumfb.h"
/* frame buffer - map only 4MB */
pinfo->frame_buffer_phys = pinfo->rsrc_fb.start;
- pinfo->frame_buffer = __ioremap(pinfo->rsrc_fb.start, 0x400000,
- _PAGE_WRITETHRU);
+ pinfo->frame_buffer = ioremap_wt(pinfo->rsrc_fb.start, 0x400000);
pinfo->base_frame_buffer = pinfo->frame_buffer;
/* registers */
#include <linux/nvram.h>
#include <linux/adb.h>
#include <linux/cuda.h>
-#include <asm/io.h>
#ifdef CONFIG_MAC
#include <asm/macintosh.h>
#else
#include <asm/prom.h>
#endif
-#include <asm/pgtable.h>
#include "macmodes.h"
#include "valkyriefb.h"
int __init valkyriefb_init(void)
{
struct fb_info_valkyrie *p;
- unsigned long frame_buffer_phys, cmap_regs_phys, flags;
+ unsigned long frame_buffer_phys, cmap_regs_phys;
int err;
char *option = NULL;
/* Hardcoded addresses... welcome to 68k Macintosh country :-) */
frame_buffer_phys = 0xf9000000;
cmap_regs_phys = 0x50f24000;
- flags = IOMAP_NOCACHE_SER; /* IOMAP_WRITETHROUGH?? */
#else /* ppc (!CONFIG_MAC) */
{
struct device_node *dp;
frame_buffer_phys = r.start;
cmap_regs_phys = r.start + 0x304000;
- flags = _PAGE_WRITETHRU;
}
#endif /* ppc (!CONFIG_MAC) */
}
p->total_vram = 0x100000;
p->frame_buffer_phys = frame_buffer_phys;
- p->frame_buffer = __ioremap(frame_buffer_phys, p->total_vram, flags);
+#ifdef CONFIG_MAC
+ p->frame_buffer = ioremap_nocache(frame_buffer_phys, p->total_vram);
+#else
+ p->frame_buffer = ioremap_wt(frame_buffer_phys, p->total_vram);
+#endif
p->cmap_regs_phys = cmap_regs_phys;
p->cmap_regs = ioremap(p->cmap_regs_phys, 0x1000);
p->valkyrie_regs_phys = cmap_regs_phys+0x6000;
struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr;
struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)&iface->sockaddr;
- seq_printf(m, "\t\tSpeed: %zu bps\n", iface->speed);
+ seq_printf(m, "\tSpeed: %zu bps\n", iface->speed);
seq_puts(m, "\t\tCapabilities: ");
if (iface->rdma_capable)
seq_puts(m, "rdma ");
if ((ses->serverDomain == NULL) ||
(ses->serverOS == NULL) ||
(ses->serverNOS == NULL)) {
- seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d\t",
+ seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
i, ses->serverName, ses->ses_count,
ses->capabilities, ses->status);
if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
seq_printf(m,
"\n%d) Name: %s Domain: %s Uses: %d OS:"
" %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB"
- " session status: %d\t",
+ " session status: %d ",
i, ses->serverName, ses->serverDomain,
ses->ses_count, ses->serverOS, ses->serverNOS,
ses->capabilities, ses->status);
}
if (server->rdma)
seq_printf(m, "RDMA\n\t");
- seq_printf(m, "TCP status: %d\n\tLocal Users To "
+ seq_printf(m, "TCP status: %d Instance: %d\n\tLocal Users To "
"Server: %d SecMode: 0x%x Req On Wire: %d",
- server->tcpStatus, server->srv_count,
+ server->tcpStatus,
+ server->reconnect_instance,
+ server->srv_count,
server->sec_mode, in_flight(server));
#ifdef CONFIG_CIFS_STATS2
seq_printf(m, "\n\tServer interfaces: %zu\n",
ses->iface_count);
for (j = 0; j < ses->iface_count; j++) {
- seq_printf(m, "\t%d)\n", j);
+ seq_printf(m, "\t%d)", j);
cifs_dump_iface(m, &ses->iface_list[j]);
}
spin_unlock(&ses->iface_lock);
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
#endif /* CONFIG_CIFS_STATS2 */
+ atomic_set(&tcpSesReconnectCount, 0);
+ atomic_set(&tconInfoReconnectCount, 0);
+
spin_lock(&GlobalMid_Lock);
GlobalMaxActiveXid = 0;
GlobalCurrentXid = 0;
*/
#ifdef CONFIG_CIFS_DEBUG
+
+/*
+ * When adding tracepoints and debug messages we have various choices.
+ * Some considerations:
+ *
+ * Use cifs_dbg(VFS, ...) for things we always want logged, and the user to see
+ * cifs_info(...) slightly less important, admin can filter via loglevel > 6
+ * cifs_dbg(FYI, ...) minor debugging messages, off by default
+ * trace_smb3_* ftrace functions are preferred for complex debug messages
+ * intended for developers or experienced admins, off by default
+ */
+
+/* Information level messages, minor events */
+#define cifs_info_func(ratefunc, fmt, ...) \
+do { \
+ pr_info_ ## ratefunc("CIFS: " fmt, ##__VA_ARGS__); \
+} while (0)
+
+#define cifs_info(fmt, ...) \
+do { \
+ cifs_info_func(ratelimited, fmt, ##__VA_ARGS__); \
+} while (0)
+
/* information message: e.g., configuration, major event */
#define cifs_dbg_func(ratefunc, type, fmt, ...) \
do { \
if (0) \
pr_debug(fmt, ##__VA_ARGS__); \
} while (0)
+
+#define cifs_info(fmt, ...) \
+do { \
+ pr_info("CIFS: "fmt, ##__VA_ARGS__); \
+} while (0)
#endif
#endif /* _H_CIFS_DEBUG */
*/
mnt = ERR_PTR(-ENOMEM);
+ cifs_sb = CIFS_SB(mntpt->d_sb);
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) {
+ mnt = ERR_PTR(-EREMOTE);
+ goto cdda_exit;
+ }
+
/* always use tree name prefix */
full_path = build_path_from_dentry_optional_prefix(mntpt, true);
if (full_path == NULL)
goto cdda_exit;
- cifs_sb = CIFS_SB(mntpt->d_sb);
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
mnt = ERR_CAST(tlink);
*/
#define CIFS_MOUNT_UID_FROM_ACL 0x2000000 /* try to get UID via special SID */
#define CIFS_MOUNT_NO_HANDLE_CACHE 0x4000000 /* disable caching dir handles */
+#define CIFS_MOUNT_NO_DFS 0x8000000 /* disable DFS resolving */
struct cifs_sb_info {
struct rb_root tlink_tree;
/* snapshots[]; */
} __packed;
+struct smb_query_info {
+ __u32 info_type;
+ __u32 file_info_class;
+ __u32 additional_information;
+ __u32 flags;
+ __u32 input_buffer_length;
+ __u32 output_buffer_length;
+ /* char buffer[]; */
+} __packed;
+
#define CIFS_IOCTL_MAGIC 0xCF
#define CIFS_IOC_COPYCHUNK_FILE _IOW(CIFS_IOCTL_MAGIC, 3, int)
#define CIFS_IOC_SET_INTEGRITY _IO(CIFS_IOCTL_MAGIC, 4)
#define CIFS_IOC_GET_MNT_INFO _IOR(CIFS_IOCTL_MAGIC, 5, struct smb_mnt_fs_info)
#define CIFS_ENUMERATE_SNAPSHOTS _IOR(CIFS_IOCTL_MAGIC, 6, struct smb_snapshot_array)
+#define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info)
MODULE_PARM_DESC(cifs_max_pending, "Simultaneous requests to server for "
"CIFS/SMB1 dialect (N/A for SMB3) "
"Default: 32767 Range: 2 to 32767.");
+#ifdef CONFIG_CIFS_STATS2
+unsigned int slow_rsp_threshold = 1;
+module_param(slow_rsp_threshold, uint, 0644);
+MODULE_PARM_DESC(slow_rsp_threshold, "Amount of time (in seconds) to wait "
+ "before logging that a response is delayed. "
+ "Default: 1 (if set to 0 disables msg).");
+#endif /* STATS2 */
+
module_param(enable_oplocks, bool, 0644);
MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1");
seq_puts(s, ",unix");
else
seq_puts(s, ",nounix");
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+ seq_puts(s, ",nodfs");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS)
seq_puts(s, ",posixpaths");
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SET_UID)
struct cifs_mnt_data mnt_data;
struct dentry *root;
- cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
+ /*
+ * Prints in Kernel / CIFS log the attempted mount operation
+ * If CIFS_DEBUG && cifs_FYI
+ */
+ if (cifsFYI)
+ cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
+ else
+ cifs_info("Attempting to mount %s\n", dev_name);
volume_info = cifs_get_volume_info((char *)data, dev_name, is_smb3);
if (IS_ERR(volume_info))
#ifdef CONFIG_CIFS_STATS2
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
+ if (slow_rsp_threshold < 1)
+ cifs_dbg(FYI, "slow_response_threshold msgs disabled\n");
+ else if (slow_rsp_threshold > 32767)
+ cifs_dbg(VFS,
+ "slow response threshold set higher than recommended (0 to 32767)\n");
#endif /* CONFIG_CIFS_STATS2 */
atomic_set(&midCount, 0);
cifs_proc_clean();
}
-MODULE_AUTHOR("Steve French <sfrench@us.ibm.com>");
+MODULE_AUTHOR("Steve French");
MODULE_LICENSE("GPL"); /* combination of LGPL + GPL source behaves as GPL */
MODULE_DESCRIPTION
- ("VFS to access servers complying with the SNIA CIFS Specification "
- "e.g. Samba and Windows");
+ ("VFS to access SMB3 servers e.g. Samba, Macs, Azure and Windows (and "
+ "also older servers complying with the SNIA CIFS Specification)");
MODULE_VERSION(CIFS_VERSION);
MODULE_SOFTDEP("pre: arc4");
MODULE_SOFTDEP("pre: des");
extern const struct export_operations cifs_export_ops;
#endif /* CONFIG_CIFS_NFSD_EXPORT */
-#define CIFS_VERSION "2.13"
+#define CIFS_VERSION "2.14"
#endif /* _CIFSFS_H */
#define CIFS_MAGIC_NUMBER 0xFF534D42 /* the first four bytes of SMB PDUs */
+#define SMB_PATH_MAX 260
#define CIFS_PORT 445
#define RFC1001_PORT 139
enum securityEnum (*select_sectype)(struct TCP_Server_Info *,
enum securityEnum);
int (*next_header)(char *);
+ /* ioctl passthrough for query_info */
+ int (*ioctl_query_info)(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ __le16 *path, int is_dir,
+ unsigned long p);
};
struct smb_version_values {
/* 16th byte of RFC1001 workstation name is always null */
char workstation_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
__u32 sequence_number; /* for signing, protected by srv_mutex */
+ __u32 reconnect_instance; /* incremented on each reconnect */
struct session_key session_key;
unsigned long lstrp; /* when we got last response from this server */
struct cifs_secmech secmech; /* crypto sec mech functs, descriptors */
* a single wsize request with a single call.
*/
#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
+#define SMB3_DEFAULT_IOSIZE (4 * 1024 * 1024)
/*
* Windows only supports a max of 60kb reads and 65535 byte writes. Default to
struct list_head tcon_list;
int tc_count;
struct list_head rlist; /* reconnect list */
+ atomic_t num_local_opens; /* num of all opens including disconnected */
+ atomic_t num_remote_opens; /* num of all network opens on server */
struct list_head openFileList;
spinlock_t open_file_lock; /* protects list above */
struct cifs_ses *ses; /* pointer to session associated with */
__u64 offset;
__u64 length;
__u32 pid;
- __u32 type;
+ __u16 type;
+ __u16 flags;
};
/*
#ifdef CONFIG_CIFS_STATS2
GLOBAL_EXTERN atomic_t totBufAllocCount; /* total allocated over all time */
GLOBAL_EXTERN atomic_t totSmBufAllocCount;
+extern unsigned int slow_rsp_threshold; /* number of secs before logging */
#endif
GLOBAL_EXTERN atomic_t smBufAllocCount;
GLOBAL_EXTERN atomic_t midCount;
extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
extern bool cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset,
- __u64 length, __u8 type,
+ __u64 length, __u8 type, __u16 flags,
struct cifsLockInfo **conf_lock,
int rw_check);
extern void cifs_add_pending_open(struct cifs_fid *fid,
struct smb_rqst rqst = { .rq_iov = rdata->iov,
.rq_nvec = 2,
.rq_pages = rdata->pages,
+ .rq_offset = rdata->page_offset,
.rq_npages = rdata->nr_pages,
.rq_pagesz = rdata->pagesz,
.rq_tailsz = rdata->tailsz };
rqst.rq_iov = iov;
rqst.rq_nvec = 2;
rqst.rq_pages = wdata->pages;
+ rqst.rq_offset = wdata->page_offset;
rqst.rq_npages = wdata->nr_pages;
rqst.rq_pagesz = wdata->pagesz;
rqst.rq_tailsz = wdata->tailsz;
le16_to_cpu(response_data->BytesPerSector) *
le32_to_cpu(response_data->
SectorsPerAllocationUnit);
+ /*
+ * much prefer larger but if server doesn't report
+ * a valid size than 4K is a reasonable minimum
+ */
+ if (FSData->f_bsize < 512)
+ FSData->f_bsize = 4096;
+
FSData->f_blocks =
le32_to_cpu(response_data->TotalAllocationUnits);
FSData->f_bfree = FSData->f_bavail =
le32_to_cpu(response_data->BytesPerSector) *
le32_to_cpu(response_data->
SectorsPerAllocationUnit);
+ /*
+ * much prefer larger but if server doesn't report
+ * a valid size than 4K is a reasonable minimum
+ */
+ if (FSData->f_bsize < 512)
+ FSData->f_bsize = 4096;
+
FSData->f_blocks =
le64_to_cpu(response_data->TotalAllocationUnits);
FSData->f_bfree = FSData->f_bavail =
data_offset);
FSData->f_bsize =
le32_to_cpu(response_data->BlockSize);
+ /*
+ * much prefer larger but if server doesn't report
+ * a valid size than 4K is a reasonable minimum
+ */
+ if (FSData->f_bsize < 512)
+ FSData->f_bsize = 4096;
+
FSData->f_blocks =
le64_to_cpu(response_data->TotalBlocks);
FSData->f_bfree =
{ Opt_ignore, "dev" },
{ Opt_ignore, "mand" },
{ Opt_ignore, "nomand" },
+ { Opt_ignore, "relatime" },
{ Opt_ignore, "_netdev" },
{ Opt_err, NULL }
server->maxBuf = 0;
server->max_read = 0;
- cifs_dbg(FYI, "Reconnecting tcp session\n");
+ cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
trace_smb3_reconnect(server->CurrentMid, server->hostname);
/* before reconnecting the tcp session, mark the smb session (uid)
volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
tcp_ses->session_estab = false;
tcp_ses->sequence_number = 0;
+ tcp_ses->reconnect_instance = 0;
tcp_ses->lstrp = jiffies;
spin_lock_init(&tcp_ses->req_lock);
INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
if (rc)
goto out_fail;
- if (volume_info->nodfs) {
- tcon->Flags &= ~SMB_SHARE_IS_IN_DFS;
- cifs_dbg(FYI, "DFS disabled (%d)\n", tcon->Flags);
- }
tcon->use_persistent = false;
/* check if SMB2 or later, CIFS does not support persistent handles */
if (volume_info->persistent) {
cifs_sb->actimeo = pvolume_info->actimeo;
cifs_sb->local_nls = pvolume_info->local_nls;
+ if (pvolume_info->nodfs)
+ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS;
if (pvolume_info->noperm)
cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
if (pvolume_info->setuids)
struct dfs_info3_param *referrals = NULL;
char *full_path = NULL, *ref_path = NULL, *mdata = NULL;
+ if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
+ return -EREMOTE;
+
full_path = build_unc_path_to_root(volume_info, cifs_sb);
if (IS_ERR(full_path))
return PTR_ERR(full_path);
server->ops->set_fid(cfile, fid, oplock);
list_add(&cfile->tlist, &tcon->openFileList);
+ atomic_inc(&tcon->num_local_opens);
/* if readable file instance put first in list*/
if (file->f_mode & FMODE_READ)
/* remove it from the lists */
list_del(&cifs_file->flist);
list_del(&cifs_file->tlist);
+ atomic_dec(&tcon->num_local_opens);
if (list_empty(&cifsi->openFileList)) {
cifs_dbg(FYI, "closing last open instance for inode %p\n",
}
static struct cifsLockInfo *
-cifs_lock_init(__u64 offset, __u64 length, __u8 type)
+cifs_lock_init(__u64 offset, __u64 length, __u8 type, __u16 flags)
{
struct cifsLockInfo *lock =
kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
lock->length = length;
lock->type = type;
lock->pid = current->tgid;
+ lock->flags = flags;
INIT_LIST_HEAD(&lock->blist);
init_waitqueue_head(&lock->block_q);
return lock;
/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
- __u64 length, __u8 type, struct cifsFileInfo *cfile,
+ __u64 length, __u8 type, __u16 flags,
+ struct cifsFileInfo *cfile,
struct cifsLockInfo **conf_lock, int rw_check)
{
struct cifsLockInfo *li;
((server->ops->compare_fids(cfile, cur_cfile) &&
current->tgid == li->pid) || type == li->type))
continue;
+ if (rw_check == CIFS_LOCK_OP &&
+ (flags & FL_OFDLCK) && (li->flags & FL_OFDLCK) &&
+ server->ops->compare_fids(cfile, cur_cfile))
+ continue;
if (conf_lock)
*conf_lock = li;
return true;
bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
- __u8 type, struct cifsLockInfo **conf_lock,
- int rw_check)
+ __u8 type, __u16 flags,
+ struct cifsLockInfo **conf_lock, int rw_check)
{
bool rc = false;
struct cifs_fid_locks *cur;
list_for_each_entry(cur, &cinode->llist, llist) {
rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
- cfile, conf_lock, rw_check);
+ flags, cfile, conf_lock,
+ rw_check);
if (rc)
break;
}
down_read(&cinode->lock_sem);
exist = cifs_find_lock_conflict(cfile, offset, length, type,
- &conf_lock, CIFS_LOCK_OP);
+ flock->fl_flags, &conf_lock,
+ CIFS_LOCK_OP);
if (exist) {
flock->fl_start = conf_lock->offset;
flock->fl_end = conf_lock->offset + conf_lock->length - 1;
down_write(&cinode->lock_sem);
exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
- lock->type, &conf_lock, CIFS_LOCK_OP);
+ lock->type, lock->flags, &conf_lock,
+ CIFS_LOCK_OP);
if (!exist && cinode->can_cache_brlcks) {
list_add_tail(&lock->llist, &cfile->llist->locks);
up_write(&cinode->lock_sem);
cifs_dbg(FYI, "Lease on file - not implemented yet\n");
if (flock->fl_flags &
(~(FL_POSIX | FL_FLOCK | FL_SLEEP |
- FL_ACCESS | FL_LEASE | FL_CLOSE)))
+ FL_ACCESS | FL_LEASE | FL_CLOSE | FL_OFDLCK)))
cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);
*type = server->vals->large_lock_type;
if (lock) {
struct cifsLockInfo *lock;
- lock = cifs_lock_init(flock->fl_start, length, type);
+ lock = cifs_lock_init(flock->fl_start, length, type,
+ flock->fl_flags);
if (!lock)
return -ENOMEM;
cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
tcon->ses->server);
-
cifs_sb = CIFS_FILE_SB(file);
netfid = cfile->fid.netfid;
cinode = CIFS_I(file_inode(file));
pgoff_t end, index;
struct cifs_writedata *wdata;
int rc = 0;
+ unsigned int xid;
/*
* If wsize is smaller than the page cache size, default to writing
if (cifs_sb->wsize < PAGE_SIZE)
return generic_writepages(mapping, wbc);
+ xid = get_xid();
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
mapping->writeback_index = index;
+ free_xid(xid);
return rc;
}
goto out;
if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
- server->vals->exclusive_lock_type, NULL,
- CIFS_WRITE_OP))
+ server->vals->exclusive_lock_type, 0,
+ NULL, CIFS_WRITE_OP))
rc = __generic_file_write_iter(iocb, from);
else
rc = -EACCES;
down_read(&cinode->lock_sem);
if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
tcon->ses->server->vals->shared_lock_type,
- NULL, CIFS_READ_OP))
+ 0, NULL, CIFS_READ_OP))
rc = generic_file_read_iter(iocb, to);
up_read(&cinode->lock_sem);
return rc;
struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
struct TCP_Server_Info *server;
pid_t pid;
+ unsigned int xid;
+ xid = get_xid();
/*
* Reads as many pages as possible from fscache. Returns -ENOBUFS
* immediately if the cookie is negative
*/
rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
&num_pages);
- if (rc == 0)
+ if (rc == 0) {
+ free_xid(xid);
return rc;
+ }
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
pid = open_file->pid;
*/
if (unlikely(rsize < PAGE_SIZE)) {
add_credits_and_wake_if(server, credits, 0);
+ free_xid(xid);
return 0;
}
* allocator.
*/
cifs_fscache_readpages_cancel(mapping->host, page_list);
+ free_xid(xid);
return rc;
}
else
cifs_dbg(FYI, "Bytes read %d\n", rc);
- file_inode(file)->i_atime =
- current_time(file_inode(file));
+ /* we do not want atime to be less than mtime, it broke some apps */
+ file_inode(file)->i_atime = current_time(file_inode(file));
+ if (timespec64_compare(&(file_inode(file)->i_atime), &(file_inode(file)->i_mtime)))
+ file_inode(file)->i_atime = file_inode(file)->i_mtime;
+ else
+ file_inode(file)->i_atime = current_time(file_inode(file));
if (PAGE_SIZE > rc)
memset(read_data + rc, 0, PAGE_SIZE - rc);
cifs_revalidate_cache(inode, fattr);
spin_lock(&inode->i_lock);
- inode->i_atime = fattr->cf_atime;
+ /* we do not want atime to be less than mtime, it broke some apps */
+ if (timespec64_compare(&fattr->cf_atime, &fattr->cf_mtime))
+ inode->i_atime = fattr->cf_mtime;
+ else
+ inode->i_atime = fattr->cf_atime;
inode->i_mtime = fattr->cf_mtime;
inode->i_ctime = fattr->cf_ctime;
inode->i_rdev = fattr->cf_rdev;
} else if (rc == -EREMOTE) {
cifs_create_dfs_fattr(&fattr, sb);
rc = 0;
- } else if (rc == -EACCES && backup_cred(cifs_sb)) {
- srchinf = kzalloc(sizeof(struct cifs_search_info),
- GFP_KERNEL);
- if (srchinf == NULL) {
- rc = -ENOMEM;
- goto cgii_exit;
- }
+ } else if ((rc == -EACCES) && backup_cred(cifs_sb) &&
+ (strcmp(server->vals->version_string, SMB1_VERSION_STRING)
+ == 0)) {
+ /*
+ * For SMB2 and later the backup intent flag is already
+ * sent if needed on open and there is no path based
+ * FindFirst operation to use to retry with
+ */
- srchinf->endOfSearch = false;
+ srchinf = kzalloc(sizeof(struct cifs_search_info),
+ GFP_KERNEL);
+ if (srchinf == NULL) {
+ rc = -ENOMEM;
+ goto cgii_exit;
+ }
+
+ srchinf->endOfSearch = false;
+ if (tcon->unix_ext)
+ srchinf->info_level = SMB_FIND_FILE_UNIX;
+ else if ((tcon->ses->capabilities &
+ tcon->ses->server->vals->cap_nt_find) == 0)
+ srchinf->info_level = SMB_FIND_FILE_INFO_STANDARD;
+ else if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM)
srchinf->info_level = SMB_FIND_FILE_ID_FULL_DIR_INFO;
+ else /* no srvino useful for fallback to some netapp */
+ srchinf->info_level = SMB_FIND_FILE_DIRECTORY_INFO;
- srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
- CIFS_SEARCH_CLOSE_AT_END |
- CIFS_SEARCH_BACKUP_SEARCH;
+ srchflgs = CIFS_SEARCH_CLOSE_ALWAYS |
+ CIFS_SEARCH_CLOSE_AT_END |
+ CIFS_SEARCH_BACKUP_SEARCH;
- rc = CIFSFindFirst(xid, tcon, full_path,
- cifs_sb, NULL, srchflgs, srchinf, false);
- if (!rc) {
- data =
- (FILE_ALL_INFO *)srchinf->srch_entries_start;
+ rc = CIFSFindFirst(xid, tcon, full_path,
+ cifs_sb, NULL, srchflgs, srchinf, false);
+ if (!rc) {
+ data = (FILE_ALL_INFO *)srchinf->srch_entries_start;
- cifs_dir_info_to_fattr(&fattr,
- (FILE_DIRECTORY_INFO *)data, cifs_sb);
- fattr.cf_uniqueid = le64_to_cpu(
- ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
- validinum = true;
+ cifs_dir_info_to_fattr(&fattr,
+ (FILE_DIRECTORY_INFO *)data, cifs_sb);
+ fattr.cf_uniqueid = le64_to_cpu(
+ ((SEARCH_ID_FULL_DIR_INFO *)data)->UniqueId);
+ validinum = true;
- cifs_buf_release(srchinf->ntwrk_buf_start);
- }
- kfree(srchinf);
- if (rc)
- goto cgii_exit;
+ cifs_buf_release(srchinf->ntwrk_buf_start);
+ }
+ kfree(srchinf);
+ if (rc)
+ goto cgii_exit;
} else
goto cgii_exit;
#include "cifs_debug.h"
#include "cifsfs.h"
#include "cifs_ioctl.h"
+#include "smb2proto.h"
#include <linux/btrfs.h>
+static long cifs_ioctl_query_info(unsigned int xid, struct file *filep,
+ unsigned long p)
+{
+ struct inode *inode = file_inode(filep);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
+ struct dentry *dentry = filep->f_path.dentry;
+ unsigned char *path;
+ __le16 *utf16_path = NULL, root_path;
+ int rc = 0;
+
+ path = build_path_from_dentry(dentry);
+ if (path == NULL)
+ return -ENOMEM;
+
+ cifs_dbg(FYI, "%s %s\n", __func__, path);
+
+ if (!path[0]) {
+ root_path = 0;
+ utf16_path = &root_path;
+ } else {
+ utf16_path = cifs_convert_path_to_utf16(path + 1, cifs_sb);
+ if (!utf16_path) {
+ rc = -ENOMEM;
+ goto ici_exit;
+ }
+ }
+
+ if (tcon->ses->server->ops->ioctl_query_info)
+ rc = tcon->ses->server->ops->ioctl_query_info(
+ xid, tcon, utf16_path,
+ filep->private_data ? 0 : 1, p);
+ else
+ rc = -EOPNOTSUPP;
+
+ ici_exit:
+ if (utf16_path != &root_path)
+ kfree(utf16_path);
+ kfree(path);
+ return rc;
+}
+
static long cifs_ioctl_copychunk(unsigned int xid, struct file *dst_file,
unsigned long srcfd)
{
struct inode *inode = file_inode(filep);
int rc = -ENOTTY; /* strange error - but the precedent */
unsigned int xid;
- struct cifs_sb_info *cifs_sb;
struct cifsFileInfo *pSMBFile = filep->private_data;
struct cifs_tcon *tcon;
__u64 ExtAttrBits = 0;
xid = get_xid();
- cifs_sb = CIFS_SB(inode->i_sb);
cifs_dbg(FYI, "cifs ioctl 0x%x\n", command);
switch (command) {
case FS_IOC_GETFLAGS:
case CIFS_IOC_COPYCHUNK_FILE:
rc = cifs_ioctl_copychunk(xid, filep, arg);
break;
+ case CIFS_QUERY_INFO:
+ rc = cifs_ioctl_query_info(xid, filep, arg);
+ break;
case CIFS_IOC_SET_INTEGRITY:
if (pSMBFile == NULL)
break;
ret_buf->crfid.fid = kzalloc(sizeof(struct cifs_fid),
GFP_KERNEL);
spin_lock_init(&ret_buf->stat_lock);
+ atomic_set(&ret_buf->num_local_opens, 0);
+ atomic_set(&ret_buf->num_remote_opens, 0);
}
return ret_buf;
}
/*
* Identifiers for functions that use the open, operation, close pattern
- * in smb2inode.c:smb2_open_op_close()
+ * in smb2inode.c:smb2_compound_op()
*/
#define SMB2_OP_SET_DELETE 1
#define SMB2_OP_SET_INFO 2
#include "smb2proto.h"
static int
-smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
- struct cifs_sb_info *cifs_sb, const char *full_path,
- __u32 desired_access, __u32 create_disposition,
- __u32 create_options, void *data, int command)
+smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
+ struct cifs_sb_info *cifs_sb, const char *full_path,
+ __u32 desired_access, __u32 create_disposition,
+ __u32 create_options, void *ptr, int command)
{
- int rc, tmprc = 0;
+ int rc;
__le16 *utf16_path = NULL;
__u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
struct cifs_open_parms oparms;
struct cifs_fid fid;
- bool use_cached_root_handle = false;
-
- if ((strcmp(full_path, "") == 0) && (create_options == 0) &&
- (desired_access == FILE_READ_ATTRIBUTES) &&
- (create_disposition == FILE_OPEN) &&
- (tcon->nohandlecache == false)) {
- rc = open_shroot(xid, tcon, &fid);
- if (rc == 0)
- use_cached_root_handle = true;
- }
+ struct cifs_ses *ses = tcon->ses;
+ struct TCP_Server_Info *server = ses->server;
+ int num_rqst = 0;
+ struct smb_rqst rqst[3];
+ int resp_buftype[3];
+ struct kvec rsp_iov[3];
+ struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+ struct kvec qi_iov[1];
+ struct kvec si_iov[SMB2_SET_INFO_IOV_SIZE];
+ struct kvec close_iov[1];
+ struct smb2_query_info_rsp *qi_rsp = NULL;
+ int flags = 0;
+ __u8 delete_pending[8] = {1, 0, 0, 0, 0, 0, 0, 0};
+ unsigned int size[2];
+ void *data[2];
+ struct smb2_file_rename_info rename_info;
+ struct smb2_file_link_info link_info;
+ int len;
- if (use_cached_root_handle == false) {
- utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
- if (!utf16_path)
- return -ENOMEM;
-
- oparms.tcon = tcon;
- oparms.desired_access = desired_access;
- oparms.disposition = create_disposition;
- oparms.create_options = create_options;
- oparms.fid = &fid;
- oparms.reconnect = false;
-
- rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL,
- NULL);
- if (rc) {
- kfree(utf16_path);
- return rc;
- }
- }
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ memset(rqst, 0, sizeof(rqst));
+ resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
+ memset(rsp_iov, 0, sizeof(rsp_iov));
+
+ /* Open */
+ utf16_path = cifs_convert_path_to_utf16(full_path, cifs_sb);
+ if (!utf16_path)
+ return -ENOMEM;
+
+ oparms.tcon = tcon;
+ oparms.desired_access = desired_access;
+ oparms.disposition = create_disposition;
+ oparms.create_options = create_options;
+ if (backup_cred(cifs_sb))
+ oparms.create_options |= CREATE_OPEN_BACKUP_INTENT;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+ memset(&open_iov, 0, sizeof(open_iov));
+ rqst[num_rqst].rq_iov = open_iov;
+ rqst[num_rqst].rq_nvec = SMB2_CREATE_IOV_SIZE;
+ rc = SMB2_open_init(tcon, &rqst[num_rqst], &oplock, &oparms,
+ utf16_path);
+ kfree(utf16_path);
+ if (rc)
+ goto finished;
+
+ smb2_set_next_command(server, &rqst[num_rqst++]);
+
+ /* Operation */
switch (command) {
- case SMB2_OP_DELETE:
- break;
case SMB2_OP_QUERY_INFO:
- tmprc = SMB2_query_info(xid, tcon, fid.persistent_fid,
- fid.volatile_fid,
- (struct smb2_file_all_info *)data);
+ memset(&qi_iov, 0, sizeof(qi_iov));
+ rqst[num_rqst].rq_iov = qi_iov;
+ rqst[num_rqst].rq_nvec = 1;
+
+ rc = SMB2_query_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, FILE_ALL_INFORMATION,
+ SMB2_O_INFO_FILE, 0,
+ sizeof(struct smb2_file_all_info) +
+ PATH_MAX * 2, 0, NULL);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
+ break;
+ case SMB2_OP_DELETE:
break;
case SMB2_OP_MKDIR:
/*
*/
break;
case SMB2_OP_RMDIR:
- tmprc = SMB2_rmdir(xid, tcon, fid.persistent_fid,
- fid.volatile_fid);
- break;
- case SMB2_OP_RENAME:
- tmprc = SMB2_rename(xid, tcon, fid.persistent_fid,
- fid.volatile_fid, (__le16 *)data);
- break;
- case SMB2_OP_HARDLINK:
- tmprc = SMB2_set_hardlink(xid, tcon, fid.persistent_fid,
- fid.volatile_fid, (__le16 *)data);
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 1;
+
+ size[0] = 8;
+ data[0] = &delete_pending[0];
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_DISPOSITION_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_SET_EOF:
- tmprc = SMB2_set_eof(xid, tcon, fid.persistent_fid,
- fid.volatile_fid, current->tgid,
- (__le64 *)data, false);
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 1;
+
+ size[0] = 8; /* sizeof __le64 */
+ data[0] = ptr;
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_END_OF_FILE_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
break;
case SMB2_OP_SET_INFO:
- tmprc = SMB2_set_info(xid, tcon, fid.persistent_fid,
- fid.volatile_fid,
- (FILE_BASIC_INFO *)data);
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 1;
+
+
+ size[0] = sizeof(FILE_BASIC_INFO);
+ data[0] = ptr;
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_BASIC_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
+ break;
+ case SMB2_OP_RENAME:
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 2;
+
+ len = (2 * UniStrnlen((wchar_t *)ptr, PATH_MAX));
+
+ rename_info.ReplaceIfExists = 1;
+ rename_info.RootDirectory = 0;
+ rename_info.FileNameLength = cpu_to_le32(len);
+
+ size[0] = sizeof(struct smb2_file_rename_info);
+ data[0] = &rename_info;
+
+ size[1] = len + 2 /* null */;
+ data[1] = (__le16 *)ptr;
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_RENAME_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
+ break;
+ case SMB2_OP_HARDLINK:
+ memset(&si_iov, 0, sizeof(si_iov));
+ rqst[num_rqst].rq_iov = si_iov;
+ rqst[num_rqst].rq_nvec = 2;
+
+ len = (2 * UniStrnlen((wchar_t *)ptr, PATH_MAX));
+
+ link_info.ReplaceIfExists = 0;
+ link_info.RootDirectory = 0;
+ link_info.FileNameLength = cpu_to_le32(len);
+
+ size[0] = sizeof(struct smb2_file_link_info);
+ data[0] = &link_info;
+
+ size[1] = len + 2 /* null */;
+ data[1] = (__le16 *)ptr;
+
+ rc = SMB2_set_info_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID, current->tgid,
+ FILE_LINK_INFORMATION,
+ SMB2_O_INFO_FILE, 0, data, size);
+ smb2_set_next_command(server, &rqst[num_rqst]);
+ smb2_set_related(&rqst[num_rqst++]);
break;
default:
cifs_dbg(VFS, "Invalid command\n");
- break;
+ rc = -EINVAL;
}
+ if (rc)
+ goto finished;
- if (use_cached_root_handle)
- close_shroot(&tcon->crfid);
- else
- rc = SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
- if (tmprc)
- rc = tmprc;
- kfree(utf16_path);
+ /* Close */
+ memset(&close_iov, 0, sizeof(close_iov));
+ rqst[num_rqst].rq_iov = close_iov;
+ rqst[num_rqst].rq_nvec = 1;
+ rc = SMB2_close_init(tcon, &rqst[num_rqst], COMPOUND_FID,
+ COMPOUND_FID);
+ smb2_set_related(&rqst[num_rqst++]);
+ if (rc)
+ goto finished;
+
+ rc = compound_send_recv(xid, ses, flags, num_rqst, rqst,
+ resp_buftype, rsp_iov);
+
+ finished:
+ SMB2_open_free(&rqst[0]);
+ switch (command) {
+ case SMB2_OP_QUERY_INFO:
+ if (rc == 0) {
+ qi_rsp = (struct smb2_query_info_rsp *)
+ rsp_iov[1].iov_base;
+ rc = smb2_validate_and_copy_iov(
+ le16_to_cpu(qi_rsp->OutputBufferOffset),
+ le32_to_cpu(qi_rsp->OutputBufferLength),
+ &rsp_iov[1], sizeof(struct smb2_file_all_info),
+ ptr);
+ }
+ if (rqst[1].rq_iov)
+ SMB2_query_info_free(&rqst[1]);
+ if (rqst[2].rq_iov)
+ SMB2_close_free(&rqst[2]);
+ break;
+ case SMB2_OP_DELETE:
+ case SMB2_OP_MKDIR:
+ if (rqst[1].rq_iov)
+ SMB2_close_free(&rqst[1]);
+ break;
+ case SMB2_OP_HARDLINK:
+ case SMB2_OP_RENAME:
+ case SMB2_OP_RMDIR:
+ case SMB2_OP_SET_EOF:
+ case SMB2_OP_SET_INFO:
+ if (rqst[1].rq_iov)
+ SMB2_set_info_free(&rqst[1]);
+ if (rqst[2].rq_iov)
+ SMB2_close_free(&rqst[2]);
+ break;
+ }
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
return rc;
}
{
int rc;
struct smb2_file_all_info *smb2_data;
+ __u32 create_options = 0;
*adjust_tz = false;
*symlink = false;
GFP_KERNEL);
if (smb2_data == NULL)
return -ENOMEM;
+ if (backup_cred(cifs_sb))
+ create_options |= CREATE_OPEN_BACKUP_INTENT;
- rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN, 0,
- smb2_data, SMB2_OP_QUERY_INFO);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
+ FILE_READ_ATTRIBUTES, FILE_OPEN, create_options,
+ smb2_data, SMB2_OP_QUERY_INFO);
if (rc == -EOPNOTSUPP) {
*symlink = true;
+ create_options |= OPEN_REPARSE_POINT;
+
/* Failed on a symbolic link - query a reparse point info */
- rc = smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_READ_ATTRIBUTES, FILE_OPEN,
- OPEN_REPARSE_POINT, smb2_data,
- SMB2_OP_QUERY_INFO);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, full_path,
+ FILE_READ_ATTRIBUTES, FILE_OPEN,
+ create_options, smb2_data,
+ SMB2_OP_QUERY_INFO);
}
if (rc)
goto out;
smb2_mkdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
- return smb2_open_op_close(xid, tcon, cifs_sb, name,
- FILE_WRITE_ATTRIBUTES, FILE_CREATE,
- CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR);
+ return smb2_compound_op(xid, tcon, cifs_sb, name,
+ FILE_WRITE_ATTRIBUTES, FILE_CREATE,
+ CREATE_NOT_FILE, NULL, SMB2_OP_MKDIR);
}
void
cifs_i = CIFS_I(inode);
dosattrs = cifs_i->cifsAttrs | ATTR_READONLY;
data.Attributes = cpu_to_le32(dosattrs);
- tmprc = smb2_open_op_close(xid, tcon, cifs_sb, name,
- FILE_WRITE_ATTRIBUTES, FILE_CREATE,
- CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO);
+ tmprc = smb2_compound_op(xid, tcon, cifs_sb, name,
+ FILE_WRITE_ATTRIBUTES, FILE_CREATE,
+ CREATE_NOT_FILE, &data, SMB2_OP_SET_INFO);
if (tmprc == 0)
cifs_i->cifsAttrs = dosattrs;
}
smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
- return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
- CREATE_NOT_FILE,
- NULL, SMB2_OP_RMDIR);
+ return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+ CREATE_NOT_FILE,
+ NULL, SMB2_OP_RMDIR);
}
int
smb2_unlink(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
- return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
- CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
- NULL, SMB2_OP_DELETE);
+ return smb2_compound_op(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
+ CREATE_DELETE_ON_CLOSE | OPEN_REPARSE_POINT,
+ NULL, SMB2_OP_DELETE);
}
static int
goto smb2_rename_path;
}
- rc = smb2_open_op_close(xid, tcon, cifs_sb, from_name, access,
- FILE_OPEN, 0, smb2_to_name, command);
+ rc = smb2_compound_op(xid, tcon, cifs_sb, from_name, access,
+ FILE_OPEN, 0, smb2_to_name, command);
smb2_rename_path:
kfree(smb2_to_name);
return rc;
struct cifs_sb_info *cifs_sb, bool set_alloc)
{
__le64 eof = cpu_to_le64(size);
- return smb2_open_op_close(xid, tcon, cifs_sb, full_path,
- FILE_WRITE_DATA, FILE_OPEN, 0, &eof,
- SMB2_OP_SET_EOF);
+
+ return smb2_compound_op(xid, tcon, cifs_sb, full_path,
+ FILE_WRITE_DATA, FILE_OPEN, 0, &eof,
+ SMB2_OP_SET_EOF);
}
int
if (IS_ERR(tlink))
return PTR_ERR(tlink);
- rc = smb2_open_op_close(xid, tlink_tcon(tlink), cifs_sb, full_path,
- FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
- SMB2_OP_SET_INFO);
+ rc = smb2_compound_op(xid, tlink_tcon(tlink), cifs_sb, full_path,
+ FILE_WRITE_ATTRIBUTES, FILE_OPEN, 0, buf,
+ SMB2_OP_SET_INFO);
cifs_put_tlink(tlink);
return rc;
}
{STATUS_FLT_BUFFER_TOO_SMALL, -ENOBUFS, "STATUS_FLT_BUFFER_TOO_SMALL"},
{STATUS_FVE_PARTIAL_METADATA, -EIO, "STATUS_FVE_PARTIAL_METADATA"},
{STATUS_UNSUCCESSFUL, -EIO, "STATUS_UNSUCCESSFUL"},
- {STATUS_NOT_IMPLEMENTED, -ENOSYS, "STATUS_NOT_IMPLEMENTED"},
+ {STATUS_NOT_IMPLEMENTED, -EOPNOTSUPP, "STATUS_NOT_IMPLEMENTED"},
{STATUS_INVALID_INFO_CLASS, -EIO, "STATUS_INVALID_INFO_CLASS"},
{STATUS_INFO_LENGTH_MISMATCH, -EIO, "STATUS_INFO_LENGTH_MISMATCH"},
{STATUS_ACCESS_VIOLATION, -EACCES, "STATUS_ACCESS_VIOLATION"},
int *val, rc = 0;
spin_lock(&server->req_lock);
val = server->ops->get_credits_field(server, optype);
+
+ /* eg found case where write overlapping reconnect messed up credits */
+ if (((optype & CIFS_OP_MASK) == CIFS_NEG_OP) && (*val != 0))
+ trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
+ server->hostname, *val);
+
*val += add;
if (*val > 65000) {
*val = 65000; /* Don't get near 64K credits, avoid srv bugs */
{
spin_lock(&server->req_lock);
server->credits = val;
+ if (val == 1)
+ server->reconnect_instance++;
spin_unlock(&server->req_lock);
+ /* don't log while holding the lock */
+ if (val == 1)
+ cifs_dbg(FYI, "set credits to 1 due to smb2 reconnect\n");
}
static int *
return wsize;
}
+static unsigned int
+smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+ struct TCP_Server_Info *server = tcon->ses->server;
+ unsigned int wsize;
+
+ /* start with specified wsize, or default */
+ wsize = volume_info->wsize ? volume_info->wsize : SMB3_DEFAULT_IOSIZE;
+ wsize = min_t(unsigned int, wsize, server->max_write);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (server->rdma) {
+ if (server->sign)
+ wsize = min_t(unsigned int,
+ wsize, server->smbd_conn->max_fragmented_send_size);
+ else
+ wsize = min_t(unsigned int,
+ wsize, server->smbd_conn->max_readwrite_size);
+ }
+#endif
+ if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
+ wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
+
+ return wsize;
+}
+
static unsigned int
smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
{
return rsize;
}
+static unsigned int
+smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+{
+ struct TCP_Server_Info *server = tcon->ses->server;
+ unsigned int rsize;
+
+ /* start with specified rsize, or default */
+ rsize = volume_info->rsize ? volume_info->rsize : SMB3_DEFAULT_IOSIZE;
+ rsize = min_t(unsigned int, rsize, server->max_read);
+#ifdef CONFIG_CIFS_SMB_DIRECT
+ if (server->rdma) {
+ if (server->sign)
+ rsize = min_t(unsigned int,
+ rsize, server->smbd_conn->max_fragmented_recv_size);
+ else
+ rsize = min_t(unsigned int,
+ rsize, server->smbd_conn->max_readwrite_size);
+ }
+#endif
+
+ if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
+ rsize = min_t(unsigned int, rsize, SMB2_MAX_BUFFER_SIZE);
+
+ return rsize;
+}
static int
parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
seq_printf(m, "\nBytes read: %llu Bytes written: %llu",
(long long)(tcon->bytes_read),
(long long)(tcon->bytes_written));
+ seq_printf(m, "\nOpen files: %d total (local), %d open on server",
+ atomic_read(&tcon->num_local_opens),
+ atomic_read(&tcon->num_remote_opens));
seq_printf(m, "\nTreeConnects: %d total %d failed",
atomic_read(&sent[SMB2_TREE_CONNECT_HE]),
atomic_read(&failed[SMB2_TREE_CONNECT_HE]));
return rc;
}
+static int
+smb2_ioctl_query_info(const unsigned int xid,
+ struct cifs_tcon *tcon,
+ __le16 *path, int is_dir,
+ unsigned long p)
+{
+ struct cifs_ses *ses = tcon->ses;
+ char __user *arg = (char __user *)p;
+ struct smb_query_info qi;
+ struct smb_query_info __user *pqi;
+ int rc = 0;
+ int flags = 0;
+ struct smb2_query_info_rsp *rsp = NULL;
+ void *buffer = NULL;
+ struct smb_rqst rqst[3];
+ int resp_buftype[3];
+ struct kvec rsp_iov[3];
+ struct kvec open_iov[SMB2_CREATE_IOV_SIZE];
+ struct cifs_open_parms oparms;
+ u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
+ struct cifs_fid fid;
+ struct kvec qi_iov[1];
+ struct kvec close_iov[1];
+
+ memset(rqst, 0, sizeof(rqst));
+ resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
+ memset(rsp_iov, 0, sizeof(rsp_iov));
+
+ if (copy_from_user(&qi, arg, sizeof(struct smb_query_info)))
+ return -EFAULT;
+
+ if (qi.output_buffer_length > 1024)
+ return -EINVAL;
+
+ if (!ses || !(ses->server))
+ return -EIO;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ buffer = kmalloc(qi.output_buffer_length, GFP_KERNEL);
+ if (buffer == NULL)
+ return -ENOMEM;
+
+ if (copy_from_user(buffer, arg + sizeof(struct smb_query_info),
+ qi.output_buffer_length)) {
+ rc = -EFAULT;
+ goto iqinf_exit;
+ }
+
+ /* Open */
+ memset(&open_iov, 0, sizeof(open_iov));
+ rqst[0].rq_iov = open_iov;
+ rqst[0].rq_nvec = SMB2_CREATE_IOV_SIZE;
+
+ memset(&oparms, 0, sizeof(oparms));
+ oparms.tcon = tcon;
+ oparms.desired_access = FILE_READ_ATTRIBUTES | READ_CONTROL;
+ oparms.disposition = FILE_OPEN;
+ if (is_dir)
+ oparms.create_options = CREATE_NOT_FILE;
+ else
+ oparms.create_options = CREATE_NOT_DIR;
+ oparms.fid = &fid;
+ oparms.reconnect = false;
+
+ rc = SMB2_open_init(tcon, &rqst[0], &oplock, &oparms, path);
+ if (rc)
+ goto iqinf_exit;
+ smb2_set_next_command(ses->server, &rqst[0]);
+
+ /* Query */
+ memset(&qi_iov, 0, sizeof(qi_iov));
+ rqst[1].rq_iov = qi_iov;
+ rqst[1].rq_nvec = 1;
+
+ rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
+ qi.file_info_class, qi.info_type,
+ qi.additional_information,
+ qi.input_buffer_length,
+ qi.output_buffer_length, buffer);
+ if (rc)
+ goto iqinf_exit;
+ smb2_set_next_command(ses->server, &rqst[1]);
+ smb2_set_related(&rqst[1]);
+
+ /* Close */
+ memset(&close_iov, 0, sizeof(close_iov));
+ rqst[2].rq_iov = close_iov;
+ rqst[2].rq_nvec = 1;
+
+ rc = SMB2_close_init(tcon, &rqst[2], COMPOUND_FID, COMPOUND_FID);
+ if (rc)
+ goto iqinf_exit;
+ smb2_set_related(&rqst[2]);
+
+ rc = compound_send_recv(xid, ses, flags, 3, rqst,
+ resp_buftype, rsp_iov);
+ if (rc)
+ goto iqinf_exit;
+ pqi = (struct smb_query_info __user *)arg;
+ rsp = (struct smb2_query_info_rsp *)rsp_iov[1].iov_base;
+ if (le32_to_cpu(rsp->OutputBufferLength) < qi.input_buffer_length)
+ qi.input_buffer_length = le32_to_cpu(rsp->OutputBufferLength);
+ if (copy_to_user(&pqi->input_buffer_length, &qi.input_buffer_length,
+ sizeof(qi.input_buffer_length))) {
+ rc = -EFAULT;
+ goto iqinf_exit;
+ }
+ if (copy_to_user(pqi + 1, rsp->Buffer, qi.input_buffer_length)) {
+ rc = -EFAULT;
+ goto iqinf_exit;
+ }
+
+ iqinf_exit:
+ kfree(buffer);
+ SMB2_open_free(&rqst[0]);
+ SMB2_query_info_free(&rqst[1]);
+ SMB2_close_free(&rqst[2]);
+ free_rsp_buf(resp_buftype[0], rsp_iov[0].iov_base);
+ free_rsp_buf(resp_buftype[1], rsp_iov[1].iov_base);
+ free_rsp_buf(resp_buftype[2], rsp_iov[2].iov_base);
+ return rc;
+}
+
static ssize_t
smb2_copychunk_range(const unsigned int xid,
struct cifsFileInfo *srcfile,
}
return SMB2_set_eof(xid, tcon, cfile->fid.persistent_fid,
- cfile->fid.volatile_fid, cfile->pid, &eof, false);
+ cfile->fid.volatile_fid, cfile->pid, &eof);
}
static int
CIFS_CACHE_READ(cinode) ? 1 : 0);
}
-static void
+void
smb2_set_related(struct smb_rqst *rqst)
{
struct smb2_sync_hdr *shdr;
char smb2_padding[7] = {0, 0, 0, 0, 0, 0, 0};
-static void
+void
smb2_set_next_command(struct TCP_Server_Info *server, struct smb_rqst *rqst)
{
struct smb2_sync_hdr *shdr;
flags |= CIFS_TRANSFORM_REQ;
memset(rqst, 0, sizeof(rqst));
- memset(resp_buftype, 0, sizeof(resp_buftype));
+ resp_buftype[0] = resp_buftype[1] = resp_buftype[2] = CIFS_NO_BUFFER;
memset(rsp_iov, 0, sizeof(rsp_iov));
memset(&open_iov, 0, sizeof(open_iov));
rc = SMB2_query_info_init(tcon, &rqst[1], COMPOUND_FID, COMPOUND_FID,
FS_FULL_SIZE_INFORMATION,
SMB2_O_INFO_FILESYSTEM, 0,
- sizeof(struct smb2_fs_full_size_info));
+ sizeof(struct smb2_fs_full_size_info), 0,
+ NULL);
if (rc)
goto qfs_exit;
smb2_set_next_command(server, &rqst[1]);
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
.next_header = smb2_next_header,
+ .ioctl_query_info = smb2_ioctl_query_info,
};
struct smb_version_operations smb21_operations = {
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
.next_header = smb2_next_header,
+ .ioctl_query_info = smb2_ioctl_query_info,
};
struct smb_version_operations smb30_operations = {
.downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
- .negotiate_wsize = smb2_negotiate_wsize,
- .negotiate_rsize = smb2_negotiate_rsize,
+ .negotiate_wsize = smb3_negotiate_wsize,
+ .negotiate_rsize = smb3_negotiate_rsize,
.sess_setup = SMB2_sess_setup,
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
.next_header = smb2_next_header,
+ .ioctl_query_info = smb2_ioctl_query_info,
};
struct smb_version_operations smb311_operations = {
.downgrade_oplock = smb2_downgrade_oplock,
.need_neg = smb2_need_neg,
.negotiate = smb2_negotiate,
- .negotiate_wsize = smb2_negotiate_wsize,
- .negotiate_rsize = smb2_negotiate_rsize,
+ .negotiate_wsize = smb3_negotiate_wsize,
+ .negotiate_rsize = smb3_negotiate_rsize,
.sess_setup = SMB2_sess_setup,
.logoff = SMB2_logoff,
.tree_connect = SMB2_tcon,
.set_acl = set_smb2_acl,
#endif /* CIFS_ACL */
.next_header = smb2_next_header,
+ .ioctl_query_info = smb2_ioctl_query_info,
};
struct smb_version_values smb20_values = {
/* SMB2 TREE_CONNECT request must be called with TreeId == 0 */
tcon->tid = 0;
-
+ atomic_set(&tcon->num_remote_opens, 0);
rc = smb2_plain_req_init(SMB2_TREE_CONNECT, tcon, (void **) &req,
&total_len);
if (rc) {
{
int i;
- cifs_small_buf_release(rqst->rq_iov[0].iov_base);
- for (i = 1; i < rqst->rq_nvec; i++)
- if (rqst->rq_iov[i].iov_base != smb2_padding)
- kfree(rqst->rq_iov[i].iov_base);
+ if (rqst && rqst->rq_iov) {
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base);
+ for (i = 1; i < rqst->rq_nvec; i++)
+ if (rqst->rq_iov[i].iov_base != smb2_padding)
+ kfree(rqst->rq_iov[i].iov_base);
+ }
}
int
struct cifs_ses *ses = tcon->ses;
struct kvec iov[SMB2_CREATE_IOV_SIZE];
struct kvec rsp_iov = {NULL, 0};
- int resp_buftype;
+ int resp_buftype = CIFS_NO_BUFFER;
int rc = 0;
int flags = 0;
ses->Suid, oparms->create_options,
oparms->desired_access);
+ atomic_inc(&tcon->num_remote_opens);
oparms->fid->persistent_fid = rsp->PersistentFileId;
oparms->fid->volatile_fid = rsp->VolatileFileId;
goto ioctl_exit;
}
- *out_data = kmalloc(*plen, GFP_KERNEL);
+ *out_data = kmemdup((char *)rsp + le32_to_cpu(rsp->OutputOffset),
+ *plen, GFP_KERNEL);
if (*out_data == NULL) {
rc = -ENOMEM;
goto ioctl_exit;
}
- memcpy(*out_data, (char *)rsp + le32_to_cpu(rsp->OutputOffset), *plen);
ioctl_exit:
free_rsp_buf(resp_buftype, rsp);
return rc;
void
SMB2_close_free(struct smb_rqst *rqst)
{
- cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+ if (rqst && rqst->rq_iov)
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
}
int
struct cifs_ses *ses = tcon->ses;
struct kvec iov[1];
struct kvec rsp_iov;
- int resp_buftype;
+ int resp_buftype = CIFS_NO_BUFFER;
int rc = 0;
cifs_dbg(FYI, "Close\n");
goto close_exit;
}
+ atomic_dec(&tcon->num_remote_opens);
+
/* BB FIXME - decode close response, update inode for caching */
close_exit:
* If SMB buffer fields are valid, copy into temporary buffer to hold result.
* Caller must free buffer.
*/
-static int
-validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
- struct kvec *iov, unsigned int minbufsize,
- char *data)
+int
+smb2_validate_and_copy_iov(unsigned int offset, unsigned int buffer_length,
+ struct kvec *iov, unsigned int minbufsize,
+ char *data)
{
char *begin_of_buf = offset + (char *)iov->iov_base;
int rc;
SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid,
u8 info_class, u8 info_type, u32 additional_info,
- size_t output_len)
+ size_t output_len, size_t input_len, void *input)
{
struct smb2_query_info_req *req;
struct kvec *iov = rqst->rq_iov;
req->VolatileFileId = volatile_fid;
req->AdditionalInformation = cpu_to_le32(additional_info);
- /*
- * We do not use the input buffer (do not send extra byte)
- */
- req->InputBufferOffset = 0;
-
req->OutputBufferLength = cpu_to_le32(output_len);
+ if (input_len) {
+ req->InputBufferLength = cpu_to_le32(input_len);
+ /* total_len for smb query request never close to le16 max */
+ req->InputBufferOffset = cpu_to_le16(total_len - 1);
+ memcpy(req->Buffer, input, input_len);
+ }
iov[0].iov_base = (char *)req;
/* 1 for Buffer */
- iov[0].iov_len = total_len - 1;
+ iov[0].iov_len = total_len - 1 + input_len;
return 0;
}
void
SMB2_query_info_free(struct smb_rqst *rqst)
{
- cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
+ if (rqst && rqst->rq_iov)
+ cifs_small_buf_release(rqst->rq_iov[0].iov_base); /* request */
}
static int
struct kvec iov[1];
struct kvec rsp_iov;
int rc = 0;
- int resp_buftype;
+ int resp_buftype = CIFS_NO_BUFFER;
struct cifs_ses *ses = tcon->ses;
int flags = 0;
rc = SMB2_query_info_init(tcon, &rqst, persistent_fid, volatile_fid,
info_class, info_type, additional_info,
- output_len);
+ output_len, 0, NULL);
if (rc)
goto qinf_exit;
}
}
- rc = validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
- le32_to_cpu(rsp->OutputBufferLength),
- &rsp_iov, min_len, *data);
+ rc = smb2_validate_and_copy_iov(le16_to_cpu(rsp->OutputBufferOffset),
+ le32_to_cpu(rsp->OutputBufferLength),
+ &rsp_iov, min_len, *data);
qinf_exit:
SMB2_query_info_free(&rqst);
return rc;
}
-static int
-send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+int
+SMB2_set_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid, u32 pid, u8 info_class,
- u8 info_type, u32 additional_info, unsigned int num,
+ u8 info_type, u32 additional_info,
void **data, unsigned int *size)
{
- struct smb_rqst rqst;
struct smb2_set_info_req *req;
- struct smb2_set_info_rsp *rsp = NULL;
- struct kvec *iov;
- struct kvec rsp_iov;
- int rc = 0;
- int resp_buftype;
- unsigned int i;
- struct cifs_ses *ses = tcon->ses;
- int flags = 0;
- unsigned int total_len;
-
- if (!ses || !(ses->server))
- return -EIO;
-
- if (!num)
- return -EINVAL;
-
- iov = kmalloc_array(num, sizeof(struct kvec), GFP_KERNEL);
- if (!iov)
- return -ENOMEM;
+ struct kvec *iov = rqst->rq_iov;
+ unsigned int i, total_len;
+ int rc;
rc = smb2_plain_req_init(SMB2_SET_INFO, tcon, (void **) &req, &total_len);
- if (rc) {
- kfree(iov);
+ if (rc)
return rc;
- }
-
- if (smb3_encryption_required(tcon))
- flags |= CIFS_TRANSFORM_REQ;
req->sync_hdr.ProcessId = cpu_to_le32(pid);
-
req->InfoType = info_type;
req->FileInfoClass = info_class;
req->PersistentFileId = persistent_fid;
/* 1 for Buffer */
iov[0].iov_len = total_len - 1;
- for (i = 1; i < num; i++) {
+ for (i = 1; i < rqst->rq_nvec; i++) {
le32_add_cpu(&req->BufferLength, size[i]);
iov[i].iov_base = (char *)data[i];
iov[i].iov_len = size[i];
}
+ return 0;
+}
+
+void
+SMB2_set_info_free(struct smb_rqst *rqst)
+{
+ if (rqst && rqst->rq_iov)
+ cifs_buf_release(rqst->rq_iov[0].iov_base); /* request */
+}
+
+static int
+send_set_info(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid, u32 pid, u8 info_class,
+ u8 info_type, u32 additional_info, unsigned int num,
+ void **data, unsigned int *size)
+{
+ struct smb_rqst rqst;
+ struct smb2_set_info_rsp *rsp = NULL;
+ struct kvec *iov;
+ struct kvec rsp_iov;
+ int rc = 0;
+ int resp_buftype;
+ struct cifs_ses *ses = tcon->ses;
+ int flags = 0;
+
+ if (!ses || !(ses->server))
+ return -EIO;
+
+ if (!num)
+ return -EINVAL;
+
+ if (smb3_encryption_required(tcon))
+ flags |= CIFS_TRANSFORM_REQ;
+
+ iov = kmalloc_array(num, sizeof(struct kvec), GFP_KERNEL);
+ if (!iov)
+ return -ENOMEM;
+
memset(&rqst, 0, sizeof(struct smb_rqst));
rqst.rq_iov = iov;
rqst.rq_nvec = num;
+ rc = SMB2_set_info_init(tcon, &rqst, persistent_fid, volatile_fid, pid,
+ info_class, info_type, additional_info,
+ data, size);
+ if (rc) {
+ kfree(iov);
+ return rc;
+ }
+
+
rc = cifs_send_recv(xid, ses, &rqst, &resp_buftype, flags,
&rsp_iov);
- cifs_buf_release(req);
+ SMB2_set_info_free(&rqst);
rsp = (struct smb2_set_info_rsp *)rsp_iov.iov_base;
if (rc != 0) {
return rc;
}
-int
-SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
-{
- struct smb2_file_rename_info info;
- void **data;
- unsigned int size[2];
- int rc;
- int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
-
- data = kmalloc_array(2, sizeof(void *), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- info.ReplaceIfExists = 1; /* 1 = replace existing target with new */
- /* 0 = fail if target already exists */
- info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */
- info.FileNameLength = cpu_to_le32(len);
-
- data[0] = &info;
- size[0] = sizeof(struct smb2_file_rename_info);
-
- data[1] = target_file;
- size[1] = len + 2 /* null */;
-
- rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
- current->tgid, FILE_RENAME_INFORMATION, SMB2_O_INFO_FILE,
- 0, 2, data, size);
- kfree(data);
- return rc;
-}
-
-int
-SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid)
-{
- __u8 delete_pending = 1;
- void *data;
- unsigned int size;
-
- data = &delete_pending;
- size = 1; /* sizeof __u8 */
-
- return send_set_info(xid, tcon, persistent_fid, volatile_fid,
- current->tgid, FILE_DISPOSITION_INFORMATION, SMB2_O_INFO_FILE,
- 0, 1, &data, &size);
-}
-
-int
-SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
-{
- struct smb2_file_link_info info;
- void **data;
- unsigned int size[2];
- int rc;
- int len = (2 * UniStrnlen((wchar_t *)target_file, PATH_MAX));
-
- data = kmalloc_array(2, sizeof(void *), GFP_KERNEL);
- if (!data)
- return -ENOMEM;
-
- info.ReplaceIfExists = 0; /* 1 = replace existing link with new */
- /* 0 = fail if link already exists */
- info.RootDirectory = 0; /* MBZ for network ops (why does spec say?) */
- info.FileNameLength = cpu_to_le32(len);
-
- data[0] = &info;
- size[0] = sizeof(struct smb2_file_link_info);
-
- data[1] = target_file;
- size[1] = len + 2 /* null */;
-
- rc = send_set_info(xid, tcon, persistent_fid, volatile_fid,
- current->tgid, FILE_LINK_INFORMATION, SMB2_O_INFO_FILE,
- 0, 2, data, size);
- kfree(data);
- return rc;
-}
-
int
SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon, u64 persistent_fid,
- u64 volatile_fid, u32 pid, __le64 *eof, bool is_falloc)
+ u64 volatile_fid, u32 pid, __le64 *eof)
{
struct smb2_file_eof_info info;
void *data;
data = &info;
size = sizeof(struct smb2_file_eof_info);
- if (is_falloc)
- return send_set_info(xid, tcon, persistent_fid, volatile_fid,
- pid, FILE_ALLOCATION_INFORMATION, SMB2_O_INFO_FILE,
- 0, 1, &data, &size);
- else
- return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+ return send_set_info(xid, tcon, persistent_fid, volatile_fid,
pid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE,
0, 1, &data, &size);
}
-int
-SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid, FILE_BASIC_INFO *buf)
-{
- unsigned int size;
- size = sizeof(FILE_BASIC_INFO);
- return send_set_info(xid, tcon, persistent_fid, volatile_fid,
- current->tgid, FILE_BASIC_INFORMATION, SMB2_O_INFO_FILE,
- 0, 1, (void **)&buf, &size);
-}
-
int
SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
struct kvec iov[1];
struct kvec rsp_iov;
int resp_buf_type;
+ __u64 *please_key_high;
+ __u64 *please_key_low;
cifs_dbg(FYI, "SMB2_lease_break\n");
rc = smb2_plain_req_init(SMB2_OPLOCK_BREAK, tcon, (void **) &req,
rc = cifs_send_recv(xid, ses, &rqst, &resp_buf_type, flags, &rsp_iov);
cifs_small_buf_release(req);
+ please_key_low = (__u64 *)req->LeaseKey;
+ please_key_high = (__u64 *)(req->LeaseKey+8);
if (rc) {
cifs_stats_fail_inc(tcon, SMB2_OPLOCK_BREAK_HE);
+ trace_smb3_lease_err(le32_to_cpu(lease_state), tcon->tid,
+ ses->Suid, *please_key_low, *please_key_high, rc);
cifs_dbg(FYI, "Send error in Lease Break = %d\n", rc);
- }
+ } else
+ trace_smb3_lease_done(le32_to_cpu(lease_state), tcon->tid,
+ ses->Suid, *please_key_low, *please_key_high);
return rc;
}
#define SVHDX_OPEN_DEVICE_CONTEX 0x9CCBCF9E04C1E643980E158DA1F6EC83
#define SMB2_CREATE_TAG_POSIX 0x93AD25509CB411E7B42383DE968BCD7C
+/* Flag (SMB3 open response) values */
+#define SMB2_CREATE_FLAG_REPARSEPOINT 0x01
/*
* Maximum number of iovs we need for an open/create request.
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 89 */
__u8 OplockLevel;
- __u8 Reserved;
+ __u8 Flag; /* 0x01 if reparse point */
__le32 CreateAction;
__le64 CreationTime;
__le64 LastAccessTime;
__u8 Buffer[1];
} __packed;
+/*
+ * Maximum number of iovs we need for a set-info request.
+ * The largest one is rename/hardlink
+ * [0] : struct smb2_set_info_req + smb2_file_[rename|link]_info
+ * [1] : path
+ * [2] : compound padding
+ */
+#define SMB2_SET_INFO_IOV_SIZE 3
+
struct smb2_set_info_req {
struct smb2_sync_hdr sync_hdr;
__le16 StructureSize; /* Must be 33 */
extern int smb3_crypto_aead_allocate(struct TCP_Server_Info *server);
extern unsigned long smb_rqst_len(struct TCP_Server_Info *server,
struct smb_rqst *rqst);
+extern void smb2_set_next_command(struct TCP_Server_Info *server,
+ struct smb_rqst *rqst);
+extern void smb2_set_related(struct smb_rqst *rqst);
/*
* SMB2 Worker functions - most of protocol specific implementation details
extern int SMB2_query_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
u64 persistent_fid, u64 volatile_fid,
u8 info_class, u8 info_type,
- u32 additional_info, size_t output_len);
+ u32 additional_info, size_t output_len,
+ size_t input_len, void *input);
extern void SMB2_query_info_free(struct smb_rqst *rqst);
extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_file_id, u64 volatile_file_id,
extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, int index,
struct cifs_search_info *srch_inf);
-extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- __le16 *target_file);
-extern int SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid);
-extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- __le16 *target_file);
extern int SMB2_set_eof(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, u32 pid,
- __le64 *eof, bool is_fallocate);
-extern int SMB2_set_info(const unsigned int xid, struct cifs_tcon *tcon,
- u64 persistent_fid, u64 volatile_fid,
- FILE_BASIC_INFO *buf);
+ __le64 *eof);
+extern int SMB2_set_info_init(struct cifs_tcon *tcon, struct smb_rqst *rqst,
+ u64 persistent_fid, u64 volatile_fid, u32 pid,
+ u8 info_class, u8 info_type, u32 additional_info,
+ void **data, unsigned int *size);
+extern void SMB2_set_info_free(struct smb_rqst *rqst);
extern int SMB2_set_acl(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
struct cifs_ntsd *pnntsd, int pacllen, int aclflag);
extern int smb3_encryption_required(const struct cifs_tcon *tcon);
extern int smb2_validate_iov(unsigned int offset, unsigned int buffer_length,
struct kvec *iov, unsigned int min_buf_size);
+extern int smb2_validate_and_copy_iov(unsigned int offset,
+ unsigned int buffer_length,
+ struct kvec *iov,
+ unsigned int minbufsize, char *data);
extern void smb2_copy_fs_info_to_kstatfs(
struct smb2_fs_full_size_info *pfs_inf,
struct kstatfs *kst);
int rc;
list_for_each_entry(smbdirect_mr, &info->mr_list, list) {
- if (smbdirect_mr->state == MR_INVALIDATED ||
- smbdirect_mr->state == MR_ERROR) {
+ if (smbdirect_mr->state == MR_INVALIDATED)
+ ib_dma_unmap_sg(
+ info->id->device, smbdirect_mr->sgl,
+ smbdirect_mr->sgl_count,
+ smbdirect_mr->dir);
+ else if (smbdirect_mr->state == MR_ERROR) {
/* recover this MR entry */
rc = ib_dereg_mr(smbdirect_mr->mr);
smbd_disconnect_rdma_connection(info);
continue;
}
+ } else
+ /* This MR is being used, don't recover it */
+ continue;
- if (smbdirect_mr->state == MR_INVALIDATED)
- ib_dma_unmap_sg(
- info->id->device, smbdirect_mr->sgl,
- smbdirect_mr->sgl_count,
- smbdirect_mr->dir);
-
- smbdirect_mr->state = MR_READY;
+ smbdirect_mr->state = MR_READY;
- /* smbdirect_mr->state is updated by this function
- * and is read and updated by I/O issuing CPUs trying
- * to get a MR, the call to atomic_inc_return
- * implicates a memory barrier and guarantees this
- * value is updated before waking up any calls to
- * get_mr() from the I/O issuing CPUs
- */
- if (atomic_inc_return(&info->mr_ready_count) == 1)
- wake_up_interruptible(&info->wait_mr);
- }
+ /* smbdirect_mr->state is updated by this function
+ * and is read and updated by I/O issuing CPUs trying
+ * to get a MR, the call to atomic_inc_return
+ * implicates a memory barrier and guarantees this
+ * value is updated before waking up any calls to
+ * get_mr() from the I/O issuing CPUs
+ */
+ if (atomic_inc_return(&info->mr_ready_count) == 1)
+ wake_up_interruptible(&info->wait_mr);
}
}
DEFINE_SMB3_OPEN_DONE_EVENT(open_done);
DEFINE_SMB3_OPEN_DONE_EVENT(posix_mkdir_done);
+
+DECLARE_EVENT_CLASS(smb3_lease_done_class,
+ TP_PROTO(__u32 lease_state,
+ __u32 tid,
+ __u64 sesid,
+ __u64 lease_key_low,
+ __u64 lease_key_high),
+ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high),
+ TP_STRUCT__entry(
+ __field(__u32, lease_state)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, lease_key_low)
+ __field(__u64, lease_key_high)
+ ),
+ TP_fast_assign(
+ __entry->lease_state = lease_state;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->lease_key_low = lease_key_low;
+ __entry->lease_key_high = lease_key_high;
+ ),
+ TP_printk("sid=0x%llx tid=0x%x lease_key=0x%llx%llx lease_state=0x%x",
+ __entry->sesid, __entry->tid, __entry->lease_key_high,
+ __entry->lease_key_low, __entry->lease_state)
+)
+
+#define DEFINE_SMB3_LEASE_DONE_EVENT(name) \
+DEFINE_EVENT(smb3_lease_done_class, smb3_##name, \
+ TP_PROTO(__u32 lease_state, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 lease_key_low, \
+ __u64 lease_key_high), \
+ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high))
+
+DEFINE_SMB3_LEASE_DONE_EVENT(lease_done);
+
+DECLARE_EVENT_CLASS(smb3_lease_err_class,
+ TP_PROTO(__u32 lease_state,
+ __u32 tid,
+ __u64 sesid,
+ __u64 lease_key_low,
+ __u64 lease_key_high,
+ int rc),
+ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high, rc),
+ TP_STRUCT__entry(
+ __field(__u32, lease_state)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, lease_key_low)
+ __field(__u64, lease_key_high)
+ __field(int, rc)
+ ),
+ TP_fast_assign(
+ __entry->lease_state = lease_state;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->lease_key_low = lease_key_low;
+ __entry->lease_key_high = lease_key_high;
+ __entry->rc = rc;
+ ),
+ TP_printk("sid=0x%llx tid=0x%x lease_key=0x%llx%llx lease_state=0x%x rc=%d",
+ __entry->sesid, __entry->tid, __entry->lease_key_high,
+ __entry->lease_key_low, __entry->lease_state, __entry->rc)
+)
+
+#define DEFINE_SMB3_LEASE_ERR_EVENT(name) \
+DEFINE_EVENT(smb3_lease_err_class, smb3_##name, \
+ TP_PROTO(__u32 lease_state, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 lease_key_low, \
+ __u64 lease_key_high, \
+ int rc), \
+ TP_ARGS(lease_state, tid, sesid, lease_key_low, lease_key_high, rc))
+
+DEFINE_SMB3_LEASE_ERR_EVENT(lease_err);
+
DECLARE_EVENT_CLASS(smb3_reconnect_class,
TP_PROTO(__u64 currmid,
char *hostname),
DEFINE_SMB3_RECONNECT_EVENT(reconnect);
DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
+DECLARE_EVENT_CLASS(smb3_credit_class,
+ TP_PROTO(__u64 currmid,
+ char *hostname,
+ int credits),
+ TP_ARGS(currmid, hostname, credits),
+ TP_STRUCT__entry(
+ __field(__u64, currmid)
+ __field(char *, hostname)
+ __field(int, credits)
+ ),
+ TP_fast_assign(
+ __entry->currmid = currmid;
+ __entry->hostname = hostname;
+ __entry->credits = credits;
+ ),
+ TP_printk("server=%s current_mid=0x%llx credits=%d",
+ __entry->hostname,
+ __entry->currmid,
+ __entry->credits)
+)
+
+#define DEFINE_SMB3_CREDIT_EVENT(name) \
+DEFINE_EVENT(smb3_credit_class, smb3_##name, \
+ TP_PROTO(__u64 currmid, \
+ char *hostname, \
+ int credits), \
+ TP_ARGS(currmid, hostname, credits))
+
+DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
+
#endif /* _CIFS_TRACE_H */
#undef TRACE_INCLUDE_PATH
cifs_small_buf_release(midEntry->resp_buf);
#ifdef CONFIG_CIFS_STATS2
now = jiffies;
- /* commands taking longer than one second are indications that
- something is wrong, unless it is quite a slow link or server */
- if (time_after(now, midEntry->when_alloc + HZ) &&
+ /*
+ * commands taking longer than one second (default) can be indications
+ * that something is wrong, unless it is quite a slow link or a very
+ * busy server. Note that this calc is unlikely or impossible to wrap
+ * as long as slow_rsp_threshold is not set way above recommended max
+ * value (32767 ie 9 hours) and is generally harmless even if wrong
+ * since only affects debug counters - so leaving the calc as simple
+ * comparison rather than doing multiple conversions and overflow
+ * checks
+ */
+ if ((slow_rsp_threshold != 0) &&
+ time_after(now, midEntry->when_alloc + (slow_rsp_threshold * HZ)) &&
(midEntry->command != command)) {
/* smb2slowcmd[NUMBER_OF_SMB2_COMMANDS] counts by command */
if ((le16_to_cpu(midEntry->command) < NUMBER_OF_SMB2_COMMANDS) &&
if (cifsFYI & CIFS_TIMER) {
pr_debug(" CIFS slow rsp: cmd %d mid %llu",
midEntry->command, midEntry->mid);
- pr_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
+ cifs_info(" A: 0x%lx S: 0x%lx R: 0x%lx\n",
now - midEntry->when_alloc,
now - midEntry->when_sent,
now - midEntry->when_received);
int i, j, rc = 0;
int timeout, optype;
struct mid_q_entry *midQ[MAX_COMPOUND];
- unsigned int credits = 1;
+ unsigned int credits = 0;
char *buf;
timeout = flags & CIFS_TIMEOUT_MASK;
mutex_unlock(&ses->server->srv_mutex);
- for (i = 0; i < num_rqst; i++) {
- if (rc < 0)
- goto out;
+ if (rc < 0)
+ goto out;
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
- smb311_update_preauth_hash(ses, rqst[i].rq_iov,
- rqst[i].rq_nvec);
+ /*
+ * Compounding is never used during session establish.
+ */
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+ smb311_update_preauth_hash(ses, rqst[0].rq_iov,
+ rqst[0].rq_nvec);
- if (timeout == CIFS_ASYNC_OP)
- goto out;
+ if (timeout == CIFS_ASYNC_OP)
+ goto out;
+ for (i = 0; i < num_rqst; i++) {
rc = wait_for_response(ses->server, midQ[i]);
if (rc != 0) {
- cifs_dbg(FYI, "Cancelling wait for mid %llu\n",
- midQ[i]->mid);
+ cifs_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
+ midQ[i]->mid, le16_to_cpu(midQ[i]->command));
send_cancel(ses->server, &rqst[i], midQ[i]);
spin_lock(&GlobalMid_Lock);
if (midQ[i]->mid_state == MID_REQUEST_SUBMITTED) {
}
spin_unlock(&GlobalMid_Lock);
}
+ }
+
+ for (i = 0; i < num_rqst; i++)
+ if (midQ[i]->resp_buf)
+ credits += ses->server->ops->get_credits(midQ[i]);
+ if (!credits)
+ credits = 1;
+
+ for (i = 0; i < num_rqst; i++) {
+ if (rc < 0)
+ goto out;
rc = cifs_sync_mid_result(midQ[i], ses->server);
if (rc != 0) {
- add_credits(ses->server, 1, optype);
+ add_credits(ses->server, credits, optype);
return rc;
}
else
resp_buf_type[i] = CIFS_SMALL_BUFFER;
- if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
- struct kvec iov = {
- .iov_base = resp_iov[i].iov_base,
- .iov_len = resp_iov[i].iov_len
- };
- smb311_update_preauth_hash(ses, &iov, 1);
- }
-
- credits = ses->server->ops->get_credits(midQ[i]);
-
rc = ses->server->ops->check_receive(midQ[i], ses->server,
flags & CIFS_LOG_ERROR);
/* mark it so buf will not be freed by cifs_delete_mid */
if ((flags & CIFS_NO_RESP) == 0)
midQ[i]->resp_buf = NULL;
+
+ }
+
+ /*
+ * Compounding is never used during session establish.
+ */
+ if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+ struct kvec iov = {
+ .iov_base = resp_iov[0].iov_base,
+ .iov_len = resp_iov[0].iov_len
+ };
+ smb311_update_preauth_hash(ses, &iov, 1);
}
+
out:
/*
* This will dequeue all mids. After this it is important that the
return nfs4_do_check_delegation(inode, flags, false);
}
-static int nfs_delegation_claim_locks(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid)
+static int nfs_delegation_claim_locks(struct nfs4_state *state, const nfs4_stateid *stateid)
{
struct inode *inode = state->inode;
struct file_lock *fl;
spin_lock(&flctx->flc_lock);
restart:
list_for_each_entry(fl, list, fl_list) {
- if (nfs_file_open_context(fl->fl_file) != ctx)
+ if (nfs_file_open_context(fl->fl_file)->state != state)
continue;
spin_unlock(&flctx->flc_lock);
status = nfs4_lock_delegation_recall(fl, state, stateid);
int err;
again:
- spin_lock(&inode->i_lock);
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
continue;
if (!nfs4_stateid_match(&state->stateid, stateid))
continue;
- get_nfs_open_context(ctx);
- spin_unlock(&inode->i_lock);
+ if (!get_nfs_open_context(ctx))
+ continue;
+ rcu_read_unlock();
sp = state->owner;
/* Block nfs4_proc_unlck */
mutex_lock(&sp->so_delegreturn_mutex);
seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
err = nfs4_open_delegation_recall(ctx, state, stateid, type);
if (!err)
- err = nfs_delegation_claim_locks(ctx, state, stateid);
+ err = nfs_delegation_claim_locks(state, stateid);
if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
err = -EAGAIN;
mutex_unlock(&sp->so_delegreturn_mutex);
return err;
goto again;
}
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return 0;
}
return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
}
+static int
+nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
+ struct inode *inode, int error)
+{
+ switch (error) {
+ case 1:
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
+ __func__, dentry);
+ return 1;
+ case 0:
+ nfs_mark_for_revalidate(dir);
+ if (inode && S_ISDIR(inode->i_mode)) {
+ /* Purge readdir caches. */
+ nfs_zap_caches(inode);
+ /*
+ * We can't d_drop the root of a disconnected tree:
+ * its d_hash is on the s_anon list and d_drop() would hide
+ * it from shrink_dcache_for_unmount(), leading to busy
+ * inodes on unmount and further oopses.
+ */
+ if (IS_ROOT(dentry))
+ return 1;
+ }
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
+ __func__, dentry);
+ return 0;
+ }
+ dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
+ __func__, dentry, error);
+ return error;
+}
+
+static int
+nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ int ret = 1;
+ if (nfs_neg_need_reval(dir, dentry, flags)) {
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+ ret = 0;
+ }
+ return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
+}
+
+static int
+nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
+ struct inode *inode)
+{
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
+}
+
+static int
+nfs_lookup_revalidate_dentry(struct inode *dir, struct dentry *dentry,
+ struct inode *inode)
+{
+ struct nfs_fh *fhandle;
+ struct nfs_fattr *fattr;
+ struct nfs4_label *label;
+ int ret;
+
+ ret = -ENOMEM;
+ fhandle = nfs_alloc_fhandle();
+ fattr = nfs_alloc_fattr();
+ label = nfs4_label_alloc(NFS_SERVER(inode), GFP_KERNEL);
+ if (fhandle == NULL || fattr == NULL || IS_ERR(label))
+ goto out;
+
+ ret = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ if (ret < 0) {
+ if (ret == -ESTALE || ret == -ENOENT)
+ ret = 0;
+ goto out;
+ }
+ ret = 0;
+ if (nfs_compare_fh(NFS_FH(inode), fhandle))
+ goto out;
+ if (nfs_refresh_inode(inode, fattr) < 0)
+ goto out;
+
+ nfs_setsecurity(inode, fattr, label);
+ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+
+ /* set a readdirplus hint that we had a cache miss */
+ nfs_force_use_readdirplus(dir);
+ ret = 1;
+out:
+ nfs_free_fattr(fattr);
+ nfs_free_fhandle(fhandle);
+ nfs4_label_free(label);
+ return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
+}
+
/*
* This is called every time the dcache has a lookup hit,
* and we should check whether we can really trust that
* If the parent directory is seen to have changed, we throw out the
* cached dentry and do a new lookup.
*/
-static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int
+nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
{
- struct inode *dir;
struct inode *inode;
- struct dentry *parent;
- struct nfs_fh *fhandle = NULL;
- struct nfs_fattr *fattr = NULL;
- struct nfs4_label *label = NULL;
int error;
- if (flags & LOOKUP_RCU) {
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- } else {
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- }
nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
inode = d_inode(dentry);
- if (!inode) {
- if (nfs_neg_need_reval(dir, dentry, flags)) {
- if (flags & LOOKUP_RCU)
- return -ECHILD;
- goto out_bad;
- }
- goto out_valid;
- }
+ if (!inode)
+ return nfs_lookup_revalidate_negative(dir, dentry, flags);
if (is_bad_inode(inode)) {
- if (flags & LOOKUP_RCU)
- return -ECHILD;
dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
__func__, dentry);
goto out_bad;
}
if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
- goto out_set_verifier;
+ return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* Force a full look up iff the parent directory has changed */
if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
error = nfs_lookup_verify_inode(inode, flags);
if (error) {
- if (flags & LOOKUP_RCU)
- return -ECHILD;
if (error == -ESTALE)
- goto out_zap_parent;
- goto out_error;
+ nfs_zap_caches(dir);
+ goto out_bad;
}
nfs_advise_use_readdirplus(dir);
goto out_valid;
if (NFS_STALE(inode))
goto out_bad;
- error = -ENOMEM;
- fhandle = nfs_alloc_fhandle();
- fattr = nfs_alloc_fattr();
- if (fhandle == NULL || fattr == NULL)
- goto out_error;
-
- label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
- if (IS_ERR(label))
- goto out_error;
-
trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
- error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr, label);
+ error = nfs_lookup_revalidate_dentry(dir, dentry, inode);
trace_nfs_lookup_revalidate_exit(dir, dentry, flags, error);
- if (error == -ESTALE || error == -ENOENT)
- goto out_bad;
- if (error)
- goto out_error;
- if (nfs_compare_fh(NFS_FH(inode), fhandle))
- goto out_bad;
- if ((error = nfs_refresh_inode(inode, fattr)) != 0)
- goto out_bad;
-
- nfs_setsecurity(inode, fattr, label);
-
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fhandle);
- nfs4_label_free(label);
+ return error;
+out_valid:
+ return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
+out_bad:
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+ return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
+}
- /* set a readdirplus hint that we had a cache miss */
- nfs_force_use_readdirplus(dir);
+static int
+__nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
+ int (*reval)(struct inode *, struct dentry *, unsigned int))
+{
+ struct dentry *parent;
+ struct inode *dir;
+ int ret;
-out_set_verifier:
- nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
- out_valid:
if (flags & LOOKUP_RCU) {
+ parent = READ_ONCE(dentry->d_parent);
+ dir = d_inode_rcu(parent);
+ if (!dir)
+ return -ECHILD;
+ ret = reval(dir, dentry, flags);
if (parent != READ_ONCE(dentry->d_parent))
return -ECHILD;
- } else
+ } else {
+ parent = dget_parent(dentry);
+ ret = reval(d_inode(parent), dentry, flags);
dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is valid\n",
- __func__, dentry);
- return 1;
-out_zap_parent:
- nfs_zap_caches(dir);
- out_bad:
- WARN_ON(flags & LOOKUP_RCU);
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fhandle);
- nfs4_label_free(label);
- nfs_mark_for_revalidate(dir);
- if (inode && S_ISDIR(inode->i_mode)) {
- /* Purge readdir caches. */
- nfs_zap_caches(inode);
- /*
- * We can't d_drop the root of a disconnected tree:
- * its d_hash is on the s_anon list and d_drop() would hide
- * it from shrink_dcache_for_unmount(), leading to busy
- * inodes on unmount and further oopses.
- */
- if (IS_ROOT(dentry))
- goto out_valid;
}
- dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
- __func__, dentry);
- return 0;
-out_error:
- WARN_ON(flags & LOOKUP_RCU);
- nfs_free_fattr(fattr);
- nfs_free_fhandle(fhandle);
- nfs4_label_free(label);
- dput(parent);
- dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) lookup returned error %d\n",
- __func__, dentry, error);
- return error;
+ return ret;
+}
+
+static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
}
/*
}
EXPORT_SYMBOL_GPL(nfs_atomic_open);
-static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+static int
+nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
{
struct inode *inode;
- int ret = 0;
if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
- goto no_open;
+ goto full_reval;
if (d_mountpoint(dentry))
- goto no_open;
- if (NFS_SB(dentry->d_sb)->caps & NFS_CAP_ATOMIC_OPEN_V1)
- goto no_open;
+ goto full_reval;
inode = d_inode(dentry);
/* We can't create new files in nfs_open_revalidate(), so we
* optimize away revalidation of negative dentries.
*/
- if (inode == NULL) {
- struct dentry *parent;
- struct inode *dir;
-
- if (flags & LOOKUP_RCU) {
- parent = READ_ONCE(dentry->d_parent);
- dir = d_inode_rcu(parent);
- if (!dir)
- return -ECHILD;
- } else {
- parent = dget_parent(dentry);
- dir = d_inode(parent);
- }
- if (!nfs_neg_need_reval(dir, dentry, flags))
- ret = 1;
- else if (flags & LOOKUP_RCU)
- ret = -ECHILD;
- if (!(flags & LOOKUP_RCU))
- dput(parent);
- else if (parent != READ_ONCE(dentry->d_parent))
- return -ECHILD;
- goto out;
- }
+ if (inode == NULL)
+ goto full_reval;
+
+ if (NFS_PROTO(dir)->have_delegation(inode, FMODE_READ))
+ return nfs_lookup_revalidate_delegated(dir, dentry, inode);
/* NFS only supports OPEN on regular files */
if (!S_ISREG(inode->i_mode))
- goto no_open;
+ goto full_reval;
+
/* We cannot do exclusive creation on a positive dentry */
- if (flags & LOOKUP_EXCL)
- goto no_open;
+ if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
+ goto reval_dentry;
+
+ /* Check if the directory changed */
+ if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
+ goto reval_dentry;
/* Let f_op->open() actually open (and revalidate) the file */
- ret = 1;
+ return 1;
+reval_dentry:
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+ return nfs_lookup_revalidate_dentry(dir, dentry, inode);;
-out:
- return ret;
+full_reval:
+ return nfs_do_lookup_revalidate(dir, dentry, flags);
+}
-no_open:
- return nfs_lookup_revalidate(dentry, flags);
+static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ return __nfs_lookup_revalidate(dentry, flags,
+ nfs4_do_lookup_revalidate);
}
#endif /* CONFIG_NFSV4 */
.id = LAYOUT_NFSV4_1_FILES,
.name = "LAYOUT_NFSV4_1_FILES",
.owner = THIS_MODULE,
+ .max_layoutget_response = 4096, /* 1 page or so... */
.alloc_layout_hdr = filelayout_alloc_layout_hdr,
.free_layout_hdr = filelayout_free_layout_hdr,
.alloc_lseg = filelayout_alloc_lseg,
.name = "LAYOUT_FLEX_FILES",
.owner = THIS_MODULE,
.flags = PNFS_LAYOUTGET_ON_OPEN,
+ .max_layoutget_response = 4096, /* 1 page or so... */
.set_layoutdriver = ff_layout_set_layoutdriver,
.alloc_layout_hdr = ff_layout_alloc_layout_hdr,
.free_layout_hdr = ff_layout_free_layout_hdr,
struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx);
struct rpc_cred *cred;
- if (mirror) {
+ if (mirror && !mirror->mirror_ds->ds_versions[0].tightly_coupled) {
cred = ff_layout_get_mirror_cred(mirror, lseg->pls_range.iomode);
if (!cred)
cred = get_rpccred(mdscred);
static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context *ctx)
{
- struct nfs_lock_context *head = &ctx->lock_context;
- struct nfs_lock_context *pos = head;
+ struct nfs_lock_context *pos;
- do {
+ list_for_each_entry_rcu(pos, &ctx->lock_context.list, list) {
if (pos->lockowner != current->files)
continue;
- refcount_inc(&pos->count);
- return pos;
- } while ((pos = list_entry(pos->list.next, typeof(*pos), list)) != head);
+ if (refcount_inc_not_zero(&pos->count))
+ return pos;
+ }
return NULL;
}
struct nfs_lock_context *res, *new = NULL;
struct inode *inode = d_inode(ctx->dentry);
- spin_lock(&inode->i_lock);
+ rcu_read_lock();
res = __nfs_find_lock_context(ctx);
+ rcu_read_unlock();
if (res == NULL) {
- spin_unlock(&inode->i_lock);
new = kmalloc(sizeof(*new), GFP_KERNEL);
if (new == NULL)
return ERR_PTR(-ENOMEM);
spin_lock(&inode->i_lock);
res = __nfs_find_lock_context(ctx);
if (res == NULL) {
- list_add_tail(&new->list, &ctx->lock_context.list);
+ list_add_tail_rcu(&new->list, &ctx->lock_context.list);
new->open_context = ctx;
res = new;
new = NULL;
}
+ spin_unlock(&inode->i_lock);
+ kfree(new);
}
- spin_unlock(&inode->i_lock);
- kfree(new);
return res;
}
EXPORT_SYMBOL_GPL(nfs_get_lock_context);
if (!refcount_dec_and_lock(&l_ctx->count, &inode->i_lock))
return;
- list_del(&l_ctx->list);
+ list_del_rcu(&l_ctx->list);
spin_unlock(&inode->i_lock);
- kfree(l_ctx);
+ kfree_rcu(l_ctx, rcu_head);
}
EXPORT_SYMBOL_GPL(nfs_put_lock_context);
struct nfs_open_context *get_nfs_open_context(struct nfs_open_context *ctx)
{
- if (ctx != NULL)
- refcount_inc(&ctx->lock_context.count);
- return ctx;
+ if (ctx != NULL && refcount_inc_not_zero(&ctx->lock_context.count))
+ return ctx;
+ return NULL;
}
EXPORT_SYMBOL_GPL(get_nfs_open_context);
struct inode *inode = d_inode(ctx->dentry);
struct super_block *sb = ctx->dentry->d_sb;
+ if (!refcount_dec_and_test(&ctx->lock_context.count))
+ return;
if (!list_empty(&ctx->list)) {
- if (!refcount_dec_and_lock(&ctx->lock_context.count, &inode->i_lock))
- return;
- list_del(&ctx->list);
+ spin_lock(&inode->i_lock);
+ list_del_rcu(&ctx->list);
spin_unlock(&inode->i_lock);
- } else if (!refcount_dec_and_test(&ctx->lock_context.count))
- return;
+ }
if (inode != NULL)
NFS_PROTO(inode)->close_context(ctx, is_sync);
if (ctx->cred != NULL)
dput(ctx->dentry);
nfs_sb_deactive(sb);
kfree(ctx->mdsthreshold);
- kfree(ctx);
+ kfree_rcu(ctx, rcu_head);
}
void put_nfs_open_context(struct nfs_open_context *ctx)
struct nfs_inode *nfsi = NFS_I(inode);
spin_lock(&inode->i_lock);
- if (ctx->mode & FMODE_WRITE)
- list_add(&ctx->list, &nfsi->open_files);
- else
- list_add_tail(&ctx->list, &nfsi->open_files);
+ list_add_tail_rcu(&ctx->list, &nfsi->open_files);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL_GPL(nfs_inode_attach_open_context);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *pos, *ctx = NULL;
- spin_lock(&inode->i_lock);
- list_for_each_entry(pos, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, &nfsi->open_files, list) {
if (cred != NULL && pos->cred != cred)
continue;
if ((pos->mode & (FMODE_READ|FMODE_WRITE)) != mode)
continue;
ctx = get_nfs_open_context(pos);
- break;
+ if (ctx)
+ break;
}
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
return ctx;
}
if (ctx->error < 0)
invalidate_inode_pages2(inode->i_mapping);
filp->private_data = NULL;
- spin_lock(&inode->i_lock);
- list_move_tail(&ctx->list, &NFS_I(inode)->open_files);
- spin_unlock(&inode->i_lock);
put_nfs_open_context_sync(ctx);
}
}
{
struct inode *inode = &nfsi->vfs_inode;
- assert_spin_locked(&inode->i_lock);
-
if (!S_ISREG(inode->i_mode))
return false;
if (list_empty(&nfsi->open_files))
return false;
- /* Note: This relies on nfsi->open_files being ordered with writers
- * being placed at the head of the list.
- * See nfs_inode_attach_open_context()
- */
- return (list_first_entry(&nfsi->open_files,
- struct nfs_open_context,
- list)->mode & FMODE_WRITE) == FMODE_WRITE;
+ return inode_is_open_for_write(inode);
}
static bool nfs_file_has_buffered_writers(struct nfs_inode *nfsi)
static int nfs3_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
{
struct inode *inode = hdr->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
if (hdr->pgio_done_cb != NULL)
return hdr->pgio_done_cb(task, hdr);
if (nfs3_async_handle_jukebox(task, inode))
return -EAGAIN;
+ if (task->tk_status >= 0 && !server->read_hdrsize)
+ cmpxchg(&server->read_hdrsize, 0, hdr->res.replen);
+
nfs_invalidate_atime(inode);
nfs_refresh_inode(inode, &hdr->fattr);
return 0;
struct rpc_message *msg)
{
msg->rpc_proc = &nfs3_procedures[NFS3PROC_READ];
+ hdr->args.replen = NFS_SERVER(hdr->inode)->read_hdrsize;
}
static int nfs3_proc_pgio_rpc_prepare(struct rpc_task *task,
const void *data)
{
const struct nfs_pgio_args *args = data;
+ unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
encode_read3args(xdr, args);
prepare_reply_buffer(req, args->pages, args->pgbase,
- args->count, NFS3_readres_sz);
+ args->count, replen);
req->rq_rcv_buf.flags |= XDRBUF_READ;
}
encode_nfs_fh3(xdr, args->fh);
encode_uint32(xdr, args->mask);
- if (args->mask & (NFS_ACL | NFS_DFACL))
+ if (args->mask & (NFS_ACL | NFS_DFACL)) {
prepare_reply_buffer(req, args->pages, 0,
NFSACL_MAXPAGES << PAGE_SHIFT,
ACL3_getaclres_sz);
+ req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+ }
}
static void nfs3_xdr_enc_setacl3args(struct rpc_rqst *req,
void *data)
{
struct nfs_pgio_res *result = data;
+ unsigned int pos;
enum nfs_stat status;
int error;
+ pos = xdr_stream_pos(xdr);
error = decode_nfsstat3(xdr, &status);
if (unlikely(error))
goto out;
result->op_status = status;
if (status != NFS3_OK)
goto out_status;
+ result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
error = decode_read3resok(xdr, result);
out:
return error;
unsigned int n_wronly; /* Number of write-only references */
unsigned int n_rdwr; /* Number of read/write references */
fmode_t state; /* State on the server (R,W, or RW) */
- atomic_t count;
+ refcount_t count;
wait_queue_head_t waitq;
+ struct rcu_head rcu_head;
};
/*
* Session has been established, and the client marked ready.
- * Set the mount rsize and wsize with negotiated fore channel
- * attributes which will be bound checked in nfs_server_set_fsinfo.
+ * Limit the mount rsize, wsize and dtsize using negotiated fore
+ * channel attributes.
*/
-static void nfs4_session_set_rwsize(struct nfs_server *server)
+static void nfs4_session_limit_rwsize(struct nfs_server *server)
{
#ifdef CONFIG_NFS_V4_1
struct nfs4_session *sess;
server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
- if (!server->rsize || server->rsize > server_resp_sz)
+ if (server->dtsize > server_resp_sz)
+ server->dtsize = server_resp_sz;
+ if (server->rsize > server_resp_sz)
server->rsize = server_resp_sz;
- if (!server->wsize || server->wsize > server_rqst_sz)
+ if (server->wsize > server_rqst_sz)
server->wsize = server_rqst_sz;
#endif /* CONFIG_NFS_V4_1 */
}
(unsigned long long) server->fsid.minor);
nfs_display_fhandle(mntfh, "Pseudo-fs root FH");
- nfs4_session_set_rwsize(server);
-
error = nfs_probe_fsinfo(server, mntfh, fattr);
if (error < 0)
goto out;
+ nfs4_session_limit_rwsize(server);
+
if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
server->namelen = NFS4_MAXNAMLEN;
return false;
}
-static int can_open_cached(struct nfs4_state *state, fmode_t mode, int open_mode)
+static int can_open_cached(struct nfs4_state *state, fmode_t mode,
+ int open_mode, enum open_claim_type4 claim)
{
int ret = 0;
if (open_mode & (O_EXCL|O_TRUNC))
goto out;
+ switch (claim) {
+ case NFS4_OPEN_CLAIM_NULL:
+ case NFS4_OPEN_CLAIM_FH:
+ goto out;
+ default:
+ break;
+ }
switch (mode & (FMODE_READ|FMODE_WRITE)) {
case FMODE_READ:
ret |= test_bit(NFS_O_RDONLY_STATE, &state->flags) != 0
for (;;) {
spin_lock(&state->owner->so_lock);
- if (can_open_cached(state, fmode, open_mode)) {
+ if (can_open_cached(state, fmode, open_mode, claim)) {
update_open_stateflags(state, fmode);
spin_unlock(&state->owner->so_lock);
goto out_return_state;
out:
return ERR_PTR(ret);
out_return_state:
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
return state;
}
update:
update_open_stateid(state, &data->o_res.stateid, NULL,
data->o_arg.fmode);
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
return state;
}
return ERR_CAST(inode);
if (data->state != NULL && data->state->inode == inode) {
state = data->state;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
} else
state = nfs4_get_open_state(inode, data->owner);
iput(inode);
return ret;
}
-static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
+static struct nfs_open_context *
+nfs4_state_find_open_context_mode(struct nfs4_state *state, fmode_t mode)
{
struct nfs_inode *nfsi = NFS_I(state->inode);
struct nfs_open_context *ctx;
- spin_lock(&state->inode->i_lock);
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
if (ctx->state != state)
continue;
- get_nfs_open_context(ctx);
- spin_unlock(&state->inode->i_lock);
+ if ((ctx->mode & mode) != mode)
+ continue;
+ if (!get_nfs_open_context(ctx))
+ continue;
+ rcu_read_unlock();
return ctx;
}
- spin_unlock(&state->inode->i_lock);
+ rcu_read_unlock();
return ERR_PTR(-ENOENT);
}
+static struct nfs_open_context *
+nfs4_state_find_open_context(struct nfs4_state *state)
+{
+ struct nfs_open_context *ctx;
+
+ ctx = nfs4_state_find_open_context_mode(state, FMODE_READ|FMODE_WRITE);
+ if (!IS_ERR(ctx))
+ return ctx;
+ ctx = nfs4_state_find_open_context_mode(state, FMODE_WRITE);
+ if (!IS_ERR(ctx))
+ return ctx;
+ return nfs4_state_find_open_context_mode(state, FMODE_READ);
+}
+
static struct nfs4_opendata *nfs4_open_recoverdata_alloc(struct nfs_open_context *ctx,
struct nfs4_state *state, enum open_claim_type4 claim)
{
if (opendata == NULL)
return ERR_PTR(-ENOMEM);
opendata->state = state;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
return opendata;
}
if (data->state != NULL) {
struct nfs_delegation *delegation;
- if (can_open_cached(data->state, data->o_arg.fmode, data->o_arg.open_flags))
+ if (can_open_cached(data->state, data->o_arg.fmode,
+ data->o_arg.open_flags, claim))
goto out_no_action;
rcu_read_lock();
delegation = rcu_dereference(NFS_I(data->state->inode)->delegation);
state = kzalloc(sizeof(*state), GFP_NOFS);
if (!state)
return NULL;
- atomic_set(&state->count, 1);
+ refcount_set(&state->count, 1);
INIT_LIST_HEAD(&state->lock_states);
spin_lock_init(&state->state_lock);
seqlock_init(&state->seqlock);
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs4_state *state;
- list_for_each_entry(state, &nfsi->open_states, inode_states) {
+ list_for_each_entry_rcu(state, &nfsi->open_states, inode_states) {
if (state->owner != owner)
continue;
if (!nfs4_valid_open_stateid(state))
continue;
- if (atomic_inc_not_zero(&state->count))
+ if (refcount_inc_not_zero(&state->count))
return state;
}
return NULL;
static void
nfs4_free_open_state(struct nfs4_state *state)
{
- kfree(state);
+ kfree_rcu(state, rcu_head);
}
struct nfs4_state *
struct nfs4_state *state, *new;
struct nfs_inode *nfsi = NFS_I(inode);
- spin_lock(&inode->i_lock);
+ rcu_read_lock();
state = __nfs4_find_state_byowner(inode, owner);
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
if (state)
goto out;
new = nfs4_alloc_open_state();
state = new;
state->owner = owner;
atomic_inc(&owner->so_count);
- list_add(&state->inode_states, &nfsi->open_states);
+ list_add_rcu(&state->inode_states, &nfsi->open_states);
ihold(inode);
state->inode = inode;
spin_unlock(&inode->i_lock);
struct inode *inode = state->inode;
struct nfs4_state_owner *owner = state->owner;
- if (!atomic_dec_and_lock(&state->count, &owner->so_lock))
+ if (!refcount_dec_and_lock(&state->count, &owner->so_lock))
return;
spin_lock(&inode->i_lock);
- list_del(&state->inode_states);
+ list_del_rcu(&state->inode_states);
list_del(&state->open_states);
spin_unlock(&inode->i_lock);
spin_unlock(&owner->so_lock);
struct nfs4_state *state;
bool found = false;
- spin_lock(&inode->i_lock);
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
nfs4_state_mark_reclaim_nograce(clp, state))
found = true;
}
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
nfs_inode_find_delegation_state_and_recover(inode, stateid);
if (found)
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_context *ctx;
- spin_lock(&inode->i_lock);
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
if (ctx->state != state)
continue;
set_bit(NFS_CONTEXT_BAD, &ctx->flags);
}
- spin_unlock(&inode->i_lock);
+ rcu_read_unlock();
}
static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
return status;
}
+#ifdef CONFIG_NFS_V4_2
+static void nfs42_complete_copies(struct nfs4_state_owner *sp, struct nfs4_state *state)
+{
+ struct nfs4_copy_state *copy;
+
+ if (!test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags))
+ return;
+
+ spin_lock(&sp->so_server->nfs_client->cl_lock);
+ list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
+ if (nfs4_stateid_match_other(&state->stateid, ©->parent_state->stateid))
+ continue;
+ copy->flags = 1;
+ complete(©->completion);
+ break;
+ }
+ spin_unlock(&sp->so_server->nfs_client->cl_lock);
+}
+#else /* !CONFIG_NFS_V4_2 */
+static inline void nfs42_complete_copies(struct nfs4_state_owner *sp,
+ struct nfs4_state *state)
+{
+}
+#endif /* CONFIG_NFS_V4_2 */
+
+static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_state *state,
+ const struct nfs4_state_recovery_ops *ops)
+{
+ struct nfs4_lock_state *lock;
+ int status;
+
+ status = ops->recover_open(sp, state);
+ if (status < 0)
+ return status;
+
+ status = nfs4_reclaim_locks(state, ops);
+ if (status < 0)
+ return status;
+
+ if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
+ spin_lock(&state->state_lock);
+ list_for_each_entry(lock, &state->lock_states, ls_locks) {
+ if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
+ pr_warn_ratelimited("NFS: %s: Lock reclaim failed!\n", __func__);
+ }
+ spin_unlock(&state->state_lock);
+ }
+
+ nfs42_complete_copies(sp, state);
+ clear_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
+ return status;
+}
+
static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
{
struct nfs4_state *state;
- struct nfs4_lock_state *lock;
int status = 0;
/* Note: we rely on the sp->so_states list being ordered
continue;
if (state->state == 0)
continue;
- atomic_inc(&state->count);
+ refcount_inc(&state->count);
spin_unlock(&sp->so_lock);
- status = ops->recover_open(sp, state);
- if (status >= 0) {
- status = nfs4_reclaim_locks(state, ops);
- if (status >= 0) {
- if (!test_bit(NFS_DELEGATED_STATE, &state->flags)) {
- spin_lock(&state->state_lock);
- list_for_each_entry(lock, &state->lock_states, ls_locks) {
- if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
- pr_warn_ratelimited("NFS: "
- "%s: Lock reclaim "
- "failed!\n", __func__);
- }
- spin_unlock(&state->state_lock);
- }
- clear_bit(NFS_STATE_RECLAIM_NOGRACE,
- &state->flags);
-#ifdef CONFIG_NFS_V4_2
- if (test_bit(NFS_CLNT_DST_SSC_COPY_STATE, &state->flags)) {
- struct nfs4_copy_state *copy;
-
- spin_lock(&sp->so_server->nfs_client->cl_lock);
- list_for_each_entry(copy, &sp->so_server->ss_copies, copies) {
- if (memcmp(&state->stateid.other, ©->parent_state->stateid.other, NFS4_STATEID_SIZE))
- continue;
- copy->flags = 1;
- complete(©->completion);
- printk("AGLO: server rebooted waking up the copy\n");
- break;
- }
- spin_unlock(&sp->so_server->nfs_client->cl_lock);
- }
-#endif /* CONFIG_NFS_V4_2 */
- nfs4_put_open_state(state);
- spin_lock(&sp->so_lock);
- goto restart;
- }
- }
+ status = __nfs4_reclaim_open_state(sp, state, ops);
+
switch (status) {
- default:
- printk(KERN_ERR "NFS: %s: unhandled error %d\n",
- __func__, status);
- /* Fall through */
- case -ENOENT:
- case -ENOMEM:
- case -EACCES:
- case -EROFS:
- case -EIO:
- case -ESTALE:
- /* Open state on this file cannot be recovered */
- nfs4_state_mark_recovery_failed(state, status);
- break;
- case -EAGAIN:
- ssleep(1);
- /* Fall through */
- case -NFS4ERR_ADMIN_REVOKED:
- case -NFS4ERR_STALE_STATEID:
- case -NFS4ERR_OLD_STATEID:
- case -NFS4ERR_BAD_STATEID:
- case -NFS4ERR_RECLAIM_BAD:
- case -NFS4ERR_RECLAIM_CONFLICT:
- nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ default:
+ if (status >= 0)
break;
- case -NFS4ERR_EXPIRED:
- case -NFS4ERR_NO_GRACE:
- nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
- case -NFS4ERR_STALE_CLIENTID:
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- goto out_err;
+ printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
+ /* Fall through */
+ case -ENOENT:
+ case -ENOMEM:
+ case -EACCES:
+ case -EROFS:
+ case -EIO:
+ case -ESTALE:
+ /* Open state on this file cannot be recovered */
+ nfs4_state_mark_recovery_failed(state, status);
+ break;
+ case -EAGAIN:
+ ssleep(1);
+ /* Fall through */
+ case -NFS4ERR_ADMIN_REVOKED:
+ case -NFS4ERR_STALE_STATEID:
+ case -NFS4ERR_OLD_STATEID:
+ case -NFS4ERR_BAD_STATEID:
+ case -NFS4ERR_RECLAIM_BAD:
+ case -NFS4ERR_RECLAIM_CONFLICT:
+ nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ break;
+ case -NFS4ERR_EXPIRED:
+ case -NFS4ERR_NO_GRACE:
+ nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
+ case -NFS4ERR_STALE_CLIENTID:
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_BADSLOT:
+ case -NFS4ERR_BAD_HIGH_SLOT:
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ goto out_err;
}
nfs4_put_open_state(state);
spin_lock(&sp->so_lock);
static int nfs4_recovery_handle_error(struct nfs_client *clp, int error)
{
switch (error) {
- case 0:
- break;
- case -NFS4ERR_CB_PATH_DOWN:
- nfs40_handle_cb_pathdown(clp);
- break;
- case -NFS4ERR_NO_GRACE:
- nfs4_state_end_reclaim_reboot(clp);
- break;
- case -NFS4ERR_STALE_CLIENTID:
- set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
- nfs4_state_start_reclaim_reboot(clp);
- break;
- case -NFS4ERR_EXPIRED:
- set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
- nfs4_state_start_reclaim_nograce(clp);
- break;
- case -NFS4ERR_BADSESSION:
- case -NFS4ERR_BADSLOT:
- case -NFS4ERR_BAD_HIGH_SLOT:
- case -NFS4ERR_DEADSESSION:
- case -NFS4ERR_SEQ_FALSE_RETRY:
- case -NFS4ERR_SEQ_MISORDERED:
- set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
- /* Zero session reset errors */
- break;
- case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
- set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
- break;
- default:
- dprintk("%s: failed to handle error %d for server %s\n",
- __func__, error, clp->cl_hostname);
- return error;
+ case 0:
+ break;
+ case -NFS4ERR_CB_PATH_DOWN:
+ nfs40_handle_cb_pathdown(clp);
+ break;
+ case -NFS4ERR_NO_GRACE:
+ nfs4_state_end_reclaim_reboot(clp);
+ break;
+ case -NFS4ERR_STALE_CLIENTID:
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ nfs4_state_start_reclaim_reboot(clp);
+ break;
+ case -NFS4ERR_EXPIRED:
+ set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
+ nfs4_state_start_reclaim_nograce(clp);
+ break;
+ case -NFS4ERR_BADSESSION:
+ case -NFS4ERR_BADSLOT:
+ case -NFS4ERR_BAD_HIGH_SLOT:
+ case -NFS4ERR_DEADSESSION:
+ case -NFS4ERR_SEQ_FALSE_RETRY:
+ case -NFS4ERR_SEQ_MISORDERED:
+ set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
+ /* Zero session reset errors */
+ break;
+ case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
+ set_bit(NFS4CLNT_BIND_CONN_TO_SESSION, &clp->cl_state);
+ break;
+ default:
+ dprintk("%s: failed to handle error %d for server %s\n",
+ __func__, error, clp->cl_hostname);
+ return error;
}
dprintk("%s: handled error %d for server %s\n", __func__, error,
clp->cl_hostname);
static int decode_attr_filehandle(struct xdr_stream *xdr, uint32_t *bitmap, struct nfs_fh *fh)
{
__be32 *p;
- int len;
+ u32 len;
if (fh != NULL)
memset(fh, 0, sizeof(*fh));
void nfs_set_pgio_error(struct nfs_pgio_header *hdr, int error, loff_t pos)
{
- spin_lock(&hdr->lock);
- if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags)
- || pos < hdr->io_start + hdr->good_bytes) {
+ unsigned int new = pos - hdr->io_start;
+
+ if (hdr->good_bytes > new) {
+ hdr->good_bytes = new;
clear_bit(NFS_IOHDR_EOF, &hdr->flags);
- hdr->good_bytes = pos - hdr->io_start;
- hdr->error = error;
+ if (!test_and_set_bit(NFS_IOHDR_ERROR, &hdr->flags))
+ hdr->error = error;
}
- spin_unlock(&hdr->lock);
}
static inline struct nfs_page *
if (hdr) {
INIT_LIST_HEAD(&hdr->pages);
- spin_lock_init(&hdr->lock);
hdr->rw_ops = ops;
}
return hdr;
return ret;
}
+static void nfs_pageio_error_cleanup(struct nfs_pageio_descriptor *desc)
+{
+ u32 midx;
+ struct nfs_pgio_mirror *mirror;
+
+ if (!desc->pg_error)
+ return;
+
+ for (midx = 0; midx < desc->pg_mirror_count; midx++) {
+ mirror = &desc->pg_mirrors[midx];
+ desc->pg_completion_ops->error_cleanup(&mirror->pg_list);
+ }
+}
+
int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
struct nfs_page *req)
{
return 1;
out_failed:
- /*
- * We might have failed before sending any reqs over wire.
- * Clean up rest of the reqs in mirror pg_list.
- */
- if (desc->pg_error) {
- struct nfs_pgio_mirror *mirror;
- void (*func)(struct list_head *);
-
- /* remember fatal errors */
- if (nfs_error_is_fatal(desc->pg_error))
- nfs_context_set_write_error(req->wb_context,
- desc->pg_error);
-
- func = desc->pg_completion_ops->error_cleanup;
- for (midx = 0; midx < desc->pg_mirror_count; midx++) {
- mirror = &desc->pg_mirrors[midx];
- func(&mirror->pg_list);
- }
- }
+ nfs_pageio_error_cleanup(desc);
return 0;
}
for (midx = 0; midx < desc->pg_mirror_count; midx++)
nfs_pageio_complete_mirror(desc, midx);
+ if (desc->pg_error < 0)
+ nfs_pageio_error_cleanup(desc);
if (desc->pg_ops->pg_cleanup)
desc->pg_ops->pg_cleanup(desc);
nfs_pageio_cleanup_mirroring(desc);
struct page **pages;
int i;
- pages = kcalloc(size, sizeof(struct page *), gfp_flags);
+ pages = kmalloc_array(size, sizeof(struct page *), gfp_flags);
if (!pages) {
dprintk("%s: can't alloc array of %zu pages\n", __func__, size);
return NULL;
pages[i] = alloc_page(gfp_flags);
if (!pages[i]) {
dprintk("%s: failed to allocate page\n", __func__);
- nfs4_free_pages(pages, size);
+ nfs4_free_pages(pages, i);
return NULL;
}
}
gfp_t gfp_flags)
{
struct nfs_server *server = pnfs_find_server(ino, ctx);
+ size_t max_reply_sz = server->pnfs_curr_ld->max_layoutget_response;
size_t max_pages = max_response_pages(server);
struct nfs4_layoutget *lgp;
if (lgp == NULL)
return NULL;
+ if (max_reply_sz) {
+ size_t npages = (max_reply_sz + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ if (npages < max_pages)
+ max_pages = npages;
+ }
+
lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
if (!lgp->args.layout.pages) {
kfree(lgp);
if (!nfs_have_layout(ino))
return false;
retry:
+ rcu_read_lock();
spin_lock(&ino->i_lock);
lo = nfsi->layout;
if (!lo || !pnfs_layout_is_valid(lo) ||
pnfs_get_layout_hdr(lo);
if (test_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) {
spin_unlock(&ino->i_lock);
+ rcu_read_unlock();
wait_on_bit(&lo->plh_flags, NFS_LAYOUT_RETURN,
TASK_UNINTERRUPTIBLE);
pnfs_put_layout_hdr(lo);
skip_read = true;
}
- list_for_each_entry(ctx, &nfsi->open_files, list) {
+ list_for_each_entry_rcu(ctx, &nfsi->open_files, list) {
state = ctx->state;
if (state == NULL)
continue;
out_noroc:
spin_unlock(&ino->i_lock);
+ rcu_read_unlock();
pnfs_layoutcommit_inode(ino, true);
if (roc) {
struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
struct module *owner;
unsigned flags;
unsigned max_deviceinfo_size;
+ unsigned max_layoutget_response;
int (*set_layoutdriver) (struct nfs_server *, const struct nfs_fh *);
int (*clear_layoutdriver) (struct nfs_server *);
struct nfs_pgio_header *hdr)
{
if (hdr->res.eof) {
- loff_t bound;
+ loff_t pos = hdr->args.offset + hdr->res.count;
+ unsigned int new = pos - hdr->io_start;
- bound = hdr->args.offset + hdr->res.count;
- spin_lock(&hdr->lock);
- if (bound < hdr->io_start + hdr->good_bytes) {
+ if (hdr->good_bytes > new) {
+ hdr->good_bytes = new;
set_bit(NFS_IOHDR_EOF, &hdr->flags);
clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
- hdr->good_bytes = bound - hdr->io_start;
}
- spin_unlock(&hdr->lock);
} else if (hdr->res.count < hdr->args.count)
nfs_readpage_retry(task, hdr);
}
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
* Author: Tomasz Figa <t.figa@samsung.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Device Tree binding constants for Samsung Exynos3250 clock controllers.
*/
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2013 Samsung Electronics Co., Ltd.
* Author: Andrzej Hajda <a.hajda@samsung.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Device Tree binding constants for Exynos4 clock controller.
-*/
+ */
#ifndef _DT_BINDINGS_CLOCK_EXYNOS_4_H
#define _DT_BINDINGS_CLOCK_EXYNOS_4_H
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2013 Samsung Electronics Co., Ltd.
* Author: Andrzej Hajda <a.hajda@samsung.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Device Tree binding constants for Exynos5250 clock controller.
-*/
+ */
#ifndef _DT_BINDINGS_CLOCK_EXYNOS_5250_H
#define _DT_BINDINGS_CLOCK_EXYNOS_5250_H
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
* Author: Rahul Sharma <rahul.sharma@samsung.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Provides Constants for Exynos5260 clocks.
-*/
+ */
#ifndef _DT_BINDINGS_CLK_EXYNOS5260_H
#define _DT_BINDINGS_CLK_EXYNOS5260_H
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
* Copyright (c) 2016 Krzysztof Kozlowski
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Device Tree binding constants for Exynos5421 clock controller.
-*/
+ */
#ifndef _DT_BINDINGS_CLOCK_EXYNOS_5410_H
#define _DT_BINDINGS_CLOCK_EXYNOS_5410_H
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2013 Samsung Electronics Co., Ltd.
* Author: Andrzej Hajda <a.hajda@samsung.com>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Device Tree binding constants for Exynos5420 clock controller.
-*/
+ */
#ifndef _DT_BINDINGS_CLOCK_EXYNOS_5420_H
#define _DT_BINDINGS_CLOCK_EXYNOS_5420_H
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
* Author: Chanwoo Choi <cw00.choi@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
*/
#ifndef _DT_BINDINGS_CLOCK_EXYNOS5433_H
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
* Author: Naveen Krishna Ch <naveenkrishna.ch@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
-*/
+ */
#ifndef _DT_BINDINGS_CLOCK_EXYNOS7_H
#define _DT_BINDINGS_CLOCK_EXYNOS7_H
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Device Tree binding constants clock controllers of Samsung S3C2410 and later.
*/
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Device Tree binding constants clock controllers of Samsung S3C2412.
*/
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (c) 2013 Heiko Stuebner <heiko@sntech.de>
*
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 as
- * published by the Free Software Foundation.
- *
* Device Tree binding constants clock controllers of Samsung S3C2443 and later.
*/
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* This header provides constants for the ARM GIC.
*/
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 OR MIT */
/*
* This header provides constants for most IRQ bindings.
*
+/* SPDX-License-Identifier: GPL-2.0+ */
/*
* thermal_exynos.h - Samsung EXYNOS TMU device tree definitions
*
* Copyright (C) 2014 Samsung Electronics
* Lukasz Majewski <l.majewski@samsung.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
*/
#ifndef _EXYNOS_THERMAL_TMU_DT_H
REQ_OP_FLUSH = 2,
/* discard sectors */
REQ_OP_DISCARD = 3,
- /* get zone information */
- REQ_OP_ZONE_REPORT = 4,
/* securely erase sectors */
REQ_OP_SECURE_ERASE = 5,
/* seset a zone write pointer */
#ifdef CONFIG_BLK_DEV_ZONED
-struct blk_zone_report_hdr {
- unsigned int nr_zones;
- u8 padding[60];
-};
-
+extern unsigned int blkdev_nr_zones(struct block_device *bdev);
extern int blkdev_report_zones(struct block_device *bdev,
sector_t sector, struct blk_zone *zones,
unsigned int *nr_zones, gfp_t gfp_mask);
extern int blkdev_reset_zones(struct block_device *bdev, sector_t sectors,
sector_t nr_sectors, gfp_t gfp_mask);
+extern int blk_revalidate_disk_zones(struct gendisk *disk);
extern int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg);
#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int blkdev_nr_zones(struct block_device *bdev)
+{
+ return 0;
+}
+
+static inline int blk_revalidate_disk_zones(struct gendisk *disk)
+{
+ return 0;
+}
+
static inline int blkdev_report_zones_ioctl(struct block_device *bdev,
fmode_t mode, unsigned int cmd,
unsigned long arg)
}
#ifdef CONFIG_BLK_DEV_ZONED
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+ return blk_queue_is_zoned(q) ? q->nr_zones : 0;
+}
+
static inline unsigned int blk_queue_zone_no(struct request_queue *q,
sector_t sector)
{
return false;
return test_bit(blk_queue_zone_no(q, sector), q->seq_zones_bitmap);
}
+#else /* CONFIG_BLK_DEV_ZONED */
+static inline unsigned int blk_queue_nr_zones(struct request_queue *q)
+{
+ return 0;
+}
#endif /* CONFIG_BLK_DEV_ZONED */
static inline bool rq_is_sync(struct request *rq)
int (*getgeo)(struct block_device *, struct hd_geometry *);
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
+ int (*report_zones)(struct gendisk *, sector_t sector,
+ struct blk_zone *zones, unsigned int *nr_zones,
+ gfp_t gfp_mask);
struct module *owner;
const struct pr_ops *pr_ops;
};
#ifndef _LINUX_CUDA_H
#define _LINUX_CUDA_H
+#include <linux/rtc.h>
#include <uapi/linux/cuda.h>
void (*done)(struct adb_request *), int nbytes, ...);
extern void cuda_poll(void);
+extern time64_t cuda_get_time(void);
+extern int cuda_set_rtc_time(struct rtc_time *tm);
+
#endif /* _LINUX_CUDA_H */
DM_TYPE_NONE = 0,
DM_TYPE_BIO_BASED = 1,
DM_TYPE_REQUEST_BASED = 2,
- DM_TYPE_MQ_REQUEST_BASED = 3,
- DM_TYPE_DAX_BIO_BASED = 4,
- DM_TYPE_NVME_BIO_BASED = 5,
+ DM_TYPE_DAX_BIO_BASED = 3,
+ DM_TYPE_NVME_BIO_BASED = 4,
};
typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t;
typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev);
+typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector,
+ struct blk_zone *zones,
+ unsigned int *nr_zones,
+ gfp_t gfp_mask);
+
/*
* These iteration functions are typically used to check (and combine)
* properties of underlying devices.
dm_status_fn status;
dm_message_fn message;
dm_prepare_ioctl_fn prepare_ioctl;
+#ifdef CONFIG_BLK_DEV_ZONED
+ dm_report_zones_fn report_zones;
+#endif
dm_busy_fn busy;
dm_iterate_devices_fn iterate_devices;
dm_io_hints_fn io_hints;
int dm_suspended(struct dm_target *ti);
int dm_noflush_suspending(struct dm_target *ti);
void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors);
-void dm_remap_zone_report(struct dm_target *ti, struct bio *bio,
- sector_t start);
+void dm_remap_zone_report(struct dm_target *ti, sector_t start,
+ struct blk_zone *zones, unsigned int *nr_zones);
union map_info *dm_get_rq_mapinfo(struct request *rq);
struct queue_limits *dm_get_queue_limits(struct mapped_device *md);
unsigned int dm_table_get_num_targets(struct dm_table *t);
fmode_t dm_table_get_mode(struct dm_table *t);
struct mapped_device *dm_table_get_md(struct dm_table *t);
+const char *dm_table_device_name(struct dm_table *t);
/*
* Trigger an event.
#include <linux/dma-mapping.h>
#include <linux/mem_encrypt.h>
+#define DIRECT_MAPPING_ERROR 0
+
#ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA
#include <asm/dma-direct.h>
#else
#define dev_is_fsl_mc(_dev) (0)
#endif
+/* Macro to check if a device is a container device */
+#define fsl_mc_is_cont_dev(_dev) (to_fsl_mc_device(_dev)->flags & \
+ FSL_MC_IS_DPRC)
+
+/* Macro to get the container device of a MC device */
+#define fsl_mc_cont_dev(_dev) (fsl_mc_is_cont_dev(_dev) ? \
+ (_dev) : (_dev)->parent)
+
/*
* module_fsl_mc_driver() - Helper macro for drivers that don't do
* anything special in module init/exit. This eliminates a lot of
#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */
#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */
#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */
+#define DMAR_MTRRCAP_REG 0x100 /* MTRR capability register */
+#define DMAR_MTRRDEF_REG 0x108 /* MTRR default type register */
+#define DMAR_MTRR_FIX64K_00000_REG 0x120 /* MTRR Fixed range registers */
+#define DMAR_MTRR_FIX16K_80000_REG 0x128
+#define DMAR_MTRR_FIX16K_A0000_REG 0x130
+#define DMAR_MTRR_FIX4K_C0000_REG 0x138
+#define DMAR_MTRR_FIX4K_C8000_REG 0x140
+#define DMAR_MTRR_FIX4K_D0000_REG 0x148
+#define DMAR_MTRR_FIX4K_D8000_REG 0x150
+#define DMAR_MTRR_FIX4K_E0000_REG 0x158
+#define DMAR_MTRR_FIX4K_E8000_REG 0x160
+#define DMAR_MTRR_FIX4K_F0000_REG 0x168
+#define DMAR_MTRR_FIX4K_F8000_REG 0x170
+#define DMAR_MTRR_PHYSBASE0_REG 0x180 /* MTRR Variable range registers */
+#define DMAR_MTRR_PHYSMASK0_REG 0x188
+#define DMAR_MTRR_PHYSBASE1_REG 0x190
+#define DMAR_MTRR_PHYSMASK1_REG 0x198
+#define DMAR_MTRR_PHYSBASE2_REG 0x1a0
+#define DMAR_MTRR_PHYSMASK2_REG 0x1a8
+#define DMAR_MTRR_PHYSBASE3_REG 0x1b0
+#define DMAR_MTRR_PHYSMASK3_REG 0x1b8
+#define DMAR_MTRR_PHYSBASE4_REG 0x1c0
+#define DMAR_MTRR_PHYSMASK4_REG 0x1c8
+#define DMAR_MTRR_PHYSBASE5_REG 0x1d0
+#define DMAR_MTRR_PHYSMASK5_REG 0x1d8
+#define DMAR_MTRR_PHYSBASE6_REG 0x1e0
+#define DMAR_MTRR_PHYSMASK6_REG 0x1e8
+#define DMAR_MTRR_PHYSBASE7_REG 0x1f0
+#define DMAR_MTRR_PHYSMASK7_REG 0x1f8
+#define DMAR_MTRR_PHYSBASE8_REG 0x200
+#define DMAR_MTRR_PHYSMASK8_REG 0x208
+#define DMAR_MTRR_PHYSBASE9_REG 0x210
+#define DMAR_MTRR_PHYSMASK9_REG 0x218
+#define DMAR_VCCAP_REG 0xe00 /* Virtual command capability register */
+#define DMAR_VCMD_REG 0xe10 /* Virtual command register */
+#define DMAR_VCRSP_REG 0xe20 /* Virtual command response register */
#define OFFSET_STRIDE (9)
struct pasid_state_entry;
struct page_req_dsc;
+/*
+ * 0: Present
+ * 1-11: Reserved
+ * 12-63: Context Ptr (12 - (haw-1))
+ * 64-127: Reserved
+ */
+struct root_entry {
+ u64 lo;
+ u64 hi;
+};
+
+/*
+ * low 64 bits:
+ * 0: present
+ * 1: fault processing disable
+ * 2-3: translation type
+ * 12-63: address space root
+ * high 64 bits:
+ * 0-2: address width
+ * 3-6: aval
+ * 8-23: domain id
+ */
+struct context_entry {
+ u64 lo;
+ u64 hi;
+};
+
struct dmar_domain {
int nid; /* node id */
extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev);
#endif
+#ifdef CONFIG_INTEL_IOMMU_DEBUGFS
+void intel_iommu_debugfs_init(void);
+#else
+static inline void intel_iommu_debugfs_init(void) {}
+#endif /* CONFIG_INTEL_IOMMU_DEBUGFS */
+
extern const struct attribute_group *intel_iommu_groups[];
+bool context_present(struct context_entry *context);
+struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus,
+ u8 devfn, int alloc);
#endif
DOMAIN_ATTR_FSL_PAMU_ENABLE,
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING, /* two stages of translation */
+ DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
DOMAIN_ATTR_MAX,
};
* @apply_resv_region: Temporary helper call-back for iova reserved ranges
* @domain_window_enable: Configure and enable a particular window for a domain
* @domain_window_disable: Disable a particular window for a domain
- * @domain_set_windows: Set the number of windows for a domain
- * @domain_get_windows: Return the number of windows for a domain
* @of_xlate: add OF master IDs to iommu grouping
* @pgsize_bitmap: bitmap of all possible supported page sizes
*/
int (*domain_window_enable)(struct iommu_domain *domain, u32 wnd_nr,
phys_addr_t paddr, u64 size, int prot);
void (*domain_window_disable)(struct iommu_domain *domain, u32 wnd_nr);
- /* Set the number of windows per domain */
- int (*domain_set_windows)(struct iommu_domain *domain, u32 w_count);
- /* Get the number of windows per domain */
- u32 (*domain_get_windows)(struct iommu_domain *domain);
int (*of_xlate)(struct device *dev, struct of_phandle_args *args);
bool (*is_attach_deferred)(struct iommu_domain *domain, struct device *dev);
extern void iommu_detach_device(struct iommu_domain *domain,
struct device *dev);
extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev);
+extern struct iommu_domain *iommu_get_dma_domain(struct device *dev);
extern int iommu_map(struct iommu_domain *domain, unsigned long iova,
phys_addr_t paddr, size_t size, int prot);
extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova,
extern struct iommu_group *pci_device_group(struct device *dev);
/* Generic device grouping function */
extern struct iommu_group *generic_device_group(struct device *dev);
+/* FSL-MC device grouping function */
+struct iommu_group *fsl_mc_device_group(struct device *dev);
/**
* struct iommu_fwspec - per-device IOMMU instance data
unsigned long granule; /* pfn granularity for this domain */
unsigned long start_pfn; /* Lower limit for this domain */
unsigned long dma_32bit_pfn;
+ unsigned long max32_alloc_size; /* Size of last failed allocation */
struct iova anchor; /* rbtree lookup anchor */
struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */
#ifndef LIBFDT_ENV_H
#define LIBFDT_ENV_H
+#include <linux/kernel.h> /* For INT_MAX */
#include <linux/string.h>
#include <asm/byteorder.h>
struct nfs_open_context *open_context;
fl_owner_t lockowner;
atomic_t io_count;
+ struct rcu_head rcu_head;
};
struct nfs4_state;
struct list_head list;
struct nfs4_threshold *mdsthreshold;
+ struct rcu_head rcu_head;
};
struct nfs_open_dir_context {
unsigned short mountd_port;
unsigned short mountd_protocol;
struct rpc_wait_queue uoc_rpcwaitq;
+
+ /* XDR related information */
+ unsigned int read_hdrsize;
};
/* Server capabilities */
__u32 count;
unsigned int pgbase;
struct page ** pages;
- const u32 * bitmask; /* used by write */
- enum nfs3_stable_how stable; /* used by write */
+ union {
+ unsigned int replen; /* used by read */
+ struct {
+ const u32 * bitmask; /* used by write */
+ enum nfs3_stable_how stable; /* used by write */
+ };
+ };
};
struct nfs_pgio_res {
struct nfs_fattr * fattr;
__u32 count;
__u32 op_status;
- int eof; /* used by read */
- struct nfs_writeverf * verf; /* used by write */
- const struct nfs_server *server; /* used by write */
-
+ union {
+ struct {
+ unsigned int replen; /* used by read */
+ int eof; /* used by read */
+ };
+ struct {
+ struct nfs_writeverf * verf; /* used by write */
+ const struct nfs_server *server; /* used by write */
+ };
+ };
};
/*
const struct nfs_rw_ops *rw_ops;
struct nfs_io_completion *io_completion;
struct nfs_direct_req *dreq;
- spinlock_t lock;
- /* fields protected by lock */
+
int pnfs_error;
int error; /* merge with pnfs_error */
- unsigned long good_bytes; /* boundary of good data */
+ unsigned int good_bytes; /* boundary of good data */
unsigned long flags;
/*
#include <asm/prom.h>
#endif
-/* Default #address and #size cells. Allow arch asm/prom.h to override */
-#if !defined(OF_ROOT_NODE_ADDR_CELLS_DEFAULT)
-#define OF_ROOT_NODE_ADDR_CELLS_DEFAULT 1
-#define OF_ROOT_NODE_SIZE_CELLS_DEFAULT 1
-#endif
-
#define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags)
#define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags)
const char *name,
int *lenp);
extern struct device_node *of_get_cpu_node(int cpu, unsigned int *thread);
+extern struct device_node *of_get_next_cpu_node(struct device_node *prev);
+
#define for_each_property_of_node(dn, pp) \
for (pp = dn->properties; pp != NULL; pp = pp->next)
extern int of_cpu_node_to_id(struct device_node *np);
+int of_map_rid(struct device_node *np, u32 rid,
+ const char *map_name, const char *map_mask_name,
+ struct device_node **target, u32 *id_out);
+
#else /* CONFIG_OF */
static inline void of_core_init(void)
return NULL;
}
+static inline struct device_node *of_get_next_cpu_node(struct device_node *prev)
+{
+ return NULL;
+}
+
static inline int of_n_addr_cells(struct device_node *np)
{
return 0;
return -ENODEV;
}
+static inline int of_map_rid(struct device_node *np, u32 rid,
+ const char *map_name, const char *map_mask_name,
+ struct device_node **target, u32 *id_out)
+{
+ return -EINVAL;
+}
+
#define of_match_ptr(_ptr) NULL
#define of_match_node(_matches, _node) NULL
#endif /* CONFIG_OF */
static inline const char *of_node_get_device_type(const struct device_node *np)
{
- return of_get_property(np, "type", NULL);
+ return of_get_property(np, "device_type", NULL);
}
static inline bool of_node_is_type(const struct device_node *np, const char *type)
for (child = of_get_next_available_child(parent, NULL); child != NULL; \
child = of_get_next_available_child(parent, child))
+#define for_each_of_cpu_node(cpu) \
+ for (cpu = of_get_next_cpu_node(NULL); cpu != NULL; \
+ cpu = of_get_next_cpu_node(cpu))
+
#define for_each_node_with_property(dn, prop_name) \
for (dn = of_find_node_with_property(NULL, prop_name); dn; \
dn = of_find_node_with_property(dn, prop_name))
unsigned int devfn);
int of_pci_get_devfn(struct device_node *np);
void of_pci_check_probe_only(void);
-int of_pci_map_rid(struct device_node *np, u32 rid,
- const char *map_name, const char *map_mask_name,
- struct device_node **target, u32 *id_out);
#else
static inline struct device_node *of_pci_find_child_device(struct device_node *parent,
unsigned int devfn)
return -EINVAL;
}
-static inline int of_pci_map_rid(struct device_node *np, u32 rid,
- const char *map_name, const char *map_mask_name,
- struct device_node **target, u32 *id_out)
-{
- return -EINVAL;
-}
-
static inline void of_pci_check_probe_only(void) { }
#endif
#ifndef _LINUX_PMU_H
#define _LINUX_PMU_H
+#include <linux/rtc.h>
#include <uapi/linux/pmu.h>
extern void pmu_enable_irled(int on);
+extern time64_t pmu_get_time(void);
+extern int pmu_set_rtc_time(struct rtc_time *tm);
+
extern void pmu_restart(void);
extern void pmu_shutdown(void);
extern void pmu_unlock(void);
const struct rpc_credops *cr_ops;
unsigned long cr_expire; /* when to gc */
unsigned long cr_flags; /* various flags */
- atomic_t cr_count; /* ref count */
+ refcount_t cr_count; /* ref count */
kuid_t cr_uid;
* differ from the flavor in
* au_ops->au_flavor in gss
* case) */
- atomic_t au_count; /* Reference counter */
+ refcount_t au_count; /* Reference counter */
struct rpc_cred_cache * au_credcache;
/* per-flavor data */
int (*crkey_timeout)(struct rpc_cred *);
bool (*crkey_to_expire)(struct rpc_cred *);
char * (*crstringify_acceptor)(struct rpc_cred *);
+ bool (*crneed_reencode)(struct rpc_task *);
};
extern const struct rpc_authops authunix_ops;
__be32 * rpcauth_checkverf(struct rpc_task *, __be32 *);
int rpcauth_wrap_req(struct rpc_task *task, kxdreproc_t encode, void *rqstp, __be32 *data, void *obj);
int rpcauth_unwrap_resp(struct rpc_task *task, kxdrdproc_t decode, void *rqstp, __be32 *data, void *obj);
+bool rpcauth_xmit_need_reencode(struct rpc_task *task);
int rpcauth_refreshcred(struct rpc_task *);
void rpcauth_invalcred(struct rpc_task *);
int rpcauth_uptodatecred(struct rpc_task *);
char * rpcauth_stringify_acceptor(struct rpc_cred *);
static inline
-struct rpc_cred * get_rpccred(struct rpc_cred *cred)
+struct rpc_cred *get_rpccred(struct rpc_cred *cred)
{
- if (cred != NULL)
- atomic_inc(&cred->cr_count);
- return cred;
+ if (cred != NULL && refcount_inc_not_zero(&cred->cr_count))
+ return cred;
+ return NULL;
}
/**
static inline struct rpc_cred *
get_rpccred_rcu(struct rpc_cred *cred)
{
- if (atomic_inc_not_zero(&cred->cr_count))
- return cred;
- return NULL;
+ return get_rpccred(cred);
}
#endif /* __KERNEL__ */
refcount_t count;
enum rpc_gss_proc gc_proc;
u32 gc_seq;
+ u32 gc_seq_xmit;
spinlock_t gc_seq_lock;
struct gss_ctx *gc_gss_ctx;
struct xdr_netobj gc_wire_ctx;
#ifdef CONFIG_SUNRPC_BACKCHANNEL
struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid);
void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied);
+void xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task);
void xprt_free_bc_request(struct rpc_rqst *req);
int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs);
u8 acceptor_integ[GSS_KRB5_MAX_KEYLEN];
};
-extern spinlock_t krb5_seq_lock;
+extern u32 gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx);
+extern u64 gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx);
/* The length of the Kerberos GSS token header */
#define GSS_KRB5_TOK_HDR_LEN (16)
#define RPC_TASK_RUNNING 0
#define RPC_TASK_QUEUED 1
#define RPC_TASK_ACTIVE 2
-#define RPC_TASK_MSG_RECV 3
-#define RPC_TASK_MSG_RECV_WAIT 4
+#define RPC_TASK_NEED_XMIT 3
+#define RPC_TASK_NEED_RECV 4
+#define RPC_TASK_MSG_PIN_WAIT 5
#define RPC_IS_RUNNING(t) test_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
#define rpc_set_running(t) set_bit(RPC_TASK_RUNNING, &(t)->tk_runstate)
struct rpc_wait_queue {
spinlock_t lock;
struct list_head tasks[RPC_NR_PRIORITY]; /* task queue for each priority level */
- pid_t owner; /* process id of last task serviced */
unsigned char maxpriority; /* maximum priority (0 if queue is not a priority queue) */
unsigned char priority; /* current priority */
unsigned char nr; /* # tasks remaining for cookie */
* from a single cookie. The aim is to improve
* performance of NFS operations such as read/write.
*/
-#define RPC_BATCH_COUNT 16
#define RPC_IS_PRIORITY(q) ((q)->maxpriority > 0)
/*
struct rpc_task *task);
void rpc_wake_up_queued_task(struct rpc_wait_queue *,
struct rpc_task *);
+void rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *,
+ struct rpc_task *,
+ int);
void rpc_wake_up(struct rpc_wait_queue *);
struct rpc_task *rpc_wake_up_next(struct rpc_wait_queue *);
struct rpc_task *rpc_wake_up_first_on_wq(struct workqueue_struct *wq,
struct sockaddr_storage xpt_remote; /* remote peer's address */
size_t xpt_remotelen; /* length of address */
char xpt_remotebuf[INET6_ADDRSTRLEN + 10];
- struct rpc_wait_queue xpt_bc_pending; /* backchannel wait queue */
struct list_head xpt_users; /* callbacks on free */
struct net *xpt_net;
#include <asm/unaligned.h>
#include <linux/scatterlist.h>
+struct bio_vec;
struct rpc_rqst;
/*
struct kvec head[1], /* RPC header + non-page data */
tail[1]; /* Appended after page data */
+ struct bio_vec *bvec;
struct page ** pages; /* Array of pages */
unsigned int page_base, /* Start of page data */
page_len, /* Length of page data */
flags; /* Flags for data disposition */
#define XDRBUF_READ 0x01 /* target of file read */
#define XDRBUF_WRITE 0x02 /* source of file write */
+#define XDRBUF_SPARSE_PAGES 0x04 /* Page array is sparse */
unsigned int buflen, /* Total length of storage buffer */
len; /* Length of XDR encoded message */
buf->head[0].iov_base = start;
buf->head[0].iov_len = len;
buf->tail[0].iov_len = 0;
+ buf->bvec = NULL;
+ buf->pages = NULL;
buf->page_len = 0;
buf->flags = 0;
buf->len = 0;
void xdr_inline_pages(struct xdr_buf *, unsigned int,
struct page **, unsigned int, unsigned int);
void xdr_terminate_string(struct xdr_buf *, const u32);
+size_t xdr_buf_pagecount(struct xdr_buf *buf);
+int xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp);
+void xdr_free_bvec(struct xdr_buf *buf);
static inline __be32 *xdr_encode_array(__be32 *p, const void *s, unsigned int len)
{
typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
-size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len);
extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
-extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
- struct xdr_skb_reader *, xdr_skb_read_actor);
extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
struct page **rq_enc_pages; /* scratch pages for use by
gss privacy code */
void (*rq_release_snd_buf)(struct rpc_rqst *); /* release rq_enc_pages */
- struct list_head rq_list;
+
+ union {
+ struct list_head rq_list; /* Slot allocation list */
+ struct rb_node rq_recv; /* Receive queue */
+ };
+
+ struct list_head rq_xmit; /* Send queue */
+ struct list_head rq_xmit2; /* Send queue */
void *rq_buffer; /* Call XDR encode buffer */
size_t rq_callsize;
/* A cookie used to track the
state of the transport
connection */
+ atomic_t rq_pin;
/*
* Partial send handling
void (*connect)(struct rpc_xprt *xprt, struct rpc_task *task);
int (*buf_alloc)(struct rpc_task *task);
void (*buf_free)(struct rpc_task *task);
- int (*send_request)(struct rpc_task *task);
+ void (*prepare_request)(struct rpc_rqst *req);
+ int (*send_request)(struct rpc_rqst *req);
void (*set_retrans_timeout)(struct rpc_task *task);
void (*timer)(struct rpc_xprt *xprt, struct rpc_task *task);
void (*release_request)(struct rpc_task *task);
*/
spinlock_t transport_lock; /* lock transport info */
spinlock_t reserve_lock; /* lock slot table */
- spinlock_t recv_lock; /* lock receive list */
+ spinlock_t queue_lock; /* send/receive queue lock */
u32 xid; /* Next XID value to use */
struct rpc_task * snd_task; /* Task blocked in send */
+
+ struct list_head xmit_queue; /* Send queue */
+
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct svc_serv *bc_serv; /* The RPC service which will */
struct list_head bc_pa_list; /* List of preallocated
* backchannel rpc_rqst's */
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
- struct list_head recv;
+
+ struct rb_root recv_queue; /* Receive queue */
struct {
unsigned long bind_count, /* total number of binds */
struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
void xprt_connect(struct rpc_task *task);
void xprt_reserve(struct rpc_task *task);
-void xprt_request_init(struct rpc_task *task);
void xprt_retry_reserve(struct rpc_task *task);
int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
void xprt_free_slot(struct rpc_xprt *xprt,
struct rpc_rqst *req);
-void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task);
+void xprt_request_prepare(struct rpc_rqst *req);
bool xprt_prepare_transmit(struct rpc_task *task);
+void xprt_request_enqueue_transmit(struct rpc_task *task);
+void xprt_request_enqueue_receive(struct rpc_task *task);
+void xprt_request_wait_receive(struct rpc_task *task);
+bool xprt_request_need_retransmit(struct rpc_task *task);
void xprt_transmit(struct rpc_task *task);
void xprt_end_transmit(struct rpc_task *task);
int xprt_adjust_timeout(struct rpc_rqst *req);
void xprt_set_retrans_timeout_def(struct rpc_task *task);
void xprt_set_retrans_timeout_rtt(struct rpc_task *task);
void xprt_wake_pending_tasks(struct rpc_xprt *xprt, int status);
-void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action);
-void xprt_write_space(struct rpc_xprt *xprt);
+void xprt_wait_for_buffer_space(struct rpc_xprt *xprt);
+bool xprt_write_space(struct rpc_xprt *xprt);
void xprt_adjust_cwnd(struct rpc_xprt *xprt, struct rpc_task *task, int result);
struct rpc_rqst * xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid);
void xprt_update_rtt(struct rpc_task *task);
void xprt_pin_rqst(struct rpc_rqst *req);
void xprt_unpin_rqst(struct rpc_rqst *req);
void xprt_release_rqst_cong(struct rpc_task *task);
+bool xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req);
void xprt_disconnect_done(struct rpc_xprt *xprt);
void xprt_force_disconnect(struct rpc_xprt *xprt);
void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
#define XPRT_BINDING (5)
#define XPRT_CLOSING (6)
#define XPRT_CONGESTED (9)
+#define XPRT_CWND_WAIT (10)
+#define XPRT_WRITE_SPACE (11)
static inline void xprt_set_connected(struct rpc_xprt *xprt)
{
/*
* State of TCP reply receive
*/
- __be32 tcp_fraghdr,
- tcp_xid,
- tcp_calldir;
+ struct {
+ struct {
+ __be32 fraghdr,
+ xid,
+ calldir;
+ } __attribute__((packed));
- u32 tcp_offset,
- tcp_reclen;
+ u32 offset,
+ len;
- unsigned long tcp_copied,
- tcp_flags;
+ unsigned long copied;
+ } recv;
+
+ /*
+ * State of TCP transmit queue
+ */
+ struct {
+ u32 offset;
+ } xmit;
/*
* Connection of transports
void (*old_error_report)(struct sock *);
};
-/*
- * TCP receive state flags
- */
-#define TCP_RCV_LAST_FRAG (1UL << 0)
-#define TCP_RCV_COPY_FRAGHDR (1UL << 1)
-#define TCP_RCV_COPY_XID (1UL << 2)
-#define TCP_RCV_COPY_DATA (1UL << 3)
-#define TCP_RCV_READ_CALLDIR (1UL << 4)
-#define TCP_RCV_COPY_CALLDIR (1UL << 5)
-
/*
* TCP RPC flags
*/
-#define TCP_RPC_REPLY (1UL << 6)
-
#define XPRT_SOCK_CONNECTING 1U
#define XPRT_SOCK_DATA_READY (2)
#define XPRT_SOCK_UPD_TIMEOUT (3)
/* Accessory functions. */
-void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle,
- gfp_t flags, unsigned long attrs);
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, unsigned long attrs);
-
extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
int nelems, enum dma_data_direction dir);
-extern int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr);
-
extern int
swiotlb_dma_supported(struct device *hwdev, u64 mask);
#endif
extern void swiotlb_print_info(void);
-extern int is_swiotlb_buffer(phys_addr_t paddr);
extern void swiotlb_set_max_segment(unsigned int);
extern const struct dma_map_ops swiotlb_dma_ops;
device. */
struct device dev; /* Generic device interface. */
struct resource resource; /* Address space of this device. */
+ u64 dma_mask; /* DMA addressable range. */
char vendor[9];
char name[9];
char firmware[9];
u32 table_id;
/* filter_set is an optimization that an entry is set */
bool filter_set;
+ bool dump_all_families;
unsigned char protocol;
unsigned char rt_type;
unsigned int flags;
{ REQ_OP_WRITE, "WRITE" }, \
{ REQ_OP_FLUSH, "FLUSH" }, \
{ REQ_OP_DISCARD, "DISCARD" }, \
- { REQ_OP_ZONE_REPORT, "ZONE_REPORT" }, \
{ REQ_OP_SECURE_ERASE, "SECURE_ERASE" }, \
{ REQ_OP_ZONE_RESET, "ZONE_RESET" }, \
{ REQ_OP_WRITE_SAME, "WRITE_SAME" }, \
);
#define DEFINE_MR_EVENT(name) \
- DEFINE_EVENT(xprtrdma_mr, name, \
+ DEFINE_EVENT(xprtrdma_mr, xprtrdma_mr_##name, \
TP_PROTO( \
const struct rpcrdma_mr *mr \
), \
** Connection events
**/
-TRACE_EVENT(xprtrdma_conn_upcall,
+TRACE_EVENT(xprtrdma_cm_event,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
struct rdma_cm_event *event
DEFINE_RXPRT_EVENT(xprtrdma_reconnect);
DEFINE_RXPRT_EVENT(xprtrdma_inject_dsc);
-TRACE_EVENT(xprtrdma_qp_error,
+TRACE_EVENT(xprtrdma_qp_event,
TP_PROTO(
const struct rpcrdma_xprt *r_xprt,
const struct ib_event *event
TP_STRUCT__entry(
__field(const void *, req)
__field(int, num_sge)
- __field(bool, signaled)
+ __field(int, signaled)
__field(int, status)
),
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li);
DEFINE_FRWR_DONE_EVENT(xprtrdma_wc_li_wake);
-DEFINE_MR_EVENT(xprtrdma_localinv);
-DEFINE_MR_EVENT(xprtrdma_dma_map);
-DEFINE_MR_EVENT(xprtrdma_dma_unmap);
-DEFINE_MR_EVENT(xprtrdma_remoteinv);
-DEFINE_MR_EVENT(xprtrdma_recover_mr);
+DEFINE_MR_EVENT(localinv);
+DEFINE_MR_EVENT(map);
+DEFINE_MR_EVENT(unmap);
+DEFINE_MR_EVENT(remoteinv);
+DEFINE_MR_EVENT(recycle);
/**
** Reply events
__get_str(addr), __get_str(port), __entry->status)
);
-TRACE_EVENT(xs_tcp_data_ready,
- TP_PROTO(struct rpc_xprt *xprt, int err, unsigned int total),
+TRACE_EVENT(xs_stream_read_data,
+ TP_PROTO(struct rpc_xprt *xprt, ssize_t err, size_t total),
TP_ARGS(xprt, err, total),
TP_STRUCT__entry(
- __field(int, err)
- __field(unsigned int, total)
+ __field(ssize_t, err)
+ __field(size_t, total)
__string(addr, xprt ? xprt->address_strings[RPC_DISPLAY_ADDR] :
"(null)")
__string(port, xprt ? xprt->address_strings[RPC_DISPLAY_PORT] :
xprt->address_strings[RPC_DISPLAY_PORT] : "(null)");
),
- TP_printk("peer=[%s]:%s err=%d total=%u", __get_str(addr),
+ TP_printk("peer=[%s]:%s err=%zd total=%zu", __get_str(addr),
__get_str(port), __entry->err, __entry->total)
);
-#define rpc_show_sock_xprt_flags(flags) \
- __print_flags(flags, "|", \
- { TCP_RCV_LAST_FRAG, "TCP_RCV_LAST_FRAG" }, \
- { TCP_RCV_COPY_FRAGHDR, "TCP_RCV_COPY_FRAGHDR" }, \
- { TCP_RCV_COPY_XID, "TCP_RCV_COPY_XID" }, \
- { TCP_RCV_COPY_DATA, "TCP_RCV_COPY_DATA" }, \
- { TCP_RCV_READ_CALLDIR, "TCP_RCV_READ_CALLDIR" }, \
- { TCP_RCV_COPY_CALLDIR, "TCP_RCV_COPY_CALLDIR" }, \
- { TCP_RPC_REPLY, "TCP_RPC_REPLY" })
-
-TRACE_EVENT(xs_tcp_data_recv,
+TRACE_EVENT(xs_stream_read_request,
TP_PROTO(struct sock_xprt *xs),
TP_ARGS(xs),
__string(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR])
__string(port, xs->xprt.address_strings[RPC_DISPLAY_PORT])
__field(u32, xid)
- __field(unsigned long, flags)
__field(unsigned long, copied)
__field(unsigned int, reclen)
- __field(unsigned long, offset)
+ __field(unsigned int, offset)
),
TP_fast_assign(
__assign_str(addr, xs->xprt.address_strings[RPC_DISPLAY_ADDR]);
__assign_str(port, xs->xprt.address_strings[RPC_DISPLAY_PORT]);
- __entry->xid = be32_to_cpu(xs->tcp_xid);
- __entry->flags = xs->tcp_flags;
- __entry->copied = xs->tcp_copied;
- __entry->reclen = xs->tcp_reclen;
- __entry->offset = xs->tcp_offset;
+ __entry->xid = be32_to_cpu(xs->recv.xid);
+ __entry->copied = xs->recv.copied;
+ __entry->reclen = xs->recv.len;
+ __entry->offset = xs->recv.offset;
),
- TP_printk("peer=[%s]:%s xid=0x%08x flags=%s copied=%lu reclen=%u offset=%lu",
+ TP_printk("peer=[%s]:%s xid=0x%08x copied=%lu reclen=%u offset=%u",
__get_str(addr), __get_str(port), __entry->xid,
- rpc_show_sock_xprt_flags(__entry->flags),
__entry->copied, __entry->reclen, __entry->offset)
);
* sector specified in the report request structure.
* @BLKRESETZONE: Reset the write pointer of the zones in the specified
* sector range. The sector range must be zone aligned.
+ * @BLKGETZONESZ: Get the device zone size in number of 512 B sectors.
*/
#define BLKREPORTZONE _IOWR(0x12, 130, struct blk_zone_report)
#define BLKRESETZONE _IOW(0x12, 131, struct blk_zone_range)
+#define BLKGETZONESZ _IOW(0x12, 132, __u32)
+#define BLKGETNRZONES _IOW(0x12, 133, __u32)
#endif /* _UAPI_BLKZONED_H */
#include <linux/pfn.h>
#include <linux/set_memory.h>
-#define DIRECT_MAPPING_ERROR 0
-
/*
* Most architectures use ZONE_DMA for the first 16 Megabytes, but
* some use it for entirely different regions:
#include <linux/cache.h>
#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
#include <linux/mm.h>
#include <linux/export.h>
#include <linux/spinlock.h>
*/
static unsigned long io_tlb_nslabs;
-/*
- * When the IOMMU overflows we return a fallback buffer. This sets the size.
- */
-static unsigned long io_tlb_overflow = 32*1024;
-
-static phys_addr_t io_tlb_overflow_buffer;
-
/*
* This is a free list describing the number of free entries available from
* each index
return 0;
}
early_param("swiotlb", setup_io_tlb_npages);
-/* make io_tlb_overflow tunable too? */
unsigned long swiotlb_nr_tbl(void)
{
bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
-
- vaddr = phys_to_virt(io_tlb_overflow_buffer);
- bytes = PAGE_ALIGN(io_tlb_overflow);
- set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
- memset(vaddr, 0, bytes);
}
int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
- void *v_overflow_buffer;
unsigned long i, bytes;
bytes = nslabs << IO_TLB_SHIFT;
io_tlb_start = __pa(tlb);
io_tlb_end = io_tlb_start + bytes;
- /*
- * Get the overflow emergency buffer
- */
- v_overflow_buffer = memblock_virt_alloc_low_nopanic(
- PAGE_ALIGN(io_tlb_overflow),
- PAGE_SIZE);
- if (!v_overflow_buffer)
- return -ENOMEM;
-
- io_tlb_overflow_buffer = __pa(v_overflow_buffer);
-
/*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
unsigned long i, bytes;
- unsigned char *v_overflow_buffer;
bytes = nslabs << IO_TLB_SHIFT;
set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
memset(tlb, 0, bytes);
- /*
- * Get the overflow emergency buffer
- */
- v_overflow_buffer = (void *)__get_free_pages(GFP_DMA,
- get_order(io_tlb_overflow));
- if (!v_overflow_buffer)
- goto cleanup2;
-
- set_memory_decrypted((unsigned long)v_overflow_buffer,
- io_tlb_overflow >> PAGE_SHIFT);
- memset(v_overflow_buffer, 0, io_tlb_overflow);
- io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
-
/*
* Allocate and initialize the free list array. This array is used
* to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
sizeof(int)));
io_tlb_list = NULL;
cleanup3:
- free_pages((unsigned long)v_overflow_buffer,
- get_order(io_tlb_overflow));
- io_tlb_overflow_buffer = 0;
-cleanup2:
io_tlb_end = 0;
io_tlb_start = 0;
io_tlb_nslabs = 0;
return;
if (late_alloc) {
- free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer),
- get_order(io_tlb_overflow));
free_pages((unsigned long)io_tlb_orig_addr,
get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
free_pages((unsigned long)phys_to_virt(io_tlb_start),
get_order(io_tlb_nslabs << IO_TLB_SHIFT));
} else {
- memblock_free_late(io_tlb_overflow_buffer,
- PAGE_ALIGN(io_tlb_overflow));
memblock_free_late(__pa(io_tlb_orig_addr),
PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
memblock_free_late(__pa(io_tlb_list),
max_segment = 0;
}
-int is_swiotlb_buffer(phys_addr_t paddr)
+static int is_swiotlb_buffer(phys_addr_t paddr)
{
return paddr >= io_tlb_start && paddr < io_tlb_end;
}
return tlb_addr;
}
-/*
- * Allocates bounce buffer and returns its physical address.
- */
-static phys_addr_t
-map_single(struct device *hwdev, phys_addr_t phys, size_t size,
- enum dma_data_direction dir, unsigned long attrs)
-{
- dma_addr_t start_dma_addr;
-
- if (swiotlb_force == SWIOTLB_NO_FORCE) {
- dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n",
- &phys);
- return SWIOTLB_MAP_ERROR;
- }
-
- start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
- return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
- dir, attrs);
-}
-
/*
* tlb_addr is the physical address of the bounce buffer to unmap.
*/
}
}
-static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
- size_t size)
-{
- u64 mask = DMA_BIT_MASK(32);
-
- if (dev && dev->coherent_dma_mask)
- mask = dev->coherent_dma_mask;
- return addr + size - 1 <= mask;
-}
-
-static void *
-swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
- unsigned long attrs)
+static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys,
+ size_t size, enum dma_data_direction dir, unsigned long attrs)
{
- phys_addr_t phys_addr;
-
- if (swiotlb_force == SWIOTLB_NO_FORCE)
- goto out_warn;
-
- phys_addr = swiotlb_tbl_map_single(dev,
- __phys_to_dma(dev, io_tlb_start),
- 0, size, DMA_FROM_DEVICE, attrs);
- if (phys_addr == SWIOTLB_MAP_ERROR)
- goto out_warn;
-
- *dma_handle = __phys_to_dma(dev, phys_addr);
- if (!dma_coherent_ok(dev, *dma_handle, size))
- goto out_unmap;
-
- memset(phys_to_virt(phys_addr), 0, size);
- return phys_to_virt(phys_addr);
+ dma_addr_t dma_addr;
-out_unmap:
- dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n",
- (unsigned long long)dev->coherent_dma_mask,
- (unsigned long long)*dma_handle);
-
- /*
- * DMA_TO_DEVICE to avoid memcpy in unmap_single.
- * DMA_ATTR_SKIP_CPU_SYNC is optional.
- */
- swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC);
-out_warn:
- if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) {
- dev_warn(dev,
- "swiotlb: coherent allocation failed, size=%zu\n",
- size);
- dump_stack();
+ if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) {
+ dev_warn_ratelimited(dev,
+ "Cannot do DMA to address %pa\n", phys);
+ return DIRECT_MAPPING_ERROR;
}
- return NULL;
-}
-
-static bool swiotlb_free_buffer(struct device *dev, size_t size,
- dma_addr_t dma_addr)
-{
- phys_addr_t phys_addr = dma_to_phys(dev, dma_addr);
- WARN_ON_ONCE(irqs_disabled());
-
- if (!is_swiotlb_buffer(phys_addr))
- return false;
-
- /*
- * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single.
- * DMA_ATTR_SKIP_CPU_SYNC is optional.
- */
- swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC);
- return true;
-}
-
-static void
-swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
- int do_panic)
-{
- if (swiotlb_force == SWIOTLB_NO_FORCE)
- return;
-
- /*
- * Ran out of IOMMU space for this operation. This is very bad.
- * Unfortunately the drivers cannot handle this operation properly.
- * unless they check for dma_mapping_error (most don't)
- * When the mapping is small enough return a static buffer to limit
- * the damage, or panic when the transfer is too big.
- */
- dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n",
- size);
+ /* Oh well, have to allocate and map a bounce buffer. */
+ *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start),
+ *phys, size, dir, attrs);
+ if (*phys == SWIOTLB_MAP_ERROR)
+ return DIRECT_MAPPING_ERROR;
- if (size <= io_tlb_overflow || !do_panic)
- return;
+ /* Ensure that the address returned is DMA'ble */
+ dma_addr = __phys_to_dma(dev, *phys);
+ if (unlikely(!dma_capable(dev, dma_addr, size))) {
+ swiotlb_tbl_unmap_single(dev, *phys, size, dir,
+ attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ return DIRECT_MAPPING_ERROR;
+ }
- if (dir == DMA_BIDIRECTIONAL)
- panic("DMA: Random memory could be DMA accessed\n");
- if (dir == DMA_FROM_DEVICE)
- panic("DMA: Random memory could be DMA written\n");
- if (dir == DMA_TO_DEVICE)
- panic("DMA: Random memory could be DMA read\n");
+ return dma_addr;
}
/*
enum dma_data_direction dir,
unsigned long attrs)
{
- phys_addr_t map, phys = page_to_phys(page) + offset;
+ phys_addr_t phys = page_to_phys(page) + offset;
dma_addr_t dev_addr = phys_to_dma(dev, phys);
BUG_ON(dir == DMA_NONE);
* we can safely return the device addr and not worry about bounce
* buffering it.
*/
- if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE)
- return dev_addr;
-
- trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
-
- /* Oh well, have to allocate and map a bounce buffer. */
- map = map_single(dev, phys, size, dir, attrs);
- if (map == SWIOTLB_MAP_ERROR) {
- swiotlb_full(dev, size, dir, 1);
- return __phys_to_dma(dev, io_tlb_overflow_buffer);
+ if (!dma_capable(dev, dev_addr, size) ||
+ swiotlb_force == SWIOTLB_FORCE) {
+ trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
+ dev_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs);
}
- dev_addr = __phys_to_dma(dev, map);
+ if (!dev_is_dma_coherent(dev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+ arch_sync_dma_for_device(dev, phys, size, dir);
- /* Ensure that the address returned is DMA'ble */
- if (dma_capable(dev, dev_addr, size))
- return dev_addr;
-
- attrs |= DMA_ATTR_SKIP_CPU_SYNC;
- swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
-
- return __phys_to_dma(dev, io_tlb_overflow_buffer);
+ return dev_addr;
}
/*
* After this call, reads by the cpu to the buffer are guaranteed to see
* whatever the device wrote there.
*/
-static void unmap_single(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
+void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
{
phys_addr_t paddr = dma_to_phys(hwdev, dev_addr);
BUG_ON(dir == DMA_NONE);
+ if (!dev_is_dma_coherent(hwdev) &&
+ (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0)
+ arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
if (is_swiotlb_buffer(paddr)) {
swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
return;
dma_mark_clean(phys_to_virt(paddr), size);
}
-void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr,
- size_t size, enum dma_data_direction dir,
- unsigned long attrs)
-{
- unmap_single(hwdev, dev_addr, size, dir, attrs);
-}
-
/*
* Make physical memory consistent for a single streaming mode DMA translation
* after a transfer.
BUG_ON(dir == DMA_NONE);
- if (is_swiotlb_buffer(paddr)) {
+ if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU)
+ arch_sync_dma_for_cpu(hwdev, paddr, size, dir);
+
+ if (is_swiotlb_buffer(paddr))
swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target);
- return;
- }
- if (dir != DMA_FROM_DEVICE)
- return;
+ if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE)
+ arch_sync_dma_for_device(hwdev, paddr, size, dir);
- dma_mark_clean(phys_to_virt(paddr), size);
+ if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE)
+ dma_mark_clean(phys_to_virt(paddr), size);
}
void
* appropriate dma address and length. They are obtained via
* sg_dma_{address,length}(SG).
*
- * NOTE: An implementation may be able to use a smaller number of
- * DMA address/length pairs than there are SG table elements.
- * (for example via virtual mapping capabilities)
- * The routine returns the number of addr/length pairs actually
- * used, at most nents.
- *
* Device ownership issues as mentioned above for swiotlb_map_page are the
* same here.
*/
int
-swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
+swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems,
enum dma_data_direction dir, unsigned long attrs)
{
struct scatterlist *sg;
int i;
- BUG_ON(dir == DMA_NONE);
-
for_each_sg(sgl, sg, nelems, i) {
- phys_addr_t paddr = sg_phys(sg);
- dma_addr_t dev_addr = phys_to_dma(hwdev, paddr);
-
- if (swiotlb_force == SWIOTLB_FORCE ||
- !dma_capable(hwdev, dev_addr, sg->length)) {
- phys_addr_t map = map_single(hwdev, sg_phys(sg),
- sg->length, dir, attrs);
- if (map == SWIOTLB_MAP_ERROR) {
- /* Don't panic here, we expect map_sg users
- to do proper error handling. */
- swiotlb_full(hwdev, sg->length, dir, 0);
- attrs |= DMA_ATTR_SKIP_CPU_SYNC;
- swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir,
- attrs);
- sg_dma_len(sgl) = 0;
- return 0;
- }
- sg->dma_address = __phys_to_dma(hwdev, map);
- } else
- sg->dma_address = dev_addr;
+ sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset,
+ sg->length, dir, attrs);
+ if (sg->dma_address == DIRECT_MAPPING_ERROR)
+ goto out_error;
sg_dma_len(sg) = sg->length;
}
+
return nelems;
+
+out_error:
+ swiotlb_unmap_sg_attrs(dev, sgl, i, dir,
+ attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ sg_dma_len(sgl) = 0;
+ return 0;
}
/*
BUG_ON(dir == DMA_NONE);
for_each_sg(sgl, sg, nelems, i)
- unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir,
+ swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir,
attrs);
}
swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE);
}
-int
-swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
-{
- return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
-}
-
/*
* Return whether the given device DMA address mask can be supported
* properly. For example, if your device can only drive the low 24-bits
return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
}
-void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
- gfp_t gfp, unsigned long attrs)
-{
- void *vaddr;
-
- /* temporary workaround: */
- if (gfp & __GFP_NOWARN)
- attrs |= DMA_ATTR_NO_WARN;
-
- /*
- * Don't print a warning when the first allocation attempt fails.
- * swiotlb_alloc_coherent() will print a warning when the DMA memory
- * allocation ultimately failed.
- */
- gfp |= __GFP_NOWARN;
-
- vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs);
- if (!vaddr)
- vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs);
- return vaddr;
-}
-
-void swiotlb_free(struct device *dev, size_t size, void *vaddr,
- dma_addr_t dma_addr, unsigned long attrs)
-{
- if (!swiotlb_free_buffer(dev, size, dma_addr))
- dma_direct_free(dev, size, vaddr, dma_addr, attrs);
-}
-
const struct dma_map_ops swiotlb_dma_ops = {
- .mapping_error = swiotlb_dma_mapping_error,
- .alloc = swiotlb_alloc,
- .free = swiotlb_free,
+ .mapping_error = dma_direct_mapping_error,
+ .alloc = dma_direct_alloc,
+ .free = dma_direct_free,
.sync_single_for_cpu = swiotlb_sync_single_for_cpu,
.sync_single_for_device = swiotlb_sync_single_for_device,
.sync_sg_for_cpu = swiotlb_sync_sg_for_cpu,
fmt = "f";
for (pass = false; strspn(fmt,"fnpPFcC"); fmt++, pass = true) {
+ int precision;
if (pass) {
if (buf < end)
*buf = ':';
buf = device_node_gen_full_name(dn, buf, end);
break;
case 'n': /* name */
- buf = string(buf, end, dn->name, str_spec);
+ p = kbasename(of_node_full_name(dn));
+ precision = str_spec.precision;
+ str_spec.precision = strchrnul(p, '@') - p;
+ buf = string(buf, end, p, str_spec);
+ str_spec.precision = precision;
break;
case 'p': /* phandle */
buf = number(buf, end, (unsigned int)dn->phandle, num_spec);
return;
br_multicast_update_query_timer(br, query, max_delay);
- br_multicast_mark_router(br, port);
+
+ /* Based on RFC4541, section 2.1.1 IGMP Forwarding Rules,
+ * the arrival port for IGMP Queries where the source address
+ * is 0.0.0.0 should not be added to router port list.
+ */
+ if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
+ (saddr->proto == htons(ETH_P_IPV6) &&
+ !ipv6_addr_any(&saddr->u.ip6)))
+ br_multicast_mark_router(br, port);
}
static void br_ip4_multicast_query(struct net_bridge *br,
return -EINVAL;
}
- if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE))
- netdev_rx_csum_fault(skb->dev);
+ if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) &&
+ !skb->csum_complete_sw)
+ netdev_rx_csum_fault(NULL);
}
return 0;
fault:
if (!master_idx)
return false;
- master = netdev_master_upper_dev_get(dev);
+ master = dev ? netdev_master_upper_dev_get(dev) : NULL;
if (!master || master->ifindex != master_idx)
return true;
static bool neigh_ifindex_filtered(struct net_device *dev, int filter_idx)
{
- if (filter_idx && dev->ifindex != filter_idx)
+ if (filter_idx && (!dev || dev->ifindex != filter_idx))
return true;
return false;
int idx;
int s_idx = cb->family;
int type = cb->nlh->nlmsg_type - RTM_BASE;
+ int ret = 0;
if (s_idx == 0)
s_idx = 1;
cb->prev_seq = 0;
cb->seq = 0;
}
- if (dumpit(skb, cb))
+ ret = dumpit(skb, cb);
+ if (ret < 0)
break;
}
cb->family = idx;
- return skb->len;
+ return skb->len ? : ret;
}
struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev,
net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
if (IS_ERR(net)) {
+ fillargs->netnsid = -1;
NL_SET_ERR_MSG(extack, "ipv4: Invalid target network namespace id");
return PTR_ERR(net);
}
struct net_device *dev;
struct in_device *in_dev;
struct hlist_head *head;
- int err;
+ int err = 0;
s_h = cb->args[0];
s_idx = idx = cb->args[1];
err = inet_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
skb->sk, cb);
if (err < 0)
- return err;
+ goto put_tgt_net;
+ err = 0;
if (fillargs.ifindex) {
dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
- if (!dev)
- return -ENODEV;
+ if (!dev) {
+ err = -ENODEV;
+ goto put_tgt_net;
+ }
in_dev = __in_dev_get_rtnl(dev);
if (in_dev) {
if (fillargs.netnsid >= 0)
put_net(tgt_net);
- return skb->len;
+ return err < 0 ? err : skb->len;
}
static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh,
return -EINVAL;
}
+ filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC);
filter->flags = rtm->rtm_flags;
filter->protocol = rtm->rtm_protocol;
filter->rt_type = rtm->rtm_type;
if (filter.table_id) {
tb = fib_get_table(net, filter.table_id);
if (!tb) {
+ if (filter.dump_all_families)
+ return skb->len;
+
NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist");
return -ENOENT;
}
mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
+ if (filter.dump_all_families)
+ return skb->len;
+
NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist");
return -ENOENT;
}
struct net *net = dev_net(skb->dev);
sk = __udp4_lib_lookup(net, iph->daddr, uh->dest,
- iph->saddr, uh->source, skb->dev->ifindex, 0,
- udptable, NULL);
+ iph->saddr, uh->source, skb->dev->ifindex,
+ inet_sdif(skb), udptable, NULL);
if (!sk) {
__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
return; /* No socket for error */
/* Note, we are only interested in != 0 or == 0, thus the
* force to int.
*/
- return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
- inet_compute_pseudo);
+ err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+ inet_compute_pseudo);
+ if (err)
+ return err;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
+ /* If SW calculated the value, we know it's bad */
+ if (skb->csum_complete_sw)
+ return 1;
+
+ /* HW says the value is bad. Let's validate that.
+ * skb->csum is no longer the full packet checksum,
+ * so don't treat it as such.
+ */
+ skb_checksum_complete_unset(skb);
+ }
+
+ return 0;
}
/* wrapper for udp_queue_rcv_skb tacking care of csum conversion and
fillargs->netnsid = nla_get_s32(tb[i]);
net = rtnl_get_net_ns_capable(sk, fillargs->netnsid);
if (IS_ERR(net)) {
+ fillargs->netnsid = -1;
NL_SET_ERR_MSG_MOD(extack, "Invalid target network namespace id");
return PTR_ERR(net);
}
struct net_device *dev;
struct inet6_dev *idev;
struct hlist_head *head;
+ int err = 0;
s_h = cb->args[0];
s_idx = idx = cb->args[1];
s_ip_idx = cb->args[2];
if (cb->strict_check) {
- int err;
-
err = inet6_valid_dump_ifaddr_req(nlh, &fillargs, &tgt_net,
skb->sk, cb);
if (err < 0)
- return err;
+ goto put_tgt_net;
+ err = 0;
if (fillargs.ifindex) {
dev = __dev_get_by_index(tgt_net, fillargs.ifindex);
- if (!dev)
- return -ENODEV;
+ if (!dev) {
+ err = -ENODEV;
+ goto put_tgt_net;
+ }
idev = __in6_dev_get(dev);
if (idev) {
err = in6_dump_addrs(idev, skb, cb, s_ip_idx,
if (fillargs.netnsid >= 0)
put_net(tgt_net);
- return skb->len;
+ return err < 0 ? err : skb->len;
}
static int inet6_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
* Note, we are only interested in != 0 or == 0, thus the
* force to int.
*/
- return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
- ip6_compute_pseudo);
+ err = (__force int)skb_checksum_init_zero_check(skb, proto, uh->check,
+ ip6_compute_pseudo);
+ if (err)
+ return err;
+
+ if (skb->ip_summed == CHECKSUM_COMPLETE && !skb->csum_valid) {
+ /* If SW calculated the value, we know it's bad */
+ if (skb->csum_complete_sw)
+ return 1;
+
+ /* HW says the value is bad. Let's validate that.
+ * skb->csum is no longer the full packet checksum,
+ * so don't treat is as such.
+ */
+ skb_checksum_complete_unset(skb);
+ }
+
+ return 0;
}
EXPORT_SYMBOL(udp6_csum_init);
if (arg.filter.table_id) {
tb = fib6_get_table(net, arg.filter.table_id);
if (!tb) {
+ if (arg.filter.dump_all_families)
+ return skb->len;
+
NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist");
return -ENOENT;
}
mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id);
if (!mrt) {
+ if (filter.dump_all_families)
+ return skb->len;
+
NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist");
return -ENOENT;
}
return 0;
}
- memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
-
switch (msg->icmph.icmp6_type) {
case NDISC_NEIGHBOUR_SOLICITATION:
+ memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
ndisc_recv_ns(skb);
break;
grt = ip6_nh_lookup_table(net, cfg, gw_addr, tbid, 0);
if (grt) {
if (!grt->dst.error &&
+ /* ignore match if it is the default route */
+ grt->from && !ipv6_addr_any(&grt->from->fib6_dst.addr) &&
(grt->rt6i_flags & flags || dev != grt->dst.dev)) {
NL_SET_ERR_MSG(extack,
"Nexthop has invalid gateway or device mismatch");
struct net *net = dev_net(skb->dev);
sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source,
- inet6_iif(skb), 0, udptable, skb);
+ inet6_iif(skb), inet6_sdif(skb), udptable, skb);
if (!sk) {
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
ICMP6_MIB_INERRORS);
const struct nla_policy rtm_tca_policy[TCA_MAX + 1] = {
[TCA_KIND] = { .type = NLA_STRING },
- [TCA_OPTIONS] = { .type = NLA_NESTED },
[TCA_RATE] = { .type = NLA_BINARY,
.len = sizeof(struct tc_estimator) },
[TCA_STAB] = { .type = NLA_NESTED },
sock_put(&smc->sk); /* sock_hold in smc_lgr_register_conn() */
}
-/* Unregister connection and trigger lgr freeing if applicable
+/* Unregister connection from lgr
*/
static void smc_lgr_unregister_conn(struct smc_connection *conn)
{
struct smc_link_group *lgr = conn->lgr;
- int reduced = 0;
write_lock_bh(&lgr->conns_lock);
if (conn->alert_token_local) {
- reduced = 1;
__smc_lgr_unregister_conn(conn);
}
write_unlock_bh(&lgr->conns_lock);
- if (!reduced || lgr->conns_num)
- return;
- smc_lgr_schedule_free_work(lgr);
}
/* Send delete link, either as client to request the initiation
return rc;
}
-static void smc_buf_unuse(struct smc_connection *conn)
+static void smc_buf_unuse(struct smc_connection *conn,
+ struct smc_link_group *lgr)
{
if (conn->sndbuf_desc)
conn->sndbuf_desc->used = 0;
conn->rmb_desc->used = 0;
} else {
/* buf registration failed, reuse not possible */
- struct smc_link_group *lgr = conn->lgr;
-
write_lock_bh(&lgr->rmbs_lock);
list_del(&conn->rmb_desc->list);
write_unlock_bh(&lgr->rmbs_lock);
/* remove a finished connection from its link group */
void smc_conn_free(struct smc_connection *conn)
{
- if (!conn->lgr)
+ struct smc_link_group *lgr = conn->lgr;
+
+ if (!lgr)
return;
- if (conn->lgr->is_smcd) {
+ if (lgr->is_smcd) {
smc_ism_unset_conn(conn);
tasklet_kill(&conn->rx_tsklet);
} else {
smc_cdc_tx_dismiss_slots(conn);
}
- smc_lgr_unregister_conn(conn);
- smc_buf_unuse(conn);
+ smc_lgr_unregister_conn(conn); /* unsets conn->lgr */
+ smc_buf_unuse(conn, lgr); /* allow buffer reuse */
+
+ if (!lgr->conns_num)
+ smc_lgr_schedule_free_work(lgr);
}
static void smc_link_clear(struct smc_link *lnk)
static unsigned int auth_hashbits = RPC_CREDCACHE_DEFAULT_HASHBITS;
-static DEFINE_SPINLOCK(rpc_authflavor_lock);
-static const struct rpc_authops *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
- &authnull_ops, /* AUTH_NULL */
- &authunix_ops, /* AUTH_UNIX */
+static const struct rpc_authops __rcu *auth_flavors[RPC_AUTH_MAXFLAVOR] = {
+ [RPC_AUTH_NULL] = (const struct rpc_authops __force __rcu *)&authnull_ops,
+ [RPC_AUTH_UNIX] = (const struct rpc_authops __force __rcu *)&authunix_ops,
NULL, /* others can be loadable modules */
};
int
rpcauth_register(const struct rpc_authops *ops)
{
+ const struct rpc_authops *old;
rpc_authflavor_t flavor;
- int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
- spin_lock(&rpc_authflavor_lock);
- if (auth_flavors[flavor] == NULL) {
- auth_flavors[flavor] = ops;
- ret = 0;
- }
- spin_unlock(&rpc_authflavor_lock);
- return ret;
+ old = cmpxchg((const struct rpc_authops ** __force)&auth_flavors[flavor], NULL, ops);
+ if (old == NULL || old == ops)
+ return 0;
+ return -EPERM;
}
EXPORT_SYMBOL_GPL(rpcauth_register);
int
rpcauth_unregister(const struct rpc_authops *ops)
{
+ const struct rpc_authops *old;
rpc_authflavor_t flavor;
- int ret = -EPERM;
if ((flavor = ops->au_flavor) >= RPC_AUTH_MAXFLAVOR)
return -EINVAL;
- spin_lock(&rpc_authflavor_lock);
- if (auth_flavors[flavor] == ops) {
- auth_flavors[flavor] = NULL;
- ret = 0;
- }
- spin_unlock(&rpc_authflavor_lock);
- return ret;
+
+ old = cmpxchg((const struct rpc_authops ** __force)&auth_flavors[flavor], ops, NULL);
+ if (old == ops || old == NULL)
+ return 0;
+ return -EPERM;
}
EXPORT_SYMBOL_GPL(rpcauth_unregister);
+static const struct rpc_authops *
+rpcauth_get_authops(rpc_authflavor_t flavor)
+{
+ const struct rpc_authops *ops;
+
+ if (flavor >= RPC_AUTH_MAXFLAVOR)
+ return NULL;
+
+ rcu_read_lock();
+ ops = rcu_dereference(auth_flavors[flavor]);
+ if (ops == NULL) {
+ rcu_read_unlock();
+ request_module("rpc-auth-%u", flavor);
+ rcu_read_lock();
+ ops = rcu_dereference(auth_flavors[flavor]);
+ if (ops == NULL)
+ goto out;
+ }
+ if (!try_module_get(ops->owner))
+ ops = NULL;
+out:
+ rcu_read_unlock();
+ return ops;
+}
+
+static void
+rpcauth_put_authops(const struct rpc_authops *ops)
+{
+ module_put(ops->owner);
+}
+
/**
* rpcauth_get_pseudoflavor - check if security flavor is supported
* @flavor: a security flavor
rpc_authflavor_t
rpcauth_get_pseudoflavor(rpc_authflavor_t flavor, struct rpcsec_gss_info *info)
{
- const struct rpc_authops *ops;
+ const struct rpc_authops *ops = rpcauth_get_authops(flavor);
rpc_authflavor_t pseudoflavor;
- ops = auth_flavors[flavor];
- if (ops == NULL)
- request_module("rpc-auth-%u", flavor);
- spin_lock(&rpc_authflavor_lock);
- ops = auth_flavors[flavor];
- if (ops == NULL || !try_module_get(ops->owner)) {
- spin_unlock(&rpc_authflavor_lock);
+ if (!ops)
return RPC_AUTH_MAXFLAVOR;
- }
- spin_unlock(&rpc_authflavor_lock);
-
pseudoflavor = flavor;
if (ops->info2flavor != NULL)
pseudoflavor = ops->info2flavor(info);
- module_put(ops->owner);
+ rpcauth_put_authops(ops);
return pseudoflavor;
}
EXPORT_SYMBOL_GPL(rpcauth_get_pseudoflavor);
const struct rpc_authops *ops;
int result;
- if (flavor >= RPC_AUTH_MAXFLAVOR)
- return -EINVAL;
-
- ops = auth_flavors[flavor];
+ ops = rpcauth_get_authops(flavor);
if (ops == NULL)
- request_module("rpc-auth-%u", flavor);
- spin_lock(&rpc_authflavor_lock);
- ops = auth_flavors[flavor];
- if (ops == NULL || !try_module_get(ops->owner)) {
- spin_unlock(&rpc_authflavor_lock);
return -ENOENT;
- }
- spin_unlock(&rpc_authflavor_lock);
result = -ENOENT;
if (ops->flavor2info != NULL)
result = ops->flavor2info(pseudoflavor, info);
- module_put(ops->owner);
+ rpcauth_put_authops(ops);
return result;
}
EXPORT_SYMBOL_GPL(rpcauth_get_gssinfo);
int
rpcauth_list_flavors(rpc_authflavor_t *array, int size)
{
- rpc_authflavor_t flavor;
- int result = 0;
+ const struct rpc_authops *ops;
+ rpc_authflavor_t flavor, pseudos[4];
+ int i, len, result = 0;
- spin_lock(&rpc_authflavor_lock);
+ rcu_read_lock();
for (flavor = 0; flavor < RPC_AUTH_MAXFLAVOR; flavor++) {
- const struct rpc_authops *ops = auth_flavors[flavor];
- rpc_authflavor_t pseudos[4];
- int i, len;
-
+ ops = rcu_dereference(auth_flavors[flavor]);
if (result >= size) {
result = -ENOMEM;
break;
array[result++] = pseudos[i];
}
}
- spin_unlock(&rpc_authflavor_lock);
+ rcu_read_unlock();
dprintk("RPC: %s returns %d\n", __func__, result);
return result;
struct rpc_auth *
rpcauth_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
- struct rpc_auth *auth;
+ struct rpc_auth *auth = ERR_PTR(-EINVAL);
const struct rpc_authops *ops;
- u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
+ u32 flavor = pseudoflavor_to_flavor(args->pseudoflavor);
- auth = ERR_PTR(-EINVAL);
- if (flavor >= RPC_AUTH_MAXFLAVOR)
+ ops = rpcauth_get_authops(flavor);
+ if (ops == NULL)
goto out;
- if ((ops = auth_flavors[flavor]) == NULL)
- request_module("rpc-auth-%u", flavor);
- spin_lock(&rpc_authflavor_lock);
- ops = auth_flavors[flavor];
- if (ops == NULL || !try_module_get(ops->owner)) {
- spin_unlock(&rpc_authflavor_lock);
- goto out;
- }
- spin_unlock(&rpc_authflavor_lock);
auth = ops->create(args, clnt);
- module_put(ops->owner);
+
+ rpcauth_put_authops(ops);
if (IS_ERR(auth))
return auth;
if (clnt->cl_auth)
void
rpcauth_release(struct rpc_auth *auth)
{
- if (!atomic_dec_and_test(&auth->au_count))
+ if (!refcount_dec_and_test(&auth->au_count))
return;
auth->au_ops->destroy(auth);
}
static DEFINE_SPINLOCK(rpc_credcache_lock);
-static void
+/*
+ * On success, the caller is responsible for freeing the reference
+ * held by the hashtable
+ */
+static bool
rpcauth_unhash_cred_locked(struct rpc_cred *cred)
{
+ if (!test_and_clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+ return false;
hlist_del_rcu(&cred->cr_hash);
- smp_mb__before_atomic();
- clear_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+ return true;
}
-static int
+static bool
rpcauth_unhash_cred(struct rpc_cred *cred)
{
spinlock_t *cache_lock;
- int ret;
+ bool ret;
+ if (!test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+ return false;
cache_lock = &cred->cr_auth->au_credcache->lock;
spin_lock(cache_lock);
- ret = atomic_read(&cred->cr_count) == 0;
- if (ret)
- rpcauth_unhash_cred_locked(cred);
+ ret = rpcauth_unhash_cred_locked(cred);
spin_unlock(cache_lock);
return ret;
}
}
}
+static void
+rpcauth_lru_add_locked(struct rpc_cred *cred)
+{
+ if (!list_empty(&cred->cr_lru))
+ return;
+ number_cred_unused++;
+ list_add_tail(&cred->cr_lru, &cred_unused);
+}
+
+static void
+rpcauth_lru_add(struct rpc_cred *cred)
+{
+ if (!list_empty(&cred->cr_lru))
+ return;
+ spin_lock(&rpc_credcache_lock);
+ rpcauth_lru_add_locked(cred);
+ spin_unlock(&rpc_credcache_lock);
+}
+
+static void
+rpcauth_lru_remove_locked(struct rpc_cred *cred)
+{
+ if (list_empty(&cred->cr_lru))
+ return;
+ number_cred_unused--;
+ list_del_init(&cred->cr_lru);
+}
+
+static void
+rpcauth_lru_remove(struct rpc_cred *cred)
+{
+ if (list_empty(&cred->cr_lru))
+ return;
+ spin_lock(&rpc_credcache_lock);
+ rpcauth_lru_remove_locked(cred);
+ spin_unlock(&rpc_credcache_lock);
+}
+
/*
* Clear the RPC credential cache, and delete those credentials
* that are not referenced.
head = &cache->hashtable[i];
while (!hlist_empty(head)) {
cred = hlist_entry(head->first, struct rpc_cred, cr_hash);
- get_rpccred(cred);
- if (!list_empty(&cred->cr_lru)) {
- list_del(&cred->cr_lru);
- number_cred_unused--;
- }
- list_add_tail(&cred->cr_lru, &free);
rpcauth_unhash_cred_locked(cred);
+ /* Note: We now hold a reference to cred */
+ rpcauth_lru_remove_locked(cred);
+ list_add_tail(&cred->cr_lru, &free);
}
}
spin_unlock(&cache->lock);
static long
rpcauth_prune_expired(struct list_head *free, int nr_to_scan)
{
- spinlock_t *cache_lock;
struct rpc_cred *cred, *next;
unsigned long expired = jiffies - RPC_AUTH_EXPIRY_MORATORIUM;
long freed = 0;
if (nr_to_scan-- == 0)
break;
+ if (refcount_read(&cred->cr_count) > 1) {
+ rpcauth_lru_remove_locked(cred);
+ continue;
+ }
/*
* Enforce a 60 second garbage collection moratorium
* Note that the cred_unused list must be time-ordered.
*/
- if (time_in_range(cred->cr_expire, expired, jiffies) &&
- test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
- freed = SHRINK_STOP;
- break;
- }
-
- list_del_init(&cred->cr_lru);
- number_cred_unused--;
- freed++;
- if (atomic_read(&cred->cr_count) != 0)
+ if (!time_in_range(cred->cr_expire, expired, jiffies))
+ continue;
+ if (!rpcauth_unhash_cred(cred))
continue;
- cache_lock = &cred->cr_auth->au_credcache->lock;
- spin_lock(cache_lock);
- if (atomic_read(&cred->cr_count) == 0) {
- get_rpccred(cred);
- list_add_tail(&cred->cr_lru, free);
- rpcauth_unhash_cred_locked(cred);
- }
- spin_unlock(cache_lock);
+ rpcauth_lru_remove_locked(cred);
+ freed++;
+ list_add_tail(&cred->cr_lru, free);
}
- return freed;
+ return freed ? freed : SHRINK_STOP;
}
static unsigned long
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
if (flags & RPCAUTH_LOOKUP_RCU) {
- if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) &&
- !test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags))
- cred = entry;
+ if (test_bit(RPCAUTH_CRED_NEW, &entry->cr_flags) ||
+ refcount_read(&entry->cr_count) == 0)
+ continue;
+ cred = entry;
break;
}
- spin_lock(&cache->lock);
- if (test_bit(RPCAUTH_CRED_HASHED, &entry->cr_flags) == 0) {
- spin_unlock(&cache->lock);
- continue;
- }
cred = get_rpccred(entry);
- spin_unlock(&cache->lock);
- break;
+ if (cred)
+ break;
}
rcu_read_unlock();
if (!entry->cr_ops->crmatch(acred, entry, flags))
continue;
cred = get_rpccred(entry);
- break;
+ if (cred)
+ break;
}
if (cred == NULL) {
cred = new;
set_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags);
+ refcount_inc(&cred->cr_count);
hlist_add_head_rcu(&cred->cr_hash, &cache->hashtable[nr]);
} else
list_add_tail(&new->cr_lru, &free);
{
INIT_HLIST_NODE(&cred->cr_hash);
INIT_LIST_HEAD(&cred->cr_lru);
- atomic_set(&cred->cr_count, 1);
+ refcount_set(&cred->cr_count, 1);
cred->cr_auth = auth;
cred->cr_ops = ops;
cred->cr_expire = jiffies;
{
if (cred == NULL)
return;
- /* Fast path for unhashed credentials */
- if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) == 0) {
- if (atomic_dec_and_test(&cred->cr_count))
- cred->cr_ops->crdestroy(cred);
- return;
- }
-
- if (!atomic_dec_and_lock(&cred->cr_count, &rpc_credcache_lock))
- return;
- if (!list_empty(&cred->cr_lru)) {
- number_cred_unused--;
- list_del_init(&cred->cr_lru);
- }
- if (test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags) != 0) {
- if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
- cred->cr_expire = jiffies;
- list_add_tail(&cred->cr_lru, &cred_unused);
- number_cred_unused++;
- goto out_nodestroy;
- }
- if (!rpcauth_unhash_cred(cred)) {
- /* We were hashed and someone looked us up... */
- goto out_nodestroy;
- }
+ rcu_read_lock();
+ if (refcount_dec_and_test(&cred->cr_count))
+ goto destroy;
+ if (refcount_read(&cred->cr_count) != 1 ||
+ !test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags))
+ goto out;
+ if (test_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags) != 0) {
+ cred->cr_expire = jiffies;
+ rpcauth_lru_add(cred);
+ /* Race breaker */
+ if (unlikely(!test_bit(RPCAUTH_CRED_HASHED, &cred->cr_flags)))
+ rpcauth_lru_remove(cred);
+ } else if (rpcauth_unhash_cred(cred)) {
+ rpcauth_lru_remove(cred);
+ if (refcount_dec_and_test(&cred->cr_count))
+ goto destroy;
}
- spin_unlock(&rpc_credcache_lock);
- cred->cr_ops->crdestroy(cred);
+out:
+ rcu_read_unlock();
return;
-out_nodestroy:
- spin_unlock(&rpc_credcache_lock);
+destroy:
+ rcu_read_unlock();
+ cred->cr_ops->crdestroy(cred);
}
EXPORT_SYMBOL_GPL(put_rpccred);
return rpcauth_unwrap_req_decode(decode, rqstp, data, obj);
}
+bool
+rpcauth_xmit_need_reencode(struct rpc_task *task)
+{
+ struct rpc_cred *cred = task->tk_rqstp->rq_cred;
+
+ if (!cred || !cred->cr_ops->crneed_reencode)
+ return false;
+ return cred->cr_ops->crneed_reencode(task);
+}
+
int
rpcauth_refreshcred(struct rpc_task *task)
{
static struct rpc_auth generic_auth = {
.au_ops = &generic_auth_ops,
- .au_count = ATOMIC_INIT(0),
+ .au_count = REFCOUNT_INIT(1),
};
static bool generic_key_to_expire(struct rpc_cred *cred)
auth->au_flavor = flavor;
if (gss_pseudoflavor_to_datatouch(gss_auth->mech, flavor))
auth->au_flags |= RPCAUTH_AUTH_DATATOUCH;
- atomic_set(&auth->au_count, 1);
+ refcount_set(&auth->au_count, 1);
kref_init(&gss_auth->kref);
err = rpcauth_init_credcache(auth);
if (strcmp(gss_auth->target_name, args->target_name))
continue;
}
- if (!atomic_inc_not_zero(&gss_auth->rpc_auth.au_count))
+ if (!refcount_inc_not_zero(&gss_auth->rpc_auth.au_count))
continue;
goto out;
}
return decode(rqstp, &xdr, obj);
}
+static bool
+gss_seq_is_newer(u32 new, u32 old)
+{
+ return (s32)(new - old) > 0;
+}
+
+static bool
+gss_xmit_need_reencode(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_cred *cred = req->rq_cred;
+ struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+ u32 win, seq_xmit;
+ bool ret = true;
+
+ if (!ctx)
+ return true;
+
+ if (gss_seq_is_newer(req->rq_seqno, READ_ONCE(ctx->gc_seq)))
+ goto out;
+
+ seq_xmit = READ_ONCE(ctx->gc_seq_xmit);
+ while (gss_seq_is_newer(req->rq_seqno, seq_xmit)) {
+ u32 tmp = seq_xmit;
+
+ seq_xmit = cmpxchg(&ctx->gc_seq_xmit, tmp, req->rq_seqno);
+ if (seq_xmit == tmp) {
+ ret = false;
+ goto out;
+ }
+ }
+
+ win = ctx->gc_win;
+ if (win > 0)
+ ret = !gss_seq_is_newer(req->rq_seqno, seq_xmit - win);
+out:
+ gss_put_ctx(ctx);
+ return ret;
+}
+
static int
gss_unwrap_resp(struct rpc_task *task,
kxdrdproc_t decode, void *rqstp, __be32 *p, void *obj)
.crunwrap_resp = gss_unwrap_resp,
.crkey_timeout = gss_key_timeout,
.crstringify_acceptor = gss_stringify_acceptor,
+ .crneed_reencode = gss_xmit_need_reencode,
};
static const struct rpc_credops gss_nullops = {
#include <linux/sunrpc/gss_krb5.h>
#include <linux/random.h>
#include <linux/crypto.h>
+#include <linux/atomic.h>
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-DEFINE_SPINLOCK(krb5_seq_lock);
-
static void *
setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
return krb5_hdr;
}
+u32
+gss_seq_send_fetch_and_inc(struct krb5_ctx *ctx)
+{
+ u32 old, seq_send = READ_ONCE(ctx->seq_send);
+
+ do {
+ old = seq_send;
+ seq_send = cmpxchg(&ctx->seq_send, old, old + 1);
+ } while (old != seq_send);
+ return seq_send;
+}
+
+u64
+gss_seq_send64_fetch_and_inc(struct krb5_ctx *ctx)
+{
+ u64 old, seq_send = READ_ONCE(ctx->seq_send);
+
+ do {
+ old = seq_send;
+ seq_send = cmpxchg64(&ctx->seq_send64, old, old + 1);
+ } while (old != seq_send);
+ return seq_send;
+}
+
static u32
gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
struct xdr_netobj *token)
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
- spin_lock(&krb5_seq_lock);
- seq_send = ctx->seq_send++;
- spin_unlock(&krb5_seq_lock);
+ seq_send = gss_seq_send_fetch_and_inc(ctx);
if (krb5_make_seq_num(ctx, ctx->seq, ctx->initiate ? 0 : 0xff,
seq_send, ptr + GSS_KRB5_TOK_HDR_LEN, ptr + 8))
.data = cksumdata};
void *krb5_hdr;
s32 now;
- u64 seq_send;
u8 *cksumkey;
unsigned int cksum_usage;
__be64 seq_send_be64;
/* Set up the sequence number. Now 64-bits in clear
* text and w/o direction indicator */
- spin_lock(&krb5_seq_lock);
- seq_send = ctx->seq_send64++;
- spin_unlock(&krb5_seq_lock);
-
- seq_send_be64 = cpu_to_be64(seq_send);
+ seq_send_be64 = cpu_to_be64(gss_seq_send64_fetch_and_inc(ctx));
memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
if (ctx->initiate) {
memcpy(ptr + GSS_KRB5_TOK_HDR_LEN, md5cksum.data, md5cksum.len);
- spin_lock(&krb5_seq_lock);
- seq_send = kctx->seq_send++;
- spin_unlock(&krb5_seq_lock);
+ seq_send = gss_seq_send_fetch_and_inc(kctx);
/* XXX would probably be more efficient to compute checksum
* and encrypt at the same time: */
*be16ptr++ = 0;
be64ptr = (__be64 *)be16ptr;
- spin_lock(&krb5_seq_lock);
- *be64ptr = cpu_to_be64(kctx->seq_send64++);
- spin_unlock(&krb5_seq_lock);
+ *be64ptr = cpu_to_be64(gss_seq_send64_fetch_and_inc(kctx));
err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
if (err)
if (status)
return status;
spin_lock(®istered_mechs_lock);
- list_add(&gm->gm_list, ®istered_mechs);
+ list_add_rcu(&gm->gm_list, ®istered_mechs);
spin_unlock(®istered_mechs_lock);
dprintk("RPC: registered gss mechanism %s\n", gm->gm_name);
return 0;
void gss_mech_unregister(struct gss_api_mech *gm)
{
spin_lock(®istered_mechs_lock);
- list_del(&gm->gm_list);
+ list_del_rcu(&gm->gm_list);
spin_unlock(®istered_mechs_lock);
dprintk("RPC: unregistered gss mechanism %s\n", gm->gm_name);
gss_mech_free(gm);
{
struct gss_api_mech *pos, *gm = NULL;
- spin_lock(®istered_mechs_lock);
- list_for_each_entry(pos, ®istered_mechs, gm_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, ®istered_mechs, gm_list) {
if (0 == strcmp(name, pos->gm_name)) {
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
}
- spin_unlock(®istered_mechs_lock);
+ rcu_read_unlock();
return gm;
}
dprintk("RPC: %s(%s)\n", __func__, buf);
request_module("rpc-auth-gss-%s", buf);
- spin_lock(®istered_mechs_lock);
- list_for_each_entry(pos, ®istered_mechs, gm_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, ®istered_mechs, gm_list) {
if (obj->len == pos->gm_oid.len) {
if (0 == memcmp(obj->data, pos->gm_oid.data, obj->len)) {
if (try_module_get(pos->gm_owner))
}
}
}
- spin_unlock(®istered_mechs_lock);
+ rcu_read_unlock();
return gm;
}
{
struct gss_api_mech *gm = NULL, *pos;
- spin_lock(®istered_mechs_lock);
- list_for_each_entry(pos, ®istered_mechs, gm_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, ®istered_mechs, gm_list) {
if (!mech_supports_pseudoflavor(pos, pseudoflavor))
continue;
if (try_module_get(pos->gm_owner))
gm = pos;
break;
}
- spin_unlock(®istered_mechs_lock);
+ rcu_read_unlock();
return gm;
}
struct gss_api_mech *pos = NULL;
int j, i = 0;
- spin_lock(®istered_mechs_lock);
- list_for_each_entry(pos, ®istered_mechs, gm_list) {
+ rcu_read_lock();
+ list_for_each_entry_rcu(pos, ®istered_mechs, gm_list) {
for (j = 0; j < pos->gm_pf_num; j++) {
if (i >= size) {
spin_unlock(®istered_mechs_lock);
array_ptr[i++] = pos->gm_pfs[j].pseudoflavor;
}
}
- spin_unlock(®istered_mechs_lock);
+ rcu_read_unlock();
return i;
}
xdr_inline_pages(&req->rq_rcv_buf,
PAGE_SIZE/2 /* pretty arbitrary */,
arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
+ req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
done:
if (err)
dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
static struct rpc_auth *
nul_create(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
{
- atomic_inc(&null_auth.au_count);
+ refcount_inc(&null_auth.au_count);
return &null_auth;
}
.au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
.au_ops = &authnull_ops,
.au_flavor = RPC_AUTH_NULL,
- .au_count = ATOMIC_INIT(0),
+ .au_count = REFCOUNT_INIT(1),
};
static
.cr_lru = LIST_HEAD_INIT(null_cred.cr_lru),
.cr_auth = &null_auth,
.cr_ops = &null_credops,
- .cr_count = ATOMIC_INIT(1),
+ .cr_count = REFCOUNT_INIT(2),
.cr_flags = 1UL << RPCAUTH_CRED_UPTODATE,
};
{
dprintk("RPC: creating UNIX authenticator for client %p\n",
clnt);
- atomic_inc(&unix_auth.au_count);
+ refcount_inc(&unix_auth.au_count);
return &unix_auth;
}
.au_flags = RPCAUTH_AUTH_NO_CRKEY_TIMEOUT,
.au_ops = &authunix_ops,
.au_flavor = RPC_AUTH_UNIX,
- .au_count = ATOMIC_INIT(0),
+ .au_count = REFCOUNT_INIT(1),
};
static
return NULL;
req->rq_xprt = xprt;
- INIT_LIST_HEAD(&req->rq_list);
INIT_LIST_HEAD(&req->rq_bc_list);
/* Preallocate one XDR receive buffer */
static void call_reserve(struct rpc_task *task);
static void call_reserveresult(struct rpc_task *task);
static void call_allocate(struct rpc_task *task);
+static void call_encode(struct rpc_task *task);
static void call_decode(struct rpc_task *task);
static void call_bind(struct rpc_task *task);
static void call_bind_status(struct rpc_task *task);
struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
{
struct rpc_task *task;
- struct xdr_buf *xbufp = &req->rq_snd_buf;
struct rpc_task_setup task_setup_data = {
.callback_ops = &rpc_default_ops,
- .flags = RPC_TASK_SOFTCONN,
+ .flags = RPC_TASK_SOFTCONN |
+ RPC_TASK_NO_RETRANS_TIMEOUT,
};
dprintk("RPC: rpc_run_bc_task req= %p\n", req);
* Create an rpc_task to send the data
*/
task = rpc_new_task(&task_setup_data);
- task->tk_rqstp = req;
-
- /*
- * Set up the xdr_buf length.
- * This also indicates that the buffer is XDR encoded already.
- */
- xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
- xbufp->tail[0].iov_len;
+ xprt_init_bc_request(req, task);
task->tk_action = call_bc_transmit;
atomic_inc(&task->tk_count);
task->tk_status = 0;
if (status >= 0) {
if (task->tk_rqstp) {
- xprt_request_init(task);
task->tk_action = call_refresh;
return;
}
dprint_status(task);
task->tk_status = 0;
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
if (req->rq_buffer)
return;
rpc_exit(task, -ERESTARTSYS);
}
-static inline int
+static int
rpc_task_need_encode(struct rpc_task *task)
{
- return task->tk_rqstp->rq_snd_buf.len == 0;
+ return test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) == 0 &&
+ (!(task->tk_flags & RPC_TASK_SENT) ||
+ !(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) ||
+ xprt_request_need_retransmit(task));
}
-static inline void
-rpc_task_force_reencode(struct rpc_task *task)
-{
- task->tk_rqstp->rq_snd_buf.len = 0;
- task->tk_rqstp->rq_bytes_sent = 0;
-}
-
-/*
- * 3. Encode arguments of an RPC call
- */
static void
rpc_xdr_encode(struct rpc_task *task)
{
xdr_buf_init(&req->rq_rcv_buf,
req->rq_rbuffer,
req->rq_rcvsize);
+ req->rq_bytes_sent = 0;
p = rpc_encode_header(task);
if (p == NULL) {
task->tk_status = rpcauth_wrap_req(task, encode, req, p,
task->tk_msg.rpc_argp);
+ if (task->tk_status == 0)
+ xprt_request_prepare(req);
+}
+
+/*
+ * 3. Encode arguments of an RPC call
+ */
+static void
+call_encode(struct rpc_task *task)
+{
+ if (!rpc_task_need_encode(task))
+ goto out;
+ /* Encode here so that rpcsec_gss can use correct sequence number. */
+ rpc_xdr_encode(task);
+ /* Did the encode result in an error condition? */
+ if (task->tk_status != 0) {
+ /* Was the error nonfatal? */
+ if (task->tk_status == -EAGAIN || task->tk_status == -ENOMEM)
+ rpc_delay(task, HZ >> 4);
+ else
+ rpc_exit(task, task->tk_status);
+ return;
+ }
+
+ /* Add task to reply queue before transmission to avoid races */
+ if (rpc_reply_expected(task))
+ xprt_request_enqueue_receive(task);
+ xprt_request_enqueue_transmit(task);
+out:
+ task->tk_action = call_bind;
}
/*
static void
call_transmit(struct rpc_task *task)
{
- int is_retrans = RPC_WAS_SENT(task);
-
dprint_status(task);
- task->tk_action = call_status;
- if (task->tk_status < 0)
- return;
- if (!xprt_prepare_transmit(task))
- return;
- task->tk_action = call_transmit_status;
- /* Encode here so that rpcsec_gss can use correct sequence number. */
- if (rpc_task_need_encode(task)) {
- rpc_xdr_encode(task);
- /* Did the encode result in an error condition? */
- if (task->tk_status != 0) {
- /* Was the error nonfatal? */
- if (task->tk_status == -EAGAIN)
- rpc_delay(task, HZ >> 4);
- else
- rpc_exit(task, task->tk_status);
+ task->tk_status = 0;
+ if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate)) {
+ if (!xprt_prepare_transmit(task))
return;
- }
+ xprt_transmit(task);
}
- xprt_transmit(task);
- if (task->tk_status < 0)
- return;
- if (is_retrans)
- task->tk_client->cl_stats->rpcretrans++;
- /*
- * On success, ensure that we call xprt_end_transmit() before sleeping
- * in order to allow access to the socket to other RPC requests.
- */
- call_transmit_status(task);
- if (rpc_reply_expected(task))
- return;
- task->tk_action = rpc_exit_task;
- rpc_wake_up_queued_task(&task->tk_rqstp->rq_xprt->pending, task);
+ task->tk_action = call_transmit_status;
+ xprt_end_transmit(task);
}
/*
* test first.
*/
if (task->tk_status == 0) {
- xprt_end_transmit(task);
- rpc_task_force_reencode(task);
+ xprt_request_wait_receive(task);
return;
}
switch (task->tk_status) {
- case -EAGAIN:
- case -ENOBUFS:
- break;
default:
dprint_status(task);
- xprt_end_transmit(task);
- rpc_task_force_reencode(task);
+ break;
+ case -EBADMSG:
+ task->tk_status = 0;
+ task->tk_action = call_encode;
break;
/*
* Special cases: if we've been waiting on the
* socket just returned a connection error,
* then hold onto the transport lock.
*/
+ case -ENOBUFS:
+ rpc_delay(task, HZ>>2);
+ /* fall through */
+ case -EBADSLT:
+ case -EAGAIN:
+ task->tk_action = call_transmit;
+ task->tk_status = 0;
+ break;
case -ECONNREFUSED:
case -EHOSTDOWN:
case -ENETDOWN:
case -ENETUNREACH:
case -EPERM:
if (RPC_IS_SOFTCONN(task)) {
- xprt_end_transmit(task);
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt,
task->tk_status);
case -EADDRINUSE:
case -ENOTCONN:
case -EPIPE:
- rpc_task_force_reencode(task);
+ break;
}
}
{
struct rpc_rqst *req = task->tk_rqstp;
+ if (rpc_task_need_encode(task))
+ xprt_request_enqueue_transmit(task);
+ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ goto out_wakeup;
+
if (!xprt_prepare_transmit(task))
goto out_retry;
"error: %d\n", task->tk_status);
goto out_done;
}
- if (req->rq_connect_cookie != req->rq_xprt->connect_cookie)
- req->rq_bytes_sent = 0;
xprt_transmit(task);
- if (task->tk_status == -EAGAIN)
- goto out_nospace;
-
xprt_end_transmit(task);
dprint_status(task);
switch (task->tk_status) {
case -ENOTCONN:
case -EPIPE:
break;
+ case -EAGAIN:
+ goto out_retry;
case -ETIMEDOUT:
/*
* Problem reaching the server. Disconnect and let the
"error: %d\n", task->tk_status);
break;
}
+out_wakeup:
rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
out_done:
task->tk_action = rpc_exit_task;
return;
-out_nospace:
- req->rq_connect_cookie = req->rq_xprt->connect_cookie;
out_retry:
task->tk_status = 0;
}
call_status(struct rpc_task *task)
{
struct rpc_clnt *clnt = task->tk_client;
- struct rpc_rqst *req = task->tk_rqstp;
int status;
if (!task->tk_msg.rpc_proc->p_proc)
trace_xprt_ping(task->tk_xprt, task->tk_status);
- if (req->rq_reply_bytes_recvd > 0 && !req->rq_bytes_sent)
- task->tk_status = req->rq_reply_bytes_recvd;
-
dprint_status(task);
status = task->tk_status;
/* fall through */
case -EPIPE:
case -ENOTCONN:
- task->tk_action = call_bind;
- break;
- case -ENOBUFS:
- rpc_delay(task, HZ>>2);
- /* fall through */
case -EAGAIN:
- task->tk_action = call_transmit;
+ task->tk_action = call_encode;
break;
case -EIO:
/* shutdown or soft timeout */
rpcauth_invalcred(task);
retry:
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
task->tk_status = 0;
}
dprint_status(task);
+ if (!decode) {
+ task->tk_action = rpc_exit_task;
+ return;
+ }
+
if (task->tk_flags & RPC_CALL_MAJORSEEN) {
if (clnt->cl_chatty) {
printk(KERN_NOTICE "%s: server %s OK\n",
if (req->rq_rcv_buf.len < 12) {
if (!RPC_IS_SOFT(task)) {
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
goto out_retry;
}
dprintk("RPC: %s: too small RPC reply size (%d bytes)\n",
goto out_retry;
return;
}
-
task->tk_action = rpc_exit_task;
- if (decode) {
- task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
- task->tk_msg.rpc_resp);
- }
+ task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
+ task->tk_msg.rpc_resp);
+
dprintk("RPC: %5u call_decode result %d\n", task->tk_pid,
task->tk_status);
return;
task->tk_garb_retry--;
dprintk("RPC: %5u %s: retry garbled creds\n",
task->tk_pid, __func__);
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
goto out_retry;
case RPC_AUTH_TOOWEAK:
printk(KERN_NOTICE "RPC: server %s requires stronger "
task->tk_garb_retry--;
dprintk("RPC: %5u %s: retrying\n",
task->tk_pid, __func__);
- task->tk_action = call_bind;
+ task->tk_action = call_encode;
out_retry:
return ERR_PTR(-EAGAIN);
}
list_add(&task->u.tk_wait.timer_list, &queue->timer_list.list);
}
-static void rpc_rotate_queue_owner(struct rpc_wait_queue *queue)
-{
- struct list_head *q = &queue->tasks[queue->priority];
- struct rpc_task *task;
-
- if (!list_empty(q)) {
- task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
- if (task->tk_owner == queue->owner)
- list_move_tail(&task->u.tk_wait.list, q);
- }
-}
-
static void rpc_set_waitqueue_priority(struct rpc_wait_queue *queue, int priority)
{
if (queue->priority != priority) {
- /* Fairness: rotate the list when changing priority */
- rpc_rotate_queue_owner(queue);
queue->priority = priority;
+ queue->nr = 1U << priority;
}
}
-static void rpc_set_waitqueue_owner(struct rpc_wait_queue *queue, pid_t pid)
-{
- queue->owner = pid;
- queue->nr = RPC_BATCH_COUNT;
-}
-
static void rpc_reset_waitqueue_priority(struct rpc_wait_queue *queue)
{
rpc_set_waitqueue_priority(queue, queue->maxpriority);
- rpc_set_waitqueue_owner(queue, 0);
}
/*
- * Add new request to a priority queue.
+ * Add a request to a queue list
*/
-static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
- struct rpc_task *task,
- unsigned char queue_priority)
+static void
+__rpc_list_enqueue_task(struct list_head *q, struct rpc_task *task)
{
- struct list_head *q;
struct rpc_task *t;
- INIT_LIST_HEAD(&task->u.tk_wait.links);
- if (unlikely(queue_priority > queue->maxpriority))
- queue_priority = queue->maxpriority;
- if (queue_priority > queue->priority)
- rpc_set_waitqueue_priority(queue, queue_priority);
- q = &queue->tasks[queue_priority];
list_for_each_entry(t, q, u.tk_wait.list) {
if (t->tk_owner == task->tk_owner) {
- list_add_tail(&task->u.tk_wait.list, &t->u.tk_wait.links);
+ list_add_tail(&task->u.tk_wait.links,
+ &t->u.tk_wait.links);
+ /* Cache the queue head in task->u.tk_wait.list */
+ task->u.tk_wait.list.next = q;
+ task->u.tk_wait.list.prev = NULL;
return;
}
}
+ INIT_LIST_HEAD(&task->u.tk_wait.links);
list_add_tail(&task->u.tk_wait.list, q);
}
+/*
+ * Remove request from a queue list
+ */
+static void
+__rpc_list_dequeue_task(struct rpc_task *task)
+{
+ struct list_head *q;
+ struct rpc_task *t;
+
+ if (task->u.tk_wait.list.prev == NULL) {
+ list_del(&task->u.tk_wait.links);
+ return;
+ }
+ if (!list_empty(&task->u.tk_wait.links)) {
+ t = list_first_entry(&task->u.tk_wait.links,
+ struct rpc_task,
+ u.tk_wait.links);
+ /* Assume __rpc_list_enqueue_task() cached the queue head */
+ q = t->u.tk_wait.list.next;
+ list_add_tail(&t->u.tk_wait.list, q);
+ list_del(&task->u.tk_wait.links);
+ }
+ list_del(&task->u.tk_wait.list);
+}
+
+/*
+ * Add new request to a priority queue.
+ */
+static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
+ struct rpc_task *task,
+ unsigned char queue_priority)
+{
+ if (unlikely(queue_priority > queue->maxpriority))
+ queue_priority = queue->maxpriority;
+ __rpc_list_enqueue_task(&queue->tasks[queue_priority], task);
+}
+
/*
* Add new request to wait queue.
*
*/
static void __rpc_remove_wait_queue_priority(struct rpc_task *task)
{
- struct rpc_task *t;
-
- if (!list_empty(&task->u.tk_wait.links)) {
- t = list_entry(task->u.tk_wait.links.next, struct rpc_task, u.tk_wait.list);
- list_move(&t->u.tk_wait.list, &task->u.tk_wait.list);
- list_splice_init(&task->u.tk_wait.links, &t->u.tk_wait.links);
- }
+ __rpc_list_dequeue_task(task);
}
/*
__rpc_disable_timer(queue, task);
if (RPC_IS_PRIORITY(queue))
__rpc_remove_wait_queue_priority(task);
- list_del(&task->u.tk_wait.list);
+ else
+ list_del(&task->u.tk_wait.list);
queue->qlen--;
dprintk("RPC: %5u removed from queue %p \"%s\"\n",
task->tk_pid, queue, rpc_qname(queue));
/*
* Wake up a queued task while the queue lock is being held
*/
-static void rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
- struct rpc_wait_queue *queue, struct rpc_task *task)
+static struct rpc_task *
+rpc_wake_up_task_on_wq_queue_action_locked(struct workqueue_struct *wq,
+ struct rpc_wait_queue *queue, struct rpc_task *task,
+ bool (*action)(struct rpc_task *, void *), void *data)
{
if (RPC_IS_QUEUED(task)) {
smp_rmb();
- if (task->tk_waitqueue == queue)
- __rpc_do_wake_up_task_on_wq(wq, queue, task);
+ if (task->tk_waitqueue == queue) {
+ if (action == NULL || action(task, data)) {
+ __rpc_do_wake_up_task_on_wq(wq, queue, task);
+ return task;
+ }
+ }
}
+ return NULL;
+}
+
+static void
+rpc_wake_up_task_on_wq_queue_locked(struct workqueue_struct *wq,
+ struct rpc_wait_queue *queue, struct rpc_task *task)
+{
+ rpc_wake_up_task_on_wq_queue_action_locked(wq, queue, task, NULL, NULL);
}
/*
struct rpc_wait_queue *queue,
struct rpc_task *task)
{
+ if (!RPC_IS_QUEUED(task))
+ return;
spin_lock_bh(&queue->lock);
rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
spin_unlock_bh(&queue->lock);
*/
void rpc_wake_up_queued_task(struct rpc_wait_queue *queue, struct rpc_task *task)
{
+ if (!RPC_IS_QUEUED(task))
+ return;
spin_lock_bh(&queue->lock);
rpc_wake_up_task_queue_locked(queue, task);
spin_unlock_bh(&queue->lock);
}
EXPORT_SYMBOL_GPL(rpc_wake_up_queued_task);
+static bool rpc_task_action_set_status(struct rpc_task *task, void *status)
+{
+ task->tk_status = *(int *)status;
+ return true;
+}
+
+static void
+rpc_wake_up_task_queue_set_status_locked(struct rpc_wait_queue *queue,
+ struct rpc_task *task, int status)
+{
+ rpc_wake_up_task_on_wq_queue_action_locked(rpciod_workqueue, queue,
+ task, rpc_task_action_set_status, &status);
+}
+
+/**
+ * rpc_wake_up_queued_task_set_status - wake up a task and set task->tk_status
+ * @queue: pointer to rpc_wait_queue
+ * @task: pointer to rpc_task
+ * @status: integer error value
+ *
+ * If @task is queued on @queue, then it is woken up, and @task->tk_status is
+ * set to the value of @status.
+ */
+void
+rpc_wake_up_queued_task_set_status(struct rpc_wait_queue *queue,
+ struct rpc_task *task, int status)
+{
+ if (!RPC_IS_QUEUED(task))
+ return;
+ spin_lock_bh(&queue->lock);
+ rpc_wake_up_task_queue_set_status_locked(queue, task, status);
+ spin_unlock_bh(&queue->lock);
+}
+
/*
* Wake up the next task on a priority queue.
*/
* Service a batch of tasks from a single owner.
*/
q = &queue->tasks[queue->priority];
- if (!list_empty(q)) {
- task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
- if (queue->owner == task->tk_owner) {
- if (--queue->nr)
- goto out;
- list_move_tail(&task->u.tk_wait.list, q);
- }
- /*
- * Check if we need to switch queues.
- */
- goto new_owner;
+ if (!list_empty(q) && --queue->nr) {
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
+ goto out;
}
/*
else
q = q - 1;
if (!list_empty(q)) {
- task = list_entry(q->next, struct rpc_task, u.tk_wait.list);
+ task = list_first_entry(q, struct rpc_task, u.tk_wait.list);
goto new_queue;
}
} while (q != &queue->tasks[queue->priority]);
new_queue:
rpc_set_waitqueue_priority(queue, (unsigned int)(q - &queue->tasks[0]));
-new_owner:
- rpc_set_waitqueue_owner(queue, task->tk_owner);
out:
return task;
}
queue, rpc_qname(queue));
spin_lock_bh(&queue->lock);
task = __rpc_find_next_queued(queue);
- if (task != NULL) {
- if (func(task, data))
- rpc_wake_up_task_on_wq_queue_locked(wq, queue, task);
- else
- task = NULL;
- }
+ if (task != NULL)
+ task = rpc_wake_up_task_on_wq_queue_action_locked(wq, queue,
+ task, func, data);
spin_unlock_bh(&queue->lock);
return task;
* Possibly called several times to iterate over an sk_buff and copy
* data out of it.
*/
-size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
+static size_t
+xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
{
if (len > desc->count)
len = desc->count;
desc->offset += len;
return len;
}
-EXPORT_SYMBOL_GPL(xdr_skb_read_bits);
/**
* xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
* @copy_actor: virtual method for copying data
*
*/
-ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
+static ssize_t
+xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
{
struct page **ppage = xdr->pages;
unsigned int len, pglen = xdr->page_len;
/* ACL likes to be lazy in allocating pages - ACLs
* are small by default but can get huge. */
- if (unlikely(*ppage == NULL)) {
+ if ((xdr->flags & XDRBUF_SPARSE_PAGES) && *ppage == NULL) {
*ppage = alloc_page(GFP_ATOMIC);
if (unlikely(*ppage == NULL)) {
if (copied == 0)
out:
return copied;
}
-EXPORT_SYMBOL_GPL(xdr_partial_copy_from_skb);
/**
* csum_partial_copy_to_xdr - checksum and copy data
mutex_init(&xprt->xpt_mutex);
spin_lock_init(&xprt->xpt_lock);
set_bit(XPT_BUSY, &xprt->xpt_flags);
- rpc_init_wait_queue(&xprt->xpt_bc_pending, "xpt_bc_pending");
xprt->xpt_net = get_net(net);
strcpy(xprt->xpt_remotebuf, "uninitialized");
}
else
len = xprt->xpt_ops->xpo_sendto(rqstp);
mutex_unlock(&xprt->xpt_mutex);
- rpc_wake_up(&xprt->xpt_bc_pending);
trace_svc_send(rqstp, len);
svc_xprt_release(rqstp);
if (!bc_xprt)
return -EAGAIN;
- spin_lock(&bc_xprt->recv_lock);
+ spin_lock(&bc_xprt->queue_lock);
req = xprt_lookup_rqst(bc_xprt, xid);
if (!req)
goto unlock_notfound;
memcpy(dst->iov_base, src->iov_base, src->iov_len);
xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len);
rqstp->rq_arg.len = 0;
- spin_unlock(&bc_xprt->recv_lock);
+ spin_unlock(&bc_xprt->queue_lock);
return 0;
unlock_notfound:
printk(KERN_NOTICE
__func__, ntohl(calldir),
bc_xprt, ntohl(xid));
unlock_eagain:
- spin_unlock(&bc_xprt->recv_lock);
+ spin_unlock(&bc_xprt->queue_lock);
return -EAGAIN;
}
#include <linux/errno.h>
#include <linux/sunrpc/xdr.h>
#include <linux/sunrpc/msg_prot.h>
+#include <linux/bvec.h>
/*
* XDR functions for basic NFS types
}
EXPORT_SYMBOL_GPL(xdr_terminate_string);
+size_t
+xdr_buf_pagecount(struct xdr_buf *buf)
+{
+ if (!buf->page_len)
+ return 0;
+ return (buf->page_base + buf->page_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
+}
+
+int
+xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
+{
+ size_t i, n = xdr_buf_pagecount(buf);
+
+ if (n != 0 && buf->bvec == NULL) {
+ buf->bvec = kmalloc_array(n, sizeof(buf->bvec[0]), gfp);
+ if (!buf->bvec)
+ return -ENOMEM;
+ for (i = 0; i < n; i++) {
+ buf->bvec[i].bv_page = buf->pages[i];
+ buf->bvec[i].bv_len = PAGE_SIZE;
+ buf->bvec[i].bv_offset = 0;
+ }
+ }
+ return 0;
+}
+
+void
+xdr_free_bvec(struct xdr_buf *buf)
+{
+ kfree(buf->bvec);
+ buf->bvec = NULL;
+}
+
void
xdr_inline_pages(struct xdr_buf *xdr, unsigned int offset,
struct page **pages, unsigned int base, unsigned int len)
static void xprt_init(struct rpc_xprt *xprt, struct net *net);
static __be32 xprt_alloc_xid(struct rpc_xprt *xprt);
static void xprt_connect_status(struct rpc_task *task);
-static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
-static void __xprt_put_cong(struct rpc_xprt *, struct rpc_rqst *);
static void xprt_destroy(struct rpc_xprt *xprt);
static DEFINE_SPINLOCK(xprt_list_lock);
}
EXPORT_SYMBOL_GPL(xprt_load_transport);
+static void xprt_clear_locked(struct rpc_xprt *xprt)
+{
+ xprt->snd_task = NULL;
+ if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
+ smp_mb__before_atomic();
+ clear_bit(XPRT_LOCKED, &xprt->state);
+ smp_mb__after_atomic();
+ } else
+ queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+}
+
/**
* xprt_reserve_xprt - serialize write access to transports
* @task: task that is requesting access to the transport
int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- int priority;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
return 1;
goto out_sleep;
}
+ if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ goto out_unlock;
xprt->snd_task = task;
- if (req != NULL)
- req->rq_ntrans++;
return 1;
+out_unlock:
+ xprt_clear_locked(xprt);
out_sleep:
dprintk("RPC: %5u failed to lock transport %p\n",
task->tk_pid, xprt);
- task->tk_timeout = 0;
+ task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
task->tk_status = -EAGAIN;
- if (req == NULL)
- priority = RPC_PRIORITY_LOW;
- else if (!req->rq_ntrans)
- priority = RPC_PRIORITY_NORMAL;
- else
- priority = RPC_PRIORITY_HIGH;
- rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
+ rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
-static void xprt_clear_locked(struct rpc_xprt *xprt)
+static bool
+xprt_need_congestion_window_wait(struct rpc_xprt *xprt)
{
- xprt->snd_task = NULL;
- if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
- smp_mb__before_atomic();
- clear_bit(XPRT_LOCKED, &xprt->state);
- smp_mb__after_atomic();
- } else
- queue_work(xprtiod_workqueue, &xprt->task_cleanup);
+ return test_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_set_congestion_window_wait(struct rpc_xprt *xprt)
+{
+ if (!list_empty(&xprt->xmit_queue)) {
+ /* Peek at head of queue to see if it can make progress */
+ if (list_first_entry(&xprt->xmit_queue, struct rpc_rqst,
+ rq_xmit)->rq_cong)
+ return;
+ }
+ set_bit(XPRT_CWND_WAIT, &xprt->state);
+}
+
+static void
+xprt_test_and_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+ if (!RPCXPRT_CONGESTED(xprt))
+ clear_bit(XPRT_CWND_WAIT, &xprt->state);
}
/*
* Same as xprt_reserve_xprt, but Van Jacobson congestion control is
* integrated into the decision of whether a request is allowed to be
* woken up and given access to the transport.
+ * Note that the lock is only granted if we know there are free slots.
*/
int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
struct rpc_rqst *req = task->tk_rqstp;
- int priority;
if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
if (task == xprt->snd_task)
xprt->snd_task = task;
return 1;
}
- if (__xprt_get_cong(xprt, task)) {
+ if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ goto out_unlock;
+ if (!xprt_need_congestion_window_wait(xprt)) {
xprt->snd_task = task;
- req->rq_ntrans++;
return 1;
}
+out_unlock:
xprt_clear_locked(xprt);
out_sleep:
- if (req)
- __xprt_put_cong(xprt, req);
dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
- task->tk_timeout = 0;
+ task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
task->tk_status = -EAGAIN;
- if (req == NULL)
- priority = RPC_PRIORITY_LOW;
- else if (!req->rq_ntrans)
- priority = RPC_PRIORITY_NORMAL;
- else
- priority = RPC_PRIORITY_HIGH;
- rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
+ rpc_sleep_on(&xprt->sending, task, NULL);
return 0;
}
EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
{
int retval;
+ if (test_bit(XPRT_LOCKED, &xprt->state) && xprt->snd_task == task)
+ return 1;
spin_lock_bh(&xprt->transport_lock);
retval = xprt->ops->reserve_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
static bool __xprt_lock_write_func(struct rpc_task *task, void *data)
{
struct rpc_xprt *xprt = data;
- struct rpc_rqst *req;
- req = task->tk_rqstp;
xprt->snd_task = task;
- if (req)
- req->rq_ntrans++;
return true;
}
{
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
-
+ if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ goto out_unlock;
if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
__xprt_lock_write_func, xprt))
return;
+out_unlock:
xprt_clear_locked(xprt);
}
-static bool __xprt_lock_write_cong_func(struct rpc_task *task, void *data)
-{
- struct rpc_xprt *xprt = data;
- struct rpc_rqst *req;
-
- req = task->tk_rqstp;
- if (req == NULL) {
- xprt->snd_task = task;
- return true;
- }
- if (__xprt_get_cong(xprt, task)) {
- xprt->snd_task = task;
- req->rq_ntrans++;
- return true;
- }
- return false;
-}
-
static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
{
if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
return;
- if (RPCXPRT_CONGESTED(xprt))
+ if (test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ goto out_unlock;
+ if (xprt_need_congestion_window_wait(xprt))
goto out_unlock;
if (rpc_wake_up_first_on_wq(xprtiod_workqueue, &xprt->sending,
- __xprt_lock_write_cong_func, xprt))
+ __xprt_lock_write_func, xprt))
return;
out_unlock:
xprt_clear_locked(xprt);
}
-static void xprt_task_clear_bytes_sent(struct rpc_task *task)
-{
- if (task != NULL) {
- struct rpc_rqst *req = task->tk_rqstp;
- if (req != NULL)
- req->rq_bytes_sent = 0;
- }
-}
-
/**
* xprt_release_xprt - allow other requests to use a transport
* @xprt: transport with other tasks potentially waiting
void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task == task) {
- xprt_task_clear_bytes_sent(task);
xprt_clear_locked(xprt);
__xprt_lock_write_next(xprt);
}
void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
{
if (xprt->snd_task == task) {
- xprt_task_clear_bytes_sent(task);
xprt_clear_locked(xprt);
__xprt_lock_write_next_cong(xprt);
}
static inline void xprt_release_write(struct rpc_xprt *xprt, struct rpc_task *task)
{
+ if (xprt->snd_task != task)
+ return;
spin_lock_bh(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
spin_unlock_bh(&xprt->transport_lock);
* overflowed. Put the task to sleep if this is the case.
*/
static int
-__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_task *task)
+__xprt_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
-
if (req->rq_cong)
return 1;
dprintk("RPC: %5u xprt_cwnd_limited cong = %lu cwnd = %lu\n",
- task->tk_pid, xprt->cong, xprt->cwnd);
- if (RPCXPRT_CONGESTED(xprt))
+ req->rq_task->tk_pid, xprt->cong, xprt->cwnd);
+ if (RPCXPRT_CONGESTED(xprt)) {
+ xprt_set_congestion_window_wait(xprt);
return 0;
+ }
req->rq_cong = 1;
xprt->cong += RPC_CWNDSCALE;
return 1;
return;
req->rq_cong = 0;
xprt->cong -= RPC_CWNDSCALE;
+ xprt_test_and_clear_congestion_window_wait(xprt);
__xprt_lock_write_next_cong(xprt);
}
+/**
+ * xprt_request_get_cong - Request congestion control credits
+ * @xprt: pointer to transport
+ * @req: pointer to RPC request
+ *
+ * Useful for transports that require congestion control.
+ */
+bool
+xprt_request_get_cong(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+ bool ret = false;
+
+ if (req->rq_cong)
+ return true;
+ spin_lock_bh(&xprt->transport_lock);
+ ret = __xprt_get_cong(xprt, req) != 0;
+ spin_unlock_bh(&xprt->transport_lock);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(xprt_request_get_cong);
+
/**
* xprt_release_rqst_cong - housekeeping when request is complete
* @task: RPC request that recently completed
}
EXPORT_SYMBOL_GPL(xprt_release_rqst_cong);
+/*
+ * Clear the congestion window wait flag and wake up the next
+ * entry on xprt->sending
+ */
+static void
+xprt_clear_congestion_window_wait(struct rpc_xprt *xprt)
+{
+ if (test_and_clear_bit(XPRT_CWND_WAIT, &xprt->state)) {
+ spin_lock_bh(&xprt->transport_lock);
+ __xprt_lock_write_next_cong(xprt);
+ spin_unlock_bh(&xprt->transport_lock);
+ }
+}
+
/**
* xprt_adjust_cwnd - adjust transport congestion window
* @xprt: pointer to xprt
/**
* xprt_wait_for_buffer_space - wait for transport output buffer to clear
- * @task: task to be put to sleep
- * @action: function pointer to be executed after wait
+ * @xprt: transport
*
* Note that we only set the timer for the case of RPC_IS_SOFT(), since
* we don't in general want to force a socket disconnection due to
* an incomplete RPC call transmission.
*/
-void xprt_wait_for_buffer_space(struct rpc_task *task, rpc_action action)
+void xprt_wait_for_buffer_space(struct rpc_xprt *xprt)
{
- struct rpc_rqst *req = task->tk_rqstp;
- struct rpc_xprt *xprt = req->rq_xprt;
-
- task->tk_timeout = RPC_IS_SOFT(task) ? req->rq_timeout : 0;
- rpc_sleep_on(&xprt->pending, task, action);
+ set_bit(XPRT_WRITE_SPACE, &xprt->state);
}
EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
+static bool
+xprt_clear_write_space_locked(struct rpc_xprt *xprt)
+{
+ if (test_and_clear_bit(XPRT_WRITE_SPACE, &xprt->state)) {
+ __xprt_lock_write_next(xprt);
+ dprintk("RPC: write space: waking waiting task on "
+ "xprt %p\n", xprt);
+ return true;
+ }
+ return false;
+}
+
/**
* xprt_write_space - wake the task waiting for transport output buffer space
* @xprt: transport with waiting tasks
*
* Can be called in a soft IRQ context, so xprt_write_space never sleeps.
*/
-void xprt_write_space(struct rpc_xprt *xprt)
+bool xprt_write_space(struct rpc_xprt *xprt)
{
+ bool ret;
+
+ if (!test_bit(XPRT_WRITE_SPACE, &xprt->state))
+ return false;
spin_lock_bh(&xprt->transport_lock);
- if (xprt->snd_task) {
- dprintk("RPC: write space: waking waiting task on "
- "xprt %p\n", xprt);
- rpc_wake_up_queued_task_on_wq(xprtiod_workqueue,
- &xprt->pending, xprt->snd_task);
- }
+ ret = xprt_clear_write_space_locked(xprt);
spin_unlock_bh(&xprt->transport_lock);
+ return ret;
}
EXPORT_SYMBOL_GPL(xprt_write_space);
dprintk("RPC: disconnected transport %p\n", xprt);
spin_lock_bh(&xprt->transport_lock);
xprt_clear_connected(xprt);
+ xprt_clear_write_space_locked(xprt);
xprt_wake_pending_tasks(xprt, -EAGAIN);
spin_unlock_bh(&xprt->transport_lock);
}
}
EXPORT_SYMBOL_GPL(xprt_force_disconnect);
+static unsigned int
+xprt_connect_cookie(struct rpc_xprt *xprt)
+{
+ return READ_ONCE(xprt->connect_cookie);
+}
+
+static bool
+xprt_request_retransmit_after_disconnect(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ return req->rq_connect_cookie != xprt_connect_cookie(xprt) ||
+ !xprt_connected(xprt);
+}
+
/**
* xprt_conditional_disconnect - force a transport to disconnect
* @xprt: transport to disconnect
xprt_schedule_autodisconnect(struct rpc_xprt *xprt)
__must_hold(&xprt->transport_lock)
{
- if (list_empty(&xprt->recv) && xprt_has_timer(xprt))
+ if (RB_EMPTY_ROOT(&xprt->recv_queue) && xprt_has_timer(xprt))
mod_timer(&xprt->timer, xprt->last_used + xprt->idle_timeout);
}
struct rpc_xprt *xprt = from_timer(xprt, t, timer);
spin_lock(&xprt->transport_lock);
- if (!list_empty(&xprt->recv))
+ if (!RB_EMPTY_ROOT(&xprt->recv_queue))
goto out_abort;
/* Reset xprt->last_used to avoid connect/autodisconnect cycling */
xprt->last_used = jiffies;
goto out;
if (xprt->snd_task != task)
goto out;
- xprt_task_clear_bytes_sent(task);
xprt->snd_task = cookie;
ret = true;
out:
xprt->ops->close(xprt);
if (!xprt_connected(xprt)) {
- task->tk_rqstp->rq_bytes_sent = 0;
task->tk_timeout = task->tk_rqstp->rq_timeout;
task->tk_rqstp->rq_connect_cookie = xprt->connect_cookie;
rpc_sleep_on(&xprt->pending, task, xprt_connect_status);
static void xprt_connect_status(struct rpc_task *task)
{
- struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
-
- if (task->tk_status == 0) {
- xprt->stat.connect_count++;
- xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start;
+ switch (task->tk_status) {
+ case 0:
dprintk("RPC: %5u xprt_connect_status: connection established\n",
task->tk_pid);
- return;
- }
-
- switch (task->tk_status) {
+ break;
case -ECONNREFUSED:
case -ECONNRESET:
case -ECONNABORTED:
default:
dprintk("RPC: %5u xprt_connect_status: error %d connecting to "
"server %s\n", task->tk_pid, -task->tk_status,
- xprt->servername);
+ task->tk_rqstp->rq_xprt->servername);
task->tk_status = -EIO;
}
}
+enum xprt_xid_rb_cmp {
+ XID_RB_EQUAL,
+ XID_RB_LEFT,
+ XID_RB_RIGHT,
+};
+static enum xprt_xid_rb_cmp
+xprt_xid_cmp(__be32 xid1, __be32 xid2)
+{
+ if (xid1 == xid2)
+ return XID_RB_EQUAL;
+ if ((__force u32)xid1 < (__force u32)xid2)
+ return XID_RB_LEFT;
+ return XID_RB_RIGHT;
+}
+
+static struct rpc_rqst *
+xprt_request_rb_find(struct rpc_xprt *xprt, __be32 xid)
+{
+ struct rb_node *n = xprt->recv_queue.rb_node;
+ struct rpc_rqst *req;
+
+ while (n != NULL) {
+ req = rb_entry(n, struct rpc_rqst, rq_recv);
+ switch (xprt_xid_cmp(xid, req->rq_xid)) {
+ case XID_RB_LEFT:
+ n = n->rb_left;
+ break;
+ case XID_RB_RIGHT:
+ n = n->rb_right;
+ break;
+ case XID_RB_EQUAL:
+ return req;
+ }
+ }
+ return NULL;
+}
+
+static void
+xprt_request_rb_insert(struct rpc_xprt *xprt, struct rpc_rqst *new)
+{
+ struct rb_node **p = &xprt->recv_queue.rb_node;
+ struct rb_node *n = NULL;
+ struct rpc_rqst *req;
+
+ while (*p != NULL) {
+ n = *p;
+ req = rb_entry(n, struct rpc_rqst, rq_recv);
+ switch(xprt_xid_cmp(new->rq_xid, req->rq_xid)) {
+ case XID_RB_LEFT:
+ p = &n->rb_left;
+ break;
+ case XID_RB_RIGHT:
+ p = &n->rb_right;
+ break;
+ case XID_RB_EQUAL:
+ WARN_ON_ONCE(new != req);
+ return;
+ }
+ }
+ rb_link_node(&new->rq_recv, n, p);
+ rb_insert_color(&new->rq_recv, &xprt->recv_queue);
+}
+
+static void
+xprt_request_rb_remove(struct rpc_xprt *xprt, struct rpc_rqst *req)
+{
+ rb_erase(&req->rq_recv, &xprt->recv_queue);
+}
+
/**
* xprt_lookup_rqst - find an RPC request corresponding to an XID
* @xprt: transport on which the original request was transmitted
* @xid: RPC XID of incoming reply
*
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
*/
struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid)
{
struct rpc_rqst *entry;
- list_for_each_entry(entry, &xprt->recv, rq_list)
- if (entry->rq_xid == xid) {
- trace_xprt_lookup_rqst(xprt, xid, 0);
- entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
- return entry;
- }
+ entry = xprt_request_rb_find(xprt, xid);
+ if (entry != NULL) {
+ trace_xprt_lookup_rqst(xprt, xid, 0);
+ entry->rq_rtt = ktime_sub(ktime_get(), entry->rq_xtime);
+ return entry;
+ }
dprintk("RPC: xprt_lookup_rqst did not find xid %08x\n",
ntohl(xid));
}
EXPORT_SYMBOL_GPL(xprt_lookup_rqst);
+static bool
+xprt_is_pinned_rqst(struct rpc_rqst *req)
+{
+ return atomic_read(&req->rq_pin) != 0;
+}
+
/**
* xprt_pin_rqst - Pin a request on the transport receive list
* @req: Request to pin
*
* Caller must ensure this is atomic with the call to xprt_lookup_rqst()
- * so should be holding the xprt transport lock.
+ * so should be holding the xprt receive lock.
*/
void xprt_pin_rqst(struct rpc_rqst *req)
{
- set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate);
+ atomic_inc(&req->rq_pin);
}
EXPORT_SYMBOL_GPL(xprt_pin_rqst);
* xprt_unpin_rqst - Unpin a request on the transport receive list
* @req: Request to pin
*
- * Caller should be holding the xprt transport lock.
+ * Caller should be holding the xprt receive lock.
*/
void xprt_unpin_rqst(struct rpc_rqst *req)
{
- struct rpc_task *task = req->rq_task;
-
- clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate);
- if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate))
- wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV);
+ if (!test_bit(RPC_TASK_MSG_PIN_WAIT, &req->rq_task->tk_runstate)) {
+ atomic_dec(&req->rq_pin);
+ return;
+ }
+ if (atomic_dec_and_test(&req->rq_pin))
+ wake_up_var(&req->rq_pin);
}
EXPORT_SYMBOL_GPL(xprt_unpin_rqst);
static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req)
-__must_hold(&req->rq_xprt->recv_lock)
{
- struct rpc_task *task = req->rq_task;
+ wait_var_event(&req->rq_pin, !xprt_is_pinned_rqst(req));
+}
- if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) {
- spin_unlock(&req->rq_xprt->recv_lock);
- set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
- wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV,
- TASK_UNINTERRUPTIBLE);
- clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate);
- spin_lock(&req->rq_xprt->recv_lock);
- }
+static bool
+xprt_request_data_received(struct rpc_task *task)
+{
+ return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+ READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) != 0;
+}
+
+static bool
+xprt_request_need_enqueue_receive(struct rpc_task *task, struct rpc_rqst *req)
+{
+ return !test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) &&
+ READ_ONCE(task->tk_rqstp->rq_reply_bytes_recvd) == 0;
+}
+
+/**
+ * xprt_request_enqueue_receive - Add an request to the receive queue
+ * @task: RPC task
+ *
+ */
+void
+xprt_request_enqueue_receive(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (!xprt_request_need_enqueue_receive(task, req))
+ return;
+ spin_lock(&xprt->queue_lock);
+
+ /* Update the softirq receive buffer */
+ memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
+ sizeof(req->rq_private_buf));
+
+ /* Add request to the receive list */
+ xprt_request_rb_insert(xprt, req);
+ set_bit(RPC_TASK_NEED_RECV, &task->tk_runstate);
+ spin_unlock(&xprt->queue_lock);
+
+ xprt_reset_majortimeo(req);
+ /* Turn off autodisconnect */
+ del_singleshot_timer_sync(&xprt->timer);
+}
+
+/**
+ * xprt_request_dequeue_receive_locked - Remove a request from the receive queue
+ * @task: RPC task
+ *
+ * Caller must hold xprt->queue_lock.
+ */
+static void
+xprt_request_dequeue_receive_locked(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+
+ if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+ xprt_request_rb_remove(req->rq_xprt, req);
}
/**
* xprt_update_rtt - Update RPC RTT statistics
* @task: RPC request that recently completed
*
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
*/
void xprt_update_rtt(struct rpc_task *task)
{
* @task: RPC request that recently completed
* @copied: actual number of bytes received from the transport
*
- * Caller holds xprt->recv_lock.
+ * Caller holds xprt->queue_lock.
*/
void xprt_complete_rqst(struct rpc_task *task, int copied)
{
xprt->stat.recvs++;
- list_del_init(&req->rq_list);
req->rq_private_buf.len = copied;
/* Ensure all writes are done before we update */
/* req->rq_reply_bytes_recvd */
smp_wmb();
req->rq_reply_bytes_recvd = copied;
+ xprt_request_dequeue_receive_locked(task);
rpc_wake_up_queued_task(&xprt->pending, task);
}
EXPORT_SYMBOL_GPL(xprt_complete_rqst);
task->tk_status = 0;
}
+/**
+ * xprt_request_wait_receive - wait for the reply to an RPC request
+ * @task: RPC task about to send a request
+ *
+ */
+void xprt_request_wait_receive(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (!test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate))
+ return;
+ /*
+ * Sleep on the pending queue if we're expecting a reply.
+ * The spinlock ensures atomicity between the test of
+ * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
+ */
+ spin_lock(&xprt->queue_lock);
+ if (test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) {
+ xprt->ops->set_retrans_timeout(task);
+ rpc_sleep_on(&xprt->pending, task, xprt_timer);
+ /*
+ * Send an extra queue wakeup call if the
+ * connection was dropped in case the call to
+ * rpc_sleep_on() raced.
+ */
+ if (xprt_request_retransmit_after_disconnect(task))
+ rpc_wake_up_queued_task_set_status(&xprt->pending,
+ task, -ENOTCONN);
+ }
+ spin_unlock(&xprt->queue_lock);
+}
+
+static bool
+xprt_request_need_enqueue_transmit(struct rpc_task *task, struct rpc_rqst *req)
+{
+ return !test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+}
+
+/**
+ * xprt_request_enqueue_transmit - queue a task for transmission
+ * @task: pointer to rpc_task
+ *
+ * Add a task to the transmission queue.
+ */
+void
+xprt_request_enqueue_transmit(struct rpc_task *task)
+{
+ struct rpc_rqst *pos, *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (xprt_request_need_enqueue_transmit(task, req)) {
+ spin_lock(&xprt->queue_lock);
+ /*
+ * Requests that carry congestion control credits are added
+ * to the head of the list to avoid starvation issues.
+ */
+ if (req->rq_cong) {
+ xprt_clear_congestion_window_wait(xprt);
+ list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+ if (pos->rq_cong)
+ continue;
+ /* Note: req is added _before_ pos */
+ list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+ INIT_LIST_HEAD(&req->rq_xmit2);
+ goto out;
+ }
+ } else if (RPC_IS_SWAPPER(task)) {
+ list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+ if (pos->rq_cong || pos->rq_bytes_sent)
+ continue;
+ if (RPC_IS_SWAPPER(pos->rq_task))
+ continue;
+ /* Note: req is added _before_ pos */
+ list_add_tail(&req->rq_xmit, &pos->rq_xmit);
+ INIT_LIST_HEAD(&req->rq_xmit2);
+ goto out;
+ }
+ } else {
+ list_for_each_entry(pos, &xprt->xmit_queue, rq_xmit) {
+ if (pos->rq_task->tk_owner != task->tk_owner)
+ continue;
+ list_add_tail(&req->rq_xmit2, &pos->rq_xmit2);
+ INIT_LIST_HEAD(&req->rq_xmit);
+ goto out;
+ }
+ }
+ list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
+ INIT_LIST_HEAD(&req->rq_xmit2);
+out:
+ set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
+ spin_unlock(&xprt->queue_lock);
+ }
+}
+
+/**
+ * xprt_request_dequeue_transmit_locked - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ * Caller must hold xprt->queue_lock
+ */
+static void
+xprt_request_dequeue_transmit_locked(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+
+ if (!test_and_clear_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ return;
+ if (!list_empty(&req->rq_xmit)) {
+ list_del(&req->rq_xmit);
+ if (!list_empty(&req->rq_xmit2)) {
+ struct rpc_rqst *next = list_first_entry(&req->rq_xmit2,
+ struct rpc_rqst, rq_xmit2);
+ list_del(&req->rq_xmit2);
+ list_add_tail(&next->rq_xmit, &next->rq_xprt->xmit_queue);
+ }
+ } else
+ list_del(&req->rq_xmit2);
+}
+
+/**
+ * xprt_request_dequeue_transmit - remove a task from the transmission queue
+ * @task: pointer to rpc_task
+ *
+ * Remove a task from the transmission queue
+ */
+static void
+xprt_request_dequeue_transmit(struct rpc_task *task)
+{
+ struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ spin_lock(&xprt->queue_lock);
+ xprt_request_dequeue_transmit_locked(task);
+ spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * xprt_request_prepare - prepare an encoded request for transport
+ * @req: pointer to rpc_rqst
+ *
+ * Calls into the transport layer to do whatever is needed to prepare
+ * the request for transmission or receive.
+ */
+void
+xprt_request_prepare(struct rpc_rqst *req)
+{
+ struct rpc_xprt *xprt = req->rq_xprt;
+
+ if (xprt->ops->prepare_request)
+ xprt->ops->prepare_request(req);
+}
+
+/**
+ * xprt_request_need_retransmit - Test if a task needs retransmission
+ * @task: pointer to rpc_task
+ *
+ * Test for whether a connection breakage requires the task to retransmit
+ */
+bool
+xprt_request_need_retransmit(struct rpc_task *task)
+{
+ return xprt_request_retransmit_after_disconnect(task);
+}
+
/**
* xprt_prepare_transmit - reserve the transport before sending a request
* @task: RPC task about to send a request
{
struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
- bool ret = false;
dprintk("RPC: %5u xprt_prepare_transmit\n", task->tk_pid);
- spin_lock_bh(&xprt->transport_lock);
- if (!req->rq_bytes_sent) {
- if (req->rq_reply_bytes_recvd) {
- task->tk_status = req->rq_reply_bytes_recvd;
- goto out_unlock;
- }
- if ((task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT)
- && xprt_connected(xprt)
- && req->rq_connect_cookie == xprt->connect_cookie) {
- xprt->ops->set_retrans_timeout(task);
- rpc_sleep_on(&xprt->pending, task, xprt_timer);
- goto out_unlock;
- }
- }
- if (!xprt->ops->reserve_xprt(xprt, task)) {
- task->tk_status = -EAGAIN;
- goto out_unlock;
+ if (!xprt_lock_write(xprt, task)) {
+ /* Race breaker: someone may have transmitted us */
+ if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ rpc_wake_up_queued_task_set_status(&xprt->sending,
+ task, 0);
+ return false;
+
}
- ret = true;
-out_unlock:
- spin_unlock_bh(&xprt->transport_lock);
- return ret;
+ return true;
}
void xprt_end_transmit(struct rpc_task *task)
}
/**
- * xprt_transmit - send an RPC request on a transport
- * @task: controlling RPC task
+ * xprt_request_transmit - send an RPC request on a transport
+ * @req: pointer to request to transmit
+ * @snd_task: RPC task that owns the transport lock
*
- * We have to copy the iovec because sendmsg fiddles with its contents.
+ * This performs the transmission of a single request.
+ * Note that if the request is not the same as snd_task, then it
+ * does need to be pinned.
+ * Returns '0' on success.
*/
-void xprt_transmit(struct rpc_task *task)
+static int
+xprt_request_transmit(struct rpc_rqst *req, struct rpc_task *snd_task)
{
- struct rpc_rqst *req = task->tk_rqstp;
- struct rpc_xprt *xprt = req->rq_xprt;
+ struct rpc_xprt *xprt = req->rq_xprt;
+ struct rpc_task *task = req->rq_task;
unsigned int connect_cookie;
+ int is_retrans = RPC_WAS_SENT(task);
int status;
dprintk("RPC: %5u xprt_transmit(%u)\n", task->tk_pid, req->rq_slen);
- if (!req->rq_reply_bytes_recvd) {
- if (list_empty(&req->rq_list) && rpc_reply_expected(task)) {
- /*
- * Add to the list only if we're expecting a reply
- */
- /* Update the softirq receive buffer */
- memcpy(&req->rq_private_buf, &req->rq_rcv_buf,
- sizeof(req->rq_private_buf));
- /* Add request to the receive list */
- spin_lock(&xprt->recv_lock);
- list_add_tail(&req->rq_list, &xprt->recv);
- spin_unlock(&xprt->recv_lock);
- xprt_reset_majortimeo(req);
- /* Turn off autodisconnect */
- del_singleshot_timer_sync(&xprt->timer);
+ if (!req->rq_bytes_sent) {
+ if (xprt_request_data_received(task)) {
+ status = 0;
+ goto out_dequeue;
}
- } else if (!req->rq_bytes_sent)
- return;
+ /* Verify that our message lies in the RPCSEC_GSS window */
+ if (rpcauth_xmit_need_reencode(task)) {
+ status = -EBADMSG;
+ goto out_dequeue;
+ }
+ }
+
+ /*
+ * Update req->rq_ntrans before transmitting to avoid races with
+ * xprt_update_rtt(), which needs to know that it is recording a
+ * reply to the first transmission.
+ */
+ req->rq_ntrans++;
connect_cookie = xprt->connect_cookie;
- status = xprt->ops->send_request(task);
+ status = xprt->ops->send_request(req);
trace_xprt_transmit(xprt, req->rq_xid, status);
if (status != 0) {
- task->tk_status = status;
- return;
+ req->rq_ntrans--;
+ return status;
}
+
+ if (is_retrans)
+ task->tk_client->cl_stats->rpcretrans++;
+
xprt_inject_disconnect(xprt);
dprintk("RPC: %5u xmit complete\n", task->tk_pid);
task->tk_flags |= RPC_TASK_SENT;
spin_lock_bh(&xprt->transport_lock);
- xprt->ops->set_retrans_timeout(task);
-
xprt->stat.sends++;
xprt->stat.req_u += xprt->stat.sends - xprt->stat.recvs;
xprt->stat.bklog_u += xprt->backlog.qlen;
spin_unlock_bh(&xprt->transport_lock);
req->rq_connect_cookie = connect_cookie;
- if (rpc_reply_expected(task) && !READ_ONCE(req->rq_reply_bytes_recvd)) {
- /*
- * Sleep on the pending queue if we're expecting a reply.
- * The spinlock ensures atomicity between the test of
- * req->rq_reply_bytes_recvd, and the call to rpc_sleep_on().
- */
- spin_lock(&xprt->recv_lock);
- if (!req->rq_reply_bytes_recvd) {
- rpc_sleep_on(&xprt->pending, task, xprt_timer);
- /*
- * Send an extra queue wakeup call if the
- * connection was dropped in case the call to
- * rpc_sleep_on() raced.
- */
- if (!xprt_connected(xprt))
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
- }
- spin_unlock(&xprt->recv_lock);
+out_dequeue:
+ xprt_request_dequeue_transmit(task);
+ rpc_wake_up_queued_task_set_status(&xprt->sending, task, status);
+ return status;
+}
+
+/**
+ * xprt_transmit - send an RPC request on a transport
+ * @task: controlling RPC task
+ *
+ * Attempts to drain the transmit queue. On exit, either the transport
+ * signalled an error that needs to be handled before transmission can
+ * resume, or @task finished transmitting, and detected that it already
+ * received a reply.
+ */
+void
+xprt_transmit(struct rpc_task *task)
+{
+ struct rpc_rqst *next, *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
+ int status;
+
+ spin_lock(&xprt->queue_lock);
+ while (!list_empty(&xprt->xmit_queue)) {
+ next = list_first_entry(&xprt->xmit_queue,
+ struct rpc_rqst, rq_xmit);
+ xprt_pin_rqst(next);
+ spin_unlock(&xprt->queue_lock);
+ status = xprt_request_transmit(next, task);
+ if (status == -EBADMSG && next != req)
+ status = 0;
+ cond_resched();
+ spin_lock(&xprt->queue_lock);
+ xprt_unpin_rqst(next);
+ if (status == 0) {
+ if (!xprt_request_data_received(task) ||
+ test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ continue;
+ } else if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
+ task->tk_status = status;
+ break;
}
+ spin_unlock(&xprt->queue_lock);
}
static void xprt_add_backlog(struct rpc_xprt *xprt, struct rpc_task *task)
}
EXPORT_SYMBOL_GPL(xprt_alloc_slot);
-void xprt_lock_and_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task)
-{
- /* Note: grabbing the xprt_lock_write() ensures that we throttle
- * new slot allocation if the transport is congested (i.e. when
- * reconnecting a stream transport or when out of socket write
- * buffer space).
- */
- if (xprt_lock_write(xprt, task)) {
- xprt_alloc_slot(xprt, task);
- xprt_release_write(xprt, task);
- }
-}
-EXPORT_SYMBOL_GPL(xprt_lock_and_alloc_slot);
-
void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
{
spin_lock(&xprt->reserve_lock);
}
EXPORT_SYMBOL_GPL(xprt_free);
+static void
+xprt_init_connect_cookie(struct rpc_rqst *req, struct rpc_xprt *xprt)
+{
+ req->rq_connect_cookie = xprt_connect_cookie(xprt) - 1;
+}
+
+static __be32
+xprt_alloc_xid(struct rpc_xprt *xprt)
+{
+ __be32 xid;
+
+ spin_lock(&xprt->reserve_lock);
+ xid = (__force __be32)xprt->xid++;
+ spin_unlock(&xprt->reserve_lock);
+ return xid;
+}
+
+static void
+xprt_init_xid(struct rpc_xprt *xprt)
+{
+ xprt->xid = prandom_u32();
+}
+
+static void
+xprt_request_init(struct rpc_task *task)
+{
+ struct rpc_xprt *xprt = task->tk_xprt;
+ struct rpc_rqst *req = task->tk_rqstp;
+
+ req->rq_timeout = task->tk_client->cl_timeout->to_initval;
+ req->rq_task = task;
+ req->rq_xprt = xprt;
+ req->rq_buffer = NULL;
+ req->rq_xid = xprt_alloc_xid(xprt);
+ xprt_init_connect_cookie(req, xprt);
+ req->rq_bytes_sent = 0;
+ req->rq_snd_buf.len = 0;
+ req->rq_snd_buf.buflen = 0;
+ req->rq_rcv_buf.len = 0;
+ req->rq_rcv_buf.buflen = 0;
+ req->rq_release_snd_buf = NULL;
+ xprt_reset_majortimeo(req);
+ dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
+ req, ntohl(req->rq_xid));
+}
+
+static void
+xprt_do_reserve(struct rpc_xprt *xprt, struct rpc_task *task)
+{
+ xprt->ops->alloc_slot(xprt, task);
+ if (task->tk_rqstp != NULL)
+ xprt_request_init(task);
+}
+
/**
* xprt_reserve - allocate an RPC request slot
* @task: RPC task requesting a slot allocation
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
if (!xprt_throttle_congested(xprt, task))
- xprt->ops->alloc_slot(xprt, task);
+ xprt_do_reserve(xprt, task);
}
/**
task->tk_timeout = 0;
task->tk_status = -EAGAIN;
- xprt->ops->alloc_slot(xprt, task);
-}
-
-static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
-{
- __be32 xid;
-
- spin_lock(&xprt->reserve_lock);
- xid = (__force __be32)xprt->xid++;
- spin_unlock(&xprt->reserve_lock);
- return xid;
+ xprt_do_reserve(xprt, task);
}
-static inline void xprt_init_xid(struct rpc_xprt *xprt)
-{
- xprt->xid = prandom_u32();
-}
-
-void xprt_request_init(struct rpc_task *task)
+static void
+xprt_request_dequeue_all(struct rpc_task *task, struct rpc_rqst *req)
{
- struct rpc_xprt *xprt = task->tk_xprt;
- struct rpc_rqst *req = task->tk_rqstp;
+ struct rpc_xprt *xprt = req->rq_xprt;
- INIT_LIST_HEAD(&req->rq_list);
- req->rq_timeout = task->tk_client->cl_timeout->to_initval;
- req->rq_task = task;
- req->rq_xprt = xprt;
- req->rq_buffer = NULL;
- req->rq_xid = xprt_alloc_xid(xprt);
- req->rq_connect_cookie = xprt->connect_cookie - 1;
- req->rq_bytes_sent = 0;
- req->rq_snd_buf.len = 0;
- req->rq_snd_buf.buflen = 0;
- req->rq_rcv_buf.len = 0;
- req->rq_rcv_buf.buflen = 0;
- req->rq_release_snd_buf = NULL;
- xprt_reset_majortimeo(req);
- dprintk("RPC: %5u reserved req %p xid %08x\n", task->tk_pid,
- req, ntohl(req->rq_xid));
+ if (test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate) ||
+ test_bit(RPC_TASK_NEED_RECV, &task->tk_runstate) ||
+ xprt_is_pinned_rqst(req)) {
+ spin_lock(&xprt->queue_lock);
+ xprt_request_dequeue_transmit_locked(task);
+ xprt_request_dequeue_receive_locked(task);
+ while (xprt_is_pinned_rqst(req)) {
+ set_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+ spin_unlock(&xprt->queue_lock);
+ xprt_wait_on_pinned_rqst(req);
+ spin_lock(&xprt->queue_lock);
+ clear_bit(RPC_TASK_MSG_PIN_WAIT, &task->tk_runstate);
+ }
+ spin_unlock(&xprt->queue_lock);
+ }
}
/**
if (req == NULL) {
if (task->tk_client) {
xprt = task->tk_xprt;
- if (xprt->snd_task == task)
- xprt_release_write(xprt, task);
+ xprt_release_write(xprt, task);
}
return;
}
task->tk_ops->rpc_count_stats(task, task->tk_calldata);
else if (task->tk_client)
rpc_count_iostats(task, task->tk_client->cl_metrics);
- spin_lock(&xprt->recv_lock);
- if (!list_empty(&req->rq_list)) {
- list_del_init(&req->rq_list);
- xprt_wait_on_pinned_rqst(req);
- }
- spin_unlock(&xprt->recv_lock);
+ xprt_request_dequeue_all(task, req);
spin_lock_bh(&xprt->transport_lock);
xprt->ops->release_xprt(xprt, task);
if (xprt->ops->release_request)
if (req->rq_buffer)
xprt->ops->buf_free(task);
xprt_inject_disconnect(xprt);
+ xdr_free_bvec(&req->rq_rcv_buf);
if (req->rq_cred != NULL)
put_rpccred(req->rq_cred);
task->tk_rqstp = NULL;
xprt_free_bc_request(req);
}
+#ifdef CONFIG_SUNRPC_BACKCHANNEL
+void
+xprt_init_bc_request(struct rpc_rqst *req, struct rpc_task *task)
+{
+ struct xdr_buf *xbufp = &req->rq_snd_buf;
+
+ task->tk_rqstp = req;
+ req->rq_task = task;
+ xprt_init_connect_cookie(req, req->rq_xprt);
+ /*
+ * Set up the xdr_buf length.
+ * This also indicates that the buffer is XDR encoded already.
+ */
+ xbufp->len = xbufp->head[0].iov_len + xbufp->page_len +
+ xbufp->tail[0].iov_len;
+ req->rq_bytes_sent = 0;
+}
+#endif
+
static void xprt_init(struct rpc_xprt *xprt, struct net *net)
{
kref_init(&xprt->kref);
spin_lock_init(&xprt->transport_lock);
spin_lock_init(&xprt->reserve_lock);
- spin_lock_init(&xprt->recv_lock);
+ spin_lock_init(&xprt->queue_lock);
INIT_LIST_HEAD(&xprt->free);
- INIT_LIST_HEAD(&xprt->recv);
+ xprt->recv_queue = RB_ROOT;
+ INIT_LIST_HEAD(&xprt->xmit_queue);
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
spin_lock_init(&xprt->bc_pa_lock);
INIT_LIST_HEAD(&xprt->bc_pa_list);
rpc_init_wait_queue(&xprt->binding, "xprt_binding");
rpc_init_wait_queue(&xprt->pending, "xprt_pending");
- rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
+ rpc_init_wait_queue(&xprt->sending, "xprt_sending");
rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
xprt_init_xid(xprt);
rqst = &req->rl_slot;
rqst->rq_xprt = xprt;
- INIT_LIST_HEAD(&rqst->rq_list);
INIT_LIST_HEAD(&rqst->rq_bc_list);
__set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state);
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
list_add(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
size = r_xprt->rx_data.inline_rsize;
rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL);
if (!xprt_connected(rqst->rq_xprt))
goto drop_connection;
+ if (!xprt_request_get_cong(rqst->rq_xprt, rqst))
+ return -EBADSLT;
+
rc = rpcrdma_bc_marshal_reply(rqst);
if (rc < 0)
goto failed_marshal;
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
struct rpc_rqst *rqst, *tmp;
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
list_for_each_entry_safe(rqst, tmp, &xprt->bc_pa_list, rq_bc_pa_list) {
list_del(&rqst->rq_bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
rpcrdma_bc_free_rqst(r_xprt, rqst);
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
}
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
}
/**
rpcrdma_recv_buffer_put(req->rl_reply);
req->rl_reply = NULL;
- spin_lock_bh(&xprt->bc_pa_lock);
+ spin_lock(&xprt->bc_pa_lock);
list_add_tail(&rqst->rq_bc_pa_list, &xprt->bc_pa_list);
- spin_unlock_bh(&xprt->bc_pa_lock);
+ spin_unlock(&xprt->bc_pa_lock);
}
/**
return true;
}
-static int
-fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
- static struct ib_fmr_attr fmr_attr = {
- .max_pages = RPCRDMA_MAX_FMR_SGES,
- .max_maps = 1,
- .page_shift = PAGE_SHIFT
- };
-
- mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(u64), GFP_KERNEL);
- if (!mr->fmr.fm_physaddrs)
- goto out_free;
-
- mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
- sizeof(*mr->mr_sg), GFP_KERNEL);
- if (!mr->mr_sg)
- goto out_free;
-
- sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
-
- mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
- &fmr_attr);
- if (IS_ERR(mr->fmr.fm_mr))
- goto out_fmr_err;
-
- INIT_LIST_HEAD(&mr->mr_list);
- return 0;
-
-out_fmr_err:
- dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
- PTR_ERR(mr->fmr.fm_mr));
-
-out_free:
- kfree(mr->mr_sg);
- kfree(mr->fmr.fm_physaddrs);
- return -ENOMEM;
-}
-
-static int
+static void
__fmr_unmap(struct rpcrdma_mr *mr)
{
LIST_HEAD(l);
list_add(&mr->fmr.fm_mr->list, &l);
rc = ib_unmap_fmr(&l);
list_del(&mr->fmr.fm_mr->list);
- return rc;
+ if (rc)
+ pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
+ mr, rc);
}
+/* Release an MR.
+ */
static void
fmr_op_release_mr(struct rpcrdma_mr *mr)
{
- LIST_HEAD(unmap_list);
int rc;
kfree(mr->fmr.fm_physaddrs);
/* In case this one was left mapped, try to unmap it
* to prevent dealloc_fmr from failing with EBUSY
*/
- rc = __fmr_unmap(mr);
- if (rc)
- pr_err("rpcrdma: final ib_unmap_fmr for %p failed %i\n",
- mr, rc);
+ __fmr_unmap(mr);
rc = ib_dealloc_fmr(mr->fmr.fm_mr);
if (rc)
kfree(mr);
}
-/* Reset of a single FMR.
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
*/
static void
-fmr_op_recover_mr(struct rpcrdma_mr *mr)
+fmr_mr_recycle_worker(struct work_struct *work)
{
+ struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- int rc;
- /* ORDER: invalidate first */
- rc = __fmr_unmap(mr);
- if (rc)
- goto out_release;
-
- /* ORDER: then DMA unmap */
- rpcrdma_mr_unmap_and_put(mr);
+ trace_xprtrdma_mr_recycle(mr);
- r_xprt->rx_stats.mrs_recovered++;
- return;
-
-out_release:
- pr_err("rpcrdma: FMR reset failed (%d), %p released\n", rc, mr);
- r_xprt->rx_stats.mrs_orphaned++;
-
- trace_xprtrdma_dma_unmap(mr);
+ trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
spin_lock(&r_xprt->rx_buf.rb_mrlock);
list_del(&mr->mr_all);
+ r_xprt->rx_stats.mrs_recycled++;
spin_unlock(&r_xprt->rx_buf.rb_mrlock);
-
fmr_op_release_mr(mr);
}
+static int
+fmr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
+{
+ static struct ib_fmr_attr fmr_attr = {
+ .max_pages = RPCRDMA_MAX_FMR_SGES,
+ .max_maps = 1,
+ .page_shift = PAGE_SHIFT
+ };
+
+ mr->fmr.fm_physaddrs = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ sizeof(u64), GFP_KERNEL);
+ if (!mr->fmr.fm_physaddrs)
+ goto out_free;
+
+ mr->mr_sg = kcalloc(RPCRDMA_MAX_FMR_SGES,
+ sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
+ goto out_free;
+
+ sg_init_table(mr->mr_sg, RPCRDMA_MAX_FMR_SGES);
+
+ mr->fmr.fm_mr = ib_alloc_fmr(ia->ri_pd, RPCRDMA_FMR_ACCESS_FLAGS,
+ &fmr_attr);
+ if (IS_ERR(mr->fmr.fm_mr))
+ goto out_fmr_err;
+
+ INIT_LIST_HEAD(&mr->mr_list);
+ INIT_WORK(&mr->mr_recycle, fmr_mr_recycle_worker);
+ return 0;
+
+out_fmr_err:
+ dprintk("RPC: %s: ib_alloc_fmr returned %ld\n", __func__,
+ PTR_ERR(mr->fmr.fm_mr));
+
+out_free:
+ kfree(mr->mr_sg);
+ kfree(mr->fmr.fm_physaddrs);
+ return -ENOMEM;
+}
+
/* On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
RPCRDMA_MAX_FMR_SGES);
+ ia->ri_max_segs += 2; /* segments for head and tail buffers */
return 0;
}
mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
- trace_xprtrdma_dma_map(mr);
+ trace_xprtrdma_mr_map(mr);
for (i = 0, dma_pages = mr->fmr.fm_physaddrs; i < mr->mr_nents; i++)
dma_pages[i] = sg_dma_address(&mr->mr_sg[i]);
list_for_each_entry(mr, mrs, mr_list) {
dprintk("RPC: %s: unmapping fmr %p\n",
__func__, &mr->fmr);
- trace_xprtrdma_localinv(mr);
+ trace_xprtrdma_mr_localinv(mr);
list_add_tail(&mr->fmr.fm_mr->list, &unmap_list);
}
r_xprt->rx_stats.local_inv_needed++;
rc = ib_unmap_fmr(&unmap_list);
if (rc)
- goto out_reset;
+ goto out_release;
/* ORDER: Now DMA unmap all of the req's MRs, and return
* them to the free MW list.
return;
-out_reset:
+out_release:
pr_err("rpcrdma: ib_unmap_fmr failed (%i)\n", rc);
while (!list_empty(mrs)) {
mr = rpcrdma_mr_pop(mrs);
list_del(&mr->fmr.fm_mr->list);
- fmr_op_recover_mr(mr);
+ rpcrdma_mr_recycle(mr);
}
}
.ro_map = fmr_op_map,
.ro_send = fmr_op_send,
.ro_unmap_sync = fmr_op_unmap_sync,
- .ro_recover_mr = fmr_op_recover_mr,
.ro_open = fmr_op_open,
.ro_maxpages = fmr_op_maxpages,
.ro_init_mr = fmr_op_init_mr,
return false;
}
+static void
+frwr_op_release_mr(struct rpcrdma_mr *mr)
+{
+ int rc;
+
+ rc = ib_dereg_mr(mr->frwr.fr_mr);
+ if (rc)
+ pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
+ mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
+}
+
+/* MRs are dynamically allocated, so simply clean up and release the MR.
+ * A replacement MR will subsequently be allocated on demand.
+ */
+static void
+frwr_mr_recycle_worker(struct work_struct *work)
+{
+ struct rpcrdma_mr *mr = container_of(work, struct rpcrdma_mr, mr_recycle);
+ enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
+
+ trace_xprtrdma_mr_recycle(mr);
+
+ if (state != FRWR_FLUSHED_LI) {
+ trace_xprtrdma_mr_unmap(mr);
+ ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ }
+
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ r_xprt->rx_stats.mrs_recycled++;
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
+ frwr_op_release_mr(mr);
+}
+
static int
frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
goto out_list_err;
INIT_LIST_HEAD(&mr->mr_list);
+ INIT_WORK(&mr->mr_recycle, frwr_mr_recycle_worker);
sg_init_table(mr->mr_sg, depth);
init_completion(&frwr->fr_linv_done);
return 0;
return rc;
}
-static void
-frwr_op_release_mr(struct rpcrdma_mr *mr)
-{
- int rc;
-
- rc = ib_dereg_mr(mr->frwr.fr_mr);
- if (rc)
- pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
- mr, rc);
- kfree(mr->mr_sg);
- kfree(mr);
-}
-
-static int
-__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
-{
- struct rpcrdma_frwr *frwr = &mr->frwr;
- int rc;
-
- rc = ib_dereg_mr(frwr->fr_mr);
- if (rc) {
- pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
- rc, mr);
- return rc;
- }
-
- frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
- ia->ri_max_frwr_depth);
- if (IS_ERR(frwr->fr_mr)) {
- pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
- PTR_ERR(frwr->fr_mr), mr);
- return PTR_ERR(frwr->fr_mr);
- }
-
- dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
- frwr->fr_state = FRWR_IS_INVALID;
- return 0;
-}
-
-/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
- */
-static void
-frwr_op_recover_mr(struct rpcrdma_mr *mr)
-{
- enum rpcrdma_frwr_state state = mr->frwr.fr_state;
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- int rc;
-
- rc = __frwr_mr_reset(ia, mr);
- if (state != FRWR_FLUSHED_LI) {
- trace_xprtrdma_dma_unmap(mr);
- ib_dma_unmap_sg(ia->ri_device,
- mr->mr_sg, mr->mr_nents, mr->mr_dir);
- }
- if (rc)
- goto out_release;
-
- rpcrdma_mr_put(mr);
- r_xprt->rx_stats.mrs_recovered++;
- return;
-
-out_release:
- pr_err("rpcrdma: FRWR reset failed %d, %p released\n", rc, mr);
- r_xprt->rx_stats.mrs_orphaned++;
-
- spin_lock(&r_xprt->rx_buf.rb_mrlock);
- list_del(&mr->mr_all);
- spin_unlock(&r_xprt->rx_buf.rb_mrlock);
-
- frwr_op_release_mr(mr);
-}
-
/* On success, sets:
* ep->rep_attr.cap.max_send_wr
* ep->rep_attr.cap.max_recv_wr
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
ia->ri_max_frwr_depth);
+ ia->ri_max_segs += 2; /* segments for head and tail buffers */
return 0;
}
mr = NULL;
do {
if (mr)
- rpcrdma_mr_defer_recovery(mr);
+ rpcrdma_mr_recycle(mr);
mr = rpcrdma_mr_get(r_xprt);
if (!mr)
return ERR_PTR(-EAGAIN);
mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
if (!mr->mr_nents)
goto out_dmamap_err;
- trace_xprtrdma_dma_map(mr);
+ trace_xprtrdma_mr_map(mr);
ibmr = frwr->fr_mr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
frwr->fr_mr, n, mr->mr_nents);
- rpcrdma_mr_defer_recovery(mr);
+ rpcrdma_mr_recycle(mr);
return ERR_PTR(-EIO);
}
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
- trace_xprtrdma_remoteinv(mr);
+ trace_xprtrdma_mr_remoteinv(mr);
mr->frwr.fr_state = FRWR_IS_INVALID;
rpcrdma_mr_unmap_and_put(mr);
break; /* only one invalidated MR per RPC */
mr->frwr.fr_state = FRWR_IS_INVALID;
frwr = &mr->frwr;
- trace_xprtrdma_localinv(mr);
+ trace_xprtrdma_mr_localinv(mr);
frwr->fr_cqe.done = frwr_wc_localinv;
last = &frwr->fr_invwr;
if (bad_wr != first)
wait_for_completion(&frwr->fr_linv_done);
if (rc)
- goto reset_mrs;
+ goto out_release;
/* ORDER: Now DMA unmap all of the MRs, and return
* them to the free MR list.
}
return;
-reset_mrs:
+out_release:
pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
- /* Find and reset the MRs in the LOCAL_INV WRs that did not
+ /* Unmap and release the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
while (bad_wr) {
frwr = container_of(bad_wr, struct rpcrdma_frwr,
fr_invwr);
mr = container_of(frwr, struct rpcrdma_mr, frwr);
-
- __frwr_mr_reset(ia, mr);
-
bad_wr = bad_wr->next;
+
+ list_del(&mr->mr_list);
+ frwr_op_release_mr(mr);
}
- goto unmap;
}
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_send = frwr_op_send,
.ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
- .ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
.ro_maxpages = frwr_op_maxpages,
.ro_init_mr = frwr_op_init_mr,
size = RPCRDMA_HDRLEN_MIN;
/* Maximum Read list size */
- maxsegs += 2; /* segment for head and tail buffers */
size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32);
/* Minimal Read chunk size */
size = RPCRDMA_HDRLEN_MIN;
/* Maximum Write list size */
- maxsegs += 2; /* segment for head and tail buffers */
size = sizeof(__be32); /* segment count */
size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32);
size += sizeof(__be32); /* list discriminator */
struct rpcrdma_mr *mr;
mr = rpcrdma_mr_pop(&req->rl_registered);
- rpcrdma_mr_defer_recovery(mr);
+ rpcrdma_mr_recycle(mr);
}
/* This implementation supports the following combinations
out_err:
switch (ret) {
case -EAGAIN:
- xprt_wait_for_buffer_space(rqst->rq_task, NULL);
+ xprt_wait_for_buffer_space(rqst->rq_xprt);
break;
case -ENOBUFS:
break;
struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
struct rpc_rqst *rqst = rep->rr_rqst;
- unsigned long cwnd;
int status;
xprt->reestablish_timeout = 0;
goto out_badheader;
out:
- spin_lock(&xprt->recv_lock);
- cwnd = xprt->cwnd;
- xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT;
- if (xprt->cwnd > cwnd)
- xprt_release_rqst_cong(rqst->rq_task);
-
+ spin_lock(&xprt->queue_lock);
xprt_complete_rqst(rqst->rq_task, status);
xprt_unpin_rqst(rqst);
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
return;
/* If the incoming reply terminated a pending RPC, the next
/* Match incoming rpcrdma_rep to an rpcrdma_req to
* get context for handling any incoming chunks.
*/
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
rqst = xprt_lookup_rqst(xprt, rep->rr_xid);
if (!rqst)
goto out_norqst;
xprt_pin_rqst(rqst);
+ spin_unlock(&xprt->queue_lock);
if (credits == 0)
credits = 1; /* don't deadlock */
else if (credits > buf->rb_max_requests)
credits = buf->rb_max_requests;
- buf->rb_credits = credits;
-
- spin_unlock(&xprt->recv_lock);
+ if (buf->rb_credits != credits) {
+ spin_lock_bh(&xprt->transport_lock);
+ buf->rb_credits = credits;
+ xprt->cwnd = credits << RPC_CWNDSHIFT;
+ spin_unlock_bh(&xprt->transport_lock);
+ }
req = rpcr_to_rdmar(rqst);
req->rl_reply = rep;
* is corrupt.
*/
out_norqst:
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
trace_xprtrdma_reply_rqst(rep);
goto repost;
if (src->iov_len < 24)
goto out_shortreply;
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
req = xprt_lookup_rqst(xprt, xid);
if (!req)
goto out_notfound;
rcvbuf->len = 0;
out_unlock:
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
out:
return ret;
* connection.
*/
static int
-xprt_rdma_bc_send_request(struct rpc_task *task)
+xprt_rdma_bc_send_request(struct rpc_rqst *rqst)
{
- struct rpc_rqst *rqst = task->tk_rqstp;
struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt;
struct svcxprt_rdma *rdma;
int ret;
dprintk("svcrdma: sending bc call with xid: %08x\n",
be32_to_cpu(rqst->rq_xid));
- if (!mutex_trylock(&sxprt->xpt_mutex)) {
- rpc_sleep_on(&sxprt->xpt_bc_pending, task, NULL);
- if (!mutex_trylock(&sxprt->xpt_mutex))
- return -EAGAIN;
- rpc_wake_up_queued_task(&sxprt->xpt_bc_pending, task);
- }
+ mutex_lock(&sxprt->xpt_mutex);
ret = -ENOTCONN;
rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt);
xprt_rdma_bc_close(struct rpc_xprt *xprt)
{
dprintk("svcrdma: %s: xprt %p\n", __func__, xprt);
+ xprt->cwnd = RPC_CWNDSHIFT;
}
static void
}
}
-void
-rpcrdma_conn_func(struct rpcrdma_ep *ep)
-{
- schedule_delayed_work(&ep->rep_connect_worker, 0);
-}
-
-void
-rpcrdma_connect_worker(struct work_struct *work)
-{
- struct rpcrdma_ep *ep =
- container_of(work, struct rpcrdma_ep, rep_connect_worker.work);
- struct rpcrdma_xprt *r_xprt =
- container_of(ep, struct rpcrdma_xprt, rx_ep);
- struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-
- spin_lock_bh(&xprt->transport_lock);
- if (ep->rep_connected > 0) {
- if (!xprt_test_and_set_connected(xprt))
- xprt_wake_pending_tasks(xprt, 0);
- } else {
- if (xprt_test_and_clear_connected(xprt))
- xprt_wake_pending_tasks(xprt, -ENOTCONN);
- }
- spin_unlock_bh(&xprt->transport_lock);
-}
-
+/**
+ * xprt_rdma_connect_worker - establish connection in the background
+ * @work: worker thread context
+ *
+ * Requester holds the xprt's send lock to prevent activity on this
+ * transport while a fresh connection is being established. RPC tasks
+ * sleep on the xprt's pending queue waiting for connect to complete.
+ */
static void
xprt_rdma_connect_worker(struct work_struct *work)
{
struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt,
rx_connect_worker.work);
struct rpc_xprt *xprt = &r_xprt->rx_xprt;
- int rc = 0;
-
- xprt_clear_connected(xprt);
+ int rc;
rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
- if (rc)
- xprt_wake_pending_tasks(xprt, rc);
-
xprt_clear_connecting(xprt);
+ if (r_xprt->rx_ep.rep_connected > 0) {
+ if (!xprt_test_and_set_connected(xprt)) {
+ xprt->stat.connect_count++;
+ xprt->stat.connect_time += (long)jiffies -
+ xprt->stat.connect_start;
+ xprt_wake_pending_tasks(xprt, -EAGAIN);
+ }
+ } else {
+ if (xprt_test_and_clear_connected(xprt))
+ xprt_wake_pending_tasks(xprt, rc);
+ }
}
+/**
+ * xprt_rdma_inject_disconnect - inject a connection fault
+ * @xprt: transport context
+ *
+ * If @xprt is connected, disconnect it to simulate spurious connection
+ * loss.
+ */
static void
xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
{
- struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt,
- rx_xprt);
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
trace_xprtrdma_inject_dsc(r_xprt);
rdma_disconnect(r_xprt->rx_ia.ri_id);
}
-/*
- * xprt_rdma_destroy
+/**
+ * xprt_rdma_destroy - Full tear down of transport
+ * @xprt: doomed transport context
*
- * Destroy the xprt.
- * Free all memory associated with the object, including its own.
- * NOTE: none of the *destroy methods free memory for their top-level
- * objects, even though they may have allocated it (they do free
- * private memory). It's up to the caller to handle it. In this
- * case (RDMA transport), all structure memory is inlined with the
- * struct rpcrdma_xprt.
+ * Caller guarantees there will be no more calls to us with
+ * this @xprt.
*/
static void
xprt_rdma_destroy(struct rpc_xprt *xprt)
cancel_delayed_work_sync(&r_xprt->rx_connect_worker);
- xprt_clear_connected(xprt);
-
rpcrdma_ep_destroy(&r_xprt->rx_ep, &r_xprt->rx_ia);
rpcrdma_buffer_destroy(&r_xprt->rx_buf);
rpcrdma_ia_close(&r_xprt->rx_ia);
}
/**
- * xprt_rdma_close - Close down RDMA connection
- * @xprt: generic transport to be closed
+ * xprt_rdma_close - close a transport connection
+ * @xprt: transport context
*
- * Called during transport shutdown reconnect, or device
- * removal. Caller holds the transport's write lock.
+ * Called during transport shutdown, reconnect, or device removal.
+ * Caller holds @xprt's send lock to prevent activity on this
+ * transport while the connection is torn down.
*/
static void
xprt_rdma_close(struct rpc_xprt *xprt)
xprt->reestablish_timeout = 0;
xprt_disconnect_done(xprt);
rpcrdma_ep_disconnect(ep, ia);
+
+ /* Prepare @xprt for the next connection by reinitializing
+ * its credit grant to one (see RFC 8166, Section 3.3.3).
+ */
+ r_xprt->rx_buf.rb_credits = 1;
+ xprt->cwnd = RPC_CWNDSHIFT;
}
/**
xprt_force_disconnect(xprt);
}
+/**
+ * xprt_rdma_connect - try to establish a transport connection
+ * @xprt: transport state
+ * @task: RPC scheduler context
+ *
+ */
static void
xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
{
* 0: Success; rq_buffer points to RPC buffer to use
* ENOMEM: Out of memory, call again later
* EIO: A permanent error occurred, do not retry
- *
- * The RDMA allocate/free functions need the task structure as a place
- * to hide the struct rpcrdma_req, which is necessary for the actual
- * send/recv sequence.
- *
- * xprt_rdma_allocate provides buffers that are already mapped for
- * DMA, and a local DMA lkey is provided for each.
*/
static int
xprt_rdma_allocate(struct rpc_task *task)
/**
* xprt_rdma_send_request - marshal and send an RPC request
- * @task: RPC task with an RPC message in rq_snd_buf
+ * @rqst: RPC message in rq_snd_buf
*
* Caller holds the transport's write lock.
*
* sent. Do not try to send this message again.
*/
static int
-xprt_rdma_send_request(struct rpc_task *task)
+xprt_rdma_send_request(struct rpc_rqst *rqst)
{
- struct rpc_rqst *rqst = task->tk_rqstp;
struct rpc_xprt *xprt = rqst->rq_xprt;
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
if (!xprt_connected(xprt))
goto drop_connection;
+ if (!xprt_request_get_cong(xprt, rqst))
+ return -EBADSLT;
+
rc = rpcrdma_marshal_req(r_xprt, rqst);
if (rc < 0)
goto failed_marshal;
/* An RPC with no reply will throw off credit accounting,
* so drop the connection to reset the credit grant.
*/
- if (!rpc_reply_expected(task))
+ if (!rpc_reply_expected(rqst->rq_task))
goto drop_connection;
return 0;
0, /* need a local port? */
xprt->stat.bind_count,
xprt->stat.connect_count,
- xprt->stat.connect_time,
+ xprt->stat.connect_time / HZ,
idle_time,
xprt->stat.sends,
xprt->stat.recvs,
r_xprt->rx_stats.bad_reply_count,
r_xprt->rx_stats.nomsg_call_count);
seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n",
- r_xprt->rx_stats.mrs_recovered,
+ r_xprt->rx_stats.mrs_recycled,
r_xprt->rx_stats.mrs_orphaned,
r_xprt->rx_stats.mrs_allocated,
r_xprt->rx_stats.local_inv_needed,
}
}
+/**
+ * rpcrdma_disconnect_worker - Force a disconnect
+ * @work: endpoint to be disconnected
+ *
+ * Provider callbacks can possibly run in an IRQ context. This function
+ * is invoked in a worker thread to guarantee that disconnect wake-up
+ * calls are always done in process context.
+ */
+static void
+rpcrdma_disconnect_worker(struct work_struct *work)
+{
+ struct rpcrdma_ep *ep = container_of(work, struct rpcrdma_ep,
+ rep_disconnect_worker.work);
+ struct rpcrdma_xprt *r_xprt =
+ container_of(ep, struct rpcrdma_xprt, rx_ep);
+
+ xprt_force_disconnect(&r_xprt->rx_xprt);
+}
+
+/**
+ * rpcrdma_qp_event_handler - Handle one QP event (error notification)
+ * @event: details of the event
+ * @context: ep that owns QP where event occurred
+ *
+ * Called from the RDMA provider (device driver) possibly in an interrupt
+ * context.
+ */
static void
-rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context)
+rpcrdma_qp_event_handler(struct ib_event *event, void *context)
{
struct rpcrdma_ep *ep = context;
struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt,
rx_ep);
- trace_xprtrdma_qp_error(r_xprt, event);
- pr_err("rpcrdma: %s on device %s ep %p\n",
- ib_event_msg(event->event), event->device->name, context);
+ trace_xprtrdma_qp_event(r_xprt, event);
+ pr_err("rpcrdma: %s on device %s connected to %s:%s\n",
+ ib_event_msg(event->event), event->device->name,
+ rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
if (ep->rep_connected == 1) {
ep->rep_connected = -EIO;
- rpcrdma_conn_func(ep);
+ schedule_delayed_work(&ep->rep_disconnect_worker, 0);
wake_up_all(&ep->rep_connect_wait);
}
}
rpcrdma_set_max_header_sizes(r_xprt);
}
+/**
+ * rpcrdma_cm_event_handler - Handle RDMA CM events
+ * @id: rdma_cm_id on which an event has occurred
+ * @event: details of the event
+ *
+ * Called with @id's mutex held. Returns 1 if caller should
+ * destroy @id, otherwise 0.
+ */
static int
-rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
+rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
- struct rpcrdma_xprt *xprt = id->context;
- struct rpcrdma_ia *ia = &xprt->rx_ia;
- struct rpcrdma_ep *ep = &xprt->rx_ep;
- int connstate = 0;
+ struct rpcrdma_xprt *r_xprt = id->context;
+ struct rpcrdma_ia *ia = &r_xprt->rx_ia;
+ struct rpcrdma_ep *ep = &r_xprt->rx_ep;
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
+
+ might_sleep();
- trace_xprtrdma_conn_upcall(xprt, event);
+ trace_xprtrdma_cm_event(r_xprt, event);
switch (event->event) {
case RDMA_CM_EVENT_ADDR_RESOLVED:
case RDMA_CM_EVENT_ROUTE_RESOLVED:
ia->ri_async_rc = 0;
complete(&ia->ri_done);
- break;
+ return 0;
case RDMA_CM_EVENT_ADDR_ERROR:
ia->ri_async_rc = -EPROTO;
complete(&ia->ri_done);
- break;
+ return 0;
case RDMA_CM_EVENT_ROUTE_ERROR:
ia->ri_async_rc = -ENETUNREACH;
complete(&ia->ri_done);
- break;
+ return 0;
case RDMA_CM_EVENT_DEVICE_REMOVAL:
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
pr_info("rpcrdma: removing device %s for %s:%s\n",
ia->ri_device->name,
- rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt));
+ rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt));
#endif
set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
ep->rep_connected = -ENODEV;
- xprt_force_disconnect(&xprt->rx_xprt);
+ xprt_force_disconnect(xprt);
wait_for_completion(&ia->ri_remove_done);
ia->ri_id = NULL;
/* Return 1 to ensure the core destroys the id. */
return 1;
case RDMA_CM_EVENT_ESTABLISHED:
- ++xprt->rx_xprt.connect_cookie;
- connstate = 1;
- rpcrdma_update_connect_private(xprt, &event->param.conn);
- goto connected;
+ ++xprt->connect_cookie;
+ ep->rep_connected = 1;
+ rpcrdma_update_connect_private(r_xprt, &event->param.conn);
+ wake_up_all(&ep->rep_connect_wait);
+ break;
case RDMA_CM_EVENT_CONNECT_ERROR:
- connstate = -ENOTCONN;
- goto connected;
+ ep->rep_connected = -ENOTCONN;
+ goto disconnected;
case RDMA_CM_EVENT_UNREACHABLE:
- connstate = -ENETUNREACH;
- goto connected;
+ ep->rep_connected = -ENETUNREACH;
+ goto disconnected;
case RDMA_CM_EVENT_REJECTED:
dprintk("rpcrdma: connection to %s:%s rejected: %s\n",
- rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
+ rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
rdma_reject_msg(id, event->status));
- connstate = -ECONNREFUSED;
+ ep->rep_connected = -ECONNREFUSED;
if (event->status == IB_CM_REJ_STALE_CONN)
- connstate = -EAGAIN;
- goto connected;
+ ep->rep_connected = -EAGAIN;
+ goto disconnected;
case RDMA_CM_EVENT_DISCONNECTED:
- ++xprt->rx_xprt.connect_cookie;
- connstate = -ECONNABORTED;
-connected:
- ep->rep_connected = connstate;
- rpcrdma_conn_func(ep);
+ ++xprt->connect_cookie;
+ ep->rep_connected = -ECONNABORTED;
+disconnected:
+ xprt_force_disconnect(xprt);
wake_up_all(&ep->rep_connect_wait);
- /*FALLTHROUGH*/
+ break;
default:
- dprintk("RPC: %s: %s:%s on %s/%s (ep 0x%p): %s\n",
- __func__,
- rpcrdma_addrstr(xprt), rpcrdma_portstr(xprt),
- ia->ri_device->name, ia->ri_ops->ro_displayname,
- ep, rdma_event_msg(event->event));
break;
}
+ dprintk("RPC: %s: %s:%s on %s/%s: %s\n", __func__,
+ rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt),
+ ia->ri_device->name, ia->ri_ops->ro_displayname,
+ rdma_event_msg(event->event));
return 0;
}
init_completion(&ia->ri_done);
init_completion(&ia->ri_remove_done);
- id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_conn_upcall,
+ id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler,
xprt, RDMA_PS_TCP, IB_QPT_RC);
if (IS_ERR(id)) {
rc = PTR_ERR(id);
if (rc)
return rc;
- ep->rep_attr.event_handler = rpcrdma_qp_async_error_upcall;
+ ep->rep_attr.event_handler = rpcrdma_qp_event_handler;
ep->rep_attr.qp_context = ep;
ep->rep_attr.srq = NULL;
ep->rep_attr.cap.max_send_sge = max_sge;
cdata->max_requests >> 2);
ep->rep_send_count = ep->rep_send_batch;
init_waitqueue_head(&ep->rep_connect_wait);
- INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker);
+ INIT_DELAYED_WORK(&ep->rep_disconnect_worker,
+ rpcrdma_disconnect_worker);
sendcq = ib_alloc_cq(ia->ri_device, NULL,
ep->rep_attr.cap.max_send_wr + 1,
void
rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
{
- cancel_delayed_work_sync(&ep->rep_connect_worker);
+ cancel_delayed_work_sync(&ep->rep_disconnect_worker);
if (ia->ri_id && ia->ri_id->qp) {
rpcrdma_ep_disconnect(ep, ia);
{
struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
rx_ia);
+ struct rpc_xprt *xprt = &r_xprt->rx_xprt;
int rc;
retry:
}
ep->rep_connected = 0;
+ xprt_clear_connected(xprt);
+
rpcrdma_post_recvs(r_xprt, true);
rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma);
sc->sc_xprt = r_xprt;
buf->rb_sc_ctxs[i] = sc;
}
- buf->rb_flags = 0;
return 0;
}
}
-static void
-rpcrdma_mr_recovery_worker(struct work_struct *work)
-{
- struct rpcrdma_buffer *buf = container_of(work, struct rpcrdma_buffer,
- rb_recovery_worker.work);
- struct rpcrdma_mr *mr;
-
- spin_lock(&buf->rb_recovery_lock);
- while (!list_empty(&buf->rb_stale_mrs)) {
- mr = rpcrdma_mr_pop(&buf->rb_stale_mrs);
- spin_unlock(&buf->rb_recovery_lock);
-
- trace_xprtrdma_recover_mr(mr);
- mr->mr_xprt->rx_ia.ri_ops->ro_recover_mr(mr);
-
- spin_lock(&buf->rb_recovery_lock);
- }
- spin_unlock(&buf->rb_recovery_lock);
-}
-
-void
-rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr)
-{
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
-
- spin_lock(&buf->rb_recovery_lock);
- rpcrdma_mr_push(mr, &buf->rb_stale_mrs);
- spin_unlock(&buf->rb_recovery_lock);
-
- schedule_delayed_work(&buf->rb_recovery_worker, 0);
-}
-
static void
rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt)
{
LIST_HEAD(free);
LIST_HEAD(all);
- for (count = 0; count < 3; count++) {
+ for (count = 0; count < ia->ri_max_segs; count++) {
struct rpcrdma_mr *mr;
int rc;
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
int i, rc;
+ buf->rb_flags = 0;
buf->rb_max_requests = r_xprt->rx_data.max_requests;
buf->rb_bc_srv_max_requests = 0;
spin_lock_init(&buf->rb_mrlock);
spin_lock_init(&buf->rb_lock);
- spin_lock_init(&buf->rb_recovery_lock);
INIT_LIST_HEAD(&buf->rb_mrs);
INIT_LIST_HEAD(&buf->rb_all);
- INIT_LIST_HEAD(&buf->rb_stale_mrs);
INIT_DELAYED_WORK(&buf->rb_refresh_worker,
rpcrdma_mr_refresh_worker);
- INIT_DELAYED_WORK(&buf->rb_recovery_worker,
- rpcrdma_mr_recovery_worker);
rpcrdma_mrs_create(r_xprt);
void
rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
{
- cancel_delayed_work_sync(&buf->rb_recovery_worker);
cancel_delayed_work_sync(&buf->rb_refresh_worker);
rpcrdma_sendctxs_destroy(buf);
{
struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
- trace_xprtrdma_dma_unmap(mr);
+ trace_xprtrdma_mr_unmap(mr);
ib_dma_unmap_sg(r_xprt->rx_ia.ri_device,
mr->mr_sg, mr->mr_nents, mr->mr_dir);
__rpcrdma_mr_put(&r_xprt->rx_buf, mr);
struct ib_recv_wr *wr, *bad_wr;
int needed, count, rc;
+ rc = 0;
+ count = 0;
needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
if (buf->rb_posted_receives > needed)
- return;
+ goto out;
needed -= buf->rb_posted_receives;
count = 0;
--needed;
}
if (!count)
- return;
+ goto out;
rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
}
}
buf->rb_posted_receives += count;
+out:
trace_xprtrdma_post_recvs(r_xprt, count, rc);
}
wait_queue_head_t rep_connect_wait;
struct rpcrdma_connect_private rep_cm_private;
struct rdma_conn_param rep_remote_cma;
- struct delayed_work rep_connect_worker;
+ struct delayed_work rep_disconnect_worker;
};
/* Pre-allocate extra Work Requests for handling backward receives
u32 mr_handle;
u32 mr_length;
u64 mr_offset;
+ struct work_struct mr_recycle;
struct list_head mr_all;
};
u32 rb_bc_max_requests;
- spinlock_t rb_recovery_lock; /* protect rb_stale_mrs */
- struct list_head rb_stale_mrs;
- struct delayed_work rb_recovery_worker;
struct delayed_work rb_refresh_worker;
};
#define rdmab_to_ia(b) (&container_of((b), struct rpcrdma_xprt, rx_buf)->rx_ia)
unsigned long hardway_register_count;
unsigned long failed_marshal_count;
unsigned long bad_reply_count;
- unsigned long mrs_recovered;
+ unsigned long mrs_recycled;
unsigned long mrs_orphaned;
unsigned long mrs_allocated;
unsigned long empty_sendctx_q;
struct list_head *mrs);
void (*ro_unmap_sync)(struct rpcrdma_xprt *,
struct list_head *);
- void (*ro_recover_mr)(struct rpcrdma_mr *mr);
int (*ro_open)(struct rpcrdma_ia *,
struct rpcrdma_ep *,
struct rpcrdma_create_data_internal *);
struct rpcrdma_create_data_internal *);
void rpcrdma_ep_destroy(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *);
-void rpcrdma_conn_func(struct rpcrdma_ep *ep);
void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *);
int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *,
struct rpcrdma_mr *rpcrdma_mr_get(struct rpcrdma_xprt *r_xprt);
void rpcrdma_mr_put(struct rpcrdma_mr *mr);
void rpcrdma_mr_unmap_and_put(struct rpcrdma_mr *mr);
-void rpcrdma_mr_defer_recovery(struct rpcrdma_mr *mr);
+
+static inline void
+rpcrdma_mr_recycle(struct rpcrdma_mr *mr)
+{
+ schedule_work(&mr->mr_recycle);
+}
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_req *);
extern unsigned int xprt_rdma_max_inline_read;
void xprt_rdma_format_addresses(struct rpc_xprt *xprt, struct sockaddr *sap);
void xprt_rdma_free_addresses(struct rpc_xprt *xprt);
-void rpcrdma_connect_worker(struct work_struct *work);
void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq);
int xprt_rdma_init(void);
void xprt_rdma_cleanup(void);
#include <net/checksum.h>
#include <net/udp.h>
#include <net/tcp.h>
+#include <linux/bvec.h>
+#include <linux/uio.h>
#include <trace/events/sunrpc.h>
#include "sunrpc.h"
-#define RPC_TCP_READ_CHUNK_SZ (3*512*1024)
-
static void xs_close(struct rpc_xprt *xprt);
static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock);
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &xprt_min_resvport_limit,
- .extra2 = &xprt_max_resvport
+ .extra2 = &xprt_max_resvport_limit
},
{
.procname = "max_resvport",
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &xprt_min_resvport,
+ .extra1 = &xprt_min_resvport_limit,
.extra2 = &xprt_max_resvport_limit
},
{
}
}
+static size_t
+xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp)
+{
+ size_t i,n;
+
+ if (!(buf->flags & XDRBUF_SPARSE_PAGES))
+ return want;
+ if (want > buf->page_len)
+ want = buf->page_len;
+ n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ for (i = 0; i < n; i++) {
+ if (buf->pages[i])
+ continue;
+ buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp);
+ if (!buf->pages[i]) {
+ buf->page_len = (i * PAGE_SIZE) - buf->page_base;
+ return buf->page_len;
+ }
+ }
+ return want;
+}
+
+static ssize_t
+xs_sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags, size_t seek)
+{
+ ssize_t ret;
+ if (seek != 0)
+ iov_iter_advance(&msg->msg_iter, seek);
+ ret = sock_recvmsg(sock, msg, flags);
+ return ret > 0 ? ret + seek : ret;
+}
+
+static ssize_t
+xs_read_kvec(struct socket *sock, struct msghdr *msg, int flags,
+ struct kvec *kvec, size_t count, size_t seek)
+{
+ iov_iter_kvec(&msg->msg_iter, READ | ITER_KVEC, kvec, 1, count);
+ return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_bvec(struct socket *sock, struct msghdr *msg, int flags,
+ struct bio_vec *bvec, unsigned long nr, size_t count,
+ size_t seek)
+{
+ iov_iter_bvec(&msg->msg_iter, READ | ITER_BVEC, bvec, nr, count);
+ return xs_sock_recvmsg(sock, msg, flags, seek);
+}
+
+static ssize_t
+xs_read_discard(struct socket *sock, struct msghdr *msg, int flags,
+ size_t count)
+{
+ struct kvec kvec = { 0 };
+ return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0);
+}
+
+static ssize_t
+xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags,
+ struct xdr_buf *buf, size_t count, size_t seek, size_t *read)
+{
+ size_t want, seek_init = seek, offset = 0;
+ ssize_t ret;
+
+ if (seek < buf->head[0].iov_len) {
+ want = min_t(size_t, count, buf->head[0].iov_len);
+ ret = xs_read_kvec(sock, msg, flags, &buf->head[0], want, seek);
+ if (ret <= 0)
+ goto sock_err;
+ offset += ret;
+ if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ goto out;
+ if (ret != want)
+ goto eagain;
+ seek = 0;
+ } else {
+ seek -= buf->head[0].iov_len;
+ offset += buf->head[0].iov_len;
+ }
+ if (seek < buf->page_len) {
+ want = xs_alloc_sparse_pages(buf,
+ min_t(size_t, count - offset, buf->page_len),
+ GFP_NOWAIT);
+ ret = xs_read_bvec(sock, msg, flags, buf->bvec,
+ xdr_buf_pagecount(buf),
+ want + buf->page_base,
+ seek + buf->page_base);
+ if (ret <= 0)
+ goto sock_err;
+ offset += ret - buf->page_base;
+ if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ goto out;
+ if (ret != want)
+ goto eagain;
+ seek = 0;
+ } else {
+ seek -= buf->page_len;
+ offset += buf->page_len;
+ }
+ if (seek < buf->tail[0].iov_len) {
+ want = min_t(size_t, count - offset, buf->tail[0].iov_len);
+ ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek);
+ if (ret <= 0)
+ goto sock_err;
+ offset += ret;
+ if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ goto out;
+ if (ret != want)
+ goto eagain;
+ } else
+ offset += buf->tail[0].iov_len;
+ ret = -EMSGSIZE;
+ msg->msg_flags |= MSG_TRUNC;
+out:
+ *read = offset - seek_init;
+ return ret;
+eagain:
+ ret = -EAGAIN;
+ goto out;
+sock_err:
+ offset += seek;
+ goto out;
+}
+
+static void
+xs_read_header(struct sock_xprt *transport, struct xdr_buf *buf)
+{
+ if (!transport->recv.copied) {
+ if (buf->head[0].iov_len >= transport->recv.offset)
+ memcpy(buf->head[0].iov_base,
+ &transport->recv.xid,
+ transport->recv.offset);
+ transport->recv.copied = transport->recv.offset;
+ }
+}
+
+static bool
+xs_read_stream_request_done(struct sock_xprt *transport)
+{
+ return transport->recv.fraghdr & cpu_to_be32(RPC_LAST_STREAM_FRAGMENT);
+}
+
+static ssize_t
+xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg,
+ int flags, struct rpc_rqst *req)
+{
+ struct xdr_buf *buf = &req->rq_private_buf;
+ size_t want, read;
+ ssize_t ret;
+
+ xs_read_header(transport, buf);
+
+ want = transport->recv.len - transport->recv.offset;
+ ret = xs_read_xdr_buf(transport->sock, msg, flags, buf,
+ transport->recv.copied + want, transport->recv.copied,
+ &read);
+ transport->recv.offset += read;
+ transport->recv.copied += read;
+ if (transport->recv.offset == transport->recv.len) {
+ if (xs_read_stream_request_done(transport))
+ msg->msg_flags |= MSG_EOR;
+ return transport->recv.copied;
+ }
+
+ switch (ret) {
+ case -EMSGSIZE:
+ return transport->recv.copied;
+ case 0:
+ return -ESHUTDOWN;
+ default:
+ if (ret < 0)
+ return ret;
+ }
+ return -EAGAIN;
+}
+
+static size_t
+xs_read_stream_headersize(bool isfrag)
+{
+ if (isfrag)
+ return sizeof(__be32);
+ return 3 * sizeof(__be32);
+}
+
+static ssize_t
+xs_read_stream_header(struct sock_xprt *transport, struct msghdr *msg,
+ int flags, size_t want, size_t seek)
+{
+ struct kvec kvec = {
+ .iov_base = &transport->recv.fraghdr,
+ .iov_len = want,
+ };
+ return xs_read_kvec(transport->sock, msg, flags, &kvec, want, seek);
+}
+
+#if defined(CONFIG_SUNRPC_BACKCHANNEL)
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct rpc_rqst *req;
+ ssize_t ret;
+
+ /* Look up and lock the request corresponding to the given XID */
+ req = xprt_lookup_bc_request(xprt, transport->recv.xid);
+ if (!req) {
+ printk(KERN_WARNING "Callback slot table overflowed\n");
+ return -ESHUTDOWN;
+ }
+
+ ret = xs_read_stream_request(transport, msg, flags, req);
+ if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ xprt_complete_bc_request(req, ret);
+
+ return ret;
+}
+#else /* CONFIG_SUNRPC_BACKCHANNEL */
+static ssize_t
+xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+ return -ESHUTDOWN;
+}
+#endif /* CONFIG_SUNRPC_BACKCHANNEL */
+
+static ssize_t
+xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags)
+{
+ struct rpc_xprt *xprt = &transport->xprt;
+ struct rpc_rqst *req;
+ ssize_t ret = 0;
+
+ /* Look up and lock the request corresponding to the given XID */
+ spin_lock(&xprt->queue_lock);
+ req = xprt_lookup_rqst(xprt, transport->recv.xid);
+ if (!req) {
+ msg->msg_flags |= MSG_TRUNC;
+ goto out;
+ }
+ xprt_pin_rqst(req);
+ spin_unlock(&xprt->queue_lock);
+
+ ret = xs_read_stream_request(transport, msg, flags, req);
+
+ spin_lock(&xprt->queue_lock);
+ if (msg->msg_flags & (MSG_EOR|MSG_TRUNC))
+ xprt_complete_rqst(req->rq_task, ret);
+ xprt_unpin_rqst(req);
+out:
+ spin_unlock(&xprt->queue_lock);
+ return ret;
+}
+
+static ssize_t
+xs_read_stream(struct sock_xprt *transport, int flags)
+{
+ struct msghdr msg = { 0 };
+ size_t want, read = 0;
+ ssize_t ret = 0;
+
+ if (transport->recv.len == 0) {
+ want = xs_read_stream_headersize(transport->recv.copied != 0);
+ ret = xs_read_stream_header(transport, &msg, flags, want,
+ transport->recv.offset);
+ if (ret <= 0)
+ goto out_err;
+ transport->recv.offset = ret;
+ if (ret != want) {
+ ret = -EAGAIN;
+ goto out_err;
+ }
+ transport->recv.len = be32_to_cpu(transport->recv.fraghdr) &
+ RPC_FRAGMENT_SIZE_MASK;
+ transport->recv.offset -= sizeof(transport->recv.fraghdr);
+ read = ret;
+ }
+
+ switch (be32_to_cpu(transport->recv.calldir)) {
+ case RPC_CALL:
+ ret = xs_read_stream_call(transport, &msg, flags);
+ break;
+ case RPC_REPLY:
+ ret = xs_read_stream_reply(transport, &msg, flags);
+ }
+ if (msg.msg_flags & MSG_TRUNC) {
+ transport->recv.calldir = cpu_to_be32(-1);
+ transport->recv.copied = -1;
+ }
+ if (ret < 0)
+ goto out_err;
+ read += ret;
+ if (transport->recv.offset < transport->recv.len) {
+ ret = xs_read_discard(transport->sock, &msg, flags,
+ transport->recv.len - transport->recv.offset);
+ if (ret <= 0)
+ goto out_err;
+ transport->recv.offset += ret;
+ read += ret;
+ if (transport->recv.offset != transport->recv.len)
+ return -EAGAIN;
+ }
+ if (xs_read_stream_request_done(transport)) {
+ trace_xs_stream_read_request(transport);
+ transport->recv.copied = 0;
+ }
+ transport->recv.offset = 0;
+ transport->recv.len = 0;
+ return read;
+out_err:
+ switch (ret) {
+ case 0:
+ case -ESHUTDOWN:
+ xprt_force_disconnect(&transport->xprt);
+ return -ESHUTDOWN;
+ }
+ return ret;
+}
+
+static void xs_stream_data_receive(struct sock_xprt *transport)
+{
+ size_t read = 0;
+ ssize_t ret = 0;
+
+ mutex_lock(&transport->recv_mutex);
+ if (transport->sock == NULL)
+ goto out;
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
+ for (;;) {
+ ret = xs_read_stream(transport, MSG_DONTWAIT);
+ if (ret <= 0)
+ break;
+ read += ret;
+ cond_resched();
+ }
+out:
+ mutex_unlock(&transport->recv_mutex);
+ trace_xs_stream_read_data(&transport->xprt, ret, read);
+}
+
+static void xs_stream_data_receive_workfn(struct work_struct *work)
+{
+ struct sock_xprt *transport =
+ container_of(work, struct sock_xprt, recv_worker);
+ xs_stream_data_receive(transport);
+}
+
+static void
+xs_stream_reset_connect(struct sock_xprt *transport)
+{
+ transport->recv.offset = 0;
+ transport->recv.len = 0;
+ transport->recv.copied = 0;
+ transport->xmit.offset = 0;
+ transport->xprt.stat.connect_count++;
+ transport->xprt.stat.connect_start = jiffies;
+}
+
#define XS_SENDMSG_FLAGS (MSG_DONTWAIT | MSG_NOSIGNAL)
static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
return err;
}
-static void xs_nospace_callback(struct rpc_task *task)
-{
- struct sock_xprt *transport = container_of(task->tk_rqstp->rq_xprt, struct sock_xprt, xprt);
-
- transport->inet->sk_write_pending--;
-}
-
/**
- * xs_nospace - place task on wait queue if transmit was incomplete
- * @task: task to put to sleep
+ * xs_nospace - handle transmit was incomplete
+ * @req: pointer to RPC request
*
*/
-static int xs_nospace(struct rpc_task *task)
+static int xs_nospace(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct sock *sk = transport->inet;
int ret = -EAGAIN;
dprintk("RPC: %5u xmit incomplete (%u left of %u)\n",
- task->tk_pid, req->rq_slen - req->rq_bytes_sent,
+ req->rq_task->tk_pid,
+ req->rq_slen - transport->xmit.offset,
req->rq_slen);
/* Protect against races with write_space */
if (xprt_connected(xprt)) {
/* wait for more buffer space */
sk->sk_write_pending++;
- xprt_wait_for_buffer_space(task, xs_nospace_callback);
+ xprt_wait_for_buffer_space(xprt);
} else
ret = -ENOTCONN;
return ret;
}
+static void
+xs_stream_prepare_request(struct rpc_rqst *req)
+{
+ req->rq_task->tk_status = xdr_alloc_bvec(&req->rq_rcv_buf, GFP_NOIO);
+}
+
+/*
+ * Determine if the previous message in the stream was aborted before it
+ * could complete transmission.
+ */
+static bool
+xs_send_request_was_aborted(struct sock_xprt *transport, struct rpc_rqst *req)
+{
+ return transport->xmit.offset != 0 && req->rq_bytes_sent == 0;
+}
+
/*
* Construct a stream transport record marker in @buf.
*/
/**
* xs_local_send_request - write an RPC request to an AF_LOCAL socket
- * @task: RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
*
* Return values:
* 0: The request has been sent
* ENOTCONN: Caller needs to invoke connect logic then call again
* other: Some other error occured, the request was not sent
*/
-static int xs_local_send_request(struct rpc_task *task)
+static int xs_local_send_request(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport =
container_of(xprt, struct sock_xprt, xprt);
int status;
int sent = 0;
+ /* Close the stream if the previous transmission was incomplete */
+ if (xs_send_request_was_aborted(transport, req)) {
+ xs_close(xprt);
+ return -ENOTCONN;
+ }
+
xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
req->rq_svec->iov_base, req->rq_svec->iov_len);
req->rq_xtime = ktime_get();
- status = xs_sendpages(transport->sock, NULL, 0, xdr, req->rq_bytes_sent,
+ status = xs_sendpages(transport->sock, NULL, 0, xdr,
+ transport->xmit.offset,
true, &sent);
dprintk("RPC: %s(%u) = %d\n",
- __func__, xdr->len - req->rq_bytes_sent, status);
+ __func__, xdr->len - transport->xmit.offset, status);
if (status == -EAGAIN && sock_writeable(transport->inet))
status = -ENOBUFS;
if (likely(sent > 0) || status == 0) {
- req->rq_bytes_sent += sent;
- req->rq_xmit_bytes_sent += sent;
+ transport->xmit.offset += sent;
+ req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ req->rq_xmit_bytes_sent += transport->xmit.offset;
req->rq_bytes_sent = 0;
+ transport->xmit.offset = 0;
return 0;
}
status = -EAGAIN;
case -ENOBUFS:
break;
case -EAGAIN:
- status = xs_nospace(task);
+ status = xs_nospace(req);
break;
default:
dprintk("RPC: sendmsg returned unrecognized error %d\n",
/**
* xs_udp_send_request - write an RPC request to a UDP socket
- * @task: address of RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
*
* Return values:
* 0: The request has been sent
* ENOTCONN: Caller needs to invoke connect logic then call again
* other: Some other error occurred, the request was not sent
*/
-static int xs_udp_send_request(struct rpc_task *task)
+static int xs_udp_send_request(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
if (!xprt_bound(xprt))
return -ENOTCONN;
+
+ if (!xprt_request_get_cong(xprt, req))
+ return -EBADSLT;
+
req->rq_xtime = ktime_get();
status = xs_sendpages(transport->sock, xs_addr(xprt), xprt->addrlen,
- xdr, req->rq_bytes_sent, true, &sent);
+ xdr, 0, true, &sent);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
- xdr->len - req->rq_bytes_sent, status);
+ xdr->len, status);
/* firewall is blocking us, don't return -EAGAIN or we end up looping */
if (status == -EPERM)
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(task);
+ status = xs_nospace(req);
break;
case -ENETUNREACH:
case -ENOBUFS:
/**
* xs_tcp_send_request - write an RPC request to a TCP socket
- * @task: address of RPC task that manages the state of an RPC request
+ * @req: pointer to RPC request
*
* Return values:
* 0: The request has been sent
* XXX: In the case of soft timeouts, should we eventually give up
* if sendmsg is not able to make progress?
*/
-static int xs_tcp_send_request(struct rpc_task *task)
+static int xs_tcp_send_request(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
int status;
int sent;
+ /* Close the stream if the previous transmission was incomplete */
+ if (xs_send_request_was_aborted(transport, req)) {
+ if (transport->sock != NULL)
+ kernel_sock_shutdown(transport->sock, SHUT_RDWR);
+ return -ENOTCONN;
+ }
+
xs_encode_stream_record_marker(&req->rq_snd_buf);
xs_pktdump("packet data:",
* completes while the socket holds a reference to the pages,
* then we may end up resending corrupted data.
*/
- if (task->tk_flags & RPC_TASK_SENT)
+ if (req->rq_task->tk_flags & RPC_TASK_SENT)
zerocopy = false;
if (test_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state))
while (1) {
sent = 0;
status = xs_sendpages(transport->sock, NULL, 0, xdr,
- req->rq_bytes_sent, zerocopy, &sent);
+ transport->xmit.offset,
+ zerocopy, &sent);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
- xdr->len - req->rq_bytes_sent, status);
+ xdr->len - transport->xmit.offset, status);
/* If we've sent the entire packet, immediately
* reset the count of bytes sent. */
- req->rq_bytes_sent += sent;
- req->rq_xmit_bytes_sent += sent;
+ transport->xmit.offset += sent;
+ req->rq_bytes_sent = transport->xmit.offset;
if (likely(req->rq_bytes_sent >= req->rq_slen)) {
+ req->rq_xmit_bytes_sent += transport->xmit.offset;
req->rq_bytes_sent = 0;
+ transport->xmit.offset = 0;
return 0;
}
/* Should we call xs_close() here? */
break;
case -EAGAIN:
- status = xs_nospace(task);
+ status = xs_nospace(req);
break;
case -ECONNRESET:
case -ECONNREFUSED:
return status;
}
-/**
- * xs_tcp_release_xprt - clean up after a tcp transmission
- * @xprt: transport
- * @task: rpc task
- *
- * This cleans up if an error causes us to abort the transmission of a request.
- * In this case, the socket may need to be reset in order to avoid confusing
- * the server.
- */
-static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
-{
- struct rpc_rqst *req;
-
- if (task != xprt->snd_task)
- return;
- if (task == NULL)
- goto out_release;
- req = task->tk_rqstp;
- if (req == NULL)
- goto out_release;
- if (req->rq_bytes_sent == 0)
- goto out_release;
- if (req->rq_bytes_sent == req->rq_snd_buf.len)
- goto out_release;
- set_bit(XPRT_CLOSE_WAIT, &xprt->state);
-out_release:
- xprt_release_xprt(xprt, task);
-}
-
static void xs_save_old_callbacks(struct sock_xprt *transport, struct sock *sk)
{
transport->old_data_ready = sk->sk_data_ready;
module_put(THIS_MODULE);
}
-static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
-{
- struct xdr_skb_reader desc = {
- .skb = skb,
- .offset = sizeof(rpc_fraghdr),
- .count = skb->len - sizeof(rpc_fraghdr),
- };
-
- if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
- return -1;
- if (desc.count)
- return -1;
- return 0;
-}
-
-/**
- * xs_local_data_read_skb
- * @xprt: transport
- * @sk: socket
- * @skb: skbuff
- *
- * Currently this assumes we can read the whole reply in a single gulp.
- */
-static void xs_local_data_read_skb(struct rpc_xprt *xprt,
- struct sock *sk,
- struct sk_buff *skb)
-{
- struct rpc_task *task;
- struct rpc_rqst *rovr;
- int repsize, copied;
- u32 _xid;
- __be32 *xp;
-
- repsize = skb->len - sizeof(rpc_fraghdr);
- if (repsize < 4) {
- dprintk("RPC: impossible RPC reply size %d\n", repsize);
- return;
- }
-
- /* Copy the XID from the skb... */
- xp = skb_header_pointer(skb, sizeof(rpc_fraghdr), sizeof(_xid), &_xid);
- if (xp == NULL)
- return;
-
- /* Look up and lock the request corresponding to the given XID */
- spin_lock(&xprt->recv_lock);
- rovr = xprt_lookup_rqst(xprt, *xp);
- if (!rovr)
- goto out_unlock;
- xprt_pin_rqst(rovr);
- spin_unlock(&xprt->recv_lock);
- task = rovr->rq_task;
-
- copied = rovr->rq_private_buf.buflen;
- if (copied > repsize)
- copied = repsize;
-
- if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) {
- dprintk("RPC: sk_buff copy failed\n");
- spin_lock(&xprt->recv_lock);
- goto out_unpin;
- }
-
- spin_lock(&xprt->recv_lock);
- xprt_complete_rqst(task, copied);
-out_unpin:
- xprt_unpin_rqst(rovr);
- out_unlock:
- spin_unlock(&xprt->recv_lock);
-}
-
-static void xs_local_data_receive(struct sock_xprt *transport)
-{
- struct sk_buff *skb;
- struct sock *sk;
- int err;
-
-restart:
- mutex_lock(&transport->recv_mutex);
- sk = transport->inet;
- if (sk == NULL)
- goto out;
- for (;;) {
- skb = skb_recv_datagram(sk, 0, 1, &err);
- if (skb != NULL) {
- xs_local_data_read_skb(&transport->xprt, sk, skb);
- skb_free_datagram(sk, skb);
- continue;
- }
- if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
- break;
- if (need_resched()) {
- mutex_unlock(&transport->recv_mutex);
- cond_resched();
- goto restart;
- }
- }
-out:
- mutex_unlock(&transport->recv_mutex);
-}
-
-static void xs_local_data_receive_workfn(struct work_struct *work)
-{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, recv_worker);
- xs_local_data_receive(transport);
-}
-
/**
* xs_udp_data_read_skb - receive callback for UDP sockets
* @xprt: transport
return;
/* Look up and lock the request corresponding to the given XID */
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
rovr = xprt_lookup_rqst(xprt, *xp);
if (!rovr)
goto out_unlock;
xprt_pin_rqst(rovr);
xprt_update_rtt(rovr->rq_task);
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
task = rovr->rq_task;
if ((copied = rovr->rq_private_buf.buflen) > repsize)
/* Suck it into the iovec, verify checksum if not done by hw. */
if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) {
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
__UDPX_INC_STATS(sk, UDP_MIB_INERRORS);
goto out_unpin;
}
spin_lock_bh(&xprt->transport_lock);
xprt_adjust_cwnd(xprt, task, copied);
spin_unlock_bh(&xprt->transport_lock);
- spin_lock(&xprt->recv_lock);
+ spin_lock(&xprt->queue_lock);
xprt_complete_rqst(task, copied);
__UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS);
out_unpin:
xprt_unpin_rqst(rovr);
out_unlock:
- spin_unlock(&xprt->recv_lock);
+ spin_unlock(&xprt->queue_lock);
}
static void xs_udp_data_receive(struct sock_xprt *transport)
struct sock *sk;
int err;
-restart:
mutex_lock(&transport->recv_mutex);
sk = transport->inet;
if (sk == NULL)
goto out;
+ clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
for (;;) {
skb = skb_recv_udp(sk, 0, 1, &err);
- if (skb != NULL) {
- xs_udp_data_read_skb(&transport->xprt, sk, skb);
- consume_skb(skb);
- continue;
- }
- if (!test_and_clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
+ if (skb == NULL)
break;
- if (need_resched()) {
- mutex_unlock(&transport->recv_mutex);
- cond_resched();
- goto restart;
- }
+ xs_udp_data_read_skb(&transport->xprt, sk, skb);
+ consume_skb(skb);
+ cond_resched();
}
out:
mutex_unlock(&transport->recv_mutex);
xprt_force_disconnect(xprt);
}
-static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- size_t len, used;
- char *p;
-
- p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
- len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
- used = xdr_skb_read_bits(desc, p, len);
- transport->tcp_offset += used;
- if (used != len)
- return;
-
- transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
- if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
- transport->tcp_flags |= TCP_RCV_LAST_FRAG;
- else
- transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
- transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
-
- transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
- transport->tcp_offset = 0;
-
- /* Sanity check of the record length */
- if (unlikely(transport->tcp_reclen < 8)) {
- dprintk("RPC: invalid TCP record fragment length\n");
- xs_tcp_force_close(xprt);
- return;
- }
- dprintk("RPC: reading TCP record fragment of length %d\n",
- transport->tcp_reclen);
-}
-
-static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
-{
- if (transport->tcp_offset == transport->tcp_reclen) {
- transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
- transport->tcp_offset = 0;
- if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- transport->tcp_flags |= TCP_RCV_COPY_XID;
- transport->tcp_copied = 0;
- }
- }
-}
-
-static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
- size_t len, used;
- char *p;
-
- len = sizeof(transport->tcp_xid) - transport->tcp_offset;
- dprintk("RPC: reading XID (%zu bytes)\n", len);
- p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
- used = xdr_skb_read_bits(desc, p, len);
- transport->tcp_offset += used;
- if (used != len)
- return;
- transport->tcp_flags &= ~TCP_RCV_COPY_XID;
- transport->tcp_flags |= TCP_RCV_READ_CALLDIR;
- transport->tcp_copied = 4;
- dprintk("RPC: reading %s XID %08x\n",
- (transport->tcp_flags & TCP_RPC_REPLY) ? "reply for"
- : "request with",
- ntohl(transport->tcp_xid));
- xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
- struct xdr_skb_reader *desc)
-{
- size_t len, used;
- u32 offset;
- char *p;
-
- /*
- * We want transport->tcp_offset to be 8 at the end of this routine
- * (4 bytes for the xid and 4 bytes for the call/reply flag).
- * When this function is called for the first time,
- * transport->tcp_offset is 4 (after having already read the xid).
- */
- offset = transport->tcp_offset - sizeof(transport->tcp_xid);
- len = sizeof(transport->tcp_calldir) - offset;
- dprintk("RPC: reading CALL/REPLY flag (%zu bytes)\n", len);
- p = ((char *) &transport->tcp_calldir) + offset;
- used = xdr_skb_read_bits(desc, p, len);
- transport->tcp_offset += used;
- if (used != len)
- return;
- transport->tcp_flags &= ~TCP_RCV_READ_CALLDIR;
- /*
- * We don't yet have the XDR buffer, so we will write the calldir
- * out after we get the buffer from the 'struct rpc_rqst'
- */
- switch (ntohl(transport->tcp_calldir)) {
- case RPC_REPLY:
- transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
- transport->tcp_flags |= TCP_RCV_COPY_DATA;
- transport->tcp_flags |= TCP_RPC_REPLY;
- break;
- case RPC_CALL:
- transport->tcp_flags |= TCP_RCV_COPY_CALLDIR;
- transport->tcp_flags |= TCP_RCV_COPY_DATA;
- transport->tcp_flags &= ~TCP_RPC_REPLY;
- break;
- default:
- dprintk("RPC: invalid request message type\n");
- xs_tcp_force_close(&transport->xprt);
- }
- xs_tcp_check_fraghdr(transport);
-}
-
-static inline void xs_tcp_read_common(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc,
- struct rpc_rqst *req)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
- struct xdr_buf *rcvbuf;
- size_t len;
- ssize_t r;
-
- rcvbuf = &req->rq_private_buf;
-
- if (transport->tcp_flags & TCP_RCV_COPY_CALLDIR) {
- /*
- * Save the RPC direction in the XDR buffer
- */
- memcpy(rcvbuf->head[0].iov_base + transport->tcp_copied,
- &transport->tcp_calldir,
- sizeof(transport->tcp_calldir));
- transport->tcp_copied += sizeof(transport->tcp_calldir);
- transport->tcp_flags &= ~TCP_RCV_COPY_CALLDIR;
- }
-
- len = desc->count;
- if (len > transport->tcp_reclen - transport->tcp_offset)
- desc->count = transport->tcp_reclen - transport->tcp_offset;
- r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
- desc, xdr_skb_read_bits);
-
- if (desc->count) {
- /* Error when copying to the receive buffer,
- * usually because we weren't able to allocate
- * additional buffer pages. All we can do now
- * is turn off TCP_RCV_COPY_DATA, so the request
- * will not receive any additional updates,
- * and time out.
- * Any remaining data from this record will
- * be discarded.
- */
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- dprintk("RPC: XID %08x truncated request\n",
- ntohl(transport->tcp_xid));
- dprintk("RPC: xprt = %p, tcp_copied = %lu, "
- "tcp_offset = %u, tcp_reclen = %u\n",
- xprt, transport->tcp_copied,
- transport->tcp_offset, transport->tcp_reclen);
- return;
- }
-
- transport->tcp_copied += r;
- transport->tcp_offset += r;
- desc->count = len - r;
-
- dprintk("RPC: XID %08x read %zd bytes\n",
- ntohl(transport->tcp_xid), r);
- dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, "
- "tcp_reclen = %u\n", xprt, transport->tcp_copied,
- transport->tcp_offset, transport->tcp_reclen);
-
- if (transport->tcp_copied == req->rq_private_buf.buflen)
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- else if (transport->tcp_offset == transport->tcp_reclen) {
- if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- }
-}
-
-/*
- * Finds the request corresponding to the RPC xid and invokes the common
- * tcp read code to read the data.
- */
-static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
- struct rpc_rqst *req;
-
- dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid));
-
- /* Find and lock the request corresponding to this xid */
- spin_lock(&xprt->recv_lock);
- req = xprt_lookup_rqst(xprt, transport->tcp_xid);
- if (!req) {
- dprintk("RPC: XID %08x request not found!\n",
- ntohl(transport->tcp_xid));
- spin_unlock(&xprt->recv_lock);
- return -1;
- }
- xprt_pin_rqst(req);
- spin_unlock(&xprt->recv_lock);
-
- xs_tcp_read_common(xprt, desc, req);
-
- spin_lock(&xprt->recv_lock);
- if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
- xprt_complete_rqst(req->rq_task, transport->tcp_copied);
- xprt_unpin_rqst(req);
- spin_unlock(&xprt->recv_lock);
- return 0;
-}
-
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
-/*
- * Obtains an rpc_rqst previously allocated and invokes the common
- * tcp read code to read the data. The result is placed in the callback
- * queue.
- * If we're unable to obtain the rpc_rqst we schedule the closing of the
- * connection and return -1.
- */
-static int xs_tcp_read_callback(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
- struct rpc_rqst *req;
-
- /* Look up the request corresponding to the given XID */
- req = xprt_lookup_bc_request(xprt, transport->tcp_xid);
- if (req == NULL) {
- printk(KERN_WARNING "Callback slot table overflowed\n");
- xprt_force_disconnect(xprt);
- return -1;
- }
-
- dprintk("RPC: read callback XID %08x\n", ntohl(req->rq_xid));
- xs_tcp_read_common(xprt, desc, req);
-
- if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
- xprt_complete_bc_request(req, transport->tcp_copied);
-
- return 0;
-}
-
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
-
- return (transport->tcp_flags & TCP_RPC_REPLY) ?
- xs_tcp_read_reply(xprt, desc) :
- xs_tcp_read_callback(xprt, desc);
-}
-
static int xs_tcp_bc_up(struct svc_serv *serv, struct net *net)
{
int ret;
{
return PAGE_SIZE;
}
-#else
-static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- return xs_tcp_read_reply(xprt, desc);
-}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-/*
- * Read data off the transport. This can be either an RPC_CALL or an
- * RPC_REPLY. Relay the processing to helper functions.
- */
-static void xs_tcp_read_data(struct rpc_xprt *xprt,
- struct xdr_skb_reader *desc)
-{
- struct sock_xprt *transport =
- container_of(xprt, struct sock_xprt, xprt);
-
- if (_xs_tcp_read_data(xprt, desc) == 0)
- xs_tcp_check_fraghdr(transport);
- else {
- /*
- * The transport_lock protects the request handling.
- * There's no need to hold it to update the tcp_flags.
- */
- transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
- }
-}
-
-static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
-{
- size_t len;
-
- len = transport->tcp_reclen - transport->tcp_offset;
- if (len > desc->count)
- len = desc->count;
- desc->count -= len;
- desc->offset += len;
- transport->tcp_offset += len;
- dprintk("RPC: discarded %zu bytes\n", len);
- xs_tcp_check_fraghdr(transport);
-}
-
-static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
-{
- struct rpc_xprt *xprt = rd_desc->arg.data;
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
- struct xdr_skb_reader desc = {
- .skb = skb,
- .offset = offset,
- .count = len,
- };
- size_t ret;
-
- dprintk("RPC: xs_tcp_data_recv started\n");
- do {
- trace_xs_tcp_data_recv(transport);
- /* Read in a new fragment marker if necessary */
- /* Can we ever really expect to get completely empty fragments? */
- if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
- xs_tcp_read_fraghdr(xprt, &desc);
- continue;
- }
- /* Read in the xid if necessary */
- if (transport->tcp_flags & TCP_RCV_COPY_XID) {
- xs_tcp_read_xid(transport, &desc);
- continue;
- }
- /* Read in the call/reply flag */
- if (transport->tcp_flags & TCP_RCV_READ_CALLDIR) {
- xs_tcp_read_calldir(transport, &desc);
- continue;
- }
- /* Read in the request data */
- if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
- xs_tcp_read_data(xprt, &desc);
- continue;
- }
- /* Skip over any trailing bytes on short reads */
- xs_tcp_read_discard(transport, &desc);
- } while (desc.count);
- ret = len - desc.count;
- if (ret < rd_desc->count)
- rd_desc->count -= ret;
- else
- rd_desc->count = 0;
- trace_xs_tcp_data_recv(transport);
- dprintk("RPC: xs_tcp_data_recv done\n");
- return ret;
-}
-
-static void xs_tcp_data_receive(struct sock_xprt *transport)
-{
- struct rpc_xprt *xprt = &transport->xprt;
- struct sock *sk;
- read_descriptor_t rd_desc = {
- .arg.data = xprt,
- };
- unsigned long total = 0;
- int read = 0;
-
-restart:
- mutex_lock(&transport->recv_mutex);
- sk = transport->inet;
- if (sk == NULL)
- goto out;
-
- /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */
- for (;;) {
- rd_desc.count = RPC_TCP_READ_CHUNK_SZ;
- lock_sock(sk);
- read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv);
- if (rd_desc.count != 0 || read < 0) {
- clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state);
- release_sock(sk);
- break;
- }
- release_sock(sk);
- total += read;
- if (need_resched()) {
- mutex_unlock(&transport->recv_mutex);
- cond_resched();
- goto restart;
- }
- }
- if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state))
- queue_work(xprtiod_workqueue, &transport->recv_worker);
-out:
- mutex_unlock(&transport->recv_mutex);
- trace_xs_tcp_data_ready(xprt, read, total);
-}
-
-static void xs_tcp_data_receive_workfn(struct work_struct *work)
-{
- struct sock_xprt *transport =
- container_of(work, struct sock_xprt, recv_worker);
- xs_tcp_data_receive(transport);
-}
-
/**
* xs_tcp_state_change - callback to handle TCP socket state changes
* @sk: socket whose state has changed
case TCP_ESTABLISHED:
spin_lock(&xprt->transport_lock);
if (!xprt_test_and_set_connected(xprt)) {
-
- /* Reset TCP record info */
- transport->tcp_offset = 0;
- transport->tcp_reclen = 0;
- transport->tcp_copied = 0;
- transport->tcp_flags =
- TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
xprt->connect_cookie++;
clear_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
xprt_clear_connecting(xprt);
+ xprt->stat.connect_count++;
+ xprt->stat.connect_time += (long)jiffies -
+ xprt->stat.connect_start;
xprt_wake_pending_tasks(xprt, -EAGAIN);
}
spin_unlock(&xprt->transport_lock);
if (!wq || test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &wq->flags) == 0)
goto out;
- xprt_write_space(xprt);
+ if (xprt_write_space(xprt))
+ sk->sk_write_pending--;
out:
rcu_read_unlock();
}
spin_unlock_bh(&xprt->transport_lock);
}
-static unsigned short xs_get_random_port(void)
+static int xs_get_random_port(void)
{
- unsigned short range = xprt_max_resvport - xprt_min_resvport + 1;
- unsigned short rand = (unsigned short) prandom_u32() % range;
- return rand + xprt_min_resvport;
+ unsigned short min = xprt_min_resvport, max = xprt_max_resvport;
+ unsigned short range;
+ unsigned short rand;
+
+ if (max < min)
+ return -EADDRINUSE;
+ range = max - min + 1;
+ rand = (unsigned short) prandom_u32() % range;
+ return rand + min;
}
/**
transport->srcport = xs_sock_getport(sock);
}
-static unsigned short xs_get_srcport(struct sock_xprt *transport)
+static int xs_get_srcport(struct sock_xprt *transport)
{
- unsigned short port = transport->srcport;
+ int port = transport->srcport;
if (port == 0 && transport->xprt.resvport)
port = xs_get_random_port();
{
struct sockaddr_storage myaddr;
int err, nloop = 0;
- unsigned short port = xs_get_srcport(transport);
+ int port = xs_get_srcport(transport);
unsigned short last;
/*
* transport->xprt.resvport == 1) xs_get_srcport above will
* ensure that port is non-zero and we will bind as needed.
*/
- if (port == 0)
- return 0;
+ if (port <= 0)
+ return port;
memcpy(&myaddr, &transport->srcaddr, transport->xprt.addrlen);
do {
write_unlock_bh(&sk->sk_callback_lock);
}
- /* Tell the socket layer to start connecting... */
- xprt->stat.connect_count++;
- xprt->stat.connect_start = jiffies;
+ xs_stream_reset_connect(transport);
+
return kernel_connect(sock, xs_addr(xprt), xprt->addrlen, 0);
}
case 0:
dprintk("RPC: xprt %p connected to %s\n",
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
+ xprt->stat.connect_count++;
+ xprt->stat.connect_time += (long)jiffies -
+ xprt->stat.connect_start;
xprt_set_connected(xprt);
case -ENOBUFS:
break;
xs_set_memalloc(xprt);
+ /* Reset TCP record info */
+ xs_stream_reset_connect(transport);
+
/* Tell the socket layer to start connecting... */
- xprt->stat.connect_count++;
- xprt->stat.connect_start = jiffies;
set_bit(XPRT_SOCK_CONNECTING, &transport->sock_state);
ret = kernel_connect(sock, xs_addr(xprt), xprt->addrlen, O_NONBLOCK);
switch (ret) {
"%llu %llu %lu %llu %llu\n",
xprt->stat.bind_count,
xprt->stat.connect_count,
- xprt->stat.connect_time,
+ xprt->stat.connect_time / HZ,
idle_time,
xprt->stat.sends,
xprt->stat.recvs,
transport->srcport,
xprt->stat.bind_count,
xprt->stat.connect_count,
- xprt->stat.connect_time,
+ xprt->stat.connect_time / HZ,
idle_time,
xprt->stat.sends,
xprt->stat.recvs,
/*
* The send routine. Borrows from svc_send
*/
-static int bc_send_request(struct rpc_task *task)
+static int bc_send_request(struct rpc_rqst *req)
{
- struct rpc_rqst *req = task->tk_rqstp;
struct svc_xprt *xprt;
int len;
* Grab the mutex to serialize data as the connection is shared
* with the fore channel
*/
- if (!mutex_trylock(&xprt->xpt_mutex)) {
- rpc_sleep_on(&xprt->xpt_bc_pending, task, NULL);
- if (!mutex_trylock(&xprt->xpt_mutex))
- return -EAGAIN;
- rpc_wake_up_queued_task(&xprt->xpt_bc_pending, task);
- }
+ mutex_lock(&xprt->xpt_mutex);
if (test_bit(XPT_DEAD, &xprt->xpt_flags))
len = -ENOTCONN;
else
static const struct rpc_xprt_ops xs_local_ops = {
.reserve_xprt = xprt_reserve_xprt,
- .release_xprt = xs_tcp_release_xprt,
+ .release_xprt = xprt_release_xprt,
.alloc_slot = xprt_alloc_slot,
.free_slot = xprt_free_slot,
.rpcbind = xs_local_rpcbind,
.connect = xs_local_connect,
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
+ .prepare_request = xs_stream_prepare_request,
.send_request = xs_local_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.close = xs_close,
static const struct rpc_xprt_ops xs_tcp_ops = {
.reserve_xprt = xprt_reserve_xprt,
- .release_xprt = xs_tcp_release_xprt,
- .alloc_slot = xprt_lock_and_alloc_slot,
+ .release_xprt = xprt_release_xprt,
+ .alloc_slot = xprt_alloc_slot,
.free_slot = xprt_free_slot,
.rpcbind = rpcb_getport_async,
.set_port = xs_set_port,
.connect = xs_connect,
.buf_alloc = rpc_malloc,
.buf_free = rpc_free,
+ .prepare_request = xs_stream_prepare_request,
.send_request = xs_tcp_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.close = xs_tcp_shutdown,
xprt->ops = &xs_local_ops;
xprt->timeout = &xs_local_default_timeout;
- INIT_WORK(&transport->recv_worker, xs_local_data_receive_workfn);
- INIT_DELAYED_WORK(&transport->connect_worker,
- xs_dummy_setup_socket);
+ INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
+ INIT_DELAYED_WORK(&transport->connect_worker, xs_dummy_setup_socket);
switch (sun->sun_family) {
case AF_LOCAL:
xprt->connect_timeout = xprt->timeout->to_initval *
(xprt->timeout->to_retries + 1);
- INIT_WORK(&transport->recv_worker, xs_tcp_data_receive_workfn);
+ INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_setup_socket);
switch (addr->sa_family) {
static int param_set_portnr(const char *val, const struct kernel_param *kp)
{
- if (kp->arg == &xprt_min_resvport)
- return param_set_uint_minmax(val, kp,
- RPC_MIN_RESVPORT,
- xprt_max_resvport);
return param_set_uint_minmax(val, kp,
- xprt_min_resvport,
+ RPC_MIN_RESVPORT,
RPC_MAX_RESVPORT);
}
subdir-$(CONFIG_MODVERSIONS) += genksyms
subdir-y += mod
subdir-$(CONFIG_SECURITY_SELINUX) += selinux
-subdir-$(CONFIG_DTC) += dtc
subdir-$(CONFIG_GDB_SCRIPTS) += gdb
# Let clean descend into subdirs
-subdir- += basic kconfig package gcc-plugins
+subdir- += basic dtc kconfig package gcc-plugins
quiet_cmd_dtc = DTC $@
cmd_dtc = mkdir -p $(dir ${dtc-tmp}) ; \
- $(CPP) $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \
+ $(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \
$(DTC) -O dtb -o $@ -b 0 \
$(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \
-d $(depfile).dtc.tmp $(dtc-tmp) ; \
# SPDX-License-Identifier: GPL-2.0
# scripts/dtc makefile
-hostprogs-y := dtc
+hostprogs-$(CONFIG_DTC) := dtc
always := $(hostprogs-y)
dtc-objs := dtc.o flattree.o fstree.o data.o livetree.o treesource.o \
# Source files need to get at the userspace version of libfdt_env.h to compile
HOST_EXTRACFLAGS := -I$(src)/libfdt
+ifeq ($(wildcard /usr/include/yaml.h),)
+HOST_EXTRACFLAGS += -DNO_YAML
+else
+dtc-objs += yamltree.o
+HOSTLDLIBS_dtc := -lyaml
+endif
+
# Generated files need one more search path to include headers in source tree
HOSTCFLAGS_dtc-lexer.lex.o := -I$(src)
HOSTCFLAGS_dtc-parser.tab.o := -I$(src)
treesource.c \
util.c
+ifneq ($(NO_YAML),1)
+DTC_SRCS += yamltree.c
+endif
+
DTC_GEN_SRCS = dtc-lexer.lex.c dtc-parser.tab.c
DTC_OBJS = $(DTC_SRCS:%.c=%.o) $(DTC_GEN_SRCS:%.c=%.o)
}
WARNING(simple_bus_reg, check_simple_bus_reg, NULL, ®_format, &simple_bus_bridge);
+static const struct bus_type i2c_bus = {
+ .name = "i2c-bus",
+};
+
+static void check_i2c_bus_bridge(struct check *c, struct dt_info *dti, struct node *node)
+{
+ if (strprefixeq(node->name, node->basenamelen, "i2c-bus") ||
+ strprefixeq(node->name, node->basenamelen, "i2c-arb")) {
+ node->bus = &i2c_bus;
+ } else if (strprefixeq(node->name, node->basenamelen, "i2c")) {
+ struct node *child;
+ for_each_child(node, child) {
+ if (strprefixeq(child->name, node->basenamelen, "i2c-bus"))
+ return;
+ }
+ node->bus = &i2c_bus;
+ } else
+ return;
+
+ if (!node->children)
+ return;
+
+ if (node_addr_cells(node) != 1)
+ FAIL(c, dti, node, "incorrect #address-cells for I2C bus");
+ if (node_size_cells(node) != 0)
+ FAIL(c, dti, node, "incorrect #size-cells for I2C bus");
+
+}
+WARNING(i2c_bus_bridge, check_i2c_bus_bridge, NULL, &addr_size_cells);
+
+static void check_i2c_bus_reg(struct check *c, struct dt_info *dti, struct node *node)
+{
+ struct property *prop;
+ const char *unitname = get_unitname(node);
+ char unit_addr[17];
+ uint32_t reg = 0;
+ int len;
+ cell_t *cells = NULL;
+
+ if (!node->parent || (node->parent->bus != &i2c_bus))
+ return;
+
+ prop = get_property(node, "reg");
+ if (prop)
+ cells = (cell_t *)prop->val.val;
+
+ if (!cells) {
+ FAIL(c, dti, node, "missing or empty reg property");
+ return;
+ }
+
+ reg = fdt32_to_cpu(*cells);
+ snprintf(unit_addr, sizeof(unit_addr), "%x", reg);
+ if (!streq(unitname, unit_addr))
+ FAIL(c, dti, node, "I2C bus unit address format error, expected \"%s\"",
+ unit_addr);
+
+ for (len = prop->val.len; len > 0; len -= 4) {
+ reg = fdt32_to_cpu(*(cells++));
+ if (reg > 0x3ff)
+ FAIL_PROP(c, dti, node, prop, "I2C address must be less than 10-bits, got \"0x%x\"",
+ reg);
+
+ }
+}
+WARNING(i2c_bus_reg, check_i2c_bus_reg, NULL, ®_format, &i2c_bus_bridge);
+
+static const struct bus_type spi_bus = {
+ .name = "spi-bus",
+};
+
+static void check_spi_bus_bridge(struct check *c, struct dt_info *dti, struct node *node)
+{
+
+ if (strprefixeq(node->name, node->basenamelen, "spi")) {
+ node->bus = &spi_bus;
+ } else {
+ /* Try to detect SPI buses which don't have proper node name */
+ struct node *child;
+
+ if (node_addr_cells(node) != 1 || node_size_cells(node) != 0)
+ return;
+
+ for_each_child(node, child) {
+ struct property *prop;
+ for_each_property(child, prop) {
+ if (strprefixeq(prop->name, 4, "spi-")) {
+ node->bus = &spi_bus;
+ break;
+ }
+ }
+ if (node->bus == &spi_bus)
+ break;
+ }
+
+ if (node->bus == &spi_bus && get_property(node, "reg"))
+ FAIL(c, dti, node, "node name for SPI buses should be 'spi'");
+ }
+ if (node->bus != &spi_bus || !node->children)
+ return;
+
+ if (node_addr_cells(node) != 1)
+ FAIL(c, dti, node, "incorrect #address-cells for SPI bus");
+ if (node_size_cells(node) != 0)
+ FAIL(c, dti, node, "incorrect #size-cells for SPI bus");
+
+}
+WARNING(spi_bus_bridge, check_spi_bus_bridge, NULL, &addr_size_cells);
+
+static void check_spi_bus_reg(struct check *c, struct dt_info *dti, struct node *node)
+{
+ struct property *prop;
+ const char *unitname = get_unitname(node);
+ char unit_addr[9];
+ uint32_t reg = 0;
+ cell_t *cells = NULL;
+
+ if (!node->parent || (node->parent->bus != &spi_bus))
+ return;
+
+ prop = get_property(node, "reg");
+ if (prop)
+ cells = (cell_t *)prop->val.val;
+
+ if (!cells) {
+ FAIL(c, dti, node, "missing or empty reg property");
+ return;
+ }
+
+ reg = fdt32_to_cpu(*cells);
+ snprintf(unit_addr, sizeof(unit_addr), "%x", reg);
+ if (!streq(unitname, unit_addr))
+ FAIL(c, dti, node, "SPI bus unit address format error, expected \"%s\"",
+ unit_addr);
+}
+WARNING(spi_bus_reg, check_spi_bus_reg, NULL, ®_format, &spi_bus_bridge);
+
static void check_unit_address_format(struct check *c, struct dt_info *dti,
struct node *node)
{
&simple_bus_bridge,
&simple_bus_reg,
+ &i2c_bus_bridge,
+ &i2c_bus_reg,
+
+ &spi_bus_bridge,
+ &spi_bus_reg,
+
&avoid_default_addr_size,
&avoid_unnecessary_addr_size,
&unique_unit_address,
struct data d;
char *q;
- d = data_grow_for(empty_data, len + 1);
+ d = data_add_marker(empty_data, TYPE_STRING, NULL);
+ d = data_grow_for(d, len + 1);
q = d.val;
while (i < len) {
{
struct data d = empty_data;
+ d = data_add_marker(d, TYPE_NONE, NULL);
while (!feof(f) && (d.len < maxlen)) {
size_t chunksize, ret;
}
| propdataprefix DT_REF
{
+ $1 = data_add_marker($1, TYPE_STRING, $2);
$$ = data_add_marker($1, REF_PATH, $2);
}
| propdataprefix DT_INCBIN '(' DT_STRING ',' integer_prim ',' integer_prim ')'
DT_BITS DT_LITERAL '<'
{
unsigned long long bits;
+ enum markertype type = TYPE_UINT32;
bits = $2;
- if ((bits != 8) && (bits != 16) &&
- (bits != 32) && (bits != 64)) {
+ switch (bits) {
+ case 8: type = TYPE_UINT8; break;
+ case 16: type = TYPE_UINT16; break;
+ case 32: type = TYPE_UINT32; break;
+ case 64: type = TYPE_UINT64; break;
+ default:
ERROR(&@2, "Array elements must be"
" 8, 16, 32 or 64-bits");
bits = 32;
}
- $$.data = empty_data;
+ $$.data = data_add_marker(empty_data, type, NULL);
$$.bits = bits;
}
| '<'
{
- $$.data = empty_data;
+ $$.data = data_add_marker(empty_data, TYPE_UINT32, NULL);
$$.bits = 32;
}
| arrayprefix integer_prim
bytestring:
/* empty */
{
- $$ = empty_data;
+ $$ = data_add_marker(empty_data, TYPE_UINT8, NULL);
}
| bytestring DT_BYTE
{
"\n\tOutput formats are:\n"
"\t\tdts - device tree source text\n"
"\t\tdtb - device tree blob\n"
+#ifndef NO_YAML
+ "\t\tyaml - device tree encoded as YAML\n"
+#endif
"\t\tasm - assembler source",
"\n\tBlob version to produce, defaults to "stringify(DEFAULT_FDT_VERSION)" (for dtb and asm output)",
"\n\tOutput dependency file",
return fallback;
if (!strcasecmp(s, ".dts"))
return "dts";
+ if (!strcasecmp(s, ".yaml"))
+ return "yaml";
if (!strcasecmp(s, ".dtb"))
return "dtb";
return fallback;
if (streq(outform, "dts")) {
dt_to_source(outf, dti);
+#ifndef NO_YAML
+ } else if (streq(outform, "yaml")) {
+ if (!streq(inform, "dts"))
+ die("YAML output format requires dts input format\n");
+ dt_to_yaml(outf, dti);
+#endif
} else if (streq(outform, "dtb")) {
dt_to_blob(outf, dti, outversion);
} else if (streq(outform, "asm")) {
/* Data blobs */
enum markertype {
+ TYPE_NONE,
REF_PHANDLE,
REF_PATH,
LABEL,
+ TYPE_UINT8,
+ TYPE_UINT16,
+ TYPE_UINT32,
+ TYPE_UINT64,
+ TYPE_STRING,
};
+extern const char *markername(enum markertype markertype);
struct marker {
enum markertype type;
for_each_marker(m) \
if ((m)->type == (t))
+size_t type_marker_length(struct marker *m);
+
void data_free(struct data d);
struct data data_grow_for(struct data d, int xlen);
void dt_to_source(FILE *f, struct dt_info *dti);
struct dt_info *dt_from_source(const char *f);
+/* YAML source */
+
+void dt_to_yaml(FILE *f, struct dt_info *dti);
+
/* FS trees */
struct dt_info *dt_from_fs(const char *dirname);
padlen = 0;
if (quiet < 1)
fprintf(stderr,
- "Warning: blob size %d >= minimum size %d\n",
+ "Warning: blob size %"PRIu32" >= minimum size %d\n",
fdt32_to_cpu(fdt.totalsize), minsize);
}
}
#include "libfdt_internal.h"
-int fdt_check_header(const void *fdt)
+/*
+ * Minimal sanity check for a read-only tree. fdt_ro_probe_() checks
+ * that the given buffer contains what appears to be a flattened
+ * device tree with sane information in its header.
+ */
+int fdt_ro_probe_(const void *fdt)
{
if (fdt_magic(fdt) == FDT_MAGIC) {
/* Complete tree */
return 0;
}
+static int check_off_(uint32_t hdrsize, uint32_t totalsize, uint32_t off)
+{
+ return (off >= hdrsize) && (off <= totalsize);
+}
+
+static int check_block_(uint32_t hdrsize, uint32_t totalsize,
+ uint32_t base, uint32_t size)
+{
+ if (!check_off_(hdrsize, totalsize, base))
+ return 0; /* block start out of bounds */
+ if ((base + size) < base)
+ return 0; /* overflow */
+ if (!check_off_(hdrsize, totalsize, base + size))
+ return 0; /* block end out of bounds */
+ return 1;
+}
+
+size_t fdt_header_size_(uint32_t version)
+{
+ if (version <= 1)
+ return FDT_V1_SIZE;
+ else if (version <= 2)
+ return FDT_V2_SIZE;
+ else if (version <= 3)
+ return FDT_V3_SIZE;
+ else if (version <= 16)
+ return FDT_V16_SIZE;
+ else
+ return FDT_V17_SIZE;
+}
+
+int fdt_check_header(const void *fdt)
+{
+ size_t hdrsize;
+
+ if (fdt_magic(fdt) != FDT_MAGIC)
+ return -FDT_ERR_BADMAGIC;
+ hdrsize = fdt_header_size(fdt);
+ if ((fdt_version(fdt) < FDT_FIRST_SUPPORTED_VERSION)
+ || (fdt_last_comp_version(fdt) > FDT_LAST_SUPPORTED_VERSION))
+ return -FDT_ERR_BADVERSION;
+ if (fdt_version(fdt) < fdt_last_comp_version(fdt))
+ return -FDT_ERR_BADVERSION;
+
+ if ((fdt_totalsize(fdt) < hdrsize)
+ || (fdt_totalsize(fdt) > INT_MAX))
+ return -FDT_ERR_TRUNCATED;
+
+ /* Bounds check memrsv block */
+ if (!check_off_(hdrsize, fdt_totalsize(fdt), fdt_off_mem_rsvmap(fdt)))
+ return -FDT_ERR_TRUNCATED;
+
+ /* Bounds check structure block */
+ if (fdt_version(fdt) < 17) {
+ if (!check_off_(hdrsize, fdt_totalsize(fdt),
+ fdt_off_dt_struct(fdt)))
+ return -FDT_ERR_TRUNCATED;
+ } else {
+ if (!check_block_(hdrsize, fdt_totalsize(fdt),
+ fdt_off_dt_struct(fdt),
+ fdt_size_dt_struct(fdt)))
+ return -FDT_ERR_TRUNCATED;
+ }
+
+ /* Bounds check strings block */
+ if (!check_block_(hdrsize, fdt_totalsize(fdt),
+ fdt_off_dt_strings(fdt), fdt_size_dt_strings(fdt)))
+ return -FDT_ERR_TRUNCATED;
+
+ return 0;
+}
+
const void *fdt_offset_ptr(const void *fdt, int offset, unsigned int len)
{
unsigned absoffset = offset + fdt_off_dt_struct(fdt);
int fdt_move(const void *fdt, void *buf, int bufsize)
{
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
if (fdt_totalsize(fdt) > bufsize)
return -FDT_ERR_NOSPACE;
/*
* libfdt - Flat Device Tree manipulation
* Copyright (C) 2014 David Gibson <david@gibson.dropbear.id.au>
+ * Copyright (C) 2018 embedded brains GmbH
*
* libfdt is dual licensed: you can use it either under the terms of
* the GPL, or the BSD license, at your option.
#include "libfdt_internal.h"
-int fdt_address_cells(const void *fdt, int nodeoffset)
+static int fdt_cells(const void *fdt, int nodeoffset, const char *name)
{
- const fdt32_t *ac;
+ const fdt32_t *c;
int val;
int len;
- ac = fdt_getprop(fdt, nodeoffset, "#address-cells", &len);
- if (!ac)
+ c = fdt_getprop(fdt, nodeoffset, name, &len);
+ if (!c)
return 2;
- if (len != sizeof(*ac))
+ if (len != sizeof(*c))
return -FDT_ERR_BADNCELLS;
- val = fdt32_to_cpu(*ac);
+ val = fdt32_to_cpu(*c);
if ((val <= 0) || (val > FDT_MAX_NCELLS))
return -FDT_ERR_BADNCELLS;
return val;
}
-int fdt_size_cells(const void *fdt, int nodeoffset)
+int fdt_address_cells(const void *fdt, int nodeoffset)
{
- const fdt32_t *sc;
- int val;
- int len;
-
- sc = fdt_getprop(fdt, nodeoffset, "#size-cells", &len);
- if (!sc)
- return 2;
-
- if (len != sizeof(*sc))
- return -FDT_ERR_BADNCELLS;
-
- val = fdt32_to_cpu(*sc);
- if ((val < 0) || (val > FDT_MAX_NCELLS))
- return -FDT_ERR_BADNCELLS;
+ return fdt_cells(fdt, nodeoffset, "#address-cells");
+}
- return val;
+int fdt_size_cells(const void *fdt, int nodeoffset)
+{
+ return fdt_cells(fdt, nodeoffset, "#size-cells");
}
int len = 0, namelen;
const char *name;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
for (;;) {
name = fdt_get_name(fdt, nodeoffset, &namelen);
uint32_t delta = fdt_get_max_phandle(fdt);
int ret;
- FDT_CHECK_HEADER(fdt);
- FDT_CHECK_HEADER(fdto);
+ FDT_RO_PROBE(fdt);
+ FDT_RO_PROBE(fdto);
ret = overlay_adjust_local_phandles(fdto, delta);
if (ret)
return 0;
}
+const char *fdt_get_string(const void *fdt, int stroffset, int *lenp)
+{
+ uint32_t absoffset = stroffset + fdt_off_dt_strings(fdt);
+ size_t len;
+ int err;
+ const char *s, *n;
+
+ err = fdt_ro_probe_(fdt);
+ if (err != 0)
+ goto fail;
+
+ err = -FDT_ERR_BADOFFSET;
+ if (absoffset >= fdt_totalsize(fdt))
+ goto fail;
+ len = fdt_totalsize(fdt) - absoffset;
+
+ if (fdt_magic(fdt) == FDT_MAGIC) {
+ if (stroffset < 0)
+ goto fail;
+ if (fdt_version(fdt) >= 17) {
+ if (stroffset >= fdt_size_dt_strings(fdt))
+ goto fail;
+ if ((fdt_size_dt_strings(fdt) - stroffset) < len)
+ len = fdt_size_dt_strings(fdt) - stroffset;
+ }
+ } else if (fdt_magic(fdt) == FDT_SW_MAGIC) {
+ if ((stroffset >= 0)
+ || (stroffset < -fdt_size_dt_strings(fdt)))
+ goto fail;
+ if ((-stroffset) < len)
+ len = -stroffset;
+ } else {
+ err = -FDT_ERR_INTERNAL;
+ goto fail;
+ }
+
+ s = (const char *)fdt + absoffset;
+ n = memchr(s, '\0', len);
+ if (!n) {
+ /* missing terminating NULL */
+ err = -FDT_ERR_TRUNCATED;
+ goto fail;
+ }
+
+ if (lenp)
+ *lenp = n - s;
+ return s;
+
+fail:
+ if (lenp)
+ *lenp = err;
+ return NULL;
+}
+
const char *fdt_string(const void *fdt, int stroffset)
{
- return (const char *)fdt + fdt_off_dt_strings(fdt) + stroffset;
+ return fdt_get_string(fdt, stroffset, NULL);
}
static int fdt_string_eq_(const void *fdt, int stroffset,
const char *s, int len)
{
- const char *p = fdt_string(fdt, stroffset);
+ int slen;
+ const char *p = fdt_get_string(fdt, stroffset, &slen);
- return (strlen(p) == len) && (memcmp(p, s, len) == 0);
+ return p && (slen == len) && (memcmp(p, s, len) == 0);
}
uint32_t fdt_get_max_phandle(const void *fdt)
return 0;
}
+static const struct fdt_reserve_entry *fdt_mem_rsv(const void *fdt, int n)
+{
+ int offset = n * sizeof(struct fdt_reserve_entry);
+ int absoffset = fdt_off_mem_rsvmap(fdt) + offset;
+
+ if (absoffset < fdt_off_mem_rsvmap(fdt))
+ return NULL;
+ if (absoffset > fdt_totalsize(fdt) - sizeof(struct fdt_reserve_entry))
+ return NULL;
+ return fdt_mem_rsv_(fdt, n);
+}
+
int fdt_get_mem_rsv(const void *fdt, int n, uint64_t *address, uint64_t *size)
{
- FDT_CHECK_HEADER(fdt);
- *address = fdt64_to_cpu(fdt_mem_rsv_(fdt, n)->address);
- *size = fdt64_to_cpu(fdt_mem_rsv_(fdt, n)->size);
+ const struct fdt_reserve_entry *re;
+
+ FDT_RO_PROBE(fdt);
+ re = fdt_mem_rsv(fdt, n);
+ if (!re)
+ return -FDT_ERR_BADOFFSET;
+
+ *address = fdt64_ld(&re->address);
+ *size = fdt64_ld(&re->size);
return 0;
}
int fdt_num_mem_rsv(const void *fdt)
{
- int i = 0;
+ int i;
+ const struct fdt_reserve_entry *re;
- while (fdt64_to_cpu(fdt_mem_rsv_(fdt, i)->size) != 0)
- i++;
- return i;
+ for (i = 0; (re = fdt_mem_rsv(fdt, i)) != NULL; i++) {
+ if (fdt64_ld(&re->size) == 0)
+ return i;
+ }
+ return -FDT_ERR_TRUNCATED;
}
static int nextprop_(const void *fdt, int offset)
{
int depth;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
for (depth = 0;
(offset >= 0) && (depth >= 0);
const char *p = path;
int offset = 0;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
/* see if we have an alias */
if (*path != '/') {
const char *nameptr;
int err;
- if (((err = fdt_check_header(fdt)) != 0)
+ if (((err = fdt_ro_probe_(fdt)) != 0)
|| ((err = fdt_check_node_offset_(fdt, nodeoffset)) < 0))
goto fail;
prop = fdt_offset_ptr_(fdt, offset);
if (lenp)
- *lenp = fdt32_to_cpu(prop->len);
+ *lenp = fdt32_ld(&prop->len);
return prop;
}
offset = -FDT_ERR_INTERNAL;
break;
}
- if (fdt_string_eq_(fdt, fdt32_to_cpu(prop->nameoff),
+ if (fdt_string_eq_(fdt, fdt32_ld(&prop->nameoff),
name, namelen)) {
if (poffset)
*poffset = offset;
/* Handle realignment */
if (fdt_version(fdt) < 0x10 && (poffset + sizeof(*prop)) % 8 &&
- fdt32_to_cpu(prop->len) >= 8)
+ fdt32_ld(&prop->len) >= 8)
return prop->data + 4;
return prop->data;
}
prop = fdt_get_property_by_offset_(fdt, offset, lenp);
if (!prop)
return NULL;
- if (namep)
- *namep = fdt_string(fdt, fdt32_to_cpu(prop->nameoff));
+ if (namep) {
+ const char *name;
+ int namelen;
+ name = fdt_get_string(fdt, fdt32_ld(&prop->nameoff),
+ &namelen);
+ if (!name) {
+ if (lenp)
+ *lenp = namelen;
+ return NULL;
+ }
+ *namep = name;
+ }
/* Handle realignment */
if (fdt_version(fdt) < 0x10 && (offset + sizeof(*prop)) % 8 &&
- fdt32_to_cpu(prop->len) >= 8)
+ fdt32_ld(&prop->len) >= 8)
return prop->data + 4;
return prop->data;
}
return 0;
}
- return fdt32_to_cpu(*php);
+ return fdt32_ld(php);
}
const char *fdt_get_alias_namelen(const void *fdt,
int offset, depth, namelen;
const char *name;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
if (buflen < 2)
return -FDT_ERR_NOSPACE;
int offset, depth;
int supernodeoffset = -FDT_ERR_INTERNAL;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
if (supernodedepth < 0)
return -FDT_ERR_NOTFOUND;
const void *val;
int len;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
/* FIXME: The algorithm here is pretty horrible: we scan each
* property of a node in fdt_getprop(), then if that didn't
if ((phandle == 0) || (phandle == -1))
return -FDT_ERR_BADPHANDLE;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
/* FIXME: The algorithm here is pretty horrible: we
* potentially scan each property of a node in
{
int offset, err;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
/* FIXME: The algorithm here is pretty horrible: we scan each
* property of a node in fdt_node_check_compatible(), then if
return offset; /* error from fdt_next_node() */
}
+
+int fdt_check_full(const void *fdt, size_t bufsize)
+{
+ int err;
+ int num_memrsv;
+ int offset, nextoffset = 0;
+ uint32_t tag;
+ unsigned depth = 0;
+ const void *prop;
+ const char *propname;
+
+ if (bufsize < FDT_V1_SIZE)
+ return -FDT_ERR_TRUNCATED;
+ err = fdt_check_header(fdt);
+ if (err != 0)
+ return err;
+ if (bufsize < fdt_totalsize(fdt))
+ return -FDT_ERR_TRUNCATED;
+
+ num_memrsv = fdt_num_mem_rsv(fdt);
+ if (num_memrsv < 0)
+ return num_memrsv;
+
+ while (1) {
+ offset = nextoffset;
+ tag = fdt_next_tag(fdt, offset, &nextoffset);
+
+ if (nextoffset < 0)
+ return nextoffset;
+
+ switch (tag) {
+ case FDT_NOP:
+ break;
+
+ case FDT_END:
+ if (depth != 0)
+ return -FDT_ERR_BADSTRUCTURE;
+ return 0;
+
+ case FDT_BEGIN_NODE:
+ depth++;
+ if (depth > INT_MAX)
+ return -FDT_ERR_BADSTRUCTURE;
+ break;
+
+ case FDT_END_NODE:
+ if (depth == 0)
+ return -FDT_ERR_BADSTRUCTURE;
+ depth--;
+ break;
+
+ case FDT_PROP:
+ prop = fdt_getprop_by_offset(fdt, offset, &propname,
+ &err);
+ if (!prop)
+ return err;
+ break;
+
+ default:
+ return -FDT_ERR_INTERNAL;
+ }
+ }
+}
(fdt_off_dt_strings(fdt) + fdt_size_dt_strings(fdt)));
}
-static int fdt_rw_check_header_(void *fdt)
+static int fdt_rw_probe_(void *fdt)
{
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
if (fdt_version(fdt) < 17)
return -FDT_ERR_BADVERSION;
return 0;
}
-#define FDT_RW_CHECK_HEADER(fdt) \
+#define FDT_RW_PROBE(fdt) \
{ \
int err_; \
- if ((err_ = fdt_rw_check_header_(fdt)) != 0) \
+ if ((err_ = fdt_rw_probe_(fdt)) != 0) \
return err_; \
}
struct fdt_reserve_entry *re;
int err;
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
re = fdt_mem_rsv_w_(fdt, fdt_num_mem_rsv(fdt));
err = fdt_splice_mem_rsv_(fdt, re, 0, 1);
{
struct fdt_reserve_entry *re = fdt_mem_rsv_w_(fdt, n);
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
if (n >= fdt_num_mem_rsv(fdt))
return -FDT_ERR_NOTFOUND;
int oldlen, newlen;
int err;
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
namep = (char *)(uintptr_t)fdt_get_name(fdt, nodeoffset, &oldlen);
if (!namep)
struct fdt_property *prop;
int err;
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
err = fdt_resize_property_(fdt, nodeoffset, name, len, &prop);
if (err == -FDT_ERR_NOTFOUND)
struct fdt_property *prop;
int err, oldlen, newlen;
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
prop = fdt_get_property_w(fdt, nodeoffset, name, &oldlen);
if (prop) {
struct fdt_property *prop;
int len, proplen;
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
prop = fdt_get_property_w(fdt, nodeoffset, name, &len);
if (!prop)
uint32_t tag;
fdt32_t *endtag;
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
offset = fdt_subnode_offset_namelen(fdt, parentoffset, name, namelen);
if (offset >= 0)
{
int endoffset;
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
endoffset = fdt_node_end_offset_(fdt, nodeoffset);
if (endoffset < 0)
const char *fdtend = fdtstart + fdt_totalsize(fdt);
char *tmp;
- FDT_CHECK_HEADER(fdt);
+ FDT_RO_PROBE(fdt);
mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
* sizeof(struct fdt_reserve_entry);
{
int mem_rsv_size;
- FDT_RW_CHECK_HEADER(fdt);
+ FDT_RW_PROBE(fdt);
mem_rsv_size = (fdt_num_mem_rsv(fdt)+1)
* sizeof(struct fdt_reserve_entry);
#include "libfdt_internal.h"
-static int fdt_sw_check_header_(void *fdt)
+static int fdt_sw_probe_(void *fdt)
{
- if (fdt_magic(fdt) != FDT_SW_MAGIC)
+ if (fdt_magic(fdt) == FDT_MAGIC)
+ return -FDT_ERR_BADSTATE;
+ else if (fdt_magic(fdt) != FDT_SW_MAGIC)
return -FDT_ERR_BADMAGIC;
- /* FIXME: should check more details about the header state */
return 0;
}
-#define FDT_SW_CHECK_HEADER(fdt) \
+#define FDT_SW_PROBE(fdt) \
+ { \
+ int err; \
+ if ((err = fdt_sw_probe_(fdt)) != 0) \
+ return err; \
+ }
+
+/* 'memrsv' state: Initial state after fdt_create()
+ *
+ * Allowed functions:
+ * fdt_add_reservmap_entry()
+ * fdt_finish_reservemap() [moves to 'struct' state]
+ */
+static int fdt_sw_probe_memrsv_(void *fdt)
+{
+ int err = fdt_sw_probe_(fdt);
+ if (err)
+ return err;
+
+ if (fdt_off_dt_strings(fdt) != 0)
+ return -FDT_ERR_BADSTATE;
+ return 0;
+}
+
+#define FDT_SW_PROBE_MEMRSV(fdt) \
+ { \
+ int err; \
+ if ((err = fdt_sw_probe_memrsv_(fdt)) != 0) \
+ return err; \
+ }
+
+/* 'struct' state: Enter this state after fdt_finish_reservemap()
+ *
+ * Allowed functions:
+ * fdt_begin_node()
+ * fdt_end_node()
+ * fdt_property*()
+ * fdt_finish() [moves to 'complete' state]
+ */
+static int fdt_sw_probe_struct_(void *fdt)
+{
+ int err = fdt_sw_probe_(fdt);
+ if (err)
+ return err;
+
+ if (fdt_off_dt_strings(fdt) != fdt_totalsize(fdt))
+ return -FDT_ERR_BADSTATE;
+ return 0;
+}
+
+#define FDT_SW_PROBE_STRUCT(fdt) \
{ \
int err; \
- if ((err = fdt_sw_check_header_(fdt)) != 0) \
+ if ((err = fdt_sw_probe_struct_(fdt)) != 0) \
return err; \
}
+/* 'complete' state: Enter this state after fdt_finish()
+ *
+ * Allowed functions: none
+ */
+
static void *fdt_grab_space_(void *fdt, size_t len)
{
int offset = fdt_size_dt_struct(fdt);
int fdt_create(void *buf, int bufsize)
{
+ const size_t hdrsize = FDT_ALIGN(sizeof(struct fdt_header),
+ sizeof(struct fdt_reserve_entry));
void *fdt = buf;
- if (bufsize < sizeof(struct fdt_header))
+ if (bufsize < hdrsize)
return -FDT_ERR_NOSPACE;
memset(buf, 0, bufsize);
fdt_set_last_comp_version(fdt, FDT_FIRST_SUPPORTED_VERSION);
fdt_set_totalsize(fdt, bufsize);
- fdt_set_off_mem_rsvmap(fdt, FDT_ALIGN(sizeof(struct fdt_header),
- sizeof(struct fdt_reserve_entry)));
+ fdt_set_off_mem_rsvmap(fdt, hdrsize);
fdt_set_off_dt_struct(fdt, fdt_off_mem_rsvmap(fdt));
- fdt_set_off_dt_strings(fdt, bufsize);
+ fdt_set_off_dt_strings(fdt, 0);
return 0;
}
size_t headsize, tailsize;
char *oldtail, *newtail;
- FDT_SW_CHECK_HEADER(fdt);
+ FDT_SW_PROBE(fdt);
- headsize = fdt_off_dt_struct(fdt);
+ headsize = fdt_off_dt_struct(fdt) + fdt_size_dt_struct(fdt);
tailsize = fdt_size_dt_strings(fdt);
+ if ((headsize + tailsize) > fdt_totalsize(fdt))
+ return -FDT_ERR_INTERNAL;
+
if ((headsize + tailsize) > bufsize)
return -FDT_ERR_NOSPACE;
memmove(buf, fdt, headsize);
}
- fdt_set_off_dt_strings(buf, bufsize);
fdt_set_totalsize(buf, bufsize);
+ if (fdt_off_dt_strings(buf))
+ fdt_set_off_dt_strings(buf, bufsize);
return 0;
}
struct fdt_reserve_entry *re;
int offset;
- FDT_SW_CHECK_HEADER(fdt);
-
- if (fdt_size_dt_struct(fdt))
- return -FDT_ERR_BADSTATE;
+ FDT_SW_PROBE_MEMRSV(fdt);
offset = fdt_off_dt_struct(fdt);
if ((offset + sizeof(*re)) > fdt_totalsize(fdt))
int fdt_finish_reservemap(void *fdt)
{
- return fdt_add_reservemap_entry(fdt, 0, 0);
+ int err = fdt_add_reservemap_entry(fdt, 0, 0);
+
+ if (err)
+ return err;
+
+ fdt_set_off_dt_strings(fdt, fdt_totalsize(fdt));
+ return 0;
}
int fdt_begin_node(void *fdt, const char *name)
{
struct fdt_node_header *nh;
- int namelen = strlen(name) + 1;
+ int namelen;
- FDT_SW_CHECK_HEADER(fdt);
+ FDT_SW_PROBE_STRUCT(fdt);
+ namelen = strlen(name) + 1;
nh = fdt_grab_space_(fdt, sizeof(*nh) + FDT_TAGALIGN(namelen));
if (! nh)
return -FDT_ERR_NOSPACE;
{
fdt32_t *en;
- FDT_SW_CHECK_HEADER(fdt);
+ FDT_SW_PROBE_STRUCT(fdt);
en = fdt_grab_space_(fdt, FDT_TAGSIZE);
if (! en)
struct fdt_property *prop;
int nameoff;
- FDT_SW_CHECK_HEADER(fdt);
+ FDT_SW_PROBE_STRUCT(fdt);
nameoff = fdt_find_add_string_(fdt, name);
if (nameoff == 0)
uint32_t tag;
int offset, nextoffset;
- FDT_SW_CHECK_HEADER(fdt);
+ FDT_SW_PROBE_STRUCT(fdt);
/* Add terminator */
end = fdt_grab_space_(fdt, sizeof(*end));
/* Error codes: codes for bad device tree blobs */
#define FDT_ERR_TRUNCATED 8
- /* FDT_ERR_TRUNCATED: Structure block of the given device tree
- * ends without an FDT_END tag. */
+ /* FDT_ERR_TRUNCATED: FDT or a sub-block is improperly
+ * terminated (overflows, goes outside allowed bounds, or
+ * isn't properly terminated). */
#define FDT_ERR_BADMAGIC 9
/* FDT_ERR_BADMAGIC: Given "device tree" appears not to be a
* device tree at all - it is missing the flattened device
uint32_t fdt_next_tag(const void *fdt, int offset, int *nextoffset);
+/*
+ * Alignment helpers:
+ * These helpers access words from a device tree blob. They're
+ * built to work even with unaligned pointers on platforms (ike
+ * ARM) that don't like unaligned loads and stores
+ */
+
+static inline uint32_t fdt32_ld(const fdt32_t *p)
+{
+ fdt32_t v;
+
+ memcpy(&v, p, sizeof(v));
+ return fdt32_to_cpu(v);
+}
+
+static inline uint64_t fdt64_ld(const fdt64_t *p)
+{
+ fdt64_t v;
+
+ memcpy(&v, p, sizeof(v));
+ return fdt64_to_cpu(v);
+}
+
/**********************************************************************/
/* Traversal functions */
/**********************************************************************/
/* General functions */
/**********************************************************************/
#define fdt_get_header(fdt, field) \
- (fdt32_to_cpu(((const struct fdt_header *)(fdt))->field))
+ (fdt32_ld(&((const struct fdt_header *)(fdt))->field))
#define fdt_magic(fdt) (fdt_get_header(fdt, magic))
#define fdt_totalsize(fdt) (fdt_get_header(fdt, totalsize))
#define fdt_off_dt_struct(fdt) (fdt_get_header(fdt, off_dt_struct))
#undef fdt_set_hdr_
/**
- * fdt_check_header - sanity check a device tree or possible device tree
+ * fdt_header_size - return the size of the tree's header
+ * @fdt: pointer to a flattened device tree
+ */
+size_t fdt_header_size_(uint32_t version);
+static inline size_t fdt_header_size(const void *fdt)
+{
+ return fdt_header_size_(fdt_version(fdt));
+}
+
+/**
+ * fdt_check_header - sanity check a device tree header
+
* @fdt: pointer to data which might be a flattened device tree
*
* fdt_check_header() checks that the given buffer contains what
- * appears to be a flattened device tree with sane information in its
- * header.
+ * appears to be a flattened device tree, and that the header contains
+ * valid information (to the extent that can be determined from the
+ * header alone).
*
* returns:
* 0, if the buffer appears to contain a valid device tree
* -FDT_ERR_BADMAGIC,
* -FDT_ERR_BADVERSION,
- * -FDT_ERR_BADSTATE, standard meanings, as above
+ * -FDT_ERR_BADSTATE,
+ * -FDT_ERR_TRUNCATED, standard meanings, as above
*/
int fdt_check_header(const void *fdt);
/* Read-only functions */
/**********************************************************************/
+int fdt_check_full(const void *fdt, size_t bufsize);
+
+/**
+ * fdt_get_string - retrieve a string from the strings block of a device tree
+ * @fdt: pointer to the device tree blob
+ * @stroffset: offset of the string within the strings block (native endian)
+ * @lenp: optional pointer to return the string's length
+ *
+ * fdt_get_string() retrieves a pointer to a single string from the
+ * strings block of the device tree blob at fdt, and optionally also
+ * returns the string's length in *lenp.
+ *
+ * returns:
+ * a pointer to the string, on success
+ * NULL, if stroffset is out of bounds, or doesn't point to a valid string
+ */
+const char *fdt_get_string(const void *fdt, int stroffset, int *lenp);
+
/**
* fdt_string - retrieve a string from the strings block of a device tree
* @fdt: pointer to the device tree blob
*
* returns:
* a pointer to the string, on success
- * NULL, if stroffset is out of bounds
+ * NULL, if stroffset is out of bounds, or doesn't point to a valid string
*/
const char *fdt_string(const void *fdt, int stroffset);
*
* returns:
* 0 <= n < FDT_MAX_NCELLS, on success
- * 2, if the node has no #address-cells property
+ * 2, if the node has no #size-cells property
* -FDT_ERR_BADNCELLS, if the node has a badly formatted or invalid
* #size-cells property
* -FDT_ERR_BADMAGIC,
fdt64_t tmp = cpu_to_fdt64(val);
return fdt_property(fdt, name, &tmp, sizeof(tmp));
}
+
+#ifndef SWIG /* Not available in Python */
static inline int fdt_property_cell(void *fdt, const char *name, uint32_t val)
{
return fdt_property_u32(fdt, name, val);
}
+#endif
/**
* fdt_property_placeholder - add a new property and return a ptr to its value
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
+#include <limits.h>
#ifdef __CHECKER__
#define FDT_FORCE __attribute__((force))
#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE))
-#define FDT_CHECK_HEADER(fdt) \
+int fdt_ro_probe_(const void *fdt);
+#define FDT_RO_PROBE(fdt) \
{ \
int err_; \
- if ((err_ = fdt_check_header(fdt)) != 0) \
+ if ((err_ = fdt_ro_probe_(fdt)) != 0) \
return err_; \
}
cell_t get_node_phandle(struct node *root, struct node *node)
{
static cell_t phandle = 1; /* FIXME: ick, static local */
+ struct data d = empty_data;
if ((node->phandle != 0) && (node->phandle != -1))
return node->phandle;
node->phandle = phandle;
+ d = data_add_marker(d, TYPE_UINT32, NULL);
+ d = data_append_cell(d, phandle);
+
if (!get_property(node, "linux,phandle")
&& (phandle_format & PHANDLE_LEGACY))
- add_property(node,
- build_property("linux,phandle",
- data_append_cell(empty_data, phandle)));
+ add_property(node, build_property("linux,phandle", d));
if (!get_property(node, "phandle")
&& (phandle_format & PHANDLE_EPAPR))
- add_property(node,
- build_property("phandle",
- data_append_cell(empty_data, phandle)));
+ add_property(node, build_property("phandle", d));
/* If the node *does* have a phandle property, we must
* be dealing with a self-referencing phandle, which will be
|| strchr("\a\b\t\n\v\f\r", c));
}
-static void write_propval_string(FILE *f, struct data val)
+static void write_propval_string(FILE *f, const char *s, size_t len)
{
- const char *str = val.val;
- int i;
- struct marker *m = val.markers;
-
- assert(str[val.len-1] == '\0');
+ const char *end = s + len - 1;
+ assert(*end == '\0');
- while (m && (m->offset == 0)) {
- if (m->type == LABEL)
- fprintf(f, "%s: ", m->ref);
- m = m->next;
- }
fprintf(f, "\"");
-
- for (i = 0; i < (val.len-1); i++) {
- char c = str[i];
-
+ while (s < end) {
+ char c = *s++;
switch (c) {
case '\a':
fprintf(f, "\\a");
fprintf(f, "\\\"");
break;
case '\0':
- fprintf(f, "\", ");
- while (m && (m->offset <= (i + 1))) {
- if (m->type == LABEL) {
- assert(m->offset == (i+1));
- fprintf(f, "%s: ", m->ref);
- }
- m = m->next;
- }
- fprintf(f, "\"");
+ fprintf(f, "\\0");
break;
default:
if (isprint((unsigned char)c))
fprintf(f, "%c", c);
else
- fprintf(f, "\\x%02hhx", c);
+ fprintf(f, "\\x%02"PRIx8, c);
}
}
fprintf(f, "\"");
-
- /* Wrap up any labels at the end of the value */
- for_each_marker_of_type(m, LABEL) {
- assert (m->offset == val.len);
- fprintf(f, " %s:", m->ref);
- }
}
-static void write_propval_cells(FILE *f, struct data val)
+static void write_propval_int(FILE *f, const char *p, size_t len, size_t width)
{
- void *propend = val.val + val.len;
- fdt32_t *cp = (fdt32_t *)val.val;
- struct marker *m = val.markers;
-
- fprintf(f, "<");
- for (;;) {
- while (m && (m->offset <= ((char *)cp - val.val))) {
- if (m->type == LABEL) {
- assert(m->offset == ((char *)cp - val.val));
- fprintf(f, "%s: ", m->ref);
- }
- m = m->next;
- }
+ const char *end = p + len;
+ assert(len % width == 0);
- fprintf(f, "0x%x", fdt32_to_cpu(*cp++));
- if ((void *)cp >= propend)
+ for (; p < end; p += width) {
+ switch (width) {
+ case 1:
+ fprintf(f, " %02"PRIx8, *(const uint8_t*)p);
+ break;
+ case 2:
+ fprintf(f, " 0x%02"PRIx16, fdt16_to_cpu(*(const fdt16_t*)p));
+ break;
+ case 4:
+ fprintf(f, " 0x%02"PRIx32, fdt32_to_cpu(*(const fdt32_t*)p));
+ break;
+ case 8:
+ fprintf(f, " 0x%02"PRIx64, fdt64_to_cpu(*(const fdt64_t*)p));
break;
- fprintf(f, " ");
+ }
}
+}
- /* Wrap up any labels at the end of the value */
- for_each_marker_of_type(m, LABEL) {
- assert (m->offset == val.len);
- fprintf(f, " %s:", m->ref);
- }
- fprintf(f, ">");
+static bool has_data_type_information(struct marker *m)
+{
+ return m->type >= TYPE_UINT8;
}
-static void write_propval_bytes(FILE *f, struct data val)
+static struct marker *next_type_marker(struct marker *m)
{
- void *propend = val.val + val.len;
- const char *bp = val.val;
- struct marker *m = val.markers;
-
- fprintf(f, "[");
- for (;;) {
- while (m && (m->offset == (bp-val.val))) {
- if (m->type == LABEL)
- fprintf(f, "%s: ", m->ref);
- m = m->next;
- }
+ while (m && !has_data_type_information(m))
+ m = m->next;
+ return m;
+}
- fprintf(f, "%02hhx", (unsigned char)(*bp++));
- if ((const void *)bp >= propend)
- break;
- fprintf(f, " ");
- }
+size_t type_marker_length(struct marker *m)
+{
+ struct marker *next = next_type_marker(m->next);
- /* Wrap up any labels at the end of the value */
- for_each_marker_of_type(m, LABEL) {
- assert (m->offset == val.len);
- fprintf(f, " %s:", m->ref);
- }
- fprintf(f, "]");
+ if (next)
+ return next->offset - m->offset;
+ return 0;
}
-static void write_propval(FILE *f, struct property *prop)
+static const char *delim_start[] = {
+ [TYPE_UINT8] = "[",
+ [TYPE_UINT16] = "/bits/ 16 <",
+ [TYPE_UINT32] = "<",
+ [TYPE_UINT64] = "/bits/ 64 <",
+ [TYPE_STRING] = "",
+};
+static const char *delim_end[] = {
+ [TYPE_UINT8] = " ]",
+ [TYPE_UINT16] = " >",
+ [TYPE_UINT32] = " >",
+ [TYPE_UINT64] = " >",
+ [TYPE_STRING] = "",
+};
+
+static enum markertype guess_value_type(struct property *prop)
{
int len = prop->val.len;
const char *p = prop->val.val;
int nnotstringlbl = 0, nnotcelllbl = 0;
int i;
- if (len == 0) {
- fprintf(f, ";\n");
- return;
- }
-
for (i = 0; i < len; i++) {
if (! isstring(p[i]))
nnotstring++;
nnotcelllbl++;
}
- fprintf(f, " = ");
if ((p[len-1] == '\0') && (nnotstring == 0) && (nnul < (len-nnul))
&& (nnotstringlbl == 0)) {
- write_propval_string(f, prop->val);
+ return TYPE_STRING;
} else if (((len % sizeof(cell_t)) == 0) && (nnotcelllbl == 0)) {
- write_propval_cells(f, prop->val);
- } else {
- write_propval_bytes(f, prop->val);
+ return TYPE_UINT32;
}
- fprintf(f, ";\n");
+ return TYPE_UINT8;
+}
+
+static void write_propval(FILE *f, struct property *prop)
+{
+ size_t len = prop->val.len;
+ struct marker *m = prop->val.markers;
+ struct marker dummy_marker;
+ enum markertype emit_type = TYPE_NONE;
+
+ if (len == 0) {
+ fprintf(f, ";\n");
+ return;
+ }
+
+ fprintf(f, " = ");
+
+ if (!next_type_marker(m)) {
+ /* data type information missing, need to guess */
+ dummy_marker.type = guess_value_type(prop);
+ dummy_marker.next = prop->val.markers;
+ dummy_marker.offset = 0;
+ dummy_marker.ref = NULL;
+ m = &dummy_marker;
+ }
+
+ struct marker *m_label = prop->val.markers;
+ for_each_marker(m) {
+ size_t chunk_len;
+ const char *p = &prop->val.val[m->offset];
+
+ if (!has_data_type_information(m))
+ continue;
+
+ chunk_len = type_marker_length(m);
+ if (!chunk_len)
+ chunk_len = len - m->offset;
+
+ if (emit_type != TYPE_NONE)
+ fprintf(f, "%s, ", delim_end[emit_type]);
+ emit_type = m->type;
+
+ for_each_marker_of_type(m_label, LABEL) {
+ if (m_label->offset > m->offset)
+ break;
+ fprintf(f, "%s: ", m_label->ref);
+ }
+
+ fprintf(f, "%s", delim_start[emit_type]);
+
+ if (chunk_len <= 0)
+ continue;
+
+ switch(emit_type) {
+ case TYPE_UINT16:
+ write_propval_int(f, p, chunk_len, 2);
+ break;
+ case TYPE_UINT32:
+ write_propval_int(f, p, chunk_len, 4);
+ break;
+ case TYPE_UINT64:
+ write_propval_int(f, p, chunk_len, 8);
+ break;
+ case TYPE_STRING:
+ write_propval_string(f, p, chunk_len);
+ break;
+ default:
+ write_propval_int(f, p, chunk_len, 1);
+ }
+ }
+
+ /* Wrap up any labels at the end of the value */
+ for_each_marker_of_type(m_label, LABEL) {
+ assert (m_label->offset == len);
+ fprintf(f, " %s:", m_label->ref);
+ }
+
+ fprintf(f, "%s;\n", delim_end[emit_type] ? : "");
}
static void write_tree_source_node(FILE *f, struct node *tree, int level)
write_tree_source_node(f, dti->dt, 0);
}
-
DTC_LINUX_PATH=`pwd`/scripts/dtc
DTC_SOURCE="checks.c data.c dtc.c dtc.h flattree.c fstree.c livetree.c srcpos.c \
- srcpos.h treesource.c util.c util.h version_gen.h Makefile.dtc \
+ srcpos.h treesource.c util.c util.h version_gen.h yamltree.c Makefile.dtc \
dtc-lexer.l dtc-parser.y"
LIBFDT_SOURCE="Makefile.libfdt fdt.c fdt.h fdt_addresses.c fdt_empty_tree.c \
fdt_overlay.c fdt_ro.c fdt_rw.c fdt_strerror.c fdt_sw.c \
return val;
}
-int utilfdt_read_err_len(const char *filename, char **buffp, off_t *len)
+int utilfdt_read_err(const char *filename, char **buffp, size_t *len)
{
int fd = 0; /* assume stdin */
char *buf = NULL;
- off_t bufsize = 1024, offset = 0;
+ size_t bufsize = 1024, offset = 0;
int ret = 0;
*buffp = NULL;
free(buf);
else
*buffp = buf;
- *len = bufsize;
+ if (len)
+ *len = bufsize;
return ret;
}
-int utilfdt_read_err(const char *filename, char **buffp)
-{
- off_t len;
- return utilfdt_read_err_len(filename, buffp, &len);
-}
-
-char *utilfdt_read_len(const char *filename, off_t *len)
+char *utilfdt_read(const char *filename, size_t *len)
{
char *buff;
- int ret = utilfdt_read_err_len(filename, &buff, len);
+ int ret = utilfdt_read_err(filename, &buff, len);
if (ret) {
fprintf(stderr, "Couldn't open blob from '%s': %s\n", filename,
return buff;
}
-char *utilfdt_read(const char *filename)
-{
- off_t len;
- return utilfdt_read_len(filename, &len);
-}
-
int utilfdt_write_err(const char *filename, const void *blob)
{
int fd = 1; /* assume stdout */
* stderr.
*
* @param filename The filename to read, or - for stdin
- * @return Pointer to allocated buffer containing fdt, or NULL on error
- */
-char *utilfdt_read(const char *filename);
-
-/**
- * Like utilfdt_read(), but also passes back the size of the file read.
- *
* @param len If non-NULL, the amount of data we managed to read
+ * @return Pointer to allocated buffer containing fdt, or NULL on error
*/
-char *utilfdt_read_len(const char *filename, off_t *len);
+char *utilfdt_read(const char *filename, size_t *len);
/**
* Read a device tree file into a buffer. Does not report errors, but only
*
* @param filename The filename to read, or - for stdin
* @param buffp Returns pointer to buffer containing fdt
- * @return 0 if ok, else an errno value representing the error
- */
-int utilfdt_read_err(const char *filename, char **buffp);
-
-/**
- * Like utilfdt_read_err(), but also passes back the size of the file read.
- *
* @param len If non-NULL, the amount of data we managed to read
+ * @return 0 if ok, else an errno value representing the error
*/
-int utilfdt_read_err_len(const char *filename, char **buffp, off_t *len);
+int utilfdt_read_err(const char *filename, char **buffp, size_t *len);
/**
* Write a device tree buffer to a file. This will report any errors on
-#define DTC_VERSION "DTC 1.4.6-g84e414b0"
+#define DTC_VERSION "DTC 1.4.7-gc86da84d"
--- /dev/null
+/*
+ * (C) Copyright Linaro, Ltd. 2018
+ * (C) Copyright Arm Holdings. 2017
+ * (C) Copyright David Gibson <dwg@au1.ibm.com>, IBM Corporation. 2005.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+
+#include <stdlib.h>
+#include <yaml.h>
+#include "dtc.h"
+#include "srcpos.h"
+
+char *yaml_error_name[] = {
+ [YAML_NO_ERROR] = "no error",
+ [YAML_MEMORY_ERROR] = "memory error",
+ [YAML_READER_ERROR] = "reader error",
+ [YAML_SCANNER_ERROR] = "scanner error",
+ [YAML_PARSER_ERROR] = "parser error",
+ [YAML_COMPOSER_ERROR] = "composer error",
+ [YAML_WRITER_ERROR] = "writer error",
+ [YAML_EMITTER_ERROR] = "emitter error",
+};
+
+#define yaml_emitter_emit_or_die(emitter, event) ( \
+{ \
+ if (!yaml_emitter_emit(emitter, event)) \
+ die("yaml '%s': %s in %s, line %i\n", \
+ yaml_error_name[(emitter)->error], \
+ (emitter)->problem, __func__, __LINE__); \
+})
+
+static void yaml_propval_int(yaml_emitter_t *emitter, struct marker *markers, char *data, int len, int width)
+{
+ yaml_event_t event;
+ void *tag;
+ int off, start_offset = markers->offset;
+
+ switch(width) {
+ case 1: tag = "!u8"; break;
+ case 2: tag = "!u16"; break;
+ case 4: tag = "!u32"; break;
+ case 8: tag = "!u64"; break;
+ default:
+ die("Invalid width %i", width);
+ }
+ assert(len % width == 0);
+
+ yaml_sequence_start_event_initialize(&event, NULL,
+ (yaml_char_t *)tag, width == 4, YAML_FLOW_SEQUENCE_STYLE);
+ yaml_emitter_emit_or_die(emitter, &event);
+
+ for (off = 0; off < len; off += width) {
+ char buf[32];
+ struct marker *m;
+ bool is_phandle = false;
+
+ switch(width) {
+ case 1:
+ sprintf(buf, "0x%"PRIx8, *(uint8_t*)(data + off));
+ break;
+ case 2:
+ sprintf(buf, "0x%"PRIx16, fdt16_to_cpu(*(fdt16_t*)(data + off)));
+ break;
+ case 4:
+ sprintf(buf, "0x%"PRIx32, fdt32_to_cpu(*(fdt32_t*)(data + off)));
+ m = markers;
+ is_phandle = false;
+ for_each_marker_of_type(m, REF_PHANDLE) {
+ if (m->offset == (start_offset + off)) {
+ is_phandle = true;
+ break;
+ }
+ }
+ break;
+ case 8:
+ sprintf(buf, "0x%"PRIx64, fdt64_to_cpu(*(fdt64_t*)(data + off)));
+ break;
+ }
+
+ if (is_phandle)
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t*)"!phandle", (yaml_char_t *)buf,
+ strlen(buf), 0, 0, YAML_PLAIN_SCALAR_STYLE);
+ else
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t*)YAML_INT_TAG, (yaml_char_t *)buf,
+ strlen(buf), 1, 1, YAML_PLAIN_SCALAR_STYLE);
+ yaml_emitter_emit_or_die(emitter, &event);
+ }
+
+ yaml_sequence_end_event_initialize(&event);
+ yaml_emitter_emit_or_die(emitter, &event);
+}
+
+static void yaml_propval_string(yaml_emitter_t *emitter, char *str, int len)
+{
+ yaml_event_t event;
+ int i;
+
+ assert(str[len-1] == '\0');
+
+ /* Make sure the entire string is in the lower 7-bit ascii range */
+ for (i = 0; i < len; i++)
+ assert(isascii(str[i]));
+
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)str,
+ len-1, 0, 1, YAML_DOUBLE_QUOTED_SCALAR_STYLE);
+ yaml_emitter_emit_or_die(emitter, &event);
+}
+
+static void yaml_propval(yaml_emitter_t *emitter, struct property *prop)
+{
+ yaml_event_t event;
+ int len = prop->val.len;
+ struct marker *m = prop->val.markers;
+
+ /* Emit the property name */
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)prop->name,
+ strlen(prop->name), 1, 1, YAML_PLAIN_SCALAR_STYLE);
+ yaml_emitter_emit_or_die(emitter, &event);
+
+ /* Boolean properties are easiest to deal with. Length is zero, so just emit 'true' */
+ if (len == 0) {
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_BOOL_TAG,
+ (yaml_char_t*)"true",
+ strlen("true"), 1, 0, YAML_PLAIN_SCALAR_STYLE);
+ yaml_emitter_emit_or_die(emitter, &event);
+ return;
+ }
+
+ if (!m)
+ die("No markers present in property '%s' value\n", prop->name);
+
+ yaml_sequence_start_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_SEQ_TAG, 1, YAML_FLOW_SEQUENCE_STYLE);
+ yaml_emitter_emit_or_die(emitter, &event);
+
+ for_each_marker(m) {
+ int chunk_len;
+ char *data = &prop->val.val[m->offset];
+
+ if (m->type < TYPE_UINT8)
+ continue;
+
+ chunk_len = type_marker_length(m) ? : len;
+ assert(chunk_len > 0);
+ len -= chunk_len;
+
+ switch(m->type) {
+ case TYPE_UINT16:
+ yaml_propval_int(emitter, m, data, chunk_len, 2);
+ break;
+ case TYPE_UINT32:
+ yaml_propval_int(emitter, m, data, chunk_len, 4);
+ break;
+ case TYPE_UINT64:
+ yaml_propval_int(emitter, m, data, chunk_len, 8);
+ break;
+ case TYPE_STRING:
+ yaml_propval_string(emitter, data, chunk_len);
+ break;
+ default:
+ yaml_propval_int(emitter, m, data, chunk_len, 1);
+ break;
+ }
+ }
+
+ yaml_sequence_end_event_initialize(&event);
+ yaml_emitter_emit_or_die(emitter, &event);
+}
+
+
+static void yaml_tree(struct node *tree, yaml_emitter_t *emitter)
+{
+ struct property *prop;
+ struct node *child;
+ yaml_event_t event;
+
+ if (tree->deleted)
+ return;
+
+ yaml_mapping_start_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_MAP_TAG, 1, YAML_ANY_MAPPING_STYLE);
+ yaml_emitter_emit_or_die(emitter, &event);
+
+ for_each_property(tree, prop)
+ yaml_propval(emitter, prop);
+
+ /* Loop over all the children, emitting them into the map */
+ for_each_child(tree, child) {
+ yaml_scalar_event_initialize(&event, NULL,
+ (yaml_char_t *)YAML_STR_TAG, (yaml_char_t*)child->name,
+ strlen(child->name), 1, 0, YAML_PLAIN_SCALAR_STYLE);
+ yaml_emitter_emit_or_die(emitter, &event);
+ yaml_tree(child, emitter);
+ }
+
+ yaml_mapping_end_event_initialize(&event);
+ yaml_emitter_emit_or_die(emitter, &event);
+}
+
+void dt_to_yaml(FILE *f, struct dt_info *dti)
+{
+ yaml_emitter_t emitter;
+ yaml_event_t event;
+
+ yaml_emitter_initialize(&emitter);
+ yaml_emitter_set_output_file(&emitter, f);
+ yaml_stream_start_event_initialize(&event, YAML_UTF8_ENCODING);
+ yaml_emitter_emit_or_die(&emitter, &event);
+
+ yaml_document_start_event_initialize(&event, NULL, NULL, NULL, 0);
+ yaml_emitter_emit_or_die(&emitter, &event);
+
+ yaml_sequence_start_event_initialize(&event, NULL, (yaml_char_t *)YAML_SEQ_TAG, 1, YAML_ANY_SEQUENCE_STYLE);
+ yaml_emitter_emit_or_die(&emitter, &event);
+
+ yaml_tree(dti->dt, &emitter);
+
+ yaml_sequence_end_event_initialize(&event);
+ yaml_emitter_emit_or_die(&emitter, &event);
+
+ yaml_document_end_event_initialize(&event, 0);
+ yaml_emitter_emit_or_die(&emitter, &event);
+
+ yaml_stream_end_event_initialize(&event);
+ yaml_emitter_emit_or_die(&emitter, &event);
+
+ yaml_emitter_delete(&emitter);
+}
# add vrf table
ip li add ${VRF} type vrf table ${VRF_TABLE}
ip li set ${VRF} up
- ip ro add table ${VRF_TABLE} unreachable default
- ip -6 ro add table ${VRF_TABLE} unreachable default
+ ip ro add table ${VRF_TABLE} unreachable default metric 8192
+ ip -6 ro add table ${VRF_TABLE} unreachable default metric 8192
# create test interfaces
ip li add ${NETIFS[p1]} type veth peer name ${NETIFS[p2]}
for n in 1 3 5 7; do
ip li set ${NETIFS[p${n}]} up
ip addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
- ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ ip addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
done
# move peer interfaces to namespace and add addresses
for n in 2 4 6 8; do
ip li set ${NETIFS[p${n}]} netns ${PEER_NS} up
ip -netns ${PEER_NS} addr add ${V4ADDRS[p${n}]}/24 dev ${NETIFS[p${n}]}
- ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]}
+ ip -netns ${PEER_NS} addr add ${V6ADDRS[p${n}]}/64 dev ${NETIFS[p${n}]} nodad
done
- set +e
+ ip -6 ro add default via ${V6ADDRS[p3]/::[0-9]/::64}
+ ip -6 ro add table ${VRF_TABLE} default via ${V6ADDRS[p7]/::[0-9]/::64}
- # let DAD complete - assume default of 1 probe
- sleep 1
+ set +e
}
cleanup()
tm \
vphn \
math \
- ptrace
+ ptrace \
+ security
endif
: "memory")
#define mb() asm volatile("sync" : : : "memory");
+#define barrier() asm volatile("" : : : "memory");
#define SPRN_MMCR2 769
#define SPRN_MMCRA 770
#include <stdint.h>
#include <stdbool.h>
#include <linux/auxvec.h>
+#include <linux/perf_event.h>
#include "reg.h"
/* Avoid headaches with PRI?64 - just use %ll? always */
int pick_online_cpu(void);
+int read_debugfs_file(char *debugfs_file, int *result);
+int write_debugfs_file(char *debugfs_file, int result);
+void set_dscr(unsigned long val);
+int perf_event_open_counter(unsigned int type,
+ unsigned long config, int group_fd);
+int perf_event_enable(int fd);
+int perf_event_disable(int fd);
+int perf_event_reset(int fd);
+
static inline bool have_hwcap(unsigned long ftr)
{
return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
+#if defined(__powerpc64__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
+#elif defined(__powerpc__)
+#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
+#else
+#error implement UCONTEXT_NIA
+#endif
+
#endif /* _SELFTESTS_POWERPC_UTILS_H */
subpage_prot
tempfile
prot_sao
-segv_errors
\ No newline at end of file
+segv_errors
+wild_bctr
\ No newline at end of file
noarg:
$(MAKE) -C ../
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
$(OUTPUT)/prot_sao: ../utils.c
+$(OUTPUT)/wild_bctr: CFLAGS += -m64
+
$(OUTPUT)/tempfile:
dd if=/dev/zero of=$@ bs=64k count=1
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2018, Michael Ellerman, IBM Corp.
+ *
+ * Test that an out-of-bounds branch to counter behaves as expected.
+ */
+
+#include <setjmp.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+#define BAD_NIP 0x788c545a18000000ull
+
+static struct pt_regs signal_regs;
+static jmp_buf setjmp_env;
+
+static void save_regs(ucontext_t *ctxt)
+{
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ memcpy(&signal_regs, regs, sizeof(signal_regs));
+}
+
+static void segv_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+ save_regs(ctxt_v);
+ longjmp(setjmp_env, 1);
+}
+
+static void usr2_handler(int signum, siginfo_t *info, void *ctxt_v)
+{
+ save_regs(ctxt_v);
+}
+
+static int ok(void)
+{
+ printf("Everything is OK in here.\n");
+ return 0;
+}
+
+#define REG_POISON 0x5a5aUL
+#define POISONED_REG(n) ((REG_POISON << 48) | ((n) << 32) | (REG_POISON << 16) | (n))
+
+static inline void poison_regs(void)
+{
+ #define POISON_REG(n) \
+ "lis " __stringify(n) "," __stringify(REG_POISON) ";" \
+ "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";" \
+ "sldi " __stringify(n) "," __stringify(n) ", 32 ;" \
+ "oris " __stringify(n) "," __stringify(n) "," __stringify(REG_POISON) ";" \
+ "addi " __stringify(n) "," __stringify(n) "," __stringify(n) ";"
+
+ asm (POISON_REG(15)
+ POISON_REG(16)
+ POISON_REG(17)
+ POISON_REG(18)
+ POISON_REG(19)
+ POISON_REG(20)
+ POISON_REG(21)
+ POISON_REG(22)
+ POISON_REG(23)
+ POISON_REG(24)
+ POISON_REG(25)
+ POISON_REG(26)
+ POISON_REG(27)
+ POISON_REG(28)
+ POISON_REG(29)
+ : // inputs
+ : // outputs
+ : "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25",
+ "26", "27", "28", "29"
+ );
+ #undef POISON_REG
+}
+
+static int check_regs(void)
+{
+ unsigned long i;
+
+ for (i = 15; i <= 29; i++)
+ FAIL_IF(signal_regs.gpr[i] != POISONED_REG(i));
+
+ printf("Regs OK\n");
+ return 0;
+}
+
+static void dump_regs(void)
+{
+ for (int i = 0; i < 32; i += 4) {
+ printf("r%02d 0x%016lx r%02d 0x%016lx " \
+ "r%02d 0x%016lx r%02d 0x%016lx\n",
+ i, signal_regs.gpr[i],
+ i+1, signal_regs.gpr[i+1],
+ i+2, signal_regs.gpr[i+2],
+ i+3, signal_regs.gpr[i+3]);
+ }
+}
+
+int test_wild_bctr(void)
+{
+ int (*func_ptr)(void);
+ struct sigaction segv = {
+ .sa_sigaction = segv_handler,
+ .sa_flags = SA_SIGINFO
+ };
+ struct sigaction usr2 = {
+ .sa_sigaction = usr2_handler,
+ .sa_flags = SA_SIGINFO
+ };
+
+ FAIL_IF(sigaction(SIGSEGV, &segv, NULL));
+ FAIL_IF(sigaction(SIGUSR2, &usr2, NULL));
+
+ bzero(&signal_regs, sizeof(signal_regs));
+
+ if (setjmp(setjmp_env) == 0) {
+ func_ptr = ok;
+ func_ptr();
+
+ kill(getpid(), SIGUSR2);
+ printf("Regs before:\n");
+ dump_regs();
+ bzero(&signal_regs, sizeof(signal_regs));
+
+ poison_regs();
+
+ func_ptr = (int (*)(void))BAD_NIP;
+ func_ptr();
+
+ FAIL_IF(1); /* we didn't segv? */
+ }
+
+ FAIL_IF(signal_regs.nip != BAD_NIP);
+
+ printf("All good - took SEGV as expected branching to 0x%llx\n", BAD_NIP);
+
+ dump_regs();
+ FAIL_IF(check_regs());
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_wild_bctr, "wild_bctr");
+}
extern char __start___ex_table[];
extern char __stop___ex_table[];
-#if defined(__powerpc64__)
-#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
-#elif defined(__powerpc__)
-#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.uc_regs->gregs[PT_NIP]
-#else
-#error implement UCONTEXT_NIA
-#endif
-
struct extbl_entry {
int insn;
int fixup;
TEST_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
- perf-hwbreak
+ perf-hwbreak ptrace-syscall
top_srcdir = ../../../../..
include ../../lib.mk
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A ptrace test for testing PTRACE_SYSEMU, PTRACE_SETREGS and
+ * PTRACE_GETREG. This test basically create a child process that executes
+ * syscalls and the parent process check if it is being traced appropriated.
+ *
+ * This test is heavily based on tools/testing/selftests/x86/ptrace_syscall.c
+ * test, and it was adapted to run on Powerpc by
+ * Breno Leitao <leitao@debian.org>
+ */
+#define _GNU_SOURCE
+
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/user.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <err.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include "utils.h"
+
+/* Bitness-agnostic defines for user_regs_struct fields. */
+#define user_syscall_nr gpr[0]
+#define user_arg0 gpr[3]
+#define user_arg1 gpr[4]
+#define user_arg2 gpr[5]
+#define user_arg3 gpr[6]
+#define user_arg4 gpr[7]
+#define user_arg5 gpr[8]
+#define user_ip nip
+
+#define PTRACE_SYSEMU 0x1d
+
+static int nerrs;
+
+static void wait_trap(pid_t chld)
+{
+ siginfo_t si;
+
+ if (waitid(P_PID, chld, &si, WEXITED|WSTOPPED) != 0)
+ err(1, "waitid");
+ if (si.si_pid != chld)
+ errx(1, "got unexpected pid in event\n");
+ if (si.si_code != CLD_TRAPPED)
+ errx(1, "got unexpected event type %d\n", si.si_code);
+}
+
+static void test_ptrace_syscall_restart(void)
+{
+ int status;
+ struct pt_regs regs;
+ pid_t chld;
+
+ printf("[RUN]\tptrace-induced syscall restart\n");
+
+ chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ /*
+ * Child process is running 4 syscalls after ptrace.
+ *
+ * 1) getpid()
+ * 2) gettid()
+ * 3) tgkill() -> Send SIGSTOP
+ * 4) gettid() -> Where the tests will happen essentially
+ */
+ if (chld == 0) {
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tChild will make one syscall\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ syscall(SYS_gettid, 10, 11, 12, 13, 14, 15);
+ _exit(0);
+ }
+ /* Parent process below */
+
+ /* Wait for SIGSTOP sent by tgkill above. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ printf("[RUN]\tSYSEMU\n");
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /*
+ * Ptrace trapped prior to executing the syscall, thus r3 still has
+ * the syscall number instead of the sys_gettid() result
+ */
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tInitial args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+ (unsigned long)regs.user_syscall_nr,
+ (unsigned long)regs.user_arg0,
+ (unsigned long)regs.user_arg1,
+ (unsigned long)regs.user_arg2,
+ (unsigned long)regs.user_arg3,
+ (unsigned long)regs.user_arg4,
+ (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tInitial nr and args are correct\n"); }
+
+ printf("[RUN]\tRestart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ /*
+ * Rewind to retry the same syscall again. This will basically test
+ * the rewind process together with PTRACE_SETREGS and PTRACE_GETREGS.
+ */
+ regs.user_ip -= 4;
+ if (ptrace(PTRACE_SETREGS, chld, 0, ®s) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ if (regs.user_syscall_nr != SYS_gettid ||
+ regs.user_arg0 != 10 || regs.user_arg1 != 11 ||
+ regs.user_arg2 != 12 || regs.user_arg3 != 13 ||
+ regs.user_arg4 != 14 || regs.user_arg5 != 15) {
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+ (unsigned long)regs.user_syscall_nr,
+ (unsigned long)regs.user_arg0,
+ (unsigned long)regs.user_arg1,
+ (unsigned long)regs.user_arg2,
+ (unsigned long)regs.user_arg3,
+ (unsigned long)regs.user_arg4,
+ (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tRestarted nr and args are correct\n");
+ }
+
+ printf("[RUN]\tChange nr and args and restart the syscall (ip = 0x%lx)\n",
+ (unsigned long)regs.user_ip);
+
+ /*
+ * Inject a new syscall (getpid) in the same place the previous
+ * syscall (gettid), rewind and re-execute.
+ */
+ regs.user_syscall_nr = SYS_getpid;
+ regs.user_arg0 = 20;
+ regs.user_arg1 = 21;
+ regs.user_arg2 = 22;
+ regs.user_arg3 = 23;
+ regs.user_arg4 = 24;
+ regs.user_arg5 = 25;
+ regs.user_ip -= 4;
+
+ if (ptrace(PTRACE_SETREGS, chld, 0, ®s) != 0)
+ err(1, "PTRACE_SETREGS");
+
+ if (ptrace(PTRACE_SYSEMU, chld, 0, 0) != 0)
+ err(1, "PTRACE_SYSEMU");
+ wait_trap(chld);
+
+ if (ptrace(PTRACE_GETREGS, chld, 0, ®s) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /* Check that ptrace stopped at the new syscall that was
+ * injected, and guarantee that it haven't executed, i.e, user_args
+ * contain the arguments and not the syscall return value, for
+ * instance.
+ */
+ if (regs.user_syscall_nr != SYS_getpid
+ || regs.user_arg0 != 20 || regs.user_arg1 != 21
+ || regs.user_arg2 != 22 || regs.user_arg3 != 23
+ || regs.user_arg4 != 24 || regs.user_arg5 != 25) {
+
+ printf("[FAIL]\tRestart nr or args are wrong (nr=%lu, args=%lu %lu %lu %lu %lu %lu)\n",
+ (unsigned long)regs.user_syscall_nr,
+ (unsigned long)regs.user_arg0,
+ (unsigned long)regs.user_arg1,
+ (unsigned long)regs.user_arg2,
+ (unsigned long)regs.user_arg3,
+ (unsigned long)regs.user_arg4,
+ (unsigned long)regs.user_arg5);
+ nerrs++;
+ } else {
+ printf("[OK]\tReplacement nr and args are correct\n");
+ }
+
+ if (ptrace(PTRACE_CONT, chld, 0, 0) != 0)
+ err(1, "PTRACE_CONT");
+
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+
+ /* Guarantee that the process executed properly, returning 0 */
+ if (!WIFEXITED(status) || WEXITSTATUS(status) != 0) {
+ printf("[FAIL]\tChild failed\n");
+ nerrs++;
+ } else {
+ printf("[OK]\tChild exited cleanly\n");
+ }
+}
+
+int ptrace_syscall(void)
+{
+ test_ptrace_syscall_restart();
+
+ return nerrs;
+}
+
+int main(void)
+{
+ return test_harness(ptrace_syscall, "ptrace_syscall");
+}
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0+
+
+TEST_GEN_PROGS := rfi_flush
+
+CFLAGS += -I../../../../../usr/include
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+
+#define CACHELINE_SIZE 128
+
+struct perf_event_read {
+ __u64 nr;
+ __u64 l1d_misses;
+};
+
+static inline __u64 load(void *addr)
+{
+ __u64 tmp;
+
+ asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+ return tmp;
+}
+
+static void syscall_loop(char *p, unsigned long iterations,
+ unsigned long zero_size)
+{
+ for (unsigned long i = 0; i < iterations; i++) {
+ for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+ load(p + j);
+ getppid();
+ }
+}
+
+int rfi_flush_test(void)
+{
+ char *p;
+ int repetitions = 10;
+ int fd, passes = 0, iter, rc = 0;
+ struct perf_event_read v;
+ __u64 l1d_misses_total = 0;
+ unsigned long iterations = 100000, zero_size = 24 * 1024;
+ int rfi_flush_org, rfi_flush;
+
+ SKIP_IF(geteuid() != 0);
+
+ if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+ perror("Unable to read powerpc/rfi_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ rfi_flush = rfi_flush_org;
+
+ fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+ FAIL_IF(fd < 0);
+
+ p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+ FAIL_IF(perf_event_enable(fd));
+
+ set_dscr(1);
+
+ iter = repetitions;
+
+again:
+ FAIL_IF(perf_event_reset(fd));
+
+ syscall_loop(p, iterations, zero_size);
+
+ FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+ /* Expect at least zero_size/CACHELINE_SIZE misses per iteration */
+ if (v.l1d_misses >= (iterations * zero_size / CACHELINE_SIZE) && rfi_flush)
+ passes++;
+ else if (v.l1d_misses < iterations && !rfi_flush)
+ passes++;
+
+ l1d_misses_total += v.l1d_misses;
+
+ while (--iter)
+ goto again;
+
+ if (passes < repetitions) {
+ printf("FAIL (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+ rfi_flush, l1d_misses_total, rfi_flush ? '<' : '>',
+ rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+ repetitions - passes, repetitions);
+ rc = 1;
+ } else
+ printf("PASS (L1D misses with rfi_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+ rfi_flush, l1d_misses_total, rfi_flush ? '>' : '<',
+ rfi_flush ? (repetitions * iterations * zero_size / CACHELINE_SIZE) : iterations,
+ passes, repetitions);
+
+ if (rfi_flush == rfi_flush_org) {
+ rfi_flush = !rfi_flush_org;
+ if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
+ perror("error writing to powerpc/rfi_flush debugfs file");
+ return 1;
+ }
+ iter = repetitions;
+ l1d_misses_total = 0;
+ passes = 0;
+ goto again;
+ }
+
+ perf_event_disable(fd);
+ close(fd);
+
+ set_dscr(0);
+
+ if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+ perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+ return 1;
+ }
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(rfi_flush_test, "rfi_flush_test");
+}
int test_tmspr()
{
- pthread_t thread;
+ pthread_t *thread;
int thread_num;
unsigned long i;
/* To cause some context switching */
thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
+ thread = malloc(thread_num * sizeof(pthread_t));
+ if (thread == NULL)
+ return EXIT_FAILURE;
+
/* Test TFIAR and TFHAR */
- for (i = 0 ; i < thread_num ; i += 2){
- if (pthread_create(&thread, NULL, (void*)tfiar_tfhar, (void *)i))
+ for (i = 0; i < thread_num; i += 2) {
+ if (pthread_create(&thread[i], NULL, (void *)tfiar_tfhar,
+ (void *)i))
return EXIT_FAILURE;
}
- if (pthread_join(thread, NULL) != 0)
- return EXIT_FAILURE;
-
/* Test TEXASR */
- for (i = 0 ; i < thread_num ; i++){
- if (pthread_create(&thread, NULL, (void*)texasr, (void *)i))
+ for (i = 1; i < thread_num; i += 2) {
+ if (pthread_create(&thread[i], NULL, (void *)texasr, (void *)i))
return EXIT_FAILURE;
}
- if (pthread_join(thread, NULL) != 0)
- return EXIT_FAILURE;
+
+ for (i = 0; i < thread_num; i++) {
+ if (pthread_join(thread[i], NULL) != 0)
+ return EXIT_FAILURE;
+ }
+
+ free(thread);
if (passed)
return 0;
}
/* Check if we were not expecting a failure and a it occurred. */
- if (!expecting_failure() && is_failure(cr_)) {
+ if (!expecting_failure() && is_failure(cr_) &&
+ !failure_is_reschedule()) {
printf("\n\tUnexpected transaction failure 0x%02lx\n\t",
failure_code());
return (void *) -1;
/*
* Check if TM failed due to the cause we were expecting. 0xda is a
- * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause.
+ * TM_CAUSE_FAC_UNAV cause, otherwise it's an unexpected cause, unless
+ * it was caused by a reschedule.
*/
- if (is_failure(cr_) && !failure_is_unavailable()) {
+ if (is_failure(cr_) && !failure_is_unavailable() &&
+ !failure_is_reschedule()) {
printf("\n\tUnexpected failure cause 0x%02lx\n\t",
failure_code());
return (void *) -1;
return (failure_code() & TM_CAUSE_FAC_UNAV) == TM_CAUSE_FAC_UNAV;
}
+static inline bool failure_is_reschedule(void)
+{
+ if ((failure_code() & TM_CAUSE_RESCHED) == TM_CAUSE_RESCHED ||
+ (failure_code() & TM_CAUSE_KVM_RESCHED) == TM_CAUSE_KVM_RESCHED)
+ return true;
+
+ return false;
+}
+
static inline bool failure_is_nesting(void)
{
return (__builtin_get_texasru() & 0x400000);
#include <fcntl.h>
#include <link.h>
#include <sched.h>
+#include <signal.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
+#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
+#include <asm/unistd.h>
+#include <linux/limits.h>
#include "utils.h"
static char auxv[4096];
+extern unsigned int dscr_insn[];
int read_auxv(char *buf, ssize_t buf_size)
{
return strcmp(uts.machine, "ppc64le") == 0;
}
+
+int read_debugfs_file(char *debugfs_file, int *result)
+{
+ int rc = -1, fd;
+ char path[PATH_MAX];
+ char value[16];
+
+ strcpy(path, "/sys/kernel/debug/");
+ strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
+
+ if ((fd = open(path, O_RDONLY)) < 0)
+ return rc;
+
+ if ((rc = read(fd, value, sizeof(value))) < 0)
+ return rc;
+
+ value[15] = 0;
+ *result = atoi(value);
+ close(fd);
+
+ return 0;
+}
+
+int write_debugfs_file(char *debugfs_file, int result)
+{
+ int rc = -1, fd;
+ char path[PATH_MAX];
+ char value[16];
+
+ strcpy(path, "/sys/kernel/debug/");
+ strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
+
+ if ((fd = open(path, O_WRONLY)) < 0)
+ return rc;
+
+ snprintf(value, 16, "%d", result);
+
+ if ((rc = write(fd, value, strlen(value))) < 0)
+ return rc;
+
+ close(fd);
+
+ return 0;
+}
+
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
+ int cpu, int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, hw_event, pid, cpu,
+ group_fd, flags);
+}
+
+static void perf_event_attr_init(struct perf_event_attr *event_attr,
+ unsigned int type,
+ unsigned long config)
+{
+ memset(event_attr, 0, sizeof(*event_attr));
+
+ event_attr->type = type;
+ event_attr->size = sizeof(struct perf_event_attr);
+ event_attr->config = config;
+ event_attr->read_format = PERF_FORMAT_GROUP;
+ event_attr->disabled = 1;
+ event_attr->exclude_kernel = 1;
+ event_attr->exclude_hv = 1;
+ event_attr->exclude_guest = 1;
+}
+
+int perf_event_open_counter(unsigned int type,
+ unsigned long config, int group_fd)
+{
+ int fd;
+ struct perf_event_attr event_attr;
+
+ perf_event_attr_init(&event_attr, type, config);
+
+ fd = perf_event_open(&event_attr, 0, -1, group_fd, 0);
+
+ if (fd < 0)
+ perror("perf_event_open() failed");
+
+ return fd;
+}
+
+int perf_event_enable(int fd)
+{
+ if (ioctl(fd, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
+ perror("error while enabling perf events");
+ return -1;
+ }
+
+ return 0;
+}
+
+int perf_event_disable(int fd)
+{
+ if (ioctl(fd, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
+ perror("error disabling perf events");
+ return -1;
+ }
+
+ return 0;
+}
+
+int perf_event_reset(int fd)
+{
+ if (ioctl(fd, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
+ perror("error resetting perf events");
+ return -1;
+ }
+
+ return 0;
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+ static int warned = 0;
+ ucontext_t *ctx = (ucontext_t *)unused;
+ unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+ if (*pc == (unsigned long)&dscr_insn) {
+ if (!warned++)
+ printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+ *pc += 4;
+ } else {
+ printf("SIGILL at %p\n", pc);
+ abort();
+ }
+}
+
+void set_dscr(unsigned long val)
+{
+ static int init = 0;
+ struct sigaction sa;
+
+ if (!init) {
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigill_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGILL, &sa, NULL))
+ perror("sigill_handler");
+ init = 1;
+ }
+
+ asm volatile("dscr_insn: mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}