.. kernel-doc:: block/genhd.c
:export:
+.. kernel-doc:: block/bdev.c
+ :export:
+
Char devices
============
properties:
compatible:
- oneOf:
- - const: qcom,dsi-phy-7nm
- - const: qcom,dsi-phy-7nm-8150
- - const: qcom,sc7280-dsi-phy-7nm
+ enum:
+ - qcom,dsi-phy-7nm
+ - qcom,dsi-phy-7nm-8150
+ - qcom,sc7280-dsi-phy-7nm
reg:
items:
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/gpio-virtio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio GPIO controller
+
+maintainers:
+ - Viresh Kumar <viresh.kumar@linaro.org>
+
+allOf:
+ - $ref: /schemas/virtio/virtio-device.yaml#
+
+description:
+ Virtio GPIO controller, see /schemas/virtio/virtio-device.yaml for more
+ details.
+
+properties:
+ $nodename:
+ const: gpio
+
+ compatible:
+ const: virtio,device29
+
+ gpio-controller: true
+
+ "#gpio-cells":
+ const: 2
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ const: 2
+
+required:
+ - compatible
+ - gpio-controller
+ - "#gpio-cells"
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ virtio@3000 {
+ compatible = "virtio,mmio";
+ reg = <0x3000 0x100>;
+ interrupts = <41>;
+
+ gpio {
+ compatible = "virtio,device29";
+ gpio-controller;
+ #gpio-cells = <2>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+ };
+
+...
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/i2c-virtio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio I2C Adapter
+
+maintainers:
+ - Viresh Kumar <viresh.kumar@linaro.org>
+
+allOf:
+ - $ref: /schemas/i2c/i2c-controller.yaml#
+ - $ref: /schemas/virtio/virtio-device.yaml#
+
+description:
+ Virtio I2C device, see /schemas/virtio/virtio-device.yaml for more details.
+
+properties:
+ $nodename:
+ const: i2c
+
+ compatible:
+ const: virtio,device22
+
+required:
+ - compatible
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ virtio@3000 {
+ compatible = "virtio,mmio";
+ reg = <0x3000 0x100>;
+ interrupts = <41>;
+
+ i2c {
+ compatible = "virtio,device22";
+
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ light-sensor@20 {
+ compatible = "dynaimage,al3320a";
+ reg = <0x20>;
+ };
+ };
+ };
+
+...
description:
Regulator for the LRADC reference voltage
+ wakeup-source: true
+
patternProperties:
"^button-[0-9]+$":
type: object
+++ /dev/null
-Qualcomm PM8941 PMIC Power Key
-
-PROPERTIES
-
-- compatible:
- Usage: required
- Value type: <string>
- Definition: must be one of:
- "qcom,pm8941-pwrkey"
- "qcom,pm8941-resin"
- "qcom,pmk8350-pwrkey"
- "qcom,pmk8350-resin"
-
-- reg:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: base address of registers for block
-
-- interrupts:
- Usage: required
- Value type: <prop-encoded-array>
- Definition: key change interrupt; The format of the specifier is
- defined by the binding document describing the node's
- interrupt parent.
-
-- debounce:
- Usage: optional
- Value type: <u32>
- Definition: time in microseconds that key must be pressed or released
- for state change interrupt to trigger.
-
-- bias-pull-up:
- Usage: optional
- Value type: <empty>
- Definition: presence of this property indicates that the KPDPWR_N pin
- should be configured for pull up.
-
-- linux,code:
- Usage: optional
- Value type: <u32>
- Definition: The input key-code associated with the power key.
- Use the linux event codes defined in
- include/dt-bindings/input/linux-event-codes.h
- When property is omitted KEY_POWER is assumed.
-
-EXAMPLE
-
- pwrkey@800 {
- compatible = "qcom,pm8941-pwrkey";
- reg = <0x800>;
- interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
- debounce = <15625>;
- bias-pull-up;
- linux,code = <KEY_POWER>;
- };
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/qcom,pm8941-pwrkey.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PM8941 PMIC Power Key
+
+maintainers:
+ - Courtney Cavin <courtney.cavin@sonymobile.com>
+ - Vinod Koul <vkoul@kernel.org>
+
+allOf:
+ - $ref: input.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,pm8941-pwrkey
+ - qcom,pm8941-resin
+ - qcom,pmk8350-pwrkey
+ - qcom,pmk8350-resin
+
+ interrupts:
+ maxItems: 1
+
+ debounce:
+ description: |
+ Time in microseconds that key must be pressed or
+ released for state change interrupt to trigger.
+ $ref: /schemas/types.yaml#/definitions/uint32
+
+ bias-pull-up:
+ description: |
+ Presence of this property indicates that the KPDPWR_N
+ pin should be configured for pull up.
+ $ref: /schemas/types.yaml#/definitions/flag
+
+ linux,code:
+ description: |
+ The input key-code associated with the power key.
+ Use the linux event codes defined in
+ include/dt-bindings/input/linux-event-codes.h
+ When property is omitted KEY_POWER is assumed.
+
+required:
+ - compatible
+ - interrupts
+
+unevaluatedProperties: false
+...
+++ /dev/null
-* Regulator Haptic Device Tree Bindings
-
-Required Properties:
- - compatible : Should be "regulator-haptic"
- - haptic-supply : Power supply to the haptic motor.
- [*] refer Documentation/devicetree/bindings/regulator/regulator.txt
-
- - max-microvolt : The maximum voltage value supplied to the haptic motor.
- [The unit of the voltage is a micro]
-
- - min-microvolt : The minimum voltage value supplied to the haptic motor.
- [The unit of the voltage is a micro]
-
-Example:
-
- haptics {
- compatible = "regulator-haptic";
- haptic-supply = <&motor_regulator>;
- max-microvolt = <2700000>;
- min-microvolt = <1100000>;
- };
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/input/regulator-haptic.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Regulator Haptic Device Tree Bindings
+
+maintainers:
+ - Jaewon Kim <jaewon02.kim@samsung.com>
+
+properties:
+ compatible:
+ const: regulator-haptic
+
+ haptic-supply:
+ description: >
+ Power supply to the haptic motor
+
+ max-microvolt:
+ description: >
+ The maximum voltage value supplied to the haptic motor
+
+ min-microvolt:
+ description: >
+ The minimum voltage value supplied to the haptic motor
+
+required:
+ - compatible
+ - haptic-supply
+ - max-microvolt
+ - min-microvolt
+
+additionalProperties: false
+
+examples:
+ - |
+ haptics {
+ compatible = "regulator-haptic";
+ haptic-supply = <&motor_regulator>;
+ max-microvolt = <2700000>;
+ min-microvolt = <1100000>;
+ };
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/chipone,icn8318.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ChipOne ICN8318 Touchscreen Controller Device Tree Bindings
+
+maintainers:
+ - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+properties:
+ compatible:
+ const: chipone,icn8318
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ wake-gpios:
+ maxItems: 1
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - wake-gpios
+ - touchscreen-size-x
+ - touchscreen-size-y
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ touchscreen@40 {
+ compatible = "chipone,icn8318";
+ reg = <0x40>;
+ interrupt-parent = <&pio>;
+ interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */
+ pinctrl-names = "default";
+ pinctrl-0 = <&ts_wake_pin_p66>;
+ wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
+ touchscreen-size-x = <800>;
+ touchscreen-size-y = <480>;
+ touchscreen-inverted-x;
+ touchscreen-swapped-x-y;
+ };
+ };
+
+...
+++ /dev/null
-* ChipOne icn8318 I2C touchscreen controller
-
-Required properties:
- - compatible : "chipone,icn8318"
- - reg : I2C slave address of the chip (0x40)
- - interrupts : interrupt specification for the icn8318 interrupt
- - wake-gpios : GPIO specification for the WAKE input
- - touchscreen-size-x : horizontal resolution of touchscreen (in pixels)
- - touchscreen-size-y : vertical resolution of touchscreen (in pixels)
-
-Optional properties:
- - pinctrl-names : should be "default"
- - pinctrl-0: : a phandle pointing to the pin settings for the
- control gpios
- - touchscreen-fuzz-x : horizontal noise value of the absolute input
- device (in pixels)
- - touchscreen-fuzz-y : vertical noise value of the absolute input
- device (in pixels)
- - touchscreen-inverted-x : X axis is inverted (boolean)
- - touchscreen-inverted-y : Y axis is inverted (boolean)
- - touchscreen-swapped-x-y : X and Y axis are swapped (boolean)
- Swapping is done after inverting the axis
-
-Example:
-
-i2c@00000000 {
- /* ... */
-
- chipone_icn8318@40 {
- compatible = "chipone,icn8318";
- reg = <0x40>;
- interrupt-parent = <&pio>;
- interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */
- pinctrl-names = "default";
- pinctrl-0 = <&ts_wake_pin_p66>;
- wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
- touchscreen-size-x = <800>;
- touchscreen-size-y = <480>;
- touchscreen-inverted-x;
- touchscreen-swapped-x-y;
- };
-
- /* ... */
-};
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/pixcir,pixcir_ts.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Pixcir Touchscreen Controller Device Tree Bindings
+
+maintainers:
+ - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+properties:
+ compatible:
+ enum:
+ - pixcir,pixcir_ts
+ - pixcir,pixcir_tangoc
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ attb-gpio:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+
+ enable-gpios:
+ maxItems: 1
+
+ wake-gpios:
+ maxItems: 1
+
+unevaluatedProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - attb-gpio
+ - touchscreen-size-x
+ - touchscreen-size-y
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ touchscreen@5c {
+ compatible = "pixcir,pixcir_ts";
+ reg = <0x5c>;
+ interrupts = <2 0>;
+ attb-gpio = <&gpf 2 0 2>;
+ touchscreen-size-x = <800>;
+ touchscreen-size-y = <600>;
+ };
+ };
+
+...
+++ /dev/null
-* Pixcir I2C touchscreen controllers
-
-Required properties:
-- compatible: must be "pixcir,pixcir_ts" or "pixcir,pixcir_tangoc"
-- reg: I2C address of the chip
-- interrupts: interrupt to which the chip is connected
-- attb-gpio: GPIO connected to the ATTB line of the chip
-- touchscreen-size-x: horizontal resolution of touchscreen (in pixels)
-- touchscreen-size-y: vertical resolution of touchscreen (in pixels)
-
-Optional properties:
-- reset-gpios: GPIO connected to the RESET line of the chip
-- enable-gpios: GPIO connected to the ENABLE line of the chip
-- wake-gpios: GPIO connected to the WAKE line of the chip
-
-Example:
-
- i2c@00000000 {
- /* ... */
-
- pixcir_ts@5c {
- compatible = "pixcir,pixcir_ts";
- reg = <0x5c>;
- interrupts = <2 0>;
- attb-gpio = <&gpf 2 0 2>;
- touchscreen-size-x = <800>;
- touchscreen-size-y = <600>;
- };
-
- /* ... */
- };
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/ti,tsc2005.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments TSC2004 and TSC2005 touchscreen controller bindings
+
+maintainers:
+ - Marek Vasut <marex@denx.de>
+ - Michael Welling <mwelling@ieee.org>
+
+properties:
+ $nodename:
+ pattern: "^touchscreen(@.*)?$"
+
+ compatible:
+ enum:
+ - ti,tsc2004
+ - ti,tsc2005
+
+ reg:
+ maxItems: 1
+ description: |
+ I2C address when used on the I2C bus, or the SPI chip select index
+ when used on the SPI bus
+
+ interrupts:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+ description: GPIO specifier for the controller reset line
+
+ spi-max-frequency:
+ description: TSC2005 SPI bus clock frequency.
+ maximum: 25000000
+
+ ti,x-plate-ohms:
+ description: resistance of the touchscreen's X plates in ohm (defaults to 280)
+
+ ti,esd-recovery-timeout-ms:
+ description: |
+ if the touchscreen does not respond after the configured time
+ (in milli seconds), the driver will reset it. This is disabled
+ by default.
+
+ vio-supply:
+ description: Regulator specifier
+
+ touchscreen-fuzz-pressure: true
+ touchscreen-fuzz-x: true
+ touchscreen-fuzz-y: true
+ touchscreen-max-pressure: true
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+
+allOf:
+ - $ref: touchscreen.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ const: ti,tsc2004
+ then:
+ properties:
+ spi-max-frequency: false
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ touchscreen@48 {
+ compatible = "ti,tsc2004";
+ reg = <0x48>;
+ vio-supply = <&vio>;
+
+ reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>;
+ interrupts-extended = <&gpio1 27 IRQ_TYPE_EDGE_RISING>;
+
+ touchscreen-fuzz-x = <4>;
+ touchscreen-fuzz-y = <7>;
+ touchscreen-fuzz-pressure = <2>;
+ touchscreen-size-x = <4096>;
+ touchscreen-size-y = <4096>;
+ touchscreen-max-pressure = <2048>;
+
+ ti,x-plate-ohms = <280>;
+ ti,esd-recovery-timeout-ms = <8000>;
+ };
+ };
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/gpio/gpio.h>
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ touchscreen@0 {
+ compatible = "ti,tsc2005";
+ spi-max-frequency = <6000000>;
+ reg = <0>;
+
+ vio-supply = <&vio>;
+
+ reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
+ interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
+
+ touchscreen-fuzz-x = <4>;
+ touchscreen-fuzz-y = <7>;
+ touchscreen-fuzz-pressure = <2>;
+ touchscreen-size-x = <4096>;
+ touchscreen-size-y = <4096>;
+ touchscreen-max-pressure = <2048>;
+
+ ti,x-plate-ohms = <280>;
+ ti,esd-recovery-timeout-ms = <8000>;
+ };
+ };
+++ /dev/null
-* Texas Instruments tsc2004 and tsc2005 touchscreen controllers
-
-Required properties:
- - compatible : "ti,tsc2004" or "ti,tsc2005"
- - reg : Device address
- - interrupts : IRQ specifier
- - spi-max-frequency : Maximum SPI clocking speed of the device
- (for tsc2005)
-
-Optional properties:
- - vio-supply : Regulator specifier
- - reset-gpios : GPIO specifier for the controller reset line
- - ti,x-plate-ohms : integer, resistance of the touchscreen's X plates
- in ohm (defaults to 280)
- - ti,esd-recovery-timeout-ms : integer, if the touchscreen does not respond after
- the configured time (in milli seconds), the driver
- will reset it. This is disabled by default.
- - properties defined in touchscreen.txt
-
-Example:
-
-&i2c3 {
- tsc2004@48 {
- compatible = "ti,tsc2004";
- reg = <0x48>;
- vio-supply = <&vio>;
-
- reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>;
- interrupts-extended = <&gpio1 27 IRQ_TYPE_EDGE_RISING>;
-
- touchscreen-fuzz-x = <4>;
- touchscreen-fuzz-y = <7>;
- touchscreen-fuzz-pressure = <2>;
- touchscreen-size-x = <4096>;
- touchscreen-size-y = <4096>;
- touchscreen-max-pressure = <2048>;
-
- ti,x-plate-ohms = <280>;
- ti,esd-recovery-timeout-ms = <8000>;
- };
-}
-
-&mcspi1 {
- tsc2005@0 {
- compatible = "ti,tsc2005";
- spi-max-frequency = <6000000>;
- reg = <0>;
-
- vio-supply = <&vio>;
-
- reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
- interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
-
- touchscreen-fuzz-x = <4>;
- touchscreen-fuzz-y = <7>;
- touchscreen-fuzz-pressure = <2>;
- touchscreen-size-x = <4096>;
- touchscreen-size-y = <4096>;
- touchscreen-max-pressure = <2048>;
-
- ti,x-plate-ohms = <280>;
- ti,esd-recovery-timeout-ms = <8000>;
- };
-}
+++ /dev/null
-Qualcomm PON Device
-
-The Power On device for Qualcomm PM8xxx is MFD supporting pwrkey
-and resin along with the Android reboot-mode.
-
-This DT node has pwrkey and resin as sub nodes.
-
-Required Properties:
--compatible: Must be one of:
- "qcom,pm8916-pon"
- "qcom,pms405-pon"
- "qcom,pm8998-pon"
-
--reg: Specifies the physical address of the pon register
-
-Optional subnode:
--pwrkey: Specifies the subnode pwrkey and should follow the
- qcom,pm8941-pwrkey.txt description.
--resin: Specifies the subnode resin and should follow the
- qcom,pm8xxx-pwrkey.txt description.
-
-The rest of the properties should follow the generic reboot-mode description
-found in reboot-mode.txt
-
-Example:
-
- pon@800 {
- compatible = "qcom,pm8916-pon";
-
- reg = <0x800>;
- mode-bootloader = <0x2>;
- mode-recovery = <0x1>;
-
- pwrkey {
- compatible = "qcom,pm8941-pwrkey";
- interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
- debounce = <15625>;
- bias-pull-up;
- linux,code = <KEY_POWER>;
- };
-
- resin {
- compatible = "qcom,pm8941-resin";
- interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
- debounce = <15625>;
- bias-pull-up;
- linux,code = <KEY_VOLUMEDOWN>;
- };
- };
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/qcom,pon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PON Device
+
+maintainers:
+ - Vinod Koul <vkoul@kernel.org>
+
+description: |
+ The Power On device for Qualcomm PM8xxx is MFD supporting pwrkey
+ and resin along with the Android reboot-mode.
+
+ This DT node has pwrkey and resin as sub nodes.
+
+allOf:
+ - $ref: reboot-mode.yaml#
+
+properties:
+ compatible:
+ enum:
+ - qcom,pm8916-pon
+ - qcom,pms405-pon
+ - qcom,pm8998-pon
+
+ reg:
+ maxItems: 1
+
+ pwrkey:
+ type: object
+ $ref: "../../input/qcom,pm8941-pwrkey.yaml#"
+
+ resin:
+ type: object
+ $ref: "../../input/qcom,pm8941-pwrkey.yaml#"
+
+required:
+ - compatible
+ - reg
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/input/linux-event-codes.h>
+ #include <dt-bindings/spmi/spmi.h>
+ spmi_bus: spmi@c440000 {
+ reg = <0x0c440000 0x1100>;
+ #address-cells = <2>;
+ #size-cells = <0>;
+ pmk8350: pmic@0 {
+ reg = <0x0 SPMI_USID>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ pmk8350_pon: pon_hlos@1300 {
+ reg = <0x1300>;
+ compatible = "qcom,pm8998-pon";
+
+ pwrkey {
+ compatible = "qcom,pm8941-pwrkey";
+ interrupts = < 0x0 0x8 0 IRQ_TYPE_EDGE_BOTH >;
+ debounce = <15625>;
+ bias-pull-up;
+ linux,code = <KEY_POWER>;
+ };
+
+ resin {
+ compatible = "qcom,pm8941-resin";
+ interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
+ debounce = <15625>;
+ bias-pull-up;
+ linux,code = <KEY_VOLUMEDOWN>;
+ };
+ };
+ };
+ };
+...
"^mode-.*$":
$ref: /schemas/types.yaml#/definitions/uint32
-additionalProperties: false
+additionalProperties: true
examples:
- |
- enum:
- rockchip,px30-pwm
- rockchip,rk3308-pwm
+ - rockchip,rk3568-pwm
- const: rockchip,rk3328-pwm
reg:
- dallas,ds3232
# I2C-BUS INTERFACE REAL TIME CLOCK MODULE
- epson,rx8010
+ # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
+ - epson,rx8025
+ - epson,rx8035
# I2C-BUS INTERFACE REAL TIME CLOCK MODULE with Battery Backed RAM
- epson,rx8571
# I2C-BUS INTERFACE REAL TIME CLOCK MODULE
if:
properties:
compatible:
- oneOf:
- - const: ti,omap2-mcspi
- - const: ti,omap4-mcspi
+ enum:
+ - ti,omap2-mcspi
+ - ti,omap4-mcspi
then:
properties:
xlnx,num-ss-bits:
description: Number of chip selects used.
- $ref: /schemas/types.yaml#/definitions/uint32
minimum: 1
maximum: 32
xlnx,num-transfer-bits:
description: Number of bits per transfer. This will be 8 if not specified.
- $ref: /schemas/types.yaml#/definitions/uint32
enum: [8, 16, 32]
default: 8
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2021 Linaro Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/qcom-lmh.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Limits Management Hardware(LMh)
+
+maintainers:
+ - Thara Gopinath <thara.gopinath@linaro.org>
+
+description:
+ Limits Management Hardware(LMh) is a hardware infrastructure on some
+ Qualcomm SoCs that can enforce temperature and current limits as
+ programmed by software for certain IPs like CPU.
+
+properties:
+ compatible:
+ enum:
+ - qcom,sdm845-lmh
+
+ reg:
+ items:
+ - description: core registers
+
+ interrupts:
+ maxItems: 1
+
+ '#interrupt-cells':
+ const: 1
+
+ interrupt-controller: true
+
+ cpus:
+ description:
+ phandle of the first cpu in the LMh cluster
+ $ref: /schemas/types.yaml#/definitions/phandle
+
+ qcom,lmh-temp-arm-millicelsius:
+ description:
+ An integer expressing temperature threshold at which the LMh thermal
+ FSM is engaged.
+
+ qcom,lmh-temp-low-millicelsius:
+ description:
+ An integer expressing temperature threshold at which the state machine
+ will attempt to remove frequency throttling.
+
+ qcom,lmh-temp-high-millicelsius:
+ description:
+ An integer expressing temperature threshold at which the state machine
+ will attempt to throttle the frequency.
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - '#interrupt-cells'
+ - interrupt-controller
+ - cpus
+ - qcom,lmh-temp-arm-millicelsius
+ - qcom,lmh-temp-low-millicelsius
+ - qcom,lmh-temp-high-millicelsius
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+ lmh@17d70800 {
+ compatible = "qcom,sdm845-lmh";
+ reg = <0x17d70800 0x400>;
+ interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+ cpus = <&CPU4>;
+ qcom,lmh-temp-arm-millicelsius = <65000>;
+ qcom,lmh-temp-low-millicelsius = <94500>;
+ qcom,lmh-temp-high-millicelsius = <95000>;
+ interrupt-controller;
+ #interrupt-cells = <1>;
+ };
- polling-delay
- polling-delay-passive
- thermal-sensors
- - trips
+
additionalProperties: false
additionalProperties: false
- reg
- interrupts
-additionalProperties: false
+additionalProperties:
+ type: object
examples:
- |
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/virtio/virtio-device.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio device bindings
+
+maintainers:
+ - Viresh Kumar <viresh.kumar@linaro.org>
+
+description:
+ These bindings are applicable to virtio devices irrespective of the bus they
+ are bound to, like mmio or pci.
+
+# We need a select here so we don't match all nodes with 'virtio,mmio'
+properties:
+ compatible:
+ pattern: "^virtio,device[0-9a-f]{1,8}$"
+ description: Virtio device nodes.
+ "virtio,deviceID", where ID is the virtio device id. The textual
+ representation of ID shall be in lower case hexadecimal with leading
+ zeroes suppressed.
+
+required:
+ - compatible
+
+additionalProperties: true
+
+examples:
+ - |
+ virtio@3000 {
+ compatible = "virtio,mmio";
+ reg = <0x3000 0x100>;
+ interrupts = <43>;
+
+ i2c {
+ compatible = "virtio,device22";
+ };
+ };
+...
properties:
compatible:
- oneOf:
- - const: maxim,max6369
- - const: maxim,max6370
- - const: maxim,max6371
- - const: maxim,max6372
- - const: maxim,max6373
- - const: maxim,max6374
+ enum:
+ - maxim,max6369
+ - maxim,max6370
+ - maxim,max6371
+ - maxim,max6372
+ - maxim,max6373
+ - maxim,max6374
reg:
description: This is a 1-byte memory-mapped address
| openrisc: | TODO |
| parisc: | ok |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | ok |
| sh: | TODO |
| sparc: | TODO |
.. kernel-doc:: fs/fs-writeback.c
:export:
-.. kernel-doc:: fs/block_dev.c
- :export:
-
.. kernel-doc:: fs/anon_inodes.c
:export:
iommu
media/index
sysfs-platform_profile
+ vduse
.. only:: subproject and html
'z' 10-4F drivers/s390/crypto/zcrypt_api.h conflict!
'|' 00-7F linux/media.h
0x80 00-1F linux/fb.h
+0x81 00-1F linux/vduse.h
0x89 00-06 arch/x86/include/asm/sockios.h
0x89 0B-DF linux/sockios.h
0x89 E0-EF linux/sockios.h SIOCPROTOPRIVATE range
--- /dev/null
+==================================
+VDUSE - "vDPA Device in Userspace"
+==================================
+
+vDPA (virtio data path acceleration) device is a device that uses a
+datapath which complies with the virtio specifications with vendor
+specific control path. vDPA devices can be both physically located on
+the hardware or emulated by software. VDUSE is a framework that makes it
+possible to implement software-emulated vDPA devices in userspace. And
+to make the device emulation more secure, the emulated vDPA device's
+control path is handled in the kernel and only the data path is
+implemented in the userspace.
+
+Note that only virtio block device is supported by VDUSE framework now,
+which can reduce security risks when the userspace process that implements
+the data path is run by an unprivileged user. The support for other device
+types can be added after the security issue of corresponding device driver
+is clarified or fixed in the future.
+
+Create/Destroy VDUSE devices
+----------------------------
+
+VDUSE devices are created as follows:
+
+1. Create a new VDUSE instance with ioctl(VDUSE_CREATE_DEV) on
+ /dev/vduse/control.
+
+2. Setup each virtqueue with ioctl(VDUSE_VQ_SETUP) on /dev/vduse/$NAME.
+
+3. Begin processing VDUSE messages from /dev/vduse/$NAME. The first
+ messages will arrive while attaching the VDUSE instance to vDPA bus.
+
+4. Send the VDPA_CMD_DEV_NEW netlink message to attach the VDUSE
+ instance to vDPA bus.
+
+VDUSE devices are destroyed as follows:
+
+1. Send the VDPA_CMD_DEV_DEL netlink message to detach the VDUSE
+ instance from vDPA bus.
+
+2. Close the file descriptor referring to /dev/vduse/$NAME.
+
+3. Destroy the VDUSE instance with ioctl(VDUSE_DESTROY_DEV) on
+ /dev/vduse/control.
+
+The netlink messages can be sent via the vdpa tool in iproute2 or with the
+sample code below:
+
+.. code-block:: c
+
+ /*
+  * Send the generic netlink command @cmd for the VDUSE device called @name
+  * to the vDPA netlink family (VDPA_GENL_NAME). When @cmd is
+  * VDPA_CMD_DEV_NEW the management device name "vduse" is added to the
+  * message as well.
+  *
+  * Returns 0 on success, -ENOMEM if the netlink socket cannot be allocated
+  * and -1 on any other failure.
+  */
+ static int netlink_add_vduse(const char *name, enum vdpa_command cmd)
+ {
+ struct nl_sock *nlsock;
+ struct nl_msg *msg;
+ int famid;
+
+ nlsock = nl_socket_alloc();
+ if (!nlsock)
+ return -ENOMEM;
+
+ if (genl_connect(nlsock))
+ goto free_sock;
+
+ famid = genl_ctrl_resolve(nlsock, VDPA_GENL_NAME);
+ if (famid < 0)
+ goto close_sock;
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ goto close_sock;
+
+ if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, famid, 0, 0, cmd, 0))
+ goto nla_put_failure;
+
+ /*
+  * NOTE(review): NLA_PUT_STRING is a macro from the netlink library;
+  * presumably it jumps to the nla_put_failure label below when the
+  * attribute cannot be added - confirm against the libnl headers.
+  */
+ NLA_PUT_STRING(msg, VDPA_ATTR_DEV_NAME, name);
+ if (cmd == VDPA_CMD_DEV_NEW)
+ NLA_PUT_STRING(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, "vduse");
+
+ /*
+  * NOTE(review): msg is not freed on this failure path nor on the
+  * success path; presumably nl_send_sync() releases the message
+  * itself - confirm against the libnl documentation.
+  */
+ if (nl_send_sync(nlsock, msg))
+ goto close_sock;
+
+ nl_close(nlsock);
+ nl_socket_free(nlsock);
+
+ return 0;
+ /* Error paths: undo the setup steps in reverse order. */
+ nla_put_failure:
+ nlmsg_free(msg);
+ close_sock:
+ nl_close(nlsock);
+ free_sock:
+ nl_socket_free(nlsock);
+ return -1;
+ }
+
+How VDUSE works
+---------------
+
+As mentioned above, a VDUSE device is created by ioctl(VDUSE_CREATE_DEV) on
+/dev/vduse/control. With this ioctl, userspace can specify some basic configuration
+such as device name (uniquely identify a VDUSE device), virtio features, virtio
+configuration space, the number of virtqueues and so on for this emulated device.
+Then a char device interface (/dev/vduse/$NAME) is exported to userspace for device
+emulation. Userspace can use the VDUSE_VQ_SETUP ioctl on /dev/vduse/$NAME to
+add per-virtqueue configuration such as the max size of virtqueue to the device.
+
+After the initialization, the VDUSE device can be attached to vDPA bus via
+the VDPA_CMD_DEV_NEW netlink message. Userspace needs to read()/write() on
+/dev/vduse/$NAME to receive/reply some control messages from/to VDUSE kernel
+module as follows:
+
+.. code-block:: c
+
+ /*
+  * Handle one VDUSE control message on @dev_fd: read a request, prepare a
+  * response that carries the same request_id, and write the response back.
+  *
+  * Returns 0 on success, or -1 if the read or the write does not transfer
+  * exactly one whole structure.
+  */
+ static int vduse_message_handler(int dev_fd)
+ {
+ int len;
+ struct vduse_dev_request req;
+ struct vduse_dev_response resp;
+
+ len = read(dev_fd, &req, sizeof(req));
+ if (len != sizeof(req))
+ return -1;
+
+ /* The response is matched to its request through request_id. */
+ resp.request_id = req.request_id;
+
+ switch (req.type) {
+
+ /*
+  * handle different types of messages
+  *
+  * NOTE(review): only resp.request_id is set above; presumably the
+  * remaining fields of resp are meant to be filled in here - confirm
+  * against include/uapi/linux/vduse.h.
+  */
+
+ }
+
+ len = write(dev_fd, &resp, sizeof(resp));
+ if (len != sizeof(resp))
+ return -1;
+
+ return 0;
+ }
+
+There are now three types of messages introduced by VDUSE framework:
+
+- VDUSE_GET_VQ_STATE: Get the state for virtqueue, userspace should return
+ avail index for split virtqueue or the device/driver ring wrap counters and
+ the avail and used index for packed virtqueue.
+
+- VDUSE_SET_STATUS: Set the device status, userspace should follow
+ the virtio spec: https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html
+ to process this message. For example, fail to set the FEATURES_OK device
+ status bit if the device can not accept the negotiated virtio features
+ obtained from the VDUSE_DEV_GET_FEATURES ioctl.
+
+- VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for specified
+ IOVA range, userspace should firstly remove the old mapping, then setup the new
+ mapping via the VDUSE_IOTLB_GET_FD ioctl.
+
+After DRIVER_OK status bit is set via the VDUSE_SET_STATUS message, userspace is
+able to start the dataplane processing as follows:
+
+1. Get the specified virtqueue's information with the VDUSE_VQ_GET_INFO ioctl,
+ including the size, the IOVAs of descriptor table, available ring and used ring,
+ the state and the ready status.
+
+2. Pass the above IOVAs to the VDUSE_IOTLB_GET_FD ioctl so that those IOVA regions
+ can be mapped into userspace. Some sample code is shown below:
+
+.. code-block:: c
+
+ /*
+  * Translate a VDUSE access permission (VDUSE_ACCESS_WO, VDUSE_ACCESS_RO or
+  * VDUSE_ACCESS_RW) into the matching PROT_* protection bits. Any other
+  * value of @perm yields 0.
+  */
+ static int perm_to_prot(uint8_t perm)
+ {
+ int prot = 0;
+
+ switch (perm) {
+ case VDUSE_ACCESS_WO:
+ prot |= PROT_WRITE;
+ break;
+ case VDUSE_ACCESS_RO:
+ prot |= PROT_READ;
+ break;
+ case VDUSE_ACCESS_RW:
+ prot |= PROT_READ | PROT_WRITE;
+ break;
+ }
+
+ return prot;
+ }
+
+ /*
+  * Map the IOVA region that contains @iova into this process and return the
+  * virtual address corresponding to @iova.
+  *
+  * *@len is set to the number of bytes from @iova up to and including the
+  * last byte of the region (entry.last); it is written before mmap() is
+  * called, so it is also updated when mmap() subsequently fails.
+  *
+  * Returns NULL if the VDUSE_IOTLB_GET_FD ioctl or mmap() fails.
+  */
+ static void *iova_to_va(int dev_fd, uint64_t iova, uint64_t *len)
+ {
+ int fd;
+ void *addr;
+ size_t size;
+ struct vduse_iotlb_entry entry;
+
+ entry.start = iova;
+ entry.last = iova;
+
+ /*
+  * Find the first IOVA region that overlaps with the specified
+  * range [start, last] and return the corresponding file descriptor.
+  * The code below relies on entry.start, entry.last, entry.offset and
+  * entry.perm describing that region once the ioctl has succeeded.
+  */
+ fd = ioctl(dev_fd, VDUSE_IOTLB_GET_FD, &entry);
+ if (fd < 0)
+ return NULL;
+
+ size = entry.last - entry.start + 1;
+ *len = entry.last - iova + 1;
+ addr = mmap(0, size, perm_to_prot(entry.perm), MAP_SHARED,
+ fd, entry.offset);
+ /* fd is closed right after mmap(), whether or not mmap() succeeded. */
+ close(fd);
+ if (addr == MAP_FAILED)
+ return NULL;
+
+ /*
+  * Use some data structure, such as a linked list, to store
+  * the iotlb mapping here. munmap(2) should be called for the
+  * cached mapping when the corresponding VDUSE_UPDATE_IOTLB
+  * message is received or the device is reset.
+  */
+
+ /* addr maps entry.start, so offset it by iova's distance into the region. */
+ return addr + iova - entry.start;
+ }
+
+3. Setup the kick eventfd for the specified virtqueues with the VDUSE_VQ_SETUP_KICKFD
+ ioctl. The kick eventfd is used by VDUSE kernel module to notify userspace to
+ consume the available ring. This is optional since userspace can choose to poll the
+ available ring instead.
+
+4. Listen to the kick eventfd (optional) and consume the available ring. The buffer
+ described by the descriptors in the descriptor table should be also mapped into
+ userspace via the VDUSE_IOTLB_GET_FD ioctl before accessing.
+
+5. Inject an interrupt for specific virtqueue with the VDUSE_INJECT_VQ_IRQ ioctl
+ after the used ring is filled.
+
+For more details on the uAPI, please see include/uapi/linux/vduse.h.
T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
F: block/
F: drivers/block/
-F: fs/block_dev.c
F: include/linux/blk*
F: kernel/trace/blktrace.c
F: lib/sbitmap.c
F: drivers/ata/sata_promise.*
LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
-M: Jens Axboe <axboe@kernel.dk>
+M: Damien Le Moal <damien.lemoal@opensource.wdc.com>
L: linux-ide@vger.kernel.org
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
+T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git
F: Documentation/devicetree/bindings/ata/
F: drivers/ata/
F: include/linux/ata.h
NVM EXPRESS TARGET DRIVER
M: Christoph Hellwig <hch@lst.de>
M: Sagi Grimberg <sagi@grimberg.me>
-M: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+M: Chaitanya Kulkarni <kch@nvidia.com>
L: linux-nvme@lists.infradead.org
S: Supported
W: http://git.infradead.org/nvme.git
F: include/linux/cpu_cooling.h
F: include/linux/thermal.h
F: include/uapi/linux/thermal.h
+F: tools/thermal/
THERMAL DRIVER FOR AMLOGIC SOCS
M: Guillaume La Roque <glaroque@baylibre.com>
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
+ select BUILDTIME_TABLE_SORT if MMU
select CLONE_BACKWARDS
select CLINT_TIMER if !MMU
select COMMON_CLK
Image.%: Image
$(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
-zinstall install:
- $(Q)$(MAKE) $(build)=$(boot) $@
+install: install-image = Image
+zinstall: install-image = Image.gz
+install zinstall:
+ $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
+ $(boot)/$(install-image) System.map "$(INSTALL_PATH)"
archclean:
$(Q)$(MAKE) $(clean)=$(boot)
$(obj)/loader.bin: $(obj)/loader FORCE
$(call if_changed,objcopy)
-
-install:
- $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
- $(obj)/Image System.map "$(INSTALL_PATH)"
-
-zinstall:
- $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
- $(obj)/Image.gz System.map "$(INSTALL_PATH)"
aliases {
ethernet0 = &emac1;
+ serial0 = &serial0;
+ serial1 = &serial1;
+ serial2 = &serial2;
+ serial3 = &serial3;
};
chosen {
- stdout-path = &serial0;
+ stdout-path = "serial0:115200n8";
};
cpus {
CONFIG_PCIEPORTBUS=y
CONFIG_PCI_HOST_GENERIC=y
CONFIG_PCIE_XILINX=y
+CONFIG_PCIE_FU740=y
CONFIG_DEVTMPFS=y
CONFIG_DEVTMPFS_MOUNT=y
CONFIG_BLK_DEV_LOOP=y
CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_NVME=m
CONFIG_BLK_DEV_SD=y
CONFIG_BLK_DEV_SR=y
CONFIG_SCSI_VIRTIO=y
CONFIG_NFS_V4_2=y
CONFIG_ROOT_NFS=y
CONFIG_9P_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=m
CONFIG_CRYPTO_USER_API_HASH=y
CONFIG_CRYPTO_DEV_VIRTIO=y
CONFIG_PRINTK_TIME=y
*/
#define ELF_ET_DYN_BASE ((TASK_SIZE / 3) * 2)
+#ifdef CONFIG_64BIT
+#define STACK_RND_MASK (0x3ffff >> (PAGE_SHIFT - 12))
+#endif
/*
* This yields a mask that user programs can use to figure out what
* instruction set this CPU supports. This could be done in user space,
}
BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
- EXCEPTION_TABLE(0x10)
.rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) {
*(.rel.dyn*)
* Copyright (C) 2017 SiFive
*/
+#define RO_EXCEPTION_TABLE_ALIGN 16
+
#ifdef CONFIG_XIP_KERNEL
#include "vmlinux-xip.lds.S"
#else
*(.srodata*)
}
- EXCEPTION_TABLE(0x10)
-
. = ALIGN(SECTION_ALIGN);
_data = .;
# Makefile for the kernel block layer
#
-obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
+obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
blk-exec.o blk-merge.o blk-timeout.o \
blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2016 - 2020 Christoph Hellwig
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/major.h>
+#include <linux/device_cgroup.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
+#include <linux/blkpg.h>
+#include <linux/magic.h>
+#include <linux/buffer_head.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/uio.h>
+#include <linux/namei.h>
+#include <linux/cleancache.h>
+#include <linux/part_stat.h>
+#include <linux/uaccess.h>
+#include "../fs/internal.h"
+#include "blk.h"
+
+struct bdev_inode { /* couples a block_device with the inode embedded next to it */
+ struct block_device bdev; /* handed out by I_BDEV() */
+ struct inode vfs_inode; /* BDEV_I() maps this member back to the container */
+};
+
+/* Map an inode embedded in a struct bdev_inode back to its container. */
+static inline struct bdev_inode *BDEV_I(struct inode *inode)
+{
+	struct bdev_inode *ei;
+
+	ei = container_of(inode, struct bdev_inode, vfs_inode);
+	return ei;
+}
+
+/* Return the block_device that shares a struct bdev_inode with @inode. */
+struct block_device *I_BDEV(struct inode *inode)
+{
+	struct bdev_inode *ei = BDEV_I(inode);
+
+	return &ei->bdev;
+}
+EXPORT_SYMBOL(I_BDEV);
+
+/*
+ * Write the inode of @bdev back for as long as it is marked I_DIRTY, emitting
+ * a rate-limited warning for every failed attempt.  i_state is sampled under
+ * i_lock; the lock is dropped around the writeback call itself.
+ */
+static void bdev_write_inode(struct block_device *bdev)
+{
+	struct inode *inode = bdev->bd_inode;
+
+	for (;;) {
+		char name[BDEVNAME_SIZE];
+		bool dirty;
+		int err;
+
+		spin_lock(&inode->i_lock);
+		dirty = inode->i_state & I_DIRTY;
+		spin_unlock(&inode->i_lock);
+		if (!dirty)
+			break;
+
+		err = write_inode_now(inode, true);
+		if (!err)
+			continue;
+		pr_warn_ratelimited("VFS: Dirty inode writeback failed "
+				    "for block device %s (err=%d).\n",
+				    bdevname(bdev, name), err);
+	}
+}
+
+/* Throw away every buffer and page cache page of @bdev, dirty or not. */
+static void kill_bdev(struct block_device *bdev)
+{
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+	/* an empty mapping has nothing to drop */
+	if (!mapping_empty(mapping)) {
+		invalidate_bh_lrus();
+		truncate_inode_pages(mapping, 0);
+	}
+}
+
+/* Invalidate the clean, unused buffers and page cache pages of @bdev. */
+void invalidate_bdev(struct block_device *bdev)
+{
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
+	const bool has_pages = mapping->nrpages != 0;
+
+	if (has_pages) {
+		invalidate_bh_lrus();
+		/* make sure all lru add caches are flushed first */
+		lru_add_drain_all();
+		invalidate_mapping_pages(mapping, 0, -1);
+	}
+
+	/*
+	 * The cleancache rarely needs flushing for a bdev, but do it
+	 * unconditionally to stay safe in the odd corner cases.
+	 */
+	cleancache_invalidate_inode(mapping);
+}
+EXPORT_SYMBOL(invalidate_bdev);
+
+/*
+ * Drop all buffers and page cache in the byte range [@lstart, @lend] of
+ * @bdev.  A caller without FMODE_EXCL temporarily claims the device for the
+ * duration of the truncate; if the claim cannot be taken, the range is only
+ * invalidated and the result of that invalidation is returned.
+ */
+int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
+			loff_t lstart, loff_t lend)
+{
+	struct address_space *mapping = bdev->bd_inode->i_mapping;
+	const bool need_claim = !(mode & FMODE_EXCL);
+
+	/*
+	 * Without an exclusive handle, upgrade to one while the buffer cache
+	 * is discarded so that buffers are not thrown away under a live
+	 * filesystem.
+	 */
+	if (need_claim && bd_prepare_to_claim(bdev, truncate_bdev_range)) {
+		/*
+		 * Someone else has the device exclusively open: try
+		 * invalidating instead.  The 'end' argument is inclusive,
+		 * so the rounding is safe.
+		 */
+		return invalidate_inode_pages2_range(mapping,
+						     lstart >> PAGE_SHIFT,
+						     lend >> PAGE_SHIFT);
+	}
+
+	truncate_inode_pages_range(mapping, lstart, lend);
+	if (need_claim)
+		bd_abort_claiming(bdev, truncate_bdev_range);
+	return 0;
+}
+
+/*
+ * Pick the initial block size of @bdev: start at the logical block size and
+ * keep doubling while the value is below PAGE_SIZE and the matching bit of
+ * the device size is clear.
+ */
+static void set_init_blocksize(struct block_device *bdev)
+{
+	struct inode *inode = bdev->bd_inode;
+	loff_t size = i_size_read(inode);
+	unsigned int bsize;
+
+	for (bsize = bdev_logical_block_size(bdev); bsize < PAGE_SIZE;
+	     bsize <<= 1) {
+		if (size & bsize)
+			break;
+	}
+	inode->i_blkbits = blksize_bits(bsize);
+}
+
+/*
+ * Switch @bdev to a block size of @size bytes.  Returns -EINVAL for a size
+ * that is not a power of two in [512, PAGE_SIZE] or that is smaller than the
+ * logical block size of the device, 0 otherwise.
+ */
+int set_blocksize(struct block_device *bdev, int size)
+{
+	struct inode *inode = bdev->bd_inode;
+
+	/* must be a power of two between 512 and PAGE_SIZE ... */
+	if (size < 512 || size > PAGE_SIZE || !is_power_of_2(size))
+		return -EINVAL;
+	/* ... and no smaller than what the device supports */
+	if (size < bdev_logical_block_size(bdev))
+		return -EINVAL;
+
+	/* nothing to do when the size is the same as the current one */
+	if (inode->i_blkbits == blksize_bits(size))
+		return 0;
+
+	sync_blockdev(bdev);
+	inode->i_blkbits = blksize_bits(size);
+	kill_bdev(bdev);
+	return 0;
+}
+
+EXPORT_SYMBOL(set_blocksize);
+
+/*
+ * Set the block size of the device backing @sb and mirror it into the
+ * superblock.  Returns the new block size, or 0 if set_blocksize() rejected
+ * @size.
+ */
+int sb_set_blocksize(struct super_block *sb, int size)
+{
+	int err = set_blocksize(sb->s_bdev, size);
+
+	if (err)
+		return 0;
+
+	/*
+	 * set_blocksize() succeeded, so @size is a power of two and its
+	 * value lies between 512 and PAGE_SIZE.
+	 */
+	sb->s_blocksize_bits = blksize_bits(size);
+	sb->s_blocksize = size;
+	return sb->s_blocksize;
+}
+
+EXPORT_SYMBOL(sb_set_blocksize);
+
+/*
+ * Like sb_set_blocksize(), but @size is first raised to the logical block
+ * size of the backing device if it is smaller than that.
+ */
+int sb_min_blocksize(struct super_block *sb, int size)
+{
+	int floor = bdev_logical_block_size(sb->s_bdev);
+
+	return sb_set_blocksize(sb, size < floor ? floor : size);
+}
+
+EXPORT_SYMBOL(sb_min_blocksize);
+
+/*
+ * Write back the page cache of @bdev: filemap_write_and_wait() when @wait is
+ * non-zero, filemap_flush() otherwise.  A NULL @bdev is accepted and yields 0.
+ */
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+	struct address_space *mapping;
+
+	if (!bdev)
+		return 0;
+
+	mapping = bdev->bd_inode->i_mapping;
+	return wait ? filemap_write_and_wait(mapping) : filemap_flush(mapping);
+}
+
+/*
+ * Write out all dirty data associated with a block device via its mapping
+ * and wait for the writes to finish.  Does not take the superblock lock.
+ */
+int sync_blockdev(struct block_device *bdev)
+{
+	const int wait = 1;
+
+	return __sync_blockdev(bdev, wait);
+}
+EXPORT_SYMBOL(sync_blockdev);
+
+/*
+ * Write out and wait upon all dirty data associated with this device:
+ * the filesystem's data when a superblock is found on it, otherwise just
+ * the block device itself.  Takes the superblock lock.
+ */
+int fsync_bdev(struct block_device *bdev)
+{
+	struct super_block *sb = get_super(bdev);
+	int res;
+
+	/* no filesystem on the device: sync the bdev mapping only */
+	if (!sb)
+		return sync_blockdev(bdev);
+
+	res = sync_filesystem(sb);
+	drop_super(sb);
+	return res;
+}
+EXPORT_SYMBOL(fsync_bdev);
+
+/**
+ * freeze_bdev -- lock a filesystem and force it into a consistent state
+ * @bdev: blockdevice to lock
+ *
+ * If a superblock is found on this device, we take the s_umount semaphore
+ * on it to make sure nobody unmounts until the snapshot creation is done.
+ * The reference counter (bd_fsfreeze_count) guarantees that only the last
+ * unfreeze process can actually unfreeze the frozen filesystem when multiple
+ * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
+ * counts down in thaw_bdev(). When it becomes 0, thaw_bdev() will actually
+ * unfreeze the filesystem.
+ *
+ * Return: 0 on success (including when the device was already frozen by an
+ * earlier call, or when no active superblock is found), otherwise the error
+ * from the filesystem's ->freeze_super() method or from freeze_super(); in
+ * that case the freeze count is dropped again.
+ */
+int freeze_bdev(struct block_device *bdev)
+{
+ struct super_block *sb;
+ int error = 0;
+
+ mutex_lock(&bdev->bd_fsfreeze_mutex);
+ if (++bdev->bd_fsfreeze_count > 1)
+ goto done;
+
+ sb = get_active_super(bdev);
+ if (!sb)
+ goto sync;
+ if (sb->s_op->freeze_super)
+ error = sb->s_op->freeze_super(sb);
+ else
+ error = freeze_super(sb);
+ deactivate_super(sb);
+
+ if (error) {
+ bdev->bd_fsfreeze_count--;
+ goto done;
+ }
+ bdev->bd_fsfreeze_sb = sb;
+
+sync:
+ sync_blockdev(bdev);
+done:
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return error;
+}
+EXPORT_SYMBOL(freeze_bdev);
+
+/**
+ * thaw_bdev -- unlock filesystem
+ * @bdev: blockdevice to unlock
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_bdev().
+ *
+ * Return: -EINVAL if @bdev is not frozen; 0 if other freezers remain, if
+ * freeze_bdev() recorded no superblock, or if the thaw succeeded; otherwise
+ * the error from the filesystem's ->thaw_super() method or from thaw_super(),
+ * in which case the freeze count is restored.
+ */
+int thaw_bdev(struct block_device *bdev)
+{
+ struct super_block *sb;
+ int error = -EINVAL;
+
+ mutex_lock(&bdev->bd_fsfreeze_mutex);
+ if (!bdev->bd_fsfreeze_count)
+ goto out;
+
+ error = 0;
+ if (--bdev->bd_fsfreeze_count > 0)
+ goto out;
+
+ sb = bdev->bd_fsfreeze_sb;
+ if (!sb)
+ goto out;
+
+ if (sb->s_op->thaw_super)
+ error = sb->s_op->thaw_super(sb);
+ else
+ error = thaw_super(sb);
+ if (error)
+ bdev->bd_fsfreeze_count++;
+ else
+ bdev->bd_fsfreeze_sb = NULL;
+out:
+ mutex_unlock(&bdev->bd_fsfreeze_mutex);
+ return error;
+}
+EXPORT_SYMBOL(thaw_bdev);
+
+/**
+ * bdev_read_page() - Start reading a page from a block device
+ * @bdev: The device to read the page from
+ * @sector: Sector offset of the page relative to the start of @bdev (need
+ *          not be aligned)
+ * @page: The page to read
+ *
+ * The page should be locked on entry and is unlocked once it has been read.
+ * A driver that implements rw_page synchronously will have done so by the
+ * time this function returns, but that is not required.
+ *
+ * Errors returned here are usually "soft" (e.g. out of memory or queue
+ * full); callers should fall back to another way of reading the page rather
+ * than propagate the error up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ * -EOPNOTSUPP is returned when the driver has no rw_page method or the
+ * device has an integrity profile.
+ */
+int bdev_read_page(struct block_device *bdev, sector_t sector,
+		   struct page *page)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	const struct block_device_operations *ops = disk->fops;
+	int err;
+
+	if (!ops->rw_page || bdev_get_integrity(bdev))
+		return -EOPNOTSUPP;
+
+	err = blk_queue_enter(disk->queue, 0);
+	if (err)
+		return err;
+
+	/* rw_page takes a disk-relative sector */
+	err = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+			   REQ_OP_READ);
+	blk_queue_exit(disk->queue);
+	return err;
+}
+
+/**
+ * bdev_write_page() - Start writing a page to a block device
+ * @bdev: The device to write the page to
+ * @sector: Sector offset of the page relative to the start of @bdev (need
+ *          not be aligned)
+ * @page: The page to write
+ * @wbc: The writeback_control for the write
+ *
+ * The page should be locked and not under writeback on entry.  If the write
+ * was started successfully the page is unlocked and under writeback on exit.
+ * If the write already failed (e.g. the driver could not queue the page),
+ * the page is still locked; a ->writepage implementation calling this then
+ * has to unlock it.
+ *
+ * Errors returned here are usually "soft" (e.g. out of memory or queue
+ * full); callers should fall back to another way of writing the page rather
+ * than propagate the error up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_write_page(struct block_device *bdev, sector_t sector,
+		    struct page *page, struct writeback_control *wbc)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	const struct block_device_operations *ops = disk->fops;
+	int err;
+
+	if (!ops->rw_page || bdev_get_integrity(bdev))
+		return -EOPNOTSUPP;
+
+	err = blk_queue_enter(disk->queue, 0);
+	if (err)
+		return err;
+
+	set_page_writeback(page);
+	err = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+			   REQ_OP_WRITE);
+	if (!err) {
+		clean_page_buffers(page);
+		unlock_page(page);
+	} else {
+		/* submission failed: undo the writeback marking */
+		end_page_writeback(page);
+	}
+	blk_queue_exit(disk->queue);
+	return err;
+}
+
+/*
+ * pseudo-fs
+ */
+
+static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
+static struct kmem_cache * bdev_cachep __read_mostly;
+
+/*
+ * ->alloc_inode for the bdev pseudo-fs: allocate a struct bdev_inode from
+ * bdev_cachep, zero its block_device part and return the embedded inode.
+ */
+static struct inode *bdev_alloc_inode(struct super_block *sb)
+{
+	struct bdev_inode *ei;
+
+	ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
+	if (ei == NULL)
+		return NULL;
+
+	memset(&ei->bdev, 0, sizeof(ei->bdev));
+	return &ei->vfs_inode;
+}
+
+/*
+ * ->free_inode for the bdev pseudo-fs: release everything hanging off the
+ * block_device and give the bdev_inode back to its slab cache.
+ */
+static void bdev_free_inode(struct inode *inode)
+{
+	struct bdev_inode *ei = BDEV_I(inode);
+	struct block_device *bdev = &ei->bdev;
+	struct gendisk *disk = bdev->bd_disk;
+
+	free_percpu(bdev->bd_stats);
+	kfree(bdev->bd_meta_info);
+
+	/* the whole-device bdev also frees the gendisk and drops its bdi */
+	if (!bdev_is_partition(bdev)) {
+		if (disk && disk->bdi)
+			bdi_put(disk->bdi);
+		kfree(disk);
+	}
+
+	if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
+		blk_free_ext_minor(MINOR(bdev->bd_dev));
+
+	kmem_cache_free(bdev_cachep, ei);
+}
+
+/* Slab constructor for bdev_cachep: one-time init of the embedded inode. */
+static void init_once(void *data)
+{
+	struct inode *inode = &((struct bdev_inode *)data)->vfs_inode;
+
+	inode_init_once(inode);
+}
+
+/* ->evict_inode for the bdev pseudo-fs: drop page cache and buffers. */
+static void bdev_evict_inode(struct inode *inode)
+{
+	struct address_space *mapping = &inode->i_data;
+
+	truncate_inode_pages_final(mapping);
+	/* is it needed here? */
+	invalidate_inode_buffers(inode);
+	clear_inode(inode);
+}
+
+static const struct super_operations bdev_sops = {
+ .statfs = simple_statfs,
+ .alloc_inode = bdev_alloc_inode,
+ .free_inode = bdev_free_inode,
+ .drop_inode = generic_delete_inode,
+ .evict_inode = bdev_evict_inode,
+};
+
+/*
+ * Set up the fs_context of the bdev pseudo filesystem: BDEVFS_MAGIC, the
+ * bdev super_operations and the SB_I_CGROUPWB flag.
+ */
+static int bd_init_fs_context(struct fs_context *fc)
+{
+	struct pseudo_fs_context *ctx;
+
+	ctx = init_pseudo(fc, BDEVFS_MAGIC);
+	if (ctx == NULL)
+		return -ENOMEM;
+
+	ctx->ops = &bdev_sops;
+	fc->s_iflags |= SB_I_CGROUPWB;
+	return 0;
+}
+
+static struct file_system_type bd_type = {
+ .name = "bdev",
+ .init_fs_context = bd_init_fs_context,
+ .kill_sb = kill_anon_super,
+};
+
+struct super_block *blockdev_superblock __read_mostly;
+EXPORT_SYMBOL_GPL(blockdev_superblock);
+
+/*
+ * Boot-time setup: create the bdev_inode slab cache, register and mount the
+ * bdev pseudo filesystem and remember its superblock.  Panics if the
+ * filesystem cannot be registered or mounted.
+ */
+void __init bdev_cache_init(void)
+{
+	static struct vfsmount *bd_mnt;
+
+	bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
+			0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+				SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
+			init_once);
+
+	if (register_filesystem(&bd_type))
+		panic("Cannot register bdev pseudo-fs");
+
+	bd_mnt = kern_mount(&bd_type);
+	if (IS_ERR(bd_mnt))
+		panic("Cannot create bdev pseudo-fs");
+
+	/* For writeback */
+	blockdev_superblock = bd_mnt->mnt_sb;
+}
+
+/*
+ * Allocate a block_device for partition @partno of @disk, backed by a new
+ * inode on the bdev pseudo filesystem.  Returns NULL if the inode or the
+ * per-cpu statistics cannot be allocated.
+ */
+struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
+{
+	struct inode *inode = new_inode(blockdev_superblock);
+	struct block_device *bdev;
+
+	if (!inode)
+		return NULL;
+
+	inode->i_mode = S_IFBLK;
+	inode->i_rdev = 0;
+	inode->i_data.a_ops = &def_blk_aops;
+	mapping_set_gfp_mask(&inode->i_data, GFP_USER);
+
+	bdev = I_BDEV(inode);
+	mutex_init(&bdev->bd_fsfreeze_mutex);
+	spin_lock_init(&bdev->bd_size_lock);
+	bdev->bd_disk = disk;
+	bdev->bd_partno = partno;
+	bdev->bd_inode = inode;
+
+	bdev->bd_stats = alloc_percpu(struct disk_stats);
+	if (bdev->bd_stats)
+		return bdev;
+
+	/* no stats: dropping the inode reference releases the bdev too */
+	iput(inode);
+	return NULL;
+}
+
+/*
+ * Assign device number @dev to @bdev and hash its inode, using @dev as the
+ * inode number.
+ */
+void bdev_add(struct block_device *bdev, dev_t dev)
+{
+	struct inode *inode = bdev->bd_inode;
+
+	bdev->bd_dev = dev;
+	inode->i_rdev = dev;
+	inode->i_ino = dev;
+	insert_inode_hash(inode);
+}
+
+/* Sum the page cache pages of every inode on the bdev superblock. */
+long nr_blockdev_pages(void)
+{
+	struct super_block *sb = blockdev_superblock;
+	struct inode *inode;
+	long total = 0;
+
+	spin_lock(&sb->s_inode_list_lock);
+	list_for_each_entry(inode, &sb->s_inodes, i_sb_list)
+		total += inode->i_mapping->nrpages;
+	spin_unlock(&sb->s_inode_list_lock);
+
+	return total;
+}
+
+/**
+ * bd_may_claim - test whether a block device can be claimed
+ * @bdev: block device of interest
+ * @whole: whole block device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Test whether @bdev can be claimed by @holder.
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).
+ *
+ * RETURNS:
+ * %true if @bdev can be claimed, %false otherwise.
+ */
+static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
+			 void *holder)
+{
+	/* already a holder */
+	if (bdev->bd_holder == holder)
+		return true;
+	/* held by someone else */
+	if (bdev->bd_holder != NULL)
+		return false;
+	/* is a whole device which isn't held */
+	if (whole == bdev)
+		return true;
+
+	/*
+	 * @bdev is a partition: it may be claimed if the whole device is
+	 * being partitioned (held by bd_may_claim) or is not held at all,
+	 * but not if the whole device is held by anyone else.
+	 */
+	return whole->bd_holder == bd_may_claim || whole->bd_holder == NULL;
+}
+
+/**
+ * bd_prepare_to_claim - claim a block device
+ * @bdev: block device of interest
+ * @holder: holder trying to claim @bdev
+ *
+ * Claim @bdev. This function fails if @bdev is already claimed by another
+ * holder and waits if another claiming is in progress. On successful return,
+ * the caller has ownership of bd_claiming and bd_holder[s].
+ *
+ * RETURNS:
+ * 0 if @bdev can be claimed, -EBUSY if it is claimed by another holder,
+ * -EINVAL if @holder is %NULL.
+ */
+int bd_prepare_to_claim(struct block_device *bdev, void *holder)
+{
+ struct block_device *whole = bdev_whole(bdev);
+
+ if (WARN_ON_ONCE(!holder))
+ return -EINVAL;
+retry:
+ spin_lock(&bdev_lock);
+ /* if someone else claimed, fail */
+ if (!bd_may_claim(bdev, whole, holder)) {
+ spin_unlock(&bdev_lock);
+ return -EBUSY;
+ }
+
+ /* if claiming is already in progress, wait for it to finish */
+ if (whole->bd_claiming) {
+ wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(&bdev_lock);
+ schedule();
+ finish_wait(wq, &wait);
+ goto retry;
+ }
+
+ /* yay, all mine: mark the whole device as being claimed by @holder */
+ whole->bd_claiming = holder;
+ spin_unlock(&bdev_lock);
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
+
+static void bd_clear_claiming(struct block_device *whole, void *holder)
+{
+ lockdep_assert_held(&bdev_lock);
+ /*
+  * Tell others that we're done: @holder must be the one recorded in
+  * bd_claiming; clear it and wake anyone waiting on that word.
+  */
+ BUG_ON(whole->bd_claiming != holder);
+ whole->bd_claiming = NULL;
+ wake_up_bit(&whole->bd_claiming, 0);
+}
+
+/**
+ * bd_finish_claiming - finish claiming of a block device
+ * @bdev: block device of interest
+ * @holder: holder that has claimed @bdev
+ *
+ * Finish exclusive open of a block device. Mark the device as exclusively
+ * open by the holder and wake up all waiters for exclusive open to finish.
+ */
+static void bd_finish_claiming(struct block_device *bdev, void *holder)
+{
+ struct block_device *whole = bdev_whole(bdev);
+
+ spin_lock(&bdev_lock);
+ BUG_ON(!bd_may_claim(bdev, whole, holder));
+ /*
+ * Note that for a whole device bd_holders will be incremented twice,
+ * and bd_holder will be set to bd_may_claim before being set to holder
+ */
+ whole->bd_holders++;
+ whole->bd_holder = bd_may_claim;
+ bdev->bd_holders++;
+ bdev->bd_holder = holder;
+ bd_clear_claiming(whole, holder);
+ spin_unlock(&bdev_lock);
+}
+
+/**
+ * bd_abort_claiming - abort claiming of a block device
+ * @bdev: block device of interest
+ * @holder: holder that has claimed @bdev
+ *
+ * Abort claiming of a block device when the exclusive open failed. This can
+ * also be used when exclusive open is not actually desired and the claim was
+ * only needed to block other exclusive openers for a while.
+ */
+void bd_abort_claiming(struct block_device *bdev, void *holder)
+{
+	struct block_device *whole;
+
+	spin_lock(&bdev_lock);
+	whole = bdev_whole(bdev);
+	bd_clear_claiming(whole, holder);
+	spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_abort_claiming);
+
+/*
+ * Flush and drop everything cached for @bdev: write back dirty data, kill
+ * the page cache and buffers, then write the inode itself.  Warns once if
+ * the device still has holders.
+ */
+static void blkdev_flush_mapping(struct block_device *bdev)
+{
+	const bool still_held = bdev->bd_holders;
+
+	WARN_ON_ONCE(still_held);
+	sync_blockdev(bdev);
+	kill_bdev(bdev);
+	bdev_write_inode(bdev);
+}
+
+/*
+ * Open the whole-device @bdev: call the driver's ->open method if it has
+ * one, initialise the block size on the first open, rescan partitions when
+ * GD_NEED_PART_SCAN is set and count the opener.
+ *
+ * Returns 0 on success or the error from the driver's ->open method.
+ */
+static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
+{
+	struct gendisk *disk = bdev->bd_disk;
+	int ret = 0;
+
+	if (disk->fops->open) {
+		ret = disk->fops->open(bdev, mode);
+		if (ret) {
+			/* avoid ghost partitions on a removed medium */
+			if (ret == -ENOMEDIUM &&
+			     test_bit(GD_NEED_PART_SCAN, &disk->state))
+				bdev_disk_changed(disk, true);
+			return ret;
+		}
+	}
+
+	/* first opener: derive the initial block size from the device */
+	if (!bdev->bd_openers)
+		set_init_blocksize(bdev);
+	if (test_bit(GD_NEED_PART_SCAN, &disk->state))
+		bdev_disk_changed(disk, false);
+	bdev->bd_openers++;
+	return 0;
+}
+
+/*
+ * Drop one opener of the whole-device @bdev, flushing its mapping when the
+ * last opener goes away, then call the driver's ->release method if any.
+ */
+static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+{
+	struct gendisk *disk = bdev->bd_disk;
+
+	bdev->bd_openers--;
+	if (bdev->bd_openers == 0)
+		blkdev_flush_mapping(bdev);
+
+	if (disk->fops->release)
+		disk->fops->release(disk, mode);
+}
+
+/*
+ * Open partition @part.  The first opener also opens the whole device and
+ * initialises the partition's block size; a partition with zero sectors is
+ * refused with -ENXIO.  Returns 0 on success or a negative errno.
+ */
+static int blkdev_get_part(struct block_device *part, fmode_t mode)
+{
+	struct block_device *whole = bdev_whole(part);
+	struct gendisk *disk = part->bd_disk;
+
+	if (!part->bd_openers) {
+		int ret = blkdev_get_whole(whole, mode);
+
+		if (ret)
+			return ret;
+
+		if (!bdev_nr_sectors(part)) {
+			/* empty partition: undo the whole-device open */
+			blkdev_put_whole(whole, mode);
+			return -ENXIO;
+		}
+
+		disk->open_partitions++;
+		set_init_blocksize(part);
+	}
+
+	part->bd_openers++;
+	return 0;
+}
+
+/*
+ * Drop one opener of partition @part.  When the last opener goes away the
+ * partition's mapping is flushed and the whole device is released as well.
+ */
+static void blkdev_put_part(struct block_device *part, fmode_t mode)
+{
+	struct block_device *whole = bdev_whole(part);
+
+	part->bd_openers--;
+	if (part->bd_openers != 0)
+		return;
+
+	blkdev_flush_mapping(part);
+	whole->bd_disk->open_partitions--;
+	blkdev_put_whole(whole, mode);
+}
+
+struct block_device *blkdev_get_no_open(dev_t dev)
+{
+ struct block_device *bdev;
+ struct inode *inode;
+
+ inode = ilookup(blockdev_superblock, dev);
+ if (!inode) {
+ blk_request_module(dev);
+ inode = ilookup(blockdev_superblock, dev);
+ if (!inode)
+ return NULL;
+ }
+
+ /*
+  * Switch from the inode reference to a device model one: take a
+  * reference on bd_device's kobject (unless it already dropped to
+  * zero) before the inode reference is put.
+  */
+ bdev = &BDEV_I(inode)->bdev;
+ if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
+ bdev = NULL;
+ iput(inode);
+
+ if (!bdev)
+ return NULL;
+ if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
+ !try_module_get(bdev->bd_disk->fops->owner)) {
+ put_device(&bdev->bd_device);
+ return NULL;
+ }
+
+ return bdev;
+}
+
+/*
+ * Drop the driver module reference and the device reference held on @bdev,
+ * in that order.
+ */
+void blkdev_put_no_open(struct block_device *bdev)
+{
+	struct gendisk *disk = bdev->bd_disk;
+
+	module_put(disk->fops->owner);
+	put_device(&bdev->bd_device);
+}
+
+/**
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the block device described by device number @dev. If @mode includes
+ * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
+ * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
+ * the same @holder.
+ *
+ * Use this interface ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a device
+ * number. Everything else should use blkdev_get_by_path().
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
+ * In particular ERR_PTR(-ENXIO) is returned when no block device is found
+ * for @dev or its disk is no longer live, and the device cgroup permission
+ * check or the exclusive claim may fail with their own error codes.
+ */
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+{
+ bool unblock_events = true;
+ struct block_device *bdev;
+ struct gendisk *disk;
+ int ret;
+
+ ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
+ MAJOR(dev), MINOR(dev),
+ ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
+ ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
+ if (ret)
+ return ERR_PTR(ret);
+
+ bdev = blkdev_get_no_open(dev);
+ if (!bdev)
+ return ERR_PTR(-ENXIO);
+ disk = bdev->bd_disk;
+
+ if (mode & FMODE_EXCL) {
+ ret = bd_prepare_to_claim(bdev, holder);
+ if (ret)
+ goto put_blkdev;
+ }
+
+ disk_block_events(disk);
+
+ mutex_lock(&disk->open_mutex);
+ ret = -ENXIO;
+ if (!disk_live(disk))
+ goto abort_claiming;
+ if (bdev_is_partition(bdev))
+ ret = blkdev_get_part(bdev, mode);
+ else
+ ret = blkdev_get_whole(bdev, mode);
+ if (ret)
+ goto abort_claiming;
+ if (mode & FMODE_EXCL) {
+ bd_finish_claiming(bdev, holder);
+
+ /*
+ * Block event polling for write claims if requested. Any write
+ * holder makes the write_holder state stick until all are
+ * released. This is good enough and tracking individual
+ * writeable reference is too fragile given the way @mode is
+ * used in blkdev_get/put().
+ */
+ if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+ (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
+ bdev->bd_write_holder = true;
+ unblock_events = false;
+ }
+ }
+ mutex_unlock(&disk->open_mutex);
+
+ if (unblock_events)
+ disk_unblock_events(disk);
+ return bdev;
+
+abort_claiming:
+ if (mode & FMODE_EXCL)
+ bd_abort_claiming(bdev, holder);
+ mutex_unlock(&disk->open_mutex);
+ disk_unblock_events(disk);
+put_blkdev:
+ blkdev_put_no_open(bdev);
+ return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(blkdev_get_by_dev);
+
+/**
+ * blkdev_get_by_path - open a block device by name
+ * @path: path to the block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the block device described by the device file at @path. If @mode
+ * includes %FMODE_EXCL, the block device is opened with exclusive access.
+ * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
+ * nest for the same @holder.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
+ * ERR_PTR(-EACCES) is returned for a write open of a read-only device.
+ */
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+					void *holder)
+{
+	struct block_device *bdev;
+	dev_t dev;
+	int error = lookup_bdev(path, &dev);
+
+	if (error)
+		return ERR_PTR(error);
+
+	bdev = blkdev_get_by_dev(dev, mode, holder);
+	if (IS_ERR(bdev))
+		return bdev;
+
+	/* refuse write opens of a read-only device */
+	if ((mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+		blkdev_put(bdev, mode);
+		return ERR_PTR(-EACCES);
+	}
+
+	return bdev;
+}
+EXPORT_SYMBOL(blkdev_get_by_path);
+
+void blkdev_put(struct block_device *bdev, fmode_t mode)
+{
+ struct gendisk *disk = bdev->bd_disk;
+
+ /*
+ * Sync early if it looks like we're the last one. If someone else
+ * opens the block device between now and the decrement of bd_openers
+ * then we did a sync that we didn't need to, but that's not the end
+ * of the world and we want to avoid long (could be several minutes)
+ * syncs while holding the mutex.
+ */
+ if (bdev->bd_openers == 1)
+ sync_blockdev(bdev);
+
+ mutex_lock(&disk->open_mutex);
+ if (mode & FMODE_EXCL) {
+ struct block_device *whole = bdev_whole(bdev);
+ bool bdev_free;
+
+ /*
+ * Release a claim on the device. The holder fields
+ * are protected with bdev_lock. open_mutex is to
+ * synchronize disk_holder unlinking.
+ */
+ spin_lock(&bdev_lock);
+
+ WARN_ON_ONCE(--bdev->bd_holders < 0);
+ WARN_ON_ONCE(--whole->bd_holders < 0);
+
+ if ((bdev_free = !bdev->bd_holders))
+ bdev->bd_holder = NULL;
+ if (!whole->bd_holders)
+ whole->bd_holder = NULL;
+
+ spin_unlock(&bdev_lock);
+
+ /*
+ * If this was the last claim, unblock event polling if it
+ * was a write holder.
+ */
+ if (bdev_free && bdev->bd_write_holder) {
+ disk_unblock_events(disk);
+ bdev->bd_write_holder = false;
+ }
+ }
+
+ /*
+ * Trigger event checking and tell drivers to flush MEDIA_CHANGE
+ * event. This is to ensure detection of media removal commanded
+ * from userland - e.g. eject(1).
+ */
+ disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
+
+ if (bdev_is_partition(bdev))
+ blkdev_put_part(bdev, mode);
+ else
+ blkdev_put_whole(bdev, mode);
+ mutex_unlock(&disk->open_mutex);
+
+ blkdev_put_no_open(bdev);
+}
+EXPORT_SYMBOL(blkdev_put);
+
+/**
+ * lookup_bdev - look up the device number of a block device by name
+ * @pathname: special file representing the block device
+ * @dev: on success, set to the block device's dev_t
+ *
+ * Resolve @pathname (following a trailing symlink) in the current namespace
+ * and store the device number of the block special file it names in @dev.
+ * No reference to the block device is taken.
+ *
+ * Return: 0 on success; -EINVAL if @pathname is %NULL or empty, the error
+ * from kern_path() if the lookup fails, -ENOTBLK if the path is not a block
+ * special file, or -EACCES if may_open_dev() refuses the path.
+ */
+int lookup_bdev(const char *pathname, dev_t *dev)
+{
+	struct path path;
+	struct inode *inode;
+	int error;
+
+	if (!pathname || !*pathname)
+		return -EINVAL;
+
+	error = kern_path(pathname, LOOKUP_FOLLOW, &path);
+	if (error)
+		return error;
+
+	inode = d_backing_inode(path.dentry);
+	if (!S_ISBLK(inode->i_mode)) {
+		error = -ENOTBLK;
+	} else if (!may_open_dev(&path)) {
+		error = -EACCES;
+	} else {
+		*dev = inode->i_rdev;
+		error = 0;
+	}
+
+	path_put(&path);
+	return error;
+}
+EXPORT_SYMBOL(lookup_bdev);
+
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
+{
+ struct super_block *sb = get_super(bdev);
+ int res = 0;
+
+ if (sb) {
+ /*
+ * no need to lock the super, get_super holds the
+ * read mutex so the filesystem cannot go away
+ * under us (->put_super runs with the write lock
+ * held).
+ */
+ shrink_dcache_sb(sb);
+ res = invalidate_inodes(sb, kill_dirty);
+ drop_super(sb);
+ }
+ invalidate_bdev(bdev);
+ return res;
+}
+EXPORT_SYMBOL(__invalidate_device);
+
+void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
+{
+ struct inode *inode, *old_inode = NULL;
+
+ spin_lock(&blockdev_superblock->s_inode_list_lock);
+ list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
+ struct address_space *mapping = inode->i_mapping;
+ struct block_device *bdev;
+
+ spin_lock(&inode->i_lock);
+ if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
+ mapping->nrpages == 0) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
+ __iget(inode);
+ spin_unlock(&inode->i_lock);
+ spin_unlock(&blockdev_superblock->s_inode_list_lock);
+ /*
+ * We hold a reference to 'inode' so it couldn't have been
+ * removed from s_inodes list while we dropped the
+ * s_inode_list_lock. We cannot iput the inode now as we can
+ * be holding the last reference and we cannot iput it under
+ * s_inode_list_lock. So we keep the reference and iput it
+ * later.
+ */
+ iput(old_inode);
+ old_inode = inode;
+ bdev = I_BDEV(inode);
+
+ mutex_lock(&bdev->bd_disk->open_mutex);
+ if (bdev->bd_openers)
+ func(bdev, arg);
+ mutex_unlock(&bdev->bd_disk->open_mutex);
+
+ spin_lock(&blockdev_superblock->s_inode_list_lock);
+ }
+ spin_unlock(&blockdev_superblock->s_inode_list_lock);
+ iput(old_inode);
+}
}
}
+/*
+ * Upper bound on the number of requests kept on a plug list.  A plug that
+ * spans multiple queues is allowed 4x BLK_MAX_REQUEST_COUNT, which is
+ * important for md arrays to benefit from merging requests.
+ */
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+{
+	return plug->multiple_queues ? BLK_MAX_REQUEST_COUNT * 4
+				     : BLK_MAX_REQUEST_COUNT;
+}
+
+
/**
* blk_mq_submit_bio - Create and send a request to block device.
* @bio: Bio pointer.
else
last = list_entry_rq(plug->mq_list.prev);
- if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
+ if (request_count >= blk_plug_max_rq_count(plug) || (last &&
blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
blk_flush_plug_list(plug, false);
trace_block_plug(q);
void blk_throtl_exit(struct request_queue *q)
{
BUG_ON(!q->td);
+ del_timer_sync(&q->td->service_queue.pending_timer);
throtl_shutdown_wq(q);
blkcg_deactivate_policy(q, &blkcg_policy_throtl);
free_percpu(q->td->latency_buckets[READ]);
bio->bi_opf &= ~REQ_HIPRI;
}
+extern const struct address_space_operations def_blk_aops;
+
#endif /* BLK_INTERNAL_H */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 1991, 1992 Linus Torvalds
+ * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2016 - 2020 Christoph Hellwig
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/uio.h>
+#include <linux/namei.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/falloc.h>
+#include <linux/suspend.h>
+#include "blk.h"
+
+/*
+ * Return the inode that hosts @file's address space (file->f_mapping->host).
+ * blkdev_open() points f_mapping at bdev->bd_inode->i_mapping, so for files
+ * opened through it this is expected to be the block device's own inode.
+ */
+static struct inode *bdev_file_inode(struct file *file)
+{
+ return file->f_mapping->host;
+}
+
+/*
+ * get_block callback handed to the buffer-head/mpage helpers in this file.
+ * Block @iblock of @inode maps one-to-one onto the same block number of the
+ * inode's block device, so just fill in @bh and mark it mapped. @create is
+ * not used. Always returns 0.
+ */
+static int blkdev_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create)
+{
+ bh->b_bdev = I_BDEV(inode);
+ bh->b_blocknr = iblock;
+ set_buffer_mapped(bh);
+ return 0;
+}
+
+/*
+ * Build the bi_opf value for a direct-I/O write issued on behalf of @iocb:
+ * REQ_OP_WRITE | REQ_SYNC | REQ_IDLE, with REQ_FUA added when the iocb has
+ * IOCB_DSYNC set.
+ */
+static unsigned int dio_bio_write_op(struct kiocb *iocb)
+{
+ unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
+
+ /* avoid the need for an I/O completion work item */
+ if (iocb->ki_flags & IOCB_DSYNC)
+ op |= REQ_FUA;
+ return op;
+}
+
+#define DIO_INLINE_BIO_VECS 4
+
+/*
+ * bi_end_io handler for __blkdev_direct_IO_simple(). bi_private holds the
+ * submitting task; clearing it is what the submitter's wait loop checks
+ * for, after which the task is woken.
+ */
+static void blkdev_bio_end_io_simple(struct bio *bio)
+{
+ struct task_struct *waiter = bio->bi_private;
+
+ /* waiter was sampled above, before bi_private is cleared */
+ WRITE_ONCE(bio->bi_private, NULL);
+ blk_wake_io_task(waiter);
+}
+
+/*
+ * Synchronous direct I/O that is carried by a single bio. The bio lives on
+ * the stack and the caller sleeps (or polls, for IOCB_HIPRI) until
+ * blkdev_bio_end_io_simple() clears bio.bi_private.
+ *
+ * @iocb: I/O control block; ki_pos is the starting byte offset
+ * @iter: buffers to read into or write from
+ * @nr_pages: number of bio_vecs to provision for the bio
+ *
+ * Returns the number of bytes placed in the bio on success, -EINVAL if the
+ * position or buffers are not aligned to the logical block size, -ENOMEM if
+ * the bio_vec array cannot be allocated, the error from
+ * bio_iov_iter_get_pages(), or the errno derived from the bio's completion
+ * status.
+ */
+static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
+ struct iov_iter *iter, unsigned int nr_pages)
+{
+ struct file *file = iocb->ki_filp;
+ struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+ struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+ loff_t pos = iocb->ki_pos;
+ bool should_dirty = false;
+ struct bio bio;
+ ssize_t ret;
+ blk_qc_t qc;
+
+ if ((pos | iov_iter_alignment(iter)) &
+ (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
+ /* small requests use the on-stack vector array and skip the allocation */
+ if (nr_pages <= DIO_INLINE_BIO_VECS)
+ vecs = inline_vecs;
+ else {
+ vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
+ GFP_KERNEL);
+ if (!vecs)
+ return -ENOMEM;
+ }
+
+ bio_init(&bio, vecs, nr_pages);
+ bio_set_dev(&bio, bdev);
+ /* byte offset -> 512-byte sector number */
+ bio.bi_iter.bi_sector = pos >> 9;
+ bio.bi_write_hint = iocb->ki_hint;
+ /* the end_io handler wakes this task and clears bi_private */
+ bio.bi_private = current;
+ bio.bi_end_io = blkdev_bio_end_io_simple;
+ bio.bi_ioprio = iocb->ki_ioprio;
+
+ ret = bio_iov_iter_get_pages(&bio, iter);
+ if (unlikely(ret))
+ goto out;
+ /* remember the byte count now; it is the success return value */
+ ret = bio.bi_iter.bi_size;
+
+ if (iov_iter_rw(iter) == READ) {
+ bio.bi_opf = REQ_OP_READ;
+ /* passed to bio_release_pages() once the read has completed */
+ if (iter_is_iovec(iter))
+ should_dirty = true;
+ } else {
+ bio.bi_opf = dio_bio_write_op(iocb);
+ task_io_account_write(ret);
+ }
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio.bi_opf |= REQ_NOWAIT;
+ if (iocb->ki_flags & IOCB_HIPRI)
+ bio_set_polled(&bio, iocb);
+
+ qc = submit_bio(&bio);
+ /* wait until blkdev_bio_end_io_simple() has cleared bi_private */
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!READ_ONCE(bio.bi_private))
+ break;
+ /* sleep unless this is polled I/O and blk_poll() returned non-zero */
+ if (!(iocb->ki_flags & IOCB_HIPRI) ||
+ !blk_poll(bdev_get_queue(bdev), qc, true))
+ blk_io_schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ bio_release_pages(&bio, should_dirty);
+ if (unlikely(bio.bi_status))
+ ret = blk_status_to_errno(bio.bi_status);
+
+out:
+ if (vecs != inline_vecs)
+ kfree(vecs);
+
+ bio_uninit(&bio);
+
+ return ret;
+}
+
+/*
+ * Per-request state for __blkdev_direct_IO(). It is recovered from the first
+ * bio of the request with container_of(), and blkdev_dio_pool is initialised
+ * with a front pad of offsetof(struct blkdev_dio, bio) to make room for it.
+ */
+struct blkdev_dio {
+ union {
+ struct kiocb *iocb; /* async requests: iocb to complete */
+ struct task_struct *waiter; /* sync requests: task to wake */
+ };
+ size_t size; /* bytes accumulated over all bios of the request */
+ atomic_t ref; /* outstanding bios; only used when multi_bio is set */
+ bool multi_bio : 1; /* request was split over more than one bio */
+ bool should_dirty : 1; /* read into an iovec-backed iter */
+ bool is_sync : 1; /* is_sync_kiocb() was true at submission */
+ struct bio bio; /* first bio of the request */
+};
+
+static struct bio_set blkdev_dio_pool;
+
+/*
+ * ->iopoll handler: poll the request queue of the block device behind
+ * @kiocb for the completion identified by kiocb->ki_cookie (stored by
+ * __blkdev_direct_IO() for IOCB_HIPRI requests). @wait is passed straight
+ * through to blk_poll(), whose result is returned.
+ */
+static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
+{
+ struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
+}
+
+/*
+ * bi_end_io handler for every bio issued by __blkdev_direct_IO(). Records
+ * the first error seen in the dio's embedded bio and, once the request is
+ * finished (single bio, or the last reference of a multi-bio request is
+ * dropped), either completes the async iocb or wakes the sync waiter.
+ * Finally releases this bio's pages and, on the non-dirtying path, the bio.
+ */
+static void blkdev_bio_end_io(struct bio *bio)
+{
+ struct blkdev_dio *dio = bio->bi_private;
+ /* sampled before the completion code below runs */
+ bool should_dirty = dio->should_dirty;
+
+ /* keep only the first error of the request */
+ if (bio->bi_status && !dio->bio.bi_status)
+ dio->bio.bi_status = bio->bi_status;
+
+ if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
+ if (!dio->is_sync) {
+ struct kiocb *iocb = dio->iocb;
+ ssize_t ret;
+
+ if (likely(!dio->bio.bi_status)) {
+ ret = dio->size;
+ iocb->ki_pos += ret;
+ } else {
+ ret = blk_status_to_errno(dio->bio.bi_status);
+ }
+
+ dio->iocb->ki_complete(iocb, ret, 0);
+ /* drops the extra reference taken for multi-bio AIO at submission */
+ if (dio->multi_bio)
+ bio_put(&dio->bio);
+ } else {
+ struct task_struct *waiter = dio->waiter;
+
+ /* the submitter's wait loop exits once waiter reads as NULL */
+ WRITE_ONCE(dio->waiter, NULL);
+ blk_wake_io_task(waiter);
+ }
+ }
+
+ if (should_dirty) {
+ bio_check_pages_dirty(bio);
+ } else {
+ bio_release_pages(bio, false);
+ bio_put(bio);
+ }
+}
+
+/*
+ * Direct I/O path for async iocbs and for requests that need more than one
+ * bio (see blkdev_direct_IO()). The first bio comes from blkdev_dio_pool and
+ * is embedded in a struct blkdev_dio that tracks the whole request; any
+ * further bios come from bio_alloc().
+ *
+ * @iocb: I/O control block; ki_pos is the starting byte offset
+ * @iter: buffers to read into or write from
+ * @nr_pages: number of bio_vecs to provision for the first bio
+ *
+ * Returns -EINVAL if the position or buffers are not aligned to the logical
+ * block size. For async iocbs returns -EIOCBQUEUED once submission is done;
+ * the result is then reported through ki_complete() by blkdev_bio_end_io().
+ * For sync iocbs waits for completion and returns the accumulated byte
+ * count, or a negative errno.
+ */
+static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ unsigned int nr_pages)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = bdev_file_inode(file);
+ struct block_device *bdev = I_BDEV(inode);
+ struct blk_plug plug;
+ struct blkdev_dio *dio;
+ struct bio *bio;
+ bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
+ bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+ loff_t pos = iocb->ki_pos;
+ blk_qc_t qc = BLK_QC_T_NONE;
+ int ret = 0;
+
+ if ((pos | iov_iter_alignment(iter)) &
+ (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
+ bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+
+ dio = container_of(bio, struct blkdev_dio, bio);
+ dio->is_sync = is_sync = is_sync_kiocb(iocb);
+ if (dio->is_sync) {
+ dio->waiter = current;
+ /* extra reference, dropped by the bio_put() at the end of this function */
+ bio_get(bio);
+ } else {
+ dio->iocb = iocb;
+ }
+
+ dio->size = 0;
+ dio->multi_bio = false;
+ dio->should_dirty = is_read && iter_is_iovec(iter);
+
+ /*
+ * Don't plug for HIPRI/polled IO, as those should go straight
+ * to issue
+ */
+ if (!is_poll)
+ blk_start_plug(&plug);
+
+ /* one iteration per bio; the last bio is submitted inside the !nr_pages branch */
+ for (;;) {
+ bio_set_dev(bio, bdev);
+ /* byte offset -> 512-byte sector number */
+ bio->bi_iter.bi_sector = pos >> 9;
+ bio->bi_write_hint = iocb->ki_hint;
+ bio->bi_private = dio;
+ bio->bi_end_io = blkdev_bio_end_io;
+ bio->bi_ioprio = iocb->ki_ioprio;
+
+ ret = bio_iov_iter_get_pages(bio, iter);
+ if (unlikely(ret)) {
+ /* complete this bio with an error so the end_io accounting still runs */
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ break;
+ }
+
+ if (is_read) {
+ bio->bi_opf = REQ_OP_READ;
+ if (dio->should_dirty)
+ bio_set_pages_dirty(bio);
+ } else {
+ bio->bi_opf = dio_bio_write_op(iocb);
+ task_io_account_write(bio->bi_iter.bi_size);
+ }
+ if (iocb->ki_flags & IOCB_NOWAIT)
+ bio->bi_opf |= REQ_NOWAIT;
+
+ dio->size += bio->bi_iter.bi_size;
+ pos += bio->bi_iter.bi_size;
+
+ /* zero means the iter is exhausted and this is the final bio */
+ nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
+ if (!nr_pages) {
+ bool polled = false;
+
+ if (iocb->ki_flags & IOCB_HIPRI) {
+ bio_set_polled(bio, iocb);
+ polled = true;
+ }
+
+ qc = submit_bio(bio);
+
+ /* publish the cookie for blkdev_iopoll() */
+ if (polled)
+ WRITE_ONCE(iocb->ki_cookie, qc);
+ break;
+ }
+
+ if (!dio->multi_bio) {
+ /*
+ * AIO needs an extra reference to ensure the dio
+ * structure which is embedded into the first bio
+ * stays around.
+ */
+ if (!is_sync)
+ bio_get(bio);
+ dio->multi_bio = true;
+ /* counts this bio plus the one allocated below */
+ atomic_set(&dio->ref, 2);
+ } else {
+ atomic_inc(&dio->ref);
+ }
+
+ submit_bio(bio);
+ bio = bio_alloc(GFP_KERNEL, nr_pages);
+ }
+
+ if (!is_poll)
+ blk_finish_plug(&plug);
+
+ if (!is_sync)
+ return -EIOCBQUEUED;
+
+ /* sync request: wait until blkdev_bio_end_io() has cleared dio->waiter */
+ for (;;) {
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ if (!READ_ONCE(dio->waiter))
+ break;
+
+ if (!(iocb->ki_flags & IOCB_HIPRI) ||
+ !blk_poll(bdev_get_queue(bdev), qc, true))
+ blk_io_schedule();
+ }
+ __set_current_state(TASK_RUNNING);
+
+ /* a bio_iov_iter_get_pages() error takes precedence over the bio status */
+ if (!ret)
+ ret = blk_status_to_errno(dio->bio.bi_status);
+ if (likely(!ret))
+ ret = dio->size;
+
+ bio_put(&dio->bio);
+ return ret;
+}
+
+/*
+ * ->direct_IO entry point. Returns 0 for an empty @iter. Sync iocbs that fit
+ * in a single bio take the on-stack __blkdev_direct_IO_simple() path;
+ * everything else (async, or more than BIO_MAX_VECS vectors) goes through
+ * __blkdev_direct_IO().
+ */
+static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+ unsigned int nr_pages;
+
+ if (!iov_iter_count(iter))
+ return 0;
+
+ /* ask for one more than BIO_MAX_VECS so "does not fit in one bio" is detectable */
+ nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
+ if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
+ return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
+
+ return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
+}
+
+/* ->writepage: write @page via block_write_full_page() using blkdev_get_block. */
+static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
+{
+ return block_write_full_page(page, blkdev_get_block, wbc);
+}
+
+/* ->readpage: read @page via block_read_full_page(); @file is not used. */
+static int blkdev_readpage(struct file * file, struct page * page)
+{
+ return block_read_full_page(page, blkdev_get_block);
+}
+
+/* ->readahead: hand the request to mpage_readahead() with blkdev_get_block. */
+static void blkdev_readahead(struct readahead_control *rac)
+{
+ mpage_readahead(rac, blkdev_get_block);
+}
+
+/*
+ * ->write_begin: thin wrapper around block_write_begin() with
+ * blkdev_get_block as the mapping callback. @file and @fsdata are not used.
+ */
+static int blkdev_write_begin(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned flags, struct page **pagep,
+ void **fsdata)
+{
+ return block_write_begin(mapping, pos, len, flags, pagep,
+ blkdev_get_block);
+}
+
+/*
+ * ->write_end: finish the write with block_write_end(), then unlock @page
+ * and drop a reference on it. Returns block_write_end()'s result.
+ */
+static int blkdev_write_end(struct file *file, struct address_space *mapping,
+ loff_t pos, unsigned len, unsigned copied, struct page *page,
+ void *fsdata)
+{
+ int ret;
+ ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+ unlock_page(page);
+ put_page(page);
+
+ return ret;
+}
+
+/* ->writepages: delegate to generic_writepages(). */
+static int blkdev_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ return generic_writepages(mapping, wbc);
+}
+
+/*
+ * Address space operations for block devices. Non-static so that code
+ * outside this file can reference it (it is declared extern in blk.h).
+ */
+const struct address_space_operations def_blk_aops = {
+ .set_page_dirty = __set_page_dirty_buffers,
+ .readpage = blkdev_readpage,
+ .readahead = blkdev_readahead,
+ .writepage = blkdev_writepage,
+ .write_begin = blkdev_write_begin,
+ .write_end = blkdev_write_end,
+ .writepages = blkdev_writepages,
+ .direct_IO = blkdev_direct_IO,
+ .migratepage = buffer_migrate_page_norefs,
+ .is_dirty_writeback = buffer_check_dirty_writeback,
+};
+
+/*
+ * for a block special file file_inode(file)->i_size is zero
+ * so we seek against the size of the inode behind file->f_mapping instead
+ * (see bdev_file_inode()). The inode lock is held around the size read and
+ * the fixed_size_llseek() call.
+ */
+static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
+{
+ struct inode *bd_inode = bdev_file_inode(file);
+ loff_t retval;
+
+ inode_lock(bd_inode);
+ retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
+ inode_unlock(bd_inode);
+ return retval;
+}
+
+/*
+ * ->fsync: write back and wait on the byte range [@start, @end] of @filp,
+ * then issue a flush to the block device. An -EOPNOTSUPP result from the
+ * flush is treated as success. @datasync is not used.
+ *
+ * Returns 0 on success or a negative errno from either step.
+ */
+static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
+ int datasync)
+{
+ struct inode *bd_inode = bdev_file_inode(filp);
+ struct block_device *bdev = I_BDEV(bd_inode);
+ int error;
+
+ error = file_write_and_wait_range(filp, start, end);
+ if (error)
+ return error;
+
+ /*
+ * There is no need to serialise calls to blkdev_issue_flush with
+ * i_mutex and doing so causes performance issues with concurrent
+ * O_SYNC writers to a block device.
+ */
+ error = blkdev_issue_flush(bdev);
+ if (error == -EOPNOTSUPP)
+ error = 0;
+
+ return error;
+}
+
+/*
+ * ->open for block device nodes. Translates the open flags into f_mode
+ * bits, opens the device identified by inode->i_rdev with
+ * blkdev_get_by_dev() (passing @filp as the holder argument), and redirects
+ * filp->f_mapping to the block device inode's mapping.
+ *
+ * Returns 0 on success or the error from blkdev_get_by_dev().
+ */
+static int blkdev_open(struct inode *inode, struct file *filp)
+{
+ struct block_device *bdev;
+
+ /*
+ * Preserve backwards compatibility and allow large file access
+ * even if userspace doesn't ask for it explicitly. Some mkfs
+ * binary needs it. We might want to drop this workaround
+ * during an unstable branch.
+ */
+ filp->f_flags |= O_LARGEFILE;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+
+ if (filp->f_flags & O_NDELAY)
+ filp->f_mode |= FMODE_NDELAY;
+ if (filp->f_flags & O_EXCL)
+ filp->f_mode |= FMODE_EXCL;
+ /* access-mode value 3 is mapped to FMODE_WRITE_IOCTL */
+ if ((filp->f_flags & O_ACCMODE) == 3)
+ filp->f_mode |= FMODE_WRITE_IOCTL;
+
+ bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
+ if (IS_ERR(bdev))
+ return PTR_ERR(bdev);
+ /* from here on file->f_mapping->host is what bdev_file_inode() returns */
+ filp->f_mapping = bdev->bd_inode->i_mapping;
+ filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
+ return 0;
+}
+
+/*
+ * ->release: drop the block device reference taken in blkdev_open(), using
+ * the same f_mode. @inode is not used. Always returns 0.
+ */
+static int blkdev_close(struct inode *inode, struct file *filp)
+{
+ struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
+
+ blkdev_put(bdev, filp->f_mode);
+ return 0;
+}
+
+/*
+ * ->unlocked_ioctl: forward @cmd/@arg to blkdev_ioctl() for the block device
+ * behind @file, passing a copy of file->f_mode whose FMODE_NDELAY bit has
+ * been re-derived from the current f_flags.
+ */
+static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+ struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+ fmode_t mode = file->f_mode;
+
+ /*
+ * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
+ * to update it before every ioctl.
+ */
+ if (file->f_flags & O_NDELAY)
+ mode |= FMODE_NDELAY;
+ else
+ mode &= ~FMODE_NDELAY;
+
+ return blkdev_ioctl(bdev, mode, cmd, arg);
+}
+
+/*
+ * Write data to the block device. Only intended for the block device itself
+ * and the raw driver which basically is a fake block device.
+ *
+ * Does not take i_mutex for the write and thus is not for general purpose
+ * use.
+ *
+ * Returns -EPERM for a read-only device, -ETXTBSY for a swap device that is
+ * not the hibernate resume device, 0 for an empty @from, -ENOSPC when
+ * ki_pos is at or beyond the device size, -EOPNOTSUPP for IOCB_NOWAIT
+ * without IOCB_DIRECT, and otherwise the result of
+ * __generic_file_write_iter() / generic_write_sync().
+ */
+static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *bd_inode = bdev_file_inode(file);
+ loff_t size = i_size_read(bd_inode);
+ struct blk_plug plug;
+ size_t shorted = 0;
+ ssize_t ret;
+
+ if (bdev_read_only(I_BDEV(bd_inode)))
+ return -EPERM;
+
+ if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
+ return -ETXTBSY;
+
+ if (!iov_iter_count(from))
+ return 0;
+
+ if (iocb->ki_pos >= size)
+ return -ENOSPC;
+
+ if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
+ return -EOPNOTSUPP;
+
+ /* clamp the write to the space left on the device; remember how much was cut */
+ size -= iocb->ki_pos;
+ if (iov_iter_count(from) > size) {
+ shorted = iov_iter_count(from) - size;
+ iov_iter_truncate(from, size);
+ }
+
+ blk_start_plug(&plug);
+ ret = __generic_file_write_iter(iocb, from);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+ /* give the caller back the part of the iter that was truncated above */
+ iov_iter_reexpand(from, iov_iter_count(from) + shorted);
+ blk_finish_plug(&plug);
+ return ret;
+}
+
+/*
+ * ->read_iter: read from the block device, never past its end. Returns 0
+ * when ki_pos is at or beyond the device size; otherwise the iter is
+ * truncated to the remaining bytes for the duration of
+ * generic_file_read_iter() and re-expanded before returning its result.
+ */
+static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *bd_inode = bdev_file_inode(file);
+ loff_t size = i_size_read(bd_inode);
+ loff_t pos = iocb->ki_pos;
+ size_t shorted = 0;
+ ssize_t ret;
+
+ if (pos >= size)
+ return 0;
+
+ /* clamp the read to the bytes left on the device; remember how much was cut */
+ size -= pos;
+ if (iov_iter_count(to) > size) {
+ shorted = iov_iter_count(to) - size;
+ iov_iter_truncate(to, size);
+ }
+
+ ret = generic_file_read_iter(iocb, to);
+ iov_iter_reexpand(to, iov_iter_count(to) + shorted);
+ return ret;
+}
+
+#define BLKDEV_FALLOC_FL_SUPPORTED \
+ (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
+
+/*
+ * ->fallocate for block devices. Supported @mode combinations:
+ *   ZERO_RANGE [| KEEP_SIZE]                  zeroout with BLKDEV_ZERO_NOUNMAP
+ *   PUNCH_HOLE | KEEP_SIZE                    zeroout with BLKDEV_ZERO_NOFALLBACK
+ *   PUNCH_HOLE | KEEP_SIZE | NO_HIDE_STALE    discard
+ *
+ * The page cache for the range is truncated both before and after the
+ * operation.
+ *
+ * Returns 0 on success, -EOPNOTSUPP for unknown flags or an unsupported
+ * combination, -EINVAL if the range starts beyond the device, extends past
+ * it without FALLOC_FL_KEEP_SIZE, or is not aligned to the logical block
+ * size, or the error from truncation / the block-layer helper.
+ */
+static long blkdev_fallocate(struct file *file, int mode, loff_t start,
+ loff_t len)
+{
+ struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+ /* inclusive last byte of the range */
+ loff_t end = start + len - 1;
+ loff_t isize;
+ int error;
+
+ /* Fail if we don't recognize the flags. */
+ if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
+ return -EOPNOTSUPP;
+
+ /* Don't go off the end of the device. */
+ isize = i_size_read(bdev->bd_inode);
+ if (start >= isize)
+ return -EINVAL;
+ if (end >= isize) {
+ /* with KEEP_SIZE the range is clipped to the device; otherwise it is an error */
+ if (mode & FALLOC_FL_KEEP_SIZE) {
+ len = isize - start;
+ end = start + len - 1;
+ } else
+ return -EINVAL;
+ }
+
+ /*
+ * Don't allow IO that isn't aligned to logical block size.
+ */
+ if ((start | len) & (bdev_logical_block_size(bdev) - 1))
+ return -EINVAL;
+
+ /* Invalidate the page cache, including dirty pages. */
+ error = truncate_bdev_range(bdev, file->f_mode, start, end);
+ if (error)
+ return error;
+
+ /* start/len are bytes; the helpers below take 512-byte sectors (>> 9) */
+ switch (mode) {
+ case FALLOC_FL_ZERO_RANGE:
+ case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
+ error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
+ GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
+ break;
+ case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
+ error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
+ GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
+ break;
+ case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
+ error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
+ GFP_KERNEL, 0);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+ if (error)
+ return error;
+
+ /*
+ * Invalidate the page cache again; if someone wandered in and dirtied
+ * a page, we just discard it - userspace has no way of knowing whether
+ * the write happened before or after discard completing...
+ */
+ return truncate_bdev_range(bdev, file->f_mode, start, end);
+}
+
+/*
+ * File operations for block devices. The handlers defined in this file
+ * are wired up here; mmap and splice use the generic helpers, and
+ * compat_ioctl is only set when CONFIG_COMPAT is enabled.
+ */
+const struct file_operations def_blk_fops = {
+ .open = blkdev_open,
+ .release = blkdev_close,
+ .llseek = blkdev_llseek,
+ .read_iter = blkdev_read_iter,
+ .write_iter = blkdev_write_iter,
+ .iopoll = blkdev_iopoll,
+ .mmap = generic_file_mmap,
+ .fsync = blkdev_fsync,
+ .unlocked_ioctl = block_ioctl,
+#ifdef CONFIG_COMPAT
+ .compat_ioctl = compat_blkdev_ioctl,
+#endif
+ .splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
+ .fallocate = blkdev_fallocate,
+};
+
+/*
+ * Initialise blkdev_dio_pool, the bio_set that __blkdev_direct_IO()
+ * allocates its first bio from. The front pad of
+ * offsetof(struct blkdev_dio, bio) reserves room ahead of each bio for the
+ * enclosing struct blkdev_dio. Returns bioset_init()'s result.
+ */
+static __init int blkdev_init(void)
+{
+ return bioset_init(&blkdev_dio_pool, 4,
+ offsetof(struct blkdev_dio, bio),
+ BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
+}
+module_init(blkdev_init);
void (*probe)(dev_t devt);
} *major_names[BLKDEV_MAJOR_HASH_SIZE];
static DEFINE_MUTEX(major_names_lock);
+static DEFINE_SPINLOCK(major_names_spinlock);
/* index in the above - for now: assume no multimajor ranges */
static inline int major_to_index(unsigned major)
{
struct blk_major_name *dp;
- mutex_lock(&major_names_lock);
+ spin_lock(&major_names_spinlock);
for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
if (dp->major == offset)
seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
- mutex_unlock(&major_names_lock);
+ spin_unlock(&major_names_spinlock);
}
#endif /* CONFIG_PROC_FS */
p->next = NULL;
index = major_to_index(major);
+ spin_lock(&major_names_spinlock);
for (n = &major_names[index]; *n; n = &(*n)->next) {
if ((*n)->major == major)
break;
*n = p;
else
ret = -EBUSY;
+ spin_unlock(&major_names_spinlock);
if (ret < 0) {
printk("register_blkdev: cannot get major %u for %s\n",
int index = major_to_index(major);
mutex_lock(&major_names_lock);
+ spin_lock(&major_names_spinlock);
for (n = &major_names[index]; *n; n = &(*n)->next)
if ((*n)->major == major)
break;
p = *n;
*n = p->next;
}
+ spin_unlock(&major_names_spinlock);
mutex_unlock(&major_names_lock);
kfree(p);
}
}
reg_base = devm_platform_ioremap_resource(pdev, 0);
- if (!reg_base)
- return -EINVAL;
+ if (IS_ERR(reg_base))
+ return PTR_ERR(reg_base);
disk = blk_alloc_disk(NUMA_NO_NODE);
if (!disk)
goto out_free_vblk;
/* Default queue sizing is to fill the ring. */
- if (likely(!virtblk_queue_depth)) {
+ if (!virtblk_queue_depth) {
queue_depth = vblk->vqs[0].vq->num_free;
/* ... but without indirect descs, we use 2 descs per req */
if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
else
blk_size = queue_logical_block_size(q);
- if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) {
+ if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) {
dev_err(&vdev->dev,
"block size is changed unexpectedly, now is %u\n",
blk_size);
smi_info->handlers->get_result(smi_info->si_sm, msg, 3);
if (msg[2] != 0) {
/* Error clearing flags */
- dev_warn(smi_info->io.dev,
+ dev_warn_ratelimited(smi_info->io.dev,
"Error clearing flags: %2.2x\n", msg[2]);
}
smi_info->si_state = SI_NORMAL;
/* We got the flags from the SMI, now handle them. */
smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
if (msg[2] != 0) {
- dev_warn(smi_info->io.dev,
- "Couldn't get irq info: %x.\n", msg[2]);
- dev_warn(smi_info->io.dev,
- "Maybe ok, but ipmi might run very slowly.\n");
+ dev_warn_ratelimited(smi_info->io.dev,
+ "Couldn't get irq info: %x,\n"
+ "Maybe ok, but ipmi might run very slowly.\n",
+ msg[2]);
smi_info->si_state = SI_NORMAL;
break;
}
smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
if (msg[2] != 0)
- dev_warn(smi_info->io.dev,
+ dev_warn_ratelimited(smi_info->io.dev,
"Could not set the global enables: 0x%x.\n",
msg[2]);
if (cc != IPMI_CC_NO_ERROR &&
++retry_count <= GET_DEVICE_ID_MAX_RETRY) {
- dev_warn(smi_info->io.dev,
+ dev_warn_ratelimited(smi_info->io.dev,
"BMC returned 0x%2.2x, retry get bmc device id\n",
cc);
goto retry;
\
return snprintf(buf, 10, "%u\n", smi_get_stat(smi_info, name)); \
} \
-static DEVICE_ATTR(name, 0444, name##_show, NULL)
+static DEVICE_ATTR_RO(name)
static ssize_t type_show(struct device *dev,
struct device_attribute *attr,
return snprintf(buf, 10, "%s\n", si_to_str[smi_info->io.si_type]);
}
-static DEVICE_ATTR(type, 0444, type_show, NULL);
+static DEVICE_ATTR_RO(type);
static ssize_t interrupts_enabled_show(struct device *dev,
struct device_attribute *attr,
return snprintf(buf, 10, "%d\n", enabled);
}
-static DEVICE_ATTR(interrupts_enabled, 0444,
- interrupts_enabled_show, NULL);
+static DEVICE_ATTR_RO(interrupts_enabled);
IPMI_SI_ATTR(short_timeouts);
IPMI_SI_ATTR(long_timeouts);
smi_info->io.irq,
smi_info->io.slave_addr);
}
-static DEVICE_ATTR(params, 0444, params_show, NULL);
+static DEVICE_ATTR_RO(params);
static struct attribute *ipmi_si_dev_attrs[] = {
&dev_attr_type.attr,
{ P_GPLL0_OUT_ODD, 2 },
};
-static const struct clk_parent_data gcc_parent_data_2[] = {
- { .fw_name = "bi_tcxo" },
- { .hw = &gpll0_out_odd.clkr.hw },
-};
static const struct clk_parent_data gcc_parent_data_2_ao[] = {
{ .fw_name = "bi_tcxo_ao" },
{ .hw = &gpll0_out_odd.clkr.hw },
struct sk_buff *skb, u16 source_node_id,
bool is_broadcast, u16 ether_type)
{
- struct fwnet_device *dev;
int status;
- __be64 guid;
switch (ether_type) {
case ETH_P_ARP:
goto err;
}
- dev = netdev_priv(net);
/* Write metadata, and then pass to the receive level */
skb->dev = net;
skb->ip_summed = CHECKSUM_NONE;
* Parse the encapsulation header. This actually does the job of
* converting to an ethernet-like pseudo frame header.
*/
- guid = cpu_to_be64(dev->card->guid);
if (dev_hard_header(skb, net, ether_type,
is_broadcast ? net->broadcast : net->dev_addr,
NULL, skb->len) >= 0) {
}
EXPORT_SYMBOL(qcom_scm_qsmmu500_wait_safe_toggle);
+/*
+ * Report whether the QCOM_SCM_LMH_LIMIT_DCVSH call of the QCOM_SCM_SVC_LMH
+ * service is available, as answered by __qcom_scm_is_call_available().
+ */
+bool qcom_scm_lmh_dcvsh_available(void)
+{
+ return __qcom_scm_is_call_available(__scm->dev, QCOM_SCM_SVC_LMH, QCOM_SCM_LMH_LIMIT_DCVSH);
+}
+EXPORT_SYMBOL(qcom_scm_lmh_dcvsh_available);
+
+/*
+ * Issue the QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE call of the QCOM_SCM_SVC_LMH
+ * service with @profile_id as its single value argument.
+ *
+ * Returns the result of qcom_scm_call().
+ */
+int qcom_scm_lmh_profile_change(u32 profile_id)
+{
+ struct qcom_scm_desc desc = {
+ .svc = QCOM_SCM_SVC_LMH,
+ .cmd = QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE,
+ .arginfo = QCOM_SCM_ARGS(1, QCOM_SCM_VAL),
+ .args[0] = profile_id,
+ .owner = ARM_SMCCC_OWNER_SIP,
+ };
+
+ return qcom_scm_call(__scm->dev, &desc, NULL);
+}
+EXPORT_SYMBOL(qcom_scm_lmh_profile_change);
+
+/*
+ * Issue the QCOM_SCM_LMH_LIMIT_DCVSH call of the QCOM_SCM_SVC_LMH service.
+ *
+ * A five-word payload { @payload_fn, 0, @payload_reg, 1, @payload_val } is
+ * placed in a DMA-coherent buffer whose address is passed as the first call
+ * argument, followed by the payload size, @limit_node, @node_id and
+ * @version. The buffer is freed again once the call has returned.
+ *
+ * Returns -ENOMEM if the payload buffer cannot be allocated, otherwise the
+ * result of qcom_scm_call().
+ */
+int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+ u64 limit_node, u32 node_id, u64 version)
+{
+ dma_addr_t payload_phys;
+ u32 *payload_buf;
+ int ret, payload_size = 5 * sizeof(u32);
+
+ /* args[0] is filled in below, once the payload buffer has been allocated */
+ struct qcom_scm_desc desc = {
+ .svc = QCOM_SCM_SVC_LMH,
+ .cmd = QCOM_SCM_LMH_LIMIT_DCVSH,
+ .arginfo = QCOM_SCM_ARGS(5, QCOM_SCM_RO, QCOM_SCM_VAL, QCOM_SCM_VAL,
+ QCOM_SCM_VAL, QCOM_SCM_VAL),
+ .args[1] = payload_size,
+ .args[2] = limit_node,
+ .args[3] = node_id,
+ .args[4] = version,
+ .owner = ARM_SMCCC_OWNER_SIP,
+ };
+
+ payload_buf = dma_alloc_coherent(__scm->dev, payload_size, &payload_phys, GFP_KERNEL);
+ if (!payload_buf)
+ return -ENOMEM;
+
+ payload_buf[0] = payload_fn;
+ payload_buf[1] = 0;
+ payload_buf[2] = payload_reg;
+ payload_buf[3] = 1;
+ payload_buf[4] = payload_val;
+
+ desc.args[0] = payload_phys;
+
+ ret = qcom_scm_call(__scm->dev, &desc, NULL);
+
+ dma_free_coherent(__scm->dev, payload_size, payload_buf, payload_phys);
+ return ret;
+}
+EXPORT_SYMBOL(qcom_scm_lmh_dcvsh);
+
static int qcom_scm_find_dload_address(struct device *dev, u64 *addr)
{
struct device_node *tcsr;
#define QCOM_SCM_SVC_HDCP 0x11
#define QCOM_SCM_HDCP_INVOKE 0x01
+#define QCOM_SCM_SVC_LMH 0x13
+#define QCOM_SCM_LMH_LIMIT_PROFILE_CHANGE 0x01
+#define QCOM_SCM_LMH_LIMIT_DCVSH 0x10
+
#define QCOM_SCM_SVC_SMMU_PROGRAM 0x15
#define QCOM_SCM_SMMU_CONFIG_ERRATA1 0x03
#define QCOM_SCM_SMMU_CONFIG_ERRATA1_CLIENT_ALL 0x02
MODULE_DESCRIPTION(DRIVER_DESC);
MODULE_LICENSE("GPL");
-static bool use_ktime = true;
-module_param(use_ktime, bool, 0400);
-MODULE_PARM_DESC(use_ktime, "Use ktime for measuring I/O speed");
-
/*
* Option parsing.
*/
char cooked;
int bads;
int reads;
- int speed;
int loop;
int fuzz;
int axes[4];
int axtime;
};
-/*
- * Time macros.
- */
-
-#ifdef __i386__
-
-#include <linux/i8253.h>
-
-#define GET_TIME(x) do { if (boot_cpu_has(X86_FEATURE_TSC)) x = (unsigned int)rdtsc(); else x = get_time_pit(); } while (0)
-#define DELTA(x,y) (boot_cpu_has(X86_FEATURE_TSC) ? ((y) - (x)) : ((x) - (y) + ((x) < (y) ? PIT_TICK_RATE / HZ : 0)))
-#define TIME_NAME (boot_cpu_has(X86_FEATURE_TSC)?"TSC":"PIT")
-static unsigned int get_time_pit(void)
-{
- unsigned long flags;
- unsigned int count;
-
- raw_spin_lock_irqsave(&i8253_lock, flags);
- outb_p(0x00, 0x43);
- count = inb_p(0x40);
- count |= inb_p(0x40) << 8;
- raw_spin_unlock_irqrestore(&i8253_lock, flags);
-
- return count;
-}
-#elif defined(__x86_64__)
-#define GET_TIME(x) do { x = (unsigned int)rdtsc(); } while (0)
-#define DELTA(x,y) ((y)-(x))
-#define TIME_NAME "TSC"
-#elif defined(__alpha__) || defined(CONFIG_ARM) || defined(CONFIG_ARM64) || defined(CONFIG_PPC) || defined(CONFIG_RISCV)
-#define GET_TIME(x) do { x = get_cycles(); } while (0)
-#define DELTA(x,y) ((y)-(x))
-#define TIME_NAME "get_cycles"
-#else
-#define FAKE_TIME
-static unsigned long analog_faketime = 0;
-#define GET_TIME(x) do { x = analog_faketime++; } while(0)
-#define DELTA(x,y) ((y)-(x))
-#define TIME_NAME "Unreliable"
-#warning Precise timer not defined for this architecture.
-#endif
-
-static inline u64 get_time(void)
-{
- if (use_ktime) {
- return ktime_get_ns();
- } else {
- unsigned int x;
- GET_TIME(x);
- return x;
- }
-}
-
-static inline unsigned int delta(u64 x, u64 y)
-{
- if (use_ktime)
- return y - x;
- else
- return DELTA((unsigned int)x, (unsigned int)y);
-}
-
/*
* analog_decode() decodes analog joystick data and reports input events.
*/
static int analog_cooked_read(struct analog_port *port)
{
struct gameport *gameport = port->gameport;
- u64 time[4], start, loop, now;
+ ktime_t time[4], start, loop, now;
unsigned int loopout, timeout;
unsigned char data[4], this, last;
unsigned long flags;
int i, j;
loopout = (ANALOG_LOOP_TIME * port->loop) / 1000;
- timeout = ANALOG_MAX_TIME * port->speed;
+ timeout = ANALOG_MAX_TIME * NSEC_PER_MSEC;
local_irq_save(flags);
gameport_trigger(gameport);
- now = get_time();
+ now = ktime_get();
local_irq_restore(flags);
start = now;
local_irq_disable();
this = gameport_read(gameport) & port->mask;
- now = get_time();
+ now = ktime_get();
local_irq_restore(flags);
- if ((last ^ this) && (delta(loop, now) < loopout)) {
+ if ((last ^ this) && (ktime_sub(now, loop) < loopout)) {
data[i] = last ^ this;
time[i] = now;
i++;
}
- } while (this && (i < 4) && (delta(start, now) < timeout));
+ } while (this && (i < 4) && (ktime_sub(now, start) < timeout));
this <<= 4;
this |= data[i];
for (j = 0; j < 4; j++)
if (data[i] & (1 << j))
- port->axes[j] = (delta(start, time[i]) << ANALOG_FUZZ_BITS) / port->loop;
+ port->axes[j] = ((u32)ktime_sub(time[i], start) << ANALOG_FUZZ_BITS) / port->loop;
}
return -(this != port->mask);
{
struct gameport *gameport = port->gameport;
unsigned int i, t, tx;
- u64 t1, t2, t3;
+ ktime_t t1, t2, t3;
unsigned long flags;
- if (use_ktime) {
- port->speed = 1000000;
- } else {
- local_irq_save(flags);
- t1 = get_time();
-#ifdef FAKE_TIME
- analog_faketime += 830;
-#endif
- mdelay(1);
- t2 = get_time();
- t3 = get_time();
- local_irq_restore(flags);
-
- port->speed = delta(t1, t2) - delta(t2, t3);
- }
-
tx = ~0;
for (i = 0; i < 50; i++) {
local_irq_save(flags);
- t1 = get_time();
+ t1 = ktime_get();
for (t = 0; t < 50; t++) {
gameport_read(gameport);
- t2 = get_time();
+ t2 = ktime_get();
}
- t3 = get_time();
+ t3 = ktime_get();
local_irq_restore(flags);
udelay(i);
- t = delta(t1, t2) - delta(t2, t3);
+ t = ktime_sub(t2, t1) - ktime_sub(t3, t2);
if (t < tx) tx = t;
}
t = gameport_read(gameport);
msleep(ANALOG_MAX_TIME);
port->mask = (gameport_read(gameport) ^ t) & t & 0xf;
- port->fuzz = (port->speed * ANALOG_FUZZ_MAGIC) / port->loop / 1000 + ANALOG_FUZZ_BITS;
+ port->fuzz = (NSEC_PER_MSEC * ANALOG_FUZZ_MAGIC) / port->loop / 1000 + ANALOG_FUZZ_BITS;
for (i = 0; i < ANALOG_INIT_RETRIES; i++) {
if (!analog_cooked_read(port))
select SERIO
help
Say Y here if you want to use a LK201 or LK401 style serial
- keyboard. This keyboard is also useable on PCs if you attach
+ keyboard. This keyboard is also usable on PCs if you attach
it with the inputattach program. The connector pinout is
described within lkkbd.c.
MODULE_DEVICE_TABLE(of, adc_keys_of_match);
#endif
-static struct platform_driver __refdata adc_keys_driver = {
+static struct platform_driver adc_keys_driver = {
.driver = {
.name = "adc_keys",
.of_match_table = of_match_ptr(adc_keys_of_match),
#include <linux/platform_device.h>
#include <linux/input.h>
#include <linux/i2c.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/slab.h>
#include <linux/platform_data/adp5588.h>
#include <linux/platform_device.h>
#include <linux/input.h>
#include <linux/i2c.h>
-#include <linux/gpio.h>
+#include <linux/gpio/driver.h>
#include <linux/slab.h>
#include <linux/input/adp5589.h>
if (!keypad->enabled) {
ep93xx_keypad_config(keypad);
- clk_enable(keypad->clk);
+ clk_prepare_enable(keypad->clk);
keypad->enabled = true;
}
struct ep93xx_keypad *keypad = input_get_drvdata(pdev);
if (keypad->enabled) {
- clk_disable(keypad->clk);
+ clk_disable_unprepare(keypad->clk);
keypad->enabled = false;
}
}
To compile this driver as a module, choose M here: the module will be
called gpio-vibra.
-config INPUT_IXP4XX_BEEPER
- tristate "IXP4XX Beeper support"
- depends on ARCH_IXP4XX
- help
- If you say yes here, you can connect a beeper to the
- ixp4xx gpio pins. This is used by the LinkSys NSLU2.
-
- If unsure, say Y.
-
- To compile this driver as a module, choose M here: the
- module will be called ixp4xx-beeper.
-
config INPUT_COBALT_BTNS
tristate "Cobalt button interface"
depends on MIPS_COBALT
To compile this driver as a module, choose M here: the
module will be called xen-kbdfront.
-config INPUT_SIRFSOC_ONKEY
- tristate "CSR SiRFSoC power on/off/suspend key support"
- depends on ARCH_SIRF && OF
- default y
- help
- Say Y here if you want to support for the SiRFSoC power on/off/suspend key
- in Linux, after you press the onkey, system will suspend.
-
- If unsure, say N.
-
config INPUT_IDEAPAD_SLIDEBAR
tristate "IdeaPad Laptop Slidebar"
depends on INPUT
obj-$(CONFIG_INPUT_IMS_PCU) += ims-pcu.o
obj-$(CONFIG_INPUT_IQS269A) += iqs269a.o
obj-$(CONFIG_INPUT_IQS626A) += iqs626a.o
-obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o
obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o
obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o
obj-$(CONFIG_INPUT_M68K_BEEP) += m68kspkr.o
obj-$(CONFIG_INPUT_RK805_PWRKEY) += rk805-pwrkey.o
obj-$(CONFIG_INPUT_SC27XX_VIBRA) += sc27xx-vibra.o
obj-$(CONFIG_INPUT_SGI_BTNS) += sgi_btns.o
-obj-$(CONFIG_INPUT_SIRFSOC_ONKEY) += sirfsoc-onkey.o
obj-$(CONFIG_INPUT_SOC_BUTTON_ARRAY) += soc_button_array.o
obj-$(CONFIG_INPUT_SPARCSPKR) += sparcspkr.o
obj-$(CONFIG_INPUT_STPMIC1_ONKEY) += stpmic1_onkey.o
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Generic IXP4xx beeper driver
- *
- * Copyright (C) 2005 Tower Technologies
- *
- * based on nslu2-io.c
- * Copyright (C) 2004 Karen Spearel
- *
- * Author: Alessandro Zummo <a.zummo@towertech.it>
- * Maintainers: http://www.nslu2-linux.org/
- */
-
-#include <linux/module.h>
-#include <linux/input.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/interrupt.h>
-#include <linux/gpio.h>
-#include <mach/hardware.h>
-
-MODULE_AUTHOR("Alessandro Zummo <a.zummo@towertech.it>");
-MODULE_DESCRIPTION("ixp4xx beeper driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:ixp4xx-beeper");
-
-static DEFINE_SPINLOCK(beep_lock);
-
-static int ixp4xx_timer2_irq;
-
-static void ixp4xx_spkr_control(unsigned int pin, unsigned int count)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&beep_lock, flags);
-
- if (count) {
- gpio_direction_output(pin, 0);
- *IXP4XX_OSRT2 = (count & ~IXP4XX_OST_RELOAD_MASK) | IXP4XX_OST_ENABLE;
- } else {
- gpio_direction_output(pin, 1);
- gpio_direction_input(pin);
- *IXP4XX_OSRT2 = 0;
- }
-
- spin_unlock_irqrestore(&beep_lock, flags);
-}
-
-static int ixp4xx_spkr_event(struct input_dev *dev, unsigned int type, unsigned int code, int value)
-{
- unsigned int pin = (unsigned int) input_get_drvdata(dev);
- unsigned int count = 0;
-
- if (type != EV_SND)
- return -1;
-
- switch (code) {
- case SND_BELL:
- if (value)
- value = 1000;
- case SND_TONE:
- break;
- default:
- return -1;
- }
-
- if (value > 20 && value < 32767)
- count = (ixp4xx_timer_freq / (value * 4)) - 1;
-
- ixp4xx_spkr_control(pin, count);
-
- return 0;
-}
-
-static irqreturn_t ixp4xx_spkr_interrupt(int irq, void *dev_id)
-{
- unsigned int pin = (unsigned int) dev_id;
-
- /* clear interrupt */
- *IXP4XX_OSST = IXP4XX_OSST_TIMER_2_PEND;
-
- /* flip the beeper output */
- gpio_set_value(pin, !gpio_get_value(pin));
-
- return IRQ_HANDLED;
-}
-
-static int ixp4xx_spkr_probe(struct platform_device *dev)
-{
- struct input_dev *input_dev;
- int irq;
- int err;
-
- input_dev = input_allocate_device();
- if (!input_dev)
- return -ENOMEM;
-
- input_set_drvdata(input_dev, (void *) dev->id);
-
- input_dev->name = "ixp4xx beeper";
- input_dev->phys = "ixp4xx/gpio";
- input_dev->id.bustype = BUS_HOST;
- input_dev->id.vendor = 0x001f;
- input_dev->id.product = 0x0001;
- input_dev->id.version = 0x0100;
- input_dev->dev.parent = &dev->dev;
-
- input_dev->evbit[0] = BIT_MASK(EV_SND);
- input_dev->sndbit[0] = BIT_MASK(SND_BELL) | BIT_MASK(SND_TONE);
- input_dev->event = ixp4xx_spkr_event;
-
- irq = platform_get_irq(dev, 0);
- if (irq < 0) {
- err = irq;
- goto err_free_device;
- }
-
- err = gpio_request(dev->id, "ixp4-beeper");
- if (err)
- goto err_free_device;
-
- err = request_irq(irq, &ixp4xx_spkr_interrupt,
- IRQF_NO_SUSPEND, "ixp4xx-beeper",
- (void *) dev->id);
- if (err)
- goto err_free_gpio;
- ixp4xx_timer2_irq = irq;
-
- err = input_register_device(input_dev);
- if (err)
- goto err_free_irq;
-
- platform_set_drvdata(dev, input_dev);
-
- return 0;
-
- err_free_irq:
- free_irq(irq, (void *)dev->id);
- err_free_gpio:
- gpio_free(dev->id);
- err_free_device:
- input_free_device(input_dev);
-
- return err;
-}
-
-static int ixp4xx_spkr_remove(struct platform_device *dev)
-{
- struct input_dev *input_dev = platform_get_drvdata(dev);
- unsigned int pin = (unsigned int) input_get_drvdata(input_dev);
-
- input_unregister_device(input_dev);
-
- /* turn the speaker off */
- disable_irq(ixp4xx_timer2_irq);
- ixp4xx_spkr_control(pin, 0);
-
- free_irq(ixp4xx_timer2_irq, (void *)dev->id);
- gpio_free(dev->id);
-
- return 0;
-}
-
-static void ixp4xx_spkr_shutdown(struct platform_device *dev)
-{
- struct input_dev *input_dev = platform_get_drvdata(dev);
- unsigned int pin = (unsigned int) input_get_drvdata(input_dev);
-
- /* turn off the speaker */
- disable_irq(ixp4xx_timer2_irq);
- ixp4xx_spkr_control(pin, 0);
-}
-
-static struct platform_driver ixp4xx_spkr_platform_driver = {
- .driver = {
- .name = "ixp4xx-beeper",
- },
- .probe = ixp4xx_spkr_probe,
- .remove = ixp4xx_spkr_remove,
- .shutdown = ixp4xx_spkr_shutdown,
-};
-module_platform_driver(ixp4xx_spkr_platform_driver);
-
}
if (pwrkey->data->supports_ps_hold_poff_config) {
- pwrkey->reboot_notifier.notifier_call = pm8941_reboot_notify,
+ pwrkey->reboot_notifier.notifier_call = pm8941_reboot_notify;
error = register_reboot_notifier(&pwrkey->reboot_notifier);
if (error) {
dev_err(&pdev->dev, "failed to register reboot notifier: %d\n",
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Power key driver for SiRF PrimaII
- *
- * Copyright (c) 2013 - 2014 Cambridge Silicon Radio Limited, a CSR plc group
- * company.
- */
-
-#include <linux/module.h>
-#include <linux/interrupt.h>
-#include <linux/delay.h>
-#include <linux/platform_device.h>
-#include <linux/input.h>
-#include <linux/rtc/sirfsoc_rtciobrg.h>
-#include <linux/of.h>
-#include <linux/workqueue.h>
-
-struct sirfsoc_pwrc_drvdata {
- u32 pwrc_base;
- struct input_dev *input;
- struct delayed_work work;
-};
-
-#define PWRC_ON_KEY_BIT (1 << 0)
-
-#define PWRC_INT_STATUS 0xc
-#define PWRC_INT_MASK 0x10
-#define PWRC_PIN_STATUS 0x14
-#define PWRC_KEY_DETECT_UP_TIME 20 /* ms*/
-
-static int sirfsoc_pwrc_is_on_key_down(struct sirfsoc_pwrc_drvdata *pwrcdrv)
-{
- u32 state = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base +
- PWRC_PIN_STATUS);
- return !(state & PWRC_ON_KEY_BIT); /* ON_KEY is active low */
-}
-
-static void sirfsoc_pwrc_report_event(struct work_struct *work)
-{
- struct sirfsoc_pwrc_drvdata *pwrcdrv =
- container_of(work, struct sirfsoc_pwrc_drvdata, work.work);
-
- if (sirfsoc_pwrc_is_on_key_down(pwrcdrv)) {
- schedule_delayed_work(&pwrcdrv->work,
- msecs_to_jiffies(PWRC_KEY_DETECT_UP_TIME));
- } else {
- input_event(pwrcdrv->input, EV_KEY, KEY_POWER, 0);
- input_sync(pwrcdrv->input);
- }
-}
-
-static irqreturn_t sirfsoc_pwrc_isr(int irq, void *dev_id)
-{
- struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_id;
- u32 int_status;
-
- int_status = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base +
- PWRC_INT_STATUS);
- sirfsoc_rtc_iobrg_writel(int_status & ~PWRC_ON_KEY_BIT,
- pwrcdrv->pwrc_base + PWRC_INT_STATUS);
-
- input_event(pwrcdrv->input, EV_KEY, KEY_POWER, 1);
- input_sync(pwrcdrv->input);
- schedule_delayed_work(&pwrcdrv->work,
- msecs_to_jiffies(PWRC_KEY_DETECT_UP_TIME));
-
- return IRQ_HANDLED;
-}
-
-static void sirfsoc_pwrc_toggle_interrupts(struct sirfsoc_pwrc_drvdata *pwrcdrv,
- bool enable)
-{
- u32 int_mask;
-
- int_mask = sirfsoc_rtc_iobrg_readl(pwrcdrv->pwrc_base + PWRC_INT_MASK);
- if (enable)
- int_mask |= PWRC_ON_KEY_BIT;
- else
- int_mask &= ~PWRC_ON_KEY_BIT;
- sirfsoc_rtc_iobrg_writel(int_mask, pwrcdrv->pwrc_base + PWRC_INT_MASK);
-}
-
-static int sirfsoc_pwrc_open(struct input_dev *input)
-{
- struct sirfsoc_pwrc_drvdata *pwrcdrv = input_get_drvdata(input);
-
- sirfsoc_pwrc_toggle_interrupts(pwrcdrv, true);
-
- return 0;
-}
-
-static void sirfsoc_pwrc_close(struct input_dev *input)
-{
- struct sirfsoc_pwrc_drvdata *pwrcdrv = input_get_drvdata(input);
-
- sirfsoc_pwrc_toggle_interrupts(pwrcdrv, false);
- cancel_delayed_work_sync(&pwrcdrv->work);
-}
-
-static const struct of_device_id sirfsoc_pwrc_of_match[] = {
- { .compatible = "sirf,prima2-pwrc" },
- {},
-};
-MODULE_DEVICE_TABLE(of, sirfsoc_pwrc_of_match);
-
-static int sirfsoc_pwrc_probe(struct platform_device *pdev)
-{
- struct device_node *np = pdev->dev.of_node;
- struct sirfsoc_pwrc_drvdata *pwrcdrv;
- int irq;
- int error;
-
- pwrcdrv = devm_kzalloc(&pdev->dev, sizeof(struct sirfsoc_pwrc_drvdata),
- GFP_KERNEL);
- if (!pwrcdrv) {
- dev_info(&pdev->dev, "Not enough memory for the device data\n");
- return -ENOMEM;
- }
-
- /*
- * We can't use of_iomap because pwrc is not mapped in memory,
- * the so-called base address is only offset in rtciobrg
- */
- error = of_property_read_u32(np, "reg", &pwrcdrv->pwrc_base);
- if (error) {
- dev_err(&pdev->dev,
- "unable to find base address of pwrc node in dtb\n");
- return error;
- }
-
- pwrcdrv->input = devm_input_allocate_device(&pdev->dev);
- if (!pwrcdrv->input)
- return -ENOMEM;
-
- pwrcdrv->input->name = "sirfsoc pwrckey";
- pwrcdrv->input->phys = "pwrc/input0";
- pwrcdrv->input->evbit[0] = BIT_MASK(EV_KEY);
- input_set_capability(pwrcdrv->input, EV_KEY, KEY_POWER);
-
- INIT_DELAYED_WORK(&pwrcdrv->work, sirfsoc_pwrc_report_event);
-
- pwrcdrv->input->open = sirfsoc_pwrc_open;
- pwrcdrv->input->close = sirfsoc_pwrc_close;
-
- input_set_drvdata(pwrcdrv->input, pwrcdrv);
-
- /* Make sure the device is quiesced */
- sirfsoc_pwrc_toggle_interrupts(pwrcdrv, false);
-
- irq = platform_get_irq(pdev, 0);
- error = devm_request_irq(&pdev->dev, irq,
- sirfsoc_pwrc_isr, 0,
- "sirfsoc_pwrc_int", pwrcdrv);
- if (error) {
- dev_err(&pdev->dev, "unable to claim irq %d, error: %d\n",
- irq, error);
- return error;
- }
-
- error = input_register_device(pwrcdrv->input);
- if (error) {
- dev_err(&pdev->dev,
- "unable to register input device, error: %d\n",
- error);
- return error;
- }
-
- dev_set_drvdata(&pdev->dev, pwrcdrv);
- device_init_wakeup(&pdev->dev, 1);
-
- return 0;
-}
-
-static int __maybe_unused sirfsoc_pwrc_resume(struct device *dev)
-{
- struct sirfsoc_pwrc_drvdata *pwrcdrv = dev_get_drvdata(dev);
- struct input_dev *input = pwrcdrv->input;
-
- /*
- * Do not mask pwrc interrupt as we want pwrc work as a wakeup source
- * if users touch X_ONKEY_B, see arch/arm/mach-prima2/pm.c
- */
- mutex_lock(&input->mutex);
- if (input_device_enabled(input))
- sirfsoc_pwrc_toggle_interrupts(pwrcdrv, true);
- mutex_unlock(&input->mutex);
-
- return 0;
-}
-
-static SIMPLE_DEV_PM_OPS(sirfsoc_pwrc_pm_ops, NULL, sirfsoc_pwrc_resume);
-
-static struct platform_driver sirfsoc_pwrc_driver = {
- .probe = sirfsoc_pwrc_probe,
- .driver = {
- .name = "sirfsoc-pwrc",
- .pm = &sirfsoc_pwrc_pm_ops,
- .of_match_table = sirfsoc_pwrc_of_match,
- }
-};
-
-module_platform_driver(sirfsoc_pwrc_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Binghua Duan <Binghua.Duan@csr.com>, Xianglong Du <Xianglong.Du@csr.com>");
-MODULE_DESCRIPTION("CSR Prima2 PWRC Driver");
-MODULE_ALIAS("platform:sirfsoc-pwrc");
#define ETP_FW_PAGE_SIZE_512 512
#define ETP_FW_SIGNATURE_SIZE 6
-#define ETP_PRODUCT_ID_DELBIN 0x00C2
+#define ETP_PRODUCT_ID_WHITEBOX 0x00B8
#define ETP_PRODUCT_ID_VOXEL 0x00BF
+#define ETP_PRODUCT_ID_DELBIN 0x00C2
#define ETP_PRODUCT_ID_MAGPIE 0x0120
#define ETP_PRODUCT_ID_BOBBA 0x0121
u32 quirks;
} elan_i2c_quirks[] = {
{ 0x0D, ETP_PRODUCT_ID_DELBIN, ETP_QUIRK_QUICK_WAKEUP },
+ { 0x0D, ETP_PRODUCT_ID_WHITEBOX, ETP_QUIRK_QUICK_WAKEUP },
{ 0x10, ETP_PRODUCT_ID_VOXEL, ETP_QUIRK_QUICK_WAKEUP },
{ 0x14, ETP_PRODUCT_ID_MAGPIE, ETP_QUIRK_QUICK_WAKEUP },
{ 0x14, ETP_PRODUCT_ID_BOBBA, ETP_QUIRK_QUICK_WAKEUP },
.detach = parkbd_detach,
.devmodel = true,
};
-
-static int __init parkbd_init(void)
-{
- return parport_register_driver(&parkbd_parport_driver);
-}
-
-static void __exit parkbd_exit(void)
-{
- parport_unregister_driver(&parkbd_parport_driver);
-}
-
-module_init(parkbd_init);
-module_exit(parkbd_exit);
+module_parport_driver(parkbd_parport_driver);
- JASTEC USB Touch Controller/DigiTech DTR-02U
- Zytronic controllers
- Elo TouchSystems 2700 IntelliTouch
- - EasyTouch USB Touch Controller from Data Modul
+ - EasyTouch USB Touch Controller from Data Module
- e2i (Mimo monitors)
Have a look at <http://linux.chapter7.ch/touchkit/> for
* the identification registers.
*/
switch (rdbuf[0]) {
+ case 0x11: /* EDT EP0110M09 */
case 0x35: /* EDT EP0350M09 */
case 0x43: /* EDT EP0430M09 */
case 0x50: /* EDT EP0500M09 */
enum mms_type {
TYPE_MMS114 = 114,
+ TYPE_MMS134S = 134,
TYPE_MMS136 = 136,
TYPE_MMS152 = 152,
TYPE_MMS345L = 345,
goto out;
/* MMS136 has slightly different event size */
- if (data->type == TYPE_MMS136)
+ if (data->type == TYPE_MMS134S || data->type == TYPE_MMS136)
touch_size = packet_size / MMS136_EVENT_SIZE;
else
touch_size = packet_size / MMS114_EVENT_SIZE;
break;
case TYPE_MMS114:
+ case TYPE_MMS134S:
case TYPE_MMS136:
error = __mms114_read_reg(data, MMS114_TSP_REV, 6, buf);
if (error)
if (error < 0)
return error;
- /* Only MMS114 and MMS136 have configuration and power on registers */
- if (data->type != TYPE_MMS114 && data->type != TYPE_MMS136)
+ /* MMS114, MMS134S and MMS136 have configuration and power on registers */
+ if (data->type != TYPE_MMS114 && data->type != TYPE_MMS134S &&
+ data->type != TYPE_MMS136)
return 0;
error = mms114_set_active(data, true);
0, data->props.max_y, 0, 0);
}
- if (data->type == TYPE_MMS114 || data->type == TYPE_MMS136) {
+ if (data->type == TYPE_MMS114 || data->type == TYPE_MMS134S ||
+ data->type == TYPE_MMS136) {
/*
* The firmware handles movement and pressure fuzz, so
* don't duplicate that in software.
{
.compatible = "melfas,mms114",
.data = (void *)TYPE_MMS114,
+ }, {
+ .compatible = "melfas,mms134s",
+ .data = (void *)TYPE_MMS134S,
}, {
.compatible = "melfas,mms136",
.data = (void *)TYPE_MMS136,
return new_iova->pfn_lo;
}
+EXPORT_SYMBOL_GPL(alloc_iova_fast);
/**
* free_iova_fast - free iova pfn range into rcache
free_iova(iovad, pfn);
}
+EXPORT_SYMBOL_GPL(free_iova_fast);
#define fq_ring_for_each(i, fq) \
for ((i) = (fq)->head; (i) != (fq)->tail; (i) = ((i) + 1) % IOVA_FQ_SIZE)
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/debugfs.h>
+#include <linux/utsname.h>
#define DEFAULT_COUNT 10
MODULE_PARM_DESC(cpoint_count, " Crash Point Count, number of times the "\
"crash point is to be hit to trigger action");
+/* For test debug reporting. */
+char *lkdtm_kernel_info;
/* Return the crashtype number or NULL if the name is invalid */
static const struct crashtype *find_crashtype(const char *name)
crash_count = cpoint_count;
#endif
+ /* Common initialization. */
+ lkdtm_kernel_info = kasprintf(GFP_KERNEL, "kernel (%s %s)",
+ init_uts_ns.name.release,
+ init_uts_ns.name.machine);
+
/* Handle test-specific initialization. */
lkdtm_bugs_init(&recur_count);
lkdtm_perms_init();
if (lkdtm_kprobe != NULL)
unregister_kprobe(lkdtm_kprobe);
+ kfree(lkdtm_kernel_info);
+
pr_info("Crash point unregistered\n");
}
#define pr_fmt(fmt) "lkdtm: " fmt
#include <linux/kernel.h>
-#include <generated/compile.h>
-#include <generated/utsrelease.h>
-#define LKDTM_KERNEL "kernel (" UTS_RELEASE " " UTS_MACHINE ")"
+extern char *lkdtm_kernel_info;
#define pr_expected_config(kconfig) \
{ \
if (IS_ENABLED(kconfig)) \
- pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y\n"); \
+ pr_err("Unexpected! This %s was built with " #kconfig "=y\n", \
+ lkdtm_kernel_info); \
else \
- pr_warn("This is probably expected, since this " LKDTM_KERNEL " was built *without* " #kconfig "=y\n"); \
+ pr_warn("This is probably expected, since this %s was built *without* " #kconfig "=y\n", \
+ lkdtm_kernel_info); \
}
#ifndef MODULE
if (IS_ENABLED(kconfig)) { \
switch (lkdtm_check_bool_cmdline(param)) { \
case 0: \
- pr_warn("This is probably expected, since this " LKDTM_KERNEL " was built with " #kconfig "=y but booted with '" param "=N'\n"); \
+ pr_warn("This is probably expected, since this %s was built with " #kconfig "=y but booted with '" param "=N'\n", \
+ lkdtm_kernel_info); \
break; \
case 1: \
- pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y and booted with '" param "=Y'\n"); \
+ pr_err("Unexpected! This %s was built with " #kconfig "=y and booted with '" param "=Y'\n", \
+ lkdtm_kernel_info); \
break; \
default: \
- pr_err("Unexpected! This " LKDTM_KERNEL " was built with " #kconfig "=y (and booted without '" param "' specified)\n"); \
+ pr_err("Unexpected! This %s was built with " #kconfig "=y (and booted without '" param "' specified)\n", \
+ lkdtm_kernel_info); \
} \
} else { \
switch (lkdtm_check_bool_cmdline(param)) { \
case 0: \
- pr_warn("This is probably expected, as this " LKDTM_KERNEL " was built *without* " #kconfig "=y and booted with '" param "=N'\n"); \
+ pr_warn("This is probably expected, as this %s was built *without* " #kconfig "=y and booted with '" param "=N'\n", \
+ lkdtm_kernel_info); \
break; \
case 1: \
- pr_err("Unexpected! This " LKDTM_KERNEL " was built *without* " #kconfig "=y but booted with '" param "=Y'\n"); \
+ pr_err("Unexpected! This %s was built *without* " #kconfig "=y but booted with '" param "=Y'\n", \
+ lkdtm_kernel_info); \
break; \
default: \
- pr_err("This is probably expected, since this " LKDTM_KERNEL " was built *without* " #kconfig "=y (and booted without '" param "' specified)\n"); \
+ pr_err("This is probably expected, since this %s was built *without* " #kconfig "=y (and booted without '" param "' specified)\n", \
+ lkdtm_kernel_info); \
break; \
} \
} \
static void nvme_put_subsystem(struct nvme_subsystem *subsys);
static void nvme_remove_invalid_namespaces(struct nvme_ctrl *ctrl,
unsigned nsid);
+static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd);
/*
* Prepare a queue for teardown.
return effects;
}
-static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
+static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects,
+ struct nvme_command *cmd, int status)
{
if (effects & NVME_CMD_EFFECTS_CSE_MASK) {
nvme_unfreeze(ctrl);
nvme_queue_scan(ctrl);
flush_work(&ctrl->scan_work);
}
+
+ switch (cmd->common.opcode) {
+ case nvme_admin_set_features:
+ switch (le32_to_cpu(cmd->common.cdw10) & 0xFF) {
+ case NVME_FEAT_KATO:
+ /*
+ * Keep alive commands interval on the host should be
+ * updated when KATO is modified by Set Features
+ * commands.
+ */
+ if (!status)
+ nvme_update_keep_alive(ctrl, cmd);
+ break;
+ default:
+ break;
+ }
+ break;
+ default:
+ break;
+ }
}
int nvme_execute_passthru_rq(struct request *rq)
effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
ret = nvme_execute_rq(disk, rq, false);
if (effects) /* nothing to be done for zero cmd effects */
- nvme_passthru_end(ctrl, effects);
+ nvme_passthru_end(ctrl, effects, cmd, ret);
return ret;
}
}
EXPORT_SYMBOL_GPL(nvme_stop_keep_alive);
+/*
+ * Replace the controller's keep-alive timeout with the value carried in a
+ * command and restart the keep-alive machinery so the new value is used.
+ *
+ * @ctrl: controller whose ->kato is updated
+ * @cmd: command whose cdw11 holds the new timeout
+ *
+ * cdw11 is divided by 1000 (rounding up) before being stored in ->kato, so
+ * it is presumably in milliseconds while ->kato is in seconds -- TODO confirm.
+ */
+static void nvme_update_keep_alive(struct nvme_ctrl *ctrl,
+ struct nvme_command *cmd)
+{
+ unsigned int new_kato =
+ DIV_ROUND_UP(le32_to_cpu(cmd->common.cdw11), 1000);
+
+ /* The logged "interval" is half of the timeout, expressed in ms. */
+ dev_info(ctrl->device,
+ "keep alive interval updated from %u ms to %u ms\n",
+ ctrl->kato * 1000 / 2, new_kato * 1000 / 2);
+
+ /* Stop first so ->kato is not changed while keep-alive is running. */
+ nvme_stop_keep_alive(ctrl);
+ ctrl->kato = new_kato;
+ nvme_start_keep_alive(ctrl);
+}
+
/*
* In NVMe 1.0 the CNS field was just a binary controller or namespace
* flag, thus sending any new CNS opcodes has a big chance of not working.
return error;
}
-static bool nvme_multi_css(struct nvme_ctrl *ctrl)
-{
- return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
-}
-
static int nvme_process_ns_desc(struct nvme_ctrl *ctrl, struct nvme_ns_ids *ids,
struct nvme_ns_id_desc *cur, bool *csi_seen)
{
goto out_unfreeze;
}
+ set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue);
if (blk_queue_is_zoned(ns->queue)) {
if (nvme_ns_head_multipath(ns->head)) {
blk_mq_freeze_queue(ns->head->disk->queue);
nvme_update_disk_info(ns->head->disk, ns, id);
+ nvme_mpath_revalidate_paths(ns);
blk_stack_limits(&ns->head->disk->queue->limits,
&ns->queue->limits, 0);
disk_update_readahead(ns->head->disk);
nvme_get_ctrl(ctrl);
- device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups);
+ if (device_add_disk(ctrl->device, ns->disk, nvme_ns_id_attr_groups))
+ goto out_cleanup_ns_from_list;
+
if (!nvme_ns_head_multipath(ns->head))
nvme_add_ns_cdev(ns);
return;
+ out_cleanup_ns_from_list:
+ nvme_put_ctrl(ctrl);
+ down_write(&ctrl->namespaces_rwsem);
+ list_del_init(&ns->list);
+ up_write(&ctrl->namespaces_rwsem);
out_unlink_ns:
mutex_lock(&ctrl->subsys->lock);
list_del_rcu(&ns->siblings);
if (test_and_set_bit(NVME_NS_REMOVING, &ns->flags))
return;
+ clear_bit(NVME_NS_READY, &ns->flags);
set_capacity(ns->disk, 0);
nvme_fault_inject_fini(&ns->fault_inject);
list_del_rcu(&ns->siblings);
mutex_unlock(&ns->ctrl->subsys->lock);
- synchronize_rcu(); /* guarantee not available in head->list */
- nvme_mpath_clear_current_path(ns);
- synchronize_srcu(&ns->head->srcu); /* wait for concurrent submissions */
+ /* guarantee not available in head->list */
+ synchronize_rcu();
+
+ /* wait for concurrent submissions */
+ if (nvme_mpath_clear_current_path(ns))
+ synchronize_srcu(&ns->head->srcu);
if (!nvme_ns_head_multipath(ns->head))
nvme_cdev_del(&ns->cdev, &ns->cdev_device);
mutex_unlock(&ctrl->scan_lock);
}
+/*
+ * Re-check every path of a multipath head against the head disk's capacity.
+ *
+ * @ns: any namespace belonging to the head; only ns->head is used, after
+ * which the variable is reused as the list iterator.
+ *
+ * Paths whose disk capacity differs from the head disk lose NVME_NS_READY,
+ * and the per-node current_path entries are reset to NULL.
+ *
+ * NOTE(review): the sibling list is walked with list_for_each_entry_rcu()
+ * but no read-side lock is taken in this function -- confirm the caller
+ * provides the required protection.
+ */
+void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+{
+ struct nvme_ns_head *head = ns->head;
+ sector_t capacity = get_capacity(head->disk);
+ int node;
+
+ list_for_each_entry_rcu(ns, &head->list, siblings) {
+ if (capacity != get_capacity(ns->disk))
+ clear_bit(NVME_NS_READY, &ns->flags);
+ }
+
+ /* Drop every cached path, not only the ones that were just cleared. */
+ for_each_node(node)
+ rcu_assign_pointer(head->current_path[node], NULL);
+}
+
static bool nvme_path_is_disabled(struct nvme_ns *ns)
{
/*
ns->ctrl->state != NVME_CTRL_DELETING)
return true;
if (test_bit(NVME_NS_ANA_PENDING, &ns->flags) ||
- test_bit(NVME_NS_REMOVING, &ns->flags))
+ !test_bit(NVME_NS_READY, &ns->flags))
return true;
return false;
}
ctrl->subsys->instance, head->instance);
blk_queue_flag_set(QUEUE_FLAG_NONROT, head->disk->queue);
+ blk_queue_flag_set(QUEUE_FLAG_NOWAIT, head->disk->queue);
+
/* set to a default value of 512 until the disk is validated */
blk_queue_logical_block_size(head->disk->queue, 512);
blk_set_stacking_limits(&head->disk->queue->limits);
#define NVME_NS_DEAD 1
#define NVME_NS_ANA_PENDING 2
#define NVME_NS_FORCE_RO 3
+#define NVME_NS_READY 4
struct cdev cdev;
struct device cdev_device;
void nvme_mpath_uninit(struct nvme_ctrl *ctrl);
void nvme_mpath_stop(struct nvme_ctrl *ctrl);
bool nvme_mpath_clear_current_path(struct nvme_ns *ns);
+void nvme_mpath_revalidate_paths(struct nvme_ns *ns);
void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl);
void nvme_mpath_shutdown_disk(struct nvme_ns_head *head);
{
return false;
}
+/* No-op variant; presumably the stub used when multipath is compiled out -- confirm. */
+static inline void nvme_mpath_revalidate_paths(struct nvme_ns *ns)
+{
+}
static inline void nvme_mpath_clear_ctrl_paths(struct nvme_ctrl *ctrl)
{
}
struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid);
void nvme_put_ns(struct nvme_ns *ns);
+/*
+ * Return true when the CSS field of the controller configuration
+ * (ctrl->ctrl_config masked with NVME_CC_CSS_MASK) equals NVME_CC_CSS_CSI.
+ */
+static inline bool nvme_multi_css(struct nvme_ctrl *ctrl)
+{
+ return (ctrl->ctrl_config & NVME_CC_CSS_MASK) == NVME_CC_CSS_CSI;
+}
+
#endif /* _NVME_H */
u32 pdu_len;
u32 pdu_sent;
u16 ttag;
+ __le16 status;
struct list_head entry;
struct llist_node lentry;
__le32 ddgst;
static int nvme_tcp_process_nvme_cqe(struct nvme_tcp_queue *queue,
struct nvme_completion *cqe)
{
+ struct nvme_tcp_request *req;
struct request *rq;
rq = nvme_find_rq(nvme_tcp_tagset(queue), cqe->command_id);
return -EINVAL;
}
- if (!nvme_try_complete_req(rq, cqe->status, cqe->result))
+ req = blk_mq_rq_to_pdu(rq);
+ if (req->status == cpu_to_le16(NVME_SC_SUCCESS))
+ req->status = cqe->status;
+
+ if (!nvme_try_complete_req(rq, req->status, cqe->result))
nvme_complete_rq(rq);
queue->nr_cqe++;
queue->ddgst_remaining = NVME_TCP_DIGEST_LENGTH;
} else {
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
- nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+ nvme_tcp_end_request(rq,
+ le16_to_cpu(req->status));
queue->nr_cqe++;
}
nvme_tcp_init_recv_ctx(queue);
return 0;
if (queue->recv_ddgst != queue->exp_ddgst) {
+ struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
+ pdu->command_id);
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
+
+ req->status = cpu_to_le16(NVME_SC_DATA_XFER_ERROR);
+
dev_err(queue->ctrl->ctrl.device,
"data digest error: recv %#x expected %#x\n",
le32_to_cpu(queue->recv_ddgst),
le32_to_cpu(queue->exp_ddgst));
- return -EIO;
}
if (pdu->hdr.flags & NVME_TCP_F_DATA_SUCCESS) {
struct request *rq = nvme_cid_to_rq(nvme_tcp_tagset(queue),
pdu->command_id);
+ struct nvme_tcp_request *req = blk_mq_rq_to_pdu(rq);
- nvme_tcp_end_request(rq, NVME_SC_SUCCESS);
+ nvme_tcp_end_request(rq, le16_to_cpu(req->status));
queue->nr_cqe++;
}
return ret;
req->state = NVME_TCP_SEND_CMD_PDU;
+ req->status = cpu_to_le16(NVME_SC_SUCCESS);
req->offset = 0;
req->data_sent = 0;
req->pdu_len = 0;
if (unlikely(ret))
return ret;
- if (nvmet_req_passthru_ctrl(req))
+ if (nvmet_is_passthru_req(req))
return nvmet_parse_passthru_admin_cmd(req);
switch (cmd->common.opcode) {
}
/* passthru subsystems use the underlying controller's version */
- if (nvmet_passthru_ctrl(subsys))
+ if (nvmet_is_passthru_subsys(subsys))
return -EINVAL;
ret = sscanf(page, "%d.%d.%d\n", &major, &minor, &tertiary);
{
struct nvmet_subsys *subsys = to_subsys(item);
- return snprintf(page, PAGE_SIZE, "%s\n", subsys->serial);
+ /*
+ * Use a precision ("%.*s"), not a field width ("%*s"): a width only pads
+ * the output and never limits how many bytes of ->serial are read.
+ * NOTE(review): ->serial is presumably a fixed-size field that need not be
+ * NUL-terminated -- confirm.
+ */
+ return snprintf(page, PAGE_SIZE, "%.*s\n",
+ NVMET_SN_MAX_SIZE, subsys->serial);
}
static ssize_t
mutex_lock(&subsys->lock);
ret = 0;
- if (nvmet_passthru_ctrl(subsys)) {
+ if (nvmet_is_passthru_subsys(subsys)) {
pr_info("cannot enable both passthru and regular namespaces for a single subsystem");
goto out_unlock;
}
if (unlikely(ret))
return ret;
- if (nvmet_req_passthru_ctrl(req))
+ if (nvmet_is_passthru_req(req))
return nvmet_parse_passthru_io_cmd(req);
ret = nvmet_req_find_ns(req);
ctrl->cap |= (15ULL << 24);
/* maximum queue entries supported: */
ctrl->cap |= NVMET_QUEUE_SIZE - 1;
+
+ if (nvmet_is_passthru_subsys(ctrl->subsys))
+ nvmet_passthrough_override_cap(ctrl);
}
struct nvmet_ctrl *nvmet_ctrl_find_get(const char *subsysnqn,
goto out_put_subsystem;
mutex_init(&ctrl->lock);
- nvmet_init_cap(ctrl);
-
ctrl->port = req->port;
INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
kref_init(&ctrl->ref);
ctrl->subsys = subsys;
+ nvmet_init_cap(ctrl);
WRITE_ONCE(ctrl->aen_enabled, NVMET_AEN_CFG_OPTIONAL);
ctrl->changed_ns_list = kmalloc_array(NVME_MAX_CHANGED_NAMESPACES,
void nvmet_passthru_ctrl_disable(struct nvmet_subsys *subsys);
u16 nvmet_parse_passthru_admin_cmd(struct nvmet_req *req);
u16 nvmet_parse_passthru_io_cmd(struct nvmet_req *req);
-static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
+static inline bool nvmet_is_passthru_subsys(struct nvmet_subsys *subsys)
{
return subsys->passthru_ctrl;
}
{
return 0;
}
-static inline struct nvme_ctrl *nvmet_passthru_ctrl(struct nvmet_subsys *subsys)
+/* Stub variant: always reports that the subsystem is not a passthru one. */
+static inline bool nvmet_is_passthru_subsys(struct nvmet_subsys *subsys)
{
- return NULL;
+ return false;
}
#endif /* CONFIG_NVME_TARGET_PASSTHRU */
-static inline struct nvme_ctrl *
-nvmet_req_passthru_ctrl(struct nvmet_req *req)
+static inline bool nvmet_is_passthru_req(struct nvmet_req *req)
{
- return nvmet_passthru_ctrl(nvmet_req_subsys(req));
+ return nvmet_is_passthru_subsys(nvmet_req_subsys(req));
}
+void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl);
+
u16 errno_to_nvme_status(struct nvmet_req *req, int errno);
u16 nvmet_report_invalid_opcode(struct nvmet_req *req);
*/
static DEFINE_XARRAY(passthru_subsystems);
+/*
+ * Adjust the capabilities value of a passthru target controller.
+ *
+ * @ctrl: target controller; ctrl->subsys and its passthru_ctrl are
+ * dereferenced, so both must already be set when this is called.
+ */
+void nvmet_passthrough_override_cap(struct nvmet_ctrl *ctrl)
+{
+ /*
+ * Multiple command set support can only be declared if the underlying
+ * controller actually supports it.
+ */
+ /* Bit 43 is presumably the multiple-command-set flag in CAP -- confirm. */
+ if (!nvme_multi_css(ctrl->subsys->passthru_ctrl))
+ ctrl->cap &= ~(1ULL << 43);
+}
+
static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
static void nvmet_passthru_execute_cmd(struct nvmet_req *req)
{
- struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
+ struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
struct request_queue *q = ctrl->admin_q;
struct nvme_ns *ns = NULL;
struct request *rq = NULL;
*/
static void nvmet_passthru_set_host_behaviour(struct nvmet_req *req)
{
- struct nvme_ctrl *ctrl = nvmet_req_passthru_ctrl(req);
+ struct nvme_ctrl *ctrl = nvmet_req_subsys(req)->passthru_ctrl;
struct nvme_feat_host_behavior *host;
u16 status = NVME_SC_INTERNAL;
int ret;
struct property *p;
struct device_node *con_np = to_of_node(fwnode);
+ if (IS_ENABLED(CONFIG_X86))
+ return 0;
+
if (!con_np)
return -EINVAL;
config PWM_JZ4740
tristate "Ingenic JZ47xx PWM support"
- depends on MIPS
+ depends on MIPS || COMPILE_TEST
depends on COMMON_CLK
select MFD_SYSCON
help
config PWM_KEEMBAY
tristate "Intel Keem Bay PWM driver"
- depends on ARCH_KEEMBAY || (ARM64 && COMPILE_TEST)
+ depends on ARCH_KEEMBAY || COMPILE_TEST
+ depends on COMMON_CLK && HAS_IOMEM
help
The platform driver for Intel Keem Bay PWM controller.
- *
- * Returns: 0 on success or a negative error code on failure.
*/
-int pwmchip_remove(struct pwm_chip *chip)
+void pwmchip_remove(struct pwm_chip *chip)
{
pwmchip_sysfs_unexport(chip);
free_pwms(chip);
mutex_unlock(&pwm_lock);
-
- return 0;
}
EXPORT_SYMBOL_GPL(pwmchip_remove);
struct ab8500_pwm_chip {
struct pwm_chip chip;
+ unsigned int hwid;
};
+/* Map an embedded struct pwm_chip back to its containing ab8500_pwm_chip. */
+static struct ab8500_pwm_chip *ab8500_pwm_from_chip(struct pwm_chip *chip)
+{
+ return container_of(chip, struct ab8500_pwm_chip, chip);
+}
+
static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
const struct pwm_state *state)
{
int ret;
u8 reg;
unsigned int higher_val, lower_val;
+ struct ab8500_pwm_chip *ab8500 = ab8500_pwm_from_chip(chip);
if (state->polarity != PWM_POLARITY_NORMAL)
return -EINVAL;
if (!state->enabled) {
ret = abx500_mask_and_set_register_interruptible(chip->dev,
AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
- 1 << (chip->base - 1), 0);
+ 1 << ab8500->hwid, 0);
if (ret < 0)
dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n",
*/
higher_val = ((state->duty_cycle & 0x0300) >> 8);
- reg = AB8500_PWM_OUT_CTRL1_REG + ((chip->base - 1) * 2);
+ reg = AB8500_PWM_OUT_CTRL1_REG + (ab8500->hwid * 2);
ret = abx500_set_register_interruptible(chip->dev, AB8500_MISC,
reg, (u8)lower_val);
ret = abx500_mask_and_set_register_interruptible(chip->dev,
AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
- 1 << (chip->base - 1), 1 << (chip->base - 1));
+ 1 << ab8500->hwid, 1 << ab8500->hwid);
if (ret < 0)
dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n",
pwm->label, ret);
struct ab8500_pwm_chip *ab8500;
int err;
+ /*
+ * pdev->id - 1 becomes the hardware id used as a bit index and register
+ * offset, so reject ids outside 1..31. The error code must be a negative
+ * errno; a positive EINVAL would be returned from probe as-is.
+ */
+ if (pdev->id < 1 || pdev->id > 31)
+ return dev_err_probe(&pdev->dev, -EINVAL, "Invalid device id %d\n", pdev->id);
+
/*
* Nothing to be done in probe, this is required to get the
* device which is required for ab8500 read and write
ab8500->chip.dev = &pdev->dev;
ab8500->chip.ops = &ab8500_pwm_ops;
ab8500->chip.npwm = 1;
+ ab8500->hwid = pdev->id - 1;
- err = pwmchip_add(&ab8500->chip);
+ err = devm_pwmchip_add(&pdev->dev, &ab8500->chip);
if (err < 0)
return dev_err_probe(&pdev->dev, err, "Failed to add pwm chip\n");
dev_dbg(&pdev->dev, "pwm probe successful\n");
- platform_set_drvdata(pdev, ab8500);
-
- return 0;
-}
-
-static int ab8500_pwm_remove(struct platform_device *pdev)
-{
- struct ab8500_pwm_chip *ab8500 = platform_get_drvdata(pdev);
- int err;
-
- err = pwmchip_remove(&ab8500->chip);
- if (err < 0)
- return err;
-
- dev_dbg(&pdev->dev, "pwm driver removed\n");
return 0;
}
.name = "ab8500-pwm",
},
.probe = ab8500_pwm_probe,
- .remove = ab8500_pwm_remove,
};
module_platform_driver(ab8500_pwm_driver);
static int atmel_hlcdc_pwm_remove(struct platform_device *pdev)
{
struct atmel_hlcdc_pwm *chip = platform_get_drvdata(pdev);
- int ret;
- ret = pwmchip_remove(&chip->chip);
- if (ret)
- return ret;
+ pwmchip_remove(&chip->chip);
clk_disable_unprepare(chip->hlcdc->periph_clk);
static int atmel_tcb_pwm_remove(struct platform_device *pdev)
{
struct atmel_tcb_pwm_chip *tcbpwm = platform_get_drvdata(pdev);
- int err;
- err = pwmchip_remove(&tcbpwm->chip);
- if (err < 0)
- return err;
+ pwmchip_remove(&tcbpwm->chip);
clk_disable_unprepare(tcbpwm->slow_clk);
clk_put(tcbpwm->slow_clk);
void __iomem *base;
const struct atmel_pwm_data *data;
- unsigned int updated_pwms;
- /* ISR is cleared when read, ensure only one thread does that */
- struct mutex isr_lock;
+ /*
+ * The hardware supports a mechanism to update a channel's duty cycle at
+ * the end of the currently running period. When such an update is
+ * pending we delay disabling the PWM until the new configuration is
+ * active because otherwise pwm_config(duty_cycle=0); pwm_disable();
+ * might not result in an inactive output.
+ * This bitmask tracks for which channels an update is pending in
+ * hardware.
+ */
+ u32 update_pending;
+
+ /* Protects .update_pending */
+ spinlock_t lock;
};
static inline struct atmel_pwm_chip *to_atmel_pwm_chip(struct pwm_chip *chip)
atmel_pwm_writel(chip, base + offset, val);
}
+/*
+ * Fold the hardware's per-channel ISR flags into chip->update_pending.
+ * Both callers in this file invoke this with chip->lock held.
+ */
+static void atmel_pwm_update_pending(struct atmel_pwm_chip *chip)
+{
+ /*
+ * Each channel that has its bit in ISR set started a new period since
+ * ISR was cleared and so there is no more update pending. Note that
+ * reading ISR clears it, so this needs to handle all channels to not
+ * lose information.
+ */
+ u32 isr = atmel_pwm_readl(chip, PWM_ISR);
+
+ chip->update_pending &= ~isr;
+}
+
+/*
+ * Record that channel @ch has an update queued in hardware. Takes chip->lock
+ * while refreshing and modifying chip->update_pending.
+ */
+static void atmel_pwm_set_pending(struct atmel_pwm_chip *chip, unsigned int ch)
+{
+ spin_lock(&chip->lock);
+
+ /*
+ * Clear pending flags in hardware because otherwise there might still
+ * be a stale flag in ISR.
+ */
+ atmel_pwm_update_pending(chip);
+
+ chip->update_pending |= (1 << ch);
+
+ spin_unlock(&chip->lock);
+}
+
+/*
+ * Report whether channel @ch still has an update pending.
+ *
+ * Returns 1 if the channel's bit is still set in chip->update_pending after
+ * refreshing the bookkeeping from ISR, 0 otherwise. ISR is only read (and
+ * thereby cleared) when the bit is set on entry.
+ */
+static int atmel_pwm_test_pending(struct atmel_pwm_chip *chip, unsigned int ch)
+{
+ int ret = 0;
+
+ spin_lock(&chip->lock);
+
+ if (chip->update_pending & (1 << ch)) {
+ atmel_pwm_update_pending(chip);
+
+ if (chip->update_pending & (1 << ch))
+ ret = 1;
+ }
+
+ spin_unlock(&chip->lock);
+
+ return ret;
+}
+
+/*
+ * Poll until channel @ch has no update pending or the 2 * HZ jiffies timeout
+ * has expired, sleeping 10-100 us between polls.
+ *
+ * Returns 0 once the update is no longer pending, -ETIMEDOUT if it was still
+ * pending at the last check.
+ */
+static int atmel_pwm_wait_nonpending(struct atmel_pwm_chip *chip, unsigned int ch)
+{
+ unsigned long timeout = jiffies + 2 * HZ;
+ int ret;
+
+ while ((ret = atmel_pwm_test_pending(chip, ch)) &&
+ time_before(jiffies, timeout))
+ usleep_range(10, 100);
+
+ return ret ? -ETIMEDOUT : 0;
+}
+
static int atmel_pwm_calculate_cprd_and_pres(struct pwm_chip *chip,
unsigned long clkrate,
const struct pwm_state *state,
atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm,
atmel_pwm->data->regs.duty_upd, cdty);
+ atmel_pwm_set_pending(atmel_pwm, pwm->hwpwm);
}
static void atmel_pwm_set_cprd_cdty(struct pwm_chip *chip,
struct atmel_pwm_chip *atmel_pwm = to_atmel_pwm_chip(chip);
unsigned long timeout = jiffies + 2 * HZ;
- /*
- * Wait for at least a complete period to have passed before disabling a
- * channel to be sure that CDTY has been updated
- */
- mutex_lock(&atmel_pwm->isr_lock);
- atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR);
-
- while (!(atmel_pwm->updated_pwms & (1 << pwm->hwpwm)) &&
- time_before(jiffies, timeout)) {
- usleep_range(10, 100);
- atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR);
- }
+ atmel_pwm_wait_nonpending(atmel_pwm, pwm->hwpwm);
- mutex_unlock(&atmel_pwm->isr_lock);
atmel_pwm_writel(atmel_pwm, PWM_DIS, 1 << pwm->hwpwm);
/*
val |= PWM_CMR_CPOL;
atmel_pwm_ch_writel(atmel_pwm, pwm->hwpwm, PWM_CMR, val);
atmel_pwm_set_cprd_cdty(chip, pwm, cprd, cdty);
- mutex_lock(&atmel_pwm->isr_lock);
- atmel_pwm->updated_pwms |= atmel_pwm_readl(atmel_pwm, PWM_ISR);
- atmel_pwm->updated_pwms &= ~(1 << pwm->hwpwm);
- mutex_unlock(&atmel_pwm->isr_lock);
atmel_pwm_writel(atmel_pwm, PWM_ENA, 1 << pwm->hwpwm);
} else if (cstate.enabled) {
atmel_pwm_disable(chip, pwm, true);
tmp <<= pres;
state->period = DIV64_U64_ROUND_UP(tmp, rate);
+ /* Wait for an updated duty_cycle queued in hardware */
+ atmel_pwm_wait_nonpending(atmel_pwm, pwm->hwpwm);
+
cdty = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm,
atmel_pwm->data->regs.duty);
tmp = (u64)(cprd - cdty) * NSEC_PER_SEC;
if (!atmel_pwm)
return -ENOMEM;
- mutex_init(&atmel_pwm->isr_lock);
atmel_pwm->data = of_device_get_match_data(&pdev->dev);
- atmel_pwm->updated_pwms = 0;
+
+ atmel_pwm->update_pending = 0;
+ spin_lock_init(&atmel_pwm->lock);
atmel_pwm->base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(atmel_pwm->base))
pwmchip_remove(&atmel_pwm->chip);
clk_unprepare(atmel_pwm->clk);
- mutex_destroy(&atmel_pwm->isr_lock);
return 0;
}
if (kp == NULL)
return -ENOMEM;
- platform_set_drvdata(pdev, kp);
-
kp->chip.dev = &pdev->dev;
kp->chip.ops = &kona_pwm_ops;
kp->chip.npwm = 6;
clk_disable_unprepare(kp->clk);
- ret = pwmchip_add(&kp->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &kp->chip);
if (ret < 0)
dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
return ret;
}
-static int kona_pwmc_remove(struct platform_device *pdev)
-{
- struct kona_pwmc *kp = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&kp->chip);
-}
-
static const struct of_device_id bcm_kona_pwmc_dt[] = {
{ .compatible = "brcm,kona-pwm" },
{ },
.of_match_table = bcm_kona_pwmc_dt,
},
.probe = kona_pwmc_probe,
- .remove = kona_pwmc_remove,
};
module_platform_driver(kona_pwmc_driver);
static int brcmstb_pwm_remove(struct platform_device *pdev)
{
struct brcmstb_pwm *p = platform_get_drvdata(pdev);
- int ret;
- ret = pwmchip_remove(&p->chip);
+ pwmchip_remove(&p->chip);
clk_disable_unprepare(p->clk);
- return ret;
+ return 0;
}
#ifdef CONFIG_PM_SLEEP
struct cros_ec_pwm_device *ec_pwm = platform_get_drvdata(dev);
struct pwm_chip *chip = &ec_pwm->chip;
- return pwmchip_remove(chip);
+ pwmchip_remove(chip);
+
+ return 0;
}
#ifdef CONFIG_OF
ep93xx_pwm->chip.ops = &ep93xx_pwm_ops;
ep93xx_pwm->chip.npwm = 1;
- ret = pwmchip_add(&ep93xx_pwm->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &ep93xx_pwm->chip);
if (ret < 0)
return ret;
- platform_set_drvdata(pdev, ep93xx_pwm);
return 0;
}
-static int ep93xx_pwm_remove(struct platform_device *pdev)
-{
- struct ep93xx_pwm *ep93xx_pwm = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&ep93xx_pwm->chip);
-}
-
static struct platform_driver ep93xx_pwm_driver = {
.driver = {
.name = "ep93xx-pwm",
},
.probe = ep93xx_pwm_probe,
- .remove = ep93xx_pwm_remove,
};
module_platform_driver(ep93xx_pwm_driver);
fpc->chip.ops = &fsl_pwm_ops;
fpc->chip.npwm = 8;
- ret = pwmchip_add(&fpc->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &fpc->chip);
if (ret < 0) {
dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
return ret;
return fsl_pwm_init(fpc);
}
-static int fsl_pwm_remove(struct platform_device *pdev)
-{
- struct fsl_pwm_chip *fpc = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&fpc->chip);
-}
-
#ifdef CONFIG_PM_SLEEP
static int fsl_pwm_suspend(struct device *dev)
{
.pm = &fsl_pwm_pm_ops,
},
.probe = fsl_pwm_probe,
- .remove = fsl_pwm_remove,
};
module_platform_driver(fsl_pwm_driver);
pwm_chip = platform_get_drvdata(pdev);
+ pwmchip_remove(&pwm_chip->chip);
+
reset_control_assert(pwm_chip->rstc);
msleep(30);
reset_control_deassert(pwm_chip->rstc);
clk_disable_unprepare(pwm_chip->clk);
- return pwmchip_remove(&pwm_chip->chip);
+ return 0;
}
static const struct of_device_id hibvt_pwm_of_match[] = {
static int img_pwm_remove(struct platform_device *pdev)
{
struct img_pwm_chip *pwm_chip = platform_get_drvdata(pdev);
- u32 val;
- unsigned int i;
- int ret;
-
- ret = pm_runtime_get_sync(&pdev->dev);
- if (ret < 0) {
- pm_runtime_put(&pdev->dev);
- return ret;
- }
-
- for (i = 0; i < pwm_chip->chip.npwm; i++) {
- val = img_pwm_readl(pwm_chip, PWM_CTRL_CFG);
- val &= ~BIT(i);
- img_pwm_writel(pwm_chip, PWM_CTRL_CFG, val);
- }
- pm_runtime_put(&pdev->dev);
pm_runtime_disable(&pdev->dev);
if (!pm_runtime_status_suspended(&pdev->dev))
img_pwm_runtime_suspend(&pdev->dev);
- return pwmchip_remove(&pwm_chip->chip);
+ pwmchip_remove(&pwm_chip->chip);
+
+ return 0;
}
#ifdef CONFIG_PM_SLEEP
static int pwm_imx_tpm_remove(struct platform_device *pdev)
{
struct imx_tpm_pwm_chip *tpm = platform_get_drvdata(pdev);
- int ret = pwmchip_remove(&tpm->chip);
+
+ pwmchip_remove(&tpm->chip);
clk_disable_unprepare(tpm->clk);
- return ret;
+ return 0;
}
static int __maybe_unused pwm_imx_tpm_suspend(struct device *dev)
if (imx == NULL)
return -ENOMEM;
- platform_set_drvdata(pdev, imx);
-
imx->clk_ipg = devm_clk_get(&pdev->dev, "ipg");
if (IS_ERR(imx->clk_ipg))
return dev_err_probe(&pdev->dev, PTR_ERR(imx->clk_ipg),
if (!(pwmcr & MX3_PWMCR_EN))
pwm_imx27_clk_disable_unprepare(imx);
- return pwmchip_add(&imx->chip);
-}
-
-static int pwm_imx27_remove(struct platform_device *pdev)
-{
- struct pwm_imx27_chip *imx;
-
- imx = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&imx->chip);
+ return devm_pwmchip_add(&pdev->dev, &imx->chip);
}
static struct platform_driver imx_pwm_driver = {
.of_match_table = pwm_imx27_dt_ids,
},
.probe = pwm_imx27_probe,
- .remove = pwm_imx27_remove,
};
module_platform_driver(imx_pwm_driver);
if (!pc)
return -ENOMEM;
- platform_set_drvdata(pdev, pc);
-
io_base = devm_platform_ioremap_resource(pdev, 0);
if (IS_ERR(io_base))
return PTR_ERR(io_base);
lgm_pwm_init(pc);
- ret = pwmchip_add(&pc->chip);
+ ret = devm_pwmchip_add(dev, &pc->chip);
if (ret < 0)
return dev_err_probe(dev, ret, "failed to add PWM chip\n");
return 0;
}
-static int lgm_pwm_remove(struct platform_device *pdev)
-{
- struct lgm_pwm_chip *pc = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&pc->chip);
-}
-
static const struct of_device_id lgm_pwm_of_match[] = {
{ .compatible = "intel,lgm-pwm" },
{ }
.of_match_table = lgm_pwm_of_match,
},
.probe = lgm_pwm_probe,
- .remove = lgm_pwm_remove,
};
module_platform_driver(lgm_pwm_driver);
if (!iqs620_pwm)
return -ENOMEM;
- platform_set_drvdata(pdev, iqs620_pwm);
iqs620_pwm->iqs62x = iqs62x;
ret = regmap_read(iqs62x->regmap, IQS620_PWR_SETTINGS, &val);
if (ret)
return ret;
- ret = pwmchip_add(&iqs620_pwm->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &iqs620_pwm->chip);
if (ret)
dev_err(&pdev->dev, "Failed to add device: %d\n", ret);
return ret;
}
-static int iqs620_pwm_remove(struct platform_device *pdev)
-{
- struct iqs620_pwm_private *iqs620_pwm = platform_get_drvdata(pdev);
- int ret;
-
- ret = pwmchip_remove(&iqs620_pwm->chip);
- if (ret)
- dev_err(&pdev->dev, "Failed to remove device: %d\n", ret);
-
- return ret;
-}
-
static struct platform_driver iqs620_pwm_platform_driver = {
.driver = {
.name = "iqs620a-pwm",
},
.probe = iqs620_pwm_probe,
- .remove = iqs620_pwm_remove,
};
module_platform_driver(iqs620_pwm_platform_driver);
jz4740->chip.ops = &jz4740_pwm_ops;
jz4740->chip.npwm = info->num_pwms;
- platform_set_drvdata(pdev, jz4740);
-
- return pwmchip_add(&jz4740->chip);
-}
-
-static int jz4740_pwm_remove(struct platform_device *pdev)
-{
- struct jz4740_pwm_chip *jz4740 = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&jz4740->chip);
+ return devm_pwmchip_add(dev, &jz4740->chip);
}
static const struct soc_info __maybe_unused jz4740_soc_info = {
.of_match_table = of_match_ptr(jz4740_pwm_dt_ids),
},
.probe = jz4740_pwm_probe,
- .remove = jz4740_pwm_remove,
};
module_platform_driver(jz4740_pwm_driver);
priv->chip.ops = &keembay_pwm_ops;
priv->chip.npwm = KMB_TOTAL_PWM_CHANNELS;
- ret = pwmchip_add(&priv->chip);
+ ret = devm_pwmchip_add(dev, &priv->chip);
if (ret)
return dev_err_probe(dev, ret, "Failed to add PWM chip\n");
- platform_set_drvdata(pdev, priv);
-
return 0;
}
-static int keembay_pwm_remove(struct platform_device *pdev)
-{
- struct keembay_pwm *priv = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&priv->chip);
-}
-
static const struct of_device_id keembay_pwm_of_match[] = {
{ .compatible = "intel,keembay-pwm" },
{ }
static struct platform_driver keembay_pwm_driver = {
.probe = keembay_pwm_probe,
- .remove = keembay_pwm_remove,
.driver = {
.name = "pwm-keembay",
.of_match_table = keembay_pwm_of_match,
lp3943_pwm->chip.ops = &lp3943_pwm_ops;
lp3943_pwm->chip.npwm = LP3943_NUM_PWMS;
- platform_set_drvdata(pdev, lp3943_pwm);
-
- return pwmchip_add(&lp3943_pwm->chip);
-}
-
-static int lp3943_pwm_remove(struct platform_device *pdev)
-{
- struct lp3943_pwm *lp3943_pwm = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&lp3943_pwm->chip);
+ return devm_pwmchip_add(&pdev->dev, &lp3943_pwm->chip);
}
#ifdef CONFIG_OF
static struct platform_driver lp3943_pwm_driver = {
.probe = lp3943_pwm_probe,
- .remove = lp3943_pwm_remove,
.driver = {
.name = "lp3943-pwm",
.of_match_table = of_match_ptr(lp3943_pwm_of_match),
lpc32xx->chip.ops = &lpc32xx_pwm_ops;
lpc32xx->chip.npwm = 1;
- ret = pwmchip_add(&lpc32xx->chip);
- if (ret < 0) {
- dev_err(&pdev->dev, "failed to add PWM chip, error %d\n", ret);
- return ret;
- }
-
- /* When PWM is disable, configure the output to the default value */
+ /* If PWM is disabled, configure the output to the default value */
- val = readl(lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
+ /*
+ * chip.pwms[] is only set up by devm_pwmchip_add() below, so it must
+ * not be dereferenced at this point. The controller exposes a single
+ * channel (npwm == 1), hence the register offset is always 0.
+ */
+ val = readl(lpc32xx->base);
val &= ~PWM_PIN_LEVEL;
- writel(val, lpc32xx->base + (lpc32xx->chip.pwms[0].hwpwm << 2));
+ writel(val, lpc32xx->base);
- platform_set_drvdata(pdev, lpc32xx);
+ ret = devm_pwmchip_add(&pdev->dev, &lpc32xx->chip);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "failed to add PWM chip, error %d\n", ret);
+ return ret;
+ }
return 0;
}
-static int lpc32xx_pwm_remove(struct platform_device *pdev)
-{
- struct lpc32xx_pwm_chip *lpc32xx = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&lpc32xx->chip);
-}
-
static const struct of_device_id lpc32xx_pwm_dt_ids[] = {
{ .compatible = "nxp,lpc3220-pwm", },
{ /* sentinel */ }
.of_match_table = lpc32xx_pwm_dt_ids,
},
.probe = lpc32xx_pwm_probe,
- .remove = lpc32xx_pwm_remove,
};
module_platform_driver(lpc32xx_pwm_driver);
}
}
- platform_set_drvdata(pdev, pc);
-
pc->chip.dev = &pdev->dev;
pc->chip.ops = &pwm_mediatek_ops;
pc->chip.npwm = pc->soc->num_pwms;
- ret = pwmchip_add(&pc->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
if (ret < 0) {
dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
return ret;
return 0;
}
-static int pwm_mediatek_remove(struct platform_device *pdev)
-{
- struct pwm_mediatek_chip *pc = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&pc->chip);
-}
-
static const struct pwm_mediatek_of_data mt2712_pwm_data = {
.num_pwms = 8,
.pwm45_fixup = false,
.of_match_table = pwm_mediatek_of_match,
},
.probe = pwm_mediatek_probe,
- .remove = pwm_mediatek_remove,
};
module_platform_driver(pwm_mediatek_driver);
* Author: YH Huang <yh.huang@mediatek.com>
*/
+#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/err.h>
#include <linux/io.h>
struct clk *clk_main;
struct clk *clk_mm;
void __iomem *base;
+ bool enabled;
};
static inline struct mtk_disp_pwm *to_mtk_disp_pwm(struct pwm_chip *chip)
writel(value, address);
}
-static int mtk_disp_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+static int mtk_disp_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
{
struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip);
u32 clk_div, period, high_width, value;
u64 div, rate;
int err;
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ if (!state->enabled) {
+ mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
+ 0x0);
+
+ if (mdp->enabled) {
+ clk_disable_unprepare(mdp->clk_mm);
+ clk_disable_unprepare(mdp->clk_main);
+ }
+
+ mdp->enabled = false;
+ return 0;
+ }
+
+ if (!mdp->enabled) {
+ err = clk_prepare_enable(mdp->clk_main);
+ if (err < 0) {
+ dev_err(chip->dev, "Can't enable mdp->clk_main: %pe\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ err = clk_prepare_enable(mdp->clk_mm);
+ if (err < 0) {
+ dev_err(chip->dev, "Can't enable mdp->clk_mm: %pe\n",
+ ERR_PTR(err));
+ clk_disable_unprepare(mdp->clk_main);
+ return err;
+ }
+ }
+
/*
* Find period, high_width and clk_div to suit duty_ns and period_ns.
* Calculate proper div value to keep period value in the bound.
* high_width = (PWM_CLK_RATE * duty_ns) / (10^9 * (clk_div + 1))
*/
rate = clk_get_rate(mdp->clk_main);
- clk_div = div_u64(rate * period_ns, NSEC_PER_SEC) >>
+ clk_div = mul_u64_u64_div_u64(state->period, rate, NSEC_PER_SEC) >>
PWM_PERIOD_BIT_WIDTH;
- if (clk_div > PWM_CLKDIV_MAX)
+ if (clk_div > PWM_CLKDIV_MAX) {
+ if (!mdp->enabled) {
+ clk_disable_unprepare(mdp->clk_mm);
+ clk_disable_unprepare(mdp->clk_main);
+ }
return -EINVAL;
+ }
div = NSEC_PER_SEC * (clk_div + 1);
- period = div64_u64(rate * period_ns, div);
+ period = mul_u64_u64_div_u64(state->period, rate, div);
if (period > 0)
period--;
- high_width = div64_u64(rate * duty_ns, div);
+ high_width = mul_u64_u64_div_u64(state->duty_cycle, rate, div);
value = period | (high_width << PWM_HIGH_WIDTH_SHIFT);
- err = clk_enable(mdp->clk_main);
- if (err < 0)
- return err;
-
- err = clk_enable(mdp->clk_mm);
- if (err < 0) {
- clk_disable(mdp->clk_main);
- return err;
- }
-
mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
PWM_CLKDIV_MASK,
clk_div << PWM_CLKDIV_SHIFT);
mtk_disp_pwm_update_bits(mdp, mdp->data->commit,
mdp->data->commit_mask,
0x0);
+ } else {
+ /*
+ * For MT2701, disable double buffer before writing register
+ * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH.
+ */
+ mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug,
+ mdp->data->bls_debug_mask,
+ mdp->data->bls_debug_mask);
+ mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
+ mdp->data->con0_sel,
+ mdp->data->con0_sel);
}
- clk_disable(mdp->clk_mm);
- clk_disable(mdp->clk_main);
+ mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
+ mdp->data->enable_mask);
+ mdp->enabled = true;
return 0;
}
-static int mtk_disp_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void mtk_disp_pwm_get_state(struct pwm_chip *chip,
+ struct pwm_device *pwm,
+ struct pwm_state *state)
{
struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip);
+ u64 rate, period, high_width;
+ u32 clk_div, con0, con1;
int err;
- err = clk_enable(mdp->clk_main);
- if (err < 0)
- return err;
-
- err = clk_enable(mdp->clk_mm);
+ err = clk_prepare_enable(mdp->clk_main);
if (err < 0) {
- clk_disable(mdp->clk_main);
- return err;
+ dev_err(chip->dev, "Can't enable mdp->clk_main: %pe\n", ERR_PTR(err));
+ return;
}
- mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
- mdp->data->enable_mask);
-
- return 0;
-}
-
-static void mtk_disp_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- struct mtk_disp_pwm *mdp = to_mtk_disp_pwm(chip);
-
- mtk_disp_pwm_update_bits(mdp, DISP_PWM_EN, mdp->data->enable_mask,
- 0x0);
+ err = clk_prepare_enable(mdp->clk_mm);
+ if (err < 0) {
+ dev_err(chip->dev, "Can't enable mdp->clk_mm: %pe\n", ERR_PTR(err));
+ clk_disable_unprepare(mdp->clk_main);
+ return;
+ }
- clk_disable(mdp->clk_mm);
- clk_disable(mdp->clk_main);
+ rate = clk_get_rate(mdp->clk_main);
+ con0 = readl(mdp->base + mdp->data->con0);
+ con1 = readl(mdp->base + mdp->data->con1);
+ /*
+ * .apply() switches the output on and off through DISP_PWM_EN using
+ * enable_mask, so read the enable state back from the same register
+ * and mask instead of bit 0 of con0.
+ */
+ state->enabled = !!(readl(mdp->base + DISP_PWM_EN) & mdp->data->enable_mask);
+ clk_div = FIELD_GET(PWM_CLKDIV_MASK, con0);
+ period = FIELD_GET(PWM_PERIOD_MASK, con1);
+ /*
+ * period has 12 bits, clk_div 11 and NSEC_PER_SEC has 30,
+ * so period * (clk_div + 1) * NSEC_PER_SEC doesn't overflow.
+ */
+ state->period = DIV64_U64_ROUND_UP(period * (clk_div + 1) * NSEC_PER_SEC, rate);
+ high_width = FIELD_GET(PWM_HIGH_WIDTH_MASK, con1);
+ state->duty_cycle = DIV64_U64_ROUND_UP(high_width * (clk_div + 1) * NSEC_PER_SEC,
+ rate);
+ state->polarity = PWM_POLARITY_NORMAL;
+ clk_disable_unprepare(mdp->clk_mm);
+ clk_disable_unprepare(mdp->clk_main);
}
static const struct pwm_ops mtk_disp_pwm_ops = {
- .config = mtk_disp_pwm_config,
- .enable = mtk_disp_pwm_enable,
- .disable = mtk_disp_pwm_disable,
+ .apply = mtk_disp_pwm_apply,
+ .get_state = mtk_disp_pwm_get_state,
.owner = THIS_MODULE,
};
if (IS_ERR(mdp->clk_mm))
return PTR_ERR(mdp->clk_mm);
- ret = clk_prepare(mdp->clk_main);
- if (ret < 0)
- return ret;
-
- ret = clk_prepare(mdp->clk_mm);
- if (ret < 0)
- goto disable_clk_main;
-
mdp->chip.dev = &pdev->dev;
mdp->chip.ops = &mtk_disp_pwm_ops;
mdp->chip.npwm = 1;
ret = pwmchip_add(&mdp->chip);
if (ret < 0) {
- dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
- goto disable_clk_mm;
+ dev_err(&pdev->dev, "pwmchip_add() failed: %pe\n", ERR_PTR(ret));
+ return ret;
}
platform_set_drvdata(pdev, mdp);
- /*
- * For MT2701, disable double buffer before writing register
- * and select manual mode and use PWM_PERIOD/PWM_HIGH_WIDTH.
- */
- if (!mdp->data->has_commit) {
- mtk_disp_pwm_update_bits(mdp, mdp->data->bls_debug,
- mdp->data->bls_debug_mask,
- mdp->data->bls_debug_mask);
- mtk_disp_pwm_update_bits(mdp, mdp->data->con0,
- mdp->data->con0_sel,
- mdp->data->con0_sel);
- }
-
return 0;
-
-disable_clk_mm:
- clk_unprepare(mdp->clk_mm);
-disable_clk_main:
- clk_unprepare(mdp->clk_main);
- return ret;
}
static int mtk_disp_pwm_remove(struct platform_device *pdev)
{
struct mtk_disp_pwm *mdp = platform_get_drvdata(pdev);
- int ret;
- ret = pwmchip_remove(&mdp->chip);
- clk_unprepare(mdp->clk_mm);
- clk_unprepare(mdp->clk_main);
+ pwmchip_remove(&mdp->chip);
- return ret;
+ return 0;
}
static const struct mtk_pwm_data mt2701_pwm_data = {
return ret;
}
- ret = pwmchip_add(&mxs->chip);
+ /* FIXME: Only do this if the PWM isn't already running */
+ ret = stmp_reset_block(mxs->base);
+ if (ret)
+ return dev_err_probe(&pdev->dev, ret, "failed to reset PWM\n");
+
+ ret = devm_pwmchip_add(&pdev->dev, &mxs->chip);
if (ret < 0) {
dev_err(&pdev->dev, "failed to add pwm chip %d\n", ret);
return ret;
}
- platform_set_drvdata(pdev, mxs);
-
- ret = stmp_reset_block(mxs->base);
- if (ret)
- goto pwm_remove;
-
return 0;
-
-pwm_remove:
- pwmchip_remove(&mxs->chip);
- return ret;
-}
-
-static int mxs_pwm_remove(struct platform_device *pdev)
-{
- struct mxs_pwm_chip *mxs = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&mxs->chip);
}
static const struct of_device_id mxs_pwm_dt_ids[] = {
.of_match_table = mxs_pwm_dt_ids,
},
.probe = mxs_pwm_probe,
- .remove = mxs_pwm_remove,
};
module_platform_driver(mxs_pwm_driver);
priv->ec = ec;
priv->dev = &pdev->dev;
- platform_set_drvdata(pdev, priv);
-
chip = &priv->chip;
chip->dev = &pdev->dev;
chip->ops = &ntxec_pwm_ops;
- chip->base = -1;
chip->npwm = 1;
- return pwmchip_add(chip);
-}
-
-static int ntxec_pwm_remove(struct platform_device *pdev)
-{
- struct ntxec_pwm *priv = platform_get_drvdata(pdev);
- struct pwm_chip *chip = &priv->chip;
-
- return pwmchip_remove(chip);
+ return devm_pwmchip_add(&pdev->dev, chip);
}
static struct platform_driver ntxec_pwm_driver = {
.name = "ntxec-pwm",
},
.probe = ntxec_pwm_probe,
- .remove = ntxec_pwm_remove,
};
module_platform_driver(ntxec_pwm_driver);
static int pwm_omap_dmtimer_remove(struct platform_device *pdev)
{
struct pwm_omap_dmtimer_chip *omap = platform_get_drvdata(pdev);
- int ret;
- ret = pwmchip_remove(&omap->chip);
- if (ret)
- return ret;
+ pwmchip_remove(&omap->chip);
if (pm_runtime_active(&omap->dm_timer_pdev->dev))
omap->pdata->stop(omap->dm_timer);
static int pca9685_pwm_remove(struct i2c_client *client)
{
struct pca9685 *pca = i2c_get_clientdata(client);
- int ret;
- ret = pwmchip_remove(&pca->chip);
- if (ret)
- return ret;
+ pwmchip_remove(&pca->chip);
if (!pm_runtime_enabled(&client->dev)) {
/* Put chip in sleep state if runtime PM is disabled */
if (IS_ERR(pc->mmio_base))
return PTR_ERR(pc->mmio_base);
- ret = pwmchip_add(&pc->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
if (ret < 0) {
dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
return ret;
}
- platform_set_drvdata(pdev, pc);
return 0;
}
-static int pwm_remove(struct platform_device *pdev)
-{
- struct pxa_pwm_chip *pc;
-
- pc = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&pc->chip);
-}
-
static struct platform_driver pwm_driver = {
.driver = {
.name = "pxa25x-pwm",
.of_match_table = pwm_of_match,
},
.probe = pwm_probe,
- .remove = pwm_remove,
.id_table = pwm_id_table,
};
rpipwm->chip.base = -1;
rpipwm->chip.npwm = RASPBERRYPI_FIRMWARE_PWM_NUM;
- platform_set_drvdata(pdev, rpipwm);
-
ret = raspberrypi_pwm_get_property(rpipwm->firmware, RPI_PWM_CUR_DUTY_REG,
&rpipwm->duty_cycle);
if (ret) {
return ret;
}
- return pwmchip_add(&rpipwm->chip);
-}
-
-static int raspberrypi_pwm_remove(struct platform_device *pdev)
-{
- struct raspberrypi_pwm *rpipwm = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&rpipwm->chip);
+ return devm_pwmchip_add(dev, &rpipwm->chip);
}
static const struct of_device_id raspberrypi_pwm_of_match[] = {
.of_match_table = raspberrypi_pwm_of_match,
},
.probe = raspberrypi_pwm_probe,
- .remove = raspberrypi_pwm_remove,
};
module_platform_driver(raspberrypi_pwm_driver);
static int rcar_pwm_remove(struct platform_device *pdev)
{
struct rcar_pwm_chip *rcar_pwm = platform_get_drvdata(pdev);
- int ret;
- ret = pwmchip_remove(&rcar_pwm->chip);
+ pwmchip_remove(&rcar_pwm->chip);
pm_runtime_disable(&pdev->dev);
- return ret;
+ return 0;
}
static const struct of_device_id rcar_pwm_of_table[] = {
static int tpu_remove(struct platform_device *pdev)
{
struct tpu_device *tpu = platform_get_drvdata(pdev);
- int ret;
- ret = pwmchip_remove(&tpu->chip);
+ pwmchip_remove(&tpu->chip);
pm_runtime_disable(&pdev->dev);
- return ret;
+ return 0;
}
#ifdef CONFIG_OF
{
struct rockchip_pwm_chip *pc = platform_get_drvdata(pdev);
- /*
- * Disable the PWM clk before unpreparing it if the PWM device is still
- * running. This should only happen when the last PWM user left it
- * enabled, or when nobody requested a PWM that was previously enabled
- * by the bootloader.
- *
- * FIXME: Maybe the core should disable all PWM devices in
- * pwmchip_remove(). In this case we'd only have to call
- * clk_unprepare() after pwmchip_remove().
- *
- */
- if (pwm_is_enabled(pc->chip.pwms))
- clk_disable(pc->clk);
+ pwmchip_remove(&pc->chip);
clk_unprepare(pc->pclk);
clk_unprepare(pc->clk);
- return pwmchip_remove(&pc->chip);
+ return 0;
}
static struct platform_driver rockchip_pwm_driver = {
static int pwm_samsung_remove(struct platform_device *pdev)
{
struct samsung_pwm_chip *chip = platform_get_drvdata(pdev);
- int ret;
- ret = pwmchip_remove(&chip->chip);
- if (ret < 0)
- return ret;
+ pwmchip_remove(&chip->chip);
clk_disable_unprepare(chip->base_clk);
struct pwm_sifive_ddata *ddata = platform_get_drvdata(dev);
bool is_enabled = false;
struct pwm_device *pwm;
- int ret, ch;
+ int ch;
for (ch = 0; ch < ddata->chip.npwm; ch++) {
pwm = &ddata->chip.pwms[ch];
clk_disable(ddata->clk);
clk_disable_unprepare(ddata->clk);
- ret = pwmchip_remove(&ddata->chip);
+ pwmchip_remove(&ddata->chip);
clk_notifier_unregister(ddata->clk, &ddata->notifier);
- return ret;
+ return 0;
}
static const struct of_device_id pwm_sifive_of_match[] = {
chip->ops = &sl28cpld_pwm_ops;
chip->npwm = 1;
- platform_set_drvdata(pdev, priv);
-
- ret = pwmchip_add(&priv->pwm_chip);
+ ret = devm_pwmchip_add(&pdev->dev, &priv->pwm_chip);
if (ret) {
dev_err(&pdev->dev, "failed to add PWM chip (%pe)",
ERR_PTR(ret));
return 0;
}
-static int sl28cpld_pwm_remove(struct platform_device *pdev)
-{
- struct sl28cpld_pwm *priv = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&priv->pwm_chip);
-}
-
static const struct of_device_id sl28cpld_pwm_of_match[] = {
{ .compatible = "kontron,sl28cpld-pwm" },
{}
static struct platform_driver sl28cpld_pwm_driver = {
.probe = sl28cpld_pwm_probe,
- .remove = sl28cpld_pwm_remove,
.driver = {
.name = "sl28cpld-pwm",
.of_match_table = sl28cpld_pwm_of_match,
priv->chip.ops = &stm32_pwm_lp_ops;
priv->chip.npwm = 1;
- ret = pwmchip_add(&priv->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &priv->chip);
if (ret < 0)
return ret;
return 0;
}
-static int stm32_pwm_lp_remove(struct platform_device *pdev)
-{
- struct stm32_pwm_lp *priv = platform_get_drvdata(pdev);
-
- pwm_disable(&priv->chip.pwms[0]);
-
- return pwmchip_remove(&priv->chip);
-}
-
static int __maybe_unused stm32_pwm_lp_suspend(struct device *dev)
{
struct stm32_pwm_lp *priv = dev_get_drvdata(dev);
static struct platform_driver stm32_pwm_lp_driver = {
.probe = stm32_pwm_lp_probe,
- .remove = stm32_pwm_lp_remove,
.driver = {
.name = "stm32-pwm-lp",
.of_match_table = of_match_ptr(stm32_pwm_lp_of_match),
static int sun4i_pwm_remove(struct platform_device *pdev)
{
struct sun4i_pwm_chip *pwm = platform_get_drvdata(pdev);
- int ret;
- ret = pwmchip_remove(&pwm->chip);
- if (ret)
- return ret;
+ pwmchip_remove(&pwm->chip);
clk_disable_unprepare(pwm->bus_clk);
reset_control_assert(pwm->rst);
if (IS_ERR(pc->mmio_base))
return PTR_ERR(pc->mmio_base);
- ret = pwmchip_add(&pc->chip);
+ ret = devm_pwmchip_add(&pdev->dev, &pc->chip);
if (ret < 0) {
dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
return ret;
static int ecap_pwm_remove(struct platform_device *pdev)
{
- struct ecap_pwm_chip *pc = platform_get_drvdata(pdev);
-
pm_runtime_disable(&pdev->dev);
- return pwmchip_remove(&pc->chip);
+ return 0;
}
#ifdef CONFIG_PM_SLEEP
{
struct ehrpwm_pwm_chip *pc = platform_get_drvdata(pdev);
+ pwmchip_remove(&pc->chip);
+
clk_unprepare(pc->tbclk);
pm_runtime_disable(&pdev->dev);
- return pwmchip_remove(&pc->chip);
+ return 0;
}
#ifdef CONFIG_PM_SLEEP
static int twl_pwmled_probe(struct platform_device *pdev)
{
struct twl_pwmled_chip *twl;
- int ret;
twl = devm_kzalloc(&pdev->dev, sizeof(*twl), GFP_KERNEL);
if (!twl)
mutex_init(&twl->mutex);
- ret = pwmchip_add(&twl->chip);
- if (ret < 0)
- return ret;
-
- platform_set_drvdata(pdev, twl);
-
- return 0;
-}
-
-static int twl_pwmled_remove(struct platform_device *pdev)
-{
- struct twl_pwmled_chip *twl = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&twl->chip);
+ return devm_pwmchip_add(&pdev->dev, &twl->chip);
}
#ifdef CONFIG_OF
.of_match_table = of_match_ptr(twl_pwmled_of_match),
},
.probe = twl_pwmled_probe,
- .remove = twl_pwmled_remove,
};
module_platform_driver(twl_pwmled_driver);
static int twl_pwm_probe(struct platform_device *pdev)
{
struct twl_pwm_chip *twl;
- int ret;
twl = devm_kzalloc(&pdev->dev, sizeof(*twl), GFP_KERNEL);
if (!twl)
mutex_init(&twl->mutex);
- ret = pwmchip_add(&twl->chip);
- if (ret < 0)
- return ret;
-
- platform_set_drvdata(pdev, twl);
-
- return 0;
-}
-
-static int twl_pwm_remove(struct platform_device *pdev)
-{
- struct twl_pwm_chip *twl = platform_get_drvdata(pdev);
-
- return pwmchip_remove(&twl->chip);
+ return devm_pwmchip_add(&pdev->dev, &twl->chip);
}
#ifdef CONFIG_OF
.of_match_table = of_match_ptr(twl_pwm_of_match),
},
.probe = twl_pwm_probe,
- .remove = twl_pwm_remove,
};
module_platform_driver(twl_pwm_driver);
Say yes here to enable debugging support in the RTC framework
and individual RTC drivers.
+config RTC_LIB_KUNIT_TEST
+ tristate "KUnit test for RTC lib functions" if !KUNIT_ALL_TESTS
+ depends on KUNIT
+ default KUNIT_ALL_TESTS
+ help
+ Enable this option to test RTC library functions.
+
+ If unsure, say N.
+
config RTC_NVMEM
bool "RTC non volatile storage support"
select NVMEM
config RTC_DRV_RX8010
tristate "Epson RX8010SJ"
+ select REGMAP_I2C
help
If you say yes here you get support for the Epson RX8010SJ RTC
chip.
rtc-core-$(CONFIG_RTC_INTF_PROC) += proc.o
rtc-core-$(CONFIG_RTC_INTF_SYSFS) += sysfs.o
+obj-$(CONFIG_RTC_LIB_KUNIT_TEST) += lib_test.o
+
# Keep the list ordered.
obj-$(CONFIG_RTC_DRV_88PM80X) += rtc-88pm80x.o
* Author: Alessandro Zummo <a.zummo@towertech.it>
*
* based on arch/arm/common/rtctime.c and other bits
+ *
+ * Author: Cassio Neri <cassio.neri@gmail.com> (rtc_time64_to_tm)
*/
#include <linux/export.h>
{ 0, 31, 60, 91, 121, 152, 182, 213, 244, 274, 305, 335, 366 }
};
-#define LEAPS_THRU_END_OF(y) ((y) / 4 - (y) / 100 + (y) / 400)
-
/*
* The number of days in the month.
*/
}
EXPORT_SYMBOL(rtc_year_days);
-/*
- * rtc_time64_to_tm - Converts time64_t to rtc_time.
- * Convert seconds since 01-01-1970 00:00:00 to Gregorian date.
+/**
+ * rtc_time64_to_tm - converts time64_t to rtc_time.
+ *
+ * @time: The number of seconds since 01-01-1970 00:00:00.
+ * (Must be positive.)
+ * @tm: Pointer to the struct rtc_time.
*/
void rtc_time64_to_tm(time64_t time, struct rtc_time *tm)
{
- unsigned int month, year, secs;
+ unsigned int secs;
int days;
+ u64 u64tmp;
+ u32 u32tmp, udays, century, day_of_century, year_of_century, year,
+ day_of_year, month, day;
+ bool is_Jan_or_Feb, is_leap_year;
+
/* time must be positive */
days = div_s64_rem(time, 86400, &secs);
/* day of the week, 1970-01-01 was a Thursday */
tm->tm_wday = (days + 4) % 7;
- year = 1970 + days / 365;
- days -= (year - 1970) * 365
- + LEAPS_THRU_END_OF(year - 1)
- - LEAPS_THRU_END_OF(1970 - 1);
- while (days < 0) {
- year -= 1;
- days += 365 + is_leap_year(year);
- }
- tm->tm_year = year - 1900;
- tm->tm_yday = days + 1;
-
- for (month = 0; month < 11; month++) {
- int newdays;
-
- newdays = days - rtc_month_days(month, year);
- if (newdays < 0)
- break;
- days = newdays;
- }
- tm->tm_mon = month;
- tm->tm_mday = days + 1;
+ /*
+ * The following algorithm is, basically, Proposition 6.3 of Neri
+ * and Schneider [1]. In a few words: it works on the computational
+ * (fictitious) calendar where the year starts in March, month = 2
+ * (*), and finishes in February, month = 13. This calendar is
+ * mathematically convenient because the day of the year does not
+ * depend on whether the year is leap or not. For instance:
+ *
+ * March 1st 0-th day of the year;
+ * ...
+ * April 1st 31-st day of the year;
+ * ...
+ * January 1st 306-th day of the year; (Important!)
+ * ...
+ * February 28th 364-th day of the year;
+ * February 29th 365-th day of the year (if it exists).
+ *
+ * After having worked out the date in the computational calendar
+ * (using just arithmetics) it's easy to convert it to the
+ * corresponding date in the Gregorian calendar.
+ *
+ * [1] "Euclidean Affine Functions and Applications to Calendar
+ * Algorithms". https://arxiv.org/abs/2102.06959
+ *
+ * (*) The numbering of months follows rtc_time more closely and
+ * thus, is slightly different from [1].
+ */
+
+ udays = ((u32) days) + 719468;
+
+ u32tmp = 4 * udays + 3;
+ century = u32tmp / 146097;
+ day_of_century = u32tmp % 146097 / 4;
+
+ u32tmp = 4 * day_of_century + 3;
+ u64tmp = 2939745ULL * u32tmp;
+ year_of_century = upper_32_bits(u64tmp);
+ day_of_year = lower_32_bits(u64tmp) / 2939745 / 4;
+
+ year = 100 * century + year_of_century;
+ is_leap_year = year_of_century != 0 ?
+ year_of_century % 4 == 0 : century % 4 == 0;
+
+ u32tmp = 2141 * day_of_year + 132377;
+ month = u32tmp >> 16;
+ day = ((u16) u32tmp) / 2141;
+
+ /*
+ * Recall that January 01 is the 306-th day of the year in the
+ * computational (not Gregorian) calendar.
+ */
+ is_Jan_or_Feb = day_of_year >= 306;
+
+ /* Converts to the Gregorian calendar. */
+ year = year + is_Jan_or_Feb;
+ month = is_Jan_or_Feb ? month - 12 : month;
+ day = day + 1;
+
+ day_of_year = is_Jan_or_Feb ?
+ day_of_year - 306 : day_of_year + 31 + 28 + is_leap_year;
+
+ /* Converts to rtc_time's format. */
+ tm->tm_year = (int) (year - 1900);
+ tm->tm_mon = (int) month;
+ tm->tm_mday = (int) day;
+ tm->tm_yday = (int) day_of_year + 1;
tm->tm_hour = secs / 3600;
secs -= tm->tm_hour * 3600;
--- /dev/null
+// SPDX-License-Identifier: LGPL-2.1+
+
+#include <kunit/test.h>
+#include <linux/rtc.h>
+
+/*
+ * Step the calendar date held in (*year, *month, *mday, *yday) forward by
+ * exactly one day.  @month and @mday are 1-based, @yday is the 1-based day
+ * of the year.
+ */
+static void advance_date(int *year, int *month, int *mday, int *yday)
+{
+	const int last_mday = rtc_month_days(*month - 1, *year);
+
+	if (*mday == last_mday) {
+		/* Last day of the month: roll over to the 1st. */
+		*mday = 1;
+
+		if (*month == 12) {
+			/* December 31st: roll over to January 1st. */
+			*month = 1;
+			*yday = 1;
+			*year += 1;
+			return;
+		}
+
+		*month += 1;
+	} else {
+		*mday += 1;
+	}
+
+	*yday += 1;
+}
+
+/*
+ * Checks every day in a 160000 years interval starting on 1970-01-01
+ * against the expected result.
+ *
+ * The expected date is tracked independently with advance_date() and is
+ * compared field by field with what rtc_time64_to_tm() produces for the
+ * same day.
+ */
+static void rtc_time64_to_tm_test_date_range(struct kunit *test)
+{
+	/*
+	 * 160000 years	= (160000 / 400) * 400 years
+	 *		= (160000 / 400) * 146097 days
+	 *		= (160000 / 400) * 146097 * 86400 seconds
+	 */
+	time64_t total_secs = ((time64_t) 160000) / 400 * 146097 * 86400;
+
+	int year = 1970;
+	int month = 1;
+	int mday = 1;
+	int yday = 1;
+
+	struct rtc_time result;
+	time64_t secs;
+	s64 days;
+
+	for (secs = 0; secs <= total_secs; secs += 86400) {
+
+		rtc_time64_to_tm(secs, &result);
+
+		days = div_s64(secs, 86400);
+
+		/*
+		 * 'days' is an s64, which is a long long on every
+		 * architecture, so it has to be printed with %lld; %ld is
+		 * wrong on 32-bit builds.
+		 */
+		#define FAIL_MSG "%d/%02d/%02d (%2d) : %lld", \
+			year, month, mday, yday, days
+
+		KUNIT_ASSERT_EQ_MSG(test, year - 1900, result.tm_year, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, month - 1, result.tm_mon, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, mday, result.tm_mday, FAIL_MSG);
+		KUNIT_ASSERT_EQ_MSG(test, yday, result.tm_yday, FAIL_MSG);
+
+		/* Keep the helper macro local to this function. */
+		#undef FAIL_MSG
+
+		advance_date(&year, &month, &mday, &yday);
+	}
+}
+
+/* KUnit cases of this file; the empty entry terminates the array. */
+static struct kunit_case rtc_lib_test_cases[] = {
+ KUNIT_CASE(rtc_time64_to_tm_test_date_range),
+ {}
+};
+
+/* Suite descriptor handed to kunit_test_suite() below. */
+static struct kunit_suite rtc_lib_test_suite = {
+ .name = "rtc_lib_test_cases",
+ .test_cases = rtc_lib_test_cases,
+};
+
+kunit_test_suite(rtc_lib_test_suite);
+
+MODULE_LICENSE("GPL");
if (!pm_trace_rtc_valid())
return -EIO;
- /* REVISIT: if the clock has a "century" register, use
- * that instead of the heuristic in mc146818_get_time().
- * That'll make Y3K compatility (year > 2070) easy!
- */
mc146818_get_time(t);
return 0;
}
static int cmos_set_time(struct device *dev, struct rtc_time *t)
{
- /* REVISIT: set the "century" register if available
- *
- * NOTE: this ignores the issue whereby updating the seconds
+ /* NOTE: this ignores the issue whereby updating the seconds
* takes effect exactly 500ms after we write the register.
* (Also queueing and other delays before we get this far.)
*/
#define RX8025_ADJ_DATA_MAX 62
#define RX8025_ADJ_DATA_MIN -62
+enum rx_model {
+ model_rx_unknown,
+ model_rx_8025,
+ model_rx_8035,
+ model_last
+};
+
static const struct i2c_device_id rx8025_id[] = {
- { "rx8025", 0 },
+ { "rx8025", model_rx_8025 },
+ { "rx8035", model_rx_8035 },
{ }
};
MODULE_DEVICE_TABLE(i2c, rx8025_id);
struct rx8025_data {
struct rtc_device *rtc;
+ enum rx_model model;
u8 ctrl1;
};
length, values);
}
+/*
+ * Report whether the XST(P) flag in @ctrl2 signals a stopped oscillator.
+ *
+ * The flag has opposite polarity on the two supported chips:
+ *   RX-8025: 0 == oscillator stopped
+ *   RX-8035: 1 == oscillator stopped
+ *
+ * Returns non-zero if the oscillator was stopped, 0 otherwise.
+ */
+static int rx8025_is_osc_stopped(enum rx_model model, int ctrl2)
+{
+	int xst_bit = ctrl2 & RX8025_BIT_CTRL2_XST;
+
+	/* Every model but the RX-8025 reports "stopped" with the bit set. */
+	if (model != model_rx_8025)
+		return xst_bit;
+
+	return !xst_bit;
+}
+
static int rx8025_check_validity(struct device *dev)
{
struct i2c_client *client = to_i2c_client(dev);
+ struct rx8025_data *drvdata = dev_get_drvdata(dev);
int ctrl2;
+ int xstp;
ctrl2 = rx8025_read_reg(client, RX8025_REG_CTRL2);
if (ctrl2 < 0)
return -EINVAL;
}
- if (!(ctrl2 & RX8025_BIT_CTRL2_XST)) {
+ xstp = rx8025_is_osc_stopped(drvdata->model, ctrl2);
+ if (xstp) {
dev_warn(dev, "crystal stopped, date is invalid\n");
return -EINVAL;
}
static int rx8025_reset_validity(struct i2c_client *client)
{
+ struct rx8025_data *drvdata = i2c_get_clientdata(client);
int ctrl2 = rx8025_read_reg(client, RX8025_REG_CTRL2);
if (ctrl2 < 0)
ctrl2 &= ~(RX8025_BIT_CTRL2_PON | RX8025_BIT_CTRL2_VDET);
+ if (drvdata->model == model_rx_8025)
+ ctrl2 |= RX8025_BIT_CTRL2_XST;
+ else
+ ctrl2 &= ~(RX8025_BIT_CTRL2_XST);
+
return rx8025_write_reg(client, RX8025_REG_CTRL2,
- ctrl2 | RX8025_BIT_CTRL2_XST);
+ ctrl2);
}
static irqreturn_t rx8025_handle_irq(int irq, void *dev_id)
{
struct i2c_client *client = dev_id;
struct rx8025_data *rx8025 = i2c_get_clientdata(client);
- int status;
+ int status, xstp;
rtc_lock(rx8025->rtc);
status = rx8025_read_reg(client, RX8025_REG_CTRL2);
if (status < 0)
goto out;
- if (!(status & RX8025_BIT_CTRL2_XST))
+ xstp = rx8025_is_osc_stopped(rx8025->model, status);
+ if (xstp)
dev_warn(&client->dev, "Oscillation stop was detected,"
"you may have to readjust the clock\n");
i2c_set_clientdata(client, rx8025);
+ if (id)
+ rx8025->model = id->driver_data;
+
err = rx8025_init_client(client);
if (err)
return err;
data[RTC_WEEKDAY] = 1 << tm->tm_wday;
data[RTC_DATE] = tm->tm_mday;
data[RTC_MONTH] = tm->tm_mon + 1;
- data[RTC_YEAR1] = tm->tm_year > 100 ? (tm->tm_year - 100) : 0;
+ data[RTC_YEAR1] = tm->tm_year - 100;
- if (tm->tm_year < 100) {
- pr_err("RTC cannot handle the year %d\n",
- 1900 + tm->tm_year);
- return -EINVAL;
- } else {
- return 0;
- }
+ return 0;
}
/*
if (ret)
return ret;
- device_init_wakeup(&pdev->dev, 1);
-
- info->rtc_dev = devm_rtc_device_register(&pdev->dev, "s5m-rtc",
- &s5m_rtc_ops, THIS_MODULE);
-
+ info->rtc_dev = devm_rtc_allocate_device(&pdev->dev);
if (IS_ERR(info->rtc_dev))
return PTR_ERR(info->rtc_dev);
- if (!info->irq) {
- dev_info(&pdev->dev, "Alarm IRQ not available\n");
- return 0;
+ info->rtc_dev->ops = &s5m_rtc_ops;
+
+ if (info->device_type == S5M8763X) {
+ info->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_0000;
+ info->rtc_dev->range_max = RTC_TIMESTAMP_END_9999;
+ } else {
+ info->rtc_dev->range_min = RTC_TIMESTAMP_BEGIN_2000;
+ info->rtc_dev->range_max = RTC_TIMESTAMP_END_2099;
}
- ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
- s5m_rtc_alarm_irq, 0, "rtc-alarm0",
- info);
- if (ret < 0) {
- dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
- info->irq, ret);
- return ret;
+ if (!info->irq) {
+ clear_bit(RTC_FEATURE_ALARM, info->rtc_dev->features);
+ } else {
+ ret = devm_request_threaded_irq(&pdev->dev, info->irq, NULL,
+ s5m_rtc_alarm_irq, 0, "rtc-alarm0",
+ info);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "Failed to request alarm IRQ: %d: %d\n",
+ info->irq, ret);
+ return ret;
+ }
+ device_init_wakeup(&pdev->dev, 1);
}
- return 0;
+ return devm_rtc_register_device(info->rtc_dev);
}
#ifdef CONFIG_PM_SLEEP
};
module_platform_driver(tps65910_rtc_driver);
-MODULE_ALIAS("platform:rtc-tps65910");
+MODULE_ALIAS("platform:tps65910-rtc");
MODULE_AUTHOR("Venu Byravarasu <vbyravarasu@nvidia.com>");
MODULE_LICENSE("GPL");
NULL
};
+static const struct attribute_group imok_attribute_group = {
+ .attrs = imok_attr,
+};
+
static const struct attribute_group data_attribute_group = {
.bin_attrs = data_attributes,
- .attrs = imok_attr,
};
static ssize_t available_uuids_show(struct device *dev,
if (result)
goto free_rel_misc;
+ if (acpi_has_method(priv->adev->handle, "IMOK")) {
+ result = sysfs_create_group(&pdev->dev.kobj, &imok_attribute_group);
+ if (result)
+ goto free_imok;
+ }
+
if (priv->data_vault) {
result = sysfs_create_group(&pdev->dev.kobj,
&data_attribute_group);
}
free_uuid:
sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group);
+free_imok:
+ sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group);
free_rel_misc:
if (!priv->rel_misc_dev_res)
acpi_thermal_rel_misc_device_remove(priv->adev->handle);
if (priv->data_vault)
sysfs_remove_group(&pdev->dev.kobj, &data_attribute_group);
sysfs_remove_group(&pdev->dev.kobj, &uuid_attribute_group);
+ sysfs_remove_group(&pdev->dev.kobj, &imok_attribute_group);
thermal_zone_device_unregister(priv->thermal);
kfree(priv->data_vault);
kfree(priv->trts);
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
{}
};
trip points. The temperature reported by the thermal sensor reflects the
real time die temperature if an ADC is present or an estimate of the
temperature based upon the over temperature stage value.
+
+config QCOM_LMH
+ tristate "Qualcomm Limits Management Hardware"
+ depends on ARCH_QCOM
+ help
+ This enables initialization of the Qualcomm Limits Management
+ Hardware (LMh). LMh allows for hardware-enforced mitigation for CPUs based on
+ input from temperature and current sensors. On many newer Qualcomm SoCs
+ LMh is configured in the firmware and this feature need not be enabled.
+ However, on certain SoCs like sdm845 LMh has to be configured from the kernel.
tsens-8960.o
obj-$(CONFIG_QCOM_SPMI_ADC_TM5) += qcom-spmi-adc-tm5.o
obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o
+obj-$(CONFIG_QCOM_LMH) += lmh.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2021, Linaro Limited. All rights reserved.
+ */
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/irqdomain.h>
+#include <linux/err.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/slab.h>
+#include <linux/qcom_scm.h>
+
+#define LMH_NODE_DCVS 0x44435653
+#define LMH_CLUSTER0_NODE_ID 0x6370302D
+#define LMH_CLUSTER1_NODE_ID 0x6370312D
+
+#define LMH_SUB_FN_THERMAL 0x54484D4C
+#define LMH_SUB_FN_CRNT 0x43524E54
+#define LMH_SUB_FN_REL 0x52454C00
+#define LMH_SUB_FN_BCL 0x42434C00
+
+#define LMH_ALGO_MODE_ENABLE 0x454E424C
+#define LMH_TH_HI_THRESHOLD 0x48494748
+#define LMH_TH_LOW_THRESHOLD 0x4C4F5700
+#define LMH_TH_ARM_THRESHOLD 0x41524D00
+
+#define LMH_REG_DCVS_INTR_CLR 0x8
+
+/* Per-instance state of one LMh block, allocated in lmh_probe(). */
+struct lmh_hw_data {
+ /* MMIO region 0 of the platform device */
+ void __iomem *base;
+ /* one-entry linear IRQ domain created in lmh_probe() */
+ struct irq_domain *domain;
+ /* Linux IRQ number obtained with platform_get_irq(pdev, 0) */
+ int irq;
+};
+
+/*
+ * Handler of the LMh hardware interrupt.
+ *
+ * Forwards the event to whatever is mapped on hwirq 0 of the LMh IRQ
+ * domain, if a mapping exists.
+ *
+ * @hw_irq: Linux IRQ number of the LMh line (unused).
+ * @data:   the struct lmh_hw_data passed at request time.
+ *
+ * Returns IRQ_HANDLED.  The original code returned the literal 0, which is
+ * IRQ_NONE and tells the IRQ core that the interrupt was not ours.
+ */
+static irqreturn_t lmh_handle_irq(int hw_irq, void *data)
+{
+	struct lmh_hw_data *lmh_data = data;
+	int irq = irq_find_mapping(lmh_data->domain, 0);
+
+	/* Call the cpufreq driver to handle the interrupt */
+	if (irq)
+		generic_handle_irq(irq);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * irq_chip .irq_enable callback: acknowledge anything already latched in
+ * the DCVS interrupt-clear register, then unmask the parent LMh IRQ.
+ */
+static void lmh_enable_interrupt(struct irq_data *d)
+{
+	struct lmh_hw_data *lmh = irq_data_get_irq_chip_data(d);
+	void __iomem *intr_clr = lmh->base + LMH_REG_DCVS_INTR_CLR;
+
+	/* Clear the existing interrupt */
+	writel(0xff, intr_clr);
+
+	enable_irq(lmh->irq);
+}
+
+/*
+ * irq_chip .irq_disable callback: mask the parent LMh IRQ without waiting
+ * for a running handler to finish.
+ */
+static void lmh_disable_interrupt(struct irq_data *d)
+{
+	struct lmh_hw_data *lmh = irq_data_get_irq_chip_data(d);
+	int parent_irq = lmh->irq;
+
+	disable_irq_nosync(parent_irq);
+}
+
+/*
+ * irq_chip installed by lmh_irq_map() for interrupts of the LMh domain;
+ * enable/disable are routed to the parent LMh IRQ.
+ */
+static struct irq_chip lmh_irq_chip = {
+ .name = "lmh",
+ .irq_enable = lmh_enable_interrupt,
+ .irq_disable = lmh_disable_interrupt
+};
+
+/*
+ * irq_domain .map callback: attach the LMh irq_chip and the simple flow
+ * handler to @irq and store the domain's host data (the struct lmh_hw_data)
+ * as its chip data.  Always returns 0.
+ */
+static int lmh_irq_map(struct irq_domain *d, unsigned int irq, irq_hw_number_t hw)
+{
+	irq_set_chip_and_handler(irq, &lmh_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, (struct lmh_hw_data *)d->host_data);
+
+	return 0;
+}
+
+/* Domain operations passed to irq_domain_add_linear() in lmh_probe(). */
+static const struct irq_domain_ops lmh_irq_ops = {
+ .map = lmh_irq_map,
+ .xlate = irq_domain_xlate_onecell,
+};
+
+/*
+ * Probe one LMh instance.
+ *
+ * Maps the register block, reads the CPU phandle and the three temperature
+ * thresholds from the device tree, enables the LMh sub-functions and
+ * programs the thresholds through SCM calls, then creates a one-entry IRQ
+ * domain and requests the (initially disabled) LMh interrupt.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int lmh_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *cpu_node;
+	struct lmh_hw_data *lmh_data;
+	int temp_low, temp_high, temp_arm, cpu_id, ret;
+	u32 node_id;
+
+	lmh_data = devm_kzalloc(dev, sizeof(*lmh_data), GFP_KERNEL);
+	if (!lmh_data)
+		return -ENOMEM;
+
+	lmh_data->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(lmh_data->base))
+		return PTR_ERR(lmh_data->base);
+
+	cpu_node = of_parse_phandle(np, "cpus", 0);
+	if (!cpu_node)
+		return -EINVAL;
+	cpu_id = of_cpu_node_to_id(cpu_node);
+	of_node_put(cpu_node);
+
+	ret = of_property_read_u32(np, "qcom,lmh-temp-high-millicelsius", &temp_high);
+	if (ret) {
+		dev_err(dev, "missing qcom,lmh-temp-high-millicelsius property\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "qcom,lmh-temp-low-millicelsius", &temp_low);
+	if (ret) {
+		dev_err(dev, "missing qcom,lmh-temp-low-millicelsius property\n");
+		return ret;
+	}
+
+	ret = of_property_read_u32(np, "qcom,lmh-temp-arm-millicelsius", &temp_arm);
+	if (ret) {
+		dev_err(dev, "missing qcom,lmh-temp-arm-millicelsius property\n");
+		return ret;
+	}
+
+	/*
+	 * Only sdm845 has lmh hardware currently enabled from hlos. If this is needed
+	 * for other platforms, revisit this to check if the <cpu-id, node-id> should be part
+	 * of a dt match table.
+	 */
+	if (cpu_id == 0) {
+		node_id = LMH_CLUSTER0_NODE_ID;
+	} else if (cpu_id == 4) {
+		node_id = LMH_CLUSTER1_NODE_ID;
+	} else {
+		dev_err(dev, "Wrong CPU id associated with LMh node\n");
+		return -EINVAL;
+	}
+
+	if (!qcom_scm_lmh_dcvsh_available())
+		return -EINVAL;
+
+	/*
+	 * Failures of the current, reliability and BCL sub-functions are
+	 * only logged; probing carries on (behaviour kept from the original).
+	 */
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_CRNT, LMH_ALGO_MODE_ENABLE, 1,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret)
+		dev_err(dev, "Error %d enabling current subfunction\n", ret);
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_REL, LMH_ALGO_MODE_ENABLE, 1,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret)
+		dev_err(dev, "Error %d enabling reliability subfunction\n", ret);
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_BCL, LMH_ALGO_MODE_ENABLE, 1,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret)
+		dev_err(dev, "Error %d enabling BCL subfunction\n", ret);
+
+	/* From here on every failure aborts the probe. */
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_ALGO_MODE_ENABLE, 1,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret) {
+		dev_err(dev, "Error %d enabling thermal subfunction\n", ret);
+		return ret;
+	}
+
+	ret = qcom_scm_lmh_profile_change(0x1);
+	if (ret) {
+		dev_err(dev, "Error %d changing profile\n", ret);
+		return ret;
+	}
+
+	/* Set default thermal trips */
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_ARM_THRESHOLD, temp_arm,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret) {
+		dev_err(dev, "Error setting thermal ARM threshold%d\n", ret);
+		return ret;
+	}
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_HI_THRESHOLD, temp_high,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret) {
+		dev_err(dev, "Error setting thermal HI threshold%d\n", ret);
+		return ret;
+	}
+
+	ret = qcom_scm_lmh_dcvsh(LMH_SUB_FN_THERMAL, LMH_TH_LOW_THRESHOLD, temp_low,
+				 LMH_NODE_DCVS, node_id, 0);
+	if (ret) {
+		/* This is the LOW threshold; the message used to say ARM. */
+		dev_err(dev, "Error setting thermal LOW threshold%d\n", ret);
+		return ret;
+	}
+
+	/*
+	 * Bail out before creating the domain if there is no usable IRQ, so
+	 * a negative errno is never handed to the IRQ core below.
+	 */
+	lmh_data->irq = platform_get_irq(pdev, 0);
+	if (lmh_data->irq < 0)
+		return lmh_data->irq;
+
+	lmh_data->domain = irq_domain_add_linear(np, 1, &lmh_irq_ops, lmh_data);
+	if (!lmh_data->domain) {
+		dev_err(dev, "Error adding irq_domain\n");
+		return -EINVAL;
+	}
+
+	/* Disable the irq and let cpufreq enable it when ready to handle the interrupt */
+	irq_set_status_flags(lmh_data->irq, IRQ_NOAUTOEN);
+	ret = devm_request_irq(dev, lmh_data->irq, lmh_handle_irq,
+			       IRQF_ONESHOT | IRQF_NO_SUSPEND,
+			       "lmh-irq", lmh_data);
+	if (ret) {
+		dev_err(dev, "Error %d registering irq %x\n", ret, lmh_data->irq);
+		/* The domain is not device-managed; drop it by hand. */
+		irq_domain_remove(lmh_data->domain);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Device-tree compatibles handled by this driver; empty entry terminates. */
+static const struct of_device_id lmh_table[] = {
+ { .compatible = "qcom,sdm845-lmh", },
+ {}
+};
+MODULE_DEVICE_TABLE(of, lmh_table);
+
+/*
+ * Platform driver glue.  There is no .remove callback and
+ * suppress_bind_attrs is set.
+ */
+static struct platform_driver lmh_driver = {
+ .probe = lmh_probe,
+ .driver = {
+ .name = "qcom-lmh",
+ .of_match_table = lmh_table,
+ .suppress_bind_attrs = true,
+ },
+};
+module_platform_driver(lmh_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("QCOM LMh driver");
&adc_tm->channels[i],
&adc_tm5_ops);
if (IS_ERR(tzd)) {
+ if (PTR_ERR(tzd) == -ENODEV) {
+ dev_warn(adc_tm->dev, "thermal sensor on channel %d is not used\n",
+ adc_tm->channels[i].channel);
+ continue;
+ }
+
dev_err(adc_tm->dev, "Error registering TZ zone for channel %d: %ld\n",
adc_tm->channels[i].channel, PTR_ERR(tzd));
return PTR_ERR(tzd);
struct thermal_zone_device *zone;
struct equation_coefs coef;
int tj_t;
- int id; /* thermal channel id */
+ unsigned int id; /* thermal channel id */
};
struct rcar_gen3_thermal_priv {
return 0;
}
-static const struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = {
+/*
+ * Convert a temperature in millidegrees Celsius to the value programmed
+ * into the IRQTEMP registers.
+ *
+ * The temperature is rounded to whole degrees; one of the two linear
+ * equations (a1/b1 or a2/b2) is picked depending on which side of
+ * INT_FIXPT(tj_t) it falls, and the result is passed through INT_FIXPT().
+ */
+static int rcar_gen3_thermal_mcelsius_to_temp(struct rcar_gen3_thermal_tsc *tsc,
+					      int mcelsius)
+{
+	const int celsius = DIV_ROUND_CLOSEST(mcelsius, 1000);
+	int val;
+
+	val = (celsius <= INT_FIXPT(tsc->tj_t)) ?
+		celsius * tsc->coef.a1 + tsc->coef.b1 :
+		celsius * tsc->coef.a2 + tsc->coef.b2;
+
+	return INT_FIXPT(val);
+}
+
+/*
+ * .set_trips callback: program the low/high temperature interrupt
+ * thresholds of one TSC.
+ *
+ * @devdata: the struct rcar_gen3_thermal_tsc of the sensor.
+ * @low:     lower threshold, -INT_MAX when there is none.
+ * @high:    upper threshold, INT_MAX when there is none.
+ *
+ * A threshold register is written only for a limit that is set; IRQMSK is
+ * always written last with the bits of the limits in use.  Returns 0.
+ */
+static int rcar_gen3_thermal_set_trips(void *devdata, int low, int high)
+{
+	struct rcar_gen3_thermal_tsc *tsc = devdata;
+	const bool has_low = low != -INT_MAX;
+	const bool has_high = high != INT_MAX;
+	u32 mask = 0;
+	int code;
+
+	if (has_low) {
+		mask |= IRQ_TEMPD1;
+		code = rcar_gen3_thermal_mcelsius_to_temp(tsc, low);
+		rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP1, code);
+	}
+
+	if (has_high) {
+		mask |= IRQ_TEMP2;
+		code = rcar_gen3_thermal_mcelsius_to_temp(tsc, high);
+		rcar_gen3_thermal_write(tsc, REG_GEN3_IRQTEMP2, code);
+	}
+
+	rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, mask);
+
+	return 0;
+}
+
+static struct thermal_zone_of_device_ops rcar_gen3_tz_of_ops = {
.get_temp = rcar_gen3_thermal_get_temp,
+ .set_trips = rcar_gen3_thermal_set_trips,
};
+/*
+ * Interrupt handler shared by all TSCs: read and clear the interrupt
+ * status of every sensor and ask the thermal core to re-evaluate each zone
+ * whose status was non-zero.  Always returns IRQ_HANDLED.
+ */
+static irqreturn_t rcar_gen3_thermal_irq(int irq, void *data)
+{
+	struct rcar_gen3_thermal_priv *priv = data;
+	unsigned int idx;
+
+	for (idx = 0; idx < priv->num_tscs; idx++) {
+		struct rcar_gen3_thermal_tsc *tsc = priv->tscs[idx];
+		u32 pending;
+
+		pending = rcar_gen3_thermal_read(tsc, REG_GEN3_IRQSTR);
+		rcar_gen3_thermal_write(tsc, REG_GEN3_IRQSTR, 0);
+
+		if (!pending)
+			continue;
+
+		thermal_zone_device_update(tsc->zone,
+					   THERMAL_EVENT_UNSPECIFIED);
+	}
+
+	return IRQ_HANDLED;
+}
+
static const struct soc_device_attribute r8a7795es1[] = {
{ .soc_id = "r8a7795", .revision = "ES1.*" },
{ /* sentinel */ }
rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0x3F);
rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0);
+ if (tsc->zone->ops->set_trips)
+ rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN,
+ IRQ_TEMPD1 | IRQ_TEMP2);
rcar_gen3_thermal_write(tsc, REG_GEN3_CTSR,
CTSR_PONM | CTSR_AOUT | CTSR_THBGR | CTSR_VMEN);
rcar_gen3_thermal_write(tsc, REG_GEN3_IRQCTL, 0);
rcar_gen3_thermal_write(tsc, REG_GEN3_IRQMSK, 0);
+ if (tsc->zone->ops->set_trips)
+ rcar_gen3_thermal_write(tsc, REG_GEN3_IRQEN,
+ IRQ_TEMPD1 | IRQ_TEMP2);
reg_val = rcar_gen3_thermal_read(tsc, REG_GEN3_THCTR);
reg_val |= THCTR_THSST;
thermal_remove_hwmon_sysfs(zone);
}
+/*
+ * Request the two optional interrupts of the thermal device.
+ *
+ * Each is requested as a threaded, one-shot interrupt served by
+ * rcar_gen3_thermal_irq() and named "<device name>:ch<index>".
+ *
+ * Returns 0 on success, the negative value from
+ * platform_get_irq_optional() if an interrupt is unavailable, -ENOMEM if
+ * the name cannot be allocated, or the error from the IRQ request.
+ */
+static int rcar_gen3_thermal_request_irqs(struct rcar_gen3_thermal_priv *priv,
+					  struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	unsigned int i;
+	char *irqname;
+	int ret, irq;
+
+	for (i = 0; i < 2; i++) {
+		irq = platform_get_irq_optional(pdev, i);
+		if (irq < 0)
+			return irq;
+
+		/* 'i' is unsigned, so it has to be formatted with %u. */
+		irqname = devm_kasprintf(dev, GFP_KERNEL, "%s:ch%u",
+					 dev_name(dev), i);
+		if (!irqname)
+			return -ENOMEM;
+
+		ret = devm_request_threaded_irq(dev, irq, NULL,
+						rcar_gen3_thermal_irq,
+						IRQF_ONESHOT, irqname, priv);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
static int rcar_gen3_thermal_probe(struct platform_device *pdev)
{
struct rcar_gen3_thermal_priv *priv;
const int *ths_tj_1 = of_device_get_match_data(dev);
struct resource *res;
struct thermal_zone_device *zone;
- int ret, i;
+ unsigned int i;
+ int ret;
/* default values if FUSEs are missing */
/* TODO: Read values from hardware on supported platforms */
platform_set_drvdata(pdev, priv);
+ if (rcar_gen3_thermal_request_irqs(priv, pdev))
+ rcar_gen3_tz_of_ops.set_trips = NULL;
+
pm_runtime_enable(dev);
pm_runtime_get_sync(dev);
priv->tscs[i] = tsc;
- priv->thermal_init(tsc);
- rcar_gen3_thermal_calc_coefs(tsc, ptat, thcodes[i], *ths_tj_1);
-
zone = devm_thermal_zone_of_sensor_register(dev, i, tsc,
&rcar_gen3_tz_of_ops);
if (IS_ERR(zone)) {
}
tsc->zone = zone;
+ priv->thermal_init(tsc);
+ rcar_gen3_thermal_calc_coefs(tsc, ptat, thcodes[i], *ths_tj_1);
+
tsc->zone->tzp->no_hwmon = false;
ret = thermal_add_hwmon_sysfs(tsc->zone);
if (ret)
if (ret < 0)
goto error_unregister;
- dev_info(dev, "TSC%d: Loaded %d trip points\n", i, ret);
+ dev_info(dev, "TSC%u: Loaded %d trip points\n", i, ret);
}
priv->num_tscs = i;
for (i = 0; i < priv->num_tscs; i++) {
struct rcar_gen3_thermal_tsc *tsc = priv->tscs[i];
+ struct thermal_zone_device *zone = tsc->zone;
priv->thermal_init(tsc);
+ if (zone->ops->set_trips)
+ rcar_gen3_thermal_set_trips(tsc, zone->prev_low_trip,
+ zone->prev_high_trip);
}
return 0;
data->sclk = devm_clk_get(&pdev->dev, "tmu_sclk");
if (IS_ERR(data->sclk)) {
dev_err(&pdev->dev, "Failed to get sclk\n");
+ ret = PTR_ERR(data->sclk);
goto err_clk;
} else {
ret = clk_prepare_enable(data->sclk);
# SPDX-License-Identifier: GPL-2.0-only
menu "NVIDIA Tegra thermal drivers"
-depends on ARCH_TEGRA
+depends on ARCH_TEGRA || COMPILE_TEST
config TEGRA_SOCTHERM
tristate "Tegra SOCTHERM thermal management"
Enable this option for support for sensing system temperature of NVIDIA
Tegra systems-on-chip with the BPMP coprocessor (Tegra186).
+config TEGRA30_TSENSOR
+ tristate "Tegra30 Thermal Sensor"
+ depends on ARCH_TEGRA_3x_SOC || COMPILE_TEST
+ help
+ Enable this option to support thermal management of NVIDIA Tegra30
+ system-on-chip.
+
endmenu
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_TEGRA_SOCTHERM) += tegra-soctherm.o
obj-$(CONFIG_TEGRA_BPMP_THERMAL) += tegra-bpmp-thermal.o
+obj-$(CONFIG_TEGRA30_TSENSOR) += tegra30-tsensor.o
tegra-soctherm-y := soctherm.o soctherm-fuse.o
tegra-soctherm-$(CONFIG_ARCH_TEGRA_124_SOC) += tegra124-soctherm.o
temp = clamp_val(trip_temp, min_low_temp, max_high_temp);
if (temp != trip_temp)
- dev_info(dev, "soctherm: trip temperature %d forced to %d\n",
- trip_temp, temp);
+ dev_dbg(dev, "soctherm: trip temperature %d forced to %d\n",
+ trip_temp, temp);
return temp;
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tegra30 SoC Thermal Sensor driver
+ *
+ * Based on downstream HWMON driver from NVIDIA.
+ * Copyright (C) 2011 NVIDIA Corporation
+ *
+ * Author: Dmitry Osipenko <digetx@gmail.com>
+ * Copyright (C) 2021 GRATE-DRIVER project
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/math.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/reset.h>
+#include <linux/slab.h>
+#include <linux/thermal.h>
+#include <linux/types.h>
+
+#include <soc/tegra/fuse.h>
+
+#include "../thermal_core.h"
+#include "../thermal_hwmon.h"
+
+#define TSENSOR_SENSOR0_CONFIG0 0x0
+#define TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP BIT(0)
+#define TSENSOR_SENSOR0_CONFIG0_HW_FREQ_DIV_EN BIT(1)
+#define TSENSOR_SENSOR0_CONFIG0_THERMAL_RST_EN BIT(2)
+#define TSENSOR_SENSOR0_CONFIG0_DVFS_EN BIT(3)
+#define TSENSOR_SENSOR0_CONFIG0_INTR_OVERFLOW_EN BIT(4)
+#define TSENSOR_SENSOR0_CONFIG0_INTR_HW_FREQ_DIV_EN BIT(5)
+#define TSENSOR_SENSOR0_CONFIG0_INTR_THERMAL_RST_EN BIT(6)
+#define TSENSOR_SENSOR0_CONFIG0_M GENMASK(23, 8)
+#define TSENSOR_SENSOR0_CONFIG0_N GENMASK(31, 24)
+
+#define TSENSOR_SENSOR0_CONFIG1 0x8
+#define TSENSOR_SENSOR0_CONFIG1_TH1 GENMASK(15, 0)
+#define TSENSOR_SENSOR0_CONFIG1_TH2 GENMASK(31, 16)
+
+#define TSENSOR_SENSOR0_CONFIG2 0xc
+#define TSENSOR_SENSOR0_CONFIG2_TH3 GENMASK(15, 0)
+
+#define TSENSOR_SENSOR0_STATUS0 0x18
+#define TSENSOR_SENSOR0_STATUS0_STATE GENMASK(2, 0)
+#define TSENSOR_SENSOR0_STATUS0_INTR BIT(8)
+#define TSENSOR_SENSOR0_STATUS0_CURRENT_VALID BIT(9)
+
+#define TSENSOR_SENSOR0_TS_STATUS1 0x1c
+#define TSENSOR_SENSOR0_TS_STATUS1_CURRENT_COUNT GENMASK(31, 16)
+
+#define TEGRA30_FUSE_TEST_PROG_VER 0x28
+
+#define TEGRA30_FUSE_TSENSOR_CALIB 0x98
+#define TEGRA30_FUSE_TSENSOR_CALIB_LOW GENMASK(15, 0)
+#define TEGRA30_FUSE_TSENSOR_CALIB_HIGH GENMASK(31, 16)
+
+#define TEGRA30_FUSE_SPARE_BIT 0x144
+
+struct tegra_tsensor;
+
+/*
+ * Calibration coefficients.  tegra_tsensor_get_temp() uses a and b for the
+ * linear counter step and m, n and p for the quadratic step;
+ * tegra_tsensor_temp_to_counter() additionally uses r.
+ */
+struct tegra_tsensor_calibration_data {
+ int a, b, m, n, p, r;
+};
+
+/* State of one of the two sensor channels. */
+struct tegra_tsensor_channel {
+ /* base the TSENSOR_SENSOR0_* register offsets are added to */
+ void __iomem *regs;
+ /* channel number, printed as "ch%u" in log messages */
+ unsigned int id;
+ /* back-pointer to the owning sensor device */
+ struct tegra_tsensor *ts;
+ /* thermal zone of this channel; may be NULL (checked before use) */
+ struct thermal_zone_device *tzd;
+};
+
+/* Driver state of the whole TSENSOR block. */
+struct tegra_tsensor {
+ /* block base; channel CONFIG0 registers are at +0x40 and +0x80 */
+ void __iomem *regs;
+ /* NOTE(review): not used in the code visible here — confirm meaning */
+ bool swap_channels;
+ /* clock enabled/disabled by tegra_tsensor_hw_enable()/_disable() */
+ struct clk *clk;
+ /* device used for log messages */
+ struct device *dev;
+ /* reset line asserted/deasserted around clock changes */
+ struct reset_control *rst;
+ struct tegra_tsensor_channel ch[2];
+ struct tegra_tsensor_calibration_data calib;
+};
+
+/*
+ * Power up the TSENSOR block: assert reset, enable the clock, wait, release
+ * the reset and program the M/N counters of both channels.
+ *
+ * Returns 0 on success or the error reported by the reset or clock call
+ * that failed.  The clock is switched off again if the reset cannot be
+ * released.
+ */
+static int tegra_tsensor_hw_enable(const struct tegra_tsensor *ts)
+{
+	u32 config;
+	int ret;
+
+	ret = reset_control_assert(ts->rst);
+	if (ret) {
+		dev_err(ts->dev, "failed to assert hardware reset: %d\n", ret);
+		return ret;
+	}
+
+	ret = clk_prepare_enable(ts->clk);
+	if (ret) {
+		dev_err(ts->dev, "failed to enable clock: %d\n", ret);
+		return ret;
+	}
+
+	fsleep(1000);
+
+	ret = reset_control_deassert(ts->rst);
+	if (ret) {
+		dev_err(ts->dev, "failed to deassert hardware reset: %d\n", ret);
+		clk_disable_unprepare(ts->clk);
+		return ret;
+	}
+
+	/*
+	 * Sensors are enabled after reset by default, but not gauging
+	 * until clock counter is programmed.
+	 *
+	 * M: number of reference clock pulses after which every
+	 *    temperature / voltage measurement is made
+	 *
+	 * N: number of reference clock counts for which the counter runs
+	 */
+	config = FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_M, 12500) |
+		 FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_N, 255);
+
+	/* apply the same configuration to both channels */
+	writel_relaxed(config, ts->regs + 0x40 + TSENSOR_SENSOR0_CONFIG0);
+	writel_relaxed(config, ts->regs + 0x80 + TSENSOR_SENSOR0_CONFIG0);
+
+	return 0;
+}
+
+/*
+ * Power down the TSENSOR block: put it into reset, then gate its clock.
+ *
+ * Returns 0 on success.  If asserting the reset fails, that error is
+ * returned and the clock is left running.
+ */
+static int tegra_tsensor_hw_disable(const struct tegra_tsensor *ts)
+{
+	int ret = reset_control_assert(ts->rst);
+
+	if (ret) {
+		dev_err(ts->dev, "failed to assert hardware reset: %d\n", ret);
+		return ret;
+	}
+
+	clk_disable_unprepare(ts->clk);
+
+	return 0;
+}
+
+/*
+ * void-pointer wrapper around tegra_tsensor_hw_disable(); @data is the
+ * struct tegra_tsensor.  The result of the disable is ignored.
+ */
+static void devm_tegra_tsensor_hw_disable(void *data)
+{
+	tegra_tsensor_hw_disable((const struct tegra_tsensor *)data);
+}
+
+/*
+ * .get_temp callback: read the current counter of one channel and convert
+ * it to a temperature using the calibration coefficients.
+ *
+ * @data: the struct tegra_tsensor_channel to read.
+ * @temp: output, written only on success (presumably millidegrees
+ *        Celsius, as trips in this file are given in that unit — confirm).
+ *
+ * Returns 0 on success, the error from the status poll if the counter never
+ * becomes valid, or -EINVAL if the counter reads 0xffff.
+ */
+static int tegra_tsensor_get_temp(void *data, int *temp)
+{
+ const struct tegra_tsensor_channel *tsc = data;
+ const struct tegra_tsensor *ts = tsc->ts;
+ int err, c1, c2, c3, c4, counter;
+ u32 val;
+
+ /*
+ * Counter will be invalid if hardware is misprogrammed or not enough
+ * time passed since the time when sensor was enabled.
+ */
+ /* poll every 21 ms, give up after 50 such periods */
+ err = readl_relaxed_poll_timeout(tsc->regs + TSENSOR_SENSOR0_STATUS0, val,
+ val & TSENSOR_SENSOR0_STATUS0_CURRENT_VALID,
+ 21 * USEC_PER_MSEC,
+ 21 * USEC_PER_MSEC * 50);
+ if (err) {
+ dev_err_once(ts->dev, "ch%u: counter invalid\n", tsc->id);
+ return err;
+ }
+
+ val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_TS_STATUS1);
+ counter = FIELD_GET(TSENSOR_SENSOR0_TS_STATUS1_CURRENT_COUNT, val);
+
+ /*
+ * This shouldn't happen with a valid counter status, nevertheless
+ * let's verify the value since it's in a separate (from status)
+ * register.
+ */
+ if (counter == 0xffff) {
+ dev_err_once(ts->dev, "ch%u: counter overflow\n", tsc->id);
+ return -EINVAL;
+ }
+
+ /*
+ * temperature = a * counter + b
+ * temperature = m * (temperature ^ 2) + n * temperature + p
+ */
+ c1 = DIV_ROUND_CLOSEST(ts->calib.a * counter + ts->calib.b, 1000000);
+ /* c1 is a divisor below; never let it be zero */
+ c1 = c1 ?: 1;
+ c2 = DIV_ROUND_CLOSEST(ts->calib.p, c1);
+ c3 = c1 * ts->calib.m;
+ c4 = ts->calib.n;
+
+ /* c1 * (p / c1 + m * c1 + n), i.e. the quadratic above, scaled by 1/1000 */
+ *temp = DIV_ROUND_CLOSEST(c1 * (c2 + c3 + c4), 1000);
+
+ return 0;
+}
+
+/*
+ * Convert a temperature to the counter value used for the hardware
+ * threshold fields.
+ *
+ * NOTE(review): this looks like the inverse of the conversion done in
+ * tegra_tsensor_get_temp() (solve the quadratic, then undo the linear
+ * step); calib.r is presumably derived from m and n — confirm where the
+ * calibration data is filled in.
+ */
+static int tegra_tsensor_temp_to_counter(const struct tegra_tsensor *ts, int temp)
+{
+ int c1, c2;
+
+ c1 = DIV_ROUND_CLOSEST(ts->calib.p - temp * 1000, ts->calib.m);
+ c2 = -ts->calib.r - int_sqrt(ts->calib.r * ts->calib.r - c1);
+
+ return DIV_ROUND_CLOSEST(c2 * 1000000 - ts->calib.b, ts->calib.a);
+}
+
+/*
+ * .set_trips callback: program the TH1 threshold of one channel from the
+ * requested high temperature.
+ *
+ * @data: the struct tegra_tsensor_channel.
+ * @low:  ignored, see below.
+ * @high: upper threshold, INT_MAX when there is none.
+ *
+ * Always returns 0.
+ */
+static int tegra_tsensor_set_trips(void *data, int low, int high)
+{
+	const struct tegra_tsensor_channel *tsc = data;
+	const struct tegra_tsensor *ts = tsc->ts;
+	void __iomem *config1 = tsc->regs + TSENSOR_SENSOR0_CONFIG1;
+	int counter;
+	u32 reg;
+
+	/*
+	 * TSENSOR doesn't trigger interrupt on the "low" temperature breach,
+	 * hence bail out if high temperature is unspecified.
+	 */
+	if (high == INT_MAX)
+		return 0;
+
+	/* Read-modify-write: only the TH1 field is replaced. */
+	reg = readl_relaxed(config1);
+	reg &= ~TSENSOR_SENSOR0_CONFIG1_TH1;
+
+	counter = tegra_tsensor_temp_to_counter(ts, high);
+	reg |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG1_TH1, counter);
+	writel_relaxed(reg, config1);
+
+	return 0;
+}
+
+/* Sensor callbacks of the TSENSOR channels: temperature read-out and trips. */
+static const struct thermal_zone_of_device_ops ops = {
+ .get_temp = tegra_tsensor_get_temp,
+ .set_trips = tegra_tsensor_set_trips,
+};
+
+/*
+ * Service the interrupt status of channel @id.
+ *
+ * STATUS0 is read and the same value written back.  A state field of 5 is
+ * reported as a counter overflow.  If the INTR bit was set, the thermal
+ * zone of the channel is updated.
+ *
+ * Returns true if the INTR bit was set, false otherwise.
+ */
+static bool
+tegra_tsensor_handle_channel_interrupt(const struct tegra_tsensor *ts,
+				       unsigned int id)
+{
+	const struct tegra_tsensor_channel *tsc = &ts->ch[id];
+	void __iomem *status_reg = tsc->regs + TSENSOR_SENSOR0_STATUS0;
+	u32 status;
+
+	status = readl_relaxed(status_reg);
+	writel_relaxed(status, status_reg);
+
+	if (FIELD_GET(TSENSOR_SENSOR0_STATUS0_STATE, status) == 5)
+		dev_err_ratelimited(ts->dev, "ch%u: counter overflowed\n", id);
+
+	if (FIELD_GET(TSENSOR_SENSOR0_STATUS0_INTR, status)) {
+		thermal_zone_device_update(tsc->tzd, THERMAL_EVENT_UNSPECIFIED);
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Interrupt handler of the TSENSOR block: every channel is serviced, and
+ * IRQ_HANDLED is returned if at least one of them had an interrupt
+ * pending, IRQ_NONE otherwise.
+ */
+static irqreturn_t tegra_tsensor_isr(int irq, void *data)
+{
+	const struct tegra_tsensor *ts = data;
+	irqreturn_t result = IRQ_NONE;
+	unsigned int ch;
+
+	/* Do not stop at the first hit: all channels must be serviced. */
+	for (ch = 0; ch < ARRAY_SIZE(ts->ch); ch++) {
+		if (tegra_tsensor_handle_channel_interrupt(ts, ch))
+			result = IRQ_HANDLED;
+	}
+
+	return result;
+}
+
+/*
+ * Stop channel @id.
+ *
+ * If the channel has a thermal zone, the zone is disabled first; a failure
+ * there is returned and the hardware is left untouched.  Afterwards the
+ * SENSOR_STOP bit of CONFIG0 is set.
+ *
+ * Returns 0 on success or the error from thermal_zone_device_disable().
+ */
+static int tegra_tsensor_disable_hw_channel(const struct tegra_tsensor *ts,
+					    unsigned int id)
+{
+	const struct tegra_tsensor_channel *tsc = &ts->ch[id];
+	void __iomem *config0 = tsc->regs + TSENSOR_SENSOR0_CONFIG0;
+	u32 reg;
+
+	if (tsc->tzd) {
+		int ret = thermal_zone_device_disable(tsc->tzd);
+
+		if (ret) {
+			dev_err(ts->dev, "ch%u: failed to disable zone: %d\n",
+				id, ret);
+			return ret;
+		}
+	}
+
+	/* stop channel gracefully */
+	reg = readl_relaxed(config0);
+	reg |= TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP;
+	writel_relaxed(reg, config0);
+
+	return 0;
+}
+
+/*
+ * Work out the hot and critical trip temperatures (millidegrees Celsius,
+ * as the 85000/90000 defaults show) to program into the hardware for @tzd.
+ */
+static void tegra_tsensor_get_hw_channel_trips(struct thermal_zone_device *tzd,
+					       int *hot_trip, int *crit_trip)
+{
+	/*
+	 * Defaults used when the device-tree does not specify the trips:
+	 * 90C is the maximal critical temperature of all Tegra30 SoC
+	 * variants.
+	 */
+	int hot = 85000, crit = 90000;
+	unsigned int idx;
+
+	for (idx = 0; idx < tzd->trips; idx++) {
+		enum thermal_trip_type type;
+		int temp;
+
+		tzd->ops->get_trip_temp(tzd, idx, &temp);
+		tzd->ops->get_trip_type(tzd, idx, &type);
+
+		switch (type) {
+		case THERMAL_TRIP_HOT:
+			hot = temp;
+			break;
+		case THERMAL_TRIP_CRITICAL:
+			crit = temp;
+			break;
+		default:
+			break;
+		}
+	}
+
+	/* keep the hardware trips within the calibration limits */
+	*hot_trip = clamp(hot, 25000, 90000);
+
+	/*
+	 * The kernel starts a normal shutdown once it sees the critical
+	 * temperature breached. Put the hardware limit 5C above that so
+	 * the system can shut down gracefully before the signal reaches
+	 * the Power Management controller.
+	 */
+	*crit_trip = clamp(crit + 5000, 25000, 90000);
+}
+
+/*
+ * Start channel @id.
+ *
+ * A channel without a thermal zone only gets its SENSOR_STOP bit cleared.
+ * Otherwise the hot/critical trips are converted to counter values and
+ * written to the TH2/TH3 threshold fields, CONFIG0 is updated with the
+ * enable bits set below, and finally the thermal zone is enabled.
+ *
+ * Returns 0 on success or the error from thermal_zone_device_enable().
+ */
+static int tegra_tsensor_enable_hw_channel(const struct tegra_tsensor *ts,
+ unsigned int id)
+{
+ const struct tegra_tsensor_channel *tsc = &ts->ch[id];
+ struct thermal_zone_device *tzd = tsc->tzd;
+ int err, hot_trip = 0, crit_trip = 0;
+ u32 val;
+
+ /* no thermal zone: just let the sensor run, no trips or interrupts */
+ if (!tzd) {
+ val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+ val &= ~TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP;
+ writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+
+ return 0;
+ }
+
+ /* trips are returned in millidegrees, already clamped */
+ tegra_tsensor_get_hw_channel_trips(tzd, &hot_trip, &crit_trip);
+
+ /* prevent potential racing with tegra_tsensor_set_trips() */
+ mutex_lock(&tzd->lock);
+
+ dev_info_once(ts->dev, "ch%u: PMC emergency shutdown trip set to %dC\n",
+ id, DIV_ROUND_CLOSEST(crit_trip, 1000));
+
+ /* from here on hot_trip/crit_trip hold counter values, not temperatures */
+ hot_trip = tegra_tsensor_temp_to_counter(ts, hot_trip);
+ crit_trip = tegra_tsensor_temp_to_counter(ts, crit_trip);
+
+ /* program LEVEL2 counter threshold */
+ val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG1);
+ val &= ~TSENSOR_SENSOR0_CONFIG1_TH2;
+ val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG1_TH2, hot_trip);
+ writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG1);
+
+ /* program LEVEL3 counter threshold */
+ val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG2);
+ val &= ~TSENSOR_SENSOR0_CONFIG2_TH3;
+ val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG2_TH3, crit_trip);
+ writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG2);
+
+ /*
+ * Enable sensor, emergency shutdown, interrupts for level 1/2/3
+ * breaches and counter overflow condition.
+ *
+ * Disable DIV2 throttle for now since we need to figure out how
+ * to integrate it properly with the thermal framework.
+ *
+ * Thermal levels supported by hardware:
+ *
+ * Level 0 = cold
+ * Level 1 = passive cooling (cpufreq DVFS)
+ * Level 2 = passive cooling assisted by hardware (DIV2)
+ * Level 3 = emergency shutdown assisted by hardware (PMC)
+ */
+ val = readl_relaxed(tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+ val &= ~TSENSOR_SENSOR0_CONFIG0_SENSOR_STOP;
+ val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_DVFS_EN, 1);
+ /* NOTE(review): OR-ing in 0 leaves HW_FREQ_DIV_EN as read; it is not cleared here */
+ val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_HW_FREQ_DIV_EN, 0);
+ val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_THERMAL_RST_EN, 1);
+ val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_OVERFLOW_EN, 1);
+ val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_HW_FREQ_DIV_EN, 1);
+ val |= FIELD_PREP(TSENSOR_SENSOR0_CONFIG0_INTR_THERMAL_RST_EN, 1);
+ writel_relaxed(val, tsc->regs + TSENSOR_SENSOR0_CONFIG0);
+
+ mutex_unlock(&tzd->lock);
+
+ /* the zone is enabled only after the lock has been dropped */
+ err = thermal_zone_device_enable(tzd);
+ if (err) {
+ dev_err(ts->dev, "ch%u: failed to enable zone: %d\n", id, err);
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * Read spare fuse number @spare (one 32-bit word per spare) and report
+ * whether it is non-zero. The return value of tegra_fuse_readl() is not
+ * checked; if nothing is stored, the zero default yields false.
+ */
+static bool tegra_tsensor_fuse_read_spare(unsigned int spare)
+{
+	u32 word = 0;
+
+	tegra_fuse_readl(TEGRA30_FUSE_SPARE_BIT + spare * 4, &word);
+
+	return word != 0;
+}
+
+/*
+ * Read the ATE version and calibration data from the fuses and derive
+ * the conversion coefficients stored in ts->calib. Also decides whether
+ * the two channels have to be swapped (ts->swap_channels).
+ *
+ * Returns 0 on success, -ENODEV for an unsupported ATE version, -EINVAL
+ * for unusable calibration data, or the error of a failed fuse read.
+ */
+static int tegra_tsensor_nvmem_setup(struct tegra_tsensor *ts)
+{
+	u32 i, ate_ver = 0, cal = 0, t1_25C = 0, t2_90C = 0;
+	int err, c1_25C, c2_90C, c_span, t_span;
+
+	err = tegra_fuse_readl(TEGRA30_FUSE_TEST_PROG_VER, &ate_ver);
+	if (err) {
+		dev_err_probe(ts->dev, err, "failed to get ATE version\n");
+		return err;
+	}
+
+	if (ate_ver < 8) {
+		dev_info(ts->dev, "unsupported ATE version: %u\n", ate_ver);
+		return -ENODEV;
+	}
+
+	/*
+	 * We have two TSENSOR channels in a two different spots on SoC.
+	 * Second channel provides more accurate data on older SoC versions,
+	 * use it as a primary channel.
+	 */
+	if (ate_ver <= 21) {
+		dev_info_once(ts->dev,
+			      "older ATE version detected, channels remapped\n");
+		ts->swap_channels = true;
+	}
+
+	err = tegra_fuse_readl(TEGRA30_FUSE_TSENSOR_CALIB, &cal);
+	if (err) {
+		dev_err(ts->dev, "failed to get calibration data: %d\n", err);
+		return err;
+	}
+
+	/* get calibrated counter values for 25C/90C thresholds */
+	c1_25C = FIELD_GET(TEGRA30_FUSE_TSENSOR_CALIB_LOW, cal);
+	c2_90C = FIELD_GET(TEGRA30_FUSE_TSENSOR_CALIB_HIGH, cal);
+
+	/* and calibrated temperatures corresponding to the counter values */
+	for (i = 0; i < 7; i++) {
+		t1_25C |= tegra_tsensor_fuse_read_spare(14 + i) << i;
+		t1_25C |= tegra_tsensor_fuse_read_spare(21 + i) << i;
+
+		t2_90C |= tegra_tsensor_fuse_read_spare(0 + i) << i;
+		t2_90C |= tegra_tsensor_fuse_read_spare(7 + i) << i;
+	}
+
+	/*
+	 * Validate using signed spans. The temperatures are 7-bit values
+	 * (see the loop above), so the casts cannot overflow. Comparing
+	 * the int counter span against the u32 temperature span directly
+	 * would convert a negative counter span to a huge unsigned value
+	 * and let it through. A non-positive temperature span must be
+	 * rejected too: it makes calib.a zero, and calib.a is used as a
+	 * divisor by tegra_tsensor_temp_to_counter().
+	 */
+	c_span = c2_90C - c1_25C;
+	t_span = (int)t2_90C - (int)t1_25C;
+
+	if (t_span <= 0 || c_span <= t_span) {
+		dev_err(ts->dev, "invalid calibration data: %d %d %u %u\n",
+			c2_90C, c1_25C, t2_90C, t1_25C);
+		return -EINVAL;
+	}
+
+	/* all calibration coefficients are premultiplied by 1000000 */
+
+	ts->calib.a = DIV_ROUND_CLOSEST(t_span * 1000000, c_span);
+
+	ts->calib.b = t1_25C * 1000000 - ts->calib.a * c1_25C;
+
+	if (tegra_sku_info.revision == TEGRA_REVISION_A01) {
+		ts->calib.m = -2775;
+		ts->calib.n = 1338811;
+		ts->calib.p = -7300000;
+	} else {
+		ts->calib.m = -3512;
+		ts->calib.n = 1528943;
+		ts->calib.p = -11100000;
+	}
+
+	/* except the coefficient of a reduced quadratic equation */
+	ts->calib.r = DIV_ROUND_CLOSEST(ts->calib.n, ts->calib.m * 2);
+
+	dev_info_once(ts->dev,
+		      "calibration: %d %d %u %u ATE ver: %u SoC rev: %u\n",
+		      c2_90C, c1_25C, t2_90C, t1_25C, ate_ver,
+		      tegra_sku_info.revision);
+
+	return 0;
+}
+
+/*
+ * Initialise channel @id and register it as a thermal-zone sensor.
+ *
+ * A sensor that is not referenced by any thermal zone (-ENODEV) is not an
+ * error: the channel is kept with a NULL tzd. Returns 0 on success or the
+ * registration error otherwise.
+ */
+static int tegra_tsensor_register_channel(struct tegra_tsensor *ts,
+					  unsigned int id)
+{
+	struct tegra_tsensor_channel *tsc = &ts->ch[id];
+	unsigned int hw_id = id;
+	int err;
+
+	/* with swapped channels, logical channel 0 maps to hardware 1 and vice versa */
+	if (ts->swap_channels)
+		hw_id = !id;
+
+	tsc->ts = ts;
+	tsc->id = id;
+	tsc->regs = ts->regs + 0x40 * (hw_id + 1);
+
+	tsc->tzd = devm_thermal_zone_of_sensor_register(ts->dev, id, tsc, &ops);
+	if (!IS_ERR(tsc->tzd)) {
+		/* hwmon attributes are optional, a failure is only reported */
+		if (devm_thermal_add_hwmon_sysfs(tsc->tzd))
+			dev_warn(ts->dev, "failed to add hwmon sysfs attributes\n");
+
+		return 0;
+	}
+
+	err = PTR_ERR(tsc->tzd);
+	if (err == -ENODEV) {
+		/*
+		 * The device-tree doesn't assign this sensor to any
+		 * thermal zone, which is fine.
+		 */
+		tsc->tzd = NULL;
+		return 0;
+	}
+
+	return dev_err_probe(ts->dev, err, "failed to register thermal zone\n");
+}
+
+/*
+ * Probe: acquire the IRQ number, registers, clock and reset, read the
+ * calibration, power up the block, register both channels as thermal
+ * sensors, request the interrupt and finally start the channels.
+ *
+ * All resources are device-managed, so error paths simply return.
+ */
+static int tegra_tsensor_probe(struct platform_device *pdev)
+{
+ struct tegra_tsensor *ts;
+ unsigned int i;
+ int err, irq;
+
+ ts = devm_kzalloc(&pdev->dev, sizeof(*ts), GFP_KERNEL);
+ if (!ts)
+ return -ENOMEM;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ ts->dev = &pdev->dev;
+ /* the suspend/resume callbacks fetch ts via dev_get_drvdata() */
+ platform_set_drvdata(pdev, ts);
+
+ ts->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ts->regs))
+ return PTR_ERR(ts->regs);
+
+ ts->clk = devm_clk_get(&pdev->dev, NULL);
+ if (IS_ERR(ts->clk))
+ return dev_err_probe(&pdev->dev, PTR_ERR(ts->clk),
+ "failed to get clock\n");
+
+ ts->rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
+ if (IS_ERR(ts->rst))
+ return dev_err_probe(&pdev->dev, PTR_ERR(ts->rst),
+ "failed to get reset control\n");
+
+ /* calibration must be known before any trip can be converted */
+ err = tegra_tsensor_nvmem_setup(ts);
+ if (err)
+ return err;
+
+ err = tegra_tsensor_hw_enable(ts);
+ if (err)
+ return err;
+
+ /* undo tegra_tsensor_hw_enable() automatically on failure or unbind */
+ err = devm_add_action_or_reset(&pdev->dev,
+ devm_tegra_tsensor_hw_disable,
+ ts);
+ if (err)
+ return err;
+
+ for (i = 0; i < ARRAY_SIZE(ts->ch); i++) {
+ err = tegra_tsensor_register_channel(ts, i);
+ if (err)
+ return err;
+ }
+
+ /* no primary handler: tegra_tsensor_isr() runs as the threaded handler */
+ err = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+ tegra_tsensor_isr, IRQF_ONESHOT,
+ "tegra_tsensor", ts);
+ if (err)
+ return dev_err_probe(&pdev->dev, err,
+ "failed to request interrupt\n");
+
+ /* channels are started only once the interrupt handler is in place */
+ for (i = 0; i < ARRAY_SIZE(ts->ch); i++) {
+ err = tegra_tsensor_enable_hw_channel(ts, i);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/*
+ * System suspend: stop every channel, then disable the hardware block.
+ *
+ * On failure the channels that were already stopped are started again
+ * and the error is returned.
+ */
+static int __maybe_unused tegra_tsensor_suspend(struct device *dev)
+{
+ struct tegra_tsensor *ts = dev_get_drvdata(dev);
+ unsigned int i;
+ int err;
+
+ for (i = 0; i < ARRAY_SIZE(ts->ch); i++) {
+ err = tegra_tsensor_disable_hw_channel(ts, i);
+ if (err)
+ goto enable_channel;
+ }
+
+ err = tegra_tsensor_hw_disable(ts);
+ if (err)
+ goto enable_channel;
+
+ return 0;
+
+enable_channel:
+ /*
+ * i is the index of the channel that failed, or ARRAY_SIZE(ts->ch)
+ * if tegra_tsensor_hw_disable() failed, so this re-enables exactly
+ * the channels that were stopped above.
+ */
+ while (i--)
+ tegra_tsensor_enable_hw_channel(ts, i);
+
+ return err;
+}
+
+/*
+ * System resume: enable the hardware block, then start the channels in
+ * order. Stops at, and returns, the first error.
+ */
+static int __maybe_unused tegra_tsensor_resume(struct device *dev)
+{
+	struct tegra_tsensor *ts = dev_get_drvdata(dev);
+	unsigned int ch;
+	int ret;
+
+	ret = tegra_tsensor_hw_enable(ts);
+
+	/* the loop is skipped entirely if enabling the hardware failed */
+	for (ch = 0; !ret && ch < ARRAY_SIZE(ts->ch); ch++)
+		ret = tegra_tsensor_enable_hw_channel(ts, ch);
+
+	return ret;
+}
+
+/* system sleep callbacks, installed through SET_NOIRQ_SYSTEM_SLEEP_PM_OPS() */
+static const struct dev_pm_ops tegra_tsensor_pm_ops = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(tegra_tsensor_suspend,
+ tegra_tsensor_resume)
+};
+
+/* device-tree compatible strings this driver binds to; empty entry terminates */
+static const struct of_device_id tegra_tsensor_of_match[] = {
+ { .compatible = "nvidia,tegra30-tsensor", },
+ {},
+};
+MODULE_DEVICE_TABLE(of, tegra_tsensor_of_match);
+
+/* platform driver glue: probe callback, OF match table and PM callbacks */
+static struct platform_driver tegra_tsensor_driver = {
+ .probe = tegra_tsensor_probe,
+ .driver = {
+ .name = "tegra30-tsensor",
+ .of_match_table = tegra_tsensor_of_match,
+ .pm = &tegra_tsensor_pm_ops,
+ },
+};
+module_platform_driver(tegra_tsensor_driver);
+
+MODULE_DESCRIPTION("NVIDIA Tegra30 Thermal Sensor driver");
+MODULE_AUTHOR("Dmitry Osipenko <digetx@gmail.com>");
+MODULE_LICENSE("GPL");
vDPA block device simulator which terminates IO request in a
memory buffer.
+config VDPA_USER
+ tristate "VDUSE (vDPA Device in Userspace) support"
+ depends on EVENTFD && MMU && HAS_DMA
+ select DMA_OPS
+ select VHOST_IOTLB
+ select IOMMU_IOVA
+ help
+ With VDUSE it is possible to emulate a vDPA Device
+ in a userspace program.
+
config IFCVF
tristate "Intel IFC VF vDPA driver"
depends on PCI_MSI
config MLX5_VDPA_NET
tristate "vDPA driver for ConnectX devices"
select MLX5_VDPA
+ select VHOST_RING
depends on MLX5_CORE
help
VDPA network driver for ConnectX6 and newer. Provides offloading
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA) += vdpa.o
obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
+obj-$(CONFIG_VDPA_USER) += vdpa_user/
obj-$(CONFIG_IFCVF) += ifcvf/
obj-$(CONFIG_MLX5_VDPA) += mlx5/
obj-$(CONFIG_VP_VDPA) += virtio_pci/
return -EIO;
}
- for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+ hw->nr_vring = ifc_ioread16(&hw->common_cfg->num_queues);
+
+ for (i = 0; i < hw->nr_vring; i++) {
ifc_iowrite16(i, &hw->common_cfg->queue_select);
notify_off = ifc_ioread16(&hw->common_cfg->queue_notify_off);
hw->vring[i].notify_addr = hw->notify_base +
u32 q_pair_id;
ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
- q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
+ q_pair_id = qid / hw->nr_vring;
avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
last_avail_idx = ifc_ioread16(avail_idx_addr);
u32 q_pair_id;
ifcvf_lm = (struct ifcvf_lm_cfg __iomem *)hw->lm_cfg;
- q_pair_id = qid / (IFCVF_MAX_QUEUE_PAIRS * 2);
+ q_pair_id = qid / hw->nr_vring;
avail_idx_addr = &ifcvf_lm->vring_lm_cfg[q_pair_id].idx_addr[qid % 2];
hw->vring[qid].last_avail_idx = num;
ifc_iowrite16(num, avail_idx_addr);
#define N3000_DEVICE_ID 0x1041
#define N3000_SUBSYS_DEVICE_ID 0x001A
-#define IFCVF_NET_SUPPORTED_FEATURES \
- ((1ULL << VIRTIO_NET_F_MAC) | \
- (1ULL << VIRTIO_F_ANY_LAYOUT) | \
- (1ULL << VIRTIO_F_VERSION_1) | \
- (1ULL << VIRTIO_NET_F_STATUS) | \
- (1ULL << VIRTIO_F_ORDER_PLATFORM) | \
- (1ULL << VIRTIO_F_ACCESS_PLATFORM) | \
- (1ULL << VIRTIO_NET_F_MRG_RXBUF))
-
-/* Only one queue pair for now. */
-#define IFCVF_MAX_QUEUE_PAIRS 1
+/* Max 8 data queue pairs(16 queues) and one control vq for now. */
+#define IFCVF_MAX_QUEUES 17
#define IFCVF_QUEUE_ALIGNMENT PAGE_SIZE
#define IFCVF_QUEUE_MAX 32768
#define ifcvf_private_to_vf(adapter) \
(&((struct ifcvf_adapter *)adapter)->vf)
-#define IFCVF_MAX_INTR (IFCVF_MAX_QUEUE_PAIRS * 2 + 1)
-
struct vring_info {
u64 desc;
u64 avail;
u32 dev_type;
struct virtio_pci_common_cfg __iomem *common_cfg;
void __iomem *net_cfg;
- struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
+ struct vring_info vring[IFCVF_MAX_QUEUES];
void __iomem * const *base;
char config_msix_name[256];
struct vdpa_callback config_cb;
struct ifcvf_lm_cfg {
u8 reserved[IFCVF_LM_RING_STATE_OFFSET];
- struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUE_PAIRS];
+ struct ifcvf_vring_lm_cfg vring_lm_cfg[IFCVF_MAX_QUEUES];
+};
+
+struct ifcvf_vdpa_mgmt_dev {
+ struct vdpa_mgmt_dev mdev;
+ struct ifcvf_adapter *adapter;
+ struct pci_dev *pdev;
};
int ifcvf_init_hw(struct ifcvf_hw *hw, struct pci_dev *dev);
struct pci_dev *pdev = adapter->pdev;
struct ifcvf_hw *vf = &adapter->vf;
int vector, i, ret, irq;
+ u16 max_intr;
- ret = pci_alloc_irq_vectors(pdev, IFCVF_MAX_INTR,
- IFCVF_MAX_INTR, PCI_IRQ_MSIX);
+ /* all queues and config interrupt */
+ max_intr = vf->nr_vring + 1;
+
+ ret = pci_alloc_irq_vectors(pdev, max_intr,
+ max_intr, PCI_IRQ_MSIX);
if (ret < 0) {
IFCVF_ERR(pdev, "Failed to alloc IRQ vectors\n");
return ret;
return ret;
}
- for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+ for (i = 0; i < vf->nr_vring; i++) {
snprintf(vf->vring[i].msix_name, 256, "ifcvf[%s]-%d\n",
pci_name(pdev), i);
vector = i + IFCVF_MSI_QUEUE_OFF;
u8 status;
int ret;
- vf->nr_vring = IFCVF_MAX_QUEUE_PAIRS * 2;
ret = ifcvf_start_hw(vf);
if (ret < 0) {
status = ifcvf_get_status(vf);
struct ifcvf_hw *vf = ifcvf_private_to_vf(private);
int i;
- for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
+ for (i = 0; i < vf->nr_vring; i++)
vf->vring[i].cb.callback = NULL;
ifcvf_stop_hw(vf);
struct ifcvf_hw *vf = ifcvf_private_to_vf(adapter);
int i;
- for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++) {
+ for (i = 0; i < vf->nr_vring; i++) {
vf->vring[i].last_avail_idx = 0;
vf->vring[i].desc = 0;
vf->vring[i].avail = 0;
struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
struct pci_dev *pdev = adapter->pdev;
-
+ u32 type = vf->dev_type;
u64 features;
- switch (vf->dev_type) {
- case VIRTIO_ID_NET:
- features = ifcvf_get_features(vf) & IFCVF_NET_SUPPORTED_FEATURES;
- break;
- case VIRTIO_ID_BLOCK:
+ if (type == VIRTIO_ID_NET || type == VIRTIO_ID_BLOCK)
features = ifcvf_get_features(vf);
- break;
- default:
+ else {
features = 0;
IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
}
int ret;
vf = vdpa_to_vf(vdpa_dev);
- adapter = dev_get_drvdata(vdpa_dev->dev.parent);
+ adapter = vdpa_to_adapter(vdpa_dev);
status_old = ifcvf_get_status(vf);
if (status_old == status)
return;
- if ((status_old & VIRTIO_CONFIG_S_DRIVER_OK) &&
- !(status & VIRTIO_CONFIG_S_DRIVER_OK)) {
- ifcvf_stop_datapath(adapter);
- ifcvf_free_irq(adapter, IFCVF_MAX_QUEUE_PAIRS * 2);
- }
-
- if (status == 0) {
- ifcvf_reset_vring(adapter);
- return;
- }
-
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) &&
!(status_old & VIRTIO_CONFIG_S_DRIVER_OK)) {
ret = ifcvf_request_irq(adapter);
ifcvf_set_status(vf, status);
}
+/*
+ * vDPA .reset callback.
+ *
+ * Does nothing if the device status is already 0. Otherwise stops the
+ * datapath and frees the IRQs when the driver was running (DRIVER_OK),
+ * then resets the vrings. Always returns 0.
+ */
+static int ifcvf_vdpa_reset(struct vdpa_device *vdpa_dev)
+{
+	struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+	struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
+	u8 status = ifcvf_get_status(vf);
+
+	if (status != 0) {
+		if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+			ifcvf_stop_datapath(adapter);
+			ifcvf_free_irq(adapter, vf->nr_vring);
+		}
+
+		ifcvf_reset_vring(adapter);
+	}
+
+	return 0;
+}
+
static u16 ifcvf_vdpa_get_vq_num_max(struct vdpa_device *vdpa_dev)
{
return IFCVF_QUEUE_MAX;
.set_features = ifcvf_vdpa_set_features,
.get_status = ifcvf_vdpa_get_status,
.set_status = ifcvf_vdpa_set_status,
+ .reset = ifcvf_vdpa_reset,
.get_vq_num_max = ifcvf_vdpa_get_vq_num_max,
.get_vq_state = ifcvf_vdpa_get_vq_state,
.set_vq_state = ifcvf_vdpa_set_vq_state,
.get_vq_notification = ifcvf_get_vq_notification,
};
-static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+static struct virtio_device_id id_table_net[] = {
+ {VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID},
+ {0},
+};
+
+static struct virtio_device_id id_table_blk[] = {
+ {VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID},
+ {0},
+};
+
+static u32 get_dev_type(struct pci_dev *pdev)
{
- struct device *dev = &pdev->dev;
- struct ifcvf_adapter *adapter;
- struct ifcvf_hw *vf;
- int ret, i;
+ u32 dev_type;
- ret = pcim_enable_device(pdev);
- if (ret) {
- IFCVF_ERR(pdev, "Failed to enable device\n");
- return ret;
- }
+ /* This driver drives both modern virtio devices and transitional
+ * devices in modern mode.
+ * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM,
+ * so legacy devices and transitional devices in legacy
+ * mode will not work for vDPA, this driver will not
+ * drive devices with legacy interface.
+ */
- ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
- IFCVF_DRIVER_NAME);
- if (ret) {
- IFCVF_ERR(pdev, "Failed to request MMIO region\n");
- return ret;
- }
+ if (pdev->device < 0x1040)
+ dev_type = pdev->subsystem_device;
+ else
+ dev_type = pdev->device - 0x1040;
- ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
- if (ret) {
- IFCVF_ERR(pdev, "No usable DMA configuration\n");
- return ret;
- }
+ return dev_type;
+}
- ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
- if (ret) {
- IFCVF_ERR(pdev,
- "Failed for adding devres for freeing irq vectors\n");
- return ret;
- }
+static int ifcvf_vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+{
+ struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
+ struct ifcvf_adapter *adapter;
+ struct pci_dev *pdev;
+ struct ifcvf_hw *vf;
+ struct device *dev;
+ int ret, i;
+ ifcvf_mgmt_dev = container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
+ if (ifcvf_mgmt_dev->adapter)
+ return -EOPNOTSUPP;
+
+ pdev = ifcvf_mgmt_dev->pdev;
+ dev = &pdev->dev;
adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
- dev, &ifc_vdpa_ops, NULL);
+ dev, &ifc_vdpa_ops, name, false);
if (IS_ERR(adapter)) {
IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
return PTR_ERR(adapter);
}
- pci_set_master(pdev);
- pci_set_drvdata(pdev, adapter);
+ ifcvf_mgmt_dev->adapter = adapter;
+ pci_set_drvdata(pdev, ifcvf_mgmt_dev);
vf = &adapter->vf;
-
- /* This drirver drives both modern virtio devices and transitional
- * devices in modern mode.
- * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM,
- * so legacy devices and transitional devices in legacy
- * mode will not work for vDPA, this driver will not
- * drive devices with legacy interface.
- */
- if (pdev->device < 0x1040)
- vf->dev_type = pdev->subsystem_device;
- else
- vf->dev_type = pdev->device - 0x1040;
-
+ vf->dev_type = get_dev_type(pdev);
vf->base = pcim_iomap_table(pdev);
adapter->pdev = pdev;
goto err;
}
- for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
+ for (i = 0; i < vf->nr_vring; i++)
vf->vring[i].irq = -EINVAL;
vf->hw_features = ifcvf_get_hw_features(vf);
- ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2);
+ adapter->vdpa.mdev = &ifcvf_mgmt_dev->mdev;
+ ret = _vdpa_register_device(&adapter->vdpa, vf->nr_vring);
if (ret) {
- IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
+ IFCVF_ERR(pdev, "Failed to register to vDPA bus");
goto err;
}
return ret;
}
+/*
+ * Management .dev_del callback: unregister the vDPA device and clear the
+ * adapter pointer in the management device. ifcvf_vdpa_dev_add() returns
+ * -EOPNOTSUPP while that pointer is set.
+ */
+static void ifcvf_vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
+{
+	struct ifcvf_vdpa_mgmt_dev *mgmt =
+		container_of(mdev, struct ifcvf_vdpa_mgmt_dev, mdev);
+
+	_vdpa_unregister_device(dev);
+	mgmt->adapter = NULL;
+}
+
+/* add/delete callbacks installed into mdev.ops by ifcvf_probe() */
+static const struct vdpa_mgmtdev_ops ifcvf_vdpa_mgmt_dev_ops = {
+ .dev_add = ifcvf_vdpa_dev_add,
+ .dev_del = ifcvf_vdpa_dev_del
+};
+
+/*
+ * PCI probe: allocate the vDPA management device for this function, pick
+ * the virtio ID table from the device type, set up the PCI resources and
+ * register the management device. The vDPA device itself is created
+ * later through the .dev_add callback.
+ *
+ * Returns 0 on success or a negative errno; the management device is
+ * freed on every error path.
+ */
+static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+	struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
+	struct device *dev = &pdev->dev;
+	u32 dev_type;
+	int ret;
+
+	ifcvf_mgmt_dev = kzalloc(sizeof(struct ifcvf_vdpa_mgmt_dev), GFP_KERNEL);
+	if (!ifcvf_mgmt_dev) {
+		IFCVF_ERR(pdev, "Failed to alloc memory for the vDPA management device\n");
+		return -ENOMEM;
+	}
+
+	dev_type = get_dev_type(pdev);
+	switch (dev_type) {
+	case VIRTIO_ID_NET:
+		ifcvf_mgmt_dev->mdev.id_table = id_table_net;
+		break;
+	case VIRTIO_ID_BLOCK:
+		ifcvf_mgmt_dev->mdev.id_table = id_table_blk;
+		break;
+	default:
+		IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", dev_type);
+		ret = -EOPNOTSUPP;
+		goto err;
+	}
+
+	ifcvf_mgmt_dev->mdev.ops = &ifcvf_vdpa_mgmt_dev_ops;
+	ifcvf_mgmt_dev->mdev.device = dev;
+	ifcvf_mgmt_dev->pdev = pdev;
+
+	ret = pcim_enable_device(pdev);
+	if (ret) {
+		IFCVF_ERR(pdev, "Failed to enable device\n");
+		goto err;
+	}
+
+	ret = pcim_iomap_regions(pdev, BIT(0) | BIT(2) | BIT(4),
+				 IFCVF_DRIVER_NAME);
+	if (ret) {
+		IFCVF_ERR(pdev, "Failed to request MMIO region\n");
+		goto err;
+	}
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+	if (ret) {
+		IFCVF_ERR(pdev, "No usable DMA configuration\n");
+		goto err;
+	}
+
+	ret = devm_add_action_or_reset(dev, ifcvf_free_irq_vectors, pdev);
+	if (ret) {
+		IFCVF_ERR(pdev,
+			  "Failed for adding devres for freeing irq vectors\n");
+		goto err;
+	}
+
+	pci_set_master(pdev);
+
+	/*
+	 * ifcvf_remove() looks the management device up via
+	 * pci_get_drvdata(), so the driver data has to be valid even if
+	 * no vDPA device is ever added through .dev_add.
+	 */
+	pci_set_drvdata(pdev, ifcvf_mgmt_dev);
+
+	ret = vdpa_mgmtdev_register(&ifcvf_mgmt_dev->mdev);
+	if (ret) {
+		IFCVF_ERR(pdev,
+			  "Failed to initialize the management interfaces\n");
+		goto err;
+	}
+
+	return 0;
+
+err:
+	kfree(ifcvf_mgmt_dev);
+	return ret;
+}
+
static void ifcvf_remove(struct pci_dev *pdev)
{
- struct ifcvf_adapter *adapter = pci_get_drvdata(pdev);
+ struct ifcvf_vdpa_mgmt_dev *ifcvf_mgmt_dev;
- vdpa_unregister_device(&adapter->vdpa);
+ ifcvf_mgmt_dev = pci_get_drvdata(pdev);
+ vdpa_mgmtdev_unregister(&ifcvf_mgmt_dev->mdev);
+ kfree(ifcvf_mgmt_dev);
}
static struct pci_device_id ifcvf_pci_ids[] = {
#define __MLX5_VDPA_H__
#include <linux/etherdevice.h>
-#include <linux/if_vlan.h>
+#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/mlx5/driver.h>
bool valid;
};
+/* State of the control virtqueue, embedded in struct mlx5_vdpa_dev as cvq. */
+struct mlx5_control_vq {
+ /* allocated by init_ctrl_vq() and freed by cleanup_ctrl_vq() */
+ struct vhost_iotlb *iotlb;
+ /* spinlock to synchronize iommu table */
+ spinlock_t iommu_lock;
+ /* bound to iotlb and iommu_lock via vringh_set_iotlb() */
+ struct vringh vring;
+ bool ready;
+ /* NOTE(review): presumably the descriptor/device/driver area addresses of the ring - confirm */
+ u64 desc_addr;
+ u64 device_addr;
+ u64 driver_addr;
+ struct vdpa_callback event_cb;
+ struct vringh_kiov riov;
+ struct vringh_kiov wiov;
+ unsigned short head;
+};
+
+/*
+ * Work item that carries the mlx5_vdpa_dev it refers to.
+ * NOTE(review): presumably queued on mvdev->wq for control VQ handling -
+ * confirm against the users of this struct.
+ */
+struct mlx5_ctrl_wq_ent {
+ struct work_struct work;
+ struct mlx5_vdpa_dev *mvdev;
+};
+
struct mlx5_vdpa_dev {
struct vdpa_device vdev;
struct mlx5_core_dev *mdev;
u64 actual_features;
u8 status;
u32 max_vqs;
+ u16 max_idx;
u32 generation;
struct mlx5_vdpa_mr mr;
+ struct mlx5_control_vq cvq;
+ struct workqueue_struct *wq;
};
int mlx5_vdpa_alloc_pd(struct mlx5_vdpa_dev *dev, u32 *pdn, u16 uid);
int mlx5_vdpa_create_tis(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tisn);
void mlx5_vdpa_destroy_tis(struct mlx5_vdpa_dev *mvdev, u32 tisn);
int mlx5_vdpa_create_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 *rqtn);
+int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn);
void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn);
int mlx5_vdpa_create_tir(struct mlx5_vdpa_dev *mvdev, void *in, u32 *tirn);
void mlx5_vdpa_destroy_tir(struct mlx5_vdpa_dev *mvdev, u32 tirn);
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
+#include <linux/vhost_types.h>
#include <linux/vdpa.h>
#include <linux/gcd.h>
#include <linux/string.h>
mlx5_vdpa_destroy_mkey(mvdev, &mr->mkey);
}
-static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
{
- struct mlx5_vdpa_mr *mr = &mvdev->mr;
+ struct vhost_iotlb_map *map;
+ u64 start = 0, last = ULLONG_MAX;
int err;
- if (mr->initialized)
- return 0;
-
- if (iotlb)
- err = create_user_mr(mvdev, iotlb);
- else
- err = create_dma_mr(mvdev, mr);
-
- if (!err)
- mr->initialized = true;
+ if (!src) {
+ err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
+ return err;
+ }
- return err;
+ for (map = vhost_iotlb_itree_first(src, start, last); map;
+ map = vhost_iotlb_itree_next(map, start, last)) {
+ err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
+ map->addr, map->perm);
+ if (err)
+ return err;
+ }
+ return 0;
}
-int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
{
- int err;
-
- mutex_lock(&mvdev->mr.mkey_mtx);
- err = _mlx5_vdpa_create_mr(mvdev, iotlb);
- mutex_unlock(&mvdev->mr.mkey_mtx);
- return err;
+ vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
}
static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
if (!mr->initialized)
goto out;
+ prune_iotlb(mvdev);
if (mr->user_mr)
destroy_user_mr(mvdev, mr);
else
mutex_unlock(&mr->mkey_mtx);
}
+/*
+ * Create the memory region for @mvdev and mirror the mapping into the
+ * control VQ iotlb. mlx5_vdpa_create_mr() calls this with mr.mkey_mtx
+ * held.
+ *
+ * A NULL @iotlb selects the DMA MR, otherwise a user MR is built from
+ * @iotlb. Returns 0 if the MR already exists or was created, or a
+ * negative errno with everything undone.
+ */
+static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+	struct mlx5_vdpa_mr *mr = &mvdev->mr;
+	int err;
+
+	if (mr->initialized)
+		return 0;
+
+	if (iotlb)
+		err = create_user_mr(mvdev, iotlb);
+	else
+		err = create_dma_mr(mvdev, mr);
+
+	if (err)
+		return err;
+
+	err = dup_iotlb(mvdev, iotlb);
+	if (err)
+		goto out_err;
+
+	mr->initialized = true;
+	return 0;
+
+out_err:
+	/*
+	 * dup_iotlb() stops at the first range it fails to add, so the
+	 * control VQ iotlb may hold a partial copy. Drop it so a later
+	 * attempt starts from an empty table.
+	 */
+	prune_iotlb(mvdev);
+
+	if (iotlb)
+		destroy_user_mr(mvdev, mr);
+	else
+		destroy_dma_mr(mvdev, mr);
+
+	return err;
+}
+
+/*
+ * Locked wrapper around _mlx5_vdpa_create_mr(): takes mr.mkey_mtx for
+ * the duration of the call and passes its result through.
+ */
+int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
+{
+	struct mlx5_vdpa_mr *mr = &mvdev->mr;
+	int ret;
+
+	mutex_lock(&mr->mkey_mtx);
+	ret = _mlx5_vdpa_create_mr(mvdev, iotlb);
+	mutex_unlock(&mr->mkey_mtx);
+
+	return ret;
+}
+
int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
bool *change_map)
{
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2020 Mellanox Technologies Ltd. */
+#include <linux/iova.h>
#include <linux/mlx5/driver.h>
#include "mlx5_vdpa.h"
return err;
}
+/*
+ * Issue a MODIFY_RQT command for RQ table @rqtn.
+ *
+ * @in is a caller-built modify_rqt_in mailbox of @inlen bytes; the uid,
+ * rqtn and opcode fields are filled in here. Returns the result of
+ * mlx5_cmd_exec().
+ */
+int mlx5_vdpa_modify_rqt(struct mlx5_vdpa_dev *mvdev, void *in, int inlen, u32 rqtn)
+{
+	/* size the output mailbox after the command actually issued */
+	u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {};
+
+	MLX5_SET(modify_rqt_in, in, uid, mvdev->res.uid);
+	MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
+	MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
+	return mlx5_cmd_exec(mvdev->mdev, in, inlen, out, sizeof(out));
+}
+
void mlx5_vdpa_destroy_rqt(struct mlx5_vdpa_dev *mvdev, u32 rqtn)
{
u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
return mlx5_cmd_exec_in(mvdev->mdev, destroy_mkey, in);
}
+/*
+ * Set up the control VQ translation state: allocate its iotlb and bind
+ * the iotlb together with its lock to the vringh instance.
+ *
+ * Returns 0 on success or -ENOMEM if the iotlb cannot be allocated.
+ */
+static int init_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
+{
+	mvdev->cvq.iotlb = vhost_iotlb_alloc(0, 0);
+	if (!mvdev->cvq.iotlb)
+		return -ENOMEM;
+
+	/* the lock must be initialised before it is handed to vringh */
+	spin_lock_init(&mvdev->cvq.iommu_lock);
+	vringh_set_iotlb(&mvdev->cvq.vring, mvdev->cvq.iotlb, &mvdev->cvq.iommu_lock);
+
+	return 0;
+}
+
+/* Counterpart of init_ctrl_vq(): free the control VQ iotlb. */
+static void cleanup_ctrl_vq(struct mlx5_vdpa_dev *mvdev)
+{
+ vhost_iotlb_free(mvdev->cvq.iotlb);
+}
+
int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
{
u64 offset = MLX5_CAP64_DEV_VDPA_EMULATION(mvdev->mdev, doorbell_bar_offset);
err = -ENOMEM;
goto err_key;
}
+
+ err = init_ctrl_vq(mvdev);
+ if (err)
+ goto err_ctrl;
+
res->valid = true;
return 0;
+err_ctrl:
+ iounmap(res->kick_addr);
err_key:
dealloc_pd(mvdev, res->pdn, res->uid);
err_pd:
if (!res->valid)
return;
+ cleanup_ctrl_vq(mvdev);
iounmap(res->kick_addr);
res->kick_addr = NULL;
dealloc_pd(mvdev, res->pdn, res->uid);
(VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER | VIRTIO_CONFIG_S_DRIVER_OK | \
VIRTIO_CONFIG_S_FEATURES_OK | VIRTIO_CONFIG_S_NEEDS_RESET | VIRTIO_CONFIG_S_FAILED)
+#define MLX5_FEATURE(_mvdev, _feature) (!!((_mvdev)->actual_features & BIT_ULL(_feature)))
+
struct mlx5_vdpa_net_resources {
u32 tisn;
u32 tdn;
u16 avail_index;
u16 used_index;
bool ready;
- struct vdpa_callback cb;
bool restore;
};
u64 device_addr;
u64 driver_addr;
u32 num_ent;
- struct vdpa_callback event_cb;
/* Resources for implementing the notification channel from the device
* to the driver. fwqp is the firmware end of an RC connection; the
*/
#define MLX5_MAX_SUPPORTED_VQS 16
+/* A virtqueue index is valid as long as it does not exceed mvdev->max_idx. */
+static bool is_index_valid(struct mlx5_vdpa_dev *mvdev, u16 idx)
+{
+	return likely(idx <= mvdev->max_idx);
+}
+
struct mlx5_vdpa_net {
struct mlx5_vdpa_dev mvdev;
struct mlx5_vdpa_net_resources res;
struct virtio_net_config config;
struct mlx5_vdpa_virtqueue vqs[MLX5_MAX_SUPPORTED_VQS];
+ struct vdpa_callback event_cbs[MLX5_MAX_SUPPORTED_VQS + 1];
/* Serialize vq resources creation and destruction. This is required
* since memory map might change and we need to destroy and create
struct mlx5_flow_handle *rx_rule;
bool setup;
u16 mtu;
+ u32 cur_num_vqs;
};
static void free_resources(struct mlx5_vdpa_net *ndev);
static void init_mvqs(struct mlx5_vdpa_net *ndev);
-static int setup_driver(struct mlx5_vdpa_net *ndev);
+static int setup_driver(struct mlx5_vdpa_dev *mvdev);
static void teardown_driver(struct mlx5_vdpa_net *ndev);
static bool mlx5_vdpa_debug;
+#define MLX5_CVQ_MAX_ENT 16
+
#define MLX5_LOG_VIO_FLAG(_feature) \
do { \
if (features & BIT_ULL(_feature)) \
mlx5_vdpa_info(mvdev, "%s\n", #_status); \
} while (0)
+/*
+ * Tell whether virtio fields are little endian for this device: either
+ * the legacy layout is little endian, or VIRTIO_F_VERSION_1 is among the
+ * actual features.
+ *
+ * TODO: cross-endian support
+ */
+static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
+{
+	if (virtio_legacy_is_little_endian())
+		return true;
+
+	return !!(mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
+}
+
+/* Convert a 16-bit virtio value to CPU order using the device's endianness. */
+static u16 mlx5vdpa16_to_cpu(struct mlx5_vdpa_dev *mvdev, __virtio16 val)
+{
+	bool little_endian = mlx5_vdpa_is_little_endian(mvdev);
+
+	return __virtio16_to_cpu(little_endian, val);
+}
+
+/* Convert a 16-bit CPU-order value to virtio order using the device's endianness. */
+static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
+{
+	bool little_endian = mlx5_vdpa_is_little_endian(mvdev);
+
+	return __cpu_to_virtio16(little_endian, val);
+}
+
static inline u32 mlx5_vdpa_max_qps(int max_vqs)
{
return max_vqs / 2;
}
+/*
+ * Index of the control virtqueue: 2 when VIRTIO_NET_F_MQ is not among
+ * the actual features, otherwise twice the maximum number of queue
+ * pairs.
+ */
+static u16 ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev)
+{
+	bool mq = !!(mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_MQ));
+
+	return mq ? 2 * mlx5_vdpa_max_qps(mvdev->max_vqs) : 2;
+}
+
+/* True if @idx is the control virtqueue index as computed by ctrl_vq_idx(). */
+static bool is_ctrl_vq_idx(struct mlx5_vdpa_dev *mvdev, u16 idx)
+{
+ return idx == ctrl_vq_idx(mvdev);
+}
+
static void print_status(struct mlx5_vdpa_dev *mvdev, u8 status, bool set)
{
if (status & ~VALID_STATUS_MASK)
static void mlx5_vdpa_handle_completions(struct mlx5_vdpa_virtqueue *mvq, int num)
{
+ struct mlx5_vdpa_net *ndev = mvq->ndev;
+ struct vdpa_callback *event_cb;
+
+ event_cb = &ndev->event_cbs[mvq->index];
mlx5_cq_set_ci(&mvq->cq.mcq);
/* make sure CQ cosumer update is visible to the hardware before updating
*/
dma_wmb();
rx_post(&mvq->vqqp, num);
- if (mvq->event_cb.callback)
- mvq->event_cb.callback(mvq->event_cb.private);
+ if (event_cb->callback)
+ event_cb->callback(event_cb->private);
}
static void mlx5_vdpa_cq_comp(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe)
if (!mvq->num_ent)
return 0;
- if (mvq->initialized) {
- mlx5_vdpa_warn(&ndev->mvdev, "attempt re init\n");
- return -EINVAL;
- }
+ if (mvq->initialized)
+ return 0;
err = cq_create(ndev, idx, mvq->num_ent);
if (err)
static int create_rqt(struct mlx5_vdpa_net *ndev)
{
- int log_max_rqt;
__be32 *list;
+ int max_rqt;
void *rqtc;
int inlen;
void *in;
int i, j;
int err;
- log_max_rqt = min_t(int, 1, MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
- if (log_max_rqt < 1)
+ max_rqt = min_t(int, MLX5_MAX_SUPPORTED_VQS / 2,
+ 1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
+ if (max_rqt < 1)
return -EOPNOTSUPP;
- inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + (1 << log_max_rqt) * MLX5_ST_SZ_BYTES(rq_num);
+ inlen = MLX5_ST_SZ_BYTES(create_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
in = kzalloc(inlen, GFP_KERNEL);
if (!in)
return -ENOMEM;
rqtc = MLX5_ADDR_OF(create_rqt_in, in, rqt_context);
MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
- MLX5_SET(rqtc, rqtc, rqt_max_size, 1 << log_max_rqt);
- MLX5_SET(rqtc, rqtc, rqt_actual_size, 1);
+ MLX5_SET(rqtc, rqtc, rqt_max_size, max_rqt);
list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
- for (i = 0, j = 0; j < ndev->mvdev.max_vqs; j++) {
+ for (i = 0, j = 0; j < max_rqt; j++) {
if (!ndev->vqs[j].initialized)
continue;
i++;
}
}
+ MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
err = mlx5_vdpa_create_rqt(&ndev->mvdev, in, inlen, &ndev->res.rqtn);
kfree(in);
return 0;
}
+#define MLX5_MODIFY_RQT_NUM_RQS ((u64)1)
+
+/*
+ * Reprogram the RQ table (ndev->res.rqtn) with the RX virtqueues found among
+ * the first @num entries of ndev->vqs[].
+ *
+ * Only initialized, non-TX virtqueues are listed. The command buffer has room
+ * for max_rqt entries, so the fill loop refuses to go past that.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when no RQT entry can be used, -ENOMEM on
+ * allocation failure, -EINVAL when more RX queues are found than the table
+ * can hold, or the error returned by mlx5_vdpa_modify_rqt().
+ */
+static int modify_rqt(struct mlx5_vdpa_net *ndev, int num)
+{
+	__be32 *list;
+	int max_rqt;
+	void *rqtc;
+	int inlen;
+	void *in;
+	int i, j;
+	int err;
+
+	max_rqt = min_t(int, ndev->cur_num_vqs / 2,
+			1 << MLX5_CAP_GEN(ndev->mvdev.mdev, log_max_rqt_size));
+	if (max_rqt < 1)
+		return -EOPNOTSUPP;
+
+	inlen = MLX5_ST_SZ_BYTES(modify_rqt_in) + max_rqt * MLX5_ST_SZ_BYTES(rq_num);
+	in = kzalloc(inlen, GFP_KERNEL);
+	if (!in)
+		return -ENOMEM;
+
+	MLX5_SET(modify_rqt_in, in, uid, ndev->mvdev.res.uid);
+	MLX5_SET64(modify_rqt_in, in, bitmask, MLX5_MODIFY_RQT_NUM_RQS);
+	rqtc = MLX5_ADDR_OF(modify_rqt_in, in, ctx);
+	MLX5_SET(rqtc, rqtc, list_q_type, MLX5_RQTC_LIST_Q_TYPE_VIRTIO_NET_Q);
+
+	list = MLX5_ADDR_OF(rqtc, rqtc, rq_num[0]);
+	for (i = 0, j = 0; j < num; j++) {
+		if (!ndev->vqs[j].initialized)
+			continue;
+
+		if (vq_is_tx(ndev->vqs[j].index))
+			continue;
+
+		/* Never write beyond the max_rqt entries allocated above */
+		if (i >= max_rqt) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		list[i] = cpu_to_be32(ndev->vqs[j].virtq_id);
+		i++;
+	}
+	MLX5_SET(rqtc, rqtc, rqt_actual_size, i);
+	err = mlx5_vdpa_modify_rqt(&ndev->mvdev, in, inlen, ndev->res.rqtn);
+out:
+	kfree(in);
+	return err;
+}
+
static void destroy_rqt(struct mlx5_vdpa_net *ndev)
{
mlx5_vdpa_destroy_rqt(&ndev->mvdev, ndev->res.rqtn);
ndev->rx_rule = NULL;
}
+/*
+ * Handle a VIRTIO_NET_CTRL_MAC class command taken from the control VQ.
+ *
+ * Only VIRTIO_NET_CTRL_MAC_ADDR_SET is implemented: the new MAC is pulled
+ * from the request's read iov, the currently configured MAC (if non-zero) is
+ * removed from the MPFS table of the physical function, and the new one is
+ * inserted. ndev->config.mac is updated only when the whole sequence
+ * succeeds.
+ *
+ * Returns VIRTIO_NET_OK on success (including when the requested MAC is
+ * already configured) and VIRTIO_NET_ERR otherwise.
+ */
+static virtio_net_ctrl_ack handle_ctrl_mac(struct mlx5_vdpa_dev *mvdev, u8 cmd)
+{
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
+	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+	struct mlx5_core_dev *pfmdev;
+	bool old_mac_removed = false;
+	size_t read;
+	u8 mac[ETH_ALEN];
+
+	pfmdev = pci_get_drvdata(pci_physfn(mvdev->mdev->pdev));
+	switch (cmd) {
+	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
+		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)mac, ETH_ALEN);
+		if (read != ETH_ALEN)
+			break;
+
+		/* Nothing to do when the requested MAC is already configured */
+		if (!memcmp(ndev->config.mac, mac, ETH_ALEN)) {
+			status = VIRTIO_NET_OK;
+			break;
+		}
+
+		if (!is_zero_ether_addr(ndev->config.mac)) {
+			if (mlx5_mpfs_del_mac(pfmdev, ndev->config.mac)) {
+				mlx5_vdpa_warn(mvdev, "failed to delete old MAC %pM from MPFS table\n",
+					       ndev->config.mac);
+				break;
+			}
+			old_mac_removed = true;
+		}
+
+		if (mlx5_mpfs_add_mac(pfmdev, mac)) {
+			mlx5_vdpa_warn(mvdev, "failed to insert new MAC %pM into MPFS table\n",
+				       mac);
+			/* config.mac still holds the old MAC: put it back in
+			 * MPFS so the table keeps matching the config space.
+			 */
+			if (old_mac_removed && mlx5_mpfs_add_mac(pfmdev, ndev->config.mac))
+				mlx5_vdpa_warn(mvdev, "failed to restore old MAC %pM in MPFS table\n",
+					       ndev->config.mac);
+			break;
+		}
+
+		memcpy(ndev->config.mac, mac, ETH_ALEN);
+		status = VIRTIO_NET_OK;
+		break;
+
+	default:
+		break;
+	}
+
+	return status;
+}
+
+/*
+ * Change the number of active queue pairs to @newqps.
+ *
+ * When shrinking, the RQT is reprogrammed first and then the surplus
+ * virtqueues are torn down. When growing, the additional virtqueues are set
+ * up first and then the RQT is reprogrammed; on failure every virtqueue added
+ * by this call is torn down again and ndev->cur_num_vqs is restored, so the
+ * device is left exactly as it was before the call.
+ *
+ * Returns 0 on success or the error from modify_rqt()/setup_vq().
+ */
+static int change_num_qps(struct mlx5_vdpa_dev *mvdev, int newqps)
+{
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	int cur_qps = ndev->cur_num_vqs / 2;
+	int err;
+	int i;
+
+	if (cur_qps > newqps) {
+		err = modify_rqt(ndev, 2 * newqps);
+		if (err)
+			return err;
+
+		for (i = ndev->cur_num_vqs - 1; i >= 2 * newqps; i--)
+			teardown_vq(ndev, &ndev->vqs[i]);
+
+		ndev->cur_num_vqs = 2 * newqps;
+	} else {
+		/* modify_rqt() sizes its table from cur_num_vqs, so update it first */
+		ndev->cur_num_vqs = 2 * newqps;
+		for (i = cur_qps * 2; i < 2 * newqps; i++) {
+			err = setup_vq(ndev, &ndev->vqs[i]);
+			if (err)
+				goto clean_added;
+		}
+		err = modify_rqt(ndev, 2 * newqps);
+		if (err)
+			goto clean_added;
+	}
+	return 0;
+
+clean_added:
+	/* i is a virtqueue index, so the first added one is 2 * cur_qps;
+	 * virtqueues below that existed before this call and must survive.
+	 */
+	for (--i; i >= 2 * cur_qps; --i)
+		teardown_vq(ndev, &ndev->vqs[i]);
+
+	ndev->cur_num_vqs = 2 * cur_qps;
+
+	return err;
+}
+
+/*
+ * Handle a VIRTIO_NET_CTRL_MQ class command taken from the control VQ.
+ *
+ * Only VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET is implemented. The requested number
+ * of queue pairs comes from the driver and is validated before use: it must
+ * lie between VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN and the maximum number of pairs
+ * the device was created with, and be a power of two.
+ *
+ * Returns VIRTIO_NET_OK when the requested number of pairs is in effect on
+ * return, VIRTIO_NET_ERR otherwise.
+ */
+static virtio_net_ctrl_ack handle_ctrl_mq(struct mlx5_vdpa_dev *mvdev, u8 cmd)
+{
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+	virtio_net_ctrl_ack status = VIRTIO_NET_ERR;
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
+	struct virtio_net_ctrl_mq mq;
+	size_t read;
+	u16 newqps;
+
+	switch (cmd) {
+	case VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET:
+		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, (void *)&mq, sizeof(mq));
+		if (read != sizeof(mq))
+			break;
+
+		newqps = mlx5vdpa16_to_cpu(mvdev, mq.virtqueue_pairs);
+		/* Reject out-of-range requests: 0 would tear down every data
+		 * virtqueue and a too large value would index past vqs[].
+		 */
+		if (newqps < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
+		    newqps > mlx5_vdpa_max_qps(mvdev->max_vqs))
+			break;
+
+		if (ndev->cur_num_vqs == 2 * newqps) {
+			status = VIRTIO_NET_OK;
+			break;
+		}
+
+		/* Only power-of-two pair counts are accepted */
+		if (newqps & (newqps - 1))
+			break;
+
+		if (!change_num_qps(mvdev, newqps))
+			status = VIRTIO_NET_OK;
+
+		break;
+	default:
+		break;
+	}
+
+	return status;
+}
+
+/*
+ * Work handler for control VQ kicks, queued by mlx5_vdpa_kick_vq().
+ *
+ * Drains the control vring: for every available descriptor the control
+ * header is pulled, the command is dispatched by class, and the resulting ack
+ * byte is pushed back and the descriptor completed. The work entry was
+ * allocated by the kick path and is freed here on every exit path.
+ */
+static void mlx5_cvq_kick_handler(struct work_struct *work)
+{
+	virtio_net_ctrl_ack status;
+	struct virtio_net_ctrl_hdr ctrl;
+	struct mlx5_ctrl_wq_ent *wqent;
+	struct mlx5_vdpa_dev *mvdev;
+	struct mlx5_control_vq *cvq;
+	struct mlx5_vdpa_net *ndev;
+	size_t read, write;
+	int err;
+
+	wqent = container_of(work, struct mlx5_ctrl_wq_ent, work);
+	mvdev = wqent->mvdev;
+	ndev = to_mlx5_vdpa_ndev(mvdev);
+	cvq = &mvdev->cvq;
+	if (!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)))
+		goto out;
+
+	if (!cvq->ready)
+		goto out;
+
+	while (true) {
+		err = vringh_getdesc_iotlb(&cvq->vring, &cvq->riov, &cvq->wiov, &cvq->head,
+					   GFP_ATOMIC);
+		if (err <= 0)
+			break;
+
+		read = vringh_iov_pull_iotlb(&cvq->vring, &cvq->riov, &ctrl, sizeof(ctrl));
+		if (read != sizeof(ctrl))
+			break;
+
+		/* Every command starts out as failed so that an unhandled
+		 * class is never acked with the status of a previous command.
+		 */
+		status = VIRTIO_NET_ERR;
+		switch (ctrl.class) {
+		case VIRTIO_NET_CTRL_MAC:
+			status = handle_ctrl_mac(mvdev, ctrl.cmd);
+			break;
+		case VIRTIO_NET_CTRL_MQ:
+			status = handle_ctrl_mq(mvdev, ctrl.cmd);
+			break;
+
+		default:
+			break;
+		}
+
+		/* Make sure data is written before advancing index */
+		smp_wmb();
+
+		write = vringh_iov_push_iotlb(&cvq->vring, &cvq->wiov, &status, sizeof(status));
+		vringh_complete_iotlb(&cvq->vring, cvq->head, write);
+		vringh_kiov_cleanup(&cvq->riov);
+		vringh_kiov_cleanup(&cvq->wiov);
+
+		if (vringh_need_notify_iotlb(&cvq->vring))
+			vringh_notify(&cvq->vring);
+	}
+out:
+	kfree(wqent);
+}
+
static void mlx5_vdpa_kick_vq(struct vdpa_device *vdev, u16 idx)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+ struct mlx5_vdpa_virtqueue *mvq;
+ struct mlx5_ctrl_wq_ent *wqent;
+
+ if (!is_index_valid(mvdev, idx))
+ return;
+
+ if (unlikely(is_ctrl_vq_idx(mvdev, idx))) {
+ if (!mvdev->cvq.ready)
+ return;
+
+ wqent = kzalloc(sizeof(*wqent), GFP_ATOMIC);
+ if (!wqent)
+ return;
+ wqent->mvdev = mvdev;
+ INIT_WORK(&wqent->work, mlx5_cvq_kick_handler);
+ queue_work(mvdev->wq, &wqent->work);
+ return;
+ }
+
+ mvq = &ndev->vqs[idx];
if (unlikely(!mvq->ready))
return;
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+ struct mlx5_vdpa_virtqueue *mvq;
+
+ if (!is_index_valid(mvdev, idx))
+ return -EINVAL;
+ if (is_ctrl_vq_idx(mvdev, idx)) {
+ mvdev->cvq.desc_addr = desc_area;
+ mvdev->cvq.device_addr = device_area;
+ mvdev->cvq.driver_addr = driver_area;
+ return 0;
+ }
+
+ mvq = &ndev->vqs[idx];
mvq->desc_addr = desc_area;
mvq->device_addr = device_area;
mvq->driver_addr = driver_area;
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
struct mlx5_vdpa_virtqueue *mvq;
+ if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
+ return;
+
mvq = &ndev->vqs[idx];
mvq->num_ent = num;
}
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_vdpa_virtqueue *vq = &ndev->vqs[idx];
- vq->event_cb = *cb;
+ ndev->event_cbs[idx] = *cb;
+}
+
+/* vringh notify hook of the control VQ: invoke its event callback, if one is set. */
+static void mlx5_cvq_notify(struct vringh *vring)
+{
+	struct mlx5_control_vq *cvq = container_of(vring, struct mlx5_control_vq, vring);
+	struct vdpa_callback *cb = &cvq->event_cb;
+
+	if (cb->callback)
+		cb->callback(cb->private);
+}
+
+/* Record the control VQ ready state; when enabling, hook up its notify handler. */
+static void set_cvq_ready(struct mlx5_vdpa_dev *mvdev, bool ready)
+{
+	struct mlx5_control_vq *cvq = &mvdev->cvq;
+
+	cvq->ready = ready;
+	if (ready)
+		cvq->vring.notify = mlx5_cvq_notify;
}
static void mlx5_vdpa_set_vq_ready(struct vdpa_device *vdev, u16 idx, bool ready)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+ struct mlx5_vdpa_virtqueue *mvq;
+
+ if (!is_index_valid(mvdev, idx))
+ return;
+
+ if (is_ctrl_vq_idx(mvdev, idx)) {
+ set_cvq_ready(mvdev, ready);
+ return;
+ }
+ mvq = &ndev->vqs[idx];
if (!ready)
suspend_vq(ndev, mvq);
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
- return mvq->ready;
+ if (!is_index_valid(mvdev, idx))
+ return false;
+
+ if (is_ctrl_vq_idx(mvdev, idx))
+ return mvdev->cvq.ready;
+
+ return ndev->vqs[idx].ready;
}
static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+ struct mlx5_vdpa_virtqueue *mvq;
+ if (!is_index_valid(mvdev, idx))
+ return -EINVAL;
+
+ if (is_ctrl_vq_idx(mvdev, idx)) {
+ mvdev->cvq.vring.last_avail_idx = state->split.avail_index;
+ return 0;
+ }
+
+ mvq = &ndev->vqs[idx];
if (mvq->fw_state == MLX5_VIRTIO_NET_Q_OBJECT_STATE_RDY) {
mlx5_vdpa_warn(mvdev, "can't modify available index\n");
return -EINVAL;
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_vdpa_virtqueue *mvq = &ndev->vqs[idx];
+ struct mlx5_vdpa_virtqueue *mvq;
struct mlx5_virtq_attr attr;
int err;
+ if (!is_index_valid(mvdev, idx))
+ return -EINVAL;
+
+ if (is_ctrl_vq_idx(mvdev, idx)) {
+ state->split.avail_index = mvdev->cvq.vring.last_avail_idx;
+ return 0;
+ }
+
+ mvq = &ndev->vqs[idx];
/* If the virtq object was destroyed, use the value saved at
* the last minute of suspend_vq. This caters for userspace
* that cares about emulating the index after vq is stopped.
u16 dev_features;
dev_features = MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, device_features_bits_mask);
- ndev->mvdev.mlx_features = mlx_to_vritio_features(dev_features);
+ ndev->mvdev.mlx_features |= mlx_to_vritio_features(dev_features);
if (MLX5_CAP_DEV_VDPA_EMULATION(mvdev->mdev, virtio_version_1_0))
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_VERSION_1);
ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_F_ACCESS_PLATFORM);
+ ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_VQ);
+ ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR);
+ ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MQ);
+
print_features(mvdev, ndev->mvdev.mlx_features, false);
return ndev->mvdev.mlx_features;
}
return 0;
}
-static int setup_virtqueues(struct mlx5_vdpa_net *ndev)
+static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
{
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+ struct mlx5_control_vq *cvq = &mvdev->cvq;
int err;
int i;
- for (i = 0; i < 2 * mlx5_vdpa_max_qps(ndev->mvdev.max_vqs); i++) {
+ for (i = 0; i < 2 * mlx5_vdpa_max_qps(mvdev->max_vqs); i++) {
err = setup_vq(ndev, &ndev->vqs[i]);
if (err)
goto err_vq;
}
+ if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
+ err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
+ MLX5_CVQ_MAX_ENT, false,
+ (struct vring_desc *)(uintptr_t)cvq->desc_addr,
+ (struct vring_avail *)(uintptr_t)cvq->driver_addr,
+ (struct vring_used *)(uintptr_t)cvq->device_addr);
+ if (err)
+ goto err_vq;
+ }
+
return 0;
err_vq:
}
}
-/* TODO: cross-endian support */
-static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
-{
- return virtio_legacy_is_little_endian() ||
- (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
-}
-
-static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
+static void update_cvq_info(struct mlx5_vdpa_dev *mvdev)
{
- return __cpu_to_virtio16(mlx5_vdpa_is_little_endian(mvdev), val);
+ if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_CTRL_VQ)) {
+ if (MLX5_FEATURE(mvdev, VIRTIO_NET_F_MQ)) {
+ /* MQ supported. CVQ index is right above the last data virtqueue's */
+ mvdev->max_idx = mvdev->max_vqs;
+ } else {
+			/* Only CVQ supported. Data virtqueues occupy indices 0 and 1.
+ * CVQ gets index 2
+ */
+ mvdev->max_idx = 2;
+ }
+ } else {
+ /* Two data virtqueues only: one for rx and one for tx */
+ mvdev->max_idx = 1;
+ }
}
static int mlx5_vdpa_set_features(struct vdpa_device *vdev, u64 features)
ndev->mvdev.actual_features = features & ndev->mvdev.mlx_features;
ndev->config.mtu = cpu_to_mlx5vdpa16(mvdev, ndev->mtu);
ndev->config.status |= cpu_to_mlx5vdpa16(mvdev, VIRTIO_NET_S_LINK_UP);
+ update_cvq_info(mvdev);
return err;
}
static int save_channel_info(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *mvq)
{
struct mlx5_vq_restore_info *ri = &mvq->ri;
- struct mlx5_virtq_attr attr;
+ struct mlx5_virtq_attr attr = {};
int err;
- if (!mvq->initialized)
- return 0;
-
- err = query_virtqueue(ndev, mvq, &attr);
- if (err)
- return err;
+ if (mvq->initialized) {
+ err = query_virtqueue(ndev, mvq, &attr);
+ if (err)
+ return err;
+ }
ri->avail_index = attr.available_index;
ri->used_index = attr.used_index;
ri->desc_addr = mvq->desc_addr;
ri->device_addr = mvq->device_addr;
ri->driver_addr = mvq->driver_addr;
- ri->cb = mvq->event_cb;
ri->restore = true;
return 0;
}
mvq->desc_addr = ri->desc_addr;
mvq->device_addr = ri->device_addr;
mvq->driver_addr = ri->driver_addr;
- mvq->event_cb = ri->cb;
}
}
-static int mlx5_vdpa_change_map(struct mlx5_vdpa_net *ndev, struct vhost_iotlb *iotlb)
+static int mlx5_vdpa_change_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
{
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
int err;
suspend_vqs(ndev);
goto err_mr;
teardown_driver(ndev);
- mlx5_vdpa_destroy_mr(&ndev->mvdev);
- err = mlx5_vdpa_create_mr(&ndev->mvdev, iotlb);
+ mlx5_vdpa_destroy_mr(mvdev);
+ err = mlx5_vdpa_create_mr(mvdev, iotlb);
if (err)
goto err_mr;
- if (!(ndev->mvdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
+ if (!(mvdev->status & VIRTIO_CONFIG_S_DRIVER_OK))
return 0;
restore_channels_info(ndev);
- err = setup_driver(ndev);
+ err = setup_driver(mvdev);
if (err)
goto err_setup;
return 0;
err_setup:
- mlx5_vdpa_destroy_mr(&ndev->mvdev);
+ mlx5_vdpa_destroy_mr(mvdev);
err_mr:
return err;
}
-static int setup_driver(struct mlx5_vdpa_net *ndev)
+static int setup_driver(struct mlx5_vdpa_dev *mvdev)
{
+ struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
int err;
mutex_lock(&ndev->reslock);
if (ndev->setup) {
- mlx5_vdpa_warn(&ndev->mvdev, "setup driver called for already setup driver\n");
+ mlx5_vdpa_warn(mvdev, "setup driver called for already setup driver\n");
err = 0;
goto out;
}
- err = setup_virtqueues(ndev);
+ err = setup_virtqueues(mvdev);
if (err) {
- mlx5_vdpa_warn(&ndev->mvdev, "setup_virtqueues\n");
+ mlx5_vdpa_warn(mvdev, "setup_virtqueues\n");
goto out;
}
err = create_rqt(ndev);
if (err) {
- mlx5_vdpa_warn(&ndev->mvdev, "create_rqt\n");
+ mlx5_vdpa_warn(mvdev, "create_rqt\n");
goto err_rqt;
}
err = create_tir(ndev);
if (err) {
- mlx5_vdpa_warn(&ndev->mvdev, "create_tir\n");
+ mlx5_vdpa_warn(mvdev, "create_tir\n");
goto err_tir;
}
err = add_fwd_to_tir(ndev);
if (err) {
- mlx5_vdpa_warn(&ndev->mvdev, "add_fwd_to_tir\n");
+ mlx5_vdpa_warn(mvdev, "add_fwd_to_tir\n");
goto err_fwd;
}
ndev->setup = true;
int err;
print_status(mvdev, status, true);
- if (!status) {
- mlx5_vdpa_info(mvdev, "performing device reset\n");
- teardown_driver(ndev);
- clear_vqs_ready(ndev);
- mlx5_vdpa_destroy_mr(&ndev->mvdev);
- ndev->mvdev.status = 0;
- ndev->mvdev.mlx_features = 0;
- ++mvdev->generation;
- if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
- if (mlx5_vdpa_create_mr(mvdev, NULL))
- mlx5_vdpa_warn(mvdev, "create MR failed\n");
- }
- return;
- }
if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
- err = setup_driver(ndev);
+ err = setup_driver(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
goto err_setup;
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}
+/*
+ * vdpa .reset callback: bring the device back to its initial state.
+ *
+ * Tears down the driver resources, clears the virtqueues' ready state, the
+ * status, the event callbacks and the actual (negotiated) features, destroys
+ * the memory region and bumps the config generation.
+ *
+ * mlx_features is deliberately left untouched: it accumulates the device
+ * feature bits, and VIRTIO_NET_F_MAC is OR-ed in only once when the device is
+ * added, so clearing it here would lose that bit for good.
+ *
+ * Always returns 0.
+ */
+static int mlx5_vdpa_reset(struct vdpa_device *vdev)
+{
+	struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
+	struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
+
+	print_status(mvdev, 0, true);
+	mlx5_vdpa_info(mvdev, "performing device reset\n");
+	teardown_driver(ndev);
+	clear_vqs_ready(ndev);
+	/* Mark the control VQ not ready as well, so that kicks arriving after
+	 * the reset are ignored (the kick path checks cvq.ready).
+	 */
+	mvdev->cvq.ready = false;
+	mlx5_vdpa_destroy_mr(&ndev->mvdev);
+	ndev->mvdev.status = 0;
+	memset(ndev->event_cbs, 0, sizeof(ndev->event_cbs));
+	ndev->mvdev.actual_features = 0;
+	++mvdev->generation;
+	if (MLX5_CAP_GEN(mvdev->mdev, umem_uid_0)) {
+		if (mlx5_vdpa_create_mr(mvdev, NULL))
+			mlx5_vdpa_warn(mvdev, "create MR failed\n");
+	}
+
+	return 0;
+}
+
static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
{
return sizeof(struct virtio_net_config);
static int mlx5_vdpa_set_map(struct vdpa_device *vdev, struct vhost_iotlb *iotlb)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
- struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
bool change_map;
int err;
}
if (change_map)
- return mlx5_vdpa_change_map(ndev, iotlb);
+ return mlx5_vdpa_change_map(mvdev, iotlb);
return 0;
}
struct mlx5_vdpa_net *ndev;
phys_addr_t addr;
+ if (!is_index_valid(mvdev, idx) || is_ctrl_vq_idx(mvdev, idx))
+ return ret;
+
/* If SF BAR size is smaller than PAGE_SIZE, do not use direct
* notification to avoid the risk of mapping pages that contain BAR of more
* than one SF
.get_vendor_id = mlx5_vdpa_get_vendor_id,
.get_status = mlx5_vdpa_get_status,
.set_status = mlx5_vdpa_set_status,
+ .reset = mlx5_vdpa_reset,
.get_config_size = mlx5_vdpa_get_config_size,
.get_config = mlx5_vdpa_get_config,
.set_config = mlx5_vdpa_set_config,
max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
- name);
+ name, false);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
err = mlx5_mpfs_add_mac(pfmdev, config->mac);
if (err)
goto err_mtu;
+
+ ndev->mvdev.mlx_features |= BIT_ULL(VIRTIO_NET_F_MAC);
}
+ config->max_virtqueue_pairs = cpu_to_mlx5vdpa16(mvdev, mlx5_vdpa_max_qps(max_vqs));
mvdev->vdev.dma_dev = &mdev->pdev->dev;
err = mlx5_vdpa_alloc_resources(&ndev->mvdev);
if (err)
if (err)
goto err_mr;
+ mvdev->wq = create_singlethread_workqueue("mlx5_vdpa_ctrl_wq");
+ if (!mvdev->wq) {
+ err = -ENOMEM;
+ goto err_res2;
+ }
+
+ ndev->cur_num_vqs = 2 * mlx5_vdpa_max_qps(max_vqs);
mvdev->vdev.mdev = &mgtdev->mgtdev;
- err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
+ err = _vdpa_register_device(&mvdev->vdev, ndev->cur_num_vqs + 1);
if (err)
goto err_reg;
return 0;
err_reg:
+ destroy_workqueue(mvdev->wq);
+err_res2:
free_resources(ndev);
err_mr:
mlx5_vdpa_destroy_mr(mvdev);
static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
{
struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
+ struct mlx5_vdpa_dev *mvdev = to_mvdev(dev);
+ destroy_workqueue(mvdev->wq);
_vdpa_unregister_device(dev);
mgtdev->ndev = NULL;
}
* @config: the bus operations that is supported by this device
* @size: size of the parent structure that contains private data
* @name: name of the vdpa device; optional.
+ * @use_va: indicate whether virtual address must be used by this device
*
* Driver should use vdpa_alloc_device() wrapper macro instead of
* using this directly.
*/
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
- size_t size, const char *name)
+ size_t size, const char *name,
+ bool use_va)
{
struct vdpa_device *vdev;
int err = -EINVAL;
if (!!config->dma_map != !!config->dma_unmap)
goto err;
+ /* It should only work for the device that use on-chip IOMMU */
+ if (use_va && !(config->dma_map || config->set_map))
+ goto err;
+
err = -ENOMEM;
vdev = kzalloc(size, GFP_KERNEL);
if (!vdev)
vdev->index = err;
vdev->config = config;
vdev->features_valid = false;
+ vdev->use_va = use_va;
if (name)
err = dev_set_name(&vdev->dev, "%s", name);
vq->vring.notify = NULL;
}
-static void vdpasim_reset(struct vdpasim *vdpasim)
+static void vdpasim_do_reset(struct vdpasim *vdpasim)
{
int i;
int ret;
/* We set the limit_pfn to the maximum (ULONG_MAX - 1) */
- iova = alloc_iova(&vdpasim->iova, size, ULONG_MAX - 1, true);
+ iova = alloc_iova(&vdpasim->iova, size >> iova_shift(&vdpasim->iova),
+ ULONG_MAX - 1, true);
if (!iova)
return DMA_MAPPING_ERROR;
ops = &vdpasim_config_ops;
vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
- dev_attr->name);
+ dev_attr->name, false);
if (IS_ERR(vdpasim)) {
ret = PTR_ERR(vdpasim);
goto err_alloc;
spin_lock(&vdpasim->lock);
vdpasim->status = status;
- if (status == 0)
- vdpasim_reset(vdpasim);
spin_unlock(&vdpasim->lock);
}
+/*
+ * vdpa .reset callback of the simulator: clear the status and reset the
+ * simulator state, both while holding the simulator lock. Always returns 0.
+ */
+static int vdpasim_reset(struct vdpa_device *vdpa)
+{
+	struct vdpasim *sim = vdpa_to_sim(vdpa);
+
+	spin_lock(&sim->lock);
+	sim->status = 0;
+	vdpasim_do_reset(sim);
+	spin_unlock(&sim->lock);
+
+	return 0;
+}
+
static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
}
static int vdpasim_dma_map(struct vdpa_device *vdpa, u64 iova, u64 size,
- u64 pa, u32 perm)
+ u64 pa, u32 perm, void *opaque)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
int ret;
spin_lock(&vdpasim->iommu_lock);
- ret = vhost_iotlb_add_range(vdpasim->iommu, iova, iova + size - 1, pa,
- perm);
+ ret = vhost_iotlb_add_range_ctx(vdpasim->iommu, iova, iova + size - 1,
+ pa, perm, opaque);
spin_unlock(&vdpasim->iommu_lock);
return ret;
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
+ .reset = vdpasim_reset,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
+ .reset = vdpasim_reset,
.get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+
+vduse-y := vduse_dev.o iova_domain.o
+
+obj-$(CONFIG_VDPA_USER) += vduse.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MMU-based software IOTLB.
+ *
+ * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author: Xie Yongji <xieyongji@bytedance.com>
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/file.h>
+#include <linux/anon_inodes.h>
+#include <linux/highmem.h>
+#include <linux/vmalloc.h>
+#include <linux/vdpa.h>
+
+#include "iova_domain.h"
+
+/*
+ * Add [start, last] -> addr to the domain's IOTLB, attaching a freshly
+ * allocated vdpa_map_file (holding a reference on @file plus @offset) as the
+ * entry's opaque context. The reference and the context are dropped again if
+ * the insertion fails.
+ *
+ * Returns 0 on success, -ENOMEM or the vhost_iotlb_add_range_ctx() error.
+ */
+static int vduse_iotlb_add_range(struct vduse_iova_domain *domain,
+				 u64 start, u64 last,
+				 u64 addr, unsigned int perm,
+				 struct file *file, u64 offset)
+{
+	struct vdpa_map_file *ctx;
+	int err;
+
+	ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
+	if (!ctx)
+		return -ENOMEM;
+
+	ctx->file = get_file(file);
+	ctx->offset = offset;
+
+	err = vhost_iotlb_add_range_ctx(domain->iotlb, start, last,
+					addr, perm, ctx);
+	if (!err)
+		return 0;
+
+	fput(ctx->file);
+	kfree(ctx);
+	return err;
+}
+
+/*
+ * Remove every IOTLB entry overlapping [start, last], dropping the file
+ * reference and freeing the vdpa_map_file context attached to each.
+ */
+static void vduse_iotlb_del_range(struct vduse_iova_domain *domain,
+				  u64 start, u64 last)
+{
+	struct vhost_iotlb_map *entry;
+	struct vdpa_map_file *ctx;
+
+	for (;;) {
+		entry = vhost_iotlb_itree_first(domain->iotlb, start, last);
+		if (!entry)
+			break;
+
+		ctx = (struct vdpa_map_file *)entry->opaque;
+		fput(ctx->file);
+		kfree(ctx);
+		vhost_iotlb_map_free(domain->iotlb, entry);
+	}
+}
+
+/*
+ * Replace the domain's whole IOTLB with a copy of @iotlb. Runs under
+ * iotlb_lock. If copying any entry fails, the domain IOTLB is emptied again
+ * and the error is returned; otherwise returns 0.
+ */
+int vduse_domain_set_map(struct vduse_iova_domain *domain,
+			 struct vhost_iotlb *iotlb)
+{
+	u64 start = 0ULL, last = ULLONG_MAX;
+	struct vdpa_map_file *ctx;
+	struct vhost_iotlb_map *entry;
+	int ret = 0;
+
+	spin_lock(&domain->iotlb_lock);
+	vduse_iotlb_del_range(domain, start, last);
+
+	entry = vhost_iotlb_itree_first(iotlb, start, last);
+	while (entry) {
+		ctx = (struct vdpa_map_file *)entry->opaque;
+		ret = vduse_iotlb_add_range(domain, entry->start, entry->last,
+					    entry->addr, entry->perm,
+					    ctx->file, ctx->offset);
+		if (ret) {
+			/* Do not leave a partially populated IOTLB behind */
+			vduse_iotlb_del_range(domain, start, last);
+			break;
+		}
+		entry = vhost_iotlb_itree_next(entry, start, last);
+	}
+	spin_unlock(&domain->iotlb_lock);
+
+	return ret;
+}
+
+/*
+ * For every entry of @iotlb, delete the matching range from the domain's
+ * IOTLB. Runs under iotlb_lock.
+ */
+void vduse_domain_clear_map(struct vduse_iova_domain *domain,
+			    struct vhost_iotlb *iotlb)
+{
+	u64 start = 0ULL, last = ULLONG_MAX;
+	struct vhost_iotlb_map *entry;
+
+	spin_lock(&domain->iotlb_lock);
+	entry = vhost_iotlb_itree_first(iotlb, start, last);
+	while (entry) {
+		vduse_iotlb_del_range(domain, entry->start, entry->last);
+		entry = vhost_iotlb_itree_next(entry, start, last);
+	}
+	spin_unlock(&domain->iotlb_lock);
+}
+
+/*
+ * Record @paddr as the original physical address behind each bounce map slot
+ * covering [iova, iova + size), allocating the slot's bounce page on first
+ * use. Returns 0 on success or -ENOMEM when a bounce page cannot be
+ * allocated.
+ */
+static int vduse_domain_map_bounce_page(struct vduse_iova_domain *domain,
+					u64 iova, u64 size, u64 paddr)
+{
+	u64 last = iova + size - 1;
+	struct vduse_bounce_map *slot;
+
+	for (; iova <= last; iova += PAGE_SIZE, paddr += PAGE_SIZE) {
+		slot = &domain->bounce_maps[iova >> PAGE_SHIFT];
+		if (!slot->bounce_page) {
+			slot->bounce_page = alloc_page(GFP_ATOMIC);
+			if (!slot->bounce_page)
+				return -ENOMEM;
+		}
+		slot->orig_phys = paddr;
+	}
+	return 0;
+}
+
+/*
+ * Mark every bounce map slot covering [iova, iova + size) as having no
+ * original page. The bounce pages themselves are kept.
+ */
+static void vduse_domain_unmap_bounce_page(struct vduse_iova_domain *domain,
+					   u64 iova, u64 size)
+{
+	u64 last = iova + size - 1;
+	struct vduse_bounce_map *slot;
+
+	for (; iova <= last; iova += PAGE_SIZE) {
+		slot = &domain->bounce_maps[iova >> PAGE_SHIFT];
+		slot->orig_phys = INVALID_PHYS_ADDR;
+	}
+}
+
+/*
+ * Copy @size bytes between the original memory at physical address @orig and
+ * the bounce buffer at @addr, one page of the original at a time.
+ * DMA_TO_DEVICE copies original -> bounce buffer; any other direction copies
+ * bounce buffer -> original.
+ */
+static void do_bounce(phys_addr_t orig, void *addr, size_t size,
+		      enum dma_data_direction dir)
+{
+	unsigned int offset = offset_in_page(orig);
+	unsigned long pfn = PFN_DOWN(orig);
+	unsigned int chunk;
+	char *kaddr;
+
+	/* Only the first page can start at a non-zero offset */
+	for (; size; size -= chunk, addr += chunk, pfn++, offset = 0) {
+		chunk = min_t(size_t, PAGE_SIZE - offset, size);
+
+		kaddr = kmap_atomic(pfn_to_page(pfn));
+		if (dir != DMA_TO_DEVICE)
+			memcpy(kaddr + offset, addr, chunk);
+		else
+			memcpy(addr, kaddr + offset, chunk);
+		kunmap_atomic(kaddr);
+	}
+}
+
+/*
+ * Synchronize [iova, iova + size) between the bounce pages and the original
+ * pages in direction @dir. Nothing is done for an iova outside the bounce
+ * area; the copy stops with a warning on a slot that has no bounce page or
+ * no original page recorded.
+ */
+static void vduse_domain_bounce(struct vduse_iova_domain *domain,
+				dma_addr_t iova, size_t size,
+				enum dma_data_direction dir)
+{
+	struct vduse_bounce_map *slot;
+	unsigned int page_off;
+	size_t chunk;
+
+	if (iova >= domain->bounce_size)
+		return;
+
+	for (; size; size -= chunk, iova += chunk) {
+		slot = &domain->bounce_maps[iova >> PAGE_SHIFT];
+		page_off = offset_in_page(iova);
+		chunk = min_t(size_t, PAGE_SIZE - page_off, size);
+
+		if (WARN_ON(!slot->bounce_page ||
+			    slot->orig_phys == INVALID_PHYS_ADDR))
+			return;
+
+		do_bounce(slot->orig_phys + page_off,
+			  page_address(slot->bounce_page) + page_off,
+			  chunk, dir);
+	}
+}
+
+/*
+ * Look up the page backing @iova through the domain's IOTLB and return it
+ * with an extra reference, or NULL when no IOTLB entry covers the page.
+ */
+static struct page *
+vduse_domain_get_coherent_page(struct vduse_iova_domain *domain, u64 iova)
+{
+	u64 first = iova & PAGE_MASK;
+	u64 end = first + PAGE_SIZE - 1;
+	struct vhost_iotlb_map *entry;
+	struct page *pg = NULL;
+
+	spin_lock(&domain->iotlb_lock);
+	entry = vhost_iotlb_itree_first(domain->iotlb, first, end);
+	if (entry) {
+		pg = pfn_to_page((entry->addr + iova - entry->start) >> PAGE_SHIFT);
+		get_page(pg);
+	}
+	spin_unlock(&domain->iotlb_lock);
+
+	return pg;
+}
+
+/*
+ * Return the bounce page of the slot covering @iova with an extra reference,
+ * or NULL when that slot has no bounce page. Runs under iotlb_lock.
+ */
+static struct page *
+vduse_domain_get_bounce_page(struct vduse_iova_domain *domain, u64 iova)
+{
+	struct vduse_bounce_map *slot;
+	struct page *pg;
+
+	spin_lock(&domain->iotlb_lock);
+	slot = &domain->bounce_maps[iova >> PAGE_SHIFT];
+	pg = slot->bounce_page;
+	if (pg)
+		get_page(pg);
+	spin_unlock(&domain->iotlb_lock);
+
+	return pg;
+}
+
+/*
+ * Free every allocated bounce page of the domain. A slot that still has an
+ * original page recorded triggers a warning and is skipped.
+ */
+static void
+vduse_domain_free_bounce_pages(struct vduse_iova_domain *domain)
+{
+	unsigned long nr_slots = domain->bounce_size >> PAGE_SHIFT;
+	struct vduse_bounce_map *slot;
+	unsigned long idx;
+
+	for (idx = 0; idx < nr_slots; idx++) {
+		slot = &domain->bounce_maps[idx];
+		if (WARN_ON(slot->orig_phys != INVALID_PHYS_ADDR) ||
+		    !slot->bounce_page)
+			continue;
+
+		__free_page(slot->bounce_page);
+		slot->bounce_page = NULL;
+	}
+}
+
+/*
+ * Drop the IOTLB entry covering the bounce area, if it is installed. The
+ * bounce_map flag is tested once without the lock and again with it held.
+ */
+void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain)
+{
+	if (!domain->bounce_map)
+		return;
+
+	spin_lock(&domain->iotlb_lock);
+	if (domain->bounce_map) {
+		vduse_iotlb_del_range(domain, 0, domain->bounce_size - 1);
+		domain->bounce_map = 0;
+	}
+	spin_unlock(&domain->iotlb_lock);
+}
+
+/*
+ * Install the IOTLB entry covering the bounce area unless it is already
+ * there. The bounce_map flag is tested once without the lock and again with
+ * it held. Returns 0 on success or the vduse_iotlb_add_range() error.
+ */
+static int vduse_domain_init_bounce_map(struct vduse_iova_domain *domain)
+{
+	int err = 0;
+
+	if (domain->bounce_map)
+		return 0;
+
+	spin_lock(&domain->iotlb_lock);
+	if (!domain->bounce_map) {
+		err = vduse_iotlb_add_range(domain, 0, domain->bounce_size - 1,
+					    0, VHOST_MAP_RW, domain->file, 0);
+		if (!err)
+			domain->bounce_map = 1;
+	}
+	spin_unlock(&domain->iotlb_lock);
+
+	return err;
+}
+
+/*
+ * Allocate an IOVA range of at least @size bytes at or below @limit from
+ * @iovad. Returns the IOVA, or 0 when alloc_iova_fast() hands back pfn 0.
+ */
+static dma_addr_t
+vduse_domain_alloc_iova(struct iova_domain *iovad,
+			unsigned long size, unsigned long limit)
+{
+	unsigned long shift = iova_shift(iovad);
+	unsigned long nr_granules = iova_align(iovad, size) >> shift;
+	unsigned long pfn;
+
+	/*
+	 * Allocations small enough to be served by the IOVA range caches are
+	 * rounded up to a power of two: freeing non-power-of-two sizes back
+	 * into those caches would cause trouble later. The order of the
+	 * unadjusted size still matches when the range is freed.
+	 */
+	if (nr_granules < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1)))
+		nr_granules = roundup_pow_of_two(nr_granules);
+
+	pfn = alloc_iova_fast(iovad, nr_granules, limit >> shift, true);
+
+	return pfn << shift;
+}
+
+/* Give an IOVA range obtained from vduse_domain_alloc_iova() back to @iovad. */
+static void vduse_domain_free_iova(struct iova_domain *iovad,
+				   dma_addr_t iova, size_t size)
+{
+	unsigned long shift = iova_shift(iovad);
+	unsigned long nr_granules = iova_align(iovad, size) >> shift;
+	unsigned long pfn = iova >> shift;
+
+	free_iova_fast(iovad, pfn, nr_granules);
+}
+
+/*
+ * Map @size bytes at @offset into @page for streaming DMA through the bounce
+ * area: allocate an IOVA below bounce_size, make sure the bounce IOTLB entry
+ * exists, bind bounce pages to the original pages and, for DMA_TO_DEVICE or
+ * DMA_BIDIRECTIONAL, copy the original data into the bounce pages.
+ *
+ * Returns the IOVA, or DMA_MAPPING_ERROR on failure.
+ */
+dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
+				 struct page *page, unsigned long offset,
+				 size_t size, enum dma_data_direction dir,
+				 unsigned long attrs)
+{
+	struct iova_domain *iovad = &domain->stream_iovad;
+	unsigned long limit = domain->bounce_size - 1;
+	phys_addr_t pa = page_to_phys(page) + offset;
+	dma_addr_t iova;
+
+	iova = vduse_domain_alloc_iova(iovad, size, limit);
+	if (!iova)
+		return DMA_MAPPING_ERROR;
+
+	if (vduse_domain_init_bounce_map(domain) ||
+	    vduse_domain_map_bounce_page(domain, (u64)iova, (u64)size, pa)) {
+		vduse_domain_free_iova(iovad, iova, size);
+		return DMA_MAPPING_ERROR;
+	}
+
+	if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+		vduse_domain_bounce(domain, iova, size, DMA_TO_DEVICE);
+
+	return iova;
+}
+
+/*
+ * Undo vduse_domain_map_page(): for DMA_FROM_DEVICE or DMA_BIDIRECTIONAL copy
+ * the bounce pages back into the original pages, then unbind the bounce
+ * slots and release the IOVA range.
+ */
+void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
+			     dma_addr_t dma_addr, size_t size,
+			     enum dma_data_direction dir, unsigned long attrs)
+{
+	bool from_device = dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL;
+
+	if (from_device)
+		vduse_domain_bounce(domain, dma_addr, size, DMA_FROM_DEVICE);
+
+	vduse_domain_unmap_bounce_page(domain, (u64)dma_addr, (u64)size);
+	vduse_domain_free_iova(&domain->stream_iovad, dma_addr, size);
+}
+
+/*
+ * Allocate @size bytes of coherent memory: reserve an IOVA range in the
+ * consistent area, allocate the backing pages and publish the mapping in the
+ * domain's IOTLB.
+ *
+ * Returns the kernel address of the buffer with *@dma_addr set to its IOVA,
+ * or NULL with *@dma_addr set to DMA_MAPPING_ERROR on failure.
+ */
+void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
+				  size_t size, dma_addr_t *dma_addr,
+				  gfp_t flag, unsigned long attrs)
+{
+	struct iova_domain *iovad = &domain->consistent_iovad;
+	dma_addr_t iova;
+	void *buf;
+	int err;
+
+	iova = vduse_domain_alloc_iova(iovad, size, domain->iova_limit);
+	buf = alloc_pages_exact(size, flag);
+	if (!iova || !buf)
+		goto err;
+
+	spin_lock(&domain->iotlb_lock);
+	err = vduse_iotlb_add_range(domain, (u64)iova, (u64)iova + size - 1,
+				    virt_to_phys(buf), VHOST_MAP_RW,
+				    domain->file, (u64)iova);
+	spin_unlock(&domain->iotlb_lock);
+	if (err)
+		goto err;
+
+	*dma_addr = iova;
+
+	return buf;
+err:
+	/* Either allocation may have succeeded; release whichever did */
+	*dma_addr = DMA_MAPPING_ERROR;
+	if (buf)
+		free_pages_exact(buf, size);
+	if (iova)
+		vduse_domain_free_iova(iovad, iova, size);
+
+	return NULL;
+}
+
+/*
+ * Release a coherent mapping.
+ *
+ * The IOTLB entry overlapping [dma_addr, dma_addr + size - 1] is looked up
+ * and removed under iotlb_lock, together with the file reference and the
+ * vdpa_map_file stored in its opaque pointer.  Afterwards the IOVA range
+ * and the backing pages are freed.
+ *
+ * @vaddr and @attrs are not used; the pages are located through the
+ * physical address recorded in the IOTLB entry.  If no entry is found a
+ * warning is raised and nothing is freed.
+ */
+void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ unsigned long attrs)
+{
+ struct iova_domain *iovad = &domain->consistent_iovad;
+ struct vhost_iotlb_map *map;
+ struct vdpa_map_file *map_file;
+ phys_addr_t pa;
+
+ spin_lock(&domain->iotlb_lock);
+ map = vhost_iotlb_itree_first(domain->iotlb, (u64)dma_addr,
+ (u64)dma_addr + size - 1);
+ if (WARN_ON(!map)) {
+ spin_unlock(&domain->iotlb_lock);
+ return;
+ }
+ /* Drop the file reference and bookkeeping attached to the entry. */
+ map_file = (struct vdpa_map_file *)map->opaque;
+ fput(map_file->file);
+ kfree(map_file);
+ /* Save the physical address before the entry itself goes away. */
+ pa = map->addr;
+ vhost_iotlb_map_free(domain->iotlb, map);
+ spin_unlock(&domain->iotlb_lock);
+
+ vduse_domain_free_iova(iovad, dma_addr, size);
+ free_pages_exact(phys_to_virt(pa), size);
+}
+
+/*
+ * Page-fault handler for mappings of the domain file.
+ *
+ * The faulting page offset is turned into an IOVA.  IOVAs below
+ * bounce_size are resolved to a bounce page, everything else to a coherent
+ * page.  Returns VM_FAULT_SIGBUS when the VMA has no domain attached or no
+ * page backs the IOVA.
+ */
+static vm_fault_t vduse_domain_mmap_fault(struct vm_fault *vmf)
+{
+	struct vduse_iova_domain *domain = vmf->vma->vm_private_data;
+	unsigned long iova = vmf->pgoff << PAGE_SHIFT;
+	struct page *page;
+
+	if (!domain)
+		return VM_FAULT_SIGBUS;
+
+	page = (iova < domain->bounce_size) ?
+		vduse_domain_get_bounce_page(domain, iova) :
+		vduse_domain_get_coherent_page(domain, iova);
+	if (!page)
+		return VM_FAULT_SIGBUS;
+
+	vmf->page = page;
+
+	return 0;
+}
+
+/* VMA operations installed by vduse_domain_mmap(); only faults are handled. */
+static const struct vm_operations_struct vduse_domain_mmap_ops = {
+ .fault = vduse_domain_mmap_fault,
+};
+
+/*
+ * mmap() handler of the domain file: attach the domain to the VMA, install
+ * the fault handler and mark the VMA VM_DONTDUMP | VM_DONTEXPAND.
+ * Pages are supplied lazily by vduse_domain_mmap_fault().  Always returns 0.
+ */
+static int vduse_domain_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	vma->vm_private_data = file->private_data;
+	vma->vm_ops = &vduse_domain_mmap_ops;
+	vma->vm_flags |= VM_DONTDUMP | VM_DONTEXPAND;
+
+	return 0;
+}
+
+/*
+ * release() handler of the domain file; this is where the domain is
+ * actually torn down (vduse_domain_destroy() only drops a file reference).
+ *
+ * All IOTLB ranges and bounce pages are removed under iotlb_lock, then both
+ * IOVA allocators, the IOTLB, the bounce map array and the domain itself
+ * are freed.  Always returns 0.
+ */
+static int vduse_domain_release(struct inode *inode, struct file *file)
+{
+ struct vduse_iova_domain *domain = file->private_data;
+
+ spin_lock(&domain->iotlb_lock);
+ /* Drop every mapping in the whole 64-bit IOVA space. */
+ vduse_iotlb_del_range(domain, 0, ULLONG_MAX);
+ vduse_domain_free_bounce_pages(domain);
+ spin_unlock(&domain->iotlb_lock);
+ put_iova_domain(&domain->stream_iovad);
+ put_iova_domain(&domain->consistent_iovad);
+ vhost_iotlb_free(domain->iotlb);
+ vfree(domain->bounce_maps);
+ kfree(domain);
+
+ return 0;
+}
+
+/*
+ * File operations of the anonymous "[vduse-domain]" file created in
+ * vduse_domain_create(): it can be mmap()ed, and its release frees the
+ * domain.
+ */
+static const struct file_operations vduse_domain_fops = {
+ .owner = THIS_MODULE,
+ .mmap = vduse_domain_mmap,
+ .release = vduse_domain_release,
+};
+
+/*
+ * Drop the reference on the domain's backing file.  The domain memory is
+ * freed from the file's release handler, not here.
+ */
+void vduse_domain_destroy(struct vduse_iova_domain *domain)
+{
+	struct file *backing = domain->file;
+
+	fput(backing);
+}
+
+/*
+ * Allocate and initialise an IOVA domain.
+ *
+ * @iova_limit:  upper limit used for coherent IOVA allocations
+ * @bounce_size: size of the bounce-buffer window; rounded up to a page
+ *               multiple and must be smaller than @iova_limit
+ *
+ * The stream allocator starts at IOVA_START_PFN, the consistent allocator
+ * starts right after the bounce window.  Every bounce map entry starts out
+ * with orig_phys set to INVALID_PHYS_ADDR.
+ *
+ * Returns the new domain, or NULL on any failure.
+ */
+struct vduse_iova_domain *
+vduse_domain_create(unsigned long iova_limit, size_t bounce_size)
+{
+	unsigned long nr_bounce_pfns = PAGE_ALIGN(bounce_size) >> PAGE_SHIFT;
+	struct vduse_iova_domain *domain;
+	struct file *file;
+	unsigned long i;
+
+	if (iova_limit <= bounce_size)
+		return NULL;
+
+	domain = kzalloc(sizeof(*domain), GFP_KERNEL);
+	if (!domain)
+		return NULL;
+
+	domain->iotlb = vhost_iotlb_alloc(0, 0);
+	if (!domain->iotlb)
+		goto free_domain;
+
+	domain->iova_limit = iova_limit;
+	domain->bounce_size = PAGE_ALIGN(bounce_size);
+	domain->bounce_maps = vzalloc(nr_bounce_pfns *
+				      sizeof(struct vduse_bounce_map));
+	if (!domain->bounce_maps)
+		goto free_iotlb;
+
+	for (i = 0; i < nr_bounce_pfns; i++)
+		domain->bounce_maps[i].orig_phys = INVALID_PHYS_ADDR;
+
+	file = anon_inode_getfile("[vduse-domain]", &vduse_domain_fops,
+				  domain, O_RDWR);
+	if (IS_ERR(file))
+		goto free_maps;
+
+	domain->file = file;
+	spin_lock_init(&domain->iotlb_lock);
+	init_iova_domain(&domain->stream_iovad,
+			 PAGE_SIZE, IOVA_START_PFN);
+	init_iova_domain(&domain->consistent_iovad,
+			 PAGE_SIZE, nr_bounce_pfns);
+
+	return domain;
+
+	/* Unwind in reverse order of allocation. */
+free_maps:
+	vfree(domain->bounce_maps);
+free_iotlb:
+	vhost_iotlb_free(domain->iotlb);
+free_domain:
+	kfree(domain);
+	return NULL;
+}
+
+/* Take a reference on the IOVA cache; returns iova_cache_get()'s result. */
+int vduse_domain_init(void)
+{
+	int ret = iova_cache_get();
+
+	return ret;
+}
+
+/* Drop the IOVA cache reference taken in vduse_domain_init(). */
+void vduse_domain_exit(void)
+{
+	iova_cache_put();
+	return;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * MMU-based software IOTLB.
+ *
+ * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author: Xie Yongji <xieyongji@bytedance.com>
+ *
+ */
+
+#ifndef _VDUSE_IOVA_DOMAIN_H
+#define _VDUSE_IOVA_DOMAIN_H
+
+#include <linux/iova.h>
+#include <linux/dma-mapping.h>
+#include <linux/vhost_iotlb.h>
+
+#define IOVA_START_PFN 1
+
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
+
+/* Per-page bookkeeping for the bounce window; one entry per bounce pfn. */
+struct vduse_bounce_map {
+ struct page *bounce_page; /* page backing this slot of the bounce window */
+ u64 orig_phys; /* INVALID_PHYS_ADDR at creation time */
+};
+
+/*
+ * State of one VDUSE IOVA domain.  IOVAs below bounce_size belong to the
+ * bounce window (streaming mappings); coherent allocations are placed above
+ * it, bounded by iova_limit.
+ */
+struct vduse_iova_domain {
+ struct iova_domain stream_iovad; /* IOVA allocator for streaming mappings */
+ struct iova_domain consistent_iovad; /* IOVA allocator for coherent memory */
+ struct vduse_bounce_map *bounce_maps; /* vzalloc'ed array, one per bounce pfn */
+ size_t bounce_size; /* page-aligned size of the bounce window */
+ unsigned long iova_limit; /* limit for coherent IOVA allocations */
+ int bounce_map; /* non-zero when a bounce mapping exists to reset */
+ struct vhost_iotlb *iotlb; /* IOTLB holding the domain's mappings */
+ spinlock_t iotlb_lock; /* protects iotlb */
+ struct file *file; /* anonymous file backing mmap(); owns the domain */
+};
+
+int vduse_domain_set_map(struct vduse_iova_domain *domain,
+ struct vhost_iotlb *iotlb);
+
+void vduse_domain_clear_map(struct vduse_iova_domain *domain,
+ struct vhost_iotlb *iotlb);
+
+dma_addr_t vduse_domain_map_page(struct vduse_iova_domain *domain,
+ struct page *page, unsigned long offset,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs);
+
+void vduse_domain_unmap_page(struct vduse_iova_domain *domain,
+ dma_addr_t dma_addr, size_t size,
+ enum dma_data_direction dir, unsigned long attrs);
+
+void *vduse_domain_alloc_coherent(struct vduse_iova_domain *domain,
+ size_t size, dma_addr_t *dma_addr,
+ gfp_t flag, unsigned long attrs);
+
+void vduse_domain_free_coherent(struct vduse_iova_domain *domain, size_t size,
+ void *vaddr, dma_addr_t dma_addr,
+ unsigned long attrs);
+
+void vduse_domain_reset_bounce_map(struct vduse_iova_domain *domain);
+
+void vduse_domain_destroy(struct vduse_iova_domain *domain);
+
+struct vduse_iova_domain *vduse_domain_create(unsigned long iova_limit,
+ size_t bounce_size);
+
+int vduse_domain_init(void);
+
+void vduse_domain_exit(void);
+
+#endif /* _VDUSE_IOVA_DOMAIN_H */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDUSE: vDPA Device in Userspace
+ *
+ * Copyright (C) 2020-2021 Bytedance Inc. and/or its affiliates. All rights reserved.
+ *
+ * Author: Xie Yongji <xieyongji@bytedance.com>
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/cdev.h>
+#include <linux/device.h>
+#include <linux/eventfd.h>
+#include <linux/slab.h>
+#include <linux/wait.h>
+#include <linux/dma-map-ops.h>
+#include <linux/poll.h>
+#include <linux/file.h>
+#include <linux/uio.h>
+#include <linux/vdpa.h>
+#include <linux/nospec.h>
+#include <uapi/linux/vduse.h>
+#include <uapi/linux/vdpa.h>
+#include <uapi/linux/virtio_config.h>
+#include <uapi/linux/virtio_ids.h>
+#include <uapi/linux/virtio_blk.h>
+#include <linux/mod_devicetable.h>
+
+#include "iova_domain.h"
+
+#define DRV_AUTHOR "Yongji Xie <xieyongji@bytedance.com>"
+#define DRV_DESC "vDPA Device in Userspace"
+#define DRV_LICENSE "GPL v2"
+
+#define VDUSE_DEV_MAX (1U << MINORBITS)
+#define VDUSE_BOUNCE_SIZE (64 * 1024 * 1024)
+#define VDUSE_IOVA_SIZE (128 * 1024 * 1024)
+#define VDUSE_MSG_DEFAULT_TIMEOUT 30
+
+/* Per-virtqueue state of a VDUSE device. */
+struct vduse_virtqueue {
+ u16 index; /* queue index, sent in VDUSE_GET_VQ_STATE requests */
+ u16 num_max; /* maximum size, set through VDUSE_VQ_SETUP */
+ u32 num; /* size chosen by the vDPA driver */
+ u64 desc_addr; /* descriptor area address */
+ u64 driver_addr; /* driver area address */
+ u64 device_addr; /* device area address */
+ struct vdpa_vq_state state; /* state saved by set_vq_state */
+ bool ready;
+ bool kicked; /* a kick arrived while no kickfd was installed */
+ spinlock_t kick_lock; /* protects kickfd and kicked */
+ spinlock_t irq_lock; /* protects cb */
+ struct eventfd_ctx *kickfd; /* eventfd signalled on kick, may be NULL */
+ struct vdpa_callback cb; /* interrupt callback of the vDPA driver */
+ struct work_struct inject; /* runs vduse_vq_irq_inject() */
+ struct work_struct kick; /* runs vduse_vq_kick_work() */
+};
+
+struct vduse_dev;
+
+/* Wrapper tying a vdpa_device to the VDUSE device that implements it. */
+struct vduse_vdpa {
+ struct vdpa_device vdpa; /* embedded device; see vdpa_to_vduse() */
+ struct vduse_dev *dev; /* owning VDUSE device */
+};
+
+/* One userspace-implemented vDPA device. */
+struct vduse_dev {
+ struct vduse_vdpa *vdev; /* attached vDPA device, NULL when none */
+ struct device *dev;
+ struct vduse_virtqueue *vqs; /* array of vq_num virtqueues */
+ struct vduse_iova_domain *domain; /* IOVA domain used by the DMA ops */
+ char *name; /* matched by vduse_find_dev() */
+ struct mutex lock; /* taken around vdev/connected checks */
+ spinlock_t msg_lock; /* protects send_list, recv_list, msg_unique */
+ u64 msg_unique; /* source of request ids */
+ u32 msg_timeout; /* request timeout in seconds, 0 = wait forever */
+ wait_queue_head_t waitq; /* readers wait here for new requests */
+ struct list_head send_list; /* requests not yet read by userspace */
+ struct list_head recv_list; /* requests read and awaiting a response */
+ struct vdpa_callback config_cb; /* config-change callback */
+ struct work_struct inject; /* runs vduse_dev_irq_inject() */
+ spinlock_t irq_lock; /* protects config_cb */
+ int minor; /* idr key / char device minor */
+ bool broken; /* set once a request timed out */
+ bool connected; /* the device file is currently open */
+ u64 api_version;
+ u64 device_features;
+ u64 driver_features;
+ u32 device_id;
+ u32 vendor_id;
+ u32 generation; /* bumped on every reset */
+ u32 config_size; /* size in bytes of the config buffer */
+ void *config; /* device configuration space */
+ u8 status;
+ u32 vq_num; /* number of entries in vqs */
+ u32 vq_align;
+};
+
+/* One in-flight request/response exchange with the userspace daemon. */
+struct vduse_dev_msg {
+ struct vduse_dev_request req; /* copied to userspace by read */
+ struct vduse_dev_response resp; /* filled in from userspace by write */
+ struct list_head list; /* link in send_list or recv_list */
+ wait_queue_head_t waitq; /* the requester sleeps here */
+ bool completed; /* set when resp is valid or the request failed */
+};
+
+/* State holding an API version; its users are outside this part of the file. */
+struct vduse_control {
+ u64 api_version;
+};
+
+static DEFINE_MUTEX(vduse_lock);
+static DEFINE_IDR(vduse_idr);
+
+static dev_t vduse_major;
+static struct class *vduse_class;
+static struct cdev vduse_ctrl_cdev;
+static struct cdev vduse_cdev;
+static struct workqueue_struct *vduse_irq_wq;
+
+static u32 allowed_device_id[] = {
+ VIRTIO_ID_BLOCK,
+};
+
+/* Map a vdpa_device embedded in a vduse_vdpa to its owning vduse_dev. */
+static inline struct vduse_dev *vdpa_to_vduse(struct vdpa_device *vdpa)
+{
+	return container_of(vdpa, struct vduse_vdpa, vdpa)->dev;
+}
+
+/* Map a generic struct device of a vDPA device to its owning vduse_dev. */
+static inline struct vduse_dev *dev_to_vduse(struct device *dev)
+{
+	return vdpa_to_vduse(dev_to_vdpa(dev));
+}
+
+/*
+ * Find the message with @request_id on @head, unlink it and return it.
+ * Returns NULL when no message matches.
+ */
+static struct vduse_dev_msg *vduse_find_msg(struct list_head *head,
+					    uint32_t request_id)
+{
+	struct vduse_dev_msg *cur;
+
+	list_for_each_entry(cur, head, list) {
+		if (cur->req.request_id != request_id)
+			continue;
+
+		list_del(&cur->list);
+		return cur;
+	}
+
+	return NULL;
+}
+
+/* Unlink and return the first message on @head, or NULL if it is empty. */
+static struct vduse_dev_msg *vduse_dequeue_msg(struct list_head *head)
+{
+	struct vduse_dev_msg *first;
+
+	if (list_empty(head))
+		return NULL;
+
+	first = list_first_entry(head, struct vduse_dev_msg, list);
+	list_del(&first->list);
+
+	return first;
+}
+
+/* Append @msg to the tail of @head. */
+static void vduse_enqueue_msg(struct list_head *head,
+			      struct vduse_dev_msg *msg)
+{
+	struct list_head *node = &msg->list;
+
+	list_add_tail(node, head);
+}
+
+/*
+ * Mark @dev as broken.
+ *
+ * Every message still queued (sent or awaiting a response) is completed
+ * with VDUSE_REQ_RESULT_FAILED and its waiter is woken; finally anybody
+ * sleeping on dev->waitq is woken too.  Does nothing if the device is
+ * already broken.
+ */
+static void vduse_dev_broken(struct vduse_dev *dev)
+{
+	struct vduse_dev_msg *msg;
+
+	if (unlikely(dev->broken))
+		return;
+
+	/* Gather everything on send_list, then drain it from the head. */
+	list_splice_init(&dev->recv_list, &dev->send_list);
+	while (!list_empty(&dev->send_list)) {
+		msg = list_first_entry(&dev->send_list,
+				       struct vduse_dev_msg, list);
+		list_del(&msg->list);
+		msg->completed = 1;
+		msg->resp.result = VDUSE_REQ_RESULT_FAILED;
+		wake_up(&msg->waitq);
+	}
+	dev->broken = true;
+	wake_up(&dev->waitq);
+}
+
+/*
+ * Send @msg to the userspace daemon and wait for its response.
+ *
+ * The message gets a fresh request id, is queued on send_list and readers
+ * are woken.  The caller then sleeps (killable) until the message is
+ * completed, for at most msg_timeout seconds when a timeout is configured.
+ *
+ * Returns 0 when the response result is VDUSE_REQ_RESULT_OK, -EIO
+ * otherwise (including when the device is broken or the wait ended without
+ * completion).
+ */
+static int vduse_dev_msg_sync(struct vduse_dev *dev,
+ struct vduse_dev_msg *msg)
+{
+ int ret;
+
+ /* Cheap unlocked check; repeated under msg_lock below. */
+ if (unlikely(dev->broken))
+ return -EIO;
+
+ init_waitqueue_head(&msg->waitq);
+ spin_lock(&dev->msg_lock);
+ if (unlikely(dev->broken)) {
+ spin_unlock(&dev->msg_lock);
+ return -EIO;
+ }
+ msg->req.request_id = dev->msg_unique++;
+ vduse_enqueue_msg(&dev->send_list, msg);
+ wake_up(&dev->waitq);
+ spin_unlock(&dev->msg_lock);
+ if (dev->msg_timeout)
+ ret = wait_event_killable_timeout(msg->waitq, msg->completed,
+ (long)dev->msg_timeout * HZ);
+ else
+ ret = wait_event_killable(msg->waitq, msg->completed);
+
+ spin_lock(&dev->msg_lock);
+ if (!msg->completed) {
+ /* Still queued on one of the lists: take it off and fail it. */
+ list_del(&msg->list);
+ msg->resp.result = VDUSE_REQ_RESULT_FAILED;
+ /* Mark the device as malfunction when there is a timeout */
+ if (!ret)
+ vduse_dev_broken(dev);
+ }
+ ret = (msg->resp.result == VDUSE_REQ_RESULT_OK) ? 0 : -EIO;
+ spin_unlock(&dev->msg_lock);
+
+ return ret;
+}
+
+/*
+ * Ask userspace for the state of a packed virtqueue.
+ *
+ * Sends VDUSE_GET_VQ_STATE for @vq and copies the answer into @packed,
+ * masking the wrap counters to one bit and the indices to 15 bits.
+ * Returns 0 on success or the error from vduse_dev_msg_sync().
+ */
+static int vduse_dev_get_vq_state_packed(struct vduse_dev *dev,
+					 struct vduse_virtqueue *vq,
+					 struct vdpa_vq_state_packed *packed)
+{
+	struct vduse_dev_msg request = { 0 };
+	int err;
+
+	request.req.type = VDUSE_GET_VQ_STATE;
+	request.req.vq_state.index = vq->index;
+
+	err = vduse_dev_msg_sync(dev, &request);
+	if (err)
+		return err;
+
+	packed->last_avail_counter =
+		request.resp.vq_state.packed.last_avail_counter & 0x0001;
+	packed->last_avail_idx =
+		request.resp.vq_state.packed.last_avail_idx & 0x7FFF;
+	packed->last_used_counter =
+		request.resp.vq_state.packed.last_used_counter & 0x0001;
+	packed->last_used_idx =
+		request.resp.vq_state.packed.last_used_idx & 0x7FFF;
+
+	return 0;
+}
+
+/*
+ * Ask userspace for the state of a split virtqueue.
+ *
+ * Sends VDUSE_GET_VQ_STATE for @vq and stores the returned avail index in
+ * @split.  Returns 0 on success or the error from vduse_dev_msg_sync().
+ */
+static int vduse_dev_get_vq_state_split(struct vduse_dev *dev,
+					struct vduse_virtqueue *vq,
+					struct vdpa_vq_state_split *split)
+{
+	struct vduse_dev_msg request = { 0 };
+	int err;
+
+	request.req.type = VDUSE_GET_VQ_STATE;
+	request.req.vq_state.index = vq->index;
+
+	err = vduse_dev_msg_sync(dev, &request);
+	if (!err)
+		split->avail_index = request.resp.vq_state.split.avail_index;
+
+	return err;
+}
+
+/*
+ * Tell userspace about a new device @status via VDUSE_SET_STATUS.
+ * Returns the result of vduse_dev_msg_sync().
+ */
+static int vduse_dev_set_status(struct vduse_dev *dev, u8 status)
+{
+	struct vduse_dev_msg request = { 0 };
+
+	request.req.s.status = status;
+	request.req.type = VDUSE_SET_STATUS;
+
+	return vduse_dev_msg_sync(dev, &request);
+}
+
+/*
+ * Notify userspace via VDUSE_UPDATE_IOTLB that the IOVA range
+ * [@start, @last] changed.  Returns -EINVAL for an inverted range,
+ * otherwise the result of vduse_dev_msg_sync().
+ */
+static int vduse_dev_update_iotlb(struct vduse_dev *dev,
+				  u64 start, u64 last)
+{
+	struct vduse_dev_msg request = { 0 };
+
+	if (start > last)
+		return -EINVAL;
+
+	request.req.type = VDUSE_UPDATE_IOTLB;
+	request.req.iova.start = start;
+	request.req.iova.last = last;
+
+	return vduse_dev_msg_sync(dev, &request);
+}
+
+/*
+ * read() handler: hand the next pending request to userspace.
+ *
+ * The caller's buffer must hold at least one struct vduse_dev_request.
+ * When send_list is empty the call returns -EAGAIN for O_NONBLOCK files
+ * and otherwise sleeps interruptibly until a request is queued.
+ *
+ * Returns the number of bytes copied; -EINVAL if the buffer is too small;
+ * -EFAULT if the copy failed (the request is put back on send_list); or
+ * the error from the interrupted wait.
+ */
+static ssize_t vduse_dev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+ struct file *file = iocb->ki_filp;
+ struct vduse_dev *dev = file->private_data;
+ struct vduse_dev_msg *msg;
+ int size = sizeof(struct vduse_dev_request);
+ ssize_t ret;
+
+ if (iov_iter_count(to) < size)
+ return -EINVAL;
+
+ spin_lock(&dev->msg_lock);
+ while (1) {
+ msg = vduse_dequeue_msg(&dev->send_list);
+ if (msg)
+ break;
+
+ ret = -EAGAIN;
+ if (file->f_flags & O_NONBLOCK)
+ goto unlock;
+
+ /* Sleep without the spinlock; re-check the list once re-locked. */
+ spin_unlock(&dev->msg_lock);
+ ret = wait_event_interruptible_exclusive(dev->waitq,
+ !list_empty(&dev->send_list));
+ if (ret)
+ return ret;
+
+ spin_lock(&dev->msg_lock);
+ }
+ /* The copy to userspace is done with msg_lock released. */
+ spin_unlock(&dev->msg_lock);
+ ret = copy_to_iter(&msg->req, size, to);
+ spin_lock(&dev->msg_lock);
+ if (ret != size) {
+ ret = -EFAULT;
+ vduse_enqueue_msg(&dev->send_list, msg);
+ goto unlock;
+ }
+ /* Delivered: park the request until the response is written. */
+ vduse_enqueue_msg(&dev->recv_list, msg);
+unlock:
+ spin_unlock(&dev->msg_lock);
+
+ return ret;
+}
+
+/* Return true when the first @size bytes at @ptr are all zero. */
+static bool is_mem_zero(const char *ptr, int size)
+{
+	int pos = 0;
+
+	while (pos < size) {
+		if (ptr[pos] != 0)
+			return false;
+		pos++;
+	}
+
+	return true;
+}
+
+/*
+ * write() handler: accept one response from userspace.
+ *
+ * Exactly one struct vduse_dev_response is consumed; its reserved bytes
+ * must be zero.  The response is matched by request id against recv_list,
+ * copied into the waiting message, and the requester is woken.
+ *
+ * Returns sizeof(struct vduse_dev_response) on success, -EINVAL for a
+ * short or malformed response, -ENOENT when no request is waiting for
+ * that id.
+ */
+static ssize_t vduse_dev_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+	struct file *file = iocb->ki_filp;
+	struct vduse_dev *dev = file->private_data;
+	struct vduse_dev_response resp;
+	struct vduse_dev_msg *msg;
+	/* Signed, because this variable also carries negative errnos. */
+	ssize_t ret;
+
+	if (copy_from_iter(&resp, sizeof(resp), from) != sizeof(resp))
+		return -EINVAL;
+
+	if (!is_mem_zero((const char *)resp.reserved, sizeof(resp.reserved)))
+		return -EINVAL;
+
+	ret = sizeof(resp);
+
+	spin_lock(&dev->msg_lock);
+	msg = vduse_find_msg(&dev->recv_list, resp.request_id);
+	if (!msg) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	memcpy(&msg->resp, &resp, sizeof(resp));
+	msg->completed = 1;
+	wake_up(&msg->waitq);
+unlock:
+	spin_unlock(&dev->msg_lock);
+
+	return ret;
+}
+
+/*
+ * poll() handler.  Reports EPOLLIN | EPOLLRDNORM while requests are
+ * waiting to be read, EPOLLOUT | EPOLLWRNORM while requests await a
+ * response, and EPOLLERR once the device is broken.
+ */
+static __poll_t vduse_dev_poll(struct file *file, poll_table *wait)
+{
+	struct vduse_dev *dev = file->private_data;
+	__poll_t events = 0;
+
+	poll_wait(file, &dev->waitq, wait);
+
+	spin_lock(&dev->msg_lock);
+
+	if (!list_empty(&dev->recv_list))
+		events |= EPOLLOUT | EPOLLWRNORM;
+	if (!list_empty(&dev->send_list))
+		events |= EPOLLIN | EPOLLRDNORM;
+	if (unlikely(dev->broken))
+		events |= EPOLLERR;
+
+	spin_unlock(&dev->msg_lock);
+
+	return events;
+}
+
+/*
+ * Bring @dev back to its initial state.
+ *
+ * Resets the bounce map (if one exists), clears status and driver
+ * features, bumps the config generation, detaches the config callback and
+ * then resets every virtqueue: addresses, size, saved state, kickfd and
+ * interrupt callback.  Pending inject/kick work is flushed after the
+ * corresponding callback has been cleared.
+ */
+static void vduse_dev_reset(struct vduse_dev *dev)
+{
+ int i;
+ struct vduse_iova_domain *domain = dev->domain;
+
+ /* The coherent mappings are handled in vduse_dev_free_coherent() */
+ if (domain->bounce_map)
+ vduse_domain_reset_bounce_map(domain);
+
+ dev->status = 0;
+ dev->driver_features = 0;
+ dev->generation++;
+ spin_lock(&dev->irq_lock);
+ dev->config_cb.callback = NULL;
+ dev->config_cb.private = NULL;
+ spin_unlock(&dev->irq_lock);
+ /* Wait for a config interrupt injection that may already be running. */
+ flush_work(&dev->inject);
+
+ for (i = 0; i < dev->vq_num; i++) {
+ struct vduse_virtqueue *vq = &dev->vqs[i];
+
+ vq->ready = false;
+ vq->desc_addr = 0;
+ vq->driver_addr = 0;
+ vq->device_addr = 0;
+ vq->num = 0;
+ memset(&vq->state, 0, sizeof(vq->state));
+
+ spin_lock(&vq->kick_lock);
+ vq->kicked = false;
+ if (vq->kickfd)
+ eventfd_ctx_put(vq->kickfd);
+ vq->kickfd = NULL;
+ spin_unlock(&vq->kick_lock);
+
+ spin_lock(&vq->irq_lock);
+ vq->cb.callback = NULL;
+ vq->cb.private = NULL;
+ spin_unlock(&vq->irq_lock);
+ /* Drain work queued before the callback and kickfd were cleared. */
+ flush_work(&vq->inject);
+ flush_work(&vq->kick);
+ }
+}
+
+/*
+ * vDPA op: record the descriptor, driver and device area addresses of
+ * virtqueue @idx.  Always returns 0.
+ */
+static int vduse_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 idx,
+				     u64 desc_area, u64 driver_area,
+				     u64 device_area)
+{
+	struct vduse_virtqueue *vq = &vdpa_to_vduse(vdpa)->vqs[idx];
+
+	vq->device_addr = device_area;
+	vq->driver_addr = driver_area;
+	vq->desc_addr = desc_area;
+
+	return 0;
+}
+
+/*
+ * Deliver a kick for @vq.  Nothing happens while the queue is not ready.
+ * With a kickfd installed the eventfd is signalled; otherwise the kick is
+ * remembered in vq->kicked.
+ */
+static void vduse_vq_kick(struct vduse_virtqueue *vq)
+{
+	spin_lock(&vq->kick_lock);
+	if (vq->ready) {
+		if (!vq->kickfd)
+			vq->kicked = true;
+		else
+			eventfd_signal(vq->kickfd, 1);
+	}
+	spin_unlock(&vq->kick_lock);
+}
+
+/* Work handler that performs a deferred kick of the owning virtqueue. */
+static void vduse_vq_kick_work(struct work_struct *work)
+{
+	vduse_vq_kick(container_of(work, struct vduse_virtqueue, kick));
+}
+
+/*
+ * vDPA op: kick virtqueue @idx.  The kick is delivered directly when
+ * eventfd_signal_allowed() says so, and deferred to the vq's kick work
+ * otherwise.
+ */
+static void vduse_vdpa_kick_vq(struct vdpa_device *vdpa, u16 idx)
+{
+	struct vduse_virtqueue *vq = &vdpa_to_vduse(vdpa)->vqs[idx];
+
+	if (eventfd_signal_allowed())
+		vduse_vq_kick(vq);
+	else
+		schedule_work(&vq->kick);
+}
+
+/* vDPA op: install the interrupt callback of virtqueue @idx under irq_lock. */
+static void vduse_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 idx,
+				 struct vdpa_callback *cb)
+{
+	struct vduse_virtqueue *vq = &vdpa_to_vduse(vdpa)->vqs[idx];
+	struct vdpa_callback *slot = &vq->cb;
+
+	spin_lock(&vq->irq_lock);
+	slot->callback = cb->callback;
+	slot->private = cb->private;
+	spin_unlock(&vq->irq_lock);
+}
+
+/* vDPA op: record the size chosen for virtqueue @idx. */
+static void vduse_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 idx, u32 num)
+{
+	vdpa_to_vduse(vdpa)->vqs[idx].num = num;
+}
+
+/* vDPA op: set the ready flag of virtqueue @idx. */
+static void vduse_vdpa_set_vq_ready(struct vdpa_device *vdpa,
+				    u16 idx, bool ready)
+{
+	vdpa_to_vduse(vdpa)->vqs[idx].ready = ready;
+}
+
+/* vDPA op: report the ready flag of virtqueue @idx. */
+static bool vduse_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 idx)
+{
+	return vdpa_to_vduse(vdpa)->vqs[idx].ready;
+}
+
+/*
+ * vDPA op: save @state for virtqueue @idx.  The packed layout is used when
+ * the driver negotiated VIRTIO_F_RING_PACKED, the split layout otherwise.
+ * Always returns 0.
+ */
+static int vduse_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 idx,
+				   const struct vdpa_vq_state *state)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+
+	if (!(dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED))) {
+		vq->state.split.avail_index = state->split.avail_index;
+		return 0;
+	}
+
+	vq->state.packed.last_avail_counter = state->packed.last_avail_counter;
+	vq->state.packed.last_avail_idx = state->packed.last_avail_idx;
+	vq->state.packed.last_used_counter = state->packed.last_used_counter;
+	vq->state.packed.last_used_idx = state->packed.last_used_idx;
+
+	return 0;
+}
+
+/*
+ * vDPA op: fetch the state of virtqueue @idx from userspace, in packed or
+ * split layout depending on whether VIRTIO_F_RING_PACKED was negotiated.
+ */
+static int vduse_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 idx,
+				   struct vdpa_vq_state *state)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = &dev->vqs[idx];
+	bool packed = dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED);
+
+	if (!packed)
+		return vduse_dev_get_vq_state_split(dev, vq, &state->split);
+
+	return vduse_dev_get_vq_state_packed(dev, vq, &state->packed);
+}
+
+/* vDPA op: report the virtqueue alignment of the device. */
+static u32 vduse_vdpa_get_vq_align(struct vdpa_device *vdpa)
+{
+	return vdpa_to_vduse(vdpa)->vq_align;
+}
+
+/* vDPA op: report the feature bits offered by the device. */
+static u64 vduse_vdpa_get_features(struct vdpa_device *vdpa)
+{
+	return vdpa_to_vduse(vdpa)->device_features;
+}
+
+/* vDPA op: record the feature bits written by the driver.  Returns 0. */
+static int vduse_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
+{
+	vdpa_to_vduse(vdpa)->driver_features = features;
+
+	return 0;
+}
+
+/* vDPA op: install the config-change callback under dev->irq_lock. */
+static void vduse_vdpa_set_config_cb(struct vdpa_device *vdpa,
+				     struct vdpa_callback *cb)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vdpa_callback *slot = &dev->config_cb;
+
+	spin_lock(&dev->irq_lock);
+	slot->callback = cb->callback;
+	slot->private = cb->private;
+	spin_unlock(&dev->irq_lock);
+}
+
+/* vDPA op: report the largest num_max over all virtqueues (0 if none). */
+static u16 vduse_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	struct vduse_virtqueue *vq = dev->vqs;
+	u16 largest = 0;
+	u32 left;
+
+	for (left = dev->vq_num; left; left--, vq++) {
+		if (vq->num_max > largest)
+			largest = vq->num_max;
+	}
+
+	return largest;
+}
+
+/* vDPA op: report the virtio device id. */
+static u32 vduse_vdpa_get_device_id(struct vdpa_device *vdpa)
+{
+	return vdpa_to_vduse(vdpa)->device_id;
+}
+
+/* vDPA op: report the virtio vendor id. */
+static u32 vduse_vdpa_get_vendor_id(struct vdpa_device *vdpa)
+{
+	return vdpa_to_vduse(vdpa)->vendor_id;
+}
+
+/* vDPA op: report the cached device status. */
+static u8 vduse_vdpa_get_status(struct vdpa_device *vdpa)
+{
+	return vdpa_to_vduse(vdpa)->status;
+}
+
+/*
+ * vDPA op: forward @status to userspace and cache it only when userspace
+ * accepted it.
+ */
+static void vduse_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	if (!vduse_dev_set_status(dev, status))
+		dev->status = status;
+}
+
+/* vDPA op: report the size in bytes of the configuration space. */
+static size_t vduse_vdpa_get_config_size(struct vdpa_device *vdpa)
+{
+	return vdpa_to_vduse(vdpa)->config_size;
+}
+
+/*
+ * vDPA op: copy @len bytes of the device configuration space, starting at
+ * @offset, into @buf.
+ *
+ * Nothing is copied unless the whole window [@offset, @offset + @len) lies
+ * inside the dev->config_size bytes of dev->config.
+ */
+static void vduse_vdpa_get_config(struct vdpa_device *vdpa, unsigned int offset,
+				  void *buf, unsigned int len)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+
+	/*
+	 * Check @offset on its own first: both operands are unsigned, so
+	 * "config_size - offset" would wrap to a huge value for an
+	 * out-of-range offset and let any @len through.
+	 */
+	if (offset > dev->config_size ||
+	    len > dev->config_size - offset)
+		return;
+
+	memcpy(buf, dev->config + offset, len);
+}
+
+/*
+ * vDPA op: write to the configuration space.  Intentionally a no-op, so
+ * writes from the driver are silently ignored.
+ */
+static void vduse_vdpa_set_config(struct vdpa_device *vdpa, unsigned int offset,
+ const void *buf, unsigned int len)
+{
+ /* Now we only support read-only configuration space */
+}
+
+/*
+ * vDPA op: reset the device.  Userspace is asked to set status 0 first;
+ * only when that succeeds is the kernel-side state reset.
+ * Returns 0 on success, -EIO if userspace did not accept the reset.
+ */
+static int vduse_vdpa_reset(struct vdpa_device *vdpa)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	int err = vduse_dev_set_status(dev, 0);
+
+	if (err)
+		return -EIO;
+
+	vduse_dev_reset(dev);
+
+	return 0;
+}
+
+/* vDPA op: report the config generation counter. */
+static u32 vduse_vdpa_get_generation(struct vdpa_device *vdpa)
+{
+	return vdpa_to_vduse(vdpa)->generation;
+}
+
+/*
+ * vDPA op: install @iotlb as the domain's mapping and tell userspace that
+ * the whole IOVA space changed.  If the notification fails the mapping is
+ * cleared again.  Returns 0 on success or the first error encountered.
+ */
+static int vduse_vdpa_set_map(struct vdpa_device *vdpa,
+			      struct vhost_iotlb *iotlb)
+{
+	struct vduse_dev *dev = vdpa_to_vduse(vdpa);
+	int err;
+
+	err = vduse_domain_set_map(dev->domain, iotlb);
+	if (err)
+		return err;
+
+	err = vduse_dev_update_iotlb(dev, 0ULL, ULLONG_MAX);
+	if (!err)
+		return 0;
+
+	/* Userspace never learned about the new map: roll it back. */
+	vduse_domain_clear_map(dev->domain, iotlb);
+
+	return err;
+}
+
+/* vDPA op: the vDPA device is going away; detach it from the VDUSE device. */
+static void vduse_vdpa_free(struct vdpa_device *vdpa)
+{
+	vdpa_to_vduse(vdpa)->vdev = NULL;
+}
+
+/*
+ * vDPA config operations implemented by VDUSE.  Mappings are managed
+ * through .set_map; .set_config is a no-op because the configuration space
+ * is read-only.
+ */
+static const struct vdpa_config_ops vduse_vdpa_config_ops = {
+ .set_vq_address = vduse_vdpa_set_vq_address,
+ .kick_vq = vduse_vdpa_kick_vq,
+ .set_vq_cb = vduse_vdpa_set_vq_cb,
+ .set_vq_num = vduse_vdpa_set_vq_num,
+ .set_vq_ready = vduse_vdpa_set_vq_ready,
+ .get_vq_ready = vduse_vdpa_get_vq_ready,
+ .set_vq_state = vduse_vdpa_set_vq_state,
+ .get_vq_state = vduse_vdpa_get_vq_state,
+ .get_vq_align = vduse_vdpa_get_vq_align,
+ .get_features = vduse_vdpa_get_features,
+ .set_features = vduse_vdpa_set_features,
+ .set_config_cb = vduse_vdpa_set_config_cb,
+ .get_vq_num_max = vduse_vdpa_get_vq_num_max,
+ .get_device_id = vduse_vdpa_get_device_id,
+ .get_vendor_id = vduse_vdpa_get_vendor_id,
+ .get_status = vduse_vdpa_get_status,
+ .set_status = vduse_vdpa_set_status,
+ .get_config_size = vduse_vdpa_get_config_size,
+ .get_config = vduse_vdpa_get_config,
+ .set_config = vduse_vdpa_set_config,
+ .get_generation = vduse_vdpa_get_generation,
+ .reset = vduse_vdpa_reset,
+ .set_map = vduse_vdpa_set_map,
+ .free = vduse_vdpa_free,
+};
+
+/* DMA op: map a page through the IOVA domain of the owning VDUSE device. */
+static dma_addr_t vduse_dev_map_page(struct device *dev, struct page *page,
+				     unsigned long offset, size_t size,
+				     enum dma_data_direction dir,
+				     unsigned long attrs)
+{
+	struct vduse_iova_domain *domain = dev_to_vduse(dev)->domain;
+
+	return vduse_domain_map_page(domain, page, offset, size, dir, attrs);
+}
+
+/* DMA op: unmap a page through the IOVA domain of the owning VDUSE device. */
+static void vduse_dev_unmap_page(struct device *dev, dma_addr_t dma_addr,
+				 size_t size, enum dma_data_direction dir,
+				 unsigned long attrs)
+{
+	struct vduse_dev *vdev = dev_to_vduse(dev);
+	struct vduse_iova_domain *domain = vdev->domain;
+
+	/* Plain call: a void function must not "return" an expression. */
+	vduse_domain_unmap_page(domain, dma_addr, size, dir, attrs);
+}
+
+/*
+ * DMA op: allocate coherent memory from the IOVA domain of the owning
+ * VDUSE device.
+ *
+ * Returns the kernel virtual address and stores the IOVA in *@dma_addr, or
+ * returns NULL with *@dma_addr set to DMA_MAPPING_ERROR.
+ */
+static void *vduse_dev_alloc_coherent(struct device *dev, size_t size,
+				      dma_addr_t *dma_addr, gfp_t flag,
+				      unsigned long attrs)
+{
+	struct vduse_dev *vdev = dev_to_vduse(dev);
+	struct vduse_iova_domain *domain = vdev->domain;
+	/*
+	 * Must be a real dma_addr_t: the callee stores a dma_addr_t through
+	 * the pointer, which may be wider than unsigned long.
+	 */
+	dma_addr_t iova;
+	void *addr;
+
+	*dma_addr = DMA_MAPPING_ERROR;
+	addr = vduse_domain_alloc_coherent(domain, size, &iova, flag, attrs);
+	if (!addr)
+		return NULL;
+
+	*dma_addr = iova;
+
+	return addr;
+}
+
+/* DMA op: free coherent memory through the device's IOVA domain. */
+static void vduse_dev_free_coherent(struct device *dev, size_t size,
+				    void *vaddr, dma_addr_t dma_addr,
+				    unsigned long attrs)
+{
+	struct vduse_iova_domain *domain = dev_to_vduse(dev)->domain;
+
+	vduse_domain_free_coherent(domain, size, vaddr, dma_addr, attrs);
+}
+
+/* DMA op: a single mapping can be at most as large as the bounce window. */
+static size_t vduse_dev_max_mapping_size(struct device *dev)
+{
+	return dev_to_vduse(dev)->domain->bounce_size;
+}
+
+/* DMA operations that route all mappings through the VDUSE IOVA domain. */
+static const struct dma_map_ops vduse_dev_dma_ops = {
+ .map_page = vduse_dev_map_page,
+ .unmap_page = vduse_dev_unmap_page,
+ .alloc = vduse_dev_alloc_coherent,
+ .free = vduse_dev_free_coherent,
+ .max_mapping_size = vduse_dev_max_mapping_size,
+};
+
+/*
+ * Translate a VDUSE_ACCESS_* permission into the matching O_* open flag.
+ * An unknown permission triggers a warning and yields 0.
+ */
+static unsigned int perm_to_file_flags(u8 perm)
+{
+	if (perm == VDUSE_ACCESS_WO)
+		return O_WRONLY;
+	if (perm == VDUSE_ACCESS_RO)
+		return O_RDONLY;
+	if (perm == VDUSE_ACCESS_RW)
+		return O_RDWR;
+
+	WARN(1, "invalidate vhost IOTLB permission\n");
+
+	return 0;
+}
+
+/*
+ * Install, replace or remove the kick eventfd of a virtqueue.
+ *
+ * A non-negative fd is resolved to an eventfd context and installed;
+ * VDUSE_EVENTFD_DEASSIGN removes the current one; any other negative fd is
+ * ignored.  If a kick was recorded while no eventfd was present and the
+ * queue is ready, the new eventfd is signalled immediately.
+ *
+ * Returns 0 on success, -EINVAL for an out-of-range queue index, or the
+ * error from eventfd_ctx_fdget().
+ */
+static int vduse_kickfd_setup(struct vduse_dev *dev,
+			      struct vduse_vq_eventfd *eventfd)
+{
+	struct eventfd_ctx *new_ctx = NULL;
+	struct vduse_virtqueue *vq;
+	u32 index;
+
+	if (eventfd->index >= dev->vq_num)
+		return -EINVAL;
+
+	index = array_index_nospec(eventfd->index, dev->vq_num);
+	vq = &dev->vqs[index];
+
+	if (eventfd->fd >= 0) {
+		new_ctx = eventfd_ctx_fdget(eventfd->fd);
+		if (IS_ERR(new_ctx))
+			return PTR_ERR(new_ctx);
+	} else {
+		if (eventfd->fd != VDUSE_EVENTFD_DEASSIGN)
+			return 0;
+	}
+
+	spin_lock(&vq->kick_lock);
+	if (vq->kickfd)
+		eventfd_ctx_put(vq->kickfd);
+	vq->kickfd = new_ctx;
+	/* Replay a kick that arrived while no eventfd was installed. */
+	if (vq->ready && vq->kicked && vq->kickfd) {
+		eventfd_signal(vq->kickfd, 1);
+		vq->kicked = false;
+	}
+	spin_unlock(&vq->kick_lock);
+
+	return 0;
+}
+
+/* Return true once every virtqueue has a non-zero num_max. */
+static bool vduse_dev_is_ready(struct vduse_dev *dev)
+{
+	struct vduse_virtqueue *vq = dev->vqs;
+	u32 left;
+
+	for (left = dev->vq_num; left; left--, vq++) {
+		if (vq->num_max == 0)
+			return false;
+	}
+
+	return true;
+}
+
+/*
+ * Work handler: invoke the config-change callback, if one is installed,
+ * with dev->irq_lock held and interrupts disabled.
+ */
+static void vduse_dev_irq_inject(struct work_struct *work)
+{
+	struct vduse_dev *dev = container_of(work, struct vduse_dev, inject);
+	struct vdpa_callback *cb = &dev->config_cb;
+
+	spin_lock_irq(&dev->irq_lock);
+	if (cb->callback)
+		cb->callback(cb->private);
+	spin_unlock_irq(&dev->irq_lock);
+}
+
+/*
+ * Work handler: invoke the virtqueue's interrupt callback when the queue
+ * is ready and a callback is installed, with vq->irq_lock held and
+ * interrupts disabled.
+ */
+static void vduse_vq_irq_inject(struct work_struct *work)
+{
+	struct vduse_virtqueue *vq = container_of(work,
+					struct vduse_virtqueue, inject);
+	struct vdpa_callback *cb = &vq->cb;
+
+	spin_lock_irq(&vq->irq_lock);
+	if (vq->ready && cb->callback)
+		cb->callback(cb->private);
+	spin_unlock_irq(&vq->irq_lock);
+}
+
+/*
+ * ioctl() handler of a VDUSE device file.
+ *
+ * Supported commands:
+ *   VDUSE_IOTLB_GET_FD          - look up the IOTLB entry overlapping a
+ *                                 range, return its bounds and a new fd
+ *   VDUSE_DEV_GET_FEATURES      - read the features written by the driver
+ *   VDUSE_DEV_SET_CONFIG        - update part of the configuration space
+ *   VDUSE_DEV_INJECT_CONFIG_IRQ - queue a config-change interrupt
+ *   VDUSE_VQ_SETUP              - set the maximum size of a virtqueue
+ *   VDUSE_VQ_GET_INFO           - read addresses, size, state and ready
+ *   VDUSE_VQ_SETUP_KICKFD       - install or remove a kick eventfd
+ *   VDUSE_VQ_INJECT_IRQ         - queue a virtqueue interrupt
+ *
+ * Returns -EPERM once the device is broken, -ENOIOCTLCMD for unknown
+ * commands, and otherwise the command's result.
+ */
+static long vduse_dev_ioctl(struct file *file, unsigned int cmd,
+			    unsigned long arg)
+{
+	struct vduse_dev *dev = file->private_data;
+	void __user *argp = (void __user *)arg;
+	int ret;
+
+	if (unlikely(dev->broken))
+		return -EPERM;
+
+	switch (cmd) {
+	case VDUSE_IOTLB_GET_FD: {
+		struct vduse_iotlb_entry entry;
+		struct vhost_iotlb_map *map;
+		struct vdpa_map_file *map_file;
+		struct vduse_iova_domain *domain = dev->domain;
+		struct file *f = NULL;
+
+		ret = -EFAULT;
+		if (copy_from_user(&entry, argp, sizeof(entry)))
+			break;
+
+		ret = -EINVAL;
+		if (entry.start > entry.last)
+			break;
+
+		spin_lock(&domain->iotlb_lock);
+		map = vhost_iotlb_itree_first(domain->iotlb,
+					      entry.start, entry.last);
+		if (map) {
+			map_file = (struct vdpa_map_file *)map->opaque;
+			/* Hold a reference so the file outlives the lock. */
+			f = get_file(map_file->file);
+			entry.offset = map_file->offset;
+			entry.start = map->start;
+			entry.last = map->last;
+			entry.perm = map->perm;
+		}
+		spin_unlock(&domain->iotlb_lock);
+		ret = -EINVAL;
+		if (!f)
+			break;
+
+		ret = -EFAULT;
+		if (copy_to_user(argp, &entry, sizeof(entry))) {
+			fput(f);
+			break;
+		}
+		ret = receive_fd(f, perm_to_file_flags(entry.perm));
+		/* Drop the reference taken under iotlb_lock above. */
+		fput(f);
+		break;
+	}
+	case VDUSE_DEV_GET_FEATURES:
+		/*
+		 * Just mirror what driver wrote here.
+		 * The driver is expected to check FEATURE_OK later.
+		 */
+		ret = put_user(dev->driver_features, (u64 __user *)argp);
+		break;
+	case VDUSE_DEV_SET_CONFIG: {
+		struct vduse_config_data config;
+		unsigned long size = offsetof(struct vduse_config_data,
+					      buffer);
+
+		ret = -EFAULT;
+		if (copy_from_user(&config, argp, size))
+			break;
+
+		/*
+		 * Validate the offset on its own before subtracting: the
+		 * arithmetic is unsigned, so an offset beyond config_size
+		 * would wrap and let an out-of-bounds write through.
+		 */
+		ret = -EINVAL;
+		if (config.length == 0 ||
+		    config.offset > dev->config_size ||
+		    config.length > dev->config_size - config.offset)
+			break;
+
+		ret = -EFAULT;
+		if (copy_from_user(dev->config + config.offset, argp + size,
+				   config.length))
+			break;
+
+		ret = 0;
+		break;
+	}
+	case VDUSE_DEV_INJECT_CONFIG_IRQ:
+		ret = 0;
+		queue_work(vduse_irq_wq, &dev->inject);
+		break;
+	case VDUSE_VQ_SETUP: {
+		struct vduse_vq_config config;
+		u32 index;
+
+		ret = -EFAULT;
+		if (copy_from_user(&config, argp, sizeof(config)))
+			break;
+
+		ret = -EINVAL;
+		if (config.index >= dev->vq_num)
+			break;
+
+		if (!is_mem_zero((const char *)config.reserved,
+				 sizeof(config.reserved)))
+			break;
+
+		index = array_index_nospec(config.index, dev->vq_num);
+		dev->vqs[index].num_max = config.max_size;
+		ret = 0;
+		break;
+	}
+	case VDUSE_VQ_GET_INFO: {
+		struct vduse_vq_info vq_info;
+		struct vduse_virtqueue *vq;
+		u32 index;
+
+		ret = -EFAULT;
+		if (copy_from_user(&vq_info, argp, sizeof(vq_info)))
+			break;
+
+		ret = -EINVAL;
+		if (vq_info.index >= dev->vq_num)
+			break;
+
+		index = array_index_nospec(vq_info.index, dev->vq_num);
+		vq = &dev->vqs[index];
+		vq_info.desc_addr = vq->desc_addr;
+		vq_info.driver_addr = vq->driver_addr;
+		vq_info.device_addr = vq->device_addr;
+		vq_info.num = vq->num;
+
+		if (dev->driver_features & BIT_ULL(VIRTIO_F_RING_PACKED)) {
+			vq_info.packed.last_avail_counter =
+				vq->state.packed.last_avail_counter;
+			vq_info.packed.last_avail_idx =
+				vq->state.packed.last_avail_idx;
+			vq_info.packed.last_used_counter =
+				vq->state.packed.last_used_counter;
+			vq_info.packed.last_used_idx =
+				vq->state.packed.last_used_idx;
+		} else
+			vq_info.split.avail_index =
+				vq->state.split.avail_index;
+
+		vq_info.ready = vq->ready;
+
+		ret = -EFAULT;
+		if (copy_to_user(argp, &vq_info, sizeof(vq_info)))
+			break;
+
+		ret = 0;
+		break;
+	}
+	case VDUSE_VQ_SETUP_KICKFD: {
+		struct vduse_vq_eventfd eventfd;
+
+		ret = -EFAULT;
+		if (copy_from_user(&eventfd, argp, sizeof(eventfd)))
+			break;
+
+		ret = vduse_kickfd_setup(dev, &eventfd);
+		break;
+	}
+	case VDUSE_VQ_INJECT_IRQ: {
+		u32 index;
+
+		ret = -EFAULT;
+		if (get_user(index, (u32 __user *)argp))
+			break;
+
+		ret = -EINVAL;
+		if (index >= dev->vq_num)
+			break;
+
+		ret = 0;
+		index = array_index_nospec(index, dev->vq_num);
+		queue_work(vduse_irq_wq, &dev->vqs[index].inject);
+		break;
+	}
+	default:
+		ret = -ENOIOCTLCMD;
+		break;
+	}
+
+	return ret;
+}
+
+/*
+ * release() handler of a VDUSE device file.  Requests that were read but
+ * not answered are moved back to send_list, and the device is marked as
+ * no longer connected so that it can be opened again.  Always returns 0.
+ */
+static int vduse_dev_release(struct inode *inode, struct file *file)
+{
+	struct vduse_dev *vduse = file->private_data;
+
+	spin_lock(&vduse->msg_lock);
+	/* Make sure the inflight messages can be processed after reconnection */
+	list_splice_init(&vduse->recv_list, &vduse->send_list);
+	spin_unlock(&vduse->msg_lock);
+
+	vduse->connected = false;
+
+	return 0;
+}
+
+/*
+ * Look up the VDUSE device registered under @minor in vduse_idr, with
+ * vduse_lock held for the lookup.  Returns NULL when there is none.
+ */
+static struct vduse_dev *vduse_dev_get_from_minor(int minor)
+{
+	struct vduse_dev *found;
+
+	mutex_lock(&vduse_lock);
+	found = idr_find(&vduse_idr, minor);
+	mutex_unlock(&vduse_lock);
+
+	return found;
+}
+
+/*
+ * open() handler of a VDUSE device file.  Only one opener is allowed at a
+ * time.  Returns 0 on success, -ENODEV when no device exists for the
+ * minor, and -EBUSY when the device is already connected.
+ */
+static int vduse_dev_open(struct inode *inode, struct file *file)
+{
+	struct vduse_dev *dev = vduse_dev_get_from_minor(iminor(inode));
+	int ret;
+
+	if (!dev)
+		return -ENODEV;
+
+	mutex_lock(&dev->lock);
+	if (dev->connected) {
+		ret = -EBUSY;
+	} else {
+		dev->connected = true;
+		file->private_data = dev;
+		ret = 0;
+	}
+	mutex_unlock(&dev->lock);
+
+	return ret;
+}
+
+/* File operations of the per-device nodes; registered on vduse_cdev in vduse_init(). */
+static const struct file_operations vduse_dev_fops = {
+ .owner = THIS_MODULE,
+ .open = vduse_dev_open,
+ .release = vduse_dev_release,
+ .read_iter = vduse_dev_read_iter,
+ .write_iter = vduse_dev_write_iter,
+ .poll = vduse_dev_poll,
+ .unlocked_ioctl = vduse_dev_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .llseek = noop_llseek,
+};
+
+/*
+ * Allocate a zero-filled struct vduse_dev and initialise its locks, its
+ * send/receive lists, its IRQ-injection work item and its wait queue.
+ *
+ * Returns the new device, or NULL if the allocation fails.
+ */
+static struct vduse_dev *vduse_dev_create(void)
+{
+	struct vduse_dev *dev;
+
+	dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+	if (dev == NULL)
+		return NULL;
+
+	/* Locks */
+	mutex_init(&dev->lock);
+	spin_lock_init(&dev->msg_lock);
+	spin_lock_init(&dev->irq_lock);
+
+	/* Lists and wait queue */
+	INIT_LIST_HEAD(&dev->send_list);
+	INIT_LIST_HEAD(&dev->recv_list);
+	init_waitqueue_head(&dev->waitq);
+
+	/* Deferred work */
+	INIT_WORK(&dev->inject, vduse_dev_irq_inject);
+
+	return dev;
+}
+
+/*
+ * Free the struct vduse_dev itself.  Members such as name, vqs, config and
+ * domain are not released here; the callers free them before calling this.
+ */
+static void vduse_dev_destroy(struct vduse_dev *dev)
+{
+ kfree(dev);
+}
+
+/*
+ * Find a VDUSE device by name by walking every entry of vduse_idr.
+ *
+ * Returns the first device whose name equals @name, or NULL if there is
+ * none.  No locking is done here; the callers visible in this file hold
+ * vduse_lock around the call.
+ */
+static struct vduse_dev *vduse_find_dev(const char *name)
+{
+	struct vduse_dev *cur;
+	int minor;
+
+	idr_for_each_entry(&vduse_idr, cur, minor) {
+		if (strcmp(cur->name, name) == 0)
+			return cur;
+	}
+
+	return NULL;
+}
+
+/*
+ * Tear down the VDUSE device called @name.
+ *
+ * Returns -EINVAL if no such device exists, -EBUSY if the device still has
+ * a vDPA device attached (dev->vdev) or is currently opened
+ * (dev->connected), and 0 once the device has been destroyed.
+ */
+static int vduse_destroy_dev(char *name)
+{
+ struct vduse_dev *dev = vduse_find_dev(name);
+
+ if (!dev)
+ return -EINVAL;
+
+ mutex_lock(&dev->lock);
+ if (dev->vdev || dev->connected) {
+ mutex_unlock(&dev->lock);
+ return -EBUSY;
+ }
+ /* Mark as connected so that a racing vduse_dev_open() gets -EBUSY */
+ dev->connected = true;
+ mutex_unlock(&dev->lock);
+
+ vduse_dev_reset(dev);
+ device_destroy(vduse_class, MKDEV(MAJOR(vduse_major), dev->minor));
+ idr_remove(&vduse_idr, dev->minor);
+ kvfree(dev->config);
+ kfree(dev->vqs);
+ vduse_domain_destroy(dev->domain);
+ kfree(dev->name);
+ vduse_dev_destroy(dev);
+ /* Drop the module reference taken in vduse_create_dev() */
+ module_put(THIS_MODULE);
+
+ return 0;
+}
+
+/* Tell whether @device_id appears in the allowed_device_id[] table. */
+static bool device_is_allowed(u32 device_id)
+{
+	int idx;
+
+	for (idx = 0; idx < ARRAY_SIZE(allowed_device_id); idx++) {
+		if (device_id == allowed_device_id[idx])
+			return true;
+	}
+
+	return false;
+}
+
+/*
+ * Check a feature bitmap offered for a new device: VIRTIO_F_ACCESS_PLATFORM
+ * must be set and VIRTIO_BLK_F_CONFIG_WCE must be clear.
+ */
+static bool features_is_valid(u64 features)
+{
+	const u64 required = 1ULL << VIRTIO_F_ACCESS_PLATFORM;
+	/* Now we only support read-only configuration space */
+	const u64 forbidden = 1ULL << VIRTIO_BLK_F_CONFIG_WCE;
+
+	return (features & required) && !(features & forbidden);
+}
+
+/*
+ * Sanity-check a userspace-provided device configuration.
+ *
+ * The configuration is accepted only when the reserved area is all zero,
+ * vq_align and config_size do not exceed PAGE_SIZE, the device id is in the
+ * allow list and the feature bits pass features_is_valid().
+ */
+static bool vduse_validate_config(struct vduse_dev_config *config)
+{
+	const char *reserved = (const char *)config->reserved;
+
+	if (!is_mem_zero(reserved, sizeof(config->reserved)))
+		return false;
+
+	if (config->vq_align > PAGE_SIZE || config->config_size > PAGE_SIZE)
+		return false;
+
+	return device_is_allowed(config->device_id) &&
+	       features_is_valid(config->features);
+}
+
+/* sysfs show handler: print the device's msg_timeout as an unsigned decimal. */
+static ssize_t msg_timeout_show(struct device *device,
+ struct device_attribute *attr, char *buf)
+{
+ struct vduse_dev *dev = dev_get_drvdata(device);
+
+ return sysfs_emit(buf, "%u\n", dev->msg_timeout);
+}
+
+/*
+ * sysfs store handler: parse @buf as a base-10 unsigned integer straight
+ * into the device's msg_timeout.  Returns @count when parsing succeeds,
+ * otherwise the negative error from kstrtouint().
+ */
+static ssize_t msg_timeout_store(struct device *device,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	struct vduse_dev *dev = dev_get_drvdata(device);
+	int err = kstrtouint(buf, 10, &dev->msg_timeout);
+
+	if (err < 0)
+		return err;
+
+	return count;
+}
+
+static DEVICE_ATTR_RW(msg_timeout);
+
+static struct attribute *vduse_dev_attrs[] = {
+ &dev_attr_msg_timeout.attr,
+ NULL
+};
+
+ATTRIBUTE_GROUPS(vduse_dev);
+
+/*
+ * Create and register a new VDUSE device described by @config.
+ *
+ * @config:      validated device configuration (name, ids, features, ...)
+ * @config_buf:  copy of the device config space; ownership is taken by
+ *               this function - it is stored in dev->config on the way to
+ *               success and released with kvfree() on every error path
+ * @api_version: API version recorded in the new device
+ *
+ * Returns 0 on success, -EEXIST if a device with the same name already
+ * exists, -ENOMEM on allocation failure, or the error reported by
+ * idr_alloc()/device_create().
+ */
+static int vduse_create_dev(struct vduse_dev_config *config,
+ void *config_buf, u64 api_version)
+{
+ int i, ret;
+ struct vduse_dev *dev;
+
+ ret = -EEXIST;
+ if (vduse_find_dev(config->name))
+ goto err;
+
+ /* -ENOMEM covers all allocation failures up to idr_alloc() below */
+ ret = -ENOMEM;
+ dev = vduse_dev_create();
+ if (!dev)
+ goto err;
+
+ dev->api_version = api_version;
+ dev->device_features = config->features;
+ dev->device_id = config->device_id;
+ dev->vendor_id = config->vendor_id;
+ dev->name = kstrdup(config->name, GFP_KERNEL);
+ if (!dev->name)
+ goto err_str;
+
+ dev->domain = vduse_domain_create(VDUSE_IOVA_SIZE - 1,
+ VDUSE_BOUNCE_SIZE);
+ if (!dev->domain)
+ goto err_domain;
+
+ dev->config = config_buf;
+ dev->config_size = config->config_size;
+ dev->vq_align = config->vq_align;
+ dev->vq_num = config->vq_num;
+ dev->vqs = kcalloc(dev->vq_num, sizeof(*dev->vqs), GFP_KERNEL);
+ if (!dev->vqs)
+ goto err_vqs;
+
+ /* Per-virtqueue work items and locks */
+ for (i = 0; i < dev->vq_num; i++) {
+ dev->vqs[i].index = i;
+ INIT_WORK(&dev->vqs[i].inject, vduse_vq_irq_inject);
+ INIT_WORK(&dev->vqs[i].kick, vduse_vq_kick_work);
+ spin_lock_init(&dev->vqs[i].kick_lock);
+ spin_lock_init(&dev->vqs[i].irq_lock);
+ }
+
+ /* Minor 0 belongs to the control node created in vduse_init() */
+ ret = idr_alloc(&vduse_idr, dev, 1, VDUSE_DEV_MAX, GFP_KERNEL);
+ if (ret < 0)
+ goto err_idr;
+
+ dev->minor = ret;
+ dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
+ dev->dev = device_create(vduse_class, NULL,
+ MKDEV(MAJOR(vduse_major), dev->minor),
+ dev, "%s", config->name);
+ if (IS_ERR(dev->dev)) {
+ ret = PTR_ERR(dev->dev);
+ goto err_dev;
+ }
+ /* Reference dropped again by module_put() in vduse_destroy_dev() */
+ __module_get(THIS_MODULE);
+
+ return 0;
+ /* Error unwinding: undo the steps above in reverse order */
+err_dev:
+ idr_remove(&vduse_idr, dev->minor);
+err_idr:
+ kfree(dev->vqs);
+err_vqs:
+ vduse_domain_destroy(dev->domain);
+err_domain:
+ kfree(dev->name);
+err_str:
+ vduse_dev_destroy(dev);
+err:
+ kvfree(config_buf);
+ return ret;
+}
+
+/*
+ * ioctl handler of the control node.
+ *
+ * Handles VDUSE_GET_API_VERSION, VDUSE_SET_API_VERSION, VDUSE_CREATE_DEV
+ * and VDUSE_DESTROY_DEV; any other command yields -EINVAL.  The whole
+ * command is processed with vduse_lock held.
+ */
+static long vduse_ioctl(struct file *file, unsigned int cmd,
+ unsigned long arg)
+{
+ int ret;
+ void __user *argp = (void __user *)arg;
+ struct vduse_control *control = file->private_data;
+
+ mutex_lock(&vduse_lock);
+ switch (cmd) {
+ case VDUSE_GET_API_VERSION:
+ ret = put_user(control->api_version, (u64 __user *)argp);
+ break;
+ case VDUSE_SET_API_VERSION: {
+ u64 api_version;
+
+ ret = -EFAULT;
+ if (get_user(api_version, (u64 __user *)argp))
+ break;
+
+ /* Versions newer than the one this driver implements are rejected */
+ ret = -EINVAL;
+ if (api_version > VDUSE_API_VERSION)
+ break;
+
+ ret = 0;
+ control->api_version = api_version;
+ break;
+ }
+ case VDUSE_CREATE_DEV: {
+ struct vduse_dev_config config;
+ /* Size of the fixed part that precedes the config space data */
+ unsigned long size = offsetof(struct vduse_dev_config, config);
+ void *buf;
+
+ ret = -EFAULT;
+ if (copy_from_user(&config, argp, size))
+ break;
+
+ ret = -EINVAL;
+ if (vduse_validate_config(&config) == false)
+ break;
+
+ /* Config space contents follow the fixed part in user memory */
+ buf = vmemdup_user(argp + size, config.config_size);
+ if (IS_ERR(buf)) {
+ ret = PTR_ERR(buf);
+ break;
+ }
+ /* Force NUL termination of the user-supplied name */
+ config.name[VDUSE_NAME_MAX - 1] = '\0';
+ /* vduse_create_dev() takes ownership of buf, also on failure */
+ ret = vduse_create_dev(&config, buf, control->api_version);
+ break;
+ }
+ case VDUSE_DESTROY_DEV: {
+ char name[VDUSE_NAME_MAX];
+
+ ret = -EFAULT;
+ if (copy_from_user(name, argp, VDUSE_NAME_MAX))
+ break;
+
+ /* Force NUL termination of the user-supplied name */
+ name[VDUSE_NAME_MAX - 1] = '\0';
+ ret = vduse_destroy_dev(name);
+ break;
+ }
+ default:
+ ret = -EINVAL;
+ break;
+ }
+ mutex_unlock(&vduse_lock);
+
+ return ret;
+}
+
+/* Release handler of the control node: free the per-open vduse_control. */
+static int vduse_release(struct inode *inode, struct file *file)
+{
+	kfree(file->private_data);
+
+	return 0;
+}
+
+/*
+ * Open handler of the control node: allocate the per-open state and start
+ * it at VDUSE_API_VERSION.  Returns -ENOMEM if the allocation fails.
+ */
+static int vduse_open(struct inode *inode, struct file *file)
+{
+	struct vduse_control *ctrl = kmalloc(sizeof(*ctrl), GFP_KERNEL);
+
+	if (ctrl == NULL)
+		return -ENOMEM;
+
+	ctrl->api_version = VDUSE_API_VERSION;
+	file->private_data = ctrl;
+
+	return 0;
+}
+
+/* File operations of the control node; registered on vduse_ctrl_cdev in vduse_init(). */
+static const struct file_operations vduse_ctrl_fops = {
+ .owner = THIS_MODULE,
+ .open = vduse_open,
+ .release = vduse_release,
+ .unlocked_ioctl = vduse_ioctl,
+ .compat_ioctl = compat_ptr_ioctl,
+ .llseek = noop_llseek,
+};
+
+/*
+ * devnode callback of vduse_class: build the node name "vduse/<device name>".
+ * @mode is left untouched.  Returns the kasprintf() result.
+ */
+static char *vduse_devnode(struct device *dev, umode_t *mode)
+{
+ return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
+}
+
+/*
+ * Release callback of vduse_mgmtdev.  Intentionally empty: that device is a
+ * statically allocated object in this file, so there is nothing to free.
+ */
+static void vduse_mgmtdev_release(struct device *dev)
+{
+}
+
+static struct device vduse_mgmtdev = {
+ .init_name = "vduse",
+ .release = vduse_mgmtdev_release,
+};
+
+static struct vdpa_mgmt_dev mgmt_dev;
+
+/*
+ * Allocate the vDPA device that fronts @dev and set up its DMA state.
+ *
+ * Returns -EEXIST if @dev already has a vDPA device, the error from
+ * vdpa_alloc_device() or dma_set_mask_and_coherent() on failure, and 0 on
+ * success.  The device is only allocated here; registration is done by the
+ * caller.
+ */
+static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
+{
+ struct vduse_vdpa *vdev;
+ int ret;
+
+ if (dev->vdev)
+ return -EEXIST;
+
+ /* Last argument (use_va) is true: this device is set up to use VAs */
+ vdev = vdpa_alloc_device(struct vduse_vdpa, vdpa, dev->dev,
+ &vduse_vdpa_config_ops, name, true);
+ if (IS_ERR(vdev))
+ return PTR_ERR(vdev);
+
+ dev->vdev = vdev;
+ vdev->dev = dev;
+ vdev->vdpa.dev.dma_mask = &vdev->vdpa.dev.coherent_dma_mask;
+ ret = dma_set_mask_and_coherent(&vdev->vdpa.dev, DMA_BIT_MASK(64));
+ if (ret) {
+ put_device(&vdev->vdpa.dev);
+ return ret;
+ }
+ /* The vDPA device acts as its own DMA device, using the VDUSE DMA ops */
+ set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
+ vdev->vdpa.dma_dev = &vdev->vdpa.dev;
+ vdev->vdpa.mdev = &mgmt_dev;
+
+ return 0;
+}
+
+/*
+ * Management-device "dev_add" callback: attach a vDPA device to the VDUSE
+ * device called @name.
+ *
+ * Lookup, readiness check and vDPA allocation happen under vduse_lock;
+ * registration with the vDPA core is done after the lock is dropped.
+ * Returns -EINVAL if the device does not exist or is not ready, otherwise
+ * the result of the initialisation/registration steps.
+ */
+static int vdpa_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+{
+	struct vduse_dev *dev;
+	int ret;
+
+	mutex_lock(&vduse_lock);
+	dev = vduse_find_dev(name);
+	if (dev && vduse_dev_is_ready(dev))
+		ret = vduse_dev_init_vdpa(dev, name);
+	else
+		ret = -EINVAL;
+	mutex_unlock(&vduse_lock);
+	if (ret)
+		return ret;
+
+	ret = _vdpa_register_device(&dev->vdev->vdpa, dev->vq_num);
+	if (ret)
+		put_device(&dev->vdev->vdpa.dev);
+
+	return ret;
+}
+
+/* Management-device "dev_del" callback: unregister the given vDPA device. */
+static void vdpa_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
+{
+ _vdpa_unregister_device(dev);
+}
+
+static const struct vdpa_mgmtdev_ops vdpa_dev_mgmtdev_ops = {
+ .dev_add = vdpa_dev_add,
+ .dev_del = vdpa_dev_del,
+};
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct vdpa_mgmt_dev mgmt_dev = {
+ .device = &vduse_mgmtdev,
+ .id_table = id_table,
+ .ops = &vdpa_dev_mgmtdev_ops,
+};
+
+/*
+ * Register the "vduse" parent device and then the vDPA management device
+ * that hangs off it.  If the second step fails the parent device is
+ * unregistered again.  Returns 0 or the error of the failing step.
+ */
+static int vduse_mgmtdev_init(void)
+{
+	int err;
+
+	err = device_register(&vduse_mgmtdev);
+	if (err)
+		return err;
+
+	err = vdpa_mgmtdev_register(&mgmt_dev);
+	if (err)
+		device_unregister(&vduse_mgmtdev);
+
+	return err;
+}
+
+/* Undo vduse_mgmtdev_init(): unregister in the reverse order of registration. */
+static void vduse_mgmtdev_exit(void)
+{
+ vdpa_mgmtdev_unregister(&mgmt_dev);
+ device_unregister(&vduse_mgmtdev);
+}
+
+/*
+ * Module initialisation.
+ *
+ * Sets up, in order: the "vduse" class, the char device region, the control
+ * node (minor 0), the cdev serving the per-device minors (1 and up), the IRQ
+ * injection workqueue, the IOVA domain support and the vDPA management
+ * device.  On failure everything set up so far is torn down in reverse
+ * order and a negative errno is returned.
+ */
+static int vduse_init(void)
+{
+	int ret;
+	struct device *dev;
+
+	vduse_class = class_create(THIS_MODULE, "vduse");
+	if (IS_ERR(vduse_class))
+		return PTR_ERR(vduse_class);
+
+	vduse_class->devnode = vduse_devnode;
+	vduse_class->dev_groups = vduse_dev_groups;
+
+	ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
+	if (ret)
+		goto err_chardev_region;
+
+	/* /dev/vduse/control */
+	cdev_init(&vduse_ctrl_cdev, &vduse_ctrl_fops);
+	vduse_ctrl_cdev.owner = THIS_MODULE;
+	ret = cdev_add(&vduse_ctrl_cdev, vduse_major, 1);
+	if (ret)
+		goto err_ctrl_cdev;
+
+	dev = device_create(vduse_class, NULL, vduse_major, NULL, "control");
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		goto err_device;
+	}
+
+	/* /dev/vduse/$DEVICE */
+	cdev_init(&vduse_cdev, &vduse_dev_fops);
+	vduse_cdev.owner = THIS_MODULE;
+	ret = cdev_add(&vduse_cdev, MKDEV(MAJOR(vduse_major), 1),
+		       VDUSE_DEV_MAX - 1);
+	if (ret)
+		goto err_cdev;
+
+	vduse_irq_wq = alloc_workqueue("vduse-irq",
+				WQ_HIGHPRI | WQ_SYSFS | WQ_UNBOUND, 0);
+	if (!vduse_irq_wq) {
+		/*
+		 * ret is still 0 from the successful cdev_add() above; set an
+		 * error code so that a failed init is not reported as success.
+		 */
+		ret = -ENOMEM;
+		goto err_wq;
+	}
+
+	ret = vduse_domain_init();
+	if (ret)
+		goto err_domain;
+
+	ret = vduse_mgmtdev_init();
+	if (ret)
+		goto err_mgmtdev;
+
+	return 0;
+err_mgmtdev:
+	vduse_domain_exit();
+err_domain:
+	destroy_workqueue(vduse_irq_wq);
+err_wq:
+	cdev_del(&vduse_cdev);
+err_cdev:
+	device_destroy(vduse_class, vduse_major);
+err_device:
+	cdev_del(&vduse_ctrl_cdev);
+err_ctrl_cdev:
+	unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
+err_chardev_region:
+	class_destroy(vduse_class);
+	return ret;
+}
+module_init(vduse_init);
+
+/* Module exit: tear down everything vduse_init() set up, in reverse order. */
+static void vduse_exit(void)
+{
+ vduse_mgmtdev_exit();
+ vduse_domain_exit();
+ destroy_workqueue(vduse_irq_wq);
+ cdev_del(&vduse_cdev);
+ device_destroy(vduse_class, vduse_major);
+ cdev_del(&vduse_ctrl_cdev);
+ unregister_chrdev_region(vduse_major, VDUSE_DEV_MAX);
+ class_destroy(vduse_class);
+}
+module_exit(vduse_exit);
+
+MODULE_LICENSE(DRV_LICENSE);
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESC);
}
vp_modern_set_status(mdev, status);
+}
- if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
- (s & VIRTIO_CONFIG_S_DRIVER_OK))
+static int vp_vdpa_reset(struct vdpa_device *vdpa)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ u8 s = vp_vdpa_get_status(vdpa);
+
+ vp_modern_set_status(mdev, 0);
+
+ if (s & VIRTIO_CONFIG_S_DRIVER_OK)
vp_vdpa_free_irq(vp_vdpa);
+
+ return 0;
}
static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
.set_features = vp_vdpa_set_features,
.get_status = vp_vdpa_get_status,
.set_status = vp_vdpa_set_status,
+ .reset = vp_vdpa_reset,
.get_vq_num_max = vp_vdpa_get_vq_num_max,
.get_vq_state = vp_vdpa_get_vq_state,
.get_vq_notification = vp_vdpa_get_vq_notification,
return ret;
vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
- dev, &vp_vdpa_ops, NULL);
+ dev, &vp_vdpa_ops, NULL, false);
if (IS_ERR(vp_vdpa)) {
dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
return PTR_ERR(vp_vdpa);
EXPORT_SYMBOL_GPL(vhost_iotlb_map_free);
/**
- * vhost_iotlb_add_range - add a new range to vhost IOTLB
+ * vhost_iotlb_add_range_ctx - add a new range to vhost IOTLB
* @iotlb: the IOTLB
* @start: start of the IOVA range
* @last: last of IOVA range
* @addr: the address that is mapped to @start
* @perm: access permission of this range
+ * @opaque: the opaque pointer for the new mapping
*
* Returns an error last is smaller than start or memory allocation
* fails
*/
-int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
- u64 start, u64 last,
- u64 addr, unsigned int perm)
+int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb,
+ u64 start, u64 last,
+ u64 addr, unsigned int perm,
+ void *opaque)
{
struct vhost_iotlb_map *map;
map->last = last;
map->addr = addr;
map->perm = perm;
+ map->opaque = opaque;
iotlb->nmaps++;
vhost_iotlb_itree_insert(map, &iotlb->root);
return 0;
}
+EXPORT_SYMBOL_GPL(vhost_iotlb_add_range_ctx);
+
+/*
+ * Add a new range to the vhost IOTLB without an opaque pointer; thin
+ * wrapper around vhost_iotlb_add_range_ctx() that passes NULL for it.
+ */
+int vhost_iotlb_add_range(struct vhost_iotlb *iotlb,
+ u64 start, u64 last,
+ u64 addr, unsigned int perm)
+{
+ return vhost_iotlb_add_range_ctx(iotlb, start, last,
+ addr, perm, NULL);
+}
EXPORT_SYMBOL_GPL(vhost_iotlb_add_range);
/**
+// SPDX-License-Identifier: GPL-2.0+
/*******************************************************************************
* Vhost kernel TCM fabric driver for virtio SCSI initiators
*
* (C) Copyright 2010-2013 Datera, Inc.
* (C) Copyright 2010-2012 IBM Corp.
*
- * Licensed to the Linux Foundation under the General Public License (GPL) version 2.
- *
* Authors: Nicholas A. Bellinger <nab@daterainc.com>
* Stefan Hajnoczi <stefanha@linux.vnet.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
****************************************************************************/
#include <linux/module.h>
irq_bypass_unregister_producer(&vq->call_ctx.producer);
}
-static void vhost_vdpa_reset(struct vhost_vdpa *v)
+static int vhost_vdpa_reset(struct vhost_vdpa *v)
{
struct vdpa_device *vdpa = v->vdpa;
- vdpa_reset(vdpa);
v->in_batch = 0;
+
+ return vdpa_reset(vdpa);
}
static long vhost_vdpa_get_device_id(struct vhost_vdpa *v, u8 __user *argp)
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
u8 status, status_old;
- int nvqs = v->nvqs;
+ int ret, nvqs = v->nvqs;
u16 i;
if (copy_from_user(&status, statusp, sizeof(status)))
if (status != 0 && (ops->get_status(vdpa) & ~status) != 0)
return -EINVAL;
- ops->set_status(vdpa, status);
+ if (status == 0) {
+ ret = ops->reset(vdpa);
+ if (ret)
+ return ret;
+ } else
+ ops->set_status(vdpa, status);
if ((status & VIRTIO_CONFIG_S_DRIVER_OK) && !(status_old & VIRTIO_CONFIG_S_DRIVER_OK))
for (i = 0; i < nvqs; i++)
return r;
}
-static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+static void vhost_vdpa_pa_unmap(struct vhost_vdpa *v, u64 start, u64 last)
{
struct vhost_dev *dev = &v->vdev;
struct vhost_iotlb *iotlb = dev->iotlb;
unsigned long pfn, pinned;
while ((map = vhost_iotlb_itree_first(iotlb, start, last)) != NULL) {
- pinned = map->size >> PAGE_SHIFT;
- for (pfn = map->addr >> PAGE_SHIFT;
+ pinned = PFN_DOWN(map->size);
+ for (pfn = PFN_DOWN(map->addr);
pinned > 0; pfn++, pinned--) {
page = pfn_to_page(pfn);
if (map->perm & VHOST_ACCESS_WO)
set_page_dirty_lock(page);
unpin_user_page(page);
}
- atomic64_sub(map->size >> PAGE_SHIFT, &dev->mm->pinned_vm);
+ atomic64_sub(PFN_DOWN(map->size), &dev->mm->pinned_vm);
vhost_iotlb_map_free(iotlb, map);
}
}
+/*
+ * Remove every IOTLB entry overlapping [start, last] for a device that maps
+ * by virtual address: drop the file reference held by the vdpa_map_file
+ * stored in the entry's opaque pointer, free that structure, then free the
+ * IOTLB entry itself.
+ */
+static void vhost_vdpa_va_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+{
+	struct vhost_iotlb *iotlb = v->vdev.iotlb;
+	struct vhost_iotlb_map *entry;
+	struct vdpa_map_file *mf;
+
+	for (;;) {
+		entry = vhost_iotlb_itree_first(iotlb, start, last);
+		if (!entry)
+			break;
+
+		mf = entry->opaque;
+		fput(mf->file);
+		kfree(mf);
+		vhost_iotlb_map_free(iotlb, entry);
+	}
+}
+
+/*
+ * Unmap [start, last] from the IOTLB, dispatching on whether the vDPA
+ * device works with virtual addresses (use_va) or physical addresses.
+ */
+static void vhost_vdpa_iotlb_unmap(struct vhost_vdpa *v, u64 start, u64 last)
+{
+	if (v->vdpa->use_va)
+		vhost_vdpa_va_unmap(v, start, last);
+	else
+		vhost_vdpa_pa_unmap(v, start, last);
+}
+
static void vhost_vdpa_iotlb_free(struct vhost_vdpa *v)
{
struct vhost_dev *dev = &v->vdev;
return flags | IOMMU_CACHE;
}
-static int vhost_vdpa_map(struct vhost_vdpa *v,
- u64 iova, u64 size, u64 pa, u32 perm)
+static int vhost_vdpa_map(struct vhost_vdpa *v, u64 iova,
+ u64 size, u64 pa, u32 perm, void *opaque)
{
struct vhost_dev *dev = &v->vdev;
struct vdpa_device *vdpa = v->vdpa;
const struct vdpa_config_ops *ops = vdpa->config;
int r = 0;
- r = vhost_iotlb_add_range(dev->iotlb, iova, iova + size - 1,
- pa, perm);
+ r = vhost_iotlb_add_range_ctx(dev->iotlb, iova, iova + size - 1,
+ pa, perm, opaque);
if (r)
return r;
if (ops->dma_map) {
- r = ops->dma_map(vdpa, iova, size, pa, perm);
+ r = ops->dma_map(vdpa, iova, size, pa, perm, opaque);
} else if (ops->set_map) {
if (!v->in_batch)
r = ops->set_map(vdpa, dev->iotlb);
r = iommu_map(v->domain, iova, pa, size,
perm_to_iommu_flags(perm));
}
-
- if (r)
+ if (r) {
vhost_iotlb_del_range(dev->iotlb, iova, iova + size - 1);
- else
- atomic64_add(size >> PAGE_SHIFT, &dev->mm->pinned_vm);
+ return r;
+ }
- return r;
+ if (!vdpa->use_va)
+ atomic64_add(PFN_DOWN(size), &dev->mm->pinned_vm);
+
+ return 0;
}
static void vhost_vdpa_unmap(struct vhost_vdpa *v, u64 iova, u64 size)
}
}
-static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
- struct vhost_iotlb_msg *msg)
+/*
+ * Map the user virtual range [uaddr, uaddr + size) at @iova for a vDPA
+ * device that works with virtual addresses.
+ *
+ * The range is walked one VMA at a time with the mm's mmap lock held for
+ * reading.  Only VMAs that are file backed, shared and neither VM_IO nor
+ * VM_PFNMAP are mapped; each such piece gets a vdpa_map_file holding a
+ * reference on the backing file plus the offset of the piece within that
+ * file.  Other VMAs are skipped without error.
+ *
+ * Returns 0 on success, -EINVAL if no VMA is found for an address,
+ * -ENOMEM if a vdpa_map_file cannot be allocated, or the error returned by
+ * vhost_vdpa_map().  On error the pieces mapped so far are unmapped again.
+ */
+static int vhost_vdpa_va_map(struct vhost_vdpa *v,
+			     u64 iova, u64 size, u64 uaddr, u32 perm)
+{
+	struct vhost_dev *dev = &v->vdev;
+	u64 offset, map_size, map_iova = iova;
+	struct vdpa_map_file *map_file;
+	struct vm_area_struct *vma;
+	/*
+	 * Must start at 0: when every VMA in the range is skipped via
+	 * "goto next", nothing else assigns ret before it is read below.
+	 */
+	int ret = 0;
+
+	mmap_read_lock(dev->mm);
+
+	while (size) {
+		vma = find_vma(dev->mm, uaddr);
+		if (!vma) {
+			ret = -EINVAL;
+			break;
+		}
+		map_size = min(size, vma->vm_end - uaddr);
+		if (!(vma->vm_file && (vma->vm_flags & VM_SHARED) &&
+			!(vma->vm_flags & (VM_IO | VM_PFNMAP))))
+			goto next;
+
+		map_file = kzalloc(sizeof(*map_file), GFP_KERNEL);
+		if (!map_file) {
+			ret = -ENOMEM;
+			break;
+		}
+		/* Offset of uaddr within the file backing this VMA */
+		offset = (vma->vm_pgoff << PAGE_SHIFT) + uaddr - vma->vm_start;
+		map_file->offset = offset;
+		map_file->file = get_file(vma->vm_file);
+		ret = vhost_vdpa_map(v, map_iova, map_size, uaddr,
+				     perm, map_file);
+		if (ret) {
+			fput(map_file->file);
+			kfree(map_file);
+			break;
+		}
+next:
+		size -= map_size;
+		uaddr += map_size;
+		map_iova += map_size;
+	}
+	/* Roll back whatever was mapped before the failure */
+	if (ret)
+		vhost_vdpa_unmap(v, iova, map_iova - iova);
+
+	mmap_read_unlock(dev->mm);
+
+	return ret;
+}
+
+static int vhost_vdpa_pa_map(struct vhost_vdpa *v,
+ u64 iova, u64 size, u64 uaddr, u32 perm)
{
struct vhost_dev *dev = &v->vdev;
- struct vhost_iotlb *iotlb = dev->iotlb;
struct page **page_list;
unsigned long list_size = PAGE_SIZE / sizeof(struct page *);
unsigned int gup_flags = FOLL_LONGTERM;
unsigned long npages, cur_base, map_pfn, last_pfn = 0;
unsigned long lock_limit, sz2pin, nchunks, i;
- u64 iova = msg->iova;
+ u64 start = iova;
long pinned;
int ret = 0;
- if (msg->iova < v->range.first || !msg->size ||
- msg->iova > U64_MAX - msg->size + 1 ||
- msg->iova + msg->size - 1 > v->range.last)
- return -EINVAL;
-
- if (vhost_iotlb_itree_first(iotlb, msg->iova,
- msg->iova + msg->size - 1))
- return -EEXIST;
-
/* Limit the use of memory for bookkeeping */
page_list = (struct page **) __get_free_page(GFP_KERNEL);
if (!page_list)
return -ENOMEM;
- if (msg->perm & VHOST_ACCESS_WO)
+ if (perm & VHOST_ACCESS_WO)
gup_flags |= FOLL_WRITE;
- npages = PAGE_ALIGN(msg->size + (iova & ~PAGE_MASK)) >> PAGE_SHIFT;
+ npages = PFN_UP(size + (iova & ~PAGE_MASK));
if (!npages) {
ret = -EINVAL;
goto free;
mmap_read_lock(dev->mm);
- lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ lock_limit = PFN_DOWN(rlimit(RLIMIT_MEMLOCK));
if (npages + atomic64_read(&dev->mm->pinned_vm) > lock_limit) {
ret = -ENOMEM;
goto unlock;
}
- cur_base = msg->uaddr & PAGE_MASK;
+ cur_base = uaddr & PAGE_MASK;
iova &= PAGE_MASK;
nchunks = 0;
if (last_pfn && (this_pfn != last_pfn + 1)) {
/* Pin a contiguous chunk of memory */
- csize = (last_pfn - map_pfn + 1) << PAGE_SHIFT;
+ csize = PFN_PHYS(last_pfn - map_pfn + 1);
ret = vhost_vdpa_map(v, iova, csize,
- map_pfn << PAGE_SHIFT,
- msg->perm);
+ PFN_PHYS(map_pfn),
+ perm, NULL);
if (ret) {
/*
* Unpin the pages that are left unmapped
last_pfn = this_pfn;
}
- cur_base += pinned << PAGE_SHIFT;
+ cur_base += PFN_PHYS(pinned);
npages -= pinned;
}
/* Pin the rest chunk */
- ret = vhost_vdpa_map(v, iova, (last_pfn - map_pfn + 1) << PAGE_SHIFT,
- map_pfn << PAGE_SHIFT, msg->perm);
+ ret = vhost_vdpa_map(v, iova, PFN_PHYS(last_pfn - map_pfn + 1),
+ PFN_PHYS(map_pfn), perm, NULL);
out:
if (ret) {
if (nchunks) {
for (pfn = map_pfn; pfn <= last_pfn; pfn++)
unpin_user_page(pfn_to_page(pfn));
}
- vhost_vdpa_unmap(v, msg->iova, msg->size);
+ vhost_vdpa_unmap(v, start, size);
}
unlock:
mmap_read_unlock(dev->mm);
free:
free_page((unsigned long)page_list);
return ret;
+
+}
+
+static int vhost_vdpa_process_iotlb_update(struct vhost_vdpa *v,
+ struct vhost_iotlb_msg *msg)
+{
+ struct vhost_dev *dev = &v->vdev;
+ struct vdpa_device *vdpa = v->vdpa;
+ struct vhost_iotlb *iotlb = dev->iotlb;
+
+ if (msg->iova < v->range.first || !msg->size ||
+ msg->iova > U64_MAX - msg->size + 1 ||
+ msg->iova + msg->size - 1 > v->range.last)
+ return -EINVAL;
+
+ if (vhost_iotlb_itree_first(iotlb, msg->iova,
+ msg->iova + msg->size - 1))
+ return -EEXIST;
+
+ if (vdpa->use_va)
+ return vhost_vdpa_va_map(v, msg->iova, msg->size,
+ msg->uaddr, msg->perm);
+
+ return vhost_vdpa_pa_map(v, msg->iova, msg->size, msg->uaddr,
+ msg->perm);
}
static int vhost_vdpa_process_iotlb_msg(struct vhost_dev *dev,
return -EBUSY;
nvqs = v->nvqs;
- vhost_vdpa_reset(v);
+ r = vhost_vdpa_reset(v);
+ if (r)
+ goto err;
vqs = kmalloc_array(nvqs, sizeof(*vqs), GFP_KERNEL);
if (!vqs) {
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
if (remap_pfn_range(vma, vmf->address & PAGE_MASK,
- notify.addr >> PAGE_SHIFT, PAGE_SIZE,
+ PFN_DOWN(notify.addr), PAGE_SIZE,
vma->vm_page_prot))
return VM_FAULT_SIGBUS;
size_t nbytes;
size_t iov_len, payload_len;
int head;
- bool restore_flag = false;
+ u32 flags_to_restore = 0;
spin_lock_bh(&vsock->send_pkt_list_lock);
if (list_empty(&vsock->send_pkt_list)) {
* small rx buffers, headers of packets in rx queue are
* created dynamically and are initialized with header
* of current packet(except length). But in case of
- * SOCK_SEQPACKET, we also must clear record delimeter
- * bit(VIRTIO_VSOCK_SEQ_EOR). Otherwise, instead of one
- * packet with delimeter(which marks end of record),
- * there will be sequence of packets with delimeter
- * bit set. After initialized header will be copied to
- * rx buffer, this bit will be restored.
+ * SOCK_SEQPACKET, we also must clear message delimeter
+ * bit (VIRTIO_VSOCK_SEQ_EOM) and MSG_EOR bit
+ * (VIRTIO_VSOCK_SEQ_EOR) if set. Otherwise,
+ * there will be sequence of packets with these
+ * bits set. After initialized header will be copied to
+ * rx buffer, these required bits will be restored.
*/
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
- pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
- restore_flag = true;
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+ flags_to_restore |= VIRTIO_VSOCK_SEQ_EOM;
+
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+ pkt->hdr.flags &= ~cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ flags_to_restore |= VIRTIO_VSOCK_SEQ_EOR;
+ }
}
}
* to send it with the next available buffer.
*/
if (pkt->off < pkt->len) {
- if (restore_flag)
- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ pkt->hdr.flags |= cpu_to_le32(flags_to_restore);
/* We are queueing the same virtio_vsock_pkt to handle
* the remaining bytes, and we want to deliver it
#include <linux/virtio_config.h>
#include <linux/module.h>
#include <linux/idr.h>
+#include <linux/of.h>
#include <uapi/linux/virtio_ids.h>
/* Unique numbering for virtio devices. */
/* Acknowledge the device's existence again. */
virtio_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
+
+ of_node_put(dev->dev.of_node);
}
static struct bus_type virtio_bus = {
}
EXPORT_SYMBOL_GPL(unregister_virtio_driver);
+/*
+ * Bind a virtio device to the matching child of its parent's device-tree
+ * node, if there is one.
+ *
+ * Returns 0 when the parent has no OF node or no available child (nothing
+ * to do) and when a child with compatible "virtio,device<id in hex>" was
+ * found and stored in dev->dev.of_node.  Returns -EINVAL when the parent
+ * has more than one available child or the child's compatible does not
+ * match, and -ENODEV when the child cannot be fetched.
+ */
+static int virtio_device_of_init(struct virtio_device *dev)
+{
+ struct device_node *np, *pnode = dev_of_node(dev->dev.parent);
+ /* Template sized for the prefix plus up to 8 hex digits of device id */
+ char compat[] = "virtio,deviceXXXXXXXX";
+ int ret, count;
+
+ if (!pnode)
+ return 0;
+
+ count = of_get_available_child_count(pnode);
+ if (!count)
+ return 0;
+
+ /* There can be only 1 child node */
+ if (WARN_ON(count > 1))
+ return -EINVAL;
+
+ np = of_get_next_available_child(pnode, NULL);
+ if (WARN_ON(!np))
+ return -ENODEV;
+
+ ret = snprintf(compat, sizeof(compat), "virtio,device%x", dev->id.device);
+ BUG_ON(ret >= sizeof(compat));
+
+ if (!of_device_is_compatible(np, compat)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Success: np is kept in the device and is not put here */
+ dev->dev.of_node = np;
+ return 0;
+
+out:
+ of_node_put(np);
+ return ret;
+}
+
/**
* register_virtio_device - register virtio device
* @dev : virtio device to be registered
dev->index = err;
dev_set_name(&dev->dev, "virtio%u", dev->index);
+ err = virtio_device_of_init(dev);
+ if (err)
+ goto out_ida_remove;
+
spin_lock_init(&dev->config_lock);
dev->config_enabled = false;
dev->config_change_pending = false;
*/
err = device_add(&dev->dev);
if (err)
- ida_simple_remove(&virtio_index_ida, dev->index);
+ goto out_of_node_put;
+
+ return 0;
+
+out_of_node_put:
+ of_node_put(dev->dev.of_node);
+out_ida_remove:
+ ida_simple_remove(&virtio_index_ida, dev->index);
out:
- if (err)
- virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
+ virtio_add_status(dev, VIRTIO_CONFIG_S_FAILED);
return err;
}
EXPORT_SYMBOL_GPL(register_virtio_device);
callbacks[VIRTIO_BALLOON_VQ_REPORTING] = balloon_ack;
}
- err = vb->vdev->config->find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX,
- vqs, callbacks, names, NULL, NULL);
+ err = virtio_find_vqs(vb->vdev, VIRTIO_BALLOON_VQ_MAX, vqs,
+ callbacks, names, NULL);
if (err)
return err;
source "fs/cifs/Kconfig"
source "fs/ksmbd/Kconfig"
-config CIFS_COMMON
+config SMBFS_COMMON
tristate
default y if CIFS=y
default m if CIFS=m
kernel_read_file.o remap_range.o
ifeq ($(CONFIG_BLOCK),y)
-obj-y += buffer.o block_dev.o direct-io.o mpage.o
+obj-y += buffer.o direct-io.o mpage.o
else
obj-y += no-block.o
endif
obj-$(CONFIG_NLS) += nls/
obj-$(CONFIG_UNICODE) += unicode/
obj-$(CONFIG_SYSV_FS) += sysv/
-obj-$(CONFIG_CIFS_COMMON) += cifs_common/
+obj-$(CONFIG_SMBFS_COMMON) += smbfs_common/
obj-$(CONFIG_CIFS) += cifs/
obj-$(CONFIG_SMB_SERVER) += ksmbd/
obj-$(CONFIG_HPFS_FS) += hpfs/
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 1991, 1992 Linus Torvalds
- * Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
- * Copyright (C) 2016 - 2020 Christoph Hellwig
- */
-
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/fcntl.h>
-#include <linux/slab.h>
-#include <linux/kmod.h>
-#include <linux/major.h>
-#include <linux/device_cgroup.h>
-#include <linux/highmem.h>
-#include <linux/blkdev.h>
-#include <linux/backing-dev.h>
-#include <linux/module.h>
-#include <linux/blkpg.h>
-#include <linux/magic.h>
-#include <linux/buffer_head.h>
-#include <linux/swap.h>
-#include <linux/pagevec.h>
-#include <linux/writeback.h>
-#include <linux/mpage.h>
-#include <linux/mount.h>
-#include <linux/pseudo_fs.h>
-#include <linux/uio.h>
-#include <linux/namei.h>
-#include <linux/log2.h>
-#include <linux/cleancache.h>
-#include <linux/task_io_accounting_ops.h>
-#include <linux/falloc.h>
-#include <linux/part_stat.h>
-#include <linux/uaccess.h>
-#include <linux/suspend.h>
-#include "internal.h"
-#include "../block/blk.h"
-
-struct bdev_inode {
- struct block_device bdev;
- struct inode vfs_inode;
-};
-
-static const struct address_space_operations def_blk_aops;
-
-static inline struct bdev_inode *BDEV_I(struct inode *inode)
-{
- return container_of(inode, struct bdev_inode, vfs_inode);
-}
-
-struct block_device *I_BDEV(struct inode *inode)
-{
- return &BDEV_I(inode)->bdev;
-}
-EXPORT_SYMBOL(I_BDEV);
-
-static void bdev_write_inode(struct block_device *bdev)
-{
- struct inode *inode = bdev->bd_inode;
- int ret;
-
- spin_lock(&inode->i_lock);
- while (inode->i_state & I_DIRTY) {
- spin_unlock(&inode->i_lock);
- ret = write_inode_now(inode, true);
- if (ret) {
- char name[BDEVNAME_SIZE];
- pr_warn_ratelimited("VFS: Dirty inode writeback failed "
- "for block device %s (err=%d).\n",
- bdevname(bdev, name), ret);
- }
- spin_lock(&inode->i_lock);
- }
- spin_unlock(&inode->i_lock);
-}
-
-/* Kill _all_ buffers and pagecache , dirty or not.. */
-static void kill_bdev(struct block_device *bdev)
-{
- struct address_space *mapping = bdev->bd_inode->i_mapping;
-
- if (mapping_empty(mapping))
- return;
-
- invalidate_bh_lrus();
- truncate_inode_pages(mapping, 0);
-}
-
-/* Invalidate clean unused buffers and pagecache. */
-void invalidate_bdev(struct block_device *bdev)
-{
- struct address_space *mapping = bdev->bd_inode->i_mapping;
-
- if (mapping->nrpages) {
- invalidate_bh_lrus();
- lru_add_drain_all(); /* make sure all lru add caches are flushed */
- invalidate_mapping_pages(mapping, 0, -1);
- }
- /* 99% of the time, we don't need to flush the cleancache on the bdev.
- * But, for the strange corners, lets be cautious
- */
- cleancache_invalidate_inode(mapping);
-}
-EXPORT_SYMBOL(invalidate_bdev);
-
-/*
- * Drop all buffers & page cache for given bdev range. This function bails
- * with error if bdev has other exclusive owner (such as filesystem).
- */
-int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
- loff_t lstart, loff_t lend)
-{
- /*
- * If we don't hold exclusive handle for the device, upgrade to it
- * while we discard the buffer cache to avoid discarding buffers
- * under live filesystem.
- */
- if (!(mode & FMODE_EXCL)) {
- int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
- if (err)
- goto invalidate;
- }
-
- truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
- if (!(mode & FMODE_EXCL))
- bd_abort_claiming(bdev, truncate_bdev_range);
- return 0;
-
-invalidate:
- /*
- * Someone else has handle exclusively open. Try invalidating instead.
- * The 'end' argument is inclusive so the rounding is safe.
- */
- return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
- lstart >> PAGE_SHIFT,
- lend >> PAGE_SHIFT);
-}
-
-static void set_init_blocksize(struct block_device *bdev)
-{
- unsigned int bsize = bdev_logical_block_size(bdev);
- loff_t size = i_size_read(bdev->bd_inode);
-
- while (bsize < PAGE_SIZE) {
- if (size & bsize)
- break;
- bsize <<= 1;
- }
- bdev->bd_inode->i_blkbits = blksize_bits(bsize);
-}
-
-int set_blocksize(struct block_device *bdev, int size)
-{
- /* Size must be a power of two, and between 512 and PAGE_SIZE */
- if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
- return -EINVAL;
-
- /* Size cannot be smaller than the size supported by the device */
- if (size < bdev_logical_block_size(bdev))
- return -EINVAL;
-
- /* Don't change the size if it is same as current */
- if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
- sync_blockdev(bdev);
- bdev->bd_inode->i_blkbits = blksize_bits(size);
- kill_bdev(bdev);
- }
- return 0;
-}
-
-EXPORT_SYMBOL(set_blocksize);
-
-int sb_set_blocksize(struct super_block *sb, int size)
-{
- if (set_blocksize(sb->s_bdev, size))
- return 0;
- /* If we get here, we know size is power of two
- * and it's value is between 512 and PAGE_SIZE */
- sb->s_blocksize = size;
- sb->s_blocksize_bits = blksize_bits(size);
- return sb->s_blocksize;
-}
-
-EXPORT_SYMBOL(sb_set_blocksize);
-
-int sb_min_blocksize(struct super_block *sb, int size)
-{
- int minsize = bdev_logical_block_size(sb->s_bdev);
- if (size < minsize)
- size = minsize;
- return sb_set_blocksize(sb, size);
-}
-
-EXPORT_SYMBOL(sb_min_blocksize);
-
-static int
-blkdev_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create)
-{
- bh->b_bdev = I_BDEV(inode);
- bh->b_blocknr = iblock;
- set_buffer_mapped(bh);
- return 0;
-}
-
-static struct inode *bdev_file_inode(struct file *file)
-{
- return file->f_mapping->host;
-}
-
-static unsigned int dio_bio_write_op(struct kiocb *iocb)
-{
- unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
-
- /* avoid the need for a I/O completion work item */
- if (iocb->ki_flags & IOCB_DSYNC)
- op |= REQ_FUA;
- return op;
-}
-
-#define DIO_INLINE_BIO_VECS 4
-
-static void blkdev_bio_end_io_simple(struct bio *bio)
-{
- struct task_struct *waiter = bio->bi_private;
-
- WRITE_ONCE(bio->bi_private, NULL);
- blk_wake_io_task(waiter);
-}
-
-static ssize_t
-__blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
- unsigned int nr_pages)
-{
- struct file *file = iocb->ki_filp;
- struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
- loff_t pos = iocb->ki_pos;
- bool should_dirty = false;
- struct bio bio;
- ssize_t ret;
- blk_qc_t qc;
-
- if ((pos | iov_iter_alignment(iter)) &
- (bdev_logical_block_size(bdev) - 1))
- return -EINVAL;
-
- if (nr_pages <= DIO_INLINE_BIO_VECS)
- vecs = inline_vecs;
- else {
- vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
- GFP_KERNEL);
- if (!vecs)
- return -ENOMEM;
- }
-
- bio_init(&bio, vecs, nr_pages);
- bio_set_dev(&bio, bdev);
- bio.bi_iter.bi_sector = pos >> 9;
- bio.bi_write_hint = iocb->ki_hint;
- bio.bi_private = current;
- bio.bi_end_io = blkdev_bio_end_io_simple;
- bio.bi_ioprio = iocb->ki_ioprio;
-
- ret = bio_iov_iter_get_pages(&bio, iter);
- if (unlikely(ret))
- goto out;
- ret = bio.bi_iter.bi_size;
-
- if (iov_iter_rw(iter) == READ) {
- bio.bi_opf = REQ_OP_READ;
- if (iter_is_iovec(iter))
- should_dirty = true;
- } else {
- bio.bi_opf = dio_bio_write_op(iocb);
- task_io_account_write(ret);
- }
- if (iocb->ki_flags & IOCB_NOWAIT)
- bio.bi_opf |= REQ_NOWAIT;
- if (iocb->ki_flags & IOCB_HIPRI)
- bio_set_polled(&bio, iocb);
-
- qc = submit_bio(&bio);
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(bio.bi_private))
- break;
- if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc, true))
- blk_io_schedule();
- }
- __set_current_state(TASK_RUNNING);
-
- bio_release_pages(&bio, should_dirty);
- if (unlikely(bio.bi_status))
- ret = blk_status_to_errno(bio.bi_status);
-
-out:
- if (vecs != inline_vecs)
- kfree(vecs);
-
- bio_uninit(&bio);
-
- return ret;
-}
-
-struct blkdev_dio {
- union {
- struct kiocb *iocb;
- struct task_struct *waiter;
- };
- size_t size;
- atomic_t ref;
- bool multi_bio : 1;
- bool should_dirty : 1;
- bool is_sync : 1;
- struct bio bio;
-};
-
-static struct bio_set blkdev_dio_pool;
-
-static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
-{
- struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
- struct request_queue *q = bdev_get_queue(bdev);
-
- return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
-}
-
-static void blkdev_bio_end_io(struct bio *bio)
-{
- struct blkdev_dio *dio = bio->bi_private;
- bool should_dirty = dio->should_dirty;
-
- if (bio->bi_status && !dio->bio.bi_status)
- dio->bio.bi_status = bio->bi_status;
-
- if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
- if (!dio->is_sync) {
- struct kiocb *iocb = dio->iocb;
- ssize_t ret;
-
- if (likely(!dio->bio.bi_status)) {
- ret = dio->size;
- iocb->ki_pos += ret;
- } else {
- ret = blk_status_to_errno(dio->bio.bi_status);
- }
-
- dio->iocb->ki_complete(iocb, ret, 0);
- if (dio->multi_bio)
- bio_put(&dio->bio);
- } else {
- struct task_struct *waiter = dio->waiter;
-
- WRITE_ONCE(dio->waiter, NULL);
- blk_wake_io_task(waiter);
- }
- }
-
- if (should_dirty) {
- bio_check_pages_dirty(bio);
- } else {
- bio_release_pages(bio, false);
- bio_put(bio);
- }
-}
-
-static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
- unsigned int nr_pages)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = bdev_file_inode(file);
- struct block_device *bdev = I_BDEV(inode);
- struct blk_plug plug;
- struct blkdev_dio *dio;
- struct bio *bio;
- bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
- bool is_read = (iov_iter_rw(iter) == READ), is_sync;
- loff_t pos = iocb->ki_pos;
- blk_qc_t qc = BLK_QC_T_NONE;
- int ret = 0;
-
- if ((pos | iov_iter_alignment(iter)) &
- (bdev_logical_block_size(bdev) - 1))
- return -EINVAL;
-
- bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
-
- dio = container_of(bio, struct blkdev_dio, bio);
- dio->is_sync = is_sync = is_sync_kiocb(iocb);
- if (dio->is_sync) {
- dio->waiter = current;
- bio_get(bio);
- } else {
- dio->iocb = iocb;
- }
-
- dio->size = 0;
- dio->multi_bio = false;
- dio->should_dirty = is_read && iter_is_iovec(iter);
-
- /*
- * Don't plug for HIPRI/polled IO, as those should go straight
- * to issue
- */
- if (!is_poll)
- blk_start_plug(&plug);
-
- for (;;) {
- bio_set_dev(bio, bdev);
- bio->bi_iter.bi_sector = pos >> 9;
- bio->bi_write_hint = iocb->ki_hint;
- bio->bi_private = dio;
- bio->bi_end_io = blkdev_bio_end_io;
- bio->bi_ioprio = iocb->ki_ioprio;
-
- ret = bio_iov_iter_get_pages(bio, iter);
- if (unlikely(ret)) {
- bio->bi_status = BLK_STS_IOERR;
- bio_endio(bio);
- break;
- }
-
- if (is_read) {
- bio->bi_opf = REQ_OP_READ;
- if (dio->should_dirty)
- bio_set_pages_dirty(bio);
- } else {
- bio->bi_opf = dio_bio_write_op(iocb);
- task_io_account_write(bio->bi_iter.bi_size);
- }
- if (iocb->ki_flags & IOCB_NOWAIT)
- bio->bi_opf |= REQ_NOWAIT;
-
- dio->size += bio->bi_iter.bi_size;
- pos += bio->bi_iter.bi_size;
-
- nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
- if (!nr_pages) {
- bool polled = false;
-
- if (iocb->ki_flags & IOCB_HIPRI) {
- bio_set_polled(bio, iocb);
- polled = true;
- }
-
- qc = submit_bio(bio);
-
- if (polled)
- WRITE_ONCE(iocb->ki_cookie, qc);
- break;
- }
-
- if (!dio->multi_bio) {
- /*
- * AIO needs an extra reference to ensure the dio
- * structure which is embedded into the first bio
- * stays around.
- */
- if (!is_sync)
- bio_get(bio);
- dio->multi_bio = true;
- atomic_set(&dio->ref, 2);
- } else {
- atomic_inc(&dio->ref);
- }
-
- submit_bio(bio);
- bio = bio_alloc(GFP_KERNEL, nr_pages);
- }
-
- if (!is_poll)
- blk_finish_plug(&plug);
-
- if (!is_sync)
- return -EIOCBQUEUED;
-
- for (;;) {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (!READ_ONCE(dio->waiter))
- break;
-
- if (!(iocb->ki_flags & IOCB_HIPRI) ||
- !blk_poll(bdev_get_queue(bdev), qc, true))
- blk_io_schedule();
- }
- __set_current_state(TASK_RUNNING);
-
- if (!ret)
- ret = blk_status_to_errno(dio->bio.bi_status);
- if (likely(!ret))
- ret = dio->size;
-
- bio_put(&dio->bio);
- return ret;
-}
-
-static ssize_t
-blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
-{
- unsigned int nr_pages;
-
- if (!iov_iter_count(iter))
- return 0;
-
- nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
- if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
- return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
-
- return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
-}
-
-static __init int blkdev_init(void)
-{
- return bioset_init(&blkdev_dio_pool, 4,
- offsetof(struct blkdev_dio, bio),
- BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
-}
-module_init(blkdev_init);
-
-int __sync_blockdev(struct block_device *bdev, int wait)
-{
- if (!bdev)
- return 0;
- if (!wait)
- return filemap_flush(bdev->bd_inode->i_mapping);
- return filemap_write_and_wait(bdev->bd_inode->i_mapping);
-}
-
-/*
- * Write out and wait upon all the dirty data associated with a block
- * device via its mapping. Does not take the superblock lock.
- */
-int sync_blockdev(struct block_device *bdev)
-{
- return __sync_blockdev(bdev, 1);
-}
-EXPORT_SYMBOL(sync_blockdev);
-
-/*
- * Write out and wait upon all dirty data associated with this
- * device. Filesystem data as well as the underlying block
- * device. Takes the superblock lock.
- */
-int fsync_bdev(struct block_device *bdev)
-{
- struct super_block *sb = get_super(bdev);
- if (sb) {
- int res = sync_filesystem(sb);
- drop_super(sb);
- return res;
- }
- return sync_blockdev(bdev);
-}
-EXPORT_SYMBOL(fsync_bdev);
-
-/**
- * freeze_bdev -- lock a filesystem and force it into a consistent state
- * @bdev: blockdevice to lock
- *
- * If a superblock is found on this device, we take the s_umount semaphore
- * on it to make sure nobody unmounts until the snapshot creation is done.
- * The reference counter (bd_fsfreeze_count) guarantees that only the last
- * unfreeze process can unfreeze the frozen filesystem actually when multiple
- * freeze requests arrive simultaneously. It counts up in freeze_bdev() and
- * count down in thaw_bdev(). When it becomes 0, thaw_bdev() will unfreeze
- * actually.
- */
-int freeze_bdev(struct block_device *bdev)
-{
- struct super_block *sb;
- int error = 0;
-
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (++bdev->bd_fsfreeze_count > 1)
- goto done;
-
- sb = get_active_super(bdev);
- if (!sb)
- goto sync;
- if (sb->s_op->freeze_super)
- error = sb->s_op->freeze_super(sb);
- else
- error = freeze_super(sb);
- deactivate_super(sb);
-
- if (error) {
- bdev->bd_fsfreeze_count--;
- goto done;
- }
- bdev->bd_fsfreeze_sb = sb;
-
-sync:
- sync_blockdev(bdev);
-done:
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return error;
-}
-EXPORT_SYMBOL(freeze_bdev);
-
-/**
- * thaw_bdev -- unlock filesystem
- * @bdev: blockdevice to unlock
- *
- * Unlocks the filesystem and marks it writeable again after freeze_bdev().
- */
-int thaw_bdev(struct block_device *bdev)
-{
- struct super_block *sb;
- int error = -EINVAL;
-
- mutex_lock(&bdev->bd_fsfreeze_mutex);
- if (!bdev->bd_fsfreeze_count)
- goto out;
-
- error = 0;
- if (--bdev->bd_fsfreeze_count > 0)
- goto out;
-
- sb = bdev->bd_fsfreeze_sb;
- if (!sb)
- goto out;
-
- if (sb->s_op->thaw_super)
- error = sb->s_op->thaw_super(sb);
- else
- error = thaw_super(sb);
- if (error)
- bdev->bd_fsfreeze_count++;
- else
- bdev->bd_fsfreeze_sb = NULL;
-out:
- mutex_unlock(&bdev->bd_fsfreeze_mutex);
- return error;
-}
-EXPORT_SYMBOL(thaw_bdev);
-
-static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
-{
- return block_write_full_page(page, blkdev_get_block, wbc);
-}
-
-static int blkdev_readpage(struct file * file, struct page * page)
-{
- return block_read_full_page(page, blkdev_get_block);
-}
-
-static void blkdev_readahead(struct readahead_control *rac)
-{
- mpage_readahead(rac, blkdev_get_block);
-}
-
-static int blkdev_write_begin(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
- struct page **pagep, void **fsdata)
-{
- return block_write_begin(mapping, pos, len, flags, pagep,
- blkdev_get_block);
-}
-
-static int blkdev_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
-{
- int ret;
- ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
-
- unlock_page(page);
- put_page(page);
-
- return ret;
-}
-
-/*
- * private llseek:
- * for a block special file file_inode(file)->i_size is zero
- * so we compute the size by hand (just as in block_read/write above)
- */
-static loff_t block_llseek(struct file *file, loff_t offset, int whence)
-{
- struct inode *bd_inode = bdev_file_inode(file);
- loff_t retval;
-
- inode_lock(bd_inode);
- retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
- inode_unlock(bd_inode);
- return retval;
-}
-
-static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
- int datasync)
-{
- struct inode *bd_inode = bdev_file_inode(filp);
- struct block_device *bdev = I_BDEV(bd_inode);
- int error;
-
- error = file_write_and_wait_range(filp, start, end);
- if (error)
- return error;
-
- /*
- * There is no need to serialise calls to blkdev_issue_flush with
- * i_mutex and doing so causes performance issues with concurrent
- * O_SYNC writers to a block device.
- */
- error = blkdev_issue_flush(bdev);
- if (error == -EOPNOTSUPP)
- error = 0;
-
- return error;
-}
-
-/**
- * bdev_read_page() - Start reading a page from a block device
- * @bdev: The device to read the page from
- * @sector: The offset on the device to read the page to (need not be aligned)
- * @page: The page to read
- *
- * On entry, the page should be locked. It will be unlocked when the page
- * has been read. If the block driver implements rw_page synchronously,
- * that will be true on exit from this function, but it need not be.
- *
- * Errors returned by this function are usually "soft", eg out of memory, or
- * queue full; callers should try a different route to read this page rather
- * than propagate an error back up the stack.
- *
- * Return: negative errno if an error occurs, 0 if submission was successful.
- */
-int bdev_read_page(struct block_device *bdev, sector_t sector,
- struct page *page)
-{
- const struct block_device_operations *ops = bdev->bd_disk->fops;
- int result = -EOPNOTSUPP;
-
- if (!ops->rw_page || bdev_get_integrity(bdev))
- return result;
-
- result = blk_queue_enter(bdev->bd_disk->queue, 0);
- if (result)
- return result;
- result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
- REQ_OP_READ);
- blk_queue_exit(bdev->bd_disk->queue);
- return result;
-}
-
-/**
- * bdev_write_page() - Start writing a page to a block device
- * @bdev: The device to write the page to
- * @sector: The offset on the device to write the page to (need not be aligned)
- * @page: The page to write
- * @wbc: The writeback_control for the write
- *
- * On entry, the page should be locked and not currently under writeback.
- * On exit, if the write started successfully, the page will be unlocked and
- * under writeback. If the write failed already (eg the driver failed to
- * queue the page to the device), the page will still be locked. If the
- * caller is a ->writepage implementation, it will need to unlock the page.
- *
- * Errors returned by this function are usually "soft", eg out of memory, or
- * queue full; callers should try a different route to write this page rather
- * than propagate an error back up the stack.
- *
- * Return: negative errno if an error occurs, 0 if submission was successful.
- */
-int bdev_write_page(struct block_device *bdev, sector_t sector,
- struct page *page, struct writeback_control *wbc)
-{
- int result;
- const struct block_device_operations *ops = bdev->bd_disk->fops;
-
- if (!ops->rw_page || bdev_get_integrity(bdev))
- return -EOPNOTSUPP;
- result = blk_queue_enter(bdev->bd_disk->queue, 0);
- if (result)
- return result;
-
- set_page_writeback(page);
- result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
- REQ_OP_WRITE);
- if (result) {
- end_page_writeback(page);
- } else {
- clean_page_buffers(page);
- unlock_page(page);
- }
- blk_queue_exit(bdev->bd_disk->queue);
- return result;
-}
-
-/*
- * pseudo-fs
- */
-
-static __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
-static struct kmem_cache * bdev_cachep __read_mostly;
-
-static struct inode *bdev_alloc_inode(struct super_block *sb)
-{
- struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
-
- if (!ei)
- return NULL;
- memset(&ei->bdev, 0, sizeof(ei->bdev));
- return &ei->vfs_inode;
-}
-
-static void bdev_free_inode(struct inode *inode)
-{
- struct block_device *bdev = I_BDEV(inode);
-
- free_percpu(bdev->bd_stats);
- kfree(bdev->bd_meta_info);
-
- if (!bdev_is_partition(bdev)) {
- if (bdev->bd_disk && bdev->bd_disk->bdi)
- bdi_put(bdev->bd_disk->bdi);
- kfree(bdev->bd_disk);
- }
-
- if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
- blk_free_ext_minor(MINOR(bdev->bd_dev));
-
- kmem_cache_free(bdev_cachep, BDEV_I(inode));
-}
-
-static void init_once(void *data)
-{
- struct bdev_inode *ei = data;
-
- inode_init_once(&ei->vfs_inode);
-}
-
-static void bdev_evict_inode(struct inode *inode)
-{
- truncate_inode_pages_final(&inode->i_data);
- invalidate_inode_buffers(inode); /* is it needed here? */
- clear_inode(inode);
-}
-
-static const struct super_operations bdev_sops = {
- .statfs = simple_statfs,
- .alloc_inode = bdev_alloc_inode,
- .free_inode = bdev_free_inode,
- .drop_inode = generic_delete_inode,
- .evict_inode = bdev_evict_inode,
-};
-
-static int bd_init_fs_context(struct fs_context *fc)
-{
- struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
- if (!ctx)
- return -ENOMEM;
- fc->s_iflags |= SB_I_CGROUPWB;
- ctx->ops = &bdev_sops;
- return 0;
-}
-
-static struct file_system_type bd_type = {
- .name = "bdev",
- .init_fs_context = bd_init_fs_context,
- .kill_sb = kill_anon_super,
-};
-
-struct super_block *blockdev_superblock __read_mostly;
-EXPORT_SYMBOL_GPL(blockdev_superblock);
-
-void __init bdev_cache_init(void)
-{
- int err;
- static struct vfsmount *bd_mnt;
-
- bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
- 0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
- SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
- init_once);
- err = register_filesystem(&bd_type);
- if (err)
- panic("Cannot register bdev pseudo-fs");
- bd_mnt = kern_mount(&bd_type);
- if (IS_ERR(bd_mnt))
- panic("Cannot create bdev pseudo-fs");
- blockdev_superblock = bd_mnt->mnt_sb; /* For writeback */
-}
-
-struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
-{
- struct block_device *bdev;
- struct inode *inode;
-
- inode = new_inode(blockdev_superblock);
- if (!inode)
- return NULL;
- inode->i_mode = S_IFBLK;
- inode->i_rdev = 0;
- inode->i_data.a_ops = &def_blk_aops;
- mapping_set_gfp_mask(&inode->i_data, GFP_USER);
-
- bdev = I_BDEV(inode);
- mutex_init(&bdev->bd_fsfreeze_mutex);
- spin_lock_init(&bdev->bd_size_lock);
- bdev->bd_disk = disk;
- bdev->bd_partno = partno;
- bdev->bd_inode = inode;
- bdev->bd_stats = alloc_percpu(struct disk_stats);
- if (!bdev->bd_stats) {
- iput(inode);
- return NULL;
- }
- return bdev;
-}
-
-void bdev_add(struct block_device *bdev, dev_t dev)
-{
- bdev->bd_dev = dev;
- bdev->bd_inode->i_rdev = dev;
- bdev->bd_inode->i_ino = dev;
- insert_inode_hash(bdev->bd_inode);
-}
-
-long nr_blockdev_pages(void)
-{
- struct inode *inode;
- long ret = 0;
-
- spin_lock(&blockdev_superblock->s_inode_list_lock);
- list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
- ret += inode->i_mapping->nrpages;
- spin_unlock(&blockdev_superblock->s_inode_list_lock);
-
- return ret;
-}
-
-/**
- * bd_may_claim - test whether a block device can be claimed
- * @bdev: block device of interest
- * @whole: whole block device containing @bdev, may equal @bdev
- * @holder: holder trying to claim @bdev
- *
- * Test whether @bdev can be claimed by @holder.
- *
- * CONTEXT:
- * spin_lock(&bdev_lock).
- *
- * RETURNS:
- * %true if @bdev can be claimed, %false otherwise.
- */
-static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
- void *holder)
-{
- if (bdev->bd_holder == holder)
- return true; /* already a holder */
- else if (bdev->bd_holder != NULL)
- return false; /* held by someone else */
- else if (whole == bdev)
- return true; /* is a whole device which isn't held */
-
- else if (whole->bd_holder == bd_may_claim)
- return true; /* is a partition of a device that is being partitioned */
- else if (whole->bd_holder != NULL)
- return false; /* is a partition of a held device */
- else
- return true; /* is a partition of an un-held device */
-}
-
-/**
- * bd_prepare_to_claim - claim a block device
- * @bdev: block device of interest
- * @holder: holder trying to claim @bdev
- *
- * Claim @bdev. This function fails if @bdev is already claimed by another
- * holder and waits if another claiming is in progress. return, the caller
- * has ownership of bd_claiming and bd_holder[s].
- *
- * RETURNS:
- * 0 if @bdev can be claimed, -EBUSY otherwise.
- */
-int bd_prepare_to_claim(struct block_device *bdev, void *holder)
-{
- struct block_device *whole = bdev_whole(bdev);
-
- if (WARN_ON_ONCE(!holder))
- return -EINVAL;
-retry:
- spin_lock(&bdev_lock);
- /* if someone else claimed, fail */
- if (!bd_may_claim(bdev, whole, holder)) {
- spin_unlock(&bdev_lock);
- return -EBUSY;
- }
-
- /* if claiming is already in progress, wait for it to finish */
- if (whole->bd_claiming) {
- wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
- DEFINE_WAIT(wait);
-
- prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
- spin_unlock(&bdev_lock);
- schedule();
- finish_wait(wq, &wait);
- goto retry;
- }
-
- /* yay, all mine */
- whole->bd_claiming = holder;
- spin_unlock(&bdev_lock);
- return 0;
-}
-EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
-
-static void bd_clear_claiming(struct block_device *whole, void *holder)
-{
- lockdep_assert_held(&bdev_lock);
- /* tell others that we're done */
- BUG_ON(whole->bd_claiming != holder);
- whole->bd_claiming = NULL;
- wake_up_bit(&whole->bd_claiming, 0);
-}
-
-/**
- * bd_finish_claiming - finish claiming of a block device
- * @bdev: block device of interest
- * @holder: holder that has claimed @bdev
- *
- * Finish exclusive open of a block device. Mark the device as exlusively
- * open by the holder and wake up all waiters for exclusive open to finish.
- */
-static void bd_finish_claiming(struct block_device *bdev, void *holder)
-{
- struct block_device *whole = bdev_whole(bdev);
-
- spin_lock(&bdev_lock);
- BUG_ON(!bd_may_claim(bdev, whole, holder));
- /*
- * Note that for a whole device bd_holders will be incremented twice,
- * and bd_holder will be set to bd_may_claim before being set to holder
- */
- whole->bd_holders++;
- whole->bd_holder = bd_may_claim;
- bdev->bd_holders++;
- bdev->bd_holder = holder;
- bd_clear_claiming(whole, holder);
- spin_unlock(&bdev_lock);
-}
-
-/**
- * bd_abort_claiming - abort claiming of a block device
- * @bdev: block device of interest
- * @holder: holder that has claimed @bdev
- *
- * Abort claiming of a block device when the exclusive open failed. This can be
- * also used when exclusive open is not actually desired and we just needed
- * to block other exclusive openers for a while.
- */
-void bd_abort_claiming(struct block_device *bdev, void *holder)
-{
- spin_lock(&bdev_lock);
- bd_clear_claiming(bdev_whole(bdev), holder);
- spin_unlock(&bdev_lock);
-}
-EXPORT_SYMBOL(bd_abort_claiming);
-
-static void blkdev_flush_mapping(struct block_device *bdev)
-{
- WARN_ON_ONCE(bdev->bd_holders);
- sync_blockdev(bdev);
- kill_bdev(bdev);
- bdev_write_inode(bdev);
-}
-
-static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
- int ret = 0;
-
- if (disk->fops->open) {
- ret = disk->fops->open(bdev, mode);
- if (ret) {
- /* avoid ghost partitions on a removed medium */
- if (ret == -ENOMEDIUM &&
- test_bit(GD_NEED_PART_SCAN, &disk->state))
- bdev_disk_changed(disk, true);
- return ret;
- }
- }
-
- if (!bdev->bd_openers)
- set_init_blocksize(bdev);
- if (test_bit(GD_NEED_PART_SCAN, &disk->state))
- bdev_disk_changed(disk, false);
- bdev->bd_openers++;
- return 0;;
-}
-
-static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
-{
- if (!--bdev->bd_openers)
- blkdev_flush_mapping(bdev);
- if (bdev->bd_disk->fops->release)
- bdev->bd_disk->fops->release(bdev->bd_disk, mode);
-}
-
-static int blkdev_get_part(struct block_device *part, fmode_t mode)
-{
- struct gendisk *disk = part->bd_disk;
- int ret;
-
- if (part->bd_openers)
- goto done;
-
- ret = blkdev_get_whole(bdev_whole(part), mode);
- if (ret)
- return ret;
-
- ret = -ENXIO;
- if (!bdev_nr_sectors(part))
- goto out_blkdev_put;
-
- disk->open_partitions++;
- set_init_blocksize(part);
-done:
- part->bd_openers++;
- return 0;
-
-out_blkdev_put:
- blkdev_put_whole(bdev_whole(part), mode);
- return ret;
-}
-
-static void blkdev_put_part(struct block_device *part, fmode_t mode)
-{
- struct block_device *whole = bdev_whole(part);
-
- if (--part->bd_openers)
- return;
- blkdev_flush_mapping(part);
- whole->bd_disk->open_partitions--;
- blkdev_put_whole(whole, mode);
-}
-
-struct block_device *blkdev_get_no_open(dev_t dev)
-{
- struct block_device *bdev;
- struct inode *inode;
-
- inode = ilookup(blockdev_superblock, dev);
- if (!inode) {
- blk_request_module(dev);
- inode = ilookup(blockdev_superblock, dev);
- if (!inode)
- return NULL;
- }
-
- /* switch from the inode reference to a device mode one: */
- bdev = &BDEV_I(inode)->bdev;
- if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
- bdev = NULL;
- iput(inode);
-
- if (!bdev)
- return NULL;
- if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
- !try_module_get(bdev->bd_disk->fops->owner)) {
- put_device(&bdev->bd_device);
- return NULL;
- }
-
- return bdev;
-}
-
-void blkdev_put_no_open(struct block_device *bdev)
-{
- module_put(bdev->bd_disk->fops->owner);
- put_device(&bdev->bd_device);
-}
-
-/**
- * blkdev_get_by_dev - open a block device by device number
- * @dev: device number of block device to open
- * @mode: FMODE_* mask
- * @holder: exclusive holder identifier
- *
- * Open the block device described by device number @dev. If @mode includes
- * %FMODE_EXCL, the block device is opened with exclusive access. Specifying
- * %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may nest for
- * the same @holder.
- *
- * Use this interface ONLY if you really do not have anything better - i.e. when
- * you are behind a truly sucky interface and all you are given is a device
- * number. Everything else should use blkdev_get_by_path().
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * Reference to the block_device on success, ERR_PTR(-errno) on failure.
- */
-struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
-{
- bool unblock_events = true;
- struct block_device *bdev;
- struct gendisk *disk;
- int ret;
-
- ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
- MAJOR(dev), MINOR(dev),
- ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
- ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
- if (ret)
- return ERR_PTR(ret);
-
- bdev = blkdev_get_no_open(dev);
- if (!bdev)
- return ERR_PTR(-ENXIO);
- disk = bdev->bd_disk;
-
- if (mode & FMODE_EXCL) {
- ret = bd_prepare_to_claim(bdev, holder);
- if (ret)
- goto put_blkdev;
- }
-
- disk_block_events(disk);
-
- mutex_lock(&disk->open_mutex);
- ret = -ENXIO;
- if (!disk_live(disk))
- goto abort_claiming;
- if (bdev_is_partition(bdev))
- ret = blkdev_get_part(bdev, mode);
- else
- ret = blkdev_get_whole(bdev, mode);
- if (ret)
- goto abort_claiming;
- if (mode & FMODE_EXCL) {
- bd_finish_claiming(bdev, holder);
-
- /*
- * Block event polling for write claims if requested. Any write
- * holder makes the write_holder state stick until all are
- * released. This is good enough and tracking individual
- * writeable reference is too fragile given the way @mode is
- * used in blkdev_get/put().
- */
- if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
- (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
- bdev->bd_write_holder = true;
- unblock_events = false;
- }
- }
- mutex_unlock(&disk->open_mutex);
-
- if (unblock_events)
- disk_unblock_events(disk);
- return bdev;
-
-abort_claiming:
- if (mode & FMODE_EXCL)
- bd_abort_claiming(bdev, holder);
- mutex_unlock(&disk->open_mutex);
- disk_unblock_events(disk);
-put_blkdev:
- blkdev_put_no_open(bdev);
- return ERR_PTR(ret);
-}
-EXPORT_SYMBOL(blkdev_get_by_dev);
-
-/**
- * blkdev_get_by_path - open a block device by name
- * @path: path to the block device to open
- * @mode: FMODE_* mask
- * @holder: exclusive holder identifier
- *
- * Open the block device described by the device file at @path. If @mode
- * includes %FMODE_EXCL, the block device is opened with exclusive access.
- * Specifying %FMODE_EXCL with a %NULL @holder is invalid. Exclusive opens may
- * nest for the same @holder.
- *
- * CONTEXT:
- * Might sleep.
- *
- * RETURNS:
- * Reference to the block_device on success, ERR_PTR(-errno) on failure.
- */
-struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
- void *holder)
-{
- struct block_device *bdev;
- dev_t dev;
- int error;
-
- error = lookup_bdev(path, &dev);
- if (error)
- return ERR_PTR(error);
-
- bdev = blkdev_get_by_dev(dev, mode, holder);
- if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
- blkdev_put(bdev, mode);
- return ERR_PTR(-EACCES);
- }
-
- return bdev;
-}
-EXPORT_SYMBOL(blkdev_get_by_path);
-
-static int blkdev_open(struct inode * inode, struct file * filp)
-{
- struct block_device *bdev;
-
- /*
- * Preserve backwards compatibility and allow large file access
- * even if userspace doesn't ask for it explicitly. Some mkfs
- * binary needs it. We might want to drop this workaround
- * during an unstable branch.
- */
- filp->f_flags |= O_LARGEFILE;
-
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
-
- if (filp->f_flags & O_NDELAY)
- filp->f_mode |= FMODE_NDELAY;
- if (filp->f_flags & O_EXCL)
- filp->f_mode |= FMODE_EXCL;
- if ((filp->f_flags & O_ACCMODE) == 3)
- filp->f_mode |= FMODE_WRITE_IOCTL;
-
- bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
- if (IS_ERR(bdev))
- return PTR_ERR(bdev);
- filp->f_mapping = bdev->bd_inode->i_mapping;
- filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
- return 0;
-}
-
-void blkdev_put(struct block_device *bdev, fmode_t mode)
-{
- struct gendisk *disk = bdev->bd_disk;
-
- /*
- * Sync early if it looks like we're the last one. If someone else
- * opens the block device between now and the decrement of bd_openers
- * then we did a sync that we didn't need to, but that's not the end
- * of the world and we want to avoid long (could be several minute)
- * syncs while holding the mutex.
- */
- if (bdev->bd_openers == 1)
- sync_blockdev(bdev);
-
- mutex_lock(&disk->open_mutex);
- if (mode & FMODE_EXCL) {
- struct block_device *whole = bdev_whole(bdev);
- bool bdev_free;
-
- /*
- * Release a claim on the device. The holder fields
- * are protected with bdev_lock. open_mutex is to
- * synchronize disk_holder unlinking.
- */
- spin_lock(&bdev_lock);
-
- WARN_ON_ONCE(--bdev->bd_holders < 0);
- WARN_ON_ONCE(--whole->bd_holders < 0);
-
- if ((bdev_free = !bdev->bd_holders))
- bdev->bd_holder = NULL;
- if (!whole->bd_holders)
- whole->bd_holder = NULL;
-
- spin_unlock(&bdev_lock);
-
- /*
- * If this was the last claim, remove holder link and
- * unblock evpoll if it was a write holder.
- */
- if (bdev_free && bdev->bd_write_holder) {
- disk_unblock_events(disk);
- bdev->bd_write_holder = false;
- }
- }
-
- /*
- * Trigger event checking and tell drivers to flush MEDIA_CHANGE
- * event. This is to ensure detection of media removal commanded
- * from userland - e.g. eject(1).
- */
- disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
-
- if (bdev_is_partition(bdev))
- blkdev_put_part(bdev, mode);
- else
- blkdev_put_whole(bdev, mode);
- mutex_unlock(&disk->open_mutex);
-
- blkdev_put_no_open(bdev);
-}
-EXPORT_SYMBOL(blkdev_put);
-
-static int blkdev_close(struct inode * inode, struct file * filp)
-{
- struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
- blkdev_put(bdev, filp->f_mode);
- return 0;
-}
-
-static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
-{
- struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- fmode_t mode = file->f_mode;
-
- /*
- * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
- * to update it before every ioctl.
- */
- if (file->f_flags & O_NDELAY)
- mode |= FMODE_NDELAY;
- else
- mode &= ~FMODE_NDELAY;
-
- return blkdev_ioctl(bdev, mode, cmd, arg);
-}
-
-/*
- * Write data to the block device. Only intended for the block device itself
- * and the raw driver which basically is a fake block device.
- *
- * Does not take i_mutex for the write and thus is not for general purpose
- * use.
- */
-static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
-{
- struct file *file = iocb->ki_filp;
- struct inode *bd_inode = bdev_file_inode(file);
- loff_t size = i_size_read(bd_inode);
- struct blk_plug plug;
- size_t shorted = 0;
- ssize_t ret;
-
- if (bdev_read_only(I_BDEV(bd_inode)))
- return -EPERM;
-
- if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
- return -ETXTBSY;
-
- if (!iov_iter_count(from))
- return 0;
-
- if (iocb->ki_pos >= size)
- return -ENOSPC;
-
- if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
- return -EOPNOTSUPP;
-
- size -= iocb->ki_pos;
- if (iov_iter_count(from) > size) {
- shorted = iov_iter_count(from) - size;
- iov_iter_truncate(from, size);
- }
-
- blk_start_plug(&plug);
- ret = __generic_file_write_iter(iocb, from);
- if (ret > 0)
- ret = generic_write_sync(iocb, ret);
- iov_iter_reexpand(from, iov_iter_count(from) + shorted);
- blk_finish_plug(&plug);
- return ret;
-}
-
-static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
-{
- struct file *file = iocb->ki_filp;
- struct inode *bd_inode = bdev_file_inode(file);
- loff_t size = i_size_read(bd_inode);
- loff_t pos = iocb->ki_pos;
- size_t shorted = 0;
- ssize_t ret;
-
- if (pos >= size)
- return 0;
-
- size -= pos;
- if (iov_iter_count(to) > size) {
- shorted = iov_iter_count(to) - size;
- iov_iter_truncate(to, size);
- }
-
- ret = generic_file_read_iter(iocb, to);
- iov_iter_reexpand(to, iov_iter_count(to) + shorted);
- return ret;
-}
-
-static int blkdev_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- return generic_writepages(mapping, wbc);
-}
-
-static const struct address_space_operations def_blk_aops = {
- .set_page_dirty = __set_page_dirty_buffers,
- .readpage = blkdev_readpage,
- .readahead = blkdev_readahead,
- .writepage = blkdev_writepage,
- .write_begin = blkdev_write_begin,
- .write_end = blkdev_write_end,
- .writepages = blkdev_writepages,
- .direct_IO = blkdev_direct_IO,
- .migratepage = buffer_migrate_page_norefs,
- .is_dirty_writeback = buffer_check_dirty_writeback,
-};
-
-#define BLKDEV_FALLOC_FL_SUPPORTED \
- (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
-
-static long blkdev_fallocate(struct file *file, int mode, loff_t start,
- loff_t len)
-{
- struct block_device *bdev = I_BDEV(bdev_file_inode(file));
- loff_t end = start + len - 1;
- loff_t isize;
- int error;
-
- /* Fail if we don't recognize the flags. */
- if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
- return -EOPNOTSUPP;
-
- /* Don't go off the end of the device. */
- isize = i_size_read(bdev->bd_inode);
- if (start >= isize)
- return -EINVAL;
- if (end >= isize) {
- if (mode & FALLOC_FL_KEEP_SIZE) {
- len = isize - start;
- end = start + len - 1;
- } else
- return -EINVAL;
- }
-
- /*
- * Don't allow IO that isn't aligned to logical block size.
- */
- if ((start | len) & (bdev_logical_block_size(bdev) - 1))
- return -EINVAL;
-
- /* Invalidate the page cache, including dirty pages. */
- error = truncate_bdev_range(bdev, file->f_mode, start, end);
- if (error)
- return error;
-
- switch (mode) {
- case FALLOC_FL_ZERO_RANGE:
- case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
- error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
- GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
- break;
- case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
- error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
- GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
- break;
- case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
- error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
- GFP_KERNEL, 0);
- break;
- default:
- return -EOPNOTSUPP;
- }
- if (error)
- return error;
-
- /*
- * Invalidate the page cache again; if someone wandered in and dirtied
- * a page, we just discard it - userspace has no way of knowing whether
- * the write happened before or after discard completing...
- */
- return truncate_bdev_range(bdev, file->f_mode, start, end);
-}
-
-const struct file_operations def_blk_fops = {
- .open = blkdev_open,
- .release = blkdev_close,
- .llseek = block_llseek,
- .read_iter = blkdev_read_iter,
- .write_iter = blkdev_write_iter,
- .iopoll = blkdev_iopoll,
- .mmap = generic_file_mmap,
- .fsync = blkdev_fsync,
- .unlocked_ioctl = block_ioctl,
-#ifdef CONFIG_COMPAT
- .compat_ioctl = compat_blkdev_ioctl,
-#endif
- .splice_read = generic_file_splice_read,
- .splice_write = iter_file_splice_write,
- .fallocate = blkdev_fallocate,
-};
-
-/**
- * lookup_bdev - look up a block device's dev_t by name
- * @pathname: special file representing the block device
- * @dev: return value of the block device's dev_t
- *
- * Look up the dev_t of the block device at @pathname in the current
- * namespace if possible and store it in @dev. Return 0 on success, or a
- * negative errno otherwise.
- */
-int lookup_bdev(const char *pathname, dev_t *dev)
-{
- struct inode *inode;
- struct path path;
- int error;
-
- if (!pathname || !*pathname)
- return -EINVAL;
-
- error = kern_path(pathname, LOOKUP_FOLLOW, &path);
- if (error)
- return error;
-
- inode = d_backing_inode(path.dentry);
- error = -ENOTBLK;
- if (!S_ISBLK(inode->i_mode))
- goto out_path_put;
- error = -EACCES;
- if (!may_open_dev(&path))
- goto out_path_put;
-
- *dev = inode->i_rdev;
- error = 0;
-out_path_put:
- path_put(&path);
- return error;
-}
-EXPORT_SYMBOL(lookup_bdev);
-
-int __invalidate_device(struct block_device *bdev, bool kill_dirty)
-{
- struct super_block *sb = get_super(bdev);
- int res = 0;
-
- if (sb) {
- /*
- * no need to lock the super, get_super holds the
- * read mutex so the filesystem cannot go away
- * under us (->put_super runs with the write lock
- * held).
- */
- shrink_dcache_sb(sb);
- res = invalidate_inodes(sb, kill_dirty);
- drop_super(sb);
- }
- invalidate_bdev(bdev);
- return res;
-}
-EXPORT_SYMBOL(__invalidate_device);
-
-void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
-{
- struct inode *inode, *old_inode = NULL;
-
- spin_lock(&blockdev_superblock->s_inode_list_lock);
- list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
- struct address_space *mapping = inode->i_mapping;
- struct block_device *bdev;
-
- spin_lock(&inode->i_lock);
- if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
- mapping->nrpages == 0) {
- spin_unlock(&inode->i_lock);
- continue;
- }
- __iget(inode);
- spin_unlock(&inode->i_lock);
- spin_unlock(&blockdev_superblock->s_inode_list_lock);
- /*
- * We hold a reference to 'inode' so it couldn't have been
- * removed from s_inodes list while we dropped the
- * s_inode_list_lock. We cannot iput the inode now as we can
- * be holding the last reference and we cannot iput it under
- * s_inode_list_lock. So we keep the reference and iput it
- * later.
- */
- iput(old_inode);
- old_inode = inode;
- bdev = I_BDEV(inode);
-
- mutex_lock(&bdev->bd_disk->open_mutex);
- if (bdev->bd_openers)
- func(bdev, arg);
- mutex_unlock(&bdev->bd_disk->open_mutex);
-
- spin_lock(&blockdev_superblock->s_inode_list_lock);
- }
- spin_unlock(&blockdev_superblock->s_inode_list_lock);
- iput(old_inode);
-}
#include <linux/random.h>
#include <linux/highmem.h>
#include <linux/fips.h>
-#include "../cifs_common/arc4.h"
+#include "../smbfs_common/arc4.h"
#include <crypto/aead.h>
int __cifs_calc_signature(struct smb_rqst *rqst,
#include <net/sock.h>
#include <asm/unaligned.h>
-#include "smbfsctl.h"
+#include "../smbfs_common/smbfsctl.h"
#define CIFS_PROT 0
#define POSIX_PROT (CIFS_PROT+1)
cifs_dbg(FYI, "clear cached root file handle\n");
SMB2_close(0, cfid->tcon, cfid->fid->persistent_fid,
cfid->fid->volatile_fid);
- cfid->is_valid = false;
- cfid->file_all_info_is_valid = false;
- cfid->has_lease = false;
- if (cfid->dentry) {
- dput(cfid->dentry);
- cfid->dentry = NULL;
- }
+ }
+
+ /*
+ * We only check validity above to send SMB2_close,
+ * but we still need to invalidate these entries
+ * when this function is called
+ */
+ cfid->is_valid = false;
+ cfid->file_all_info_is_valid = false;
+ cfid->has_lease = false;
+ if (cfid->dentry) {
+ dput(cfid->dentry);
+ cfid->dentry = NULL;
}
}
#include "cifsglob.h"
#include "cifs_debug.h"
#include "cifsproto.h"
-#include "../cifs_common/md4.h"
+#include "../smbfs_common/md4.h"
#ifndef false
#define false 0
+++ /dev/null
-/* SPDX-License-Identifier: LGPL-2.1 */
-/*
- * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
- *
- * Copyright (c) International Business Machines Corp., 2002,2013
- * Author(s): Steve French (sfrench@us.ibm.com)
- *
- */
-
-/* IOCTL information */
-/*
- * List of ioctl/fsctl function codes that are or could be useful in the
- * future to remote clients like cifs or SMB2/SMB3 client. This is probably
- * a slightly larger set of fsctls that NTFS local filesystem could handle,
- * including the seven below that we do not have struct definitions for.
- * Even with protocol definitions for most of these now available, we still
- * need to do some experimentation to identify which are practical to do
- * remotely. Some of the following, such as the encryption/compression ones
- * could be invoked from tools via a specialized hook into the VFS rather
- * than via the standard vfs entry points
- *
- * See MS-SMB2 Section 2.2.31 (last checked June 2013, all of that list are
- * below). Additional detail on less common ones can be found in MS-FSCC
- * section 2.3.
- */
-
-/*
- * FSCTL values are 32 bits and are constructed as
- * <device 16bits> <access 2bits> <function 12bits> <method 2bits>
- */
-/* Device */
-#define FSCTL_DEVICE_DFS (0x0006 << 16)
-#define FSCTL_DEVICE_FILE_SYSTEM (0x0009 << 16)
-#define FSCTL_DEVICE_NAMED_PIPE (0x0011 << 16)
-#define FSCTL_DEVICE_NETWORK_FILE_SYSTEM (0x0014 << 16)
-#define FSCTL_DEVICE_MASK 0xffff0000
-/* Access */
-#define FSCTL_DEVICE_ACCESS_FILE_ANY_ACCESS (0x00 << 14)
-#define FSCTL_DEVICE_ACCESS_FILE_READ_ACCESS (0x01 << 14)
-#define FSCTL_DEVICE_ACCESS_FILE_WRITE_ACCESS (0x02 << 14)
-#define FSCTL_DEVICE_ACCESS_FILE_READ_WRITE_ACCESS (0x03 << 14)
-#define FSCTL_DEVICE_ACCESS_MASK 0x0000c000
-/* Function */
-#define FSCTL_DEVICE_FUNCTION_MASK 0x00003ffc
-/* Method */
-#define FSCTL_DEVICE_METHOD_BUFFERED 0x00
-#define FSCTL_DEVICE_METHOD_IN_DIRECT 0x01
-#define FSCTL_DEVICE_METHOD_OUT_DIRECT 0x02
-#define FSCTL_DEVICE_METHOD_NEITHER 0x03
-#define FSCTL_DEVICE_METHOD_MASK 0x00000003
-
-
-#define FSCTL_DFS_GET_REFERRALS 0x00060194
-#define FSCTL_DFS_GET_REFERRALS_EX 0x000601B0
-#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
-#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
-#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
-#define FSCTL_LOCK_VOLUME 0x00090018
-#define FSCTL_UNLOCK_VOLUME 0x0009001C
-#define FSCTL_IS_PATHNAME_VALID 0x0009002C /* BB add struct */
-#define FSCTL_GET_COMPRESSION 0x0009003C /* BB add struct */
-#define FSCTL_SET_COMPRESSION 0x0009C040 /* BB add struct */
-#define FSCTL_QUERY_FAT_BPB 0x00090058 /* BB add struct */
-/* Verify the next FSCTL number, we had it as 0x00090090 before */
-#define FSCTL_FILESYSTEM_GET_STATS 0x00090060 /* BB add struct */
-#define FSCTL_GET_NTFS_VOLUME_DATA 0x00090064 /* BB add struct */
-#define FSCTL_GET_RETRIEVAL_POINTERS 0x00090073 /* BB add struct */
-#define FSCTL_IS_VOLUME_DIRTY 0x00090078 /* BB add struct */
-#define FSCTL_ALLOW_EXTENDED_DASD_IO 0x00090083 /* BB add struct */
-#define FSCTL_REQUEST_FILTER_OPLOCK 0x0009008C
-#define FSCTL_FIND_FILES_BY_SID 0x0009008F /* BB add struct */
-#define FSCTL_SET_OBJECT_ID 0x00090098 /* BB add struct */
-#define FSCTL_GET_OBJECT_ID 0x0009009C /* BB add struct */
-#define FSCTL_DELETE_OBJECT_ID 0x000900A0 /* BB add struct */
-#define FSCTL_SET_REPARSE_POINT 0x000900A4 /* BB add struct */
-#define FSCTL_GET_REPARSE_POINT 0x000900A8 /* BB add struct */
-#define FSCTL_DELETE_REPARSE_POINT 0x000900AC /* BB add struct */
-#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
-#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
-#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
-#define FSCTL_SET_ZERO_DATA 0x000980C8
-#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
-#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
-#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
-#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3 /* BB add struct */
-#define FSCTL_READ_FILE_USN_DATA 0x000900EB /* BB add struct */
-#define FSCTL_WRITE_USN_CLOSE_RECORD 0x000900EF /* BB add struct */
-#define FSCTL_SIS_COPYFILE 0x00090100 /* BB add struct */
-#define FSCTL_RECALL_FILE 0x00090117 /* BB add struct */
-#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
-#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
-#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
-#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
-#define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3
-#define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b
-#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF
-#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
-#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
-#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344
-#define FSCTL_SIS_LINK_FILES 0x0009C104
-#define FSCTL_SET_INTEGRITY_INFORMATION 0x0009C280
-#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
-#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
-/* strange that the number for this op is not sequential with previous op */
-#define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */
-/* Enumerate previous versions of a file */
-#define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064
-/* Retrieve an opaque file reference for server-side data movement ie copy */
-#define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078
-#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4
-#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
-#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
-#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204
-/* Perform server-side data movement */
-#define FSCTL_SRV_COPYCHUNK 0x001440F2
-#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2
-#define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */
-#define FSCTL_SRV_READ_HASH 0x001441BB /* BB add struct */
-
-/* See FSCC 2.1.2.5 */
-#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
-#define IO_REPARSE_TAG_HSM 0xC0000004
-#define IO_REPARSE_TAG_SIS 0x80000007
-#define IO_REPARSE_TAG_HSM2 0x80000006
-#define IO_REPARSE_TAG_DRIVER_EXTENDER 0x80000005
-/* Used by the DFS filter. See MS-DFSC */
-#define IO_REPARSE_TAG_DFS 0x8000000A
-/* Used by the DFS filter. See MS-DFSC */
-#define IO_REPARSE_TAG_DFSR 0x80000012
-#define IO_REPARSE_TAG_FILTER_MANAGER 0x8000000B
-/* See section MS-FSCC 2.1.2.4 */
-#define IO_REPARSE_TAG_SYMLINK 0xA000000C
-#define IO_REPARSE_TAG_DEDUP 0x80000013
-#define IO_REPARSE_APPXSTREAM 0xC0000014
-/* NFS symlinks, Win 8/SMB3 and later */
-#define IO_REPARSE_TAG_NFS 0x80000014
-/*
- * AzureFileSync - see
- * https://docs.microsoft.com/en-us/azure/storage/files/storage-sync-cloud-tiering
- */
-#define IO_REPARSE_TAG_AZ_FILE_SYNC 0x8000001e
-/* WSL reparse tags */
-#define IO_REPARSE_TAG_LX_SYMLINK 0xA000001D
-#define IO_REPARSE_TAG_AF_UNIX 0x80000023
-#define IO_REPARSE_TAG_LX_FIFO 0x80000024
-#define IO_REPARSE_TAG_LX_CHR 0x80000025
-#define IO_REPARSE_TAG_LX_BLK 0x80000026
-
-/* fsctl flags */
-/* If Flags is set to this value, the request is an FSCTL not ioctl request */
-#define SMB2_0_IOCTL_IS_FSCTL 0x00000001
-
+++ /dev/null
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for Linux filesystem routines that are shared by client and server.
-#
-
-obj-$(CONFIG_CIFS_COMMON) += cifs_arc4.o
-obj-$(CONFIG_CIFS_COMMON) += cifs_md4.o
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * Common values for ARC4 Cipher Algorithm
- */
-
-#ifndef _CRYPTO_ARC4_H
-#define _CRYPTO_ARC4_H
-
-#include <linux/types.h>
-
-#define ARC4_MIN_KEY_SIZE 1
-#define ARC4_MAX_KEY_SIZE 256
-#define ARC4_BLOCK_SIZE 1
-
-struct arc4_ctx {
- u32 S[256];
- u32 x, y;
-};
-
-int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len);
-void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len);
-
-#endif /* _CRYPTO_ARC4_H */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Cryptographic API
- *
- * ARC4 Cipher Algorithm
- *
- * Jon Oberheide <jon@oberheide.org>
- */
-
-#include <linux/module.h>
-#include "arc4.h"
-
-MODULE_LICENSE("GPL");
-
-int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len)
-{
- int i, j = 0, k = 0;
-
- ctx->x = 1;
- ctx->y = 0;
-
- for (i = 0; i < 256; i++)
- ctx->S[i] = i;
-
- for (i = 0; i < 256; i++) {
- u32 a = ctx->S[i];
-
- j = (j + in_key[k] + a) & 0xff;
- ctx->S[i] = ctx->S[j];
- ctx->S[j] = a;
- if (++k >= key_len)
- k = 0;
- }
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(cifs_arc4_setkey);
-
-void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len)
-{
- u32 *const S = ctx->S;
- u32 x, y, a, b;
- u32 ty, ta, tb;
-
- if (len == 0)
- return;
-
- x = ctx->x;
- y = ctx->y;
-
- a = S[x];
- y = (y + a) & 0xff;
- b = S[y];
-
- do {
- S[y] = a;
- a = (a + b) & 0xff;
- S[x] = b;
- x = (x + 1) & 0xff;
- ta = S[x];
- ty = (y + ta) & 0xff;
- tb = S[ty];
- *out++ = *in++ ^ S[a];
- if (--len == 0)
- break;
- y = ty;
- a = ta;
- b = tb;
- } while (true);
-
- ctx->x = x;
- ctx->y = y;
-}
-EXPORT_SYMBOL_GPL(cifs_arc4_crypt);
-
-static int __init
-init_cifs_common(void)
-{
- return 0;
-}
-static void __init
-exit_cifs_common(void)
-{
-}
-
-module_init(init_cifs_common)
-module_exit(exit_cifs_common)
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Cryptographic API.
- *
- * MD4 Message Digest Algorithm (RFC1320).
- *
- * Implementation derived from Andrew Tridgell and Steve French's
- * CIFS MD4 implementation, and the cryptoapi implementation
- * originally based on the public domain implementation written
- * by Colin Plumb in 1993.
- *
- * Copyright (c) Andrew Tridgell 1997-1998.
- * Modified by Steve French (sfrench@us.ibm.com) 2002
- * Copyright (c) Cryptoapi developers.
- * Copyright (c) 2002 David S. Miller (davem@redhat.com)
- * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
- *
- */
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <asm/byteorder.h>
-#include "md4.h"
-
-MODULE_LICENSE("GPL");
-
-static inline u32 lshift(u32 x, unsigned int s)
-{
- x &= 0xFFFFFFFF;
- return ((x << s) & 0xFFFFFFFF) | (x >> (32 - s));
-}
-
-static inline u32 F(u32 x, u32 y, u32 z)
-{
- return (x & y) | ((~x) & z);
-}
-
-static inline u32 G(u32 x, u32 y, u32 z)
-{
- return (x & y) | (x & z) | (y & z);
-}
-
-static inline u32 H(u32 x, u32 y, u32 z)
-{
- return x ^ y ^ z;
-}
-
-#define ROUND1(a,b,c,d,k,s) (a = lshift(a + F(b,c,d) + k, s))
-#define ROUND2(a,b,c,d,k,s) (a = lshift(a + G(b,c,d) + k + (u32)0x5A827999,s))
-#define ROUND3(a,b,c,d,k,s) (a = lshift(a + H(b,c,d) + k + (u32)0x6ED9EBA1,s))
-
-static void md4_transform(u32 *hash, u32 const *in)
-{
- u32 a, b, c, d;
-
- a = hash[0];
- b = hash[1];
- c = hash[2];
- d = hash[3];
-
- ROUND1(a, b, c, d, in[0], 3);
- ROUND1(d, a, b, c, in[1], 7);
- ROUND1(c, d, a, b, in[2], 11);
- ROUND1(b, c, d, a, in[3], 19);
- ROUND1(a, b, c, d, in[4], 3);
- ROUND1(d, a, b, c, in[5], 7);
- ROUND1(c, d, a, b, in[6], 11);
- ROUND1(b, c, d, a, in[7], 19);
- ROUND1(a, b, c, d, in[8], 3);
- ROUND1(d, a, b, c, in[9], 7);
- ROUND1(c, d, a, b, in[10], 11);
- ROUND1(b, c, d, a, in[11], 19);
- ROUND1(a, b, c, d, in[12], 3);
- ROUND1(d, a, b, c, in[13], 7);
- ROUND1(c, d, a, b, in[14], 11);
- ROUND1(b, c, d, a, in[15], 19);
-
- ROUND2(a, b, c, d, in[0], 3);
- ROUND2(d, a, b, c, in[4], 5);
- ROUND2(c, d, a, b, in[8], 9);
- ROUND2(b, c, d, a, in[12], 13);
- ROUND2(a, b, c, d, in[1], 3);
- ROUND2(d, a, b, c, in[5], 5);
- ROUND2(c, d, a, b, in[9], 9);
- ROUND2(b, c, d, a, in[13], 13);
- ROUND2(a, b, c, d, in[2], 3);
- ROUND2(d, a, b, c, in[6], 5);
- ROUND2(c, d, a, b, in[10], 9);
- ROUND2(b, c, d, a, in[14], 13);
- ROUND2(a, b, c, d, in[3], 3);
- ROUND2(d, a, b, c, in[7], 5);
- ROUND2(c, d, a, b, in[11], 9);
- ROUND2(b, c, d, a, in[15], 13);
-
- ROUND3(a, b, c, d, in[0], 3);
- ROUND3(d, a, b, c, in[8], 9);
- ROUND3(c, d, a, b, in[4], 11);
- ROUND3(b, c, d, a, in[12], 15);
- ROUND3(a, b, c, d, in[2], 3);
- ROUND3(d, a, b, c, in[10], 9);
- ROUND3(c, d, a, b, in[6], 11);
- ROUND3(b, c, d, a, in[14], 15);
- ROUND3(a, b, c, d, in[1], 3);
- ROUND3(d, a, b, c, in[9], 9);
- ROUND3(c, d, a, b, in[5], 11);
- ROUND3(b, c, d, a, in[13], 15);
- ROUND3(a, b, c, d, in[3], 3);
- ROUND3(d, a, b, c, in[11], 9);
- ROUND3(c, d, a, b, in[7], 11);
- ROUND3(b, c, d, a, in[15], 15);
-
- hash[0] += a;
- hash[1] += b;
- hash[2] += c;
- hash[3] += d;
-}
-
-static inline void md4_transform_helper(struct md4_ctx *ctx)
-{
- le32_to_cpu_array(ctx->block, ARRAY_SIZE(ctx->block));
- md4_transform(ctx->hash, ctx->block);
-}
-
-int cifs_md4_init(struct md4_ctx *mctx)
-{
- memset(mctx, 0, sizeof(struct md4_ctx));
- mctx->hash[0] = 0x67452301;
- mctx->hash[1] = 0xefcdab89;
- mctx->hash[2] = 0x98badcfe;
- mctx->hash[3] = 0x10325476;
- mctx->byte_count = 0;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(cifs_md4_init);
-
-int cifs_md4_update(struct md4_ctx *mctx, const u8 *data, unsigned int len)
-{
- const u32 avail = sizeof(mctx->block) - (mctx->byte_count & 0x3f);
-
- mctx->byte_count += len;
-
- if (avail > len) {
- memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
- data, len);
- return 0;
- }
-
- memcpy((char *)mctx->block + (sizeof(mctx->block) - avail),
- data, avail);
-
- md4_transform_helper(mctx);
- data += avail;
- len -= avail;
-
- while (len >= sizeof(mctx->block)) {
- memcpy(mctx->block, data, sizeof(mctx->block));
- md4_transform_helper(mctx);
- data += sizeof(mctx->block);
- len -= sizeof(mctx->block);
- }
-
- memcpy(mctx->block, data, len);
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(cifs_md4_update);
-
-int cifs_md4_final(struct md4_ctx *mctx, u8 *out)
-{
- const unsigned int offset = mctx->byte_count & 0x3f;
- char *p = (char *)mctx->block + offset;
- int padding = 56 - (offset + 1);
-
- *p++ = 0x80;
- if (padding < 0) {
- memset(p, 0x00, padding + sizeof(u64));
- md4_transform_helper(mctx);
- p = (char *)mctx->block;
- padding = 56;
- }
-
- memset(p, 0, padding);
- mctx->block[14] = mctx->byte_count << 3;
- mctx->block[15] = mctx->byte_count >> 29;
- le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
- sizeof(u64)) / sizeof(u32));
- md4_transform(mctx->hash, mctx->block);
- cpu_to_le32_array(mctx->hash, ARRAY_SIZE(mctx->hash));
- memcpy(out, mctx->hash, sizeof(mctx->hash));
- memset(mctx, 0, sizeof(*mctx));
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(cifs_md4_final);
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0+ */
-/*
- * Common values for MD4 Message Digest Algorithm
- */
-
-#ifndef _CIFS_MD4_H
-#define _CIFS_MD4_H
-
-#include <linux/types.h>
-
-#define MD4_DIGEST_SIZE 16
-#define MD4_HMAC_BLOCK_SIZE 64
-#define MD4_BLOCK_WORDS 16
-#define MD4_HASH_WORDS 4
-
-struct md4_ctx {
- u32 hash[MD4_HASH_WORDS];
- u32 block[MD4_BLOCK_WORDS];
- u64 byte_count;
-};
-
-
-int cifs_md4_init(struct md4_ctx *mctx);
-int cifs_md4_update(struct md4_ctx *mctx, const u8 *data, unsigned int len);
-int cifs_md4_final(struct md4_ctx *mctx, u8 *out);
-
-#endif /* _CIFS_MD4_H */
return new_fd;
}
+int receive_fd(struct file *file, unsigned int o_flags)
+{
+ return __receive_fd(file, NULL, o_flags);
+}
+EXPORT_SYMBOL_GPL(receive_fd);
+
static int ksys_dup3(unsigned int oldfd, unsigned int newfd, int flags)
{
int err = -EBADF;
return invalf(fc, "%s: not usable as path", param->key);
}
- f->refcnt++; /* filename_lookup() drops our ref. */
ret = filename_lookup(param->dirfd, f, flags, _path, NULL);
if (ret < 0) {
errorf(fc, "%s: Lookup failure for '%s'", param->key, f->name);
struct pipe_inode_info;
/*
- * block_dev.c
+ * block/bdev.c
*/
#ifdef CONFIG_BLOCK
extern void __init bdev_cache_init(void);
}
raw_spin_unlock(&wqe->lock);
io_worker_ref_put(wqe->wq);
+ kfree(worker);
return;
}
if (!io_queue_worker_create(worker, acct, create_worker_cont)) {
clear_bit_unlock(0, &worker->create_state);
io_worker_release(worker);
+ kfree(worker);
}
}
if (!IS_ERR(tsk)) {
io_init_new_worker(wqe, worker, tsk);
} else if (!io_should_retry_thread(PTR_ERR(tsk))) {
+ kfree(worker);
goto fail;
} else {
INIT_WORK(&worker->work, io_workqueue_create);
wq_list_add_after(&work->list, &tail->list, &acct->work_list);
}
+static bool io_wq_work_match_item(struct io_wq_work *work, void *data)
+{
+ return work == data;
+}
+
static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
{
struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
*/
if (test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state) ||
(work->flags & IO_WQ_WORK_CANCEL)) {
-run_cancel:
io_run_cancel(work, wqe);
return;
}
bool did_create;
did_create = io_wqe_create_worker(wqe, acct);
- if (unlikely(!did_create)) {
- raw_spin_lock(&wqe->lock);
- /* fatal condition, failed to create the first worker */
- if (!acct->nr_workers) {
- raw_spin_unlock(&wqe->lock);
- goto run_cancel;
- }
- raw_spin_unlock(&wqe->lock);
+ if (likely(did_create))
+ return;
+
+ raw_spin_lock(&wqe->lock);
+ /* fatal condition, failed to create the first worker */
+ if (!acct->nr_workers) {
+ struct io_cb_cancel_data match = {
+ .fn = io_wq_work_match_item,
+ .data = work,
+ .cancel_all = false,
+ };
+
+ if (io_acct_cancel_pending_work(wqe, acct, &match))
+ raw_spin_lock(&wqe->lock);
}
+ raw_spin_unlock(&wqe->lock);
}
}
{
struct io_worker *worker;
- if (cb->func != create_worker_cb || cb->func != create_worker_cont)
+ if (cb->func != create_worker_cb && cb->func != create_worker_cont)
return false;
worker = container_of(cb, struct io_worker, create_work);
return worker->wqe->wq == data;
while ((cb = task_work_cancel_match(wq->task, io_task_work_match, wq)) != NULL) {
struct io_worker *worker;
+ struct io_wqe_acct *acct;
worker = container_of(cb, struct io_worker, create_work);
- atomic_dec(&worker->wqe->acct[worker->create_index].nr_running);
+ acct = io_wqe_get_acct(worker);
+ atomic_dec(&acct->nr_running);
+ raw_spin_lock(&worker->wqe->lock);
+ acct->nr_workers--;
+ raw_spin_unlock(&worker->wqe->lock);
io_worker_ref_put(wq);
clear_bit_unlock(0, &worker->create_state);
io_worker_release(worker);
struct io_timeout_data *io = req->async_data;
if (hrtimer_try_to_cancel(&io->timer) != -1) {
+ if (status)
+ req_set_fail(req);
atomic_set(&req->ctx->cq_timeouts,
atomic_read(&req->ctx->cq_timeouts) + 1);
list_del_init(&req->timeout.list);
static void io_cqring_ev_posted_iopoll(struct io_ring_ctx *ctx)
{
+ /* see waitqueue_active() comment */
+ smp_mb();
+
if (ctx->flags & IORING_SETUP_SQPOLL) {
- if (wq_has_sleeper(&ctx->cq_wait))
+ if (waitqueue_active(&ctx->cq_wait))
wake_up_all(&ctx->cq_wait);
}
if (io_should_trigger_evfd(ctx))
if (ctx->flags & IORING_SETUP_SQPOLL) {
sqd = ctx->sq_data;
if (sqd) {
+ /*
+ * Observe the correct sqd->lock -> ctx->uring_lock
+ * ordering. Fine to drop uring_lock here, we hold
+ * a ref to the ctx.
+ */
+ mutex_unlock(&ctx->uring_lock);
mutex_lock(&sqd->lock);
+ mutex_lock(&ctx->uring_lock);
tctx = sqd->thread->io_uring;
}
} else {
BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
- BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
+ BUILD_BUG_ON(__REQ_F_LAST_BIT > 8 * sizeof(int));
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT);
void putname(struct filename *name)
{
- if (IS_ERR_OR_NULL(name))
+ if (IS_ERR(name))
return;
BUG_ON(name->refcnt <= 0);
return err;
}
-static int __filename_lookup(int dfd, struct filename *name, unsigned flags,
+int filename_lookup(int dfd, struct filename *name, unsigned flags,
struct path *path, struct path *root)
{
int retval;
return retval;
}
-int filename_lookup(int dfd, struct filename *name, unsigned flags,
- struct path *path, struct path *root)
-{
- int retval = __filename_lookup(dfd, name, flags, path, root);
-
- putname(name);
- return retval;
-}
-
/* Returns 0 and nd will be valid on success; returns error otherwise. */
static int path_parentat(struct nameidata *nd, unsigned flags,
struct path *parent)
return err;
}
-static int __filename_parentat(int dfd, struct filename *name,
- unsigned int flags, struct path *parent,
- struct qstr *last, int *type)
+/* Note: this does not consume "name" */
+static int filename_parentat(int dfd, struct filename *name,
+ unsigned int flags, struct path *parent,
+ struct qstr *last, int *type)
{
int retval;
struct nameidata nd;
return retval;
}
-static int filename_parentat(int dfd, struct filename *name,
- unsigned int flags, struct path *parent,
- struct qstr *last, int *type)
-{
- int retval = __filename_parentat(dfd, name, flags, parent, last, type);
-
- putname(name);
- return retval;
-}
-
/* does lookup, returns the object with parent locked */
-struct dentry *kern_path_locked(const char *name, struct path *path)
+static struct dentry *__kern_path_locked(struct filename *name, struct path *path)
{
struct dentry *d;
struct qstr last;
int type, error;
- error = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
- &last, &type);
+ error = filename_parentat(AT_FDCWD, name, 0, path, &last, &type);
if (error)
return ERR_PTR(error);
if (unlikely(type != LAST_NORM)) {
return d;
}
+struct dentry *kern_path_locked(const char *name, struct path *path)
+{
+ struct filename *filename = getname_kernel(name);
+ struct dentry *res = __kern_path_locked(filename, path);
+
+ putname(filename);
+ return res;
+}
+
int kern_path(const char *name, unsigned int flags, struct path *path)
{
- return filename_lookup(AT_FDCWD, getname_kernel(name),
- flags, path, NULL);
+ struct filename *filename = getname_kernel(name);
+ int ret = filename_lookup(AT_FDCWD, filename, flags, path, NULL);
+
+ putname(filename);
+ return ret;
+
}
EXPORT_SYMBOL(kern_path);
const char *name, unsigned int flags,
struct path *path)
{
+ struct filename *filename;
struct path root = {.mnt = mnt, .dentry = dentry};
+ int ret;
+
+ filename = getname_kernel(name);
/* the first argument of filename_lookup() is ignored with root */
- return filename_lookup(AT_FDCWD, getname_kernel(name),
- flags , path, &root);
+ ret = filename_lookup(AT_FDCWD, filename, flags, path, &root);
+ putname(filename);
+ return ret;
}
EXPORT_SYMBOL(vfs_path_lookup);
int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
struct path *path, int *empty)
{
- return filename_lookup(dfd, getname_flags(name, flags, empty),
- flags, path, NULL);
+ struct filename *filename = getname_flags(name, flags, empty);
+ int ret = filename_lookup(dfd, filename, flags, path, NULL);
+
+ putname(filename);
+ return ret;
}
EXPORT_SYMBOL(user_path_at_empty);
return file;
}
-static struct dentry *__filename_create(int dfd, struct filename *name,
- struct path *path, unsigned int lookup_flags)
+static struct dentry *filename_create(int dfd, struct filename *name,
+ struct path *path, unsigned int lookup_flags)
{
struct dentry *dentry = ERR_PTR(-EEXIST);
struct qstr last;
*/
lookup_flags &= LOOKUP_REVAL;
- error = __filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+ error = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
if (error)
return ERR_PTR(error);
return dentry;
}
-static inline struct dentry *filename_create(int dfd, struct filename *name,
+struct dentry *kern_path_create(int dfd, const char *pathname,
struct path *path, unsigned int lookup_flags)
{
- struct dentry *res = __filename_create(dfd, name, path, lookup_flags);
+ struct filename *filename = getname_kernel(pathname);
+ struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
- putname(name);
+ putname(filename);
return res;
}
-
-struct dentry *kern_path_create(int dfd, const char *pathname,
- struct path *path, unsigned int lookup_flags)
-{
- return filename_create(dfd, getname_kernel(pathname),
- path, lookup_flags);
-}
EXPORT_SYMBOL(kern_path_create);
void done_path_create(struct path *path, struct dentry *dentry)
inline struct dentry *user_path_create(int dfd, const char __user *pathname,
struct path *path, unsigned int lookup_flags)
{
- return filename_create(dfd, getname(pathname), path, lookup_flags);
+ /* Keep the name in a local so it can be released after the create lookup. */
+ struct filename *filename = getname(pathname);
+ struct dentry *res = filename_create(dfd, filename, path, lookup_flags);
+
+ putname(filename);
+ return res;
}
EXPORT_SYMBOL(user_path_create);
if (error)
goto out1;
retry:
- dentry = __filename_create(dfd, name, &path, lookup_flags);
+ dentry = filename_create(dfd, name, &path, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out1;
unsigned int lookup_flags = LOOKUP_DIRECTORY;
retry:
- dentry = __filename_create(dfd, name, &path, lookup_flags);
+ dentry = filename_create(dfd, name, &path, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_putname;
int type;
unsigned int lookup_flags = 0;
retry:
- error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
if (error)
goto exit1;
struct inode *delegated_inode = NULL;
unsigned int lookup_flags = 0;
retry:
- error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ error = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
if (error)
goto exit1;
goto out_putnames;
}
retry:
- dentry = __filename_create(newdfd, to, &path, lookup_flags);
+ dentry = filename_create(newdfd, to, &path, lookup_flags);
error = PTR_ERR(dentry);
if (IS_ERR(dentry))
goto out_putnames;
if (flags & AT_SYMLINK_FOLLOW)
how |= LOOKUP_FOLLOW;
retry:
- error = __filename_lookup(olddfd, old, how, &old_path, NULL);
+ error = filename_lookup(olddfd, old, how, &old_path, NULL);
if (error)
goto out_putnames;
- new_dentry = __filename_create(newdfd, new, &new_path,
+ new_dentry = filename_create(newdfd, new, &new_path,
(how & LOOKUP_REVAL));
error = PTR_ERR(new_dentry);
if (IS_ERR(new_dentry))
target_flags = 0;
retry:
- error = __filename_parentat(olddfd, from, lookup_flags, &old_path,
- &old_last, &old_type);
+ error = filename_parentat(olddfd, from, lookup_flags, &old_path,
+ &old_last, &old_type);
if (error)
goto put_names;
- error = __filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
- &new_type);
+ error = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
+ &new_type);
if (error)
goto exit1;
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Makefile for Linux filesystem routines that are shared by client and server.
+#
+
+obj-$(CONFIG_SMBFS_COMMON) += cifs_arc4.o
+obj-$(CONFIG_SMBFS_COMMON) += cifs_md4.o
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Common values for ARC4 Cipher Algorithm
+ */
+
+#ifndef _CRYPTO_ARC4_H
+#define _CRYPTO_ARC4_H
+
+#include <linux/types.h>
+
+#define ARC4_MIN_KEY_SIZE 1
+#define ARC4_MAX_KEY_SIZE 256
+#define ARC4_BLOCK_SIZE 1
+
+/*
+ * ARC4 cipher state: S is the 256-entry permutation table set up by
+ * cifs_arc4_setkey(); x and y are the two running indices that
+ * cifs_arc4_crypt() loads on entry and stores back on exit.
+ */
+struct arc4_ctx {
+ u32 S[256];
+ u32 x, y;
+};
+
+int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len);
+void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len);
+
+#endif /* _CRYPTO_ARC4_H */
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Cryptographic API
+ *
+ * ARC4 Cipher Algorithm
+ *
+ * Jon Oberheide <jon@oberheide.org>
+ */
+
+#include <linux/module.h>
+#include "arc4.h"
+
+MODULE_LICENSE("GPL");
+
+/*
+ * cifs_arc4_setkey - initialise an ARC4 context from a key
+ * @ctx: context whose permutation table and indices are (re)initialised
+ * @in_key: key bytes
+ * @key_len: number of bytes in @in_key; the key is reused cyclically
+ *
+ * Always returns 0.
+ */
+int cifs_arc4_setkey(struct arc4_ctx *ctx, const u8 *in_key, unsigned int key_len)
+{
+	unsigned int key_pos = 0;
+	int idx, swap_idx = 0;
+
+	/* Start from the identity permutation. */
+	for (idx = 0; idx < 256; idx++)
+		ctx->S[idx] = idx;
+
+	/* Stir the key into the table, wrapping around the key as needed. */
+	for (idx = 0; idx < 256; idx++) {
+		const u32 cur = ctx->S[idx];
+
+		swap_idx = (swap_idx + in_key[key_pos] + cur) & 0xff;
+		ctx->S[idx] = ctx->S[swap_idx];
+		ctx->S[swap_idx] = cur;
+
+		key_pos++;
+		if (key_pos >= key_len)
+			key_pos = 0;
+	}
+
+	/* Initial stream indices expected by cifs_arc4_crypt(). */
+	ctx->x = 1;
+	ctx->y = 0;
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_arc4_setkey);
+
+/*
+ * cifs_arc4_crypt - XOR @len bytes of @in with the ARC4 keystream into @out
+ * @ctx: cipher state; its table and x/y indices are advanced by this call
+ * @out: destination buffer
+ * @in: source buffer
+ * @len: number of bytes to process; 0 is a no-op that leaves @ctx untouched
+ */
+void cifs_arc4_crypt(struct arc4_ctx *ctx, u8 *out, const u8 *in, unsigned int len)
+{
+ u32 *const S = ctx->S;
+ u32 x, y, a, b;
+ u32 ty, ta, tb;
+
+ if (len == 0)
+ return;
+
+ x = ctx->x;
+ y = ctx->y;
+
+ /* Prime the first step: load S[x], advance y by it, load S[y]. */
+ a = S[x];
+ y = (y + a) & 0xff;
+ b = S[y];
+
+ do {
+ /* Swap S[x] and S[y]; a then becomes the keystream index. */
+ S[y] = a;
+ a = (a + b) & 0xff;
+ S[x] = b;
+ /* Load the values for the next position before emitting this byte. */
+ x = (x + 1) & 0xff;
+ ta = S[x];
+ ty = (y + ta) & 0xff;
+ tb = S[ty];
+ *out++ = *in++ ^ S[a];
+ /*
+ * On the last byte, leave y un-advanced: the priming code above
+ * recomputes it from the saved x/y on the next call.
+ */
+ if (--len == 0)
+ break;
+ y = ty;
+ a = ta;
+ b = tb;
+ } while (true);
+
+ ctx->x = x;
+ ctx->y = y;
+}
+EXPORT_SYMBOL_GPL(cifs_arc4_crypt);
+
+/* Module load hook: there is no state to set up, so always succeed. */
+static int __init
+init_smbfs_common(void)
+{
+	return 0;
+}
+
+/*
+ * Module unload hook: nothing to tear down.  This must be annotated __exit,
+ * not __init: an exit handler runs at unload time, long after init-only
+ * code has served its purpose.
+ */
+static void __exit
+exit_smbfs_common(void)
+{
+}
+
+module_init(init_smbfs_common)
+module_exit(exit_smbfs_common)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cryptographic API.
+ *
+ * MD4 Message Digest Algorithm (RFC1320).
+ *
+ * Implementation derived from Andrew Tridgell and Steve French's
+ * CIFS MD4 implementation, and the cryptoapi implementation
+ * originally based on the public domain implementation written
+ * by Colin Plumb in 1993.
+ *
+ * Copyright (c) Andrew Tridgell 1997-1998.
+ * Modified by Steve French (sfrench@us.ibm.com) 2002
+ * Copyright (c) Cryptoapi developers.
+ * Copyright (c) 2002 David S. Miller (davem@redhat.com)
+ * Copyright (c) 2002 James Morris <jmorris@intercode.com.au>
+ *
+ */
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include "md4.h"
+
+MODULE_LICENSE("GPL");
+
+/* Rotate the 32-bit value @x left by @s bit positions. */
+static inline u32 lshift(u32 x, unsigned int s)
+{
+	const u32 word = x & 0xFFFFFFFF;
+	const u32 high = (word << s) & 0xFFFFFFFF;
+	const u32 low = word >> (32 - s);
+
+	return high | low;
+}
+
+/* Bitwise select: each result bit comes from @y where @x is 1, else from @z. */
+static inline u32 F(u32 x, u32 y, u32 z)
+{
+	const u32 from_y = x & y;
+	const u32 from_z = ~x & z;
+
+	return from_y | from_z;
+}
+
+/* Bitwise majority: a result bit is 1 when at least two inputs have it set. */
+static inline u32 G(u32 x, u32 y, u32 z)
+{
+	const u32 xy = x & y;
+	const u32 xz = x & z;
+	const u32 yz = y & z;
+
+	return xy | xz | yz;
+}
+
+/* Bitwise parity: XOR of the three inputs. */
+static inline u32 H(u32 x, u32 y, u32 z)
+{
+	const u32 xy = x ^ y;
+
+	return xy ^ z;
+}
+
+/*
+ * One step of each MD4 round: add the round's boolean function of (b, c, d)
+ * and the message word k to a (rounds 2 and 3 also add a fixed constant),
+ * rotate the sum left by s bits and store it back into a.
+ */
+#define ROUND1(a,b,c,d,k,s) (a = lshift(a + F(b,c,d) + k, s))
+#define ROUND2(a,b,c,d,k,s) (a = lshift(a + G(b,c,d) + k + (u32)0x5A827999,s))
+#define ROUND3(a,b,c,d,k,s) (a = lshift(a + H(b,c,d) + k + (u32)0x6ED9EBA1,s))
+
+/*
+ * md4_transform - fold one 16-word block into the running hash state
+ * @hash: four-word state, updated in place
+ * @in: sixteen message words, read only
+ *
+ * Runs three rounds of sixteen steps over working copies of the state and
+ * then adds the results back into @hash.
+ */
+static void md4_transform(u32 *hash, u32 const *in)
+{
+ u32 a, b, c, d;
+
+ a = hash[0];
+ b = hash[1];
+ c = hash[2];
+ d = hash[3];
+
+ /* Round 1: message words consumed in order 0..15. */
+ ROUND1(a, b, c, d, in[0], 3);
+ ROUND1(d, a, b, c, in[1], 7);
+ ROUND1(c, d, a, b, in[2], 11);
+ ROUND1(b, c, d, a, in[3], 19);
+ ROUND1(a, b, c, d, in[4], 3);
+ ROUND1(d, a, b, c, in[5], 7);
+ ROUND1(c, d, a, b, in[6], 11);
+ ROUND1(b, c, d, a, in[7], 19);
+ ROUND1(a, b, c, d, in[8], 3);
+ ROUND1(d, a, b, c, in[9], 7);
+ ROUND1(c, d, a, b, in[10], 11);
+ ROUND1(b, c, d, a, in[11], 19);
+ ROUND1(a, b, c, d, in[12], 3);
+ ROUND1(d, a, b, c, in[13], 7);
+ ROUND1(c, d, a, b, in[14], 11);
+ ROUND1(b, c, d, a, in[15], 19);
+
+ /* Round 2: message words consumed column-wise (0, 4, 8, 12, 1, ...). */
+ ROUND2(a, b, c, d, in[0], 3);
+ ROUND2(d, a, b, c, in[4], 5);
+ ROUND2(c, d, a, b, in[8], 9);
+ ROUND2(b, c, d, a, in[12], 13);
+ ROUND2(a, b, c, d, in[1], 3);
+ ROUND2(d, a, b, c, in[5], 5);
+ ROUND2(c, d, a, b, in[9], 9);
+ ROUND2(b, c, d, a, in[13], 13);
+ ROUND2(a, b, c, d, in[2], 3);
+ ROUND2(d, a, b, c, in[6], 5);
+ ROUND2(c, d, a, b, in[10], 9);
+ ROUND2(b, c, d, a, in[14], 13);
+ ROUND2(a, b, c, d, in[3], 3);
+ ROUND2(d, a, b, c, in[7], 5);
+ ROUND2(c, d, a, b, in[11], 9);
+ ROUND2(b, c, d, a, in[15], 13);
+
+ /* Round 3: message words consumed in the order 0, 8, 4, 12, 2, 10, ... */
+ ROUND3(a, b, c, d, in[0], 3);
+ ROUND3(d, a, b, c, in[8], 9);
+ ROUND3(c, d, a, b, in[4], 11);
+ ROUND3(b, c, d, a, in[12], 15);
+ ROUND3(a, b, c, d, in[2], 3);
+ ROUND3(d, a, b, c, in[10], 9);
+ ROUND3(c, d, a, b, in[6], 11);
+ ROUND3(b, c, d, a, in[14], 15);
+ ROUND3(a, b, c, d, in[1], 3);
+ ROUND3(d, a, b, c, in[9], 9);
+ ROUND3(c, d, a, b, in[5], 11);
+ ROUND3(b, c, d, a, in[13], 15);
+ ROUND3(a, b, c, d, in[3], 3);
+ ROUND3(d, a, b, c, in[11], 9);
+ ROUND3(c, d, a, b, in[7], 11);
+ ROUND3(b, c, d, a, in[15], 15);
+
+ /* Feed the block's result forward into the running state. */
+ hash[0] += a;
+ hash[1] += b;
+ hash[2] += c;
+ hash[3] += d;
+}
+
+/*
+ * Process the block currently buffered in @ctx: run le32_to_cpu_array() over
+ * every word of ctx->block (presumably an in-place little-endian to CPU
+ * byte-order conversion - confirm against the helper), then fold the block
+ * into ctx->hash.
+ */
+static inline void md4_transform_helper(struct md4_ctx *ctx)
+{
+ le32_to_cpu_array(ctx->block, ARRAY_SIZE(ctx->block));
+ md4_transform(ctx->hash, ctx->block);
+}
+
+/*
+ * cifs_md4_init - reset an MD4 context to its starting state
+ * @mctx: context to initialise
+ *
+ * Always returns 0.
+ */
+int cifs_md4_init(struct md4_ctx *mctx)
+{
+	static const u32 initial_state[MD4_HASH_WORDS] = {
+		0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476,
+	};
+
+	/* Zeroing the whole context also leaves byte_count at 0. */
+	memset(mctx, 0, sizeof(*mctx));
+	memcpy(mctx->hash, initial_state, sizeof(initial_state));
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cifs_md4_init);
+
+/*
+ * cifs_md4_update - feed more message bytes into an MD4 context
+ * @mctx: context previously set up by cifs_md4_init()
+ * @data: bytes to hash
+ * @len: number of bytes at @data
+ *
+ * Complete 64-byte blocks are processed immediately; any remainder is kept
+ * in mctx->block for a later call.  Always returns 0.
+ */
+int cifs_md4_update(struct md4_ctx *mctx, const u8 *data, unsigned int len)
+{
+	/* Bytes already buffered in the pending block, and the space left. */
+	const u32 used = mctx->byte_count & 0x3f;
+	const u32 room = sizeof(mctx->block) - used;
+	char *dst = (char *)mctx->block + used;
+
+	mctx->byte_count += len;
+
+	/* Not enough input to complete the pending block: just buffer it. */
+	if (len < room) {
+		memcpy(dst, data, len);
+		return 0;
+	}
+
+	/* Top up the pending block and process it. */
+	memcpy(dst, data, room);
+	md4_transform_helper(mctx);
+	data += room;
+	len -= room;
+
+	/* Process every further whole block from the input. */
+	for (; len >= sizeof(mctx->block); len -= sizeof(mctx->block)) {
+		memcpy(mctx->block, data, sizeof(mctx->block));
+		md4_transform_helper(mctx);
+		data += sizeof(mctx->block);
+	}
+
+	/* Keep the tail for the next update or the final call. */
+	memcpy(mctx->block, data, len);
+
+	return 0;
+}
+
+/*
+ * cifs_md4_final - pad the message, produce the digest and wipe the context
+ * @mctx: context holding the accumulated state
+ * @out: receives sizeof(mctx->hash) bytes of digest
+ *
+ * Always returns 0.  @mctx is zeroed on return.
+ */
+int cifs_md4_final(struct md4_ctx *mctx, u8 *out)
+{
+ const unsigned int offset = mctx->byte_count & 0x3f;
+ char *p = (char *)mctx->block + offset;
+ /* Zero bytes needed after the 0x80 marker to reach byte 56 of the block. */
+ int padding = 56 - (offset + 1);
+
+ *p++ = 0x80;
+ if (padding < 0) {
+ /*
+ * No room left for the 8-byte length field: zero the rest of this
+ * block, process it, and continue padding in a fresh block.
+ */
+ memset(p, 0x00, padding + sizeof(u64));
+ md4_transform_helper(mctx);
+ p = (char *)mctx->block;
+ padding = 56;
+ }
+
+ memset(p, 0, padding);
+ /* Message length in bits, split into low and high 32-bit words. */
+ mctx->block[14] = mctx->byte_count << 3;
+ mctx->block[15] = mctx->byte_count >> 29;
+ /* Convert only the data words; the two length words were stored above. */
+ le32_to_cpu_array(mctx->block, (sizeof(mctx->block) -
+ sizeof(u64)) / sizeof(u32));
+ md4_transform(mctx->hash, mctx->block);
+ cpu_to_le32_array(mctx->hash, ARRAY_SIZE(mctx->hash));
+ memcpy(out, mctx->hash, sizeof(mctx->hash));
+ /* Do not leave hash state behind in the context. */
+ memset(mctx, 0, sizeof(*mctx));
+
+ return 0;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Common values for MD4 Message Digest Algorithm
+ */
+
+#ifndef _CIFS_MD4_H
+#define _CIFS_MD4_H
+
+#include <linux/types.h>
+
+#define MD4_DIGEST_SIZE 16
+#define MD4_HMAC_BLOCK_SIZE 64
+#define MD4_BLOCK_WORDS 16
+#define MD4_HASH_WORDS 4
+
+/*
+ * MD4 hashing state: hash is the running four-word digest state, block
+ * buffers input until a full 16-word block is available, and byte_count is
+ * the total number of message bytes fed in so far.
+ */
+struct md4_ctx {
+ u32 hash[MD4_HASH_WORDS];
+ u32 block[MD4_BLOCK_WORDS];
+ u64 byte_count;
+};
+
+
+int cifs_md4_init(struct md4_ctx *mctx);
+int cifs_md4_update(struct md4_ctx *mctx, const u8 *data, unsigned int len);
+int cifs_md4_final(struct md4_ctx *mctx, u8 *out);
+
+#endif /* _CIFS_MD4_H */
--- /dev/null
+/* SPDX-License-Identifier: LGPL-2.1+ */
+/*
+ * fs/cifs/smbfsctl.h: SMB, CIFS, SMB2 FSCTL definitions
+ *
+ * Copyright (c) International Business Machines Corp., 2002,2013
+ * Author(s): Steve French (sfrench@us.ibm.com)
+ *
+ */
+
+/* IOCTL information */
+/*
+ * List of ioctl/fsctl function codes that are or could be useful in the
+ * future to remote clients like cifs or SMB2/SMB3 client. This is probably
+ * a slightly larger set of fsctls that NTFS local filesystem could handle,
+ * including the seven below that we do not have struct definitions for.
+ * Even with protocol definitions for most of these now available, we still
+ * need to do some experimentation to identify which are practical to do
+ * remotely. Some of the following, such as the encryption/compression ones
+ * could be invoked from tools via a specialized hook into the VFS rather
+ * than via the standard vfs entry points
+ *
+ * See MS-SMB2 Section 2.2.31 (last checked September 2021, all of that list are
+ * below). Additional detail on less common ones can be found in MS-FSCC
+ * section 2.3.
+ */
+
+#ifndef __SMBFSCTL_H
+#define __SMBFSCTL_H
+
+/*
+ * FSCTL values are 32 bits and are constructed as
+ * <device 16bits> <access 2bits> <function 12bits> <method 2bits>
+ */
+/* Device */
+#define FSCTL_DEVICE_DFS (0x0006 << 16)
+#define FSCTL_DEVICE_FILE_SYSTEM (0x0009 << 16)
+#define FSCTL_DEVICE_NAMED_PIPE (0x0011 << 16)
+#define FSCTL_DEVICE_NETWORK_FILE_SYSTEM (0x0014 << 16)
+#define FSCTL_DEVICE_MASK 0xffff0000
+/* Access */
+#define FSCTL_DEVICE_ACCESS_FILE_ANY_ACCESS (0x00 << 14)
+#define FSCTL_DEVICE_ACCESS_FILE_READ_ACCESS (0x01 << 14)
+#define FSCTL_DEVICE_ACCESS_FILE_WRITE_ACCESS (0x02 << 14)
+#define FSCTL_DEVICE_ACCESS_FILE_READ_WRITE_ACCESS (0x03 << 14)
+#define FSCTL_DEVICE_ACCESS_MASK 0x0000c000
+/* Function */
+#define FSCTL_DEVICE_FUNCTION_MASK 0x00003ffc
+/* Method */
+#define FSCTL_DEVICE_METHOD_BUFFERED 0x00
+#define FSCTL_DEVICE_METHOD_IN_DIRECT 0x01
+#define FSCTL_DEVICE_METHOD_OUT_DIRECT 0x02
+#define FSCTL_DEVICE_METHOD_NEITHER 0x03
+#define FSCTL_DEVICE_METHOD_MASK 0x00000003
+
+
+#define FSCTL_DFS_GET_REFERRALS 0x00060194
+#define FSCTL_DFS_GET_REFERRALS_EX 0x000601B0
+#define FSCTL_REQUEST_OPLOCK_LEVEL_1 0x00090000
+#define FSCTL_REQUEST_OPLOCK_LEVEL_2 0x00090004
+#define FSCTL_REQUEST_BATCH_OPLOCK 0x00090008
+#define FSCTL_LOCK_VOLUME 0x00090018
+#define FSCTL_UNLOCK_VOLUME 0x0009001C
+#define FSCTL_IS_PATHNAME_VALID 0x0009002C /* BB add struct */
+#define FSCTL_GET_COMPRESSION 0x0009003C /* BB add struct */
+#define FSCTL_SET_COMPRESSION 0x0009C040 /* BB add struct */
+#define FSCTL_QUERY_FAT_BPB 0x00090058 /* BB add struct */
+/* Verify the next FSCTL number, we had it as 0x00090090 before */
+#define FSCTL_FILESYSTEM_GET_STATS 0x00090060 /* BB add struct */
+#define FSCTL_GET_NTFS_VOLUME_DATA 0x00090064 /* BB add struct */
+#define FSCTL_GET_RETRIEVAL_POINTERS 0x00090073 /* BB add struct */
+#define FSCTL_IS_VOLUME_DIRTY 0x00090078 /* BB add struct */
+#define FSCTL_ALLOW_EXTENDED_DASD_IO 0x00090083 /* BB add struct */
+#define FSCTL_REQUEST_FILTER_OPLOCK 0x0009008C
+#define FSCTL_FIND_FILES_BY_SID 0x0009008F /* BB add struct */
+#define FSCTL_SET_OBJECT_ID 0x00090098 /* BB add struct */
+#define FSCTL_GET_OBJECT_ID 0x0009009C /* BB add struct */
+#define FSCTL_DELETE_OBJECT_ID 0x000900A0 /* BB add struct */
+#define FSCTL_SET_REPARSE_POINT 0x000900A4 /* BB add struct */
+#define FSCTL_GET_REPARSE_POINT 0x000900A8 /* BB add struct */
+#define FSCTL_DELETE_REPARSE_POINT 0x000900AC /* BB add struct */
+#define FSCTL_SET_OBJECT_ID_EXTENDED 0x000900BC /* BB add struct */
+#define FSCTL_CREATE_OR_GET_OBJECT_ID 0x000900C0 /* BB add struct */
+#define FSCTL_SET_SPARSE 0x000900C4 /* BB add struct */
+#define FSCTL_SET_ZERO_DATA 0x000980C8
+#define FSCTL_SET_ENCRYPTION 0x000900D7 /* BB add struct */
+#define FSCTL_ENCRYPTION_FSCTL_IO 0x000900DB /* BB add struct */
+#define FSCTL_WRITE_RAW_ENCRYPTED 0x000900DF /* BB add struct */
+#define FSCTL_READ_RAW_ENCRYPTED 0x000900E3 /* BB add struct */
+#define FSCTL_READ_FILE_USN_DATA 0x000900EB /* BB add struct */
+#define FSCTL_WRITE_USN_CLOSE_RECORD 0x000900EF /* BB add struct */
+#define FSCTL_SIS_COPYFILE 0x00090100 /* BB add struct */
+#define FSCTL_RECALL_FILE 0x00090117 /* BB add struct */
+#define FSCTL_QUERY_SPARING_INFO 0x00090138 /* BB add struct */
+#define FSCTL_SET_ZERO_ON_DEALLOC 0x00090194 /* BB add struct */
+#define FSCTL_SET_SHORT_NAME_BEHAVIOR 0x000901B4 /* BB add struct */
+#define FSCTL_GET_INTEGRITY_INFORMATION 0x0009027C
+#define FSCTL_GET_REFS_VOLUME_DATA 0x000902D8 /* See MS-FSCC 2.3.24 */
+#define FSCTL_GET_RETRIEVAL_POINTERS_AND_REFCOUNT 0x000903d3
+#define FSCTL_GET_RETRIEVAL_POINTER_COUNT 0x0009042b
+#define FSCTL_QUERY_ALLOCATED_RANGES 0x000940CF
+#define FSCTL_SET_DEFECT_MANAGEMENT 0x00098134 /* BB add struct */
+#define FSCTL_FILE_LEVEL_TRIM 0x00098208 /* BB add struct */
+#define FSCTL_DUPLICATE_EXTENTS_TO_FILE 0x00098344
+#define FSCTL_SIS_LINK_FILES 0x0009C104
+#define FSCTL_SET_INTEGRITY_INFORMATION 0x0009C280
+#define FSCTL_PIPE_PEEK 0x0011400C /* BB add struct */
+#define FSCTL_PIPE_TRANSCEIVE 0x0011C017 /* BB add struct */
+/* strange that the number for this op is not sequential with previous op */
+#define FSCTL_PIPE_WAIT 0x00110018 /* BB add struct */
+/* Enumerate previous versions of a file */
+#define FSCTL_SRV_ENUMERATE_SNAPSHOTS 0x00144064
+/* Retrieve an opaque file reference for server-side data movement ie copy */
+#define FSCTL_SRV_REQUEST_RESUME_KEY 0x00140078
+#define FSCTL_LMR_REQUEST_RESILIENCY 0x001401D4
+#define FSCTL_LMR_GET_LINK_TRACK_INF 0x001400E8 /* BB add struct */
+#define FSCTL_LMR_SET_LINK_TRACK_INF 0x001400EC /* BB add struct */
+#define FSCTL_VALIDATE_NEGOTIATE_INFO 0x00140204
+/* Perform server-side data movement */
+#define FSCTL_SRV_COPYCHUNK 0x001440F2
+#define FSCTL_SRV_COPYCHUNK_WRITE 0x001480F2
+#define FSCTL_QUERY_NETWORK_INTERFACE_INFO 0x001401FC /* BB add struct */
+#define FSCTL_SRV_READ_HASH 0x001441BB /* BB add struct */
+
+/* See FSCC 2.1.2.5 */
+#define IO_REPARSE_TAG_MOUNT_POINT 0xA0000003
+#define IO_REPARSE_TAG_HSM 0xC0000004
+#define IO_REPARSE_TAG_SIS 0x80000007
+#define IO_REPARSE_TAG_HSM2 0x80000006
+#define IO_REPARSE_TAG_DRIVER_EXTENDER 0x80000005
+/* Used by the DFS filter. See MS-DFSC */
+#define IO_REPARSE_TAG_DFS 0x8000000A
+/* Used by the DFS filter. See MS-DFSC */
+#define IO_REPARSE_TAG_DFSR 0x80000012
+#define IO_REPARSE_TAG_FILTER_MANAGER 0x8000000B
+/* See section MS-FSCC 2.1.2.4 */
+#define IO_REPARSE_TAG_SYMLINK 0xA000000C
+#define IO_REPARSE_TAG_DEDUP 0x80000013
+#define IO_REPARSE_APPXSTREAM 0xC0000014
+/* NFS symlinks, Win 8/SMB3 and later */
+#define IO_REPARSE_TAG_NFS 0x80000014
+/*
+ * AzureFileSync - see
+ * https://docs.microsoft.com/en-us/azure/storage/files/storage-sync-cloud-tiering
+ */
+#define IO_REPARSE_TAG_AZ_FILE_SYNC 0x8000001e
+/* WSL reparse tags */
+#define IO_REPARSE_TAG_LX_SYMLINK 0xA000001D
+#define IO_REPARSE_TAG_AF_UNIX 0x80000023
+#define IO_REPARSE_TAG_LX_FIFO 0x80000024
+#define IO_REPARSE_TAG_LX_CHR 0x80000025
+#define IO_REPARSE_TAG_LX_BLK 0x80000026
+
+#define IO_REPARSE_TAG_LX_SYMLINK_LE cpu_to_le32(0xA000001D)
+#define IO_REPARSE_TAG_AF_UNIX_LE cpu_to_le32(0x80000023)
+#define IO_REPARSE_TAG_LX_FIFO_LE cpu_to_le32(0x80000024)
+#define IO_REPARSE_TAG_LX_CHR_LE cpu_to_le32(0x80000025)
+#define IO_REPARSE_TAG_LX_BLK_LE cpu_to_le32(0x80000026)
+
+/* fsctl flags */
+/* If Flags is set to this value, the request is an FSCTL not ioctl request */
+#define SMB2_0_IOCTL_IS_FSCTL 0x00000001
+#endif /* __SMBFSCTL_H */
}
/* Tree-based key-value access APIs */
-struct xbc_node * __init xbc_node_find_child(struct xbc_node *parent,
+struct xbc_node * __init xbc_node_find_subkey(struct xbc_node *parent,
const char *key);
const char * __init xbc_node_find_value(struct xbc_node *parent,
*/
static inline struct xbc_node * __init xbc_find_node(const char *key)
{
- return xbc_node_find_child(NULL, key);
+ return xbc_node_find_subkey(NULL, key);
}
/**
extern int __receive_fd(struct file *file, int __user *ufd,
unsigned int o_flags);
+
+extern int receive_fd(struct file *file, unsigned int o_flags);
+
static inline int receive_fd_user(struct file *file, int __user *ufd,
unsigned int o_flags)
{
return -EFAULT;
return __receive_fd(file, ufd, o_flags);
}
-static inline int receive_fd(struct file *file, unsigned int o_flags)
-{
- return __receive_fd(file, NULL, o_flags);
-}
int receive_fd_replace(int new_fd, struct file *file, unsigned int o_flags);
extern void flush_delayed_fput(void);
void *pwm_get_chip_data(struct pwm_device *pwm);
int pwmchip_add(struct pwm_chip *chip);
-int pwmchip_remove(struct pwm_chip *chip);
+void pwmchip_remove(struct pwm_chip *chip);
int devm_pwmchip_add(struct device *dev, struct pwm_chip *chip);
u32 *resp);
extern int qcom_scm_qsmmu500_wait_safe_toggle(bool en);
+
+extern int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+ u64 limit_node, u32 node_id, u64 version);
+extern int qcom_scm_lmh_profile_change(u32 profile_id);
+extern bool qcom_scm_lmh_dcvsh_available(void);
+
#else
#include <linux/errno.h>
static inline int qcom_scm_qsmmu500_wait_safe_toggle(bool en)
{ return -ENODEV; }
+
+static inline int qcom_scm_lmh_dcvsh(u32 payload_fn, u32 payload_reg, u32 payload_val,
+ u64 limit_node, u32 node_id, u64 version)
+ { return -ENODEV; }
+
+static inline int qcom_scm_lmh_profile_change(u32 profile_id) { return -ENODEV; }
+
+/*
+ * Stub for the #else branch: report the feature as unavailable.  The return
+ * type is bool, so an errno such as -ENODEV would convert to true here.
+ */
+static inline bool qcom_scm_lmh_dcvsh_available(void) { return false; }
#endif
#endif
#define DECLARE_RWSEM(lockname) \
struct rw_semaphore lockname = __RWSEM_INITIALIZER(lockname)
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-extern void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
+extern void __init_rwsem(struct rw_semaphore *rwsem, const char *name,
struct lock_class_key *key);
-#else
-static inline void __rwsem_init(struct rw_semaphore *rwsem, const char *name,
- struct lock_class_key *key)
-{
-}
-#endif
#define init_rwsem(sem) \
do { \
static struct lock_class_key __key; \
\
- init_rwbase_rt(&(sem)->rwbase); \
- __rwsem_init((sem), #sem, &__key); \
+ __init_rwsem((sem), #sem, &__key); \
} while (0)
static __always_inline int rwsem_is_locked(struct rw_semaphore *sem)
};
/**
- * struct thermal_zone_of_device_ops - scallbacks for handling DT based zones
+ * struct thermal_zone_of_device_ops - callbacks for handling DT based zones
*
* Mandatory:
* @get_temp: a pointer to a function that reads the sensor temperature.
struct thermal_zone_device *tz)
{ }
static inline struct thermal_cooling_device *
-thermal_cooling_device_register(char *type, void *devdata,
+thermal_cooling_device_register(const char *type, void *devdata,
const struct thermal_cooling_device_ops *ops)
{ return ERR_PTR(-ENODEV); }
static inline struct thermal_cooling_device *
thermal_of_cooling_device_register(struct device_node *np,
- char *type, void *devdata, const struct thermal_cooling_device_ops *ops)
+ const char *type, void *devdata,
+ const struct thermal_cooling_device_ops *ops)
{ return ERR_PTR(-ENODEV); }
static inline struct thermal_cooling_device *
devm_thermal_of_cooling_device_register(struct device *dev,
#define TIME64_MIN (-TIME64_MAX - 1)
#define KTIME_MAX ((s64)~((u64)1 << 63))
+#define KTIME_MIN (-KTIME_MAX - 1)
#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC)
+#define KTIME_SEC_MIN (KTIME_MIN / NSEC_PER_SEC)
/*
* Limits for settimeofday():
*/
static inline s64 timespec64_to_ns(const struct timespec64 *ts)
{
- /* Prevent multiplication overflow */
- if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX)
+ /*
+ * Prevent multiplication overflow / underflow: seconds at or beyond
+ * either limit saturate to KTIME_MAX / KTIME_MIN instead of wrapping.
+ */
+ if (ts->tv_sec >= KTIME_SEC_MAX)
return KTIME_MAX;
+ if (ts->tv_sec <= KTIME_SEC_MIN)
+ return KTIME_MIN;
+
return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
}
* @last_used_idx: used index
*/
struct vdpa_vq_state_packed {
- u16 last_avail_counter:1;
- u16 last_avail_idx:15;
- u16 last_used_counter:1;
- u16 last_used_idx:15;
+ u16 last_avail_counter:1;
+ u16 last_avail_idx:15;
+ u16 last_used_counter:1;
+ u16 last_used_idx:15;
};
struct vdpa_vq_state {
- union {
- struct vdpa_vq_state_split split;
- struct vdpa_vq_state_packed packed;
- };
+ union {
+ struct vdpa_vq_state_split split;
+ struct vdpa_vq_state_packed packed;
+ };
};
struct vdpa_mgmt_dev;
* @config: the configuration ops for this device.
* @index: device index
* @features_valid: were features initialized? for legacy guests
+ * @use_va: indicate whether virtual address must be used by this device
* @nvqs: maximum number of supported virtqueues
* @mdev: management device pointer; caller must setup when registering device as part
* of dev_add() mgmtdev ops callback before invoking _vdpa_register_device().
const struct vdpa_config_ops *config;
unsigned int index;
bool features_valid;
+ bool use_va;
int nvqs;
struct vdpa_mgmt_dev *mdev;
};
u64 last;
};
+/**
+ * struct vdpa_map_file - file area for device memory mapping
+ * @file: vma->vm_file for the mapping
+ * @offset: mapping offset in the vm_file
+ */
+struct vdpa_map_file {
+ struct file *file;
+ u64 offset;
+};
+
/**
* struct vdpa_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the
* @vdev: vdpa device
* @idx: virtqueue index
* @state: pointer to returned state (last_avail_idx)
- * @get_vq_notification: Get the notification area for a virtqueue
+ * @get_vq_notification: Get the notification area for a virtqueue
* @vdev: vdpa device
* @idx: virtqueue index
* Returns the notifcation area
* @set_status: Set the device status
* @vdev: vdpa device
* @status: virtio device status
+ * @reset: Reset device
+ * @vdev: vdpa device
+ * Returns integer: success (0) or error (< 0)
* @get_config_size: Get the size of the configuration space
* @vdev: vdpa device
* Returns size_t: configuration size
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
+ int (*reset)(struct vdpa_device *vdev);
size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
/* DMA ops */
int (*set_map)(struct vdpa_device *vdev, struct vhost_iotlb *iotlb);
int (*dma_map)(struct vdpa_device *vdev, u64 iova, u64 size,
- u64 pa, u32 perm);
+ u64 pa, u32 perm, void *opaque);
int (*dma_unmap)(struct vdpa_device *vdev, u64 iova, u64 size);
/* Free device resources */
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
- size_t size, const char *name);
+ size_t size, const char *name,
+ bool use_va);
/**
* vdpa_alloc_device - allocate and initilaize a vDPA device
* @parent: the parent device
* @config: the bus operations that is supported by this device
* @name: name of the vdpa device
+ * @use_va: indicate whether virtual address must be used by this device
*
* Return allocated data structure or ERR_PTR upon error
*/
-#define vdpa_alloc_device(dev_struct, member, parent, config, name) \
+#define vdpa_alloc_device(dev_struct, member, parent, config, name, use_va) \
container_of(__vdpa_alloc_device( \
parent, config, \
sizeof(dev_struct) + \
BUILD_BUG_ON_ZERO(offsetof( \
- dev_struct, member)), name), \
+ dev_struct, member)), name, use_va), \
dev_struct, member)
int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
return vdev->dma_dev;
}
-static inline void vdpa_reset(struct vdpa_device *vdev)
+static inline int vdpa_reset(struct vdpa_device *vdev)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
vdev->features_valid = false;
- ops->set_status(vdev, 0);
+ /* Use the dedicated reset op and hand its result back to the caller. */
+ return ops->reset(vdev);
}
static inline int vdpa_set_features(struct vdpa_device *vdev, u64 features)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
vdev->features_valid = true;
- return ops->set_features(vdev, features);
+ return ops->set_features(vdev, features);
}
-
-static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
- void *buf, unsigned int len)
+static inline void vdpa_get_config(struct vdpa_device *vdev,
+ unsigned int offset, void *buf,
+ unsigned int len)
{
- const struct vdpa_config_ops *ops = vdev->config;
+ const struct vdpa_config_ops *ops = vdev->config;
/*
* Config accesses aren't supposed to trigger before features are set.
u32 perm;
u32 flags_padding;
u64 __subtree_last;
+ void *opaque;
};
#define VHOST_IOTLB_FLAG_RETIRE 0x1
unsigned int flags;
};
+int vhost_iotlb_add_range_ctx(struct vhost_iotlb *iotlb, u64 start, u64 last,
+ u64 addr, unsigned int perm, void *opaque);
int vhost_iotlb_add_range(struct vhost_iotlb *iotlb, u64 start, u64 last,
u64 addr, unsigned int perm);
void vhost_iotlb_del_range(struct vhost_iotlb *iotlb, u64 start, u64 last);
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI_VDUSE_H_
+#define _UAPI_VDUSE_H_
+
+#include <linux/types.h>
+
+#define VDUSE_BASE 0x81
+
+/* The ioctls for control device (/dev/vduse/control) */
+
+#define VDUSE_API_VERSION 0
+
+/*
+ * Get the version of VDUSE API that the kernel supports (VDUSE_API_VERSION).
+ * This is used for future extension.
+ */
+#define VDUSE_GET_API_VERSION _IOR(VDUSE_BASE, 0x00, __u64)
+
+/* Set the version of VDUSE API that userspace supports. */
+#define VDUSE_SET_API_VERSION _IOW(VDUSE_BASE, 0x01, __u64)
+
+/**
+ * struct vduse_dev_config - basic configuration of a VDUSE device
+ * @name: VDUSE device name, needs to be NUL terminated
+ * @vendor_id: virtio vendor id
+ * @device_id: virtio device id
+ * @features: virtio features
+ * @vq_num: the number of virtqueues
+ * @vq_align: the allocation alignment of virtqueue's metadata
+ * @reserved: for future use, needs to be initialized to zero
+ * @config_size: the size of the configuration space
+ * @config: the buffer of the configuration space
+ *
+ * Structure used by VDUSE_CREATE_DEV ioctl to create VDUSE device.
+ */
+struct vduse_dev_config {
+#define VDUSE_NAME_MAX 256
+ char name[VDUSE_NAME_MAX];
+ __u32 vendor_id;
+ __u32 device_id;
+ __u64 features;
+ __u32 vq_num;
+ __u32 vq_align;
+ __u32 reserved[13];
+ __u32 config_size;
+ __u8 config[];
+};
+
+/* Create a VDUSE device which is represented by a char device (/dev/vduse/$NAME) */
+#define VDUSE_CREATE_DEV _IOW(VDUSE_BASE, 0x02, struct vduse_dev_config)
+
+/*
+ * Destroy a VDUSE device. Make sure there are no more references
+ * to the char device (/dev/vduse/$NAME).
+ */
+#define VDUSE_DESTROY_DEV _IOW(VDUSE_BASE, 0x03, char[VDUSE_NAME_MAX])
+
+/* The ioctls for VDUSE device (/dev/vduse/$NAME) */
+
+/**
+ * struct vduse_iotlb_entry - entry of IOTLB to describe one IOVA region [start, last]
+ * @offset: the mmap offset on returned file descriptor
+ * @start: start of the IOVA region
+ * @last: last of the IOVA region
+ * @perm: access permission of the IOVA region
+ *
+ * Structure used by VDUSE_IOTLB_GET_FD ioctl to find an overlapped IOVA region.
+ */
+struct vduse_iotlb_entry {
+ __u64 offset;
+ __u64 start;
+ __u64 last;
+#define VDUSE_ACCESS_RO 0x1
+#define VDUSE_ACCESS_WO 0x2
+#define VDUSE_ACCESS_RW 0x3
+ __u8 perm;
+};
+
+/*
+ * Find the first IOVA region that overlaps with the range [start, last]
+ * and return the corresponding file descriptor. -EINVAL is returned if no
+ * such IOVA region exists. Caller should set start and last fields.
+ */
+#define VDUSE_IOTLB_GET_FD _IOWR(VDUSE_BASE, 0x10, struct vduse_iotlb_entry)
+
+/*
+ * Get the negotiated virtio features. It's a subset of the features in
+ * struct vduse_dev_config which can be accepted by virtio driver. It's
+ * only valid after FEATURES_OK status bit is set.
+ */
+#define VDUSE_DEV_GET_FEATURES _IOR(VDUSE_BASE, 0x11, __u64)
+
+/**
+ * struct vduse_config_data - data used to update configuration space
+ * @offset: the offset from the beginning of configuration space
+ * @length: the length to write to configuration space
+ * @buffer: the buffer used to write from
+ *
+ * Structure used by VDUSE_DEV_SET_CONFIG ioctl to update device
+ * configuration space.
+ */
+struct vduse_config_data {
+ __u32 offset;
+ __u32 length;
+ __u8 buffer[];
+};
+
+/* Set device configuration space */
+#define VDUSE_DEV_SET_CONFIG _IOW(VDUSE_BASE, 0x12, struct vduse_config_data)
+
+/*
+ * Inject a config interrupt. It's usually used to notify virtio driver
+ * that device configuration space has changed.
+ */
+#define VDUSE_DEV_INJECT_CONFIG_IRQ _IO(VDUSE_BASE, 0x13)
+
+/**
+ * struct vduse_vq_config - basic configuration of a virtqueue
+ * @index: virtqueue index
+ * @max_size: the max size of virtqueue
+ * @reserved: for future use, needs to be initialized to zero
+ *
+ * Structure used by VDUSE_VQ_SETUP ioctl to setup a virtqueue.
+ */
+struct vduse_vq_config {
+ __u32 index;
+ __u16 max_size;
+ __u16 reserved[13];
+};
+
+/*
+ * Setup the specified virtqueue. Make sure all virtqueues have been
+ * configured before the device is attached to vDPA bus.
+ */
+#define VDUSE_VQ_SETUP _IOW(VDUSE_BASE, 0x14, struct vduse_vq_config)
+
+/**
+ * struct vduse_vq_state_split - split virtqueue state
+ * @avail_index: available index
+ */
+struct vduse_vq_state_split {
+ __u16 avail_index;
+};
+
+/**
+ * struct vduse_vq_state_packed - packed virtqueue state
+ * @last_avail_counter: last driver ring wrap counter observed by device
+ * @last_avail_idx: device available index
+ * @last_used_counter: device ring wrap counter
+ * @last_used_idx: used index
+ */
+struct vduse_vq_state_packed {
+ __u16 last_avail_counter;
+ __u16 last_avail_idx;
+ __u16 last_used_counter;
+ __u16 last_used_idx;
+};
+
+/**
+ * struct vduse_vq_info - information of a virtqueue
+ * @index: virtqueue index
+ * @num: the size of virtqueue
+ * @desc_addr: address of desc area
+ * @driver_addr: address of driver area
+ * @device_addr: address of device area
+ * @split: split virtqueue state
+ * @packed: packed virtqueue state
+ * @ready: ready status of virtqueue
+ *
+ * Structure used by VDUSE_VQ_GET_INFO ioctl to get virtqueue's information.
+ */
+struct vduse_vq_info {
+ __u32 index;
+ __u32 num;
+ __u64 desc_addr;
+ __u64 driver_addr;
+ __u64 device_addr;
+ union {
+ struct vduse_vq_state_split split;
+ struct vduse_vq_state_packed packed;
+ };
+ __u8 ready;
+};
+
+/* Get the specified virtqueue's information. Caller should set index field. */
+#define VDUSE_VQ_GET_INFO _IOWR(VDUSE_BASE, 0x15, struct vduse_vq_info)
+
+/**
+ * struct vduse_vq_eventfd - eventfd configuration for a virtqueue
+ * @index: virtqueue index
+ * @fd: eventfd, -1 means de-assigning the eventfd
+ *
+ * Structure used by VDUSE_VQ_SETUP_KICKFD ioctl to setup kick eventfd.
+ */
+struct vduse_vq_eventfd {
+ __u32 index;
+#define VDUSE_EVENTFD_DEASSIGN -1
+ int fd;
+};
+
+/*
+ * Setup kick eventfd for specified virtqueue. The kick eventfd is used
+ * by VDUSE kernel module to notify userspace to consume the avail vring.
+ */
+#define VDUSE_VQ_SETUP_KICKFD _IOW(VDUSE_BASE, 0x16, struct vduse_vq_eventfd)
+
+/*
+ * Inject an interrupt for specific virtqueue. It's used to notify virtio driver
+ * to consume the used vring.
+ */
+#define VDUSE_VQ_INJECT_IRQ _IOW(VDUSE_BASE, 0x17, __u32)
+
+/* The control messages definition for read(2)/write(2) on /dev/vduse/$NAME */
+
+/**
+ * enum vduse_req_type - request type
+ * @VDUSE_GET_VQ_STATE: get the state for specified virtqueue from userspace
+ * @VDUSE_SET_STATUS: set the device status
+ * @VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for
+ * specified IOVA range via VDUSE_IOTLB_GET_FD ioctl
+ */
+enum vduse_req_type {
+ VDUSE_GET_VQ_STATE,
+ VDUSE_SET_STATUS,
+ VDUSE_UPDATE_IOTLB,
+};
+
+/**
+ * struct vduse_vq_state - virtqueue state
+ * @index: virtqueue index
+ * @split: split virtqueue state
+ * @packed: packed virtqueue state
+ */
+struct vduse_vq_state {
+ __u32 index;
+ union {
+ struct vduse_vq_state_split split;
+ struct vduse_vq_state_packed packed;
+ };
+};
+
+/**
+ * struct vduse_dev_status - device status
+ * @status: device status
+ */
+struct vduse_dev_status {
+ __u8 status;
+};
+
+/**
+ * struct vduse_iova_range - IOVA range [start, last]
+ * @start: start of the IOVA range
+ * @last: last of the IOVA range
+ */
+struct vduse_iova_range {
+ __u64 start;
+ __u64 last;
+};
+
+/**
+ * struct vduse_dev_request - control request
+ * @type: request type
+ * @request_id: request id
+ * @reserved: for future use
+ * @vq_state: virtqueue state, only index field is available
+ * @s: device status
+ * @iova: IOVA range for updating
+ * @padding: padding
+ *
+ * Structure used by read(2) on /dev/vduse/$NAME.
+ */
+struct vduse_dev_request {
+ __u32 type;
+ __u32 request_id;
+ __u32 reserved[4];
+ union {
+ struct vduse_vq_state vq_state;
+ struct vduse_dev_status s;
+ struct vduse_iova_range iova;
+ __u32 padding[32];
+ };
+};
+
+/**
+ * struct vduse_dev_response - response to control request
+ * @request_id: corresponding request id
+ * @result: the result of request
+ * @reserved: for future use, needs to be initialized to zero
+ * @vq_state: virtqueue state
+ * @padding: padding
+ *
+ * Structure used by write(2) on /dev/vduse/$NAME.
+ */
+struct vduse_dev_response {
+ __u32 request_id;
+#define VDUSE_REQ_RESULT_OK 0x00
+#define VDUSE_REQ_RESULT_FAILED 0x01
+ __u32 result;
+ __u32 reserved[4];
+ union {
+ struct vduse_vq_state vq_state;
+ __u32 padding[32];
+ };
+};
+
+#endif /* _UAPI_VDUSE_H_ */
#define VIRTIO_ID_SOUND 25 /* virtio sound */
#define VIRTIO_ID_FS 26 /* virtio filesystem */
#define VIRTIO_ID_PMEM 27 /* virtio pmem */
+#define VIRTIO_ID_RPMB 28 /* virtio rpmb */
#define VIRTIO_ID_MAC80211_HWSIM 29 /* virtio mac80211-hwsim */
+#define VIRTIO_ID_VIDEO_ENCODER 30 /* virtio video encoder */
+#define VIRTIO_ID_VIDEO_DECODER 31 /* virtio video decoder */
#define VIRTIO_ID_SCMI 32 /* virtio SCMI */
+#define VIRTIO_ID_NITRO_SEC_MOD 33 /* virtio nitro secure module */
#define VIRTIO_ID_I2C_ADAPTER 34 /* virtio i2c adapter */
+#define VIRTIO_ID_WATCHDOG 35 /* virtio watchdog */
+#define VIRTIO_ID_CAN 36 /* virtio can */
+#define VIRTIO_ID_DMABUF 37 /* virtio dmabuf */
+#define VIRTIO_ID_PARAM_SERV 38 /* virtio parameter server */
+#define VIRTIO_ID_AUDIO_POLICY 39 /* virtio audio policy */
#define VIRTIO_ID_BT 40 /* virtio bluetooth */
#define VIRTIO_ID_GPIO 41 /* virtio gpio */
/* VIRTIO_VSOCK_OP_RW flags values */
enum virtio_vsock_rw {
- VIRTIO_VSOCK_SEQ_EOR = 1,
+ VIRTIO_VSOCK_SEQ_EOM = 1,
+ VIRTIO_VSOCK_SEQ_EOR = 2,
};
#endif /* _UAPI_LINUX_VIRTIO_VSOCK_H */
return -ESRCH;
}
+static void __attach_to_pi_owner(struct task_struct *p, union futex_key *key,
+ struct futex_pi_state **ps)
+{
+ /*
+ * No existing pi state. First waiter. [2]
+ *
+ * This creates pi_state, we have hb->lock held, this means nothing can
+ * observe this state, wait_lock is irrelevant.
+ */
+ struct futex_pi_state *pi_state = alloc_pi_state();
+
+ /*
+ * Initialize the pi_mutex in locked state and make @p
+ * the owner of it:
+ */
+ rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
+
+ /* Store the key for possible exit cleanups: */
+ pi_state->key = *key;
+
+ WARN_ON(!list_empty(&pi_state->list));
+ list_add(&pi_state->list, &p->pi_state_list);
+ /*
+ * Assignment without holding pi_state->pi_mutex.wait_lock is safe
+ * because there is no concurrency as the object is not published yet.
+ */
+ pi_state->owner = p;
+
+ *ps = pi_state;
+}
/*
* Lookup the task for the TID provided from user space and attach to
* it after doing proper sanity checks.
struct task_struct **exiting)
{
pid_t pid = uval & FUTEX_TID_MASK;
- struct futex_pi_state *pi_state;
struct task_struct *p;
/*
return ret;
}
- /*
- * No existing pi state. First waiter. [2]
- *
- * This creates pi_state, we have hb->lock held, this means nothing can
- * observe this state, wait_lock is irrelevant.
- */
- pi_state = alloc_pi_state();
-
- /*
- * Initialize the pi_mutex in locked state and make @p
- * the owner of it:
- */
- rt_mutex_init_proxy_locked(&pi_state->pi_mutex, p);
-
- /* Store the key for possible exit cleanups: */
- pi_state->key = *key;
-
- WARN_ON(!list_empty(&pi_state->list));
- list_add(&pi_state->list, &p->pi_state_list);
- /*
- * Assignment without holding pi_state->pi_mutex.wait_lock is safe
- * because there is no concurrency as the object is not published yet.
- */
- pi_state->owner = p;
+ __attach_to_pi_owner(p, key, ps);
raw_spin_unlock_irq(&p->pi_lock);
put_task_struct(p);
- *ps = pi_state;
-
return 0;
}
newval |= FUTEX_WAITERS;
ret = lock_pi_update_atomic(uaddr, uval, newval);
- /* If the take over worked, return 1 */
- return ret < 0 ? ret : 1;
+ if (ret)
+ return ret;
+
+ /*
+ * If the waiter bit was requested the caller also needs PI
+ * state attached to the new owner of the user space futex.
+ *
+ * @task is guaranteed to be alive and it cannot be exiting
+ * because it is either sleeping or waiting in
+ * futex_requeue_pi_wakeup_sync().
+ *
+ * No need to do the full attach_to_pi_owner() exercise
+ * because @task is known and valid.
+ */
+ if (set_waiters) {
+ raw_spin_lock_irq(&task->pi_lock);
+ __attach_to_pi_owner(task, key, ps);
+ raw_spin_unlock_irq(&task->pi_lock);
+ }
+ return 1;
}
/*
* @hb: the hash_bucket of the requeue target futex
*
* During futex_requeue, with requeue_pi=1, it is possible to acquire the
- * target futex if it is uncontended or via a lock steal. Set the futex_q key
- * to the requeue target futex so the waiter can detect the wakeup on the right
- * futex, but remove it from the hb and NULL the rt_waiter so it can detect
- * atomic lock acquisition. Set the q->lock_ptr to the requeue target hb->lock
- * to protect access to the pi_state to fixup the owner later. Must be called
- * with both q->lock_ptr and hb->lock held.
+ * target futex if it is uncontended or via a lock steal.
+ *
+ * 1) Set @q::key to the requeue target futex key so the waiter can detect
+ * the wakeup on the right futex.
+ *
+ * 2) Dequeue @q from the hash bucket.
+ *
+ * 3) Set @q::rt_waiter to NULL so the woken up task can detect atomic lock
+ * acquisition.
+ *
+ * 4) Set the q->lock_ptr to the requeue target hb->lock for the case that
+ * the waiter has to fixup the pi state.
+ *
+ * 5) Complete the requeue state so the waiter can make progress. After
+ * this point the waiter task can return from the syscall immediately in
+ * case that the pi state does not have to be fixed up.
+ *
+ * 6) Wake the waiter task.
+ *
+ * Must be called with both q->lock_ptr and hb->lock held.
*/
static inline
void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
{
struct futex_q *top_waiter = NULL;
u32 curval;
- int ret, vpid;
+ int ret;
if (get_futex_value_locked(&curval, pifutex))
return -EFAULT;
* and waiting on the 'waitqueue' futex which is always !PI.
*/
if (!top_waiter->rt_waiter || top_waiter->pi_state)
- ret = -EINVAL;
+ return -EINVAL;
/* Ensure we requeue to the expected futex. */
if (!match_futex(top_waiter->requeue_pi_key, key2))
return -EAGAIN;
/*
- * Try to take the lock for top_waiter. Set the FUTEX_WAITERS bit in
- * the contended case or if set_waiters is 1. The pi_state is returned
- * in ps in contended cases.
+ * Try to take the lock for top_waiter and set the FUTEX_WAITERS bit
+ * in the contended case or if @set_waiters is true.
+ *
+ * In the contended case PI state is attached to the lock owner. If
+ * the user space lock can be acquired then PI state is attached to
+ * the new owner (@top_waiter->task) when @set_waiters is true.
*/
- vpid = task_pid_vnr(top_waiter->task);
ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
exiting, set_waiters);
if (ret == 1) {
- /* Dequeue, wake up and update top_waiter::requeue_state */
+ /*
+ * Lock was acquired in user space and PI state was
+ * attached to @top_waiter->task. That means state is fully
+ * consistent and the waiter can return to user space
+ * immediately after the wakeup.
+ */
requeue_pi_wake_futex(top_waiter, key2, hb2);
- return vpid;
} else if (ret < 0) {
/* Rewind top_waiter::requeue_state */
futex_requeue_pi_complete(top_waiter, ret);
&exiting, nr_requeue);
/*
- * At this point the top_waiter has either taken uaddr2 or is
- * waiting on it. If the former, then the pi_state will not
- * exist yet, look it up one more time to ensure we have a
- * reference to it. If the lock was taken, @ret contains the
- * VPID of the top waiter task.
- * If the lock was not taken, we have pi_state and an initial
- * refcount on it. In case of an error we have nothing.
+ * At this point the top_waiter has either taken uaddr2 or
+ * is waiting on it. In both cases pi_state has been
+ * established with an initial refcount on it. In case of
+ * an error there's nothing.
*
* The top waiter's requeue_state is up to date:
*
- * - If the lock was acquired atomically (ret > 0), then
+ * - If the lock was acquired atomically (ret == 1), then
* the state is Q_REQUEUE_PI_LOCKED.
*
+ * The top waiter has been dequeued and woken up and can
+ * return to user space immediately. The kernel/user
+ * space state is consistent. If more waiters have to be
+ * requeued, the WAITERS bit in the user space futex is
+ * set so the top waiter task has to go
+ * into the syscall slowpath to unlock the futex. This
+ * will block until this requeue operation has been
+ * completed and the hash bucket locks have been
+ * dropped.
+ *
* - If the trylock failed with an error (ret < 0) then
* the state is either Q_REQUEUE_PI_NONE, i.e. "nothing
* happened", or Q_REQUEUE_PI_IGNORE when there was an
* the same sanity checks for requeue_pi as the loop
* below does.
*/
- if (ret > 0) {
- WARN_ON(pi_state);
- task_count++;
- /*
- * If futex_proxy_trylock_atomic() acquired the
- * user space futex, then the user space value
- * @uaddr2 has been set to the @hb1's top waiter
- * task VPID. This task is guaranteed to be alive
- * and cannot be exiting because it is either
- * sleeping or blocked on @hb2 lock.
- *
- * The @uaddr2 futex cannot have waiters either as
- * otherwise futex_proxy_trylock_atomic() would not
- * have succeeded.
- *
- * In order to requeue waiters to @hb2, pi state is
- * required. Hand in the VPID value (@ret) and
- * allocate PI state with an initial refcount on
- * it.
- */
- ret = attach_to_pi_owner(uaddr2, ret, &key2, &pi_state,
- &exiting);
- WARN_ON(ret);
- }
-
switch (ret) {
case 0:
/* We hold a reference on the pi state. */
break;
+ case 1:
+ /*
+ * futex_proxy_trylock_atomic() acquired the user space
+ * futex. Adjust task_count.
+ */
+ task_count++;
+ ret = 0;
+ break;
+
/*
* If the above failed, then pi_state is NULL and
* waiter::requeue_state is correct.
}
/*
- * We took an extra initial reference to the pi_state either in
- * futex_proxy_trylock_atomic() or in attach_to_pi_owner(). We need
- * to drop it here again.
+ * We took an extra initial reference to the pi_state in
+ * futex_proxy_trylock_atomic(). We need to drop it here again.
*/
put_pi_state(pi_state);
* other configuration and we fail to report; also, see
* lockdep.
*/
- if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter->ww_ctx)
+ if (IS_ENABLED(CONFIG_PREEMPT_RT) && orig_waiter && orig_waiter->ww_ctx)
ret = 0;
raw_spin_unlock(&lock->wait_lock);
#include "rwbase_rt.c"
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-void __rwsem_init(struct rw_semaphore *sem, const char *name,
+void __init_rwsem(struct rw_semaphore *sem, const char *name,
struct lock_class_key *key)
{
+ init_rwbase_rt(&(sem)->rwbase);
+
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
debug_check_no_locks_freed((void *)sem, sizeof(*sem));
lockdep_init_map_wait(&sem->dep_map, name, key, 0, LD_WAIT_SLEEP);
-}
-EXPORT_SYMBOL(__rwsem_init);
#endif
+}
+EXPORT_SYMBOL(__init_rwsem);
static inline void __down_read(struct rw_semaphore *sem)
{
struct task_struct *push_task = rq->curr;
lockdep_assert_rq_held(rq);
- SCHED_WARN_ON(rq->cpu != smp_processor_id());
/*
* Ensure the thing is persistent until balance_push_set(.on = false);
rq->balance_callback = &balance_push_callback;
/*
- * Only active while going offline.
+ * Only active while going offline and when invoked on the outgoing
+ * CPU.
*/
- if (!cpu_dying(rq->cpu))
+ if (!cpu_dying(rq->cpu) || rq != this_rq())
return;
/*
cpuidle_use_deepest_state(latency_ns);
it.done = 0;
- hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ hrtimer_init_on_stack(&it.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
it.timer.function = idle_inject_timer_fn;
hrtimer_start(&it.timer, ns_to_ktime(duration_ns),
- HRTIMER_MODE_REL_PINNED);
+ HRTIMER_MODE_REL_PINNED_HARD);
while (!READ_ONCE(it.done))
do_idle();
trace_boot_hist_add_array(struct xbc_node *hnode, char **bufp,
char *end, const char *key)
{
- struct xbc_node *knode, *anode;
+ struct xbc_node *anode;
const char *p;
char sep;
- knode = xbc_node_find_child(hnode, key);
- if (knode) {
- anode = xbc_node_get_child(knode);
+ p = xbc_node_find_value(hnode, key, &anode);
+ if (p) {
if (!anode) {
pr_err("hist.%s requires value(s).\n", key);
return -EINVAL;
append_printf(bufp, end, ":%s(%s)", handler, p);
/* Compose 'action' parameter */
- knode = xbc_node_find_child(hnode, "trace");
+ knode = xbc_node_find_subkey(hnode, "trace");
if (!knode)
- knode = xbc_node_find_child(hnode, "save");
+ knode = xbc_node_find_subkey(hnode, "save");
if (knode) {
anode = xbc_node_get_child(knode);
sep = ',';
}
append_printf(bufp, end, ")");
- } else if (xbc_node_find_child(hnode, "snapshot")) {
+ } else if (xbc_node_find_subkey(hnode, "snapshot")) {
append_printf(bufp, end, ".snapshot()");
} else {
pr_err("hist.%s requires an action.\n",
break;
}
- if (xbc_node_find_child(hnode, param))
+ if (xbc_node_find_subkey(hnode, param))
ret = trace_boot_hist_add_one_handler(hnode, bufp, end, handler, param);
return ret;
if (p)
append_printf(&buf, end, ":name=%s", p);
- node = xbc_node_find_child(hnode, "var");
+ node = xbc_node_find_subkey(hnode, "var");
if (node) {
xbc_node_for_each_key_value(node, knode, p) {
/* Expression must not include spaces. */
}
/* Histogram control attributes (mutual exclusive) */
- if (xbc_node_find_child(hnode, "pause"))
+ if (xbc_node_find_value(hnode, "pause", NULL))
append_printf(&buf, end, ":pause");
- else if (xbc_node_find_child(hnode, "continue"))
+ else if (xbc_node_find_value(hnode, "continue", NULL))
append_printf(&buf, end, ":continue");
- else if (xbc_node_find_child(hnode, "clear"))
+ else if (xbc_node_find_value(hnode, "clear", NULL))
append_printf(&buf, end, ":clear");
/* Histogram handler and actions */
- node = xbc_node_find_child(hnode, "onmax");
+ node = xbc_node_find_subkey(hnode, "onmax");
if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0)
return -EINVAL;
- node = xbc_node_find_child(hnode, "onchange");
+ node = xbc_node_find_subkey(hnode, "onchange");
if (node && trace_boot_hist_add_handlers(node, &buf, end, "var") < 0)
return -EINVAL;
- node = xbc_node_find_child(hnode, "onmatch");
+ node = xbc_node_find_subkey(hnode, "onmatch");
if (node && trace_boot_hist_add_handlers(node, &buf, end, "event") < 0)
return -EINVAL;
}
}
- if (xbc_node_find_child(hnode, "keys")) {
+ if (xbc_node_find_subkey(hnode, "keys")) {
if (trace_boot_compose_hist_cmd(hnode, buf, size) == 0) {
tmp = kstrdup(buf, GFP_KERNEL);
if (trigger_process_regex(file, buf) < 0)
else if (trigger_process_regex(file, buf) < 0)
pr_err("Failed to apply an action: %s\n", p);
}
- anode = xbc_node_find_child(enode, "hist");
+ anode = xbc_node_find_subkey(enode, "hist");
if (anode)
trace_boot_init_histograms(file, anode, buf, ARRAY_SIZE(buf));
} else if (xbc_node_find_value(enode, "actions", NULL))
bool enable, enable_all = false;
const char *data;
- node = xbc_node_find_child(node, "event");
+ node = xbc_node_find_subkey(node, "event");
if (!node)
return;
/* per-event key starts with "event.GROUP.EVENT" */
struct trace_array *tr;
const char *p;
- node = xbc_node_find_child(node, "instance");
+ node = xbc_node_find_subkey(node, "instance");
if (!node)
return;
}
/**
- * xbc_node_find_child() - Find a child node which matches given key
+ * xbc_node_find_subkey() - Find a subkey node which matches given key
* @parent: An XBC node.
* @key: A key string.
*
- * Search a node under @parent which matches @key. The @key can contain
+ * Search a key node under @parent which matches @key. The @key can contain
* several words jointed with '.'. If @parent is NULL, this searches the
* node from whole tree. Return NULL if no node is matched.
*/
struct xbc_node * __init
-xbc_node_find_child(struct xbc_node *parent, const char *key)
+xbc_node_find_subkey(struct xbc_node *parent, const char *key)
{
struct xbc_node *node;
xbc_node_find_value(struct xbc_node *parent, const char *key,
struct xbc_node **vnode)
{
- struct xbc_node *node = xbc_node_find_child(parent, key);
+ struct xbc_node *node = xbc_node_find_subkey(parent, key);
if (!node || !xbc_node_is_key(node))
return NULL;
{
const struct vsock_transport *transport;
struct vsock_sock *vsk;
- ssize_t record_len;
+ ssize_t msg_len;
long timeout;
int err = 0;
DEFINE_WAIT(wait);
if (err <= 0)
goto out;
- record_len = transport->seqpacket_dequeue(vsk, msg, flags);
+ msg_len = transport->seqpacket_dequeue(vsk, msg, flags);
- if (record_len < 0) {
+ if (msg_len < 0) {
err = -ENOMEM;
goto out;
}
* packet.
*/
if (flags & MSG_TRUNC)
- err = record_len;
+ err = msg_len;
else
err = len - msg_data_left(msg);
/* Always set MSG_TRUNC if real length of packet is
* bigger than user's buffer.
*/
- if (record_len > len)
+ if (msg_len > len)
msg->msg_flags |= MSG_TRUNC;
}
goto out;
if (msg_data_left(info->msg) == 0 &&
- info->type == VIRTIO_VSOCK_TYPE_SEQPACKET)
- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
+ pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+
+ if (info->msg->msg_flags & MSG_EOR)
+ pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ }
}
trace_virtio_transport_alloc_pkt(src_cid, src_port,
dequeued_len += pkt_len;
}
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR) {
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
msg_ready = true;
vvs->msg_count--;
+
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+ msg->msg_flags |= MSG_EOR;
}
virtio_transport_dec_rx_pkt(vvs, pkt);
goto out;
}
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+ if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count++;
/* Try to copy small packets into the buffer of last packet queued,
/* If there is space in the last packet queued, we copy the
* new packet in its buffer. We avoid this if the last packet
- * queued has VIRTIO_VSOCK_SEQ_EOR set, because this is
- * delimiter of SEQPACKET record, so 'pkt' is the first packet
- * of a new record.
+ * queued has VIRTIO_VSOCK_SEQ_EOM set, because this is
+ * delimiter of SEQPACKET message, so 'pkt' is the first packet
+ * of a new message.
*/
if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
- !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)) {
+ !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
pkt->len);
last_pkt->len += pkt->len;
} else {
... when != krealloc(E, ...)
when any
-* \(kfree\|kzfree\)(E)
+* \(kfree\|kfree_sensitive\)(E)
...
}
|
list_remove_head(x,c,...)
|
+list_entry_is_head(c,...)
+|
sizeof(<+...c...+>)
|
&c->member
case EM_ARM:
case EM_MICROBLAZE:
case EM_MIPS:
+ case EM_RISCV:
case EM_XTENSA:
break;
default:
}
#define MESSAGES_CNT 7
+#define MSG_EOR_IDX (MESSAGES_CNT / 2)
static void test_seqpacket_msg_bounds_client(const struct test_opts *opts)
{
int fd;
/* Send several messages, one with MSG_EOR flag */
for (int i = 0; i < MESSAGES_CNT; i++)
- send_byte(fd, 1, 0);
+ send_byte(fd, 1, (i == MSG_EOR_IDX) ? MSG_EOR : 0);
control_writeln("SENDDONE");
close(fd);
perror("message bound violated");
exit(EXIT_FAILURE);
}
+
+ if ((i == MSG_EOR_IDX) ^ !!(msg.msg_flags & MSG_EOR)) {
+ perror("MSG_EOR");
+ exit(EXIT_FAILURE);
+ }
}
close(fd);
# Add "-fstack-protector" only if toolchain supports it.
override CFLAGS+= $(call cc-option,-fstack-protector-strong)
CC?= $(CROSS_COMPILE)gcc
-PKG_CONFIG?= pkg-config
+PKG_CONFIG?= $(CROSS_COMPILE)pkg-config
override CFLAGS+=-D VERSION=\"$(VERSION)\"
-LDFLAGS+=
TARGET=tmon
INSTALL_PROGRAM=install -m 755 -p
$(PKG_CONFIG) --cflags $(STATIC) panel ncurses 2> /dev/null)
OBJS = tmon.o tui.o sysfs.o pid.o
-OBJS +=
tmon: $(OBJS) Makefile tmon.h
$(CC) $(CFLAGS) $(LDFLAGS) $(OBJS) -o $(TARGET) $(TMON_LIBS)
sudo valgrind -v --track-origins=yes --tool=memcheck --leak-check=yes --show-reachable=yes --num-callers=20 --track-fds=yes ./$(TARGET) 1> /dev/null
install:
- - mkdir -p $(INSTALL_ROOT)/$(BINDIR)
- - $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
+ - $(INSTALL_PROGRAM) -D "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
uninstall:
$(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)"
clean:
- find . -name "*.o" | xargs $(DEL_FILE)
- rm -f $(TARGET)
+ rm -f $(TARGET) $(OBJS)
dist:
git tag v$(VERSION)