# Kdevelop4
*.kdev4
+
+# Clang's compilation database file
+/compile_commands.json
Javi Merino <javi.merino@kernel.org> <javi.merino@arm.com>
<javier@osg.samsung.com> <javier.martinez@collabora.co.uk>
Jean Tourrilhes <jt@hpl.hp.com>
+<jean-philippe@linaro.org> <jean-philippe.brucker@arm.com>
Jeff Garzik <jgarzik@pretzel.yyz.us>
Jeff Layton <jlayton@kernel.org> <jlayton@redhat.com>
Jeff Layton <jlayton@kernel.org> <jlayton@poochiereds.net>
Juha Yrjola <at solidboot.com>
Juha Yrjola <juha.yrjola@nokia.com>
Juha Yrjola <juha.yrjola@solidboot.com>
+Julien Thierry <julien.thierry.kdev@gmail.com> <julien.thierry@arm.com>
Kay Sievers <kay.sievers@vrfy.org>
Kenneth W Chen <kenneth.w.chen@intel.com>
Konstantin Khlebnikov <koct9i@gmail.com> <k.khlebnikov@samsung.com>
Li Yang <leoyang.li@nxp.com> <leo@zh-kernel.org>
Li Yang <leoyang.li@nxp.com> <leoli@freescale.com>
Maciej W. Rozycki <macro@mips.com> <macro@imgtec.com>
+Marc Zyngier <maz@kernel.org> <marc.zyngier@arm.com>
Marcin Nowakowski <marcin.nowakowski@mips.com> <marcin.nowakowski@imgtec.com>
Mark Brown <broonie@sirena.org.uk>
Mark Yao <markyao0591@gmail.com> <mark.yao@rock-chips.com>
.. note::
Implementation details for the powerpc platform are discussed in
- the file Documentation/powerpc/eeh-pci-error-recovery.txt
+ the file Documentation/powerpc/eeh-pci-error-recovery.rst
As of this writing, there is a growing list of device drivers with
patches implementing error recovery. Not all of these patches are in
- drivers/net/cxgb3
- drivers/net/s2io.c
- drivers/net/qlge
+
+The End
+-------
Using hlist_nulls to protect read-mostly linked lists and
objects using SLAB_TYPESAFE_BY_RCU allocations.
-Please read the basics in Documentation/RCU/listRCU.txt
+Please read the basics in Documentation/RCU/listRCU.rst
Using special makers (called 'nulls') is a convenient way
to solve following problem :
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel User Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'linux-user.tex', 'Linux Kernel User Documentation',
- 'The kernel development community', 'manual'),
-]
collapses sequences of basic pages into huge pages.
The THP behaviour is controlled via :ref:`sysfs <thp_sysfs>`
-interface and using madivse(2) and prctl(2) system calls.
+interface and using madvise(2) and prctl(2) system calls.
Transparent Hugepage Support maximizes the usefulness of free memory
if compared to the reservation approach of hugetlbfs by allowing all
import os
import sphinx
+from subprocess import check_output
+
# Get Sphinx version
major, minor, patch = sphinx.version_info[:3]
\\setsansfont{DejaVu Sans}
\\setromanfont{DejaVu Serif}
\\setmonofont{DejaVu Sans Mono}
-
'''
}
+# At least one book (translations) may have Asian characters
+# with are only displayed if xeCJK is used
+
+cjk_cmd = check_output(['fc-list', '--format="%{family[0]}\n"']).decode('utf-8', 'ignore')
+if cjk_cmd.find("Noto Sans CJK SC") >= 0:
+ print ("enabling CJK for LaTeX builder")
+ latex_elements['preamble'] += '''
+ % This is needed for translations
+ \\usepackage{xeCJK}
+ \\setCJKmainfont{Noto Sans CJK SC}
+ '''
+
# Fix reference escape troubles with Sphinx 1.4.x
if major == 1 and minor > 3:
latex_elements['preamble'] += '\\renewcommand*{\\DUrole}[2]{ #2 }\n'
'The kernel development community', 'manual'),
]
+# Add all other index files from Documentation/ subdirectories
+for fn in os.listdir('.'):
+ doc = os.path.join(fn, "index")
+ if os.path.exists(doc + ".rst"):
+ has = False
+ for l in latex_documents:
+ if l[0] == doc:
+ has = True
+ break
+ if not has:
+ latex_documents.append((doc, fn + '.tex',
+ 'Linux %s Documentation' % fn.capitalize(),
+ 'The kernel development community',
+ 'manual'))
+
# The name of an image file (relative to this directory) to place at the top of
# the title page.
#latex_logo = None
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Core-API Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'core-api.tex', project,
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Crypto API'
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'crypto-api.tex', 'Linux Kernel Crypto API manual',
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Development tools for the kernel"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'dev-tools.tex', project,
- 'The kernel development community', 'manual'),
-]
https://www.devicetree.org/specifications/
[6] ARM Linux Kernel documentation - Booting AArch64 Linux
- Documentation/arm64/booting.txt
+ Documentation/arm64/booting.rst
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
-$id: http://devicetree.org/schemas/arm/shmobile.yaml#
+$id: http://devicetree.org/schemas/arm/renesas.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Renesas SH-Mobile, R-Mobile, and R-Car Platform Device Tree Bindings
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
-$id: http://devicetree.org/schemas/arm/milbeaut.yaml#
+$id: http://devicetree.org/schemas/arm/socionext/milbeaut.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Milbeaut platforms device tree bindings
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
-$id: http://devicetree.org/schemas/arm/ti/davinci.yaml#
+$id: http://devicetree.org/schemas/arm/ti/ti,davinci.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Texas Instruments DaVinci Platforms Device Tree Bindings
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
-$id: http://devicetree.org/schemas/phy/allwinner,sun4i-a10-ccu.yaml#
+$id: http://devicetree.org/schemas/clock/allwinner,sun4i-a10-ccu.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Allwinner Clock Control Unit Device Tree Bindings
# Copyright 2019 Linaro Ltd.
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/firmware/intel-ixp4xx-network-processing-engine.yaml#"
+$id: "http://devicetree.org/schemas/firmware/intel,ixp4xx-network-processing-engine.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Intel IXP4xx Network Processing Engine
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
-$id: http://devicetree.org/schemas/iio/accelerometers/adi,adxl345.yaml#
+$id: http://devicetree.org/schemas/iio/accel/adi,adxl345.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Analog Devices ADXL345/ADXL375 3-Axis Digital Accelerometers
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
-$id: http://devicetree.org/schemas/iio/accelerometers/adi,adxl372.yaml#
+$id: http://devicetree.org/schemas/iio/accel/adi,adxl372.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Analog Devices ADXL372 3-Axis, +/-(200g) Digital Accelerometer
- compatible: should be "amazon,al-fic"
- reg: physical base address and size of the registers
- interrupt-controller: identifies the node as an interrupt controller
-- #interrupt-cells: must be 2.
- First cell defines the index of the interrupt within the controller.
- Second cell is used to specify the trigger type and must be one of the
- following:
- - bits[3:0] trigger type and level flags
- 1 = low-to-high edge triggered
- 4 = active high level-sensitive
-- interrupt-parent: specifies the parent interrupt controller.
+- #interrupt-cells : must be 2. Specifies the number of cells needed to encode
+ an interrupt source. Supported trigger types are low-to-high edge
+ triggered and active high level-sensitive.
- interrupts: describes which input line in the interrupt parent, this
fic's output is connected to. This field property depends on the parent's
binding
+Please refer to interrupts.txt in this directory for details of the common
+Interrupt Controllers bindings used by client devices.
+
Example:
-amazon_fic: interrupt-controller@0xfd8a8500 {
+amazon_fic: interrupt-controller@fd8a8500 {
compatible = "amazon,al-fic";
interrupt-controller;
#interrupt-cells = <2>;
# Copyright 2018 Linaro Ltd.
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/interrupt/intel-ixp4xx-interrupt.yaml#"
+$id: "http://devicetree.org/schemas/interrupt-controller/intel,ixp4xx-interrupt.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Intel IXP4xx XScale Networking Processors Interrupt Controller
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2019 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/misc/intel,ixp4xx-ahb-queue-manager.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx AHB Queue Manager
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ The IXP4xx AHB Queue Manager maintains queues as circular buffers in
+ an 8KB embedded SRAM along with hardware pointers. It is used by both
+ the XScale processor and the NPEs (Network Processing Units) in the
+ IXP4xx for accelerating queues, especially for networking. Clients pick
+ queues from the queue manager with foo-queue = <&qmgr N> where the
+ &qmgr is a phandle to the queue manager and N is the queue resource
+ number. The queue resources available and their specific purpose
+ on a certain IXP4xx system will vary.
+
+properties:
+ compatible:
+ items:
+ - const: intel,ixp4xx-ahb-queue-manager
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ items:
+ - description: Interrupt for queues 0-31
+ - description: Interrupt for queues 32-63
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ qmgr: queue-manager@60000000 {
+ compatible = "intel,ixp4xx-ahb-queue-manager";
+ reg = <0x60000000 0x4000>;
+ interrupts = <3 IRQ_TYPE_LEVEL_HIGH>, <4 IRQ_TYPE_LEVEL_HIGH>;
+ };
+++ /dev/null
-# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-# Copyright 2019 Linaro Ltd.
-%YAML 1.2
----
-$id: "http://devicetree.org/schemas/misc/intel-ixp4xx-ahb-queue-manager.yaml#"
-$schema: "http://devicetree.org/meta-schemas/core.yaml#"
-
-title: Intel IXP4xx AHB Queue Manager
-
-maintainers:
- - Linus Walleij <linus.walleij@linaro.org>
-
-description: |
- The IXP4xx AHB Queue Manager maintains queues as circular buffers in
- an 8KB embedded SRAM along with hardware pointers. It is used by both
- the XScale processor and the NPEs (Network Processing Units) in the
- IXP4xx for accelerating queues, especially for networking. Clients pick
- queues from the queue manager with foo-queue = <&qmgr N> where the
- &qmgr is a phandle to the queue manager and N is the queue resource
- number. The queue resources available and their specific purpose
- on a certain IXP4xx system will vary.
-
-properties:
- compatible:
- items:
- - const: intel,ixp4xx-ahb-queue-manager
-
- reg:
- maxItems: 1
-
- interrupts:
- items:
- - description: Interrupt for queues 0-31
- - description: Interrupt for queues 32-63
-
-required:
- - compatible
- - reg
- - interrupts
-
-examples:
- - |
- #include <dt-bindings/interrupt-controller/irq.h>
-
- qmgr: queue-manager@60000000 {
- compatible = "intel,ixp4xx-ahb-queue-manager";
- reg = <0x60000000 0x4000>;
- interrupts = <3 IRQ_TYPE_LEVEL_HIGH>, <4 IRQ_TYPE_LEVEL_HIGH>;
- };
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
-$id: http://devicetree.org/schemas/net/allwinner,sun8i-a83t-gmac.yaml#
+$id: http://devicetree.org/schemas/net/allwinner,sun8i-a83t-emac.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Allwinner A83t EMAC Device Tree Bindings
examples:
- |
- sid@1c23800 {
+ efuse@1c23800 {
compatible = "allwinner,sun4i-a10-sid";
reg = <0x01c23800 0x10>;
};
- |
- sid@1c23800 {
+ efuse@1c23800 {
compatible = "allwinner,sun7i-a20-sid";
reg = <0x01c23800 0x200>;
};
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/nvmem-consumer.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVMEM (Non Volatile Memory) Consumer Device Tree Bindings
+
+maintainers:
+ - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+select: true
+
+properties:
+ nvmem:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description:
+ List of phandle to the nvmem providers.
+
+ nvmem-cells:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ description:
+ List of phandle to the nvmem data cells.
+
+ nvmem-names:
+ $ref: /schemas/types.yaml#/definitions/string-array
+ description:
+ Names for the each nvmem provider.
+
+ nvmem-cell-names:
+ $ref: /schemas/types.yaml#/definitions/string-array
+ description:
+ Names for each nvmem-cells specified.
+
+dependencies:
+ nvmem-names: [ nvmem ]
+ nvmem-cell-names: [ nvmem-cells ]
+
+examples:
+ - |
+ tsens {
+ /* ... */
+ nvmem-cells = <&tsens_calibration>;
+ nvmem-cell-names = "calibration";
+ };
-= NVMEM(Non Volatile Memory) Data Device Tree Bindings =
-
-This binding is intended to represent the location of hardware
-configuration data stored in NVMEMs like eeprom, efuses and so on.
-
-On a significant proportion of boards, the manufacturer has stored
-some data on NVMEM, for the OS to be able to retrieve these information
-and act upon it. Obviously, the OS has to know about where to retrieve
-these data from, and where they are stored on the storage device.
-
-This document is here to document this.
-
-= Data providers =
-Contains bindings specific to provider drivers and data cells as children
-of this node.
-
-Optional properties:
- read-only: Mark the provider as read only.
-
-= Data cells =
-These are the child nodes of the provider which contain data cell
-information like offset and size in nvmem provider.
-
-Required properties:
-reg: specifies the offset in byte within the storage device.
-
-Optional properties:
-
-bits: Is pair of bit location and number of bits, which specifies offset
- in bit and number of bits within the address range specified by reg property.
- Offset takes values from 0-7.
-
-For example:
-
- /* Provider */
- qfprom: qfprom@700000 {
- ...
-
- /* Data cells */
- tsens_calibration: calib@404 {
- reg = <0x404 0x10>;
- };
-
- tsens_calibration_bckp: calib_bckp@504 {
- reg = <0x504 0x11>;
- bits = <6 128>
- };
-
- pvs_version: pvs-version@6 {
- reg = <0x6 0x2>
- bits = <7 2>
- };
-
- speed_bin: speed-bin@c{
- reg = <0xc 0x1>;
- bits = <2 3>;
-
- };
- ...
- };
-
-= Data consumers =
-Are device nodes which consume nvmem data cells/providers.
-
-Required-properties:
-nvmem-cells: list of phandle to the nvmem data cells.
-nvmem-cell-names: names for the each nvmem-cells specified. Required if
- nvmem-cells is used.
-
-Optional-properties:
-nvmem : list of phandles to nvmem providers.
-nvmem-names: names for the each nvmem provider. required if nvmem is used.
-
-For example:
-
- tsens {
- ...
- nvmem-cells = <&tsens_calibration>;
- nvmem-cell-names = "calibration";
- };
+This file has been moved to nvmem.yaml and nvmem-consumer.yaml.
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/nvmem.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NVMEM (Non Volatile Memory) Device Tree Bindings
+
+maintainers:
+ - Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+description: |
+ This binding is intended to represent the location of hardware
+ configuration data stored in NVMEMs like eeprom, efuses and so on.
+
+ On a significant proportion of boards, the manufacturer has stored
+ some data on NVMEM, for the OS to be able to retrieve these
+ information and act upon it. Obviously, the OS has to know about
+ where to retrieve these data from, and where they are stored on the
+ storage device.
+
+properties:
+ $nodename:
+ pattern: "^(eeprom|efuse|nvram)(@.*|-[0-9a-f])*$"
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 1
+
+ read-only:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Mark the provider as read only.
+
+patternProperties:
+ "^.*@[0-9a-f]+$":
+ type: object
+
+ properties:
+ reg:
+ maxItems: 1
+ description:
+ Offset and size in bytes within the storage device.
+
+ bits:
+ maxItems: 1
+ items:
+ items:
+ - minimum: 0
+ maximum: 7
+ description:
+ Offset in bit within the address range specified by reg.
+ - minimum: 1
+ description:
+ Size in bit within the address range specified by reg.
+
+ required:
+ - reg
+
+ additionalProperties: false
+
+examples:
+ - |
+ qfprom: eeprom@700000 {
+ #address-cells = <1>;
+ #size-cells = <1>;
+
+ /* ... */
+
+ /* Data cells */
+ tsens_calibration: calib@404 {
+ reg = <0x404 0x10>;
+ };
+
+ tsens_calibration_bckp: calib_bckp@504 {
+ reg = <0x504 0x11>;
+ bits = <6 128>;
+ };
+
+ pvs_version: pvs-version@6 {
+ reg = <0x6 0x2>;
+ bits = <7 2>;
+ };
+
+ speed_bin: speed-bin@c{
+ reg = <0xc 0x1>;
+ bits = <2 3>;
+ };
+ };
+
+...
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
-$id: http://devicetree.org/schemas/display/allwinner,sun6i-a31-mipi-dphy.yaml#
+$id: http://devicetree.org/schemas/phy/allwinner,sun6i-a31-mipi-dphy.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Allwinner A31 MIPI D-PHY Controller Device Tree Bindings
# Copyright 2018 Linaro Ltd.
%YAML 1.2
---
-$id: "http://devicetree.org/schemas/timer/intel-ixp4xx-timer.yaml#"
+$id: "http://devicetree.org/schemas/timer/intel,ixp4xx-timer.yaml#"
$schema: "http://devicetree.org/meta-schemas/core.yaml#"
title: Intel IXP4xx XScale Networking Processors Timers
- power-on-time-ms : Specifies the time it takes from the time the host
initiates the power-on sequence to a port until the port has adequate
power. The value is given in ms in a 0 - 510 range (default is 100ms).
- - swap-dx-lanes : Specifies the downstream ports which will swap the
- differential-pair (D+/D-), default is not-swapped.
- - swap-us-lanes : Selects the upstream port differential-pair (D+/D-)
- swapping (boolean, default is not-swapped)
+ - swap-dx-lanes : Specifies the ports which will swap the differential-pair
+ (D+/D-), default is not-swapped.
Examples:
usb2512b@2c {
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Documentation Guide'
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'kernel-doc-guide.tex', 'Linux Kernel Documentation Guide',
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux 802.11 Driver Developer's Guide"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', '80211.tex', project,
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux driver implementer's API guide"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'driver-api.tex', project,
- 'The kernel development community', 'manual'),
-]
Several sysfs attributes are generated by the Generic Counter interface,
and reside under the /sys/bus/counter/devices/counterX directory, where
counterX refers to the respective counter device. Please see
-Documentation/ABI/testing/sys-bus-counter-generic-sysfs for detailed
+Documentation/ABI/testing/sysfs-bus-counter for detailed
information on each Generic Counter interface sysfs attribute.
Through these sysfs attributes, programs and scripts may interact with
For a more detailed breakdown of the available Generic Counter interface
sysfs attributes, please refer to the
-Documentation/ABI/testing/sys-bus-counter file.
+Documentation/ABI/testing/sysfs-bus-counter file.
The Signals and Counts associated with the Counter device are registered
to the system as well by the counter_register function. The
In order to get reference to a PHY without help from DeviceTree, the framework
offers lookups which can be compared to clkdev that allow clk structures to be
-bound to devices. A lookup can be made be made during runtime when a handle to
-the struct phy already exists.
+bound to devices. A lookup can be made during runtime when a handle to the
+struct phy already exists.
The framework offers the following API for registering and unregistering the
lookups::
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Device Power Management"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'pm.tex', project,
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Filesystems API"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'filesystems.tex', project,
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux GPU Driver Developer's Guide"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'gpu.tex', project,
- 'The kernel development community', 'manual'),
-]
Addresses scanned: PCI space
- Datasheet: http://support.amd.com/us/Processor_TechDocs/32559.pdf
+ Datasheet: http://www.amd.com/system/files/TechDocs/32559.pdf
Author: Rudolf Marek
netlabel/index
networking/index
pcmcia/index
+ power/index
target/index
timers/index
watchdog/index
+ virtual/index
input/index
hwmon/index
gpu/index
arm64/index
ia64/index
m68k/index
+ powerpc/index
riscv/index
s390/index
sh/index
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux input driver subsystem"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'linux-input.tex', project,
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Kernel Hacking Guides"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'kernel-hacking.tex', project,
- 'The kernel development community', 'manual'),
-]
**changes** the list will have to get the write lock.
NOTE! RCU is better for list traversal, but requires careful
- attention to design detail (see Documentation/RCU/listRCU.txt).
+ attention to design detail (see Documentation/RCU/listRCU.rst).
Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
time need to do any changes (even if you don't do it every time), you have
NOTE! We are working hard to remove reader-writer spinlocks in most
cases, so please don't add a new one without consensus. (Instead, see
- Documentation/RCU/rcu.txt for complete information.)
+ Documentation/RCU/rcu.rst for complete information.)
----
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Development Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'maintainer.tex', 'Linux Kernel Development Documentation',
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-# SPDX-License-Identifier: GPL-2.0
-
-project = 'Linux Media Subsystem Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'media.tex', 'Linux Media Subsystem Documentation',
- 'The kernel development community', 'manual'),
-]
[*] For information on bus mastering DMA and coherency please read:
- Documentation/PCI/pci.rst
+ Documentation/driver-api/pci/pci.rst
Documentation/DMA-API-HOWTO.txt
Documentation/DMA-API.txt
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Networking Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'networking.tex', project,
- 'The kernel development community', 'manual'),
-]
-:orphan:
+.. SPDX-License-Identifier: GPL-2.0
================
Power Management
+++ /dev/null
-DAWR issues on POWER9
-============================
-
-On POWER9 the Data Address Watchpoint Register (DAWR) can cause a checkstop
-if it points to cache inhibited (CI) memory. Currently Linux has no way to
-disinguish CI memory when configuring the DAWR, so (for now) the DAWR is
-disabled by this commit:
-
- commit 9654153158d3e0684a1bdb76dbababdb7111d5a0
- Author: Michael Neuling <mikey@neuling.org>
- Date: Tue Mar 27 15:37:24 2018 +1100
- powerpc: Disable DAWR in the base POWER9 CPU features
-
-Technical Details:
-============================
-
-DAWR has 6 different ways of being set.
-1) ptrace
-2) h_set_mode(DAWR)
-3) h_set_dabr()
-4) kvmppc_set_one_reg()
-5) xmon
-
-For ptrace, we now advertise zero breakpoints on POWER9 via the
-PPC_PTRACE_GETHWDBGINFO call. This results in GDB falling back to
-software emulation of the watchpoint (which is slow).
-
-h_set_mode(DAWR) and h_set_dabr() will now return an error to the
-guest on a POWER9 host. Current Linux guests ignore this error, so
-they will silently not get the DAWR.
-
-kvmppc_set_one_reg() will store the value in the vcpu but won't
-actually set it on POWER9 hardware. This is done so we don't break
-migration from POWER8 to POWER9, at the cost of silently losing the
-DAWR on the migration.
-
-For xmon, the 'bd' command will return an error on P9.
-
-Consequences for users
-============================
-
-For GDB watchpoints (ie 'watch' command) on POWER9 bare metal , GDB
-will accept the command. Unfortunately since there is no hardware
-support for the watchpoint, GDB will software emulate the watchpoint
-making it run very slowly.
-
-The same will also be true for any guests started on a POWER9
-host. The watchpoint will fail and GDB will fall back to software
-emulation.
-
-If a guest is started on a POWER8 host, GDB will accept the watchpoint
-and configure the hardware to use the DAWR. This will run at full
-speed since it can use the hardware emulation. Unfortunately if this
-guest is migrated to a POWER9 host, the watchpoint will be lost on the
-POWER9. Loads and stores to the watchpoint locations will not be
-trapped in GDB. The watchpoint is remembered, so if the guest is
-migrated back to the POWER8 host, it will start working again.
-
-Force enabling the DAWR
-=============================
-Kernels (since ~v5.2) have an option to force enable the DAWR via:
-
- echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
-
-This enables the DAWR even on POWER9.
-
-This is a dangerous setting, USE AT YOUR OWN RISK.
-
-Some users may not care about a bad user crashing their box
-(ie. single user/desktop systems) and really want the DAWR. This
-allows them to force enable DAWR.
-
-This flag can also be used to disable DAWR access. Once this is
-cleared, all DAWR access should be cleared immediately and your
-machine once again safe from crashing.
-
-Userspace may get confused by toggling this. If DAWR is force
-enabled/disabled between getting the number of breakpoints (via
-PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
-inconsistent view of what's available. Similarly for guests.
-
-For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
-enabled in the host AND the guest. For this reason, this won't work on
-POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
-dawr_enable_dangerous file will fail if the hypervisor doesn't support
-writing the DAWR.
-
-To double check the DAWR is working, run this kernel selftest:
- tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
-Any errors/failures/skips mean something is wrong.
--- /dev/null
+========================
+The PowerPC boot wrapper
+========================
+
+Copyright (C) Secret Lab Technologies Ltd.
+
+PowerPC image targets compresses and wraps the kernel image (vmlinux) with
+a boot wrapper to make it usable by the system firmware. There is no
+standard PowerPC firmware interface, so the boot wrapper is designed to
+be adaptable for each kind of image that needs to be built.
+
+The boot wrapper can be found in the arch/powerpc/boot/ directory. The
+Makefile in that directory has targets for all the available image types.
+The different image types are used to support all of the various firmware
+interfaces found on PowerPC platforms. OpenFirmware is the most commonly
+used firmware type on general purpose PowerPC systems from Apple, IBM and
+others. U-Boot is typically found on embedded PowerPC hardware, but there
+are a handful of other firmware implementations which are also popular. Each
+firmware interface requires a different image format.
+
+The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and
+it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target
+image. The details of the build system is discussed in the next section.
+Currently, the following image format targets exist:
+
+ ==================== ========================================================
+ cuImage.%: Backwards compatible uImage for older version of
+ U-Boot (for versions that don't understand the device
+ tree). This image embeds a device tree blob inside
+ the image. The boot wrapper, kernel and device tree
+ are all embedded inside the U-Boot uImage file format
+ with boot wrapper code that extracts data from the old
+ bd_info structure and loads the data into the device
+ tree before jumping into the kernel.
+
+ Because of the series of #ifdefs found in the
+ bd_info structure used in the old U-Boot interfaces,
+ cuImages are platform specific. Each specific
+ U-Boot platform has a different platform init file
+ which populates the embedded device tree with data
+ from the platform specific bd_info file. The platform
+ specific cuImage platform init code can be found in
+ `arch/powerpc/boot/cuboot.*.c`. Selection of the correct
+ cuImage init code for a specific board can be found in
+ the wrapper structure.
+
+ dtbImage.%: Similar to zImage, except device tree blob is embedded
+ inside the image instead of provided by firmware. The
+ output image file can be either an elf file or a flat
+ binary depending on the platform.
+
+ dtbImages are used on systems which do not have an
+ interface for passing a device tree directly.
+ dtbImages are similar to simpleImages except that
+ dtbImages have platform specific code for extracting
+ data from the board firmware, but simpleImages do not
+ talk to the firmware at all.
+
+ PlayStation 3 support uses dtbImage. So do Embedded
+ Planet boards using the PlanetCore firmware. Board
+ specific initialization code is typically found in a
+ file named arch/powerpc/boot/<platform>.c; but this
+ can be overridden by the wrapper script.
+
+ simpleImage.%: Firmware independent compressed image that does not
+ depend on any particular firmware interface and embeds
+ a device tree blob. This image is a flat binary that
+ can be loaded to any location in RAM and jumped to.
+ Firmware cannot pass any configuration data to the
+ kernel with this image type and it depends entirely on
+ the embedded device tree for all information.
+
+ The simpleImage is useful for booting systems with
+ an unknown firmware interface or for booting from
+ a debugger when no firmware is present (such as on
+ the Xilinx Virtex platform). The only assumption that
+ simpleImage makes is that RAM is correctly initialized
+ and that the MMU is either off or has RAM mapped to
+ base address 0.
+
+ simpleImage also supports inserting special platform
+ specific initialization code to the start of the bootup
+ sequence. The virtex405 platform uses this feature to
+ ensure that the cache is invalidated before caching
+ is enabled. Platform specific initialization code is
+ added as part of the wrapper script and is keyed on
+ the image target name. For example, all
+ simpleImage.virtex405-* targets will add the
+ virtex405-head.S initialization code (This also means
+ that the dts file for virtex405 targets should be
+ named (virtex405-<board>.dts). Search the wrapper
+ script for 'virtex405' and see the file
+ arch/powerpc/boot/virtex405-head.S for details.
+
+ treeImage.%; Image format for used with OpenBIOS firmware found
+ on some ppc4xx hardware. This image embeds a device
+ tree blob inside the image.
+
+ uImage: Native image format used by U-Boot. The uImage target
+ does not add any boot code. It just wraps a compressed
+ vmlinux in the uImage data structure. This image
+ requires a version of U-Boot that is able to pass
+ a device tree to the kernel at boot. If using an older
+ version of U-Boot, then you need to use a cuImage
+ instead.
+
+ zImage.%: Image format which does not embed a device tree.
+ Used by OpenFirmware and other firmware interfaces
+ which are able to supply a device tree. This image
+ expects firmware to provide the device tree at boot.
+ Typically, if you have general purpose PowerPC
+ hardware then you want this image format.
+ ==================== ========================================================
+
+Image types which embed a device tree blob (simpleImage, dtbImage, treeImage,
+and cuImage) all generate the device tree blob from a file in the
+arch/powerpc/boot/dts/ directory. The Makefile selects the correct device
+tree source based on the name of the target. Therefore, if the kernel is
+built with 'make treeImage.walnut simpleImage.virtex405-ml403', then the
+build system will use arch/powerpc/boot/dts/walnut.dts to build
+treeImage.walnut and arch/powerpc/boot/dts/virtex405-ml403.dts to build
+the simpleImage.virtex405-ml403.
+
+Two special targets called 'zImage' and 'zImage.initrd' also exist. These
+targets build all the default images as selected by the kernel configuration.
+Default images are selected by the boot wrapper Makefile
+(arch/powerpc/boot/Makefile) by adding targets to the $image-y variable. Look
+at the Makefile to see which default image targets are available.
+
+How it is built
+---------------
+arch/powerpc is designed to support multiplatform kernels, which means
+that a single vmlinux image can be booted on many different target boards.
+It also means that the boot wrapper must be able to wrap for many kinds of
+images on a single build. The design decision was made to not use any
+conditional compilation code (#ifdef, etc) in the boot wrapper source code.
+All of the boot wrapper pieces are buildable at any time regardless of the
+kernel configuration. Building all the wrapper bits on every kernel build
+also ensures that obscure parts of the wrapper are at the very least compile
+tested in a large variety of environments.
+
+The wrapper is adapted for different image types at link time by linking in
+just the wrapper bits that are appropriate for the image type. The 'wrapper
+script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and
+is responsible for selecting the correct wrapper bits for the image type.
+The arguments are well documented in the script's comment block, so they
+are not repeated here. However, it is worth mentioning that the script
+uses the -p (platform) argument as the main method of deciding which wrapper
+bits to compile in. Look for the large 'case "$platform" in' block in the
+middle of the script. This is also the place where platform specific fixups
+can be selected by changing the link order.
+
+In particular, care should be taken when working with cuImages. cuImage
+wrapper bits are very board specific and care should be taken to make sure
+the target you are trying to build is supported by the wrapper bits.
+++ /dev/null
-The PowerPC boot wrapper
-------------------------
-Copyright (C) Secret Lab Technologies Ltd.
-
-PowerPC image targets compresses and wraps the kernel image (vmlinux) with
-a boot wrapper to make it usable by the system firmware. There is no
-standard PowerPC firmware interface, so the boot wrapper is designed to
-be adaptable for each kind of image that needs to be built.
-
-The boot wrapper can be found in the arch/powerpc/boot/ directory. The
-Makefile in that directory has targets for all the available image types.
-The different image types are used to support all of the various firmware
-interfaces found on PowerPC platforms. OpenFirmware is the most commonly
-used firmware type on general purpose PowerPC systems from Apple, IBM and
-others. U-Boot is typically found on embedded PowerPC hardware, but there
-are a handful of other firmware implementations which are also popular. Each
-firmware interface requires a different image format.
-
-The boot wrapper is built from the makefile in arch/powerpc/boot/Makefile and
-it uses the wrapper script (arch/powerpc/boot/wrapper) to generate target
-image. The details of the build system is discussed in the next section.
-Currently, the following image format targets exist:
-
- cuImage.%: Backwards compatible uImage for older version of
- U-Boot (for versions that don't understand the device
- tree). This image embeds a device tree blob inside
- the image. The boot wrapper, kernel and device tree
- are all embedded inside the U-Boot uImage file format
- with boot wrapper code that extracts data from the old
- bd_info structure and loads the data into the device
- tree before jumping into the kernel.
- Because of the series of #ifdefs found in the
- bd_info structure used in the old U-Boot interfaces,
- cuImages are platform specific. Each specific
- U-Boot platform has a different platform init file
- which populates the embedded device tree with data
- from the platform specific bd_info file. The platform
- specific cuImage platform init code can be found in
- arch/powerpc/boot/cuboot.*.c. Selection of the correct
- cuImage init code for a specific board can be found in
- the wrapper structure.
- dtbImage.%: Similar to zImage, except device tree blob is embedded
- inside the image instead of provided by firmware. The
- output image file can be either an elf file or a flat
- binary depending on the platform.
- dtbImages are used on systems which do not have an
- interface for passing a device tree directly.
- dtbImages are similar to simpleImages except that
- dtbImages have platform specific code for extracting
- data from the board firmware, but simpleImages do not
- talk to the firmware at all.
- PlayStation 3 support uses dtbImage. So do Embedded
- Planet boards using the PlanetCore firmware. Board
- specific initialization code is typically found in a
- file named arch/powerpc/boot/<platform>.c; but this
- can be overridden by the wrapper script.
- simpleImage.%: Firmware independent compressed image that does not
- depend on any particular firmware interface and embeds
- a device tree blob. This image is a flat binary that
- can be loaded to any location in RAM and jumped to.
- Firmware cannot pass any configuration data to the
- kernel with this image type and it depends entirely on
- the embedded device tree for all information.
- The simpleImage is useful for booting systems with
- an unknown firmware interface or for booting from
- a debugger when no firmware is present (such as on
- the Xilinx Virtex platform). The only assumption that
- simpleImage makes is that RAM is correctly initialized
- and that the MMU is either off or has RAM mapped to
- base address 0.
- simpleImage also supports inserting special platform
- specific initialization code to the start of the bootup
- sequence. The virtex405 platform uses this feature to
- ensure that the cache is invalidated before caching
- is enabled. Platform specific initialization code is
- added as part of the wrapper script and is keyed on
- the image target name. For example, all
- simpleImage.virtex405-* targets will add the
- virtex405-head.S initialization code (This also means
- that the dts file for virtex405 targets should be
- named (virtex405-<board>.dts). Search the wrapper
- script for 'virtex405' and see the file
- arch/powerpc/boot/virtex405-head.S for details.
- treeImage.%; Image format for used with OpenBIOS firmware found
- on some ppc4xx hardware. This image embeds a device
- tree blob inside the image.
- uImage: Native image format used by U-Boot. The uImage target
- does not add any boot code. It just wraps a compressed
- vmlinux in the uImage data structure. This image
- requires a version of U-Boot that is able to pass
- a device tree to the kernel at boot. If using an older
- version of U-Boot, then you need to use a cuImage
- instead.
- zImage.%: Image format which does not embed a device tree.
- Used by OpenFirmware and other firmware interfaces
- which are able to supply a device tree. This image
- expects firmware to provide the device tree at boot.
- Typically, if you have general purpose PowerPC
- hardware then you want this image format.
-
-Image types which embed a device tree blob (simpleImage, dtbImage, treeImage,
-and cuImage) all generate the device tree blob from a file in the
-arch/powerpc/boot/dts/ directory. The Makefile selects the correct device
-tree source based on the name of the target. Therefore, if the kernel is
-built with 'make treeImage.walnut simpleImage.virtex405-ml403', then the
-build system will use arch/powerpc/boot/dts/walnut.dts to build
-treeImage.walnut and arch/powerpc/boot/dts/virtex405-ml403.dts to build
-the simpleImage.virtex405-ml403.
-
-Two special targets called 'zImage' and 'zImage.initrd' also exist. These
-targets build all the default images as selected by the kernel configuration.
-Default images are selected by the boot wrapper Makefile
-(arch/powerpc/boot/Makefile) by adding targets to the $image-y variable. Look
-at the Makefile to see which default image targets are available.
-
-How it is built
----------------
-arch/powerpc is designed to support multiplatform kernels, which means
-that a single vmlinux image can be booted on many different target boards.
-It also means that the boot wrapper must be able to wrap for many kinds of
-images on a single build. The design decision was made to not use any
-conditional compilation code (#ifdef, etc) in the boot wrapper source code.
-All of the boot wrapper pieces are buildable at any time regardless of the
-kernel configuration. Building all the wrapper bits on every kernel build
-also ensures that obscure parts of the wrapper are at the very least compile
-tested in a large variety of environments.
-
-The wrapper is adapted for different image types at link time by linking in
-just the wrapper bits that are appropriate for the image type. The 'wrapper
-script' (found in arch/powerpc/boot/wrapper) is called by the Makefile and
-is responsible for selecting the correct wrapper bits for the image type.
-The arguments are well documented in the script's comment block, so they
-are not repeated here. However, it is worth mentioning that the script
-uses the -p (platform) argument as the main method of deciding which wrapper
-bits to compile in. Look for the large 'case "$platform" in' block in the
-middle of the script. This is also the place where platform specific fixups
-can be selected by changing the link order.
-
-In particular, care should be taken when working with cuImages. cuImage
-wrapper bits are very board specific and care should be taken to make sure
-the target you are trying to build is supported by the wrapper bits.
--- /dev/null
+============
+CPU Families
+============
+
+This document tries to summarise some of the different cpu families that exist
+and are supported by arch/powerpc.
+
+
+Book3S (aka sPAPR)
+------------------
+
+- Hash MMU
+- Mix of 32 & 64 bit::
+
+ +--------------+ +----------------+
+ | Old POWER | --------------> | RS64 (threads) |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+ +----------------+ +------+
+ | 601 | --------------> | 603 | ---> | e300 |
+ +--------------+ +----------------+ +------+
+ | |
+ | |
+ v v
+ +--------------+ +----------------+ +-------+
+ | 604 | | 750 (G3) | ---> | 750CX |
+ +--------------+ +----------------+ +-------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +----------------+ +-------+
+ | 620 (64 bit) | | 7400 | | 750CL |
+ +--------------+ +----------------+ +-------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +----------------+ +-------+
+ | POWER3/630 | | 7410 | | 750FX |
+ +--------------+ +----------------+ +-------+
+ | |
+ | |
+ v v
+ +--------------+ +----------------+
+ | POWER3+ | | 7450 |
+ +--------------+ +----------------+
+ | |
+ | |
+ v v
+ +--------------+ +----------------+
+ | POWER4 | | 7455 |
+ +--------------+ +----------------+
+ | |
+ | |
+ v v
+ +--------------+ +-------+ +----------------+
+ | POWER4+ | --> | 970 | | 7447 |
+ +--------------+ +-------+ +----------------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +-------+ +----------------+
+ | POWER5 | | 970FX | | 7448 |
+ +--------------+ +-------+ +----------------+
+ | | |
+ | | |
+ v v v
+ +--------------+ +-------+ +----------------+
+ | POWER5+ | | 970MP | | e600 |
+ +--------------+ +-------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER5++ |
+ +--------------+
+ |
+ |
+ v
+ +--------------+ +-------+
+ | POWER6 | <-?-> | Cell |
+ +--------------+ +-------+
+ |
+ |
+ v
+ +--------------+
+ | POWER7 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER7+ |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | POWER8 |
+ +--------------+
+
+
+ +---------------+
+ | PA6T (64 bit) |
+ +---------------+
+
+
+IBM BookE
+---------
+
+- Software loaded TLB.
+- All 32 bit::
+
+ +--------------+
+ | 401 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 403 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 405 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 440 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+ +----------------+
+ | 450 | --> | BG/P |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | 460 |
+ +--------------+
+ |
+ |
+ v
+ +--------------+
+ | 476 |
+ +--------------+
+
+
+Motorola/Freescale 8xx
+----------------------
+
+- Software loaded with hardware assist.
+- All 32 bit::
+
+ +-------------+
+ | MPC8xx Core |
+ +-------------+
+
+
+Freescale BookE
+---------------
+
+- Software loaded TLB.
+- e6500 adds HW loaded indirect TLB entries.
+- Mix of 32 & 64 bit::
+
+ +--------------+
+ | e200 |
+ +--------------+
+
+
+ +--------------------------------+
+ | e500 |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e500v2 |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e500mc (Book3e) |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e5500 (64 bit) |
+ +--------------------------------+
+ |
+ |
+ v
+ +--------------------------------+
+ | e6500 (HW TLB) (Multithreaded) |
+ +--------------------------------+
+
+
+IBM A2 core
+-----------
+
+- Book3E, software loaded TLB + HW loaded indirect TLB entries.
+- 64 bit::
+
+ +--------------+ +----------------+
+ | A2 core | --> | WSP |
+ +--------------+ +----------------+
+ |
+ |
+ v
+ +--------------+
+ | BG/Q |
+ +--------------+
+++ /dev/null
-CPU Families
-============
-
-This document tries to summarise some of the different cpu families that exist
-and are supported by arch/powerpc.
-
-
-Book3S (aka sPAPR)
-------------------
-
- - Hash MMU
- - Mix of 32 & 64 bit
-
- +--------------+ +----------------+
- | Old POWER | --------------> | RS64 (threads) |
- +--------------+ +----------------+
- |
- |
- v
- +--------------+ +----------------+ +------+
- | 601 | --------------> | 603 | ---> | e300 |
- +--------------+ +----------------+ +------+
- | |
- | |
- v v
- +--------------+ +----------------+ +-------+
- | 604 | | 750 (G3) | ---> | 750CX |
- +--------------+ +----------------+ +-------+
- | | |
- | | |
- v v v
- +--------------+ +----------------+ +-------+
- | 620 (64 bit) | | 7400 | | 750CL |
- +--------------+ +----------------+ +-------+
- | | |
- | | |
- v v v
- +--------------+ +----------------+ +-------+
- | POWER3/630 | | 7410 | | 750FX |
- +--------------+ +----------------+ +-------+
- | |
- | |
- v v
- +--------------+ +----------------+
- | POWER3+ | | 7450 |
- +--------------+ +----------------+
- | |
- | |
- v v
- +--------------+ +----------------+
- | POWER4 | | 7455 |
- +--------------+ +----------------+
- | |
- | |
- v v
- +--------------+ +-------+ +----------------+
- | POWER4+ | --> | 970 | | 7447 |
- +--------------+ +-------+ +----------------+
- | | |
- | | |
- v v v
- +--------------+ +-------+ +----------------+
- | POWER5 | | 970FX | | 7448 |
- +--------------+ +-------+ +----------------+
- | | |
- | | |
- v v v
- +--------------+ +-------+ +----------------+
- | POWER5+ | | 970MP | | e600 |
- +--------------+ +-------+ +----------------+
- |
- |
- v
- +--------------+
- | POWER5++ |
- +--------------+
- |
- |
- v
- +--------------+ +-------+
- | POWER6 | <-?-> | Cell |
- +--------------+ +-------+
- |
- |
- v
- +--------------+
- | POWER7 |
- +--------------+
- |
- |
- v
- +--------------+
- | POWER7+ |
- +--------------+
- |
- |
- v
- +--------------+
- | POWER8 |
- +--------------+
-
-
- +---------------+
- | PA6T (64 bit) |
- +---------------+
-
-
-IBM BookE
----------
-
- - Software loaded TLB.
- - All 32 bit
-
- +--------------+
- | 401 |
- +--------------+
- |
- |
- v
- +--------------+
- | 403 |
- +--------------+
- |
- |
- v
- +--------------+
- | 405 |
- +--------------+
- |
- |
- v
- +--------------+
- | 440 |
- +--------------+
- |
- |
- v
- +--------------+ +----------------+
- | 450 | --> | BG/P |
- +--------------+ +----------------+
- |
- |
- v
- +--------------+
- | 460 |
- +--------------+
- |
- |
- v
- +--------------+
- | 476 |
- +--------------+
-
-
-Motorola/Freescale 8xx
-----------------------
-
- - Software loaded with hardware assist.
- - All 32 bit
-
- +-------------+
- | MPC8xx Core |
- +-------------+
-
-
-Freescale BookE
----------------
-
- - Software loaded TLB.
- - e6500 adds HW loaded indirect TLB entries.
- - Mix of 32 & 64 bit
-
- +--------------+
- | e200 |
- +--------------+
-
-
- +--------------------------------+
- | e500 |
- +--------------------------------+
- |
- |
- v
- +--------------------------------+
- | e500v2 |
- +--------------------------------+
- |
- |
- v
- +--------------------------------+
- | e500mc (Book3e) |
- +--------------------------------+
- |
- |
- v
- +--------------------------------+
- | e5500 (64 bit) |
- +--------------------------------+
- |
- |
- v
- +--------------------------------+
- | e6500 (HW TLB) (Multithreaded) |
- +--------------------------------+
-
-
-IBM A2 core
------------
-
- - Book3E, software loaded TLB + HW loaded indirect TLB entries.
- - 64 bit
-
- +--------------+ +----------------+
- | A2 core | --> | WSP |
- +--------------+ +----------------+
- |
- |
- v
- +--------------+
- | BG/Q |
- +--------------+
--- /dev/null
+============
+CPU Features
+============
+
+Hollis Blanchard <hollis@austin.ibm.com>
+5 Jun 2002
+
+This document describes the system (including self-modifying code) used in the
+PPC Linux kernel to support a variety of PowerPC CPUs without requiring
+compile-time selection.
+
+Early in the boot process the ppc32 kernel detects the current CPU type and
+chooses a set of features accordingly. Some examples include Altivec support,
+split instruction and data caches, and if the CPU supports the DOZE and NAP
+sleep modes.
+
+Detection of the feature set is simple. A list of processors can be found in
+arch/powerpc/kernel/cputable.c. The PVR register is masked and compared with
+each value in the list. If a match is found, the cpu_features of cur_cpu_spec
+is assigned to the feature bitmask for this processor and a __setup_cpu
+function is called.
+
+C code may test 'cur_cpu_spec[smp_processor_id()]->cpu_features' for a
+particular feature bit. This is done in quite a few places, for example
+in ppc_setup_l2cr().
+
+Implementing cpufeatures in assembly is a little more involved. There are
+several paths that are performance-critical and would suffer if an array
+index, structure dereference, and conditional branch were added. To avoid the
+performance penalty but still allow for runtime (rather than compile-time) CPU
+selection, unused code is replaced by 'nop' instructions. This nop'ing is
+based on CPU 0's capabilities, so a multi-processor system with non-identical
+processors will not work (but such a system would likely have other problems
+anyways).
+
+After detecting the processor type, the kernel patches out sections of code
+that shouldn't be used by writing nop's over it. Using cpufeatures requires
+just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
+transfer_to_handler::
+
+ #ifdef CONFIG_ALTIVEC
+ BEGIN_FTR_SECTION
+ mfspr r22,SPRN_VRSAVE /* if G4, save vrsave register value */
+ stw r22,THREAD_VRSAVE(r23)
+ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
+ #endif /* CONFIG_ALTIVEC */
+
+If CPU 0 supports Altivec, the code is left untouched. If it doesn't, both
+instructions are replaced with nop's.
+
+The END_FTR_SECTION macro has two simpler variations: END_FTR_SECTION_IFSET
+and END_FTR_SECTION_IFCLR. These simply test if a flag is set (in
+cur_cpu_spec[0]->cpu_features) or is cleared, respectively. These two macros
+should be used in the majority of cases.
+
+The END_FTR_SECTION macros are implemented by storing information about this
+code in the '__ftr_fixup' ELF section. When do_cpu_ftr_fixups
+(arch/powerpc/kernel/misc.S) is invoked, it will iterate over the records in
+__ftr_fixup, and if the required feature is not present it will loop writing
+nop's from each BEGIN_FTR_SECTION to END_FTR_SECTION.
+++ /dev/null
-Hollis Blanchard <hollis@austin.ibm.com>
-5 Jun 2002
-
-This document describes the system (including self-modifying code) used in the
-PPC Linux kernel to support a variety of PowerPC CPUs without requiring
-compile-time selection.
-
-Early in the boot process the ppc32 kernel detects the current CPU type and
-chooses a set of features accordingly. Some examples include Altivec support,
-split instruction and data caches, and if the CPU supports the DOZE and NAP
-sleep modes.
-
-Detection of the feature set is simple. A list of processors can be found in
-arch/powerpc/kernel/cputable.c. The PVR register is masked and compared with
-each value in the list. If a match is found, the cpu_features of cur_cpu_spec
-is assigned to the feature bitmask for this processor and a __setup_cpu
-function is called.
-
-C code may test 'cur_cpu_spec[smp_processor_id()]->cpu_features' for a
-particular feature bit. This is done in quite a few places, for example
-in ppc_setup_l2cr().
-
-Implementing cpufeatures in assembly is a little more involved. There are
-several paths that are performance-critical and would suffer if an array
-index, structure dereference, and conditional branch were added. To avoid the
-performance penalty but still allow for runtime (rather than compile-time) CPU
-selection, unused code is replaced by 'nop' instructions. This nop'ing is
-based on CPU 0's capabilities, so a multi-processor system with non-identical
-processors will not work (but such a system would likely have other problems
-anyways).
-
-After detecting the processor type, the kernel patches out sections of code
-that shouldn't be used by writing nop's over it. Using cpufeatures requires
-just 2 macros (found in arch/powerpc/include/asm/cputable.h), as seen in head.S
-transfer_to_handler:
-
- #ifdef CONFIG_ALTIVEC
- BEGIN_FTR_SECTION
- mfspr r22,SPRN_VRSAVE /* if G4, save vrsave register value */
- stw r22,THREAD_VRSAVE(r23)
- END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC)
- #endif /* CONFIG_ALTIVEC */
-
-If CPU 0 supports Altivec, the code is left untouched. If it doesn't, both
-instructions are replaced with nop's.
-
-The END_FTR_SECTION macro has two simpler variations: END_FTR_SECTION_IFSET
-and END_FTR_SECTION_IFCLR. These simply test if a flag is set (in
-cur_cpu_spec[0]->cpu_features) or is cleared, respectively. These two macros
-should be used in the majority of cases.
-
-The END_FTR_SECTION macros are implemented by storing information about this
-code in the '__ftr_fixup' ELF section. When do_cpu_ftr_fixups
-(arch/powerpc/kernel/misc.S) is invoked, it will iterate over the records in
-__ftr_fixup, and if the required feature is not present it will loop writing
-nop's from each BEGIN_FTR_SECTION to END_FTR_SECTION.
--- /dev/null
+====================================
+Coherent Accelerator Interface (CXL)
+====================================
+
+Introduction
+============
+
+ The coherent accelerator interface is designed to allow the
+ coherent connection of accelerators (FPGAs and other devices) to a
+ POWER system. These devices need to adhere to the Coherent
+ Accelerator Interface Architecture (CAIA).
+
+ IBM refers to this as the Coherent Accelerator Processor Interface
+ or CAPI. In the kernel it's referred to by the name CXL to avoid
+ confusion with the ISDN CAPI subsystem.
+
+ Coherent in this context means that the accelerator and CPUs can
+ both access system memory directly and with the same effective
+ addresses.
+
+
+Hardware overview
+=================
+
+ ::
+
+ POWER8/9 FPGA
+ +----------+ +---------+
+ | | | |
+ | CPU | | AFU |
+ | | | |
+ | | | |
+ | | | |
+ +----------+ +---------+
+ | PHB | | |
+ | +------+ | PSL |
+ | | CAPP |<------>| |
+ +---+------+ PCIE +---------+
+
+ The POWER8/9 chip has a Coherently Attached Processor Proxy (CAPP)
+ unit which is part of the PCIe Host Bridge (PHB). This is managed
+ by Linux by calls into OPAL. Linux doesn't directly program the
+ CAPP.
+
+ The FPGA (or coherently attached device) consists of two parts.
+ The POWER Service Layer (PSL) and the Accelerator Function Unit
+ (AFU). The AFU is used to implement specific functionality behind
+ the PSL. The PSL, among other things, provides memory address
+ translation services to allow each AFU direct access to userspace
+ memory.
+
+ The AFU is the core part of the accelerator (eg. the compression,
+ crypto etc function). The kernel has no knowledge of the function
+ of the AFU. Only userspace interacts directly with the AFU.
+
+ The PSL provides the translation and interrupt services that the
+ AFU needs. This is what the kernel interacts with. For example, if
+ the AFU needs to read a particular effective address, it sends
+ that address to the PSL, the PSL then translates it, fetches the
+ data from memory and returns it to the AFU. If the PSL has a
+ translation miss, it interrupts the kernel and the kernel services
+ the fault. The context to which this fault is serviced is based on
+ who owns that acceleration function.
+
+ - POWER8 and PSL Version 8 are compliant to the CAIA Version 1.0.
+ - POWER9 and PSL Version 9 are compliant to the CAIA Version 2.0.
+
+ This PSL Version 9 provides new features such as:
+
+ * Interaction with the nest MMU on the P9 chip.
+ * Native DMA support.
+ * Supports sending ASB_Notify messages for host thread wakeup.
+ * Supports Atomic operations.
+ * etc.
+
+ Cards with a PSL9 won't work on a POWER8 system and cards with a
+ PSL8 won't work on a POWER9 system.
+
+AFU Modes
+=========
+
+ There are two programming modes supported by the AFU. Dedicated
+ and AFU directed. AFU may support one or both modes.
+
+ When using dedicated mode only one MMU context is supported. In
+ this mode, only one userspace process can use the accelerator at
+ time.
+
+ When using AFU directed mode, up to 16K simultaneous contexts can
+ be supported. This means up to 16K simultaneous userspace
+ applications may use the accelerator (although specific AFUs may
+ support fewer). In this mode, the AFU sends a 16 bit context ID
+ with each of its requests. This tells the PSL which context is
+ associated with each operation. If the PSL can't translate an
+ operation, the ID can also be accessed by the kernel so it can
+ determine the userspace context associated with an operation.
+
+
+MMIO space
+==========
+
+ A portion of the accelerator MMIO space can be directly mapped
+ from the AFU to userspace. Either the whole space can be mapped or
+ just a per context portion. The hardware is self describing, hence
+ the kernel can determine the offset and size of the per context
+ portion.
+
+
+Interrupts
+==========
+
+ AFUs may generate interrupts that are destined for userspace. These
+ are received by the kernel as hardware interrupts and passed onto
+ userspace by a read syscall documented below.
+
+ Data storage faults and error interrupts are handled by the kernel
+ driver.
+
+
+Work Element Descriptor (WED)
+=============================
+
+ The WED is a 64-bit parameter passed to the AFU when a context is
+ started. Its format is up to the AFU hence the kernel has no
+ knowledge of what it represents. Typically it will be the
+ effective address of a work queue or status block where the AFU
+ and userspace can share control and status information.
+
+
+
+
+User API
+========
+
+1. AFU character devices
+
+ For AFUs operating in AFU directed mode, two character device
+ files will be created. /dev/cxl/afu0.0m will correspond to a
+ master context and /dev/cxl/afu0.0s will correspond to a slave
+ context. Master contexts have access to the full MMIO space an
+ AFU provides. Slave contexts have access to only the per process
+ MMIO space an AFU provides.
+
+ For AFUs operating in dedicated process mode, the driver will
+ only create a single character device per AFU called
+ /dev/cxl/afu0.0d. This will have access to the entire MMIO space
+ that the AFU provides (like master contexts in AFU directed).
+
+ The types described below are defined in include/uapi/misc/cxl.h
+
+ The following file operations are supported on both slave and
+ master devices.
+
+ A userspace library libcxl is available here:
+
+ https://github.com/ibm-capi/libcxl
+
+ This provides a C interface to this kernel API.
+
+open
+----
+
+ Opens the device and allocates a file descriptor to be used with
+ the rest of the API.
+
+ A dedicated mode AFU only has one context and only allows the
+ device to be opened once.
+
+ An AFU directed mode AFU can have many contexts, the device can be
+ opened once for each context that is available.
+
+ When all available contexts are allocated the open call will fail
+ and return -ENOSPC.
+
+ Note:
+ IRQs need to be allocated for each context, which may limit
+ the number of contexts that can be created, and therefore
+ how many times the device can be opened. The POWER8 CAPP
+ supports 2040 IRQs and 3 are used by the kernel, so 2037 are
+ left. If 1 IRQ is needed per context, then only 2037
+ contexts can be allocated. If 4 IRQs are needed per context,
+ then only 2037/4 = 509 contexts can be allocated.
+
+
+ioctl
+-----
+
+ CXL_IOCTL_START_WORK:
+ Starts the AFU context and associates it with the current
+ process. Once this ioctl is successfully executed, all memory
+ mapped into this process is accessible to this AFU context
+ using the same effective addresses. No additional calls are
+ required to map/unmap memory. The AFU memory context will be
+ updated as userspace allocates and frees memory. This ioctl
+ returns once the AFU context is started.
+
+ Takes a pointer to a struct cxl_ioctl_start_work
+
+ ::
+
+ struct cxl_ioctl_start_work {
+ __u64 flags;
+ __u64 work_element_descriptor;
+ __u64 amr;
+ __s16 num_interrupts;
+ __s16 reserved1;
+ __s32 reserved2;
+ __u64 reserved3;
+ __u64 reserved4;
+ __u64 reserved5;
+ __u64 reserved6;
+ };
+
+ flags:
+ Indicates which optional fields in the structure are
+ valid.
+
+ work_element_descriptor:
+ The Work Element Descriptor (WED) is a 64-bit argument
+ defined by the AFU. Typically this is an effective
+ address pointing to an AFU specific structure
+ describing what work to perform.
+
+ amr:
+ Authority Mask Register (AMR), same as the powerpc
+ AMR. This field is only used by the kernel when the
+ corresponding CXL_START_WORK_AMR value is specified in
+ flags. If not specified the kernel will use a default
+ value of 0.
+
+ num_interrupts:
+ Number of userspace interrupts to request. This field
+ is only used by the kernel when the corresponding
+ CXL_START_WORK_NUM_IRQS value is specified in flags.
+ If not specified the minimum number required by the
+ AFU will be allocated. The min and max number can be
+ obtained from sysfs.
+
+ reserved fields:
+ For ABI padding and future extensions
+
+ CXL_IOCTL_GET_PROCESS_ELEMENT:
+ Get the current context id, also known as the process element.
+ The value is returned from the kernel as a __u32.
+
+
+mmap
+----
+
+ An AFU may have an MMIO space to facilitate communication with the
+ AFU. If it does, the MMIO space can be accessed via mmap. The size
+ and contents of this area are specific to the particular AFU. The
+ size can be discovered via sysfs.
+
+ In AFU directed mode, master contexts are allowed to map all of
+ the MMIO space and slave contexts are allowed to only map the per
+ process MMIO space associated with the context. In dedicated
+ process mode the entire MMIO space can always be mapped.
+
+ This mmap call must be done after the START_WORK ioctl.
+
+ Care should be taken when accessing MMIO space. Only 32 and 64-bit
+ accesses are supported by POWER8. Also, the AFU will be designed
+ with a specific endianness, so all MMIO accesses should consider
+ endianness (recommend endian(3) variants like: le64toh(),
+ be64toh() etc). These endian issues equally apply to shared memory
+ queues the WED may describe.
+
+
+read
+----
+
+ Reads events from the AFU. Blocks if no events are pending
+ (unless O_NONBLOCK is supplied). Returns -EIO in the case of an
+ unrecoverable error or if the card is removed.
+
+ read() will always return an integral number of events.
+
+ The buffer passed to read() must be at least 4K bytes.
+
+ The result of the read will be a buffer of one or more events,
+ each event is of type struct cxl_event, of varying size::
+
+ struct cxl_event {
+ struct cxl_event_header header;
+ union {
+ struct cxl_event_afu_interrupt irq;
+ struct cxl_event_data_storage fault;
+ struct cxl_event_afu_error afu_error;
+ };
+ };
+
+ The struct cxl_event_header is defined as
+
+ ::
+
+ struct cxl_event_header {
+ __u16 type;
+ __u16 size;
+ __u16 process_element;
+ __u16 reserved1;
+ };
+
+ type:
+ This defines the type of event. The type determines how
+ the rest of the event is structured. These types are
+ described below and defined by enum cxl_event_type.
+
+ size:
+ This is the size of the event in bytes including the
+ struct cxl_event_header. The start of the next event can
+ be found at this offset from the start of the current
+ event.
+
+ process_element:
+ Context ID of the event.
+
+ reserved field:
+ For future extensions and padding.
+
+ If the event type is CXL_EVENT_AFU_INTERRUPT then the event
+ structure is defined as
+
+ ::
+
+ struct cxl_event_afu_interrupt {
+ __u16 flags;
+ __u16 irq; /* Raised AFU interrupt number */
+ __u32 reserved1;
+ };
+
+ flags:
+ These flags indicate which optional fields are present
+ in this struct. Currently all fields are mandatory.
+
+ irq:
+ The IRQ number sent by the AFU.
+
+ reserved field:
+ For future extensions and padding.
+
+ If the event type is CXL_EVENT_DATA_STORAGE then the event
+ structure is defined as
+
+ ::
+
+ struct cxl_event_data_storage {
+ __u16 flags;
+ __u16 reserved1;
+ __u32 reserved2;
+ __u64 addr;
+ __u64 dsisr;
+ __u64 reserved3;
+ };
+
+ flags:
+ These flags indicate which optional fields are present in
+ this struct. Currently all fields are mandatory.
+
+ address:
+ The address that the AFU unsuccessfully attempted to
+ access. Valid accesses will be handled transparently by the
+ kernel but invalid accesses will generate this event.
+
+ dsisr:
+ This field gives information on the type of fault. It is a
+ copy of the DSISR from the PSL hardware when the address
+ fault occurred. The form of the DSISR is as defined in the
+ CAIA.
+
+ reserved fields:
+ For future extensions
+
+ If the event type is CXL_EVENT_AFU_ERROR then the event structure
+ is defined as
+
+ ::
+
+ struct cxl_event_afu_error {
+ __u16 flags;
+ __u16 reserved1;
+ __u32 reserved2;
+ __u64 error;
+ };
+
+ flags:
+ These flags indicate which optional fields are present in
+ this struct. Currently all fields are Mandatory.
+
+ error:
+ Error status from the AFU. Defined by the AFU.
+
+ reserved fields:
+ For future extensions and padding
+
+
+2. Card character device (powerVM guest only)
+
+ In a powerVM guest, an extra character device is created for the
+ card. The device is only used to write (flash) a new image on the
+ FPGA accelerator. Once the image is written and verified, the
+ device tree is updated and the card is reset to reload the updated
+ image.
+
+open
+----
+
+ Opens the device and allocates a file descriptor to be used with
+ the rest of the API. The device can only be opened once.
+
+ioctl
+-----
+
+CXL_IOCTL_DOWNLOAD_IMAGE / CXL_IOCTL_VALIDATE_IMAGE:
+ Starts and controls flashing a new FPGA image. Partial
+ reconfiguration is not supported (yet), so the image must contain
+ a copy of the PSL and AFU(s). Since an image can be quite large,
+ the caller may have to iterate, splitting the image in smaller
+ chunks.
+
+ Takes a pointer to a struct cxl_adapter_image::
+
+ struct cxl_adapter_image {
+ __u64 flags;
+ __u64 data;
+ __u64 len_data;
+ __u64 len_image;
+ __u64 reserved1;
+ __u64 reserved2;
+ __u64 reserved3;
+ __u64 reserved4;
+ };
+
+ flags:
+ These flags indicate which optional fields are present in
+ this struct. Currently all fields are mandatory.
+
+ data:
+ Pointer to a buffer with part of the image to write to the
+ card.
+
+ len_data:
+ Size of the buffer pointed to by data.
+
+ len_image:
+ Full size of the image.
+
+
+Sysfs Class
+===========
+
+ A cxl sysfs class is added under /sys/class/cxl to facilitate
+ enumeration and tuning of the accelerators. Its layout is
+ described in Documentation/ABI/testing/sysfs-class-cxl
+
+
+Udev rules
+==========
+
+ The following udev rules could be used to create a symlink to the
+ most logical chardev to use in any programming mode (afuX.Yd for
+ dedicated, afuX.Ys for afu directed), since the API is virtually
+ identical for each::
+
+ SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
+ SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
+ KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b"
+++ /dev/null
-Coherent Accelerator Interface (CXL)
-====================================
-
-Introduction
-============
-
- The coherent accelerator interface is designed to allow the
- coherent connection of accelerators (FPGAs and other devices) to a
- POWER system. These devices need to adhere to the Coherent
- Accelerator Interface Architecture (CAIA).
-
- IBM refers to this as the Coherent Accelerator Processor Interface
- or CAPI. In the kernel it's referred to by the name CXL to avoid
- confusion with the ISDN CAPI subsystem.
-
- Coherent in this context means that the accelerator and CPUs can
- both access system memory directly and with the same effective
- addresses.
-
-
-Hardware overview
-=================
-
- POWER8/9 FPGA
- +----------+ +---------+
- | | | |
- | CPU | | AFU |
- | | | |
- | | | |
- | | | |
- +----------+ +---------+
- | PHB | | |
- | +------+ | PSL |
- | | CAPP |<------>| |
- +---+------+ PCIE +---------+
-
- The POWER8/9 chip has a Coherently Attached Processor Proxy (CAPP)
- unit which is part of the PCIe Host Bridge (PHB). This is managed
- by Linux by calls into OPAL. Linux doesn't directly program the
- CAPP.
-
- The FPGA (or coherently attached device) consists of two parts.
- The POWER Service Layer (PSL) and the Accelerator Function Unit
- (AFU). The AFU is used to implement specific functionality behind
- the PSL. The PSL, among other things, provides memory address
- translation services to allow each AFU direct access to userspace
- memory.
-
- The AFU is the core part of the accelerator (eg. the compression,
- crypto etc function). The kernel has no knowledge of the function
- of the AFU. Only userspace interacts directly with the AFU.
-
- The PSL provides the translation and interrupt services that the
- AFU needs. This is what the kernel interacts with. For example, if
- the AFU needs to read a particular effective address, it sends
- that address to the PSL, the PSL then translates it, fetches the
- data from memory and returns it to the AFU. If the PSL has a
- translation miss, it interrupts the kernel and the kernel services
- the fault. The context to which this fault is serviced is based on
- who owns that acceleration function.
-
- POWER8 <-----> PSL Version 8 is compliant to the CAIA Version 1.0.
- POWER9 <-----> PSL Version 9 is compliant to the CAIA Version 2.0.
- This PSL Version 9 provides new features such as:
- * Interaction with the nest MMU on the P9 chip.
- * Native DMA support.
- * Supports sending ASB_Notify messages for host thread wakeup.
- * Supports Atomic operations.
- * ....
-
- Cards with a PSL9 won't work on a POWER8 system and cards with a
- PSL8 won't work on a POWER9 system.
-
-AFU Modes
-=========
-
- There are two programming modes supported by the AFU. Dedicated
- and AFU directed. AFU may support one or both modes.
-
- When using dedicated mode only one MMU context is supported. In
- this mode, only one userspace process can use the accelerator at
- time.
-
- When using AFU directed mode, up to 16K simultaneous contexts can
- be supported. This means up to 16K simultaneous userspace
- applications may use the accelerator (although specific AFUs may
- support fewer). In this mode, the AFU sends a 16 bit context ID
- with each of its requests. This tells the PSL which context is
- associated with each operation. If the PSL can't translate an
- operation, the ID can also be accessed by the kernel so it can
- determine the userspace context associated with an operation.
-
-
-MMIO space
-==========
-
- A portion of the accelerator MMIO space can be directly mapped
- from the AFU to userspace. Either the whole space can be mapped or
- just a per context portion. The hardware is self describing, hence
- the kernel can determine the offset and size of the per context
- portion.
-
-
-Interrupts
-==========
-
- AFUs may generate interrupts that are destined for userspace. These
- are received by the kernel as hardware interrupts and passed onto
- userspace by a read syscall documented below.
-
- Data storage faults and error interrupts are handled by the kernel
- driver.
-
-
-Work Element Descriptor (WED)
-=============================
-
- The WED is a 64-bit parameter passed to the AFU when a context is
- started. Its format is up to the AFU hence the kernel has no
- knowledge of what it represents. Typically it will be the
- effective address of a work queue or status block where the AFU
- and userspace can share control and status information.
-
-
-
-
-User API
-========
-
-1. AFU character devices
-
- For AFUs operating in AFU directed mode, two character device
- files will be created. /dev/cxl/afu0.0m will correspond to a
- master context and /dev/cxl/afu0.0s will correspond to a slave
- context. Master contexts have access to the full MMIO space an
- AFU provides. Slave contexts have access to only the per process
- MMIO space an AFU provides.
-
- For AFUs operating in dedicated process mode, the driver will
- only create a single character device per AFU called
- /dev/cxl/afu0.0d. This will have access to the entire MMIO space
- that the AFU provides (like master contexts in AFU directed).
-
- The types described below are defined in include/uapi/misc/cxl.h
-
- The following file operations are supported on both slave and
- master devices.
-
- A userspace library libcxl is available here:
- https://github.com/ibm-capi/libcxl
- This provides a C interface to this kernel API.
-
-open
-----
-
- Opens the device and allocates a file descriptor to be used with
- the rest of the API.
-
- A dedicated mode AFU only has one context and only allows the
- device to be opened once.
-
- An AFU directed mode AFU can have many contexts, the device can be
- opened once for each context that is available.
-
- When all available contexts are allocated the open call will fail
- and return -ENOSPC.
-
- Note: IRQs need to be allocated for each context, which may limit
- the number of contexts that can be created, and therefore
- how many times the device can be opened. The POWER8 CAPP
- supports 2040 IRQs and 3 are used by the kernel, so 2037 are
- left. If 1 IRQ is needed per context, then only 2037
- contexts can be allocated. If 4 IRQs are needed per context,
- then only 2037/4 = 509 contexts can be allocated.
-
-
-ioctl
------
-
- CXL_IOCTL_START_WORK:
- Starts the AFU context and associates it with the current
- process. Once this ioctl is successfully executed, all memory
- mapped into this process is accessible to this AFU context
- using the same effective addresses. No additional calls are
- required to map/unmap memory. The AFU memory context will be
- updated as userspace allocates and frees memory. This ioctl
- returns once the AFU context is started.
-
- Takes a pointer to a struct cxl_ioctl_start_work:
-
- struct cxl_ioctl_start_work {
- __u64 flags;
- __u64 work_element_descriptor;
- __u64 amr;
- __s16 num_interrupts;
- __s16 reserved1;
- __s32 reserved2;
- __u64 reserved3;
- __u64 reserved4;
- __u64 reserved5;
- __u64 reserved6;
- };
-
- flags:
- Indicates which optional fields in the structure are
- valid.
-
- work_element_descriptor:
- The Work Element Descriptor (WED) is a 64-bit argument
- defined by the AFU. Typically this is an effective
- address pointing to an AFU specific structure
- describing what work to perform.
-
- amr:
- Authority Mask Register (AMR), same as the powerpc
- AMR. This field is only used by the kernel when the
- corresponding CXL_START_WORK_AMR value is specified in
- flags. If not specified the kernel will use a default
- value of 0.
-
- num_interrupts:
- Number of userspace interrupts to request. This field
- is only used by the kernel when the corresponding
- CXL_START_WORK_NUM_IRQS value is specified in flags.
- If not specified the minimum number required by the
- AFU will be allocated. The min and max number can be
- obtained from sysfs.
-
- reserved fields:
- For ABI padding and future extensions
-
- CXL_IOCTL_GET_PROCESS_ELEMENT:
- Get the current context id, also known as the process element.
- The value is returned from the kernel as a __u32.
-
-
-mmap
-----
-
- An AFU may have an MMIO space to facilitate communication with the
- AFU. If it does, the MMIO space can be accessed via mmap. The size
- and contents of this area are specific to the particular AFU. The
- size can be discovered via sysfs.
-
- In AFU directed mode, master contexts are allowed to map all of
- the MMIO space and slave contexts are allowed to only map the per
- process MMIO space associated with the context. In dedicated
- process mode the entire MMIO space can always be mapped.
-
- This mmap call must be done after the START_WORK ioctl.
-
- Care should be taken when accessing MMIO space. Only 32 and 64-bit
- accesses are supported by POWER8. Also, the AFU will be designed
- with a specific endianness, so all MMIO accesses should consider
- endianness (recommend endian(3) variants like: le64toh(),
- be64toh() etc). These endian issues equally apply to shared memory
- queues the WED may describe.
-
-
-read
-----
-
- Reads events from the AFU. Blocks if no events are pending
- (unless O_NONBLOCK is supplied). Returns -EIO in the case of an
- unrecoverable error or if the card is removed.
-
- read() will always return an integral number of events.
-
- The buffer passed to read() must be at least 4K bytes.
-
- The result of the read will be a buffer of one or more events,
- each event is of type struct cxl_event, of varying size.
-
- struct cxl_event {
- struct cxl_event_header header;
- union {
- struct cxl_event_afu_interrupt irq;
- struct cxl_event_data_storage fault;
- struct cxl_event_afu_error afu_error;
- };
- };
-
- The struct cxl_event_header is defined as:
-
- struct cxl_event_header {
- __u16 type;
- __u16 size;
- __u16 process_element;
- __u16 reserved1;
- };
-
- type:
- This defines the type of event. The type determines how
- the rest of the event is structured. These types are
- described below and defined by enum cxl_event_type.
-
- size:
- This is the size of the event in bytes including the
- struct cxl_event_header. The start of the next event can
- be found at this offset from the start of the current
- event.
-
- process_element:
- Context ID of the event.
-
- reserved field:
- For future extensions and padding.
-
- If the event type is CXL_EVENT_AFU_INTERRUPT then the event
- structure is defined as:
-
- struct cxl_event_afu_interrupt {
- __u16 flags;
- __u16 irq; /* Raised AFU interrupt number */
- __u32 reserved1;
- };
-
- flags:
- These flags indicate which optional fields are present
- in this struct. Currently all fields are mandatory.
-
- irq:
- The IRQ number sent by the AFU.
-
- reserved field:
- For future extensions and padding.
-
- If the event type is CXL_EVENT_DATA_STORAGE then the event
- structure is defined as:
-
- struct cxl_event_data_storage {
- __u16 flags;
- __u16 reserved1;
- __u32 reserved2;
- __u64 addr;
- __u64 dsisr;
- __u64 reserved3;
- };
-
- flags:
- These flags indicate which optional fields are present in
- this struct. Currently all fields are mandatory.
-
- address:
- The address that the AFU unsuccessfully attempted to
- access. Valid accesses will be handled transparently by the
- kernel but invalid accesses will generate this event.
-
- dsisr:
- This field gives information on the type of fault. It is a
- copy of the DSISR from the PSL hardware when the address
- fault occurred. The form of the DSISR is as defined in the
- CAIA.
-
- reserved fields:
- For future extensions
-
- If the event type is CXL_EVENT_AFU_ERROR then the event structure
- is defined as:
-
- struct cxl_event_afu_error {
- __u16 flags;
- __u16 reserved1;
- __u32 reserved2;
- __u64 error;
- };
-
- flags:
- These flags indicate which optional fields are present in
- this struct. Currently all fields are Mandatory.
-
- error:
- Error status from the AFU. Defined by the AFU.
-
- reserved fields:
- For future extensions and padding
-
-
-2. Card character device (powerVM guest only)
-
- In a powerVM guest, an extra character device is created for the
- card. The device is only used to write (flash) a new image on the
- FPGA accelerator. Once the image is written and verified, the
- device tree is updated and the card is reset to reload the updated
- image.
-
-open
-----
-
- Opens the device and allocates a file descriptor to be used with
- the rest of the API. The device can only be opened once.
-
-ioctl
------
-
-CXL_IOCTL_DOWNLOAD_IMAGE:
-CXL_IOCTL_VALIDATE_IMAGE:
- Starts and controls flashing a new FPGA image. Partial
- reconfiguration is not supported (yet), so the image must contain
- a copy of the PSL and AFU(s). Since an image can be quite large,
- the caller may have to iterate, splitting the image in smaller
- chunks.
-
- Takes a pointer to a struct cxl_adapter_image:
- struct cxl_adapter_image {
- __u64 flags;
- __u64 data;
- __u64 len_data;
- __u64 len_image;
- __u64 reserved1;
- __u64 reserved2;
- __u64 reserved3;
- __u64 reserved4;
- };
-
- flags:
- These flags indicate which optional fields are present in
- this struct. Currently all fields are mandatory.
-
- data:
- Pointer to a buffer with part of the image to write to the
- card.
-
- len_data:
- Size of the buffer pointed to by data.
-
- len_image:
- Full size of the image.
-
-
-Sysfs Class
-===========
-
- A cxl sysfs class is added under /sys/class/cxl to facilitate
- enumeration and tuning of the accelerators. Its layout is
- described in Documentation/ABI/testing/sysfs-class-cxl
-
-
-Udev rules
-==========
-
- The following udev rules could be used to create a symlink to the
- most logical chardev to use in any programming mode (afuX.Yd for
- dedicated, afuX.Ys for afu directed), since the API is virtually
- identical for each:
-
- SUBSYSTEM=="cxl", ATTRS{mode}=="dedicated_process", SYMLINK="cxl/%b"
- SUBSYSTEM=="cxl", ATTRS{mode}=="afu_directed", \
- KERNEL=="afu[0-9]*.[0-9]*s", SYMLINK="cxl/%b"
--- /dev/null
+================================
+Coherent Accelerator (CXL) Flash
+================================
+
+Introduction
+============
+
+ The IBM Power architecture provides support for CAPI (Coherent
+ Accelerator Power Interface), which is available to certain PCIe slots
+ on Power 8 systems. CAPI can be thought of as a special tunneling
+ protocol through PCIe that allow PCIe adapters to look like special
+ purpose co-processors which can read or write an application's
+ memory and generate page faults. As a result, the host interface to
+ an adapter running in CAPI mode does not require the data buffers to
+ be mapped to the device's memory (IOMMU bypass) nor does it require
+ memory to be pinned.
+
+ On Linux, Coherent Accelerator (CXL) kernel services present CAPI
+ devices as a PCI device by implementing a virtual PCI host bridge.
+ This abstraction simplifies the infrastructure and programming
+ model, allowing for drivers to look similar to other native PCI
+ device drivers.
+
+ CXL provides a mechanism by which user space applications can
+ directly talk to a device (network or storage) bypassing the typical
+ kernel/device driver stack. The CXL Flash Adapter Driver enables a
+ user space application direct access to Flash storage.
+
+ The CXL Flash Adapter Driver is a kernel module that sits in the
+ SCSI stack as a low level device driver (below the SCSI disk and
+ protocol drivers) for the IBM CXL Flash Adapter. This driver is
+ responsible for the initialization of the adapter, setting up the
+ special path for user space access, and performing error recovery. It
+ communicates directly the Flash Accelerator Functional Unit (AFU)
+ as described in Documentation/powerpc/cxl.rst.
+
+ The cxlflash driver supports two, mutually exclusive, modes of
+ operation at the device (LUN) level:
+
+ - Any flash device (LUN) can be configured to be accessed as a
+ regular disk device (i.e.: /dev/sdc). This is the default mode.
+
+ - Any flash device (LUN) can be configured to be accessed from
+ user space with a special block library. This mode further
+ specifies the means of accessing the device and provides for
+ either raw access to the entire LUN (referred to as direct
+ or physical LUN access) or access to a kernel/AFU-mediated
+ partition of the LUN (referred to as virtual LUN access). The
+ segmentation of a disk device into virtual LUNs is assisted
+ by special translation services provided by the Flash AFU.
+
+Overview
+========
+
+ The Coherent Accelerator Interface Architecture (CAIA) introduces a
+ concept of a master context. A master typically has special privileges
+ granted to it by the kernel or hypervisor allowing it to perform AFU
+ wide management and control. The master may or may not be involved
+ directly in each user I/O, but at the minimum is involved in the
+ initial setup before the user application is allowed to send requests
+ directly to the AFU.
+
+ The CXL Flash Adapter Driver establishes a master context with the
+ AFU. It uses memory mapped I/O (MMIO) for this control and setup. The
+ Adapter Problem Space Memory Map looks like this::
+
+ +-------------------------------+
+ | 512 * 64 KB User MMIO |
+ | (per context) |
+ | User Accessible |
+ +-------------------------------+
+ | 512 * 128 B per context |
+ | Provisioning and Control |
+ | Trusted Process accessible |
+ +-------------------------------+
+ | 64 KB Global |
+ | Trusted Process accessible |
+ +-------------------------------+
+
+ This driver configures itself into the SCSI software stack as an
+ adapter driver. The driver is the only entity that is considered a
+ Trusted Process to program the Provisioning and Control and Global
+ areas in the MMIO Space shown above. The master context driver
+ discovers all LUNs attached to the CXL Flash adapter and instantiates
+ scsi block devices (/dev/sdb, /dev/sdc etc.) for each unique LUN
+ seen from each path.
+
+ Once these scsi block devices are instantiated, an application
+ written to a specification provided by the block library may get
+ access to the Flash from user space (without requiring a system call).
+
+ This master context driver also provides a series of ioctls for this
+ block library to enable this user space access. The driver supports
+ two modes for accessing the block device.
+
+ The first mode is called a virtual mode. In this mode a single scsi
+ block device (/dev/sdb) may be carved up into any number of distinct
+ virtual LUNs. The virtual LUNs may be resized as long as the sum of
+ the sizes of all the virtual LUNs, along with the meta-data associated
+ with it does not exceed the physical capacity.
+
+ The second mode is called the physical mode. In this mode a single
+ block device (/dev/sdb) may be opened directly by the block library
+ and the entire space for the LUN is available to the application.
+
+ Only the physical mode provides persistence of the data. i.e. The
+ data written to the block device will survive application exit and
+ restart and also reboot. The virtual LUNs do not persist (i.e. do
+ not survive after the application terminates or the system reboots).
+
+
+Block library API
+=================
+
+ Applications intending to get access to the CXL Flash from user
+ space should use the block library, as it abstracts the details of
+ interfacing directly with the cxlflash driver that are necessary for
+ performing administrative actions (i.e.: setup, tear down, resize).
+ The block library can be thought of as a 'user' of services,
+ implemented as IOCTLs, that are provided by the cxlflash driver
+ specifically for devices (LUNs) operating in user space access
+ mode. While it is not a requirement that applications understand
+ the interface between the block library and the cxlflash driver,
+ a high-level overview of each supported service (IOCTL) is provided
+ below.
+
+ The block library can be found on GitHub:
+ http://github.com/open-power/capiflash
+
+
+CXL Flash Driver LUN IOCTLs
+===========================
+
+ Users, such as the block library, that wish to interface with a flash
+ device (LUN) via user space access need to use the services provided
+ by the cxlflash driver. As these services are implemented as ioctls,
+ a file descriptor handle must first be obtained in order to establish
+ the communication channel between a user and the kernel. This file
+ descriptor is obtained by opening the device special file associated
+ with the scsi disk device (/dev/sdb) that was created during LUN
+ discovery. As per the location of the cxlflash driver within the
+ SCSI protocol stack, this open is actually not seen by the cxlflash
+ driver. Upon successful open, the user receives a file descriptor
+ (herein referred to as fd1) that should be used for issuing the
+ subsequent ioctls listed below.
+
+ The structure definitions for these IOCTLs are available in:
+ uapi/scsi/cxlflash_ioctl.h
+
+DK_CXLFLASH_ATTACH
+------------------
+
+ This ioctl obtains, initializes, and starts a context using the CXL
+ kernel services. These services specify a context id (u16) by which
+ to uniquely identify the context and its allocated resources. The
+ services additionally provide a second file descriptor (herein
+ referred to as fd2) that is used by the block library to initiate
+ memory mapped I/O (via mmap()) to the CXL flash device and poll for
+ completion events. This file descriptor is intentionally installed by
+ this driver and not the CXL kernel services to allow for intermediary
+ notification and access in the event of a non-user-initiated close(),
+ such as a killed process. This design point is described in further
+ detail in the description for the DK_CXLFLASH_DETACH ioctl.
+
+ There are a few important aspects regarding the "tokens" (context id
+ and fd2) that are provided back to the user:
+
+ - These tokens are only valid for the process under which they
+ were created. The child of a forked process cannot continue
+ to use the context id or file descriptor created by its parent
+ (see DK_CXLFLASH_VLUN_CLONE for further details).
+
+ - These tokens are only valid for the lifetime of the context and
+ the process under which they were created. Once either is
+ destroyed, the tokens are to be considered stale and subsequent
+ usage will result in errors.
+
+ - A valid adapter file descriptor (fd2 >= 0) is only returned on
+ the initial attach for a context. Subsequent attaches to an
+ existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present)
+ do not provide the adapter file descriptor as it was previously
+ made known to the application.
+
+ - When a context is no longer needed, the user shall detach from
+ the context via the DK_CXLFLASH_DETACH ioctl. When this ioctl
+ returns with a valid adapter file descriptor and the return flag
+ DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
+ close the adapter file descriptor following a successful detach.
+
+ - When this ioctl returns with a valid fd2 and the return flag
+ DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
+ close fd2 in the following circumstances:
+
+ + Following a successful detach of the last user of the context
+ + Following a successful recovery on the context's original fd2
+ + In the child process of a fork(), following a clone ioctl,
+ on the fd2 associated with the source context
+
+ - At any time, a close on fd2 will invalidate the tokens. Applications
+ should exercise caution to only close fd2 when appropriate (outlined
+ in the previous bullet) to avoid premature loss of I/O.
+
+DK_CXLFLASH_USER_DIRECT
+-----------------------
+ This ioctl is responsible for transitioning the LUN to direct
+ (physical) mode access and configuring the AFU for direct access from
+ user space on a per-context basis. Additionally, the block size and
+ last logical block address (LBA) are returned to the user.
+
+ As mentioned previously, when operating in user space access mode,
+ LUNs may be accessed in whole or in part. Only one mode is allowed
+ at a time and if one mode is active (outstanding references exist),
+ requests to use the LUN in a different mode are denied.
+
+ The AFU is configured for direct access from user space by adding an
+ entry to the AFU's resource handle table. The index of the entry is
+ treated as a resource handle that is returned to the user. The user
+ is then able to use the handle to reference the LUN during I/O.
+
+DK_CXLFLASH_USER_VIRTUAL
+------------------------
+ This ioctl is responsible for transitioning the LUN to virtual mode
+ of access and configuring the AFU for virtual access from user space
+ on a per-context basis. Additionally, the block size and last logical
+ block address (LBA) are returned to the user.
+
+ As mentioned previously, when operating in user space access mode,
+ LUNs may be accessed in whole or in part. Only one mode is allowed
+ at a time and if one mode is active (outstanding references exist),
+ requests to use the LUN in a different mode are denied.
+
+ The AFU is configured for virtual access from user space by adding
+ an entry to the AFU's resource handle table. The index of the entry
+ is treated as a resource handle that is returned to the user. The
+ user is then able to use the handle to reference the LUN during I/O.
+
+ By default, the virtual LUN is created with a size of 0. The user
+ would need to use the DK_CXLFLASH_VLUN_RESIZE ioctl to adjust the grow
+ the virtual LUN to a desired size. To avoid having to perform this
+ resize for the initial creation of the virtual LUN, the user has the
+ option of specifying a size as part of the DK_CXLFLASH_USER_VIRTUAL
+ ioctl, such that when success is returned to the user, the
+ resource handle that is provided is already referencing provisioned
+ storage. This is reflected by the last LBA being a non-zero value.
+
+ When a LUN is accessible from more than one port, this ioctl will
+ return with the DK_CXLFLASH_ALL_PORTS_ACTIVE return flag set. This
+ provides the user with a hint that I/O can be retried in the event
+ of an I/O error as the LUN can be reached over multiple paths.
+
+DK_CXLFLASH_VLUN_RESIZE
+-----------------------
+ This ioctl is responsible for resizing a previously created virtual
+ LUN and will fail if invoked upon a LUN that is not in virtual
+ mode. Upon success, an updated last LBA is returned to the user
+ indicating the new size of the virtual LUN associated with the
+ resource handle.
+
+ The partitioning of virtual LUNs is jointly mediated by the cxlflash
+ driver and the AFU. An allocation table is kept for each LUN that is
+ operating in the virtual mode and used to program a LUN translation
+ table that the AFU references when provided with a resource handle.
+
+ This ioctl can return -EAGAIN if an AFU sync operation takes too long.
+ In addition to returning a failure to user, cxlflash will also schedule
+ an asynchronous AFU reset. Should the user choose to retry the operation,
+ it is expected to succeed. If this ioctl fails with -EAGAIN, the user
+ can either retry the operation or treat it as a failure.
+
+DK_CXLFLASH_RELEASE
+-------------------
+ This ioctl is responsible for releasing a previously obtained
+ reference to either a physical or virtual LUN. This can be
+ thought of as the inverse of the DK_CXLFLASH_USER_DIRECT or
+ DK_CXLFLASH_USER_VIRTUAL ioctls. Upon success, the resource handle
+ is no longer valid and the entry in the resource handle table is
+ made available to be used again.
+
+ As part of the release process for virtual LUNs, the virtual LUN
+ is first resized to 0 to clear out and free the translation tables
+ associated with the virtual LUN reference.
+
+DK_CXLFLASH_DETACH
+------------------
+ This ioctl is responsible for unregistering a context with the
+ cxlflash driver and release outstanding resources that were
+ not explicitly released via the DK_CXLFLASH_RELEASE ioctl. Upon
+ success, all "tokens" which had been provided to the user from the
+ DK_CXLFLASH_ATTACH onward are no longer valid.
+
+ When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+ attach, the application _must_ close the fd2 associated with the context
+ following the detach of the final user of the context.
+
+DK_CXLFLASH_VLUN_CLONE
+----------------------
+ This ioctl is responsible for cloning a previously created
+ context to a more recently created context. It exists solely to
+ support maintaining user space access to storage after a process
+ forks. Upon success, the child process (which invoked the ioctl)
+ will have access to the same LUNs via the same resource handle(s)
+ as the parent, but under a different context.
+
+ Context sharing across processes is not supported with CXL and
+ therefore each fork must be met with establishing a new context
+ for the child process. This ioctl simplifies the state management
+ and playback required by a user in such a scenario. When a process
+ forks, child process can clone the parents context by first creating
+ a context (via DK_CXLFLASH_ATTACH) and then using this ioctl to
+ perform the clone from the parent to the child.
+
+ The clone itself is fairly simple. The resource handle and lun
+ translation tables are copied from the parent context to the child's
+ and then synced with the AFU.
+
+ When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+ attach, the application _must_ close the fd2 associated with the source
+ context (still resident/accessible in the parent process) following the
+ clone. This is to avoid a stale entry in the file descriptor table of the
+ child process.
+
+ This ioctl can return -EAGAIN if an AFU sync operation takes too long.
+ In addition to returning a failure to user, cxlflash will also schedule
+ an asynchronous AFU reset. Should the user choose to retry the operation,
+ it is expected to succeed. If this ioctl fails with -EAGAIN, the user
+ can either retry the operation or treat it as a failure.
+
+DK_CXLFLASH_VERIFY
+------------------
+ This ioctl is used to detect various changes such as the capacity of
+ the disk changing, the number of LUNs visible changing, etc. In cases
+ where the changes affect the application (such as a LUN resize), the
+ cxlflash driver will report the changed state to the application.
+
+ The user calls in when they want to validate that a LUN hasn't been
+ changed in response to a check condition. As the user is operating out
+ of band from the kernel, they will see these types of events without
+ the kernel's knowledge. When encountered, the user's architected
+ behavior is to call in to this ioctl, indicating what they want to
+ verify and passing along any appropriate information. For now, only
+ verifying a LUN change (ie: size different) with sense data is
+ supported.
+
+DK_CXLFLASH_RECOVER_AFU
+-----------------------
+ This ioctl is used to drive recovery (if such an action is warranted)
+ of a specified user context. Any state associated with the user context
+ is re-established upon successful recovery.
+
+ User contexts are put into an error condition when the device needs to
+ be reset or is terminating. Users are notified of this error condition
+ by seeing all 0xF's on an MMIO read. Upon encountering this, the
+ architected behavior for a user is to call into this ioctl to recover
+ their context. A user may also call into this ioctl at any time to
+ check if the device is operating normally. If a failure is returned
+ from this ioctl, the user is expected to gracefully clean up their
+ context via release/detach ioctls. Until they do, the context they
+ hold is not relinquished. The user may also optionally exit the process
+ at which time the context/resources they held will be freed as part of
+ the release fop.
+
+ When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
+ attach, the application _must_ unmap and close the fd2 associated with the
+ original context following this ioctl returning success and indicating that
+ the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET).
+
+DK_CXLFLASH_MANAGE_LUN
+----------------------
+ This ioctl is used to switch a LUN from a mode where it is available
+ for file-system access (legacy), to a mode where it is set aside for
+ exclusive user space access (superpipe). In case a LUN is visible
+ across multiple ports and adapters, this ioctl is used to uniquely
+ identify each LUN by its World Wide Node Name (WWNN).
+
+
+CXL Flash Driver Host IOCTLs
+============================
+
+ Each host adapter instance that is supported by the cxlflash driver
+ has a special character device associated with it to enable a set of
+ host management function. These character devices are hosted in a
+ class dedicated for cxlflash and can be accessed via `/dev/cxlflash/*`.
+
+ Applications can be written to perform various functions using the
+ host ioctl APIs below.
+
+ The structure definitions for these IOCTLs are available in:
+ uapi/scsi/cxlflash_ioctl.h
+
+HT_CXLFLASH_LUN_PROVISION
+-------------------------
+ This ioctl is used to create and delete persistent LUNs on cxlflash
+ devices that lack an external LUN management interface. It is only
+ valid when used with AFUs that support the LUN provision capability.
+
+ When sufficient space is available, LUNs can be created by specifying
+ the target port to host the LUN and a desired size in 4K blocks. Upon
+ success, the LUN ID and WWID of the created LUN will be returned and
+ the SCSI bus can be scanned to detect the change in LUN topology. Note
+ that partial allocations are not supported. Should a creation fail due
+ to a space issue, the target port can be queried for its current LUN
+ geometry.
+
+ To remove a LUN, the device must first be disassociated from the Linux
+ SCSI subsystem. The LUN deletion can then be initiated by specifying a
+ target port and LUN ID. Upon success, the LUN geometry associated with
+ the port will be updated to reflect new number of provisioned LUNs and
+ available capacity.
+
+ To query the LUN geometry of a port, the target port is specified and
+ upon success, the following information is presented:
+
+ - Maximum number of provisioned LUNs allowed for the port
+ - Current number of provisioned LUNs for the port
+ - Maximum total capacity of provisioned LUNs for the port (4K blocks)
+ - Current total capacity of provisioned LUNs for the port (4K blocks)
+
+ With this information, the number of available LUNs and capacity can be
+ can be calculated.
+
+HT_CXLFLASH_AFU_DEBUG
+---------------------
+ This ioctl is used to debug AFUs by supporting a command pass-through
+ interface. It is only valid when used with AFUs that support the AFU
+ debug capability.
+
+ With exception of buffer management, AFU debug commands are opaque to
+ cxlflash and treated as pass-through. For debug commands that do require
+ data transfer, the user supplies an adequately sized data buffer and must
+ specify the data transfer direction with respect to the host. There is a
+ maximum transfer size of 256K imposed. Note that partial read completions
+ are not supported - when errors are experienced with a host read data
+ transfer, the data buffer is not copied back to the user.
+++ /dev/null
-Introduction
-============
-
- The IBM Power architecture provides support for CAPI (Coherent
- Accelerator Power Interface), which is available to certain PCIe slots
- on Power 8 systems. CAPI can be thought of as a special tunneling
- protocol through PCIe that allow PCIe adapters to look like special
- purpose co-processors which can read or write an application's
- memory and generate page faults. As a result, the host interface to
- an adapter running in CAPI mode does not require the data buffers to
- be mapped to the device's memory (IOMMU bypass) nor does it require
- memory to be pinned.
-
- On Linux, Coherent Accelerator (CXL) kernel services present CAPI
- devices as a PCI device by implementing a virtual PCI host bridge.
- This abstraction simplifies the infrastructure and programming
- model, allowing for drivers to look similar to other native PCI
- device drivers.
-
- CXL provides a mechanism by which user space applications can
- directly talk to a device (network or storage) bypassing the typical
- kernel/device driver stack. The CXL Flash Adapter Driver enables a
- user space application direct access to Flash storage.
-
- The CXL Flash Adapter Driver is a kernel module that sits in the
- SCSI stack as a low level device driver (below the SCSI disk and
- protocol drivers) for the IBM CXL Flash Adapter. This driver is
- responsible for the initialization of the adapter, setting up the
- special path for user space access, and performing error recovery. It
- communicates directly the Flash Accelerator Functional Unit (AFU)
- as described in Documentation/powerpc/cxl.txt.
-
- The cxlflash driver supports two, mutually exclusive, modes of
- operation at the device (LUN) level:
-
- - Any flash device (LUN) can be configured to be accessed as a
- regular disk device (i.e.: /dev/sdc). This is the default mode.
-
- - Any flash device (LUN) can be configured to be accessed from
- user space with a special block library. This mode further
- specifies the means of accessing the device and provides for
- either raw access to the entire LUN (referred to as direct
- or physical LUN access) or access to a kernel/AFU-mediated
- partition of the LUN (referred to as virtual LUN access). The
- segmentation of a disk device into virtual LUNs is assisted
- by special translation services provided by the Flash AFU.
-
-Overview
-========
-
- The Coherent Accelerator Interface Architecture (CAIA) introduces a
- concept of a master context. A master typically has special privileges
- granted to it by the kernel or hypervisor allowing it to perform AFU
- wide management and control. The master may or may not be involved
- directly in each user I/O, but at the minimum is involved in the
- initial setup before the user application is allowed to send requests
- directly to the AFU.
-
- The CXL Flash Adapter Driver establishes a master context with the
- AFU. It uses memory mapped I/O (MMIO) for this control and setup. The
- Adapter Problem Space Memory Map looks like this:
-
- +-------------------------------+
- | 512 * 64 KB User MMIO |
- | (per context) |
- | User Accessible |
- +-------------------------------+
- | 512 * 128 B per context |
- | Provisioning and Control |
- | Trusted Process accessible |
- +-------------------------------+
- | 64 KB Global |
- | Trusted Process accessible |
- +-------------------------------+
-
- This driver configures itself into the SCSI software stack as an
- adapter driver. The driver is the only entity that is considered a
- Trusted Process to program the Provisioning and Control and Global
- areas in the MMIO Space shown above. The master context driver
- discovers all LUNs attached to the CXL Flash adapter and instantiates
- scsi block devices (/dev/sdb, /dev/sdc etc.) for each unique LUN
- seen from each path.
-
- Once these scsi block devices are instantiated, an application
- written to a specification provided by the block library may get
- access to the Flash from user space (without requiring a system call).
-
- This master context driver also provides a series of ioctls for this
- block library to enable this user space access. The driver supports
- two modes for accessing the block device.
-
- The first mode is called a virtual mode. In this mode a single scsi
- block device (/dev/sdb) may be carved up into any number of distinct
- virtual LUNs. The virtual LUNs may be resized as long as the sum of
- the sizes of all the virtual LUNs, along with the meta-data associated
- with it does not exceed the physical capacity.
-
- The second mode is called the physical mode. In this mode a single
- block device (/dev/sdb) may be opened directly by the block library
- and the entire space for the LUN is available to the application.
-
- Only the physical mode provides persistence of the data. i.e. The
- data written to the block device will survive application exit and
- restart and also reboot. The virtual LUNs do not persist (i.e. do
- not survive after the application terminates or the system reboots).
-
-
-Block library API
-=================
-
- Applications intending to get access to the CXL Flash from user
- space should use the block library, as it abstracts the details of
- interfacing directly with the cxlflash driver that are necessary for
- performing administrative actions (i.e.: setup, tear down, resize).
- The block library can be thought of as a 'user' of services,
- implemented as IOCTLs, that are provided by the cxlflash driver
- specifically for devices (LUNs) operating in user space access
- mode. While it is not a requirement that applications understand
- the interface between the block library and the cxlflash driver,
- a high-level overview of each supported service (IOCTL) is provided
- below.
-
- The block library can be found on GitHub:
- http://github.com/open-power/capiflash
-
-
-CXL Flash Driver LUN IOCTLs
-===========================
-
- Users, such as the block library, that wish to interface with a flash
- device (LUN) via user space access need to use the services provided
- by the cxlflash driver. As these services are implemented as ioctls,
- a file descriptor handle must first be obtained in order to establish
- the communication channel between a user and the kernel. This file
- descriptor is obtained by opening the device special file associated
- with the scsi disk device (/dev/sdb) that was created during LUN
- discovery. As per the location of the cxlflash driver within the
- SCSI protocol stack, this open is actually not seen by the cxlflash
- driver. Upon successful open, the user receives a file descriptor
- (herein referred to as fd1) that should be used for issuing the
- subsequent ioctls listed below.
-
- The structure definitions for these IOCTLs are available in:
- uapi/scsi/cxlflash_ioctl.h
-
-DK_CXLFLASH_ATTACH
-------------------
-
- This ioctl obtains, initializes, and starts a context using the CXL
- kernel services. These services specify a context id (u16) by which
- to uniquely identify the context and its allocated resources. The
- services additionally provide a second file descriptor (herein
- referred to as fd2) that is used by the block library to initiate
- memory mapped I/O (via mmap()) to the CXL flash device and poll for
- completion events. This file descriptor is intentionally installed by
- this driver and not the CXL kernel services to allow for intermediary
- notification and access in the event of a non-user-initiated close(),
- such as a killed process. This design point is described in further
- detail in the description for the DK_CXLFLASH_DETACH ioctl.
-
- There are a few important aspects regarding the "tokens" (context id
- and fd2) that are provided back to the user:
-
- - These tokens are only valid for the process under which they
- were created. The child of a forked process cannot continue
- to use the context id or file descriptor created by its parent
- (see DK_CXLFLASH_VLUN_CLONE for further details).
-
- - These tokens are only valid for the lifetime of the context and
- the process under which they were created. Once either is
- destroyed, the tokens are to be considered stale and subsequent
- usage will result in errors.
-
- - A valid adapter file descriptor (fd2 >= 0) is only returned on
- the initial attach for a context. Subsequent attaches to an
- existing context (DK_CXLFLASH_ATTACH_REUSE_CONTEXT flag present)
- do not provide the adapter file descriptor as it was previously
- made known to the application.
-
- - When a context is no longer needed, the user shall detach from
- the context via the DK_CXLFLASH_DETACH ioctl. When this ioctl
- returns with a valid adapter file descriptor and the return flag
- DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
- close the adapter file descriptor following a successful detach.
-
- - When this ioctl returns with a valid fd2 and the return flag
- DK_CXLFLASH_APP_CLOSE_ADAP_FD is present, the application _must_
- close fd2 in the following circumstances:
-
- + Following a successful detach of the last user of the context
- + Following a successful recovery on the context's original fd2
- + In the child process of a fork(), following a clone ioctl,
- on the fd2 associated with the source context
-
- - At any time, a close on fd2 will invalidate the tokens. Applications
- should exercise caution to only close fd2 when appropriate (outlined
- in the previous bullet) to avoid premature loss of I/O.
-
-DK_CXLFLASH_USER_DIRECT
------------------------
- This ioctl is responsible for transitioning the LUN to direct
- (physical) mode access and configuring the AFU for direct access from
- user space on a per-context basis. Additionally, the block size and
- last logical block address (LBA) are returned to the user.
-
- As mentioned previously, when operating in user space access mode,
- LUNs may be accessed in whole or in part. Only one mode is allowed
- at a time and if one mode is active (outstanding references exist),
- requests to use the LUN in a different mode are denied.
-
- The AFU is configured for direct access from user space by adding an
- entry to the AFU's resource handle table. The index of the entry is
- treated as a resource handle that is returned to the user. The user
- is then able to use the handle to reference the LUN during I/O.
-
-DK_CXLFLASH_USER_VIRTUAL
-------------------------
- This ioctl is responsible for transitioning the LUN to virtual mode
- of access and configuring the AFU for virtual access from user space
- on a per-context basis. Additionally, the block size and last logical
- block address (LBA) are returned to the user.
-
- As mentioned previously, when operating in user space access mode,
- LUNs may be accessed in whole or in part. Only one mode is allowed
- at a time and if one mode is active (outstanding references exist),
- requests to use the LUN in a different mode are denied.
-
- The AFU is configured for virtual access from user space by adding
- an entry to the AFU's resource handle table. The index of the entry
- is treated as a resource handle that is returned to the user. The
- user is then able to use the handle to reference the LUN during I/O.
-
- By default, the virtual LUN is created with a size of 0. The user
- would need to use the DK_CXLFLASH_VLUN_RESIZE ioctl to adjust the grow
- the virtual LUN to a desired size. To avoid having to perform this
- resize for the initial creation of the virtual LUN, the user has the
- option of specifying a size as part of the DK_CXLFLASH_USER_VIRTUAL
- ioctl, such that when success is returned to the user, the
- resource handle that is provided is already referencing provisioned
- storage. This is reflected by the last LBA being a non-zero value.
-
- When a LUN is accessible from more than one port, this ioctl will
- return with the DK_CXLFLASH_ALL_PORTS_ACTIVE return flag set. This
- provides the user with a hint that I/O can be retried in the event
- of an I/O error as the LUN can be reached over multiple paths.
-
-DK_CXLFLASH_VLUN_RESIZE
------------------------
- This ioctl is responsible for resizing a previously created virtual
- LUN and will fail if invoked upon a LUN that is not in virtual
- mode. Upon success, an updated last LBA is returned to the user
- indicating the new size of the virtual LUN associated with the
- resource handle.
-
- The partitioning of virtual LUNs is jointly mediated by the cxlflash
- driver and the AFU. An allocation table is kept for each LUN that is
- operating in the virtual mode and used to program a LUN translation
- table that the AFU references when provided with a resource handle.
-
- This ioctl can return -EAGAIN if an AFU sync operation takes too long.
- In addition to returning a failure to user, cxlflash will also schedule
- an asynchronous AFU reset. Should the user choose to retry the operation,
- it is expected to succeed. If this ioctl fails with -EAGAIN, the user
- can either retry the operation or treat it as a failure.
-
-DK_CXLFLASH_RELEASE
--------------------
- This ioctl is responsible for releasing a previously obtained
- reference to either a physical or virtual LUN. This can be
- thought of as the inverse of the DK_CXLFLASH_USER_DIRECT or
- DK_CXLFLASH_USER_VIRTUAL ioctls. Upon success, the resource handle
- is no longer valid and the entry in the resource handle table is
- made available to be used again.
-
- As part of the release process for virtual LUNs, the virtual LUN
- is first resized to 0 to clear out and free the translation tables
- associated with the virtual LUN reference.
-
-DK_CXLFLASH_DETACH
-------------------
- This ioctl is responsible for unregistering a context with the
- cxlflash driver and release outstanding resources that were
- not explicitly released via the DK_CXLFLASH_RELEASE ioctl. Upon
- success, all "tokens" which had been provided to the user from the
- DK_CXLFLASH_ATTACH onward are no longer valid.
-
- When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
- attach, the application _must_ close the fd2 associated with the context
- following the detach of the final user of the context.
-
-DK_CXLFLASH_VLUN_CLONE
-----------------------
- This ioctl is responsible for cloning a previously created
- context to a more recently created context. It exists solely to
- support maintaining user space access to storage after a process
- forks. Upon success, the child process (which invoked the ioctl)
- will have access to the same LUNs via the same resource handle(s)
- as the parent, but under a different context.
-
- Context sharing across processes is not supported with CXL and
- therefore each fork must be met with establishing a new context
- for the child process. This ioctl simplifies the state management
- and playback required by a user in such a scenario. When a process
- forks, child process can clone the parents context by first creating
- a context (via DK_CXLFLASH_ATTACH) and then using this ioctl to
- perform the clone from the parent to the child.
-
- The clone itself is fairly simple. The resource handle and lun
- translation tables are copied from the parent context to the child's
- and then synced with the AFU.
-
- When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
- attach, the application _must_ close the fd2 associated with the source
- context (still resident/accessible in the parent process) following the
- clone. This is to avoid a stale entry in the file descriptor table of the
- child process.
-
- This ioctl can return -EAGAIN if an AFU sync operation takes too long.
- In addition to returning a failure to user, cxlflash will also schedule
- an asynchronous AFU reset. Should the user choose to retry the operation,
- it is expected to succeed. If this ioctl fails with -EAGAIN, the user
- can either retry the operation or treat it as a failure.
-
-DK_CXLFLASH_VERIFY
-------------------
- This ioctl is used to detect various changes such as the capacity of
- the disk changing, the number of LUNs visible changing, etc. In cases
- where the changes affect the application (such as a LUN resize), the
- cxlflash driver will report the changed state to the application.
-
- The user calls in when they want to validate that a LUN hasn't been
- changed in response to a check condition. As the user is operating out
- of band from the kernel, they will see these types of events without
- the kernel's knowledge. When encountered, the user's architected
- behavior is to call in to this ioctl, indicating what they want to
- verify and passing along any appropriate information. For now, only
- verifying a LUN change (ie: size different) with sense data is
- supported.
-
-DK_CXLFLASH_RECOVER_AFU
------------------------
- This ioctl is used to drive recovery (if such an action is warranted)
- of a specified user context. Any state associated with the user context
- is re-established upon successful recovery.
-
- User contexts are put into an error condition when the device needs to
- be reset or is terminating. Users are notified of this error condition
- by seeing all 0xF's on an MMIO read. Upon encountering this, the
- architected behavior for a user is to call into this ioctl to recover
- their context. A user may also call into this ioctl at any time to
- check if the device is operating normally. If a failure is returned
- from this ioctl, the user is expected to gracefully clean up their
- context via release/detach ioctls. Until they do, the context they
- hold is not relinquished. The user may also optionally exit the process
- at which time the context/resources they held will be freed as part of
- the release fop.
-
- When the DK_CXLFLASH_APP_CLOSE_ADAP_FD flag was returned on a successful
- attach, the application _must_ unmap and close the fd2 associated with the
- original context following this ioctl returning success and indicating that
- the context was recovered (DK_CXLFLASH_RECOVER_AFU_CONTEXT_RESET).
-
-DK_CXLFLASH_MANAGE_LUN
-----------------------
- This ioctl is used to switch a LUN from a mode where it is available
- for file-system access (legacy), to a mode where it is set aside for
- exclusive user space access (superpipe). In case a LUN is visible
- across multiple ports and adapters, this ioctl is used to uniquely
- identify each LUN by its World Wide Node Name (WWNN).
-
-
-CXL Flash Driver Host IOCTLs
-============================
-
- Each host adapter instance that is supported by the cxlflash driver
- has a special character device associated with it to enable a set of
- host management function. These character devices are hosted in a
- class dedicated for cxlflash and can be accessed via /dev/cxlflash/*.
-
- Applications can be written to perform various functions using the
- host ioctl APIs below.
-
- The structure definitions for these IOCTLs are available in:
- uapi/scsi/cxlflash_ioctl.h
-
-HT_CXLFLASH_LUN_PROVISION
--------------------------
- This ioctl is used to create and delete persistent LUNs on cxlflash
- devices that lack an external LUN management interface. It is only
- valid when used with AFUs that support the LUN provision capability.
-
- When sufficient space is available, LUNs can be created by specifying
- the target port to host the LUN and a desired size in 4K blocks. Upon
- success, the LUN ID and WWID of the created LUN will be returned and
- the SCSI bus can be scanned to detect the change in LUN topology. Note
- that partial allocations are not supported. Should a creation fail due
- to a space issue, the target port can be queried for its current LUN
- geometry.
-
- To remove a LUN, the device must first be disassociated from the Linux
- SCSI subsystem. The LUN deletion can then be initiated by specifying a
- target port and LUN ID. Upon success, the LUN geometry associated with
- the port will be updated to reflect new number of provisioned LUNs and
- available capacity.
-
- To query the LUN geometry of a port, the target port is specified and
- upon success, the following information is presented:
-
- - Maximum number of provisioned LUNs allowed for the port
- - Current number of provisioned LUNs for the port
- - Maximum total capacity of provisioned LUNs for the port (4K blocks)
- - Current total capacity of provisioned LUNs for the port (4K blocks)
-
- With this information, the number of available LUNs and capacity can be
- can be calculated.
-
-HT_CXLFLASH_AFU_DEBUG
----------------------
- This ioctl is used to debug AFUs by supporting a command pass-through
- interface. It is only valid when used with AFUs that support the AFU
- debug capability.
-
- With exception of buffer management, AFU debug commands are opaque to
- cxlflash and treated as pass-through. For debug commands that do require
- data transfer, the user supplies an adequately sized data buffer and must
- specify the data transfer direction with respect to the host. There is a
- maximum transfer size of 256K imposed. Note that partial read completions
- are not supported - when errors are experienced with a host read data
- transfer, the data buffer is not copied back to the user.
--- /dev/null
+=====================
+DAWR issues on POWER9
+=====================
+
+On POWER9 the Data Address Watchpoint Register (DAWR) can cause a checkstop
+if it points to cache inhibited (CI) memory. Currently Linux has no way to
+disinguish CI memory when configuring the DAWR, so (for now) the DAWR is
+disabled by this commit::
+
+ commit 9654153158d3e0684a1bdb76dbababdb7111d5a0
+ Author: Michael Neuling <mikey@neuling.org>
+ Date: Tue Mar 27 15:37:24 2018 +1100
+ powerpc: Disable DAWR in the base POWER9 CPU features
+
+Technical Details:
+==================
+
+DAWR has 6 different ways of being set.
+1) ptrace
+2) h_set_mode(DAWR)
+3) h_set_dabr()
+4) kvmppc_set_one_reg()
+5) xmon
+
+For ptrace, we now advertise zero breakpoints on POWER9 via the
+PPC_PTRACE_GETHWDBGINFO call. This results in GDB falling back to
+software emulation of the watchpoint (which is slow).
+
+h_set_mode(DAWR) and h_set_dabr() will now return an error to the
+guest on a POWER9 host. Current Linux guests ignore this error, so
+they will silently not get the DAWR.
+
+kvmppc_set_one_reg() will store the value in the vcpu but won't
+actually set it on POWER9 hardware. This is done so we don't break
+migration from POWER8 to POWER9, at the cost of silently losing the
+DAWR on the migration.
+
+For xmon, the 'bd' command will return an error on P9.
+
+Consequences for users
+======================
+
+For GDB watchpoints (ie 'watch' command) on POWER9 bare metal , GDB
+will accept the command. Unfortunately since there is no hardware
+support for the watchpoint, GDB will software emulate the watchpoint
+making it run very slowly.
+
+The same will also be true for any guests started on a POWER9
+host. The watchpoint will fail and GDB will fall back to software
+emulation.
+
+If a guest is started on a POWER8 host, GDB will accept the watchpoint
+and configure the hardware to use the DAWR. This will run at full
+speed since it can use the hardware emulation. Unfortunately if this
+guest is migrated to a POWER9 host, the watchpoint will be lost on the
+POWER9. Loads and stores to the watchpoint locations will not be
+trapped in GDB. The watchpoint is remembered, so if the guest is
+migrated back to the POWER8 host, it will start working again.
+
+Force enabling the DAWR
+=======================
+Kernels (since ~v5.2) have an option to force enable the DAWR via::
+
+ echo Y > /sys/kernel/debug/powerpc/dawr_enable_dangerous
+
+This enables the DAWR even on POWER9.
+
+This is a dangerous setting, USE AT YOUR OWN RISK.
+
+Some users may not care about a bad user crashing their box
+(ie. single user/desktop systems) and really want the DAWR. This
+allows them to force enable DAWR.
+
+This flag can also be used to disable DAWR access. Once this is
+cleared, all DAWR access should be cleared immediately and your
+machine once again safe from crashing.
+
+Userspace may get confused by toggling this. If DAWR is force
+enabled/disabled between getting the number of breakpoints (via
+PTRACE_GETHWDBGINFO) and setting the breakpoint, userspace will get an
+inconsistent view of what's available. Similarly for guests.
+
+For the DAWR to be enabled in a KVM guest, the DAWR needs to be force
+enabled in the host AND the guest. For this reason, this won't work on
+POWERVM as it doesn't allow the HCALL to work. Writes of 'Y' to the
+dawr_enable_dangerous file will fail if the hypervisor doesn't support
+writing the DAWR.
+
+To double check the DAWR is working, run this kernel selftest:
+
+ tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+
+Any errors/failures/skips mean something is wrong.
--- /dev/null
+===================================
+DSCR (Data Stream Control Register)
+===================================
+
+DSCR register in powerpc allows user to have some control of prefetch of data
+stream in the processor. Please refer to the ISA documents or related manual
+for more detailed information regarding how to use this DSCR to attain this
+control of the prefetches . This document here provides an overview of kernel
+support for DSCR, related kernel objects, it's functionalities and exported
+user interface.
+
+(A) Data Structures:
+
+ (1) thread_struct::
+
+ dscr /* Thread DSCR value */
+ dscr_inherit /* Thread has changed default DSCR */
+
+ (2) PACA::
+
+ dscr_default /* per-CPU DSCR default value */
+
+ (3) sysfs.c::
+
+ dscr_default /* System DSCR default value */
+
+(B) Scheduler Changes:
+
+ Scheduler will write the per-CPU DSCR default which is stored in the
+ CPU's PACA value into the register if the thread has dscr_inherit value
+ cleared which means that it has not changed the default DSCR till now.
+ If the dscr_inherit value is set which means that it has changed the
+ default DSCR value, scheduler will write the changed value which will
+ now be contained in thread struct's dscr into the register instead of
+ the per-CPU default PACA based DSCR value.
+
+ NOTE: Please note here that the system wide global DSCR value never
+ gets used directly in the scheduler process context switch at all.
+
+(C) SYSFS Interface:
+
+ - Global DSCR default: /sys/devices/system/cpu/dscr_default
+ - CPU specific DSCR default: /sys/devices/system/cpu/cpuN/dscr
+
+ Changing the global DSCR default in the sysfs will change all the CPU
+ specific DSCR defaults immediately in their PACA structures. Again if
+ the current process has the dscr_inherit clear, it also writes the new
+ value into every CPU's DSCR register right away and updates the current
+ thread's DSCR value as well.
+
+ Changing the CPU specific DSCR default value in the sysfs does exactly
+ the same thing as above but unlike the global one above, it just changes
+ stuff for that particular CPU instead for all the CPUs on the system.
+
+(D) User Space Instructions:
+
+ The DSCR register can be accessed in the user space using any of these
+ two SPR numbers available for that purpose.
+
+ (1) Problem state SPR: 0x03 (Un-privileged, POWER8 only)
+ (2) Privileged state SPR: 0x11 (Privileged)
+
+ Accessing DSCR through privileged SPR number (0x11) from user space
+ works, as it is emulated following an illegal instruction exception
+ inside the kernel. Both mfspr and mtspr instructions are emulated.
+
+ Accessing DSCR through user level SPR (0x03) from user space will first
+ create a facility unavailable exception. Inside this exception handler
+ all mfspr instruction based read attempts will get emulated and returned
+ where as the first mtspr instruction based write attempts will enable
+ the DSCR facility for the next time around (both for read and write) by
+ setting DSCR facility in the FSCR register.
+
+(E) Specifics about 'dscr_inherit':
+
+ The thread struct element 'dscr_inherit' represents whether the thread
+ in question has attempted and changed the DSCR itself using any of the
+ following methods. This element signifies whether the thread wants to
+ use the CPU default DSCR value or its own changed DSCR value in the
+ kernel.
+
+ (1) mtspr instruction (SPR number 0x03)
+ (2) mtspr instruction (SPR number 0x11)
+ (3) ptrace interface (Explicitly set user DSCR value)
+
+ Any child of the process created after this event in the process inherits
+ this same behaviour as well.
+++ /dev/null
- DSCR (Data Stream Control Register)
- ================================================
-
-DSCR register in powerpc allows user to have some control of prefetch of data
-stream in the processor. Please refer to the ISA documents or related manual
-for more detailed information regarding how to use this DSCR to attain this
-control of the prefetches . This document here provides an overview of kernel
-support for DSCR, related kernel objects, it's functionalities and exported
-user interface.
-
-(A) Data Structures:
-
- (1) thread_struct:
- dscr /* Thread DSCR value */
- dscr_inherit /* Thread has changed default DSCR */
-
- (2) PACA:
- dscr_default /* per-CPU DSCR default value */
-
- (3) sysfs.c:
- dscr_default /* System DSCR default value */
-
-(B) Scheduler Changes:
-
- Scheduler will write the per-CPU DSCR default which is stored in the
- CPU's PACA value into the register if the thread has dscr_inherit value
- cleared which means that it has not changed the default DSCR till now.
- If the dscr_inherit value is set which means that it has changed the
- default DSCR value, scheduler will write the changed value which will
- now be contained in thread struct's dscr into the register instead of
- the per-CPU default PACA based DSCR value.
-
- NOTE: Please note here that the system wide global DSCR value never
- gets used directly in the scheduler process context switch at all.
-
-(C) SYSFS Interface:
-
- Global DSCR default: /sys/devices/system/cpu/dscr_default
- CPU specific DSCR default: /sys/devices/system/cpu/cpuN/dscr
-
- Changing the global DSCR default in the sysfs will change all the CPU
- specific DSCR defaults immediately in their PACA structures. Again if
- the current process has the dscr_inherit clear, it also writes the new
- value into every CPU's DSCR register right away and updates the current
- thread's DSCR value as well.
-
- Changing the CPU specific DSCR default value in the sysfs does exactly
- the same thing as above but unlike the global one above, it just changes
- stuff for that particular CPU instead for all the CPUs on the system.
-
-(D) User Space Instructions:
-
- The DSCR register can be accessed in the user space using any of these
- two SPR numbers available for that purpose.
-
- (1) Problem state SPR: 0x03 (Un-privileged, POWER8 only)
- (2) Privileged state SPR: 0x11 (Privileged)
-
- Accessing DSCR through privileged SPR number (0x11) from user space
- works, as it is emulated following an illegal instruction exception
- inside the kernel. Both mfspr and mtspr instructions are emulated.
-
- Accessing DSCR through user level SPR (0x03) from user space will first
- create a facility unavailable exception. Inside this exception handler
- all mfspr instruction based read attempts will get emulated and returned
- where as the first mtspr instruction based write attempts will enable
- the DSCR facility for the next time around (both for read and write) by
- setting DSCR facility in the FSCR register.
-
-(E) Specifics about 'dscr_inherit':
-
- The thread struct element 'dscr_inherit' represents whether the thread
- in question has attempted and changed the DSCR itself using any of the
- following methods. This element signifies whether the thread wants to
- use the CPU default DSCR value or its own changed DSCR value in the
- kernel.
-
- (1) mtspr instruction (SPR number 0x03)
- (2) mtspr instruction (SPR number 0x11)
- (3) ptrace interface (Explicitly set user DSCR value)
-
- Any child of the process created after this event in the process inherits
- this same behaviour as well.
--- /dev/null
+==========================
+PCI Bus EEH Error Recovery
+==========================
+
+Linas Vepstas <linas@austin.ibm.com>
+
+12 January 2005
+
+
+Overview:
+---------
+The IBM POWER-based pSeries and iSeries computers include PCI bus
+controller chips that have extended capabilities for detecting and
+reporting a large variety of PCI bus error conditions. These features
+go under the name of "EEH", for "Enhanced Error Handling". The EEH
+hardware features allow PCI bus errors to be cleared and a PCI
+card to be "rebooted", without also having to reboot the operating
+system.
+
+This is in contrast to traditional PCI error handling, where the
+PCI chip is wired directly to the CPU, and an error would cause
+a CPU machine-check/check-stop condition, halting the CPU entirely.
+Another "traditional" technique is to ignore such errors, which
+can lead to data corruption, both of user data or of kernel data,
+hung/unresponsive adapters, or system crashes/lockups. Thus,
+the idea behind EEH is that the operating system can become more
+reliable and robust by protecting it from PCI errors, and giving
+the OS the ability to "reboot"/recover individual PCI devices.
+
+Future systems from other vendors, based on the PCI-E specification,
+may contain similar features.
+
+
+Causes of EEH Errors
+--------------------
+EEH was originally designed to guard against hardware failure, such
+as PCI cards dying from heat, humidity, dust, vibration and bad
+electrical connections. The vast majority of EEH errors seen in
+"real life" are due to either poorly seated PCI cards, or,
+unfortunately quite commonly, due to device driver bugs, device firmware
+bugs, and sometimes PCI card hardware bugs.
+
+The most common software bug, is one that causes the device to
+attempt to DMA to a location in system memory that has not been
+reserved for DMA access for that card. This is a powerful feature,
+as it prevents what; otherwise, would have been silent memory
+corruption caused by the bad DMA. A number of device driver
+bugs have been found and fixed in this way over the past few
+years. Other possible causes of EEH errors include data or
+address line parity errors (for example, due to poor electrical
+connectivity due to a poorly seated card), and PCI-X split-completion
+errors (due to software, device firmware, or device PCI hardware bugs).
+The vast majority of "true hardware failures" can be cured by
+physically removing and re-seating the PCI card.
+
+
+Detection and Recovery
+----------------------
+In the following discussion, a generic overview of how to detect
+and recover from EEH errors will be presented. This is followed
+by an overview of how the current implementation in the Linux
+kernel does it. The actual implementation is subject to change,
+and some of the finer points are still being debated. These
+may in turn be swayed if or when other architectures implement
+similar functionality.
+
+When a PCI Host Bridge (PHB, the bus controller connecting the
+PCI bus to the system CPU electronics complex) detects a PCI error
+condition, it will "isolate" the affected PCI card. Isolation
+will block all writes (either to the card from the system, or
+from the card to the system), and it will cause all reads to
+return all-ff's (0xff, 0xffff, 0xffffffff for 8/16/32-bit reads).
+This value was chosen because it is the same value you would
+get if the device was physically unplugged from the slot.
+This includes access to PCI memory, I/O space, and PCI config
+space. Interrupts; however, will continued to be delivered.
+
+Detection and recovery are performed with the aid of ppc64
+firmware. The programming interfaces in the Linux kernel
+into the firmware are referred to as RTAS (Run-Time Abstraction
+Services). The Linux kernel does not (should not) access
+the EEH function in the PCI chipsets directly, primarily because
+there are a number of different chipsets out there, each with
+different interfaces and quirks. The firmware provides a
+uniform abstraction layer that will work with all pSeries
+and iSeries hardware (and be forwards-compatible).
+
+If the OS or device driver suspects that a PCI slot has been
+EEH-isolated, there is a firmware call it can make to determine if
+this is the case. If so, then the device driver should put itself
+into a consistent state (given that it won't be able to complete any
+pending work) and start recovery of the card. Recovery normally
+would consist of resetting the PCI device (holding the PCI #RST
+line high for two seconds), followed by setting up the device
+config space (the base address registers (BAR's), latency timer,
+cache line size, interrupt line, and so on). This is followed by a
+reinitialization of the device driver. In a worst-case scenario,
+the power to the card can be toggled, at least on hot-plug-capable
+slots. In principle, layers far above the device driver probably
+do not need to know that the PCI card has been "rebooted" in this
+way; ideally, there should be at most a pause in Ethernet/disk/USB
+I/O while the card is being reset.
+
+If the card cannot be recovered after three or four resets, the
+kernel/device driver should assume the worst-case scenario, that the
+card has died completely, and report this error to the sysadmin.
+In addition, error messages are reported through RTAS and also through
+syslogd (/var/log/messages) to alert the sysadmin of PCI resets.
+The correct way to deal with failed adapters is to use the standard
+PCI hotplug tools to remove and replace the dead card.
+
+
+Current PPC64 Linux EEH Implementation
+--------------------------------------
+At this time, a generic EEH recovery mechanism has been implemented,
+so that individual device drivers do not need to be modified to support
+EEH recovery. This generic mechanism piggy-backs on the PCI hotplug
+infrastructure, and percolates events up through the userspace/udev
+infrastructure. Following is a detailed description of how this is
+accomplished.
+
+EEH must be enabled in the PHB's very early during the boot process,
+and if a PCI slot is hot-plugged. The former is performed by
+eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the later by
+drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
+EEH must be enabled before a PCI scan of the device can proceed.
+Current Power5 hardware will not work unless EEH is enabled;
+although older Power4 can run with it disabled. Effectively,
+EEH can no longer be turned off. PCI devices *must* be
+registered with the EEH code; the EEH code needs to know about
+the I/O address ranges of the PCI device in order to detect an
+error. Given an arbitrary address, the routine
+pci_get_device_by_addr() will find the pci device associated
+with that address (if any).
+
+The default arch/powerpc/include/asm/io.h macros readb(), inb(), insb(),
+etc. include a check to see if the i/o read returned all-0xff's.
+If so, these make a call to eeh_dn_check_failure(), which in turn
+asks the firmware if the all-ff's value is the sign of a true EEH
+error. If it is not, processing continues as normal. The grand
+total number of these false alarms or "false positives" can be
+seen in /proc/ppc64/eeh (subject to change). Normally, almost
+all of these occur during boot, when the PCI bus is scanned, where
+a large number of 0xff reads are part of the bus scan procedure.
+
+If a frozen slot is detected, code in
+arch/powerpc/platforms/pseries/eeh.c will print a stack trace to
+syslog (/var/log/messages). This stack trace has proven to be very
+useful to device-driver authors for finding out at what point the EEH
+error was detected, as the error itself usually occurs slightly
+beforehand.
+
+Next, it uses the Linux kernel notifier chain/work queue mechanism to
+allow any interested parties to find out about the failure. Device
+drivers, or other parts of the kernel, can use
+`eeh_register_notifier(struct notifier_block *)` to find out about EEH
+events. The event will include a pointer to the pci device, the
+device node and some state info. Receivers of the event can "do as
+they wish"; the default handler will be described further in this
+section.
+
+To assist in the recovery of the device, eeh.c exports the
+following functions:
+
+rtas_set_slot_reset()
+ assert the PCI #RST line for 1/8th of a second
+rtas_configure_bridge()
+ ask firmware to configure any PCI bridges
+ located topologically under the pci slot.
+eeh_save_bars() and eeh_restore_bars():
+ save and restore the PCI
+ config-space info for a device and any devices under it.
+
+
+A handler for the EEH notifier_block events is implemented in
+drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events().
+It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter().
+This last call causes the device driver for the card to be stopped,
+which causes uevents to go out to user space. This triggers
+user-space scripts that might issue commands such as "ifdown eth0"
+for ethernet cards, and so on. This handler then sleeps for 5 seconds,
+hoping to give the user-space scripts enough time to complete.
+It then resets the PCI card, reconfigures the device BAR's, and
+any bridges underneath. It then calls rpaphp_enable_pci_slot(),
+which restarts the device driver and triggers more user-space
+events (for example, calling "ifup eth0" for ethernet cards).
+
+
+Device Shutdown and User-Space Events
+-------------------------------------
+This section documents what happens when a pci slot is unconfigured,
+focusing on how the device driver gets shut down, and on how the
+events get delivered to user-space scripts.
+
+Following is an example sequence of events that cause a device driver
+close function to be called during the first phase of an EEH reset.
+The following sequence is an example of the pcnet32 device driver::
+
+ rpa_php_unconfig_pci_adapter (struct slot *) // in rpaphp_pci.c
+ {
+ calls
+ pci_remove_bus_device (struct pci_dev *) // in /drivers/pci/remove.c
+ {
+ calls
+ pci_destroy_dev (struct pci_dev *)
+ {
+ calls
+ device_unregister (&dev->dev) // in /drivers/base/core.c
+ {
+ calls
+ device_del (struct device *)
+ {
+ calls
+ bus_remove_device() // in /drivers/base/bus.c
+ {
+ calls
+ device_release_driver()
+ {
+ calls
+ struct device_driver->remove() which is just
+ pci_device_remove() // in /drivers/pci/pci_driver.c
+ {
+ calls
+ struct pci_driver->remove() which is just
+ pcnet32_remove_one() // in /drivers/net/pcnet32.c
+ {
+ calls
+ unregister_netdev() // in /net/core/dev.c
+ {
+ calls
+ dev_close() // in /net/core/dev.c
+ {
+ calls dev->stop();
+ which is just pcnet32_close() // in pcnet32.c
+ {
+ which does what you wanted
+ to stop the device
+ }
+ }
+ }
+ which
+ frees pcnet32 device driver memory
+ }
+ }}}}}}
+
+
+in drivers/pci/pci_driver.c,
+struct device_driver->remove() is just pci_device_remove()
+which calls struct pci_driver->remove() which is pcnet32_remove_one()
+which calls unregister_netdev() (in net/core/dev.c)
+which calls dev_close() (in net/core/dev.c)
+which calls dev->stop() which is pcnet32_close()
+which then does the appropriate shutdown.
+
+---
+
+Following is the analogous stack trace for events sent to user-space
+when the pci device is unconfigured::
+
+ rpa_php_unconfig_pci_adapter() { // in rpaphp_pci.c
+ calls
+ pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c
+ calls
+ pci_destroy_dev (struct pci_dev *) {
+ calls
+ device_unregister (&dev->dev) { // in /drivers/base/core.c
+ calls
+ device_del(struct device * dev) { // in /drivers/base/core.c
+ calls
+ kobject_del() { //in /libs/kobject.c
+ calls
+ kobject_uevent() { // in /libs/kobject.c
+ calls
+ kset_uevent() { // in /lib/kobject.c
+ calls
+ kset->uevent_ops->uevent() // which is really just
+ a call to
+ dev_uevent() { // in /drivers/base/core.c
+ calls
+ dev->bus->uevent() which is really just a call to
+ pci_uevent () { // in drivers/pci/hotplug.c
+ which prints device name, etc....
+ }
+ }
+ then kobject_uevent() sends a netlink uevent to userspace
+ --> userspace uevent
+ (during early boot, nobody listens to netlink events and
+ kobject_uevent() executes uevent_helper[], which runs the
+ event process /sbin/hotplug)
+ }
+ }
+ kobject_del() then calls sysfs_remove_dir(), which would
+ trigger any user-space daemon that was watching /sysfs,
+ and notice the delete event.
+
+
+Pro's and Con's of the Current Design
+-------------------------------------
+There are several issues with the current EEH software recovery design,
+which may be addressed in future revisions. But first, note that the
+big plus of the current design is that no changes need to be made to
+individual device drivers, so that the current design throws a wide net.
+The biggest negative of the design is that it potentially disturbs
+network daemons and file systems that didn't need to be disturbed.
+
+- A minor complaint is that resetting the network card causes
+ user-space back-to-back ifdown/ifup burps that potentially disturb
+ network daemons, that didn't need to even know that the pci
+ card was being rebooted.
+
+- A more serious concern is that the same reset, for SCSI devices,
+ causes havoc to mounted file systems. Scripts cannot post-facto
+ unmount a file system without flushing pending buffers, but this
+ is impossible, because I/O has already been stopped. Thus,
+ ideally, the reset should happen at or below the block layer,
+ so that the file systems are not disturbed.
+
+ Reiserfs does not tolerate errors returned from the block device.
+ Ext3fs seems to be tolerant, retrying reads/writes until it does
+ succeed. Both have been only lightly tested in this scenario.
+
+ The SCSI-generic subsystem already has built-in code for performing
+ SCSI device resets, SCSI bus resets, and SCSI host-bus-adapter
+ (HBA) resets. These are cascaded into a chain of attempted
+ resets if a SCSI command fails. These are completely hidden
+ from the block layer. It would be very natural to add an EEH
+ reset into this chain of events.
+
+- If a SCSI error occurs for the root device, all is lost unless
+ the sysadmin had the foresight to run /bin, /sbin, /etc, /var
+ and so on, out of ramdisk/tmpfs.
+
+
+Conclusions
+-----------
+There's forward progress ...
+++ /dev/null
-
-
- PCI Bus EEH Error Recovery
- --------------------------
- Linas Vepstas
- <linas@austin.ibm.com>
- 12 January 2005
-
-
-Overview:
----------
-The IBM POWER-based pSeries and iSeries computers include PCI bus
-controller chips that have extended capabilities for detecting and
-reporting a large variety of PCI bus error conditions. These features
-go under the name of "EEH", for "Enhanced Error Handling". The EEH
-hardware features allow PCI bus errors to be cleared and a PCI
-card to be "rebooted", without also having to reboot the operating
-system.
-
-This is in contrast to traditional PCI error handling, where the
-PCI chip is wired directly to the CPU, and an error would cause
-a CPU machine-check/check-stop condition, halting the CPU entirely.
-Another "traditional" technique is to ignore such errors, which
-can lead to data corruption, both of user data or of kernel data,
-hung/unresponsive adapters, or system crashes/lockups. Thus,
-the idea behind EEH is that the operating system can become more
-reliable and robust by protecting it from PCI errors, and giving
-the OS the ability to "reboot"/recover individual PCI devices.
-
-Future systems from other vendors, based on the PCI-E specification,
-may contain similar features.
-
-
-Causes of EEH Errors
---------------------
-EEH was originally designed to guard against hardware failure, such
-as PCI cards dying from heat, humidity, dust, vibration and bad
-electrical connections. The vast majority of EEH errors seen in
-"real life" are due to either poorly seated PCI cards, or,
-unfortunately quite commonly, due to device driver bugs, device firmware
-bugs, and sometimes PCI card hardware bugs.
-
-The most common software bug, is one that causes the device to
-attempt to DMA to a location in system memory that has not been
-reserved for DMA access for that card. This is a powerful feature,
-as it prevents what; otherwise, would have been silent memory
-corruption caused by the bad DMA. A number of device driver
-bugs have been found and fixed in this way over the past few
-years. Other possible causes of EEH errors include data or
-address line parity errors (for example, due to poor electrical
-connectivity due to a poorly seated card), and PCI-X split-completion
-errors (due to software, device firmware, or device PCI hardware bugs).
-The vast majority of "true hardware failures" can be cured by
-physically removing and re-seating the PCI card.
-
-
-Detection and Recovery
-----------------------
-In the following discussion, a generic overview of how to detect
-and recover from EEH errors will be presented. This is followed
-by an overview of how the current implementation in the Linux
-kernel does it. The actual implementation is subject to change,
-and some of the finer points are still being debated. These
-may in turn be swayed if or when other architectures implement
-similar functionality.
-
-When a PCI Host Bridge (PHB, the bus controller connecting the
-PCI bus to the system CPU electronics complex) detects a PCI error
-condition, it will "isolate" the affected PCI card. Isolation
-will block all writes (either to the card from the system, or
-from the card to the system), and it will cause all reads to
-return all-ff's (0xff, 0xffff, 0xffffffff for 8/16/32-bit reads).
-This value was chosen because it is the same value you would
-get if the device was physically unplugged from the slot.
-This includes access to PCI memory, I/O space, and PCI config
-space. Interrupts; however, will continued to be delivered.
-
-Detection and recovery are performed with the aid of ppc64
-firmware. The programming interfaces in the Linux kernel
-into the firmware are referred to as RTAS (Run-Time Abstraction
-Services). The Linux kernel does not (should not) access
-the EEH function in the PCI chipsets directly, primarily because
-there are a number of different chipsets out there, each with
-different interfaces and quirks. The firmware provides a
-uniform abstraction layer that will work with all pSeries
-and iSeries hardware (and be forwards-compatible).
-
-If the OS or device driver suspects that a PCI slot has been
-EEH-isolated, there is a firmware call it can make to determine if
-this is the case. If so, then the device driver should put itself
-into a consistent state (given that it won't be able to complete any
-pending work) and start recovery of the card. Recovery normally
-would consist of resetting the PCI device (holding the PCI #RST
-line high for two seconds), followed by setting up the device
-config space (the base address registers (BAR's), latency timer,
-cache line size, interrupt line, and so on). This is followed by a
-reinitialization of the device driver. In a worst-case scenario,
-the power to the card can be toggled, at least on hot-plug-capable
-slots. In principle, layers far above the device driver probably
-do not need to know that the PCI card has been "rebooted" in this
-way; ideally, there should be at most a pause in Ethernet/disk/USB
-I/O while the card is being reset.
-
-If the card cannot be recovered after three or four resets, the
-kernel/device driver should assume the worst-case scenario, that the
-card has died completely, and report this error to the sysadmin.
-In addition, error messages are reported through RTAS and also through
-syslogd (/var/log/messages) to alert the sysadmin of PCI resets.
-The correct way to deal with failed adapters is to use the standard
-PCI hotplug tools to remove and replace the dead card.
-
-
-Current PPC64 Linux EEH Implementation
---------------------------------------
-At this time, a generic EEH recovery mechanism has been implemented,
-so that individual device drivers do not need to be modified to support
-EEH recovery. This generic mechanism piggy-backs on the PCI hotplug
-infrastructure, and percolates events up through the userspace/udev
-infrastructure. Following is a detailed description of how this is
-accomplished.
-
-EEH must be enabled in the PHB's very early during the boot process,
-and if a PCI slot is hot-plugged. The former is performed by
-eeh_init() in arch/powerpc/platforms/pseries/eeh.c, and the later by
-drivers/pci/hotplug/pSeries_pci.c calling in to the eeh.c code.
-EEH must be enabled before a PCI scan of the device can proceed.
-Current Power5 hardware will not work unless EEH is enabled;
-although older Power4 can run with it disabled. Effectively,
-EEH can no longer be turned off. PCI devices *must* be
-registered with the EEH code; the EEH code needs to know about
-the I/O address ranges of the PCI device in order to detect an
-error. Given an arbitrary address, the routine
-pci_get_device_by_addr() will find the pci device associated
-with that address (if any).
-
-The default arch/powerpc/include/asm/io.h macros readb(), inb(), insb(),
-etc. include a check to see if the i/o read returned all-0xff's.
-If so, these make a call to eeh_dn_check_failure(), which in turn
-asks the firmware if the all-ff's value is the sign of a true EEH
-error. If it is not, processing continues as normal. The grand
-total number of these false alarms or "false positives" can be
-seen in /proc/ppc64/eeh (subject to change). Normally, almost
-all of these occur during boot, when the PCI bus is scanned, where
-a large number of 0xff reads are part of the bus scan procedure.
-
-If a frozen slot is detected, code in
-arch/powerpc/platforms/pseries/eeh.c will print a stack trace to
-syslog (/var/log/messages). This stack trace has proven to be very
-useful to device-driver authors for finding out at what point the EEH
-error was detected, as the error itself usually occurs slightly
-beforehand.
-
-Next, it uses the Linux kernel notifier chain/work queue mechanism to
-allow any interested parties to find out about the failure. Device
-drivers, or other parts of the kernel, can use
-eeh_register_notifier(struct notifier_block *) to find out about EEH
-events. The event will include a pointer to the pci device, the
-device node and some state info. Receivers of the event can "do as
-they wish"; the default handler will be described further in this
-section.
-
-To assist in the recovery of the device, eeh.c exports the
-following functions:
-
-rtas_set_slot_reset() -- assert the PCI #RST line for 1/8th of a second
-rtas_configure_bridge() -- ask firmware to configure any PCI bridges
- located topologically under the pci slot.
-eeh_save_bars() and eeh_restore_bars(): save and restore the PCI
- config-space info for a device and any devices under it.
-
-
-A handler for the EEH notifier_block events is implemented in
-drivers/pci/hotplug/pSeries_pci.c, called handle_eeh_events().
-It saves the device BAR's and then calls rpaphp_unconfig_pci_adapter().
-This last call causes the device driver for the card to be stopped,
-which causes uevents to go out to user space. This triggers
-user-space scripts that might issue commands such as "ifdown eth0"
-for ethernet cards, and so on. This handler then sleeps for 5 seconds,
-hoping to give the user-space scripts enough time to complete.
-It then resets the PCI card, reconfigures the device BAR's, and
-any bridges underneath. It then calls rpaphp_enable_pci_slot(),
-which restarts the device driver and triggers more user-space
-events (for example, calling "ifup eth0" for ethernet cards).
-
-
-Device Shutdown and User-Space Events
--------------------------------------
-This section documents what happens when a pci slot is unconfigured,
-focusing on how the device driver gets shut down, and on how the
-events get delivered to user-space scripts.
-
-Following is an example sequence of events that cause a device driver
-close function to be called during the first phase of an EEH reset.
-The following sequence is an example of the pcnet32 device driver.
-
- rpa_php_unconfig_pci_adapter (struct slot *) // in rpaphp_pci.c
- {
- calls
- pci_remove_bus_device (struct pci_dev *) // in /drivers/pci/remove.c
- {
- calls
- pci_destroy_dev (struct pci_dev *)
- {
- calls
- device_unregister (&dev->dev) // in /drivers/base/core.c
- {
- calls
- device_del (struct device *)
- {
- calls
- bus_remove_device() // in /drivers/base/bus.c
- {
- calls
- device_release_driver()
- {
- calls
- struct device_driver->remove() which is just
- pci_device_remove() // in /drivers/pci/pci_driver.c
- {
- calls
- struct pci_driver->remove() which is just
- pcnet32_remove_one() // in /drivers/net/pcnet32.c
- {
- calls
- unregister_netdev() // in /net/core/dev.c
- {
- calls
- dev_close() // in /net/core/dev.c
- {
- calls dev->stop();
- which is just pcnet32_close() // in pcnet32.c
- {
- which does what you wanted
- to stop the device
- }
- }
- }
- which
- frees pcnet32 device driver memory
- }
- }}}}}}
-
-
- in drivers/pci/pci_driver.c,
- struct device_driver->remove() is just pci_device_remove()
- which calls struct pci_driver->remove() which is pcnet32_remove_one()
- which calls unregister_netdev() (in net/core/dev.c)
- which calls dev_close() (in net/core/dev.c)
- which calls dev->stop() which is pcnet32_close()
- which then does the appropriate shutdown.
-
----
-Following is the analogous stack trace for events sent to user-space
-when the pci device is unconfigured.
-
-rpa_php_unconfig_pci_adapter() { // in rpaphp_pci.c
- calls
- pci_remove_bus_device (struct pci_dev *) { // in /drivers/pci/remove.c
- calls
- pci_destroy_dev (struct pci_dev *) {
- calls
- device_unregister (&dev->dev) { // in /drivers/base/core.c
- calls
- device_del(struct device * dev) { // in /drivers/base/core.c
- calls
- kobject_del() { //in /libs/kobject.c
- calls
- kobject_uevent() { // in /libs/kobject.c
- calls
- kset_uevent() { // in /lib/kobject.c
- calls
- kset->uevent_ops->uevent() // which is really just
- a call to
- dev_uevent() { // in /drivers/base/core.c
- calls
- dev->bus->uevent() which is really just a call to
- pci_uevent () { // in drivers/pci/hotplug.c
- which prints device name, etc....
- }
- }
- then kobject_uevent() sends a netlink uevent to userspace
- --> userspace uevent
- (during early boot, nobody listens to netlink events and
- kobject_uevent() executes uevent_helper[], which runs the
- event process /sbin/hotplug)
- }
- }
- kobject_del() then calls sysfs_remove_dir(), which would
- trigger any user-space daemon that was watching /sysfs,
- and notice the delete event.
-
-
-Pro's and Con's of the Current Design
--------------------------------------
-There are several issues with the current EEH software recovery design,
-which may be addressed in future revisions. But first, note that the
-big plus of the current design is that no changes need to be made to
-individual device drivers, so that the current design throws a wide net.
-The biggest negative of the design is that it potentially disturbs
-network daemons and file systems that didn't need to be disturbed.
-
--- A minor complaint is that resetting the network card causes
- user-space back-to-back ifdown/ifup burps that potentially disturb
- network daemons, that didn't need to even know that the pci
- card was being rebooted.
-
--- A more serious concern is that the same reset, for SCSI devices,
- causes havoc to mounted file systems. Scripts cannot post-facto
- unmount a file system without flushing pending buffers, but this
- is impossible, because I/O has already been stopped. Thus,
- ideally, the reset should happen at or below the block layer,
- so that the file systems are not disturbed.
-
- Reiserfs does not tolerate errors returned from the block device.
- Ext3fs seems to be tolerant, retrying reads/writes until it does
- succeed. Both have been only lightly tested in this scenario.
-
- The SCSI-generic subsystem already has built-in code for performing
- SCSI device resets, SCSI bus resets, and SCSI host-bus-adapter
- (HBA) resets. These are cascaded into a chain of attempted
- resets if a SCSI command fails. These are completely hidden
- from the block layer. It would be very natural to add an EEH
- reset into this chain of events.
-
--- If a SCSI error occurs for the root device, all is lost unless
- the sysadmin had the foresight to run /bin, /sbin, /etc, /var
- and so on, out of ramdisk/tmpfs.
-
-
-Conclusions
------------
-There's forward progress ...
-
-
--- /dev/null
+======================
+Firmware-Assisted Dump
+======================
+
+July 2011
+
+The goal of firmware-assisted dump is to enable the dump of
+a crashed system, and to do so from a fully-reset system, and
+to minimize the total elapsed time until the system is back
+in production use.
+
+- Firmware assisted dump (fadump) infrastructure is intended to replace
+ the existing phyp assisted dump.
+- Fadump uses the same firmware interfaces and memory reservation model
+ as phyp assisted dump.
+- Unlike phyp dump, fadump exports the memory dump through /proc/vmcore
+ in the ELF format in the same way as kdump. This helps us reuse the
+ kdump infrastructure for dump capture and filtering.
+- Unlike phyp dump, userspace tool does not need to refer any sysfs
+ interface while reading /proc/vmcore.
+- Unlike phyp dump, fadump allows user to release all the memory reserved
+ for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem.
+- Once enabled through kernel boot parameter, fadump can be
+ started/stopped through /sys/kernel/fadump_registered interface (see
+ sysfs files section below) and can be easily integrated with kdump
+ service start/stop init scripts.
+
+Comparing with kdump or other strategies, firmware-assisted
+dump offers several strong, practical advantages:
+
+- Unlike kdump, the system has been reset, and loaded
+ with a fresh copy of the kernel. In particular,
+ PCI and I/O devices have been reinitialized and are
+ in a clean, consistent state.
+- Once the dump is copied out, the memory that held the dump
+ is immediately available to the running kernel. And therefore,
+ unlike kdump, fadump doesn't need a 2nd reboot to get back
+ the system to the production configuration.
+
+The above can only be accomplished by coordination with,
+and assistance from the Power firmware. The procedure is
+as follows:
+
+- The first kernel registers the sections of memory with the
+ Power firmware for dump preservation during OS initialization.
+ These registered sections of memory are reserved by the first
+ kernel during early boot.
+
+- When a system crashes, the Power firmware will save
+ the low memory (boot memory of size larger of 5% of system RAM
+ or 256MB) of RAM to the previous registered region. It will
+ also save system registers, and hardware PTE's.
+
+ NOTE:
+ The term 'boot memory' means size of the low memory chunk
+ that is required for a kernel to boot successfully when
+ booted with restricted memory. By default, the boot memory
+ size will be the larger of 5% of system RAM or 256MB.
+ Alternatively, user can also specify boot memory size
+ through boot parameter 'crashkernel=' which will override
+ the default calculated size. Use this option if default
+ boot memory size is not sufficient for second kernel to
+ boot successfully. For syntax of crashkernel= parameter,
+ refer to Documentation/admin-guide/kdump/kdump.rst. If any offset is
+ provided in crashkernel= parameter, it will be ignored
+ as fadump uses a predefined offset to reserve memory
+ for boot memory dump preservation in case of a crash.
+
+- After the low memory (boot memory) area has been saved, the
+ firmware will reset PCI and other hardware state. It will
+ *not* clear the RAM. It will then launch the bootloader, as
+ normal.
+
+- The freshly booted kernel will notice that there is a new
+ node (ibm,dump-kernel) in the device tree, indicating that
+ there is crash data available from a previous boot. During
+ the early boot OS will reserve rest of the memory above
+ boot memory size effectively booting with restricted memory
+ size. This will make sure that the second kernel will not
+ touch any of the dump memory area.
+
+- User-space tools will read /proc/vmcore to obtain the contents
+ of memory, which holds the previous crashed kernel dump in ELF
+ format. The userspace tools may copy this info to disk, or
+ network, nas, san, iscsi, etc. as desired.
+
+- Once the userspace tool is done saving dump, it will echo
+ '1' to /sys/kernel/fadump_release_mem to release the reserved
+ memory back to general use, except the memory required for
+ next firmware-assisted dump registration.
+
+ e.g.::
+
+ # echo 1 > /sys/kernel/fadump_release_mem
+
+Please note that the firmware-assisted dump feature
+is only available on Power6 and above systems with recent
+firmware versions.
+
+Implementation details:
+-----------------------
+
+During boot, a check is made to see if firmware supports
+this feature on that particular machine. If it does, then
+we check to see if an active dump is waiting for us. If yes
+then everything but boot memory size of RAM is reserved during
+early boot (See Fig. 2). This area is released once we finish
+collecting the dump from user land scripts (e.g. kdump scripts)
+that are run. If there is dump data, then the
+/sys/kernel/fadump_release_mem file is created, and the reserved
+memory is held.
+
+If there is no waiting dump data, then only the memory required
+to hold CPU state, HPTE region, boot memory dump and elfcore
+header, is usually reserved at an offset greater than boot memory
+size (see Fig. 1). This area is *not* released: this region will
+be kept permanently reserved, so that it can act as a receptacle
+for a copy of the boot memory content in addition to CPU state
+and HPTE region, in the case a crash does occur. Since this reserved
+memory area is used only after the system crash, there is no point in
+blocking this significant chunk of memory from production kernel.
+Hence, the implementation uses the Linux kernel's Contiguous Memory
+Allocator (CMA) for memory reservation if CMA is configured for kernel.
+With CMA reservation this memory will be available for applications to
+use it, while kernel is prevented from using it. With this fadump will
+still be able to capture all of the kernel memory and most of the user
+space memory except the user pages that were present in CMA region::
+
+ o Memory Reservation during first kernel
+
+ Low memory Top of memory
+ 0 boot memory size |
+ | | |<--Reserved dump area -->| |
+ V V | Permanent Reservation | V
+ +-----------+----------/ /---+---+----+-----------+----+------+
+ | | |CPU|HPTE| DUMP |ELF | |
+ +-----------+----------/ /---+---+----+-----------+----+------+
+ | ^
+ | |
+ \ /
+ -------------------------------------------
+ Boot memory content gets transferred to
+ reserved area by firmware at the time of
+ crash
+ Fig. 1
+
+ o Memory Reservation during second kernel after crash
+
+ Low memory Top of memory
+ 0 boot memory size |
+ | |<------------- Reserved dump area ----------- -->|
+ V V V
+ +-----------+----------/ /---+---+----+-----------+----+------+
+ | | |CPU|HPTE| DUMP |ELF | |
+ +-----------+----------/ /---+---+----+-----------+----+------+
+ | |
+ V V
+ Used by second /proc/vmcore
+ kernel to boot
+ Fig. 2
+
+Currently the dump will be copied from /proc/vmcore to a
+a new file upon user intervention. The dump data available through
+/proc/vmcore will be in ELF format. Hence the existing kdump
+infrastructure (kdump scripts) to save the dump works fine with
+minor modifications.
+
+The tools to examine the dump will be same as the ones
+used for kdump.
+
+How to enable firmware-assisted dump (fadump):
+----------------------------------------------
+
+1. Set config option CONFIG_FA_DUMP=y and build kernel.
+2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
+ By default, fadump reserved memory will be initialized as CMA area.
+ Alternatively, user can boot linux kernel with 'fadump=nocma' to
+ prevent fadump to use CMA.
+3. Optionally, user can also set 'crashkernel=' kernel cmdline
+ to specify size of the memory to reserve for boot memory dump
+ preservation.
+
+NOTE:
+ 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
+ use 'crashkernel=' to specify size of the memory to reserve
+ for boot memory dump preservation.
+ 2. If firmware-assisted dump fails to reserve memory then it
+ will fallback to existing kdump mechanism if 'crashkernel='
+ option is set at kernel cmdline.
+ 3. if user wants to capture all of user space memory and ok with
+ reserved memory not available to production system, then
+ 'fadump=nocma' kernel parameter can be used to fallback to
+ old behaviour.
+
+Sysfs/debugfs files:
+--------------------
+
+Firmware-assisted dump feature uses sysfs file system to hold
+the control files and debugfs file to display memory reserved region.
+
+Here is the list of files under kernel sysfs:
+
+ /sys/kernel/fadump_enabled
+ This is used to display the fadump status.
+
+ - 0 = fadump is disabled
+ - 1 = fadump is enabled
+
+ This interface can be used by kdump init scripts to identify if
+ fadump is enabled in the kernel and act accordingly.
+
+ /sys/kernel/fadump_registered
+ This is used to display the fadump registration status as well
+ as to control (start/stop) the fadump registration.
+
+ - 0 = fadump is not registered.
+ - 1 = fadump is registered and ready to handle system crash.
+
+ To register fadump echo 1 > /sys/kernel/fadump_registered and
+ echo 0 > /sys/kernel/fadump_registered for un-register and stop the
+ fadump. Once the fadump is un-registered, the system crash will not
+ be handled and vmcore will not be captured. This interface can be
+ easily integrated with kdump service start/stop.
+
+ /sys/kernel/fadump_release_mem
+ This file is available only when fadump is active during
+ second kernel. This is used to release the reserved memory
+ region that are held for saving crash dump. To release the
+ reserved memory echo 1 to it::
+
+ echo 1 > /sys/kernel/fadump_release_mem
+
+ After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region
+ file will change to reflect the new memory reservations.
+
+ The existing userspace tools (kdump infrastructure) can be easily
+ enhanced to use this interface to release the memory reserved for
+ dump and continue without 2nd reboot.
+
+Here is the list of files under powerpc debugfs:
+(Assuming debugfs is mounted on /sys/kernel/debug directory.)
+
+ /sys/kernel/debug/powerpc/fadump_region
+ This file shows the reserved memory regions if fadump is
+ enabled otherwise this file is empty. The output format
+ is::
+
+ <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
+
+ e.g.
+ Contents when fadump is registered during first kernel::
+
+ # cat /sys/kernel/debug/powerpc/fadump_region
+ CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
+ HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
+ DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
+
+ Contents when fadump is active during second kernel::
+
+ # cat /sys/kernel/debug/powerpc/fadump_region
+ CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
+ HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
+ DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
+ : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
+
+NOTE:
+ Please refer to Documentation/filesystems/debugfs.txt on
+ how to mount the debugfs filesystem.
+
+
+TODO:
+-----
+ - Need to come up with the better approach to find out more
+ accurate boot memory size that is required for a kernel to
+ boot successfully when booted with restricted memory.
+ - The fadump implementation introduces a fadump crash info structure
+ in the scratch area before the ELF core header. The idea of introducing
+ this structure is to pass some important crash info data to the second
+ kernel which will help second kernel to populate ELF core header with
+ correct data before it gets exported through /proc/vmcore. The current
+ design implementation does not address a possibility of introducing
+ additional fields (in future) to this structure without affecting
+ compatibility. Need to come up with the better approach to address this.
+
+ The possible approaches are:
+
+ 1. Introduce version field for version tracking, bump up the version
+ whenever a new field is added to the structure in future. The version
+ field can be used to find out what fields are valid for the current
+ version of the structure.
+ 2. Reserve the area of predefined size (say PAGE_SIZE) for this
+ structure and have unused area as reserved (initialized to zero)
+ for future field additions.
+
+ The advantage of approach 1 over 2 is we don't need to reserve extra space.
+
+Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
+
+This document is based on the original documentation written for phyp
+
+assisted dump by Linas Vepstas and Manish Ahuja.
+++ /dev/null
-
- Firmware-Assisted Dump
- ------------------------
- July 2011
-
-The goal of firmware-assisted dump is to enable the dump of
-a crashed system, and to do so from a fully-reset system, and
-to minimize the total elapsed time until the system is back
-in production use.
-
-- Firmware assisted dump (fadump) infrastructure is intended to replace
- the existing phyp assisted dump.
-- Fadump uses the same firmware interfaces and memory reservation model
- as phyp assisted dump.
-- Unlike phyp dump, fadump exports the memory dump through /proc/vmcore
- in the ELF format in the same way as kdump. This helps us reuse the
- kdump infrastructure for dump capture and filtering.
-- Unlike phyp dump, userspace tool does not need to refer any sysfs
- interface while reading /proc/vmcore.
-- Unlike phyp dump, fadump allows user to release all the memory reserved
- for dump, with a single operation of echo 1 > /sys/kernel/fadump_release_mem.
-- Once enabled through kernel boot parameter, fadump can be
- started/stopped through /sys/kernel/fadump_registered interface (see
- sysfs files section below) and can be easily integrated with kdump
- service start/stop init scripts.
-
-Comparing with kdump or other strategies, firmware-assisted
-dump offers several strong, practical advantages:
-
--- Unlike kdump, the system has been reset, and loaded
- with a fresh copy of the kernel. In particular,
- PCI and I/O devices have been reinitialized and are
- in a clean, consistent state.
--- Once the dump is copied out, the memory that held the dump
- is immediately available to the running kernel. And therefore,
- unlike kdump, fadump doesn't need a 2nd reboot to get back
- the system to the production configuration.
-
-The above can only be accomplished by coordination with,
-and assistance from the Power firmware. The procedure is
-as follows:
-
--- The first kernel registers the sections of memory with the
- Power firmware for dump preservation during OS initialization.
- These registered sections of memory are reserved by the first
- kernel during early boot.
-
--- When a system crashes, the Power firmware will save
- the low memory (boot memory of size larger of 5% of system RAM
- or 256MB) of RAM to the previous registered region. It will
- also save system registers, and hardware PTE's.
-
- NOTE: The term 'boot memory' means size of the low memory chunk
- that is required for a kernel to boot successfully when
- booted with restricted memory. By default, the boot memory
- size will be the larger of 5% of system RAM or 256MB.
- Alternatively, user can also specify boot memory size
- through boot parameter 'crashkernel=' which will override
- the default calculated size. Use this option if default
- boot memory size is not sufficient for second kernel to
- boot successfully. For syntax of crashkernel= parameter,
- refer to Documentation/admin-guide/kdump/kdump.rst. If any offset is
- provided in crashkernel= parameter, it will be ignored
- as fadump uses a predefined offset to reserve memory
- for boot memory dump preservation in case of a crash.
-
--- After the low memory (boot memory) area has been saved, the
- firmware will reset PCI and other hardware state. It will
- *not* clear the RAM. It will then launch the bootloader, as
- normal.
-
--- The freshly booted kernel will notice that there is a new
- node (ibm,dump-kernel) in the device tree, indicating that
- there is crash data available from a previous boot. During
- the early boot OS will reserve rest of the memory above
- boot memory size effectively booting with restricted memory
- size. This will make sure that the second kernel will not
- touch any of the dump memory area.
-
--- User-space tools will read /proc/vmcore to obtain the contents
- of memory, which holds the previous crashed kernel dump in ELF
- format. The userspace tools may copy this info to disk, or
- network, nas, san, iscsi, etc. as desired.
-
--- Once the userspace tool is done saving dump, it will echo
- '1' to /sys/kernel/fadump_release_mem to release the reserved
- memory back to general use, except the memory required for
- next firmware-assisted dump registration.
-
- e.g.
- # echo 1 > /sys/kernel/fadump_release_mem
-
-Please note that the firmware-assisted dump feature
-is only available on Power6 and above systems with recent
-firmware versions.
-
-Implementation details:
-----------------------
-
-During boot, a check is made to see if firmware supports
-this feature on that particular machine. If it does, then
-we check to see if an active dump is waiting for us. If yes
-then everything but boot memory size of RAM is reserved during
-early boot (See Fig. 2). This area is released once we finish
-collecting the dump from user land scripts (e.g. kdump scripts)
-that are run. If there is dump data, then the
-/sys/kernel/fadump_release_mem file is created, and the reserved
-memory is held.
-
-If there is no waiting dump data, then only the memory required
-to hold CPU state, HPTE region, boot memory dump and elfcore
-header, is usually reserved at an offset greater than boot memory
-size (see Fig. 1). This area is *not* released: this region will
-be kept permanently reserved, so that it can act as a receptacle
-for a copy of the boot memory content in addition to CPU state
-and HPTE region, in the case a crash does occur. Since this reserved
-memory area is used only after the system crash, there is no point in
-blocking this significant chunk of memory from production kernel.
-Hence, the implementation uses the Linux kernel's Contiguous Memory
-Allocator (CMA) for memory reservation if CMA is configured for kernel.
-With CMA reservation this memory will be available for applications to
-use it, while kernel is prevented from using it. With this fadump will
-still be able to capture all of the kernel memory and most of the user
-space memory except the user pages that were present in CMA region.
-
- o Memory Reservation during first kernel
-
- Low memory Top of memory
- 0 boot memory size |
- | | |<--Reserved dump area -->| |
- V V | Permanent Reservation | V
- +-----------+----------/ /---+---+----+-----------+----+------+
- | | |CPU|HPTE| DUMP |ELF | |
- +-----------+----------/ /---+---+----+-----------+----+------+
- | ^
- | |
- \ /
- -------------------------------------------
- Boot memory content gets transferred to
- reserved area by firmware at the time of
- crash
- Fig. 1
-
- o Memory Reservation during second kernel after crash
-
- Low memory Top of memory
- 0 boot memory size |
- | |<------------- Reserved dump area ----------- -->|
- V V V
- +-----------+----------/ /---+---+----+-----------+----+------+
- | | |CPU|HPTE| DUMP |ELF | |
- +-----------+----------/ /---+---+----+-----------+----+------+
- | |
- V V
- Used by second /proc/vmcore
- kernel to boot
- Fig. 2
-
-Currently the dump will be copied from /proc/vmcore to a
-a new file upon user intervention. The dump data available through
-/proc/vmcore will be in ELF format. Hence the existing kdump
-infrastructure (kdump scripts) to save the dump works fine with
-minor modifications.
-
-The tools to examine the dump will be same as the ones
-used for kdump.
-
-How to enable firmware-assisted dump (fadump):
--------------------------------------
-
-1. Set config option CONFIG_FA_DUMP=y and build kernel.
-2. Boot into linux kernel with 'fadump=on' kernel cmdline option.
- By default, fadump reserved memory will be initialized as CMA area.
- Alternatively, user can boot linux kernel with 'fadump=nocma' to
- prevent fadump to use CMA.
-3. Optionally, user can also set 'crashkernel=' kernel cmdline
- to specify size of the memory to reserve for boot memory dump
- preservation.
-
-NOTE: 1. 'fadump_reserve_mem=' parameter has been deprecated. Instead
- use 'crashkernel=' to specify size of the memory to reserve
- for boot memory dump preservation.
- 2. If firmware-assisted dump fails to reserve memory then it
- will fallback to existing kdump mechanism if 'crashkernel='
- option is set at kernel cmdline.
- 3. if user wants to capture all of user space memory and ok with
- reserved memory not available to production system, then
- 'fadump=nocma' kernel parameter can be used to fallback to
- old behaviour.
-
-Sysfs/debugfs files:
-------------
-
-Firmware-assisted dump feature uses sysfs file system to hold
-the control files and debugfs file to display memory reserved region.
-
-Here is the list of files under kernel sysfs:
-
- /sys/kernel/fadump_enabled
-
- This is used to display the fadump status.
- 0 = fadump is disabled
- 1 = fadump is enabled
-
- This interface can be used by kdump init scripts to identify if
- fadump is enabled in the kernel and act accordingly.
-
- /sys/kernel/fadump_registered
-
- This is used to display the fadump registration status as well
- as to control (start/stop) the fadump registration.
- 0 = fadump is not registered.
- 1 = fadump is registered and ready to handle system crash.
-
- To register fadump echo 1 > /sys/kernel/fadump_registered and
- echo 0 > /sys/kernel/fadump_registered for un-register and stop the
- fadump. Once the fadump is un-registered, the system crash will not
- be handled and vmcore will not be captured. This interface can be
- easily integrated with kdump service start/stop.
-
- /sys/kernel/fadump_release_mem
-
- This file is available only when fadump is active during
- second kernel. This is used to release the reserved memory
- region that are held for saving crash dump. To release the
- reserved memory echo 1 to it:
-
- echo 1 > /sys/kernel/fadump_release_mem
-
- After echo 1, the content of the /sys/kernel/debug/powerpc/fadump_region
- file will change to reflect the new memory reservations.
-
- The existing userspace tools (kdump infrastructure) can be easily
- enhanced to use this interface to release the memory reserved for
- dump and continue without 2nd reboot.
-
-Here is the list of files under powerpc debugfs:
-(Assuming debugfs is mounted on /sys/kernel/debug directory.)
-
- /sys/kernel/debug/powerpc/fadump_region
-
- This file shows the reserved memory regions if fadump is
- enabled otherwise this file is empty. The output format
- is:
- <region>: [<start>-<end>] <reserved-size> bytes, Dumped: <dump-size>
-
- e.g.
- Contents when fadump is registered during first kernel
-
- # cat /sys/kernel/debug/powerpc/fadump_region
- CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x0
- HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x0
- DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x0
-
- Contents when fadump is active during second kernel
-
- # cat /sys/kernel/debug/powerpc/fadump_region
- CPU : [0x0000006ffb0000-0x0000006fff001f] 0x40020 bytes, Dumped: 0x40020
- HPTE: [0x0000006fff0020-0x0000006fff101f] 0x1000 bytes, Dumped: 0x1000
- DUMP: [0x0000006fff1020-0x0000007fff101f] 0x10000000 bytes, Dumped: 0x10000000
- : [0x00000010000000-0x0000006ffaffff] 0x5ffb0000 bytes, Dumped: 0x5ffb0000
-
-NOTE: Please refer to Documentation/filesystems/debugfs.txt on
- how to mount the debugfs filesystem.
-
-
-TODO:
------
- o Need to come up with the better approach to find out more
- accurate boot memory size that is required for a kernel to
- boot successfully when booted with restricted memory.
- o The fadump implementation introduces a fadump crash info structure
- in the scratch area before the ELF core header. The idea of introducing
- this structure is to pass some important crash info data to the second
- kernel which will help second kernel to populate ELF core header with
- correct data before it gets exported through /proc/vmcore. The current
- design implementation does not address a possibility of introducing
- additional fields (in future) to this structure without affecting
- compatibility. Need to come up with the better approach to address this.
- The possible approaches are:
- 1. Introduce version field for version tracking, bump up the version
- whenever a new field is added to the structure in future. The version
- field can be used to find out what fields are valid for the current
- version of the structure.
- 2. Reserve the area of predefined size (say PAGE_SIZE) for this
- structure and have unused area as reserved (initialized to zero)
- for future field additions.
- The advantage of approach 1 over 2 is we don't need to reserve extra space.
----
-Author: Mahesh Salgaonkar <mahesh@linux.vnet.ibm.com>
-This document is based on the original documentation written for phyp
-assisted dump by Linas Vepstas and Manish Ahuja.
--- /dev/null
+===============================================================
+HVCS IBM "Hypervisor Virtual Console Server" Installation Guide
+===============================================================
+
+for Linux Kernel 2.6.4+
+
+Copyright (C) 2004 IBM Corporation
+
+.. ===========================================================================
+.. NOTE:Eight space tabs are the optimum editor setting for reading this file.
+.. ===========================================================================
+
+
+Author(s): Ryan S. Arnold <rsa@us.ibm.com>
+
+Date Created: March, 02, 2004
+Last Changed: August, 24, 2004
+
+.. Table of contents:
+
+ 1. Driver Introduction:
+ 2. System Requirements
+ 3. Build Options:
+ 3.1 Built-in:
+ 3.2 Module:
+ 4. Installation:
+ 5. Connection:
+ 6. Disconnection:
+ 7. Configuration:
+ 8. Questions & Answers:
+ 9. Reporting Bugs:
+
+1. Driver Introduction:
+=======================
+
+This is the device driver for the IBM Hypervisor Virtual Console Server,
+"hvcs". The IBM hvcs provides a tty driver interface to allow Linux user
+space applications access to the system consoles of logically partitioned
+operating systems (Linux and AIX) running on the same partitioned Power5
+ppc64 system. Physical hardware consoles per partition are not practical
+on this hardware so system consoles are accessed by this driver using
+firmware interfaces to virtual terminal devices.
+
+2. System Requirements:
+=======================
+
+This device driver was written using 2.6.4 Linux kernel APIs and will only
+build and run on kernels of this version or later.
+
+This driver was written to operate solely on IBM Power5 ppc64 hardware
+though some care was taken to abstract the architecture dependent firmware
+calls from the driver code.
+
+Sysfs must be mounted on the system so that the user can determine which
+major and minor numbers are associated with each vty-server. Directions
+for sysfs mounting are outside the scope of this document.
+
+3. Build Options:
+=================
+
+The hvcs driver registers itself as a tty driver. The tty layer
+dynamically allocates a block of major and minor numbers in a quantity
+requested by the registering driver. The hvcs driver asks the tty layer
+for 64 of these major/minor numbers by default to use for hvcs device node
+entries.
+
+If the default number of device entries is adequate then this driver can be
+built into the kernel. If not, the default can be over-ridden by inserting
+the driver as a module with insmod parameters.
+
+3.1 Built-in:
+-------------
+
+The following menuconfig example demonstrates selecting to build this
+driver into the kernel::
+
+ Device Drivers --->
+ Character devices --->
+ <*> IBM Hypervisor Virtual Console Server Support
+
+Begin the kernel make process.
+
+3.2 Module:
+-----------
+
+The following menuconfig example demonstrates selecting to build this
+driver as a kernel module::
+
+ Device Drivers --->
+ Character devices --->
+ <M> IBM Hypervisor Virtual Console Server Support
+
+The make process will build the following kernel modules:
+
+ - hvcs.ko
+ - hvcserver.ko
+
+To insert the module with the default allocation execute the following
+commands in the order they appear::
+
+ insmod hvcserver.ko
+ insmod hvcs.ko
+
+The hvcserver module contains architecture specific firmware calls and must
+be inserted first, otherwise the hvcs module will not find some of the
+symbols it expects.
+
+To override the default use an insmod parameter as follows (requesting 4
+tty devices as an example)::
+
+ insmod hvcs.ko hvcs_parm_num_devs=4
+
+There is a maximum number of dev entries that can be specified on insmod.
+We think that 1024 is currently a decent maximum number of server adapters
+to allow. This can always be changed by modifying the constant in the
+source file before building.
+
+NOTE: The length of time it takes to insmod the driver seems to be related
+to the number of tty interfaces the registering driver requests.
+
+In order to remove the driver module execute the following command::
+
+ rmmod hvcs.ko
+
+The recommended method for installing hvcs as a module is to use depmod to
+build a current modules.dep file in /lib/modules/`uname -r` and then
+execute::
+
+ modprobe hvcs hvcs_parm_num_devs=4
+
+The modules.dep file indicates that hvcserver.ko needs to be inserted
+before hvcs.ko and modprobe uses this file to smartly insert the modules in
+the proper order.
+
+The following modprobe command is used to remove hvcs and hvcserver in the
+proper order::
+
+ modprobe -r hvcs
+
+4. Installation:
+================
+
+The tty layer creates sysfs entries which contain the major and minor
+numbers allocated for the hvcs driver. The following snippet of "tree"
+output of the sysfs directory shows where these numbers are presented::
+
+ sys/
+ |-- *other sysfs base dirs*
+ |
+ |-- class
+ | |-- *other classes of devices*
+ | |
+ | `-- tty
+ | |-- *other tty devices*
+ | |
+ | |-- hvcs0
+ | | `-- dev
+ | |-- hvcs1
+ | | `-- dev
+ | |-- hvcs2
+ | | `-- dev
+ | |-- hvcs3
+ | | `-- dev
+ | |
+ | |-- *other tty devices*
+ |
+ |-- *other sysfs base dirs*
+
+For the above examples the following output is a result of cat'ing the
+"dev" entry in the hvcs directory::
+
+ Pow5:/sys/class/tty/hvcs0/ # cat dev
+ 254:0
+
+ Pow5:/sys/class/tty/hvcs1/ # cat dev
+ 254:1
+
+ Pow5:/sys/class/tty/hvcs2/ # cat dev
+ 254:2
+
+ Pow5:/sys/class/tty/hvcs3/ # cat dev
+ 254:3
+
+The output from reading the "dev" attribute is the char device major and
+minor numbers that the tty layer has allocated for this driver's use. Most
+systems running hvcs will already have the device entries created or udev
+will do it automatically.
+
+Given the example output above, to manually create a /dev/hvcs* node entry
+mknod can be used as follows::
+
+ mknod /dev/hvcs0 c 254 0
+ mknod /dev/hvcs1 c 254 1
+ mknod /dev/hvcs2 c 254 2
+ mknod /dev/hvcs3 c 254 3
+
+Using mknod to manually create the device entries makes these device nodes
+persistent. Once created they will exist prior to the driver insmod.
+
+Attempting to connect an application to /dev/hvcs* prior to insertion of
+the hvcs module will result in an error message similar to the following::
+
+ "/dev/hvcs*: No such device".
+
+NOTE: Just because there is a device node present doesn't mean that there
+is a vty-server device configured for that node.
+
+5. Connection
+=============
+
+Since this driver controls devices that provide a tty interface a user can
+interact with the device node entries using any standard tty-interactive
+method (e.g. "cat", "dd", "echo"). The intent of this driver however, is
+to provide real time console interaction with a Linux partition's console,
+which requires the use of applications that provide bi-directional,
+interactive I/O with a tty device.
+
+Applications (e.g. "minicom" and "screen") that act as terminal emulators
+or perform terminal type control sequence conversion on the data being
+passed through them are NOT acceptable for providing interactive console
+I/O. These programs often emulate antiquated terminal types (vt100 and
+ANSI) and expect inbound data to take the form of one of these supported
+terminal types but they either do not convert, or do not _adequately_
+convert, outbound data into the terminal type of the terminal which invoked
+them (though screen makes an attempt and can apparently be configured with
+much termcap wrestling.)
+
+For this reason kermit and cu are two of the recommended applications for
+interacting with a Linux console via an hvcs device. These programs simply
+act as a conduit for data transfer to and from the tty device. They do not
+require inbound data to take the form of a particular terminal type, nor do
+they cook outbound data to a particular terminal type.
+
+In order to ensure proper functioning of console applications one must make
+sure that once connected to a /dev/hvcs console that the console's $TERM
+env variable is set to the exact terminal type of the terminal emulator
+used to launch the interactive I/O application. If one is using xterm and
+kermit to connect to /dev/hvcs0 when the console prompt becomes available
+one should "export TERM=xterm" on the console. This tells ncurses
+applications that are invoked from the console that they should output
+control sequences that xterm can understand.
+
+As a precautionary measure an hvcs user should always "exit" from their
+session before disconnecting an application such as kermit from the device
+node. If this is not done, the next user to connect to the console will
+continue using the previous user's logged in session which includes
+using the $TERM variable that the previous user supplied.
+
+Hotplug add and remove of vty-server adapters affects which /dev/hvcs* node
+is used to connect to each vty-server adapter. In order to determine which
+vty-server adapter is associated with which /dev/hvcs* node a special sysfs
+attribute has been added to each vty-server sysfs entry. This entry is
+called "index" and showing it reveals an integer that refers to the
+/dev/hvcs* entry to use to connect to that device. For instance cating the
+index attribute of vty-server adapter 30000004 shows the following::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index
+ 2
+
+This index of '2' means that in order to connect to vty-server adapter
+30000004 the user should interact with /dev/hvcs2.
+
+It should be noted that due to the system hotplug I/O capabilities of a
+system the /dev/hvcs* entry that interacts with a particular vty-server
+adapter is not guaranteed to remain the same across system reboots. Look
+in the Q & A section for more on this issue.
+
+6. Disconnection
+================
+
+As a security feature to prevent the delivery of stale data to an
+unintended target the Power5 system firmware disables the fetching of data
+and discards that data when a connection between a vty-server and a vty has
+been severed. As an example, when a vty-server is immediately disconnected
+from a vty following output of data to the vty the vty adapter may not have
+enough time between when it received the data interrupt and when the
+connection was severed to fetch the data from firmware before the fetch is
+disabled by firmware.
+
+When hvcs is being used to serve consoles this behavior is not a huge issue
+because the adapter stays connected for large amounts of time following
+almost all data writes. When hvcs is being used as a tty conduit to tunnel
+data between two partitions [see Q & A below] this is a huge problem
+because the standard Linux behavior when cat'ing or dd'ing data to a device
+is to open the tty, send the data, and then close the tty. If this driver
+manually terminated vty-server connections on tty close this would close
+the vty-server and vty connection before the target vty has had a chance to
+fetch the data.
+
+Additionally, disconnecting a vty-server and vty only on module removal or
+adapter removal is impractical because other vty-servers in other
+partitions may require the usage of the target vty at any time.
+
+Due to this behavioral restriction disconnection of vty-servers from the
+connected vty is a manual procedure using a write to a sysfs attribute
+outlined below, on the other hand the initial vty-server connection to a
+vty is established automatically by this driver. Manual vty-server
+connection is never required.
+
+In order to terminate the connection between a vty-server and vty the
+"vterm_state" sysfs attribute within each vty-server's sysfs entry is used.
+Reading this attribute reveals the current connection state of the
+vty-server adapter. A zero means that the vty-server is not connected to a
+vty. A one indicates that a connection is active.
+
+Writing a '0' (zero) to the vterm_state attribute will disconnect the VTERM
+connection between the vty-server and target vty ONLY if the vterm_state
+previously read '1'. The write directive is ignored if the vterm_state
+read '0' or if any value other than '0' was written to the vterm_state
+attribute. The following example will show the method used for verifying
+the vty-server connection status and disconnecting a vty-server connection::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
+ 1
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo 0 > vterm_state
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
+ 0
+
+All vty-server connections are automatically terminated when the device is
+hotplug removed and when the module is removed.
+
+7. Configuration
+================
+
+Each vty-server has a sysfs entry in the /sys/devices/vio directory, which
+is symlinked in several other sysfs tree directories, notably under the
+hvcs driver entry, which looks like the following example::
+
+ Pow5:/sys/bus/vio/drivers/hvcs # ls
+ . .. 30000003 30000004 rescan
+
+By design, firmware notifies the hvcs driver of vty-server lifetimes and
+partner vty removals but not the addition of partner vtys. Since an HMC
+Super Admin can add partner info dynamically we have provided the hvcs
+driver sysfs directory with the "rescan" update attribute which will query
+firmware and update the partner info for all the vty-servers that this
+driver manages. Writing a '1' to the attribute triggers the update. An
+explicit example follows:
+
+ Pow5:/sys/bus/vio/drivers/hvcs # echo 1 > rescan
+
+Reading the attribute will indicate a state of '1' or '0'. A one indicates
+that an update is in process. A zero indicates that an update has
+completed or was never executed.
+
+Vty-server entries in this directory are a 32 bit partition unique unit
+address that is created by firmware. An example vty-server sysfs entry
+looks like the following::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
+ . current_vty devspec name partner_vtys
+ .. index partner_clcs vterm_state
+
+Each entry is provided, by default with a "name" attribute. Reading the
+"name" attribute will reveal the device type as shown in the following
+example::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name
+ vty-server
+
+Each entry is also provided, by default, with a "devspec" attribute which
+reveals the full device specification when read, as shown in the following
+example::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec
+ /vdevice/vty-server@30000004
+
+Each vty-server sysfs dir is provided with two read-only attributes that
+provide lists of easily parsed partner vty data: "partner_vtys" and
+"partner_clcs"::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys
+ 30000000
+ 30000001
+ 30000002
+ 30000000
+ 30000000
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_clcs
+ U5112.428.103048A-V3-C0
+ U5112.428.103048A-V3-C2
+ U5112.428.103048A-V3-C3
+ U5112.428.103048A-V4-C0
+ U5112.428.103048A-V5-C0
+
+Reading partner_vtys returns a list of partner vtys. Vty unit address
+numbering is only per-partition-unique so entries will frequently repeat.
+
+Reading partner_clcs returns a list of "converged location codes" which are
+composed of a system serial number followed by "-V*", where the '*' is the
+target partition number, and "-C*", where the '*' is the slot of the
+adapter. The first vty partner corresponds to the first clc item, the
+second vty partner to the second clc item, etc.
+
+A vty-server can only be connected to a single vty at a time. The entry,
+"current_vty" prints the clc of the currently selected partner vty when
+read.
+
+The current_vty can be changed by writing a valid partner clc to the entry
+as in the following example::
+
+ Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304
+ 8A-V4-C0 > current_vty
+
+Changing the current_vty when a vty-server is already connected to a vty
+does not affect the current connection. The change takes effect when the
+currently open connection is freed.
+
+Information on the "vterm_state" attribute was covered earlier on the
+chapter entitled "disconnection".
+
+8. Questions & Answers:
+=======================
+
+Q: What are the security concerns involving hvcs?
+
+A: There are three main security concerns:
+
+ 1. The creator of the /dev/hvcs* nodes has the ability to restrict
+ the access of the device entries to certain users or groups. It
+ may be best to create a special hvcs group privilege for providing
+ access to system consoles.
+
+ 2. To provide network security when grabbing the console it is
+ suggested that the user connect to the console hosting partition
+ using a secure method, such as SSH or sit at a hardware console.
+
+ 3. Make sure to exit the user session when done with a console or
+ the next vty-server connection (which may be from another
+ partition) will experience the previously logged in session.
+
+---------------------------------------------------------------------------
+
+Q: How do I multiplex a console that I grab through hvcs so that other
+people can see it:
+
+A: You can use "screen" to directly connect to the /dev/hvcs* device and
+setup a session on your machine with the console group privileges. As
+pointed out earlier by default screen doesn't provide the termcap settings
+for most terminal emulators to provide adequate character conversion from
+term type "screen" to others. This means that curses based programs may
+not display properly in screen sessions.
+
+---------------------------------------------------------------------------
+
+Q: Why are the colors all messed up?
+Q: Why are the control characters acting strange or not working?
+Q: Why is the console output all strange and unintelligible?
+
+A: Please see the preceding section on "Connection" for a discussion of how
+applications can affect the display of character control sequences.
+Additionally, just because you logged into the console using and xterm
+doesn't mean someone else didn't log into the console with the HMC console
+(vt320) before you and leave the session logged in. The best thing to do
+is to export TERM to the terminal type of your terminal emulator when you
+get the console. Additionally make sure to "exit" the console before you
+disconnect from the console. This will ensure that the next user gets
+their own TERM type set when they login.
+
+---------------------------------------------------------------------------
+
+Q: When I try to CONNECT kermit to an hvcs device I get:
+"Sorry, can't open connection: /dev/hvcs*"What is happening?
+
+A: Some other Power5 console mechanism has a connection to the vty and
+isn't giving it up. You can try to force disconnect the consoles from the
+HMC by right clicking on the partition and then selecting "close terminal".
+Otherwise you have to hunt down the people who have console authority. It
+is possible that you already have the console open using another kermit
+session and just forgot about it. Please review the console options for
+Power5 systems to determine the many ways a system console can be held.
+
+OR
+
+A: Another user may not have a connectivity method currently attached to a
+/dev/hvcs device but the vterm_state may reveal that they still have the
+vty-server connection established. They need to free this using the method
+outlined in the section on "Disconnection" in order for others to connect
+to the target vty.
+
+OR
+
+A: The user profile you are using to execute kermit probably doesn't have
+permissions to use the /dev/hvcs* device.
+
+OR
+
+A: You probably haven't inserted the hvcs.ko module yet but the /dev/hvcs*
+entry still exists (on systems without udev).
+
+OR
+
+A: There is not a corresponding vty-server device that maps to an existing
+/dev/hvcs* entry.
+
+---------------------------------------------------------------------------
+
+Q: When I try to CONNECT kermit to an hvcs device I get:
+"Sorry, write access to UUCP lockfile directory denied."
+
+A: The /dev/hvcs* entry you have specified doesn't exist where you said it
+does? Maybe you haven't inserted the module (on systems with udev).
+
+---------------------------------------------------------------------------
+
+Q: If I already have one Linux partition installed can I use hvcs on said
+partition to provide the console for the install of a second Linux
+partition?
+
+A: Yes granted that your are connected to the /dev/hvcs* device using
+kermit or cu or some other program that doesn't provide terminal emulation.
+
+---------------------------------------------------------------------------
+
+Q: Can I connect to more than one partition's console at a time using this
+driver?
+
+A: Yes. Of course this means that there must be more than one vty-server
+configured for this partition and each must point to a disconnected vty.
+
+---------------------------------------------------------------------------
+
+Q: Does the hvcs driver support dynamic (hotplug) addition of devices?
+
+A: Yes, if you have dlpar and hotplug enabled for your system and it has
+been built into the kernel the hvcs drivers is configured to dynamically
+handle additions of new devices and removals of unused devices.
+
+---------------------------------------------------------------------------
+
+Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter
+after a reboot. What happened?
+
+A: Assignment of vty-server adapters to /dev/hvcs* entries is always done
+in the order that the adapters are exposed. Due to hotplug capabilities of
+this driver assignment of hotplug added vty-servers may be in a different
+order than how they would be exposed on module load. Rebooting or
+reloading the module after dynamic addition may result in the /dev/hvcs*
+and vty-server coupling changing if a vty-server adapter was added in a
+slot between two other vty-server adapters. Refer to the section above
+on how to determine which vty-server goes with which /dev/hvcs* node.
+Hint; look at the sysfs "index" attribute for the vty-server.
+
+---------------------------------------------------------------------------
+
+Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty
+device on that partition as the other end of the pipe?
+
+A: Yes, on Power5 platforms the hvc_console driver provides a tty interface
+for extra /dev/hvc* devices (where /dev/hvc0 is most likely the console).
+In order to get a tty conduit working between the two partitions the HMC
+Super Admin must create an additional "serial server" for the target
+partition with the HMC gui which will show up as /dev/hvc* when the target
+partition is rebooted.
+
+The HMC Super Admin then creates an additional "serial client" for the
+current partition and points this at the target partition's newly created
+"serial server" adapter (remember the slot). This shows up as an
+additional /dev/hvcs* device.
+
+Now a program on the target system can be configured to read or write to
+/dev/hvc* and another program on the current partition can be configured to
+read or write to /dev/hvcs*. Now you have a tty conduit between two
+partitions.
+
+---------------------------------------------------------------------------
+
+9. Reporting Bugs:
+==================
+
+The proper channel for reporting bugs is either through the Linux OS
+distribution company that provided your OS or by posting issues to the
+PowerPC development mailing list at:
+
+linuxppc-dev@lists.ozlabs.org
+
+This request is to provide a documented and searchable public exchange
+of the problems and solutions surrounding this driver for the benefit of
+all users.
+++ /dev/null
-===========================================================================
- HVCS
- IBM "Hypervisor Virtual Console Server" Installation Guide
- for Linux Kernel 2.6.4+
- Copyright (C) 2004 IBM Corporation
-
-===========================================================================
-NOTE:Eight space tabs are the optimum editor setting for reading this file.
-===========================================================================
-
- Author(s) : Ryan S. Arnold <rsa@us.ibm.com>
- Date Created: March, 02, 2004
- Last Changed: August, 24, 2004
-
----------------------------------------------------------------------------
-Table of contents:
-
- 1. Driver Introduction:
- 2. System Requirements
- 3. Build Options:
- 3.1 Built-in:
- 3.2 Module:
- 4. Installation:
- 5. Connection:
- 6. Disconnection:
- 7. Configuration:
- 8. Questions & Answers:
- 9. Reporting Bugs:
-
----------------------------------------------------------------------------
-1. Driver Introduction:
-
-This is the device driver for the IBM Hypervisor Virtual Console Server,
-"hvcs". The IBM hvcs provides a tty driver interface to allow Linux user
-space applications access to the system consoles of logically partitioned
-operating systems (Linux and AIX) running on the same partitioned Power5
-ppc64 system. Physical hardware consoles per partition are not practical
-on this hardware so system consoles are accessed by this driver using
-firmware interfaces to virtual terminal devices.
-
----------------------------------------------------------------------------
-2. System Requirements:
-
-This device driver was written using 2.6.4 Linux kernel APIs and will only
-build and run on kernels of this version or later.
-
-This driver was written to operate solely on IBM Power5 ppc64 hardware
-though some care was taken to abstract the architecture dependent firmware
-calls from the driver code.
-
-Sysfs must be mounted on the system so that the user can determine which
-major and minor numbers are associated with each vty-server. Directions
-for sysfs mounting are outside the scope of this document.
-
----------------------------------------------------------------------------
-3. Build Options:
-
-The hvcs driver registers itself as a tty driver. The tty layer
-dynamically allocates a block of major and minor numbers in a quantity
-requested by the registering driver. The hvcs driver asks the tty layer
-for 64 of these major/minor numbers by default to use for hvcs device node
-entries.
-
-If the default number of device entries is adequate then this driver can be
-built into the kernel. If not, the default can be over-ridden by inserting
-the driver as a module with insmod parameters.
-
----------------------------------------------------------------------------
-3.1 Built-in:
-
-The following menuconfig example demonstrates selecting to build this
-driver into the kernel.
-
- Device Drivers --->
- Character devices --->
- <*> IBM Hypervisor Virtual Console Server Support
-
-Begin the kernel make process.
-
----------------------------------------------------------------------------
-3.2 Module:
-
-The following menuconfig example demonstrates selecting to build this
-driver as a kernel module.
-
- Device Drivers --->
- Character devices --->
- <M> IBM Hypervisor Virtual Console Server Support
-
-The make process will build the following kernel modules:
-
- hvcs.ko
- hvcserver.ko
-
-To insert the module with the default allocation execute the following
-commands in the order they appear:
-
- insmod hvcserver.ko
- insmod hvcs.ko
-
-The hvcserver module contains architecture specific firmware calls and must
-be inserted first, otherwise the hvcs module will not find some of the
-symbols it expects.
-
-To override the default use an insmod parameter as follows (requesting 4
-tty devices as an example):
-
- insmod hvcs.ko hvcs_parm_num_devs=4
-
-There is a maximum number of dev entries that can be specified on insmod.
-We think that 1024 is currently a decent maximum number of server adapters
-to allow. This can always be changed by modifying the constant in the
-source file before building.
-
-NOTE: The length of time it takes to insmod the driver seems to be related
-to the number of tty interfaces the registering driver requests.
-
-In order to remove the driver module execute the following command:
-
- rmmod hvcs.ko
-
-The recommended method for installing hvcs as a module is to use depmod to
-build a current modules.dep file in /lib/modules/`uname -r` and then
-execute:
-
-modprobe hvcs hvcs_parm_num_devs=4
-
-The modules.dep file indicates that hvcserver.ko needs to be inserted
-before hvcs.ko and modprobe uses this file to smartly insert the modules in
-the proper order.
-
-The following modprobe command is used to remove hvcs and hvcserver in the
-proper order:
-
-modprobe -r hvcs
-
----------------------------------------------------------------------------
-4. Installation:
-
-The tty layer creates sysfs entries which contain the major and minor
-numbers allocated for the hvcs driver. The following snippet of "tree"
-output of the sysfs directory shows where these numbers are presented:
-
- sys/
- |-- *other sysfs base dirs*
- |
- |-- class
- | |-- *other classes of devices*
- | |
- | `-- tty
- | |-- *other tty devices*
- | |
- | |-- hvcs0
- | | `-- dev
- | |-- hvcs1
- | | `-- dev
- | |-- hvcs2
- | | `-- dev
- | |-- hvcs3
- | | `-- dev
- | |
- | |-- *other tty devices*
- |
- |-- *other sysfs base dirs*
-
-For the above examples the following output is a result of cat'ing the
-"dev" entry in the hvcs directory:
-
- Pow5:/sys/class/tty/hvcs0/ # cat dev
- 254:0
-
- Pow5:/sys/class/tty/hvcs1/ # cat dev
- 254:1
-
- Pow5:/sys/class/tty/hvcs2/ # cat dev
- 254:2
-
- Pow5:/sys/class/tty/hvcs3/ # cat dev
- 254:3
-
-The output from reading the "dev" attribute is the char device major and
-minor numbers that the tty layer has allocated for this driver's use. Most
-systems running hvcs will already have the device entries created or udev
-will do it automatically.
-
-Given the example output above, to manually create a /dev/hvcs* node entry
-mknod can be used as follows:
-
- mknod /dev/hvcs0 c 254 0
- mknod /dev/hvcs1 c 254 1
- mknod /dev/hvcs2 c 254 2
- mknod /dev/hvcs3 c 254 3
-
-Using mknod to manually create the device entries makes these device nodes
-persistent. Once created they will exist prior to the driver insmod.
-
-Attempting to connect an application to /dev/hvcs* prior to insertion of
-the hvcs module will result in an error message similar to the following:
-
- "/dev/hvcs*: No such device".
-
-NOTE: Just because there is a device node present doesn't mean that there
-is a vty-server device configured for that node.
-
----------------------------------------------------------------------------
-5. Connection
-
-Since this driver controls devices that provide a tty interface a user can
-interact with the device node entries using any standard tty-interactive
-method (e.g. "cat", "dd", "echo"). The intent of this driver however, is
-to provide real time console interaction with a Linux partition's console,
-which requires the use of applications that provide bi-directional,
-interactive I/O with a tty device.
-
-Applications (e.g. "minicom" and "screen") that act as terminal emulators
-or perform terminal type control sequence conversion on the data being
-passed through them are NOT acceptable for providing interactive console
-I/O. These programs often emulate antiquated terminal types (vt100 and
-ANSI) and expect inbound data to take the form of one of these supported
-terminal types but they either do not convert, or do not _adequately_
-convert, outbound data into the terminal type of the terminal which invoked
-them (though screen makes an attempt and can apparently be configured with
-much termcap wrestling.)
-
-For this reason kermit and cu are two of the recommended applications for
-interacting with a Linux console via an hvcs device. These programs simply
-act as a conduit for data transfer to and from the tty device. They do not
-require inbound data to take the form of a particular terminal type, nor do
-they cook outbound data to a particular terminal type.
-
-In order to ensure proper functioning of console applications one must make
-sure that once connected to a /dev/hvcs console that the console's $TERM
-env variable is set to the exact terminal type of the terminal emulator
-used to launch the interactive I/O application. If one is using xterm and
-kermit to connect to /dev/hvcs0 when the console prompt becomes available
-one should "export TERM=xterm" on the console. This tells ncurses
-applications that are invoked from the console that they should output
-control sequences that xterm can understand.
-
-As a precautionary measure an hvcs user should always "exit" from their
-session before disconnecting an application such as kermit from the device
-node. If this is not done, the next user to connect to the console will
-continue using the previous user's logged in session which includes
-using the $TERM variable that the previous user supplied.
-
-Hotplug add and remove of vty-server adapters affects which /dev/hvcs* node
-is used to connect to each vty-server adapter. In order to determine which
-vty-server adapter is associated with which /dev/hvcs* node a special sysfs
-attribute has been added to each vty-server sysfs entry. This entry is
-called "index" and showing it reveals an integer that refers to the
-/dev/hvcs* entry to use to connect to that device. For instance cating the
-index attribute of vty-server adapter 30000004 shows the following.
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat index
- 2
-
-This index of '2' means that in order to connect to vty-server adapter
-30000004 the user should interact with /dev/hvcs2.
-
-It should be noted that due to the system hotplug I/O capabilities of a
-system the /dev/hvcs* entry that interacts with a particular vty-server
-adapter is not guaranteed to remain the same across system reboots. Look
-in the Q & A section for more on this issue.
-
----------------------------------------------------------------------------
-6. Disconnection
-
-As a security feature to prevent the delivery of stale data to an
-unintended target the Power5 system firmware disables the fetching of data
-and discards that data when a connection between a vty-server and a vty has
-been severed. As an example, when a vty-server is immediately disconnected
-from a vty following output of data to the vty the vty adapter may not have
-enough time between when it received the data interrupt and when the
-connection was severed to fetch the data from firmware before the fetch is
-disabled by firmware.
-
-When hvcs is being used to serve consoles this behavior is not a huge issue
-because the adapter stays connected for large amounts of time following
-almost all data writes. When hvcs is being used as a tty conduit to tunnel
-data between two partitions [see Q & A below] this is a huge problem
-because the standard Linux behavior when cat'ing or dd'ing data to a device
-is to open the tty, send the data, and then close the tty. If this driver
-manually terminated vty-server connections on tty close this would close
-the vty-server and vty connection before the target vty has had a chance to
-fetch the data.
-
-Additionally, disconnecting a vty-server and vty only on module removal or
-adapter removal is impractical because other vty-servers in other
-partitions may require the usage of the target vty at any time.
-
-Due to this behavioral restriction disconnection of vty-servers from the
-connected vty is a manual procedure using a write to a sysfs attribute
-outlined below, on the other hand the initial vty-server connection to a
-vty is established automatically by this driver. Manual vty-server
-connection is never required.
-
-In order to terminate the connection between a vty-server and vty the
-"vterm_state" sysfs attribute within each vty-server's sysfs entry is used.
-Reading this attribute reveals the current connection state of the
-vty-server adapter. A zero means that the vty-server is not connected to a
-vty. A one indicates that a connection is active.
-
-Writing a '0' (zero) to the vterm_state attribute will disconnect the VTERM
-connection between the vty-server and target vty ONLY if the vterm_state
-previously read '1'. The write directive is ignored if the vterm_state
-read '0' or if any value other than '0' was written to the vterm_state
-attribute. The following example will show the method used for verifying
-the vty-server connection status and disconnecting a vty-server connection.
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
- 1
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo 0 > vterm_state
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat vterm_state
- 0
-
-All vty-server connections are automatically terminated when the device is
-hotplug removed and when the module is removed.
-
----------------------------------------------------------------------------
-7. Configuration
-
-Each vty-server has a sysfs entry in the /sys/devices/vio directory, which
-is symlinked in several other sysfs tree directories, notably under the
-hvcs driver entry, which looks like the following example:
-
- Pow5:/sys/bus/vio/drivers/hvcs # ls
- . .. 30000003 30000004 rescan
-
-By design, firmware notifies the hvcs driver of vty-server lifetimes and
-partner vty removals but not the addition of partner vtys. Since an HMC
-Super Admin can add partner info dynamically we have provided the hvcs
-driver sysfs directory with the "rescan" update attribute which will query
-firmware and update the partner info for all the vty-servers that this
-driver manages. Writing a '1' to the attribute triggers the update. An
-explicit example follows:
-
- Pow5:/sys/bus/vio/drivers/hvcs # echo 1 > rescan
-
-Reading the attribute will indicate a state of '1' or '0'. A one indicates
-that an update is in process. A zero indicates that an update has
-completed or was never executed.
-
-Vty-server entries in this directory are a 32 bit partition unique unit
-address that is created by firmware. An example vty-server sysfs entry
-looks like the following:
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # ls
- . current_vty devspec name partner_vtys
- .. index partner_clcs vterm_state
-
-Each entry is provided, by default with a "name" attribute. Reading the
-"name" attribute will reveal the device type as shown in the following
-example:
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000003 # cat name
- vty-server
-
-Each entry is also provided, by default, with a "devspec" attribute which
-reveals the full device specification when read, as shown in the following
-example:
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat devspec
- /vdevice/vty-server@30000004
-
-Each vty-server sysfs dir is provided with two read-only attributes that
-provide lists of easily parsed partner vty data: "partner_vtys" and
-"partner_clcs".
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_vtys
- 30000000
- 30000001
- 30000002
- 30000000
- 30000000
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # cat partner_clcs
- U5112.428.103048A-V3-C0
- U5112.428.103048A-V3-C2
- U5112.428.103048A-V3-C3
- U5112.428.103048A-V4-C0
- U5112.428.103048A-V5-C0
-
-Reading partner_vtys returns a list of partner vtys. Vty unit address
-numbering is only per-partition-unique so entries will frequently repeat.
-
-Reading partner_clcs returns a list of "converged location codes" which are
-composed of a system serial number followed by "-V*", where the '*' is the
-target partition number, and "-C*", where the '*' is the slot of the
-adapter. The first vty partner corresponds to the first clc item, the
-second vty partner to the second clc item, etc.
-
-A vty-server can only be connected to a single vty at a time. The entry,
-"current_vty" prints the clc of the currently selected partner vty when
-read.
-
-The current_vty can be changed by writing a valid partner clc to the entry
-as in the following example:
-
- Pow5:/sys/bus/vio/drivers/hvcs/30000004 # echo U5112.428.10304
- 8A-V4-C0 > current_vty
-
-Changing the current_vty when a vty-server is already connected to a vty
-does not affect the current connection. The change takes effect when the
-currently open connection is freed.
-
-Information on the "vterm_state" attribute was covered earlier on the
-chapter entitled "disconnection".
-
----------------------------------------------------------------------------
-8. Questions & Answers:
-===========================================================================
-Q: What are the security concerns involving hvcs?
-
-A: There are three main security concerns:
-
- 1. The creator of the /dev/hvcs* nodes has the ability to restrict
- the access of the device entries to certain users or groups. It
- may be best to create a special hvcs group privilege for providing
- access to system consoles.
-
- 2. To provide network security when grabbing the console it is
- suggested that the user connect to the console hosting partition
- using a secure method, such as SSH or sit at a hardware console.
-
- 3. Make sure to exit the user session when done with a console or
- the next vty-server connection (which may be from another
- partition) will experience the previously logged in session.
-
----------------------------------------------------------------------------
-Q: How do I multiplex a console that I grab through hvcs so that other
-people can see it:
-
-A: You can use "screen" to directly connect to the /dev/hvcs* device and
-setup a session on your machine with the console group privileges. As
-pointed out earlier by default screen doesn't provide the termcap settings
-for most terminal emulators to provide adequate character conversion from
-term type "screen" to others. This means that curses based programs may
-not display properly in screen sessions.
-
----------------------------------------------------------------------------
-Q: Why are the colors all messed up?
-Q: Why are the control characters acting strange or not working?
-Q: Why is the console output all strange and unintelligible?
-
-A: Please see the preceding section on "Connection" for a discussion of how
-applications can affect the display of character control sequences.
-Additionally, just because you logged into the console using and xterm
-doesn't mean someone else didn't log into the console with the HMC console
-(vt320) before you and leave the session logged in. The best thing to do
-is to export TERM to the terminal type of your terminal emulator when you
-get the console. Additionally make sure to "exit" the console before you
-disconnect from the console. This will ensure that the next user gets
-their own TERM type set when they login.
-
----------------------------------------------------------------------------
-Q: When I try to CONNECT kermit to an hvcs device I get:
-"Sorry, can't open connection: /dev/hvcs*"What is happening?
-
-A: Some other Power5 console mechanism has a connection to the vty and
-isn't giving it up. You can try to force disconnect the consoles from the
-HMC by right clicking on the partition and then selecting "close terminal".
-Otherwise you have to hunt down the people who have console authority. It
-is possible that you already have the console open using another kermit
-session and just forgot about it. Please review the console options for
-Power5 systems to determine the many ways a system console can be held.
-
-OR
-
-A: Another user may not have a connectivity method currently attached to a
-/dev/hvcs device but the vterm_state may reveal that they still have the
-vty-server connection established. They need to free this using the method
-outlined in the section on "Disconnection" in order for others to connect
-to the target vty.
-
-OR
-
-A: The user profile you are using to execute kermit probably doesn't have
-permissions to use the /dev/hvcs* device.
-
-OR
-
-A: You probably haven't inserted the hvcs.ko module yet but the /dev/hvcs*
-entry still exists (on systems without udev).
-
-OR
-
-A: There is not a corresponding vty-server device that maps to an existing
-/dev/hvcs* entry.
-
----------------------------------------------------------------------------
-Q: When I try to CONNECT kermit to an hvcs device I get:
-"Sorry, write access to UUCP lockfile directory denied."
-
-A: The /dev/hvcs* entry you have specified doesn't exist where you said it
-does? Maybe you haven't inserted the module (on systems with udev).
-
----------------------------------------------------------------------------
-Q: If I already have one Linux partition installed can I use hvcs on said
-partition to provide the console for the install of a second Linux
-partition?
-
-A: Yes granted that your are connected to the /dev/hvcs* device using
-kermit or cu or some other program that doesn't provide terminal emulation.
-
----------------------------------------------------------------------------
-Q: Can I connect to more than one partition's console at a time using this
-driver?
-
-A: Yes. Of course this means that there must be more than one vty-server
-configured for this partition and each must point to a disconnected vty.
-
----------------------------------------------------------------------------
-Q: Does the hvcs driver support dynamic (hotplug) addition of devices?
-
-A: Yes, if you have dlpar and hotplug enabled for your system and it has
-been built into the kernel the hvcs drivers is configured to dynamically
-handle additions of new devices and removals of unused devices.
-
----------------------------------------------------------------------------
-Q: For some reason /dev/hvcs* doesn't map to the same vty-server adapter
-after a reboot. What happened?
-
-A: Assignment of vty-server adapters to /dev/hvcs* entries is always done
-in the order that the adapters are exposed. Due to hotplug capabilities of
-this driver assignment of hotplug added vty-servers may be in a different
-order than how they would be exposed on module load. Rebooting or
-reloading the module after dynamic addition may result in the /dev/hvcs*
-and vty-server coupling changing if a vty-server adapter was added in a
-slot between two other vty-server adapters. Refer to the section above
-on how to determine which vty-server goes with which /dev/hvcs* node.
-Hint; look at the sysfs "index" attribute for the vty-server.
-
----------------------------------------------------------------------------
-Q: Can I use /dev/hvcs* as a conduit to another partition and use a tty
-device on that partition as the other end of the pipe?
-
-A: Yes, on Power5 platforms the hvc_console driver provides a tty interface
-for extra /dev/hvc* devices (where /dev/hvc0 is most likely the console).
-In order to get a tty conduit working between the two partitions the HMC
-Super Admin must create an additional "serial server" for the target
-partition with the HMC gui which will show up as /dev/hvc* when the target
-partition is rebooted.
-
-The HMC Super Admin then creates an additional "serial client" for the
-current partition and points this at the target partition's newly created
-"serial server" adapter (remember the slot). This shows up as an
-additional /dev/hvcs* device.
-
-Now a program on the target system can be configured to read or write to
-/dev/hvc* and another program on the current partition can be configured to
-read or write to /dev/hvcs*. Now you have a tty conduit between two
-partitions.
-
----------------------------------------------------------------------------
-9. Reporting Bugs:
-
-The proper channel for reporting bugs is either through the Linux OS
-distribution company that provided your OS or by posting issues to the
-PowerPC development mailing list at:
-
-linuxppc-dev@lists.ozlabs.org
-
-This request is to provide a documented and searchable public exchange
-of the problems and solutions surrounding this driver for the benefit of
-all users.
--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0
+
+=======
+powerpc
+=======
+
+.. toctree::
+ :maxdepth: 1
+
+ bootwrapper
+ cpu_families
+ cpu_features
+ cxl
+ cxlflash
+ dawr-power9
+ dscr
+ eeh-pci-error-recovery
+ firmware-assisted-dump
+ hvcs
+ isa-versions
+ mpc52xx
+ pci_iov_resource_on_powernv
+ pmu-ebb
+ ptrace
+ qe_firmware
+ syscall64-abi
+ transactional_memory
+
+.. only:: subproject and html
+
+ Indices
+ =======
+
+ * :ref:`genindex`
-:orphan:
-
+==========================
CPU to ISA Version Mapping
==========================
Mapping of some CPU versions to relevant ISA versions.
-========= ====================
+========= ====================================================================
CPU Architecture version
-========= ====================
+========= ====================================================================
Power9 Power ISA v3.0B
Power8 Power ISA v2.07
Power7 Power ISA v2.06
- PowerPC Virtual Environment Architecture Book II v2.01
- PowerPC Operating Environment Architecture Book III v2.01
- Plus Altivec/VMX ~= 2.03
-========= ====================
+========= ====================================================================
Key Features
PPC970 No
========== ====
-========== ====================
+========== ====================================
CPU Transactional Memory
-========== ====================
+========== ====================================
Power9 Yes (* see transactional_memory.txt)
Power8 Yes
Power7 No
Power5+ No
Power5 No
PPC970 No
-========== ====================
+========== ====================================
--- /dev/null
+=============================
+Linux 2.6.x on MPC52xx family
+=============================
+
+For the latest info, go to http://www.246tNt.com/mpc52xx/
+
+To compile/use :
+
+ - U-Boot::
+
+ # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
+ if you wish to ).
+ # make lite5200_defconfig
+ # make uImage
+
+ then, on U-boot:
+ => tftpboot 200000 uImage
+ => tftpboot 400000 pRamdisk
+ => bootm 200000 400000
+
+ - DBug::
+
+ # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
+ if you wish to ).
+ # make lite5200_defconfig
+ # cp your_initrd.gz arch/ppc/boot/images/ramdisk.image.gz
+ # make zImage.initrd
+ # make
+
+ then in DBug:
+ DBug> dn -i zImage.initrd.lite5200
+
+
+Some remarks:
+
+ - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100
+ is not supported, and I'm not sure anyone is interesting in working on it
+ so. I didn't took 5xxx because there's apparently a lot of 5xxx that have
+ nothing to do with the MPC5200. I also included the 'MPC' for the same
+ reason.
+ - Of course, I inspired myself from the 2.4 port. If you think I forgot to
+ mention you/your company in the copyright of some code, I'll correct it
+ ASAP.
+++ /dev/null
-Linux 2.6.x on MPC52xx family
------------------------------
-
-For the latest info, go to http://www.246tNt.com/mpc52xx/
-
-To compile/use :
-
- - U-Boot:
- # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
- if you wish to ).
- # make lite5200_defconfig
- # make uImage
-
- then, on U-boot:
- => tftpboot 200000 uImage
- => tftpboot 400000 pRamdisk
- => bootm 200000 400000
-
- - DBug:
- # <edit Makefile to set ARCH=ppc & CROSS_COMPILE=... ( also EXTRAVERSION
- if you wish to ).
- # make lite5200_defconfig
- # cp your_initrd.gz arch/ppc/boot/images/ramdisk.image.gz
- # make zImage.initrd
- # make
-
- then in DBug:
- DBug> dn -i zImage.initrd.lite5200
-
-
-Some remarks :
- - The port is named mpc52xxx, and config options are PPC_MPC52xx. The MGT5100
- is not supported, and I'm not sure anyone is interesting in working on it
- so. I didn't took 5xxx because there's apparently a lot of 5xxx that have
- nothing to do with the MPC5200. I also included the 'MPC' for the same
- reason.
- - Of course, I inspired myself from the 2.4 port. If you think I forgot to
- mention you/your company in the copyright of some code, I'll correct it
- ASAP.
--- /dev/null
+===================================================
+PCI Express I/O Virtualization Resource on Powerenv
+===================================================
+
+Wei Yang <weiyang@linux.vnet.ibm.com>
+
+Benjamin Herrenschmidt <benh@au1.ibm.com>
+
+Bjorn Helgaas <bhelgaas@google.com>
+
+26 Aug 2014
+
+This document describes the requirement from hardware for PCI MMIO resource
+sizing and assignment on PowerKVM and how generic PCI code handles this
+requirement. The first two sections describe the concepts of Partitionable
+Endpoints and the implementation on P8 (IODA2). The next two sections talks
+about considerations on enabling SRIOV on IODA2.
+
+1. Introduction to Partitionable Endpoints
+==========================================
+
+A Partitionable Endpoint (PE) is a way to group the various resources
+associated with a device or a set of devices to provide isolation between
+partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism
+to freeze a device that is causing errors in order to limit the possibility
+of propagation of bad data.
+
+There is thus, in HW, a table of PE states that contains a pair of "frozen"
+state bits (one for MMIO and one for DMA, they get set together but can be
+cleared independently) for each PE.
+
+When a PE is frozen, all stores in any direction are dropped and all loads
+return all 1's value. MSIs are also blocked. There's a bit more state that
+captures things like the details of the error that caused the freeze etc., but
+that's not critical.
+
+The interesting part is how the various PCIe transactions (MMIO, DMA, ...)
+are matched to their corresponding PEs.
+
+The following section provides a rough description of what we have on P8
+(IODA2). Keep in mind that this is all per PHB (PCI host bridge). Each PHB
+is a completely separate HW entity that replicates the entire logic, so has
+its own set of PEs, etc.
+
+2. Implementation of Partitionable Endpoints on P8 (IODA2)
+==========================================================
+
+P8 supports up to 256 Partitionable Endpoints per PHB.
+
+ * Inbound
+
+ For DMA, MSIs and inbound PCIe error messages, we have a table (in
+ memory but accessed in HW by the chip) that provides a direct
+ correspondence between a PCIe RID (bus/dev/fn) with a PE number.
+ We call this the RTT.
+
+ - For DMA we then provide an entire address space for each PE that can
+ contain two "windows", depending on the value of PCI address bit 59.
+ Each window can be configured to be remapped via a "TCE table" (IOMMU
+ translation table), which has various configurable characteristics
+ not described here.
+
+ - For MSIs, we have two windows in the address space (one at the top of
+ the 32-bit space and one much higher) which, via a combination of the
+ address and MSI value, will result in one of the 2048 interrupts per
+ bridge being triggered. There's a PE# in the interrupt controller
+ descriptor table as well which is compared with the PE# obtained from
+ the RTT to "authorize" the device to emit that specific interrupt.
+
+ - Error messages just use the RTT.
+
+ * Outbound. That's where the tricky part is.
+
+ Like other PCI host bridges, the Power8 IODA2 PHB supports "windows"
+ from the CPU address space to the PCI address space. There is one M32
+ window and sixteen M64 windows. They have different characteristics.
+ First what they have in common: they forward a configurable portion of
+ the CPU address space to the PCIe bus and must be naturally aligned
+ power of two in size. The rest is different:
+
+ - The M32 window:
+
+ * Is limited to 4GB in size.
+
+ * Drops the top bits of the address (above the size) and replaces
+ them with a configurable value. This is typically used to generate
+ 32-bit PCIe accesses. We configure that window at boot from FW and
+ don't touch it from Linux; it's usually set to forward a 2GB
+ portion of address space from the CPU to PCIe
+ 0x8000_0000..0xffff_ffff. (Note: The top 64KB are actually
+ reserved for MSIs but this is not a problem at this point; we just
+ need to ensure Linux doesn't assign anything there, the M32 logic
+ ignores that however and will forward in that space if we try).
+
+ * It is divided into 256 segments of equal size. A table in the chip
+ maps each segment to a PE#. That allows portions of the MMIO space
+ to be assigned to PEs on a segment granularity. For a 2GB window,
+ the segment granularity is 2GB/256 = 8MB.
+
+ Now, this is the "main" window we use in Linux today (excluding
+ SR-IOV). We basically use the trick of forcing the bridge MMIO windows
+ onto a segment alignment/granularity so that the space behind a bridge
+ can be assigned to a PE.
+
+ Ideally we would like to be able to have individual functions in PEs
+ but that would mean using a completely different address allocation
+ scheme where individual function BARs can be "grouped" to fit in one or
+ more segments.
+
+ - The M64 windows:
+
+ * Must be at least 256MB in size.
+
+ * Do not translate addresses (the address on PCIe is the same as the
+ address on the PowerBus). There is a way to also set the top 14
+ bits which are not conveyed by PowerBus but we don't use this.
+
+ * Can be configured to be segmented. When not segmented, we can
+ specify the PE# for the entire window. When segmented, a window
+ has 256 segments; however, there is no table for mapping a segment
+ to a PE#. The segment number *is* the PE#.
+
+ * Support overlaps. If an address is covered by multiple windows,
+ there's a defined ordering for which window applies.
+
+ We have code (fairly new compared to the M32 stuff) that exploits that
+ for large BARs in 64-bit space:
+
+ We configure an M64 window to cover the entire region of address space
+ that has been assigned by FW for the PHB (about 64GB, ignore the space
+ for the M32, it comes out of a different "reserve"). We configure it
+ as segmented.
+
+ Then we do the same thing as with M32, using the bridge alignment
+ trick, to match to those giant segments.
+
+ Since we cannot remap, we have two additional constraints:
+
+ - We do the PE# allocation *after* the 64-bit space has been assigned
+ because the addresses we use directly determine the PE#. We then
+ update the M32 PE# for the devices that use both 32-bit and 64-bit
+ spaces or assign the remaining PE# to 32-bit only devices.
+
+ - We cannot "group" segments in HW, so if a device ends up using more
+ than one segment, we end up with more than one PE#. There is a HW
+ mechanism to make the freeze state cascade to "companion" PEs but
+ that only works for PCIe error messages (typically used so that if
+ you freeze a switch, it freezes all its children). So we do it in
+ SW. We lose a bit of effectiveness of EEH in that case, but that's
+ the best we found. So when any of the PEs freezes, we freeze the
+ other ones for that "domain". We thus introduce the concept of
+ "master PE" which is the one used for DMA, MSIs, etc., and "secondary
+ PEs" that are used for the remaining M64 segments.
+
+ We would like to investigate using additional M64 windows in "single
+ PE" mode to overlay over specific BARs to work around some of that, for
+ example for devices with very large BARs, e.g., GPUs. It would make
+ sense, but we haven't done it yet.
+
+3. Considerations for SR-IOV on PowerKVM
+========================================
+
+ * SR-IOV Background
+
+ The PCIe SR-IOV feature allows a single Physical Function (PF) to
+ support several Virtual Functions (VFs). Registers in the PF's SR-IOV
+ Capability control the number of VFs and whether they are enabled.
+
+ When VFs are enabled, they appear in Configuration Space like normal
+ PCI devices, but the BARs in VF config space headers are unusual. For
+ a non-VF device, software uses BARs in the config space header to
+ discover the BAR sizes and assign addresses for them. For VF devices,
+ software uses VF BAR registers in the *PF* SR-IOV Capability to
+ discover sizes and assign addresses. The BARs in the VF's config space
+ header are read-only zeros.
+
+ When a VF BAR in the PF SR-IOV Capability is programmed, it sets the
+ base address for all the corresponding VF(n) BARs. For example, if the
+ PF SR-IOV Capability is programmed to enable eight VFs, and it has a
+ 1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region.
+ This region is divided into eight contiguous 1MB regions, each of which
+ is a BAR0 for one of the VFs. Note that even though the VF BAR
+ describes an 8MB region, the alignment requirement is for a single VF,
+ i.e., 1MB in this example.
+
+ There are several strategies for isolating VFs in PEs:
+
+ - M32 window: There's one M32 window, and it is split into 256
+ equally-sized segments. The finest granularity possible is a 256MB
+ window with 1MB segments. VF BARs that are 1MB or larger could be
+ mapped to separate PEs in this window. Each segment can be
+ individually mapped to a PE via the lookup table, so this is quite
+ flexible, but it works best when all the VF BARs are the same size. If
+ they are different sizes, the entire window has to be small enough that
+ the segment size matches the smallest VF BAR, which means larger VF
+ BARs span several segments.
+
+ - Non-segmented M64 window: A non-segmented M64 window is mapped entirely
+ to a single PE, so it could only isolate one VF.
+
+ - Single segmented M64 windows: A segmented M64 window could be used just
+ like the M32 window, but the segments can't be individually mapped to
+ PEs (the segment number is the PE#), so there isn't as much
+ flexibility. A VF with multiple BARs would have to be in a "domain" of
+ multiple PEs, which is not as well isolated as a single PE.
+
+ - Multiple segmented M64 windows: As usual, each window is split into 256
+ equally-sized segments, and the segment number is the PE#. But if we
+ use several M64 windows, they can be set to different base addresses
+ and different segment sizes. If we have VFs that each have a 1MB BAR
+ and a 32MB BAR, we could use one M64 window to assign 1MB segments and
+ another M64 window to assign 32MB segments.
+
+ Finally, the plan to use M64 windows for SR-IOV, which will be described
+ more in the next two sections. For a given VF BAR, we need to
+ effectively reserve the entire 256 segments (256 * VF BAR size) and
+ position the VF BAR to start at the beginning of a free range of
+ segments/PEs inside that M64 window.
+
+ The goal is of course to be able to give a separate PE for each VF.
+
+ The IODA2 platform has 16 M64 windows, which are used to map MMIO
+ range to PE#. Each M64 window defines one MMIO range and this range is
+ divided into 256 segments, with each segment corresponding to one PE.
+
+ We decide to leverage this M64 window to map VFs to individual PEs, since
+ SR-IOV VF BARs are all the same size.
+
+ But doing so introduces another problem: total_VFs is usually smaller
+ than the number of M64 window segments, so if we map one VF BAR directly
+ to one M64 window, some part of the M64 window will map to another
+ device's MMIO range.
+
+ IODA supports 256 PEs, so segmented windows contain 256 segments, so if
+ total_VFs is less than 256, we have the situation in Figure 1.0, where
+ segments [total_VFs, 255] of the M64 window may map to some MMIO range on
+ other devices::
+
+ 0 1 total_VFs - 1
+ +------+------+- -+------+------+
+ | | | ... | | |
+ +------+------+- -+------+------+
+
+ VF(n) BAR space
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ M64 window
+
+ Figure 1.0 Direct map VF(n) BAR space
+
+ Our current solution is to allocate 256 segments even if the VF(n) BAR
+ space doesn't need that much, as shown in Figure 1.1::
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ VF(n) BAR space + extra
+
+ 0 1 total_VFs - 1 255
+ +------+------+- -+------+------+- -+------+------+
+ | | | ... | | | ... | | |
+ +------+------+- -+------+------+- -+------+------+
+
+ M64 window
+
+ Figure 1.1 Map VF(n) BAR space + extra
+
+ Allocating the extra space ensures that the entire M64 window will be
+ assigned to this one SR-IOV device and none of the space will be
+ available for other devices. Note that this only expands the space
+ reserved in software; there are still only total_VFs VFs, and they only
+ respond to segments [0, total_VFs - 1]. There's nothing in hardware that
+ responds to segments [total_VFs, 255].
+
+4. Implications for the Generic PCI Code
+========================================
+
+The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be
+aligned to the size of an individual VF BAR.
+
+In IODA2, the MMIO address determines the PE#. If the address is in an M32
+window, we can set the PE# by updating the table that translates segments
+to PE#s. Similarly, if the address is in an unsegmented M64 window, we can
+set the PE# for the window. But if it's in a segmented M64 window, the
+segment number is the PE#.
+
+Therefore, the only way to control the PE# for a VF is to change the base
+of the VF(n) BAR space in the VF BAR. If the PCI core allocates the exact
+amount of space required for the VF(n) BAR space, the VF BAR value is fixed
+and cannot be changed.
+
+On the other hand, if the PCI core allocates additional space, the VF BAR
+value can be changed as long as the entire VF(n) BAR space remains inside
+the space allocated by the core.
+
+Ideally the segment size will be the same as an individual VF BAR size.
+Then each VF will be in its own PE. The VF BARs (and therefore the PE#s)
+are contiguous. If VF0 is in PE(x), then VF(n) is in PE(x+n). If we
+allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0.
+
+If the segment size is smaller than the VF BAR size, it will take several
+segments to cover a VF BAR, and a VF will be in several PEs. This is
+possible, but the isolation isn't as good, and it reduces the number of PE#
+choices because instead of consuming only numVFs segments, the VF(n) BAR
+space will consume (numVFs * n) segments. That means there aren't as many
+available segments for adjusting base of the VF(n) BAR space.
+++ /dev/null
-Wei Yang <weiyang@linux.vnet.ibm.com>
-Benjamin Herrenschmidt <benh@au1.ibm.com>
-Bjorn Helgaas <bhelgaas@google.com>
-26 Aug 2014
-
-This document describes the requirement from hardware for PCI MMIO resource
-sizing and assignment on PowerKVM and how generic PCI code handles this
-requirement. The first two sections describe the concepts of Partitionable
-Endpoints and the implementation on P8 (IODA2). The next two sections talks
-about considerations on enabling SRIOV on IODA2.
-
-1. Introduction to Partitionable Endpoints
-
-A Partitionable Endpoint (PE) is a way to group the various resources
-associated with a device or a set of devices to provide isolation between
-partitions (i.e., filtering of DMA, MSIs etc.) and to provide a mechanism
-to freeze a device that is causing errors in order to limit the possibility
-of propagation of bad data.
-
-There is thus, in HW, a table of PE states that contains a pair of "frozen"
-state bits (one for MMIO and one for DMA, they get set together but can be
-cleared independently) for each PE.
-
-When a PE is frozen, all stores in any direction are dropped and all loads
-return all 1's value. MSIs are also blocked. There's a bit more state that
-captures things like the details of the error that caused the freeze etc., but
-that's not critical.
-
-The interesting part is how the various PCIe transactions (MMIO, DMA, ...)
-are matched to their corresponding PEs.
-
-The following section provides a rough description of what we have on P8
-(IODA2). Keep in mind that this is all per PHB (PCI host bridge). Each PHB
-is a completely separate HW entity that replicates the entire logic, so has
-its own set of PEs, etc.
-
-2. Implementation of Partitionable Endpoints on P8 (IODA2)
-
-P8 supports up to 256 Partitionable Endpoints per PHB.
-
- * Inbound
-
- For DMA, MSIs and inbound PCIe error messages, we have a table (in
- memory but accessed in HW by the chip) that provides a direct
- correspondence between a PCIe RID (bus/dev/fn) with a PE number.
- We call this the RTT.
-
- - For DMA we then provide an entire address space for each PE that can
- contain two "windows", depending on the value of PCI address bit 59.
- Each window can be configured to be remapped via a "TCE table" (IOMMU
- translation table), which has various configurable characteristics
- not described here.
-
- - For MSIs, we have two windows in the address space (one at the top of
- the 32-bit space and one much higher) which, via a combination of the
- address and MSI value, will result in one of the 2048 interrupts per
- bridge being triggered. There's a PE# in the interrupt controller
- descriptor table as well which is compared with the PE# obtained from
- the RTT to "authorize" the device to emit that specific interrupt.
-
- - Error messages just use the RTT.
-
- * Outbound. That's where the tricky part is.
-
- Like other PCI host bridges, the Power8 IODA2 PHB supports "windows"
- from the CPU address space to the PCI address space. There is one M32
- window and sixteen M64 windows. They have different characteristics.
- First what they have in common: they forward a configurable portion of
- the CPU address space to the PCIe bus and must be naturally aligned
- power of two in size. The rest is different:
-
- - The M32 window:
-
- * Is limited to 4GB in size.
-
- * Drops the top bits of the address (above the size) and replaces
- them with a configurable value. This is typically used to generate
- 32-bit PCIe accesses. We configure that window at boot from FW and
- don't touch it from Linux; it's usually set to forward a 2GB
- portion of address space from the CPU to PCIe
- 0x8000_0000..0xffff_ffff. (Note: The top 64KB are actually
- reserved for MSIs but this is not a problem at this point; we just
- need to ensure Linux doesn't assign anything there, the M32 logic
- ignores that however and will forward in that space if we try).
-
- * It is divided into 256 segments of equal size. A table in the chip
- maps each segment to a PE#. That allows portions of the MMIO space
- to be assigned to PEs on a segment granularity. For a 2GB window,
- the segment granularity is 2GB/256 = 8MB.
-
- Now, this is the "main" window we use in Linux today (excluding
- SR-IOV). We basically use the trick of forcing the bridge MMIO windows
- onto a segment alignment/granularity so that the space behind a bridge
- can be assigned to a PE.
-
- Ideally we would like to be able to have individual functions in PEs
- but that would mean using a completely different address allocation
- scheme where individual function BARs can be "grouped" to fit in one or
- more segments.
-
- - The M64 windows:
-
- * Must be at least 256MB in size.
-
- * Do not translate addresses (the address on PCIe is the same as the
- address on the PowerBus). There is a way to also set the top 14
- bits which are not conveyed by PowerBus but we don't use this.
-
- * Can be configured to be segmented. When not segmented, we can
- specify the PE# for the entire window. When segmented, a window
- has 256 segments; however, there is no table for mapping a segment
- to a PE#. The segment number *is* the PE#.
-
- * Support overlaps. If an address is covered by multiple windows,
- there's a defined ordering for which window applies.
-
- We have code (fairly new compared to the M32 stuff) that exploits that
- for large BARs in 64-bit space:
-
- We configure an M64 window to cover the entire region of address space
- that has been assigned by FW for the PHB (about 64GB, ignore the space
- for the M32, it comes out of a different "reserve"). We configure it
- as segmented.
-
- Then we do the same thing as with M32, using the bridge alignment
- trick, to match to those giant segments.
-
- Since we cannot remap, we have two additional constraints:
-
- - We do the PE# allocation *after* the 64-bit space has been assigned
- because the addresses we use directly determine the PE#. We then
- update the M32 PE# for the devices that use both 32-bit and 64-bit
- spaces or assign the remaining PE# to 32-bit only devices.
-
- - We cannot "group" segments in HW, so if a device ends up using more
- than one segment, we end up with more than one PE#. There is a HW
- mechanism to make the freeze state cascade to "companion" PEs but
- that only works for PCIe error messages (typically used so that if
- you freeze a switch, it freezes all its children). So we do it in
- SW. We lose a bit of effectiveness of EEH in that case, but that's
- the best we found. So when any of the PEs freezes, we freeze the
- other ones for that "domain". We thus introduce the concept of
- "master PE" which is the one used for DMA, MSIs, etc., and "secondary
- PEs" that are used for the remaining M64 segments.
-
- We would like to investigate using additional M64 windows in "single
- PE" mode to overlay over specific BARs to work around some of that, for
- example for devices with very large BARs, e.g., GPUs. It would make
- sense, but we haven't done it yet.
-
-3. Considerations for SR-IOV on PowerKVM
-
- * SR-IOV Background
-
- The PCIe SR-IOV feature allows a single Physical Function (PF) to
- support several Virtual Functions (VFs). Registers in the PF's SR-IOV
- Capability control the number of VFs and whether they are enabled.
-
- When VFs are enabled, they appear in Configuration Space like normal
- PCI devices, but the BARs in VF config space headers are unusual. For
- a non-VF device, software uses BARs in the config space header to
- discover the BAR sizes and assign addresses for them. For VF devices,
- software uses VF BAR registers in the *PF* SR-IOV Capability to
- discover sizes and assign addresses. The BARs in the VF's config space
- header are read-only zeros.
-
- When a VF BAR in the PF SR-IOV Capability is programmed, it sets the
- base address for all the corresponding VF(n) BARs. For example, if the
- PF SR-IOV Capability is programmed to enable eight VFs, and it has a
- 1MB VF BAR0, the address in that VF BAR sets the base of an 8MB region.
- This region is divided into eight contiguous 1MB regions, each of which
- is a BAR0 for one of the VFs. Note that even though the VF BAR
- describes an 8MB region, the alignment requirement is for a single VF,
- i.e., 1MB in this example.
-
- There are several strategies for isolating VFs in PEs:
-
- - M32 window: There's one M32 window, and it is split into 256
- equally-sized segments. The finest granularity possible is a 256MB
- window with 1MB segments. VF BARs that are 1MB or larger could be
- mapped to separate PEs in this window. Each segment can be
- individually mapped to a PE via the lookup table, so this is quite
- flexible, but it works best when all the VF BARs are the same size. If
- they are different sizes, the entire window has to be small enough that
- the segment size matches the smallest VF BAR, which means larger VF
- BARs span several segments.
-
- - Non-segmented M64 window: A non-segmented M64 window is mapped entirely
- to a single PE, so it could only isolate one VF.
-
- - Single segmented M64 windows: A segmented M64 window could be used just
- like the M32 window, but the segments can't be individually mapped to
- PEs (the segment number is the PE#), so there isn't as much
- flexibility. A VF with multiple BARs would have to be in a "domain" of
- multiple PEs, which is not as well isolated as a single PE.
-
- - Multiple segmented M64 windows: As usual, each window is split into 256
- equally-sized segments, and the segment number is the PE#. But if we
- use several M64 windows, they can be set to different base addresses
- and different segment sizes. If we have VFs that each have a 1MB BAR
- and a 32MB BAR, we could use one M64 window to assign 1MB segments and
- another M64 window to assign 32MB segments.
-
- Finally, the plan to use M64 windows for SR-IOV, which will be described
- more in the next two sections. For a given VF BAR, we need to
- effectively reserve the entire 256 segments (256 * VF BAR size) and
- position the VF BAR to start at the beginning of a free range of
- segments/PEs inside that M64 window.
-
- The goal is of course to be able to give a separate PE for each VF.
-
- The IODA2 platform has 16 M64 windows, which are used to map MMIO
- range to PE#. Each M64 window defines one MMIO range and this range is
- divided into 256 segments, with each segment corresponding to one PE.
-
- We decide to leverage this M64 window to map VFs to individual PEs, since
- SR-IOV VF BARs are all the same size.
-
- But doing so introduces another problem: total_VFs is usually smaller
- than the number of M64 window segments, so if we map one VF BAR directly
- to one M64 window, some part of the M64 window will map to another
- device's MMIO range.
-
- IODA supports 256 PEs, so segmented windows contain 256 segments, so if
- total_VFs is less than 256, we have the situation in Figure 1.0, where
- segments [total_VFs, 255] of the M64 window may map to some MMIO range on
- other devices:
-
- 0 1 total_VFs - 1
- +------+------+- -+------+------+
- | | | ... | | |
- +------+------+- -+------+------+
-
- VF(n) BAR space
-
- 0 1 total_VFs - 1 255
- +------+------+- -+------+------+- -+------+------+
- | | | ... | | | ... | | |
- +------+------+- -+------+------+- -+------+------+
-
- M64 window
-
- Figure 1.0 Direct map VF(n) BAR space
-
- Our current solution is to allocate 256 segments even if the VF(n) BAR
- space doesn't need that much, as shown in Figure 1.1:
-
- 0 1 total_VFs - 1 255
- +------+------+- -+------+------+- -+------+------+
- | | | ... | | | ... | | |
- +------+------+- -+------+------+- -+------+------+
-
- VF(n) BAR space + extra
-
- 0 1 total_VFs - 1 255
- +------+------+- -+------+------+- -+------+------+
- | | | ... | | | ... | | |
- +------+------+- -+------+------+- -+------+------+
-
- M64 window
-
- Figure 1.1 Map VF(n) BAR space + extra
-
- Allocating the extra space ensures that the entire M64 window will be
- assigned to this one SR-IOV device and none of the space will be
- available for other devices. Note that this only expands the space
- reserved in software; there are still only total_VFs VFs, and they only
- respond to segments [0, total_VFs - 1]. There's nothing in hardware that
- responds to segments [total_VFs, 255].
-
-4. Implications for the Generic PCI Code
-
-The PCIe SR-IOV spec requires that the base of the VF(n) BAR space be
-aligned to the size of an individual VF BAR.
-
-In IODA2, the MMIO address determines the PE#. If the address is in an M32
-window, we can set the PE# by updating the table that translates segments
-to PE#s. Similarly, if the address is in an unsegmented M64 window, we can
-set the PE# for the window. But if it's in a segmented M64 window, the
-segment number is the PE#.
-
-Therefore, the only way to control the PE# for a VF is to change the base
-of the VF(n) BAR space in the VF BAR. If the PCI core allocates the exact
-amount of space required for the VF(n) BAR space, the VF BAR value is fixed
-and cannot be changed.
-
-On the other hand, if the PCI core allocates additional space, the VF BAR
-value can be changed as long as the entire VF(n) BAR space remains inside
-the space allocated by the core.
-
-Ideally the segment size will be the same as an individual VF BAR size.
-Then each VF will be in its own PE. The VF BARs (and therefore the PE#s)
-are contiguous. If VF0 is in PE(x), then VF(n) is in PE(x+n). If we
-allocate 256 segments, there are (256 - numVFs) choices for the PE# of VF0.
-
-If the segment size is smaller than the VF BAR size, it will take several
-segments to cover a VF BAR, and a VF will be in several PEs. This is
-possible, but the isolation isn't as good, and it reduces the number of PE#
-choices because instead of consuming only numVFs segments, the VF(n) BAR
-space will consume (numVFs * n) segments. That means there aren't as many
-available segments for adjusting base of the VF(n) BAR space.
--- /dev/null
+========================
+PMU Event Based Branches
+========================
+
+Event Based Branches (EBBs) are a feature which allows the hardware to
+branch directly to a specified user space address when certain events occur.
+
+The full specification is available in Power ISA v2.07:
+
+ https://www.power.org/documentation/power-isa-version-2-07/
+
+One type of event for which EBBs can be configured is PMU exceptions. This
+document describes the API for configuring the Power PMU to generate EBBs,
+using the Linux perf_events API.
+
+
+Terminology
+-----------
+
+Throughout this document we will refer to an "EBB event" or "EBB events". This
+just refers to a struct perf_event which has set the "EBB" flag in its
+attr.config. All events which can be configured on the hardware PMU are
+possible "EBB events".
+
+
+Background
+----------
+
+When a PMU EBB occurs it is delivered to the currently running process. As such
+EBBs can only sensibly be used by programs for self-monitoring.
+
+It is a feature of the perf_events API that events can be created on other
+processes, subject to standard permission checks. This is also true of EBB
+events, however unless the target process enables EBBs (via mtspr(BESCR)) no
+EBBs will ever be delivered.
+
+This makes it possible for a process to enable EBBs for itself, but not
+actually configure any events. At a later time another process can come along
+and attach an EBB event to the process, which will then cause EBBs to be
+delivered to the first process. It's not clear if this is actually useful.
+
+
+When the PMU is configured for EBBs, all PMU interrupts are delivered to the
+user process. This means once an EBB event is scheduled on the PMU, no non-EBB
+events can be configured. This means that EBB events can not be run
+concurrently with regular 'perf' commands, or any other perf events.
+
+It is however safe to run 'perf' commands on a process which is using EBBs. The
+kernel will in general schedule the EBB event, and perf will be notified that
+its events could not run.
+
+The exclusion between EBB events and regular events is implemented using the
+existing "pinned" and "exclusive" attributes of perf_events. This means EBB
+events will be given priority over other events, unless they are also pinned.
+If an EBB event and a regular event are both pinned, then whichever is enabled
+first will be scheduled and the other will be put in error state. See the
+section below titled "Enabling an EBB event" for more information.
+
+
+Creating an EBB event
+---------------------
+
+To request that an event is counted using EBB, the event code should have bit
+63 set.
+
+EBB events must be created with a particular, and restrictive, set of
+attributes - this is so that they interoperate correctly with the rest of the
+perf_events subsystem.
+
+An EBB event must be created with the "pinned" and "exclusive" attributes set.
+Note that if you are creating a group of EBB events, only the leader can have
+these attributes set.
+
+An EBB event must NOT set any of the "inherit", "sample_period", "freq" or
+"enable_on_exec" attributes.
+
+An EBB event must be attached to a task. This is specified to perf_event_open()
+by passing a pid value, typically 0 indicating the current task.
+
+All events in a group must agree on whether they want EBB. That is all events
+must request EBB, or none may request EBB.
+
+EBB events must specify the PMC they are to be counted on. This ensures
+userspace is able to reliably determine which PMC the event is scheduled on.
+
+
+Enabling an EBB event
+---------------------
+
+Once an EBB event has been successfully opened, it must be enabled with the
+perf_events API. This can be achieved either via the ioctl() interface, or the
+prctl() interface.
+
+However, due to the design of the perf_events API, enabling an event does not
+guarantee that it has been scheduled on the PMU. To ensure that the EBB event
+has been scheduled on the PMU, you must perform a read() on the event. If the
+read() returns EOF, then the event has not been scheduled and EBBs are not
+enabled.
+
+This behaviour occurs because the EBB event is pinned and exclusive. When the
+EBB event is enabled it will force all other non-pinned events off the PMU. In
+this case the enable will be successful. However if there is already an event
+pinned on the PMU then the enable will not be successful.
+
+
+Reading an EBB event
+--------------------
+
+It is possible to read() from an EBB event. However the results are
+meaningless. Because interrupts are being delivered to the user process the
+kernel is not able to count the event, and so will return a junk value.
+
+
+Closing an EBB event
+--------------------
+
+When an EBB event is finished with, you can close it using close() as for any
+regular event. If this is the last EBB event the PMU will be deconfigured and
+no further PMU EBBs will be delivered.
+
+
+EBB Handler
+-----------
+
+The EBB handler is just regular userspace code, however it must be written in
+the style of an interrupt handler. When the handler is entered all registers
+are live (possibly) and so must be saved somehow before the handler can invoke
+other code.
+
+It's up to the program how to handle this. For C programs a relatively simple
+option is to create an interrupt frame on the stack and save registers there.
+
+Fork
+----
+
+EBB events are not inherited across fork. If the child process wishes to use
+EBBs it should open a new event for itself. Similarly the EBB state in
+BESCR/EBBHR/EBBRR is cleared across fork().
+++ /dev/null
-PMU Event Based Branches
-========================
-
-Event Based Branches (EBBs) are a feature which allows the hardware to
-branch directly to a specified user space address when certain events occur.
-
-The full specification is available in Power ISA v2.07:
-
- https://www.power.org/documentation/power-isa-version-2-07/
-
-One type of event for which EBBs can be configured is PMU exceptions. This
-document describes the API for configuring the Power PMU to generate EBBs,
-using the Linux perf_events API.
-
-
-Terminology
------------
-
-Throughout this document we will refer to an "EBB event" or "EBB events". This
-just refers to a struct perf_event which has set the "EBB" flag in its
-attr.config. All events which can be configured on the hardware PMU are
-possible "EBB events".
-
-
-Background
-----------
-
-When a PMU EBB occurs it is delivered to the currently running process. As such
-EBBs can only sensibly be used by programs for self-monitoring.
-
-It is a feature of the perf_events API that events can be created on other
-processes, subject to standard permission checks. This is also true of EBB
-events, however unless the target process enables EBBs (via mtspr(BESCR)) no
-EBBs will ever be delivered.
-
-This makes it possible for a process to enable EBBs for itself, but not
-actually configure any events. At a later time another process can come along
-and attach an EBB event to the process, which will then cause EBBs to be
-delivered to the first process. It's not clear if this is actually useful.
-
-
-When the PMU is configured for EBBs, all PMU interrupts are delivered to the
-user process. This means once an EBB event is scheduled on the PMU, no non-EBB
-events can be configured. This means that EBB events can not be run
-concurrently with regular 'perf' commands, or any other perf events.
-
-It is however safe to run 'perf' commands on a process which is using EBBs. The
-kernel will in general schedule the EBB event, and perf will be notified that
-its events could not run.
-
-The exclusion between EBB events and regular events is implemented using the
-existing "pinned" and "exclusive" attributes of perf_events. This means EBB
-events will be given priority over other events, unless they are also pinned.
-If an EBB event and a regular event are both pinned, then whichever is enabled
-first will be scheduled and the other will be put in error state. See the
-section below titled "Enabling an EBB event" for more information.
-
-
-Creating an EBB event
----------------------
-
-To request that an event is counted using EBB, the event code should have bit
-63 set.
-
-EBB events must be created with a particular, and restrictive, set of
-attributes - this is so that they interoperate correctly with the rest of the
-perf_events subsystem.
-
-An EBB event must be created with the "pinned" and "exclusive" attributes set.
-Note that if you are creating a group of EBB events, only the leader can have
-these attributes set.
-
-An EBB event must NOT set any of the "inherit", "sample_period", "freq" or
-"enable_on_exec" attributes.
-
-An EBB event must be attached to a task. This is specified to perf_event_open()
-by passing a pid value, typically 0 indicating the current task.
-
-All events in a group must agree on whether they want EBB. That is all events
-must request EBB, or none may request EBB.
-
-EBB events must specify the PMC they are to be counted on. This ensures
-userspace is able to reliably determine which PMC the event is scheduled on.
-
-
-Enabling an EBB event
----------------------
-
-Once an EBB event has been successfully opened, it must be enabled with the
-perf_events API. This can be achieved either via the ioctl() interface, or the
-prctl() interface.
-
-However, due to the design of the perf_events API, enabling an event does not
-guarantee that it has been scheduled on the PMU. To ensure that the EBB event
-has been scheduled on the PMU, you must perform a read() on the event. If the
-read() returns EOF, then the event has not been scheduled and EBBs are not
-enabled.
-
-This behaviour occurs because the EBB event is pinned and exclusive. When the
-EBB event is enabled it will force all other non-pinned events off the PMU. In
-this case the enable will be successful. However if there is already an event
-pinned on the PMU then the enable will not be successful.
-
-
-Reading an EBB event
---------------------
-
-It is possible to read() from an EBB event. However the results are
-meaningless. Because interrupts are being delivered to the user process the
-kernel is not able to count the event, and so will return a junk value.
-
-
-Closing an EBB event
---------------------
-
-When an EBB event is finished with, you can close it using close() as for any
-regular event. If this is the last EBB event the PMU will be deconfigured and
-no further PMU EBBs will be delivered.
-
-
-EBB Handler
------------
-
-The EBB handler is just regular userspace code, however it must be written in
-the style of an interrupt handler. When the handler is entered all registers
-are live (possibly) and so must be saved somehow before the handler can invoke
-other code.
-
-It's up to the program how to handle this. For C programs a relatively simple
-option is to create an interrupt frame on the stack and save registers there.
-
-Fork
-----
-
-EBB events are not inherited across fork. If the child process wishes to use
-EBBs it should open a new event for itself. Similarly the EBB state in
-BESCR/EBBHR/EBBRR is cleared across fork().
--- /dev/null
+======
+Ptrace
+======
+
+GDB intends to support the following hardware debug features of BookE
+processors:
+
+4 hardware breakpoints (IAC)
+2 hardware watchpoints (read, write and read-write) (DAC)
+2 value conditions for the hardware watchpoints (DVC)
+
+For that, we need to extend ptrace so that GDB can query and set these
+resources. Since we're extending, we're trying to create an interface
+that's extendable and that covers both BookE and server processors, so
+that GDB doesn't need to special-case each of them. We added the
+following 3 new ptrace requests.
+
+1. PTRACE_PPC_GETHWDEBUGINFO
+============================
+
+Query for GDB to discover the hardware debug features. The main info to
+be returned here is the minimum alignment for the hardware watchpoints.
+BookE processors don't have restrictions here, but server processors have
+an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid
+adding special cases to GDB based on what it sees in AUXV.
+
+Since we're at it, we added other useful info that the kernel can return to
+GDB: this query will return the number of hardware breakpoints, hardware
+watchpoints and whether it supports a range of addresses and a condition.
+The query will fill the following structure provided by the requesting process::
+
+ struct ppc_debug_info {
+ unit32_t version;
+ unit32_t num_instruction_bps;
+ unit32_t num_data_bps;
+ unit32_t num_condition_regs;
+ unit32_t data_bp_alignment;
+ unit32_t sizeof_condition; /* size of the DVC register */
+ uint64_t features; /* bitmask of the individual flags */
+ };
+
+features will have bits indicating whether there is support for::
+
+ #define PPC_DEBUG_FEATURE_INSN_BP_RANGE 0x1
+ #define PPC_DEBUG_FEATURE_INSN_BP_MASK 0x2
+ #define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4
+ #define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8
+ #define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10
+
+2. PTRACE_SETHWDEBUG
+
+Sets a hardware breakpoint or watchpoint, according to the provided structure::
+
+ struct ppc_hw_breakpoint {
+ uint32_t version;
+ #define PPC_BREAKPOINT_TRIGGER_EXECUTE 0x1
+ #define PPC_BREAKPOINT_TRIGGER_READ 0x2
+ #define PPC_BREAKPOINT_TRIGGER_WRITE 0x4
+ uint32_t trigger_type; /* only some combinations allowed */
+ #define PPC_BREAKPOINT_MODE_EXACT 0x0
+ #define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE 0x1
+ #define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE 0x2
+ #define PPC_BREAKPOINT_MODE_MASK 0x3
+ uint32_t addr_mode; /* address match mode */
+
+ #define PPC_BREAKPOINT_CONDITION_MODE 0x3
+ #define PPC_BREAKPOINT_CONDITION_NONE 0x0
+ #define PPC_BREAKPOINT_CONDITION_AND 0x1
+ #define PPC_BREAKPOINT_CONDITION_EXACT 0x1 /* different name for the same thing as above */
+ #define PPC_BREAKPOINT_CONDITION_OR 0x2
+ #define PPC_BREAKPOINT_CONDITION_AND_OR 0x3
+ #define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000 /* byte enable bits */
+ #define PPC_BREAKPOINT_CONDITION_BE(n) (1<<((n)+16))
+ uint32_t condition_mode; /* break/watchpoint condition flags */
+
+ uint64_t addr;
+ uint64_t addr2;
+ uint64_t condition_value;
+ };
+
+A request specifies one event, not necessarily just one register to be set.
+For instance, if the request is for a watchpoint with a condition, both the
+DAC and DVC registers will be set in the same request.
+
+With this GDB can ask for all kinds of hardware breakpoints and watchpoints
+that the BookE supports. COMEFROM breakpoints available in server processors
+are not contemplated, but that is out of the scope of this work.
+
+ptrace will return an integer (handle) uniquely identifying the breakpoint or
+watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
+request to ask for its removal. Return -ENOSPC if the requested breakpoint
+can't be allocated on the registers.
+
+Some examples of using the structure to:
+
+- set a breakpoint in the first breakpoint register::
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = 0;
+
+- set a watchpoint which triggers on reads in the second watchpoint register::
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = 0;
+
+- set a watchpoint which triggers only with a specific value::
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ;
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL;
+ p.addr = (uint64_t) address;
+ p.addr2 = 0;
+ p.condition_value = (uint64_t) condition;
+
+- set a ranged hardware breakpoint::
+
+ p.version = PPC_DEBUG_CURRENT_VERSION;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE;
+ p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) begin_range;
+ p.addr2 = (uint64_t) end_range;
+ p.condition_value = 0;
+
+- set a watchpoint in server processors (BookS)::
+
+ p.version = 1;
+ p.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
+ p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+ or
+ p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+
+ p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ p.addr = (uint64_t) begin_range;
+ /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
+ * addr2 - addr <= 8 Bytes.
+ */
+ p.addr2 = (uint64_t) end_range;
+ p.condition_value = 0;
+
+3. PTRACE_DELHWDEBUG
+
+Takes an integer which identifies an existing breakpoint or watchpoint
+(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
+corresponding breakpoint or watchpoint..
+++ /dev/null
-GDB intends to support the following hardware debug features of BookE
-processors:
-
-4 hardware breakpoints (IAC)
-2 hardware watchpoints (read, write and read-write) (DAC)
-2 value conditions for the hardware watchpoints (DVC)
-
-For that, we need to extend ptrace so that GDB can query and set these
-resources. Since we're extending, we're trying to create an interface
-that's extendable and that covers both BookE and server processors, so
-that GDB doesn't need to special-case each of them. We added the
-following 3 new ptrace requests.
-
-1. PTRACE_PPC_GETHWDEBUGINFO
-
-Query for GDB to discover the hardware debug features. The main info to
-be returned here is the minimum alignment for the hardware watchpoints.
-BookE processors don't have restrictions here, but server processors have
-an 8-byte alignment restriction for hardware watchpoints. We'd like to avoid
-adding special cases to GDB based on what it sees in AUXV.
-
-Since we're at it, we added other useful info that the kernel can return to
-GDB: this query will return the number of hardware breakpoints, hardware
-watchpoints and whether it supports a range of addresses and a condition.
-The query will fill the following structure provided by the requesting process:
-
-struct ppc_debug_info {
- unit32_t version;
- unit32_t num_instruction_bps;
- unit32_t num_data_bps;
- unit32_t num_condition_regs;
- unit32_t data_bp_alignment;
- unit32_t sizeof_condition; /* size of the DVC register */
- uint64_t features; /* bitmask of the individual flags */
-};
-
-features will have bits indicating whether there is support for:
-
-#define PPC_DEBUG_FEATURE_INSN_BP_RANGE 0x1
-#define PPC_DEBUG_FEATURE_INSN_BP_MASK 0x2
-#define PPC_DEBUG_FEATURE_DATA_BP_RANGE 0x4
-#define PPC_DEBUG_FEATURE_DATA_BP_MASK 0x8
-#define PPC_DEBUG_FEATURE_DATA_BP_DAWR 0x10
-
-2. PTRACE_SETHWDEBUG
-
-Sets a hardware breakpoint or watchpoint, according to the provided structure:
-
-struct ppc_hw_breakpoint {
- uint32_t version;
-#define PPC_BREAKPOINT_TRIGGER_EXECUTE 0x1
-#define PPC_BREAKPOINT_TRIGGER_READ 0x2
-#define PPC_BREAKPOINT_TRIGGER_WRITE 0x4
- uint32_t trigger_type; /* only some combinations allowed */
-#define PPC_BREAKPOINT_MODE_EXACT 0x0
-#define PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE 0x1
-#define PPC_BREAKPOINT_MODE_RANGE_EXCLUSIVE 0x2
-#define PPC_BREAKPOINT_MODE_MASK 0x3
- uint32_t addr_mode; /* address match mode */
-
-#define PPC_BREAKPOINT_CONDITION_MODE 0x3
-#define PPC_BREAKPOINT_CONDITION_NONE 0x0
-#define PPC_BREAKPOINT_CONDITION_AND 0x1
-#define PPC_BREAKPOINT_CONDITION_EXACT 0x1 /* different name for the same thing as above */
-#define PPC_BREAKPOINT_CONDITION_OR 0x2
-#define PPC_BREAKPOINT_CONDITION_AND_OR 0x3
-#define PPC_BREAKPOINT_CONDITION_BE_ALL 0x00ff0000 /* byte enable bits */
-#define PPC_BREAKPOINT_CONDITION_BE(n) (1<<((n)+16))
- uint32_t condition_mode; /* break/watchpoint condition flags */
-
- uint64_t addr;
- uint64_t addr2;
- uint64_t condition_value;
-};
-
-A request specifies one event, not necessarily just one register to be set.
-For instance, if the request is for a watchpoint with a condition, both the
-DAC and DVC registers will be set in the same request.
-
-With this GDB can ask for all kinds of hardware breakpoints and watchpoints
-that the BookE supports. COMEFROM breakpoints available in server processors
-are not contemplated, but that is out of the scope of this work.
-
-ptrace will return an integer (handle) uniquely identifying the breakpoint or
-watchpoint just created. This integer will be used in the PTRACE_DELHWDEBUG
-request to ask for its removal. Return -ENOSPC if the requested breakpoint
-can't be allocated on the registers.
-
-Some examples of using the structure to:
-
-- set a breakpoint in the first breakpoint register
-
- p.version = PPC_DEBUG_CURRENT_VERSION;
- p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE;
- p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
- p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
- p.addr = (uint64_t) address;
- p.addr2 = 0;
- p.condition_value = 0;
-
-- set a watchpoint which triggers on reads in the second watchpoint register
-
- p.version = PPC_DEBUG_CURRENT_VERSION;
- p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ;
- p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
- p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
- p.addr = (uint64_t) address;
- p.addr2 = 0;
- p.condition_value = 0;
-
-- set a watchpoint which triggers only with a specific value
-
- p.version = PPC_DEBUG_CURRENT_VERSION;
- p.trigger_type = PPC_BREAKPOINT_TRIGGER_READ;
- p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
- p.condition_mode = PPC_BREAKPOINT_CONDITION_AND | PPC_BREAKPOINT_CONDITION_BE_ALL;
- p.addr = (uint64_t) address;
- p.addr2 = 0;
- p.condition_value = (uint64_t) condition;
-
-- set a ranged hardware breakpoint
-
- p.version = PPC_DEBUG_CURRENT_VERSION;
- p.trigger_type = PPC_BREAKPOINT_TRIGGER_EXECUTE;
- p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
- p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
- p.addr = (uint64_t) begin_range;
- p.addr2 = (uint64_t) end_range;
- p.condition_value = 0;
-
-- set a watchpoint in server processors (BookS)
-
- p.version = 1;
- p.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
- p.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
- or
- p.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
-
- p.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
- p.addr = (uint64_t) begin_range;
- /* For PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE addr2 needs to be specified, where
- * addr2 - addr <= 8 Bytes.
- */
- p.addr2 = (uint64_t) end_range;
- p.condition_value = 0;
-
-3. PTRACE_DELHWDEBUG
-
-Takes an integer which identifies an existing breakpoint or watchpoint
-(i.e., the value returned from PTRACE_SETHWDEBUG), and deletes the
-corresponding breakpoint or watchpoint..
--- /dev/null
+=========================================
+Freescale QUICC Engine Firmware Uploading
+=========================================
+
+(c) 2007 Timur Tabi <timur at freescale.com>,
+ Freescale Semiconductor
+
+.. Table of Contents
+
+ I - Software License for Firmware
+
+ II - Microcode Availability
+
+ III - Description and Terminology
+
+ IV - Microcode Programming Details
+
+ V - Firmware Structure Layout
+
+ VI - Sample Code for Creating Firmware Files
+
+Revision Information
+====================
+
+November 30, 2007: Rev 1.0 - Initial version
+
+I - Software License for Firmware
+=================================
+
+Each firmware file comes with its own software license. For information on
+the particular license, please see the license text that is distributed with
+the firmware.
+
+II - Microcode Availability
+===========================
+
+Firmware files are distributed through various channels. Some are available on
+http://opensource.freescale.com. For other firmware files, please contact
+your Freescale representative or your operating system vendor.
+
+III - Description and Terminology
+=================================
+
+In this document, the term 'microcode' refers to the sequence of 32-bit
+integers that compose the actual QE microcode.
+
+The term 'firmware' refers to a binary blob that contains the microcode as
+well as other data that
+
+ 1) describes the microcode's purpose
+ 2) describes how and where to upload the microcode
+ 3) specifies the values of various registers
+ 4) includes additional data for use by specific device drivers
+
+Firmware files are binary files that contain only a firmware.
+
+IV - Microcode Programming Details
+===================================
+
+The QE architecture allows for only one microcode present in I-RAM for each
+RISC processor. To replace any current microcode, a full QE reset (which
+disables the microcode) must be performed first.
+
+QE microcode is uploaded using the following procedure:
+
+1) The microcode is placed into I-RAM at a specific location, using the
+ IRAM.IADD and IRAM.IDATA registers.
+
+2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware
+ needs split I-RAM. Split I-RAM is only meaningful for SOCs that have
+ QEs with multiple RISC processors, such as the 8360. Splitting the I-RAM
+ allows each processor to run a different microcode, effectively creating an
+ asymmetric multiprocessing (AMP) system.
+
+3) The TIBCR trap registers are loaded with the addresses of the trap handlers
+ in the microcode.
+
+4) The RSP.ECCR register is programmed with the value provided.
+
+5) If necessary, device drivers that need the virtual traps and extended mode
+ data will use them.
+
+Virtual Microcode Traps
+
+These virtual traps are conditional branches in the microcode. These are
+"soft" provisional introduced in the ROMcode in order to enable higher
+flexibility and save h/w traps If new features are activated or an issue is
+being fixed in the RAM package utilizing they should be activated. This data
+structure signals the microcode which of these virtual traps is active.
+
+This structure contains 6 words that the application should copy to some
+specific been defined. This table describes the structure::
+
+ ---------------------------------------------------------------
+ | Offset in | | Destination Offset | Size of |
+ | array | Protocol | within PRAM | Operand |
+ --------------------------------------------------------------|
+ | 0 | Ethernet | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 4 | ATM | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 8 | PPP | 0xF8 | 4 bytes |
+ | | interworking | | |
+ ---------------------------------------------------------------
+ | 12 | Ethernet RX | 0x22 | 1 byte |
+ | | Distributor Page | | |
+ ---------------------------------------------------------------
+ | 16 | ATM Globtal | 0x28 | 1 byte |
+ | | Params Table | | |
+ ---------------------------------------------------------------
+ | 20 | Insert Frame | 0xF8 | 4 bytes |
+ ---------------------------------------------------------------
+
+
+Extended Modes
+
+This is a double word bit array (64 bits) that defines special functionality
+which has an impact on the software drivers. Each bit has its own impact
+and has special instructions for the s/w associated with it. This structure is
+described in this table::
+
+ -----------------------------------------------------------------------
+ | Bit # | Name | Description |
+ -----------------------------------------------------------------------
+ | 0 | General | Indicates that prior to each host command |
+ | | push command | given by the application, the software must |
+ | | | assert a special host command (push command)|
+ | | | CECDR = 0x00800000. |
+ | | | CECR = 0x01c1000f. |
+ -----------------------------------------------------------------------
+ | 1 | UCC ATM | Indicates that after issuing ATM RX INIT |
+ | | RX INIT | command, the host must issue another special|
+ | | push command | command (push command) and immediately |
+ | | | following that re-issue the ATM RX INIT |
+ | | | command. (This makes the sequence of |
+ | | | initializing the ATM receiver a sequence of |
+ | | | three host commands) |
+ | | | CECDR = 0x00800000. |
+ | | | CECR = 0x01c1000f. |
+ -----------------------------------------------------------------------
+ | 2 | Add/remove | Indicates that following the specific host |
+ | | command | command: "Add/Remove entry in Hash Lookup |
+ | | validation | Table" used in Interworking setup, the user |
+ | | | must issue another command. |
+ | | | CECDR = 0xce000003. |
+ | | | CECR = 0x01c10f58. |
+ -----------------------------------------------------------------------
+ | 3 | General push | Indicates that the s/w has to initialize |
+ | | command | some pointers in the Ethernet thread pages |
+ | | | which are used when Header Compression is |
+ | | | activated. The full details of these |
+ | | | pointers is located in the software drivers.|
+ -----------------------------------------------------------------------
+ | 4 | General push | Indicates that after issuing Ethernet TX |
+ | | command | INIT command, user must issue this command |
+ | | | for each SNUM of Ethernet TX thread. |
+ | | | CECDR = 0x00800003. |
+ | | | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM}, |
+ | | | 1'b{1}, 12'b{0}, 4'b{1} |
+ -----------------------------------------------------------------------
+ | 5 - 31 | N/A | Reserved, set to zero. |
+ -----------------------------------------------------------------------
+
+V - Firmware Structure Layout
+==============================
+
+QE microcode from Freescale is typically provided as a header file. This
+header file contains macros that define the microcode binary itself as well as
+some other data used in uploading that microcode. The format of these files
+do not lend themselves to simple inclusion into other code. Hence,
+the need for a more portable format. This section defines that format.
+
+Instead of distributing a header file, the microcode and related data are
+embedded into a binary blob. This blob is passed to the qe_upload_firmware()
+function, which parses the blob and performs everything necessary to upload
+the microcode.
+
+All integers are big-endian. See the comments for function
+qe_upload_firmware() for up-to-date implementation information.
+
+This structure supports versioning, where the version of the structure is
+embedded into the structure itself. To ensure forward and backwards
+compatibility, all versions of the structure must use the same 'qe_header'
+structure at the beginning.
+
+'header' (type: struct qe_header):
+ The 'length' field is the size, in bytes, of the entire structure,
+ including all the microcode embedded in it, as well as the CRC (if
+ present).
+
+ The 'magic' field is an array of three bytes that contains the letters
+ 'Q', 'E', and 'F'. This is an identifier that indicates that this
+ structure is a QE Firmware structure.
+
+ The 'version' field is a single byte that indicates the version of this
+ structure. If the layout of the structure should ever need to be
+ changed to add support for additional types of microcode, then the
+ version number should also be changed.
+
+The 'id' field is a null-terminated string(suitable for printing) that
+identifies the firmware.
+
+The 'count' field indicates the number of 'microcode' structures. There
+must be one and only one 'microcode' structure for each RISC processor.
+Therefore, this field also represents the number of RISC processors for this
+SOC.
+
+The 'soc' structure contains the SOC numbers and revisions used to match
+the microcode to the SOC itself. Normally, the microcode loader should
+check the data in this structure with the SOC number and revisions, and
+only upload the microcode if there's a match. However, this check is not
+made on all platforms.
+
+Although it is not recommended, you can specify '0' in the soc.model
+field to skip matching SOCs altogether.
+
+The 'model' field is a 16-bit number that matches the actual SOC. The
+'major' and 'minor' fields are the major and minor revision numbers,
+respectively, of the SOC.
+
+For example, to match the 8323, revision 1.0::
+
+ soc.model = 8323
+ soc.major = 1
+ soc.minor = 0
+
+'padding' is necessary for structure alignment. This field ensures that the
+'extended_modes' field is aligned on a 64-bit boundary.
+
+'extended_modes' is a bitfield that defines special functionality which has an
+impact on the device drivers. Each bit has its own impact and has special
+instructions for the driver associated with it. This field is stored in
+the QE library and available to any driver that calles qe_get_firmware_info().
+
+'vtraps' is an array of 8 words that contain virtual trap values for each
+virtual traps. As with 'extended_modes', this field is stored in the QE
+library and available to any driver that calles qe_get_firmware_info().
+
+'microcode' (type: struct qe_microcode):
+ For each RISC processor there is one 'microcode' structure. The first
+ 'microcode' structure is for the first RISC, and so on.
+
+ The 'id' field is a null-terminated string suitable for printing that
+ identifies this particular microcode.
+
+ 'traps' is an array of 16 words that contain hardware trap values
+ for each of the 16 traps. If trap[i] is 0, then this particular
+ trap is to be ignored (i.e. not written to TIBCR[i]). The entire value
+ is written as-is to the TIBCR[i] register, so be sure to set the EN
+ and T_IBP bits if necessary.
+
+ 'eccr' is the value to program into the ECCR register.
+
+ 'iram_offset' is the offset into IRAM to start writing the
+ microcode.
+
+ 'count' is the number of 32-bit words in the microcode.
+
+ 'code_offset' is the offset, in bytes, from the beginning of this
+ structure where the microcode itself can be found. The first
+ microcode binary should be located immediately after the 'microcode'
+ array.
+
+ 'major', 'minor', and 'revision' are the major, minor, and revision
+ version numbers, respectively, of the microcode. If all values are 0,
+ then these fields are ignored.
+
+ 'reserved' is necessary for structure alignment. Since 'microcode'
+ is an array, the 64-bit 'extended_modes' field needs to be aligned
+ on a 64-bit boundary, and this can only happen if the size of
+ 'microcode' is a multiple of 8 bytes. To ensure that, we add
+ 'reserved'.
+
+After the last microcode is a 32-bit CRC. It can be calculated using
+this algorithm::
+
+ u32 crc32(const u8 *p, unsigned int len)
+ {
+ unsigned int i;
+ u32 crc = 0;
+
+ while (len--) {
+ crc ^= *p++;
+ for (i = 0; i < 8; i++)
+ crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
+ }
+ return crc;
+ }
+
+VI - Sample Code for Creating Firmware Files
+============================================
+
+A Python program that creates firmware binaries from the header files normally
+distributed by Freescale can be found on http://opensource.freescale.com.
+++ /dev/null
- Freescale QUICC Engine Firmware Uploading
- -----------------------------------------
-
-(c) 2007 Timur Tabi <timur at freescale.com>,
- Freescale Semiconductor
-
-Table of Contents
-=================
-
- I - Software License for Firmware
-
- II - Microcode Availability
-
- III - Description and Terminology
-
- IV - Microcode Programming Details
-
- V - Firmware Structure Layout
-
- VI - Sample Code for Creating Firmware Files
-
-Revision Information
-====================
-
-November 30, 2007: Rev 1.0 - Initial version
-
-I - Software License for Firmware
-=================================
-
-Each firmware file comes with its own software license. For information on
-the particular license, please see the license text that is distributed with
-the firmware.
-
-II - Microcode Availability
-===========================
-
-Firmware files are distributed through various channels. Some are available on
-http://opensource.freescale.com. For other firmware files, please contact
-your Freescale representative or your operating system vendor.
-
-III - Description and Terminology
-================================
-
-In this document, the term 'microcode' refers to the sequence of 32-bit
-integers that compose the actual QE microcode.
-
-The term 'firmware' refers to a binary blob that contains the microcode as
-well as other data that
-
- 1) describes the microcode's purpose
- 2) describes how and where to upload the microcode
- 3) specifies the values of various registers
- 4) includes additional data for use by specific device drivers
-
-Firmware files are binary files that contain only a firmware.
-
-IV - Microcode Programming Details
-===================================
-
-The QE architecture allows for only one microcode present in I-RAM for each
-RISC processor. To replace any current microcode, a full QE reset (which
-disables the microcode) must be performed first.
-
-QE microcode is uploaded using the following procedure:
-
-1) The microcode is placed into I-RAM at a specific location, using the
- IRAM.IADD and IRAM.IDATA registers.
-
-2) The CERCR.CIR bit is set to 0 or 1, depending on whether the firmware
- needs split I-RAM. Split I-RAM is only meaningful for SOCs that have
- QEs with multiple RISC processors, such as the 8360. Splitting the I-RAM
- allows each processor to run a different microcode, effectively creating an
- asymmetric multiprocessing (AMP) system.
-
-3) The TIBCR trap registers are loaded with the addresses of the trap handlers
- in the microcode.
-
-4) The RSP.ECCR register is programmed with the value provided.
-
-5) If necessary, device drivers that need the virtual traps and extended mode
- data will use them.
-
-Virtual Microcode Traps
-
-These virtual traps are conditional branches in the microcode. These are
-"soft" provisional introduced in the ROMcode in order to enable higher
-flexibility and save h/w traps If new features are activated or an issue is
-being fixed in the RAM package utilizing they should be activated. This data
-structure signals the microcode which of these virtual traps is active.
-
-This structure contains 6 words that the application should copy to some
-specific been defined. This table describes the structure.
-
- ---------------------------------------------------------------
- | Offset in | | Destination Offset | Size of |
- | array | Protocol | within PRAM | Operand |
- --------------------------------------------------------------|
- | 0 | Ethernet | 0xF8 | 4 bytes |
- | | interworking | | |
- ---------------------------------------------------------------
- | 4 | ATM | 0xF8 | 4 bytes |
- | | interworking | | |
- ---------------------------------------------------------------
- | 8 | PPP | 0xF8 | 4 bytes |
- | | interworking | | |
- ---------------------------------------------------------------
- | 12 | Ethernet RX | 0x22 | 1 byte |
- | | Distributor Page | | |
- ---------------------------------------------------------------
- | 16 | ATM Globtal | 0x28 | 1 byte |
- | | Params Table | | |
- ---------------------------------------------------------------
- | 20 | Insert Frame | 0xF8 | 4 bytes |
- ---------------------------------------------------------------
-
-
-Extended Modes
-
-This is a double word bit array (64 bits) that defines special functionality
-which has an impact on the software drivers. Each bit has its own impact
-and has special instructions for the s/w associated with it. This structure is
-described in this table:
-
- -----------------------------------------------------------------------
- | Bit # | Name | Description |
- -----------------------------------------------------------------------
- | 0 | General | Indicates that prior to each host command |
- | | push command | given by the application, the software must |
- | | | assert a special host command (push command)|
- | | | CECDR = 0x00800000. |
- | | | CECR = 0x01c1000f. |
- -----------------------------------------------------------------------
- | 1 | UCC ATM | Indicates that after issuing ATM RX INIT |
- | | RX INIT | command, the host must issue another special|
- | | push command | command (push command) and immediately |
- | | | following that re-issue the ATM RX INIT |
- | | | command. (This makes the sequence of |
- | | | initializing the ATM receiver a sequence of |
- | | | three host commands) |
- | | | CECDR = 0x00800000. |
- | | | CECR = 0x01c1000f. |
- -----------------------------------------------------------------------
- | 2 | Add/remove | Indicates that following the specific host |
- | | command | command: "Add/Remove entry in Hash Lookup |
- | | validation | Table" used in Interworking setup, the user |
- | | | must issue another command. |
- | | | CECDR = 0xce000003. |
- | | | CECR = 0x01c10f58. |
- -----------------------------------------------------------------------
- | 3 | General push | Indicates that the s/w has to initialize |
- | | command | some pointers in the Ethernet thread pages |
- | | | which are used when Header Compression is |
- | | | activated. The full details of these |
- | | | pointers is located in the software drivers.|
- -----------------------------------------------------------------------
- | 4 | General push | Indicates that after issuing Ethernet TX |
- | | command | INIT command, user must issue this command |
- | | | for each SNUM of Ethernet TX thread. |
- | | | CECDR = 0x00800003. |
- | | | CECR = 0x7'b{0}, 8'b{Enet TX thread SNUM}, |
- | | | 1'b{1}, 12'b{0}, 4'b{1} |
- -----------------------------------------------------------------------
- | 5 - 31 | N/A | Reserved, set to zero. |
- -----------------------------------------------------------------------
-
-V - Firmware Structure Layout
-==============================
-
-QE microcode from Freescale is typically provided as a header file. This
-header file contains macros that define the microcode binary itself as well as
-some other data used in uploading that microcode. The format of these files
-do not lend themselves to simple inclusion into other code. Hence,
-the need for a more portable format. This section defines that format.
-
-Instead of distributing a header file, the microcode and related data are
-embedded into a binary blob. This blob is passed to the qe_upload_firmware()
-function, which parses the blob and performs everything necessary to upload
-the microcode.
-
-All integers are big-endian. See the comments for function
-qe_upload_firmware() for up-to-date implementation information.
-
-This structure supports versioning, where the version of the structure is
-embedded into the structure itself. To ensure forward and backwards
-compatibility, all versions of the structure must use the same 'qe_header'
-structure at the beginning.
-
-'header' (type: struct qe_header):
- The 'length' field is the size, in bytes, of the entire structure,
- including all the microcode embedded in it, as well as the CRC (if
- present).
-
- The 'magic' field is an array of three bytes that contains the letters
- 'Q', 'E', and 'F'. This is an identifier that indicates that this
- structure is a QE Firmware structure.
-
- The 'version' field is a single byte that indicates the version of this
- structure. If the layout of the structure should ever need to be
- changed to add support for additional types of microcode, then the
- version number should also be changed.
-
-The 'id' field is a null-terminated string(suitable for printing) that
-identifies the firmware.
-
-The 'count' field indicates the number of 'microcode' structures. There
-must be one and only one 'microcode' structure for each RISC processor.
-Therefore, this field also represents the number of RISC processors for this
-SOC.
-
-The 'soc' structure contains the SOC numbers and revisions used to match
-the microcode to the SOC itself. Normally, the microcode loader should
-check the data in this structure with the SOC number and revisions, and
-only upload the microcode if there's a match. However, this check is not
-made on all platforms.
-
-Although it is not recommended, you can specify '0' in the soc.model
-field to skip matching SOCs altogether.
-
-The 'model' field is a 16-bit number that matches the actual SOC. The
-'major' and 'minor' fields are the major and minor revision numbers,
-respectively, of the SOC.
-
-For example, to match the 8323, revision 1.0:
- soc.model = 8323
- soc.major = 1
- soc.minor = 0
-
-'padding' is necessary for structure alignment. This field ensures that the
-'extended_modes' field is aligned on a 64-bit boundary.
-
-'extended_modes' is a bitfield that defines special functionality which has an
-impact on the device drivers. Each bit has its own impact and has special
-instructions for the driver associated with it. This field is stored in
-the QE library and available to any driver that calles qe_get_firmware_info().
-
-'vtraps' is an array of 8 words that contain virtual trap values for each
-virtual traps. As with 'extended_modes', this field is stored in the QE
-library and available to any driver that calles qe_get_firmware_info().
-
-'microcode' (type: struct qe_microcode):
- For each RISC processor there is one 'microcode' structure. The first
- 'microcode' structure is for the first RISC, and so on.
-
- The 'id' field is a null-terminated string suitable for printing that
- identifies this particular microcode.
-
- 'traps' is an array of 16 words that contain hardware trap values
- for each of the 16 traps. If trap[i] is 0, then this particular
- trap is to be ignored (i.e. not written to TIBCR[i]). The entire value
- is written as-is to the TIBCR[i] register, so be sure to set the EN
- and T_IBP bits if necessary.
-
- 'eccr' is the value to program into the ECCR register.
-
- 'iram_offset' is the offset into IRAM to start writing the
- microcode.
-
- 'count' is the number of 32-bit words in the microcode.
-
- 'code_offset' is the offset, in bytes, from the beginning of this
- structure where the microcode itself can be found. The first
- microcode binary should be located immediately after the 'microcode'
- array.
-
- 'major', 'minor', and 'revision' are the major, minor, and revision
- version numbers, respectively, of the microcode. If all values are 0,
- then these fields are ignored.
-
- 'reserved' is necessary for structure alignment. Since 'microcode'
- is an array, the 64-bit 'extended_modes' field needs to be aligned
- on a 64-bit boundary, and this can only happen if the size of
- 'microcode' is a multiple of 8 bytes. To ensure that, we add
- 'reserved'.
-
-After the last microcode is a 32-bit CRC. It can be calculated using
-this algorithm:
-
-u32 crc32(const u8 *p, unsigned int len)
-{
- unsigned int i;
- u32 crc = 0;
-
- while (len--) {
- crc ^= *p++;
- for (i = 0; i < 8; i++)
- crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
- }
- return crc;
-}
-
-VI - Sample Code for Creating Firmware Files
-============================================
-
-A Python program that creates firmware binaries from the header files normally
-distributed by Freescale can be found on http://opensource.freescale.com.
--- /dev/null
+===============================================
+Power Architecture 64-bit Linux system call ABI
+===============================================
+
+syscall
+=======
+
+syscall calling sequence\ [1]_ matches the Power Architecture 64-bit ELF ABI
+specification C function calling sequence, including register preservation
+rules, with the following differences.
+
+.. [1] Some syscalls (typically low-level management functions) may have
+ different calling sequences (e.g., rt_sigreturn).
+
+Parameters and return value
+---------------------------
+The system call number is specified in r0.
+
+There is a maximum of 6 integer parameters to a syscall, passed in r3-r8.
+
+Both a return value and a return error code are returned. cr0.SO is the return
+error code, and r3 is the return value or error code. When cr0.SO is clear,
+the syscall succeeded and r3 is the return value. When cr0.SO is set, the
+syscall failed and r3 is the error code that generally corresponds to errno.
+
+Stack
+-----
+System calls do not modify the caller's stack frame. For example, the caller's
+stack frame LR and CR save fields are not used.
+
+Register preservation rules
+---------------------------
+Register preservation rules match the ELF ABI calling sequence with the
+following differences:
+
+=========== ============= ========================================
+r0 Volatile (System call number.)
+r3 Volatile (Parameter 1, and return value.)
+r4-r8 Volatile (Parameters 2-6.)
+cr0 Volatile (cr0.SO is the return error condition)
+cr1, cr5-7 Nonvolatile
+lr Nonvolatile
+=========== ============= ========================================
+
+All floating point and vector data registers as well as control and status
+registers are nonvolatile.
+
+Invocation
+----------
+The syscall is performed with the sc instruction, and returns with execution
+continuing at the instruction following the sc instruction.
+
+Transactional Memory
+--------------------
+Syscall behavior can change if the processor is in transactional or suspended
+transaction state, and the syscall can affect the behavior of the transaction.
+
+If the processor is in suspended state when a syscall is made, the syscall
+will be performed as normal, and will return as normal. The syscall will be
+performed in suspended state, so its side effects will be persistent according
+to the usual transactional memory semantics. A syscall may or may not result
+in the transaction being doomed by hardware.
+
+If the processor is in transactional state when a syscall is made, then the
+behavior depends on the presence of PPC_FEATURE2_HTM_NOSC in the AT_HWCAP2 ELF
+auxiliary vector.
+
+- If present, which is the case for newer kernels, then the syscall will not
+ be performed and the transaction will be doomed by the kernel with the
+ failure code TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT in the TEXASR SPR.
+
+- If not present (older kernels), then the kernel will suspend the
+ transactional state and the syscall will proceed as in the case of a
+ suspended state syscall, and will resume the transactional state before
+ returning to the caller. This case is not well defined or supported, so this
+ behavior should not be relied upon.
+
+
+vsyscall
+========
+
+vsyscall calling sequence matches the syscall calling sequence, with the
+following differences. Some vsyscalls may have different calling sequences.
+
+Parameters and return value
+---------------------------
+r0 is not used as an input. The vsyscall is selected by its address.
+
+Stack
+-----
+The vsyscall may or may not use the caller's stack frame save areas.
+
+Register preservation rules
+---------------------------
+
+=========== ========
+r0 Volatile
+cr1, cr5-7 Volatile
+lr Volatile
+=========== ========
+
+Invocation
+----------
+The vsyscall is performed with a branch-with-link instruction to the vsyscall
+function address.
+
+Transactional Memory
+--------------------
+vsyscalls will run in the same transactional state as the caller. A vsyscall
+may or may not result in the transaction being doomed by hardware.
+++ /dev/null
-===============================================
-Power Architecture 64-bit Linux system call ABI
-===============================================
-
-syscall
-=======
-
-syscall calling sequence[*] matches the Power Architecture 64-bit ELF ABI
-specification C function calling sequence, including register preservation
-rules, with the following differences.
-
-[*] Some syscalls (typically low-level management functions) may have
- different calling sequences (e.g., rt_sigreturn).
-
-Parameters and return value
----------------------------
-The system call number is specified in r0.
-
-There is a maximum of 6 integer parameters to a syscall, passed in r3-r8.
-
-Both a return value and a return error code are returned. cr0.SO is the return
-error code, and r3 is the return value or error code. When cr0.SO is clear,
-the syscall succeeded and r3 is the return value. When cr0.SO is set, the
-syscall failed and r3 is the error code that generally corresponds to errno.
-
-Stack
------
-System calls do not modify the caller's stack frame. For example, the caller's
-stack frame LR and CR save fields are not used.
-
-Register preservation rules
----------------------------
-Register preservation rules match the ELF ABI calling sequence with the
-following differences:
-
-r0: Volatile. (System call number.)
-r3: Volatile. (Parameter 1, and return value.)
-r4-r8: Volatile. (Parameters 2-6.)
-cr0: Volatile (cr0.SO is the return error condition)
-cr1, cr5-7: Nonvolatile.
-lr: Nonvolatile.
-
-All floating point and vector data registers as well as control and status
-registers are nonvolatile.
-
-Invocation
-----------
-The syscall is performed with the sc instruction, and returns with execution
-continuing at the instruction following the sc instruction.
-
-Transactional Memory
---------------------
-Syscall behavior can change if the processor is in transactional or suspended
-transaction state, and the syscall can affect the behavior of the transaction.
-
-If the processor is in suspended state when a syscall is made, the syscall
-will be performed as normal, and will return as normal. The syscall will be
-performed in suspended state, so its side effects will be persistent according
-to the usual transactional memory semantics. A syscall may or may not result
-in the transaction being doomed by hardware.
-
-If the processor is in transactional state when a syscall is made, then the
-behavior depends on the presence of PPC_FEATURE2_HTM_NOSC in the AT_HWCAP2 ELF
-auxiliary vector.
-
-- If present, which is the case for newer kernels, then the syscall will not
- be performed and the transaction will be doomed by the kernel with the
- failure code TM_CAUSE_SYSCALL | TM_CAUSE_PERSISTENT in the TEXASR SPR.
-
-- If not present (older kernels), then the kernel will suspend the
- transactional state and the syscall will proceed as in the case of a
- suspended state syscall, and will resume the transactional state before
- returning to the caller. This case is not well defined or supported, so this
- behavior should not be relied upon.
-
-
-vsyscall
-========
-
-vsyscall calling sequence matches the syscall calling sequence, with the
-following differences. Some vsyscalls may have different calling sequences.
-
-Parameters and return value
----------------------------
-r0 is not used as an input. The vsyscall is selected by its address.
-
-Stack
------
-The vsyscall may or may not use the caller's stack frame save areas.
-
-Register preservation rules
----------------------------
-r0: Volatile.
-cr1, cr5-7: Volatile.
-lr: Volatile.
-
-Invocation
-----------
-The vsyscall is performed with a branch-with-link instruction to the vsyscall
-function address.
-
-Transactional Memory
---------------------
-vsyscalls will run in the same transactional state as the caller. A vsyscall
-may or may not result in the transaction being doomed by hardware.
--- /dev/null
+============================
+Transactional Memory support
+============================
+
+POWER kernel support for this feature is currently limited to supporting
+its use by user programs. It is not currently used by the kernel itself.
+
+This file aims to sum up how it is supported by Linux and what behaviour you
+can expect from your user programs.
+
+
+Basic overview
+==============
+
+Hardware Transactional Memory is supported on POWER8 processors, and is a
+feature that enables a different form of atomic memory access. Several new
+instructions are presented to delimit transactions; transactions are
+guaranteed to either complete atomically or roll back and undo any partial
+changes.
+
+A simple transaction looks like this::
+
+ begin_move_money:
+ tbegin
+ beq abort_handler
+
+ ld r4, SAVINGS_ACCT(r3)
+ ld r5, CURRENT_ACCT(r3)
+ subi r5, r5, 1
+ addi r4, r4, 1
+ std r4, SAVINGS_ACCT(r3)
+ std r5, CURRENT_ACCT(r3)
+
+ tend
+
+ b continue
+
+ abort_handler:
+ ... test for odd failures ...
+
+ /* Retry the transaction if it failed because it conflicted with
+ * someone else: */
+ b begin_move_money
+
+
+The 'tbegin' instruction denotes the start point, and 'tend' the end point.
+Between these points the processor is in 'Transactional' state; any memory
+references will complete in one go if there are no conflicts with other
+transactional or non-transactional accesses within the system. In this
+example, the transaction completes as though it were normal straight-line code
+IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an
+atomic move of money from the current account to the savings account has been
+performed. Even though the normal ld/std instructions are used (note no
+lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be
+updated, or neither will be updated.
+
+If, in the meantime, there is a conflict with the locations accessed by the
+transaction, the transaction will be aborted by the CPU. Register and memory
+state will roll back to that at the 'tbegin', and control will continue from
+'tbegin+4'. The branch to abort_handler will be taken this second time; the
+abort handler can check the cause of the failure, and retry.
+
+Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR
+and a few other status/flag regs; see the ISA for details.
+
+Causes of transaction aborts
+============================
+
+- Conflicts with cache lines used by other processors
+- Signals
+- Context switches
+- See the ISA for full documentation of everything that will abort transactions.
+
+
+Syscalls
+========
+
+Syscalls made from within an active transaction will not be performed and the
+transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
+| TM_CAUSE_PERSISTENT.
+
+Syscalls made from within a suspended transaction are performed as normal and
+the transaction is not explicitly doomed by the kernel. However, what the
+kernel does to perform the syscall may result in the transaction being doomed
+by the hardware. The syscall is performed in suspended mode so any side
+effects will be persistent, independent of transaction success or failure. No
+guarantees are provided by the kernel about which syscalls will affect
+transaction success.
+
+Care must be taken when relying on syscalls to abort during active transactions
+if the calls are made via a library. Libraries may cache values (which may
+give the appearance of success) or perform operations that cause transaction
+failure before entering the kernel (which may produce different failure codes).
+Examples are glibc's getpid() and lazy symbol resolution.
+
+
+Signals
+=======
+
+Delivery of signals (both sync and async) during transactions provides a second
+thread state (ucontext/mcontext) to represent the second transactional register
+state. Signal delivery 'treclaim's to capture both register states, so signals
+abort transactions. The usual ucontext_t passed to the signal handler
+represents the checkpointed/original register state; the signal appears to have
+arisen at 'tbegin+4'.
+
+If the sighandler ucontext has uc_link set, a second ucontext has been
+delivered. For future compatibility the MSR.TS field should be checked to
+determine the transactional state -- if so, the second ucontext in uc->uc_link
+represents the active transactional registers at the point of the signal.
+
+For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS
+field shows the transactional mode.
+
+For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32
+bits are stored in the MSR of the second ucontext, i.e. in
+uc->uc_link->uc_mcontext.regs->msr. The top word contains the transactional
+state TS.
+
+However, basic signal handlers don't need to be aware of transactions
+and simply returning from the handler will deal with things correctly:
+
+Transaction-aware signal handlers can read the transactional register state
+from the second ucontext. This will be necessary for crash handlers to
+determine, for example, the address of the instruction causing the SIGSEGV.
+
+Example signal handler::
+
+ void crash_handler(int sig, siginfo_t *si, void *uc)
+ {
+ ucontext_t *ucp = uc;
+ ucontext_t *transactional_ucp = ucp->uc_link;
+
+ if (ucp_link) {
+ u64 msr = ucp->uc_mcontext.regs->msr;
+ /* May have transactional ucontext! */
+ #ifndef __powerpc64__
+ msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32;
+ #endif
+ if (MSR_TM_ACTIVE(msr)) {
+ /* Yes, we crashed during a transaction. Oops. */
+ fprintf(stderr, "Transaction to be restarted at 0x%llx, but "
+ "crashy instruction was at 0x%llx\n",
+ ucp->uc_mcontext.regs->nip,
+ transactional_ucp->uc_mcontext.regs->nip);
+ }
+ }
+
+ fix_the_problem(ucp->dar);
+ }
+
+When in an active transaction that takes a signal, we need to be careful with
+the stack. It's possible that the stack has moved back up after the tbegin.
+The obvious case here is when the tbegin is called inside a function that
+returns before a tend. In this case, the stack is part of the checkpointed
+transactional memory state. If we write over this non transactionally or in
+suspend, we are in trouble because if we get a tm abort, the program counter and
+stack pointer will be back at the tbegin but our in memory stack won't be valid
+anymore.
+
+To avoid this, when taking a signal in an active transaction, we need to use
+the stack pointer from the checkpointed state, rather than the speculated
+state. This ensures that the signal context (written tm suspended) will be
+written below the stack required for the rollback. The transaction is aborted
+because of the treclaim, so any memory written between the tbegin and the
+signal will be rolled back anyway.
+
+For signals taken in non-TM or suspended mode, we use the
+normal/non-checkpointed stack pointer.
+
+Any transaction initiated inside a sighandler and suspended on return
+from the sighandler to the kernel will get reclaimed and discarded.
+
+Failure cause codes used by kernel
+==================================
+
+These are defined in <asm/reg.h>, and distinguish different reasons why the
+kernel aborted a transaction:
+
+ ====================== ================================
+ TM_CAUSE_RESCHED Thread was rescheduled.
+ TM_CAUSE_TLBI Software TLB invalid.
+ TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap.
+ TM_CAUSE_SYSCALL Syscall from active transaction.
+ TM_CAUSE_SIGNAL Signal delivered.
+ TM_CAUSE_MISC Currently unused.
+ TM_CAUSE_ALIGNMENT Alignment fault.
+ TM_CAUSE_EMULATE Emulation that touched memory.
+ ====================== ================================
+
+These can be checked by the user program's abort handler as TEXASR[0:7]. If
+bit 7 is set, it indicates that the error is consider persistent. For example
+a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
+
+GDB
+===
+
+GDB and ptrace are not currently TM-aware. If one stops during a transaction,
+it looks like the transaction has just started (the checkpointed state is
+presented). The transaction cannot then be continued and will take the failure
+handler route. Furthermore, the transactional 2nd register state will be
+inaccessible. GDB can currently be used on programs using TM, but not sensibly
+in parts within transactions.
+
+POWER9
+======
+
+TM on POWER9 has issues with storing the complete register state. This
+is described in this commit::
+
+ commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7
+ Author: Paul Mackerras <paulus@ozlabs.org>
+ Date: Wed Mar 21 21:32:01 2018 +1100
+ KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9
+
+To account for this different POWER9 chips have TM enabled in
+different ways.
+
+On POWER9N DD2.01 and below, TM is disabled. ie
+HWCAP2[PPC_FEATURE2_HTM] is not set.
+
+On POWER9N DD2.1 TM is configured by firmware to always abort a
+transaction when tm suspend occurs. So tsuspend will cause a
+transaction to be aborted and rolled back. Kernel exceptions will also
+cause the transaction to be aborted and rolled back and the exception
+will not occur. If userspace constructs a sigcontext that enables TM
+suspend, the sigcontext will be rejected by the kernel. This mode is
+advertised to users with HWCAP2[PPC_FEATURE2_HTM_NO_SUSPEND] set.
+HWCAP2[PPC_FEATURE2_HTM] is not set in this mode.
+
+On POWER9N DD2.2 and above, KVM and POWERVM emulate TM for guests (as
+described in commit 4bb3c7a0208f), hence TM is enabled for guests
+ie. HWCAP2[PPC_FEATURE2_HTM] is set for guest userspace. Guests that
+makes heavy use of TM suspend (tsuspend or kernel suspend) will result
+in traps into the hypervisor and hence will suffer a performance
+degradation. Host userspace has TM disabled
+ie. HWCAP2[PPC_FEATURE2_HTM] is not set. (although we make enable it
+at some point in the future if we bring the emulation into host
+userspace context switching).
+
+POWER9C DD1.2 and above are only available with POWERVM and hence
+Linux only runs as a guest. On these systems TM is emulated like on
+POWER9N DD2.2.
+
+Guest migration from POWER8 to POWER9 will work with POWER9N DD2.2 and
+POWER9C DD1.2. Since earlier POWER9 processors don't support TM
+emulation, migration from POWER8 to POWER9 is not supported there.
+++ /dev/null
-Transactional Memory support
-============================
-
-POWER kernel support for this feature is currently limited to supporting
-its use by user programs. It is not currently used by the kernel itself.
-
-This file aims to sum up how it is supported by Linux and what behaviour you
-can expect from your user programs.
-
-
-Basic overview
-==============
-
-Hardware Transactional Memory is supported on POWER8 processors, and is a
-feature that enables a different form of atomic memory access. Several new
-instructions are presented to delimit transactions; transactions are
-guaranteed to either complete atomically or roll back and undo any partial
-changes.
-
-A simple transaction looks like this:
-
-begin_move_money:
- tbegin
- beq abort_handler
-
- ld r4, SAVINGS_ACCT(r3)
- ld r5, CURRENT_ACCT(r3)
- subi r5, r5, 1
- addi r4, r4, 1
- std r4, SAVINGS_ACCT(r3)
- std r5, CURRENT_ACCT(r3)
-
- tend
-
- b continue
-
-abort_handler:
- ... test for odd failures ...
-
- /* Retry the transaction if it failed because it conflicted with
- * someone else: */
- b begin_move_money
-
-
-The 'tbegin' instruction denotes the start point, and 'tend' the end point.
-Between these points the processor is in 'Transactional' state; any memory
-references will complete in one go if there are no conflicts with other
-transactional or non-transactional accesses within the system. In this
-example, the transaction completes as though it were normal straight-line code
-IF no other processor has touched SAVINGS_ACCT(r3) or CURRENT_ACCT(r3); an
-atomic move of money from the current account to the savings account has been
-performed. Even though the normal ld/std instructions are used (note no
-lwarx/stwcx), either *both* SAVINGS_ACCT(r3) and CURRENT_ACCT(r3) will be
-updated, or neither will be updated.
-
-If, in the meantime, there is a conflict with the locations accessed by the
-transaction, the transaction will be aborted by the CPU. Register and memory
-state will roll back to that at the 'tbegin', and control will continue from
-'tbegin+4'. The branch to abort_handler will be taken this second time; the
-abort handler can check the cause of the failure, and retry.
-
-Checkpointed registers include all GPRs, FPRs, VRs/VSRs, LR, CCR/CR, CTR, FPCSR
-and a few other status/flag regs; see the ISA for details.
-
-Causes of transaction aborts
-============================
-
-- Conflicts with cache lines used by other processors
-- Signals
-- Context switches
-- See the ISA for full documentation of everything that will abort transactions.
-
-
-Syscalls
-========
-
-Syscalls made from within an active transaction will not be performed and the
-transaction will be doomed by the kernel with the failure code TM_CAUSE_SYSCALL
-| TM_CAUSE_PERSISTENT.
-
-Syscalls made from within a suspended transaction are performed as normal and
-the transaction is not explicitly doomed by the kernel. However, what the
-kernel does to perform the syscall may result in the transaction being doomed
-by the hardware. The syscall is performed in suspended mode so any side
-effects will be persistent, independent of transaction success or failure. No
-guarantees are provided by the kernel about which syscalls will affect
-transaction success.
-
-Care must be taken when relying on syscalls to abort during active transactions
-if the calls are made via a library. Libraries may cache values (which may
-give the appearance of success) or perform operations that cause transaction
-failure before entering the kernel (which may produce different failure codes).
-Examples are glibc's getpid() and lazy symbol resolution.
-
-
-Signals
-=======
-
-Delivery of signals (both sync and async) during transactions provides a second
-thread state (ucontext/mcontext) to represent the second transactional register
-state. Signal delivery 'treclaim's to capture both register states, so signals
-abort transactions. The usual ucontext_t passed to the signal handler
-represents the checkpointed/original register state; the signal appears to have
-arisen at 'tbegin+4'.
-
-If the sighandler ucontext has uc_link set, a second ucontext has been
-delivered. For future compatibility the MSR.TS field should be checked to
-determine the transactional state -- if so, the second ucontext in uc->uc_link
-represents the active transactional registers at the point of the signal.
-
-For 64-bit processes, uc->uc_mcontext.regs->msr is a full 64-bit MSR and its TS
-field shows the transactional mode.
-
-For 32-bit processes, the mcontext's MSR register is only 32 bits; the top 32
-bits are stored in the MSR of the second ucontext, i.e. in
-uc->uc_link->uc_mcontext.regs->msr. The top word contains the transactional
-state TS.
-
-However, basic signal handlers don't need to be aware of transactions
-and simply returning from the handler will deal with things correctly:
-
-Transaction-aware signal handlers can read the transactional register state
-from the second ucontext. This will be necessary for crash handlers to
-determine, for example, the address of the instruction causing the SIGSEGV.
-
-Example signal handler:
-
- void crash_handler(int sig, siginfo_t *si, void *uc)
- {
- ucontext_t *ucp = uc;
- ucontext_t *transactional_ucp = ucp->uc_link;
-
- if (ucp_link) {
- u64 msr = ucp->uc_mcontext.regs->msr;
- /* May have transactional ucontext! */
-#ifndef __powerpc64__
- msr |= ((u64)transactional_ucp->uc_mcontext.regs->msr) << 32;
-#endif
- if (MSR_TM_ACTIVE(msr)) {
- /* Yes, we crashed during a transaction. Oops. */
- fprintf(stderr, "Transaction to be restarted at 0x%llx, but "
- "crashy instruction was at 0x%llx\n",
- ucp->uc_mcontext.regs->nip,
- transactional_ucp->uc_mcontext.regs->nip);
- }
- }
-
- fix_the_problem(ucp->dar);
- }
-
-When in an active transaction that takes a signal, we need to be careful with
-the stack. It's possible that the stack has moved back up after the tbegin.
-The obvious case here is when the tbegin is called inside a function that
-returns before a tend. In this case, the stack is part of the checkpointed
-transactional memory state. If we write over this non transactionally or in
-suspend, we are in trouble because if we get a tm abort, the program counter and
-stack pointer will be back at the tbegin but our in memory stack won't be valid
-anymore.
-
-To avoid this, when taking a signal in an active transaction, we need to use
-the stack pointer from the checkpointed state, rather than the speculated
-state. This ensures that the signal context (written tm suspended) will be
-written below the stack required for the rollback. The transaction is aborted
-because of the treclaim, so any memory written between the tbegin and the
-signal will be rolled back anyway.
-
-For signals taken in non-TM or suspended mode, we use the
-normal/non-checkpointed stack pointer.
-
-Any transaction initiated inside a sighandler and suspended on return
-from the sighandler to the kernel will get reclaimed and discarded.
-
-Failure cause codes used by kernel
-==================================
-
-These are defined in <asm/reg.h>, and distinguish different reasons why the
-kernel aborted a transaction:
-
- TM_CAUSE_RESCHED Thread was rescheduled.
- TM_CAUSE_TLBI Software TLB invalid.
- TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap.
- TM_CAUSE_SYSCALL Syscall from active transaction.
- TM_CAUSE_SIGNAL Signal delivered.
- TM_CAUSE_MISC Currently unused.
- TM_CAUSE_ALIGNMENT Alignment fault.
- TM_CAUSE_EMULATE Emulation that touched memory.
-
-These can be checked by the user program's abort handler as TEXASR[0:7]. If
-bit 7 is set, it indicates that the error is consider persistent. For example
-a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.
-
-GDB
-===
-
-GDB and ptrace are not currently TM-aware. If one stops during a transaction,
-it looks like the transaction has just started (the checkpointed state is
-presented). The transaction cannot then be continued and will take the failure
-handler route. Furthermore, the transactional 2nd register state will be
-inaccessible. GDB can currently be used on programs using TM, but not sensibly
-in parts within transactions.
-
-POWER9
-======
-
-TM on POWER9 has issues with storing the complete register state. This
-is described in this commit:
-
- commit 4bb3c7a0208fc13ca70598efd109901a7cd45ae7
- Author: Paul Mackerras <paulus@ozlabs.org>
- Date: Wed Mar 21 21:32:01 2018 +1100
- KVM: PPC: Book3S HV: Work around transactional memory bugs in POWER9
-
-To account for this different POWER9 chips have TM enabled in
-different ways.
-
-On POWER9N DD2.01 and below, TM is disabled. ie
-HWCAP2[PPC_FEATURE2_HTM] is not set.
-
-On POWER9N DD2.1 TM is configured by firmware to always abort a
-transaction when tm suspend occurs. So tsuspend will cause a
-transaction to be aborted and rolled back. Kernel exceptions will also
-cause the transaction to be aborted and rolled back and the exception
-will not occur. If userspace constructs a sigcontext that enables TM
-suspend, the sigcontext will be rejected by the kernel. This mode is
-advertised to users with HWCAP2[PPC_FEATURE2_HTM_NO_SUSPEND] set.
-HWCAP2[PPC_FEATURE2_HTM] is not set in this mode.
-
-On POWER9N DD2.2 and above, KVM and POWERVM emulate TM for guests (as
-described in commit 4bb3c7a0208f), hence TM is enabled for guests
-ie. HWCAP2[PPC_FEATURE2_HTM] is set for guest userspace. Guests that
-makes heavy use of TM suspend (tsuspend or kernel suspend) will result
-in traps into the hypervisor and hence will suffer a performance
-degradation. Host userspace has TM disabled
-ie. HWCAP2[PPC_FEATURE2_HTM] is not set. (although we make enable it
-at some point in the future if we bring the emulation into host
-userspace context switching).
-
-POWER9C DD1.2 and above are only available with POWERVM and hence
-Linux only runs as a guest. On these systems TM is emulated like on
-POWER9N DD2.2.
-
-Guest migration from POWER8 to POWER9 will work with POWER9N DD2.2 and
-POWER9C DD1.2. Since earlier POWER9 processors don't support TM
-emulation, migration from POWER8 to POWER9 is not supported there.
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = 'Linux Kernel Development Documentation'
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'process.tex', 'Linux Kernel Development Documentation',
- 'The kernel development community', 'manual'),
-]
lead to a crash, possible overwriting sensitive contents at the end of the
stack (when built without `CONFIG_THREAD_INFO_IN_TASK=y`), or overwriting
memory adjacent to the stack (when built without `CONFIG_VMAP_STACK=y`)
+
+Implicit switch case fall-through
+---------------------------------
+The C language allows switch cases to "fall through" when
+a "break" statement is missing at the end of a case. This,
+however, introduces ambiguity in the code, as it's not always
+clear if the missing break is intentional or a bug. As there
+have been a long list of flaws `due to missing "break" statements
+<https://cwe.mitre.org/data/definitions/484.html>`_, we no longer allow
+"implicit fall-through". In order to identify an intentional fall-through
+case, we have adopted the marking used by static analyzers: a comment
+saying `/* Fall through */`. Once the C++17 `__attribute__((fallthrough))`
+is more widely handled by C compilers, static analyzers, and IDEs, we can
+switch to using that instead.
add it to an iommu_group and a vfio_group. Then we could pass through
the mdev to a guest.
+
+VFIO-CCW Regions
+----------------
+
+The vfio-ccw driver exposes MMIO regions to accept requests from and return
+results to userspace.
+
vfio-ccw I/O region
-------------------
ret_code stores a return code for each access of the region.
+This region is always available.
+
+vfio-ccw cmd region
+-------------------
+
+The vfio-ccw cmd region is used to accept asynchronous instructions
+from userspace::
+
+ #define VFIO_CCW_ASYNC_CMD_HSCH (1 << 0)
+ #define VFIO_CCW_ASYNC_CMD_CSCH (1 << 1)
+ struct ccw_cmd_region {
+ __u32 command;
+ __u32 ret_code;
+ } __packed;
+
+This region is exposed via region type VFIO_REGION_SUBTYPE_CCW_ASYNC_CMD.
+
+Currently, CLEAR SUBCHANNEL and HALT SUBCHANNEL use this region.
+
vfio-ccw operation details
--------------------------
through DASD/ECKD device online in a guest now and use it as a block
device.
-While the current code allows the guest to start channel programs via
-START SUBCHANNEL, support for HALT SUBCHANNEL or CLEAR SUBCHANNEL is
-not yet implemented.
+The current code allows the guest to start channel programs via
+START SUBCHANNEL, and to issue HALT SUBCHANNEL and CLEAR SUBCHANNEL.
vfio-ccw supports classic (command mode) channel I/O only. Transport
mode (HPF) is not supported.
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "SuperH architecture implementation manual"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'sh.tex', project,
- 'The kernel development community', 'manual'),
-]
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Sound Subsystem Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'sound.tex', project,
- 'The kernel development community', 'manual'),
-]
and os.path.normpath(namespace["__file__"]) != os.path.normpath(config_file) ):
config_file = os.path.abspath(config_file)
+ # Let's avoid one conf.py file just due to latex_documents
+ start = config_file.find('Documentation/')
+ if start >= 0:
+ start = config_file.find('/', start + 1)
+
+ end = config_file.rfind('/')
+ if start >= 0 and end > 0:
+ dir = config_file[start + 1:end]
+
+ print("source directory: %s" % dir)
+ new_latex_docs = []
+ latex_documents = namespace['latex_documents']
+
+ for l in latex_documents:
+ if l[0].find(dir + '/') == 0:
+ has = True
+ fn = l[0][len(dir) + 1:]
+ new_latex_docs.append((fn, l[1], l[2], l[3], l[4]))
+ break
+
+ namespace['latex_documents'] = new_latex_docs
+
+ # If there is an extra conf.py file, load it
if os.path.isfile(config_file):
sys.stdout.write("load additional sphinx-config: %s\n" % config_file)
config = namespace.copy()
del config['__file__']
namespace.update(config)
else:
- sys.stderr.write("WARNING: additional sphinx-config not found: %s\n" % config_file)
+ config = namespace.copy()
+ config['tags'].add("subproject")
+ namespace.update(config)
* Per inserire blocchi di testo con caratteri a dimensione fissa (codici di
esempio, casi d'uso, eccetera): utilizzate ``::`` quando non è necessario
evidenziare la sintassi, specialmente per piccoli frammenti; invece,
- utilizzate ``.. code-block:: <language>`` per blocchi di più lunghi che
- potranno beneficiare dell'avere la sintassi evidenziata.
+ utilizzate ``.. code-block:: <language>`` per blocchi più lunghi che
+ beneficeranno della sintassi evidenziata. Per un breve pezzo di codice da
+ inserire nel testo, usate \`\`.
Il dominio C
Il nome della funzione (per esempio ioctl) rimane nel testo ma il nome del suo
riferimento cambia da ``ioctl`` a ``VIDIOC_LOG_STATUS``. Anche la voce
-nell'indice cambia in ``VIDIOC_LOG_STATUS`` e si potrà quindi fare riferimento
-a questa funzione scrivendo:
-
-.. code-block:: rst
-
- :c:func:`VIDIOC_LOG_STATUS`
+nell'indice cambia in ``VIDIOC_LOG_STATUS``.
+
+Notate che per una funzione non c'è bisogno di usare ``c:func:`` per generarne
+i riferimenti nella documentazione. Grazie a qualche magica estensione a
+Sphinx, il sistema di generazione della documentazione trasformerà
+automaticamente un riferimento ad una ``funzione()`` in un riferimento
+incrociato quando questa ha una voce nell'indice. Se trovate degli usi di
+``c:func:`` nella documentazione del kernel, sentitevi liberi di rimuoverli.
Tabelle a liste
code-of-conduct
development-process
submitting-patches
+ programming-language
coding-style
maintainer-pgp-guide
email-clients
.. include:: ../disclaimer-ita.rst
:Original: :ref:`Documentation/process/kernel-docs.rst <kernel_docs>`
+:Translator: Federico Vaga <federico.vaga@vaga.pv.it>
.. _it_kernel_docs:
Indice di documenti per le persone interessate a capire e/o scrivere per il kernel Linux
========================================================================================
-.. warning::
-
- TODO ancora da tradurre
+.. note::
+ Questo documento contiene riferimenti a documenti in lingua inglese; inoltre
+ utilizza dai campi *ReStructuredText* di supporto alla ricerca e che per
+ questo motivo è meglio non tradurre al fine di garantirne un corretto
+ utilizzo.
+ Per questi motivi il documento non verrà tradotto. Per favore fate
+ riferimento al documento originale in lingua inglese.
kernel.
Se per qualche ragione preferite rimanere con sottochiavi RSA, nel comando
- precedente, sostituite "ed25519" con "rsa2048".
+ precedente, sostituite "ed25519" con "rsa2048". In aggiunta, se avete
+ intenzione di usare un dispositivo hardware che non supporta le chiavi
+ ED25519 ECC, come la Nitrokey Pro o la Yubikey, allora dovreste usare
+ "nistp256" al posto di "ed25519".
Copia di riserva della chiave primaria per gestire il recupero da disastro
--------------------------------------------------------------------------
soluzioni disponibili:
- `Nitrokey Start`_: è Open hardware e Free Software, è basata sul progetto
- `GnuK`_ della FSIJ. Ha il supporto per chiavi ECC, ma meno funzionalità di
- sicurezza (come la resistenza alla manomissione o alcuni attacchi ad un
- canale laterale).
+ `GnuK`_ della FSIJ. Questo è uno dei pochi dispositivi a supportare le chiavi
+ ECC ED25519, ma offre meno funzionalità di sicurezza (come la resistenza
+ alla manomissione o alcuni attacchi ad un canale laterale).
- `Nitrokey Pro`_: è simile alla Nitrokey Start, ma è più resistente alla
- manomissione e offre più funzionalità di sicurezza, ma l'ECC.
-- `Yubikey 4`_: l'hardware e il software sono proprietari, ma è più economica
+ manomissione e offre più funzionalità di sicurezza. La Pro 2 supporta la
+ crittografia ECC (NISTP).
+- `Yubikey 5`_: l'hardware e il software sono proprietari, ma è più economica
della Nitrokey Pro ed è venduta anche con porta USB-C il che è utile con i
computer portatili più recenti. In aggiunta, offre altre funzionalità di
- sicurezza come FIDO, U2F, ma non l'ECC
+ sicurezza come FIDO, U2F, e ora supporta anche le chiavi ECC (NISTP)
`Su LWN c'è una buona recensione`_ dei modelli elencati qui sopra e altri.
+La scelta dipenderà dal costo, dalla disponibilità nella vostra area
+geografica e vostre considerazioni sull'hardware aperto/proprietario.
+
Se volete usare chiavi ECC, la vostra migliore scelta sul mercato è la
Nitrokey Start.
.. _`Nitrokey Start`: https://shop.nitrokey.com/shop/product/nitrokey-start-6
-.. _`Nitrokey Pro`: https://shop.nitrokey.com/shop/product/nitrokey-pro-3
-.. _`Yubikey 4`: https://www.yubico.com/product/yubikey-4-series/
+.. _`Nitrokey Pro 2`: https://shop.nitrokey.com/shop/product/nitrokey-pro-2-3
+.. _`Yubikey 5`: https://www.yubico.com/product/yubikey-5-overview/
.. _Gnuk: http://www.fsij.org/doc-gnuk/
.. _`Su LWN c'è una buona recensione`: https://lwn.net/Articles/736231/
--- /dev/null
+.. include:: ../disclaimer-ita.rst
+
+:Original: :ref:`Documentation/process/programming-language.rst <programming_language>`
+:Translator: Federico Vaga <federico.vaga@vaga.pv.it>
+
+.. _it_programming_language:
+
+Linguaggio di programmazione
+============================
+
+Il kernel è scritto nel linguaggio di programmazione C [c-language]_.
+Più precisamente, il kernel viene compilato con ``gcc`` [gcc]_ usando
+l'opzione ``-std=gnu89`` [gcc-c-dialect-options]_: il dialetto GNU
+dello standard ISO C90 (con l'aggiunta di alcune funzionalità da C99)
+
+Questo dialetto contiene diverse estensioni al linguaggio [gnu-extensions]_,
+e molte di queste vengono usate sistematicamente dal kernel.
+
+Il kernel offre un certo livello di supporto per la compilazione con ``clang``
+[clang]_ e ``icc`` [icc]_ su diverse architetture, tuttavia in questo momento
+il supporto non è completo e richiede delle patch aggiuntive.
+
+Attributi
+---------
+
+Una delle estensioni più comuni e usate nel kernel sono gli attributi
+[gcc-attribute-syntax]_. Gli attributi permettono di aggiungere una semantica,
+definita dell'implementazione, alle entità del linguaggio (come le variabili,
+le funzioni o i tipi) senza dover fare importanti modifiche sintattiche al
+linguaggio stesso (come l'aggiunta di nuove parole chiave) [n2049]_.
+
+In alcuni casi, gli attributi sono opzionali (ovvero un compilatore che non
+dovesse supportarli dovrebbe produrre comunque codice corretto, anche se
+più lento o che non esegue controlli aggiuntivi durante la compilazione).
+
+Il kernel definisce alcune pseudo parole chiave (per esempio ``__pure``)
+in alternativa alla sintassi GNU per gli attributi (per esempio
+``__attribute__((__pure__))``) allo scopo di mostrare quali funzionalità si
+possono usare e/o per accorciare il codice.
+
+Per maggiori informazioni consultate il file d'intestazione
+``include/linux/compiler_attributes.h``.
+
+.. [c-language] http://www.open-std.org/jtc1/sc22/wg14/www/standards
+.. [gcc] https://gcc.gnu.org
+.. [clang] https://clang.llvm.org
+.. [icc] https://software.intel.com/en-us/c-compilers
+.. [gcc-c-dialect-options] https://gcc.gnu.org/onlinedocs/gcc/C-Dialect-Options.html
+.. [gnu-extensions] https://gcc.gnu.org/onlinedocs/gcc/C-Extensions.html
+.. [gcc-attribute-syntax] https://gcc.gnu.org/onlinedocs/gcc/Attribute-Syntax.html
+.. [n2049] http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2049.pdf
[*] 버스 마스터링 DMA 와 일관성에 대해서는 다음을 참고하시기 바랍니다:
- Documentation/PCI/pci.rst
+ Documentation/driver-api/pci/pci.rst
Documentation/DMA-API-HOWTO.txt
Documentation/DMA-API.txt
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "The Linux kernel user-space API guide"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'userspace-api.tex', project,
- 'The kernel development community', 'manual'),
-]
amd-memory-encryption
cpuid
+ vcpu-requests
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "Linux Memory Management Documentation"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'memory-management.tex', project,
- 'The kernel development community', 'manual'),
-]
and loop forever. This is generally not what a watchdog user wants.
For those wishing to learn more please see:
- Documentation/kdump/kdump.rst
+ Documentation/admin-guide/kdump/kdump.rst
Documentation/admin-guide/kernel-parameters.txt (panic=)
Your Linux Distribution specific documentation.
+++ /dev/null
-# -*- coding: utf-8; mode: python -*-
-
-project = "X86 architecture specific documentation"
-
-tags.add("subproject")
-
-latex_documents = [
- ('index', 'x86.tex', project,
- 'The kernel development community', 'manual'),
-]
W: http://ez.analog.com/community/linux-device-drivers
S: Supported
F: drivers/iio/adc/ad7124.c
-F: Documentation/devicetree/bindings/iio/adc/adi,ad7124.txt
+F: Documentation/devicetree/bindings/iio/adc/adi,ad7124.yaml
ANALOG DEVICES INC AD7606 DRIVER
M: Stefan Popa <stefan.popa@analog.com>
ARM ARCHITECTED TIMER DRIVER
M: Mark Rutland <mark.rutland@arm.com>
-M: Marc Zyngier <marc.zyngier@arm.com>
+M: Marc Zyngier <maz@kernel.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/include/asm/arch_timer.h
L: cgroups@vger.kernel.org
L: linux-block@vger.kernel.org
T: git git://git.kernel.dk/linux-block
-F: Documentation/cgroup-v1/blkio-controller.rst
+F: Documentation/admin-guide/cgroup-v1/blkio-controller.rst
F: block/blk-cgroup.c
F: include/linux/blk-cgroup.h
F: block/blk-throttle.c
F: drivers/misc/cxl/
F: include/misc/cxl*
F: include/uapi/misc/cxl.h
-F: Documentation/powerpc/cxl.txt
+F: Documentation/powerpc/cxl.rst
F: Documentation/ABI/testing/sysfs-class-cxl
CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER
S: Supported
F: drivers/scsi/cxlflash/
F: include/uapi/scsi/cxlflash_ioctl.h
-F: Documentation/powerpc/cxlflash.txt
+F: Documentation/powerpc/cxlflash.rst
CYBERPRO FB DRIVER
M: Russell King <linux@armlinux.org.uk>
R: Jon Olson <jonolson@google.com>
L: netdev@vger.kernel.org
S: Supported
-F: Documentation/networking/device_drivers/google/gve.txt
+F: Documentation/networking/device_drivers/google/gve.rst
F: drivers/net/ethernet/google
GPD POCKET FAN DRIVER
F: include/uapi/linux/ipx.h
IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
-M: Marc Zyngier <marc.zyngier@arm.com>
+M: Marc Zyngier <maz@kernel.org>
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
F: Documentation/IRQ-domain.txt
IRQCHIP DRIVERS
M: Thomas Gleixner <tglx@linutronix.de>
M: Jason Cooper <jason@lakedaemon.net>
-M: Marc Zyngier <marc.zyngier@arm.com>
+M: Marc Zyngier <maz@kernel.org>
L: linux-kernel@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git irq/core
F: tools/testing/selftests/kvm/
KERNEL VIRTUAL MACHINE FOR ARM/ARM64 (KVM/arm, KVM/arm64)
-M: Marc Zyngier <marc.zyngier@arm.com>
+M: Marc Zyngier <maz@kernel.org>
R: James Morse <james.morse@arm.com>
-R: Julien Thierry <julien.thierry@arm.com>
-R: Suzuki K Pouloze <suzuki.poulose@arm.com>
+R: Julien Thierry <julien.thierry.kdev@gmail.com>
+R: Suzuki K Poulose <suzuki.poulose@arm.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.cs.columbia.edu
T: git git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm.git
M: "VMware, Inc." <pv-drivers@vmware.com>
L: virtualization@lists.linux-foundation.org
S: Supported
-F: Documentation/virt/paravirt_ops.txt
+F: Documentation/virt/paravirt_ops.rst
F: arch/*/kernel/paravirt*
F: arch/*/include/asm/paravirt*.h
F: include/linux/hypervisor.h
F: drivers/pci/pcie/aer.c
F: drivers/pci/pcie/dpc.c
F: drivers/pci/pcie/err.c
-F: Documentation/powerpc/eeh-pci-error-recovery.txt
+F: Documentation/powerpc/eeh-pci-error-recovery.rst
F: arch/powerpc/kernel/eeh*.c
F: arch/powerpc/platforms/*/eeh*.c
F: arch/powerpc/include/*/eeh*.h
F: drivers/mtd/nand/raw/r852.h
RISC-V ARCHITECTURE
+M: Paul Walmsley <paul.walmsley@sifive.com>
M: Palmer Dabbelt <palmer@sifive.com>
M: Albert Ou <aou@eecs.berkeley.edu>
L: linux-riscv@lists.infradead.org
S390 VFIO-CCW DRIVER
M: Cornelia Huck <cohuck@redhat.com>
-M: Farhan Ali <alifm@linux.ibm.com>
M: Eric Farman <farman@linux.ibm.com>
R: Halil Pasic <pasic@linux.ibm.com>
L: linux-s390@vger.kernel.org
F: include/uapi/linux/virtio_input.h
VIRTIO IOMMU DRIVER
-M: Jean-Philippe Brucker <jean-philippe.brucker@arm.com>
+M: Jean-Philippe Brucker <jean-philippe@linaro.org>
L: virtualization@lists.linux-foundation.org
S: Maintained
F: drivers/iommu/virtio-iommu.c
F: include/linux/vme*
VMWARE BALLOON DRIVER
-M: Julien Freche <jfreche@vmware.com>
M: Nadav Amit <namit@vmware.com>
M: "VMware, Inc." <pv-drivers@vmware.com>
L: linux-kernel@vger.kernel.org
VERSION = 5
PATCHLEVEL = 3
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION = -rc2
NAME = Bobtail Squid
# *DOCUMENTATION*
# warn about C99 declaration after statement
KBUILD_CFLAGS += -Wdeclaration-after-statement
+# Warn about unmarked fall-throughs in switch statement.
+KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough=3,)
+
# Variable Length Arrays (VLAs) should not be used anywhere in the kernel
KBUILD_CFLAGS += -Wvla
DEBUG_IMX7D_UART
default "debug/ks8695.S" if DEBUG_KS8695_UART
default "debug/msm.S" if DEBUG_QCOM_UARTDM
- default "debug/netx.S" if DEBUG_NETX_UART
default "debug/omap2plus.S" if DEBUG_OMAP2PLUS_UART
default "debug/renesas-scif.S" if DEBUG_R7S72100_SCIF2
default "debug/renesas-scif.S" if DEBUG_RCAR_GEN1_SCIF0
config DEBUG_UART_PHYS
hex "Physical base address of debug UART"
- default 0x00100a00 if DEBUG_NETX_UART
default 0x01c20000 if DEBUG_DAVINCI_DMx_UART0
default 0x01c28000 if DEBUG_SUNXI_UART0
default 0x01c28400 if DEBUG_SUNXI_UART1
DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
DEBUG_LL_UART_EFM32 || \
DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \
- DEBUG_NETX_UART || \
DEBUG_QCOM_UARTDM || DEBUG_R7S72100_SCIF2 || \
DEBUG_RCAR_GEN1_SCIF0 || DEBUG_RCAR_GEN1_SCIF2 || \
DEBUG_RCAR_GEN2_SCIF0 || DEBUG_RCAR_GEN2_SCIF1 || \
default 0xc881f000 if DEBUG_RV1108_UART2
default 0xc8821000 if DEBUG_RV1108_UART1
default 0xc8912000 if DEBUG_RV1108_UART0
- default 0xe0000a00 if DEBUG_NETX_UART
default 0xe0010fe0 if ARCH_RPC
default 0xf0000be0 if ARCH_EBSA110
default 0xf0010000 if DEBUG_ASM9260_UART
default DEBUG_UART_PHYS if !MMU
depends on DEBUG_LL_UART_8250 || DEBUG_LL_UART_PL01X || \
DEBUG_UART_8250 || DEBUG_UART_PL01X || DEBUG_MESON_UARTAO || \
- DEBUG_NETX_UART || \
DEBUG_QCOM_UARTDM || DEBUG_S3C24XX_UART || \
DEBUG_S3C64XX_UART || \
DEBUG_BCM63XX_UART || DEBUG_ASM9260_UART || \
};
mdio-bus-mux {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
/* BIT(9) = 1 => external mdio */
mdio_ext: mdio@200 {
reg = <0x200>;
};
&i2c2 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c2>;
status = "okay";
};
&i2c2 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c2>;
status = "okay";
};
&i2c2 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c2>;
status = "okay";
};
&i2c2 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c2>;
status = "okay";
};
&i2c2 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c2>;
status = "okay";
};
&i2c3 {
- clock_frequency = <100000>;
+ clock-frequency = <100000>;
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_i2c3>;
status = "okay";
reg = <0x40330200 0x200>;
};
- usbphy1: usb-phy@0x40350000 {
+ usbphy1: usb-phy@40350000 {
compatible = "fsl,imx7ulp-usbphy", "fsl,imx6ul-usbphy";
reg = <0x40350000 0x1000>;
interrupts = <GIC_SPI 39 IRQ_TYPE_LEVEL_HIGH>;
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_KALLSYMS_ALL=y
-CONFIG_MODULES=y
-CONFIG_MODULE_UNLOAD=y
-# CONFIG_BLK_DEV_BSG is not set
-CONFIG_PARTITION_ADVANCED=y
CONFIG_ARCH_U8500=y
-CONFIG_MACH_HREFV60=y
-CONFIG_MACH_SNOWBALL=y
CONFIG_SMP=y
CONFIG_NR_CPUS=2
-CONFIG_PREEMPT=y
-CONFIG_AEABI=y
CONFIG_HIGHMEM=y
CONFIG_ARM_APPENDED_DTB=y
CONFIG_ARM_ATAG_DTB_COMPAT=y
CONFIG_ARM_U8500_CPUIDLE=y
CONFIG_VFP=y
CONFIG_NEON=y
+CONFIG_MODULES=y
+CONFIG_MODULE_UNLOAD=y
+# CONFIG_BLK_DEV_BSG is not set
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_CMA=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_UNIX=y
CONFIG_SMSC_PHY=y
CONFIG_CW1200=y
CONFIG_CW1200_WLAN_SDIO=y
-# CONFIG_INPUT_MOUSEDEV_PSAUX is not set
CONFIG_INPUT_EVDEV=y
# CONFIG_KEYBOARD_ATKBD is not set
CONFIG_KEYBOARD_GPIO=y
CONFIG_RMI4_I2C=y
CONFIG_RMI4_F11=y
# CONFIG_SERIO is not set
-CONFIG_VT_HW_CONSOLE_BINDING=y
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
CONFIG_SPI_PL022=y
CONFIG_GPIO_STMPE=y
CONFIG_GPIO_TC3589X=y
+CONFIG_SENSORS_IIO_HWMON=y
CONFIG_THERMAL=y
CONFIG_CPU_THERMAL=y
CONFIG_WATCHDOG=y
CONFIG_MFD_TC3589X=y
CONFIG_REGULATOR_AB8500=y
CONFIG_REGULATOR_GPIO=y
+CONFIG_DRM=y
+CONFIG_DRM_PANEL_SAMSUNG_S6D16D0=y
+CONFIG_DRM_LIMA=y
+CONFIG_DRM_MCDE=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BACKLIGHT_GENERIC=m
+CONFIG_LOGO=y
CONFIG_SOUND=y
CONFIG_SND=y
CONFIG_SND_SOC=y
CONFIG_USB=y
CONFIG_USB_MUSB_HDRC=y
CONFIG_USB_MUSB_UX500=y
+CONFIG_MUSB_PIO_ONLY=y
CONFIG_AB8500_USB=y
CONFIG_USB_GADGET=y
CONFIG_USB_ETH=m
CONFIG_RTC_DRV_PL031=y
CONFIG_DMADEVICES=y
CONFIG_STE_DMA40=y
+CONFIG_HWSPINLOCK=y
CONFIG_HSEM_U8500=y
CONFIG_IIO=y
CONFIG_IIO_SW_TRIGGER=y
CONFIG_ROOT_NFS=y
CONFIG_NLS_CODEPAGE_437=y
CONFIG_NLS_ISO8859_1=y
+CONFIG_CRYPTO_DEV_UX500=y
+CONFIG_CRYPTO_DEV_UX500_CRYP=y
+CONFIG_CRYPTO_DEV_UX500_HASH=y
+CONFIG_CRYPTO_DEV_UX500_DEBUG=y
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_FS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_KERNEL=y
# CONFIG_SCHED_DEBUG is not set
-# CONFIG_DEBUG_PREEMPT is not set
# CONFIG_FTRACE is not set
CONFIG_DEBUG_USER=y
CONFIG_CORESIGHT=y
CONFIG_CORESIGHT_SINK_TPIU=y
CONFIG_CORESIGHT_SINK_ETBV10=y
CONFIG_CORESIGHT_SOURCE_ETM3X=y
-CONFIG_CRYPTO_DEV_UX500=y
-CONFIG_CRYPTO_DEV_UX500_CRYP=y
-CONFIG_CRYPTO_DEV_UX500_HASH=y
-CONFIG_CRYPTO_DEV_UX500_DEBUG=y
#define DEEPSLEEP_SLEEPENABLE_BIT BIT(31)
.text
+ .arch armv5te
/*
* Move DaVinci into deep sleep state
*
+++ /dev/null
-# SPDX-License-Identifier: GPL-2.0-only
-menu "NetX Implementations"
- depends on ARCH_NETX
-
-config MACH_NXDKN
- bool "Enable Hilscher nxdkn Eval Board support"
- help
- Board support for the Hilscher NetX Eval Board
-
-config MACH_NXDB500
- bool "Enable Hilscher nxdb500 Eval Board support"
- select ARM_AMBA
- help
- Board support for the Hilscher nxdb500 Eval Board
-
-config MACH_NXEB500HMI
- bool "Enable Hilscher nxeb500hmi Eval Board support"
- select ARM_AMBA
- help
- Board support for the Hilscher nxeb500hmi Eval Board
-
-endmenu
+++ /dev/null
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# Makefile for the linux kernel.
-#
-
-# Object file lists.
-
-obj-y += time.o generic.o pfifo.o xc.o
-
-# Specific board support
-obj-$(CONFIG_MACH_NXDKN) += nxdkn.o
-obj-$(CONFIG_MACH_NXDB500) += nxdb500.o fb.o
-obj-$(CONFIG_MACH_NXEB500HMI) += nxeb500hmi.o fb.o
+++ /dev/null
-# SPDX-License-Identifier: GPL-2.0-only
- zreladdr-y += 0x80008000
-
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mach-netx/fb.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/dma-mapping.h>
-#include <linux/amba/bus.h>
-#include <linux/amba/clcd.h>
-#include <linux/err.h>
-#include <linux/gfp.h>
-
-#include <asm/irq.h>
-
-#include <mach/netx-regs.h>
-#include <mach/hardware.h>
-
-static struct clcd_panel *netx_panel;
-
-void netx_clcd_enable(struct clcd_fb *fb)
-{
-}
-
-int netx_clcd_setup(struct clcd_fb *fb)
-{
- dma_addr_t dma;
-
- fb->panel = netx_panel;
-
- fb->fb.screen_base = dma_alloc_wc(&fb->dev->dev, 1024 * 1024, &dma,
- GFP_KERNEL);
- if (!fb->fb.screen_base) {
- printk(KERN_ERR "CLCD: unable to map framebuffer\n");
- return -ENOMEM;
- }
-
- fb->fb.fix.smem_start = dma;
- fb->fb.fix.smem_len = 1024*1024;
-
- return 0;
-}
-
-int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma)
-{
- return dma_mmap_wc(&fb->dev->dev, vma, fb->fb.screen_base,
- fb->fb.fix.smem_start, fb->fb.fix.smem_len);
-}
-
-void netx_clcd_remove(struct clcd_fb *fb)
-{
- dma_free_wc(&fb->dev->dev, fb->fb.fix.smem_len, fb->fb.screen_base,
- fb->fb.fix.smem_start);
-}
-
-static AMBA_AHB_DEVICE(fb, "fb", 0, 0x00104000, { NETX_IRQ_LCD }, NULL);
-
-int netx_fb_init(struct clcd_board *board, struct clcd_panel *panel)
-{
- netx_panel = panel;
- fb_device.dev.platform_data = board;
- return amba_device_register(&fb_device, &iomem_resource);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm/mach-netx/fb.h
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-void netx_clcd_enable(struct clcd_fb *fb);
-int netx_clcd_setup(struct clcd_fb *fb);
-int netx_clcd_mmap(struct clcd_fb *fb, struct vm_area_struct *vma);
-void netx_clcd_remove(struct clcd_fb *fb);
-int netx_fb_init(struct clcd_board *board, struct clcd_panel *panel);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mach-netx/generic.c
- *
- * Copyright (C) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/io.h>
-#include <linux/irqchip/arm-vic.h>
-#include <linux/reboot.h>
-#include <mach/hardware.h>
-#include <asm/mach/map.h>
-#include <mach/netx-regs.h>
-#include <asm/mach/irq.h>
-
-static struct map_desc netx_io_desc[] __initdata = {
- {
- .virtual = NETX_IO_VIRT,
- .pfn = __phys_to_pfn(NETX_IO_PHYS),
- .length = NETX_IO_SIZE,
- .type = MT_DEVICE
- }
-};
-
-void __init netx_map_io(void)
-{
- iotable_init(netx_io_desc, ARRAY_SIZE(netx_io_desc));
-}
-
-static struct resource netx_rtc_resources[] = {
- [0] = {
- .start = 0x00101200,
- .end = 0x00101220,
- .flags = IORESOURCE_MEM,
- },
-};
-
-static struct platform_device netx_rtc_device = {
- .name = "netx-rtc",
- .id = 0,
- .num_resources = ARRAY_SIZE(netx_rtc_resources),
- .resource = netx_rtc_resources,
-};
-
-static struct platform_device *devices[] __initdata = {
- &netx_rtc_device,
-};
-
-#if 0
-#define DEBUG_IRQ(fmt...) printk(fmt)
-#else
-#define DEBUG_IRQ(fmt...) while (0) {}
-#endif
-
-static void netx_hif_demux_handler(struct irq_desc *desc)
-{
- unsigned int irq = NETX_IRQ_HIF_CHAINED(0);
- unsigned int stat;
-
- stat = ((readl(NETX_DPMAS_INT_EN) &
- readl(NETX_DPMAS_INT_STAT)) >> 24) & 0x1f;
-
- while (stat) {
- if (stat & 1) {
- DEBUG_IRQ("handling irq %d\n", irq);
- generic_handle_irq(irq);
- }
- irq++;
- stat >>= 1;
- }
-}
-
-static int
-netx_hif_irq_type(struct irq_data *d, unsigned int type)
-{
- unsigned int val, irq;
-
- val = readl(NETX_DPMAS_IF_CONF1);
-
- irq = d->irq - NETX_IRQ_HIF_CHAINED(0);
-
- if (type & IRQ_TYPE_EDGE_RISING) {
- DEBUG_IRQ("rising edges\n");
- val |= (1 << 26) << irq;
- }
- if (type & IRQ_TYPE_EDGE_FALLING) {
- DEBUG_IRQ("falling edges\n");
- val &= ~((1 << 26) << irq);
- }
- if (type & IRQ_TYPE_LEVEL_LOW) {
- DEBUG_IRQ("low level\n");
- val &= ~((1 << 26) << irq);
- }
- if (type & IRQ_TYPE_LEVEL_HIGH) {
- DEBUG_IRQ("high level\n");
- val |= (1 << 26) << irq;
- }
-
- writel(val, NETX_DPMAS_IF_CONF1);
-
- return 0;
-}
-
-static void
-netx_hif_ack_irq(struct irq_data *d)
-{
- unsigned int val, irq;
-
- irq = d->irq - NETX_IRQ_HIF_CHAINED(0);
- writel((1 << 24) << irq, NETX_DPMAS_INT_STAT);
-
- val = readl(NETX_DPMAS_INT_EN);
- val &= ~((1 << 24) << irq);
- writel(val, NETX_DPMAS_INT_EN);
-
- DEBUG_IRQ("%s: irq %d\n", __func__, d->irq);
-}
-
-static void
-netx_hif_mask_irq(struct irq_data *d)
-{
- unsigned int val, irq;
-
- irq = d->irq - NETX_IRQ_HIF_CHAINED(0);
- val = readl(NETX_DPMAS_INT_EN);
- val &= ~((1 << 24) << irq);
- writel(val, NETX_DPMAS_INT_EN);
- DEBUG_IRQ("%s: irq %d\n", __func__, d->irq);
-}
-
-static void
-netx_hif_unmask_irq(struct irq_data *d)
-{
- unsigned int val, irq;
-
- irq = d->irq - NETX_IRQ_HIF_CHAINED(0);
- val = readl(NETX_DPMAS_INT_EN);
- val |= (1 << 24) << irq;
- writel(val, NETX_DPMAS_INT_EN);
- DEBUG_IRQ("%s: irq %d\n", __func__, d->irq);
-}
-
-static struct irq_chip netx_hif_chip = {
- .irq_ack = netx_hif_ack_irq,
- .irq_mask = netx_hif_mask_irq,
- .irq_unmask = netx_hif_unmask_irq,
- .irq_set_type = netx_hif_irq_type,
-};
-
-void __init netx_init_irq(void)
-{
- int irq;
-
- vic_init(io_p2v(NETX_PA_VIC), NETX_IRQ_VIC_START, ~0, 0);
-
- for (irq = NETX_IRQ_HIF_CHAINED(0); irq <= NETX_IRQ_HIF_LAST; irq++) {
- irq_set_chip_and_handler(irq, &netx_hif_chip,
- handle_level_irq);
- irq_clear_status_flags(irq, IRQ_NOREQUEST);
- }
-
- writel(NETX_DPMAS_INT_EN_GLB_EN, NETX_DPMAS_INT_EN);
- irq_set_chained_handler(NETX_IRQ_HIF, netx_hif_demux_handler);
-}
-
-static int __init netx_init(void)
-{
- return platform_add_devices(devices, ARRAY_SIZE(devices));
-}
-
-subsys_initcall(netx_init);
-
-void netx_restart(enum reboot_mode mode, const char *cmd)
-{
- writel(NETX_SYSTEM_RES_CR_FIRMW_RES_EN | NETX_SYSTEM_RES_CR_FIRMW_RES,
- NETX_SYSTEM_RES_CR);
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm/mach-netx/generic.h
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/reboot.h>
-
-extern void __init netx_map_io(void);
-extern void __init netx_init_irq(void);
-extern void netx_restart(enum reboot_mode, const char *);
-
-extern void netx_timer_init(void);
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm/mach-netx/include/mach/hardware.h
- *
- * Copyright (C) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-#ifndef __ASM_ARCH_HARDWARE_H
-#define __ASM_ARCH_HARDWARE_H
-
-#define NETX_IO_PHYS 0x00100000
-#define NETX_IO_VIRT 0xe0000000
-#define NETX_IO_SIZE 0x00100000
-
-#define SRAM_INTERNAL_PHYS_0 0x00000
-#define SRAM_INTERNAL_PHYS_1 0x08000
-#define SRAM_INTERNAL_PHYS_2 0x10000
-#define SRAM_INTERNAL_PHYS_3 0x18000
-#define SRAM_INTERNAL_PHYS(no) ((no) * 0x8000)
-
-#define XPEC_MEM_SIZE 0x4000
-#define XMAC_MEM_SIZE 0x1000
-#define SRAM_MEM_SIZE 0x8000
-
-#define io_p2v(x) IOMEM((x) - NETX_IO_PHYS + NETX_IO_VIRT)
-#define io_v2p(x) ((x) - NETX_IO_VIRT + NETX_IO_PHYS)
-
-#endif
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm/mach-netx/include/mach/irqs.h
- *
- * Copyright (C) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#define NETX_IRQ_VIC_START 64
-#define NETX_IRQ_SOFTINT (NETX_IRQ_VIC_START + 0)
-#define NETX_IRQ_TIMER0 (NETX_IRQ_VIC_START + 1)
-#define NETX_IRQ_TIMER1 (NETX_IRQ_VIC_START + 2)
-#define NETX_IRQ_TIMER2 (NETX_IRQ_VIC_START + 3)
-#define NETX_IRQ_SYSTIME_NS (NETX_IRQ_VIC_START + 4)
-#define NETX_IRQ_SYSTIME_S (NETX_IRQ_VIC_START + 5)
-#define NETX_IRQ_GPIO_15 (NETX_IRQ_VIC_START + 6)
-#define NETX_IRQ_WATCHDOG (NETX_IRQ_VIC_START + 7)
-#define NETX_IRQ_UART0 (NETX_IRQ_VIC_START + 8)
-#define NETX_IRQ_UART1 (NETX_IRQ_VIC_START + 9)
-#define NETX_IRQ_UART2 (NETX_IRQ_VIC_START + 10)
-#define NETX_IRQ_USB (NETX_IRQ_VIC_START + 11)
-#define NETX_IRQ_SPI (NETX_IRQ_VIC_START + 12)
-#define NETX_IRQ_I2C (NETX_IRQ_VIC_START + 13)
-#define NETX_IRQ_LCD (NETX_IRQ_VIC_START + 14)
-#define NETX_IRQ_HIF (NETX_IRQ_VIC_START + 15)
-#define NETX_IRQ_GPIO_0_14 (NETX_IRQ_VIC_START + 16)
-#define NETX_IRQ_XPEC0 (NETX_IRQ_VIC_START + 17)
-#define NETX_IRQ_XPEC1 (NETX_IRQ_VIC_START + 18)
-#define NETX_IRQ_XPEC2 (NETX_IRQ_VIC_START + 19)
-#define NETX_IRQ_XPEC3 (NETX_IRQ_VIC_START + 20)
-#define NETX_IRQ_XPEC(no) (NETX_IRQ_VIC_START + 17 + (no))
-#define NETX_IRQ_MSYNC0 (NETX_IRQ_VIC_START + 21)
-#define NETX_IRQ_MSYNC1 (NETX_IRQ_VIC_START + 22)
-#define NETX_IRQ_MSYNC2 (NETX_IRQ_VIC_START + 23)
-#define NETX_IRQ_MSYNC3 (NETX_IRQ_VIC_START + 24)
-#define NETX_IRQ_IRQ_PHY (NETX_IRQ_VIC_START + 25)
-#define NETX_IRQ_ISO_AREA (NETX_IRQ_VIC_START + 26)
-/* int 27 is reserved */
-/* int 28 is reserved */
-#define NETX_IRQ_TIMER3 (NETX_IRQ_VIC_START + 29)
-#define NETX_IRQ_TIMER4 (NETX_IRQ_VIC_START + 30)
-/* int 31 is reserved */
-
-#define NETX_IRQS (NETX_IRQ_VIC_START + 32)
-
-/* for multiplexed irqs on gpio 0..14 */
-#define NETX_IRQ_GPIO(x) (NETX_IRQS + (x))
-#define NETX_IRQ_GPIO_LAST NETX_IRQ_GPIO(14)
-
-/* Host interface interrupts */
-#define NETX_IRQ_HIF_CHAINED(x) (NETX_IRQ_GPIO_LAST + 1 + (x))
-#define NETX_IRQ_HIF_PIO35 NETX_IRQ_HIF_CHAINED(0)
-#define NETX_IRQ_HIF_PIO36 NETX_IRQ_HIF_CHAINED(1)
-#define NETX_IRQ_HIF_PIO40 NETX_IRQ_HIF_CHAINED(2)
-#define NETX_IRQ_HIF_PIO47 NETX_IRQ_HIF_CHAINED(3)
-#define NETX_IRQ_HIF_PIO72 NETX_IRQ_HIF_CHAINED(4)
-#define NETX_IRQ_HIF_LAST NETX_IRQ_HIF_CHAINED(4)
-
-#define NR_IRQS (NETX_IRQ_HIF_LAST + 1)
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm/mach-netx/include/mach/netx-regs.h
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#ifndef __ASM_ARCH_NETX_REGS_H
-#define __ASM_ARCH_NETX_REGS_H
-
-/* offsets relative to the beginning of the io space */
-#define NETX_OFS_SYSTEM 0x00000
-#define NETX_OFS_MEMCR 0x00100
-#define NETX_OFS_DPMAS 0x03000
-#define NETX_OFS_GPIO 0x00800
-#define NETX_OFS_PIO 0x00900
-#define NETX_OFS_UART0 0x00a00
-#define NETX_OFS_UART1 0x00a40
-#define NETX_OFS_UART2 0x00a80
-#define NETX_OF_MIIMU 0x00b00
-#define NETX_OFS_SPI 0x00c00
-#define NETX_OFS_I2C 0x00d00
-#define NETX_OFS_SYSTIME 0x01100
-#define NETX_OFS_RTC 0x01200
-#define NETX_OFS_EXTBUS 0x03600
-#define NETX_OFS_LCD 0x04000
-#define NETX_OFS_USB 0x20000
-#define NETX_OFS_XMAC0 0x60000
-#define NETX_OFS_XMAC1 0x61000
-#define NETX_OFS_XMAC2 0x62000
-#define NETX_OFS_XMAC3 0x63000
-#define NETX_OFS_XMAC(no) (0x60000 + (no) * 0x1000)
-#define NETX_OFS_PFIFO 0x64000
-#define NETX_OFS_XPEC0 0x70000
-#define NETX_OFS_XPEC1 0x74000
-#define NETX_OFS_XPEC2 0x78000
-#define NETX_OFS_XPEC3 0x7c000
-#define NETX_OFS_XPEC(no) (0x70000 + (no) * 0x4000)
-#define NETX_OFS_VIC 0xff000
-
-/* physical addresses */
-#define NETX_PA_SYSTEM (NETX_IO_PHYS + NETX_OFS_SYSTEM)
-#define NETX_PA_MEMCR (NETX_IO_PHYS + NETX_OFS_MEMCR)
-#define NETX_PA_DPMAS (NETX_IO_PHYS + NETX_OFS_DPMAS)
-#define NETX_PA_GPIO (NETX_IO_PHYS + NETX_OFS_GPIO)
-#define NETX_PA_PIO (NETX_IO_PHYS + NETX_OFS_PIO)
-#define NETX_PA_UART0 (NETX_IO_PHYS + NETX_OFS_UART0)
-#define NETX_PA_UART1 (NETX_IO_PHYS + NETX_OFS_UART1)
-#define NETX_PA_UART2 (NETX_IO_PHYS + NETX_OFS_UART2)
-#define NETX_PA_MIIMU (NETX_IO_PHYS + NETX_OF_MIIMU)
-#define NETX_PA_SPI (NETX_IO_PHYS + NETX_OFS_SPI)
-#define NETX_PA_I2C (NETX_IO_PHYS + NETX_OFS_I2C)
-#define NETX_PA_SYSTIME (NETX_IO_PHYS + NETX_OFS_SYSTIME)
-#define NETX_PA_RTC (NETX_IO_PHYS + NETX_OFS_RTC)
-#define NETX_PA_EXTBUS (NETX_IO_PHYS + NETX_OFS_EXTBUS)
-#define NETX_PA_LCD (NETX_IO_PHYS + NETX_OFS_LCD)
-#define NETX_PA_USB (NETX_IO_PHYS + NETX_OFS_USB)
-#define NETX_PA_XMAC0 (NETX_IO_PHYS + NETX_OFS_XMAC0)
-#define NETX_PA_XMAC1 (NETX_IO_PHYS + NETX_OFS_XMAC1)
-#define NETX_PA_XMAC2 (NETX_IO_PHYS + NETX_OFS_XMAC2)
-#define NETX_PA_XMAC3 (NETX_IO_PHYS + NETX_OFS_XMAC3)
-#define NETX_PA_XMAC(no) (NETX_IO_PHYS + NETX_OFS_XMAC(no))
-#define NETX_PA_PFIFO (NETX_IO_PHYS + NETX_OFS_PFIFO)
-#define NETX_PA_XPEC0 (NETX_IO_PHYS + NETX_OFS_XPEC0)
-#define NETX_PA_XPEC1 (NETX_IO_PHYS + NETX_OFS_XPEC1)
-#define NETX_PA_XPEC2 (NETX_IO_PHYS + NETX_OFS_XPEC2)
-#define NETX_PA_XPEC3 (NETX_IO_PHYS + NETX_OFS_XPEC3)
-#define NETX_PA_XPEC(no) (NETX_IO_PHYS + NETX_OFS_XPEC(no))
-#define NETX_PA_VIC (NETX_IO_PHYS + NETX_OFS_VIC)
-
-/* virtual addresses */
-#define NETX_VA_SYSTEM (NETX_IO_VIRT + NETX_OFS_SYSTEM)
-#define NETX_VA_MEMCR (NETX_IO_VIRT + NETX_OFS_MEMCR)
-#define NETX_VA_DPMAS (NETX_IO_VIRT + NETX_OFS_DPMAS)
-#define NETX_VA_GPIO (NETX_IO_VIRT + NETX_OFS_GPIO)
-#define NETX_VA_PIO (NETX_IO_VIRT + NETX_OFS_PIO)
-#define NETX_VA_UART0 (NETX_IO_VIRT + NETX_OFS_UART0)
-#define NETX_VA_UART1 (NETX_IO_VIRT + NETX_OFS_UART1)
-#define NETX_VA_UART2 (NETX_IO_VIRT + NETX_OFS_UART2)
-#define NETX_VA_MIIMU (NETX_IO_VIRT + NETX_OF_MIIMU)
-#define NETX_VA_SPI (NETX_IO_VIRT + NETX_OFS_SPI)
-#define NETX_VA_I2C (NETX_IO_VIRT + NETX_OFS_I2C)
-#define NETX_VA_SYSTIME (NETX_IO_VIRT + NETX_OFS_SYSTIME)
-#define NETX_VA_RTC (NETX_IO_VIRT + NETX_OFS_RTC)
-#define NETX_VA_EXTBUS (NETX_IO_VIRT + NETX_OFS_EXTBUS)
-#define NETX_VA_LCD (NETX_IO_VIRT + NETX_OFS_LCD)
-#define NETX_VA_USB (NETX_IO_VIRT + NETX_OFS_USB)
-#define NETX_VA_XMAC0 (NETX_IO_VIRT + NETX_OFS_XMAC0)
-#define NETX_VA_XMAC1 (NETX_IO_VIRT + NETX_OFS_XMAC1)
-#define NETX_VA_XMAC2 (NETX_IO_VIRT + NETX_OFS_XMAC2)
-#define NETX_VA_XMAC3 (NETX_IO_VIRT + NETX_OFS_XMAC3)
-#define NETX_VA_XMAC(no) (NETX_IO_VIRT + NETX_OFS_XMAC(no))
-#define NETX_VA_PFIFO (NETX_IO_VIRT + NETX_OFS_PFIFO)
-#define NETX_VA_XPEC0 (NETX_IO_VIRT + NETX_OFS_XPEC0)
-#define NETX_VA_XPEC1 (NETX_IO_VIRT + NETX_OFS_XPEC1)
-#define NETX_VA_XPEC2 (NETX_IO_VIRT + NETX_OFS_XPEC2)
-#define NETX_VA_XPEC3 (NETX_IO_VIRT + NETX_OFS_XPEC3)
-#define NETX_VA_XPEC(no) (NETX_IO_VIRT + NETX_OFS_XPEC(no))
-#define NETX_VA_VIC (NETX_IO_VIRT + NETX_OFS_VIC)
-
-/*********************************
- * System functions *
- *********************************/
-
-/* Registers */
-#define NETX_SYSTEM_REG(ofs) IOMEM(NETX_VA_SYSTEM + (ofs))
-#define NETX_SYSTEM_BOO_SR NETX_SYSTEM_REG(0x00)
-#define NETX_SYSTEM_IOC_CR NETX_SYSTEM_REG(0x04)
-#define NETX_SYSTEM_IOC_MR NETX_SYSTEM_REG(0x08)
-
-/* FIXME: Docs are not consistent */
-/* #define NETX_SYSTEM_RES_CR NETX_SYSTEM_REG(0x08) */
-#define NETX_SYSTEM_RES_CR NETX_SYSTEM_REG(0x0c)
-
-#define NETX_SYSTEM_PHY_CONTROL NETX_SYSTEM_REG(0x10)
-#define NETX_SYSTEM_REV NETX_SYSTEM_REG(0x34)
-#define NETX_SYSTEM_IOC_ACCESS_KEY NETX_SYSTEM_REG(0x70)
-#define NETX_SYSTEM_WDG_TR NETX_SYSTEM_REG(0x200)
-#define NETX_SYSTEM_WDG_CTR NETX_SYSTEM_REG(0x204)
-#define NETX_SYSTEM_WDG_IRQ_TIMEOUT NETX_SYSTEM_REG(0x208)
-#define NETX_SYSTEM_WDG_RES_TIMEOUT NETX_SYSTEM_REG(0x20c)
-
-/* Bits */
-#define NETX_SYSTEM_RES_CR_RSTIN (1<<0)
-#define NETX_SYSTEM_RES_CR_WDG_RES (1<<1)
-#define NETX_SYSTEM_RES_CR_HOST_RES (1<<2)
-#define NETX_SYSTEM_RES_CR_FIRMW_RES (1<<3)
-#define NETX_SYSTEM_RES_CR_XPEC0_RES (1<<4)
-#define NETX_SYSTEM_RES_CR_XPEC1_RES (1<<5)
-#define NETX_SYSTEM_RES_CR_XPEC2_RES (1<<6)
-#define NETX_SYSTEM_RES_CR_XPEC3_RES (1<<7)
-#define NETX_SYSTEM_RES_CR_DIS_XPEC0_RES (1<<16)
-#define NETX_SYSTEM_RES_CR_DIS_XPEC1_RES (1<<17)
-#define NETX_SYSTEM_RES_CR_DIS_XPEC2_RES (1<<18)
-#define NETX_SYSTEM_RES_CR_DIS_XPEC3_RES (1<<19)
-#define NETX_SYSTEM_RES_CR_FIRMW_FLG0 (1<<20)
-#define NETX_SYSTEM_RES_CR_FIRMW_FLG1 (1<<21)
-#define NETX_SYSTEM_RES_CR_FIRMW_FLG2 (1<<22)
-#define NETX_SYSTEM_RES_CR_FIRMW_FLG3 (1<<23)
-#define NETX_SYSTEM_RES_CR_FIRMW_RES_EN (1<<24)
-#define NETX_SYSTEM_RES_CR_RSTOUT (1<<25)
-#define NETX_SYSTEM_RES_CR_EN_RSTOUT (1<<26)
-
-#define PHY_CONTROL_RESET (1<<31)
-#define PHY_CONTROL_SIM_BYP (1<<30)
-#define PHY_CONTROL_CLK_XLATIN (1<<29)
-#define PHY_CONTROL_PHY1_EN (1<<21)
-#define PHY_CONTROL_PHY1_NP_MSG_CODE
-#define PHY_CONTROL_PHY1_AUTOMDIX (1<<17)
-#define PHY_CONTROL_PHY1_FIXMODE (1<<16)
-#define PHY_CONTROL_PHY1_MODE(mode) (((mode) & 0x7) << 13)
-#define PHY_CONTROL_PHY0_EN (1<<12)
-#define PHY_CONTROL_PHY0_NP_MSG_CODE
-#define PHY_CONTROL_PHY0_AUTOMDIX (1<<8)
-#define PHY_CONTROL_PHY0_FIXMODE (1<<7)
-#define PHY_CONTROL_PHY0_MODE(mode) (((mode) & 0x7) << 4)
-#define PHY_CONTROL_PHY_ADDRESS(adr) ((adr) & 0xf)
-
-#define PHY_MODE_10BASE_T_HALF 0
-#define PHY_MODE_10BASE_T_FULL 1
-#define PHY_MODE_100BASE_TX_FX_FULL 2
-#define PHY_MODE_100BASE_TX_FX_HALF 3
-#define PHY_MODE_100BASE_TX_HALF 4
-#define PHY_MODE_REPEATER 5
-#define PHY_MODE_POWER_DOWN 6
-#define PHY_MODE_ALL 7
-
-/* Bits */
-#define VECT_CNTL_ENABLE (1 << 5)
-
-/*******************************
- * GPIO and timer module *
- *******************************/
-
-/* Registers */
-#define NETX_GPIO_REG(ofs) IOMEM(NETX_VA_GPIO + (ofs))
-#define NETX_GPIO_CFG(gpio) NETX_GPIO_REG(0x0 + ((gpio)<<2))
-#define NETX_GPIO_THRESHOLD_CAPTURE(gpio) NETX_GPIO_REG(0x40 + ((gpio)<<2))
-#define NETX_GPIO_COUNTER_CTRL(counter) NETX_GPIO_REG(0x80 + ((counter)<<2))
-#define NETX_GPIO_COUNTER_MAX(counter) NETX_GPIO_REG(0x94 + ((counter)<<2))
-#define NETX_GPIO_COUNTER_CURRENT(counter) NETX_GPIO_REG(0xa8 + ((counter)<<2))
-#define NETX_GPIO_IRQ_ENABLE NETX_GPIO_REG(0xbc)
-#define NETX_GPIO_IRQ_DISABLE NETX_GPIO_REG(0xc0)
-#define NETX_GPIO_SYSTIME_NS_CMP NETX_GPIO_REG(0xc4)
-#define NETX_GPIO_LINE NETX_GPIO_REG(0xc8)
-#define NETX_GPIO_IRQ NETX_GPIO_REG(0xd0)
-
-/* Bits */
-#define NETX_GPIO_CFG_IOCFG_GP_INPUT (0x0)
-#define NETX_GPIO_CFG_IOCFG_GP_OUTPUT (0x1)
-#define NETX_GPIO_CFG_IOCFG_GP_UART (0x2)
-#define NETX_GPIO_CFG_INV (1<<2)
-#define NETX_GPIO_CFG_MODE_INPUT_READ (0<<3)
-#define NETX_GPIO_CFG_MODE_INPUT_CAPTURE_CONT_RISING (1<<3)
-#define NETX_GPIO_CFG_MODE_INPUT_CAPTURE_ONCE_RISING (2<<3)
-#define NETX_GPIO_CFG_MODE_INPUT_CAPTURE_HIGH_LEVEL (3<<3)
-#define NETX_GPIO_CFG_COUNT_REF_COUNTER0 (0<<5)
-#define NETX_GPIO_CFG_COUNT_REF_COUNTER1 (1<<5)
-#define NETX_GPIO_CFG_COUNT_REF_COUNTER2 (2<<5)
-#define NETX_GPIO_CFG_COUNT_REF_COUNTER3 (3<<5)
-#define NETX_GPIO_CFG_COUNT_REF_COUNTER4 (4<<5)
-#define NETX_GPIO_CFG_COUNT_REF_SYSTIME (7<<5)
-
-#define NETX_GPIO_COUNTER_CTRL_RUN (1<<0)
-#define NETX_GPIO_COUNTER_CTRL_SYM (1<<1)
-#define NETX_GPIO_COUNTER_CTRL_ONCE (1<<2)
-#define NETX_GPIO_COUNTER_CTRL_IRQ_EN (1<<3)
-#define NETX_GPIO_COUNTER_CTRL_CNT_EVENT (1<<4)
-#define NETX_GPIO_COUNTER_CTRL_RST_EN (1<<5)
-#define NETX_GPIO_COUNTER_CTRL_SEL_EVENT (1<<6)
-#define NETX_GPIO_COUNTER_CTRL_GPIO_REF /* FIXME */
-
-#define GPIO_BIT(gpio) (1<<(gpio))
-#define COUNTER_BIT(counter) ((1<<16)<<(counter))
-
-/*******************************
- * PIO *
- *******************************/
-
-/* Registers */
-#define NETX_PIO_REG(ofs) IOMEM(NETX_VA_PIO + (ofs))
-#define NETX_PIO_INPIO NETX_PIO_REG(0x0)
-#define NETX_PIO_OUTPIO NETX_PIO_REG(0x4)
-#define NETX_PIO_OEPIO NETX_PIO_REG(0x8)
-
-/*******************************
- * MII Unit *
- *******************************/
-
-/* Registers */
-#define NETX_MIIMU IOMEM(NETX_VA_MIIMU)
-
-/* Bits */
-#define MIIMU_SNRDY (1<<0)
-#define MIIMU_PREAMBLE (1<<1)
-#define MIIMU_OPMODE_WRITE (1<<2)
-#define MIIMU_MDC_PERIOD (1<<3)
-#define MIIMU_PHY_NRES (1<<4)
-#define MIIMU_RTA (1<<5)
-#define MIIMU_REGADDR(adr) (((adr) & 0x1f) << 6)
-#define MIIMU_PHYADDR(adr) (((adr) & 0x1f) << 11)
-#define MIIMU_DATA(data) (((data) & 0xffff) << 16)
-
-/*******************************
- * xmac / xpec *
- *******************************/
-
-/* XPEC register offsets relative to NETX_VA_XPEC(no) */
-#define NETX_XPEC_R0_OFS 0x00
-#define NETX_XPEC_R1_OFS 0x04
-#define NETX_XPEC_R2_OFS 0x08
-#define NETX_XPEC_R3_OFS 0x0c
-#define NETX_XPEC_R4_OFS 0x10
-#define NETX_XPEC_R5_OFS 0x14
-#define NETX_XPEC_R6_OFS 0x18
-#define NETX_XPEC_R7_OFS 0x1c
-#define NETX_XPEC_RANGE01_OFS 0x20
-#define NETX_XPEC_RANGE23_OFS 0x24
-#define NETX_XPEC_RANGE45_OFS 0x28
-#define NETX_XPEC_RANGE67_OFS 0x2c
-#define NETX_XPEC_PC_OFS 0x48
-#define NETX_XPEC_TIMER_OFS(timer) (0x30 + ((timer)<<2))
-#define NETX_XPEC_IRQ_OFS 0x8c
-#define NETX_XPEC_SYSTIME_NS_OFS 0x90
-#define NETX_XPEC_FIFO_DATA_OFS 0x94
-#define NETX_XPEC_SYSTIME_S_OFS 0x98
-#define NETX_XPEC_ADC_OFS 0x9c
-#define NETX_XPEC_URX_COUNT_OFS 0x40
-#define NETX_XPEC_UTX_COUNT_OFS 0x44
-#define NETX_XPEC_PC_OFS 0x48
-#define NETX_XPEC_ZERO_OFS 0x4c
-#define NETX_XPEC_STATCFG_OFS 0x50
-#define NETX_XPEC_EC_MASKA_OFS 0x54
-#define NETX_XPEC_EC_MASKB_OFS 0x58
-#define NETX_XPEC_EC_MASK0_OFS 0x5c
-#define NETX_XPEC_EC_MASK8_OFS 0x7c
-#define NETX_XPEC_EC_MASK9_OFS 0x80
-#define NETX_XPEC_XPU_HOLD_PC_OFS 0x100
-#define NETX_XPEC_RAM_START_OFS 0x2000
-
-/* Bits */
-#define XPU_HOLD_PC (1<<0)
-
-/* XMAC register offsets relative to NETX_VA_XMAC(no) */
-#define NETX_XMAC_RPU_PROGRAM_START_OFS 0x000
-#define NETX_XMAC_RPU_PROGRAM_END_OFS 0x3ff
-#define NETX_XMAC_TPU_PROGRAM_START_OFS 0x400
-#define NETX_XMAC_TPU_PROGRAM_END_OFS 0x7ff
-#define NETX_XMAC_RPU_HOLD_PC_OFS 0xa00
-#define NETX_XMAC_TPU_HOLD_PC_OFS 0xa04
-#define NETX_XMAC_STATUS_SHARED0_OFS 0x840
-#define NETX_XMAC_CONFIG_SHARED0_OFS 0x844
-#define NETX_XMAC_STATUS_SHARED1_OFS 0x848
-#define NETX_XMAC_CONFIG_SHARED1_OFS 0x84c
-#define NETX_XMAC_STATUS_SHARED2_OFS 0x850
-#define NETX_XMAC_CONFIG_SHARED2_OFS 0x854
-#define NETX_XMAC_STATUS_SHARED3_OFS 0x858
-#define NETX_XMAC_CONFIG_SHARED3_OFS 0x85c
-
-#define RPU_HOLD_PC (1<<15)
-#define TPU_HOLD_PC (1<<15)
-
-/*******************************
- * Pointer FIFO *
- *******************************/
-
-/* Registers */
-#define NETX_PFIFO_REG(ofs) IOMEM(NETX_VA_PFIFO + (ofs))
-#define NETX_PFIFO_BASE(pfifo) NETX_PFIFO_REG(0x00 + ((pfifo)<<2))
-#define NETX_PFIFO_BORDER_BASE(pfifo) NETX_PFIFO_REG(0x80 + ((pfifo)<<2))
-#define NETX_PFIFO_RESET NETX_PFIFO_REG(0x100)
-#define NETX_PFIFO_FULL NETX_PFIFO_REG(0x104)
-#define NETX_PFIFO_EMPTY NETX_PFIFO_REG(0x108)
-#define NETX_PFIFO_OVEFLOW NETX_PFIFO_REG(0x10c)
-#define NETX_PFIFO_UNDERRUN NETX_PFIFO_REG(0x110)
-#define NETX_PFIFO_FILL_LEVEL(pfifo) NETX_PFIFO_REG(0x180 + ((pfifo)<<2))
-#define NETX_PFIFO_XPEC_ISR(xpec) NETX_PFIFO_REG(0x400 + ((xpec) << 2))
-
-
-/*******************************
- * Memory Controller *
- *******************************/
-
-/* Registers */
-#define NETX_MEMCR_REG(ofs) IOMEM(NETX_VA_MEMCR + (ofs))
-#define NETX_MEMCR_SRAM_CTRL(cs) NETX_MEMCR_REG(0x0 + 4 * (cs)) /* SRAM for CS 0..2 */
-#define NETX_MEMCR_SDRAM_CFG_CTRL NETX_MEMCR_REG(0x40)
-#define NETX_MEMCR_SDRAM_TIMING_CTRL NETX_MEMCR_REG(0x44)
-#define NETX_MEMCR_SDRAM_MODE NETX_MEMCR_REG(0x48)
-#define NETX_MEMCR_SDRAM_EXT_MODE NETX_MEMCR_REG(0x4c)
-#define NETX_MEMCR_PRIO_TIMESLOT_CTRL NETX_MEMCR_REG(0x80)
-#define NETX_MEMCR_PRIO_ACCESS_CTRL NETX_MEMCR_REG(0x84)
-
-/* Bits */
-#define NETX_MEMCR_SRAM_CTRL_WIDTHEXTMEM(x) (((x) & 0x3) << 24)
-#define NETX_MEMCR_SRAM_CTRL_WSPOSTPAUSEEXTMEM(x) (((x) & 0x3) << 16)
-#define NETX_MEMCR_SRAM_CTRL_WSPREPASEEXTMEM(x) (((x) & 0x3) << 8)
-#define NETX_MEMCR_SRAM_CTRL_WSEXTMEM(x) (((x) & 0x1f) << 0)
-
-
-/*******************************
- * Dual Port Memory *
- *******************************/
-
-/* Registers */
-#define NETX_DPMAS_REG(ofs) IOMEM(NETX_VA_DPMAS + (ofs))
-#define NETX_DPMAS_SYS_STAT NETX_DPMAS_REG(0x4d8)
-#define NETX_DPMAS_INT_STAT NETX_DPMAS_REG(0x4e0)
-#define NETX_DPMAS_INT_EN NETX_DPMAS_REG(0x4f0)
-#define NETX_DPMAS_IF_CONF0 NETX_DPMAS_REG(0x608)
-#define NETX_DPMAS_IF_CONF1 NETX_DPMAS_REG(0x60c)
-#define NETX_DPMAS_EXT_CONFIG(cs) NETX_DPMAS_REG(0x610 + 4 * (cs))
-#define NETX_DPMAS_IO_MODE0 NETX_DPMAS_REG(0x620) /* I/O 32..63 */
-#define NETX_DPMAS_DRV_EN0 NETX_DPMAS_REG(0x624)
-#define NETX_DPMAS_DATA0 NETX_DPMAS_REG(0x628)
-#define NETX_DPMAS_IO_MODE1 NETX_DPMAS_REG(0x630) /* I/O 64..84 */
-#define NETX_DPMAS_DRV_EN1 NETX_DPMAS_REG(0x634)
-#define NETX_DPMAS_DATA1 NETX_DPMAS_REG(0x638)
-
-/* Bits */
-#define NETX_DPMAS_INT_EN_GLB_EN (1<<31)
-#define NETX_DPMAS_INT_EN_MEM_LCK (1<<30)
-#define NETX_DPMAS_INT_EN_WDG (1<<29)
-#define NETX_DPMAS_INT_EN_PIO72 (1<<28)
-#define NETX_DPMAS_INT_EN_PIO47 (1<<27)
-#define NETX_DPMAS_INT_EN_PIO40 (1<<26)
-#define NETX_DPMAS_INT_EN_PIO36 (1<<25)
-#define NETX_DPMAS_INT_EN_PIO35 (1<<24)
-
-#define NETX_DPMAS_IF_CONF0_HIF_DISABLED (0<<28)
-#define NETX_DPMAS_IF_CONF0_HIF_EXT_BUS (1<<28)
-#define NETX_DPMAS_IF_CONF0_HIF_UP_8BIT (2<<28)
-#define NETX_DPMAS_IF_CONF0_HIF_UP_16BIT (3<<28)
-#define NETX_DPMAS_IF_CONF0_HIF_IO (4<<28)
-#define NETX_DPMAS_IF_CONF0_WAIT_DRV_PP (1<<14)
-#define NETX_DPMAS_IF_CONF0_WAIT_DRV_OD (2<<14)
-#define NETX_DPMAS_IF_CONF0_WAIT_DRV_TRI (3<<14)
-
-#define NETX_DPMAS_IF_CONF1_IRQ_POL_PIO35 (1<<26)
-#define NETX_DPMAS_IF_CONF1_IRQ_POL_PIO36 (1<<27)
-#define NETX_DPMAS_IF_CONF1_IRQ_POL_PIO40 (1<<28)
-#define NETX_DPMAS_IF_CONF1_IRQ_POL_PIO47 (1<<29)
-#define NETX_DPMAS_IF_CONF1_IRQ_POL_PIO72 (1<<30)
-
-#define NETX_EXT_CONFIG_TALEWIDTH(x) (((x) & 0x7) << 29)
-#define NETX_EXT_CONFIG_TADRHOLD(x) (((x) & 0x7) << 26)
-#define NETX_EXT_CONFIG_TCSON(x) (((x) & 0x7) << 23)
-#define NETX_EXT_CONFIG_TRDON(x) (((x) & 0x7) << 20)
-#define NETX_EXT_CONFIG_TWRON(x) (((x) & 0x7) << 17)
-#define NETX_EXT_CONFIG_TWROFF(x) (((x) & 0x1f) << 12)
-#define NETX_EXT_CONFIG_TRDWRCYC(x) (((x) & 0x1f) << 7)
-#define NETX_EXT_CONFIG_WAIT_POL (1<<6)
-#define NETX_EXT_CONFIG_WAIT_EN (1<<5)
-#define NETX_EXT_CONFIG_NRD_MODE (1<<4)
-#define NETX_EXT_CONFIG_DS_MODE (1<<3)
-#define NETX_EXT_CONFIG_NWR_MODE (1<<2)
-#define NETX_EXT_CONFIG_16BIT (1<<1)
-#define NETX_EXT_CONFIG_CS_ENABLE (1<<0)
-
-#define NETX_DPMAS_IO_MODE0_WRL (1<<13)
-#define NETX_DPMAS_IO_MODE0_WAIT (1<<14)
-#define NETX_DPMAS_IO_MODE0_READY (1<<15)
-#define NETX_DPMAS_IO_MODE0_CS0 (1<<19)
-#define NETX_DPMAS_IO_MODE0_EXTRD (1<<20)
-
-#define NETX_DPMAS_IO_MODE1_CS2 (1<<15)
-#define NETX_DPMAS_IO_MODE1_CS1 (1<<16)
-#define NETX_DPMAS_IO_MODE1_SAMPLE_NPOR (0<<30)
-#define NETX_DPMAS_IO_MODE1_SAMPLE_100MHZ (1<<30)
-#define NETX_DPMAS_IO_MODE1_SAMPLE_NPIO36 (2<<30)
-#define NETX_DPMAS_IO_MODE1_SAMPLE_PIO36 (3<<30)
-
-/*******************************
- * I2C *
- *******************************/
-#define NETX_I2C_REG(ofs) IOMEM(NETX_VA_I2C, (ofs))
-#define NETX_I2C_CTRL NETX_I2C_REG(0x0)
-#define NETX_I2C_DATA NETX_I2C_REG(0x4)
-
-#endif /* __ASM_ARCH_NETX_REGS_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm/mach-netx/include/mach/pfifo.h
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-
-#ifndef ASM_ARCH_PFIFO_H
-#define ASM_ARCH_PFIFO_H
-
-static inline int pfifo_push(int no, unsigned int pointer)
-{
- writel(pointer, NETX_PFIFO_BASE(no));
- return 0;
-}
-
-static inline unsigned int pfifo_pop(int no)
-{
- return readl(NETX_PFIFO_BASE(no));
-}
-
-static inline int pfifo_fill_level(int no)
-{
-
- return readl(NETX_PFIFO_FILL_LEVEL(no));
-}
-
-static inline int pfifo_full(int no)
-{
- return readl(NETX_PFIFO_FULL) & (1<<no) ? 1 : 0;
-}
-
-static inline int pfifo_empty(int no)
-{
- return readl(NETX_PFIFO_EMPTY) & (1<<no) ? 1 : 0;
-}
-
-int pfifo_request(unsigned int pfifo_mask);
-void pfifo_free(unsigned int pfifo_mask);
-
-#endif /* ASM_ARCH_PFIFO_H */
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm/mach-netx/include/mach/uncompress.h
- *
- * Copyright (C) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-/*
- * The following code assumes the serial port has already been
- * initialized by the bootloader. We search for the first enabled
- * port in the most probable order. If you didn't setup a port in
- * your bootloader then nothing will appear (which might be desired).
- *
- * This does not append a newline
- */
-
-#define REG(x) (*(volatile unsigned long *)(x))
-
-#define UART1_BASE 0x100a00
-#define UART2_BASE 0x100a80
-
-#define UART_DR 0x0
-
-#define UART_CR 0x14
-#define CR_UART_EN (1<<0)
-
-#define UART_FR 0x18
-#define FR_BUSY (1<<3)
-#define FR_TXFF (1<<5)
-
-static inline void putc(char c)
-{
- unsigned long base;
-
- if (REG(UART1_BASE + UART_CR) & CR_UART_EN)
- base = UART1_BASE;
- else if (REG(UART2_BASE + UART_CR) & CR_UART_EN)
- base = UART2_BASE;
- else
- return;
-
- while (REG(base + UART_FR) & FR_TXFF);
- REG(base + UART_DR) = c;
-}
-
-static inline void flush(void)
-{
- unsigned long base;
-
- if (REG(UART1_BASE + UART_CR) & CR_UART_EN)
- base = UART1_BASE;
- else if (REG(UART2_BASE + UART_CR) & CR_UART_EN)
- base = UART2_BASE;
- else
- return;
-
- while (REG(base + UART_FR) & FR_BUSY);
-}
-
-/*
- * nothing to do
- */
-#define arch_decomp_setup()
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * arch/arm/mach-netx/include/mach/xc.h
- *
- * Copyright (C) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#ifndef __ASM_ARCH_XC_H
-#define __ASM_ARCH_XC_H
-
-struct xc {
- int no;
- unsigned int type;
- unsigned int version;
- void __iomem *xpec_base;
- void __iomem *xmac_base;
- void __iomem *sram_base;
- int irq;
- struct device *dev;
-};
-
-int xc_reset(struct xc *x);
-int xc_stop(struct xc* x);
-int xc_start(struct xc *x);
-int xc_running(struct xc *x);
-int xc_request_firmware(struct xc* x);
-struct xc* request_xc(int xcno, struct device *dev);
-void free_xc(struct xc *x);
-
-#endif /* __ASM_ARCH_XC_H */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mach-netx/nxdb500.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/mtd/plat-ram.h>
-#include <linux/platform_device.h>
-#include <linux/amba/bus.h>
-#include <linux/amba/clcd.h>
-
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-#include <mach/netx-regs.h>
-#include <linux/platform_data/eth-netx.h>
-
-#include "generic.h"
-#include "fb.h"
-
-static struct clcd_panel qvga = {
- .mode = {
- .name = "QVGA",
- .refresh = 60,
- .xres = 240,
- .yres = 320,
- .pixclock = 187617,
- .left_margin = 6,
- .right_margin = 26,
- .upper_margin = 0,
- .lower_margin = 6,
- .hsync_len = 6,
- .vsync_len = 1,
- .sync = 0,
- .vmode = FB_VMODE_NONINTERLACED,
- },
- .width = -1,
- .height = -1,
- .tim2 = 16,
- .cntl = CNTL_LCDTFT | CNTL_BGR,
- .bpp = 16,
- .grayscale = 0,
-};
-
-static inline int nxdb500_check(struct clcd_fb *fb, struct fb_var_screeninfo *var)
-{
- var->green.length = 5;
- var->green.msb_right = 0;
-
- return clcdfb_check(fb, var);
-}
-
-static int nxdb500_clcd_setup(struct clcd_fb *fb)
-{
- unsigned int val;
-
- fb->fb.var.green.length = 5;
- fb->fb.var.green.msb_right = 0;
-
- /* enable asic control */
- val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
- writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
- writel(3, NETX_SYSTEM_IOC_CR);
-
- val = readl(NETX_PIO_OUTPIO);
- writel(val | 1, NETX_PIO_OUTPIO);
-
- val = readl(NETX_PIO_OEPIO);
- writel(val | 1, NETX_PIO_OEPIO);
- return netx_clcd_setup(fb);
-}
-
-static struct clcd_board clcd_data = {
- .name = "netX",
- .check = nxdb500_check,
- .decode = clcdfb_decode,
- .enable = netx_clcd_enable,
- .setup = nxdb500_clcd_setup,
- .mmap = netx_clcd_mmap,
- .remove = netx_clcd_remove,
-};
-
-static struct netxeth_platform_data eth0_platform_data = {
- .xcno = 0,
-};
-
-static struct platform_device netx_eth0_device = {
- .name = "netx-eth",
- .id = 0,
- .num_resources = 0,
- .resource = NULL,
- .dev = {
- .platform_data = ð0_platform_data,
- }
-};
-
-static struct netxeth_platform_data eth1_platform_data = {
- .xcno = 1,
-};
-
-static struct platform_device netx_eth1_device = {
- .name = "netx-eth",
- .id = 1,
- .num_resources = 0,
- .resource = NULL,
- .dev = {
- .platform_data = ð1_platform_data,
- }
-};
-
-static struct resource netx_uart0_resources[] = {
- [0] = {
- .start = 0x00100A00,
- .end = 0x00100A3F,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = (NETX_IRQ_UART0),
- .end = (NETX_IRQ_UART0),
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct platform_device netx_uart0_device = {
- .name = "netx-uart",
- .id = 0,
- .num_resources = ARRAY_SIZE(netx_uart0_resources),
- .resource = netx_uart0_resources,
-};
-
-static struct resource netx_uart1_resources[] = {
- [0] = {
- .start = 0x00100A40,
- .end = 0x00100A7F,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = (NETX_IRQ_UART1),
- .end = (NETX_IRQ_UART1),
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct platform_device netx_uart1_device = {
- .name = "netx-uart",
- .id = 1,
- .num_resources = ARRAY_SIZE(netx_uart1_resources),
- .resource = netx_uart1_resources,
-};
-
-static struct resource netx_uart2_resources[] = {
- [0] = {
- .start = 0x00100A80,
- .end = 0x00100ABF,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = (NETX_IRQ_UART2),
- .end = (NETX_IRQ_UART2),
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct platform_device netx_uart2_device = {
- .name = "netx-uart",
- .id = 2,
- .num_resources = ARRAY_SIZE(netx_uart2_resources),
- .resource = netx_uart2_resources,
-};
-
-static struct platform_device *devices[] __initdata = {
- &netx_eth0_device,
- &netx_eth1_device,
- &netx_uart0_device,
- &netx_uart1_device,
- &netx_uart2_device,
-};
-
-static void __init nxdb500_init(void)
-{
- netx_fb_init(&clcd_data, &qvga);
- platform_add_devices(devices, ARRAY_SIZE(devices));
-}
-
-MACHINE_START(NXDB500, "Hilscher nxdb500")
- .atag_offset = 0x100,
- .map_io = netx_map_io,
- .init_irq = netx_init_irq,
- .init_time = netx_timer_init,
- .init_machine = nxdb500_init,
- .restart = netx_restart,
-MACHINE_END
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mach-netx/nxdkn.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/mtd/plat-ram.h>
-#include <linux/platform_device.h>
-#include <linux/amba/bus.h>
-#include <linux/amba/clcd.h>
-
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-#include <mach/netx-regs.h>
-#include <linux/platform_data/eth-netx.h>
-
-#include "generic.h"
-
-static struct netxeth_platform_data eth0_platform_data = {
- .xcno = 0,
-};
-
-static struct platform_device nxdkn_eth0_device = {
- .name = "netx-eth",
- .id = 0,
- .num_resources = 0,
- .resource = NULL,
- .dev = {
- .platform_data = ð0_platform_data,
- }
-};
-
-static struct netxeth_platform_data eth1_platform_data = {
- .xcno = 1,
-};
-
-static struct platform_device nxdkn_eth1_device = {
- .name = "netx-eth",
- .id = 1,
- .num_resources = 0,
- .resource = NULL,
- .dev = {
- .platform_data = ð1_platform_data,
- }
-};
-
-static struct resource netx_uart0_resources[] = {
- [0] = {
- .start = 0x00100A00,
- .end = 0x00100A3F,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = (NETX_IRQ_UART0),
- .end = (NETX_IRQ_UART0),
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct platform_device netx_uart0_device = {
- .name = "netx-uart",
- .id = 0,
- .num_resources = ARRAY_SIZE(netx_uart0_resources),
- .resource = netx_uart0_resources,
-};
-
-static struct platform_device *devices[] __initdata = {
- &nxdkn_eth0_device,
- &nxdkn_eth1_device,
- &netx_uart0_device,
-};
-
-static void __init nxdkn_init(void)
-{
- platform_add_devices(devices, ARRAY_SIZE(devices));
-}
-
-MACHINE_START(NXDKN, "Hilscher nxdkn")
- .atag_offset = 0x100,
- .map_io = netx_map_io,
- .init_irq = netx_init_irq,
- .init_time = netx_timer_init,
- .init_machine = nxdkn_init,
- .restart = netx_restart,
-MACHINE_END
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mach-netx/nxeb500hmi.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/dma-mapping.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/mtd/plat-ram.h>
-#include <linux/platform_device.h>
-#include <linux/amba/bus.h>
-#include <linux/amba/clcd.h>
-
-#include <mach/hardware.h>
-#include <asm/mach-types.h>
-#include <asm/mach/arch.h>
-#include <mach/netx-regs.h>
-#include <linux/platform_data/eth-netx.h>
-
-#include "generic.h"
-#include "fb.h"
-
-static struct clcd_panel qvga = {
- .mode = {
- .name = "QVGA",
- .refresh = 60,
- .xres = 240,
- .yres = 320,
- .pixclock = 187617,
- .left_margin = 6,
- .right_margin = 26,
- .upper_margin = 0,
- .lower_margin = 6,
- .hsync_len = 6,
- .vsync_len = 1,
- .sync = 0,
- .vmode = FB_VMODE_NONINTERLACED,
- },
- .width = -1,
- .height = -1,
- .tim2 = 16,
- .cntl = CNTL_LCDTFT | CNTL_BGR,
- .bpp = 16,
- .grayscale = 0,
-};
-
-static inline int nxeb500hmi_check(struct clcd_fb *fb, struct fb_var_screeninfo *var)
-{
- var->green.length = 5;
- var->green.msb_right = 0;
-
- return clcdfb_check(fb, var);
-}
-
-static int nxeb500hmi_clcd_setup(struct clcd_fb *fb)
-{
- unsigned int val;
-
- fb->fb.var.green.length = 5;
- fb->fb.var.green.msb_right = 0;
-
- /* enable asic control */
- val = readl(NETX_SYSTEM_IOC_ACCESS_KEY);
- writel(val, NETX_SYSTEM_IOC_ACCESS_KEY);
-
- writel(3, NETX_SYSTEM_IOC_CR);
-
- /* GPIO 14 is used for display enable on newer boards */
- writel(9, NETX_GPIO_CFG(14));
-
- val = readl(NETX_PIO_OUTPIO);
- writel(val | 1, NETX_PIO_OUTPIO);
-
- val = readl(NETX_PIO_OEPIO);
- writel(val | 1, NETX_PIO_OEPIO);
- return netx_clcd_setup(fb);
-}
-
-static struct clcd_board clcd_data = {
- .name = "netX",
- .check = nxeb500hmi_check,
- .decode = clcdfb_decode,
- .enable = netx_clcd_enable,
- .setup = nxeb500hmi_clcd_setup,
- .mmap = netx_clcd_mmap,
- .remove = netx_clcd_remove,
-};
-
-static struct netxeth_platform_data eth0_platform_data = {
- .xcno = 0,
-};
-
-static struct platform_device netx_eth0_device = {
- .name = "netx-eth",
- .id = 0,
- .num_resources = 0,
- .resource = NULL,
- .dev = {
- .platform_data = ð0_platform_data,
- }
-};
-
-static struct netxeth_platform_data eth1_platform_data = {
- .xcno = 1,
-};
-
-static struct platform_device netx_eth1_device = {
- .name = "netx-eth",
- .id = 1,
- .num_resources = 0,
- .resource = NULL,
- .dev = {
- .platform_data = ð1_platform_data,
- }
-};
-
-static struct resource netx_cf_resources[] = {
- [0] = {
- .start = 0x20000000,
- .end = 0x25ffffff,
- .flags = IORESOURCE_MEM | IORESOURCE_MEM_8AND16BIT,
- },
-};
-
-static struct platform_device netx_cf_device = {
- .name = "netx-cf",
- .id = 0,
- .resource = netx_cf_resources,
- .num_resources = ARRAY_SIZE(netx_cf_resources),
-};
-
-static struct resource netx_uart0_resources[] = {
- [0] = {
- .start = 0x00100A00,
- .end = 0x00100A3F,
- .flags = IORESOURCE_MEM,
- },
- [1] = {
- .start = (NETX_IRQ_UART0),
- .end = (NETX_IRQ_UART0),
- .flags = IORESOURCE_IRQ,
- },
-};
-
-static struct platform_device netx_uart0_device = {
- .name = "netx-uart",
- .id = 0,
- .num_resources = ARRAY_SIZE(netx_uart0_resources),
- .resource = netx_uart0_resources,
-};
-
-static struct platform_device *devices[] __initdata = {
- &netx_eth0_device,
- &netx_eth1_device,
- &netx_cf_device,
- &netx_uart0_device,
-};
-
-static void __init nxeb500hmi_init(void)
-{
- netx_fb_init(&clcd_data, &qvga);
- platform_add_devices(devices, ARRAY_SIZE(devices));
-}
-
-MACHINE_START(NXEB500HMI, "Hilscher nxeb500hmi")
- .atag_offset = 0x100,
- .map_io = netx_map_io,
- .init_irq = netx_init_irq,
- .init_time = netx_timer_init,
- .init_machine = nxeb500hmi_init,
- .restart = netx_restart,
-MACHINE_END
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mach-netx/pfifo.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/io.h>
-
-#include <mach/hardware.h>
-#include <mach/netx-regs.h>
-#include <mach/pfifo.h>
-
-static DEFINE_MUTEX(pfifo_lock);
-
-static unsigned int pfifo_used = 0;
-
-int pfifo_request(unsigned int pfifo_mask)
-{
- int err = 0;
- unsigned int val;
-
- mutex_lock(&pfifo_lock);
-
- if (pfifo_mask & pfifo_used) {
- err = -EBUSY;
- goto out;
- }
-
- pfifo_used |= pfifo_mask;
-
- val = readl(NETX_PFIFO_RESET);
- writel(val | pfifo_mask, NETX_PFIFO_RESET);
- writel(val, NETX_PFIFO_RESET);
-
-out:
- mutex_unlock(&pfifo_lock);
- return err;
-}
-
-void pfifo_free(unsigned int pfifo_mask)
-{
- mutex_lock(&pfifo_lock);
- pfifo_used &= ~pfifo_mask;
- mutex_unlock(&pfifo_lock);
-}
-
-EXPORT_SYMBOL(pfifo_push);
-EXPORT_SYMBOL(pfifo_pop);
-EXPORT_SYMBOL(pfifo_fill_level);
-EXPORT_SYMBOL(pfifo_empty);
-EXPORT_SYMBOL(pfifo_request);
-EXPORT_SYMBOL(pfifo_free);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mach-netx/time.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/irq.h>
-#include <linux/clocksource.h>
-#include <linux/clockchips.h>
-#include <linux/io.h>
-
-#include <mach/hardware.h>
-#include <asm/mach/time.h>
-#include <mach/netx-regs.h>
-
-#define NETX_CLOCK_FREQ 100000000
-#define NETX_LATCH DIV_ROUND_CLOSEST(NETX_CLOCK_FREQ, HZ)
-
-#define TIMER_CLOCKEVENT 0
-#define TIMER_CLOCKSOURCE 1
-
-static inline void timer_shutdown(struct clock_event_device *evt)
-{
- /* disable timer */
- writel(0, NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKEVENT));
-}
-
-static int netx_shutdown(struct clock_event_device *evt)
-{
- timer_shutdown(evt);
-
- return 0;
-}
-
-static int netx_set_oneshot(struct clock_event_device *evt)
-{
- u32 tmode = NETX_GPIO_COUNTER_CTRL_IRQ_EN | NETX_GPIO_COUNTER_CTRL_RUN;
-
- timer_shutdown(evt);
- writel(0, NETX_GPIO_COUNTER_MAX(TIMER_CLOCKEVENT));
- writel(tmode, NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKEVENT));
-
- return 0;
-}
-
-static int netx_set_periodic(struct clock_event_device *evt)
-{
- u32 tmode = NETX_GPIO_COUNTER_CTRL_RST_EN |
- NETX_GPIO_COUNTER_CTRL_IRQ_EN | NETX_GPIO_COUNTER_CTRL_RUN;
-
- timer_shutdown(evt);
- writel(NETX_LATCH, NETX_GPIO_COUNTER_MAX(TIMER_CLOCKEVENT));
- writel(tmode, NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKEVENT));
-
- return 0;
-}
-
-static int netx_set_next_event(unsigned long evt,
- struct clock_event_device *clk)
-{
- writel(0 - evt, NETX_GPIO_COUNTER_CURRENT(TIMER_CLOCKEVENT));
- return 0;
-}
-
-static struct clock_event_device netx_clockevent = {
- .name = "netx-timer" __stringify(TIMER_CLOCKEVENT),
- .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT,
- .set_next_event = netx_set_next_event,
- .set_state_shutdown = netx_shutdown,
- .set_state_periodic = netx_set_periodic,
- .set_state_oneshot = netx_set_oneshot,
- .tick_resume = netx_shutdown,
-};
-
-/*
- * IRQ handler for the timer
- */
-static irqreturn_t
-netx_timer_interrupt(int irq, void *dev_id)
-{
- struct clock_event_device *evt = &netx_clockevent;
-
- /* acknowledge interrupt */
- writel(COUNTER_BIT(0), NETX_GPIO_IRQ);
-
- evt->event_handler(evt);
-
- return IRQ_HANDLED;
-}
-
-static struct irqaction netx_timer_irq = {
- .name = "NetX Timer Tick",
- .flags = IRQF_TIMER | IRQF_IRQPOLL,
- .handler = netx_timer_interrupt,
-};
-
-/*
- * Set up timer interrupt
- */
-void __init netx_timer_init(void)
-{
- /* disable timer initially */
- writel(0, NETX_GPIO_COUNTER_CTRL(0));
-
- /* Reset the timer value to zero */
- writel(0, NETX_GPIO_COUNTER_CURRENT(0));
-
- writel(NETX_LATCH, NETX_GPIO_COUNTER_MAX(0));
-
- /* acknowledge interrupt */
- writel(COUNTER_BIT(0), NETX_GPIO_IRQ);
-
- /* Enable the interrupt in the specific timer
- * register and start timer
- */
- writel(COUNTER_BIT(0), NETX_GPIO_IRQ_ENABLE);
- writel(NETX_GPIO_COUNTER_CTRL_IRQ_EN | NETX_GPIO_COUNTER_CTRL_RUN,
- NETX_GPIO_COUNTER_CTRL(0));
-
- setup_irq(NETX_IRQ_TIMER0, &netx_timer_irq);
-
- /* Setup timer one for clocksource */
- writel(0, NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE));
- writel(0, NETX_GPIO_COUNTER_CURRENT(TIMER_CLOCKSOURCE));
- writel(0xffffffff, NETX_GPIO_COUNTER_MAX(TIMER_CLOCKSOURCE));
-
- writel(NETX_GPIO_COUNTER_CTRL_RUN,
- NETX_GPIO_COUNTER_CTRL(TIMER_CLOCKSOURCE));
-
- clocksource_mmio_init(NETX_GPIO_COUNTER_CURRENT(TIMER_CLOCKSOURCE),
- "netx_timer", NETX_CLOCK_FREQ, 200, 32, clocksource_mmio_readl_up);
-
- /* with max_delta_ns >= delta2ns(0x800) the system currently runs fine.
- * Adding some safety ... */
- netx_clockevent.cpumask = cpumask_of(0);
- clockevents_config_and_register(&netx_clockevent, NETX_CLOCK_FREQ,
- 0xa00, 0xfffffffe);
-}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * arch/arm/mach-netx/xc.c
- *
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#include <linux/init.h>
-#include <linux/device.h>
-#include <linux/firmware.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-#include <linux/export.h>
-
-#include <mach/hardware.h>
-#include <mach/irqs.h>
-#include <mach/netx-regs.h>
-
-#include <mach/xc.h>
-
-static DEFINE_MUTEX(xc_lock);
-
-static int xc_in_use = 0;
-
-struct fw_desc {
- unsigned int ofs;
- unsigned int size;
- unsigned int patch_ofs;
- unsigned int patch_entries;
-};
-
-struct fw_header {
- unsigned int magic;
- unsigned int type;
- unsigned int version;
- unsigned int reserved[5];
- struct fw_desc fw_desc[3];
-} __attribute__ ((packed));
-
-int xc_stop(struct xc *x)
-{
- writel(RPU_HOLD_PC, x->xmac_base + NETX_XMAC_RPU_HOLD_PC_OFS);
- writel(TPU_HOLD_PC, x->xmac_base + NETX_XMAC_TPU_HOLD_PC_OFS);
- writel(XPU_HOLD_PC, x->xpec_base + NETX_XPEC_XPU_HOLD_PC_OFS);
- return 0;
-}
-
-int xc_start(struct xc *x)
-{
- writel(0, x->xmac_base + NETX_XMAC_RPU_HOLD_PC_OFS);
- writel(0, x->xmac_base + NETX_XMAC_TPU_HOLD_PC_OFS);
- writel(0, x->xpec_base + NETX_XPEC_XPU_HOLD_PC_OFS);
- return 0;
-}
-
-int xc_running(struct xc *x)
-{
- return (readl(x->xmac_base + NETX_XMAC_RPU_HOLD_PC_OFS) & RPU_HOLD_PC)
- || (readl(x->xmac_base + NETX_XMAC_TPU_HOLD_PC_OFS) & TPU_HOLD_PC)
- || (readl(x->xpec_base + NETX_XPEC_XPU_HOLD_PC_OFS) & XPU_HOLD_PC) ?
- 0 : 1;
-}
-
-int xc_reset(struct xc *x)
-{
- writel(0, x->xpec_base + NETX_XPEC_PC_OFS);
- return 0;
-}
-
-static int xc_check_ptr(struct xc *x, unsigned long adr, unsigned int size)
-{
- if (adr >= NETX_PA_XMAC(x->no) &&
- adr + size < NETX_PA_XMAC(x->no) + XMAC_MEM_SIZE)
- return 0;
-
- if (adr >= NETX_PA_XPEC(x->no) &&
- adr + size < NETX_PA_XPEC(x->no) + XPEC_MEM_SIZE)
- return 0;
-
- dev_err(x->dev, "Illegal pointer in firmware found. aborting\n");
-
- return -1;
-}
-
-static int xc_patch(struct xc *x, const void *patch, int count)
-{
- unsigned int val, adr;
- const unsigned int *data = patch;
-
- int i;
- for (i = 0; i < count; i++) {
- adr = *data++;
- val = *data++;
- if (xc_check_ptr(x, adr, 4) < 0)
- return -EINVAL;
-
- writel(val, (void __iomem *)io_p2v(adr));
- }
- return 0;
-}
-
-int xc_request_firmware(struct xc *x)
-{
- int ret;
- char name[16];
- const struct firmware *fw;
- struct fw_header *head;
- unsigned int size;
- int i;
- const void *src;
- unsigned long dst;
-
- sprintf(name, "xc%d.bin", x->no);
-
- ret = request_firmware(&fw, name, x->dev);
-
- if (ret < 0) {
- dev_err(x->dev, "request_firmware failed\n");
- return ret;
- }
-
- head = (struct fw_header *)fw->data;
- if (head->magic != 0x4e657458) {
- if (head->magic == 0x5874654e) {
- dev_err(x->dev,
- "firmware magic is 'XteN'. Endianness problems?\n");
- ret = -ENODEV;
- goto exit_release_firmware;
- }
- dev_err(x->dev, "unrecognized firmware magic 0x%08x\n",
- head->magic);
- ret = -ENODEV;
- goto exit_release_firmware;
- }
-
- x->type = head->type;
- x->version = head->version;
-
- ret = -EINVAL;
-
- for (i = 0; i < 3; i++) {
- src = fw->data + head->fw_desc[i].ofs;
- dst = *(unsigned int *)src;
- src += sizeof (unsigned int);
- size = head->fw_desc[i].size - sizeof (unsigned int);
-
- if (xc_check_ptr(x, dst, size))
- goto exit_release_firmware;
-
- memcpy((void *)io_p2v(dst), src, size);
-
- src = fw->data + head->fw_desc[i].patch_ofs;
- size = head->fw_desc[i].patch_entries;
- ret = xc_patch(x, src, size);
- if (ret < 0)
- goto exit_release_firmware;
- }
-
- ret = 0;
-
- exit_release_firmware:
- release_firmware(fw);
-
- return ret;
-}
-
-struct xc *request_xc(int xcno, struct device *dev)
-{
- struct xc *x = NULL;
-
- mutex_lock(&xc_lock);
-
- if (xcno > 3)
- goto exit;
- if (xc_in_use & (1 << xcno))
- goto exit;
-
- x = kmalloc(sizeof (struct xc), GFP_KERNEL);
- if (!x)
- goto exit;
-
- if (!request_mem_region
- (NETX_PA_XPEC(xcno), XPEC_MEM_SIZE, kobject_name(&dev->kobj)))
- goto exit_free;
-
- if (!request_mem_region
- (NETX_PA_XMAC(xcno), XMAC_MEM_SIZE, kobject_name(&dev->kobj)))
- goto exit_release_1;
-
- if (!request_mem_region
- (SRAM_INTERNAL_PHYS(xcno), SRAM_MEM_SIZE, kobject_name(&dev->kobj)))
- goto exit_release_2;
-
- x->xpec_base = (void * __iomem)io_p2v(NETX_PA_XPEC(xcno));
- x->xmac_base = (void * __iomem)io_p2v(NETX_PA_XMAC(xcno));
- x->sram_base = ioremap(SRAM_INTERNAL_PHYS(xcno), SRAM_MEM_SIZE);
- if (!x->sram_base)
- goto exit_release_3;
-
- x->irq = NETX_IRQ_XPEC(xcno);
-
- x->no = xcno;
- x->dev = dev;
-
- xc_in_use |= (1 << xcno);
-
- goto exit;
-
- exit_release_3:
- release_mem_region(SRAM_INTERNAL_PHYS(xcno), SRAM_MEM_SIZE);
- exit_release_2:
- release_mem_region(NETX_PA_XMAC(xcno), XMAC_MEM_SIZE);
- exit_release_1:
- release_mem_region(NETX_PA_XPEC(xcno), XPEC_MEM_SIZE);
- exit_free:
- kfree(x);
- x = NULL;
- exit:
- mutex_unlock(&xc_lock);
- return x;
-}
-
-void free_xc(struct xc *x)
-{
- int xcno = x->no;
-
- mutex_lock(&xc_lock);
-
- iounmap(x->sram_base);
- release_mem_region(SRAM_INTERNAL_PHYS(xcno), SRAM_MEM_SIZE);
- release_mem_region(NETX_PA_XMAC(xcno), XMAC_MEM_SIZE);
- release_mem_region(NETX_PA_XPEC(xcno), XPEC_MEM_SIZE);
- xc_in_use &= ~(1 << x->no);
- kfree(x);
-
- mutex_unlock(&xc_lock);
-}
-
-EXPORT_SYMBOL(free_xc);
-EXPORT_SYMBOL(request_xc);
-EXPORT_SYMBOL(xc_request_firmware);
-EXPORT_SYMBOL(xc_reset);
-EXPORT_SYMBOL(xc_running);
-EXPORT_SYMBOL(xc_start);
-EXPORT_SYMBOL(xc_stop);
#define MX8MM_IOMUXC_SAI3_RXFS_GPIO4_IO28 0x1CC 0x434 0x000 0x5 0x0
#define MX8MM_IOMUXC_SAI3_RXFS_TPSMP_HTRANS0 0x1CC 0x434 0x000 0x7 0x0
#define MX8MM_IOMUXC_SAI3_RXC_SAI3_RX_BCLK 0x1D0 0x438 0x000 0x0 0x0
-#define MX8MM_IOMUXC_SAI3_RXC_GPT1_CAPTURE2 0x1D0 0x438 0x000 0x1 0x0
+#define MX8MM_IOMUXC_SAI3_RXC_GPT1_CLK 0x1D0 0x438 0x000 0x1 0x0
#define MX8MM_IOMUXC_SAI3_RXC_SAI5_RX_BCLK 0x1D0 0x438 0x4D0 0x2 0x2
#define MX8MM_IOMUXC_SAI3_RXC_GPIO4_IO29 0x1D0 0x438 0x000 0x5 0x0
#define MX8MM_IOMUXC_SAI3_RXC_TPSMP_HTRANS1 0x1D0 0x438 0x000 0x7 0x0
#define MX8MM_IOMUXC_SAI3_RXD_GPIO4_IO30 0x1D4 0x43C 0x000 0x5 0x0
#define MX8MM_IOMUXC_SAI3_RXD_TPSMP_HDATA0 0x1D4 0x43C 0x000 0x7 0x0
#define MX8MM_IOMUXC_SAI3_TXFS_SAI3_TX_SYNC 0x1D8 0x440 0x000 0x0 0x0
-#define MX8MM_IOMUXC_SAI3_TXFS_GPT1_CLK 0x1D8 0x440 0x000 0x1 0x0
+#define MX8MM_IOMUXC_SAI3_TXFS_GPT1_CAPTURE2 0x1D8 0x440 0x000 0x1 0x0
#define MX8MM_IOMUXC_SAI3_TXFS_SAI5_RX_DATA1 0x1D8 0x440 0x4D8 0x2 0x2
#define MX8MM_IOMUXC_SAI3_TXFS_GPIO4_IO31 0x1D8 0x440 0x000 0x5 0x0
#define MX8MM_IOMUXC_SAI3_TXFS_TPSMP_HDATA1 0x1D8 0x440 0x000 0x7 0x0
sai2: sai@308b0000 {
#sound-dai-cells = <0>;
- compatible = "fsl,imx8mq-sai",
- "fsl,imx6sx-sai";
+ compatible = "fsl,imx8mq-sai";
reg = <0x308b0000 0x10000>;
interrupts = <GIC_SPI 96 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&clk IMX8MQ_CLK_SAI2_IPG>,
({ \
set_thread_flag(TIF_32BIT); \
})
-#ifdef CONFIG_GENERIC_COMPAT_VDSO
+#ifdef CONFIG_COMPAT_VDSO
#define COMPAT_ARCH_DLINFO \
do { \
/* \
/*
* Huge pte definitions.
*/
-#define pte_huge(pte) (!(pte_val(pte) & PTE_TABLE_BIT))
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
/*
regs->pmr_save = GIC_PRIO_IRQON;
}
+static inline void set_ssbs_bit(struct pt_regs *regs)
+{
+ regs->pstate |= PSR_SSBS_BIT;
+}
+
+static inline void set_compat_ssbs_bit(struct pt_regs *regs)
+{
+ regs->pstate |= PSR_AA32_SSBS_BIT;
+}
+
static inline void start_thread(struct pt_regs *regs, unsigned long pc,
unsigned long sp)
{
regs->pstate = PSR_MODE_EL0t;
if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
- regs->pstate |= PSR_SSBS_BIT;
+ set_ssbs_bit(regs);
regs->sp = sp;
}
#endif
if (arm64_get_ssbd_state() != ARM64_SSBD_FORCE_ENABLE)
- regs->pstate |= PSR_AA32_SSBS_BIT;
+ set_compat_ssbs_bit(regs);
regs->compat_sp = sp;
}
#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/sched/task_stack.h>
+#include <linux/types.h>
#include <asm/memory.h>
#include <asm/ptrace.h>
#include <asm/sdei.h>
-struct stackframe {
- unsigned long fp;
- unsigned long pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- int graph;
-#endif
-};
-
enum stack_type {
STACK_TYPE_UNKNOWN,
STACK_TYPE_TASK,
STACK_TYPE_OVERFLOW,
STACK_TYPE_SDEI_NORMAL,
STACK_TYPE_SDEI_CRITICAL,
+ __NR_STACK_TYPES
};
struct stack_info {
enum stack_type type;
};
+/*
+ * A snapshot of a frame record or fp/lr register values, along with some
+ * accounting information necessary for robust unwinding.
+ *
+ * @fp: The fp value in the frame record (or the real fp)
+ * @pc: The fp value in the frame record (or the real lr)
+ *
+ * @stacks_done: Stacks which have been entirely unwound, for which it is no
+ * longer valid to unwind to.
+ *
+ * @prev_fp: The fp that pointed to this frame record, or a synthetic value
+ * of 0. This is used to ensure that within a stack, each
+ * subsequent frame record is at an increasing address.
+ * @prev_type: The type of stack this frame record was on, or a synthetic
+ * value of STACK_TYPE_UNKNOWN. This is used to detect a
+ * transition from one stack to another.
+ *
+ * @graph: When FUNCTION_GRAPH_TRACER is selected, holds the index of a
+ * replacement lr value in the ftrace graph stack.
+ */
+struct stackframe {
+ unsigned long fp;
+ unsigned long pc;
+ DECLARE_BITMAP(stacks_done, __NR_STACK_TYPES);
+ unsigned long prev_fp;
+ enum stack_type prev_type;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ int graph;
+#endif
+};
+
extern int unwind_frame(struct task_struct *tsk, struct stackframe *frame);
extern void walk_stackframe(struct task_struct *tsk, struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);
return true;
}
-static inline bool on_task_stack(struct task_struct *tsk, unsigned long sp,
- struct stack_info *info)
+static inline bool on_task_stack(const struct task_struct *tsk,
+ unsigned long sp,
+ struct stack_info *info)
{
unsigned long low = (unsigned long)task_stack_page(tsk);
unsigned long high = low + THREAD_SIZE;
* We can only safely access per-cpu stacks from current in a non-preemptible
* context.
*/
-static inline bool on_accessible_stack(struct task_struct *tsk,
- unsigned long sp,
- struct stack_info *info)
+static inline bool on_accessible_stack(const struct task_struct *tsk,
+ unsigned long sp,
+ struct stack_info *info)
{
+ if (info)
+ info->type = STACK_TYPE_UNKNOWN;
+
if (on_task_stack(tsk, sp, info))
return true;
if (tsk != current || preemptible())
return false;
}
+static inline void start_backtrace(struct stackframe *frame,
+ unsigned long fp, unsigned long pc)
+{
+ frame->fp = fp;
+ frame->pc = pc;
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ frame->graph = 0;
+#endif
+
+ /*
+ * Prime the first unwind.
+ *
+ * In unwind_frame() we'll check that the FP points to a valid stack,
+ * which can't be STACK_TYPE_UNKNOWN, and the first unwind will be
+ * treated as a transition to whichever stack that happens to be. The
+ * prev_fp value won't be used, but we set it to 0 such that it is
+ * definitely not an accessible stack address.
+ */
+ bitmap_zero(frame->stacks_done, __NR_STACK_TYPES);
+ frame->prev_fp = 0;
+ frame->prev_type = STACK_TYPE_UNKNOWN;
+}
+
#endif /* __ASM_STACKTRACE_H */
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
#define _UAPI__ASM_BPF_PERF_EVENT_H__
b.eq el1_ia
cmp x24, #ESR_ELx_EC_SYS64 // configurable trap
b.eq el1_undef
- cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
- b.eq el1_sp_pc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el1_sp_pc
+ b.eq el1_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1
b.eq el1_undef
cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1
bl do_mem_abort
kernel_exit 1
-el1_sp_pc:
+el1_pc:
/*
- * Stack or PC alignment exception handling
+ * PC alignment exception handling. We don't handle SP alignment faults,
+ * since we will have hit a recursive exception when trying to push the
+ * initial pt_regs.
*/
mrs x0, far_el1
inherit_daif pstate=x23, tmp=x2
ccmp x24, #ESR_ELx_EC_WFx, #4, ne
b.eq el0_sys
cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception
- b.eq el0_sp_pc
+ b.eq el0_sp
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_sp_pc
+ b.eq el0_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef
cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0
cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception
b.eq el0_fpsimd_exc
cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception
- b.eq el0_sp_pc
+ b.eq el0_pc
cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0
b.eq el0_undef
cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap
mov x1, sp
bl do_fpsimd_exc
b ret_to_user
+el0_sp:
+ ldr x26, [sp, #S_SP]
+ b el0_sp_pc
+el0_pc:
+ mrs x26, far_el1
el0_sp_pc:
/*
* Stack or PC alignment exception handling
*/
- mrs x26, far_el1
gic_prio_kentry_setup tmp=x0
enable_da_f
#ifdef CONFIG_TRACE_IRQFLAGS
#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
+static void __fpsimd_to_sve(void *sst, struct user_fpsimd_state const *fst,
+ unsigned int vq)
+{
+ unsigned int i;
+ __uint128_t *p;
+
+ for (i = 0; i < SVE_NUM_ZREGS; ++i) {
+ p = (__uint128_t *)ZREG(sst, vq, i);
+ *p = arm64_cpu_to_le128(fst->vregs[i]);
+ }
+}
+
/*
* Transfer the FPSIMD state in task->thread.uw.fpsimd_state to
* task->thread.sve_state.
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- unsigned int i;
- __uint128_t *p;
if (!system_supports_sve())
return;
vq = sve_vq_from_vl(task->thread.sve_vl);
- for (i = 0; i < 32; ++i) {
- p = (__uint128_t *)ZREG(sst, vq, i);
- *p = arm64_cpu_to_le128(fst->vregs[i]);
- }
+ __fpsimd_to_sve(sst, fst, vq);
}
/*
return;
vq = sve_vq_from_vl(task->thread.sve_vl);
- for (i = 0; i < 32; ++i) {
+ for (i = 0; i < SVE_NUM_ZREGS; ++i) {
p = (__uint128_t const *)ZREG(sst, vq, i);
fst->vregs[i] = arm64_le128_to_cpu(*p);
}
unsigned int vq;
void *sst = task->thread.sve_state;
struct user_fpsimd_state const *fst = &task->thread.uw.fpsimd_state;
- unsigned int i;
- __uint128_t *p;
if (!test_tsk_thread_flag(task, TIF_SVE))
return;
vq = sve_vq_from_vl(task->thread.sve_vl);
memset(sst, 0, SVE_SIG_REGS_SIZE(vq));
-
- for (i = 0; i < 32; ++i) {
- p = (__uint128_t *)ZREG(sst, vq, i);
- *p = arm64_cpu_to_le128(fst->vregs[i]);
- }
+ __fpsimd_to_sve(sst, fst, vq);
}
int sve_set_vector_length(struct task_struct *task,
return;
}
- frame.fp = regs->regs[29];
- frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
-
+ start_backtrace(&frame, regs->regs[29], regs->pc);
walk_stackframe(current, &frame, callchain_trace, entry);
}
childregs->pstate |= PSR_UAO_BIT;
if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
- childregs->pstate |= PSR_SSBS_BIT;
+ set_ssbs_bit(childregs);
if (system_uses_irq_prio_masking())
childregs->pmr_save = GIC_PRIO_IRQON;
}
}
+/*
+ * Force SSBS state on context-switch, since it may be lost after migrating
+ * from a CPU which treats the bit as RES0 in a heterogeneous system.
+ */
+static void ssbs_thread_switch(struct task_struct *next)
+{
+ struct pt_regs *regs = task_pt_regs(next);
+
+ /*
+ * Nothing to do for kernel threads, but 'regs' may be junk
+ * (e.g. idle task) so check the flags and bail early.
+ */
+ if (unlikely(next->flags & PF_KTHREAD))
+ return;
+
+ /* If the mitigation is enabled, then we leave SSBS clear. */
+ if ((arm64_get_ssbd_state() == ARM64_SSBD_FORCE_ENABLE) ||
+ test_tsk_thread_flag(next, TIF_SSBD))
+ return;
+
+ if (compat_user_mode(regs))
+ set_compat_ssbs_bit(regs);
+ else if (user_mode(regs))
+ set_ssbs_bit(regs);
+}
+
/*
* We store our current task in sp_el0, which is clobbered by userspace. Keep a
* shadow copy so that we can restore this upon entry from userspace.
entry_task_switch(next);
uao_thread_switch(next);
ptrauth_thread_switch(next);
+ ssbs_thread_switch(next);
/*
* Complete any pending TLB or cache maintenance on this CPU in case
if (!stack_page)
return 0;
- frame.fp = thread_saved_fp(p);
- frame.pc = thread_saved_pc(p);
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
+ start_backtrace(&frame, thread_saved_fp(p), thread_saved_pc(p));
+
do {
if (unwind_frame(p, &frame))
goto out;
data.level = level + 2;
data.addr = NULL;
- frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.pc = (unsigned long)return_address; /* dummy */
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
-
+ start_backtrace(&frame,
+ (unsigned long)__builtin_frame_address(0),
+ (unsigned long)return_address);
walk_stackframe(current, &frame, save_return_addr, &data);
if (!data.level)
* ldp x29, x30, [sp]
* add sp, sp, #0x10
*/
+
+/*
+ * Unwind from one frame record (A) to the next frame record (B).
+ *
+ * We terminate early if the location of B indicates a malformed chain of frame
+ * records (e.g. a cycle), determined based on the location and fp value of A
+ * and the location (but not the fp value) of B.
+ */
int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
{
unsigned long fp = frame->fp;
+ struct stack_info info;
if (fp & 0xf)
return -EINVAL;
if (!tsk)
tsk = current;
- if (!on_accessible_stack(tsk, fp, NULL))
+ if (!on_accessible_stack(tsk, fp, &info))
+ return -EINVAL;
+
+ if (test_bit(info.type, frame->stacks_done))
return -EINVAL;
+ /*
+ * As stacks grow downward, any valid record on the same stack must be
+ * at a strictly higher address than the prior record.
+ *
+ * Stacks can nest in several valid orders, e.g.
+ *
+ * TASK -> IRQ -> OVERFLOW -> SDEI_NORMAL
+ * TASK -> SDEI_NORMAL -> SDEI_CRITICAL -> OVERFLOW
+ *
+ * ... but the nesting itself is strict. Once we transition from one
+ * stack to another, it's never valid to unwind back to that first
+ * stack.
+ */
+ if (info.type == frame->prev_type) {
+ if (fp <= frame->prev_fp)
+ return -EINVAL;
+ } else {
+ set_bit(frame->prev_type, frame->stacks_done);
+ }
+
+ /*
+ * Record this frame record's values and location. The prev_fp and
+ * prev_type are only meaningful to the next unwind_frame() invocation.
+ */
frame->fp = READ_ONCE_NOCHECK(*(unsigned long *)(fp));
frame->pc = READ_ONCE_NOCHECK(*(unsigned long *)(fp + 8));
+ frame->prev_fp = fp;
+ frame->prev_type = info.type;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
if (tsk->ret_stack &&
data.skip = trace->skip;
data.no_sched_functions = 0;
- frame.fp = regs->regs[29];
- frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
-
+ start_backtrace(&frame, regs->regs[29], regs->pc);
walk_stackframe(current, &frame, save_trace, &data);
}
EXPORT_SYMBOL_GPL(save_stack_trace_regs);
data.no_sched_functions = nosched;
if (tsk != current) {
- frame.fp = thread_saved_fp(tsk);
- frame.pc = thread_saved_pc(tsk);
+ start_backtrace(&frame, thread_saved_fp(tsk),
+ thread_saved_pc(tsk));
} else {
/* We don't want this function nor the caller */
data.skip += 2;
- frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.pc = (unsigned long)__save_stack_trace;
+ start_backtrace(&frame,
+ (unsigned long)__builtin_frame_address(0),
+ (unsigned long)__save_stack_trace);
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
walk_stackframe(tsk, &frame, save_trace, &data);
if (!in_lock_functions(regs->pc))
return regs->pc;
- frame.fp = regs->regs[29];
- frame.pc = regs->pc;
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
+ start_backtrace(&frame, regs->regs[29], regs->pc);
+
do {
int ret = unwind_frame(NULL, &frame);
if (ret < 0)
return;
if (tsk == current) {
- frame.fp = (unsigned long)__builtin_frame_address(0);
- frame.pc = (unsigned long)dump_backtrace;
+ start_backtrace(&frame,
+ (unsigned long)__builtin_frame_address(0),
+ (unsigned long)dump_backtrace);
} else {
/*
* task blocked in __switch_to
*/
- frame.fp = thread_saved_fp(tsk);
- frame.pc = thread_saved_pc(tsk);
+ start_backtrace(&frame,
+ thread_saved_fp(tsk),
+ thread_saved_pc(tsk));
}
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- frame.graph = 0;
-#endif
printk("Call trace:\n");
do {
OBJECT_FILES_NON_STANDARD := y
KCOV_INSTRUMENT := n
-ifeq ($(c-gettimeofday-y),)
CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny
-else
-CFLAGS_vgettimeofday.o = -O2 -mcmodel=tiny -include $(c-gettimeofday-y)
+
+ifneq ($(c-gettimeofday-y),)
+ CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
endif
# Clang versions less than 8 do not support -mcmodel=tiny
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
- $(call if_changed,ld)
- $(call if_changed,vdso_check)
+ $(call if_changed,vdsold_and_vdso_check)
# Strip rule for the .so file
$(obj)/%.so: OBJCOPYFLAGS := -S
$(call if_changed,vdsosym)
# Actual build commands
-quiet_cmd_vdsocc = VDSOCC $@
- cmd_vdsocc = $(CC) $(a_flags) $(c_flags) -c -o $@ $<
+quiet_cmd_vdsold_and_vdso_check = LD $@
+ cmd_vdsold_and_vdso_check = $(cmd_ld); $(cmd_vdso_check)
# Install commands for the unstripped file
quiet_cmd_vdso_install = INSTALL $@
# Link rule for the .so file, .lds has to be first
$(obj)/vdso.so.raw: $(src)/vdso.lds $(obj-vdso) FORCE
- $(call if_changed,vdsold)
- $(call if_changed,vdso_check)
+ $(call if_changed,vdsold_and_vdso_check)
# Compilation rules for the vDSO sources
$(c-obj-vdso): %.o: %.c FORCE
$(call if_changed_dep,vdsoas)
# Actual build commands
-quiet_cmd_vdsold = VDSOL $@
+quiet_cmd_vdsold_and_vdso_check = LD32 $@
+ cmd_vdsold_and_vdso_check = $(cmd_vdsold); $(cmd_vdso_check)
+
+quiet_cmd_vdsold = LD32 $@
cmd_vdsold = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_LDFLAGS) \
-Wl,-T $(filter %.lds,$^) $(filter %.o,$^) -o $@
-quiet_cmd_vdsocc = VDSOC $@
+quiet_cmd_vdsocc = CC32 $@
cmd_vdsocc = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) -c -o $@ $<
-quiet_cmd_vdsocc_gettimeofday = VDSOC_GTD $@
+quiet_cmd_vdsocc_gettimeofday = CC32 $@
cmd_vdsocc_gettimeofday = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_CFLAGS) $(VDSO_CFLAGS_gettimeofday_o) -c -o $@ $<
-quiet_cmd_vdsoas = VDSOA $@
+quiet_cmd_vdsoas = AS32 $@
cmd_vdsoas = $(COMPATCC) -Wp,-MD,$(depfile) $(VDSO_AFLAGS) -c -o $@ $<
quiet_cmd_vdsomunge = MUNGE $@
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_BYTEORDER_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __ASM_CSKY_CACHECTL_H
#define __ASM_CSKY_CACHECTL_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef _ASM_CSKY_PERF_REGS_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef _CSKY_PTRACE_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#ifndef __ASM_CSKY_SIGCONTEXT_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
#define __ARCH_WANT_SYS_CLONE
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_AUXVEC_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __NDS32_BYTEORDER_H__
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 1994, 1995, 1996 by Ralf Baechle
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASM_CACHECTL
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (C) 2005-2019 Andes Technology Corporation */
#ifndef _FP_UDF_IEX_CRTL_H
#define _FP_UDF_IEX_CRTL_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __ASM_NDS32_PARAM_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef __UAPI_ASM_NDS32_PTRACE_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2005-2017 Andes Technology Corporation
#ifndef _ASMNDS32_SIGCONTEXT_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
// Copyright (C) 2005-2017 Andes Technology Corporation
#define __ARCH_WANT_STAT64
int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs);
int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs);
+static inline int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
+{
+ return 0;
+}
#endif /* CONFIG_KPROBES */
#endif /* _PARISC_KPROBES_H */
/* Flush Instruction Tlb */
- LDREG ITLB_SID_BASE(%r1), %r20
+88: LDREG ITLB_SID_BASE(%r1), %r20
LDREG ITLB_SID_STRIDE(%r1), %r21
LDREG ITLB_SID_COUNT(%r1), %r22
LDREG ITLB_OFF_BASE(%r1), %arg0
add %r21, %r20, %r20 /* increment space */
fitdone:
+ ALTERNATIVE(88b, fitdone, ALT_COND_NO_SPLIT_TLB, INSN_NOP)
/* Flush Data Tlb */
select ARCH_32BIT_OFF_T if PPC32
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_DMA_MMAP_PGPROT
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
#define H_SCM_UNBIND_MEM 0x3F0
#define H_SCM_QUERY_BLOCK_MEM_BINDING 0x3F4
#define H_SCM_QUERY_LOGICAL_MEM_BINDING 0x3F8
-#define H_SCM_MEM_QUERY 0x3FC
-#define H_SCM_BLOCK_CLEAR 0x400
-#define MAX_HCALL_OPCODE H_SCM_BLOCK_CLEAR
+#define H_SCM_UNBIND_ALL 0x3FC
+#define H_SCM_HEALTH 0x400
+#define H_SCM_PERFORMANCE_STATS 0x418
+#define MAX_HCALL_OPCODE H_SCM_PERFORMANCE_STATS
+
+/* Scope args for H_SCM_UNBIND_ALL */
+#define H_UNBIND_SCOPE_ALL (0x1)
+#define H_UNBIND_SCOPE_DRC (0x2)
/* H_VIOCTL functions */
#define H_GET_VIOA_DUMP_SIZE 0x01
};
/*
- * We use the top bit of each memslot->arch.rmap entry as a lock bit,
- * and bit 32 as a present flag. The bottom 32 bits are the
- * index in the guest HPT of a HPTE that points to the page.
+ * The rmap array of size number of guest pages is allocated for each memslot.
+ * This array is used to store usage specific information about the guest page.
+ * Below are the encodings of the various possible usage types.
*/
-#define KVMPPC_RMAP_LOCK_BIT 63
+/* Free bits which can be used to define a new usage */
+#define KVMPPC_RMAP_TYPE_MASK 0xff00000000000000
+#define KVMPPC_RMAP_NESTED 0xc000000000000000 /* Nested rmap array */
+#define KVMPPC_RMAP_HPT 0x0100000000000000 /* HPT guest */
+
+/*
+ * rmap usage definition for a hash page table (hpt) guest:
+ * 0x0000080000000000 Lock bit
+ * 0x0000018000000000 RC bits
+ * 0x0000000100000000 Present bit
+ * 0x00000000ffffffff HPT index bits
+ * The bottom 32 bits are the index in the guest HPT of a HPTE that points to
+ * the page.
+ */
+#define KVMPPC_RMAP_LOCK_BIT 43
#define KVMPPC_RMAP_RC_SHIFT 32
#define KVMPPC_RMAP_REFERENCED (HPTE_R_R << KVMPPC_RMAP_RC_SHIFT)
#define KVMPPC_RMAP_PRESENT 0x100000000ul
union kvmppc_one_reg *val);
extern int kvmppc_xive_native_set_vp(struct kvm_vcpu *vcpu,
union kvmppc_one_reg *val);
+extern bool kvmppc_xive_native_supported(void);
#else
static inline int kvmppc_xive_set_xive(struct kvm *kvm, u32 irq, u32 server,
#ifdef CONFIG_PPC_PSERIES
get_lppaca()->pmcregs_in_use = inuse;
#endif
- } else {
+ }
#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
- get_paca()->pmcregs_in_use = inuse;
+ get_paca()->pmcregs_in_use = inuse;
#endif
- }
#endif
}
/* Setup/used by frontend */
int target;
+ /*
+ * saved_p means that there is a queue entry for this interrupt
+ * in some CPU's queue (not including guest vcpu queues), even
+ * if P is not set in the source ESB.
+ * stale_p means that there is no queue entry for this interrupt
+ * in some CPU's queue, even if P is set in the source ESB.
+ */
bool saved_p;
+ bool stale_p;
};
#define XIVE_IRQ_FLAG_STORE_EOI 0x01
#define XIVE_IRQ_FLAG_LSI 0x02
extern int xive_native_set_queue_state(u32 vp_id, uint32_t prio, u32 qtoggle,
u32 qindex);
extern int xive_native_get_vp_state(u32 vp_id, u64 *out_state);
+extern bool xive_native_has_queue_state_support(void);
#else
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
#define _UAPI__ASM_BPF_PERF_EVENT_H__
signal.o sysfs.o cacheinfo.o time.o \
prom.o traps.o setup-common.o \
udbg.o misc.o io.o misc_$(BITS).o \
- of_platform.o prom_parse.o
+ of_platform.o prom_parse.o \
+ dma-common.o
obj-$(CONFIG_PPC64) += setup_64.o sys_ppc32.o \
signal_64.o ptrace32.o \
paca.o nvram_64.o firmware.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Contains common dma routines for all powerpc platforms.
+ *
+ * Copyright (C) 2019 Shawn Anastasio.
+ */
+
+#include <linux/mm.h>
+#include <linux/dma-noncoherent.h>
+
+pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot,
+ unsigned long attrs)
+{
+ if (!dev_is_dma_coherent(dev))
+ return pgprot_noncached(prot);
+ return prot;
+}
*
* Call convention:
*
- * syscall register convention is in Documentation/powerpc/syscall64-abi.txt
+ * syscall register convention is in Documentation/powerpc/syscall64-abi.rst
*
* For hypercalls, the register convention is as follows:
* r0 volatile
goto bad;
if (MSR_TM_ACTIVE(msr_hi<<32)) {
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto bad;
/* We only recheckpoint on return if we're
* transaction.
*/
if (MSR_TM_ACTIVE(msr)) {
/* We recheckpoint on return. */
struct ucontext __user *uc_transact;
+
+ /* Trying to start TM on non TM system */
+ if (!cpu_has_feature(CPU_FTR_TM))
+ goto badframe;
+
if (__get_user(uc_transact, &uc->uc_link))
goto badframe;
if (restore_tm_sigcontexts(current, &uc->uc_mcontext,
if (xics_on_xive()) {
kvmppc_xive_init_module();
kvm_register_device_ops(&kvm_xive_ops, KVM_DEV_TYPE_XICS);
- kvmppc_xive_native_init_module();
- kvm_register_device_ops(&kvm_xive_native_ops,
- KVM_DEV_TYPE_XIVE);
+ if (kvmppc_xive_native_supported()) {
+ kvmppc_xive_native_init_module();
+ kvm_register_device_ops(&kvm_xive_native_ops,
+ KVM_DEV_TYPE_XIVE);
+ }
} else
#endif
kvm_register_device_ops(&kvm_xics_ops, KVM_DEV_TYPE_XICS);
case 2:
case 6:
pte->may_write = true;
+ /* fall through */
case 3:
case 5:
case 7:
*val = get_reg_val(id, vcpu->arch.pspb);
break;
case KVM_REG_PPC_DPDES:
- *val = get_reg_val(id, vcpu->arch.vcore->dpdes);
+ /*
+ * On POWER9, where we are emulating msgsndp etc.,
+ * we return 1 bit for each vcpu, which can come from
+ * either vcore->dpdes or doorbell_request.
+ * On POWER8, doorbell_request is 0.
+ */
+ *val = get_reg_val(id, vcpu->arch.vcore->dpdes |
+ vcpu->arch.doorbell_request);
break;
case KVM_REG_PPC_VTB:
*val = get_reg_val(id, vcpu->arch.vcore->vtb);
if (!spin_trylock(&pvc->lock))
continue;
prepare_threads(pvc);
- if (!pvc->n_runnable) {
+ if (!pvc->n_runnable || !pvc->kvm->arch.mmu_ready) {
list_del_init(&pvc->preempt_list);
if (pvc->runner == NULL) {
pvc->vcore_state = VCORE_INACTIVE;
spin_unlock(&lp->lock);
}
-static bool recheck_signals(struct core_info *cip)
+static bool recheck_signals_and_mmu(struct core_info *cip)
{
int sub, i;
struct kvm_vcpu *vcpu;
+ struct kvmppc_vcore *vc;
- for (sub = 0; sub < cip->n_subcores; ++sub)
- for_each_runnable_thread(i, vcpu, cip->vc[sub])
+ for (sub = 0; sub < cip->n_subcores; ++sub) {
+ vc = cip->vc[sub];
+ if (!vc->kvm->arch.mmu_ready)
+ return true;
+ for_each_runnable_thread(i, vcpu, vc)
if (signal_pending(vcpu->arch.run_task))
return true;
+ }
return false;
}
local_irq_disable();
hard_irq_disable();
if (lazy_irq_pending() || need_resched() ||
- recheck_signals(&core_info) || !vc->kvm->arch.mmu_ready) {
+ recheck_signals_and_mmu(&core_info)) {
local_irq_enable();
vc->vcore_state = VCORE_INACTIVE;
/* Unlock all except the primary vcore */
mtspr(SPRN_DEC, vcpu->arch.dec_expires - mftb());
if (kvmhv_on_pseries()) {
+ /*
+ * We need to save and restore the guest visible part of the
+ * psscr (i.e. using SPRN_PSSCR_PR) since the hypervisor
+ * doesn't do this for us. Note only required if pseries since
+ * this is done in kvmhv_load_hv_regs_and_go() below otherwise.
+ */
+ unsigned long host_psscr;
/* call our hypervisor to load up HV regs and go */
struct hv_guest_state hvregs;
+ host_psscr = mfspr(SPRN_PSSCR_PR);
+ mtspr(SPRN_PSSCR_PR, vcpu->arch.psscr);
kvmhv_save_hv_regs(vcpu, &hvregs);
hvregs.lpcr = lpcr;
vcpu->arch.regs.msr = vcpu->arch.shregs.msr;
vcpu->arch.shregs.msr = vcpu->arch.regs.msr;
vcpu->arch.shregs.dar = mfspr(SPRN_DAR);
vcpu->arch.shregs.dsisr = mfspr(SPRN_DSISR);
+ vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
+ mtspr(SPRN_PSSCR_PR, host_psscr);
/* H_CEDE has to be handled now, not later */
if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
vcpu->arch.vpa.dirty = 1;
save_pmu = lp->pmcregs_in_use;
}
+ /* Must save pmu if this guest is capable of running nested guests */
+ save_pmu |= nesting_enabled(vcpu->kvm);
kvmhv_save_guest_pmu(vcpu, save_pmu);
} else {
rev->forw = rev->back = pte_index;
*rmap = (*rmap & ~KVMPPC_RMAP_INDEX) |
- pte_index | KVMPPC_RMAP_PRESENT;
+ pte_index | KVMPPC_RMAP_PRESENT | KVMPPC_RMAP_HPT;
}
unlock_rmap(rmap);
}
ld r11, VCPU_XIVE_SAVED_STATE(r4)
li r9, TM_QW1_OS
lwz r8, VCPU_XIVE_CAM_WORD(r4)
+ cmpwi r8, 0
+ beq no_xive
li r7, TM_QW1_OS + TM_WORD2
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
kvm_cede_exit:
ld r9, HSTATE_KVM_VCPU(r13)
#ifdef CONFIG_KVM_XICS
- /* Abort if we still have a pending escalation */
+ /* are we using XIVE with single escalation? */
+ ld r10, VCPU_XIVE_ESC_VADDR(r9)
+ cmpdi r10, 0
+ beq 3f
+ li r6, XIVE_ESB_SET_PQ_00
+ /*
+ * If we still have a pending escalation, abort the cede,
+ * and we must set PQ to 10 rather than 00 so that we don't
+ * potentially end up with two entries for the escalation
+ * interrupt in the XIVE interrupt queue. In that case
+ * we also don't want to set xive_esc_on to 1 here in
+ * case we race with xive_esc_irq().
+ */
lbz r5, VCPU_XIVE_ESC_ON(r9)
cmpwi r5, 0
- beq 1f
+ beq 4f
li r0, 0
stb r0, VCPU_CEDED(r9)
-1: /* Enable XIVE escalation */
- li r5, XIVE_ESB_SET_PQ_00
+ li r6, XIVE_ESB_SET_PQ_10
+ b 5f
+4: li r0, 1
+ stb r0, VCPU_XIVE_ESC_ON(r9)
+ /* make sure store to xive_esc_on is seen before xive_esc_irq runs */
+ sync
+5: /* Enable XIVE escalation */
mfmsr r0
andi. r0, r0, MSR_DR /* in real mode? */
beq 1f
- ld r10, VCPU_XIVE_ESC_VADDR(r9)
- cmpdi r10, 0
- beq 3f
- ldx r0, r10, r5
+ ldx r0, r10, r6
b 2f
1: ld r10, VCPU_XIVE_ESC_RADDR(r9)
- cmpdi r10, 0
- beq 3f
- ldcix r0, r10, r5
+ ldcix r0, r10, r6
2: sync
- li r0, 1
- stb r0, VCPU_XIVE_ESC_ON(r9)
#endif /* CONFIG_KVM_XICS */
3: b guest_exit_cont
void __iomem *tima = local_paca->kvm_hstate.xive_tima_virt;
u64 pq;
- if (!tima)
+ /*
+ * Nothing to do if the platform doesn't have a XIVE
+ * or this vCPU doesn't have its own XIVE context
+ * (e.g. because it's not using an in-kernel interrupt controller).
+ */
+ if (!tima || !vcpu->arch.xive_cam_word)
return;
+
eieio();
__raw_writeq(vcpu->arch.xive_saved_state.w01, tima + TM_QW1_OS);
__raw_writel(vcpu->arch.xive_cam_word, tima + TM_QW1_OS + TM_WORD2);
*/
vcpu->arch.xive_esc_on = false;
+ /* This orders xive_esc_on = false vs. subsequent stale_p = true */
+ smp_wmb(); /* goes with smp_mb() in cleanup_single_escalation */
+
return IRQ_HANDLED;
}
vcpu->arch.xive_esc_raddr = 0;
}
+/*
+ * In single escalation mode, the escalation interrupt is marked so
+ * that EOI doesn't re-enable it, but just sets the stale_p flag to
+ * indicate that the P bit has already been dealt with. However, the
+ * assembly code that enters the guest sets PQ to 00 without clearing
+ * stale_p (because it has no easy way to address it). Hence we have
+ * to adjust stale_p before shutting down the interrupt.
+ */
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+ struct kvmppc_xive_vcpu *xc, int irq)
+{
+ struct irq_data *d = irq_get_irq_data(irq);
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
+
+ /*
+ * This slightly odd sequence gives the right result
+ * (i.e. stale_p set if xive_esc_on is false) even if
+ * we race with xive_esc_irq() and xive_irq_eoi().
+ */
+ xd->stale_p = false;
+ smp_mb(); /* paired with smb_wmb in xive_esc_irq */
+ if (!vcpu->arch.xive_esc_on)
+ xd->stale_p = true;
+}
+
void kvmppc_xive_cleanup_vcpu(struct kvm_vcpu *vcpu)
{
struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu;
/* Mask the VP IPI */
xive_vm_esb_load(&xc->vp_ipi_data, XIVE_ESB_SET_PQ_01);
- /* Disable the VP */
- xive_native_disable_vp(xc->vp_id);
-
- /* Free the queues & associated interrupts */
+ /* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
- struct xive_q *q = &xc->queues[i];
-
- /* Free the escalation irq */
if (xc->esc_virq[i]) {
+ if (xc->xive->single_escalation)
+ xive_cleanup_single_escalation(vcpu, xc,
+ xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
}
- /* Free the queue */
+ }
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
+
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
+ struct xive_q *q = &xc->queues[i];
+
xive_native_disable_queue(xc->vp_id, q, i);
if (q->qpage) {
free_pages((unsigned long)q->qpage,
xive->single_escalation = xive_native_has_single_escalation();
- if (ret) {
- kfree(xive);
+ if (ret)
return ret;
- }
return 0;
}
int kvmppc_xive_attach_escalation(struct kvm_vcpu *vcpu, u8 prio,
bool single_escalation);
struct kvmppc_xive *kvmppc_xive_get_device(struct kvm *kvm, u32 type);
+void xive_cleanup_single_escalation(struct kvm_vcpu *vcpu,
+ struct kvmppc_xive_vcpu *xc, int irq);
#endif /* CONFIG_KVM_XICS */
#endif /* _KVM_PPC_BOOK3S_XICS_H */
xc->valid = false;
kvmppc_xive_disable_vcpu_interrupts(vcpu);
- /* Disable the VP */
- xive_native_disable_vp(xc->vp_id);
-
- /* Free the queues & associated interrupts */
+ /* Free escalations */
for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
/* Free the escalation irq */
if (xc->esc_virq[i]) {
+ if (xc->xive->single_escalation)
+ xive_cleanup_single_escalation(vcpu, xc,
+ xc->esc_virq[i]);
free_irq(xc->esc_virq[i], vcpu);
irq_dispose_mapping(xc->esc_virq[i]);
kfree(xc->esc_virq_names[i]);
xc->esc_virq[i] = 0;
}
+ }
+
+ /* Disable the VP */
+ xive_native_disable_vp(xc->vp_id);
- /* Free the queue */
+ /* Clear the cam word so guest entry won't try to push context */
+ vcpu->arch.xive_cam_word = 0;
+
+ /* Free the queues */
+ for (i = 0; i < KVMPPC_XIVE_Q_COUNT; i++) {
kvmppc_xive_native_cleanup_queue(vcpu, i);
}
xive->ops = &kvmppc_xive_native_ops;
if (ret)
- kfree(xive);
+ return ret;
- return ret;
+ return 0;
}
/*
return 0;
}
+bool kvmppc_xive_native_supported(void)
+{
+ return xive_native_has_queue_state_support();
+}
+
static int xive_native_debug_show(struct seq_file *m, void *private)
{
struct kvmppc_xive *xive = m->private;
*/
if (inst == KVMPPC_INST_SW_BREAKPOINT) {
run->exit_reason = KVM_EXIT_DEBUG;
+ run->debug.arch.status = 0;
run->debug.arch.address = kvmppc_get_pc(vcpu);
emulated = EMULATE_EXIT_USER;
advance = 0;
rs = get_rs(inst);
rt = get_rt(inst);
- /*
- * if mmio_vsx_tx_sx_enabled == 0, copy data between
- * VSR[0..31] and memory
- * if mmio_vsx_tx_sx_enabled == 1, copy data between
- * VSR[32..63] and memory
- */
vcpu->arch.mmio_vsx_copy_nums = 0;
vcpu->arch.mmio_vsx_offset = 0;
vcpu->arch.mmio_copy_type = KVMPPC_VSX_COPY_NONE;
* a POWER9 processor) and the PowerNV platform, as
* nested is not yet supported.
*/
- r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE);
+ r = xive_enabled() && !!cpu_has_feature(CPU_FTR_HVMODE) &&
+ kvmppc_xive_native_supported();
break;
#endif
*
* For guests on platforms before POWER9, we clamp the it limit to 1G
* to avoid some funky things such as RTAS bugs etc...
+ *
+ * On POWER9 we limit to 1TB in case the host erroneously told us that
+ * the RMA was >1TB. Effective address bits 0:23 are treated as zero
+ * (meaning the access is aliased to zero i.e. addr = addr % 1TB)
+ * for virtual real mode addressing and so it doesn't make sense to
+ * have an area larger than 1TB as it can't be addressed.
*/
if (!early_cpu_has_feature(CPU_FTR_HVMODE)) {
ppc64_rma_size = first_memblock_size;
if (!early_cpu_has_feature(CPU_FTR_ARCH_300))
ppc64_rma_size = min_t(u64, ppc64_rma_size, 0x40000000);
+ else
+ ppc64_rma_size = min_t(u64, ppc64_rma_size,
+ 1UL << SID_SHIFT_1T);
/* Finally limit subsequent allocations */
memblock_set_current_limit(ppc64_rma_size);
#ifdef CONFIG_ZONE_DMA
max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
- ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
+ 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT));
#endif
max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
#ifdef CONFIG_HIGHMEM
#include <linux/sched.h>
#include <linux/libnvdimm.h>
#include <linux/platform_device.h>
+#include <linux/delay.h>
#include <asm/plpar_wrappers.h>
static int drc_pmem_bind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
- uint64_t rc, token;
uint64_t saved = 0;
+ uint64_t token;
+ int64_t rc;
/*
* When the hypervisor cannot map all the requested memory in a single
} while (rc == H_BUSY);
if (rc) {
+ /* H_OVERLAP needs a separate error path */
+ if (rc == H_OVERLAP)
+ return -EBUSY;
+
dev_err(&p->pdev->dev, "bind err: %lld\n", rc);
return -ENXIO;
}
static int drc_pmem_unbind(struct papr_scm_priv *p)
{
unsigned long ret[PLPAR_HCALL_BUFSIZE];
- uint64_t rc, token;
+ uint64_t token = 0;
+ int64_t rc;
- token = 0;
+ dev_dbg(&p->pdev->dev, "unbind drc %x\n", p->drc_index);
- /* NB: unbind has the same retry requirements mentioned above */
+ /* NB: unbind has the same retry requirements as drc_pmem_bind() */
do {
- rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index,
- p->bound_addr, p->blocks, token);
+
+ /* Unbind of all SCM resources associated with drcIndex */
+ rc = plpar_hcall(H_SCM_UNBIND_ALL, ret, H_UNBIND_SCOPE_DRC,
+ p->drc_index, token);
token = ret[0];
- cond_resched();
+
+ /* Check if we are stalled for some time */
+ if (H_IS_LONG_BUSY(rc)) {
+ msleep(get_longbusy_msecs(rc));
+ rc = H_BUSY;
+ } else if (rc == H_BUSY) {
+ cond_resched();
+ }
+
} while (rc == H_BUSY);
if (rc)
dev_err(&p->pdev->dev, "unbind error: %lld\n", rc);
+ else
+ dev_dbg(&p->pdev->dev, "unbind drc %x complete\n",
+ p->drc_index);
- return !!rc;
+ return rc == H_SUCCESS ? 0 : -ENXIO;
}
static int papr_scm_meta_get(struct papr_scm_priv *p,
/* request the hypervisor to bind this region to somewhere in memory */
rc = drc_pmem_bind(p);
+
+ /* If phyp says drc memory still bound then force unbound and retry */
+ if (rc == -EBUSY) {
+ dev_warn(&pdev->dev, "Retrying bind after unbinding\n");
+ drc_pmem_unbind(p);
+ rc = drc_pmem_bind(p);
+ }
+
if (rc)
goto err;
static u32 xive_scan_interrupts(struct xive_cpu *xc, bool just_peek)
{
u32 irq = 0;
- u8 prio;
+ u8 prio = 0;
/* Find highest pending priority */
while (xc->pending_prio != 0) {
irq = xive_read_eq(&xc->queue[prio], just_peek);
/* Found something ? That's it */
- if (irq)
- break;
+ if (irq) {
+ if (just_peek || irq_to_desc(irq))
+ break;
+ /*
+ * We should never get here; if we do then we must
+ * have failed to synchronize the interrupt properly
+ * when shutting it down.
+ */
+ pr_crit("xive: got interrupt %d without descriptor, dropping\n",
+ irq);
+ WARN_ON(1);
+ continue;
+ }
/* Clear pending bits */
xc->pending_prio &= ~(1 << prio);
*/
static void xive_do_source_eoi(u32 hw_irq, struct xive_irq_data *xd)
{
+ xd->stale_p = false;
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
xive_esb_write(xd, XIVE_ESB_STORE_EOI, 0);
}
}
-/* irq_chip eoi callback */
+/* irq_chip eoi callback, called with irq descriptor lock held */
static void xive_irq_eoi(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
if (!irqd_irq_disabled(d) && !irqd_is_forwarded_to_vcpu(d) &&
!(xd->flags & XIVE_IRQ_NO_EOI))
xive_do_source_eoi(irqd_to_hwirq(d), xd);
+ else
+ xd->stale_p = true;
/*
* Clear saved_p to indicate that it's no longer occupying
*/
if (mask) {
val = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
- xd->saved_p = !!(val & XIVE_ESB_VAL_P);
- } else if (xd->saved_p)
+ if (!xd->stale_p && !!(val & XIVE_ESB_VAL_P))
+ xd->saved_p = true;
+ xd->stale_p = false;
+ } else if (xd->saved_p) {
xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
- else
+ xd->saved_p = false;
+ } else {
xive_esb_read(xd, XIVE_ESB_SET_PQ_00);
+ xd->stale_p = false;
+ }
}
/*
* Now go through the entire mask until we find a valid
* target.
*/
- for (;;) {
+ do {
/*
* We re-check online as the fallback case passes us
* an untested affinity mask
if (cpu_online(cpu) && xive_try_pick_target(cpu))
return cpu;
cpu = cpumask_next(cpu, mask);
- if (cpu == first)
- break;
/* Wrap around */
if (cpu >= nr_cpu_ids)
cpu = cpumask_first(mask);
- }
+ } while (cpu != first);
+
return -1;
}
unsigned int hw_irq = (unsigned int)irqd_to_hwirq(d);
int target, rc;
+ xd->saved_p = false;
+ xd->stale_p = false;
pr_devel("xive_irq_startup: irq %d [0x%x] data @%p\n",
d->irq, hw_irq, d);
return 0;
}
+/* called with irq descriptor lock held */
static void xive_irq_shutdown(struct irq_data *d)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
/* Mask the interrupt at the source */
xive_do_source_set_mask(xd, true);
- /*
- * The above may have set saved_p. We clear it otherwise it
- * will prevent re-enabling later on. It is ok to forget the
- * fact that the interrupt might be in a queue because we are
- * accounting that already in xive_dec_target_count() and will
- * be re-routing it to a new queue with proper accounting when
- * it's started up again
- */
- xd->saved_p = false;
-
/*
* Mask the interrupt in HW in the IVT/EAS and set the number
* to be the "bad" IRQ number
return 1;
}
+/*
+ * Caller holds the irq descriptor lock, so this won't be called
+ * concurrently with xive_get_irqchip_state on the same interrupt.
+ */
static int xive_irq_set_vcpu_affinity(struct irq_data *d, void *state)
{
struct xive_irq_data *xd = irq_data_get_irq_handler_data(d);
/* Set it to PQ=10 state to prevent further sends */
pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_10);
+ if (!xd->stale_p) {
+ xd->saved_p = !!(pq & XIVE_ESB_VAL_P);
+ xd->stale_p = !xd->saved_p;
+ }
/* No target ? nothing to do */
if (xd->target == XIVE_INVALID_TARGET) {
* An untargetted interrupt should have been
* also masked at the source
*/
- WARN_ON(pq & 2);
+ WARN_ON(xd->saved_p);
return 0;
}
* This saved_p is cleared by the host EOI, when we know
* for sure the queue slot is no longer in use.
*/
- if (pq & 2) {
- pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
- xd->saved_p = true;
+ if (xd->saved_p) {
+ xive_esb_read(xd, XIVE_ESB_SET_PQ_11);
/*
* Sync the XIVE source HW to ensure the interrupt
*/
if (xive_ops->sync_source)
xive_ops->sync_source(hw_irq);
- } else
- xd->saved_p = false;
+ }
} else {
irqd_clr_forwarded_to_vcpu(d);
return 0;
}
+/* Called with irq descriptor lock held. */
+static int xive_get_irqchip_state(struct irq_data *data,
+ enum irqchip_irq_state which, bool *state)
+{
+ struct xive_irq_data *xd = irq_data_get_irq_handler_data(data);
+
+ switch (which) {
+ case IRQCHIP_STATE_ACTIVE:
+ *state = !xd->stale_p &&
+ (xd->saved_p ||
+ !!(xive_esb_read(xd, XIVE_ESB_GET) & XIVE_ESB_VAL_P));
+ return 0;
+ default:
+ return -EINVAL;
+ }
+}
+
static struct irq_chip xive_irq_chip = {
.name = "XIVE-IRQ",
.irq_startup = xive_irq_startup,
.irq_set_type = xive_irq_set_type,
.irq_retrigger = xive_irq_retrigger,
.irq_set_vcpu_affinity = xive_irq_set_vcpu_affinity,
+ .irq_get_irqchip_state = xive_get_irqchip_state,
};
bool is_xive_irq(struct irq_chip *chip)
raw_spin_lock(&desc->lock);
xd = irq_desc_get_handler_data(desc);
+ /*
+ * Clear saved_p to indicate that it's no longer pending
+ */
+ xd->saved_p = false;
+
/*
* For LSIs, we EOI, this will cause a resend if it's
* still asserted. Otherwise do an MSI retrigger.
}
EXPORT_SYMBOL_GPL(xive_native_set_queue_state);
+bool xive_native_has_queue_state_support(void)
+{
+ return opal_check_token(OPAL_XIVE_GET_QUEUE_STATE) &&
+ opal_check_token(OPAL_XIVE_SET_QUEUE_STATE);
+}
+EXPORT_SYMBOL_GPL(xive_native_has_queue_state_support);
+
int xive_native_get_vp_state(u32 vp_id, u64 *out_state)
{
__be64 state;
#size-cells = <0>;
status = "disabled";
};
+ eth0: ethernet@10090000 {
+ compatible = "sifive,fu540-c000-gem";
+ interrupt-parent = <&plic0>;
+ interrupts = <53>;
+ reg = <0x0 0x10090000 0x0 0x2000
+ 0x0 0x100a0000 0x0 0x1000>;
+ local-mac-address = [00 00 00 00 00 00];
+ clock-names = "pclk", "hclk";
+ clocks = <&prci PRCI_CLK_GEMGXLPLL>,
+ <&prci PRCI_CLK_GEMGXLPLL>;
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "disabled";
+ };
+
};
};
disable-wp;
};
};
+
+ð0 {
+ status = "okay";
+ phy-mode = "gmii";
+ phy-handle = <&phy0>;
+ phy0: ethernet-phy@0 {
+ reg = <0>;
+ };
+};
generic-y += local.h
generic-y += local64.h
generic-y += mm-arch-hooks.h
+generic-y += msi.h
generic-y += percpu.h
generic-y += preempt.h
generic-y += sections.h
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2015 Regents of the University of California
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2015 Regents of the University of California
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2015 Regents of the University of California
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copied from arch/arm64/include/asm/hwcap.h
*
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copyright (C) 2012 Regents of the University of California
*/
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copyright (C) 2012 Regents of the University of California
*/
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Copyright (C) 2012 ARM Ltd.
* Copyright (C) 2017 SiFive, Inc.
#ifdef __LP64__
#define __ARCH_WANT_NEW_STAT
#define __ARCH_WANT_SET_GET_RLIMIT
+#define __ARCH_WANT_SYS_CLONE3
#endif /* __LP64__ */
#include <asm-generic/unistd.h>
obj-y := head.o als.o startup.o mem_detect.o ipl_parm.o ipl_report.o
obj-y += string.o ebcdic.o sclp_early_core.o mem.o ipl_vmparm.o cmdline.o
-obj-y += ctype.o text_dma.o
+obj-y += version.o ctype.o text_dma.o
obj-$(CONFIG_PROTECTED_VIRTUALIZATION_GUEST) += uv.o
obj-$(CONFIG_RELOCATABLE) += machine_kexec_reloc.o
obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
unsigned long get_random_base(unsigned long safe_addr);
extern int kaslr_enabled;
+extern const char kernel_version[];
unsigned long read_ipl_report(unsigned long safe_offset);
.quad 0 # INITRD_SIZE
.quad 0 # OLDMEM_BASE
.quad 0 # OLDMEM_SIZE
+ .quad kernel_version # points to kernel version string
.org COMMAND_LINE
.byte "root=/dev/ram0 ro"
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <generated/utsrelease.h>
+#include <generated/compile.h>
+#include "boot.h"
+
+const char kernel_version[] = UTS_RELEASE
+ " (" LINUX_COMPILE_BY "@" LINUX_COMPILE_HOST ") " UTS_VERSION;
return PTR_ERR(rc); \
} while(0)
-static int hpyfs_vm_create_guest(struct dentry *systems_dir,
+static int hypfs_vm_create_guest(struct dentry *systems_dir,
struct diag2fc_data *data)
{
char guest_name[NAME_LEN + 1] = {};
}
for (i = 0; i < count; i++) {
- rc = hpyfs_vm_create_guest(dir, &(data[i]));
+ rc = hypfs_vm_create_guest(dir, &(data[i]));
if (rc)
goto failed;
}
#include <linux/typecheck.h>
#include <linux/compiler.h>
+#include <linux/types.h>
#include <asm/atomic_ops.h>
#include <asm/barrier.h>
return ((unsigned char *)ptr) + ((nr ^ (BITS_PER_LONG - 8)) >> 3);
}
-static inline void set_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline void arch_set_bit(unsigned long nr, volatile unsigned long *ptr)
{
unsigned long *addr = __bitops_word(nr, ptr);
unsigned long mask;
__atomic64_or(mask, (long *)addr);
}
-static inline void clear_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline void arch_clear_bit(unsigned long nr, volatile unsigned long *ptr)
{
unsigned long *addr = __bitops_word(nr, ptr);
unsigned long mask;
__atomic64_and(mask, (long *)addr);
}
-static inline void change_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline void arch_change_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned long *addr = __bitops_word(nr, ptr);
unsigned long mask;
__atomic64_xor(mask, (long *)addr);
}
-static inline int
-test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline bool arch_test_and_set_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned long *addr = __bitops_word(nr, ptr);
unsigned long old, mask;
return (old & mask) != 0;
}
-static inline int
-test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline bool arch_test_and_clear_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned long *addr = __bitops_word(nr, ptr);
unsigned long old, mask;
return (old & ~mask) != 0;
}
-static inline int
-test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline bool arch_test_and_change_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned long *addr = __bitops_word(nr, ptr);
unsigned long old, mask;
return (old & mask) != 0;
}
-static inline void __set_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline void arch___set_bit(unsigned long nr, volatile unsigned long *ptr)
{
unsigned char *addr = __bitops_byte(nr, ptr);
*addr |= 1 << (nr & 7);
}
-static inline void
-__clear_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline void arch___clear_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned char *addr = __bitops_byte(nr, ptr);
*addr &= ~(1 << (nr & 7));
}
-static inline void __change_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline void arch___change_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned char *addr = __bitops_byte(nr, ptr);
*addr ^= 1 << (nr & 7);
}
-static inline int
-__test_and_set_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline bool arch___test_and_set_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned char *addr = __bitops_byte(nr, ptr);
unsigned char ch;
return (ch >> (nr & 7)) & 1;
}
-static inline int
-__test_and_clear_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline bool arch___test_and_clear_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned char *addr = __bitops_byte(nr, ptr);
unsigned char ch;
return (ch >> (nr & 7)) & 1;
}
-static inline int
-__test_and_change_bit(unsigned long nr, volatile unsigned long *ptr)
+static inline bool arch___test_and_change_bit(unsigned long nr,
+ volatile unsigned long *ptr)
{
unsigned char *addr = __bitops_byte(nr, ptr);
unsigned char ch;
return (ch >> (nr & 7)) & 1;
}
-static inline int test_bit(unsigned long nr, const volatile unsigned long *ptr)
+static inline bool arch_test_bit(unsigned long nr,
+ const volatile unsigned long *ptr)
{
const volatile unsigned char *addr;
return (*addr >> (nr & 7)) & 1;
}
-static inline int test_and_set_bit_lock(unsigned long nr,
- volatile unsigned long *ptr)
+static inline bool arch_test_and_set_bit_lock(unsigned long nr,
+ volatile unsigned long *ptr)
{
- if (test_bit(nr, ptr))
+ if (arch_test_bit(nr, ptr))
return 1;
- return test_and_set_bit(nr, ptr);
+ return arch_test_and_set_bit(nr, ptr);
}
-static inline void clear_bit_unlock(unsigned long nr,
- volatile unsigned long *ptr)
+static inline void arch_clear_bit_unlock(unsigned long nr,
+ volatile unsigned long *ptr)
{
smp_mb__before_atomic();
- clear_bit(nr, ptr);
+ arch_clear_bit(nr, ptr);
}
-static inline void __clear_bit_unlock(unsigned long nr,
- volatile unsigned long *ptr)
+static inline void arch___clear_bit_unlock(unsigned long nr,
+ volatile unsigned long *ptr)
{
smp_mb();
- __clear_bit(nr, ptr);
+ arch___clear_bit(nr, ptr);
}
+#include <asm-generic/bitops-instrumented.h>
+
/*
* Functions which use MSB0 bit numbering.
* The bits are numbered:
return clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-static inline int test_and_clear_bit_inv(unsigned long nr, volatile unsigned long *ptr)
+static inline bool test_and_clear_bit_inv(unsigned long nr,
+ volatile unsigned long *ptr)
{
return test_and_clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
return __clear_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
-static inline int test_bit_inv(unsigned long nr,
- const volatile unsigned long *ptr)
+static inline bool test_bit_inv(unsigned long nr,
+ const volatile unsigned long *ptr)
{
return test_bit(nr ^ (BITS_PER_LONG - 1), ptr);
}
#define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \
VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define ARCH_ZONE_DMA_BITS 31
+
#include <asm-generic/memory_model.h>
#include <asm-generic/getorder.h>
#define INITRD_SIZE_OFFSET 0x10410
#define OLDMEM_BASE_OFFSET 0x10418
#define OLDMEM_SIZE_OFFSET 0x10420
+#define KERNEL_VERSION_OFFSET 0x10428
#define COMMAND_LINE_OFFSET 0x10480
#ifndef __ASSEMBLY__
unsigned long initrd_size; /* 0x10410 */
unsigned long oldmem_base; /* 0x10418 */
unsigned long oldmem_size; /* 0x10420 */
- char pad1[0x10480 - 0x10428]; /* 0x10428 - 0x10480 */
+ unsigned long kernel_version; /* 0x10428 */
+ char pad1[0x10480 - 0x10430]; /* 0x10430 - 0x10480 */
char command_line[ARCH_COMMAND_LINE_SIZE]; /* 0x10480 */
};
#define __ARCH_WANT_SYS_FORK
#define __ARCH_WANT_SYS_VFORK
#define __ARCH_WANT_SYS_CLONE
+#define __ARCH_WANT_SYS_CLONE3
#endif /* _ASM_S390_UNISTD_H_ */
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
#define _UAPI__ASM_BPF_PERF_EVENT_H__
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_S390_UAPI_IPL_H
#define _ASM_S390_UAPI_IPL_H
#include <linux/ioctl.h>
#include <linux/compiler.h>
+#include <linux/types.h>
/* Name of the zcrypt device driver. */
#define ZCRYPT_NAME "zcrypt"
* @payload_len: Payload length
*/
struct ep11_cprb {
- uint16_t cprb_len;
+ __u16 cprb_len;
unsigned char cprb_ver_id;
unsigned char pad_000[2];
unsigned char flags;
unsigned char func_id[2];
- uint32_t source_id;
- uint32_t target_id;
- uint32_t ret_code;
- uint32_t reserved1;
- uint32_t reserved2;
- uint32_t payload_len;
+ __u32 source_id;
+ __u32 target_id;
+ __u32 ret_code;
+ __u32 reserved1;
+ __u32 reserved2;
+ __u32 payload_len;
} __attribute__((packed));
/**
* @dom_id: Usage domain id
*/
struct ep11_target_dev {
- uint16_t ap_id;
- uint16_t dom_id;
+ __u16 ap_id;
+ __u16 dom_id;
};
/**
* @resp: Addr to response block
*/
struct ep11_urb {
- uint16_t targets_num;
- uint64_t targets;
- uint64_t weight;
- uint64_t req_no;
- uint64_t req_len;
- uint64_t req;
- uint64_t resp_len;
- uint64_t resp;
+ __u16 targets_num;
+ __u64 targets;
+ __u64 weight;
+ __u64 req_no;
+ __u64 req_len;
+ __u64 req;
+ __u64 resp_len;
+ __u64 resp;
} __attribute__((packed));
/**
432 common fsmount sys_fsmount sys_fsmount
433 common fspick sys_fspick sys_fspick
434 common pidfd_open sys_pidfd_open sys_pidfd_open
-# 435 reserved for clone3
+435 common clone3 sys_clone3 sys_clone3
#ifdef CONFIG_PGSTE
-static int page_table_allocate_pgste_min = 0;
-static int page_table_allocate_pgste_max = 1;
int page_table_allocate_pgste = 0;
EXPORT_SYMBOL(page_table_allocate_pgste);
.maxlen = sizeof(int),
.mode = S_IRUGO | S_IWUSR,
.proc_handler = proc_dointvec_minmax,
- .extra1 = &page_table_allocate_pgste_min,
- .extra2 = &page_table_allocate_pgste_max,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{ }
};
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#include <asm-generic/setup.h>
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#include <asm-generic/types.h>
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
/*
* Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
*/
ENTRY(page_fault)
ASM_CLAC
- pushl $0; /* %gs's slot on the stack */
+ pushl $do_page_fault
+ jmp common_exception_read_cr2
+END(page_fault)
+common_exception_read_cr2:
+ /* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1
ENCODE_FRAME_POINTER
/* fixup %gs */
GS_TO_REG %ecx
+ movl PT_GS(%esp), %edi
REG_TO_PTGS %ecx
SET_KERNEL_GS %ecx
TRACE_IRQS_OFF
movl %esp, %eax # pt_regs pointer
- call do_page_fault
+ CALL_NOSPEC %edi
jmp ret_from_exception
-END(page_fault)
+END(common_exception_read_cr2)
common_exception:
/* the function address is in %gs's slot on the stack */
ENTRY(async_page_fault)
ASM_CLAC
pushl $do_async_page_fault
- jmp common_exception
+ jmp common_exception_read_cr2
END(async_page_fault)
#endif
#include <asm/intel-family.h>
#include <asm/apic.h>
#include <asm/cpu_device_id.h>
-#include <asm/hypervisor.h>
#include "../perf_event.h"
};
static struct extra_reg intel_icl_extra_regs[] __read_mostly = {
- INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffff9fffull, RSP_0),
- INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffff9fffull, RSP_1),
+ INTEL_UEVENT_EXTRA_REG(0x01b7, MSR_OFFCORE_RSP_0, 0x3fffffbfffull, RSP_0),
+ INTEL_UEVENT_EXTRA_REG(0x01bb, MSR_OFFCORE_RSP_1, 0x3fffffbfffull, RSP_1),
INTEL_UEVENT_PEBS_LDLAT_EXTRA_REG(0x01cd),
INTEL_UEVENT_EXTRA_REG(0x01c6, MSR_PEBS_FRONTEND, 0x7fff17, FE),
EVENT_EXTRA_END
* Disable the check for real HW, so we don't
* mess with potentionaly enabled registers:
*/
- if (hypervisor_is_type(X86_HYPER_NATIVE))
+ if (!boot_cpu_has(X86_FEATURE_HYPERVISOR))
return true;
/*
case INTEL_FAM6_SKYLAKE_X:
pmem = true;
+ /* fall through */
case INTEL_FAM6_SKYLAKE_MOBILE:
case INTEL_FAM6_SKYLAKE_DESKTOP:
case INTEL_FAM6_KABYLAKE_MOBILE:
case INTEL_FAM6_ICELAKE_X:
case INTEL_FAM6_ICELAKE_XEON_D:
pmem = true;
+ /* fall through */
case INTEL_FAM6_ICELAKE_MOBILE:
case INTEL_FAM6_ICELAKE_DESKTOP:
x86_pmu.late_ack = true;
struct event_constraint intel_icl_pebs_event_constraints[] = {
INTEL_FLAGS_UEVENT_CONSTRAINT(0x1c0, 0x100000000ULL), /* INST_RETIRED.PREC_DIST */
- INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x400000000ULL), /* SLOTS */
+ INTEL_FLAGS_UEVENT_CONSTRAINT(0x0400, 0x800000000ULL), /* SLOTS */
INTEL_PLD_CONSTRAINT(0x1cd, 0xff), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
INTEL_FLAGS_UEVENT_CONSTRAINT_DATALA_LD(0x1d0, 0xf), /* MEM_INST_RETIRED.LOAD */
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_BYTEORDER_H
#define _ASM_X86_BYTEORDER_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_HWCAP2_H
#define _ASM_X86_HWCAP2_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_SIGCONTEXT32_H
#define _ASM_X86_SIGCONTEXT32_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_X86_TYPES_H
#define _ASM_X86_TYPES_H
static ssize_t mds_show_state(char *buf)
{
- if (!hypervisor_is_type(X86_HYPER_NATIVE)) {
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
return sprintf(buf, "%s; SMT Host state unknown\n",
mds_strings[mds_mitigation]);
}
/* Set up %gs.
*
- * The base of %gs always points to the bottom of the irqstack
- * union. If the stack protector canary is enabled, it is
- * located at %gs:40. Note that, on SMP, the boot cpu uses
- * init data section till per cpu areas are set up.
+ * The base of %gs always points to fixed_percpu_data. If the
+ * stack protector canary is enabled, it is located at %gs:40.
+ * Note that, on SMP, the boot cpu uses init data section until
+ * the per cpu areas are set up.
*/
movl $MSR_GS_BASE,%ecx
movl initial_gs(%rip),%eax
if (!hpet_cfg_working())
goto out_nohpet;
- /* Validate that the counter is counting */
- if (!hpet_counting())
- goto out_nohpet;
-
/*
* Read the period and check for a sane value:
*/
}
hpet_print_config();
+ /*
+ * Validate that the counter is counting. This needs to be done
+ * after sanitizing the config registers to properly deal with
+ * force enabled HPETs.
+ */
+ if (!hpet_counting())
+ goto out_nohpet;
+
clocksource_register_hz(&clocksource_hpet, (u32)hpet_freq);
if (id & HPET_ID_LEGSUP) {
{
int ret;
- if (!access_ok(fp, sizeof(*frame)))
+ if (__range_not_ok(fp, sizeof(*frame), TASK_SIZE))
return 0;
ret = 1;
{},
};
+/*
+ * Some devices have a portrait LCD but advertise a landscape resolution (and
+ * pitch). We simply swap width and height for these devices so that we can
+ * correctly deal with some of them coming with multiple resolutions.
+ */
+static const struct dmi_system_id efifb_dmi_swap_width_height[] __initconst = {
+ {
+ /*
+ * Lenovo MIIX310-10ICR, only some batches have the troublesome
+ * 800x1280 portrait screen. Luckily the portrait version has
+ * its own BIOS version, so we match on that.
+ */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION, "MIIX 310-10ICR"),
+ DMI_EXACT_MATCH(DMI_BIOS_VERSION, "1HCN44WW"),
+ },
+ },
+ {
+ /* Lenovo MIIX 320-10ICR with 800x1280 portrait screen */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
+ "Lenovo MIIX 320-10ICR"),
+ },
+ },
+ {
+ /* Lenovo D330 with 800x1280 or 1200x1920 portrait screen */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_EXACT_MATCH(DMI_PRODUCT_VERSION,
+ "Lenovo ideapad D330-10IGM"),
+ },
+ },
+ {},
+};
+
__init void sysfb_apply_efi_quirks(void)
{
if (screen_info.orig_video_isVGA != VIDEO_TYPE_EFI ||
!(screen_info.capabilities & VIDEO_CAPABILITY_SKIP_QUIRKS))
dmi_check_system(efifb_dmi_system_table);
+
+ if (screen_info.orig_video_isVGA == VIDEO_TYPE_EFI &&
+ dmi_check_system(efifb_dmi_swap_width_height)) {
+ u16 temp = screen_info.lfb_width;
+
+ screen_info.lfb_width = screen_info.lfb_height;
+ screen_info.lfb_height = temp;
+ screen_info.lfb_linelength = 4 * screen_info.lfb_width;
+ }
}
pmd = pmd_offset(pud, address);
pmd_k = pmd_offset(pud_k, address);
- if (!pmd_present(*pmd_k))
- return NULL;
- if (!pmd_present(*pmd))
+ if (pmd_present(*pmd) != pmd_present(*pmd_k))
set_pmd(pmd, *pmd_k);
+
+ if (!pmd_present(*pmd_k))
+ return NULL;
else
- BUG_ON(pmd_page(*pmd) != pmd_page(*pmd_k));
+ BUG_ON(pmd_pfn(*pmd) != pmd_pfn(*pmd_k));
return pmd_k;
}
spin_lock(&pgd_lock);
list_for_each_entry(page, &pgd_list, lru) {
spinlock_t *pgt_lock;
- pmd_t *ret;
/* the pgt_lock only for Xen */
pgt_lock = &pgd_page_get_mm(page)->page_table_lock;
spin_lock(pgt_lock);
- ret = vmalloc_sync_one(page_address(page), address);
+ vmalloc_sync_one(page_address(page), address);
spin_unlock(pgt_lock);
-
- if (!ret)
- break;
}
spin_unlock(&pgd_lock);
}
* there is no active group, then the primary expectation for
* this device is probably a high throughput.
*
- * We are now left only with explaining the additional
- * compound condition that is checked below for deciding
- * whether the scenario is asymmetric. To explain this
- * compound condition, we need to add that the function
+ * We are now left only with explaining the two sub-conditions in the
+ * additional compound condition that is checked below for deciding
+ * whether the scenario is asymmetric. To explain the first
+ * sub-condition, we need to add that the function
* bfq_asymmetric_scenario checks the weights of only
- * non-weight-raised queues, for efficiency reasons (see
- * comments on bfq_weights_tree_add()). Then the fact that
- * bfqq is weight-raised is checked explicitly here. More
- * precisely, the compound condition below takes into account
- * also the fact that, even if bfqq is being weight-raised,
- * the scenario is still symmetric if all queues with requests
- * waiting for completion happen to be
- * weight-raised. Actually, we should be even more precise
- * here, and differentiate between interactive weight raising
- * and soft real-time weight raising.
+ * non-weight-raised queues, for efficiency reasons (see comments on
+ * bfq_weights_tree_add()). Then the fact that bfqq is weight-raised
+ * is checked explicitly here. More precisely, the compound condition
+ * below takes into account also the fact that, even if bfqq is being
+ * weight-raised, the scenario is still symmetric if all queues with
+ * requests waiting for completion happen to be
+ * weight-raised. Actually, we should be even more precise here, and
+ * differentiate between interactive weight raising and soft real-time
+ * weight raising.
+ *
+ * The second sub-condition checked in the compound condition is
+ * whether there is a fair amount of already in-flight I/O not
+ * belonging to bfqq. If so, I/O dispatching is to be plugged, for the
+ * following reason. The drive may decide to serve in-flight
+ * non-bfqq's I/O requests before bfqq's ones, thereby delaying the
+ * arrival of new I/O requests for bfqq (recall that bfqq is sync). If
+ * I/O-dispatching is not plugged, then, while bfqq remains empty, a
+ * basically uncontrolled amount of I/O from other queues may be
+ * dispatched too, possibly causing the service of bfqq's I/O to be
+ * delayed even longer in the drive. This problem gets more and more
+ * serious as the speed and the queue depth of the drive grow,
+ * because, as these two quantities grow, the probability to find no
+ * queue busy but many requests in flight grows too. By contrast,
+ * plugging I/O dispatching minimizes the delay induced by already
+ * in-flight I/O, and enables bfqq to recover the bandwidth it may
+ * lose because of this delay.
*
* As a side note, it is worth considering that the above
- * device-idling countermeasures may however fail in the
- * following unlucky scenario: if idling is (correctly)
- * disabled in a time period during which all symmetry
- * sub-conditions hold, and hence the device is allowed to
- * enqueue many requests, but at some later point in time some
- * sub-condition stops to hold, then it may become impossible
- * to let requests be served in the desired order until all
- * the requests already queued in the device have been served.
+ * device-idling countermeasures may however fail in the following
+ * unlucky scenario: if I/O-dispatch plugging is (correctly) disabled
+ * in a time period during which all symmetry sub-conditions hold, and
+ * therefore the device is allowed to enqueue many requests, but at
+ * some later point in time some sub-condition stops to hold, then it
+ * may become impossible to make requests be served in the desired
+ * order until all the requests already queued in the device have been
+ * served. The last sub-condition commented above somewhat mitigates
+ * this problem for weight-raised queues.
*/
static bool idling_needed_for_service_guarantees(struct bfq_data *bfqd,
struct bfq_queue *bfqq)
{
return (bfqq->wr_coeff > 1 &&
- bfqd->wr_busy_queues <
- bfq_tot_busy_queues(bfqd)) ||
+ (bfqd->wr_busy_queues <
+ bfq_tot_busy_queues(bfqd) ||
+ bfqd->rq_in_driver >=
+ bfqq->dispatched + 4)) ||
bfq_asymmetric_scenario(bfqd, bfqq);
}
static LIST_HEAD(all_blkcgs); /* protected by blkcg_pol_mutex */
-static bool blkcg_debug_stats = false;
+bool blkcg_debug_stats = false;
static struct workqueue_struct *blkcg_punt_bio_wq;
static bool blkcg_policy_enabled(struct request_queue *q,
dbytes, dios);
}
- if (!blkcg_debug_stats)
- goto next;
-
- if (atomic_read(&blkg->use_delay)) {
+ if (blkcg_debug_stats && atomic_read(&blkg->use_delay)) {
has_stats = true;
off += scnprintf(buf+off, size-off,
" use_delay=%d delay_nsec=%llu",
has_stats = true;
off += written;
}
-next:
+
if (has_stats) {
if (off < size - 1) {
off += scnprintf(buf+off, size-off, "\n");
unsigned long long avg_lat;
unsigned long long cur_win;
+ if (!blkcg_debug_stats)
+ return 0;
+
if (iolat->ssd)
return iolatency_ssd_stat(iolat, buf, size);
e->type->ops.completed_request(rq, now);
}
-static inline void blk_mq_sched_started_request(struct request *rq)
-{
- struct request_queue *q = rq->q;
- struct elevator_queue *e = q->elevator;
-
- if (e && e->type->ops.started_request)
- e->type->ops.started_request(rq);
-}
-
static inline void blk_mq_sched_requeue_request(struct request *rq)
{
struct request_queue *q = rq->q;
{
struct request_queue *q = rq->q;
- blk_mq_sched_started_request(rq);
-
trace_block_rq_issue(q, rq);
if (test_bit(QUEUE_FLAG_STATS, &q->queue_flags)) {
rq = blk_mq_get_request(q, bio, &data);
if (unlikely(!rq)) {
rq_qos_cleanup(q, bio);
- if (bio->bi_opf & REQ_NOWAIT)
+
+ cookie = BLK_QC_T_NONE;
+ if (bio->bi_opf & REQ_NOWAIT_INLINE)
+ cookie = BLK_QC_T_EAGAIN;
+ else if (bio->bi_opf & REQ_NOWAIT)
bio_wouldblock_error(bio);
- return BLK_QC_T_NONE;
+ return cookie;
}
trace_block_getrq(q, bio, bio->bi_opf);
return -1;
data->got_token = true;
+ smp_wmb();
list_del_init(&curr->entry);
wake_up_process(data->task);
return 1;
return;
prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
+ has_sleeper = !wq_has_single_sleeper(&rqw->wait);
do {
+ /* The memory barrier in set_task_state saves us here. */
if (data.got_token)
break;
if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
* which means we now have two. Put our local token
* and wake anyone else potentially waiting for one.
*/
+ smp_rmb();
if (data.got_token)
cleanup_cb(rqw, private_data);
break;
}
io_schedule();
- has_sleeper = false;
+ has_sleeper = true;
+ set_current_state(TASK_UNINTERRUPTIBLE);
} while (1);
finish_wait(&rqw->wait, &data.wq);
}
* page (which might not be idential to the Linux PAGE_SIZE). Because
* of that they are not limited by our notion of "segment size".
*/
- q->limits.max_segment_size = UINT_MAX;
+ if (mask)
+ q->limits.max_segment_size = UINT_MAX;
}
EXPORT_SYMBOL(blk_queue_virt_boundary);
* The default polling interval can be specified by the kernel
* parameter block.events_dfl_poll_msecs which defaults to 0
* (disable). This can also be modified runtime by writing to
- * /sys/module/block/events_dfl_poll_msecs.
+ * /sys/module/block/parameters/events_dfl_poll_msecs.
*/
static int disk_events_set_dfl_poll_msecs(const char *val,
const struct kernel_param *kp)
/* Move to ITS specific data */
its = (struct acpi_iort_its_group *)node->node_data;
- if (idx > its->its_count) {
- dev_err(dev, "requested ITS ID index [%d] is greater than available [%d]\n",
+ if (idx >= its->its_count) {
+ dev_err(dev, "requested ITS ID index [%d] overruns ITS entries [%d]\n",
idx, its->its_count);
return -ENXIO;
}
if (rc)
return rc;
- device_lock(dev);
+ nfit_device_lock(dev);
nd_desc = dev_get_drvdata(dev);
if (nd_desc) {
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
break;
}
}
- device_unlock(dev);
+ nfit_device_unlock(dev);
if (rc)
return rc;
return size;
ssize_t rc = -ENXIO;
bool busy;
- device_lock(dev);
+ nfit_device_lock(dev);
nd_desc = dev_get_drvdata(dev);
if (!nd_desc) {
device_unlock(dev);
}
mutex_unlock(&acpi_desc->init_mutex);
- device_unlock(dev);
+ nfit_device_unlock(dev);
return rc;
}
if (val != 1)
return -EINVAL;
- device_lock(dev);
+ nfit_device_lock(dev);
nd_desc = dev_get_drvdata(dev);
if (nd_desc) {
struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
rc = acpi_nfit_ars_rescan(acpi_desc, ARS_REQ_LONG);
}
- device_unlock(dev);
+ nfit_device_unlock(dev);
if (rc)
return rc;
return size;
struct acpi_device *adev = data;
struct device *dev = &adev->dev;
- device_lock(dev->parent);
+ nfit_device_lock(dev->parent);
__acpi_nvdimm_notify(dev, event);
- device_unlock(dev->parent);
+ nfit_device_unlock(dev->parent);
}
static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
struct device *dev = acpi_desc->dev;
/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
- device_lock(dev);
- device_unlock(dev);
+ nfit_device_lock(dev);
+ nfit_device_unlock(dev);
/* Bounce the init_mutex to complete initial registration */
mutex_lock(&acpi_desc->init_mutex);
* acpi_nfit_ars_rescan() submissions have had a chance to
* either submit or see ->cancel set.
*/
- device_lock(bus_dev);
- device_unlock(bus_dev);
+ nfit_device_lock(bus_dev);
+ nfit_device_unlock(bus_dev);
flush_workqueue(nfit_wq);
}
static void acpi_nfit_notify(struct acpi_device *adev, u32 event)
{
- device_lock(&adev->dev);
+ nfit_device_lock(&adev->dev);
__acpi_nfit_notify(&adev->dev, adev->handle, event);
- device_unlock(&adev->dev);
+ nfit_device_unlock(&adev->dev);
}
static const struct acpi_device_id acpi_nfit_ids[] = {
return container_of(nd_desc, struct acpi_nfit_desc, nd_desc);
}
+#ifdef CONFIG_PROVE_LOCKING
+static inline void nfit_device_lock(struct device *dev)
+{
+ device_lock(dev);
+ mutex_lock(&dev->lockdep_mutex);
+}
+
+static inline void nfit_device_unlock(struct device *dev)
+{
+ mutex_unlock(&dev->lockdep_mutex);
+ device_unlock(dev);
+}
+#else
+static inline void nfit_device_lock(struct device *dev)
+{
+ device_lock(dev);
+}
+
+static inline void nfit_device_unlock(struct device *dev)
+{
+ device_unlock(dev);
+}
+#endif
+
const guid_t *to_nfit_uuid(enum nfit_uuids id);
int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *nfit, acpi_size sz);
void acpi_nfit_shutdown(void *data);
else
return_error = BR_DEAD_REPLY;
mutex_unlock(&context->context_mgr_node_lock);
- if (target_node && target_proc == proc) {
+ if (target_node && target_proc->pid == proc->pid) {
binder_user_error("%d:%d got transaction to context manager from process owning it\n",
proc->pid, thread->pid);
return_error = BR_FAILED_REPLY;
buffer_offset = off_start_offset;
off_end_offset = off_start_offset + tr->offsets_size;
sg_buf_offset = ALIGN(off_end_offset, sizeof(void *));
- sg_buf_end_offset = sg_buf_offset + extra_buffers_size;
+ sg_buf_end_offset = sg_buf_offset + extra_buffers_size -
+ ALIGN(secctx_sz, sizeof(u64));
off_min = 0;
for (buffer_offset = off_start_offset; buffer_offset < off_end_offset;
buffer_offset += sizeof(binder_size_t)) {
hpriv->mmio = devm_ioremap_resource(dev,
platform_get_resource(pdev, IORESOURCE_MEM, 0));
if (IS_ERR(hpriv->mmio)) {
- dev_err(dev, "no mmio space\n");
rc = PTR_ERR(hpriv->mmio);
goto err_out;
}
kobject_init(&dev->kobj, &device_ktype);
INIT_LIST_HEAD(&dev->dma_pools);
mutex_init(&dev->mutex);
+#ifdef CONFIG_PROVE_LOCKING
+ mutex_init(&dev->lockdep_mutex);
+#endif
lockdep_set_novalidate_class(&dev->mutex);
spin_lock_init(&dev->devres_lock);
INIT_LIST_HEAD(&dev->devres_head);
}
EXPORT_SYMBOL_GPL(put_device);
+bool kill_device(struct device *dev)
+{
+ /*
+ * Require the device lock and set the "dead" flag to guarantee that
+ * the update behavior is consistent with the other bitfields near
+ * it and that we cannot have an asynchronous probe routine trying
+ * to run while we are tearing out the bus/class/sysfs from
+ * underneath the device.
+ */
+ lockdep_assert_held(&dev->mutex);
+
+ if (dev->p->dead)
+ return false;
+ dev->p->dead = true;
+ return true;
+}
+EXPORT_SYMBOL_GPL(kill_device);
+
/**
* device_del - delete device from system.
* @dev: device.
struct kobject *glue_dir = NULL;
struct class_interface *class_intf;
- /*
- * Hold the device lock and set the "dead" flag to guarantee that
- * the update behavior is consistent with the other bitfields near
- * it and that we cannot have an asynchronous probe routine trying
- * to run while we are tearing out the bus/class/sysfs from
- * underneath the device.
- */
device_lock(dev);
- dev->p->dead = true;
+ kill_device(dev);
device_unlock(dev);
/* Notify clients of device removal. This call must come
int fw_map_paged_buf(struct fw_priv *fw_priv);
#else
static inline void fw_free_paged_buf(struct fw_priv *fw_priv) {}
-int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; }
-int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; }
+static inline int fw_grow_paged_buf(struct fw_priv *fw_priv, int pages_needed) { return -ENXIO; }
+static inline int fw_map_paged_buf(struct fw_priv *fw_priv) { return -ENXIO; }
#endif
#endif /* __FIRMWARE_LOADER_H */
unsigned int key_len;
char secret[SHARED_SECRET_MAX]; /* 64 byte */
unsigned int resp_size;
- SHASH_DESC_ON_STACK(desc, connection->cram_hmac_tfm);
+ struct shash_desc *desc;
struct packet_info pi;
struct net_conf *nc;
int err, rv;
memcpy(secret, nc->shared_secret, key_len);
rcu_read_unlock();
+ desc = kmalloc(sizeof(struct shash_desc) +
+ crypto_shash_descsize(connection->cram_hmac_tfm),
+ GFP_KERNEL);
+ if (!desc) {
+ rv = -1;
+ goto fail;
+ }
desc->tfm = connection->cram_hmac_tfm;
rv = crypto_shash_setkey(connection->cram_hmac_tfm, (u8 *)secret, key_len);
kfree(peers_ch);
kfree(response);
kfree(right_response);
- shash_desc_zero(desc);
+ if (desc) {
+ shash_desc_zero(desc);
+ kfree(desc);
+ }
return rv;
}
unsigned long long m;
m = hpets->hp_tick_freq + (dis >> 1);
- do_div(m, dis);
- return (unsigned long)m;
+ return div64_ul(m, dis);
}
static int
return 0;
}
-static struct cn_dev cdev = {
- .input = cn_rx_skb,
-};
-
static int cn_init(void)
{
struct cn_dev *dev = &cdev;
struct netlink_kernel_cfg cfg = {
.groups = CN_NETLINK_USERS + 0xf,
- .input = dev->input,
+ .input = cn_rx_skb,
};
dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, &cfg);
int err = -ENODEV;
cpu = of_get_cpu_node(policy->cpu, NULL);
+ if (!cpu)
+ goto out;
+ max_freqp = of_get_property(cpu, "clock-frequency", NULL);
of_node_put(cpu);
- if (!cpu)
+ if (!max_freqp) {
+ err = -EINVAL;
goto out;
+ }
+
+ /* we need the freq in kHz */
+ max_freq = *max_freqp / 1000;
dn = of_find_compatible_node(NULL, NULL, "1682m-sdc");
if (!dn)
}
pr_debug("init cpufreq on CPU %d\n", policy->cpu);
-
- max_freqp = of_get_property(cpu, "clock-frequency", NULL);
- if (!max_freqp) {
- err = -EINVAL;
- goto out_unmap_sdcpwr;
- }
-
- /* we need the freq in kHz */
- max_freq = *max_freqp / 1000;
-
pr_debug("max clock-frequency is at %u kHz\n", max_freq);
pr_debug("initializing frequency table\n");
cpufreq_generic_init(policy, pas_freqs, get_gizmo_latency());
return 0;
-out_unmap_sdcpwr:
- iounmap(sdcpwr_mapbase);
-
out_unmap_sdcasr:
iounmap(sdcasr_mapbase);
out:
device->bc_implemented = BC_IMPLEMENTED;
break;
}
- /* else fall through to case address error */
+ /* else, fall through - to case address error */
case RCODE_ADDRESS_ERROR:
device->bc_implemented = BC_UNIMPLEMENTED;
}
if ((data[0] & bit) == (data[1] & bit))
continue;
- /* 1394-1995 IRM, fall through to retry. */
+ /* fall through - It's a 1394-1995 IRM, retry. */
default:
if (retry) {
retry--;
switch (port_type) {
case SELFID_PORT_CHILD:
(*child_port_count)++;
+ /* fall through */
case SELFID_PORT_PARENT:
case SELFID_PORT_NCONN:
(*total_port_count)++;
config ISCSI_IBFT_FIND
bool "iSCSI Boot Firmware Table Attributes"
- depends on X86 && ACPI
+ depends on X86 && ISCSI_IBFT
default n
help
This option enables the kernel to find the region of memory
config ISCSI_IBFT
tristate "iSCSI Boot Firmware Table Attributes module"
select ISCSI_BOOT_SYSFS
- depends on ISCSI_IBFT_FIND && SCSI && SCSI_LOWLEVEL
+ select ISCSI_IBFT_FIND if X86
+ depends on ACPI && SCSI && SCSI_LOWLEVEL
default n
help
This option enables support for detection and exposing of iSCSI
MODULE_LICENSE("GPL");
MODULE_VERSION(IBFT_ISCSI_VERSION);
+#ifndef CONFIG_ISCSI_IBFT_FIND
+struct acpi_table_ibft *ibft_addr;
+#endif
+
struct ibft_hdr {
u8 id;
u8 version;
config FPGA_MGR_ALTERA_PS_SPI
tristate "Altera FPGA Passive Serial over SPI"
depends on SPI
+ select BITREVERSE
help
FPGA manager driver support for Altera Arria/Cyclone/Stratix
using the passive serial interface over SPI.
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
};
int amdgpu_ras_enable = -1;
-uint amdgpu_ras_mask = 0xffffffff;
+uint amdgpu_ras_mask = 0xfffffffb;
/**
* DOC: vramlimit (int)
return -EINVAL;
if (is_support_sw_smu(adev)) {
- err = smu_get_current_rpm(&adev->smu, &speed);
+ err = smu_get_fan_speed_rpm(&adev->smu, &speed);
if (err)
return err;
} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
return -EINVAL;
if (is_support_sw_smu(adev)) {
- err = smu_get_current_rpm(&adev->smu, &rpm);
+ err = smu_get_fan_speed_rpm(&adev->smu, &rpm);
if (err)
return err;
} else if (adev->powerplay.pp_funcs->get_fan_speed_rpm) {
static int amdgpu_ras_release_vram(struct amdgpu_device *adev,
struct amdgpu_bo **bo_ptr);
-static void amdgpu_ras_self_test(struct amdgpu_device *adev)
-{
- /* TODO */
-}
-
static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
size_t size, loff_t *pos)
{
if (!obj)
return -EINVAL;
+ if (block_info.block_id != TA_RAS_BLOCK__UMC) {
+ DRM_INFO("%s error injection is not supported yet\n",
+ ras_block_str(info->head.block));
+ return -EINVAL;
+ }
+
ret = psp_ras_trigger_error(&adev->psp, &block_info);
if (ret)
DRM_ERROR("RAS ERROR: inject %s error failed ret %d\n",
amdgpu_ras_check_supported(adev, &con->hw_supported,
&con->supported);
+ if (!con->hw_supported) {
+ amdgpu_ras_set_context(adev, NULL);
+ kfree(con);
+ return 0;
+ }
+
con->features = 0;
INIT_LIST_HEAD(&con->head);
/* Might need get this flag from vbios. */
if (amdgpu_ras_fs_init(adev))
goto fs_out;
- amdgpu_ras_self_test(adev);
-
DRM_INFO("RAS INFO: ras initialized successfully, "
"hardware ability[%x] ras_mask[%x]\n",
con->hw_supported, con->supported);
}
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ acccess. These should be enabled by FW for target VMIDs. */
+ for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
+ }
}
static void gfx_v10_0_tcp_harvest(struct amdgpu_device *adev)
cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
TIME_STAMP_INT_ENABLE, 0);
WREG32(cp_int_cntl_reg, cp_int_cntl);
+ break;
case AMDGPU_IRQ_STATE_ENABLE:
cp_int_cntl = RREG32(cp_int_cntl_reg);
cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
}
cik_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ acccess. These should be enabled by FW for target VMIDs. */
+ for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+ WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+ }
}
static void gfx_v7_0_config_init(struct amdgpu_device *adev)
}
vi_srbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ acccess. These should be enabled by FW for target VMIDs. */
+ for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+ WREG32(amdgpu_gds_reg_offset[i].mem_base, 0);
+ WREG32(amdgpu_gds_reg_offset[i].mem_size, 0);
+ WREG32(amdgpu_gds_reg_offset[i].gws, 0);
+ WREG32(amdgpu_gds_reg_offset[i].oa, 0);
+ }
}
static void gfx_v8_0_config_init(struct amdgpu_device *adev)
}
soc15_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
+
+ /* Initialize all compute VMIDs to have no GDS, GWS, or OA
+ acccess. These should be enabled by FW for target VMIDs. */
+ for (i = FIRST_COMPUTE_VMID; i < LAST_COMPUTE_VMID; i++) {
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_BASE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_VMID0_SIZE, 2 * i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_GWS_VMID0, i, 0);
+ WREG32_SOC15_OFFSET(GC, 0, mmGDS_OA_VMID0, i, 0);
+ }
}
static void gfx_v9_0_constants_init(struct amdgpu_device *adev)
WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
upper_32_bits(adev->vcn.gpu_addr));
offset = size;
- /* No signed header for now from firmware
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
- */
- WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
}
WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
case CHIP_RAVEN:
pcache_info = raven_cache_info;
num_of_cache_types = ARRAY_SIZE(raven_cache_info);
+ break;
case CHIP_NAVI10:
pcache_info = navi10_cache_info;
num_of_cache_types = ARRAY_SIZE(navi10_cache_info);
switch (type) {
case KFD_MQD_TYPE_CP:
- pr_debug("%s@%i\n", __func__, __LINE__);
case KFD_MQD_TYPE_COMPUTE:
pr_debug("%s@%i\n", __func__, __LINE__);
mqd->allocate_mqd = allocate_mqd;
struct dc_context *ctx,
struct clk_mgr_internal *clk_mgr)
{
+ dce_clk_mgr_construct(ctx, clk_mgr);
+
memcpy(clk_mgr->max_clks_by_state,
dce110_max_clks_by_state,
sizeof(dce110_max_clks_by_state));
- dce_clk_mgr_construct(ctx, clk_mgr);
-
clk_mgr->regs = &disp_clk_regs;
clk_mgr->clk_mgr_shift = &disp_clk_shift;
clk_mgr->clk_mgr_mask = &disp_clk_mask;
struct dc_context *ctx,
struct clk_mgr_internal *clk_mgr)
{
+ dce_clk_mgr_construct(ctx, clk_mgr);
+
memcpy(clk_mgr->max_clks_by_state,
dce112_max_clks_by_state,
sizeof(dce112_max_clks_by_state));
- dce_clk_mgr_construct(ctx, clk_mgr);
-
clk_mgr->regs = &disp_clk_regs;
clk_mgr->clk_mgr_shift = &disp_clk_shift;
clk_mgr->clk_mgr_mask = &disp_clk_mask;
void dce120_clk_mgr_construct(struct dc_context *ctx, struct clk_mgr_internal *clk_mgr)
{
+ dce_clk_mgr_construct(ctx, clk_mgr);
+
memcpy(clk_mgr->max_clks_by_state,
dce120_max_clks_by_state,
sizeof(dce120_max_clks_by_state));
- dce_clk_mgr_construct(ctx, clk_mgr);
-
clk_mgr->base.dprefclk_khz = 600000;
clk_mgr->base.funcs = &dce120_funcs;
}
void dcn2_init_clocks(struct clk_mgr *clk_mgr)
{
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
+ // Assumption is that boot state always supports pstate
+ clk_mgr->clks.p_state_change_support = true;
}
void dcn2_enable_pme_wa(struct clk_mgr *clk_mgr_base)
struct dccg *dccg)
{
clk_mgr->base.ctx = ctx;
+ clk_mgr->pp_smu = pp_smu;
clk_mgr->base.funcs = &dcn2_funcs;
clk_mgr->regs = &clk_mgr_regs;
clk_mgr->clk_mgr_shift = &clk_mgr_shift;
static void destruct(struct dc *dc)
{
- dc_release_state(dc->current_state);
- dc->current_state = NULL;
+ if (dc->current_state) {
+ dc_release_state(dc->current_state);
+ dc->current_state = NULL;
+ }
destroy_links(dc);
uint32_t read_dpcd_retry_cnt = 10;
enum dc_status status = DC_ERROR_UNEXPECTED;
int i;
+ union max_down_spread max_down_spread = { {0} };
// Read DPCD 00101h to find out the number of lanes currently set
for (i = 0; i < read_dpcd_retry_cnt; i++) {
msleep(8);
}
- ASSERT(status == DC_OK);
-
// Read DPCD 00100h to find if standard link rates are set
core_link_read_dpcd(link, DP_LINK_BW_SET,
&link_bw_set, sizeof(link_bw_set));
link->cur_link_settings.link_rate = link_bw_set;
link->cur_link_settings.use_link_rate_set = false;
}
+ // Read DPCD 00003h to find the max down spread.
+ core_link_read_dpcd(link, DP_MAX_DOWNSPREAD,
+ &max_down_spread.raw, sizeof(max_down_spread));
+ link->cur_link_settings.link_spread =
+ max_down_spread.bits.MAX_DOWN_SPREAD ?
+ LINK_SPREAD_05_DOWNSPREAD_30KHZ : LINK_SPREAD_DISABLED;
}
static bool detect_dp(
return false;
}
- if (link->connector_signal == SIGNAL_TYPE_EDP) {
- /* On detect, we want to make sure current link settings are
- * up to date, especially if link was powered on by GOP.
- */
- read_edp_current_link_settings_on_detect(link);
- }
-
prev_sink = link->local_sink;
if (prev_sink != NULL) {
dc_sink_retain(prev_sink);
}
case SIGNAL_TYPE_EDP: {
+ read_edp_current_link_settings_on_detect(link);
detect_edp_sink_caps(link);
sink_caps.transaction_type =
DDC_TRANSACTION_TYPE_I2C_OVER_AUX;
if (core_dc->current_state->res_ctx.pipe_ctx[i].stream) {
if (core_dc->current_state->res_ctx.
pipe_ctx[i].stream->link
- == link)
+ == link) {
/* DMCU -1 for all controller id values,
* therefore +1 here
*/
core_dc->current_state->
res_ctx.pipe_ctx[i].stream_res.tg->inst +
1;
+
+ /* Disable brightness ramping when the display is blanked
+ * as it can hang the DMCU
+ */
+ if (core_dc->current_state->res_ctx.pipe_ctx[i].plane_state == NULL)
+ frame_ramp = 0;
+ }
}
}
abm->funcs->set_backlight_level_pwm(
/* Retrain with preferred link settings only relevant for
* DP signal type
+ * Check for non-DP signal or if passive dongle present
*/
- if (!dc_is_dp_signal(link->connector_signal))
+ if (!dc_is_dp_signal(link->connector_signal) ||
+ link->dongle_max_pix_clk > 0)
return;
for (i = 0; i < MAX_PIPES; i++) {
link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE;
ddc_service_set_dongle_type(link->ddc,
link->dpcd_caps.dongle_type);
+ link->dpcd_caps.is_branch_dev = false;
return;
}
/* DPCD 0x5 bit 0 = 1, it indicate it's branch device */
- link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
+ if (ds_port.fields.PORT_TYPE == DOWNSTREAM_DP) {
+ link->dpcd_caps.is_branch_dev = false;
+ }
+
+ else {
+ link->dpcd_caps.is_branch_dev = ds_port.fields.PORT_PRESENT;
+ }
switch (ds_port.fields.PORT_TYPE) {
case DOWNSTREAM_VGA:
link->dpcd_caps.dongle_type = DISPLAY_DONGLE_DP_VGA_CONVERTER;
break;
- case DOWNSTREAM_DVI_HDMI:
- /* At this point we don't know is it DVI or HDMI,
+ case DOWNSTREAM_DVI_HDMI_DP_PLUS_PLUS:
+ /* At this point we don't know is it DVI or HDMI or DP++,
* assume DVI.*/
link->dpcd_caps.dongle_type = DISPLAY_DONGLE_DP_DVI_CONVERTER;
break;
det_caps, sizeof(det_caps));
switch (port_caps->bits.DWN_STRM_PORTX_TYPE) {
+ /*Handle DP case as DONGLE_NONE*/
+ case DOWN_STREAM_DETAILED_DP:
+ link->dpcd_caps.dongle_type = DISPLAY_DONGLE_NONE;
+ break;
case DOWN_STREAM_DETAILED_VGA:
link->dpcd_caps.dongle_type =
DISPLAY_DONGLE_DP_VGA_CONVERTER;
DISPLAY_DONGLE_DP_DVI_CONVERTER;
break;
case DOWN_STREAM_DETAILED_HDMI:
+ case DOWN_STREAM_DETAILED_DP_PLUS_PLUS:
+ /*Handle DP++ active converter case, process DP++ case as HDMI case according DP1.4 spec*/
link->dpcd_caps.dongle_type =
DISPLAY_DONGLE_DP_HDMI_CONVERTER;
link->dpcd_caps.dongle_caps.is_dp_hdmi_s3d_converter =
hdmi_caps.bits.FRAME_SEQ_TO_FRAME_PACK;
- link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_pass_through =
- hdmi_caps.bits.YCrCr422_PASS_THROUGH;
- link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_pass_through =
- hdmi_caps.bits.YCrCr420_PASS_THROUGH;
- link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_converter =
- hdmi_caps.bits.YCrCr422_CONVERSION;
- link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_converter =
- hdmi_caps.bits.YCrCr420_CONVERSION;
+ /*YCBCR capability only for HDMI case*/
+ if (port_caps->bits.DWN_STRM_PORTX_TYPE
+ == DOWN_STREAM_DETAILED_HDMI) {
+ link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_pass_through =
+ hdmi_caps.bits.YCrCr422_PASS_THROUGH;
+ link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_pass_through =
+ hdmi_caps.bits.YCrCr420_PASS_THROUGH;
+ link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr422_converter =
+ hdmi_caps.bits.YCrCr422_CONVERSION;
+ link->dpcd_caps.dongle_caps.is_dp_hdmi_ycbcr420_converter =
+ hdmi_caps.bits.YCrCr420_CONVERSION;
+ }
link->dpcd_caps.dongle_caps.dp_hdmi_max_bpc =
translate_dpcd_max_bpc(
* PORT_CONNECTIVITY == 1 (as instructed by HW team).
*/
update_num_audio(&straps, &num_audio, &pool->audio_support);
- for (i = 0; i < pool->pipe_count && i < num_audio; i++) {
+ for (i = 0; i < caps->num_audio; i++) {
struct audio *aud = create_funcs->create_audio(ctx, i);
if (aud == NULL) {
return pool->audios[i];
}
}
+
+ /* use engine id to find free audio */
+ if ((id < pool->audio_count) && (res_ctx->is_audio_acquired[id] == false)) {
+ return pool->audios[id];
+ }
+
/*not found the matching one, first come first serve*/
for (i = 0; i < pool->audio_count; i++) {
if (res_ctx->is_audio_acquired[i] == false) {
pix_clk /= 2;
if (timing->pixel_encoding != PIXEL_ENCODING_YCBCR422) {
switch (timing->display_color_depth) {
+ case COLOR_DEPTH_666:
case COLOR_DEPTH_888:
normalized_pix_clk = pix_clk;
break;
/* TODO: Add check if ASIC support and EDID audio */
if (!stream->converter_disable_audio &&
dc_is_audio_capable_signal(pipe_ctx->stream->signal) &&
- stream->audio_info.mode_count) {
+ stream->audio_info.mode_count && stream->audio_info.flags.all) {
pipe_ctx->stream_res.audio = find_first_free_audio(
&context->res_ctx, pool, pipe_ctx->stream_res.stream_enc->id);
pipe_ctx->stream->dmdata_address = attr->address;
- if (pipe_ctx->stream_res.stream_enc->funcs->set_dynamic_metadata != NULL) {
+ if (pipe_ctx->stream_res.stream_enc &&
+ pipe_ctx->stream_res.stream_enc->funcs->set_dynamic_metadata != NULL) {
if (pipe_ctx->stream->dmdata_address.quad_part != 0) {
/* if using dynamic meta, don't set up generic infopackets */
pipe_ctx->stream_res.encoder_info_frame.hdrsmd.valid = false;
s2 |= (backlight_8_bit << ATOM_S2_CURRENT_BL_LEVEL_SHIFT);
REG_WRITE(BIOS_SCRATCH_2, s2);
+
+ /* waitDMCUReadyForCmd */
+ REG_WAIT(MASTER_COMM_CNTL_REG, MASTER_COMM_INTERRUPT,
+ 0, 1, 80000);
}
static void dce_abm_init(struct abm *abm)
void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
{
/* notify audio driver for audio modes of monitor */
- struct dc *core_dc = pipe_ctx->stream->ctx->dc;
+ struct dc *core_dc;
struct pp_smu_funcs *pp_smu = NULL;
- struct clk_mgr *clk_mgr = core_dc->clk_mgr;
+ struct clk_mgr *clk_mgr;
unsigned int i, num_audio = 1;
+ if (!pipe_ctx->stream)
+ return;
+
+ core_dc = pipe_ctx->stream->ctx->dc;
+ clk_mgr = core_dc->clk_mgr;
+
if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == true)
return;
void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option)
{
- struct dc *dc = pipe_ctx->stream->ctx->dc;
+ struct dc *dc;
struct pp_smu_funcs *pp_smu = NULL;
- struct clk_mgr *clk_mgr = dc->clk_mgr;
+ struct clk_mgr *clk_mgr;
+
+ if (!pipe_ctx || !pipe_ctx->stream)
+ return;
+
+ dc = pipe_ctx->stream->ctx->dc;
+ clk_mgr = dc->clk_mgr;
if (pipe_ctx->stream_res.audio && pipe_ctx->stream_res.audio->enabled == false)
return;
pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
pipe_ctx->stream_res.stream_enc, true);
if (pipe_ctx->stream_res.audio) {
+ pipe_ctx->stream_res.audio->enabled = false;
+
if (dc->res_pool->pp_smu)
pp_smu = dc->res_pool->pp_smu;
/* dal_audio_disable_azalia_audio_jack_presence(stream->audio,
* stream->stream_engine_id);
*/
- if (pipe_ctx->stream_res.audio)
- pipe_ctx->stream_res.audio->enabled = false;
}
}
* everything down.
*/
if (dcb->funcs->is_accelerated_mode(dcb) || dc->config.power_down_display_on_boot) {
- for (i = 0; i < dc->res_pool->pipe_count; i++) {
- struct hubp *hubp = dc->res_pool->hubps[i];
- struct dpp *dpp = dc->res_pool->dpps[i];
-
- hubp->funcs->hubp_init(hubp);
- dc->res_pool->opps[i]->mpc_tree_params.opp_id = dc->res_pool->opps[i]->inst;
- plane_atomic_power_down(dc, dpp, hubp);
- }
-
- apply_DEGVIDCN10_253_wa(dc);
+ dc->hwss.init_pipes(dc, dc->current_state);
}
for (i = 0; i < dc->res_pool->audio_count; i++) {
return result;
}
-
-
-
-
static bool
dcn10_set_output_transfer_func(struct pipe_ctx *pipe_ctx,
const struct dc_stream_state *stream)
if (removed_pipe[i])
dcn10_disable_plane(dc, &dc->current_state->res_ctx.pipe_ctx[i]);
+ for (i = 0; i < dc->res_pool->pipe_count; i++)
+ if (removed_pipe[i]) {
+ dc->hwss.optimize_bandwidth(dc, context);
+ break;
+ }
+
if (dc->hwseq->wa.DEGVIDCN10_254)
hubbub1_wm_change_req_wa(dc->res_pool->hubbub);
}
.num_audio = 3,
.num_stream_encoder = 3,
.num_pll = 3,
- .num_ddc = 3,
+ .num_ddc = 4,
};
static const struct dc_plane_cap plane_cap = {
switch (dccg_dcn->base.ctx->dc->res_pool->pipe_count) {
case 6:
REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[5], 1);
+ /* Fall through */
case 5:
REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[4], 1);
+ /* Fall through */
case 4:
REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[3], 1);
+ /* Fall through */
case 3:
REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[2], 1);
+ /* Fall through */
case 2:
REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[1], 1);
+ /* Fall through */
case 1:
REG_UPDATE(DPPCLK_DTO_CTRL, DPPCLK_DTO_DB_EN[0], 1);
break;
break;
default:
ASSERT(false);
+ block_size = page_table_block_size;
break;
}
struct dcn_vmid_page_table_config phys_config;
REG_SET(DCN_VM_FB_LOCATION_BASE, 0,
- FB_BASE, pa_config->system_aperture.fb_base);
+ FB_BASE, pa_config->system_aperture.fb_base >> 24);
REG_SET(DCN_VM_FB_LOCATION_TOP, 0,
- FB_TOP, pa_config->system_aperture.fb_top);
+ FB_TOP, pa_config->system_aperture.fb_top >> 24);
REG_SET(DCN_VM_FB_OFFSET, 0,
- FB_OFFSET, pa_config->system_aperture.fb_offset);
+ FB_OFFSET, pa_config->system_aperture.fb_offset >> 24);
REG_SET(DCN_VM_AGP_BOT, 0,
- AGP_BOT, pa_config->system_aperture.agp_bot);
+ AGP_BOT, pa_config->system_aperture.agp_bot >> 24);
REG_SET(DCN_VM_AGP_TOP, 0,
- AGP_TOP, pa_config->system_aperture.agp_top);
+ AGP_TOP, pa_config->system_aperture.agp_top >> 24);
REG_SET(DCN_VM_AGP_BASE, 0,
- AGP_BASE, pa_config->system_aperture.agp_base);
+ AGP_BASE, pa_config->system_aperture.agp_base >> 24);
if (pa_config->gart_config.page_table_start_addr != pa_config->gart_config.page_table_end_addr) {
- phys_config.depth = 1;
- phys_config.block_size = 4096;
phys_config.page_table_start_addr = pa_config->gart_config.page_table_start_addr >> 12;
phys_config.page_table_end_addr = pa_config->gart_config.page_table_end_addr >> 12;
phys_config.page_table_base_addr = pa_config->gart_config.page_table_base_addr;
-
+ phys_config.depth = 0;
+ phys_config.block_size = 0;
// Init VMID 0 based on PA config
dcn20_vmid_setup(&hubbub1->vmid[0], &phys_config);
}
apt.sys_default.quad_part = 0;
- apt.sys_high.quad_part = dc->vm_pa_config.system_aperture.start_addr;
- apt.sys_low.quad_part = dc->vm_pa_config.system_aperture.end_addr;
+ apt.sys_low.quad_part = dc->vm_pa_config.system_aperture.start_addr;
+ apt.sys_high.quad_part = dc->vm_pa_config.system_aperture.end_addr;
// Program system aperture settings
pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings(pipe_ctx->plane_res.hubp, &apt);
CRTC_STATE_VACTIVE);
pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg,
CRTC_STATE_VBLANK);
+ pipe->stream_res.tg->funcs->wait_for_state(pipe->stream_res.tg,
+ CRTC_STATE_VACTIVE);
pipe->stream_res.tg->funcs->lock_doublebuffer_disable(
pipe->stream_res.tg);
}
if (pipe->plane_state != NULL)
flip_immediate = pipe->plane_state->flip_immediate;
+ if (flip_immediate && lock) {
+ while (pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->plane_res.hubp)) {
+ udelay(1);
+ }
+
+ if (pipe->bottom_pipe != NULL)
+ while (pipe->bottom_pipe->plane_res.hubp->funcs->hubp_is_flip_pending(pipe->bottom_pipe->plane_res.hubp)) {
+ udelay(1);
+ }
+ }
+
/* In flip immediate and pipe splitting case, we need to use GSL
* for synchronization. Only do setup on locking and on flip type change.
*/
else if (pipe_ctx->stream_res.audio) {
dc->hwss.disable_audio_stream(pipe_ctx, FREE_ACQUIRED_RESOURCE);
}
-
}
+#ifdef CONFIG_DRM_AMD_DC_DSC_SUPPORT
+ else if (pipe_ctx->stream_res.dsc)
+ dp_set_dsc_enable(pipe_ctx, false);
+#endif
/* by upper caller loop, parent pipe: pipe0, will be reset last.
* back end share by all pipes and will be disable only when disable
optc1->min_h_blank = 32;
optc1->min_v_blank = 3;
optc1->min_v_blank_interlace = 5;
- optc1->min_h_sync_width = 8;
+ optc1->min_h_sync_width = 4;// Minimum HSYNC = 8 pixels asked By HW in the first place for no actual reason. Oculus Rift S will not light up with 8 as it's hsyncWidth is 6. Changing it to 4 to fix that issue.
optc1->min_v_sync_width = 1;
optc1->comb_opp_id = 0xf;
}
if (dc->bb_overrides.min_dcfclk_mhz > 0)
min_dcfclk = dc->bb_overrides.min_dcfclk_mhz;
+ else
+ // Accounting for SOC/DCF relationship, we can go as high as
+ // 506Mhz in Vmin. We need to code 507 since SMU will round down to 506.
+ min_dcfclk = 507;
for (i = 0; i < num_states; i++) {
int min_fclk_required_by_uclk;
*
*/
+#include <linux/delay.h>
+
#include "dcn20_vmid.h"
#include "reg_helper.h"
#define FN(reg_name, field_name) \
vmid->shifts->field_name, vmid->masks->field_name
+static void dcn20_wait_for_vmid_ready(struct dcn20_vmid *vmid)
+{
+ /* According the hardware spec, we need to poll for the lowest
+ * bit of PAGE_TABLE_BASE_ADDR_LO32 = 1 any time a GPUVM
+ * context is updated. We can't use REG_WAIT here since we
+ * don't have a seperate field to wait on.
+ *
+ * TODO: Confirm timeout / poll interval with hardware team
+ */
+
+ int max_times = 10000;
+ int delay_us = 5;
+ int i;
+
+ for (i = 0; i < max_times; ++i) {
+ uint32_t entry_lo32;
+
+ REG_GET(PAGE_TABLE_BASE_ADDR_LO32,
+ VM_CONTEXT0_PAGE_DIRECTORY_ENTRY_LO32,
+ &entry_lo32);
+
+ if (entry_lo32 & 0x1)
+ return;
+
+ udelay(delay_us);
+ }
+
+ /* VM setup timed out */
+ DC_LOG_WARNING("Timeout while waiting for GPUVM context update\n");
+ ASSERT(0);
+}
+
void dcn20_vmid_setup(struct dcn20_vmid *vmid, const struct dcn_vmid_page_table_config *config)
{
REG_SET(PAGE_TABLE_START_ADDR_HI32, 0,
REG_SET(PAGE_TABLE_BASE_ADDR_HI32, 0,
VM_CONTEXT0_PAGE_DIRECTORY_ENTRY_HI32, (config->page_table_base_addr >> 32) & 0xFFFFFFFF);
+ /* Note: per hardware spec PAGE_TABLE_BASE_ADDR_LO32 must be programmed last in sequence */
REG_SET(PAGE_TABLE_BASE_ADDR_LO32, 0,
VM_CONTEXT0_PAGE_DIRECTORY_ENTRY_LO32, config->page_table_base_addr & 0xFFFFFFFF);
+
+ dcn20_wait_for_vmid_ready(vmid);
}
vdsc_cfg->rc_bits = (hrd_delay * vdsc_cfg->bits_per_pixel) / 16;
vdsc_cfg->initial_dec_delay = hrd_delay - vdsc_cfg->initial_xmit_delay;
+ /* As per DSC spec v1.2a recommendation: */
+ if (vdsc_cfg->native_420)
+ vdsc_cfg->second_line_offset_adj = 512;
+ else
+ vdsc_cfg->second_line_offset_adj = 0;
+
return 0;
}
EXPORT_SYMBOL(drm_dsc_compute_rc_parameters);
struct clock_source *clock_sources[MAX_CLOCK_SOURCES];
unsigned int clk_src_count;
- struct audio *audios[MAX_PIPES];
+ struct audio *audios[MAX_AUDIOS];
unsigned int audio_count;
struct audio_support audio_support;
};
enum dcn_hubbub_page_table_block_size {
- DCN_PAGE_TABLE_BLOCK_SIZE_4KB,
- DCN_PAGE_TABLE_BLOCK_SIZE_64KB
+ DCN_PAGE_TABLE_BLOCK_SIZE_4KB = 0,
+ DCN_PAGE_TABLE_BLOCK_SIZE_64KB = 4,
};
struct dcn_hubbub_phys_addr_config {
* Data types shared between different Virtual HW blocks
******************************************************************************/
+#define MAX_AUDIOS 7
#define MAX_PIPES 6
#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
#define MAX_DWB_PIPES 1
enum dpcd_downstream_port_type {
DOWNSTREAM_DP = 0,
DOWNSTREAM_VGA,
- DOWNSTREAM_DVI_HDMI,
+ DOWNSTREAM_DVI_HDMI_DP_PLUS_PLUS,/* DVI, HDMI, DP++ */
DOWNSTREAM_NONDDC /* has no EDID (TV,CV) */
};
{
int ret = 0, clk_id = 0;
uint32_t param = 0;
+ uint32_t clock_limit;
if (!min && !max)
return -EINVAL;
- if (!smu_clk_dpm_is_enabled(smu, clk_type))
+ if (!smu_clk_dpm_is_enabled(smu, clk_type)) {
+ switch (clk_type) {
+ case SMU_MCLK:
+ case SMU_UCLK:
+ clock_limit = smu->smu_table.boot_values.uclk;
+ break;
+ case SMU_GFXCLK:
+ case SMU_SCLK:
+ clock_limit = smu->smu_table.boot_values.gfxclk;
+ break;
+ case SMU_SOCCLK:
+ clock_limit = smu->smu_table.boot_values.socclk;
+ break;
+ default:
+ clock_limit = 0;
+ break;
+ }
+
+ /* clock in Mhz unit */
+ if (min)
+ *min = clock_limit / 100;
+ if (max)
+ *max = clock_limit / 100;
+
return 0;
+ }
mutex_lock(&smu->mutex);
clk_id = smu_clk_get_index(smu, clk_type);
return 0;
}
+static int smu_default_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level)
+{
+ int ret = 0;
+ uint32_t sclk_mask, mclk_mask, soc_mask;
+
+ switch (level) {
+ case AMD_DPM_FORCED_LEVEL_HIGH:
+ ret = smu_force_dpm_limit_value(smu, true);
+ break;
+ case AMD_DPM_FORCED_LEVEL_LOW:
+ ret = smu_force_dpm_limit_value(smu, false);
+ break;
+ case AMD_DPM_FORCED_LEVEL_AUTO:
+ case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
+ ret = smu_unforce_dpm_levels(smu);
+ break;
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
+ case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
+ case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+ ret = smu_get_profiling_clk_mask(smu, level,
+ &sclk_mask,
+ &mclk_mask,
+ &soc_mask);
+ if (ret)
+ return ret;
+ smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask);
+ smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask);
+ smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask);
+ break;
+ case AMD_DPM_FORCED_LEVEL_MANUAL:
+ case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
+ default:
+ break;
+ }
+ return ret;
+}
+
int smu_adjust_power_state_dynamic(struct smu_context *smu,
enum amd_dpm_forced_level level,
bool skip_display_settings)
{
int ret = 0;
int index = 0;
- uint32_t sclk_mask, mclk_mask, soc_mask;
long workload;
struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
}
if (smu_dpm_ctx->dpm_level != level) {
- switch (level) {
- case AMD_DPM_FORCED_LEVEL_HIGH:
- ret = smu_force_dpm_limit_value(smu, true);
- break;
- case AMD_DPM_FORCED_LEVEL_LOW:
- ret = smu_force_dpm_limit_value(smu, false);
- break;
-
- case AMD_DPM_FORCED_LEVEL_AUTO:
- case AMD_DPM_FORCED_LEVEL_PROFILE_STANDARD:
- ret = smu_unforce_dpm_levels(smu);
- break;
-
- case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK:
- case AMD_DPM_FORCED_LEVEL_PROFILE_MIN_MCLK:
- case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
- ret = smu_get_profiling_clk_mask(smu, level,
- &sclk_mask,
- &mclk_mask,
- &soc_mask);
- if (ret)
- return ret;
- smu_force_clk_levels(smu, SMU_SCLK, 1 << sclk_mask);
- smu_force_clk_levels(smu, SMU_MCLK, 1 << mclk_mask);
- smu_force_clk_levels(smu, SMU_SOCCLK, 1 << soc_mask);
- break;
-
- case AMD_DPM_FORCED_LEVEL_MANUAL:
- case AMD_DPM_FORCED_LEVEL_PROFILE_EXIT:
- default:
- break;
+ ret = smu_asic_set_performance_level(smu, level);
+ if (ret) {
+ ret = smu_default_set_performance_level(smu, level);
}
-
if (!ret)
smu_dpm_ctx->dpm_level = level;
}
int (*tables_init)(struct smu_context *smu, struct smu_table *tables);
int (*set_thermal_fan_table)(struct smu_context *smu);
int (*get_fan_speed_percent)(struct smu_context *smu, uint32_t *speed);
+ int (*get_fan_speed_rpm)(struct smu_context *smu, uint32_t *speed);
int (*set_watermarks_table)(struct smu_context *smu, void *watermarks,
struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges);
int (*get_current_clk_freq_by_table)(struct smu_context *smu,
int (*get_thermal_temperature_range)(struct smu_context *smu, struct smu_temperature_range *range);
int (*get_uclk_dpm_states)(struct smu_context *smu, uint32_t *clocks_in_khz, uint32_t *num_states);
int (*set_default_od_settings)(struct smu_context *smu, bool initialize);
+ int (*set_performance_level)(struct smu_context *smu, enum amd_dpm_forced_level level);
};
struct smu_funcs
int (*set_watermarks_for_clock_ranges)(struct smu_context *smu,
struct dm_pp_wm_sets_with_clock_ranges_soc15 *clock_ranges);
int (*conv_power_profile_to_pplib_workload)(int power_profile);
- int (*get_current_rpm)(struct smu_context *smu, uint32_t *speed);
uint32_t (*get_fan_control_mode)(struct smu_context *smu);
int (*set_fan_control_mode)(struct smu_context *smu, uint32_t mode);
int (*set_fan_speed_percent)(struct smu_context *smu, uint32_t speed);
((smu)->funcs->init_max_sustainable_clocks ? (smu)->funcs->init_max_sustainable_clocks((smu)) : 0)
#define smu_set_default_od_settings(smu, initialize) \
((smu)->ppt_funcs->set_default_od_settings ? (smu)->ppt_funcs->set_default_od_settings((smu), (initialize)) : 0)
-#define smu_get_current_rpm(smu, speed) \
- ((smu)->funcs->get_current_rpm ? (smu)->funcs->get_current_rpm((smu), (speed)) : 0)
#define smu_set_fan_speed_rpm(smu, speed) \
((smu)->funcs->set_fan_speed_rpm ? (smu)->funcs->set_fan_speed_rpm((smu), (speed)) : 0)
#define smu_send_smc_msg(smu, msg) \
((smu)->ppt_funcs->get_fan_speed_percent ? (smu)->ppt_funcs->get_fan_speed_percent((smu), (speed)) : 0)
#define smu_set_fan_speed_percent(smu, speed) \
((smu)->funcs->set_fan_speed_percent ? (smu)->funcs->set_fan_speed_percent((smu), (speed)) : 0)
+#define smu_get_fan_speed_rpm(smu, speed) \
+ ((smu)->ppt_funcs->get_fan_speed_rpm ? (smu)->ppt_funcs->get_fan_speed_rpm((smu), (speed)) : 0)
#define smu_msg_get_index(smu, msg) \
((smu)->ppt_funcs? ((smu)->ppt_funcs->get_smu_msg_index? (smu)->ppt_funcs->get_smu_msg_index((smu), (msg)) : -EINVAL) : -EINVAL)
((smu)->funcs->baco_get_state? (smu)->funcs->baco_get_state((smu), (state)) : 0)
#define smu_baco_reset(smu) \
((smu)->funcs->baco_reset? (smu)->funcs->baco_reset((smu)) : 0)
+#define smu_asic_set_performance_level(smu, level) \
+ ((smu)->ppt_funcs->set_performance_level? (smu)->ppt_funcs->set_performance_level((smu), (level)) : -EINVAL);
+
extern int smu_get_atom_data_table(struct smu_context *smu, uint32_t table,
uint16_t *size, uint8_t *frev, uint8_t *crev,
return ret;
}
+static bool navi10_is_support_fine_grained_dpm(struct smu_context *smu, enum smu_clk_type clk_type)
+{
+ PPTable_t *pptable = smu->smu_table.driver_pptable;
+ DpmDescriptor_t *dpm_desc = NULL;
+ uint32_t clk_index = 0;
+
+ clk_index = smu_clk_get_index(smu, clk_type);
+ dpm_desc = &pptable->DpmDescriptor[clk_index];
+
+ /* 0 - Fine grained DPM, 1 - Discrete DPM */
+ return dpm_desc->SnapToDiscrete == 0 ? true : false;
+}
+
static int navi10_print_clk_levels(struct smu_context *smu,
enum smu_clk_type clk_type, char *buf)
{
int i, size = 0, ret = 0;
uint32_t cur_value = 0, value = 0, count = 0;
+ uint32_t freq_values[3] = {0};
+ uint32_t mark_index = 0;
switch (clk_type) {
case SMU_GFXCLK:
ret = smu_get_current_clk_freq(smu, clk_type, &cur_value);
if (ret)
return size;
+
/* 10KHz -> MHz */
cur_value = cur_value / 100;
- size += sprintf(buf, "current clk: %uMhz\n", cur_value);
-
ret = smu_get_dpm_level_count(smu, clk_type, &count);
if (ret)
return size;
- for (i = 0; i < count; i++) {
- ret = smu_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ if (!navi10_is_support_fine_grained_dpm(smu, clk_type)) {
+ for (i = 0; i < count; i++) {
+ ret = smu_get_dpm_freq_by_index(smu, clk_type, i, &value);
+ if (ret)
+ return size;
+
+ size += sprintf(buf + size, "%d: %uMhz %s\n", i, value,
+ cur_value == value ? "*" : "");
+ }
+ } else {
+ ret = smu_get_dpm_freq_by_index(smu, clk_type, 0, &freq_values[0]);
+ if (ret)
+ return size;
+ ret = smu_get_dpm_freq_by_index(smu, clk_type, count - 1, &freq_values[2]);
if (ret)
return size;
- size += sprintf(buf + size, "%d: %uMhz %s\n", i, value,
- cur_value == value ? "*" : "");
+ freq_values[1] = cur_value;
+ mark_index = cur_value == freq_values[0] ? 0 :
+ cur_value == freq_values[2] ? 2 : 1;
+ if (mark_index != 1)
+ freq_values[1] = (freq_values[0] + freq_values[2]) / 2;
+
+ for (i = 0; i < 3; i++) {
+ size += sprintf(buf + size, "%d: %uMhz %s\n", i, freq_values[i],
+ i == mark_index ? "*" : "");
+ }
+
}
break;
default:
return !!(feature_enabled & SMC_DPM_FEATURE);
}
-static int navi10_get_fan_speed(struct smu_context *smu, uint16_t *value)
+static int navi10_get_fan_speed_rpm(struct smu_context *smu,
+ uint32_t *speed)
{
SmuMetrics_t metrics;
int ret = 0;
- if (!value)
+ if (!speed)
return -EINVAL;
memset(&metrics, 0, sizeof(metrics));
if (ret)
return ret;
- *value = metrics.CurrFanSpeed;
+ *speed = metrics.CurrFanSpeed;
return ret;
}
{
int ret = 0;
uint32_t percent = 0;
- uint16_t current_rpm;
+ uint32_t current_rpm;
PPTable_t *pptable = smu->smu_table.driver_pptable;
- ret = navi10_get_fan_speed(smu, ¤t_rpm);
+ ret = navi10_get_fan_speed_rpm(smu, ¤t_rpm);
if (ret)
return ret;
return 0;
}
+static int navi10_set_peak_clock_by_device(struct smu_context *smu)
+{
+ struct amdgpu_device *adev = smu->adev;
+ int ret = 0;
+ uint32_t sclk_freq = 0, uclk_freq = 0;
+ uint32_t uclk_level = 0;
+
+ switch (adev->rev_id) {
+ case 0xf0: /* XTX */
+ case 0xc0:
+ sclk_freq = NAVI10_PEAK_SCLK_XTX;
+ break;
+ case 0xf1: /* XT */
+ case 0xc1:
+ sclk_freq = NAVI10_PEAK_SCLK_XT;
+ break;
+ default: /* XL */
+ sclk_freq = NAVI10_PEAK_SCLK_XL;
+ break;
+ }
+
+ ret = smu_get_dpm_level_count(smu, SMU_UCLK, &uclk_level);
+ if (ret)
+ return ret;
+ ret = smu_get_dpm_freq_by_index(smu, SMU_UCLK, uclk_level - 1, &uclk_freq);
+ if (ret)
+ return ret;
+
+ ret = smu_set_soft_freq_range(smu, SMU_SCLK, sclk_freq, sclk_freq);
+ if (ret)
+ return ret;
+ ret = smu_set_soft_freq_range(smu, SMU_UCLK, uclk_freq, uclk_freq);
+ if (ret)
+ return ret;
+
+ return ret;
+}
+
+static int navi10_set_performance_level(struct smu_context *smu, enum amd_dpm_forced_level level)
+{
+ int ret = 0;
+
+ switch (level) {
+ case AMD_DPM_FORCED_LEVEL_PROFILE_PEAK:
+ ret = navi10_set_peak_clock_by_device(smu);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
static const struct pptable_funcs navi10_ppt_funcs = {
.tables_init = navi10_tables_init,
.alloc_dpm_context = navi10_allocate_dpm_context,
.unforce_dpm_levels = navi10_unforce_dpm_levels,
.is_dpm_running = navi10_is_dpm_running,
.get_fan_speed_percent = navi10_get_fan_speed_percent,
+ .get_fan_speed_rpm = navi10_get_fan_speed_rpm,
.get_power_profile_mode = navi10_get_power_profile_mode,
.set_power_profile_mode = navi10_set_power_profile_mode,
.get_profiling_clk_mask = navi10_get_profiling_clk_mask,
.get_uclk_dpm_states = navi10_get_uclk_dpm_states,
.get_ppfeature_status = navi10_get_ppfeature_status,
.set_ppfeature_status = navi10_set_ppfeature_status,
+ .set_performance_level = navi10_set_performance_level,
};
void navi10_set_ppt_funcs(struct smu_context *smu)
#ifndef __NAVI10_PPT_H__
#define __NAVI10_PPT_H__
+#define NAVI10_PEAK_SCLK_XTX (1830)
+#define NAVI10_PEAK_SCLK_XT (1755)
+#define NAVI10_PEAK_SCLK_XL (1625)
+
extern void navi10_set_ppt_funcs(struct smu_context *smu);
#endif
return ret;
}
-static int smu_v11_0_get_current_rpm(struct smu_context *smu,
- uint32_t *current_rpm)
-{
- int ret;
-
- ret = smu_send_smc_msg(smu, SMU_MSG_GetCurrentRpm);
-
- if (ret) {
- pr_err("Attempt to get current RPM from SMC Failed!\n");
- return ret;
- }
-
- smu_read_smc_arg(smu, current_rpm);
-
- return 0;
-}
-
static uint32_t
smu_v11_0_get_fan_control_mode(struct smu_context *smu)
{
.set_deep_sleep_dcefclk = smu_v11_0_set_deep_sleep_dcefclk,
.display_clock_voltage_request = smu_v11_0_display_clock_voltage_request,
.set_watermarks_for_clock_ranges = smu_v11_0_set_watermarks_for_clock_ranges,
- .get_current_rpm = smu_v11_0_get_current_rpm,
.get_fan_control_mode = smu_v11_0_get_fan_control_mode,
.set_fan_control_mode = smu_v11_0_set_fan_control_mode,
.set_fan_speed_percent = smu_v11_0_set_fan_speed_percent,
return ret;
}
+static int vega20_get_fan_speed_rpm(struct smu_context *smu,
+ uint32_t *speed)
+{
+ int ret;
+
+ ret = smu_send_smc_msg(smu, SMU_MSG_GetCurrentRpm);
+
+ if (ret) {
+ pr_err("Attempt to get current RPM from SMC Failed!\n");
+ return ret;
+ }
+
+ smu_read_smc_arg(smu, speed);
+
+ return 0;
+}
+
static int vega20_get_fan_speed_percent(struct smu_context *smu,
uint32_t *speed)
{
uint32_t current_rpm = 0, percent = 0;
PPTable_t *pptable = smu->smu_table.driver_pptable;
- ret = smu_get_current_rpm(smu, ¤t_rpm);
+ ret = vega20_get_fan_speed_rpm(smu, ¤t_rpm);
if (ret)
return ret;
.is_dpm_running = vega20_is_dpm_running,
.set_thermal_fan_table = vega20_set_thermal_fan_table,
.get_fan_speed_percent = vega20_get_fan_speed_percent,
+ .get_fan_speed_rpm = vega20_get_fan_speed_rpm,
.set_watermarks_table = vega20_set_watermarks_table,
.get_thermal_temperature_range = vega20_get_thermal_temperature_range
};
* simple XOR between the two handle the addition nicely.
*/
cmdline = &connector->cmdline_mode;
- if (cmdline->specified) {
+ if (cmdline->specified && cmdline->rotation_reflection) {
unsigned int cmdline_rest, panel_rest;
unsigned int cmdline_rot, panel_rot;
unsigned int sum_rot, sum_rest;
struct drm_device *dev = fb->dev;
struct drm_atomic_state *state;
struct drm_plane *plane;
- struct drm_connector *conn;
+ struct drm_connector *conn __maybe_unused;
struct drm_connector_state *conn_state;
int i, ret;
unsigned plane_mask;
*
* Additionals options can be provided following the mode, using a comma to
* separate each option. Valid options can be found in
- * Documentation/fb/modedb.txt.
+ * Documentation/fb/modedb.rst.
*
* The intermediate drm_cmdline_mode structure is required to store additional
* options from the command line modline like the force-enable/disable flag.
subdir-ccflags-y += $(call cc-disable-warning, unused-parameter)
subdir-ccflags-y += $(call cc-disable-warning, type-limits)
subdir-ccflags-y += $(call cc-disable-warning, missing-field-initializers)
-subdir-ccflags-y += $(call cc-disable-warning, implicit-fallthrough)
subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
# clang warnings
subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
case INTEL_OUTPUT_DDI:
if (WARN_ON(!HAS_DDI(to_i915(dev))))
break;
- /* else: fall through */
+ /* else, fall through */
case INTEL_OUTPUT_DP:
case INTEL_OUTPUT_HDMI:
case INTEL_OUTPUT_EDP:
switch (lane_info) {
default:
MISSING_CASE(lane_info);
+ /* fall through */
case 1:
case 2:
case 4:
*/
if (!i915_terminally_wedged(i915))
return VM_FAULT_SIGBUS;
- /* else: fall through */
+ /* else, fall through */
case -EAGAIN:
/*
* EAGAIN means the gpu is hung and we'll wait for the error
switch (type) {
default:
MISSING_CASE(type);
- /* fallthrough to use PAGE_KERNEL anyway */
+ /* fallthrough - to use PAGE_KERNEL anyway */
case I915_MAP_WB:
pgprot = PAGE_KERNEL;
break;
switch (engine->id) {
default:
MISSING_CASE(engine->id);
+ /* fall through */
case RCS0:
mmio = RENDER_HWS_PGA_GEN7;
break;
if (ret)
goto fail;
- spin_lock_init(&dpu_enc->enc_spinlock);
-
atomic_set(&dpu_enc->frame_done_timeout_ms, 0);
timer_setup(&dpu_enc->frame_done_timer,
dpu_encoder_frame_done_timeout, 0);
drm_encoder_helper_add(&dpu_enc->base, &dpu_encoder_helper_funcs);
+ spin_lock_init(&dpu_enc->enc_spinlock);
dpu_enc->enabled = false;
return &dpu_enc->base;
msm_submitqueue_init(dev, ctx);
- ctx->aspace = priv->gpu->aspace;
+ ctx->aspace = priv->gpu ? priv->gpu->aspace : NULL;
file->driver_priv = ctx;
return 0;
* because display controller, GPU, etc. are not coherent:
*/
if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
- dma_map_sg(dev->dev, msm_obj->sgt->sgl,
+ dma_sync_sg_for_device(dev->dev, msm_obj->sgt->sgl,
msm_obj->sgt->nents, DMA_BIDIRECTIONAL);
}
* GPU, etc. are not coherent:
*/
if (msm_obj->flags & (MSM_BO_WC|MSM_BO_UNCACHED))
- dma_unmap_sg(obj->dev->dev, msm_obj->sgt->sgl,
+ dma_sync_sg_for_cpu(obj->dev->dev, msm_obj->sgt->sgl,
msm_obj->sgt->nents,
DMA_BIDIRECTIONAL);
static void __ttm_dma_free_page(struct dma_pool *pool, struct dma_page *d_page)
{
+ unsigned long attrs = 0;
dma_addr_t dma = d_page->dma;
d_page->vaddr &= ~VADDR_FLAG_HUGE_POOL;
- dma_free_coherent(pool->dev, pool->size, (void *)d_page->vaddr, dma);
+ if (pool->type & IS_HUGE)
+ attrs = DMA_ATTR_NO_WARN;
+
+ dma_free_attrs(pool->dev, pool->size, (void *)d_page->vaddr, dma, attrs);
kfree(d_page);
d_page = NULL;
static const u16 NCT6106_REG_WEIGHT_TEMP_SEL[] = { 0x168, 0x178, 0x188 };
static const u16 NCT6106_REG_WEIGHT_TEMP_STEP[] = { 0x169, 0x179, 0x189 };
static const u16 NCT6106_REG_WEIGHT_TEMP_STEP_TOL[] = { 0x16a, 0x17a, 0x18a };
-static const u16 NCT6106_REG_WEIGHT_DUTY_STEP[] = { 0x16b, 0x17b, 0x17c };
+static const u16 NCT6106_REG_WEIGHT_DUTY_STEP[] = { 0x16b, 0x17b, 0x18b };
static const u16 NCT6106_REG_WEIGHT_TEMP_BASE[] = { 0x16c, 0x17c, 0x18c };
static const u16 NCT6106_REG_WEIGHT_DUTY_BASE[] = { 0x16d, 0x17d, 0x18d };
data->REG_FAN_TIME[0] = NCT6106_REG_FAN_STOP_TIME;
data->REG_FAN_TIME[1] = NCT6106_REG_FAN_STEP_UP_TIME;
data->REG_FAN_TIME[2] = NCT6106_REG_FAN_STEP_DOWN_TIME;
+ data->REG_TOLERANCE_H = NCT6106_REG_TOLERANCE_H;
data->REG_PWM[0] = NCT6106_REG_PWM;
data->REG_PWM[1] = NCT6106_REG_FAN_START_OUTPUT;
data->REG_PWM[2] = NCT6106_REG_FAN_STOP_OUTPUT;
static u64 occ_get_powr_avg(u64 *accum, u32 *samples)
{
- return div64_u64(get_unaligned_be64(accum) * 1000000ULL,
- get_unaligned_be32(samples));
+ u64 divisor = get_unaligned_be32(samples);
+
+ return (divisor == 0) ? 0 :
+ div64_u64(get_unaligned_be64(accum) * 1000000ULL, divisor);
}
static ssize_t occ_show_power_2(struct device *dev,
/*
* We need gpu_i2c_suspend() even if it is stub, for runtime pm to work
* correctly. Without it, lspci shows runtime pm status as "D0" for the card.
- * Documentation/power/pci.txt also insists for driver to provide this.
+ * Documentation/power/pci.rst also insists for driver to provide this.
*/
static __maybe_unused int gpu_i2c_suspend(struct device *dev)
{
#include <linux/mem_encrypt.h>
#include <asm/pci-direct.h>
#include <asm/iommu.h>
+#include <asm/apic.h>
+#include <asm/msidef.h>
#include <asm/gart.h>
#include <asm/x86_init.h>
#include <asm/iommu_table.h>
return 0;
}
+#define XT_INT_DEST_MODE(x) (((x) & 0x1ULL) << 2)
+#define XT_INT_DEST_LO(x) (((x) & 0xFFFFFFULL) << 8)
+#define XT_INT_VEC(x) (((x) & 0xFFULL) << 32)
+#define XT_INT_DEST_HI(x) ((((x) >> 24) & 0xFFULL) << 56)
+
+/**
+ * Setup the IntCapXT registers with interrupt routing information
+ * based on the PCI MSI capability block registers, accessed via
+ * MMIO MSI address low/hi and MSI data registers.
+ */
+static void iommu_update_intcapxt(struct amd_iommu *iommu)
+{
+ u64 val;
+ u32 addr_lo = readl(iommu->mmio_base + MMIO_MSI_ADDR_LO_OFFSET);
+ u32 addr_hi = readl(iommu->mmio_base + MMIO_MSI_ADDR_HI_OFFSET);
+ u32 data = readl(iommu->mmio_base + MMIO_MSI_DATA_OFFSET);
+ bool dm = (addr_lo >> MSI_ADDR_DEST_MODE_SHIFT) & 0x1;
+ u32 dest = ((addr_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xFF);
+
+ if (x2apic_enabled())
+ dest |= MSI_ADDR_EXT_DEST_ID(addr_hi);
+
+ val = XT_INT_VEC(data & 0xFF) |
+ XT_INT_DEST_MODE(dm) |
+ XT_INT_DEST_LO(dest) |
+ XT_INT_DEST_HI(dest);
+
+ /**
+ * Current IOMMU implemtation uses the same IRQ for all
+ * 3 IOMMU interrupts.
+ */
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_EVT_OFFSET);
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_PPR_OFFSET);
+ writeq(val, iommu->mmio_base + MMIO_INTCAPXT_GALOG_OFFSET);
+}
+
+static void _irq_notifier_notify(struct irq_affinity_notify *notify,
+ const cpumask_t *mask)
+{
+ struct amd_iommu *iommu;
+
+ for_each_iommu(iommu) {
+ if (iommu->dev->irq == notify->irq) {
+ iommu_update_intcapxt(iommu);
+ break;
+ }
+ }
+}
+
+static void _irq_notifier_release(struct kref *ref)
+{
+}
+
+static int iommu_init_intcapxt(struct amd_iommu *iommu)
+{
+ int ret;
+ struct irq_affinity_notify *notify = &iommu->intcapxt_notify;
+
+ /**
+ * IntCapXT requires XTSup=1, which can be inferred
+ * amd_iommu_xt_mode.
+ */
+ if (amd_iommu_xt_mode != IRQ_REMAP_X2APIC_MODE)
+ return 0;
+
+ /**
+ * Also, we need to setup notifier to update the IntCapXT registers
+ * whenever the irq affinity is changed from user-space.
+ */
+ notify->irq = iommu->dev->irq;
+ notify->notify = _irq_notifier_notify,
+ notify->release = _irq_notifier_release,
+ ret = irq_set_affinity_notifier(iommu->dev->irq, notify);
+ if (ret) {
+ pr_err("Failed to register irq affinity notifier (devid=%#x, irq %d)\n",
+ iommu->devid, iommu->dev->irq);
+ return ret;
+ }
+
+ iommu_update_intcapxt(iommu);
+ iommu_feature_enable(iommu, CONTROL_INTCAPXT_EN);
+ return ret;
+}
+
static int iommu_init_msi(struct amd_iommu *iommu)
{
int ret;
return ret;
enable_faults:
+ ret = iommu_init_intcapxt(iommu);
+ if (ret)
+ return ret;
+
iommu_feature_enable(iommu, CONTROL_EVT_INT_EN);
if (iommu->ppr_log != NULL)
#define MMIO_PPR_LOG_OFFSET 0x0038
#define MMIO_GA_LOG_BASE_OFFSET 0x00e0
#define MMIO_GA_LOG_TAIL_OFFSET 0x00e8
+#define MMIO_MSI_ADDR_LO_OFFSET 0x015C
+#define MMIO_MSI_ADDR_HI_OFFSET 0x0160
+#define MMIO_MSI_DATA_OFFSET 0x0164
+#define MMIO_INTCAPXT_EVT_OFFSET 0x0170
+#define MMIO_INTCAPXT_PPR_OFFSET 0x0178
+#define MMIO_INTCAPXT_GALOG_OFFSET 0x0180
#define MMIO_CMD_HEAD_OFFSET 0x2000
#define MMIO_CMD_TAIL_OFFSET 0x2008
#define MMIO_EVT_HEAD_OFFSET 0x2010
#define CONTROL_GALOG_EN 0x1CULL
#define CONTROL_GAINT_EN 0x1DULL
#define CONTROL_XT_EN 0x32ULL
+#define CONTROL_INTCAPXT_EN 0x33ULL
#define CTRL_INV_TO_MASK (7 << CONTROL_INV_TIMEOUT)
#define CTRL_INV_TO_NONE 0
/* DebugFS Info */
struct dentry *debugfs;
#endif
+ /* IRQ notifier for IntCapXT interrupt */
+ struct irq_affinity_notify intcapxt_notify;
};
static inline struct amd_iommu *dev_to_amd_iommu(struct device *dev)
(u64)0, (u64)0, (u64)0);
else
seq_printf(m, "%-6d\t0x%016llx:0x%016llx:0x%016llx\n",
- tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[0],
+ tbl_wlk->pasid, tbl_wlk->pasid_tbl_entry->val[2],
tbl_wlk->pasid_tbl_entry->val[1],
- tbl_wlk->pasid_tbl_entry->val[2]);
+ tbl_wlk->pasid_tbl_entry->val[0]);
}
static void pasid_tbl_walk(struct seq_file *m, struct pasid_entry *tbl_entry,
static void domain_remove_dev_info(struct dmar_domain *domain);
static void dmar_remove_one_dev_info(struct device *dev);
static void __dmar_remove_one_dev_info(struct device_domain_info *info);
-static void domain_context_clear(struct intel_iommu *iommu,
- struct device *dev);
static int domain_detach_iommu(struct dmar_domain *domain,
struct intel_iommu *iommu);
static bool device_is_rmrr_locked(struct device *dev);
return agaw;
}
+static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu,
+ int guest_width)
+{
+ int adjust_width, agaw;
+ unsigned long sagaw;
+ int err;
+
+ init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+
+ err = init_iova_flush_queue(&domain->iovad,
+ iommu_flush_iova, iova_entry_free);
+ if (err)
+ return err;
+
+ domain_reserve_special_ranges(domain);
+
+ /* calculate AGAW */
+ if (guest_width > cap_mgaw(iommu->cap))
+ guest_width = cap_mgaw(iommu->cap);
+ domain->gaw = guest_width;
+ adjust_width = guestwidth_to_adjustwidth(guest_width);
+ agaw = width_to_agaw(adjust_width);
+ sagaw = cap_sagaw(iommu->cap);
+ if (!test_bit(agaw, &sagaw)) {
+ /* hardware doesn't support it, choose a bigger one */
+ pr_debug("Hardware doesn't support agaw %d\n", agaw);
+ agaw = find_next_bit(&sagaw, 5, agaw);
+ if (agaw >= 5)
+ return -ENODEV;
+ }
+ domain->agaw = agaw;
+
+ if (ecap_coherent(iommu->ecap))
+ domain->iommu_coherency = 1;
+ else
+ domain->iommu_coherency = 0;
+
+ if (ecap_sc_support(iommu->ecap))
+ domain->iommu_snooping = 1;
+ else
+ domain->iommu_snooping = 0;
+
+ if (intel_iommu_superpage)
+ domain->iommu_superpage = fls(cap_super_page_val(iommu->cap));
+ else
+ domain->iommu_superpage = 0;
+
+ domain->nid = iommu->node;
+
+ /* always allocate the top pgd */
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+ if (!domain->pgd)
+ return -ENOMEM;
+ __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
+ return 0;
+}
+
static void domain_exit(struct dmar_domain *domain)
{
- struct page *freelist;
/* Remove associated devices and clear attached or cached domains */
domain_remove_dev_info(domain);
/* destroy iovas */
put_iova_domain(&domain->iovad);
- freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+ if (domain->pgd) {
+ struct page *freelist;
- dma_free_pagelist(freelist);
+ freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
+ dma_free_pagelist(freelist);
+ }
free_domain_mem(domain);
}
return ret;
}
-struct domain_context_mapping_data {
- struct dmar_domain *domain;
- struct intel_iommu *iommu;
- struct pasid_table *table;
-};
-
-static int domain_context_mapping_cb(struct pci_dev *pdev,
- u16 alias, void *opaque)
-{
- struct domain_context_mapping_data *data = opaque;
-
- return domain_context_mapping_one(data->domain, data->iommu,
- data->table, PCI_BUS_NUM(alias),
- alias & 0xff);
-}
-
static int
domain_context_mapping(struct dmar_domain *domain, struct device *dev)
{
- struct domain_context_mapping_data data;
struct pasid_table *table;
struct intel_iommu *iommu;
u8 bus, devfn;
return -ENODEV;
table = intel_pasid_get_table(dev);
-
- if (!dev_is_pci(dev))
- return domain_context_mapping_one(domain, iommu, table,
- bus, devfn);
-
- data.domain = domain;
- data.iommu = iommu;
- data.table = table;
-
- return pci_for_each_dma_alias(to_pci_dev(dev),
- &domain_context_mapping_cb, &data);
+ return domain_context_mapping_one(domain, iommu, table, bus, devfn);
}
static int domain_context_mapped_cb(struct pci_dev *pdev,
return 0;
}
-static int domain_init(struct dmar_domain *domain, int guest_width)
-{
- int adjust_width;
-
- init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
- domain_reserve_special_ranges(domain);
-
- /* calculate AGAW */
- domain->gaw = guest_width;
- adjust_width = guestwidth_to_adjustwidth(guest_width);
- domain->agaw = width_to_agaw(adjust_width);
-
- domain->iommu_coherency = 0;
- domain->iommu_snooping = 0;
- domain->iommu_superpage = 0;
- domain->max_addr = 0;
-
- /* always allocate the top pgd */
- domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
- if (!domain->pgd)
- return -ENOMEM;
- domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
- return 0;
-}
-
static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw)
{
struct device_domain_info *info;
domain = alloc_domain(0);
if (!domain)
return NULL;
-
- if (domain_init(domain, gaw)) {
+ if (domain_init(domain, iommu, gaw)) {
domain_exit(domain);
return NULL;
}
- if (init_iova_flush_queue(&domain->iovad,
- iommu_flush_iova,
- iova_entry_free)) {
- pr_warn("iova flush queue initialization failed\n");
- intel_iommu_strict = 1;
- }
-
out:
return domain;
}
return iommu_domain_identity_map(domain, start, end);
}
+static int md_domain_init(struct dmar_domain *domain, int guest_width);
+
static int __init si_domain_init(int hw)
{
struct dmar_rmrr_unit *rmrr;
if (!si_domain)
return -EFAULT;
- if (domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
domain_exit(si_domain);
return -EFAULT;
}
freelist = domain_unmap(domain, start_pfn, last_pfn);
- if (intel_iommu_strict || (pdev && pdev->untrusted)) {
+ if (intel_iommu_strict || (pdev && pdev->untrusted) ||
+ !has_iova_flush_queue(&domain->iovad)) {
iommu_flush_iotlb_psi(iommu, domain, start_pfn,
nrpages, !freelist, 0);
/* free iova */
return ret;
}
-static int domain_context_clear_one_cb(struct pci_dev *pdev, u16 alias, void *opaque)
-{
- struct intel_iommu *iommu = opaque;
-
- domain_context_clear_one(iommu, PCI_BUS_NUM(alias), alias & 0xff);
- return 0;
-}
-
-/*
- * NB - intel-iommu lacks any sort of reference counting for the users of
- * dependent devices. If multiple endpoints have intersecting dependent
- * devices, unbinding the driver from any one of them will possibly leave
- * the others unable to operate.
- */
-static void domain_context_clear(struct intel_iommu *iommu, struct device *dev)
-{
- if (!iommu || !dev || !dev_is_pci(dev))
- return;
-
- pci_for_each_dma_alias(to_pci_dev(dev), &domain_context_clear_one_cb, iommu);
-}
-
static void __dmar_remove_one_dev_info(struct device_domain_info *info)
{
struct dmar_domain *domain;
PASID_RID2PASID);
iommu_disable_dev_iotlb(info);
- domain_context_clear(iommu, info->dev);
+ domain_context_clear_one(iommu, info->bus, info->devfn);
intel_pasid_free_table(info->dev);
}
spin_unlock_irqrestore(&device_domain_lock, flags);
}
+static int md_domain_init(struct dmar_domain *domain, int guest_width)
+{
+ int adjust_width;
+
+ init_iova_domain(&domain->iovad, VTD_PAGE_SIZE, IOVA_START_PFN);
+ domain_reserve_special_ranges(domain);
+
+ /* calculate AGAW */
+ domain->gaw = guest_width;
+ adjust_width = guestwidth_to_adjustwidth(guest_width);
+ domain->agaw = width_to_agaw(adjust_width);
+
+ domain->iommu_coherency = 0;
+ domain->iommu_snooping = 0;
+ domain->iommu_superpage = 0;
+ domain->max_addr = 0;
+
+ /* always allocate the top pgd */
+ domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
+ if (!domain->pgd)
+ return -ENOMEM;
+ domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
+ return 0;
+}
+
static struct iommu_domain *intel_iommu_domain_alloc(unsigned type)
{
struct dmar_domain *dmar_domain;
pr_err("Can't allocate dmar_domain\n");
return NULL;
}
- if (domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+ if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
pr_err("Domain initialization failed\n");
domain_exit(dmar_domain);
return NULL;
}
EXPORT_SYMBOL_GPL(init_iova_domain);
+bool has_iova_flush_queue(struct iova_domain *iovad)
+{
+ return !!iovad->fq;
+}
+
static void free_iova_flush_queue(struct iova_domain *iovad)
{
- if (!iovad->fq)
+ if (!has_iova_flush_queue(iovad))
return;
if (timer_pending(&iovad->fq_timer))
int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor)
{
+ struct iova_fq __percpu *queue;
int cpu;
atomic64_set(&iovad->fq_flush_start_cnt, 0);
atomic64_set(&iovad->fq_flush_finish_cnt, 0);
- iovad->fq = alloc_percpu(struct iova_fq);
- if (!iovad->fq)
+ queue = alloc_percpu(struct iova_fq);
+ if (!queue)
return -ENOMEM;
iovad->flush_cb = flush_cb;
for_each_possible_cpu(cpu) {
struct iova_fq *fq;
- fq = per_cpu_ptr(iovad->fq, cpu);
+ fq = per_cpu_ptr(queue, cpu);
fq->head = 0;
fq->tail = 0;
spin_lock_init(&fq->lock);
}
+ smp_wmb();
+
+ iovad->fq = queue;
+
timer_setup(&iovad->fq_timer, fq_flush_timeout, 0);
atomic_set(&iovad->fq_timer_on, 0);
struct iova *cached_iova;
cached_iova = rb_entry(iovad->cached32_node, struct iova, node);
- if (free->pfn_hi < iovad->dma_32bit_pfn &&
- free->pfn_lo >= cached_iova->pfn_lo) {
+ if (free == cached_iova ||
+ (free->pfn_hi < iovad->dma_32bit_pfn &&
+ free->pfn_lo >= cached_iova->pfn_lo)) {
iovad->cached32_node = rb_next(&free->node);
iovad->max32_alloc_size = iovad->dma_32bit_pfn;
}
if (dc->io_disable) {
pr_err("I/O disabled on cached dev %s",
dc->backing_dev_name);
+ kfree(env[1]);
+ kfree(env[2]);
+ kfree(buf);
return -EIO;
}
static inline int check_pad(struct v4l2_subdev *sd, __u32 pad)
{
#if defined(CONFIG_MEDIA_CONTROLLER)
- if (sd->entity.graph_obj.mdev) {
+ if (sd->entity.num_pads) {
if (pad >= sd->entity.num_pads)
return -EINVAL;
return 0;
tristate "I2C EEPROMs / RAMs / ROMs from most vendors"
depends on I2C && SYSFS
select NVMEM
+ select NVMEM_SYSFS
select REGMAP_I2C
help
Enable this driver to get read/write support to most I2C EEPROMs
tristate "SPI EEPROMs from most vendors"
depends on SPI && SYSFS
select NVMEM
+ select NVMEM_SYSFS
help
Enable this driver to get read/write support to most SPI EEPROMs,
after you configure the board init code to know about each eeprom
depends on SPI && SYSFS
select REGMAP
select NVMEM
+ select NVMEM_SYSFS
help
Driver for the microwire EEPROM chipsets 93xx46x. The driver
supports both read and write commands and also the command to
goto free_dma_pool;
}
- dev_dbg(hdev->dev, "cpu accessible memory at bus address 0x%llx\n",
- hdev->cpu_accessible_dma_address);
+ dev_dbg(hdev->dev, "cpu accessible memory at bus address %pad\n",
+ &hdev->cpu_accessible_dma_address);
hdev->cpu_accessible_dma_pool = gen_pool_create(ilog2(32), -1);
if (!hdev->cpu_accessible_dma_pool) {
case GOYA_ASYNC_EVENT_ID_AXI_ECC:
case GOYA_ASYNC_EVENT_ID_L2_RAM_ECC:
case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_05_SW_RESET:
- case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
goya_print_irq_info(hdev, event_type, false);
hl_device_reset(hdev, true, false);
break;
goya_unmask_irq(hdev, event_type);
break;
+ case GOYA_ASYNC_EVENT_ID_PSOC_GPIO_10_VRHOT_ICRIT:
case GOYA_ASYNC_EVENT_ID_TPC0_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC1_BMON_SPMU:
case GOYA_ASYNC_EVENT_ID_TPC2_BMON_SPMU:
#define MEI_DEV_ID_ICP_LP 0x34E0 /* Ice Lake Point LP */
+#define MEI_DEV_ID_MCC 0x4B70 /* Mule Creek Canyon (EHL) */
+#define MEI_DEV_ID_MCC_4 0x4B75 /* Mule Creek Canyon 4 (EHL) */
+
/*
* MEI HW Section
*/
{MEI_PCI_DEVICE(MEI_DEV_ID_ICP_LP, MEI_ME_PCH12_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_MCC, MEI_ME_PCH12_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_MCC_4, MEI_ME_PCH8_CFG)},
+
/* required last entry */
{0, }
};
switch (density) {
case ONENAND_DEVICE_DENSITY_8Gb:
this->options |= ONENAND_HAS_NOP_1;
+ /* fall through */
case ONENAND_DEVICE_DENSITY_4Gb:
if (ONENAND_IS_DDP(this))
this->options |= ONENAND_HAS_2PLANE;
CAN_ERR_CRTL_TX_WARNING :
CAN_ERR_CRTL_RX_WARNING;
}
- case CAN_STATE_ERROR_WARNING: /* fallthrough */
+ /* fall through */
+ case CAN_STATE_ERROR_WARNING:
/*
* from: ERROR_ACTIVE, ERROR_WARNING
* to : ERROR_PASSIVE, BUS_OFF
netdev_dbg(dev, "Error Active\n");
cf->can_id |= CAN_ERR_PROT;
cf->data[2] = CAN_ERR_PROT_ACTIVE;
- case CAN_STATE_ERROR_WARNING: /* fallthrough */
+ /* fall through */
+ case CAN_STATE_ERROR_WARNING:
reg_idr = AT91_IRQ_ERRA | AT91_IRQ_WARN | AT91_IRQ_BOFF;
reg_ier = AT91_IRQ_ERRP;
break;
pciefd_can_writereg(priv, CANFD_CLK_SEL_80MHZ,
PCIEFD_REG_CAN_CLK_SEL);
- /* fallthough */
+ /* fall through */
case CANFD_CLK_SEL_80MHZ:
priv->ucan.can.clock.freq = 80 * 1000 * 1000;
break;
if (new_state >= CAN_STATE_ERROR_WARNING &&
new_state <= CAN_STATE_BUS_OFF)
priv->can.can_stats.error_warning++;
- case CAN_STATE_ERROR_WARNING: /* fallthrough */
+ /* fall through */
+ case CAN_STATE_ERROR_WARNING:
if (new_state >= CAN_STATE_ERROR_PASSIVE &&
new_state <= CAN_STATE_BUS_OFF)
priv->can.can_stats.error_passive++;
new_state = CAN_STATE_ERROR_WARNING;
break;
}
- /* else: fall through */
+ /* fall through */
case CAN_STATE_ERROR_WARNING:
if (n & PCAN_USB_ERROR_BUS_HEAVY) {
hw_cons = le16_to_cpu(*txdata->tx_cons_sb);
sw_cons = txdata->tx_pkt_cons;
+ /* Ensure subsequent loads occur after hw_cons */
+ smp_rmb();
+
while (sw_cons != hw_cons) {
u16 pkt_cons;
return cphy_cause_link_change;
}
-static void my3216_poll(struct work_struct *work)
+static void my3126_poll(struct work_struct *work)
{
struct cphy *cphy = container_of(work, struct cphy, phy_update.work);
return NULL;
cphy_init(cphy, dev, phy_addr, &my3126_ops, mdio_ops);
- INIT_DELAYED_WORK(&cphy->phy_update, my3216_poll);
+ INIT_DELAYED_WORK(&cphy->phy_update, my3126_poll);
cphy->bmsr = 0;
return cphy;
char name[IFNAMSIZ];
u32 devcap2;
u16 flags;
- int pos;
/* If we want to instantiate Virtual Functions, then our
* parent bridge's PCI-E needs to support Alternative Routing
* and above.
*/
pbridge = pdev->bus->self;
- pos = pci_find_capability(pbridge, PCI_CAP_ID_EXP);
- pci_read_config_word(pbridge, pos + PCI_EXP_FLAGS, &flags);
- pci_read_config_dword(pbridge, pos + PCI_EXP_DEVCAP2, &devcap2);
+ pcie_capability_read_word(pbridge, PCI_EXP_FLAGS, &flags);
+ pcie_capability_read_dword(pbridge, PCI_EXP_DEVCAP2, &devcap2);
if ((flags & PCI_EXP_FLAGS_VERS) < 2 ||
!(devcap2 & PCI_EXP_DEVCAP2_ARI)) {
static struct ch_tc_flower_entry *allocate_flower_entry(void)
{
struct ch_tc_flower_entry *new = kzalloc(sizeof(*new), GFP_KERNEL);
- spin_lock_init(&new->lock);
+ if (new)
+ spin_lock_init(&new->lock);
return new;
}
} else {
unsigned int pack_align;
unsigned int ingpad, ingpack;
- unsigned int pcie_cap;
/* T5 introduced the separation of the Free List Padding and
* Packing Boundaries. Thus, we can select a smaller Padding
* multiple of the Maximum Payload Size.
*/
pack_align = fl_align;
- pcie_cap = pci_find_capability(adap->pdev, PCI_CAP_ID_EXP);
- if (pcie_cap) {
+ if (pci_is_pcie(adap->pdev)) {
unsigned int mps, mps_log;
u16 devctl;
* [bits 7:5] encodes sizes as powers of 2 starting at
* 128 bytes.
*/
- pci_read_config_word(adap->pdev,
- pcie_cap + PCI_EXP_DEVCTL,
- &devctl);
+ pcie_capability_read_word(adap->pdev, PCI_EXP_DEVCTL,
+ &devctl);
mps_log = ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5) + 7;
mps = 1 << mps_log;
if (mps > pack_align)
int status;
if (netif_running(netdev)) {
+ /* be_tx_timeout() must not run concurrently with this
+ * function, synchronize with an already-running dev_watchdog
+ */
+ netif_tx_lock_bh(netdev);
/* device cannot transmit now, avoid dev_watchdog timeouts */
netif_carrier_off(netdev);
+ netif_tx_unlock_bh(netdev);
be_close(netdev);
}
HCLGE_MBX_GET_QID_IN_PF, /* (VF -> PF) get queue id in pf */
HCLGE_MBX_LINK_STAT_MODE, /* (PF -> VF) link mode has changed */
HCLGE_MBX_GET_LINK_MODE, /* (VF -> PF) get the link mode of pf */
- HLCGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */
+ HCLGE_MBX_PUSH_VLAN_INFO, /* (PF -> VF) push port base vlan */
HCLGE_MBX_GET_MEDIA_TYPE, /* (VF -> PF) get media type */
HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf reset status */
memcpy(&msg_data[6], &vlan_tag, sizeof(u16));
return hclge_send_mbx_msg(vport, msg_data, sizeof(msg_data),
- HLCGE_MBX_PUSH_VLAN_INFO, vfid);
+ HCLGE_MBX_PUSH_VLAN_INFO, vfid);
}
static int hclge_set_vf_vlan_cfg(struct hclge_vport *vport,
case HCLGE_MBX_LINK_STAT_CHANGE:
case HCLGE_MBX_ASSERTING_RESET:
case HCLGE_MBX_LINK_STAT_MODE:
- case HLCGE_MBX_PUSH_VLAN_INFO:
+ case HCLGE_MBX_PUSH_VLAN_INFO:
/* set this mbx event as pending. This is required as we
* might loose interrupt event when mbx task is busy
* handling. This shall be cleared when mbx task just
hclgevf_reset_task_schedule(hdev);
break;
- case HLCGE_MBX_PUSH_VLAN_INFO:
+ case HCLGE_MBX_PUSH_VLAN_INFO:
state = le16_to_cpu(msg_q[1]);
vlan_info = &msg_q[1];
hclgevf_update_port_base_vlan_info(hdev, state,
s32 igc_read_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
{
struct igc_adapter *adapter = hw->back;
- u16 cap_offset;
- cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
- if (!cap_offset)
+ if (!pci_is_pcie(adapter->pdev))
return -IGC_ERR_CONFIG;
- pci_read_config_word(adapter->pdev, cap_offset + reg, value);
+ pcie_capability_read_word(adapter->pdev, reg, value);
return IGC_SUCCESS;
}
s32 igc_write_pcie_cap_reg(struct igc_hw *hw, u32 reg, u16 *value)
{
struct igc_adapter *adapter = hw->back;
- u16 cap_offset;
- cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
- if (!cap_offset)
+ if (!pci_is_pcie(adapter->pdev))
return -IGC_ERR_CONFIG;
- pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
+ pcie_capability_write_word(adapter->pdev, reg, *value);
return IGC_SUCCESS;
}
list_add(&indr_priv->list,
&rpriv->uplink_priv.tc_indr_block_priv_list);
- block_cb = flow_block_cb_alloc(f->net,
- mlx5e_rep_indr_setup_block_cb,
+ block_cb = flow_block_cb_alloc(mlx5e_rep_indr_setup_block_cb,
indr_priv, indr_priv,
mlx5e_rep_indr_tc_block_unbind);
if (IS_ERR(block_cb)) {
if (!indr_priv)
return -ENOENT;
- block_cb = flow_block_cb_lookup(f,
+ block_cb = flow_block_cb_lookup(f->block,
mlx5e_rep_indr_setup_block_cb,
indr_priv);
if (!block_cb)
bool register_block = false;
int err;
- block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower,
+ block_cb = flow_block_cb_lookup(f->block,
+ mlxsw_sp_setup_tc_block_cb_flower,
mlxsw_sp);
if (!block_cb) {
acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net);
if (!acl_block)
return -ENOMEM;
- block_cb = flow_block_cb_alloc(f->net,
- mlxsw_sp_setup_tc_block_cb_flower,
+ block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb_flower,
mlxsw_sp, acl_block,
mlxsw_sp_tc_block_flower_release);
if (IS_ERR(block_cb)) {
struct flow_block_cb *block_cb;
int err;
- block_cb = flow_block_cb_lookup(f, mlxsw_sp_setup_tc_block_cb_flower,
+ block_cb = flow_block_cb_lookup(f->block,
+ mlxsw_sp_setup_tc_block_cb_flower,
mlxsw_sp);
if (!block_cb)
return;
struct flow_block_offload *f)
{
struct flow_block_cb *block_cb;
- tc_setup_cb_t *cb;
+ flow_setup_cb_t *cb;
bool ingress;
int err;
&mlxsw_sp_block_cb_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net, cb, mlxsw_sp_port,
+ block_cb = flow_block_cb_alloc(cb, mlxsw_sp_port,
mlxsw_sp_port, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
case FLOW_BLOCK_UNBIND:
mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port,
f, ingress);
- block_cb = flow_block_cb_lookup(f, cb, mlxsw_sp_port);
+ block_cb = flow_block_cb_lookup(f->block, cb, mlxsw_sp_port);
if (!block_cb)
return -ENOENT;
if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
return -EOPNOTSUPP;
- block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower,
- port);
+ block_cb = flow_block_cb_lookup(f->block,
+ ocelot_setup_tc_block_cb_flower, port);
if (!block_cb) {
port_block = ocelot_port_block_create(port);
if (!port_block)
return -ENOMEM;
- block_cb = flow_block_cb_alloc(f->net,
- ocelot_setup_tc_block_cb_flower,
+ block_cb = flow_block_cb_alloc(ocelot_setup_tc_block_cb_flower,
port, port_block,
ocelot_tc_block_unbind);
if (IS_ERR(block_cb)) {
{
struct flow_block_cb *block_cb;
- block_cb = flow_block_cb_lookup(f, ocelot_setup_tc_block_cb_flower,
- port);
+ block_cb = flow_block_cb_lookup(f->block,
+ ocelot_setup_tc_block_cb_flower, port);
if (!block_cb)
return;
struct flow_block_offload *f)
{
struct flow_block_cb *block_cb;
- tc_setup_cb_t *cb;
+ flow_setup_cb_t *cb;
int err;
netdev_dbg(port->dev, "tc_block command %d, binder_type %d\n",
if (flow_block_cb_is_busy(cb, port, &ocelot_block_cb_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net, cb, port, port, NULL);
+ block_cb = flow_block_cb_alloc(cb, port, port, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
list_add_tail(&block_cb->driver_list, f->driver_block_list);
return 0;
case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f, cb, port);
+ block_cb = flow_block_cb_lookup(f->block, cb, port);
if (!block_cb)
return -ENOENT;
&nfp_block_cb_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net,
- nfp_flower_setup_tc_block_cb,
+ block_cb = flow_block_cb_alloc(nfp_flower_setup_tc_block_cb,
repr, repr, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
list_add_tail(&block_cb->driver_list, &nfp_block_cb_list);
return 0;
case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f, nfp_flower_setup_tc_block_cb,
+ block_cb = flow_block_cb_lookup(f->block,
+ nfp_flower_setup_tc_block_cb,
repr);
if (!block_cb)
return -ENOENT;
cb_priv->app = app;
list_add(&cb_priv->list, &priv->indr_block_cb_priv);
- block_cb = flow_block_cb_alloc(f->net,
- nfp_flower_setup_indr_block_cb,
+ block_cb = flow_block_cb_alloc(nfp_flower_setup_indr_block_cb,
cb_priv, cb_priv,
nfp_flower_setup_indr_tc_release);
if (IS_ERR(block_cb)) {
if (!cb_priv)
return -ENOENT;
- block_cb = flow_block_cb_lookup(f,
+ block_cb = flow_block_cb_lookup(f->block,
nfp_flower_setup_indr_block_cb,
cb_priv);
if (!block_cb)
SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_LOCAL_INV_FENCE, 1);
/* Check atomic operations support in PCI configuration space. */
- pci_read_config_dword(cdev->pdev,
- cdev->pdev->pcie_cap + PCI_EXP_DEVCTL2,
- &pci_status_control);
+ pcie_capability_read_dword(cdev->pdev, PCI_EXP_DEVCTL2,
+ &pci_status_control);
if (pci_status_control & PCI_EXP_DEVCTL2_LTR_EN)
SET_FIELD(dev->dev_caps, QED_RDMA_DEV_CAP_ATOMIC_OP, 1);
ret = phy_read_paged(tp->phydev, 0x0a46, 0x13);
if (ret & BIT(8))
- phy_modify_paged(tp->phydev, 0x0c41, 0x12, 0, BIT(1));
+ phy_modify_paged(tp->phydev, 0x0c41, 0x15, 0, BIT(1));
else
- phy_modify_paged(tp->phydev, 0x0c41, 0x12, BIT(1), 0);
+ phy_modify_paged(tp->phydev, 0x0c41, 0x15, BIT(1), 0);
/* Enable PHY auto speed down */
phy_modify_paged(tp->phydev, 0x0a44, 0x11, 0, BIT(3) | BIT(2));
if (unlikely(!skb)) {
++net_device_ctx->eth_stats.rx_no_memory;
- rcu_read_unlock();
return NVSP_STAT_FAIL;
}
static void sfp_hwmon_to_rx_power(long *value)
{
- *value = DIV_ROUND_CLOSEST(*value, 100);
+ *value = DIV_ROUND_CLOSEST(*value, 10);
}
static void sfp_hwmon_calibrate(struct sfp *sfp, unsigned int slope, int offset,
static netdev_tx_t vrf_process_v6_outbound(struct sk_buff *skb,
struct net_device *dev)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const struct ipv6hdr *iph;
struct net *net = dev_net(skb->dev);
- struct flowi6 fl6 = {
- /* needed to match OIF rule */
- .flowi6_oif = dev->ifindex,
- .flowi6_iif = LOOPBACK_IFINDEX,
- .daddr = iph->daddr,
- .saddr = iph->saddr,
- .flowlabel = ip6_flowinfo(iph),
- .flowi6_mark = skb->mark,
- .flowi6_proto = iph->nexthdr,
- .flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF,
- };
+ struct flowi6 fl6;
int ret = NET_XMIT_DROP;
struct dst_entry *dst;
struct dst_entry *dst_null = &net->ipv6.ip6_null_entry->dst;
+ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct ipv6hdr)))
+ goto err;
+
+ iph = ipv6_hdr(skb);
+
+ memset(&fl6, 0, sizeof(fl6));
+ /* needed to match OIF rule */
+ fl6.flowi6_oif = dev->ifindex;
+ fl6.flowi6_iif = LOOPBACK_IFINDEX;
+ fl6.daddr = iph->daddr;
+ fl6.saddr = iph->saddr;
+ fl6.flowlabel = ip6_flowinfo(iph);
+ fl6.flowi6_mark = skb->mark;
+ fl6.flowi6_proto = iph->nexthdr;
+ fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF;
+
dst = ip6_route_output(net, NULL, &fl6);
if (dst == dst_null)
goto err;
static netdev_tx_t vrf_process_v4_outbound(struct sk_buff *skb,
struct net_device *vrf_dev)
{
- struct iphdr *ip4h = ip_hdr(skb);
+ struct iphdr *ip4h;
int ret = NET_XMIT_DROP;
- struct flowi4 fl4 = {
- /* needed to match OIF rule */
- .flowi4_oif = vrf_dev->ifindex,
- .flowi4_iif = LOOPBACK_IFINDEX,
- .flowi4_tos = RT_TOS(ip4h->tos),
- .flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF,
- .flowi4_proto = ip4h->protocol,
- .daddr = ip4h->daddr,
- .saddr = ip4h->saddr,
- };
+ struct flowi4 fl4;
struct net *net = dev_net(vrf_dev);
struct rtable *rt;
+ if (!pskb_may_pull(skb, ETH_HLEN + sizeof(struct iphdr)))
+ goto err;
+
+ ip4h = ip_hdr(skb);
+
+ memset(&fl4, 0, sizeof(fl4));
+ /* needed to match OIF rule */
+ fl4.flowi4_oif = vrf_dev->ifindex;
+ fl4.flowi4_iif = LOOPBACK_IFINDEX;
+ fl4.flowi4_tos = RT_TOS(ip4h->tos);
+ fl4.flowi4_flags = FLOWI_FLAG_ANYSRC | FLOWI_FLAG_SKIP_NH_OIF;
+ fl4.flowi4_proto = ip4h->protocol;
+ fl4.daddr = ip4h->daddr;
+ fl4.saddr = ip4h->saddr;
+
rt = ip_route_output_flow(net, &fl4, NULL);
if (IS_ERR(rt))
goto err;
.info.subcmd = QCA_NL80211_VENDOR_SUBCMD_DMG_RF_GET_SECTOR_CFG,
.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
WIPHY_VENDOR_CMD_NEED_RUNNING,
+ .policy = wil_rf_sector_policy,
.doit = wil_rf_sector_get_cfg
},
{
.info.subcmd = QCA_NL80211_VENDOR_SUBCMD_DMG_RF_SET_SECTOR_CFG,
.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
WIPHY_VENDOR_CMD_NEED_RUNNING,
+ .policy = wil_rf_sector_policy,
.doit = wil_rf_sector_set_cfg
},
{
QCA_NL80211_VENDOR_SUBCMD_DMG_RF_GET_SELECTED_SECTOR,
.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
WIPHY_VENDOR_CMD_NEED_RUNNING,
+ .policy = wil_rf_sector_policy,
.doit = wil_rf_sector_get_selected
},
{
QCA_NL80211_VENDOR_SUBCMD_DMG_RF_SET_SELECTED_SECTOR,
.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
WIPHY_VENDOR_CMD_NEED_RUNNING,
+ .policy = wil_rf_sector_policy,
.doit = wil_rf_sector_set_selected
},
};
},
.flags = WIPHY_VENDOR_CMD_NEED_WDEV |
WIPHY_VENDOR_CMD_NEED_NETDEV,
+ .policy = VENDOR_CMD_RAW_DATA,
.doit = brcmf_cfg80211_vndr_cmds_dcmd_handler
},
};
.flags = WIPHY_VENDOR_CMD_NEED_NETDEV |
WIPHY_VENDOR_CMD_NEED_RUNNING,
.doit = wlcore_vendor_cmd_smart_config_start,
+ .policy = wlcore_vendor_attr_policy,
},
{
.info = {
.flags = WIPHY_VENDOR_CMD_NEED_NETDEV |
WIPHY_VENDOR_CMD_NEED_RUNNING,
.doit = wlcore_vendor_cmd_smart_config_stop,
+ .policy = wlcore_vendor_attr_policy,
},
{
.info = {
.flags = WIPHY_VENDOR_CMD_NEED_NETDEV |
WIPHY_VENDOR_CMD_NEED_RUNNING,
.doit = wlcore_vendor_cmd_smart_config_set_group_key,
+ .policy = wlcore_vendor_attr_policy,
},
};
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
btt_lbasize_supported);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
struct nd_btt *nd_btt = to_nd_btt(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver)
rc = sprintf(buf, "%llu\n", nd_btt->size);
else {
/* no size to convey if the btt instance is disabled */
rc = -ENXIO;
}
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
int nvdimm_major;
static int nvdimm_bus_major;
-static struct class *nd_class;
+struct class *nd_class;
static DEFINE_IDA(nd_ida);
static int to_nd_device_type(struct device *dev)
{
nvdimm_bus_lock(&nvdimm_bus->dev);
if (--nvdimm_bus->probe_active == 0)
- wake_up(&nvdimm_bus->probe_wait);
+ wake_up(&nvdimm_bus->wait);
nvdimm_bus_unlock(&nvdimm_bus->dev);
}
dev->driver->name, dev_name(dev));
nvdimm_bus_probe_start(nvdimm_bus);
+ debug_nvdimm_lock(dev);
rc = nd_drv->probe(dev);
+ debug_nvdimm_unlock(dev);
+
if (rc == 0)
nd_region_probe_success(nvdimm_bus, dev);
else
struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
int rc = 0;
- if (nd_drv->remove)
+ if (nd_drv->remove) {
+ debug_nvdimm_lock(dev);
rc = nd_drv->remove(dev);
+ debug_nvdimm_unlock(dev);
+ }
nd_region_disable(nvdimm_bus, dev);
dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
void nd_device_notify(struct device *dev, enum nvdimm_event event)
{
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver) {
struct nd_device_driver *nd_drv;
if (nd_drv->notify)
nd_drv->notify(dev, event);
}
- device_unlock(dev);
+ nd_device_unlock(dev);
}
EXPORT_SYMBOL(nd_device_notify);
kfree(nvdimm_bus);
}
-static bool is_nvdimm_bus(struct device *dev)
+bool is_nvdimm_bus(struct device *dev)
{
return dev->release == nvdimm_bus_release;
}
return NULL;
INIT_LIST_HEAD(&nvdimm_bus->list);
INIT_LIST_HEAD(&nvdimm_bus->mapping_list);
- init_waitqueue_head(&nvdimm_bus->probe_wait);
+ init_waitqueue_head(&nvdimm_bus->wait);
nvdimm_bus->id = ida_simple_get(&nd_ida, 0, 0, GFP_KERNEL);
if (nvdimm_bus->id < 0) {
kfree(nvdimm_bus);
list_del_init(&nvdimm_bus->list);
mutex_unlock(&nvdimm_bus_list_mutex);
+ wait_event(nvdimm_bus->wait,
+ atomic_read(&nvdimm_bus->ioctl_active) == 0);
+
nd_synchronize();
device_for_each_child(&nvdimm_bus->dev, NULL, child_unregister);
void nd_device_unregister(struct device *dev, enum nd_async_mode mode)
{
+ bool killed;
+
switch (mode) {
case ND_ASYNC:
+ /*
+ * In the async case this is being triggered with the
+ * device lock held and the unregistration work needs to
+ * be moved out of line iff this is thread has won the
+ * race to schedule the deletion.
+ */
+ if (!kill_device(dev))
+ return;
+
get_device(dev);
async_schedule_domain(nd_async_device_unregister, dev,
&nd_async_domain);
break;
case ND_SYNC:
+ /*
+ * In the sync case the device is being unregistered due
+ * to a state change of the parent. Claim the kill state
+ * to synchronize against other unregistration requests,
+ * or otherwise let the async path handle it if the
+ * unregistration was already queued.
+ */
+ nd_device_lock(dev);
+ killed = kill_device(dev);
+ nd_device_unlock(dev);
+
+ if (!killed)
+ return;
+
nd_synchronize();
device_unregister(dev);
break;
do {
if (nvdimm_bus->probe_active == 0)
break;
- nvdimm_bus_unlock(&nvdimm_bus->dev);
- wait_event(nvdimm_bus->probe_wait,
+ nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
+ wait_event(nvdimm_bus->wait,
nvdimm_bus->probe_active == 0);
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ nd_device_lock(dev);
+ nvdimm_bus_lock(dev);
} while (true);
}
int read_only, unsigned int ioctl_cmd, unsigned long arg)
{
struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
- static char out_env[ND_CMD_MAX_ENVELOPE];
- static char in_env[ND_CMD_MAX_ENVELOPE];
const struct nd_cmd_desc *desc = NULL;
unsigned int cmd = _IOC_NR(ioctl_cmd);
struct device *dev = &nvdimm_bus->dev;
void __user *p = (void __user *) arg;
+ char *out_env = NULL, *in_env = NULL;
const char *cmd_name, *dimm_name;
u32 in_len = 0, out_len = 0;
unsigned int func = cmd;
unsigned long cmd_mask;
struct nd_cmd_pkg pkg;
int rc, i, cmd_rc;
+ void *buf = NULL;
u64 buf_len = 0;
- void *buf;
if (nvdimm) {
desc = nd_cmd_dimm_desc(cmd);
case ND_CMD_ARS_START:
case ND_CMD_CLEAR_ERROR:
case ND_CMD_CALL:
- dev_dbg(&nvdimm_bus->dev, "'%s' command while read-only.\n",
+ dev_dbg(dev, "'%s' command while read-only.\n",
nvdimm ? nvdimm_cmd_name(cmd)
: nvdimm_bus_cmd_name(cmd));
return -EPERM;
}
/* process an input envelope */
+ in_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
+ if (!in_env)
+ return -ENOMEM;
for (i = 0; i < desc->in_num; i++) {
u32 in_size, copy;
if (in_size == UINT_MAX) {
dev_err(dev, "%s:%s unknown input size cmd: %s field: %d\n",
__func__, dimm_name, cmd_name, i);
- return -ENXIO;
+ rc = -ENXIO;
+ goto out;
}
- if (in_len < sizeof(in_env))
- copy = min_t(u32, sizeof(in_env) - in_len, in_size);
+ if (in_len < ND_CMD_MAX_ENVELOPE)
+ copy = min_t(u32, ND_CMD_MAX_ENVELOPE - in_len, in_size);
else
copy = 0;
- if (copy && copy_from_user(&in_env[in_len], p + in_len, copy))
- return -EFAULT;
+ if (copy && copy_from_user(&in_env[in_len], p + in_len, copy)) {
+ rc = -EFAULT;
+ goto out;
+ }
in_len += in_size;
}
}
/* process an output envelope */
+ out_env = kzalloc(ND_CMD_MAX_ENVELOPE, GFP_KERNEL);
+ if (!out_env) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
for (i = 0; i < desc->out_num; i++) {
u32 out_size = nd_cmd_out_size(nvdimm, cmd, desc, i,
(u32 *) in_env, (u32 *) out_env, 0);
if (out_size == UINT_MAX) {
dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
dimm_name, cmd_name, i);
- return -EFAULT;
+ rc = -EFAULT;
+ goto out;
}
- if (out_len < sizeof(out_env))
- copy = min_t(u32, sizeof(out_env) - out_len, out_size);
+ if (out_len < ND_CMD_MAX_ENVELOPE)
+ copy = min_t(u32, ND_CMD_MAX_ENVELOPE - out_len, out_size);
else
copy = 0;
if (copy && copy_from_user(&out_env[out_len],
- p + in_len + out_len, copy))
- return -EFAULT;
+ p + in_len + out_len, copy)) {
+ rc = -EFAULT;
+ goto out;
+ }
out_len += out_size;
}
if (buf_len > ND_IOCTL_MAX_BUFLEN) {
dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
- return -EINVAL;
+ rc = -EINVAL;
+ goto out;
}
buf = vmalloc(buf_len);
- if (!buf)
- return -ENOMEM;
+ if (!buf) {
+ rc = -ENOMEM;
+ goto out;
+ }
if (copy_from_user(buf, p, buf_len)) {
rc = -EFAULT;
goto out;
}
- nvdimm_bus_lock(&nvdimm_bus->dev);
+ nd_device_lock(dev);
+ nvdimm_bus_lock(dev);
rc = nd_cmd_clear_to_send(nvdimm_bus, nvdimm, func, buf);
if (rc)
goto out_unlock;
nvdimm_account_cleared_poison(nvdimm_bus, clear_err->address,
clear_err->cleared);
}
- nvdimm_bus_unlock(&nvdimm_bus->dev);
if (copy_to_user(p, buf, buf_len))
rc = -EFAULT;
- vfree(buf);
- return rc;
-
- out_unlock:
- nvdimm_bus_unlock(&nvdimm_bus->dev);
- out:
+out_unlock:
+ nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
+out:
+ kfree(in_env);
+ kfree(out_env);
vfree(buf);
return rc;
}
-static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
-{
- long id = (long) file->private_data;
- int rc = -ENXIO, ro;
- struct nvdimm_bus *nvdimm_bus;
-
- ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
- mutex_lock(&nvdimm_bus_list_mutex);
- list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
- if (nvdimm_bus->id == id) {
- rc = __nd_ioctl(nvdimm_bus, NULL, ro, cmd, arg);
- break;
- }
- }
- mutex_unlock(&nvdimm_bus_list_mutex);
-
- return rc;
-}
+enum nd_ioctl_mode {
+ BUS_IOCTL,
+ DIMM_IOCTL,
+};
static int match_dimm(struct device *dev, void *data)
{
return 0;
}
-static long nvdimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+static long nd_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+ enum nd_ioctl_mode mode)
+
{
- int rc = -ENXIO, ro;
- struct nvdimm_bus *nvdimm_bus;
+ struct nvdimm_bus *nvdimm_bus, *found = NULL;
+ long id = (long) file->private_data;
+ struct nvdimm *nvdimm = NULL;
+ int rc, ro;
ro = ((file->f_flags & O_ACCMODE) == O_RDONLY);
mutex_lock(&nvdimm_bus_list_mutex);
list_for_each_entry(nvdimm_bus, &nvdimm_bus_list, list) {
- struct device *dev = device_find_child(&nvdimm_bus->dev,
- file->private_data, match_dimm);
- struct nvdimm *nvdimm;
-
- if (!dev)
- continue;
+ if (mode == DIMM_IOCTL) {
+ struct device *dev;
+
+ dev = device_find_child(&nvdimm_bus->dev,
+ file->private_data, match_dimm);
+ if (!dev)
+ continue;
+ nvdimm = to_nvdimm(dev);
+ found = nvdimm_bus;
+ } else if (nvdimm_bus->id == id) {
+ found = nvdimm_bus;
+ }
- nvdimm = to_nvdimm(dev);
- rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
- put_device(dev);
- break;
+ if (found) {
+ atomic_inc(&nvdimm_bus->ioctl_active);
+ break;
+ }
}
mutex_unlock(&nvdimm_bus_list_mutex);
+ if (!found)
+ return -ENXIO;
+
+ nvdimm_bus = found;
+ rc = __nd_ioctl(nvdimm_bus, nvdimm, ro, cmd, arg);
+
+ if (nvdimm)
+ put_device(&nvdimm->dev);
+ if (atomic_dec_and_test(&nvdimm_bus->ioctl_active))
+ wake_up(&nvdimm_bus->wait);
+
return rc;
}
+static long bus_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return nd_ioctl(file, cmd, arg, BUS_IOCTL);
+}
+
+static long dimm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ return nd_ioctl(file, cmd, arg, DIMM_IOCTL);
+}
+
static int nd_open(struct inode *inode, struct file *file)
{
long minor = iminor(inode);
static const struct file_operations nvdimm_bus_fops = {
.owner = THIS_MODULE,
.open = nd_open,
- .unlocked_ioctl = nd_ioctl,
- .compat_ioctl = nd_ioctl,
+ .unlocked_ioctl = bus_ioctl,
+ .compat_ioctl = bus_ioctl,
.llseek = noop_llseek,
};
static const struct file_operations nvdimm_fops = {
.owner = THIS_MODULE,
.open = nd_open,
- .unlocked_ioctl = nvdimm_ioctl,
- .compat_ioctl = nvdimm_ioctl,
+ .unlocked_ioctl = dimm_ioctl,
+ .compat_ioctl = dimm_ioctl,
.llseek = noop_llseek,
};
*
* Enforce that uuids can only be changed while the device is disabled
* (driver detached)
- * LOCKING: expects device_lock() is held on entry
+ * LOCKING: expects nd_device_lock() is held on entry
*/
int nd_uuid_store(struct device *dev, u8 **uuid_out, const char *buf,
size_t len)
static int flush_namespaces(struct device *dev, void *data)
{
- device_lock(dev);
- device_unlock(dev);
+ nd_device_lock(dev);
+ nd_device_unlock(dev);
return 0;
}
static int flush_regions_dimms(struct device *dev, void *data)
{
- device_lock(dev);
- device_unlock(dev);
+ nd_device_lock(dev);
+ nd_device_unlock(dev);
device_for_each_child(dev, NULL, flush_namespaces);
return 0;
}
* done while probing is idle and the DIMM is not in active use
* in any region.
*/
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __security_store(dev, buf, len);
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
struct nd_region *nd_region = to_nd_region(dev->parent);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __alt_name_store(dev, buf, len);
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc < 0 ? rc : len;
}
if (rc)
return rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __size_store(dev, val);
dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc < 0 ? rc : len;
}
} else
return -ENXIO;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
if (to_ndns(dev)->claim)
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc < 0 ? rc : len;
}
} else
return -ENXIO;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
if (to_ndns(dev)->claim)
rc = -EBUSY;
dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
buf, buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
struct nd_namespace_common *ndns = to_ndns(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
rc = sprintf(buf, "%s\n", ndns->claim ? dev_name(ndns->claim) : "");
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
struct nd_region *nd_region = to_nd_region(dev->parent);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
rc = __holder_class_store(dev, buf);
rc = nd_namespace_label_update(nd_region, dev);
dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc < 0 ? rc : len;
}
struct nd_namespace_common *ndns = to_ndns(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (ndns->claim_class == NVDIMM_CCLASS_NONE)
rc = sprintf(buf, "\n");
else if ((ndns->claim_class == NVDIMM_CCLASS_BTT) ||
rc = sprintf(buf, "dax\n");
else
rc = sprintf(buf, "<unknown>\n");
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
char *mode;
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
claim = ndns->claim;
if (claim && is_nd_btt(claim))
mode = "safe";
else
mode = "raw";
rc = sprintf(buf, "%s\n", mode);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
* Flush any in-progess probes / removals in the driver
* for the raw personality of this namespace.
*/
- device_lock(&ndns->dev);
- device_unlock(&ndns->dev);
+ nd_device_lock(&ndns->dev);
+ nd_device_unlock(&ndns->dev);
if (ndns->dev.driver) {
dev_dbg(&ndns->dev, "is active, can't bind %s\n",
dev_name(dev));
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/nd.h>
+#include "nd.h"
extern struct list_head nvdimm_bus_list;
extern struct mutex nvdimm_bus_list_mutex;
struct nvdimm_bus {
struct nvdimm_bus_descriptor *nd_desc;
- wait_queue_head_t probe_wait;
+ wait_queue_head_t wait;
struct list_head list;
struct device dev;
int id, probe_active;
+ atomic_t ioctl_active;
struct list_head mapping_list;
struct mutex reconfig_mutex;
struct badrange badrange;
struct nd_namespace_common **_ndns, const char *buf,
size_t len);
struct nd_pfn *to_nd_pfn_safe(struct device *dev);
+bool is_nvdimm_bus(struct device *dev);
+
+#ifdef CONFIG_PROVE_LOCKING
+extern struct class *nd_class;
+
+enum {
+ LOCK_BUS,
+ LOCK_NDCTL,
+ LOCK_REGION,
+ LOCK_DIMM = LOCK_REGION,
+ LOCK_NAMESPACE,
+ LOCK_CLAIM,
+};
+
+static inline void debug_nvdimm_lock(struct device *dev)
+{
+ if (is_nd_region(dev))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_REGION);
+ else if (is_nvdimm(dev))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_DIMM);
+ else if (is_nd_btt(dev) || is_nd_pfn(dev) || is_nd_dax(dev))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_CLAIM);
+ else if (dev->parent && (is_nd_region(dev->parent)))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_NAMESPACE);
+ else if (is_nvdimm_bus(dev))
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_BUS);
+ else if (dev->class && dev->class == nd_class)
+ mutex_lock_nested(&dev->lockdep_mutex, LOCK_NDCTL);
+ else
+ dev_WARN(dev, "unknown lock level\n");
+}
+
+static inline void debug_nvdimm_unlock(struct device *dev)
+{
+ mutex_unlock(&dev->lockdep_mutex);
+}
+
+static inline void nd_device_lock(struct device *dev)
+{
+ device_lock(dev);
+ debug_nvdimm_lock(dev);
+}
+
+static inline void nd_device_unlock(struct device *dev)
+{
+ debug_nvdimm_unlock(dev);
+ device_unlock(dev);
+}
+#else
+static inline void nd_device_lock(struct device *dev)
+{
+ device_lock(dev);
+}
+
+static inline void nd_device_unlock(struct device *dev)
+{
+ device_unlock(dev);
+}
+
+static inline void debug_nvdimm_lock(struct device *dev)
+{
+}
+
+static inline void debug_nvdimm_unlock(struct device *dev)
+{
+}
+#endif
#endif /* __ND_CORE_H__ */
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc = 0;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
if (dev->driver)
rc = -EBUSY;
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_size_select_store(dev, buf, &nd_pfn->align,
nd_pfn_supported_alignments());
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc ? rc : len;
}
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
buf[len - 1] == '\n' ? "" : "\n");
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver) {
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = __le64_to_cpu(pfn_sb->dataoff);
/* no address to convey if the pfn instance is disabled */
rc = -ENXIO;
}
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver) {
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = __le64_to_cpu(pfn_sb->dataoff);
/* no size to convey if the pfn instance is disabled */
rc = -ENXIO;
}
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
nvdimm_namespace_detach_btt(to_nd_btt(dev));
else {
/*
- * Note, this assumes device_lock() context to not race
- * nd_pmem_notify()
+ * Note, this assumes nd_device_lock() context to not
+ * race nd_pmem_notify()
*/
sysfs_put(pmem->bb_state);
pmem->bb_state = NULL;
if (rc)
return rc;
- rc = nd_region_register_namespaces(nd_region, &err);
- if (rc < 0)
- return rc;
-
- ndrd = dev_get_drvdata(dev);
- ndrd->ns_active = rc;
- ndrd->ns_count = rc + err;
-
- if (rc && err && rc == err)
- return -ENODEV;
-
if (is_nd_pmem(&nd_region->dev)) {
struct resource ndr_res;
nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
}
+ rc = nd_region_register_namespaces(nd_region, &err);
+ if (rc < 0)
+ return rc;
+
+ ndrd = dev_get_drvdata(dev);
+ ndrd->ns_active = rc;
+ ndrd->ns_count = rc + err;
+
+ if (rc && err && rc == err)
+ return -ENODEV;
+
nd_region->btt_seed = nd_btt_create(nd_region);
nd_region->pfn_seed = nd_pfn_create(nd_region);
nd_region->dax_seed = nd_dax_create(nd_region);
nvdimm_bus_unlock(dev);
/*
- * Note, this assumes device_lock() context to not race
+ * Note, this assumes nd_device_lock() context to not race
* nd_region_notify()
*/
sysfs_put(nd_region->bb_state);
* the v1.1 namespace label cookie definition. To read all this
* data we need to wait for probing to settle.
*/
- device_lock(dev);
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
if (nd_region->ndr_mappings) {
}
}
nvdimm_bus_unlock(dev);
- device_unlock(dev);
+ nd_device_unlock(dev);
if (rc)
return rc;
* memory nvdimm_bus_lock() is dropped, but that's userspace's
* problem to not race itself.
*/
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
available = nd_region_available_dpa(nd_region);
nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
return sprintf(buf, "%llu\n", available);
}
struct nd_region *nd_region = to_nd_region(dev);
unsigned long long available = 0;
+ nd_device_lock(dev);
nvdimm_bus_lock(dev);
wait_nvdimm_bus_probe_idle(dev);
available = nd_region_allocatable_dpa(nd_region);
nvdimm_bus_unlock(dev);
+ nd_device_unlock(dev);
return sprintf(buf, "%llu\n", available);
}
struct nd_region *nd_region = to_nd_region(dev);
ssize_t rc;
- device_lock(dev);
+ nd_device_lock(dev);
if (dev->driver)
rc = badblocks_show(&nd_region->bb, buf, 0);
else
rc = -ENXIO;
- device_unlock(dev);
+ nd_device_unlock(dev);
return rc;
}
memset(subsys->subnqn + off, 0, sizeof(subsys->subnqn) - off);
}
-static void __nvme_release_subsystem(struct nvme_subsystem *subsys)
+static void nvme_release_subsystem(struct device *dev)
{
+ struct nvme_subsystem *subsys =
+ container_of(dev, struct nvme_subsystem, dev);
+
ida_simple_remove(&nvme_subsystems_ida, subsys->instance);
kfree(subsys);
}
-static void nvme_release_subsystem(struct device *dev)
-{
- __nvme_release_subsystem(container_of(dev, struct nvme_subsystem, dev));
-}
-
static void nvme_destroy_subsystem(struct kref *ref)
{
struct nvme_subsystem *subsys =
mutex_lock(&nvme_subsystems_lock);
found = __nvme_find_get_subsystem(subsys->subnqn);
if (found) {
- __nvme_release_subsystem(subsys);
+ put_device(&subsys->dev);
subsys = found;
if (!nvme_validate_cntlid(subsys, ctrl, id)) {
MODULE_PARM_DESC(multipath,
"turn on native support for multiple controllers per subsystem");
-inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
-{
- return multipath && ctrl->subsys && (ctrl->subsys->cmic & (1 << 3));
-}
-
/*
* If multipathing is enabled we need to always use the subsystem instance
* number for numbering our devices to avoid conflicts between subsystems that
{
int error;
- if (!nvme_ctrl_use_ana(ctrl))
+ /* check if multipath is enabled and we have the capability */
+ if (!multipath || !ctrl->subsys || !(ctrl->subsys->cmic & (1 << 3)))
return 0;
ctrl->anacap = id->anacap;
extern const struct block_device_operations nvme_ns_head_ops;
#ifdef CONFIG_NVME_MULTIPATH
-bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl);
+static inline bool nvme_ctrl_use_ana(struct nvme_ctrl *ctrl)
+{
+ return ctrl->ana_log_buf != NULL;
+}
+
void nvme_set_disk_name(char *disk_name, struct nvme_ns *ns,
struct nvme_ctrl *ctrl, int *flags);
void nvme_failover_req(struct request *req);
if (!dev->ctrl.tagset) {
dev->tagset.ops = &nvme_mq_ops;
dev->tagset.nr_hw_queues = dev->online_queues - 1;
- dev->tagset.nr_maps = 1; /* default */
- if (dev->io_queues[HCTX_TYPE_READ])
- dev->tagset.nr_maps++;
+ dev->tagset.nr_maps = 2; /* default + read */
if (dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.nr_maps++;
dev->tagset.timeout = NVME_IO_TIMEOUT;
.driver_data = NVME_QUIRK_LIGHTNVM, },
{ PCI_DEVICE(0x1d1d, 0x2601), /* CNEX Granby */
.driver_data = NVME_QUIRK_LIGHTNVM, },
+ { PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
+ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2001) },
{ PCI_DEVICE(PCI_VENDOR_ID_APPLE, 0x2003) },
unregister_pm_notifier(&rapl_pm_notifier);
}
-module_init(rapl_init);
+fs_initcall(rapl_init);
module_exit(rapl_exit);
MODULE_DESCRIPTION("Intel Runtime Average Power Limit (RAPL) common code");
return class_register(&powercap_class);
}
-device_initcall(powercap_init);
+fs_initcall(powercap_init);
MODULE_DESCRIPTION("PowerCap sysfs Driver");
MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>");
int retries = 0, cc;
unsigned long laob = 0;
- WARN_ON_ONCE(aob && ((queue_type(q) != QDIO_IQDIO_QFMT) ||
- !q->u.out.use_cq));
- if (q->u.out.use_cq && aob != 0) {
+ if (aob) {
fc = QDIO_SIGA_WRITEQ;
laob = aob;
}
{
unsigned long phys_aob = 0;
- if (!q->use_cq)
- return 0;
-
if (!q->aobs[bufnr]) {
struct qaob *aob = qdio_allocate_aob();
q->aobs[bufnr] = aob;
for_each_output_queue(irq_ptr, q, i) {
if (use_cq) {
+ if (multicast_outbound(q))
+ continue;
if (qdio_enable_async_operation(&q->u.out) < 0) {
use_cq = 0;
continue;
/* One SIGA-W per buffer required for unicast HSI */
WARN_ON_ONCE(count > 1 && !multicast_outbound(q));
- phys_aob = qdio_aob_for_buffer(&q->u.out, bufnr);
+ if (q->u.out.use_cq)
+ phys_aob = qdio_aob_for_buffer(&q->u.out, bufnr);
rc = qdio_kick_outbound_q(q, phys_aob);
} else if (need_siga_sync(q)) {
rc = qdio_siga_sync_q(q);
+ } else if (count < QDIO_MAX_BUFFERS_PER_Q &&
+ get_buf_state(q, prev_buf(bufnr), &state, 0) > 0 &&
+ state == SLSB_CU_OUTPUT_PRIMED) {
+ /* The previous buffer is not processed yet, tack on. */
+ qperf_inc(q, fast_requeue);
} else {
- /* try to fast requeue buffers */
- get_buf_state(q, prev_buf(bufnr), &state, 0);
- if (state != SLSB_CU_OUTPUT_PRIMED)
- rc = qdio_kick_outbound_q(q, 0);
- else
- qperf_inc(q, fast_requeue);
+ rc = qdio_kick_outbound_q(q, 0);
}
/* in case of SIGA errors we must process the error immediately */
sizeof(*pa->pa_iova_pfn) +
sizeof(*pa->pa_pfn),
GFP_KERNEL);
- if (unlikely(!pa->pa_iova_pfn))
+ if (unlikely(!pa->pa_iova_pfn)) {
+ pa->pa_nr = 0;
return -ENOMEM;
+ }
pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
{
struct ccwchain *chain;
- int len;
+ int len, ret;
/* Copy 2K (the most we support today) of possible CCWs */
len = copy_from_iova(cp->mdev, cp->guest_cp, cda,
memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
/* Loop for tics on this new chain. */
- return ccwchain_loop_tic(chain, cp);
+ ret = ccwchain_loop_tic(chain, cp);
+
+ if (ret)
+ ccwchain_free(chain);
+
+ return ret;
}
/* Loop for TICs. */
/* Build a ccwchain for the first CCW segment */
ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
- if (ret)
- cp_free(cp);
-
- /* It is safe to force: if not set but idals used
- * ccwchain_calc_length returns an error.
- */
- cp->orb.cmd.c64 = 1;
- if (!ret)
+ if (!ret) {
cp->initialized = true;
+ /* It is safe to force: if it was not set but idals used
+ * ccwchain_calc_length would have returned an error.
+ */
+ cp->orb.cmd.c64 = 1;
+ }
+
return ret;
}
(SCSW_ACTL_DEVACT | SCSW_ACTL_SCHACT));
if (scsw_is_solicited(&irb->scsw)) {
cp_update_scsw(&private->cp, &irb->scsw);
- if (is_final)
+ if (is_final && private->state == VFIO_CCW_STATE_CP_PENDING)
cp_free(&private->cp);
}
mutex_lock(&private->io_mutex);
struct airq_iv *aiv;
};
static struct airq_info *airq_areas[MAX_AIRQ_AREAS];
+static DEFINE_MUTEX(airq_areas_lock);
+
static u8 *summary_indicators;
static inline u8 *get_summary_indicator(struct airq_info *info)
unsigned long bit, flags;
for (i = 0; i < MAX_AIRQ_AREAS && !indicator_addr; i++) {
+ mutex_lock(&airq_areas_lock);
if (!airq_areas[i])
airq_areas[i] = new_airq_info(i);
info = airq_areas[i];
+ mutex_unlock(&airq_areas_lock);
if (!info)
return 0;
write_lock_irqsave(&info->lock, flags);
source "drivers/scsi/csiostor/Kconfig"
-endif # SCSI_LOWLEVEL
-
source "drivers/scsi/pcmcia/Kconfig"
+endif # SCSI_LOWLEVEL
+
source "drivers/scsi/device_handler/Kconfig"
endmenu
#define ALUA_FAILOVER_TIMEOUT 60
#define ALUA_FAILOVER_RETRIES 5
#define ALUA_RTPG_DELAY_MSECS 5
+#define ALUA_RTPG_RETRY_DELAY 2
/* device handler flags */
#define ALUA_OPTIMIZE_STPG 0x01
case SCSI_ACCESS_STATE_TRANSITIONING:
if (time_before(jiffies, pg->expiry)) {
/* State transition, retry */
- pg->interval = 2;
+ pg->interval = ALUA_RTPG_RETRY_DELAY;
err = SCSI_DH_RETRY;
} else {
struct alua_dh_data *h;
spin_lock_irqsave(&pg->lock, flags);
pg->flags &= ~ALUA_PG_RUNNING;
pg->flags |= ALUA_PG_RUN_RTPG;
+ if (!pg->interval)
+ pg->interval = ALUA_RTPG_RETRY_DELAY;
spin_unlock_irqrestore(&pg->lock, flags);
queue_delayed_work(kaluad_wq, &pg->rtpg_work,
pg->interval * HZ);
spin_lock_irqsave(&pg->lock, flags);
if (err == SCSI_DH_RETRY || pg->flags & ALUA_PG_RUN_RTPG) {
pg->flags &= ~ALUA_PG_RUNNING;
+ if (!pg->interval && !(pg->flags & ALUA_PG_RUN_RTPG))
+ pg->interval = ALUA_RTPG_RETRY_DELAY;
pg->flags |= ALUA_PG_RUN_RTPG;
spin_unlock_irqrestore(&pg->lock, flags);
queue_delayed_work(kaluad_wq, &pg->rtpg_work,
{
struct fcoe_fcf *fcf;
struct fcoe_fcf new;
- unsigned long sol_tov = msecs_to_jiffies(FCOE_CTRL_SOL_TOV);
+ unsigned long sol_tov = msecs_to_jiffies(FCOE_CTLR_SOL_TOV);
int first = 0;
int mtu_valid;
int found = 0;
hpsa_disable_interrupt_mode(h); /* pci_init 2 */
/*
* call pci_disable_device before pci_release_regions per
- * Documentation/PCI/pci.rst
+ * Documentation/driver-api/pci/pci.rst
*/
pci_disable_device(h->pdev); /* pci_init 1 */
pci_release_regions(h->pdev); /* pci_init 2 */
clean1:
/*
* call pci_disable_device before pci_release_regions per
- * Documentation/PCI/pci.rst
+ * Documentation/driver-api/pci/pci.rst
*/
pci_disable_device(h->pdev);
pci_release_regions(h->pdev);
spin_lock_irqsave(vhost->host->host_lock, flags);
ibmvfc_purge_requests(vhost, DID_ERROR);
- ibmvfc_free_event_pool(vhost);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
+ ibmvfc_free_event_pool(vhost);
ibmvfc_free_mem(vhost);
spin_lock(&ibmvfc_driver_lock);
(struct megasas_instance *) shost->hostdata;
u32 size;
unsigned long dmachunk = CRASH_DMA_BUF_SIZE;
+ unsigned long chunk_left_bytes;
unsigned long src_addr;
unsigned long flags;
u32 buff_offset;
}
size = (instance->fw_crash_buffer_size * dmachunk) - buff_offset;
+ chunk_left_bytes = dmachunk - (buff_offset % dmachunk);
+ size = (size > chunk_left_bytes) ? chunk_left_bytes : size;
size = (size >= PAGE_SIZE) ? (PAGE_SIZE - 1) : size;
src_addr = (unsigned long)instance->crash_buf[buff_offset / dmachunk] +
if ((event_log_level < MFI_EVT_CLASS_DEBUG) ||
(event_log_level > MFI_EVT_CLASS_DEAD)) {
- printk(KERN_WARNING "megarid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n");
+ pr_warn("megaraid_sas: provided event log level is out of range, setting it to default 2(CLASS_CRITICAL), permissible range is: -2 to 4\n");
event_log_level = MFI_EVT_CLASS_CRITICAL;
}
return 0;
}
-int
+static int
megasas_alloc_cmdlist_fusion(struct megasas_instance *instance)
{
u32 max_mpt_cmd, i, j;
return 0;
}
-int
+
+static int
megasas_alloc_request_fusion(struct megasas_instance *instance)
{
struct fusion_context *fusion;
return 0;
}
-int
+static int
megasas_alloc_reply_fusion(struct megasas_instance *instance)
{
int i, count;
return 0;
}
-int
+static int
megasas_alloc_rdpq_fusion(struct megasas_instance *instance)
{
int i, j, k, msix_count;
* and is used as SMID of the cmd.
* SMID value range is from 1 to max_fw_cmds.
*/
-int
+static int
megasas_alloc_cmds_fusion(struct megasas_instance *instance)
{
int i;
*
* This is the main function for initializing firmware.
*/
-u32
+static u32
megasas_init_adapter_fusion(struct megasas_instance *instance)
{
struct fusion_context *fusion;
* @ext_status : ext status of cmd returned by FW
*/
-void
+static void
map_cmd_status(struct fusion_context *fusion,
struct scsi_cmnd *scmd, u8 status, u8 ext_status,
u32 data_length, u8 *sense)
*
* Used to set the PD LBA in CDB for FP IOs
*/
-void
+static void
megasas_set_pd_lba(struct MPI2_RAID_SCSI_IO_REQUEST *io_request, u8 cdb_len,
struct IO_REQUEST_INFO *io_info, struct scsi_cmnd *scp,
struct MR_DRV_RAID_MAP_ALL *local_map_ptr, u32 ref_tag)
* Prepares the io_request and chain elements (sg_frame) for IO
* The IO can be for PD (Fast Path) or LD
*/
-void
+static void
megasas_build_ldio_fusion(struct megasas_instance *instance,
struct scsi_cmnd *scp,
struct megasas_cmd_fusion *cmd)
* Invokes helper functions to prepare request frames
* and sets flags appropriate for IO/Non-IO cmd
*/
-int
+static int
megasas_build_io_fusion(struct megasas_instance *instance,
struct scsi_cmnd *scp,
struct megasas_cmd_fusion *cmd)
/* megasas_prepate_secondRaid1_IO
* It prepares the raid 1 second IO
*/
-void megasas_prepare_secondRaid1_IO(struct megasas_instance *instance,
- struct megasas_cmd_fusion *cmd,
- struct megasas_cmd_fusion *r1_cmd)
+static void megasas_prepare_secondRaid1_IO(struct megasas_instance *instance,
+ struct megasas_cmd_fusion *cmd,
+ struct megasas_cmd_fusion *r1_cmd)
{
union MEGASAS_REQUEST_DESCRIPTOR_UNION *req_desc, *req_desc2 = NULL;
struct fusion_context *fusion;
blk_queue_max_integrity_segments(q, shost->sg_prot_tablesize);
}
- shost->max_sectors = min_t(unsigned int, shost->max_sectors,
- dma_max_mapping_size(dev) << SECTOR_SHIFT);
+ if (dev->dma_mask) {
+ shost->max_sectors = min_t(unsigned int, shost->max_sectors,
+ dma_max_mapping_size(dev) >> SECTOR_SHIFT);
+ }
blk_queue_max_hw_sectors(q, shost->max_sectors);
if (shost->unchecked_isa_dma)
blk_queue_bounce_limit(q, BLK_BOUNCE_ISA);
/*
* Upload a microcode to the I-RAM at a specific address.
*
- * See Documentation/powerpc/qe_firmware.txt for information on QE microcode
+ * See Documentation/powerpc/qe_firmware.rst for information on QE microcode
* uploading.
*
* Currently, only version 1 is supported, so the 'version' field must be
return 0;
if (caps & DCB_CAP_DCBX_VER_IEEE) {
- iscsi_dcb_app.selector = IEEE_8021QAZ_APP_SEL_ANY;
-
+ iscsi_dcb_app.selector = IEEE_8021QAZ_APP_SEL_STREAM;
ret = dcb_ieee_getapp_mask(ndev, &iscsi_dcb_app);
-
+ if (!ret) {
+ iscsi_dcb_app.selector = IEEE_8021QAZ_APP_SEL_ANY;
+ ret = dcb_ieee_getapp_mask(ndev, &iscsi_dcb_app);
+ }
} else if (caps & DCB_CAP_DCBX_VER_CEE) {
iscsi_dcb_app.selector = DCB_APP_IDTYPE_PORTNUM;
iscsi_app = &dcb_work->dcb_app;
if (iscsi_app->dcbx & DCB_CAP_DCBX_VER_IEEE) {
- if (iscsi_app->app.selector != IEEE_8021QAZ_APP_SEL_ANY)
+ if ((iscsi_app->app.selector != IEEE_8021QAZ_APP_SEL_STREAM) &&
+ (iscsi_app->app.selector != IEEE_8021QAZ_APP_SEL_ANY))
goto out;
priority = iscsi_app->app.priority;
rapl_mmio_cpu_online, rapl_mmio_cpu_down_prep);
if (ret < 0) {
powercap_unregister_control_type(rapl_mmio_priv.control_type);
+ rapl_mmio_priv.control_type = NULL;
return ret;
}
rapl_mmio_priv.pcap_rapl_online = ret;
static void proc_thermal_rapl_remove(void)
{
+ if (IS_ERR_OR_NULL(rapl_mmio_priv.control_type))
+ return;
+
cpuhp_remove_state(rapl_mmio_priv.pcap_rapl_online);
powercap_unregister_control_type(rapl_mmio_priv.control_type);
}
* using the 2.6 Linux kernel kref construct.
*
* For direction on installation and usage of this driver please reference
- * Documentation/powerpc/hvcs.txt.
+ * Documentation/powerpc/hvcs.rst.
*/
#include <linux/device.h>
depends on SERIAL_VT8500=y
select SERIAL_CORE_CONSOLE
-config SERIAL_NETX
- tristate "NetX serial port support"
- depends on ARCH_NETX
- select SERIAL_CORE
- help
- If you have a machine based on a Hilscher NetX SoC you
- can enable its onboard serial port by enabling this option.
-
- To compile this driver as a module, choose M here: the
- module will be called netx-serial.
-
-config SERIAL_NETX_CONSOLE
- bool "Console on NetX serial port"
- depends on SERIAL_NETX=y
- select SERIAL_CORE_CONSOLE
- help
- If you have enabled the serial port on the Hilscher NetX SoC
- you can make it the console by answering Y to this option.
-
config SERIAL_OMAP
tristate "OMAP serial port support"
depends on ARCH_OMAP2PLUS
obj-$(CONFIG_SERIAL_UARTLITE) += uartlite.o
obj-$(CONFIG_SERIAL_MSM) += msm_serial.o
obj-$(CONFIG_SERIAL_QCOM_GENI) += qcom_geni_serial.o
-obj-$(CONFIG_SERIAL_NETX) += netx-serial.o
obj-$(CONFIG_SERIAL_KS8695) += serial_ks8695.o
obj-$(CONFIG_SERIAL_OMAP) += omap-serial.o
obj-$(CONFIG_SERIAL_ALTERA_UART) += altera_uart.o
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2005 Sascha Hauer <s.hauer@pengutronix.de>, Pengutronix
- */
-
-#if defined(CONFIG_SERIAL_NETX_CONSOLE) && defined(CONFIG_MAGIC_SYSRQ)
-#define SUPPORT_SYSRQ
-#endif
-
-#include <linux/device.h>
-#include <linux/module.h>
-#include <linux/ioport.h>
-#include <linux/init.h>
-#include <linux/console.h>
-#include <linux/sysrq.h>
-#include <linux/platform_device.h>
-#include <linux/tty.h>
-#include <linux/tty_flip.h>
-#include <linux/serial_core.h>
-#include <linux/serial.h>
-
-#include <asm/io.h>
-#include <asm/irq.h>
-#include <mach/hardware.h>
-#include <mach/netx-regs.h>
-
-/* We've been assigned a range on the "Low-density serial ports" major */
-#define SERIAL_NX_MAJOR 204
-#define MINOR_START 170
-
-enum uart_regs {
- UART_DR = 0x00,
- UART_SR = 0x04,
- UART_LINE_CR = 0x08,
- UART_BAUDDIV_MSB = 0x0c,
- UART_BAUDDIV_LSB = 0x10,
- UART_CR = 0x14,
- UART_FR = 0x18,
- UART_IIR = 0x1c,
- UART_ILPR = 0x20,
- UART_RTS_CR = 0x24,
- UART_RTS_LEAD = 0x28,
- UART_RTS_TRAIL = 0x2c,
- UART_DRV_ENABLE = 0x30,
- UART_BRM_CR = 0x34,
- UART_RXFIFO_IRQLEVEL = 0x38,
- UART_TXFIFO_IRQLEVEL = 0x3c,
-};
-
-#define SR_FE (1<<0)
-#define SR_PE (1<<1)
-#define SR_BE (1<<2)
-#define SR_OE (1<<3)
-
-#define LINE_CR_BRK (1<<0)
-#define LINE_CR_PEN (1<<1)
-#define LINE_CR_EPS (1<<2)
-#define LINE_CR_STP2 (1<<3)
-#define LINE_CR_FEN (1<<4)
-#define LINE_CR_5BIT (0<<5)
-#define LINE_CR_6BIT (1<<5)
-#define LINE_CR_7BIT (2<<5)
-#define LINE_CR_8BIT (3<<5)
-#define LINE_CR_BITS_MASK (3<<5)
-
-#define CR_UART_EN (1<<0)
-#define CR_SIREN (1<<1)
-#define CR_SIRLP (1<<2)
-#define CR_MSIE (1<<3)
-#define CR_RIE (1<<4)
-#define CR_TIE (1<<5)
-#define CR_RTIE (1<<6)
-#define CR_LBE (1<<7)
-
-#define FR_CTS (1<<0)
-#define FR_DSR (1<<1)
-#define FR_DCD (1<<2)
-#define FR_BUSY (1<<3)
-#define FR_RXFE (1<<4)
-#define FR_TXFF (1<<5)
-#define FR_RXFF (1<<6)
-#define FR_TXFE (1<<7)
-
-#define IIR_MIS (1<<0)
-#define IIR_RIS (1<<1)
-#define IIR_TIS (1<<2)
-#define IIR_RTIS (1<<3)
-#define IIR_MASK 0xf
-
-#define RTS_CR_AUTO (1<<0)
-#define RTS_CR_RTS (1<<1)
-#define RTS_CR_COUNT (1<<2)
-#define RTS_CR_MOD2 (1<<3)
-#define RTS_CR_RTS_POL (1<<4)
-#define RTS_CR_CTS_CTR (1<<5)
-#define RTS_CR_CTS_POL (1<<6)
-#define RTS_CR_STICK (1<<7)
-
-#define UART_PORT_SIZE 0x40
-#define DRIVER_NAME "netx-uart"
-
-struct netx_port {
- struct uart_port port;
-};
-
-static void netx_stop_tx(struct uart_port *port)
-{
- unsigned int val;
- val = readl(port->membase + UART_CR);
- writel(val & ~CR_TIE, port->membase + UART_CR);
-}
-
-static void netx_stop_rx(struct uart_port *port)
-{
- unsigned int val;
- val = readl(port->membase + UART_CR);
- writel(val & ~CR_RIE, port->membase + UART_CR);
-}
-
-static void netx_enable_ms(struct uart_port *port)
-{
- unsigned int val;
- val = readl(port->membase + UART_CR);
- writel(val | CR_MSIE, port->membase + UART_CR);
-}
-
-static inline void netx_transmit_buffer(struct uart_port *port)
-{
- struct circ_buf *xmit = &port->state->xmit;
-
- if (port->x_char) {
- writel(port->x_char, port->membase + UART_DR);
- port->icount.tx++;
- port->x_char = 0;
- return;
- }
-
- if (uart_tx_stopped(port) || uart_circ_empty(xmit)) {
- netx_stop_tx(port);
- return;
- }
-
- do {
- /* send xmit->buf[xmit->tail]
- * out the port here */
- writel(xmit->buf[xmit->tail], port->membase + UART_DR);
- xmit->tail = (xmit->tail + 1) &
- (UART_XMIT_SIZE - 1);
- port->icount.tx++;
- if (uart_circ_empty(xmit))
- break;
- } while (!(readl(port->membase + UART_FR) & FR_TXFF));
-
- if (uart_circ_empty(xmit))
- netx_stop_tx(port);
-}
-
-static void netx_start_tx(struct uart_port *port)
-{
- writel(
- readl(port->membase + UART_CR) | CR_TIE, port->membase + UART_CR);
-
- if (!(readl(port->membase + UART_FR) & FR_TXFF))
- netx_transmit_buffer(port);
-}
-
-static unsigned int netx_tx_empty(struct uart_port *port)
-{
- return readl(port->membase + UART_FR) & FR_BUSY ? 0 : TIOCSER_TEMT;
-}
-
-static void netx_txint(struct uart_port *port)
-{
- struct circ_buf *xmit = &port->state->xmit;
-
- if (uart_circ_empty(xmit) || uart_tx_stopped(port)) {
- netx_stop_tx(port);
- return;
- }
-
- netx_transmit_buffer(port);
-
- if (uart_circ_chars_pending(xmit) < WAKEUP_CHARS)
- uart_write_wakeup(port);
-}
-
-static void netx_rxint(struct uart_port *port, unsigned long *flags)
-{
- unsigned char rx, flg, status;
-
- while (!(readl(port->membase + UART_FR) & FR_RXFE)) {
- rx = readl(port->membase + UART_DR);
- flg = TTY_NORMAL;
- port->icount.rx++;
- status = readl(port->membase + UART_SR);
- if (status & SR_BE) {
- writel(0, port->membase + UART_SR);
- if (uart_handle_break(port))
- continue;
- }
-
- if (unlikely(status & (SR_FE | SR_PE | SR_OE))) {
-
- if (status & SR_PE)
- port->icount.parity++;
- else if (status & SR_FE)
- port->icount.frame++;
- if (status & SR_OE)
- port->icount.overrun++;
-
- status &= port->read_status_mask;
-
- if (status & SR_BE)
- flg = TTY_BREAK;
- else if (status & SR_PE)
- flg = TTY_PARITY;
- else if (status & SR_FE)
- flg = TTY_FRAME;
- }
-
- if (uart_handle_sysrq_char(port, rx))
- continue;
-
- uart_insert_char(port, status, SR_OE, rx, flg);
- }
-
- spin_unlock_irqrestore(&port->lock, *flags);
- tty_flip_buffer_push(&port->state->port);
- spin_lock_irqsave(&port->lock, *flags);
-}
-
-static irqreturn_t netx_int(int irq, void *dev_id)
-{
- struct uart_port *port = dev_id;
- unsigned long flags;
- unsigned char status;
-
- spin_lock_irqsave(&port->lock,flags);
-
- status = readl(port->membase + UART_IIR) & IIR_MASK;
- while (status) {
- if (status & IIR_RIS)
- netx_rxint(port, &flags);
- if (status & IIR_TIS)
- netx_txint(port);
- if (status & IIR_MIS) {
- if (readl(port->membase + UART_FR) & FR_CTS)
- uart_handle_cts_change(port, 1);
- else
- uart_handle_cts_change(port, 0);
- }
- writel(0, port->membase + UART_IIR);
- status = readl(port->membase + UART_IIR) & IIR_MASK;
- }
-
- spin_unlock_irqrestore(&port->lock,flags);
- return IRQ_HANDLED;
-}
-
-static unsigned int netx_get_mctrl(struct uart_port *port)
-{
- unsigned int ret = TIOCM_DSR | TIOCM_CAR;
-
- if (readl(port->membase + UART_FR) & FR_CTS)
- ret |= TIOCM_CTS;
-
- return ret;
-}
-
-static void netx_set_mctrl(struct uart_port *port, unsigned int mctrl)
-{
- unsigned int val;
-
- /* FIXME: Locking needed ? */
- if (mctrl & TIOCM_RTS) {
- val = readl(port->membase + UART_RTS_CR);
- writel(val | RTS_CR_RTS, port->membase + UART_RTS_CR);
- }
-}
-
-static void netx_break_ctl(struct uart_port *port, int break_state)
-{
- unsigned int line_cr;
- spin_lock_irq(&port->lock);
-
- line_cr = readl(port->membase + UART_LINE_CR);
- if (break_state != 0)
- line_cr |= LINE_CR_BRK;
- else
- line_cr &= ~LINE_CR_BRK;
- writel(line_cr, port->membase + UART_LINE_CR);
-
- spin_unlock_irq(&port->lock);
-}
-
-static int netx_startup(struct uart_port *port)
-{
- int ret;
-
- ret = request_irq(port->irq, netx_int, 0,
- DRIVER_NAME, port);
- if (ret) {
- dev_err(port->dev, "unable to grab irq%d\n",port->irq);
- goto exit;
- }
-
- writel(readl(port->membase + UART_LINE_CR) | LINE_CR_FEN,
- port->membase + UART_LINE_CR);
-
- writel(CR_MSIE | CR_RIE | CR_TIE | CR_RTIE | CR_UART_EN,
- port->membase + UART_CR);
-
-exit:
- return ret;
-}
-
-static void netx_shutdown(struct uart_port *port)
-{
- writel(0, port->membase + UART_CR) ;
-
- free_irq(port->irq, port);
-}
-
-static void
-netx_set_termios(struct uart_port *port, struct ktermios *termios,
- struct ktermios *old)
-{
- unsigned int baud, quot;
- unsigned char old_cr;
- unsigned char line_cr = LINE_CR_FEN;
- unsigned char rts_cr = 0;
-
- switch (termios->c_cflag & CSIZE) {
- case CS5:
- line_cr |= LINE_CR_5BIT;
- break;
- case CS6:
- line_cr |= LINE_CR_6BIT;
- break;
- case CS7:
- line_cr |= LINE_CR_7BIT;
- break;
- case CS8:
- line_cr |= LINE_CR_8BIT;
- break;
- }
-
- if (termios->c_cflag & CSTOPB)
- line_cr |= LINE_CR_STP2;
-
- if (termios->c_cflag & PARENB) {
- line_cr |= LINE_CR_PEN;
- if (!(termios->c_cflag & PARODD))
- line_cr |= LINE_CR_EPS;
- }
-
- if (termios->c_cflag & CRTSCTS)
- rts_cr = RTS_CR_AUTO | RTS_CR_CTS_CTR | RTS_CR_RTS_POL;
-
- baud = uart_get_baud_rate(port, termios, old, 0, port->uartclk/16);
- quot = baud * 4096;
- quot /= 1000;
- quot *= 256;
- quot /= 100000;
-
- spin_lock_irq(&port->lock);
-
- uart_update_timeout(port, termios->c_cflag, baud);
-
- old_cr = readl(port->membase + UART_CR);
-
- /* disable interrupts */
- writel(old_cr & ~(CR_MSIE | CR_RIE | CR_TIE | CR_RTIE),
- port->membase + UART_CR);
-
- /* drain transmitter */
- while (readl(port->membase + UART_FR) & FR_BUSY);
-
- /* disable UART */
- writel(old_cr & ~CR_UART_EN, port->membase + UART_CR);
-
- /* modem status interrupts */
- old_cr &= ~CR_MSIE;
- if (UART_ENABLE_MS(port, termios->c_cflag))
- old_cr |= CR_MSIE;
-
- writel((quot>>8) & 0xff, port->membase + UART_BAUDDIV_MSB);
- writel(quot & 0xff, port->membase + UART_BAUDDIV_LSB);
- writel(line_cr, port->membase + UART_LINE_CR);
-
- writel(rts_cr, port->membase + UART_RTS_CR);
-
- /*
- * Characters to ignore
- */
- port->ignore_status_mask = 0;
- if (termios->c_iflag & IGNPAR)
- port->ignore_status_mask |= SR_PE;
- if (termios->c_iflag & IGNBRK) {
- port->ignore_status_mask |= SR_BE;
- /*
- * If we're ignoring parity and break indicators,
- * ignore overruns too (for real raw support).
- */
- if (termios->c_iflag & IGNPAR)
- port->ignore_status_mask |= SR_PE;
- }
-
- port->read_status_mask = 0;
- if (termios->c_iflag & (IGNBRK | BRKINT | PARMRK))
- port->read_status_mask |= SR_BE;
- if (termios->c_iflag & INPCK)
- port->read_status_mask |= SR_PE | SR_FE;
-
- writel(old_cr, port->membase + UART_CR);
-
- spin_unlock_irq(&port->lock);
-}
-
-static const char *netx_type(struct uart_port *port)
-{
- return port->type == PORT_NETX ? "NETX" : NULL;
-}
-
-static void netx_release_port(struct uart_port *port)
-{
- release_mem_region(port->mapbase, UART_PORT_SIZE);
-}
-
-static int netx_request_port(struct uart_port *port)
-{
- return request_mem_region(port->mapbase, UART_PORT_SIZE,
- DRIVER_NAME) != NULL ? 0 : -EBUSY;
-}
-
-static void netx_config_port(struct uart_port *port, int flags)
-{
- if (flags & UART_CONFIG_TYPE && netx_request_port(port) == 0)
- port->type = PORT_NETX;
-}
-
-static int
-netx_verify_port(struct uart_port *port, struct serial_struct *ser)
-{
- int ret = 0;
-
- if (ser->type != PORT_UNKNOWN && ser->type != PORT_NETX)
- ret = -EINVAL;
-
- return ret;
-}
-
-static struct uart_ops netx_pops = {
- .tx_empty = netx_tx_empty,
- .set_mctrl = netx_set_mctrl,
- .get_mctrl = netx_get_mctrl,
- .stop_tx = netx_stop_tx,
- .start_tx = netx_start_tx,
- .stop_rx = netx_stop_rx,
- .enable_ms = netx_enable_ms,
- .break_ctl = netx_break_ctl,
- .startup = netx_startup,
- .shutdown = netx_shutdown,
- .set_termios = netx_set_termios,
- .type = netx_type,
- .release_port = netx_release_port,
- .request_port = netx_request_port,
- .config_port = netx_config_port,
- .verify_port = netx_verify_port,
-};
-
-static struct netx_port netx_ports[] = {
- {
- .port = {
- .type = PORT_NETX,
- .iotype = UPIO_MEM,
- .membase = (char __iomem *)io_p2v(NETX_PA_UART0),
- .mapbase = NETX_PA_UART0,
- .irq = NETX_IRQ_UART0,
- .uartclk = 100000000,
- .fifosize = 16,
- .flags = UPF_BOOT_AUTOCONF,
- .ops = &netx_pops,
- .line = 0,
- },
- }, {
- .port = {
- .type = PORT_NETX,
- .iotype = UPIO_MEM,
- .membase = (char __iomem *)io_p2v(NETX_PA_UART1),
- .mapbase = NETX_PA_UART1,
- .irq = NETX_IRQ_UART1,
- .uartclk = 100000000,
- .fifosize = 16,
- .flags = UPF_BOOT_AUTOCONF,
- .ops = &netx_pops,
- .line = 1,
- },
- }, {
- .port = {
- .type = PORT_NETX,
- .iotype = UPIO_MEM,
- .membase = (char __iomem *)io_p2v(NETX_PA_UART2),
- .mapbase = NETX_PA_UART2,
- .irq = NETX_IRQ_UART2,
- .uartclk = 100000000,
- .fifosize = 16,
- .flags = UPF_BOOT_AUTOCONF,
- .ops = &netx_pops,
- .line = 2,
- },
- }
-};
-
-#ifdef CONFIG_SERIAL_NETX_CONSOLE
-
-static void netx_console_putchar(struct uart_port *port, int ch)
-{
- while (readl(port->membase + UART_FR) & FR_BUSY);
- writel(ch, port->membase + UART_DR);
-}
-
-static void
-netx_console_write(struct console *co, const char *s, unsigned int count)
-{
- struct uart_port *port = &netx_ports[co->index].port;
- unsigned char cr_save;
-
- cr_save = readl(port->membase + UART_CR);
- writel(cr_save | CR_UART_EN, port->membase + UART_CR);
-
- uart_console_write(port, s, count, netx_console_putchar);
-
- while (readl(port->membase + UART_FR) & FR_BUSY);
- writel(cr_save, port->membase + UART_CR);
-}
-
-static void __init
-netx_console_get_options(struct uart_port *port, int *baud,
- int *parity, int *bits, int *flow)
-{
- unsigned char line_cr;
-
- *baud = (readl(port->membase + UART_BAUDDIV_MSB) << 8) |
- readl(port->membase + UART_BAUDDIV_LSB);
- *baud *= 1000;
- *baud /= 4096;
- *baud *= 1000;
- *baud /= 256;
- *baud *= 100;
-
- line_cr = readl(port->membase + UART_LINE_CR);
- *parity = 'n';
- if (line_cr & LINE_CR_PEN) {
- if (line_cr & LINE_CR_EPS)
- *parity = 'e';
- else
- *parity = 'o';
- }
-
- switch (line_cr & LINE_CR_BITS_MASK) {
- case LINE_CR_8BIT:
- *bits = 8;
- break;
- case LINE_CR_7BIT:
- *bits = 7;
- break;
- case LINE_CR_6BIT:
- *bits = 6;
- break;
- case LINE_CR_5BIT:
- *bits = 5;
- break;
- }
-
- if (readl(port->membase + UART_RTS_CR) & RTS_CR_AUTO)
- *flow = 'r';
-}
-
-static int __init
-netx_console_setup(struct console *co, char *options)
-{
- struct netx_port *sport;
- int baud = 9600;
- int bits = 8;
- int parity = 'n';
- int flow = 'n';
-
- /*
- * Check whether an invalid uart number has been specified, and
- * if so, search for the first available port that does have
- * console support.
- */
- if (co->index == -1 || co->index >= ARRAY_SIZE(netx_ports))
- co->index = 0;
- sport = &netx_ports[co->index];
-
- if (options) {
- uart_parse_options(options, &baud, &parity, &bits, &flow);
- } else {
- /* if the UART is enabled, assume it has been correctly setup
- * by the bootloader and get the options
- */
- if (readl(sport->port.membase + UART_CR) & CR_UART_EN) {
- netx_console_get_options(&sport->port, &baud,
- &parity, &bits, &flow);
- }
-
- }
-
- return uart_set_options(&sport->port, co, baud, parity, bits, flow);
-}
-
-static struct uart_driver netx_reg;
-static struct console netx_console = {
- .name = "ttyNX",
- .write = netx_console_write,
- .device = uart_console_device,
- .setup = netx_console_setup,
- .flags = CON_PRINTBUFFER,
- .index = -1,
- .data = &netx_reg,
-};
-
-static int __init netx_console_init(void)
-{
- register_console(&netx_console);
- return 0;
-}
-console_initcall(netx_console_init);
-
-#define NETX_CONSOLE &netx_console
-#else
-#define NETX_CONSOLE NULL
-#endif
-
-static struct uart_driver netx_reg = {
- .owner = THIS_MODULE,
- .driver_name = DRIVER_NAME,
- .dev_name = "ttyNX",
- .major = SERIAL_NX_MAJOR,
- .minor = MINOR_START,
- .nr = ARRAY_SIZE(netx_ports),
- .cons = NETX_CONSOLE,
-};
-
-static int serial_netx_suspend(struct platform_device *pdev, pm_message_t state)
-{
- struct netx_port *sport = platform_get_drvdata(pdev);
-
- if (sport)
- uart_suspend_port(&netx_reg, &sport->port);
-
- return 0;
-}
-
-static int serial_netx_resume(struct platform_device *pdev)
-{
- struct netx_port *sport = platform_get_drvdata(pdev);
-
- if (sport)
- uart_resume_port(&netx_reg, &sport->port);
-
- return 0;
-}
-
-static int serial_netx_probe(struct platform_device *pdev)
-{
- struct uart_port *port = &netx_ports[pdev->id].port;
-
- dev_info(&pdev->dev, "initialising\n");
-
- port->dev = &pdev->dev;
-
- writel(1, port->membase + UART_RXFIFO_IRQLEVEL);
- uart_add_one_port(&netx_reg, &netx_ports[pdev->id].port);
- platform_set_drvdata(pdev, &netx_ports[pdev->id]);
-
- return 0;
-}
-
-static int serial_netx_remove(struct platform_device *pdev)
-{
- struct netx_port *sport = platform_get_drvdata(pdev);
-
- if (sport)
- uart_remove_one_port(&netx_reg, &sport->port);
-
- return 0;
-}
-
-static struct platform_driver serial_netx_driver = {
- .probe = serial_netx_probe,
- .remove = serial_netx_remove,
-
- .suspend = serial_netx_suspend,
- .resume = serial_netx_resume,
-
- .driver = {
- .name = DRIVER_NAME,
- },
-};
-
-static int __init netx_serial_init(void)
-{
- int ret;
-
- printk(KERN_INFO "Serial: NetX driver\n");
-
- ret = uart_register_driver(&netx_reg);
- if (ret)
- return ret;
-
- ret = platform_driver_register(&serial_netx_driver);
- if (ret != 0)
- uart_unregister_driver(&netx_reg);
-
- return 0;
-}
-
-static void __exit netx_serial_exit(void)
-{
- platform_driver_unregister(&serial_netx_driver);
- uart_unregister_driver(&netx_reg);
-}
-
-module_init(netx_serial_init);
-module_exit(netx_serial_exit);
-
-MODULE_AUTHOR("Sascha Hauer");
-MODULE_DESCRIPTION("NetX serial port driver");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:" DRIVER_NAME);
list_for_each_entry_safe(waiter, next, &sem->read_wait, list) {
tsk = waiter->task;
- smp_mb();
- waiter->task = NULL;
+ smp_store_release(&waiter->task, NULL);
wake_up_process(tsk);
put_task_struct(tsk);
}
for (;;) {
set_current_state(TASK_UNINTERRUPTIBLE);
- if (!waiter.task)
+ if (!smp_load_acquire(&waiter.task))
break;
if (!timeout)
break;
char *buf)
{
struct con_driver *con = dev_get_drvdata(dev);
- int bind = con_is_bound(con->con);
+ int bind;
+
+ console_lock();
+ bind = con_is_bound(con->con);
+ console_unlock();
return snprintf(buf, PAGE_SIZE, "%i\n", bind);
}
local_mem = devm_memremap(hcd->self.sysdev, phys_addr,
size, MEMREMAP_WC);
- if (!local_mem)
- return -ENOMEM;
+ if (IS_ERR(local_mem))
+ return PTR_ERR(local_mem);
/*
* Here we pass a dma_addr_t but the arg type is a phys_addr_t.
break;
case PCI_VENDOR_ID_AMD:
/* AMD PLL quirk */
- if (usb_amd_find_chipset_info())
+ if (usb_amd_quirk_pll_check())
ehci->amd_pll_fix = 1;
/* AMD8111 EHCI doesn't work, according to AMD errata */
if (pdev->device == 0x7463) {
break;
case PCI_VENDOR_ID_ATI:
/* AMD PLL quirk */
- if (usb_amd_find_chipset_info())
+ if (usb_amd_quirk_pll_check())
ehci->amd_pll_fix = 1;
/*
return result;
error_set_cluster_id:
- wusb_cluster_id_put(wusbhc->cluster_id);
+ wusb_cluster_id_put(addr);
error_cluster_id_get:
goto out;
{
struct ohci_hcd *ohci = hcd_to_ohci(hcd);
- if (usb_amd_find_chipset_info())
+ if (usb_amd_quirk_pll_check())
ohci->flags |= OHCI_QUIRK_AMD_PLL;
/* SB800 needs pre-fetch fix */
struct amd_chipset_type sb_type;
int isoc_reqs;
int probe_count;
- int probe_result;
+ bool need_pll_quirk;
} amd_chipset;
static DEFINE_SPINLOCK(amd_lock);
}
EXPORT_SYMBOL_GPL(sb800_prefetch);
-int usb_amd_find_chipset_info(void)
+static void usb_amd_find_chipset_info(void)
{
unsigned long flags;
struct amd_chipset_info info;
- int ret;
+ info.need_pll_quirk = 0;
spin_lock_irqsave(&amd_lock, flags);
if (amd_chipset.probe_count > 0) {
amd_chipset.probe_count++;
spin_unlock_irqrestore(&amd_lock, flags);
- return amd_chipset.probe_result;
+ return;
}
memset(&info, 0, sizeof(info));
spin_unlock_irqrestore(&amd_lock, flags);
if (!amd_chipset_sb_type_init(&info)) {
- ret = 0;
goto commit;
}
- /* Below chipset generations needn't enable AMD PLL quirk */
- if (info.sb_type.gen == AMD_CHIPSET_UNKNOWN ||
- info.sb_type.gen == AMD_CHIPSET_SB600 ||
- info.sb_type.gen == AMD_CHIPSET_YANGTZE ||
- (info.sb_type.gen == AMD_CHIPSET_SB700 &&
- info.sb_type.rev > 0x3b)) {
+ switch (info.sb_type.gen) {
+ case AMD_CHIPSET_SB700:
+ info.need_pll_quirk = info.sb_type.rev <= 0x3B;
+ break;
+ case AMD_CHIPSET_SB800:
+ case AMD_CHIPSET_HUDSON2:
+ case AMD_CHIPSET_BOLTON:
+ info.need_pll_quirk = 1;
+ break;
+ default:
+ info.need_pll_quirk = 0;
+ break;
+ }
+
+ if (!info.need_pll_quirk) {
if (info.smbus_dev) {
pci_dev_put(info.smbus_dev);
info.smbus_dev = NULL;
}
- ret = 0;
goto commit;
}
}
}
- ret = info.probe_result = 1;
printk(KERN_DEBUG "QUIRK: Enable AMD PLL fix\n");
commit:
/* Mark that we where here */
amd_chipset.probe_count++;
- ret = amd_chipset.probe_result;
spin_unlock_irqrestore(&amd_lock, flags);
amd_chipset = info;
spin_unlock_irqrestore(&amd_lock, flags);
}
-
- return ret;
}
-EXPORT_SYMBOL_GPL(usb_amd_find_chipset_info);
int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev)
{
}
EXPORT_SYMBOL_GPL(usb_amd_prefetch_quirk);
+bool usb_amd_quirk_pll_check(void)
+{
+ usb_amd_find_chipset_info();
+ return amd_chipset.need_pll_quirk;
+}
+EXPORT_SYMBOL_GPL(usb_amd_quirk_pll_check);
+
/*
* The hardware normally enables the A-link power management feature, which
* lets the system lower the power consumption in idle states.
amd_chipset.nb_type = 0;
memset(&amd_chipset.sb_type, 0, sizeof(amd_chipset.sb_type));
amd_chipset.isoc_reqs = 0;
- amd_chipset.probe_result = 0;
+ amd_chipset.need_pll_quirk = 0;
spin_unlock_irqrestore(&amd_lock, flags);
#ifdef CONFIG_USB_PCI
void uhci_reset_hc(struct pci_dev *pdev, unsigned long base);
int uhci_check_and_reset_hc(struct pci_dev *pdev, unsigned long base);
-int usb_amd_find_chipset_info(void);
int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *pdev);
bool usb_amd_hang_symptom_quirk(void);
bool usb_amd_prefetch_quirk(void);
void usb_amd_dev_put(void);
+bool usb_amd_quirk_pll_check(void);
void usb_amd_quirk_pll_disable(void);
void usb_amd_quirk_pll_enable(void);
void usb_asmedia_modifyflowcontrol(struct pci_dev *pdev);
xhci->quirks |= XHCI_AMD_0x96_HOST;
/* AMD PLL quirk */
- if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_find_chipset_info())
+ if (pdev->vendor == PCI_VENDOR_ID_AMD && usb_amd_quirk_pll_check())
xhci->quirks |= XHCI_AMD_PLL_FIX;
if (pdev->vendor == PCI_VENDOR_ID_AMD &&
if (!usb_endpoint_xfer_isoc(&urb->ep->desc) && usb_urb_dir_out(urb) &&
usb_endpoint_maxp(&urb->ep->desc) >= TRB_IDT_MAX_SIZE &&
urb->transfer_buffer_length <= TRB_IDT_MAX_SIZE &&
- !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP))
+ !(urb->transfer_flags & URB_NO_TRANSFER_DMA_MAP) &&
+ !urb->num_sgs)
return true;
return false;
#ifdef CONFIG_OF
static void usb251xb_get_ports_field(struct usb251xb *hub,
- const char *prop_name, u8 port_cnt, u8 *fld)
+ const char *prop_name, u8 port_cnt,
+ bool ds_only, u8 *fld)
{
struct device *dev = hub->dev;
struct property *prop;
u32 port;
of_property_for_each_u32(dev->of_node, prop_name, prop, p, port) {
- if ((port >= 1) && (port <= port_cnt))
+ if ((port >= ds_only ? 1 : 0) && (port <= port_cnt))
*fld |= BIT(port);
else
dev_warn(dev, "port %u doesn't exist\n", port);
hub->non_rem_dev = USB251XB_DEF_NON_REMOVABLE_DEVICES;
usb251xb_get_ports_field(hub, "non-removable-ports", data->port_cnt,
- &hub->non_rem_dev);
+ true, &hub->non_rem_dev);
hub->port_disable_sp = USB251XB_DEF_PORT_DISABLE_SELF;
usb251xb_get_ports_field(hub, "sp-disabled-ports", data->port_cnt,
- &hub->port_disable_sp);
+ true, &hub->port_disable_sp);
hub->port_disable_bp = USB251XB_DEF_PORT_DISABLE_BUS;
usb251xb_get_ports_field(hub, "bp-disabled-ports", data->port_cnt,
- &hub->port_disable_bp);
+ true, &hub->port_disable_bp);
hub->max_power_sp = USB251XB_DEF_MAX_POWER_SELF;
if (!of_property_read_u32(np, "sp-max-total-current-microamp",
*/
hub->port_swap = USB251XB_DEF_PORT_SWAP;
usb251xb_get_ports_field(hub, "swap-dx-lanes", data->port_cnt,
- &hub->port_swap);
- if (of_get_property(np, "swap-us-lanes", NULL))
- hub->port_swap |= BIT(0);
+ false, &hub->port_swap);
/* The following parameters are currently not exposed to devicetree, but
* may be as soon as needed.
* status of a command.
*/
+#include <linux/blkdev.h>
+#include <linux/dma-mapping.h>
#include <linux/module.h>
#include <linux/mutex.h>
static int slave_configure(struct scsi_device *sdev)
{
struct us_data *us = host_to_us(sdev->host);
+ struct device *dev = us->pusb_dev->bus->sysdev;
/*
* Many devices have trouble transferring more than 32KB at a time,
blk_queue_max_hw_sectors(sdev->request_queue, 2048);
}
+ /*
+ * The max_hw_sectors should be up to maximum size of a mapping for
+ * the device. Otherwise, a DMA API might fail on swiotlb environment.
+ */
+ blk_queue_max_hw_sectors(sdev->request_queue,
+ min_t(size_t, queue_max_hw_sectors(sdev->request_queue),
+ dma_max_mapping_size(dev) >> SECTOR_SHIFT));
+
/*
* Some USB host controllers can't do DMA; they have to use PIO.
* They indicate this by setting their dma_mask to NULL. For
call->tmp_u = htonl(0);
afs_extract_to_tmp(call);
}
+ /* Fall through */
- /* Fall through - and extract the returned data length */
+ /* extract the returned data length */
case 1:
_debug("extract data length");
ret = afs_extract_data(call, true);
call->bvec[0].bv_page = req->pages[req->index];
iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
ASSERTCMP(size, <=, PAGE_SIZE);
+ /* Fall through */
- /* Fall through - and extract the returned data */
+ /* extract the returned data */
case 2:
_debug("extract data %zu/%llu",
iov_iter_count(&call->iter), req->remain);
/* Discard any excess data the server gave us */
iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
call->unmarshall = 3;
-
/* Fall through */
+
case 3:
_debug("extract discard %zu/%llu",
iov_iter_count(&call->iter), req->actual_len - req->len);
no_more_data:
call->unmarshall = 4;
afs_extract_to_buf(call, (21 + 3 + 6) * 4);
+ /* Fall through */
- /* Fall through - and extract the metadata */
+ /* extract the metadata */
case 4:
ret = afs_extract_data(call, false);
if (ret < 0)
case 0:
call->unmarshall++;
afs_extract_to_buf(call, 12 * 4);
+ /* Fall through */
- /* Fall through - and extract the returned status record */
+ /* extract the returned status record */
case 1:
_debug("extract status");
ret = afs_extract_data(call, true);
xdr_decode_AFSFetchVolumeStatus(&bp, call->out_volstatus);
call->unmarshall++;
afs_extract_to_tmp(call);
+ /* Fall through */
- /* Fall through - and extract the volume name length */
+ /* extract the volume name length */
case 2:
ret = afs_extract_data(call, true);
if (ret < 0)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the volume name */
+ /* extract the volume name */
case 3:
_debug("extract volname");
ret = afs_extract_data(call, true);
_debug("volname '%s'", p);
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the offline message length */
+ /* extract the offline message length */
case 4:
ret = afs_extract_data(call, true);
if (ret < 0)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the offline message */
+ /* extract the offline message */
case 5:
_debug("extract offline");
ret = afs_extract_data(call, true);
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the message of the day length */
+ /* extract the message of the day length */
case 6:
ret = afs_extract_data(call, true);
if (ret < 0)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the message of the day */
+ /* extract the message of the day */
case 7:
_debug("extract motd");
ret = afs_extract_data(call, false);
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the capabilities word count */
+ /* Extract the capabilities word count */
case 1:
ret = afs_extract_data(call, true);
if (ret < 0)
call->count2 = count;
iov_iter_discard(&call->iter, READ, count * sizeof(__be32));
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract capabilities words */
+ /* Extract capabilities words */
case 2:
ret = afs_extract_data(call, false);
if (ret < 0)
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
/* Extract the file status count and array in two steps */
- /* Fall through */
case 1:
_debug("extract status count");
ret = afs_extract_data(call, true);
call->unmarshall++;
more_counts:
afs_extract_to_buf(call, 21 * sizeof(__be32));
-
/* Fall through */
+
case 2:
_debug("extract status array %u", call->count);
ret = afs_extract_data(call, true);
call->count = 0;
call->unmarshall++;
afs_extract_to_tmp(call);
+ /* Fall through */
/* Extract the callback count and array in two steps */
- /* Fall through */
case 3:
_debug("extract CB count");
ret = afs_extract_data(call, true);
call->unmarshall++;
more_cbs:
afs_extract_to_buf(call, 3 * sizeof(__be32));
-
/* Fall through */
+
case 4:
_debug("extract CB array");
ret = afs_extract_data(call, true);
afs_extract_to_buf(call, 6 * sizeof(__be32));
call->unmarshall++;
-
/* Fall through */
+
case 5:
ret = afs_extract_data(call, false);
if (ret < 0)
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
/* extract the returned data length */
case 1:
acl->size = call->count2;
afs_extract_begin(call, acl->data, size);
call->unmarshall++;
+ /* Fall through */
/* extract the returned data */
case 2:
afs_extract_to_buf(call, (21 + 6) * 4);
call->unmarshall++;
+ /* Fall through */
/* extract the metadata */
case 3:
req->offset = req->pos & (PAGE_SIZE - 1);
afs_extract_to_tmp64(call);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the returned data length */
+ /* extract the returned data length */
case 1:
_debug("extract data length");
ret = afs_extract_data(call, true);
call->bvec[0].bv_page = req->pages[req->index];
iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
ASSERTCMP(size, <=, PAGE_SIZE);
+ /* Fall through */
- /* Fall through - and extract the returned data */
+ /* extract the returned data */
case 2:
_debug("extract data %zu/%llu",
iov_iter_count(&call->iter), req->remain);
/* Discard any excess data the server gave us */
iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
call->unmarshall = 3;
-
/* Fall through */
+
case 3:
_debug("extract discard %zu/%llu",
iov_iter_count(&call->iter), req->actual_len - req->len);
sizeof(struct yfs_xdr_YFSFetchStatus) +
sizeof(struct yfs_xdr_YFSCallBack) +
sizeof(struct yfs_xdr_YFSVolSync));
+ /* Fall through */
- /* Fall through - and extract the metadata */
+ /* extract the metadata */
case 4:
ret = afs_extract_data(call, false);
if (ret < 0)
req->file_size = call->out_scb->status.size;
call->unmarshall++;
-
/* Fall through */
+
case 5:
break;
}
case 0:
call->unmarshall++;
afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus));
+ /* Fall through */
- /* Fall through - and extract the returned status record */
+ /* extract the returned status record */
case 1:
_debug("extract status");
ret = afs_extract_data(call, true);
xdr_decode_YFSFetchVolumeStatus(&bp, call->out_volstatus);
call->unmarshall++;
afs_extract_to_tmp(call);
+ /* Fall through */
- /* Fall through - and extract the volume name length */
+ /* extract the volume name length */
case 2:
ret = afs_extract_data(call, true);
if (ret < 0)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the volume name */
+ /* extract the volume name */
case 3:
_debug("extract volname");
ret = afs_extract_data(call, true);
_debug("volname '%s'", p);
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the offline message length */
+ /* extract the offline message length */
case 4:
ret = afs_extract_data(call, true);
if (ret < 0)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the offline message */
+ /* extract the offline message */
case 5:
_debug("extract offline");
ret = afs_extract_data(call, true);
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the message of the day length */
+ /* extract the message of the day length */
case 6:
ret = afs_extract_data(call, true);
if (ret < 0)
size = (call->count + 3) & ~3; /* It's padded */
afs_extract_to_buf(call, size);
call->unmarshall++;
+ /* Fall through */
- /* Fall through - and extract the message of the day */
+ /* extract the message of the day */
case 7:
_debug("extract motd");
ret = afs_extract_data(call, false);
_debug("motd '%s'", p);
call->unmarshall++;
-
/* Fall through */
+
case 8:
break;
}
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
/* Extract the file status count and array in two steps */
- /* Fall through */
case 1:
_debug("extract status count");
ret = afs_extract_data(call, true);
call->unmarshall++;
more_counts:
afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus));
-
/* Fall through */
+
case 2:
_debug("extract status array %u", call->count);
ret = afs_extract_data(call, true);
call->count = 0;
call->unmarshall++;
afs_extract_to_tmp(call);
+ /* Fall through */
/* Extract the callback count and array in two steps */
- /* Fall through */
case 3:
_debug("extract CB count");
ret = afs_extract_data(call, true);
call->unmarshall++;
more_cbs:
afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack));
-
/* Fall through */
+
case 4:
_debug("extract CB array");
ret = afs_extract_data(call, true);
afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync));
call->unmarshall++;
-
/* Fall through */
+
case 5:
ret = afs_extract_data(call, false);
if (ret < 0)
xdr_decode_YFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
-
/* Fall through */
+
case 6:
break;
}
case 0:
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
/* Extract the file ACL length */
case 1:
iov_iter_discard(&call->iter, READ, size);
}
call->unmarshall++;
+ /* Fall through */
/* Extract the file ACL */
case 2:
afs_extract_to_tmp(call);
call->unmarshall++;
+ /* Fall through */
/* Extract the volume ACL length */
case 3:
iov_iter_discard(&call->iter, READ, size);
}
call->unmarshall++;
+ /* Fall through */
/* Extract the volume ACL */
case 4:
sizeof(struct yfs_xdr_YFSFetchStatus) +
sizeof(struct yfs_xdr_YFSVolSync));
call->unmarshall++;
+ /* Fall through */
/* extract the metadata */
case 5:
xdr_decode_YFSVolSync(&bp, call->out_volsync);
call->unmarshall++;
+ /* Fall through */
case 6:
break;
struct bio *bio;
bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+ bool nowait = (iocb->ki_flags & IOCB_NOWAIT) != 0;
loff_t pos = iocb->ki_pos;
blk_qc_t qc = BLK_QC_T_NONE;
- int ret = 0;
+ gfp_t gfp;
+ ssize_t ret;
if ((pos | iov_iter_alignment(iter)) &
(bdev_logical_block_size(bdev) - 1))
return -EINVAL;
- bio = bio_alloc_bioset(GFP_KERNEL, nr_pages, &blkdev_dio_pool);
+ if (nowait)
+ gfp = GFP_NOWAIT;
+ else
+ gfp = GFP_KERNEL;
+
+ bio = bio_alloc_bioset(gfp, nr_pages, &blkdev_dio_pool);
+ if (!bio)
+ return -EAGAIN;
dio = container_of(bio, struct blkdev_dio, bio);
dio->is_sync = is_sync = is_sync_kiocb(iocb);
if (!is_poll)
blk_start_plug(&plug);
+ ret = 0;
for (;;) {
+ int err;
+
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = pos >> 9;
bio->bi_write_hint = iocb->ki_hint;
bio->bi_end_io = blkdev_bio_end_io;
bio->bi_ioprio = iocb->ki_ioprio;
- ret = bio_iov_iter_get_pages(bio, iter);
- if (unlikely(ret)) {
+ err = bio_iov_iter_get_pages(bio, iter);
+ if (unlikely(err)) {
+ if (!ret)
+ ret = err;
bio->bi_status = BLK_STS_IOERR;
bio_endio(bio);
break;
task_io_account_write(bio->bi_iter.bi_size);
}
+ /*
+ * Tell underlying layer to not block for resource shortage.
+ * And if we would have blocked, return error inline instead
+ * of through the bio->bi_end_io() callback.
+ */
+ if (nowait)
+ bio->bi_opf |= (REQ_NOWAIT | REQ_NOWAIT_INLINE);
+
dio->size += bio->bi_iter.bi_size;
pos += bio->bi_iter.bi_size;
}
qc = submit_bio(bio);
+ if (qc == BLK_QC_T_EAGAIN) {
+ if (!ret)
+ ret = -EAGAIN;
+ goto error;
+ }
if (polled)
WRITE_ONCE(iocb->ki_cookie, qc);
atomic_inc(&dio->ref);
}
- submit_bio(bio);
- bio = bio_alloc(GFP_KERNEL, nr_pages);
+ qc = submit_bio(bio);
+ if (qc == BLK_QC_T_EAGAIN) {
+ if (!ret)
+ ret = -EAGAIN;
+ goto error;
+ }
+ ret += bio->bi_iter.bi_size;
+
+ bio = bio_alloc(gfp, nr_pages);
+ if (!bio) {
+ if (!ret)
+ ret = -EAGAIN;
+ goto error;
+ }
}
if (!is_poll)
}
__set_current_state(TASK_RUNNING);
+out:
if (!ret)
ret = blk_status_to_errno(dio->bio.bi_status);
- if (likely(!ret))
- ret = dio->size;
bio_put(&dio->bio);
return ret;
+error:
+ if (!is_poll)
+ blk_finish_plug(&plug);
+ goto out;
}
static ssize_t
tristate "Btrfs filesystem support"
select CRYPTO
select CRYPTO_CRC32C
+ select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
cleanup_srcu_struct(&fs_info->subvol_srcu);
+ btrfs_free_csum_hash(fs_info);
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
}
return 0;
}
+/*
+ * Check if the inode has flags compatible with compression
+ */
+static inline bool inode_can_compress(struct inode *inode)
+{
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
+ BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ return false;
+ return true;
+}
+
+/*
+ * Check if the inode needs to be submitted to compression, based on mount
+ * options, defragmentation, properties or heuristics.
+ */
static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ if (!inode_can_compress(inode)) {
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
+ btrfs_ino(BTRFS_I(inode)));
+ return 0;
+ }
/* force compress */
if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
return 1;
} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
- } else if (!inode_need_compress(inode, start, end)) {
+ } else if (!inode_can_compress(inode) ||
+ !inode_need_compress(inode, start, end)) {
ret = cow_file_range(inode, locked_page, start, end, end,
page_started, nr_written, 1, NULL);
} else {
if (blockers) {
btrfs_assert_no_spinning_writers(eb);
eb->blocking_writers--;
- /* Use the lighter barrier after atomic */
- smp_mb__after_atomic();
- cond_wake_up_nomb(&eb->write_lock_wq);
+ /*
+ * We need to order modifying blocking_writers above with
+ * actually waking up the sleepers to ensure they see the
+ * updated value of blocking_writers
+ */
+ cond_wake_up(&eb->write_lock_wq);
} else {
btrfs_assert_spinning_writers_put(eb);
write_unlock(&eb->lock);
struct extent_state **cached_state)
{
struct btrfs_ordered_extent *ordered;
- struct extent_state *cachedp = NULL;
+ struct extent_state *cache = NULL;
+ struct extent_state **cachedp = &cache;
if (cached_state)
- cachedp = *cached_state;
+ cachedp = cached_state;
while (1) {
- lock_extent_bits(tree, start, end, &cachedp);
+ lock_extent_bits(tree, start, end, cachedp);
ordered = btrfs_lookup_ordered_range(inode, start,
end - start + 1);
if (!ordered) {
* aren't exposing it outside of this function
*/
if (!cached_state)
- refcount_dec(&cachedp->refs);
+ refcount_dec(&cache->refs);
break;
}
- unlock_extent_cached(tree, start, end, &cachedp);
+ unlock_extent_cached(tree, start, end, cachedp);
btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
u64 stripe_len;
u64 raid56_full_stripe_start = (u64)-1;
int data_stripes;
+ int ret = 0;
ASSERT(op != BTRFS_MAP_DISCARD);
btrfs_crit(fs_info,
"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
stripe_offset, offset, em->start, logical, stripe_len);
- free_extent_map(em);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
/* stripe_offset is the offset of this block in its stripe */
io_geom->stripe_offset = stripe_offset;
io_geom->raid56_stripe_offset = raid56_full_stripe_start;
- return 0;
+out:
+ /* once for us */
+ free_extent_map(em);
+ return ret;
}
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
membarrier_execve(current);
rseq_execve(current);
acct_update_integrals(current);
- task_numa_free(current);
+ task_numa_free(current, false);
free_bprm(bprm);
kfree(pathbuf);
if (filename)
struct file *file;
off_t io_end;
- size_t io_pages;
+ size_t io_len;
};
struct io_ring_ctx {
#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
#define REQ_F_IO_DRAINED 32 /* drain done */
#define REQ_F_LINK 64 /* linked sqes */
-#define REQ_F_FAIL_LINK 128 /* fail rest of links */
+#define REQ_F_LINK_DONE 128 /* linked sqes done */
+#define REQ_F_FAIL_LINK 256 /* fail rest of links */
u64 user_data;
u32 result;
u32 sequence;
if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
return false;
- return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped;
+ return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped;
}
static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
nxt->flags |= REQ_F_LINK;
}
+ nxt->flags |= REQ_F_LINK_DONE;
INIT_WORK(&nxt->work, io_sq_wq_submit_work);
queue_work(req->ctx->sqo_wq, &nxt->work);
}
*/
offset = buf_addr - imu->ubuf;
iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
- if (offset)
- iov_iter_advance(iter, offset);
+
+ if (offset) {
+ /*
+ * Don't use iov_iter_advance() here, as it's really slow for
+ * using the latter parts of a big fixed buffer - it iterates
+ * over each segment manually. We can cheat a bit here, because
+ * we know that:
+ *
+ * 1) it's a BVEC iter, we set it up
+ * 2) all bvecs are PAGE_SIZE in size, except potentially the
+ * first and last bvec
+ *
+ * So just find our index, and adjust the iterator afterwards.
+ * If the offset is within the first bvec (or the whole first
+ * bvec, just use iov_iter_advance(). This makes it easier
+ * since we can just skip the first segment, which may not
+ * be PAGE_SIZE aligned.
+ */
+ const struct bio_vec *bvec = imu->bvec;
+
+ if (offset <= bvec->bv_len) {
+ iov_iter_advance(iter, offset);
+ } else {
+ unsigned long seg_skip;
+
+ /* skip first vec */
+ offset -= bvec->bv_len;
+ seg_skip = 1 + (offset >> PAGE_SHIFT);
+
+ iter->bvec = bvec + seg_skip;
+ iter->nr_segs -= seg_skip;
+ iter->count -= (seg_skip << PAGE_SHIFT);
+ iter->iov_offset = offset & ~PAGE_MASK;
+ if (iter->iov_offset)
+ iter->count -= iter->iov_offset;
+ }
+ }
+
return 0;
}
off_t io_end = kiocb->ki_pos + len;
if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) {
- unsigned long max_pages;
+ unsigned long max_bytes;
/* Use 8x RA size as a decent limiter for both reads/writes */
- max_pages = filp->f_ra.ra_pages;
- if (!max_pages)
- max_pages = VM_READAHEAD_PAGES;
- max_pages *= 8;
-
- /* If max pages are exceeded, reset the state */
- len >>= PAGE_SHIFT;
- if (async_list->io_pages + len <= max_pages) {
+ max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3);
+ if (!max_bytes)
+ max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3);
+
+ /* If max len are exceeded, reset the state */
+ if (async_list->io_len + len <= max_bytes) {
req->flags |= REQ_F_SEQ_PREV;
- async_list->io_pages += len;
+ async_list->io_len += len;
} else {
io_end = 0;
- async_list->io_pages = 0;
+ async_list->io_len = 0;
}
}
/* New file? Reset state. */
if (async_list->file != filp) {
- async_list->io_pages = 0;
+ async_list->io_len = 0;
async_list->file = filp;
}
async_list->io_end = io_end;
INIT_LIST_HEAD(&poll->wait.entry);
init_waitqueue_func_entry(&poll->wait, io_poll_wake);
+ INIT_LIST_HEAD(&req->list);
+
mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
spin_lock_irq(&ctx->completion_lock);
/* async context always use a copy of the sqe */
kfree(sqe);
+ /* req from defer and link list needn't decrease async cnt */
+ if (req->flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
+ goto out;
+
if (!async_list)
break;
if (!list_empty(&req_list)) {
}
}
+out:
if (cur_mm) {
set_fs(old_fs);
unuse_mm(cur_mm);
ret = true;
spin_lock(&list->lock);
list_add_tail(&req->list, &list->list);
+ /*
+ * Ensure we see a simultaneous modification from io_sq_wq_submit_work()
+ */
+ smp_mb();
if (!atomic_read(&list->cnt)) {
list_del_init(&req->list);
ret = false;
-# SPDX-License-Identifier: GPL-2.0-or-newer
+# SPDX-License-Identifier: GPL-2.0-or-later
#
# Copyright (c) 2019 Oracle.
# All Rights Reserved.
p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
disconnect = disconnect_mount(p, how);
-
if (mnt_has_parent(p)) {
mnt_add_count(p->mnt_parent, -1);
if (!disconnect) {
list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
} else {
umount_mnt(p);
- hlist_add_head(&p->mnt_umount, &unmounted);
}
}
change_mnt_propagation(p, MS_PRIVATE);
+ if (disconnect)
+ hlist_add_head(&p->mnt_umount, &unmounted);
}
}
override_cred->cap_permitted;
}
+ /*
+ * The new set of credentials can *only* be used in
+ * task-synchronous circumstances, and does not need
+ * RCU freeing, unless somebody then takes a separate
+ * reference to it.
+ *
+ * NOTE! This is _only_ true because this credential
+ * is used purely for override_creds() that installs
+ * it as the subjective cred. Other threads will be
+ * accessing ->real_cred, not the subjective cred.
+ *
+ * If somebody _does_ make a copy of this (using the
+ * 'get_current_cred()' function), that will clear the
+ * non_rcu field, because now that other user may be
+ * expecting RCU freeing. But normal thread-synchronous
+ * cred accesses will keep things non-RCY.
+ */
+ override_cred->non_rcu = 1;
+
old_cred = override_creds(override_cred);
retry:
res = user_path_at(dfd, filename, lookup_flags, &path);
static inline int
arch_futex_atomic_op_inuser(int op, u32 oparg, int *oval, u32 __user *uaddr)
{
- int oldval = 0, ret;
-
- pagefault_disable();
-
- switch (op) {
- case FUTEX_OP_SET:
- case FUTEX_OP_ADD:
- case FUTEX_OP_OR:
- case FUTEX_OP_ANDN:
- case FUTEX_OP_XOR:
- default:
- ret = -ENOSYS;
- }
-
- pagefault_enable();
-
- if (!ret)
- *oval = oldval;
-
- return ret;
+ return -ENOSYS;
}
static inline int
extern struct blkcg blkcg_root;
extern struct cgroup_subsys_state * const blkcg_root_css;
+extern bool blkcg_debug_stats;
struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg,
struct request_queue *q, bool update_hint);
__REQ_RAHEAD, /* read ahead, can fail anytime */
__REQ_BACKGROUND, /* background IO */
__REQ_NOWAIT, /* Don't wait if request will block */
+ __REQ_NOWAIT_INLINE, /* Return would-block error inline */
/*
* When a shared kthread needs to issue a bio for a cgroup, doing
* so synchronously can lead to priority inversions as the kthread
#define REQ_RAHEAD (1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT (1ULL << __REQ_NOWAIT)
+#define REQ_NOWAIT_INLINE (1ULL << __REQ_NOWAIT_INLINE)
#define REQ_CGROUP_PUNT (1ULL << __REQ_CGROUP_PUNT)
#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP)
typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE -1U
+#define BLK_QC_T_EAGAIN -2U
#define BLK_QC_T_SHIFT 16
#define BLK_QC_T_INTERNAL (1U << 31)
static inline bool blk_qc_t_valid(blk_qc_t cookie)
{
- return cookie != BLK_QC_T_NONE;
+ return cookie != BLK_QC_T_NONE && cookie != BLK_QC_T_EAGAIN;
}
static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
u32 seq, groups;
struct sock *nls;
- void (*input) (struct sk_buff *skb);
struct cn_queue_dev *cbdev;
};
struct user_struct *user; /* real user ID subscription */
struct user_namespace *user_ns; /* user_ns the caps and keyrings are relative to. */
struct group_info *group_info; /* supplementary groups for euid/fsgid */
- struct rcu_head rcu; /* RCU deletion hook */
+ /* RCU deletion */
+ union {
+ int non_rcu; /* Can we skip RCU deletion? */
+ struct rcu_head rcu; /* RCU deletion hook */
+ };
} __randomize_layout;
extern void __put_cred(struct cred *);
if (!cred)
return cred;
validate_creds(cred);
+ nonconst_cred->non_rcu = 0;
return get_new_cred(nonconst_cred);
}
if (!atomic_inc_not_zero(&nonconst_cred->usage))
return NULL;
validate_creds(cred);
+ nonconst_cred->non_rcu = 0;
return cred;
}
* This identifies the device type and carries type-specific
* information.
* @mutex: Mutex to synchronize calls to its driver.
+ * @lockdep_mutex: An optional debug lock that a subsystem can use as a
+ * peer lock to gain localized lockdep coverage of the device_lock.
* @bus: Type of bus device is on.
* @driver: Which driver has allocated this
* @platform_data: Platform data specific to the device.
core doesn't touch it */
void *driver_data; /* Driver data, set and get with
dev_set_drvdata/dev_get_drvdata */
+#ifdef CONFIG_PROVE_LOCKING
+ struct mutex lockdep_mutex;
+#endif
struct mutex mutex; /* mutex to synchronize calls to
* its driver.
*/
*/
extern struct device *get_device(struct device *dev);
extern void put_device(struct device *dev);
+extern bool kill_device(struct device *dev);
#ifdef CONFIG_DEVTMPFS
extern int devtmpfs_create_node(struct device *dev);
*/
static inline bool dma_addressing_limited(struct device *dev)
{
- return min_not_zero(*dev->dma_mask, dev->bus_dma_mask) <
- dma_get_required_mask(dev);
+ return min_not_zero(dma_get_mask(dev), dev->bus_dma_mask) <
+ dma_get_required_mask(dev);
}
#ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS
struct request *(*dispatch_request)(struct blk_mq_hw_ctx *);
bool (*has_work)(struct blk_mq_hw_ctx *);
void (*completed_request)(struct request *, u64);
- void (*started_request)(struct request *);
void (*requeue_request)(struct request *);
struct request *(*former_request)(struct request_queue *, struct request *);
struct request *(*next_request)(struct request_queue *, struct request *);
void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to);
void init_iova_domain(struct iova_domain *iovad, unsigned long granule,
unsigned long start_pfn);
+bool has_iova_flush_queue(struct iova_domain *iovad);
int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb, iova_entry_dtor entry_dtor);
struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn);
{
}
+static inline bool has_iova_flush_queue(struct iova_domain *iovad)
+{
+ return false;
+}
+
static inline int init_iova_flush_queue(struct iova_domain *iovad,
iova_flush_cb flush_cb,
iova_entry_dtor entry_dtor)
/* SPDX-License-Identifier: GPL-2.0-only */
/****************************************************************************
- * ip_conntrack_h323_asn1.h - BER and PER decoding library for H.323
- * conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
*
* Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
*
}
/**
- * of_property_read_bool - Findfrom a property
+ * of_property_read_bool - Find a property
* @np: device node from which the property value is to be read.
* @propname: name of the property to be searched.
*
u64 last_sum_exec_runtime;
struct callback_head numa_work;
- struct numa_group *numa_group;
+ /*
+ * This pointer is only modified for current in syscall and
+ * pagefault context (and for tasks being destroyed), so it can be read
+ * from any of the following contexts:
+ * - RCU read-side critical section
+ * - current->numa_group from everywhere
+ * - task's runqueue locked, task not running
+ */
+ struct numa_group __rcu *numa_group;
/*
* numa_faults is an array split into four regions:
extern void task_numa_fault(int last_node, int node, int pages, int flags);
extern pid_t task_numa_group_id(struct task_struct *p);
extern void set_numabalancing_state(bool enabled);
-extern void task_numa_free(struct task_struct *p);
+extern void task_numa_free(struct task_struct *p, bool final);
extern bool should_numa_migrate_memory(struct task_struct *p, struct page *page,
int src_nid, int dst_cpu);
#else
static inline void set_numabalancing_state(bool enabled)
{
}
-static inline void task_numa_free(struct task_struct *p)
+static inline void task_numa_free(struct task_struct *p, bool final)
{
}
static inline bool should_numa_migrate_memory(struct task_struct *p,
return !list_empty(&wq_head->head);
}
+/**
+ * wq_has_single_sleeper - check if there is only one sleeper
+ * @wq_head: wait queue head
+ *
+ * Returns true of wq_head has only one sleeper on the list.
+ *
+ * Please refer to the comment for waitqueue_active.
+ */
+static inline bool wq_has_single_sleeper(struct wait_queue_head *wq_head)
+{
+ return list_is_singular(&wq_head->head);
+}
+
/**
* wq_has_sleeper - check if there are any waiting processes
* @wq_head: wait queue head
u8 rx_nss;
};
-#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)ERR_PTR(-ENODATA))
+#define VENDOR_CMD_RAW_DATA ((const struct nla_policy *)(long)(-ENODATA))
/**
* struct wiphy_vendor_command - vendor command definition
#define _NET_FLOW_OFFLOAD_H
#include <linux/kernel.h>
+#include <linux/list.h>
#include <net/flow_dissector.h>
-#include <net/sch_generic.h>
struct flow_match {
struct flow_dissector *dissector;
FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
};
+struct flow_block {
+ struct list_head cb_list;
+};
+
struct netlink_ext_ack;
struct flow_block_offload {
enum flow_block_binder_type binder_type;
bool block_shared;
struct net *net;
+ struct flow_block *block;
struct list_head cb_list;
struct list_head *driver_block_list;
struct netlink_ext_ack *extack;
};
+enum tc_setup_type;
+typedef int flow_setup_cb_t(enum tc_setup_type type, void *type_data,
+ void *cb_priv);
+
struct flow_block_cb {
struct list_head driver_list;
struct list_head list;
- struct net *net;
- tc_setup_cb_t *cb;
+ flow_setup_cb_t *cb;
void *cb_ident;
void *cb_priv;
void (*release)(void *cb_priv);
unsigned int refcnt;
};
-struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv));
void flow_block_cb_free(struct flow_block_cb *block_cb);
-struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *offload,
- tc_setup_cb_t *cb, void *cb_ident);
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
+ flow_setup_cb_t *cb, void *cb_ident);
void *flow_block_cb_priv(struct flow_block_cb *block_cb);
void flow_block_cb_incref(struct flow_block_cb *block_cb);
list_move(&block_cb->list, &offload->cb_list);
}
-bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident,
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
struct list_head *driver_block_list);
int flow_block_cb_setup_simple(struct flow_block_offload *f,
- struct list_head *driver_list, tc_setup_cb_t *cb,
+ struct list_head *driver_list,
+ flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv, bool ingress_only);
enum flow_cls_command {
return flow_cmd->rule;
}
+static inline void flow_block_init(struct flow_block *flow_block)
+{
+ INIT_LIST_HEAD(&flow_block->cb_list);
+}
+
#endif /* _NET_FLOW_OFFLOAD_H */
#define NF_CT_EXPECT_CLASS_DEFAULT 0
#define NF_CT_EXPECT_MAX_CNT 255
+/* Allow to reuse expectations with the same tuples from different master
+ * conntracks.
+ */
+#define NF_CT_EXP_F_SKIP_MASTER 0x1
+
int nf_conntrack_expect_pernet_init(struct net *net);
void nf_conntrack_expect_pernet_fini(struct net *net);
u_int8_t, const __be16 *, const __be16 *);
void nf_ct_expect_put(struct nf_conntrack_expect *exp);
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
- u32 portid, int report);
-static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect)
+ u32 portid, int report, unsigned int flags);
+static inline int nf_ct_expect_related(struct nf_conntrack_expect *expect,
+ unsigned int flags)
{
- return nf_ct_expect_related_report(expect, 0, 0);
+ return nf_ct_expect_related_report(expect, 0, 0, flags);
}
#endif /*_NF_CONNTRACK_EXPECT_H*/
u8 options;
u8 wscale;
u16 mss;
+ u16 mss_encode;
u32 tsval;
u32 tsecr;
};
#include <linux/rhashtable.h>
#include <net/netfilter/nf_flow_table.h>
#include <net/netlink.h>
+#include <net/flow_offload.h>
struct module;
* @stats: per-cpu chain stats
* @chain: the chain
* @dev_name: device name that this base chain is attached to (if any)
- * @cb_list: list of flow block callbacks (for hardware offload)
+ * @flow_block: flow block (for hardware offload)
*/
struct nft_base_chain {
struct nf_hook_ops ops;
struct nft_stats __percpu *stats;
struct nft_chain chain;
char dev_name[IFNAMSIZ];
- struct list_head cb_list;
+ struct flow_block flow_block;
};
static inline struct nft_base_chain *nft_base_chain(const struct nft_chain *chain)
#include <linux/workqueue.h>
#include <net/sch_generic.h>
#include <net/act_api.h>
-#include <net/flow_offload.h>
#include <net/net_namespace.h>
/* TC action not accessible from user space */
}
static inline
-int tc_setup_cb_block_register(struct tcf_block *block, tc_setup_cb_t *cb,
+int tc_setup_cb_block_register(struct tcf_block *block, flow_setup_cb_t *cb,
void *cb_priv)
{
return 0;
}
static inline
-void tc_setup_cb_block_unregister(struct tcf_block *block, tc_setup_cb_t *cb,
+void tc_setup_cb_block_unregister(struct tcf_block *block, flow_setup_cb_t *cb,
void *cb_priv)
{
}
#include <linux/mutex.h>
#include <net/gen_stats.h>
#include <net/rtnetlink.h>
+#include <net/flow_offload.h>
struct Qdisc_ops;
struct qdisc_walker;
struct module;
struct bpf_flow_keys;
-typedef int tc_setup_cb_t(enum tc_setup_type type,
- void *type_data, void *cb_priv);
-
typedef int tc_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
enum tc_setup_type type, void *type_data);
void (*walk)(struct tcf_proto *tp,
struct tcf_walker *arg, bool rtnl_held);
int (*reoffload)(struct tcf_proto *tp, bool add,
- tc_setup_cb_t *cb, void *cb_priv,
+ flow_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack);
void (*bind_class)(void *, u32, unsigned long);
void * (*tmplt_create)(struct net *net,
refcount_t refcnt;
struct net *net;
struct Qdisc *q;
- struct list_head cb_list;
+ struct flow_block flow_block;
struct list_head owner_list;
bool keep_dst;
unsigned int offloadcnt; /* Number of oddloaded filters */
return skb_rb_first(&sk->tcp_rtx_queue);
}
+static inline struct sk_buff *tcp_rtx_queue_tail(const struct sock *sk)
+{
+ return skb_rb_last(&sk->tcp_rtx_queue);
+}
+
static inline struct sk_buff *tcp_write_queue_head(const struct sock *sk)
{
return skb_peek(&sk->sk_write_queue);
* FIP tunable parameters.
*/
#define FCOE_CTLR_START_DELAY 2000 /* mS after first adv. to choose FCF */
-#define FCOE_CTRL_SOL_TOV 2000 /* min. solicitation interval (mS) */
+#define FCOE_CTLR_SOL_TOV 2000 /* min. solicitation interval (mS) */
#define FCOE_CTLR_FCF_LIMIT 20 /* max. number of FCF entries */
#define FCOE_CTLR_VN2VN_LOGIN_LIMIT 3 /* max. VN2VN rport login retries */
/* Structure that defines QE firmware binary files.
*
- * See Documentation/powerpc/qe_firmware.txt for a description of these
+ * See Documentation/powerpc/qe_firmware.rst for a description of these
* fields.
*/
struct qe_firmware {
if (snd_BUG_ON(!stream))
return;
- if (stream->direction == SND_COMPRESS_PLAYBACK)
- stream->runtime->state = SNDRV_PCM_STATE_SETUP;
- else
- stream->runtime->state = SNDRV_PCM_STATE_PREPARED;
+ stream->runtime->state = SNDRV_PCM_STATE_SETUP;
wake_up(&stream->runtime->sleep);
}
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_LINUX_BPFILTER_H
#define _UAPI_LINUX_BPFILTER_H
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Copyright (c) 2015-2018, Intel Corporation.
*/
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* Intel Speed Select Interface: OS to hardware Interface
* Copyright (c) 2019, Intel Corporation.
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _NF_SYNPROXY_H
#define _NF_SYNPROXY_H
#define NL80211_HT_CAPABILITY_LEN 26
#define NL80211_VHT_CAPABILITY_LEN 12
#define NL80211_HE_MIN_CAPABILITY_LEN 16
-#define NL80211_HE_MAX_CAPABILITY_LEN 51
+#define NL80211_HE_MAX_CAPABILITY_LEN 54
#define NL80211_MAX_NR_CIPHER_SUITES 5
#define NL80211_MAX_NR_AKM_SUITES 2
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* Userspace interface for AMD Secure Encrypted Virtualization (SEV)
* platform management commands.
-/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* SPDX-License-Identifier: GPL-2.0-or-later WITH Linux-syscall-note */
/* Types and definitions for AF_RXRPC.
*
* Copyright (C) 2007 Red Hat, Inc. All Rights Reserved.
#define PORT_PNX8XXX 70
-/* Hilscher netx */
-#define PORT_NETX 71
-
/* SUN4V Hypervisor Console */
#define PORT_SUNHV 72
-/* SPDX-License-Identifier: GPL-2.0+ */
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
/*
* g_uvc.h -- USB Video Class Gadget driver API
*
-/* SPDX-License-Identifier: (GPL-2.0 OR CDDL-1.0) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR CDDL-1.0) */
/*
* Virtual Device for Guest <-> VMM/Host communication, type definitions
* which are also used for the vboxguest ioctl interface / by vboxsf
-/* SPDX-License-Identifier: (GPL-2.0 OR CDDL-1.0) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR CDDL-1.0) */
/*
* VBoxGuest - VirtualBox Guest Additions Driver Interface.
*
#define V4L2_PIX_FMT_RGBX444 v4l2_fourcc('R', 'X', '1', '2') /* 16 rrrrgggg bbbbxxxx */
#define V4L2_PIX_FMT_ABGR444 v4l2_fourcc('A', 'B', '1', '2') /* 16 aaaabbbb ggggrrrr */
#define V4L2_PIX_FMT_XBGR444 v4l2_fourcc('X', 'B', '1', '2') /* 16 xxxxbbbb ggggrrrr */
-#define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('B', 'A', '1', '2') /* 16 bbbbgggg rrrraaaa */
+
+/*
+ * Originally this had 'BA12' as fourcc, but this clashed with the older
+ * V4L2_PIX_FMT_SGRBG12 which inexplicably used that same fourcc.
+ * So use 'GA12' instead for V4L2_PIX_FMT_BGRA444.
+ */
+#define V4L2_PIX_FMT_BGRA444 v4l2_fourcc('G', 'A', '1', '2') /* 16 bbbbgggg rrrraaaa */
#define V4L2_PIX_FMT_BGRX444 v4l2_fourcc('B', 'X', '1', '2') /* 16 bbbbgggg rrrrxxxx */
#define V4L2_PIX_FMT_RGB555 v4l2_fourcc('R', 'G', 'B', 'O') /* 16 RGB-5-5-5 */
#define V4L2_PIX_FMT_ARGB555 v4l2_fourcc('A', 'R', '1', '5') /* 16 ARGB-1-5-5-5 */
-/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause */
/*
* Definitions for virtio-pmem devices.
*
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _UAPI_VMCORE_H
#define _UAPI_VMCORE_H
-/* SPDX-License-Identifier: GPL-2.0-only */
+/* SPDX-License-Identifier: GPL-2.0-only WITH Linux-syscall-note */
/*
* User API methods for ACPI-WMI mapping driver
*
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef __QCOM_FASTRPC_H__
#define __QCOM_FASTRPC_H__
-/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) */
+/* SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) */
/*
* This file contains defines, structures, etc. that are used
-/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/* SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) or BSD-3-Clause */
/* Authors: Bernard Metzler <bmt@zurich.ibm.com> */
/* Copyright (c) 2008-2019, IBM Corporation */
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* UFS Transport SGIO v4 BSG Message Support
*
-/* SPDX-License-Identifier: GPL-2.0 */
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/*
* skl-tplg-interface.h - Intel DSP FW private data interface
*
Select this if you are building a kernel for a desktop system.
-config PREEMPT_LL
+config PREEMPT
bool "Preemptible Kernel (Low-Latency Desktop)"
depends on !ARCH_NO_PREEMPT
- select PREEMPT
+ select PREEMPTION
select UNINLINE_SPIN_UNLOCK if !ARCH_INLINE_SPIN_UNLOCK
help
This option reduces the latency of the kernel by making
config PREEMPT_RT
bool "Fully Preemptible Kernel (Real-Time)"
depends on EXPERT && ARCH_SUPPORTS_RT
- select PREEMPT
+ select PREEMPTION
help
This option turns the kernel into a real-time kernel by replacing
various locking primitives (spinlocks, rwlocks, etc.) with
config PREEMPT_COUNT
bool
-config PREEMPT
+config PREEMPTION
bool
select PREEMPT_COUNT
BUG_ON(cred == current->cred);
BUG_ON(cred == current->real_cred);
- call_rcu(&cred->rcu, put_cred_rcu);
+ if (cred->non_rcu)
+ put_cred_rcu(&cred->rcu);
+ else
+ call_rcu(&cred->rcu, put_cred_rcu);
}
EXPORT_SYMBOL(__put_cred);
old = task->cred;
memcpy(new, old, sizeof(struct cred));
+ new->non_rcu = 0;
atomic_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_group_info(new->group_info);
validate_creds(old);
validate_creds(new);
- get_cred(new);
+
+ /*
+ * NOTE! This uses 'get_new_cred()' rather than 'get_cred()'.
+ *
+ * That means that we do not clear the 'non_rcu' flag, since
+ * we are only installing the cred into the thread-synchronous
+ * '->cred' pointer, not the '->real_cred' pointer that is
+ * visible to other threads under RCU.
+ *
+ * Also note that we did validate_creds() manually, not depending
+ * on the validation in 'get_cred()'.
+ */
+ get_new_cred((struct cred *)new);
alter_cred_subscribers(new, 1);
rcu_assign_pointer(current->cred, new);
alter_cred_subscribers(old, -1);
validate_creds(old);
*new = *old;
+ new->non_rcu = 0;
atomic_set(&new->usage, 1);
set_cred_subscribers(new, 0);
get_uid(new->user);
goto err_unlock;
}
- perf_install_in_context(ctx, event, cpu);
+ perf_install_in_context(ctx, event, event->cpu);
perf_unpin_context(ctx);
mutex_unlock(&ctx->mutex);
if (group_dead)
kill_orphaned_pgrp(tsk->group_leader, NULL);
+ tsk->exit_state = EXIT_ZOMBIE;
if (unlikely(tsk->ptrace)) {
int sig = thread_group_leader(tsk) &&
thread_group_empty(tsk) &&
WARN_ON(tsk == current);
cgroup_free(tsk);
- task_numa_free(tsk);
+ task_numa_free(tsk, true);
security_task_free(tsk);
exit_creds(tsk);
delayacct_tsk_free(tsk);
unsigned long nr_stack_trace_entries;
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+#ifdef CONFIG_PROVE_LOCKING
/*
* Stack-trace: tightly packed array of stack backtrace
* addresses. Protected by the graph_lock.
DEFINE_PER_CPU(struct lockdep_stats, lockdep_stats);
#endif
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+#ifdef CONFIG_PROVE_LOCKING
/*
* Locking printouts:
*/
#endif
}
-#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+#ifdef CONFIG_PROVE_LOCKING
static int mark_lock(struct task_struct *curr, struct held_lock *this,
enum lock_usage_bit new_bit);
return ret;
}
-#else /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
+#else /* CONFIG_PROVE_LOCKING */
static inline int
mark_usage(struct task_struct *curr, struct held_lock *hlock, int check)
return 0;
}
-#endif /* defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) */
+#endif /* CONFIG_PROVE_LOCKING */
/*
* Initialize a lock instance's lock-class mapping info:
*/
static void check_flags(unsigned long flags)
{
-#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP) && \
- defined(CONFIG_TRACE_IRQFLAGS)
+#if defined(CONFIG_PROVE_LOCKING) && defined(CONFIG_DEBUG_LOCKDEP)
if (!debug_locks)
return;
static int lockdep_stats_show(struct seq_file *m, void *v)
{
- struct lock_class *class;
unsigned long nr_unused = 0, nr_uncategorized = 0,
nr_irq_safe = 0, nr_irq_unsafe = 0,
nr_softirq_safe = 0, nr_softirq_unsafe = 0,
sum_forward_deps = 0;
#ifdef CONFIG_PROVE_LOCKING
+ struct lock_class *class;
+
list_for_each_entry(class, &all_lock_classes, lock_entry) {
if (class->usage_mask == 0)
might_sleep();
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(lock->magic != lock);
+#endif
+
ww = container_of(lock, struct ww_mutex, base);
if (use_ww_ctx && ww_ctx) {
if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
*/
int __sched mutex_trylock(struct mutex *lock)
{
- bool locked = __mutex_trylock(lock);
+ bool locked;
+
+#ifdef CONFIG_DEBUG_MUTEXES
+ DEBUG_LOCKS_WARN_ON(lock->magic != lock);
+#endif
+ locked = __mutex_trylock(lock);
if (locked)
mutex_acquire(&lock->dep_map, 0, 1, _RET_IP_);
preempt_disable();
rcu_read_lock();
owner = rwsem_owner_flags(sem, &flags);
- if ((flags & nonspinnable) || (owner && !owner_on_cpu(owner)))
+ /*
+ * Don't check the read-owner as the entry may be stale.
+ */
+ if ((flags & nonspinnable) ||
+ (owner && !(flags & RWSEM_READER_OWNED) && !owner_on_cpu(owner)))
ret = false;
rcu_read_unlock();
preempt_enable();
atomic_long_add(-RWSEM_READER_BIAS, &sem->count);
adjustment = 0;
if (rwsem_optimistic_spin(sem, false)) {
+ /* rwsem_optimistic_spin() implies ACQUIRE on success */
/*
* Wake up other readers in the wait list if the front
* waiter is a reader.
}
return sem;
} else if (rwsem_reader_phase_trylock(sem, waiter.last_rowner)) {
+ /* rwsem_reader_phase_trylock() implies ACQUIRE on success */
return sem;
}
*/
if (adjustment && !(atomic_long_read(&sem->count) &
(RWSEM_WRITER_MASK | RWSEM_FLAG_HANDOFF))) {
+ /* Provide lock ACQUIRE */
+ smp_acquire__after_ctrl_dep();
raw_spin_unlock_irq(&sem->wait_lock);
rwsem_set_reader_owned(sem);
lockevent_inc(rwsem_rlock_fast);
wake_up_q(&wake_q);
/* wait to be given the lock */
- while (true) {
+ for (;;) {
set_current_state(state);
- if (!waiter.task)
+ if (!smp_load_acquire(&waiter.task)) {
+ /* Matches rwsem_mark_wake()'s smp_store_release(). */
break;
+ }
if (signal_pending_state(state, current)) {
raw_spin_lock_irq(&sem->wait_lock);
if (waiter.task)
goto out_nolock;
raw_spin_unlock_irq(&sem->wait_lock);
+ /* Ordered by sem->wait_lock against rwsem_mark_wake(). */
break;
}
schedule();
__set_current_state(TASK_RUNNING);
lockevent_inc(rwsem_rlock);
return sem;
+
out_nolock:
list_del(&waiter.list);
if (list_empty(&sem->wait_list)) {
/* do optimistic spinning and steal lock if possible */
if (rwsem_can_spin_on_owner(sem, RWSEM_WR_NONSPINNABLE) &&
- rwsem_optimistic_spin(sem, true))
+ rwsem_optimistic_spin(sem, true)) {
+ /* rwsem_optimistic_spin() implies ACQUIRE on success */
return sem;
+ }
/*
* Disable reader optimistic spinning for this rwsem after
wait:
/* wait until we successfully acquire the lock */
set_current_state(state);
- while (true) {
- if (rwsem_try_write_lock(sem, wstate))
+ for (;;) {
+ if (rwsem_try_write_lock(sem, wstate)) {
+ /* rwsem_try_write_lock() implies ACQUIRE on success */
break;
+ }
raw_spin_unlock_irq(&sem->wait_lock);
unsigned long faults[0];
};
+/*
+ * For functions that can be called in multiple contexts that permit reading
+ * ->numa_group (see struct task_struct for locking rules).
+ */
+static struct numa_group *deref_task_numa_group(struct task_struct *p)
+{
+ return rcu_dereference_check(p->numa_group, p == current ||
+ (lockdep_is_held(&task_rq(p)->lock) && !READ_ONCE(p->on_cpu)));
+}
+
+static struct numa_group *deref_curr_numa_group(struct task_struct *p)
+{
+ return rcu_dereference_protected(p->numa_group, p == current);
+}
+
static inline unsigned long group_faults_priv(struct numa_group *ng);
static inline unsigned long group_faults_shared(struct numa_group *ng);
{
unsigned long smin = task_scan_min(p);
unsigned long period = smin;
+ struct numa_group *ng;
/* Scale the maximum scan period with the amount of shared memory. */
- if (p->numa_group) {
- struct numa_group *ng = p->numa_group;
+ rcu_read_lock();
+ ng = rcu_dereference(p->numa_group);
+ if (ng) {
unsigned long shared = group_faults_shared(ng);
unsigned long private = group_faults_priv(ng);
period *= shared + 1;
period /= private + shared + 1;
}
+ rcu_read_unlock();
return max(smin, period);
}
{
unsigned long smin = task_scan_min(p);
unsigned long smax;
+ struct numa_group *ng;
/* Watch for min being lower than max due to floor calculations */
smax = sysctl_numa_balancing_scan_period_max / task_nr_scan_windows(p);
/* Scale the maximum scan period with the amount of shared memory. */
- if (p->numa_group) {
- struct numa_group *ng = p->numa_group;
+ ng = deref_curr_numa_group(p);
+ if (ng) {
unsigned long shared = group_faults_shared(ng);
unsigned long private = group_faults_priv(ng);
unsigned long period = smax;
p->numa_scan_period = sysctl_numa_balancing_scan_delay;
p->numa_work.next = &p->numa_work;
p->numa_faults = NULL;
- p->numa_group = NULL;
+ RCU_INIT_POINTER(p->numa_group, NULL);
p->last_task_numa_placement = 0;
p->last_sum_exec_runtime = 0;
pid_t task_numa_group_id(struct task_struct *p)
{
- return p->numa_group ? p->numa_group->gid : 0;
+ struct numa_group *ng;
+ pid_t gid = 0;
+
+ rcu_read_lock();
+ ng = rcu_dereference(p->numa_group);
+ if (ng)
+ gid = ng->gid;
+ rcu_read_unlock();
+
+ return gid;
}
/*
static inline unsigned long group_faults(struct task_struct *p, int nid)
{
- if (!p->numa_group)
+ struct numa_group *ng = deref_task_numa_group(p);
+
+ if (!ng)
return 0;
- return p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
- p->numa_group->faults[task_faults_idx(NUMA_MEM, nid, 1)];
+ return ng->faults[task_faults_idx(NUMA_MEM, nid, 0)] +
+ ng->faults[task_faults_idx(NUMA_MEM, nid, 1)];
}
static inline unsigned long group_faults_cpu(struct numa_group *group, int nid)
static inline unsigned long group_weight(struct task_struct *p, int nid,
int dist)
{
+ struct numa_group *ng = deref_task_numa_group(p);
unsigned long faults, total_faults;
- if (!p->numa_group)
+ if (!ng)
return 0;
- total_faults = p->numa_group->total_faults;
+ total_faults = ng->total_faults;
if (!total_faults)
return 0;
bool should_numa_migrate_memory(struct task_struct *p, struct page * page,
int src_nid, int dst_cpu)
{
- struct numa_group *ng = p->numa_group;
+ struct numa_group *ng = deref_curr_numa_group(p);
int dst_nid = cpu_to_node(dst_cpu);
int last_cpupid, this_cpupid;
static void task_numa_compare(struct task_numa_env *env,
long taskimp, long groupimp, bool maymove)
{
+ struct numa_group *cur_ng, *p_ng = deref_curr_numa_group(env->p);
struct rq *dst_rq = cpu_rq(env->dst_cpu);
+ long imp = p_ng ? groupimp : taskimp;
struct task_struct *cur;
long src_load, dst_load;
- long load;
- long imp = env->p->numa_group ? groupimp : taskimp;
- long moveimp = imp;
int dist = env->dist;
+ long moveimp = imp;
+ long load;
if (READ_ONCE(dst_rq->numa_migrate_on))
return;
* If dst and source tasks are in the same NUMA group, or not
* in any group then look only at task weights.
*/
- if (cur->numa_group == env->p->numa_group) {
+ cur_ng = rcu_dereference(cur->numa_group);
+ if (cur_ng == p_ng) {
imp = taskimp + task_weight(cur, env->src_nid, dist) -
task_weight(cur, env->dst_nid, dist);
/*
* Add some hysteresis to prevent swapping the
* tasks within a group over tiny differences.
*/
- if (cur->numa_group)
+ if (cur_ng)
imp -= imp / 16;
} else {
/*
* Compare the group weights. If a task is all by itself
* (not part of a group), use the task weight instead.
*/
- if (cur->numa_group && env->p->numa_group)
+ if (cur_ng && p_ng)
imp += group_weight(cur, env->src_nid, dist) -
group_weight(cur, env->dst_nid, dist);
else
.best_imp = 0,
.best_cpu = -1,
};
+ unsigned long taskweight, groupweight;
struct sched_domain *sd;
+ long taskimp, groupimp;
+ struct numa_group *ng;
struct rq *best_rq;
- unsigned long taskweight, groupweight;
int nid, ret, dist;
- long taskimp, groupimp;
/*
* Pick the lowest SD_NUMA domain, as that would have the smallest
* multiple NUMA nodes; in order to better consolidate the group,
* we need to check other locations.
*/
- if (env.best_cpu == -1 || (p->numa_group && p->numa_group->active_nodes > 1)) {
+ ng = deref_curr_numa_group(p);
+ if (env.best_cpu == -1 || (ng && ng->active_nodes > 1)) {
for_each_online_node(nid) {
if (nid == env.src_nid || nid == p->numa_preferred_nid)
continue;
* A task that migrated to a second choice node will be better off
* trying for a better one later. Do not set the preferred node here.
*/
- if (p->numa_group) {
+ if (ng) {
if (env.best_cpu == -1)
nid = env.src_nid;
else
unsigned long total_faults;
u64 runtime, period;
spinlock_t *group_lock = NULL;
+ struct numa_group *ng;
/*
* The p->mm->numa_scan_seq field gets updated without
runtime = numa_get_avg_runtime(p, &period);
/* If the task is part of a group prevent parallel updates to group stats */
- if (p->numa_group) {
- group_lock = &p->numa_group->lock;
+ ng = deref_curr_numa_group(p);
+ if (ng) {
+ group_lock = &ng->lock;
spin_lock_irq(group_lock);
}
p->numa_faults[cpu_idx] += f_diff;
faults += p->numa_faults[mem_idx];
p->total_numa_faults += diff;
- if (p->numa_group) {
+ if (ng) {
/*
* safe because we can only change our own group
*
* nid and priv in a specific region because it
* is at the beginning of the numa_faults array.
*/
- p->numa_group->faults[mem_idx] += diff;
- p->numa_group->faults_cpu[mem_idx] += f_diff;
- p->numa_group->total_faults += diff;
- group_faults += p->numa_group->faults[mem_idx];
+ ng->faults[mem_idx] += diff;
+ ng->faults_cpu[mem_idx] += f_diff;
+ ng->total_faults += diff;
+ group_faults += ng->faults[mem_idx];
}
}
- if (!p->numa_group) {
+ if (!ng) {
if (faults > max_faults) {
max_faults = faults;
max_nid = nid;
}
}
- if (p->numa_group) {
- numa_group_count_active_nodes(p->numa_group);
+ if (ng) {
+ numa_group_count_active_nodes(ng);
spin_unlock_irq(group_lock);
max_nid = preferred_group_nid(p, max_nid);
}
int cpu = cpupid_to_cpu(cpupid);
int i;
- if (unlikely(!p->numa_group)) {
+ if (unlikely(!deref_curr_numa_group(p))) {
unsigned int size = sizeof(struct numa_group) +
4*nr_node_ids*sizeof(unsigned long);
if (!grp)
goto no_join;
- my_grp = p->numa_group;
+ my_grp = deref_curr_numa_group(p);
if (grp == my_grp)
goto no_join;
return;
}
-void task_numa_free(struct task_struct *p)
+/*
+ * Get rid of NUMA staticstics associated with a task (either current or dead).
+ * If @final is set, the task is dead and has reached refcount zero, so we can
+ * safely free all relevant data structures. Otherwise, there might be
+ * concurrent reads from places like load balancing and procfs, and we should
+ * reset the data back to default state without freeing ->numa_faults.
+ */
+void task_numa_free(struct task_struct *p, bool final)
{
- struct numa_group *grp = p->numa_group;
- void *numa_faults = p->numa_faults;
+ /* safe: p either is current or is being freed by current */
+ struct numa_group *grp = rcu_dereference_raw(p->numa_group);
+ unsigned long *numa_faults = p->numa_faults;
unsigned long flags;
int i;
+ if (!numa_faults)
+ return;
+
if (grp) {
spin_lock_irqsave(&grp->lock, flags);
for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
put_numa_group(grp);
}
- p->numa_faults = NULL;
- kfree(numa_faults);
+ if (final) {
+ p->numa_faults = NULL;
+ kfree(numa_faults);
+ } else {
+ p->total_numa_faults = 0;
+ for (i = 0; i < NR_NUMA_HINT_FAULT_STATS * nr_node_ids; i++)
+ numa_faults[i] = 0;
+ }
}
/*
* actively using should be counted as local. This allows the
* scan rate to slow down when a workload has settled down.
*/
- ng = p->numa_group;
+ ng = deref_curr_numa_group(p);
if (!priv && !local && ng && ng->active_nodes > 1 &&
numa_is_active_node(cpu_node, ng) &&
numa_is_active_node(mem_node, ng))
{
int node;
unsigned long tsf = 0, tpf = 0, gsf = 0, gpf = 0;
+ struct numa_group *ng;
+ rcu_read_lock();
+ ng = rcu_dereference(p->numa_group);
for_each_online_node(node) {
if (p->numa_faults) {
tsf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 0)];
tpf = p->numa_faults[task_faults_idx(NUMA_MEM, node, 1)];
}
- if (p->numa_group) {
- gsf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 0)],
- gpf = p->numa_group->faults[task_faults_idx(NUMA_MEM, node, 1)];
+ if (ng) {
+ gsf = ng->faults[task_faults_idx(NUMA_MEM, node, 0)],
+ gpf = ng->faults[task_faults_idx(NUMA_MEM, node, 1)];
}
print_numa_stats(m, node, tsf, tpf, gsf, gpf);
}
+ rcu_read_unlock();
}
#endif /* CONFIG_NUMA_BALANCING */
#endif /* CONFIG_SCHED_DEBUG */
return -ENOMEM;
rc = __test_firmware_config_init();
- if (rc)
+ if (rc) {
+ kfree(test_fw_config);
+ pr_err("could not init firmware test config: %d\n", rc);
return rc;
+ }
rc = misc_register(&test_fw_misc_device);
if (rc) {
if (unlikely(valist == NULL))
return false;
+ /*
+ * First make sure the mappings are removed from all page-tables
+ * before they are freed.
+ */
+ vmalloc_sync_all();
+
/*
* TODO: to calculate a flush range without looping.
* The list can be up to lazy_max_pages() elements.
/*
* Implement a stub for vmalloc_sync_all() if the architecture chose not to
* have one.
+ *
+ * The purpose of this function is to make sure the vmalloc area
+ * mappings are identical in all page-tables in the system.
*/
void __weak vmalloc_sync_all(void)
{
menuconfig NF_TABLES_BRIDGE
depends on BRIDGE && NETFILTER && NF_TABLES
select NETFILTER_FAMILY_BRIDGE
- bool "Ethernet Bridge nf_tables support"
+ tristate "Ethernet Bridge nf_tables support"
if NF_TABLES_BRIDGE
tristate "Bridge packet logging"
select NF_LOG_COMMON
+endif # NF_TABLES_BRIDGE
+
config NF_CONNTRACK_BRIDGE
tristate "IPv4/IPV6 bridge connection tracking support"
depends on NF_CONNTRACK
To compile it as a module, choose M here. If unsure, say N.
-endif # NF_TABLES_BRIDGE
-
menuconfig BRIDGE_NF_EBTABLES
tristate "Ethernet Bridge tables (ebtables) support"
depends on BRIDGE && NETFILTER && NETFILTER_XTABLES
}
EXPORT_SYMBOL(flow_rule_match_enc_opts);
-struct flow_block_cb *flow_block_cb_alloc(struct net *net, tc_setup_cb_t *cb,
+struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb,
void *cb_ident, void *cb_priv,
void (*release)(void *cb_priv))
{
if (!block_cb)
return ERR_PTR(-ENOMEM);
- block_cb->net = net;
block_cb->cb = cb;
block_cb->cb_ident = cb_ident;
block_cb->cb_priv = cb_priv;
}
EXPORT_SYMBOL(flow_block_cb_free);
-struct flow_block_cb *flow_block_cb_lookup(struct flow_block_offload *f,
- tc_setup_cb_t *cb, void *cb_ident)
+struct flow_block_cb *flow_block_cb_lookup(struct flow_block *block,
+ flow_setup_cb_t *cb, void *cb_ident)
{
struct flow_block_cb *block_cb;
- list_for_each_entry(block_cb, f->driver_block_list, driver_list) {
- if (block_cb->net == f->net &&
- block_cb->cb == cb &&
+ list_for_each_entry(block_cb, &block->cb_list, list) {
+ if (block_cb->cb == cb &&
block_cb->cb_ident == cb_ident)
return block_cb;
}
}
EXPORT_SYMBOL(flow_block_cb_decref);
-bool flow_block_cb_is_busy(tc_setup_cb_t *cb, void *cb_ident,
+bool flow_block_cb_is_busy(flow_setup_cb_t *cb, void *cb_ident,
struct list_head *driver_block_list)
{
struct flow_block_cb *block_cb;
int flow_block_cb_setup_simple(struct flow_block_offload *f,
struct list_head *driver_block_list,
- tc_setup_cb_t *cb, void *cb_ident, void *cb_priv,
+ flow_setup_cb_t *cb,
+ void *cb_ident, void *cb_priv,
bool ingress_only)
{
struct flow_block_cb *block_cb;
if (flow_block_cb_is_busy(cb, cb_ident, driver_block_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net, cb, cb_ident,
- cb_priv, NULL);
+ block_cb = flow_block_cb_alloc(cb, cb_ident, cb_priv, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
list_add_tail(&block_cb->driver_list, driver_block_list);
return 0;
case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f, cb, cb_ident);
+ block_cb = flow_block_cb_lookup(f->block, cb, cb_ident);
if (!block_cb)
return -ENOENT;
struct flow_block_offload *f)
{
struct flow_block_cb *block_cb;
- tc_setup_cb_t *cb;
+ flow_setup_cb_t *cb;
if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
cb = dsa_slave_setup_tc_block_cb_ig;
if (flow_block_cb_is_busy(cb, dev, &dsa_slave_block_cb_list))
return -EBUSY;
- block_cb = flow_block_cb_alloc(f->net, cb, dev, dev, NULL);
+ block_cb = flow_block_cb_alloc(cb, dev, dev, NULL);
if (IS_ERR(block_cb))
return PTR_ERR(block_cb);
list_add_tail(&block_cb->driver_list, &dsa_slave_block_cb_list);
return 0;
case FLOW_BLOCK_UNBIND:
- block_cb = flow_block_cb_lookup(f, cb, dev);
+ block_cb = flow_block_cb_lookup(f->block, cb, dev);
if (!block_cb)
return -ENOENT;
ctinfo == IP_CT_RELATED_REPLY))
return XT_CONTINUE;
- /* ip_conntrack_icmp guarantees us that we only have ICMP_ECHO,
- * TIMESTAMP, INFO_REQUEST or ADDRESS type icmp packets from here
+ /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
+ * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
* on, which all have an ID field [relevant for hashing]. */
hash = clusterip_hashfn(skb, cipinfo->config);
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
+ opts.mss_encode = opts.mss;
+ opts.mss = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0;
flow.flowi4_tos = RT_TOS(iph->tos);
flow.flowi4_scope = RT_SCOPE_UNIVERSE;
+ flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par));
return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert;
}
int ret;
rtp_exp->tuple.dst.u.udp.port = htons(nated_port);
- ret = nf_ct_expect_related(rtp_exp);
+ ret = nf_ct_expect_related(rtp_exp, 0);
if (ret == 0) {
rtcp_exp->tuple.dst.u.udp.port =
htons(nated_port + 1);
- ret = nf_ct_expect_related(rtcp_exp);
+ ret = nf_ct_expect_related(rtcp_exp, 0);
if (ret == 0)
break;
else if (ret == -EBUSY) {
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
int ret;
exp->tuple.dst.u.tcp.port = htons(nated_port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *buff;
int nsize, old_factor;
+ long limit;
int nlen;
u8 flags;
if (nsize < 0)
nsize = 0;
- if (unlikely((sk->sk_wmem_queued >> 1) > sk->sk_sndbuf &&
- tcp_queue != TCP_FRAG_IN_WRITE_QUEUE)) {
+ /* tcp_sendmsg() can overshoot sk_wmem_queued by one full size skb.
+ * We need some allowance to not penalize applications setting small
+ * SO_SNDBUF values.
+ * Also allow first and last skb in retransmit queue to be split.
+ */
+ limit = sk->sk_sndbuf + 2 * SKB_TRUESIZE(GSO_MAX_SIZE);
+ if (unlikely((sk->sk_wmem_queued >> 1) > limit &&
+ tcp_queue != TCP_FRAG_IN_WRITE_QUEUE &&
+ skb != tcp_rtx_queue_head(sk) &&
+ skb != tcp_rtx_queue_tail(sk))) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPWQUEUETOOBIG);
return -ENOMEM;
}
opts.options |= XT_SYNPROXY_OPT_ECN;
opts.options &= info->options;
+ opts.mss_encode = opts.mss;
+ opts.mss = info->mss;
if (opts.options & XT_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, &opts);
else
if (rpfilter_addr_linklocal(&iph->saddr)) {
lookup_flags |= RT6_LOOKUP_F_IFACE;
fl6.flowi6_oif = dev->ifindex;
- } else if ((flags & XT_RPFILTER_LOOSE) == 0)
+ /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */
+ } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) ||
+ (flags & XT_RPFILTER_LOOSE) == 0)
fl6.flowi6_oif = dev->ifindex;
rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags);
goto out;
}
- if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+ if (rt->rt6i_idev->dev == dev ||
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
+ (flags & XT_RPFILTER_LOOSE))
ret = true;
out:
ip6_rt_put(rt);
err = ieee80211_set_probe_resp(sdata, params->probe_resp,
params->probe_resp_len, csa);
- if (err < 0)
+ if (err < 0) {
+ kfree(new);
return err;
+ }
if (err == 0)
changed |= BSS_CHANGED_AP_PROBE_RESP;
params->civicloc,
params->civicloc_len);
- if (err < 0)
+ if (err < 0) {
+ kfree(new);
return err;
+ }
changed |= BSS_CHANGED_FTM_RESPONDER;
}
if (!check_sdata_in_driver(sdata))
return -EIO;
- if (WARN_ONCE(params->cw_min == 0 ||
- params->cw_min > params->cw_max,
- "%s: invalid CW_min/CW_max: %d/%d\n",
- sdata->name, params->cw_min, params->cw_max))
+ if (params->cw_min == 0 || params->cw_min > params->cw_max) {
+ /*
+ * If we can't configure hardware anyway, don't warn. We may
+ * never have initialized the CW parameters.
+ */
+ WARN_ONCE(local->ops->conf_tx,
+ "%s: invalid CW_min/CW_max: %d/%d\n",
+ sdata->name, params->cw_min, params->cw_max);
return -EINVAL;
+ }
trace_drv_conf_tx(local, sdata, ac, params);
if (local->ops->conf_tx)
of Network Address Translation on them.
This is FTP support on Layer 3 independent connection tracking.
- Layer 3 independent connection tracking is experimental scheme
- which generalize ip_conntrack to support other layer 3 protocols.
To compile it as a module, choose M here. If unsure, say N.
help
SIP is an application-layer control protocol that can establish,
modify, and terminate multimedia sessions (conferences) such as
- Internet telephony calls. With the ip_conntrack_sip and
+ Internet telephony calls. With the nf_conntrack_sip and
the nf_nat_sip modules you can support the protocol on a connection
tracking/NATing firewall.
depends on NETFILTER_ADVANCED
help
Helper matching allows you to match packets in dynamic connections
- tracked by a conntrack-helper, ie. ip_conntrack_ftp
+ tracked by a conntrack-helper, ie. nf_conntrack_ftp
To compile it as a module, choose M here. If unsure, say Y.
IP_VS_DBG_BUF(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n",
__func__, ct, ARG_TUPLE(&exp->tuple));
- nf_ct_expect_related(exp);
+ nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
}
EXPORT_SYMBOL(ip_vs_nfct_expect_related);
if (nf_nat_amanda && ct->status & IPS_NAT_MASK)
ret = nf_nat_amanda(skb, ctinfo, protoff,
off - dataoff, len, exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
exp->class = NF_CT_EXPECT_CLASS_DEFAULT;
exp->helper = NULL;
- nf_ct_expect_related(exp);
+ nf_ct_expect_related(exp, 0);
nf_ct_expect_put(exp);
nf_ct_refresh(ct, skb, timeout * HZ);
#include <linux/netfilter/nfnetlink_conntrack.h>
#include <linux/mutex.h>
-/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
- * in ip_conntrack_core, since we don't want the protocols to autoload
- * or depend on ctnetlink */
+/* Generic function for tcp/udp/sctp/dccp and alike. */
int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
const struct nf_conntrack_tuple *tuple)
{
static inline int expect_matches(const struct nf_conntrack_expect *a,
const struct nf_conntrack_expect *b)
{
- return a->master == b->master &&
- nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
+ return nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
net_eq(nf_ct_net(a->master), nf_ct_net(b->master)) &&
nf_ct_zone_equal_any(a->master, nf_ct_zone(b->master));
}
+static bool master_matches(const struct nf_conntrack_expect *a,
+ const struct nf_conntrack_expect *b,
+ unsigned int flags)
+{
+ if (flags & NF_CT_EXP_F_SKIP_MASTER)
+ return true;
+
+ return a->master == b->master;
+}
+
/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
nf_ct_remove_expect(last);
}
-static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
+static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect,
+ unsigned int flags)
{
const struct nf_conntrack_expect_policy *p;
struct nf_conntrack_expect *i;
}
h = nf_ct_expect_dst_hash(net, &expect->tuple);
hlist_for_each_entry_safe(i, next, &nf_ct_expect_hash[h], hnode) {
- if (expect_matches(i, expect)) {
- if (i->class != expect->class)
+ if (master_matches(i, expect, flags) &&
+ expect_matches(i, expect)) {
+ if (i->class != expect->class ||
+ i->master != expect->master)
return -EALREADY;
if (nf_ct_remove_expect(i))
}
int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
- u32 portid, int report)
+ u32 portid, int report, unsigned int flags)
{
int ret;
spin_lock_bh(&nf_conntrack_expect_lock);
- ret = __nf_ct_expect_check(expect);
+ ret = __nf_ct_expect_check(expect, flags);
if (ret < 0)
goto out;
protoff, matchoff, matchlen, exp);
else {
/* Can't expect this? Best to drop packet now. */
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
} else
// SPDX-License-Identifier: GPL-2.0-only
/*
- * ip_conntrack_helper_h323_asn1.c - BER and PER decoding library for H.323
- * conntrack/NAT module.
+ * BER and PER decoding library for H.323 conntrack/NAT module.
*
* Copyright (c) 2006 by Jing Min Zhao <zhaojingmin@users.sourceforge.net>
*
- * See ip_conntrack_helper_h323_asn1.h for details.
+ * See nf_conntrack_helper_h323_asn1.h for details.
*/
#ifdef __KERNEL__
ret = nat_rtp_rtcp(skb, ct, ctinfo, protoff, data, dataoff,
taddr, port, rtp_port, rtp_exp, rtcp_exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(rtp_exp) == 0) {
- if (nf_ct_expect_related(rtcp_exp) == 0) {
+ if (nf_ct_expect_related(rtp_exp, 0) == 0) {
+ if (nf_ct_expect_related(rtcp_exp, 0) == 0) {
pr_debug("nf_ct_h323: expect RTP ");
nf_ct_dump_tuple(&rtp_exp->tuple);
pr_debug("nf_ct_h323: expect RTCP ");
ret = nat_t120(skb, ct, ctinfo, protoff, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_h323: expect T.120 ");
nf_ct_dump_tuple(&exp->tuple);
} else
ret = nat_h245(skb, ct, ctinfo, protoff, data, dataoff, taddr,
port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_q931: expect H.245 ");
nf_ct_dump_tuple(&exp->tuple);
} else
protoff, data, dataoff,
taddr, port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_q931: expect Call Forwarding ");
nf_ct_dump_tuple(&exp->tuple);
} else
ret = nat_q931(skb, ct, ctinfo, protoff, data,
taddr, i, port, exp);
} else { /* Conntrack only */
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
IPPROTO_UDP, NULL, &port);
exp->helper = nf_conntrack_helper_ras;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect RAS ");
nf_ct_dump_tuple(&exp->tuple);
} else
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->helper = nf_conntrack_helper_q931;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
} else
exp->flags = NF_CT_EXPECT_PERMANENT;
exp->helper = nf_conntrack_helper_q931;
- if (nf_ct_expect_related(exp) == 0) {
+ if (nf_ct_expect_related(exp, 0) == 0) {
pr_debug("nf_ct_ras: expect Q.931 ");
nf_ct_dump_tuple(&exp->tuple);
} else
addr_beg_p - ib_ptr,
addr_end_p - addr_beg_p,
exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct,
"cannot add expectation");
ret = NF_DROP;
if (IS_ERR(exp))
return PTR_ERR(exp);
- err = nf_ct_expect_related_report(exp, portid, report);
+ err = nf_ct_expect_related_report(exp, portid, report, 0);
nf_ct_expect_put(exp);
return err;
}
goto err_rcu;
}
- err = nf_ct_expect_related_report(exp, portid, report);
+ err = nf_ct_expect_related_report(exp, portid, report, 0);
nf_ct_expect_put(exp);
err_rcu:
rcu_read_unlock();
nf_nat_pptp_exp_gre = rcu_dereference(nf_nat_pptp_hook_exp_gre);
if (nf_nat_pptp_exp_gre && ct->status & IPS_NAT_MASK)
nf_nat_pptp_exp_gre(exp_orig, exp_reply);
- if (nf_ct_expect_related(exp_orig) != 0)
+ if (nf_ct_expect_related(exp_orig, 0) != 0)
goto out_put_both;
- if (nf_ct_expect_related(exp_reply) != 0)
+ if (nf_ct_expect_related(exp_reply, 0) != 0)
goto out_unexpect_orig;
/* Add GRE keymap entries */
// SPDX-License-Identifier: GPL-2.0-only
/*
- * ip_conntrack_proto_gre.c - Version 3.0
- *
* Connection tracking protocol helper module for GRE.
*
* GRE is a generic encapsulation protocol, which is generally not very
return -NF_ACCEPT;
}
- /* See ip_conntrack_proto_tcp.c */
+ /* See nf_conntrack_proto_tcp.c */
if (state->net->ct.sysctl_checksum &&
state->hook == NF_INET_PRE_ROUTING &&
nf_ip_checksum(skb, state->hook, dataoff, IPPROTO_ICMP)) {
struct ip_ct_tcp_state *receiver = &state->seen[!dir];
const struct nf_conntrack_tuple *tuple = &ct->tuplehash[dir].tuple;
__u32 seq, ack, sack, end, win, swin;
+ u16 win_raw;
s32 receiver_offset;
bool res, in_recv_win;
*/
seq = ntohl(tcph->seq);
ack = sack = ntohl(tcph->ack_seq);
- win = ntohs(tcph->window);
+ win_raw = ntohs(tcph->window);
+ win = win_raw;
end = segment_seq_plus_len(seq, skb->len, dataoff, tcph);
if (receiver->flags & IP_CT_TCP_FLAG_SACK_PERM)
&& state->last_seq == seq
&& state->last_ack == ack
&& state->last_end == end
- && state->last_win == win)
+ && state->last_win == win_raw)
state->retrans++;
else {
state->last_dir = dir;
state->last_seq = seq;
state->last_ack = ack;
state->last_end = end;
- state->last_win = win;
+ state->last_win = win_raw;
state->retrans = 0;
}
}
nf_ct_dump_tuple(&exp->tuple);
/* Can't expect this? Best to drop packet now. */
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
/* -EALREADY handling works around end-points that send
* SDP messages with identical port but different media type,
* we pretend expectation was set up.
+ * It also works in the case that SDP messages are sent with
+ * identical expect tuples but for different master conntracks.
*/
- int errp = nf_ct_expect_related(rtp_exp);
+ int errp = nf_ct_expect_related(rtp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (errp == 0 || errp == -EALREADY) {
- int errcp = nf_ct_expect_related(rtcp_exp);
+ int errcp = nf_ct_expect_related(rtcp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (errcp == 0 || errcp == -EALREADY)
ret = NF_ACCEPT;
ret = hooks->expect(skb, protoff, dataoff, dptr, datalen,
exp, matchoff, matchlen);
else {
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
} else
nf_nat_tftp = rcu_dereference(nf_nat_tftp_hook);
if (nf_nat_tftp && ct->status & IPS_NAT_MASK)
ret = nf_nat_tftp(skb, ctinfo, exp);
- else if (nf_ct_expect_related(exp) != 0) {
+ else if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, ct, "cannot add expectation");
ret = NF_DROP;
}
int res;
exp->tuple.dst.u.tcp.port = htons(port);
- res = nf_ct_expect_related(exp);
+ res = nf_ct_expect_related(exp, 0);
if (res == 0)
break;
else if (res != -EBUSY) {
* and NF_INET_LOCAL_OUT, we change the destination to map into the
* range. It might not be possible to get a unique tuple, but we try.
* At worst (or if we race), we will end up with a final duplicate in
- * __ip_conntrack_confirm and drop the packet. */
+ * __nf_conntrack_confirm and drop the packet. */
static void
get_unique_tuple(struct nf_conntrack_tuple *tuple,
const struct nf_conntrack_tuple *orig_tuple,
int ret;
exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
int ret;
exp->tuple.dst.u.tcp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, 0);
if (ret == 0)
break;
else if (ret != -EBUSY) {
int ret;
exp->tuple.dst.u.udp.port = htons(port);
- ret = nf_ct_expect_related(exp);
+ ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER);
if (ret == 0)
break;
else if (ret != -EBUSY) {
int ret;
rtp_exp->tuple.dst.u.udp.port = htons(port);
- ret = nf_ct_expect_related(rtp_exp);
+ ret = nf_ct_expect_related(rtp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (ret == -EBUSY)
continue;
else if (ret < 0) {
break;
}
rtcp_exp->tuple.dst.u.udp.port = htons(port + 1);
- ret = nf_ct_expect_related(rtcp_exp);
+ ret = nf_ct_expect_related(rtcp_exp,
+ NF_CT_EXP_F_SKIP_MASTER);
if (ret == 0)
break;
else if (ret == -EBUSY) {
= ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u.udp.port;
exp->dir = IP_CT_DIR_REPLY;
exp->expectfn = nf_nat_follow_master;
- if (nf_ct_expect_related(exp) != 0) {
+ if (nf_ct_expect_related(exp, 0) != 0) {
nf_ct_helper_log(skb, exp->master, "cannot add expectation");
return NF_DROP;
}
struct iphdr *iph, *niph;
struct tcphdr *nth;
unsigned int tcp_hdr_size;
- u16 mss = opts->mss;
+ u16 mss = opts->mss_encode;
iph = ip_hdr(skb);
state = &ct->proto.tcp;
switch (state->state) {
case TCP_CONNTRACK_CLOSE:
- if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
ntohl(th->seq) + 1);
break;
struct ipv6hdr *iph, *niph;
struct tcphdr *nth;
unsigned int tcp_hdr_size;
- u16 mss = opts->mss;
+ u16 mss = opts->mss_encode;
iph = ipv6_hdr(skb);
state = &ct->proto.tcp;
switch (state->state) {
case TCP_CONNTRACK_CLOSE:
- if (th->rst && !test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ if (th->rst && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) {
nf_ct_seqadj_init(ct, ctinfo, synproxy->isn -
ntohl(th->seq) + 1);
break;
chain->flags |= NFT_BASE_CHAIN | flags;
basechain->policy = NF_ACCEPT;
- INIT_LIST_HEAD(&basechain->cb_list);
+ flow_block_init(&basechain->flow_block);
} else {
chain = kzalloc(sizeof(*chain), GFP_KERNEL);
if (chain == NULL)
if (nla[NFTA_CHAIN_FLAGS])
flags = ntohl(nla_get_be32(nla[NFTA_CHAIN_FLAGS]));
+ else if (chain)
+ flags = chain->flags;
nft_ctx_init(&ctx, net, skb, nlh, family, table, chain, nla);
struct flow_block_cb *block_cb;
int err;
- list_for_each_entry(block_cb, &basechain->cb_list, list) {
+ list_for_each_entry(block_cb, &basechain->flow_block.cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err < 0)
return err;
static int nft_flow_offload_bind(struct flow_block_offload *bo,
struct nft_base_chain *basechain)
{
- list_splice(&bo->cb_list, &basechain->cb_list);
+ list_splice(&bo->cb_list, &basechain->flow_block.cb_list);
return 0;
}
return -EOPNOTSUPP;
bo.command = cmd;
+ bo.block = &basechain->flow_block;
bo.binder_type = FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS;
bo.extack = &extack;
INIT_LIST_HEAD(&bo.cb_list);
ss = nfnetlink_get_subsys(type << 8);
rcu_read_unlock();
if (!ss)
- request_module("nfnetlink-subsys-%d", type);
+ request_module_nowait("nfnetlink-subsys-%d", type);
return 0;
}
#endif
static inline void nft_chain_filter_inet_fini(void) {}
#endif /* CONFIG_NF_TABLES_IPV6 */
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE)
static unsigned int
nft_do_chain_bridge(void *priv,
struct sk_buff *skb,
#ifdef CONFIG_NF_TABLES_IPV6
MODULE_ALIAS_NFT_CHAIN(AF_INET6, "nat");
#endif
+#ifdef CONFIG_NF_TABLES_INET
+MODULE_ALIAS_NFT_CHAIN(1, "nat"); /* NFPROTO_INET */
+#endif
priv->l4proto, NULL, &priv->dport);
exp->timeout.expires = jiffies + priv->timeout * HZ;
- if (nf_ct_expect_related(exp) != 0)
+ if (nf_ct_expect_related(exp, 0) != 0)
regs->verdict.code = NF_DROP;
}
priv->dreg = nft_parse_register(tb[NFTA_HASH_DREG]);
priv->modulus = ntohl(nla_get_be32(tb[NFTA_HASH_MODULUS]));
- if (priv->modulus <= 1)
+ if (priv->modulus < 1)
return -ERANGE;
if (priv->offset + priv->modulus - 1 < priv->offset)
if (tb[NFTA_META_DREG] && tb[NFTA_META_SREG])
return ERR_PTR(-EINVAL);
-#ifdef CONFIG_NF_TABLES_BRIDGE
+#if IS_ENABLED(CONFIG_NF_TABLES_BRIDGE) && IS_MODULE(CONFIG_NFT_BRIDGE_META)
if (ctx->family == NFPROTO_BRIDGE)
return ERR_PTR(-EAGAIN);
#endif
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arturo Borrero Gonzalez <arturo@debian.org>");
-MODULE_ALIAS_NFT_EXPR("nat");
+MODULE_ALIAS_NFT_EXPR("redir");
opts->options |= NF_SYNPROXY_OPT_ECN;
opts->options &= priv->info.options;
+ opts->mss_encode = opts->mss;
+ opts->mss = info->mss;
if (opts->options & NF_SYNPROXY_OPT_TIMESTAMP)
synproxy_init_timestamp_cookie(info, opts);
else
void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
const struct sk_buff *skb)
{
- struct flow_stats *stats;
+ struct sw_flow_stats *stats;
unsigned int cpu = smp_processor_id();
int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0);
if (likely(flow->stats_last_writer != -1) &&
likely(!rcu_access_pointer(flow->stats[cpu]))) {
/* Try to allocate CPU-specific stats. */
- struct flow_stats *new_stats;
+ struct sw_flow_stats *new_stats;
new_stats =
kmem_cache_alloc_node(flow_stats_cache,
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
- struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
+ struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
/* Local CPU may write on non-local stats, so we must
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
- struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
+ struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
spin_lock_bh(&stats->lock);
struct nlattr actions[];
};
-struct flow_stats {
+struct sw_flow_stats {
u64 packet_count; /* Number of packets matched. */
u64 byte_count; /* Number of bytes matched. */
unsigned long used; /* Last used time (in jiffies). */
struct cpumask cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
- struct flow_stats __rcu *stats[]; /* One for each CPU. First one
+ struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
* is allocated at flow creation time,
* the rest are allocated on demand
* while holding the 'stats[0].lock'.
struct sw_flow *ovs_flow_alloc(void)
{
struct sw_flow *flow;
- struct flow_stats *stats;
+ struct sw_flow_stats *stats;
flow = kmem_cache_zalloc(flow_cache, GFP_KERNEL);
if (!flow)
for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask))
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
- (struct flow_stats __force *)flow->stats[cpu]);
+ (struct sw_flow_stats __force *)flow->stats[cpu]);
kmem_cache_free(flow_cache, flow);
}
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ (nr_cpu_ids
- * sizeof(struct flow_stats *)),
+ * sizeof(struct sw_flow_stats *)),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
flow_stats_cache
- = kmem_cache_create("sw_flow_stats", sizeof(struct flow_stats),
+ = kmem_cache_create("sw_flow_stats", sizeof(struct sw_flow_stats),
0, SLAB_HWCACHE_ALIGN, NULL);
if (flow_stats_cache == NULL) {
kmem_cache_destroy(flow_cache);
if (!indr_dev->block)
return;
+ bo.block = &indr_dev->block->flow_block;
+
indr_block_cb->cb(indr_dev->dev, indr_block_cb->cb_priv, TC_SETUP_BLOCK,
&bo);
tcf_block_setup(indr_dev->block, &bo);
.command = command,
.binder_type = ei->binder_type,
.net = dev_net(dev),
+ .block = &block->flow_block,
.block_shared = tcf_block_shared(block),
.extack = extack,
};
bo.net = dev_net(dev);
bo.command = command;
bo.binder_type = ei->binder_type;
+ bo.block = &block->flow_block;
bo.block_shared = tcf_block_shared(block);
bo.extack = extack;
INIT_LIST_HEAD(&bo.cb_list);
return ERR_PTR(-ENOMEM);
}
mutex_init(&block->lock);
+ flow_block_init(&block->flow_block);
INIT_LIST_HEAD(&block->chain_list);
- INIT_LIST_HEAD(&block->cb_list);
INIT_LIST_HEAD(&block->owner_list);
INIT_LIST_HEAD(&block->chain0.filter_chain_list);
EXPORT_SYMBOL(tcf_block_put);
static int
-tcf_block_playback_offloads(struct tcf_block *block, tc_setup_cb_t *cb,
+tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
void *cb_priv, bool add, bool offload_in_use,
struct netlink_ext_ack *extack)
{
i++;
}
- list_splice(&bo->cb_list, &block->cb_list);
+ list_splice(&bo->cb_list, &block->flow_block.cb_list);
return 0;
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
RTM_NEWTFILTER, false, rtnl_held);
tfilter_put(tp, fh);
- q->flags &= ~TCQ_F_CAN_BYPASS;
+ /* q pointer is NULL for shared blocks */
+ if (q)
+ q->flags &= ~TCQ_F_CAN_BYPASS;
}
errout:
if (block->nooffloaddevcnt && err_stop)
return -EOPNOTSUPP;
- list_for_each_entry(block_cb, &block->cb_list, list) {
+ list_for_each_entry(block_cb, &block->flow_block.cb_list, list) {
err = block_cb->cb(type, type_data, block_cb->cb_priv);
if (err) {
if (err_stop)
}
}
-static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int cls_bpf_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct cls_bpf_head *head = rtnl_dereference(tp->root);
return NULL;
}
-static int fl_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int fl_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct tcf_block *block = tp->chain->block;
arg->count++;
}
-static int mall_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct cls_mall_head *head = rtnl_dereference(tp->root);
}
static int u32_reoffload_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
- bool add, tc_setup_cb_t *cb, void *cb_priv,
+ bool add, flow_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack)
{
struct tc_cls_u32_offload cls_u32 = {};
}
static int u32_reoffload_knode(struct tcf_proto *tp, struct tc_u_knode *n,
- bool add, tc_setup_cb_t *cb, void *cb_priv,
+ bool add, flow_setup_cb_t *cb, void *cb_priv,
struct netlink_ext_ack *extack)
{
struct tc_u_hnode *ht = rtnl_dereference(n->ht_down);
return 0;
}
-static int u32_reoffload(struct tcf_proto *tp, bool add, tc_setup_cb_t *cb,
+static int u32_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb,
void *cb_priv, struct netlink_ext_ack *extack)
{
struct tc_u_common *tp_c = tp->data;
}
}
-/* tipc_toprsv_listener_data_ready - interrupt callback with connection request
+/* tipc_topsrv_listener_data_ready - interrupt callback with connection request
* The queued job is launched into tipc_topsrv_accept()
*/
static void tipc_topsrv_listener_data_ready(struct sock *sk)
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/* SPDX-License-Identifier: GPL-2.0 */
/*
* Simple pci display device.
*
# printing commands
cmd = @set -e; $(echo-cmd) $(cmd_$(1))
-# Add $(obj)/ for paths that are not absolute
-objectify = $(foreach o,$(1),$(if $(filter /%,$(o)),$(o),$(obj)/$(o)))
-
###
# if_changed - execute command if any prerequisite is newer than
# target, or command line has changed
multi-used-y := $(sort $(foreach m,$(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y))), $(m))))
multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))), $(m))))
multi-used := $(multi-used-y) $(multi-used-m)
-single-used-m := $(sort $(filter-out $(multi-used-m),$(obj-m)))
# $(subdir-obj-y) is the list of objects in $(obj-y) which uses dir/ to
# tell kbuild to descend
subdir-obj-y := $(addprefix $(obj)/,$(subdir-obj-y))
real-obj-y := $(addprefix $(obj)/,$(real-obj-y))
real-obj-m := $(addprefix $(obj)/,$(real-obj-m))
-single-used-m := $(addprefix $(obj)/,$(single-used-m))
multi-used-m := $(addprefix $(obj)/,$(multi-used-m))
subdir-ym := $(addprefix $(obj)/,$(subdir-ym))
_VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
# A kernel build generally has over 2000 entries in its compile_commands.json
-# database. If this code finds 500 or fewer, then warn the user that they might
+# database. If this code finds 300 or fewer, then warn the user that they might
# not have all the .cmd files, and they might need to compile the kernel.
-_LOW_COUNT_THRESHOLD = 500
+_LOW_COUNT_THRESHOLD = 300
def parse_arguments():
foreach my $prog (sort keys %missing) {
my $is_optional = $missing{$prog};
+ # At least on some LTS distros like CentOS 7, texlive doesn't
+ # provide all packages we need. When such distros are
+ # detected, we have to disable PDF output.
+ #
+ # So, we need to ignore the packages that distros would
+ # need for LaTeX to work
+ if ($is_optional == 2 && !$pdf) {
+ $optional--;
+ next;
+ }
+
if ($is_optional) {
print "Warning: better to also install \"$prog\".\n";
} else {
if ($pdf) {
check_missing_file("/usr/share/fonts/truetype/dejavu/DejaVuSans.ttf",
- "fonts-dejavu", 1);
+ "fonts-dejavu", 2);
+
+ check_missing_file("/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
+ "fonts-noto-cjk", 2);
}
- check_program("dvipng", 1) if ($pdf);
+ check_program("dvipng", 2) if ($pdf);
check_missing(\%map);
return if (!$need && !$optional);
my @fedora_tex_pkgs = (
"texlive-collection-fontsrecommended",
"texlive-collection-latex",
+ "texlive-xecjk",
"dejavu-sans-fonts",
"dejavu-serif-fonts",
"dejavu-sans-mono-fonts",
#
# Checks valid for RHEL/CentOS version 7.x.
#
- if (! $system_release =~ /Fedora/) {
+ my $old = 0;
+ my $rel;
+ $rel = $1 if ($system_release =~ /release\s+(\d+)/);
+
+ if (!($system_release =~ /Fedora/)) {
$map{"virtualenv"} = "python-virtualenv";
- }
- my $release;
+ if ($rel && $rel < 8) {
+ $old = 1;
+ $pdf = 0;
+
+ printf("Note: texlive packages on RHEL/CENTOS <= 7 are incomplete. Can't support PDF output\n");
+ printf("If you want to build PDF, please read:\n");
+ printf("\thttps://www.systutorials.com/241660/how-to-install-tex-live-on-centos-7-linux/\n");
+ }
+ } else {
+ if ($rel && $rel < 26) {
+ $old = 1;
+ }
+ }
+ if (!$rel) {
+ printf("Couldn't identify release number\n");
+ $old = 1;
+ $pdf = 0;
+ }
- $release = $1 if ($system_release =~ /Fedora\s+release\s+(\d+)/);
+ if ($pdf) {
+ check_missing_file("/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+ "google-noto-sans-cjk-ttc-fonts", 2);
+ }
- check_rpm_missing(\@fedora26_opt_pkgs, 1) if ($pdf && $release >= 26);
- check_rpm_missing(\@fedora_tex_pkgs, 1) if ($pdf);
- check_missing_tex(1) if ($pdf);
+ check_rpm_missing(\@fedora26_opt_pkgs, 2) if ($pdf && !$old);
+ check_rpm_missing(\@fedora_tex_pkgs, 2) if ($pdf);
+ check_missing_tex(2) if ($pdf);
check_missing(\%map);
return if (!$need && !$optional);
- if ($release >= 18) {
+ if (!$old) {
# dnf, for Fedora 18+
printf("You should run:\n\n\tsudo dnf install -y $install\n");
} else {
"texlive-zapfding",
);
- check_rpm_missing(\@suse_tex_pkgs, 1) if ($pdf);
- check_missing_tex(1) if ($pdf);
+ $map{"latexmk"} = "texlive-latexmk-bin";
+
+ # FIXME: add support for installing CJK fonts
+ #
+ # I tried hard, but was unable to find a way to install
+ # "Noto Sans CJK SC" on openSUSE
+
+ check_rpm_missing(\@suse_tex_pkgs, 2) if ($pdf);
+ check_missing_tex(2) if ($pdf);
check_missing(\%map);
return if (!$need && !$optional);
"texlive-fontsextra",
);
- check_rpm_missing(\@tex_pkgs, 1) if ($pdf);
+ $map{"latexmk"} = "texlive-collection-basic";
+
+ if ($pdf) {
+ check_missing_file("/usr/share/fonts/google-noto-cjk/NotoSansCJK-Regular.ttc",
+ "google-noto-sans-cjk-ttc-fonts", 2);
+ }
+
+ check_rpm_missing(\@tex_pkgs, 2) if ($pdf);
check_missing(\%map);
return if (!$need && !$optional);
"texlive-latexextra",
"ttf-dejavu",
);
- check_pacman_missing(\@archlinux_tex_pkgs, 1) if ($pdf);
+ check_pacman_missing(\@archlinux_tex_pkgs, 2) if ($pdf);
+
+ if ($pdf) {
+ check_missing_file("/usr/share/fonts/noto-cjk/NotoSansCJK-Regular.ttc",
+ "noto-fonts-cjk", 2);
+ }
+
check_missing(\%map);
return if (!$need && !$optional);
);
check_missing_file("/usr/share/fonts/dejavu/DejaVuSans.ttf",
- "media-fonts/dejavu", 1) if ($pdf);
+ "media-fonts/dejavu", 2) if ($pdf);
+
+ if ($pdf) {
+ check_missing_file("/usr/share/fonts/noto-cjk/NotoSansCJKsc-Regular.otf",
+ "media-fonts/noto-cjk", 2);
+ }
check_missing(\%map);
return if (!$need && !$optional);
printf("You should run:\n\n");
- printf("\tsudo su -c 'echo \"media-gfx/imagemagick svg png\" > /etc/portage/package.use/imagemagick'\n");
- printf("\tsudo su -c 'echo \"media-gfx/graphviz cairo pdf\" > /etc/portage/package.use/graphviz'\n");
+
+ my $imagemagick = "media-gfx/imagemagick svg png";
+ my $cairo = "media-gfx/graphviz cairo pdf";
+ my $portage_imagemagick = "/etc/portage/package.use/imagemagick";
+ my $portage_cairo = "/etc/portage/package.use/graphviz";
+
+ if (qx(cat $portage_imagemagick) ne "$imagemagick\n") {
+ printf("\tsudo su -c 'echo \"$imagemagick\" > $portage_imagemagick'\n")
+ }
+ if (qx(cat $portage_cairo) ne "$cairo\n") {
+ printf("\tsudo su -c 'echo \"$cairo\" > $portage_cairo'\n");
+ }
+
printf("\tsudo emerge --ask $install\n");
}
my %map = (
"sphinx-build" => "sphinx"
);
- check_missing_tex(1) if ($pdf);
+ check_missing_tex(2) if ($pdf);
check_missing(\%map);
print "I don't know distro $system_release.\n";
print "So, I can't provide you a hint with the install procedure.\n";
check_program("make", 0);
check_program("gcc", 0);
check_python_module("sphinx_rtd_theme", 1) if (!$virtualenv);
- check_program("xelatex", 1) if ($pdf);
check_program("dot", 1);
check_program("convert", 1);
- check_program("rsvg-convert", 1) if ($pdf);
- check_program("latexmk", 1) if ($pdf);
+
+ # Extra PDF files - should use 2 for is_optional
+ check_program("xelatex", 2) if ($pdf);
+ check_program("rsvg-convert", 2) if ($pdf);
+ check_program("latexmk", 2) if ($pdf);
check_distros();
config GCC_PLUGIN_STRUCTLEAK_BYREF
bool "zero-init structs passed by reference (strong)"
depends on GCC_PLUGINS
+ depends on !(KASAN && KASAN_STACK=1)
select GCC_PLUGIN_STRUCTLEAK
help
Zero-initialize any structures on the stack that may
exposures, like CVE-2017-1000410:
https://git.kernel.org/linus/06e7e776ca4d3654
+ As a side-effect, this keeps a lot of variables on the
+ stack that can otherwise be optimized out, so combining
+ this with CONFIG_KASAN_STACK can lead to a stack overflow
+ and is disallowed.
+
config GCC_PLUGIN_STRUCTLEAK_BYREF_ALL
bool "zero-init anything passed by reference (very strong)"
depends on GCC_PLUGINS
+ depends on !(KASAN && KASAN_STACK=1)
select GCC_PLUGIN_STRUCTLEAK
help
Zero-initialize any stack variables that may be passed
++count;
}
+ /* bail out if we already reached max entries */
+ rc = -EOVERFLOW;
+ if (count >= SIDTAB_MAX)
+ goto out_unlock;
+
/* insert context into new entry */
rc = -ENOMEM;
dst = sidtab_do_lookup(s, count, 1);
vendor_id);
ret = device_add(&codec->dev);
- if (ret)
- goto err_free_codec;
+ if (ret) {
+ put_device(&codec->dev);
+ return ret;
+ }
return 0;
-err_free_codec:
- of_node_put(codec->dev.of_node);
- put_device(&codec->dev);
- kfree(codec);
- ac97_ctrl->codecs[idx] = NULL;
-
- return ret;
}
unsigned int snd_ac97_bus_scan_one(struct ac97_controller *adrv,
stream->metadata_set = false;
stream->next_track = false;
- if (stream->direction == SND_COMPRESS_PLAYBACK)
- stream->runtime->state = SNDRV_PCM_STATE_SETUP;
- else
- stream->runtime->state = SNDRV_PCM_STATE_PREPARED;
+ stream->runtime->state = SNDRV_PCM_STATE_SETUP;
} else {
return -EPERM;
}
{
int retval;
- if (stream->runtime->state != SNDRV_PCM_STATE_PREPARED)
+ switch (stream->runtime->state) {
+ case SNDRV_PCM_STATE_SETUP:
+ if (stream->direction != SND_COMPRESS_CAPTURE)
+ return -EPERM;
+ break;
+ case SNDRV_PCM_STATE_PREPARED:
+ break;
+ default:
return -EPERM;
+ }
+
retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_START);
if (!retval)
stream->runtime->state = SNDRV_PCM_STATE_RUNNING;
{
int retval;
- if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED ||
- stream->runtime->state == SNDRV_PCM_STATE_SETUP)
+ switch (stream->runtime->state) {
+ case SNDRV_PCM_STATE_OPEN:
+ case SNDRV_PCM_STATE_SETUP:
+ case SNDRV_PCM_STATE_PREPARED:
return -EPERM;
+ default:
+ break;
+ }
+
retval = stream->ops->trigger(stream, SNDRV_PCM_TRIGGER_STOP);
if (!retval) {
snd_compr_drain_notify(stream);
{
int retval;
- if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED ||
- stream->runtime->state == SNDRV_PCM_STATE_SETUP)
+ switch (stream->runtime->state) {
+ case SNDRV_PCM_STATE_OPEN:
+ case SNDRV_PCM_STATE_SETUP:
+ case SNDRV_PCM_STATE_PREPARED:
+ case SNDRV_PCM_STATE_PAUSED:
return -EPERM;
+ case SNDRV_PCM_STATE_XRUN:
+ return -EPIPE;
+ default:
+ break;
+ }
retval = stream->ops->trigger(stream, SND_COMPR_TRIGGER_DRAIN);
if (retval) {
if (stream->runtime->state != SNDRV_PCM_STATE_RUNNING)
return -EPERM;
+ /* next track doesn't have any meaning for capture streams */
+ if (stream->direction == SND_COMPRESS_CAPTURE)
+ return -EPERM;
+
/* you can signal next track if this is intended to be a gapless stream
* and current track metadata is set
*/
static int snd_compr_partial_drain(struct snd_compr_stream *stream)
{
int retval;
- if (stream->runtime->state == SNDRV_PCM_STATE_PREPARED ||
- stream->runtime->state == SNDRV_PCM_STATE_SETUP)
+
+ switch (stream->runtime->state) {
+ case SNDRV_PCM_STATE_OPEN:
+ case SNDRV_PCM_STATE_SETUP:
+ case SNDRV_PCM_STATE_PREPARED:
+ case SNDRV_PCM_STATE_PAUSED:
+ return -EPERM;
+ case SNDRV_PCM_STATE_XRUN:
+ return -EPIPE;
+ default:
+ break;
+ }
+
+ /* partial drain doesn't have any meaning for capture streams */
+ if (stream->direction == SND_COMPRESS_CAPTURE)
return -EPERM;
+
/* stream can be drained only when next track has been signalled */
if (stream->next_track == false)
return -EPERM;
spin_lock_init(&group->lock);
mutex_init(&group->mutex);
INIT_LIST_HEAD(&group->substreams);
- refcount_set(&group->refs, 0);
+ refcount_set(&group->refs, 1);
}
/* define group lock helpers */
if (!group)
return;
- do_free = refcount_dec_and_test(&group->refs) &&
- list_empty(&group->substreams);
+ do_free = refcount_dec_and_test(&group->refs);
snd_pcm_group_unlock(group, substream->pcm->nonatomic);
if (do_free)
kfree(group);
snd_pcm_group_lock_irq(target_group, nonatomic);
snd_pcm_stream_lock(substream1);
snd_pcm_group_assign(substream1, target_group);
+ refcount_inc(&target_group->refs);
snd_pcm_stream_unlock(substream1);
snd_pcm_group_unlock_irq(target_group, nonatomic);
_end:
snd_pcm_group_lock_irq(group, nonatomic);
relink_to_local(substream);
+ refcount_dec(&group->refs);
/* detach the last stream, too */
if (list_is_singular(&group->substreams)) {
relink_to_local(list_first_entry(&group->substreams,
struct snd_pcm_substream,
link_list));
- do_free = !refcount_read(&group->refs);
+ do_free = refcount_dec_and_test(&group->refs);
}
snd_pcm_group_unlock_irq(group, nonatomic);
static int hda_codec_force_resume(struct device *dev)
{
struct hda_codec *codec = dev_to_hda_codec(dev);
- bool forced_resume = !codec->relaxed_resume;
+ bool forced_resume = !codec->relaxed_resume && codec->jacktbl.used;
int ret;
/* The get/put pair below enforces the runtime resume even if the
#define AZX_DCAPS_INTEL_SKYLAKE \
(AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME |\
+ AZX_DCAPS_SYNC_WRITE |\
AZX_DCAPS_SEPARATE_STREAM_TAG | AZX_DCAPS_I915_COMPONENT)
-#define AZX_DCAPS_INTEL_BROXTON \
- (AZX_DCAPS_INTEL_PCH_BASE | AZX_DCAPS_PM_RUNTIME |\
- AZX_DCAPS_SEPARATE_STREAM_TAG | AZX_DCAPS_I915_COMPONENT)
+#define AZX_DCAPS_INTEL_BROXTON AZX_DCAPS_INTEL_SKYLAKE
/* quirks for ATI SB / AMD Hudson */
#define AZX_DCAPS_PRESET_ATI_SB \
*/
static const struct hda_device_id snd_hda_id_conexant[] = {
+ HDA_CODEC_ENTRY(0x14f11f86, "CX8070", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f12008, "CX8200", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15045, "CX20549 (Venice)", patch_conexant_auto),
HDA_CODEC_ENTRY(0x14f15047, "CX20551 (Waikiki)", patch_conexant_auto),
.name = "POD HD500",
.capabilities = LINE6_CAP_PCM
| LINE6_CAP_HWMON,
- .altsetting = 1,
+ .altsetting = 0,
.ep_ctrl_r = 0x81,
.ep_ctrl_w = 0x01,
.ep_audio_r = 0x86,
module_usb_driver(variax_driver);
-MODULE_DESCRIPTION("Vairax Workbench USB driver");
+MODULE_DESCRIPTION("Variax Workbench USB driver");
MODULE_LICENSE("GPL");
struct alternative *alt;
struct instruction *insn, *next_insn;
struct section *sec;
+ u8 visited;
int ret;
insn = first;
return 1;
}
+ visited = 1 << state.uaccess;
if (insn->visited) {
if (!insn->hint && !insn_state_match(insn, &state))
return 1;
- /* If we were here with AC=0, but now have AC=1, go again */
- if (insn->state.uaccess || !state.uaccess)
+ if (insn->visited & visited)
return 0;
}
} else
insn->state = state;
- insn->visited = true;
+ insn->visited |= visited;
if (!insn->ignore_alts) {
bool skip_orig = false;
unsigned int len;
enum insn_type type;
unsigned long immediate;
- bool alt_group, visited, dead_end, ignore, hint, save, restore, ignore_alts;
+ bool alt_group, dead_end, ignore, hint, save, restore, ignore_alts;
bool retpoline_safe;
+ u8 visited;
struct symbol *call_dest;
struct instruction *jump_dest;
struct instruction *first_jump_src;
With the metric option perf script can compute metrics for
sampling periods, similar to perf stat. This requires
- specifying a group with multiple metrics with the :S option
+ specifying a group with multiple events defining metrics with the :S option
for perf record. perf will sample on the first event, and
- compute metrics for all the events in the group. Please note
+ print computed metrics for all the events in the group. Please note
that the metric computed is averaged over the whole sampling
- period, not just for the sample point.
+ period (since the last sample), not just for the sample point.
For sample events it's possible to display misc field with -F +misc option,
following letters are displayed for each bit:
perf script --time 0%-10%,30%-40%
--max-blocks::
- Set the maximum number of program blocks to print with brstackasm for
+ Set the maximum number of program blocks to print with brstackinsn for
each sample.
--reltime::
ret = perf_add_probe_events(params.events, params.nevents);
if (ret < 0) {
+
+ /*
+ * When perf_add_probe_events() fails it calls
+ * cleanup_perf_probe_events(pevs, npevs), i.e.
+ * cleanup_perf_probe_events(params.events, params.nevents), which
+ * will call clear_perf_probe_event(), so set nevents to zero
+ * to avoid cleanup_params() to call clear_perf_probe_event() again
+ * on the same pevs.
+ */
+ params.nevents = 0;
pr_err_with_code(" Error: Failed to add events.", ret);
return ret;
}
printed += ip__fprintf_sym(ip, thread, x.cpumode, x.cpu, &lastsym, attr, fp);
if (ip == end) {
- printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, insn, fp,
+ printed += ip__fprintf_jump(ip, &br->entries[i], &x, buffer + off, len - off, ++insn, fp,
&total_cycles);
if (PRINT_FIELD(SRCCODE))
printed += print_srccode(thread, x.cpumode, ip);
* group leaders.
*/
read_counters(&(struct timespec) { .tv_nsec = t1-t0 });
- perf_evlist__close(evsel_list);
+
+ /*
+ * We need to keep evsel_list alive, because it's processed
+ * later the evsel_list will be closed after.
+ */
+ if (!STAT_RECORD)
+ perf_evlist__close(evsel_list);
return WEXITSTATUS(status);
}
perf_session__write_header(perf_stat.session, evsel_list, fd, true);
}
+ perf_evlist__close(evsel_list);
perf_session__delete(perf_stat.session);
}
xyarray__delete(evsel->sample_id);
evsel->sample_id = NULL;
zfree(&evsel->id);
+ evsel->ids = 0;
}
static void perf_evsel__free_config_terms(struct perf_evsel *evsel)
perf_evsel__close_fd(evsel);
perf_evsel__free_fd(evsel);
+ perf_evsel__free_id(evsel);
}
int perf_evsel__open_per_cpu(struct perf_evsel *evsel,
return 0;
ff.buf = (void *)fe->data;
- ff.size = event->header.size - sizeof(event->header);
+ ff.size = event->header.size - sizeof(*fe);
ff.ph = &session->header;
if (feat_ops[feat].process(&ff, NULL))
field = next;
}
}
+ pev->nargs = 0;
zfree(&pev->args);
}
void *src;
size_t decomp_size, src_size;
u64 decomp_last_rem = 0;
- size_t decomp_len = session->header.env.comp_mmap_len;
+ size_t mmap_len, decomp_len = session->header.env.comp_mmap_len;
struct decomp *decomp, *decomp_last = session->decomp_last;
- decomp = mmap(NULL, sizeof(struct decomp) + decomp_len, PROT_READ|PROT_WRITE,
+ if (decomp_last) {
+ decomp_last_rem = decomp_last->size - decomp_last->head;
+ decomp_len += decomp_last_rem;
+ }
+
+ mmap_len = sizeof(struct decomp) + decomp_len;
+ decomp = mmap(NULL, mmap_len, PROT_READ|PROT_WRITE,
MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
if (decomp == MAP_FAILED) {
pr_err("Couldn't allocate memory for decompression\n");
}
decomp->file_pos = file_offset;
+ decomp->mmap_len = mmap_len;
decomp->head = 0;
- if (decomp_last) {
- decomp_last_rem = decomp_last->size - decomp_last->head;
+ if (decomp_last_rem) {
memcpy(decomp->data, &(decomp_last->data[decomp_last->head]), decomp_last_rem);
decomp->size = decomp_last_rem;
}
decomp_size = zstd_decompress_stream(&(session->zstd_data), src, src_size,
&(decomp->data[decomp_last_rem]), decomp_len - decomp_last_rem);
if (!decomp_size) {
- munmap(decomp, sizeof(struct decomp) + decomp_len);
+ munmap(decomp, mmap_len);
pr_err("Couldn't decompress data\n");
return -1;
}
static void perf_session__release_decomp_events(struct perf_session *session)
{
struct decomp *next, *decomp;
- size_t decomp_len;
+ size_t mmap_len;
next = session->decomp;
- decomp_len = session->header.env.comp_mmap_len;
do {
decomp = next;
if (decomp == NULL)
break;
next = decomp->next;
- munmap(decomp, decomp_len + sizeof(struct decomp));
+ mmap_len = decomp->mmap_len;
+ munmap(decomp, mmap_len);
} while (1);
}
struct decomp {
struct decomp *next;
u64 file_pos;
+ size_t mmap_len;
u64 head;
size_t size;
char data[];
"stalled cycles per insn",
ratio);
} else if (have_frontend_stalled) {
- print_metric(config, ctxp, NULL, NULL,
+ out->new_line(config, ctxp);
+ print_metric(config, ctxp, NULL, "%7.2f ",
"stalled cycles per insn", 0);
}
} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) {
while (input.pos < input.size) {
ret = ZSTD_decompressStream(data->dstream, &output, &input);
if (ZSTD_isError(ret)) {
- pr_err("failed to decompress (B): %ld -> %ld : %s\n",
- src_size, output.size, ZSTD_getErrorName(ret));
+ pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
+ src_size, output.size, dst_size, ZSTD_getErrorName(ret));
break;
}
output.dst = dst + output.pos;
EXTRA_WARNINGS += -Wold-style-definition
EXTRA_WARNINGS += -Wpacked
EXTRA_WARNINGS += -Wredundant-decls
-EXTRA_WARNINGS += -Wshadow
EXTRA_WARNINGS += -Wstrict-prototypes
EXTRA_WARNINGS += -Wswitch-default
EXTRA_WARNINGS += -Wswitch-enum
# will do for now and keep the above -Wstrict-aliasing=3 in place
# in newer systems.
# Needed for the __raw_cmpxchg in tools/arch/x86/include/asm/cmpxchg.h
+#
+# See https://lkml.org/lkml/2006/11/28/253 and https://gcc.gnu.org/gcc-4.8/changes.html,
+# that takes into account Linus's comments (search for Wshadow) for the reasoning about
+# -Wshadow not being interesting before gcc 4.8.
+
ifneq ($(filter 3.%,$(MAKE_VERSION)),) # make-3
EXTRA_WARNINGS += -fno-strict-aliasing
+EXTRA_WARNINGS += -Wno-shadow
+else
+EXTRA_WARNINGS += -Wshadow
endif
ifneq ($(findstring $(MAKEFLAGS), w),w)
}
else {
- die "Unknow option $opt\n";
+ die "Unknown option $opt\n";
}
}
}
}
run_command "cp $good_start $good" or die "failed to copy to $good\n";
- run_command "cp $bad_start $bad" or die "faield to copy to $bad\n";
+ run_command "cp $bad_start $bad" or die "failed to copy to $bad\n";
} else {
if ( ! -f $good ) {
die "Can not find file $good\n";
header-test- += linux/bpf_perf_event.h
endif
-ifeq ($(SRCARCH),s390)
-header-test- += asm/zcrypt.h
-endif
-
ifeq ($(SRCARCH),sparc)
header-test- += asm/stat.h
header-test- += asm/uctx.h