# Please keep this list dictionary sorted.
#
Aaron Durbin <adurbin@google.com>
+Abel Vesa <abelvesa@kernel.org> <abel.vesa@nxp.com>
+Abel Vesa <abelvesa@kernel.org> <abelvesa@gmail.com>
Abhinav Kumar <quic_abhinavk@quicinc.com> <abhinavk@codeaurora.org>
Adam Oldham <oldhamca@gmail.com>
Adam Radford <aradford@gmail.com>
Atish Patra <atishp@atishpatra.org> <atish.patra@wdc.com>
Axel Dyks <xl@xlsigned.net>
Axel Lin <axel.lin@gmail.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@linaro.org>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@spreadtrum.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang@unisoc.com>
+Baolin Wang <baolin.wang@linux.alibaba.com> <baolin.wang7@gmail.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@sandisk.com>
Bart Van Assche <bvanassche@acm.org> <bart.vanassche@wdc.com>
Ben Gardner <bgardner@wabtec.com>
Ben M Cahill <ben.m.cahill@intel.com>
+Ben Widawsky <bwidawsk@kernel.org> <ben@bwidawsk.net>
+Ben Widawsky <bwidawsk@kernel.org> <ben.widawsky@intel.com>
+Ben Widawsky <bwidawsk@kernel.org> <benjamin.widawsky@intel.com>
Björn Steinbrink <B.Steinbrink@gmx.de>
Björn Töpel <bjorn@kernel.org> <bjorn.topel@gmail.com>
Björn Töpel <bjorn@kernel.org> <bjorn.topel@intel.com>
Christian Brauner <brauner@kernel.org> <christian@brauner.io>
Christian Brauner <brauner@kernel.org> <christian.brauner@canonical.com>
Christian Brauner <brauner@kernel.org> <christian.brauner@ubuntu.com>
+Christian Marangi <ansuelsmth@gmail.com>
Christophe Ricard <christophe.ricard@gmail.com>
Christoph Hellwig <hch@lst.de>
Colin Ian King <colin.king@intel.com> <colin.king@canonical.com>
Frank Zago <fzago@systemfabricworks.com>
Gao Xiang <xiang@kernel.org> <gaoxiang25@huawei.com>
Gao Xiang <xiang@kernel.org> <hsiangkao@aol.com>
+Gao Xiang <xiang@kernel.org> <hsiangkao@linux.alibaba.com>
+Gao Xiang <xiang@kernel.org> <hsiangkao@redhat.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@de.ibm.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <gerald.schaefer@de.ibm.com>
Gerald Schaefer <gerald.schaefer@linux.ibm.com> <geraldsc@linux.vnet.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jang@linux.vnet.ibm.com>
Jan Glauber <jan.glauber@gmail.com> <jglauber@cavium.com>
Jarkko Sakkinen <jarkko@kernel.org> <jarkko.sakkinen@linux.intel.com>
+Jarkko Sakkinen <jarkko@kernel.org> <jarkko@profian.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@mellanox.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgg@nvidia.com>
Jason Gunthorpe <jgg@ziepe.ca> <jgunthorpe@obsidianresearch.com>
Sebastian Reichel <sre@kernel.org> <sebastian.reichel@collabora.co.uk>
Sebastian Reichel <sre@kernel.org> <sre@debian.org>
Sedat Dilek <sedat.dilek@gmail.com> <sedat.dilek@credativ.de>
+Seth Forshee <sforshee@kernel.org> <seth.forshee@canonical.com>
Shiraz Hashim <shiraz.linux.kernel@gmail.com> <shiraz.hashim@st.com>
Shuah Khan <shuah@kernel.org> <shuahkhan@gmail.com>
Shuah Khan <shuah@kernel.org> <shuah.khan@hp.com>
S: Fremont, California 94539
S: USA
+N: Tomas Cech
+E: sleep_walker@suse.com
+D: arm/palm treo support
+
N: Florent Chabaud
E: florent.chabaud@polytechnique.org
D: software suspend
described in ATA8 7.16 and 7.17. Only valid if
the device is not a PM.
- pio_mode: (RO) Transfer modes supported by the device when
- in PIO mode. Mostly used by PATA device.
+ pio_mode: (RO) PIO transfer mode used by the device.
+ Mostly used by PATA devices.
- xfer_mode: (RO) Current transfer mode
+ xfer_mode: (RO) Current transfer mode. Mostly used by
+ PATA devices.
- dma_mode: (RO) Transfer modes supported by the device when
- in DMA mode. Mostly used by PATA device.
+ dma_mode: (RO) DMA transfer mode used by the device.
+ Mostly used by PATA devices.
class: (RO) Device class. Can be "ata" for disk,
"atapi" for packet device, "pmp" for PM, or
-What: /sys/bus/iio/devices/iio:deviceX/conversion_mode
+What: /sys/bus/iio/devices/iio:deviceX/in_conversion_mode
KernelVersion: 4.2
Contact: linux-iio@vger.kernel.org
Description:
/sys/devices/system/cpu/vulnerabilities/srbds
/sys/devices/system/cpu/vulnerabilities/tsx_async_abort
/sys/devices/system/cpu/vulnerabilities/itlb_multihit
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
Date: January 2018
Contact: Linux kernel mailing list <linux-kernel@vger.kernel.org>
Description: Information about CPU vulnerabilities
DDR Backup Mode must be explicitly enabled by the user,
to invoke step 1.
- See also Documentation/devicetree/bindings/mfd/bd9571mwv.txt.
+ See also Documentation/devicetree/bindings/mfd/rohm,bd9571mwv.yaml.
Users: User space applications for embedded boards equipped with a
BD9571MWV PMIC.
special-register-buffer-data-sampling.rst
core-scheduling.rst
l1d_flush.rst
+ processor_mmio_stale_data.rst
--- /dev/null
+=========================================
+Processor MMIO Stale Data Vulnerabilities
+=========================================
+
+Processor MMIO Stale Data Vulnerabilities are a class of memory-mapped I/O
+(MMIO) vulnerabilities that can expose data. The sequences of operations for
+exposing data range from simple to very complex. Because most of the
+vulnerabilities require the attacker to have access to MMIO, many environments
+are not affected. System environments using virtualization where MMIO access is
+provided to untrusted guests may need mitigation. These vulnerabilities are
+not transient execution attacks. However, these vulnerabilities may propagate
+stale data into core fill buffers where the data can subsequently be inferred
+by an unmitigated transient execution attack. Mitigation for these
+vulnerabilities includes a combination of microcode update and software
+changes, depending on the platform and usage model. Some of these mitigations
+are similar to those used to mitigate Microarchitectural Data Sampling (MDS) or
+those used to mitigate Special Register Buffer Data Sampling (SRBDS).
+
+Data Propagators
+================
+Propagators are operations that result in stale data being copied or moved from
+one microarchitectural buffer or register to another. Processor MMIO Stale Data
+Vulnerabilities are operations that may result in stale data being directly
+read into an architectural, software-visible state or sampled from a buffer or
+register.
+
+Fill Buffer Stale Data Propagator (FBSDP)
+-----------------------------------------
+Stale data may propagate from fill buffers (FB) into the non-coherent portion
+of the uncore on some non-coherent writes. Fill buffer propagation by itself
+does not make stale data architecturally visible. Stale data must be propagated
+to a location where it is subject to reading or sampling.
+
+Sideband Stale Data Propagator (SSDP)
+-------------------------------------
+The sideband stale data propagator (SSDP) is limited to the client (including
+Intel Xeon server E3) uncore implementation. The sideband response buffer is
+shared by all client cores. For non-coherent reads that go to sideband
+destinations, the uncore logic returns 64 bytes of data to the core, including
+both requested data and unrequested stale data, from a transaction buffer and
+the sideband response buffer. As a result, stale data from the sideband
+response and transaction buffers may now reside in a core fill buffer.
+
+Primary Stale Data Propagator (PSDP)
+------------------------------------
+The primary stale data propagator (PSDP) is limited to the client (including
+Intel Xeon server E3) uncore implementation. Similar to the sideband response
+buffer, the primary response buffer is shared by all client cores. For some
+processors, MMIO primary reads will return 64 bytes of data to the core fill
+buffer including both requested data and unrequested stale data. This is
+similar to the sideband stale data propagator.
+
+Vulnerabilities
+===============
+Device Register Partial Write (DRPW) (CVE-2022-21166)
+-----------------------------------------------------
+Some endpoint MMIO registers incorrectly handle writes that are smaller than
+the register size. Instead of aborting the write or only copying the correct
+subset of bytes (for example, 2 bytes for a 2-byte write), more bytes than
+specified by the write transaction may be written to the register. On
+processors affected by FBSDP, this may expose stale data from the fill buffers
+of the core that created the write transaction.
+
+Shared Buffers Data Sampling (SBDS) (CVE-2022-21125)
+----------------------------------------------------
+After propagators may have moved data around the uncore and copied stale data
+into client core fill buffers, processors affected by MFBDS can leak data from
+the fill buffer. It is limited to the client (including Intel Xeon server E3)
+uncore implementation.
+
+Shared Buffers Data Read (SBDR) (CVE-2022-21123)
+------------------------------------------------
+It is similar to Shared Buffer Data Sampling (SBDS) except that the data is
+directly read into the architectural software-visible state. It is limited to
+the client (including Intel Xeon server E3) uncore implementation.
+
+Affected Processors
+===================
+Not all the CPUs are affected by all the variants. For instance, most
+processors for the server market (excluding Intel Xeon E3 processors) are
+impacted by only Device Register Partial Write (DRPW).
+
+Below is the list of affected Intel processors [#f1]_:
+
+ =================== ============ =========
+ Common name Family_Model Steppings
+ =================== ============ =========
+ HASWELL_X 06_3FH 2,4
+ SKYLAKE_L 06_4EH 3
+ BROADWELL_X 06_4FH All
+ SKYLAKE_X 06_55H 3,4,6,7,11
+ BROADWELL_D 06_56H 3,4,5
+ SKYLAKE 06_5EH 3
+ ICELAKE_X 06_6AH 4,5,6
+ ICELAKE_D 06_6CH 1
+ ICELAKE_L 06_7EH 5
+ ATOM_TREMONT_D 06_86H All
+ LAKEFIELD 06_8AH 1
+ KABYLAKE_L 06_8EH 9 to 12
+ ATOM_TREMONT 06_96H 1
+ ATOM_TREMONT_L 06_9CH 0
+ KABYLAKE 06_9EH 9 to 13
+ COMETLAKE 06_A5H 2,3,5
+ COMETLAKE_L 06_A6H 0,1
+ ROCKETLAKE 06_A7H 1
+ =================== ============ =========
+
+If a CPU is in the affected processor list, but not affected by a variant, it
+is indicated by new bits in MSR IA32_ARCH_CAPABILITIES. As described in a later
+section, mitigation largely remains the same for all the variants, i.e. to
+clear the CPU fill buffers via VERW instruction.
+
+New bits in MSRs
+================
+Newer processors and microcode update on existing affected processors added new
+bits to IA32_ARCH_CAPABILITIES MSR. These bits can be used to enumerate
+specific variants of Processor MMIO Stale Data vulnerabilities and mitigation
+capability.
+
+MSR IA32_ARCH_CAPABILITIES
+--------------------------
+Bit 13 - SBDR_SSDP_NO - When set, processor is not affected by either the
+ Shared Buffers Data Read (SBDR) vulnerability or the sideband stale
+ data propagator (SSDP).
+Bit 14 - FBSDP_NO - When set, processor is not affected by the Fill Buffer
+ Stale Data Propagator (FBSDP).
+Bit 15 - PSDP_NO - When set, processor is not affected by Primary Stale Data
+ Propagator (PSDP).
+Bit 17 - FB_CLEAR - When set, VERW instruction will overwrite CPU fill buffer
+ values as part of MD_CLEAR operations. Processors that do not
+ enumerate MDS_NO (meaning they are affected by MDS) but that do
+ enumerate support for both L1D_FLUSH and MD_CLEAR implicitly enumerate
+ FB_CLEAR as part of their MD_CLEAR support.
+Bit 18 - FB_CLEAR_CTRL - Processor supports read and write to MSR
+ IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]. On such processors, the FB_CLEAR_DIS
+ bit can be set to cause the VERW instruction to not perform the
+ FB_CLEAR action. Not all processors that support FB_CLEAR will support
+ FB_CLEAR_CTRL.
+
+MSR IA32_MCU_OPT_CTRL
+---------------------
+Bit 3 - FB_CLEAR_DIS - When set, VERW instruction does not perform the FB_CLEAR
+action. This may be useful to reduce the performance impact of FB_CLEAR in
+cases where system software deems it warranted (for example, when performance
+is more critical, or the untrusted software has no MMIO access). Note that
+FB_CLEAR_DIS has no impact on enumeration (for example, it does not change
+FB_CLEAR or MD_CLEAR enumeration) and it may not be supported on all processors
+that enumerate FB_CLEAR.
+
+Mitigation
+==========
+Like MDS, all variants of Processor MMIO Stale Data vulnerabilities have the
+same mitigation strategy to force the CPU to clear the affected buffers before
+an attacker can extract the secrets.
+
+This is achieved by using the otherwise unused and obsolete VERW instruction in
+combination with a microcode update. The microcode clears the affected CPU
+buffers when the VERW instruction is executed.
+
+Kernel reuses the MDS function to invoke the buffer clearing:
+
+ mds_clear_cpu_buffers()
+
+On MDS affected CPUs, the kernel already invokes CPU buffer clear on
+kernel/userspace, hypervisor/guest and C-state (idle) transitions. No
+additional mitigation is needed on such CPUs.
+
+For CPUs not affected by MDS or TAA, mitigation is needed only for the attacker
+with MMIO capability. Therefore, VERW is not required for kernel/userspace. For
+virtualization case, VERW is only needed at VMENTER for a guest with MMIO
+capability.
+
+Mitigation points
+-----------------
+Return to user space
+^^^^^^^^^^^^^^^^^^^^
+Same mitigation as MDS when affected by MDS/TAA, otherwise no mitigation
+needed.
+
+C-State transition
+^^^^^^^^^^^^^^^^^^
+Control register writes by CPU during C-state transition can propagate data
+from fill buffer to uncore buffers. Execute VERW before C-state transition to
+clear CPU fill buffers.
+
+Guest entry point
+^^^^^^^^^^^^^^^^^
+Same mitigation as MDS when processor is also affected by MDS/TAA, otherwise
+execute VERW at VMENTER only for MMIO capable guests. On CPUs not affected by
+MDS/TAA, guest without MMIO access cannot extract secrets using Processor MMIO
+Stale Data vulnerabilities, so there is no need to execute VERW for such guests.
+
+Mitigation control on the kernel command line
+---------------------------------------------
+The kernel command line allows to control the Processor MMIO Stale Data
+mitigations at boot time with the option "mmio_stale_data=". The valid
+arguments for this option are:
+
+ ========== =================================================================
+ full If the CPU is vulnerable, enable mitigation; CPU buffer clearing
+ on exit to userspace and when entering a VM. Idle transitions are
+ protected as well. It does not automatically disable SMT.
+ full,nosmt Same as full, with SMT disabled on vulnerable CPUs. This is the
+ complete mitigation.
+ off Disables mitigation completely.
+ ========== =================================================================
+
+If the CPU is affected and mmio_stale_data=off is not supplied on the kernel
+command line, then the kernel selects the appropriate mitigation.
+
+Mitigation status information
+-----------------------------
+The Linux kernel provides a sysfs interface to enumerate the current
+vulnerability status of the system: whether the system is vulnerable, and
+which mitigations are active. The relevant sysfs file is:
+
+ /sys/devices/system/cpu/vulnerabilities/mmio_stale_data
+
+The possible values in this file are:
+
+ .. list-table::
+
+ * - 'Not affected'
+ - The processor is not vulnerable
+ * - 'Vulnerable'
+ - The processor is vulnerable, but no mitigation enabled
+ * - 'Vulnerable: Clear CPU buffers attempted, no microcode'
+ - The processor is vulnerable, but microcode is not updated. The
+ mitigation is enabled on a best effort basis.
+ * - 'Mitigation: Clear CPU buffers'
+ - The processor is vulnerable and the CPU buffer clearing mitigation is
+ enabled.
+
+If the processor is vulnerable then the following information is appended to
+the above information:
+
+ ======================== ===========================================
+ 'SMT vulnerable' SMT is enabled
+ 'SMT disabled' SMT is disabled
+ 'SMT Host state unknown' Kernel runs in a VM, Host SMT state unknown
+ ======================== ===========================================
+
+References
+----------
+.. [#f1] Affected Processors
+ https://www.intel.com/content/www/us/en/developer/topic-technology/software-security-guidance/processors-affected-consolidated-product-cpu-model.html
protected: nVHE-based mode with support for guests whose
state is kept private from the host.
- Not valid if the kernel is running in EL2.
Defaults to VHE/nVHE based on hardware support. Setting
mode to "protected" will disable kexec and hibernation
srbds=off [X86,INTEL]
no_entry_flush [PPC]
no_uaccess_flush [PPC]
+ mmio_stale_data=off [X86]
+ retbleed=off [X86]
Exceptions:
This does not have any effect on
Equivalent to: l1tf=flush,nosmt [X86]
mds=full,nosmt [X86]
tsx_async_abort=full,nosmt [X86]
+ mmio_stale_data=full,nosmt [X86]
+ retbleed=auto,nosmt [X86]
mminit_loglevel=
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
log everything. Information is printed at KERN_DEBUG
so loglevel=8 may also need to be specified.
+ mmio_stale_data=
+ [X86,INTEL] Control mitigation for the Processor
+ MMIO Stale Data vulnerabilities.
+
+ Processor MMIO Stale Data is a class of
+ vulnerabilities that may expose data after an MMIO
+ operation. Exposed data could originate or end in
+ the same CPU buffers as affected by MDS and TAA.
+ Therefore, similar to MDS and TAA, the mitigation
+ is to clear the affected CPU buffers.
+
+ This parameter controls the mitigation. The
+ options are:
+
+ full - Enable mitigation on vulnerable CPUs
+
+ full,nosmt - Enable mitigation and disable SMT on
+ vulnerable CPUs.
+
+ off - Unconditionally disable mitigation
+
+ On MDS or TAA affected machines,
+ mmio_stale_data=off can be prevented by an active
+ MDS or TAA mitigation as these vulnerabilities are
+ mitigated with the same mechanism so in order to
+ disable this mitigation, you need to specify
+ mds=off and tsx_async_abort=off too.
+
+ Not specifying this option is equivalent to
+ mmio_stale_data=full.
+
+ For details see:
+ Documentation/admin-guide/hw-vuln/processor_mmio_stale_data.rst
+
module.sig_enforce
[KNL] When CONFIG_MODULE_SIG is set, this means that
modules without (valid) signatures will fail to load.
retain_initrd [RAM] Keep initrd memory after extraction
+ retbleed= [X86] Control mitigation of RETBleed (Arbitrary
+ Speculative Code Execution with Return Instructions)
+ vulnerability.
+
+ off - no mitigation
+ auto - automatically select a migitation
+ auto,nosmt - automatically select a mitigation,
+ disabling SMT if necessary for
+ the full mitigation (only on Zen1
+ and older without STIBP).
+ ibpb - mitigate short speculation windows on
+ basic block boundaries too. Safe, highest
+ perf impact.
+ unret - force enable untrained return thunks,
+ only effective on AMD f15h-f17h
+ based systems.
+ unret,nosmt - like unret, will disable SMT when STIBP
+ is not available.
+
+ Selecting 'auto' will choose a mitigation method at run
+ time according to the CPU.
+
+ Not specifying this option is equivalent to retbleed=auto.
+
rfkill.default_state=
0 "airplane mode". All wifi, bluetooth, wimax, gps, fm,
etc. communication is blocked by default.
eibrs - enhanced IBRS
eibrs,retpoline - enhanced IBRS + Retpolines
eibrs,lfence - enhanced IBRS + LFENCE
+ ibrs - use IBRS to protect kernel
Not specifying this option is equivalent to
spectre_v2=auto.
expediting. Set to zero to disable automatic
expediting.
+ srcutree.srcu_max_nodelay [KNL]
+ Specifies the number of no-delay instances
+ per jiffy for which the SRCU grace period
+ worker thread will be rescheduled with zero
+ delay. Beyond this limit, worker thread will
+ be rescheduled with a sleep delay of one jiffy.
+
+ srcutree.srcu_max_nodelay_phase [KNL]
+ Specifies the per-grace-period phase, number of
+ non-sleeping polls of readers. Beyond this limit,
+ grace period worker thread will be rescheduled
+ with a sleep delay of one jiffy, between each
+ rescan of the readers, for a grace period phase.
+
+ srcutree.srcu_retry_check_delay [KNL]
+ Specifies number of microseconds of non-sleeping
+ delay between each non-sleeping poll of readers.
+
srcutree.small_contention_lim [KNL]
Specifies the number of update-side contention
events per jiffy will be tolerated before
the TCM.
The TCM memory can then be remapped to another address again using
-the MMU, but notice that the TCM if often used in situations where
+the MMU, but notice that the TCM is often used in situations where
the MMU is turned off. To avoid confusion the current Linux
implementation will map the TCM 1 to 1 from physical to virtual
memory in the location specified by the kernel. Currently Linux
Appendix A. SME programmer's model (informative)
=================================================
-This section provides a minimal description of the additions made by SVE to the
+This section provides a minimal description of the additions made by SME to the
ARMv8-A programmer's model that are relevant to this document.
Note: This section is for information only and not intended to be complete or
Inter Module support
--------------------
-Refer to the file kernel/module.c for more information.
+Refer to the files in kernel/module/ for more information.
Hardware Interfaces
===================
The corresponding ksymtab entry struct ``kernel_symbol`` will have the member
``namespace`` set accordingly. A symbol that is exported without a namespace will
refer to ``NULL``. There is no default namespace if none is defined. ``modpost``
-and kernel/module.c make use the namespace at build time or module load time,
-respectively.
+and kernel/module/main.c make use the namespace at build time or module load
+time, respectively.
2.2 Using the DEFAULT_SYMBOL_NAMESPACE define
=============================================
'#clock-cells':
const: 1
+ '#reset-cells':
+ const: 1
+
required:
- compatible
- reg
- mediatek,mt8192-imp_iic_wrap_w
- mediatek,mt8192-imp_iic_wrap_n
- mediatek,mt8192-msdc_top
- - mediatek,mt8192-msdc
- mediatek,mt8192-mfgcfg
- mediatek,mt8192-imgsys
- mediatek,mt8192-imgsys2
#clock-cells = <1>;
};
- - |
- msdc: clock-controller@11f60000 {
- compatible = "mediatek,mt8192-msdc";
- reg = <0x11f60000 0x1000>;
- #clock-cells = <1>;
- };
-
- |
mfgcfg: clock-controller@13fbf000 {
compatible = "mediatek,mt8192-mfgcfg";
'#clock-cells':
const: 1
+ '#reset-cells':
+ const: 1
+
required:
- compatible
- reg
'#clock-cells':
const: 1
+ '#reset-cells':
+ const: 1
+
required:
- compatible
- reg
properties:
compatible:
enum:
- - allwinner,sun4i-a10-pll3-2x-clk
- fixed-factor-clock
"#clock-cells":
The case where SH and SP are both 1 is likely not very interesting.
maintainers:
- - Luca Ceresoli <luca@lucaceresoli.net>
+ - Luca Ceresoli <luca.ceresoli@bootlin.com>
properties:
compatible:
description: |
- For CPG core clocks, the two clock specifier cells must be "CPG_CORE"
and a core clock reference, as defined in
- <dt-bindings/clock/r9a0*-cpg.h>
+ <dt-bindings/clock/r9a0*-cpg.h>,
- For module clocks, the two clock specifier cells must be "CPG_MOD" and
- a module number, as defined in the <dt-bindings/clock/r9a07g0*-cpg.h> or
- <dt-bindings/clock/r9a09g011-cpg.h>.
+ a module number, as defined in <dt-bindings/clock/r9a0*-cpg.h>.
const: 2
'#power-domain-cells':
'#reset-cells':
description:
The single reset specifier cell must be the module number, as defined in
- the <dt-bindings/clock/r9a07g0*-cpg.h> or <dt-bindings/clock/r9a09g011-cpg.h>.
+ <dt-bindings/clock/r9a0*-cpg.h>.
const: 1
required:
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2022 Unisoc Inc.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/clock/sprd,ums512-clk.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: UMS512 Soc clock controller
+
+maintainers:
+ - Orson Zhai <orsonzhai@gmail.com>
+ - Baolin Wang <baolin.wang7@gmail.com>
+ - Chunyan Zhang <zhang.lyra@gmail.com>
+
+properties:
+ compatible:
+ enum:
+ - sprd,ums512-apahb-gate
+ - sprd,ums512-ap-clk
+ - sprd,ums512-aonapb-clk
+ - sprd,ums512-pmu-gate
+ - sprd,ums512-g0-pll
+ - sprd,ums512-g2-pll
+ - sprd,ums512-g3-pll
+ - sprd,ums512-gc-pll
+ - sprd,ums512-aon-gate
+ - sprd,ums512-audcpapb-gate
+ - sprd,ums512-audcpahb-gate
+ - sprd,ums512-gpu-clk
+ - sprd,ums512-mm-clk
+ - sprd,ums512-mm-gate-clk
+ - sprd,ums512-apapb-gate
+
+ "#clock-cells":
+ const: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 4
+ description: |
+ The input parent clock(s) phandle for the clock, only list
+ fixed clocks which are declared in devicetree.
+
+ clock-names:
+ minItems: 1
+ items:
+ - const: ext-26m
+ - const: ext-32k
+ - const: ext-4m
+ - const: rco-100m
+
+ reg:
+ maxItems: 1
+
+required:
+ - compatible
+ - '#clock-cells'
+ - reg
+
+additionalProperties: false
+
+examples:
+ - |
+ ap_clk: clock-controller@20200000 {
+ compatible = "sprd,ums512-ap-clk";
+ reg = <0x20200000 0x1000>;
+ clocks = <&ext_26m>;
+ clock-names = "ext-26m";
+ #clock-cells = <1>;
+ };
+...
firmware. On some SoCs, this firmware supports DFS and DVFS in addition to
Adaptive Voltage Scaling.
-[2] Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.txt
+[2] Documentation/devicetree/bindings/interrupt-controller/brcm,l2-intc.yaml
Node brcm,avs-cpu-data-mem
- allwinner,sun8i-a83t-display-engine
- allwinner,sun8i-r40-display-engine
- allwinner,sun9i-a80-display-engine
+ - allwinner,sun20i-d1-display-engine
- allwinner,sun50i-a64-display-engine
then:
- description: number of output lines for the green channel (G)
- description: number of output lines for the blue channel (B)
- arm,malidp-arqos-high-level:
- $ref: /schemas/types.yaml#/definitions/uint32
- description:
- integer describing the ARQoS levels of DP500's QoS signaling
-
arm,malidp-arqos-value:
$ref: /schemas/types.yaml#/definitions/uint32
description:
clocks = <&oscclk2>, <&fpgaosc0>, <&fpgaosc1>, <&fpgaosc1>;
clock-names = "pxlclk", "mclk", "aclk", "pclk";
arm,malidp-output-port-lines = /bits/ 8 <8 8 8>;
- arm,malidp-arqos-high-level = <0xd000d000>;
+ arm,malidp-arqos-value = <0xd000d000>;
port {
dp0_output: endpoint {
title: Qualcomm Display DPU dt properties for SC7180 target
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
description: |
Device tree bindings for MSM Mobile Display Subsystem(MDSS) that encapsulates
title: Qualcomm Display DPU dt properties for SC7280
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
description: |
Device tree bindings for MSM Mobile Display Subsystem (MDSS) that encapsulates
title: Qualcomm Display DPU dt properties for SDM845 target
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
description: |
Device tree bindings for MSM Mobile Display Subsystem(MDSS) that encapsulates
title: Qualcomm Display DSI controller
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
allOf:
- $ref: "../dsi-controller.yaml#"
title: Qualcomm Display DSI 10nm PHY
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
allOf:
- $ref: dsi-phy-common.yaml#
title: Qualcomm Display DSI 14nm PHY
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
allOf:
- $ref: dsi-phy-common.yaml#
title: Qualcomm Display DSI 20nm PHY
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
allOf:
- $ref: dsi-phy-common.yaml#
title: Qualcomm Display DSI 28nm PHY
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
allOf:
- $ref: dsi-phy-common.yaml#
title: Description of Qualcomm Display DSI PHY common dt properties
maintainers:
- - Krishna Manikandan <mkrishn@codeaurora.org>
+ - Krishna Manikandan <quic_mkrishn@quicinc.com>
description: |
This defines the DSI PHY dt properties which are common for all
then:
properties:
clocks:
- maxItems: 2
+ minItems: 2
required:
- clock-names
value to be used for converting remote channel measurements to
temperature.
$ref: /schemas/types.yaml#/definitions/int32
- items:
- minimum: -128
- maximum: 127
+ minimum: -128
+ maximum: 127
ti,beta-compensation:
description:
"arm,vexpress-power"
"arm,vexpress-energy"
- "arm,vexpress-sysreg,func" when controlled via vexpress-sysreg
- (see Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
+ (see Documentation/devicetree/bindings/arm/vexpress-config.yaml
for more details)
Optional node properties:
- socionext,uniphier-ld11-aidet
- socionext,uniphier-ld20-aidet
- socionext,uniphier-pxs3-aidet
+ - socionext,uniphier-nx1-aidet
reg:
maxItems: 1
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only or BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/iommu/xen,grant-dma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Xen specific IOMMU for virtualized devices (e.g. virtio)
+
+maintainers:
+ - Stefano Stabellini <sstabellini@kernel.org>
+
+description:
+ The Xen IOMMU represents the Xen grant table interface. Grant mappings
+ are to be used with devices connected to the Xen IOMMU using the "iommus"
+ property, which also specifies the ID of the backend domain.
+ The binding is required to restrict memory access using Xen grant mappings.
+
+properties:
+ compatible:
+ const: xen,grant-dma
+
+ '#iommu-cells':
+ const: 1
+ description:
+ The single cell is the domid (domain ID) of the domain where the backend
+ is running.
+
+required:
+ - compatible
+ - "#iommu-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ iommu {
+ compatible = "xen,grant-dma";
+ #iommu-cells = <1>;
+ };
description: 5 memory controller channels and 1 for stream-id registers
reg-names:
- maxItems: 6
items:
- const: sid
- const: broadcast
description: 17 memory controller channels and 1 for stream-id registers
reg-names:
- minItems: 18
items:
- const: sid
- const: broadcast
description: 17 memory controller channels and 1 for stream-id registers
reg-names:
- minItems: 18
items:
- const: sid
- const: broadcast
title: MAX77714 PMIC with GPIO, RTC and watchdog from Maxim Integrated.
maintainers:
- - Luca Ceresoli <luca@lucaceresoli.net>
+ - Luca Ceresoli <luca.ceresoli@bootlin.com>
description: |
MAX77714 is a Power Management IC with 4 buck regulators, 9
sd-uhs-sdr104;
sdhci,auto-cmd12;
interrupts = <0x0 0x26 0x4>;
- interrupt-names = "sdio0_0";
clocks = <&scmi_clk 245>;
clock-names = "sw_sdio";
};
non-removable;
bus-width = <0x8>;
interrupts = <0x0 0x27 0x4>;
- interrupt-names = "sdio1_0";
clocks = <&scmi_clk 245>;
clock-names = "sw_sdio";
};
- const: core
- const: axi
+ interrupts:
+ maxItems: 1
+
marvell,xenon-sdhc-id:
$ref: /schemas/types.yaml#/definitions/uint32
minimum: 0
items:
- description: Xenon IP registers
- description: Armada 3700 SoC PHY PAD Voltage Control register
- minItems: 2
marvell,pad-type:
$ref: /schemas/types.yaml#/definitions/string
- in-band-status
fixed-link:
- allOf:
- - if:
- type: array
- then:
- deprecated: true
- items:
- - minimum: 0
- maximum: 31
- description:
- Emulated PHY ID, choose any but unique to the all
- specified fixed-links
-
- - enum: [0, 1]
- description:
- Duplex configuration. 0 for half duplex or 1 for
- full duplex
-
- - enum: [10, 100, 1000, 2500, 10000]
- description:
- Link speed in Mbits/sec.
-
- - enum: [0, 1]
- description:
- Pause configuration. 0 for no pause, 1 for pause
-
- - enum: [0, 1]
- description:
- Asymmetric pause configuration. 0 for no asymmetric
- pause, 1 for asymmetric pause
-
-
- - if:
- type: object
- then:
- properties:
- speed:
- description:
- Link speed.
- $ref: /schemas/types.yaml#/definitions/uint32
- enum: [10, 100, 1000, 2500, 10000]
-
- full-duplex:
- $ref: /schemas/types.yaml#/definitions/flag
- description:
- Indicates that full-duplex is used. When absent, half
- duplex is assumed.
-
- pause:
- $ref: /schemas/types.yaml#definitions/flag
- description:
- Indicates that pause should be enabled.
-
- asym-pause:
- $ref: /schemas/types.yaml#/definitions/flag
- description:
- Indicates that asym_pause should be enabled.
-
- link-gpios:
- maxItems: 1
- description:
- GPIO to determine if the link is up
-
- required:
- - speed
+ oneOf:
+ - $ref: /schemas/types.yaml#/definitions/uint32-array
+ deprecated: true
+ items:
+ - minimum: 0
+ maximum: 31
+ description:
+ Emulated PHY ID, choose any but unique to the all
+ specified fixed-links
+
+ - enum: [0, 1]
+ description:
+ Duplex configuration. 0 for half duplex or 1 for
+ full duplex
+
+ - enum: [10, 100, 1000, 2500, 10000]
+ description:
+ Link speed in Mbits/sec.
+
+ - enum: [0, 1]
+ description:
+ Pause configuration. 0 for no pause, 1 for pause
+
+ - enum: [0, 1]
+ description:
+ Asymmetric pause configuration. 0 for no asymmetric
+ pause, 1 for asymmetric pause
+ - type: object
+ additionalProperties: false
+ properties:
+ speed:
+ description:
+ Link speed.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [10, 100, 1000, 2500, 10000]
+
+ full-duplex:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Indicates that full-duplex is used. When absent, half
+ duplex is assumed.
+
+ pause:
+ $ref: /schemas/types.yaml#definitions/flag
+ description:
+ Indicates that pause should be enabled.
+
+ asym-pause:
+ $ref: /schemas/types.yaml#/definitions/flag
+ description:
+ Indicates that asym_pause should be enabled.
+
+ link-gpios:
+ maxItems: 1
+ description:
+ GPIO to determine if the link is up
+
+ required:
+ - speed
additionalProperties: true
Should specify the gpio for phy reset.
phy-reset-duration:
+ $ref: /schemas/types.yaml#/definitions/uint32
deprecated: true
description:
Reset duration in milliseconds. Should present only if property
and 1 millisecond will be used instead.
phy-reset-active-high:
+ type: boolean
deprecated: true
description:
If present then the reset sequence using the GPIO specified in the
"phy-reset-gpios" property is reversed (H=reset state, L=operation state).
phy-reset-post-delay:
+ $ref: /schemas/types.yaml#/definitions/uint32
deprecated: true
description:
Post reset delay in milliseconds. If present then a delay of phy-reset-post-delay
title: Qualcomm Atheros ath9k wireless devices Generic Binding
maintainers:
- - Kalle Valo <kvalo@codeaurora.org>
+ - Toke Høiland-Jørgensen <toke@toke.dk>
description: |
This node provides properties for configuring the ath9k wireless device.
title: Qualcomm Technologies ath11k wireless devices Generic Binding
maintainers:
- - Kalle Valo <kvalo@codeaurora.org>
+ - Kalle Valo <kvalo@kernel.org>
description: |
These are dt entries for Qualcomm Technologies, Inc. IEEE 802.11ax
maxItems: 1
apple,sart:
- maxItems: 1
$ref: /schemas/types.yaml#/definitions/phandle
description: |
Reference to the SART address filter.
- resets : list of phandle and reset specifier pairs. There should be two entries, one
for the whole phy and one for the port
- reset-names : list of reset signal names. Should be "global" and "port"
-See: Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+See: Documentation/devicetree/bindings/reset/st,stih407-powerdown.yaml
See: Documentation/devicetree/bindings/reset/reset.txt
Example:
title: Qualcomm QMP USB3 DP PHY controller
maintainers:
- - Manu Gautam <mgautam@codeaurora.org>
+ - Wesley Cheng <quic_wcheng@quicinc.com>
properties:
compatible:
title: Qualcomm QUSB2 phy controller
maintainers:
- - Manu Gautam <mgautam@codeaurora.org>
+ - Wesley Cheng <quic_wcheng@quicinc.com>
description:
QUSB2 controller supports LS/FS/HS usb connectivity on Qualcomm chipsets.
title: Qualcomm Synopsys Femto High-Speed USB PHY V2
maintainers:
- - Wesley Cheng <wcheng@codeaurora.org>
+ - Wesley Cheng <quic_wcheng@quicinc.com>
description: |
Qualcomm High-Speed USB PHY
- pins: List of pins. Valid values of pins properties are: gpio0, gpio1.
First 2 properties must be added in the RK805 PMIC node, documented in
-Documentation/devicetree/bindings/mfd/rk808.txt
+Documentation/devicetree/bindings/mfd/rockchip,rk808.yaml
Optional properties:
-------------------
groups:
description: The pin group to select.
enum: [
+ # common
+ i2c, spi, wdt,
+
# For MT7620 SoC
- ephy, i2c, mdio, nd_sd, pa, pcie, rgmii1, rgmii2, spi, spi refclk,
- uartf, uartlite, wdt, wled,
+ ephy, mdio, nd_sd, pa, pcie, rgmii1, rgmii2, spi refclk,
+ uartf, uartlite, wled,
# For MT7628 and MT7688 SoCs
- gpio, i2c, i2s, p0led_an, p0led_kn, p1led_an, p1led_kn, p2led_an,
+ gpio, i2s, p0led_an, p0led_kn, p1led_an, p1led_kn, p2led_an,
p2led_kn, p3led_an, p3led_kn, p4led_an, p4led_kn, perst, pwm0,
- pwm1, refclk, sdmode, spi, spi cs1, spis, uart0, uart1, uart2,
- wdt, wled_an, wled_kn,
+ pwm1, refclk, sdmode, spi cs1, spis, uart0, uart1, uart2,
+ wled_an, wled_kn,
]
function:
description: The mux function to select.
enum: [
+ # common
+ gpio, i2c, refclk, spi,
+
# For MT7620 SoC
- ephy, gpio, gpio i2s, gpio uartf, i2c, i2s uartf, mdio, nand, pa,
- pcie refclk, pcie rst, pcm gpio, pcm i2s, pcm uartf, refclk,
- rgmii1, rgmii2, sd, spi, spi refclk, uartf, uartlite, wdt refclk,
+ ephy, gpio i2s, gpio uartf, i2s uartf, mdio, nand, pa,
+ pcie refclk, pcie rst, pcm gpio, pcm i2s, pcm uartf,
+ rgmii1, rgmii2, sd, spi refclk, uartf, uartlite, wdt refclk,
wdt rst, wled,
# For MT7628 and MT7688 SoCs
- antenna, debug, gpio, i2c, i2s, jtag, p0led_an, p0led_kn,
+ antenna, debug, i2s, jtag, p0led_an, p0led_kn,
p1led_an, p1led_kn, p2led_an, p2led_kn, p3led_an, p3led_kn,
p4led_an, p4led_kn, pcie, pcm, perst, pwm, pwm0, pwm1, pwm_uart2,
- refclk, rsvd, sdxc, sdxc d5 d4, sdxc d6, sdxc d7, spi, spi cs1,
+ rsvd, sdxc, sdxc d5 d4, sdxc d6, sdxc d7, spi cs1,
spis, sw_r, uart0, uart1, uart2, utif, wdt, wled_an, wled_kn, -,
]
groups:
description: The pin group to select.
enum: [
+ # common
+ i2c, jtag, led, mdio, rgmii, spi, spi_cs1, uartf, uartlite,
+
# For RT3050, RT3052 and RT3350 SoCs
- i2c, jtag, mdio, rgmii, sdram, spi, uartf, uartlite,
+ sdram,
# For RT3352 SoC
- i2c, jtag, led, lna, mdio, pa, rgmii, spi, spi_cs1, uartf,
- uartlite,
-
- # For RT5350 SoC
- i2c, jtag, led, spi, spi_cs1, uartf, uartlite,
+ lna, pa
]
function:
description: The mux function to select.
enum: [
+ # common
+ gpio, gpio i2s, gpio uartf, i2c, i2s uartf, jtag, led, mdio,
+ pcm gpio, pcm i2s, pcm uartf, rgmii, spi, spi_cs1, uartf,
+ uartlite, wdg_cs1,
+
# For RT3050, RT3052 and RT3350 SoCs
- gpio, gpio i2s, gpio uartf, i2c, i2s uartf, jtag, mdio, pcm gpio,
- pcm i2s, pcm uartf, rgmii, sdram, spi, uartf, uartlite,
+ sdram,
# For RT3352 SoC
- gpio, gpio i2s, gpio uartf, i2c, i2s uartf, jtag, led, lna, mdio,
- pa, pcm gpio, pcm i2s, pcm uartf, rgmii, spi, spi_cs1, uartf,
- uartlite, wdg_cs1,
-
- # For RT5350 SoC
- gpio, gpio i2s, gpio uartf, i2c, i2s uartf, jtag, led, pcm gpio,
- pcm i2s, pcm uartf, spi, spi_cs1, uartf, uartlite, wdg_cs1,
+ lna, pa
]
required:
title: Maxim Integrated MAX77976 Battery charger
maintainers:
- - Luca Ceresoli <luca@lucaceresoli.net>
+ - Luca Ceresoli <luca.ceresoli@bootlin.com>
description: |
The Maxim MAX77976 is a 19Vin / 5.5A, 1-Cell Li+ battery charger
title: The Qualcomm PMIC VBUS output regulator driver
maintainers:
- - Wesley Cheng <wcheng@codeaurora.org>
+ - Wesley Cheng <quic_wcheng@quicinc.com>
description: |
This regulator driver controls the VBUS output by the Qualcomm PMIC. This
Requires node properties:
- "compatible" value: "arm,vexpress-volt"
- "arm,vexpress-sysreg,func" when controlled via vexpress-sysreg
- (see Documentation/devicetree/bindings/arm/vexpress-sysreg.txt
+ (see Documentation/devicetree/bindings/arm/vexpress-config.yaml
for more details)
Required regulator properties:
- qcom,sc7280-lpass-cpu
reg:
- minItems: 2
+ minItems: 1
maxItems: 6
description: LPAIF core registers
reg-names:
- minItems: 2
+ minItems: 1
maxItems: 6
clocks:
maxItems: 10
interrupts:
- minItems: 2
+ minItems: 1
maxItems: 4
description: LPAIF DMA buffer interrupt
interrupt-names:
- minItems: 2
+ minItems: 1
maxItems: 4
qcom,adsp:
clocks = <&clkcfg CLK_SPI0>;
interrupt-parent = <&plic>;
interrupts = <54>;
- spi-max-frequency = <25000000>;
};
...
pinctrl-names = "default";
pinctrl-0 = <&qup_spi1_default>;
interrupts = <GIC_SPI 602 IRQ_TYPE_LEVEL_HIGH>;
- spi-max-frequency = <50000000>;
#address-cells = <1>;
#size-cells = <0>;
};
- resets : list of phandle and reset specifier pairs. There should be two entries, one
for the powerdown and softreset lines of the usb3 IP
- reset-names : list of reset signal names. Names should be "powerdown" and "softreset"
-See: Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+See: Documentation/devicetree/bindings/reset/st,stih407-powerdown.yaml
See: Documentation/devicetree/bindings/reset/reset.txt
- #address-cells, #size-cells : should be '1' if the device has sub-nodes
- resets : phandle + reset specifier pairs to the powerdown and softreset lines
of the USB IP
- reset-names : should be "power" and "softreset"
-See: Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+See: Documentation/devicetree/bindings/reset/st,stih407-powerdown.yaml
See: Documentation/devicetree/bindings/reset/reset.txt
Example:
Phandle of a companion.
phys:
- maxItems: 1
+ minItems: 1
+ maxItems: 3
phy-names:
const: usb
Overrides the detected port count
phys:
- maxItems: 1
+ minItems: 1
+ maxItems: 3
phy-names:
const: usb
- resets : phandle to the powerdown and reset controller for the USB IP
- reset-names : should be "power" and "softreset".
-See: Documentation/devicetree/bindings/reset/st,sti-powerdown.txt
+See: Documentation/devicetree/bindings/reset/st,stih407-powerdown.yaml
See: Documentation/devicetree/bindings/reset/reset.txt
Example:
title: Qualcomm SuperSpeed DWC3 USB SoC controller
maintainers:
- - Manu Gautam <mgautam@codeaurora.org>
+ - Wesley Cheng <quic_wcheng@quicinc.com>
properties:
compatible:
description: ASPEED Technology Inc.
"^asus,.*":
description: AsusTek Computer Inc.
+ "^atheros,.*":
+ description: Qualcomm Atheros, Inc. (deprecated, use qca)
+ deprecated: true
"^atlas,.*":
description: Atlas Scientific LLC
"^atmel,.*":
then:
properties:
clocks:
- minItems: 2
items:
- description: High-frequency oscillator input, divided internally
- description: Low-frequency oscillator input
.. kernel-doc:: drivers/firmware/edd.c
:internal:
+Generic System Framebuffers Interface
+-------------------------------------
+
+.. kernel-doc:: drivers/firmware/sysfb.c
+ :export:
+
Intel Stratix10 SoC Service Layer
---------------------------------
Some features of the Intel Stratix10 SoC require a level of privilege
Note that it only applies to the new descriptor-based interface. For a
description of the deprecated integer-based GPIO interface please refer to
-gpio-legacy.txt (actually, there is no real mapping possible with the old
+legacy.rst (actually, there is no real mapping possible with the old
interface; you just fetch an integer from somewhere and request the
corresponding GPIO).
This document describes the consumer interface of the GPIO framework. Note that
it describes the new descriptor-based interface. For a description of the
-deprecated integer-based GPIO interface please refer to gpio-legacy.txt.
+deprecated integer-based GPIO interface please refer to legacy.rst.
Guidelines for GPIOs consumers
The two last flags are used for use cases where open drain is mandatory, such
as I2C: if the line is not already configured as open drain in the mappings
-(see board.txt), then open drain will be enforced anyway and a warning will be
+(see board.rst), then open drain will be enforced anyway and a warning will be
printed that the board configuration needs to be updated to match the use case.
Both functions return either a valid GPIO descriptor, or an error code checkable
The same is applicable for open drain or open source output lines: those do not
actively drive their output high (open drain) or low (open source), they just
switch their output to a high impedance value. The consumer should not need to
-care. (For details read about open drain in driver.txt.)
+care. (For details read about open drain in driver.rst.)
With this, all the gpiod_set_(array)_value_xxx() functions interpret the
parameter "value" as "asserted" ("1") or "de-asserted" ("0"). The physical line
ways to obtain and use GPIOs:
- The descriptor-based interface is the preferred way to manipulate GPIOs,
- and is described by all the files in this directory excepted gpio-legacy.txt.
+ and is described by all the files in this directory excepted legacy.rst.
- The legacy integer-based interface which is considered deprecated (but still
- usable for compatibility reasons) is documented in gpio-legacy.txt.
+ usable for compatibility reasons) is documented in legacy.rst.
The remainder of this document applies to the new descriptor-based interface.
-gpio-legacy.txt contains the same information applied to the legacy
+legacy.rst contains the same information applied to the legacy
integer-based interface.
--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0+
+
+============================================
+The Linux Hardware Timestamping Engine (HTE)
+============================================
+
+:Author: Dipen Patel
+
+Introduction
+------------
+
+Certain devices have built in hardware timestamping engines which can
+monitor sets of system signals, lines, buses etc... in realtime for state
+change; upon detecting the change they can automatically store the timestamp at
+the moment of occurrence. Such functionality may help achieve better accuracy
+in obtaining timestamps than using software counterparts i.e. ktime and
+friends.
+
+This document describes the API that can be used by hardware timestamping
+engine provider and consumer drivers that want to use the hardware timestamping
+engine (HTE) framework. Both consumers and providers must include
+``#include <linux/hte.h>``.
+
+The HTE framework APIs for the providers
+----------------------------------------
+
+.. kernel-doc:: drivers/hte/hte.c
+ :functions: devm_hte_register_chip hte_push_ts_ns
+
+The HTE framework APIs for the consumers
+----------------------------------------
+
+.. kernel-doc:: drivers/hte/hte.c
+ :functions: hte_init_line_attr hte_ts_get hte_ts_put devm_hte_request_ts_ns hte_request_ts_ns hte_enable_ts hte_disable_ts of_hte_req_count hte_get_clk_src_info
+
+The HTE framework public structures
+-----------------------------------
+.. kernel-doc:: include/linux/hte.h
+
+More on the HTE timestamp data
+------------------------------
+The ``struct hte_ts_data`` is used to pass timestamp details between the
+consumers and the providers. It expresses timestamp data in nanoseconds in
+u64. An example of the typical timestamp data life cycle, for the GPIO line is
+as follows::
+
+ - Monitors GPIO line change.
+ - Detects the state change on GPIO line.
+ - Converts timestamps in nanoseconds.
+ - Stores GPIO raw level in raw_level variable if the provider has that
+ hardware capability.
+ - Pushes this hte_ts_data object to HTE subsystem.
+ - HTE subsystem increments seq counter and invokes consumer provided callback.
+ Based on callback return value, the HTE core invokes secondary callback in
+ the thread context.
+
+HTE subsystem debugfs attributes
+--------------------------------
+HTE subsystem creates debugfs attributes at ``/sys/kernel/debug/hte/``.
+It also creates line/signal-related debugfs attributes at
+``/sys/kernel/debug/hte/<provider>/<label or line id>/``. Note that these
+attributes are read-only.
+
+`ts_requested`
+ The total number of entities requested from the given provider,
+ where entity is specified by the provider and could represent
+ lines, GPIO, chip signals, buses etc...
+ The attribute will be available at
+ ``/sys/kernel/debug/hte/<provider>/``.
+
+`total_ts`
+ The total number of entities supported by the provider.
+ The attribute will be available at
+ ``/sys/kernel/debug/hte/<provider>/``.
+
+`dropped_timestamps`
+ The dropped timestamps for a given line.
+ The attribute will be available at
+ ``/sys/kernel/debug/hte/<provider>/<label or line id>/``.
--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+The Linux Hardware Timestamping Engine (HTE)
+============================================
+
+The HTE Subsystem
+=================
+
+.. toctree::
+ :maxdepth: 1
+
+ hte
+
+HTE Tegra Provider
+==================
+
+.. toctree::
+ :maxdepth: 1
+
+ tegra194-hte
+
--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0+
+
+HTE Kernel provider driver
+==========================
+
+Description
+-----------
+The Nvidia tegra194 HTE provider driver implements two GTE
+(Generic Timestamping Engine) instances: 1) GPIO GTE and 2) LIC
+(Legacy Interrupt Controller) IRQ GTE. Both GTE instances get the
+timestamp from the system counter TSC which has 31.25MHz clock rate, and the
+driver converts clock tick rate to nanoseconds before storing it as timestamp
+value.
+
+GPIO GTE
+--------
+
+This GTE instance timestamps GPIO in real time. For that to happen GPIO
+needs to be configured as input. The always on (AON) GPIO controller instance
+supports timestamping GPIOs in real time and it has 39 GPIO lines. The GPIO GTE
+and AON GPIO controller are tightly coupled as it requires very specific bits
+to be set in GPIO config register before GPIO GTE can be used, for that GPIOLIB
+adds two optional APIs as below. The GPIO GTE code supports both kernel
+and userspace consumers. The kernel space consumers can directly talk to HTE
+subsystem while userspace consumers timestamp requests go through GPIOLIB CDEV
+framework to HTE subsystem.
+
+.. kernel-doc:: drivers/gpio/gpiolib.c
+ :functions: gpiod_enable_hw_timestamp_ns gpiod_disable_hw_timestamp_ns
+
+For userspace consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE flag must be
+specified during IOCTL calls. Refer to ``tools/gpio/gpio-event-mon.c``, which
+returns the timestamp in nanoseconds.
+
+LIC (Legacy Interrupt Controller) IRQ GTE
+-----------------------------------------
+
+This GTE instance timestamps LIC IRQ lines in real time. There are 352 IRQ
+lines which this instance can add timestamps to in real time. The hte
+devicetree binding described at ``Documentation/devicetree/bindings/hte/``
+provides an example of how a consumer can request an IRQ line. Since it is a
+one-to-one mapping with IRQ GTE provider, consumers can simply specify the IRQ
+number that they are interested in. There is no userspace consumer support for
+this GTE instance in the HTE framework.
+
+The provider source code of both IRQ and GPIO GTE instances is located at
+``drivers/hte/hte-tegra194.c``. The test driver
+``drivers/hte/hte-tegra194-test.c`` demonstrates HTE API usage for both IRQ
+and GPIO GTE.
xilinx/index
xillybus
zorro
+ hte/index
.. only:: subproject and html
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | ok |
| hexagon: | ok |
| ia64: | ok |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | ok |
| hexagon: | ok |
| ia64: | ok |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| nios2: | TODO |
| openrisc: | TODO |
- | parisc: | TODO |
+ | parisc: | ok |
| powerpc: | ok |
| riscv: | ok |
| s390: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
| s390: | ok |
| sh: | ok |
| sparc: | TODO |
- | um: | TODO |
+ | um: | ok |
| x86: | ok |
| xtensa: | TODO |
-----------------------
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | ok |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | ok |
| hexagon: | TODO |
| ia64: | ok |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | ok |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | ok |
| hexagon: | ok |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| openrisc: | ok |
| parisc: | TODO |
| powerpc: | ok |
- | riscv: | TODO |
+ | riscv: | ok |
| s390: | TODO |
| sh: | TODO |
| sparc: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| arch |status|
-----------------------
| alpha: | TODO |
- | arc: | TODO |
+ | arc: | ok |
| arm: | ok |
| arm64: | ok |
| csky: | ok |
| hexagon: | ok |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | .. |
| hexagon: | .. |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | .. |
| microblaze: | .. |
| mips: | TODO |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | ok |
| hexagon: | ok |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | ok |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | .. |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | ok |
| hexagon: | TODO |
| ia64: | ok |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | ok |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | .. |
| hexagon: | .. |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | .. |
| microblaze: | .. |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | .. |
| microblaze: | .. |
| mips: | TODO |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | TODO |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | TODO |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| csky: | TODO |
| hexagon: | TODO |
| ia64: | TODO |
+ | loong: | ok |
| m68k: | TODO |
| microblaze: | TODO |
| mips: | ok |
| nios2: | TODO |
| openrisc: | TODO |
- | parisc: | TODO |
+ | parisc: | ok |
| powerpc: | ok |
| riscv: | ok |
| s390: | ok |
* Subvolumes (separate internal filesystem roots)
* Object level mirroring and striping
* Checksums on data and metadata (multiple algorithms available)
- * Compression
+ * Compression (multiple algorithms available)
+ * Reflink, deduplication
+ * Scrub (on-line checksum verification)
+ * Hierarchical quota groups (subvolume and snapshot support)
* Integrated multiple device support, with several raid algorithms
* Offline filesystem check
- * Efficient incremental backup and FS mirroring
+ * Efficient incremental backup and FS mirroring (send/receive)
+ * Trim/discard
* Online filesystem defragmentation
+ * Swapfile support
+ * Zoned mode
+ * Read/write metadata verification
+ * Online resize (shrink, grow)
-For more information please refer to the wiki
+For more information please refer to the documentation site or wiki
+
+ https://btrfs.readthedocs.io
https://btrfs.wiki.kernel.org
There are two places where extended attributes can be found. The first
place is between the end of each inode entry and the beginning of the
-next inode entry. For example, if inode.i\_extra\_isize = 28 and
-sb.inode\_size = 256, then there are 256 - (128 + 28) = 100 bytes
+next inode entry. For example, if inode.i_extra_isize = 28 and
+sb.inode_size = 256, then there are 256 - (128 + 28) = 100 bytes
available for in-inode extended attribute storage. The second place
where extended attributes can be found is in the block pointed to by
``inode.i_file_acl``. As of Linux 3.11, it is not possible for this
- Name
- Description
* - 0x0
- - \_\_le32
- - h\_magic
+ - __le32
+ - h_magic
- Magic number for identification, 0xEA020000. This value is set by the
Linux driver, though e2fsprogs doesn't seem to check it(?)
- Name
- Description
* - 0x0
- - \_\_le32
- - h\_magic
+ - __le32
+ - h_magic
- Magic number for identification, 0xEA020000.
* - 0x4
- - \_\_le32
- - h\_refcount
+ - __le32
+ - h_refcount
- Reference count.
* - 0x8
- - \_\_le32
- - h\_blocks
+ - __le32
+ - h_blocks
- Number of disk blocks used.
* - 0xC
- - \_\_le32
- - h\_hash
+ - __le32
+ - h_hash
- Hash value of all attributes.
* - 0x10
- - \_\_le32
- - h\_checksum
+ - __le32
+ - h_checksum
- Checksum of the extended attribute block.
* - 0x14
- - \_\_u32
- - h\_reserved[3]
+ - __u32
+ - h_reserved[3]
- Zero.
The checksum is calculated against the FS UUID, the 64-bit block number
- Name
- Description
* - 0x0
- - \_\_u8
- - e\_name\_len
+ - __u8
+ - e_name_len
- Length of name.
* - 0x1
- - \_\_u8
- - e\_name\_index
+ - __u8
+ - e_name_index
- Attribute name index. There is a discussion of this below.
* - 0x2
- - \_\_le16
- - e\_value\_offs
+ - __le16
+ - e_value_offs
- Location of this attribute's value on the disk block where it is stored.
Multiple attributes can share the same value. For an inode attribute
this value is relative to the start of the first entry; for a block this
value is relative to the start of the block (i.e. the header).
* - 0x4
- - \_\_le32
- - e\_value\_inum
+ - __le32
+ - e_value_inum
- The inode where the value is stored. Zero indicates the value is in the
same block as this entry. This field is only used if the
- INCOMPAT\_EA\_INODE feature is enabled.
+ INCOMPAT_EA_INODE feature is enabled.
* - 0x8
- - \_\_le32
- - e\_value\_size
+ - __le32
+ - e_value_size
- Length of attribute value.
* - 0xC
- - \_\_le32
- - e\_hash
+ - __le32
+ - e_hash
- Hash value of attribute name and attribute value. The kernel doesn't
update the hash for in-inode attributes, so for that case this value
must be zero, because e2fsck validates any non-zero hash regardless of
where the xattr lives.
* - 0x10
- char
- - e\_name[e\_name\_len]
+ - e_name[e_name_len]
- Attribute name. Does not include trailing NULL.
Attribute values can follow the end of the entry table. There appears to
be a requirement that they be aligned to 4-byte boundaries. The values
are stored starting at the end of the block and grow towards the
-xattr\_header/xattr\_entry table. When the two collide, the overflow is
+xattr_header/xattr_entry table. When the two collide, the overflow is
put into a separate disk block. If the disk block fills up, the
filesystem returns -ENOSPC.
* - 1
- “user.”
* - 2
- - “system.posix\_acl\_access”
+ - “system.posix_acl_access”
* - 3
- - “system.posix\_acl\_default”
+ - “system.posix_acl_default”
* - 4
- “trusted.”
* - 6
- “security.”
* - 7
- - “system.” (inline\_data only?)
+ - “system.” (inline_data only?)
* - 8
- “system.richacl” (SuSE kernels only?)
also shrinking the amount of file system overhead for metadata.
The administrator can set a block cluster size at mkfs time (which is
-stored in the s\_log\_cluster\_size field in the superblock); from then
+stored in the s_log_cluster_size field in the superblock); from then
on, the block bitmaps track clusters, not individual blocks. This means
that block groups can be several gigabytes in size (instead of just
128MiB); however, the minimum allocation unit becomes a cluster, not a
The inode bitmap records which entries in the inode table are in use.
As with most bitmaps, one bit represents the usage status of one data
-block or inode table entry. This implies a block group size of 8 \*
-number\_of\_bytes\_in\_a\_logical\_block.
+block or inode table entry. This implies a block group size of 8 *
+number_of_bytes_in_a_logical_block.
NOTE: If ``BLOCK_UNINIT`` is set for a given block group, various parts
of the kernel and e2fsprogs code pretends that the block bitmap contains
zeros (i.e. all blocks in the group are free). However, it is not
necessarily the case that no blocks are in use -- if ``meta_bg`` is set,
the bitmaps and group descriptor live inside the group. Unfortunately,
-ext2fs\_test\_block\_bitmap2() will return '0' for those locations,
+ext2fs_test_block_bitmap2() will return '0' for those locations,
which produces confusing debugfs output.
Inode Table
present, will be at the beginning of the block group. The bitmaps and
the inode table can be anywhere, and it is quite possible for the
bitmaps to come after the inode table, or for both to be in different
-groups (flex\_bg). Leftover space is used for file data blocks, indirect
+groups (flex_bg). Leftover space is used for file data blocks, indirect
block maps, extent tree blocks, and extended attributes.
Flexible Block Groups
---------------------
Starting in ext4, there is a new feature called flexible block groups
-(flex\_bg). In a flex\_bg, several block groups are tied together as one
+(flex_bg). In a flex_bg, several block groups are tied together as one
logical block group; the bitmap spaces and the inode table space in the
-first block group of the flex\_bg are expanded to include the bitmaps
-and inode tables of all other block groups in the flex\_bg. For example,
-if the flex\_bg size is 4, then group 0 will contain (in order) the
+first block group of the flex_bg are expanded to include the bitmaps
+and inode tables of all other block groups in the flex_bg. For example,
+if the flex_bg size is 4, then group 0 will contain (in order) the
superblock, group descriptors, data block bitmaps for groups 0-3, inode
bitmaps for groups 0-3, inode tables for groups 0-3, and the remaining
space in group 0 is for file data. The effect of this is to group the
block group metadata close together for faster loading, and to enable
large files to be continuous on disk. Backup copies of the superblock
and group descriptors are always at the beginning of block groups, even
-if flex\_bg is enabled. The number of block groups that make up a
-flex\_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
+if flex_bg is enabled. The number of block groups that make up a
+flex_bg is given by 2 ^ ``sb.s_log_groups_per_flex``.
Meta Block Groups
-----------------
-Without the option META\_BG, for safety concerns, all block group
+Without the option META_BG, for safety concerns, all block group
descriptors copies are kept in the first block group. Given the default
128MiB(2^27 bytes) block group size and 64-byte group descriptors, ext4
can have at most 2^27/64 = 2^21 block groups. This limits the entire
filesystem size to 2^21 * 2^27 = 2^48bytes or 256TiB.
The solution to this problem is to use the metablock group feature
-(META\_BG), which is already in ext3 for all 2.6 releases. With the
-META\_BG feature, ext4 filesystems are partitioned into many metablock
+(META_BG), which is already in ext3 for all 2.6 releases. With the
+META_BG feature, ext4 filesystems are partitioned into many metablock
groups. Each metablock group is a cluster of block groups whose group
descriptor structures can be stored in a single disk block. For ext4
filesystems with 4 KB block size, a single metablock group partition
a 1KB block size, and 128 block groups for filesystems with a 4KB
blocksize. Filesystems can either be created using this new block group
descriptor layout, or existing filesystems can be resized on-line, and
-the field s\_first\_meta\_bg in the superblock will indicate the first
+the field s_first_meta_bg in the superblock will indicate the first
block group using this new layout.
Please see an important note about ``BLOCK_UNINIT`` in the section about
A new feature for ext4 are three block group descriptor flags that
enable mkfs to skip initializing other parts of the block group
-metadata. Specifically, the INODE\_UNINIT and BLOCK\_UNINIT flags mean
+metadata. Specifically, the INODE_UNINIT and BLOCK_UNINIT flags mean
that the inode and block bitmaps for that group can be calculated and
therefore the on-disk bitmap blocks are not initialized. This is
generally the case for an empty block group or a block group containing
-only fixed-location block group metadata. The INODE\_ZEROED flag means
+only fixed-location block group metadata. The INODE_ZEROED flag means
that the inode table has been initialized; mkfs will unset this flag and
rely on the kernel to initialize the inode tables in the background.
By not writing zeroes to the bitmaps and inode table, mkfs time is
-reduced considerably. Note the feature flag is RO\_COMPAT\_GDT\_CSUM,
-but the dumpe2fs output prints this as “uninit\_bg”. They are the same
+reduced considerably. Note the feature flag is RO_COMPAT_GDT_CSUM,
+but the dumpe2fs output prints this as “uninit_bg”. They are the same
thing.
.. SPDX-License-Identifier: GPL-2.0
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
-| i.i\_block Offset | Where It Points |
+| i.i_block Offset | Where It Points |
+=====================+==============================================================================================================================================================================================================================+
| 0 to 11 | Direct map to file blocks 0 to 11. |
+---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
---------
Starting in early 2012, metadata checksums were added to all major ext4
-and jbd2 data structures. The associated feature flag is metadata\_csum.
+and jbd2 data structures. The associated feature flag is metadata_csum.
The desired checksum algorithm is indicated in the superblock, though as
of October 2012 the only supported algorithm is crc32c. Some data
structures did not have space to fit a full 32-bit checksum, so only the
checksum, it will request that you run ``e2fsck -D`` to have the
directories rebuilt with checksums. This has the added benefit of
removing slack space from the directory files and rebalancing the htree
-indexes. If you \_ignore\_ this step, your directories will not be
+indexes. If you _ignore_ this step, your directories will not be
protected by a checksum!
The following table describes the data elements that go into each type
- Length
- Ingredients
* - Superblock
- - \_\_le32
+ - __le32
- The entire superblock up to the checksum field. The UUID lives inside
the superblock.
* - MMP
- - \_\_le32
+ - __le32
- UUID + the entire MMP block up to the checksum field.
* - Extended Attributes
- - \_\_le32
+ - __le32
- UUID + the entire extended attribute block. The checksum field is set to
zero.
* - Directory Entries
- - \_\_le32
+ - __le32
- UUID + inode number + inode generation + the directory block up to the
fake entry enclosing the checksum field.
* - HTREE Nodes
- - \_\_le32
+ - __le32
- UUID + inode number + inode generation + all valid extents + HTREE tail.
The checksum field is set to zero.
* - Extents
- - \_\_le32
+ - __le32
- UUID + inode number + inode generation + the entire extent block up to
the checksum field.
* - Bitmaps
- - \_\_le32 or \_\_le16
+ - __le32 or __le16
- UUID + the entire bitmap. Checksums are stored in the group descriptor,
and truncated if the group descriptor size is 32 bytes (i.e. ^64bit)
* - Inodes
- - \_\_le32
+ - __le32
- UUID + inode number + inode generation + the entire inode. The checksum
field is set to zero. Each inode has its own checksum.
* - Group Descriptors
- - \_\_le16
- - If metadata\_csum, then UUID + group number + the entire descriptor;
- else if gdt\_csum, then crc16(UUID + group number + the entire
+ - __le16
+ - If metadata_csum, then UUID + group number + the entire descriptor;
+ else if gdt_csum, then crc16(UUID + group number + the entire
descriptor). In all cases, only the lower 16 bits are stored.
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- inode
- Number of the inode that this directory entry points to.
* - 0x4
- - \_\_le16
- - rec\_len
+ - __le16
+ - rec_len
- Length of this directory entry. Must be a multiple of 4.
* - 0x6
- - \_\_le16
- - name\_len
+ - __le16
+ - name_len
- Length of the file name.
* - 0x8
- char
- - name[EXT4\_NAME\_LEN]
+ - name[EXT4_NAME_LEN]
- File name.
Since file names cannot be longer than 255 bytes, the new directory
-entry format shortens the name\_len field and uses the space for a file
+entry format shortens the name_len field and uses the space for a file
type flag, probably to avoid having to load every inode during directory
tree traversal. This format is ``ext4_dir_entry_2``, which is at most
263 bytes long, though on disk you'll need to reference
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- inode
- Number of the inode that this directory entry points to.
* - 0x4
- - \_\_le16
- - rec\_len
+ - __le16
+ - rec_len
- Length of this directory entry.
* - 0x6
- - \_\_u8
- - name\_len
+ - __u8
+ - name_len
- Length of the file name.
* - 0x7
- - \_\_u8
- - file\_type
+ - __u8
+ - file_type
- File type code, see ftype_ table below.
* - 0x8
- char
- - name[EXT4\_NAME\_LEN]
+ - name[EXT4_NAME_LEN]
- File name.
.. _ftype:
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- hash
- The hash of the directory name
* - 0x4
- - \_\_le32
- - minor\_hash
+ - __le32
+ - minor_hash
- The minor hash of the directory name
In order to add checksums to these classic directory blocks, a phony
``struct ext4_dir_entry`` is placed at the end of each leaf block to
hold the checksum. The directory entry is 12 bytes long. The inode
-number and name\_len fields are set to zero to fool old software into
+number and name_len fields are set to zero to fool old software into
ignoring an apparently empty directory entry, and the checksum is stored
in the place where the name normally goes. The structure is
``struct ext4_dir_entry_tail``:
- Name
- Description
* - 0x0
- - \_\_le32
- - det\_reserved\_zero1
+ - __le32
+ - det_reserved_zero1
- Inode number, which must be zero.
* - 0x4
- - \_\_le16
- - det\_rec\_len
+ - __le16
+ - det_rec_len
- Length of this directory entry, which must be 12.
* - 0x6
- - \_\_u8
- - det\_reserved\_zero2
+ - __u8
+ - det_reserved_zero2
- Length of the file name, which must be zero.
* - 0x7
- - \_\_u8
- - det\_reserved\_ft
+ - __u8
+ - det_reserved_ft
- File type, which must be 0xDE.
* - 0x8
- - \_\_le32
- - det\_checksum
+ - __le32
+ - det_checksum
- Directory leaf block checksum.
The leaf directory block checksum is calculated against the FS UUID, the
A linear array of directory entries isn't great for performance, so a
new feature was added to ext3 to provide a faster (but peculiar)
balanced tree keyed off a hash of the directory entry name. If the
-EXT4\_INDEX\_FL (0x1000) flag is set in the inode, this directory uses a
+EXT4_INDEX_FL (0x1000) flag is set in the inode, this directory uses a
hashed btree (htree) to organize and find directory entries. For
backwards read-only compatibility with ext2, this tree is actually
hidden inside the directory file, masquerading as “empty” directory data
The root of the tree always lives in the first data block of the
directory. By ext2 custom, the '.' and '..' entries must appear at the
beginning of this first block, so they are put here as two
-``struct ext4_dir_entry_2``\ s and not stored in the tree. The rest of
+``struct ext4_dir_entry_2`` s and not stored in the tree. The rest of
the root node contains metadata about the tree and finally a hash->block
map to find nodes that are lower in the htree. If
``dx_root.info.indirect_levels`` is non-zero then the htree has two
levels; the data block pointed to by the root node's map is an interior
node, which is indexed by a minor hash. Interior nodes in this tree
contains a zeroed out ``struct ext4_dir_entry_2`` followed by a
-minor\_hash->block map to find leafe nodes. Leaf nodes contain a linear
+minor_hash->block map to find leafe nodes. Leaf nodes contain a linear
array of all ``struct ext4_dir_entry_2``; all of these entries
(presumably) hash to the same value. If there is an overflow, the
entries simply overflow into the next leaf node, and the
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- dot.inode
- inode number of this directory.
* - 0x4
- - \_\_le16
- - dot.rec\_len
+ - __le16
+ - dot.rec_len
- Length of this record, 12.
* - 0x6
- u8
- - dot.name\_len
+ - dot.name_len
- Length of the name, 1.
* - 0x7
- u8
- - dot.file\_type
+ - dot.file_type
- File type of this entry, 0x2 (directory) (if the feature flag is set).
* - 0x8
- char
- dot.name[4]
- - “.\\0\\0\\0”
+ - “.\0\0\0”
* - 0xC
- - \_\_le32
+ - __le32
- dotdot.inode
- inode number of parent directory.
* - 0x10
- - \_\_le16
- - dotdot.rec\_len
- - block\_size - 12. The record length is long enough to cover all htree
+ - __le16
+ - dotdot.rec_len
+ - block_size - 12. The record length is long enough to cover all htree
data.
* - 0x12
- u8
- - dotdot.name\_len
+ - dotdot.name_len
- Length of the name, 2.
* - 0x13
- u8
- - dotdot.file\_type
+ - dotdot.file_type
- File type of this entry, 0x2 (directory) (if the feature flag is set).
* - 0x14
- char
- - dotdot\_name[4]
- - “..\\0\\0”
+ - dotdot_name[4]
+ - “..\0\0”
* - 0x18
- - \_\_le32
- - struct dx\_root\_info.reserved\_zero
+ - __le32
+ - struct dx_root_info.reserved_zero
- Zero.
* - 0x1C
- u8
- - struct dx\_root\_info.hash\_version
+ - struct dx_root_info.hash_version
- Hash type, see dirhash_ table below.
* - 0x1D
- u8
- - struct dx\_root\_info.info\_length
+ - struct dx_root_info.info_length
- Length of the tree information, 0x8.
* - 0x1E
- u8
- - struct dx\_root\_info.indirect\_levels
- - Depth of the htree. Cannot be larger than 3 if the INCOMPAT\_LARGEDIR
+ - struct dx_root_info.indirect_levels
+ - Depth of the htree. Cannot be larger than 3 if the INCOMPAT_LARGEDIR
feature is set; cannot be larger than 2 otherwise.
* - 0x1F
- u8
- - struct dx\_root\_info.unused\_flags
+ - struct dx_root_info.unused_flags
-
* - 0x20
- - \_\_le16
+ - __le16
- limit
- - Maximum number of dx\_entries that can follow this header, plus 1 for
+ - Maximum number of dx_entries that can follow this header, plus 1 for
the header itself.
* - 0x22
- - \_\_le16
+ - __le16
- count
- - Actual number of dx\_entries that follow this header, plus 1 for the
+ - Actual number of dx_entries that follow this header, plus 1 for the
header itself.
* - 0x24
- - \_\_le32
+ - __le32
- block
- The block number (within the directory file) that goes with hash=0.
* - 0x28
- - struct dx\_entry
+ - struct dx_entry
- entries[0]
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- fake.inode
- Zero, to make it look like this entry is not in use.
* - 0x4
- - \_\_le16
- - fake.rec\_len
- - The size of the block, in order to hide all of the dx\_node data.
+ - __le16
+ - fake.rec_len
+ - The size of the block, in order to hide all of the dx_node data.
* - 0x6
- u8
- - name\_len
+ - name_len
- Zero. There is no name for this “unused” directory entry.
* - 0x7
- u8
- - file\_type
+ - file_type
- Zero. There is no file type for this “unused” directory entry.
* - 0x8
- - \_\_le16
+ - __le16
- limit
- - Maximum number of dx\_entries that can follow this header, plus 1 for
+ - Maximum number of dx_entries that can follow this header, plus 1 for
the header itself.
* - 0xA
- - \_\_le16
+ - __le16
- count
- - Actual number of dx\_entries that follow this header, plus 1 for the
+ - Actual number of dx_entries that follow this header, plus 1 for the
header itself.
* - 0xE
- - \_\_le32
+ - __le32
- block
- The block number (within the directory file) that goes with the lowest
hash value of this block. This value is stored in the parent block.
* - 0x12
- - struct dx\_entry
+ - struct dx_entry
- entries[0]
- As many 8-byte ``struct dx_entry`` as fits in the rest of the data block.
- Name
- Description
* - 0x0
- - \_\_le32
+ - __le32
- hash
- Hash code.
* - 0x4
- - \_\_le32
+ - __le32
- block
- Block number (within the directory file, not filesystem blocks) of the
next node in the htree.
author.)
If metadata checksums are enabled, the last 8 bytes of the directory
-block (precisely the length of one dx\_entry) are used to store a
+block (precisely the length of one dx_entry) are used to store a
``struct dx_tail``, which contains the checksum. The ``limit`` and
-``count`` entries in the dx\_root/dx\_node structures are adjusted as
-necessary to fit the dx\_tail into the block. If there is no space for
-the dx\_tail, the user is notified to run e2fsck -D to rebuild the
+``count`` entries in the dx_root/dx_node structures are adjusted as
+necessary to fit the dx_tail into the block. If there is no space for
+the dx_tail, the user is notified to run e2fsck -D to rebuild the
directory index (which will ensure that there's space for the checksum.
-The dx\_tail structure is 8 bytes long and looks like this:
+The dx_tail structure is 8 bytes long and looks like this:
.. list-table::
:widths: 8 8 24 40
- Description
* - 0x0
- u32
- - dt\_reserved
+ - dt_reserved
- Zero.
* - 0x4
- - \_\_le32
- - dt\_checksum
+ - __le32
+ - dt_checksum
- Checksum of the htree directory block.
The checksum is calculated against the FS UUID, the htree index header
-(dx\_root or dx\_node), all of the htree indices (dx\_entry) that are in
-use, and the tail block (dx\_tail).
+(dx_root or dx_node), all of the htree indices (dx_entry) that are in
+use, and the tail block (dx_tail).
To enable ext4 to store extended attribute values that do not fit in the
inode or in the single extended attribute block attached to an inode,
-the EA\_INODE feature allows us to store the value in the data blocks of
+the EA_INODE feature allows us to store the value in the data blocks of
a regular file inode. This “EA inode” is linked only from the extended
attribute name index and must not appear in a directory entry. The
-inode's i\_atime field is used to store a checksum of the xattr value;
-and i\_ctime/i\_version store a 64-bit reference count, which enables
+inode's i_atime field is used to store a checksum of the xattr value;
+and i_ctime/i_version store a 64-bit reference count, which enables
sharing of large xattr values between multiple owning inodes. For
backward compatibility with older versions of this feature, the
-i\_mtime/i\_generation *may* store a back-reference to the inode number
-and i\_generation of the **one** owning inode (in cases where the EA
+i_mtime/i_generation *may* store a back-reference to the inode number
+and i_generation of the **one** owning inode (in cases where the EA
inode is not referenced by multiple inodes) to verify that the EA inode
is the correct one being accessed.
associated with it. As noted in the Layout section above, the group
descriptors (if present) are the second item in the block group. The
standard configuration is for each block group to contain a full copy of
-the block group descriptor table unless the sparse\_super feature flag
+the block group descriptor table unless the sparse_super feature flag
is set.
Notice how the group descriptor records the location of both bitmaps and
the inode table (i.e. they can float). This means that within a block
group, the only data structures with fixed locations are the superblock
-and the group descriptor table. The flex\_bg mechanism uses this
+and the group descriptor table. The flex_bg mechanism uses this
property to group several block groups into a flex group and lay out all
of the groups' bitmaps and inode tables into one long run in the first
group of the flex group.
-If the meta\_bg feature flag is set, then several block groups are
-grouped together into a meta group. Note that in the meta\_bg case,
+If the meta_bg feature flag is set, then several block groups are
+grouped together into a meta group. Note that in the meta_bg case,
however, the first and last two block groups within the larger meta
group contain only group descriptors for the groups inside the meta
group.
-flex\_bg and meta\_bg do not appear to be mutually exclusive features.
+flex_bg and meta_bg do not appear to be mutually exclusive features.
In ext2, ext3, and ext4 (when the 64bit feature is not enabled), the
block group descriptor was only 32 bytes long and therefore ends at
-bg\_checksum. On an ext4 filesystem with the 64bit feature enabled, the
+bg_checksum. On an ext4 filesystem with the 64bit feature enabled, the
block group descriptor expands to at least the 64 bytes described below;
the size is stored in the superblock.
-If gdt\_csum is set and metadata\_csum is not set, the block group
+If gdt_csum is set and metadata_csum is not set, the block group
checksum is the crc16 of the FS UUID, the group number, and the group
-descriptor structure. If metadata\_csum is set, then the block group
+descriptor structure. If metadata_csum is set, then the block group
checksum is the lower 16 bits of the checksum of the FS UUID, the group
number, and the group descriptor structure. Both block and inode bitmap
checksums are calculated against the FS UUID, the group number, and the
- Name
- Description
* - 0x0
- - \_\_le32
- - bg\_block\_bitmap\_lo
+ - __le32
+ - bg_block_bitmap_lo
- Lower 32-bits of location of block bitmap.
* - 0x4
- - \_\_le32
- - bg\_inode\_bitmap\_lo
+ - __le32
+ - bg_inode_bitmap_lo
- Lower 32-bits of location of inode bitmap.
* - 0x8
- - \_\_le32
- - bg\_inode\_table\_lo
+ - __le32
+ - bg_inode_table_lo
- Lower 32-bits of location of inode table.
* - 0xC
- - \_\_le16
- - bg\_free\_blocks\_count\_lo
+ - __le16
+ - bg_free_blocks_count_lo
- Lower 16-bits of free block count.
* - 0xE
- - \_\_le16
- - bg\_free\_inodes\_count\_lo
+ - __le16
+ - bg_free_inodes_count_lo
- Lower 16-bits of free inode count.
* - 0x10
- - \_\_le16
- - bg\_used\_dirs\_count\_lo
+ - __le16
+ - bg_used_dirs_count_lo
- Lower 16-bits of directory count.
* - 0x12
- - \_\_le16
- - bg\_flags
+ - __le16
+ - bg_flags
- Block group flags. See the bgflags_ table below.
* - 0x14
- - \_\_le32
- - bg\_exclude\_bitmap\_lo
+ - __le32
+ - bg_exclude_bitmap_lo
- Lower 32-bits of location of snapshot exclusion bitmap.
* - 0x18
- - \_\_le16
- - bg\_block\_bitmap\_csum\_lo
+ - __le16
+ - bg_block_bitmap_csum_lo
- Lower 16-bits of the block bitmap checksum.
* - 0x1A
- - \_\_le16
- - bg\_inode\_bitmap\_csum\_lo
+ - __le16
+ - bg_inode_bitmap_csum_lo
- Lower 16-bits of the inode bitmap checksum.
* - 0x1C
- - \_\_le16
- - bg\_itable\_unused\_lo
+ - __le16
+ - bg_itable_unused_lo
- Lower 16-bits of unused inode count. If set, we needn't scan past the
- ``(sb.s_inodes_per_group - gdt.bg_itable_unused)``\ th entry in the
+ ``(sb.s_inodes_per_group - gdt.bg_itable_unused)`` th entry in the
inode table for this group.
* - 0x1E
- - \_\_le16
- - bg\_checksum
- - Group descriptor checksum; crc16(sb\_uuid+group\_num+bg\_desc) if the
- RO\_COMPAT\_GDT\_CSUM feature is set, or
- crc32c(sb\_uuid+group\_num+bg\_desc) & 0xFFFF if the
- RO\_COMPAT\_METADATA\_CSUM feature is set. The bg\_checksum
- field in bg\_desc is skipped when calculating crc16 checksum,
+ - __le16
+ - bg_checksum
+ - Group descriptor checksum; crc16(sb_uuid+group_num+bg_desc) if the
+ RO_COMPAT_GDT_CSUM feature is set, or
+ crc32c(sb_uuid+group_num+bg_desc) & 0xFFFF if the
+ RO_COMPAT_METADATA_CSUM feature is set. The bg_checksum
+ field in bg_desc is skipped when calculating crc16 checksum,
and set to zero if crc32c checksum is used.
* -
-
- These fields only exist if the 64bit feature is enabled and s_desc_size
> 32.
* - 0x20
- - \_\_le32
- - bg\_block\_bitmap\_hi
+ - __le32
+ - bg_block_bitmap_hi
- Upper 32-bits of location of block bitmap.
* - 0x24
- - \_\_le32
- - bg\_inode\_bitmap\_hi
+ - __le32
+ - bg_inode_bitmap_hi
- Upper 32-bits of location of inodes bitmap.
* - 0x28
- - \_\_le32
- - bg\_inode\_table\_hi
+ - __le32
+ - bg_inode_table_hi
- Upper 32-bits of location of inodes table.
* - 0x2C
- - \_\_le16
- - bg\_free\_blocks\_count\_hi
+ - __le16
+ - bg_free_blocks_count_hi
- Upper 16-bits of free block count.
* - 0x2E
- - \_\_le16
- - bg\_free\_inodes\_count\_hi
+ - __le16
+ - bg_free_inodes_count_hi
- Upper 16-bits of free inode count.
* - 0x30
- - \_\_le16
- - bg\_used\_dirs\_count\_hi
+ - __le16
+ - bg_used_dirs_count_hi
- Upper 16-bits of directory count.
* - 0x32
- - \_\_le16
- - bg\_itable\_unused\_hi
+ - __le16
+ - bg_itable_unused_hi
- Upper 16-bits of unused inode count.
* - 0x34
- - \_\_le32
- - bg\_exclude\_bitmap\_hi
+ - __le32
+ - bg_exclude_bitmap_hi
- Upper 32-bits of location of snapshot exclusion bitmap.
* - 0x38
- - \_\_le16
- - bg\_block\_bitmap\_csum\_hi
+ - __le16
+ - bg_block_bitmap_csum_hi
- Upper 16-bits of the block bitmap checksum.
* - 0x3A
- - \_\_le16
- - bg\_inode\_bitmap\_csum\_hi
+ - __le16
+ - bg_inode_bitmap_csum_hi
- Upper 16-bits of the inode bitmap checksum.
* - 0x3C
- - \_\_u32
- - bg\_reserved
+ - __u32
+ - bg_reserved
- Padding to 64 bytes.
.. _bgflags:
* - Value
- Description
* - 0x1
- - inode table and bitmap are not initialized (EXT4\_BG\_INODE\_UNINIT).
+ - inode table and bitmap are not initialized (EXT4_BG_INODE_UNINIT).
* - 0x2
- - block bitmap is not initialized (EXT4\_BG\_BLOCK\_UNINIT).
+ - block bitmap is not initialized (EXT4_BG_BLOCK_UNINIT).
* - 0x4
- - inode table is zeroed (EXT4\_BG\_INODE\_ZEROED).
+ - inode table is zeroed (EXT4_BG_INODE_ZEROED).
.. SPDX-License-Identifier: GPL-2.0
-The Contents of inode.i\_block
+The Contents of inode.i_block
------------------------------
Depending on the type of file an inode describes, the 60 bytes of
tree. Under the old scheme, allocating a contiguous run of 1,000 blocks
requires an indirect block to map all 1,000 entries; with extents, the
mapping is reduced to a single ``struct ext4_extent`` with
-``ee_len = 1000``. If flex\_bg is enabled, it is possible to allocate
+``ee_len = 1000``. If flex_bg is enabled, it is possible to allocate
very large files with a single extent, at a considerable reduction in
metadata block use, and some improvement in disk efficiency. The inode
must have the extents flag (0x80000) flag set for this feature to be in
- Name
- Description
* - 0x0
- - \_\_le16
- - eh\_magic
+ - __le16
+ - eh_magic
- Magic number, 0xF30A.
* - 0x2
- - \_\_le16
- - eh\_entries
+ - __le16
+ - eh_entries
- Number of valid entries following the header.
* - 0x4
- - \_\_le16
- - eh\_max
+ - __le16
+ - eh_max
- Maximum number of entries that could follow the header.
* - 0x6
- - \_\_le16
- - eh\_depth
+ - __le16
+ - eh_depth
- Depth of this extent node in the extent tree. 0 = this extent node
points to data blocks; otherwise, this extent node points to other
extent nodes. The extent tree can be at most 5 levels deep: a logical
block number can be at most ``2^32``, and the smallest ``n`` that
satisfies ``4*(((blocksize - 12)/12)^n) >= 2^32`` is 5.
* - 0x8
- - \_\_le32
- - eh\_generation
+ - __le32
+ - eh_generation
- Generation of the tree. (Used by Lustre, but not standard ext4).
Internal nodes of the extent tree, also known as index nodes, are
- Name
- Description
* - 0x0
- - \_\_le32
- - ei\_block
+ - __le32
+ - ei_block
- This index node covers file blocks from 'block' onward.
* - 0x4
- - \_\_le32
- - ei\_leaf\_lo
+ - __le32
+ - ei_leaf_lo
- Lower 32-bits of the block number of the extent node that is the next
level lower in the tree. The tree node pointed to can be either another
internal node or a leaf node, described below.
* - 0x8
- - \_\_le16
- - ei\_leaf\_hi
+ - __le16
+ - ei_leaf_hi
- Upper 16-bits of the previous field.
* - 0xA
- - \_\_u16
- - ei\_unused
+ - __u16
+ - ei_unused
-
Leaf nodes of the extent tree are recorded as ``struct ext4_extent``,
- Name
- Description
* - 0x0
- - \_\_le32
- - ee\_block
+ - __le32
+ - ee_block
- First file block number that this extent covers.
* - 0x4
- - \_\_le16
- - ee\_len
+ - __le16
+ - ee_len
- Number of blocks covered by extent. If the value of this field is <=
32768, the extent is initialized. If the value of the field is > 32768,
the extent is uninitialized and the actual extent length is ``ee_len`` -
32768. Therefore, the maximum length of a initialized extent is 32768
blocks, and the maximum length of an uninitialized extent is 32767.
* - 0x6
- - \_\_le16
- - ee\_start\_hi
+ - __le16
+ - ee_start_hi
- Upper 16-bits of the block number to which this extent points.
* - 0x8
- - \_\_le32
- - ee\_start\_lo
+ - __le32
+ - ee_start_lo
- Lower 32-bits of the block number to which this extent points.
Prior to the introduction of metadata checksums, the extent header +
- Name
- Description
* - 0x0
- - \_\_le32
- - eb\_checksum
+ - __le32
+ - eb_checksum
- Checksum of the extent block, crc32c(uuid+inum+igeneration+extentblock)
Inline Data
attribute space, then it might be found as an extended attribute
“system.data” within the inode body (“ibody EA”). This of course
constrains the amount of extended attributes one can attach to an inode.
-If the data size increases beyond i\_block + ibody EA, a regular block
+If the data size increases beyond i_block + ibody EA, a regular block
is allocated and the contents moved to that block.
Pending a change to compact the extended attribute key used to store
inline data, one ought to be able to store 160 bytes of data in a
-256-byte inode (as of June 2015, when i\_extra\_isize is 28). Prior to
+256-byte inode (as of June 2015, when i_extra_isize is 28). Prior to
that, the limit was 156 bytes due to inefficient use of inode space.
The inline data feature requires the presence of an extended attribute
Inline Directories
~~~~~~~~~~~~~~~~~~
-The first four bytes of i\_block are the inode number of the parent
+The first four bytes of i_block are the inode number of the parent
directory. Following that is a 56-byte space for an array of directory
entries; see ``struct ext4_dir_entry``. If there is a “system.data”
attribute in the inode body, the EA value is an array of
``struct ext4_dir_entry`` as well. Note that for inline directories, the
-i\_block and EA space are treated as separate dirent blocks; directory
+i_block and EA space are treated as separate dirent blocks; directory
entries cannot span the two.
Inline directory entries are not checksummed, as the inode checksum
- Name
- Description
* - 0x0
- - \_\_le16
- - i\_mode
+ - __le16
+ - i_mode
- File mode. See the table i_mode_ below.
* - 0x2
- - \_\_le16
- - i\_uid
+ - __le16
+ - i_uid
- Lower 16-bits of Owner UID.
* - 0x4
- - \_\_le32
- - i\_size\_lo
+ - __le32
+ - i_size_lo
- Lower 32-bits of size in bytes.
* - 0x8
- - \_\_le32
- - i\_atime
- - Last access time, in seconds since the epoch. However, if the EA\_INODE
+ - __le32
+ - i_atime
+ - Last access time, in seconds since the epoch. However, if the EA_INODE
inode flag is set, this inode stores an extended attribute value and
this field contains the checksum of the value.
* - 0xC
- - \_\_le32
- - i\_ctime
+ - __le32
+ - i_ctime
- Last inode change time, in seconds since the epoch. However, if the
- EA\_INODE inode flag is set, this inode stores an extended attribute
+ EA_INODE inode flag is set, this inode stores an extended attribute
value and this field contains the lower 32 bits of the attribute value's
reference count.
* - 0x10
- - \_\_le32
- - i\_mtime
+ - __le32
+ - i_mtime
- Last data modification time, in seconds since the epoch. However, if the
- EA\_INODE inode flag is set, this inode stores an extended attribute
+ EA_INODE inode flag is set, this inode stores an extended attribute
value and this field contains the number of the inode that owns the
extended attribute.
* - 0x14
- - \_\_le32
- - i\_dtime
+ - __le32
+ - i_dtime
- Deletion Time, in seconds since the epoch.
* - 0x18
- - \_\_le16
- - i\_gid
+ - __le16
+ - i_gid
- Lower 16-bits of GID.
* - 0x1A
- - \_\_le16
- - i\_links\_count
+ - __le16
+ - i_links_count
- Hard link count. Normally, ext4 does not permit an inode to have more
than 65,000 hard links. This applies to files as well as directories,
which means that there cannot be more than 64,998 subdirectories in a
directory (each subdirectory's '..' entry counts as a hard link, as does
- the '.' entry in the directory itself). With the DIR\_NLINK feature
+ the '.' entry in the directory itself). With the DIR_NLINK feature
enabled, ext4 supports more than 64,998 subdirectories by setting this
field to 1 to indicate that the number of hard links is not known.
* - 0x1C
- - \_\_le32
- - i\_blocks\_lo
- - Lower 32-bits of “block” count. If the huge\_file feature flag is not
+ - __le32
+ - i_blocks_lo
+ - Lower 32-bits of “block” count. If the huge_file feature flag is not
set on the filesystem, the file consumes ``i_blocks_lo`` 512-byte blocks
- on disk. If huge\_file is set and EXT4\_HUGE\_FILE\_FL is NOT set in
+ on disk. If huge_file is set and EXT4_HUGE_FILE_FL is NOT set in
``inode.i_flags``, then the file consumes ``i_blocks_lo + (i_blocks_hi
- << 32)`` 512-byte blocks on disk. If huge\_file is set and
- EXT4\_HUGE\_FILE\_FL IS set in ``inode.i_flags``, then this file
+ << 32)`` 512-byte blocks on disk. If huge_file is set and
+ EXT4_HUGE_FILE_FL IS set in ``inode.i_flags``, then this file
consumes (``i_blocks_lo + i_blocks_hi`` << 32) filesystem blocks on
disk.
* - 0x20
- - \_\_le32
- - i\_flags
+ - __le32
+ - i_flags
- Inode flags. See the table i_flags_ below.
* - 0x24
- 4 bytes
- - i\_osd1
+ - i_osd1
- See the table i_osd1_ for more details.
* - 0x28
- 60 bytes
- - i\_block[EXT4\_N\_BLOCKS=15]
- - Block map or extent tree. See the section “The Contents of inode.i\_block”.
+ - i_block[EXT4_N_BLOCKS=15]
+ - Block map or extent tree. See the section “The Contents of inode.i_block”.
* - 0x64
- - \_\_le32
- - i\_generation
+ - __le32
+ - i_generation
- File version (for NFS).
* - 0x68
- - \_\_le32
- - i\_file\_acl\_lo
+ - __le32
+ - i_file_acl_lo
- Lower 32-bits of extended attribute block. ACLs are of course one of
many possible extended attributes; I think the name of this field is a
result of the first use of extended attributes being for ACLs.
* - 0x6C
- - \_\_le32
- - i\_size\_high / i\_dir\_acl
+ - __le32
+ - i_size_high / i_dir_acl
- Upper 32-bits of file/directory size. In ext2/3 this field was named
- i\_dir\_acl, though it was usually set to zero and never used.
+ i_dir_acl, though it was usually set to zero and never used.
* - 0x70
- - \_\_le32
- - i\_obso\_faddr
+ - __le32
+ - i_obso_faddr
- (Obsolete) fragment address.
* - 0x74
- 12 bytes
- - i\_osd2
+ - i_osd2
- See the table i_osd2_ for more details.
* - 0x80
- - \_\_le16
- - i\_extra\_isize
+ - __le16
+ - i_extra_isize
- Size of this inode - 128. Alternately, the size of the extended inode
fields beyond the original ext2 inode, including this field.
* - 0x82
- - \_\_le16
- - i\_checksum\_hi
+ - __le16
+ - i_checksum_hi
- Upper 16-bits of the inode checksum.
* - 0x84
- - \_\_le32
- - i\_ctime\_extra
+ - __le32
+ - i_ctime_extra
- Extra change time bits. This provides sub-second precision. See Inode
Timestamps section.
* - 0x88
- - \_\_le32
- - i\_mtime\_extra
+ - __le32
+ - i_mtime_extra
- Extra modification time bits. This provides sub-second precision.
* - 0x8C
- - \_\_le32
- - i\_atime\_extra
+ - __le32
+ - i_atime_extra
- Extra access time bits. This provides sub-second precision.
* - 0x90
- - \_\_le32
- - i\_crtime
+ - __le32
+ - i_crtime
- File creation time, in seconds since the epoch.
* - 0x94
- - \_\_le32
- - i\_crtime\_extra
+ - __le32
+ - i_crtime_extra
- Extra file creation time bits. This provides sub-second precision.
* - 0x98
- - \_\_le32
- - i\_version\_hi
+ - __le32
+ - i_version_hi
- Upper 32-bits for version number.
* - 0x9C
- - \_\_le32
- - i\_projid
+ - __le32
+ - i_projid
- Project ID.
.. _i_mode:
* - Value
- Description
* - 0x1
- - S\_IXOTH (Others may execute)
+ - S_IXOTH (Others may execute)
* - 0x2
- - S\_IWOTH (Others may write)
+ - S_IWOTH (Others may write)
* - 0x4
- - S\_IROTH (Others may read)
+ - S_IROTH (Others may read)
* - 0x8
- - S\_IXGRP (Group members may execute)
+ - S_IXGRP (Group members may execute)
* - 0x10
- - S\_IWGRP (Group members may write)
+ - S_IWGRP (Group members may write)
* - 0x20
- - S\_IRGRP (Group members may read)
+ - S_IRGRP (Group members may read)
* - 0x40
- - S\_IXUSR (Owner may execute)
+ - S_IXUSR (Owner may execute)
* - 0x80
- - S\_IWUSR (Owner may write)
+ - S_IWUSR (Owner may write)
* - 0x100
- - S\_IRUSR (Owner may read)
+ - S_IRUSR (Owner may read)
* - 0x200
- - S\_ISVTX (Sticky bit)
+ - S_ISVTX (Sticky bit)
* - 0x400
- - S\_ISGID (Set GID)
+ - S_ISGID (Set GID)
* - 0x800
- - S\_ISUID (Set UID)
+ - S_ISUID (Set UID)
* -
- These are mutually-exclusive file types:
* - 0x1000
- - S\_IFIFO (FIFO)
+ - S_IFIFO (FIFO)
* - 0x2000
- - S\_IFCHR (Character device)
+ - S_IFCHR (Character device)
* - 0x4000
- - S\_IFDIR (Directory)
+ - S_IFDIR (Directory)
* - 0x6000
- - S\_IFBLK (Block device)
+ - S_IFBLK (Block device)
* - 0x8000
- - S\_IFREG (Regular file)
+ - S_IFREG (Regular file)
* - 0xA000
- - S\_IFLNK (Symbolic link)
+ - S_IFLNK (Symbolic link)
* - 0xC000
- - S\_IFSOCK (Socket)
+ - S_IFSOCK (Socket)
.. _i_flags:
* - Value
- Description
* - 0x1
- - This file requires secure deletion (EXT4\_SECRM\_FL). (not implemented)
+ - This file requires secure deletion (EXT4_SECRM_FL). (not implemented)
* - 0x2
- This file should be preserved, should undeletion be desired
- (EXT4\_UNRM\_FL). (not implemented)
+ (EXT4_UNRM_FL). (not implemented)
* - 0x4
- - File is compressed (EXT4\_COMPR\_FL). (not really implemented)
+ - File is compressed (EXT4_COMPR_FL). (not really implemented)
* - 0x8
- - All writes to the file must be synchronous (EXT4\_SYNC\_FL).
+ - All writes to the file must be synchronous (EXT4_SYNC_FL).
* - 0x10
- - File is immutable (EXT4\_IMMUTABLE\_FL).
+ - File is immutable (EXT4_IMMUTABLE_FL).
* - 0x20
- - File can only be appended (EXT4\_APPEND\_FL).
+ - File can only be appended (EXT4_APPEND_FL).
* - 0x40
- - The dump(1) utility should not dump this file (EXT4\_NODUMP\_FL).
+ - The dump(1) utility should not dump this file (EXT4_NODUMP_FL).
* - 0x80
- - Do not update access time (EXT4\_NOATIME\_FL).
+ - Do not update access time (EXT4_NOATIME_FL).
* - 0x100
- - Dirty compressed file (EXT4\_DIRTY\_FL). (not used)
+ - Dirty compressed file (EXT4_DIRTY_FL). (not used)
* - 0x200
- - File has one or more compressed clusters (EXT4\_COMPRBLK\_FL). (not used)
+ - File has one or more compressed clusters (EXT4_COMPRBLK_FL). (not used)
* - 0x400
- - Do not compress file (EXT4\_NOCOMPR\_FL). (not used)
+ - Do not compress file (EXT4_NOCOMPR_FL). (not used)
* - 0x800
- - Encrypted inode (EXT4\_ENCRYPT\_FL). This bit value previously was
- EXT4\_ECOMPR\_FL (compression error), which was never used.
+ - Encrypted inode (EXT4_ENCRYPT_FL). This bit value previously was
+ EXT4_ECOMPR_FL (compression error), which was never used.
* - 0x1000
- - Directory has hashed indexes (EXT4\_INDEX\_FL).
+ - Directory has hashed indexes (EXT4_INDEX_FL).
* - 0x2000
- - AFS magic directory (EXT4\_IMAGIC\_FL).
+ - AFS magic directory (EXT4_IMAGIC_FL).
* - 0x4000
- File data must always be written through the journal
- (EXT4\_JOURNAL\_DATA\_FL).
+ (EXT4_JOURNAL_DATA_FL).
* - 0x8000
- - File tail should not be merged (EXT4\_NOTAIL\_FL). (not used by ext4)
+ - File tail should not be merged (EXT4_NOTAIL_FL). (not used by ext4)
* - 0x10000
- All directory entry data should be written synchronously (see
- ``dirsync``) (EXT4\_DIRSYNC\_FL).
+ ``dirsync``) (EXT4_DIRSYNC_FL).
* - 0x20000
- - Top of directory hierarchy (EXT4\_TOPDIR\_FL).
+ - Top of directory hierarchy (EXT4_TOPDIR_FL).
* - 0x40000
- - This is a huge file (EXT4\_HUGE\_FILE\_FL).
+ - This is a huge file (EXT4_HUGE_FILE_FL).
* - 0x80000
- - Inode uses extents (EXT4\_EXTENTS\_FL).
+ - Inode uses extents (EXT4_EXTENTS_FL).
* - 0x100000
- - Verity protected file (EXT4\_VERITY\_FL).
+ - Verity protected file (EXT4_VERITY_FL).
* - 0x200000
- Inode stores a large extended attribute value in its data blocks
- (EXT4\_EA\_INODE\_FL).
+ (EXT4_EA_INODE_FL).
* - 0x400000
- - This file has blocks allocated past EOF (EXT4\_EOFBLOCKS\_FL).
+ - This file has blocks allocated past EOF (EXT4_EOFBLOCKS_FL).
(deprecated)
* - 0x01000000
- Inode is a snapshot (``EXT4_SNAPFILE_FL``). (not in mainline)
- Snapshot shrink has completed (``EXT4_SNAPFILE_SHRUNK_FL``). (not in
mainline)
* - 0x10000000
- - Inode has inline data (EXT4\_INLINE\_DATA\_FL).
+ - Inode has inline data (EXT4_INLINE_DATA_FL).
* - 0x20000000
- - Create children with the same project ID (EXT4\_PROJINHERIT\_FL).
+ - Create children with the same project ID (EXT4_PROJINHERIT_FL).
* - 0x80000000
- - Reserved for ext4 library (EXT4\_RESERVED\_FL).
+ - Reserved for ext4 library (EXT4_RESERVED_FL).
* -
- Aggregate flags:
* - 0x705BDFFF
- User-visible flags.
* - 0x604BC0FF
- - User-modifiable flags. Note that while EXT4\_JOURNAL\_DATA\_FL and
- EXT4\_EXTENTS\_FL can be set with setattr, they are not in the kernel's
- EXT4\_FL\_USER\_MODIFIABLE mask, since it needs to handle the setting of
+ - User-modifiable flags. Note that while EXT4_JOURNAL_DATA_FL and
+ EXT4_EXTENTS_FL can be set with setattr, they are not in the kernel's
+ EXT4_FL_USER_MODIFIABLE mask, since it needs to handle the setting of
these flags in a special manner and they are masked out of the set of
- flags that are saved directly to i\_flags.
+ flags that are saved directly to i_flags.
.. _i_osd1:
- Name
- Description
* - 0x0
- - \_\_le32
- - l\_i\_version
- - Inode version. However, if the EA\_INODE inode flag is set, this inode
+ - __le32
+ - l_i_version
+ - Inode version. However, if the EA_INODE inode flag is set, this inode
stores an extended attribute value and this field contains the upper 32
bits of the attribute value's reference count.
- Name
- Description
* - 0x0
- - \_\_le32
- - h\_i\_translator
+ - __le32
+ - h_i_translator
- ??
Masix:
- Name
- Description
* - 0x0
- - \_\_le32
- - m\_i\_reserved
+ - __le32
+ - m_i_reserved
- ??
.. _i_osd2:
- Name
- Description
* - 0x0
- - \_\_le16
- - l\_i\_blocks\_high
+ - __le16
+ - l_i_blocks_high
- Upper 16-bits of the block count. Please see the note attached to
- i\_blocks\_lo.
+ i_blocks_lo.
* - 0x2
- - \_\_le16
- - l\_i\_file\_acl\_high
+ - __le16
+ - l_i_file_acl_high
- Upper 16-bits of the extended attribute block (historically, the file
ACL location). See the Extended Attributes section below.
* - 0x4
- - \_\_le16
- - l\_i\_uid\_high
+ - __le16
+ - l_i_uid_high
- Upper 16-bits of the Owner UID.
* - 0x6
- - \_\_le16
- - l\_i\_gid\_high
+ - __le16
+ - l_i_gid_high
- Upper 16-bits of the GID.
* - 0x8
- - \_\_le16
- - l\_i\_checksum\_lo
+ - __le16
+ - l_i_checksum_lo
- Lower 16-bits of the inode checksum.
* - 0xA
- - \_\_le16
- - l\_i\_reserved
+ - __le16
+ - l_i_reserved
- Unused.
Hurd:
- Name
- Description
* - 0x0
- - \_\_le16
- - h\_i\_reserved1
+ - __le16
+ - h_i_reserved1
- ??
* - 0x2
- - \_\_u16
- - h\_i\_mode\_high
+ - __u16
+ - h_i_mode_high
- Upper 16-bits of the file mode.
* - 0x4
- - \_\_le16
- - h\_i\_uid\_high
+ - __le16
+ - h_i_uid_high
- Upper 16-bits of the Owner UID.
* - 0x6
- - \_\_le16
- - h\_i\_gid\_high
+ - __le16
+ - h_i_gid_high
- Upper 16-bits of the GID.
* - 0x8
- - \_\_u32
- - h\_i\_author
+ - __u32
+ - h_i_author
- Author code?
Masix:
- Name
- Description
* - 0x0
- - \_\_le16
- - h\_i\_reserved1
+ - __le16
+ - h_i_reserved1
- ??
* - 0x2
- - \_\_u16
- - m\_i\_file\_acl\_high
+ - __u16
+ - m_i_file_acl_high
- Upper 16-bits of the extended attribute block (historically, the file
ACL location).
* - 0x4
- - \_\_u32
- - m\_i\_reserved2[2]
+ - __u32
+ - m_i_reserved2[2]
- ??
Inode Size
on-disk inode at format time for all inodes in the filesystem to provide
space beyond the end of the original ext2 inode. The on-disk inode
record size is recorded in the superblock as ``s_inode_size``. The
-number of bytes actually used by struct ext4\_inode beyond the original
+number of bytes actually used by struct ext4_inode beyond the original
128-byte ext2 inode is recorded in the ``i_extra_isize`` field for each
-inode, which allows struct ext4\_inode to grow for a new kernel without
+inode, which allows struct ext4_inode to grow for a new kernel without
having to upgrade all of the on-disk inodes. Access to fields beyond
-EXT2\_GOOD\_OLD\_INODE\_SIZE should be verified to be within
+EXT2_GOOD_OLD_INODE_SIZE should be verified to be within
``i_extra_isize``. By default, ext4 inode records are 256 bytes, and (as
of August 2019) the inode structure is 160 bytes
(``i_extra_isize = 32``). The extra space between the end of the inode
same manner as 64-bit [cma]time. Neither crtime nor dtime are accessible
through the regular stat() interface, though debugfs will report them.
-We use the 32-bit signed time value plus (2^32 \* (extra epoch bits)).
+We use the 32-bit signed time value plus (2^32 * (extra epoch bits)).
In other words:
.. list-table::
* - Extra epoch bits
- MSB of 32-bit time
- - Adjustment for signed 32-bit to 64-bit tv\_sec
- - Decoded 64-bit tv\_sec
+ - Adjustment for signed 32-bit to 64-bit tv_sec
+ - Decoded 64-bit tv_sec
- valid time range
* - 0 0
- 1
:header-rows: 1
* - Superblock
- - descriptor\_block (data\_blocks or revocation\_block) [more data or
- revocations] commmit\_block
+ - descriptor_block (data_blocks or revocation_block) [more data or
+ revocations] commmit_block
- [more transactions...]
* -
- One transaction
* - 1024 bytes of padding
- ext4 Superblock
- Journal Superblock
- - descriptor\_block (data\_blocks or revocation\_block) [more data or
- revocations] commmit\_block
+ - descriptor_block (data_blocks or revocation_block) [more data or
+ revocations] commmit_block
- [more transactions...]
* -
-
- Name
- Description
* - 0x0
- - \_\_be32
- - h\_magic
+ - __be32
+ - h_magic
- jbd2 magic number, 0xC03B3998.
* - 0x4
- - \_\_be32
- - h\_blocktype
+ - __be32
+ - h_blocktype
- Description of what this block contains. See the jbd2_blocktype_ table
below.
* - 0x8
- - \_\_be32
- - h\_sequence
+ - __be32
+ - h_sequence
- The transaction ID that goes with this block.
.. _jbd2_blocktype:
-
- Static information describing the journal.
* - 0x0
- - journal\_header\_t (12 bytes)
- - s\_header
+ - journal_header_t (12 bytes)
+ - s_header
- Common header identifying this as a superblock.
* - 0xC
- - \_\_be32
- - s\_blocksize
+ - __be32
+ - s_blocksize
- Journal device block size.
* - 0x10
- - \_\_be32
- - s\_maxlen
+ - __be32
+ - s_maxlen
- Total number of blocks in this journal.
* - 0x14
- - \_\_be32
- - s\_first
+ - __be32
+ - s_first
- First block of log information.
* -
-
-
- Dynamic information describing the current state of the log.
* - 0x18
- - \_\_be32
- - s\_sequence
+ - __be32
+ - s_sequence
- First commit ID expected in log.
* - 0x1C
- - \_\_be32
- - s\_start
+ - __be32
+ - s_start
- Block number of the start of log. Contrary to the comments, this field
being zero does not imply that the journal is clean!
* - 0x20
- - \_\_be32
- - s\_errno
- - Error value, as set by jbd2\_journal\_abort().
+ - __be32
+ - s_errno
+ - Error value, as set by jbd2_journal_abort().
* -
-
-
- The remaining fields are only valid in a v2 superblock.
* - 0x24
- - \_\_be32
- - s\_feature\_compat;
+ - __be32
+ - s_feature_compat;
- Compatible feature set. See the table jbd2_compat_ below.
* - 0x28
- - \_\_be32
- - s\_feature\_incompat
+ - __be32
+ - s_feature_incompat
- Incompatible feature set. See the table jbd2_incompat_ below.
* - 0x2C
- - \_\_be32
- - s\_feature\_ro\_compat
+ - __be32
+ - s_feature_ro_compat
- Read-only compatible feature set. There aren't any of these currently.
* - 0x30
- - \_\_u8
- - s\_uuid[16]
+ - __u8
+ - s_uuid[16]
- 128-bit uuid for journal. This is compared against the copy in the ext4
super block at mount time.
* - 0x40
- - \_\_be32
- - s\_nr\_users
+ - __be32
+ - s_nr_users
- Number of file systems sharing this journal.
* - 0x44
- - \_\_be32
- - s\_dynsuper
+ - __be32
+ - s_dynsuper
- Location of dynamic super block copy. (Not used?)
* - 0x48
- - \_\_be32
- - s\_max\_transaction
+ - __be32
+ - s_max_transaction
- Limit of journal blocks per transaction. (Not used?)
* - 0x4C
- - \_\_be32
- - s\_max\_trans\_data
+ - __be32
+ - s_max_trans_data
- Limit of data blocks per transaction. (Not used?)
* - 0x50
- - \_\_u8
- - s\_checksum\_type
+ - __u8
+ - s_checksum_type
- Checksum algorithm used for the journal. See jbd2_checksum_type_ for
more info.
* - 0x51
- - \_\_u8[3]
- - s\_padding2
+ - __u8[3]
+ - s_padding2
-
* - 0x54
- - \_\_be32
- - s\_num\_fc\_blocks
+ - __be32
+ - s_num_fc_blocks
- Number of fast commit blocks in the journal.
* - 0x58
- - \_\_u32
- - s\_padding[42]
+ - __u32
+ - s_padding[42]
-
* - 0xFC
- - \_\_be32
- - s\_checksum
+ - __be32
+ - s_checksum
- Checksum of the entire superblock, with this field set to zero.
* - 0x100
- - \_\_u8
- - s\_users[16\*48]
+ - __u8
+ - s_users[16*48]
- ids of all file systems sharing the log. e2fsprogs/Linux don't allow
shared external journals, but I imagine Lustre (or ocfs2?), which use
the jbd2 code, might.
- Description
* - 0x1
- Journal maintains checksums on the data blocks.
- (JBD2\_FEATURE\_COMPAT\_CHECKSUM)
+ (JBD2_FEATURE_COMPAT_CHECKSUM)
.. _jbd2_incompat:
* - Value
- Description
* - 0x1
- - Journal has block revocation records. (JBD2\_FEATURE\_INCOMPAT\_REVOKE)
+ - Journal has block revocation records. (JBD2_FEATURE_INCOMPAT_REVOKE)
* - 0x2
- Journal can deal with 64-bit block numbers.
- (JBD2\_FEATURE\_INCOMPAT\_64BIT)
+ (JBD2_FEATURE_INCOMPAT_64BIT)
* - 0x4
- - Journal commits asynchronously. (JBD2\_FEATURE\_INCOMPAT\_ASYNC\_COMMIT)
+ - Journal commits asynchronously. (JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)
* - 0x8
- This journal uses v2 of the checksum on-disk format. Each journal
metadata block gets its own checksum, and the block tags in the
descriptor table contain checksums for each of the data blocks in the
- journal. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2)
+ journal. (JBD2_FEATURE_INCOMPAT_CSUM_V2)
* - 0x10
- This journal uses v3 of the checksum on-disk format. This is the same as
v2, but the journal block tag size is fixed regardless of the size of
- block numbers. (JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3)
+ block numbers. (JBD2_FEATURE_INCOMPAT_CSUM_V3)
* - 0x20
- - Journal has fast commit blocks. (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT)
+ - Journal has fast commit blocks. (JBD2_FEATURE_INCOMPAT_FAST_COMMIT)
.. _jbd2_checksum_type:
- Name
- Descriptor
* - 0x0
- - journal\_header\_t
+ - journal_header_t
- (open coded)
- Common block header.
* - 0xC
- - struct journal\_block\_tag\_s
+ - struct journal_block_tag_s
- open coded array[]
- Enough tags either to fill up the block or to describe all the data
blocks that follow this descriptor block.
Journal block tags have any of the following formats, depending on which
journal feature and block tag flags are set.
-If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is set, the journal block tag is
+If JBD2_FEATURE_INCOMPAT_CSUM_V3 is set, the journal block tag is
defined as ``struct journal_block_tag3_s``, which looks like the
following. The size is 16 or 32 bytes.
- Name
- Descriptor
* - 0x0
- - \_\_be32
- - t\_blocknr
+ - __be32
+ - t_blocknr
- Lower 32-bits of the location of where the corresponding data block
should end up on disk.
* - 0x4
- - \_\_be32
- - t\_flags
+ - __be32
+ - t_flags
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for
more info.
* - 0x8
- - \_\_be32
- - t\_blocknr\_high
+ - __be32
+ - t_blocknr_high
- Upper 32-bits of the location of where the corresponding data block
- should end up on disk. This is zero if JBD2\_FEATURE\_INCOMPAT\_64BIT is
+ should end up on disk. This is zero if JBD2_FEATURE_INCOMPAT_64BIT is
not enabled.
* - 0xC
- - \_\_be32
- - t\_checksum
+ - __be32
+ - t_checksum
- Checksum of the journal UUID, the sequence number, and the data block.
* -
-
* - 0x8
- This is the last tag in this descriptor block.
-If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 is NOT set, the journal block tag
+If JBD2_FEATURE_INCOMPAT_CSUM_V3 is NOT set, the journal block tag
is defined as ``struct journal_block_tag_s``, which looks like the
following. The size is 8, 12, 24, or 28 bytes:
- Name
- Descriptor
* - 0x0
- - \_\_be32
- - t\_blocknr
+ - __be32
+ - t_blocknr
- Lower 32-bits of the location of where the corresponding data block
should end up on disk.
* - 0x4
- - \_\_be16
- - t\_checksum
+ - __be16
+ - t_checksum
- Checksum of the journal UUID, the sequence number, and the data block.
Note that only the lower 16 bits are stored.
* - 0x6
- - \_\_be16
- - t\_flags
+ - __be16
+ - t_flags
- Flags that go with the descriptor. See the table jbd2_tag_flags_ for
more info.
* -
- This next field is only present if the super block indicates support for
64-bit block numbers.
* - 0x8
- - \_\_be32
- - t\_blocknr\_high
+ - __be32
+ - t_blocknr_high
- Upper 32-bits of the location of where the corresponding data block
should end up on disk.
* -
``j_uuid`` field in ``struct journal_s``, but only tune2fs touches that
field.
-If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or
-JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the block is a
+If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
+JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the block is a
``struct jbd2_journal_block_tail``, which looks like this:
.. list-table::
- Name
- Descriptor
* - 0x0
- - \_\_be32
- - t\_checksum
+ - __be32
+ - t_checksum
- Checksum of the journal UUID + the descriptor block, with this field set
to zero.
- Name
- Description
* - 0x0
- - journal\_header\_t
- - r\_header
+ - journal_header_t
+ - r_header
- Common block header.
* - 0xC
- - \_\_be32
- - r\_count
+ - __be32
+ - r_count
- Number of bytes used in this block.
* - 0x10
- - \_\_be32 or \_\_be64
+ - __be32 or __be64
- blocks[0]
- Blocks to revoke.
-After r\_count is a linear array of block numbers that are effectively
+After r_count is a linear array of block numbers that are effectively
revoked by this transaction. The size of each block number is 8 bytes if
the superblock advertises 64-bit block number support, or 4 bytes
otherwise.
-If JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or
-JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3 are set, the end of the revocation
+If JBD2_FEATURE_INCOMPAT_CSUM_V2 or
+JBD2_FEATURE_INCOMPAT_CSUM_V3 are set, the end of the revocation
block is a ``struct jbd2_journal_revoke_tail``, which has this format:
.. list-table::
- Name
- Description
* - 0x0
- - \_\_be32
- - r\_checksum
+ - __be32
+ - r_checksum
- Checksum of the journal UUID + revocation block
Commit Block
- Name
- Descriptor
* - 0x0
- - journal\_header\_s
+ - journal_header_s
- (open coded)
- Common block header.
* - 0xC
- unsigned char
- - h\_chksum\_type
+ - h_chksum_type
- The type of checksum to use to verify the integrity of the data blocks
in the transaction. See jbd2_checksum_type_ for more info.
* - 0xD
- unsigned char
- - h\_chksum\_size
+ - h_chksum_size
- The number of bytes used by the checksum. Most likely 4.
* - 0xE
- unsigned char
- - h\_padding[2]
+ - h_padding[2]
-
* - 0x10
- - \_\_be32
- - h\_chksum[JBD2\_CHECKSUM\_BYTES]
+ - __be32
+ - h_chksum[JBD2_CHECKSUM_BYTES]
- 32 bytes of space to store checksums. If
- JBD2\_FEATURE\_INCOMPAT\_CSUM\_V2 or JBD2\_FEATURE\_INCOMPAT\_CSUM\_V3
+ JBD2_FEATURE_INCOMPAT_CSUM_V2 or JBD2_FEATURE_INCOMPAT_CSUM_V3
are set, the first ``__be32`` is the checksum of the journal UUID and
the entire commit block, with this field zeroed. If
- JBD2\_FEATURE\_COMPAT\_CHECKSUM is set, the first ``__be32`` is the
+ JBD2_FEATURE_COMPAT_CHECKSUM is set, the first ``__be32`` is the
crc32 of all the blocks already written to the transaction.
* - 0x30
- - \_\_be64
- - h\_commit\_sec
+ - __be64
+ - h_commit_sec
- The time that the transaction was committed, in seconds since the epoch.
* - 0x38
- - \_\_be32
- - h\_commit\_nsec
+ - __be32
+ - h_commit_nsec
- Nanoseconds component of the above timestamp.
Fast commits
filesystem against multiple hosts trying to use the filesystem
simultaneously. When a filesystem is opened (for mounting, or fsck,
etc.), the MMP code running on the node (call it node A) checks a
-sequence number. If the sequence number is EXT4\_MMP\_SEQ\_CLEAN, the
-open continues. If the sequence number is EXT4\_MMP\_SEQ\_FSCK, then
+sequence number. If the sequence number is EXT4_MMP_SEQ_CLEAN, the
+open continues. If the sequence number is EXT4_MMP_SEQ_FSCK, then
fsck is (hopefully) running, and open fails immediately. Otherwise, the
open code will wait for twice the specified MMP check interval and check
the sequence number again. If the sequence number has changed, then the
- Name
- Description
* - 0x0
- - \_\_le32
- - mmp\_magic
+ - __le32
+ - mmp_magic
- Magic number for MMP, 0x004D4D50 (“MMP”).
* - 0x4
- - \_\_le32
- - mmp\_seq
+ - __le32
+ - mmp_seq
- Sequence number, updated periodically.
* - 0x8
- - \_\_le64
- - mmp\_time
+ - __le64
+ - mmp_time
- Time that the MMP block was last updated.
* - 0x10
- char[64]
- - mmp\_nodename
+ - mmp_nodename
- Hostname of the node that opened the filesystem.
* - 0x50
- char[32]
- - mmp\_bdevname
+ - mmp_bdevname
- Block device name of the filesystem.
* - 0x70
- - \_\_le16
- - mmp\_check\_interval
+ - __le16
+ - mmp_check_interval
- The MMP re-check interval, in seconds.
* - 0x72
- - \_\_le16
- - mmp\_pad1
+ - __le16
+ - mmp_pad1
- Zero.
* - 0x74
- - \_\_le32[226]
- - mmp\_pad2
+ - __le32[226]
+ - mmp_pad2
- Zero.
* - 0x3FC
- - \_\_le32
- - mmp\_checksum
+ - __le32
+ - mmp_checksum
- Checksum of the MMP block.
performance difficulties due to fragmentation, the block allocator tries
very hard to keep each file's blocks within the same group, thereby
reducing seek times. The size of a block group is specified in
-``sb.s_blocks_per_group`` blocks, though it can also calculated as 8 \*
+``sb.s_blocks_per_group`` blocks, though it can also calculated as 8 *
``block_size_in_bytes``. With the default block size of 4KiB, each group
will contain 32,768 blocks, for a length of 128MiB. The number of block
groups is the size of the device divided by the size of a block group.
* - 10
- Replica inode, used for some non-upstream feature?
* - 11
- - Traditional first non-reserved inode. Usually this is the lost+found directory. See s\_first\_ino in the superblock.
+ - Traditional first non-reserved inode. Usually this is the lost+found directory. See s_first_ino in the superblock.
Note that there are also some inodes allocated from non-reserved inode numbers
for other filesystem features which are not referenced from standard directory
* - Superblock field
- Description
- * - s\_lpf\_ino
+ * - s_lpf_ino
- Inode number of lost+found directory.
- * - s\_prj\_quota\_inum
+ * - s_prj_quota_inum
- Inode number of quota file tracking project quotas
- * - s\_orphan\_file\_inum
+ * - s_orphan_file_inum
- Inode number of file tracking orphan inodes.
filesystem, such as block counts, inode counts, supported features,
maintenance information, and more.
-If the sparse\_super feature flag is set, redundant copies of the
+If the sparse_super feature flag is set, redundant copies of the
superblock and group descriptors are kept only in the groups whose group
number is either 0 or a power of 3, 5, or 7. If the flag is not set,
redundant copies are kept in all groups.
- Name
- Description
* - 0x0
- - \_\_le32
- - s\_inodes\_count
+ - __le32
+ - s_inodes_count
- Total inode count.
* - 0x4
- - \_\_le32
- - s\_blocks\_count\_lo
+ - __le32
+ - s_blocks_count_lo
- Total block count.
* - 0x8
- - \_\_le32
- - s\_r\_blocks\_count\_lo
+ - __le32
+ - s_r_blocks_count_lo
- This number of blocks can only be allocated by the super-user.
* - 0xC
- - \_\_le32
- - s\_free\_blocks\_count\_lo
+ - __le32
+ - s_free_blocks_count_lo
- Free block count.
* - 0x10
- - \_\_le32
- - s\_free\_inodes\_count
+ - __le32
+ - s_free_inodes_count
- Free inode count.
* - 0x14
- - \_\_le32
- - s\_first\_data\_block
+ - __le32
+ - s_first_data_block
- First data block. This must be at least 1 for 1k-block filesystems and
is typically 0 for all other block sizes.
* - 0x18
- - \_\_le32
- - s\_log\_block\_size
- - Block size is 2 ^ (10 + s\_log\_block\_size).
+ - __le32
+ - s_log_block_size
+ - Block size is 2 ^ (10 + s_log_block_size).
* - 0x1C
- - \_\_le32
- - s\_log\_cluster\_size
- - Cluster size is 2 ^ (10 + s\_log\_cluster\_size) blocks if bigalloc is
- enabled. Otherwise s\_log\_cluster\_size must equal s\_log\_block\_size.
+ - __le32
+ - s_log_cluster_size
+ - Cluster size is 2 ^ (10 + s_log_cluster_size) blocks if bigalloc is
+ enabled. Otherwise s_log_cluster_size must equal s_log_block_size.
* - 0x20
- - \_\_le32
- - s\_blocks\_per\_group
+ - __le32
+ - s_blocks_per_group
- Blocks per group.
* - 0x24
- - \_\_le32
- - s\_clusters\_per\_group
+ - __le32
+ - s_clusters_per_group
- Clusters per group, if bigalloc is enabled. Otherwise
- s\_clusters\_per\_group must equal s\_blocks\_per\_group.
+ s_clusters_per_group must equal s_blocks_per_group.
* - 0x28
- - \_\_le32
- - s\_inodes\_per\_group
+ - __le32
+ - s_inodes_per_group
- Inodes per group.
* - 0x2C
- - \_\_le32
- - s\_mtime
+ - __le32
+ - s_mtime
- Mount time, in seconds since the epoch.
* - 0x30
- - \_\_le32
- - s\_wtime
+ - __le32
+ - s_wtime
- Write time, in seconds since the epoch.
* - 0x34
- - \_\_le16
- - s\_mnt\_count
+ - __le16
+ - s_mnt_count
- Number of mounts since the last fsck.
* - 0x36
- - \_\_le16
- - s\_max\_mnt\_count
+ - __le16
+ - s_max_mnt_count
- Number of mounts beyond which a fsck is needed.
* - 0x38
- - \_\_le16
- - s\_magic
+ - __le16
+ - s_magic
- Magic signature, 0xEF53
* - 0x3A
- - \_\_le16
- - s\_state
+ - __le16
+ - s_state
- File system state. See super_state_ for more info.
* - 0x3C
- - \_\_le16
- - s\_errors
+ - __le16
+ - s_errors
- Behaviour when detecting errors. See super_errors_ for more info.
* - 0x3E
- - \_\_le16
- - s\_minor\_rev\_level
+ - __le16
+ - s_minor_rev_level
- Minor revision level.
* - 0x40
- - \_\_le32
- - s\_lastcheck
+ - __le32
+ - s_lastcheck
- Time of last check, in seconds since the epoch.
* - 0x44
- - \_\_le32
- - s\_checkinterval
+ - __le32
+ - s_checkinterval
- Maximum time between checks, in seconds.
* - 0x48
- - \_\_le32
- - s\_creator\_os
+ - __le32
+ - s_creator_os
- Creator OS. See the table super_creator_ for more info.
* - 0x4C
- - \_\_le32
- - s\_rev\_level
+ - __le32
+ - s_rev_level
- Revision level. See the table super_revision_ for more info.
* - 0x50
- - \_\_le16
- - s\_def\_resuid
+ - __le16
+ - s_def_resuid
- Default uid for reserved blocks.
* - 0x52
- - \_\_le16
- - s\_def\_resgid
+ - __le16
+ - s_def_resgid
- Default gid for reserved blocks.
* -
-
about a feature in either the compatible or incompatible feature set, it
must abort and not try to meddle with things it doesn't understand...
* - 0x54
- - \_\_le32
- - s\_first\_ino
+ - __le32
+ - s_first_ino
- First non-reserved inode.
* - 0x58
- - \_\_le16
- - s\_inode\_size
+ - __le16
+ - s_inode_size
- Size of inode structure, in bytes.
* - 0x5A
- - \_\_le16
- - s\_block\_group\_nr
+ - __le16
+ - s_block_group_nr
- Block group # of this superblock.
* - 0x5C
- - \_\_le32
- - s\_feature\_compat
+ - __le32
+ - s_feature_compat
- Compatible feature set flags. Kernel can still read/write this fs even
if it doesn't understand a flag; fsck should not do that. See the
super_compat_ table for more info.
* - 0x60
- - \_\_le32
- - s\_feature\_incompat
+ - __le32
+ - s_feature_incompat
- Incompatible feature set. If the kernel or fsck doesn't understand one
of these bits, it should stop. See the super_incompat_ table for more
info.
* - 0x64
- - \_\_le32
- - s\_feature\_ro\_compat
+ - __le32
+ - s_feature_ro_compat
- Readonly-compatible feature set. If the kernel doesn't understand one of
these bits, it can still mount read-only. See the super_rocompat_ table
for more info.
* - 0x68
- - \_\_u8
- - s\_uuid[16]
+ - __u8
+ - s_uuid[16]
- 128-bit UUID for volume.
* - 0x78
- char
- - s\_volume\_name[16]
+ - s_volume_name[16]
- Volume label.
* - 0x88
- char
- - s\_last\_mounted[64]
+ - s_last_mounted[64]
- Directory where filesystem was last mounted.
* - 0xC8
- - \_\_le32
- - s\_algorithm\_usage\_bitmap
+ - __le32
+ - s_algorithm_usage_bitmap
- For compression (Not used in e2fsprogs/Linux)
* -
-
- Performance hints. Directory preallocation should only happen if the
EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on.
* - 0xCC
- - \_\_u8
- - s\_prealloc\_blocks
+ - __u8
+ - s_prealloc_blocks
- #. of blocks to try to preallocate for ... files? (Not used in
e2fsprogs/Linux)
* - 0xCD
- - \_\_u8
- - s\_prealloc\_dir\_blocks
+ - __u8
+ - s_prealloc_dir_blocks
- #. of blocks to preallocate for directories. (Not used in
e2fsprogs/Linux)
* - 0xCE
- - \_\_le16
- - s\_reserved\_gdt\_blocks
+ - __le16
+ - s_reserved_gdt_blocks
- Number of reserved GDT entries for future filesystem expansion.
* -
-
- Journalling support is valid only if EXT4_FEATURE_COMPAT_HAS_JOURNAL is
set.
* - 0xD0
- - \_\_u8
- - s\_journal\_uuid[16]
+ - __u8
+ - s_journal_uuid[16]
- UUID of journal superblock
* - 0xE0
- - \_\_le32
- - s\_journal\_inum
+ - __le32
+ - s_journal_inum
- inode number of journal file.
* - 0xE4
- - \_\_le32
- - s\_journal\_dev
+ - __le32
+ - s_journal_dev
- Device number of journal file, if the external journal feature flag is
set.
* - 0xE8
- - \_\_le32
- - s\_last\_orphan
+ - __le32
+ - s_last_orphan
- Start of list of orphaned inodes to delete.
* - 0xEC
- - \_\_le32
- - s\_hash\_seed[4]
+ - __le32
+ - s_hash_seed[4]
- HTREE hash seed.
* - 0xFC
- - \_\_u8
- - s\_def\_hash\_version
+ - __u8
+ - s_def_hash_version
- Default hash algorithm to use for directory hashes. See super_def_hash_
for more info.
* - 0xFD
- - \_\_u8
- - s\_jnl\_backup\_type
- - If this value is 0 or EXT3\_JNL\_BACKUP\_BLOCKS (1), then the
+ - __u8
+ - s_jnl_backup_type
+ - If this value is 0 or EXT3_JNL_BACKUP_BLOCKS (1), then the
``s_jnl_blocks`` field contains a duplicate copy of the inode's
``i_block[]`` array and ``i_size``.
* - 0xFE
- - \_\_le16
- - s\_desc\_size
+ - __le16
+ - s_desc_size
- Size of group descriptors, in bytes, if the 64bit incompat feature flag
is set.
* - 0x100
- - \_\_le32
- - s\_default\_mount\_opts
+ - __le32
+ - s_default_mount_opts
- Default mount options. See the super_mountopts_ table for more info.
* - 0x104
- - \_\_le32
- - s\_first\_meta\_bg
- - First metablock block group, if the meta\_bg feature is enabled.
+ - __le32
+ - s_first_meta_bg
+ - First metablock block group, if the meta_bg feature is enabled.
* - 0x108
- - \_\_le32
- - s\_mkfs\_time
+ - __le32
+ - s_mkfs_time
- When the filesystem was created, in seconds since the epoch.
* - 0x10C
- - \_\_le32
- - s\_jnl\_blocks[17]
+ - __le32
+ - s_jnl_blocks[17]
- Backup copy of the journal inode's ``i_block[]`` array in the first 15
- elements and i\_size\_high and i\_size in the 16th and 17th elements,
+ elements and i_size_high and i_size in the 16th and 17th elements,
respectively.
* -
-
-
- 64bit support is valid only if EXT4_FEATURE_COMPAT_64BIT is set.
* - 0x150
- - \_\_le32
- - s\_blocks\_count\_hi
+ - __le32
+ - s_blocks_count_hi
- High 32-bits of the block count.
* - 0x154
- - \_\_le32
- - s\_r\_blocks\_count\_hi
+ - __le32
+ - s_r_blocks_count_hi
- High 32-bits of the reserved block count.
* - 0x158
- - \_\_le32
- - s\_free\_blocks\_count\_hi
+ - __le32
+ - s_free_blocks_count_hi
- High 32-bits of the free block count.
* - 0x15C
- - \_\_le16
- - s\_min\_extra\_isize
+ - __le16
+ - s_min_extra_isize
- All inodes have at least # bytes.
* - 0x15E
- - \_\_le16
- - s\_want\_extra\_isize
+ - __le16
+ - s_want_extra_isize
- New inodes should reserve # bytes.
* - 0x160
- - \_\_le32
- - s\_flags
+ - __le32
+ - s_flags
- Miscellaneous flags. See the super_flags_ table for more info.
* - 0x164
- - \_\_le16
- - s\_raid\_stride
+ - __le16
+ - s_raid_stride
- RAID stride. This is the number of logical blocks read from or written
to the disk before moving to the next disk. This affects the placement
of filesystem metadata, which will hopefully make RAID storage faster.
* - 0x166
- - \_\_le16
- - s\_mmp\_interval
+ - __le16
+ - s_mmp_interval
- #. seconds to wait in multi-mount prevention (MMP) checking. In theory,
MMP is a mechanism to record in the superblock which host and device
have mounted the filesystem, in order to prevent multiple mounts. This
feature does not seem to be implemented...
* - 0x168
- - \_\_le64
- - s\_mmp\_block
+ - __le64
+ - s_mmp_block
- Block # for multi-mount protection data.
* - 0x170
- - \_\_le32
- - s\_raid\_stripe\_width
+ - __le32
+ - s_raid_stripe_width
- RAID stripe width. This is the number of logical blocks read from or
written to the disk before coming back to the current disk. This is used
by the block allocator to try to reduce the number of read-modify-write
operations in a RAID5/6.
* - 0x174
- - \_\_u8
- - s\_log\_groups\_per\_flex
+ - __u8
+ - s_log_groups_per_flex
- Size of a flexible block group is 2 ^ ``s_log_groups_per_flex``.
* - 0x175
- - \_\_u8
- - s\_checksum\_type
+ - __u8
+ - s_checksum_type
- Metadata checksum algorithm type. The only valid value is 1 (crc32c).
* - 0x176
- - \_\_le16
- - s\_reserved\_pad
+ - __le16
+ - s_reserved_pad
-
* - 0x178
- - \_\_le64
- - s\_kbytes\_written
+ - __le64
+ - s_kbytes_written
- Number of KiB written to this filesystem over its lifetime.
* - 0x180
- - \_\_le32
- - s\_snapshot\_inum
+ - __le32
+ - s_snapshot_inum
- inode number of active snapshot. (Not used in e2fsprogs/Linux.)
* - 0x184
- - \_\_le32
- - s\_snapshot\_id
+ - __le32
+ - s_snapshot_id
- Sequential ID of active snapshot. (Not used in e2fsprogs/Linux.)
* - 0x188
- - \_\_le64
- - s\_snapshot\_r\_blocks\_count
+ - __le64
+ - s_snapshot_r_blocks_count
- Number of blocks reserved for active snapshot's future use. (Not used in
e2fsprogs/Linux.)
* - 0x190
- - \_\_le32
- - s\_snapshot\_list
+ - __le32
+ - s_snapshot_list
- inode number of the head of the on-disk snapshot list. (Not used in
e2fsprogs/Linux.)
* - 0x194
- - \_\_le32
- - s\_error\_count
+ - __le32
+ - s_error_count
- Number of errors seen.
* - 0x198
- - \_\_le32
- - s\_first\_error\_time
+ - __le32
+ - s_first_error_time
- First time an error happened, in seconds since the epoch.
* - 0x19C
- - \_\_le32
- - s\_first\_error\_ino
+ - __le32
+ - s_first_error_ino
- inode involved in first error.
* - 0x1A0
- - \_\_le64
- - s\_first\_error\_block
+ - __le64
+ - s_first_error_block
- Number of block involved of first error.
* - 0x1A8
- - \_\_u8
- - s\_first\_error\_func[32]
+ - __u8
+ - s_first_error_func[32]
- Name of function where the error happened.
* - 0x1C8
- - \_\_le32
- - s\_first\_error\_line
+ - __le32
+ - s_first_error_line
- Line number where error happened.
* - 0x1CC
- - \_\_le32
- - s\_last\_error\_time
+ - __le32
+ - s_last_error_time
- Time of most recent error, in seconds since the epoch.
* - 0x1D0
- - \_\_le32
- - s\_last\_error\_ino
+ - __le32
+ - s_last_error_ino
- inode involved in most recent error.
* - 0x1D4
- - \_\_le32
- - s\_last\_error\_line
+ - __le32
+ - s_last_error_line
- Line number where most recent error happened.
* - 0x1D8
- - \_\_le64
- - s\_last\_error\_block
+ - __le64
+ - s_last_error_block
- Number of block involved in most recent error.
* - 0x1E0
- - \_\_u8
- - s\_last\_error\_func[32]
+ - __u8
+ - s_last_error_func[32]
- Name of function where the most recent error happened.
* - 0x200
- - \_\_u8
- - s\_mount\_opts[64]
+ - __u8
+ - s_mount_opts[64]
- ASCIIZ string of mount options.
* - 0x240
- - \_\_le32
- - s\_usr\_quota\_inum
+ - __le32
+ - s_usr_quota_inum
- Inode number of user `quota <quota>`__ file.
* - 0x244
- - \_\_le32
- - s\_grp\_quota\_inum
+ - __le32
+ - s_grp_quota_inum
- Inode number of group `quota <quota>`__ file.
* - 0x248
- - \_\_le32
- - s\_overhead\_blocks
+ - __le32
+ - s_overhead_blocks
- Overhead blocks/clusters in fs. (Huh? This field is always zero, which
means that the kernel calculates it dynamically.)
* - 0x24C
- - \_\_le32
- - s\_backup\_bgs[2]
- - Block groups containing superblock backups (if sparse\_super2)
+ - __le32
+ - s_backup_bgs[2]
+ - Block groups containing superblock backups (if sparse_super2)
* - 0x254
- - \_\_u8
- - s\_encrypt\_algos[4]
+ - __u8
+ - s_encrypt_algos[4]
- Encryption algorithms in use. There can be up to four algorithms in use
at any time; valid algorithm codes are given in the super_encrypt_ table
below.
* - 0x258
- - \_\_u8
- - s\_encrypt\_pw\_salt[16]
+ - __u8
+ - s_encrypt_pw_salt[16]
- Salt for the string2key algorithm for encryption.
* - 0x268
- - \_\_le32
- - s\_lpf\_ino
+ - __le32
+ - s_lpf_ino
- Inode number of lost+found
* - 0x26C
- - \_\_le32
- - s\_prj\_quota\_inum
+ - __le32
+ - s_prj_quota_inum
- Inode that tracks project quotas.
* - 0x270
- - \_\_le32
- - s\_checksum\_seed
- - Checksum seed used for metadata\_csum calculations. This value is
- crc32c(~0, $orig\_fs\_uuid).
+ - __le32
+ - s_checksum_seed
+ - Checksum seed used for metadata_csum calculations. This value is
+ crc32c(~0, $orig_fs_uuid).
* - 0x274
- - \_\_u8
- - s\_wtime_hi
+ - __u8
+ - s_wtime_hi
- Upper 8 bits of the s_wtime field.
* - 0x275
- - \_\_u8
- - s\_mtime_hi
+ - __u8
+ - s_mtime_hi
- Upper 8 bits of the s_mtime field.
* - 0x276
- - \_\_u8
- - s\_mkfs_time_hi
+ - __u8
+ - s_mkfs_time_hi
- Upper 8 bits of the s_mkfs_time field.
* - 0x277
- - \_\_u8
- - s\_lastcheck_hi
+ - __u8
+ - s_lastcheck_hi
- Upper 8 bits of the s_lastcheck_hi field.
* - 0x278
- - \_\_u8
- - s\_first_error_time_hi
+ - __u8
+ - s_first_error_time_hi
- Upper 8 bits of the s_first_error_time_hi field.
* - 0x279
- - \_\_u8
- - s\_last_error_time_hi
+ - __u8
+ - s_last_error_time_hi
- Upper 8 bits of the s_last_error_time_hi field.
* - 0x27A
- - \_\_u8
- - s\_pad[2]
+ - __u8
+ - s_pad[2]
- Zero padding.
* - 0x27C
- - \_\_le16
- - s\_encoding
+ - __le16
+ - s_encoding
- Filename charset encoding.
* - 0x27E
- - \_\_le16
- - s\_encoding_flags
+ - __le16
+ - s_encoding_flags
- Filename charset encoding flags.
* - 0x280
- - \_\_le32
- - s\_orphan\_file\_inum
+ - __le32
+ - s_orphan_file_inum
- Orphan file inode number.
* - 0x284
- - \_\_le32
- - s\_reserved[94]
+ - __le32
+ - s_reserved[94]
- Padding to the end of the block.
* - 0x3FC
- - \_\_le32
- - s\_checksum
+ - __le32
+ - s_checksum
- Superblock checksum.
.. _super_state:
* - Value
- Description
* - 0x1
- - Directory preallocation (COMPAT\_DIR\_PREALLOC).
+ - Directory preallocation (COMPAT_DIR_PREALLOC).
* - 0x2
- “imagic inodes”. Not clear from the code what this does
- (COMPAT\_IMAGIC\_INODES).
+ (COMPAT_IMAGIC_INODES).
* - 0x4
- - Has a journal (COMPAT\_HAS\_JOURNAL).
+ - Has a journal (COMPAT_HAS_JOURNAL).
* - 0x8
- - Supports extended attributes (COMPAT\_EXT\_ATTR).
+ - Supports extended attributes (COMPAT_EXT_ATTR).
* - 0x10
- Has reserved GDT blocks for filesystem expansion
- (COMPAT\_RESIZE\_INODE). Requires RO\_COMPAT\_SPARSE\_SUPER.
+ (COMPAT_RESIZE_INODE). Requires RO_COMPAT_SPARSE_SUPER.
* - 0x20
- - Has directory indices (COMPAT\_DIR\_INDEX).
+ - Has directory indices (COMPAT_DIR_INDEX).
* - 0x40
- “Lazy BG”. Not in Linux kernel, seems to have been for uninitialized
- block groups? (COMPAT\_LAZY\_BG)
+ block groups? (COMPAT_LAZY_BG)
* - 0x80
- - “Exclude inode”. Not used. (COMPAT\_EXCLUDE\_INODE).
+ - “Exclude inode”. Not used. (COMPAT_EXCLUDE_INODE).
* - 0x100
- “Exclude bitmap”. Seems to be used to indicate the presence of
snapshot-related exclude bitmaps? Not defined in kernel or used in
- e2fsprogs (COMPAT\_EXCLUDE\_BITMAP).
+ e2fsprogs (COMPAT_EXCLUDE_BITMAP).
* - 0x200
- - Sparse Super Block, v2. If this flag is set, the SB field s\_backup\_bgs
+ - Sparse Super Block, v2. If this flag is set, the SB field s_backup_bgs
points to the two block groups that contain backup superblocks
- (COMPAT\_SPARSE\_SUPER2).
+ (COMPAT_SPARSE_SUPER2).
* - 0x400
- Fast commits supported. Although fast commits blocks are
backward incompatible, fast commit blocks are not always
present in the journal. If fast commit blocks are present in
the journal, JBD2 incompat feature
- (JBD2\_FEATURE\_INCOMPAT\_FAST\_COMMIT) gets
- set (COMPAT\_FAST\_COMMIT).
+ (JBD2_FEATURE_INCOMPAT_FAST_COMMIT) gets
+ set (COMPAT_FAST_COMMIT).
* - 0x1000
- Orphan file allocated. This is the special file for more efficient
tracking of unlinked but still open inodes. When there may be any
entries in the file, we additionally set proper rocompat feature
- (RO\_COMPAT\_ORPHAN\_PRESENT).
+ (RO_COMPAT_ORPHAN_PRESENT).
.. _super_incompat:
* - Value
- Description
* - 0x1
- - Compression (INCOMPAT\_COMPRESSION).
+ - Compression (INCOMPAT_COMPRESSION).
* - 0x2
- - Directory entries record the file type. See ext4\_dir\_entry\_2 below
- (INCOMPAT\_FILETYPE).
+ - Directory entries record the file type. See ext4_dir_entry_2 below
+ (INCOMPAT_FILETYPE).
* - 0x4
- - Filesystem needs recovery (INCOMPAT\_RECOVER).
+ - Filesystem needs recovery (INCOMPAT_RECOVER).
* - 0x8
- - Filesystem has a separate journal device (INCOMPAT\_JOURNAL\_DEV).
+ - Filesystem has a separate journal device (INCOMPAT_JOURNAL_DEV).
* - 0x10
- Meta block groups. See the earlier discussion of this feature
- (INCOMPAT\_META\_BG).
+ (INCOMPAT_META_BG).
* - 0x40
- - Files in this filesystem use extents (INCOMPAT\_EXTENTS).
+ - Files in this filesystem use extents (INCOMPAT_EXTENTS).
* - 0x80
- - Enable a filesystem size of 2^64 blocks (INCOMPAT\_64BIT).
+ - Enable a filesystem size of 2^64 blocks (INCOMPAT_64BIT).
* - 0x100
- - Multiple mount protection (INCOMPAT\_MMP).
+ - Multiple mount protection (INCOMPAT_MMP).
* - 0x200
- Flexible block groups. See the earlier discussion of this feature
- (INCOMPAT\_FLEX\_BG).
+ (INCOMPAT_FLEX_BG).
* - 0x400
- Inodes can be used to store large extended attribute values
- (INCOMPAT\_EA\_INODE).
+ (INCOMPAT_EA_INODE).
* - 0x1000
- - Data in directory entry (INCOMPAT\_DIRDATA). (Not implemented?)
+ - Data in directory entry (INCOMPAT_DIRDATA). (Not implemented?)
* - 0x2000
- Metadata checksum seed is stored in the superblock. This feature enables
- the administrator to change the UUID of a metadata\_csum filesystem
+ the administrator to change the UUID of a metadata_csum filesystem
while the filesystem is mounted; without it, the checksum definition
- requires all metadata blocks to be rewritten (INCOMPAT\_CSUM\_SEED).
+ requires all metadata blocks to be rewritten (INCOMPAT_CSUM_SEED).
* - 0x4000
- - Large directory >2GB or 3-level htree (INCOMPAT\_LARGEDIR). Prior to
+ - Large directory >2GB or 3-level htree (INCOMPAT_LARGEDIR). Prior to
this feature, directories could not be larger than 4GiB and could not
have an htree more than 2 levels deep. If this feature is enabled,
directories can be larger than 4GiB and have a maximum htree depth of 3.
* - 0x8000
- - Data in inode (INCOMPAT\_INLINE\_DATA).
+ - Data in inode (INCOMPAT_INLINE_DATA).
* - 0x10000
- - Encrypted inodes are present on the filesystem. (INCOMPAT\_ENCRYPT).
+ - Encrypted inodes are present on the filesystem. (INCOMPAT_ENCRYPT).
.. _super_rocompat:
- Description
* - 0x1
- Sparse superblocks. See the earlier discussion of this feature
- (RO\_COMPAT\_SPARSE\_SUPER).
+ (RO_COMPAT_SPARSE_SUPER).
* - 0x2
- This filesystem has been used to store a file greater than 2GiB
- (RO\_COMPAT\_LARGE\_FILE).
+ (RO_COMPAT_LARGE_FILE).
* - 0x4
- - Not used in kernel or e2fsprogs (RO\_COMPAT\_BTREE\_DIR).
+ - Not used in kernel or e2fsprogs (RO_COMPAT_BTREE_DIR).
* - 0x8
- This filesystem has files whose sizes are represented in units of
logical blocks, not 512-byte sectors. This implies a very large file
- indeed! (RO\_COMPAT\_HUGE\_FILE)
+ indeed! (RO_COMPAT_HUGE_FILE)
* - 0x10
- Group descriptors have checksums. In addition to detecting corruption,
this is useful for lazy formatting with uninitialized groups
- (RO\_COMPAT\_GDT\_CSUM).
+ (RO_COMPAT_GDT_CSUM).
* - 0x20
- Indicates that the old ext3 32,000 subdirectory limit no longer applies
- (RO\_COMPAT\_DIR\_NLINK). A directory's i\_links\_count will be set to 1
+ (RO_COMPAT_DIR_NLINK). A directory's i_links_count will be set to 1
if it is incremented past 64,999.
* - 0x40
- Indicates that large inodes exist on this filesystem
- (RO\_COMPAT\_EXTRA\_ISIZE).
+ (RO_COMPAT_EXTRA_ISIZE).
* - 0x80
- - This filesystem has a snapshot (RO\_COMPAT\_HAS\_SNAPSHOT).
+ - This filesystem has a snapshot (RO_COMPAT_HAS_SNAPSHOT).
* - 0x100
- - `Quota <Quota>`__ (RO\_COMPAT\_QUOTA).
+ - `Quota <Quota>`__ (RO_COMPAT_QUOTA).
* - 0x200
- This filesystem supports “bigalloc”, which means that file extents are
tracked in units of clusters (of blocks) instead of blocks
- (RO\_COMPAT\_BIGALLOC).
+ (RO_COMPAT_BIGALLOC).
* - 0x400
- This filesystem supports metadata checksumming.
- (RO\_COMPAT\_METADATA\_CSUM; implies RO\_COMPAT\_GDT\_CSUM, though
- GDT\_CSUM must not be set)
+ (RO_COMPAT_METADATA_CSUM; implies RO_COMPAT_GDT_CSUM, though
+ GDT_CSUM must not be set)
* - 0x800
- Filesystem supports replicas. This feature is neither in the kernel nor
- e2fsprogs. (RO\_COMPAT\_REPLICA)
+ e2fsprogs. (RO_COMPAT_REPLICA)
* - 0x1000
- Read-only filesystem image; the kernel will not mount this image
read-write and most tools will refuse to write to the image.
- (RO\_COMPAT\_READONLY)
+ (RO_COMPAT_READONLY)
* - 0x2000
- - Filesystem tracks project quotas. (RO\_COMPAT\_PROJECT)
+ - Filesystem tracks project quotas. (RO_COMPAT_PROJECT)
* - 0x8000
- - Verity inodes may be present on the filesystem. (RO\_COMPAT\_VERITY)
+ - Verity inodes may be present on the filesystem. (RO_COMPAT_VERITY)
* - 0x10000
- Indicates orphan file may have valid orphan entries and thus we need
to clean them up when mounting the filesystem
- (RO\_COMPAT\_ORPHAN\_PRESENT).
+ (RO_COMPAT_ORPHAN_PRESENT).
.. _super_def_hash:
* - Value
- Description
* - 0x0001
- - Print debugging info upon (re)mount. (EXT4\_DEFM\_DEBUG)
+ - Print debugging info upon (re)mount. (EXT4_DEFM_DEBUG)
* - 0x0002
- New files take the gid of the containing directory (instead of the fsgid
- of the current process). (EXT4\_DEFM\_BSDGROUPS)
+ of the current process). (EXT4_DEFM_BSDGROUPS)
* - 0x0004
- - Support userspace-provided extended attributes. (EXT4\_DEFM\_XATTR\_USER)
+ - Support userspace-provided extended attributes. (EXT4_DEFM_XATTR_USER)
* - 0x0008
- - Support POSIX access control lists (ACLs). (EXT4\_DEFM\_ACL)
+ - Support POSIX access control lists (ACLs). (EXT4_DEFM_ACL)
* - 0x0010
- - Do not support 32-bit UIDs. (EXT4\_DEFM\_UID16)
+ - Do not support 32-bit UIDs. (EXT4_DEFM_UID16)
* - 0x0020
- All data and metadata are commited to the journal.
- (EXT4\_DEFM\_JMODE\_DATA)
+ (EXT4_DEFM_JMODE_DATA)
* - 0x0040
- All data are flushed to the disk before metadata are committed to the
- journal. (EXT4\_DEFM\_JMODE\_ORDERED)
+ journal. (EXT4_DEFM_JMODE_ORDERED)
* - 0x0060
- Data ordering is not preserved; data may be written after the metadata
- has been written. (EXT4\_DEFM\_JMODE\_WBACK)
+ has been written. (EXT4_DEFM_JMODE_WBACK)
* - 0x0100
- - Disable write flushes. (EXT4\_DEFM\_NOBARRIER)
+ - Disable write flushes. (EXT4_DEFM_NOBARRIER)
* - 0x0200
- Track which blocks in a filesystem are metadata and therefore should not
be used as data blocks. This option will be enabled by default on 3.18,
- hopefully. (EXT4\_DEFM\_BLOCK\_VALIDITY)
+ hopefully. (EXT4_DEFM_BLOCK_VALIDITY)
* - 0x0400
- Enable DISCARD support, where the storage device is told about blocks
- becoming unused. (EXT4\_DEFM\_DISCARD)
+ becoming unused. (EXT4_DEFM_DISCARD)
* - 0x0800
- - Disable delayed allocation. (EXT4\_DEFM\_NODELALLOC)
+ - Disable delayed allocation. (EXT4_DEFM_NODELALLOC)
.. _super_flags:
* - Value
- Description
* - 0
- - Invalid algorithm (ENCRYPTION\_MODE\_INVALID).
+ - Invalid algorithm (ENCRYPTION_MODE_INVALID).
* - 1
- - 256-bit AES in XTS mode (ENCRYPTION\_MODE\_AES\_256\_XTS).
+ - 256-bit AES in XTS mode (ENCRYPTION_MODE_AES_256_XTS).
* - 2
- - 256-bit AES in GCM mode (ENCRYPTION\_MODE\_AES\_256\_GCM).
+ - 256-bit AES in GCM mode (ENCRYPTION_MODE_AES_256_GCM).
* - 3
- - 256-bit AES in CBC mode (ENCRYPTION\_MODE\_AES\_256\_CBC).
+ - 256-bit AES in CBC mode (ENCRYPTION_MODE_AES_256_CBC).
Total size of the superblock is 1024 bytes.
its use on each netfs inode it is helping to manage. To this end, a context
structure is defined::
- struct netfs_i_context {
+ struct netfs_inode {
+ struct inode inode;
const struct netfs_request_ops *ops;
- struct fscache_cookie *cache;
+ struct fscache_cookie *cache;
};
-A network filesystem that wants to use netfs lib must place one of these
-directly after the VFS ``struct inode`` it allocates, usually as part of its
-own struct. This can be done in a way similar to the following::
+A network filesystem that wants to use netfs lib must place one of these in its
+inode wrapper struct instead of the VFS ``struct inode``. This can be done in
+a way similar to the following::
struct my_inode {
- struct {
- /* These must be contiguous */
- struct inode vfs_inode;
- struct netfs_i_context netfs_ctx;
- };
+ struct netfs_inode netfs; /* Netfslib context and vfs inode */
...
};
-This allows netfslib to find its state by simple offset from the inode pointer,
-thereby allowing the netfslib helper functions to be pointed to directly by the
-VFS/VM operation tables.
+This allows netfslib to find its state by using ``container_of()`` from the
+inode pointer, thereby allowing the netfslib helper functions to be pointed to
+directly by the VFS/VM operation tables.
The structure contains the following fields:
+ * ``inode``
+
+ The VFS inode structure.
+
* ``ops``
The set of operations provided by the network filesystem to netfslib.
provided. Firstly, a function to perform basic initialisation on a context and
set the operations table pointer::
- void netfs_i_context_init(struct inode *inode,
- const struct netfs_request_ops *ops);
+ void netfs_inode_init(struct netfs_inode *ctx,
+ const struct netfs_request_ops *ops);
-then two functions to cast between the VFS inode structure and the netfs
-context::
+then a function to cast from the VFS inode structure to the netfs context::
- struct netfs_i_context *netfs_i_context(struct inode *inode);
- struct inode *netfs_inode(struct netfs_i_context *ctx);
+ struct netfs_inode *netfs_node(struct inode *inode);
and finally, a function to get the cache cookie pointer from the context
attached to an inode (or NULL if fscache is disabled)::
- struct fscache_cookie *netfs_i_cookie(struct inode *inode);
+ struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx);
Buffered Read Helpers
void netfs_readahead(struct readahead_control *ractl);
int netfs_read_folio(struct file *file,
- struct folio *folio);
- int netfs_write_begin(struct file *file,
+ struct folio *folio);
+ int netfs_write_begin(struct netfs_inode *ctx,
+ struct file *file,
struct address_space *mapping,
loff_t pos,
unsigned int len,
through the suppplied table of operations. Waits will be performed as
necessary before returning for helpers that are meant to be synchronous.
-If an error occurs and netfs_priv is non-NULL, ops->cleanup() will be called to
-deal with it. If some parts of the request are in progress when an error
-occurs, the request will get partially completed if sufficient data is read.
+If an error occurs, the ->free_request() will be called to clean up the
+netfs_io_request struct allocated. If some parts of the request are in
+progress when an error occurs, the request will get partially completed if
+sufficient data is read.
Additionally, there is::
* ``netfs_priv``
The network filesystem's private data. The value for this can be passed in
- to the helper functions or set during the request. The ->cleanup() op will
- be called if this is non-NULL at the end.
+ to the helper functions or set during the request.
* ``start``
* ``len``
struct netfs_request_ops {
void (*init_request)(struct netfs_io_request *rreq, struct file *file);
+ void (*free_request)(struct netfs_io_request *rreq);
int (*begin_cache_operation)(struct netfs_io_request *rreq);
void (*expand_readahead)(struct netfs_io_request *rreq);
bool (*clamp_length)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq);
bool (*is_still_valid)(struct netfs_io_request *rreq);
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
- struct folio *folio, void **_fsdata);
+ struct folio **foliop, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
- void (*cleanup)(struct address_space *mapping, void *netfs_priv);
};
The operations are as follows:
* ``init_request()``
[Optional] This is called to initialise the request structure. It is given
- the file for reference and can modify the ->netfs_priv value.
+ the file for reference.
+
+ * ``free_request()``
+
+ [Optional] This is called as the request is being deallocated so that the
+ filesystem can clean up any state it has attached there.
* ``begin_cache_operation()``
allocated/grabbed the folio to be modified to allow the filesystem to flush
conflicting state before allowing it to be modified.
- It should return 0 if everything is now fine, -EAGAIN if the folio should be
- regrabbed and any other error code to abort the operation.
+ It may unlock and discard the folio it was given and set the caller's folio
+ pointer to NULL. It should return 0 if everything is now fine (``*foliop``
+ left set) or the op should be retried (``*foliop`` cleared) and any other
+ error code to abort the operation.
* ``done``
[Optional] This is called after the folios in the request have all been
unlocked (and marked uptodate if applicable).
- * ``cleanup``
-
- [Optional] This is called as the request is being deallocated so that the
- filesystem can clean up ->netfs_priv.
-
Read Helper Procedure
persistent and could change even while the overlay filesystem is mounted, as
summarized in the `Inode properties`_ table above.
+4) "idmapped mounts"
+When the upper or lower layers are idmapped mounts overlayfs will be mounted
+without support for POSIX Access Control Lists (ACLs). This limitation will
+eventually be lifted.
Changes to underlying filesystems
---------------------------------
+++ /dev/null
-.. SPDX-License-Identifier: GPL-2.0+
-
-============================================
-The Linux Hardware Timestamping Engine (HTE)
-============================================
-
-:Author: Dipen Patel
-
-Introduction
-------------
-
-Certain devices have built in hardware timestamping engines which can
-monitor sets of system signals, lines, buses etc... in realtime for state
-change; upon detecting the change they can automatically store the timestamp at
-the moment of occurrence. Such functionality may help achieve better accuracy
-in obtaining timestamps than using software counterparts i.e. ktime and
-friends.
-
-This document describes the API that can be used by hardware timestamping
-engine provider and consumer drivers that want to use the hardware timestamping
-engine (HTE) framework. Both consumers and providers must include
-``#include <linux/hte.h>``.
-
-The HTE framework APIs for the providers
-----------------------------------------
-
-.. kernel-doc:: drivers/hte/hte.c
- :functions: devm_hte_register_chip hte_push_ts_ns
-
-The HTE framework APIs for the consumers
-----------------------------------------
-
-.. kernel-doc:: drivers/hte/hte.c
- :functions: hte_init_line_attr hte_ts_get hte_ts_put devm_hte_request_ts_ns hte_request_ts_ns hte_enable_ts hte_disable_ts of_hte_req_count hte_get_clk_src_info
-
-The HTE framework public structures
------------------------------------
-.. kernel-doc:: include/linux/hte.h
-
-More on the HTE timestamp data
-------------------------------
-The ``struct hte_ts_data`` is used to pass timestamp details between the
-consumers and the providers. It expresses timestamp data in nanoseconds in
-u64. An example of the typical timestamp data life cycle, for the GPIO line is
-as follows::
-
- - Monitors GPIO line change.
- - Detects the state change on GPIO line.
- - Converts timestamps in nanoseconds.
- - Stores GPIO raw level in raw_level variable if the provider has that
- hardware capability.
- - Pushes this hte_ts_data object to HTE subsystem.
- - HTE subsystem increments seq counter and invokes consumer provided callback.
- Based on callback return value, the HTE core invokes secondary callback in
- the thread context.
-
-HTE subsystem debugfs attributes
---------------------------------
-HTE subsystem creates debugfs attributes at ``/sys/kernel/debug/hte/``.
-It also creates line/signal-related debugfs attributes at
-``/sys/kernel/debug/hte/<provider>/<label or line id>/``. Note that these
-attributes are read-only.
-
-`ts_requested`
- The total number of entities requested from the given provider,
- where entity is specified by the provider and could represent
- lines, GPIO, chip signals, buses etc...
- The attribute will be available at
- ``/sys/kernel/debug/hte/<provider>/``.
-
-`total_ts`
- The total number of entities supported by the provider.
- The attribute will be available at
- ``/sys/kernel/debug/hte/<provider>/``.
-
-`dropped_timestamps`
- The dropped timestamps for a given line.
- The attribute will be available at
- ``/sys/kernel/debug/hte/<provider>/<label or line id>/``.
+++ /dev/null
-.. SPDX-License-Identifier: GPL-2.0
-
-============================================
-The Linux Hardware Timestamping Engine (HTE)
-============================================
-
-The HTE Subsystem
-=================
-
-.. toctree::
- :maxdepth: 1
-
- hte
-
-HTE Tegra Provider
-==================
-
-.. toctree::
- :maxdepth: 1
-
- tegra194-hte
-
+++ /dev/null
-.. SPDX-License-Identifier: GPL-2.0+
-
-HTE Kernel provider driver
-==========================
-
-Description
------------
-The Nvidia tegra194 HTE provider driver implements two GTE
-(Generic Timestamping Engine) instances: 1) GPIO GTE and 2) LIC
-(Legacy Interrupt Controller) IRQ GTE. Both GTE instances get the
-timestamp from the system counter TSC which has 31.25MHz clock rate, and the
-driver converts clock tick rate to nanoseconds before storing it as timestamp
-value.
-
-GPIO GTE
---------
-
-This GTE instance timestamps GPIO in real time. For that to happen GPIO
-needs to be configured as input. The always on (AON) GPIO controller instance
-supports timestamping GPIOs in real time and it has 39 GPIO lines. The GPIO GTE
-and AON GPIO controller are tightly coupled as it requires very specific bits
-to be set in GPIO config register before GPIO GTE can be used, for that GPIOLIB
-adds two optional APIs as below. The GPIO GTE code supports both kernel
-and userspace consumers. The kernel space consumers can directly talk to HTE
-subsystem while userspace consumers timestamp requests go through GPIOLIB CDEV
-framework to HTE subsystem.
-
-.. kernel-doc:: drivers/gpio/gpiolib.c
- :functions: gpiod_enable_hw_timestamp_ns gpiod_disable_hw_timestamp_ns
-
-For userspace consumers, GPIO_V2_LINE_FLAG_EVENT_CLOCK_HTE flag must be
-specified during IOCTL calls. Refer to ``tools/gpio/gpio-event-mon.c``, which
-returns the timestamp in nanoseconds.
-
-LIC (Legacy Interrupt Controller) IRQ GTE
------------------------------------------
-
-This GTE instance timestamps LIC IRQ lines in real time. There are 352 IRQ
-lines which this instance can add timestamps to in real time. The hte
-devicetree binding described at ``Documentation/devicetree/bindings/hte/``
-provides an example of how a consumer can request an IRQ line. Since it is a
-one-to-one mapping with IRQ GTE provider, consumers can simply specify the IRQ
-number that they are interested in. There is no userspace consumer support for
-this GTE instance in the HTE framework.
-
-The provider source code of both IRQ and GPIO GTE instances is located at
-``drivers/hte/hte-tegra194.c``. The test driver
-``drivers/hte/hte-tegra194-test.c`` demonstrates HTE API usage for both IRQ
-and GPIO GTE.
scheduler/index
mhi/index
peci/index
- hte/index
Architecture-agnostic documentation
-----------------------------------
* - arm64
- Supported
- ``LLVM=1``
+ * - hexagon
+ - Maintained
+ - ``LLVM=1``
* - mips
- Maintained
- - ``CC=clang``
+ - ``LLVM=1``
* - powerpc
- Maintained
- ``CC=clang``
* - riscv
- Maintained
- - ``CC=clang``
+ - ``LLVM=1``
* - s390
- Maintained
- ``CC=clang``
+ * - um (User Mode)
+ - Maintained
+ - ``LLVM=1``
* - x86
- Supported
- ``LLVM=1``
=====================================
Normally, a stripped down copy of a module's symbol table (containing only
"core" symbols) is made available through module->symtab (See layout_symtab()
-in kernel/module.c). For livepatch modules, the symbol table copied into memory
-on module load must be exactly the same as the symbol table produced when the
-patch module was compiled. This is because the relocations in each livepatch
-relocation section refer to their respective symbols with their symbol indices,
-and the original symbol indices (and thus the symtab ordering) must be
+in kernel/module/kallsyms.c). For livepatch modules, the symbol table copied
+into memory on module load must be exactly the same as the symbol table produced
+when the patch module was compiled. This is because the relocations in each
+livepatch relocation section refer to their respective symbols with their symbol
+indices, and the original symbol indices (and thus the symtab ordering) must be
preserved in order for apply_relocate_add() to find the right symbol.
For example, take this particular rela from a livepatch module:::
``$r23``-``$r31`` ``$s0``-``$s8`` Static registers Yes
================= =============== =================== ============
-Note: The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
-kernel for storing the percpu base address. It normally has no ABI name, but is
-called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1`` in some old code,
-however they are deprecated aliases of ``$a0`` and ``$a1`` respectively.
+.. Note::
+ The register ``$r21`` is reserved in the ELF psABI, but used by the Linux
+ kernel for storing the percpu base address. It normally has no ABI name,
+ but is called ``$u0`` in the kernel. You may also see ``$v0`` or ``$v1``
+ in some old code,however they are deprecated aliases of ``$a0`` and ``$a1``
+ respectively.
FPRs
----
``$f24``-``$f31`` ``$fs0``-``$fs7`` Static registers Yes
================= ================== =================== ============
-Note: You may see ``$fv0`` or ``$fv1`` in some old code, however they are deprecated
-aliases of ``$fa0`` and ``$fa1`` respectively.
+.. Note::
+ You may see ``$fv0`` or ``$fv1`` in some old code, however they are
+ deprecated aliases of ``$fa0`` and ``$fa1`` respectively.
VRs
----
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (in English)
-Note: CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
-in Section 7.4 of "LoongArch Reference Manual, Vol 1"; LIOINTC is "Legacy I/O
-Interrupts" described in Section 11.1 of "Loongson 3A5000 Processor Reference
-Manual"; EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
-"Loongson 3A5000 Processor Reference Manual"; HTVECINTC is "HyperTransport
-Interrupts" described in Section 14.3 of "Loongson 3A5000 Processor Reference
-Manual"; PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
-"Loongson 7A1000 Bridge User Manual"; PCH-LPC is "LPC Interrupts" described in
-Section 24.3 of "Loongson 7A1000 Bridge User Manual".
+.. Note::
+ - CPUINTC is CSR.ECFG/CSR.ESTAT and its interrupt controller described
+ in Section 7.4 of "LoongArch Reference Manual, Vol 1";
+ - LIOINTC is "Legacy I/OInterrupts" described in Section 11.1 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - EIOINTC is "Extended I/O Interrupts" described in Section 11.2 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - HTVECINTC is "HyperTransport Interrupts" described in Section 14.3 of
+ "Loongson 3A5000 Processor Reference Manual";
+ - PCH-PIC/PCH-MSI is "Interrupt Controller" described in Section 5 of
+ "Loongson 7A1000 Bridge User Manual";
+ - PCH-LPC is "LPC Interrupts" described in Section 24.3 of
+ "Loongson 7A1000 Bridge User Manual".
Driver development
==================
-DSA switch drivers need to implement a dsa_switch_ops structure which will
+DSA switch drivers need to implement a ``dsa_switch_ops`` structure which will
contain the various members described below.
-``register_switch_driver()`` registers this dsa_switch_ops in its internal list
-of drivers to probe for. ``unregister_switch_driver()`` does the exact opposite.
+Probing, registration and device lifetime
+-----------------------------------------
-Unless requested differently by setting the priv_size member accordingly, DSA
-does not allocate any driver private context space.
+DSA switches are regular ``device`` structures on buses (be they platform, SPI,
+I2C, MDIO or otherwise). The DSA framework is not involved in their probing
+with the device core.
+
+Switch registration from the perspective of a driver means passing a valid
+``struct dsa_switch`` pointer to ``dsa_register_switch()``, usually from the
+switch driver's probing function. The following members must be valid in the
+provided structure:
+
+- ``ds->dev``: will be used to parse the switch's OF node or platform data.
+
+- ``ds->num_ports``: will be used to create the port list for this switch, and
+ to validate the port indices provided in the OF node.
+
+- ``ds->ops``: a pointer to the ``dsa_switch_ops`` structure holding the DSA
+ method implementations.
+
+- ``ds->priv``: backpointer to a driver-private data structure which can be
+ retrieved in all further DSA method callbacks.
+
+In addition, the following flags in the ``dsa_switch`` structure may optionally
+be configured to obtain driver-specific behavior from the DSA core. Their
+behavior when set is documented through comments in ``include/net/dsa.h``.
+
+- ``ds->vlan_filtering_is_global``
+
+- ``ds->needs_standalone_vlan_filtering``
+
+- ``ds->configure_vlan_while_not_filtering``
+
+- ``ds->untag_bridge_pvid``
+
+- ``ds->assisted_learning_on_cpu_port``
+
+- ``ds->mtu_enforcement_ingress``
+
+- ``ds->fdb_isolation``
+
+Internally, DSA keeps an array of switch trees (group of switches) global to
+the kernel, and attaches a ``dsa_switch`` structure to a tree on registration.
+The tree ID to which the switch is attached is determined by the first u32
+number of the ``dsa,member`` property of the switch's OF node (0 if missing).
+The switch ID within the tree is determined by the second u32 number of the
+same OF property (0 if missing). Registering multiple switches with the same
+switch ID and tree ID is illegal and will cause an error. Using platform data,
+a single switch and a single switch tree is permitted.
+
+In case of a tree with multiple switches, probing takes place asymmetrically.
+The first N-1 callers of ``dsa_register_switch()`` only add their ports to the
+port list of the tree (``dst->ports``), each port having a backpointer to its
+associated switch (``dp->ds``). Then, these switches exit their
+``dsa_register_switch()`` call early, because ``dsa_tree_setup_routing_table()``
+has determined that the tree is not yet complete (not all ports referenced by
+DSA links are present in the tree's port list). The tree becomes complete when
+the last switch calls ``dsa_register_switch()``, and this triggers the effective
+continuation of initialization (including the call to ``ds->ops->setup()``) for
+all switches within that tree, all as part of the calling context of the last
+switch's probe function.
+
+The opposite of registration takes place when calling ``dsa_unregister_switch()``,
+which removes a switch's ports from the port list of the tree. The entire tree
+is torn down when the first switch unregisters.
+
+It is mandatory for DSA switch drivers to implement the ``shutdown()`` callback
+of their respective bus, and call ``dsa_switch_shutdown()`` from it (a minimal
+version of the full teardown performed by ``dsa_unregister_switch()``).
+The reason is that DSA keeps a reference on the master net device, and if the
+driver for the master device decides to unbind on shutdown, DSA's reference
+will block that operation from finalizing.
+
+Either ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` must be called,
+but not both, and the device driver model permits the bus' ``remove()`` method
+to be called even if ``shutdown()`` was already called. Therefore, drivers are
+expected to implement a mutual exclusion method between ``remove()`` and
+``shutdown()`` by setting their drvdata to NULL after any of these has run, and
+checking whether the drvdata is NULL before proceeding to take any action.
+
+After ``dsa_switch_shutdown()`` or ``dsa_unregister_switch()`` was called, no
+further callbacks via the provided ``dsa_switch_ops`` may take place, and the
+driver may free the data structures associated with the ``dsa_switch``.
Switch configuration
--------------------
-- ``tag_protocol``: this is to indicate what kind of tagging protocol is supported,
- should be a valid value from the ``dsa_tag_protocol`` enum
+- ``get_tag_protocol``: this is to indicate what kind of tagging protocol is
+ supported, should be a valid value from the ``dsa_tag_protocol`` enum.
+ The returned information does not have to be static; the driver is passed the
+ CPU port number, as well as the tagging protocol of a possibly stacked
+ upstream switch, in case there are hardware limitations in terms of supported
+ tag formats.
-- ``probe``: probe routine which will be invoked by the DSA platform device upon
- registration to test for the presence/absence of a switch device. For MDIO
- devices, it is recommended to issue a read towards internal registers using
- the switch pseudo-PHY and return whether this is a supported device. For other
- buses, return a non-NULL string
+- ``change_tag_protocol``: when the default tagging protocol has compatibility
+ problems with the master or other issues, the driver may support changing it
+ at runtime, either through a device tree property or through sysfs. In that
+ case, further calls to ``get_tag_protocol`` should report the protocol in
+ current use.
- ``setup``: setup function for the switch, this function is responsible for setting
up the ``dsa_switch_ops`` private structure with all it needs: register maps,
fully configured and ready to serve any kind of request. It is recommended
to issue a software reset of the switch during this setup function in order to
avoid relying on what a previous software agent such as a bootloader/firmware
- may have previously configured.
+ may have previously configured. The method responsible for undoing any
+ applicable allocations or operations done here is ``teardown``.
+
+- ``port_setup`` and ``port_teardown``: methods for initialization and
+ destruction of per-port data structures. It is mandatory for some operations
+ such as registering and unregistering devlink port regions to be done from
+ these methods, otherwise they are optional. A port will be torn down only if
+ it has been previously set up. It is possible for a port to be set up during
+ probing only to be torn down immediately afterwards, for example in case its
+ PHY cannot be found. In this case, probing of the DSA switch continues
+ without that particular port.
PHY devices and link management
-------------------------------
``BR_STATE_DISABLED`` and propagating changes to the hardware if this port is
disabled while being a bridge member
+Address databases
+-----------------
+
+Switching hardware is expected to have a table for FDB entries, however not all
+of them are active at the same time. An address database is the subset (partition)
+of FDB entries that is active (can be matched by address learning on RX, or FDB
+lookup on TX) depending on the state of the port. An address database may
+occasionally be called "FID" (Filtering ID) in this document, although the
+underlying implementation may choose whatever is available to the hardware.
+
+For example, all ports that belong to a VLAN-unaware bridge (which is
+*currently* VLAN-unaware) are expected to learn source addresses in the
+database associated by the driver with that bridge (and not with other
+VLAN-unaware bridges). During forwarding and FDB lookup, a packet received on a
+VLAN-unaware bridge port should be able to find a VLAN-unaware FDB entry having
+the same MAC DA as the packet, which is present on another port member of the
+same bridge. At the same time, the FDB lookup process must be able to not find
+an FDB entry having the same MAC DA as the packet, if that entry points towards
+a port which is a member of a different VLAN-unaware bridge (and is therefore
+associated with a different address database).
+
+Similarly, each VLAN of each offloaded VLAN-aware bridge should have an
+associated address database, which is shared by all ports which are members of
+that VLAN, but not shared by ports belonging to different bridges that are
+members of the same VID.
+
+In this context, a VLAN-unaware database means that all packets are expected to
+match on it irrespective of VLAN ID (only MAC address lookup), whereas a
+VLAN-aware database means that packets are supposed to match based on the VLAN
+ID from the classified 802.1Q header (or the pvid if untagged).
+
+At the bridge layer, VLAN-unaware FDB entries have the special VID value of 0,
+whereas VLAN-aware FDB entries have non-zero VID values. Note that a
+VLAN-unaware bridge may have VLAN-aware (non-zero VID) FDB entries, and a
+VLAN-aware bridge may have VLAN-unaware FDB entries. As in hardware, the
+software bridge keeps separate address databases, and offloads to hardware the
+FDB entries belonging to these databases, through switchdev, asynchronously
+relative to the moment when the databases become active or inactive.
+
+When a user port operates in standalone mode, its driver should configure it to
+use a separate database called a port private database. This is different from
+the databases described above, and should impede operation as standalone port
+(packet in, packet out to the CPU port) as little as possible. For example,
+on ingress, it should not attempt to learn the MAC SA of ingress traffic, since
+learning is a bridging layer service and this is a standalone port, therefore
+it would consume useless space. With no address learning, the port private
+database should be empty in a naive implementation, and in this case, all
+received packets should be trivially flooded to the CPU port.
+
+DSA (cascade) and CPU ports are also called "shared" ports because they service
+multiple address databases, and the database that a packet should be associated
+to is usually embedded in the DSA tag. This means that the CPU port may
+simultaneously transport packets coming from a standalone port (which were
+classified by hardware in one address database), and from a bridge port (which
+were classified to a different address database).
+
+Switch drivers which satisfy certain criteria are able to optimize the naive
+configuration by removing the CPU port from the flooding domain of the switch,
+and just program the hardware with FDB entries pointing towards the CPU port
+for which it is known that software is interested in those MAC addresses.
+Packets which do not match a known FDB entry will not be delivered to the CPU,
+which will save CPU cycles required for creating an skb just to drop it.
+
+DSA is able to perform host address filtering for the following kinds of
+addresses:
+
+- Primary unicast MAC addresses of ports (``dev->dev_addr``). These are
+ associated with the port private database of the respective user port,
+ and the driver is notified to install them through ``port_fdb_add`` towards
+ the CPU port.
+
+- Secondary unicast and multicast MAC addresses of ports (addresses added
+ through ``dev_uc_add()`` and ``dev_mc_add()``). These are also associated
+ with the port private database of the respective user port.
+
+- Local/permanent bridge FDB entries (``BR_FDB_LOCAL``). These are the MAC
+ addresses of the bridge ports, for which packets must be terminated locally
+ and not forwarded. They are associated with the address database for that
+ bridge.
+
+- Static bridge FDB entries installed towards foreign (non-DSA) interfaces
+ present in the same bridge as some DSA switch ports. These are also
+ associated with the address database for that bridge.
+
+- Dynamically learned FDB entries on foreign interfaces present in the same
+ bridge as some DSA switch ports, only if ``ds->assisted_learning_on_cpu_port``
+ is set to true by the driver. These are associated with the address database
+ for that bridge.
+
+For various operations detailed below, DSA provides a ``dsa_db`` structure
+which can be of the following types:
+
+- ``DSA_DB_PORT``: the FDB (or MDB) entry to be installed or deleted belongs to
+ the port private database of user port ``db->dp``.
+- ``DSA_DB_BRIDGE``: the entry belongs to one of the address databases of bridge
+ ``db->bridge``. Separation between the VLAN-unaware database and the per-VID
+ databases of this bridge is expected to be done by the driver.
+- ``DSA_DB_LAG``: the entry belongs to the address database of LAG ``db->lag``.
+ Note: ``DSA_DB_LAG`` is currently unused and may be removed in the future.
+
+The drivers which act upon the ``dsa_db`` argument in ``port_fdb_add``,
+``port_mdb_add`` etc should declare ``ds->fdb_isolation`` as true.
+
+DSA associates each offloaded bridge and each offloaded LAG with a one-based ID
+(``struct dsa_bridge :: num``, ``struct dsa_lag :: id``) for the purposes of
+refcounting addresses on shared ports. Drivers may piggyback on DSA's numbering
+scheme (the ID is readable through ``db->bridge.num`` and ``db->lag.id`` or may
+implement their own.
+
+Only the drivers which declare support for FDB isolation are notified of FDB
+entries on the CPU port belonging to ``DSA_DB_PORT`` databases.
+For compatibility/legacy reasons, ``DSA_DB_BRIDGE`` addresses are notified to
+drivers even if they do not support FDB isolation. However, ``db->bridge.num``
+and ``db->lag.id`` are always set to 0 in that case (to denote the lack of
+isolation, for refcounting purposes).
+
+Note that it is not mandatory for a switch driver to implement physically
+separate address databases for each standalone user port. Since FDB entries in
+the port private databases will always point to the CPU port, there is no risk
+for incorrect forwarding decisions. In this case, all standalone ports may
+share the same database, but the reference counting of host-filtered addresses
+(not deleting the FDB entry for a port's MAC address if it's still in use by
+another port) becomes the responsibility of the driver, because DSA is unaware
+that the port databases are in fact shared. This can be achieved by calling
+``dsa_fdb_present_in_other_db()`` and ``dsa_mdb_present_in_other_db()``.
+The down side is that the RX filtering lists of each user port are in fact
+shared, which means that user port A may accept a packet with a MAC DA it
+shouldn't have, only because that MAC address was in the RX filtering list of
+user port B. These packets will still be dropped in software, however.
+
Bridge layer
------------
+Offloading the bridge forwarding plane is optional and handled by the methods
+below. They may be absent, return -EOPNOTSUPP, or ``ds->max_num_bridges`` may
+be non-zero and exceeded, and in this case, joining a bridge port is still
+possible, but the packet forwarding will take place in software, and the ports
+under a software bridge must remain configured in the same way as for
+standalone operation, i.e. have all bridging service functions (address
+learning etc) disabled, and send all received packets to the CPU port only.
+
+Concretely, a port starts offloading the forwarding plane of a bridge once it
+returns success to the ``port_bridge_join`` method, and stops doing so after
+``port_bridge_leave`` has been called. Offloading the bridge means autonomously
+learning FDB entries in accordance with the software bridge port's state, and
+autonomously forwarding (or flooding) received packets without CPU intervention.
+This is optional even when offloading a bridge port. Tagging protocol drivers
+are expected to call ``dsa_default_offload_fwd_mark(skb)`` for packets which
+have already been autonomously forwarded in the forwarding domain of the
+ingress switch port. DSA, through ``dsa_port_devlink_setup()``, considers all
+switch ports part of the same tree ID to be part of the same bridge forwarding
+domain (capable of autonomous forwarding to each other).
+
+Offloading the TX forwarding process of a bridge is a distinct concept from
+simply offloading its forwarding plane, and refers to the ability of certain
+driver and tag protocol combinations to transmit a single skb coming from the
+bridge device's transmit function to potentially multiple egress ports (and
+thereby avoid its cloning in software).
+
+Packets for which the bridge requests this behavior are called data plane
+packets and have ``skb->offload_fwd_mark`` set to true in the tag protocol
+driver's ``xmit`` function. Data plane packets are subject to FDB lookup,
+hardware learning on the CPU port, and do not override the port STP state.
+Additionally, replication of data plane packets (multicast, flooding) is
+handled in hardware and the bridge driver will transmit a single skb for each
+packet that may or may not need replication.
+
+When the TX forwarding offload is enabled, the tag protocol driver is
+responsible to inject packets into the data plane of the hardware towards the
+correct bridging domain (FID) that the port is a part of. The port may be
+VLAN-unaware, and in this case the FID must be equal to the FID used by the
+driver for its VLAN-unaware address database associated with that bridge.
+Alternatively, the bridge may be VLAN-aware, and in that case, it is guaranteed
+that the packet is also VLAN-tagged with the VLAN ID that the bridge processed
+this packet in. It is the responsibility of the hardware to untag the VID on
+the egress-untagged ports, or keep the tag on the egress-tagged ones.
+
- ``port_bridge_join``: bridge layer function invoked when a given switch port is
added to a bridge, this function should do what's necessary at the switch
level to permit the joining port to be added to the relevant logical
domain for it to ingress/egress traffic with other members of the bridge.
+ By setting the ``tx_fwd_offload`` argument to true, the TX forwarding process
+ of this bridge is also offloaded.
- ``port_bridge_leave``: bridge layer function invoked when a given switch port is
removed from a bridge, this function should do what's necessary at the
switch level to deny the leaving port from ingress/egress traffic from the
- remaining bridge members. When the port leaves the bridge, it should be aged
- out at the switch hardware for the switch to (re) learn MAC addresses behind
- this port.
+ remaining bridge members.
- ``port_stp_state_set``: bridge layer function invoked when a given switch port STP
state is computed by the bridge layer and should be propagated to switch
- hardware to forward/block/learn traffic. The switch driver is responsible for
- computing a STP state change based on current and asked parameters and perform
- the relevant ageing based on the intersection results
+ hardware to forward/block/learn traffic.
- ``port_bridge_flags``: bridge layer function invoked when a port must
configure its settings for e.g. flooding of unknown traffic or source address
CPU port, and flooding towards the CPU port should also be enabled, due to a
lack of an explicit address filtering mechanism in the DSA core.
-- ``port_bridge_tx_fwd_offload``: bridge layer function invoked after
- ``port_bridge_join`` when a driver sets ``ds->num_fwd_offloading_bridges`` to
- a non-zero value. Returning success in this function activates the TX
- forwarding offload bridge feature for this port, which enables the tagging
- protocol driver to inject data plane packets towards the bridging domain that
- the port is a part of. Data plane packets are subject to FDB lookup, hardware
- learning on the CPU port, and do not override the port STP state.
- Additionally, replication of data plane packets (multicast, flooding) is
- handled in hardware and the bridge driver will transmit a single skb for each
- packet that needs replication. The method is provided as a configuration
- point for drivers that need to configure the hardware for enabling this
- feature.
-
-- ``port_bridge_tx_fwd_unoffload``: bridge layer function invoked when a driver
- leaves a bridge port which had the TX forwarding offload feature enabled.
+- ``port_fast_age``: bridge layer function invoked when flushing the
+ dynamically learned FDB entries on the port is necessary. This is called when
+ transitioning from an STP state where learning should take place to an STP
+ state where it shouldn't, or when leaving a bridge, or when address learning
+ is turned off via ``port_bridge_flags``.
Bridge VLAN filtering
---------------------
allowed.
- ``port_vlan_add``: bridge layer function invoked when a VLAN is configured
- (tagged or untagged) for the given switch port. If the operation is not
- supported by the hardware, this function should return ``-EOPNOTSUPP`` to
- inform the bridge code to fallback to a software implementation.
+ (tagged or untagged) for the given switch port. The CPU port becomes a member
+ of a VLAN only if a foreign bridge port is also a member of it (and
+ forwarding needs to take place in software), or the VLAN is installed to the
+ VLAN group of the bridge device itself, for termination purposes
+ (``bridge vlan add dev br0 vid 100 self``). VLANs on shared ports are
+ reference counted and removed when there is no user left. Drivers do not need
+ to manually install a VLAN on the CPU port.
- ``port_vlan_del``: bridge layer function invoked when a VLAN is removed from the
given switch port
-- ``port_vlan_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each VLAN the given port is a member
- of. A switchdev object is used to carry the VID and bridge flags.
-
- ``port_fdb_add``: bridge layer function invoked when the bridge wants to install a
Forwarding Database entry, the switch hardware should be programmed with the
specified address in the specified VLAN Id in the forwarding database
- associated with this VLAN ID. If the operation is not supported, this
- function should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to
- a software implementation.
-
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
- of DSA, would be its port-based VLAN, used by the associated bridge device.
+ associated with this VLAN ID.
- ``port_fdb_del``: bridge layer function invoked when the bridge wants to remove a
Forwarding Database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database
-- ``port_fdb_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each MAC address known to be behind
- the given port. A switchdev object is used to carry the VID and FDB info.
+- ``port_fdb_dump``: bridge bypass function invoked by ``ndo_fdb_dump`` on the
+ physical DSA port interfaces. Since DSA does not attempt to keep in sync its
+ hardware FDB entries with the software bridge, this method is implemented as
+ a means to view the entries visible on user ports in the hardware database.
+ The entries reported by this function have the ``self`` flag in the output of
+ the ``bridge fdb show`` command.
- ``port_mdb_add``: bridge layer function invoked when the bridge wants to install
- a multicast database entry. If the operation is not supported, this function
- should return ``-EOPNOTSUPP`` to inform the bridge code to fallback to a
- software implementation. The switch hardware should be programmed with the
+ a multicast database entry. The switch hardware should be programmed with the
specified address in the specified VLAN ID in the forwarding database
associated with this VLAN ID.
-.. note:: VLAN ID 0 corresponds to the port private database, which, in the context
- of DSA, would be its port-based VLAN, used by the associated bridge device.
-
- ``port_mdb_del``: bridge layer function invoked when the bridge wants to remove a
multicast database entry, the switch hardware should be programmed to delete
the specified MAC address from the specified VLAN ID if it was mapped into
this port forwarding database.
-- ``port_mdb_dump``: bridge layer function invoked with a switchdev callback
- function that the driver has to call for each MAC address known to be behind
- the given port. A switchdev object is used to carry the VID and MDB info.
-
Link aggregation
----------------
Default: 4K
udp_wmem_min - INTEGER
- Minimal size of send buffer used by UDP sockets in moderation.
- Each UDP socket is able to use the size for sending data, even if
- total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-
- Default: 4K
+ UDP does not have tx memory accounting and this tunable has no effect.
RAW variables
=============
cipso_cache_bucket_size - INTEGER
The CIPSO label cache consists of a fixed size hash table with each
hash bucket containing a number of cache entries. This variable limits
- the number of entries in each hash bucket; the larger the value the
+ the number of entries in each hash bucket; the larger the value is, the
more CIPSO label mappings that can be cached. When the number of
entries in a given hash bucket reaches this limit adding new entries
causes the oldest entry in the bucket to be removed to make room.
option should only be set by experts.
Default: 0
-ip_dynaddr - BOOLEAN
+ip_dynaddr - INTEGER
If set non-zero, enables support for dynamic addresses.
If set to a non-zero value larger than 1, a kernel log
message will be printed when dynamic address rewriting
Default: 4K
sctp_wmem - vector of 3 INTEGERs: min, default, max
- Currently this tunable has no effect.
+ Only the first value ("min") is used, "default" and "max" are
+ ignored.
+
+ min: Minimum size of send buffer that can be used by SCTP sockets.
+ It is guaranteed to each SCTP socket (but not association) even
+ under moderate memory pressure.
+
+ Default: 4K
addr_scope_policy - INTEGER
Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
Default: 0
+reconf_enable - BOOLEAN
+ Enable or disable extension of Stream Reconfiguration functionality
+ specified in RFC6525. This extension provides the ability to "reset"
+ a stream, and it includes the Parameters of "Outgoing/Incoming SSN
+ Reset", "SSN/TSN Reset" and "Add Outgoing/Incoming Streams".
+
+ - 1: Enable extension.
+ - 0: Disable extension.
+
+ Default: 0
+
+intl_enable - BOOLEAN
+ Enable or disable extension of User Message Interleaving functionality
+ specified in RFC8260. This extension allows the interleaving of user
+ messages sent on different streams. With this feature enabled, I-DATA
+ chunk will replace DATA chunk to carry user messages if also supported
+ by the peer. Note that to use this feature, one needs to set this option
+ to 1 and also needs to set socket options SCTP_FRAGMENT_INTERLEAVE to 2
+ and SCTP_INTERLEAVING_SUPPORTED to 1.
+
+ - 1: Enable extension.
+ - 0: Disable extension.
+
+ Default: 0
+
+ecn_enable - BOOLEAN
+ Control use of Explicit Congestion Notification (ECN) by SCTP.
+ Like in TCP, ECN is used only when both ends of the SCTP connection
+ indicate support for it. This feature is useful in avoiding losses
+ due to congestion by allowing supporting routers to signal congestion
+ before having to drop packets.
+
+ 1: Enable ecn.
+ 0: Disable ecn.
+
+ Default: 1
+
``/proc/sys/net/core/*``
========================
* PHY device drivers in PHYLIB being reusable by nature, being able to
configure correctly a specified delay enables more designs with similar delay
- requirements to be operate correctly
+ requirements to be operated correctly
For cases where the PHY is not capable of providing this delay, but the
Ethernet MAC driver is capable of doing so, the correct phy_interface_t value
GNU C 5.1 gcc --version
Clang/LLVM (optional) 11.0.0 clang --version
GNU make 3.81 make --version
+bash 4.2 bash --version
binutils 2.23 ld -v
flex 2.5.35 flex --version
bison 2.0 bison --version
You will need GNU make 3.81 or later to build the kernel.
+Bash
+----
+
+Some bash scripts are used for the kernel build.
+Bash 4.2 or newer is needed.
+
Binutils
--------
- <ftp://ftp.gnu.org/gnu/make/>
+Bash
+----
+
+- <ftp://ftp.gnu.org/gnu/bash/>
+
Binutils
--------
netdev FAQ
==========
+tl;dr
+-----
+
+ - designate your patch to a tree - ``[PATCH net]`` or ``[PATCH net-next]``
+ - for fixes the ``Fixes:`` tag is required, regardless of the tree
+ - don't post large series (> 15 patches), break them up
+ - don't repost your patches within one 24h period
+ - reverse xmas tree
+
What is netdev?
---------------
It is a mailing list for all network-related Linux stuff. This
version that should be applied. If there is any doubt, the maintainer
will reply and ask what should be done.
+How do I divide my work into patches?
+-------------------------------------
+
+Put yourself in the shoes of the reviewer. Each patch is read separately
+and therefore should constitute a comprehensible step towards your stated
+goal.
+
+Avoid sending series longer than 15 patches. Larger series takes longer
+to review as reviewers will defer looking at it until they find a large
+chunk of time. A small series can be reviewed in a short time, so Maintainers
+just do it. As a result, a sequence of smaller series gets merged quicker and
+with better review coverage. Re-posting large series also increases the mailing
+list traffic.
+
I made changes to only a few patches in a patch series should I resend only those changed?
------------------------------------------------------------------------------------------
No, please resend the entire patch series and make sure you do number your
* another line of text
*/
+What is "reverse xmas tree"?
+----------------------------
+
+Netdev has a convention for ordering local variables in functions.
+Order the variable declaration lines longest to shortest, e.g.::
+
+ struct scatterlist *sg;
+ struct sk_buff *skb;
+ int err, i;
+
+If there are dependencies between the variables preventing the ordering
+move the initialization out of line.
+
I am working in existing code which uses non-standard formatting. Which formatting should I use?
------------------------------------------------------------------------------------------------
Make your code follow the most recent guidelines, so that eventually all code
====
AC97 is a five wire interface commonly found on many PC sound cards. It is
-now also popular in many portable devices. This DAI has a reset line and time
+now also popular in many portable devices. This DAI has a RESET line and time
multiplexes its data on its SDATA_OUT (playback) and SDATA_IN (capture) lines.
The bit clock (BCLK) is always driven by the CODEC (usually 12.288MHz) and the
frame (FRAME) (usually 48kHz) is always driven by the controller. Each AC97
rappresentata dalla struttura ``kernel_symbol`` che avrà il campo
``namespace`` (spazio dei nomi) impostato. Un simbolo esportato senza uno spazio
dei nomi avrà questo campo impostato a ``NULL``. Non esiste uno spazio dei nomi
-di base. Il programma ``modpost`` e il codice in kernel/module.c usano lo spazio
-dei nomi, rispettivamente, durante la compilazione e durante il caricamento
-di un modulo.
+di base. Il programma ``modpost`` e il codice in kernel/module/main.c usano lo
+spazio dei nomi, rispettivamente, durante la compilazione e durante il
+caricamento di un modulo.
2.2 Usare il simbolo di preprocessore DEFAULT_SYMBOL_NAMESPACE
==============================================================
模块接口支持
------------
-更多信息请参考文件kernel/module.c。
+更多信息请参阅kernel/module/目录下的文件。
硬件接口
========
相应的 ksymtab 条目结构体 ``kernel_symbol`` 将有相应的成员 ``命名空间`` 集。
导出时未指明命名空间的符号将指向 ``NULL`` 。如果没有定义命名空间,则默认没有。
-``modpost`` 和kernel/module.c分别在构建时或模块加载时使用名称空间。
+``modpost`` 和kernel/module/main.c分别在构建时或模块加载时使用名称空间。
2.2 使用DEFAULT_SYMBOL_NAMESPACE定义
====================================
``$r23``-``$r31`` ``$s0``-``$s8`` 静态寄存器 是
================= =============== =================== ==========
-注意:``$r21``寄存器在ELF psABI中保留未使用,但是在Linux内核用于保存每CPU
-变量基地址。该寄存器没有ABI命名,不过在内核中称为``$u0``。在一些遗留代码
-中有时可能见到``$v0``和``$v1``,它们是``$a0``和``$a1``的别名,属于已经废弃
-的用法。
+.. note::
+ 注意: ``$r21`` 寄存器在ELF psABI中保留未使用,但是在Linux内核用于保
+ 存每CPU变量基地址。该寄存器没有ABI命名,不过在内核中称为 ``$u0`` 。在
+ 一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0`` 和
+ ``$a1`` 的别名,属于已经废弃的用法。
浮点寄存器
----------
``$f24``-``$f31`` ``$fs0``-``$fs7`` 静态寄存器 是
================= ================== =================== ==========
-注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是 ``$a0``
-和 ``$a1`` 的别名,属于已经废弃的用法。
+.. note::
+ 注意:在一些遗留代码中有时可能见到 ``$v0`` 和 ``$v1`` ,它们是
+ ``$a0`` 和 ``$a1`` 的别名,属于已经废弃的用法。
向量寄存器
https://github.com/loongson/LoongArch-Documentation/releases/latest/download/Loongson-7A1000-usermanual-2.00-EN.pdf (英文版)
-注:CPUINTC即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其中断
-控制逻辑;LIOINTC即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;EIOINTC
-即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;HTVECINTC即《龙芯3A5000
-处理器使用手册》第14.3节所描述的“HyperTransport中断”;PCH-PIC/PCH-MSI即《龙芯7A1000桥
-片用户手册》第5章所描述的“中断控制器”;PCH-LPC即《龙芯7A1000桥片用户手册》第24.3节所
-描述的“LPC中断”。
+.. note::
+ - CPUINTC:即《龙芯架构参考手册卷一》第7.4节所描述的CSR.ECFG/CSR.ESTAT寄存器及其
+ 中断控制逻辑;
+ - LIOINTC:即《龙芯3A5000处理器使用手册》第11.1节所描述的“传统I/O中断”;
+ - EIOINTC:即《龙芯3A5000处理器使用手册》第11.2节所描述的“扩展I/O中断”;
+ - HTVECINTC:即《龙芯3A5000处理器使用手册》第14.3节所描述的“HyperTransport中断”;
+ - PCH-PIC/PCH-MSI:即《龙芯7A1000桥片用户手册》第5章所描述的“中断控制器”;
+ - PCH-LPC:即《龙芯7A1000桥片用户手册》第24.3节所描述的“LPC中断”。
# modprobe usbmon
#
-Verify that bus sockets are present:
+Verify that bus sockets are present::
# ls /sys/kernel/debug/usb/usbmon
0s 0u 1s 1t 1u 2s 2t 2u 3s 3t 3u 4s 4t 4u
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
- #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+ #define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
+ #define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
by the ``hist_param`` field. The range of the Nth bucket (1 <= N < ``size``)
is [``hist_param``*(N-1), ``hist_param``*N), while the range of the last
bucket is [``hist_param``*(``size``-1), +INF). (+INF means positive infinity
- value.) The bucket value indicates how many samples fell in the bucket's range.
+ value.)
* ``KVM_STATS_TYPE_LOG_HIST``
The statistic is reported as a logarithmic histogram. The number of
buckets is specified by the ``size`` field. The range of the first bucket is
[0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF).
Otherwise, The Nth bucket (1 < N < ``size``) covers
- [pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell
- in the bucket's range.
+ [pow(2, N-2), pow(2, N-1)).
Bits 4-7 of ``flags`` encode the unit:
It indicates that the statistics data is used to measure time or latency.
* ``KVM_STATS_UNIT_CYCLES``
It indicates that the statistics data is used to measure CPU clock cycles.
+ * ``KVM_STATS_UNIT_BOOLEAN``
+ It indicates that the statistic will always be either 0 or 1. Boolean
+ statistics of "peak" type will never go back from 1 to 0. Boolean
+ statistics can be linear histograms (with two buckets) but not logarithmic
+ histograms.
+
+Note that, in the case of histograms, the unit applies to the bucket
+ranges, while the bucket value indicates how many samples fell in the
+bucket's range.
Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the
unit:
The ``bucket_size`` field is used as a parameter for histogram statistics data.
It is only used by linear histogram statistics data, specifying the size of a
-bucket.
+bucket in the unit expressed by bits 4-11 of ``flags`` together with ``exponent``.
The ``name`` field is the name string of the statistics data. The name string
starts at the end of ``struct kvm_stats_desc``. The maximum length including
unpoison-pfn
Software-unpoison page at PFN echoed into this file. This way
a page can be reused again. This only works for Linux
- injected failures, not for real memory failures.
+ injected failures, not for real memory failures. Once any hardware
+ memory failure happens, this feature is disabled.
Note these injection interfaces are not stable and might change between
kernel versions
ACPI VIOT DRIVER
M: Jean-Philippe Brucker <jean-philippe@linaro.org>
L: linux-acpi@vger.kernel.org
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
S: Maintained
F: drivers/acpi/viot.c
F: include/linux/acpi_viot.h
AMD IOMMU (AMD-VI)
M: Joerg Roedel <joro@8bytes.org>
R: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
F: drivers/iommu/amd/
AMD XGBE DRIVER
M: Tom Lendacky <thomas.lendacky@amd.com>
+M: "Shyam Sundar S K" <Shyam-sundar.S-k@amd.com>
L: netdev@vger.kernel.org
S: Supported
F: arch/arm64/boot/dts/amd/amd-seattle-xgbe*.dtsi
ARM HDLCD DRM DRIVER
M: Liviu Dudau <liviu.dudau@arm.com>
S: Supported
-F: Documentation/devicetree/bindings/display/arm,hdlcd.txt
+F: Documentation/devicetree/bindings/display/arm,hdlcd.yaml
F: drivers/gpu/drm/arm/hdlcd_*
ARM INTEGRATOR, VERSATILE AND REALVIEW SUPPORT
L: Mali DP Maintainers <malidp@foss.arm.com>
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
-F: Documentation/devicetree/bindings/display/arm,komeda.txt
+F: Documentation/devicetree/bindings/display/arm,komeda.yaml
F: Documentation/gpu/komeda-kms.rst
F: drivers/gpu/drm/arm/display/include/
F: drivers/gpu/drm/arm/display/komeda/
L: Mali DP Maintainers <malidp@foss.arm.com>
S: Supported
T: git git://anongit.freedesktop.org/drm/drm-misc
-F: Documentation/devicetree/bindings/display/arm,malidp.txt
+F: Documentation/devicetree/bindings/display/arm,malidp.yaml
F: Documentation/gpu/afbc.rst
F: drivers/gpu/drm/arm/
S: Maintained
T: git git://github.com/ulli-kroll/linux.git
F: Documentation/devicetree/bindings/arm/gemini.yaml
-F: Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt
+F: Documentation/devicetree/bindings/net/cortina,gemini-ethernet.yaml
F: Documentation/devicetree/bindings/pinctrl/cortina,gemini-pinctrl.txt
F: Documentation/devicetree/bindings/rtc/faraday,ftrtc010.yaml
F: arch/arm/boot/dts/gemini*
M: Chester Lin <clin@suse.com>
R: Andreas Färber <afaerber@suse.de>
R: Matthias Brugger <mbrugger@suse.com>
+R: NXP S32 Linux Team <s32@nxp.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm64/boot/dts/freescale/s32g*.dts*
N: oxnas
ARM/PALM TREO SUPPORT
-M: Tomas Cech <sleep_walker@suse.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S: Maintained
-W: http://hackndev.com
+S: Orphan
F: arch/arm/mach-pxa/palmtreo.*
ARM/PALMTX,PALMT5,PALMLD,PALMTE2,PALMTC SUPPORT
ARM/QUALCOMM SUPPORT
M: Andy Gross <agross@kernel.org>
M: Bjorn Andersson <bjorn.andersson@linaro.org>
+R: Konrad Dybcio <konrad.dybcio@somainline.org>
L: linux-arm-msm@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/qcom/linux.git
F: Documentation/devicetree/bindings/iio/accel/bosch,bma400.yaml
F: drivers/iio/accel/bma400*
-BPF (Safe dynamic programs and tools)
+BPF [GENERAL] (Safe Dynamic Programs and Tools)
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
M: Andrii Nakryiko <andrii@kernel.org>
-R: Martin KaFai Lau <kafai@fb.com>
-R: Song Liu <songliubraving@fb.com>
+R: Martin KaFai Lau <martin.lau@linux.dev>
+R: Song Liu <song@kernel.org>
R: Yonghong Song <yhs@fb.com>
R: John Fastabend <john.fastabend@gmail.com>
R: KP Singh <kpsingh@kernel.org>
-L: netdev@vger.kernel.org
+R: Stanislav Fomichev <sdf@google.com>
+R: Hao Luo <haoluo@google.com>
+R: Jiri Olsa <jolsa@kernel.org>
L: bpf@vger.kernel.org
S: Supported
W: https://bpf.io/
F: tools/bpf/
F: tools/lib/bpf/
F: tools/testing/selftests/bpf/
-N: bpf
-K: bpf
BPF JIT for ARM
M: Shubham Bansal <illusionist.neo@gmail.com>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Odd Fixes
F: arch/arm/net/
BPF JIT for ARM64
M: Daniel Borkmann <daniel@iogearbox.net>
M: Alexei Starovoitov <ast@kernel.org>
M: Zi Shen Lim <zlim.lnx@gmail.com>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Supported
F: arch/arm64/net/
BPF JIT for MIPS (32-BIT AND 64-BIT)
M: Johan Almbladh <johan.almbladh@anyfinetworks.com>
M: Paul Burton <paulburton@kernel.org>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
F: arch/mips/net/
BPF JIT for NFP NICs
M: Jakub Kicinski <kuba@kernel.org>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Supported
+S: Odd Fixes
F: drivers/net/ethernet/netronome/nfp/bpf/
BPF JIT for POWERPC (32-BIT AND 64-BIT)
M: Naveen N. Rao <naveen.n.rao@linux.ibm.com>
-L: netdev@vger.kernel.org
+M: Michael Ellerman <mpe@ellerman.id.au>
L: bpf@vger.kernel.org
-S: Maintained
+S: Supported
F: arch/powerpc/net/
BPF JIT for RISC-V (32-bit)
M: Luke Nelson <luke.r.nels@gmail.com>
M: Xi Wang <xi.wang@gmail.com>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
F: arch/riscv/net/
BPF JIT for RISC-V (64-bit)
M: Björn Töpel <bjorn@kernel.org>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Maintained
F: arch/riscv/net/
M: Ilya Leoshkevich <iii@linux.ibm.com>
M: Heiko Carstens <hca@linux.ibm.com>
M: Vasily Gorbik <gor@linux.ibm.com>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Supported
F: arch/s390/net/
X: arch/s390/net/pnet.c
BPF JIT for SPARC (32-BIT AND 64-BIT)
M: David S. Miller <davem@davemloft.net>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Odd Fixes
F: arch/sparc/net/
BPF JIT for X86 32-BIT
M: Wang YanQing <udknight@gmail.com>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
-S: Maintained
+S: Odd Fixes
F: arch/x86/net/bpf_jit_comp32.c
BPF JIT for X86 64-BIT
M: Alexei Starovoitov <ast@kernel.org>
M: Daniel Borkmann <daniel@iogearbox.net>
-L: netdev@vger.kernel.org
L: bpf@vger.kernel.org
S: Supported
F: arch/x86/net/
X: arch/x86/net/bpf_jit_comp32.c
-BPF LSM (Security Audit and Enforcement using BPF)
+BPF [CORE]
+M: Alexei Starovoitov <ast@kernel.org>
+M: Daniel Borkmann <daniel@iogearbox.net>
+R: John Fastabend <john.fastabend@gmail.com>
+L: bpf@vger.kernel.org
+S: Maintained
+F: kernel/bpf/verifier.c
+F: kernel/bpf/tnum.c
+F: kernel/bpf/core.c
+F: kernel/bpf/syscall.c
+F: kernel/bpf/dispatcher.c
+F: kernel/bpf/trampoline.c
+F: include/linux/bpf*
+F: include/linux/filter.h
+
+BPF [BTF]
+M: Martin KaFai Lau <martin.lau@linux.dev>
+L: bpf@vger.kernel.org
+S: Maintained
+F: kernel/bpf/btf.c
+F: include/linux/btf*
+
+BPF [TRACING]
+M: Song Liu <song@kernel.org>
+R: Jiri Olsa <jolsa@kernel.org>
+L: bpf@vger.kernel.org
+S: Maintained
+F: kernel/trace/bpf_trace.c
+F: kernel/bpf/stackmap.c
+
+BPF [NETWORKING] (tc BPF, sock_addr)
+M: Martin KaFai Lau <martin.lau@linux.dev>
+M: Daniel Borkmann <daniel@iogearbox.net>
+R: John Fastabend <john.fastabend@gmail.com>
+L: bpf@vger.kernel.org
+L: netdev@vger.kernel.org
+S: Maintained
+F: net/core/filter.c
+F: net/sched/act_bpf.c
+F: net/sched/cls_bpf.c
+
+BPF [NETWORKING] (struct_ops, reuseport)
+M: Martin KaFai Lau <martin.lau@linux.dev>
+L: bpf@vger.kernel.org
+L: netdev@vger.kernel.org
+S: Maintained
+F: kernel/bpf/bpf_struct*
+
+BPF [SECURITY & LSM] (Security Audit and Enforcement using BPF)
M: KP Singh <kpsingh@kernel.org>
R: Florent Revest <revest@chromium.org>
R: Brendan Jackman <jackmanb@chromium.org>
F: kernel/bpf/bpf_lsm.c
F: security/bpf/
+BPF [STORAGE & CGROUPS]
+M: Martin KaFai Lau <martin.lau@linux.dev>
+L: bpf@vger.kernel.org
+S: Maintained
+F: kernel/bpf/cgroup.c
+F: kernel/bpf/*storage.c
+F: kernel/bpf/bpf_lru*
+
+BPF [RINGBUF]
+M: Andrii Nakryiko <andrii@kernel.org>
+L: bpf@vger.kernel.org
+S: Maintained
+F: kernel/bpf/ringbuf.c
+
+BPF [ITERATOR]
+M: Yonghong Song <yhs@fb.com>
+L: bpf@vger.kernel.org
+S: Maintained
+F: kernel/bpf/*iter.c
+
+BPF [L7 FRAMEWORK] (sockmap)
+M: John Fastabend <john.fastabend@gmail.com>
+M: Jakub Sitnicki <jakub@cloudflare.com>
+L: netdev@vger.kernel.org
+L: bpf@vger.kernel.org
+S: Maintained
+F: include/linux/skmsg.h
+F: net/core/skmsg.c
+F: net/core/sock_map.c
+F: net/ipv4/tcp_bpf.c
+F: net/ipv4/udp_bpf.c
+F: net/unix/unix_bpf.c
+
+BPF [LIBRARY] (libbpf)
+M: Andrii Nakryiko <andrii@kernel.org>
+L: bpf@vger.kernel.org
+S: Maintained
+F: tools/lib/bpf/
+
+BPF [TOOLING] (bpftool)
+M: Quentin Monnet <quentin@isovalent.com>
+L: bpf@vger.kernel.org
+S: Maintained
+F: kernel/bpf/disasm.*
+F: tools/bpf/bpftool/
+
+BPF [SELFTESTS] (Test Runners & Infrastructure)
+M: Andrii Nakryiko <andrii@kernel.org>
+R: Mykola Lysenko <mykolal@fb.com>
+L: bpf@vger.kernel.org
+S: Maintained
+F: tools/testing/selftests/bpf/
+
+BPF [MISC]
+L: bpf@vger.kernel.org
+S: Odd Fixes
+K: (?:\b|_)bpf(?:\b|_)
+
BROADCOM B44 10/100 ETHERNET DRIVER
M: Michael Chan <michael.chan@broadcom.com>
L: netdev@vger.kernel.org
N: bcm[9]?47622
BROADCOM BCM2711/BCM2835 ARM ARCHITECTURE
-M: Nicolas Saenz Julienne <nsaenz@kernel.org>
+M: Florian Fainelli <f.fainelli@gmail.com>
R: Broadcom internal kernel review list <bcm-kernel-feedback-list@broadcom.com>
L: linux-rpi-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
-T: git git://git.kernel.org/pub/scm/linux/kernel/git/nsaenz/linux-rpi.git
+T: git git://github.com/broadcom/stblinux.git
F: Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
F: drivers/pci/controller/pcie-brcmstb.c
F: drivers/staging/vc04_services
T: git git://git.kernel.org/pub/scm/linux/kernel/git/clk/linux.git
F: Documentation/devicetree/bindings/clock/
F: drivers/clk/
+F: include/dt-bindings/clock/
F: include/linux/clk-pr*
F: include/linux/clk/
F: include/linux/of_clk.h
M: Alison Schofield <alison.schofield@intel.com>
M: Vishal Verma <vishal.l.verma@intel.com>
M: Ira Weiny <ira.weiny@intel.com>
-M: Ben Widawsky <ben.widawsky@intel.com>
+M: Ben Widawsky <bwidawsk@kernel.org>
M: Dan Williams <dan.j.williams@intel.com>
L: linux-cxl@vger.kernel.org
S: Maintained
M: Christoph Hellwig <hch@lst.de>
M: Marek Szyprowski <m.szyprowski@samsung.com>
R: Robin Murphy <robin.murphy@arm.com>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
S: Supported
W: http://git.infradead.org/users/hch/dma-mapping.git
T: git git://git.infradead.org/users/hch/dma-mapping.git
DMA MAPPING BENCHMARK
M: Xiang Chen <chenxiang66@hisilicon.com>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
F: kernel/dma/map_benchmark.c
F: tools/testing/selftests/dma/
L: linux-media@vger.kernel.org
S: Maintained
T: git git://linuxtv.org/media_tree.git
-F: Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.txt
+F: Documentation/devicetree/bindings/media/i2c/dongwoon,dw9807-vcm.yaml
F: drivers/media/i2c/dw9807-vcm.c
DOUBLETALK DRIVER
EXYNOS SYSMMU (IOMMU) driver
M: Marek Szyprowski <m.szyprowski@samsung.com>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
S: Maintained
F: drivers/iommu/exynos-iommu.c
FILE LOCKING (flock() and fcntl()/lockf())
M: Jeff Layton <jlayton@kernel.org>
+M: Chuck Lever <chuck.lever@oracle.com>
L: linux-fsdevel@vger.kernel.org
S: Maintained
F: fs/fcntl.c
F: Documentation/driver-api/gpio/
F: drivers/gpio/
F: include/asm-generic/gpio.h
+F: include/dt-bindings/gpio/
F: include/linux/gpio.h
F: include/linux/gpio/
F: include/linux/of_gpio.h
M: Dipen Patel <dipenp@nvidia.com>
S: Maintained
F: Documentation/devicetree/bindings/timestamp/
-F: Documentation/hte/
+F: Documentation/driver-api/hte/
F: drivers/hte/
F: include/linux/hte.h
HWPOISON MEMORY FAILURE HANDLING
M: Naoya Horiguchi <naoya.horiguchi@nec.com>
+R: Miaohe Lin <linmiaohe@huawei.com>
L: linux-mm@kvack.org
S: Maintained
F: mm/hwpoison-inject.c
F: Documentation/devicetree/bindings/i2c/i2c.txt
F: Documentation/i2c/
F: drivers/i2c/*
+F: include/dt-bindings/i2c/i2c.h
F: include/linux/i2c-dev.h
F: include/linux/i2c-smbus.h
F: include/linux/i2c.h
F: Documentation/devicetree/bindings/i2c/
F: drivers/i2c/algos/
F: drivers/i2c/busses/
+F: include/dt-bindings/i2c/
I2C-TAOS-EVM DRIVER
M: Jean Delvare <jdelvare@suse.com>
M: Cezary Rojewski <cezary.rojewski@intel.com>
M: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
M: Liam Girdwood <liam.r.girdwood@linux.intel.com>
-M: Jie Yang <yang.jie@linux.intel.com>
+M: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
+M: Bard Liao <yung-chuan.liao@linux.intel.com>
+M: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
+M: Kai Vehmanen <kai.vehmanen@linux.intel.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Supported
F: sound/soc/intel/
INTEL IOMMU (VT-d)
M: David Woodhouse <dwmw2@infradead.org>
M: Lu Baolu <baolu.lu@linux.intel.com>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
F: drivers/iommu/intel/
IOMMU DRIVERS
M: Joerg Roedel <joro@8bytes.org>
M: Will Deacon <will@kernel.org>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git
F: Documentation/devicetree/bindings/iommu/
KERNEL NFSD, SUNRPC, AND LOCKD SERVERS
M: Chuck Lever <chuck.lever@oracle.com>
+M: Jeff Layton <jlayton@kernel.org>
L: linux-nfs@vger.kernel.org
S: Supported
W: http://nfs.sourceforge.net/
R: James Morse <james.morse@arm.com>
R: Alexandru Elisei <alexandru.elisei@arm.com>
R: Suzuki K Poulose <suzuki.poulose@arm.com>
+R: Oliver Upton <oliver.upton@linux.dev>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kvmarm@lists.cs.columbia.edu (moderated for non-subscribers)
S: Maintained
F: arch/riscv/include/uapi/asm/kvm*
F: arch/riscv/kvm/
F: tools/testing/selftests/kvm/*/riscv/
-F: tools/testing/selftests/kvm/riscv/
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Christian Borntraeger <borntraeger@linux.ibm.com>
F: tools/testing/selftests/kvm/s390x/
KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
+M: Sean Christopherson <seanjc@google.com>
M: Paolo Bonzini <pbonzini@redhat.com>
-R: Sean Christopherson <seanjc@google.com>
-R: Vitaly Kuznetsov <vkuznets@redhat.com>
-R: Wanpeng Li <wanpengli@tencent.com>
-R: Jim Mattson <jmattson@google.com>
-R: Joerg Roedel <joro@8bytes.org>
L: kvm@vger.kernel.org
S: Supported
-W: http://www.linux-kvm.org
T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
F: arch/x86/include/asm/kvm*
-F: arch/x86/include/asm/pvclock-abi.h
F: arch/x86/include/asm/svm.h
F: arch/x86/include/asm/vmx*.h
F: arch/x86/include/uapi/asm/kvm*
F: arch/x86/include/uapi/asm/svm.h
F: arch/x86/include/uapi/asm/vmx.h
-F: arch/x86/kernel/kvm.c
-F: arch/x86/kernel/kvmclock.c
F: arch/x86/kvm/
F: arch/x86/kvm/*/
+KVM PARAVIRT (KVM/paravirt)
+M: Paolo Bonzini <pbonzini@redhat.com>
+R: Wanpeng Li <wanpengli@tencent.com>
+R: Vitaly Kuznetsov <vkuznets@redhat.com>
+L: kvm@vger.kernel.org
+S: Supported
+T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
+F: arch/x86/kernel/kvm.c
+F: arch/x86/kernel/kvmclock.c
+F: arch/x86/include/asm/pvclock-abi.h
+F: include/linux/kvm_para.h
+F: include/uapi/linux/kvm_para.h
+F: include/uapi/asm-generic/kvm_para.h
+F: include/asm-generic/kvm_para.h
+F: arch/um/include/asm/kvm_para.h
+F: arch/x86/include/asm/kvm_para.h
+F: arch/x86/include/uapi/asm/kvm_para.h
+
+KVM X86 HYPER-V (KVM/hyper-v)
+M: Vitaly Kuznetsov <vkuznets@redhat.com>
+M: Sean Christopherson <seanjc@google.com>
+M: Paolo Bonzini <pbonzini@redhat.com>
+L: kvm@vger.kernel.org
+S: Supported
+T: git git://git.kernel.org/pub/scm/virt/kvm/kvm.git
+F: arch/x86/kvm/hyperv.*
+F: arch/x86/kvm/kvm_onhyperv.*
+F: arch/x86/kvm/svm/hyperv.*
+F: arch/x86/kvm/svm/svm_onhyperv.*
+F: arch/x86/kvm/vmx/evmcs.*
+
KERNFS
M: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
M: Tejun Heo <tj@kernel.org>
F: include/net/l3mdev.h
F: net/l3mdev
-L7 BPF FRAMEWORK
-M: John Fastabend <john.fastabend@gmail.com>
-M: Daniel Borkmann <daniel@iogearbox.net>
-M: Jakub Sitnicki <jakub@cloudflare.com>
-L: netdev@vger.kernel.org
-L: bpf@vger.kernel.org
-S: Maintained
-F: include/linux/skmsg.h
-F: net/core/skmsg.c
-F: net/core/sock_map.c
-F: net/ipv4/tcp_bpf.c
-F: net/ipv4/udp_bpf.c
-F: net/unix/unix_bpf.c
-
LANDLOCK SECURITY MODULE
M: Mickaël Salaün <mic@digikod.net>
L: linux-security-module@vger.kernel.org
L: linux-ide@vger.kernel.org
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git
+F: Documentation/ABI/testing/sysfs-ata
F: Documentation/devicetree/bindings/ata/
F: drivers/ata/
F: include/linux/ata.h
LOONGARCH
M: Huacai Chen <chenhuacai@kernel.org>
R: WANG Xuerui <kernel@xen0n.name>
+L: loongarch@lists.linux.dev
S: Maintained
T: git git://git.kernel.org/pub/scm/linux/kernel/git/chenhuacai/linux-loongson.git
F: arch/loongarch/
MEDIATEK IOMMU DRIVER
M: Yong Wu <yong.wu@mediatek.com>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
L: linux-mediatek@lists.infradead.org (moderated for non-subscribers)
S: Supported
F: Documentation/devicetree/bindings/iommu/mediatek*
S: Supported
W: http://www.mellanox.com
Q: https://patchwork.kernel.org/project/netdevbpf/list/
-F: drivers/net/ethernet/mellanox/mlx5/core/accel/*
F: drivers/net/ethernet/mellanox/mlx5/core/en_accel/*
F: drivers/net/ethernet/mellanox/mlx5/core/fpga/*
F: include/linux/mlx5/mlx5_ifc_fpga.h
L: linux-mm@kvack.org
S: Maintained
W: http://www.linux-mm.org
-T: quilt https://ozlabs.org/~akpm/mmotm/
-T: quilt https://ozlabs.org/~akpm/mmots/
-T: git git://github.com/hnaz/linux-mm.git
+T: git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
+T: quilt git://git.kernel.org/pub/scm/linux/kernel/git/akpm/25-new
F: include/linux/gfp.h
F: include/linux/memory_hotplug.h
F: include/linux/mm.h
F: mm/
F: tools/testing/selftests/vm/
+MEMORY HOT(UN)PLUG
+M: David Hildenbrand <david@redhat.com>
+M: Oscar Salvador <osalvador@suse.de>
+L: linux-mm@kvack.org
+S: Maintained
+F: Documentation/admin-guide/mm/memory-hotplug.rst
+F: Documentation/core-api/memory-hotplug.rst
+F: drivers/base/memory.c
+F: include/linux/memory_hotplug.h
+F: mm/memory_hotplug.c
+F: tools/testing/selftests/memory-hotplug/
+
MEMORY TECHNOLOGY DEVICES (MTD)
M: Miquel Raynal <miquel.raynal@bootlin.com>
M: Richard Weinberger <richard@nod.at>
F: Documentation/devicetree/bindings/net/
F: drivers/connector/
F: drivers/net/
+F: include/dt-bindings/net/
F: include/linux/etherdevice.h
F: include/linux/fcdevice.h
F: include/linux/fddidevice.h
NETWORKING [TLS]
M: Boris Pismenny <borisp@nvidia.com>
M: John Fastabend <john.fastabend@gmail.com>
-M: Daniel Borkmann <daniel@iogearbox.net>
M: Jakub Kicinski <kuba@kernel.org>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/net/phy/nxp-c45-tja11xx.c
NXP FSPI DRIVER
-M: Ashish Kumar <ashish.kumar@nxp.com>
+M: Han Xu <han.xu@nxp.com>
+M: Haibo Chen <haibo.chen@nxp.com>
R: Yogesh Gaur <yogeshgaur.83@gmail.com>
L: linux-spi@vger.kernel.org
S: Maintained
F: drivers/iio/gyro/fxas21002c_spi.c
NXP i.MX CLOCK DRIVERS
-M: Abel Vesa <abel.vesa@nxp.com>
+M: Abel Vesa <abelvesa@kernel.org>
L: linux-clk@vger.kernel.org
L: linux-imx@nxp.com
S: Maintained
F: sound/soc/codecs/tfa989x.c
NXP-NCI NFC DRIVER
-R: Charles Gorand <charles.gorand@effinnov.com>
L: linux-nfc@lists.01.org (subscribers-only)
-S: Supported
+S: Orphan
F: Documentation/devicetree/bindings/net/nfc/nxp,nci.yaml
F: drivers/nfc/nxp-nci
OPENCOMPUTE PTP CLOCK DRIVER
M: Jonathan Lemon <jonathan.lemon@gmail.com>
+M: Vadim Fedorenko <vadfed@fb.com>
L: netdev@vger.kernel.org
S: Maintained
F: drivers/ptp/ptp_ocp.c
M: Dong Aisheng <aisheng.dong@nxp.com>
M: Fabio Estevam <festevam@gmail.com>
M: Shawn Guo <shawnguo@kernel.org>
-M: Stefan Agner <stefan@agner.ch>
+M: Jacky Bai <ping.bai@nxp.com>
R: Pengutronix Kernel Team <kernel@pengutronix.de>
L: linux-gpio@vger.kernel.org
S: Maintained
PIN CONTROLLER - INTEL
M: Mika Westerberg <mika.westerberg@linux.intel.com>
M: Andy Shevchenko <andy@kernel.org>
-S: Maintained
+S: Supported
T: git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git
F: drivers/pinctrl/intel/
F: Documentation/devicetree/bindings/iio/chemical/plantower,pms7003.yaml
F: drivers/iio/chemical/pms7003.c
+PLATFORM FEATURE INFRASTRUCTURE
+M: Juergen Gross <jgross@suse.com>
+S: Maintained
+F: arch/*/include/asm/platform-feature.h
+F: include/asm-generic/platform-feature.h
+F: include/linux/platform-feature.h
+F: kernel/platform-feature.c
+
PLDMFW LIBRARY
M: Jacob Keller <jacob.e.keller@intel.com>
S: Maintained
QCOM AUDIO (ASoC) DRIVERS
M: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
-M: Banajit Goswami <bgoswami@codeaurora.org>
+M: Banajit Goswami <bgoswami@quicinc.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
S: Supported
F: sound/soc/codecs/lpass-va-macro.c
F: drivers/cpufreq/qcom-cpufreq-nvmem.c
QUALCOMM CRYPTO DRIVERS
-M: Thara Gopinath <thara.gopinath@linaro.org>
+M: Thara Gopinath <thara.gopinath@gmail.com>
L: linux-crypto@vger.kernel.org
L: linux-arm-msm@vger.kernel.org
S: Maintained
QUALCOMM IOMMU
M: Rob Clark <robdclark@gmail.com>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
L: linux-arm-msm@vger.kernel.org
S: Maintained
F: drivers/iommu/arm/arm-smmu/qcom_iommu.c
QUALCOMM TSENS THERMAL DRIVER
M: Amit Kucheria <amitk@kernel.org>
-M: Thara Gopinath <thara.gopinath@linaro.org>
+M: Thara Gopinath <thara.gopinath@gmail.com>
L: linux-pm@vger.kernel.org
L: linux-arm-msm@vger.kernel.org
S: Maintained
K: riscv
RISC-V/MICROCHIP POLARFIRE SOC SUPPORT
-M: Lewis Hanly <lewis.hanly@microchip.com>
M: Conor Dooley <conor.dooley@microchip.com>
+M: Daire McNamara <daire.mcnamara@microchip.com>
L: linux-riscv@lists.infradead.org
S: Supported
F: arch/riscv/boot/dts/microchip/
+F: drivers/char/hw_random/mpfs-rng.c
+F: drivers/clk/microchip/clk-mpfs.c
F: drivers/mailbox/mailbox-mpfs.c
+F: drivers/pci/controller/pcie-microchip-host.c
F: drivers/soc/microchip/
F: include/soc/microchip/mpfs.h
SHARED MEMORY COMMUNICATIONS (SMC) SOCKETS
M: Karsten Graul <kgraul@linux.ibm.com>
+M: Wenjia Zhang <wenjia@linux.ibm.com>
L: linux-s390@vger.kernel.org
S: Supported
W: http://www.ibm.com/developerworks/linux/linux390/
SOUND - SOUND OPEN FIRMWARE (SOF) DRIVERS
M: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
M: Liam Girdwood <lgirdwood@gmail.com>
+M: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
+M: Bard Liao <yung-chuan.liao@linux.intel.com>
M: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
-M: Kai Vehmanen <kai.vehmanen@linux.intel.com>
+R: Kai Vehmanen <kai.vehmanen@linux.intel.com>
M: Daniel Baluta <daniel.baluta@nxp.com>
L: sound-open-firmware@alsa-project.org (moderated for non-subscribers)
S: Supported
SWIOTLB SUBSYSTEM
M: Christoph Hellwig <hch@infradead.org>
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
S: Supported
W: http://git.infradead.org/users/hch/dma-mapping.git
T: git git://git.infradead.org/users/hch/dma-mapping.git
SYNOPSYS AXS10x RESET CONTROLLER DRIVER
M: Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
S: Supported
-F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.txt
+F: Documentation/devicetree/bindings/reset/snps,axs10x-reset.yaml
F: drivers/reset/reset-axs10x.c
SYNOPSYS CREG GPIO DRIVER
R: Mika Westerberg <mika.westerberg@linux.intel.com>
R: Jan Dabros <jsd@semihalf.com>
L: linux-i2c@vger.kernel.org
-S: Maintained
+S: Supported
F: drivers/i2c/busses/i2c-designware-*
SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER
F: Documentation/devicetree/bindings/usb/
F: Documentation/usb/
F: drivers/usb/
+F: include/dt-bindings/usb/
F: include/linux/usb.h
F: include/linux/usb/
M: Juergen Gross <jgross@suse.com>
M: Stefano Stabellini <sstabellini@kernel.org>
L: xen-devel@lists.xenproject.org (moderated for non-subscribers)
-L: iommu@lists.linux-foundation.org
+L: iommu@lists.linux.dev
S: Supported
F: arch/x86/xen/*swiotlb*
F: drivers/xen/*swiotlb*
VERSION = 5
PATCHLEVEL = 19
SUBLEVEL = 0
-EXTRAVERSION = -rc1
+EXTRAVERSION =
NAME = Superb Owl
# *DOCUMENTATION*
KBUILD_CFLAGS += $(stackp-flags-y)
KBUILD_CFLAGS-$(CONFIG_WERROR) += -Werror
+KBUILD_CFLAGS-$(CONFIG_CC_NO_ARRAY_BOUNDS) += -Wno-array-bounds
KBUILD_CFLAGS += $(KBUILD_CFLAGS-y) $(CONFIG_CC_IMPLICIT_FALLTHROUGH)
ifdef CONFIG_CC_IS_CLANG
KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
+# These result in bogus false positives
+KBUILD_CFLAGS += $(call cc-disable-warning, dangling-pointer)
+
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
else
autoksyms_recursive: descend modules.order
$(Q)$(CONFIG_SHELL) $(srctree)/scripts/adjust_autoksyms.sh \
- "$(MAKE) -f $(srctree)/Makefile vmlinux"
+ "$(MAKE) -f $(srctree)/Makefile autoksyms_recursive"
endif
autoksyms_h := $(if $(CONFIG_TRIM_UNUSED_KSYMS), include/generated/autoksyms.h)
config MMU_GATHER_NO_RANGE
bool
+ select MMU_GATHER_MERGE_VMAS
+
+config MMU_GATHER_NO_FLUSH_CACHE
+ bool
+
+config MMU_GATHER_MERGE_VMAS
+ bool
config MMU_GATHER_NO_GATHER
bool
aspeed-bmc-lenovo-hr630.dtb \
aspeed-bmc-lenovo-hr855xg2.dtb \
aspeed-bmc-microsoft-olympus.dtb \
- aspeed-bmc-nuvia-dc-scm.dtb \
aspeed-bmc-opp-lanyang.dtb \
aspeed-bmc-opp-mihawk.dtb \
aspeed-bmc-opp-mowgli.dtb \
aspeed-bmc-opp-witherspoon.dtb \
aspeed-bmc-opp-zaius.dtb \
aspeed-bmc-portwell-neptune.dtb \
+ aspeed-bmc-qcom-dc-scm-v1.dtb \
aspeed-bmc-quanta-q71l.dtb \
aspeed-bmc-quanta-s6q.dtb \
aspeed-bmc-supermicro-x11spi.dtb \
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-// Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
-
-/dts-v1/;
-
-#include "aspeed-g6.dtsi"
-
-/ {
- model = "Nuvia DC-SCM BMC";
- compatible = "nuvia,dc-scm-bmc", "aspeed,ast2600";
-
- aliases {
- serial4 = &uart5;
- };
-
- chosen {
- stdout-path = &uart5;
- bootargs = "console=ttyS4,115200n8";
- };
-
- memory@80000000 {
- device_type = "memory";
- reg = <0x80000000 0x40000000>;
- };
-};
-
-&mdio3 {
- status = "okay";
-
- ethphy3: ethernet-phy@1 {
- compatible = "ethernet-phy-ieee802.3-c22";
- reg = <1>;
- };
-};
-
-&mac2 {
- status = "okay";
-
- /* Bootloader sets up the MAC to insert delay */
- phy-mode = "rgmii";
- phy-handle = <ðphy3>;
-
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_rgmii3_default>;
-};
-
-&mac3 {
- status = "okay";
-
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_rmii4_default>;
-
- use-ncsi;
-};
-
-&rtc {
- status = "okay";
-};
-
-&fmc {
- status = "okay";
-
- flash@0 {
- status = "okay";
- m25p,fast-read;
- label = "bmc";
- spi-max-frequency = <133000000>;
-#include "openbmc-flash-layout-64.dtsi"
- };
-
- flash@1 {
- status = "okay";
- m25p,fast-read;
- label = "alt-bmc";
- spi-max-frequency = <133000000>;
-#include "openbmc-flash-layout-64-alt.dtsi"
- };
-};
-
-&spi1 {
- status = "okay";
- pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_spi1_default>;
-
- flash@0 {
- status = "okay";
- m25p,fast-read;
- label = "bios";
- spi-max-frequency = <133000000>;
- };
-};
-
-&gpio0 {
- gpio-line-names =
- /*A0-A7*/ "","","","","","","","",
- /*B0-B7*/ "BMC_FLASH_MUX_SEL","","","","","","","",
- /*C0-C7*/ "","","","","","","","",
- /*D0-D7*/ "","","","","","","","",
- /*E0-E7*/ "","","","","","","","",
- /*F0-F7*/ "","","","","","","","",
- /*G0-G7*/ "","","","","","","","",
- /*H0-H7*/ "","","","","","","","",
- /*I0-I7*/ "","","","","","","","",
- /*J0-J7*/ "","","","","","","","",
- /*K0-K7*/ "","","","","","","","",
- /*L0-L7*/ "","","","","","","","",
- /*M0-M7*/ "","","","","","","","",
- /*N0-N7*/ "BMC_FWSPI_RST_N","","GPIO_1_BMC_3V3","","","","","",
- /*O0-O7*/ "JTAG_MUX_A","JTAG_MUX_B","","","","","","",
- /*P0-P7*/ "","","","","","","","",
- /*Q0-Q7*/ "","","","","","","","",
- /*R0-R7*/ "","","","","","","","",
- /*S0-S7*/ "","","","","","","","",
- /*T0-T7*/ "","","","","","","","",
- /*U0-U7*/ "","","","","","","","",
- /*V0-V7*/ "","","","SCMFPGA_SPARE_GPIO1_3V3",
- "SCMFPGA_SPARE_GPIO2_3V3","SCMFPGA_SPARE_GPIO3_3V3",
- "SCMFPGA_SPARE_GPIO4_3V3","SCMFPGA_SPARE_GPIO5_3V3",
- /*W0-W7*/ "","","","","","","","",
- /*X0-X7*/ "","","","","","","","",
- /*Y0-Y7*/ "","","","","","","","",
- /*Z0-Z7*/ "","","","","","","","",
- /*AA0-AA7*/ "","","","","","","","",
- /*AB0-AB7*/ "","","","","","","","",
- /*AC0-AC7*/ "","","","","","","","";
-};
-
-&gpio1 {
- gpio-line-names =
- /*A0-A7*/ "GPI_1_BMC_1V8","","","","","",
- "SCMFPGA_SPARE_GPIO1_1V8","SCMFPGA_SPARE_GPIO2_1V8",
- /*B0-B7*/ "SCMFPGA_SPARE_GPIO3_1V8","SCMFPGA_SPARE_GPIO4_1V8",
- "SCMFPGA_SPARE_GPIO5_1V8","","","","","",
- /*C0-C7*/ "","","","","","","","",
- /*D0-D7*/ "","BMC_SPI1_RST_N","BIOS_FLASH_MUX_SEL","",
- "","TPM2_PIRQ_N","TPM2_RST_N","",
- /*E0-E7*/ "","","","","","","","";
-};
-
-&i2c2 {
- status = "okay";
-};
-
-&i2c4 {
- status = "okay";
-};
-
-&i2c5 {
- status = "okay";
-};
-
-&i2c6 {
- status = "okay";
-};
-
-&i2c7 {
- status = "okay";
-};
-
-&i2c8 {
- status = "okay";
-};
-
-&i2c9 {
- status = "okay";
-};
-
-&i2c10 {
- status = "okay";
-};
-
-&i2c12 {
- status = "okay";
-};
-
-&i2c13 {
- status = "okay";
-};
-
-&i2c14 {
- status = "okay";
-};
-
-&i2c15 {
- status = "okay";
-};
-
-&vhub {
- status = "okay";
-};
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2021-2022 Qualcomm Innovation Center, Inc. All rights reserved.
+
+/dts-v1/;
+
+#include "aspeed-g6.dtsi"
+
+/ {
+ model = "Qualcomm DC-SCM V1 BMC";
+ compatible = "qcom,dc-scm-v1-bmc", "aspeed,ast2600";
+
+ aliases {
+ serial4 = &uart5;
+ };
+
+ chosen {
+ stdout-path = &uart5;
+ bootargs = "console=ttyS4,115200n8";
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x80000000 0x40000000>;
+ };
+};
+
+&mdio3 {
+ status = "okay";
+
+ ethphy3: ethernet-phy@1 {
+ compatible = "ethernet-phy-ieee802.3-c22";
+ reg = <1>;
+ };
+};
+
+&mac2 {
+ status = "okay";
+
+ /* Bootloader sets up the MAC to insert delay */
+ phy-mode = "rgmii";
+ phy-handle = <ðphy3>;
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rgmii3_default>;
+};
+
+&mac3 {
+ status = "okay";
+
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_rmii4_default>;
+
+ use-ncsi;
+};
+
+&rtc {
+ status = "okay";
+};
+
+&fmc {
+ status = "okay";
+
+ flash@0 {
+ status = "okay";
+ m25p,fast-read;
+ label = "bmc";
+ spi-max-frequency = <133000000>;
+#include "openbmc-flash-layout-64.dtsi"
+ };
+
+ flash@1 {
+ status = "okay";
+ m25p,fast-read;
+ label = "alt-bmc";
+ spi-max-frequency = <133000000>;
+#include "openbmc-flash-layout-64-alt.dtsi"
+ };
+};
+
+&spi1 {
+ status = "okay";
+ pinctrl-names = "default";
+ pinctrl-0 = <&pinctrl_spi1_default>;
+
+ flash@0 {
+ status = "okay";
+ m25p,fast-read;
+ label = "bios";
+ spi-max-frequency = <133000000>;
+ };
+};
+
+&gpio0 {
+ gpio-line-names =
+ /*A0-A7*/ "","","","","","","","",
+ /*B0-B7*/ "BMC_FLASH_MUX_SEL","","","","","","","",
+ /*C0-C7*/ "","","","","","","","",
+ /*D0-D7*/ "","","","","","","","",
+ /*E0-E7*/ "","","","","","","","",
+ /*F0-F7*/ "","","","","","","","",
+ /*G0-G7*/ "","","","","","","","",
+ /*H0-H7*/ "","","","","","","","",
+ /*I0-I7*/ "","","","","","","","",
+ /*J0-J7*/ "","","","","","","","",
+ /*K0-K7*/ "","","","","","","","",
+ /*L0-L7*/ "","","","","","","","",
+ /*M0-M7*/ "","","","","","","","",
+ /*N0-N7*/ "BMC_FWSPI_RST_N","","GPIO_1_BMC_3V3","","","","","",
+ /*O0-O7*/ "JTAG_MUX_A","JTAG_MUX_B","","","","","","",
+ /*P0-P7*/ "","","","","","","","",
+ /*Q0-Q7*/ "","","","","","","","",
+ /*R0-R7*/ "","","","","","","","",
+ /*S0-S7*/ "","","","","","","","",
+ /*T0-T7*/ "","","","","","","","",
+ /*U0-U7*/ "","","","","","","","",
+ /*V0-V7*/ "","","","SCMFPGA_SPARE_GPIO1_3V3",
+ "SCMFPGA_SPARE_GPIO2_3V3","SCMFPGA_SPARE_GPIO3_3V3",
+ "SCMFPGA_SPARE_GPIO4_3V3","SCMFPGA_SPARE_GPIO5_3V3",
+ /*W0-W7*/ "","","","","","","","",
+ /*X0-X7*/ "","","","","","","","",
+ /*Y0-Y7*/ "","","","","","","","",
+ /*Z0-Z7*/ "","","","","","","","",
+ /*AA0-AA7*/ "","","","","","","","",
+ /*AB0-AB7*/ "","","","","","","","",
+ /*AC0-AC7*/ "","","","","","","","";
+};
+
+&gpio1 {
+ gpio-line-names =
+ /*A0-A7*/ "GPI_1_BMC_1V8","","","","","",
+ "SCMFPGA_SPARE_GPIO1_1V8","SCMFPGA_SPARE_GPIO2_1V8",
+ /*B0-B7*/ "SCMFPGA_SPARE_GPIO3_1V8","SCMFPGA_SPARE_GPIO4_1V8",
+ "SCMFPGA_SPARE_GPIO5_1V8","","","","","",
+ /*C0-C7*/ "","","","","","","","",
+ /*D0-D7*/ "","BMC_SPI1_RST_N","BIOS_FLASH_MUX_SEL","",
+ "","TPM2_PIRQ_N","TPM2_RST_N","",
+ /*E0-E7*/ "","","","","","","","";
+};
+
+&i2c2 {
+ status = "okay";
+};
+
+&i2c4 {
+ status = "okay";
+};
+
+&i2c5 {
+ status = "okay";
+};
+
+&i2c6 {
+ status = "okay";
+};
+
+&i2c7 {
+ status = "okay";
+};
+
+&i2c8 {
+ status = "okay";
+};
+
+&i2c9 {
+ status = "okay";
+};
+
+&i2c10 {
+ status = "okay";
+};
+
+&i2c12 {
+ status = "okay";
+};
+
+&i2c13 {
+ status = "okay";
+};
+
+&i2c14 {
+ status = "okay";
+};
+
+&i2c15 {
+ status = "okay";
+};
+
+&vhub {
+ status = "okay";
+};
status = "okay";
eeprom@53 {
- compatible = "atmel,24c32";
+ compatible = "atmel,24c02";
reg = <0x53>;
pagesize = <16>;
- size = <128>;
status = "okay";
};
};
status = "okay";
eeprom@50 {
- compatible = "atmel,24c32";
+ compatible = "atmel,24c02";
reg = <0x50>;
pagesize = <16>;
status = "okay";
};
eeprom@52 {
- compatible = "atmel,24c32";
+ compatible = "atmel,24c02";
reg = <0x52>;
pagesize = <16>;
status = "disabled";
};
eeprom@53 {
- compatible = "atmel,24c32";
+ compatible = "atmel,24c02";
reg = <0x53>;
pagesize = <16>;
status = "disabled";
port@0 {
reg = <0>;
label = "lan1";
+ phy-mode = "internal";
};
port@1 {
reg = <1>;
label = "lan2";
+ phy-mode = "internal";
};
port@2 {
reg = <2>;
label = "lan3";
+ phy-mode = "internal";
};
port@3 {
reg = <3>;
label = "lan4";
+ phy-mode = "internal";
};
port@4 {
reg = <4>;
label = "lan5";
+ phy-mode = "internal";
};
port@5 {
&expgpio {
gpio-line-names = "BT_ON",
"WL_ON",
- "",
+ "PWR_LED_OFF",
"GLOBAL_RESET",
"VDD_SD_IO_SEL",
- "CAM_GPIO",
+ "GLOBAL_SHUTDOWN",
"SD_PWR_ON",
- "SD_OC_N";
+ "SHUTDOWN_REQUEST";
};
&genet_mdio {
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_atmel_conn>;
reg = <0x4a>;
- reset-gpios = <&gpio1 14 GPIO_ACTIVE_HIGH>; /* SODIMM 106 */
+ reset-gpios = <&gpio1 14 GPIO_ACTIVE_LOW>; /* SODIMM 106 */
status = "disabled";
};
};
reg = <0x28>;
#gpio-cells = <2>;
gpio-controller;
- ngpio = <32>;
+ ngpios = <62>;
};
sgtl5000: codec@a {
regulator-name = "vddpu";
regulator-min-microvolt = <725000>;
regulator-max-microvolt = <1450000>;
- regulator-enable-ramp-delay = <150>;
+ regulator-enable-ramp-delay = <380>;
anatop-reg-offset = <0x140>;
anatop-vol-bit-shift = <9>;
anatop-vol-bit-width = <5>;
atmel_mxt_ts: touchscreen@4a {
compatible = "atmel,maxtouch";
pinctrl-names = "default";
- pinctrl-0 = <&pinctrl_atmel_conn>;
+ pinctrl-0 = <&pinctrl_atmel_conn &pinctrl_atmel_snvs_conn>;
reg = <0x4a>;
interrupt-parent = <&gpio5>;
interrupts = <4 IRQ_TYPE_EDGE_FALLING>; /* SODIMM 107 / INT */
pinctrl_atmel_conn: atmelconngrp {
fsl,pins = <
MX6UL_PAD_JTAG_MOD__GPIO1_IO10 0xb0a0 /* SODIMM 106 */
- MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */
>;
};
};
&iomuxc_snvs {
+ pinctrl_atmel_snvs_conn: atmelsnvsconngrp {
+ fsl,pins = <
+ MX6ULL_PAD_SNVS_TAMPER4__GPIO5_IO04 0xb0a0 /* SODIMM 107 */
+ >;
+ };
+
pinctrl_snvs_gpio1: snvsgpio1grp {
fsl,pins = <
MX6ULL_PAD_SNVS_TAMPER6__GPIO5_IO06 0x110a0 /* SODIMM 93 */
pinctrl-names = "default";
pinctrl-0 = <&pinctrl_usdhc2>;
bus-width = <4>;
+ no-1-8-v;
non-removable;
- cap-sd-highspeed;
- sd-uhs-ddr50;
- mmc-ddr-1_8v;
vmmc-supply = <®_wifi>;
enable-sdio-wakeup;
status = "okay";
compatible = "usb-nop-xceiv";
clocks = <&clks IMX7D_USB_HSIC_ROOT_CLK>;
clock-names = "main_clk";
+ power-domains = <&pgc_hsic_phy>;
#phy-cells = <0>;
};
compatible = "fsl,imx7d-usb", "fsl,imx27-usb";
reg = <0x30b30000 0x200>;
interrupts = <GIC_SPI 40 IRQ_TYPE_LEVEL_HIGH>;
- power-domains = <&pgc_hsic_phy>;
clocks = <&clks IMX7D_USB_CTRL_CLK>;
fsl,usbphy = <&usbphynop3>;
fsl,usbmisc = <&usbmisc3 0>;
phy4: ethernet-phy@5 {
reg = <5>;
- coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>;
+ coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>;
};
phy5: ethernet-phy@6 {
reg = <6>;
- coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>;
+ coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>;
};
phy6: ethernet-phy@7 {
reg = <7>;
- coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>;
+ coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>;
};
phy7: ethernet-phy@8 {
reg = <8>;
- coma-mode-gpios = <&gpio 37 GPIO_ACTIVE_HIGH>;
+ coma-mode-gpios = <&gpio 37 GPIO_OPEN_DRAIN>;
};
};
sys_clk: sys_clk {
compatible = "fixed-clock";
#clock-cells = <0>;
- clock-frequency = <162500000>;
+ clock-frequency = <165625000>;
};
cpu_clk: cpu_clk {
interrupts = <GIC_SPI 108 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP1_UART2_APPS_CLK>, <&gcc GCC_BLSP1_AHB_CLK>;
clock-names = "core", "iface";
+ pinctrl-names = "default";
+ pinctrl-0 = <&blsp1_uart2_default>;
status = "disabled";
};
interrupts = <GIC_SPI 113 IRQ_TYPE_NONE>;
clocks = <&gcc GCC_BLSP2_UART1_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
clock-names = "core", "iface";
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&blsp2_uart1_default>;
+ pinctrl-1 = <&blsp2_uart1_sleep>;
status = "disabled";
};
interrupts = <GIC_SPI 116 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP2_UART4_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
clock-names = "core", "iface";
+ pinctrl-names = "default";
+ pinctrl-0 = <&blsp2_uart4_default>;
status = "disabled";
};
interrupts = <0 106 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&gcc GCC_BLSP2_QUP6_I2C_APPS_CLK>, <&gcc GCC_BLSP2_AHB_CLK>;
clock-names = "core", "iface";
+ pinctrl-names = "default", "sleep";
+ pinctrl-0 = <&blsp2_i2c6_default>;
+ pinctrl-1 = <&blsp2_i2c6_sleep>;
#address-cells = <1>;
#size-cells = <0>;
};
};
};
- blsp1_uart2_active: blsp1-uart2-active {
+ blsp1_uart2_default: blsp1-uart2-default {
rx {
pins = "gpio5";
function = "blsp_uart2";
};
};
- blsp2_uart1_active: blsp2-uart1-active {
+ blsp2_uart1_default: blsp2-uart1-default {
tx-rts {
pins = "gpio41", "gpio44";
function = "blsp_uart7";
bias-pull-down;
};
- blsp2_uart4_active: blsp2-uart4-active {
+ blsp2_uart4_default: blsp2-uart4-default {
tx-rts {
pins = "gpio53", "gpio56";
function = "blsp_uart10";
bias-pull-up;
};
- /* BLSP2_I2C6 info is missing - nobody uses it though? */
+ blsp2_i2c6_default: blsp2-i2c6-default {
+ pins = "gpio87", "gpio88";
+ function = "blsp_i2c12";
+ drive-strength = <2>;
+ bias-disable;
+ };
+
+ blsp2_i2c6_sleep: blsp2-i2c6-sleep {
+ pins = "gpio87", "gpio88";
+ function = "blsp_i2c12";
+ drive-strength = <2>;
+ bias-pull-up;
+ };
spi8_default: spi8_default {
mosi {
clocks = <&pmc PMC_TYPE_PERIPHERAL 55>, <&pmc PMC_TYPE_GCK 55>;
clock-names = "pclk", "gclk";
assigned-clocks = <&pmc PMC_TYPE_CORE PMC_I2S1_MUX>;
- assigned-parrents = <&pmc PMC_TYPE_GCK 55>;
+ assigned-clock-parents = <&pmc PMC_TYPE_GCK 55>;
status = "disabled";
};
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/*
+ * Copyright (C) STMicroelectronics 2022 - All Rights Reserved
+ * Author: Alexandre Torgue <alexandre.torgue@foss.st.com> for STMicroelectronics.
+ */
+
+/ {
+ firmware {
+ optee: optee {
+ compatible = "linaro,optee-tz";
+ method = "smc";
+ };
+
+ scmi: scmi {
+ compatible = "linaro,scmi-optee";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ linaro,optee-channel-id = <0>;
+ shmem = <&scmi_shm>;
+
+ scmi_clk: protocol@14 {
+ reg = <0x14>;
+ #clock-cells = <1>;
+ };
+
+ scmi_reset: protocol@16 {
+ reg = <0x16>;
+ #reset-cells = <1>;
+ };
+
+ scmi_voltd: protocol@17 {
+ reg = <0x17>;
+
+ scmi_reguls: regulators {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ scmi_reg11: reg11@0 {
+ reg = <0>;
+ regulator-name = "reg11";
+ regulator-min-microvolt = <1100000>;
+ regulator-max-microvolt = <1100000>;
+ };
+
+ scmi_reg18: reg18@1 {
+ voltd-name = "reg18";
+ reg = <1>;
+ regulator-name = "reg18";
+ regulator-min-microvolt = <1800000>;
+ regulator-max-microvolt = <1800000>;
+ };
+
+ scmi_usb33: usb33@2 {
+ reg = <2>;
+ regulator-name = "usb33";
+ regulator-min-microvolt = <3300000>;
+ regulator-max-microvolt = <3300000>;
+ };
+ };
+ };
+ };
+ };
+
+ soc {
+ scmi_sram: sram@2ffff000 {
+ compatible = "mmio-sram";
+ reg = <0x2ffff000 0x1000>;
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges = <0 0x2ffff000 0x1000>;
+
+ scmi_shm: scmi-sram@0 {
+ compatible = "arm,scmi-shmem";
+ reg = <0 0x80>;
+ };
+ };
+ };
+};
+
+®11 {
+ status = "disabled";
+};
+
+®18 {
+ status = "disabled";
+};
+
+&usb33 {
+ status = "disabled";
+};
+
+&usbotg_hs {
+ usb33d-supply = <&scmi_usb33>;
+};
+
+&usbphyc {
+ vdda1v1-supply = <&scmi_reg11>;
+ vdda1v8-supply = <&scmi_reg18>;
+};
+
+/delete-node/ &clk_hse;
+/delete-node/ &clk_hsi;
+/delete-node/ &clk_lse;
+/delete-node/ &clk_lsi;
+/delete-node/ &clk_csi;
status = "disabled";
};
- firmware {
- optee: optee {
- compatible = "linaro,optee-tz";
- method = "smc";
- status = "disabled";
- };
-
- scmi: scmi {
- compatible = "linaro,scmi-optee";
- #address-cells = <1>;
- #size-cells = <0>;
- linaro,optee-channel-id = <0>;
- shmem = <&scmi_shm>;
- status = "disabled";
-
- scmi_clk: protocol@14 {
- reg = <0x14>;
- #clock-cells = <1>;
- };
-
- scmi_reset: protocol@16 {
- reg = <0x16>;
- #reset-cells = <1>;
- };
- };
- };
-
soc {
compatible = "simple-bus";
#address-cells = <1>;
interrupt-parent = <&intc>;
ranges;
- scmi_sram: sram@2ffff000 {
- compatible = "mmio-sram";
- reg = <0x2ffff000 0x1000>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges = <0 0x2ffff000 0x1000>;
-
- scmi_shm: scmi-sram@0 {
- compatible = "arm,scmi-shmem";
- reg = <0 0x80>;
- status = "disabled";
- };
- };
-
timers2: timer@40000000 {
#address-cells = <1>;
#size-cells = <0>;
compatible = "st,stm32-cec";
reg = <0x40016000 0x400>;
interrupts = <GIC_SPI 94 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&rcc CEC_K>, <&clk_lse>;
+ clocks = <&rcc CEC_K>, <&rcc CEC>;
clock-names = "cec", "hdmi-cec";
status = "disabled";
};
usbh_ohci: usb@5800c000 {
compatible = "generic-ohci";
reg = <0x5800c000 0x1000>;
- clocks = <&rcc USBH>, <&usbphyc>;
+ clocks = <&usbphyc>, <&rcc USBH>;
resets = <&rcc USBH_R>;
interrupts = <GIC_SPI 74 IRQ_TYPE_LEVEL_HIGH>;
status = "disabled";
usbh_ehci: usb@5800d000 {
compatible = "generic-ehci";
reg = <0x5800d000 0x1000>;
- clocks = <&rcc USBH>;
+ clocks = <&usbphyc>, <&rcc USBH>;
resets = <&rcc USBH_R>;
interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
companion = <&usbh_ohci>;
/dts-v1/;
#include "stm32mp157a-dk1.dts"
+#include "stm32mp15-scmi.dtsi"
/ {
model = "STMicroelectronics STM32MP157A-DK1 SCMI Discovery Board";
clocks = <&scmi_clk CK_SCMI_MPU>;
};
+&dsi {
+ clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
+};
+
&gpioz {
clocks = <&scmi_clk CK_SCMI_GPIOZ>;
};
resets = <&scmi_reset RST_SCMI_MCU>;
};
-&optee {
- status = "okay";
-};
-
&rcc {
compatible = "st,stm32mp1-rcc-secure", "syscon";
clock-names = "hse", "hsi", "csi", "lse", "lsi";
&rtc {
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
};
-
-&scmi {
- status = "okay";
-};
-
-&scmi_shm {
- status = "okay";
-};
/dts-v1/;
#include "stm32mp157c-dk2.dts"
+#include "stm32mp15-scmi.dtsi"
/ {
model = "STMicroelectronics STM32MP157C-DK2 SCMI Discovery Board";
};
&dsi {
+ phy-dsi-supply = <&scmi_reg18>;
clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
};
resets = <&scmi_reset RST_SCMI_MCU>;
};
-&optee {
- status = "okay";
-};
-
&rcc {
compatible = "st,stm32mp1-rcc-secure", "syscon";
clock-names = "hse", "hsi", "csi", "lse", "lsi";
&rtc {
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
};
-
-&scmi {
- status = "okay";
-};
-
-&scmi_shm {
- status = "okay";
-};
/dts-v1/;
#include "stm32mp157c-ed1.dts"
+#include "stm32mp15-scmi.dtsi"
/ {
model = "STMicroelectronics STM32MP157C-ED1 SCMI eval daughter";
resets = <&scmi_reset RST_SCMI_CRYP1>;
};
+&dsi {
+ clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
+};
+
&gpioz {
clocks = <&scmi_clk CK_SCMI_GPIOZ>;
};
resets = <&scmi_reset RST_SCMI_MCU>;
};
-&optee {
- status = "okay";
-};
-
&rcc {
compatible = "st,stm32mp1-rcc-secure", "syscon";
clock-names = "hse", "hsi", "csi", "lse", "lsi";
&rtc {
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
};
-
-&scmi {
- status = "okay";
-};
-
-&scmi_shm {
- status = "okay";
-};
/dts-v1/;
#include "stm32mp157c-ev1.dts"
+#include "stm32mp15-scmi.dtsi"
/ {
model = "STMicroelectronics STM32MP157C-EV1 SCMI eval daughter on eval mother";
};
&dsi {
+ phy-dsi-supply = <&scmi_reg18>;
clocks = <&rcc DSI_K>, <&scmi_clk CK_SCMI_HSE>, <&rcc DSI_PX>;
};
resets = <&scmi_reset RST_SCMI_MCU>;
};
-&optee {
- status = "okay";
-};
-
&rcc {
compatible = "st,stm32mp1-rcc-secure", "syscon";
clock-names = "hse", "hsi", "csi", "lse", "lsi";
&rtc {
clocks = <&scmi_clk CK_SCMI_RTCAPB>, <&scmi_clk CK_SCMI_RTC>;
};
-
-&scmi {
- status = "okay";
-};
-
-&scmi_shm {
- status = "okay";
-};
flash@0 {
#address-cells = <1>;
#size-cells = <1>;
- compatible = "mxicy,mx25l1606e", "winbond,w25q128";
+ compatible = "mxicy,mx25l1606e", "jedec,spi-nor";
reg = <0>;
spi-max-frequency = <40000000>;
};
CONFIG_DRM=y
CONFIG_DRM_PANEL_SEIKO_43WVF1G=y
CONFIG_DRM_MXSFB=y
+CONFIG_FB=y
CONFIG_FB_MODE_HELPERS=y
CONFIG_LCD_CLASS_DEVICE=y
CONFIG_BACKLIGHT_CLASS_DEVICE=y
#else
#define MAX_DMA_ADDRESS ({ \
extern phys_addr_t arm_dma_zone_size; \
- arm_dma_zone_size && arm_dma_zone_size < (0x10000000 - PAGE_OFFSET) ? \
+ arm_dma_zone_size && arm_dma_zone_size < (0x100000000ULL - PAGE_OFFSET) ? \
(PAGE_OFFSET + arm_dma_zone_size) : 0xffffffffUL; })
#endif
}
#endif
-#ifdef CONFIG_CPU_USE_DOMAINS
-#define modify_domain(dom,type) \
- do { \
- unsigned int domain = get_domain(); \
- domain &= ~domain_mask(dom); \
- domain = domain | domain_val(dom, type); \
- set_domain(domain); \
- } while (0)
-
-#else
-static inline void modify_domain(unsigned dom, unsigned type) { }
-#endif
-
/*
* Generate the T (user) versions of the LDR/STR and related
* instructions (inline assembly)
MT_HIGH_VECTORS,
MT_MEMORY_RWX,
MT_MEMORY_RW,
+ MT_MEMORY_RO,
MT_ROM,
MT_MEMORY_RWX_NONCACHED,
MT_MEMORY_RW_DTCM,
((current_stack_pointer | (THREAD_SIZE - 1)) - 7) - 1; \
})
+
+/*
+ * Update ITSTATE after normal execution of an IT block instruction.
+ *
+ * The 8 IT state bits are split into two parts in CPSR:
+ * ITSTATE<1:0> are in CPSR<26:25>
+ * ITSTATE<7:2> are in CPSR<15:10>
+ */
+static inline unsigned long it_advance(unsigned long cpsr)
+{
+ if ((cpsr & 0x06000400) == 0) {
+ /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
+ cpsr &= ~PSR_IT_MASK;
+ } else {
+ /* We need to shift left ITSTATE<4:0> */
+ const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */
+ unsigned long it = cpsr & mask;
+ it <<= 1;
+ it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */
+ it &= mask;
+ cpsr &= ~mask;
+ cpsr |= it;
+ }
+ return cpsr;
+}
+
#endif /* __ASSEMBLY__ */
#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <xen/arm/xen-ops.h>
b ret_fast_syscall
#endif
ENDPROC(vector_swi)
+ .ltorg
/*
* This is the really slow path. We're going to be doing
* Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
*/
ENTRY(_find_next_zero_bit_le)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ARM( ldrb r3, [r0, r2, lsr #3] )
* Prototype: int find_next_zero_bit(void *addr, unsigned int maxbit, int offset)
*/
ENTRY(_find_next_bit_le)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
ARM( ldrb r3, [r0, r2, lsr #3] )
ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
- teq r1, #0
- beq 3b
+ cmp r2, r1
+ bhs 3b
ands ip, r2, #7
beq 1b @ If new byte, goto old routine
eor r3, r2, #0x18 @ big endian byte ordering
static const struct of_device_id sama5d2_ws_ids[] = {
{ .compatible = "atmel,sama5d2-gem", .data = &ws_info[0] },
- { .compatible = "atmel,at91rm9200-rtc", .data = &ws_info[1] },
+ { .compatible = "atmel,sama5d2-rtc", .data = &ws_info[1] },
{ .compatible = "atmel,sama5d3-udc", .data = &ws_info[2] },
{ .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] },
{ .compatible = "usb-ohci", .data = &ws_info[2] },
};
static const struct of_device_id sam9x60_ws_ids[] = {
- { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] },
+ { .compatible = "microchip,sam9x60-rtc", .data = &ws_info[1] },
{ .compatible = "atmel,at91rm9200-ohci", .data = &ws_info[2] },
{ .compatible = "usb-ohci", .data = &ws_info[2] },
{ .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] },
{ .compatible = "usb-ehci", .data = &ws_info[2] },
- { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] },
+ { .compatible = "microchip,sam9x60-rtt", .data = &ws_info[4] },
{ .compatible = "cdns,sam9x60-macb", .data = &ws_info[5] },
{ /* sentinel */ }
};
static const struct of_device_id sama7g5_ws_ids[] = {
- { .compatible = "atmel,at91sam9x5-rtc", .data = &ws_info[1] },
+ { .compatible = "microchip,sama7g5-rtc", .data = &ws_info[1] },
{ .compatible = "microchip,sama7g5-ohci", .data = &ws_info[2] },
{ .compatible = "usb-ohci", .data = &ws_info[2] },
{ .compatible = "atmel,at91sam9g45-ehci", .data = &ws_info[2] },
{ .compatible = "usb-ehci", .data = &ws_info[2] },
{ .compatible = "microchip,sama7g5-sdhci", .data = &ws_info[3] },
- { .compatible = "atmel,at91sam9260-rtt", .data = &ws_info[4] },
+ { .compatible = "microchip,sama7g5-rtt", .data = &ws_info[4] },
{ /* sentinel */ }
};
return ret;
}
-static void at91_pm_secure_init(void)
+static void __init at91_pm_secure_init(void)
{
int suspend_mode;
struct arm_smccc_res res;
return -ENOENT;
syscon = of_iomap(syscon_np, 0);
+ of_node_put(syscon_np);
if (!syscon)
return -ENOMEM;
/* De-Asscer SATA Reset */
cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SATA));
}
+ of_node_put(dn);
dn = of_find_compatible_node(NULL, NULL, "cavium,cns3420-sdhci");
if (of_device_is_available(dn)) {
cns3xxx_pwr_clk_en(CNS3XXX_PWR_CLK_EN(SDIO));
cns3xxx_pwr_soft_rst(CNS3XXX_PWR_SOFTWARE_RST(SDIO));
}
+ of_node_put(dn);
pm_power_off = cns3xxx_power_off;
np = of_find_matching_node(NULL, exynos_dt_pmu_match);
if (np)
pmu_base_addr = of_iomap(np, 0);
+ of_node_put(np);
}
static void __init exynos_init_irq(void)
}
sram_base = of_iomap(node, 0);
+ of_node_put(node);
if (!sram_base) {
pr_err("Couldn't map SRAM registers\n");
return;
}
scu_base = of_iomap(node, 0);
+ of_node_put(node);
if (!scu_base) {
pr_err("Couldn't map SCU registers\n");
return;
};
static struct gpiod_lookup_table corgi_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.1",
+ .dev_id = "spi1",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
};
static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
};
static struct gpiod_lookup_table pxa_ssp3_gpio_table = {
- .dev_id = "pxa2xx-spi.3",
+ .dev_id = "spi3",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW),
};
static struct gpiod_lookup_table pxa_ssp4_gpio_table = {
- .dev_id = "pxa2xx-spi.4",
+ .dev_id = "spi4",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW),
};
static struct gpiod_lookup_table littleton_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
};
static struct gpiod_lookup_table magician_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
/* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */
GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW),
};
static struct gpiod_lookup_table spitz_spi_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW),
GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW),
};
static struct gpiod_lookup_table pxa_ssp1_gpio_table = {
- .dev_id = "pxa2xx-spi.1",
+ .dev_id = "spi1",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
};
static struct gpiod_lookup_table pxa_ssp2_gpio_table = {
- .dev_id = "pxa2xx-spi.2",
+ .dev_id = "spi2",
.table = {
GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW),
{ },
&match);
if (!match) {
pr_err("Failed to find PMU node\n");
- return;
+ goto out_put;
}
pm_data = (struct rockchip_pm_data *) match->data;
if (ret) {
pr_err("%s: matches init error %d\n", __func__, ret);
- return;
+ goto out_put;
}
}
suspend_set_ops(pm_data->ops);
+
+out_put:
+ of_node_put(np);
}
irq = irq_of_parse_and_map(np, 0);
if (!irq) {
pr_err("%s: No irq passed for timer via DT\n", __func__);
- return;
+ goto err_put_np;
}
gpt_base = of_iomap(np, 0);
if (!gpt_base) {
pr_err("%s: of iomap failed\n", __func__);
- return;
+ goto err_put_np;
}
gpt_clk = clk_get_sys("gpt0", NULL);
goto err_prepare_enable_clk;
}
+ of_node_put(np);
+
spear_clockevent_init(irq);
spear_clocksource_init();
clk_put(gpt_clk);
err_iomap:
iounmap(gpt_base);
+err_put_np:
+ of_node_put(np);
}
bool
help
This option enables or disables the use of domain switching
- via the set_fs() function.
+ using the DACR (domain access control register) to protect memory
+ domains from each other. In Linux we use three domains: kernel, user
+ and IO. The domains are used to protect userspace from kernelspace
+ and to handle IO-space as a special type of memory by assigning
+ manager or client roles to running code (such as a process).
config CPU_V7M_NUM_IRQ
int "Number of external interrupts connected to the NVIC"
if (type == TYPE_LDST)
do_alignment_finish_ldst(addr, instr, regs, offset);
+ if (thumb_mode(regs))
+ regs->ARM_cpsr = it_advance(regs->ARM_cpsr);
+
return 0;
bad_or_fault:
#include <asm/dma-iommu.h>
#include <asm/mach/map.h>
#include <asm/system_info.h>
-#include <xen/swiotlb-xen.h>
+#include <asm/xen/xen-ops.h>
#include "dma.h"
#include "mm.h"
set_dma_ops(dev, dma_ops);
-#ifdef CONFIG_XEN
- if (xen_initial_domain())
- dev->dma_ops = &xen_swiotlb_dma_ops;
-#endif
+ xen_setup_dma_ops(dev);
dev->archdata.dma_ops_setup = true;
}
.prot_sect = PMD_TYPE_SECT | PMD_SECT_AP_WRITE,
.domain = DOMAIN_KERNEL,
},
+ [MT_MEMORY_RO] = {
+ .prot_pte = L_PTE_PRESENT | L_PTE_YOUNG | L_PTE_DIRTY |
+ L_PTE_XN | L_PTE_RDONLY,
+ .prot_l1 = PMD_TYPE_TABLE,
+ .prot_sect = PMD_TYPE_SECT,
+ .domain = DOMAIN_KERNEL,
+ },
[MT_ROM] = {
.prot_sect = PMD_TYPE_SECT,
.domain = DOMAIN_KERNEL,
/* Also setup NX memory mapping */
mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_XN;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_XN;
}
if (cpu_arch >= CPU_ARCH_ARMv7 && (cr & CR_TRE)) {
/*
mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE;
#endif
/*
mem_types[MT_MEMORY_RWX].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RW].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RW].prot_pte |= L_PTE_SHARED;
+ mem_types[MT_MEMORY_RO].prot_sect |= PMD_SECT_S;
+ mem_types[MT_MEMORY_RO].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_DMA_READY].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= PMD_SECT_S;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_pte |= L_PTE_SHARED;
mem_types[MT_MEMORY_RWX].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_RW].prot_sect |= ecc_mask | cp->pmd;
mem_types[MT_MEMORY_RW].prot_pte |= kern_pgprot;
+ mem_types[MT_MEMORY_RO].prot_sect |= ecc_mask | cp->pmd;
+ mem_types[MT_MEMORY_RO].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_DMA_READY].prot_pte |= kern_pgprot;
mem_types[MT_MEMORY_RWX_NONCACHED].prot_sect |= ecc_mask;
mem_types[MT_ROM].prot_sect |= cp->pmd;
map.pfn = __phys_to_pfn(__atags_pointer & SECTION_MASK);
map.virtual = FDT_FIXED_BASE;
map.length = FDT_FIXED_SIZE;
- map.type = MT_ROM;
+ map.type = MT_MEMORY_RO;
create_mapping(&map);
}
#else
static unsigned int spectre_v2_install_workaround(unsigned int method)
{
- pr_info("CPU%u: Spectre V2: workarounds disabled by configuration\n",
- smp_processor_id());
+ pr_info_once("Spectre V2: workarounds disabled by configuration\n");
return SPECTRE_VULNERABLE;
}
return SPECTRE_VULNERABLE;
spectre_bhb_method = method;
- }
- pr_info("CPU%u: Spectre BHB: using %s workaround\n",
- smp_processor_id(), spectre_bhb_method_name(method));
+ pr_info("CPU%u: Spectre BHB: enabling %s workaround for all CPUs\n",
+ smp_processor_id(), spectre_bhb_method_name(method));
+ }
return SPECTRE_MITIGATED;
}
#include <linux/types.h>
#include <linux/stddef.h>
#include <asm/probes.h>
+#include <asm/ptrace.h>
#include <asm/kprobes.h>
void __init arm_probes_decode_init(void);
#endif
-/*
- * Update ITSTATE after normal execution of an IT block instruction.
- *
- * The 8 IT state bits are split into two parts in CPSR:
- * ITSTATE<1:0> are in CPSR<26:25>
- * ITSTATE<7:2> are in CPSR<15:10>
- */
-static inline unsigned long it_advance(unsigned long cpsr)
- {
- if ((cpsr & 0x06000400) == 0) {
- /* ITSTATE<2:0> == 0 means end of IT block, so clear IT state */
- cpsr &= ~PSR_IT_MASK;
- } else {
- /* We need to shift left ITSTATE<4:0> */
- const unsigned long mask = 0x06001c00; /* Mask ITSTATE<4:0> */
- unsigned long it = cpsr & mask;
- it <<= 1;
- it |= it >> (27 - 10); /* Carry ITSTATE<2> to correct place */
- it &= mask;
- cpsr &= ~mask;
- cpsr |= it;
- }
- return cpsr;
-}
-
static inline void __kprobes bx_write_pc(long pcv, struct pt_regs *regs)
{
long cpsr = regs->ARM_cpsr;
if (!xen_domain())
return 0;
+ xen_set_restricted_virtio_memory_access();
+
if (!acpi_disabled)
xen_acpi_guest_init();
else
unsigned long __pfn_to_mfn(unsigned long pfn)
{
- struct rb_node *n = phys_to_mach.rb_node;
+ struct rb_node *n;
struct xen_p2m_entry *entry;
unsigned long irqflags;
read_lock_irqsave(&p2m_lock, irqflags);
+ n = phys_to_mach.rb_node;
while (n) {
entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
if (entry->pfn <= pfn &&
int rc;
unsigned long irqflags;
struct xen_p2m_entry *p2m_entry;
- struct rb_node *n = phys_to_mach.rb_node;
+ struct rb_node *n;
if (mfn == INVALID_P2M_ENTRY) {
write_lock_irqsave(&p2m_lock, irqflags);
+ n = phys_to_mach.rb_node;
while (n) {
p2m_entry = rb_entry(n, struct xen_p2m_entry, rbnode_phys);
if (p2m_entry->pfn <= pfn &&
/delete-node/ cpu@3;
};
+ timer {
+ compatible = "arm,armv8-timer";
+ interrupts = <GIC_PPI 13 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 14 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 11 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>,
+ <GIC_PPI 10 (GIC_CPU_MASK_SIMPLE(2) | IRQ_TYPE_LEVEL_LOW)>;
+ };
+
pmu {
compatible = "arm,cortex-a53-pmu";
interrupts = <GIC_SPI 9 IRQ_TYPE_LEVEL_HIGH>,
device_type = "cpu";
compatible = "brcm,brahma-b53";
reg = <0x0>;
+ enable-method = "spin-table";
+ cpu-release-addr = <0x0 0xfff8>;
next-level-cache = <&l2>;
};
interrupts = <GIC_SPI 246 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&uart0_bus>;
- clocks = <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>,
- <&cmu_peri CLK_GOUT_UART0_PCLK>;
+ clocks = <&cmu_peri CLK_GOUT_UART0_PCLK>,
+ <&cmu_peri CLK_GOUT_UART0_EXT_UCLK>;
clock-names = "uart", "clk_uart_baud0";
samsung,uart-fifosize = <64>;
status = "disabled";
interrupts = <GIC_SPI 247 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&uart1_bus>;
- clocks = <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>,
- <&cmu_peri CLK_GOUT_UART1_PCLK>;
+ clocks = <&cmu_peri CLK_GOUT_UART1_PCLK>,
+ <&cmu_peri CLK_GOUT_UART1_EXT_UCLK>;
clock-names = "uart", "clk_uart_baud0";
samsung,uart-fifosize = <256>;
status = "disabled";
interrupts = <GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>;
pinctrl-names = "default";
pinctrl-0 = <&uart2_bus>;
- clocks = <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>,
- <&cmu_peri CLK_GOUT_UART2_PCLK>;
+ clocks = <&cmu_peri CLK_GOUT_UART2_PCLK>,
+ <&cmu_peri CLK_GOUT_UART2_EXT_UCLK>;
clock-names = "uart", "clk_uart_baud0";
samsung,uart-fifosize = <256>;
status = "disabled";
little-endian;
};
- efuse@1e80000 {
+ sfp: efuse@1e80000 {
compatible = "fsl,ls1028a-sfp";
reg = <0x0 0x1e80000 0x0 0x10000>;
+ clocks = <&clockgen QORIQ_CLK_PLATFORM_PLL
+ QORIQ_CLK_PLL_DIV(4)>;
+ clock-names = "sfp";
#address-cells = <1>;
#size-cells = <1>;
&iomuxc {
pinctrl_eqos: eqosgrp {
fsl,pins = <
- MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3
- MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3
- MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91
- MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91
- MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91
- MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91
- MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91
- MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91
- MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f
- MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f
- MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f
- MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f
- MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f
- MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f
- MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x19
+ MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2
+ MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2
+ MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90
+ MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90
+ MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90
+ MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90
+ MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90
+ MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90
+ MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16
+ MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16
+ MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16
+ MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16
+ MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16
+ MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16
+ MX8MP_IOMUXC_SAI2_RXC__GPIO4_IO22 0x10
>;
};
pinctrl_fec: fecgrp {
fsl,pins = <
- MX8MP_IOMUXC_SAI1_RXD2__ENET1_MDC 0x3
- MX8MP_IOMUXC_SAI1_RXD3__ENET1_MDIO 0x3
- MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x91
- MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x91
- MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x91
- MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x91
- MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x91
- MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x91
- MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x1f
- MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x1f
- MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x1f
- MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x1f
- MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x1f
- MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x1f
- MX8MP_IOMUXC_SAI1_RXD0__GPIO4_IO02 0x19
+ MX8MP_IOMUXC_SAI1_RXD2__ENET1_MDC 0x2
+ MX8MP_IOMUXC_SAI1_RXD3__ENET1_MDIO 0x2
+ MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x90
+ MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x90
+ MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x90
+ MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x90
+ MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x90
+ MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x90
+ MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x16
+ MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x16
+ MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x16
+ MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x16
+ MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x16
+ MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x16
+ MX8MP_IOMUXC_SAI1_RXD0__GPIO4_IO02 0x10
>;
};
pinctrl_gpio_led: gpioledgrp {
fsl,pins = <
- MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x19
+ MX8MP_IOMUXC_NAND_READY_B__GPIO3_IO16 0x140
>;
};
pinctrl_i2c1: i2c1grp {
fsl,pins = <
- MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c3
- MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c3
+ MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c2
+ MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c2
>;
};
pinctrl_i2c3: i2c3grp {
fsl,pins = <
- MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3
- MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3
+ MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2
+ MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2
>;
};
pinctrl_i2c5: i2c5grp {
fsl,pins = <
- MX8MP_IOMUXC_SPDIF_RX__I2C5_SDA 0x400001c3
- MX8MP_IOMUXC_SPDIF_TX__I2C5_SCL 0x400001c3
+ MX8MP_IOMUXC_SPDIF_RX__I2C5_SDA 0x400001c2
+ MX8MP_IOMUXC_SPDIF_TX__I2C5_SCL 0x400001c2
>;
};
pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp {
fsl,pins = <
- MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41
+ MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40
>;
};
pinctrl_uart2: uart2grp {
fsl,pins = <
- MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49
- MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49
+ MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x140
+ MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x140
>;
};
pinctrl_usb1_vbus: usb1grp {
fsl,pins = <
- MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x19
+ MX8MP_IOMUXC_GPIO1_IO14__USB2_OTG_PWR 0x10
>;
};
MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0
MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0
MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0
- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
+ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
>;
};
MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4
MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4
MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4
- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
+ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
>;
};
MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6
MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6
MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6
- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
+ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
>;
};
&iomuxc {
pinctrl_eqos: eqosgrp {
fsl,pins = <
- MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3
- MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3
- MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91
- MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91
- MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91
- MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91
- MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91
- MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91
- MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f
- MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f
- MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f
- MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f
- MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f
- MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f
- MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x19
+ MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2
+ MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2
+ MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90
+ MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90
+ MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90
+ MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90
+ MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90
+ MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90
+ MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16
+ MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16
+ MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16
+ MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16
+ MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16
+ MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16
+ MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x10
>;
};
pinctrl_uart2: uart2grp {
fsl,pins = <
- MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x49
- MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x49
+ MX8MP_IOMUXC_UART2_RXD__UART2_DCE_RX 0x40
+ MX8MP_IOMUXC_UART2_TXD__UART2_DCE_TX 0x40
>;
};
MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0
MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0
MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0
- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
+ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
>;
};
pinctrl_reg_usb1: regusb1grp {
fsl,pins = <
- MX8MP_IOMUXC_GPIO1_IO14__GPIO1_IO14 0x19
+ MX8MP_IOMUXC_GPIO1_IO14__GPIO1_IO14 0x10
>;
};
pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp {
fsl,pins = <
- MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41
+ MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40
>;
};
};
&iomuxc {
pinctrl_eqos: eqosgrp {
fsl,pins = <
- MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3
- MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3
- MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91
- MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91
- MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91
- MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91
- MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91
- MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91
- MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f
- MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f
- MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f
- MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f
- MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f
- MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f
+ MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2
+ MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2
+ MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90
+ MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90
+ MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90
+ MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90
+ MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90
+ MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90
+ MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16
+ MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16
+ MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16
+ MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16
+ MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16
+ MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16
MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x10
>;
};
pinctrl_i2c2: i2c2grp {
fsl,pins = <
- MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c3
- MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c3
+ MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c2
+ MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c2
>;
};
pinctrl_i2c2_gpio: i2c2gpiogrp {
fsl,pins = <
- MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e3
- MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e3
+ MX8MP_IOMUXC_I2C2_SCL__GPIO5_IO16 0x1e2
+ MX8MP_IOMUXC_I2C2_SDA__GPIO5_IO17 0x1e2
>;
};
pinctrl_reg_usdhc2_vmmc: regusdhc2vmmcgrp {
fsl,pins = <
- MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x41
+ MX8MP_IOMUXC_SD2_RESET_B__GPIO2_IO19 0x40
>;
};
pinctrl_uart1: uart1grp {
fsl,pins = <
- MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x49
- MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x49
+ MX8MP_IOMUXC_UART1_RXD__UART1_DCE_RX 0x40
+ MX8MP_IOMUXC_UART1_TXD__UART1_DCE_TX 0x40
>;
};
MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d0
MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d0
MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d0
- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
+ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
>;
};
MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d4
MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d4
MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d4
- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
+ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
>;
};
MX8MP_IOMUXC_SD2_DATA1__USDHC2_DATA1 0x1d6
MX8MP_IOMUXC_SD2_DATA2__USDHC2_DATA2 0x1d6
MX8MP_IOMUXC_SD2_DATA3__USDHC2_DATA3 0x1d6
- MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc1
+ MX8MP_IOMUXC_GPIO1_IO04__USDHC2_VSELECT 0xc0
>;
};
};
pinctrl_hog: hoggrp {
fsl,pins = <
- MX8MP_IOMUXC_GPIO1_IO09__GPIO1_IO09 0x40000041 /* DIO0 */
- MX8MP_IOMUXC_GPIO1_IO11__GPIO1_IO11 0x40000041 /* DIO1 */
- MX8MP_IOMUXC_NAND_DQS__GPIO3_IO14 0x40000041 /* M2SKT_OFF# */
- MX8MP_IOMUXC_SD2_DATA2__GPIO2_IO17 0x40000159 /* PCIE1_WDIS# */
- MX8MP_IOMUXC_SD2_DATA3__GPIO2_IO18 0x40000159 /* PCIE2_WDIS# */
- MX8MP_IOMUXC_SD2_CMD__GPIO2_IO14 0x40000159 /* PCIE3_WDIS# */
- MX8MP_IOMUXC_NAND_DATA00__GPIO3_IO06 0x40000041 /* M2SKT_RST# */
- MX8MP_IOMUXC_SAI1_TXD6__GPIO4_IO18 0x40000159 /* M2SKT_WDIS# */
- MX8MP_IOMUXC_NAND_ALE__GPIO3_IO00 0x40000159 /* M2SKT_GDIS# */
+ MX8MP_IOMUXC_GPIO1_IO09__GPIO1_IO09 0x40000040 /* DIO0 */
+ MX8MP_IOMUXC_GPIO1_IO11__GPIO1_IO11 0x40000040 /* DIO1 */
+ MX8MP_IOMUXC_NAND_DQS__GPIO3_IO14 0x40000040 /* M2SKT_OFF# */
+ MX8MP_IOMUXC_SD2_DATA2__GPIO2_IO17 0x40000150 /* PCIE1_WDIS# */
+ MX8MP_IOMUXC_SD2_DATA3__GPIO2_IO18 0x40000150 /* PCIE2_WDIS# */
+ MX8MP_IOMUXC_SD2_CMD__GPIO2_IO14 0x40000150 /* PCIE3_WDIS# */
+ MX8MP_IOMUXC_NAND_DATA00__GPIO3_IO06 0x40000040 /* M2SKT_RST# */
+ MX8MP_IOMUXC_SAI1_TXD6__GPIO4_IO18 0x40000150 /* M2SKT_WDIS# */
+ MX8MP_IOMUXC_NAND_ALE__GPIO3_IO00 0x40000150 /* M2SKT_GDIS# */
MX8MP_IOMUXC_SAI3_TXD__GPIO5_IO01 0x40000104 /* UART_TERM */
MX8MP_IOMUXC_SAI3_TXFS__GPIO4_IO31 0x40000104 /* UART_RS485 */
MX8MP_IOMUXC_SAI3_TXC__GPIO5_IO00 0x40000104 /* UART_HALF */
pinctrl_accel: accelgrp {
fsl,pins = <
- MX8MP_IOMUXC_GPIO1_IO07__GPIO1_IO07 0x159
+ MX8MP_IOMUXC_GPIO1_IO07__GPIO1_IO07 0x150
>;
};
pinctrl_eqos: eqosgrp {
fsl,pins = <
- MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x3
- MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x3
- MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x91
- MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x91
- MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x91
- MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x91
- MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x91
- MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x91
- MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x1f
- MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x1f
- MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x1f
- MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x1f
- MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x1f
- MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x1f
- MX8MP_IOMUXC_SAI3_RXD__GPIO4_IO30 0x141 /* RST# */
- MX8MP_IOMUXC_SAI3_RXFS__GPIO4_IO28 0x159 /* IRQ# */
+ MX8MP_IOMUXC_ENET_MDC__ENET_QOS_MDC 0x2
+ MX8MP_IOMUXC_ENET_MDIO__ENET_QOS_MDIO 0x2
+ MX8MP_IOMUXC_ENET_RD0__ENET_QOS_RGMII_RD0 0x90
+ MX8MP_IOMUXC_ENET_RD1__ENET_QOS_RGMII_RD1 0x90
+ MX8MP_IOMUXC_ENET_RD2__ENET_QOS_RGMII_RD2 0x90
+ MX8MP_IOMUXC_ENET_RD3__ENET_QOS_RGMII_RD3 0x90
+ MX8MP_IOMUXC_ENET_RXC__CCM_ENET_QOS_CLOCK_GENERATE_RX_CLK 0x90
+ MX8MP_IOMUXC_ENET_RX_CTL__ENET_QOS_RGMII_RX_CTL 0x90
+ MX8MP_IOMUXC_ENET_TD0__ENET_QOS_RGMII_TD0 0x16
+ MX8MP_IOMUXC_ENET_TD1__ENET_QOS_RGMII_TD1 0x16
+ MX8MP_IOMUXC_ENET_TD2__ENET_QOS_RGMII_TD2 0x16
+ MX8MP_IOMUXC_ENET_TD3__ENET_QOS_RGMII_TD3 0x16
+ MX8MP_IOMUXC_ENET_TX_CTL__ENET_QOS_RGMII_TX_CTL 0x16
+ MX8MP_IOMUXC_ENET_TXC__CCM_ENET_QOS_CLOCK_GENERATE_TX_CLK 0x16
+ MX8MP_IOMUXC_SAI3_RXD__GPIO4_IO30 0x140 /* RST# */
+ MX8MP_IOMUXC_SAI3_RXFS__GPIO4_IO28 0x150 /* IRQ# */
>;
};
pinctrl_fec: fecgrp {
fsl,pins = <
- MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x91
- MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x91
- MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x91
- MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x91
- MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x91
- MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x91
- MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x1f
- MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x1f
- MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x1f
- MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x1f
- MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x1f
- MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x1f
- MX8MP_IOMUXC_SAI1_RXFS__ENET1_1588_EVENT0_IN 0x141
- MX8MP_IOMUXC_SAI1_RXC__ENET1_1588_EVENT0_OUT 0x141
+ MX8MP_IOMUXC_SAI1_RXD4__ENET1_RGMII_RD0 0x90
+ MX8MP_IOMUXC_SAI1_RXD5__ENET1_RGMII_RD1 0x90
+ MX8MP_IOMUXC_SAI1_RXD6__ENET1_RGMII_RD2 0x90
+ MX8MP_IOMUXC_SAI1_RXD7__ENET1_RGMII_RD3 0x90
+ MX8MP_IOMUXC_SAI1_TXC__ENET1_RGMII_RXC 0x90
+ MX8MP_IOMUXC_SAI1_TXFS__ENET1_RGMII_RX_CTL 0x90
+ MX8MP_IOMUXC_SAI1_TXD0__ENET1_RGMII_TD0 0x16
+ MX8MP_IOMUXC_SAI1_TXD1__ENET1_RGMII_TD1 0x16
+ MX8MP_IOMUXC_SAI1_TXD2__ENET1_RGMII_TD2 0x16
+ MX8MP_IOMUXC_SAI1_TXD3__ENET1_RGMII_TD3 0x16
+ MX8MP_IOMUXC_SAI1_TXD4__ENET1_RGMII_TX_CTL 0x16
+ MX8MP_IOMUXC_SAI1_TXD5__ENET1_RGMII_TXC 0x16
+ MX8MP_IOMUXC_SAI1_RXFS__ENET1_1588_EVENT0_IN 0x140
+ MX8MP_IOMUXC_SAI1_RXC__ENET1_1588_EVENT0_OUT 0x140
>;
};
pinctrl_gsc: gscgrp {
fsl,pins = <
- MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x159
+ MX8MP_IOMUXC_SAI1_MCLK__GPIO4_IO20 0x150
>;
};
pinctrl_i2c1: i2c1grp {
fsl,pins = <
- MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c3
- MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c3
+ MX8MP_IOMUXC_I2C1_SCL__I2C1_SCL 0x400001c2
+ MX8MP_IOMUXC_I2C1_SDA__I2C1_SDA 0x400001c2
>;
};
pinctrl_i2c2: i2c2grp {
fsl,pins = <
- MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c3
- MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c3
+ MX8MP_IOMUXC_I2C2_SCL__I2C2_SCL 0x400001c2
+ MX8MP_IOMUXC_I2C2_SDA__I2C2_SDA 0x400001c2
>;
};
pinctrl_i2c3: i2c3grp {
fsl,pins = <
- MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c3
- MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c3
+ MX8MP_IOMUXC_I2C3_SCL__I2C3_SCL 0x400001c2
+ MX8MP_IOMUXC_I2C3_SDA__I2C3_SDA 0x400001c2
>;
};
pinctrl_i2c4: i2c4grp {
fsl,pins = <
- MX8MP_IOMUXC_I2C4_SCL__I2C4_SCL 0x400001c3
- MX8MP_IOMUXC_I2C4_SDA__I2C4_SDA 0x400001c3
+ MX8MP_IOMUXC_I2C4_SCL__I2C4_SCL 0x400001c2
+ MX8MP_IOMUXC_I2C4_SDA__I2C4_SDA 0x400001c2
>;
};
pinctrl_ksz: kszgrp {
fsl,pins = <
- MX8MP_IOMUXC_SAI3_RXC__GPIO4_IO29 0x159 /* IRQ# */
- MX8MP_IOMUXC_SAI3_MCLK__GPIO5_IO02 0x141 /* RST# */
+ MX8MP_IOMUXC_SAI3_RXC__GPIO4_IO29 0x150 /* IRQ# */
+ MX8MP_IOMUXC_SAI3_MCLK__GPIO5_IO02 0x140 /* RST# */
>;
};
pinctrl_gpio_leds: ledgrp {
fsl,pins = <
- MX8MP_IOMUXC_SD2_DATA0__GPIO2_IO15 0x19
- MX8MP_IOMUXC_SD2_DATA1__GPIO2_IO16 0x19
+ MX8MP_IOMUXC_SD2_DATA0__GPIO2_IO15 0x10
+ MX8MP_IOMUXC_SD2_DATA1__GPIO2_IO16 0x10
>;
};
pinctrl_pmic: pmicgrp {
fsl,pins = <
- MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x141
+ MX8MP_IOMUXC_NAND_DATA01__GPIO3_IO07 0x140
>;
};
pinctrl_pps: ppsgrp {
fsl,pins = <
- MX8MP_IOMUXC_GPIO1_IO12__GPIO1_IO12 0x141
+ MX8MP_IOMUXC_GPIO1_IO12__GPIO1_IO12 0x140
>;
};
pinctrl_reg_usb2: regusb2grp {
fsl,pins = <
- MX8MP_IOMUXC_GPIO1_IO06__GPIO1_IO06 0x141
+ MX8MP_IOMUXC_GPIO1_IO06__GPIO1_IO06 0x140
>;
};
pinctrl_reg_wifi: regwifigrp {
fsl,pins = <
- MX8MP_IOMUXC_NAND_DATA03__GPIO3_IO09 0x119
+ MX8MP_IOMUXC_NAND_DATA03__GPIO3_IO09 0x110
>;
};
pinctrl_uart3_gpio: uart3gpiogrp {
fsl,pins = <
- MX8MP_IOMUXC_NAND_DATA02__GPIO3_IO08 0x119
+ MX8MP_IOMUXC_NAND_DATA02__GPIO3_IO08 0x110
>;
};
pgc_ispdwp: power-domain@18 {
#power-domain-cells = <0>;
reg = <IMX8MP_POWER_DOMAIN_MEDIAMIX_ISPDWP>;
- clocks = <&clk IMX8MP_CLK_MEDIA_ISP_DIV>;
+ clocks = <&clk IMX8MP_CLK_MEDIA_ISP_ROOT>;
};
};
};
};
};
- soc {
+ soc@0 {
compatible = "simple-bus";
#address-cells = <1>;
#size-cells = <1>;
vdd_l17_29-supply = <&vph_pwr>;
vdd_l20_21-supply = <&vph_pwr>;
vdd_l25-supply = <&pm8994_s5>;
- vdd_lvs1_2 = <&pm8994_s4>;
+ vdd_lvs1_2-supply = <&pm8994_s4>;
/* S1, S2, S6 and S12 are managed by RPMPD */
vdd_l17_29-supply = <&vph_pwr>;
vdd_l20_21-supply = <&vph_pwr>;
vdd_l25-supply = <&pm8994_s5>;
- vdd_lvs1_2 = <&pm8994_s4>;
+ vdd_lvs1_2-supply = <&pm8994_s4>;
/* S1, S2, S6 and S12 are managed by RPMPD */
CPU6: cpu@102 {
device_type = "cpu";
compatible = "arm,cortex-a57";
- reg = <0x0 0x101>;
+ reg = <0x0 0x102>;
enable-method = "psci";
next-level-cache = <&L2_1>;
};
CPU7: cpu@103 {
device_type = "cpu";
compatible = "arm,cortex-a57";
- reg = <0x0 0x101>;
+ reg = <0x0 0x103>;
enable-method = "psci";
next-level-cache = <&L2_1>;
};
* Copyright 2021 Google LLC.
*/
-#include "sc7180-trogdor.dtsi"
+/* This file must be included after sc7180-trogdor.dtsi */
/ {
/* BOARD-SPECIFIC TOP LEVEL NODES */
* Copyright 2020 Google LLC.
*/
-#include "sc7180-trogdor.dtsi"
+/* This file must be included after sc7180-trogdor.dtsi */
&ap_sar_sensor {
semtech,cs0-ground;
power-domains = <&dispcc MDSS_GDSC>;
- clocks = <&gcc GCC_DISP_AHB_CLK>,
+ clocks = <&dispcc DISP_CC_MDSS_AHB_CLK>,
<&dispcc DISP_CC_MDSS_MDP_CLK>;
clock-names = "iface", "core";
reg = <0x0 0x17100000 0x0 0x10000>, /* GICD */
<0x0 0x17180000 0x0 0x200000>; /* GICR * 8 */
interrupts = <GIC_PPI 9 IRQ_TYPE_LEVEL_HIGH>;
+ #address-cells = <2>;
+ #size-cells = <2>;
+ ranges;
+
+ gic_its: msi-controller@17140000 {
+ compatible = "arm,gic-v3-its";
+ reg = <0x0 0x17140000 0x0 0x20000>;
+ msi-controller;
+ #msi-cells = <1>;
+ };
};
timer@17420000 {
iommus = <&apps_smmu 0xe0 0x0>;
- interconnects = <&aggre1_noc MASTER_UFS_MEM &mc_virt SLAVE_EBI1>,
- <&gem_noc MASTER_APPSS_PROC &config_noc SLAVE_UFS_MEM_CFG>;
+ interconnects = <&aggre1_noc MASTER_UFS_MEM 0 &mc_virt SLAVE_EBI1 0>,
+ <&gem_noc MASTER_APPSS_PROC 0 &config_noc SLAVE_UFS_MEM_CFG 0>;
interconnect-names = "ufs-ddr", "cpu-ufs";
clock-names =
"core_clk",
<&cru ACLK_VIO>,
<&cru ACLK_GIC_PRE>,
<&cru PCLK_DDR>,
- <&cru ACLK_HDCP>;
+ <&cru ACLK_HDCP>,
+ <&cru ACLK_VDU>;
assigned-clock-rates =
<600000000>, <1600000000>,
<1000000000>,
<400000000>,
<200000000>,
<200000000>,
+ <400000000>,
<400000000>;
};
<&cru HCLK_PERILP1>, <&cru PCLK_PERILP1>,
<&cru ACLK_VIO>, <&cru ACLK_HDCP>,
<&cru ACLK_GIC_PRE>,
- <&cru PCLK_DDR>;
+ <&cru PCLK_DDR>,
+ <&cru ACLK_VDU>;
assigned-clock-rates =
<594000000>, <800000000>,
<1000000000>,
<100000000>, <50000000>,
<400000000>, <400000000>,
<200000000>,
- <200000000>;
+ <200000000>,
+ <400000000>;
};
grf: syscon@ff770000 {
};
&usb_host0_xhci {
+ dr_mode = "host";
status = "okay";
};
assigned-clocks = <&cru SCLK_GMAC1_RX_TX>, <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>;
assigned-clock-parents = <&cru SCLK_GMAC1_RGMII_SPEED>, <&cru SCLK_GMAC1>, <&gmac1_clkin>;
clock_in_out = "input";
- phy-mode = "rgmii-id";
+ phy-mode = "rgmii";
phy-supply = <&vcc_3v3>;
pinctrl-names = "default";
pinctrl-0 = <&gmac1m1_miim
clock-names = "clk_ahb", "clk_xin";
mmc-ddr-1_8v;
mmc-hs200-1_8v;
- mmc-hs400-1_8v;
ti,trm-icp = <0x2>;
ti,otap-del-sel-legacy = <0x0>;
ti,otap-del-sel-mmc-hs = <0x0>;
ti,otap-del-sel-ddr52 = <0x6>;
ti,otap-del-sel-hs200 = <0x7>;
- ti,otap-del-sel-hs400 = <0x4>;
};
sdhci1: mmc@fa00000 {
ranges;
#interrupt-cells = <3>;
interrupt-controller;
- reg = <0x00 0x01800000 0x00 0x200000>, /* GICD */
+ reg = <0x00 0x01800000 0x00 0x100000>, /* GICD */
<0x00 0x01900000 0x00 0x100000>, /* GICR */
<0x00 0x6f000000 0x00 0x2000>, /* GICC */
<0x00 0x6f010000 0x00 0x1000>, /* GICH */
struct arch_timer_cpu timer_cpu;
struct kvm_pmu pmu;
- /*
- * Anything that is not used directly from assembly code goes
- * here.
- */
-
/*
* Guest registers we preserve during guest debugging.
*
#define ID_AA64SMFR0_F32F32_SHIFT 32
#define ID_AA64SMFR0_FA64 0x1
-#define ID_AA64SMFR0_I16I64 0x4
+#define ID_AA64SMFR0_I16I64 0xf
#define ID_AA64SMFR0_F64F64 0x1
-#define ID_AA64SMFR0_I8I32 0x4
+#define ID_AA64SMFR0_I8I32 0xf
#define ID_AA64SMFR0_F16F32 0x1
#define ID_AA64SMFR0_B16F32 0x1
#define ID_AA64SMFR0_F32F32 0x1
/*
* Code only run in VHE/NVHE hyp context can assume VHE is present or
* absent. Otherwise fall back to caps.
+ * This allows the compiler to discard VHE-specific code from the
+ * nVHE object, reducing the number of external symbol references
+ * needed to link.
*/
if (is_vhe_hyp_code())
return true;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <xen/arm/xen-ops.h>
#ifdef CONFIG_KVM
static bool is_kvm_protected_mode(const struct arm64_cpu_capabilities *entry, int __unused)
{
- if (kvm_get_mode() != KVM_MODE_PROTECTED)
- return false;
-
- if (is_kernel_in_hyp_mode()) {
- pr_warn("Protected KVM not available with VHE\n");
- return false;
- }
-
- return true;
+ return kvm_get_mode() == KVM_MODE_PROTECTED;
}
#endif /* CONFIG_KVM */
WARN_ON(num >= MAX_CPU_FEATURES);
elf_hwcap |= BIT(num);
}
-EXPORT_SYMBOL_GPL(cpu_set_feature);
bool cpu_have_feature(unsigned int num)
{
* x19-x29 per the AAPCS, and we created frame records upon entry, so we need
* to restore x0-x8, x29, and x30.
*/
-ftrace_common_return:
/* Restore function arguments */
ldp x0, x1, [sp]
ldp x2, x3, [sp, #S_X2]
* trapping to the kernel.
*
* When stored, Z0-Z31 (incorporating Vn in bits[127:0] or the
- * corresponding Zn), P0-P15 and FFR are encoded in in
+ * corresponding Zn), P0-P15 and FFR are encoded in
* task->thread.sve_state, formatted appropriately for vector
* length task->thread.sve_vl or, if SVCR.SM is set,
* task->thread.sme_vl.
if (system_supports_sme()) {
svcr = read_sysreg_s(SYS_SVCR);
- if (!system_supports_fa64())
- ffr = svcr & SVCR_SM_MASK;
+ __this_cpu_write(efi_sm_state,
+ svcr & SVCR_SM_MASK);
- __this_cpu_write(efi_sm_state, ffr);
+ /*
+ * Unless we have FA64 FFR does not
+ * exist in streaming mode.
+ */
+ if (!system_supports_fa64())
+ ffr = !(svcr & SVCR_SM_MASK);
}
sve_save_state(sve_state + sve_ffr_offset(sve_max_vl()),
sysreg_clear_set_s(SYS_SVCR,
0,
SVCR_SM_MASK);
+
+ /*
+ * Unless we have FA64 FFR does not
+ * exist in streaming mode.
+ */
if (!system_supports_fa64())
- ffr = efi_sm_state;
+ ffr = false;
}
}
}
/*
- * Turn on the call to ftrace_caller() in instrumented function
+ * Find the address the callsite must branch to in order to reach '*addr'.
+ *
+ * Due to the limited range of 'BL' instructions, modules may be placed too far
+ * away to branch directly and must use a PLT.
+ *
+ * Returns true when '*addr' contains a reachable target address, or has been
+ * modified to contain a PLT address. Returns false otherwise.
*/
-int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+static bool ftrace_find_callable_addr(struct dyn_ftrace *rec,
+ struct module *mod,
+ unsigned long *addr)
{
unsigned long pc = rec->ip;
- u32 old, new;
- long offset = (long)pc - (long)addr;
+ long offset = (long)*addr - (long)pc;
+ struct plt_entry *plt;
- if (offset < -SZ_128M || offset >= SZ_128M) {
- struct module *mod;
- struct plt_entry *plt;
+ /*
+ * When the target is within range of the 'BL' instruction, use 'addr'
+ * as-is and branch to that directly.
+ */
+ if (offset >= -SZ_128M && offset < SZ_128M)
+ return true;
- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
- return -EINVAL;
+ /*
+ * When the target is outside of the range of a 'BL' instruction, we
+ * must use a PLT to reach it. We can only place PLTs for modules, and
+ * only when module PLT support is built-in.
+ */
+ if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
+ return false;
- /*
- * On kernels that support module PLTs, the offset between the
- * branch instruction and its target may legally exceed the
- * range of an ordinary relative 'bl' opcode. In this case, we
- * need to branch via a trampoline in the module.
- *
- * NOTE: __module_text_address() must be called with preemption
- * disabled, but we can rely on ftrace_lock to ensure that 'mod'
- * retains its validity throughout the remainder of this code.
- */
+ /*
+ * 'mod' is only set at module load time, but if we end up
+ * dealing with an out-of-range condition, we can assume it
+ * is due to a module being loaded far away from the kernel.
+ *
+ * NOTE: __module_text_address() must be called with preemption
+ * disabled, but we can rely on ftrace_lock to ensure that 'mod'
+ * retains its validity throughout the remainder of this code.
+ */
+ if (!mod) {
preempt_disable();
mod = __module_text_address(pc);
preempt_enable();
+ }
- if (WARN_ON(!mod))
- return -EINVAL;
+ if (WARN_ON(!mod))
+ return false;
- plt = get_ftrace_plt(mod, addr);
- if (!plt) {
- pr_err("ftrace: no module PLT for %ps\n", (void *)addr);
- return -EINVAL;
- }
-
- addr = (unsigned long)plt;
+ plt = get_ftrace_plt(mod, *addr);
+ if (!plt) {
+ pr_err("ftrace: no module PLT for %ps\n", (void *)*addr);
+ return false;
}
+ *addr = (unsigned long)plt;
+ return true;
+}
+
+/*
+ * Turn on the call to ftrace_caller() in instrumented function
+ */
+int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
+{
+ unsigned long pc = rec->ip;
+ u32 old, new;
+
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
+
old = aarch64_insn_gen_nop();
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
unsigned long pc = rec->ip;
u32 old, new;
+ if (!ftrace_find_callable_addr(rec, NULL, &old_addr))
+ return -EINVAL;
+ if (!ftrace_find_callable_addr(rec, NULL, &addr))
+ return -EINVAL;
+
old = aarch64_insn_gen_branch_imm(pc, old_addr,
AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
unsigned long addr)
{
unsigned long pc = rec->ip;
- bool validate = true;
u32 old = 0, new;
- long offset = (long)pc - (long)addr;
- if (offset < -SZ_128M || offset >= SZ_128M) {
- u32 replaced;
-
- if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS))
- return -EINVAL;
-
- /*
- * 'mod' is only set at module load time, but if we end up
- * dealing with an out-of-range condition, we can assume it
- * is due to a module being loaded far away from the kernel.
- */
- if (!mod) {
- preempt_disable();
- mod = __module_text_address(pc);
- preempt_enable();
-
- if (WARN_ON(!mod))
- return -EINVAL;
- }
-
- /*
- * The instruction we are about to patch may be a branch and
- * link instruction that was redirected via a PLT entry. In
- * this case, the normal validation will fail, but we can at
- * least check that we are dealing with a branch and link
- * instruction that points into the right module.
- */
- if (aarch64_insn_read((void *)pc, &replaced))
- return -EFAULT;
-
- if (!aarch64_insn_is_bl(replaced) ||
- !within_module(pc + aarch64_get_branch_offset(replaced),
- mod))
- return -EINVAL;
-
- validate = false;
- } else {
- old = aarch64_insn_gen_branch_imm(pc, addr,
- AARCH64_INSN_BRANCH_LINK);
- }
+ if (!ftrace_find_callable_addr(rec, mod, &addr))
+ return -EINVAL;
+ old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK);
new = aarch64_insn_gen_nop();
- return ftrace_modify_code(pc, old, new, validate);
+ return ftrace_modify_code(pc, old, new, true);
}
void arch_ftrace_update_code(int command)
SYS_GCR_EL1);
}
+#ifdef CONFIG_KASAN_HW_TAGS
+/* Only called from assembly, silence sparse */
+void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
+ __le32 *updptr, int nr_inst);
+
void __init kasan_hw_tags_enable(struct alt_instr *alt, __le32 *origptr,
__le32 *updptr, int nr_inst)
{
if (kasan_hw_tags_enabled())
*updptr = cpu_to_le32(aarch64_insn_gen_nop());
}
+#endif
void mte_thread_init_user(void)
{
early_fixmap_init();
early_ioremap_init();
+ setup_machine_fdt(__fdt_pointer);
+
/*
* Initialise the static keys early as they may be enabled by the
- * cpufeature code, early parameters, and DT setup.
+ * cpufeature code and early parameters.
*/
jump_label_init();
-
- setup_machine_fdt(__fdt_pointer);
-
parse_early_param();
/*
struct kvm_vcpu *vcpu = kvm_get_running_vcpu();
struct arch_timer_context *timer;
+ if (WARN(!vcpu, "No vcpu context!\n"))
+ return false;
+
if (vintid == vcpu_vtimer(vcpu)->irq.irq)
timer = vcpu_vtimer(vcpu);
else if (vintid == vcpu_ptimer(vcpu)->irq.irq)
if (ret)
goto out_free_stage2_pgd;
- if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL))
+ if (!zalloc_cpumask_var(&kvm->arch.supported_cpus, GFP_KERNEL)) {
+ ret = -ENOMEM;
goto out_free_stage2_pgd;
+ }
cpumask_copy(kvm->arch.supported_cpus, cpu_possible_mask);
kvm_vgic_early_init(kvm);
return 0;
/*
- * Exclude HYP BSS from kmemleak so that it doesn't get peeked
- * at, which would end badly once the section is inaccessible.
- * None of other sections should ever be introspected.
+ * Exclude HYP sections from kmemleak so that they don't get peeked
+ * at, which would end badly once inaccessible.
*/
kmemleak_free_part(__hyp_bss_start, __hyp_bss_end - __hyp_bss_start);
+ kmemleak_free_part(__va(hyp_mem_base), hyp_mem_size);
return pkvm_drop_host_privileges();
}
return -EINVAL;
if (strcmp(arg, "protected") == 0) {
- kvm_mode = KVM_MODE_PROTECTED;
+ if (!is_kernel_in_hyp_mode())
+ kvm_mode = KVM_MODE_PROTECTED;
+ else
+ pr_warn_once("Protected KVM not available with VHE\n");
+
return 0;
}
vcpu->arch.flags &= ~KVM_ARM64_FP_ENABLED;
vcpu->arch.flags |= KVM_ARM64_FP_HOST;
+ vcpu->arch.flags &= ~KVM_ARM64_HOST_SVE_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_ZEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SVE_ENABLED;
* operations. Do this for ZA as well for now for simplicity.
*/
if (system_supports_sme()) {
+ vcpu->arch.flags &= ~KVM_ARM64_HOST_SME_ENABLED;
if (read_sysreg(cpacr_el1) & CPACR_EL1_SMEN_EL0EN)
vcpu->arch.flags |= KVM_ARM64_HOST_SME_ENABLED;
int host_stage2_idmap_locked(phys_addr_t addr, u64 size,
enum kvm_pgtable_prot prot)
{
- hyp_assert_lock_held(&host_kvm.lock);
-
return host_stage2_try(__host_stage2_idmap, addr, addr + size, prot);
}
int host_stage2_set_owner_locked(phys_addr_t addr, u64 size, u8 owner_id)
{
- hyp_assert_lock_held(&host_kvm.lock);
-
return host_stage2_try(kvm_pgtable_stage2_set_owner, &host_kvm.pgt,
addr, size, &host_s2_pool, owner_id);
}
case SYS_ID_AA64MMFR2_EL1:
return get_pvm_id_aa64mmfr2(vcpu);
default:
- /*
- * Should never happen because all cases are covered in
- * pvm_sys_reg_descs[].
- */
- WARN_ON(1);
- break;
+ /* Unhandled ID register, RAZ */
+ return 0;
}
-
- return 0;
}
static u64 read_id_reg(const struct kvm_vcpu *vcpu,
/* Mark the specified system register as an AArch64 feature id register. */
#define AARCH64(REG) { SYS_DESC(REG), .access = pvm_access_id_aarch64 }
+/*
+ * sys_reg_desc initialiser for architecturally unallocated cpufeature ID
+ * register with encoding Op0=3, Op1=0, CRn=0, CRm=crm, Op2=op2
+ * (1 <= crm < 8, 0 <= Op2 < 8).
+ */
+#define ID_UNALLOCATED(crm, op2) { \
+ Op0(3), Op1(0), CRn(0), CRm(crm), Op2(op2), \
+ .access = pvm_access_id_aarch64, \
+}
+
/* Mark the specified system register as Read-As-Zero/Write-Ignored */
#define RAZ_WI(REG) { SYS_DESC(REG), .access = pvm_access_raz_wi }
AARCH32(SYS_MVFR0_EL1),
AARCH32(SYS_MVFR1_EL1),
AARCH32(SYS_MVFR2_EL1),
+ ID_UNALLOCATED(3,3),
AARCH32(SYS_ID_PFR2_EL1),
AARCH32(SYS_ID_DFR1_EL1),
AARCH32(SYS_ID_MMFR5_EL1),
+ ID_UNALLOCATED(3,7),
/* AArch64 ID registers */
/* CRm=4 */
AARCH64(SYS_ID_AA64PFR0_EL1),
AARCH64(SYS_ID_AA64PFR1_EL1),
+ ID_UNALLOCATED(4,2),
+ ID_UNALLOCATED(4,3),
AARCH64(SYS_ID_AA64ZFR0_EL1),
+ ID_UNALLOCATED(4,5),
+ ID_UNALLOCATED(4,6),
+ ID_UNALLOCATED(4,7),
AARCH64(SYS_ID_AA64DFR0_EL1),
AARCH64(SYS_ID_AA64DFR1_EL1),
+ ID_UNALLOCATED(5,2),
+ ID_UNALLOCATED(5,3),
AARCH64(SYS_ID_AA64AFR0_EL1),
AARCH64(SYS_ID_AA64AFR1_EL1),
+ ID_UNALLOCATED(5,6),
+ ID_UNALLOCATED(5,7),
AARCH64(SYS_ID_AA64ISAR0_EL1),
AARCH64(SYS_ID_AA64ISAR1_EL1),
+ AARCH64(SYS_ID_AA64ISAR2_EL1),
+ ID_UNALLOCATED(6,3),
+ ID_UNALLOCATED(6,4),
+ ID_UNALLOCATED(6,5),
+ ID_UNALLOCATED(6,6),
+ ID_UNALLOCATED(6,7),
AARCH64(SYS_ID_AA64MMFR0_EL1),
AARCH64(SYS_ID_AA64MMFR1_EL1),
AARCH64(SYS_ID_AA64MMFR2_EL1),
+ ID_UNALLOCATED(7,3),
+ ID_UNALLOCATED(7,4),
+ ID_UNALLOCATED(7,5),
+ ID_UNALLOCATED(7,6),
+ ID_UNALLOCATED(7,7),
/* Scalable Vector Registers are restricted. */
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- NULL, vgic_uaccess_write_spending, 1,
+ vgic_uaccess_read_pending, vgic_uaccess_write_spending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
- NULL, vgic_uaccess_write_cpending, 1,
+ vgic_uaccess_read_pending, vgic_uaccess_write_cpending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET,
vgic_mmio_read_active, vgic_mmio_write_sactive,
return 0;
}
-static unsigned long vgic_v3_uaccess_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
-{
- u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
- u32 value = 0;
- int i;
-
- /*
- * pending state of interrupt is latched in pending_latch variable.
- * Userspace will save and restore pending state and line_level
- * separately.
- * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
- * for handling of ISPENDR and ICPENDR.
- */
- for (i = 0; i < len * 8; i++) {
- struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i);
- bool state = irq->pending_latch;
-
- if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
- int err;
-
- err = irq_get_irqchip_state(irq->host_irq,
- IRQCHIP_STATE_PENDING,
- &state);
- WARN_ON(err);
- }
-
- if (state)
- value |= (1U << i);
-
- vgic_put_irq(vcpu->kvm, irq);
- }
-
- return value;
-}
-
static int vgic_v3_uaccess_write_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val)
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISPENDR,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 1,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICPENDR,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ISPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_spending,
- vgic_v3_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
+ vgic_uaccess_read_pending, vgic_v3_uaccess_write_pending, 4,
VGIC_ACCESS_32bit),
REGISTER_DESC_WITH_LENGTH_UACCESS(SZ_64K + GICR_ICPENDR0,
vgic_mmio_read_pending, vgic_mmio_write_cpending,
return 0;
}
-unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
- gpa_t addr, unsigned int len)
+static unsigned long __read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len,
+ bool is_user)
{
u32 intid = VGIC_ADDR_TO_INTID(addr, 1);
u32 value = 0;
unsigned long flags;
bool val;
+ /*
+ * When used from userspace with a GICv3 model:
+ *
+ * Pending state of interrupt is latched in pending_latch
+ * variable. Userspace will save and restore pending state
+ * and line_level separately.
+ * Refer to Documentation/virt/kvm/devices/arm-vgic-v3.rst
+ * for handling of ISPENDR and ICPENDR.
+ */
raw_spin_lock_irqsave(&irq->irq_lock, flags);
if (irq->hw && vgic_irq_is_sgi(irq->intid)) {
int err;
IRQCHIP_STATE_PENDING,
&val);
WARN_RATELIMIT(err, "IRQ %d", irq->host_irq);
- } else if (vgic_irq_is_mapped_level(irq)) {
+ } else if (!is_user && vgic_irq_is_mapped_level(irq)) {
val = vgic_get_phys_line_level(irq);
} else {
- val = irq_is_pending(irq);
+ switch (vcpu->kvm->arch.vgic.vgic_model) {
+ case KVM_DEV_TYPE_ARM_VGIC_V3:
+ if (is_user) {
+ val = irq->pending_latch;
+ break;
+ }
+ fallthrough;
+ default:
+ val = irq_is_pending(irq);
+ break;
+ }
}
value |= ((u32)val << i);
return value;
}
+unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return __read_pending(vcpu, addr, len, false);
+}
+
+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len)
+{
+ return __read_pending(vcpu, addr, len, true);
+}
+
static bool is_vgic_v2_sgi(struct kvm_vcpu *vcpu, struct vgic_irq *irq)
{
return (vgic_irq_is_sgi(irq->intid) &&
unsigned long vgic_mmio_read_pending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len);
+unsigned long vgic_uaccess_read_pending(struct kvm_vcpu *vcpu,
+ gpa_t addr, unsigned int len);
+
void vgic_mmio_write_spending(struct kvm_vcpu *vcpu,
gpa_t addr, unsigned int len,
unsigned long val);
* the next context-switch, we broadcast TLB flush + I-cache
* invalidation over the inner shareable domain on rollover.
*/
- kvm_call_hyp(__kvm_flush_vm_context);
+ kvm_call_hyp(__kvm_flush_vm_context);
}
static bool check_update_reserved_vmid(u64 vmid, u64 newvmid)
*/
SYM_FUNC_START(__pi___dma_map_area)
add x1, x0, x1
- cmp w2, #DMA_FROM_DEVICE
- b.eq __pi_dcache_inval_poc
b __pi_dcache_clean_poc
SYM_FUNC_END(__pi___dma_map_area)
SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area)
#include <linux/dma-map-ops.h>
#include <linux/dma-iommu.h>
#include <xen/xen.h>
-#include <xen/swiotlb-xen.h>
#include <asm/cacheflush.h>
+#include <asm/xen/xen-ops.h>
void arch_sync_dma_for_device(phys_addr_t paddr, size_t size,
enum dma_data_direction dir)
if (iommu)
iommu_setup_dma_ops(dev, dma_base, dma_base + size - 1);
-#ifdef CONFIG_XEN
- if (xen_swiotlb_detect())
- dev->dma_ops = &xen_swiotlb_dma_ops;
-#endif
+ xen_setup_dma_ops(dev);
}
return orig_pte;
}
+static pte_t get_clear_contig_flush(struct mm_struct *mm,
+ unsigned long addr,
+ pte_t *ptep,
+ unsigned long pgsize,
+ unsigned long ncontig)
+{
+ pte_t orig_pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
+ struct vm_area_struct vma = TLB_FLUSH_VMA(mm, 0);
+
+ flush_tlb_range(&vma, addr, addr + (pgsize * ncontig));
+ return orig_pte;
+}
+
/*
* Changing some bits of contiguous entries requires us to follow a
* Break-Before-Make approach, breaking the whole contiguous set
int ncontig, i;
size_t pgsize = 0;
unsigned long pfn = pte_pfn(pte), dpfn;
+ struct mm_struct *mm = vma->vm_mm;
pgprot_t hugeprot;
pte_t orig_pte;
if (!pte_cont(pte))
return ptep_set_access_flags(vma, addr, ptep, pte, dirty);
- ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
+ ncontig = find_num_contig(mm, addr, ptep, &pgsize);
dpfn = pgsize >> PAGE_SHIFT;
if (!__cont_access_flags_changed(ptep, pte, ncontig))
return 0;
- orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig);
+ orig_pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
/* Make sure we don't lose the dirty or young state */
if (pte_dirty(orig_pte))
hugeprot = pte_pgprot(pte);
for (i = 0; i < ncontig; i++, ptep++, addr += pgsize, pfn += dpfn)
- set_pte_at(vma->vm_mm, addr, ptep, pfn_pte(pfn, hugeprot));
+ set_pte_at(mm, addr, ptep, pfn_pte(pfn, hugeprot));
return 1;
}
ncontig = find_num_contig(mm, addr, ptep, &pgsize);
dpfn = pgsize >> PAGE_SHIFT;
- pte = get_clear_contig(mm, addr, ptep, pgsize, ncontig);
+ pte = get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
pte = pte_wrprotect(pte);
hugeprot = pte_pgprot(pte);
pte_t huge_ptep_clear_flush(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep)
{
+ struct mm_struct *mm = vma->vm_mm;
size_t pgsize;
int ncontig;
- pte_t orig_pte;
if (!pte_cont(READ_ONCE(*ptep)))
return ptep_clear_flush(vma, addr, ptep);
- ncontig = find_num_contig(vma->vm_mm, addr, ptep, &pgsize);
- orig_pte = get_clear_contig(vma->vm_mm, addr, ptep, pgsize, ncontig);
- flush_tlb_range(vma, addr, addr + pgsize * ncontig);
- return orig_pte;
+ ncontig = find_num_contig(mm, addr, ptep, &pgsize);
+ return get_clear_contig_flush(mm, addr, ptep, pgsize, ncontig);
}
static int __init hugetlbpage_init(void)
bpf_jit_binary_free(header);
prog->bpf_func = NULL;
prog->jited = 0;
+ prog->jited_len = 0;
goto out_off;
}
bpf_jit_binary_lock_ro(header);
next
}
-/0b[01]+/ && block = "Enum" {
+/0b[01]+/ && block == "Enum" {
expect_fields(2)
val = $1
name = $2
#define __ASM_CSKY_TLB_H
#include <asm/cacheflush.h>
-
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_cache_range(vma, (vma)->vm_start, (vma)->vm_end); \
- } while (0)
-
-#define tlb_end_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_tlb_range(vma, (vma)->vm_start, (vma)->vm_end); \
- } while (0)
-
-#define tlb_flush(tlb) flush_tlb_mm((tlb)->mm)
-
#include <asm-generic/tlb.h>
#endif /* __ASM_CSKY_TLB_H */
select GENERIC_CMOS_UPDATE
select GENERIC_CPU_AUTOPROBE
select GENERIC_ENTRY
- select GENERIC_FIND_FIRST_BIT
select GENERIC_GETTIMEOFDAY
select GENERIC_IRQ_MULTI_HANDLER
select GENERIC_IRQ_PROBE
select GENERIC_TIME_VSYSCALL
select GPIOLIB
select HAVE_ARCH_AUDITSYSCALL
- select HAVE_ARCH_COMPILER_H
select HAVE_ARCH_MMAP_RND_BITS if MMU
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ASM_MODVERSIONS
select HAVE_CONTEXT_TRACKING
- select HAVE_COPY_THREAD_TLS
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DMA_CONTIGUOUS
select HAVE_EXIT_THREAD
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_IRQ_TIME_ACCOUNTING
- select HAVE_MEMBLOCK
- select HAVE_MEMBLOCK_NODE_MAP
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI
select HAVE_PERF_EVENTS
select TRACE_IRQFLAGS_SUPPORT
select USE_PERCPU_NUMA_NODE_ID
select ZONE_DMA32
+ select MMU_GATHER_MERGE_VMAS if MMU
config 32BIT
bool
config NUMA
bool "NUMA Support"
+ select SMP
select ACPI_NUMA if ACPI
help
Say Y to compile the kernel with NUMA (Non-Uniform Memory Access)
nor \dst, \src, zero
.endm
-.macro bgt r0 r1 label
- blt \r1, \r0, \label
-.endm
-
-.macro bltz r0 label
- blt \r0, zero, \label
-.endm
-
-.macro bgez r0 label
- bge \r0, zero, \label
-.endm
-
#endif /* _ASM_ASMMACRO_H */
#include <linux/types.h>
#include <asm/barrier.h>
#include <asm/cmpxchg.h>
-#include <asm/compiler.h>
#if __SIZEOF_LONG__ == 4
#define __LL "ll.w "
__asm__ __volatile__(
"1: ll.w %1, %2 # atomic_sub_if_positive\n"
" addi.w %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.w %1, %2 \n"
- " beq $zero, %1, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "I" (-i));
} else {
__asm__ __volatile__(
"1: ll.w %1, %2 # atomic_sub_if_positive\n"
" sub.w %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.w %1, %2 \n"
- " beq $zero, %1, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "r" (i));
}
__asm__ __volatile__(
"1: ll.d %1, %2 # atomic64_sub_if_positive \n"
" addi.d %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.d %1, %2 \n"
- " beq %1, $zero, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "I" (-i));
} else {
__asm__ __volatile__(
"1: ll.d %1, %2 # atomic64_sub_if_positive \n"
" sub.d %0, %1, %3 \n"
- " or %1, %0, $zero \n"
- " blt %0, $zero, 2f \n"
+ " move %1, %0 \n"
+ " bltz %0, 2f \n"
" sc.d %1, %2 \n"
- " beq %1, $zero, 1b \n"
+ " beqz %1, 1b \n"
"2: \n"
__WEAK_LLSC_MB
- : "=&r" (result), "=&r" (temp),
- "+" GCC_OFF_SMALL_ASM() (v->counter)
+ : "=&r" (result), "=&r" (temp), "+ZC" (v->counter)
: "r" (i));
}
__asm__ __volatile__(
"sltu %0, %1, %2\n\t"
#if (__SIZEOF_LONG__ == 4)
- "sub.w %0, $r0, %0\n\t"
+ "sub.w %0, $zero, %0\n\t"
#elif (__SIZEOF_LONG__ == 8)
- "sub.d %0, $r0, %0\n\t"
+ "sub.d %0, $zero, %0\n\t"
#endif
: "=r" (mask)
: "r" (index), "r" (size)
return regs->csr_era;
}
-static inline int compute_return_era(struct pt_regs *regs)
+static inline void compute_return_era(struct pt_regs *regs)
{
regs->csr_era += 4;
- return 0;
}
#endif /* _ASM_BRANCH_H */
__asm__ __volatile__( \
"1: " ld " %0, %2 # __cmpxchg_asm \n" \
" bne %0, %z3, 2f \n" \
- " or $t0, %z4, $zero \n" \
+ " move $t0, %z4 \n" \
" " st " $t0, %1 \n" \
- " beq $zero, $t0, 1b \n" \
+ " beqz $t0, 1b \n" \
"2: \n" \
__WEAK_LLSC_MB \
: "=&r" (__ret), "=ZB"(*m) \
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2020-2022 Loongson Technology Corporation Limited
- */
-#ifndef _ASM_COMPILER_H
-#define _ASM_COMPILER_H
-
-#define GCC_OFF_SMALL_ASM() "ZC"
-
-#define LOONGARCH_ISA_LEVEL "loongarch"
-#define LOONGARCH_ISA_ARCH_LEVEL "arch=loongarch"
-#define LOONGARCH_ISA_LEVEL_RAW loongarch
-#define LOONGARCH_ISA_ARCH_LEVEL_RAW LOONGARCH_ISA_LEVEL_RAW
-
-#endif /* _ASM_COMPILER_H */
.interp_fp_abi = LOONGARCH_ABI_FP_ANY, \
}
-#define elf_read_implies_exec(ex, exec_stk) (exec_stk == EXSTACK_DEFAULT)
-
extern int arch_elf_pt_proc(void *ehdr, void *phdr, struct file *elf,
bool is_interp, struct arch_elf_state *state);
#define fcsr1 $r1
#define fcsr2 $r2
#define fcsr3 $r3
-#define vcsr16 $r16
#endif /* _ASM_FPREGDEF_H */
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <asm/barrier.h>
-#include <asm/compiler.h>
#include <asm/errno.h>
#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \
"1: ll.w %1, %4 # __futex_atomic_op\n" \
" " insn " \n" \
"2: sc.w $t0, %2 \n" \
- " beq $t0, $zero, 1b \n" \
+ " beqz $t0, 1b \n" \
"3: \n" \
" .section .fixup,\"ax\" \n" \
"4: li.w %0, %6 \n" \
"# futex_atomic_cmpxchg_inatomic \n"
"1: ll.w %1, %3 \n"
" bne %1, %z4, 3f \n"
- " or $t0, %z5, $zero \n"
+ " move $t0, %z5 \n"
"2: sc.w $t0, %2 \n"
- " beq $zero, $t0, 1b \n"
+ " beqz $t0, 1b \n"
"3: \n"
__WEAK_LLSC_MB
" .section .fixup,\"ax\" \n"
" "__UA_ADDR "\t1b, 4b \n"
" "__UA_ADDR "\t2b, 4b \n"
" .previous \n"
- : "+r" (ret), "=&r" (val), "=" GCC_OFF_SMALL_ASM() (*uaddr)
- : GCC_OFF_SMALL_ASM() (*uaddr), "Jr" (oldval), "Jr" (newval),
+ : "+r" (ret), "=&r" (val), "=ZC" (*uaddr)
+ : "ZC" (*uaddr), "Jr" (oldval), "Jr" (newval),
"i" (-EFAULT)
: "memory", "t0");
unsigned int __softirq_pending;
} ____cacheline_aligned irq_cpustat_t;
-DECLARE_PER_CPU_ALIGNED(irq_cpustat_t, irq_stat);
+DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
#define __ARCH_IRQ_STAT
#include <linux/compiler.h>
#include <linux/stringify.h>
-#include <asm/compiler.h>
#include <asm/loongarch.h>
static inline void arch_local_irq_enable(void)
#include <linux/bitops.h>
#include <linux/atomic.h>
#include <asm/cmpxchg.h>
-#include <asm/compiler.h>
typedef struct {
atomic_long_t a;
#define MAX_PACKAGES 16
-/* Chip Config register of each physical cpu package */
-extern u64 loongson_chipcfg[MAX_PACKAGES];
-#define LOONGSON_CHIPCFG(id) (*(volatile u32 *)(loongson_chipcfg[id]))
-
-/* Chip Temperature register of each physical cpu package */
-extern u64 loongson_chiptemp[MAX_PACKAGES];
-#define LOONGSON_CHIPTEMP(id) (*(volatile u32 *)(loongson_chiptemp[id]))
-
-/* Freq Control register of each physical cpu package */
-extern u64 loongson_freqctrl[MAX_PACKAGES];
-#define LOONGSON_FREQCTRL(id) (*(volatile u32 *)(loongson_freqctrl[id]))
-
#define xconf_readl(addr) readl(addr)
#define xconf_readq(addr) readq(addr)
{
asm volatile (
" st.w %[v], %[hw], 0 \n"
- " ld.b $r0, %[hw], 0 \n"
+ " ld.b $zero, %[hw], 0 \n"
:
: [hw] "r" (addr), [v] "r" (val)
);
{
asm volatile (
" st.d %[v], %[hw], 0 \n"
- " ld.b $r0, %[hw], 0 \n"
+ " ld.b $zero, %[hw], 0 \n"
:
: [hw] "r" (addr), [v] "r" (val64)
);
#define _ASM_PAGE_H
#include <linux/const.h>
+#include <asm/addrspace.h>
/*
* PAGE_SHIFT determines the page size
#define __ASM_PERCPU_H
#include <asm/cmpxchg.h>
+#include <asm/loongarch.h>
/* Use r21 for fast access */
register unsigned long __my_cpu_offset __asm__("$r21");
#define kern_addr_valid(addr) (1)
+static inline unsigned long pmd_pfn(pmd_t pmd)
+{
+ return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
+}
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* We don't have hardware dirty/accessed bits, generic_pmdp_establish is fine.*/
return pmd;
}
-static inline unsigned long pmd_pfn(pmd_t pmd)
-{
- return (pmd_val(pmd) & _PFN_MASK) >> _PFN_SHIFT;
-}
-
static inline struct page *pmd_page(pmd_t pmd)
{
if (pmd_trans_huge(pmd))
struct loongarch_fpu {
unsigned int fcsr;
- unsigned int vcsr;
uint64_t fcc; /* 8x8 */
union fpureg fpr[NUM_FPU_REGS];
};
*/ \
.fpu = { \
.fcsr = 0, \
- .vcsr = 0, \
.fcc = 0, \
.fpr = {{{0,},},}, \
}, \
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/linkage.h>
-#include <linux/smp.h>
#include <linux/threads.h>
#include <linux/cpumask.h>
+extern int smp_num_siblings;
+extern int num_processors;
+extern int disabled_cpus;
+extern cpumask_t cpu_sibling_map[];
+extern cpumask_t cpu_core_map[];
+extern cpumask_t cpu_foreign_map[];
+
void loongson3_smp_setup(void);
void loongson3_prepare_cpus(unsigned int max_cpus);
void loongson3_boot_secondary(int cpu, struct task_struct *idle);
void loongson3_cpu_die(unsigned int cpu);
#endif
-#ifdef CONFIG_SMP
-
static inline void plat_smp_setup(void)
{
loongson3_smp_setup();
}
-#else /* !CONFIG_SMP */
-
-static inline void plat_smp_setup(void) { }
-
-#endif /* !CONFIG_SMP */
-
-extern int smp_num_siblings;
-extern int num_processors;
-extern int disabled_cpus;
-extern cpumask_t cpu_sibling_map[];
-extern cpumask_t cpu_core_map[];
-extern cpumask_t cpu_foreign_map[];
-
static inline int raw_smp_processor_id(void)
{
#if defined(__VDSO__)
static __always_inline void prepare_frametrace(struct pt_regs *regs)
{
__asm__ __volatile__(
- /* Save $r1 */
+ /* Save $ra */
STORE_ONE_REG(1)
- /* Use $r1 to save PC */
- "pcaddi $r1, 0\n\t"
- STR_LONG_S " $r1, %0\n\t"
- /* Restore $r1 */
- STR_LONG_L " $r1, %1, "STR_LONGSIZE"\n\t"
+ /* Use $ra to save PC */
+ "pcaddi $ra, 0\n\t"
+ STR_LONG_S " $ra, %0\n\t"
+ /* Restore $ra */
+ STR_LONG_L " $ra, %1, "STR_LONGSIZE"\n\t"
STORE_ONE_REG(2)
STORE_ONE_REG(3)
STORE_ONE_REG(4)
}
/* How to get the thread information struct from C. */
-register struct thread_info *__current_thread_info __asm__("$r2");
+register struct thread_info *__current_thread_info __asm__("$tp");
static inline struct thread_info *current_thread_info(void)
{
return __current_thread_info;
}
-register unsigned long current_stack_pointer __asm__("$r3");
+register unsigned long current_stack_pointer __asm__("$sp");
#endif /* !__ASSEMBLY__ */
#include <asm/cpu.h>
#include <asm/cpu-features.h>
-/*
- * Standard way to access the cycle counter.
- * Currently only used on SMP for scheduling.
- *
- * We know that all SMP capable CPUs have cycle counters.
- */
-
typedef unsigned long cycles_t;
#define get_cycles get_cycles
);
}
-/*
- * LoongArch doesn't need any special per-pte or per-vma handling, except
- * we need to flush cache for area to be unmapped.
- */
-#define tlb_start_vma(tlb, vma) \
- do { \
- if (!(tlb)->fullmm) \
- flush_cache_range(vma, vma->vm_start, vma->vm_end); \
- } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry(tlb, ptep, address) do { } while (0)
static void tlb_flush(struct mmu_gather *tlb);
"2: \n" \
" .section .fixup,\"ax\" \n" \
"3: li.w %0, %3 \n" \
- " or %1, $r0, $r0 \n" \
+ " move %1, $zero \n" \
" b 2b \n" \
" .previous \n" \
" .section __ex_table,\"a\" \n" \
}
}
+#ifdef CONFIG_SMP
static int set_processor_mask(u32 id, u32 flags)
{
return cpu;
}
+#endif
static void __init acpi_process_madt(void)
{
+#ifdef CONFIG_SMP
int i;
for (i = 0; i < NR_CPUS; i++) {
__cpu_number_map[i] = -1;
__cpu_logical_map[i] = -1;
}
+#endif
loongson_sysconf.nr_cpus = num_processors;
}
OFFSET(THREAD_FCSR, loongarch_fpu, fcsr);
OFFSET(THREAD_FCC, loongarch_fpu, fcc);
- OFFSET(THREAD_VCSR, loongarch_fpu, vcsr);
BLANK();
}
* Copyright (C) 2020-2022 Loongson Technology Corporation Limited
*/
#include <linux/cacheinfo.h>
+#include <asm/bootinfo.h>
+#include <asm/cpu-info.h>
/* Populates leaf and increments to next leaf */
#define populate_cache(cache, leaf, c_level, c_type) \
leaf->ways_of_associativity = c->cache.ways; \
leaf->size = c->cache.linesz * c->cache.sets * \
c->cache.ways; \
+ if (leaf->level > 2) \
+ leaf->size *= nodes_per_package; \
leaf++; \
} while (0)
int populate_cache_leaves(unsigned int cpu)
{
- int level = 1;
+ int level = 1, nodes_per_package = 1;
struct cpuinfo_loongarch *c = ¤t_cpu_data;
struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+ if (loongson_sysconf.nr_nodes > 1)
+ nodes_per_package = loongson_sysconf.cores_per_package
+ / loongson_sysconf.cores_per_node;
+
if (c->icache.waysize) {
populate_cache(dcache, this_leaf, level, CACHE_TYPE_DATA);
populate_cache(icache, this_leaf, level++, CACHE_TYPE_INST);
c->cputype = CPU_UNKNOWN;
c->processor_id = read_cpucfg(LOONGARCH_CPUCFG0);
- c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) >> 3) & 0x3;
+ c->fpu_vers = (read_cpucfg(LOONGARCH_CPUCFG2) & CPUCFG2_FPVERS) >> 3;
c->fpu_csr0 = FPU_CSR_RN;
c->fpu_mask = FPU_CSR_RSVD;
addi.d sp, sp, -PT_SIZE
cfi_st t2, PT_R3
- cfi_rel_offset sp, PT_R3
+ cfi_rel_offset sp, PT_R3
st.d zero, sp, PT_R0
csrrd t2, LOONGARCH_CSR_PRMD
st.d t2, sp, PT_PRMD
cfi_st a7, PT_R11
csrrd ra, LOONGARCH_CSR_ERA
st.d ra, sp, PT_ERA
- cfi_rel_offset ra, PT_ERA
+ cfi_rel_offset ra, PT_ERA
cfi_st tp, PT_R2
cfi_st u0, PT_R21
struct loongson_system_configuration loongson_sysconf;
EXPORT_SYMBOL(loongson_sysconf);
-u64 loongson_chipcfg[MAX_PACKAGES];
-u64 loongson_chiptemp[MAX_PACKAGES];
-u64 loongson_freqctrl[MAX_PACKAGES];
-unsigned long long smp_group[MAX_PACKAGES];
-
-static void __init register_addrs_set(u64 *registers, const u64 addr, int num)
-{
- u64 i;
-
- for (i = 0; i < num; i++) {
- *registers = (i << 44) | addr;
- registers++;
- }
-}
-
void __init init_environ(void)
{
int efi_boot = fw_arg0;
efi_memmap_init_early(&data);
memblock_reserve(data.phys_map & PAGE_MASK,
PAGE_ALIGN(data.size + (data.phys_map & ~PAGE_MASK)));
-
- register_addrs_set(smp_group, TO_UNCACHE(0x1fe01000), 16);
- register_addrs_set(loongson_chipcfg, TO_UNCACHE(0x1fe00180), 16);
- register_addrs_set(loongson_chiptemp, TO_UNCACHE(0x1fe0019c), 16);
- register_addrs_set(loongson_freqctrl, TO_UNCACHE(0x1fe001d0), 16);
}
static int __init init_cpu_fullname(void)
.endm
.macro sc_save_fp base
- EX fst.d $f0, \base, (0 * FPU_REG_WIDTH)
- EX fst.d $f1, \base, (1 * FPU_REG_WIDTH)
- EX fst.d $f2, \base, (2 * FPU_REG_WIDTH)
- EX fst.d $f3, \base, (3 * FPU_REG_WIDTH)
- EX fst.d $f4, \base, (4 * FPU_REG_WIDTH)
- EX fst.d $f5, \base, (5 * FPU_REG_WIDTH)
- EX fst.d $f6, \base, (6 * FPU_REG_WIDTH)
- EX fst.d $f7, \base, (7 * FPU_REG_WIDTH)
- EX fst.d $f8, \base, (8 * FPU_REG_WIDTH)
- EX fst.d $f9, \base, (9 * FPU_REG_WIDTH)
- EX fst.d $f10, \base, (10 * FPU_REG_WIDTH)
- EX fst.d $f11, \base, (11 * FPU_REG_WIDTH)
- EX fst.d $f12, \base, (12 * FPU_REG_WIDTH)
- EX fst.d $f13, \base, (13 * FPU_REG_WIDTH)
- EX fst.d $f14, \base, (14 * FPU_REG_WIDTH)
- EX fst.d $f15, \base, (15 * FPU_REG_WIDTH)
- EX fst.d $f16, \base, (16 * FPU_REG_WIDTH)
- EX fst.d $f17, \base, (17 * FPU_REG_WIDTH)
- EX fst.d $f18, \base, (18 * FPU_REG_WIDTH)
- EX fst.d $f19, \base, (19 * FPU_REG_WIDTH)
- EX fst.d $f20, \base, (20 * FPU_REG_WIDTH)
- EX fst.d $f21, \base, (21 * FPU_REG_WIDTH)
- EX fst.d $f22, \base, (22 * FPU_REG_WIDTH)
- EX fst.d $f23, \base, (23 * FPU_REG_WIDTH)
- EX fst.d $f24, \base, (24 * FPU_REG_WIDTH)
- EX fst.d $f25, \base, (25 * FPU_REG_WIDTH)
- EX fst.d $f26, \base, (26 * FPU_REG_WIDTH)
- EX fst.d $f27, \base, (27 * FPU_REG_WIDTH)
- EX fst.d $f28, \base, (28 * FPU_REG_WIDTH)
- EX fst.d $f29, \base, (29 * FPU_REG_WIDTH)
- EX fst.d $f30, \base, (30 * FPU_REG_WIDTH)
- EX fst.d $f31, \base, (31 * FPU_REG_WIDTH)
+ EX fst.d $f0, \base, (0 * FPU_REG_WIDTH)
+ EX fst.d $f1, \base, (1 * FPU_REG_WIDTH)
+ EX fst.d $f2, \base, (2 * FPU_REG_WIDTH)
+ EX fst.d $f3, \base, (3 * FPU_REG_WIDTH)
+ EX fst.d $f4, \base, (4 * FPU_REG_WIDTH)
+ EX fst.d $f5, \base, (5 * FPU_REG_WIDTH)
+ EX fst.d $f6, \base, (6 * FPU_REG_WIDTH)
+ EX fst.d $f7, \base, (7 * FPU_REG_WIDTH)
+ EX fst.d $f8, \base, (8 * FPU_REG_WIDTH)
+ EX fst.d $f9, \base, (9 * FPU_REG_WIDTH)
+ EX fst.d $f10, \base, (10 * FPU_REG_WIDTH)
+ EX fst.d $f11, \base, (11 * FPU_REG_WIDTH)
+ EX fst.d $f12, \base, (12 * FPU_REG_WIDTH)
+ EX fst.d $f13, \base, (13 * FPU_REG_WIDTH)
+ EX fst.d $f14, \base, (14 * FPU_REG_WIDTH)
+ EX fst.d $f15, \base, (15 * FPU_REG_WIDTH)
+ EX fst.d $f16, \base, (16 * FPU_REG_WIDTH)
+ EX fst.d $f17, \base, (17 * FPU_REG_WIDTH)
+ EX fst.d $f18, \base, (18 * FPU_REG_WIDTH)
+ EX fst.d $f19, \base, (19 * FPU_REG_WIDTH)
+ EX fst.d $f20, \base, (20 * FPU_REG_WIDTH)
+ EX fst.d $f21, \base, (21 * FPU_REG_WIDTH)
+ EX fst.d $f22, \base, (22 * FPU_REG_WIDTH)
+ EX fst.d $f23, \base, (23 * FPU_REG_WIDTH)
+ EX fst.d $f24, \base, (24 * FPU_REG_WIDTH)
+ EX fst.d $f25, \base, (25 * FPU_REG_WIDTH)
+ EX fst.d $f26, \base, (26 * FPU_REG_WIDTH)
+ EX fst.d $f27, \base, (27 * FPU_REG_WIDTH)
+ EX fst.d $f28, \base, (28 * FPU_REG_WIDTH)
+ EX fst.d $f29, \base, (29 * FPU_REG_WIDTH)
+ EX fst.d $f30, \base, (30 * FPU_REG_WIDTH)
+ EX fst.d $f31, \base, (31 * FPU_REG_WIDTH)
.endm
.macro sc_restore_fp base
- EX fld.d $f0, \base, (0 * FPU_REG_WIDTH)
- EX fld.d $f1, \base, (1 * FPU_REG_WIDTH)
- EX fld.d $f2, \base, (2 * FPU_REG_WIDTH)
- EX fld.d $f3, \base, (3 * FPU_REG_WIDTH)
- EX fld.d $f4, \base, (4 * FPU_REG_WIDTH)
- EX fld.d $f5, \base, (5 * FPU_REG_WIDTH)
- EX fld.d $f6, \base, (6 * FPU_REG_WIDTH)
- EX fld.d $f7, \base, (7 * FPU_REG_WIDTH)
- EX fld.d $f8, \base, (8 * FPU_REG_WIDTH)
- EX fld.d $f9, \base, (9 * FPU_REG_WIDTH)
- EX fld.d $f10, \base, (10 * FPU_REG_WIDTH)
- EX fld.d $f11, \base, (11 * FPU_REG_WIDTH)
- EX fld.d $f12, \base, (12 * FPU_REG_WIDTH)
- EX fld.d $f13, \base, (13 * FPU_REG_WIDTH)
- EX fld.d $f14, \base, (14 * FPU_REG_WIDTH)
- EX fld.d $f15, \base, (15 * FPU_REG_WIDTH)
- EX fld.d $f16, \base, (16 * FPU_REG_WIDTH)
- EX fld.d $f17, \base, (17 * FPU_REG_WIDTH)
- EX fld.d $f18, \base, (18 * FPU_REG_WIDTH)
- EX fld.d $f19, \base, (19 * FPU_REG_WIDTH)
- EX fld.d $f20, \base, (20 * FPU_REG_WIDTH)
- EX fld.d $f21, \base, (21 * FPU_REG_WIDTH)
- EX fld.d $f22, \base, (22 * FPU_REG_WIDTH)
- EX fld.d $f23, \base, (23 * FPU_REG_WIDTH)
- EX fld.d $f24, \base, (24 * FPU_REG_WIDTH)
- EX fld.d $f25, \base, (25 * FPU_REG_WIDTH)
- EX fld.d $f26, \base, (26 * FPU_REG_WIDTH)
- EX fld.d $f27, \base, (27 * FPU_REG_WIDTH)
- EX fld.d $f28, \base, (28 * FPU_REG_WIDTH)
- EX fld.d $f29, \base, (29 * FPU_REG_WIDTH)
- EX fld.d $f30, \base, (30 * FPU_REG_WIDTH)
- EX fld.d $f31, \base, (31 * FPU_REG_WIDTH)
+ EX fld.d $f0, \base, (0 * FPU_REG_WIDTH)
+ EX fld.d $f1, \base, (1 * FPU_REG_WIDTH)
+ EX fld.d $f2, \base, (2 * FPU_REG_WIDTH)
+ EX fld.d $f3, \base, (3 * FPU_REG_WIDTH)
+ EX fld.d $f4, \base, (4 * FPU_REG_WIDTH)
+ EX fld.d $f5, \base, (5 * FPU_REG_WIDTH)
+ EX fld.d $f6, \base, (6 * FPU_REG_WIDTH)
+ EX fld.d $f7, \base, (7 * FPU_REG_WIDTH)
+ EX fld.d $f8, \base, (8 * FPU_REG_WIDTH)
+ EX fld.d $f9, \base, (9 * FPU_REG_WIDTH)
+ EX fld.d $f10, \base, (10 * FPU_REG_WIDTH)
+ EX fld.d $f11, \base, (11 * FPU_REG_WIDTH)
+ EX fld.d $f12, \base, (12 * FPU_REG_WIDTH)
+ EX fld.d $f13, \base, (13 * FPU_REG_WIDTH)
+ EX fld.d $f14, \base, (14 * FPU_REG_WIDTH)
+ EX fld.d $f15, \base, (15 * FPU_REG_WIDTH)
+ EX fld.d $f16, \base, (16 * FPU_REG_WIDTH)
+ EX fld.d $f17, \base, (17 * FPU_REG_WIDTH)
+ EX fld.d $f18, \base, (18 * FPU_REG_WIDTH)
+ EX fld.d $f19, \base, (19 * FPU_REG_WIDTH)
+ EX fld.d $f20, \base, (20 * FPU_REG_WIDTH)
+ EX fld.d $f21, \base, (21 * FPU_REG_WIDTH)
+ EX fld.d $f22, \base, (22 * FPU_REG_WIDTH)
+ EX fld.d $f23, \base, (23 * FPU_REG_WIDTH)
+ EX fld.d $f24, \base, (24 * FPU_REG_WIDTH)
+ EX fld.d $f25, \base, (25 * FPU_REG_WIDTH)
+ EX fld.d $f26, \base, (26 * FPU_REG_WIDTH)
+ EX fld.d $f27, \base, (27 * FPU_REG_WIDTH)
+ EX fld.d $f28, \base, (28 * FPU_REG_WIDTH)
+ EX fld.d $f29, \base, (29 * FPU_REG_WIDTH)
+ EX fld.d $f30, \base, (30 * FPU_REG_WIDTH)
+ EX fld.d $f31, \base, (31 * FPU_REG_WIDTH)
.endm
.macro sc_save_fcc base, tmp0, tmp1
movcf2gr \tmp0, $fcc0
- move \tmp1, \tmp0
+ move \tmp1, \tmp0
movcf2gr \tmp0, $fcc1
bstrins.d \tmp1, \tmp0, 15, 8
movcf2gr \tmp0, $fcc2
bstrins.d \tmp1, \tmp0, 55, 48
movcf2gr \tmp0, $fcc7
bstrins.d \tmp1, \tmp0, 63, 56
- EX st.d \tmp1, \base, 0
+ EX st.d \tmp1, \base, 0
.endm
.macro sc_restore_fcc base, tmp0, tmp1
- EX ld.d \tmp0, \base, 0
+ EX ld.d \tmp0, \base, 0
bstrpick.d \tmp1, \tmp0, 7, 0
movgr2cf $fcc0, \tmp1
bstrpick.d \tmp1, \tmp0, 15, 8
.macro sc_save_fcsr base, tmp0
movfcsr2gr \tmp0, fcsr0
- EX st.w \tmp0, \base, 0
+ EX st.w \tmp0, \base, 0
.endm
.macro sc_restore_fcsr base, tmp0
- EX ld.w \tmp0, \base, 0
+ EX ld.w \tmp0, \base, 0
movgr2fcsr fcsr0, \tmp0
.endm
- .macro sc_save_vcsr base, tmp0
- movfcsr2gr \tmp0, vcsr16
- EX st.w \tmp0, \base, 0
- .endm
-
- .macro sc_restore_vcsr base, tmp0
- EX ld.w \tmp0, \base, 0
- movgr2fcsr vcsr16, \tmp0
- .endm
-
/*
* Save a thread's fp context.
*/
SYM_FUNC_START(_save_fp)
fpu_save_csr a0 t1
- fpu_save_double a0 t1 # clobbers t1
+ fpu_save_double a0 t1 # clobbers t1
fpu_save_cc a0 t1 t2 # clobbers t1, t2
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(_save_fp)
EXPORT_SYMBOL(_save_fp)
* Restore a thread's fp context.
*/
SYM_FUNC_START(_restore_fp)
- fpu_restore_double a0 t1 # clobbers t1
- fpu_restore_csr a0 t1
- fpu_restore_cc a0 t1 t2 # clobbers t1, t2
- jirl zero, ra, 0
+ fpu_restore_double a0 t1 # clobbers t1
+ fpu_restore_csr a0 t1
+ fpu_restore_cc a0 t1 t2 # clobbers t1, t2
+ jr ra
SYM_FUNC_END(_restore_fp)
/*
movgr2fr.d $f30, t1
movgr2fr.d $f31, t1
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(_init_fpu)
/*
* a2: fcsr
*/
SYM_FUNC_START(_save_fp_context)
- sc_save_fcc a1 t1 t2
- sc_save_fcsr a2 t1
- sc_save_fp a0
- li.w a0, 0 # success
- jirl zero, ra, 0
+ sc_save_fcc a1 t1 t2
+ sc_save_fcsr a2 t1
+ sc_save_fp a0
+ li.w a0, 0 # success
+ jr ra
SYM_FUNC_END(_save_fp_context)
/*
* a2: fcsr
*/
SYM_FUNC_START(_restore_fp_context)
- sc_restore_fp a0
- sc_restore_fcc a1 t1 t2
- sc_restore_fcsr a2 t1
- li.w a0, 0 # success
- jirl zero, ra, 0
+ sc_restore_fp a0
+ sc_restore_fcc a1 t1 t2
+ sc_restore_fcsr a2 t1
+ li.w a0, 0 # success
+ jr ra
SYM_FUNC_END(_restore_fp_context)
SYM_FUNC_START(fault)
li.w a0, -EFAULT # failure
- jirl zero, ra, 0
+ jr ra
SYM_FUNC_END(fault)
nop
idle 0
/* end of rollback region */
-1: jirl zero, ra, 0
+1: jr ra
SYM_FUNC_END(__arch_cpu_idle)
SYM_FUNC_START(handle_vint)
BACKUP_T0T1
SAVE_ALL
la.abs t1, __arch_cpu_idle
- LONG_L t0, sp, PT_ERA
+ LONG_L t0, sp, PT_ERA
/* 32 byte rollback region */
ori t0, t0, 0x1f
xori t0, t0, 0x1f
bne t0, t1, 1f
- LONG_S t0, sp, PT_ERA
+ LONG_S t0, sp, PT_ERA
1: move a0, sp
move a1, sp
la.abs t0, do_vint
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_vint)
build_prep_\prep
move a0, sp
la.abs t0, do_\handler
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_\exception)
.endm
SYM_FUNC_START(handle_sys)
la.abs t0, handle_syscall
- jirl zero, t0, 0
+ jr t0
SYM_FUNC_END(handle_sys)
__REF
-SYM_ENTRY(_stext, SYM_L_GLOBAL, SYM_A_NONE)
-
SYM_CODE_START(kernel_entry) # kernel entry point
/* Config direct window and set PG */
/* We might not get launched at the address the kernel is linked to,
so we jump there. */
la.abs t0, 0f
- jirl zero, t0, 0
+ jr t0
0:
la t0, __bss_start # clear .bss
st.d zero, t0, 0
/* KSave3 used for percpu base, initialized as 0 */
csrwr zero, PERCPU_BASE_KS
/* GPR21 used for percpu base (runtime), initialized as 0 */
- or u0, zero, zero
+ move u0, zero
la tp, init_thread_union
/* Set the SP after an empty pt_regs. */
ld.d sp, t0, CPU_BOOT_STACK
ld.d tp, t0, CPU_BOOT_TINFO
- la.abs t0, 0f
- jirl zero, t0, 0
+ la.abs t0, 0f
+ jr t0
0:
bl start_secondary
SYM_CODE_END(smpboot_entry)
#include <asm/setup.h>
DEFINE_PER_CPU(unsigned long, irq_stack);
+DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
+EXPORT_PER_CPU_SYMBOL(irq_stat);
struct irq_domain *cpu_domain;
struct irq_domain *liointc_domain;
void __init init_IRQ(void)
{
- int i, r, ipi_irq;
+ int i;
+#ifdef CONFIG_SMP
+ int r, ipi_irq;
static int ipi_dummy_dev;
+#endif
unsigned int order = get_order(IRQ_STACK_SIZE);
struct page *page;
return 0;
}
-EXPORT_SYMBOL(init_numa_memory);
#endif
void __init paging_init(void)
/*
* Copy architecture-specific thread state
*/
-int copy_thread(unsigned long clone_flags, unsigned long usp,
- unsigned long kthread_arg, struct task_struct *p, unsigned long tls)
+int copy_thread(struct task_struct *p, const struct kernel_clone_args *args)
{
unsigned long childksp;
+ unsigned long tls = args->tls;
+ unsigned long usp = args->stack;
+ unsigned long clone_flags = args->flags;
struct pt_regs *childregs, *regs = current_pt_regs();
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;
p->thread.csr_crmd = csr_read32(LOONGARCH_CSR_CRMD);
p->thread.csr_prmd = csr_read32(LOONGARCH_CSR_PRMD);
p->thread.csr_ecfg = csr_read32(LOONGARCH_CSR_ECFG);
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(args->fn)) {
/* kernel thread */
- p->thread.reg23 = usp; /* fn */
- p->thread.reg24 = kthread_arg;
p->thread.reg03 = childksp;
- p->thread.reg01 = (unsigned long) ret_from_kernel_thread;
+ p->thread.reg23 = (unsigned long)args->fn;
+ p->thread.reg24 = (unsigned long)args->fn_arg;
+ p->thread.reg01 = (unsigned long)ret_from_kernel_thread;
memset(childregs, 0, sizeof(struct pt_regs));
childregs->csr_euen = p->thread.csr_euen;
childregs->csr_crmd = p->thread.csr_crmd;
const void *kbuf, const void __user *ubuf)
{
const int fcc_start = NUM_FPU_REGS * sizeof(elf_fpreg_t);
- const int fcc_end = fcc_start + sizeof(u64);
+ const int fcsr_start = fcc_start + sizeof(u64);
int err;
BUG_ON(count % sizeof(elf_fpreg_t));
if (err)
return err;
- if (count > 0)
- err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
- &target->thread.fpu.fcc,
- fcc_start, fcc_end);
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcc, fcc_start,
+ fcc_start + sizeof(u64));
+ err |= user_regset_copyin(&pos, &count, &kbuf, &ubuf,
+ &target->thread.fpu.fcsr, fcsr_start,
+ fcsr_start + sizeof(u32));
return err;
}
#include <linux/console.h>
#include <acpi/reboot.h>
-#include <asm/compiler.h>
#include <asm/idle.h>
#include <asm/loongarch.h>
#include <asm/reboot.h>
#include <asm/pgalloc.h>
#include <asm/sections.h>
#include <asm/setup.h>
-#include <asm/smp.h>
#include <asm/time.h>
#define SMBIOS_BIOSSIZE_OFFSET 0x09
char *dmi_data = (char *)dm;
bios_extern = *(dmi_data + SMBIOS_BIOSEXTERN_OFFSET);
- b_info.bios_size = *(dmi_data + SMBIOS_BIOSSIZE_OFFSET);
+ b_info.bios_size = (*(dmi_data + SMBIOS_BIOSSIZE_OFFSET) + 1) << 6;
if (bios_extern & LOONGSON_EFI_ENABLE)
set_bit(EFI_BOOT, &efi.flags);
nr_cpu_ids = possible;
}
-#else
-static inline void prefill_possible_map(void) {}
#endif
void __init setup_arch(char **cmdline_p)
arch_mem_init(cmdline_p);
resource_init();
+#ifdef CONFIG_SMP
plat_smp_setup();
prefill_possible_map();
+#endif
paging_init();
}
struct secondary_data cpuboot_data;
static DEFINE_PER_CPU(int, cpu_state);
-DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
-EXPORT_PER_CPU_SYMBOL(irq_stat);
enum ipi_msg_type {
IPI_RESCHEDULE,
mb();
}
-/*
- * The target CPU should go to XKPRANGE (uncached area) and flush
- * ICache/DCache/VCache before the control CPU can safely disable its clock.
- */
-static void loongson3_play_dead(int *state_addr)
+void play_dead(void)
{
- register int val;
- register void *addr;
+ register uint64_t addr;
register void (*init_fn)(void);
- __asm__ __volatile__(
- " li.d %[addr], 0x8000000000000000\n"
- "1: cacop 0x8, %[addr], 0 \n" /* flush ICache */
- " cacop 0x8, %[addr], 1 \n"
- " cacop 0x8, %[addr], 2 \n"
- " cacop 0x8, %[addr], 3 \n"
- " cacop 0x9, %[addr], 0 \n" /* flush DCache */
- " cacop 0x9, %[addr], 1 \n"
- " cacop 0x9, %[addr], 2 \n"
- " cacop 0x9, %[addr], 3 \n"
- " addi.w %[sets], %[sets], -1 \n"
- " addi.d %[addr], %[addr], 0x40 \n"
- " bnez %[sets], 1b \n"
- " li.d %[addr], 0x8000000000000000\n"
- "2: cacop 0xa, %[addr], 0 \n" /* flush VCache */
- " cacop 0xa, %[addr], 1 \n"
- " cacop 0xa, %[addr], 2 \n"
- " cacop 0xa, %[addr], 3 \n"
- " cacop 0xa, %[addr], 4 \n"
- " cacop 0xa, %[addr], 5 \n"
- " cacop 0xa, %[addr], 6 \n"
- " cacop 0xa, %[addr], 7 \n"
- " cacop 0xa, %[addr], 8 \n"
- " cacop 0xa, %[addr], 9 \n"
- " cacop 0xa, %[addr], 10 \n"
- " cacop 0xa, %[addr], 11 \n"
- " cacop 0xa, %[addr], 12 \n"
- " cacop 0xa, %[addr], 13 \n"
- " cacop 0xa, %[addr], 14 \n"
- " cacop 0xa, %[addr], 15 \n"
- " addi.w %[vsets], %[vsets], -1 \n"
- " addi.d %[addr], %[addr], 0x40 \n"
- " bnez %[vsets], 2b \n"
- " li.w %[val], 0x7 \n" /* *state_addr = CPU_DEAD; */
- " st.w %[val], %[state_addr], 0 \n"
- " dbar 0 \n"
- " cacop 0x11, %[state_addr], 0 \n" /* flush entry of *state_addr */
- : [addr] "=&r" (addr), [val] "=&r" (val)
- : [state_addr] "r" (state_addr),
- [sets] "r" (cpu_data[smp_processor_id()].dcache.sets),
- [vsets] "r" (cpu_data[smp_processor_id()].vcache.sets));
-
+ idle_task_exit();
local_irq_enable();
- change_csr_ecfg(ECFG0_IM, ECFGF_IPI);
+ set_csr_ecfg(ECFGF_IPI);
+ __this_cpu_write(cpu_state, CPU_DEAD);
+
+ __smp_mb();
+ do {
+ __asm__ __volatile__("idle 0\n\t");
+ addr = iocsr_read64(LOONGARCH_IOCSR_MBUF0);
+ } while (addr == 0);
- __asm__ __volatile__(
- " idle 0 \n"
- " li.w $t0, 0x1020 \n"
- " iocsrrd.d %[init_fn], $t0 \n" /* Get init PC */
- : [init_fn] "=&r" (addr)
- : /* No Input */
- : "a0");
- init_fn = __va(addr);
+ init_fn = (void *)TO_CACHE(addr);
+ iocsr_write32(0xffffffff, LOONGARCH_IOCSR_IPI_CLEAR);
init_fn();
unreachable();
}
-void play_dead(void)
-{
- int *state_addr;
- unsigned int cpu = smp_processor_id();
- void (*play_dead_uncached)(int *s);
-
- idle_task_exit();
- play_dead_uncached = (void *)TO_UNCACHE(__pa((unsigned long)loongson3_play_dead));
- state_addr = &per_cpu(cpu_state, cpu);
- mb();
- play_dead_uncached(state_addr);
-}
-
-static int loongson3_enable_clock(unsigned int cpu)
-{
- uint64_t core_id = cpu_data[cpu].core;
- uint64_t package_id = cpu_data[cpu].package;
-
- LOONGSON_FREQCTRL(package_id) |= 1 << (core_id * 4 + 3);
-
- return 0;
-}
-
-static int loongson3_disable_clock(unsigned int cpu)
-{
- uint64_t core_id = cpu_data[cpu].core;
- uint64_t package_id = cpu_data[cpu].package;
-
- LOONGSON_FREQCTRL(package_id) &= ~(1 << (core_id * 4 + 3));
-
- return 0;
-}
-
-static int register_loongson3_notifier(void)
-{
- return cpuhp_setup_state_nocalls(CPUHP_LOONGARCH_SOC_PREPARE,
- "loongarch/loongson:prepare",
- loongson3_enable_clock,
- loongson3_disable_clock);
-}
-early_initcall(register_loongson3_notifier);
-
#endif
/*
move tp, a2
cpu_restore_nonscratch a1
- li.w t0, _THREAD_SIZE - 32
- PTR_ADD t0, t0, tp
+ li.w t0, _THREAD_SIZE - 32
+ PTR_ADD t0, t0, tp
set_saved_sp t0, t1, t2
ldptr.d t1, a1, THREAD_CSRPRMD
die_if_kernel("Reserved instruction in kernel code", regs);
- if (unlikely(compute_return_era(regs) < 0))
- goto out;
+ compute_return_era(regs);
if (unlikely(get_user(opcode, era) < 0)) {
status = SIGSEGV;
HEAD_TEXT_SECTION
. = ALIGN(PECOFF_SEGMENT_ALIGN);
+ _stext = .;
.text : {
TEXT_TEXT
SCHED_TEXT
STABS_DEBUG
DWARF_DEBUG
+ ELF_DETAILS
.gptab.sdata : {
*(.gptab.data)
1: st.b zero, a0, 0
addi.d a0, a0, 1
addi.d a1, a1, -1
- bgt a1, zero, 1b
+ bgtz a1, 1b
2: move a0, a1
jr ra
addi.d a0, a0, 1
addi.d a1, a1, 1
addi.d a2, a2, -1
- bgt a2, zero, 1b
+ bgtz a2, 1b
3: move a0, a2
jr ra
#include <linux/smp.h>
#include <linux/timex.h>
-#include <asm/compiler.h>
#include <asm/processor.h>
void __delay(unsigned long cycles)
.align 5
SYM_FUNC_START(clear_page)
- lu12i.w t0, 1 << (PAGE_SHIFT - 12)
- add.d t0, t0, a0
+ lu12i.w t0, 1 << (PAGE_SHIFT - 12)
+ add.d t0, t0, a0
1:
- st.d zero, a0, 0
- st.d zero, a0, 8
- st.d zero, a0, 16
- st.d zero, a0, 24
- st.d zero, a0, 32
- st.d zero, a0, 40
- st.d zero, a0, 48
- st.d zero, a0, 56
- addi.d a0, a0, 128
- st.d zero, a0, -64
- st.d zero, a0, -56
- st.d zero, a0, -48
- st.d zero, a0, -40
- st.d zero, a0, -32
- st.d zero, a0, -24
- st.d zero, a0, -16
- st.d zero, a0, -8
- bne t0, a0, 1b
+ st.d zero, a0, 0
+ st.d zero, a0, 8
+ st.d zero, a0, 16
+ st.d zero, a0, 24
+ st.d zero, a0, 32
+ st.d zero, a0, 40
+ st.d zero, a0, 48
+ st.d zero, a0, 56
+ addi.d a0, a0, 128
+ st.d zero, a0, -64
+ st.d zero, a0, -56
+ st.d zero, a0, -48
+ st.d zero, a0, -40
+ st.d zero, a0, -32
+ st.d zero, a0, -24
+ st.d zero, a0, -16
+ st.d zero, a0, -8
+ bne t0, a0, 1b
- jirl $r0, ra, 0
+ jr ra
SYM_FUNC_END(clear_page)
EXPORT_SYMBOL(clear_page)
.align 5
SYM_FUNC_START(copy_page)
- lu12i.w t8, 1 << (PAGE_SHIFT - 12)
- add.d t8, t8, a0
+ lu12i.w t8, 1 << (PAGE_SHIFT - 12)
+ add.d t8, t8, a0
1:
- ld.d t0, a1, 0
- ld.d t1, a1, 8
- ld.d t2, a1, 16
- ld.d t3, a1, 24
- ld.d t4, a1, 32
- ld.d t5, a1, 40
- ld.d t6, a1, 48
- ld.d t7, a1, 56
+ ld.d t0, a1, 0
+ ld.d t1, a1, 8
+ ld.d t2, a1, 16
+ ld.d t3, a1, 24
+ ld.d t4, a1, 32
+ ld.d t5, a1, 40
+ ld.d t6, a1, 48
+ ld.d t7, a1, 56
- st.d t0, a0, 0
- st.d t1, a0, 8
- ld.d t0, a1, 64
- ld.d t1, a1, 72
- st.d t2, a0, 16
- st.d t3, a0, 24
- ld.d t2, a1, 80
- ld.d t3, a1, 88
- st.d t4, a0, 32
- st.d t5, a0, 40
- ld.d t4, a1, 96
- ld.d t5, a1, 104
- st.d t6, a0, 48
- st.d t7, a0, 56
- ld.d t6, a1, 112
- ld.d t7, a1, 120
- addi.d a0, a0, 128
- addi.d a1, a1, 128
+ st.d t0, a0, 0
+ st.d t1, a0, 8
+ ld.d t0, a1, 64
+ ld.d t1, a1, 72
+ st.d t2, a0, 16
+ st.d t3, a0, 24
+ ld.d t2, a1, 80
+ ld.d t3, a1, 88
+ st.d t4, a0, 32
+ st.d t5, a0, 40
+ ld.d t4, a1, 96
+ ld.d t5, a1, 104
+ st.d t6, a0, 48
+ st.d t7, a0, 56
+ ld.d t6, a1, 112
+ ld.d t7, a1, 120
+ addi.d a0, a0, 128
+ addi.d a1, a1, 128
- st.d t0, a0, -64
- st.d t1, a0, -56
- st.d t2, a0, -48
- st.d t3, a0, -40
- st.d t4, a0, -32
- st.d t5, a0, -24
- st.d t6, a0, -16
- st.d t7, a0, -8
+ st.d t0, a0, -64
+ st.d t1, a0, -56
+ st.d t2, a0, -48
+ st.d t3, a0, -40
+ st.d t4, a0, -32
+ st.d t5, a0, -24
+ st.d t6, a0, -16
+ st.d t7, a0, -8
- bne t8, a0, 1b
- jirl $r0, ra, 0
+ bne t8, a0, 1b
+ jr ra
SYM_FUNC_END(copy_page)
EXPORT_SYMBOL(copy_page)
if (pcpu_handlers[cpu])
return;
- page = alloc_pages_node(cpu_to_node(cpu), GFP_KERNEL, get_order(vec_sz));
+ page = alloc_pages_node(cpu_to_node(cpu), GFP_ATOMIC, get_order(vec_sz));
if (!page)
return;
addr = page_address(page);
- pcpu_handlers[cpu] = virt_to_phys(addr);
+ pcpu_handlers[cpu] = (unsigned long)addr;
memcpy((void *)addr, (void *)eentry, vec_sz);
local_flush_icache_range((unsigned long)addr, (unsigned long)addr + vec_sz);
- csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_TLBRENTRY);
+ csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_EENTRY);
+ csr_write64(pcpu_handlers[cpu], LOONGARCH_CSR_MERRENTRY);
csr_write64(pcpu_handlers[cpu] + 80*VECSIZE, LOONGARCH_CSR_TLBRENTRY);
}
#endif
REG_S a2, sp, PT_BVADDR
li.w a1, \write
la.abs t0, do_page_fault
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(tlb_do_page_fault_\write)
.endm
csrrd a2, LOONGARCH_CSR_BADV
REG_S a2, sp, PT_BVADDR
la.abs t0, do_page_fault
- jirl ra, t0, 0
+ jirl ra, t0, 0
RESTORE_ALL_AND_RET
SYM_FUNC_END(handle_tlb_protect)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_load
+ bltz t0, vmalloc_load
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_load:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_load
+ bnez t0, tlb_huge_update_load
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_load
+ beqz ra, nopage_tlb_load
ori t0, t0, _PAGE_VALID
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_load
+ beqz t0, smp_pgtable_change_load
#else
st.d t0, t1, 0
#endif
#endif
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_load
+ beqz ra, nopage_tlb_load
tlbsrch
ori t0, t0, _PAGE_VALID
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_load
+ beqz t0, tlb_huge_update_load
ld.d t0, t1, 0
#else
st.d t0, t1, 0
#endif
- addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
+ addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
+ addi.d ra, t1, 0
+ csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
tlbwr
- csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX
+ csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
/*
* A huge PTE describes an area the size of the
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbfill
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_load:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_0
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_load)
SYM_FUNC_START(handle_tlb_store)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_store
+ bltz t0, vmalloc_store
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_store:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_store
+ bnez t0, tlb_huge_update_store
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
- bne ra, $r0, nopage_tlb_store
+ bnez ra, nopage_tlb_store
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_store
+ beqz t0, smp_pgtable_change_store
#else
st.d t0, t1, 0
#endif
srli.d ra, t0, _PAGE_PRESENT_SHIFT
andi ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
xori ra, ra, ((_PAGE_PRESENT | _PAGE_WRITE) >> _PAGE_PRESENT_SHIFT)
- bne ra, $r0, nopage_tlb_store
+ bnez ra, nopage_tlb_store
tlbsrch
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_store
+ beqz t0, tlb_huge_update_store
ld.d t0, t1, 0
#else
st.d t0, t1, 0
#endif
- addu16i.d t1, $r0, -(CSR_TLBIDX_EHINV >> 16)
- addi.d ra, t1, 0
- csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
+ addu16i.d t1, zero, -(CSR_TLBIDX_EHINV >> 16)
+ addi.d ra, t1, 0
+ csrxchg ra, t1, LOONGARCH_CSR_TLBIDX
tlbwr
- csrxchg $r0, t1, LOONGARCH_CSR_TLBIDX
+ csrxchg zero, t1, LOONGARCH_CSR_TLBIDX
/*
* A huge PTE describes an area the size of the
* configured huge page size. This is twice the
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbfill
/* Reset default page size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_store:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_1
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_store)
SYM_FUNC_START(handle_tlb_modify)
* The vmalloc handling is not in the hotpath.
*/
csrrd t0, LOONGARCH_CSR_BADV
- blt t0, $r0, vmalloc_modify
+ bltz t0, vmalloc_modify
csrrd t1, LOONGARCH_CSR_PGDL
vmalloc_done_modify:
* see if we need to jump to huge tlb processing.
*/
andi t0, ra, _PAGE_HUGE
- bne t0, $r0, tlb_huge_update_modify
+ bnez t0, tlb_huge_update_modify
csrrd t0, LOONGARCH_CSR_BADV
srli.d t0, t0, (PAGE_SHIFT + PTE_ORDER)
srli.d ra, t0, _PAGE_WRITE_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_modify
+ beqz ra, nopage_tlb_modify
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, smp_pgtable_change_modify
+ beqz t0, smp_pgtable_change_modify
#else
st.d t0, t1, 0
#endif
ertn
#ifdef CONFIG_64BIT
vmalloc_modify:
- la.abs t1, swapper_pg_dir
+ la.abs t1, swapper_pg_dir
b vmalloc_done_modify
#endif
srli.d ra, t0, _PAGE_WRITE_SHIFT
andi ra, ra, 1
- beq ra, $r0, nopage_tlb_modify
+ beqz ra, nopage_tlb_modify
tlbsrch
ori t0, t0, (_PAGE_VALID | _PAGE_DIRTY | _PAGE_MODIFIED)
#ifdef CONFIG_SMP
sc.d t0, t1, 0
- beq t0, $r0, tlb_huge_update_modify
+ beqz t0, tlb_huge_update_modify
ld.d t0, t1, 0
#else
st.d t0, t1, 0
addi.d t0, ra, 0
/* Convert to entrylo1 */
- addi.d t1, $r0, 1
+ addi.d t1, zero, 1
slli.d t1, t1, (HPAGE_SHIFT - 1)
add.d t0, t0, t1
csrwr t0, LOONGARCH_CSR_TLBELO1
/* Set huge page tlb entry size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
- csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_HUGE_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
tlbwr
/* Reset default page size */
- addu16i.d t0, $r0, (CSR_TLBIDX_PS >> 16)
- addu16i.d t1, $r0, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
- csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
+ addu16i.d t0, zero, (CSR_TLBIDX_PS >> 16)
+ addu16i.d t1, zero, (PS_DEFAULT_SIZE << (CSR_TLBIDX_PS_SHIFT - 16))
+ csrxchg t1, t0, LOONGARCH_CSR_TLBIDX
nopage_tlb_modify:
dbar 0
csrrd ra, EXCEPTION_KS2
la.abs t0, tlb_do_page_fault_1
- jirl $r0, t0, 0
+ jr t0
SYM_FUNC_END(handle_tlb_modify)
SYM_FUNC_START(handle_tlb_refill)
endif
cflags-vdso := $(ccflags-vdso) \
+ -isystem $(shell $(CC) -print-file-name=include) \
$(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \
-O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \
-fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \
clocks = <&cgu X1000_CLK_RTCLK>,
<&cgu X1000_CLK_EXCLK>,
- <&cgu X1000_CLK_PCLK>;
- clock-names = "rtc", "ext", "pclk";
+ <&cgu X1000_CLK_PCLK>,
+ <&cgu X1000_CLK_TCU>;
+ clock-names = "rtc", "ext", "pclk", "tcu";
interrupt-controller;
#interrupt-cells = <1>;
clocks = <&cgu X1830_CLK_RTCLK>,
<&cgu X1830_CLK_EXCLK>,
- <&cgu X1830_CLK_PCLK>;
- clock-names = "rtc", "ext", "pclk";
+ <&cgu X1830_CLK_PCLK>,
+ <&cgu X1830_CLK_TCU>;
+ clock-names = "rtc", "ext", "pclk", "tcu";
interrupt-controller;
#interrupt-cells = <1>;
__func__);
rtc_base = of_iomap(np, 0);
+ of_node_put(np);
if (!rtc_base)
panic("%s(): Failed to ioremap Goldfish RTC base!", __func__);
of_address_to_resource(np_sysgpe, 0, &res_sys[2]))
panic("Failed to get core resources");
+ of_node_put(np_status);
+ of_node_put(np_ebu);
+ of_node_put(np_sys1);
+ of_node_put(np_syseth);
+ of_node_put(np_sysgpe);
+
if ((request_mem_region(res_status.start, resource_size(&res_status),
res_status.name) < 0) ||
(request_mem_region(res_ebu.start, resource_size(&res_ebu),
if (!ltq_eiu_membase)
panic("Failed to remap eiu memory");
}
+ of_node_put(eiu_node);
return 0;
}
of_address_to_resource(np_ebu, 0, &res_ebu))
panic("Failed to get core resources");
+ of_node_put(np_pmu);
+ of_node_put(np_cgu);
+ of_node_put(np_ebu);
+
if (!request_mem_region(res_pmu.start, resource_size(&res_pmu),
res_pmu.name) ||
!request_mem_region(res_cgu.start, resource_size(&res_cgu),
if (of_update_property(node, &gic_frequency_prop) < 0)
pr_err("error updating gic frequency property\n");
+
+ of_node_put(node);
}
#endif
np = of_find_compatible_node(NULL, NULL, lookup->compatible);
if (np) {
lookup->name = (char *)np->name;
- if (lookup->phys_addr)
+ if (lookup->phys_addr) {
+ of_node_put(np);
continue;
+ }
if (!of_address_to_resource(np, 0, &res))
lookup->phys_addr = res.start;
+ of_node_put(np);
}
}
+ of_node_put(root);
+
return 0;
}
goto default_map;
irq = irq_of_parse_and_map(node, 0);
+
+ of_node_put(node);
+
if (!irq)
goto default_map;
if (of_address_to_resource(np, 0, &res))
panic("Failed to get resource for %s", node);
+ of_node_put(np);
+
if (!request_mem_region(res.start,
resource_size(&res),
res.name))
printk(KERN_ERR "spurious ICU interrupt: %04x,%04x\n", pend1, pend2);
- atomic_inc(&irq_err_count);
-
return -1;
}
/*
* Verify a frameinfo structure. The return address should be a valid text
* address. The frame pointer may be null if its the last frame, otherwise
- * the frame pointer should point to a location in the stack after the the
+ * the frame pointer should point to a location in the stack after the
* top of the next frame up.
*/
static inline int or1k_frameinfo_valid(struct or1k_frameinfo *frameinfo)
select ARCH_WANT_FRAME_POINTERS
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_STRICT_KERNEL_RWX
+ select ARCH_HAS_STRICT_MODULE_RWX
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAS_PTE_SPECIAL
select ARCH_NO_SG_CHAIN
pgprot_val(vma->vm_page_prot) |= _PAGE_NO_CACHE;
}
-#if defined(CONFIG_STI_CONSOLE) || defined(CONFIG_FB_STI)
+#if defined(CONFIG_FB_STI)
int fb_is_primary_device(struct fb_info *info);
#else
static inline int fb_is_primary_device(struct fb_info *info)
BLANK();
DEFINE(ASM_SIGFRAME_SIZE, PARISC_RT_SIGFRAME_SIZE);
DEFINE(SIGFRAME_CONTEXT_REGS, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE);
+#ifdef CONFIG_64BIT
DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE32);
DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct compat_rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE32);
+#else
+ DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE);
+ DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE);
+#endif
BLANK();
DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base));
DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride));
return;
if (parisc_requires_coherency()) {
- flush_user_cache_page(vma, vmaddr);
+ if (vma->vm_flags & VM_SHARED)
+ flush_data_cache();
+ else
+ flush_user_cache_page(vma, vmaddr);
return;
}
" depw %%r0,31,2,%4\n"
"1: ldw 0(%%sr1,%4),%0\n"
"2: ldw 4(%%sr1,%4),%3\n"
-" subi 32,%4,%2\n"
+" subi 32,%2,%2\n"
" mtctl %2,11\n"
" vshd %0,%3,%0\n"
"3: \n"
* that happen. Want to keep this overhead low, but still provide
* some information to the customer. All exits from this routine
* need to restore Fpu_register[0]
- */
+ */
bflags=(Fpu_register[0] & 0xf8000000);
Fpu_register[0] &= 0x07ffffff;
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IOREMAP_PROT
- select HAVE_IRQ_EXIT_ON_IRQ_STACK
select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
select IRQ_FORCED_THREADING
select MMU_GATHER_PAGE_SIZE
select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_PER_CPU_EMBED_FIRST_CHUNK if PPC64
# Please keep this list sorted alphabetically.
#
+config PPC_LONG_DOUBLE_128
+ depends on PPC64
+ def_bool $(success,test "$(shell,echo __LONG_DOUBLE_128__ | $(CC) -E -P -)" = 1)
+
config PPC_BARRIER_NOSPEC
bool
default y
def_bool y
depends on PPC_POWERNV || PPC_PSERIES
+config ARCH_HAS_ADD_PAGES
+ def_bool y
+ depends on ARCH_ENABLE_MEMORY_HOTPLUG
+
config PPC_DCR_NATIVE
bool
range 13 15
default "15" if PPC_256K_PAGES
default "14" if PPC64
- default "14" if KASAN
default "13"
help
Used to define the stack size. The default is almost always what you
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_BPF_PERF_EVENT_H
+#define _ASM_POWERPC_BPF_PERF_EVENT_H
+
+#include <asm/ptrace.h>
+
+typedef struct user_pt_regs bpf_user_pt_regs_t;
+
+#endif /* _ASM_POWERPC_BPF_PERF_EVENT_H */
#ifdef __KERNEL__
-#if defined(CONFIG_VMAP_STACK) && CONFIG_THREAD_SHIFT < PAGE_SHIFT
+#ifdef CONFIG_KASAN
+#define MIN_THREAD_SHIFT (CONFIG_THREAD_SHIFT + 1)
+#else
+#define MIN_THREAD_SHIFT CONFIG_THREAD_SHIFT
+#endif
+
+#if defined(CONFIG_VMAP_STACK) && MIN_THREAD_SHIFT < PAGE_SHIFT
#define THREAD_SHIFT PAGE_SHIFT
#else
-#define THREAD_SHIFT CONFIG_THREAD_SHIFT
+#define THREAD_SHIFT MIN_THREAD_SHIFT
#endif
#define THREAD_SIZE (1 << THREAD_SHIFT)
#include <linux/pagemap.h>
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define __tlb_remove_tlb_entry __tlb_remove_tlb_entry
#define tlb_flush tlb_flush
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
-#define _UAPI__ASM_BPF_PERF_EVENT_H__
-
-#include <asm/ptrace.h>
-
-typedef struct user_pt_regs bpf_user_pt_regs_t;
-
-#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
CFLAGS_prom_init.o += -fno-stack-protector
CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
CFLAGS_prom_init.o += -ffreestanding
+CFLAGS_prom_init.o += $(call cc-option, -ftrivial-auto-var-init=uninitialized)
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
KASAN_SANITIZE_setup_64.o := n
KASAN_SANITIZE_mce.o := n
KASAN_SANITIZE_mce_power.o := n
+KASAN_SANITIZE_udbg.o := n
+KASAN_SANITIZE_udbg_16550.o := n
# we have to be particularly careful in ppc64 to exclude code that
# runs with translations off, as we cannot access the shadow with
tm_reclaim_current(0);
#endif
- memset(regs->gpr, 0, sizeof(regs->gpr));
+ memset(®s->gpr[1], 0, sizeof(regs->gpr) - sizeof(regs->gpr[0]));
regs->ctr = 0;
regs->link = 0;
regs->xer = 0;
return 0;
do {
- sp = *(unsigned long *)sp;
+ sp = READ_ONCE_NOCHECK(*(unsigned long *)sp);
if (!validate_sp(sp, p, STACK_FRAME_OVERHEAD) ||
task_is_running(p))
return 0;
if (count > 0) {
- ip = ((unsigned long *)sp)[STACK_FRAME_LR_SAVE];
+ ip = READ_ONCE_NOCHECK(((unsigned long *)sp)[STACK_FRAME_LR_SAVE]);
if (!in_sched_functions(ip))
return ip;
}
static int __init prom_find_machine_type(void)
{
- char compat[256];
+ static char compat[256] __prombss;
int len, i = 0;
#ifdef CONFIG_PPC64
phandle rtas;
# If you really need to reference something from prom_init.o add
# it to the list below:
-grep "^CONFIG_KASAN=y$" .config >/dev/null
+grep "^CONFIG_KASAN=y$" ${KCONFIG_CONFIG} >/dev/null
if [ $? -eq 0 ]
then
MEM_FUNCS="__memcpy __memset"
#ifdef CONFIG_PPC_FPU_REGS
flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
- else
+ if (fpidx < (PT_FPSCR - PT_FPR0)) {
+ if (IS_ENABLED(CONFIG_PPC32))
+ // On 32-bit the index we are passed refers to 32-bit words
+ *data = ((u32 *)child->thread.fp_state.fpr)[fpidx];
+ else
+ memcpy(data, &child->thread.TS_FPR(fpidx), sizeof(long));
+ } else
*data = child->thread.fp_state.fpscr;
#else
*data = 0;
#ifdef CONFIG_PPC_FPU_REGS
flush_fp_to_thread(child);
- if (fpidx < (PT_FPSCR - PT_FPR0))
- memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
- else
+ if (fpidx < (PT_FPSCR - PT_FPR0)) {
+ if (IS_ENABLED(CONFIG_PPC32))
+ // On 32-bit the index we are passed refers to 32-bit words
+ ((u32 *)child->thread.fp_state.fpr)[fpidx] = data;
+ else
+ memcpy(&child->thread.TS_FPR(fpidx), &data, sizeof(long));
+ } else
child->thread.fp_state.fpscr = data;
#endif
* real registers.
*/
BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+ // ptrace_get/put_fpr() rely on PPC32 and VSX being incompatible
+ BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC32) && IS_ENABLED(CONFIG_VSX));
}
*
* Return: A pointer to the specified errorlog or NULL if not found.
*/
-struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
- uint16_t section_id)
+noinstr struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log,
+ uint16_t section_id)
{
struct rtas_ext_event_log_v6 *ext_log =
(struct rtas_ext_event_log_v6 *)log->buffer;
{ "get-time-of-day", -1, -1, -1, -1, -1 },
{ "ibm,get-vpd", -1, 0, -1, 1, 2 },
{ "ibm,lpar-perftools", -1, 2, 3, -1, -1 },
- { "ibm,platform-dump", -1, 4, 5, -1, -1 },
+ { "ibm,platform-dump", -1, 4, 5, -1, -1 }, /* Special cased */
{ "ibm,read-slot-reset-state", -1, -1, -1, -1, -1 },
{ "ibm,scan-log-dump", -1, 0, 1, -1, -1 },
{ "ibm,set-dynamic-indicator", -1, 2, -1, -1, -1 },
size = 1;
end = base + size - 1;
+
+ /*
+ * Special case for ibm,platform-dump - NULL buffer
+ * address is used to indicate end of dump processing
+ */
+ if (!strcmp(f->name, "ibm,platform-dump") &&
+ base == 0)
+ return false;
+
if (!in_rmo_buf(base, end))
goto err;
}
/* Print various info about the machine that has been gathered so far. */
print_system_info();
- /* Reserve large chunks of memory for use by CMA for KVM. */
- kvm_cma_reserve();
-
- /* Reserve large chunks of memory for us by CMA for hugetlb */
- gigantic_hugetlb_cma_reserve();
-
klp_init_thread_info(&init_task);
setup_initial_init_mm(_stext, _etext, _edata, _end);
initmem_init();
+ /*
+ * Reserve large chunks of memory for use by CMA for KVM and hugetlb. These must
+ * be called after initmem_init(), so that pageblock_order is initialised.
+ */
+ kvm_cma_reserve();
+ gigantic_hugetlb_cma_reserve();
+
early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
if (ppc_md.setup_arch)
/* wait for all the CPUs to hit real mode but timeout if they don't come in */
#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
-static void __maybe_unused crash_kexec_wait_realmode(int cpu)
+noinstr static void __maybe_unused crash_kexec_wait_realmode(int cpu)
{
unsigned int msecs;
int i;
vm_unmap_aliases();
}
+/*
+ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
+ * updating.
+ */
+static void update_end_of_memory_vars(u64 start, u64 size)
+{
+ unsigned long end_pfn = PFN_UP(start + size);
+
+ if (end_pfn > max_pfn) {
+ max_pfn = end_pfn;
+ max_low_pfn = end_pfn;
+ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
+ }
+}
+
+int __ref add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages,
+ struct mhp_params *params)
+{
+ int ret;
+
+ ret = __add_pages(nid, start_pfn, nr_pages, params);
+ if (ret)
+ return ret;
+
+ /* update max_pfn, max_low_pfn and high_memory */
+ update_end_of_memory_vars(start_pfn << PAGE_SHIFT,
+ nr_pages << PAGE_SHIFT);
+
+ return ret;
+}
+
int __ref arch_add_memory(int nid, u64 start, u64 size,
struct mhp_params *params)
{
rc = arch_create_linear_mapping(nid, start, size, params);
if (rc)
return rc;
- rc = __add_pages(nid, start_pfn, nr_pages, params);
+ rc = add_pages(nid, start_pfn, nr_pages, params);
if (rc)
arch_remove_linear_mapping(start, size);
return rc;
pgdp = pgd_offset_k(ea);
p4dp = p4d_offset(pgdp, ea);
if (p4d_none(*p4dp)) {
- pmdp = early_alloc_pgtable(PMD_TABLE_SIZE);
- p4d_populate(&init_mm, p4dp, pmdp);
+ pudp = early_alloc_pgtable(PUD_TABLE_SIZE);
+ p4d_populate(&init_mm, p4dp, pudp);
}
pudp = pud_offset(p4dp, ea);
if (pud_none(*pudp)) {
}
pmdp = pmd_offset(pudp, ea);
if (!pmd_present(*pmdp)) {
- ptep = early_alloc_pgtable(PAGE_SIZE);
+ ptep = early_alloc_pgtable(PTE_TABLE_SIZE);
pmd_populate_kernel(&init_mm, pmdp, ptep);
}
ptep = pte_offset_kernel(pmdp, ea);
#include <asm/cacheflush.h>
#include <asm/kdump.h>
#include <mm/mmu_decl.h>
-#include <generated/compile.h>
#include <generated/utsrelease.h>
struct regions {
int reserved_mem_size_cells;
};
-/* Simplified build-specific string for starting entropy. */
-static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
- LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
-
struct regions __initdata regions;
static __init void kaslr_get_cmdline(void *fdt)
{
unsigned long hash = 0;
- hash = rotate_xor(hash, build_str, sizeof(build_str));
+ /* build-specific string for starting entropy. */
+ hash = rotate_xor(hash, linux_banner, strlen(linux_banner));
hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
return hash;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _MICROWATT_H
+#define _MICROWATT_H
+
+void microwatt_rng_init(void);
+
+#endif /* _MICROWATT_H */
#include <asm/archrandom.h>
#include <asm/cputable.h>
#include <asm/machdep.h>
+#include "microwatt.h"
#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
return 1;
}
-static __init int rng_init(void)
+void __init microwatt_rng_init(void)
{
unsigned long val;
int i;
for (i = 0; i < 10; i++) {
if (microwatt_get_random_darn(&val)) {
ppc_md.get_random_seed = microwatt_get_random_darn;
- return 0;
+ return;
}
}
-
- pr_warn("Unable to use DARN for get_random_seed()\n");
-
- return -EIO;
}
-machine_subsys_initcall(, rng_init);
#include <asm/xics.h>
#include <asm/udbg.h>
+#include "microwatt.h"
+
static void __init microwatt_init_IRQ(void)
{
xics_init();
}
machine_arch_initcall(microwatt, microwatt_populate);
+static void __init microwatt_setup_arch(void)
+{
+ microwatt_rng_init();
+}
+
define_machine(microwatt) {
.name = "microwatt",
.probe = microwatt_probe,
.init_IRQ = microwatt_init_IRQ,
+ .setup_arch = microwatt_setup_arch,
.progress = udbg_progress,
.calibrate_decr = generic_calibrate_decr,
};
# in particular, idle code runs a bunch of things in real mode
KASAN_SANITIZE_idle.o := n
KASAN_SANITIZE_pci-ioda.o := n
+KASAN_SANITIZE_pci-ioda-tce.o := n
# pnv_machine_check_early
KASAN_SANITIZE_setup.o := n
u32 __init memcons_get_size(struct memcons *mc);
struct memcons *__init memcons_init(struct device_node *node, const char *mc_prop_name);
+void pnv_rng_init(void);
+
#endif /* _POWERNV_H */
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/smp.h>
+#include "powernv.h"
#define DARN_ERR 0xFFFFFFFFFFFFFFFFul
static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
-
int powernv_hwrng_present(void)
{
struct powernv_rng *rng;
return 0;
}
}
-
- pr_warn("Unable to use DARN for get_random_seed()\n");
-
return -EIO;
}
rng_init_per_cpu(rng, dn);
- pr_info_once("Registering arch random hook.\n");
-
ppc_md.get_random_seed = powernv_get_random_long;
return 0;
}
-static __init int rng_init(void)
+static int __init pnv_get_random_long_early(unsigned long *v)
{
struct device_node *dn;
- int rc;
-
- for_each_compatible_node(dn, NULL, "ibm,power-rng") {
- rc = rng_create(dn);
- if (rc) {
- pr_err("Failed creating rng for %pOF (%d).\n",
- dn, rc);
- continue;
- }
- /* Create devices for hwrng driver */
- of_platform_device_create(dn, NULL, NULL);
- }
+ if (!slab_is_available())
+ return 0;
+
+ if (cmpxchg(&ppc_md.get_random_seed, pnv_get_random_long_early,
+ NULL) != pnv_get_random_long_early)
+ return 0;
+
+ for_each_compatible_node(dn, NULL, "ibm,power-rng")
+ rng_create(dn);
+
+ if (!ppc_md.get_random_seed)
+ return 0;
+ return ppc_md.get_random_seed(v);
+}
- initialise_darn();
+void __init pnv_rng_init(void)
+{
+ struct device_node *dn;
+
+ /* Prefer darn over the rest. */
+ if (!initialise_darn())
+ return;
+
+ dn = of_find_compatible_node(NULL, NULL, "ibm,power-rng");
+ if (dn)
+ ppc_md.get_random_seed = pnv_get_random_long_early;
+
+ of_node_put(dn);
+}
+
+static int __init pnv_rng_late_init(void)
+{
+ struct device_node *dn;
+ unsigned long v;
+
+ /* In case it wasn't called during init for some other reason. */
+ if (ppc_md.get_random_seed == pnv_get_random_long_early)
+ pnv_get_random_long_early(&v);
+
+ if (ppc_md.get_random_seed == powernv_get_random_long) {
+ for_each_compatible_node(dn, NULL, "ibm,power-rng")
+ of_platform_device_create(dn, NULL, NULL);
+ }
return 0;
}
-machine_subsys_initcall(powernv, rng_init);
+machine_subsys_initcall(powernv, pnv_rng_late_init);
pnv_check_guarded_cores();
/* XXX PMCS */
+
+ pnv_rng_init();
}
static void __init pnv_init(void)
u32 available_events;
int index, rc = 0;
+ if (!p->stat_buffer_len)
+ return -ENOENT;
+
available_events = (p->stat_buffer_len - sizeof(struct papr_scm_perf_stats))
/ sizeof(struct papr_scm_perf_stat);
if (available_events == 0)
static inline void pseries_lpar_read_hblkrm_characteristics(void) { }
#endif
+void pseries_rng_init(void);
+
#endif /* _PSERIES_PSERIES_H */
#include <asm/archrandom.h>
#include <asm/machdep.h>
#include <asm/plpar_wrappers.h>
+#include "pseries.h"
static int pseries_get_random_long(unsigned long *v)
return 0;
}
-static __init int rng_init(void)
+void __init pseries_rng_init(void)
{
struct device_node *dn;
dn = of_find_compatible_node(NULL, NULL, "ibm,random");
if (!dn)
- return -ENODEV;
-
- pr_info("Registering arch random hook.\n");
-
+ return;
ppc_md.get_random_seed = pseries_get_random_long;
-
of_node_put(dn);
- return 0;
}
-machine_subsys_initcall(pseries, rng_init);
}
ppc_md.pcibios_root_bridge_prepare = pseries_root_bridge_prepare;
+ pseries_rng_init();
}
static void pseries_panic(char *str)
#include <linux/of_fdt.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
+#include <linux/bitmap.h>
#include <linux/cpumask.h>
#include <linux/mm.h>
#include <linux/delay.h>
spin_lock_init(&xibm->lock);
xibm->base = base;
xibm->count = count;
- xibm->bitmap = kzalloc(xibm->count, GFP_KERNEL);
+ xibm->bitmap = bitmap_zalloc(xibm->count, GFP_KERNEL);
if (!xibm->bitmap) {
kfree(xibm);
return -ENOMEM;
list_for_each_entry_safe(xibm, tmp, &xive_irq_bitmaps, list) {
list_del(&xibm->list);
- kfree(xibm->bitmap);
+ bitmap_free(xibm->bitmap);
kfree(xibm);
}
}
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
select ARCH_SUPPORTS_HUGETLBFS if MMU
- select ARCH_SUPPORTS_PAGE_TABLE_CHECK
+ select ARCH_SUPPORTS_PAGE_TABLE_CHECK if MMU
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select RISCV_ALTERNATIVE
default y
help
- Adds support to dynamically detect the presence of the SVPBMT extension
- (Supervisor-mode: page-based memory types) and enable its usage.
+ Adds support to dynamically detect the presence of the SVPBMT
+ ISA-extension (Supervisor-mode: page-based memory types) and
+ enable its usage.
+
+ The memory type for a page contains a combination of attributes
+ that indicate the cacheability, idempotency, and ordering
+ properties for access to that page.
The SVPBMT extension is only available on 64Bit cpus.
config ERRATA_THEAD
bool "T-HEAD errata"
+ depends on !XIP_KERNEL
select RISCV_ALTERNATIVE
help
All T-HEAD errata Kconfig depend on this Kconfig. Disabling
endif
KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
+KBUILD_AFLAGS_MODULE += $(call as-option,-Wa$(comma)-mno-relax)
# GCC versions that support the "-mstrict-align" option default to allowing
# unaligned accesses. While unaligned accesses are explicitly allowed in the
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
- $(build)=arch/riscv/kernel/compat_vdso $@)
+ $(build)=arch/riscv/kernel/compat_vdso compat_$@)
ifeq ($(KBUILD_EXTMOD),)
ifeq ($(CONFIG_MMU),y)
gpio-keys {
compatible = "gpio-keys";
- key0 {
+ key {
label = "KEY0";
linux,code = <BTN_0>;
gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
gpio-keys {
compatible = "gpio-keys";
- up {
+ key-up {
label = "UP";
linux,code = <BTN_1>;
gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>;
};
- press {
+ key-press {
label = "PRESS";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
};
- down {
+ key-down {
label = "DOWN";
linux,code = <BTN_2>;
gpios = <&gpio0 1 GPIO_ACTIVE_LOW>;
gpio-keys {
compatible = "gpio-keys";
- boot {
+ key-boot {
label = "BOOT";
linux,code = <BTN_0>;
gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
+ next-level-cache = <&cctrllr>;
status = "okay";
cpu1_intc: interrupt-controller {
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
+ next-level-cache = <&cctrllr>;
status = "okay";
cpu2_intc: interrupt-controller {
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
+ next-level-cache = <&cctrllr>;
status = "okay";
cpu3_intc: interrupt-controller {
riscv,isa = "rv64imafdc";
clocks = <&clkcfg CLK_CPU>;
tlb-split;
+ next-level-cache = <&cctrllr>;
status = "okay";
cpu4_intc: interrupt-controller {
#interrupt-cells = <1>;
riscv,ndev = <186>;
};
+ pdma: dma-controller@3000000 {
+ compatible = "sifive,fu540-c000-pdma", "sifive,pdma0";
+ reg = <0x0 0x3000000 0x0 0x8000>;
+ interrupt-parent = <&plic>;
+ interrupts = <5 6>, <7 8>, <9 10>, <11 12>;
+ dma-channels = <4>;
+ #dma-cells = <1>;
+ };
+
clkcfg: clkcfg@20002000 {
compatible = "microchip,mpfs-clkcfg";
reg = <0x0 0x20002000 0x0 0x1000>, <0x0 0x3E001000 0x0 0x1000>;
cpu_apply_errata |= tmp;
}
}
- if (cpu_apply_errata != cpu_req_errata)
+ if (stage != RISCV_ALTERNATIVES_MODULE &&
+ cpu_apply_errata != cpu_req_errata)
warn_miss_errata(cpu_req_errata - cpu_apply_errata);
}
"nop\n\t" \
"nop\n\t" \
"nop", \
- "li t3, %2\n\t" \
- "slli t3, t3, %4\n\t" \
+ "li t3, %1\n\t" \
+ "slli t3, t3, %3\n\t" \
"and t3, %0, t3\n\t" \
"bne t3, zero, 2f\n\t" \
- "li t3, %3\n\t" \
- "slli t3, t3, %4\n\t" \
+ "li t3, %2\n\t" \
+ "slli t3, t3, %3\n\t" \
"or %0, %0, t3\n\t" \
"2:", THEAD_VENDOR_ID, \
ERRATA_THEAD_PBMT, CONFIG_ERRATA_THEAD_PBMT) \
: "+r"(_val) \
- : "0"(_val), \
- "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \
+ : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_PBMT_SHIFT), \
"I"(_PAGE_PMA_THEAD >> ALT_THEAD_PBMT_SHIFT), \
- "I"(ALT_THEAD_PBMT_SHIFT))
+ "I"(ALT_THEAD_PBMT_SHIFT) \
+ : "t3")
#else
#define ALT_THEAD_PMA(_val)
#endif
static inline unsigned long _pud_pfn(pud_t pud)
{
- return pud_val(pud) >> _PAGE_PFN_SHIFT;
+ return __page_val_to_pfn(pud_val(pud));
}
static inline pmd_t *pud_pgtable(pud_t pud)
static inline unsigned long _p4d_pfn(p4d_t p4d)
{
- return p4d_val(p4d) >> _PAGE_PFN_SHIFT;
+ return __page_val_to_pfn(p4d_val(p4d));
}
static inline pud_t *p4d_pgtable(p4d_t p4d)
{
if (pgtable_l4_enabled)
- return (pud_t *)pfn_to_virt(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+ return (pud_t *)pfn_to_virt(__page_val_to_pfn(p4d_val(p4d)));
return (pud_t *)pud_pgtable((pud_t) { p4d_val(p4d) });
}
static inline struct page *p4d_page(p4d_t p4d)
{
- return pfn_to_page(p4d_val(p4d) >> _PAGE_PFN_SHIFT);
+ return pfn_to_page(__page_val_to_pfn(p4d_val(p4d)));
}
#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
static inline p4d_t *pgd_pgtable(pgd_t pgd)
{
if (pgtable_l5_enabled)
- return (p4d_t *)pfn_to_virt(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+ return (p4d_t *)pfn_to_virt(__page_val_to_pfn(pgd_val(pgd)));
return (p4d_t *)p4d_pgtable((p4d_t) { pgd_val(pgd) });
}
static inline struct page *pgd_page(pgd_t pgd)
{
- return pfn_to_page(pgd_val(pgd) >> _PAGE_PFN_SHIFT);
+ return pfn_to_page(__page_val_to_pfn(pgd_val(pgd)));
}
#define pgd_page(pgd) pgd_page(pgd)
static inline unsigned long _pgd_pfn(pgd_t pgd)
{
- return pgd_val(pgd) >> _PAGE_PFN_SHIFT;
+ return __page_val_to_pfn(pgd_val(pgd));
}
static inline struct page *pmd_page(pmd_t pmd)
return __pmd(pmd_val(pmd) & ~(_PAGE_PRESENT|_PAGE_PROT_NONE));
}
-#define __pmd_to_phys(pmd) (pmd_val(pmd) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
+#define __pmd_to_phys(pmd) (__page_val_to_pfn(pmd_val(pmd)) << PAGE_SHIFT)
static inline unsigned long pmd_pfn(pmd_t pmd)
{
return ((__pmd_to_phys(pmd) & PMD_MASK) >> PAGE_SHIFT);
}
-#define __pud_to_phys(pud) (pud_val(pud) >> _PAGE_PFN_SHIFT << PAGE_SHIFT)
+#define __pud_to_phys(pud) (__page_val_to_pfn(pud_val(pud)) << PAGE_SHIFT)
static inline unsigned long pud_pfn(pud_t pud)
{
endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_KGDB) += kgdb.o
-obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_KEXEC_CORE) += kexec_relocate.o crash_save_regs.o machine_kexec.o
obj-$(CONFIG_KEXEC_FILE) += elf_kexec.o machine_kexec_file.o
obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
unsigned int stage)
{
u32 cpu_req_feature = cpufeature_probe(stage);
- u32 cpu_apply_feature = 0;
struct alt_entry *alt;
u32 tmp;
}
tmp = (1U << alt->errata_id);
- if (cpu_req_feature & tmp) {
+ if (cpu_req_feature & tmp)
patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
- cpu_apply_feature |= tmp;
- }
}
}
#endif
{
const char *strtab, *name, *shstrtab;
const Elf_Shdr *sechdrs;
- Elf_Rela *relas;
+ Elf64_Rela *relas;
int i, r_type;
/* String & section header string table */
static inline unsigned long gstage_pte_page_vaddr(pte_t pte)
{
- return (unsigned long)pfn_to_virt(pte_val(pte) >> _PAGE_PFN_SHIFT);
+ return (unsigned long)pfn_to_virt(__page_val_to_pfn(pte_val(pte)));
}
static int gstage_page_size_to_level(unsigned long page_size, u32 *out_level)
if (kvm_request_pending(vcpu)) {
if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) {
+ kvm_vcpu_srcu_read_unlock(vcpu);
rcuwait_wait_event(wait,
(!vcpu->arch.power_off) && (!vcpu->arch.pause),
TASK_INTERRUPTIBLE);
+ kvm_vcpu_srcu_read_lock(vcpu);
if (vcpu->arch.power_off || vcpu->arch.pause) {
/*
* We ran out of VMIDs so we increment vmid_version and
* start assigning VMIDs from 1.
*
- * This also means existing VMIDs assignement to all Guest
+ * This also means existing VMIDs assignment to all Guest
* instances is invalid and we have force VMID re-assignement
* for all Guest instances. The Guest instances that were not
* running will automatically pick-up new VMIDs because will
select CLONE_BACKWARDS2
select DMA_OPS if PCI
select DYNAMIC_FTRACE if FUNCTION_TRACER
+ select GCC12_NO_ARRAY_BOUNDS
select GENERIC_ALLOCATOR
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_VULNERABILITIES
select IOMMU_SUPPORT if PCI
select MMU_GATHER_NO_GATHER
select MMU_GATHER_RCU_TABLE_FREE
+ select MMU_GATHER_MERGE_VMAS
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PCI
select NEED_SG_DMA_LENGTH if PCI
config KEXEC_FILE
bool "kexec file based system call"
select KEXEC_CORE
- select BUILD_BIN2C
depends on CRYPTO
depends on CRYPTO_SHA256
depends on CRYPTO_SHA256_S390
config PROTECTED_VIRTUALIZATION_GUEST
def_bool n
prompt "Protected virtualization guest support"
- select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
help
Select this option, if you want to be able to run this
kernel as a protected virtualization KVM guest.
KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, address-of-packed-member)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO),-g)
KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_DEBUG_INFO_DWARF4), $(call cc-option, -gdwarf-4,))
-
-ifdef CONFIG_CC_IS_GCC
- ifeq ($(call cc-ifversion, -ge, 1200, y), y)
- ifeq ($(call cc-ifversion, -lt, 1300, y), y)
- KBUILD_CFLAGS += $(call cc-disable-warning, array-bounds)
- KBUILD_CFLAGS_DECOMPRESSOR += $(call cc-disable-warning, array-bounds)
- endif
- endif
-endif
+KBUILD_CFLAGS_DECOMPRESSOR += $(if $(CONFIG_CC_NO_ARRAY_BOUNDS),-Wno-array-bounds)
UTS_MACHINE := s390x
STACK_SIZE := $(if $(CONFIG_KASAN),65536,16384)
ifdef CONFIG_EXPOLINE
ifdef CONFIG_EXPOLINE_EXTERN
- KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline.o
+ KBUILD_LDFLAGS_MODULE += arch/s390/lib/expoline/expoline.o
CC_FLAGS_EXPOLINE := -mindirect-branch=thunk-extern
CC_FLAGS_EXPOLINE += -mfunction-return=thunk-extern
else
$(Q)$(MAKE) $(build)=arch/s390/kernel/vdso64 include/generated/vdso64-offsets.h
$(if $(CONFIG_COMPAT),$(Q)$(MAKE) \
$(build)=arch/s390/kernel/vdso32 include/generated/vdso32-offsets.h)
+
+ifdef CONFIG_EXPOLINE_EXTERN
+modules_prepare: expoline_prepare
+expoline_prepare:
+ $(Q)$(MAKE) $(build)=arch/s390/lib/expoline arch/s390/lib/expoline/expoline.o
+endif
endif
# Don't use tabs in echo arguments
*
* Copyright IBM Corp. 2017, 2020
* Author(s): Harald Freudenberger
- *
- * The s390_arch_random_generate() function may be called from random.c
- * in interrupt context. So this implementation does the best to be very
- * fast. There is a buffer of random data which is asynchronously checked
- * and filled by a workqueue thread.
- * If there are enough bytes in the buffer the s390_arch_random_generate()
- * just delivers these bytes. Otherwise false is returned until the
- * worker thread refills the buffer.
- * The worker fills the rng buffer by pulling fresh entropy from the
- * high quality (but slow) true hardware random generator. This entropy
- * is then spread over the buffer with an pseudo random generator PRNG.
- * As the arch_get_random_seed_long() fetches 8 bytes and the calling
- * function add_interrupt_randomness() counts this as 1 bit entropy the
- * distribution needs to make sure there is in fact 1 bit entropy contained
- * in 8 bytes of the buffer. The current values pull 32 byte entropy
- * and scatter this into a 2048 byte buffer. So 8 byte in the buffer
- * will contain 1 bit of entropy.
- * The worker thread is rescheduled based on the charge level of the
- * buffer but at least with 500 ms delay to avoid too much CPU consumption.
- * So the max. amount of rng data delivered via arch_get_random_seed is
- * limited to 4k bytes per second.
*/
#include <linux/kernel.h>
#include <linux/atomic.h>
#include <linux/random.h>
-#include <linux/slab.h>
#include <linux/static_key.h>
-#include <linux/workqueue.h>
-#include <linux/moduleparam.h>
#include <asm/cpacf.h>
DEFINE_STATIC_KEY_FALSE(s390_arch_random_available);
atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0);
EXPORT_SYMBOL(s390_arch_random_counter);
-
-#define ARCH_REFILL_TICKS (HZ/2)
-#define ARCH_PRNG_SEED_SIZE 32
-#define ARCH_RNG_BUF_SIZE 2048
-
-static DEFINE_SPINLOCK(arch_rng_lock);
-static u8 *arch_rng_buf;
-static unsigned int arch_rng_buf_idx;
-
-static void arch_rng_refill_buffer(struct work_struct *);
-static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer);
-
-bool s390_arch_random_generate(u8 *buf, unsigned int nbytes)
-{
- /* max hunk is ARCH_RNG_BUF_SIZE */
- if (nbytes > ARCH_RNG_BUF_SIZE)
- return false;
-
- /* lock rng buffer */
- if (!spin_trylock(&arch_rng_lock))
- return false;
-
- /* try to resolve the requested amount of bytes from the buffer */
- arch_rng_buf_idx -= nbytes;
- if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) {
- memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes);
- atomic64_add(nbytes, &s390_arch_random_counter);
- spin_unlock(&arch_rng_lock);
- return true;
- }
-
- /* not enough bytes in rng buffer, refill is done asynchronously */
- spin_unlock(&arch_rng_lock);
-
- return false;
-}
-EXPORT_SYMBOL(s390_arch_random_generate);
-
-static void arch_rng_refill_buffer(struct work_struct *unused)
-{
- unsigned int delay = ARCH_REFILL_TICKS;
-
- spin_lock(&arch_rng_lock);
- if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) {
- /* buffer is exhausted and needs refill */
- u8 seed[ARCH_PRNG_SEED_SIZE];
- u8 prng_wa[240];
- /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */
- cpacf_trng(NULL, 0, seed, sizeof(seed));
- /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */
- memset(prng_wa, 0, sizeof(prng_wa));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_wa, NULL, 0, seed, sizeof(seed));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN,
- &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0);
- arch_rng_buf_idx = ARCH_RNG_BUF_SIZE;
- }
- delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE;
- spin_unlock(&arch_rng_lock);
-
- /* kick next check */
- queue_delayed_work(system_long_wq, &arch_rng_work, delay);
-}
-
-/*
- * Here follows the implementation of s390_arch_get_random_long().
- *
- * The random longs to be pulled by arch_get_random_long() are
- * prepared in an 4K buffer which is filled from the NIST 800-90
- * compliant s390 drbg. By default the random long buffer is refilled
- * 256 times before the drbg itself needs a reseed. The reseed of the
- * drbg is done with 32 bytes fetched from the high quality (but slow)
- * trng which is assumed to deliver 100% entropy. So the 32 * 8 = 256
- * bits of entropy are spread over 256 * 4KB = 1MB serving 131072
- * arch_get_random_long() invocations before reseeded.
- *
- * How often the 4K random long buffer is refilled with the drbg
- * before the drbg is reseeded can be adjusted. There is a module
- * parameter 's390_arch_rnd_long_drbg_reseed' accessible via
- * /sys/module/arch_random/parameters/rndlong_drbg_reseed
- * or as kernel command line parameter
- * arch_random.rndlong_drbg_reseed=<value>
- * This parameter tells how often the drbg fills the 4K buffer before
- * it is re-seeded by fresh entropy from the trng.
- * A value of 16 results in reseeding the drbg at every 16 * 4 KB = 64
- * KB with 32 bytes of fresh entropy pulled from the trng. So a value
- * of 16 would result in 256 bits entropy per 64 KB.
- * A value of 256 results in 1MB of drbg output before a reseed of the
- * drbg is done. So this would spread the 256 bits of entropy among 1MB.
- * Setting this parameter to 0 forces the reseed to take place every
- * time the 4K buffer is depleted, so the entropy rises to 256 bits
- * entropy per 4K or 0.5 bit entropy per arch_get_random_long(). With
- * setting this parameter to negative values all this effort is
- * disabled, arch_get_random long() returns false and thus indicating
- * that the arch_get_random_long() feature is disabled at all.
- */
-
-static unsigned long rndlong_buf[512];
-static DEFINE_SPINLOCK(rndlong_lock);
-static int rndlong_buf_index;
-
-static int rndlong_drbg_reseed = 256;
-module_param_named(rndlong_drbg_reseed, rndlong_drbg_reseed, int, 0600);
-MODULE_PARM_DESC(rndlong_drbg_reseed, "s390 arch_get_random_long() drbg reseed");
-
-static inline void refill_rndlong_buf(void)
-{
- static u8 prng_ws[240];
- static int drbg_counter;
-
- if (--drbg_counter < 0) {
- /* need to re-seed the drbg */
- u8 seed[32];
-
- /* fetch seed from trng */
- cpacf_trng(NULL, 0, seed, sizeof(seed));
- /* seed drbg */
- memset(prng_ws, 0, sizeof(prng_ws));
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED,
- &prng_ws, NULL, 0, seed, sizeof(seed));
- /* re-init counter for drbg */
- drbg_counter = rndlong_drbg_reseed;
- }
-
- /* fill the arch_get_random_long buffer from drbg */
- cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, &prng_ws,
- (u8 *) rndlong_buf, sizeof(rndlong_buf),
- NULL, 0);
-}
-
-bool s390_arch_get_random_long(unsigned long *v)
-{
- bool rc = false;
- unsigned long flags;
-
- /* arch_get_random_long() disabled ? */
- if (rndlong_drbg_reseed < 0)
- return false;
-
- /* try to lock the random long lock */
- if (!spin_trylock_irqsave(&rndlong_lock, flags))
- return false;
-
- if (--rndlong_buf_index >= 0) {
- /* deliver next long value from the buffer */
- *v = rndlong_buf[rndlong_buf_index];
- rc = true;
- goto out;
- }
-
- /* buffer is depleted and needs refill */
- if (in_interrupt()) {
- /* delay refill in interrupt context to next caller */
- rndlong_buf_index = 0;
- goto out;
- }
-
- /* refill random long buffer */
- refill_rndlong_buf();
- rndlong_buf_index = ARRAY_SIZE(rndlong_buf);
-
- /* and provide one random long */
- *v = rndlong_buf[--rndlong_buf_index];
- rc = true;
-
-out:
- spin_unlock_irqrestore(&rndlong_lock, flags);
- return rc;
-}
-EXPORT_SYMBOL(s390_arch_get_random_long);
-
-static int __init s390_arch_random_init(void)
-{
- /* all the needed PRNO subfunctions available ? */
- if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) &&
- cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) {
-
- /* alloc arch random working buffer */
- arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL);
- if (!arch_rng_buf)
- return -ENOMEM;
-
- /* kick worker queue job to fill the random buffer */
- queue_delayed_work(system_long_wq,
- &arch_rng_work, ARCH_REFILL_TICKS);
-
- /* enable arch random to the outside world */
- static_branch_enable(&s390_arch_random_available);
- }
-
- return 0;
-}
-arch_initcall(s390_arch_random_init);
/*
* Kernel interface for the s390 arch_random_* functions
*
- * Copyright IBM Corp. 2017, 2020
+ * Copyright IBM Corp. 2017, 2022
*
* Author: Harald Freudenberger <freude@de.ibm.com>
*
#ifdef CONFIG_ARCH_RANDOM
#include <linux/static_key.h>
+#include <linux/preempt.h>
#include <linux/atomic.h>
+#include <asm/cpacf.h>
DECLARE_STATIC_KEY_FALSE(s390_arch_random_available);
extern atomic64_t s390_arch_random_counter;
-bool s390_arch_get_random_long(unsigned long *v);
-bool s390_arch_random_generate(u8 *buf, unsigned int nbytes);
-
static inline bool __must_check arch_get_random_long(unsigned long *v)
{
- if (static_branch_likely(&s390_arch_random_available))
- return s390_arch_get_random_long(v);
return false;
}
static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
{
- if (static_branch_likely(&s390_arch_random_available)) {
- return s390_arch_random_generate((u8 *)v, sizeof(*v));
+ if (static_branch_likely(&s390_arch_random_available) &&
+ in_task()) {
+ cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+ atomic64_add(sizeof(*v), &s390_arch_random_counter);
+ return true;
}
return false;
}
static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
{
- if (static_branch_likely(&s390_arch_random_available)) {
- return s390_arch_random_generate((u8 *)v, sizeof(*v));
+ if (static_branch_likely(&s390_arch_random_available) &&
+ in_task()) {
+ cpacf_trng(NULL, 0, (u8 *)v, sizeof(*v));
+ atomic64_add(sizeof(*v), &s390_arch_random_counter);
+ return true;
}
return false;
}
#ifndef _ASM_S390_NOSPEC_ASM_H
#define _ASM_S390_NOSPEC_ASM_H
-#include <asm/alternative-asm.h>
-#include <asm/asm-offsets.h>
#include <asm/dwarf.h>
#ifdef __ASSEMBLY__
* @sb_count: number of storage blocks
* @sba: storage block element addresses
* @dcount: size of storage block elements
- * @user0: user defineable value
- * @res4: reserved paramater
- * @user1: user defineable value
+ * @user0: user definable value
+ * @res4: reserved parameter
+ * @user1: user definable value
*/
struct qaob {
u64 res0[6];
static inline bool __tlb_remove_page_size(struct mmu_gather *tlb,
struct page *page, int page_size);
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
#define pte_free_tlb pte_free_tlb
#define pmd_free_tlb pmd_free_tlb
unsigned long src;
int rc;
+ if (!(iter_is_iovec(iter) || iov_iter_is_kvec(iter)))
+ return -EINVAL;
+ /* Multi-segment iterators are not supported */
+ if (iter->nr_segs > 1)
+ return -EINVAL;
if (!csize)
return 0;
src = pfn_to_phys(pfn) + offset;
rc = copy_oldmem_user(iter->iov->iov_base, src, csize);
else
rc = copy_oldmem_kernel(iter->kvec->iov_base, src, csize);
- return rc;
+ if (rc < 0)
+ return rc;
+ iov_iter_advance(iter, csize);
+ return csize;
}
/*
return err;
}
+/* Events CPU_CYLCES and INSTRUCTIONS can be submitted with two different
+ * attribute::type values:
+ * - PERF_TYPE_HARDWARE:
+ * - pmu->type:
+ * Handle both type of invocations identical. They address the same hardware.
+ * The result is different when event modifiers exclude_kernel and/or
+ * exclude_user are also set.
+ */
+static int cpumf_pmu_event_type(struct perf_event *event)
+{
+ u64 ev = event->attr.config;
+
+ if (cpumf_generic_events_basic[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_basic[PERF_COUNT_HW_INSTRUCTIONS] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_CPU_CYCLES] == ev ||
+ cpumf_generic_events_user[PERF_COUNT_HW_INSTRUCTIONS] == ev)
+ return PERF_TYPE_HARDWARE;
+ return PERF_TYPE_RAW;
+}
+
static int cpumf_pmu_event_init(struct perf_event *event)
{
unsigned int type = event->attr.type;
err = __hw_perf_event_init(event, type);
else if (event->pmu->type == type)
/* Registered as unknown PMU */
- err = __hw_perf_event_init(event, PERF_TYPE_RAW);
+ err = __hw_perf_event_init(event, cpumf_pmu_event_type(event));
else
return -ENOENT;
/* PAI crypto PMU registered as PERF_TYPE_RAW, check event type */
if (a->type != PERF_TYPE_RAW && event->pmu->type != a->type)
return -ENOENT;
- /* PAI crypto event must be valid */
- if (a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
+ /* PAI crypto event must be in valid range */
+ if (a->config < PAI_CRYPTO_BASE ||
+ a->config > PAI_CRYPTO_BASE + paicrypt_cnt)
return -EINVAL;
/* Allow only CPU wide operation, no process context for now. */
if (event->hw.target || event->cpu == -1)
if (rc)
return rc;
+ /* Event initialization sets last_tag to 0. When later on the events
+ * are deleted and re-added, do not reset the event count value to zero.
+ * Events are added, deleted and re-added when 2 or more events
+ * are active at the same time.
+ */
+ event->hw.last_tag = 0;
cpump->event = event;
event->destroy = paicrypt_event_destroy;
{
u64 sum;
- sum = paicrypt_getall(event); /* Get current value */
- local64_set(&event->hw.prev_count, sum);
- local64_set(&event->count, 0);
+ if (!event->hw.last_tag) {
+ event->hw.last_tag = 1;
+ sum = paicrypt_getall(event); /* Get current value */
+ local64_set(&event->count, 0);
+ local64_set(&event->hw.prev_count, sum);
+ }
}
static int paicrypt_add(struct perf_event *event, int flags)
if (stsi(vmms, 3, 2, 2) == 0 && vmms->count)
add_device_randomness(&vmms->vm, sizeof(vmms->vm[0]) * vmms->count);
memblock_free(vmms, PAGE_SIZE);
+
+#ifdef CONFIG_ARCH_RANDOM
+ if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
+ static_branch_enable(&s390_arch_random_available);
+#endif
}
/*
obj-y += mem.o xor.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o
-obj-$(CONFIG_EXPOLINE_EXTERN) += expoline.o
obj-$(CONFIG_S390_KPROBES_SANITY_TEST) += test_kprobes_s390.o
test_kprobes_s390-objs += test_kprobes_asm.o test_kprobes.o
obj-$(CONFIG_S390_MODULES_SANITY_TEST_HELPERS) += test_modules_helpers.o
lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
+
+obj-$(CONFIG_EXPOLINE_EXTERN) += expoline/
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#include <asm/nospec-insn.h>
-#include <linux/linkage.h>
-
-.macro GEN_ALL_BR_THUNK_EXTERN
- .irp r1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
- GEN_BR_THUNK_EXTERN %r\r1
- .endr
-.endm
-
-GEN_ALL_BR_THUNK_EXTERN
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y += expoline.o
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <asm/nospec-insn.h>
+#include <linux/linkage.h>
+
+.macro GEN_ALL_BR_THUNK_EXTERN
+ .irp r1,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+ GEN_BR_THUNK_EXTERN %r\r1
+ .endr
+.endm
+
+GEN_ALL_BR_THUNK_EXTERN
#include <linux/cma.h>
#include <linux/gfp.h>
#include <linux/dma-direct.h>
+#include <linux/platform-feature.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgalloc.h>
return is_prot_virt_guest();
}
-#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
-
-int arch_has_restricted_virtio_memory_access(void)
-{
- return is_prot_virt_guest();
-}
-EXPORT_SYMBOL(arch_has_restricted_virtio_memory_access);
-
-#endif
-
/* protected virtualization */
static void pv_init(void)
{
if (!is_prot_virt_guest())
return;
+ platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS);
+
/* make sure bounce buffers are shared */
swiotlb_init(true, SWIOTLB_FORCE | SWIOTLB_VERBOSE);
swiotlb_update_mem_attributes();
$(obj)/purgatory.ro: $(obj)/purgatory $(obj)/purgatory.chk FORCE
$(call if_changed,objcopy)
-$(obj)/kexec-purgatory.o: $(obj)/kexec-purgatory.S $(obj)/purgatory.ro FORCE
- $(call if_changed_rule,as_o_S)
+$(obj)/kexec-purgatory.o: $(obj)/purgatory.ro
-obj-$(CONFIG_ARCH_HAS_KEXEC_PURGATORY) += kexec-purgatory.o
+obj-y += kexec-purgatory.o
#endif /* CONFIG_HAVE_IOREMAP_PROT */
#else /* CONFIG_MMU */
-#define iounmap(addr) do { } while (0)
-#define ioremap(offset, size) ((void __iomem *)(unsigned long)(offset))
+static inline void __iomem *ioremap(phys_addr_t offset, size_t size)
+{
+ return (void __iomem *)(unsigned long)offset;
+}
+
+static inline void iounmap(volatile void __iomem *addr) { }
#endif /* CONFIG_MMU */
#define ioremap_uc ioremap
select HAVE_KRETPROBES
select HAVE_KPROBES
select MMU_GATHER_RCU_TABLE_FREE if SMP
+ select MMU_GATHER_MERGE_VMAS
+ select MMU_GATHER_NO_FLUSH_CACHE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
void __flush_tlb_pending(unsigned long, unsigned long, unsigned long *);
void flush_tlb_pending(void);
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
#define tlb_flush(tlb) flush_tlb_pending()
/*
dev->cmd_vq = vqs[0];
dev->irq_vq = vqs[1];
+ virtio_device_ready(dev->vdev);
+
for (i = 0; i < NUM_IRQ_MSGS; i++) {
void *msg = kzalloc(MAX_IRQ_MSG_SIZE, GFP_KERNEL);
dev->irq = irq_alloc_desc(numa_node_id());
if (dev->irq < 0) {
err = dev->irq;
- goto error;
+ goto err_reset;
}
um_pci_devices[free].dev = dev;
vdev->priv = dev;
um_pci_rescan();
return 0;
+err_reset:
+ virtio_reset_device(vdev);
+ vdev->config->del_vqs(vdev);
error:
mutex_unlock(&um_pci_mtx);
kfree(dev);
* casting is the right thing, but 32-bit UML can't have 64-bit virtual
* addresses
*/
-#define __pa(virt) to_phys((void *) (unsigned long) (virt))
-#define __va(phys) to_virt((unsigned long) (phys))
+#define __pa(virt) uml_to_phys((void *) (unsigned long) (virt))
+#define __va(phys) uml_to_virt((unsigned long) (phys))
#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
#define pfn_to_phys(pfn) PFN_PHYS(pfn)
extern int phys_mapping(unsigned long phys, unsigned long long *offset_out);
extern unsigned long uml_physmem;
-static inline unsigned long to_phys(void *virt)
+static inline unsigned long uml_to_phys(void *virt)
{
return(((unsigned long) virt) - uml_physmem);
}
-static inline void *to_virt(unsigned long phys)
+static inline void *uml_to_virt(unsigned long phys)
{
return((void *) uml_physmem + phys);
}
{
}
+void apply_returns(s32 *start, s32 *end)
+{
+}
+
void apply_alternatives(struct alt_instr *start, struct alt_instr *end)
{
}
signal(SIGTERM, SIG_DFL);
signal(SIGWINCH, SIG_IGN);
- fd = phys_mapping(to_phys(__syscall_stub_start), &offset);
+ fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset);
addr = mmap64((void *) STUB_CODE, UM_KERN_PAGE_SIZE,
PROT_EXEC, MAP_FIXED | MAP_PRIVATE, fd, offset);
if (addr == MAP_FAILED) {
}
if (stack != NULL) {
- fd = phys_mapping(to_phys(stack), &offset);
+ fd = phys_mapping(uml_to_phys(stack), &offset);
addr = mmap((void *) STUB_DATA,
UM_KERN_PAGE_SIZE, PROT_READ | PROT_WRITE,
MAP_FIXED | MAP_SHARED, fd, offset);
struct stub_data *data = (struct stub_data *) current_stack;
struct stub_data *child_data = (struct stub_data *) new_stack;
unsigned long long new_offset;
- int new_fd = phys_mapping(to_phys((void *)new_stack), &new_offset);
+ int new_fd = phys_mapping(uml_to_phys((void *)new_stack), &new_offset);
/*
* prepare offset and fd of child's stack as argument for parent's
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select MMU_GATHER_RCU_TABLE_FREE if PARAVIRT
+ select MMU_GATHER_MERGE_VMAS
select HAVE_POSIX_CPU_TIMERS_TASK_WORK
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE if UNWINDER_ORC || STACK_VALIDATION
def_bool y
depends on X86_GOLDFISH
-config RETPOLINE
- bool "Avoid speculative indirect branches in kernel"
- select OBJTOOL if HAVE_OBJTOOL
- default y
- help
- Compile kernel with the retpoline compiler options to guard against
- kernel-to-user data leaks by avoiding speculative indirect
- branches. Requires a compiler with -mindirect-branch=thunk-extern
- support for full protection. The kernel may run slower.
-
-config CC_HAS_SLS
- def_bool $(cc-option,-mharden-sls=all)
-
-config SLS
- bool "Mitigate Straight-Line-Speculation"
- depends on CC_HAS_SLS && X86_64
- select OBJTOOL if HAVE_OBJTOOL
- default n
- help
- Compile the kernel with straight-line-speculation options to guard
- against straight line speculation. The kernel image might be slightly
- larger.
-
config X86_CPU_RESCTRL
bool "x86 CPU resource control support"
depends on X86 && (CPU_SUP_INTEL || CPU_SUP_AMD)
config X86_MEM_ENCRYPT
select ARCH_HAS_FORCE_DMA_UNENCRYPTED
select DYNAMIC_PHYSICAL_MASK
- select ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
def_bool n
config AMD_MEM_ENCRYPT
endmenu
+config CC_HAS_SLS
+ def_bool $(cc-option,-mharden-sls=all)
+
+config CC_HAS_RETURN_THUNK
+ def_bool $(cc-option,-mfunction-return=thunk-extern)
+
+menuconfig SPECULATION_MITIGATIONS
+ bool "Mitigations for speculative execution vulnerabilities"
+ default y
+ help
+ Say Y here to enable options which enable mitigations for
+ speculative execution hardware vulnerabilities.
+
+ If you say N, all mitigations will be disabled. You really
+ should know what you are doing to say so.
+
+if SPECULATION_MITIGATIONS
+
+config PAGE_TABLE_ISOLATION
+ bool "Remove the kernel mapping in user mode"
+ default y
+ depends on (X86_64 || X86_PAE)
+ help
+ This feature reduces the number of hardware side channels by
+ ensuring that the majority of kernel addresses are not mapped
+ into userspace.
+
+ See Documentation/x86/pti.rst for more details.
+
+config RETPOLINE
+ bool "Avoid speculative indirect branches in kernel"
+ select OBJTOOL if HAVE_OBJTOOL
+ default y
+ help
+ Compile kernel with the retpoline compiler options to guard against
+ kernel-to-user data leaks by avoiding speculative indirect
+ branches. Requires a compiler with -mindirect-branch=thunk-extern
+ support for full protection. The kernel may run slower.
+
+config RETHUNK
+ bool "Enable return-thunks"
+ depends on RETPOLINE && CC_HAS_RETURN_THUNK
+ select OBJTOOL if HAVE_OBJTOOL
+ default y if X86_64
+ help
+ Compile the kernel with the return-thunks compiler option to guard
+ against kernel-to-user data leaks by avoiding return speculation.
+ Requires a compiler with -mfunction-return=thunk-extern
+ support for full protection. The kernel may run slower.
+
+config CPU_UNRET_ENTRY
+ bool "Enable UNRET on kernel entry"
+ depends on CPU_SUP_AMD && RETHUNK && X86_64
+ default y
+ help
+ Compile the kernel with support for the retbleed=unret mitigation.
+
+config CPU_IBPB_ENTRY
+ bool "Enable IBPB on kernel entry"
+ depends on CPU_SUP_AMD && X86_64
+ default y
+ help
+ Compile the kernel with support for the retbleed=ibpb mitigation.
+
+config CPU_IBRS_ENTRY
+ bool "Enable IBRS on kernel entry"
+ depends on CPU_SUP_INTEL && X86_64
+ default y
+ help
+ Compile the kernel with support for the spectre_v2=ibrs mitigation.
+ This mitigates both spectre_v2 and retbleed at great cost to
+ performance.
+
+config SLS
+ bool "Mitigate Straight-Line-Speculation"
+ depends on CC_HAS_SLS && X86_64
+ select OBJTOOL if HAVE_OBJTOOL
+ default n
+ help
+ Compile the kernel with straight-line-speculation options to guard
+ against straight line speculation. The kernel image might be slightly
+ larger.
+
+endif
+
config ARCH_HAS_ADD_PAGES
def_bool y
depends on ARCH_ENABLE_MEMORY_HOTPLUG
RETPOLINE_CFLAGS := -mretpoline-external-thunk
RETPOLINE_VDSO_CFLAGS := -mretpoline
endif
+
+ifdef CONFIG_RETHUNK
+RETHUNK_CFLAGS := -mfunction-return=thunk-extern
+RETPOLINE_CFLAGS += $(RETHUNK_CFLAGS)
+endif
+
+export RETHUNK_CFLAGS
export RETPOLINE_CFLAGS
export RETPOLINE_VDSO_CFLAGS
void initialize_identity_maps(void *rmode)
{
unsigned long cmdline;
+ struct setup_data *sd;
/* Exclude the encryption mask from __PHYSICAL_MASK */
physical_mask &= ~sme_me_mask;
cmdline = get_cmd_line_ptr();
kernel_add_identity_map(cmdline, cmdline + COMMAND_LINE_SIZE);
+ /*
+ * Also map the setup_data entries passed via boot_params in case they
+ * need to be accessed by uncompressed kernel via the identity mapping.
+ */
+ sd = (struct setup_data *)boot_params->hdr.setup_data;
+ while (sd) {
+ unsigned long sd_addr = (unsigned long)sd;
+
+ kernel_add_identity_map(sd_addr, sd_addr + sizeof(*sd) + sd->len);
+ sd = (struct setup_data *)sd->next;
+ }
+
sev_prep_identity_maps(top_level_pgt);
/* Load the new page-table. */
return BIT_ULL(gpa_width - 1);
}
+/*
+ * The TDX module spec states that #VE may be injected for a limited set of
+ * reasons:
+ *
+ * - Emulation of the architectural #VE injection on EPT violation;
+ *
+ * - As a result of guest TD execution of a disallowed instruction,
+ * a disallowed MSR access, or CPUID virtualization;
+ *
+ * - A notification to the guest TD about anomalous behavior;
+ *
+ * The last one is opt-in and is not used by the kernel.
+ *
+ * The Intel Software Developer's Manual describes cases when instruction
+ * length field can be used in section "Information for VM Exits Due to
+ * Instruction Execution".
+ *
+ * For TDX, it ultimately means GET_VEINFO provides reliable instruction length
+ * information if #VE occurred due to instruction execution, but not for EPT
+ * violations.
+ */
+static int ve_instr_len(struct ve_info *ve)
+{
+ switch (ve->exit_reason) {
+ case EXIT_REASON_HLT:
+ case EXIT_REASON_MSR_READ:
+ case EXIT_REASON_MSR_WRITE:
+ case EXIT_REASON_CPUID:
+ case EXIT_REASON_IO_INSTRUCTION:
+ /* It is safe to use ve->instr_len for #VE due instructions */
+ return ve->instr_len;
+ case EXIT_REASON_EPT_VIOLATION:
+ /*
+ * For EPT violations, ve->insn_len is not defined. For those,
+ * the kernel must decode instructions manually and should not
+ * be using this function.
+ */
+ WARN_ONCE(1, "ve->instr_len is not defined for EPT violations");
+ return 0;
+ default:
+ WARN_ONCE(1, "Unexpected #VE-type: %lld\n", ve->exit_reason);
+ return ve->instr_len;
+ }
+}
+
static u64 __cpuidle __halt(const bool irq_disabled, const bool do_sti)
{
struct tdx_hypercall_args args = {
return __tdx_hypercall(&args, do_sti ? TDX_HCALL_ISSUE_STI : 0);
}
-static bool handle_halt(void)
+static int handle_halt(struct ve_info *ve)
{
/*
* Since non safe halt is mainly used in CPU offlining
const bool do_sti = false;
if (__halt(irq_disabled, do_sti))
- return false;
+ return -EIO;
- return true;
+ return ve_instr_len(ve);
}
void __cpuidle tdx_safe_halt(void)
WARN_ONCE(1, "HLT instruction emulation failed\n");
}
-static bool read_msr(struct pt_regs *regs)
+static int read_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
* (GHCI), section titled "TDG.VP.VMCALL<Instruction.RDMSR>".
*/
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
- return false;
+ return -EIO;
regs->ax = lower_32_bits(args.r11);
regs->dx = upper_32_bits(args.r11);
- return true;
+ return ve_instr_len(ve);
}
-static bool write_msr(struct pt_regs *regs)
+static int write_msr(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
* can be found in TDX Guest-Host-Communication Interface
* (GHCI) section titled "TDG.VP.VMCALL<Instruction.WRMSR>".
*/
- return !__tdx_hypercall(&args, 0);
+ if (__tdx_hypercall(&args, 0))
+ return -EIO;
+
+ return ve_instr_len(ve);
}
-static bool handle_cpuid(struct pt_regs *regs)
+static int handle_cpuid(struct pt_regs *regs, struct ve_info *ve)
{
struct tdx_hypercall_args args = {
.r10 = TDX_HYPERCALL_STANDARD,
*/
if (regs->ax < 0x40000000 || regs->ax > 0x4FFFFFFF) {
regs->ax = regs->bx = regs->cx = regs->dx = 0;
- return true;
+ return ve_instr_len(ve);
}
/*
* (GHCI), section titled "VP.VMCALL<Instruction.CPUID>".
*/
if (__tdx_hypercall(&args, TDX_HCALL_HAS_OUTPUT))
- return false;
+ return -EIO;
/*
* As per TDX GHCI CPUID ABI, r12-r15 registers contain contents of
regs->cx = args.r14;
regs->dx = args.r15;
- return true;
+ return ve_instr_len(ve);
}
static bool mmio_read(int size, unsigned long addr, unsigned long *val)
EPT_WRITE, addr, val);
}
-static bool handle_mmio(struct pt_regs *regs, struct ve_info *ve)
+static int handle_mmio(struct pt_regs *regs, struct ve_info *ve)
{
+ unsigned long *reg, val, vaddr;
char buffer[MAX_INSN_SIZE];
- unsigned long *reg, val;
struct insn insn = {};
enum mmio_type mmio;
int size, extend_size;
/* Only in-kernel MMIO is supported */
if (WARN_ON_ONCE(user_mode(regs)))
- return false;
+ return -EFAULT;
if (copy_from_kernel_nofault(buffer, (void *)regs->ip, MAX_INSN_SIZE))
- return false;
+ return -EFAULT;
if (insn_decode(&insn, buffer, MAX_INSN_SIZE, INSN_MODE_64))
- return false;
+ return -EINVAL;
mmio = insn_decode_mmio(&insn, &size);
if (WARN_ON_ONCE(mmio == MMIO_DECODE_FAILED))
- return false;
+ return -EINVAL;
if (mmio != MMIO_WRITE_IMM && mmio != MMIO_MOVS) {
reg = insn_get_modrm_reg_ptr(&insn, regs);
if (!reg)
- return false;
+ return -EINVAL;
}
- ve->instr_len = insn.length;
+ /*
+ * Reject EPT violation #VEs that split pages.
+ *
+ * MMIO accesses are supposed to be naturally aligned and therefore
+ * never cross page boundaries. Seeing split page accesses indicates
+ * a bug or a load_unaligned_zeropad() that stepped into an MMIO page.
+ *
+ * load_unaligned_zeropad() will recover using exception fixups.
+ */
+ vaddr = (unsigned long)insn_get_addr_ref(&insn, regs);
+ if (vaddr / PAGE_SIZE != (vaddr + size - 1) / PAGE_SIZE)
+ return -EFAULT;
/* Handle writes first */
switch (mmio) {
case MMIO_WRITE:
memcpy(&val, reg, size);
- return mmio_write(size, ve->gpa, val);
+ if (!mmio_write(size, ve->gpa, val))
+ return -EIO;
+ return insn.length;
case MMIO_WRITE_IMM:
val = insn.immediate.value;
- return mmio_write(size, ve->gpa, val);
+ if (!mmio_write(size, ve->gpa, val))
+ return -EIO;
+ return insn.length;
case MMIO_READ:
case MMIO_READ_ZERO_EXTEND:
case MMIO_READ_SIGN_EXTEND:
* decoded or handled properly. It was likely not using io.h
* helpers or accessed MMIO accidentally.
*/
- return false;
+ return -EINVAL;
default:
WARN_ONCE(1, "Unknown insn_decode_mmio() decode value?");
- return false;
+ return -EINVAL;
}
/* Handle reads */
if (!mmio_read(size, ve->gpa, &val))
- return false;
+ return -EIO;
switch (mmio) {
case MMIO_READ:
default:
/* All other cases has to be covered with the first switch() */
WARN_ON_ONCE(1);
- return false;
+ return -EINVAL;
}
if (extend_size)
memset(reg, extend_val, extend_size);
memcpy(reg, &val, size);
- return true;
+ return insn.length;
}
static bool handle_in(struct pt_regs *regs, int size, int port)
*
* Return True on success or False on failure.
*/
-static bool handle_io(struct pt_regs *regs, u32 exit_qual)
+static int handle_io(struct pt_regs *regs, struct ve_info *ve)
{
+ u32 exit_qual = ve->exit_qual;
int size, port;
- bool in;
+ bool in, ret;
if (VE_IS_IO_STRING(exit_qual))
- return false;
+ return -EIO;
in = VE_IS_IO_IN(exit_qual);
size = VE_GET_IO_SIZE(exit_qual);
if (in)
- return handle_in(regs, size, port);
+ ret = handle_in(regs, size, port);
else
- return handle_out(regs, size, port);
+ ret = handle_out(regs, size, port);
+ if (!ret)
+ return -EIO;
+
+ return ve_instr_len(ve);
}
/*
__init bool tdx_early_handle_ve(struct pt_regs *regs)
{
struct ve_info ve;
+ int insn_len;
tdx_get_ve_info(&ve);
if (ve.exit_reason != EXIT_REASON_IO_INSTRUCTION)
return false;
- return handle_io(regs, ve.exit_qual);
+ insn_len = handle_io(regs, &ve);
+ if (insn_len < 0)
+ return false;
+
+ regs->ip += insn_len;
+ return true;
}
void tdx_get_ve_info(struct ve_info *ve)
ve->instr_info = upper_32_bits(out.r10);
}
-/* Handle the user initiated #VE */
-static bool virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
+/*
+ * Handle the user initiated #VE.
+ *
+ * On success, returns the number of bytes RIP should be incremented (>=0)
+ * or -errno on error.
+ */
+static int virt_exception_user(struct pt_regs *regs, struct ve_info *ve)
{
switch (ve->exit_reason) {
case EXIT_REASON_CPUID:
- return handle_cpuid(regs);
+ return handle_cpuid(regs, ve);
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
- return false;
+ return -EIO;
}
}
-/* Handle the kernel #VE */
-static bool virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
+/*
+ * Handle the kernel #VE.
+ *
+ * On success, returns the number of bytes RIP should be incremented (>=0)
+ * or -errno on error.
+ */
+static int virt_exception_kernel(struct pt_regs *regs, struct ve_info *ve)
{
switch (ve->exit_reason) {
case EXIT_REASON_HLT:
- return handle_halt();
+ return handle_halt(ve);
case EXIT_REASON_MSR_READ:
- return read_msr(regs);
+ return read_msr(regs, ve);
case EXIT_REASON_MSR_WRITE:
- return write_msr(regs);
+ return write_msr(regs, ve);
case EXIT_REASON_CPUID:
- return handle_cpuid(regs);
+ return handle_cpuid(regs, ve);
case EXIT_REASON_EPT_VIOLATION:
return handle_mmio(regs, ve);
case EXIT_REASON_IO_INSTRUCTION:
- return handle_io(regs, ve->exit_qual);
+ return handle_io(regs, ve);
default:
pr_warn("Unexpected #VE: %lld\n", ve->exit_reason);
- return false;
+ return -EIO;
}
}
bool tdx_handle_virt_exception(struct pt_regs *regs, struct ve_info *ve)
{
- bool ret;
+ int insn_len;
if (user_mode(regs))
- ret = virt_exception_user(regs, ve);
+ insn_len = virt_exception_user(regs, ve);
else
- ret = virt_exception_kernel(regs, ve);
+ insn_len = virt_exception_kernel(regs, ve);
+ if (insn_len < 0)
+ return false;
/* After successful #VE handling, move the IP */
- if (ret)
- regs->ip += ve->instr_len;
+ regs->ip += insn_len;
- return ret;
+ return true;
}
static bool tdx_tlb_flush_required(bool private)
CFLAGS_common.o += -fno-stack-protector
-obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
+obj-y := entry.o entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o
obj-y += common.o
obj-y += vdso/
#include <asm/asm-offsets.h>
#include <asm/processor-flags.h>
#include <asm/ptrace-abi.h>
+#include <asm/msr.h>
+#include <asm/nospec-branch.h>
/*
#endif
+/*
+ * IBRS kernel mitigation for Spectre_v2.
+ *
+ * Assumes full context is established (PUSH_REGS, CR3 and GS) and it clobbers
+ * the regs it uses (AX, CX, DX). Must be called before the first RET
+ * instruction (NOTE! UNTRAIN_RET includes a RET instruction)
+ *
+ * The optional argument is used to save/restore the current value,
+ * which is used on the paranoid paths.
+ *
+ * Assumes x86_spec_ctrl_{base,current} to have SPEC_CTRL_IBRS set.
+ */
+.macro IBRS_ENTER save_reg
+#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ rdmsr
+ shl $32, %rdx
+ or %rdx, %rax
+ mov %rax, \save_reg
+ test $SPEC_CTRL_IBRS, %eax
+ jz .Ldo_wrmsr_\@
+ lfence
+ jmp .Lend_\@
+.Ldo_wrmsr_\@:
+.endif
+
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+#endif
+.endm
+
+/*
+ * Similar to IBRS_ENTER, requires KERNEL GS,CR3 and clobbers (AX, CX, DX)
+ * regs. Must be called after the last RET.
+ */
+.macro IBRS_EXIT save_reg
+#ifdef CONFIG_CPU_IBRS_ENTRY
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_KERNEL_IBRS
+ movl $MSR_IA32_SPEC_CTRL, %ecx
+
+.ifnb \save_reg
+ mov \save_reg, %rdx
+.else
+ movq PER_CPU_VAR(x86_spec_ctrl_current), %rdx
+ andl $(~SPEC_CTRL_IBRS), %edx
+.endif
+
+ movl %edx, %eax
+ shr $32, %rdx
+ wrmsr
+.Lend_\@:
+#endif
+.endm
+
/*
* Mitigate Spectre v1 for conditional swapgs code paths.
*
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Common place for both 32- and 64-bit entry routines.
+ */
+
+#include <linux/linkage.h>
+#include <asm/export.h>
+#include <asm/msr-index.h>
+
+.pushsection .noinstr.text, "ax"
+
+SYM_FUNC_START(entry_ibpb)
+ movl $MSR_IA32_PRED_CMD, %ecx
+ movl $PRED_CMD_IBPB, %eax
+ xorl %edx, %edx
+ wrmsr
+ RET
+SYM_FUNC_END(entry_ibpb)
+/* For KVM */
+EXPORT_SYMBOL_GPL(entry_ibpb);
+
+.popsection
movl %ebx, PER_CPU_VAR(__stack_chk_guard)
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* Restore flags or the incoming task to restore AC state. */
popfl
*/
SYM_CODE_START(entry_SYSCALL_64)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
swapgs
movq %rsp, %rdi
/* Sign extend the lower 32bit as syscall numbers are treated as int */
movslq %eax, %rsi
+
+ /* clobbers %rax, make sure it is after saving the syscall nr */
+ IBRS_ENTER
+ UNTRAIN_RET
+
call do_syscall_64 /* returns with IRQs disabled */
/*
* perf profiles. Nothing jumps here.
*/
syscall_return_via_sysret:
+ IBRS_EXIT
POP_REGS pop_rdi=0
/*
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
#endif
-#ifdef CONFIG_RETPOLINE
/*
* When switching from a shallower to a deeper call stack
* the RSB may either underflow or use entries populated
* speculative execution to prevent attack.
*/
FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
-#endif
/* restore callee-saved registers */
popq %r15
#endif
.endm
-/* Save all registers in pt_regs */
-SYM_CODE_START_LOCAL(push_and_clear_regs)
+SYM_CODE_START_LOCAL(xen_error_entry)
UNWIND_HINT_FUNC
PUSH_AND_CLEAR_REGS save_ret=1
ENCODE_FRAME_POINTER 8
+ UNTRAIN_RET
RET
-SYM_CODE_END(push_and_clear_regs)
+SYM_CODE_END(xen_error_entry)
/**
* idtentry_body - Macro to emit code calling the C function
*/
.macro idtentry_body cfunc has_error_code:req
- call push_and_clear_regs
- UNWIND_HINT_REGS
-
/*
* Call error_entry() and switch to the task stack if from userspace.
*
* switch the CR3. So it can skip invoking error_entry().
*/
ALTERNATIVE "call error_entry; movq %rax, %rsp", \
- "", X86_FEATURE_XENPV
+ "call xen_error_entry", X86_FEATURE_XENPV
ENCODE_FRAME_POINTER
UNWIND_HINT_REGS
SYM_CODE_START_LOCAL(common_interrupt_return)
SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
+ IBRS_EXIT
#ifdef CONFIG_DEBUG_ENTRY
/* Assert that pt_regs indicates user mode. */
testb $3, CS(%rsp)
* 1 -> no SWAPGS on exit
*
* Y GSBASE value at entry, must be restored in paranoid_exit
+ *
+ * R14 - old CR3
+ * R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_entry)
UNWIND_HINT_FUNC
* is needed here.
*/
SAVE_AND_SET_GSBASE scratch_reg=%rax save_reg=%rbx
- RET
+ jmp .Lparanoid_gsbase_done
.Lparanoid_entry_checkgs:
/* EBX = 1 -> kernel GSBASE active, no restore required */
xorl %ebx, %ebx
swapgs
.Lparanoid_kernel_gsbase:
-
FENCE_SWAPGS_KERNEL_ENTRY
+.Lparanoid_gsbase_done:
+
+ /*
+ * Once we have CR3 and %GS setup save and set SPEC_CTRL. Just like
+ * CR3 above, keep the old value in a callee saved register.
+ */
+ IBRS_ENTER save_reg=%r15
+ UNTRAIN_RET
+
RET
SYM_CODE_END(paranoid_entry)
* 1 -> no SWAPGS on exit
*
* Y User space GSBASE, must be restored unconditionally
+ *
+ * R14 - old CR3
+ * R15 - old SPEC_CTRL
*/
SYM_CODE_START_LOCAL(paranoid_exit)
UNWIND_HINT_REGS
+
+ /*
+ * Must restore IBRS state before both CR3 and %GS since we need access
+ * to the per-CPU x86_spec_ctrl_shadow variable.
+ */
+ IBRS_EXIT save_reg=%r15
+
/*
* The order of operations is important. RESTORE_CR3 requires
* kernel GSBASE.
*/
SYM_CODE_START_LOCAL(error_entry)
UNWIND_HINT_FUNC
+
+ PUSH_AND_CLEAR_REGS save_ret=1
+ ENCODE_FRAME_POINTER 8
+
testb $3, CS+8(%rsp)
jz .Lerror_kernelspace
FENCE_SWAPGS_USER_ENTRY
/* We have user CR3. Change to kernel CR3. */
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
+ UNTRAIN_RET
leaq 8(%rsp), %rdi /* arg0 = pt_regs pointer */
.Lerror_entry_from_usermode_after_swapgs:
+
/* Put us onto the real thread stack. */
call sync_regs
RET
.Lerror_entry_done_lfence:
FENCE_SWAPGS_KERNEL_ENTRY
leaq 8(%rsp), %rax /* return pt_regs pointer */
+ ANNOTATE_UNRET_END
RET
.Lbstep_iret:
swapgs
FENCE_SWAPGS_USER_ENTRY
SWITCH_TO_KERNEL_CR3 scratch_reg=%rax
+ IBRS_ENTER
+ UNTRAIN_RET
/*
* Pretend that the exception came from user mode: set up pt_regs
PUSH_AND_CLEAR_REGS rdx=(%rdx)
ENCODE_FRAME_POINTER
+ IBRS_ENTER
+ UNTRAIN_RET
+
/*
* At this point we no longer need to worry about stack damage
* due to nesting -- we're on the normal thread stack and we're
movq $-1, %rsi
call exc_nmi
+ /* Always restore stashed SPEC_CTRL value (see paranoid_entry) */
+ IBRS_EXIT save_reg=%r15
+
/* Always restore stashed CR3 value (see paranoid_entry) */
RESTORE_CR3 scratch_reg=%r15 save_reg=%r14
*
* Copyright 2000-2002 Andi Kleen, SuSE Labs.
*/
-#include "calling.h"
#include <asm/asm-offsets.h>
#include <asm/current.h>
#include <asm/errno.h>
#include <asm/irqflags.h>
#include <asm/asm.h>
#include <asm/smap.h>
+#include <asm/nospec-branch.h>
#include <linux/linkage.h>
#include <linux/err.h>
+#include "calling.h"
+
.section .entry.text, "ax"
/*
* 0(%ebp) arg6
*/
SYM_CODE_START(entry_SYSENTER_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/* Interrupts are off on entry. */
swapgs
cld
+ IBRS_ENTER
+ UNTRAIN_RET
+
/*
* SYSENTER doesn't filter flags, so we need to clear NT and AC
* ourselves. To save a few cycles, we can check whether
* 0(%esp) arg6
*/
SYM_CODE_START(entry_SYSCALL_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/* Interrupts are off on entry. */
swapgs
PUSH_AND_CLEAR_REGS rcx=%rbp rax=$-ENOSYS
UNWIND_HINT_REGS
+ IBRS_ENTER
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_fast_syscall_32
/* XEN PV guests always use IRET path */
*/
STACKLEAK_ERASE
+ IBRS_EXIT
+
movq RBX(%rsp), %rbx /* pt_regs->rbx */
movq RBP(%rsp), %rbp /* pt_regs->rbp */
movq EFLAGS(%rsp), %r11 /* pt_regs->flags (in r11) */
* ebp arg6
*/
SYM_CODE_START(entry_INT80_compat)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
/*
* Interrupts are off on entry.
cld
+ IBRS_ENTER
+ UNTRAIN_RET
+
movq %rsp, %rdi
call do_int80_syscall_32
jmp swapgs_restore_regs_and_return_to_usermode
endif
$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(RANDSTRUCT_CFLAGS) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_AFLAGS += -DBUILD_VDSO
#
# vDSO code runs in userspace and -pg doesn't help with profiling anyway.
mov $__NR_gettimeofday, %rax
syscall
- RET
+ ret
+ int3
.balign 1024, 0xcc
mov $__NR_time, %rax
syscall
- RET
+ ret
+ int3
.balign 1024, 0xcc
mov $__NR_getcpu, %rax
syscall
- RET
+ ret
+ int3
.balign 4096, 0xcc
};
/*
- * For formats with LBR_TSX flags (e.g. LBR_FORMAT_EIP_FLAGS2), bits 61:62 in
- * MSR_LAST_BRANCH_FROM_x are the TSX flags when TSX is supported, but when
- * TSX is not supported they have no consistent behavior:
+ * For format LBR_FORMAT_EIP_FLAGS2, bits 61:62 in MSR_LAST_BRANCH_FROM_x
+ * are the TSX flags when TSX is supported, but when TSX is not supported
+ * they have no consistent behavior:
*
* - For wrmsr(), bits 61:62 are considered part of the sign extension.
* - For HW updates (branch captures) bits 61:62 are always OFF and are not
*
* Therefore, if:
*
- * 1) LBR has TSX format
+ * 1) LBR format LBR_FORMAT_EIP_FLAGS2
* 2) CPU has no TSX support enabled
*
* ... then any value passed to wrmsr() must be sign extended to 63 bits and any
bool tsx_support = boot_cpu_has(X86_FEATURE_HLE) ||
boot_cpu_has(X86_FEATURE_RTM);
- return !tsx_support && x86_pmu.lbr_has_tsx;
+ return !tsx_support;
}
static DEFINE_STATIC_KEY_FALSE(lbr_from_quirk_key);
x86_pmu.lbr_sel_map = hsw_lbr_sel_map;
x86_get_pmu(smp_processor_id())->task_ctx_cache = create_lbr_kmem_cache(size, 0);
-
- if (lbr_from_signext_quirk_needed())
- static_branch_enable(&lbr_from_quirk_key);
}
/* skylake */
switch (x86_pmu.intel_cap.lbr_format) {
case LBR_FORMAT_EIP_FLAGS2:
x86_pmu.lbr_has_tsx = 1;
- fallthrough;
+ x86_pmu.lbr_from_flags = 1;
+ if (lbr_from_signext_quirk_needed())
+ static_branch_enable(&lbr_from_quirk_key);
+ break;
+
case LBR_FORMAT_EIP_FLAGS:
x86_pmu.lbr_from_flags = 1;
break;
#include <linux/io.h>
#include <asm/apic.h>
#include <asm/desc.h>
+#include <asm/sev.h>
#include <asm/hypervisor.h>
#include <asm/hyperv-tlfs.h>
#include <asm/mshyperv.h>
}
if (hv_isolation_type_snp()) {
+ /* Negotiate GHCB Version. */
+ if (!hv_ghcb_negotiate_protocol())
+ hv_ghcb_terminate(SEV_TERM_SET_GEN,
+ GHCB_SEV_ES_PROT_UNSUPPORTED);
+
hv_ghcb_pg = alloc_percpu(union hv_ghcb *);
if (!hv_ghcb_pg)
goto free_vp_assist_page;
} hypercall;
} __packed __aligned(HV_HYP_PAGE_SIZE);
+static u16 hv_ghcb_version __ro_after_init;
+
u64 hv_ghcb_hypercall(u64 control, void *input, void *output, u32 input_size)
{
union hv_ghcb *hv_ghcb;
return status;
}
+static inline u64 rd_ghcb_msr(void)
+{
+ return __rdmsr(MSR_AMD64_SEV_ES_GHCB);
+}
+
+static inline void wr_ghcb_msr(u64 val)
+{
+ native_wrmsrl(MSR_AMD64_SEV_ES_GHCB, val);
+}
+
+static enum es_result hv_ghcb_hv_call(struct ghcb *ghcb, u64 exit_code,
+ u64 exit_info_1, u64 exit_info_2)
+{
+ /* Fill in protocol and format specifiers */
+ ghcb->protocol_version = hv_ghcb_version;
+ ghcb->ghcb_usage = GHCB_DEFAULT_USAGE;
+
+ ghcb_set_sw_exit_code(ghcb, exit_code);
+ ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
+ ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
+
+ VMGEXIT();
+
+ if (ghcb->save.sw_exit_info_1 & GENMASK_ULL(31, 0))
+ return ES_VMM_ERROR;
+ else
+ return ES_OK;
+}
+
+void hv_ghcb_terminate(unsigned int set, unsigned int reason)
+{
+ u64 val = GHCB_MSR_TERM_REQ;
+
+ /* Tell the hypervisor what went wrong. */
+ val |= GHCB_SEV_TERM_REASON(set, reason);
+
+ /* Request Guest Termination from Hypvervisor */
+ wr_ghcb_msr(val);
+ VMGEXIT();
+
+ while (true)
+ asm volatile("hlt\n" : : : "memory");
+}
+
+bool hv_ghcb_negotiate_protocol(void)
+{
+ u64 ghcb_gpa;
+ u64 val;
+
+ /* Save ghcb page gpa. */
+ ghcb_gpa = rd_ghcb_msr();
+
+ /* Do the GHCB protocol version negotiation */
+ wr_ghcb_msr(GHCB_MSR_SEV_INFO_REQ);
+ VMGEXIT();
+ val = rd_ghcb_msr();
+
+ if (GHCB_MSR_INFO(val) != GHCB_MSR_SEV_INFO_RESP)
+ return false;
+
+ if (GHCB_MSR_PROTO_MAX(val) < GHCB_PROTOCOL_MIN ||
+ GHCB_MSR_PROTO_MIN(val) > GHCB_PROTOCOL_MAX)
+ return false;
+
+ hv_ghcb_version = min_t(size_t, GHCB_MSR_PROTO_MAX(val),
+ GHCB_PROTOCOL_MAX);
+
+ /* Write ghcb page back after negotiating protocol. */
+ wr_ghcb_msr(ghcb_gpa);
+ VMGEXIT();
+
+ return true;
+}
+
void hv_ghcb_msr_write(u64 msr, u64 value)
{
union hv_ghcb *hv_ghcb;
void **ghcb_base;
unsigned long flags;
- struct es_em_ctxt ctxt;
if (!hv_ghcb_pg)
return;
ghcb_set_rax(&hv_ghcb->ghcb, lower_32_bits(value));
ghcb_set_rdx(&hv_ghcb->ghcb, upper_32_bits(value));
- if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt,
- SVM_EXIT_MSR, 1, 0))
+ if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 1, 0))
pr_warn("Fail to write msr via ghcb %llx.\n", msr);
local_irq_restore(flags);
union hv_ghcb *hv_ghcb;
void **ghcb_base;
unsigned long flags;
- struct es_em_ctxt ctxt;
/* Check size of union hv_ghcb here. */
BUILD_BUG_ON(sizeof(union hv_ghcb) != HV_HYP_PAGE_SIZE);
}
ghcb_set_rcx(&hv_ghcb->ghcb, msr);
- if (sev_es_ghcb_hv_call(&hv_ghcb->ghcb, false, &ctxt,
- SVM_EXIT_MSR, 0, 0))
+ if (hv_ghcb_hv_call(&hv_ghcb->ghcb, SVM_EXIT_MSR, 0, 0))
pr_warn("Fail to read msr via ghcb %llx.\n", msr);
else
*value = (u64)lower_32_bits(hv_ghcb->ghcb.save.rax)
extern void alternative_instructions(void);
extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
extern void apply_retpolines(s32 *start, s32 *end);
+extern void apply_returns(s32 *start, s32 *end);
extern void apply_ibt_endbr(s32 *start, s32 *end);
struct module;
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
#endif /* _ASM_X86_CPUFEATURES_H */
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_RETPOLINE
+# define DISABLE_RETPOLINE 0
+#else
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
+#endif
+
+#ifdef CONFIG_RETHUNK
+# define DISABLE_RETHUNK 0
+#else
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
+#endif
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+# define DISABLE_UNRET 0
+#else
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#include <asm/e820/types.h>
-struct device;
-struct resource;
-
extern struct e820_table *e820_table;
extern struct e820_table *e820_table_kexec;
extern struct e820_table *e820_table_firmware;
extern int e820__get_entry_type(u64 start, u64 end);
-extern void remove_e820_regions(struct device *dev, struct resource *avail);
-
/*
* Returns true iff the specified range [start,end) is completely contained inside
* the ISA region.
#define __efi64_argmap_get_memory_space_descriptor(phys, desc) \
(__efi64_split(phys), (desc))
-#define __efi64_argmap_set_memory_space_descriptor(phys, size, flags) \
+#define __efi64_argmap_set_memory_space_attributes(phys, size, flags) \
(__efi64_split(phys), __efi64_split(size), __efi64_split(flags))
/*
u64 ia32_misc_enable_msr;
u64 smbase;
u64 smi_count;
+ bool at_instruction_boundary;
bool tpr_access_reporting;
bool xsaves_enabled;
bool xfd_no_write_intercept;
};
enum kvm_apicv_inhibit {
+
+ /********************************************************************/
+ /* INHIBITs that are relevant to both Intel's APICv and AMD's AVIC. */
+ /********************************************************************/
+
+ /*
+ * APIC acceleration is disabled by a module parameter
+ * and/or not supported in hardware.
+ */
APICV_INHIBIT_REASON_DISABLE,
+
+ /*
+ * APIC acceleration is inhibited because AutoEOI feature is
+ * being used by a HyperV guest.
+ */
APICV_INHIBIT_REASON_HYPERV,
+
+ /*
+ * APIC acceleration is inhibited because the userspace didn't yet
+ * enable the kernel/split irqchip.
+ */
+ APICV_INHIBIT_REASON_ABSENT,
+
+ /* APIC acceleration is inhibited because KVM_GUESTDBG_BLOCKIRQ
+ * (out of band, debug measure of blocking all interrupts on this vCPU)
+ * was enabled, to avoid AVIC/APICv bypassing it.
+ */
+ APICV_INHIBIT_REASON_BLOCKIRQ,
+
+ /*
+ * For simplicity, the APIC acceleration is inhibited
+ * first time either APIC ID or APIC base are changed by the guest
+ * from their reset values.
+ */
+ APICV_INHIBIT_REASON_APIC_ID_MODIFIED,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED,
+
+ /******************************************************/
+ /* INHIBITs that are relevant only to the AMD's AVIC. */
+ /******************************************************/
+
+ /*
+ * AVIC is inhibited on a vCPU because it runs a nested guest.
+ *
+ * This is needed because unlike APICv, the peers of this vCPU
+ * cannot use the doorbell mechanism to signal interrupts via AVIC when
+ * a vCPU runs nested.
+ */
APICV_INHIBIT_REASON_NESTED,
+
+ /*
+ * On SVM, the wait for the IRQ window is implemented with pending vIRQ,
+ * which cannot be injected when the AVIC is enabled, thus AVIC
+ * is inhibited while KVM waits for IRQ window.
+ */
APICV_INHIBIT_REASON_IRQWIN,
+
+ /*
+ * PIT (i8254) 're-inject' mode, relies on EOI intercept,
+ * which AVIC doesn't support for edge triggered interrupts.
+ */
APICV_INHIBIT_REASON_PIT_REINJ,
+
+ /*
+ * AVIC is inhibited because the guest has x2apic in its CPUID.
+ */
APICV_INHIBIT_REASON_X2APIC,
- APICV_INHIBIT_REASON_BLOCKIRQ,
- APICV_INHIBIT_REASON_ABSENT,
+
+ /*
+ * AVIC is disabled because SEV doesn't support it.
+ */
APICV_INHIBIT_REASON_SEV,
};
u64 nested_run;
u64 directed_yield_attempted;
u64 directed_yield_successful;
+ u64 preemption_reported;
+ u64 preemption_other;
u64 guest_mode;
};
#define __ALIGN_STR __stringify(__ALIGN)
#endif
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define RET jmp __x86_return_thunk
+#else /* CONFIG_RETPOLINE */
#ifdef CONFIG_SLS
#define RET ret; int3
#else
#define RET ret
#endif
+#endif /* CONFIG_RETPOLINE */
#else /* __ASSEMBLY__ */
+#if defined(CONFIG_RETHUNK) && !defined(__DISABLE_EXPORTS) && !defined(BUILD_VDSO)
+#define ASM_RET "jmp __x86_return_thunk\n\t"
+#else /* CONFIG_RETPOLINE */
#ifdef CONFIG_SLS
#define ASM_RET "ret; int3\n\t"
#else
#define ASM_RET "ret\n\t"
#endif
+#endif /* CONFIG_RETPOLINE */
#endif /* __ASSEMBLY__ */
#ifdef CONFIG_AMD_MEM_ENCRYPT
void hv_ghcb_msr_write(u64 msr, u64 value);
void hv_ghcb_msr_read(u64 msr, u64 *value);
+bool hv_ghcb_negotiate_protocol(void);
+void hv_ghcb_terminate(unsigned int set, unsigned int reason);
#else
static inline void hv_ghcb_msr_write(u64 msr, u64 value) {}
static inline void hv_ghcb_msr_read(u64 msr, u64 *value) {}
+static inline bool hv_ghcb_negotiate_protocol(void) { return false; }
+static inline void hv_ghcb_terminate(unsigned int set, unsigned int reason) {}
#endif
extern bool hv_isolation_type_snp(void);
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
#define RTM_ALLOW BIT(1) /* TSX development mode */
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
#include <asm/cpufeatures.h>
#include <asm/msr-index.h>
#include <asm/unwind_hints.h>
+#include <asm/percpu.h>
#define RETPOLINE_THUNK_SIZE 32
.popsection
.endm
+/*
+ * (ab)use RETPOLINE_SAFE on RET to annotate away 'bare' RET instructions
+ * vs RETBleed validation.
+ */
+#define ANNOTATE_UNRET_SAFE ANNOTATE_RETPOLINE_SAFE
+
+/*
+ * Abuse ANNOTATE_RETPOLINE_SAFE on a NOP to indicate UNRET_END, should
+ * eventually turn into it's own annotation.
+ */
+.macro ANNOTATE_UNRET_END
+#ifdef CONFIG_DEBUG_ENTRY
+ ANNOTATE_RETPOLINE_SAFE
+ nop
+#endif
+.endm
+
/*
* JMP_NOSPEC and CALL_NOSPEC macros can be used instead of a simple
* indirect jmp/call which may be susceptible to the Spectre variant 2
* monstrosity above, manually.
*/
.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
-#ifdef CONFIG_RETPOLINE
ALTERNATIVE "jmp .Lskip_rsb_\@", "", \ftr
__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP)
.Lskip_rsb_\@:
+.endm
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+#define CALL_ZEN_UNTRAIN_RET "call zen_untrain_ret"
+#else
+#define CALL_ZEN_UNTRAIN_RET ""
+#endif
+
+/*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. Requires KERNEL CR3 because the
+ * return thunk isn't mapped into the userspace tables (then again, AMD
+ * typically has NO_MELTDOWN).
+ *
+ * While zen_untrain_ret() doesn't clobber anything but requires stack,
+ * entry_ibpb() will clobber AX, CX, DX.
+ *
+ * As such, this must be placed after every *SWITCH_TO_KERNEL_CR3 at a point
+ * where we have a stack but before any RET instruction.
+ */
+.macro UNTRAIN_RET
+#if defined(CONFIG_CPU_UNRET_ENTRY) || defined(CONFIG_CPU_IBPB_ENTRY)
+ ANNOTATE_UNRET_END
+ ALTERNATIVE_2 "", \
+ CALL_ZEN_UNTRAIN_RET, X86_FEATURE_UNRET, \
+ "call entry_ibpb", X86_FEATURE_ENTRY_IBPB
#endif
.endm
_ASM_PTR " 999b\n\t" \
".popsection\n\t"
-#ifdef CONFIG_RETPOLINE
-
typedef u8 retpoline_thunk_t[RETPOLINE_THUNK_SIZE];
+extern retpoline_thunk_t __x86_indirect_thunk_array[];
+
+extern void __x86_return_thunk(void);
+extern void zen_untrain_ret(void);
+extern void entry_ibpb(void);
+
+#ifdef CONFIG_RETPOLINE
#define GEN(reg) \
extern retpoline_thunk_t __x86_indirect_thunk_ ## reg;
#include <asm/GEN-for-each-reg.h>
#undef GEN
-extern retpoline_thunk_t __x86_indirect_thunk_array[];
-
#ifdef CONFIG_X86_64
/*
SPECTRE_V2_EIBRS,
SPECTRE_V2_EIBRS_RETPOLINE,
SPECTRE_V2_EIBRS_LFENCE,
+ SPECTRE_V2_IBRS,
};
/* The indirect branch speculation control variants */
/* The Intel SPEC CTRL MSR base value cache */
extern u64 x86_spec_ctrl_base;
+DECLARE_PER_CPU(u64, x86_spec_ctrl_current);
+extern void write_spec_ctrl_current(u64 val, bool force);
+extern u64 spec_ctrl_current(void);
/*
* With retpoline, we must use IBRS to restrict branch prediction
*/
#define firmware_restrict_branch_speculation_start() \
do { \
- u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \
- \
preempt_disable(); \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current() | SPEC_CTRL_IBRS, \
X86_FEATURE_USE_IBRS_FW); \
+ alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, \
+ X86_FEATURE_USE_IBPB_FW); \
} while (0)
#define firmware_restrict_branch_speculation_end() \
do { \
- u64 val = x86_spec_ctrl_base; \
- \
- alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \
+ alternative_msr_write(MSR_IA32_SPEC_CTRL, \
+ spec_ctrl_current(), \
X86_FEATURE_USE_IBRS_FW); \
preempt_enable(); \
} while (0)
DECLARE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+DECLARE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+
#include <asm/segment.h>
/**
/* pci-irq.c */
+struct pci_dev;
+
struct irq_info {
u8 bus, devfn; /* Bus, device and function */
struct {
# define x86_default_pci_init_irq NULL
# define x86_default_pci_fixup_irqs NULL
#endif
+
+#if defined(CONFIG_PCI) && defined(CONFIG_ACPI)
+extern bool pci_use_e820;
+#else
+#define pci_use_e820 false
+#endif
void *extend_brk(size_t size, size_t align);
/*
- * Reserve space in the brk section. The name must be unique within the file,
- * and somewhat descriptive. The size is in bytes.
+ * Reserve space in the .brk section, which is a block of memory from which the
+ * caller is allowed to allocate very early (before even memblock is available)
+ * by calling extend_brk(). All allocated memory will be eventually converted
+ * to memblock. Any leftover unallocated memory will be freed.
*
- * The allocation is done using inline asm (rather than using a section
- * attribute on a normal variable) in order to allow the use of @nobits, so
- * that it doesn't take up any space in the vmlinux file.
+ * The size is in bytes.
*/
-#define RESERVE_BRK(name, size) \
- asm(".pushsection .brk_reservation,\"aw\",@nobits\n\t" \
- ".brk." #name ":\n\t" \
- ".skip " __stringify(size) "\n\t" \
- ".size .brk." #name ", " __stringify(size) "\n\t" \
- ".popsection\n\t")
+#define RESERVE_BRK(name, size) \
+ __section(".bss..brk") __aligned(1) __used \
+ static char __brk_##name[size]
extern void probe_roms(void);
+
+void clear_bss(void);
+
#ifdef __i386__
asmlinkage void __init i386_start_kernel(void);
#endif /* __i386__ */
#endif /* _SETUP */
-#else
-#define RESERVE_BRK(name,sz) \
- .pushsection .brk_reservation,"aw",@nobits; \
-.brk.name: \
-1: .skip sz; \
- .size .brk.name,.-1b; \
+
+#else /* __ASSEMBLY */
+
+.macro __RESERVE_BRK name, size
+ .pushsection .bss..brk, "aw"
+SYM_DATA_START(__brk_\name)
+ .skip \size
+SYM_DATA_END(__brk_\name)
.popsection
+.endm
+
+#define RESERVE_BRK(name, size) __RESERVE_BRK name, size
+
#endif /* __ASSEMBLY__ */
+
#endif /* _ASM_X86_SETUP_H */
struct real_mode_header;
enum stack_type;
-struct ghcb;
/* Early IDT entry points for #VC handler */
extern void vc_no_ghcb(void);
__sev_es_nmi_complete();
}
extern int __init sev_es_efi_map_ghcbs(pgd_t *pgd);
-extern enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
- bool set_ghcb_msr,
- struct es_em_ctxt *ctxt,
- u64 exit_code, u64 exit_info_1,
- u64 exit_info_2);
+
static inline int rmpadjust(unsigned long vaddr, bool rmp_psize, unsigned long attrs)
{
int rc;
* relative displacement across sections.
*/
+/*
+ * The trampoline is 8 bytes and of the general form:
+ *
+ * jmp.d32 \func
+ * ud1 %esp, %ecx
+ *
+ * That trailing #UD provides both a speculation stop and serves as a unique
+ * 3 byte signature identifying static call trampolines. Also see tramp_ud[]
+ * and __static_call_fixup().
+ */
#define __ARCH_DEFINE_STATIC_CALL_TRAMP(name, insns) \
asm(".pushsection .static_call.text, \"ax\" \n" \
".align 4 \n" \
STATIC_CALL_TRAMP_STR(name) ": \n" \
ANNOTATE_NOENDBR \
insns " \n" \
- ".byte 0x53, 0x43, 0x54 \n" \
+ ".byte 0x0f, 0xb9, 0xcc \n" \
".type " STATIC_CALL_TRAMP_STR(name) ", @function \n" \
".size " STATIC_CALL_TRAMP_STR(name) ", . - " STATIC_CALL_TRAMP_STR(name) " \n" \
".popsection \n")
#define ARCH_DEFINE_STATIC_CALL_TRAMP(name, func) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, ".byte 0xe9; .long " #func " - (. + 4)")
+#ifdef CONFIG_RETHUNK
+#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
+ __ARCH_DEFINE_STATIC_CALL_TRAMP(name, "jmp __x86_return_thunk")
+#else
#define ARCH_DEFINE_STATIC_CALL_NULL_TRAMP(name) \
__ARCH_DEFINE_STATIC_CALL_TRAMP(name, "ret; int3; nop; nop; nop")
+#endif
#define ARCH_DEFINE_STATIC_CALL_RET0_TRAMP(name) \
ARCH_DEFINE_STATIC_CALL_TRAMP(name, __static_call_return0)
".long " STATIC_CALL_KEY_STR(name) " - . \n" \
".popsection \n")
+extern bool __static_call_fixup(void *tramp, u8 op, void *dest);
+
#endif /* _ASM_STATIC_CALL_H */
#ifndef _ASM_X86_TLB_H
#define _ASM_X86_TLB_H
-#define tlb_start_vma(tlb, vma) do { } while (0)
-#define tlb_end_vma(tlb, vma) do { } while (0)
-
#define tlb_flush tlb_flush
static inline void tlb_flush(struct mmu_gather *tlb);
[ptr] "+m" (*_ptr), \
[old] "+a" (__old) \
: [new] ltype (__new) \
- : "memory", "cc"); \
+ : "memory"); \
if (unlikely(__err)) \
goto label; \
if (unlikely(!success)) \
#ifdef __ASSEMBLY__
.macro UNWIND_HINT_EMPTY
- UNWIND_HINT sp_reg=ORC_REG_UNDEFINED type=UNWIND_HINT_TYPE_CALL end=1
+ UNWIND_HINT type=UNWIND_HINT_TYPE_CALL end=1
+.endm
+
+.macro UNWIND_HINT_ENTRY
+ UNWIND_HINT type=UNWIND_HINT_TYPE_ENTRY end=1
.endm
.macro UNWIND_HINT_REGS base=%rsp offset=0 indirect=0 extra=1 partial=0
UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
.endm
+.macro UNWIND_HINT_SAVE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_SAVE
+.endm
+
+.macro UNWIND_HINT_RESTORE
+ UNWIND_HINT type=UNWIND_HINT_TYPE_RESTORE
+.endm
+
#else
#define UNWIND_HINT_FUNC \
#define SETUP_INDIRECT (1<<31)
/* SETUP_INDIRECT | max(SETUP_*) */
-#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_JAILHOUSE)
+#define SETUP_TYPE_MAX (SETUP_INDIRECT | SETUP_CC_BLOB)
/* ram_size flags */
#define RAMDISK_IMAGE_START_MASK 0x07FF
OBJECT_FILES_NON_STANDARD_test_nx.o := y
-ifdef CONFIG_FRAME_POINTER
-OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
-endif
-
# If instrumentation of this dir is enabled, boot hangs during first second.
# Probably could be more selective here, but note that files related to irqs,
# boot, dumpstack/stacktrace, etc are either non-interesting or can lead to
/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */
+bool cpc_supported_by_cpu(void)
+{
+ switch (boot_cpu_data.x86_vendor) {
+ case X86_VENDOR_AMD:
+ case X86_VENDOR_HYGON:
+ if (boot_cpu_data.x86 == 0x19 && ((boot_cpu_data.x86_model <= 0x0f) ||
+ (boot_cpu_data.x86_model >= 0x20 && boot_cpu_data.x86_model <= 0x2f)))
+ return true;
+ else if (boot_cpu_data.x86 == 0x17 &&
+ boot_cpu_data.x86_model >= 0x70 && boot_cpu_data.x86_model <= 0x7f)
+ return true;
+ return boot_cpu_has(X86_FEATURE_CPPC);
+ }
+ return false;
+}
+
bool cpc_ffh_supported(void)
{
return true;
}
extern s32 __retpoline_sites[], __retpoline_sites_end[];
+extern s32 __return_sites[], __return_sites_end[];
extern s32 __ibt_endbr_seal[], __ibt_endbr_seal_end[];
extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
extern s32 __smp_locks[], __smp_locks_end[];
}
}
+#ifdef CONFIG_RETHUNK
+/*
+ * Rewrite the compiler generated return thunk tail-calls.
+ *
+ * For example, convert:
+ *
+ * JMP __x86_return_thunk
+ *
+ * into:
+ *
+ * RET
+ */
+static int patch_return(void *addr, struct insn *insn, u8 *bytes)
+{
+ int i = 0;
+
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ return -1;
+
+ bytes[i++] = RET_INSN_OPCODE;
+
+ for (; i < insn->length;)
+ bytes[i++] = INT3_INSN_OPCODE;
+
+ return i;
+}
+
+void __init_or_module noinline apply_returns(s32 *start, s32 *end)
+{
+ s32 *s;
+
+ for (s = start; s < end; s++) {
+ void *dest = NULL, *addr = (void *)s + *s;
+ struct insn insn;
+ int len, ret;
+ u8 bytes[16];
+ u8 op;
+
+ ret = insn_decode_kernel(&insn, addr);
+ if (WARN_ON_ONCE(ret < 0))
+ continue;
+
+ op = insn.opcode.bytes[0];
+ if (op == JMP32_INSN_OPCODE)
+ dest = addr + insn.length + insn.immediate.value;
+
+ if (__static_call_fixup(addr, op, dest) ||
+ WARN_ONCE(dest != &__x86_return_thunk,
+ "missing return thunk: %pS-%pS: %*ph",
+ addr, dest, 5, addr))
+ continue;
+
+ DPRINTK("return thunk at: %pS (%px) len: %d to: %pS",
+ addr, addr, insn.length,
+ addr + insn.length + insn.immediate.value);
+
+ len = patch_return(addr, &insn, bytes);
+ if (len == insn.length) {
+ DUMP_BYTES(((u8*)addr), len, "%px: orig: ", addr);
+ DUMP_BYTES(((u8*)bytes), len, "%px: repl: ", addr);
+ text_poke_early(addr, bytes, len);
+ }
+ }
+}
+#else
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
+#endif /* CONFIG_RETHUNK */
+
#else /* !CONFIG_RETPOLINE || !CONFIG_OBJTOOL */
void __init_or_module noinline apply_retpolines(s32 *start, s32 *end) { }
+void __init_or_module noinline apply_returns(s32 *start, s32 *end) { }
#endif /* CONFIG_RETPOLINE && CONFIG_OBJTOOL */
* those can rewrite the retpoline thunks.
*/
apply_retpolines(__retpoline_sites, __retpoline_sites_end);
+ apply_returns(__return_sites, __return_sites_end);
/*
* Then patch alternatives, such that those paravirt calls that are in
#include <asm/suspend.h>
#include <asm/tlbflush.h>
#include <asm/tdx.h>
+#include "../kvm/vmx/vmx.h"
#ifdef CONFIG_XEN
#include <xen/interface/xen.h>
OFFSET(TSS_sp0, tss_struct, x86_tss.sp0);
OFFSET(TSS_sp1, tss_struct, x86_tss.sp1);
OFFSET(TSS_sp2, tss_struct, x86_tss.sp2);
+
+ if (IS_ENABLED(CONFIG_KVM_INTEL)) {
+ BLANK();
+ OFFSET(VMX_spec_ctrl, vcpu_vmx, spec_ctrl);
+ }
}
clear_rdrand_cpuid_bit(c);
}
+void init_spectral_chicken(struct cpuinfo_x86 *c)
+{
+#ifdef CONFIG_CPU_UNRET_ENTRY
+ u64 value;
+
+ /*
+ * On Zen2 we offer this chicken (bit) on the altar of Speculation.
+ *
+ * This suppresses speculation from the middle of a basic block, i.e. it
+ * suppresses non-branch predictions.
+ *
+ * We use STIBP as a heuristic to filter out Zen2 from the rest of F17H
+ */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && cpu_has(c, X86_FEATURE_AMD_STIBP)) {
+ if (!rdmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, &value)) {
+ value |= MSR_ZEN2_SPECTRAL_CHICKEN_BIT;
+ wrmsrl_safe(MSR_ZEN2_SPECTRAL_CHICKEN, value);
+ }
+ }
+#endif
+}
+
static void init_amd_zn(struct cpuinfo_x86 *c)
{
set_cpu_cap(c, X86_FEATURE_ZEN);
node_reclaim_distance = 32;
#endif
- /*
- * Fix erratum 1076: CPB feature bit not being set in CPUID.
- * Always set it, except when running under a hypervisor.
- */
- if (!cpu_has(c, X86_FEATURE_HYPERVISOR) && !cpu_has(c, X86_FEATURE_CPB))
- set_cpu_cap(c, X86_FEATURE_CPB);
+ /* Fix up CPUID bits, but only if not virtualised. */
+ if (!cpu_has(c, X86_FEATURE_HYPERVISOR)) {
+
+ /* Erratum 1076: CPB feature bit not being set in CPUID. */
+ if (!cpu_has(c, X86_FEATURE_CPB))
+ set_cpu_cap(c, X86_FEATURE_CPB);
+
+ /*
+ * Zen3 (Fam19 model < 0x10) parts are not susceptible to
+ * Branch Type Confusion, but predate the allocation of the
+ * BTC_NO bit.
+ */
+ if (c->x86 == 0x19 && !cpu_has(c, X86_FEATURE_BTC_NO))
+ set_cpu_cap(c, X86_FEATURE_BTC_NO);
+ }
}
static void init_amd(struct cpuinfo_x86 *c)
case 0x12: init_amd_ln(c); break;
case 0x15: init_amd_bd(c); break;
case 0x16: init_amd_jg(c); break;
- case 0x17: fallthrough;
+ case 0x17: init_spectral_chicken(c);
+ fallthrough;
case 0x19: init_amd_zn(c); break;
}
static void __init spectre_v1_select_mitigation(void);
static void __init spectre_v2_select_mitigation(void);
+static void __init retbleed_select_mitigation(void);
+static void __init spectre_v2_user_select_mitigation(void);
static void __init ssb_select_mitigation(void);
static void __init l1tf_select_mitigation(void);
static void __init mds_select_mitigation(void);
-static void __init mds_print_mitigation(void);
+static void __init md_clear_update_mitigation(void);
+static void __init md_clear_select_mitigation(void);
static void __init taa_select_mitigation(void);
+static void __init mmio_select_mitigation(void);
static void __init srbds_select_mitigation(void);
static void __init l1d_flush_select_mitigation(void);
-/* The base value of the SPEC_CTRL MSR that always has to be preserved. */
+/* The base value of the SPEC_CTRL MSR without task-specific bits set */
u64 x86_spec_ctrl_base;
EXPORT_SYMBOL_GPL(x86_spec_ctrl_base);
+
+/* The current value of the SPEC_CTRL MSR with task-specific bits set */
+DEFINE_PER_CPU(u64, x86_spec_ctrl_current);
+EXPORT_SYMBOL_GPL(x86_spec_ctrl_current);
+
static DEFINE_MUTEX(spec_ctrl_mutex);
/*
- * The vendor and possibly platform specific bits which can be modified in
- * x86_spec_ctrl_base.
+ * Keep track of the SPEC_CTRL MSR value for the current task, which may differ
+ * from x86_spec_ctrl_base due to STIBP/SSB in __speculation_ctrl_update().
*/
-static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS;
+void write_spec_ctrl_current(u64 val, bool force)
+{
+ if (this_cpu_read(x86_spec_ctrl_current) == val)
+ return;
+
+ this_cpu_write(x86_spec_ctrl_current, val);
+
+ /*
+ * When KERNEL_IBRS this MSR is written on return-to-user, unless
+ * forced the update can be delayed until that time.
+ */
+ if (force || !cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS))
+ wrmsrl(MSR_IA32_SPEC_CTRL, val);
+}
+
+u64 spec_ctrl_current(void)
+{
+ return this_cpu_read(x86_spec_ctrl_current);
+}
+EXPORT_SYMBOL_GPL(spec_ctrl_current);
/*
* AMD specific MSR info for Speculative Store Bypass control.
*/
DEFINE_STATIC_KEY_FALSE(switch_mm_cond_l1d_flush);
+/* Controls CPU Fill buffer clear before KVM guest MMIO accesses */
+DEFINE_STATIC_KEY_FALSE(mmio_stale_data_clear);
+EXPORT_SYMBOL_GPL(mmio_stale_data_clear);
+
void __init check_bugs(void)
{
identify_boot_cpu();
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
- /* Allow STIBP in MSR_SPEC_CTRL if supported */
- if (boot_cpu_has(X86_FEATURE_STIBP))
- x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
-
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
+ /*
+ * retbleed_select_mitigation() relies on the state set by
+ * spectre_v2_select_mitigation(); specifically it wants to know about
+ * spectre_v2=ibrs.
+ */
+ retbleed_select_mitigation();
+ /*
+ * spectre_v2_user_select_mitigation() relies on the state set by
+ * retbleed_select_mitigation(); specifically the STIBP selection is
+ * forced for UNRET.
+ */
+ spectre_v2_user_select_mitigation();
ssb_select_mitigation();
l1tf_select_mitigation();
- mds_select_mitigation();
- taa_select_mitigation();
+ md_clear_select_mitigation();
srbds_select_mitigation();
l1d_flush_select_mitigation();
- /*
- * As MDS and TAA mitigations are inter-related, print MDS
- * mitigation until after TAA mitigation selection is done.
- */
- mds_print_mitigation();
-
arch_smt_update();
#ifdef CONFIG_X86_32
#endif
}
+/*
+ * NOTE: This function is *only* called for SVM. VMX spec_ctrl handling is
+ * done in vmenter.S.
+ */
void
x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest)
{
- u64 msrval, guestval, hostval = x86_spec_ctrl_base;
+ u64 msrval, guestval = guest_spec_ctrl, hostval = spec_ctrl_current();
struct thread_info *ti = current_thread_info();
- /* Is MSR_SPEC_CTRL implemented ? */
if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) {
- /*
- * Restrict guest_spec_ctrl to supported values. Clear the
- * modifiable bits in the host base value and or the
- * modifiable bits from the guest value.
- */
- guestval = hostval & ~x86_spec_ctrl_mask;
- guestval |= guest_spec_ctrl & x86_spec_ctrl_mask;
-
- /* SSBD controlled in MSR_SPEC_CTRL */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD))
- hostval |= ssbd_tif_to_spec_ctrl(ti->flags);
-
- /* Conditional STIBP enabled? */
- if (static_branch_unlikely(&switch_to_cond_stibp))
- hostval |= stibp_tif_to_spec_ctrl(ti->flags);
-
if (hostval != guestval) {
msrval = setguest ? guestval : hostval;
wrmsrl(MSR_IA32_SPEC_CTRL, msrval);
}
}
-static void __init mds_print_mitigation(void)
-{
- if (!boot_cpu_has_bug(X86_BUG_MDS) || cpu_mitigations_off())
- return;
-
- pr_info("%s\n", mds_strings[mds_mitigation]);
-}
-
static int __init mds_cmdline(char *str)
{
if (!boot_cpu_has_bug(X86_BUG_MDS))
/* TSX previously disabled by tsx=off */
if (!boot_cpu_has(X86_FEATURE_RTM)) {
taa_mitigation = TAA_MITIGATION_TSX_DISABLED;
- goto out;
+ return;
}
if (cpu_mitigations_off()) {
*/
if (taa_mitigation == TAA_MITIGATION_OFF &&
mds_mitigation == MDS_MITIGATION_OFF)
- goto out;
+ return;
if (boot_cpu_has(X86_FEATURE_MD_CLEAR))
taa_mitigation = TAA_MITIGATION_VERW;
if (taa_nosmt || cpu_mitigations_auto_nosmt())
cpu_smt_disable(false);
-
- /*
- * Update MDS mitigation, if necessary, as the mds_user_clear is
- * now enabled for TAA mitigation.
- */
- if (mds_mitigation == MDS_MITIGATION_OFF &&
- boot_cpu_has_bug(X86_BUG_MDS)) {
- mds_mitigation = MDS_MITIGATION_FULL;
- mds_select_mitigation();
- }
-out:
- pr_info("%s\n", taa_strings[taa_mitigation]);
}
static int __init tsx_async_abort_parse_cmdline(char *str)
}
early_param("tsx_async_abort", tsx_async_abort_parse_cmdline);
+#undef pr_fmt
+#define pr_fmt(fmt) "MMIO Stale Data: " fmt
+
+enum mmio_mitigations {
+ MMIO_MITIGATION_OFF,
+ MMIO_MITIGATION_UCODE_NEEDED,
+ MMIO_MITIGATION_VERW,
+};
+
+/* Default mitigation for Processor MMIO Stale Data vulnerabilities */
+static enum mmio_mitigations mmio_mitigation __ro_after_init = MMIO_MITIGATION_VERW;
+static bool mmio_nosmt __ro_after_init = false;
+
+static const char * const mmio_strings[] = {
+ [MMIO_MITIGATION_OFF] = "Vulnerable",
+ [MMIO_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode",
+ [MMIO_MITIGATION_VERW] = "Mitigation: Clear CPU buffers",
+};
+
+static void __init mmio_select_mitigation(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) ||
+ cpu_mitigations_off()) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ return;
+ }
+
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return;
+
+ ia32_cap = x86_read_arch_cap_msr();
+
+ /*
+ * Enable CPU buffer clear mitigation for host and VMM, if also affected
+ * by MDS or TAA. Otherwise, enable mitigation for VMM only.
+ */
+ if (boot_cpu_has_bug(X86_BUG_MDS) || (boot_cpu_has_bug(X86_BUG_TAA) &&
+ boot_cpu_has(X86_FEATURE_RTM)))
+ static_branch_enable(&mds_user_clear);
+ else
+ static_branch_enable(&mmio_stale_data_clear);
+
+ /*
+ * If Processor-MMIO-Stale-Data bug is present and Fill Buffer data can
+ * be propagated to uncore buffers, clearing the Fill buffers on idle
+ * is required irrespective of SMT state.
+ */
+ if (!(ia32_cap & ARCH_CAP_FBSDP_NO))
+ static_branch_enable(&mds_idle_clear);
+
+ /*
+ * Check if the system has the right microcode.
+ *
+ * CPU Fill buffer clear mitigation is enumerated by either an explicit
+ * FB_CLEAR or by the presence of both MD_CLEAR and L1D_FLUSH on MDS
+ * affected systems.
+ */
+ if ((ia32_cap & ARCH_CAP_FB_CLEAR) ||
+ (boot_cpu_has(X86_FEATURE_MD_CLEAR) &&
+ boot_cpu_has(X86_FEATURE_FLUSH_L1D) &&
+ !(ia32_cap & ARCH_CAP_MDS_NO)))
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ else
+ mmio_mitigation = MMIO_MITIGATION_UCODE_NEEDED;
+
+ if (mmio_nosmt || cpu_mitigations_auto_nosmt())
+ cpu_smt_disable(false);
+}
+
+static int __init mmio_stale_data_parse_cmdline(char *str)
+{
+ if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ return 0;
+
+ if (!str)
+ return -EINVAL;
+
+ if (!strcmp(str, "off")) {
+ mmio_mitigation = MMIO_MITIGATION_OFF;
+ } else if (!strcmp(str, "full")) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ } else if (!strcmp(str, "full,nosmt")) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ mmio_nosmt = true;
+ }
+
+ return 0;
+}
+early_param("mmio_stale_data", mmio_stale_data_parse_cmdline);
+
+#undef pr_fmt
+#define pr_fmt(fmt) "" fmt
+
+static void __init md_clear_update_mitigation(void)
+{
+ if (cpu_mitigations_off())
+ return;
+
+ if (!static_key_enabled(&mds_user_clear))
+ goto out;
+
+ /*
+ * mds_user_clear is now enabled. Update MDS, TAA and MMIO Stale Data
+ * mitigation, if necessary.
+ */
+ if (mds_mitigation == MDS_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MDS)) {
+ mds_mitigation = MDS_MITIGATION_FULL;
+ mds_select_mitigation();
+ }
+ if (taa_mitigation == TAA_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_TAA)) {
+ taa_mitigation = TAA_MITIGATION_VERW;
+ taa_select_mitigation();
+ }
+ if (mmio_mitigation == MMIO_MITIGATION_OFF &&
+ boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) {
+ mmio_mitigation = MMIO_MITIGATION_VERW;
+ mmio_select_mitigation();
+ }
+out:
+ if (boot_cpu_has_bug(X86_BUG_MDS))
+ pr_info("MDS: %s\n", mds_strings[mds_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_TAA))
+ pr_info("TAA: %s\n", taa_strings[taa_mitigation]);
+ if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
+ pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]);
+}
+
+static void __init md_clear_select_mitigation(void)
+{
+ mds_select_mitigation();
+ taa_select_mitigation();
+ mmio_select_mitigation();
+
+ /*
+ * As MDS, TAA and MMIO Stale Data mitigations are inter-related, update
+ * and print their mitigation after MDS, TAA and MMIO Stale Data
+ * mitigation selection is done.
+ */
+ md_clear_update_mitigation();
+}
+
#undef pr_fmt
#define pr_fmt(fmt) "SRBDS: " fmt
return;
/*
- * Check to see if this is one of the MDS_NO systems supporting
- * TSX that are only exposed to SRBDS when TSX is enabled.
+ * Check to see if this is one of the MDS_NO systems supporting TSX that
+ * are only exposed to SRBDS when TSX is enabled or when CPU is affected
+ * by Processor MMIO Stale Data vulnerability.
*/
ia32_cap = x86_read_arch_cap_msr();
- if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM))
+ if ((ia32_cap & ARCH_CAP_MDS_NO) && !boot_cpu_has(X86_FEATURE_RTM) &&
+ !boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA))
srbds_mitigation = SRBDS_MITIGATION_TSX_OFF;
else if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
srbds_mitigation = SRBDS_MITIGATION_HYPERVISOR;
}
early_param("nospectre_v1", nospectre_v1_cmdline);
-#undef pr_fmt
-#define pr_fmt(fmt) "Spectre V2 : " fmt
-
static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
SPECTRE_V2_NONE;
+#undef pr_fmt
+#define pr_fmt(fmt) "RETBleed: " fmt
+
+enum retbleed_mitigation {
+ RETBLEED_MITIGATION_NONE,
+ RETBLEED_MITIGATION_UNRET,
+ RETBLEED_MITIGATION_IBPB,
+ RETBLEED_MITIGATION_IBRS,
+ RETBLEED_MITIGATION_EIBRS,
+};
+
+enum retbleed_mitigation_cmd {
+ RETBLEED_CMD_OFF,
+ RETBLEED_CMD_AUTO,
+ RETBLEED_CMD_UNRET,
+ RETBLEED_CMD_IBPB,
+};
+
+static const char * const retbleed_strings[] = {
+ [RETBLEED_MITIGATION_NONE] = "Vulnerable",
+ [RETBLEED_MITIGATION_UNRET] = "Mitigation: untrained return thunk",
+ [RETBLEED_MITIGATION_IBPB] = "Mitigation: IBPB",
+ [RETBLEED_MITIGATION_IBRS] = "Mitigation: IBRS",
+ [RETBLEED_MITIGATION_EIBRS] = "Mitigation: Enhanced IBRS",
+};
+
+static enum retbleed_mitigation retbleed_mitigation __ro_after_init =
+ RETBLEED_MITIGATION_NONE;
+static enum retbleed_mitigation_cmd retbleed_cmd __ro_after_init =
+ RETBLEED_CMD_AUTO;
+
+static int __ro_after_init retbleed_nosmt = false;
+
+static int __init retbleed_parse_cmdline(char *str)
+{
+ if (!str)
+ return -EINVAL;
+
+ while (str) {
+ char *next = strchr(str, ',');
+ if (next) {
+ *next = 0;
+ next++;
+ }
+
+ if (!strcmp(str, "off")) {
+ retbleed_cmd = RETBLEED_CMD_OFF;
+ } else if (!strcmp(str, "auto")) {
+ retbleed_cmd = RETBLEED_CMD_AUTO;
+ } else if (!strcmp(str, "unret")) {
+ retbleed_cmd = RETBLEED_CMD_UNRET;
+ } else if (!strcmp(str, "ibpb")) {
+ retbleed_cmd = RETBLEED_CMD_IBPB;
+ } else if (!strcmp(str, "nosmt")) {
+ retbleed_nosmt = true;
+ } else {
+ pr_err("Ignoring unknown retbleed option (%s).", str);
+ }
+
+ str = next;
+ }
+
+ return 0;
+}
+early_param("retbleed", retbleed_parse_cmdline);
+
+#define RETBLEED_UNTRAIN_MSG "WARNING: BTB untrained return thunk mitigation is only effective on AMD/Hygon!\n"
+#define RETBLEED_INTEL_MSG "WARNING: Spectre v2 mitigation leaves CPU vulnerable to RETBleed attacks, data leaks possible!\n"
+
+static void __init retbleed_select_mitigation(void)
+{
+ bool mitigate_smt = false;
+
+ if (!boot_cpu_has_bug(X86_BUG_RETBLEED) || cpu_mitigations_off())
+ return;
+
+ switch (retbleed_cmd) {
+ case RETBLEED_CMD_OFF:
+ return;
+
+ case RETBLEED_CMD_UNRET:
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY)) {
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_UNRET_ENTRY.\n");
+ goto do_cmd_auto;
+ }
+ break;
+
+ case RETBLEED_CMD_IBPB:
+ if (!boot_cpu_has(X86_FEATURE_IBPB)) {
+ pr_err("WARNING: CPU does not support IBPB.\n");
+ goto do_cmd_auto;
+ } else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY)) {
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ } else {
+ pr_err("WARNING: kernel not compiled with CPU_IBPB_ENTRY.\n");
+ goto do_cmd_auto;
+ }
+ break;
+
+do_cmd_auto:
+ case RETBLEED_CMD_AUTO:
+ default:
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) {
+ if (IS_ENABLED(CONFIG_CPU_UNRET_ENTRY))
+ retbleed_mitigation = RETBLEED_MITIGATION_UNRET;
+ else if (IS_ENABLED(CONFIG_CPU_IBPB_ENTRY) && boot_cpu_has(X86_FEATURE_IBPB))
+ retbleed_mitigation = RETBLEED_MITIGATION_IBPB;
+ }
+
+ /*
+ * The Intel mitigation (IBRS or eIBRS) was already selected in
+ * spectre_v2_select_mitigation(). 'retbleed_mitigation' will
+ * be set accordingly below.
+ */
+
+ break;
+ }
+
+ switch (retbleed_mitigation) {
+ case RETBLEED_MITIGATION_UNRET:
+ setup_force_cpu_cap(X86_FEATURE_RETHUNK);
+ setup_force_cpu_cap(X86_FEATURE_UNRET);
+
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ pr_err(RETBLEED_UNTRAIN_MSG);
+
+ mitigate_smt = true;
+ break;
+
+ case RETBLEED_MITIGATION_IBPB:
+ setup_force_cpu_cap(X86_FEATURE_ENTRY_IBPB);
+ mitigate_smt = true;
+ break;
+
+ default:
+ break;
+ }
+
+ if (mitigate_smt && !boot_cpu_has(X86_FEATURE_STIBP) &&
+ (retbleed_nosmt || cpu_mitigations_auto_nosmt()))
+ cpu_smt_disable(false);
+
+ /*
+ * Let IBRS trump all on Intel without affecting the effects of the
+ * retbleed= cmdline option.
+ */
+ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ switch (spectre_v2_enabled) {
+ case SPECTRE_V2_IBRS:
+ retbleed_mitigation = RETBLEED_MITIGATION_IBRS;
+ break;
+ case SPECTRE_V2_EIBRS:
+ case SPECTRE_V2_EIBRS_RETPOLINE:
+ case SPECTRE_V2_EIBRS_LFENCE:
+ retbleed_mitigation = RETBLEED_MITIGATION_EIBRS;
+ break;
+ default:
+ pr_err(RETBLEED_INTEL_MSG);
+ }
+ }
+
+ pr_info("%s\n", retbleed_strings[retbleed_mitigation]);
+}
+
+#undef pr_fmt
+#define pr_fmt(fmt) "Spectre V2 : " fmt
+
static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
SPECTRE_V2_USER_NONE;
static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
#define SPECTRE_V2_LFENCE_MSG "WARNING: LFENCE mitigation is not recommended for this CPU, data leaks possible!\n"
#define SPECTRE_V2_EIBRS_EBPF_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible via Spectre v2 BHB attacks!\n"
#define SPECTRE_V2_EIBRS_LFENCE_EBPF_SMT_MSG "WARNING: Unprivileged eBPF is enabled with eIBRS+LFENCE mitigation and SMT, data leaks possible via Spectre v2 BHB attacks!\n"
+#define SPECTRE_V2_IBRS_PERF_MSG "WARNING: IBRS mitigation selected on Enhanced IBRS CPU, this may cause unnecessary performance loss\n"
#ifdef CONFIG_BPF_SYSCALL
void unpriv_ebpf_notify(int new_state)
SPECTRE_V2_CMD_EIBRS,
SPECTRE_V2_CMD_EIBRS_RETPOLINE,
SPECTRE_V2_CMD_EIBRS_LFENCE,
+ SPECTRE_V2_CMD_IBRS,
};
enum spectre_v2_user_cmd {
pr_info("spectre_v2_user=%s forced on command line.\n", reason);
}
+static __ro_after_init enum spectre_v2_mitigation_cmd spectre_v2_cmd;
+
static enum spectre_v2_user_cmd __init
-spectre_v2_parse_user_cmdline(enum spectre_v2_mitigation_cmd v2_cmd)
+spectre_v2_parse_user_cmdline(void)
{
char arg[20];
int ret, i;
- switch (v2_cmd) {
+ switch (spectre_v2_cmd) {
case SPECTRE_V2_CMD_NONE:
return SPECTRE_V2_USER_CMD_NONE;
case SPECTRE_V2_CMD_FORCE:
return SPECTRE_V2_USER_CMD_AUTO;
}
-static inline bool spectre_v2_in_eibrs_mode(enum spectre_v2_mitigation mode)
+static inline bool spectre_v2_in_ibrs_mode(enum spectre_v2_mitigation mode)
{
- return (mode == SPECTRE_V2_EIBRS ||
- mode == SPECTRE_V2_EIBRS_RETPOLINE ||
- mode == SPECTRE_V2_EIBRS_LFENCE);
+ return mode == SPECTRE_V2_IBRS ||
+ mode == SPECTRE_V2_EIBRS ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_EIBRS_LFENCE;
}
static void __init
-spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
+spectre_v2_user_select_mitigation(void)
{
enum spectre_v2_user_mitigation mode = SPECTRE_V2_USER_NONE;
bool smt_possible = IS_ENABLED(CONFIG_SMP);
cpu_smt_control == CPU_SMT_NOT_SUPPORTED)
smt_possible = false;
- cmd = spectre_v2_parse_user_cmdline(v2_cmd);
+ cmd = spectre_v2_parse_user_cmdline();
switch (cmd) {
case SPECTRE_V2_USER_CMD_NONE:
goto set_mode;
}
/*
- * If no STIBP, enhanced IBRS is enabled or SMT impossible, STIBP is not
- * required.
+ * If no STIBP, IBRS or enhanced IBRS is enabled, or SMT impossible,
+ * STIBP is not required.
*/
if (!boot_cpu_has(X86_FEATURE_STIBP) ||
!smt_possible ||
- spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return;
/*
boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (mode != SPECTRE_V2_USER_STRICT &&
+ mode != SPECTRE_V2_USER_STRICT_PREFERRED)
+ pr_info("Selecting STIBP always-on mode to complement retbleed mitigation\n");
+ mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+ }
+
spectre_v2_user_stibp = mode;
set_mode:
[SPECTRE_V2_EIBRS] = "Mitigation: Enhanced IBRS",
[SPECTRE_V2_EIBRS_LFENCE] = "Mitigation: Enhanced IBRS + LFENCE",
[SPECTRE_V2_EIBRS_RETPOLINE] = "Mitigation: Enhanced IBRS + Retpolines",
+ [SPECTRE_V2_IBRS] = "Mitigation: IBRS",
};
static const struct {
{ "eibrs,lfence", SPECTRE_V2_CMD_EIBRS_LFENCE, false },
{ "eibrs,retpoline", SPECTRE_V2_CMD_EIBRS_RETPOLINE, false },
{ "auto", SPECTRE_V2_CMD_AUTO, false },
+ { "ibrs", SPECTRE_V2_CMD_IBRS, false },
};
static void __init spec_v2_print_cond(const char *reason, bool secure)
return SPECTRE_V2_CMD_AUTO;
}
+ if (cmd == SPECTRE_V2_CMD_IBRS && !IS_ENABLED(CONFIG_CPU_IBRS_ENTRY)) {
+ pr_err("%s selected but not compiled in. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
+ pr_err("%s selected but not Intel CPU. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && !boot_cpu_has(X86_FEATURE_IBRS)) {
+ pr_err("%s selected but CPU doesn't have IBRS. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
+ if (cmd == SPECTRE_V2_CMD_IBRS && boot_cpu_has(X86_FEATURE_XENPV)) {
+ pr_err("%s selected but running as XenPV guest. Switching to AUTO select\n",
+ mitigation_options[i].option);
+ return SPECTRE_V2_CMD_AUTO;
+ }
+
spec_v2_print_cond(mitigation_options[i].option,
mitigation_options[i].secure);
return cmd;
return SPECTRE_V2_RETPOLINE;
}
+/* Disable in-kernel use of non-RSB RET predictors */
+static void __init spec_ctrl_disable_kernel_rrsba(void)
+{
+ u64 ia32_cap;
+
+ if (!boot_cpu_has(X86_FEATURE_RRSBA_CTRL))
+ return;
+
+ ia32_cap = x86_read_arch_cap_msr();
+
+ if (ia32_cap & ARCH_CAP_RRSBA) {
+ x86_spec_ctrl_base |= SPEC_CTRL_RRSBA_DIS_S;
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
+ }
+}
+
static void __init spectre_v2_select_mitigation(void)
{
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
break;
}
+ if (IS_ENABLED(CONFIG_CPU_IBRS_ENTRY) &&
+ boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ retbleed_cmd != RETBLEED_CMD_OFF &&
+ boot_cpu_has(X86_FEATURE_IBRS) &&
+ boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) {
+ mode = SPECTRE_V2_IBRS;
+ break;
+ }
+
mode = spectre_v2_select_retpoline();
break;
mode = spectre_v2_select_retpoline();
break;
+ case SPECTRE_V2_CMD_IBRS:
+ mode = SPECTRE_V2_IBRS;
+ break;
+
case SPECTRE_V2_CMD_EIBRS:
mode = SPECTRE_V2_EIBRS;
break;
if (mode == SPECTRE_V2_EIBRS && unprivileged_ebpf_enabled())
pr_err(SPECTRE_V2_EIBRS_EBPF_MSG);
- if (spectre_v2_in_eibrs_mode(mode)) {
- /* Force it so VMEXIT will restore correctly */
+ if (spectre_v2_in_ibrs_mode(mode)) {
x86_spec_ctrl_base |= SPEC_CTRL_IBRS;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
switch (mode) {
case SPECTRE_V2_EIBRS:
break;
+ case SPECTRE_V2_IBRS:
+ setup_force_cpu_cap(X86_FEATURE_KERNEL_IBRS);
+ if (boot_cpu_has(X86_FEATURE_IBRS_ENHANCED))
+ pr_warn(SPECTRE_V2_IBRS_PERF_MSG);
+ break;
+
case SPECTRE_V2_LFENCE:
case SPECTRE_V2_EIBRS_LFENCE:
setup_force_cpu_cap(X86_FEATURE_RETPOLINE_LFENCE);
break;
}
+ /*
+ * Disable alternate RSB predictions in kernel when indirect CALLs and
+ * JMPs gets protection against BHI and Intramode-BTI, but RET
+ * prediction from a non-RSB predictor is still a risk.
+ */
+ if (mode == SPECTRE_V2_EIBRS_LFENCE ||
+ mode == SPECTRE_V2_EIBRS_RETPOLINE ||
+ mode == SPECTRE_V2_RETPOLINE)
+ spec_ctrl_disable_kernel_rrsba();
+
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
/*
- * If spectre v2 protection has been enabled, unconditionally fill
- * RSB during a context switch; this protects against two independent
- * issues:
+ * If Spectre v2 protection has been enabled, fill the RSB during a
+ * context switch. In general there are two types of RSB attacks
+ * across context switches, for which the CALLs/RETs may be unbalanced.
+ *
+ * 1) RSB underflow
+ *
+ * Some Intel parts have "bottomless RSB". When the RSB is empty,
+ * speculated return targets may come from the branch predictor,
+ * which could have a user-poisoned BTB or BHB entry.
+ *
+ * AMD has it even worse: *all* returns are speculated from the BTB,
+ * regardless of the state of the RSB.
+ *
+ * When IBRS or eIBRS is enabled, the "user -> kernel" attack
+ * scenario is mitigated by the IBRS branch prediction isolation
+ * properties, so the RSB buffer filling wouldn't be necessary to
+ * protect against this type of attack.
+ *
+ * The "user -> user" attack scenario is mitigated by RSB filling.
+ *
+ * 2) Poisoned RSB entry
+ *
+ * If the 'next' in-kernel return stack is shorter than 'prev',
+ * 'next' could be tricked into speculating with a user-poisoned RSB
+ * entry.
+ *
+ * The "user -> kernel" attack scenario is mitigated by SMEP and
+ * eIBRS.
*
- * - RSB underflow (and switch to BTB) on Skylake+
- * - SpectreRSB variant of spectre v2 on X86_BUG_SPECTRE_V2 CPUs
+ * The "user -> user" scenario, also known as SpectreBHB, requires
+ * RSB clearing.
+ *
+ * So to mitigate all cases, unconditionally fill RSB on context
+ * switches.
+ *
+ * FIXME: Is this pointless for retbleed-affected AMD?
*/
setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
pr_info("Spectre v2 / SpectreRSB mitigation: Filling RSB on context switch\n");
/*
- * Retpoline means the kernel is safe because it has no indirect
- * branches. Enhanced IBRS protects firmware too, so, enable restricted
- * speculation around firmware calls only when Enhanced IBRS isn't
- * supported.
+ * Similar to context switches, there are two types of RSB attacks
+ * after vmexit:
+ *
+ * 1) RSB underflow
+ *
+ * 2) Poisoned RSB entry
+ *
+ * When retpoline is enabled, both are mitigated by filling/clearing
+ * the RSB.
+ *
+ * When IBRS is enabled, while #1 would be mitigated by the IBRS branch
+ * prediction isolation protections, RSB still needs to be cleared
+ * because of #2. Note that SMEP provides no protection here, unlike
+ * user-space-poisoned RSB entries.
+ *
+ * eIBRS, on the other hand, has RSB-poisoning protections, so it
+ * doesn't need RSB clearing after vmexit.
+ */
+ if (boot_cpu_has(X86_FEATURE_RETPOLINE) ||
+ boot_cpu_has(X86_FEATURE_KERNEL_IBRS))
+ setup_force_cpu_cap(X86_FEATURE_RSB_VMEXIT);
+
+ /*
+ * Retpoline protects the kernel, but doesn't protect firmware. IBRS
+ * and Enhanced IBRS protect firmware too, so enable IBRS around
+ * firmware calls only when IBRS / Enhanced IBRS aren't otherwise
+ * enabled.
*
* Use "mode" to check Enhanced IBRS instead of boot_cpu_has(), because
* the user might select retpoline on the kernel command line and if
* the CPU supports Enhanced IBRS, kernel might un-intentionally not
* enable IBRS around firmware calls.
*/
- if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_eibrs_mode(mode)) {
+ if (boot_cpu_has_bug(X86_BUG_RETBLEED) &&
+ boot_cpu_has(X86_FEATURE_IBPB) &&
+ (boot_cpu_data.x86_vendor == X86_VENDOR_AMD ||
+ boot_cpu_data.x86_vendor == X86_VENDOR_HYGON)) {
+
+ if (retbleed_cmd != RETBLEED_CMD_IBPB) {
+ setup_force_cpu_cap(X86_FEATURE_USE_IBPB_FW);
+ pr_info("Enabling Speculation Barrier for firmware calls\n");
+ }
+
+ } else if (boot_cpu_has(X86_FEATURE_IBRS) && !spectre_v2_in_ibrs_mode(mode)) {
setup_force_cpu_cap(X86_FEATURE_USE_IBRS_FW);
pr_info("Enabling Restricted Speculation for firmware calls\n");
}
/* Set up IBPB and STIBP depending on the general spectre V2 command */
- spectre_v2_user_select_mitigation(cmd);
+ spectre_v2_cmd = cmd;
}
static void update_stibp_msr(void * __unused)
{
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ u64 val = spec_ctrl_current() | (x86_spec_ctrl_base & SPEC_CTRL_STIBP);
+ write_spec_ctrl_current(val, true);
}
/* Update x86_spec_ctrl_base in case SMT state changed. */
/* Update the static key controlling the MDS CPU buffer clear in idle */
static void update_mds_branch_idle(void)
{
+ u64 ia32_cap = x86_read_arch_cap_msr();
+
/*
* Enable the idle clearing if SMT is active on CPUs which are
* affected only by MSBDS and not any other MDS variant.
if (!boot_cpu_has_bug(X86_BUG_MSBDS_ONLY))
return;
- if (sched_smt_active())
+ if (sched_smt_active()) {
static_branch_enable(&mds_idle_clear);
- else
+ } else if (mmio_mitigation == MMIO_MITIGATION_OFF ||
+ (ia32_cap & ARCH_CAP_FBSDP_NO)) {
static_branch_disable(&mds_idle_clear);
+ }
}
#define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n"
#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n"
+#define MMIO_MSG_SMT "MMIO Stale Data CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/processor_mmio_stale_data.html for more details.\n"
void cpu_bugs_smt_update(void)
{
break;
}
+ switch (mmio_mitigation) {
+ case MMIO_MITIGATION_VERW:
+ case MMIO_MITIGATION_UCODE_NEEDED:
+ if (sched_smt_active())
+ pr_warn_once(MMIO_MSG_SMT);
+ break;
+ case MMIO_MITIGATION_OFF:
+ break;
+ }
+
mutex_unlock(&spec_ctrl_mutex);
}
break;
}
- /*
- * If SSBD is controlled by the SPEC_CTRL MSR, then set the proper
- * bit in the mask to allow guests to use the mitigation even in the
- * case where the host does not enable it.
- */
- if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
- static_cpu_has(X86_FEATURE_AMD_SSBD)) {
- x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
- }
-
/*
* We have three CPU feature flags that are in play here:
* - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible.
x86_amd_ssb_disable();
} else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
}
}
void x86_spec_ctrl_setup_ap(void)
{
if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
- wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
+ write_spec_ctrl_current(x86_spec_ctrl_base, true);
if (ssb_mode == SPEC_STORE_BYPASS_DISABLE)
x86_amd_ssb_disable();
sched_smt_active() ? "vulnerable" : "disabled");
}
+static ssize_t mmio_stale_data_show_state(char *buf)
+{
+ if (mmio_mitigation == MMIO_MITIGATION_OFF)
+ return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]);
+
+ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) {
+ return sysfs_emit(buf, "%s; SMT Host state unknown\n",
+ mmio_strings[mmio_mitigation]);
+ }
+
+ return sysfs_emit(buf, "%s; SMT %s\n", mmio_strings[mmio_mitigation],
+ sched_smt_active() ? "vulnerable" : "disabled");
+}
+
static char *stibp_state(void)
{
- if (spectre_v2_in_eibrs_mode(spectre_v2_enabled))
+ if (spectre_v2_in_ibrs_mode(spectre_v2_enabled))
return "";
switch (spectre_v2_user_stibp) {
return sprintf(buf, "%s\n", srbds_strings[srbds_mitigation]);
}
+static ssize_t retbleed_show_state(char *buf)
+{
+ if (retbleed_mitigation == RETBLEED_MITIGATION_UNRET) {
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD &&
+ boot_cpu_data.x86_vendor != X86_VENDOR_HYGON)
+ return sprintf(buf, "Vulnerable: untrained return thunk on non-Zen uarch\n");
+
+ return sprintf(buf, "%s; SMT %s\n",
+ retbleed_strings[retbleed_mitigation],
+ !sched_smt_active() ? "disabled" :
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+ spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ?
+ "enabled with STIBP protection" : "vulnerable");
+ }
+
+ return sprintf(buf, "%s\n", retbleed_strings[retbleed_mitigation]);
+}
+
static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr,
char *buf, unsigned int bug)
{
case X86_BUG_SRBDS:
return srbds_show_state(buf);
+ case X86_BUG_MMIO_STALE_DATA:
+ return mmio_stale_data_show_state(buf);
+
+ case X86_BUG_RETBLEED:
+ return retbleed_show_state(buf);
+
default:
break;
}
{
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
}
+
+ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA);
+}
+
+ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ return cpu_show_common(dev, attr, buf, X86_BUG_RETBLEED);
+}
#endif
{}
};
+#define VULNBL(vendor, family, model, blacklist) \
+ X86_MATCH_VENDOR_FAM_MODEL(vendor, family, model, blacklist)
+
#define VULNBL_INTEL_STEPPINGS(model, steppings, issues) \
X86_MATCH_VENDOR_FAM_MODEL_STEPPINGS_FEATURE(INTEL, 6, \
INTEL_FAM6_##model, steppings, \
X86_FEATURE_ANY, issues)
+#define VULNBL_AMD(family, blacklist) \
+ VULNBL(AMD, family, X86_MODEL_ANY, blacklist)
+
+#define VULNBL_HYGON(family, blacklist) \
+ VULNBL(HYGON, family, X86_MODEL_ANY, blacklist)
+
#define SRBDS BIT(0)
+/* CPU is affected by X86_BUG_MMIO_STALE_DATA */
+#define MMIO BIT(1)
+/* CPU is affected by Shared Buffers Data Sampling (SBDS), a variant of X86_BUG_MMIO_STALE_DATA */
+#define MMIO_SBDS BIT(2)
+/* CPU is affected by RETbleed, speculating where you would not expect it */
+#define RETBLEED BIT(3)
static const struct x86_cpu_id cpu_vuln_blacklist[] __initconst = {
VULNBL_INTEL_STEPPINGS(IVYBRIDGE, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_L, X86_STEPPING_ANY, SRBDS),
VULNBL_INTEL_STEPPINGS(HASWELL_G, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(HASWELL_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_D, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL_G, X86_STEPPING_ANY, SRBDS),
+ VULNBL_INTEL_STEPPINGS(BROADWELL_X, X86_STEPPING_ANY, MMIO),
VULNBL_INTEL_STEPPINGS(BROADWELL, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPINGS(0x0, 0xC), SRBDS),
- VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPINGS(0x0, 0xD), SRBDS),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE_X, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(SKYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE_L, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(KABYLAKE, X86_STEPPING_ANY, SRBDS | MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(CANNONLAKE_L, X86_STEPPING_ANY, RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ICELAKE_X, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPINGS(0x0, 0x0), MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(COMETLAKE_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(LAKEFIELD, X86_STEPPING_ANY, MMIO | MMIO_SBDS | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ROCKETLAKE, X86_STEPPING_ANY, MMIO | RETBLEED),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_D, X86_STEPPING_ANY, MMIO),
+ VULNBL_INTEL_STEPPINGS(ATOM_TREMONT_L, X86_STEPPING_ANY, MMIO | MMIO_SBDS),
+
+ VULNBL_AMD(0x15, RETBLEED),
+ VULNBL_AMD(0x16, RETBLEED),
+ VULNBL_AMD(0x17, RETBLEED),
+ VULNBL_HYGON(0x18, RETBLEED),
{}
};
return ia32_cap;
}
+static bool arch_cap_mmio_immune(u64 ia32_cap)
+{
+ return (ia32_cap & ARCH_CAP_FBSDP_NO &&
+ ia32_cap & ARCH_CAP_PSDP_NO &&
+ ia32_cap & ARCH_CAP_SBDR_SSDP_NO);
+}
+
static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
{
u64 ia32_cap = x86_read_arch_cap_msr();
/*
* SRBDS affects CPUs which support RDRAND or RDSEED and are listed
* in the vulnerability blacklist.
+ *
+ * Some of the implications and mitigation of Shared Buffers Data
+ * Sampling (SBDS) are similar to SRBDS. Give SBDS same treatment as
+ * SRBDS.
*/
if ((cpu_has(c, X86_FEATURE_RDRAND) ||
cpu_has(c, X86_FEATURE_RDSEED)) &&
- cpu_matches(cpu_vuln_blacklist, SRBDS))
+ cpu_matches(cpu_vuln_blacklist, SRBDS | MMIO_SBDS))
setup_force_cpu_bug(X86_BUG_SRBDS);
+ /*
+ * Processor MMIO Stale Data bug enumeration
+ *
+ * Affected CPU list is generally enough to enumerate the vulnerability,
+ * but for virtualization case check for ARCH_CAP MSR bits also, VMM may
+ * not want the guest to enumerate the bug.
+ */
+ if (cpu_matches(cpu_vuln_blacklist, MMIO) &&
+ !arch_cap_mmio_immune(ia32_cap))
+ setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA);
+
+ if (!cpu_has(c, X86_FEATURE_BTC_NO)) {
+ if (cpu_matches(cpu_vuln_blacklist, RETBLEED) || (ia32_cap & ARCH_CAP_RSBA))
+ setup_force_cpu_bug(X86_BUG_RETBLEED);
+ }
+
if (cpu_matches(cpu_vuln_whitelist, NO_MELTDOWN))
return;
static inline void tsx_ap_init(void) { }
#endif /* CONFIG_CPU_SUP_INTEL */
+extern void init_spectral_chicken(struct cpuinfo_x86 *c);
+
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void get_cpu_address_sizes(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
/* get apicid instead of initial apic id from cpuid */
c->apicid = hard_smp_processor_id();
+ /*
+ * XXX someone from Hygon needs to confirm this DTRT
+ *
+ init_spectral_chicken(c);
+ */
+
set_cpu_cap(c, X86_FEATURE_ZEN);
set_cpu_cap(c, X86_FEATURE_CPB);
{ X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 },
{ X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 },
{ X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 },
+ { X86_FEATURE_RRSBA_CTRL, CPUID_EDX, 2, 0x00000007, 2 },
{ X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 },
{ X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 },
{ X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 },
} __attribute__((packed));
};
-#define RET_SIZE 1 + IS_ENABLED(CONFIG_SLS)
+#define RET_SIZE (IS_ENABLED(CONFIG_RETPOLINE) ? 5 : 1 + IS_ENABLED(CONFIG_SLS))
static unsigned long
create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
goto fail;
ip = trampoline + size;
- memcpy(ip, retq, RET_SIZE);
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ __text_gen_insn(ip, JMP32_INSN_OPCODE, ip, &__x86_return_thunk, JMP32_INSN_SIZE);
+ else
+ memcpy(ip, retq, sizeof(retq));
/* No need to test direct calls on created trampolines */
if (ops->flags & FTRACE_OPS_FL_SAVE_REGS) {
jmp ftrace_epilogue
SYM_FUNC_END(ftrace_caller);
+STACK_FRAME_NON_STANDARD_FP(ftrace_caller)
SYM_FUNC_START(ftrace_epilogue)
/*
jmp ftrace_epilogue
SYM_FUNC_END(ftrace_regs_caller)
+STACK_FRAME_NON_STANDARD_FP(ftrace_regs_caller)
#else /* ! CONFIG_DYNAMIC_FTRACE */
jmp ftrace_stub
SYM_FUNC_END(__fentry__)
EXPORT_SYMBOL(__fentry__)
+STACK_FRAME_NON_STANDARD_FP(__fentry__)
+
#endif /* CONFIG_DYNAMIC_FTRACE */
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-SYM_FUNC_START(return_to_handler)
+SYM_CODE_START(return_to_handler)
+ UNWIND_HINT_EMPTY
+ ANNOTATE_NOENDBR
subq $16, %rsp
/* Save the return values */
int3
.Ldo_rop:
mov %rdi, (%rsp)
- UNWIND_HINT_FUNC
RET
-SYM_FUNC_END(return_to_handler)
+SYM_CODE_END(return_to_handler)
#endif
/* Don't add a printk in there. printk relies on the PDA which is not initialized
yet. */
-static void __init clear_bss(void)
+void __init clear_bss(void)
{
memset(__bss_start, 0,
(unsigned long) __bss_stop - (unsigned long) __bss_start);
+ memset(__brk_base, 0,
+ (unsigned long) __brk_limit - (unsigned long) __brk_base);
}
static unsigned long get_cmd_line_ptr(void)
#include <asm/cpufeatures.h>
#include <asm/percpu.h>
#include <asm/nops.h>
+#include <asm/nospec-branch.h>
#include <asm/bootparam.h>
#include <asm/export.h>
#include <asm/pgtable_32.h>
UNWIND_HINT_IRET_REGS offset=8
ENDBR
+ ANNOTATE_UNRET_END
+
/* Build pt_regs */
PUSH_AND_CLEAR_REGS
SYM_CODE_START_LOCAL(early_idt_handler_common)
UNWIND_HINT_IRET_REGS offset=16
+ ANNOTATE_UNRET_END
/*
* The stack is the hardware frame, an error code or zero, and the
* vector number.
UNWIND_HINT_IRET_REGS offset=8
ENDBR
+ ANNOTATE_UNRET_END
+
/* Build pt_regs */
PUSH_AND_CLEAR_REGS
{
const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
*para = NULL, *orc = NULL, *orc_ip = NULL,
- *retpolines = NULL, *ibt_endbr = NULL;
+ *retpolines = NULL, *returns = NULL, *ibt_endbr = NULL;
char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
orc_ip = s;
if (!strcmp(".retpoline_sites", secstrings + s->sh_name))
retpolines = s;
+ if (!strcmp(".return_sites", secstrings + s->sh_name))
+ returns = s;
if (!strcmp(".ibt_endbr_seal", secstrings + s->sh_name))
ibt_endbr = s;
}
void *rseg = (void *)retpolines->sh_addr;
apply_retpolines(rseg, rseg + retpolines->sh_size);
}
+ if (returns) {
+ void *rseg = (void *)returns->sh_addr;
+ apply_returns(rseg, rseg + returns->sh_size);
+ }
if (alt) {
/* patch .altinstructions */
void *aseg = (void *)alt->sh_addr;
}
if (updmsr)
- wrmsrl(MSR_IA32_SPEC_CTRL, msr);
+ write_spec_ctrl_current(msr, false);
}
static unsigned long speculation_ctrl_update_tif(struct task_struct *tsk)
#include <linux/linkage.h>
#include <asm/page_types.h>
#include <asm/kexec.h>
+#include <asm/nospec-branch.h>
#include <asm/processor-flags.h>
/*
- * Must be relocatable PIC code callable as a C function
+ * Must be relocatable PIC code callable as a C function, in particular
+ * there must be a plain RET and not jump to return thunk.
*/
#define PTR(x) (x << 2)
movl %edi, %eax
addl $(identity_mapped - relocate_kernel), %eax
pushl %eax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %edx, %edx
xorl %esi, %esi
xorl %ebp, %ebp
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
1:
popl %edx
movl CP_PA_SWAP_PAGE(%edi), %esp
addl $PAGE_SIZE, %esp
2:
+ ANNOTATE_RETPOLINE_SAFE
call *%edx
/* get the re-entry point of the peer system */
movl %edi, %eax
addl $(virtual_mapped - relocate_kernel), %eax
pushl %eax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popl %edi
popl %esi
popl %ebx
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(virtual_mapped)
/* Do the copies */
popl %edi
popl %ebx
popl %ebp
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
#include <asm/unwind_hints.h>
/*
- * Must be relocatable PIC code callable as a C function
+ * Must be relocatable PIC code callable as a C function, in particular
+ * there must be a plain RET and not jump to return thunk.
*/
#define PTR(x) (x << 3)
/* jump to identity mapped page */
addq $(identity_mapped - relocate_kernel), %r8
pushq %r8
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(relocate_kernel)
SYM_CODE_START_LOCAL_NOALIGN(identity_mapped)
xorl %r14d, %r14d
xorl %r15d, %r15d
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
1:
popq %rdx
call swap_pages
movq $virtual_mapped, %rax
pushq %rax
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(identity_mapped)
SYM_CODE_START_LOCAL_NOALIGN(virtual_mapped)
popq %r12
popq %rbp
popq %rbx
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(virtual_mapped)
/* Do the copies */
lea PAGE_SIZE(%rax), %rsi
jmp 0b
3:
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_CODE_END(swap_pages)
.globl kexec_control_code_size
// SPDX-License-Identifier: GPL-2.0
-#include <linux/dev_printk.h>
#include <linux/ioport.h>
+#include <linux/printk.h>
#include <asm/e820/api.h>
+#include <asm/pci_x86.h>
static void resource_clip(struct resource *res, resource_size_t start,
resource_size_t end)
res->start = end + 1;
}
-void remove_e820_regions(struct device *dev, struct resource *avail)
+static void remove_e820_regions(struct resource *avail)
{
int i;
struct e820_entry *entry;
u64 e820_start, e820_end;
struct resource orig = *avail;
- if (!(avail->flags & IORESOURCE_MEM))
+ if (!pci_use_e820)
return;
for (i = 0; i < e820_table->nr_entries; i++) {
resource_clip(avail, e820_start, e820_end);
if (orig.start != avail->start || orig.end != avail->end) {
- dev_info(dev, "clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n",
+ pr_info("clipped %pR to %pR for e820 entry [mem %#010Lx-%#010Lx]\n",
&orig, avail, e820_start, e820_end);
orig = *avail;
}
* the low 1MB unconditionally, as this area is needed for some ISA
* cards requiring a memory range, e.g. the i82365 PCMCIA controller.
*/
- if (avail->flags & IORESOURCE_MEM)
+ if (avail->flags & IORESOURCE_MEM) {
resource_clip(avail, BIOS_ROM_BASE, BIOS_ROM_END);
+
+ remove_e820_regions(avail);
+ }
}
#endif
-/*
- * Range of the BSS area. The size of the BSS area is determined
- * at link time, with RESERVE_BRK() facility reserving additional
- * chunks.
- */
unsigned long _brk_start = (unsigned long)__brk_base;
unsigned long _brk_end = (unsigned long)__brk_base;
return ES_VMM_ERROR;
}
-enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb, bool set_ghcb_msr,
- struct es_em_ctxt *ctxt, u64 exit_code,
- u64 exit_info_1, u64 exit_info_2)
+static enum es_result sev_es_ghcb_hv_call(struct ghcb *ghcb,
+ struct es_em_ctxt *ctxt,
+ u64 exit_code, u64 exit_info_1,
+ u64 exit_info_2)
{
/* Fill in protocol and format specifiers */
ghcb->protocol_version = ghcb_version;
ghcb_set_sw_exit_info_1(ghcb, exit_info_1);
ghcb_set_sw_exit_info_2(ghcb, exit_info_2);
- /*
- * Hyper-V unenlightened guests use a paravisor for communicating and
- * GHCB pages are being allocated and set up by that paravisor. Linux
- * should not change the GHCB page's physical address.
- */
- if (set_ghcb_msr)
- sev_es_wr_ghcb_msr(__pa(ghcb));
-
+ sev_es_wr_ghcb_msr(__pa(ghcb));
VMGEXIT();
return verify_exception_info(ghcb, ctxt);
*/
sw_scratch = __pa(ghcb) + offsetof(struct ghcb, shared_buffer);
ghcb_set_sw_scratch(ghcb, sw_scratch);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_IOIO,
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO,
exit_info_1, exit_info_2);
if (ret != ES_OK)
return ret;
ghcb_set_rax(ghcb, rax);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt,
- SVM_EXIT_IOIO, exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_IOIO, exit_info_1, 0);
if (ret != ES_OK)
return ret;
/* xgetbv will cause #GP - use reset value for xcr0 */
ghcb_set_xcr0(ghcb, 1);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_CPUID, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_CPUID, 0, 0);
if (ret != ES_OK)
return ret;
bool rdtscp = (exit_code == SVM_EXIT_RDTSCP);
enum es_result ret;
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, 0, 0);
if (ret != ES_OK)
return ret;
ghcb_set_sw_scratch(ghcb, (u64)__pa(data));
/* This will advance the shared buffer data points to. */
- ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, SVM_VMGEXIT_PSC, 0, 0);
/*
* Page State Change VMGEXIT can pass error code through
ghcb_set_rdx(ghcb, regs->dx);
}
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_MSR,
- exit_info_1, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_MSR, exit_info_1, 0);
if ((ret == ES_OK) && (!exit_info_1)) {
regs->ax = ghcb->save.rax;
ghcb_set_sw_scratch(ghcb, ghcb_pa + offsetof(struct ghcb, shared_buffer));
- return sev_es_ghcb_hv_call(ghcb, true, ctxt, exit_code, exit_info_1, exit_info_2);
+ return sev_es_ghcb_hv_call(ghcb, ctxt, exit_code, exit_info_1, exit_info_2);
}
/*
/* Using a value of 0 for ExitInfo1 means RAX holds the value */
ghcb_set_rax(ghcb, val);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WRITE_DR7, 0, 0);
if (ret != ES_OK)
return ret;
static enum es_result vc_handle_wbinvd(struct ghcb *ghcb,
struct es_em_ctxt *ctxt)
{
- return sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_WBINVD, 0, 0);
+ return sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_WBINVD, 0, 0);
}
static enum es_result vc_handle_rdpmc(struct ghcb *ghcb, struct es_em_ctxt *ctxt)
ghcb_set_rcx(ghcb, ctxt->regs->cx);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_RDPMC, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_RDPMC, 0, 0);
if (ret != ES_OK)
return ret;
if (x86_platform.hyper.sev_es_hcall_prepare)
x86_platform.hyper.sev_es_hcall_prepare(ghcb, ctxt->regs);
- ret = sev_es_ghcb_hv_call(ghcb, true, ctxt, SVM_EXIT_VMMCALL, 0, 0);
+ ret = sev_es_ghcb_hv_call(ghcb, ctxt, SVM_EXIT_VMMCALL, 0, 0);
if (ret != ES_OK)
return ret;
ghcb_set_rbx(ghcb, input->data_npages);
}
- ret = sev_es_ghcb_hv_call(ghcb, true, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
+ ret = sev_es_ghcb_hv_call(ghcb, &ctxt, exit_code, input->req_gpa, input->resp_gpa);
if (ret)
goto e_put;
RET = 3, /* tramp / site cond-tail-call */
};
+/*
+ * ud1 %esp, %ecx - a 3 byte #UD that is unique to trampolines, chosen such
+ * that there is no false-positive trampoline identification while also being a
+ * speculation stop.
+ */
+static const u8 tramp_ud[] = { 0x0f, 0xb9, 0xcc };
+
/*
* cs cs cs xorl %eax, %eax - a single 5 byte instruction that clears %[er]ax
*/
static const u8 retinsn[] = { RET_INSN_OPCODE, 0xcc, 0xcc, 0xcc, 0xcc };
-static void __ref __static_call_transform(void *insn, enum insn_type type, void *func)
+static void __ref __static_call_transform(void *insn, enum insn_type type,
+ void *func, bool modinit)
{
const void *emulate = NULL;
int size = CALL_INSN_SIZE;
break;
case RET:
- code = &retinsn;
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK))
+ code = text_gen_insn(JMP32_INSN_OPCODE, insn, &__x86_return_thunk);
+ else
+ code = &retinsn;
break;
}
if (memcmp(insn, code, size) == 0)
return;
- if (unlikely(system_state == SYSTEM_BOOTING))
+ if (system_state == SYSTEM_BOOTING || modinit)
return text_poke_early(insn, code, size);
text_poke_bp(insn, code, size, emulate);
{
u8 opcode = *(u8 *)insn;
- if (tramp && memcmp(insn+5, "SCT", 3)) {
+ if (tramp && memcmp(insn+5, tramp_ud, 3)) {
pr_err("trampoline signature fail");
BUG();
}
if (tramp) {
__static_call_validate(tramp, true, true);
- __static_call_transform(tramp, __sc_insn(!func, true), func);
+ __static_call_transform(tramp, __sc_insn(!func, true), func, false);
}
if (IS_ENABLED(CONFIG_HAVE_STATIC_CALL_INLINE) && site) {
__static_call_validate(site, tail, false);
- __static_call_transform(site, __sc_insn(!func, tail), func);
+ __static_call_transform(site, __sc_insn(!func, tail), func, false);
}
mutex_unlock(&text_mutex);
}
EXPORT_SYMBOL_GPL(arch_static_call_transform);
+
+#ifdef CONFIG_RETHUNK
+/*
+ * This is called by apply_returns() to fix up static call trampolines,
+ * specifically ARCH_DEFINE_STATIC_CALL_NULL_TRAMP which is recorded as
+ * having a return trampoline.
+ *
+ * The problem is that static_call() is available before determining
+ * X86_FEATURE_RETHUNK and, by implication, running alternatives.
+ *
+ * This means that __static_call_transform() above can have overwritten the
+ * return trampoline and we now need to fix things up to be consistent.
+ */
+bool __static_call_fixup(void *tramp, u8 op, void *dest)
+{
+ if (memcmp(tramp+5, tramp_ud, 3)) {
+ /* Not a trampoline site, not our problem. */
+ return false;
+ }
+
+ mutex_lock(&text_mutex);
+ if (op == RET_INSN_OPCODE || dest == &__x86_return_thunk)
+ __static_call_transform(tramp, RET, NULL, true);
+ mutex_unlock(&text_mutex);
+
+ return true;
+}
+#endif
#ifdef CONFIG_RETPOLINE
__indirect_thunk_start = .;
- *(.text.__x86.indirect_thunk)
+ *(.text.__x86.*)
__indirect_thunk_end = .;
#endif
} :text =0xcccc
*(.retpoline_sites)
__retpoline_sites_end = .;
}
+
+ . = ALIGN(8);
+ .return_sites : AT(ADDR(.return_sites) - LOAD_OFFSET) {
+ __return_sites = .;
+ *(.return_sites)
+ __return_sites_end = .;
+ }
#endif
#ifdef CONFIG_X86_KERNEL_IBT
.brk : AT(ADDR(.brk) - LOAD_OFFSET) {
__brk_base = .;
. += 64 * 1024; /* 64k alignment slop space */
- *(.brk_reservation) /* areas brk users have reserved */
+ *(.bss..brk) /* areas brk users have reserved */
__brk_limit = .;
}
#define X8(x...) X4(x), X4(x)
#define X16(x...) X8(x), X8(x)
-#define NR_FASTOP (ilog2(sizeof(ulong)) + 1)
-#define FASTOP_SIZE (8 * (1 + HAS_KERNEL_IBT))
-
struct opcode {
u64 flags;
u8 intercept;
* Moreover, they are all exactly FASTOP_SIZE bytes long, so functions for
* different operand sizes can be reached by calculation, rather than a jump
* table (which would be bigger than the code).
+ *
+ * The 16 byte alignment, considering 5 bytes for the RET thunk, 3 for ENDBR
+ * and 1 for the straight line speculation INT3, leaves 7 bytes for the
+ * body of the function. Currently none is larger than 4.
*/
static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop);
+#define FASTOP_SIZE 16
+
#define __FOP_FUNC(name) \
".align " __stringify(FASTOP_SIZE) " \n\t" \
".type " name ", @function \n\t" \
#define FOP_RET(name) \
__FOP_RET(#name)
-#define FOP_START(op) \
+#define __FOP_START(op, align) \
extern void em_##op(struct fastop *fake); \
asm(".pushsection .text, \"ax\" \n\t" \
".global em_" #op " \n\t" \
- ".align " __stringify(FASTOP_SIZE) " \n\t" \
+ ".align " __stringify(align) " \n\t" \
"em_" #op ":\n\t"
+#define FOP_START(op) __FOP_START(op, FASTOP_SIZE)
+
#define FOP_END \
".popsection")
/*
* Depending on .config the SETcc functions look like:
*
- * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
- * SETcc %al [3 bytes]
- * RET [1 byte]
- * INT3 [1 byte; CONFIG_SLS]
- *
- * Which gives possible sizes 4, 5, 8 or 9. When rounded up to the
- * next power-of-two alignment they become 4, 8 or 16 resp.
+ * ENDBR [4 bytes; CONFIG_X86_KERNEL_IBT]
+ * SETcc %al [3 bytes]
+ * RET | JMP __x86_return_thunk [1,5 bytes; CONFIG_RETHUNK]
+ * INT3 [1 byte; CONFIG_SLS]
*/
-#define SETCC_LENGTH (ENDBR_INSN_SIZE + 4 + IS_ENABLED(CONFIG_SLS))
-#define SETCC_ALIGN (4 << IS_ENABLED(CONFIG_SLS) << HAS_KERNEL_IBT)
-static_assert(SETCC_LENGTH <= SETCC_ALIGN);
+#define SETCC_ALIGN 16
#define FOP_SETCC(op) \
".align " __stringify(SETCC_ALIGN) " \n\t" \
#op ": \n\t" \
ASM_ENDBR \
#op " %al \n\t" \
- __FOP_RET(#op)
+ __FOP_RET(#op) \
+ ".skip " __stringify(SETCC_ALIGN) " - (.-" #op "), 0xcc \n\t"
-FOP_START(setcc)
+__FOP_START(setcc, SETCC_ALIGN)
FOP_SETCC(seto)
FOP_SETCC(setno)
FOP_SETCC(setc)
}
}
+static void kvm_lapic_xapic_id_updated(struct kvm_lapic *apic)
+{
+ struct kvm *kvm = apic->vcpu->kvm;
+
+ if (KVM_BUG_ON(apic_x2apic_mode(apic), kvm))
+ return;
+
+ if (kvm_xapic_id(apic) == apic->vcpu->vcpu_id)
+ return;
+
+ kvm_set_apicv_inhibit(apic->vcpu->kvm, APICV_INHIBIT_REASON_APIC_ID_MODIFIED);
+}
+
static int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
{
int ret = 0;
switch (reg) {
case APIC_ID: /* Local APIC ID */
- if (!apic_x2apic_mode(apic))
+ if (!apic_x2apic_mode(apic)) {
kvm_apic_set_xapic_id(apic, val >> 24);
- else
+ kvm_lapic_xapic_id_updated(apic);
+ } else {
ret = 1;
+ }
break;
case APIC_TASKPRI:
MSR_IA32_APICBASE_BASE;
if ((value & MSR_IA32_APICBASE_ENABLE) &&
- apic->base_address != APIC_DEFAULT_PHYS_BASE)
- pr_warn_once("APIC base relocation is unsupported by KVM");
+ apic->base_address != APIC_DEFAULT_PHYS_BASE) {
+ kvm_set_apicv_inhibit(apic->vcpu->kvm,
+ APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
+ }
}
void kvm_apic_update_apicv(struct kvm_vcpu *vcpu)
icr = __kvm_lapic_get_reg64(s->regs, APIC_ICR);
__kvm_lapic_set_reg(s->regs, APIC_ICR2, icr >> 32);
}
+ } else {
+ kvm_lapic_xapic_id_updated(vcpu->arch.apic);
}
return 0;
root = mmu_alloc_root(vcpu, i << (30 - PAGE_SHIFT),
i << 30, PT32_ROOT_LEVEL, true);
mmu->pae_root[i] = root | PT_PRESENT_MASK |
- shadow_me_mask;
+ shadow_me_value;
}
mmu->root.hpa = __pa(mmu->pae_root);
} else {
roots_to_free |= KVM_MMU_ROOT_CURRENT;
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
- if (is_obsolete_root(kvm, mmu->root.hpa))
+ if (is_obsolete_root(kvm, mmu->prev_roots[i].hpa))
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
}
return true;
}
+/*
+ * Step the iterator back up a level in the paging structure. Should only be
+ * used when the iterator is below the root level.
+ */
+void tdp_iter_step_up(struct tdp_iter *iter)
+{
+ WARN_ON(!try_step_up(iter));
+}
+
/*
* Step to the next SPTE in a pre-order traversal of the paging structure.
* To get to the next SPTE, the iterator either steps down towards the goal
int min_level, gfn_t next_last_level_gfn);
void tdp_iter_next(struct tdp_iter *iter);
void tdp_iter_restart(struct tdp_iter *iter);
+void tdp_iter_step_up(struct tdp_iter *iter);
#endif /* __KVM_X86_MMU_TDP_ITER_H */
gfn_t start = slot->base_gfn;
gfn_t end = start + slot->npages;
struct tdp_iter iter;
+ int max_mapping_level;
kvm_pfn_t pfn;
rcu_read_lock();
tdp_root_for_each_pte(iter, root, start, end) {
-retry:
if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
continue;
!is_last_spte(iter.old_spte, iter.level))
continue;
+ /*
+ * This is a leaf SPTE. Check if the PFN it maps can
+ * be mapped at a higher level.
+ */
pfn = spte_to_pfn(iter.old_spte);
- if (kvm_is_reserved_pfn(pfn) ||
- iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
- pfn, PG_LEVEL_NUM))
+
+ if (kvm_is_reserved_pfn(pfn))
continue;
+ max_mapping_level = kvm_mmu_max_mapping_level(kvm, slot,
+ iter.gfn, pfn, PG_LEVEL_NUM);
+
+ WARN_ON(max_mapping_level < iter.level);
+
+ /*
+ * If this page is already mapped at the highest
+ * viable level, there's nothing more to do.
+ */
+ if (max_mapping_level == iter.level)
+ continue;
+
+ /*
+ * The page can be remapped at a higher level, so step
+ * up to zap the parent SPTE.
+ */
+ while (max_mapping_level > iter.level)
+ tdp_iter_step_up(&iter);
+
/* Note, a successful atomic zap also does a remote TLB flush. */
- if (tdp_mmu_zap_spte_atomic(kvm, &iter))
- goto retry;
+ tdp_mmu_zap_spte_atomic(kvm, &iter);
+
+ /*
+ * If the atomic zap fails, the iter will recurse back into
+ * the same subtree to retry.
+ */
}
rcu_read_unlock();
static int avic_kick_target_vcpus_fast(struct kvm *kvm, struct kvm_lapic *source,
u32 icrl, u32 icrh, u32 index)
{
- u32 dest, apic_id;
- struct kvm_vcpu *vcpu;
+ u32 l1_physical_id, dest;
+ struct kvm_vcpu *target_vcpu;
int dest_mode = icrl & APIC_DEST_MASK;
int shorthand = icrl & APIC_SHORT_MASK;
struct kvm_svm *kvm_svm = to_kvm_svm(kvm);
- u32 *avic_logical_id_table = page_address(kvm_svm->avic_logical_id_table_page);
if (shorthand != APIC_DEST_NOSHORT)
return -EINVAL;
- /*
- * The AVIC incomplete IPI #vmexit info provides index into
- * the physical APIC ID table, which can be used to derive
- * guest physical APIC ID.
- */
+ if (apic_x2apic_mode(source))
+ dest = icrh;
+ else
+ dest = GET_APIC_DEST_FIELD(icrh);
+
if (dest_mode == APIC_DEST_PHYSICAL) {
- apic_id = index;
+ /* broadcast destination, use slow path */
+ if (apic_x2apic_mode(source) && dest == X2APIC_BROADCAST)
+ return -EINVAL;
+ if (!apic_x2apic_mode(source) && dest == APIC_BROADCAST)
+ return -EINVAL;
+
+ l1_physical_id = dest;
+
+ if (WARN_ON_ONCE(l1_physical_id != index))
+ return -EINVAL;
+
} else {
- if (!apic_x2apic_mode(source)) {
- /* For xAPIC logical mode, the index is for logical APIC table. */
- apic_id = avic_logical_id_table[index] & 0x1ff;
+ u32 bitmap, cluster;
+ int logid_index;
+
+ if (apic_x2apic_mode(source)) {
+ /* 16 bit dest mask, 16 bit cluster id */
+ bitmap = dest & 0xFFFF0000;
+ cluster = (dest >> 16) << 4;
+ } else if (kvm_lapic_get_reg(source, APIC_DFR) == APIC_DFR_FLAT) {
+ /* 8 bit dest mask*/
+ bitmap = dest;
+ cluster = 0;
} else {
- return -EINVAL;
+ /* 4 bit desk mask, 4 bit cluster id */
+ bitmap = dest & 0xF;
+ cluster = (dest >> 4) << 2;
}
- }
- /*
- * Assuming vcpu ID is the same as physical apic ID,
- * and use it to retrieve the target vCPU.
- */
- vcpu = kvm_get_vcpu_by_id(kvm, apic_id);
- if (!vcpu)
- return -EINVAL;
+ if (unlikely(!bitmap))
+ /* guest bug: nobody to send the logical interrupt to */
+ return 0;
- if (apic_x2apic_mode(vcpu->arch.apic))
- dest = icrh;
- else
- dest = GET_APIC_DEST_FIELD(icrh);
+ if (!is_power_of_2(bitmap))
+ /* multiple logical destinations, use slow path */
+ return -EINVAL;
- /*
- * Try matching the destination APIC ID with the vCPU.
- */
- if (kvm_apic_match_dest(vcpu, source, shorthand, dest, dest_mode)) {
- vcpu->arch.apic->irr_pending = true;
- svm_complete_interrupt_delivery(vcpu,
- icrl & APIC_MODE_MASK,
- icrl & APIC_INT_LEVELTRIG,
- icrl & APIC_VECTOR_MASK);
- return 0;
+ logid_index = cluster + __ffs(bitmap);
+
+ if (apic_x2apic_mode(source)) {
+ l1_physical_id = logid_index;
+ } else {
+ u32 *avic_logical_id_table =
+ page_address(kvm_svm->avic_logical_id_table_page);
+
+ u32 logid_entry = avic_logical_id_table[logid_index];
+
+ if (WARN_ON_ONCE(index != logid_index))
+ return -EINVAL;
+
+ /* guest bug: non existing/reserved logical destination */
+ if (unlikely(!(logid_entry & AVIC_LOGICAL_ID_ENTRY_VALID_MASK)))
+ return 0;
+
+ l1_physical_id = logid_entry &
+ AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK;
+ }
}
- return -EINVAL;
+ target_vcpu = kvm_get_vcpu_by_id(kvm, l1_physical_id);
+ if (unlikely(!target_vcpu))
+ /* guest bug: non existing vCPU is a target of this IPI*/
+ return 0;
+
+ target_vcpu->arch.apic->irr_pending = true;
+ svm_complete_interrupt_delivery(target_vcpu,
+ icrl & APIC_MODE_MASK,
+ icrl & APIC_INT_LEVELTRIG,
+ icrl & APIC_VECTOR_MASK);
+ return 0;
}
static void avic_kick_target_vcpus(struct kvm *kvm, struct kvm_lapic *source,
return ret;
}
-static int avic_handle_apic_id_update(struct kvm_vcpu *vcpu)
-{
- u64 *old, *new;
- struct vcpu_svm *svm = to_svm(vcpu);
- u32 id = kvm_xapic_id(vcpu->arch.apic);
-
- if (vcpu->vcpu_id == id)
- return 0;
-
- old = avic_get_physical_id_entry(vcpu, vcpu->vcpu_id);
- new = avic_get_physical_id_entry(vcpu, id);
- if (!new || !old)
- return 1;
-
- /* We need to move physical_id_entry to new offset */
- *new = *old;
- *old = 0ULL;
- to_svm(vcpu)->avic_physical_id_cache = new;
-
- /*
- * Also update the guest physical APIC ID in the logical
- * APIC ID table entry if already setup the LDR.
- */
- if (svm->ldr_reg)
- avic_handle_ldr_update(vcpu);
-
- return 0;
-}
-
static void avic_handle_dfr_update(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
AVIC_UNACCEL_ACCESS_OFFSET_MASK;
switch (offset) {
- case APIC_ID:
- if (avic_handle_apic_id_update(vcpu))
- return 0;
- break;
case APIC_LDR:
if (avic_handle_ldr_update(vcpu))
return 0;
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu)
{
- if (avic_handle_apic_id_update(vcpu) != 0)
- return;
avic_handle_dfr_update(vcpu);
avic_handle_ldr_update(vcpu);
}
BIT(APICV_INHIBIT_REASON_PIT_REINJ) |
BIT(APICV_INHIBIT_REASON_X2APIC) |
BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
- BIT(APICV_INHIBIT_REASON_SEV);
+ BIT(APICV_INHIBIT_REASON_SEV) |
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}
return ret;
}
-void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
u64 entry;
int h_physical_id = kvm_cpu_get_apicid(cpu);
avic_update_iommu_vcpu_affinity(vcpu, h_physical_id, true);
}
-void __avic_vcpu_put(struct kvm_vcpu *vcpu)
+void avic_vcpu_put(struct kvm_vcpu *vcpu)
{
u64 entry;
struct vcpu_svm *svm = to_svm(vcpu);
WRITE_ONCE(*(svm->avic_physical_id_cache), entry);
}
-static void avic_vcpu_load(struct kvm_vcpu *vcpu)
-{
- int cpu = get_cpu();
-
- WARN_ON(cpu != vcpu->cpu);
-
- __avic_vcpu_load(vcpu, cpu);
-
- put_cpu();
-}
-
-static void avic_vcpu_put(struct kvm_vcpu *vcpu)
-{
- preempt_disable();
-
- __avic_vcpu_put(vcpu);
-
- preempt_enable();
-}
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
vmcb_mark_dirty(vmcb, VMCB_AVIC);
if (activated)
- avic_vcpu_load(vcpu);
+ avic_vcpu_load(vcpu, vcpu->cpu);
else
avic_vcpu_put(vcpu);
if (!kvm_vcpu_apicv_active(vcpu))
return;
- avic_vcpu_load(vcpu);
+ avic_vcpu_load(vcpu, vcpu->cpu);
}
struct kvm_vcpu *vcpu = &svm->vcpu;
struct vmcb *vmcb01 = svm->vmcb01.ptr;
struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+ u32 pause_count12;
+ u32 pause_thresh12;
/*
* Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
if (!nested_vmcb_needs_vls_intercept(svm))
vmcb02->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+ pause_count12 = svm->pause_filter_enabled ? svm->nested.ctl.pause_filter_count : 0;
+ pause_thresh12 = svm->pause_threshold_enabled ? svm->nested.ctl.pause_filter_thresh : 0;
if (kvm_pause_in_guest(svm->vcpu.kvm)) {
- /* use guest values since host doesn't use them */
- vmcb02->control.pause_filter_count =
- svm->pause_filter_enabled ?
- svm->nested.ctl.pause_filter_count : 0;
+ /* use guest values since host doesn't intercept PAUSE */
+ vmcb02->control.pause_filter_count = pause_count12;
+ vmcb02->control.pause_filter_thresh = pause_thresh12;
- vmcb02->control.pause_filter_thresh =
- svm->pause_threshold_enabled ?
- svm->nested.ctl.pause_filter_thresh : 0;
-
- } else if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
- /* use host values when guest doesn't use them */
+ } else {
+ /* start from host values otherwise */
vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
- } else {
- /*
- * Intercept every PAUSE otherwise and
- * ignore both host and guest values
- */
- vmcb02->control.pause_filter_count = 0;
- vmcb02->control.pause_filter_thresh = 0;
+
+ /* ... but ensure filtering is disabled if so requested. */
+ if (vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_PAUSE)) {
+ if (!pause_count12)
+ vmcb02->control.pause_filter_count = 0;
+ if (!pause_thresh12)
+ vmcb02->control.pause_filter_thresh = 0;
+ }
}
nested_svm_transition_tlb_flush(vcpu);
vmcb12->control.event_inj = svm->nested.ctl.event_inj;
vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err;
- if (!kvm_pause_in_guest(vcpu->kvm) && vmcb02->control.pause_filter_count)
+ if (!kvm_pause_in_guest(vcpu->kvm)) {
vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
+ vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
+
+ }
nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) {
WARN_ON(!svm->tsc_scaling_enabled);
vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
- svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
+ __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
}
svm->nested.ctl.nested_cr3 = 0;
vcpu->arch.tsc_scaling_ratio =
kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
svm->tsc_ratio_msr);
- svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio);
+ __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
}
/* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
/* If source buffer is not aligned then use an intermediate buffer */
if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
- src_tpage = alloc_page(GFP_KERNEL);
+ src_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
if (!src_tpage)
return -ENOMEM;
if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
int dst_offset;
- dst_tpage = alloc_page(GFP_KERNEL);
+ dst_tpage = alloc_page(GFP_KERNEL_ACCOUNT);
if (!dst_tpage) {
ret = -ENOMEM;
goto e_free;
{
struct kvm_sev_info *dst = &to_kvm_svm(dst_kvm)->sev_info;
struct kvm_sev_info *src = &to_kvm_svm(src_kvm)->sev_info;
+ struct kvm_vcpu *dst_vcpu, *src_vcpu;
+ struct vcpu_svm *dst_svm, *src_svm;
struct kvm_sev_info *mirror;
+ unsigned long i;
dst->active = true;
dst->asid = src->asid;
dst->handle = src->handle;
dst->pages_locked = src->pages_locked;
dst->enc_context_owner = src->enc_context_owner;
+ dst->es_active = src->es_active;
src->asid = 0;
src->active = false;
src->handle = 0;
src->pages_locked = 0;
src->enc_context_owner = NULL;
+ src->es_active = false;
list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
list_del(&src->mirror_entry);
list_add_tail(&dst->mirror_entry, &owner_sev_info->mirror_vms);
}
-}
-static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
-{
- unsigned long i;
- struct kvm_vcpu *dst_vcpu, *src_vcpu;
- struct vcpu_svm *dst_svm, *src_svm;
+ kvm_for_each_vcpu(i, dst_vcpu, dst_kvm) {
+ dst_svm = to_svm(dst_vcpu);
- if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
- return -EINVAL;
+ sev_init_vmcb(dst_svm);
- kvm_for_each_vcpu(i, src_vcpu, src) {
- if (!src_vcpu->arch.guest_state_protected)
- return -EINVAL;
- }
+ if (!dst->es_active)
+ continue;
- kvm_for_each_vcpu(i, src_vcpu, src) {
+ /*
+ * Note, the source is not required to have the same number of
+ * vCPUs as the destination when migrating a vanilla SEV VM.
+ */
+ src_vcpu = kvm_get_vcpu(dst_kvm, i);
src_svm = to_svm(src_vcpu);
- dst_vcpu = kvm_get_vcpu(dst, i);
- dst_svm = to_svm(dst_vcpu);
/*
* Transfer VMSA and GHCB state to the destination. Nullify and
src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
src_vcpu->arch.guest_state_protected = false;
}
- to_kvm_svm(src)->sev_info.es_active = false;
- to_kvm_svm(dst)->sev_info.es_active = true;
+}
+
+static int sev_check_source_vcpus(struct kvm *dst, struct kvm *src)
+{
+ struct kvm_vcpu *src_vcpu;
+ unsigned long i;
+
+ if (!sev_es_guest(src))
+ return 0;
+
+ if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
+ return -EINVAL;
+
+ kvm_for_each_vcpu(i, src_vcpu, src) {
+ if (!src_vcpu->arch.guest_state_protected)
+ return -EINVAL;
+ }
return 0;
}
if (ret)
goto out_dst_vcpu;
- if (sev_es_guest(source_kvm)) {
- ret = sev_es_migrate_from(kvm, source_kvm);
- if (ret)
- goto out_source_vcpu;
- }
+ ret = sev_check_source_vcpus(kvm, source_kvm);
+ if (ret)
+ goto out_source_vcpu;
sev_migrate_from(kvm, source_kvm);
kvm_vm_dead(source_kvm);
count, in);
}
-void sev_es_init_vmcb(struct vcpu_svm *svm)
+static void sev_es_init_vmcb(struct vcpu_svm *svm)
{
struct kvm_vcpu *vcpu = &svm->vcpu;
}
}
+void sev_init_vmcb(struct vcpu_svm *svm)
+{
+ svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
+ clr_exception_intercept(svm, UD_VECTOR);
+
+ if (sev_es_guest(svm->vcpu.kvm))
+ sev_es_init_vmcb(svm);
+}
+
void sev_es_vcpu_reset(struct vcpu_svm *svm)
{
/*
return 1;
}
+void __svm_write_tsc_multiplier(u64 multiplier)
+{
+ preempt_disable();
+
+ if (multiplier == __this_cpu_read(current_tsc_ratio))
+ goto out;
+
+ wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
+ __this_cpu_write(current_tsc_ratio, multiplier);
+out:
+ preempt_enable();
+}
+
static void svm_hardware_disable(void)
{
/* Make sure we clean up behind us */
if (tsc_scaling)
- wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT);
+ __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
cpu_svm_disable();
* Set the default value, even if we don't use TSC scaling
* to avoid having stale value in the msr
*/
- wrmsrl(MSR_AMD64_TSC_RATIO, SVM_TSC_RATIO_DEFAULT);
- __this_cpu_write(current_tsc_ratio, SVM_TSC_RATIO_DEFAULT);
+ __svm_write_tsc_multiplier(SVM_TSC_RATIO_DEFAULT);
}
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm) || !old)
+ if (kvm_pause_in_guest(vcpu->kvm))
return;
control->pause_filter_count = __grow_ple_window(old,
struct vmcb_control_area *control = &svm->vmcb->control;
int old = control->pause_filter_count;
- if (kvm_pause_in_guest(vcpu->kvm) || !old)
+ if (kvm_pause_in_guest(vcpu->kvm))
return;
control->pause_filter_count =
vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
}
-void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
+static void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier)
{
- wrmsrl(MSR_AMD64_TSC_RATIO, multiplier);
+ __svm_write_tsc_multiplier(multiplier);
}
+
/* Evaluate instruction intercepts that depend on guest CPUID features. */
static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
struct vcpu_svm *svm)
svm->vmcb->control.int_ctl |= V_GIF_ENABLE_MASK;
}
- if (sev_guest(vcpu->kvm)) {
- svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ENABLE;
- clr_exception_intercept(svm, UD_VECTOR);
-
- if (sev_es_guest(vcpu->kvm)) {
- /* Perform SEV-ES specific VMCB updates */
- sev_es_init_vmcb(svm);
- }
- }
+ if (sev_guest(vcpu->kvm))
+ sev_init_vmcb(svm);
svm_hv_init_vmcb(vmcb);
init_vmcb_after_set_cpuid(vcpu);
sev_es_prepare_switch_to_guest(hostsa);
}
- if (tsc_scaling) {
- u64 tsc_ratio = vcpu->arch.tsc_scaling_ratio;
- if (tsc_ratio != __this_cpu_read(current_tsc_ratio)) {
- __this_cpu_write(current_tsc_ratio, tsc_ratio);
- wrmsrl(MSR_AMD64_TSC_RATIO, tsc_ratio);
- }
- }
+ if (tsc_scaling)
+ __svm_write_tsc_multiplier(vcpu->arch.tsc_scaling_ratio);
if (likely(tsc_aux_uret_slot >= 0))
kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
indirect_branch_prediction_barrier();
}
if (kvm_vcpu_apicv_active(vcpu))
- __avic_vcpu_load(vcpu, cpu);
+ avic_vcpu_load(vcpu, cpu);
}
static void svm_vcpu_put(struct kvm_vcpu *vcpu)
{
if (kvm_vcpu_apicv_active(vcpu))
- __avic_vcpu_put(vcpu);
+ avic_vcpu_put(vcpu);
svm_prepare_host_switch(vcpu);
static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
+ if (to_svm(vcpu)->vmcb->control.exit_code == SVM_EXIT_INTR)
+ vcpu->arch.at_instruction_boundary = true;
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
-void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu, u64 multiplier);
+void __svm_write_tsc_multiplier(u64 multiplier);
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
struct vmcb_control_area *control);
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
-void __avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
-void __avic_vcpu_put(struct kvm_vcpu *vcpu);
+void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_set_virtual_apic_mode(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
void __init sev_hardware_setup(void);
void sev_hardware_unsetup(void);
int sev_cpu_init(struct svm_cpu_data *sd);
+void sev_init_vmcb(struct vcpu_svm *svm);
void sev_free_vcpu(struct kvm_vcpu *vcpu);
int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
-void sev_es_init_vmcb(struct vcpu_svm *svm);
void sev_es_vcpu_reset(struct vcpu_svm *svm);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_switch_to_guest(struct sev_es_save_area *hostsa);
mov %r15, VCPU_R15(%_ASM_AX)
#endif
+ /*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+ * untrained as soon as we exit the VM and are back to the
+ * kernel. This should be done before re-enabling interrupts
+ * because interrupt handlers won't sanitize 'ret' if the return is
+ * from the kernel.
+ */
+ UNTRAIN_RET
+
/*
* Clear all general purpose registers except RSP and RAX to prevent
* speculative use of the guest's values, even those that are reloaded
FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
#endif
+ /*
+ * Mitigate RETBleed for AMD/Hygon Zen uarch. RET should be
+ * untrained as soon as we exit the VM and are back to the
+ * kernel. This should be done before re-enabling interrupts
+ * because interrupt handlers won't sanitize RET if the return is
+ * from the kernel.
+ */
+ UNTRAIN_RET
+
pop %_ASM_BX
#ifdef CONFIG_X86_64
#include <asm/vmx.h>
-#include "lapic.h"
-#include "x86.h"
+#include "../lapic.h"
+#include "../x86.h"
extern bool __read_mostly enable_vpid;
extern bool __read_mostly flexpriority_enabled;
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_ENABLE_VMFUNC |
- SECONDARY_EXEC_TSC_SCALING |
SECONDARY_EXEC_DESC);
if (nested_cpu_has(vmcs12,
}
vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
- vmx->loaded_vmcs->launched);
+ __vmx_vcpu_run_flags(vmx));
if (vmx->msr_autoload.host.nr)
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __KVM_X86_VMX_RUN_FLAGS_H
+#define __KVM_X86_VMX_RUN_FLAGS_H
+
+#define VMX_RUN_VMRESUME (1 << 0)
+#define VMX_RUN_SAVE_SPEC_CTRL (1 << 1)
+
+#endif /* __KVM_X86_VMX_RUN_FLAGS_H */
/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
+#include <asm/asm-offsets.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
+#include <asm/percpu.h>
#include <asm/segment.h>
+#include "run_flags.h"
#define WORD_SIZE (BITS_PER_LONG / 8)
.section .noinstr.text, "ax"
-/**
- * vmx_vmenter - VM-Enter the current loaded VMCS
- *
- * %RFLAGS.ZF: !VMCS.LAUNCHED, i.e. controls VMLAUNCH vs. VMRESUME
- *
- * Returns:
- * %RFLAGS.CF is set on VM-Fail Invalid
- * %RFLAGS.ZF is set on VM-Fail Valid
- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
- *
- * Note that VMRESUME/VMLAUNCH fall-through and return directly if
- * they VM-Fail, whereas a successful VM-Enter + VM-Exit will jump
- * to vmx_vmexit.
- */
-SYM_FUNC_START_LOCAL(vmx_vmenter)
- /* EFLAGS.ZF is set if VMCS.LAUNCHED == 0 */
- je 2f
-
-1: vmresume
- RET
-
-2: vmlaunch
- RET
-
-3: cmpb $0, kvm_rebooting
- je 4f
- RET
-4: ud2
-
- _ASM_EXTABLE(1b, 3b)
- _ASM_EXTABLE(2b, 3b)
-
-SYM_FUNC_END(vmx_vmenter)
-
-/**
- * vmx_vmexit - Handle a VMX VM-Exit
- *
- * Returns:
- * %RFLAGS.{CF,ZF} are cleared on VM-Success, i.e. VM-Exit
- *
- * This is vmx_vmenter's partner in crime. On a VM-Exit, control will jump
- * here after hardware loads the host's state, i.e. this is the destination
- * referred to by VMCS.HOST_RIP.
- */
-SYM_FUNC_START(vmx_vmexit)
-#ifdef CONFIG_RETPOLINE
- ALTERNATIVE "jmp .Lvmexit_skip_rsb", "", X86_FEATURE_RETPOLINE
- /* Preserve guest's RAX, it's used to stuff the RSB. */
- push %_ASM_AX
-
- /* IMPORTANT: Stuff the RSB immediately after VM-Exit, before RET! */
- FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, X86_FEATURE_RETPOLINE
-
- /* Clear RFLAGS.CF and RFLAGS.ZF to preserve VM-Exit, i.e. !VM-Fail. */
- or $1, %_ASM_AX
-
- pop %_ASM_AX
-.Lvmexit_skip_rsb:
-#endif
- RET
-SYM_FUNC_END(vmx_vmexit)
-
/**
* __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
- * @vmx: struct vcpu_vmx * (forwarded to vmx_update_host_rsp)
+ * @vmx: struct vcpu_vmx *
* @regs: unsigned long * (to guest registers)
- * @launched: %true if the VMCS has been launched
+ * @flags: VMX_RUN_VMRESUME: use VMRESUME instead of VMLAUNCH
+ * VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
*
* Returns:
* 0 on VM-Exit, 1 on VM-Fail
#endif
push %_ASM_BX
+ /* Save @vmx for SPEC_CTRL handling */
+ push %_ASM_ARG1
+
+ /* Save @flags for SPEC_CTRL handling */
+ push %_ASM_ARG3
+
/*
* Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
* @regs is needed after VM-Exit to save the guest's register values.
*/
push %_ASM_ARG2
- /* Copy @launched to BL, _ASM_ARG3 is volatile. */
+ /* Copy @flags to BL, _ASM_ARG3 is volatile. */
mov %_ASM_ARG3B, %bl
- /* Adjust RSP to account for the CALL to vmx_vmenter(). */
- lea -WORD_SIZE(%_ASM_SP), %_ASM_ARG2
+ lea (%_ASM_SP), %_ASM_ARG2
call vmx_update_host_rsp
+ ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL
+
+ /*
+ * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
+ * host's, write the MSR.
+ *
+ * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
+ * there must not be any returns or indirect branches between this code
+ * and vmentry.
+ */
+ mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
+ movl VMX_spec_ctrl(%_ASM_DI), %edi
+ movl PER_CPU_VAR(x86_spec_ctrl_current), %esi
+ cmp %edi, %esi
+ je .Lspec_ctrl_done
+ mov $MSR_IA32_SPEC_CTRL, %ecx
+ xor %edx, %edx
+ mov %edi, %eax
+ wrmsr
+
+.Lspec_ctrl_done:
+
+ /*
+ * Since vmentry is serializing on affected CPUs, there's no need for
+ * an LFENCE to stop speculation from skipping the wrmsr.
+ */
+
/* Load @regs to RAX. */
mov (%_ASM_SP), %_ASM_AX
/* Check if vmlaunch or vmresume is needed */
- testb %bl, %bl
+ testb $VMX_RUN_VMRESUME, %bl
/* Load guest registers. Don't clobber flags. */
mov VCPU_RCX(%_ASM_AX), %_ASM_CX
/* Load guest RAX. This kills the @regs pointer! */
mov VCPU_RAX(%_ASM_AX), %_ASM_AX
- /* Enter guest mode */
- call vmx_vmenter
+ /* Check EFLAGS.ZF from 'testb' above */
+ jz .Lvmlaunch
+
+ /*
+ * After a successful VMRESUME/VMLAUNCH, control flow "magically"
+ * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
+ * So this isn't a typical function and objtool needs to be told to
+ * save the unwind state here and restore it below.
+ */
+ UNWIND_HINT_SAVE
+
+/*
+ * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
+ * the 'vmx_vmexit' label below.
+ */
+.Lvmresume:
+ vmresume
+ jmp .Lvmfail
+
+.Lvmlaunch:
+ vmlaunch
+ jmp .Lvmfail
- /* Jump on VM-Fail. */
- jbe 2f
+ _ASM_EXTABLE(.Lvmresume, .Lfixup)
+ _ASM_EXTABLE(.Lvmlaunch, .Lfixup)
+
+SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)
+
+ /* Restore unwind state from before the VMRESUME/VMLAUNCH. */
+ UNWIND_HINT_RESTORE
+ ENDBR
/* Temporarily save guest's RAX. */
push %_ASM_AX
mov %r15, VCPU_R15(%_ASM_AX)
#endif
- /* Clear RAX to indicate VM-Exit (as opposed to VM-Fail). */
- xor %eax, %eax
+ /* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
+ xor %ebx, %ebx
+.Lclear_regs:
/*
- * Clear all general purpose registers except RSP and RAX to prevent
+ * Clear all general purpose registers except RSP and RBX to prevent
* speculative use of the guest's values, even those that are reloaded
* via the stack. In theory, an L1 cache miss when restoring registers
* could lead to speculative execution with the guest's values.
* Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
* free. RSP and RAX are exempt as RSP is restored by hardware during
- * VM-Exit and RAX is explicitly loaded with 0 or 1 to return VM-Fail.
+ * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
+ * value.
*/
-1: xor %ecx, %ecx
+ xor %eax, %eax
+ xor %ecx, %ecx
xor %edx, %edx
- xor %ebx, %ebx
xor %ebp, %ebp
xor %esi, %esi
xor %edi, %edi
/* "POP" @regs. */
add $WORD_SIZE, %_ASM_SP
- pop %_ASM_BX
+ /*
+ * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
+ * the first unbalanced RET after vmexit!
+ *
+ * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
+ * entries and (in some cases) RSB underflow.
+ *
+ * eIBRS has its own protection against poisoned RSB, so it doesn't
+ * need the RSB filling sequence. But it does need to be enabled
+ * before the first unbalanced RET.
+ */
+
+ FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT
+
+ pop %_ASM_ARG2 /* @flags */
+ pop %_ASM_ARG1 /* @vmx */
+
+ call vmx_spec_ctrl_restore_host
+
+ /* Put return value in AX */
+ mov %_ASM_BX, %_ASM_AX
+
+ pop %_ASM_BX
#ifdef CONFIG_X86_64
pop %r12
pop %r13
pop %_ASM_BP
RET
- /* VM-Fail. Out-of-line to avoid a taken Jcc after VM-Exit. */
-2: mov $1, %eax
- jmp 1b
+.Lfixup:
+ cmpb $0, kvm_rebooting
+ jne .Lvmfail
+ ud2
+.Lvmfail:
+ /* VM-Fail: set return value to 1 */
+ mov $1, %_ASM_BX
+ jmp .Lclear_regs
+
SYM_FUNC_END(__vmx_vcpu_run)
#define L1D_CACHE_ORDER 4
static void *vmx_l1d_flush_pages;
+/* Control for disabling CPU Fill buffer clear */
+static bool __read_mostly vmx_fb_clear_ctrl_available;
+
static int vmx_setup_l1d_flush(enum vmx_l1d_flush_state l1tf)
{
struct page *page;
return sprintf(s, "%s\n", vmentry_l1d_param[l1tf_vmx_mitigation].option);
}
+static void vmx_setup_fb_clear_ctrl(void)
+{
+ u64 msr;
+
+ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES) &&
+ !boot_cpu_has_bug(X86_BUG_MDS) &&
+ !boot_cpu_has_bug(X86_BUG_TAA)) {
+ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, msr);
+ if (msr & ARCH_CAP_FB_CLEAR_CTRL)
+ vmx_fb_clear_ctrl_available = true;
+ }
+}
+
+static __always_inline void vmx_disable_fb_clear(struct vcpu_vmx *vmx)
+{
+ u64 msr;
+
+ if (!vmx->disable_fb_clear)
+ return;
+
+ msr = __rdmsr(MSR_IA32_MCU_OPT_CTRL);
+ msr |= FB_CLEAR_DIS;
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, msr);
+ /* Cache the MSR value to avoid reading it later */
+ vmx->msr_ia32_mcu_opt_ctrl = msr;
+}
+
+static __always_inline void vmx_enable_fb_clear(struct vcpu_vmx *vmx)
+{
+ if (!vmx->disable_fb_clear)
+ return;
+
+ vmx->msr_ia32_mcu_opt_ctrl &= ~FB_CLEAR_DIS;
+ native_wrmsrl(MSR_IA32_MCU_OPT_CTRL, vmx->msr_ia32_mcu_opt_ctrl);
+}
+
+static void vmx_update_fb_clear_dis(struct kvm_vcpu *vcpu, struct vcpu_vmx *vmx)
+{
+ vmx->disable_fb_clear = vmx_fb_clear_ctrl_available;
+
+ /*
+ * If guest will not execute VERW, there is no need to set FB_CLEAR_DIS
+ * at VMEntry. Skip the MSR read/write when a guest has no use case to
+ * execute VERW.
+ */
+ if ((vcpu->arch.arch_capabilities & ARCH_CAP_FB_CLEAR) ||
+ ((vcpu->arch.arch_capabilities & ARCH_CAP_MDS_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_TAA_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_PSDP_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_FBSDP_NO) &&
+ (vcpu->arch.arch_capabilities & ARCH_CAP_SBDR_SSDP_NO)))
+ vmx->disable_fb_clear = false;
+}
+
static const struct kernel_param_ops vmentry_l1d_flush_ops = {
.set = vmentry_l1d_flush_set,
.get = vmentry_l1d_flush_get,
MSR_IA32_SPEC_CTRL);
}
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx)
+{
+ unsigned int flags = 0;
+
+ if (vmx->loaded_vmcs->launched)
+ flags |= VMX_RUN_VMRESUME;
+
+ /*
+ * If writes to the SPEC_CTRL MSR aren't intercepted, the guest is free
+ * to change it directly without causing a vmexit. In that case read
+ * it after vmexit and store it in vmx->spec_ctrl.
+ */
+ if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
+ flags |= VMX_RUN_SAVE_SPEC_CTRL;
+
+ return flags;
+}
+
static void clear_atomic_switch_msr_special(struct vcpu_vmx *vmx,
unsigned long entry, unsigned long exit)
{
ret = kvm_set_msr_common(vcpu, msr_info);
}
+ /* FB_CLEAR may have changed, also update the FB_CLEAR_DIS behavior */
+ if (msr_index == MSR_IA32_ARCH_CAPABILITIES)
+ vmx_update_fb_clear_dis(vcpu, vmx);
+
return ret;
}
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
vpid_sync_context(vmx->vpid);
+
+ vmx_update_fb_clear_dis(vcpu, vmx);
}
static void vmx_enable_irq_window(struct kvm_vcpu *vcpu)
return;
handle_interrupt_nmi_irqoff(vcpu, gate_offset(desc));
+ vcpu->arch.at_instruction_boundary = true;
}
static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
}
}
+void noinstr vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx,
+ unsigned int flags)
+{
+ u64 hostval = this_cpu_read(x86_spec_ctrl_current);
+
+ if (!cpu_feature_enabled(X86_FEATURE_MSR_SPEC_CTRL))
+ return;
+
+ if (flags & VMX_RUN_SAVE_SPEC_CTRL)
+ vmx->spec_ctrl = __rdmsr(MSR_IA32_SPEC_CTRL);
+
+ /*
+ * If the guest/host SPEC_CTRL values differ, restore the host value.
+ *
+ * For legacy IBRS, the IBRS bit always needs to be written after
+ * transitioning from a less privileged predictor mode, regardless of
+ * whether the guest/host values differ.
+ */
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) ||
+ vmx->spec_ctrl != hostval)
+ native_wrmsrl(MSR_IA32_SPEC_CTRL, hostval);
+
+ barrier_nospec();
+}
+
static fastpath_t vmx_exit_handlers_fastpath(struct kvm_vcpu *vcpu)
{
switch (to_vmx(vcpu)->exit_reason.basic) {
}
static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu,
- struct vcpu_vmx *vmx)
+ struct vcpu_vmx *vmx,
+ unsigned long flags)
{
guest_state_enter_irqoff();
vmx_l1d_flush(vcpu);
else if (static_branch_unlikely(&mds_user_clear))
mds_clear_cpu_buffers();
+ else if (static_branch_unlikely(&mmio_stale_data_clear) &&
+ kvm_arch_has_assigned_device(vcpu->kvm))
+ mds_clear_cpu_buffers();
+
+ vmx_disable_fb_clear(vmx);
if (vcpu->arch.cr2 != native_read_cr2())
native_write_cr2(vcpu->arch.cr2);
vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
- vmx->loaded_vmcs->launched);
+ flags);
vcpu->arch.cr2 = native_read_cr2();
+ vmx_enable_fb_clear(vmx);
+
guest_state_exit_irqoff();
}
kvm_wait_lapic_expire(vcpu);
- /*
- * If this vCPU has touched SPEC_CTRL, restore the guest's value if
- * it's non-zero. Since vmentry is serialising on affected CPUs, there
- * is no need to worry about the conditional branch over the wrmsr
- * being speculatively taken.
- */
- x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0);
-
/* The actual VMENTER/EXIT is in the .noinstr.text section. */
- vmx_vcpu_enter_exit(vcpu, vmx);
-
- /*
- * We do not use IBRS in the kernel. If this vCPU has used the
- * SPEC_CTRL MSR it may have left it on; save the value and
- * turn it off. This is much more efficient than blindly adding
- * it to the atomic save/restore list. Especially as the former
- * (Saving guest MSRs on vmexit) doesn't even exist in KVM.
- *
- * For non-nested case:
- * If the L01 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- *
- * For nested case:
- * If the L02 MSR bitmap does not intercept the MSR, then we need to
- * save it.
- */
- if (unlikely(!msr_write_intercepted(vmx, MSR_IA32_SPEC_CTRL)))
- vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
-
- x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0);
+ vmx_vcpu_enter_exit(vcpu, vmx, __vmx_vcpu_run_flags(vmx));
/* All fields are clean at this point */
if (static_branch_unlikely(&enable_evmcs)) {
ulong supported = BIT(APICV_INHIBIT_REASON_DISABLE) |
BIT(APICV_INHIBIT_REASON_ABSENT) |
BIT(APICV_INHIBIT_REASON_HYPERV) |
- BIT(APICV_INHIBIT_REASON_BLOCKIRQ);
+ BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |
+ BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |
+ BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED);
return supported & BIT(reason);
}
return r;
}
+ vmx_setup_fb_clear_ctrl();
+
for_each_possible_cpu(cpu) {
INIT_LIST_HEAD(&per_cpu(loaded_vmcss_on_cpu, cpu));
#include <asm/intel_pt.h>
#include "capabilities.h"
-#include "kvm_cache_regs.h"
+#include "../kvm_cache_regs.h"
#include "posted_intr.h"
#include "vmcs.h"
#include "vmx_ops.h"
-#include "cpuid.h"
+#include "../cpuid.h"
+#include "run_flags.h"
#define MSR_TYPE_R 1
#define MSR_TYPE_W 2
u64 msr_ia32_feature_control_valid_bits;
/* SGX Launch Control public key hash */
u64 msr_ia32_sgxlepubkeyhash[4];
+ u64 msr_ia32_mcu_opt_ctrl;
+ bool disable_fb_clear;
struct pt_desc pt_desc;
struct lbr_desc lbr_desc;
struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr);
void pt_update_intercept_for_msr(struct kvm_vcpu *vcpu);
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp);
-bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool launched);
+void vmx_spec_ctrl_restore_host(struct vcpu_vmx *vmx, unsigned int flags);
+unsigned int __vmx_vcpu_run_flags(struct vcpu_vmx *vmx);
+bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs,
+ unsigned int flags);
int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
#include "evmcs.h"
#include "vmcs.h"
-#include "x86.h"
+#include "../x86.h"
asmlinkage void vmread_error(unsigned long field, bool fault);
__attribute__((regparm(0))) void vmread_error_trampoline(unsigned long field,
STATS_DESC_COUNTER(VCPU, nested_run),
STATS_DESC_COUNTER(VCPU, directed_yield_attempted),
STATS_DESC_COUNTER(VCPU, directed_yield_successful),
- STATS_DESC_ICOUNTER(VCPU, guest_mode)
+ STATS_DESC_COUNTER(VCPU, preemption_reported),
+ STATS_DESC_COUNTER(VCPU, preemption_other),
+ STATS_DESC_IBOOLEAN(VCPU, guest_mode)
};
const struct kvm_stats_header kvm_vcpu_stats_header = {
*/
}
+ /* Guests don't need to know "Fill buffer clear control" exists */
+ data &= ~ARCH_CAP_FB_CLEAR_CTRL;
+
return data;
}
struct kvm_memslots *slots;
static const u8 preempted = KVM_VCPU_PREEMPTED;
+ /*
+ * The vCPU can be marked preempted if and only if the VM-Exit was on
+ * an instruction boundary and will not trigger guest emulation of any
+ * kind (see vcpu_run). Vendor specific code controls (conservatively)
+ * when this is true, for example allowing the vCPU to be marked
+ * preempted if and only if the VM-Exit was due to a host interrupt.
+ */
+ if (!vcpu->arch.at_instruction_boundary) {
+ vcpu->stat.preemption_other++;
+ return;
+ }
+
+ vcpu->stat.preemption_reported++;
if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
return;
{
int idx;
- if (vcpu->preempted && !vcpu->arch.guest_state_protected)
- vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
+ if (vcpu->preempted) {
+ if (!vcpu->arch.guest_state_protected)
+ vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
- /*
- * Take the srcu lock as memslots will be accessed to check the gfn
- * cache generation against the memslots generation.
- */
- idx = srcu_read_lock(&vcpu->kvm->srcu);
- if (kvm_xen_msr_enabled(vcpu->kvm))
- kvm_xen_runstate_set_preempted(vcpu);
- else
- kvm_steal_time_set_preempted(vcpu);
- srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ /*
+ * Take the srcu lock as memslots will be accessed to check the gfn
+ * cache generation against the memslots generation.
+ */
+ idx = srcu_read_lock(&vcpu->kvm->srcu);
+ if (kvm_xen_msr_enabled(vcpu->kvm))
+ kvm_xen_runstate_set_preempted(vcpu);
+ else
+ kvm_steal_time_set_preempted(vcpu);
+ srcu_read_unlock(&vcpu->kvm->srcu, idx);
+ }
static_call(kvm_x86_vcpu_put)(vcpu);
vcpu->arch.last_host_tsc = rdtsc();
r = 0;
break;
case KVM_CAP_X86_USER_SPACE_MSR:
+ r = -EINVAL;
+ if (cap->args[0] & ~(KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER))
+ break;
kvm->arch.user_space_msr_mask = cap->args[0];
r = 0;
break;
if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
return -EFAULT;
+ if (filter.flags & ~KVM_MSR_FILTER_DEFAULT_DENY)
+ return -EINVAL;
+
for (i = 0; i < ARRAY_SIZE(filter.ranges); i++)
empty &= !filter.ranges[i].nmsrs;
*/
static void kvm_pv_kick_cpu_op(struct kvm *kvm, int apicid)
{
- struct kvm_lapic_irq lapic_irq;
-
- lapic_irq.shorthand = APIC_DEST_NOSHORT;
- lapic_irq.dest_mode = APIC_DEST_PHYSICAL;
- lapic_irq.level = 0;
- lapic_irq.dest_id = apicid;
- lapic_irq.msi_redir_hint = false;
+ /*
+ * All other fields are unused for APIC_DM_REMRD, but may be consumed by
+ * common code, e.g. for tracing. Defer initialization to the compiler.
+ */
+ struct kvm_lapic_irq lapic_irq = {
+ .delivery_mode = APIC_DM_REMRD,
+ .dest_mode = APIC_DEST_PHYSICAL,
+ .shorthand = APIC_DEST_NOSHORT,
+ .dest_id = apicid,
+ };
- lapic_irq.delivery_mode = APIC_DM_REMRD;
kvm_irq_delivery_to_apic(kvm, NULL, &lapic_irq, NULL);
}
return;
down_read(&vcpu->kvm->arch.apicv_update_lock);
+ preempt_disable();
activate = kvm_vcpu_apicv_activated(vcpu);
kvm_make_request(KVM_REQ_EVENT, vcpu);
out:
+ preempt_enable();
up_read(&vcpu->kvm->arch.apicv_update_lock);
}
EXPORT_SYMBOL_GPL(kvm_vcpu_update_apicv);
vcpu->arch.l1tf_flush_l1d = true;
for (;;) {
+ /*
+ * If another guest vCPU requests a PV TLB flush in the middle
+ * of instruction emulation, the rest of the emulation could
+ * use a stale page translation. Assume that any code after
+ * this point can start executing an instruction.
+ */
+ vcpu->arch.at_instruction_boundary = false;
if (kvm_vcpu_running(vcpu)) {
r = vcpu_enter_guest(vcpu);
} else {
}
EXPORT_SYMBOL_GPL(kvm_arch_end_assignment);
-bool kvm_arch_has_assigned_device(struct kvm *kvm)
+bool noinstr kvm_arch_has_assigned_device(struct kvm *kvm)
{
- return atomic_read(&kvm->arch.assigned_device_count);
+ return arch_atomic_read(&kvm->arch.assigned_device_count);
}
EXPORT_SYMBOL_GPL(kvm_arch_has_assigned_device);
* behalf of the vCPU. Only if the VMM does actually block
* does it need to enter RUNSTATE_blocked.
*/
- if (vcpu->preempted)
- kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+ if (WARN_ON_ONCE(!vcpu->preempted))
+ return;
+
+ kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
}
/* 32-bit compatibility definitions, also used natively in 32-bit build */
/* FSRM implies ERMS => no length checks, do the copy directly */
.Lmemmove_begin_forward:
ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
- ALTERNATIVE "", __stringify(movq %rdx, %rcx; rep movsb; RET), X86_FEATURE_ERMS
+ ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS
/*
* movsq instruction have many startup latency
movb %r11b, (%rdi)
13:
RET
+
+.Lmemmove_erms:
+ movq %rdx, %rcx
+ rep movsb
+ RET
SYM_FUNC_END(__memmove)
EXPORT_SYMBOL(__memmove)
UNWIND_HINT_EMPTY
ANNOTATE_NOENDBR
- ALTERNATIVE_2 __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), \
- __stringify(RETPOLINE \reg), X86_FEATURE_RETPOLINE, \
- __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE
+ ALTERNATIVE_2 __stringify(RETPOLINE \reg), \
+ __stringify(lfence; ANNOTATE_RETPOLINE_SAFE; jmp *%\reg; int3), X86_FEATURE_RETPOLINE_LFENCE, \
+ __stringify(ANNOTATE_RETPOLINE_SAFE; jmp *%\reg), ALT_NOT(X86_FEATURE_RETPOLINE)
.endm
#define GEN(reg) EXPORT_THUNK(reg)
#include <asm/GEN-for-each-reg.h>
#undef GEN
+
+/*
+ * This function name is magical and is used by -mfunction-return=thunk-extern
+ * for the compiler to generate JMPs to it.
+ */
+#ifdef CONFIG_RETHUNK
+
+ .section .text.__x86.return_thunk
+
+/*
+ * Safety details here pertain to the AMD Zen{1,2} microarchitecture:
+ * 1) The RET at __x86_return_thunk must be on a 64 byte boundary, for
+ * alignment within the BTB.
+ * 2) The instruction at zen_untrain_ret must contain, and not
+ * end with, the 0xc3 byte of the RET.
+ * 3) STIBP must be enabled, or SMT disabled, to prevent the sibling thread
+ * from re-poisioning the BTB prediction.
+ */
+ .align 64
+ .skip 63, 0xcc
+SYM_FUNC_START_NOALIGN(zen_untrain_ret);
+
+ /*
+ * As executed from zen_untrain_ret, this is:
+ *
+ * TEST $0xcc, %bl
+ * LFENCE
+ * JMP __x86_return_thunk
+ *
+ * Executing the TEST instruction has a side effect of evicting any BTB
+ * prediction (potentially attacker controlled) attached to the RET, as
+ * __x86_return_thunk + 1 isn't an instruction boundary at the moment.
+ */
+ .byte 0xf6
+
+ /*
+ * As executed from __x86_return_thunk, this is a plain RET.
+ *
+ * As part of the TEST above, RET is the ModRM byte, and INT3 the imm8.
+ *
+ * We subsequently jump backwards and architecturally execute the RET.
+ * This creates a correct BTB prediction (type=ret), but in the
+ * meantime we suffer Straight Line Speculation (because the type was
+ * no branch) which is halted by the INT3.
+ *
+ * With SMT enabled and STIBP active, a sibling thread cannot poison
+ * RET's prediction to a type of its choice, but can evict the
+ * prediction due to competitive sharing. If the prediction is
+ * evicted, __x86_return_thunk will suffer Straight Line Speculation
+ * which will be contained safely by the INT3.
+ */
+SYM_INNER_LABEL(__x86_return_thunk, SYM_L_GLOBAL)
+ ret
+ int3
+SYM_CODE_END(__x86_return_thunk)
+
+ /*
+ * Ensure the TEST decoding / BTB invalidation is complete.
+ */
+ lfence
+
+ /*
+ * Jump back and execute the RET in the middle of the TEST instruction.
+ * INT3 is for SLS protection.
+ */
+ jmp __x86_return_thunk
+ int3
+SYM_FUNC_END(zen_untrain_ret)
+__EXPORT_THUNK(zen_untrain_ret)
+
+EXPORT_SYMBOL(__x86_return_thunk)
+
+#endif /* CONFIG_RETHUNK */
[__pte2cm_idx(_PAGE_PWT | _PAGE_PCD | _PAGE_PAT)] = _PAGE_CACHE_MODE_UC,
};
-/* Check that the write-protect PAT entry is set for write-protect */
+/*
+ * Check that the write-protect PAT entry is set for write-protect.
+ * To do this without making assumptions how PAT has been set up (Xen has
+ * another layout than the kernel), translate the _PAGE_CACHE_MODE_WP cache
+ * mode via the __cachemode2pte_tbl[] into protection bits (those protection
+ * bits will select a cache mode of WP or better), and then translate the
+ * protection bits back into the cache mode using __pte2cm_idx() and the
+ * __pte2cachemode_tbl[] array. This will return the really used cache mode.
+ */
bool x86_has_pat_wp(void)
{
- return __pte2cachemode_tbl[_PAGE_CACHE_MODE_WP] == _PAGE_CACHE_MODE_WP;
+ uint16_t prot = __cachemode2pte_tbl[_PAGE_CACHE_MODE_WP];
+
+ return __pte2cachemode_tbl[__pte2cm_idx(prot)] == _PAGE_CACHE_MODE_WP;
}
enum page_cache_mode pgprot2cachemode(pgprot_t pgprot)
#include <linux/swiotlb.h>
#include <linux/cc_platform.h>
#include <linux/mem_encrypt.h>
-#include <linux/virtio_config.h>
/* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
bool force_dma_unencrypted(struct device *dev)
print_mem_encrypt_feature_info();
}
-
-int arch_has_restricted_virtio_memory_access(void)
-{
- return cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT);
-}
-EXPORT_SYMBOL_GPL(arch_has_restricted_virtio_memory_access);
#include <linux/dma-mapping.h>
#include <linux/virtio_config.h>
#include <linux/cc_platform.h>
+#include <linux/platform-feature.h>
#include <asm/tlbflush.h>
#include <asm/fixmap.h>
size = total_mem * 6 / 100;
size = clamp_val(size, IO_TLB_DEFAULT_SIZE, SZ_1G);
swiotlb_adjust_size(size);
+
+ /* Set restricted memory access for virtio. */
+ platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS);
}
static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
movq %rbp, %rsp /* Restore original stack pointer */
pop %rbp
- RET
+ /* Offset to __x86_return_thunk would be wrong here */
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
SYM_FUNC_END(sme_encrypt_execute)
SYM_FUNC_START(__enc_copy)
pop %r12
pop %r15
- RET
+ /* Offset to __x86_return_thunk would be wrong here */
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
.L__enc_copy_end:
SYM_FUNC_END(__enc_copy)
{
u8 *prog = *pprog;
-#ifdef CONFIG_RETPOLINE
if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) {
EMIT_LFENCE();
EMIT2(0xFF, 0xE0 + reg);
} else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) {
OPTIMIZER_HIDE_VAR(reg);
emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip);
- } else
-#endif
- EMIT2(0xFF, 0xE0 + reg);
+ } else {
+ EMIT2(0xFF, 0xE0 + reg);
+ }
+
+ *pprog = prog;
+}
+
+static void emit_return(u8 **pprog, u8 *ip)
+{
+ u8 *prog = *pprog;
+
+ if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) {
+ emit_jump(&prog, &__x86_return_thunk, ip);
+ } else {
+ EMIT1(0xC3); /* ret */
+ if (IS_ENABLED(CONFIG_SLS))
+ EMIT1(0xCC); /* int3 */
+ }
*pprog = prog;
}
case BPF_JMP | BPF_CALL:
func = (u8 *) __bpf_call_base + imm32;
if (tail_call_reachable) {
+ /* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
EMIT3_off32(0x48, 0x8B, 0x85,
- -(bpf_prog->aux->stack_depth + 8));
+ -round_up(bpf_prog->aux->stack_depth, 8) - 8);
if (!imm32 || emit_call(&prog, func, image + addrs[i - 1] + 7))
return -EINVAL;
} else {
ctx->cleanup_addr = proglen;
pop_callee_regs(&prog, callee_regs_used);
EMIT1(0xC9); /* leave */
- EMIT1(0xC3); /* ret */
+ emit_return(&prog, image + addrs[i - 1] + (prog - temp));
break;
default:
if (flags & BPF_TRAMP_F_SKIP_FRAME)
/* skip our return address and return to parent */
EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */
- EMIT1(0xC3); /* ret */
+ emit_return(&prog, prog);
/* Make sure the trampoline generation logic doesn't overflow */
if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) {
ret = -EFAULT;
#include <linux/pci-acpi.h>
#include <asm/numa.h>
#include <asm/pci_x86.h>
-#include <asm/e820/api.h>
struct pci_root_info {
struct acpi_pci_root_info common;
#endif
};
-static bool pci_use_e820 = true;
+bool pci_use_e820 = true;
static bool pci_use_crs = true;
static bool pci_ignore_seg;
status = acpi_pci_probe_root_resources(ci);
- if (pci_use_e820) {
- resource_list_for_each_entry(entry, &ci->resources)
- remove_e820_regions(&device->dev, entry->res);
- }
-
if (pci_use_crs) {
resource_list_for_each_entry_safe(entry, tmp, &ci->resources)
if (resource_is_pcicfg_ioport(entry->res))
#include <linux/objtool.h>
#include <asm/page_types.h>
#include <asm/segment.h>
+#include <asm/nospec-branch.h>
.text
.code64
1: movq 0x20(%rsp), %rsp
pop %rbx
pop %rbp
- RET
+ ANNOTATE_UNRET_SAFE
+ ret
+ int3
.code32
2: pushl $__KERNEL_CS
if (xen_pv_domain())
return;
+ xen_set_restricted_virtio_memory_access();
+
init_hvm_pv_info();
reserve_shared_info();
static void __init xen_pv_init_platform(void)
{
+ xen_set_restricted_virtio_memory_access();
+
populate_extra_pte(fix_to_virt(FIX_PARAVIRT_BOOTMAP));
set_fixmap(FIX_PARAVIRT_BOOTMAP, xen_start_info->shared_info);
extern void early_xen_iret_patch(void);
/* First C function to be called on Xen boot */
-asmlinkage __visible void __init xen_start_kernel(void)
+asmlinkage __visible void __init xen_start_kernel(struct start_info *si)
{
struct physdev_set_iopl set_iopl;
unsigned long initrd_start = 0;
int rc;
- if (!xen_start_info)
+ if (!si)
return;
+ clear_bss();
+
+ xen_start_info = si;
+
__text_gen_insn(&early_xen_iret_patch,
JMP32_INSN_OPCODE, &early_xen_iret_patch, &xen_iret,
JMP32_INSN_SIZE);
if (!boot_cpu_has(sysenter_feature))
return;
- ret = register_callback(CALLBACKTYPE_sysenter, xen_sysenter_target);
+ ret = register_callback(CALLBACKTYPE_sysenter, xen_entry_SYSENTER_compat);
if(ret != 0)
setup_clear_cpu_cap(sysenter_feature);
}
{
int ret;
- ret = register_callback(CALLBACKTYPE_syscall, xen_syscall_target);
+ ret = register_callback(CALLBACKTYPE_syscall, xen_entry_SYSCALL_64);
if (ret != 0) {
printk(KERN_ERR "Failed to set syscall callback: %d\n", ret);
/* Pretty fatal; 64-bit userspace has no other
if (boot_cpu_has(X86_FEATURE_SYSCALL32)) {
ret = register_callback(CALLBACKTYPE_syscall32,
- xen_syscall32_target);
+ xen_entry_SYSCALL_compat);
if (ret != 0)
setup_clear_cpu_cap(X86_FEATURE_SYSCALL32);
}
.macro xen_pv_trap name
SYM_CODE_START(xen_\name)
- UNWIND_HINT_EMPTY
+ UNWIND_HINT_ENTRY
ENDBR
pop %rcx
pop %r11
*/
/* Normal 64-bit system call target */
-SYM_CODE_START(xen_syscall_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_64)
+ UNWIND_HINT_ENTRY
ENDBR
popq %rcx
popq %r11
movq $__USER_CS, 1*8(%rsp)
jmp entry_SYSCALL_64_after_hwframe
-SYM_CODE_END(xen_syscall_target)
+SYM_CODE_END(xen_entry_SYSCALL_64)
#ifdef CONFIG_IA32_EMULATION
/* 32-bit compat syscall target */
-SYM_CODE_START(xen_syscall32_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_compat)
+ UNWIND_HINT_ENTRY
ENDBR
popq %rcx
popq %r11
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSCALL_compat_after_hwframe
-SYM_CODE_END(xen_syscall32_target)
+SYM_CODE_END(xen_entry_SYSCALL_compat)
/* 32-bit compat sysenter target */
-SYM_CODE_START(xen_sysenter_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_ENTRY
ENDBR
/*
* NB: Xen is polite and clears TF from EFLAGS for us. This means
movq $__USER32_CS, 1*8(%rsp)
jmp entry_SYSENTER_compat_after_hwframe
-SYM_CODE_END(xen_sysenter_target)
+SYM_CODE_END(xen_entry_SYSENTER_compat)
#else /* !CONFIG_IA32_EMULATION */
-SYM_CODE_START(xen_syscall32_target)
-SYM_CODE_START(xen_sysenter_target)
- UNWIND_HINT_EMPTY
+SYM_CODE_START(xen_entry_SYSCALL_compat)
+SYM_CODE_START(xen_entry_SYSENTER_compat)
+ UNWIND_HINT_ENTRY
ENDBR
lea 16(%rsp), %rsp /* strip %rcx, %r11 */
mov $-ENOSYS, %rax
pushq $0
jmp hypercall_iret
-SYM_CODE_END(xen_sysenter_target)
-SYM_CODE_END(xen_syscall32_target)
+SYM_CODE_END(xen_entry_SYSENTER_compat)
+SYM_CODE_END(xen_entry_SYSCALL_compat)
#endif /* CONFIG_IA32_EMULATION */
.rept (PAGE_SIZE / 32)
UNWIND_HINT_FUNC
ANNOTATE_NOENDBR
+ ANNOTATE_UNRET_SAFE
ret
/*
* Xen will write the hypercall page, and sort out ENDBR.
ANNOTATE_NOENDBR
cld
- /* Clear .bss */
- xor %eax,%eax
- mov $__bss_start, %rdi
- mov $__bss_stop, %rcx
- sub %rdi, %rcx
- shr $3, %rcx
- rep stosq
-
- mov %rsi, xen_start_info
mov initial_stack(%rip), %rsp
/* Set up %gs.
cdq
wrmsr
+ mov %rsi, %rdi
call xen_start_kernel
SYM_CODE_END(startup_xen)
__FINIT
/* These are code, but not functions. Defined in entry.S */
extern const char xen_failsafe_callback[];
-void xen_sysenter_target(void);
+void xen_entry_SYSENTER_compat(void);
#ifdef CONFIG_X86_64
-void xen_syscall_target(void);
-void xen_syscall32_target(void);
+void xen_entry_SYSCALL_64(void);
+void xen_entry_SYSCALL_compat(void);
#endif
extern void *xen_initial_gdt;
#ifdef CONFIG_HIBERNATION
- .bss
+ .section .bss, "aw"
.align 4
.Lsaved_regs:
#if defined(__XTENSA_WINDOWED_ABI__)
cpu = of_find_compatible_node(NULL, NULL, "cdns,xtensa-cpu");
if (cpu) {
clk = of_clk_get(cpu, 0);
+ of_node_put(cpu);
if (!IS_ERR(clk)) {
ccount_freq = clk_get_rate(clk);
return;
if ((eth = of_find_compatible_node(eth, NULL, "opencores,ethoc")))
update_local_mac(eth);
+ of_node_put(eth);
return 0;
}
arch_initcall(machine_setup);
spin_unlock_irq(&bfqd->lock);
#endif
+ blk_stat_disable_accounting(bfqd->queue);
wbt_enable_default(bfqd->queue);
kfree(bfqd);
bfq_init_root_group(bfqd->root_group, bfqd);
bfq_init_entity(&bfqd->oom_bfqq.entity, bfqd->root_group);
+ /* We dispatch from request queue wide instead of hw queue */
+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+
wbt_disable_default(q);
+ blk_stat_enable_accounting(q);
+
return 0;
out_free:
}
EXPORT_SYMBOL(bioset_init);
-/*
- * Initialize and setup a new bio_set, based on the settings from
- * another bio_set.
- */
-int bioset_init_from_src(struct bio_set *bs, struct bio_set *src)
-{
- int flags;
-
- flags = 0;
- if (src->bvec_pool.min_nr)
- flags |= BIOSET_NEED_BVECS;
- if (src->rescue_workqueue)
- flags |= BIOSET_NEED_RESCUER;
- if (src->cache)
- flags |= BIOSET_PERCPU_CACHE;
-
- return bioset_init(bs, src->bio_pool.min_nr, src->front_pad, flags);
-}
-EXPORT_SYMBOL(bioset_init_from_src);
-
static int __init init_bio(void)
{
int i;
blk_mq_exit_queue(q);
}
- /*
- * In theory, request pool of sched_tags belongs to request queue.
- * However, the current implementation requires tag_set for freeing
- * requests, so free the pool now.
- *
- * Queue has become frozen, there can't be any in-queue requests, so
- * it is safe to free requests now.
- */
- mutex_lock(&q->sysfs_lock);
- if (q->elevator)
- blk_mq_sched_free_rqs(q);
- mutex_unlock(&q->sysfs_lock);
-
/* @q is and will stay empty, shutdown and put */
blk_put_queue(q);
}
}
for (i = 0; i < iars->nr_ia_ranges; i++) {
- iars->ia_range[i].queue = q;
ret = kobject_init_and_add(&iars->ia_range[i].kobj,
&blk_ia_range_ktype, &iars->kobj,
"%d", i);
/* there isn't chance to merge the splitted bio */
split->bi_opf |= REQ_NOMERGE;
+ blkcg_bio_issue_init(split);
bio_chain(split, *bio);
trace_block_split(split, (*bio)->bi_iter.bi_sector);
submit_bio_noacct(*bio);
}
}
-void blk_mq_debugfs_unregister(struct request_queue *q)
-{
- q->sched_debugfs_dir = NULL;
-}
-
static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
struct blk_mq_ctx *ctx)
{
void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx)
{
+ if (!hctx->queue->debugfs_dir)
+ return;
debugfs_remove_recursive(hctx->debugfs_dir);
hctx->sched_debugfs_dir = NULL;
hctx->debugfs_dir = NULL;
{
struct elevator_type *e = q->elevator->type;
+ lockdep_assert_held(&q->debugfs_mutex);
+
/*
* If the parent directory has not been created yet, return, we will be
* called again later on and the directory/files will be created then.
void blk_mq_debugfs_unregister_sched(struct request_queue *q)
{
+ lockdep_assert_held(&q->debugfs_mutex);
+
debugfs_remove_recursive(q->sched_debugfs_dir);
q->sched_debugfs_dir = NULL;
}
void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
{
+ lockdep_assert_held(&rqos->q->debugfs_mutex);
+
+ if (!rqos->q->debugfs_dir)
+ return;
debugfs_remove_recursive(rqos->debugfs_dir);
rqos->debugfs_dir = NULL;
}
struct request_queue *q = rqos->q;
const char *dir_name = rq_qos_id_to_name(rqos->id);
+ lockdep_assert_held(&q->debugfs_mutex);
+
if (rqos->debugfs_dir || !rqos->ops->debugfs_attrs)
return;
debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
}
-void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
-{
- debugfs_remove_recursive(q->rqos_debugfs_dir);
- q->rqos_debugfs_dir = NULL;
-}
-
void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx)
{
struct elevator_type *e = q->elevator->type;
+ lockdep_assert_held(&q->debugfs_mutex);
+
/*
* If the parent debugfs directory has not been created yet, return;
* We will be called again later on with appropriate parent debugfs
void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
{
+ lockdep_assert_held(&hctx->queue->debugfs_mutex);
+
+ if (!hctx->queue->debugfs_dir)
+ return;
debugfs_remove_recursive(hctx->sched_debugfs_dir);
hctx->sched_debugfs_dir = NULL;
}
int blk_mq_debugfs_rq_show(struct seq_file *m, void *v);
void blk_mq_debugfs_register(struct request_queue *q);
-void blk_mq_debugfs_unregister(struct request_queue *q);
void blk_mq_debugfs_register_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx);
void blk_mq_debugfs_unregister_hctx(struct blk_mq_hw_ctx *hctx);
void blk_mq_debugfs_register_rqos(struct rq_qos *rqos);
void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos);
-void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q);
#else
static inline void blk_mq_debugfs_register(struct request_queue *q)
{
}
-static inline void blk_mq_debugfs_unregister(struct request_queue *q)
-{
-}
-
static inline void blk_mq_debugfs_register_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx)
{
static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
{
}
-
-static inline void blk_mq_debugfs_unregister_queue_rqos(struct request_queue *q)
-{
-}
#endif
#ifdef CONFIG_BLK_DEBUG_FS_ZONED
int ret;
if (!e) {
+ blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
q->elevator = NULL;
q->nr_requests = q->tag_set->queue_depth;
return 0;
if (ret)
goto err_free_map_and_rqs;
+ mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_register_sched(q);
+ mutex_unlock(&q->debugfs_mutex);
queue_for_each_hw_ctx(q, hctx, i) {
if (e->ops.init_hctx) {
return ret;
}
}
+ mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_register_sched_hctx(q, hctx);
+ mutex_unlock(&q->debugfs_mutex);
}
return 0;
unsigned int flags = 0;
queue_for_each_hw_ctx(q, hctx, i) {
+ mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_unregister_sched_hctx(hctx);
+ mutex_unlock(&q->debugfs_mutex);
+
if (e->type->ops.exit_hctx && hctx->sched_data) {
e->type->ops.exit_hctx(hctx, i);
hctx->sched_data = NULL;
}
flags = hctx->flags;
}
+
+ mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_unregister_sched(q);
+ mutex_unlock(&q->debugfs_mutex);
+
if (e->type->ops.exit_sched)
e->type->ops.exit_sched(e);
blk_mq_sched_tags_teardown(q, flags);
if (!blk_mq_hw_queue_mapped(data.hctx))
goto out_queue_exit;
cpu = cpumask_first_and(data.hctx->cpumask, cpu_online_mask);
+ if (cpu >= nr_cpu_ids)
+ goto out_queue_exit;
data.ctx = __blk_mq_get_ctx(q, cpu);
if (!q->elevator)
}
EXPORT_SYMBOL(blk_mq_run_hw_queue);
-/*
- * Is the request queue handled by an IO scheduler that does not respect
- * hardware queues when dispatching?
- */
-static bool blk_mq_has_sqsched(struct request_queue *q)
-{
- struct elevator_queue *e = q->elevator;
-
- if (e && e->type->ops.dispatch_request &&
- !(e->type->elevator_features & ELEVATOR_F_MQ_AWARE))
- return true;
- return false;
-}
-
/*
* Return prefered queue to dispatch from (if any) for non-mq aware IO
* scheduler.
unsigned long i;
sq_hctx = NULL;
- if (blk_mq_has_sqsched(q))
+ if (blk_queue_sq_sched(q))
sq_hctx = blk_mq_get_sq_hctx(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
unsigned long i;
sq_hctx = NULL;
- if (blk_mq_has_sqsched(q))
+ if (blk_queue_sq_sched(q))
sq_hctx = blk_mq_get_sq_hctx(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (blk_mq_hctx_stopped(hctx))
return NULL;
}
- rq_qos_throttle(q, *bio);
-
if (blk_mq_get_hctx_type((*bio)->bi_opf) != rq->mq_hctx->type)
return NULL;
if (op_is_flush(rq->cmd_flags) != op_is_flush((*bio)->bi_opf))
return NULL;
- rq->cmd_flags = (*bio)->bi_opf;
+ /*
+ * If any qos ->throttle() end up blocking, we will have flushed the
+ * plug and hence killed the cached_rq list as well. Pop this entry
+ * before we throttle.
+ */
plug->cached_rq = rq_list_next(rq);
+ rq_qos_throttle(q, *bio);
+
+ rq->cmd_flags = (*bio)->bi_opf;
INIT_LIST_HEAD(&rq->queuelist);
return rq;
}
if (blk_mq_hw_queue_mapped(hctx))
blk_mq_tag_idle(hctx);
- blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
- set->queue_depth, flush_rq);
+ if (blk_queue_init_done(q))
+ blk_mq_clear_flush_rq_mapping(set->tags[hctx_idx],
+ set->queue_depth, flush_rq);
if (set->ops->exit_request)
set->ops->exit_request(set, flush_rq, hctx_idx);
if (!qe)
return false;
+ /* q->elevator needs protection from ->sysfs_lock */
+ mutex_lock(&q->sysfs_lock);
+
INIT_LIST_HEAD(&qe->node);
qe->q = q;
qe->type = q->elevator->type;
list_add(&qe->node, head);
- mutex_lock(&q->sysfs_lock);
/*
* After elevator_switch_mq, the previous elevator_queue will be
* released by elevator_release. The reference of the io scheduler
void rq_qos_exit(struct request_queue *q)
{
- blk_mq_debugfs_unregister_queue_rqos(q);
-
while (q->rq_qos) {
struct rq_qos *rqos = q->rq_qos;
q->rq_qos = rqos->next;
blk_mq_unfreeze_queue(q);
- if (rqos->ops->debugfs_attrs)
+ if (rqos->ops->debugfs_attrs) {
+ mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_register_rqos(rqos);
+ mutex_unlock(&q->debugfs_mutex);
+ }
}
static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
blk_mq_unfreeze_queue(q);
+ mutex_lock(&q->debugfs_mutex);
blk_mq_debugfs_unregister_rqos(rqos);
+ mutex_unlock(&q->debugfs_mutex);
}
typedef bool (acquire_inflight_cb_t)(struct rq_wait *rqw, void *private_data);
if (queue_is_mq(q))
blk_mq_release(q);
- blk_trace_shutdown(q);
- mutex_lock(&q->debugfs_mutex);
- debugfs_remove_recursive(q->debugfs_dir);
- mutex_unlock(&q->debugfs_mutex);
-
- if (queue_is_mq(q))
- blk_mq_debugfs_unregister(q);
-
bioset_exit(&q->bio_split);
if (blk_queue_has_srcu(q))
goto unlock;
}
+ if (queue_is_mq(q))
+ __blk_mq_register_dev(dev, q);
+ mutex_lock(&q->sysfs_lock);
+
mutex_lock(&q->debugfs_mutex);
q->debugfs_dir = debugfs_create_dir(kobject_name(q->kobj.parent),
blk_debugfs_root);
- mutex_unlock(&q->debugfs_mutex);
-
- if (queue_is_mq(q)) {
- __blk_mq_register_dev(dev, q);
+ if (queue_is_mq(q))
blk_mq_debugfs_register(q);
- }
-
- mutex_lock(&q->sysfs_lock);
+ mutex_unlock(&q->debugfs_mutex);
ret = disk_register_independent_access_ranges(disk, NULL);
if (ret)
/* Now that we've deleted all child objects, we can delete the queue. */
kobject_uevent(&q->kobj, KOBJ_REMOVE);
kobject_del(&q->kobj);
-
mutex_unlock(&q->sysfs_dir_lock);
+ mutex_lock(&q->debugfs_mutex);
+ blk_trace_shutdown(q);
+ debugfs_remove_recursive(q->debugfs_dir);
+ q->debugfs_dir = NULL;
+ q->sched_debugfs_dir = NULL;
+ q->rqos_debugfs_dir = NULL;
+ mutex_unlock(&q->debugfs_mutex);
+
kobject_put(&disk_to_dev(disk)->kobj);
}
* Prevent new I/O from crossing bio_queue_enter().
*/
blk_queue_start_drain(q);
+ blk_mq_freeze_queue_wait(q);
if (!(disk->flags & GENHD_FL_HIDDEN)) {
sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi");
pm_runtime_set_memalloc_noio(disk_to_dev(disk), false);
device_del(disk_to_dev(disk));
- blk_mq_freeze_queue_wait(q);
-
blk_throtl_cancel_bios(disk->queue);
blk_sync_queue(q);
blk_flush_integrity();
+ blk_mq_cancel_work_sync(q);
+
+ blk_mq_quiesce_queue(q);
+ if (q->elevator) {
+ mutex_lock(&q->sysfs_lock);
+ elevator_exit(q);
+ mutex_unlock(&q->sysfs_lock);
+ }
+ rq_qos_exit(q);
+ blk_mq_unquiesce_queue(q);
+
/*
* Allow using passthrough request again after the queue is torn down.
*/
NULL
};
-static void disk_release_mq(struct request_queue *q)
-{
- blk_mq_cancel_work_sync(q);
-
- /*
- * There can't be any non non-passthrough bios in flight here, but
- * requests stay around longer, including passthrough ones so we
- * still need to freeze the queue here.
- */
- blk_mq_freeze_queue(q);
-
- /*
- * Since the I/O scheduler exit code may access cgroup information,
- * perform I/O scheduler exit before disassociating from the block
- * cgroup controller.
- */
- if (q->elevator) {
- mutex_lock(&q->sysfs_lock);
- elevator_exit(q);
- mutex_unlock(&q->sysfs_lock);
- }
- rq_qos_exit(q);
- __blk_mq_unfreeze_queue(q, true);
-}
-
/**
* disk_release - releases all allocated resources of the gendisk
* @dev: the device representing this disk
might_sleep();
WARN_ON_ONCE(disk_live(disk));
- if (queue_is_mq(disk->queue))
- disk_release_mq(disk->queue);
-
blkcg_exit_queue(disk->queue);
disk_release_events(disk);
WARN_ON_ONCE(!bdev->bd_holder);
- /* FIXME: remove the following once add_disk() handles errors */
- if (WARN_ON(!bdev->bd_holder_dir))
- goto out_unlock;
-
holder = bd_find_holder_disk(bdev, disk);
if (holder) {
holder->refcnt++;
blk_stat_enable_accounting(q);
+ blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
+
eq->elevator_data = kqd;
q->elevator = eq;
#endif
.elevator_attrs = kyber_sched_attrs,
.elevator_name = "kyber",
- .elevator_features = ELEVATOR_F_MQ_AWARE,
.elevator_owner = THIS_MODULE,
};
spin_lock_init(&dd->lock);
spin_lock_init(&dd->zone_lock);
+ /* We dispatch from request queue wide instead of hw queue */
+ blk_queue_flag_set(QUEUE_FLAG_SQ_SCHED, q);
+
q->elevator = eq;
return 0;
# SPDX-License-Identifier: GPL-2.0-only
-/blacklist_hashes_checked
+/blacklist_hash_list
/extract-cert
/x509_certificate_list
/x509_revocation_list
bool "Provide system-wide ring of trusted keys"
depends on KEYS
depends on ASYMMETRIC_KEY_TYPE
+ depends on X509_CERTIFICATE_PARSER
help
Provide a system keyring to which trusted keys can be added. Keys in
the keyring are considered to be trusted. Keys may be added at will
# Makefile for the linux kernel signature checking certificates.
#
-obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o common.o
-obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o common.o
+obj-$(CONFIG_SYSTEM_TRUSTED_KEYRING) += system_keyring.o system_certificates.o
+obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist.o
obj-$(CONFIG_SYSTEM_REVOCATION_LIST) += revocation_certificates.o
ifneq ($(CONFIG_SYSTEM_BLACKLIST_HASH_LIST),)
-quiet_cmd_check_blacklist_hashes = CHECK $(patsubst "%",%,$(2))
- cmd_check_blacklist_hashes = $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(2); touch $@
-$(eval $(call config_filename,SYSTEM_BLACKLIST_HASH_LIST))
+$(obj)/blacklist_hashes.o: $(obj)/blacklist_hash_list
+CFLAGS_blacklist_hashes.o := -I $(obj)
-$(obj)/blacklist_hashes.o: $(obj)/blacklist_hashes_checked
+quiet_cmd_check_and_copy_blacklist_hash_list = GEN $@
+ cmd_check_and_copy_blacklist_hash_list = \
+ $(AWK) -f $(srctree)/scripts/check-blacklist-hashes.awk $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) >&2; \
+ cat $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) > $@
-CFLAGS_blacklist_hashes.o += -I$(srctree)
-
-targets += blacklist_hashes_checked
-$(obj)/blacklist_hashes_checked: $(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(SYSTEM_BLACKLIST_HASH_LIST_FILENAME) scripts/check-blacklist-hashes.awk FORCE
- $(call if_changed,check_blacklist_hashes,$(SYSTEM_BLACKLIST_HASH_LIST_SRCPREFIX)$(CONFIG_SYSTEM_BLACKLIST_HASH_LIST))
+$(obj)/blacklist_hash_list: $(CONFIG_SYSTEM_BLACKLIST_HASH_LIST) FORCE
+ $(call if_changed,check_and_copy_blacklist_hash_list)
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_hashes.o
else
obj-$(CONFIG_SYSTEM_BLACKLIST_KEYRING) += blacklist_nohashes.o
endif
+targets += blacklist_hash_list
quiet_cmd_extract_certs = CERT $@
cmd_extract_certs = $(obj)/extract-cert $(extract-cert-in) $@
$(obj)/x509_certificate_list: $(CONFIG_SYSTEM_TRUSTED_KEYS) $(obj)/extract-cert FORCE
$(call if_changed,extract_certs)
-targets += x509_certificate_list blacklist_hashes_checked
+targets += x509_certificate_list
# If module signing is requested, say by allyesconfig, but a key has not been
# supplied, then one will need to be generated to make sure the build does not
#include <linux/err.h>
#include <linux/seq_file.h>
#include <linux/uidgid.h>
-#include <linux/verification.h>
+#include <keys/asymmetric-type.h>
#include <keys/system_keyring.h>
#include "blacklist.h"
-#include "common.h"
/*
* According to crypto/asymmetric_keys/x509_cert_parser.c:x509_note_pkey_algo(),
if (revocation_certificate_list_size)
pr_notice("Loading compiled-in revocation X.509 certificates\n");
- return load_certificate_list(revocation_certificate_list, revocation_certificate_list_size,
- blacklist_keyring);
+ return x509_load_certificate_list(revocation_certificate_list,
+ revocation_certificate_list_size,
+ blacklist_keyring);
}
late_initcall(load_revocation_certificate_list);
#endif
// SPDX-License-Identifier: GPL-2.0
#include "blacklist.h"
-const char __initdata *const blacklist_hashes[] = {
-#include CONFIG_SYSTEM_BLACKLIST_HASH_LIST
+const char __initconst *const blacklist_hashes[] = {
+#include "blacklist_hash_list"
, NULL
};
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-or-later
-
-#include <linux/kernel.h>
-#include <linux/key.h>
-#include "common.h"
-
-int load_certificate_list(const u8 cert_list[],
- const unsigned long list_size,
- const struct key *keyring)
-{
- key_ref_t key;
- const u8 *p, *end;
- size_t plen;
-
- p = cert_list;
- end = p + list_size;
- while (p < end) {
- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
- * than 256 bytes in size.
- */
- if (end - p < 4)
- goto dodgy_cert;
- if (p[0] != 0x30 &&
- p[1] != 0x82)
- goto dodgy_cert;
- plen = (p[2] << 8) | p[3];
- plen += 4;
- if (plen > end - p)
- goto dodgy_cert;
-
- key = key_create_or_update(make_key_ref(keyring, 1),
- "asymmetric",
- NULL,
- p,
- plen,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
- KEY_ALLOC_NOT_IN_QUOTA |
- KEY_ALLOC_BUILT_IN |
- KEY_ALLOC_BYPASS_RESTRICTION);
- if (IS_ERR(key)) {
- pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
- PTR_ERR(key));
- } else {
- pr_notice("Loaded X.509 cert '%s'\n",
- key_ref_to_ptr(key)->description);
- key_ref_put(key);
- }
- p += plen;
- }
-
- return 0;
-
-dodgy_cert:
- pr_err("Problem parsing in-kernel X.509 certificate list\n");
- return 0;
-}
+++ /dev/null
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-
-#ifndef _CERT_COMMON_H
-#define _CERT_COMMON_H
-
-int load_certificate_list(const u8 cert_list[], const unsigned long list_size,
- const struct key *keyring);
-
-#endif
#include <openssl/err.h>
#include <openssl/engine.h>
+/*
+ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API.
+ *
+ * Remove this if/when that API is no longer used
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
#define PKEY_ID_PKCS7 2
static __attribute__((noreturn))
#include <keys/asymmetric-type.h>
#include <keys/system_keyring.h>
#include <crypto/pkcs7.h>
-#include "common.h"
static struct key *builtin_trusted_keys;
#ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
pr_notice("Loading compiled-in module X.509 certificates\n");
- return load_certificate_list(system_certificate_list, module_cert_size, keyring);
+ return x509_load_certificate_list(system_certificate_list,
+ module_cert_size, keyring);
}
/*
size = system_certificate_list_size - module_cert_size;
#endif
- return load_certificate_list(p, size, builtin_trusted_keys);
+ return x509_load_certificate_list(p, size, builtin_trusted_keys);
}
late_initcall(load_system_certificate_list);
#
menuconfig CRYPTO
tristate "Cryptographic API"
+ select LIB_MEMNEQ
help
This option provides the core Cryptographic API.
CRC32c and CRC32 CRC algorithms implemented using mips crypto
instructions, when available.
+config CRYPTO_CRC32_S390
+ tristate "CRC-32 algorithms"
+ depends on S390
+ select CRYPTO_HASH
+ select CRC32
+ help
+ Select this option if you want to use hardware accelerated
+ implementations of CRC algorithms. With this option, you
+ can optimize the computation of CRC-32 (IEEE 802.3 Ethernet)
+ and CRC-32C (Castagnoli).
+
+ It is available with IBM z13 or later.
config CRYPTO_XXHASH
tristate "xxHash hash algorithm"
Extensions version 1 (AVX1), or Advanced Vector Extensions
version 2 (AVX2) instructions, when available.
+config CRYPTO_SHA512_S390
+ tristate "SHA384 and SHA512 digest algorithm"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ This is the s390 hardware accelerated implementation of the
+ SHA512 secure hash standard.
+
+ It is available as of z10.
+
config CRYPTO_SHA1_OCTEON
tristate "SHA1 digest algorithm (OCTEON)"
depends on CPU_CAVIUM_OCTEON
SHA-1 secure hash standard (DFIPS 180-4) implemented
using powerpc SPE SIMD instruction set.
+config CRYPTO_SHA1_S390
+ tristate "SHA1 digest algorithm"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ This is the s390 hardware accelerated implementation of the
+ SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
+
+ It is available as of z990.
+
config CRYPTO_SHA256
tristate "SHA224 and SHA256 digest algorithm"
select CRYPTO_HASH
SHA-256 secure hash standard (DFIPS 180-2) implemented
using sparc64 crypto instructions, when available.
+config CRYPTO_SHA256_S390
+ tristate "SHA256 digest algorithm"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ This is the s390 hardware accelerated implementation of the
+ SHA256 secure hash standard (DFIPS 180-2).
+
+ It is available as of z9.
+
config CRYPTO_SHA512
tristate "SHA384 and SHA512 digest algorithms"
select CRYPTO_HASH
References:
http://keccak.noekeon.org/
+config CRYPTO_SHA3_256_S390
+ tristate "SHA3_224 and SHA3_256 digest algorithm"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ This is the s390 hardware accelerated implementation of the
+ SHA3_256 secure hash standard.
+
+ It is available as of z14.
+
+config CRYPTO_SHA3_512_S390
+ tristate "SHA3_384 and SHA3_512 digest algorithm"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ This is the s390 hardware accelerated implementation of the
+ SHA3_512 secure hash standard.
+
+ It is available as of z14.
+
config CRYPTO_SM3
tristate
This is the x86_64 CLMUL-NI accelerated implementation of
GHASH, the hash function used in GCM (Galois/Counter mode).
+config CRYPTO_GHASH_S390
+ tristate "GHASH hash function"
+ depends on S390
+ select CRYPTO_HASH
+ help
+ This is the s390 hardware accelerated implementation of GHASH,
+ the hash function used in GCM (Galois/Counter mode).
+
+ It is available as of z196.
+
comment "Ciphers"
config CRYPTO_AES
architecture specific assembler implementations that work on 1KB
tables or 256 bytes S-boxes.
+config CRYPTO_AES_S390
+ tristate "AES cipher algorithms"
+ depends on S390
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ help
+ This is the s390 hardware accelerated implementation of the
+ AES cipher algorithms (FIPS-197).
+
+ As of z9 the ECB and CBC modes are hardware accelerated
+ for 128 bit keys.
+ As of z10 the ECB and CBC modes are hardware accelerated
+ for all AES key sizes.
+ As of z196 the CTR mode is hardware accelerated for all AES
+ key sizes and XTS mode is hardware accelerated for 256 and
+ 512 bit keys.
+
config CRYPTO_ANUBIS
tristate "Anubis cipher algorithm"
depends on CRYPTO_USER_API_ENABLE_OBSOLETE
algorithm are provided; regular processing one input block and
one that processes three blocks parallel.
+config CRYPTO_DES_S390
+ tristate "DES and Triple DES cipher algorithms"
+ depends on S390
+ select CRYPTO_ALGAPI
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_DES
+ help
+ This is the s390 hardware accelerated implementation of the
+ DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
+
+ As of z990 the ECB and CBC mode are hardware accelerated.
+ As of z196 the CTR mode is hardware accelerated.
+
config CRYPTO_FCRYPT
tristate "FCrypt cipher algorithm"
select CRYPTO_ALGAPI
select CRYPTO_SKCIPHER
select CRYPTO_ARCH_HAVE_LIB_CHACHA
+config CRYPTO_CHACHA_S390
+ tristate "ChaCha20 stream cipher"
+ depends on S390
+ select CRYPTO_SKCIPHER
+ select CRYPTO_LIB_CHACHA_GENERIC
+ select CRYPTO_ARCH_HAVE_LIB_CHACHA
+ help
+ This is the s390 SIMD implementation of the ChaCha20 stream
+ cipher (RFC 7539).
+
+ It is available as of z13.
+
config CRYPTO_SEED
tristate "SEED cipher algorithm"
depends on CRYPTO_USER_API_ENABLE_OBSOLETE
#
obj-$(CONFIG_CRYPTO) += crypto.o
-crypto-y := api.o cipher.o compress.o memneq.o
+crypto-y := api.o cipher.o compress.o
obj-$(CONFIG_CRYPTO_ENGINE) += crypto_engine.o
obj-$(CONFIG_CRYPTO_FIPS) += fips.o
This option provides support for verifying the signature(s) on a
signed PE binary.
+config FIPS_SIGNATURE_SELFTEST
+ bool "Run FIPS selftests on the X.509+PKCS7 signature verification"
+ help
+ This option causes some selftests to be run on the signature
+ verification code, using some built in data. This is required
+ for FIPS.
+ depends on KEYS
+ depends on ASYMMETRIC_KEY_TYPE
+ depends on PKCS7_MESSAGE_PARSER
+
endif # ASYMMETRIC_KEY_TYPE
x509.asn1.o \
x509_akid.asn1.o \
x509_cert_parser.o \
+ x509_loader.o \
x509_public_key.o
+x509_key_parser-$(CONFIG_FIPS_SIGNATURE_SELFTEST) += selftest.o
$(obj)/x509_cert_parser.o: \
$(obj)/x509.asn1.h \
--- /dev/null
+/* Self-testing for signature checking.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include <linux/kernel.h>
+#include <linux/cred.h>
+#include <linux/key.h>
+#include <crypto/pkcs7.h>
+#include "x509_parser.h"
+
+struct certs_test {
+ const u8 *data;
+ size_t data_len;
+ const u8 *pkcs7;
+ size_t pkcs7_len;
+};
+
+/*
+ * Set of X.509 certificates to provide public keys for the tests. These will
+ * be loaded into a temporary keyring for the duration of the testing.
+ */
+static const __initconst u8 certs_selftest_keys[] = {
+ "\x30\x82\x05\x55\x30\x82\x03\x3d\xa0\x03\x02\x01\x02\x02\x14\x73"
+ "\x98\xea\x98\x2d\xd0\x2e\xa8\xb1\xcf\x57\xc7\xf2\x97\xb3\xe6\x1a"
+ "\xfc\x8c\x0a\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x01\x0b"
+ "\x05\x00\x30\x34\x31\x32\x30\x30\x06\x03\x55\x04\x03\x0c\x29\x43"
+ "\x65\x72\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69\x66"
+ "\x69\x63\x61\x74\x69\x6f\x6e\x20\x73\x65\x6c\x66\x2d\x74\x65\x73"
+ "\x74\x69\x6e\x67\x20\x6b\x65\x79\x30\x20\x17\x0d\x32\x32\x30\x35"
+ "\x31\x38\x32\x32\x33\x32\x34\x31\x5a\x18\x0f\x32\x31\x32\x32\x30"
+ "\x34\x32\x34\x32\x32\x33\x32\x34\x31\x5a\x30\x34\x31\x32\x30\x30"
+ "\x06\x03\x55\x04\x03\x0c\x29\x43\x65\x72\x74\x69\x66\x69\x63\x61"
+ "\x74\x65\x20\x76\x65\x72\x69\x66\x69\x63\x61\x74\x69\x6f\x6e\x20"
+ "\x73\x65\x6c\x66\x2d\x74\x65\x73\x74\x69\x6e\x67\x20\x6b\x65\x79"
+ "\x30\x82\x02\x22\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x01"
+ "\x01\x05\x00\x03\x82\x02\x0f\x00\x30\x82\x02\x0a\x02\x82\x02\x01"
+ "\x00\xcc\xac\x49\xdd\x3b\xca\xb0\x15\x7e\x84\x6a\xb2\x0a\x69\x5f"
+ "\x1c\x0a\x61\x82\x3b\x4f\x2c\xa3\x95\x2c\x08\x58\x4b\xb1\x5d\x99"
+ "\xe0\xc3\xc1\x79\xc2\xb3\xeb\xc0\x1e\x6d\x3e\x54\x1d\xbd\xb7\x92"
+ "\x7b\x4d\xb5\x95\x58\xb2\x52\x2e\xc6\x24\x4b\x71\x63\x80\x32\x77"
+ "\xa7\x38\x5e\xdb\x72\xae\x6e\x0d\xec\xfb\xb6\x6d\x01\x7f\xe9\x55"
+ "\x66\xdf\xbf\x1d\x76\x78\x02\x31\xe8\xe5\x07\xf8\xb7\x82\x5c\x0d"
+ "\xd4\xbb\xfb\xa2\x59\x0d\x2e\x3a\x78\x95\x3a\x8b\x46\x06\x47\x44"
+ "\x46\xd7\xcd\x06\x6a\x41\x13\xe3\x19\xf6\xbb\x6e\x38\xf4\x83\x01"
+ "\xa3\xbf\x4a\x39\x4f\xd7\x0a\xe9\x38\xb3\xf5\x94\x14\x4e\xdd\xf7"
+ "\x43\xfd\x24\xb2\x49\x3c\xa5\xf7\x7a\x7c\xd4\x45\x3d\x97\x75\x68"
+ "\xf1\xed\x4c\x42\x0b\x70\xca\x85\xf3\xde\xe5\x88\x2c\xc5\xbe\xb6"
+ "\x97\x34\xba\x24\x02\xcd\x8b\x86\x9f\xa9\x73\xca\x73\xcf\x92\x81"
+ "\xee\x75\x55\xbb\x18\x67\x5c\xff\x3f\xb5\xdd\x33\x1b\x0c\xe9\x78"
+ "\xdb\x5c\xcf\xaa\x5c\x43\x42\xdf\x5e\xa9\x6d\xec\xd7\xd7\xff\xe6"
+ "\xa1\x3a\x92\x1a\xda\xae\xf6\x8c\x6f\x7b\xd5\xb4\x6e\x06\xe9\x8f"
+ "\xe8\xde\x09\x31\x89\xed\x0e\x11\xa1\xfa\x8a\xe9\xe9\x64\x59\x62"
+ "\x53\xda\xd1\x70\xbe\x11\xd4\x99\x97\x11\xcf\x99\xde\x0b\x9d\x94"
+ "\x7e\xaa\xb8\x52\xea\x37\xdb\x90\x7e\x35\xbd\xd9\xfe\x6d\x0a\x48"
+ "\x70\x28\xdd\xd5\x0d\x7f\x03\x80\x93\x14\x23\x8f\xb9\x22\xcd\x7c"
+ "\x29\xfe\xf1\x72\xb5\x5c\x0b\x12\xcf\x9c\x15\xf6\x11\x4c\x7a\x45"
+ "\x25\x8c\x45\x0a\x34\xac\x2d\x9a\x81\xca\x0b\x13\x22\xcd\xeb\x1a"
+ "\x38\x88\x18\x97\x96\x08\x81\xaa\xcc\x8f\x0f\x8a\x32\x7b\x76\x68"
+ "\x03\x68\x43\xbf\x11\xba\x55\x60\xfd\x80\x1c\x0d\x9b\x69\xb6\x09"
+ "\x72\xbc\x0f\x41\x2f\x07\x82\xc6\xe3\xb2\x13\x91\xc4\x6d\x14\x95"
+ "\x31\xbe\x19\xbd\xbc\xed\xe1\x4c\x74\xa2\xe0\x78\x0b\xbb\x94\xec"
+ "\x4c\x53\x3a\xa2\xb5\x84\x1d\x4b\x65\x7e\xdc\xf7\xdb\x36\x7d\xbe"
+ "\x9e\x3b\x36\x66\x42\x66\x76\x35\xbf\xbe\xf0\xc1\x3c\x7c\xe9\x42"
+ "\x5c\x24\x53\x03\x05\xa8\x67\x24\x50\x02\x75\xff\x24\x46\x3b\x35"
+ "\x89\x76\xe6\x70\xda\xc5\x51\x8c\x9a\xe5\x05\xb0\x0b\xd0\x2d\xd4"
+ "\x7d\x57\x75\x94\x6b\xf9\x0a\xad\x0e\x41\x00\x15\xd0\x4f\xc0\x7f"
+ "\x90\x2d\x18\x48\x8f\x28\xfe\x5d\xa7\xcd\x99\x9e\xbd\x02\x6c\x8a"
+ "\x31\xf3\x1c\xc7\x4b\xe6\x93\xcd\x42\xa2\xe4\x68\x10\x47\x9d\xfc"
+ "\x21\x02\x03\x01\x00\x01\xa3\x5d\x30\x5b\x30\x0c\x06\x03\x55\x1d"
+ "\x13\x01\x01\xff\x04\x02\x30\x00\x30\x0b\x06\x03\x55\x1d\x0f\x04"
+ "\x04\x03\x02\x07\x80\x30\x1d\x06\x03\x55\x1d\x0e\x04\x16\x04\x14"
+ "\xf5\x87\x03\xbb\x33\xce\x1b\x73\xee\x02\xec\xcd\xee\x5b\x88\x17"
+ "\x51\x8f\xe3\xdb\x30\x1f\x06\x03\x55\x1d\x23\x04\x18\x30\x16\x80"
+ "\x14\xf5\x87\x03\xbb\x33\xce\x1b\x73\xee\x02\xec\xcd\xee\x5b\x88"
+ "\x17\x51\x8f\xe3\xdb\x30\x0d\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01"
+ "\x01\x0b\x05\x00\x03\x82\x02\x01\x00\xc0\x2e\x12\x41\x7b\x73\x85"
+ "\x16\xc8\xdb\x86\x79\xe8\xf5\xcd\x44\xf4\xc6\xe2\x81\x23\x5e\x47"
+ "\xcb\xab\x25\xf1\x1e\x58\x3e\x31\x7f\x78\xad\x85\xeb\xfe\x14\x88"
+ "\x60\xf7\x7f\xd2\x26\xa2\xf4\x98\x2a\xfd\xba\x05\x0c\x20\x33\x12"
+ "\xcc\x4d\x14\x61\x64\x81\x93\xd3\x33\xed\xc8\xff\xf1\x78\xcc\x5f"
+ "\x51\x9f\x09\xd7\xbe\x0d\x5c\x74\xfd\x9b\xdf\x52\x4a\xc9\xa8\x71"
+ "\x25\x33\x04\x10\x67\x36\xd0\xb3\x0b\xc9\xa1\x40\x72\xae\x41\x7b"
+ "\x68\xe6\xe4\x7b\xd0\x28\xf7\x6d\xe7\x3f\x50\xfc\x91\x7c\x91\x56"
+ "\xd4\xdf\xa6\xbb\xe8\x4d\x1b\x58\xaa\x28\xfa\xc1\x19\xeb\x11\x2f"
+ "\x24\x8b\x7c\xc5\xa9\x86\x26\xaa\x6e\xb7\x9b\xd5\xf8\x06\xfb\x02"
+ "\x52\x7b\x9c\x9e\xa1\xe0\x07\x8b\x5e\xe4\xb8\x55\x29\xf6\x48\x52"
+ "\x1c\x1b\x54\x2d\x46\xd8\xe5\x71\xb9\x60\xd1\x45\xb5\x92\x89\x8a"
+ "\x63\x58\x2a\xb3\xc6\xb2\x76\xe2\x3c\x82\x59\x04\xae\x5a\xc4\x99"
+ "\x7b\x2e\x4b\x46\x57\xb8\x29\x24\xb2\xfd\xee\x2c\x0d\xa4\x83\xfa"
+ "\x65\x2a\x07\x35\x8b\x97\xcf\xbd\x96\x2e\xd1\x7e\x6c\xc2\x1e\x87"
+ "\xb6\x6c\x76\x65\xb5\xb2\x62\xda\x8b\xe9\x73\xe3\xdb\x33\xdd\x13"
+ "\x3a\x17\x63\x6a\x76\xde\x8d\x8f\xe0\x47\x61\x28\x3a\x83\xff\x8f"
+ "\xe7\xc7\xe0\x4a\xa3\xe5\x07\xcf\xe9\x8c\x35\x35\x2e\xe7\x80\x66"
+ "\x31\xbf\x91\x58\x0a\xe1\x25\x3d\x38\xd3\xa4\xf0\x59\x34\x47\x07"
+ "\x62\x0f\xbe\x30\xdd\x81\x88\x58\xf0\x28\xb0\x96\xe5\x82\xf8\x05"
+ "\xb7\x13\x01\xbc\xfa\xc6\x1f\x86\x72\xcc\xf9\xee\x8e\xd9\xd6\x04"
+ "\x8c\x24\x6c\xbf\x0f\x5d\x37\x39\xcf\x45\xc1\x93\x3a\xd2\xed\x5c"
+ "\x58\x79\x74\x86\x62\x30\x7e\x8e\xbb\xdd\x7a\xa9\xed\xca\x40\xcb"
+ "\x62\x47\xf4\xb4\x9f\x52\x7f\x72\x63\xa8\xf0\x2b\xaf\x45\x2a\x48"
+ "\x19\x6d\xe3\xfb\xf9\x19\x66\x69\xc8\xcc\x62\x87\x6c\x53\x2b\x2d"
+ "\x6e\x90\x6c\x54\x3a\x82\x25\x41\xcb\x18\x6a\xa4\x22\xa8\xa1\xc4"
+ "\x47\xd7\x81\x00\x1c\x15\x51\x0f\x1a\xaf\xef\x9f\xa6\x61\x8c\xbd"
+ "\x6b\x8b\xed\xe6\xac\x0e\xb6\x3a\x4c\x92\xe6\x0f\x91\x0a\x0f\x71"
+ "\xc7\xa0\xb9\x0d\x3a\x17\x5a\x6f\x35\xc8\xe7\x50\x4f\x46\xe8\x70"
+ "\x60\x48\x06\x82\x8b\x66\x58\xe6\x73\x91\x9c\x12\x3d\x35\x8e\x46"
+ "\xad\x5a\xf5\xb3\xdb\x69\x21\x04\xfd\xd3\x1c\xdf\x94\x9d\x56\xb0"
+ "\x0a\xd1\x95\x76\x8d\xec\x9e\xdd\x0b\x15\x97\x64\xad\xe5\xf2\x62"
+ "\x02\xfc\x9e\x5f\x56\x42\x39\x05\xb3"
+};
+
+/*
+ * Signed data and detached signature blobs that form the verification tests.
+ */
+static const __initconst u8 certs_selftest_1_data[] = {
+ "\x54\x68\x69\x73\x20\x69\x73\x20\x73\x6f\x6d\x65\x20\x74\x65\x73"
+ "\x74\x20\x64\x61\x74\x61\x20\x75\x73\x65\x64\x20\x66\x6f\x72\x20"
+ "\x73\x65\x6c\x66\x2d\x74\x65\x73\x74\x69\x6e\x67\x20\x63\x65\x72"
+ "\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69\x66\x69\x63"
+ "\x61\x74\x69\x6f\x6e\x2e\x0a"
+};
+
+static const __initconst u8 certs_selftest_1_pkcs7[] = {
+ "\x30\x82\x02\xab\x06\x09\x2a\x86\x48\x86\xf7\x0d\x01\x07\x02\xa0"
+ "\x82\x02\x9c\x30\x82\x02\x98\x02\x01\x01\x31\x0d\x30\x0b\x06\x09"
+ "\x60\x86\x48\x01\x65\x03\x04\x02\x01\x30\x0b\x06\x09\x2a\x86\x48"
+ "\x86\xf7\x0d\x01\x07\x01\x31\x82\x02\x75\x30\x82\x02\x71\x02\x01"
+ "\x01\x30\x4c\x30\x34\x31\x32\x30\x30\x06\x03\x55\x04\x03\x0c\x29"
+ "\x43\x65\x72\x74\x69\x66\x69\x63\x61\x74\x65\x20\x76\x65\x72\x69"
+ "\x66\x69\x63\x61\x74\x69\x6f\x6e\x20\x73\x65\x6c\x66\x2d\x74\x65"
+ "\x73\x74\x69\x6e\x67\x20\x6b\x65\x79\x02\x14\x73\x98\xea\x98\x2d"
+ "\xd0\x2e\xa8\xb1\xcf\x57\xc7\xf2\x97\xb3\xe6\x1a\xfc\x8c\x0a\x30"
+ "\x0b\x06\x09\x60\x86\x48\x01\x65\x03\x04\x02\x01\x30\x0d\x06\x09"
+ "\x2a\x86\x48\x86\xf7\x0d\x01\x01\x01\x05\x00\x04\x82\x02\x00\xac"
+ "\xb0\xf2\x07\xd6\x99\x6d\xc0\xc0\xd9\x8d\x31\x0d\x7e\x04\xeb\xc3"
+ "\x88\x90\xc4\x58\x46\xd4\xe2\xa0\xa3\x25\xe3\x04\x50\x37\x85\x8c"
+ "\x91\xc6\xfc\xc5\xd4\x92\xfd\x05\xd8\xb8\xa3\xb8\xba\x89\x13\x00"
+ "\x88\x79\x99\x51\x6b\x5b\x28\x31\xc0\xb3\x1b\x7a\x68\x2c\x00\xdb"
+ "\x4b\x46\x11\xf3\xfa\x50\x8e\x19\x89\xa2\x4c\xda\x4c\x89\x01\x11"
+ "\x89\xee\xd3\xc8\xc1\xe7\xa7\xf6\xb2\xa2\xf8\x65\xb8\x35\x20\x33"
+ "\xba\x12\x62\xd5\xbd\xaa\x71\xe5\x5b\xc0\x6a\x32\xff\x6a\x2e\x23"
+ "\xef\x2b\xb6\x58\xb1\xfb\x5f\x82\x34\x40\x6d\x9f\xbc\x27\xac\x37"
+ "\x23\x99\xcf\x7d\x20\xb2\x39\x01\xc0\x12\xce\xd7\x5d\x2f\xb6\xab"
+ "\xb5\x56\x4f\xef\xf4\x72\x07\x58\x65\xa9\xeb\x1f\x75\x1c\x5f\x0c"
+ "\x88\xe0\xa4\xe2\xcd\x73\x2b\x9e\xb2\x05\x7e\x12\xf8\xd0\x66\x41"
+ "\xcc\x12\x63\xd4\xd6\xac\x9b\x1d\x14\x77\x8d\x1c\x57\xd5\x27\xc6"
+ "\x49\xa2\x41\x43\xf3\x59\x29\xe5\xcb\xd1\x75\xbc\x3a\x97\x2a\x72"
+ "\x22\x66\xc5\x3b\xc1\xba\xfc\x53\x18\x98\xe2\x21\x64\xc6\x52\x87"
+ "\x13\xd5\x7c\x42\xe8\xfb\x9c\x9a\x45\x32\xd5\xa5\x22\x62\x9d\xd4"
+ "\xcb\xa4\xfa\x77\xbb\x50\x24\x0b\x8b\x88\x99\x15\x56\xa9\x1e\x92"
+ "\xbf\x5d\x94\x77\xb6\xf1\x67\x01\x60\x06\x58\x5c\xdf\x18\x52\x79"
+ "\x37\x30\x93\x7d\x87\x04\xf1\xe0\x55\x59\x52\xf3\xc2\xb1\x1c\x5b"
+ "\x12\x7c\x49\x87\xfb\xf7\xed\xdd\x95\x71\xec\x4b\x1a\x85\x08\xb0"
+ "\xa0\x36\xc4\x7b\xab\x40\xe0\xf1\x98\xcc\xaf\x19\x40\x8f\x47\x6f"
+ "\xf0\x6c\x84\x29\x7f\x7f\x04\x46\xcb\x08\x0f\xe0\xc1\xc9\x70\x6e"
+ "\x95\x3b\xa4\xbc\x29\x2b\x53\x67\x45\x1b\x0d\xbc\x13\xa5\x76\x31"
+ "\xaf\xb9\xd0\xe0\x60\x12\xd2\xf4\xb7\x7c\x58\x7e\xf6\x2d\xbb\x24"
+ "\x14\x5a\x20\x24\xa8\x12\xdf\x25\xbd\x42\xce\x96\x7c\x2e\xba\x14"
+ "\x1b\x81\x9f\x18\x45\xa4\xc6\x70\x3e\x0e\xf0\xd3\x7b\x9c\x10\xbe"
+ "\xb8\x7a\x89\xc5\x9e\xd9\x97\xdf\xd7\xe7\xc6\x1d\xc0\x20\x6c\xb8"
+ "\x1e\x3a\x63\xb8\x39\x8e\x8e\x62\xd5\xd2\xb4\xcd\xff\x46\xfc\x8e"
+ "\xec\x07\x35\x0c\xff\xb0\x05\xe6\xf4\xe5\xfe\xa2\xe3\x0a\xe6\x36"
+ "\xa7\x4a\x7e\x62\x1d\xc4\x50\x39\x35\x4e\x28\xcb\x4a\xfb\x9d\xdb"
+ "\xdd\x23\xd6\x53\xb1\x74\x77\x12\xf7\x9c\xf0\x9a\x6b\xf7\xa9\x64"
+ "\x2d\x86\x21\x2a\xcf\xc6\x54\xf5\xc9\xad\xfa\xb5\x12\xb4\xf3\x51"
+ "\x77\x55\x3c\x6f\x0c\x32\xd3\x8c\x44\x39\x71\x25\xfe\x96\xd2"
+};
+
+/*
+ * List of tests to be run.
+ */
+#define TEST(data, pkcs7) { data, sizeof(data) - 1, pkcs7, sizeof(pkcs7) - 1 }
+static const struct certs_test certs_tests[] __initconst = {
+ TEST(certs_selftest_1_data, certs_selftest_1_pkcs7),
+};
+
+int __init fips_signature_selftest(void)
+{
+ struct key *keyring;
+ int ret, i;
+
+ pr_notice("Running certificate verification selftests\n");
+
+ keyring = keyring_alloc(".certs_selftest",
+ GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
+ (KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ |
+ KEY_USR_SEARCH,
+ KEY_ALLOC_NOT_IN_QUOTA,
+ NULL, NULL);
+ if (IS_ERR(keyring))
+ panic("Can't allocate certs selftest keyring: %ld\n",
+ PTR_ERR(keyring));
+
+ ret = x509_load_certificate_list(certs_selftest_keys,
+ sizeof(certs_selftest_keys) - 1, keyring);
+ if (ret < 0)
+ panic("Can't allocate certs selftest keyring: %d\n", ret);
+
+ for (i = 0; i < ARRAY_SIZE(certs_tests); i++) {
+ const struct certs_test *test = &certs_tests[i];
+ struct pkcs7_message *pkcs7;
+
+ pkcs7 = pkcs7_parse_message(test->pkcs7, test->pkcs7_len);
+ if (IS_ERR(pkcs7))
+ panic("Certs selftest %d: pkcs7_parse_message() = %d\n", i, ret);
+
+ pkcs7_supply_detached_data(pkcs7, test->data, test->data_len);
+
+ ret = pkcs7_verify(pkcs7, VERIFYING_MODULE_SIGNATURE);
+ if (ret < 0)
+ panic("Certs selftest %d: pkcs7_verify() = %d\n", i, ret);
+
+ ret = pkcs7_validate_trust(pkcs7, keyring);
+ if (ret < 0)
+ panic("Certs selftest %d: pkcs7_validate_trust() = %d\n", i, ret);
+
+ pkcs7_free_message(pkcs7);
+ }
+
+ key_put(keyring);
+ return 0;
+}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/kernel.h>
+#include <linux/key.h>
+#include <keys/asymmetric-type.h>
+
+int x509_load_certificate_list(const u8 cert_list[],
+ const unsigned long list_size,
+ const struct key *keyring)
+{
+ key_ref_t key;
+ const u8 *p, *end;
+ size_t plen;
+
+ p = cert_list;
+ end = p + list_size;
+ while (p < end) {
+ /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
+ * than 256 bytes in size.
+ */
+ if (end - p < 4)
+ goto dodgy_cert;
+ if (p[0] != 0x30 &&
+ p[1] != 0x82)
+ goto dodgy_cert;
+ plen = (p[2] << 8) | p[3];
+ plen += 4;
+ if (plen > end - p)
+ goto dodgy_cert;
+
+ key = key_create_or_update(make_key_ref(keyring, 1),
+ "asymmetric",
+ NULL,
+ p,
+ plen,
+ ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
+ KEY_USR_VIEW | KEY_USR_READ),
+ KEY_ALLOC_NOT_IN_QUOTA |
+ KEY_ALLOC_BUILT_IN |
+ KEY_ALLOC_BYPASS_RESTRICTION);
+ if (IS_ERR(key)) {
+ pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
+ PTR_ERR(key));
+ } else {
+ pr_notice("Loaded X.509 cert '%s'\n",
+ key_ref_to_ptr(key)->description);
+ key_ref_put(key);
+ }
+ p += plen;
+ }
+
+ return 0;
+
+dodgy_cert:
+ pr_err("Problem parsing in-kernel X.509 certificate list\n");
+ return 0;
+}
bool blacklisted;
};
+/*
+ * selftest.c
+ */
+#ifdef CONFIG_FIPS_SIGNATURE_SELFTEST
+extern int __init fips_signature_selftest(void);
+#else
+static inline int fips_signature_selftest(void) { return 0; }
+#endif
+
/*
* x509_cert_parser.c
*/
/*
* Module stuff
*/
+extern int __init certs_selftest(void);
static int __init x509_key_init(void)
{
- return register_asymmetric_key_parser(&x509_key_parser);
+ int ret;
+
+ ret = register_asymmetric_key_parser(&x509_key_parser);
+ if (ret < 0)
+ return ret;
+ return fips_signature_selftest();
}
static void __exit x509_key_exit(void)
+++ /dev/null
-/*
- * Constant-time equality testing of memory regions.
- *
- * Authors:
- *
- * James Yonan <james@openvpn.net>
- * Daniel Borkmann <dborkman@redhat.com>
- *
- * This file is provided under a dual BSD/GPLv2 license. When using or
- * redistributing this file, you may do so under either license.
- *
- * GPL LICENSE SUMMARY
- *
- * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
- * The full GNU General Public License is included in this distribution
- * in the file called LICENSE.GPL.
- *
- * BSD LICENSE
- *
- * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- * * Neither the name of OpenVPN Technologies nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <crypto/algapi.h>
-#include <asm/unaligned.h>
-
-#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
-
-/* Generic path for arbitrary size */
-static inline unsigned long
-__crypto_memneq_generic(const void *a, const void *b, size_t size)
-{
- unsigned long neq = 0;
-
-#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
- while (size >= sizeof(unsigned long)) {
- neq |= get_unaligned((unsigned long *)a) ^
- get_unaligned((unsigned long *)b);
- OPTIMIZER_HIDE_VAR(neq);
- a += sizeof(unsigned long);
- b += sizeof(unsigned long);
- size -= sizeof(unsigned long);
- }
-#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
- while (size > 0) {
- neq |= *(unsigned char *)a ^ *(unsigned char *)b;
- OPTIMIZER_HIDE_VAR(neq);
- a += 1;
- b += 1;
- size -= 1;
- }
- return neq;
-}
-
-/* Loop-free fast-path for frequently used 16-byte size */
-static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
-{
- unsigned long neq = 0;
-
-#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
- if (sizeof(unsigned long) == 8) {
- neq |= get_unaligned((unsigned long *)a) ^
- get_unaligned((unsigned long *)b);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= get_unaligned((unsigned long *)(a + 8)) ^
- get_unaligned((unsigned long *)(b + 8));
- OPTIMIZER_HIDE_VAR(neq);
- } else if (sizeof(unsigned int) == 4) {
- neq |= get_unaligned((unsigned int *)a) ^
- get_unaligned((unsigned int *)b);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= get_unaligned((unsigned int *)(a + 4)) ^
- get_unaligned((unsigned int *)(b + 4));
- OPTIMIZER_HIDE_VAR(neq);
- neq |= get_unaligned((unsigned int *)(a + 8)) ^
- get_unaligned((unsigned int *)(b + 8));
- OPTIMIZER_HIDE_VAR(neq);
- neq |= get_unaligned((unsigned int *)(a + 12)) ^
- get_unaligned((unsigned int *)(b + 12));
- OPTIMIZER_HIDE_VAR(neq);
- } else
-#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
- {
- neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
- OPTIMIZER_HIDE_VAR(neq);
- neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
- OPTIMIZER_HIDE_VAR(neq);
- }
-
- return neq;
-}
-
-/* Compare two areas of memory without leaking timing information,
- * and with special optimizations for common sizes. Users should
- * not call this function directly, but should instead use
- * crypto_memneq defined in crypto/algapi.h.
- */
-noinline unsigned long __crypto_memneq(const void *a, const void *b,
- size_t size)
-{
- switch (size) {
- case 16:
- return __crypto_memneq_16(a, b);
- default:
- return __crypto_memneq_generic(a, b, size);
- }
-}
-EXPORT_SYMBOL(__crypto_memneq);
-
-#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
static int only_lcd = -1;
module_param(only_lcd, int, 0444);
+static bool may_report_brightness_keys;
static int register_count;
static DEFINE_MUTEX(register_count_mutex);
static DEFINE_MUTEX(video_list_lock);
acpi_video_device_bind(video, data);
acpi_video_device_find_cap(data);
+ if (data->cap._BCM && data->cap._BCL)
+ may_report_brightness_keys = true;
+
mutex_lock(&video->device_list_lock);
list_add_tail(&data->entry, &video->video_device_list);
mutex_unlock(&video->device_list_lock);
break;
}
+ if (keycode)
+ may_report_brightness_keys = true;
+
acpi_notifier_call_chain(device, event, 0);
if (keycode && (report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS)) {
if (register_count) {
acpi_bus_unregister_driver(&acpi_video_bus);
register_count = 0;
+ may_report_brightness_keys = false;
}
mutex_unlock(®ister_count_mutex);
}
bool acpi_video_handles_brightness_key_presses(void)
{
- bool have_video_busses;
-
- mutex_lock(&video_list_lock);
- have_video_busses = !list_empty(&video_bus_head);
- mutex_unlock(&video_list_lock);
-
- return have_video_busses &&
+ return may_report_brightness_keys &&
(report_key_events & REPORT_BRIGHTNESS_KEY_EVENTS);
}
EXPORT_SYMBOL(acpi_video_handles_brightness_key_presses);
bool osc_sb_native_usb4_support_confirmed;
EXPORT_SYMBOL_GPL(osc_sb_native_usb4_support_confirmed);
-bool osc_sb_cppc_not_supported;
+bool osc_sb_cppc2_support_acked;
static u8 sb_uuid_str[] = "0811B06E-4A27-44F9-8D60-3CBBC22E7B48";
static void acpi_bus_osc_negotiate_platform_control(void)
return;
}
-#ifdef CONFIG_ACPI_CPPC_LIB
- osc_sb_cppc_not_supported = !(capbuf_ret[OSC_SUPPORT_DWORD] &
- (OSC_SB_CPC_SUPPORT | OSC_SB_CPCV2_SUPPORT));
-#endif
-
/*
* Now run _OSC again with query flag clear and with the caps
* supported by both the OS and the platform.
capbuf_ret = context.ret.pointer;
if (context.ret.length > OSC_SUPPORT_DWORD) {
+#ifdef CONFIG_ACPI_CPPC_LIB
+ osc_sb_cppc2_support_acked = capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_CPCV2_SUPPORT;
+#endif
+
osc_sb_apei_support_acked =
capbuf_ret[OSC_SUPPORT_DWORD] & OSC_SB_APEI_SUPPORT;
osc_pc_lpi_support_confirmed =
return false;
}
+/**
+ * cpc_supported_by_cpu() - check if CPPC is supported by CPU
+ *
+ * Check if the architectural support for CPPC is present even
+ * if the _OSC hasn't prescribed it
+ *
+ * Return: true for supported, false for not supported
+ */
+bool __weak cpc_supported_by_cpu(void)
+{
+ return false;
+}
+
/**
* pcc_data_alloc() - Allocate the pcc_data memory for pcc subspace
*
acpi_status status;
int ret = -ENODATA;
- if (osc_sb_cppc_not_supported)
- return -ENODEV;
+ if (!osc_sb_cppc2_support_acked) {
+ pr_debug("CPPC v2 _OSC not acked\n");
+ if (!cpc_supported_by_cpu())
+ return -ENODEV;
+ }
/* Parse the ACPI _CPC table for this CPU. */
status = acpi_evaluate_object_typed(handle, "_CPC", NULL, &output,
if (!osc_cpc_flexible_adr_space_confirmed) {
pr_debug("Flexible address space capability not supported\n");
- goto out_free;
+ if (!cpc_supported_by_cpu())
+ goto out_free;
}
addr = ioremap(gas_t->address, gas_t->bit_width/8);
}
if (!osc_cpc_flexible_adr_space_confirmed) {
pr_debug("Flexible address space capability not supported\n");
- goto out_free;
+ if (!cpc_supported_by_cpu())
+ goto out_free;
}
} else {
if (gas_t->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE || !cpc_ffh_supported()) {
goto skip_probe;
ret = amba_read_periphid(dev);
- if (ret) {
- if (ret != -EPROBE_DEFER) {
- amba_device_put(dev);
- goto err_out;
- }
+ if (ret)
goto err_release;
- }
skip_probe:
ret = device_add(&dev->dev);
continue;
list_del_init(&ddev->node);
+ amba_device_put(ddev->dev);
kfree(ddev);
}
return err_mask;
}
-static bool ata_log_supported(struct ata_device *dev, u8 log)
+static int ata_log_supported(struct ata_device *dev, u8 log)
{
struct ata_port *ap = dev->link->ap;
if (dev->horkage & ATA_HORKAGE_NO_LOG_DIR)
- return false;
+ return 0;
if (ata_read_log_page(dev, ATA_LOG_DIRECTORY, 0, ap->sector_buf, 1))
- return false;
- return get_unaligned_le16(&ap->sector_buf[log * 2]) ? true : false;
+ return 0;
+ return get_unaligned_le16(&ap->sector_buf[log * 2]);
}
static bool ata_identify_page_supported(struct ata_device *dev, u8 page)
struct ata_cpr_log *cpr_log = NULL;
u8 *desc, *buf = NULL;
- if (ata_id_major_version(dev->id) < 11 ||
- !ata_log_supported(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES))
+ if (ata_id_major_version(dev->id) < 11)
+ goto out;
+
+ buf_len = ata_log_supported(dev, ATA_LOG_CONCURRENT_POSITIONING_RANGES);
+ if (buf_len == 0)
goto out;
/*
* Read the concurrent positioning ranges log (0x47). We can have at
- * most 255 32B range descriptors plus a 64B header.
+ * most 255 32B range descriptors plus a 64B header. This log varies in
+ * size, so use the size reported in the GPL directory. Reading beyond
+ * the supported length will result in an error.
*/
- buf_len = (64 + 255 * 32 + 511) & ~511;
+ buf_len <<= 9;
buf = kzalloc(buf_len, GFP_KERNEL);
if (!buf)
goto out;
const struct ata_port_info * const * ppi,
int n_ports)
{
- const struct ata_port_info *pi;
+ const struct ata_port_info *pi = &ata_dummy_port_info;
struct ata_host *host;
int i, j;
if (!host)
return NULL;
- for (i = 0, j = 0, pi = NULL; i < host->n_ports; i++) {
+ for (i = 0, j = 0; i < host->n_ports; i++) {
struct ata_port *ap = host->ports[i];
if (ppi[j])
/* SCSI Concurrent Positioning Ranges VPD page: SBC-5 rev 1 or later */
rbuf[1] = 0xb9;
- put_unaligned_be16(64 + (int)cpr_log->nr_cpr * 32 - 4, &rbuf[3]);
+ put_unaligned_be16(64 + (int)cpr_log->nr_cpr * 32 - 4, &rbuf[2]);
for (i = 0; i < cpr_log->nr_cpr; i++, desc += 32) {
desc[0] = cpr_log->cpr[i].num;
{ XFER_PIO_0, "XFER_PIO_0" },
{ XFER_PIO_SLOW, "XFER_PIO_SLOW" }
};
-ata_bitfield_name_match(xfer,ata_xfer_names)
+ata_bitfield_name_search(xfer, ata_xfer_names)
/*
* ATA Port attributes
static const u16 pio_cmd_timings[5] = {
0xF7F4, 0x53F3, 0x13F1, 0x5131, 0x1131
};
- u32 reg, dummy;
+ u32 reg, __maybe_unused dummy;
struct ata_device *pair = ata_dev_pair(adev);
int mode = adev->pio_mode - XFER_PIO_0;
static const u32 mwdma_timings[3] = {
0x7F0FFFF3, 0x7F035352, 0x7F024241
};
- u32 reg, dummy;
+ u32 reg, __maybe_unused dummy;
int mode = adev->dma_mode;
rdmsr(ATAC_CH0D0_DMA + 2 * adev->devno, reg, dummy);
int i;
res_dma = platform_get_resource(dma_dev, IORESOURCE_MEM, 0);
if (!res_dma) {
+ put_device(&dma_dev->dev);
of_node_put(dma_node);
return -EINVAL;
}
cf_port->dma_base = (u64)devm_ioremap(&pdev->dev, res_dma->start,
resource_size(res_dma));
if (!cf_port->dma_base) {
+ put_device(&dma_dev->dev);
of_node_put(dma_node);
return -EINVAL;
}
irq = i;
irq_handler = octeon_cf_interrupt;
}
+ put_device(&dma_dev->dev);
}
of_node_put(dma_node);
}
/* Ensure that all references to the link object have been dropped. */
device_link_synchronize_removal();
- pm_runtime_release_supplier(link, true);
+ pm_runtime_release_supplier(link);
+ /*
+ * If supplier_preactivated is set, the link has been dropped between
+ * the pm_runtime_get_suppliers() and pm_runtime_put_suppliers() calls
+ * in __driver_probe_device(). In that case, drop the supplier's
+ * PM-runtime usage counter to remove the reference taken by
+ * pm_runtime_get_suppliers().
+ */
+ if (link->supplier_preactivated)
+ pm_runtime_put_noidle(link->supplier);
+
+ pm_request_idle(link->supplier);
put_device(link->consumer);
put_device(link->supplier);
return sysfs_emit(buf, "Not affected\n");
}
+ssize_t __weak cpu_show_mmio_stale_data(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "Not affected\n");
+}
+
+ssize_t __weak cpu_show_retbleed(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ return sysfs_emit(buf, "Not affected\n");
+}
+
static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL);
static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL);
static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL);
static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL);
static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL);
static DEVICE_ATTR(srbds, 0444, cpu_show_srbds, NULL);
+static DEVICE_ATTR(mmio_stale_data, 0444, cpu_show_mmio_stale_data, NULL);
+static DEVICE_ATTR(retbleed, 0444, cpu_show_retbleed, NULL);
static struct attribute *cpu_root_vulnerabilities_attrs[] = {
&dev_attr_meltdown.attr,
&dev_attr_tsx_async_abort.attr,
&dev_attr_itlb_multihit.attr,
&dev_attr_srbds.attr,
+ &dev_attr_mmio_stale_data.attr,
+ &dev_attr_retbleed.attr,
NULL
};
#include <linux/init.h>
#include <linux/memory.h>
#include <linux/of.h>
+#include <linux/backing-dev.h>
#include "base.h"
void __init driver_init(void)
{
/* These are the core pieces */
+ bdi_init(&noop_backing_dev_info);
devtmpfs_init();
devices_init();
buses_init();
if (kstrtoull(buf, 0, &pfn) < 0)
return -EINVAL;
pfn >>= PAGE_SHIFT;
- ret = memory_failure(pfn, 0);
+ ret = memory_failure(pfn, MF_SW_SIMULATED);
if (ret == -EOPNOTSUPP)
ret = 0;
return ret ? ret : count;
/**
* pm_runtime_release_supplier - Drop references to device link's supplier.
* @link: Target device link.
- * @check_idle: Whether or not to check if the supplier device is idle.
*
- * Drop all runtime PM references associated with @link to its supplier device
- * and if @check_idle is set, check if that device is idle (and so it can be
- * suspended).
+ * Drop all runtime PM references associated with @link to its supplier device.
*/
-void pm_runtime_release_supplier(struct device_link *link, bool check_idle)
+void pm_runtime_release_supplier(struct device_link *link)
{
struct device *supplier = link->supplier;
while (refcount_dec_not_one(&link->rpm_active) &&
atomic_read(&supplier->power.usage_count) > 0)
pm_runtime_put_noidle(supplier);
-
- if (check_idle)
- pm_request_idle(supplier);
}
static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
struct device_link *link;
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
- device_links_read_lock_held())
- pm_runtime_release_supplier(link, try_to_suspend);
+ device_links_read_lock_held()) {
+ pm_runtime_release_supplier(link);
+ if (try_to_suspend)
+ pm_request_idle(link->supplier);
+ }
}
static void rpm_put_suppliers(struct device *dev)
if (link->flags & DL_FLAG_PM_RUNTIME) {
link->supplier_preactivated = true;
pm_runtime_get_sync(link->supplier);
- refcount_inc(&link->rpm_active);
}
device_links_read_unlock(idx);
list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
device_links_read_lock_held())
if (link->supplier_preactivated) {
- bool put;
-
link->supplier_preactivated = false;
-
- spin_lock_irq(&dev->power.lock);
-
- put = pm_runtime_status_suspended(dev) &&
- refcount_dec_not_one(&link->rpm_active);
-
- spin_unlock_irq(&dev->power.lock);
-
- if (put)
- pm_runtime_put(link->supplier);
+ pm_runtime_put(link->supplier);
}
device_links_read_unlock(idx);
return;
pm_runtime_drop_link_count(link->consumer);
- pm_runtime_release_supplier(link, true);
+ pm_runtime_release_supplier(link);
+ pm_request_idle(link->supplier);
}
static bool pm_runtime_need_not_resume(struct device *dev)
struct regmap_irq_chip_data *d = irq_data_get_irq_chip_data(data);
struct regmap *map = d->map;
const struct regmap_irq *irq_data = irq_to_regmap_irq(d, data->hwirq);
+ unsigned int reg = irq_data->reg_offset / map->reg_stride;
unsigned int mask, type;
type = irq_data->type.type_falling_val | irq_data->type.type_rising_val;
* at the corresponding offset in regmap_irq_set_type().
*/
if (d->chip->type_in_mask && type)
- mask = d->type_buf[irq_data->reg_offset / map->reg_stride];
+ mask = d->type_buf[reg] & irq_data->mask;
else
mask = irq_data->mask;
if (d->chip->clear_on_unmask)
d->clear_status = true;
- d->mask_buf[irq_data->reg_offset / map->reg_stride] &= ~mask;
+ d->mask_buf[reg] &= ~mask;
}
static void regmap_irq_disable(struct irq_data *data)
subreg = &chip->sub_reg_offsets[b];
for (i = 0; i < subreg->num_regs; i++) {
unsigned int offset = subreg->offset[i];
+ unsigned int index = offset / map->reg_stride;
if (chip->not_fixed_stride)
ret = regmap_read(map,
else
ret = regmap_read(map,
chip->status_base + offset,
- &data->status_buf[offset]);
+ &data->status_buf[index]);
if (ret)
break;
*/
bool regmap_can_raw_write(struct regmap *map)
{
- return map->bus && map->bus->write && map->format.format_val &&
- map->format.format_reg;
+ return map->write && map->format.format_val && map->format.format_reg;
}
EXPORT_SYMBOL_GPL(regmap_can_raw_write);
size_t write_len;
int ret;
- if (!map->bus)
- return -EINVAL;
- if (!map->bus->write)
+ if (!map->write)
return -ENOTSUPP;
+
if (val_len % map->format.val_bytes)
return -EINVAL;
if (!IS_ALIGNED(reg, map->reg_stride))
* Some devices don't support bulk write, for them we have a series of
* single write operations.
*/
- if (!map->bus || !map->format.parse_inplace) {
+ if (!map->write || !map->format.parse_inplace) {
map->lock(map->lock_arg);
for (i = 0; i < val_count; i++) {
unsigned int ival;
size_t read_len;
int ret;
+ if (!map->read)
+ return -ENOTSUPP;
+
if (val_len % map->format.val_bytes)
return -EINVAL;
if (!IS_ALIGNED(reg, map->reg_stride))
if (val_count == 0)
return -EINVAL;
- if (map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
+ if (map->read && map->format.parse_inplace && (vol || map->cache_type == REGCACHE_NONE)) {
ret = regmap_raw_read(map, reg, val, val_bytes * val_count);
if (ret != 0)
return ret;
module_param_named(max_ring_page_order, xen_blkif_max_ring_order, int, 0444);
MODULE_PARM_DESC(max_ring_page_order, "Maximum order of pages to be used for the shared ring");
+static bool __read_mostly xen_blkif_trusted = true;
+module_param_named(trusted, xen_blkif_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
#define BLK_RING_SIZE(info) \
__CONST_RING_SIZE(blkif, XEN_PAGE_SIZE * (info)->nr_ring_pages)
unsigned int feature_discard:1;
unsigned int feature_secdiscard:1;
unsigned int feature_persistent:1;
+ unsigned int bounce:1;
unsigned int discard_granularity;
unsigned int discard_alignment;
/* Number of 4KB segments handled */
if (!gnt_list_entry)
goto out_of_memory;
- if (info->feature_persistent) {
- granted_page = alloc_page(GFP_NOIO);
+ if (info->bounce) {
+ granted_page = alloc_page(GFP_NOIO | __GFP_ZERO);
if (!granted_page) {
kfree(gnt_list_entry);
goto out_of_memory;
list_for_each_entry_safe(gnt_list_entry, n,
&rinfo->grants, node) {
list_del(&gnt_list_entry->node);
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(gnt_list_entry->page);
kfree(gnt_list_entry);
i--;
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
- if (info->feature_persistent)
+ if (info->bounce)
grant_foreign_access(gnt_list_entry, info);
else {
/* Grant access to the GFN passed by the caller */
/* Assign a gref to this page */
gnt_list_entry->gref = gnttab_claim_grant_reference(gref_head);
BUG_ON(gnt_list_entry->gref == -ENOSPC);
- if (!info->feature_persistent) {
+ if (!info->bounce) {
struct page *indirect_page;
/* Fetch a pre-allocated page to use for indirect grefs */
.grant_idx = 0,
.segments = NULL,
.rinfo = rinfo,
- .need_copy = rq_data_dir(req) && info->feature_persistent,
+ .need_copy = rq_data_dir(req) && info->bounce,
};
/*
{
blk_queue_write_cache(info->rq, info->feature_flush ? true : false,
info->feature_fua ? true : false);
- pr_info("blkfront: %s: %s %s %s %s %s\n",
+ pr_info("blkfront: %s: %s %s %s %s %s %s %s\n",
info->gd->disk_name, flush_info(info),
"persistent grants:", info->feature_persistent ?
"enabled;" : "disabled;", "indirect descriptors:",
- info->max_indirect_segments ? "enabled;" : "disabled;");
+ info->max_indirect_segments ? "enabled;" : "disabled;",
+ "bounce buffer:", info->bounce ? "enabled" : "disabled;");
}
static int xen_translate_vdev(int vdevice, int *minor, unsigned int *offset)
if (!list_empty(&rinfo->indirect_pages)) {
struct page *indirect_page, *n;
- BUG_ON(info->feature_persistent);
+ BUG_ON(info->bounce);
list_for_each_entry_safe(indirect_page, n, &rinfo->indirect_pages, lru) {
list_del(&indirect_page->lru);
__free_page(indirect_page);
NULL);
rinfo->persistent_gnts_c--;
}
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
for (j = 0; j < segs; j++) {
persistent_gnt = rinfo->shadow[i].grants_used[j];
gnttab_end_foreign_access(persistent_gnt->gref, NULL);
- if (info->feature_persistent)
+ if (info->bounce)
__free_page(persistent_gnt->page);
kfree(persistent_gnt);
}
data.s = s;
num_sg = s->num_sg;
- if (bret->operation == BLKIF_OP_READ && info->feature_persistent) {
+ if (bret->operation == BLKIF_OP_READ && info->bounce) {
for_each_sg(s->sg, sg, num_sg, i) {
BUG_ON(sg->offset + sg->length > PAGE_SIZE);
* Add the used indirect page back to the list of
* available pages for indirect grefs.
*/
- if (!info->feature_persistent) {
+ if (!info->bounce) {
indirect_page = s->indirect_grants[i]->page;
list_add(&indirect_page->lru, &rinfo->indirect_pages);
}
if (!info)
return -ENODEV;
+ /* Check if backend is trusted. */
+ info->bounce = !xen_blkif_trusted ||
+ !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
max_page_order = xenbus_read_unsigned(info->xbdev->otherend,
"max-ring-page-order", 0);
ring_page_order = min(xen_blkif_max_ring_order, max_page_order);
return;
/* No more blkif_request(). */
- blk_mq_stop_hw_queues(info->rq);
- blk_mark_disk_dead(info->gd);
- set_capacity(info->gd, 0);
+ if (info->rq && info->gd) {
+ blk_mq_stop_hw_queues(info->rq);
+ blk_mark_disk_dead(info->gd);
+ set_capacity(info->gd, 0);
+ }
for_each_rinfo(info, rinfo, i) {
/* No more gnttab callback work. */
if (err)
goto out_of_memory;
- if (!info->feature_persistent && info->max_indirect_segments) {
+ if (!info->bounce && info->max_indirect_segments) {
/*
- * We are using indirect descriptors but not persistent
- * grants, we need to allocate a set of pages that can be
+ * We are using indirect descriptors but don't have a bounce
+ * buffer, we need to allocate a set of pages that can be
* used for mapping indirect grefs
*/
int num = INDIRECT_GREFS(grants) * BLK_RING_SIZE(info);
BUG_ON(!list_empty(&rinfo->indirect_pages));
for (i = 0; i < num; i++) {
- struct page *indirect_page = alloc_page(GFP_KERNEL);
+ struct page *indirect_page = alloc_page(GFP_KERNEL |
+ __GFP_ZERO);
if (!indirect_page)
goto out_of_memory;
list_add(&indirect_page->lru, &rinfo->indirect_pages);
info->feature_persistent =
!!xenbus_read_unsigned(info->xbdev->otherend,
"feature-persistent", 0);
+ if (info->feature_persistent)
+ info->bounce = true;
indirect_segments = xenbus_read_unsigned(info->xbdev->otherend,
"feature-max-indirect-segments", 0);
dev_dbg(&xbdev->dev, "%s removed", xbdev->nodename);
- del_gendisk(info->gd);
+ if (info->gd)
+ del_gendisk(info->gd);
mutex_lock(&blkfront_mutex);
list_del(&info->info_list);
mutex_unlock(&blkfront_mutex);
blkif_free(info, 0);
- xlbd_release_minors(info->gd->first_minor, info->gd->minors);
- blk_cleanup_disk(info->gd);
- blk_mq_free_tag_set(&info->tag_set);
+ if (info->gd) {
+ xlbd_release_minors(info->gd->first_minor, info->gd->minors);
+ blk_cleanup_disk(info->gd);
+ blk_mq_free_tag_set(&info->tag_set);
+ }
kfree(info);
return 0;
struct blkfront_info *info;
bool need_schedule_work = false;
+ /*
+ * Note that when using bounce buffers but not persistent grants
+ * there's no need to run blkfront_delay_work because grants are
+ * revoked in blkif_completion or else an error is reported and the
+ * connection is closed.
+ */
+
mutex_lock(&blkfront_mutex);
list_for_each_entry(info, &info_list, info_list) {
int ret;
apb->prst = devm_reset_control_get_optional_exclusive(apb->dev, "prst");
- if (IS_ERR(apb->prst)) {
- dev_warn(apb->dev, "Couldn't get reset control line\n");
- return PTR_ERR(apb->prst);
- }
+ if (IS_ERR(apb->prst))
+ return dev_err_probe(apb->dev, PTR_ERR(apb->prst),
+ "Couldn't get reset control line\n");
ret = reset_control_deassert(apb->prst);
if (ret)
int ret;
apb->pclk = devm_clk_get(apb->dev, "pclk");
- if (IS_ERR(apb->pclk)) {
- dev_err(apb->dev, "Couldn't get APB clock descriptor\n");
- return PTR_ERR(apb->pclk);
- }
+ if (IS_ERR(apb->pclk))
+ return dev_err_probe(apb->dev, PTR_ERR(apb->pclk),
+ "Couldn't get APB clock descriptor\n");
ret = clk_prepare_enable(apb->pclk);
if (ret) {
int ret;
axi->arst = devm_reset_control_get_optional_exclusive(axi->dev, "arst");
- if (IS_ERR(axi->arst)) {
- dev_warn(axi->dev, "Couldn't get reset control line\n");
- return PTR_ERR(axi->arst);
- }
+ if (IS_ERR(axi->arst))
+ return dev_err_probe(axi->dev, PTR_ERR(axi->arst),
+ "Couldn't get reset control line\n");
ret = reset_control_deassert(axi->arst);
if (ret)
int ret;
axi->aclk = devm_clk_get(axi->dev, "aclk");
- if (IS_ERR(axi->aclk)) {
- dev_err(axi->dev, "Couldn't get AXI Interconnect clock\n");
- return PTR_ERR(axi->aclk);
- }
+ if (IS_ERR(axi->aclk))
+ return dev_err_probe(axi->dev, PTR_ERR(axi->aclk),
+ "Couldn't get AXI Interconnect clock\n");
ret = clk_prepare_enable(axi->aclk);
if (ret) {
static int fsl_mc_bus_remove(struct platform_device *pdev)
{
struct fsl_mc *mc = platform_get_drvdata(pdev);
+ struct fsl_mc_io *mc_io;
if (!fsl_mc_is_root_dprc(&mc->root_mc_bus_dev->dev))
return -EINVAL;
+ mc_io = mc->root_mc_bus_dev->mc_io;
fsl_mc_device_remove(mc->root_mc_bus_dev);
-
- fsl_destroy_mc_io(mc->root_mc_bus_dev->mc_io);
- mc->root_mc_bus_dev->mc_io = NULL;
+ fsl_destroy_mc_io(mc_io);
bus_unregister_notifier(&fsl_mc_bus_type, &fsl_mc_nb);
driver include crash and makedumpfile.
config RANDOM_TRUST_CPU
- bool "Trust the CPU manufacturer to initialize Linux's CRNG"
+ bool "Initialize RNG using CPU RNG instructions"
+ default y
depends on ARCH_RANDOM
- default n
help
- Assume that CPU manufacturer (e.g., Intel or AMD for RDSEED or
- RDRAND, IBM for the S390 and Power PC architectures) is trustworthy
- for the purposes of initializing Linux's CRNG. Since this is not
- something that can be independently audited, this amounts to trusting
- that CPU manufacturer (perhaps with the insistence or mandate
- of a Nation State's intelligence or law enforcement agencies)
- has not installed a hidden back door to compromise the CPU's
- random number generation facilities. This can also be configured
- at boot with "random.trust_cpu=on/off".
+ Initialize the RNG using random numbers supplied by the CPU's
+ RNG instructions (e.g. RDRAND), if supported and available. These
+ random numbers are never used directly, but are rather hashed into
+ the main input pool, and this happens regardless of whether or not
+ this option is enabled. Instead, this option controls whether the
+ they are credited and hence can initialize the RNG. Additionally,
+ other sources of randomness are always used, regardless of this
+ setting. Enabling this implies trusting that the CPU can supply high
+ quality and non-backdoored random numbers.
+
+ Say Y here unless you have reason to mistrust your CPU or believe
+ its RNG facilities may be faulty. This may also be configured at
+ boot time with "random.trust_cpu=on/off".
config RANDOM_TRUST_BOOTLOADER
- bool "Trust the bootloader to initialize Linux's CRNG"
- help
- Some bootloaders can provide entropy to increase the kernel's initial
- device randomness. Say Y here to assume the entropy provided by the
- booloader is trustworthy so it will be added to the kernel's entropy
- pool. Otherwise, say N here so it will be regarded as device input that
- only mixes the entropy pool. This can also be configured at boot with
- "random.trust_bootloader=on/off".
+ bool "Initialize RNG using bootloader-supplied seed"
+ default y
+ help
+ Initialize the RNG using a seed supplied by the bootloader or boot
+ environment (e.g. EFI or a bootloader-generated device tree). This
+ seed is not used directly, but is rather hashed into the main input
+ pool, and this happens regardless of whether or not this option is
+ enabled. Instead, this option controls whether the seed is credited
+ and hence can initialize the RNG. Additionally, other sources of
+ randomness are always used, regardless of this setting. Enabling
+ this implies trusting that the bootloader can supply high quality and
+ non-backdoored seeds.
+
+ Say Y here unless you have reason to mistrust your bootloader or
+ believe its RNG facilities may be faulty. This may also be configured
+ at boot time with "random.trust_bootloader=on/off".
endmenu
goto err_find;
}
+ virtio_device_ready(vdev);
+
/* we always have a pending entropy request */
request_entropy(vi);
static int __init lp_init(void)
{
- int i, err = 0;
+ int i, err;
if (parport_nr[0] == LP_PARPORT_OFF)
return 0;
/* Control how we warn userspace. */
static struct ratelimit_state urandom_warning =
- RATELIMIT_STATE_INIT("warn_urandom_randomness", HZ, 3);
+ RATELIMIT_STATE_INIT_FLAGS("urandom_warning", HZ, 3, RATELIMIT_MSG_ON_RELEASE);
static int ratelimit_disable __read_mostly =
IS_ENABLED(CONFIG_WARN_ALL_UNSEEDED_RANDOM);
module_param_named(ratelimit_disable, ratelimit_disable, int, 0644);
/*
* Immediately overwrite the ChaCha key at index 4 with random
- * bytes, in case userspace causes copy_to_user() below to sleep
+ * bytes, in case userspace causes copy_to_iter() below to sleep
* forever, so that we still retain forward secrecy in that case.
*/
crng_make_state(chacha_state, (u8 *)&chacha_state[4], CHACHA_KEY_SIZE);
if (orig < POOL_READY_BITS && new >= POOL_READY_BITS) {
crng_reseed(); /* Sets crng_init to CRNG_READY under base_crng.lock. */
- execute_in_process_context(crng_set_ready, &set_ready);
+ if (static_key_initialized)
+ execute_in_process_context(crng_set_ready, &set_ready);
wake_up_interruptible(&crng_init_wait);
kill_fasync(&fasync, SIGIO, POLL_IN);
pr_notice("crng init done\n");
*
**********************************************************************/
-static bool used_arch_random;
-static bool trust_cpu __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
-static bool trust_bootloader __ro_after_init = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER);
+static bool trust_cpu __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_CPU);
+static bool trust_bootloader __initdata = IS_ENABLED(CONFIG_RANDOM_TRUST_BOOTLOADER);
static int __init parse_trust_cpu(char *arg)
{
return kstrtobool(arg, &trust_cpu);
int __init random_init(const char *command_line)
{
ktime_t now = ktime_get_real();
- unsigned int i, arch_bytes;
+ unsigned int i, arch_bits;
unsigned long entropy;
#if defined(LATENT_ENTROPY_PLUGIN)
_mix_pool_bytes(compiletime_seed, sizeof(compiletime_seed));
#endif
- for (i = 0, arch_bytes = BLAKE2S_BLOCK_SIZE;
+ for (i = 0, arch_bits = BLAKE2S_BLOCK_SIZE * 8;
i < BLAKE2S_BLOCK_SIZE; i += sizeof(entropy)) {
if (!arch_get_random_seed_long_early(&entropy) &&
!arch_get_random_long_early(&entropy)) {
entropy = random_get_entropy();
- arch_bytes -= sizeof(entropy);
+ arch_bits -= sizeof(entropy) * 8;
}
_mix_pool_bytes(&entropy, sizeof(entropy));
}
_mix_pool_bytes(command_line, strlen(command_line));
add_latent_entropy();
+ /*
+ * If we were initialized by the bootloader before jump labels are
+ * initialized, then we should enable the static branch here, where
+ * it's guaranteed that jump labels have been initialized.
+ */
+ if (!static_branch_likely(&crng_is_ready) && crng_init >= CRNG_READY)
+ crng_set_ready(NULL);
+
if (crng_ready())
crng_reseed();
else if (trust_cpu)
- credit_init_bits(arch_bytes * 8);
- used_arch_random = arch_bytes * 8 >= POOL_READY_BITS;
+ _credit_init_bits(arch_bits);
WARN_ON(register_pm_notifier(&pm_notifier));
return 0;
}
-/*
- * Returns whether arch randomness has been mixed into the initial
- * state of the RNG, regardless of whether or not that randomness
- * was credited. Knowing this is only good for a very limited set
- * of uses, such as early init printk pointer obfuscation.
- */
-bool rng_has_arch_random(void)
-{
- return used_arch_random;
-}
-
/*
* Add device- or boot-specific data to the input pool to help
* initialize it.
* Handle random seed passed by bootloader, and credit it if
* CONFIG_RANDOM_TRUST_BOOTLOADER is set.
*/
-void __cold add_bootloader_randomness(const void *buf, size_t len)
+void __init add_bootloader_randomness(const void *buf, size_t len)
{
mix_pool_bytes(buf, len);
if (trust_bootloader)
credit_init_bits(len * 8);
}
-EXPORT_SYMBOL_GPL(add_bootloader_randomness);
#if IS_ENABLED(CONFIG_VMGENID)
static BLOCKING_NOTIFIER_HEAD(vmfork_chain);
if (new_count & MIX_INFLIGHT)
return;
- if (new_count < 64 && !time_is_before_jiffies(fast_pool->last + HZ))
+ if (new_count < 1024 && !time_is_before_jiffies(fast_pool->last + HZ))
return;
if (unlikely(!fast_pool->mix.func))
*/
static void __cold try_to_generate_entropy(void)
{
- enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = 32 };
+ enum { NUM_TRIAL_SAMPLES = 8192, MAX_SAMPLES_PER_BIT = HZ / 30 };
struct entropy_timer_state stack;
unsigned int i, num_different = 0;
unsigned long last = random_get_entropy();
#include <linux/export.h>
#include <linux/gfp.h>
+struct devm_clk_state {
+ struct clk *clk;
+ void (*exit)(struct clk *clk);
+};
+
static void devm_clk_release(struct device *dev, void *res)
{
- clk_put(*(struct clk **)res);
+ struct devm_clk_state *state = res;
+
+ if (state->exit)
+ state->exit(state->clk);
+
+ clk_put(state->clk);
}
-struct clk *devm_clk_get(struct device *dev, const char *id)
+static struct clk *__devm_clk_get(struct device *dev, const char *id,
+ struct clk *(*get)(struct device *dev, const char *id),
+ int (*init)(struct clk *clk),
+ void (*exit)(struct clk *clk))
{
- struct clk **ptr, *clk;
+ struct devm_clk_state *state;
+ struct clk *clk;
+ int ret;
- ptr = devres_alloc(devm_clk_release, sizeof(*ptr), GFP_KERNEL);
- if (!ptr)
+ state = devres_alloc(devm_clk_release, sizeof(*state), GFP_KERNEL);
+ if (!state)
return ERR_PTR(-ENOMEM);
- clk = clk_get(dev, id);
- if (!IS_ERR(clk)) {
- *ptr = clk;
- devres_add(dev, ptr);
- } else {
- devres_free(ptr);
+ clk = get(dev, id);
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+ goto err_clk_get;
}
+ if (init) {
+ ret = init(clk);
+ if (ret)
+ goto err_clk_init;
+ }
+
+ state->clk = clk;
+ state->exit = exit;
+
+ devres_add(dev, state);
+
return clk;
+
+err_clk_init:
+
+ clk_put(clk);
+err_clk_get:
+
+ devres_free(state);
+ return ERR_PTR(ret);
+}
+
+struct clk *devm_clk_get(struct device *dev, const char *id)
+{
+ return __devm_clk_get(dev, id, clk_get, NULL, NULL);
}
EXPORT_SYMBOL(devm_clk_get);
-struct clk *devm_clk_get_optional(struct device *dev, const char *id)
+struct clk *devm_clk_get_prepared(struct device *dev, const char *id)
{
- struct clk *clk = devm_clk_get(dev, id);
+ return __devm_clk_get(dev, id, clk_get, clk_prepare, clk_unprepare);
+}
+EXPORT_SYMBOL_GPL(devm_clk_get_prepared);
- if (clk == ERR_PTR(-ENOENT))
- return NULL;
+struct clk *devm_clk_get_enabled(struct device *dev, const char *id)
+{
+ return __devm_clk_get(dev, id, clk_get,
+ clk_prepare_enable, clk_disable_unprepare);
+}
+EXPORT_SYMBOL_GPL(devm_clk_get_enabled);
- return clk;
+struct clk *devm_clk_get_optional(struct device *dev, const char *id)
+{
+ return __devm_clk_get(dev, id, clk_get_optional, NULL, NULL);
}
EXPORT_SYMBOL(devm_clk_get_optional);
+struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id)
+{
+ return __devm_clk_get(dev, id, clk_get_optional,
+ clk_prepare, clk_unprepare);
+}
+EXPORT_SYMBOL_GPL(devm_clk_get_optional_prepared);
+
+struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id)
+{
+ return __devm_clk_get(dev, id, clk_get_optional,
+ clk_prepare_enable, clk_disable_unprepare);
+}
+EXPORT_SYMBOL_GPL(devm_clk_get_optional_enabled);
+
struct clk_bulk_devres {
struct clk_bulk_data *clks;
int num_clks;
static struct clk_hw *
__clk_hw_register_fixed_factor(struct device *dev, struct device_node *np,
- const char *name, const char *parent_name, int index,
+ const char *name, const char *parent_name,
+ const struct clk_hw *parent_hw, int index,
unsigned long flags, unsigned int mult, unsigned int div,
bool devm)
{
init.flags = flags;
if (parent_name)
init.parent_names = &parent_name;
+ else if (parent_hw)
+ init.parent_hws = &parent_hw;
else
init.parent_data = &pdata;
init.num_parents = 1;
const char *name, unsigned int index, unsigned long flags,
unsigned int mult, unsigned int div)
{
- return __clk_hw_register_fixed_factor(dev, NULL, name, NULL, index,
+ return __clk_hw_register_fixed_factor(dev, NULL, name, NULL, NULL, index,
flags, mult, div, true);
}
EXPORT_SYMBOL_GPL(devm_clk_hw_register_fixed_factor_index);
+/**
+ * devm_clk_hw_register_fixed_factor_parent_hw - Register a fixed factor clock with
+ * pointer to parent clock
+ * @dev: device that is registering this clock
+ * @name: name of this clock
+ * @parent_hw: pointer to parent clk
+ * @flags: fixed factor flags
+ * @mult: multiplier
+ * @div: divider
+ *
+ * Return: Pointer to fixed factor clk_hw structure that was registered or
+ * an error pointer.
+ */
+struct clk_hw *devm_clk_hw_register_fixed_factor_parent_hw(struct device *dev,
+ const char *name, const struct clk_hw *parent_hw,
+ unsigned long flags, unsigned int mult, unsigned int div)
+{
+ return __clk_hw_register_fixed_factor(dev, NULL, name, NULL, parent_hw,
+ -1, flags, mult, div, true);
+}
+EXPORT_SYMBOL_GPL(devm_clk_hw_register_fixed_factor_parent_hw);
+
+struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev,
+ const char *name, const struct clk_hw *parent_hw,
+ unsigned long flags, unsigned int mult, unsigned int div)
+{
+ return __clk_hw_register_fixed_factor(dev, NULL, name, NULL,
+ parent_hw, -1, flags, mult, div,
+ false);
+}
+EXPORT_SYMBOL_GPL(clk_hw_register_fixed_factor_parent_hw);
+
struct clk_hw *clk_hw_register_fixed_factor(struct device *dev,
const char *name, const char *parent_name, unsigned long flags,
unsigned int mult, unsigned int div)
{
- return __clk_hw_register_fixed_factor(dev, NULL, name, parent_name, -1,
+ return __clk_hw_register_fixed_factor(dev, NULL, name, parent_name, NULL, -1,
flags, mult, div, false);
}
EXPORT_SYMBOL_GPL(clk_hw_register_fixed_factor);
const char *name, const char *parent_name, unsigned long flags,
unsigned int mult, unsigned int div)
{
- return __clk_hw_register_fixed_factor(dev, NULL, name, parent_name, -1,
+ return __clk_hw_register_fixed_factor(dev, NULL, name, parent_name, NULL, -1,
flags, mult, div, true);
}
EXPORT_SYMBOL_GPL(devm_clk_hw_register_fixed_factor);
#ifdef CONFIG_OF
-static const struct of_device_id set_rate_parent_matches[] = {
- { .compatible = "allwinner,sun4i-a10-pll3-2x-clk" },
- { /* Sentinel */ },
-};
-
static struct clk_hw *_of_fixed_factor_clk_setup(struct device_node *node)
{
struct clk_hw *hw;
const char *clk_name = node->name;
- unsigned long flags = 0;
u32 div, mult;
int ret;
of_property_read_string(node, "clock-output-names", &clk_name);
- if (of_match_node(set_rate_parent_matches, node))
- flags |= CLK_SET_RATE_PARENT;
-
- hw = __clk_hw_register_fixed_factor(NULL, node, clk_name, NULL, 0,
- flags, mult, div, false);
+ hw = __clk_hw_register_fixed_factor(NULL, node, clk_name, NULL, NULL, 0,
+ 0, mult, div, false);
if (IS_ERR(hw)) {
/*
* Clear OF_POPULATED flag so that clock registration can be
hw_data->hws[i] =
devm_clk_hw_register_gate(dev, clk_gate_desc[idx].name,
- "lan966x", 0, base,
+ "lan966x", 0, gate_base,
clk_gate_desc[idx].bit_idx,
0, &clk_gate_lock);
}
EXPORT_SYMBOL_GPL(devm_clk_hw_register);
-static int devm_clk_match(struct device *dev, void *res, void *data)
-{
- struct clk *c = res;
- if (WARN_ON(!c))
- return 0;
- return c == data;
-}
-
-static int devm_clk_hw_match(struct device *dev, void *res, void *data)
-{
- struct clk_hw *hw = res;
-
- if (WARN_ON(!hw))
- return 0;
- return hw == data;
-}
-
-/**
- * devm_clk_unregister - resource managed clk_unregister()
- * @dev: device that is unregistering the clock data
- * @clk: clock to unregister
- *
- * Deallocate a clock allocated with devm_clk_register(). Normally
- * this function will not need to be called and the resource management
- * code will ensure that the resource is freed.
- */
-void devm_clk_unregister(struct device *dev, struct clk *clk)
-{
- WARN_ON(devres_release(dev, devm_clk_unregister_cb, devm_clk_match, clk));
-}
-EXPORT_SYMBOL_GPL(devm_clk_unregister);
-
-/**
- * devm_clk_hw_unregister - resource managed clk_hw_unregister()
- * @dev: device that is unregistering the hardware-specific clock data
- * @hw: link to hardware-specific clock data
- *
- * Unregister a clk_hw registered with devm_clk_hw_register(). Normally
- * this function will not need to be called and the resource management
- * code will ensure that the resource is freed.
- */
-void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw)
-{
- WARN_ON(devres_release(dev, devm_clk_hw_unregister_cb, devm_clk_hw_match,
- hw));
-}
-EXPORT_SYMBOL_GPL(devm_clk_hw_unregister);
-
static void devm_clk_release(struct device *dev, void *res)
{
clk_put(*(struct clk **)res);
* Fout = Fvco / (rdiv * odiv)
*/
static const struct imx_fracn_gppll_rate_table fracn_tbl[] = {
- PLL_FRACN_GP(650000000U, 81, 0, 0, 0, 3),
- PLL_FRACN_GP(594000000U, 198, 0, 0, 0, 8),
- PLL_FRACN_GP(560000000U, 70, 0, 0, 0, 3),
- PLL_FRACN_GP(400000000U, 50, 0, 0, 0, 3),
+ PLL_FRACN_GP(650000000U, 81, 0, 1, 0, 3),
+ PLL_FRACN_GP(594000000U, 198, 0, 1, 0, 8),
+ PLL_FRACN_GP(560000000U, 70, 0, 1, 0, 3),
+ PLL_FRACN_GP(498000000U, 83, 0, 1, 0, 4),
+ PLL_FRACN_GP(484000000U, 121, 0, 1, 0, 6),
+ PLL_FRACN_GP(445333333U, 167, 0, 1, 0, 9),
+ PLL_FRACN_GP(400000000U, 50, 0, 1, 0, 3),
PLL_FRACN_GP(393216000U, 81, 92, 100, 0, 5)
};
mfi = FIELD_GET(PLL_MFI_MASK, pll_div);
rdiv = FIELD_GET(PLL_RDIV_MASK, pll_div);
- rdiv = rdiv + 1;
odiv = FIELD_GET(PLL_ODIV_MASK, pll_div);
- switch (odiv) {
- case 0:
- odiv = 2;
- break;
- case 1:
- odiv = 3;
- break;
- default:
- break;
- }
/*
* Sometimes, the recalculated rate has deviation due to
if (rate)
return (unsigned long)rate;
+ if (!rdiv)
+ rdiv = rdiv + 1;
+
+ switch (odiv) {
+ case 0:
+ odiv = 2;
+ break;
+ case 1:
+ odiv = 3;
+ break;
+ default:
+ break;
+ }
+
/* Fvco = Fref * (MFI + MFN / MFD) */
fvco = fvco * mfi * mfd + fvco * mfn;
do_div(fvco, mfd * rdiv * odiv);
{ IMX93_CLK_A55_GATE, "a55", "a55_root", 0x8000, },
/* M33 critical clk for system run */
{ IMX93_CLK_CM33_GATE, "cm33", "m33_root", 0x8040, CLK_IS_CRITICAL },
- { IMX93_CLK_ADC1_GATE, "adc1", "osc_24m", 0x82c0, },
+ { IMX93_CLK_ADC1_GATE, "adc1", "adc_root", 0x82c0, },
{ IMX93_CLK_WDOG1_GATE, "wdog1", "osc_24m", 0x8300, },
{ IMX93_CLK_WDOG2_GATE, "wdog2", "osc_24m", 0x8340, },
{ IMX93_CLK_WDOG3_GATE, "wdog3", "osc_24m", 0x8380, },
{ IMX93_CLK_SEMA2_GATE, "sema2", "bus_wakeup_root", 0x8480, },
{ IMX93_CLK_MU_A_GATE, "mu_a", "bus_aon_root", 0x84c0, },
{ IMX93_CLK_MU_B_GATE, "mu_b", "bus_aon_root", 0x8500, },
- { IMX93_CLK_EDMA1_GATE, "edma1", "wakeup_axi_root", 0x8540, },
+ { IMX93_CLK_EDMA1_GATE, "edma1", "m33_root", 0x8540, },
{ IMX93_CLK_EDMA2_GATE, "edma2", "wakeup_axi_root", 0x8580, },
{ IMX93_CLK_FLEXSPI1_GATE, "flexspi", "flexspi_root", 0x8640, },
{ IMX93_CLK_GPIO1_GATE, "gpio1", "m33_root", 0x8880, },
{ IMX93_CLK_LCDIF_GATE, "lcdif", "media_apb_root", 0x9640, },
{ IMX93_CLK_PXP_GATE, "pxp", "media_apb_root", 0x9680, },
{ IMX93_CLK_ISI_GATE, "isi", "media_apb_root", 0x96c0, },
- { IMX93_CLK_NIC_MEDIA_GATE, "nic_media", "media_apb_root", 0x9700, },
+ { IMX93_CLK_NIC_MEDIA_GATE, "nic_media", "media_axi_root", 0x9700, },
{ IMX93_CLK_USB_CONTROLLER_GATE, "usb_controller", "hsio_root", 0x9a00, },
{ IMX93_CLK_USB_TEST_60M_GATE, "usb_test_60m", "hsio_usb_test_60m_root", 0x9a40, },
{ IMX93_CLK_HSIO_TROUT_24M_GATE, "hsio_trout_24m", "osc_24m", 0x9a80, },
GATE_ETH(CLK_ETHSYS_CRYPTO, "crypto_clk", "ethif_sel", 29),
};
+static u16 rst_ofs[] = { 0x34, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(rst_ofs),
+};
+
static const struct of_device_id of_match_clk_mt2701_eth[] = {
{ .compatible = "mediatek,mt2701-ethsys", },
{}
"could not register clock provider: %s: %d\n",
pdev->name, r);
- mtk_register_reset_controller(node, 1, 0x34);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
GATE_G3D(CLK_G3DSYS_CORE, "g3d_core", "mfg_sel", 0),
};
+static u16 rst_ofs[] = { 0xc, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(rst_ofs),
+};
+
static int clk_mt2701_g3dsys_init(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
"could not register clock provider: %s: %d\n",
pdev->name, r);
- mtk_register_reset_controller(node, 1, 0xc);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
GATE_HIF(CLK_HIFSYS_PCIE2, "pcie2_clk", "ethpll_500m_ck", 26),
};
+static u16 rst_ofs[] = { 0x34, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(rst_ofs),
+};
+
static const struct of_device_id of_match_clk_mt2701_hif[] = {
{ .compatible = "mediatek,mt2701-hifsys", },
{}
return r;
}
- mtk_register_reset_controller(node, 1, 0x34);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return 0;
}
FACTOR(CLK_INFRA_CLK_13M, "clk13m", "clk26m", 1, 2),
};
+static u16 infrasys_rst_ofs[] = { 0x30, 0x34, };
+static u16 pericfg_rst_ofs[] = { 0x0, 0x4, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc[] = {
+ /* infrasys */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = infrasys_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infrasys_rst_ofs),
+ },
+ /* pericfg */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = pericfg_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(pericfg_rst_ofs),
+ },
+};
+
static struct clk_hw_onecell_data *infra_clk_data;
static void __init mtk_infrasys_init_early(struct device_node *node)
if (r)
return r;
- mtk_register_reset_controller(node, 2, 0x30);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc[0]);
return 0;
}
if (r)
return r;
- mtk_register_reset_controller(node, 2, 0x0);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc[1]);
return 0;
}
0, 31, 0x0300, 4, 0, 0, 0, 0x0304, 0),
};
+static u16 infrasys_rst_ofs[] = { 0x30, 0x34, };
+static u16 pericfg_rst_ofs[] = { 0x0, 0x4, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc[] = {
+ /* infra */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = infrasys_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infrasys_rst_ofs),
+ },
+ /* peri */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = pericfg_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(pericfg_rst_ofs),
+ },
+};
+
static int clk_mt2712_apmixed_probe(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
pr_err("%s(): could not register clock provider: %d\n",
__func__, r);
- mtk_register_reset_controller(node, 2, 0x30);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc[0]);
return r;
}
pr_err("%s(): could not register clock provider: %d\n",
__func__, r);
- mtk_register_reset_controller(node, 2, 0);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc[1]);
return r;
}
"ssusb_cdr_fb", 5),
};
+static u16 rst_ofs[] = { 0x34, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(rst_ofs),
+};
+
static int clk_mt7622_ethsys_init(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
"could not register clock provider: %s: %d\n",
pdev->name, r);
- mtk_register_reset_controller(node, 1, 0x34);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
GATE_PCIE(CLK_SATA_PM_EN, "sata_pm_en", "univpll2_d4", 30),
};
+static u16 rst_ofs[] = { 0x34, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(rst_ofs),
+};
+
static int clk_mt7622_ssusbsys_init(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
"could not register clock provider: %s: %d\n",
pdev->name, r);
- mtk_register_reset_controller(node, 1, 0x34);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
"could not register clock provider: %s: %d\n",
pdev->name, r);
- mtk_register_reset_controller(node, 1, 0x34);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
MUX(CLK_PERIBUS_SEL, "peribus_ck_sel", peribus_ck_parents, 0x05C, 0, 1),
};
+static u16 infrasys_rst_ofs[] = { 0x30, };
+static u16 pericfg_rst_ofs[] = { 0x0, 0x4, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc[] = {
+ /* infrasys */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = infrasys_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infrasys_rst_ofs),
+ },
+ /* pericfg */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = pericfg_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(pericfg_rst_ofs),
+ },
+};
+
static int mtk_topckgen_init(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
if (r)
return r;
- mtk_register_reset_controller(node, 1, 0x30);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc[0]);
return 0;
}
clk_prepare_enable(clk_data->hws[CLK_PERI_UART0_PD]->clk);
- mtk_register_reset_controller(node, 2, 0x0);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc[1]);
return 0;
}
}
};
+static u16 rst_ofs[] = { 0x34, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(rst_ofs),
+};
+
static int clk_mt7629_ethsys_init(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
"could not register clock provider: %s: %d\n",
pdev->name, r);
- mtk_register_reset_controller(node, 1, 0x34);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
GATE_PCIE(CLK_PCIE_P0_PIPE_EN, "pcie_p0_pipe_en", "pcie0_pipe_en", 23),
};
+static u16 rst_ofs[] = { 0x34, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(rst_ofs),
+};
+
static int clk_mt7629_ssusbsys_init(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
"could not register clock provider: %s: %d\n",
pdev->name, r);
- mtk_register_reset_controller(node, 1, 0x34);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
"could not register clock provider: %s: %d\n",
pdev->name, r);
- mtk_register_reset_controller(node, 1, 0x34);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
MUX(CLK_PERI_UART3_SEL, "uart3_ck_sel", uart_ck_sel_parents, 0x40c, 3, 1),
};
+static u16 infrasys_rst_ofs[] = { 0x30, 0x34, };
+static u16 pericfg_rst_ofs[] = { 0x0, 0x4, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc[] = {
+ /* infrasys */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = infrasys_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infrasys_rst_ofs),
+ },
+ /* pericfg */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = pericfg_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(pericfg_rst_ofs),
+ }
+};
+
static void __init mtk_topckgen_init(struct device_node *node)
{
struct clk_hw_onecell_data *clk_data;
pr_err("%s(): could not register clock provider: %d\n",
__func__, r);
- mtk_register_reset_controller(node, 2, 0x30);
+ mtk_register_reset_controller(node, &clk_rst_desc[0]);
}
CLK_OF_DECLARE(mtk_infrasys, "mediatek,mt8135-infracfg", mtk_infrasys_init);
pr_err("%s(): could not register clock provider: %d\n",
__func__, r);
- mtk_register_reset_controller(node, 2, 0);
+ mtk_register_reset_controller(node, &clk_rst_desc[1]);
}
CLK_OF_DECLARE(mtk_pericfg, "mediatek,mt8135-pericfg", mtk_pericfg_init);
GATE_VENCLT(CLK_VENCLT_CKE1, "venclt_cke1", "venclt_sel", 4),
};
+static u16 infrasys_rst_ofs[] = { 0x30, 0x34, };
+static u16 pericfg_rst_ofs[] = { 0x0, 0x4, };
+
+static const struct mtk_clk_rst_desc clk_rst_desc[] = {
+ /* infrasys */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = infrasys_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infrasys_rst_ofs),
+ },
+ /* pericfg */
+ {
+ .version = MTK_RST_SIMPLE,
+ .rst_bank_ofs = pericfg_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(pericfg_rst_ofs),
+ }
+};
+
static struct clk_hw_onecell_data *mt8173_top_clk_data __initdata;
static struct clk_hw_onecell_data *mt8173_pll_clk_data __initdata;
pr_err("%s(): could not register clock provider: %d\n",
__func__, r);
- mtk_register_reset_controller(node, 2, 0x30);
+ mtk_register_reset_controller(node, &clk_rst_desc[0]);
}
CLK_OF_DECLARE(mtk_infrasys, "mediatek,mt8173-infracfg", mtk_infrasys_init);
pr_err("%s(): could not register clock provider: %d\n",
__func__, r);
- mtk_register_reset_controller(node, 2, 0);
+ mtk_register_reset_controller(node, &clk_rst_desc[1]);
}
CLK_OF_DECLARE(mtk_pericfg, "mediatek,mt8173-pericfg", mtk_pericfg_init);
#include <dt-bindings/clock/mt8183-clk.h>
-/* Infra global controller reset set register */
-#define INFRA_RST0_SET_OFFSET 0x120
-
static DEFINE_SPINLOCK(mt8183_clk_lock);
static const struct mtk_fixed_clk top_fixed_clks[] = {
0, 0, 32, 8, 0x02B4, 1, 0x02BC, 0x0014, 1, 0x02B8, 0, 0x02B4),
};
+static u16 infra_rst_ofs[] = {
+ INFRA_RST0_SET_OFFSET,
+ INFRA_RST1_SET_OFFSET,
+ INFRA_RST2_SET_OFFSET,
+ INFRA_RST3_SET_OFFSET,
+};
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SET_CLR,
+ .rst_bank_ofs = infra_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infra_rst_ofs),
+};
+
static int clk_mt8183_apmixed_probe(struct platform_device *pdev)
{
struct clk_hw_onecell_data *clk_data;
return r;
}
- mtk_register_reset_controller_set_clr(node, 4, INFRA_RST0_SET_OFFSET);
+ mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
return r;
}
#include <linux/clk-provider.h>
#include <linux/platform_device.h>
#include <dt-bindings/clock/mt8186-clk.h>
+#include <dt-bindings/reset/mt8186-resets.h>
#include "clk-gate.h"
#include "clk-mtk.h"
GATE_INFRA_AO3(CLK_INFRA_AO_FLASHIF_66M, "infra_ao_flashif_66m", "top_axi", 29),
};
+static u16 infra_ao_rst_ofs[] = {
+ INFRA_RST0_SET_OFFSET,
+ INFRA_RST1_SET_OFFSET,
+ INFRA_RST2_SET_OFFSET,
+ INFRA_RST3_SET_OFFSET,
+ INFRA_RST4_SET_OFFSET,
+};
+
+static u16 infra_ao_idx_map[] = {
+ [MT8186_INFRA_THERMAL_CTRL_RST] = 0 * RST_NR_PER_BANK + 0,
+ [MT8186_INFRA_PTP_CTRL_RST] = 1 * RST_NR_PER_BANK + 0,
+};
+
+static struct mtk_clk_rst_desc infra_ao_rst_desc = {
+ .version = MTK_RST_SET_CLR,
+ .rst_bank_ofs = infra_ao_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infra_ao_rst_ofs),
+ .rst_idx_map = infra_ao_idx_map,
+ .rst_idx_map_nr = ARRAY_SIZE(infra_ao_idx_map),
+};
+
static const struct mtk_clk_desc infra_ao_desc = {
.clks = infra_ao_clks,
.num_clks = ARRAY_SIZE(infra_ao_clks),
+ .rst_desc = &infra_ao_rst_desc,
};
static const struct of_device_id of_match_clk_mt8186_infra_ao[] = {
#include <dt-bindings/clock/mt8192-clk.h>
-static const struct mtk_gate_regs msdc_cg_regs = {
- .set_ofs = 0xb4,
- .clr_ofs = 0xb4,
- .sta_ofs = 0xb4,
-};
-
static const struct mtk_gate_regs msdc_top_cg_regs = {
.set_ofs = 0x0,
.clr_ofs = 0x0,
.sta_ofs = 0x0,
};
-#define GATE_MSDC(_id, _name, _parent, _shift) \
- GATE_MTK(_id, _name, _parent, &msdc_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr_inv)
-
#define GATE_MSDC_TOP(_id, _name, _parent, _shift) \
GATE_MTK(_id, _name, _parent, &msdc_top_cg_regs, _shift, &mtk_clk_gate_ops_no_setclr_inv)
-static const struct mtk_gate msdc_clks[] = {
- GATE_MSDC(CLK_MSDC_AXI_WRAP, "msdc_axi_wrap", "axi_sel", 22),
-};
-
static const struct mtk_gate msdc_top_clks[] = {
GATE_MSDC_TOP(CLK_MSDC_TOP_AES_0P, "msdc_top_aes_0p", "aes_msdcfde_sel", 0),
GATE_MSDC_TOP(CLK_MSDC_TOP_SRC_0P, "msdc_top_src_0p", "infra_msdc0_src", 1),
GATE_MSDC_TOP(CLK_MSDC_TOP_AHB2AXI_BRG_AXI, "msdc_top_ahb2axi_brg_axi", "axi_sel", 14),
};
-static const struct mtk_clk_desc msdc_desc = {
- .clks = msdc_clks,
- .num_clks = ARRAY_SIZE(msdc_clks),
-};
-
static const struct mtk_clk_desc msdc_top_desc = {
.clks = msdc_top_clks,
.num_clks = ARRAY_SIZE(msdc_top_clks),
static const struct of_device_id of_match_clk_mt8192_msdc[] = {
{
- .compatible = "mediatek,mt8192-msdc",
- .data = &msdc_desc,
- }, {
.compatible = "mediatek,mt8192-msdc_top",
.data = &msdc_top_desc,
}, {
#include "clk-pll.h"
#include <dt-bindings/clock/mt8192-clk.h>
+#include <dt-bindings/reset/mt8192-resets.h>
static DEFINE_SPINLOCK(mt8192_clk_lock);
GATE_TOP(CLK_TOP_SSUSB_PHY_REF, "ssusb_phy_ref", "clk26m", 25),
};
+static u16 infra_ao_rst_ofs[] = {
+ INFRA_RST0_SET_OFFSET,
+ INFRA_RST1_SET_OFFSET,
+ INFRA_RST2_SET_OFFSET,
+ INFRA_RST3_SET_OFFSET,
+ INFRA_RST4_SET_OFFSET,
+};
+
+static u16 infra_ao_idx_map[] = {
+ [MT8192_INFRA_RST0_THERM_CTRL_SWRST] = 0 * RST_NR_PER_BANK + 0,
+ [MT8192_INFRA_RST2_PEXTP_PHY_SWRST] = 2 * RST_NR_PER_BANK + 15,
+ [MT8192_INFRA_RST3_THERM_CTRL_PTP_SWRST] = 3 * RST_NR_PER_BANK + 5,
+ [MT8192_INFRA_RST4_PCIE_TOP_SWRST] = 4 * RST_NR_PER_BANK + 1,
+ [MT8192_INFRA_RST4_THERM_CTRL_MCU_SWRST] = 4 * RST_NR_PER_BANK + 12,
+};
+
+static const struct mtk_clk_rst_desc clk_rst_desc = {
+ .version = MTK_RST_SET_CLR,
+ .rst_bank_ofs = infra_ao_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infra_ao_rst_ofs),
+ .rst_idx_map = infra_ao_idx_map,
+ .rst_idx_map_nr = ARRAY_SIZE(infra_ao_idx_map),
+};
+
#define MT8192_PLL_FMAX (3800UL * MHZ)
#define MT8192_PLL_FMIN (1500UL * MHZ)
#define MT8192_INTEGER_BITS 8
if (r)
goto free_clk_data;
+ r = mtk_register_reset_controller_with_dev(&pdev->dev, &clk_rst_desc);
+ if (r)
+ goto free_clk_data;
+
r = of_clk_add_hw_provider(node, of_clk_hw_onecell_get, clk_data);
if (r)
goto free_clk_data;
#include "clk-mtk.h"
#include <dt-bindings/clock/mt8195-clk.h>
+#include <dt-bindings/reset/mt8195-resets.h>
#include <linux/clk-provider.h>
#include <linux/platform_device.h>
GATE_INFRA_AO4(CLK_INFRA_AO_PERI_UFS_MEM_SUB, "infra_ao_peri_ufs_mem_sub", "mem_466m", 31),
};
+static u16 infra_ao_rst_ofs[] = {
+ INFRA_RST0_SET_OFFSET,
+ INFRA_RST1_SET_OFFSET,
+ INFRA_RST2_SET_OFFSET,
+ INFRA_RST3_SET_OFFSET,
+ INFRA_RST4_SET_OFFSET,
+};
+
+static u16 infra_ao_idx_map[] = {
+ [MT8195_INFRA_RST0_THERM_CTRL_SWRST] = 0 * RST_NR_PER_BANK + 0,
+ [MT8195_INFRA_RST3_THERM_CTRL_PTP_SWRST] = 3 * RST_NR_PER_BANK + 5,
+ [MT8195_INFRA_RST4_THERM_CTRL_MCU_SWRST] = 4 * RST_NR_PER_BANK + 10,
+};
+
+static struct mtk_clk_rst_desc infra_ao_rst_desc = {
+ .version = MTK_RST_SET_CLR,
+ .rst_bank_ofs = infra_ao_rst_ofs,
+ .rst_bank_nr = ARRAY_SIZE(infra_ao_rst_ofs),
+ .rst_idx_map = infra_ao_idx_map,
+ .rst_idx_map_nr = ARRAY_SIZE(infra_ao_idx_map),
+};
+
static const struct mtk_clk_desc infra_ao_desc = {
.clks = infra_ao_clks,
.num_clks = ARRAY_SIZE(infra_ao_clks),
+ .rst_desc = &infra_ao_rst_desc,
};
static const struct of_device_id of_match_clk_mt8195_infra_ao[] = {
platform_set_drvdata(pdev, clk_data);
+ if (mcd->rst_desc) {
+ r = mtk_register_reset_controller_with_dev(&pdev->dev,
+ mcd->rst_desc);
+ if (r)
+ goto unregister_clks;
+ }
+
return r;
unregister_clks:
#include <linux/spinlock.h>
#include <linux/types.h>
+#include "reset.h"
+
#define MAX_MUX_GATE_BIT 31
#define INVALID_MUX_GATE_BIT (MAX_MUX_GATE_BIT + 1)
struct clk_hw *mtk_clk_register_ref2usb_tx(const char *name,
const char *parent_name, void __iomem *reg);
-void mtk_register_reset_controller(struct device_node *np,
- unsigned int num_regs, int regofs);
-
-void mtk_register_reset_controller_set_clr(struct device_node *np,
- unsigned int num_regs, int regofs);
-
struct mtk_clk_desc {
const struct mtk_gate *clks;
size_t num_clks;
+ const struct mtk_clk_rst_desc *rst_desc;
};
int mtk_clk_simple_probe(struct platform_device *pdev);
#include <linux/of.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
-#include <linux/reset-controller.h>
#include <linux/slab.h>
-#include "clk-mtk.h"
+#include "reset.h"
-struct mtk_reset {
- struct regmap *regmap;
- int regofs;
- struct reset_controller_dev rcdev;
-};
-
-static int mtk_reset_assert_set_clr(struct reset_controller_dev *rcdev,
- unsigned long id)
+static inline struct mtk_clk_rst_data *to_mtk_clk_rst_data(struct reset_controller_dev *rcdev)
{
- struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
- unsigned int reg = data->regofs + ((id / 32) << 4);
-
- return regmap_write(data->regmap, reg, 1);
+ return container_of(rcdev, struct mtk_clk_rst_data, rcdev);
}
-static int mtk_reset_deassert_set_clr(struct reset_controller_dev *rcdev,
- unsigned long id)
+static int mtk_reset_update(struct reset_controller_dev *rcdev,
+ unsigned long id, bool deassert)
{
- struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
- unsigned int reg = data->regofs + ((id / 32) << 4) + 0x4;
+ struct mtk_clk_rst_data *data = to_mtk_clk_rst_data(rcdev);
+ unsigned int val = deassert ? 0 : ~0;
- return regmap_write(data->regmap, reg, 1);
+ return regmap_update_bits(data->regmap,
+ data->desc->rst_bank_ofs[id / RST_NR_PER_BANK],
+ BIT(id % RST_NR_PER_BANK), val);
}
static int mtk_reset_assert(struct reset_controller_dev *rcdev,
- unsigned long id)
+ unsigned long id)
{
- struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
-
- return regmap_update_bits(data->regmap, data->regofs + ((id / 32) << 2),
- BIT(id % 32), ~0);
+ return mtk_reset_update(rcdev, id, false);
}
static int mtk_reset_deassert(struct reset_controller_dev *rcdev,
- unsigned long id)
+ unsigned long id)
{
- struct mtk_reset *data = container_of(rcdev, struct mtk_reset, rcdev);
-
- return regmap_update_bits(data->regmap, data->regofs + ((id / 32) << 2),
- BIT(id % 32), 0);
+ return mtk_reset_update(rcdev, id, true);
}
-static int mtk_reset(struct reset_controller_dev *rcdev,
- unsigned long id)
+static int mtk_reset(struct reset_controller_dev *rcdev, unsigned long id)
{
int ret;
return mtk_reset_deassert(rcdev, id);
}
+static int mtk_reset_update_set_clr(struct reset_controller_dev *rcdev,
+ unsigned long id, bool deassert)
+{
+ struct mtk_clk_rst_data *data = to_mtk_clk_rst_data(rcdev);
+ unsigned int deassert_ofs = deassert ? 0x4 : 0;
+
+ return regmap_write(data->regmap,
+ data->desc->rst_bank_ofs[id / RST_NR_PER_BANK] +
+ deassert_ofs,
+ BIT(id % RST_NR_PER_BANK));
+}
+
+static int mtk_reset_assert_set_clr(struct reset_controller_dev *rcdev,
+ unsigned long id)
+{
+ return mtk_reset_update_set_clr(rcdev, id, false);
+}
+
+static int mtk_reset_deassert_set_clr(struct reset_controller_dev *rcdev,
+ unsigned long id)
+{
+ return mtk_reset_update_set_clr(rcdev, id, true);
+}
+
static int mtk_reset_set_clr(struct reset_controller_dev *rcdev,
- unsigned long id)
+ unsigned long id)
{
int ret;
.reset = mtk_reset_set_clr,
};
-static void mtk_register_reset_controller_common(struct device_node *np,
- unsigned int num_regs, int regofs,
- const struct reset_control_ops *reset_ops)
+static int reset_xlate(struct reset_controller_dev *rcdev,
+ const struct of_phandle_args *reset_spec)
+{
+ struct mtk_clk_rst_data *data = to_mtk_clk_rst_data(rcdev);
+
+ if (reset_spec->args[0] >= rcdev->nr_resets ||
+ reset_spec->args[0] >= data->desc->rst_idx_map_nr)
+ return -EINVAL;
+
+ return data->desc->rst_idx_map[reset_spec->args[0]];
+}
+
+int mtk_register_reset_controller(struct device_node *np,
+ const struct mtk_clk_rst_desc *desc)
{
- struct mtk_reset *data;
- int ret;
struct regmap *regmap;
+ const struct reset_control_ops *rcops = NULL;
+ struct mtk_clk_rst_data *data;
+ int ret;
+
+ if (!desc) {
+ pr_err("mtk clock reset desc is NULL\n");
+ return -EINVAL;
+ }
+
+ switch (desc->version) {
+ case MTK_RST_SIMPLE:
+ rcops = &mtk_reset_ops;
+ break;
+ case MTK_RST_SET_CLR:
+ rcops = &mtk_reset_ops_set_clr;
+ break;
+ default:
+ pr_err("Unknown reset version %d\n", desc->version);
+ return -EINVAL;
+ }
regmap = device_node_to_regmap(np);
if (IS_ERR(regmap)) {
pr_err("Cannot find regmap for %pOF: %pe\n", np, regmap);
- return;
+ return -EINVAL;
}
data = kzalloc(sizeof(*data), GFP_KERNEL);
if (!data)
- return;
+ return -ENOMEM;
+ data->desc = desc;
data->regmap = regmap;
- data->regofs = regofs;
data->rcdev.owner = THIS_MODULE;
- data->rcdev.nr_resets = num_regs * 32;
- data->rcdev.ops = reset_ops;
+ data->rcdev.ops = rcops;
data->rcdev.of_node = np;
+ if (data->desc->rst_idx_map_nr > 0) {
+ data->rcdev.of_reset_n_cells = 1;
+ data->rcdev.nr_resets = desc->rst_idx_map_nr;
+ data->rcdev.of_xlate = reset_xlate;
+ } else {
+ data->rcdev.nr_resets = desc->rst_bank_nr * RST_NR_PER_BANK;
+ }
+
ret = reset_controller_register(&data->rcdev);
if (ret) {
pr_err("could not register reset controller: %d\n", ret);
kfree(data);
- return;
+ return ret;
}
-}
-void mtk_register_reset_controller(struct device_node *np,
- unsigned int num_regs, int regofs)
-{
- mtk_register_reset_controller_common(np, num_regs, regofs,
- &mtk_reset_ops);
+ return 0;
}
-void mtk_register_reset_controller_set_clr(struct device_node *np,
- unsigned int num_regs, int regofs)
+int mtk_register_reset_controller_with_dev(struct device *dev,
+ const struct mtk_clk_rst_desc *desc)
{
- mtk_register_reset_controller_common(np, num_regs, regofs,
- &mtk_reset_ops_set_clr);
+ struct device_node *np = dev->of_node;
+ struct regmap *regmap;
+ const struct reset_control_ops *rcops = NULL;
+ struct mtk_clk_rst_data *data;
+ int ret;
+
+ if (!desc) {
+ dev_err(dev, "mtk clock reset desc is NULL\n");
+ return -EINVAL;
+ }
+
+ switch (desc->version) {
+ case MTK_RST_SIMPLE:
+ rcops = &mtk_reset_ops;
+ break;
+ case MTK_RST_SET_CLR:
+ rcops = &mtk_reset_ops_set_clr;
+ break;
+ default:
+ dev_err(dev, "Unknown reset version %d\n", desc->version);
+ return -EINVAL;
+ }
+
+ regmap = device_node_to_regmap(np);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "Cannot find regmap %pe\n", regmap);
+ return -EINVAL;
+ }
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ data->desc = desc;
+ data->regmap = regmap;
+ data->rcdev.owner = THIS_MODULE;
+ data->rcdev.ops = rcops;
+ data->rcdev.of_node = np;
+ data->rcdev.dev = dev;
+
+ if (data->desc->rst_idx_map_nr > 0) {
+ data->rcdev.of_reset_n_cells = 1;
+ data->rcdev.nr_resets = desc->rst_idx_map_nr;
+ data->rcdev.of_xlate = reset_xlate;
+ } else {
+ data->rcdev.nr_resets = desc->rst_bank_nr * RST_NR_PER_BANK;
+ }
+
+ ret = devm_reset_controller_register(dev, &data->rcdev);
+ if (ret) {
+ dev_err(dev, "could not register reset controller: %d\n", ret);
+ return ret;
+ }
+
+ return 0;
}
MODULE_LICENSE("GPL");
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2022 MediaTek Inc.
+ */
+
+#ifndef __DRV_CLK_MTK_RESET_H
+#define __DRV_CLK_MTK_RESET_H
+
+#include <linux/reset-controller.h>
+#include <linux/types.h>
+
+#define RST_NR_PER_BANK 32
+
+/* Infra global controller reset set register */
+#define INFRA_RST0_SET_OFFSET 0x120
+#define INFRA_RST1_SET_OFFSET 0x130
+#define INFRA_RST2_SET_OFFSET 0x140
+#define INFRA_RST3_SET_OFFSET 0x150
+#define INFRA_RST4_SET_OFFSET 0x730
+
+/**
+ * enum mtk_reset_version - Version of MediaTek clock reset controller.
+ * @MTK_RST_SIMPLE: Use the same registers for bit set and clear.
+ * @MTK_RST_SET_CLR: Use separate registers for bit set and clear.
+ * @MTK_RST_MAX: Total quantity of version for MediaTek clock reset controller.
+ */
+enum mtk_reset_version {
+ MTK_RST_SIMPLE = 0,
+ MTK_RST_SET_CLR,
+ MTK_RST_MAX,
+};
+
+/**
+ * struct mtk_clk_rst_desc - Description of MediaTek clock reset.
+ * @version: Reset version which is defined in enum mtk_reset_version.
+ * @rst_bank_ofs: Pointer to an array containing base offsets of the reset register.
+ * @rst_bank_nr: Quantity of reset bank.
+ * @rst_idx_map:Pointer to an array containing ids if input argument is index.
+ * This array is not necessary if our input argument does not mean index.
+ * @rst_idx_map_nr: Quantity of reset index map.
+ */
+struct mtk_clk_rst_desc {
+ enum mtk_reset_version version;
+ u16 *rst_bank_ofs;
+ u32 rst_bank_nr;
+ u16 *rst_idx_map;
+ u32 rst_idx_map_nr;
+};
+
+/**
+ * struct mtk_clk_rst_data - Data of MediaTek clock reset controller.
+ * @regmap: Pointer to base address of reset register address.
+ * @rcdev: Reset controller device.
+ * @desc: Pointer to description of the reset controller.
+ */
+struct mtk_clk_rst_data {
+ struct regmap *regmap;
+ struct reset_controller_dev rcdev;
+ const struct mtk_clk_rst_desc *desc;
+};
+
+/**
+ * mtk_register_reset_controller - Register MediaTek clock reset controller
+ * @np: Pointer to device node.
+ * @desc: Constant pointer to description of clock reset.
+ *
+ * Return: 0 on success and errorno otherwise.
+ */
+int mtk_register_reset_controller(struct device_node *np,
+ const struct mtk_clk_rst_desc *desc);
+
+/**
+ * mtk_register_reset_controller - Register mediatek clock reset controller with device
+ * @np: Pointer to device.
+ * @desc: Constant pointer to description of clock reset.
+ *
+ * Return: 0 on success and errorno otherwise.
+ */
+int mtk_register_reset_controller_with_dev(struct device *dev,
+ const struct mtk_clk_rst_desc *desc);
+
+#endif /* __DRV_CLK_MTK_RESET_H */
&sm1_sysclk_b_en,
};
-static int devm_clk_get_enable(struct device *dev, char *id)
-{
- struct clk *clk;
- int ret;
-
- clk = devm_clk_get(dev, id);
- if (IS_ERR(clk)) {
- ret = PTR_ERR(clk);
- dev_err_probe(dev, ret, "failed to get %s", id);
- return ret;
- }
-
- ret = clk_prepare_enable(clk);
- if (ret) {
- dev_err(dev, "failed to enable %s", id);
- return ret;
- }
-
- ret = devm_add_action_or_reset(dev,
- (void(*)(void *))clk_disable_unprepare,
- clk);
- if (ret) {
- dev_err(dev, "failed to add reset action on %s", id);
- return ret;
- }
-
- return 0;
-}
-
struct axg_audio_reset_data {
struct reset_controller_dev rstc;
struct regmap *map;
struct regmap *map;
void __iomem *regs;
struct clk_hw *hw;
+ struct clk *clk;
int ret, i;
data = of_device_get_match_data(dev);
}
/* Get the mandatory peripheral clock */
- ret = devm_clk_get_enable(dev, "pclk");
- if (ret)
- return ret;
+ clk = devm_clk_get_enabled(dev, "pclk");
+ if (IS_ERR(clk))
+ return PTR_ERR(clk);
ret = device_reset(dev);
if (ret) {
struct r8a73a4_cpg {
struct clk_onecell_data data;
spinlock_t lock;
- void __iomem *reg;
};
#define CPG_CKSCR 0xc0
static struct clk * __init
r8a73a4_cpg_register_clock(struct device_node *np, struct r8a73a4_cpg *cpg,
- const char *name)
+ void __iomem *base, const char *name)
{
const struct clk_div_table *table = NULL;
const char *parent_name;
if (!strcmp(name, "main")) {
- u32 ckscr = readl(cpg->reg + CPG_CKSCR);
+ u32 ckscr = readl(base + CPG_CKSCR);
switch ((ckscr >> 28) & 3) {
case 0: /* extal1 */
* clock implementation and we currently have no need to change
* the multiplier value.
*/
- u32 value = readl(cpg->reg + CPG_PLL0CR);
+ u32 value = readl(base + CPG_PLL0CR);
parent_name = "main";
mult = ((value >> 24) & 0x7f) + 1;
if (value & BIT(20))
div = 2;
} else if (!strcmp(name, "pll1")) {
- u32 value = readl(cpg->reg + CPG_PLL1CR);
+ u32 value = readl(base + CPG_PLL1CR);
parent_name = "main";
/* XXX: enable bit? */
default:
return ERR_PTR(-EINVAL);
}
- value = readl(cpg->reg + cr);
+ value = readl(base + cr);
switch ((value >> 5) & 7) {
case 0:
parent_name = "main";
shift = 0;
}
div *= 32;
- mult = 0x20 - ((readl(cpg->reg + CPG_FRQCRC) >> shift) & 0x1f);
+ mult = 0x20 - ((readl(base + CPG_FRQCRC) >> shift) & 0x1f);
} else {
struct div4_clk *c;
mult, div);
} else {
return clk_register_divider_table(NULL, name, parent_name, 0,
- cpg->reg + reg, shift, 4, 0,
+ base + reg, shift, 4, 0,
table, &cpg->lock);
}
}
static void __init r8a73a4_cpg_clocks_init(struct device_node *np)
{
struct r8a73a4_cpg *cpg;
+ void __iomem *base;
struct clk **clks;
unsigned int i;
int num_clks;
cpg->data.clks = clks;
cpg->data.clk_num = num_clks;
- cpg->reg = of_iomap(np, 0);
- if (WARN_ON(cpg->reg == NULL))
+ base = of_iomap(np, 0);
+ if (WARN_ON(base == NULL))
return;
for (i = 0; i < num_clks; ++i) {
of_property_read_string_index(np, "clock-output-names", i,
&name);
- clk = r8a73a4_cpg_register_clock(np, cpg, name);
+ clk = r8a73a4_cpg_register_clock(np, cpg, base, name);
if (IS_ERR(clk))
pr_err("%s: failed to register %pOFn %s clock (%ld)\n",
__func__, np, name, PTR_ERR(clk));
struct r8a7740_cpg {
struct clk_onecell_data data;
spinlock_t lock;
- void __iomem *reg;
};
#define CPG_FRQCRA 0x00
static struct clk * __init
r8a7740_cpg_register_clock(struct device_node *np, struct r8a7740_cpg *cpg,
- const char *name)
+ void __iomem *base, const char *name)
{
const struct clk_div_table *table = NULL;
const char *parent_name;
* clock implementation and we currently have no need to change
* the multiplier value.
*/
- u32 value = readl(cpg->reg + CPG_FRQCRC);
+ u32 value = readl(base + CPG_FRQCRC);
parent_name = "system";
mult = ((value >> 24) & 0x7f) + 1;
} else if (!strcmp(name, "pllc1")) {
- u32 value = readl(cpg->reg + CPG_FRQCRA);
+ u32 value = readl(base + CPG_FRQCRA);
parent_name = "system";
mult = ((value >> 24) & 0x7f) + 1;
div = 2;
} else if (!strcmp(name, "pllc2")) {
- u32 value = readl(cpg->reg + CPG_PLLC2CR);
+ u32 value = readl(base + CPG_PLLC2CR);
parent_name = "system";
mult = ((value >> 24) & 0x3f) + 1;
} else if (!strcmp(name, "usb24s")) {
- u32 value = readl(cpg->reg + CPG_USBCKCR);
+ u32 value = readl(base + CPG_USBCKCR);
if (value & BIT(7))
/* extal2 */
parent_name = of_clk_get_parent_name(np, 1);
mult, div);
} else {
return clk_register_divider_table(NULL, name, parent_name, 0,
- cpg->reg + reg, shift, 4, 0,
+ base + reg, shift, 4, 0,
table, &cpg->lock);
}
}
static void __init r8a7740_cpg_clocks_init(struct device_node *np)
{
struct r8a7740_cpg *cpg;
+ void __iomem *base;
struct clk **clks;
unsigned int i;
int num_clks;
cpg->data.clks = clks;
cpg->data.clk_num = num_clks;
- cpg->reg = of_iomap(np, 0);
- if (WARN_ON(cpg->reg == NULL))
+ base = of_iomap(np, 0);
+ if (WARN_ON(base == NULL))
return;
for (i = 0; i < num_clks; ++i) {
of_property_read_string_index(np, "clock-output-names", i,
&name);
- clk = r8a7740_cpg_register_clock(np, cpg, name);
+ clk = r8a7740_cpg_register_clock(np, cpg, base, name);
if (IS_ERR(clk))
pr_err("%s: failed to register %pOFn %s clock (%ld)\n",
__func__, np, name, PTR_ERR(clk));
#include <linux/slab.h>
#include <linux/soc/renesas/rcar-rst.h>
-struct r8a7778_cpg {
- struct clk_onecell_data data;
- spinlock_t lock;
- void __iomem *reg;
-};
-
/* PLL multipliers per bits 11, 12, and 18 of MODEMR */
static const struct {
unsigned long plla_mult;
static u32 cpg_mode_divs __initdata;
static struct clk * __init
-r8a7778_cpg_register_clock(struct device_node *np, struct r8a7778_cpg *cpg,
- const char *name)
+r8a7778_cpg_register_clock(struct device_node *np, const char *name)
{
if (!strcmp(name, "plla")) {
return clk_register_fixed_factor(NULL, "plla",
static void __init r8a7778_cpg_clocks_init(struct device_node *np)
{
- struct r8a7778_cpg *cpg;
+ struct clk_onecell_data *data;
struct clk **clks;
unsigned int i;
int num_clks;
return;
}
- cpg = kzalloc(sizeof(*cpg), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
clks = kcalloc(num_clks, sizeof(*clks), GFP_KERNEL);
- if (cpg == NULL || clks == NULL) {
+ if (data == NULL || clks == NULL) {
/* We're leaking memory on purpose, there's no point in cleaning
* up as the system won't boot anyway.
*/
return;
}
- spin_lock_init(&cpg->lock);
-
- cpg->data.clks = clks;
- cpg->data.clk_num = num_clks;
-
- cpg->reg = of_iomap(np, 0);
- if (WARN_ON(cpg->reg == NULL))
- return;
+ data->clks = clks;
+ data->clk_num = num_clks;
for (i = 0; i < num_clks; ++i) {
const char *name;
of_property_read_string_index(np, "clock-output-names", i,
&name);
- clk = r8a7778_cpg_register_clock(np, cpg, name);
+ clk = r8a7778_cpg_register_clock(np, name);
if (IS_ERR(clk))
pr_err("%s: failed to register %pOFn %s clock (%ld)\n",
__func__, np, name, PTR_ERR(clk));
else
- cpg->data.clks[i] = clk;
+ data->clks[i] = clk;
}
- of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
+ of_clk_add_provider(np, of_clk_src_onecell_get, data);
cpg_mstp_add_clk_domain(np);
}
#define CPG_NUM_CLOCKS (R8A7779_CLK_OUT + 1)
-struct r8a7779_cpg {
- struct clk_onecell_data data;
- spinlock_t lock;
- void __iomem *reg;
-};
-
/* -----------------------------------------------------------------------------
* CPG Clock Data
*/
*/
static struct clk * __init
-r8a7779_cpg_register_clock(struct device_node *np, struct r8a7779_cpg *cpg,
+r8a7779_cpg_register_clock(struct device_node *np,
const struct cpg_clk_config *config,
unsigned int plla_mult, const char *name)
{
static void __init r8a7779_cpg_clocks_init(struct device_node *np)
{
const struct cpg_clk_config *config;
- struct r8a7779_cpg *cpg;
+ struct clk_onecell_data *data;
struct clk **clks;
unsigned int i, plla_mult;
int num_clks;
return;
}
- cpg = kzalloc(sizeof(*cpg), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
clks = kcalloc(CPG_NUM_CLOCKS, sizeof(*clks), GFP_KERNEL);
- if (cpg == NULL || clks == NULL) {
+ if (data == NULL || clks == NULL) {
/* We're leaking memory on purpose, there's no point in cleaning
* up as the system won't boot anyway.
*/
return;
}
- spin_lock_init(&cpg->lock);
-
- cpg->data.clks = clks;
- cpg->data.clk_num = num_clks;
+ data->clks = clks;
+ data->clk_num = num_clks;
config = &cpg_clk_configs[CPG_CLK_CONFIG_INDEX(mode)];
plla_mult = cpg_plla_mult[CPG_PLLA_MULT_INDEX(mode)];
of_property_read_string_index(np, "clock-output-names", i,
&name);
- clk = r8a7779_cpg_register_clock(np, cpg, config,
- plla_mult, name);
+ clk = r8a7779_cpg_register_clock(np, config, plla_mult, name);
if (IS_ERR(clk))
pr_err("%s: failed to register %pOFn %s clock (%ld)\n",
__func__, np, name, PTR_ERR(clk));
else
- cpg->data.clks[i] = clk;
+ data->clks[i] = clk;
}
- of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
+ of_clk_add_provider(np, of_clk_src_onecell_get, data);
cpg_mstp_add_clk_domain(np);
}
#include <linux/of_address.h>
#include <linux/slab.h>
-struct rz_cpg {
- struct clk_onecell_data data;
- void __iomem *reg;
-};
-
#define CPG_FRQCR 0x10
#define CPG_FRQCR2 0x14
}
static struct clk * __init
-rz_cpg_register_clock(struct device_node *np, struct rz_cpg *cpg, const char *name)
+rz_cpg_register_clock(struct device_node *np, void __iomem *base,
+ const char *name)
{
u32 val;
unsigned mult;
}
/* If mapping regs failed, skip non-pll clocks. System will boot anyhow */
- if (!cpg->reg)
+ if (!base)
return ERR_PTR(-ENXIO);
/* FIXME:"i" and "g" are variable clocks with non-integer dividers (e.g. 2/3)
* let them run at fixed current speed and implement the details later.
*/
if (strcmp(name, "i") == 0)
- val = (readl(cpg->reg + CPG_FRQCR) >> 8) & 3;
+ val = (readl(base + CPG_FRQCR) >> 8) & 3;
else if (strcmp(name, "g") == 0)
- val = readl(cpg->reg + CPG_FRQCR2) & 3;
+ val = readl(base + CPG_FRQCR2) & 3;
else
return ERR_PTR(-EINVAL);
static void __init rz_cpg_clocks_init(struct device_node *np)
{
- struct rz_cpg *cpg;
+ struct clk_onecell_data *data;
struct clk **clks;
+ void __iomem *base;
unsigned i;
int num_clks;
if (WARN(num_clks <= 0, "can't count CPG clocks\n"))
return;
- cpg = kzalloc(sizeof(*cpg), GFP_KERNEL);
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
clks = kcalloc(num_clks, sizeof(*clks), GFP_KERNEL);
- BUG_ON(!cpg || !clks);
+ BUG_ON(!data || !clks);
- cpg->data.clks = clks;
- cpg->data.clk_num = num_clks;
+ data->clks = clks;
+ data->clk_num = num_clks;
- cpg->reg = of_iomap(np, 0);
+ base = of_iomap(np, 0);
for (i = 0; i < num_clks; ++i) {
const char *name;
of_property_read_string_index(np, "clock-output-names", i, &name);
- clk = rz_cpg_register_clock(np, cpg, name);
+ clk = rz_cpg_register_clock(np, base, name);
if (IS_ERR(clk))
pr_err("%s: failed to register %pOFn %s clock (%ld)\n",
__func__, np, name, PTR_ERR(clk));
else
- cpg->data.clks[i] = clk;
+ data->clks[i] = clk;
}
- of_clk_add_provider(np, of_clk_src_onecell_get, &cpg->data);
+ of_clk_add_provider(np, of_clk_src_onecell_get, data);
cpg_mstp_add_clk_domain(np);
}
struct sh73a0_cpg {
struct clk_onecell_data data;
spinlock_t lock;
- void __iomem *reg;
};
#define CPG_FRQCRA 0x00
static struct clk * __init
sh73a0_cpg_register_clock(struct device_node *np, struct sh73a0_cpg *cpg,
- const char *name)
+ void __iomem *base, const char *name)
{
const struct clk_div_table *table = NULL;
unsigned int shift, reg, width;
if (!strcmp(name, "main")) {
/* extal1, extal1_div2, extal2, extal2_div2 */
- u32 parent_idx = (readl(cpg->reg + CPG_CKSCR) >> 28) & 3;
+ u32 parent_idx = (readl(base + CPG_CKSCR) >> 28) & 3;
parent_name = of_clk_get_parent_name(np, parent_idx >> 1);
div = (parent_idx & 1) + 1;
} else if (!strncmp(name, "pll", 3)) {
- void __iomem *enable_reg = cpg->reg;
+ void __iomem *enable_reg = base;
u32 enable_bit = name[3] - '0';
parent_name = "main";
default:
return ERR_PTR(-EINVAL);
}
- if (readl(cpg->reg + CPG_PLLECR) & BIT(enable_bit)) {
+ if (readl(base + CPG_PLLECR) & BIT(enable_bit)) {
mult = ((readl(enable_reg) >> 24) & 0x3f) + 1;
/* handle CFG bit for PLL1 and PLL2 */
if (enable_bit == 1 || enable_bit == 2)
}
} else if (!strcmp(name, "dsi0phy") || !strcmp(name, "dsi1phy")) {
u32 phy_no = name[3] - '0';
- void __iomem *dsi_reg = cpg->reg +
+ void __iomem *dsi_reg = base +
(phy_no ? CPG_DSI1PHYCR : CPG_DSI0PHYCR);
parent_name = phy_no ? "dsi1pck" : "dsi0pck";
mult, div);
} else {
return clk_register_divider_table(NULL, name, parent_name, 0,
- cpg->reg + reg, shift, width, 0,
+ base + reg, shift, width, 0,
table, &cpg->lock);
}
}
static void __init sh73a0_cpg_clocks_init(struct device_node *np)
{
struct sh73a0_cpg *cpg;
+ void __iomem *base;
struct clk **clks;
unsigned int i;
int num_clks;
cpg->data.clks = clks;
cpg->data.clk_num = num_clks;
- cpg->reg = of_iomap(np, 0);
- if (WARN_ON(cpg->reg == NULL))
+ base = of_iomap(np, 0);
+ if (WARN_ON(base == NULL))
return;
/* Set SDHI clocks to a known state */
- writel(0x108, cpg->reg + CPG_SD0CKCR);
- writel(0x108, cpg->reg + CPG_SD1CKCR);
- writel(0x108, cpg->reg + CPG_SD2CKCR);
+ writel(0x108, base + CPG_SD0CKCR);
+ writel(0x108, base + CPG_SD1CKCR);
+ writel(0x108, base + CPG_SD2CKCR);
for (i = 0; i < num_clks; ++i) {
const char *name;
of_property_read_string_index(np, "clock-output-names", i,
&name);
- clk = sh73a0_cpg_register_clock(np, cpg, name);
+ clk = sh73a0_cpg_register_clock(np, cpg, base, name);
if (IS_ERR(clk))
pr_err("%s: failed to register %pOFn %s clock (%ld)\n",
__func__, np, name, PTR_ERR(clk));
DEF_BASE(".rpcsrc", CLK_RPCSRC, CLK_TYPE_GEN4_RPCSRC, CLK_PLL5),
/* Core Clock Outputs */
+ DEF_GEN4_Z("z0", R8A779F0_CLK_Z0, CLK_TYPE_GEN4_Z, CLK_PLL2, 2, 0),
+ DEF_GEN4_Z("z1", R8A779F0_CLK_Z1, CLK_TYPE_GEN4_Z, CLK_PLL2, 2, 8),
DEF_FIXED("s0d2", R8A779F0_CLK_S0D2, CLK_S0, 2, 1),
DEF_FIXED("s0d3", R8A779F0_CLK_S0D3, CLK_S0, 3, 1),
DEF_FIXED("s0d4", R8A779F0_CLK_S0D4, CLK_S0, 4, 1),
};
static const struct mssr_mod_clk r8a779f0_mod_clks[] __initconst = {
+ DEF_MOD("hscif0", 514, R8A779F0_CLK_S0D3),
+ DEF_MOD("hscif1", 515, R8A779F0_CLK_S0D3),
+ DEF_MOD("hscif2", 516, R8A779F0_CLK_S0D3),
+ DEF_MOD("hscif3", 517, R8A779F0_CLK_S0D3),
DEF_MOD("i2c0", 518, R8A779F0_CLK_S0D6_PER),
DEF_MOD("i2c1", 519, R8A779F0_CLK_S0D6_PER),
DEF_MOD("i2c2", 520, R8A779F0_CLK_S0D6_PER),
DEF_MOD("i2c3", 521, R8A779F0_CLK_S0D6_PER),
DEF_MOD("i2c4", 522, R8A779F0_CLK_S0D6_PER),
DEF_MOD("i2c5", 523, R8A779F0_CLK_S0D6_PER),
+ DEF_MOD("pcie0", 624, R8A779F0_CLK_S0D2),
+ DEF_MOD("pcie1", 625, R8A779F0_CLK_S0D2),
DEF_MOD("scif0", 702, R8A779F0_CLK_S0D12_PER),
DEF_MOD("scif1", 703, R8A779F0_CLK_S0D12_PER),
DEF_MOD("scif3", 704, R8A779F0_CLK_S0D12_PER),
DEF_MOD("scif4", 705, R8A779F0_CLK_S0D12_PER),
+ DEF_MOD("sdhi0", 706, R8A779F0_CLK_SD0),
DEF_MOD("sys-dmac0", 709, R8A779F0_CLK_S0D3_PER),
DEF_MOD("sys-dmac1", 710, R8A779F0_CLK_S0D3_PER),
DEF_MOD("wdt", 907, R8A779F0_CLK_R),
DEF_MOD("pfc0", 915, R8A779F0_CLK_CL16M),
+ DEF_MOD("tsc", 919, R8A779F0_CLK_CL16M),
DEF_MOD("ufs", 1514, R8A779F0_CLK_S0D4_HSC),
};
struct {
u16 div, mul;
};
- unsigned int factor;
- unsigned int frequency;
/* for dual gate */
struct {
- uint16_t group : 1, index: 3;
+ uint16_t group : 1;
u16 sel, g1, r1, g2, r2;
} dual;
};
.source = 1 + R9A06G032_##_src, .name = _n, \
.reg = _reg, .div_min = _min, .div_max = _max, \
.div_table = { __VA_ARGS__ } }
-#define D_UGATE(_idx, _n, _src, _g, _gi, _g1, _r1, _g2, _r2) \
+#define D_UGATE(_idx, _n, _src, _g, _g1, _r1, _g2, _r2) \
{ .type = K_DUALGATE, .index = R9A06G032_##_idx, \
.source = 1 + R9A06G032_##_src, .name = _n, \
- .dual = { .group = _g, .index = _gi, \
+ .dual = { .group = _g, \
.g1 = _g1, .r1 = _r1, .g2 = _g2, .r2 = _r2 }, }
enum { K_GATE = 0, K_FFC, K_DIV, K_BITSEL, K_DUALGATE };
.name = "uart_group_012",
.type = K_BITSEL,
.source = 1 + R9A06G032_DIV_UART,
- /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG1_PR2 */
- .dual.sel = ((0xec / 4) << 5) | 24,
+ /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG0_0 */
+ .dual.sel = ((0x34 / 4) << 5) | 30,
.dual.group = 0,
},
{
.name = "uart_group_34567",
.type = K_BITSEL,
.source = 1 + R9A06G032_DIV_P2_PG,
- /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG0_0 */
- .dual.sel = ((0x34 / 4) << 5) | 30,
+ /* R9A06G032_SYSCTRL_REG_PWRCTRL_PG1_PR2 */
+ .dual.sel = ((0xec / 4) << 5) | 24,
.dual.group = 1,
},
- D_UGATE(CLK_UART0, "clk_uart0", UART_GROUP_012, 0, 0, 0x1b2, 0x1b3, 0x1b4, 0x1b5),
- D_UGATE(CLK_UART1, "clk_uart1", UART_GROUP_012, 0, 1, 0x1b6, 0x1b7, 0x1b8, 0x1b9),
- D_UGATE(CLK_UART2, "clk_uart2", UART_GROUP_012, 0, 2, 0x1ba, 0x1bb, 0x1bc, 0x1bd),
- D_UGATE(CLK_UART3, "clk_uart3", UART_GROUP_34567, 1, 0, 0x760, 0x761, 0x762, 0x763),
- D_UGATE(CLK_UART4, "clk_uart4", UART_GROUP_34567, 1, 1, 0x764, 0x765, 0x766, 0x767),
- D_UGATE(CLK_UART5, "clk_uart5", UART_GROUP_34567, 1, 2, 0x768, 0x769, 0x76a, 0x76b),
- D_UGATE(CLK_UART6, "clk_uart6", UART_GROUP_34567, 1, 3, 0x76c, 0x76d, 0x76e, 0x76f),
- D_UGATE(CLK_UART7, "clk_uart7", UART_GROUP_34567, 1, 4, 0x770, 0x771, 0x772, 0x773),
+ D_UGATE(CLK_UART0, "clk_uart0", UART_GROUP_012, 0, 0x1b2, 0x1b3, 0x1b4, 0x1b5),
+ D_UGATE(CLK_UART1, "clk_uart1", UART_GROUP_012, 0, 0x1b6, 0x1b7, 0x1b8, 0x1b9),
+ D_UGATE(CLK_UART2, "clk_uart2", UART_GROUP_012, 0, 0x1ba, 0x1bb, 0x1bc, 0x1bd),
+ D_UGATE(CLK_UART3, "clk_uart3", UART_GROUP_34567, 1, 0x760, 0x761, 0x762, 0x763),
+ D_UGATE(CLK_UART4, "clk_uart4", UART_GROUP_34567, 1, 0x764, 0x765, 0x766, 0x767),
+ D_UGATE(CLK_UART5, "clk_uart5", UART_GROUP_34567, 1, 0x768, 0x769, 0x76a, 0x76b),
+ D_UGATE(CLK_UART6, "clk_uart6", UART_GROUP_34567, 1, 0x76c, 0x76d, 0x76e, 0x76f),
+ D_UGATE(CLK_UART7, "clk_uart7", UART_GROUP_34567, 1, 0x770, 0x771, 0x772, 0x773),
};
struct r9a06g032_priv {
CLK_PLL3_DIV2_4_2,
CLK_SEL_PLL3_3,
CLK_DIV_PLL3_C,
+#ifdef CONFIG_ARM64
CLK_PLL5,
CLK_PLL5_500,
CLK_PLL5_250,
+#endif
CLK_PLL6,
CLK_PLL6_250,
CLK_P1_DIV2,
DEF_FIXED(".pll3_533", CLK_PLL3_533, CLK_PLL3, 1, 3),
DEF_MUX_RO(".sel_pll3_3", CLK_SEL_PLL3_3, SEL_PLL3_3, sel_pll3_3),
DEF_DIV("divpl3c", CLK_DIV_PLL3_C, CLK_SEL_PLL3_3, DIVPL3C, dtable_1_32),
+#ifdef CONFIG_ARM64
DEF_FIXED(".pll5", CLK_PLL5, CLK_EXTAL, 125, 1),
DEF_FIXED(".pll5_500", CLK_PLL5_500, CLK_PLL5, 1, 6),
DEF_FIXED(".pll5_250", CLK_PLL5_250, CLK_PLL5_500, 1, 2),
+#endif
DEF_FIXED(".pll6", CLK_PLL6, CLK_EXTAL, 125, 6),
DEF_FIXED(".pll6_250", CLK_PLL6_250, CLK_PLL6, 1, 2),
};
static struct rzg2l_mod_clk r9a07g043_mod_clks[] = {
+#ifdef CONFIG_ARM64
DEF_MOD("gic", R9A07G043_GIC600_GICCLK, R9A07G043_CLK_P1,
0x514, 0),
DEF_MOD("ia55_pclk", R9A07G043_IA55_PCLK, R9A07G043_CLK_P2,
0x518, 0),
DEF_MOD("ia55_clk", R9A07G043_IA55_CLK, R9A07G043_CLK_P1,
0x518, 1),
+#endif
+#ifdef CONFIG_RISCV
+ DEF_MOD("iax45_pclk", R9A07G043_IAX45_PCLK, R9A07G043_CLK_P2,
+ 0x518, 0),
+ DEF_MOD("iax45_clk", R9A07G043_IAX45_CLK, R9A07G043_CLK_P1,
+ 0x518, 1),
+#endif
DEF_MOD("dmac_aclk", R9A07G043_DMAC_ACLK, R9A07G043_CLK_P1,
0x52c, 0),
DEF_MOD("dmac_pclk", R9A07G043_DMAC_PCLK, CLK_P1_DIV2,
};
static struct rzg2l_reset r9a07g043_resets[] = {
+#ifdef CONFIG_ARM64
DEF_RST(R9A07G043_GIC600_GICRESET_N, 0x814, 0),
DEF_RST(R9A07G043_GIC600_DBG_GICRESET_N, 0x814, 1),
DEF_RST(R9A07G043_IA55_RESETN, 0x818, 0),
+#endif
+#ifdef CONFIG_RISCV
+ DEF_RST(R9A07G043_IAX45_RESETN, 0x818, 0),
+#endif
DEF_RST(R9A07G043_DMAC_ARESETN, 0x82c, 0),
DEF_RST(R9A07G043_DMAC_RST_ASYNC, 0x82c, 1),
DEF_RST(R9A07G043_OSTM0_PRESETZ, 0x834, 0),
};
static const unsigned int r9a07g043_crit_mod_clks[] __initconst = {
+#ifdef CONFIG_ARM64
MOD_CLK_BASE + R9A07G043_GIC600_GICCLK,
MOD_CLK_BASE + R9A07G043_IA55_CLK,
+#endif
+#ifdef CONFIG_RISCV
+ MOD_CLK_BASE + R9A07G043_IAX45_CLK,
+#endif
MOD_CLK_BASE + R9A07G043_DMAC_ACLK,
};
/* Module Clocks */
.mod_clks = r9a07g043_mod_clks,
.num_mod_clks = ARRAY_SIZE(r9a07g043_mod_clks),
+#ifdef CONFIG_ARM64
.num_hw_mod_clks = R9A07G043_TSU_PCLK + 1,
+#endif
+#ifdef CONFIG_RISCV
+ .num_hw_mod_clks = R9A07G043_IAX45_PCLK + 1,
+#endif
/* Resets */
.resets = r9a07g043_resets,
+#ifdef CONFIG_ARM64
.num_resets = R9A07G043_TSU_PRESETN + 1, /* Last reset ID + 1 */
+#endif
+#ifdef CONFIG_RISCV
+ .num_resets = R9A07G043_IAX45_RESETN + 1, /* Last reset ID + 1 */
+#endif
.has_clk_mon_regs = true,
};
};
static const struct {
- struct rzg2l_mod_clk common[71];
+ struct rzg2l_mod_clk common[76];
#ifdef CONFIG_CLK_R9A07G054
struct rzg2l_mod_clk drp[0];
#endif
0x534, 1),
DEF_MOD("ostm2_pclk", R9A07G044_OSTM2_PCLK, R9A07G044_CLK_P0,
0x534, 2),
+ DEF_MOD("gpt_pclk", R9A07G044_GPT_PCLK, R9A07G044_CLK_P0,
+ 0x540, 0),
+ DEF_MOD("poeg_a_clkp", R9A07G044_POEG_A_CLKP, R9A07G044_CLK_P0,
+ 0x544, 0),
+ DEF_MOD("poeg_b_clkp", R9A07G044_POEG_B_CLKP, R9A07G044_CLK_P0,
+ 0x544, 1),
+ DEF_MOD("poeg_c_clkp", R9A07G044_POEG_C_CLKP, R9A07G044_CLK_P0,
+ 0x544, 2),
+ DEF_MOD("poeg_d_clkp", R9A07G044_POEG_D_CLKP, R9A07G044_CLK_P0,
+ 0x544, 3),
DEF_MOD("wdt0_pclk", R9A07G044_WDT0_PCLK, R9A07G044_CLK_P0,
0x548, 0),
DEF_MOD("wdt0_clk", R9A07G044_WDT0_CLK, R9A07G044_OSCCLK,
DEF_RST(R9A07G044_OSTM0_PRESETZ, 0x834, 0),
DEF_RST(R9A07G044_OSTM1_PRESETZ, 0x834, 1),
DEF_RST(R9A07G044_OSTM2_PRESETZ, 0x834, 2),
+ DEF_RST(R9A07G044_GPT_RST_C, 0x840, 0),
+ DEF_RST(R9A07G044_POEG_A_RST, 0x844, 0),
+ DEF_RST(R9A07G044_POEG_B_RST, 0x844, 1),
+ DEF_RST(R9A07G044_POEG_C_RST, 0x844, 2),
+ DEF_RST(R9A07G044_POEG_D_RST, 0x844, 3),
DEF_RST(R9A07G044_WDT0_PRESETN, 0x848, 0),
DEF_RST(R9A07G044_WDT1_PRESETN, 0x848, 1),
DEF_RST(R9A07G044_WDT2_PRESETN, 0x848, 2),
};
static const struct rzg2l_mod_clk r9a09g011_mod_clks[] __initconst = {
+ DEF_MOD("pfc", R9A09G011_PFC_PCLK, CLK_MAIN, 0x400, 2),
DEF_MOD("gic", R9A09G011_GIC_CLK, CLK_SEL_B_D2, 0x400, 5),
DEF_COUPLED("eth_axi", R9A09G011_ETH0_CLK_AXI, CLK_PLL2_200, 0x40c, 8),
DEF_COUPLED("eth_chi", R9A09G011_ETH0_CLK_CHI, CLK_PLL2_100, 0x40c, 8),
DEF_MOD("eth_clk_gptp", R9A09G011_ETH0_GPTP_EXT, CLK_PLL2_100, 0x40c, 9),
DEF_MOD("syc_cnt_clk", R9A09G011_SYC_CNT_CLK, CLK_MAIN_24, 0x41c, 12),
+ DEF_MOD("wdt0_pclk", R9A09G011_WDT0_PCLK, CLK_SEL_E, 0x428, 12),
+ DEF_MOD("wdt0_clk", R9A09G011_WDT0_CLK, CLK_MAIN, 0x428, 13),
DEF_MOD("urt_pclk", R9A09G011_URT_PCLK, CLK_SEL_E, 0x438, 4),
DEF_MOD("urt0_clk", R9A09G011_URT0_CLK, CLK_SEL_W0, 0x438, 5),
DEF_MOD("ca53", R9A09G011_CA53_CLK, CLK_DIV_A, 0x448, 0),
};
static const struct rzg2l_reset r9a09g011_resets[] = {
+ DEF_RST(R9A09G011_PFC_PRESETN, 0x600, 2),
DEF_RST_MON(R9A09G011_ETH0_RST_HW_N, 0x608, 11, 11),
DEF_RST_MON(R9A09G011_SYC_RST_N, 0x610, 9, 13),
+ DEF_RST_MON(R9A09G011_WDT0_PRESETN, 0x614, 12, 19),
};
static const unsigned int r9a09g011_crit_mod_clks[] __initconst = {
#include "rcar-gen4-cpg.h"
#include "rcar-cpg-lib.h"
-static const struct rcar_gen4_cpg_pll_config *cpg_pll_config __initconst;
+static const struct rcar_gen4_cpg_pll_config *cpg_pll_config __initdata;
static unsigned int cpg_clk_extalr __initdata;
static u32 cpg_mode __initdata;
s8 monbit = info->resets[id].monbit;
if (info->has_clk_mon_regs) {
- return !(readl(priv->base + CLK_MRST_R(reg)) & bitmask);
+ return !!(readl(priv->base + CLK_MRST_R(reg)) & bitmask);
} else if (monbit >= 0) {
u32 monbitmask = BIT(monbit);
if (!reset_data)
return -ENOMEM;
+ spin_lock_init(&reset_data->lock);
reset_data->membase = base;
reset_data->rcdev.owner = THIS_MODULE;
reset_data->rcdev.ops = &stm32_reset_ops;
&r_apb2_rsb_clk.common,
&r_apb1_ir_clk.common,
&r_apb1_w1_clk.common,
+ &r_apb1_rtc_clk.common,
&ir_clk.common,
&w1_clk.common,
};
};
static const char * const omap4_func_dmic_abe_gfclk_parents[] __initconst = {
- "abe_cm:clk:0018:26",
+ "abe-clkctrl:0018:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
};
static const char * const omap4_func_mcasp_abe_gfclk_parents[] __initconst = {
- "abe_cm:clk:0020:26",
+ "abe-clkctrl:0020:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
};
static const char * const omap4_func_mcbsp1_gfclk_parents[] __initconst = {
- "abe_cm:clk:0028:26",
+ "abe-clkctrl:0028:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
};
static const char * const omap4_func_mcbsp2_gfclk_parents[] __initconst = {
- "abe_cm:clk:0030:26",
+ "abe-clkctrl:0030:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
};
static const char * const omap4_func_mcbsp3_gfclk_parents[] __initconst = {
- "abe_cm:clk:0038:26",
+ "abe-clkctrl:0038:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
static const struct omap_clkctrl_reg_data omap4_abe_clkctrl_regs[] __initconst = {
{ OMAP4_L4_ABE_CLKCTRL, NULL, 0, "ocp_abe_iclk" },
- { OMAP4_AESS_CLKCTRL, omap4_aess_bit_data, CLKF_SW_SUP, "abe_cm:clk:0008:24" },
+ { OMAP4_AESS_CLKCTRL, omap4_aess_bit_data, CLKF_SW_SUP, "abe-clkctrl:0008:24" },
{ OMAP4_MCPDM_CLKCTRL, NULL, CLKF_SW_SUP, "pad_clks_ck" },
- { OMAP4_DMIC_CLKCTRL, omap4_dmic_bit_data, CLKF_SW_SUP, "abe_cm:clk:0018:24" },
- { OMAP4_MCASP_CLKCTRL, omap4_mcasp_bit_data, CLKF_SW_SUP, "abe_cm:clk:0020:24" },
- { OMAP4_MCBSP1_CLKCTRL, omap4_mcbsp1_bit_data, CLKF_SW_SUP, "abe_cm:clk:0028:24" },
- { OMAP4_MCBSP2_CLKCTRL, omap4_mcbsp2_bit_data, CLKF_SW_SUP, "abe_cm:clk:0030:24" },
- { OMAP4_MCBSP3_CLKCTRL, omap4_mcbsp3_bit_data, CLKF_SW_SUP, "abe_cm:clk:0038:24" },
- { OMAP4_SLIMBUS1_CLKCTRL, omap4_slimbus1_bit_data, CLKF_SW_SUP, "abe_cm:clk:0040:8" },
- { OMAP4_TIMER5_CLKCTRL, omap4_timer5_bit_data, CLKF_SW_SUP, "abe_cm:clk:0048:24" },
- { OMAP4_TIMER6_CLKCTRL, omap4_timer6_bit_data, CLKF_SW_SUP, "abe_cm:clk:0050:24" },
- { OMAP4_TIMER7_CLKCTRL, omap4_timer7_bit_data, CLKF_SW_SUP, "abe_cm:clk:0058:24" },
- { OMAP4_TIMER8_CLKCTRL, omap4_timer8_bit_data, CLKF_SW_SUP, "abe_cm:clk:0060:24" },
+ { OMAP4_DMIC_CLKCTRL, omap4_dmic_bit_data, CLKF_SW_SUP, "abe-clkctrl:0018:24" },
+ { OMAP4_MCASP_CLKCTRL, omap4_mcasp_bit_data, CLKF_SW_SUP, "abe-clkctrl:0020:24" },
+ { OMAP4_MCBSP1_CLKCTRL, omap4_mcbsp1_bit_data, CLKF_SW_SUP, "abe-clkctrl:0028:24" },
+ { OMAP4_MCBSP2_CLKCTRL, omap4_mcbsp2_bit_data, CLKF_SW_SUP, "abe-clkctrl:0030:24" },
+ { OMAP4_MCBSP3_CLKCTRL, omap4_mcbsp3_bit_data, CLKF_SW_SUP, "abe-clkctrl:0038:24" },
+ { OMAP4_SLIMBUS1_CLKCTRL, omap4_slimbus1_bit_data, CLKF_SW_SUP, "abe-clkctrl:0040:8" },
+ { OMAP4_TIMER5_CLKCTRL, omap4_timer5_bit_data, CLKF_SW_SUP, "abe-clkctrl:0048:24" },
+ { OMAP4_TIMER6_CLKCTRL, omap4_timer6_bit_data, CLKF_SW_SUP, "abe-clkctrl:0050:24" },
+ { OMAP4_TIMER7_CLKCTRL, omap4_timer7_bit_data, CLKF_SW_SUP, "abe-clkctrl:0058:24" },
+ { OMAP4_TIMER8_CLKCTRL, omap4_timer8_bit_data, CLKF_SW_SUP, "abe-clkctrl:0060:24" },
{ OMAP4_WD_TIMER3_CLKCTRL, NULL, CLKF_SW_SUP, "sys_32k_ck" },
{ 0 },
};
static const struct omap_clkctrl_reg_data omap4_iss_clkctrl_regs[] __initconst = {
{ OMAP4_ISS_CLKCTRL, omap4_iss_bit_data, CLKF_SW_SUP, "ducati_clk_mux_ck" },
- { OMAP4_FDIF_CLKCTRL, omap4_fdif_bit_data, CLKF_SW_SUP, "iss_cm:clk:0008:24" },
+ { OMAP4_FDIF_CLKCTRL, omap4_fdif_bit_data, CLKF_SW_SUP, "iss-clkctrl:0008:24" },
{ 0 },
};
};
static const struct omap_clkctrl_reg_data omap4_l3_dss_clkctrl_regs[] __initconst = {
- { OMAP4_DSS_CORE_CLKCTRL, omap4_dss_core_bit_data, CLKF_SW_SUP, "l3_dss_cm:clk:0000:8" },
+ { OMAP4_DSS_CORE_CLKCTRL, omap4_dss_core_bit_data, CLKF_SW_SUP, "l3-dss-clkctrl:0000:8" },
{ 0 },
};
};
static const struct omap_clkctrl_reg_data omap4_l3_gfx_clkctrl_regs[] __initconst = {
- { OMAP4_GPU_CLKCTRL, omap4_gpu_bit_data, CLKF_SW_SUP, "l3_gfx_cm:clk:0000:24" },
+ { OMAP4_GPU_CLKCTRL, omap4_gpu_bit_data, CLKF_SW_SUP, "l3-gfx-clkctrl:0000:24" },
{ 0 },
};
};
static const char * const omap4_usb_host_hs_utmi_p1_clk_parents[] __initconst = {
- "l3_init_cm:clk:0038:24",
+ "l3-init-clkctrl:0038:24",
NULL,
};
static const char * const omap4_usb_host_hs_utmi_p2_clk_parents[] __initconst = {
- "l3_init_cm:clk:0038:25",
+ "l3-init-clkctrl:0038:25",
NULL,
};
};
static const char * const omap4_usb_otg_hs_xclk_parents[] __initconst = {
- "l3_init_cm:clk:0040:24",
+ "l3-init-clkctrl:0040:24",
NULL,
};
};
static const struct omap_clkctrl_reg_data omap4_l3_init_clkctrl_regs[] __initconst = {
- { OMAP4_MMC1_CLKCTRL, omap4_mmc1_bit_data, CLKF_SW_SUP, "l3_init_cm:clk:0008:24" },
- { OMAP4_MMC2_CLKCTRL, omap4_mmc2_bit_data, CLKF_SW_SUP, "l3_init_cm:clk:0010:24" },
- { OMAP4_HSI_CLKCTRL, omap4_hsi_bit_data, CLKF_HW_SUP, "l3_init_cm:clk:0018:24" },
+ { OMAP4_MMC1_CLKCTRL, omap4_mmc1_bit_data, CLKF_SW_SUP, "l3-init-clkctrl:0008:24" },
+ { OMAP4_MMC2_CLKCTRL, omap4_mmc2_bit_data, CLKF_SW_SUP, "l3-init-clkctrl:0010:24" },
+ { OMAP4_HSI_CLKCTRL, omap4_hsi_bit_data, CLKF_HW_SUP, "l3-init-clkctrl:0018:24" },
{ OMAP4_USB_HOST_HS_CLKCTRL, omap4_usb_host_hs_bit_data, CLKF_SW_SUP, "init_60m_fclk" },
{ OMAP4_USB_OTG_HS_CLKCTRL, omap4_usb_otg_hs_bit_data, CLKF_HW_SUP, "l3_div_ck" },
{ OMAP4_USB_TLL_HS_CLKCTRL, omap4_usb_tll_hs_bit_data, CLKF_HW_SUP, "l4_div_ck" },
{ OMAP4_USB_HOST_FS_CLKCTRL, NULL, CLKF_SW_SUP, "func_48mc_fclk" },
- { OMAP4_OCP2SCP_USB_PHY_CLKCTRL, omap4_ocp2scp_usb_phy_bit_data, CLKF_HW_SUP, "l3_init_cm:clk:00c0:8" },
+ { OMAP4_OCP2SCP_USB_PHY_CLKCTRL, omap4_ocp2scp_usb_phy_bit_data, CLKF_HW_SUP, "l3-init-clkctrl:00c0:8" },
{ 0 },
};
};
static const char * const omap4_per_mcbsp4_gfclk_parents[] __initconst = {
- "l4_per_cm:clk:00c0:26",
+ "l4-per-clkctrl:00c0:26",
"pad_clks_ck",
NULL,
};
};
static const struct omap_clkctrl_reg_data omap4_l4_per_clkctrl_regs[] __initconst = {
- { OMAP4_TIMER10_CLKCTRL, omap4_timer10_bit_data, CLKF_SW_SUP, "l4_per_cm:clk:0008:24" },
- { OMAP4_TIMER11_CLKCTRL, omap4_timer11_bit_data, CLKF_SW_SUP, "l4_per_cm:clk:0010:24" },
- { OMAP4_TIMER2_CLKCTRL, omap4_timer2_bit_data, CLKF_SW_SUP, "l4_per_cm:clk:0018:24" },
- { OMAP4_TIMER3_CLKCTRL, omap4_timer3_bit_data, CLKF_SW_SUP, "l4_per_cm:clk:0020:24" },
- { OMAP4_TIMER4_CLKCTRL, omap4_timer4_bit_data, CLKF_SW_SUP, "l4_per_cm:clk:0028:24" },
- { OMAP4_TIMER9_CLKCTRL, omap4_timer9_bit_data, CLKF_SW_SUP, "l4_per_cm:clk:0030:24" },
+ { OMAP4_TIMER10_CLKCTRL, omap4_timer10_bit_data, CLKF_SW_SUP, "l4-per-clkctrl:0008:24" },
+ { OMAP4_TIMER11_CLKCTRL, omap4_timer11_bit_data, CLKF_SW_SUP, "l4-per-clkctrl:0010:24" },
+ { OMAP4_TIMER2_CLKCTRL, omap4_timer2_bit_data, CLKF_SW_SUP, "l4-per-clkctrl:0018:24" },
+ { OMAP4_TIMER3_CLKCTRL, omap4_timer3_bit_data, CLKF_SW_SUP, "l4-per-clkctrl:0020:24" },
+ { OMAP4_TIMER4_CLKCTRL, omap4_timer4_bit_data, CLKF_SW_SUP, "l4-per-clkctrl:0028:24" },
+ { OMAP4_TIMER9_CLKCTRL, omap4_timer9_bit_data, CLKF_SW_SUP, "l4-per-clkctrl:0030:24" },
{ OMAP4_ELM_CLKCTRL, NULL, 0, "l4_div_ck" },
{ OMAP4_GPIO2_CLKCTRL, omap4_gpio2_bit_data, CLKF_HW_SUP, "l4_div_ck" },
{ OMAP4_GPIO3_CLKCTRL, omap4_gpio3_bit_data, CLKF_HW_SUP, "l4_div_ck" },
{ OMAP4_I2C3_CLKCTRL, NULL, CLKF_SW_SUP, "func_96m_fclk" },
{ OMAP4_I2C4_CLKCTRL, NULL, CLKF_SW_SUP, "func_96m_fclk" },
{ OMAP4_L4_PER_CLKCTRL, NULL, 0, "l4_div_ck" },
- { OMAP4_MCBSP4_CLKCTRL, omap4_mcbsp4_bit_data, CLKF_SW_SUP, "l4_per_cm:clk:00c0:24" },
+ { OMAP4_MCBSP4_CLKCTRL, omap4_mcbsp4_bit_data, CLKF_SW_SUP, "l4-per-clkctrl:00c0:24" },
{ OMAP4_MCSPI1_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP4_MCSPI2_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP4_MCSPI3_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP4_MCSPI4_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP4_MMC3_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP4_MMC4_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
- { OMAP4_SLIMBUS2_CLKCTRL, omap4_slimbus2_bit_data, CLKF_SW_SUP, "l4_per_cm:clk:0118:8" },
+ { OMAP4_SLIMBUS2_CLKCTRL, omap4_slimbus2_bit_data, CLKF_SW_SUP, "l4-per-clkctrl:0118:8" },
{ OMAP4_UART1_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP4_UART2_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP4_UART3_CLKCTRL, NULL, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP4_L4_WKUP_CLKCTRL, NULL, 0, "l4_wkup_clk_mux_ck" },
{ OMAP4_WD_TIMER2_CLKCTRL, NULL, CLKF_SW_SUP, "sys_32k_ck" },
{ OMAP4_GPIO1_CLKCTRL, omap4_gpio1_bit_data, CLKF_HW_SUP, "l4_wkup_clk_mux_ck" },
- { OMAP4_TIMER1_CLKCTRL, omap4_timer1_bit_data, CLKF_SW_SUP, "l4_wkup_cm:clk:0020:24" },
+ { OMAP4_TIMER1_CLKCTRL, omap4_timer1_bit_data, CLKF_SW_SUP, "l4-wkup-clkctrl:0020:24" },
{ OMAP4_COUNTER_32K_CLKCTRL, NULL, 0, "sys_32k_ck" },
{ OMAP4_KBD_CLKCTRL, NULL, CLKF_SW_SUP, "sys_32k_ck" },
{ 0 },
};
static const char * const omap4_trace_clk_div_div_ck_parents[] __initconst = {
- "emu_sys_cm:clk:0000:22",
+ "emu-sys-clkctrl:0000:22",
NULL,
};
};
static const char * const omap4_stm_clk_div_ck_parents[] __initconst = {
- "emu_sys_cm:clk:0000:20",
+ "emu-sys-clkctrl:0000:20",
NULL,
};
* hwmod support. Once hwmod is removed, these can be removed
* also.
*/
- DT_CLK(NULL, "aess_fclk", "abe_cm:0008:24"),
- DT_CLK(NULL, "cm2_dm10_mux", "l4_per_cm:0008:24"),
- DT_CLK(NULL, "cm2_dm11_mux", "l4_per_cm:0010:24"),
- DT_CLK(NULL, "cm2_dm2_mux", "l4_per_cm:0018:24"),
- DT_CLK(NULL, "cm2_dm3_mux", "l4_per_cm:0020:24"),
- DT_CLK(NULL, "cm2_dm4_mux", "l4_per_cm:0028:24"),
- DT_CLK(NULL, "cm2_dm9_mux", "l4_per_cm:0030:24"),
- DT_CLK(NULL, "dmic_sync_mux_ck", "abe_cm:0018:26"),
- DT_CLK(NULL, "dmt1_clk_mux", "l4_wkup_cm:0020:24"),
- DT_CLK(NULL, "dss_48mhz_clk", "l3_dss_cm:0000:9"),
- DT_CLK(NULL, "dss_dss_clk", "l3_dss_cm:0000:8"),
- DT_CLK(NULL, "dss_sys_clk", "l3_dss_cm:0000:10"),
- DT_CLK(NULL, "dss_tv_clk", "l3_dss_cm:0000:11"),
- DT_CLK(NULL, "fdif_fck", "iss_cm:0008:24"),
- DT_CLK(NULL, "func_dmic_abe_gfclk", "abe_cm:0018:24"),
- DT_CLK(NULL, "func_mcasp_abe_gfclk", "abe_cm:0020:24"),
- DT_CLK(NULL, "func_mcbsp1_gfclk", "abe_cm:0028:24"),
- DT_CLK(NULL, "func_mcbsp2_gfclk", "abe_cm:0030:24"),
- DT_CLK(NULL, "func_mcbsp3_gfclk", "abe_cm:0038:24"),
- DT_CLK(NULL, "gpio1_dbclk", "l4_wkup_cm:0018:8"),
- DT_CLK(NULL, "gpio2_dbclk", "l4_per_cm:0040:8"),
- DT_CLK(NULL, "gpio3_dbclk", "l4_per_cm:0048:8"),
- DT_CLK(NULL, "gpio4_dbclk", "l4_per_cm:0050:8"),
- DT_CLK(NULL, "gpio5_dbclk", "l4_per_cm:0058:8"),
- DT_CLK(NULL, "gpio6_dbclk", "l4_per_cm:0060:8"),
- DT_CLK(NULL, "hsi_fck", "l3_init_cm:0018:24"),
- DT_CLK(NULL, "hsmmc1_fclk", "l3_init_cm:0008:24"),
- DT_CLK(NULL, "hsmmc2_fclk", "l3_init_cm:0010:24"),
- DT_CLK(NULL, "iss_ctrlclk", "iss_cm:0000:8"),
- DT_CLK(NULL, "mcasp_sync_mux_ck", "abe_cm:0020:26"),
- DT_CLK(NULL, "mcbsp1_sync_mux_ck", "abe_cm:0028:26"),
- DT_CLK(NULL, "mcbsp2_sync_mux_ck", "abe_cm:0030:26"),
- DT_CLK(NULL, "mcbsp3_sync_mux_ck", "abe_cm:0038:26"),
- DT_CLK(NULL, "mcbsp4_sync_mux_ck", "l4_per_cm:00c0:26"),
- DT_CLK(NULL, "ocp2scp_usb_phy_phy_48m", "l3_init_cm:00c0:8"),
- DT_CLK(NULL, "otg_60m_gfclk", "l3_init_cm:0040:24"),
- DT_CLK(NULL, "per_mcbsp4_gfclk", "l4_per_cm:00c0:24"),
- DT_CLK(NULL, "pmd_stm_clock_mux_ck", "emu_sys_cm:0000:20"),
- DT_CLK(NULL, "pmd_trace_clk_mux_ck", "emu_sys_cm:0000:22"),
- DT_CLK(NULL, "sgx_clk_mux", "l3_gfx_cm:0000:24"),
- DT_CLK(NULL, "slimbus1_fclk_0", "abe_cm:0040:8"),
- DT_CLK(NULL, "slimbus1_fclk_1", "abe_cm:0040:9"),
- DT_CLK(NULL, "slimbus1_fclk_2", "abe_cm:0040:10"),
- DT_CLK(NULL, "slimbus1_slimbus_clk", "abe_cm:0040:11"),
- DT_CLK(NULL, "slimbus2_fclk_0", "l4_per_cm:0118:8"),
- DT_CLK(NULL, "slimbus2_fclk_1", "l4_per_cm:0118:9"),
- DT_CLK(NULL, "slimbus2_slimbus_clk", "l4_per_cm:0118:10"),
- DT_CLK(NULL, "stm_clk_div_ck", "emu_sys_cm:0000:27"),
- DT_CLK(NULL, "timer5_sync_mux", "abe_cm:0048:24"),
- DT_CLK(NULL, "timer6_sync_mux", "abe_cm:0050:24"),
- DT_CLK(NULL, "timer7_sync_mux", "abe_cm:0058:24"),
- DT_CLK(NULL, "timer8_sync_mux", "abe_cm:0060:24"),
- DT_CLK(NULL, "trace_clk_div_div_ck", "emu_sys_cm:0000:24"),
- DT_CLK(NULL, "usb_host_hs_func48mclk", "l3_init_cm:0038:15"),
- DT_CLK(NULL, "usb_host_hs_hsic480m_p1_clk", "l3_init_cm:0038:13"),
- DT_CLK(NULL, "usb_host_hs_hsic480m_p2_clk", "l3_init_cm:0038:14"),
- DT_CLK(NULL, "usb_host_hs_hsic60m_p1_clk", "l3_init_cm:0038:11"),
- DT_CLK(NULL, "usb_host_hs_hsic60m_p2_clk", "l3_init_cm:0038:12"),
- DT_CLK(NULL, "usb_host_hs_utmi_p1_clk", "l3_init_cm:0038:8"),
- DT_CLK(NULL, "usb_host_hs_utmi_p2_clk", "l3_init_cm:0038:9"),
- DT_CLK(NULL, "usb_host_hs_utmi_p3_clk", "l3_init_cm:0038:10"),
- DT_CLK(NULL, "usb_otg_hs_xclk", "l3_init_cm:0040:8"),
- DT_CLK(NULL, "usb_tll_hs_usb_ch0_clk", "l3_init_cm:0048:8"),
- DT_CLK(NULL, "usb_tll_hs_usb_ch1_clk", "l3_init_cm:0048:9"),
- DT_CLK(NULL, "usb_tll_hs_usb_ch2_clk", "l3_init_cm:0048:10"),
- DT_CLK(NULL, "utmi_p1_gfclk", "l3_init_cm:0038:24"),
- DT_CLK(NULL, "utmi_p2_gfclk", "l3_init_cm:0038:25"),
+ DT_CLK(NULL, "aess_fclk", "abe-clkctrl:0008:24"),
+ DT_CLK(NULL, "cm2_dm10_mux", "l4-per-clkctrl:0008:24"),
+ DT_CLK(NULL, "cm2_dm11_mux", "l4-per-clkctrl:0010:24"),
+ DT_CLK(NULL, "cm2_dm2_mux", "l4-per-clkctrl:0018:24"),
+ DT_CLK(NULL, "cm2_dm3_mux", "l4-per-clkctrl:0020:24"),
+ DT_CLK(NULL, "cm2_dm4_mux", "l4-per-clkctrl:0028:24"),
+ DT_CLK(NULL, "cm2_dm9_mux", "l4-per-clkctrl:0030:24"),
+ DT_CLK(NULL, "dmic_sync_mux_ck", "abe-clkctrl:0018:26"),
+ DT_CLK(NULL, "dmt1_clk_mux", "l4-wkup-clkctrl:0020:24"),
+ DT_CLK(NULL, "dss_48mhz_clk", "l3-dss-clkctrl:0000:9"),
+ DT_CLK(NULL, "dss_dss_clk", "l3-dss-clkctrl:0000:8"),
+ DT_CLK(NULL, "dss_sys_clk", "l3-dss-clkctrl:0000:10"),
+ DT_CLK(NULL, "dss_tv_clk", "l3-dss-clkctrl:0000:11"),
+ DT_CLK(NULL, "fdif_fck", "iss-clkctrl:0008:24"),
+ DT_CLK(NULL, "func_dmic_abe_gfclk", "abe-clkctrl:0018:24"),
+ DT_CLK(NULL, "func_mcasp_abe_gfclk", "abe-clkctrl:0020:24"),
+ DT_CLK(NULL, "func_mcbsp1_gfclk", "abe-clkctrl:0028:24"),
+ DT_CLK(NULL, "func_mcbsp2_gfclk", "abe-clkctrl:0030:24"),
+ DT_CLK(NULL, "func_mcbsp3_gfclk", "abe-clkctrl:0038:24"),
+ DT_CLK(NULL, "gpio1_dbclk", "l4-wkup-clkctrl:0018:8"),
+ DT_CLK(NULL, "gpio2_dbclk", "l4-per-clkctrl:0040:8"),
+ DT_CLK(NULL, "gpio3_dbclk", "l4-per-clkctrl:0048:8"),
+ DT_CLK(NULL, "gpio4_dbclk", "l4-per-clkctrl:0050:8"),
+ DT_CLK(NULL, "gpio5_dbclk", "l4-per-clkctrl:0058:8"),
+ DT_CLK(NULL, "gpio6_dbclk", "l4-per-clkctrl:0060:8"),
+ DT_CLK(NULL, "hsi_fck", "l3-init-clkctrl:0018:24"),
+ DT_CLK(NULL, "hsmmc1_fclk", "l3-init-clkctrl:0008:24"),
+ DT_CLK(NULL, "hsmmc2_fclk", "l3-init-clkctrl:0010:24"),
+ DT_CLK(NULL, "iss_ctrlclk", "iss-clkctrl:0000:8"),
+ DT_CLK(NULL, "mcasp_sync_mux_ck", "abe-clkctrl:0020:26"),
+ DT_CLK(NULL, "mcbsp1_sync_mux_ck", "abe-clkctrl:0028:26"),
+ DT_CLK(NULL, "mcbsp2_sync_mux_ck", "abe-clkctrl:0030:26"),
+ DT_CLK(NULL, "mcbsp3_sync_mux_ck", "abe-clkctrl:0038:26"),
+ DT_CLK(NULL, "mcbsp4_sync_mux_ck", "l4-per-clkctrl:00c0:26"),
+ DT_CLK(NULL, "ocp2scp_usb_phy_phy_48m", "l3-init-clkctrl:00c0:8"),
+ DT_CLK(NULL, "otg_60m_gfclk", "l3-init-clkctrl:0040:24"),
+ DT_CLK(NULL, "per_mcbsp4_gfclk", "l4-per-clkctrl:00c0:24"),
+ DT_CLK(NULL, "pmd_stm_clock_mux_ck", "emu-sys-clkctrl:0000:20"),
+ DT_CLK(NULL, "pmd_trace_clk_mux_ck", "emu-sys-clkctrl:0000:22"),
+ DT_CLK(NULL, "sgx_clk_mux", "l3-gfx-clkctrl:0000:24"),
+ DT_CLK(NULL, "slimbus1_fclk_0", "abe-clkctrl:0040:8"),
+ DT_CLK(NULL, "slimbus1_fclk_1", "abe-clkctrl:0040:9"),
+ DT_CLK(NULL, "slimbus1_fclk_2", "abe-clkctrl:0040:10"),
+ DT_CLK(NULL, "slimbus1_slimbus_clk", "abe-clkctrl:0040:11"),
+ DT_CLK(NULL, "slimbus2_fclk_0", "l4-per-clkctrl:0118:8"),
+ DT_CLK(NULL, "slimbus2_fclk_1", "l4-per-clkctrl:0118:9"),
+ DT_CLK(NULL, "slimbus2_slimbus_clk", "l4-per-clkctrl:0118:10"),
+ DT_CLK(NULL, "stm_clk_div_ck", "emu-sys-clkctrl:0000:27"),
+ DT_CLK(NULL, "timer5_sync_mux", "abe-clkctrl:0048:24"),
+ DT_CLK(NULL, "timer6_sync_mux", "abe-clkctrl:0050:24"),
+ DT_CLK(NULL, "timer7_sync_mux", "abe-clkctrl:0058:24"),
+ DT_CLK(NULL, "timer8_sync_mux", "abe-clkctrl:0060:24"),
+ DT_CLK(NULL, "trace_clk_div_div_ck", "emu-sys-clkctrl:0000:24"),
+ DT_CLK(NULL, "usb_host_hs_func48mclk", "l3-init-clkctrl:0038:15"),
+ DT_CLK(NULL, "usb_host_hs_hsic480m_p1_clk", "l3-init-clkctrl:0038:13"),
+ DT_CLK(NULL, "usb_host_hs_hsic480m_p2_clk", "l3-init-clkctrl:0038:14"),
+ DT_CLK(NULL, "usb_host_hs_hsic60m_p1_clk", "l3-init-clkctrl:0038:11"),
+ DT_CLK(NULL, "usb_host_hs_hsic60m_p2_clk", "l3-init-clkctrl:0038:12"),
+ DT_CLK(NULL, "usb_host_hs_utmi_p1_clk", "l3-init-clkctrl:0038:8"),
+ DT_CLK(NULL, "usb_host_hs_utmi_p2_clk", "l3-init-clkctrl:0038:9"),
+ DT_CLK(NULL, "usb_host_hs_utmi_p3_clk", "l3_init-clkctrl:0038:10"),
+ DT_CLK(NULL, "usb_otg_hs_xclk", "l3-init-clkctrl:0040:8"),
+ DT_CLK(NULL, "usb_tll_hs_usb_ch0_clk", "l3-init-clkctrl:0048:8"),
+ DT_CLK(NULL, "usb_tll_hs_usb_ch1_clk", "l3-init-clkctrl:0048:9"),
+ DT_CLK(NULL, "usb_tll_hs_usb_ch2_clk", "l3-init-clkctrl:0048:10"),
+ DT_CLK(NULL, "utmi_p1_gfclk", "l3-init-clkctrl:0038:24"),
+ DT_CLK(NULL, "utmi_p2_gfclk", "l3-init-clkctrl:0038:25"),
{ .node_name = NULL },
};
};
static const char * const omap5_dmic_gfclk_parents[] __initconst = {
- "abe_cm:clk:0018:26",
+ "abe-clkctrl:0018:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
};
static const char * const omap5_mcbsp1_gfclk_parents[] __initconst = {
- "abe_cm:clk:0028:26",
+ "abe-clkctrl:0028:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
};
static const char * const omap5_mcbsp2_gfclk_parents[] __initconst = {
- "abe_cm:clk:0030:26",
+ "abe-clkctrl:0030:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
};
static const char * const omap5_mcbsp3_gfclk_parents[] __initconst = {
- "abe_cm:clk:0038:26",
+ "abe-clkctrl:0038:26",
"pad_clks_ck",
"slimbus_clk",
NULL,
static const struct omap_clkctrl_reg_data omap5_abe_clkctrl_regs[] __initconst = {
{ OMAP5_L4_ABE_CLKCTRL, NULL, 0, "abe_iclk" },
- { OMAP5_AESS_CLKCTRL, omap5_aess_bit_data, CLKF_SW_SUP, "abe_cm:clk:0008:24" },
+ { OMAP5_AESS_CLKCTRL, omap5_aess_bit_data, CLKF_SW_SUP, "abe-clkctrl:0008:24" },
{ OMAP5_MCPDM_CLKCTRL, NULL, CLKF_SW_SUP, "pad_clks_ck" },
- { OMAP5_DMIC_CLKCTRL, omap5_dmic_bit_data, CLKF_SW_SUP, "abe_cm:clk:0018:24" },
- { OMAP5_MCBSP1_CLKCTRL, omap5_mcbsp1_bit_data, CLKF_SW_SUP, "abe_cm:clk:0028:24" },
- { OMAP5_MCBSP2_CLKCTRL, omap5_mcbsp2_bit_data, CLKF_SW_SUP, "abe_cm:clk:0030:24" },
- { OMAP5_MCBSP3_CLKCTRL, omap5_mcbsp3_bit_data, CLKF_SW_SUP, "abe_cm:clk:0038:24" },
- { OMAP5_TIMER5_CLKCTRL, omap5_timer5_bit_data, CLKF_SW_SUP, "abe_cm:clk:0048:24" },
- { OMAP5_TIMER6_CLKCTRL, omap5_timer6_bit_data, CLKF_SW_SUP, "abe_cm:clk:0050:24" },
- { OMAP5_TIMER7_CLKCTRL, omap5_timer7_bit_data, CLKF_SW_SUP, "abe_cm:clk:0058:24" },
- { OMAP5_TIMER8_CLKCTRL, omap5_timer8_bit_data, CLKF_SW_SUP, "abe_cm:clk:0060:24" },
+ { OMAP5_DMIC_CLKCTRL, omap5_dmic_bit_data, CLKF_SW_SUP, "abe-clkctrl:0018:24" },
+ { OMAP5_MCBSP1_CLKCTRL, omap5_mcbsp1_bit_data, CLKF_SW_SUP, "abe-clkctrl:0028:24" },
+ { OMAP5_MCBSP2_CLKCTRL, omap5_mcbsp2_bit_data, CLKF_SW_SUP, "abe-clkctrl:0030:24" },
+ { OMAP5_MCBSP3_CLKCTRL, omap5_mcbsp3_bit_data, CLKF_SW_SUP, "abe-clkctrl:0038:24" },
+ { OMAP5_TIMER5_CLKCTRL, omap5_timer5_bit_data, CLKF_SW_SUP, "abe-clkctrl:0048:24" },
+ { OMAP5_TIMER6_CLKCTRL, omap5_timer6_bit_data, CLKF_SW_SUP, "abe-clkctrl:0050:24" },
+ { OMAP5_TIMER7_CLKCTRL, omap5_timer7_bit_data, CLKF_SW_SUP, "abe-clkctrl:0058:24" },
+ { OMAP5_TIMER8_CLKCTRL, omap5_timer8_bit_data, CLKF_SW_SUP, "abe-clkctrl:0060:24" },
{ 0 },
};
};
static const struct omap_clkctrl_reg_data omap5_l4per_clkctrl_regs[] __initconst = {
- { OMAP5_TIMER10_CLKCTRL, omap5_timer10_bit_data, CLKF_SW_SUP, "l4per_cm:clk:0008:24" },
- { OMAP5_TIMER11_CLKCTRL, omap5_timer11_bit_data, CLKF_SW_SUP, "l4per_cm:clk:0010:24" },
- { OMAP5_TIMER2_CLKCTRL, omap5_timer2_bit_data, CLKF_SW_SUP, "l4per_cm:clk:0018:24" },
- { OMAP5_TIMER3_CLKCTRL, omap5_timer3_bit_data, CLKF_SW_SUP, "l4per_cm:clk:0020:24" },
- { OMAP5_TIMER4_CLKCTRL, omap5_timer4_bit_data, CLKF_SW_SUP, "l4per_cm:clk:0028:24" },
- { OMAP5_TIMER9_CLKCTRL, omap5_timer9_bit_data, CLKF_SW_SUP, "l4per_cm:clk:0030:24" },
+ { OMAP5_TIMER10_CLKCTRL, omap5_timer10_bit_data, CLKF_SW_SUP, "l4per-clkctrl:0008:24" },
+ { OMAP5_TIMER11_CLKCTRL, omap5_timer11_bit_data, CLKF_SW_SUP, "l4per-clkctrl:0010:24" },
+ { OMAP5_TIMER2_CLKCTRL, omap5_timer2_bit_data, CLKF_SW_SUP, "l4per-clkctrl:0018:24" },
+ { OMAP5_TIMER3_CLKCTRL, omap5_timer3_bit_data, CLKF_SW_SUP, "l4per-clkctrl:0020:24" },
+ { OMAP5_TIMER4_CLKCTRL, omap5_timer4_bit_data, CLKF_SW_SUP, "l4per-clkctrl:0028:24" },
+ { OMAP5_TIMER9_CLKCTRL, omap5_timer9_bit_data, CLKF_SW_SUP, "l4per-clkctrl:0030:24" },
{ OMAP5_GPIO2_CLKCTRL, omap5_gpio2_bit_data, CLKF_HW_SUP, "l4_root_clk_div" },
{ OMAP5_GPIO3_CLKCTRL, omap5_gpio3_bit_data, CLKF_HW_SUP, "l4_root_clk_div" },
{ OMAP5_GPIO4_CLKCTRL, omap5_gpio4_bit_data, CLKF_HW_SUP, "l4_root_clk_div" },
};
static const struct omap_clkctrl_reg_data omap5_dss_clkctrl_regs[] __initconst = {
- { OMAP5_DSS_CORE_CLKCTRL, omap5_dss_core_bit_data, CLKF_SW_SUP, "dss_cm:clk:0000:8" },
+ { OMAP5_DSS_CORE_CLKCTRL, omap5_dss_core_bit_data, CLKF_SW_SUP, "dss-clkctrl:0000:8" },
{ 0 },
};
};
static const struct omap_clkctrl_reg_data omap5_gpu_clkctrl_regs[] __initconst = {
- { OMAP5_GPU_CLKCTRL, omap5_gpu_core_bit_data, CLKF_SW_SUP, "gpu_cm:clk:0000:24" },
+ { OMAP5_GPU_CLKCTRL, omap5_gpu_core_bit_data, CLKF_SW_SUP, "gpu-clkctrl:0000:24" },
{ 0 },
};
};
static const char * const omap5_mmc1_fclk_parents[] __initconst = {
- "l3init_cm:clk:0008:24",
+ "l3init-clkctrl:0008:24",
NULL,
};
};
static const char * const omap5_mmc2_fclk_parents[] __initconst = {
- "l3init_cm:clk:0010:24",
+ "l3init-clkctrl:0010:24",
NULL,
};
};
static const char * const omap5_usb_host_hs_utmi_p1_clk_parents[] __initconst = {
- "l3init_cm:clk:0038:24",
+ "l3init-clkctrl:0038:24",
NULL,
};
static const char * const omap5_usb_host_hs_utmi_p2_clk_parents[] __initconst = {
- "l3init_cm:clk:0038:25",
+ "l3init-clkctrl:0038:25",
NULL,
};
};
static const struct omap_clkctrl_reg_data omap5_l3init_clkctrl_regs[] __initconst = {
- { OMAP5_MMC1_CLKCTRL, omap5_mmc1_bit_data, CLKF_SW_SUP, "l3init_cm:clk:0008:25" },
- { OMAP5_MMC2_CLKCTRL, omap5_mmc2_bit_data, CLKF_SW_SUP, "l3init_cm:clk:0010:25" },
+ { OMAP5_MMC1_CLKCTRL, omap5_mmc1_bit_data, CLKF_SW_SUP, "l3init-clkctrl:0008:25" },
+ { OMAP5_MMC2_CLKCTRL, omap5_mmc2_bit_data, CLKF_SW_SUP, "l3init-clkctrl:0010:25" },
{ OMAP5_USB_HOST_HS_CLKCTRL, omap5_usb_host_hs_bit_data, CLKF_SW_SUP, "l3init_60m_fclk" },
{ OMAP5_USB_TLL_HS_CLKCTRL, omap5_usb_tll_hs_bit_data, CLKF_HW_SUP, "l4_root_clk_div" },
{ OMAP5_SATA_CLKCTRL, omap5_sata_bit_data, CLKF_SW_SUP, "func_48m_fclk" },
{ OMAP5_L4_WKUP_CLKCTRL, NULL, 0, "wkupaon_iclk_mux" },
{ OMAP5_WD_TIMER2_CLKCTRL, NULL, CLKF_SW_SUP, "sys_32k_ck" },
{ OMAP5_GPIO1_CLKCTRL, omap5_gpio1_bit_data, CLKF_HW_SUP, "wkupaon_iclk_mux" },
- { OMAP5_TIMER1_CLKCTRL, omap5_timer1_bit_data, CLKF_SW_SUP, "wkupaon_cm:clk:0020:24" },
+ { OMAP5_TIMER1_CLKCTRL, omap5_timer1_bit_data, CLKF_SW_SUP, "wkupaon-clkctrl:0020:24" },
{ OMAP5_COUNTER_32K_CLKCTRL, NULL, 0, "wkupaon_iclk_mux" },
{ OMAP5_KBD_CLKCTRL, NULL, CLKF_SW_SUP, "sys_32k_ck" },
{ 0 },
static struct ti_dt_clk omap54xx_clks[] = {
DT_CLK(NULL, "timer_32k_ck", "sys_32k_ck"),
DT_CLK(NULL, "sys_clkin_ck", "sys_clkin"),
- DT_CLK(NULL, "dmic_gfclk", "abe_cm:0018:24"),
- DT_CLK(NULL, "dmic_sync_mux_ck", "abe_cm:0018:26"),
- DT_CLK(NULL, "dss_32khz_clk", "dss_cm:0000:11"),
- DT_CLK(NULL, "dss_48mhz_clk", "dss_cm:0000:9"),
- DT_CLK(NULL, "dss_dss_clk", "dss_cm:0000:8"),
- DT_CLK(NULL, "dss_sys_clk", "dss_cm:0000:10"),
- DT_CLK(NULL, "gpio1_dbclk", "wkupaon_cm:0018:8"),
- DT_CLK(NULL, "gpio2_dbclk", "l4per_cm:0040:8"),
- DT_CLK(NULL, "gpio3_dbclk", "l4per_cm:0048:8"),
- DT_CLK(NULL, "gpio4_dbclk", "l4per_cm:0050:8"),
- DT_CLK(NULL, "gpio5_dbclk", "l4per_cm:0058:8"),
- DT_CLK(NULL, "gpio6_dbclk", "l4per_cm:0060:8"),
- DT_CLK(NULL, "gpio7_dbclk", "l4per_cm:00f0:8"),
- DT_CLK(NULL, "gpio8_dbclk", "l4per_cm:00f8:8"),
- DT_CLK(NULL, "mcbsp1_gfclk", "abe_cm:0028:24"),
- DT_CLK(NULL, "mcbsp1_sync_mux_ck", "abe_cm:0028:26"),
- DT_CLK(NULL, "mcbsp2_gfclk", "abe_cm:0030:24"),
- DT_CLK(NULL, "mcbsp2_sync_mux_ck", "abe_cm:0030:26"),
- DT_CLK(NULL, "mcbsp3_gfclk", "abe_cm:0038:24"),
- DT_CLK(NULL, "mcbsp3_sync_mux_ck", "abe_cm:0038:26"),
- DT_CLK(NULL, "mmc1_32khz_clk", "l3init_cm:0008:8"),
- DT_CLK(NULL, "mmc1_fclk", "l3init_cm:0008:25"),
- DT_CLK(NULL, "mmc1_fclk_mux", "l3init_cm:0008:24"),
- DT_CLK(NULL, "mmc2_fclk", "l3init_cm:0010:25"),
- DT_CLK(NULL, "mmc2_fclk_mux", "l3init_cm:0010:24"),
- DT_CLK(NULL, "sata_ref_clk", "l3init_cm:0068:8"),
- DT_CLK(NULL, "timer10_gfclk_mux", "l4per_cm:0008:24"),
- DT_CLK(NULL, "timer11_gfclk_mux", "l4per_cm:0010:24"),
- DT_CLK(NULL, "timer1_gfclk_mux", "wkupaon_cm:0020:24"),
- DT_CLK(NULL, "timer2_gfclk_mux", "l4per_cm:0018:24"),
- DT_CLK(NULL, "timer3_gfclk_mux", "l4per_cm:0020:24"),
- DT_CLK(NULL, "timer4_gfclk_mux", "l4per_cm:0028:24"),
- DT_CLK(NULL, "timer5_gfclk_mux", "abe_cm:0048:24"),
- DT_CLK(NULL, "timer6_gfclk_mux", "abe_cm:0050:24"),
- DT_CLK(NULL, "timer7_gfclk_mux", "abe_cm:0058:24"),
- DT_CLK(NULL, "timer8_gfclk_mux", "abe_cm:0060:24"),
- DT_CLK(NULL, "timer9_gfclk_mux", "l4per_cm:0030:24"),
- DT_CLK(NULL, "usb_host_hs_hsic480m_p1_clk", "l3init_cm:0038:13"),
- DT_CLK(NULL, "usb_host_hs_hsic480m_p2_clk", "l3init_cm:0038:14"),
- DT_CLK(NULL, "usb_host_hs_hsic480m_p3_clk", "l3init_cm:0038:7"),
- DT_CLK(NULL, "usb_host_hs_hsic60m_p1_clk", "l3init_cm:0038:11"),
- DT_CLK(NULL, "usb_host_hs_hsic60m_p2_clk", "l3init_cm:0038:12"),
- DT_CLK(NULL, "usb_host_hs_hsic60m_p3_clk", "l3init_cm:0038:6"),
- DT_CLK(NULL, "usb_host_hs_utmi_p1_clk", "l3init_cm:0038:8"),
- DT_CLK(NULL, "usb_host_hs_utmi_p2_clk", "l3init_cm:0038:9"),
- DT_CLK(NULL, "usb_host_hs_utmi_p3_clk", "l3init_cm:0038:10"),
- DT_CLK(NULL, "usb_otg_ss_refclk960m", "l3init_cm:00d0:8"),
- DT_CLK(NULL, "usb_tll_hs_usb_ch0_clk", "l3init_cm:0048:8"),
- DT_CLK(NULL, "usb_tll_hs_usb_ch1_clk", "l3init_cm:0048:9"),
- DT_CLK(NULL, "usb_tll_hs_usb_ch2_clk", "l3init_cm:0048:10"),
- DT_CLK(NULL, "utmi_p1_gfclk", "l3init_cm:0038:24"),
- DT_CLK(NULL, "utmi_p2_gfclk", "l3init_cm:0038:25"),
+ DT_CLK(NULL, "dmic_gfclk", "abe-clkctrl:0018:24"),
+ DT_CLK(NULL, "dmic_sync_mux_ck", "abe-clkctrl:0018:26"),
+ DT_CLK(NULL, "dss_32khz_clk", "dss-clkctrl:0000:11"),
+ DT_CLK(NULL, "dss_48mhz_clk", "dss-clkctrl:0000:9"),
+ DT_CLK(NULL, "dss_dss_clk", "dss-clkctrl:0000:8"),
+ DT_CLK(NULL, "dss_sys_clk", "dss-clkctrl:0000:10"),
+ DT_CLK(NULL, "gpio1_dbclk", "wkupaon-clkctrl:0018:8"),
+ DT_CLK(NULL, "gpio2_dbclk", "l4per-clkctrl:0040:8"),
+ DT_CLK(NULL, "gpio3_dbclk", "l4per-clkctrl:0048:8"),
+ DT_CLK(NULL, "gpio4_dbclk", "l4per-clkctrl:0050:8"),
+ DT_CLK(NULL, "gpio5_dbclk", "l4per-clkctrl:0058:8"),
+ DT_CLK(NULL, "gpio6_dbclk", "l4per-clkctrl:0060:8"),
+ DT_CLK(NULL, "gpio7_dbclk", "l4per-clkctrl:00f0:8"),
+ DT_CLK(NULL, "gpio8_dbclk", "l4per-clkctrl:00f8:8"),
+ DT_CLK(NULL, "mcbsp1_gfclk", "abe-clkctrl:0028:24"),
+ DT_CLK(NULL, "mcbsp1_sync_mux_ck", "abe-clkctrl:0028:26"),
+ DT_CLK(NULL, "mcbsp2_gfclk", "abe-clkctrl:0030:24"),
+ DT_CLK(NULL, "mcbsp2_sync_mux_ck", "abe-clkctrl:0030:26"),
+ DT_CLK(NULL, "mcbsp3_gfclk", "abe-clkctrl:0038:24"),
+ DT_CLK(NULL, "mcbsp3_sync_mux_ck", "abe-clkctrl:0038:26"),
+ DT_CLK(NULL, "mmc1_32khz_clk", "l3init-clkctrl:0008:8"),
+ DT_CLK(NULL, "mmc1_fclk", "l3init-clkctrl:0008:25"),
+ DT_CLK(NULL, "mmc1_fclk_mux", "l3init-clkctrl:0008:24"),
+ DT_CLK(NULL, "mmc2_fclk", "l3init-clkctrl:0010:25"),
+ DT_CLK(NULL, "mmc2_fclk_mux", "l3init-clkctrl:0010:24"),
+ DT_CLK(NULL, "sata_ref_clk", "l3init-clkctrl:0068:8"),
+ DT_CLK(NULL, "timer10_gfclk_mux", "l4per-clkctrl:0008:24"),
+ DT_CLK(NULL, "timer11_gfclk_mux", "l4per-clkctrl:0010:24"),
+ DT_CLK(NULL, "timer1_gfclk_mux", "wkupaon-clkctrl:0020:24"),
+ DT_CLK(NULL, "timer2_gfclk_mux", "l4per-clkctrl:0018:24"),
+ DT_CLK(NULL, "timer3_gfclk_mux", "l4per-clkctrl:0020:24"),
+ DT_CLK(NULL, "timer4_gfclk_mux", "l4per-clkctrl:0028:24"),
+ DT_CLK(NULL, "timer5_gfclk_mux", "abe-clkctrl:0048:24"),
+ DT_CLK(NULL, "timer6_gfclk_mux", "abe-clkctrl:0050:24"),
+ DT_CLK(NULL, "timer7_gfclk_mux", "abe-clkctrl:0058:24"),
+ DT_CLK(NULL, "timer8_gfclk_mux", "abe-clkctrl:0060:24"),
+ DT_CLK(NULL, "timer9_gfclk_mux", "l4per-clkctrl:0030:24"),
+ DT_CLK(NULL, "usb_host_hs_hsic480m_p1_clk", "l3init-clkctrl:0038:13"),
+ DT_CLK(NULL, "usb_host_hs_hsic480m_p2_clk", "l3init-clkctrl:0038:14"),
+ DT_CLK(NULL, "usb_host_hs_hsic480m_p3_clk", "l3init-clkctrl:0038:7"),
+ DT_CLK(NULL, "usb_host_hs_hsic60m_p1_clk", "l3init-clkctrl:0038:11"),
+ DT_CLK(NULL, "usb_host_hs_hsic60m_p2_clk", "l3init-clkctrl:0038:12"),
+ DT_CLK(NULL, "usb_host_hs_hsic60m_p3_clk", "l3init-clkctrl:0038:6"),
+ DT_CLK(NULL, "usb_host_hs_utmi_p1_clk", "l3init-clkctrl:0038:8"),
+ DT_CLK(NULL, "usb_host_hs_utmi_p2_clk", "l3init-clkctrl:0038:9"),
+ DT_CLK(NULL, "usb_host_hs_utmi_p3_clk", "l3init-clkctrl:0038:10"),
+ DT_CLK(NULL, "usb_otg_ss_refclk960m", "l3init-clkctrl:00d0:8"),
+ DT_CLK(NULL, "usb_tll_hs_usb_ch0_clk", "l3init-clkctrl:0048:8"),
+ DT_CLK(NULL, "usb_tll_hs_usb_ch1_clk", "l3init-clkctrl:0048:9"),
+ DT_CLK(NULL, "usb_tll_hs_usb_ch2_clk", "l3init-clkctrl:0048:10"),
+ DT_CLK(NULL, "utmi_p1_gfclk", "l3init-clkctrl:0038:24"),
+ DT_CLK(NULL, "utmi_p2_gfclk", "l3init-clkctrl:0038:25"),
{ .node_name = NULL },
};
char *c;
u16 soc_mask = 0;
- if (!(ti_clk_get_features()->flags & TI_CLK_CLKCTRL_COMPAT) &&
- of_node_name_eq(node, "clk"))
- ti_clk_features.flags |= TI_CLK_CLKCTRL_COMPAT;
-
addrp = of_get_address(node, 0, NULL, NULL);
addr = (u32)of_translate_address(node, addrp);
hv_sched_clock_offset = hv_read_reference_counter();
hv_setup_sched_clock(read_hv_sched_clock_msr);
}
-EXPORT_SYMBOL_GPL(hv_init_clocksource);
if (!devpriv->usb_rx_buf)
return -ENOMEM;
- size = max(usb_endpoint_maxp(devpriv->ep_rx), MIN_BUF_SIZE);
+ size = max(usb_endpoint_maxp(devpriv->ep_tx), MIN_BUF_SIZE);
devpriv->usb_tx_buf = kzalloc(size, GFP_KERNEL);
if (!devpriv->usb_tx_buf)
return -ENOMEM;
return 0;
}
+static int amd_pstate_cpu_resume(struct cpufreq_policy *policy)
+{
+ int ret;
+
+ ret = amd_pstate_enable(true);
+ if (ret)
+ pr_err("failed to enable amd-pstate during resume, return %d\n", ret);
+
+ return ret;
+}
+
+static int amd_pstate_cpu_suspend(struct cpufreq_policy *policy)
+{
+ int ret;
+
+ ret = amd_pstate_enable(false);
+ if (ret)
+ pr_err("failed to disable amd-pstate during suspend, return %d\n", ret);
+
+ return ret;
+}
+
/* Sysfs attributes */
/*
.target = amd_pstate_target,
.init = amd_pstate_cpu_init,
.exit = amd_pstate_cpu_exit,
+ .suspend = amd_pstate_cpu_suspend,
+ .resume = amd_pstate_cpu_resume,
.set_boost = amd_pstate_set_boost,
.name = "amd-pstate",
.attr = amd_pstate_attr,
{ .compatible = "mediatek,mt8173", },
{ .compatible = "mediatek,mt8176", },
{ .compatible = "mediatek,mt8183", },
+ { .compatible = "mediatek,mt8186", },
{ .compatible = "mediatek,mt8365", },
{ .compatible = "mediatek,mt8516", },
/* Both presence and absence of sram regulator are valid cases. */
info->sram_reg = regulator_get_optional(cpu_dev, "sram");
- if (IS_ERR(info->sram_reg))
+ if (IS_ERR(info->sram_reg)) {
+ ret = PTR_ERR(info->sram_reg);
+ if (ret == -EPROBE_DEFER)
+ goto out_free_resources;
+
info->sram_reg = NULL;
- else {
+ } else {
ret = regulator_enable(info->sram_reg);
if (ret) {
dev_warn(cpu_dev, "cpu%d: failed to enable vsram\n", cpu);
if (slew_done_gpio_np)
slew_done_gpio = read_gpio(slew_done_gpio_np);
+ of_node_put(volt_gpio_np);
+ of_node_put(freq_gpio_np);
+ of_node_put(slew_done_gpio_np);
+
/* If we use the frequency GPIOs, calculate the min/max speeds based
* on the bus frequencies
*/
struct platform_device *pdev = cpufreq_get_driver_data();
int ret;
+ if (data->throttle_irq <= 0)
+ return 0;
+
ret = irq_set_affinity_hint(data->throttle_irq, policy->cpus);
if (ret)
dev_err(&pdev->dev, "Failed to set CPU affinity of %s[%d]\n",
static void qcom_cpufreq_hw_lmh_exit(struct qcom_cpufreq_data *data)
{
+ if (data->throttle_irq <= 0)
+ return;
+
free_irq(data->throttle_irq, data);
}
np = of_find_matching_node(NULL, qoriq_cpufreq_blacklist);
if (np) {
+ of_node_put(np);
dev_info(&pdev->dev, "Disabling due to erratum A-008083");
return -ENODEV;
}
Select this option if you want to use the paes cipher
for example to use protected key encrypted devices.
-config CRYPTO_SHA1_S390
- tristate "SHA1 digest algorithm"
- depends on S390
- select CRYPTO_HASH
- help
- This is the s390 hardware accelerated implementation of the
- SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2).
-
- It is available as of z990.
-
-config CRYPTO_SHA256_S390
- tristate "SHA256 digest algorithm"
- depends on S390
- select CRYPTO_HASH
- help
- This is the s390 hardware accelerated implementation of the
- SHA256 secure hash standard (DFIPS 180-2).
-
- It is available as of z9.
-
-config CRYPTO_SHA512_S390
- tristate "SHA384 and SHA512 digest algorithm"
- depends on S390
- select CRYPTO_HASH
- help
- This is the s390 hardware accelerated implementation of the
- SHA512 secure hash standard.
-
- It is available as of z10.
-
-config CRYPTO_SHA3_256_S390
- tristate "SHA3_224 and SHA3_256 digest algorithm"
- depends on S390
- select CRYPTO_HASH
- help
- This is the s390 hardware accelerated implementation of the
- SHA3_256 secure hash standard.
-
- It is available as of z14.
-
-config CRYPTO_SHA3_512_S390
- tristate "SHA3_384 and SHA3_512 digest algorithm"
- depends on S390
- select CRYPTO_HASH
- help
- This is the s390 hardware accelerated implementation of the
- SHA3_512 secure hash standard.
-
- It is available as of z14.
-
-config CRYPTO_DES_S390
- tristate "DES and Triple DES cipher algorithms"
- depends on S390
- select CRYPTO_ALGAPI
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_DES
- help
- This is the s390 hardware accelerated implementation of the
- DES cipher algorithm (FIPS 46-2), and Triple DES EDE (FIPS 46-3).
-
- As of z990 the ECB and CBC mode are hardware accelerated.
- As of z196 the CTR mode is hardware accelerated.
-
-config CRYPTO_AES_S390
- tristate "AES cipher algorithms"
- depends on S390
- select CRYPTO_ALGAPI
- select CRYPTO_SKCIPHER
- help
- This is the s390 hardware accelerated implementation of the
- AES cipher algorithms (FIPS-197).
-
- As of z9 the ECB and CBC modes are hardware accelerated
- for 128 bit keys.
- As of z10 the ECB and CBC modes are hardware accelerated
- for all AES key sizes.
- As of z196 the CTR mode is hardware accelerated for all AES
- key sizes and XTS mode is hardware accelerated for 256 and
- 512 bit keys.
-
-config CRYPTO_CHACHA_S390
- tristate "ChaCha20 stream cipher"
- depends on S390
- select CRYPTO_SKCIPHER
- select CRYPTO_LIB_CHACHA_GENERIC
- select CRYPTO_ARCH_HAVE_LIB_CHACHA
- help
- This is the s390 SIMD implementation of the ChaCha20 stream
- cipher (RFC 7539).
-
- It is available as of z13.
-
config S390_PRNG
tristate "Pseudo random number generator device driver"
depends on S390
It is available as of z9.
-config CRYPTO_GHASH_S390
- tristate "GHASH hash function"
- depends on S390
- select CRYPTO_HASH
- help
- This is the s390 hardware accelerated implementation of GHASH,
- the hash function used in GCM (Galois/Counter mode).
-
- It is available as of z196.
-
-config CRYPTO_CRC32_S390
- tristate "CRC-32 algorithms"
- depends on S390
- select CRYPTO_HASH
- select CRC32
- help
- Select this option if you want to use hardware accelerated
- implementations of CRC algorithms. With this option, you
- can optimize the computation of CRC-32 (IEEE 802.3 Ethernet)
- and CRC-32C (Castagnoli).
-
- It is available with IBM z13 or later.
-
config CRYPTO_DEV_NIAGARA2
tristate "Niagara2 Stream Processing Unit driver"
select CRYPTO_LIB_DES
struct sp_platform *sp_platform = sp->dev_specific;
struct device *dev = sp->dev;
struct platform_device *pdev = to_platform_device(dev);
- unsigned int i, count;
int ret;
- for (i = 0, count = 0; i < pdev->num_resources; i++) {
- struct resource *res = &pdev->resource[i];
-
- if (resource_type(res) == IORESOURCE_IRQ)
- count++;
- }
-
- sp_platform->irq_count = count;
+ sp_platform->irq_count = platform_irq_count(pdev);
ret = platform_get_irq(pdev, 0);
if (ret < 0) {
}
sp->psp_irq = ret;
- if (count == 1) {
+ if (sp_platform->irq_count == 1) {
sp->ccp_irq = ret;
} else {
ret = platform_get_irq(pdev, 1);
else
cxld->target_type = CXL_DECODER_ACCELERATOR;
- if (is_cxl_endpoint(to_cxl_port(cxld->dev.parent)))
+ if (is_endpoint_decoder(&cxld->dev))
return 0;
target_list.value =
return -EBUSY;
/* Check the input buffer is the expected size */
- if (info->size_in != send_cmd->in.size)
+ if ((info->size_in != CXL_VARIABLE_PAYLOAD) &&
+ (info->size_in != send_cmd->in.size))
return -ENOMEM;
/* Check the output buffer is at least large enough */
- if (send_cmd->out.size < info->size_out)
+ if ((info->size_out != CXL_VARIABLE_PAYLOAD) &&
+ (send_cmd->out.size < info->size_out))
return -ENOMEM;
*mem_cmd = (struct cxl_mem_command) {
.groups = cxl_decoder_root_attribute_groups,
};
-static bool is_endpoint_decoder(struct device *dev)
+bool is_endpoint_decoder(struct device *dev)
{
return dev->type == &cxl_decoder_endpoint_type;
}
struct cxl_decoder *to_cxl_decoder(struct device *dev);
bool is_root_decoder(struct device *dev);
+bool is_endpoint_decoder(struct device *dev);
bool is_cxl_decoder(struct device *dev);
struct cxl_decoder *cxl_root_decoder_alloc(struct cxl_port *port,
unsigned int nr_targets);
} __packed;
struct cxl_mbox_get_lsa {
- u32 offset;
- u32 length;
+ __le32 offset;
+ __le32 length;
} __packed;
struct cxl_mbox_set_lsa {
- u32 offset;
- u32 reserved;
+ __le32 offset;
+ __le32 reserved;
u8 data[];
} __packed;
{
struct cxl_dev_state *cxlds = cxlmd->cxlds;
struct cxl_port *endpoint;
+ int rc;
endpoint = devm_cxl_add_port(&parent_port->dev, &cxlmd->dev,
cxlds->component_reg_phys, parent_port);
dev_dbg(&cxlmd->dev, "add: %s\n", dev_name(&endpoint->dev));
+ rc = cxl_endpoint_autoremove(cxlmd, endpoint);
+ if (rc)
+ return rc;
+
if (!endpoint->dev.driver) {
dev_err(&cxlmd->dev, "%s failed probe\n",
dev_name(&endpoint->dev));
return -ENXIO;
}
- return cxl_endpoint_autoremove(cxlmd, endpoint);
+ return 0;
}
static void enable_suspend(void *data)
return -EINVAL;
get_lsa = (struct cxl_mbox_get_lsa) {
- .offset = cmd->in_offset,
- .length = cmd->in_length,
+ .offset = cpu_to_le32(cmd->in_offset),
+ .length = cpu_to_le32(cmd->in_length),
};
rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa,
return -ENOMEM;
*set_lsa = (struct cxl_mbox_set_lsa) {
- .offset = cmd->in_offset,
+ .offset = cpu_to_le32(cmd->in_offset),
};
memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);
unsigned long *min_freq,
unsigned long *max_freq)
{
- unsigned long *freq_table = devfreq->profile->freq_table;
+ unsigned long *freq_table = devfreq->freq_table;
s32 qos_min_freq, qos_max_freq;
lockdep_assert_held(&devfreq->lock);
* The devfreq drivers can initialize this in either ascending or
* descending order and devfreq core supports both.
*/
- if (freq_table[0] < freq_table[devfreq->profile->max_state - 1]) {
+ if (freq_table[0] < freq_table[devfreq->max_state - 1]) {
*min_freq = freq_table[0];
- *max_freq = freq_table[devfreq->profile->max_state - 1];
+ *max_freq = freq_table[devfreq->max_state - 1];
} else {
- *min_freq = freq_table[devfreq->profile->max_state - 1];
+ *min_freq = freq_table[devfreq->max_state - 1];
*max_freq = freq_table[0];
}
{
int lev;
- for (lev = 0; lev < devfreq->profile->max_state; lev++)
- if (freq == devfreq->profile->freq_table[lev])
+ for (lev = 0; lev < devfreq->max_state; lev++)
+ if (freq == devfreq->freq_table[lev])
return lev;
return -EINVAL;
static int set_freq_table(struct devfreq *devfreq)
{
- struct devfreq_dev_profile *profile = devfreq->profile;
struct dev_pm_opp *opp;
unsigned long freq;
int i, count;
if (count <= 0)
return -EINVAL;
- profile->max_state = count;
- profile->freq_table = devm_kcalloc(devfreq->dev.parent,
- profile->max_state,
- sizeof(*profile->freq_table),
- GFP_KERNEL);
- if (!profile->freq_table) {
- profile->max_state = 0;
+ devfreq->max_state = count;
+ devfreq->freq_table = devm_kcalloc(devfreq->dev.parent,
+ devfreq->max_state,
+ sizeof(*devfreq->freq_table),
+ GFP_KERNEL);
+ if (!devfreq->freq_table)
return -ENOMEM;
- }
- for (i = 0, freq = 0; i < profile->max_state; i++, freq++) {
+ for (i = 0, freq = 0; i < devfreq->max_state; i++, freq++) {
opp = dev_pm_opp_find_freq_ceil(devfreq->dev.parent, &freq);
if (IS_ERR(opp)) {
- devm_kfree(devfreq->dev.parent, profile->freq_table);
- profile->max_state = 0;
+ devm_kfree(devfreq->dev.parent, devfreq->freq_table);
return PTR_ERR(opp);
}
dev_pm_opp_put(opp);
- profile->freq_table[i] = freq;
+ devfreq->freq_table[i] = freq;
}
return 0;
if (lev != prev_lev) {
devfreq->stats.trans_table[
- (prev_lev * devfreq->profile->max_state) + lev]++;
+ (prev_lev * devfreq->max_state) + lev]++;
devfreq->stats.total_trans++;
}
if (err < 0)
goto err_dev;
mutex_lock(&devfreq->lock);
+ } else {
+ devfreq->freq_table = devfreq->profile->freq_table;
+ devfreq->max_state = devfreq->profile->max_state;
}
devfreq->scaling_min_freq = find_available_min_freq(devfreq);
devfreq->stats.trans_table = devm_kzalloc(&devfreq->dev,
array3_size(sizeof(unsigned int),
- devfreq->profile->max_state,
- devfreq->profile->max_state),
+ devfreq->max_state,
+ devfreq->max_state),
GFP_KERNEL);
if (!devfreq->stats.trans_table) {
mutex_unlock(&devfreq->lock);
}
devfreq->stats.time_in_state = devm_kcalloc(&devfreq->dev,
- devfreq->profile->max_state,
+ devfreq->max_state,
sizeof(*devfreq->stats.time_in_state),
GFP_KERNEL);
if (!devfreq->stats.time_in_state) {
err = devfreq->governor->event_handler(devfreq, DEVFREQ_GOV_START,
NULL);
if (err) {
- dev_err(dev, "%s: Unable to start governor for the device\n",
- __func__);
+ dev_err_probe(dev, err,
+ "%s: Unable to start governor for the device\n",
+ __func__);
goto err_init;
}
create_sysfs_files(devfreq, devfreq->governor);
mutex_lock(&df->lock);
- for (i = 0; i < df->profile->max_state; i++)
+ for (i = 0; i < df->max_state; i++)
count += scnprintf(&buf[count], (PAGE_SIZE - count - 2),
- "%lu ", df->profile->freq_table[i]);
+ "%lu ", df->freq_table[i]);
mutex_unlock(&df->lock);
/* Truncate the trailing space */
if (!df->profile)
return -EINVAL;
- max_state = df->profile->max_state;
+ max_state = df->max_state;
if (max_state == 0)
return sprintf(buf, "Not Supported.\n");
len += sprintf(buf + len, " :");
for (i = 0; i < max_state; i++)
len += sprintf(buf + len, "%10lu",
- df->profile->freq_table[i]);
+ df->freq_table[i]);
len += sprintf(buf + len, " time(ms)\n");
for (i = 0; i < max_state; i++) {
- if (df->profile->freq_table[i]
- == df->previous_freq) {
+ if (df->freq_table[i] == df->previous_freq)
len += sprintf(buf + len, "*");
- } else {
+ else
len += sprintf(buf + len, " ");
- }
- len += sprintf(buf + len, "%10lu:",
- df->profile->freq_table[i]);
+
+ len += sprintf(buf + len, "%10lu:", df->freq_table[i]);
for (j = 0; j < max_state; j++)
len += sprintf(buf + len, "%10u",
df->stats.trans_table[(i * max_state) + j]);
if (!df->profile)
return -EINVAL;
- if (df->profile->max_state == 0)
+ if (df->max_state == 0)
return count;
err = kstrtoint(buf, 10, &value);
return -EINVAL;
mutex_lock(&df->lock);
- memset(df->stats.time_in_state, 0, (df->profile->max_state *
+ memset(df->stats.time_in_state, 0, (df->max_state *
sizeof(*df->stats.time_in_state)));
memset(df->stats.trans_table, 0, array3_size(sizeof(unsigned int),
- df->profile->max_state,
- df->profile->max_state));
+ df->max_state,
+ df->max_state));
df->stats.total_trans = 0;
df->stats.last_update = get_jiffies_64();
mutex_unlock(&df->lock);
count = of_get_child_count(events_np);
desc = devm_kcalloc(dev, count, sizeof(*desc), GFP_KERNEL);
- if (!desc)
+ if (!desc) {
+ of_node_put(events_np);
return -ENOMEM;
+ }
info->num_events = count;
of_id = of_match_device(exynos_ppmu_id_match, dev);
if (of_id)
info->ppmu_type = (enum exynos_ppmu_type)of_id->data;
- else
+ else {
+ of_node_put(events_np);
return -EINVAL;
+ }
j = 0;
for_each_child_of_node(events_np, node) {
}
}
- max_state = bus->devfreq->profile->max_state;
- min_freq = (bus->devfreq->profile->freq_table[0] / 1000);
- max_freq = (bus->devfreq->profile->freq_table[max_state - 1] / 1000);
+ max_state = bus->devfreq->max_state;
+ min_freq = (bus->devfreq->freq_table[0] / 1000);
+ max_freq = (bus->devfreq->freq_table[max_state - 1] / 1000);
pr_info("exynos-bus: new bus device registered: %s (%6ld KHz ~ %6ld KHz)\n",
dev_name(dev), min_freq, max_freq);
- // SPDX-License-Identifier: GPL-2.0-only
+// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/drivers/devfreq/governor_passive.c
*
#include <linux/slab.h>
#include <linux/device.h>
#include <linux/devfreq.h>
+#include <linux/units.h>
#include "governor.h"
-#define HZ_PER_KHZ 1000
-
static struct devfreq_cpu_data *
get_parent_cpu_data(struct devfreq_passive_data *p_data,
struct cpufreq_policy *policy)
return NULL;
}
+static void delete_parent_cpu_data(struct devfreq_passive_data *p_data)
+{
+ struct devfreq_cpu_data *parent_cpu_data, *tmp;
+
+ list_for_each_entry_safe(parent_cpu_data, tmp, &p_data->cpu_data_list, node) {
+ list_del(&parent_cpu_data->node);
+
+ if (parent_cpu_data->opp_table)
+ dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
+
+ kfree(parent_cpu_data);
+ }
+}
+
static unsigned long get_target_freq_by_required_opp(struct device *p_dev,
struct opp_table *p_opp_table,
struct opp_table *opp_table,
goto out;
/* Use interpolation if required opps is not available */
- for (i = 0; i < parent_devfreq->profile->max_state; i++)
- if (parent_devfreq->profile->freq_table[i] == *freq)
+ for (i = 0; i < parent_devfreq->max_state; i++)
+ if (parent_devfreq->freq_table[i] == *freq)
break;
- if (i == parent_devfreq->profile->max_state)
+ if (i == parent_devfreq->max_state)
return -EINVAL;
- if (i < devfreq->profile->max_state) {
- child_freq = devfreq->profile->freq_table[i];
+ if (i < devfreq->max_state) {
+ child_freq = devfreq->freq_table[i];
} else {
- count = devfreq->profile->max_state;
- child_freq = devfreq->profile->freq_table[count - 1];
+ count = devfreq->max_state;
+ child_freq = devfreq->freq_table[count - 1];
}
out:
{
struct devfreq_passive_data *p_data
= (struct devfreq_passive_data *)devfreq->data;
- struct devfreq_cpu_data *parent_cpu_data;
- int cpu, ret = 0;
+ int ret;
if (p_data->nb.notifier_call) {
ret = cpufreq_unregister_notifier(&p_data->nb,
return ret;
}
- for_each_possible_cpu(cpu) {
- struct cpufreq_policy *policy = cpufreq_cpu_get(cpu);
- if (!policy) {
- ret = -EINVAL;
- continue;
- }
-
- parent_cpu_data = get_parent_cpu_data(p_data, policy);
- if (!parent_cpu_data) {
- cpufreq_cpu_put(policy);
- continue;
- }
+ delete_parent_cpu_data(p_data);
- list_del(&parent_cpu_data->node);
- if (parent_cpu_data->opp_table)
- dev_pm_opp_put_opp_table(parent_cpu_data->opp_table);
- kfree(parent_cpu_data);
- cpufreq_cpu_put(policy);
- }
-
- return ret;
+ return 0;
}
static int cpufreq_passive_register_notifier(struct devfreq *devfreq)
err_put_policy:
cpufreq_cpu_put(policy);
err:
- WARN_ON(cpufreq_passive_unregister_notifier(devfreq));
return ret;
}
if (!p_data)
return -EINVAL;
- if (!p_data->this)
- p_data->this = devfreq;
+ p_data->this = devfreq;
switch (event) {
case DEVFREQ_GOV_START:
if (old->context != context)
continue;
- dma_resv_list_set(list, i, replacement, usage);
+ dma_resv_list_set(list, i, dma_fence_get(replacement), usage);
dma_fence_put(old);
}
}
{
struct vm_area_struct *vma = vmf->vma;
struct udmabuf *ubuf = vma->vm_private_data;
+ pgoff_t pgoff = vmf->pgoff;
- vmf->page = ubuf->pages[vmf->pgoff];
+ if (pgoff >= ubuf->pagecount)
+ return VM_FAULT_SIGBUS;
+ vmf->page = ubuf->pages[pgoff];
get_page(vmf->page);
return 0;
}
for (i = 0; i < init_nr_desc_per_channel; i++) {
desc = at_xdmac_alloc_desc(chan, GFP_KERNEL);
if (!desc) {
+ if (i == 0) {
+ dev_warn(chan2dev(chan),
+ "can't allocate any descriptors\n");
+ return -EIO;
+ }
dev_warn(chan2dev(chan),
"only %d descriptors have been allocated\n", i);
break;
/*
* src and dst buffers are freed by ourselves below
*/
- if (params->polled) {
+ if (params->polled)
flags = DMA_CTRL_ACK;
- } else {
- if (dma_has_cap(DMA_INTERRUPT, dev->cap_mask)) {
- flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
- } else {
- pr_err("Channel does not support interrupt!\n");
- goto err_pq_array;
- }
- }
+ else
+ flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT;
ktime = ktime_get();
while (!(kthread_should_stop() ||
runtime = ktime_to_us(ktime);
ret = 0;
-err_pq_array:
kfree(dma_pq);
err_srcs_array:
kfree(srcs);
BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT;
axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
} else {
- val = BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT |
- BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT;
+ val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG);
+ val |= BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT |
+ BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT;
axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val);
}
{
u32 val;
- val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
if (chan->chip->dw->hdata->reg_map_8_channels) {
+ val = axi_dma_ioread32(chan->chip, DMAC_CHEN);
val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP_SHIFT);
val |= (BIT(chan->id) << DMAC_CHAN_SUSP_WE_SHIFT);
axi_dma_iowrite32(chan->chip, DMAC_CHEN, val);
} else {
+ val = axi_dma_ioread32(chan->chip, DMAC_CHSUSPREG);
val &= ~(BIT(chan->id) << DMAC_CHAN_SUSP2_SHIFT);
val |= (BIT(chan->id) << DMAC_CHAN_SUSP2_WE_SHIFT);
axi_dma_iowrite32(chan->chip, DMAC_CHSUSPREG, val);
struct idxd_wq *wq = idxd->wqs[i];
mutex_lock(&wq->wq_lock);
- if (wq->state == IDXD_WQ_ENABLED) {
- idxd_wq_disable_cleanup(wq);
- wq->state = IDXD_WQ_DISABLED;
- }
+ idxd_wq_disable_cleanup(wq);
idxd_wq_device_reset_cleanup(wq);
mutex_unlock(&wq->wq_lock);
}
dev_dbg(dev, "IDXD reset complete\n");
if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
- if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA))
+ if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA)) {
dev_warn(dev, "Unable to turn on user SVA feature.\n");
- else
+ } else {
set_bit(IDXD_FLAG_USER_PASID_ENABLED, &idxd->flags);
- if (idxd_enable_system_pasid(idxd))
- dev_warn(dev, "No in-kernel DMA with PASID.\n");
- else
- set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+ if (idxd_enable_system_pasid(idxd))
+ dev_warn(dev, "No in-kernel DMA with PASID.\n");
+ else
+ set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+ }
} else if (!sva) {
dev_warn(dev, "User forced SVA off via module param.\n");
}
* SDMA stops cyclic channel when DMA request triggers a channel and no SDMA
* owned buffer is available (i.e. BD_DONE was set too late).
*/
- if (!is_sdma_channel_enabled(sdmac->sdma, sdmac->channel)) {
+ if (sdmac->desc && !is_sdma_channel_enabled(sdmac->sdma, sdmac->channel)) {
dev_warn(sdmac->sdma->dev, "restart cyclic channel %d\n", sdmac->channel);
sdma_enable_channel(sdmac->sdma, sdmac->channel);
}
#if IS_ENABLED(CONFIG_SOC_IMX6Q)
MODULE_FIRMWARE("imx/sdma/sdma-imx6q.bin");
#endif
-#if IS_ENABLED(CONFIG_SOC_IMX7D)
+#if IS_ENABLED(CONFIG_SOC_IMX7D) || IS_ENABLED(CONFIG_SOC_IMX8M)
MODULE_FIRMWARE("imx/sdma/sdma-imx7d.bin");
#endif
MODULE_LICENSE("GPL");
d->core_clk = devm_clk_get_optional(dev, NULL);
if (IS_ERR(d->core_clk))
return PTR_ERR(d->core_clk);
- clk_prepare_enable(d->core_clk);
d->rst = devm_reset_control_get_optional(dev, NULL);
if (IS_ERR(d->rst))
return PTR_ERR(d->rst);
+
+ clk_prepare_enable(d->core_clk);
reset_control_deassert(d->rst);
ret = devm_add_action_or_reset(dev, ldma_clk_disable, d);
/* If the DMAC pool is empty, alloc new */
if (!desc) {
- DEFINE_SPINLOCK(lock);
+ static DEFINE_SPINLOCK(lock);
LIST_HEAD(pool);
if (!add_desc(&pool, &lock, GFP_ATOMIC, 1))
return 0;
}
-static int bam_pm_runtime_get_sync(struct device *dev)
-{
- if (pm_runtime_enabled(dev))
- return pm_runtime_get_sync(dev);
-
- return 0;
-}
-
/**
* bam_free_chan - Frees dma resources associated with specific channel
* @chan: specified channel
unsigned long flags;
int ret;
- ret = bam_pm_runtime_get_sync(bdev->dev);
+ ret = pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return;
unsigned long flag;
int ret;
- ret = bam_pm_runtime_get_sync(bdev->dev);
+ ret = pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return ret;
unsigned long flag;
int ret;
- ret = bam_pm_runtime_get_sync(bdev->dev);
+ ret = pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return ret;
if (srcs & P_IRQ)
tasklet_schedule(&bdev->task);
- ret = bam_pm_runtime_get_sync(bdev->dev);
+ ret = pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return IRQ_NONE;
if (!vd)
return;
- ret = bam_pm_runtime_get_sync(bdev->dev);
+ ret = pm_runtime_get_sync(bdev->dev);
if (ret < 0)
return;
if (ret)
goto err_unregister_dma;
- if (!bdev->bamclk) {
- pm_runtime_disable(&pdev->dev);
- return 0;
- }
-
pm_runtime_irq_safe(&pdev->dev);
pm_runtime_set_autosuspend_delay(&pdev->dev, BAM_DMA_AUTOSUSPEND_DELAY);
pm_runtime_use_autosuspend(&pdev->dev);
{
struct bam_device *bdev = dev_get_drvdata(dev);
- if (bdev->bamclk) {
- pm_runtime_force_suspend(dev);
- clk_unprepare(bdev->bamclk);
- }
+ pm_runtime_force_suspend(dev);
+ clk_unprepare(bdev->bamclk);
return 0;
}
struct bam_device *bdev = dev_get_drvdata(dev);
int ret;
- if (bdev->bamclk) {
- ret = clk_prepare(bdev->bamclk);
- if (ret)
- return ret;
+ ret = clk_prepare(bdev->bamclk);
+ if (ret)
+ return ret;
- pm_runtime_force_resume(dev);
- }
+ pm_runtime_force_resume(dev);
return 0;
}
if (dma_spec->args[0] >= xbar->xbar_requests) {
dev_err(&pdev->dev, "Invalid XBAR request number: %d\n",
dma_spec->args[0]);
+ put_device(&pdev->dev);
return ERR_PTR(-EINVAL);
}
dma_spec->np = of_parse_phandle(ofdma->of_node, "dma-masters", 0);
if (!dma_spec->np) {
dev_err(&pdev->dev, "Can't get DMA master\n");
+ put_device(&pdev->dev);
return ERR_PTR(-EINVAL);
}
map = kzalloc(sizeof(*map), GFP_KERNEL);
if (!map) {
of_node_put(dma_spec->np);
+ put_device(&pdev->dev);
return ERR_PTR(-ENOMEM);
}
mutex_unlock(&xbar->mutex);
dev_err(&pdev->dev, "Run out of free DMA requests\n");
kfree(map);
+ of_node_put(dma_spec->np);
+ put_device(&pdev->dev);
return ERR_PTR(-ENOMEM);
}
set_bit(map->xbar_out, xbar->dma_inuse);
dmi_memdev_name(handle, &bank, &device);
- /* both strings must be non-zero */
- if (bank && *bank && device && *device)
- snprintf(dimm->label, sizeof(dimm->label), "%s %s", bank, device);
+ /*
+ * Set to a NULL string when both bank and device are zero. In this case,
+ * the label assigned by default will be preserved.
+ */
+ snprintf(dimm->label, sizeof(dimm->label), "%s%s%s",
+ (bank && *bank) ? bank : "",
+ (bank && *bank && device && *device) ? " " : "",
+ (device && *device) ? device : "");
}
static void assign_dmi_dimm_info(struct dimm_info *dimm, struct memdev_dmi_entry *entry)
memset(p, 0, sizeof(*p));
}
+static void enable_intr(struct synps_edac_priv *priv)
+{
+ /* Enable UE/CE Interrupts */
+ if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
+ writel(DDR_UE_MASK | DDR_CE_MASK,
+ priv->baseaddr + ECC_CLR_OFST);
+ else
+ writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+ priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
+
+}
+
+static void disable_intr(struct synps_edac_priv *priv)
+{
+ /* Disable UE/CE Interrupts */
+ if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
+ writel(0x0, priv->baseaddr + ECC_CLR_OFST);
+ else
+ writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
+ priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
+}
+
/**
* intr_handler - Interrupt Handler for ECC interrupts.
* @irq: IRQ number.
/* v3.0 of the controller does not have this register */
if (!(priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR))
writel(regval, priv->baseaddr + DDR_QOS_IRQ_STAT_OFST);
+ else
+ enable_intr(priv);
+
return IRQ_HANDLED;
}
init_csrows(mci);
}
-static void enable_intr(struct synps_edac_priv *priv)
-{
- /* Enable UE/CE Interrupts */
- if (priv->p_data->quirks & DDR_ECC_INTR_SELF_CLEAR)
- writel(DDR_UE_MASK | DDR_CE_MASK,
- priv->baseaddr + ECC_CLR_OFST);
- else
- writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
- priv->baseaddr + DDR_QOS_IRQ_EN_OFST);
-
-}
-
-static void disable_intr(struct synps_edac_priv *priv)
-{
- /* Disable UE/CE Interrupts */
- writel(DDR_QOSUE_MASK | DDR_QOSCE_MASK,
- priv->baseaddr + DDR_QOS_IRQ_DB_OFST);
-}
-
static int setup_irq(struct mem_ctl_info *mci,
struct platform_device *pdev)
{
struct fw_cdev_get_cycle_timer2 *a = &arg->get_cycle_timer2;
struct fw_card *card = client->device->card;
struct timespec64 ts = {0, 0};
- u32 cycle_time;
+ u32 cycle_time = 0;
int ret = 0;
local_irq_disable();
struct fw_device *device = fw_device(dev->parent);
struct fw_unit *unit = fw_unit(dev);
- return snprintf(buf, PAGE_SIZE, "%d\n",
- (int)(unit->directory - device->config_rom));
+ return sysfs_emit(buf, "%td\n", unit->directory - device->config_rom);
}
static struct device_attribute fw_unit_attributes[] = {
int ret;
down_read(&fw_device_rwsem);
- ret = snprintf(buf, PAGE_SIZE, "0x%08x%08x\n",
- device->config_rom[3], device->config_rom[4]);
+ ret = sysfs_emit(buf, "0x%08x%08x\n", device->config_rom[3], device->config_rom[4]);
up_read(&fw_device_rwsem);
return ret;
struct scmi_msg_resp_base_discover_agent {
__le32 agent_id;
- u8 name[SCMI_MAX_STR_SIZE];
+ u8 name[SCMI_SHORT_NAME_MAX_SIZE];
};
ret = ph->xops->do_xfer(ph, t);
if (!ret)
- memcpy(vendor_id, t->rx.buf, size);
+ strscpy(vendor_id, t->rx.buf, size);
ph->xops->xfer_put(ph, t);
calc_list_sz = (1 + (loop_num_ret - 1) / sizeof(u32)) *
sizeof(u32);
if (calc_list_sz != real_list_sz) {
- dev_err(dev,
- "Malformed reply - real_sz:%zd calc_sz:%u\n",
- real_list_sz, calc_list_sz);
- ret = -EPROTO;
- break;
+ dev_warn(dev,
+ "Malformed reply - real_sz:%zd calc_sz:%u (loop_num_ret:%d)\n",
+ real_list_sz, calc_list_sz, loop_num_ret);
+ /*
+ * Bail out if the expected list size is bigger than the
+ * total payload size of the received reply.
+ */
+ if (calc_list_sz > real_list_sz) {
+ ret = -EPROTO;
+ break;
+ }
}
for (loop = 0; loop < loop_num_ret; loop++)
ret = ph->xops->do_xfer(ph, t);
if (!ret) {
agent_info = t->rx.buf;
- strlcpy(name, agent_info->name, SCMI_MAX_STR_SIZE);
+ strscpy(name, agent_info->name, SCMI_SHORT_NAME_MAX_SIZE);
}
ph->xops->xfer_put(ph, t);
int id, ret;
u8 *prot_imp;
u32 version;
- char name[SCMI_MAX_STR_SIZE];
+ char name[SCMI_SHORT_NAME_MAX_SIZE];
struct device *dev = ph->dev;
struct scmi_revision_info *rev = scmi_revision_area_get(ph);
return NULL;
}
- id = ida_simple_get(&scmi_bus_id, 1, 0, GFP_KERNEL);
+ id = ida_alloc_min(&scmi_bus_id, 1, GFP_KERNEL);
if (id < 0) {
kfree_const(scmi_dev->name);
kfree(scmi_dev);
put_dev:
kfree_const(scmi_dev->name);
put_device(&scmi_dev->dev);
- ida_simple_remove(&scmi_bus_id, id);
+ ida_free(&scmi_bus_id, id);
return NULL;
}
{
kfree_const(scmi_dev->name);
scmi_handle_put(scmi_dev->handle);
- ida_simple_remove(&scmi_bus_id, scmi_dev->id);
+ ida_free(&scmi_bus_id, scmi_dev->id);
device_unregister(&scmi_dev->dev);
}
if (!ret) {
u32 latency = 0;
attributes = le32_to_cpu(attr->attributes);
- strlcpy(clk->name, attr->name, SCMI_MAX_STR_SIZE);
+ strscpy(clk->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE);
/* clock_enable_latency field is present only since SCMI v3.1 */
if (PROTOCOL_REV_MAJOR(version) >= 0x2)
latency = le32_to_cpu(attr->clock_enable_latency);
}
struct scmi_clk_ipriv {
+ struct device *dev;
u32 clk_id;
struct scmi_clock_info *clk;
};
st->num_returned = NUM_RETURNED(flags);
p->clk->rate_discrete = RATE_DISCRETE(flags);
+ /* Warn about out of spec replies ... */
+ if (!p->clk->rate_discrete &&
+ (st->num_returned != 3 || st->num_remaining != 0)) {
+ dev_warn(p->dev,
+ "Out-of-spec CLOCK_DESCRIBE_RATES reply for %s - returned:%d remaining:%d rx_len:%zd\n",
+ p->clk->name, st->num_returned, st->num_remaining,
+ st->rx_len);
+
+ /*
+ * A known quirk: a triplet is returned but num_returned != 3
+ * Check for a safe payload size and fix.
+ */
+ if (st->num_returned != 3 && st->num_remaining == 0 &&
+ st->rx_len == sizeof(*r) + sizeof(__le32) * 2 * 3) {
+ st->num_returned = 3;
+ st->num_remaining = 0;
+ } else {
+ dev_err(p->dev,
+ "Cannot fix out-of-spec reply !\n");
+ return -EPROTO;
+ }
+ }
+
return 0;
}
*rate = RATE_TO_U64(r->rate[st->loop_idx]);
p->clk->list.num_rates++;
- //XXX dev_dbg(ph->dev, "Rate %llu Hz\n", *rate);
}
return ret;
struct scmi_clock_info *clk)
{
int ret;
-
void *iter;
- struct scmi_msg_clock_describe_rates *msg;
struct scmi_iterator_ops ops = {
.prepare_message = iter_clk_describe_prepare_message,
.update_state = iter_clk_describe_update_state,
struct scmi_clk_ipriv cpriv = {
.clk_id = clk_id,
.clk = clk,
+ .dev = ph->dev,
};
iter = ph->hops->iter_response_init(ph, &ops, SCMI_MAX_NUM_RATES,
CLOCK_DESCRIBE_RATES,
- sizeof(*msg), &cpriv);
+ sizeof(struct scmi_msg_clock_describe_rates),
+ &cpriv);
if (IS_ERR(iter))
return PTR_ERR(iter);
if (ret)
break;
+ st->rx_len = i->t->rx.len;
ret = iops->update_state(st, i->resp, i->priv);
if (ret)
break;
u32 channel_id;
u32 tee_session;
u32 caps;
+ u32 rx_len;
struct mutex mu;
struct scmi_chan_info *cinfo;
union {
return -EIO;
}
+ /* Save response size */
+ channel->rx_len = param[2].u.memref.size;
+
return 0;
}
shbuf = tee_shm_get_va(channel->tee_shm, 0);
memset(shbuf, 0, msg_size);
channel->req.msg = shbuf;
+ channel->rx_len = msg_size;
return 0;
}
struct scmi_optee_channel *channel = cinfo->transport_info;
if (channel->tee_shm)
- msg_fetch_response(channel->req.msg, SCMI_OPTEE_MAX_MSG_SIZE, xfer);
+ msg_fetch_response(channel->req.msg, channel->rx_len, xfer);
else
shmem_fetch_response(channel->req.shmem, xfer);
}
dom_info->mult_factor =
(dom_info->sustained_freq_khz * 1000) /
dom_info->sustained_perf_level;
- strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE);
+ strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE);
}
ph->xops->xfer_put(ph, t);
{
int ret;
void *iter;
- struct scmi_msg_perf_describe_levels *msg;
struct scmi_iterator_ops ops = {
.prepare_message = iter_perf_levels_prepare_message,
.update_state = iter_perf_levels_update_state,
iter = ph->hops->iter_response_init(ph, &ops, MAX_OPPS,
PERF_DESCRIBE_LEVELS,
- sizeof(*msg), &ppriv);
+ sizeof(struct scmi_msg_perf_describe_levels),
+ &ppriv);
if (IS_ERR(iter))
return PTR_ERR(iter);
dom_info->state_set_notify = SUPPORTS_STATE_SET_NOTIFY(flags);
dom_info->state_set_async = SUPPORTS_STATE_SET_ASYNC(flags);
dom_info->state_set_sync = SUPPORTS_STATE_SET_SYNC(flags);
- strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE);
+ strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE);
}
ph->xops->xfer_put(ph, t);
#include <asm/unaligned.h>
-#define SCMI_SHORT_NAME_MAX_SIZE 16
-
#define PROTOCOL_REV_MINOR_MASK GENMASK(15, 0)
#define PROTOCOL_REV_MAJOR_MASK GENMASK(31, 16)
#define PROTOCOL_REV_MAJOR(x) ((u16)(FIELD_GET(PROTOCOL_REV_MAJOR_MASK, (x))))
* @max_resources: Maximum acceptable number of items, configured by the caller
* depending on the underlying resources that it is querying.
* @loop_idx: The iterator loop index in the current multi-part reply.
+ * @rx_len: Size in bytes of the currenly processed message; it can be used by
+ * the user of the iterator to verify a reply size.
* @priv: Optional pointer to some additional state-related private data setup
* by the caller during the iterations.
*/
unsigned int num_remaining;
unsigned int max_resources;
unsigned int loop_idx;
+ size_t rx_len;
void *priv;
};
dom_info->latency_us = le32_to_cpu(attr->latency);
if (dom_info->latency_us == U32_MAX)
dom_info->latency_us = 0;
- strlcpy(dom_info->name, attr->name, SCMI_MAX_STR_SIZE);
+ strscpy(dom_info->name, attr->name, SCMI_SHORT_NAME_MAX_SIZE);
}
ph->xops->xfer_put(ph, t);
struct scmi_sensor_info *s)
{
void *iter;
- struct scmi_msg_sensor_list_update_intervals *msg;
struct scmi_iterator_ops ops = {
.prepare_message = iter_intervals_prepare_message,
.update_state = iter_intervals_update_state,
iter = ph->hops->iter_response_init(ph, &ops, s->intervals.count,
SENSOR_LIST_UPDATE_INTERVALS,
- sizeof(*msg), &upriv);
+ sizeof(struct scmi_msg_sensor_list_update_intervals),
+ &upriv);
if (IS_ERR(iter))
return PTR_ERR(iter);
return ph->hops->iter_response_run(iter);
}
+struct scmi_apriv {
+ bool any_axes_support_extended_names;
+ struct scmi_sensor_info *s;
+};
+
static void iter_axes_desc_prepare_message(void *message,
const unsigned int desc_index,
const void *priv)
{
struct scmi_msg_sensor_axis_description_get *msg = message;
- const struct scmi_sensor_info *s = priv;
+ const struct scmi_apriv *apriv = priv;
/* Set the number of sensors to be skipped/already read */
- msg->id = cpu_to_le32(s->id);
+ msg->id = cpu_to_le32(apriv->s->id);
msg->axis_desc_index = cpu_to_le32(desc_index);
}
u32 attrh, attrl;
struct scmi_sensor_axis_info *a;
size_t dsize = SCMI_MSG_RESP_AXIS_DESCR_BASE_SZ;
- struct scmi_sensor_info *s = priv;
+ struct scmi_apriv *apriv = priv;
const struct scmi_axis_descriptor *adesc = st->priv;
attrl = le32_to_cpu(adesc->attributes_low);
+ if (SUPPORTS_EXTENDED_AXIS_NAMES(attrl))
+ apriv->any_axes_support_extended_names = true;
- a = &s->axis[st->desc_index + st->loop_idx];
+ a = &apriv->s->axis[st->desc_index + st->loop_idx];
a->id = le32_to_cpu(adesc->id);
a->extended_attrs = SUPPORTS_EXTEND_ATTRS(attrl);
attrh = le32_to_cpu(adesc->attributes_high);
a->scale = S32_EXT(SENSOR_SCALE(attrh));
a->type = SENSOR_TYPE(attrh);
- strscpy(a->name, adesc->name, SCMI_MAX_STR_SIZE);
+ strscpy(a->name, adesc->name, SCMI_SHORT_NAME_MAX_SIZE);
if (a->extended_attrs) {
unsigned int ares = le32_to_cpu(adesc->resolution);
void *priv)
{
struct scmi_sensor_axis_info *a;
- const struct scmi_sensor_info *s = priv;
+ const struct scmi_apriv *apriv = priv;
struct scmi_sensor_axis_name_descriptor *adesc = st->priv;
+ u32 axis_id = le32_to_cpu(adesc->axis_id);
- a = &s->axis[st->desc_index + st->loop_idx];
+ if (axis_id >= st->max_resources)
+ return -EPROTO;
+
+ /*
+ * Pick the corresponding descriptor based on the axis_id embedded
+ * in the reply since the list of axes supporting extended names
+ * can be a subset of all the axes.
+ */
+ a = &apriv->s->axis[axis_id];
strscpy(a->name, adesc->name, SCMI_MAX_STR_SIZE);
st->priv = ++adesc;
scmi_sensor_axis_extended_names_get(const struct scmi_protocol_handle *ph,
struct scmi_sensor_info *s)
{
+ int ret;
void *iter;
- struct scmi_msg_sensor_axis_description_get *msg;
struct scmi_iterator_ops ops = {
.prepare_message = iter_axes_desc_prepare_message,
.update_state = iter_axes_extended_name_update_state,
.process_response = iter_axes_extended_name_process_response,
};
+ struct scmi_apriv apriv = {
+ .any_axes_support_extended_names = false,
+ .s = s,
+ };
iter = ph->hops->iter_response_init(ph, &ops, s->num_axis,
SENSOR_AXIS_NAME_GET,
- sizeof(*msg), s);
+ sizeof(struct scmi_msg_sensor_axis_description_get),
+ &apriv);
if (IS_ERR(iter))
return PTR_ERR(iter);
- return ph->hops->iter_response_run(iter);
+ /*
+ * Do not cause whole protocol initialization failure when failing to
+ * get extended names for axes.
+ */
+ ret = ph->hops->iter_response_run(iter);
+ if (ret)
+ dev_warn(ph->dev,
+ "Failed to get axes extended names for %s (ret:%d).\n",
+ s->name, ret);
+
+ return 0;
}
static int scmi_sensor_axis_description(const struct scmi_protocol_handle *ph,
{
int ret;
void *iter;
- struct scmi_msg_sensor_axis_description_get *msg;
struct scmi_iterator_ops ops = {
.prepare_message = iter_axes_desc_prepare_message,
.update_state = iter_axes_desc_update_state,
.process_response = iter_axes_desc_process_response,
};
+ struct scmi_apriv apriv = {
+ .any_axes_support_extended_names = false,
+ .s = s,
+ };
s->axis = devm_kcalloc(ph->dev, s->num_axis,
sizeof(*s->axis), GFP_KERNEL);
iter = ph->hops->iter_response_init(ph, &ops, s->num_axis,
SENSOR_AXIS_DESCRIPTION_GET,
- sizeof(*msg), s);
+ sizeof(struct scmi_msg_sensor_axis_description_get),
+ &apriv);
if (IS_ERR(iter))
return PTR_ERR(iter);
if (ret)
return ret;
- if (PROTOCOL_REV_MAJOR(version) >= 0x3)
+ if (PROTOCOL_REV_MAJOR(version) >= 0x3 &&
+ apriv.any_axes_support_extended_names)
ret = scmi_sensor_axis_extended_names_get(ph, s);
return ret;
SUPPORTS_AXIS(attrh) ?
SENSOR_AXIS_NUMBER(attrh) : 0,
SCMI_MAX_NUM_SENSOR_AXIS);
- strscpy(s->name, sdesc->name, SCMI_MAX_STR_SIZE);
+ strscpy(s->name, sdesc->name, SCMI_SHORT_NAME_MAX_SIZE);
/*
* If supported overwrite short name with the extended
{
int ret;
void *iter;
- struct scmi_msg_cmd_describe_levels *msg;
struct scmi_iterator_ops ops = {
.prepare_message = iter_volt_levels_prepare_message,
.update_state = iter_volt_levels_update_state,
iter = ph->hops->iter_response_init(ph, &ops, v->num_levels,
VOLTAGE_DESCRIBE_LEVELS,
- sizeof(*msg), &vpriv);
+ sizeof(struct scmi_msg_cmd_describe_levels),
+ &vpriv);
if (IS_ERR(iter))
return PTR_ERR(iter);
/* Retrieve domain attributes at first ... */
put_unaligned_le32(dom, td->tx.buf);
- ret = ph->xops->do_xfer(ph, td);
/* Skip domain on comms error */
- if (ret)
+ if (ph->xops->do_xfer(ph, td))
continue;
v = vinfo->domains + dom;
v->id = dom;
attributes = le32_to_cpu(resp_dom->attr);
- strlcpy(v->name, resp_dom->name, SCMI_MAX_STR_SIZE);
+ strscpy(v->name, resp_dom->name, SCMI_SHORT_NAME_MAX_SIZE);
/*
* If supported overwrite short name with the extended one;
v->async_level_set = true;
}
- ret = scmi_voltage_levels_get(ph, v);
/* Skip invalid voltage descriptors */
- if (ret)
- continue;
-
- ph->xops->reset_rx_to_maxsz(ph, td);
+ scmi_voltage_levels_get(ph, v);
}
ph->xops->xfer_put(ph, td);
#include <linux/efi.h>
#include <linux/reboot.h>
-static void (*orig_pm_power_off)(void);
+static struct sys_off_handler *efi_sys_off_handler;
int efi_reboot_quirk_mode = -1;
return false;
}
-static void efi_power_off(void)
+static int efi_power_off(struct sys_off_data *data)
{
efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL);
- /*
- * The above call should not return, if it does fall back to
- * the original power off method (typically ACPI poweroff).
- */
- if (orig_pm_power_off)
- orig_pm_power_off();
+
+ return NOTIFY_DONE;
}
static int __init efi_shutdown_init(void)
return -ENODEV;
if (efi_poweroff_required()) {
- orig_pm_power_off = pm_power_off;
- pm_power_off = efi_power_off;
+ /* SYS_OFF_PRIO_FIRMWARE + 1 so that it runs before acpi_power_off */
+ efi_sys_off_handler =
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_FIRMWARE + 1,
+ efi_power_off, NULL);
+ if (IS_ERR(efi_sys_off_handler))
+ return PTR_ERR(efi_sys_off_handler);
}
return 0;
#include <linux/sysfb.h>
#include <video/vga.h>
-#include <asm/efi.h>
-
enum {
OVERRIDE_NONE = 0x0,
OVERRIDE_BASE = 0x1,
#include <linux/screen_info.h>
#include <linux/sysfb.h>
+static struct platform_device *pd;
+static DEFINE_MUTEX(disable_lock);
+static bool disabled;
+
+static bool sysfb_unregister(void)
+{
+ if (IS_ERR_OR_NULL(pd))
+ return false;
+
+ platform_device_unregister(pd);
+ pd = NULL;
+
+ return true;
+}
+
+/**
+ * sysfb_disable() - disable the Generic System Framebuffers support
+ *
+ * This disables the registration of system framebuffer devices that match the
+ * generic drivers that make use of the system framebuffer set up by firmware.
+ *
+ * It also unregisters a device if this was already registered by sysfb_init().
+ *
+ * Context: The function can sleep. A @disable_lock mutex is acquired to serialize
+ * against sysfb_init(), that registers a system framebuffer device.
+ */
+void sysfb_disable(void)
+{
+ mutex_lock(&disable_lock);
+ sysfb_unregister();
+ disabled = true;
+ mutex_unlock(&disable_lock);
+}
+EXPORT_SYMBOL_GPL(sysfb_disable);
+
static __init int sysfb_init(void)
{
struct screen_info *si = &screen_info;
struct simplefb_platform_data mode;
- struct platform_device *pd;
const char *name;
bool compatible;
- int ret;
+ int ret = 0;
+
+ mutex_lock(&disable_lock);
+ if (disabled)
+ goto unlock_mutex;
/* try to create a simple-framebuffer device */
compatible = sysfb_parse_mode(si, &mode);
if (compatible) {
- ret = sysfb_create_simplefb(si, &mode);
- if (!ret)
- return 0;
+ pd = sysfb_create_simplefb(si, &mode);
+ if (!IS_ERR(pd))
+ goto unlock_mutex;
}
/* if the FB is incompatible, create a legacy framebuffer device */
name = "platform-framebuffer";
pd = platform_device_alloc(name, 0);
- if (!pd)
- return -ENOMEM;
+ if (!pd) {
+ ret = -ENOMEM;
+ goto unlock_mutex;
+ }
sysfb_apply_efi_quirks(pd);
if (ret)
goto err;
- return 0;
+ goto unlock_mutex;
err:
platform_device_put(pd);
+unlock_mutex:
+ mutex_unlock(&disable_lock);
return ret;
}
return false;
}
-__init int sysfb_create_simplefb(const struct screen_info *si,
- const struct simplefb_platform_data *mode)
+__init struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
+ const struct simplefb_platform_data *mode)
{
struct platform_device *pd;
struct resource res;
base |= (u64)si->ext_lfb_base << 32;
if (!base || (u64)(resource_size_t)base != base) {
printk(KERN_DEBUG "sysfb: inaccessible VRAM base\n");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
/*
length = mode->height * mode->stride;
if (length > size) {
printk(KERN_WARNING "sysfb: VRAM smaller than advertised\n");
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
length = PAGE_ALIGN(length);
res.start = base;
res.end = res.start + length - 1;
if (res.end <= res.start)
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
pd = platform_device_alloc("simple-framebuffer", 0);
if (!pd)
- return -ENOMEM;
+ return ERR_PTR(-ENOMEM);
sysfb_apply_efi_quirks(pd);
if (ret)
goto err_put_device;
- return 0;
+ return pd;
err_put_device:
platform_device_put(pd);
- return ret;
+ return ERR_PTR(ret);
}
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/seq_file.h>
+#include <linux/types.h>
#define CRYSTALCOVE_GPIO_NUM 16
#define CRYSTALCOVE_VGPIO_NUM 95
return reg + gpio % 8;
}
-static void crystalcove_update_irq_mask(struct crystalcove_gpio *cg,
- int gpio)
+static void crystalcove_update_irq_mask(struct crystalcove_gpio *cg, int gpio)
{
u8 mirqs0 = gpio < 8 ? MGPIO0IRQS0 : MGPIO1IRQS0;
int mask = BIT(gpio % 8);
return regmap_write(cg->regmap, reg, CTLO_INPUT_SET);
}
-static int crystalcove_gpio_dir_out(struct gpio_chip *chip, unsigned int gpio,
- int value)
+static int crystalcove_gpio_dir_out(struct gpio_chip *chip, unsigned int gpio, int value)
{
struct crystalcove_gpio *cg = gpiochip_get_data(chip);
int reg = to_reg(gpio, CTRL_OUT);
return val & 0x1;
}
-static void crystalcove_gpio_set(struct gpio_chip *chip,
- unsigned int gpio, int value)
+static void crystalcove_gpio_set(struct gpio_chip *chip, unsigned int gpio, int value)
{
struct crystalcove_gpio *cg = gpiochip_get_data(chip);
int reg = to_reg(gpio, CTRL_OUT);
static int crystalcove_irq_type(struct irq_data *data, unsigned int type)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ struct crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
- if (data->hwirq >= CRYSTALCOVE_GPIO_NUM)
+ if (hwirq >= CRYSTALCOVE_GPIO_NUM)
return 0;
switch (type) {
static void crystalcove_bus_lock(struct irq_data *data)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ struct crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data));
mutex_lock(&cg->buslock);
}
static void crystalcove_bus_sync_unlock(struct irq_data *data)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
- int gpio = data->hwirq;
+ struct crystalcove_gpio *cg = gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
if (cg->update & UPDATE_IRQ_TYPE)
- crystalcove_update_irq_ctrl(cg, gpio);
+ crystalcove_update_irq_ctrl(cg, hwirq);
if (cg->update & UPDATE_IRQ_MASK)
- crystalcove_update_irq_mask(cg, gpio);
+ crystalcove_update_irq_mask(cg, hwirq);
cg->update = 0;
mutex_unlock(&cg->buslock);
static void crystalcove_irq_unmask(struct irq_data *data)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+ struct crystalcove_gpio *cg = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
- if (data->hwirq < CRYSTALCOVE_GPIO_NUM) {
- cg->set_irq_mask = false;
- cg->update |= UPDATE_IRQ_MASK;
- }
+ if (hwirq >= CRYSTALCOVE_GPIO_NUM)
+ return;
+
+ gpiochip_enable_irq(gc, hwirq);
+
+ cg->set_irq_mask = false;
+ cg->update |= UPDATE_IRQ_MASK;
}
static void crystalcove_irq_mask(struct irq_data *data)
{
- struct crystalcove_gpio *cg =
- gpiochip_get_data(irq_data_get_irq_chip_data(data));
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+ struct crystalcove_gpio *cg = gpiochip_get_data(gc);
+ irq_hw_number_t hwirq = irqd_to_hwirq(data);
- if (data->hwirq < CRYSTALCOVE_GPIO_NUM) {
- cg->set_irq_mask = true;
- cg->update |= UPDATE_IRQ_MASK;
- }
+ if (hwirq >= CRYSTALCOVE_GPIO_NUM)
+ return;
+
+ cg->set_irq_mask = true;
+ cg->update |= UPDATE_IRQ_MASK;
+
+ gpiochip_disable_irq(gc, hwirq);
}
-static struct irq_chip crystalcove_irqchip = {
+static const struct irq_chip crystalcove_irqchip = {
.name = "Crystal Cove",
.irq_mask = crystalcove_irq_mask,
.irq_unmask = crystalcove_irq_unmask,
.irq_set_type = crystalcove_irq_type,
.irq_bus_lock = crystalcove_bus_lock,
.irq_bus_sync_unlock = crystalcove_bus_sync_unlock,
- .flags = IRQCHIP_SKIP_SET_WAKE,
+ .flags = IRQCHIP_SKIP_SET_WAKE | IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static irqreturn_t crystalcove_gpio_irq_handler(int irq, void *data)
return IRQ_HANDLED;
}
-static void crystalcove_gpio_dbg_show(struct seq_file *s,
- struct gpio_chip *chip)
+static void crystalcove_gpio_dbg_show(struct seq_file *s, struct gpio_chip *chip)
{
struct crystalcove_gpio *cg = gpiochip_get_data(chip);
int gpio, offset;
cg->regmap = pmic->regmap;
girq = &cg->chip.irq;
- girq->chip = &crystalcove_irqchip;
+ gpio_irq_chip_set_chip(girq, &crystalcove_irqchip);
/* This will let us handle the parent IRQ in the driver */
girq->parent_handler = NULL;
girq->num_parents = 0;
struct dln2_gpio {
struct platform_device *pdev;
struct gpio_chip gpio;
- struct irq_chip irqchip;
/*
* Cache pin direction to save us one transfer, since the hardware has
struct dln2_gpio *dln2 = gpiochip_get_data(gc);
int pin = irqd_to_hwirq(irqd);
+ gpiochip_enable_irq(gc, pin);
set_bit(pin, dln2->unmasked_irqs);
}
int pin = irqd_to_hwirq(irqd);
clear_bit(pin, dln2->unmasked_irqs);
+ gpiochip_disable_irq(gc, pin);
}
static int dln2_irq_set_type(struct irq_data *irqd, unsigned type)
mutex_unlock(&dln2->irq_lock);
}
+static const struct irq_chip dln2_irqchip = {
+ .name = "dln2-irq",
+ .irq_mask = dln2_irq_mask,
+ .irq_unmask = dln2_irq_unmask,
+ .irq_set_type = dln2_irq_set_type,
+ .irq_bus_lock = dln2_irq_bus_lock,
+ .irq_bus_sync_unlock = dln2_irq_bus_unlock,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
static void dln2_gpio_event(struct platform_device *pdev, u16 echo,
const void *data, int len)
{
dln2->gpio.direction_output = dln2_gpio_direction_output;
dln2->gpio.set_config = dln2_gpio_set_config;
- dln2->irqchip.name = "dln2-irq",
- dln2->irqchip.irq_mask = dln2_irq_mask,
- dln2->irqchip.irq_unmask = dln2_irq_unmask,
- dln2->irqchip.irq_set_type = dln2_irq_set_type,
- dln2->irqchip.irq_bus_lock = dln2_irq_bus_lock,
- dln2->irqchip.irq_bus_sync_unlock = dln2_irq_bus_unlock,
-
girq = &dln2->gpio.irq;
- girq->chip = &dln2->irqchip;
+ gpio_irq_chip_set_chip(girq, &dln2_irqchip);
/* The event comes from the outside so no parent handler */
girq->parent_handler = NULL;
girq->num_parents = 0;
gpio->clks[1].id = "db";
err = devm_clk_bulk_get_optional(gpio->dev, DWAPB_NR_CLOCKS,
gpio->clks);
- if (err) {
- dev_err(gpio->dev, "Cannot get APB/Debounce clocks\n");
- return err;
- }
+ if (err)
+ return dev_err_probe(gpio->dev, err,
+ "Cannot get APB/Debounce clocks\n");
err = clk_bulk_prepare_enable(DWAPB_NR_CLOCKS, gpio->clks);
if (err) {
static int grgpio_remove(struct platform_device *ofdev)
{
struct grgpio_priv *priv = platform_get_drvdata(ofdev);
- int i;
- int ret = 0;
-
- if (priv->domain) {
- for (i = 0; i < GRGPIO_MAX_NGPIO; i++) {
- if (priv->uirqs[i].refcnt != 0) {
- ret = -EBUSY;
- goto out;
- }
- }
- }
gpiochip_remove(&priv->gc);
if (priv->domain)
irq_domain_remove(priv->domain);
-out:
- return ret;
+ return 0;
}
static const struct of_device_id grgpio_match[] = {
raw_spin_unlock_irqrestore(&priv->lock, flags);
}
-static void mrfld_irq_unmask_mask(struct irq_data *d, bool unmask)
+static void mrfld_irq_unmask_mask(struct mrfld_gpio *priv, u32 gpio, bool unmask)
{
- struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d);
- u32 gpio = irqd_to_hwirq(d);
void __iomem *gimr = gpio_reg(&priv->chip, gpio, GIMR);
unsigned long flags;
u32 value;
static void mrfld_irq_mask(struct irq_data *d)
{
- mrfld_irq_unmask_mask(d, false);
+ struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d);
+ u32 gpio = irqd_to_hwirq(d);
+
+ mrfld_irq_unmask_mask(priv, gpio, false);
+ gpiochip_disable_irq(&priv->chip, gpio);
}
static void mrfld_irq_unmask(struct irq_data *d)
{
- mrfld_irq_unmask_mask(d, true);
+ struct mrfld_gpio *priv = irq_data_get_irq_chip_data(d);
+ u32 gpio = irqd_to_hwirq(d);
+
+ gpiochip_enable_irq(&priv->chip, gpio);
+ mrfld_irq_unmask_mask(priv, gpio, true);
}
static int mrfld_irq_set_type(struct irq_data *d, unsigned int type)
return 0;
}
-static struct irq_chip mrfld_irqchip = {
+static const struct irq_chip mrfld_irqchip = {
.name = "gpio-merrifield",
.irq_ack = mrfld_irq_ack,
.irq_mask = mrfld_irq_mask,
.irq_unmask = mrfld_irq_unmask,
.irq_set_type = mrfld_irq_set_type,
.irq_set_wake = mrfld_irq_set_wake,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static void mrfld_irq_handler(struct irq_desc *desc)
return retval;
girq = &priv->chip.irq;
- girq->chip = &mrfld_irqchip;
+ gpio_irq_chip_set_chip(girq, &mrfld_irqchip);
girq->init_hw = mrfld_irq_init_hw;
girq->parent_handler = mrfld_irq_handler;
girq->num_parents = 1;
// SPDX-License-Identifier: GPL-2.0+
//
-// MXC GPIO support. (c) 2008 Daniel Mack <daniel@caiaq.de>
+// MXS GPIO support. (c) 2008 Daniel Mack <daniel@caiaq.de>
// Copyright 2008 Juergen Beisert, kernel@pengutronix.de
//
// Based on code from Freescale,
.reg_bits = 8,
.val_bits = 8,
+ .use_single_read = true,
+ .use_single_write = true,
+
.readable_reg = pca953x_readable_register,
.writeable_reg = pca953x_writeable_register,
.volatile_reg = pca953x_volatile_register,
static int device_pca95xx_init(struct pca953x_chip *chip, u32 invert)
{
DECLARE_BITMAP(val, MAX_LINE);
+ u8 regaddr;
int ret;
- ret = regcache_sync_region(chip->regmap, chip->regs->output,
- chip->regs->output + NBANK(chip));
+ regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
+ ret = regcache_sync_region(chip->regmap, regaddr,
+ regaddr + NBANK(chip) - 1);
if (ret)
goto out;
- ret = regcache_sync_region(chip->regmap, chip->regs->direction,
- chip->regs->direction + NBANK(chip));
+ regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
+ ret = regcache_sync_region(chip->regmap, regaddr,
+ regaddr + NBANK(chip) - 1);
if (ret)
goto out;
* sync these registers first and only then sync the rest.
*/
regaddr = pca953x_recalc_addr(chip, chip->regs->direction, 0);
- ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip));
+ ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync GPIO dir registers: %d\n", ret);
return ret;
}
regaddr = pca953x_recalc_addr(chip, chip->regs->output, 0);
- ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip));
+ ret = regcache_sync_region(chip->regmap, regaddr, regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync GPIO out registers: %d\n", ret);
return ret;
if (chip->driver_data & PCA_PCAL) {
regaddr = pca953x_recalc_addr(chip, PCAL953X_IN_LATCH, 0);
ret = regcache_sync_region(chip->regmap, regaddr,
- regaddr + NBANK(chip));
+ regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync INT latch registers: %d\n",
ret);
regaddr = pca953x_recalc_addr(chip, PCAL953X_INT_MASK, 0);
ret = regcache_sync_region(chip->regmap, regaddr,
- regaddr + NBANK(chip));
+ regaddr + NBANK(chip) - 1);
if (ret) {
dev_err(dev, "Failed to sync INT mask registers: %d\n",
ret);
unsigned long flags;
u16 m;
+ gpiochip_enable_irq(&ctrl->gc, line);
+
raw_spin_lock_irqsave(&ctrl->lock, flags);
m = ctrl->intr_mask[port];
m |= realtek_gpio_imr_bits(port_pin, REALTEK_GPIO_IMR_LINE_MASK);
ctrl->intr_mask[port] = m;
realtek_gpio_write_imr(ctrl, port, ctrl->intr_type[port], m);
raw_spin_unlock_irqrestore(&ctrl->lock, flags);
+
+ gpiochip_disable_irq(&ctrl->gc, line);
}
static int realtek_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
return 0;
}
-static struct irq_chip realtek_gpio_irq_chip = {
+static const struct irq_chip realtek_gpio_irq_chip = {
.name = "realtek-otto-gpio",
.irq_ack = realtek_gpio_irq_ack,
.irq_mask = realtek_gpio_irq_mask,
.irq_unmask = realtek_gpio_irq_unmask,
.irq_set_type = realtek_gpio_irq_set_type,
.irq_set_affinity = realtek_gpio_irq_set_affinity,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static const struct of_device_id realtek_gpio_of_match[] = {
irq = platform_get_irq_optional(pdev, 0);
if (!(dev_flags & GPIO_INTERRUPTS_DISABLED) && irq > 0) {
girq = &ctrl->gc.irq;
- girq->chip = &realtek_gpio_irq_chip;
+ gpio_irq_chip_set_chip(girq, &realtek_gpio_irq_chip);
girq->default_type = IRQ_TYPE_NONE;
girq->handler = handle_bad_irq;
girq->parent_handler = realtek_gpio_irq_handler;
struct sch_gpio {
struct gpio_chip chip;
- struct irq_chip irqchip;
spinlock_t lock;
unsigned short iobase;
unsigned short resume_base;
spin_unlock_irqrestore(&sch->lock, flags);
}
-static void sch_irq_mask_unmask(struct irq_data *d, int val)
+static void sch_irq_mask_unmask(struct gpio_chip *gc, irq_hw_number_t gpio_num, int val)
{
- struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
struct sch_gpio *sch = gpiochip_get_data(gc);
- irq_hw_number_t gpio_num = irqd_to_hwirq(d);
unsigned long flags;
spin_lock_irqsave(&sch->lock, flags);
static void sch_irq_mask(struct irq_data *d)
{
- sch_irq_mask_unmask(d, 0);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+
+ sch_irq_mask_unmask(gc, gpio_num, 0);
+ gpiochip_disable_irq(gc, gpio_num);
}
static void sch_irq_unmask(struct irq_data *d)
{
- sch_irq_mask_unmask(d, 1);
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+
+ gpiochip_enable_irq(gc, gpio_num);
+ sch_irq_mask_unmask(gc, gpio_num, 1);
}
+static const struct irq_chip sch_irqchip = {
+ .name = "sch_gpio",
+ .irq_ack = sch_irq_ack,
+ .irq_mask = sch_irq_mask,
+ .irq_unmask = sch_irq_unmask,
+ .irq_set_type = sch_irq_type,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
+};
+
static u32 sch_gpio_gpe_handler(acpi_handle gpe_device, u32 gpe, void *context)
{
struct sch_gpio *sch = context;
platform_set_drvdata(pdev, sch);
- sch->irqchip.name = "sch_gpio";
- sch->irqchip.irq_ack = sch_irq_ack;
- sch->irqchip.irq_mask = sch_irq_mask;
- sch->irqchip.irq_unmask = sch_irq_unmask;
- sch->irqchip.irq_set_type = sch_irq_type;
-
girq = &sch->chip.irq;
- girq->chip = &sch->irqchip;
+ gpio_irq_chip_set_chip(girq, &sch_irqchip);
girq->num_parents = 0;
girq->parents = NULL;
girq->parent_handler = NULL;
};
struct gpio_sim_chip_name_ctx {
- struct gpio_sim_device *dev;
+ struct fwnode_handle *swnode;
char *page;
};
static int gpio_sim_emit_chip_name(struct device *dev, void *data)
{
struct gpio_sim_chip_name_ctx *ctx = data;
- struct fwnode_handle *swnode;
- struct gpio_sim_bank *bank;
/* This would be the sysfs device exported in /sys/class/gpio. */
if (dev->class)
return 0;
- swnode = dev_fwnode(dev);
+ if (device_match_fwnode(dev, ctx->swnode))
+ return sprintf(ctx->page, "%s\n", dev_name(dev));
- list_for_each_entry(bank, &ctx->dev->bank_list, siblings) {
- if (bank->swnode == swnode)
- return sprintf(ctx->page, "%s\n", dev_name(dev));
- }
-
- return -ENODATA;
+ return 0;
}
static ssize_t gpio_sim_bank_config_chip_name_show(struct config_item *item,
{
struct gpio_sim_bank *bank = to_gpio_sim_bank(item);
struct gpio_sim_device *dev = gpio_sim_bank_get_device(bank);
- struct gpio_sim_chip_name_ctx ctx = { dev, page };
+ struct gpio_sim_chip_name_ctx ctx = { bank->swnode, page };
int ret;
mutex_lock(&dev->lock);
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/of_irq.h>
+#include <linux/pinctrl/consumer.h>
#define VF610_GPIO_PER_PORT 32
printk(KERN_ERR "spurious GIU interrupt: %04x(%04x),%04x(%04x)\n",
maskl, pendl, maskh, pendh);
- atomic_inc(&irq_err_count);
-
return -EINVAL;
}
if (gpio >= WCOVE_GPIO_NUM)
return;
+ gpiochip_enable_irq(chip, gpio);
+
wg->set_irq_mask = false;
wg->update |= UPDATE_IRQ_MASK;
}
wg->set_irq_mask = true;
wg->update |= UPDATE_IRQ_MASK;
+
+ gpiochip_disable_irq(chip, gpio);
}
-static struct irq_chip wcove_irqchip = {
+static const struct irq_chip wcove_irqchip = {
.name = "Whiskey Cove",
.irq_mask = wcove_irq_mask,
.irq_unmask = wcove_irq_unmask,
.irq_set_type = wcove_irq_type,
.irq_bus_lock = wcove_bus_lock,
.irq_bus_sync_unlock = wcove_bus_sync_unlock,
+ .flags = IRQCHIP_IMMUTABLE,
+ GPIOCHIP_IRQ_RESOURCE_HELPERS,
};
static irqreturn_t wcove_gpio_irq_handler(int irq, void *data)
}
girq = &wg->chip.irq;
- girq->chip = &wcove_irqchip;
+ gpio_irq_chip_set_chip(girq, &wcove_irqchip);
/* This will let us handle the parent IRQ in the driver */
girq->parent_handler = NULL;
girq->num_parents = 0;
unsigned long *base = gpiochip_get_data(gc);
const struct winbond_gpio_info *info;
bool val;
+ int ret;
winbond_gpio_get_info(&offset, &info);
- val = winbond_sio_enter(*base);
- if (val)
- return val;
+ ret = winbond_sio_enter(*base);
+ if (ret)
+ return ret;
winbond_sio_select_logical(*base, info->dev);
const unsigned long offset = (bit % BITS_PER_LONG) & BIT(5);
map[index] &= ~(0xFFFFFFFFul << offset);
- map[index] |= v << offset;
+ map[index] |= (unsigned long)v << offset;
}
static inline int xgpio_regoffset(struct xgpio_instance *chip, int ch)
* @work: the worker that implements software debouncing
* @sw_debounced: flag indicating if the software debouncer is active
* @level: the current debounced physical level of the line
+ * @hdesc: the Hardware Timestamp Engine (HTE) descriptor
+ * @raw_level: the line level at the time of event
+ * @total_discard_seq: the running counter of the discarded events
+ * @last_seqno: the last sequence number before debounce period expires
*/
struct line {
struct gpio_desc *desc;
static void linereq_free(struct linereq *lr)
{
unsigned int i;
- bool hte;
+ bool hte = false;
for (i = 0; i < lr->num_lines; i++) {
- hte = !!test_bit(FLAG_EVENT_CLOCK_HTE,
- &lr->lines[i].desc->flags);
+ if (lr->lines[i].desc)
+ hte = !!test_bit(FLAG_EVENT_CLOCK_HTE,
+ &lr->lines[i].desc->flags);
edge_detector_stop(&lr->lines[i], hte);
if (lr->lines[i].desc)
gpiod_free(lr->lines[i].desc);
select HWMON
select BACKLIGHT_CLASS_DEVICE
select INTERVAL_TREE
- select DRM_BUDDY
help
Choose this option if you have a recent AMD Radeon graphics card.
{
bool all_hub = false;
- if (adev->family == AMDGPU_FAMILY_AI)
+ if (adev->family == AMDGPU_FAMILY_AI ||
+ adev->family == AMDGPU_FAMILY_RV)
all_hub = true;
return amdgpu_gmc_flush_gpu_tlb_pasid(adev, pasid, flush_type, all_hub);
struct amdgpu_vm *vm)
{
struct amdkfd_process_info *process_info = vm->process_info;
- struct amdgpu_bo *pd = vm->root.bo;
if (!process_info)
return;
- /* Release eviction fence from PD */
- amdgpu_bo_reserve(pd, false);
- amdgpu_bo_fence(pd, NULL, false);
- amdgpu_bo_unreserve(pd);
-
/* Update process info */
mutex_lock(&process_info->lock);
process_info->n_vms--;
return -EINVAL;
}
- /* delete kgd_mem from kfd_bo_list to avoid re-validating
- * this BO in BO's restoring after eviction.
- */
mutex_lock(&mem->process_info->lock);
ret = amdgpu_bo_reserve(bo, true);
amdgpu_amdkfd_remove_eviction_fence(
bo, mem->process_info->eviction_fence);
- list_del_init(&mem->validate_list.head);
if (size)
*size = amdgpu_bo_size(bo);
process_info->eviction_fence = new_fence;
*ef = dma_fence_get(&new_fence->base);
- /* Attach new eviction fence to all BOs */
+ /* Attach new eviction fence to all BOs except pinned ones */
list_for_each_entry(mem, &process_info->kfd_bo_list,
- validate_list.head)
+ validate_list.head) {
+ if (mem->bo->tbo.pin_count)
+ continue;
+
amdgpu_bo_fence(mem->bo,
&process_info->eviction_fence->base, true);
-
+ }
/* Attach eviction fence to PD / PT BOs */
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node) {
{
struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
rhead);
-
+ mutex_destroy(&list->bo_list_mutex);
kvfree(list);
}
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
+ mutex_init(&list->bo_list_mutex);
*result = list;
return 0;
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
+
+ /* Protect access during command submission.
+ */
+ struct mutex bo_list_mutex;
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
return r;
}
+ mutex_lock(&p->bo_list->bo_list_mutex);
+
/* One for TTM and one for the CS job */
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->tv.num_shared = 2;
kvfree(e->user_pages);
e->user_pages = NULL;
}
+ mutex_unlock(&p->bo_list->bo_list_mutex);
}
return r;
}
{
unsigned i;
- if (error && backoff)
+ if (error && backoff) {
ttm_eu_backoff_reservation(&parser->ticket,
&parser->validated);
+ mutex_unlock(&parser->bo_list->bo_list_mutex);
+ }
for (i = 0; i < parser->num_post_deps; i++) {
drm_syncobj_put(parser->post_deps[i].syncobj);
continue;
r = amdgpu_vm_bo_update(adev, bo_va, false);
- if (r)
+ if (r) {
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
+ }
r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
- if (r)
+ if (r) {
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return r;
+ }
}
r = amdgpu_vm_handle_moved(adev, vm);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
mutex_unlock(&p->adev->notifier_lock);
+ mutex_unlock(&p->bo_list->bo_list_mutex);
return 0;
*/
amdgpu_unregister_gpu_instance(tmp_adev);
- drm_fb_helper_set_suspend_unlocked(adev_to_drm(adev)->fb_helper, true);
+ drm_fb_helper_set_suspend_unlocked(adev_to_drm(tmp_adev)->fb_helper, true);
/* disable ras on ALL IPs */
if (!need_emergency_restart &&
stime, etime, mode);
}
+static bool
+amdgpu_display_robj_is_fb(struct amdgpu_device *adev, struct amdgpu_bo *robj)
+{
+ struct drm_device *dev = adev_to_drm(adev);
+ struct drm_fb_helper *fb_helper = dev->fb_helper;
+
+ if (!fb_helper || !fb_helper->buffer)
+ return false;
+
+ if (gem_to_amdgpu_bo(fb_helper->buffer->gem) != robj)
+ return false;
+
+ return true;
+}
+
int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
{
struct drm_device *dev = adev_to_drm(adev);
continue;
}
robj = gem_to_amdgpu_bo(fb->obj[0]);
- r = amdgpu_bo_reserve(robj, true);
- if (r == 0) {
- amdgpu_bo_unpin(robj);
- amdgpu_bo_unreserve(robj);
+ if (!amdgpu_display_robj_is_fb(adev, robj)) {
+ r = amdgpu_bo_reserve(robj, true);
+ if (r == 0) {
+ amdgpu_bo_unpin(robj);
+ amdgpu_bo_unreserve(robj);
+ }
}
}
return 0;
int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
{
int r;
- r = amdgpu_ras_block_late_init(adev, ras_block);
- if (r)
- return r;
if (amdgpu_ras_is_supported(adev, ras_block->block)) {
if (!amdgpu_persistent_edc_harvesting_supported(adev))
amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
if (r)
goto late_fini;
+ } else {
+ amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
}
return 0;
case IP_VERSION(9, 1, 0):
/* RENOIR looks like RAVEN */
case IP_VERSION(9, 3, 0):
+ /* GC 10.3.7 */
+ case IP_VERSION(10, 3, 7):
if (amdgpu_tmz == 0) {
adev->gmc.tmz_enabled = false;
dev_info(adev->dev,
case IP_VERSION(10, 3, 1):
/* YELLOW_CARP*/
case IP_VERSION(10, 3, 3):
- /* GC 10.3.7 */
- case IP_VERSION(10, 3, 7):
/* Don't enable it by default yet.
*/
if (amdgpu_tmz < 1) {
if (!amdgpu_device_has_dc_support(adev)) {
if (!adev->enable_virtual_display)
/* Disable vblank IRQs aggressively for power-saving */
+ /* XXX: can this be enabled for DC? */
adev_to_drm(adev)->vblank_disable_immediate = true;
r = drm_vblank_init(adev_to_drm(adev), adev->mode_info.num_crtc);
atomic64_read(&adev->visible_pin_size),
vram_gtt.vram_size);
vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
- vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
mem.gtt.total_heap_size = gtt_man->size;
- mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
+ /* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
+ if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+ obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+ }
+
s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
"ue", info.ue_count,
"ce", info.ce_count);
if (amdgpu_ras_query_error_status(obj->adev, &info))
return -EINVAL;
- if (obj->adev->asic_type == CHIP_ALDEBARAN) {
+ if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+ obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
- DRM_WARN("Failed to reset error counter and error status");
+ dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
}
return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
}
}
- if (!amdgpu_persistent_edc_harvesting_supported(adev))
- amdgpu_ras_reset_error_status(adev, info->head.block);
-
return 0;
}
if (res)
return res;
+ if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(adev, info.head.block))
+ dev_warn(adev->dev, "Failed to reset error counter and error status");
+ }
+
ce += info.ce_count;
ue += info.ue_count;
}
continue;
amdgpu_ras_query_error_status(adev, &info);
+
+ if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+ adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+ if (amdgpu_ras_reset_error_status(adev, info.head.block))
+ dev_warn(adev->dev, "Failed to reset error counter and error status");
+ }
}
}
!amdgpu_ras_asic_supported(adev))
return;
- if (!(amdgpu_sriov_vf(adev) &&
- (adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))))
+ /* If driver run on sriov guest side, only enable ras for aldebaran */
+ if (amdgpu_sriov_vf(adev) &&
+ adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2))
return;
if (!adev->gmc.xgmi.connected_to_cpu) {
#include <drm/ttm/ttm_resource.h>
#include <drm/ttm/ttm_range_manager.h>
-#include "amdgpu_vram_mgr.h"
-
/* state back for walking over vram_mgr and gtt_mgr allocations */
struct amdgpu_res_cursor {
uint64_t start;
uint64_t size;
uint64_t remaining;
- void *node;
- uint32_t mem_type;
+ struct drm_mm_node *node;
};
/**
uint64_t start, uint64_t size,
struct amdgpu_res_cursor *cur)
{
- struct drm_buddy_block *block;
- struct list_head *head, *next;
struct drm_mm_node *node;
- if (!res)
- goto fallback;
-
- BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
-
- cur->mem_type = res->mem_type;
-
- switch (cur->mem_type) {
- case TTM_PL_VRAM:
- head = &to_amdgpu_vram_mgr_resource(res)->blocks;
-
- block = list_first_entry_or_null(head,
- struct drm_buddy_block,
- link);
- if (!block)
- goto fallback;
-
- while (start >= amdgpu_vram_mgr_block_size(block)) {
- start -= amdgpu_vram_mgr_block_size(block);
-
- next = block->link.next;
- if (next != head)
- block = list_entry(next, struct drm_buddy_block, link);
- }
-
- cur->start = amdgpu_vram_mgr_block_start(block) + start;
- cur->size = min(amdgpu_vram_mgr_block_size(block) - start, size);
- cur->remaining = size;
- cur->node = block;
- break;
- case TTM_PL_TT:
- node = to_ttm_range_mgr_node(res)->mm_nodes;
- while (start >= node->size << PAGE_SHIFT)
- start -= node++->size << PAGE_SHIFT;
-
- cur->start = (node->start << PAGE_SHIFT) + start;
- cur->size = min((node->size << PAGE_SHIFT) - start, size);
+ if (!res || res->mem_type == TTM_PL_SYSTEM) {
+ cur->start = start;
+ cur->size = size;
cur->remaining = size;
- cur->node = node;
- break;
- default:
- goto fallback;
+ cur->node = NULL;
+ WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
+ return;
}
- return;
+ BUG_ON(start + size > res->num_pages << PAGE_SHIFT);
-fallback:
- cur->start = start;
- cur->size = size;
+ node = to_ttm_range_mgr_node(res)->mm_nodes;
+ while (start >= node->size << PAGE_SHIFT)
+ start -= node++->size << PAGE_SHIFT;
+
+ cur->start = (node->start << PAGE_SHIFT) + start;
+ cur->size = min((node->size << PAGE_SHIFT) - start, size);
cur->remaining = size;
- cur->node = NULL;
- WARN_ON(res && start + size > res->num_pages << PAGE_SHIFT);
- return;
+ cur->node = node;
}
/**
*/
static inline void amdgpu_res_next(struct amdgpu_res_cursor *cur, uint64_t size)
{
- struct drm_buddy_block *block;
- struct drm_mm_node *node;
- struct list_head *next;
+ struct drm_mm_node *node = cur->node;
BUG_ON(size > cur->remaining);
return;
}
- switch (cur->mem_type) {
- case TTM_PL_VRAM:
- block = cur->node;
-
- next = block->link.next;
- block = list_entry(next, struct drm_buddy_block, link);
-
- cur->node = block;
- cur->start = amdgpu_vram_mgr_block_start(block);
- cur->size = min(amdgpu_vram_mgr_block_size(block), cur->remaining);
- break;
- case TTM_PL_TT:
- node = cur->node;
-
- cur->node = ++node;
- cur->start = node->start << PAGE_SHIFT;
- cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
- break;
- default:
- return;
- }
+ cur->node = ++node;
+ cur->start = node->start << PAGE_SHIFT;
+ cur->size = min(node->size << PAGE_SHIFT, cur->remaining);
}
#endif
DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
(unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
- /* Compute GTT size, either bsaed on 3/4th the size of RAM size
+ /* Compute GTT size, either based on 1/2 the size of RAM size
* or whatever the user passed on module init */
if (amdgpu_gtt_size == -1) {
struct sysinfo si;
si_meminfo(&si);
- gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
- adev->gmc.mc_vram_size),
- ((uint64_t)si.totalram * si.mem_unit * 3/4));
- }
- else
+ /* Certain GL unit tests for large textures can cause problems
+ * with the OOM killer since there is no way to link this memory
+ * to a process. This was originally mitigated (but not necessarily
+ * eliminated) by limiting the GTT size. The problem is this limit
+ * is often too low for many modern games so just make the limit 1/2
+ * of system memory which aligns with TTM. The OOM accounting needs
+ * to be addressed, but we shouldn't prevent common 3D applications
+ * from being usable just to potentially mitigate that corner case.
+ */
+ gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+ (u64)si.totalram * si.mem_unit / 2);
+ } else {
gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+ }
/* Initialize GTT memory pool */
r = amdgpu_gtt_mgr_init(adev, gtt_size);
#include <linux/dma-direction.h>
#include <drm/gpu_scheduler.h>
-#include "amdgpu_vram_mgr.h"
#include "amdgpu.h"
#define AMDGPU_PL_GDS (TTM_PL_PRIV + 0)
#define AMDGPU_POISON 0xd0bed0be
+struct amdgpu_vram_mgr {
+ struct ttm_resource_manager manager;
+ struct drm_mm mm;
+ spinlock_t lock;
+ struct list_head reservations_pending;
+ struct list_head reserved_pages;
+ atomic64_t vis_usage;
+};
+
struct amdgpu_gtt_mgr {
struct ttm_resource_manager manager;
struct drm_mm mm;
adev_to_drm(adev)->mode_config.max_height = YRES_MAX;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
{
struct amdgpu_vm_update_params params;
struct amdgpu_vm_bo_base *entry;
+ bool flush_tlb_needed = false;
int r, idx;
if (list_empty(&vm->relocated))
goto error;
list_for_each_entry(entry, &vm->relocated, vm_status) {
+ /* vm_flush_needed after updating moved PDEs */
+ flush_tlb_needed |= entry->moved;
+
r = amdgpu_vm_pde_update(¶ms, entry);
if (r)
goto error;
if (r)
goto error;
- /* vm_flush_needed after updating PDEs */
- atomic64_inc(&vm->tlb_seq);
+ if (flush_tlb_needed)
+ atomic64_inc(&vm->tlb_seq);
while (!list_empty(&vm->relocated)) {
entry = list_first_entry(&vm->relocated,
flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
+ /*
+ * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
+ */
+ flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
+
memset(¶ms, 0, sizeof(params));
params.adev = adev;
params.vm = vm;
#include "atom.h"
struct amdgpu_vram_reservation {
- u64 start;
- u64 size;
- struct list_head allocated;
- struct list_head blocks;
+ struct list_head node;
+ struct drm_mm_node mm_node;
};
static inline struct amdgpu_vram_mgr *
};
/**
- * amdgpu_vram_mgr_vis_size - Calculate visible block size
+ * amdgpu_vram_mgr_vis_size - Calculate visible node size
*
* @adev: amdgpu_device pointer
- * @block: DRM BUDDY block structure
+ * @node: MM node structure
*
- * Calculate how many bytes of the DRM BUDDY block are inside visible VRAM
+ * Calculate how many bytes of the MM node are inside visible VRAM
*/
static u64 amdgpu_vram_mgr_vis_size(struct amdgpu_device *adev,
- struct drm_buddy_block *block)
+ struct drm_mm_node *node)
{
- u64 start = amdgpu_vram_mgr_block_start(block);
- u64 end = start + amdgpu_vram_mgr_block_size(block);
+ uint64_t start = node->start << PAGE_SHIFT;
+ uint64_t end = (node->size + node->start) << PAGE_SHIFT;
if (start >= adev->gmc.visible_vram_size)
return 0;
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_resource *res = bo->tbo.resource;
- struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
- struct drm_buddy_block *block;
- u64 usage = 0;
+ unsigned pages = res->num_pages;
+ struct drm_mm_node *mm;
+ u64 usage;
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
return amdgpu_bo_size(bo);
if (res->start >= adev->gmc.visible_vram_size >> PAGE_SHIFT)
return 0;
- list_for_each_entry(block, &vres->blocks, link)
- usage += amdgpu_vram_mgr_vis_size(adev, block);
+ mm = &container_of(res, struct ttm_range_mgr_node, base)->mm_nodes[0];
+ for (usage = 0; pages; pages -= mm->size, mm++)
+ usage += amdgpu_vram_mgr_vis_size(adev, mm);
return usage;
}
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_buddy *mm = &mgr->mm;
+ struct drm_mm *mm = &mgr->mm;
struct amdgpu_vram_reservation *rsv, *temp;
- struct drm_buddy_block *block;
uint64_t vis_usage;
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks) {
- if (drm_buddy_alloc_blocks(mm, rsv->start, rsv->start + rsv->size,
- rsv->size, mm->chunk_size, &rsv->allocated,
- DRM_BUDDY_RANGE_ALLOCATION))
- continue;
-
- block = amdgpu_vram_mgr_first_block(&rsv->allocated);
- if (!block)
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node) {
+ if (drm_mm_reserve_node(mm, &rsv->mm_node))
continue;
dev_dbg(adev->dev, "Reservation 0x%llx - %lld, Succeeded\n",
- rsv->start, rsv->size);
+ rsv->mm_node.start, rsv->mm_node.size);
- vis_usage = amdgpu_vram_mgr_vis_size(adev, block);
+ vis_usage = amdgpu_vram_mgr_vis_size(adev, &rsv->mm_node);
atomic64_add(vis_usage, &mgr->vis_usage);
spin_lock(&man->bdev->lru_lock);
- man->usage += rsv->size;
+ man->usage += rsv->mm_node.size << PAGE_SHIFT;
spin_unlock(&man->bdev->lru_lock);
- list_move(&rsv->blocks, &mgr->reserved_pages);
+ list_move(&rsv->node, &mgr->reserved_pages);
}
}
if (!rsv)
return -ENOMEM;
- INIT_LIST_HEAD(&rsv->allocated);
- INIT_LIST_HEAD(&rsv->blocks);
+ INIT_LIST_HEAD(&rsv->node);
+ rsv->mm_node.start = start >> PAGE_SHIFT;
+ rsv->mm_node.size = size >> PAGE_SHIFT;
- rsv->start = start;
- rsv->size = size;
-
- mutex_lock(&mgr->lock);
- list_add_tail(&rsv->blocks, &mgr->reservations_pending);
+ spin_lock(&mgr->lock);
+ list_add_tail(&rsv->node, &mgr->reservations_pending);
amdgpu_vram_mgr_do_reserve(&mgr->manager);
- mutex_unlock(&mgr->lock);
+ spin_unlock(&mgr->lock);
return 0;
}
struct amdgpu_vram_reservation *rsv;
int ret;
- mutex_lock(&mgr->lock);
+ spin_lock(&mgr->lock);
- list_for_each_entry(rsv, &mgr->reservations_pending, blocks) {
- if (rsv->start <= start &&
- (start < (rsv->start + rsv->size))) {
+ list_for_each_entry(rsv, &mgr->reservations_pending, node) {
+ if ((rsv->mm_node.start <= start) &&
+ (start < (rsv->mm_node.start + rsv->mm_node.size))) {
ret = -EBUSY;
goto out;
}
}
- list_for_each_entry(rsv, &mgr->reserved_pages, blocks) {
- if (rsv->start <= start &&
- (start < (rsv->start + rsv->size))) {
+ list_for_each_entry(rsv, &mgr->reserved_pages, node) {
+ if ((rsv->mm_node.start <= start) &&
+ (start < (rsv->mm_node.start + rsv->mm_node.size))) {
ret = 0;
goto out;
}
ret = -ENOENT;
out:
- mutex_unlock(&mgr->lock);
+ spin_unlock(&mgr->lock);
return ret;
}
+/**
+ * amdgpu_vram_mgr_virt_start - update virtual start address
+ *
+ * @mem: ttm_resource to update
+ * @node: just allocated node
+ *
+ * Calculate a virtual BO start address to easily check if everything is CPU
+ * accessible.
+ */
+static void amdgpu_vram_mgr_virt_start(struct ttm_resource *mem,
+ struct drm_mm_node *node)
+{
+ unsigned long start;
+
+ start = node->start + node->size;
+ if (start > mem->num_pages)
+ start -= mem->num_pages;
+ else
+ start = 0;
+ mem->start = max(mem->start, start);
+}
+
/**
* amdgpu_vram_mgr_new - allocate new ranges
*
const struct ttm_place *place,
struct ttm_resource **res)
{
- u64 vis_usage = 0, max_bytes, cur_size, min_block_size;
+ unsigned long lpfn, num_nodes, pages_per_node, pages_left, pages;
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct amdgpu_vram_mgr_resource *vres;
- u64 size, remaining_size, lpfn, fpfn;
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
- unsigned long pages_per_block;
+ uint64_t vis_usage = 0, mem_bytes, max_bytes;
+ struct ttm_range_mgr_node *node;
+ struct drm_mm *mm = &mgr->mm;
+ enum drm_mm_insert_mode mode;
+ unsigned i;
int r;
- lpfn = place->lpfn << PAGE_SHIFT;
+ lpfn = place->lpfn;
if (!lpfn)
- lpfn = man->size;
-
- fpfn = place->fpfn << PAGE_SHIFT;
+ lpfn = man->size >> PAGE_SHIFT;
max_bytes = adev->gmc.mc_vram_size;
if (tbo->type != ttm_bo_type_kernel)
max_bytes -= AMDGPU_VM_RESERVED_VRAM;
+ mem_bytes = tbo->base.size;
if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- pages_per_block = ~0ul;
+ pages_per_node = ~0ul;
+ num_nodes = 1;
} else {
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- pages_per_block = HPAGE_PMD_NR;
+ pages_per_node = HPAGE_PMD_NR;
#else
/* default to 2MB */
- pages_per_block = 2UL << (20UL - PAGE_SHIFT);
+ pages_per_node = 2UL << (20UL - PAGE_SHIFT);
#endif
- pages_per_block = max_t(uint32_t, pages_per_block,
- tbo->page_alignment);
+ pages_per_node = max_t(uint32_t, pages_per_node,
+ tbo->page_alignment);
+ num_nodes = DIV_ROUND_UP_ULL(PFN_UP(mem_bytes), pages_per_node);
}
- vres = kzalloc(sizeof(*vres), GFP_KERNEL);
- if (!vres)
+ node = kvmalloc(struct_size(node, mm_nodes, num_nodes),
+ GFP_KERNEL | __GFP_ZERO);
+ if (!node)
return -ENOMEM;
- ttm_resource_init(tbo, place, &vres->base);
+ ttm_resource_init(tbo, place, &node->base);
/* bail out quickly if there's likely not enough VRAM for this BO */
if (ttm_resource_manager_usage(man) > max_bytes) {
goto error_fini;
}
- INIT_LIST_HEAD(&vres->blocks);
-
+ mode = DRM_MM_INSERT_BEST;
if (place->flags & TTM_PL_FLAG_TOPDOWN)
- vres->flags |= DRM_BUDDY_TOPDOWN_ALLOCATION;
-
- if (fpfn || lpfn != man->size)
- /* Allocate blocks in desired range */
- vres->flags |= DRM_BUDDY_RANGE_ALLOCATION;
-
- remaining_size = vres->base.num_pages << PAGE_SHIFT;
-
- mutex_lock(&mgr->lock);
- while (remaining_size) {
- if (tbo->page_alignment)
- min_block_size = tbo->page_alignment << PAGE_SHIFT;
- else
- min_block_size = mgr->default_page_size;
-
- BUG_ON(min_block_size < mm->chunk_size);
-
- /* Limit maximum size to 2GiB due to SG table limitations */
- size = min(remaining_size, 2ULL << 30);
-
- if (size >= pages_per_block << PAGE_SHIFT)
- min_block_size = pages_per_block << PAGE_SHIFT;
-
- cur_size = size;
-
- if (fpfn + size != place->lpfn << PAGE_SHIFT) {
- /*
- * Except for actual range allocation, modify the size and
- * min_block_size conforming to continuous flag enablement
- */
- if (place->flags & TTM_PL_FLAG_CONTIGUOUS) {
- size = roundup_pow_of_two(size);
- min_block_size = size;
- /*
- * Modify the size value if size is not
- * aligned with min_block_size
- */
- } else if (!IS_ALIGNED(size, min_block_size)) {
- size = round_up(size, min_block_size);
+ mode = DRM_MM_INSERT_HIGH;
+
+ pages_left = node->base.num_pages;
+
+ /* Limit maximum size to 2GB due to SG table limitations */
+ pages = min(pages_left, 2UL << (30 - PAGE_SHIFT));
+
+ i = 0;
+ spin_lock(&mgr->lock);
+ while (pages_left) {
+ uint32_t alignment = tbo->page_alignment;
+
+ if (pages >= pages_per_node)
+ alignment = pages_per_node;
+
+ r = drm_mm_insert_node_in_range(mm, &node->mm_nodes[i], pages,
+ alignment, 0, place->fpfn,
+ lpfn, mode);
+ if (unlikely(r)) {
+ if (pages > pages_per_node) {
+ if (is_power_of_2(pages))
+ pages = pages / 2;
+ else
+ pages = rounddown_pow_of_two(pages);
+ continue;
}
+ goto error_free;
}
- r = drm_buddy_alloc_blocks(mm, fpfn,
- lpfn,
- size,
- min_block_size,
- &vres->blocks,
- vres->flags);
- if (unlikely(r))
- goto error_free_blocks;
-
- if (size > remaining_size)
- remaining_size = 0;
- else
- remaining_size -= size;
- }
- mutex_unlock(&mgr->lock);
-
- if (cur_size != size) {
- struct drm_buddy_block *block;
- struct list_head *trim_list;
- u64 original_size;
- LIST_HEAD(temp);
-
- trim_list = &vres->blocks;
- original_size = vres->base.num_pages << PAGE_SHIFT;
-
- /*
- * If size value is rounded up to min_block_size, trim the last
- * block to the required size
- */
- if (!list_is_singular(&vres->blocks)) {
- block = list_last_entry(&vres->blocks, typeof(*block), link);
- list_move_tail(&block->link, &temp);
- trim_list = &temp;
- /*
- * Compute the original_size value by subtracting the
- * last block size with (aligned size - original size)
- */
- original_size = amdgpu_vram_mgr_block_size(block) - (size - cur_size);
- }
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, &node->mm_nodes[i]);
+ amdgpu_vram_mgr_virt_start(&node->base, &node->mm_nodes[i]);
+ pages_left -= pages;
+ ++i;
- mutex_lock(&mgr->lock);
- drm_buddy_block_trim(mm,
- original_size,
- trim_list);
- mutex_unlock(&mgr->lock);
-
- if (!list_empty(&temp))
- list_splice_tail(trim_list, &vres->blocks);
- }
-
- list_for_each_entry(block, &vres->blocks, link)
- vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
-
- block = amdgpu_vram_mgr_first_block(&vres->blocks);
- if (!block) {
- r = -EINVAL;
- goto error_fini;
+ if (pages > pages_left)
+ pages = pages_left;
}
+ spin_unlock(&mgr->lock);
- vres->base.start = amdgpu_vram_mgr_block_start(block) >> PAGE_SHIFT;
-
- if (amdgpu_is_vram_mgr_blocks_contiguous(&vres->blocks))
- vres->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
+ if (i == 1)
+ node->base.placement |= TTM_PL_FLAG_CONTIGUOUS;
if (adev->gmc.xgmi.connected_to_cpu)
- vres->base.bus.caching = ttm_cached;
+ node->base.bus.caching = ttm_cached;
else
- vres->base.bus.caching = ttm_write_combined;
+ node->base.bus.caching = ttm_write_combined;
atomic64_add(vis_usage, &mgr->vis_usage);
- *res = &vres->base;
+ *res = &node->base;
return 0;
-error_free_blocks:
- drm_buddy_free_list(mm, &vres->blocks);
- mutex_unlock(&mgr->lock);
+error_free:
+ while (i--)
+ drm_mm_remove_node(&node->mm_nodes[i]);
+ spin_unlock(&mgr->lock);
error_fini:
- ttm_resource_fini(man, &vres->base);
- kfree(vres);
+ ttm_resource_fini(man, &node->base);
+ kvfree(node);
return r;
}
static void amdgpu_vram_mgr_del(struct ttm_resource_manager *man,
struct ttm_resource *res)
{
- struct amdgpu_vram_mgr_resource *vres = to_amdgpu_vram_mgr_resource(res);
+ struct ttm_range_mgr_node *node = to_ttm_range_mgr_node(res);
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
struct amdgpu_device *adev = to_amdgpu_device(mgr);
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
uint64_t vis_usage = 0;
+ unsigned i, pages;
- mutex_lock(&mgr->lock);
- list_for_each_entry(block, &vres->blocks, link)
- vis_usage += amdgpu_vram_mgr_vis_size(adev, block);
+ spin_lock(&mgr->lock);
+ for (i = 0, pages = res->num_pages; pages;
+ pages -= node->mm_nodes[i].size, ++i) {
+ struct drm_mm_node *mm = &node->mm_nodes[i];
+ drm_mm_remove_node(mm);
+ vis_usage += amdgpu_vram_mgr_vis_size(adev, mm);
+ }
amdgpu_vram_mgr_do_reserve(man);
-
- drm_buddy_free_list(mm, &vres->blocks);
- mutex_unlock(&mgr->lock);
+ spin_unlock(&mgr->lock);
atomic64_sub(vis_usage, &mgr->vis_usage);
ttm_resource_fini(man, res);
- kfree(vres);
+ kvfree(node);
}
/**
if (!*sgt)
return -ENOMEM;
- /* Determine the number of DRM_BUDDY blocks to export */
+ /* Determine the number of DRM_MM nodes to export */
amdgpu_res_first(res, offset, length, &cursor);
while (cursor.remaining) {
num_entries++;
sg->length = 0;
/*
- * Walk down DRM_BUDDY blocks to populate scatterlist nodes
- * @note: Use iterator api to get first the DRM_BUDDY block
+ * Walk down DRM_MM nodes to populate scatterlist nodes
+ * @note: Use iterator api to get first the DRM_MM node
* and the number of bytes from it. Access the following
- * DRM_BUDDY block(s) if more buffer needs to exported
+ * DRM_MM node(s) if more buffer needs to exported
*/
amdgpu_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
struct drm_printer *printer)
{
struct amdgpu_vram_mgr *mgr = to_vram_mgr(man);
- struct drm_buddy *mm = &mgr->mm;
- struct drm_buddy_block *block;
drm_printf(printer, " vis usage:%llu\n",
amdgpu_vram_mgr_vis_usage(mgr));
- mutex_lock(&mgr->lock);
- drm_printf(printer, "default_page_size: %lluKiB\n",
- mgr->default_page_size >> 10);
-
- drm_buddy_print(mm, printer);
-
- drm_printf(printer, "reserved:\n");
- list_for_each_entry(block, &mgr->reserved_pages, link)
- drm_buddy_block_print(mm, block, printer);
- mutex_unlock(&mgr->lock);
+ spin_lock(&mgr->lock);
+ drm_mm_print(&mgr->mm, printer);
+ spin_unlock(&mgr->lock);
}
static const struct ttm_resource_manager_func amdgpu_vram_mgr_func = {
{
struct amdgpu_vram_mgr *mgr = &adev->mman.vram_mgr;
struct ttm_resource_manager *man = &mgr->manager;
- int err;
ttm_resource_manager_init(man, &adev->mman.bdev,
adev->gmc.real_vram_size);
man->func = &amdgpu_vram_mgr_func;
- err = drm_buddy_init(&mgr->mm, man->size, PAGE_SIZE);
- if (err)
- return err;
-
- mutex_init(&mgr->lock);
+ drm_mm_init(&mgr->mm, 0, man->size >> PAGE_SHIFT);
+ spin_lock_init(&mgr->lock);
INIT_LIST_HEAD(&mgr->reservations_pending);
INIT_LIST_HEAD(&mgr->reserved_pages);
- mgr->default_page_size = PAGE_SIZE;
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, &mgr->manager);
ttm_resource_manager_set_used(man, true);
if (ret)
return;
- mutex_lock(&mgr->lock);
- list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, blocks)
+ spin_lock(&mgr->lock);
+ list_for_each_entry_safe(rsv, temp, &mgr->reservations_pending, node)
kfree(rsv);
- list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, blocks) {
- drm_buddy_free_list(&mgr->mm, &rsv->blocks);
+ list_for_each_entry_safe(rsv, temp, &mgr->reserved_pages, node) {
+ drm_mm_remove_node(&rsv->mm_node);
kfree(rsv);
}
- drm_buddy_fini(&mgr->mm);
- mutex_unlock(&mgr->lock);
+ drm_mm_takedown(&mgr->mm);
+ spin_unlock(&mgr->lock);
ttm_resource_manager_cleanup(man);
ttm_set_driver_manager(&adev->mman.bdev, TTM_PL_VRAM, NULL);
+++ /dev/null
-/* SPDX-License-Identifier: MIT
- * Copyright 2021 Advanced Micro Devices, Inc.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
- * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
- * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
- * OTHER DEALINGS IN THE SOFTWARE.
- *
- */
-
-#ifndef __AMDGPU_VRAM_MGR_H__
-#define __AMDGPU_VRAM_MGR_H__
-
-#include <drm/drm_buddy.h>
-
-struct amdgpu_vram_mgr {
- struct ttm_resource_manager manager;
- struct drm_buddy mm;
- /* protects access to buffer objects */
- struct mutex lock;
- struct list_head reservations_pending;
- struct list_head reserved_pages;
- atomic64_t vis_usage;
- u64 default_page_size;
-};
-
-struct amdgpu_vram_mgr_resource {
- struct ttm_resource base;
- struct list_head blocks;
- unsigned long flags;
-};
-
-static inline u64 amdgpu_vram_mgr_block_start(struct drm_buddy_block *block)
-{
- return drm_buddy_block_offset(block);
-}
-
-static inline u64 amdgpu_vram_mgr_block_size(struct drm_buddy_block *block)
-{
- return PAGE_SIZE << drm_buddy_block_order(block);
-}
-
-static inline struct drm_buddy_block *
-amdgpu_vram_mgr_first_block(struct list_head *list)
-{
- return list_first_entry_or_null(list, struct drm_buddy_block, link);
-}
-
-static inline bool amdgpu_is_vram_mgr_blocks_contiguous(struct list_head *head)
-{
- struct drm_buddy_block *block;
- u64 start, size;
-
- block = amdgpu_vram_mgr_first_block(head);
- if (!block)
- return false;
-
- while (head != block->link.next) {
- start = amdgpu_vram_mgr_block_start(block);
- size = amdgpu_vram_mgr_block_size(block);
-
- block = list_entry(block->link.next, struct drm_buddy_block, link);
- if (start + size != amdgpu_vram_mgr_block_start(block))
- return false;
- }
-
- return true;
-}
-
-static inline struct amdgpu_vram_mgr_resource *
-to_amdgpu_vram_mgr_resource(struct ttm_resource *res)
-{
- return container_of(res, struct amdgpu_vram_mgr_resource, base);
-}
-
-#endif
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
adev_to_drm(adev)->mode_config.max_width = 16384;
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
adev_to_drm(adev)->mode_config.fb_base = adev->gmc.aper_base;
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
adev_to_drm(adev)->mode_config.fb_modifiers_not_supported = true;
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+ dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
}
static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
- *(uint64_t *)fw_autoload_mask |= 1 << id;
+ *(uint64_t *)fw_autoload_mask |= 1ULL << id;
}
static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
return 0;
}
-void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
+static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
break;
default:
BUG();
+ break;
}
}
adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
+#ifdef CONFIG_X86_64
+ if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+ adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+ adev->gmc.aper_size = adev->gmc.real_vram_size;
+ }
+#endif
/* In case the PCI BAR is larger than the actual amount of vram */
adev->gmc.visible_vram_size = adev->gmc.aper_size;
if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
};
-void program_imu_rlc_ram(struct amdgpu_device *adev,
+static void program_imu_rlc_ram(struct amdgpu_device *adev,
const struct imu_rlc_ram_golden *regs,
const u32 array_size)
{
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
+ amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET,
+ 0, 0, PACKETJ_TYPE0));
+ amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT));
+
amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
0, 0, PACKETJ_TYPE0));
amdgpu_ring_write(ring, (vmid | (vmid << 4)));
8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
8 + 16,
- .emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+ .emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */
.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
#define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET 0x4084
#define mmUVD_JRBC_STATUS_INTERNAL_OFFSET 0x4089
#define mmUVD_JPEG_PITCH_INTERNAL_OFFSET 0x401f
+#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET 0x4149
#define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR 0x18000
/* This function is for backdoor MES firmware */
static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
- enum admgpu_mes_pipe pipe)
+ enum admgpu_mes_pipe pipe, bool prime_icache)
{
int r;
uint32_t data;
/* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF);
- /* invalidate ICACHE */
- data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
- WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
-
- /* prime the ICACHE. */
- data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
- data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
- WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+ if (prime_icache) {
+ /* invalidate ICACHE */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+
+ /* prime the ICACHE. */
+ data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+ data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+ WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+ }
soc21_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
int r = 0;
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
- r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE);
+
+ r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
if (r) {
- DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+ DRM_ERROR("failed to load MES fw, r=%d\n", r);
return r;
}
- r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE);
+ r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
if (r) {
- DRM_ERROR("failed to load MES fw, r=%d\n", r);
+ DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
return r;
}
+
}
mes_v11_0_enable(adev, true);
if (!adev->enable_mes_kiq) {
if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
r = mes_v11_0_load_microcode(adev,
- AMDGPU_MES_SCHED_PIPE);
+ AMDGPU_MES_SCHED_PIPE, true);
if (r) {
DRM_ERROR("failed to MES fw, r=%d\n", r);
return r;
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+ {codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
};
static const struct amdgpu_video_codecs yc_video_codecs_decode = {
}
}
+
/**
* sdma_v5_2_gfx_stop - stop the gfx async dma engines
*
}
/**
- * sdma_v5_2_ctx_switch_enable_for_instance - start the async dma engines
- * context switch for an instance
+ * sdma_v5_2_ctx_switch_enable - stop the async dma engines context switch
*
* @adev: amdgpu_device pointer
- * @instance_idx: the index of the SDMA instance
+ * @enable: enable/disable the DMA MEs context switch.
*
- * Unhalt the async dma engines context switch.
+ * Halt or unhalt the async dma engines context switch.
*/
-static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, int instance_idx)
+static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl, phase_quantum = 0;
-
- if (WARN_ON(instance_idx >= adev->sdma.num_instances)) {
- return;
- }
+ int i;
if (amdgpu_sdma_phase_quantum) {
unsigned value = amdgpu_sdma_phase_quantum;
phase_quantum =
value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
unit << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
-
- WREG32_SOC15_IP(GC,
- sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE0_QUANTUM),
- phase_quantum);
- WREG32_SOC15_IP(GC,
- sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE1_QUANTUM),
- phase_quantum);
- WREG32_SOC15_IP(GC,
- sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE2_QUANTUM),
- phase_quantum);
}
- if (!amdgpu_sriov_vf(adev)) {
- f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL));
- f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
- AUTO_CTXSW_ENABLE, 1);
- WREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL), f32_cntl);
+ for (i = 0; i < adev->sdma.num_instances; i++) {
+ if (enable && amdgpu_sdma_phase_quantum) {
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
+ phase_quantum);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
+ phase_quantum);
+ WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
+ phase_quantum);
+ }
+
+ if (!amdgpu_sriov_vf(adev)) {
+ f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+ AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+ WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ }
}
+
}
/**
- * sdma_v5_2_ctx_switch_disable_all - stop the async dma engines context switch
+ * sdma_v5_2_enable - stop the async dma engines
*
* @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
*
- * Halt the async dma engines context switch.
+ * Halt or unhalt the async dma engines.
*/
-static void sdma_v5_2_ctx_switch_disable_all(struct amdgpu_device *adev)
+static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
{
u32 f32_cntl;
int i;
- if (amdgpu_sriov_vf(adev))
- return;
-
- for (i = 0; i < adev->sdma.num_instances; i++) {
- f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
- f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
- AUTO_CTXSW_ENABLE, 0);
- WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+ if (!enable) {
+ sdma_v5_2_gfx_stop(adev);
+ sdma_v5_2_rlc_stop(adev);
}
-}
-
-/**
- * sdma_v5_2_halt - stop the async dma engines
- *
- * @adev: amdgpu_device pointer
- *
- * Halt the async dma engines.
- */
-static void sdma_v5_2_halt(struct amdgpu_device *adev)
-{
- int i;
- u32 f32_cntl;
-
- sdma_v5_2_gfx_stop(adev);
- sdma_v5_2_rlc_stop(adev);
if (!amdgpu_sriov_vf(adev)) {
for (i = 0; i < adev->sdma.num_instances; i++) {
f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
- f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+ f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
}
}
* @adev: amdgpu_device pointer
*
* Set up the gfx DMA ring buffers and enable them.
- * It assumes that the dma engine is stopped for each instance.
- * The function enables the engine and preemptions sequentially for each instance.
- *
* Returns 0 for success, error for failure.
*/
static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
ring->sched.ready = true;
- sdma_v5_2_ctx_switch_enable_for_instance(adev, i);
+ if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */
+ sdma_v5_2_ctx_switch_enable(adev, true);
+ sdma_v5_2_enable(adev, true);
+ }
r = amdgpu_ring_test_ring(ring);
if (r) {
int i, j;
/* halt the MEs */
- sdma_v5_2_halt(adev);
+ sdma_v5_2_enable(adev, false);
for (i = 0; i < adev->sdma.num_instances; i++) {
if (!adev->sdma.instance[i].fw)
int r = 0;
if (amdgpu_sriov_vf(adev)) {
- sdma_v5_2_ctx_switch_disable_all(adev);
- sdma_v5_2_halt(adev);
+ sdma_v5_2_ctx_switch_enable(adev, false);
+ sdma_v5_2_enable(adev, false);
/* set RB registers */
r = sdma_v5_2_gfx_resume(adev);
amdgpu_gfx_off_ctrl(adev, false);
sdma_v5_2_soft_reset(adev);
+ /* unhalt the MEs */
+ sdma_v5_2_enable(adev, true);
+ /* enable sdma ring preemption */
+ sdma_v5_2_ctx_switch_enable(adev, true);
- /* Soft reset supposes to disable the dma engine and preemption.
- * Now start the gfx rings and rlc compute queues.
- */
+ /* start the gfx rings and rlc compute queues */
r = sdma_v5_2_gfx_resume(adev);
if (adev->in_s0ix)
amdgpu_gfx_off_ctrl(adev, true);
if (amdgpu_sriov_vf(adev))
return 0;
- sdma_v5_2_ctx_switch_disable_all(adev);
- sdma_v5_2_halt(adev);
+ sdma_v5_2_ctx_switch_enable(adev, false);
+ sdma_v5_2_enable(adev, false);
return 0;
}
.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};
-static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
- struct amdgpu_job *job)
+static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
{
struct drm_gpu_scheduler **scheds;
/* The create msg must be in the first IB submitted */
- if (atomic_read(&job->base.entity->fence_seq))
+ if (atomic_read(&p->entity->fence_seq))
return -EINVAL;
scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
[AMDGPU_RING_PRIO_DEFAULT].sched;
- drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
+ drm_sched_entity_modify_sched(p->entity, scheds, 1);
return 0;
}
-static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
- uint64_t addr)
+static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
{
struct ttm_operation_ctx ctx = { false, false };
struct amdgpu_bo_va_mapping *map;
if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
continue;
- r = vcn_v3_0_limit_sched(p, job);
+ r = vcn_v3_0_limit_sched(p);
if (r)
goto out;
}
struct amdgpu_job *job,
struct amdgpu_ib *ib)
{
- struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+ struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
uint32_t msg_lo = 0, msg_hi = 0;
unsigned i;
int r;
msg_hi = val;
} else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
val == 0) {
- r = vcn_v3_0_dec_msg(p, job,
- ((u64)msg_hi) << 32 | msg_lo);
+ r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
if (r)
return r;
}
num_of_cache_types = ARRAY_SIZE(beige_goby_cache_info);
break;
case IP_VERSION(10, 3, 3):
+ case IP_VERSION(10, 3, 6): /* TODO: Double check these on production silicon */
+ case IP_VERSION(10, 3, 7): /* TODO: Double check these on production silicon */
pcache_info = yellow_carp_cache_info;
num_of_cache_types = ARRAY_SIZE(yellow_carp_cache_info);
break;
case IP_VERSION(4, 1, 2):/* RENOIR */
case IP_VERSION(5, 2, 1):/* VANGOGH */
case IP_VERSION(5, 2, 3):/* YELLOW_CARP */
+ case IP_VERSION(5, 2, 6):/* GC 10.3.6 */
+ case IP_VERSION(5, 2, 7):/* GC 10.3.7 */
case IP_VERSION(6, 0, 1):
kfd->device_info.num_sdma_queues_per_engine = 2;
break;
case IP_VERSION(9, 4, 2): /* ALDEBARAN */
case IP_VERSION(10, 3, 1): /* VANGOGH */
case IP_VERSION(10, 3, 3): /* YELLOW_CARP */
+ case IP_VERSION(10, 3, 6): /* GC 10.3.6 */
+ case IP_VERSION(10, 3, 7): /* GC 10.3.7 */
case IP_VERSION(10, 1, 3): /* CYAN_SKILLFISH */
case IP_VERSION(10, 1, 4):
case IP_VERSION(10, 1, 10): /* NAVI10 */
if (gc_version < IP_VERSION(11, 0, 0)) {
/* Navi2x+, Navi1x+ */
- if (gc_version >= IP_VERSION(10, 3, 0))
+ if (gc_version == IP_VERSION(10, 3, 6))
+ kfd->device_info.no_atomic_fw_version = 14;
+ else if (gc_version == IP_VERSION(10, 3, 7))
+ kfd->device_info.no_atomic_fw_version = 3;
+ else if (gc_version >= IP_VERSION(10, 3, 0))
kfd->device_info.no_atomic_fw_version = 92;
else if (gc_version >= IP_VERSION(10, 1, 1))
kfd->device_info.no_atomic_fw_version = 145;
if (!vf)
f2g = &gfx_v10_3_kfd2kgd;
break;
+ case IP_VERSION(10, 3, 6):
+ gfx_target_version = 100306;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
+ case IP_VERSION(10, 3, 7):
+ gfx_target_version = 100307;
+ if (!vf)
+ f2g = &gfx_v10_3_kfd2kgd;
+ break;
case IP_VERSION(11, 0, 0):
gfx_target_version = 110000;
f2g = &gfx_v11_kfd2kgd;
struct migrate_vma *migrate, struct dma_fence **mfence,
dma_addr_t *scratch)
{
- uint64_t npages = migrate->cpages;
+ uint64_t npages = migrate->npages;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
dma_addr_t *src;
mfence);
if (r)
goto out_free_vram_pages;
- amdgpu_res_next(&cursor, j << PAGE_SHIFT);
+ amdgpu_res_next(&cursor, (j + 1) << PAGE_SHIFT);
j = 0;
} else {
amdgpu_res_next(&cursor, PAGE_SIZE);
continue;
}
src[i] = svm_migrate_addr(adev, spage);
- if (i > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
+ if (j > 0 && src[i] != src[i - 1] + PAGE_SIZE) {
r = svm_migrate_copy_memory_gart(adev, dst + i - j,
src + i - j, j,
FROM_VRAM_TO_RAM,
r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb, NULL,
last_start, prange->start + i,
pte_flags,
- last_start - prange->start,
+ (last_start - prange->start) << PAGE_SHIFT,
bo_adev ? bo_adev->vm_manager.vram_base_offset : 0,
NULL, dma_addr, &vm->last_update);
if (range->event == MMU_NOTIFY_RELEASE)
return true;
+ if (!mmget_not_zero(mni->mm))
+ return true;
start = mni->interval_tree.start;
last = mni->interval_tree.last;
}
svm_range_unlock(prange);
+ mmput(mni->mm);
return true;
}
bool "AMD DC - Enable new display engine"
default y
select SND_HDA_COMPONENT if SND_HDA_CORE
- select DRM_AMD_DC_DCN if (X86 || PPC64) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
+ select DRM_AMD_DC_DCN if (X86 || PPC_LONG_DOUBLE_128) && !(KCOV_INSTRUMENT_ALL && KCOV_ENABLE_COMPARISONS)
help
Choose this option if you want to use the new display engine
support for AMDGPU. This adds required support for Vega and
#include <linux/pci.h>
#include <linux/firmware.h>
#include <linux/component.h>
+#include <linux/dmi.h>
#include <drm/display/drm_dp_mst_helper.h>
#include <drm/display/drm_hdmi_helper.h>
vrr_active, (int) !e);
}
+static void dm_crtc_handle_vblank(struct amdgpu_crtc *acrtc)
+{
+ struct drm_crtc *crtc = &acrtc->base;
+ struct drm_device *dev = crtc->dev;
+ unsigned long flags;
+
+ drm_crtc_handle_vblank(crtc);
+
+ spin_lock_irqsave(&dev->event_lock, flags);
+
+ /* Send completion event for cursor-only commits */
+ if (acrtc->event && acrtc->pflip_status != AMDGPU_FLIP_SUBMITTED) {
+ drm_crtc_send_vblank_event(crtc, acrtc->event);
+ drm_crtc_vblank_put(crtc);
+ acrtc->event = NULL;
+ }
+
+ spin_unlock_irqrestore(&dev->event_lock, flags);
+}
+
static void dm_vupdate_high_irq(void *interrupt_params)
{
struct common_irq_params *irq_params = interrupt_params;
* if a pageflip happened inside front-porch.
*/
if (vrr_active) {
- drm_crtc_handle_vblank(&acrtc->base);
+ dm_crtc_handle_vblank(acrtc);
/* BTR processing for pre-DCE12 ASICs */
if (acrtc->dm_irq_params.stream &&
* to dm_vupdate_high_irq after end of front-porch.
*/
if (!vrr_active)
- drm_crtc_handle_vblank(&acrtc->base);
+ dm_crtc_handle_vblank(acrtc);
/**
* Following stuff must happen at start of vblank, for crc
return false;
}
+static const struct dmi_system_id hpd_disconnect_quirk_table[] = {
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3660"),
+ },
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3260"),
+ },
+ },
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 3460"),
+ },
+ },
+ {}
+};
+
+static void retrieve_dmi_info(struct amdgpu_display_manager *dm)
+{
+ const struct dmi_system_id *dmi_id;
+
+ dm->aux_hpd_discon_quirk = false;
+
+ dmi_id = dmi_first_match(hpd_disconnect_quirk_table);
+ if (dmi_id) {
+ dm->aux_hpd_discon_quirk = true;
+ DRM_INFO("aux_hpd_discon_quirk attached\n");
+ }
+}
+
static int amdgpu_dm_init(struct amdgpu_device *adev)
{
struct dc_init_data init_data;
}
INIT_LIST_HEAD(&adev->dm.da_list);
+
+ retrieve_dmi_info(&adev->dm);
+
/* Display Core create. */
adev->dm.dc = dc_create(&init_data);
#if defined(CONFIG_DRM_AMD_SECURE_DISPLAY)
adev->dm.crc_rd_wrk = amdgpu_dm_crtc_secure_display_create_work();
#endif
- if (dc_enable_dmub_notifications(adev->dm.dc)) {
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
init_completion(&adev->dm.dmub_aux_transfer_done);
adev->dm.dmub_notify = kzalloc(sizeof(struct dmub_notification), GFP_KERNEL);
if (!adev->dm.dmub_notify) {
goto error;
}
+ /* Enable outbox notification only after IRQ handlers are registered and DMUB is alive.
+ * It is expected that DMUB will resend any pending notifications at this point, for
+ * example HPD from DPIA.
+ */
+ if (dc_is_dmub_outbox_supported(adev->dm.dc))
+ dc_enable_dmub_outbox(adev->dm.dc);
+
/* create fake encoders for MST */
dm_dp_create_fake_mst_encoders(adev);
*/
link_enc_cfg_copy(adev->dm.dc->current_state, dc_state);
- if (dc_enable_dmub_notifications(adev->dm.dc))
- amdgpu_dm_outbox_init(adev);
-
r = dm_dmub_hw_init(adev);
if (r)
DRM_ERROR("DMUB interface failed to initialize: status=%d\n", r);
}
}
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ amdgpu_dm_outbox_init(adev);
+ dc_enable_dmub_outbox(adev->dm.dc);
+ }
+
WARN_ON(!dc_commit_state(dm->dc, dc_state));
dm_gpureset_commit_state(dm->cached_dc_state, dm);
/* TODO: Remove dc_state->dccg, use dc->dccg directly. */
dc_resource_state_construct(dm->dc, dm_state->context);
- /* Re-enable outbox interrupts for DPIA. */
- if (dc_enable_dmub_notifications(adev->dm.dc))
- amdgpu_dm_outbox_init(adev);
-
/* Before powering on DC we need to re-initialize DMUB. */
dm_dmub_hw_resume(adev);
+ /* Re-enable outbox interrupts for DPIA. */
+ if (dc_is_dmub_outbox_supported(adev->dm.dc)) {
+ amdgpu_dm_outbox_init(adev);
+ dc_enable_dmub_outbox(adev->dm.dc);
+ }
+
/* power on hardware */
dc_set_power_state(dm->dc, DC_ACPI_CM_POWER_STATE_D0);
static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
{
- u32 max_cll, min_cll, max, min, q, r;
+ u32 max_avg, min_cll, max, min, q, r;
struct amdgpu_dm_backlight_caps *caps;
struct amdgpu_display_manager *dm;
struct drm_connector *conn_base;
caps = &dm->backlight_caps[i];
caps->ext_caps = &aconnector->dc_link->dpcd_sink_ext_caps;
caps->aux_support = false;
- max_cll = conn_base->hdr_sink_metadata.hdmi_type1.max_cll;
+ max_avg = conn_base->hdr_sink_metadata.hdmi_type1.max_fall;
min_cll = conn_base->hdr_sink_metadata.hdmi_type1.min_cll;
if (caps->ext_caps->bits.oled == 1 /*||
* The results of the above expressions can be verified at
* pre_computed_values.
*/
- q = max_cll >> 5;
- r = max_cll % 32;
+ q = max_avg >> 5;
+ r = max_avg % 32;
max = (1 << q) * pre_computed_values[r];
// min luminance: maxLum * (CV/255)^2 / 100
adev_to_drm(adev)->mode_config.max_height = 16384;
adev_to_drm(adev)->mode_config.preferred_depth = 24;
- adev_to_drm(adev)->mode_config.prefer_shadow = 1;
+ /* disable prefer shadow for now due to hibernation issues */
+ adev_to_drm(adev)->mode_config.prefer_shadow = 0;
/* indicates support for immediate flip */
adev_to_drm(adev)->mode_config.async_page_flip = true;
}
}
- /* Disable vblank IRQs aggressively for power-saving. */
- adev_to_drm(adev)->vblank_disable_immediate = true;
-
/* loops over all connectors on the board */
for (i = 0; i < link_cnt; i++) {
struct dc_link *link = NULL;
}
}
- if (per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
+ if (*per_pixel_alpha && plane_state->pixel_blend_mode == DRM_MODE_BLEND_COVERAGE)
*pre_multiplied_alpha = false;
}
struct amdgpu_bo *abo;
uint32_t target_vblank, last_flip_vblank;
bool vrr_active = amdgpu_dm_vrr_active(acrtc_state);
+ bool cursor_update = false;
bool pflip_present = false;
struct {
struct dc_surface_update surface_updates[MAX_SURFACES];
struct dm_plane_state *dm_new_plane_state = to_dm_plane_state(new_plane_state);
/* Cursor plane is handled after stream updates */
- if (plane->type == DRM_PLANE_TYPE_CURSOR)
+ if (plane->type == DRM_PLANE_TYPE_CURSOR) {
+ if ((fb && crtc == pcrtc) ||
+ (old_plane_state->fb && old_plane_state->crtc == pcrtc))
+ cursor_update = true;
+
continue;
+ }
if (!fb || !crtc || pcrtc != crtc)
continue;
bundle->stream_update.vrr_infopacket =
&acrtc_state->stream->vrr_infopacket;
}
+ } else if (cursor_update && acrtc_state->active_planes > 0 &&
+ !acrtc_state->force_dpms_off &&
+ acrtc_attach->base.state->event) {
+ drm_crtc_vblank_get(pcrtc);
+
+ spin_lock_irqsave(&pcrtc->dev->event_lock, flags);
+
+ acrtc_attach->event = acrtc_attach->base.state->event;
+ acrtc_attach->base.state->event = NULL;
+
+ spin_unlock_irqrestore(&pcrtc->dev->event_lock, flags);
}
/* Update the planes if changed or disable if we don't have any. */
* last successfully applied backlight values.
*/
u32 actual_brightness[AMDGPU_DM_MAX_NUM_EDP];
+
+ /**
+ * @aux_hpd_discon_quirk:
+ *
+ * quirk for hpd discon while aux is on-going.
+ * occurred on certain intel platform
+ */
+ bool aux_hpd_discon_quirk;
};
enum dsc_clock_force_state {
ssize_t result = 0;
struct aux_payload payload;
enum aux_return_code_type operation_result;
+ struct amdgpu_device *adev;
+ struct ddc_service *ddc;
if (WARN_ON(msg->size > 16))
return -E2BIG;
result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload,
&operation_result);
+ /*
+ * w/a on certain intel platform where hpd is unexpected to pull low during
+ * 1st sideband message transaction by return AUX_RET_ERROR_HPD_DISCON
+ * aux transaction is succuess in such case, therefore bypass the error
+ */
+ ddc = TO_DM_AUX(aux)->ddc_service;
+ adev = ddc->ctx->driver_context;
+ if (adev->dm.aux_hpd_discon_quirk) {
+ if (msg->address == DP_SIDEBAND_MSG_DOWN_REQ_BASE &&
+ operation_result == AUX_RET_ERROR_HPD_DISCON) {
+ result = 0;
+ operation_result = AUX_RET_SUCCESS;
+ }
+ }
+
if (payload.write && result >= 0)
result = msg->size;
void dcn31_init_clocks(struct clk_mgr *clk_mgr)
{
+ uint32_t ref_dtbclk = clk_mgr->clks.ref_dtbclk_khz;
+
memset(&(clk_mgr->clks), 0, sizeof(struct dc_clocks));
// Assumption is that boot state always supports pstate
+ clk_mgr->clks.ref_dtbclk_khz = ref_dtbclk; // restore ref_dtbclk
clk_mgr->clks.p_state_change_support = true;
clk_mgr->clks.prev_p_state_change_support = true;
clk_mgr->clks.pwr_state = DCN_PWR_STATE_UNKNOWN;
}
}
+int dcn31_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base)
+{
+ return clk_mgr_base->clks.ref_dtbclk_khz;
+}
+
static struct clk_mgr_funcs dcn31_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn31_update_clocks,
.init_clocks = dcn31_init_clocks,
.enable_pme_wa = dcn31_enable_pme_wa,
}
clk_mgr->base.base.dprefclk_khz = 600000;
- clk_mgr->base.dccg->ref_dtbclk_khz = 600000;
+ clk_mgr->base.base.clks.ref_dtbclk_khz = 600000;
dce_clock_read_ss_info(&clk_mgr->base);
/*if bios enabled SS, driver needs to adjust dtb clock, only enable with correct bios*/
//clk_mgr->base.dccg->ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(clk_mgr_internal, clk_mgr->base.base.dprefclk_khz);
struct pp_smu_funcs *pp_smu,
struct dccg *dccg);
+int dcn31_get_dtb_ref_freq_khz(struct clk_mgr *clk_mgr_base);
+
void dcn31_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
#endif //__DCN31_CLK_MGR_H__
#include "dc_dmub_srv.h"
-#if defined (CONFIG_DRM_AMD_DC_DP2_0)
#include "dc_link_dp.h"
-#endif
#define TO_CLK_MGR_DCN315(clk_mgr)\
container_of(clk_mgr, struct clk_mgr_dcn315, base)
if (!bw_params->clk_table.entries[i].dtbclk_mhz)
bw_params->clk_table.entries[i].dtbclk_mhz = def_max.dtbclk_mhz;
}
- ASSERT(bw_params->clk_table.entries[i].dcfclk_mhz);
+ ASSERT(bw_params->clk_table.entries[i-1].dcfclk_mhz);
bw_params->vram_type = bios_info->memory_type;
bw_params->num_channels = bios_info->ma_channel_number;
if (!bw_params->num_channels)
static struct clk_mgr_funcs dcn315_funcs = {
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn315_update_clocks,
.init_clocks = dcn31_init_clocks,
.enable_pme_wa = dcn315_enable_pme_wa,
clk_mgr->base.base.dprefclk_khz = 600000;
clk_mgr->base.base.dprefclk_khz = dcn315_smu_get_dpref_clk(&clk_mgr->base);
- clk_mgr->base.dccg->ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz;
+ clk_mgr->base.base.clks.ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz;
dce_clock_read_ss_info(&clk_mgr->base);
- clk_mgr->base.dccg->ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);
+ clk_mgr->base.base.clks.ref_dtbclk_khz = dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);
clk_mgr->base.base.bw_params = &dcn315_bw_params;
static struct clk_mgr_funcs dcn316_funcs = {
.enable_pme_wa = dcn316_enable_pme_wa,
.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
+ .get_dtb_ref_clk_frequency = dcn31_get_dtb_ref_freq_khz,
.update_clocks = dcn316_update_clocks,
.init_clocks = dcn31_init_clocks,
.are_clock_states_equal = dcn31_are_clock_states_equal,
clk_mgr->base.base.dprefclk_khz = 600000;
clk_mgr->base.base.dprefclk_khz = dcn316_smu_get_dpref_clk(&clk_mgr->base);
- clk_mgr->base.dccg->ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz;
+ clk_mgr->base.base.clks.ref_dtbclk_khz = clk_mgr->base.base.dprefclk_khz;
dce_clock_read_ss_info(&clk_mgr->base);
/*clk_mgr->base.dccg->ref_dtbclk_khz =
dce_adjust_dp_ref_freq_for_ss(&clk_mgr->base, clk_mgr->base.base.dprefclk_khz);*/
static bool decide_fallback_link_setting(
struct dc_link *link,
- struct dc_link_settings initial_link_settings,
- struct dc_link_settings *current_link_setting,
+ struct dc_link_settings *max,
+ struct dc_link_settings *cur,
enum link_training_result training_result);
static void maximize_lane_settings(const struct link_training_settings *lt_settings,
struct dc_lane_settings lane_settings[LANE_COUNT_DP_MAX]);
return;
- for (lane = 1; lane < LANE_COUNT_DP_MAX; lane++) {
+ for (lane = 0; lane < LANE_COUNT_DP_MAX; lane++) {
if (lt_settings->voltage_swing)
lane_settings[lane].VOLTAGE_SWING = *lt_settings->voltage_swing;
if (lt_settings->pre_emphasis)
enum dp_panel_mode panel_mode = dp_get_panel_mode(link);
enum link_training_result status = LINK_TRAINING_CR_FAIL_LANE0;
struct dc_link_settings cur_link_settings = *link_setting;
+ struct dc_link_settings max_link_settings = *link_setting;
const struct link_hwss *link_hwss = get_link_hwss(link, &pipe_ctx->link_res);
int fail_count = 0;
bool is_link_bw_low = false; /* link bandwidth < stream bandwidth */
dp_trace_commit_lt_init(link);
-
if (dp_get_link_encoding_format(&cur_link_settings) == DP_8b_10b_ENCODING)
/* We need to do this before the link training to ensure the idle
* pattern in SST mode will be sent right after the link training
uint32_t req_bw;
uint32_t link_bw;
- decide_fallback_link_setting(link, *link_setting, &cur_link_settings, status);
- /* Flag if reduced link bandwidth no longer meets stream requirements or fallen back to
- * minimum link bandwidth.
+ decide_fallback_link_setting(link, &max_link_settings,
+ &cur_link_settings, status);
+ /* Fail link training if reduced link bandwidth no longer meets
+ * stream requirements.
*/
req_bw = dc_bandwidth_in_kbps_from_timing(&stream->timing);
link_bw = dc_link_bandwidth_kbps(link, &cur_link_settings);
- is_link_bw_low = (req_bw > link_bw);
- is_link_bw_min = ((cur_link_settings.link_rate <= LINK_RATE_LOW) &&
- (cur_link_settings.lane_count <= LANE_COUNT_ONE));
-
- if (is_link_bw_low)
- DC_LOG_WARNING("%s: Link bandwidth too low after fallback req_bw(%d) > link_bw(%d)\n",
- __func__, req_bw, link_bw);
+ if (req_bw > link_bw)
+ break;
}
msleep(delay_between_attempts);
int *fail_count)
{
struct dc_link_settings cur_link_settings = {0};
- struct dc_link_settings initial_link_settings = *known_limit_link_setting;
+ struct dc_link_settings max_link_settings = *known_limit_link_setting;
bool success = false;
bool skip_video_pattern;
enum clock_source_id dp_cs_id = get_clock_source_id(link);
struct link_resource link_res;
memset(&irq_data, 0, sizeof(irq_data));
- cur_link_settings = initial_link_settings;
+ cur_link_settings = max_link_settings;
/* Grant extended timeout request */
if ((link->lttpr_mode == LTTPR_MODE_NON_TRANSPARENT) && (link->dpcd_caps.lttpr_caps.max_ext_timeout > 0)) {
dp_trace_lt_result_update(link, status, true);
dp_disable_link_phy(link, &link_res, link->connector_signal);
} while (!success && decide_fallback_link_setting(link,
- initial_link_settings, &cur_link_settings, status));
+ &max_link_settings, &cur_link_settings, status));
link->verified_link_cap = success ?
cur_link_settings : fail_safe_link_settings;
*/
static bool decide_fallback_link_setting(
struct dc_link *link,
- struct dc_link_settings initial_link_settings,
- struct dc_link_settings *current_link_setting,
+ struct dc_link_settings *max,
+ struct dc_link_settings *cur,
enum link_training_result training_result)
{
- if (!current_link_setting)
+ if (!cur)
+ return false;
+ if (!max)
return false;
- if (dp_get_link_encoding_format(&initial_link_settings) == DP_128b_132b_ENCODING ||
+
+ if (dp_get_link_encoding_format(max) == DP_128b_132b_ENCODING ||
link->dc->debug.force_dp2_lt_fallback_method)
- return decide_fallback_link_setting_max_bw_policy(link, &initial_link_settings,
- current_link_setting, training_result);
+ return decide_fallback_link_setting_max_bw_policy(link, max, cur,
+ training_result);
switch (training_result) {
case LINK_TRAINING_CR_FAIL_LANE0:
case LINK_TRAINING_CR_FAIL_LANE23:
case LINK_TRAINING_LQA_FAIL:
{
- if (!reached_minimum_link_rate
- (current_link_setting->link_rate)) {
- current_link_setting->link_rate =
- reduce_link_rate(
- current_link_setting->link_rate);
- } else if (!reached_minimum_lane_count
- (current_link_setting->lane_count)) {
- current_link_setting->link_rate =
- initial_link_settings.link_rate;
+ if (!reached_minimum_link_rate(cur->link_rate)) {
+ cur->link_rate = reduce_link_rate(cur->link_rate);
+ } else if (!reached_minimum_lane_count(cur->lane_count)) {
+ cur->link_rate = max->link_rate;
if (training_result == LINK_TRAINING_CR_FAIL_LANE0)
return false;
else if (training_result == LINK_TRAINING_CR_FAIL_LANE1)
- current_link_setting->lane_count =
- LANE_COUNT_ONE;
- else if (training_result ==
- LINK_TRAINING_CR_FAIL_LANE23)
- current_link_setting->lane_count =
- LANE_COUNT_TWO;
+ cur->lane_count = LANE_COUNT_ONE;
+ else if (training_result == LINK_TRAINING_CR_FAIL_LANE23)
+ cur->lane_count = LANE_COUNT_TWO;
else
- current_link_setting->lane_count =
- reduce_lane_count(
- current_link_setting->lane_count);
+ cur->lane_count = reduce_lane_count(cur->lane_count);
} else {
return false;
}
}
case LINK_TRAINING_EQ_FAIL_EQ:
{
- if (!reached_minimum_lane_count
- (current_link_setting->lane_count)) {
- current_link_setting->lane_count =
- reduce_lane_count(
- current_link_setting->lane_count);
- } else if (!reached_minimum_link_rate
- (current_link_setting->link_rate)) {
- current_link_setting->link_rate =
- reduce_link_rate(
- current_link_setting->link_rate);
- current_link_setting->lane_count = initial_link_settings.lane_count;
+ if (!reached_minimum_lane_count(cur->lane_count)) {
+ cur->lane_count = reduce_lane_count(cur->lane_count);
+ } else if (!reached_minimum_link_rate(cur->link_rate)) {
+ cur->link_rate = reduce_link_rate(cur->link_rate);
+ /* Reduce max link rate to avoid potential infinite loop.
+ * Needed so that any subsequent CR_FAIL fallback can't
+ * re-set the link rate higher than the link rate from
+ * the latest EQ_FAIL fallback.
+ */
+ max->link_rate = cur->link_rate;
+ cur->lane_count = max->lane_count;
} else {
return false;
}
}
case LINK_TRAINING_EQ_FAIL_CR:
{
- if (!reached_minimum_link_rate
- (current_link_setting->link_rate)) {
- current_link_setting->link_rate =
- reduce_link_rate(
- current_link_setting->link_rate);
- current_link_setting->lane_count = initial_link_settings.lane_count;
+ if (!reached_minimum_link_rate(cur->link_rate)) {
+ cur->link_rate = reduce_link_rate(cur->link_rate);
+ /* Reduce max link rate to avoid potential infinite loop.
+ * Needed so that any subsequent CR_FAIL fallback can't
+ * re-set the link rate higher than the link rate from
+ * the latest EQ_FAIL fallback.
+ */
+ max->link_rate = cur->link_rate;
+ cur->lane_count = max->lane_count;
} else {
return false;
}
* on certain displays, such as the Sharp 4k. 36bpp is needed
* to support SURFACE_PIXEL_FORMAT_GRPH_ARGB16161616 and
* SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616 with actual > 10 bpc
- * precision on at least DCN display engines. However, at least
- * Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
- * so use only 30 bpp on DCE_VERSION_11_0. Testing with DCE 11.2 and 8.3
- * did not show such problems, so this seems to be the exception.
+ * precision on DCN display engines, but apparently not for DCE, as
+ * far as testing on DCE-11.2 and DCE-8 showed. Various DCE parts have
+ * problems: Carrizo with DCE_VERSION_11_0 does not like 36 bpp lb depth,
+ * neither do DCE-8 at 4k resolution, or DCE-11.2 (broken identify pixel
+ * passthrough). Therefore only use 36 bpp on DCN where it is actually needed.
*/
- if (plane_state->ctx->dce_version > DCE_VERSION_11_0)
+ if (plane_state->ctx->dce_version > DCE_VERSION_MAX)
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_36BPP;
else
pipe_ctx->plane_res.scl_data.lb_params.depth = LB_PIXEL_DEPTH_30BPP;
struct set_config_cmd_payload;
struct dmub_notification;
-#define DC_VER "3.2.186"
+#define DC_VER "3.2.187"
#define MAX_SURFACES 3
#define MAX_PLANES 6
bool p_state_change_support;
enum dcn_zstate_support_state zstate_support;
bool dtbclk_en;
+ int ref_dtbclk_khz;
enum dcn_pwr_state pwr_state;
/*
* Elements below are not compared for the purposes of
bool apply_vendor_specific_lttpr_wa;
bool extended_blank_optimization;
union aux_wake_wa_options aux_wake_wa;
+ /* uses value at boot and disables switch */
+ bool disable_dtb_ref_clk_switch;
uint8_t psr_power_use_phy_fsm;
enum dml_hostvm_override_opts dml_hostvm_override;
};
break;
}
}
-
- /*
- * TO-DO: So far the code logic below only addresses single eDP case.
- * For dual eDP case, there are a few things that need to be
- * implemented first:
- *
- * 1. Change the fastboot logic above, so eDP link[0 or 1]'s
- * stream[0 or 1] will all be checked.
- *
- * 2. Change keep_edp_vdd_on to an array, and maintain keep_edp_vdd_on
- * for each eDP.
- *
- * Once above 2 things are completed, we can then change the logic below
- * correspondingly, so dual eDP case will be fully covered.
- */
-
- // We are trying to enable eDP, don't power down VDD if eDP stream is existing
- if ((edp_stream_num == 1 && edp_streams[0] != NULL) || can_apply_edp_fast_boot) {
+ // We are trying to enable eDP, don't power down VDD
+ if (can_apply_edp_fast_boot)
keep_edp_vdd_on = true;
- DC_LOG_EVENT_LINK_TRAINING("Keep eDP Vdd on\n");
- } else {
- DC_LOG_EVENT_LINK_TRAINING("No eDP stream enabled, turn eDP Vdd off\n");
- }
}
// Check seamless boot support
break;
}
+ /* Set default color space based on format if none is given. */
+ color_space = input_color_space ? input_color_space : color_space;
+
if (is_2bit == 1 && alpha_2bit_lut != NULL) {
REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0);
REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1);
break;
}
+ /* Set default color space based on format if none is given. */
+ color_space = input_color_space ? input_color_space : color_space;
+
if (is_2bit == 1 && alpha_2bit_lut != NULL) {
REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0);
REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1);
break;
}
+ /* Set default color space based on format if none is given. */
+ color_space = input_color_space ? input_color_space : color_space;
+
if (is_2bit == 1 && alpha_2bit_lut != NULL) {
REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT0, alpha_2bit_lut->lut0);
REG_UPDATE(ALPHA_2BIT_LUT, ALPHA_2BIT_LUT1, alpha_2bit_lut->lut1);
/* Controls the generation of pixel valid for OTG in (OTG -> HPO case) */
static void dccg31_set_dtbclk_dto(
struct dccg *dccg,
- int dtbclk_inst,
- int req_dtbclk_khz,
- int num_odm_segments,
- const struct dc_crtc_timing *timing)
+ struct dtbclk_dto_params *params)
{
struct dcn_dccg *dccg_dcn = TO_DCN_DCCG(dccg);
+ int req_dtbclk_khz = params->pixclk_khz;
uint32_t dtbdto_div;
/* Mode DTBDTO Rate DTBCLK_DTO<x>_DIV Register
* DSC native 4:2:2 pixel rate/2 4
* Other modes pixel rate 8
*/
- if (num_odm_segments == 4) {
+ if (params->num_odm_segments == 4) {
dtbdto_div = 2;
- req_dtbclk_khz = req_dtbclk_khz / 4;
- } else if ((num_odm_segments == 2) ||
- (timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) ||
- (timing->flags.DSC && timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
- && !timing->dsc_cfg.ycbcr422_simple)) {
+ req_dtbclk_khz = params->pixclk_khz / 4;
+ } else if ((params->num_odm_segments == 2) ||
+ (params->timing->pixel_encoding == PIXEL_ENCODING_YCBCR420) ||
+ (params->timing->flags.DSC && params->timing->pixel_encoding == PIXEL_ENCODING_YCBCR422
+ && !params->timing->dsc_cfg.ycbcr422_simple)) {
dtbdto_div = 4;
- req_dtbclk_khz = req_dtbclk_khz / 2;
+ req_dtbclk_khz = params->pixclk_khz / 2;
} else
dtbdto_div = 8;
- if (dccg->ref_dtbclk_khz && req_dtbclk_khz) {
+ if (params->ref_dtbclk_khz && req_dtbclk_khz) {
uint32_t modulo, phase;
// phase / modulo = dtbclk / dtbclk ref
- modulo = dccg->ref_dtbclk_khz * 1000;
- phase = div_u64((((unsigned long long)modulo * req_dtbclk_khz) + dccg->ref_dtbclk_khz - 1),
- dccg->ref_dtbclk_khz);
+ modulo = params->ref_dtbclk_khz * 1000;
+ phase = div_u64((((unsigned long long)modulo * req_dtbclk_khz) + params->ref_dtbclk_khz - 1),
+ params->ref_dtbclk_khz);
- REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- DTBCLK_DTO_DIV[dtbclk_inst], dtbdto_div);
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_DIV[params->otg_inst], dtbdto_div);
- REG_WRITE(DTBCLK_DTO_MODULO[dtbclk_inst], modulo);
- REG_WRITE(DTBCLK_DTO_PHASE[dtbclk_inst], phase);
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], modulo);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], phase);
- REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- DTBCLK_DTO_ENABLE[dtbclk_inst], 1);
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 1);
- REG_WAIT(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- DTBCLKDTO_ENABLE_STATUS[dtbclk_inst], 1,
+ REG_WAIT(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLKDTO_ENABLE_STATUS[params->otg_inst], 1,
1, 100);
/* The recommended programming sequence to enable DTBCLK DTO to generate
* valid pixel HPO DPSTREAM ENCODER, specifies that DTO source select should
* be set only after DTO is enabled
*/
- REG_UPDATE(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- PIPE_DTO_SRC_SEL[dtbclk_inst], 1);
-
- dccg->dtbclk_khz[dtbclk_inst] = req_dtbclk_khz;
+ REG_UPDATE(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ PIPE_DTO_SRC_SEL[params->otg_inst], 1);
} else {
- REG_UPDATE_3(OTG_PIXEL_RATE_CNTL[dtbclk_inst],
- DTBCLK_DTO_ENABLE[dtbclk_inst], 0,
- PIPE_DTO_SRC_SEL[dtbclk_inst], 0,
- DTBCLK_DTO_DIV[dtbclk_inst], dtbdto_div);
+ REG_UPDATE_3(OTG_PIXEL_RATE_CNTL[params->otg_inst],
+ DTBCLK_DTO_ENABLE[params->otg_inst], 0,
+ PIPE_DTO_SRC_SEL[params->otg_inst], 0,
+ DTBCLK_DTO_DIV[params->otg_inst], dtbdto_div);
- REG_WRITE(DTBCLK_DTO_MODULO[dtbclk_inst], 0);
- REG_WRITE(DTBCLK_DTO_PHASE[dtbclk_inst], 0);
-
- dccg->dtbclk_khz[dtbclk_inst] = 0;
+ REG_WRITE(DTBCLK_DTO_MODULO[params->otg_inst], 0);
+ REG_WRITE(DTBCLK_DTO_PHASE[params->otg_inst], 0);
}
}
REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
DCCG_AUDIO_DTO_SEL, 4); // 04 - DCCG_AUDIO_DTO_SEL_AUDIO_DTO_DTBCLK
-
- dccg->audio_dtbclk_khz = req_audio_dtbclk_khz;
} else {
REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_PHASE, 0);
REG_WRITE(DCCG_AUDIO_DTBCLK_DTO_MODULO, 0);
REG_UPDATE(DCCG_AUDIO_DTO_SOURCE,
DCCG_AUDIO_DTO_SEL, 3); // 03 - DCCG_AUDIO_DTO_SEL_NO_AUDIO_DTO
-
- dccg->audio_dtbclk_khz = 0;
}
}
AUX_RX_PHASE_DETECT_LEN, [21,20] = 0x3 default is 3
AUX_RX_DETECTION_THRESHOLD [30:28] = 1
*/
- AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110);
-
- AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a);
+ // dmub will read AUX_DPHY_RX_CONTROL0/AUX_DPHY_TX_CONTROL from vbios table in dp_aux_init
//AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32;
// Set AUX_TX_REF_DIV Divider to generate 2 MHz reference from refclk
for (i = 0; i < dc->res_pool->pipe_count; i++) {
if (!context->res_ctx.pipe_ctx[i].stream)
continue;
-#if defined (CONFIG_DRM_AMD_DC_DP2_0)
if (is_dp_128b_132b_signal(&context->res_ctx.pipe_ctx[i]))
return true;
-#endif
}
return false;
}
bool safe_to_lower);
int (*get_dp_ref_clk_frequency)(struct clk_mgr *clk_mgr);
+ int (*get_dtb_ref_clk_frequency)(struct clk_mgr *clk_mgr);
void (*set_low_power_state)(struct clk_mgr *clk_mgr);
const struct dccg_funcs *funcs;
int pipe_dppclk_khz[MAX_PIPES];
int ref_dppclk;
- int dtbclk_khz[MAX_PIPES];
- int audio_dtbclk_khz;
+ //int dtbclk_khz[MAX_PIPES];/* TODO needs to be removed */
+ //int audio_dtbclk_khz;/* TODO needs to be removed */
+ int ref_dtbclk_khz;/* TODO needs to be removed */
+};
+
+struct dtbclk_dto_params {
+ const struct dc_crtc_timing *timing;
+ int otg_inst;
+ int pixclk_khz;
+ int req_audio_dtbclk_khz;
+ int num_odm_segments;
int ref_dtbclk_khz;
};
void (*set_dtbclk_dto)(
struct dccg *dccg,
- int dtbclk_inst,
- int req_dtbclk_khz,
- int num_odm_segments,
- const struct dc_crtc_timing *timing);
+ struct dtbclk_dto_params *dto_params);
void (*set_audio_dtbclk_dto)(
struct dccg *dccg,
#include "core_types.h"
#include "dccg.h"
#include "dc_link_dp.h"
+#include "clk_mgr.h"
static enum phyd32clk_clock_source get_phyd32clk_src(struct dc_link *link)
{
struct hpo_dp_link_encoder *link_enc = pipe_ctx->link_res.hpo_dp_link_enc;
struct dccg *dccg = dc->res_pool->dccg;
struct timing_generator *tg = pipe_ctx->stream_res.tg;
- int odm_segment_count = get_odm_segment_count(pipe_ctx);
+ struct dtbclk_dto_params dto_params = {0};
enum phyd32clk_clock_source phyd32clk = get_phyd32clk_src(pipe_ctx->stream->link);
+ dto_params.otg_inst = tg->inst;
+ dto_params.pixclk_khz = pipe_ctx->stream->phy_pix_clk;
+ dto_params.num_odm_segments = get_odm_segment_count(pipe_ctx);
+ dto_params.timing = &pipe_ctx->stream->timing;
+ dto_params.ref_dtbclk_khz = dc->clk_mgr->funcs->get_dtb_ref_clk_frequency(dc->clk_mgr);
+
dccg->funcs->set_dpstreamclk(dccg, DTBCLK0, tg->inst);
dccg->funcs->enable_symclk32_se(dccg, stream_enc->inst, phyd32clk);
- dccg->funcs->set_dtbclk_dto(dccg, tg->inst, pipe_ctx->stream->phy_pix_clk,
- odm_segment_count,
- &pipe_ctx->stream->timing);
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
stream_enc->funcs->enable_stream(stream_enc);
stream_enc->funcs->map_stream_to_link(stream_enc, stream_enc->inst, link_enc->inst);
}
struct hpo_dp_stream_encoder *stream_enc = pipe_ctx->stream_res.hpo_dp_stream_enc;
struct dccg *dccg = dc->res_pool->dccg;
struct timing_generator *tg = pipe_ctx->stream_res.tg;
+ struct dtbclk_dto_params dto_params = {0};
+
+ dto_params.otg_inst = tg->inst;
+ dto_params.timing = &pipe_ctx->stream->timing;
stream_enc->funcs->disable(stream_enc);
- dccg->funcs->set_dtbclk_dto(dccg, tg->inst, 0, 0, &pipe_ctx->stream->timing);
+ dccg->funcs->set_dtbclk_dto(dccg, &dto_params);
dccg->funcs->disable_symclk32_se(dccg, stream_enc->inst);
dccg->funcs->set_dpstreamclk(dccg, REFCLK, tg->inst);
}
{
union dmub_gpint_data_register cmd;
const uint32_t timeout = 100;
- uint32_t in_reset, scratch, i;
+ uint32_t in_reset, scratch, i, pwait_mode;
REG_GET(DMCUB_CNTL2, DMCUB_SOFT_RESET, &in_reset);
udelay(1);
}
+ for (i = 0; i < timeout; ++i) {
+ REG_GET(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS, &pwait_mode);
+ if (pwait_mode & (1 << 0))
+ break;
+
+ udelay(1);
+ }
/* Force reset in case we timed out, DMCUB is likely hung. */
}
REG_WRITE(DMCUB_INBOX1_WPTR, 0);
REG_WRITE(DMCUB_OUTBOX1_RPTR, 0);
REG_WRITE(DMCUB_OUTBOX1_WPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX0_RPTR, 0);
+ REG_WRITE(DMCUB_OUTBOX0_WPTR, 0);
REG_WRITE(DMCUB_SCRATCH0, 0);
/* Clear the GPINT command manually so we don't send anything during boot. */
DMUB_SF(DCN_VM_FB_OFFSET, FB_OFFSET) \
DMUB_SF(DMCUB_INBOX0_WPTR, DMCUB_INBOX0_WPTR) \
DMUB_SF(DMCUB_INTERRUPT_ENABLE, DMCUB_GPINT_IH_INT_EN) \
- DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK)
+ DMUB_SF(DMCUB_INTERRUPT_ACK, DMCUB_GPINT_IH_INT_ACK) \
+ DMUB_SF(DMCUB_CNTL, DMCUB_PWAIT_MODE_STATUS)
struct dmub_srv_dcn31_reg_offset {
#define DMUB_SR(reg) uint32_t reg;
static const uint8_t DP_SINK_DEVICE_STR_ID_1[] = {7, 1, 8, 7, 3, 0};
static const uint8_t DP_SINK_DEVICE_STR_ID_2[] = {7, 1, 8, 7, 5, 0};
+static const u8 DP_SINK_BRANCH_DEV_NAME_7580[] = "7580\x80u";
+
/*MST Dock*/
static const uint8_t SYNAPTICS_DEVICE_ID[] = "SYNA";
#ifndef SMU_11_0_7_PPTABLE_H
#define SMU_11_0_7_PPTABLE_H
+#pragma pack(push, 1)
#define SMU_11_0_7_TABLE_FORMAT_REVISION 15
uint32_t max[SMU_11_0_7_MAX_ODSETTING]; //default maximum settings
uint32_t min[SMU_11_0_7_MAX_ODSETTING]; //default minimum settings
int16_t pm_setting[SMU_11_0_7_MAX_PMSETTING]; //Optimized power mode feature settings
-} __attribute__((packed));
+};
enum SMU_11_0_7_PPCLOCK_ID {
SMU_11_0_7_PPCLOCK_GFXCLK = 0,
uint32_t count; //power_saving_clock_count = SMU_11_0_7_PPCLOCK_COUNT
uint32_t max[SMU_11_0_7_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Maximum array In MHz
uint32_t min[SMU_11_0_7_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Minimum array In MHz
-} __attribute__((packed));
+};
struct smu_11_0_7_powerplay_table
{
struct smu_11_0_7_overdrive_table overdrive_table;
PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h
-} __attribute__((packed));
+};
+
+#pragma pack(pop)
#endif
#ifndef SMU_11_0_PPTABLE_H
#define SMU_11_0_PPTABLE_H
+#pragma pack(push, 1)
#define SMU_11_0_TABLE_FORMAT_REVISION 12
uint8_t cap[SMU_11_0_MAX_ODFEATURE]; //OD feature support flags
uint32_t max[SMU_11_0_MAX_ODSETTING]; //default maximum settings
uint32_t min[SMU_11_0_MAX_ODSETTING]; //default minimum settings
-} __attribute__((packed));
+};
enum SMU_11_0_PPCLOCK_ID {
SMU_11_0_PPCLOCK_GFXCLK = 0,
uint32_t count; //power_saving_clock_count = SMU_11_0_PPCLOCK_COUNT
uint32_t max[SMU_11_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Maximum array In MHz
uint32_t min[SMU_11_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Minimum array In MHz
-} __attribute__((packed));
+};
struct smu_11_0_powerplay_table
{
#ifndef SMU_11_0_PARTIAL_PPTABLE
PPTable_t smc_pptable; //PPTable_t in smu11_driver_if.h
#endif
-} __attribute__((packed));
+};
+
+#pragma pack(pop)
#endif
#ifndef SMU_13_0_7_PPTABLE_H
#define SMU_13_0_7_PPTABLE_H
+#pragma pack(push, 1)
+
#define SMU_13_0_7_TABLE_FORMAT_REVISION 15
//// POWERPLAYTABLE::ulPlatformCaps
struct smu_13_0_7_overdrive_table overdrive_table;
uint8_t padding1;
PPTable_t smc_pptable; //PPTable_t in driver_if.h
-} __attribute__((packed));
+};
+#pragma pack(pop)
#endif
#ifndef SMU_13_0_PPTABLE_H
#define SMU_13_0_PPTABLE_H
+#pragma pack(push, 1)
+
#define SMU_13_0_TABLE_FORMAT_REVISION 1
//// POWERPLAYTABLE::ulPlatformCaps
uint8_t cap[SMU_13_0_MAX_ODFEATURE]; //OD feature support flags
uint32_t max[SMU_13_0_MAX_ODSETTING]; //default maximum settings
uint32_t min[SMU_13_0_MAX_ODSETTING]; //default minimum settings
-} __attribute__((packed));
+};
enum SMU_13_0_PPCLOCK_ID {
SMU_13_0_PPCLOCK_GFXCLK = 0,
uint32_t count; //power_saving_clock_count = SMU_11_0_PPCLOCK_COUNT
uint32_t max[SMU_13_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Maximum array In MHz
uint32_t min[SMU_13_0_MAX_PPCLOCK]; //PowerSavingClock Mode Clock Minimum array In MHz
-} __attribute__((packed));
+};
struct smu_13_0_powerplay_table {
struct atom_common_table_header header;
#ifndef SMU_13_0_PARTIAL_PPTABLE
PPTable_t smc_pptable; //PPTable_t in driver_if.h
#endif
-} __attribute__((packed));
+};
+
+#pragma pack(pop)
#endif
uint32_t crystal_clock_freq = 2500;
uint32_t tach_period;
+ if (speed == 0)
+ return -EINVAL;
/*
* To prevent from possible overheat, some ASICs may have requirement
* for minimum fan speed:
}
if (bDPExecute)
- ast->tx_chip_type = AST_TX_ASTDP;
+ ast->tx_chip_types |= BIT(AST_TX_ASTDP);
ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xE5,
(u8) ~ASTDP_HOST_EDID_READ_DONE_MASK,
ASTDP_HOST_EDID_READ_DONE);
- } else
- ast->tx_chip_type = AST_TX_NONE;
+ }
}
ast_init_dvo(dev);
break;
default:
- if (ast->tx_chip_type == AST_TX_SIL164)
+ if (ast->tx_chip_types & BIT(AST_TX_SIL164))
ast_init_dvo(dev);
else
ast_init_analog(dev);
AST_TX_ASTDP,
};
+#define AST_TX_NONE_BIT BIT(AST_TX_NONE)
+#define AST_TX_SIL164_BIT BIT(AST_TX_SIL164)
+#define AST_TX_DP501_BIT BIT(AST_TX_DP501)
+#define AST_TX_ASTDP_BIT BIT(AST_TX_ASTDP)
+
#define AST_DRAM_512Mx16 0
#define AST_DRAM_1Gx16 1
#define AST_DRAM_512Mx32 2
struct drm_plane primary_plane;
struct ast_cursor_plane cursor_plane;
struct drm_crtc crtc;
- union {
+ struct {
struct {
struct drm_encoder encoder;
struct ast_vga_connector vga_connector;
ast_use_defaults
} config_mode;
- enum ast_tx_chip tx_chip_type;
+ unsigned long tx_chip_types; /* bitfield of enum ast_chip_type */
u8 *dp501_fw_addr;
const struct firmware *dp501_fw; /* dp501 fw */
};
}
/* Check 3rd Tx option (digital output afaik) */
- ast->tx_chip_type = AST_TX_NONE;
+ ast->tx_chip_types |= AST_TX_NONE_BIT;
/*
* VGACRA3 Enhanced Color Mode Register, check if DVO is already
if (!*need_post) {
jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xff);
if (jreg & 0x80)
- ast->tx_chip_type = AST_TX_SIL164;
+ ast->tx_chip_types = AST_TX_SIL164_BIT;
}
if ((ast->chip == AST2300) || (ast->chip == AST2400) || (ast->chip == AST2500)) {
jreg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff);
switch (jreg) {
case 0x04:
- ast->tx_chip_type = AST_TX_SIL164;
+ ast->tx_chip_types = AST_TX_SIL164_BIT;
break;
case 0x08:
ast->dp501_fw_addr = drmm_kzalloc(dev, 32*1024, GFP_KERNEL);
}
fallthrough;
case 0x0c:
- ast->tx_chip_type = AST_TX_DP501;
+ ast->tx_chip_types = AST_TX_DP501_BIT;
}
} else if (ast->chip == AST2600)
ast_dp_launch(&ast->base, 0);
/* Print stuff for diagnostic purposes */
- switch(ast->tx_chip_type) {
- case AST_TX_SIL164:
+ if (ast->tx_chip_types & AST_TX_NONE_BIT)
+ drm_info(dev, "Using analog VGA\n");
+ if (ast->tx_chip_types & AST_TX_SIL164_BIT)
drm_info(dev, "Using Sil164 TMDS transmitter\n");
- break;
- case AST_TX_DP501:
+ if (ast->tx_chip_types & AST_TX_DP501_BIT)
drm_info(dev, "Using DP501 DisplayPort transmitter\n");
- break;
- default:
- drm_info(dev, "Analog VGA only\n");
- }
+
return 0;
}
case DRM_MODE_DPMS_ON:
ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x01, 0xdf, 0);
ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xfc, 0);
- if (ast->tx_chip_type == AST_TX_DP501)
+ if (ast->tx_chip_types & AST_TX_DP501_BIT)
ast_set_dp501_video_output(crtc->dev, 1);
- if (ast->tx_chip_type == AST_TX_ASTDP) {
+ if (ast->tx_chip_types & AST_TX_ASTDP_BIT) {
ast_dp_power_on_off(crtc->dev, AST_DP_POWER_ON);
ast_wait_for_vretrace(ast);
ast_dp_set_on_off(crtc->dev, 1);
case DRM_MODE_DPMS_SUSPEND:
case DRM_MODE_DPMS_OFF:
ch = mode;
- if (ast->tx_chip_type == AST_TX_DP501)
+ if (ast->tx_chip_types & AST_TX_DP501_BIT)
ast_set_dp501_video_output(crtc->dev, 0);
- break;
- if (ast->tx_chip_type == AST_TX_ASTDP) {
+ if (ast->tx_chip_types & AST_TX_ASTDP_BIT) {
ast_dp_set_on_off(crtc->dev, 0);
ast_dp_power_on_off(crtc->dev, AST_DP_POWER_OFF);
}
ast_set_index_reg_mask(ast, AST_IO_SEQ_PORT, 0x01, 0xdf, 0x20);
ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xfc, ch);
+ break;
}
}
ast_crtc_load_lut(ast, crtc);
//Set Aspeed Display-Port
- if (ast->tx_chip_type == AST_TX_ASTDP)
+ if (ast->tx_chip_types & AST_TX_ASTDP_BIT)
ast_dp_set_mode(crtc, vbios_mode_info);
mutex_unlock(&ast->ioregs_lock);
ast_crtc_init(dev);
- switch (ast->tx_chip_type) {
- case AST_TX_NONE:
+ if (ast->tx_chip_types & AST_TX_NONE_BIT) {
ret = ast_vga_output_init(ast);
- break;
- case AST_TX_SIL164:
+ if (ret)
+ return ret;
+ }
+ if (ast->tx_chip_types & AST_TX_SIL164_BIT) {
ret = ast_sil164_output_init(ast);
- break;
- case AST_TX_DP501:
+ if (ret)
+ return ret;
+ }
+ if (ast->tx_chip_types & AST_TX_DP501_BIT) {
ret = ast_dp501_output_init(ast);
- break;
- case AST_TX_ASTDP:
+ if (ret)
+ return ret;
+ }
+ if (ast->tx_chip_types & AST_TX_ASTDP_BIT) {
ret = ast_astdp_output_init(ast);
- break;
+ if (ret)
+ return ret;
}
- if (ret)
- return ret;
drm_mode_config_reset(dev);
ast_init_3rdtx(dev);
} else {
- if (ast->tx_chip_type != AST_TX_NONE)
+ if (ast->tx_chip_types & AST_TX_SIL164_BIT)
ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xcf, 0x80); /* Enable DVO */
}
}
return 0;
}
+static
+struct drm_crtc *analogix_dp_get_old_crtc(struct analogix_dp_device *dp,
+ struct drm_atomic_state *state)
+{
+ struct drm_encoder *encoder = dp->encoder;
+ struct drm_connector *connector;
+ struct drm_connector_state *conn_state;
+
+ connector = drm_atomic_get_old_connector_for_encoder(state, encoder);
+ if (!connector)
+ return NULL;
+
+ conn_state = drm_atomic_get_old_connector_state(state, connector);
+ if (!conn_state)
+ return NULL;
+
+ return conn_state->crtc;
+}
+
static
struct drm_crtc *analogix_dp_get_new_crtc(struct analogix_dp_device *dp,
struct drm_atomic_state *state)
{
struct drm_atomic_state *old_state = old_bridge_state->base.state;
struct analogix_dp_device *dp = bridge->driver_private;
- struct drm_crtc *crtc;
+ struct drm_crtc *old_crtc, *new_crtc;
+ struct drm_crtc_state *old_crtc_state = NULL;
struct drm_crtc_state *new_crtc_state = NULL;
+ int ret;
- crtc = analogix_dp_get_new_crtc(dp, old_state);
- if (!crtc)
+ new_crtc = analogix_dp_get_new_crtc(dp, old_state);
+ if (!new_crtc)
goto out;
- new_crtc_state = drm_atomic_get_new_crtc_state(old_state, crtc);
+ new_crtc_state = drm_atomic_get_new_crtc_state(old_state, new_crtc);
if (!new_crtc_state)
goto out;
return;
out:
+ old_crtc = analogix_dp_get_old_crtc(dp, old_state);
+ if (old_crtc) {
+ old_crtc_state = drm_atomic_get_old_crtc_state(old_state,
+ old_crtc);
+
+ /* When moving from PSR to fully disabled, exit PSR first. */
+ if (old_crtc_state && old_crtc_state->self_refresh_active) {
+ ret = analogix_dp_disable_psr(dp);
+ if (ret)
+ DRM_ERROR("Failed to disable psr (%d)\n", ret);
+ }
+ }
+
analogix_dp_bridge_disable(bridge);
}
bridge, flags);
}
-static int fsl_ldb_atomic_check(struct drm_bridge *bridge,
- struct drm_bridge_state *bridge_state,
- struct drm_crtc_state *crtc_state,
- struct drm_connector_state *conn_state)
-{
- /* Invert DE signal polarity. */
- bridge_state->input_bus_cfg.flags &= ~(DRM_BUS_FLAG_DE_LOW |
- DRM_BUS_FLAG_DE_HIGH);
- if (bridge_state->output_bus_cfg.flags & DRM_BUS_FLAG_DE_LOW)
- bridge_state->input_bus_cfg.flags |= DRM_BUS_FLAG_DE_HIGH;
- else if (bridge_state->output_bus_cfg.flags & DRM_BUS_FLAG_DE_HIGH)
- bridge_state->input_bus_cfg.flags |= DRM_BUS_FLAG_DE_LOW;
-
- return 0;
-}
-
static void fsl_ldb_atomic_enable(struct drm_bridge *bridge,
struct drm_bridge_state *old_bridge_state)
{
reg = LDB_CTRL_CH0_ENABLE;
if (fsl_ldb->lvds_dual_link)
- reg |= LDB_CTRL_CH1_ENABLE;
+ reg |= LDB_CTRL_CH1_ENABLE | LDB_CTRL_SPLIT_MODE;
if (lvds_format_24bpp) {
reg |= LDB_CTRL_CH0_DATA_WIDTH;
{
struct fsl_ldb *fsl_ldb = to_fsl_ldb(bridge);
- if (mode->clock > (fsl_ldb->lvds_dual_link ? 80000 : 160000))
+ if (mode->clock > (fsl_ldb->lvds_dual_link ? 160000 : 80000))
return MODE_CLOCK_HIGH;
return MODE_OK;
static const struct drm_bridge_funcs funcs = {
.attach = fsl_ldb_attach,
- .atomic_check = fsl_ldb_atomic_check,
.atomic_enable = fsl_ldb_atomic_enable,
.atomic_disable = fsl_ldb_atomic_disable,
.atomic_duplicate_state = drm_atomic_helper_bridge_duplicate_state,
ctx->host_node = of_graph_get_remote_port_parent(endpoint);
of_node_put(endpoint);
- if (ctx->dsi_lanes < 0 || ctx->dsi_lanes > 4) {
+ if (ctx->dsi_lanes <= 0 || ctx->dsi_lanes > 4) {
ret = -EINVAL;
goto err_put_node;
}
const struct drm_driver *req_driver)
{
resource_size_t base, size;
- int bar, ret = 0;
+ int bar, ret;
+
+ /*
+ * WARNING: Apparently we must kick fbdev drivers before vgacon,
+ * otherwise the vga fbdev driver falls over.
+ */
+#if IS_REACHABLE(CONFIG_FB)
+ ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name);
+ if (ret)
+ return ret;
+#endif
+ ret = vga_remove_vgacon(pdev);
+ if (ret)
+ return ret;
for (bar = 0; bar < PCI_STD_NUM_BARS; ++bar) {
if (!(pci_resource_flags(pdev, bar) & IORESOURCE_MEM))
drm_aperture_detach_drivers(base, size);
}
- /*
- * WARNING: Apparently we must kick fbdev drivers before vgacon,
- * otherwise the vga fbdev driver falls over.
- */
-#if IS_REACHABLE(CONFIG_FB)
- ret = remove_conflicting_pci_framebuffers(pdev, req_driver->name);
-#endif
- if (ret == 0)
- ret = vga_remove_vgacon(pdev);
- return ret;
+ return 0;
}
EXPORT_SYMBOL(drm_aperture_remove_conflicting_pci_framebuffers);
return drm_atomic_crtc_effectively_active(old_state);
/*
- * We need to run through the crtc_funcs->disable() function if the CRTC
- * is currently on, if it's transitioning to self refresh mode, or if
- * it's in self refresh mode and needs to be fully disabled.
+ * We need to disable bridge(s) and CRTC if we're transitioning out of
+ * self-refresh and changing CRTCs at the same time, because the
+ * bridge tracks self-refresh status via CRTC state.
+ */
+ if (old_state->self_refresh_active &&
+ old_state->crtc != new_state->crtc)
+ return true;
+
+ /*
+ * We also need to run through the crtc_funcs->disable() function if
+ * the CRTC is currently on, if it's transitioning to self refresh
+ * mode, or if it's in self refresh mode and needs to be fully
+ * disabled.
*/
return old_state->active ||
(old_state->self_refresh_active && !new_state->active) ||
struct iosys_map *map)
{
struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+ int ret;
+
+ dma_resv_lock(gem->resv, NULL);
+ ret = ttm_bo_vmap(bo, map);
+ dma_resv_unlock(gem->resv);
- return ttm_bo_vmap(bo, map);
+ return ret;
}
EXPORT_SYMBOL(drm_gem_ttm_vmap);
{
struct ttm_buffer_object *bo = drm_gem_ttm_of_gem(gem);
+ dma_resv_lock(gem->resv, NULL);
ttm_bo_vunmap(bo, map);
+ dma_resv_unlock(gem->resv);
}
EXPORT_SYMBOL(drm_gem_ttm_vunmap);
DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "AYA NEO 2021"),
},
.driver_data = (void *)&lcd800x1280_rightside_up,
+ }, { /* AYA NEO NEXT */
+ .matches = {
+ DMI_EXACT_MATCH(DMI_BOARD_VENDOR, "AYANEO"),
+ DMI_MATCH(DMI_BOARD_NAME, "NEXT"),
+ },
+ .driver_data = (void *)&lcd800x1280_rightside_up,
}, { /* Chuwi HiBook (CWI514) */
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Hampoo"),
DMI_MATCH(DMI_PRODUCT_NAME, "Lenovo YB1-X9"),
},
.driver_data = (void *)&lcd1200x1920_rightside_up,
+ }, { /* Lenovo Yoga Tablet 2 830F / 830L */
+ .matches = {
+ /*
+ * Note this also matches the Lenovo Yoga Tablet 2 1050F/L
+ * since that uses the same mainboard. The resolution match
+ * will limit this to only matching on the 830F/L. Neither has
+ * any external video outputs so those are not a concern.
+ */
+ DMI_MATCH(DMI_SYS_VENDOR, "Intel Corp."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "VALLEYVIEW C0 PLATFORM"),
+ DMI_MATCH(DMI_BOARD_NAME, "BYT-T FFD8"),
+ /* Partial match on beginning of BIOS version */
+ DMI_MATCH(DMI_BIOS_VERSION, "BLADE_21"),
+ },
+ .driver_data = (void *)&lcd1200x1920_rightside_up,
}, { /* OneGX1 Pro */
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "SYSTEM_MANUFACTURER"),
}, {
DRV_PTR(mixer_driver, CONFIG_DRM_EXYNOS_MIXER),
DRM_COMPONENT_DRIVER
- }, {
- DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
- DRM_COMPONENT_DRIVER
}, {
DRV_PTR(dp_driver, CONFIG_DRM_EXYNOS_DP),
DRM_COMPONENT_DRIVER
}, {
DRV_PTR(dsi_driver, CONFIG_DRM_EXYNOS_DSI),
DRM_COMPONENT_DRIVER
+ }, {
+ DRV_PTR(mic_driver, CONFIG_DRM_EXYNOS_MIC),
+ DRM_COMPONENT_DRIVER
}, {
DRV_PTR(hdmi_driver, CONFIG_DRM_EXYNOS_HDMI),
DRM_COMPONENT_DRIVER
#include <drm/drm_print.h>
#include "exynos_drm_drv.h"
+#include "exynos_drm_crtc.h"
/* Sysreg registers for MIC */
#define DSD_CFG_MUX 0x1004
bool i80_mode;
struct videomode vm;
- struct drm_encoder *encoder;
struct drm_bridge bridge;
- struct drm_bridge *next_bridge;
bool enabled;
};
writel(reg, mic->reg + MIC_OP);
}
-static void mic_disable(struct drm_bridge *bridge) { }
-
static void mic_post_disable(struct drm_bridge *bridge)
{
struct exynos_mic *mic = bridge->driver_private;
mutex_unlock(&mic_mutex);
}
-static void mic_enable(struct drm_bridge *bridge) { }
-
-static int mic_attach(struct drm_bridge *bridge,
- enum drm_bridge_attach_flags flags)
-{
- struct exynos_mic *mic = bridge->driver_private;
-
- return drm_bridge_attach(bridge->encoder, mic->next_bridge,
- &mic->bridge, flags);
-}
-
static const struct drm_bridge_funcs mic_bridge_funcs = {
- .disable = mic_disable,
.post_disable = mic_post_disable,
.mode_set = mic_mode_set,
.pre_enable = mic_pre_enable,
- .enable = mic_enable,
- .attach = mic_attach,
};
static int exynos_mic_bind(struct device *dev, struct device *master,
void *data)
{
struct exynos_mic *mic = dev_get_drvdata(dev);
+ struct drm_device *drm_dev = data;
+ struct exynos_drm_crtc *crtc = exynos_drm_crtc_get_by_type(drm_dev,
+ EXYNOS_DISPLAY_TYPE_LCD);
+ struct drm_encoder *e, *encoder = NULL;
+
+ drm_for_each_encoder(e, drm_dev)
+ if (e->possible_crtcs == drm_crtc_mask(&crtc->base))
+ encoder = e;
+ if (!encoder)
+ return -ENODEV;
mic->bridge.driver_private = mic;
- return 0;
+ return drm_bridge_attach(encoder, &mic->bridge, NULL, 0);
}
static void exynos_mic_unbind(struct device *dev, struct device *master,
{
struct device *dev = &pdev->dev;
struct exynos_mic *mic;
- struct device_node *remote;
struct resource res;
int ret, i;
}
}
- remote = of_graph_get_remote_node(dev->of_node, 1, 0);
- mic->next_bridge = of_drm_find_bridge(remote);
- if (IS_ERR(mic->next_bridge)) {
- DRM_DEV_ERROR(dev, "mic: Failed to find next bridge\n");
- ret = PTR_ERR(mic->next_bridge);
- goto err;
- }
-
- of_node_put(remote);
-
platform_set_drvdata(pdev, mic);
mic->bridge.funcs = &mic_bridge_funcs;
return intel_dp_is_edp(intel_dp) ? 810000 : 1350000;
}
+static bool is_low_voltage_sku(struct drm_i915_private *i915, enum phy phy)
+{
+ u32 voltage;
+
+ voltage = intel_de_read(i915, ICL_PORT_COMP_DW3(phy)) & VOLTAGE_INFO_MASK;
+
+ return voltage == VOLTAGE_INFO_0_85V;
+}
+
static int icl_max_source_rate(struct intel_dp *intel_dp)
{
struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port);
- if (intel_phy_is_combo(dev_priv, phy) && !intel_dp_is_edp(intel_dp))
+ if (intel_phy_is_combo(dev_priv, phy) &&
+ (is_low_voltage_sku(dev_priv, phy) || !intel_dp_is_edp(intel_dp)))
return 540000;
return 810000;
static int ehl_max_source_rate(struct intel_dp *intel_dp)
{
- if (intel_dp_is_edp(intel_dp))
+ struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+ struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
+ enum phy phy = intel_port_to_phy(dev_priv, dig_port->base.port);
+
+ if (intel_dp_is_edp(intel_dp) || is_low_voltage_sku(dev_priv, phy))
+ return 540000;
+
+ return 810000;
+}
+
+static int dg1_max_source_rate(struct intel_dp *intel_dp)
+{
+ struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+ struct drm_i915_private *i915 = to_i915(dig_port->base.base.dev);
+ enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
+
+ if (intel_phy_is_combo(i915, phy) && is_low_voltage_sku(i915, phy))
return 540000;
return 810000;
max_rate = dg2_max_source_rate(intel_dp);
else if (IS_ALDERLAKE_P(dev_priv) || IS_ALDERLAKE_S(dev_priv) ||
IS_DG1(dev_priv) || IS_ROCKETLAKE(dev_priv))
- max_rate = 810000;
+ max_rate = dg1_max_source_rate(intel_dp);
else if (IS_JSL_EHL(dev_priv))
max_rate = ehl_max_source_rate(intel_dp);
else
ret = drm_connector_init(dev, connector, &intel_dp_mst_connector_funcs,
DRM_MODE_CONNECTOR_DisplayPort);
if (ret) {
+ drm_dp_mst_put_port_malloc(port);
intel_connector_free(intel_connector);
return NULL;
}
}
/*
- * Display WA #22010492432: ehl, tgl, adl-p
+ * Display WA #22010492432: ehl, tgl, adl-s, adl-p
* Program half of the nominal DCO divider fraction value.
*/
static bool
{
return ((IS_PLATFORM(i915, INTEL_ELKHARTLAKE) &&
IS_JSL_EHL_DISPLAY_STEP(i915, STEP_B0, STEP_FOREVER)) ||
- IS_TIGERLAKE(i915) || IS_ALDERLAKE_P(i915)) &&
+ IS_TIGERLAKE(i915) || IS_ALDERLAKE_S(i915) || IS_ALDERLAKE_P(i915)) &&
i915->dpll.ref_clks.nssc == 38400;
}
case I915_CONTEXT_PARAM_PERSISTENCE:
if (args->size)
ret = -EINVAL;
- ret = proto_context_set_persistence(fpriv->dev_priv, pc,
- args->value);
+ else
+ ret = proto_context_set_persistence(fpriv->dev_priv, pc,
+ args->value);
break;
case I915_CONTEXT_PARAM_PROTECTED_CONTENT:
if (obj->cache_dirty)
return false;
- if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
- return true;
-
if (IS_DGFX(i915))
return false;
+ if (!(obj->cache_coherent & I915_BO_CACHE_COHERENT_FOR_WRITE))
+ return true;
+
/* Currently in use by HW (display engine)? Keep flushed. */
return i915_gem_object_is_framebuffer(obj);
}
}
}
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
+ /* Reserve enough slots to accommodate composite fences */
+ err = dma_resv_reserve_fences(vma->obj->base.resv, eb->num_batches);
if (err)
return err;
if (page_size)
default_page_size = page_size;
+ /* We should be able to fit a page within an sg entry */
+ GEM_BUG_ON(overflows_type(default_page_size, u32));
GEM_BUG_ON(!is_power_of_2_u64(default_page_size));
GEM_BUG_ON(default_page_size < PAGE_SIZE);
struct ttm_resource *res)
{
struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+ u32 page_alignment;
if (!i915_ttm_gtt_binds_lmem(res))
return i915_ttm_tt_get_st(bo->ttm);
+ page_alignment = bo->page_alignment << PAGE_SHIFT;
+ if (!page_alignment)
+ page_alignment = obj->mm.region->min_page_size;
+
/*
* If CPU mapping differs, we need to add the ttm_tt pages to
* the resulting st. Might make sense for GGTT.
struct i915_refct_sgt *rsgt;
rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
- res);
+ res,
+ page_alignment);
if (IS_ERR(rsgt))
return rsgt;
return i915_refct_sgt_get(obj->ttm.cached_io_rsgt);
}
- return intel_region_ttm_resource_to_rsgt(obj->mm.region, res);
+ return intel_region_ttm_resource_to_rsgt(obj->mm.region, res,
+ page_alignment);
}
static int i915_ttm_truncate(struct drm_i915_gem_object *obj)
#include <linux/jiffies.h>
#include "gt/intel_engine.h"
+#include "gt/intel_rps.h"
#include "i915_gem_ioctls.h"
#include "i915_gem_object.h"
timeout);
}
+static void
+i915_gem_object_boost(struct dma_resv *resv, unsigned int flags)
+{
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+
+ /*
+ * Prescan all fences for potential boosting before we begin waiting.
+ *
+ * When we wait, we wait on outstanding fences serially. If the
+ * dma-resv contains a sequence such as 1:1, 1:2 instead of a reduced
+ * form 1:2, then as we look at each wait in turn we see that each
+ * request is currently executing and not worthy of boosting. But if
+ * we only happen to look at the final fence in the sequence (because
+ * of request coalescing or splitting between read/write arrays by
+ * the iterator), then we would boost. As such our decision to boost
+ * or not is delicately balanced on the order we wait on fences.
+ *
+ * So instead of looking for boosts sequentially, look for all boosts
+ * upfront and then wait on the outstanding fences.
+ */
+
+ dma_resv_iter_begin(&cursor, resv,
+ dma_resv_usage_rw(flags & I915_WAIT_ALL));
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ if (dma_fence_is_i915(fence) &&
+ !i915_request_started(to_request(fence)))
+ intel_rps_boost(to_request(fence));
+ dma_resv_iter_end(&cursor);
+}
+
static long
i915_gem_object_wait_reservation(struct dma_resv *resv,
unsigned int flags,
struct dma_fence *fence;
long ret = timeout ?: 1;
+ i915_gem_object_boost(resv, flags);
+
dma_resv_iter_begin(&cursor, resv,
dma_resv_usage_rw(flags & I915_WAIT_ALL));
dma_resv_for_each_fence_unlocked(&cursor, fence) {
u8 child_index;
/** @guc: GuC specific members for parallel submission */
struct {
- /** @wqi_head: head pointer in work queue */
+ /** @wqi_head: cached head pointer in work queue */
u16 wqi_head;
- /** @wqi_tail: tail pointer in work queue */
+ /** @wqi_tail: cached tail pointer in work queue */
u16 wqi_tail;
+ /** @wq_head: pointer to the actual head in work queue */
+ u32 *wq_head;
+ /** @wq_tail: pointer to the actual head in work queue */
+ u32 *wq_tail;
+ /** @wq_status: pointer to the status in work queue */
+ u32 *wq_status;
+
/**
* @parent_page: page in context state (ce->state) used
* by parent for work queue, process descriptor
int intel_engine_stop_cs(struct intel_engine_cs *engine);
void intel_engine_cancel_stop_cs(struct intel_engine_cs *engine);
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine);
+
void intel_engine_set_hwsp_writemask(struct intel_engine_cs *engine, u32 mask);
u64 intel_engine_get_active_head(const struct intel_engine_cs *engine);
intel_uncore_write_fw(uncore, mode, _MASKED_BIT_ENABLE(STOP_RING));
/*
- * Wa_22011802037 : gen12, Prior to doing a reset, ensure CS is
+ * Wa_22011802037 : gen11, gen12, Prior to doing a reset, ensure CS is
* stopped, set ring stop bit and prefetch disable bit to halt CS
*/
- if (GRAPHICS_VER(engine->i915) == 12)
+ if (IS_GRAPHICS_VER(engine->i915, 11, 12))
intel_uncore_write_fw(uncore, RING_MODE_GEN7(engine->mmio_base),
_MASKED_BIT_ENABLE(GEN12_GFX_PREFETCH_DISABLE));
return -ENODEV;
ENGINE_TRACE(engine, "\n");
+ /*
+ * TODO: Find out why occasionally stopping the CS times out. Seen
+ * especially with gem_eio tests.
+ *
+ * Occasionally trying to stop the cs times out, but does not adversely
+ * affect functionality. The timeout is set as a config parameter that
+ * defaults to 100ms. In most cases the follow up operation is to wait
+ * for pending MI_FORCE_WAKES. The assumption is that this timeout is
+ * sufficient for any pending MI_FORCEWAKEs to complete. Once root
+ * caused, the caller must check and handle the return from this
+ * function.
+ */
if (__intel_engine_stop_cs(engine, 1000, stop_timeout(engine))) {
ENGINE_TRACE(engine,
"timed out on STOP_RING -> IDLE; HEAD:%04x, TAIL:%04x\n",
ENGINE_WRITE_FW(engine, RING_MI_MODE, _MASKED_BIT_DISABLE(STOP_RING));
}
+static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
+{
+ static const i915_reg_t _reg[I915_NUM_ENGINES] = {
+ [RCS0] = MSG_IDLE_CS,
+ [BCS0] = MSG_IDLE_BCS,
+ [VCS0] = MSG_IDLE_VCS0,
+ [VCS1] = MSG_IDLE_VCS1,
+ [VCS2] = MSG_IDLE_VCS2,
+ [VCS3] = MSG_IDLE_VCS3,
+ [VCS4] = MSG_IDLE_VCS4,
+ [VCS5] = MSG_IDLE_VCS5,
+ [VCS6] = MSG_IDLE_VCS6,
+ [VCS7] = MSG_IDLE_VCS7,
+ [VECS0] = MSG_IDLE_VECS0,
+ [VECS1] = MSG_IDLE_VECS1,
+ [VECS2] = MSG_IDLE_VECS2,
+ [VECS3] = MSG_IDLE_VECS3,
+ [CCS0] = MSG_IDLE_CS,
+ [CCS1] = MSG_IDLE_CS,
+ [CCS2] = MSG_IDLE_CS,
+ [CCS3] = MSG_IDLE_CS,
+ };
+ u32 val;
+
+ if (!_reg[engine->id].reg) {
+ drm_err(&engine->i915->drm,
+ "MSG IDLE undefined for engine id %u\n", engine->id);
+ return 0;
+ }
+
+ val = intel_uncore_read(engine->uncore, _reg[engine->id]);
+
+ /* bits[29:25] & bits[13:9] >> shift */
+ return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
+}
+
+static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
+{
+ int ret;
+
+ /* Ensure GPM receives fw up/down after CS is stopped */
+ udelay(1);
+
+ /* Wait for forcewake request to complete in GPM */
+ ret = __intel_wait_for_register_fw(gt->uncore,
+ GEN9_PWRGT_DOMAIN_STATUS,
+ fw_mask, fw_mask, 5000, 0, NULL);
+
+ /* Ensure CS receives fw ack from GPM */
+ udelay(1);
+
+ if (ret)
+ GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
+}
+
+/*
+ * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
+ * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
+ * pending status is indicated by bits[13:9] (masked by bits[29:25]) in the
+ * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
+ * are concerned only with the gt reset here, we use a logical OR of pending
+ * forcewakeups from all reset domains and then wait for them to complete by
+ * querying PWRGT_DOMAIN_STATUS.
+ */
+void intel_engine_wait_for_pending_mi_fw(struct intel_engine_cs *engine)
+{
+ u32 fw_pending = __cs_pending_mi_force_wakes(engine);
+
+ if (fw_pending)
+ __gpm_wait_for_fw_complete(engine->gt, fw_pending);
+}
+
static u32
read_subslice_reg(const struct intel_engine_cs *engine,
int slice, int subslice, i915_reg_t reg)
i915_request_put(rq);
}
+static u32 map_i915_prio_to_lrc_desc_prio(int prio)
+{
+ if (prio > I915_PRIORITY_NORMAL)
+ return GEN12_CTX_PRIORITY_HIGH;
+ else if (prio < I915_PRIORITY_NORMAL)
+ return GEN12_CTX_PRIORITY_LOW;
+ else
+ return GEN12_CTX_PRIORITY_NORMAL;
+}
+
static u64 execlists_update_context(struct i915_request *rq)
{
struct intel_context *ce = rq->context;
desc = ce->lrc.desc;
if (rq->engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
- desc |= lrc_desc_priority(rq_prio(rq));
+ desc |= map_i915_prio_to_lrc_desc_prio(rq_prio(rq));
/*
* WaIdleLiteRestore:bdw,skl
ring_set_paused(engine, 1);
intel_engine_stop_cs(engine);
+ /*
+ * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
+ * to wait for any pending mi force wakeups
+ */
+ if (IS_GRAPHICS_VER(engine->i915, 11, 12))
+ intel_engine_wait_for_pending_mi_fw(engine);
+
engine->execlists.reset_ccid = active_ccid(engine);
}
{
intel_wakeref_t wakeref;
+ intel_gt_sysfs_unregister(gt);
intel_rps_driver_unregister(>->rps);
intel_gsc_fini(>->gsc);
mutex_lock(>->tlb_invalidate_lock);
intel_uncore_forcewake_get(uncore, FORCEWAKE_ALL);
+ spin_lock_irq(&uncore->lock); /* serialise invalidate with GT reset */
+
+ for_each_engine(engine, gt, id) {
+ struct reg_and_bit rb;
+
+ rb = get_reg_and_bit(engine, regs == gen8_regs, regs, num);
+ if (!i915_mmio_reg_offset(rb.reg))
+ continue;
+
+ intel_uncore_write_fw(uncore, rb.reg, rb.bit);
+ }
+
+ spin_unlock_irq(&uncore->lock);
+
for_each_engine(engine, gt, id) {
/*
* HW architecture suggest typical invalidation time at 40us,
if (!i915_mmio_reg_offset(rb.reg))
continue;
- intel_uncore_write_fw(uncore, rb.reg, rb.bit);
if (__intel_wait_for_register_fw(uncore,
rb.reg, rb.bit, 0,
timeout_us, timeout_ms,
static struct intel_gt *kobj_to_gt(struct kobject *kobj)
{
- return container_of(kobj, struct kobj_gt, base)->gt;
+ return container_of(kobj, struct intel_gt, sysfs_gt);
}
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
};
ATTRIBUTE_GROUPS(id);
+/* A kobject needs a release() method even if it does nothing */
static void kobj_gt_release(struct kobject *kobj)
{
- kfree(kobj);
}
static struct kobj_type kobj_gt_type = {
void intel_gt_sysfs_register(struct intel_gt *gt)
{
- struct kobj_gt *kg;
-
/*
* We need to make things right with the
* ABI compatibility. The files were originally
if (gt_is_root(gt))
intel_gt_sysfs_pm_init(gt, gt_get_parent_obj(gt));
- kg = kzalloc(sizeof(*kg), GFP_KERNEL);
- if (!kg)
+ /* init and xfer ownership to sysfs tree */
+ if (kobject_init_and_add(>->sysfs_gt, &kobj_gt_type,
+ gt->i915->sysfs_gt, "gt%d", gt->info.id))
goto exit_fail;
- kobject_init(&kg->base, &kobj_gt_type);
- kg->gt = gt;
-
- /* xfer ownership to sysfs tree */
- if (kobject_add(&kg->base, gt->i915->sysfs_gt, "gt%d", gt->info.id))
- goto exit_kobj_put;
-
- intel_gt_sysfs_pm_init(gt, &kg->base);
+ intel_gt_sysfs_pm_init(gt, >->sysfs_gt);
return;
-exit_kobj_put:
- kobject_put(&kg->base);
-
exit_fail:
+ kobject_put(>->sysfs_gt);
drm_warn(>->i915->drm,
"failed to initialize gt%d sysfs root\n", gt->info.id);
}
+
+void intel_gt_sysfs_unregister(struct intel_gt *gt)
+{
+ kobject_put(>->sysfs_gt);
+}
struct intel_gt;
-struct kobj_gt {
- struct kobject base;
- struct intel_gt *gt;
-};
-
bool is_object_gt(struct kobject *kobj);
struct drm_i915_private *kobj_to_i915(struct kobject *kobj);
const char *name);
void intel_gt_sysfs_register(struct intel_gt *gt);
+void intel_gt_sysfs_unregister(struct intel_gt *gt);
struct intel_gt *intel_gt_sysfs_get_drvdata(struct device *dev,
const char *name);
} mocs;
struct intel_pxp pxp;
+
+ /* gt/gtN sysfs */
+ struct kobject sysfs_gt;
};
enum intel_gt_scratch_field {
#define XEHP_SW_COUNTER_SHIFT 58
#define XEHP_SW_COUNTER_WIDTH 6
-static inline u32 lrc_desc_priority(int prio)
-{
- if (prio > I915_PRIORITY_NORMAL)
- return GEN12_CTX_PRIORITY_HIGH;
- else if (prio < I915_PRIORITY_NORMAL)
- return GEN12_CTX_PRIORITY_LOW;
- else
- return GEN12_CTX_PRIORITY_NORMAL;
-}
-
static inline void lrc_runtime_start(struct intel_context *ce)
{
struct intel_context_stats *stats = &ce->stats;
return err;
}
-static int gen6_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
u32 hw_mask;
return gen6_hw_domain_reset(gt, hw_mask);
}
+static int gen6_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(>->uncore->lock, flags);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
+ return ret;
+}
+
static struct intel_engine_cs *find_sfc_paired_vecs_engine(struct intel_engine_cs *engine)
{
int vecs_id;
rmw_clear_fw(uncore, sfc_lock.lock_reg, sfc_lock.lock_bit);
}
-static int gen11_reset_engines(struct intel_gt *gt,
- intel_engine_mask_t engine_mask,
- unsigned int retry)
+static int __gen11_reset_engines(struct intel_gt *gt,
+ intel_engine_mask_t engine_mask,
+ unsigned int retry)
{
struct intel_engine_cs *engine;
intel_engine_mask_t tmp;
struct intel_engine_cs *engine;
const bool reset_non_ready = retry >= 1;
intel_engine_mask_t tmp;
+ unsigned long flags;
int ret;
+ spin_lock_irqsave(>->uncore->lock, flags);
+
for_each_engine_masked(engine, gt, engine_mask, tmp) {
ret = gen8_engine_reset_prepare(engine);
if (ret && !reset_non_ready)
* This is best effort, so ignore any error from the initial reset.
*/
if (IS_DG2(gt->i915) && engine_mask == ALL_ENGINES)
- gen11_reset_engines(gt, gt->info.engine_mask, 0);
+ __gen11_reset_engines(gt, gt->info.engine_mask, 0);
if (GRAPHICS_VER(gt->i915) >= 11)
- ret = gen11_reset_engines(gt, engine_mask, retry);
+ ret = __gen11_reset_engines(gt, engine_mask, retry);
else
- ret = gen6_reset_engines(gt, engine_mask, retry);
+ ret = __gen6_reset_engines(gt, engine_mask, retry);
skip_reset:
for_each_engine_masked(engine, gt, engine_mask, tmp)
gen8_engine_reset_cancel(engine);
+ spin_unlock_irqrestore(>->uncore->lock, flags);
+
return ret;
}
continue;
hw = shmem_pin_map(engine->default_state);
- if (IS_ERR(hw)) {
- err = PTR_ERR(hw);
+ if (!hw) {
+ err = -ENOMEM;
break;
}
hw += LRC_STATE_OFFSET / sizeof(*hw);
continue;
hw = shmem_pin_map(engine->default_state);
- if (IS_ERR(hw)) {
- err = PTR_ERR(hw);
+ if (!hw) {
+ err = -ENOMEM;
break;
}
hw += LRC_STATE_OFFSET / sizeof(*hw);
INTEL_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_SET = 0x1003,
INTEL_GUC_ACTION_SCHED_ENGINE_MODE_DONE = 0x1004,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY = 0x1005,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_EXECUTION_QUANTUM = 0x1006,
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT = 0x1007,
INTEL_GUC_ACTION_CONTEXT_RESET_NOTIFICATION = 0x1008,
INTEL_GUC_ACTION_ENGINE_FAILURE_NOTIFICATION = 0x1009,
INTEL_GUC_ACTION_HOST2GUC_UPDATE_CONTEXT_POLICIES = 0x100B,
if (IS_DG2(gt->i915))
flags |= GUC_WA_DUAL_QUEUE;
- /* Wa_22011802037: graphics version 12 */
- if (GRAPHICS_VER(gt->i915) == 12)
+ /* Wa_22011802037: graphics version 11/12 */
+ if (IS_GRAPHICS_VER(gt->i915, 11, 12))
flags |= GUC_WA_PRE_PARSER;
/* Wa_16011777198:dg2 */
/** @ads_engine_usage_size: size of engine usage in the ADS */
u32 ads_engine_usage_size;
+ /** @lrc_desc_pool_v69: object allocated to hold the GuC LRC descriptor pool */
+ struct i915_vma *lrc_desc_pool_v69;
+ /** @lrc_desc_pool_vaddr_v69: contents of the GuC LRC descriptor pool */
+ void *lrc_desc_pool_vaddr_v69;
+
/**
* @context_lookup: used to resolve intel_context from guc_id, if a
* context is present in this structure it is registered with the GuC
u32 fence_id;
} __packed;
+struct guc_process_desc_v69 {
+ u32 stage_id;
+ u64 db_base_addr;
+ u32 head;
+ u32 tail;
+ u32 error_offset;
+ u64 wq_base_addr;
+ u32 wq_size_bytes;
+ u32 wq_status;
+ u32 engine_presence;
+ u32 priority;
+ u32 reserved[36];
+} __packed;
+
struct guc_sched_wq_desc {
u32 head;
u32 tail;
};
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+/* Preempt to idle on quantum expiry */
+#define CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69 BIT(0)
+
+/*
+ * GuC Context registration descriptor.
+ * FIXME: This is only required to exist during context registration.
+ * The current 1:1 between guc_lrc_desc and LRCs for the lifetime of the LRC
+ * is not required.
+ */
+struct guc_lrc_desc_v69 {
+ u32 hw_context_desc;
+ u32 slpm_perf_mode_hint; /* SPLC v1 only */
+ u32 slpm_freq_hint;
+ u32 engine_submit_mask; /* In logical space */
+ u8 engine_class;
+ u8 reserved0[3];
+ u32 priority;
+ u32 process_desc;
+ u32 wq_addr;
+ u32 wq_size;
+ u32 context_flags; /* CONTEXT_REGISTRATION_* */
+ /* Time for one workload to execute. (in micro seconds) */
+ u32 execution_quantum;
+ /* Time to wait for a preemption request to complete before issuing a
+ * reset. (in micro seconds).
+ */
+ u32 preemption_timeout;
+ u32 policy_flags; /* CONTEXT_POLICY_* */
+ u32 reserved1[19];
+} __packed;
+
/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
u32 kl;
};
struct parent_scratch {
- struct guc_sched_wq_desc wq_desc;
+ union guc_descs {
+ struct guc_sched_wq_desc wq_desc;
+ struct guc_process_desc_v69 pdesc;
+ } descs;
struct sync_semaphore go;
struct sync_semaphore join[MAX_ENGINE_INSTANCE + 1];
- u8 unused[WQ_OFFSET - sizeof(struct guc_sched_wq_desc) -
+ u8 unused[WQ_OFFSET - sizeof(union guc_descs) -
sizeof(struct sync_semaphore) * (MAX_ENGINE_INSTANCE + 2)];
u32 wq[WQ_SIZE / sizeof(u32)];
LRC_STATE_OFFSET) / sizeof(u32)));
}
+static struct guc_process_desc_v69 *
+__get_process_desc_v69(struct intel_context *ce)
+{
+ struct parent_scratch *ps = __get_parent_scratch(ce);
+
+ return &ps->descs.pdesc;
+}
+
static struct guc_sched_wq_desc *
-__get_wq_desc(struct intel_context *ce)
+__get_wq_desc_v70(struct intel_context *ce)
{
struct parent_scratch *ps = __get_parent_scratch(ce);
- return &ps->wq_desc;
+ return &ps->descs.wq_desc;
}
-static u32 *get_wq_pointer(struct guc_sched_wq_desc *wq_desc,
- struct intel_context *ce,
- u32 wqi_size)
+static u32 *get_wq_pointer(struct intel_context *ce, u32 wqi_size)
{
/*
* Check for space in work queue. Caching a value of head pointer in
#define AVAILABLE_SPACE \
CIRC_SPACE(ce->parallel.guc.wqi_tail, ce->parallel.guc.wqi_head, WQ_SIZE)
if (wqi_size > AVAILABLE_SPACE) {
- ce->parallel.guc.wqi_head = READ_ONCE(wq_desc->head);
+ ce->parallel.guc.wqi_head = READ_ONCE(*ce->parallel.guc.wq_head);
if (wqi_size > AVAILABLE_SPACE)
return NULL;
return ce;
}
+static struct guc_lrc_desc_v69 *__get_lrc_desc_v69(struct intel_guc *guc, u32 index)
+{
+ struct guc_lrc_desc_v69 *base = guc->lrc_desc_pool_vaddr_v69;
+
+ if (!base)
+ return NULL;
+
+ GEM_BUG_ON(index >= GUC_MAX_CONTEXT_ID);
+
+ return &base[index];
+}
+
+static int guc_lrc_desc_pool_create_v69(struct intel_guc *guc)
+{
+ u32 size;
+ int ret;
+
+ size = PAGE_ALIGN(sizeof(struct guc_lrc_desc_v69) *
+ GUC_MAX_CONTEXT_ID);
+ ret = intel_guc_allocate_and_map_vma(guc, size, &guc->lrc_desc_pool_v69,
+ (void **)&guc->lrc_desc_pool_vaddr_v69);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
+static void guc_lrc_desc_pool_destroy_v69(struct intel_guc *guc)
+{
+ if (!guc->lrc_desc_pool_vaddr_v69)
+ return;
+
+ guc->lrc_desc_pool_vaddr_v69 = NULL;
+ i915_vma_unpin_and_release(&guc->lrc_desc_pool_v69, I915_VMA_RELEASE_MAP);
+}
+
static inline bool guc_submission_initialized(struct intel_guc *guc)
{
return guc->submission_initialized;
}
+static inline void _reset_lrc_desc_v69(struct intel_guc *guc, u32 id)
+{
+ struct guc_lrc_desc_v69 *desc = __get_lrc_desc_v69(guc, id);
+
+ if (desc)
+ memset(desc, 0, sizeof(*desc));
+}
+
static inline bool ctx_id_mapped(struct intel_guc *guc, u32 id)
{
return __get_context(guc, id);
if (unlikely(!guc_submission_initialized(guc)))
return;
+ _reset_lrc_desc_v69(guc, id);
+
/*
* xarray API doesn't have xa_erase_irqsave wrapper, so calling
* the lower level functions directly.
true, timeout);
}
-static int guc_context_policy_init(struct intel_context *ce, bool loop);
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop);
static int try_context_registration(struct intel_context *ce, bool loop);
static int __guc_add_request(struct intel_guc *guc, struct i915_request *rq)
GEM_BUG_ON(context_guc_id_invalid(ce));
if (context_policy_required(ce)) {
- err = guc_context_policy_init(ce, false);
+ err = guc_context_policy_init_v70(ce, false);
if (err)
return err;
}
return (WQ_SIZE - ce->parallel.guc.wqi_tail);
}
-static void write_wqi(struct guc_sched_wq_desc *wq_desc,
- struct intel_context *ce,
- u32 wqi_size)
+static void write_wqi(struct intel_context *ce, u32 wqi_size)
{
BUILD_BUG_ON(!is_power_of_2(WQ_SIZE));
ce->parallel.guc.wqi_tail = (ce->parallel.guc.wqi_tail + wqi_size) &
(WQ_SIZE - 1);
- WRITE_ONCE(wq_desc->tail, ce->parallel.guc.wqi_tail);
+ WRITE_ONCE(*ce->parallel.guc.wq_tail, ce->parallel.guc.wqi_tail);
}
static int guc_wq_noop_append(struct intel_context *ce)
{
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
- u32 *wqi = get_wq_pointer(wq_desc, ce, wq_space_until_wrap(ce));
+ u32 *wqi = get_wq_pointer(ce, wq_space_until_wrap(ce));
u32 len_dw = wq_space_until_wrap(ce) / sizeof(u32) - 1;
if (!wqi)
{
struct intel_context *ce = request_to_scheduling_context(rq);
struct intel_context *child;
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
unsigned int wqi_size = (ce->parallel.number_children + 4) *
sizeof(u32);
u32 *wqi;
return ret;
}
- wqi = get_wq_pointer(wq_desc, ce, wqi_size);
+ wqi = get_wq_pointer(ce, wqi_size);
if (!wqi)
return -EBUSY;
for_each_child(ce, child)
*wqi++ = child->ring->tail / sizeof(u64);
- write_wqi(wq_desc, ce, wqi_size);
+ write_wqi(ce, wqi_size);
return 0;
}
lrc_update_regs(ce, engine, head);
}
-static u32 __cs_pending_mi_force_wakes(struct intel_engine_cs *engine)
-{
- static const i915_reg_t _reg[I915_NUM_ENGINES] = {
- [RCS0] = MSG_IDLE_CS,
- [BCS0] = MSG_IDLE_BCS,
- [VCS0] = MSG_IDLE_VCS0,
- [VCS1] = MSG_IDLE_VCS1,
- [VCS2] = MSG_IDLE_VCS2,
- [VCS3] = MSG_IDLE_VCS3,
- [VCS4] = MSG_IDLE_VCS4,
- [VCS5] = MSG_IDLE_VCS5,
- [VCS6] = MSG_IDLE_VCS6,
- [VCS7] = MSG_IDLE_VCS7,
- [VECS0] = MSG_IDLE_VECS0,
- [VECS1] = MSG_IDLE_VECS1,
- [VECS2] = MSG_IDLE_VECS2,
- [VECS3] = MSG_IDLE_VECS3,
- [CCS0] = MSG_IDLE_CS,
- [CCS1] = MSG_IDLE_CS,
- [CCS2] = MSG_IDLE_CS,
- [CCS3] = MSG_IDLE_CS,
- };
- u32 val;
-
- if (!_reg[engine->id].reg)
- return 0;
-
- val = intel_uncore_read(engine->uncore, _reg[engine->id]);
-
- /* bits[29:25] & bits[13:9] >> shift */
- return (val & (val >> 16) & MSG_IDLE_FW_MASK) >> MSG_IDLE_FW_SHIFT;
-}
-
-static void __gpm_wait_for_fw_complete(struct intel_gt *gt, u32 fw_mask)
-{
- int ret;
-
- /* Ensure GPM receives fw up/down after CS is stopped */
- udelay(1);
-
- /* Wait for forcewake request to complete in GPM */
- ret = __intel_wait_for_register_fw(gt->uncore,
- GEN9_PWRGT_DOMAIN_STATUS,
- fw_mask, fw_mask, 5000, 0, NULL);
-
- /* Ensure CS receives fw ack from GPM */
- udelay(1);
-
- if (ret)
- GT_TRACE(gt, "Failed to complete pending forcewake %d\n", ret);
-}
-
-/*
- * Wa_22011802037:gen12: In addition to stopping the cs, we need to wait for any
- * pending MI_FORCE_WAKEUP requests that the CS has initiated to complete. The
- * pending status is indicated by bits[13:9] (masked by bits[ 29:25]) in the
- * MSG_IDLE register. There's one MSG_IDLE register per reset domain. Since we
- * are concerned only with the gt reset here, we use a logical OR of pending
- * forcewakeups from all reset domains and then wait for them to complete by
- * querying PWRGT_DOMAIN_STATUS.
- */
static void guc_engine_reset_prepare(struct intel_engine_cs *engine)
{
- u32 fw_pending;
-
- if (GRAPHICS_VER(engine->i915) != 12)
+ if (!IS_GRAPHICS_VER(engine->i915, 11, 12))
return;
- /*
- * Wa_22011802037
- * TODO: Occasionally trying to stop the cs times out, but does not
- * adversely affect functionality. The timeout is set as a config
- * parameter that defaults to 100ms. Assuming that this timeout is
- * sufficient for any pending MI_FORCEWAKEs to complete, ignore the
- * timeout returned here until it is root caused.
- */
intel_engine_stop_cs(engine);
- fw_pending = __cs_pending_mi_force_wakes(engine);
- if (fw_pending)
- __gpm_wait_for_fw_complete(engine->gt, fw_pending);
+ /*
+ * Wa_22011802037:gen11/gen12: In addition to stopping the cs, we need
+ * to wait for any pending mi force wakeups
+ */
+ intel_engine_wait_for_pending_mi_fw(engine);
}
static void guc_reset_nop(struct intel_engine_cs *engine)
int intel_guc_submission_init(struct intel_guc *guc)
{
struct intel_gt *gt = guc_to_gt(guc);
+ int ret;
if (guc->submission_initialized)
return 0;
+ if (guc->fw.major_ver_found < 70) {
+ ret = guc_lrc_desc_pool_create_v69(guc);
+ if (ret)
+ return ret;
+ }
+
guc->submission_state.guc_ids_bitmap =
bitmap_zalloc(NUMBER_MULTI_LRC_GUC_ID(guc), GFP_KERNEL);
- if (!guc->submission_state.guc_ids_bitmap)
- return -ENOMEM;
+ if (!guc->submission_state.guc_ids_bitmap) {
+ ret = -ENOMEM;
+ goto destroy_pool;
+ }
guc->timestamp.ping_delay = (POLL_TIME_CLKS / gt->clock_frequency + 1) * HZ;
guc->timestamp.shift = gpm_timestamp_shift(gt);
guc->submission_initialized = true;
return 0;
+
+destroy_pool:
+ guc_lrc_desc_pool_destroy_v69(guc);
+
+ return ret;
}
void intel_guc_submission_fini(struct intel_guc *guc)
return;
guc_flush_destroyed_contexts(guc);
+ guc_lrc_desc_pool_destroy_v69(guc);
i915_sched_engine_put(guc->sched_engine);
bitmap_free(guc->submission_state.guc_ids_bitmap);
guc->submission_initialized = false;
spin_unlock_irqrestore(&guc->submission_state.lock, flags);
}
-static int __guc_action_register_multi_lrc(struct intel_guc *guc,
- struct intel_context *ce,
- struct guc_ctxt_registration_info *info,
- bool loop)
+static int __guc_action_register_multi_lrc_v69(struct intel_guc *guc,
+ struct intel_context *ce,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ struct intel_context *child;
+ u32 action[4 + MAX_ENGINE_INSTANCE];
+ int len = 0;
+
+ GEM_BUG_ON(ce->parallel.number_children > MAX_ENGINE_INSTANCE);
+
+ action[len++] = INTEL_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC;
+ action[len++] = guc_id;
+ action[len++] = ce->parallel.number_children + 1;
+ action[len++] = offset;
+ for_each_child(ce, child) {
+ offset += sizeof(struct guc_lrc_desc_v69);
+ action[len++] = offset;
+ }
+
+ return guc_submission_send_busy_loop(guc, action, len, 0, loop);
+}
+
+static int __guc_action_register_multi_lrc_v70(struct intel_guc *guc,
+ struct intel_context *ce,
+ struct guc_ctxt_registration_info *info,
+ bool loop)
{
struct intel_context *child;
u32 action[13 + (MAX_ENGINE_INSTANCE * 2)];
return guc_submission_send_busy_loop(guc, action, len, 0, loop);
}
-static int __guc_action_register_context(struct intel_guc *guc,
- struct guc_ctxt_registration_info *info,
- bool loop)
+static int __guc_action_register_context_v69(struct intel_guc *guc,
+ u32 guc_id,
+ u32 offset,
+ bool loop)
+{
+ u32 action[] = {
+ INTEL_GUC_ACTION_REGISTER_CONTEXT,
+ guc_id,
+ offset,
+ };
+
+ return guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action),
+ 0, loop);
+}
+
+static int __guc_action_register_context_v70(struct intel_guc *guc,
+ struct guc_ctxt_registration_info *info,
+ bool loop)
{
u32 action[] = {
INTEL_GUC_ACTION_REGISTER_CONTEXT,
0, loop);
}
-static void prepare_context_registration_info(struct intel_context *ce,
- struct guc_ctxt_registration_info *info);
+static void prepare_context_registration_info_v69(struct intel_context *ce);
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info);
-static int register_context(struct intel_context *ce, bool loop)
+static int
+register_context_v69(struct intel_guc *guc, struct intel_context *ce, bool loop)
+{
+ u32 offset = intel_guc_ggtt_offset(guc, guc->lrc_desc_pool_v69) +
+ ce->guc_id.id * sizeof(struct guc_lrc_desc_v69);
+
+ prepare_context_registration_info_v69(ce);
+
+ if (intel_context_is_parent(ce))
+ return __guc_action_register_multi_lrc_v69(guc, ce, ce->guc_id.id,
+ offset, loop);
+ else
+ return __guc_action_register_context_v69(guc, ce->guc_id.id,
+ offset, loop);
+}
+
+static int
+register_context_v70(struct intel_guc *guc, struct intel_context *ce, bool loop)
{
struct guc_ctxt_registration_info info;
+
+ prepare_context_registration_info_v70(ce, &info);
+
+ if (intel_context_is_parent(ce))
+ return __guc_action_register_multi_lrc_v70(guc, ce, &info, loop);
+ else
+ return __guc_action_register_context_v70(guc, &info, loop);
+}
+
+static int register_context(struct intel_context *ce, bool loop)
+{
struct intel_guc *guc = ce_to_guc(ce);
int ret;
GEM_BUG_ON(intel_context_is_child(ce));
trace_intel_context_register(ce);
- prepare_context_registration_info(ce, &info);
-
- if (intel_context_is_parent(ce))
- ret = __guc_action_register_multi_lrc(guc, ce, &info, loop);
+ if (guc->fw.major_ver_found >= 70)
+ ret = register_context_v70(guc, ce, loop);
else
- ret = __guc_action_register_context(guc, &info, loop);
+ ret = register_context_v69(guc, ce, loop);
+
if (likely(!ret)) {
unsigned long flags;
set_context_registered(ce);
spin_unlock_irqrestore(&ce->guc_state.lock, flags);
- guc_context_policy_init(ce, loop);
+ if (guc->fw.major_ver_found >= 70)
+ guc_context_policy_init_v70(ce, loop);
}
return ret;
0, loop);
}
-static int guc_context_policy_init(struct intel_context *ce, bool loop)
+static int guc_context_policy_init_v70(struct intel_context *ce, bool loop)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
return ret;
}
-static void prepare_context_registration_info(struct intel_context *ce,
- struct guc_ctxt_registration_info *info)
+static void guc_context_policy_init_v69(struct intel_engine_cs *engine,
+ struct guc_lrc_desc_v69 *desc)
+{
+ desc->policy_flags = 0;
+
+ if (engine->flags & I915_ENGINE_WANT_FORCED_PREEMPTION)
+ desc->policy_flags |= CONTEXT_POLICY_FLAG_PREEMPT_TO_IDLE_V69;
+
+ /* NB: For both of these, zero means disabled. */
+ desc->execution_quantum = engine->props.timeslice_duration_ms * 1000;
+ desc->preemption_timeout = engine->props.preempt_timeout_ms * 1000;
+}
+
+static u32 map_guc_prio_to_lrc_desc_prio(u8 prio)
+{
+ /*
+ * this matches the mapping we do in map_i915_prio_to_guc_prio()
+ * (e.g. prio < I915_PRIORITY_NORMAL maps to GUC_CLIENT_PRIORITY_NORMAL)
+ */
+ switch (prio) {
+ default:
+ MISSING_CASE(prio);
+ fallthrough;
+ case GUC_CLIENT_PRIORITY_KMD_NORMAL:
+ return GEN12_CTX_PRIORITY_NORMAL;
+ case GUC_CLIENT_PRIORITY_NORMAL:
+ return GEN12_CTX_PRIORITY_LOW;
+ case GUC_CLIENT_PRIORITY_HIGH:
+ case GUC_CLIENT_PRIORITY_KMD_HIGH:
+ return GEN12_CTX_PRIORITY_HIGH;
+ }
+}
+
+static void prepare_context_registration_info_v69(struct intel_context *ce)
+{
+ struct intel_engine_cs *engine = ce->engine;
+ struct intel_guc *guc = &engine->gt->uc.guc;
+ u32 ctx_id = ce->guc_id.id;
+ struct guc_lrc_desc_v69 *desc;
+ struct intel_context *child;
+
+ GEM_BUG_ON(!engine->mask);
+
+ /*
+ * Ensure LRC + CT vmas are is same region as write barrier is done
+ * based on CT vma region.
+ */
+ GEM_BUG_ON(i915_gem_object_is_lmem(guc->ct.vma->obj) !=
+ i915_gem_object_is_lmem(ce->ring->vma->obj));
+
+ desc = __get_lrc_desc_v69(guc, ctx_id);
+ desc->engine_class = engine_class_to_guc_class(engine->class);
+ desc->engine_submit_mask = engine->logical_mask;
+ desc->hw_context_desc = ce->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init_v69(engine, desc);
+
+ /*
+ * If context is a parent, we need to register a process descriptor
+ * describing a work queue and register all child contexts.
+ */
+ if (intel_context_is_parent(ce)) {
+ struct guc_process_desc_v69 *pdesc;
+
+ ce->parallel.guc.wqi_tail = 0;
+ ce->parallel.guc.wqi_head = 0;
+
+ desc->process_desc = i915_ggtt_offset(ce->state) +
+ __get_parent_scratch_offset(ce);
+ desc->wq_addr = i915_ggtt_offset(ce->state) +
+ __get_wq_offset(ce);
+ desc->wq_size = WQ_SIZE;
+
+ pdesc = __get_process_desc_v69(ce);
+ memset(pdesc, 0, sizeof(*(pdesc)));
+ pdesc->stage_id = ce->guc_id.id;
+ pdesc->wq_base_addr = desc->wq_addr;
+ pdesc->wq_size_bytes = desc->wq_size;
+ pdesc->wq_status = WQ_STATUS_ACTIVE;
+
+ ce->parallel.guc.wq_head = &pdesc->head;
+ ce->parallel.guc.wq_tail = &pdesc->tail;
+ ce->parallel.guc.wq_status = &pdesc->wq_status;
+
+ for_each_child(ce, child) {
+ desc = __get_lrc_desc_v69(guc, child->guc_id.id);
+
+ desc->engine_class =
+ engine_class_to_guc_class(engine->class);
+ desc->hw_context_desc = child->lrc.lrca;
+ desc->priority = ce->guc_state.prio;
+ desc->context_flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ guc_context_policy_init_v69(engine, desc);
+ }
+
+ clear_children_join_go_memory(ce);
+ }
+}
+
+static void prepare_context_registration_info_v70(struct intel_context *ce,
+ struct guc_ctxt_registration_info *info)
{
struct intel_engine_cs *engine = ce->engine;
struct intel_guc *guc = &engine->gt->uc.guc;
*/
info->hwlrca_lo = lower_32_bits(ce->lrc.lrca);
info->hwlrca_hi = upper_32_bits(ce->lrc.lrca);
+ if (engine->flags & I915_ENGINE_HAS_EU_PRIORITY)
+ info->hwlrca_lo |= map_guc_prio_to_lrc_desc_prio(ce->guc_state.prio);
info->flags = CONTEXT_REGISTRATION_FLAG_KMD;
/*
info->wq_base_hi = upper_32_bits(wq_base_offset);
info->wq_size = WQ_SIZE;
- wq_desc = __get_wq_desc(ce);
+ wq_desc = __get_wq_desc_v70(ce);
memset(wq_desc, 0, sizeof(*wq_desc));
wq_desc->wq_status = WQ_STATUS_ACTIVE;
+ ce->parallel.guc.wq_head = &wq_desc->head;
+ ce->parallel.guc.wq_tail = &wq_desc->tail;
+ ce->parallel.guc.wq_status = &wq_desc->wq_status;
+
clear_children_join_go_memory(ce);
}
}
u16 guc_id,
u32 preemption_timeout)
{
- struct context_policy policy;
+ if (guc->fw.major_ver_found >= 70) {
+ struct context_policy policy;
- __guc_context_policy_start_klv(&policy, guc_id);
- __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
- __guc_context_set_context_policies(guc, &policy, true);
+ __guc_context_policy_start_klv(&policy, guc_id);
+ __guc_context_policy_add_preemption_timeout(&policy, preemption_timeout);
+ __guc_context_set_context_policies(guc, &policy, true);
+ } else {
+ u32 action[] = {
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PREEMPTION_TIMEOUT,
+ guc_id,
+ preemption_timeout
+ };
+
+ intel_guc_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ }
}
static void guc_context_ban(struct intel_context *ce, struct i915_request *rq)
static void __guc_context_set_prio(struct intel_guc *guc,
struct intel_context *ce)
{
- struct context_policy policy;
+ if (guc->fw.major_ver_found >= 70) {
+ struct context_policy policy;
- __guc_context_policy_start_klv(&policy, ce->guc_id.id);
- __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
- __guc_context_set_context_policies(guc, &policy, true);
+ __guc_context_policy_start_klv(&policy, ce->guc_id.id);
+ __guc_context_policy_add_priority(&policy, ce->guc_state.prio);
+ __guc_context_set_context_policies(guc, &policy, true);
+ } else {
+ u32 action[] = {
+ INTEL_GUC_ACTION_V69_SET_CONTEXT_PRIORITY,
+ ce->guc_id.id,
+ ce->guc_state.prio,
+ };
+
+ guc_submission_send_busy_loop(guc, action, ARRAY_SIZE(action), 0, true);
+ }
}
static void guc_context_set_prio(struct intel_guc *guc,
guc_log_context_priority(p, ce);
if (intel_context_is_parent(ce)) {
- struct guc_sched_wq_desc *wq_desc = __get_wq_desc(ce);
struct intel_context *child;
drm_printf(p, "\t\tNumber children: %u\n",
ce->parallel.number_children);
- drm_printf(p, "\t\tWQI Head: %u\n",
- READ_ONCE(wq_desc->head));
- drm_printf(p, "\t\tWQI Tail: %u\n",
- READ_ONCE(wq_desc->tail));
- drm_printf(p, "\t\tWQI Status: %u\n\n",
- READ_ONCE(wq_desc->wq_status));
+
+ if (ce->parallel.guc.wq_status) {
+ drm_printf(p, "\t\tWQI Head: %u\n",
+ READ_ONCE(*ce->parallel.guc.wq_head));
+ drm_printf(p, "\t\tWQI Tail: %u\n",
+ READ_ONCE(*ce->parallel.guc.wq_tail));
+ drm_printf(p, "\t\tWQI Status: %u\n\n",
+ READ_ONCE(*ce->parallel.guc.wq_status));
+ }
if (ce->engine->emit_bb_start ==
emit_bb_start_parent_no_preempt_mid_batch) {
fw_def(BROXTON, 0, guc_def(bxt, 70, 1, 1)) \
fw_def(SKYLAKE, 0, guc_def(skl, 70, 1, 1))
+#define INTEL_GUC_FIRMWARE_DEFS_FALLBACK(fw_def, guc_def) \
+ fw_def(ALDERLAKE_P, 0, guc_def(adlp, 69, 0, 3)) \
+ fw_def(ALDERLAKE_S, 0, guc_def(tgl, 69, 0, 3))
+
#define INTEL_HUC_FIRMWARE_DEFS(fw_def, huc_def) \
fw_def(ALDERLAKE_P, 0, huc_def(tgl, 7, 9, 3)) \
fw_def(ALDERLAKE_S, 0, huc_def(tgl, 7, 9, 3)) \
MODULE_FIRMWARE(uc_);
INTEL_GUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
+INTEL_GUC_FIRMWARE_DEFS_FALLBACK(INTEL_UC_MODULE_FW, MAKE_GUC_FW_PATH)
INTEL_HUC_FIRMWARE_DEFS(INTEL_UC_MODULE_FW, MAKE_HUC_FW_PATH)
/* The below structs and macros are used to iterate across the list of blobs */
static const struct uc_fw_platform_requirement blobs_guc[] = {
INTEL_GUC_FIRMWARE_DEFS(MAKE_FW_LIST, GUC_FW_BLOB)
};
+ static const struct uc_fw_platform_requirement blobs_guc_fallback[] = {
+ INTEL_GUC_FIRMWARE_DEFS_FALLBACK(MAKE_FW_LIST, GUC_FW_BLOB)
+ };
static const struct uc_fw_platform_requirement blobs_huc[] = {
INTEL_HUC_FIRMWARE_DEFS(MAKE_FW_LIST, HUC_FW_BLOB)
};
[INTEL_UC_FW_TYPE_GUC] = { blobs_guc, ARRAY_SIZE(blobs_guc) },
[INTEL_UC_FW_TYPE_HUC] = { blobs_huc, ARRAY_SIZE(blobs_huc) },
};
- static const struct uc_fw_platform_requirement *fw_blobs;
+ const struct uc_fw_platform_requirement *fw_blobs;
enum intel_platform p = INTEL_INFO(i915)->platform;
u32 fw_count;
u8 rev = INTEL_REVID(i915);
int i;
+ /*
+ * The only difference between the ADL GuC FWs is the HWConfig support.
+ * ADL-N does not support HWConfig, so we should use the same binary as
+ * ADL-S, otherwise the GuC might attempt to fetch a config table that
+ * does not exist.
+ */
+ if (IS_ADLP_N(i915))
+ p = INTEL_ALDERLAKE_S;
+
GEM_BUG_ON(uc_fw->type >= ARRAY_SIZE(blobs_all));
fw_blobs = blobs_all[uc_fw->type].blobs;
fw_count = blobs_all[uc_fw->type].count;
if (p == fw_blobs[i].p && rev >= fw_blobs[i].rev) {
const struct uc_fw_blob *blob = &fw_blobs[i].blob;
uc_fw->path = blob->path;
+ uc_fw->wanted_path = blob->path;
uc_fw->major_ver_wanted = blob->major;
uc_fw->minor_ver_wanted = blob->minor;
break;
}
}
+ if (uc_fw->type == INTEL_UC_FW_TYPE_GUC) {
+ const struct uc_fw_platform_requirement *blobs = blobs_guc_fallback;
+ u32 count = ARRAY_SIZE(blobs_guc_fallback);
+
+ for (i = 0; i < count && p <= blobs[i].p; i++) {
+ if (p == blobs[i].p && rev >= blobs[i].rev) {
+ const struct uc_fw_blob *blob = &blobs[i].blob;
+
+ uc_fw->fallback.path = blob->path;
+ uc_fw->fallback.major_ver = blob->major;
+ uc_fw->fallback.minor_ver = blob->minor;
+ break;
+ }
+ }
+ }
+
/* make sure the list is ordered as expected */
if (IS_ENABLED(CONFIG_DRM_I915_SELFTEST)) {
for (i = 1; i < fw_count; i++) {
__force_fw_fetch_failures(uc_fw, -EINVAL);
__force_fw_fetch_failures(uc_fw, -ESTALE);
- err = request_firmware(&fw, uc_fw->path, dev);
+ err = firmware_request_nowarn(&fw, uc_fw->path, dev);
+ if (err && !intel_uc_fw_is_overridden(uc_fw) && uc_fw->fallback.path) {
+ err = firmware_request_nowarn(&fw, uc_fw->fallback.path, dev);
+ if (!err) {
+ drm_notice(&i915->drm,
+ "%s firmware %s is recommended, but only %s was found\n",
+ intel_uc_fw_type_repr(uc_fw->type),
+ uc_fw->wanted_path,
+ uc_fw->fallback.path);
+ drm_info(&i915->drm,
+ "Consider updating your linux-firmware pkg or downloading from %s\n",
+ INTEL_UC_FIRMWARE_URL);
+
+ uc_fw->path = uc_fw->fallback.path;
+ uc_fw->major_ver_wanted = uc_fw->fallback.major_ver;
+ uc_fw->minor_ver_wanted = uc_fw->fallback.minor_ver;
+ }
+ }
if (err)
goto fail;
INTEL_UC_FIRMWARE_MISSING :
INTEL_UC_FIRMWARE_ERROR);
- drm_notice(&i915->drm, "%s firmware %s: fetch failed with error %d\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
+ i915_probe_error(i915, "%s firmware %s: fetch failed with error %d\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
drm_info(&i915->drm, "%s firmware(s) can be downloaded from %s\n",
intel_uc_fw_type_repr(uc_fw->type), INTEL_UC_FIRMWARE_URL);
void intel_uc_fw_dump(const struct intel_uc_fw *uc_fw, struct drm_printer *p)
{
drm_printf(p, "%s firmware: %s\n",
- intel_uc_fw_type_repr(uc_fw->type), uc_fw->path);
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->wanted_path);
+ if (uc_fw->fallback.path) {
+ drm_printf(p, "%s firmware fallback: %s\n",
+ intel_uc_fw_type_repr(uc_fw->type), uc_fw->fallback.path);
+ drm_printf(p, "fallback selected: %s\n",
+ str_yes_no(uc_fw->path == uc_fw->fallback.path));
+ }
drm_printf(p, "\tstatus: %s\n",
intel_uc_fw_status_repr(uc_fw->status));
drm_printf(p, "\tversion: wanted %u.%u, found %u.%u\n",
const enum intel_uc_fw_status status;
enum intel_uc_fw_status __status; /* no accidental overwrites */
};
+ const char *wanted_path;
const char *path;
bool user_overridden;
size_t size;
u16 major_ver_found;
u16 minor_ver_found;
+ struct {
+ const char *path;
+ u16 major_ver;
+ u16 minor_ver;
+ } fallback;
+
u32 rsa_size;
u32 ucode_size;
continue;
vaddr = shmem_pin_map(engine->default_state);
- if (IS_ERR(vaddr)) {
- gvt_err("failed to map %s->default state, err:%zd\n",
- engine->name, PTR_ERR(vaddr));
+ if (!vaddr) {
+ gvt_err("failed to map %s->default state\n",
+ engine->name);
return;
}
static int i915_driver_hw_probe(struct drm_i915_private *dev_priv)
{
struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+ struct pci_dev *root_pdev;
int ret;
if (i915_inject_probe_failure(dev_priv))
intel_bw_init_hw(dev_priv);
+ /*
+ * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
+ * This should be totally removed when we handle the pci states properly
+ * on runtime PM and on s2idle cases.
+ */
+ root_pdev = pcie_find_root_port(pdev);
+ if (root_pdev)
+ pci_d3cold_disable(root_pdev);
+
return 0;
err_msi:
static void i915_driver_hw_remove(struct drm_i915_private *dev_priv)
{
struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
+ struct pci_dev *root_pdev;
i915_perf_fini(dev_priv);
if (pdev->msi_enabled)
pci_disable_msi(pdev);
+
+ root_pdev = pcie_find_root_port(pdev);
+ if (root_pdev)
+ pci_d3cold_enable(root_pdev);
}
/**
goto out;
}
- /*
- * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
- * This should be totally removed when we handle the pci states properly
- * on runtime PM and on s2idle cases.
- */
- if (suspend_to_idle(dev_priv))
- pci_d3cold_disable(pdev);
-
pci_disable_device(pdev);
/*
* During hibernation on some platforms the BIOS may try to access
pci_set_master(pdev);
- pci_d3cold_enable(pdev);
-
disable_rpm_wakeref_asserts(&dev_priv->runtime_pm);
ret = vlv_resume_prepare(dev_priv, false);
{
struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
- struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
int ret;
if (drm_WARN_ON_ONCE(&dev_priv->drm, !HAS_RUNTIME_PM(dev_priv)))
drm_err(&dev_priv->drm,
"Unclaimed access detected prior to suspending\n");
- /*
- * FIXME: Temporary hammer to avoid freezing the machine on our DGFX
- * This should be totally removed when we handle the pci states properly
- * on runtime PM and on s2idle cases.
- */
- pci_d3cold_disable(pdev);
rpm->suspended = true;
/*
{
struct drm_i915_private *dev_priv = kdev_to_i915(kdev);
struct intel_runtime_pm *rpm = &dev_priv->runtime_pm;
- struct pci_dev *pdev = to_pci_dev(dev_priv->drm.dev);
int ret;
if (drm_WARN_ON_ONCE(&dev_priv->drm, !HAS_RUNTIME_PM(dev_priv)))
intel_opregion_notify_adapter(dev_priv, PCI_D0);
rpm->suspended = false;
- pci_d3cold_enable(pdev);
if (intel_uncore_unclaimed_mmio(&dev_priv->uncore))
drm_dbg(&dev_priv->drm,
"Unclaimed access during suspend, bios?\n");
total += busy_add(ctx, class);
rcu_read_unlock();
- seq_printf(m, "drm-engine-%s:\t%llu ns\n",
- uabi_class_names[class], total);
+ if (capacity)
+ seq_printf(m, "drm-engine-%s:\t%llu ns\n",
+ uabi_class_names[class], total);
if (capacity > 1)
seq_printf(m, "drm-engine-capacity-%s:\t%u\n",
* drm_mm_node
* @node: The drm_mm_node.
* @region_start: An offset to add to the dma addresses of the sg list.
+ * @page_alignment: Required page alignment for each sg entry. Power of two.
*
* Create a struct sg_table, initializing it from a struct drm_mm_node,
* taking a maximum segment length into account, splitting into segments
* error code cast to an error pointer on failure.
*/
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
- u64 region_start)
+ u64 region_start,
+ u32 page_alignment)
{
- const u64 max_segment = SZ_1G; /* Do we have a limit on this? */
- u64 segment_pages = max_segment >> PAGE_SHIFT;
+ const u32 max_segment = round_down(UINT_MAX, page_alignment);
+ const u32 segment_pages = max_segment >> PAGE_SHIFT;
u64 block_size, offset, prev_end;
struct i915_refct_sgt *rsgt;
struct sg_table *st;
struct scatterlist *sg;
+ GEM_BUG_ON(!max_segment);
+
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
if (!rsgt)
return ERR_PTR(-ENOMEM);
i915_refct_sgt_init(rsgt, node->size << PAGE_SHIFT);
st = &rsgt->table;
- if (sg_alloc_table(st, DIV_ROUND_UP(node->size, segment_pages),
+ if (sg_alloc_table(st, DIV_ROUND_UP_ULL(node->size, segment_pages),
GFP_KERNEL)) {
i915_refct_sgt_put(rsgt);
return ERR_PTR(-ENOMEM);
sg = __sg_next(sg);
sg_dma_address(sg) = region_start + offset;
+ GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg),
+ page_alignment));
sg_dma_len(sg) = 0;
sg->length = 0;
st->nents++;
}
- len = min(block_size, max_segment - sg->length);
+ len = min_t(u64, block_size, max_segment - sg->length);
sg->length += len;
sg_dma_len(sg) += len;
* i915_buddy_block list
* @res: The struct i915_ttm_buddy_resource.
* @region_start: An offset to add to the dma addresses of the sg list.
+ * @page_alignment: Required page alignment for each sg entry. Power of two.
*
* Create a struct sg_table, initializing it from struct i915_buddy_block list,
* taking a maximum segment length into account, splitting into segments
* error code cast to an error pointer on failure.
*/
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
- u64 region_start)
+ u64 region_start,
+ u32 page_alignment)
{
struct i915_ttm_buddy_resource *bman_res = to_ttm_buddy_resource(res);
const u64 size = res->num_pages << PAGE_SHIFT;
- const u64 max_segment = rounddown(UINT_MAX, PAGE_SIZE);
+ const u32 max_segment = round_down(UINT_MAX, page_alignment);
struct drm_buddy *mm = bman_res->mm;
struct list_head *blocks = &bman_res->blocks;
struct drm_buddy_block *block;
resource_size_t prev_end;
GEM_BUG_ON(list_empty(blocks));
+ GEM_BUG_ON(!max_segment);
rsgt = kmalloc(sizeof(*rsgt), GFP_KERNEL);
if (!rsgt)
sg = __sg_next(sg);
sg_dma_address(sg) = region_start + offset;
+ GEM_BUG_ON(!IS_ALIGNED(sg_dma_address(sg),
+ page_alignment));
sg_dma_len(sg) = 0;
sg->length = 0;
st->nents++;
}
- len = min(block_size, max_segment - sg->length);
+ len = min_t(u64, block_size, max_segment - sg->length);
sg->length += len;
sg_dma_len(sg) += len;
void i915_refct_sgt_init(struct i915_refct_sgt *rsgt, size_t size);
struct i915_refct_sgt *i915_rsgt_from_mm_node(const struct drm_mm_node *node,
- u64 region_start);
+ u64 region_start,
+ u32 page_alignment);
struct i915_refct_sgt *i915_rsgt_from_buddy_resource(struct ttm_resource *res,
- u64 region_start);
+ u64 region_start,
+ u32 page_alignment);
#endif
struct device *kdev = kobj_to_dev(kobj);
struct drm_i915_private *i915 = kdev_minor_to_i915(kdev);
struct i915_gpu_coredump *gpu;
- ssize_t ret;
+ ssize_t ret = 0;
+
+ /*
+ * FIXME: Concurrent clients triggering resets and reading + clearing
+ * dumps can cause inconsistent sysfs reads when a user calls in with a
+ * non-zero offset to complete a prior partial read but the
+ * gpu_coredump has been cleared or replaced.
+ */
gpu = i915_first_error_state(i915);
if (IS_ERR(gpu)) {
const char *str = "No error state collected\n";
size_t len = strlen(str);
- ret = min_t(size_t, count, len - off);
- memcpy(buf, str + off, ret);
+ if (off < len) {
+ ret = min_t(size_t, count, len - off);
+ memcpy(buf, str + off, ret);
+ }
}
return ret;
device_remove_bin_file(kdev, &dpf_attrs_1);
device_remove_bin_file(kdev, &dpf_attrs);
+
+ kobject_put(dev_priv->sysfs_gt);
}
*/
#include <linux/sched/mm.h>
+#include <linux/dma-fence-array.h>
#include <drm/drm_gem.h>
#include "display/intel_frontbuffer.h"
GEM_BUG_ON(drm_mm_node_allocated(&vma->node));
}
-static void release_references(struct i915_vma *vma, bool vm_ddestroy)
+static void release_references(struct i915_vma *vma, struct intel_gt *gt,
+ bool vm_ddestroy)
{
struct drm_i915_gem_object *obj = vma->obj;
- struct intel_gt *gt = vma->vm->gt;
GEM_BUG_ON(i915_vma_is_active(vma));
force_unbind(vma);
list_del_init(&vma->vm_link);
- release_references(vma, false);
+ release_references(vma, vma->vm->gt, false);
}
void i915_vma_destroy(struct i915_vma *vma)
{
+ struct intel_gt *gt;
bool vm_ddestroy;
mutex_lock(&vma->vm->mutex);
list_del_init(&vma->vm_link);
vm_ddestroy = vma->vm_ddestroy;
vma->vm_ddestroy = false;
+
+ /* vma->vm may be freed when releasing vma->vm->mutex. */
+ gt = vma->vm->gt;
mutex_unlock(&vma->vm->mutex);
- release_references(vma, vm_ddestroy);
+ release_references(vma, gt, vm_ddestroy);
}
void i915_vma_parked(struct intel_gt *gt)
if (unlikely(err))
return err;
+ /*
+ * Reserve fences slot early to prevent an allocation after preparing
+ * the workload and associating fences with dma_resv.
+ */
+ if (fence && !(flags & __EXEC_OBJECT_NO_RESERVE)) {
+ struct dma_fence *curr;
+ int idx;
+
+ dma_fence_array_for_each(curr, idx, fence)
+ ;
+ err = dma_resv_reserve_fences(vma->obj->base.resv, idx);
+ if (unlikely(err))
+ return err;
+ }
+
if (flags & EXEC_OBJECT_WRITE) {
struct intel_frontbuffer *front;
i915_active_add_request(&front->write, rq);
intel_frontbuffer_put(front);
}
+ }
- if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
- if (unlikely(err))
- return err;
- }
+ if (fence) {
+ struct dma_fence *curr;
+ enum dma_resv_usage usage;
+ int idx;
- if (fence) {
- dma_resv_add_fence(vma->obj->base.resv, fence,
- DMA_RESV_USAGE_WRITE);
+ obj->read_domains = 0;
+ if (flags & EXEC_OBJECT_WRITE) {
+ usage = DMA_RESV_USAGE_WRITE;
obj->write_domain = I915_GEM_DOMAIN_RENDER;
- obj->read_domains = 0;
- }
- } else {
- if (!(flags & __EXEC_OBJECT_NO_RESERVE)) {
- err = dma_resv_reserve_fences(vma->obj->base.resv, 1);
- if (unlikely(err))
- return err;
+ } else {
+ usage = DMA_RESV_USAGE_READ;
}
- if (fence) {
- dma_resv_add_fence(vma->obj->base.resv, fence,
- DMA_RESV_USAGE_READ);
- obj->write_domain = 0;
- }
+ dma_fence_array_for_each(curr, idx, fence)
+ dma_resv_add_fence(vma->obj->base.resv, curr, usage);
}
if (flags & EXEC_OBJECT_NEEDS_FENCE && vma->fence)
* Convert an opaque TTM resource manager resource to a refcounted sg_table.
* @mem: The memory region.
* @res: The resource manager resource obtained from the TTM resource manager.
+ * @page_alignment: Required page alignment for each sg entry. Power of two.
*
* The gem backends typically use sg-tables for operations on the underlying
* io_memory. So provide a way for the backends to translate the
*/
struct i915_refct_sgt *
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
- struct ttm_resource *res)
+ struct ttm_resource *res,
+ u32 page_alignment)
{
if (mem->is_range_manager) {
struct ttm_range_mgr_node *range_node =
to_ttm_range_mgr_node(res);
return i915_rsgt_from_mm_node(&range_node->mm_nodes[0],
- mem->region.start);
+ mem->region.start,
+ page_alignment);
} else {
- return i915_rsgt_from_buddy_resource(res, mem->region.start);
+ return i915_rsgt_from_buddy_resource(res, mem->region.start,
+ page_alignment);
}
}
struct i915_refct_sgt *
intel_region_ttm_resource_to_rsgt(struct intel_memory_region *mem,
- struct ttm_resource *res);
+ struct ttm_resource *res,
+ u32 page_alignment);
void intel_region_ttm_resource_free(struct intel_memory_region *mem,
struct ttm_resource *res);
u64 addr;
for (addr = round_up(hole_start + min_alignment, step) - min_alignment;
- addr <= round_down(hole_end - (2 * min_alignment), step) - min_alignment;
+ hole_end > addr && hole_end - addr >= 2 * min_alignment;
addr += step) {
err = i915_vma_pin(vma, 0, 0, addr | flags);
if (err) {
static int igt_mock_max_segment(void *arg)
{
- const unsigned int max_segment = rounddown(UINT_MAX, PAGE_SIZE);
struct intel_memory_region *mem = arg;
struct drm_i915_private *i915 = mem->i915;
struct i915_ttm_buddy_resource *res;
struct drm_buddy *mm;
struct list_head *blocks;
struct scatterlist *sg;
+ I915_RND_STATE(prng);
LIST_HEAD(objects);
+ unsigned int max_segment;
+ unsigned int ps;
u64 size;
int err = 0;
*/
size = SZ_8G;
- mem = mock_region_create(i915, 0, size, PAGE_SIZE, 0, 0);
+ ps = PAGE_SIZE;
+ if (i915_prandom_u64_state(&prng) & 1)
+ ps = SZ_64K; /* For something like DG2 */
+
+ max_segment = round_down(UINT_MAX, ps);
+
+ mem = mock_region_create(i915, 0, size, ps, 0, 0);
if (IS_ERR(mem))
return PTR_ERR(mem);
}
for (sg = obj->mm.pages->sgl; sg; sg = sg_next(sg)) {
+ dma_addr_t daddr = sg_dma_address(sg);
+
if (sg->length > max_segment) {
pr_err("%s: Created an oversized scatterlist entry, %u > %u\n",
__func__, sg->length, max_segment);
err = -EINVAL;
goto out_close;
}
+
+ if (!IS_ALIGNED(daddr, ps)) {
+ pr_err("%s: Created an unaligned scatterlist entry, addr=%pa, ps=%u\n",
+ __func__, &daddr, ps);
+ err = -EINVAL;
+ goto out_close;
+ }
}
out_close:
return PTR_ERR(obj->mm.res);
obj->mm.rsgt = intel_region_ttm_resource_to_rsgt(obj->mm.region,
- obj->mm.res);
+ obj->mm.res,
+ obj->mm.region->min_page_size);
if (IS_ERR(obj->mm.rsgt)) {
err = PTR_ERR(obj->mm.rsgt);
goto err_free_resource;
ret = dcss_submodules_init(dcss);
if (ret) {
+ of_node_put(dcss->of_port);
dev_err(dev, "submodules initialization failed\n");
goto clks_err;
}
dcss_clocks_disable(dcss);
}
+ of_node_put(dcss->of_port);
+
pm_runtime_disable(dcss->dev);
dcss_submodules_stop(dcss);
drm_atomic_crtc_state_for_each_plane(plane, old_crtc_state) {
if (plane == &ipu_crtc->plane[0]->base)
disable_full = true;
- if (&ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base)
+ if (ipu_crtc->plane[1] && plane == &ipu_crtc->plane[1]->base)
disable_partial = true;
}
ring->cur = ring->start;
ring->next = ring->start;
-
- /* reset completed fence seqno: */
- ring->memptrs->fence = ring->fctx->completed_fence;
ring->memptrs->rptr = 0;
+
+ /* Detect and clean up an impossible fence, ie. if GPU managed
+ * to scribble something invalid, we don't want that to confuse
+ * us into mistakingly believing that submits have completed.
+ */
+ if (fence_before(ring->fctx->last_fence, ring->memptrs->fence)) {
+ ring->memptrs->fence = ring->fctx->last_fence;
+ }
}
return 0;
for (i = 0; i < ARRAY_SIZE(adreno_gpu->info->fw); i++)
release_firmware(adreno_gpu->fw[i]);
- pm_runtime_disable(&priv->gpu_pdev->dev);
+ if (pm_runtime_enabled(&priv->gpu_pdev->dev))
+ pm_runtime_disable(&priv->gpu_pdev->dev);
msm_gpu_cleanup(&adreno_gpu->base);
}
DPU_ATRACE_BEGIN("encoder_vblank_callback");
dpu_enc = to_dpu_encoder_virt(drm_enc);
+ atomic_inc(&phy_enc->vsync_cnt);
+
spin_lock_irqsave(&dpu_enc->enc_spinlock, lock_flags);
if (dpu_enc->crtc)
dpu_crtc_vblank_callback(dpu_enc->crtc);
spin_unlock_irqrestore(&dpu_enc->enc_spinlock, lock_flags);
- atomic_inc(&phy_enc->vsync_cnt);
DPU_ATRACE_END("encoder_vblank_callback");
}
DPU_DEBUG("[atomic_check:%d, \"%s\",%d,%d]\n",
phys_enc->wb_idx, mode->name, mode->hdisplay, mode->vdisplay);
- if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
- return 0;
-
- fb = conn_state->writeback_job->fb;
-
if (!conn_state || !conn_state->connector) {
DPU_ERROR("invalid connector state\n");
return -EINVAL;
return -EINVAL;
}
+ if (!conn_state->writeback_job || !conn_state->writeback_job->fb)
+ return 0;
+
+ fb = conn_state->writeback_job->fb;
+
DPU_DEBUG("[fb_id:%u][fb:%u,%u]\n", fb->base.id,
fb->width, fb->height);
struct msm_drm_private *priv = dev->dev_private;
struct dpu_kms *dpu_kms = to_dpu_kms(priv->kms);
- return drm_add_modes_noedid(connector, dpu_kms->catalog->caps->max_linewidth,
+ /*
+ * We should ideally be limiting the modes only to the maxlinewidth but
+ * on some chipsets this will allow even 4k modes to be added which will
+ * fail the per SSPP bandwidth checks. So, till we have dual-SSPP support
+ * and source split support added lets limit the modes based on max_mixer_width
+ * as 4K modes can then be supported.
+ */
+ return drm_add_modes_noedid(connector, dpu_kms->catalog->caps->max_mixer_width,
dev->mode_config.max_height);
}
encoder = mdp4_lcdc_encoder_init(dev, panel_node);
if (IS_ERR(encoder)) {
DRM_DEV_ERROR(dev->dev, "failed to construct LCDC encoder\n");
+ of_node_put(panel_node);
return PTR_ERR(encoder);
}
connector = mdp4_lvds_connector_init(dev, panel_node, encoder);
if (IS_ERR(connector)) {
DRM_DEV_ERROR(dev->dev, "failed to initialize LVDS connector\n");
+ of_node_put(panel_node);
return PTR_ERR(connector);
}
return ret;
}
+static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl);
+
static int dp_ctrl_process_phy_test_request(struct dp_ctrl_private *ctrl)
{
int ret = 0;
ret = dp_ctrl_on_link(&ctrl->dp_ctrl);
if (!ret)
- ret = dp_ctrl_on_stream(&ctrl->dp_ctrl);
+ ret = dp_ctrl_on_stream_phy_test_report(&ctrl->dp_ctrl);
else
DRM_ERROR("failed to enable DP link controller\n");
return dp_ctrl_setup_main_link(ctrl, &training_step);
}
-int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
+static int dp_ctrl_on_stream_phy_test_report(struct dp_ctrl *dp_ctrl)
+{
+ int ret;
+ struct dp_ctrl_private *ctrl;
+
+ ctrl = container_of(dp_ctrl, struct dp_ctrl_private, dp_ctrl);
+
+ ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock;
+
+ ret = dp_ctrl_enable_stream_clocks(ctrl);
+ if (ret) {
+ DRM_ERROR("Failed to start pixel clocks. ret=%d\n", ret);
+ return ret;
+ }
+
+ dp_ctrl_send_phy_test_pattern(ctrl);
+
+ return 0;
+}
+
+int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train)
{
int ret = 0;
bool mainlink_ready = false;
goto end;
}
- if (ctrl->link->sink_request & DP_TEST_LINK_PHY_TEST_PATTERN) {
- dp_ctrl_send_phy_test_pattern(ctrl);
- return 0;
- }
-
- if (!dp_ctrl_channel_eq_ok(ctrl))
+ if (force_link_train || !dp_ctrl_channel_eq_ok(ctrl))
dp_ctrl_link_retrain(ctrl);
/* stop txing train pattern to end link training */
};
int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl);
-int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl);
+int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl, bool force_link_train);
int dp_ctrl_off_link_stream(struct dp_ctrl *dp_ctrl);
int dp_ctrl_off_link(struct dp_ctrl *dp_ctrl);
int dp_ctrl_off(struct dp_ctrl *dp_ctrl);
struct msm_drm_private *priv = dev_get_drvdata(master);
/* disable all HPD interrupts */
- dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false);
+ if (dp->core_initialized)
+ dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_INT_MASK, false);
kthread_stop(dp->ev_tsk);
dp_power_client_deinit(dp->power);
dp_aux_unregister(dp->aux);
+ dp->drm_dev = NULL;
+ dp->aux->drm_dev = NULL;
priv->dp[dp->id] = NULL;
}
return 0;
}
- rc = dp_ctrl_on_stream(dp->ctrl);
+ rc = dp_ctrl_on_stream(dp->ctrl, data);
if (!rc)
dp_display->power_on = true;
int rc = 0;
struct dp_display_private *dp_display;
u32 state;
+ bool force_link_train = false;
dp_display = container_of(dp, struct dp_display_private, dp_display);
if (!dp_display->dp_mode.drm_mode.clock) {
state = dp_display->hpd_state;
- if (state == ST_DISPLAY_OFF)
+ if (state == ST_DISPLAY_OFF) {
dp_display_host_phy_init(dp_display);
+ force_link_train = true;
+ }
- dp_display_enable(dp_display, 0);
+ dp_display_enable(dp_display, force_link_train);
rc = dp_display_post_enable(dp);
if (rc) {
dp_display_unprepare(dp);
}
- /* manual kick off plug event to train link */
- if (state == ST_DISPLAY_OFF)
- dp_add_event(dp_display, EV_IRQ_HPD_INT, 0, 0);
-
/* completed connection */
dp_display->hpd_state = ST_CONNECTED;
.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
.gem_prime_import_sg_table = msm_gem_prime_import_sg_table,
- .gem_prime_mmap = drm_gem_prime_mmap,
+ .gem_prime_mmap = msm_gem_prime_mmap,
#ifdef CONFIG_DEBUG_FS
.debugfs_init = msm_debugfs_init,
#endif
void msm_gem_shrinker_init(struct drm_device *dev);
void msm_gem_shrinker_cleanup(struct drm_device *dev);
+int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj);
int msm_gem_prime_vmap(struct drm_gem_object *obj, struct iosys_map *map);
void msm_gem_prime_vunmap(struct drm_gem_object *obj, struct iosys_map *map);
(int32_t)(*fctx->fenceptr - fence) >= 0;
}
-/* called from workqueue */
+/* called from irq handler and workqueue (in recover path) */
void msm_update_fence(struct msm_fence_context *fctx, uint32_t fence)
{
- spin_lock(&fctx->spinlock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&fctx->spinlock, flags);
fctx->completed_fence = max(fence, fctx->completed_fence);
- spin_unlock(&fctx->spinlock);
+ spin_unlock_irqrestore(&fctx->spinlock, flags);
}
struct msm_fence {
return ret;
}
-void msm_gem_unpin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma)
+void msm_gem_unpin_locked(struct drm_gem_object *obj)
{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
GEM_WARN_ON(!msm_gem_is_locked(obj));
- msm_gem_unpin_vma(vma);
-
msm_obj->pin_count--;
GEM_WARN_ON(msm_obj->pin_count < 0);
msm_gem_lock(obj);
vma = lookup_vma(obj, aspace);
if (!GEM_WARN_ON(!vma)) {
- msm_gem_unpin_vma_locked(obj, vma);
+ msm_gem_unpin_vma(vma);
+ msm_gem_unpin_locked(obj);
}
msm_gem_unlock(obj);
}
uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj);
int msm_gem_pin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma);
-void msm_gem_unpin_vma_locked(struct drm_gem_object *obj, struct msm_gem_vma *vma);
+void msm_gem_unpin_locked(struct drm_gem_object *obj);
struct msm_gem_vma *msm_gem_get_vma_locked(struct drm_gem_object *obj,
struct msm_gem_address_space *aspace);
int msm_gem_get_iova(struct drm_gem_object *obj,
} *cmd; /* array of size nr_cmds */
struct {
/* make sure these don't conflict w/ MSM_SUBMIT_BO_x */
-#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */
-#define BO_LOCKED 0x4000 /* obj lock is held */
-#define BO_ACTIVE 0x2000 /* active refcnt is held */
-#define BO_PINNED 0x1000 /* obj is pinned and on active list */
+#define BO_VALID 0x8000 /* is current addr in cmdstream correct/valid? */
+#define BO_LOCKED 0x4000 /* obj lock is held */
+#define BO_ACTIVE 0x2000 /* active refcnt is held */
+#define BO_OBJ_PINNED 0x1000 /* obj (pages) is pinned and on active list */
+#define BO_VMA_PINNED 0x0800 /* vma (virtual address) is pinned */
uint32_t flags;
union {
struct msm_gem_object *obj;
#include "msm_drv.h"
#include "msm_gem.h"
+int msm_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
+{
+ int ret;
+
+ /* Ensure the mmap offset is initialized. We lazily initialize it,
+ * so if it has not been first mmap'd directly as a GEM object, the
+ * mmap offset will not be already initialized.
+ */
+ ret = drm_gem_create_mmap_offset(obj);
+ if (ret)
+ return ret;
+
+ return drm_gem_prime_mmap(obj, vma);
+}
+
struct sg_table *msm_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
struct msm_gem_object *msm_obj = to_msm_bo(obj);
*/
submit->bos[i].flags &= ~cleanup_flags;
- if (flags & BO_PINNED)
- msm_gem_unpin_vma_locked(obj, submit->bos[i].vma);
+ if (flags & BO_VMA_PINNED)
+ msm_gem_unpin_vma(submit->bos[i].vma);
+
+ if (flags & BO_OBJ_PINNED)
+ msm_gem_unpin_locked(obj);
if (flags & BO_ACTIVE)
msm_gem_active_put(obj);
static void submit_unlock_unpin_bo(struct msm_gem_submit *submit, int i)
{
- submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE | BO_LOCKED);
+ unsigned cleanup_flags = BO_VMA_PINNED | BO_OBJ_PINNED |
+ BO_ACTIVE | BO_LOCKED;
+ submit_cleanup_bo(submit, i, cleanup_flags);
if (!(submit->bos[i].flags & BO_VALID))
submit->bos[i].iova = 0;
if (ret)
break;
- submit->bos[i].flags |= BO_PINNED;
+ submit->bos[i].flags |= BO_OBJ_PINNED | BO_VMA_PINNED;
submit->bos[i].vma = vma;
if (vma->iova == submit->bos[i].iova) {
unsigned i;
if (error)
- cleanup_flags |= BO_PINNED | BO_ACTIVE;
+ cleanup_flags |= BO_VMA_PINNED | BO_OBJ_PINNED | BO_ACTIVE;
for (i = 0; i < submit->nr_bos; i++) {
struct msm_gem_object *msm_obj = submit->bos[i].obj;
struct drm_gem_object *obj = &submit->bos[i].obj->base;
msm_gem_lock(obj);
- submit_cleanup_bo(submit, i, BO_PINNED | BO_ACTIVE);
+ /* Note, VMA already fence-unpinned before submit: */
+ submit_cleanup_bo(submit, i, BO_OBJ_PINNED | BO_ACTIVE);
msm_gem_unlock(obj);
drm_gem_object_put(obj);
}
INT_MAX, GFP_KERNEL);
}
if (submit->fence_id < 0) {
- ret = submit->fence_id = 0;
+ ret = submit->fence_id;
submit->fence_id = 0;
}
unsigned size = vma->node.size;
/* Print a message if we try to purge a vma in use */
- if (GEM_WARN_ON(msm_gem_vma_inuse(vma)))
- return;
+ GEM_WARN_ON(msm_gem_vma_inuse(vma));
/* Don't do anything if the memory isn't mapped */
if (!vma->mapped)
void msm_gem_close_vma(struct msm_gem_address_space *aspace,
struct msm_gem_vma *vma)
{
- if (GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped))
- return;
+ GEM_WARN_ON(msm_gem_vma_inuse(vma) || vma->mapped);
spin_lock(&aspace->lock);
if (vma->iova)
return ret;
}
-static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
- uint32_t fence)
-{
- struct msm_gem_submit *submit;
- unsigned long flags;
-
- spin_lock_irqsave(&ring->submit_lock, flags);
- list_for_each_entry(submit, &ring->submits, node) {
- if (fence_after(submit->seqno, fence))
- break;
-
- msm_update_fence(submit->ring->fctx,
- submit->hw_fence->seqno);
- dma_fence_signal(submit->hw_fence);
- }
- spin_unlock_irqrestore(&ring->submit_lock, flags);
-}
-
#ifdef CONFIG_DEV_COREDUMP
static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
* one more to clear the faulting submit
*/
if (ring == cur_ring)
- fence++;
+ ring->memptrs->fence = ++fence;
- update_fences(gpu, ring, fence);
+ msm_update_fence(ring->fctx, fence);
}
if (msm_gpu_active(gpu)) {
msm_submit_retire(submit);
pm_runtime_mark_last_busy(&gpu->pdev->dev);
- pm_runtime_put_autosuspend(&gpu->pdev->dev);
spin_lock_irqsave(&ring->submit_lock, flags);
list_del(&submit->node);
msm_devfreq_idle(gpu);
mutex_unlock(&gpu->active_lock);
+ pm_runtime_put_autosuspend(&gpu->pdev->dev);
+
msm_gem_submit_put(submit);
}
int i;
for (i = 0; i < gpu->nr_rings; i++)
- update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);
+ msm_update_fence(gpu->rb[i]->fctx, gpu->rb[i]->memptrs->fence);
kthread_queue_work(gpu->worker, &gpu->retire_work);
update_sw_cntrs(gpu);
u64 addr = iova;
unsigned int i;
- for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+ for_each_sgtable_sg(sgt, sg, i) {
size_t size = sg->length;
phys_addr_t phys = sg_phys(sg);
msm_gem_lock(obj);
msm_gem_unpin_vma_fenced(submit->bos[i].vma, fctx);
- submit->bos[i].flags &= ~BO_PINNED;
+ submit->bos[i].flags &= ~BO_VMA_PINNED;
msm_gem_unlock(obj);
}
goto out_free_dma;
for (i = 0; i < npages; i += max) {
- args.end = start + (max << PAGE_SHIFT);
+ if (args.start + (max << PAGE_SHIFT) > end)
+ args.end = end;
+ else
+ args.end = args.start + (max << PAGE_SHIFT);
+
ret = migrate_vma_setup(&args);
if (ret)
goto out_free_pfns;
of_property_read_u32(dev->of_node, "hpd-reliable-delay-ms", &reliable_ms);
desc->delay.hpd_reliable = reliable_ms;
of_property_read_u32(dev->of_node, "hpd-absent-delay-ms", &absent_ms);
- desc->delay.hpd_reliable = absent_ms;
+ desc->delay.hpd_absent = absent_ms;
/* Power the panel on so we can read the EDID */
ret = pm_runtime_get_sync(dev);
struct drm_file *file)
{
struct panfrost_device *pfdev = dev->dev_private;
+ struct panfrost_file_priv *file_priv = file->driver_priv;
struct drm_panfrost_submit *args = data;
struct drm_syncobj *sync_out = NULL;
struct panfrost_job *job;
job->jc = args->jc;
job->requirements = args->requirements;
job->flush_id = panfrost_gpu_get_latest_flush_id(pfdev);
- job->file_priv = file->driver_priv;
+ job->mmu = file_priv->mmu;
slot = panfrost_job_get_slot(job);
ret = drm_sched_job_init(&job->base,
- &job->file_priv->sched_entity[slot],
+ &file_priv->sched_entity[slot],
NULL);
if (ret)
goto out_put_job;
if (args->retained) {
if (args->madv == PANFROST_MADV_DONTNEED)
- list_add_tail(&bo->base.madv_list,
- &pfdev->shrinker_list);
+ list_move_tail(&bo->base.madv_list,
+ &pfdev->shrinker_list);
else if (args->madv == PANFROST_MADV_WILLNEED)
list_del_init(&bo->base.madv_list);
}
return;
}
- cfg = panfrost_mmu_as_get(pfdev, job->file_priv->mmu);
+ cfg = panfrost_mmu_as_get(pfdev, job->mmu);
job_write(pfdev, JS_HEAD_NEXT_LO(js), lower_32_bits(jc_head));
job_write(pfdev, JS_HEAD_NEXT_HI(js), upper_32_bits(jc_head));
job->jc = 0;
}
- panfrost_mmu_as_put(pfdev, job->file_priv->mmu);
+ panfrost_mmu_as_put(pfdev, job->mmu);
panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
if (signal_fence)
* happen when we receive the DONE interrupt while doing a GPU reset).
*/
job->jc = 0;
- panfrost_mmu_as_put(pfdev, job->file_priv->mmu);
+ panfrost_mmu_as_put(pfdev, job->mmu);
panfrost_devfreq_record_idle(&pfdev->pfdevfreq);
dma_fence_signal_locked(job->done_fence);
struct kref refcount;
struct panfrost_device *pfdev;
- struct panfrost_file_priv *file_priv;
+ struct panfrost_mmu *mmu;
/* Fence to be signaled by IRQ handler when the job is complete. */
struct dma_fence *done_fence;
err_pages:
drm_gem_shmem_put_pages(&bo->base);
err_bo:
- drm_gem_object_put(&bo->base.base);
+ panfrost_gem_mapping_put(bomapping);
return ret;
}
#include <drm/drm_probe_helper.h>
#include <drm/drm_vblank.h>
+#if defined(CONFIG_ARM_DMA_USE_IOMMU)
+#include <asm/dma-iommu.h>
+#else
+#define arm_iommu_detach_device(...) ({ })
+#define arm_iommu_release_mapping(...) ({ })
+#define to_dma_iommu_mapping(dev) NULL
+#endif
+
#include "rockchip_drm_drv.h"
#include "rockchip_drm_fb.h"
#include "rockchip_drm_gem.h"
if (!private->domain)
return 0;
+ if (IS_ENABLED(CONFIG_ARM_DMA_USE_IOMMU)) {
+ struct dma_iommu_mapping *mapping = to_dma_iommu_mapping(dev);
+
+ if (mapping) {
+ arm_iommu_detach_device(dev);
+ arm_iommu_release_mapping(mapping);
+ }
+ }
+
ret = iommu_attach_device(private->domain, dev);
if (ret) {
DRM_DEV_ERROR(dev, "Failed to attach iommu device\n");
}
EXPORT_SYMBOL(drm_sched_entity_flush);
-static void drm_sched_entity_kill_jobs_irq_work(struct irq_work *wrk)
+static void drm_sched_entity_kill_jobs_work(struct work_struct *wrk)
{
struct drm_sched_job *job = container_of(wrk, typeof(*job), work);
struct drm_sched_job *job = container_of(cb, struct drm_sched_job,
finish_cb);
- init_irq_work(&job->work, drm_sched_entity_kill_jobs_irq_work);
- irq_work_queue(&job->work);
+ INIT_WORK(&job->work, drm_sched_entity_kill_jobs_work);
+ schedule_work(&job->work);
}
static struct dma_fence *
/* Set precharge period in number of ticks from the internal clock */
precharge = (SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep1) |
- SSD130X_SET_PRECHARGE_PERIOD1_SET(ssd130x->prechargep2));
+ SSD130X_SET_PRECHARGE_PERIOD2_SET(ssd130x->prechargep2));
ret = ssd130x_write_cmd(ssd130x, 2, SSD130X_SET_PRECHARGE_PERIOD, precharge);
if (ret < 0)
return ret;
*/
#include <linux/component.h>
+#include <linux/dma-mapping.h>
#include <linux/kfifo.h>
#include <linux/module.h>
#include <linux/of_graph.h>
goto free_drm;
}
- dev_set_drvdata(dev, drm);
drm->dev_private = drv;
INIT_LIST_HEAD(&drv->frontend_list);
INIT_LIST_HEAD(&drv->engine_list);
drm_fbdev_generic_setup(drm, 32);
+ dev_set_drvdata(dev, drm);
+
return 0;
finish_poll:
{
struct drm_device *drm = dev_get_drvdata(dev);
+ dev_set_drvdata(dev, NULL);
drm_dev_unregister(drm);
drm_kms_helper_poll_fini(drm);
drm_atomic_helper_shutdown(drm);
INIT_KFIFO(list.fifo);
+ /*
+ * DE2 and DE3 cores actually supports 40-bit addresses, but
+ * driver does not.
+ */
+ dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
+ dma_set_max_seg_size(&pdev->dev, UINT_MAX);
+
for (i = 0;; i++) {
struct device_node *pipeline = of_parse_phandle(np,
"allwinner,pipelines",
struct sun4i_layer *layer = plane_to_sun4i_layer(plane);
if (IS_ERR_OR_NULL(layer->backend->frontend))
- sun4i_backend_format_is_supported(format, modifier);
+ return sun4i_backend_format_is_supported(format, modifier);
return sun4i_backend_format_is_supported(format, modifier) ||
sun4i_frontend_format_is_supported(format, modifier);
return crtcs;
}
-static int sun8i_dw_hdmi_find_connector_pdev(struct device *dev,
- struct platform_device **pdev_out)
-{
- struct platform_device *pdev;
- struct device_node *remote;
-
- remote = of_graph_get_remote_node(dev->of_node, 1, -1);
- if (!remote)
- return -ENODEV;
-
- if (!of_device_is_compatible(remote, "hdmi-connector")) {
- of_node_put(remote);
- return -ENODEV;
- }
-
- pdev = of_find_device_by_node(remote);
- of_node_put(remote);
- if (!pdev)
- return -ENODEV;
-
- *pdev_out = pdev;
- return 0;
-}
-
static int sun8i_dw_hdmi_bind(struct device *dev, struct device *master,
void *data)
{
- struct platform_device *pdev = to_platform_device(dev), *connector_pdev;
+ struct platform_device *pdev = to_platform_device(dev);
struct dw_hdmi_plat_data *plat_data;
struct drm_device *drm = data;
struct device_node *phy_node;
return dev_err_probe(dev, PTR_ERR(hdmi->regulator),
"Couldn't get regulator\n");
- ret = sun8i_dw_hdmi_find_connector_pdev(dev, &connector_pdev);
- if (!ret) {
- hdmi->ddc_en = gpiod_get_optional(&connector_pdev->dev,
- "ddc-en", GPIOD_OUT_HIGH);
- platform_device_put(connector_pdev);
-
- if (IS_ERR(hdmi->ddc_en)) {
- dev_err(dev, "Couldn't get ddc-en gpio\n");
- return PTR_ERR(hdmi->ddc_en);
- }
- }
-
ret = regulator_enable(hdmi->regulator);
if (ret) {
dev_err(dev, "Failed to enable regulator\n");
- goto err_unref_ddc_en;
+ return ret;
}
- gpiod_set_value(hdmi->ddc_en, 1);
-
ret = reset_control_deassert(hdmi->rst_ctrl);
if (ret) {
dev_err(dev, "Could not deassert ctrl reset control\n");
- goto err_disable_ddc_en;
+ goto err_disable_regulator;
}
ret = clk_prepare_enable(hdmi->clk_tmds);
clk_disable_unprepare(hdmi->clk_tmds);
err_assert_ctrl_reset:
reset_control_assert(hdmi->rst_ctrl);
-err_disable_ddc_en:
- gpiod_set_value(hdmi->ddc_en, 0);
+err_disable_regulator:
regulator_disable(hdmi->regulator);
-err_unref_ddc_en:
- if (hdmi->ddc_en)
- gpiod_put(hdmi->ddc_en);
return ret;
}
sun8i_hdmi_phy_deinit(hdmi->phy);
clk_disable_unprepare(hdmi->clk_tmds);
reset_control_assert(hdmi->rst_ctrl);
- gpiod_set_value(hdmi->ddc_en, 0);
regulator_disable(hdmi->regulator);
-
- if (hdmi->ddc_en)
- gpiod_put(hdmi->ddc_en);
}
static const struct component_ops sun8i_dw_hdmi_ops = {
#include <drm/bridge/dw_hdmi.h>
#include <drm/drm_encoder.h>
#include <linux/clk.h>
-#include <linux/gpio/consumer.h>
#include <linux/regmap.h>
#include <linux/regulator/consumer.h>
#include <linux/reset.h>
struct regulator *regulator;
const struct sun8i_dw_hdmi_quirks *quirks;
struct reset_control *rst_ctrl;
- struct gpio_desc *ddc_en;
};
extern struct platform_driver sun8i_hdmi_phy_driver;
.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
};
-static int
+static enum drm_mode_status
simpledrm_simple_display_pipe_mode_valid(struct drm_simple_display_pipe *pipe,
const struct drm_display_mode *mode)
{
return;
spin_lock(&bo->bdev->lru_lock);
- if (bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+ if (bo->resource)
+ ttm_resource_del_bulk_move(bo->resource, bo);
bo->bulk_move = bulk;
- if (bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+ if (bo->resource)
+ ttm_resource_add_bulk_move(bo->resource, bo);
spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_set_bulk_move);
{
dma_resv_assert_held(bo->base.resv);
WARN_ON_ONCE(!kref_read(&bo->kref));
- if (!(bo->pin_count++) && bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_del(bo->bulk_move, bo->resource);
+ spin_lock(&bo->bdev->lru_lock);
+ if (bo->resource)
+ ttm_resource_del_bulk_move(bo->resource, bo);
+ ++bo->pin_count;
+ spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_pin);
if (WARN_ON_ONCE(!bo->pin_count))
return;
- if (!(--bo->pin_count) && bo->bulk_move && bo->resource)
- ttm_lru_bulk_move_add(bo->bulk_move, bo->resource);
+ spin_lock(&bo->bdev->lru_lock);
+ --bo->pin_count;
+ if (bo->resource)
+ ttm_resource_add_bulk_move(bo->resource, bo);
+ spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_bo_unpin);
ttm_resource_manager_for_each_res(man, &cursor, res) {
struct ttm_buffer_object *bo = res->bo;
- uint32_t num_pages = PFN_UP(bo->base.size);
+ uint32_t num_pages;
+ if (!bo)
+ continue;
+
+ num_pages = PFN_UP(bo->base.size);
ret = ttm_bo_swapout(bo, ctx, gfp_flags);
/* ttm_bo_swapout has dropped the lru_lock */
if (!ret)
}
/* Add the resource to a bulk_move cursor */
-void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res)
+static void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
+ struct ttm_resource *res)
{
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
}
/* Remove the resource from a bulk_move range */
-void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res)
+static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
+ struct ttm_resource *res)
{
struct ttm_lru_bulk_move_pos *pos = ttm_lru_bulk_move_pos(bulk, res);
}
}
+/* Add the resource to a bulk move if the BO is configured for it */
+void ttm_resource_add_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo)
+{
+ if (bo->bulk_move && !bo->pin_count)
+ ttm_lru_bulk_move_add(bo->bulk_move, res);
+}
+
+/* Remove the resource from a bulk move if the BO is configured for it */
+void ttm_resource_del_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo)
+{
+ if (bo->bulk_move && !bo->pin_count)
+ ttm_lru_bulk_move_del(bo->bulk_move, res);
+}
+
/* Move a resource to the LRU or bulk tail */
void ttm_resource_move_to_lru_tail(struct ttm_resource *res)
{
res->bus.is_iomem = false;
res->bus.caching = ttm_cached;
res->bo = bo;
- INIT_LIST_HEAD(&res->lru);
man = ttm_manager_type(bo->bdev, place->mem_type);
spin_lock(&bo->bdev->lru_lock);
- man->usage += res->num_pages << PAGE_SHIFT;
- if (bo->bulk_move)
- ttm_lru_bulk_move_add(bo->bulk_move, res);
+ if (bo->pin_count)
+ list_add_tail(&res->lru, &bo->bdev->pinned);
else
- ttm_resource_move_to_lru_tail(res);
+ list_add_tail(&res->lru, &man->lru[bo->priority]);
+ man->usage += res->num_pages << PAGE_SHIFT;
spin_unlock(&bo->bdev->lru_lock);
}
EXPORT_SYMBOL(ttm_resource_init);
{
struct ttm_resource_manager *man =
ttm_manager_type(bo->bdev, place->mem_type);
+ int ret;
+
+ ret = man->func->alloc(man, bo, place, res_ptr);
+ if (ret)
+ return ret;
- return man->func->alloc(man, bo, place, res_ptr);
+ spin_lock(&bo->bdev->lru_lock);
+ ttm_resource_add_bulk_move(*res_ptr, bo);
+ spin_unlock(&bo->bdev->lru_lock);
+ return 0;
}
void ttm_resource_free(struct ttm_buffer_object *bo, struct ttm_resource **res)
if (!*res)
return;
- if (bo->bulk_move) {
- spin_lock(&bo->bdev->lru_lock);
- ttm_lru_bulk_move_del(bo->bulk_move, *res);
- spin_unlock(&bo->bdev->lru_lock);
- }
-
+ spin_lock(&bo->bdev->lru_lock);
+ ttm_resource_del_bulk_move(*res, bo);
+ spin_unlock(&bo->bdev->lru_lock);
man = ttm_manager_type(bo->bdev, (*res)->mem_type);
man->func->free(man, *res);
*res = NULL;
{
struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
mutex_lock(&vc4->purgeable.lock);
list_add_tail(&bo->size_head, &vc4->purgeable.list);
vc4->purgeable.num++;
{
struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
/* list_del_init() is used here because the caller might release
* the purgeable lock in order to acquire the madv one and update the
* madv status.
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_bo *bo;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return ERR_PTR(-ENODEV);
+
bo = kzalloc(sizeof(*bo), GFP_KERNEL);
if (!bo)
return ERR_PTR(-ENOMEM);
struct drm_gem_cma_object *cma_obj;
struct vc4_bo *bo;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return ERR_PTR(-ENODEV);
+
if (size == 0)
return ERR_PTR(-EINVAL);
return bo;
}
-int vc4_dumb_create(struct drm_file *file_priv,
- struct drm_device *dev,
- struct drm_mode_create_dumb *args)
+int vc4_bo_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
{
- int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_bo *bo = NULL;
int ret;
- if (args->pitch < min_pitch)
- args->pitch = min_pitch;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
- if (args->size < args->pitch * args->height)
- args->size = args->pitch * args->height;
+ ret = vc4_dumb_fixup_args(args);
+ if (ret)
+ return ret;
bo = vc4_bo_create(dev, args->size, false, VC4_BO_TYPE_DUMB);
if (IS_ERR(bo))
int vc4_bo_inc_usecnt(struct vc4_bo *bo)
{
+ struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
int ret;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
/* Fast path: if the BO is already retained by someone, no need to
* check the madv status.
*/
void vc4_bo_dec_usecnt(struct vc4_bo *bo)
{
+ struct vc4_dev *vc4 = to_vc4_dev(bo->base.base.dev);
+
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
/* Fast path: if the BO is still retained by someone, no need to test
* the madv value.
*/
struct vc4_bo *bo = NULL;
int ret;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
ret = vc4_grab_bin_bo(vc4, vc4file);
if (ret)
return ret;
int vc4_mmap_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_vc4_mmap_bo *args = data;
struct drm_gem_object *gem_obj;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
gem_obj = drm_gem_object_lookup(file_priv, args->handle);
if (!gem_obj) {
DRM_DEBUG("Failed to look up GEM BO %d\n", args->handle);
struct vc4_bo *bo = NULL;
int ret;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (args->size == 0)
return -EINVAL;
int vc4_set_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_vc4_set_tiling *args = data;
struct drm_gem_object *gem_obj;
struct vc4_bo *bo;
bool t_format;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (args->flags != 0)
return -EINVAL;
int vc4_get_tiling_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_vc4_get_tiling *args = data;
struct drm_gem_object *gem_obj;
struct vc4_bo *bo;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (args->flags != 0 || args->modifier != 0)
return -EINVAL;
struct vc4_dev *vc4 = to_vc4_dev(dev);
int i;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
/* Create the initial set of BO labels that the kernel will
* use. This lets us avoid a bunch of string reallocation in
* the kernel's draw and BO allocation paths.
struct drm_gem_object *gem_obj;
int ret = 0, label;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (!args->len)
return -EINVAL;
* Removing 1 from the FIFO full level however
* seems to completely remove that issue.
*/
- if (!vc4->hvs->hvs5)
+ if (!vc4->is_vc5)
return fifo_len_bytes - 3 * HVS_FIFO_LATENCY_PIX - 1;
return fifo_len_bytes - 3 * HVS_FIFO_LATENCY_PIX;
if (is_dsi)
CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep);
- if (vc4->hvs->hvs5)
+ if (vc4->is_vc5)
CRTC_WRITE(PV_MUX_CFG,
VC4_SET_FIELD(PV_MUX_CFG_RGB_PIXEL_MUX_MODE_NO_SWAP,
PV_MUX_CFG_RGB_PIXEL_MUX_MODE));
struct drm_framebuffer *old_fb;
struct drm_pending_vblank_event *event;
- struct vc4_seqno_cb cb;
+ union {
+ struct dma_fence_cb fence;
+ struct vc4_seqno_cb seqno;
+ } cb;
};
/* Called when the V3D execution for the BO being flipped to is done, so that
* we can actually update the plane's address to point to it.
*/
static void
-vc4_async_page_flip_complete(struct vc4_seqno_cb *cb)
+vc4_async_page_flip_complete(struct vc4_async_flip_state *flip_state)
{
- struct vc4_async_flip_state *flip_state =
- container_of(cb, struct vc4_async_flip_state, cb);
struct drm_crtc *crtc = flip_state->crtc;
struct drm_device *dev = crtc->dev;
struct drm_plane *plane = crtc->primary;
drm_crtc_vblank_put(crtc);
drm_framebuffer_put(flip_state->fb);
- /* Decrement the BO usecnt in order to keep the inc/dec calls balanced
- * when the planes are updated through the async update path.
- * FIXME: we should move to generic async-page-flip when it's
- * available, so that we can get rid of this hand-made cleanup_fb()
- * logic.
- */
- if (flip_state->old_fb) {
- struct drm_gem_cma_object *cma_bo;
- struct vc4_bo *bo;
+ if (flip_state->old_fb)
+ drm_framebuffer_put(flip_state->old_fb);
+
+ kfree(flip_state);
+}
+
+static void vc4_async_page_flip_seqno_complete(struct vc4_seqno_cb *cb)
+{
+ struct vc4_async_flip_state *flip_state =
+ container_of(cb, struct vc4_async_flip_state, cb.seqno);
+ struct vc4_bo *bo = NULL;
- cma_bo = drm_fb_cma_get_gem_obj(flip_state->old_fb, 0);
+ if (flip_state->old_fb) {
+ struct drm_gem_cma_object *cma_bo =
+ drm_fb_cma_get_gem_obj(flip_state->old_fb, 0);
bo = to_vc4_bo(&cma_bo->base);
- vc4_bo_dec_usecnt(bo);
- drm_framebuffer_put(flip_state->old_fb);
}
- kfree(flip_state);
+ vc4_async_page_flip_complete(flip_state);
+
+ /*
+ * Decrement the BO usecnt in order to keep the inc/dec
+ * calls balanced when the planes are updated through
+ * the async update path.
+ *
+ * FIXME: we should move to generic async-page-flip when
+ * it's available, so that we can get rid of this
+ * hand-made cleanup_fb() logic.
+ */
+ if (bo)
+ vc4_bo_dec_usecnt(bo);
}
-/* Implements async (non-vblank-synced) page flips.
- *
- * The page flip ioctl needs to return immediately, so we grab the
- * modeset semaphore on the pipe, and queue the address update for
- * when V3D is done with the BO being flipped to.
- */
-static int vc4_async_page_flip(struct drm_crtc *crtc,
- struct drm_framebuffer *fb,
- struct drm_pending_vblank_event *event,
- uint32_t flags)
+static void vc4_async_page_flip_fence_complete(struct dma_fence *fence,
+ struct dma_fence_cb *cb)
{
- struct drm_device *dev = crtc->dev;
- struct drm_plane *plane = crtc->primary;
- int ret = 0;
- struct vc4_async_flip_state *flip_state;
+ struct vc4_async_flip_state *flip_state =
+ container_of(cb, struct vc4_async_flip_state, cb.fence);
+
+ vc4_async_page_flip_complete(flip_state);
+ dma_fence_put(fence);
+}
+
+static int vc4_async_set_fence_cb(struct drm_device *dev,
+ struct vc4_async_flip_state *flip_state)
+{
+ struct drm_framebuffer *fb = flip_state->fb;
struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
- struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct dma_fence *fence;
+ int ret;
- /* Increment the BO usecnt here, so that we never end up with an
- * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the
- * plane is later updated through the non-async path.
- * FIXME: we should move to generic async-page-flip when it's
- * available, so that we can get rid of this hand-made prepare_fb()
- * logic.
- */
- ret = vc4_bo_inc_usecnt(bo);
+ if (!vc4->is_vc5) {
+ struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+
+ return vc4_queue_seqno_cb(dev, &flip_state->cb.seqno, bo->seqno,
+ vc4_async_page_flip_seqno_complete);
+ }
+
+ ret = dma_resv_get_singleton(cma_bo->base.resv, DMA_RESV_USAGE_READ, &fence);
if (ret)
return ret;
+ /* If there's no fence, complete the page flip immediately */
+ if (!fence) {
+ vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
+ return 0;
+ }
+
+ /* If the fence has already been completed, complete the page flip */
+ if (dma_fence_add_callback(fence, &flip_state->cb.fence,
+ vc4_async_page_flip_fence_complete))
+ vc4_async_page_flip_fence_complete(fence, &flip_state->cb.fence);
+
+ return 0;
+}
+
+static int
+vc4_async_page_flip_common(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t flags)
+{
+ struct drm_device *dev = crtc->dev;
+ struct drm_plane *plane = crtc->primary;
+ struct vc4_async_flip_state *flip_state;
+
flip_state = kzalloc(sizeof(*flip_state), GFP_KERNEL);
- if (!flip_state) {
- vc4_bo_dec_usecnt(bo);
+ if (!flip_state)
return -ENOMEM;
- }
drm_framebuffer_get(fb);
flip_state->fb = fb;
*/
drm_atomic_set_fb_for_plane(plane->state, fb);
- vc4_queue_seqno_cb(dev, &flip_state->cb, bo->seqno,
- vc4_async_page_flip_complete);
+ vc4_async_set_fence_cb(dev, flip_state);
/* Driver takes ownership of state on successful async commit. */
return 0;
}
+/* Implements async (non-vblank-synced) page flips.
+ *
+ * The page flip ioctl needs to return immediately, so we grab the
+ * modeset semaphore on the pipe, and queue the address update for
+ * when V3D is done with the BO being flipped to.
+ */
+static int vc4_async_page_flip(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t flags)
+{
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+ struct drm_gem_cma_object *cma_bo = drm_fb_cma_get_gem_obj(fb, 0);
+ struct vc4_bo *bo = to_vc4_bo(&cma_bo->base);
+ int ret;
+
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
+ /*
+ * Increment the BO usecnt here, so that we never end up with an
+ * unbalanced number of vc4_bo_{dec,inc}_usecnt() calls when the
+ * plane is later updated through the non-async path.
+ *
+ * FIXME: we should move to generic async-page-flip when
+ * it's available, so that we can get rid of this
+ * hand-made prepare_fb() logic.
+ */
+ ret = vc4_bo_inc_usecnt(bo);
+ if (ret)
+ return ret;
+
+ ret = vc4_async_page_flip_common(crtc, fb, event, flags);
+ if (ret) {
+ vc4_bo_dec_usecnt(bo);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int vc5_async_page_flip(struct drm_crtc *crtc,
+ struct drm_framebuffer *fb,
+ struct drm_pending_vblank_event *event,
+ uint32_t flags)
+{
+ return vc4_async_page_flip_common(crtc, fb, event, flags);
+}
+
int vc4_page_flip(struct drm_crtc *crtc,
struct drm_framebuffer *fb,
struct drm_pending_vblank_event *event,
uint32_t flags,
struct drm_modeset_acquire_ctx *ctx)
{
- if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
- return vc4_async_page_flip(crtc, fb, event, flags);
- else
+ if (flags & DRM_MODE_PAGE_FLIP_ASYNC) {
+ struct drm_device *dev = crtc->dev;
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
+
+ if (vc4->is_vc5)
+ return vc5_async_page_flip(crtc, fb, event, flags);
+ else
+ return vc4_async_page_flip(crtc, fb, event, flags);
+ } else {
return drm_atomic_helper_page_flip(crtc, fb, event, flags, ctx);
+ }
}
struct drm_crtc_state *vc4_crtc_duplicate_state(struct drm_crtc *crtc)
crtc_funcs, NULL);
drm_crtc_helper_add(crtc, crtc_helper_funcs);
- if (!vc4->hvs->hvs5) {
+ if (!vc4->is_vc5) {
drm_mode_crtc_set_gamma_size(crtc, ARRAY_SIZE(vc4_crtc->lut_r));
drm_crtc_enable_color_mgmt(crtc, 0, false, crtc->gamma_size);
return map;
}
+int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args)
+{
+ int min_pitch = DIV_ROUND_UP(args->width * args->bpp, 8);
+
+ if (args->pitch < min_pitch)
+ args->pitch = min_pitch;
+
+ if (args->size < args->pitch * args->height)
+ args->size = args->pitch * args->height;
+
+ return 0;
+}
+
+static int vc5_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args)
+{
+ int ret;
+
+ ret = vc4_dumb_fixup_args(args);
+ if (ret)
+ return ret;
+
+ return drm_gem_cma_dumb_create_internal(file_priv, dev, args);
+}
+
static int vc4_get_param_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
if (args->pad != 0)
return -EINVAL;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (!vc4->v3d)
return -ENODEV;
static int vc4_open(struct drm_device *dev, struct drm_file *file)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_file *vc4file;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
vc4file = kzalloc(sizeof(*vc4file), GFP_KERNEL);
if (!vc4file)
return -ENOMEM;
+ vc4file->dev = vc4;
vc4_perfmon_open_file(vc4file);
file->driver_priv = vc4file;
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_file *vc4file = file->driver_priv;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
if (vc4file->bin_bo_used)
vc4_v3d_bin_bo_put(vc4);
DRM_IOCTL_DEF_DRV(VC4_PERFMON_GET_VALUES, vc4_perfmon_get_values_ioctl, DRM_RENDER_ALLOW),
};
-static struct drm_driver vc4_drm_driver = {
+static const struct drm_driver vc4_drm_driver = {
.driver_features = (DRIVER_MODESET |
DRIVER_ATOMIC |
DRIVER_GEM |
.gem_create_object = vc4_create_object,
- DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc4_dumb_create),
+ DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc4_bo_dumb_create),
.ioctls = vc4_drm_ioctls,
.num_ioctls = ARRAY_SIZE(vc4_drm_ioctls),
.patchlevel = DRIVER_PATCHLEVEL,
};
+static const struct drm_driver vc5_drm_driver = {
+ .driver_features = (DRIVER_MODESET |
+ DRIVER_ATOMIC |
+ DRIVER_GEM),
+
+#if defined(CONFIG_DEBUG_FS)
+ .debugfs_init = vc4_debugfs_init,
+#endif
+
+ DRM_GEM_CMA_DRIVER_OPS_WITH_DUMB_CREATE(vc5_dumb_create),
+
+ .fops = &vc4_drm_fops,
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
static void vc4_match_add_drivers(struct device *dev,
struct component_match **match,
struct platform_driver *const *drivers,
static int vc4_drm_bind(struct device *dev)
{
struct platform_device *pdev = to_platform_device(dev);
+ const struct drm_driver *driver;
struct rpi_firmware *firmware = NULL;
struct drm_device *drm;
struct vc4_dev *vc4;
struct device_node *node;
struct drm_crtc *crtc;
+ bool is_vc5;
int ret = 0;
dev->coherent_dma_mask = DMA_BIT_MASK(32);
- /* If VC4 V3D is missing, don't advertise render nodes. */
- node = of_find_matching_node_and_match(NULL, vc4_v3d_dt_match, NULL);
- if (!node || !of_device_is_available(node))
- vc4_drm_driver.driver_features &= ~DRIVER_RENDER;
- of_node_put(node);
+ is_vc5 = of_device_is_compatible(dev->of_node, "brcm,bcm2711-vc5");
+ if (is_vc5)
+ driver = &vc5_drm_driver;
+ else
+ driver = &vc4_drm_driver;
- vc4 = devm_drm_dev_alloc(dev, &vc4_drm_driver, struct vc4_dev, base);
+ vc4 = devm_drm_dev_alloc(dev, driver, struct vc4_dev, base);
if (IS_ERR(vc4))
return PTR_ERR(vc4);
+ vc4->is_vc5 = is_vc5;
drm = &vc4->base;
platform_set_drvdata(pdev, drm);
INIT_LIST_HEAD(&vc4->debugfs_list);
- mutex_init(&vc4->bin_bo_lock);
+ if (!is_vc5) {
+ mutex_init(&vc4->bin_bo_lock);
- ret = vc4_bo_cache_init(drm);
- if (ret)
- return ret;
+ ret = vc4_bo_cache_init(drm);
+ if (ret)
+ return ret;
+ }
ret = drmm_mode_config_init(drm);
if (ret)
return ret;
- ret = vc4_gem_init(drm);
- if (ret)
- return ret;
+ if (!is_vc5) {
+ ret = vc4_gem_init(drm);
+ if (ret)
+ return ret;
+ }
node = of_find_compatible_node(NULL, NULL, "raspberrypi,bcm2835-firmware");
if (node) {
return -EPROBE_DEFER;
}
- ret = drm_aperture_remove_framebuffers(false, &vc4_drm_driver);
+ ret = drm_aperture_remove_framebuffers(false, driver);
if (ret)
return ret;
* done. This way, only events related to a specific job will be counted.
*/
struct vc4_perfmon {
+ struct vc4_dev *dev;
+
/* Tracks the number of users of the perfmon, when this counter reaches
* zero the perfmon is destroyed.
*/
struct vc4_dev {
struct drm_device base;
+ bool is_vc5;
+
unsigned int irq;
struct vc4_hvs *hvs;
};
struct vc4_hvs {
+ struct vc4_dev *vc4;
struct platform_device *pdev;
void __iomem *regs;
u32 __iomem *dlist;
struct drm_mm_node mitchell_netravali_filter;
struct debugfs_regset32 regset;
-
- /* HVS version 5 flag, therefore requires updated dlist structures */
- bool hvs5;
};
struct vc4_plane {
#define VC4_REG32(reg) { .name = #reg, .offset = reg }
struct vc4_exec_info {
+ struct vc4_dev *dev;
+
/* Sequence number for this bin/render job. */
uint64_t seqno;
* released when the DRM file is closed should be placed here.
*/
struct vc4_file {
+ struct vc4_dev *dev;
+
struct {
struct idr idr;
struct mutex lock;
struct drm_gem_object *vc4_create_object(struct drm_device *dev, size_t size);
struct vc4_bo *vc4_bo_create(struct drm_device *dev, size_t size,
bool from_cache, enum vc4_kernel_bo_type type);
-int vc4_dumb_create(struct drm_file *file_priv,
- struct drm_device *dev,
- struct drm_mode_create_dumb *args);
+int vc4_bo_dumb_create(struct drm_file *file_priv,
+ struct drm_device *dev,
+ struct drm_mode_create_dumb *args);
int vc4_create_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv);
int vc4_create_shader_bo_ioctl(struct drm_device *dev, void *data,
/* vc4_drv.c */
void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
+int vc4_dumb_fixup_args(struct drm_mode_create_dumb *args);
/* vc4_dpi.c */
extern struct platform_driver vc4_dpi_driver;
u32 i;
int ret = 0;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (!vc4->v3d) {
DRM_DEBUG("VC4_GET_HANG_STATE with no VC4 V3D probed\n");
return -ENODEV;
unsigned long timeout_expire;
DEFINE_WAIT(wait);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (vc4->finished_seqno >= seqno)
return 0;
struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_exec_info *exec;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
again:
exec = vc4_first_bin_job(vc4);
if (!exec)
if (!exec)
return;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
/* A previous RCL may have written to one of our textures, and
* our full cache flush at bin time may have occurred before
* that RCL completed. Flush the texture cache now, but not
struct vc4_dev *vc4 = to_vc4_dev(dev);
bool was_empty = list_empty(&vc4->render_job_list);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
list_move_tail(&exec->head, &vc4->render_job_list);
if (was_empty)
vc4_submit_next_render_job(dev);
unsigned long irqflags;
struct vc4_seqno_cb *cb, *cb_temp;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
spin_lock_irqsave(&vc4->job_lock, irqflags);
while (!list_empty(&vc4->job_done_list)) {
struct vc4_exec_info *exec =
struct vc4_dev *vc4 = to_vc4_dev(dev);
unsigned long irqflags;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
cb->func = func;
INIT_WORK(&cb->work, vc4_seqno_cb_work);
vc4_wait_seqno_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_vc4_wait_seqno *args = data;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
return vc4_wait_for_seqno_ioctl_helper(dev, args->seqno,
&args->timeout_ns);
}
vc4_wait_bo_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
int ret;
struct drm_vc4_wait_bo *args = data;
struct drm_gem_object *gem_obj;
struct vc4_bo *bo;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (args->pad != 0)
return -EINVAL;
args->shader_rec_size,
args->bo_handle_count);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (!vc4->v3d) {
DRM_DEBUG("VC4_SUBMIT_CL with no VC4 V3D probed\n");
return -ENODEV;
DRM_ERROR("malloc failure on exec struct\n");
return -ENOMEM;
}
+ exec->dev = vc4;
ret = vc4_v3d_pm_get(vc4);
if (ret) {
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
vc4->dma_fence_context = dma_fence_context_alloc(1);
INIT_LIST_HEAD(&vc4->bin_job_list);
int vc4_gem_madvise_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_vc4_gem_madvise *args = data;
struct drm_gem_object *gem_obj;
struct vc4_bo *bo;
int ret;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
switch (args->madv) {
case VC4_MADV_DONTNEED:
case VC4_MADV_WILLNEED:
unsigned int bpc,
enum vc4_hdmi_output_format fmt)
{
- unsigned long long clock = mode->clock * 1000;
+ unsigned long long clock = mode->clock * 1000ULL;
if (mode->flags & DRM_MODE_FLAG_DBLCLK)
clock = clock * 2;
int vc4_hvs_get_fifo_from_output(struct vc4_hvs *hvs, unsigned int output)
{
+ struct vc4_dev *vc4 = hvs->vc4;
u32 reg;
int ret;
- if (!hvs->hvs5)
+ if (!vc4->is_vc5)
return output;
switch (output) {
static int vc4_hvs_init_channel(struct vc4_hvs *hvs, struct drm_crtc *crtc,
struct drm_display_mode *mode, bool oneshot)
{
+ struct vc4_dev *vc4 = hvs->vc4;
struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
struct vc4_crtc_state *vc4_crtc_state = to_vc4_crtc_state(crtc->state);
unsigned int chan = vc4_crtc_state->assigned_channel;
*/
dispctrl = SCALER_DISPCTRLX_ENABLE;
- if (!hvs->hvs5)
+ if (!vc4->is_vc5)
dispctrl |= VC4_SET_FIELD(mode->hdisplay,
SCALER_DISPCTRLX_WIDTH) |
VC4_SET_FIELD(mode->vdisplay,
HVS_WRITE(SCALER_DISPBKGNDX(chan), dispbkgndx |
SCALER_DISPBKGND_AUTOHS |
- ((!hvs->hvs5) ? SCALER_DISPBKGND_GAMMA : 0) |
+ ((!vc4->is_vc5) ? SCALER_DISPBKGND_GAMMA : 0) |
(interlace ? SCALER_DISPBKGND_INTERLACE : 0));
/* Reload the LUT, since the SRAMs would have been disabled if
if (!hvs)
return -ENOMEM;
+ hvs->vc4 = vc4;
hvs->pdev = pdev;
- if (of_device_is_compatible(pdev->dev.of_node, "brcm,bcm2711-hvs"))
- hvs->hvs5 = true;
-
hvs->regs = vc4_ioremap_regs(pdev, 0);
if (IS_ERR(hvs->regs))
return PTR_ERR(hvs->regs);
hvs->regset.regs = hvs_regs;
hvs->regset.nregs = ARRAY_SIZE(hvs_regs);
- if (hvs->hvs5) {
+ if (vc4->is_vc5) {
hvs->core_clk = devm_clk_get(&pdev->dev, NULL);
if (IS_ERR(hvs->core_clk)) {
dev_err(&pdev->dev, "Couldn't get core clock\n");
}
}
- if (!hvs->hvs5)
+ if (!vc4->is_vc5)
hvs->dlist = hvs->regs + SCALER_DLIST_START;
else
hvs->dlist = hvs->regs + SCALER5_DLIST_START;
* between planes when they don't overlap on the screen, but
* for now we just allocate globally.
*/
- if (!hvs->hvs5)
+ if (!vc4->is_vc5)
/* 48k words of 2x12-bit pixels */
drm_mm_init(&hvs->lbm_mm, 0, 48 * 1024);
else
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
if (!vc4->v3d)
return;
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
if (!vc4->v3d)
return;
int vc4_irq_install(struct drm_device *dev, int irq)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
int ret;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (irq == IRQ_NOTCONNECTED)
return -ENOTCONN;
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
vc4_irq_disable(dev);
free_irq(vc4->irq, dev);
}
struct vc4_dev *vc4 = to_vc4_dev(dev);
unsigned long irqflags;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
/* Acknowledge any stale IRQs. */
V3D_WRITE(V3D_INTCTL, V3D_DRIVER_IRQS);
old_hvs_state->fifo_state[channel].pending_commit = NULL;
}
- if (vc4->hvs->hvs5) {
+ if (vc4->is_vc5) {
unsigned long state_rate = max(old_hvs_state->core_clock_rate,
new_hvs_state->core_clock_rate);
unsigned long core_rate = max_t(unsigned long,
vc4_ctm_commit(vc4, state);
- if (vc4->hvs->hvs5)
+ if (vc4->is_vc5)
vc5_hvs_pv_muxing_commit(vc4, state);
else
vc4_hvs_pv_muxing_commit(vc4, state);
drm_atomic_helper_cleanup_planes(dev, state);
- if (vc4->hvs->hvs5) {
+ if (vc4->is_vc5) {
drm_dbg(dev, "Running the core clock at %lu Hz\n",
new_hvs_state->core_clock_rate);
struct drm_file *file_priv,
const struct drm_mode_fb_cmd2 *mode_cmd)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_mode_fb_cmd2 mode_cmd_local;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return ERR_PTR(-ENODEV);
+
/* If the user didn't specify a modifier, use the
* vc4_set_tiling_ioctl() state for the BO.
*/
.fb_create = vc4_fb_create,
};
+static const struct drm_mode_config_funcs vc5_mode_funcs = {
+ .atomic_check = vc4_atomic_check,
+ .atomic_commit = drm_atomic_helper_commit,
+ .fb_create = drm_gem_fb_create,
+};
+
int vc4_kms_load(struct drm_device *dev)
{
struct vc4_dev *vc4 = to_vc4_dev(dev);
- bool is_vc5 = of_device_is_compatible(dev->dev->of_node,
- "brcm,bcm2711-vc5");
int ret;
/*
* the BCM2711, but the load tracker computations are used for
* the core clock rate calculation.
*/
- if (!is_vc5) {
+ if (!vc4->is_vc5) {
/* Start with the load tracker enabled. Can be
* disabled through the debugfs load_tracker file.
*/
return ret;
}
- if (is_vc5) {
+ if (vc4->is_vc5) {
dev->mode_config.max_width = 7680;
dev->mode_config.max_height = 7680;
} else {
dev->mode_config.max_height = 2048;
}
- dev->mode_config.funcs = &vc4_mode_funcs;
+ dev->mode_config.funcs = vc4->is_vc5 ? &vc5_mode_funcs : &vc4_mode_funcs;
dev->mode_config.helper_private = &vc4_mode_config_helpers;
dev->mode_config.preferred_depth = 24;
dev->mode_config.async_page_flip = true;
void vc4_perfmon_get(struct vc4_perfmon *perfmon)
{
- if (perfmon)
- refcount_inc(&perfmon->refcnt);
+ struct vc4_dev *vc4;
+
+ if (!perfmon)
+ return;
+
+ vc4 = perfmon->dev;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
+ refcount_inc(&perfmon->refcnt);
}
void vc4_perfmon_put(struct vc4_perfmon *perfmon)
{
- if (perfmon && refcount_dec_and_test(&perfmon->refcnt))
+ struct vc4_dev *vc4;
+
+ if (!perfmon)
+ return;
+
+ vc4 = perfmon->dev;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
+ if (refcount_dec_and_test(&perfmon->refcnt))
kfree(perfmon);
}
unsigned int i;
u32 mask;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
if (WARN_ON_ONCE(!perfmon || vc4->active_perfmon))
return;
{
unsigned int i;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
if (WARN_ON_ONCE(!vc4->active_perfmon ||
perfmon != vc4->active_perfmon))
return;
struct vc4_perfmon *vc4_perfmon_find(struct vc4_file *vc4file, int id)
{
+ struct vc4_dev *vc4 = vc4file->dev;
struct vc4_perfmon *perfmon;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return NULL;
+
mutex_lock(&vc4file->perfmon.lock);
perfmon = idr_find(&vc4file->perfmon.idr, id);
vc4_perfmon_get(perfmon);
void vc4_perfmon_open_file(struct vc4_file *vc4file)
{
+ struct vc4_dev *vc4 = vc4file->dev;
+
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
mutex_init(&vc4file->perfmon.lock);
idr_init_base(&vc4file->perfmon.idr, VC4_PERFMONID_MIN);
+ vc4file->dev = vc4;
}
static int vc4_perfmon_idr_del(int id, void *elem, void *data)
void vc4_perfmon_close_file(struct vc4_file *vc4file)
{
+ struct vc4_dev *vc4 = vc4file->dev;
+
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
mutex_lock(&vc4file->perfmon.lock);
idr_for_each(&vc4file->perfmon.idr, vc4_perfmon_idr_del, NULL);
idr_destroy(&vc4file->perfmon.idr);
unsigned int i;
int ret;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (!vc4->v3d) {
DRM_DEBUG("Creating perfmon no VC4 V3D probed\n");
return -ENODEV;
GFP_KERNEL);
if (!perfmon)
return -ENOMEM;
+ perfmon->dev = vc4;
for (i = 0; i < req->ncounters; i++)
perfmon->events[i] = req->events[i];
struct drm_vc4_perfmon_destroy *req = data;
struct vc4_perfmon *perfmon;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (!vc4->v3d) {
DRM_DEBUG("Destroying perfmon no VC4 V3D probed\n");
return -ENODEV;
struct vc4_perfmon *perfmon;
int ret;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (!vc4->v3d) {
DRM_DEBUG("Getting perfmon no VC4 V3D probed\n");
return -ENODEV;
}
/* Align it to 64 or 128 (hvs5) bytes */
- lbm = roundup(lbm, vc4->hvs->hvs5 ? 128 : 64);
+ lbm = roundup(lbm, vc4->is_vc5 ? 128 : 64);
/* Each "word" of the LBM memory contains 2 or 4 (hvs5) pixels */
- lbm /= vc4->hvs->hvs5 ? 4 : 2;
+ lbm /= vc4->is_vc5 ? 4 : 2;
return lbm;
}
ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
&vc4_state->lbm,
lbm_size,
- vc4->hvs->hvs5 ? 64 : 32,
+ vc4->is_vc5 ? 64 : 32,
0, 0);
spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
mix_plane_alpha = state->alpha != DRM_BLEND_ALPHA_OPAQUE &&
fb->format->has_alpha;
- if (!vc4->hvs->hvs5) {
+ if (!vc4->is_vc5) {
/* Control word */
vc4_dlist_write(vc4_state,
SCALER_CTL0_VALID |
old_vc4_state = to_vc4_plane_state(plane->state);
new_vc4_state = to_vc4_plane_state(new_plane_state);
+
+ if (!new_vc4_state->hw_dlist)
+ return -EINVAL;
+
if (old_vc4_state->dlist_count != new_vc4_state->dlist_count ||
old_vc4_state->pos0_offset != new_vc4_state->pos0_offset ||
old_vc4_state->pos2_offset != new_vc4_state->pos2_offset ||
.atomic_async_update = vc4_plane_atomic_async_update,
};
+static const struct drm_plane_helper_funcs vc5_plane_helper_funcs = {
+ .atomic_check = vc4_plane_atomic_check,
+ .atomic_update = vc4_plane_atomic_update,
+ .atomic_async_check = vc4_plane_atomic_async_check,
+ .atomic_async_update = vc4_plane_atomic_async_update,
+};
+
static bool vc4_format_mod_supported(struct drm_plane *plane,
uint32_t format,
uint64_t modifier)
struct drm_plane *vc4_plane_init(struct drm_device *dev,
enum drm_plane_type type)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct drm_plane *plane = NULL;
struct vc4_plane *vc4_plane;
u32 formats[ARRAY_SIZE(hvs_formats)];
int num_formats = 0;
int ret = 0;
unsigned i;
- bool hvs5 = of_device_is_compatible(dev->dev->of_node,
- "brcm,bcm2711-vc5");
static const uint64_t modifiers[] = {
DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED,
DRM_FORMAT_MOD_BROADCOM_SAND128,
return ERR_PTR(-ENOMEM);
for (i = 0; i < ARRAY_SIZE(hvs_formats); i++) {
- if (!hvs_formats[i].hvs5_only || hvs5) {
+ if (!hvs_formats[i].hvs5_only || vc4->is_vc5) {
formats[num_formats] = hvs_formats[i].drm;
num_formats++;
}
if (ret)
return ERR_PTR(ret);
- drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
+ if (vc4->is_vc5)
+ drm_plane_helper_add(plane, &vc5_plane_helper_funcs);
+ else
+ drm_plane_helper_add(plane, &vc4_plane_helper_funcs);
drm_plane_create_alpha_property(plane);
drm_plane_create_rotation_property(plane, DRM_MODE_ROTATE_0,
int vc4_get_rcl(struct drm_device *dev, struct vc4_exec_info *exec)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
struct vc4_rcl_setup setup = {0};
struct drm_vc4_submit_cl *args = exec->args;
bool has_bin = args->bin_cl_size != 0;
int ret;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
if (args->min_x_tile > args->max_x_tile ||
args->min_y_tile > args->max_y_tile) {
DRM_DEBUG("Bad render tile set (%d,%d)-(%d,%d)\n",
int
vc4_v3d_pm_get(struct vc4_dev *vc4)
{
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
mutex_lock(&vc4->power_lock);
if (vc4->power_refcount++ == 0) {
int ret = pm_runtime_get_sync(&vc4->v3d->pdev->dev);
void
vc4_v3d_pm_put(struct vc4_dev *vc4)
{
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
mutex_lock(&vc4->power_lock);
if (--vc4->power_refcount == 0) {
pm_runtime_mark_last_busy(&vc4->v3d->pdev->dev);
uint64_t seqno = 0;
struct vc4_exec_info *exec;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
try_again:
spin_lock_irqsave(&vc4->job_lock, irqflags);
slot = ffs(~vc4->bin_alloc_used);
{
int ret = 0;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
mutex_lock(&vc4->bin_bo_lock);
if (used && *used)
void vc4_v3d_bin_bo_put(struct vc4_dev *vc4)
{
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return;
+
mutex_lock(&vc4->bin_bo_lock);
kref_put(&vc4->bin_bo_kref, bin_bo_release);
mutex_unlock(&vc4->bin_bo_lock);
struct drm_gem_cma_object *
vc4_use_bo(struct vc4_exec_info *exec, uint32_t hindex)
{
+ struct vc4_dev *vc4 = exec->dev;
struct drm_gem_cma_object *obj;
struct vc4_bo *bo;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return NULL;
+
if (hindex >= exec->bo_count) {
DRM_DEBUG("BO index %d greater than BO count %d\n",
hindex, exec->bo_count);
uint32_t offset, uint8_t tiling_format,
uint32_t width, uint32_t height, uint8_t cpp)
{
+ struct vc4_dev *vc4 = exec->dev;
uint32_t aligned_width, aligned_height, stride, size;
uint32_t utile_w = utile_width(cpp);
uint32_t utile_h = utile_height(cpp);
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return false;
+
/* The shaded vertex format stores signed 12.4 fixed point
* (-2048,2047) offsets from the viewport center, so we should
* never have a render target larger than 4096. The texture
void *unvalidated,
struct vc4_exec_info *exec)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
uint32_t len = exec->args->bin_cl_size;
uint32_t dst_offset = 0;
uint32_t src_offset = 0;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
while (src_offset < len) {
void *dst_pkt = validated + dst_offset;
void *src_pkt = unvalidated + src_offset;
vc4_validate_shader_recs(struct drm_device *dev,
struct vc4_exec_info *exec)
{
+ struct vc4_dev *vc4 = to_vc4_dev(dev);
uint32_t i;
int ret = 0;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return -ENODEV;
+
for (i = 0; i < exec->shader_state_count; i++) {
ret = validate_gl_shader_rec(dev, exec, &exec->shader_state[i]);
if (ret)
struct vc4_validated_shader_info *
vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
{
+ struct vc4_dev *vc4 = to_vc4_dev(shader_obj->base.dev);
bool found_shader_end = false;
int shader_end_ip = 0;
uint32_t last_thread_switch_ip = -3;
struct vc4_validated_shader_info *validated_shader = NULL;
struct vc4_shader_validation_state validation_state;
+ if (WARN_ON_ONCE(vc4->is_vc5))
+ return NULL;
+
memset(&validation_state, 0, sizeof(validation_state));
validation_state.shader = shader_obj->vaddr;
validation_state.max_ip = shader_obj->base.size / sizeof(uint64_t);
* the whole buffer.
*/
vma->vm_flags &= ~VM_PFNMAP;
- vma->vm_flags |= VM_MIXEDMAP;
+ vma->vm_flags |= VM_MIXEDMAP | VM_DONTEXPAND;
vma->vm_pgoff = 0;
/*
if (!input_device->hid_desc)
goto cleanup;
- input_device->report_desc_size = desc->desc[0].wDescriptorLength;
+ input_device->report_desc_size = le16_to_cpu(
+ desc->desc[0].wDescriptorLength);
if (input_device->report_desc_size == 0) {
input_device->dev_info_status = -EINVAL;
goto cleanup;
memcpy(input_device->report_desc,
((unsigned char *)desc) + desc->bLength,
- desc->desc[0].wDescriptorLength);
+ le16_to_cpu(desc->desc[0].wDescriptorLength));
/* Send the ack */
memset(&ack, 0, sizeof(struct mousevsc_prt_msg));
#include <linux/cpu.h>
#include <linux/hyperv.h>
#include <asm/mshyperv.h>
+#include <linux/sched/isolation.h>
#include "hyperv_vmbus.h"
*/
if (newchannel->offermsg.offer.sub_channel_index == 0) {
mutex_unlock(&vmbus_connection.channel_mutex);
+ cpus_read_unlock();
/*
* Don't call free_channel(), because newchannel->kobj
* is not initialized yet.
u32 i, ncpu = num_online_cpus();
cpumask_var_t available_mask;
struct cpumask *allocated_mask;
+ const struct cpumask *hk_mask = housekeeping_cpumask(HK_TYPE_MANAGED_IRQ);
u32 target_cpu;
int numa_node;
if (!perf_chn ||
- !alloc_cpumask_var(&available_mask, GFP_KERNEL)) {
+ !alloc_cpumask_var(&available_mask, GFP_KERNEL) ||
+ cpumask_empty(hk_mask)) {
/*
* If the channel is not a performance critical
* channel, bind it to VMBUS_CONNECT_CPU.
* In case alloc_cpumask_var() fails, bind it to
* VMBUS_CONNECT_CPU.
+ * If all the cpus are isolated, bind it to
+ * VMBUS_CONNECT_CPU.
*/
channel->target_cpu = VMBUS_CONNECT_CPU;
if (perf_chn)
}
allocated_mask = &hv_context.hv_numa_map[numa_node];
- if (cpumask_equal(allocated_mask, cpumask_of_node(numa_node))) {
+retry:
+ cpumask_xor(available_mask, allocated_mask, cpumask_of_node(numa_node));
+ cpumask_and(available_mask, available_mask, hk_mask);
+
+ if (cpumask_empty(available_mask)) {
/*
* We have cycled through all the CPUs in the node;
* reset the allocated map.
*/
cpumask_clear(allocated_mask);
+ goto retry;
}
- cpumask_xor(available_mask, allocated_mask,
- cpumask_of_node(numa_node));
-
target_cpu = cpumask_first(available_mask);
cpumask_set_cpu(target_cpu, allocated_mask);
in_msg = kvp_transaction.kvp_msg;
/*
- * The key/value strings sent from the host are encoded in
+ * The key/value strings sent from the host are encoded
* in utf16; convert it to utf8 strings.
* The host assures us that the utf16 strings will not exceed
* the max lengths specified. We will however, reserve room
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
+#include <linux/sched/isolation.h>
#include <linux/sched/task_stack.h>
#include <linux/delay.h>
if (target_cpu >= nr_cpumask_bits)
return -EINVAL;
+ if (!cpumask_test_cpu(target_cpu, housekeeping_cpumask(HK_TYPE_MANAGED_IRQ)))
+ return -EINVAL;
+
/* No CPUs should come up or down during this. */
cpus_read_lock();
},
{
.board_names = {
- "ROG CROSSHAIR VIII FORMULA"
+ "ROG CROSSHAIR VIII FORMULA",
"ROG CROSSHAIR VIII HERO",
"ROG CROSSHAIR VIII HERO (WI-FI)",
},
res = platform_device_add(data->pdev);
if (res)
- goto ipmi_err;
+ goto dev_add_err;
platform_set_drvdata(data->pdev, data);
ipmi_destroy_user(data->ipmi.user);
ipmi_err:
platform_set_drvdata(data->pdev, NULL);
- platform_device_unregister(data->pdev);
+ platform_device_del(data->pdev);
+dev_add_err:
+ platform_device_put(data->pdev);
dev_err:
ida_free(&aem_ida, data->id);
id_err:
res = platform_device_add(data->pdev);
if (res)
- goto ipmi_err;
+ goto dev_add_err;
platform_set_drvdata(data->pdev, data);
ipmi_destroy_user(data->ipmi.user);
ipmi_err:
platform_set_drvdata(data->pdev, NULL);
- platform_device_unregister(data->pdev);
+ platform_device_del(data->pdev);
+dev_add_err:
+ platform_device_put(data->pdev);
dev_err:
ida_free(&aem_ida, data->id);
id_err:
cmd[6] = 0; /* checksum lsb */
/* mutex should already be locked if necessary */
- rc = occ->send_cmd(occ, cmd, sizeof(cmd));
+ rc = occ->send_cmd(occ, cmd, sizeof(cmd), &occ->resp, sizeof(occ->resp));
if (rc) {
occ->last_error = rc;
if (occ->error_count++ > OCC_ERROR_COUNT_THRESHOLD)
{
int rc;
u8 cmd[8];
+ u8 resp[8];
__be16 user_power_cap_be = cpu_to_be16(user_power_cap);
cmd[0] = 0; /* sequence number */
if (rc)
return rc;
- rc = occ->send_cmd(occ, cmd, sizeof(cmd));
+ rc = occ->send_cmd(occ, cmd, sizeof(cmd), resp, sizeof(resp));
mutex_unlock(&occ->lock);
void occ_shutdown(struct occ *occ)
{
+ mutex_lock(&occ->lock);
+
occ_shutdown_sysfs(occ);
if (occ->hwmon)
hwmon_device_unregister(occ->hwmon);
+ occ->hwmon = NULL;
+
+ mutex_unlock(&occ->lock);
}
EXPORT_SYMBOL_GPL(occ_shutdown);
int powr_sample_time_us; /* average power sample time */
u8 poll_cmd_data; /* to perform OCC poll command */
- int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len);
+ int (*send_cmd)(struct occ *occ, u8 *cmd, size_t len, void *resp,
+ size_t resp_len);
unsigned long next_update;
struct mutex lock; /* lock OCC access */
be32_to_cpu(data1));
}
-static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len)
+static int p8_i2c_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len,
+ void *resp, size_t resp_len)
{
int i, rc;
unsigned long start;
const long wait_time = msecs_to_jiffies(OCC_CMD_IN_PRG_WAIT_MS);
struct p8_i2c_occ *ctx = to_p8_i2c_occ(occ);
struct i2c_client *client = ctx->client;
- struct occ_response *resp = &occ->resp;
+ struct occ_response *or = (struct occ_response *)resp;
start = jiffies;
return rc;
/* wait for OCC */
- if (resp->return_status == OCC_RESP_CMD_IN_PRG) {
+ if (or->return_status == OCC_RESP_CMD_IN_PRG) {
rc = -EALREADY;
if (time_after(jiffies, start + timeout))
} while (rc);
/* check the OCC response */
- switch (resp->return_status) {
+ switch (or->return_status) {
case OCC_RESP_CMD_IN_PRG:
rc = -ETIMEDOUT;
break;
if (rc < 0)
return rc;
- data_length = get_unaligned_be16(&resp->data_length);
- if (data_length > OCC_RESP_DATA_BYTES)
+ data_length = get_unaligned_be16(&or->data_length);
+ if ((data_length + 7) > resp_len)
return -EMSGSIZE;
/* fetch the rest of the response data */
return notify;
}
-static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len)
+static int p9_sbe_occ_send_cmd(struct occ *occ, u8 *cmd, size_t len,
+ void *resp, size_t resp_len)
{
- struct occ_response *resp = &occ->resp;
struct p9_sbe_occ *ctx = to_p9_sbe_occ(occ);
- size_t resp_len = sizeof(*resp);
int rc;
rc = fsi_occ_submit(ctx->sbe, cmd, len, resp, &resp_len);
return rc;
}
- switch (resp->return_status) {
+ switch (((struct occ_response *)resp)->return_status) {
case OCC_RESP_CMD_IN_PRG:
rc = -ETIMEDOUT;
break;
* This only affects the READ_IOUT and READ_TEMPERATURE2 registers.
* READ_IOUT will return the sum of currents of all phases of a rail,
* and READ_TEMPERATURE2 will return the maximum temperature detected
- * for the the phases of the rail.
+ * for the phases of the rail.
*/
for (i = 0; i < info->pages; i++) {
/*
*/
static irqreturn_t cdns_i2c_master_isr(void *ptr)
{
- unsigned int isr_status, avail_bytes, updatetx;
+ unsigned int isr_status, avail_bytes;
unsigned int bytes_to_send;
- bool hold_quirk;
+ bool updatetx;
struct cdns_i2c *id = ptr;
/* Signal completion only after everything is updated */
int done_flag = 0;
* Check if transfer size register needs to be updated again for a
* large data receive operation.
*/
- updatetx = 0;
- if (id->recv_count > id->curr_recv_count)
- updatetx = 1;
-
- hold_quirk = (id->quirks & CDNS_I2C_BROKEN_HOLD_BIT) && updatetx;
+ updatetx = id->recv_count > id->curr_recv_count;
/* When receiving, handle data interrupt and completion interrupt */
if (id->p_recv_buf &&
break;
}
- if (cdns_is_holdquirk(id, hold_quirk))
+ if (cdns_is_holdquirk(id, updatetx))
break;
}
* maintain transfer size non-zero while performing a large
* receive operation.
*/
- if (cdns_is_holdquirk(id, hold_quirk)) {
+ if (cdns_is_holdquirk(id, updatetx)) {
/* wait while fifo is full */
while (cdns_i2c_readreg(CDNS_I2C_XFER_SIZE_OFFSET) !=
(id->curr_recv_count - CDNS_I2C_FIFO_DEPTH))
CDNS_I2C_XFER_SIZE_OFFSET);
id->curr_recv_count = id->recv_count;
}
- } else if (id->recv_count && !hold_quirk &&
- !id->curr_recv_count) {
-
- /* Set the slave address in address register*/
- cdns_i2c_writereg(id->p_msg->addr & CDNS_I2C_ADDR_MASK,
- CDNS_I2C_ADDR_OFFSET);
-
- if (id->recv_count > CDNS_I2C_TRANSFER_SIZE) {
- cdns_i2c_writereg(CDNS_I2C_TRANSFER_SIZE,
- CDNS_I2C_XFER_SIZE_OFFSET);
- id->curr_recv_count = CDNS_I2C_TRANSFER_SIZE;
- } else {
- cdns_i2c_writereg(id->recv_count,
- CDNS_I2C_XFER_SIZE_OFFSET);
- id->curr_recv_count = id->recv_count;
- }
}
/* Clear hold (if not repeated start) and signal completion */
return 0;
err_clk_dis:
+ clk_notifier_unregister(id->clk, &id->clk_rate_change_nb);
clk_disable_unprepare(id->clk);
pm_runtime_disable(&pdev->dev);
pm_runtime_set_suspended(&pdev->dev);
{
int ret;
- if (IS_ERR(dev->clk))
- return PTR_ERR(dev->clk);
-
if (prepare) {
/* Optional interface clock */
ret = clk_prepare_enable(dev->pclk);
goto exit_reset;
}
- dev->clk = devm_clk_get(&pdev->dev, NULL);
- if (!i2c_dw_prepare_clk(dev, true)) {
+ dev->clk = devm_clk_get_optional(&pdev->dev, NULL);
+ if (IS_ERR(dev->clk)) {
+ ret = PTR_ERR(dev->clk);
+ goto exit_reset;
+ }
+
+ ret = i2c_dw_prepare_clk(dev, true);
+ if (ret)
+ goto exit_reset;
+
+ if (dev->clk) {
u64 clk_khz;
dev->get_clk_rate_khz = i2c_dw_get_clk_rate_khz;
/* IMX I2C registers:
* the I2C register offset is different between SoCs,
- * to provid support for all these chips, split the
+ * to provide support for all these chips, split the
* register offset into a fixed base address and a
* variable shift value, then the full register offset
* will be calculated by
#define MLXCPLD_LPCI2C_NACK_IND 2
#define MLXCPLD_I2C_FREQ_1000KHZ_SET 0x04
-#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0c
+#define MLXCPLD_I2C_FREQ_400KHZ_SET 0x0e
#define MLXCPLD_I2C_FREQ_100KHZ_SET 0x42
enum mlxcpld_i2c_frequency {
if (ret < 0) {
dev_err(&pdev->dev,
"Request I2C IRQ %d fail\n", irq);
- return ret;
+ goto err_bulk_unprepare;
}
i2c_set_adapdata(&i2c->adap, i2c);
ret = i2c_add_adapter(&i2c->adap);
if (ret)
- return ret;
+ goto err_bulk_unprepare;
platform_set_drvdata(pdev, i2c);
return 0;
+
+err_bulk_unprepare:
+ clk_bulk_unprepare(I2C_MT65XX_CLK_MAX, i2c->clocks);
+
+ return ret;
}
static int mtk_i2c_remove(struct platform_device *pdev)
static int __init npcm_i2c_init(void)
{
npcm_i2c_debugfs_dir = debugfs_create_dir("npcm_i2c", NULL);
- platform_driver_register(&npcm_i2c_bus_driver);
- return 0;
+ return platform_driver_register(&npcm_i2c_bus_driver);
}
module_init(npcm_i2c_init);
struct sb800_mmio_cfg {
void __iomem *addr;
- struct resource *res;
bool use_mmio;
};
struct sb800_mmio_cfg *mmio_cfg)
{
if (mmio_cfg->use_mmio) {
- struct resource *res;
void __iomem *addr;
- res = request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR,
- SB800_PIIX4_FCH_PM_SIZE,
- "sb800_piix4_smb");
- if (!res) {
+ if (!request_mem_region_muxed(SB800_PIIX4_FCH_PM_ADDR,
+ SB800_PIIX4_FCH_PM_SIZE,
+ "sb800_piix4_smb")) {
dev_err(dev,
"SMBus base address memory region 0x%x already in use.\n",
SB800_PIIX4_FCH_PM_ADDR);
addr = ioremap(SB800_PIIX4_FCH_PM_ADDR,
SB800_PIIX4_FCH_PM_SIZE);
if (!addr) {
- release_resource(res);
+ release_mem_region(SB800_PIIX4_FCH_PM_ADDR,
+ SB800_PIIX4_FCH_PM_SIZE);
dev_err(dev, "SMBus base address mapping failed.\n");
return -ENOMEM;
}
- mmio_cfg->res = res;
mmio_cfg->addr = addr;
return 0;
{
if (mmio_cfg->use_mmio) {
iounmap(mmio_cfg->addr);
- release_resource(mmio_cfg->res);
+ release_mem_region(SB800_PIIX4_FCH_PM_ADDR,
+ SB800_PIIX4_FCH_PM_SIZE);
return;
}
#include <linux/tick.h>
#include <trace/events/power.h>
#include <linux/sched.h>
+#include <linux/sched/smt.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/moduleparam.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
+#include <asm/nospec-branch.h>
#include <asm/mwait.h>
#include <asm/msr.h>
*/
#define CPUIDLE_FLAG_ALWAYS_ENABLE BIT(15)
+/*
+ * Disable IBRS across idle (when KERNEL_IBRS), is exclusive vs IRQ_ENABLE
+ * above.
+ */
+#define CPUIDLE_FLAG_IBRS BIT(16)
+
/*
* MWAIT takes an 8-bit "hint" in EAX "suggesting"
* the C-state (top nibble) and sub-state (bottom nibble)
#define flg2MWAIT(flags) (((flags) >> 24) & 0xFF)
#define MWAIT2flg(eax) ((eax & 0xFF) << 24)
+static __always_inline int __intel_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ struct cpuidle_state *state = &drv->states[index];
+ unsigned long eax = flg2MWAIT(state->flags);
+ unsigned long ecx = 1; /* break on interrupt flag */
+
+ mwait_idle_with_hints(eax, ecx);
+
+ return index;
+}
+
/**
* intel_idle - Ask the processor to enter the given idle state.
* @dev: cpuidle device of the target CPU.
static __cpuidle int intel_idle(struct cpuidle_device *dev,
struct cpuidle_driver *drv, int index)
{
- struct cpuidle_state *state = &drv->states[index];
- unsigned long eax = flg2MWAIT(state->flags);
- unsigned long ecx = 1; /* break on interrupt flag */
+ return __intel_idle(dev, drv, index);
+}
- if (state->flags & CPUIDLE_FLAG_IRQ_ENABLE)
- local_irq_enable();
+static __cpuidle int intel_idle_irq(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int ret;
- mwait_idle_with_hints(eax, ecx);
+ raw_local_irq_enable();
+ ret = __intel_idle(dev, drv, index);
- return index;
+ /*
+ * The lockdep hardirqs state may be changed to 'on' with timer
+ * tick interrupt followed by __do_softirq(). Use local_irq_disable()
+ * to keep the hardirqs state correct.
+ */
+ local_irq_disable();
+
+ return ret;
+}
+
+static __cpuidle int intel_idle_ibrs(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ bool smt_active = sched_smt_active();
+ u64 spec_ctrl = spec_ctrl_current();
+ int ret;
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, 0);
+
+ ret = __intel_idle(dev, drv, index);
+
+ if (smt_active)
+ wrmsrl(MSR_IA32_SPEC_CTRL, spec_ctrl);
+
+ return ret;
}
/**
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 85,
.target_residency = 200,
.enter = &intel_idle,
{
.name = "C7s",
.desc = "MWAIT 0x33",
- .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x33) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 124,
.target_residency = 800,
.enter = &intel_idle,
{
.name = "C8",
.desc = "MWAIT 0x40",
- .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 200,
.target_residency = 800,
.enter = &intel_idle,
{
.name = "C9",
.desc = "MWAIT 0x50",
- .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 480,
.target_residency = 5000,
.enter = &intel_idle,
{
.name = "C10",
.desc = "MWAIT 0x60",
- .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 890,
.target_residency = 5000,
.enter = &intel_idle,
{
.name = "C6",
.desc = "MWAIT 0x20",
- .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED,
+ .flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TLB_FLUSHED | CPUIDLE_FLAG_IBRS,
.exit_latency = 133,
.target_residency = 600,
.enter = &intel_idle,
/* Structure copy. */
drv->states[drv->state_count] = cpuidle_state_table[cstate];
+ if (cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE)
+ drv->states[drv->state_count].enter = intel_idle_irq;
+
+ if (cpu_feature_enabled(X86_FEATURE_KERNEL_IBRS) &&
+ cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IBRS) {
+ WARN_ON_ONCE(cpuidle_state_table[cstate].flags & CPUIDLE_FLAG_IRQ_ENABLE);
+ drv->states[drv->state_count].enter = intel_idle_ibrs;
+ }
+
if ((disabled_states_mask & BIT(drv->state_count)) ||
((icpu->use_acpi || force_use_acpi) &&
intel_idle_off_by_default(mwait_hint) &&
data->trig->ops = &bma180_trigger_ops;
iio_trigger_set_drvdata(data->trig, indio_dev);
- indio_dev->trig = iio_trigger_get(data->trig);
ret = iio_trigger_register(data->trig);
if (ret)
goto err_trigger_free;
+
+ indio_dev->trig = iio_trigger_get(data->trig);
}
ret = iio_triggered_buffer_setup(indio_dev, NULL,
data->dready_trig->ops = &kxcjk1013_trigger_ops;
iio_trigger_set_drvdata(data->dready_trig, indio_dev);
- indio_dev->trig = data->dready_trig;
- iio_trigger_get(indio_dev->trig);
ret = iio_trigger_register(data->dready_trig);
if (ret)
goto err_poweroff;
+ indio_dev->trig = iio_trigger_get(data->dready_trig);
+
data->motion_trig->ops = &kxcjk1013_trigger_ops;
iio_trigger_set_drvdata(data->motion_trig, indio_dev);
ret = iio_trigger_register(data->motion_trig);
int i;
int ret;
- ret = i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2,
+ /*
+ * Find on fxls8471, after config reset bit, it reset immediately,
+ * and will not give ACK, so here do not check the return value.
+ * The following code will read the reset register, and check whether
+ * this reset works.
+ */
+ i2c_smbus_write_byte_data(client, MMA8452_CTRL_REG2,
MMA8452_CTRL_REG2_RST);
- if (ret < 0)
- return ret;
for (i = 0; i < 10; i++) {
usleep_range(100, 200);
mutex_init(&data->lock);
data->chip_info = device_get_match_data(&client->dev);
- if (!data->chip_info && id) {
- data->chip_info = &mma_chip_info_table[id->driver_data];
- } else {
- dev_err(&client->dev, "unknown device model\n");
- return -ENODEV;
+ if (!data->chip_info) {
+ if (id) {
+ data->chip_info = &mma_chip_info_table[id->driver_data];
+ } else {
+ dev_err(&client->dev, "unknown device model\n");
+ return -ENODEV;
+ }
}
ret = iio_read_mount_matrix(&client->dev, &data->orientation);
data->dready_trig->ops = &mxc4005_trigger_ops;
iio_trigger_set_drvdata(data->dready_trig, indio_dev);
- indio_dev->trig = data->dready_trig;
- iio_trigger_get(indio_dev->trig);
ret = devm_iio_trigger_register(&client->dev,
data->dready_trig);
if (ret) {
"failed to register trigger\n");
return ret;
}
+
+ indio_dev->trig = iio_trigger_get(data->dready_trig);
}
return devm_iio_device_register(&client->dev, indio_dev);
if (!try_module_get(cl->dev->driver->owner)) {
mutex_unlock(®istered_clients_lock);
+ of_node_put(cln);
return ERR_PTR(-ENODEV);
}
get_device(cl->dev);
cl->info = info;
mutex_unlock(®istered_clients_lock);
+ of_node_put(cln);
return cl;
}
mutex_unlock(®istered_clients_lock);
+ of_node_put(cln);
return ERR_PTR(-EPROBE_DEFER);
}
return -EOPNOTSUPP;
}
scu = syscon_node_to_regmap(syscon);
+ of_node_put(syscon);
if (IS_ERR(scu)) {
dev_warn(data->dev, "Failed to get syscon regmap\n");
return -EOPNOTSUPP;
},
.driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA,
},
+ {
+ /* Nuvision Solo 10 Draw */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "TMAX"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "TM101W610L"),
+ },
+ .driver_data = (void *)(uintptr_t)AXP288_ADC_TS_BIAS_80UA,
+ },
{}
};
i = 0;
device_for_each_child_node(&pdev->dev, fwnode) {
ret = fwnode_property_read_u32(fwnode, "reg", &channel);
- if (ret)
+ if (ret) {
+ fwnode_handle_put(fwnode);
return ret;
+ }
- if (channel >= RZG2L_ADC_MAX_CHANNELS)
+ if (channel >= RZG2L_ADC_MAX_CHANNELS) {
+ fwnode_handle_put(fwnode);
return -EINVAL;
+ }
chan_array[i].type = IIO_VOLTAGE;
chan_array[i].indexed = 1;
* @max_clk_rate_hz: maximum analog clock rate (Hz, from datasheet)
* @has_syscfg: SYSCFG capability flags
* @num_irqs: number of interrupt lines
+ * @num_adcs: maximum number of ADC instances in the common registers
*/
struct stm32_adc_priv_cfg {
const struct stm32_adc_common_regs *regs;
u32 max_clk_rate_hz;
unsigned int has_syscfg;
unsigned int num_irqs;
+ unsigned int num_adcs;
};
/**
* before invoking the interrupt handler (e.g. call ISR only for
* IRQ-enabled ADCs).
*/
- for (i = 0; i < priv->cfg->num_irqs; i++) {
+ for (i = 0; i < priv->cfg->num_adcs; i++) {
if ((status & priv->cfg->regs->eoc_msk[i] &&
stm32_adc_eoc_enabled(priv, i)) ||
(status & priv->cfg->regs->ovr_msk[i]))
.clk_sel = stm32f4_adc_clk_sel,
.max_clk_rate_hz = 36000000,
.num_irqs = 1,
+ .num_adcs = 3,
};
static const struct stm32_adc_priv_cfg stm32h7_adc_priv_cfg = {
.max_clk_rate_hz = 36000000,
.has_syscfg = HAS_VBOOSTER,
.num_irqs = 1,
+ .num_adcs = 2,
};
static const struct stm32_adc_priv_cfg stm32mp1_adc_priv_cfg = {
.regs = &stm32h7_adc_common_regs,
.clk_sel = stm32h7_adc_clk_sel,
- .max_clk_rate_hz = 40000000,
+ .max_clk_rate_hz = 36000000,
.has_syscfg = HAS_VBOOSTER | HAS_ANASWVDD,
.num_irqs = 2,
+ .num_adcs = 2,
};
static const struct of_device_id stm32_adc_of_match[] = {
else
ret = -EINVAL;
- if (mask == IIO_CHAN_INFO_PROCESSED && adc->vrefint.vrefint_cal)
+ if (mask == IIO_CHAN_INFO_PROCESSED)
*val = STM32_ADC_VREFINT_VOLTAGE * adc->vrefint.vrefint_cal / *val;
iio_device_release_direct_mode(indio_dev);
struct stm32_adc *adc = iio_priv(indio_dev);
const struct stm32_adc_regspec *regs = adc->cfg->regs;
u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg);
- u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg);
/* Check ovr status right now, as ovr mask should be already disabled */
if (status & regs->isr_ovr.mask) {
return IRQ_HANDLED;
}
- if (!(status & mask))
- dev_err_ratelimited(&indio_dev->dev,
- "Unexpected IRQ: IER=0x%08x, ISR=0x%08x\n",
- mask, status);
-
return IRQ_NONE;
}
struct stm32_adc *adc = iio_priv(indio_dev);
const struct stm32_adc_regspec *regs = adc->cfg->regs;
u32 status = stm32_adc_readl(adc, regs->isr_eoc.reg);
- u32 mask = stm32_adc_readl(adc, regs->ier_eoc.reg);
-
- if (!(status & mask))
- return IRQ_WAKE_THREAD;
if (status & regs->isr_ovr.mask) {
/*
for (i = 0; i < STM32_ADC_INT_CH_NB; i++) {
if (!strncmp(stm32_adc_ic[i].name, ch_name, STM32_ADC_CH_SZ)) {
- adc->int_ch[i] = chan;
-
- if (stm32_adc_ic[i].idx != STM32_ADC_INT_CH_VREFINT)
- continue;
+ if (stm32_adc_ic[i].idx != STM32_ADC_INT_CH_VREFINT) {
+ adc->int_ch[i] = chan;
+ break;
+ }
/* Get calibration data for vrefint channel */
ret = nvmem_cell_read_u16(&indio_dev->dev, "vrefint", &vrefint);
return dev_err_probe(indio_dev->dev.parent, ret,
"nvmem access error\n");
}
- if (ret == -ENOENT)
- dev_dbg(&indio_dev->dev, "vrefint calibration not found\n");
- else
- adc->vrefint.vrefint_cal = vrefint;
+ if (ret == -ENOENT) {
+ dev_dbg(&indio_dev->dev, "vrefint calibration not found. Skip vrefint channel\n");
+ return ret;
+ } else if (!vrefint) {
+ dev_dbg(&indio_dev->dev, "Null vrefint calibration value. Skip vrefint channel\n");
+ return -ENOENT;
+ }
+ adc->int_ch[i] = chan;
+ adc->vrefint.vrefint_cal = vrefint;
}
}
}
strncpy(adc->chan_name[val], name, STM32_ADC_CH_SZ);
ret = stm32_adc_populate_int_ch(indio_dev, name, val);
- if (ret)
+ if (ret == -ENOENT)
+ continue;
+ else if (ret)
goto err;
} else if (ret != -EINVAL) {
dev_err(&indio_dev->dev, "Invalid label %d\n", ret);
device_for_each_child_node(dev, node) {
ret = fwnode_property_read_u32(node, "reg", &channel);
if (ret)
- return ret;
+ goto err_child_out;
ret = fwnode_property_read_u32(node, "ti,gain", &tmp);
if (ret) {
} else {
ret = ads131e08_pga_gain_to_field_value(st, tmp);
if (ret < 0)
- return ret;
+ goto err_child_out;
channel_config[i].pga_gain = tmp;
}
} else {
ret = ads131e08_validate_channel_mux(st, tmp);
if (ret)
- return ret;
+ goto err_child_out;
channel_config[i].mux = tmp;
}
st->channel_config = channel_config;
return 0;
+
+err_child_out:
+ fwnode_handle_put(node);
+ return ret;
}
static void ads131e08_regulator_disable(void *data)
irq = platform_get_irq(pdev, 0);
if (irq < 0)
- return ret;
+ return irq;
ret = devm_request_irq(&pdev->dev, irq, &ams_irq, 0, "ams-irq",
indio_dev);
chan->ext_info = rescale->ext_info;
chan->type = rescale->cfg->type;
- if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) ||
+ if (iio_channel_has_info(schan, IIO_CHAN_INFO_RAW) &&
iio_channel_has_info(schan, IIO_CHAN_INFO_SCALE)) {
dev_info(dev, "using raw+scale source channel\n");
} else if (iio_channel_has_info(schan, IIO_CHAN_INFO_PROCESSED)) {
data->drdy_trig->ops = &ccs811_trigger_ops;
iio_trigger_set_drvdata(data->drdy_trig, indio_dev);
- indio_dev->trig = data->drdy_trig;
- iio_trigger_get(indio_dev->trig);
ret = iio_trigger_register(data->drdy_trig);
if (ret)
goto err_poweroff;
+
+ indio_dev->trig = iio_trigger_get(data->drdy_trig);
}
ret = iio_triggered_buffer_setup(indio_dev, NULL,
ADMV1014_DET_EN_MSK;
enable_reg = FIELD_PREP(ADMV1014_P1DB_COMPENSATION_MSK, st->p1db_comp ? 3 : 0) |
- FIELD_PREP(ADMV1014_IF_AMP_PD_MSK, !(st->input_mode)) |
- FIELD_PREP(ADMV1014_BB_AMP_PD_MSK, st->input_mode) |
+ FIELD_PREP(ADMV1014_IF_AMP_PD_MSK,
+ (st->input_mode == ADMV1014_IF_MODE) ? 0 : 1) |
+ FIELD_PREP(ADMV1014_BB_AMP_PD_MSK,
+ (st->input_mode == ADMV1014_IF_MODE) ? 1 : 0) |
FIELD_PREP(ADMV1014_DET_EN_MSK, st->det_en);
return __admv1014_spi_update_bits(st, ADMV1014_REG_ENABLE, enable_reg_msk, enable_reg);
ret = regmap_update_bits(mpu3050->map, MPU3050_PWR_MGM,
MPU3050_PWR_MGM_SLEEP, 0);
if (ret) {
+ regulator_bulk_disable(ARRAY_SIZE(mpu3050->regs), mpu3050->regs);
dev_err(mpu3050->dev, "error setting power mode\n");
return ret;
}
iio_trigger_set_drvdata(hw->trig, iio_dev);
hw->trig->ops = &hts221_trigger_ops;
+
+ err = devm_iio_trigger_register(hw->dev, hw->trig);
+
iio_dev->trig = iio_trigger_get(hw->trig);
- return devm_iio_trigger_register(hw->dev, hw->trig);
+ return err;
}
static int hts221_buffer_preenable(struct iio_dev *iio_dev)
#include "inv_icm42600_buffer.h"
enum inv_icm42600_chip {
+ INV_CHIP_INVALID,
INV_CHIP_ICM42600,
INV_CHIP_ICM42602,
INV_CHIP_ICM42605,
bool open_drain;
int ret;
- if (chip < 0 || chip >= INV_CHIP_NB) {
+ if (chip <= INV_CHIP_INVALID || chip >= INV_CHIP_NB) {
dev_err(dev, "invalid chip = %d\n", chip);
return -ENODEV;
}
dev_dbg(yas5xx->dev, "calibration data: %*ph\n", 14, data);
/* Sanity check, is this all zeroes? */
- if (memchr_inv(data, 0x00, 13)) {
+ if (memchr_inv(data, 0x00, 13) == NULL) {
if (!(data[13] & BIT(7)))
dev_warn(yas5xx->dev, "calibration is blank!\n");
}
break;
ret = device_property_read_u32_array(dev, prop, pin_defs,
ARRAY_SIZE(pin_defs));
+ if (ret)
+ break;
+
for (pin = 0; pin < SX9324_NUM_PINS; pin++)
raw |= (pin_defs[pin] << (2 * pin)) &
SX9324_REG_AFE_PH0_PIN_MASK(pin);
# Keep in alphabetical order
config IIO_RESCALE_KUNIT_TEST
bool "Test IIO rescale conversion functions"
- depends on KUNIT=y && !IIO_RESCALE
+ depends on KUNIT=y && IIO_RESCALE=y
default KUNIT_ALL_TESTS
help
If you want to run tests on the iio-rescale code say Y here.
#
# Keep in alphabetical order
-obj-$(CONFIG_IIO_RESCALE_KUNIT_TEST) += iio-test-rescale.o ../afe/iio-rescale.o
+obj-$(CONFIG_IIO_RESCALE_KUNIT_TEST) += iio-test-rescale.o
obj-$(CONFIG_IIO_TEST_FORMAT) += iio-test-format.o
CFLAGS_iio-test-format.o += $(DISABLE_STRUCTLEAK_PLUGIN)
}
iio_trigger_unregister(t->trig);
+ irq_work_sync(&t->work);
iio_trigger_free(t->trig);
list_del(&t->l);
return ERR_CAST(cm_id_priv);
err = cm_init_listen(cm_id_priv, service_id, 0);
- if (err)
+ if (err) {
+ ib_destroy_cm_id(&cm_id_priv->id);
return ERR_PTR(err);
+ }
spin_lock_irq(&cm_id_priv->lock);
listen_id_priv = cm_insert_listen(cm_id_priv, cm_handler);
struct irdma_cm_node *cm_node;
struct list_head teardown_list;
struct ib_qp_attr attr;
- struct irdma_sc_vsi *vsi = &iwdev->vsi;
- struct irdma_sc_qp *sc_qp;
- struct irdma_qp *qp;
- int i;
INIT_LIST_HEAD(&teardown_list);
irdma_cm_disconn(cm_node->iwqp);
irdma_rem_ref_cm_node(cm_node);
}
- if (!iwdev->roce_mode)
- return;
-
- INIT_LIST_HEAD(&teardown_list);
- for (i = 0; i < IRDMA_MAX_USER_PRIORITY; i++) {
- mutex_lock(&vsi->qos[i].qos_mutex);
- list_for_each_safe (list_node, list_core_temp,
- &vsi->qos[i].qplist) {
- u32 qp_ip[4];
-
- sc_qp = container_of(list_node, struct irdma_sc_qp,
- list);
- if (sc_qp->qp_uk.qp_type != IRDMA_QP_TYPE_ROCE_RC)
- continue;
-
- qp = sc_qp->qp_uk.back_qp;
- if (!disconnect_all) {
- if (nfo->ipv4)
- qp_ip[0] = qp->udp_info.local_ipaddr[3];
- else
- memcpy(qp_ip,
- &qp->udp_info.local_ipaddr[0],
- sizeof(qp_ip));
- }
-
- if (disconnect_all ||
- (nfo->vlan_id == (qp->udp_info.vlan_tag & VLAN_VID_MASK) &&
- !memcmp(qp_ip, ipaddr, nfo->ipv4 ? 4 : 16))) {
- spin_lock(&iwdev->rf->qptable_lock);
- if (iwdev->rf->qp_table[sc_qp->qp_uk.qp_id]) {
- irdma_qp_add_ref(&qp->ibqp);
- list_add(&qp->teardown_entry,
- &teardown_list);
- }
- spin_unlock(&iwdev->rf->qptable_lock);
- }
- }
- mutex_unlock(&vsi->qos[i].qos_mutex);
- }
-
- list_for_each_safe (list_node, list_core_temp, &teardown_list) {
- qp = container_of(list_node, struct irdma_qp, teardown_entry);
- attr.qp_state = IB_QPS_ERR;
- irdma_modify_qp_roce(&qp->ibqp, &attr, IB_QP_STATE, NULL);
- irdma_qp_rem_ref(&qp->ibqp);
- }
}
/**
dev->hw_attrs.uk_attrs.max_hw_read_sges = I40IW_MAX_SGE_RD;
dev->hw_attrs.max_hw_device_pages = I40IW_MAX_PUSH_PAGE_COUNT;
dev->hw_attrs.uk_attrs.max_hw_inline = I40IW_MAX_INLINE_DATA_SIZE;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M;
dev->hw_attrs.max_hw_ird = I40IW_MAX_IRD_SIZE;
dev->hw_attrs.max_hw_ord = I40IW_MAX_ORD_SIZE;
dev->hw_attrs.max_hw_wqes = I40IW_MAX_WQ_ENTRIES;
dev->cqp_db = dev->hw_regs[IRDMA_CQPDB];
dev->cq_ack_db = dev->hw_regs[IRDMA_CQACK];
dev->irq_ops = &icrdma_irq_ops;
+ dev->hw_attrs.page_size_cap = SZ_4K | SZ_2M | SZ_1G;
dev->hw_attrs.max_hw_ird = ICRDMA_MAX_IRD_SIZE;
dev->hw_attrs.max_hw_ord = ICRDMA_MAX_ORD_SIZE;
dev->hw_attrs.max_stat_inst = ICRDMA_MAX_STATS_COUNT;
u64 max_hw_outbound_msg_size;
u64 max_hw_inbound_msg_size;
u64 max_mr_size;
+ u64 page_size_cap;
u32 min_hw_qp_id;
u32 min_hw_aeq_size;
u32 max_hw_aeq_size;
props->vendor_part_id = pcidev->device;
props->hw_ver = rf->pcidev->revision;
- props->page_size_cap = SZ_4K | SZ_2M | SZ_1G;
+ props->page_size_cap = hw_attrs->page_size_cap;
props->max_mr_size = hw_attrs->max_mr_size;
props->max_qp = rf->max_qp - rf->used_qps;
props->max_qp_wr = hw_attrs->max_qp_wr;
if (req.reg_type == IRDMA_MEMREG_TYPE_MEM) {
iwmr->page_size = ib_umem_find_best_pgsz(region,
- SZ_4K | SZ_2M | SZ_1G,
+ iwdev->rf->sc_dev.hw_attrs.page_size_cap,
virt);
if (unlikely(!iwmr->page_size)) {
kfree(iwmr);
u32 sq_psn;
u32 qkey;
u32 dest_qp_num;
+ u8 timeout;
/* Relevant to qps created from kernel space only (ULPs) */
u8 prev_wqe_size;
1 << max_t(int, attr->timeout - 8, 0);
else
qp_params.ack_timeout = 0;
+
+ qp->timeout = attr->timeout;
}
if (attr_mask & IB_QP_RETRY_CNT) {
rdma_ah_set_dgid_raw(&qp_attr->ah_attr, ¶ms.dgid.bytes[0]);
rdma_ah_set_port_num(&qp_attr->ah_attr, 1);
rdma_ah_set_sl(&qp_attr->ah_attr, 0);
- qp_attr->timeout = params.timeout;
+ qp_attr->timeout = qp->timeout;
qp_attr->rnr_retry = params.rnr_retry;
qp_attr->retry_cnt = params.retry_cnt;
qp_attr->min_rnr_timer = params.min_rnr_nak_timer;
config JOYSTICK_SENSEHAT
tristate "Raspberry Pi Sense HAT joystick"
depends on INPUT && I2C
+ depends on HAS_IOMEM
select MFD_SIMPLE_MFD_I2C
help
Say Y here if you want to enable the driver for the
},
{
/*
- * Lenovo Yoga Tab2 1051L, something messes with the home-button
+ * Lenovo Yoga Tab2 1051F/1051L, something messes with the home-button
* IRQ settings, leading to a non working home-button.
*/
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "60073"),
- DMI_MATCH(DMI_PRODUCT_VERSION, "1051L"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "1051"),
},
},
{} /* Terminating entry */
if (!dev->tp_data)
goto err_free_bt_buffer;
- if (dev->bt_urb)
+ if (dev->bt_urb) {
usb_fill_int_urb(dev->bt_urb, udev,
usb_rcvintpipe(udev, cfg->bt_ep),
dev->bt_data, dev->cfg.bt_datalen,
bcm5974_irq_button, dev, 1);
+ dev->bt_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+ }
+
usb_fill_int_urb(dev->tp_urb, udev,
usb_rcvintpipe(udev, cfg->tp_ep),
dev->tp_data, dev->cfg.tp_datalen,
bcm5974_irq_trackpad, dev, 1);
+ dev->tp_urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
+
/* create bcm5974 device */
usb_make_path(udev, dev->phys, sizeof(dev->phys));
strlcat(dev->phys, "/input0", sizeof(dev->phys));
} else {
dev_warn(dev, "Unexpected ACPI resources: gpio_count %d, gpio_int_idx %d\n",
ts->gpio_count, ts->gpio_int_idx);
+ /*
+ * On some devices _PS0 does a reset for us and
+ * sometimes this is necessary for things to work.
+ */
+ acpi_device_fix_up_power(ACPI_COMPANION(dev));
return -EINVAL;
}
if (id->driver_info == DEVTYPE_IGNORE)
return -ENODEV;
+ if (id->driver_info >= ARRAY_SIZE(usbtouch_dev_info))
+ return -ENODEV;
+
endpoint = usbtouch_get_input_endpoint(intf->cur_altsetting);
if (!endpoint)
return -ENXIO;
static int wm97xx_mfd_remove(struct platform_device *pdev)
{
- return wm97xx_remove(&pdev->dev);
+ wm97xx_remove(&pdev->dev);
+
+ return 0;
}
static int __maybe_unused wm97xx_suspend(struct device *dev)
static struct notifier_block dmar_pci_bus_nb = {
.notifier_call = dmar_pci_bus_notifier,
- .priority = INT_MIN,
+ .priority = 1,
};
static struct dmar_drhd_unit *
DEFINE_SPINLOCK(device_domain_lock);
static LIST_HEAD(device_domain_list);
-/*
- * Iterate over elements in device_domain_list and call the specified
- * callback @fn against each element.
- */
-int for_each_device_domain(int (*fn)(struct device_domain_info *info,
- void *data), void *data)
-{
- int ret = 0;
- unsigned long flags;
- struct device_domain_info *info;
-
- spin_lock_irqsave(&device_domain_lock, flags);
- list_for_each_entry(info, &device_domain_list, global) {
- ret = fn(info, data);
- if (ret) {
- spin_unlock_irqrestore(&device_domain_lock, flags);
- return ret;
- }
- }
- spin_unlock_irqrestore(&device_domain_lock, flags);
-
- return 0;
-}
-
const struct iommu_ops intel_iommu_ops;
static bool translation_pre_enabled(struct intel_iommu *iommu)
/*
* Per device pasid table management:
*/
-static inline void
-device_attach_pasid_table(struct device_domain_info *info,
- struct pasid_table *pasid_table)
-{
- info->pasid_table = pasid_table;
- list_add(&info->table, &pasid_table->dev);
-}
-
-static inline void
-device_detach_pasid_table(struct device_domain_info *info,
- struct pasid_table *pasid_table)
-{
- info->pasid_table = NULL;
- list_del(&info->table);
-}
-
-struct pasid_table_opaque {
- struct pasid_table **pasid_table;
- int segment;
- int bus;
- int devfn;
-};
-
-static int search_pasid_table(struct device_domain_info *info, void *opaque)
-{
- struct pasid_table_opaque *data = opaque;
-
- if (info->iommu->segment == data->segment &&
- info->bus == data->bus &&
- info->devfn == data->devfn &&
- info->pasid_table) {
- *data->pasid_table = info->pasid_table;
- return 1;
- }
-
- return 0;
-}
-
-static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
-{
- struct pasid_table_opaque *data = opaque;
-
- data->segment = pci_domain_nr(pdev->bus);
- data->bus = PCI_BUS_NUM(alias);
- data->devfn = alias & 0xff;
-
- return for_each_device_domain(&search_pasid_table, data);
-}
/*
* Allocate a pasid table for @dev. It should be called in a
{
struct device_domain_info *info;
struct pasid_table *pasid_table;
- struct pasid_table_opaque data;
struct page *pages;
u32 max_pasid = 0;
- int ret, order;
- int size;
+ int order, size;
might_sleep();
info = dev_iommu_priv_get(dev);
if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
return -EINVAL;
- /* DMA alias device already has a pasid table, use it: */
- data.pasid_table = &pasid_table;
- ret = pci_for_each_dma_alias(to_pci_dev(dev),
- &get_alias_pasid_table, &data);
- if (ret)
- goto attach_out;
-
pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
if (!pasid_table)
return -ENOMEM;
- INIT_LIST_HEAD(&pasid_table->dev);
if (info->pasid_supported)
max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
pasid_table->table = page_address(pages);
pasid_table->order = order;
pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);
-
-attach_out:
- device_attach_pasid_table(info, pasid_table);
+ info->pasid_table = pasid_table;
return 0;
}
return;
pasid_table = info->pasid_table;
- device_detach_pasid_table(info, pasid_table);
-
- if (!list_empty(&pasid_table->dev))
- return;
+ info->pasid_table = NULL;
/* Free scalable mode PASID directory tables: */
dir = pasid_table->table;
void *table; /* pasid table pointer */
int order; /* page order of pasid table */
u32 max_pasid; /* max pasid */
- struct list_head dev; /* device list */
};
/* Get PRESENT bit of a PASID directory entry. */
.compatible = "renesas,ipmmu-r8a779a0",
.data = &ipmmu_features_rcar_gen4,
}, {
- .compatible = "renesas,rcar-gen4-ipmmu",
+ .compatible = "renesas,rcar-gen4-ipmmu-vmsa",
.data = &ipmmu_features_rcar_gen4,
}, {
/* Terminator */
config XILINX_INTC
bool "Xilinx Interrupt Controller IP"
- depends on MICROBLAZE || ARCH_ZYNQ || ARCH_ZYNQMP
+ depends on OF_ADDRESS
select IRQ_DOMAIN
help
Support for the Xilinx Interrupt Controller IP core.
#define AIC_TMR_EL02_PHYS AIC_TMR_GUEST_PHYS
#define AIC_TMR_EL02_VIRT AIC_TMR_GUEST_VIRT
-DEFINE_STATIC_KEY_TRUE(use_fast_ipi);
+static DEFINE_STATIC_KEY_TRUE(use_fast_ipi);
struct aic_info {
int version;
continue;
cpu = of_cpu_node_to_id(cpu_node);
+ of_node_put(cpu_node);
if (WARN_ON(cpu < 0))
continue;
for_each_child_of_node(affs, chld)
build_fiq_affinity(irqc, chld);
}
+ of_node_put(affs);
set_handle_irq(aic_handle_irq);
set_handle_fiq(aic_handle_fiq);
/* The PB11MPCore GIC needs to be configured in the syscon */
map = syscon_node_to_regmap(np);
+ of_node_put(np);
if (!IS_ERR(map)) {
/* new irq mode with no DCC */
regmap_write(map, REALVIEW_SYS_LOCK_OFFSET,
gic_data.ppi_descs = kcalloc(gic_data.ppi_nr, sizeof(*gic_data.ppi_descs), GFP_KERNEL);
if (!gic_data.ppi_descs)
- return;
+ goto out_put_node;
nr_parts = of_get_child_count(parts_node);
continue;
cpu = of_cpu_node_to_id(cpu_node);
- if (WARN_ON(cpu < 0))
+ if (WARN_ON(cpu < 0)) {
+ of_node_put(cpu_node);
continue;
+ }
pr_cont("%pOF[%d] ", cpu_node, cpu);
cpumask_set_cpu(cpu, &part->mask);
+ of_node_put(cpu_node);
}
pr_cont("}\n");
vgic_set_kvm_info(&gic_v3_kvm_info);
}
+static void gic_request_region(resource_size_t base, resource_size_t size,
+ const char *name)
+{
+ if (!request_mem_region(base, size, name))
+ pr_warn_once(FW_BUG "%s region %pa has overlapping address\n",
+ name, &base);
+}
+
+static void __iomem *gic_of_iomap(struct device_node *node, int idx,
+ const char *name, struct resource *res)
+{
+ void __iomem *base;
+ int ret;
+
+ ret = of_address_to_resource(node, idx, res);
+ if (ret)
+ return IOMEM_ERR_PTR(ret);
+
+ gic_request_region(res->start, resource_size(res), name);
+ base = of_iomap(node, idx);
+
+ return base ?: IOMEM_ERR_PTR(-ENOMEM);
+}
+
static int __init gic_of_init(struct device_node *node, struct device_node *parent)
{
void __iomem *dist_base;
struct redist_region *rdist_regs;
+ struct resource res;
u64 redist_stride;
u32 nr_redist_regions;
int err, i;
- dist_base = of_io_request_and_map(node, 0, "GICD");
+ dist_base = gic_of_iomap(node, 0, "GICD", &res);
if (IS_ERR(dist_base)) {
pr_err("%pOF: unable to map gic dist registers\n", node);
return PTR_ERR(dist_base);
}
for (i = 0; i < nr_redist_regions; i++) {
- struct resource res;
- int ret;
-
- ret = of_address_to_resource(node, 1 + i, &res);
- rdist_regs[i].redist_base = of_io_request_and_map(node, 1 + i, "GICR");
- if (ret || IS_ERR(rdist_regs[i].redist_base)) {
+ rdist_regs[i].redist_base = gic_of_iomap(node, 1 + i, "GICR", &res);
+ if (IS_ERR(rdist_regs[i].redist_base)) {
pr_err("%pOF: couldn't map region %d\n", node, i);
err = -ENODEV;
goto out_unmap_rdist;
pr_err("Couldn't map GICR region @%llx\n", redist->base_address);
return -ENOMEM;
}
- request_mem_region(redist->base_address, redist->length, "GICR");
+ gic_request_region(redist->base_address, redist->length, "GICR");
gic_acpi_register_redist(redist->base_address, redist_base);
return 0;
redist_base = ioremap(gicc->gicr_base_address, size);
if (!redist_base)
return -ENOMEM;
- request_mem_region(gicc->gicr_base_address, size, "GICR");
+ gic_request_region(gicc->gicr_base_address, size, "GICR");
gic_acpi_register_redist(gicc->gicr_base_address, redist_base);
return 0;
pr_err("Unable to map GICD registers\n");
return -ENOMEM;
}
- request_mem_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD");
+ gic_request_region(dist->base_address, ACPI_GICV3_DIST_MEM_SIZE, "GICD");
err = gic_validate_dist_version(acpi_data.dist_base);
if (err) {
#define LIOINTC_ERRATA_IRQ 10
+#if defined(CONFIG_MIPS)
+#define liointc_core_id get_ebase_cpunum()
+#else
+#define liointc_core_id get_csr_cpuid()
+#endif
+
struct liointc_handler_data {
struct liointc_priv *priv;
u32 parent_int_map;
struct liointc_handler_data *handler = irq_desc_get_handler_data(desc);
struct irq_chip *chip = irq_desc_get_chip(desc);
struct irq_chip_generic *gc = handler->priv->gc;
- int core = cpu_logical_map(smp_processor_id()) % LIOINTC_NUM_CORES;
+ int core = liointc_core_id % LIOINTC_NUM_CORES;
u32 pending;
chained_irq_enter(chip, desc);
.name = "or1k-PIC-level",
.irq_unmask = or1k_pic_unmask,
.irq_mask = or1k_pic_mask,
- .irq_mask_ack = or1k_pic_mask_ack,
},
.handle = handle_level_irq,
.flags = IRQ_LEVEL | IRQ_NOPROBE,
if (!cpu_ictl)
return -EINVAL;
ret = of_property_read_u32(cpu_ictl, "#interrupt-cells", &tmp);
+ of_node_put(cpu_ictl);
if (ret || tmp != 1)
return -EINVAL;
- of_node_put(cpu_ictl);
cpu_int = be32_to_cpup(imap + 2);
if (cpu_int > 7 || cpu_int < 2)
{ .compatible = "socionext,uniphier-ld11-aidet" },
{ .compatible = "socionext,uniphier-ld20-aidet" },
{ .compatible = "socionext,uniphier-pxs3-aidet" },
+ { .compatible = "socionext,uniphier-nx1-aidet" },
{ /* sentinel */ }
};
* access their members!
*/
+/*
+ * For mempools pre-allocation at the table loading time.
+ */
+struct dm_md_mempools {
+ struct bio_set bs;
+ struct bio_set io_bs;
+};
+
struct mapped_device {
struct mutex suspend_lock;
/*
* io objects are allocated from here.
*/
- struct bio_set io_bs;
- struct bio_set bs;
+ struct dm_md_mempools *mempools;
/* kobject and completion */
struct dm_kobject_holder kobj_holder;
atomic_t io_count;
struct mapped_device *md;
+ struct bio *split_bio;
/* The three fields represent mapped part of original bio */
struct bio *orig_bio;
unsigned int sector_offset; /* offset to end of orig_bio */
static void stop_worker(struct era *era)
{
atomic_set(&era->suspended, 1);
- flush_workqueue(era->wq);
+ drain_workqueue(era->wq);
}
/*----------------------------------------------------------------
}
stop_worker(era);
+
+ r = metadata_commit(era->md);
+ if (r) {
+ DMERR("%s: metadata_commit failed", __func__);
+ /* FIXME: fail mode */
+ }
}
static int era_preresume(struct dm_target *ti)
/*
* Work out how many "unsigned long"s we need to hold the bitset.
*/
- bitset_size = dm_round_up(region_count,
- sizeof(*lc->clean_bits) << BYTE_SHIFT);
+ bitset_size = dm_round_up(region_count, BITS_PER_LONG);
bitset_size >>= BYTE_SHIFT;
lc->bitset_uint32_count = bitset_size / sizeof(*lc->clean_bits);
log_clear_bit(lc, lc->clean_bits, i);
/* clear any old bits -- device has shrunk */
- for (i = lc->region_count; i % (sizeof(*lc->clean_bits) << BYTE_SHIFT); i++)
+ for (i = lc->region_count; i % BITS_PER_LONG; i++)
log_clear_bit(lc, lc->clean_bits, i);
/* copy clean across to sync */
static int validate_raid_redundancy(struct raid_set *rs)
{
unsigned int i, rebuild_cnt = 0;
- unsigned int rebuilds_per_group = 0, copies;
+ unsigned int rebuilds_per_group = 0, copies, raid_disks;
unsigned int group_size, last_group_start;
- for (i = 0; i < rs->md.raid_disks; i++)
- if (!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
- !rs->dev[i].rdev.sb_page)
+ for (i = 0; i < rs->raid_disks; i++)
+ if (!test_bit(FirstUse, &rs->dev[i].rdev.flags) &&
+ ((!test_bit(In_sync, &rs->dev[i].rdev.flags) ||
+ !rs->dev[i].rdev.sb_page)))
rebuild_cnt++;
switch (rs->md.level) {
* A A B B C
* C D D E E
*/
+ raid_disks = min(rs->raid_disks, rs->md.raid_disks);
if (__is_raid10_near(rs->md.new_layout)) {
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < raid_disks; i++) {
if (!(i % copies))
rebuilds_per_group = 0;
if ((!rs->dev[i].rdev.sb_page ||
* results in the need to treat the last (potentially larger)
* set differently.
*/
- group_size = (rs->md.raid_disks / copies);
- last_group_start = (rs->md.raid_disks / group_size) - 1;
+ group_size = (raid_disks / copies);
+ last_group_start = (raid_disks / group_size) - 1;
last_group_start *= group_size;
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < raid_disks; i++) {
if (!(i % copies) && !(i > last_group_start))
rebuilds_per_group = 0;
if ((!rs->dev[i].rdev.sb_page ||
{
int i;
- for (i = 0; i < rs->md.raid_disks; i++) {
+ for (i = 0; i < rs->raid_disks; i++) {
struct md_rdev *rdev = &rs->dev[i].rdev;
if (!test_bit(Journal, &rdev->flags) &&
if (!strcasecmp(argv[0], "idle") || !strcasecmp(argv[0], "frozen")) {
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, false);
+ md_reap_sync_thread(mddev);
}
} else if (decipher_sync_action(mddev, mddev->recovery) != st_idle)
return -EBUSY;
unsigned int i;
int r = 0;
- for (i = 0; !r && i < rs->md.raid_disks; i++)
- if (rs->dev[i].data_dev)
- r = fn(ti,
- rs->dev[i].data_dev,
- 0, /* No offset on data devs */
- rs->md.dev_sectors,
- data);
+ for (i = 0; !r && i < rs->raid_disks; i++) {
+ if (rs->dev[i].data_dev) {
+ r = fn(ti, rs->dev[i].data_dev,
+ 0, /* No offset on data devs */
+ rs->md.dev_sectors, data);
+ }
+ }
return r;
}
{
int r;
- r = blk_rq_prep_clone(clone, rq, &tio->md->bs, gfp_mask,
+ r = blk_rq_prep_clone(clone, rq, &tio->md->mempools->bs, gfp_mask,
dm_rq_bio_constructor, tio);
if (r)
return r;
return 0;
}
-void dm_table_free_md_mempools(struct dm_table *t)
-{
- dm_free_md_mempools(t->mempools);
- t->mempools = NULL;
-}
-
-struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t)
-{
- return t->mempools;
-}
-
static int setup_indexes(struct dm_table *t)
{
int i;
return latch;
}
-/*
- * For mempools pre-allocation at the table loading time.
- */
-struct dm_md_mempools {
- struct bio_set bs;
- struct bio_set io_bs;
-};
-
struct table_device {
struct list_head list;
refcount_t count;
unsigned long flags;
/* Can afford locking given DM_TIO_IS_DUPLICATE_BIO */
spin_lock_irqsave(&io->lock, flags);
+ if (dm_io_flagged(io, DM_IO_ACCOUNTED)) {
+ spin_unlock_irqrestore(&io->lock, flags);
+ return;
+ }
dm_io_set_flag(io, DM_IO_ACCOUNTED);
spin_unlock_irqrestore(&io->lock, flags);
}
struct dm_target_io *tio;
struct bio *clone;
- clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->io_bs);
+ clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
/* Set default bdev, but target must bio_set_dev() before issuing IO */
clone->bi_bdev = md->disk->part0;
atomic_set(&io->io_count, 2);
this_cpu_inc(*md->pending_io);
io->orig_bio = bio;
+ io->split_bio = NULL;
io->md = md;
spin_lock_init(&io->lock);
io->start_time = jiffies;
} else {
struct mapped_device *md = ci->io->md;
- clone = bio_alloc_clone(NULL, ci->bio, gfp_mask, &md->bs);
+ clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
+ &md->mempools->bs);
if (!clone)
return NULL;
/* Set default bdev, but target must bio_set_dev() before issuing IO */
}
static inline struct dm_table *dm_get_live_table_bio(struct mapped_device *md,
- int *srcu_idx, struct bio *bio)
+ int *srcu_idx, unsigned bio_opf)
{
- if (bio->bi_opf & REQ_NOWAIT)
+ if (bio_opf & REQ_NOWAIT)
return dm_get_live_table_fast(md);
else
return dm_get_live_table(md, srcu_idx);
}
static inline void dm_put_live_table_bio(struct mapped_device *md, int srcu_idx,
- struct bio *bio)
+ unsigned bio_opf)
{
- if (bio->bi_opf & REQ_NOWAIT)
+ if (bio_opf & REQ_NOWAIT)
dm_put_live_table_fast(md);
else
dm_put_live_table(md, srcu_idx);
{
blk_status_t io_error;
struct mapped_device *md = io->md;
- struct bio *bio = io->orig_bio;
+ struct bio *bio = io->split_bio ? io->split_bio : io->orig_bio;
if (io->status == BLK_STS_DM_REQUEUE) {
unsigned long flags;
if (io_error == BLK_STS_AGAIN) {
/* io_uring doesn't handle BLK_STS_AGAIN (yet) */
queue_io(md, bio);
+ return;
}
}
- return;
+ if (io_error == BLK_STS_DM_REQUEUE)
+ return;
}
if (bio_is_flush_with_data(bio)) {
struct dm_io *io = tio->io;
struct mapped_device *md = io->md;
- if (likely(bio->bi_bdev != md->disk->part0)) {
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-
- if (unlikely(error == BLK_STS_TARGET)) {
- if (bio_op(bio) == REQ_OP_DISCARD &&
- !bdev_max_discard_sectors(bio->bi_bdev))
- disable_discard(md);
- else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
- !q->limits.max_write_zeroes_sectors)
- disable_write_zeroes(md);
- }
-
- if (static_branch_unlikely(&zoned_enabled) &&
- unlikely(blk_queue_is_zoned(q)))
- dm_zone_endio(io, bio);
+ if (unlikely(error == BLK_STS_TARGET)) {
+ if (bio_op(bio) == REQ_OP_DISCARD &&
+ !bdev_max_discard_sectors(bio->bi_bdev))
+ disable_discard(md);
+ else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
+ !bdev_write_zeroes_sectors(bio->bi_bdev))
+ disable_write_zeroes(md);
}
+ if (static_branch_unlikely(&zoned_enabled) &&
+ unlikely(blk_queue_is_zoned(bdev_get_queue(bio->bi_bdev))))
+ dm_zone_endio(io, bio);
+
if (endio) {
int r = endio(ti, bio, &error);
switch (r) {
ti = dm_table_find_target(ci->map, ci->sector);
if (unlikely(!ti))
return BLK_STS_IOERR;
- else if (unlikely(ci->is_abnormal_io))
+
+ if (unlikely((ci->bio->bi_opf & REQ_NOWAIT) != 0) &&
+ unlikely(!dm_target_supports_nowait(ti->type)))
+ return BLK_STS_NOTSUPP;
+
+ if (unlikely(ci->is_abnormal_io))
return __process_abnormal_io(ci, ti);
/*
* Remainder must be passed to submit_bio_noacct() so it gets handled
* *after* bios already submitted have been completely processed.
*/
- bio_trim(bio, io->sectors, ci.sector_count);
- trace_block_split(bio, bio->bi_iter.bi_sector);
- bio_inc_remaining(bio);
+ WARN_ON_ONCE(!dm_io_flagged(io, DM_IO_WAS_SPLIT));
+ io->split_bio = bio_split(bio, io->sectors, GFP_NOIO,
+ &md->queue->bio_split);
+ bio_chain(io->split_bio, bio);
+ trace_block_split(io->split_bio, bio->bi_iter.bi_sector);
submit_bio_noacct(bio);
out:
/*
struct mapped_device *md = bio->bi_bdev->bd_disk->private_data;
int srcu_idx;
struct dm_table *map;
+ unsigned bio_opf = bio->bi_opf;
- map = dm_get_live_table_bio(md, &srcu_idx, bio);
+ map = dm_get_live_table_bio(md, &srcu_idx, bio_opf);
/* If suspended, or map not yet available, queue this IO for later */
if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) ||
dm_split_and_process_bio(md, map, bio);
out:
- dm_put_live_table_bio(md, srcu_idx, bio);
+ dm_put_live_table_bio(md, srcu_idx, bio_opf);
}
static bool dm_poll_dm_io(struct dm_io *io, struct io_comp_batch *iob,
{
if (md->wq)
destroy_workqueue(md->wq);
- bioset_exit(&md->bs);
- bioset_exit(&md->io_bs);
+ dm_free_md_mempools(md->mempools);
if (md->dax_dev) {
dax_remove_host(md->disk);
kvfree(md);
}
-static int __bind_mempools(struct mapped_device *md, struct dm_table *t)
-{
- struct dm_md_mempools *p = dm_table_get_md_mempools(t);
- int ret = 0;
-
- if (dm_table_bio_based(t)) {
- /*
- * The md may already have mempools that need changing.
- * If so, reload bioset because front_pad may have changed
- * because a different table was loaded.
- */
- bioset_exit(&md->bs);
- bioset_exit(&md->io_bs);
-
- } else if (bioset_initialized(&md->bs)) {
- /*
- * There's no need to reload with request-based dm
- * because the size of front_pad doesn't change.
- * Note for future: If you are to reload bioset,
- * prep-ed requests in the queue may refer
- * to bio from the old bioset, so you must walk
- * through the queue to unprep.
- */
- goto out;
- }
-
- BUG_ON(!p ||
- bioset_initialized(&md->bs) ||
- bioset_initialized(&md->io_bs));
-
- ret = bioset_init_from_src(&md->bs, &p->bs);
- if (ret)
- goto out;
- ret = bioset_init_from_src(&md->io_bs, &p->io_bs);
- if (ret)
- bioset_exit(&md->bs);
-out:
- /* mempool bind completed, no longer need any mempools in the table */
- dm_table_free_md_mempools(t);
- return ret;
-}
-
/*
* Bind a table to the device.
*/
* immutable singletons - used to optimize dm_mq_queue_rq.
*/
md->immutable_target = dm_table_get_immutable_target(t);
- }
- ret = __bind_mempools(md, t);
- if (ret) {
- old_map = ERR_PTR(ret);
- goto out;
+ /*
+ * There is no need to reload with request-based dm because the
+ * size of front_pad doesn't change.
+ *
+ * Note for future: If you are to reload bioset, prep-ed
+ * requests in the queue may refer to bio from the old bioset,
+ * so you must walk through the queue to unprep.
+ */
+ if (!md->mempools) {
+ md->mempools = t->mempools;
+ t->mempools = NULL;
+ }
+ } else {
+ /*
+ * The md may already have mempools that need changing.
+ * If so, reload bioset because front_pad may have changed
+ * because a different table was loaded.
+ */
+ dm_free_md_mempools(md->mempools);
+ md->mempools = t->mempools;
+ t->mempools = NULL;
}
ret = dm_table_set_restrictions(t, md->queue, limits);
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
bool dm_table_bio_based(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
-void dm_table_free_md_mempools(struct dm_table *t);
-struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t);
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
}
mddev_unlock(mddev);
}
flush_workqueue(md_misc_wq);
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
}
del_timer_sync(&mddev->safemode_timer);
* ->spare_active and clear saved_raid_disk
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
clear_bit(MD_SB_CHANGE_PENDING, &mddev->sb_flags);
goto unlock;
}
if (mddev->sync_thread) {
- md_reap_sync_thread(mddev, true);
+ md_reap_sync_thread(mddev);
goto unlock;
}
/* Set RUNNING before clearing NEEDED to avoid
}
EXPORT_SYMBOL(md_check_recovery);
-void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held)
+void md_reap_sync_thread(struct mddev *mddev)
{
struct md_rdev *rdev;
sector_t old_dev_sectors = mddev->dev_sectors;
bool is_reshaped = false;
- if (reconfig_mutex_held)
- mddev_unlock(mddev);
/* resync has finished, collect result */
md_unregister_thread(&mddev->sync_thread);
- if (reconfig_mutex_held)
- mddev_lock_nointr(mddev);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
mddev->degraded != mddev->raid_disks) {
extern void md_unregister_thread(struct md_thread **threadp);
extern void md_wakeup_thread(struct md_thread *thread);
extern void md_check_recovery(struct mddev *mddev);
-extern void md_reap_sync_thread(struct mddev *mddev, bool reconfig_mutex_held);
+extern void md_reap_sync_thread(struct mddev *mddev);
extern int mddev_init_writes_pending(struct mddev *mddev);
extern bool md_write_start(struct mddev *mddev, struct bio *bi);
extern void md_write_inc(struct mddev *mddev, struct bio *bi);
if (bdev) {
struct bio *bio;
- bio = bio_alloc_bioset(bdev, 0, GFP_NOIO,
+ bio = bio_alloc_bioset(bdev, 0,
REQ_OP_WRITE | REQ_PREFLUSH,
- &ppl_conf->flush_bs);
+ GFP_NOIO, &ppl_conf->flush_bs);
bio->bi_private = io;
bio->bi_end_io = ppl_flush_endio;
goto abort;
conf->mddev = mddev;
- if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL)
+ ret = -ENOMEM;
+ conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL);
+ if (!conf->stripe_hashtbl)
goto abort;
/* We init hash_locks[0] separately to that it can be used
int err = 0;
int number = rdev->raid_disk;
struct md_rdev __rcu **rdevp;
- struct disk_info *p = conf->disks + number;
+ struct disk_info *p;
struct md_rdev *tmp;
print_raid5_conf(conf);
log_exit(conf);
return 0;
}
+ if (unlikely(number >= conf->pool_size))
+ return 0;
+ p = conf->disks + number;
if (rdev == rcu_access_pointer(p->rdev))
rdevp = &p->rdev;
else if (rdev == rcu_access_pointer(p->replacement))
*/
if (rdev->saved_raid_disk >= 0 &&
rdev->saved_raid_disk >= first &&
+ rdev->saved_raid_disk <= last &&
conf->disks[rdev->saved_raid_disk].rdev == NULL)
first = rdev->saved_raid_disk;
config OMAP_GPMC
tristate "Texas Instruments OMAP SoC GPMC driver"
depends on OF_ADDRESS
+ depends on ARCH_OMAP2PLUS || ARCH_KEYSTONE || ARCH_K3 || COMPILE_TEST
select GPIOLIB
help
This driver is for the General Purpose Memory Controller (GPMC)
of_node_put(smi_com_node);
if (smi_com_pdev) {
/* smi common is the supplier, Make sure it is ready before */
- if (!platform_get_drvdata(smi_com_pdev))
+ if (!platform_get_drvdata(smi_com_pdev)) {
+ put_device(&smi_com_pdev->dev);
return -EPROBE_DEFER;
+ }
smi_com_dev = &smi_com_pdev->dev;
link = device_link_add(dev, smi_com_dev,
DL_FLAG_PM_RUNTIME | DL_FLAG_STATELESS);
if (!link) {
dev_err(dev, "Unable to link smi-common dev\n");
+ put_device(&smi_com_pdev->dev);
return -ENODEV;
}
*com_dev = smi_com_dev;
dmc->timing_row = devm_kmalloc_array(dmc->dev, TIMING_COUNT,
sizeof(u32), GFP_KERNEL);
- if (!dmc->timing_row)
- return -ENOMEM;
+ if (!dmc->timing_row) {
+ ret = -ENOMEM;
+ goto put_node;
+ }
dmc->timing_data = devm_kmalloc_array(dmc->dev, TIMING_COUNT,
sizeof(u32), GFP_KERNEL);
- if (!dmc->timing_data)
- return -ENOMEM;
+ if (!dmc->timing_data) {
+ ret = -ENOMEM;
+ goto put_node;
+ }
dmc->timing_power = devm_kmalloc_array(dmc->dev, TIMING_COUNT,
sizeof(u32), GFP_KERNEL);
- if (!dmc->timing_power)
- return -ENOMEM;
+ if (!dmc->timing_power) {
+ ret = -ENOMEM;
+ goto put_node;
+ }
dmc->timings = of_lpddr3_get_ddr_timings(np_ddr, dmc->dev,
DDR_TYPE_LPDDR3,
&dmc->timings_arr_size);
if (!dmc->timings) {
- of_node_put(np_ddr);
dev_warn(dmc->dev, "could not get timings from DT\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_node;
}
dmc->min_tck = of_lpddr3_get_min_tck(np_ddr, dmc->dev);
if (!dmc->min_tck) {
- of_node_put(np_ddr);
dev_warn(dmc->dev, "could not get tck from DT\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto put_node;
}
/* Sorted array of OPPs with frequency ascending */
clk_period_ps);
}
- of_node_put(np_ddr);
/* Take the highest frequency's timings as 'bypass' */
dmc->bypass_timing_row = dmc->timing_row[idx - 1];
dmc->bypass_timing_data = dmc->timing_data[idx - 1];
dmc->bypass_timing_power = dmc->timing_power[idx - 1];
+put_node:
+ of_node_put(np_ddr);
return ret;
}
clk_disable_unprepare(ssc->clk);
ssc->irq = platform_get_irq(pdev, 0);
- if (!ssc->irq) {
+ if (ssc->irq < 0) {
dev_dbg(&pdev->dev, "could not get irq\n");
- return -ENXIO;
+ return ssc->irq;
}
mutex_lock(&user_lock);
// default
setting_reg1 = PCR_SETTING_REG1;
setting_reg2 = PCR_SETTING_REG2;
+ } else {
+ return;
}
pci_read_config_dword(pdev, setting_reg2, &lval2);
ucr->pusb_dev = usb_dev;
- ucr->iobuf = usb_alloc_coherent(ucr->pusb_dev, IOBUF_SIZE,
- GFP_KERNEL, &ucr->iobuf_dma);
- if (!ucr->iobuf)
+ ucr->cmd_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL);
+ if (!ucr->cmd_buf)
return -ENOMEM;
+ ucr->rsp_buf = kmalloc(IOBUF_SIZE, GFP_KERNEL);
+ if (!ucr->rsp_buf) {
+ ret = -ENOMEM;
+ goto out_free_cmd_buf;
+ }
+
usb_set_intfdata(intf, ucr);
ucr->vendor_id = id->idVendor;
ucr->product_id = id->idProduct;
- ucr->cmd_buf = ucr->rsp_buf = ucr->iobuf;
mutex_init(&ucr->dev_mutex);
out_init_fail:
usb_set_intfdata(ucr->pusb_intf, NULL);
- usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf,
- ucr->iobuf_dma);
+ kfree(ucr->rsp_buf);
+ ucr->rsp_buf = NULL;
+out_free_cmd_buf:
+ kfree(ucr->cmd_buf);
+ ucr->cmd_buf = NULL;
return ret;
}
mfd_remove_devices(&intf->dev);
usb_set_intfdata(ucr->pusb_intf, NULL);
- usb_free_coherent(ucr->pusb_dev, IOBUF_SIZE, ucr->iobuf,
- ucr->iobuf_dma);
+
+ kfree(ucr->cmd_buf);
+ ucr->cmd_buf = NULL;
+
+ kfree(ucr->rsp_buf);
+ ucr->rsp_buf = NULL;
}
#ifdef CONFIG_PM
{
struct at25_data *at25 = priv;
char *buf = val;
+ size_t max_chunk = spi_max_transfer_size(at25->spi);
+ unsigned int msg_offset = offset;
+ size_t bytes_left = count;
+ size_t segment;
u8 *cp;
ssize_t status;
struct spi_transfer t[2];
if (unlikely(!count))
return -EINVAL;
- cp = at25->command;
+ do {
+ segment = min(bytes_left, max_chunk);
+ cp = at25->command;
- instr = AT25_READ;
- if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
- if (offset >= BIT(at25->addrlen * 8))
- instr |= AT25_INSTR_BIT3;
+ instr = AT25_READ;
+ if (at25->chip.flags & EE_INSTR_BIT3_IS_ADDR)
+ if (msg_offset >= BIT(at25->addrlen * 8))
+ instr |= AT25_INSTR_BIT3;
- mutex_lock(&at25->lock);
+ mutex_lock(&at25->lock);
- *cp++ = instr;
-
- /* 8/16/24-bit address is written MSB first */
- switch (at25->addrlen) {
- default: /* case 3 */
- *cp++ = offset >> 16;
- fallthrough;
- case 2:
- *cp++ = offset >> 8;
- fallthrough;
- case 1:
- case 0: /* can't happen: for better code generation */
- *cp++ = offset >> 0;
- }
+ *cp++ = instr;
- spi_message_init(&m);
- memset(t, 0, sizeof(t));
+ /* 8/16/24-bit address is written MSB first */
+ switch (at25->addrlen) {
+ default: /* case 3 */
+ *cp++ = msg_offset >> 16;
+ fallthrough;
+ case 2:
+ *cp++ = msg_offset >> 8;
+ fallthrough;
+ case 1:
+ case 0: /* can't happen: for better code generation */
+ *cp++ = msg_offset >> 0;
+ }
- t[0].tx_buf = at25->command;
- t[0].len = at25->addrlen + 1;
- spi_message_add_tail(&t[0], &m);
+ spi_message_init(&m);
+ memset(t, 0, sizeof(t));
- t[1].rx_buf = buf;
- t[1].len = count;
- spi_message_add_tail(&t[1], &m);
+ t[0].tx_buf = at25->command;
+ t[0].len = at25->addrlen + 1;
+ spi_message_add_tail(&t[0], &m);
- /*
- * Read it all at once.
- *
- * REVISIT that's potentially a problem with large chips, if
- * other devices on the bus need to be accessed regularly or
- * this chip is clocked very slowly.
- */
- status = spi_sync(at25->spi, &m);
- dev_dbg(&at25->spi->dev, "read %zu bytes at %d --> %zd\n",
- count, offset, status);
+ t[1].rx_buf = buf;
+ t[1].len = segment;
+ spi_message_add_tail(&t[1], &m);
- mutex_unlock(&at25->lock);
- return status;
+ status = spi_sync(at25->spi, &m);
+
+ mutex_unlock(&at25->lock);
+
+ if (status)
+ return status;
+
+ msg_offset += segment;
+ buf += segment;
+ bytes_left -= segment;
+ } while (bytes_left > 0);
+
+ dev_dbg(&at25->spi->dev, "read %zu bytes at %d\n",
+ count, offset);
+ return 0;
}
/* Read extra registers as ID or serial number */
static int at25_ee_write(void *priv, unsigned int off, void *val, size_t count)
{
struct at25_data *at25 = priv;
+ size_t maxsz = spi_max_transfer_size(at25->spi);
const char *buf = val;
int status = 0;
unsigned buf_size;
do {
unsigned long timeout, retries;
unsigned segment;
- unsigned offset = (unsigned) off;
+ unsigned offset = off;
u8 *cp = bounce;
int sr;
u8 instr;
segment = buf_size - (offset % buf_size);
if (segment > count)
segment = count;
+ if (segment > maxsz)
+ segment = maxsz;
memcpy(cp, buf, segment);
status = spi_write(at25->spi, bounce,
segment + at25->addrlen + 1);
lkdtm-$(CONFIG_LKDTM) += fortify.o
lkdtm-$(CONFIG_PPC_64S_HASH_MMU) += powerpc.o
-KASAN_SANITIZE_rodata.o := n
KASAN_SANITIZE_stackleak.o := n
-KCOV_INSTRUMENT_rodata.o := n
-CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO)
+
+KASAN_SANITIZE_rodata.o := n
+KCSAN_SANITIZE_rodata.o := n
+KCOV_INSTRUMENT_rodata.o := n
+OBJECT_FILES_NON_STANDARD_rodata.o := y
+CFLAGS_REMOVE_rodata.o += $(CC_FLAGS_LTO) $(RETHUNK_CFLAGS)
OBJCOPYFLAGS :=
OBJCOPYFLAGS_rodata_objcopy.o := \
if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
dev->hbm_state != MEI_HBM_CAP_SETUP) {
- if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+ if (dev->dev_state == MEI_DEV_POWER_DOWN ||
+ dev->dev_state == MEI_DEV_POWERING_DOWN) {
dev_dbg(dev->dev, "hbm: capabilities response: on shutdown, ignoring\n");
return 0;
}
#define MEI_DEV_ID_ADP_P 0x51E0 /* Alder Lake Point P */
#define MEI_DEV_ID_ADP_N 0x54E0 /* Alder Lake Point N */
+#define MEI_DEV_ID_RPL_S 0x7A68 /* Raptor Lake Point S */
+
/*
* MEI HW Section
*/
ret = mei_me_d0i3_exit_sync(dev);
if (ret)
return ret;
+ } else {
+ hw->pg_state = MEI_PG_OFF;
}
}
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_P, MEI_ME_PCH15_CFG)},
{MEI_PCI_DEVICE(MEI_DEV_ID_ADP_N, MEI_ME_PCH15_CFG)},
+ {MEI_PCI_DEVICE(MEI_DEV_ID_RPL_S, MEI_ME_PCH15_CFG)},
+
/* required last entry */
{0, }
};
err = mmc_cqe_recovery(host);
if (err)
mmc_blk_reset(mq->blkdata, host, MMC_BLK_CQE_RECOVERY);
- else
- mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY);
+ mmc_blk_reset_success(mq->blkdata, MMC_BLK_CQE_RECOVERY);
pr_debug("%s: CQE recovery done\n", mmc_hostname(host));
}
msdc_request_done(host, mrq);
}
-static bool msdc_data_xfer_done(struct msdc_host *host, u32 events,
+static void msdc_data_xfer_done(struct msdc_host *host, u32 events,
struct mmc_request *mrq, struct mmc_data *data)
{
struct mmc_command *stop;
spin_unlock_irqrestore(&host->lock, flags);
if (done)
- return true;
+ return;
stop = data->stop;
if (check_data || (stop && stop->error)) {
sdr_set_field(host->base + MSDC_DMA_CTRL, MSDC_DMA_CTRL_STOP,
1);
+ ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CTRL, val,
+ !(val & MSDC_DMA_CTRL_STOP), 1, 20000);
+ if (ret)
+ dev_dbg(host->dev, "DMA stop timed out\n");
+
ret = readl_poll_timeout_atomic(host->base + MSDC_DMA_CFG, val,
!(val & MSDC_DMA_CFG_STS), 1, 20000);
- if (ret) {
- dev_dbg(host->dev, "DMA stop timed out\n");
- return false;
- }
+ if (ret)
+ dev_dbg(host->dev, "DMA inactive timed out\n");
sdr_clr_bits(host->base + MSDC_INTEN, data_ints_mask);
dev_dbg(host->dev, "DMA stop\n");
}
msdc_data_xfer_next(host, mrq);
- done = true;
}
- return done;
}
static void msdc_set_buswidth(struct msdc_host *host, u32 width)
if (recovery) {
sdr_set_field(host->base + MSDC_DMA_CTRL,
MSDC_DMA_CTRL_STOP, 1);
+ if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CTRL, val,
+ !(val & MSDC_DMA_CTRL_STOP), 1, 3000)))
+ return;
if (WARN_ON(readl_poll_timeout(host->base + MSDC_DMA_CFG, val,
!(val & MSDC_DMA_CFG_STS), 1, 3000)))
return;
/*
* omap_device_pm_domain has callbacks to enable the main
* functional clock, interface clock and also configure the
- * SYSCONFIG register of omap devices. The callback will be invoked
- * as part of pm_runtime_get_sync.
+ * SYSCONFIG register to clear any boot loader set voltage
+ * capabilities before calling sdhci_setup_host(). The
+ * callback will be invoked as part of pm_runtime_get_sync.
*/
pm_runtime_use_autosuspend(dev);
pm_runtime_set_autosuspend_delay(dev, 50);
struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
struct sdhci_omap_host *omap_host = sdhci_pltfm_priv(pltfm_host);
- sdhci_runtime_suspend_host(host);
+ if (omap_host->con != -EINVAL)
+ sdhci_runtime_suspend_host(host);
sdhci_omap_context_save(omap_host);
pinctrl_pm_select_default_state(dev);
- if (omap_host->con != -EINVAL)
+ if (omap_host->con != -EINVAL) {
sdhci_omap_context_restore(omap_host);
-
- sdhci_runtime_resume_host(host, 0);
+ sdhci_runtime_resume_host(host, 0);
+ }
return 0;
}
struct sdhci_host *host = slot->host;
u16 clock;
+ if (host->mmc->ios.power_mode != MMC_POWER_ON)
+ return 0;
+
clock = sdhci_readw(host, SDHCI_CLOCK_CONTROL);
clock |= SDHCI_CLOCK_PLL_EN;
if (!(sdhci_readw(host, O2_PLL_DLL_WDT_CONTROL1) & O2_PLL_LOCK_STATUS))
sdhci_o2_enable_internal_clock(host);
+ else
+ sdhci_o2_wait_card_detect_stable(host);
return !!(sdhci_readl(host, SDHCI_PRESENT_STATE) & SDHCI_CARD_PRESENT);
}
unsigned int tRP_ps;
bool use_half_period;
int sample_delay_ps, sample_delay_factor;
- u16 busy_timeout_cycles;
+ unsigned int busy_timeout_cycles;
u8 wrn_dly_sel;
unsigned long clk_rate, min_rate;
+ u64 busy_timeout_ps;
if (sdr->tRC_min >= 30000) {
/* ONFI non-EDO modes [0-3] */
addr_setup_cycles = TO_CYCLES(sdr->tALS_min, period_ps);
data_setup_cycles = TO_CYCLES(sdr->tDS_min, period_ps);
data_hold_cycles = TO_CYCLES(sdr->tDH_min, period_ps);
- busy_timeout_cycles = TO_CYCLES(sdr->tWB_max + sdr->tR_max, period_ps);
+ busy_timeout_ps = max(sdr->tBERS_max, sdr->tPROG_max);
+ busy_timeout_cycles = TO_CYCLES(busy_timeout_ps, period_ps);
hw->timing0 = BF_GPMI_TIMING0_ADDRESS_SETUP(addr_setup_cycles) |
BF_GPMI_TIMING0_DATA_HOLD(data_hold_cycles) |
BF_GPMI_TIMING0_DATA_SETUP(data_setup_cycles);
- hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(busy_timeout_cycles * 4096);
+ hw->timing1 = BF_GPMI_TIMING1_BUSY_TIMEOUT(DIV_ROUND_UP(busy_timeout_cycles, 4096));
/*
* Derive NFC ideal delay from {3}:
{"TC58NVG0S3E 1G 3.3V 8-bit",
{ .id = {0x98, 0xd1, 0x90, 0x15, 0x76, 0x14, 0x01, 0x00} },
SZ_2K, SZ_128, SZ_128K, 0, 8, 64, NAND_ECC_INFO(1, SZ_512), },
- {"TC58NVG0S3HTA00 1G 3.3V 8-bit",
- { .id = {0x98, 0xf1, 0x80, 0x15} },
- SZ_2K, SZ_128, SZ_128K, 0, 4, 128, NAND_ECC_INFO(8, SZ_512), },
{"TC58NVG2S0F 4G 3.3V 8-bit",
{ .id = {0x98, 0xdc, 0x90, 0x26, 0x76, 0x15, 0x01, 0x08} },
SZ_4K, SZ_512, SZ_256K, 0, 8, 224, NAND_ECC_INFO(4, SZ_512) },
select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
select CRYPTO_POLY1305_MIPS if MIPS
+ select CRYPTO_CHACHA_S390 if S390
help
WireGuard is a secure, fast, and easy to use replacement for IPSec
that uses modern cryptography and clever networking tricks. It's
};
static char *type_str[] = {
+ "", /* Type 0 is not defined */
"AMT_MSG_DISCOVERY",
"AMT_MSG_ADVERTISEMENT",
"AMT_MSG_REQUEST",
ihv3->nsrcs = 0;
ihv3->resv = 0;
ihv3->suppress = false;
- ihv3->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+ ihv3->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
ihv3->csum = 0;
csum = &ihv3->csum;
csum_start = (void *)ihv3;
return skb;
}
-static void __amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
- bool validate)
+static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
+ bool validate)
{
if (validate && amt->status >= status)
return;
netdev_dbg(amt->dev, "Update GW status %s -> %s",
status_str[amt->status], status_str[status]);
- amt->status = status;
+ WRITE_ONCE(amt->status, status);
}
static void __amt_update_relay_status(struct amt_tunnel_list *tunnel,
tunnel->status = status;
}
-static void amt_update_gw_status(struct amt_dev *amt, enum amt_status status,
- bool validate)
-{
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, status, validate);
- spin_unlock_bh(&amt->lock);
-}
-
static void amt_update_relay_status(struct amt_tunnel_list *tunnel,
enum amt_status status, bool validate)
{
if (unlikely(net_xmit_eval(err)))
amt->dev->stats.tx_errors++;
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
- spin_unlock_bh(&amt->lock);
+ amt_update_gw_status(amt, AMT_STATUS_SENT_DISCOVERY, true);
out:
rcu_read_unlock();
}
}
#endif
+static bool amt_queue_event(struct amt_dev *amt, enum amt_event event,
+ struct sk_buff *skb)
+{
+ int index;
+
+ spin_lock_bh(&amt->lock);
+ if (amt->nr_events >= AMT_MAX_EVENTS) {
+ spin_unlock_bh(&amt->lock);
+ return 1;
+ }
+
+ index = (amt->event_idx + amt->nr_events) % AMT_MAX_EVENTS;
+ amt->events[index].event = event;
+ amt->events[index].skb = skb;
+ amt->nr_events++;
+ amt->event_idx %= AMT_MAX_EVENTS;
+ queue_work(amt_wq, &amt->event_wq);
+ spin_unlock_bh(&amt->lock);
+
+ return 0;
+}
+
static void amt_secret_work(struct work_struct *work)
{
struct amt_dev *amt = container_of(to_delayed_work(work),
msecs_to_jiffies(AMT_SECRET_TIMEOUT));
}
-static void amt_discovery_work(struct work_struct *work)
+static void amt_event_send_discovery(struct amt_dev *amt)
{
- struct amt_dev *amt = container_of(to_delayed_work(work),
- struct amt_dev,
- discovery_wq);
-
- spin_lock_bh(&amt->lock);
if (amt->status > AMT_STATUS_SENT_DISCOVERY)
goto out;
get_random_bytes(&amt->nonce, sizeof(__be32));
- spin_unlock_bh(&amt->lock);
amt_send_discovery(amt);
- spin_lock_bh(&amt->lock);
out:
mod_delayed_work(amt_wq, &amt->discovery_wq,
msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
- spin_unlock_bh(&amt->lock);
}
-static void amt_req_work(struct work_struct *work)
+static void amt_discovery_work(struct work_struct *work)
{
struct amt_dev *amt = container_of(to_delayed_work(work),
struct amt_dev,
- req_wq);
+ discovery_wq);
+
+ if (amt_queue_event(amt, AMT_EVENT_SEND_DISCOVERY, NULL))
+ mod_delayed_work(amt_wq, &amt->discovery_wq,
+ msecs_to_jiffies(AMT_DISCOVERY_TIMEOUT));
+}
+
+static void amt_event_send_request(struct amt_dev *amt)
+{
u32 exp;
- spin_lock_bh(&amt->lock);
if (amt->status < AMT_STATUS_RECEIVED_ADVERTISEMENT)
goto out;
if (amt->req_cnt > AMT_MAX_REQ_COUNT) {
netdev_dbg(amt->dev, "Gateway is not ready");
amt->qi = AMT_INIT_REQ_TIMEOUT;
- amt->ready4 = false;
- amt->ready6 = false;
+ WRITE_ONCE(amt->ready4, false);
+ WRITE_ONCE(amt->ready6, false);
amt->remote_ip = 0;
- __amt_update_gw_status(amt, AMT_STATUS_INIT, false);
+ amt_update_gw_status(amt, AMT_STATUS_INIT, false);
amt->req_cnt = 0;
+ amt->nonce = 0;
goto out;
}
- spin_unlock_bh(&amt->lock);
+
+ if (!amt->req_cnt) {
+ WRITE_ONCE(amt->ready4, false);
+ WRITE_ONCE(amt->ready6, false);
+ get_random_bytes(&amt->nonce, sizeof(__be32));
+ }
amt_send_request(amt, false);
amt_send_request(amt, true);
- spin_lock_bh(&amt->lock);
- __amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
+ amt_update_gw_status(amt, AMT_STATUS_SENT_REQUEST, true);
amt->req_cnt++;
out:
exp = min_t(u32, (1 * (1 << amt->req_cnt)), AMT_MAX_REQ_TIMEOUT);
mod_delayed_work(amt_wq, &amt->req_wq, msecs_to_jiffies(exp * 1000));
- spin_unlock_bh(&amt->lock);
+}
+
+static void amt_req_work(struct work_struct *work)
+{
+ struct amt_dev *amt = container_of(to_delayed_work(work),
+ struct amt_dev,
+ req_wq);
+
+ if (amt_queue_event(amt, AMT_EVENT_SEND_REQUEST, NULL))
+ mod_delayed_work(amt_wq, &amt->req_wq,
+ msecs_to_jiffies(100));
}
static bool amt_send_membership_update(struct amt_dev *amt,
/* Gateway only passes IGMP/MLD packets */
if (!report)
goto free;
- if ((!v6 && !amt->ready4) || (v6 && !amt->ready6))
+ if ((!v6 && !READ_ONCE(amt->ready4)) ||
+ (v6 && !READ_ONCE(amt->ready6)))
goto free;
if (amt_send_membership_update(amt, skb, v6))
goto free;
struct amt_header_advertisement *amta;
int hdr_size;
- hdr_size = sizeof(*amta) - sizeof(struct amt_header);
-
+ hdr_size = sizeof(*amta) + sizeof(struct udphdr);
if (!pskb_may_pull(skb, hdr_size))
return true;
ipv4_is_zeronet(amta->ip4))
return true;
+ if (amt->status != AMT_STATUS_SENT_DISCOVERY ||
+ amt->nonce != amta->nonce)
+ return true;
+
amt->remote_ip = amta->ip4;
netdev_dbg(amt->dev, "advertised remote ip = %pI4\n", &amt->remote_ip);
mod_delayed_work(amt_wq, &amt->req_wq, 0);
struct ethhdr *eth;
struct iphdr *iph;
+ if (READ_ONCE(amt->status) != AMT_STATUS_SENT_UPDATE)
+ return true;
+
+ hdr_size = sizeof(*amtmd) + sizeof(struct udphdr);
+ if (!pskb_may_pull(skb, hdr_size))
+ return true;
+
amtmd = (struct amt_header_mcast_data *)(udp_hdr(skb) + 1);
if (amtmd->reserved || amtmd->version)
return true;
- hdr_size = sizeof(*amtmd) + sizeof(struct udphdr);
if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_IP), false))
return true;
+
skb_reset_network_header(skb);
skb_push(skb, sizeof(*eth));
skb_reset_mac_header(skb);
skb_pull(skb, sizeof(*eth));
eth = eth_hdr(skb);
+
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return true;
iph = ip_hdr(skb);
+
if (iph->version == 4) {
if (!ipv4_is_multicast(iph->daddr))
return true;
} else if (iph->version == 6) {
struct ipv6hdr *ip6h;
+ if (!pskb_may_pull(skb, sizeof(*ip6h)))
+ return true;
+
ip6h = ipv6_hdr(skb);
if (!ipv6_addr_is_multicast(&ip6h->daddr))
return true;
struct iphdr *iph;
int hdr_size, len;
- hdr_size = sizeof(*amtmq) - sizeof(struct amt_header);
-
+ hdr_size = sizeof(*amtmq) + sizeof(struct udphdr);
if (!pskb_may_pull(skb, hdr_size))
return true;
if (amtmq->reserved || amtmq->version)
return true;
- hdr_size = sizeof(*amtmq) + sizeof(struct udphdr) - sizeof(*eth);
+ if (amtmq->nonce != amt->nonce)
+ return true;
+
+ hdr_size -= sizeof(*eth);
if (iptunnel_pull_header(skb, hdr_size, htons(ETH_P_TEB), false))
return true;
+
oeth = eth_hdr(skb);
skb_reset_mac_header(skb);
skb_pull(skb, sizeof(*eth));
skb_reset_network_header(skb);
eth = eth_hdr(skb);
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return true;
+
iph = ip_hdr(skb);
if (iph->version == 4) {
- if (!ipv4_is_multicast(iph->daddr))
+ if (READ_ONCE(amt->ready4))
return true;
+
if (!pskb_may_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS +
sizeof(*ihv3)))
return true;
+ if (!ipv4_is_multicast(iph->daddr))
+ return true;
+
ihv3 = skb_pull(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*iph) + AMT_IPHDR_OPTS);
- spin_lock_bh(&amt->lock);
- amt->ready4 = true;
+ WRITE_ONCE(amt->ready4, true);
amt->mac = amtmq->response_mac;
amt->req_cnt = 0;
amt->qi = ihv3->qqic;
- spin_unlock_bh(&amt->lock);
skb->protocol = htons(ETH_P_IP);
eth->h_proto = htons(ETH_P_IP);
ip_eth_mc_map(iph->daddr, eth->h_dest);
#if IS_ENABLED(CONFIG_IPV6)
} else if (iph->version == 6) {
- struct ipv6hdr *ip6h = ipv6_hdr(skb);
struct mld2_query *mld2q;
+ struct ipv6hdr *ip6h;
- if (!ipv6_addr_is_multicast(&ip6h->daddr))
+ if (READ_ONCE(amt->ready6))
return true;
+
if (!pskb_may_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS +
sizeof(*mld2q)))
return true;
+ ip6h = ipv6_hdr(skb);
+ if (!ipv6_addr_is_multicast(&ip6h->daddr))
+ return true;
+
mld2q = skb_pull(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
skb_reset_transport_header(skb);
skb_push(skb, sizeof(*ip6h) + AMT_IP6HDR_OPTS);
- spin_lock_bh(&amt->lock);
- amt->ready6 = true;
+ WRITE_ONCE(amt->ready6, true);
amt->mac = amtmq->response_mac;
amt->req_cnt = 0;
amt->qi = mld2q->mld2q_qqic;
- spin_unlock_bh(&amt->lock);
skb->protocol = htons(ETH_P_IPV6);
eth->h_proto = htons(ETH_P_IPV6);
ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest);
skb->pkt_type = PACKET_MULTICAST;
skb->ip_summed = CHECKSUM_NONE;
len = skb->len;
+ local_bh_disable();
if (__netif_rx(skb) == NET_RX_SUCCESS) {
amt_update_gw_status(amt, AMT_STATUS_RECEIVED_QUERY, true);
dev_sw_netstats_rx_add(amt->dev, len);
} else {
amt->dev->stats.rx_dropped++;
}
+ local_bh_enable();
return false;
}
{
struct amt_header_membership_update *amtmu;
struct amt_tunnel_list *tunnel;
- struct udphdr *udph;
struct ethhdr *eth;
struct iphdr *iph;
- int len;
+ int len, hdr_size;
iph = ip_hdr(skb);
- udph = udp_hdr(skb);
- if (__iptunnel_pull_header(skb, sizeof(*udph), skb->protocol,
- false, false))
+ hdr_size = sizeof(*amtmu) + sizeof(struct udphdr);
+ if (!pskb_may_pull(skb, hdr_size))
return true;
- amtmu = (struct amt_header_membership_update *)skb->data;
+ amtmu = (struct amt_header_membership_update *)(udp_hdr(skb) + 1);
if (amtmu->reserved || amtmu->version)
return true;
- skb_pull(skb, sizeof(*amtmu));
+ if (iptunnel_pull_header(skb, hdr_size, skb->protocol, false))
+ return true;
+
skb_reset_network_header(skb);
list_for_each_entry_rcu(tunnel, &amt->tunnel_list, list) {
return true;
report:
+ if (!pskb_may_pull(skb, sizeof(*iph)))
+ return true;
+
iph = ip_hdr(skb);
if (iph->version == 4) {
if (ip_mc_check_igmp(skb)) {
if (tunnel->ip4 == iph->saddr)
goto send;
+ spin_lock_bh(&amt->lock);
if (amt->nr_tunnels >= amt->max_tunnels) {
+ spin_unlock_bh(&amt->lock);
icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
return true;
}
tunnel = kzalloc(sizeof(*tunnel) +
(sizeof(struct hlist_head) * amt->hash_buckets),
GFP_ATOMIC);
- if (!tunnel)
+ if (!tunnel) {
+ spin_unlock_bh(&amt->lock);
return true;
+ }
tunnel->source_port = udph->source;
tunnel->ip4 = iph->saddr;
INIT_DELAYED_WORK(&tunnel->gc_wq, amt_tunnel_expire);
- spin_lock_bh(&amt->lock);
list_add_tail_rcu(&tunnel->list, &amt->tunnel_list);
tunnel->key = amt->key;
- amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
+ __amt_update_relay_status(tunnel, AMT_STATUS_RECEIVED_REQUEST, true);
amt->nr_tunnels++;
mod_delayed_work(amt_wq, &tunnel->gc_wq,
msecs_to_jiffies(amt_gmi(amt)));
return false;
}
+static void amt_gw_rcv(struct amt_dev *amt, struct sk_buff *skb)
+{
+ int type = amt_parse_type(skb);
+ int err = 1;
+
+ if (type == -1)
+ goto drop;
+
+ if (amt->mode == AMT_MODE_GATEWAY) {
+ switch (type) {
+ case AMT_MSG_ADVERTISEMENT:
+ err = amt_advertisement_handler(amt, skb);
+ break;
+ case AMT_MSG_MEMBERSHIP_QUERY:
+ err = amt_membership_query_handler(amt, skb);
+ if (!err)
+ return;
+ break;
+ default:
+ netdev_dbg(amt->dev, "Invalid type of Gateway\n");
+ break;
+ }
+ }
+drop:
+ if (err) {
+ amt->dev->stats.rx_dropped++;
+ kfree_skb(skb);
+ } else {
+ consume_skb(skb);
+ }
+}
+
static int amt_rcv(struct sock *sk, struct sk_buff *skb)
{
struct amt_dev *amt;
amt = rcu_dereference_sk_user_data(sk);
if (!amt) {
err = true;
- goto drop;
+ kfree_skb(skb);
+ goto out;
}
skb->dev = amt->dev;
err = true;
goto drop;
}
- err = amt_advertisement_handler(amt, skb);
- break;
+ if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+ netdev_dbg(amt->dev, "AMT Event queue full\n");
+ err = true;
+ goto drop;
+ }
+ goto out;
case AMT_MSG_MULTICAST_DATA:
if (iph->saddr != amt->remote_ip) {
netdev_dbg(amt->dev, "Invalid Relay IP\n");
err = true;
goto drop;
}
- err = amt_membership_query_handler(amt, skb);
- if (err)
+ if (amt_queue_event(amt, AMT_EVENT_RECEIVE, skb)) {
+ netdev_dbg(amt->dev, "AMT Event queue full\n");
+ err = true;
goto drop;
- else
- goto out;
+ }
+ goto out;
default:
err = true;
netdev_dbg(amt->dev, "Invalid type of Gateway\n");
return 0;
}
+static void amt_event_work(struct work_struct *work)
+{
+ struct amt_dev *amt = container_of(work, struct amt_dev, event_wq);
+ struct sk_buff *skb;
+ u8 event;
+ int i;
+
+ for (i = 0; i < AMT_MAX_EVENTS; i++) {
+ spin_lock_bh(&amt->lock);
+ if (amt->nr_events == 0) {
+ spin_unlock_bh(&amt->lock);
+ return;
+ }
+ event = amt->events[amt->event_idx].event;
+ skb = amt->events[amt->event_idx].skb;
+ amt->events[amt->event_idx].event = AMT_EVENT_NONE;
+ amt->events[amt->event_idx].skb = NULL;
+ amt->nr_events--;
+ amt->event_idx++;
+ amt->event_idx %= AMT_MAX_EVENTS;
+ spin_unlock_bh(&amt->lock);
+
+ switch (event) {
+ case AMT_EVENT_RECEIVE:
+ amt_gw_rcv(amt, skb);
+ break;
+ case AMT_EVENT_SEND_DISCOVERY:
+ amt_event_send_discovery(amt);
+ break;
+ case AMT_EVENT_SEND_REQUEST:
+ amt_event_send_request(amt);
+ break;
+ default:
+ if (skb)
+ kfree_skb(skb);
+ break;
+ }
+ }
+}
+
static int amt_err_lookup(struct sock *sk, struct sk_buff *skb)
{
struct amt_dev *amt;
break;
case AMT_MSG_REQUEST:
case AMT_MSG_MEMBERSHIP_UPDATE:
- if (amt->status >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
+ if (READ_ONCE(amt->status) >= AMT_STATUS_RECEIVED_ADVERTISEMENT)
mod_delayed_work(amt_wq, &amt->req_wq, 0);
break;
default:
amt->ready4 = false;
amt->ready6 = false;
+ amt->event_idx = 0;
+ amt->nr_events = 0;
err = amt_socket_create(amt);
if (err)
amt->req_cnt = 0;
amt->remote_ip = 0;
+ amt->nonce = 0;
get_random_bytes(&amt->key, sizeof(siphash_key_t));
amt->status = AMT_STATUS_INIT;
struct amt_dev *amt = netdev_priv(dev);
struct amt_tunnel_list *tunnel, *tmp;
struct socket *sock;
+ struct sk_buff *skb;
+ int i;
cancel_delayed_work_sync(&amt->req_wq);
cancel_delayed_work_sync(&amt->discovery_wq);
if (sock)
udp_tunnel_sock_release(sock);
+ cancel_work_sync(&amt->event_wq);
+ for (i = 0; i < AMT_MAX_EVENTS; i++) {
+ skb = amt->events[i].skb;
+ if (skb)
+ kfree_skb(skb);
+ amt->events[i].event = AMT_EVENT_NONE;
+ amt->events[i].skb = NULL;
+ }
+
amt->ready4 = false;
amt->ready6 = false;
amt->req_cnt = 0;
goto err;
}
if (amt->mode == AMT_MODE_RELAY) {
- amt->qrv = amt->net->ipv4.sysctl_igmp_qrv;
+ amt->qrv = READ_ONCE(amt->net->ipv4.sysctl_igmp_qrv);
amt->qri = 10;
dev->needed_headroom = amt->stream_dev->needed_headroom +
AMT_RELAY_HLEN;
INIT_DELAYED_WORK(&amt->discovery_wq, amt_discovery_work);
INIT_DELAYED_WORK(&amt->req_wq, amt_req_work);
INIT_DELAYED_WORK(&amt->secret_wq, amt_secret_work);
+ INIT_WORK(&amt->event_wq, amt_event_work);
INIT_LIST_HEAD(&amt->tunnel_list);
-
return 0;
err:
dev_put(amt->stream_dev);
if (err < 0)
goto unregister_notifier;
- amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 1);
+ amt_wq = alloc_workqueue("amt", WQ_UNBOUND, 0);
if (!amt_wq) {
err = -ENOMEM;
goto rtnl_unregister;
temp_aggregator->num_of_ports--;
if (__agg_active_ports(temp_aggregator) == 0) {
select_new_active_agg = temp_aggregator->is_active;
- ad_clear_agg(temp_aggregator);
+ if (temp_aggregator->num_of_ports == 0)
+ ad_clear_agg(temp_aggregator);
if (select_new_active_agg) {
slave_info(bond->dev, slave->dev, "Removing an active aggregator\n");
/* select new active aggregator */
return res;
if (rlb_enabled) {
- bond->alb_info.rlb_enabled = 1;
res = rlb_initialize(bond);
if (res) {
tlb_deinitialize(bond);
return res;
}
+ bond->alb_info.rlb_enabled = 1;
} else {
bond->alb_info.rlb_enabled = 0;
}
if (!rtnl_trylock())
return;
- if (should_notify_peers)
+ if (should_notify_peers) {
+ bond->send_peer_notif--;
call_netdevice_notifiers(NETDEV_NOTIFY_PEERS,
bond->dev);
+ }
if (should_notify_rtnl) {
bond_slave_state_notify(bond);
bond_slave_link_notify(bond);
/* Carrier is off until netdevice is opened */
netif_carrier_off(netdev);
+ /* serialize netdev register + virtio_device_ready() with ndo_open() */
+ rtnl_lock();
+
/* register Netdev */
- err = register_netdev(netdev);
+ err = register_netdevice(netdev);
if (err) {
+ rtnl_unlock();
dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err);
goto err;
}
+ virtio_device_ready(vdev);
+
+ rtnl_unlock();
+
debugfs_init(cfv);
return 0;
*/
sysid_parent = of_find_node_by_path("/ambapp0");
if (sysid_parent) {
- of_node_get(sysid_parent);
err = of_property_read_u32(sysid_parent, "systemid", &sysid);
if (!err && ((sysid & GRLIB_VERSION_MASK) >=
GRCAN_TXBUG_SAFE_GRLIB_VERSION))
/* acknowledge rx fifo 0 */
m_can_write(cdev, M_CAN_RXF0A, fgi);
- timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc);
+ timestamp = FIELD_GET(RX_BUF_RXTS_MASK, fifo_header.dlc) << 16;
m_can_receive_skb(cdev, skb, timestamp);
}
msg_mark = FIELD_GET(TX_EVENT_MM_MASK, txe);
- timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe);
+ timestamp = FIELD_GET(TX_EVENT_TXTS_MASK, txe) << 16;
/* ack txe element */
m_can_write(cdev, M_CAN_TXEFA, FIELD_PREP(TXEFA_EFAI_MASK,
/* enable internal timestamp generation, with a prescalar of 16. The
* prescalar is applied to the nominal bit timing
*/
- m_can_write(cdev, M_CAN_TSCC, FIELD_PREP(TSCC_TCP_MASK, 0xf));
+ m_can_write(cdev, M_CAN_TSCC,
+ FIELD_PREP(TSCC_TCP_MASK, 0xf) |
+ FIELD_PREP(TSCC_TSS_MASK, TSCC_TSS_INTERNAL));
m_can_config_endisable(cdev, false);
cfg = (RCANFD_DCFG_DTSEG1(gpriv, tseg1) | RCANFD_DCFG_DBRP(brp) |
RCANFD_DCFG_DSJW(sjw) | RCANFD_DCFG_DTSEG2(gpriv, tseg2));
- rcar_canfd_write(priv->base, RCANFD_F_DCFG(ch), cfg);
+ if (is_v3u(gpriv))
+ rcar_canfd_write(priv->base, RCANFD_V3U_DCFG(ch), cfg);
+ else
+ rcar_canfd_write(priv->base, RCANFD_F_DCFG(ch), cfg);
netdev_dbg(priv->ndev, "drate: brp %u, sjw %u, tseg1 %u, tseg2 %u\n",
brp, sjw, tseg1, tseg2);
} else {
of_child = of_get_child_by_name(pdev->dev.of_node, name);
if (of_child && of_device_is_available(of_child))
channels_mask |= BIT(i);
+ of_node_put(of_child);
}
if (chip_id != RENESAS_RZG2L) {
// Copyright (c) 2019 Martin Sperl <kernel@martin.sperl.org>
//
+#include <asm/unaligned.h>
#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/device.h>
netif_stop_queue(ndev);
set_bit(MCP251XFD_FLAGS_DOWN, priv->flags);
hrtimer_cancel(&priv->rx_irq_timer);
+ hrtimer_cancel(&priv->tx_irq_timer);
mcp251xfd_chip_interrupts_disable(priv);
free_irq(ndev->irq, priv);
can_rx_offload_disable(&priv->offload);
u32 osc;
int err;
- /* The OSC_LPMEN is only supported on MCP2518FD, so use it to
- * autodetect the model.
+ /* The OSC_LPMEN is only supported on MCP2518FD and MCP251863,
+ * so use it to autodetect the model.
*/
err = regmap_update_bits(priv->map_reg, MCP251XFD_REG_OSC,
MCP251XFD_REG_OSC_LPMEN,
if (err)
return err;
- if (osc & MCP251XFD_REG_OSC_LPMEN)
- devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
- else
+ if (osc & MCP251XFD_REG_OSC_LPMEN) {
+ /* We cannot distinguish between MCP2518FD and
+ * MCP251863. If firmware specifies MCP251863, keep
+ * it, otherwise set to MCP2518FD.
+ */
+ if (mcp251xfd_is_251863(priv))
+ devtype_data = &mcp251xfd_devtype_data_mcp251863;
+ else
+ devtype_data = &mcp251xfd_devtype_data_mcp2518fd;
+ } else {
devtype_data = &mcp251xfd_devtype_data_mcp2517fd;
+ }
if (!mcp251xfd_is_251XFD(priv) &&
priv->devtype_data.model != devtype_data->model) {
xfer[0].len = sizeof(buf_tx->cmd);
xfer[0].speed_hz = priv->spi_max_speed_hz_slow;
xfer[1].rx_buf = buf_rx->data;
- xfer[1].len = sizeof(dev_id);
+ xfer[1].len = sizeof(*dev_id);
xfer[1].speed_hz = priv->spi_max_speed_hz_fast;
mcp251xfd_spi_cmd_read_nocrc(&buf_tx->cmd, MCP251XFD_REG_DEVID);
if (err)
goto out_kfree_buf_tx;
- *dev_id = be32_to_cpup((__be32 *)buf_rx->data);
+ *dev_id = get_unaligned_le32(buf_rx->data);
*effective_speed_hz_slow = xfer[0].effective_speed_hz;
*effective_speed_hz_fast = xfer[1].effective_speed_hz;
* register. It increments once per SYS clock tick,
* which is 20 or 40 MHz.
*
- * Observation shows that if the lowest byte (which is
- * transferred first on the SPI bus) of that register
- * is 0x00 or 0x80 the calculated CRC doesn't always
- * match the transferred one.
+ * Observation on the mcp2518fd shows that if the
+ * lowest byte (which is transferred first on the SPI
+ * bus) of that register is 0x00 or 0x80 the
+ * calculated CRC doesn't always match the transferred
+ * one. On the mcp2517fd this problem is not limited
+ * to the first byte being 0x00 or 0x80.
*
* If the highest bit in the lowest byte is flipped
* the transferred CRC matches the calculated one. We
- * assume for now the CRC calculation in the chip
- * works on wrong data and the transferred data is
- * correct.
+ * assume for now the CRC operates on the correct
+ * data.
*/
if (reg == MCP251XFD_REG_TBC &&
- (buf_rx->data[0] == 0x0 || buf_rx->data[0] == 0x80)) {
+ ((buf_rx->data[0] & 0xf8) == 0x0 ||
+ (buf_rx->data[0] & 0xf8) == 0x80)) {
/* Flip highest bit in lowest byte of le32 */
buf_rx->data[0] ^= 0x80;
val_len);
if (!err) {
/* If CRC is now correct, assume
- * transferred data was OK, flip bit
- * back to original value.
+ * flipped data is OK.
*/
- buf_rx->data[0] ^= 0x80;
goto out;
}
}
struct usb_anchor tx_submitted;
atomic_t active_tx_urbs;
+ void *rxbuf[GS_MAX_RX_URBS];
+ dma_addr_t rxbuf_dma[GS_MAX_RX_URBS];
};
/* usb interface struct */
for (i = 0; i < GS_MAX_RX_URBS; i++) {
struct urb *urb;
u8 *buf;
+ dma_addr_t buf_dma;
/* alloc rx urb */
urb = usb_alloc_urb(0, GFP_KERNEL);
buf = usb_alloc_coherent(dev->udev,
dev->parent->hf_size_rx,
GFP_KERNEL,
- &urb->transfer_dma);
+ &buf_dma);
if (!buf) {
netdev_err(netdev,
"No memory left for USB buffer\n");
return -ENOMEM;
}
+ urb->transfer_dma = buf_dma;
+
/* fill, anchor, and submit rx urb */
usb_fill_bulk_urb(urb,
dev->udev,
"usb_submit failed (err=%d)\n", rc);
usb_unanchor_urb(urb);
+ usb_free_coherent(dev->udev,
+ sizeof(struct gs_host_frame),
+ buf,
+ buf_dma);
usb_free_urb(urb);
break;
}
+ dev->rxbuf[i] = buf;
+ dev->rxbuf_dma[i] = buf_dma;
+
/* Drop reference,
* USB core will take care of freeing it
*/
int rc;
struct gs_can *dev = netdev_priv(netdev);
struct gs_usb *parent = dev->parent;
+ unsigned int i;
netif_stop_queue(netdev);
/* Stop polling */
parent->active_channels--;
- if (!parent->active_channels)
+ if (!parent->active_channels) {
usb_kill_anchored_urbs(&parent->rx_submitted);
+ for (i = 0; i < GS_MAX_RX_URBS; i++)
+ usb_free_coherent(dev->udev,
+ sizeof(struct gs_host_frame),
+ dev->rxbuf[i],
+ dev->rxbuf_dma[i]);
+ }
/* Stop sending URBs */
usb_kill_anchored_urbs(&dev->tx_submitted);
#define KVASER_USB_RX_BUFFER_SIZE 3072
#define KVASER_USB_MAX_NET_DEVICES 5
-/* USB devices features */
-#define KVASER_USB_HAS_SILENT_MODE BIT(0)
-#define KVASER_USB_HAS_TXRX_ERRORS BIT(1)
+/* Kvaser USB device quirks */
+#define KVASER_USB_QUIRK_HAS_SILENT_MODE BIT(0)
+#define KVASER_USB_QUIRK_HAS_TXRX_ERRORS BIT(1)
+#define KVASER_USB_QUIRK_IGNORE_CLK_FREQ BIT(2)
/* Device capabilities */
#define KVASER_USB_CAP_BERR_CAP 0x01
struct kvaser_usb_dev_card_data {
u32 ctrlmode_supported;
u32 capabilities;
- union {
- struct {
- enum kvaser_usb_leaf_family family;
- } leaf;
- struct kvaser_usb_dev_card_data_hydra hydra;
- };
+ struct kvaser_usb_dev_card_data_hydra hydra;
};
/* Context for an outstanding, not yet ACKed, transmission */
struct usb_device *udev;
struct usb_interface *intf;
struct kvaser_usb_net_priv *nets[KVASER_USB_MAX_NET_DEVICES];
- const struct kvaser_usb_dev_ops *ops;
+ const struct kvaser_usb_driver_info *driver_info;
const struct kvaser_usb_dev_cfg *cfg;
struct usb_endpoint_descriptor *bulk_in, *bulk_out;
u16 transid);
};
+struct kvaser_usb_driver_info {
+ u32 quirks;
+ enum kvaser_usb_leaf_family family;
+ const struct kvaser_usb_dev_ops *ops;
+};
+
struct kvaser_usb_dev_cfg {
const struct can_clock clock;
const unsigned int timestamp_freq;
int len);
int kvaser_usb_can_rx_over_error(struct net_device *netdev);
+
+extern const struct can_bittiming_const kvaser_usb_flexc_bittiming_const;
+
#endif /* KVASER_USB_H */
#define USB_USBCAN_R_V2_PRODUCT_ID 294
#define USB_LEAF_LIGHT_R_V2_PRODUCT_ID 295
#define USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID 296
-#define USB_LEAF_PRODUCT_ID_END \
- USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID
/* Kvaser USBCan-II devices product ids */
#define USB_USBCAN_REVB_PRODUCT_ID 2
#define USB_USBCAN_PRO_4HS_PRODUCT_ID 276
#define USB_HYBRID_CANLIN_PRODUCT_ID 277
#define USB_HYBRID_PRO_CANLIN_PRODUCT_ID 278
-#define USB_HYDRA_PRODUCT_ID_END \
- USB_HYBRID_PRO_CANLIN_PRODUCT_ID
-static inline bool kvaser_is_leaf(const struct usb_device_id *id)
-{
- return (id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID &&
- id->idProduct <= USB_CAN_R_PRODUCT_ID) ||
- (id->idProduct >= USB_LEAF_LITE_V2_PRODUCT_ID &&
- id->idProduct <= USB_LEAF_PRODUCT_ID_END);
-}
+static const struct kvaser_usb_driver_info kvaser_usb_driver_info_hydra = {
+ .quirks = 0,
+ .ops = &kvaser_usb_hydra_dev_ops,
+};
-static inline bool kvaser_is_usbcan(const struct usb_device_id *id)
-{
- return id->idProduct >= USB_USBCAN_REVB_PRODUCT_ID &&
- id->idProduct <= USB_MEMORATOR_PRODUCT_ID;
-}
+static const struct kvaser_usb_driver_info kvaser_usb_driver_info_usbcan = {
+ .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS |
+ KVASER_USB_QUIRK_HAS_SILENT_MODE,
+ .family = KVASER_USBCAN,
+ .ops = &kvaser_usb_leaf_dev_ops,
+};
-static inline bool kvaser_is_hydra(const struct usb_device_id *id)
-{
- return id->idProduct >= USB_BLACKBIRD_V2_PRODUCT_ID &&
- id->idProduct <= USB_HYDRA_PRODUCT_ID_END;
-}
+static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf = {
+ .quirks = KVASER_USB_QUIRK_IGNORE_CLK_FREQ,
+ .family = KVASER_LEAF,
+ .ops = &kvaser_usb_leaf_dev_ops,
+};
+
+static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err = {
+ .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS |
+ KVASER_USB_QUIRK_IGNORE_CLK_FREQ,
+ .family = KVASER_LEAF,
+ .ops = &kvaser_usb_leaf_dev_ops,
+};
+
+static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leaf_err_listen = {
+ .quirks = KVASER_USB_QUIRK_HAS_TXRX_ERRORS |
+ KVASER_USB_QUIRK_HAS_SILENT_MODE |
+ KVASER_USB_QUIRK_IGNORE_CLK_FREQ,
+ .family = KVASER_LEAF,
+ .ops = &kvaser_usb_leaf_dev_ops,
+};
+
+static const struct kvaser_usb_driver_info kvaser_usb_driver_info_leafimx = {
+ .quirks = 0,
+ .ops = &kvaser_usb_leaf_dev_ops,
+};
static const struct usb_device_id kvaser_usb_table[] = {
- /* Leaf USB product IDs */
- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID) },
+ /* Leaf M32C USB product IDs */
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_DEVEL_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LS_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_SWC_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_LIN_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_LS_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_SPRO_SWC_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_DEVEL_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSHS_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_UPRO_HSHS_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID) },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_GI_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_OBDII_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS |
- KVASER_USB_HAS_SILENT_MODE },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err_listen },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO2_HSLS_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_CH_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_SPRO_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_MERCURY_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_OEM_LEAF_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_CAN_R_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_R_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_R_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID) },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leaf_err },
+
+ /* Leaf i.MX28 USB product IDs */
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_R_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_R_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_leafimx },
/* USBCANII USB product IDs */
{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_REVB_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_MEMORATOR_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan },
{ USB_DEVICE(KVASER_VENDOR_ID, USB_VCI2_PRODUCT_ID),
- .driver_info = KVASER_USB_HAS_TXRX_ERRORS },
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_usbcan },
/* Minihydra USB product IDs */
- { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_2CANLIN_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_2CANLIN_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID) },
- { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID) },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_BLACKBIRD_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_5HS_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_5HS_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_4HS_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_PRO_HS_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_2HS_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_2HS_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_MEMO_PRO_2HS_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_2CANLIN_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_USBCAN_PRO_2HS_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_ATI_MEMO_PRO_2HS_V2_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_2CANLIN_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_CANLIN_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
+ { USB_DEVICE(KVASER_VENDOR_ID, USB_HYBRID_PRO_CANLIN_PRODUCT_ID),
+ .driver_info = (kernel_ulong_t)&kvaser_usb_driver_info_hydra },
{ }
};
MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
static void kvaser_usb_read_bulk_callback(struct urb *urb)
{
struct kvaser_usb *dev = urb->context;
+ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
int err;
unsigned int i;
goto resubmit_urb;
}
- dev->ops->dev_read_bulk_callback(dev, urb->transfer_buffer,
- urb->actual_length);
+ ops->dev_read_bulk_callback(dev, urb->transfer_buffer,
+ urb->actual_length);
resubmit_urb:
usb_fill_bulk_urb(urb, dev->udev,
{
struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
struct kvaser_usb *dev = priv->dev;
+ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
int err;
err = open_candev(netdev);
if (err)
goto error;
- err = dev->ops->dev_set_opt_mode(priv);
+ err = ops->dev_set_opt_mode(priv);
if (err)
goto error;
- err = dev->ops->dev_start_chip(priv);
+ err = ops->dev_start_chip(priv);
if (err) {
netdev_warn(netdev, "Cannot start device, error %d\n", err);
goto error;
{
struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
struct kvaser_usb *dev = priv->dev;
+ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
int err;
netif_stop_queue(netdev);
- err = dev->ops->dev_flush_queue(priv);
+ err = ops->dev_flush_queue(priv);
if (err)
netdev_warn(netdev, "Cannot flush queue, error %d\n", err);
- if (dev->ops->dev_reset_chip) {
- err = dev->ops->dev_reset_chip(dev, priv->channel);
+ if (ops->dev_reset_chip) {
+ err = ops->dev_reset_chip(dev, priv->channel);
if (err)
netdev_warn(netdev, "Cannot reset card, error %d\n",
err);
}
- err = dev->ops->dev_stop_chip(priv);
+ err = ops->dev_stop_chip(priv);
if (err)
netdev_warn(netdev, "Cannot stop device, error %d\n", err);
{
struct kvaser_usb_net_priv *priv = netdev_priv(netdev);
struct kvaser_usb *dev = priv->dev;
+ const struct kvaser_usb_dev_ops *ops = dev->driver_info->ops;
struct net_device_stats *stats = &netdev->stats;
struct kvaser_usb_tx_urb_context *context = NULL;
struct urb *urb;
goto freeurb;
}
- buf = dev->ops->dev_frame_to_cmd(priv, skb, &cmd_len,
- context->echo_index);
+ buf = ops->dev_frame_to_cmd(priv, skb, &cmd_len, context->echo_index);
if (!buf) {
stats->tx_dropped++;
dev_kfree_skb(skb);
}
}
-static int kvaser_usb_init_one(struct kvaser_usb *dev,
- const struct usb_device_id *id, int channel)
+static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel)
{
struct net_device *netdev;
struct kvaser_usb_net_priv *priv;
+ const struct kvaser_usb_driver_info *driver_info = dev->driver_info;
+ const struct kvaser_usb_dev_ops *ops = driver_info->ops;
int err;
- if (dev->ops->dev_reset_chip) {
- err = dev->ops->dev_reset_chip(dev, channel);
+ if (ops->dev_reset_chip) {
+ err = ops->dev_reset_chip(dev, channel);
if (err)
return err;
}
priv->can.state = CAN_STATE_STOPPED;
priv->can.clock.freq = dev->cfg->clock.freq;
priv->can.bittiming_const = dev->cfg->bittiming_const;
- priv->can.do_set_bittiming = dev->ops->dev_set_bittiming;
- priv->can.do_set_mode = dev->ops->dev_set_mode;
- if ((id->driver_info & KVASER_USB_HAS_TXRX_ERRORS) ||
+ priv->can.do_set_bittiming = ops->dev_set_bittiming;
+ priv->can.do_set_mode = ops->dev_set_mode;
+ if ((driver_info->quirks & KVASER_USB_QUIRK_HAS_TXRX_ERRORS) ||
(priv->dev->card_data.capabilities & KVASER_USB_CAP_BERR_CAP))
- priv->can.do_get_berr_counter = dev->ops->dev_get_berr_counter;
- if (id->driver_info & KVASER_USB_HAS_SILENT_MODE)
+ priv->can.do_get_berr_counter = ops->dev_get_berr_counter;
+ if (driver_info->quirks & KVASER_USB_QUIRK_HAS_SILENT_MODE)
priv->can.ctrlmode_supported |= CAN_CTRLMODE_LISTENONLY;
priv->can.ctrlmode_supported |= dev->card_data.ctrlmode_supported;
if (priv->can.ctrlmode_supported & CAN_CTRLMODE_FD) {
priv->can.data_bittiming_const = dev->cfg->data_bittiming_const;
- priv->can.do_set_data_bittiming =
- dev->ops->dev_set_data_bittiming;
+ priv->can.do_set_data_bittiming = ops->dev_set_data_bittiming;
}
netdev->flags |= IFF_ECHO;
struct kvaser_usb *dev;
int err;
int i;
+ const struct kvaser_usb_driver_info *driver_info;
+ const struct kvaser_usb_dev_ops *ops;
+
+ driver_info = (const struct kvaser_usb_driver_info *)id->driver_info;
+ if (!driver_info)
+ return -ENODEV;
dev = devm_kzalloc(&intf->dev, sizeof(*dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
- if (kvaser_is_leaf(id)) {
- dev->card_data.leaf.family = KVASER_LEAF;
- dev->ops = &kvaser_usb_leaf_dev_ops;
- } else if (kvaser_is_usbcan(id)) {
- dev->card_data.leaf.family = KVASER_USBCAN;
- dev->ops = &kvaser_usb_leaf_dev_ops;
- } else if (kvaser_is_hydra(id)) {
- dev->ops = &kvaser_usb_hydra_dev_ops;
- } else {
- dev_err(&intf->dev,
- "Product ID (%d) is not a supported Kvaser USB device\n",
- id->idProduct);
- return -ENODEV;
- }
-
dev->intf = intf;
+ dev->driver_info = driver_info;
+ ops = driver_info->ops;
- err = dev->ops->dev_setup_endpoints(dev);
+ err = ops->dev_setup_endpoints(dev);
if (err) {
dev_err(&intf->dev, "Cannot get usb endpoint(s)");
return err;
dev->card_data.ctrlmode_supported = 0;
dev->card_data.capabilities = 0;
- err = dev->ops->dev_init_card(dev);
+ err = ops->dev_init_card(dev);
if (err) {
dev_err(&intf->dev,
"Failed to initialize card, error %d\n", err);
return err;
}
- err = dev->ops->dev_get_software_info(dev);
+ err = ops->dev_get_software_info(dev);
if (err) {
dev_err(&intf->dev,
"Cannot get software info, error %d\n", err);
return err;
}
- if (dev->ops->dev_get_software_details) {
- err = dev->ops->dev_get_software_details(dev);
+ if (ops->dev_get_software_details) {
+ err = ops->dev_get_software_details(dev);
if (err) {
dev_err(&intf->dev,
"Cannot get software details, error %d\n", err);
dev_dbg(&intf->dev, "Max outstanding tx = %d URBs\n", dev->max_tx_urbs);
- err = dev->ops->dev_get_card_info(dev);
+ err = ops->dev_get_card_info(dev);
if (err) {
dev_err(&intf->dev, "Cannot get card info, error %d\n", err);
return err;
}
- if (dev->ops->dev_get_capabilities) {
- err = dev->ops->dev_get_capabilities(dev);
+ if (ops->dev_get_capabilities) {
+ err = ops->dev_get_capabilities(dev);
if (err) {
dev_err(&intf->dev,
"Cannot get capabilities, error %d\n", err);
}
for (i = 0; i < dev->nchannels; i++) {
- err = kvaser_usb_init_one(dev, id, i);
+ err = kvaser_usb_init_one(dev, i);
if (err) {
kvaser_usb_remove_interfaces(dev);
return err;
.brp_inc = 1,
};
-static const struct can_bittiming_const kvaser_usb_hydra_flexc_bittiming_c = {
+const struct can_bittiming_const kvaser_usb_flexc_bittiming_const = {
.name = "kvaser_usb_flex",
.tseg1_min = 4,
.tseg1_max = 16,
.freq = 24 * MEGA /* Hz */,
},
.timestamp_freq = 1,
- .bittiming_const = &kvaser_usb_hydra_flexc_bittiming_c,
+ .bittiming_const = &kvaser_usb_flexc_bittiming_const,
};
static const struct kvaser_usb_dev_cfg kvaser_usb_hydra_dev_cfg_rt = {
#define USBCAN_ERROR_STATE_RX_ERROR BIT(1)
#define USBCAN_ERROR_STATE_BUSERROR BIT(2)
-/* bittiming parameters */
-#define KVASER_USB_TSEG1_MIN 1
-#define KVASER_USB_TSEG1_MAX 16
-#define KVASER_USB_TSEG2_MIN 1
-#define KVASER_USB_TSEG2_MAX 8
-#define KVASER_USB_SJW_MAX 4
-#define KVASER_USB_BRP_MIN 1
-#define KVASER_USB_BRP_MAX 64
-#define KVASER_USB_BRP_INC 1
-
/* ctrl modes */
#define KVASER_CTRL_MODE_NORMAL 1
#define KVASER_CTRL_MODE_SILENT 2
};
};
-static const struct can_bittiming_const kvaser_usb_leaf_bittiming_const = {
- .name = "kvaser_usb",
- .tseg1_min = KVASER_USB_TSEG1_MIN,
- .tseg1_max = KVASER_USB_TSEG1_MAX,
- .tseg2_min = KVASER_USB_TSEG2_MIN,
- .tseg2_max = KVASER_USB_TSEG2_MAX,
- .sjw_max = KVASER_USB_SJW_MAX,
- .brp_min = KVASER_USB_BRP_MIN,
- .brp_max = KVASER_USB_BRP_MAX,
- .brp_inc = KVASER_USB_BRP_INC,
+static const struct can_bittiming_const kvaser_usb_leaf_m16c_bittiming_const = {
+ .name = "kvaser_usb_ucii",
+ .tseg1_min = 4,
+ .tseg1_max = 16,
+ .tseg2_min = 2,
+ .tseg2_max = 8,
+ .sjw_max = 4,
+ .brp_min = 1,
+ .brp_max = 16,
+ .brp_inc = 1,
+};
+
+static const struct can_bittiming_const kvaser_usb_leaf_m32c_bittiming_const = {
+ .name = "kvaser_usb_leaf",
+ .tseg1_min = 3,
+ .tseg1_max = 16,
+ .tseg2_min = 2,
+ .tseg2_max = 8,
+ .sjw_max = 4,
+ .brp_min = 2,
+ .brp_max = 128,
+ .brp_inc = 2,
};
-static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_8mhz = {
+static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_usbcan_dev_cfg = {
.clock = {
.freq = 8 * MEGA /* Hz */,
},
.timestamp_freq = 1,
- .bittiming_const = &kvaser_usb_leaf_bittiming_const,
+ .bittiming_const = &kvaser_usb_leaf_m16c_bittiming_const,
+};
+
+static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_m32c_dev_cfg = {
+ .clock = {
+ .freq = 16 * MEGA /* Hz */,
+ },
+ .timestamp_freq = 1,
+ .bittiming_const = &kvaser_usb_leaf_m32c_bittiming_const,
};
-static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_16mhz = {
+static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_16mhz = {
.clock = {
.freq = 16 * MEGA /* Hz */,
},
.timestamp_freq = 1,
- .bittiming_const = &kvaser_usb_leaf_bittiming_const,
+ .bittiming_const = &kvaser_usb_flexc_bittiming_const,
};
-static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_24mhz = {
+static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_24mhz = {
.clock = {
.freq = 24 * MEGA /* Hz */,
},
.timestamp_freq = 1,
- .bittiming_const = &kvaser_usb_leaf_bittiming_const,
+ .bittiming_const = &kvaser_usb_flexc_bittiming_const,
};
-static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_dev_cfg_32mhz = {
+static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_32mhz = {
.clock = {
.freq = 32 * MEGA /* Hz */,
},
.timestamp_freq = 1,
- .bittiming_const = &kvaser_usb_leaf_bittiming_const,
+ .bittiming_const = &kvaser_usb_flexc_bittiming_const,
};
static void *
sizeof(struct kvaser_cmd_tx_can);
cmd->u.tx_can.channel = priv->channel;
- switch (dev->card_data.leaf.family) {
+ switch (dev->driver_info->family) {
case KVASER_LEAF:
cmd_tx_can_flags = &cmd->u.tx_can.leaf.flags;
break;
dev->fw_version = le32_to_cpu(softinfo->fw_version);
dev->max_tx_urbs = le16_to_cpu(softinfo->max_outstanding_tx);
- switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) {
- case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK:
- dev->cfg = &kvaser_usb_leaf_dev_cfg_16mhz;
- break;
- case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK:
- dev->cfg = &kvaser_usb_leaf_dev_cfg_24mhz;
- break;
- case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK:
- dev->cfg = &kvaser_usb_leaf_dev_cfg_32mhz;
- break;
+ if (dev->driver_info->quirks & KVASER_USB_QUIRK_IGNORE_CLK_FREQ) {
+ /* Firmware expects bittiming parameters calculated for 16MHz
+ * clock, regardless of the actual clock
+ */
+ dev->cfg = &kvaser_usb_leaf_m32c_dev_cfg;
+ } else {
+ switch (sw_options & KVASER_USB_LEAF_SWOPTION_FREQ_MASK) {
+ case KVASER_USB_LEAF_SWOPTION_FREQ_16_MHZ_CLK:
+ dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_16mhz;
+ break;
+ case KVASER_USB_LEAF_SWOPTION_FREQ_24_MHZ_CLK:
+ dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_24mhz;
+ break;
+ case KVASER_USB_LEAF_SWOPTION_FREQ_32_MHZ_CLK:
+ dev->cfg = &kvaser_usb_leaf_imx_dev_cfg_32mhz;
+ break;
+ }
}
}
if (err)
return err;
- switch (dev->card_data.leaf.family) {
+ switch (dev->driver_info->family) {
case KVASER_LEAF:
kvaser_usb_leaf_get_software_info_leaf(dev, &cmd.u.leaf.softinfo);
break;
dev->fw_version = le32_to_cpu(cmd.u.usbcan.softinfo.fw_version);
dev->max_tx_urbs =
le16_to_cpu(cmd.u.usbcan.softinfo.max_outstanding_tx);
- dev->cfg = &kvaser_usb_leaf_dev_cfg_8mhz;
+ dev->cfg = &kvaser_usb_leaf_usbcan_dev_cfg;
break;
}
dev->nchannels = cmd.u.cardinfo.nchannels;
if (dev->nchannels > KVASER_USB_MAX_NET_DEVICES ||
- (dev->card_data.leaf.family == KVASER_USBCAN &&
+ (dev->driver_info->family == KVASER_USBCAN &&
dev->nchannels > MAX_USBCAN_NET_DEVICES))
return -EINVAL;
new_state < CAN_STATE_BUS_OFF)
priv->can.can_stats.restarts++;
- switch (dev->card_data.leaf.family) {
+ switch (dev->driver_info->family) {
case KVASER_LEAF:
if (es->leaf.error_factor) {
priv->can.can_stats.bus_error++;
}
}
- switch (dev->card_data.leaf.family) {
+ switch (dev->driver_info->family) {
case KVASER_LEAF:
if (es->leaf.error_factor) {
cf->can_id |= CAN_ERR_BUSERROR | CAN_ERR_PROT;
stats = &priv->netdev->stats;
if ((cmd->u.rx_can_header.flag & MSG_FLAG_ERROR_FRAME) &&
- (dev->card_data.leaf.family == KVASER_LEAF &&
+ (dev->driver_info->family == KVASER_LEAF &&
cmd->id == CMD_LEAF_LOG_MESSAGE)) {
kvaser_usb_leaf_leaf_rx_error(dev, cmd);
return;
return;
}
- switch (dev->card_data.leaf.family) {
+ switch (dev->driver_info->family) {
case KVASER_LEAF:
rx_data = cmd->u.leaf.rx_can.data;
break;
return;
}
- if (dev->card_data.leaf.family == KVASER_LEAF && cmd->id ==
+ if (dev->driver_info->family == KVASER_LEAF && cmd->id ==
CMD_LEAF_LOG_MESSAGE) {
cf->can_id = le32_to_cpu(cmd->u.leaf.log_message.id);
if (cf->can_id & KVASER_EXTENDED_FRAME)
break;
case CMD_LEAF_LOG_MESSAGE:
- if (dev->card_data.leaf.family != KVASER_LEAF)
+ if (dev->driver_info->family != KVASER_LEAF)
goto warn;
kvaser_usb_leaf_rx_can_msg(dev, cmd);
break;
case CMD_CHIP_STATE_EVENT:
case CMD_CAN_ERROR_EVENT:
- if (dev->card_data.leaf.family == KVASER_LEAF)
+ if (dev->driver_info->family == KVASER_LEAF)
kvaser_usb_leaf_leaf_rx_error(dev, cmd);
else
kvaser_usb_leaf_usbcan_rx_error(dev, cmd);
/* Ignored commands */
case CMD_USBCAN_CLOCK_OVERFLOW_EVENT:
- if (dev->card_data.leaf.family != KVASER_USBCAN)
+ if (dev->driver_info->family != KVASER_USBCAN)
goto warn;
break;
case CMD_FLUSH_QUEUE_REPLY:
- if (dev->card_data.leaf.family != KVASER_LEAF)
+ if (dev->driver_info->family != KVASER_LEAF)
goto warn;
break;
.tseg2_min = 1,
.tseg2_max = 128,
.sjw_max = 128,
- .brp_min = 2,
+ .brp_min = 1,
.brp_max = 256,
.brp_inc = 1,
};
.tseg2_min = 1,
.tseg2_max = 16,
.sjw_max = 16,
- .brp_min = 2,
+ .brp_min = 1,
.brp_max = 256,
.brp_inc = 1,
};
if (duplex == DUPLEX_FULL)
reg |= DUPLX_MODE;
+ if (tx_pause)
+ reg |= TXFLOW_CNTL;
+ if (rx_pause)
+ reg |= RXFLOW_CNTL;
+
core_writel(priv, reg, offset);
}
const char *label, *state;
int ret = -EINVAL;
+ of_node_get(hellcreek->dev->of_node);
leds = of_find_node_by_name(hellcreek->dev->of_node, "leds");
if (!leds) {
dev_err(hellcreek->dev, "No LEDs specified in device tree!\n");
for_each_available_child_of_node(gphy_fw_list_np, gphy_fw_np) {
err = gswip_gphy_fw_probe(priv, &priv->gphy_fw[i],
gphy_fw_np, i);
- if (err)
+ if (err) {
+ of_node_put(gphy_fw_np);
goto remove_gphy;
+ }
i++;
}
ports = of_get_child_by_name(dev->dev->of_node, "ethernet-ports");
if (!ports)
ports = of_get_child_by_name(dev->dev->of_node, "ports");
- if (ports)
+ if (ports) {
for_each_available_child_of_node(ports, port) {
if (of_property_read_u32(port, "reg",
&port_num))
continue;
if (!(dev->port_mask & BIT(port_num))) {
of_node_put(port);
+ of_node_put(ports);
return -EINVAL;
}
of_get_phy_mode(port,
&dev->ports[port_num].interface);
}
+ of_node_put(ports);
+ }
dev->synclko_125 = of_property_read_bool(dev->dev->of_node,
"microchip,synclko-125");
dev->synclko_disable = of_property_read_bool(dev->dev->of_node,
}
static int mv88e6xxx_serdes_pcs_get_state(struct mv88e6xxx_chip *chip,
- u16 ctrl, u16 status, u16 lpa,
+ u16 bmsr, u16 lpa, u16 status,
struct phylink_link_state *state)
{
+ state->link = false;
+
+ /* If the BMSR reports that the link had failed, report this to
+ * phylink.
+ */
+ if (!(bmsr & BMSR_LSTATUS))
+ return 0;
+
state->link = !!(status & MV88E6390_SGMII_PHY_STATUS_LINK);
+ state->an_complete = !!(bmsr & BMSR_ANEGCOMPLETE);
if (status & MV88E6390_SGMII_PHY_STATUS_SPD_DPL_VALID) {
/* The Spped and Duplex Resolved register is 1 if AN is enabled
* and complete, or if AN is disabled. So with disabled AN we
- * still get here on link up. But we want to set an_complete
- * only if AN was enabled, thus we look at BMCR_ANENABLE.
- * (According to 802.3-2008 section 22.2.4.2.10, we should be
- * able to get this same value from BMSR_ANEGCAPABLE, but tests
- * show that these Marvell PHYs don't conform to this part of
- * the specificaion - BMSR_ANEGCAPABLE is simply always 1.)
+ * still get here on link up.
*/
- state->an_complete = !!(ctrl & BMCR_ANENABLE);
state->duplex = status &
MV88E6390_SGMII_PHY_STATUS_DUPLEX_FULL ?
DUPLEX_FULL : DUPLEX_HALF;
int mv88e6352_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
int lane, struct phylink_link_state *state)
{
- u16 lpa, status, ctrl;
+ u16 bmsr, lpa, status;
int err;
- err = mv88e6352_serdes_read(chip, MII_BMCR, &ctrl);
+ err = mv88e6352_serdes_read(chip, MII_BMSR, &bmsr);
if (err) {
- dev_err(chip->dev, "can't read Serdes PHY control: %d\n", err);
+ dev_err(chip->dev, "can't read Serdes PHY BMSR: %d\n", err);
return err;
}
return err;
}
- return mv88e6xxx_serdes_pcs_get_state(chip, ctrl, status, lpa, state);
+ return mv88e6xxx_serdes_pcs_get_state(chip, bmsr, lpa, status, state);
}
int mv88e6352_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
int port, int lane, struct phylink_link_state *state)
{
- u16 lpa, status, ctrl;
+ u16 bmsr, lpa, status;
int err;
err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
- MV88E6390_SGMII_BMCR, &ctrl);
+ MV88E6390_SGMII_BMSR, &bmsr);
if (err) {
- dev_err(chip->dev, "can't read Serdes PHY control: %d\n", err);
+ dev_err(chip->dev, "can't read Serdes PHY BMSR: %d\n", err);
return err;
}
return err;
}
- return mv88e6xxx_serdes_pcs_get_state(chip, ctrl, status, lpa, state);
+ return mv88e6xxx_serdes_pcs_get_state(chip, bmsr, lpa, status, state);
}
static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
static void vsc9959_psfp_counters_get(struct ocelot *ocelot, u32 index,
struct felix_stream_filter_counters *counters)
{
+ mutex_lock(&ocelot->stats_lock);
+
ocelot_rmw(ocelot, SYS_STAT_CFG_STAT_VIEW(index),
SYS_STAT_CFG_STAT_VIEW_M,
SYS_STAT_CFG);
SYS_STAT_CFG_STAT_VIEW(index) |
SYS_STAT_CFG_STAT_CLEAR_SHOT(0x10),
SYS_STAT_CFG);
+
+ mutex_unlock(&ocelot->stats_lock);
}
static int vsc9959_psfp_filter_add(struct ocelot *ocelot, int port,
qca8k_port_change_mtu(struct dsa_switch *ds, int port, int new_mtu)
{
struct qca8k_priv *priv = ds->priv;
+ int ret;
/* We have only have a general MTU setting.
* DSA always set the CPU port's MTU to the largest MTU of the slave
if (!dsa_is_cpu_port(ds, port))
return 0;
+ /* To change the MAX_FRAME_SIZE the cpu ports must be off or
+ * the switch panics.
+ * Turn off both cpu ports before applying the new value to prevent
+ * this.
+ */
+ if (priv->port_enabled_map & BIT(0))
+ qca8k_port_set_status(priv, 0, 0);
+
+ if (priv->port_enabled_map & BIT(6))
+ qca8k_port_set_status(priv, 6, 0);
+
/* Include L2 header / FCS length */
- return qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, new_mtu + ETH_HLEN + ETH_FCS_LEN);
+ ret = qca8k_write(priv, QCA8K_MAX_FRAME_SIZE, new_mtu + ETH_HLEN + ETH_FCS_LEN);
+
+ if (priv->port_enabled_map & BIT(0))
+ qca8k_port_set_status(priv, 0, 1);
+
+ if (priv->port_enabled_map & BIT(6))
+ qca8k_port_set_status(priv, 6, 1);
+
+ return ret;
}
static int
#define QCA8K_ETHERNET_MDIO_PRIORITY 7
#define QCA8K_ETHERNET_PHY_PRIORITY 6
-#define QCA8K_ETHERNET_TIMEOUT 100
+#define QCA8K_ETHERNET_TIMEOUT 5
#define QCA8K_NUM_PORTS 7
#define QCA8K_NUM_CPU_PORTS 2
return 0;
}
-static bool rtl8365mb_phy_mode_supported(struct dsa_switch *ds, int port,
- phy_interface_t interface)
-{
- int ext_int;
-
- ext_int = rtl8365mb_extint_port_map[port];
-
- if (ext_int < 0 &&
- (interface == PHY_INTERFACE_MODE_NA ||
- interface == PHY_INTERFACE_MODE_INTERNAL ||
- interface == PHY_INTERFACE_MODE_GMII))
- /* Internal PHY */
- return true;
- else if ((ext_int >= 1) &&
- phy_interface_mode_is_rgmii(interface))
- /* Extension MAC */
- return true;
-
- return false;
-}
-
static void rtl8365mb_phylink_get_caps(struct dsa_switch *ds, int port,
struct phylink_config *config)
{
- if (dsa_is_user_port(ds, port))
+ if (dsa_is_user_port(ds, port)) {
__set_bit(PHY_INTERFACE_MODE_INTERNAL,
config->supported_interfaces);
- else if (dsa_is_cpu_port(ds, port))
+
+ /* GMII is the default interface mode for phylib, so
+ * we have to support it for ports with integrated PHY.
+ */
+ __set_bit(PHY_INTERFACE_MODE_GMII,
+ config->supported_interfaces);
+ } else if (dsa_is_cpu_port(ds, port)) {
phy_interface_set_rgmii(config->supported_interfaces);
+ }
config->mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
MAC_10 | MAC_100 | MAC_1000FD;
struct realtek_priv *priv = ds->priv;
int ret;
- if (!rtl8365mb_phy_mode_supported(ds, port, state->interface)) {
- dev_err(priv->dev, "phy mode %s is unsupported on port %d\n",
- phy_modes(state->interface), port);
- return;
- }
-
if (mode != MLO_AN_PHY && mode != MLO_AN_FIXED) {
dev_err(priv->dev,
"port %d supports only conventional PHY or fixed-link\n",
};
MODULE_DEVICE_TABLE(of, sja1105_dt_ids);
+static const struct spi_device_id sja1105_spi_ids[] = {
+ { "sja1105e" },
+ { "sja1105t" },
+ { "sja1105p" },
+ { "sja1105q" },
+ { "sja1105r" },
+ { "sja1105s" },
+ { "sja1110a" },
+ { "sja1110b" },
+ { "sja1110c" },
+ { "sja1110d" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, sja1105_spi_ids);
+
static struct spi_driver sja1105_driver = {
.driver = {
.name = "sja1105",
.owner = THIS_MODULE,
.of_match_table = of_match_ptr(sja1105_dt_ids),
},
+ .id_table = sja1105_spi_ids,
.probe = sja1105_probe,
.remove = sja1105_remove,
.shutdown = sja1105_shutdown,
};
MODULE_DEVICE_TABLE(of, vsc73xx_of_match);
+static const struct spi_device_id vsc73xx_spi_ids[] = {
+ { "vsc7385" },
+ { "vsc7388" },
+ { "vsc7395" },
+ { "vsc7398" },
+ { },
+};
+MODULE_DEVICE_TABLE(spi, vsc73xx_spi_ids);
+
static struct spi_driver vsc73xx_spi_driver = {
.probe = vsc73xx_spi_probe,
.remove = vsc73xx_spi_remove,
.shutdown = vsc73xx_spi_shutdown,
+ .id_table = vsc73xx_spi_ids,
.driver = {
.name = "vsc73xx-spi",
.of_match_table = vsc73xx_of_match,
mdio = mdiobus_alloc();
if (mdio == NULL) {
netdev_err(dev, "Error allocating MDIO bus\n");
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto put_node;
}
mdio->name = ALTERA_TSE_RESOURCE_NAME;
mdio->id);
goto out_free_mdio;
}
+ of_node_put(mdio_node);
if (netif_msg_drv(priv))
netdev_info(dev, "MDIO bus %s: created\n", mdio->id);
out_free_mdio:
mdiobus_free(mdio);
mdio = NULL;
+put_node:
+ of_node_put(mdio_node);
return ret;
}
pr_cont("\n");
}
}
- prxd->buff_stat = (u32)(pDB->dma_addr | RX_DMA_ENABLE);
+ prxd->buff_stat = lower_32_bits(pDB->dma_addr) | RX_DMA_ENABLE;
aup->rx_head = (aup->rx_head + 1) & (NUM_RX_DMA - 1);
wmb(); /* drain writebuffer */
ps->tx_packets++;
ps->tx_bytes += ptxd->len;
- ptxd->buff_stat = pDB->dma_addr | TX_DMA_ENABLE;
+ ptxd->buff_stat = lower_32_bits(pDB->dma_addr) | TX_DMA_ENABLE;
wmb(); /* drain writebuffer */
dev_kfree_skb(skb);
aup->tx_head = (aup->tx_head + 1) & (NUM_TX_DMA - 1);
/* Allocate the data buffers
* Snooping works fine with eth on all au1xxx
*/
- aup->vaddr = (u32)dma_alloc_coherent(&pdev->dev, MAX_BUF_SIZE *
- (NUM_TX_BUFFS + NUM_RX_BUFFS),
- &aup->dma_addr, 0);
+ aup->vaddr = dma_alloc_coherent(&pdev->dev, MAX_BUF_SIZE *
+ (NUM_TX_BUFFS + NUM_RX_BUFFS),
+ &aup->dma_addr, 0);
if (!aup->vaddr) {
dev_err(&pdev->dev, "failed to allocate data buffers\n");
err = -ENOMEM;
for (i = 0; i < (NUM_TX_BUFFS+NUM_RX_BUFFS); i++) {
pDB->pnext = pDBfree;
pDBfree = pDB;
- pDB->vaddr = (u32 *)((unsigned)aup->vaddr + MAX_BUF_SIZE*i);
- pDB->dma_addr = (dma_addr_t)virt_to_bus(pDB->vaddr);
+ pDB->vaddr = aup->vaddr + MAX_BUF_SIZE * i;
+ pDB->dma_addr = aup->dma_addr + MAX_BUF_SIZE * i;
pDB++;
}
aup->pDBfree = pDBfree;
if (!pDB)
goto err_out;
- aup->rx_dma_ring[i]->buff_stat = (unsigned)pDB->dma_addr;
+ aup->rx_dma_ring[i]->buff_stat = lower_32_bits(pDB->dma_addr);
aup->rx_db_inuse[i] = pDB;
}
if (!pDB)
goto err_out;
- aup->tx_dma_ring[i]->buff_stat = (unsigned)pDB->dma_addr;
+ aup->tx_dma_ring[i]->buff_stat = lower_32_bits(pDB->dma_addr);
aup->tx_dma_ring[i]->len = 0;
aup->tx_db_inuse[i] = pDB;
}
iounmap(aup->mac);
err_remap1:
dma_free_coherent(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
- (void *)aup->vaddr, aup->dma_addr);
+ aup->vaddr, aup->dma_addr);
err_vaddr:
free_netdev(dev);
err_alloc:
au1000_ReleaseDB(aup, aup->tx_db_inuse[i]);
dma_free_coherent(&pdev->dev, MAX_BUF_SIZE * (NUM_TX_BUFFS + NUM_RX_BUFFS),
- (void *)aup->vaddr, aup->dma_addr);
+ aup->vaddr, aup->dma_addr);
iounmap(aup->macdma);
iounmap(aup->mac);
struct mac_reg *mac; /* mac registers */
u32 *enable; /* address of MAC Enable Register */
void __iomem *macdma; /* base of MAC DMA port */
- u32 vaddr; /* virtual address of rx/tx buffers */
- dma_addr_t dma_addr; /* dma address of rx/tx buffers */
+ void *vaddr; /* virtual address of rx/tx buffers */
+ dma_addr_t dma_addr; /* dma address of rx/tx buffers */
spinlock_t lock; /* Serialise access to device */
netdev_dbg(netdev, "Dst MAC addr: %pM\n", eth->h_dest);
netdev_dbg(netdev, "Src MAC addr: %pM\n", eth->h_source);
- netdev_dbg(netdev, "Protocol: %#06hx\n", ntohs(eth->h_proto));
+ netdev_dbg(netdev, "Protocol: %#06x\n", ntohs(eth->h_proto));
for (i = 0; i < skb->len; i += 32) {
unsigned int len = min(skb->len - i, 32U);
* the PHY resources listed last
*/
phy_memnum = xgbe_resource_count(pdev, IORESOURCE_MEM) - 3;
- phy_irqnum = xgbe_resource_count(pdev, IORESOURCE_IRQ) - 1;
+ phy_irqnum = platform_irq_count(pdev) - 1;
dma_irqnum = 1;
dma_irqend = phy_irqnum;
} else {
phy_memnum = 0;
phy_irqnum = 0;
dma_irqnum = 1;
- dma_irqend = xgbe_resource_count(pdev, IORESOURCE_IRQ);
+ dma_irqend = platform_irq_count(pdev);
}
/* Obtain the mmio areas for the device */
}
}
-static int aq_suspend_common(struct device *dev, bool deep)
+static int aq_suspend_common(struct device *dev)
{
struct aq_nic_s *nic = pci_get_drvdata(to_pci_dev(dev));
if (netif_running(nic->ndev))
aq_nic_stop(nic);
- if (deep) {
- aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
- aq_nic_set_power(nic);
- }
+ aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
+ aq_nic_set_power(nic);
rtnl_unlock();
return 0;
}
-static int atl_resume_common(struct device *dev, bool deep)
+static int atl_resume_common(struct device *dev)
{
struct pci_dev *pdev = to_pci_dev(dev);
struct aq_nic_s *nic;
pci_set_power_state(pdev, PCI_D0);
pci_restore_state(pdev);
- if (deep) {
- /* Reinitialize Nic/Vecs objects */
- aq_nic_deinit(nic, !nic->aq_hw->aq_nic_cfg->wol);
- }
-
if (netif_running(nic->ndev)) {
ret = aq_nic_init(nic);
if (ret)
static int aq_pm_freeze(struct device *dev)
{
- return aq_suspend_common(dev, true);
+ return aq_suspend_common(dev);
}
static int aq_pm_suspend_poweroff(struct device *dev)
{
- return aq_suspend_common(dev, true);
+ return aq_suspend_common(dev);
}
static int aq_pm_thaw(struct device *dev)
{
- return atl_resume_common(dev, true);
+ return atl_resume_common(dev);
}
static int aq_pm_resume_restore(struct device *dev)
{
- return atl_resume_common(dev, true);
+ return atl_resume_common(dev);
}
static const struct dev_pm_ops aq_pm_ops = {
np = of_get_child_by_name(core->dev.of_node, "mdio");
err = of_mdiobus_register(mii_bus, np);
+ of_node_put(np);
if (err) {
dev_err(&core->dev, "Registration of mii bus failed\n");
goto err_free_bus;
bcma_mdio_mii_unregister(bgmac->mii_bus);
bgmac_enet_remove(bgmac);
bcma_set_drvdata(core, NULL);
- kfree(bgmac);
}
static struct bcma_driver bgmac_bcma_driver = {
static int bnxt_hwrm_queue_qportcfg(struct bnxt *bp);
-static int bnxt_hwrm_func_qcaps(struct bnxt *bp)
+int bnxt_hwrm_func_qcaps(struct bnxt *bp)
{
int rc;
if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_RESC_CHANGE)
resc_reinit = true;
- if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE)
+ if (flags & FUNC_DRV_IF_CHANGE_RESP_FLAGS_HOT_FW_RESET_DONE ||
+ test_bit(BNXT_STATE_FW_RESET_DET, &bp->state))
fw_reset = true;
else
bnxt_remap_fw_health_regs(bp);
int bnxt_hwrm_alloc_wol_fltr(struct bnxt *bp);
int bnxt_hwrm_free_wol_fltr(struct bnxt *bp);
int bnxt_hwrm_func_resc_qcaps(struct bnxt *bp, bool all);
+int bnxt_hwrm_func_qcaps(struct bnxt *bp);
int bnxt_hwrm_fw_set_time(struct bnxt *);
int bnxt_open_nic(struct bnxt *, bool, bool);
int bnxt_half_open_nic(struct bnxt *bp);
if (rc)
return rc;
- rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH);
- if (rc)
- return rc;
+ if (BNXT_CHIP_P5(bp)) {
+ rc = bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_SRT_PATCH);
+ if (rc)
+ return rc;
+ }
return bnxt_dl_livepatch_info_put(bp, req, BNXT_FW_CRT_PATCH);
}
u64 *ns)
{
struct bnxt_ptp_cfg *ptp = bp->ptp_cfg;
+ u32 high_before, high_now, low;
if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
return -EIO;
+ high_before = readl(bp->bar0 + ptp->refclk_mapped_regs[1]);
ptp_read_system_prets(sts);
- *ns = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
+ low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
ptp_read_system_postts(sts);
- *ns |= (u64)readl(bp->bar0 + ptp->refclk_mapped_regs[1]) << 32;
+ high_now = readl(bp->bar0 + ptp->refclk_mapped_regs[1]);
+ if (high_now != high_before) {
+ ptp_read_system_prets(sts);
+ low = readl(bp->bar0 + ptp->refclk_mapped_regs[0]);
+ ptp_read_system_postts(sts);
+ }
+ *ns = ((u64)high_now << 32) | low;
+
return 0;
}
goto err_out2;
rc = pci_enable_sriov(bp->pdev, *num_vfs);
- if (rc)
+ if (rc) {
+ bnxt_ulp_sriov_cfg(bp, 0);
goto err_out2;
+ }
return 0;
/* Free the resources reserved for various VF's */
bnxt_hwrm_func_vf_resource_free(bp, *num_vfs);
+ /* Restore the max resources */
+ bnxt_hwrm_func_qcaps(bp);
+
err_out1:
bnxt_free_vf_resources(bp);
struct xdp_buff *xdp)
{
struct skb_shared_info *sinfo;
- struct bnxt_sw_tx_bd *tx_buf, *first_buf;
+ struct bnxt_sw_tx_bd *tx_buf;
struct tx_bd *txbd;
int num_frags = 0;
u32 flags;
/* fill up the first buffer */
prod = txr->tx_prod;
tx_buf = &txr->tx_buf_ring[prod];
- first_buf = tx_buf;
tx_buf->nr_frags = num_frags;
if (xdp)
tx_buf->page = virt_to_head_page(xdp->data);
txbd = &txr->tx_desc_ring[TX_RING(prod)][TX_IDX(prod)];
- flags = ((len) << TX_BD_LEN_SHIFT) | ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT);
+ flags = (len << TX_BD_LEN_SHIFT) |
+ ((num_frags + 1) << TX_BD_FLAGS_BD_CNT_SHIFT) |
+ bnxt_lhint_arr[len >> 9];
txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
txbd->tx_bd_opaque = prod;
txbd->tx_bd_haddr = cpu_to_le64(mapping);
flags = frag_len << TX_BD_LEN_SHIFT;
txbd->tx_bd_len_flags_type = cpu_to_le32(flags);
- txbd->tx_bd_opaque = prod;
txbd->tx_bd_haddr = cpu_to_le64(frag_mapping);
len = frag_len;
prod = NEXT_TX(prod);
txr->tx_prod = prod;
- return first_buf;
+ return tx_buf;
}
static void __bnxt_xmit_xdp(struct bnxt *bp, struct bnxt_tx_ring_info *txr,
csk->sndbuf = newsk->sk_sndbuf;
csk->smac_idx = ((struct port_info *)netdev_priv(ndev))->smt_idx;
RCV_WSCALE(tp) = select_rcv_wscale(tcp_full_space(newsk),
- sock_net(newsk)->
- ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(newsk)->
+ ipv4.sysctl_tcp_window_scaling),
tp->window_clamp);
neigh_release(n);
inet_inherit_port(&tcp_hashinfo, lsk, newsk);
#endif
}
if (req->tcpopt.wsf <= 14 &&
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
inet_rsk(oreq)->wscale_ok = 1;
inet_rsk(oreq)->snd_wscale = req->tcpopt.wsf;
}
th_ecn = tcph->ece && tcph->cwr;
if (th_ecn) {
ect = !INET_ECN_is_not_ect(ip_dsfield);
- ecn_ok = sock_net(sk)->ipv4.sysctl_tcp_ecn;
+ ecn_ok = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn);
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(sk))
inet_rsk(oreq)->ecn_ok = 1;
}
/* Uses sync mcc */
int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
- u8 page_num, u8 *data)
+ u8 page_num, u32 off, u32 len, u8 *data)
{
struct be_dma_mem cmd;
struct be_mcc_wrb *wrb;
req->port = cpu_to_le32(adapter->hba_port_num);
req->page_num = cpu_to_le32(page_num);
status = be_mcc_notify_wait(adapter);
- if (!status) {
+ if (!status && len > 0) {
struct be_cmd_resp_port_type *resp = cmd.va;
- memcpy(data, resp->page_data, PAGE_DATA_LEN);
+ memcpy(data, resp->page_data + off, len);
}
err:
mutex_unlock(&adapter->mcc_lock);
int status;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
switch (adapter->phy.interface_type) {
case PHY_TYPE_QSFP:
int status;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
strlcpy(adapter->phy.vendor_name, page_data +
SFP_VENDOR_NAME_OFFSET, SFP_VENDOR_NAME_LEN - 1);
int be_cmd_get_beacon_state(struct be_adapter *adapter, u8 port_num,
u32 *state);
int be_cmd_read_port_transceiver_data(struct be_adapter *adapter,
- u8 page_num, u8 *data);
+ u8 page_num, u32 off, u32 len, u8 *data);
int be_cmd_query_cable_type(struct be_adapter *adapter);
int be_cmd_query_sfp_info(struct be_adapter *adapter);
int lancer_cmd_read_object(struct be_adapter *adapter, struct be_dma_mem *cmd,
return -EOPNOTSUPP;
status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- page_data);
+ 0, PAGE_DATA_LEN, page_data);
if (!status) {
if (!page_data[SFP_PLUS_SFF_8472_COMP]) {
modinfo->type = ETH_MODULE_SFF_8079;
{
struct be_adapter *adapter = netdev_priv(netdev);
int status;
+ u32 begin, end;
if (!check_privilege(adapter, MAX_PRIVILEGES))
return -EOPNOTSUPP;
- status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0,
- data);
- if (status)
- goto err;
+ begin = eeprom->offset;
+ end = eeprom->offset + eeprom->len;
+
+ if (begin < PAGE_DATA_LEN) {
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A0, begin,
+ min_t(u32, end, PAGE_DATA_LEN) - begin,
+ data);
+ if (status)
+ goto err;
+
+ data += PAGE_DATA_LEN - begin;
+ begin = PAGE_DATA_LEN;
+ }
- if (eeprom->offset + eeprom->len > PAGE_DATA_LEN) {
- status = be_cmd_read_port_transceiver_data(adapter,
- TR_PAGE_A2,
- data +
- PAGE_DATA_LEN);
+ if (end > PAGE_DATA_LEN) {
+ status = be_cmd_read_port_transceiver_data(adapter, TR_PAGE_A2,
+ begin - PAGE_DATA_LEN,
+ end - begin, data);
if (status)
goto err;
}
- if (eeprom->offset)
- memcpy(data, data + eeprom->offset, eeprom->len);
err:
return be_cmd_status(status);
}
return rc;
}
+static bool ftgmac100_has_child_node(struct device_node *np, const char *name)
+{
+ struct device_node *child_np = of_get_child_by_name(np, name);
+ bool ret = false;
+
+ if (child_np) {
+ ret = true;
+ of_node_put(child_np);
+ }
+
+ return ret;
+}
+
static int ftgmac100_probe(struct platform_device *pdev)
{
struct resource *res;
/* Display what we found */
phy_attached_info(phy);
- } else if (np && !of_get_child_by_name(np, "mdio")) {
+ } else if (np && !ftgmac100_has_child_node(np, "mdio")) {
/* Support legacy ASPEED devicetree descriptions that decribe a
* MAC with an embedded MDIO controller but have no "mdio"
* child node. Automatically scan the MDIO bus for available
int ref_ok, struct funeth_txq *xdp_q)
{
struct bpf_prog *xdp_prog;
+ struct xdp_frame *xdpf;
struct xdp_buff xdp;
u32 act;
case XDP_TX:
if (unlikely(!ref_ok))
goto pass;
- if (!fun_xdp_tx(xdp_q, xdp.data, xdp.data_end - xdp.data))
+
+ xdpf = xdp_convert_buff_to_frame(&xdp);
+ if (!xdpf || !fun_xdp_tx(xdp_q, xdpf))
goto xdp_error;
FUN_QSTAT_INC(q, xdp_tx);
q->xdp_flush |= FUN_XDP_FLUSH_TX;
do {
fun_xdp_unmap(q, reclaim_idx);
- page_frag_free(q->info[reclaim_idx].vaddr);
+ xdp_return_frame(q->info[reclaim_idx].xdpf);
trace_funeth_tx_free(q, reclaim_idx, 1, head);
return npkts;
}
-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len)
+bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf)
{
struct fun_eth_tx_req *req;
struct fun_dataop_gl *gle;
- unsigned int idx;
+ unsigned int idx, len;
dma_addr_t dma;
if (fun_txq_avail(q) < FUN_XDP_CLEAN_THRES)
return false;
}
- dma = dma_map_single(q->dma_dev, data, len, DMA_TO_DEVICE);
+ len = xdpf->len;
+ dma = dma_map_single(q->dma_dev, xdpf->data, len, DMA_TO_DEVICE);
if (unlikely(dma_mapping_error(q->dma_dev, dma))) {
FUN_QSTAT_INC(q, tx_map_err);
return false;
gle = (struct fun_dataop_gl *)req->dataop.imm;
fun_dataop_gl_init(gle, 0, 0, len, dma);
- q->info[idx].vaddr = data;
+ q->info[idx].xdpf = xdpf;
u64_stats_update_begin(&q->syncp);
q->stats.tx_bytes += len;
if (unlikely(q_idx >= fp->num_xdpqs))
return -ENXIO;
- for (q = xdpqs[q_idx], i = 0; i < n; i++) {
- const struct xdp_frame *xdpf = frames[i];
-
- if (!fun_xdp_tx(q, xdpf->data, xdpf->len))
+ for (q = xdpqs[q_idx], i = 0; i < n; i++)
+ if (!fun_xdp_tx(q, frames[i]))
break;
- }
if (unlikely(flags & XDP_XMIT_FLUSH))
fun_txq_wr_db(q);
unsigned int idx = q->cons_cnt & q->mask;
fun_xdp_unmap(q, idx);
- page_frag_free(q->info[idx].vaddr);
+ xdp_return_frame(q->info[idx].xdpf);
q->cons_cnt++;
}
}
struct funeth_tx_info { /* per Tx descriptor state */
union {
- struct sk_buff *skb; /* associated packet */
- void *vaddr; /* start address for XDP */
+ struct sk_buff *skb; /* associated packet (sk_buff path) */
+ struct xdp_frame *xdpf; /* associated XDP frame (XDP path) */
};
};
int fun_rxq_napi_poll(struct napi_struct *napi, int budget);
int fun_txq_napi_poll(struct napi_struct *napi, int budget);
netdev_tx_t fun_start_xmit(struct sk_buff *skb, struct net_device *netdev);
-bool fun_xdp_tx(struct funeth_txq *q, void *data, unsigned int len);
+bool fun_xdp_tx(struct funeth_txq *q, struct xdp_frame *xdpf);
int fun_xdp_xmit_frames(struct net_device *dev, int n,
struct xdp_frame **frames, u32 flags);
u8 prio_tc[HNAE3_MAX_USER_PRIO]; /* TC indexed by prio */
u16 tqp_count[HNAE3_MAX_TC];
u16 tqp_offset[HNAE3_MAX_TC];
+ u8 max_tc; /* Total number of TCs */
u8 num_tc; /* Total number of enabled TCs */
bool mqprio_active;
};
if (old_ringparam->tx_desc_num == new_ringparam->tx_desc_num &&
old_ringparam->rx_desc_num == new_ringparam->rx_desc_num &&
old_ringparam->rx_buf_len == new_ringparam->rx_buf_len) {
- netdev_info(ndev, "ringparam not changed\n");
+ netdev_info(ndev, "descriptor number and rx buffer length not changed\n");
return false;
}
static int hclge_update_port_info(struct hclge_dev *hdev)
{
struct hclge_mac *mac = &hdev->hw.mac;
- int speed = HCLGE_MAC_SPEED_UNKNOWN;
+ int speed;
int ret;
/* get the port info from SFP cmd if not copper port */
if (!hdev->support_sfp_query)
return 0;
- if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2)
+ if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
+ speed = mac->speed;
ret = hclge_get_sfp_info(hdev, mac);
- else
+ } else {
+ speed = HCLGE_MAC_SPEED_UNKNOWN;
ret = hclge_get_sfp_speed(hdev, &speed);
+ }
if (ret == -EOPNOTSUPP) {
hdev->support_sfp_query = false;
if (hdev->ae_dev->dev_version >= HNAE3_DEVICE_VERSION_V2) {
if (mac->speed_type == QUERY_ACTIVE_SPEED) {
hclge_update_port_capability(hdev, mac);
+ if (mac->speed != speed)
+ (void)hclge_tm_port_shaper_cfg(hdev);
return 0;
}
return hclge_cfg_mac_speed_dup(hdev, mac->speed,
link_state_old = vport->vf_info.link_state;
vport->vf_info.link_state = link_state;
+ /* return success directly if the VF is unalive, VF will
+ * query link state itself when it starts work.
+ */
+ if (!test_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state))
+ return 0;
+
ret = hclge_push_vf_link_status(vport);
if (ret) {
vport->vf_info.link_state = link_state_old;
if (ret)
return ret;
+ vport->port_base_vlan_cfg.tbl_sta = false;
/* remove old VLAN tag */
if (old_info->vlan_tag == 0)
ret = hclge_set_vf_vlan_common(hdev, vport->vport_id,
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev,
- u16 qs_id, u8 pri)
+static int hclge_tm_qs_to_pri_map_cfg(struct hclge_dev *hdev, u16 qs_id, u8 pri,
+ bool link_vld)
{
struct hclge_qs_to_pri_link_cmd *map;
struct hclge_desc desc;
map->qs_id = cpu_to_le16(qs_id);
map->priority = pri;
- map->link_vld = HCLGE_TM_QS_PRI_LINK_VLD_MSK;
+ map->link_vld = link_vld ? HCLGE_TM_QS_PRI_LINK_VLD_MSK : 0;
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
return hclge_cmd_send(&hdev->hw, &desc, 1);
}
-static int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
+int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev)
{
struct hclge_port_shapping_cmd *shap_cfg_cmd;
struct hclge_shaper_ir_para ir_para;
* one tc for VF for simplicity. VF's vport_id is non zero.
*/
if (vport->vport_id) {
+ kinfo->tc_info.max_tc = 1;
kinfo->tc_info.num_tc = 1;
vport->qs_offset = HNAE3_MAX_TC +
vport->vport_id - HCLGE_VF_VPORT_START_NUM;
vport_max_rss_size = hdev->vf_rss_size_max;
} else {
+ kinfo->tc_info.max_tc = hdev->tc_max;
kinfo->tc_info.num_tc =
min_t(u16, vport->alloc_tqps, hdev->tm_info.num_tc);
vport->qs_offset = 0;
kinfo->num_tqps = hclge_vport_get_tqp_num(vport);
vport->dwrr = 100; /* 100 percent as init */
vport->bw_limit = hdev->tm_info.pg_info[0].bw_limit;
- hdev->rss_cfg.rss_size = kinfo->rss_size;
+
+ if (vport->vport_id == PF_VPORT_ID)
+ hdev->rss_cfg.rss_size = kinfo->rss_size;
/* when enable mqprio, the tc_info has been updated. */
if (kinfo->tc_info.mqprio_active)
static void hclge_tm_tc_info_init(struct hclge_dev *hdev)
{
- u8 i;
+ u8 i, tc_sch_mode;
+ u32 bw_limit;
+
+ for (i = 0; i < hdev->tc_max; i++) {
+ if (i < hdev->tm_info.num_tc) {
+ tc_sch_mode = HCLGE_SCH_MODE_DWRR;
+ bw_limit = hdev->tm_info.pg_info[0].bw_limit;
+ } else {
+ tc_sch_mode = HCLGE_SCH_MODE_SP;
+ bw_limit = 0;
+ }
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
hdev->tm_info.tc_info[i].tc_id = i;
- hdev->tm_info.tc_info[i].tc_sch_mode = HCLGE_SCH_MODE_DWRR;
+ hdev->tm_info.tc_info[i].tc_sch_mode = tc_sch_mode;
hdev->tm_info.tc_info[i].pgid = 0;
- hdev->tm_info.tc_info[i].bw_limit =
- hdev->tm_info.pg_info[0].bw_limit;
+ hdev->tm_info.tc_info[i].bw_limit = bw_limit;
}
for (i = 0; i < HNAE3_MAX_USER_PRIO; i++)
for (k = 0; k < hdev->num_alloc_vport; k++) {
struct hnae3_knic_private_info *kinfo = &vport[k].nic.kinfo;
- for (i = 0; i < kinfo->tc_info.num_tc; i++) {
+ for (i = 0; i < kinfo->tc_info.max_tc; i++) {
+ u8 pri = i < kinfo->tc_info.num_tc ? i : 0;
+ bool link_vld = i < kinfo->tc_info.num_tc;
+
ret = hclge_tm_qs_to_pri_map_cfg(hdev,
vport[k].qs_offset + i,
- i);
+ pri, link_vld);
if (ret)
return ret;
}
for (i = 0; i < HNAE3_MAX_TC; i++) {
ret = hclge_tm_qs_to_pri_map_cfg(hdev,
vport[k].qs_offset + i,
- k);
+ k, true);
if (ret)
return ret;
}
{
u32 max_tm_rate = hdev->ae_dev->dev_specs.max_tm_rate;
struct hclge_shaper_ir_para ir_para;
- u32 shaper_para;
+ u32 shaper_para_c, shaper_para_p;
int ret;
u32 i;
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
u32 rate = hdev->tm_info.tc_info[i].bw_limit;
- ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI,
- &ir_para, max_tm_rate);
- if (ret)
- return ret;
+ if (rate) {
+ ret = hclge_shaper_para_calc(rate, HCLGE_SHAPER_LVL_PRI,
+ &ir_para, max_tm_rate);
+ if (ret)
+ return ret;
+
+ shaper_para_c = hclge_tm_get_shapping_para(0, 0, 0,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ shaper_para_p = hclge_tm_get_shapping_para(ir_para.ir_b,
+ ir_para.ir_u,
+ ir_para.ir_s,
+ HCLGE_SHAPER_BS_U_DEF,
+ HCLGE_SHAPER_BS_S_DEF);
+ } else {
+ shaper_para_c = 0;
+ shaper_para_p = 0;
+ }
- shaper_para = hclge_tm_get_shapping_para(0, 0, 0,
- HCLGE_SHAPER_BS_U_DEF,
- HCLGE_SHAPER_BS_S_DEF);
ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_C_BUCKET, i,
- shaper_para, rate);
+ shaper_para_c, rate);
if (ret)
return ret;
- shaper_para = hclge_tm_get_shapping_para(ir_para.ir_b,
- ir_para.ir_u,
- ir_para.ir_s,
- HCLGE_SHAPER_BS_U_DEF,
- HCLGE_SHAPER_BS_S_DEF);
ret = hclge_tm_pri_shapping_cfg(hdev, HCLGE_TM_SHAP_P_BUCKET, i,
- shaper_para, rate);
+ shaper_para_p, rate);
if (ret)
return ret;
}
int ret;
u32 i, k;
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
pg_info =
&hdev->tm_info.pg_info[hdev->tm_info.tc_info[i].pgid];
dwrr = pg_info->tc_dwrr[i];
return ret;
for (k = 0; k < hdev->num_alloc_vport; k++) {
+ struct hnae3_knic_private_info *kinfo = &vport[k].nic.kinfo;
+
+ if (i >= kinfo->tc_info.max_tc)
+ continue;
+
+ dwrr = i < kinfo->tc_info.num_tc ? vport[k].dwrr : 0;
ret = hclge_tm_qs_weight_cfg(
hdev, vport[k].qs_offset + i,
- vport[k].dwrr);
+ dwrr);
if (ret)
return ret;
}
{
struct hclge_vport *vport = hdev->vport;
int ret;
+ u8 mode;
u16 i;
ret = hclge_tm_pri_schd_mode_cfg(hdev, pri_id);
return ret;
for (i = 0; i < hdev->num_alloc_vport; i++) {
+ struct hnae3_knic_private_info *kinfo = &vport[i].nic.kinfo;
+
+ if (pri_id >= kinfo->tc_info.max_tc)
+ continue;
+
+ mode = pri_id < kinfo->tc_info.num_tc ? HCLGE_SCH_MODE_DWRR :
+ HCLGE_SCH_MODE_SP;
ret = hclge_tm_qs_schd_mode_cfg(hdev,
vport[i].qs_offset + pri_id,
- HCLGE_SCH_MODE_DWRR);
+ mode);
if (ret)
return ret;
}
u8 i;
if (hdev->tx_sch_mode == HCLGE_FLAG_TC_BASE_SCH_MODE) {
- for (i = 0; i < hdev->tm_info.num_tc; i++) {
+ for (i = 0; i < hdev->tc_max; i++) {
ret = hclge_tm_schd_mode_tc_base_cfg(hdev, i);
if (ret)
return ret;
void hclge_pfc_rx_stats_get(struct hclge_dev *hdev, u64 *stats);
void hclge_pfc_tx_stats_get(struct hclge_dev *hdev, u64 *stats);
int hclge_tm_qs_shaper_cfg(struct hclge_vport *vport, int max_tx_rate);
+int hclge_tm_port_shaper_cfg(struct hclge_dev *hdev);
int hclge_tm_get_qset_num(struct hclge_dev *hdev, u16 *qset_num);
int hclge_tm_get_pri_num(struct hclge_dev *hdev, u8 *pri_num);
int hclge_tm_get_qset_map_pri(struct hclge_dev *hdev, u16 qset_id, u8 *priority,
for (i = 0; i < fw_image->fw_info.fw_section_cnt; i++) {
len += fw_image->fw_section_info[i].fw_section_len;
- memcpy(&host_image->image_section_info[i],
- &fw_image->fw_section_info[i],
- sizeof(struct fw_section_info_st));
+ host_image->image_section_info[i] = fw_image->fw_section_info[i];
}
if (len != fw_image->fw_len ||
release_sub_crqs(adapter, 0);
rc = init_sub_crqs(adapter);
} else {
+ /* no need to reinitialize completely, but we do
+ * need to clean up transmits that were in flight
+ * when we processed the reset. Failure to do so
+ * will confound the upper layer, usually TCP, by
+ * creating the illusion of transmits that are
+ * awaiting completion.
+ */
+ clean_tx_pools(adapter);
+
rc = reset_sub_crq_queues(adapter);
}
} else {
bool disable_polarity_correction;
bool is_mdix;
bool polarity_correction;
- bool reset_disable;
bool speed_downgraded;
bool autoneg_wait_to_complete;
};
bool blocked = false;
int i = 0;
- /* Check the PHY (LCD) reset flag */
- if (hw->phy.reset_disable)
- return true;
-
while ((blocked = !(er32(FWSM) & E1000_ICH_FWSM_RSPCIPHY)) &&
(i++ < 30))
usleep_range(10000, 11000);
#define I217_CGFREG_ENABLE_MTA_RESET 0x0002
#define I217_MEMPWR PHY_REG(772, 26)
#define I217_MEMPWR_DISABLE_SMB_RELEASE 0x0010
-#define I217_MEMPWR_MOEM 0x1000
/* Receive Address Initial CRC Calculation */
#define E1000_PCH_RAICC(_n) (0x05F50 + ((_n) * 4))
if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
hw->mac.type >= e1000_pch_adp) {
+ /* Keep the GPT clock enabled for CSME */
+ mac_data = er32(FEXTNVM);
+ mac_data |= BIT(3);
+ ew32(FEXTNVM, mac_data);
/* Request ME unconfigure the device from S0ix */
mac_data = er32(H2ME);
mac_data &= ~E1000_H2ME_START_DPG;
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
- struct e1000_hw *hw = &adapter->hw;
- u16 phy_data;
int rc;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
- hw->mac.type >= e1000_pch_adp) {
- /* Mask OEM Bits / Gig Disable / Restart AN (772_26[12] = 1) */
- e1e_rphy(hw, I217_MEMPWR, &phy_data);
- phy_data |= I217_MEMPWR_MOEM;
- e1e_wphy(hw, I217_MEMPWR, phy_data);
-
- /* Disable LCD reset */
- hw->phy.reset_disable = true;
- }
-
e1000e_flush_lpic(pdev);
e1000e_pm_freeze(dev);
struct net_device *netdev = pci_get_drvdata(to_pci_dev(dev));
struct e1000_adapter *adapter = netdev_priv(netdev);
struct pci_dev *pdev = to_pci_dev(dev);
- struct e1000_hw *hw = &adapter->hw;
- u16 phy_data;
int rc;
/* Introduce S0ix implementation */
if (rc)
return rc;
- if (er32(FWSM) & E1000_ICH_FWSM_FW_VALID &&
- hw->mac.type >= e1000_pch_adp) {
- /* Unmask OEM Bits / Gig Disable / Restart AN 772_26[12] = 0 */
- e1e_rphy(hw, I217_MEMPWR, &phy_data);
- phy_data &= ~I217_MEMPWR_MOEM;
- e1e_wphy(hw, I217_MEMPWR, phy_data);
-
- /* Enable LCD reset */
- hw->phy.reset_disable = false;
- }
-
return e1000e_pm_thaw(dev);
}
#include <net/tc_act/tc_mirred.h>
#include <net/udp_tunnel.h>
#include <net/xdp_sock.h>
+#include <linux/bitfield.h>
#include "i40e_type.h"
#include "i40e_prototype.h"
#include <linux/net/intel/i40e_client.h>
(u32)(val & 0xFFFFFFFFULL));
}
+/**
+ * i40e_get_pf_count - get PCI PF count.
+ * @hw: pointer to a hw.
+ *
+ * Reports the function number of the highest PCI physical
+ * function plus 1 as it is loaded from the NVM.
+ *
+ * Return: PCI PF count.
+ **/
+static inline u32 i40e_get_pf_count(struct i40e_hw *hw)
+{
+ return FIELD_GET(I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK,
+ rd32(hw, I40E_GLGEN_PCIFCNCNT));
+}
+
/* needed by i40e_ethtool.c */
int i40e_up(struct i40e_vsi *vsi);
void i40e_down(struct i40e_vsi *vsi);
set_bit(__I40E_TESTING, pf->state);
+ if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
+ test_bit(__I40E_RESET_INTR_RECEIVED, pf->state)) {
+ dev_warn(&pf->pdev->dev,
+ "Cannot start offline testing when PF is in reset state.\n");
+ goto skip_ol_tests;
+ }
+
if (i40e_active_vfs(pf) || i40e_active_vmdqs(pf)) {
dev_warn(&pf->pdev->dev,
"Please take active VFs and Netqueues offline and restart the adapter before running NIC diagnostics\n");
- data[I40E_ETH_TEST_REG] = 1;
- data[I40E_ETH_TEST_EEPROM] = 1;
- data[I40E_ETH_TEST_INTR] = 1;
- data[I40E_ETH_TEST_LINK] = 1;
- eth_test->flags |= ETH_TEST_FL_FAILED;
- clear_bit(__I40E_TESTING, pf->state);
goto skip_ol_tests;
}
data[I40E_ETH_TEST_INTR] = 0;
}
-skip_ol_tests:
-
netif_info(pf, drv, netdev, "testing finished\n");
+ return;
+
+skip_ol_tests:
+ data[I40E_ETH_TEST_REG] = 1;
+ data[I40E_ETH_TEST_EEPROM] = 1;
+ data[I40E_ETH_TEST_INTR] = 1;
+ data[I40E_ETH_TEST_LINK] = 1;
+ eth_test->flags |= ETH_TEST_FL_FAILED;
+ clear_bit(__I40E_TESTING, pf->state);
+ netif_info(pf, drv, netdev, "testing failed\n");
}
static void i40e_get_wol(struct net_device *netdev,
pf->hw_csum_rx_error = 0;
}
+/**
+ * i40e_compute_pci_to_hw_id - compute index form PCI function.
+ * @vsi: ptr to the VSI to read from.
+ * @hw: ptr to the hardware info.
+ **/
+static u32 i40e_compute_pci_to_hw_id(struct i40e_vsi *vsi, struct i40e_hw *hw)
+{
+ int pf_count = i40e_get_pf_count(hw);
+
+ if (vsi->type == I40E_VSI_SRIOV)
+ return (hw->port * BIT(7)) / pf_count + vsi->vf_id;
+
+ return hw->port + BIT(7);
+}
+
+/**
+ * i40e_stat_update64 - read and update a 64 bit stat from the chip.
+ * @hw: ptr to the hardware info.
+ * @hireg: the high 32 bit reg to read.
+ * @loreg: the low 32 bit reg to read.
+ * @offset_loaded: has the initial offset been loaded yet.
+ * @offset: ptr to current offset value.
+ * @stat: ptr to the stat.
+ *
+ * Since the device stats are not reset at PFReset, they will not
+ * be zeroed when the driver starts. We'll save the first values read
+ * and use them as offsets to be subtracted from the raw values in order
+ * to report stats that count from zero.
+ **/
+static void i40e_stat_update64(struct i40e_hw *hw, u32 hireg, u32 loreg,
+ bool offset_loaded, u64 *offset, u64 *stat)
+{
+ u64 new_data;
+
+ new_data = rd64(hw, loreg);
+
+ if (!offset_loaded || new_data < *offset)
+ *offset = new_data;
+ *stat = new_data - *offset;
+}
+
/**
* i40e_stat_update48 - read and update a 48 bit stat from the chip
* @hw: ptr to the hardware info
*stat += new_data;
}
+/**
+ * i40e_stats_update_rx_discards - update rx_discards.
+ * @vsi: ptr to the VSI to be updated.
+ * @hw: ptr to the hardware info.
+ * @stat_idx: VSI's stat_counter_idx.
+ * @offset_loaded: ptr to the VSI's stat_offsets_loaded.
+ * @stat_offset: ptr to stat_offset to store first read of specific register.
+ * @stat: ptr to VSI's stat to be updated.
+ **/
+static void
+i40e_stats_update_rx_discards(struct i40e_vsi *vsi, struct i40e_hw *hw,
+ int stat_idx, bool offset_loaded,
+ struct i40e_eth_stats *stat_offset,
+ struct i40e_eth_stats *stat)
+{
+ u64 rx_rdpc, rx_rxerr;
+
+ i40e_stat_update32(hw, I40E_GLV_RDPC(stat_idx), offset_loaded,
+ &stat_offset->rx_discards, &rx_rdpc);
+ i40e_stat_update64(hw,
+ I40E_GL_RXERR1H(i40e_compute_pci_to_hw_id(vsi, hw)),
+ I40E_GL_RXERR1L(i40e_compute_pci_to_hw_id(vsi, hw)),
+ offset_loaded, &stat_offset->rx_discards_other,
+ &rx_rxerr);
+
+ stat->rx_discards = rx_rdpc + rx_rxerr;
+}
+
/**
* i40e_update_eth_stats - Update VSI-specific ethernet statistics counters.
* @vsi: the VSI to be updated
I40E_GLV_BPTCL(stat_idx),
vsi->stat_offsets_loaded,
&oes->tx_broadcast, &es->tx_broadcast);
+
+ i40e_stats_update_rx_discards(vsi, hw, stat_idx,
+ vsi->stat_offsets_loaded, oes, es);
+
vsi->stat_offsets_loaded = true;
}
* non-zero req_queue_pairs says that user requested a new
* queue count via ethtool's set_channels, so use this
* value for queues distribution across traffic classes
+ * We need at least one queue pair for the interface
+ * to be usable as we see in else statement.
*/
if (vsi->req_queue_pairs > 0)
vsi->num_queue_pairs = vsi->req_queue_pairs;
else if (pf->flags & I40E_FLAG_MSIX_ENABLED)
vsi->num_queue_pairs = pf->num_lan_msix;
+ else
+ vsi->num_queue_pairs = 1;
}
/* Number of queues per enabled TC */
return -EOPNOTSUPP;
}
+ if (!tc) {
+ dev_err(&pf->pdev->dev, "Unable to add filter because of invalid destination");
+ return -EINVAL;
+ }
+
if (test_bit(__I40E_RESET_RECOVERY_PENDING, pf->state) ||
test_bit(__I40E_RESET_INTR_RECEIVED, pf->state))
return -EBUSY;
**/
static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
{
- int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
+ const bool is_recovery_mode_reported = i40e_check_recovery_mode(pf);
struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
struct i40e_hw *hw = &pf->hw;
i40e_status ret;
int v;
if (test_bit(__I40E_EMP_RESET_INTR_RECEIVED, pf->state) &&
- i40e_check_recovery_mode(pf)) {
+ is_recovery_mode_reported)
i40e_set_ethtool_ops(pf->vsi[pf->lan_vsi]->netdev);
- }
if (test_bit(__I40E_DOWN, pf->state) &&
- !test_bit(__I40E_RECOVERY_MODE, pf->state) &&
- !old_recovery_mode_bit)
+ !test_bit(__I40E_RECOVERY_MODE, pf->state))
goto clear_recovery;
dev_dbg(&pf->pdev->dev, "Rebuilding internal switch\n");
* accordingly with regard to resources initialization
* and deinitialization
*/
- if (test_bit(__I40E_RECOVERY_MODE, pf->state) ||
- old_recovery_mode_bit) {
+ if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
if (i40e_get_capabilities(pf,
i40e_aqc_opc_list_func_capabilities))
goto end_unlock;
- if (test_bit(__I40E_RECOVERY_MODE, pf->state)) {
+ if (is_recovery_mode_reported) {
/* we're staying in recovery mode so we'll reinitialize
* misc vector here
*/
#define I40E_GLGEN_MSRWD_MDIWRDATA_SHIFT 0
#define I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT 16
#define I40E_GLGEN_MSRWD_MDIRDDATA_MASK I40E_MASK(0xFFFF, I40E_GLGEN_MSRWD_MDIRDDATA_SHIFT)
+#define I40E_GLGEN_PCIFCNCNT 0x001C0AB4 /* Reset: PCIR */
+#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT 0
+#define I40E_GLGEN_PCIFCNCNT_PCIPFCNT_MASK I40E_MASK(0x1F, I40E_GLGEN_PCIFCNCNT_PCIPFCNT_SHIFT)
+#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT 16
+#define I40E_GLGEN_PCIFCNCNT_PCIVFCNT_MASK I40E_MASK(0xFF, I40E_GLGEN_PCIFCNCNT_PCIVFCNT_SHIFT)
#define I40E_GLGEN_RSTAT 0x000B8188 /* Reset: POR */
#define I40E_GLGEN_RSTAT_DEVSTATE_SHIFT 0
#define I40E_GLGEN_RSTAT_DEVSTATE_MASK I40E_MASK(0x3, I40E_GLGEN_RSTAT_DEVSTATE_SHIFT)
#define I40E_VFQF_HKEY1_MAX_INDEX 12
#define I40E_VFQF_HLUT1(_i, _VF) (0x00220000 + ((_i) * 1024 + (_VF) * 4)) /* _i=0...15, _VF=0...127 */ /* Reset: CORER */
#define I40E_VFQF_HLUT1_MAX_INDEX 15
+#define I40E_GL_RXERR1H(_i) (0x00318004 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_RXERR1H_MAX_INDEX 143
+#define I40E_GL_RXERR1H_RXERR1H_SHIFT 0
+#define I40E_GL_RXERR1H_RXERR1H_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1H_RXERR1H_SHIFT)
+#define I40E_GL_RXERR1L(_i) (0x00318000 + ((_i) * 8)) /* _i=0...143 */ /* Reset: CORER */
+#define I40E_GL_RXERR1L_MAX_INDEX 143
+#define I40E_GL_RXERR1L_RXERR1L_SHIFT 0
+#define I40E_GL_RXERR1L_RXERR1L_MASK I40E_MASK(0xFFFFFFFF, I40E_GL_RXERR1L_RXERR1L_SHIFT)
#define I40E_GLPRT_BPRCH(_i) (0x003005E4 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
#define I40E_GLPRT_BPRCL(_i) (0x003005E0 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
#define I40E_GLPRT_BPTCH(_i) (0x00300A04 + ((_i) * 8)) /* _i=0...3 */ /* Reset: CORER */
u64 tx_broadcast; /* bptc */
u64 tx_discards; /* tdpc */
u64 tx_errors; /* tepc */
+ u64 rx_discards_other; /* rxerr1 */
};
/* Statistics collected per VEB per TC */
/* VFs only use TC 0 */
vfres->vsi_res[0].qset_handle
= le16_to_cpu(vsi->info.qs_handle[0]);
+ if (!(vf->driver_caps & VIRTCHNL_VF_OFFLOAD_USO) && !vf->pf_set_mac) {
+ i40e_del_mac_filter(vsi, vf->default_lan_addr.addr);
+ eth_zero_addr(vf->default_lan_addr.addr);
+ }
ether_addr_copy(vfres->vsi_res[0].default_mac_addr,
vf->default_lan_addr.addr);
}
}
if (vf->adq_enabled) {
- for (i = 0; i < I40E_MAX_VF_VSI; i++)
+ for (i = 0; i < vf->num_tc; i++)
num_qps_all += vf->ch[i].num_qps;
if (num_qps_all != qci->num_queue_pairs) {
aq_ret = I40E_ERR_PARAM;
u16 id;
DECLARE_BITMAP(state, __IAVF_VSI_STATE_SIZE__);
int base_vector;
- u16 work_limit;
u16 qs_handle;
void *priv; /* client driver data reference. */
};
struct iavf_vlan_filter {
struct list_head list;
struct iavf_vlan vlan;
- bool remove; /* filter needs to be removed */
- bool add; /* filter needs to be added */
+ struct {
+ u8 is_new_vlan:1; /* filter is new, wait for PF answer */
+ u8 remove:1; /* filter needs to be removed */
+ u8 add:1; /* filter needs to be added */
+ u8 padding:5;
+ };
};
#define IAVF_MAX_TRAFFIC_CLASS 4
return "__IAVF_INIT_VERSION_CHECK";
case __IAVF_INIT_GET_RESOURCES:
return "__IAVF_INIT_GET_RESOURCES";
+ case __IAVF_INIT_EXTENDED_CAPS:
+ return "__IAVF_INIT_EXTENDED_CAPS";
+ case __IAVF_INIT_CONFIG_ADAPTER:
+ return "__IAVF_INIT_CONFIG_ADAPTER";
case __IAVF_INIT_SW:
return "__IAVF_INIT_SW";
case __IAVF_INIT_FAILED:
int iavf_get_vf_vlan_v2_caps(struct iavf_adapter *adapter);
int iavf_send_vf_offload_vlan_v2_msg(struct iavf_adapter *adapter);
void iavf_set_queue_vlan_tag_loc(struct iavf_adapter *adapter);
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter);
void iavf_irq_enable(struct iavf_adapter *adapter, bool flush);
void iavf_configure_queues(struct iavf_adapter *adapter);
void iavf_deconfigure_queues(struct iavf_adapter *adapter);
struct ethtool_coalesce *ec, int queue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- struct iavf_vsi *vsi = &adapter->vsi;
struct iavf_ring *rx_ring, *tx_ring;
- ec->tx_max_coalesced_frames = vsi->work_limit;
- ec->rx_max_coalesced_frames = vsi->work_limit;
-
/* Rx and Tx usecs per queue value. If user doesn't specify the
* queue, return queue 0's value to represent.
*/
struct ethtool_coalesce *ec, int queue)
{
struct iavf_adapter *adapter = netdev_priv(netdev);
- struct iavf_vsi *vsi = &adapter->vsi;
int i;
- if (ec->tx_max_coalesced_frames_irq || ec->rx_max_coalesced_frames_irq)
- vsi->work_limit = ec->tx_max_coalesced_frames_irq;
-
if (ec->rx_coalesce_usecs == 0) {
if (ec->use_adaptive_rx_coalesce)
netif_info(adapter, drv, netdev, "rx-usecs=0, need to disable adaptive-rx for a complete disable\n");
static const struct ethtool_ops iavf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_MAX_FRAMES |
- ETHTOOL_COALESCE_MAX_FRAMES_IRQ |
ETHTOOL_COALESCE_USE_ADAPTIVE,
.get_drvinfo = iavf_get_drvinfo,
.get_link = ethtool_op_get_link,
* iavf_get_num_vlans_added - get number of VLANs added
* @adapter: board private structure
*/
-static u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
+u16 iavf_get_num_vlans_added(struct iavf_adapter *adapter)
{
return bitmap_weight(adapter->vsi.active_cvlans, VLAN_N_VID) +
bitmap_weight(adapter->vsi.active_svlans, VLAN_N_VID);
if (!iavf_add_vlan(adapter, IAVF_VLAN(vid, be16_to_cpu(proto))))
return -ENOMEM;
- if (proto == cpu_to_be16(ETH_P_8021Q))
- set_bit(vid, adapter->vsi.active_cvlans);
- else
- set_bit(vid, adapter->vsi.active_svlans);
-
return 0;
}
list_add_tail(&f->list, &adapter->mac_filter_list);
f->add = true;
f->is_new_mac = true;
- f->is_primary = false;
+ f->is_primary = ether_addr_equal(macaddr, adapter->hw.mac.addr);
adapter->aq_required |= IAVF_FLAG_AQ_ADD_MAC_FILTER;
} else {
f->remove = false;
adapter->vsi.back = adapter;
adapter->vsi.base_vector = 1;
- adapter->vsi.work_limit = IAVF_DEFAULT_IRQ_WORK;
vsi->netdev = adapter->netdev;
vsi->qs_handle = adapter->vsi_res->qset_handle;
if (adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_RSS_PF) {
adapter->aq_required |= IAVF_FLAG_AQ_ADD_CLOUD_FILTER;
iavf_misc_irq_enable(adapter);
+ bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID);
+ bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID);
+
mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2);
/* We were running when the reset started, so we need to restore some
struct iavf_tx_buffer *tx_buf;
struct iavf_tx_desc *tx_desc;
unsigned int total_bytes = 0, total_packets = 0;
- unsigned int budget = vsi->work_limit;
+ unsigned int budget = IAVF_DEFAULT_IRQ_WORK;
tx_buf = &tx_ring->tx_bi[i];
tx_desc = IAVF_TX_DESC(tx_ring, i);
{
struct iavf_rx_buffer *rx_buffer;
- if (!size)
- return NULL;
-
rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
prefetchw(rx_buffer->page);
+ if (!size)
+ return rx_buffer;
/* we are reusing so sync this buffer for CPU use */
dma_sync_single_range_for_cpu(rx_ring->dev,
spin_unlock_bh(&adapter->mac_vlan_list_lock);
}
+/**
+ * iavf_vlan_add_reject
+ * @adapter: adapter structure
+ *
+ * Remove VLAN filters from list based on PF response.
+ **/
+static void iavf_vlan_add_reject(struct iavf_adapter *adapter)
+{
+ struct iavf_vlan_filter *f, *ftmp;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ list_for_each_entry_safe(f, ftmp, &adapter->vlan_filter_list, list) {
+ if (f->is_new_vlan) {
+ if (f->vlan.tpid == ETH_P_8021Q)
+ clear_bit(f->vlan.vid,
+ adapter->vsi.active_cvlans);
+ else
+ clear_bit(f->vlan.vid,
+ adapter->vsi.active_svlans);
+
+ list_del(&f->list);
+ kfree(f);
+ }
+ }
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+}
+
/**
* iavf_add_vlans
* @adapter: adapter structure
vvfl->vlan_id[i] = f->vlan.vid;
i++;
f->add = false;
+ f->is_new_vlan = true;
if (i == count)
break;
}
iavf_send_pf_msg(adapter, VIRTCHNL_OP_ADD_VLAN, (u8 *)vvfl, len);
kfree(vvfl);
} else {
+ u16 max_vlans = adapter->vlan_v2_caps.filtering.max_filters;
+ u16 current_vlans = iavf_get_num_vlans_added(adapter);
struct virtchnl_vlan_filter_list_v2 *vvfl_v2;
adapter->current_op = VIRTCHNL_OP_ADD_VLAN_V2;
+ if ((count + current_vlans) > max_vlans &&
+ current_vlans < max_vlans) {
+ count = max_vlans - iavf_get_num_vlans_added(adapter);
+ more = true;
+ }
+
len = sizeof(*vvfl_v2) + ((count - 1) *
sizeof(struct virtchnl_vlan_filter));
if (len > IAVF_MAX_AQ_BUF_SIZE) {
&adapter->vlan_v2_caps.filtering.filtering_support;
struct virtchnl_vlan *vlan;
+ if (i == count)
+ break;
+
/* give priority over outer if it's enabled */
if (filtering_support->outer)
vlan = &vvfl_v2->filters[i].outer;
i++;
f->add = false;
- if (i == count)
- break;
+ f->is_new_vlan = true;
}
}
*/
iavf_netdev_features_vlan_strip_set(netdev, true);
break;
+ case VIRTCHNL_OP_ADD_VLAN_V2:
+ iavf_vlan_add_reject(adapter);
+ dev_warn(&adapter->pdev->dev, "Failed to add VLAN filter, error %s\n",
+ iavf_stat_str(&adapter->hw, v_retval));
+ break;
default:
dev_err(&adapter->pdev->dev, "PF returned error %d (%s) to our request %d\n",
v_retval, iavf_stat_str(&adapter->hw, v_retval),
spin_unlock_bh(&adapter->adv_rss_lock);
}
break;
+ case VIRTCHNL_OP_ADD_VLAN_V2: {
+ struct iavf_vlan_filter *f;
+
+ spin_lock_bh(&adapter->mac_vlan_list_lock);
+ list_for_each_entry(f, &adapter->vlan_filter_list, list) {
+ if (f->is_new_vlan) {
+ f->is_new_vlan = false;
+ if (f->vlan.tpid == ETH_P_8021Q)
+ set_bit(f->vlan.vid,
+ adapter->vsi.active_cvlans);
+ else
+ set_bit(f->vlan.vid,
+ adapter->vsi.active_svlans);
+ }
+ }
+ spin_unlock_bh(&adapter->mac_vlan_list_lock);
+ }
+ break;
case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING:
/* PF enabled vlan strip on this VF.
* Update netdev->features if needed to be in sync with ethtool.
#define _ICE_DEVIDS_H_
/* Device IDs */
+#define ICE_DEV_ID_E822_SI_DFLT 0x1888
/* Intel(R) Ethernet Connection E823-L for backplane */
#define ICE_DEV_ID_E823L_BACKPLANE 0x124C
/* Intel(R) Ethernet Connection E823-L for SFP */
devlink_port_unregister(devlink_port);
}
+#define ICE_DEVLINK_READ_BLK_SIZE (1024 * 1024)
+
/**
* ice_devlink_nvm_snapshot - Capture a snapshot of the NVM flash contents
* @devlink: the devlink instance
struct ice_pf *pf = devlink_priv(devlink);
struct device *dev = ice_pf_to_dev(pf);
struct ice_hw *hw = &pf->hw;
- void *nvm_data;
- u32 nvm_size;
+ u8 *nvm_data, *tmp, i;
+ u32 nvm_size, left;
+ s8 num_blks;
int status;
nvm_size = hw->flash.flash_size;
if (!nvm_data)
return -ENOMEM;
- status = ice_acquire_nvm(hw, ICE_RES_READ);
- if (status) {
- dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
- status, hw->adminq.sq_last_status);
- NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
- vfree(nvm_data);
- return status;
- }
- status = ice_read_flat_nvm(hw, 0, &nvm_size, nvm_data, false);
- if (status) {
- dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
- nvm_size, status, hw->adminq.sq_last_status);
- NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
+ num_blks = DIV_ROUND_UP(nvm_size, ICE_DEVLINK_READ_BLK_SIZE);
+ tmp = nvm_data;
+ left = nvm_size;
+
+ /* Some systems take longer to read the NVM than others which causes the
+ * FW to reclaim the NVM lock before the entire NVM has been read. Fix
+ * this by breaking the reads of the NVM into smaller chunks that will
+ * probably not take as long. This has some overhead since we are
+ * increasing the number of AQ commands, but it should always work
+ */
+ for (i = 0; i < num_blks; i++) {
+ u32 read_sz = min_t(u32, ICE_DEVLINK_READ_BLK_SIZE, left);
+
+ status = ice_acquire_nvm(hw, ICE_RES_READ);
+ if (status) {
+ dev_dbg(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
+ status, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to acquire NVM semaphore");
+ vfree(nvm_data);
+ return -EIO;
+ }
+
+ status = ice_read_flat_nvm(hw, i * ICE_DEVLINK_READ_BLK_SIZE,
+ &read_sz, tmp, false);
+ if (status) {
+ dev_dbg(dev, "ice_read_flat_nvm failed after reading %u bytes, err %d aq_err %d\n",
+ read_sz, status, hw->adminq.sq_last_status);
+ NL_SET_ERR_MSG_MOD(extack, "Failed to read NVM contents");
+ ice_release_nvm(hw);
+ vfree(nvm_data);
+ return -EIO;
+ }
ice_release_nvm(hw);
- vfree(nvm_data);
- return status;
- }
- ice_release_nvm(hw);
+ tmp += read_sz;
+ left -= read_sz;
+ }
*data = nvm_data;
rx_desc = ICE_RX_DESC(rx_ring, i);
if (!(rx_desc->wb.status_error0 &
- cpu_to_le16(ICE_TX_DESC_CMD_EOP | ICE_TX_DESC_CMD_RS)))
+ (cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_DD_S)) |
+ cpu_to_le16(BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S)))))
continue;
rx_buf = &rx_ring->rx_buf[i];
return err;
}
+/**
+ * ice_set_phy_type_from_speed - set phy_types based on speeds
+ * and advertised modes
+ * @ks: ethtool link ksettings struct
+ * @phy_type_low: pointer to the lower part of phy_type
+ * @phy_type_high: pointer to the higher part of phy_type
+ * @adv_link_speed: targeted link speeds bitmap
+ */
+static void
+ice_set_phy_type_from_speed(const struct ethtool_link_ksettings *ks,
+ u64 *phy_type_low, u64 *phy_type_high,
+ u16 adv_link_speed)
+{
+ /* Handle 1000M speed in a special way because ice_update_phy_type
+ * enables all link modes, but having mixed copper and optical
+ * standards is not supported.
+ */
+ adv_link_speed &= ~ICE_AQ_LINK_SPEED_1000MB;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseT_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_T |
+ ICE_PHY_TYPE_LOW_1G_SGMII;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseKX_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_KX;
+
+ if (ethtool_link_ksettings_test_link_mode(ks, advertising,
+ 1000baseX_Full))
+ *phy_type_low |= ICE_PHY_TYPE_LOW_1000BASE_SX |
+ ICE_PHY_TYPE_LOW_1000BASE_LX;
+
+ ice_update_phy_type(phy_type_low, phy_type_high, adv_link_speed);
+}
+
/**
* ice_set_link_ksettings - Set Speed and Duplex
* @netdev: network interface device structure
adv_link_speed = curr_link_speed;
/* Convert the advertise link speeds to their corresponded PHY_TYPE */
- ice_update_phy_type(&phy_type_low, &phy_type_high, adv_link_speed);
+ ice_set_phy_type_from_speed(ks, &phy_type_low, &phy_type_high,
+ adv_link_speed);
if (!autoneg_changed && adv_link_speed == curr_link_speed) {
netdev_info(netdev, "Nothing changed, exiting without setting anything.\n");
new_rx = ch->combined_count + ch->rx_count;
new_tx = ch->combined_count + ch->tx_count;
+ if (new_rx < vsi->tc_cfg.numtc) {
+ netdev_err(dev, "Cannot set less Rx channels, than Traffic Classes you have (%u)\n",
+ vsi->tc_cfg.numtc);
+ return -EINVAL;
+ }
+ if (new_tx < vsi->tc_cfg.numtc) {
+ netdev_err(dev, "Cannot set less Tx channels, than Traffic Classes you have (%u)\n",
+ vsi->tc_cfg.numtc);
+ return -EINVAL;
+ }
if (new_rx > ice_get_max_rxq(pf)) {
netdev_err(dev, "Maximum allowed Rx channels is %d\n",
ice_get_max_rxq(pf));
return 0;
}
-static const struct pldmfw_ops ice_fwu_ops = {
+struct ice_pldm_pci_record_id {
+ u32 vendor;
+ u32 device;
+ u32 subsystem_vendor;
+ u32 subsystem_device;
+};
+
+/**
+ * ice_op_pci_match_record - Check if a PCI device matches the record
+ * @context: PLDM fw update structure
+ * @record: list of records extracted from the PLDM image
+ *
+ * Determine if the PCI device associated with this device matches the record
+ * data provided.
+ *
+ * Searches the descriptor TLVs and extracts the relevant descriptor data into
+ * a pldm_pci_record_id. This is then compared against the PCI device ID
+ * information.
+ *
+ * Returns: true if the device matches the record, false otherwise.
+ */
+static bool
+ice_op_pci_match_record(struct pldmfw *context, struct pldmfw_record *record)
+{
+ struct pci_dev *pdev = to_pci_dev(context->dev);
+ struct ice_pldm_pci_record_id id = {
+ .vendor = PCI_ANY_ID,
+ .device = PCI_ANY_ID,
+ .subsystem_vendor = PCI_ANY_ID,
+ .subsystem_device = PCI_ANY_ID,
+ };
+ struct pldmfw_desc_tlv *desc;
+
+ list_for_each_entry(desc, &record->descs, entry) {
+ u16 value;
+ int *ptr;
+
+ switch (desc->type) {
+ case PLDM_DESC_ID_PCI_VENDOR_ID:
+ ptr = &id.vendor;
+ break;
+ case PLDM_DESC_ID_PCI_DEVICE_ID:
+ ptr = &id.device;
+ break;
+ case PLDM_DESC_ID_PCI_SUBVENDOR_ID:
+ ptr = &id.subsystem_vendor;
+ break;
+ case PLDM_DESC_ID_PCI_SUBDEV_ID:
+ ptr = &id.subsystem_device;
+ break;
+ default:
+ /* Skip unrelated TLVs */
+ continue;
+ }
+
+ value = get_unaligned_le16(desc->data);
+ /* A value of zero for one of the descriptors is sometimes
+ * used when the record should ignore this field when matching
+ * device. For example if the record applies to any subsystem
+ * device or vendor.
+ */
+ if (value)
+ *ptr = value;
+ else
+ *ptr = PCI_ANY_ID;
+ }
+
+ /* the E822 device can have a generic device ID so check for that */
+ if ((id.vendor == PCI_ANY_ID || id.vendor == pdev->vendor) &&
+ (id.device == PCI_ANY_ID || id.device == pdev->device ||
+ id.device == ICE_DEV_ID_E822_SI_DFLT) &&
+ (id.subsystem_vendor == PCI_ANY_ID ||
+ id.subsystem_vendor == pdev->subsystem_vendor) &&
+ (id.subsystem_device == PCI_ANY_ID ||
+ id.subsystem_device == pdev->subsystem_device))
+ return true;
+
+ return false;
+}
+
+static const struct pldmfw_ops ice_fwu_ops_e810 = {
.match_record = &pldmfw_op_pci_match_record,
.send_package_data = &ice_send_package_data,
.send_component_table = &ice_send_component_table,
.finalize_update = &ice_finalize_update,
};
+static const struct pldmfw_ops ice_fwu_ops_e822 = {
+ .match_record = &ice_op_pci_match_record,
+ .send_package_data = &ice_send_package_data,
+ .send_component_table = &ice_send_component_table,
+ .flash_component = &ice_flash_component,
+ .finalize_update = &ice_finalize_update,
+};
+
/**
* ice_get_pending_updates - Check if the component has a pending update
* @pf: the PF driver structure
memset(&priv, 0, sizeof(priv));
- priv.context.ops = &ice_fwu_ops;
+ /* the E822 device needs a slightly different ops */
+ if (hw->mac_type == ICE_MAC_GENERIC)
+ priv.context.ops = &ice_fwu_ops_e822;
+ else
+ priv.context.ops = &ice_fwu_ops_e810;
priv.context.dev = dev;
priv.extack = extack;
priv.pf = pf;
* @vsi: the VSI being configured
* @ctxt: VSI context structure
*/
-static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
+static int ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
{
u16 offset = 0, qmap = 0, tx_count = 0, pow = 0;
u16 num_txq_per_tc, num_rxq_per_tc;
else
vsi->num_rxq = num_rxq_per_tc;
+ if (vsi->num_rxq > vsi->alloc_rxq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n",
+ vsi->num_rxq, vsi->alloc_rxq);
+ return -EINVAL;
+ }
+
vsi->num_txq = tx_count;
+ if (vsi->num_txq > vsi->alloc_txq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n",
+ vsi->num_txq, vsi->alloc_txq);
+ return -EINVAL;
+ }
if (vsi->type == ICE_VSI_VF && vsi->num_txq != vsi->num_rxq) {
dev_dbg(ice_pf_to_dev(vsi->back), "VF VSI should have same number of Tx and Rx queues. Hence making them equal\n");
*/
ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]);
ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
+
+ return 0;
}
/**
if (vsi->type == ICE_VSI_CHNL) {
ice_chnl_vsi_setup_q_map(vsi, ctxt);
} else {
- ice_vsi_setup_q_map(vsi, ctxt);
+ ret = ice_vsi_setup_q_map(vsi, ctxt);
+ if (ret)
+ goto out;
+
if (!init_vsi) /* means VSI being updated */
/* must to indicate which section of VSI context are
* being modified
*
* Prepares VSI tc_config to have queue configurations based on MQPRIO options.
*/
-static void
+static int
ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt,
u8 ena_tc)
{
/* Set actual Tx/Rx queue pairs */
vsi->num_txq = offset + qcount_tx;
+ if (vsi->num_txq > vsi->alloc_txq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Tx queues (%u), than were allocated (%u)!\n",
+ vsi->num_txq, vsi->alloc_txq);
+ return -EINVAL;
+ }
+
vsi->num_rxq = offset + qcount_rx;
+ if (vsi->num_rxq > vsi->alloc_rxq) {
+ dev_err(ice_pf_to_dev(vsi->back), "Trying to use more Rx queues (%u), than were allocated (%u)!\n",
+ vsi->num_rxq, vsi->alloc_rxq);
+ return -EINVAL;
+ }
/* Setup queue TC[0].qmap for given VSI context */
ctxt->info.tc_mapping[0] = cpu_to_le16(qmap);
dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n", vsi->num_rxq);
dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n",
vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc);
+
+ return 0;
}
/**
if (vsi->type == ICE_VSI_PF &&
test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))
- ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc);
+ ret = ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc);
else
- ice_vsi_setup_q_map(vsi, ctx);
+ ret = ice_vsi_setup_q_map(vsi, ctx);
+
+ if (ret)
+ goto out;
/* must to indicate which section of VSI context are being modified */
ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID);
ice_set_safe_mode_caps(hw);
}
+ hw->ucast_shared = true;
+
err = ice_init_pf(pf);
if (err) {
dev_err(dev, "ice_init_pf failed: %d\n", err);
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_10G_BASE_T), 0 },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_1GBE), 0 },
{ PCI_VDEVICE(INTEL, ICE_DEV_ID_E823L_QSFP), 0 },
+ { PCI_VDEVICE(INTEL, ICE_DEV_ID_E822_SI_DFLT), 0 },
/* required last entry */
{ 0, }
};
ice_fix_features(struct net_device *netdev, netdev_features_t features)
{
struct ice_netdev_priv *np = netdev_priv(netdev);
- netdev_features_t supported_vlan_filtering;
- netdev_features_t requested_vlan_filtering;
- struct ice_vsi *vsi = np->vsi;
-
- requested_vlan_filtering = features & NETIF_VLAN_FILTERING_FEATURES;
-
- /* make sure supported_vlan_filtering works for both SVM and DVM */
- supported_vlan_filtering = NETIF_F_HW_VLAN_CTAG_FILTER;
- if (ice_is_dvm_ena(&vsi->back->hw))
- supported_vlan_filtering |= NETIF_F_HW_VLAN_STAG_FILTER;
-
- if (requested_vlan_filtering &&
- requested_vlan_filtering != supported_vlan_filtering) {
- if (requested_vlan_filtering & NETIF_F_HW_VLAN_CTAG_FILTER) {
- netdev_warn(netdev, "cannot support requested VLAN filtering settings, enabling all supported VLAN filtering settings\n");
- features |= supported_vlan_filtering;
+ netdev_features_t req_vlan_fltr, cur_vlan_fltr;
+ bool cur_ctag, cur_stag, req_ctag, req_stag;
+
+ cur_vlan_fltr = netdev->features & NETIF_VLAN_FILTERING_FEATURES;
+ cur_ctag = cur_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+ cur_stag = cur_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+ req_vlan_fltr = features & NETIF_VLAN_FILTERING_FEATURES;
+ req_ctag = req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER;
+ req_stag = req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER;
+
+ if (req_vlan_fltr != cur_vlan_fltr) {
+ if (ice_is_dvm_ena(&np->vsi->back->hw)) {
+ if (req_ctag && req_stag) {
+ features |= NETIF_VLAN_FILTERING_FEATURES;
+ } else if (!req_ctag && !req_stag) {
+ features &= ~NETIF_VLAN_FILTERING_FEATURES;
+ } else if ((!cur_ctag && req_ctag && !cur_stag) ||
+ (!cur_stag && req_stag && !cur_ctag)) {
+ features |= NETIF_VLAN_FILTERING_FEATURES;
+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been enabled for both types.\n");
+ } else if ((cur_ctag && !req_ctag && cur_stag) ||
+ (cur_stag && !req_stag && cur_ctag)) {
+ features &= ~NETIF_VLAN_FILTERING_FEATURES;
+ netdev_warn(netdev, "802.1Q and 802.1ad VLAN filtering must be either both on or both off. VLAN filtering has been disabled for both types.\n");
+ }
} else {
- netdev_warn(netdev, "cannot support requested VLAN filtering settings, clearing all supported VLAN filtering settings\n");
- features &= ~supported_vlan_filtering;
+ if (req_vlan_fltr & NETIF_F_HW_VLAN_STAG_FILTER)
+ netdev_warn(netdev, "cannot support requested 802.1ad filtering setting in SVM mode\n");
+
+ if (req_vlan_fltr & NETIF_F_HW_VLAN_CTAG_FILTER)
+ features |= NETIF_F_HW_VLAN_CTAG_FILTER;
}
}
if (vsi->netdev) {
ice_set_rx_mode(vsi->netdev);
- err = ice_vsi_vlan_setup(vsi);
+ if (vsi->type != ICE_VSI_LB) {
+ err = ice_vsi_vlan_setup(vsi);
- if (err)
- return err;
+ if (err)
+ return err;
+ }
}
ice_vsi_cfg_dcb_rings(vsi);
ice_ptp_init_tx_e822(struct ice_pf *pf, struct ice_ptp_tx *tx, u8 port)
{
tx->quad = port / ICE_PORTS_PER_QUAD;
- tx->quad_offset = tx->quad * INDEX_PER_PORT;
+ tx->quad_offset = (port % ICE_PORTS_PER_QUAD) * INDEX_PER_PORT;
tx->len = INDEX_PER_PORT;
return ice_ptp_alloc_tx_tracker(tx);
* To allow multiple ports to access the shared register block independently,
* the blocks are split up so that indexes are assigned to each port based on
* hardware logical port number.
+ *
+ * The timestamp blocks are handled differently for E810- and E822-based
+ * devices. In E810 devices, each port has its own block of timestamps, while in
+ * E822 there is a need to logically break the block of registers into smaller
+ * chunks based on the port number to avoid collisions.
+ *
+ * Example for port 5 in E810:
+ * +--------+--------+--------+--------+--------+--------+--------+--------+
+ * |register|register|register|register|register|register|register|register|
+ * | block | block | block | block | block | block | block | block |
+ * | for | for | for | for | for | for | for | for |
+ * | port 0 | port 1 | port 2 | port 3 | port 4 | port 5 | port 6 | port 7 |
+ * +--------+--------+--------+--------+--------+--------+--------+--------+
+ * ^^
+ * ||
+ * |--- quad offset is always 0
+ * ---- quad number
+ *
+ * Example for port 5 in E822:
+ * +-----------------------------+-----------------------------+
+ * | register block for quad 0 | register block for quad 1 |
+ * |+------+------+------+------+|+------+------+------+------+|
+ * ||port 0|port 1|port 2|port 3|||port 0|port 1|port 2|port 3||
+ * |+------+------+------+------+|+------+------+------+------+|
+ * +-----------------------------+-------^---------------------+
+ * ^ |
+ * | --- quad offset*
+ * ---- quad number
+ *
+ * * PHY port 5 is port 1 in quad 1
+ *
*/
/**
return ret;
}
-/**
- * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch
- * @pf: PF used to reference the switch's rules
- * @umac: unicast MAC to compare against existing switch rules
- *
- * Return true on the first/any match, else return false
- */
-static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac)
-{
- struct ice_sw_recipe *mac_recipe_list =
- &pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC];
- struct ice_fltr_mgmt_list_entry *list_itr;
- struct list_head *rule_head;
- struct mutex *rule_lock; /* protect MAC filter list access */
-
- rule_head = &mac_recipe_list->filt_rules;
- rule_lock = &mac_recipe_list->filt_rule_lock;
-
- mutex_lock(rule_lock);
- list_for_each_entry(list_itr, rule_head, list_entry) {
- u8 *existing_mac = &list_itr->fltr_info.l_data.mac.mac_addr[0];
-
- if (ether_addr_equal(existing_mac, umac)) {
- mutex_unlock(rule_lock);
- return true;
- }
- }
-
- mutex_unlock(rule_lock);
-
- return false;
-}
-
/**
* ice_set_vf_mac
* @netdev: network interface device structure
if (ret)
goto out_put_vf;
- if (ice_unicast_mac_exists(pf, mac)) {
- netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n",
- mac, vf_id, mac);
- ret = -EINVAL;
- goto out_put_vf;
- }
-
mutex_lock(&vf->cfg_lock);
/* VF is notified of its new MAC via the PF's response to the
*/
fltr->rid = rule_added.rid;
fltr->rule_id = rule_added.rule_id;
+ fltr->dest_id = rule_added.vsi_handle;
exit:
kfree(list);
n_proto_key = ntohs(match.key->n_proto);
n_proto_mask = ntohs(match.mask->n_proto);
- if (n_proto_key == ETH_P_ALL || n_proto_key == 0) {
+ if (n_proto_key == ETH_P_ALL || n_proto_key == 0 ||
+ fltr->tunnel_type == TNL_GTPU ||
+ fltr->tunnel_type == TNL_GTPC) {
n_proto_key = 0;
n_proto_mask = 0;
} else {
protocol = vlan_get_protocol(skb);
- if (eth_p_mpls(protocol))
+ if (eth_p_mpls(protocol)) {
ip.hdr = skb_inner_network_header(skb);
- else
+ l4.hdr = skb_checksum_start(skb);
+ } else {
ip.hdr = skb_network_header(skb);
- l4.hdr = skb_checksum_start(skb);
+ l4.hdr = skb_transport_header(skb);
+ }
/* compute outer L2 header size */
l2_len = ip.hdr - skb->data;
}
if (ice_is_vf_disabled(vf)) {
+ vsi = ice_get_vf_vsi(vf);
+ if (WARN_ON(!vsi))
+ return -EINVAL;
+ ice_vsi_stop_lan_tx_rings(vsi, ICE_NO_RESET, vf->vf_id);
+ ice_vsi_stop_all_rx_rings(vsi);
dev_dbg(dev, "VF is already disabled, there is no need for resetting it, telling VM, all is fine %d\n",
vf->vf_id);
return 0;
*/
static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
{
- enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
struct virtchnl_vsi_queue_config_info *qci =
(struct virtchnl_vsi_queue_config_info *)msg;
struct virtchnl_queue_pair_info *qpi;
struct ice_pf *pf = vf->pf;
struct ice_vsi *vsi;
- int i, q_idx;
+ int i = -1, q_idx;
- if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
goto error_param;
- }
- if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!ice_vc_isvalid_vsi_id(vf, qci->vsi_id))
goto error_param;
- }
vsi = ice_get_vf_vsi(vf);
- if (!vsi) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (!vsi)
goto error_param;
- }
if (qci->num_queue_pairs > ICE_MAX_RSS_QS_PER_VF ||
qci->num_queue_pairs > min_t(u16, vsi->alloc_txq, vsi->alloc_rxq)) {
dev_err(ice_pf_to_dev(pf), "VF-%d requesting more than supported number of queues: %d\n",
vf->vf_id, min_t(u16, vsi->alloc_txq, vsi->alloc_rxq));
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
!ice_vc_isvalid_ring_len(qpi->txq.ring_len) ||
!ice_vc_isvalid_ring_len(qpi->rxq.ring_len) ||
!ice_vc_isvalid_q_id(vf, qci->vsi_id, qpi->txq.queue_id)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
* for selected "vsi"
*/
if (q_idx >= vsi->alloc_txq || q_idx >= vsi->alloc_rxq) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
goto error_param;
}
vsi->tx_rings[i]->count = qpi->txq.ring_len;
/* Disable any existing queue first */
- if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, q_idx))
goto error_param;
- }
/* Configure a queue with the requested settings */
if (ice_vsi_cfg_single_txq(vsi, vsi->tx_rings, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure TX queue %d\n",
+ vf->vf_id, i);
goto error_param;
}
}
if (qpi->rxq.databuffer_size != 0 &&
(qpi->rxq.databuffer_size > ((16 * 1024) - 128) ||
- qpi->rxq.databuffer_size < 1024)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ qpi->rxq.databuffer_size < 1024))
goto error_param;
- }
vsi->rx_buf_len = qpi->rxq.databuffer_size;
vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
if (qpi->rxq.max_pkt_size > max_frame_size ||
- qpi->rxq.max_pkt_size < 64) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ qpi->rxq.max_pkt_size < 64)
goto error_param;
- }
vsi->max_frame = qpi->rxq.max_pkt_size;
/* add space for the port VLAN since the VF driver is
vsi->max_frame += VLAN_HLEN;
if (ice_vsi_cfg_single_rxq(vsi, q_idx)) {
- v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+ dev_warn(ice_pf_to_dev(pf), "VF-%d failed to configure RX queue %d\n",
+ vf->vf_id, i);
goto error_param;
}
}
}
+ /* send the response to the VF */
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_SUCCESS, NULL, 0);
error_param:
+ /* disable whatever we can */
+ for (; i >= 0; i--) {
+ if (ice_vsi_ctrl_one_rx_ring(vsi, false, i, true))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable RX queue %d\n",
+ vf->vf_id, i);
+ if (ice_vf_vsi_dis_single_txq(vf, vsi, i))
+ dev_err(ice_pf_to_dev(pf), "VF-%d could not disable TX queue %d\n",
+ vf->vf_id, i);
+ }
+
/* send the response to the VF */
- return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES, v_ret,
- NULL, 0);
+ return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_VSI_QUEUES,
+ VIRTCHNL_STATUS_ERR_PARAM, NULL, 0);
}
/**
struct virtchnl_vlan_filtering_caps *vfc,
struct virtchnl_vlan_filter_list_v2 *vfl)
{
- u16 num_requested_filters = vsi->num_vlan + vfl->num_elements;
+ u16 num_requested_filters = ice_vsi_num_non_zero_vlans(vsi) +
+ vfl->num_elements;
if (num_requested_filters > vfc->max_filters)
return false;
while (i != tx_ring->next_to_use) {
union e1000_adv_tx_desc *eop_desc, *tx_desc;
- /* Free all the Tx ring sk_buffs */
- dev_kfree_skb_any(tx_buffer->skb);
+ /* Free all the Tx ring sk_buffs or xdp frames */
+ if (tx_buffer->type == IGB_TYPE_SKB)
+ dev_kfree_skb_any(tx_buffer->skb);
+ else
+ xdp_return_frame(tx_buffer->xdpf);
/* unmap skb header data */
dma_unmap_single(tx_ring->dev,
struct e1000_hw *hw = &adapter->hw;
u32 dmac_thr;
u16 hwm;
+ u32 reg;
if (hw->mac.type > e1000_82580) {
if (adapter->flags & IGB_FLAG_DMAC) {
- u32 reg;
-
/* force threshold to 0. */
wr32(E1000_DMCTXTH, 0);
/* Disable BMC-to-OS Watchdog Enable */
if (hw->mac.type != e1000_i354)
reg &= ~E1000_DMACR_DC_BMC2OSW_EN;
-
wr32(E1000_DMACR, reg);
/* no lower threshold to disable
*/
wr32(E1000_DMCTXTH, (IGB_MIN_TXPBSIZE -
(IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6);
+ }
- /* make low power state decision controlled
- * by DMA coal
- */
+ if (hw->mac.type >= e1000_i210 ||
+ (adapter->flags & IGB_FLAG_DMAC)) {
reg = rd32(E1000_PCIEMISC);
- reg &= ~E1000_PCIEMISC_LX_DECISION;
+ reg |= E1000_PCIEMISC_LX_DECISION;
wr32(E1000_PCIEMISC, reg);
} /* endif adapter->dmac is not disabled */
} else if (hw->mac.type == e1000_82580) {
u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
u32 value = 0;
+ if (IGC_REMOVED(hw_addr))
+ return ~value;
+
value = readl(&hw_addr[reg]);
/* reads should not return all F's */
#define wr32(reg, val) \
do { \
u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
- writel((val), &hw_addr[(reg)]); \
+ if (!IGC_REMOVED(hw_addr)) \
+ writel((val), &hw_addr[(reg)]); \
} while (0)
#define rd32(reg) (igc_rd32(hw, reg))
#define array_rd32(reg, offset) (igc_rd32(hw, (reg) + ((offset) << 2)))
+#define IGC_REMOVED(h) unlikely(!(h))
+
#endif
#ifdef CONFIG_IXGBE_IPSEC
struct ixgbe_ipsec *ipsec;
#endif /* CONFIG_IXGBE_IPSEC */
+ spinlock_t vfs_lock;
};
static inline int ixgbe_determine_xdp_q_idx(int cpu)
/* n-tuple support exists, always init our spinlock */
spin_lock_init(&adapter->fdir_perfect_lock);
+ /* init spinlock to avoid concurrency of VF resources */
+ spin_lock_init(&adapter->vfs_lock);
+
#ifdef CONFIG_IXGBE_DCB
ixgbe_init_dcb(adapter);
#endif
int ixgbe_disable_sriov(struct ixgbe_adapter *adapter)
{
unsigned int num_vfs = adapter->num_vfs, vf;
+ unsigned long flags;
int rss;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
/* set num VFs to 0 to prevent access to vfinfo */
adapter->num_vfs = 0;
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
/* put the reference to all of the vf devices */
for (vf = 0; vf < num_vfs; ++vf) {
switch (xcast_mode) {
case IXGBEVF_XCAST_MODE_NONE:
- disable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE |
+ disable = IXGBE_VMOLR_ROMPE |
IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
- enable = 0;
+ enable = IXGBE_VMOLR_BAM;
break;
case IXGBEVF_XCAST_MODE_MULTI:
disable = IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
return -EPERM;
}
- disable = 0;
+ disable = IXGBE_VMOLR_VPE;
enable = IXGBE_VMOLR_BAM | IXGBE_VMOLR_ROMPE |
- IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE | IXGBE_VMOLR_VPE;
+ IXGBE_VMOLR_MPE | IXGBE_VMOLR_UPE;
break;
default:
return -EOPNOTSUPP;
void ixgbe_msg_task(struct ixgbe_adapter *adapter)
{
struct ixgbe_hw *hw = &adapter->hw;
+ unsigned long flags;
u32 vf;
+ spin_lock_irqsave(&adapter->vfs_lock, flags);
for (vf = 0; vf < adapter->num_vfs; vf++) {
/* process any reset requests */
if (!ixgbe_check_for_rst(hw, vf))
if (!ixgbe_check_for_ack(hw, vf))
ixgbe_rcv_ack_from_vf(adapter, vf);
}
+ spin_unlock_irqrestore(&adapter->vfs_lock, flags);
}
static inline void ixgbe_ping_vf(struct ixgbe_adapter *adapter, int vf)
#define CN93_SDP_EPF_RINFO_SRN(val) ((val) & 0xFF)
#define CN93_SDP_EPF_RINFO_RPVF(val) (((val) >> 32) & 0xF)
-#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) && 0xFF)
+#define CN93_SDP_EPF_RINFO_NVFS(val) (((val) >> 48) & 0xFF)
/* SDP Function select */
#define CN93_SDP_FUNC_SEL_EPF_BIT_POS 8
static const struct ethtool_ops otx2vf_ethtool_ops = {
.supported_coalesce_params = ETHTOOL_COALESCE_USECS |
- ETHTOOL_COALESCE_MAX_FRAMES,
+ ETHTOOL_COALESCE_MAX_FRAMES |
+ ETHTOOL_COALESCE_USE_ADAPTIVE,
.supported_ring_params = ETHTOOL_RING_USE_RX_BUF_LEN |
ETHTOOL_RING_USE_CQE_SIZE,
.get_link = otx2_get_link,
#define MAX_RATE_EXPONENT 0x0FULL
#define MAX_RATE_MANTISSA 0xFFULL
+#define CN10K_MAX_BURST_MANTISSA 0x7FFFULL
+#define CN10K_MAX_BURST_SIZE 8453888ULL
+
/* Bitfields in NIX_TLX_PIR register */
#define TLX_RATE_MANTISSA GENMASK_ULL(8, 1)
#define TLX_RATE_EXPONENT GENMASK_ULL(12, 9)
#define TLX_BURST_MANTISSA GENMASK_ULL(36, 29)
#define TLX_BURST_EXPONENT GENMASK_ULL(40, 37)
+#define CN10K_TLX_BURST_MANTISSA GENMASK_ULL(43, 29)
+#define CN10K_TLX_BURST_EXPONENT GENMASK_ULL(47, 44)
+
struct otx2_tc_flow_stats {
u64 bytes;
u64 pkts;
}
EXPORT_SYMBOL(otx2_tc_alloc_ent_bitmap);
-static void otx2_get_egress_burst_cfg(u32 burst, u32 *burst_exp,
- u32 *burst_mantissa)
+static void otx2_get_egress_burst_cfg(struct otx2_nic *nic, u32 burst,
+ u32 *burst_exp, u32 *burst_mantissa)
{
+ int max_burst, max_mantissa;
unsigned int tmp;
+ if (is_dev_otx2(nic->pdev)) {
+ max_burst = MAX_BURST_SIZE;
+ max_mantissa = MAX_BURST_MANTISSA;
+ } else {
+ max_burst = CN10K_MAX_BURST_SIZE;
+ max_mantissa = CN10K_MAX_BURST_MANTISSA;
+ }
+
/* Burst is calculated as
* ((256 + BURST_MANTISSA) << (1 + BURST_EXPONENT)) / 256
* Max supported burst size is 130,816 bytes.
*/
- burst = min_t(u32, burst, MAX_BURST_SIZE);
+ burst = min_t(u32, burst, max_burst);
if (burst) {
*burst_exp = ilog2(burst) ? ilog2(burst) - 1 : 0;
tmp = burst - rounddown_pow_of_two(burst);
- if (burst < MAX_BURST_MANTISSA)
+ if (burst < max_mantissa)
*burst_mantissa = tmp * 2;
else
*burst_mantissa = tmp / (1ULL << (*burst_exp - 7));
} else {
*burst_exp = MAX_BURST_EXPONENT;
- *burst_mantissa = MAX_BURST_MANTISSA;
+ *burst_mantissa = max_mantissa;
}
}
-static void otx2_get_egress_rate_cfg(u32 maxrate, u32 *exp,
+static void otx2_get_egress_rate_cfg(u64 maxrate, u32 *exp,
u32 *mantissa, u32 *div_exp)
{
- unsigned int tmp;
+ u64 tmp;
/* Rate calculation by hardware
*
}
}
-static int otx2_set_matchall_egress_rate(struct otx2_nic *nic, u32 burst, u32 maxrate)
+static u64 otx2_get_txschq_rate_regval(struct otx2_nic *nic,
+ u64 maxrate, u32 burst)
{
- struct otx2_hw *hw = &nic->hw;
- struct nix_txschq_config *req;
u32 burst_exp, burst_mantissa;
u32 exp, mantissa, div_exp;
+ u64 regval = 0;
+
+ /* Get exponent and mantissa values from the desired rate */
+ otx2_get_egress_burst_cfg(nic, burst, &burst_exp, &burst_mantissa);
+ otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
+
+ if (is_dev_otx2(nic->pdev)) {
+ regval = FIELD_PREP(TLX_BURST_EXPONENT, (u64)burst_exp) |
+ FIELD_PREP(TLX_BURST_MANTISSA, (u64)burst_mantissa) |
+ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ } else {
+ regval = FIELD_PREP(CN10K_TLX_BURST_EXPONENT, (u64)burst_exp) |
+ FIELD_PREP(CN10K_TLX_BURST_MANTISSA, (u64)burst_mantissa) |
+ FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
+ FIELD_PREP(TLX_RATE_EXPONENT, exp) |
+ FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ }
+
+ return regval;
+}
+
+static int otx2_set_matchall_egress_rate(struct otx2_nic *nic,
+ u32 burst, u64 maxrate)
+{
+ struct otx2_hw *hw = &nic->hw;
+ struct nix_txschq_config *req;
int txschq, err;
/* All SQs share the same TL4, so pick the first scheduler */
txschq = hw->txschq_list[NIX_TXSCH_LVL_TL4][0];
- /* Get exponent and mantissa values from the desired rate */
- otx2_get_egress_burst_cfg(burst, &burst_exp, &burst_mantissa);
- otx2_get_egress_rate_cfg(maxrate, &exp, &mantissa, &div_exp);
-
mutex_lock(&nic->mbox.lock);
req = otx2_mbox_alloc_msg_nix_txschq_cfg(&nic->mbox);
if (!req) {
req->lvl = NIX_TXSCH_LVL_TL4;
req->num_regs = 1;
req->reg[0] = NIX_AF_TL4X_PIR(txschq);
- req->regval[0] = FIELD_PREP(TLX_BURST_EXPONENT, burst_exp) |
- FIELD_PREP(TLX_BURST_MANTISSA, burst_mantissa) |
- FIELD_PREP(TLX_RATE_DIVIDER_EXPONENT, div_exp) |
- FIELD_PREP(TLX_RATE_EXPONENT, exp) |
- FIELD_PREP(TLX_RATE_MANTISSA, mantissa) | BIT_ULL(0);
+ req->regval[0] = otx2_get_txschq_rate_regval(nic, maxrate, burst);
err = otx2_sync_mbox_msg(&nic->mbox);
mutex_unlock(&nic->mbox.lock);
struct netlink_ext_ack *extack = cls->common.extack;
struct flow_action *actions = &cls->rule->action;
struct flow_action_entry *entry;
- u32 rate;
+ u64 rate;
int err;
err = otx2_tc_validate_flow(nic, actions, extack);
}
/* Convert bytes per second to Mbps */
rate = entry->police.rate_bytes_ps * 8;
- rate = max_t(u32, rate / 1000000, 1);
+ rate = max_t(u64, rate / 1000000, 1);
err = otx2_set_matchall_egress_rate(nic, entry->police.burst, rate);
if (err)
return err;
flow_spec->dport = match.key->dst;
flow_mask->dport = match.mask->dst;
- if (ip_proto == IPPROTO_UDP)
- req->features |= BIT_ULL(NPC_DPORT_UDP);
- else if (ip_proto == IPPROTO_TCP)
- req->features |= BIT_ULL(NPC_DPORT_TCP);
- else if (ip_proto == IPPROTO_SCTP)
- req->features |= BIT_ULL(NPC_DPORT_SCTP);
+
+ if (flow_mask->dport) {
+ if (ip_proto == IPPROTO_UDP)
+ req->features |= BIT_ULL(NPC_DPORT_UDP);
+ else if (ip_proto == IPPROTO_TCP)
+ req->features |= BIT_ULL(NPC_DPORT_TCP);
+ else if (ip_proto == IPPROTO_SCTP)
+ req->features |= BIT_ULL(NPC_DPORT_SCTP);
+ }
flow_spec->sport = match.key->src;
flow_mask->sport = match.mask->src;
- if (ip_proto == IPPROTO_UDP)
- req->features |= BIT_ULL(NPC_SPORT_UDP);
- else if (ip_proto == IPPROTO_TCP)
- req->features |= BIT_ULL(NPC_SPORT_TCP);
- else if (ip_proto == IPPROTO_SCTP)
- req->features |= BIT_ULL(NPC_SPORT_SCTP);
+
+ if (flow_mask->sport) {
+ if (ip_proto == IPPROTO_UDP)
+ req->features |= BIT_ULL(NPC_SPORT_UDP);
+ else if (ip_proto == IPPROTO_TCP)
+ req->features |= BIT_ULL(NPC_SPORT_TCP);
+ else if (ip_proto == IPPROTO_SCTP)
+ req->features |= BIT_ULL(NPC_SPORT_SCTP);
+ }
}
return otx2_tc_parse_actions(nic, &rule->action, req, f, node);
}
port = netdev_priv(ingress_dev);
- mask = htons(0x1FFF);
- key = htons(port->hw_id);
+ mask = htons(0x1FFF << 3);
+ key = htons(port->hw_id << 3);
rule_match_set(r_match->key, SYS_PORT, key);
rule_match_set(r_match->mask, SYS_PORT, mask);
- mask = htons(0x1FF);
+ mask = htons(0x3FF);
key = htons(port->dev_id);
rule_match_set(r_match->key, SYS_DEV, key);
rule_match_set(r_match->mask, SYS_DEV, mask);
void prestera_router_fini(struct prestera_switch *sw)
{
+ unregister_fib_notifier(&init_net, &sw->router->fib_nb);
unregister_inetaddr_notifier(&sw->router->inetaddr_nb);
unregister_inetaddr_validator_notifier(&sw->router->inetaddr_valid_nb);
rhashtable_destroy(&sw->router->kern_fib_cache_ht);
return true;
}
+static void *mtk_max_lro_buf_alloc(gfp_t gfp_mask)
+{
+ unsigned int size = mtk_max_frag_size(MTK_MAX_LRO_RX_LENGTH);
+ unsigned long data;
+
+ data = __get_free_pages(gfp_mask | __GFP_COMP | __GFP_NOWARN,
+ get_order(size));
+
+ return (void *)data;
+}
+
/* the qdma core needs scratch memory to be setup */
static int mtk_init_fq_dma(struct mtk_eth *eth)
{
goto release_desc;
/* alloc new buffer */
- new_data = napi_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ new_data = napi_alloc_frag(ring->frag_size);
+ else
+ new_data = mtk_max_lro_buf_alloc(GFP_ATOMIC);
if (unlikely(!new_data)) {
netdev->stats.rx_dropped++;
goto release_desc;
return -ENOMEM;
for (i = 0; i < rx_dma_size; i++) {
- ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ if (ring->frag_size <= PAGE_SIZE)
+ ring->data[i] = netdev_alloc_frag(ring->frag_size);
+ else
+ ring->data[i] = mtk_max_lro_buf_alloc(GFP_KERNEL);
if (!ring->data[i])
return -ENOMEM;
}
};
struct net_device_path path = {};
+ if (!ctx.dev)
+ return -ENODEV;
+
memcpy(ctx.daddr, addr, sizeof(ctx.daddr));
if (!IS_ENABLED(CONFIG_NET_MEDIATEK_SOC_WED))
* WDMA RX.
*/
- BUG_ON(idx > ARRAY_SIZE(dev->tx_ring));
+ BUG_ON(idx >= ARRAY_SIZE(dev->tx_ring));
if (mtk_wed_ring_alloc(dev, ring, MTK_WED_TX_RING_SIZE))
return -ENOMEM;
en_err(priv,
"mlx4_get_module_info i(%d) offset(%d) bytes_to_read(%d) - FAILED (0x%x)\n",
i, offset, ee->len - i, ret);
- return 0;
+ return ret;
}
i += ret;
return pci_get_drvdata(to_pci_dev(other));
}
-static int next_phys_dev(struct device *dev, const void *data)
-{
- struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
-
- mdev = pci_get_other_drvdata(this->device, dev);
- if (!mdev)
- return 0;
-
- return _next_phys_dev(mdev, data);
-}
-
static int next_phys_dev_lag(struct device *dev, const void *data)
{
struct mlx5_core_dev *mdev, *this = (struct mlx5_core_dev *)data;
return pci_get_drvdata(to_pci_dev(next));
}
-/* Must be called with intf_mutex held */
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev)
-{
- lockdep_assert_held(&mlx5_intf_mutex);
- return mlx5_get_next_dev(dev, &next_phys_dev);
-}
-
/* Must be called with intf_mutex held */
struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev)
{
if (!tracer->owner)
return;
+ if (unlikely(!tracer->str_db.loaded))
+ goto arm;
+
block_count = tracer->buff.size / TRACER_BLOCK_SIZE_BYTE;
start_offset = tracer->buff.consumer_index * TRACER_BLOCK_SIZE_BYTE;
&tmp_trace_block[TRACES_PER_BLOCK - 1]);
}
+arm:
mlx5_fw_tracer_arm(dev);
}
queue_work(tracer->work_queue, &tracer->ownership_change_work);
break;
case MLX5_TRACER_SUBTYPE_TRACES_AVAILABLE:
- if (likely(tracer->str_db.loaded))
- queue_work(tracer->work_queue, &tracer->handle_traces_work);
+ queue_work(tracer->work_queue, &tracer->handle_traces_work);
break;
default:
mlx5_core_dbg(dev, "FWTracer: Event with unrecognized subtype: sub_type %d\n",
static u8 rq_end_pad_mode(struct mlx5_core_dev *mdev, struct mlx5e_params *params)
{
bool lro_en = params->packet_merge.type == MLX5E_PACKET_MERGE_LRO;
- bool ro = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
+ bool ro = pcie_relaxed_ordering_enabled(mdev->pdev) &&
+ MLX5_CAP_GEN(mdev, relaxed_ordering_write);
return ro && lro_en ?
MLX5_WQ_END_PAD_MODE_NONE : MLX5_WQ_END_PAD_MODE_ALIGN;
struct mlx5_ct_fs *fs;
struct mlx5_ct_fs_ops *fs_ops;
spinlock_t ht_lock; /* protects ft entries */
+ struct workqueue_struct *wq;
struct mlx5_tc_ct_debugfs debugfs;
};
static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
- struct mlx5e_priv *priv;
-
if (!refcount_dec_and_test(&entry->refcnt))
return;
- priv = netdev_priv(entry->ct_priv->netdev);
INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
- queue_work(priv->wq, &entry->work);
+ queue_work(entry->ct_priv->wq, &entry->work);
}
static struct mlx5_ct_counter *
static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
- struct mlx5e_priv *priv;
-
if (!refcount_dec_and_test(&ft->refcount))
return;
+ flush_workqueue(ct_priv->wq);
nf_flow_table_offload_del_cb(ft->nf_ft,
mlx5_tc_ct_block_flow_offload, ft);
rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
rhashtable_free_and_destroy(&ft->ct_entries_ht,
mlx5_tc_ct_flush_ft_entry,
ct_priv);
- priv = netdev_priv(ct_priv->netdev);
- flush_workqueue(priv->wq);
mlx5_tc_ct_free_pre_ct_tables(ft);
mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
kfree(ft);
if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
goto err_ct_tuples_nat_ht;
+ ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
+ if (!ct_priv->wq) {
+ err = -ENOMEM;
+ goto err_wq;
+ }
+
err = mlx5_tc_ct_fs_init(ct_priv);
if (err)
goto err_init_fs;
return ct_priv;
err_init_fs:
+ destroy_workqueue(ct_priv->wq);
+err_wq:
rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
err_ct_tuples_nat_ht:
rhashtable_destroy(&ct_priv->ct_tuples_ht);
if (!ct_priv)
return;
+ destroy_workqueue(ct_priv->wq);
mlx5_ct_tc_remove_dbgfs(ct_priv);
chains = ct_priv->chains;
struct mlx5e_ktls_offload_context_rx **ctx =
__tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_RX);
- BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_rx *) >
- TLS_OFFLOAD_CONTEXT_SIZE_RX);
+ BUILD_BUG_ON(sizeof(priv_rx) > TLS_DRIVER_STATE_SIZE_RX);
*ctx = priv_rx;
}
struct mlx5e_ktls_offload_context_tx **ctx =
__tls_driver_ctx(tls_ctx, TLS_OFFLOAD_CTX_DIR_TX);
- BUILD_BUG_ON(sizeof(struct mlx5e_ktls_offload_context_tx *) >
- TLS_OFFLOAD_CONTEXT_SIZE_TX);
+ BUILD_BUG_ON(sizeof(priv_tx) > TLS_DRIVER_STATE_SIZE_TX);
*ctx = priv_tx;
}
void mlx5e_mkey_set_relaxed_ordering(struct mlx5_core_dev *mdev, void *mkc)
{
+ bool ro_pci_enable = pcie_relaxed_ordering_enabled(mdev->pdev);
bool ro_write = MLX5_CAP_GEN(mdev, relaxed_ordering_write);
bool ro_read = MLX5_CAP_GEN(mdev, relaxed_ordering_read);
- MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_read);
- MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_write);
+ MLX5_SET(mkc, mkc, relaxed_ordering_read, ro_pci_enable && ro_read);
+ MLX5_SET(mkc, mkc, relaxed_ordering_write, ro_pci_enable && ro_write);
}
static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
return err;
}
+static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
+{
+ mlx5e_rep_tc_netdevice_event_unregister(rpriv);
+ mlx5e_rep_bond_cleanup(rpriv);
+ mlx5e_rep_tc_cleanup(rpriv);
+}
+
static int mlx5e_init_rep_tx(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
return err;
}
- err = mlx5e_tc_ht_init(&rpriv->tc_ht);
- if (err)
- goto err_ht_init;
-
if (rpriv->rep->vport == MLX5_VPORT_UPLINK) {
err = mlx5e_init_uplink_rep_tx(rpriv);
if (err)
goto err_init_tx;
}
+ err = mlx5e_tc_ht_init(&rpriv->tc_ht);
+ if (err)
+ goto err_ht_init;
+
return 0;
-err_init_tx:
- mlx5e_tc_ht_cleanup(&rpriv->tc_ht);
err_ht_init:
+ if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
+ mlx5e_cleanup_uplink_rep_tx(rpriv);
+err_init_tx:
mlx5e_destroy_tises(priv);
return err;
}
-static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
-{
- mlx5e_rep_tc_netdevice_event_unregister(rpriv);
- mlx5e_rep_bond_cleanup(rpriv);
- mlx5e_rep_tc_cleanup(rpriv);
-}
-
static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
{
struct mlx5e_rep_priv *rpriv = priv->ppriv;
- mlx5e_destroy_tises(priv);
+ mlx5e_tc_ht_cleanup(&rpriv->tc_ht);
if (rpriv->rep->vport == MLX5_VPORT_UPLINK)
mlx5e_cleanup_uplink_rep_tx(rpriv);
- mlx5e_tc_ht_cleanup(&rpriv->tc_ht);
+ mlx5e_destroy_tises(priv);
}
static void mlx5e_rep_enable(struct mlx5e_priv *priv)
u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
struct mlx5_core_dev *mdev = priv->mdev;
- if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
+ if (!mlx5e_stats_grp_vnic_env_num_stats(priv))
return;
MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
{
- if (mlx5e_eswitch_uplink_rep(out_dev) &&
+ if (same_hw_reps(priv, out_dev) &&
MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
return true;
return -EOPNOTSUPP;
}
- if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
- act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
- NL_SET_ERR_MSG_MOD(extack,
- "Offload not supported when conform action is not pipe or ok");
- return -EOPNOTSUPP;
- }
-
if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
!flow_action_is_last_entry(action, act)) {
NL_SET_ERR_MSG_MOD(extack,
flow_action_for_each(i, act, flow_action) {
switch (act->id) {
case FLOW_ACTION_POLICE:
+ if (act->police.notexceed.act_id != FLOW_ACTION_CONTINUE) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Offload not supported when conform action is not continue");
+ return -EOPNOTSUPP;
+ }
+
err = mlx5e_policer_validate(flow_action, act, extack);
if (err)
return err;
}
}
+static void mlx5e_tx_flush(struct mlx5e_txqsq *sq)
+{
+ struct mlx5e_tx_wqe_info *wi;
+ struct mlx5e_tx_wqe *wqe;
+ u16 pi;
+
+ /* Must not be called when a MPWQE session is active but empty. */
+ mlx5e_tx_mpwqe_ensure_complete(sq);
+
+ pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+ wi = &sq->db.wqe_info[pi];
+
+ *wi = (struct mlx5e_tx_wqe_info) {
+ .num_wqebbs = 1,
+ };
+
+ wqe = mlx5e_post_nop(&sq->wq, sq->sqn, &sq->pc);
+ mlx5e_notify_hw(&sq->wq, sq->pc, sq->uar_map, &wqe->ctrl);
+}
+
static inline void
mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
const struct mlx5e_tx_attr *attr,
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
static bool mlx5e_tx_skb_supports_mpwqe(struct sk_buff *skb, struct mlx5e_tx_attr *attr)
struct mlx5_wqe_ctrl_seg *cseg;
struct mlx5e_xmit_data txd;
+ txd.data = skb->data;
+ txd.len = skb->len;
+
+ txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
+ if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
+ goto err_unmap;
+
if (!mlx5e_tx_mpwqe_session_is_active(sq)) {
mlx5e_tx_mpwqe_session_start(sq, eseg);
} else if (!mlx5e_tx_mpwqe_same_eseg(sq, eseg)) {
sq->stats->xmit_more += xmit_more;
- txd.data = skb->data;
- txd.len = skb->len;
-
- txd.dma_addr = dma_map_single(sq->pdev, txd.data, txd.len, DMA_TO_DEVICE);
- if (unlikely(dma_mapping_error(sq->pdev, txd.dma_addr)))
- goto err_unmap;
mlx5e_dma_push(sq, txd.dma_addr, txd.len, MLX5E_DMA_MAP_SINGLE);
-
mlx5e_skb_fifo_push(&sq->db.skb_fifo, skb);
-
mlx5e_tx_mpwqe_add_dseg(sq, &txd);
-
mlx5e_tx_skb_update_hwts_flags(skb);
if (unlikely(mlx5e_tx_mpwqe_is_full(&sq->mpwqe, sq->max_sq_mpw_wqebbs))) {
mlx5e_dma_unmap_wqe_err(sq, 1);
sq->stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
void mlx5e_tx_mpwqe_ensure_complete(struct mlx5e_txqsq *sq)
err_drop:
stats->dropped++;
dev_kfree_skb_any(skb);
+ mlx5e_tx_flush(sq);
}
#endif
#include "mlx5_core.h"
#include "eswitch.h"
#include "fs_core.h"
+#include "fs_ft_pool.h"
#include "esw/qos.h"
enum {
if (!flow_group_in)
return -ENOMEM;
- table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));
- ft_attr.max_fte = table_size;
+ ft_attr.max_fte = POOL_NEXT_SIZE;
ft_attr.prio = LEGACY_FDB_PRIO;
fdb = mlx5_create_flow_table(root_ns, &ft_attr);
if (IS_ERR(fdb)) {
goto out;
}
esw->fdb_table.legacy.fdb = fdb;
+ table_size = fdb->max_fte;
/* Addresses group : Full match unicast/multicast addresses */
MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
switch (event) {
case ESW_OFFLOADS_DEVCOM_PAIR:
- if (mlx5_get_next_phys_dev(esw->dev) != peer_esw->dev)
- break;
-
if (mlx5_eswitch_vport_match_metadata_enabled(esw) !=
mlx5_eswitch_vport_match_metadata_enabled(peer_esw))
break;
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
mlx5_devcom_register_component(devcom,
MLX5_DEVCOM_ESW_OFFLOADS,
mlx5_esw_offloads_devcom_event,
if (!MLX5_CAP_ESW(esw->dev, merged_eswitch))
return;
+ if (!mlx5_is_lag_supported(esw->dev))
+ return;
+
mlx5_devcom_send_event(devcom, MLX5_DEVCOM_ESW_OFFLOADS,
ESW_OFFLOADS_DEVCOM_UNPAIR, esw);
return NULL;
}
-static bool check_conflicting_actions(u32 action1, u32 action2)
+static bool check_conflicting_actions_vlan(const struct mlx5_fs_vlan *vlan0,
+ const struct mlx5_fs_vlan *vlan1)
{
- u32 xored_actions = action1 ^ action2;
+ return vlan0->ethtype != vlan1->ethtype ||
+ vlan0->vid != vlan1->vid ||
+ vlan0->prio != vlan1->prio;
+}
+
+static bool check_conflicting_actions(const struct mlx5_flow_act *act1,
+ const struct mlx5_flow_act *act2)
+{
+ u32 action1 = act1->action;
+ u32 action2 = act2->action;
+ u32 xored_actions;
+
+ xored_actions = action1 ^ action2;
/* if one rule only wants to count, it's ok */
if (action1 == MLX5_FLOW_CONTEXT_ACTION_COUNT ||
MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2))
return true;
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT &&
+ act1->pkt_reformat != act2->pkt_reformat)
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
+ act1->modify_hdr != act2->modify_hdr)
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH &&
+ check_conflicting_actions_vlan(&act1->vlan[0], &act2->vlan[0]))
+ return true;
+
+ if (action1 & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH_2 &&
+ check_conflicting_actions_vlan(&act1->vlan[1], &act2->vlan[1]))
+ return true;
+
return false;
}
const struct mlx5_flow_context *flow_context,
const struct mlx5_flow_act *flow_act)
{
- if (check_conflicting_actions(flow_act->action, fte->action.action)) {
+ if (check_conflicting_actions(flow_act, &fte->action)) {
mlx5_core_warn(get_dev(&fte->node),
"Found two FTEs with conflicting actions\n");
return -EEXIST;
ldev = dev->priv.lag;
mutex_lock(&ldev->lock);
if (__mlx5_lag_is_active(ldev))
- mode = mlx5_get_str_port_sel_mode(ldev);
+ mode = mlx5_get_str_port_sel_mode(ldev->mode, ldev->mode_flags);
else
ret = -EINVAL;
mutex_unlock(&ldev->lock);
static int flags_show(struct seq_file *file, void *priv)
{
struct mlx5_core_dev *dev = file->private;
+ bool fdb_sel_mode_native;
struct mlx5_lag *ldev;
bool shared_fdb;
bool lag_active;
ldev = dev->priv.lag;
mutex_lock(&ldev->lock);
lag_active = __mlx5_lag_is_active(ldev);
- if (lag_active)
- shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ if (!lag_active)
+ goto unlock;
+
+ shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &ldev->mode_flags);
+ fdb_sel_mode_native = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &ldev->mode_flags);
+unlock:
mutex_unlock(&ldev->lock);
if (!lag_active)
return -EINVAL;
seq_printf(file, "%s:%s\n", "shared_fdb", shared_fdb ? "on" : "off");
+ seq_printf(file, "%s:%s\n", "fdb_selection_mode",
+ fdb_sel_mode_native ? "native" : "affinity");
return 0;
}
static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 *ports, int mode,
unsigned long flags)
{
- bool shared_fdb = test_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, &flags);
+ bool fdb_sel_mode = test_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
+ &flags);
int port_sel_mode = get_port_sel_mode(mode, flags);
u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
void *lag_ctx;
lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
- MLX5_SET(lagc, lag_ctx, fdb_selection_mode, shared_fdb);
+ MLX5_SET(lagc, lag_ctx, fdb_selection_mode, fdb_sel_mode);
if (port_sel_mode == MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY) {
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, ports[0]);
MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, ports[1]);
bool roce_lag = mode == MLX5_LAG_MODE_ROCE;
*flags = 0;
- if (shared_fdb)
+ if (shared_fdb) {
set_bit(MLX5_LAG_MODE_FLAG_SHARED_FDB, flags);
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
+ }
+
+ if (mode == MLX5_LAG_MODE_MPESW)
+ set_bit(MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE, flags);
if (roce_lag)
return mlx5_lag_set_port_sel_mode_roce(ldev, flags);
return 0;
}
-char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev)
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags)
{
- int port_sel_mode = get_port_sel_mode(ldev->mode, ldev->mode_flags);
+ int port_sel_mode = get_port_sel_mode(mode, flags);
switch (port_sel_mode) {
case MLX5_LAG_PORT_SELECT_MODE_QUEUE_AFFINITY: return "queue_affinity";
if (tracker)
mlx5_lag_print_mapping(dev0, ldev, tracker, flags);
mlx5_core_info(dev0, "shared_fdb:%d mode:%s\n",
- shared_fdb, mlx5_get_str_port_sel_mode(ldev));
+ shared_fdb, mlx5_get_str_port_sel_mode(mode, flags));
err = mlx5_cmd_create_lag(dev0, ldev->v2p_map, mode, flags);
if (err) {
{
struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
struct mlx5_core_dev *dev1 = ldev->pf[MLX5_LAG_P2].dev;
- struct lag_tracker tracker;
+ struct lag_tracker tracker = { };
bool do_bond, roce_lag;
int err;
int i;
enum {
MLX5_LAG_MODE_FLAG_HASH_BASED,
MLX5_LAG_MODE_FLAG_SHARED_FDB,
+ MLX5_LAG_MODE_FLAG_FDB_SEL_MODE_NATIVE,
};
enum mlx5_lag_mode {
struct lag_mpesw lag_mpesw;
};
+static inline bool mlx5_is_lag_supported(struct mlx5_core_dev *dev)
+{
+ if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
+ !MLX5_CAP_GEN(dev, lag_master) ||
+ MLX5_CAP_GEN(dev, num_lag_ports) < 2 ||
+ MLX5_CAP_GEN(dev, num_lag_ports) > MLX5_MAX_PORTS)
+ return false;
+ return true;
+}
+
static inline struct mlx5_lag *
mlx5_lag_dev(struct mlx5_core_dev *dev)
{
void mlx5_lag_del_mpesw_rule(struct mlx5_core_dev *dev);
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev);
-char *mlx5_get_str_port_sel_mode(struct mlx5_lag *ldev);
+char *mlx5_get_str_port_sel_mode(enum mlx5_lag_mode mode, unsigned long flags);
void mlx5_infer_tx_enabled(struct lag_tracker *tracker, u8 num_ports,
u8 *ports, int *num_enabled);
int mlx5_lag_add_mpesw_rule(struct mlx5_core_dev *dev)
{
struct mlx5_lag *ldev = dev->priv.lag;
- bool shared_fdb;
int err = 0;
if (!ldev)
err = -EINVAL;
goto out;
}
- shared_fdb = mlx5_shared_fdb_supported(ldev);
- err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, shared_fdb);
+
+ err = mlx5_activate_lag(ldev, NULL, MLX5_LAG_MODE_MPESW, false);
if (err)
mlx5_core_warn(dev, "Failed to create LAG in MPESW mode (%d)\n", err);
void mlx5_detach_device(struct mlx5_core_dev *dev);
int mlx5_register_device(struct mlx5_core_dev *dev);
void mlx5_unregister_device(struct mlx5_core_dev *dev);
-struct mlx5_core_dev *mlx5_get_next_phys_dev(struct mlx5_core_dev *dev);
struct mlx5_core_dev *mlx5_get_next_phys_dev_lag(struct mlx5_core_dev *dev);
void mlx5_dev_list_lock(void);
void mlx5_dev_list_unlock(void);
#include "spectrum.h"
enum mlxsw_sp_counter_sub_pool_id {
- MLXSW_SP_COUNTER_SUB_POOL_FLOW,
MLXSW_SP_COUNTER_SUB_POOL_RIF,
+ MLXSW_SP_COUNTER_SUB_POOL_FLOW,
};
int mlxsw_sp_counter_alloc(struct mlxsw_sp *mlxsw_sp,
return 0;
err_nexthop_neigh_init:
+ list_del(&nh->router_list_node);
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
mlxsw_sp_nexthop_remove(mlxsw_sp, nh);
return err;
}
{
const struct fib_nh *nh = fib_info_nh(fi, 0);
- return nh->fib_nh_scope == RT_SCOPE_LINK ||
+ return nh->fib_nh_gw_family ||
mlxsw_sp_nexthop4_ipip_type(mlxsw_sp, nh, NULL);
}
const struct fib6_info *rt)
{
struct net_device *dev = rt->fib6_nh->fib_nh_dev;
+ int err;
nh->nhgi = nh_grp->nhgi;
nh->nh_weight = rt->fib6_nh->fib_nh_weight;
return 0;
nh->ifindex = dev->ifindex;
- return mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
+ err = mlxsw_sp_nexthop_type_init(mlxsw_sp, nh, dev);
+ if (err)
+ goto err_nexthop_type_init;
+
+ return 0;
+
+err_nexthop_type_init:
+ list_del(&nh->router_list_node);
+ mlxsw_sp_nexthop_counter_free(mlxsw_sp, nh);
+ return err;
}
static void mlxsw_sp_nexthop6_fini(struct mlxsw_sp *mlxsw_sp,
unsigned long *fields = config->fields;
u32 hash_fields;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
mlxsw_sp_mp4_hash_outer_addr(config);
break;
mlxsw_sp_mp_hash_inner_l3(config);
break;
case 3:
- hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
/* Outer */
MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_NOT_TCP_NOT_UDP);
MLXSW_SP_MP_HASH_HEADER_SET(headers, IPV4_EN_TCP_UDP);
static int __mlxsw_sp_router_init(struct mlxsw_sp *mlxsw_sp)
{
struct net *net = mlxsw_sp_net(mlxsw_sp);
- bool usp = net->ipv4.sysctl_ip_fwd_update_priority;
char rgcr_pl[MLXSW_REG_RGCR_LEN];
u64 max_rifs;
+ bool usp;
if (!MLXSW_CORE_RES_VALID(mlxsw_sp->core, MAX_RIFS))
return -EIO;
max_rifs = MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS);
+ usp = READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority);
mlxsw_reg_rgcr_pack(rgcr_pl, true, true);
mlxsw_reg_rgcr_max_router_interfaces_set(rgcr_pl, max_rifs);
unsigned int vid,
enum macaccess_entry_type type)
{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
lan966x_mac_select(lan966x, mac, vid);
/* Issue a write command */
ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_LEARN),
lan966x, ANA_MACACCESS);
- return lan966x_mac_wait_for_completion(lan966x);
+ ret = lan966x_mac_wait_for_completion(lan966x);
+ spin_unlock(&lan966x->mac_lock);
+
+ return ret;
}
/* The mask of the front ports is encoded inside the mac parameter via a call
return __lan966x_mac_learn(lan966x, port, false, mac, vid, type);
}
-int lan966x_mac_forget(struct lan966x *lan966x,
- const unsigned char mac[ETH_ALEN],
- unsigned int vid,
- enum macaccess_entry_type type)
+static int lan966x_mac_forget_locked(struct lan966x *lan966x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
{
+ lockdep_assert_held(&lan966x->mac_lock);
+
lan966x_mac_select(lan966x, mac, vid);
/* Issue a forget command */
return lan966x_mac_wait_for_completion(lan966x);
}
+int lan966x_mac_forget(struct lan966x *lan966x,
+ const unsigned char mac[ETH_ALEN],
+ unsigned int vid,
+ enum macaccess_entry_type type)
+{
+ int ret;
+
+ spin_lock(&lan966x->mac_lock);
+ ret = lan966x_mac_forget_locked(lan966x, mac, vid, type);
+ spin_unlock(&lan966x->mac_lock);
+
+ return ret;
+}
+
int lan966x_mac_cpu_learn(struct lan966x *lan966x, const char *addr, u16 vid)
{
return lan966x_mac_learn(lan966x, PGID_CPU, addr, vid, ENTRYTYPE_LOCKED);
{
struct lan966x_mac_entry *mac_entry;
- mac_entry = kzalloc(sizeof(*mac_entry), GFP_KERNEL);
+ mac_entry = kzalloc(sizeof(*mac_entry), GFP_ATOMIC);
if (!mac_entry)
return NULL;
struct lan966x_mac_entry *res = NULL;
struct lan966x_mac_entry *mac_entry;
- spin_lock(&lan966x->mac_lock);
list_for_each_entry(mac_entry, &lan966x->mac_entries, list) {
if (mac_entry->vid == vid &&
ether_addr_equal(mac, mac_entry->mac) &&
break;
}
}
- spin_unlock(&lan966x->mac_lock);
return res;
}
{
struct lan966x_mac_entry *mac_entry;
- if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL))
+ spin_lock(&lan966x->mac_lock);
+ if (lan966x_mac_lookup(lan966x, addr, vid, ENTRYTYPE_NORMAL)) {
+ spin_unlock(&lan966x->mac_lock);
return 0;
+ }
/* In case the entry already exists, don't add it again to SW,
* just update HW, but we need to look in the actual HW because
* add the entry but without the extern_learn flag.
*/
mac_entry = lan966x_mac_find_entry(lan966x, addr, vid, port->chip_port);
- if (mac_entry)
- return lan966x_mac_learn(lan966x, port->chip_port,
- addr, vid, ENTRYTYPE_LOCKED);
+ if (mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
+ goto mac_learn;
+ }
mac_entry = lan966x_mac_alloc_entry(addr, vid, port->chip_port);
- if (!mac_entry)
+ if (!mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
return -ENOMEM;
+ }
- spin_lock(&lan966x->mac_lock);
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
- lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
lan966x_fdb_call_notifiers(SWITCHDEV_FDB_OFFLOADED, addr, vid, port->dev);
+mac_learn:
+ lan966x_mac_learn(lan966x, port->chip_port, addr, vid, ENTRYTYPE_LOCKED);
+
return 0;
}
list) {
if (mac_entry->vid == vid &&
ether_addr_equal(addr, mac_entry->mac)) {
- lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
- ENTRYTYPE_LOCKED);
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid,
+ ENTRYTYPE_LOCKED);
list_del(&mac_entry->list);
kfree(mac_entry);
spin_lock(&lan966x->mac_lock);
list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries,
list) {
- lan966x_mac_forget(lan966x, mac_entry->mac, mac_entry->vid,
- ENTRYTYPE_LOCKED);
+ lan966x_mac_forget_locked(lan966x, mac_entry->mac,
+ mac_entry->vid, ENTRYTYPE_LOCKED);
list_del(&mac_entry->list);
kfree(mac_entry);
{
struct lan966x_mac_entry *mac_entry, *tmp;
unsigned char mac[ETH_ALEN] __aligned(2);
+ struct list_head mac_deleted_entries;
u32 dest_idx;
u32 column;
u16 vid;
+ INIT_LIST_HEAD(&mac_deleted_entries);
+
spin_lock(&lan966x->mac_lock);
list_for_each_entry_safe(mac_entry, tmp, &lan966x->mac_entries, list) {
bool found = false;
}
if (!found) {
- /* Notify the bridge that the entry doesn't exist
- * anymore in the HW and remove the entry from the SW
- * list
- */
- lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
- mac_entry->mac, mac_entry->vid,
- lan966x->ports[mac_entry->port_index]->dev);
-
list_del(&mac_entry->list);
- kfree(mac_entry);
+ /* Move the entry from SW list to a tmp list such that
+ * it would be deleted later
+ */
+ list_add_tail(&mac_entry->list, &mac_deleted_entries);
}
}
spin_unlock(&lan966x->mac_lock);
+ list_for_each_entry_safe(mac_entry, tmp, &mac_deleted_entries, list) {
+ /* Notify the bridge that the entry doesn't exist
+ * anymore in the HW
+ */
+ lan966x_mac_notifiers(SWITCHDEV_FDB_DEL_TO_BRIDGE,
+ mac_entry->mac, mac_entry->vid,
+ lan966x->ports[mac_entry->port_index]->dev);
+ list_del(&mac_entry->list);
+ kfree(mac_entry);
+ }
+
/* Now go to the list of columns and see if any entry was not in the SW
* list, then that means that the entry is new so it needs to notify the
* bridge.
if (WARN_ON(dest_idx >= lan966x->num_phys_ports))
continue;
+ spin_lock(&lan966x->mac_lock);
+ mac_entry = lan966x_mac_find_entry(lan966x, mac, vid, dest_idx);
+ if (mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
+ continue;
+ }
+
mac_entry = lan966x_mac_alloc_entry(mac, vid, dest_idx);
- if (!mac_entry)
+ if (!mac_entry) {
+ spin_unlock(&lan966x->mac_lock);
return;
+ }
mac_entry->row = row;
-
- spin_lock(&lan966x->mac_lock);
list_add_tail(&mac_entry->list, &lan966x->mac_entries);
spin_unlock(&lan966x->mac_lock);
lan966x, ANA_MACTINDX);
while (1) {
+ spin_lock(&lan966x->mac_lock);
lan_rmw(ANA_MACACCESS_MAC_TABLE_CMD_SET(MACACCESS_CMD_SYNC_GET_NEXT),
ANA_MACACCESS_MAC_TABLE_CMD,
lan966x, ANA_MACACCESS);
stop = false;
if (column == LAN966X_MAC_COLUMNS - 1 &&
- index == 0 && stop)
+ index == 0 && stop) {
+ spin_unlock(&lan966x->mac_lock);
break;
+ }
entry[column].mach = lan_rd(lan966x, ANA_MACHDATA);
entry[column].macl = lan_rd(lan966x, ANA_MACLDATA);
entry[column].maca = lan_rd(lan966x, ANA_MACACCESS);
+ spin_unlock(&lan966x->mac_lock);
/* Once all the columns are read process them */
if (column == LAN966X_MAC_COLUMNS - 1) {
struct fwnode_handle *ports, *portnp;
struct lan966x *lan966x;
u8 mac_addr[ETH_ALEN];
- int err, i;
+ int err;
lan966x = devm_kzalloc(&pdev->dev, sizeof(*lan966x), GFP_KERNEL);
if (!lan966x)
if (err)
return dev_err_probe(&pdev->dev, err, "Reset failed");
- i = 0;
- fwnode_for_each_available_child_node(ports, portnp)
- ++i;
-
- lan966x->num_phys_ports = i;
+ lan966x->num_phys_ports = NUM_PHYS_PORTS;
lan966x->ports = devm_kcalloc(&pdev->dev, lan966x->num_phys_ports,
sizeof(struct lan966x_port *),
GFP_KERNEL);
/* Reserved amount for (SRC, PRIO) at index 8*SRC + PRIO */
#define QSYS_Q_RSRV 95
+#define NUM_PHYS_PORTS 8
#define CPU_PORT 8
/* Reserved PGIDs */
u32 mact_entry;
int res, err;
+ if (!sparx5_netdevice_check(dev))
+ return -EOPNOTSUPP;
+
if (netif_is_bridge_master(v->obj.orig_dev)) {
sparx5_mact_learn(spx5, PGID_CPU, v->addr, v->vid);
return 0;
u32 mact_entry, res, pgid_entry[3];
int err;
+ if (!sparx5_netdevice_check(dev))
+ return -EOPNOTSUPP;
+
if (netif_is_bridge_master(v->obj.orig_dev)) {
sparx5_mact_forget(spx5, v->addr, v->vid);
return 0;
ocelot_fdma_writel(ocelot, MSCC_FDMA_CH_ACTIVATE, BIT(chan));
}
+static u32 ocelot_fdma_read_ch_safe(struct ocelot *ocelot)
+{
+ return ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE);
+}
+
static int ocelot_fdma_wait_chan_safe(struct ocelot *ocelot, int chan)
{
- unsigned long timeout;
u32 safe;
- timeout = jiffies + usecs_to_jiffies(OCELOT_FDMA_CH_SAFE_TIMEOUT_US);
- do {
- safe = ocelot_fdma_readl(ocelot, MSCC_FDMA_CH_SAFE);
- if (safe & BIT(chan))
- return 0;
- } while (time_after(jiffies, timeout));
-
- return -ETIMEDOUT;
+ return readx_poll_timeout_atomic(ocelot_fdma_read_ch_safe, ocelot, safe,
+ safe & BIT(chan), 0,
+ OCELOT_FDMA_CH_SAFE_TIMEOUT_US);
}
static void ocelot_fdma_dcb_set_data(struct ocelot_fdma_dcb *dcb,
}
/* If the chain is ended by an load/store pair then this
- * could serve as the new head of the the next chain.
+ * could serve as the new head of the next chain.
*/
if (curr_pair_is_memcpy(meta1, meta2)) {
head_ld_meta = meta1;
set_tun->ttl = ip4_dst_hoplimit(&rt->dst);
ip_rt_put(rt);
} else {
- set_tun->ttl = net->ipv4.sysctl_ip_default_ttl;
+ set_tun->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
}
}
key_size += sizeof(struct nfp_flower_ipv6);
}
+ if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
+ map[FLOW_PAY_QINQ] = key_size;
+ key_size += sizeof(struct nfp_flower_vlan);
+ }
+
if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
map[FLOW_PAY_GRE] = key_size;
if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6)
key_size += sizeof(struct nfp_flower_ipv4_gre_tun);
}
- if (in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_QINQ) {
- map[FLOW_PAY_QINQ] = key_size;
- key_size += sizeof(struct nfp_flower_vlan);
- }
-
if ((in_key_ls.key_layer & NFP_FLOWER_LAYER_VXLAN) ||
(in_key_ls.key_layer_two & NFP_FLOWER_LAYER2_GENEVE)) {
map[FLOW_PAY_UDP_TUN] = key_size;
}
}
+ if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) {
+ offset = key_map[FLOW_PAY_QINQ];
+ key = kdata + offset;
+ msk = mdata + offset;
+ for (i = 0; i < _CT_TYPE_MAX; i++) {
+ nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
+ (struct nfp_flower_vlan *)msk,
+ rules[i]);
+ }
+ }
+
if (key_layer.key_layer_two & NFP_FLOWER_LAYER2_GRE) {
offset = key_map[FLOW_PAY_GRE];
key = kdata + offset;
}
}
- if (NFP_FLOWER_LAYER2_QINQ & key_layer.key_layer_two) {
- offset = key_map[FLOW_PAY_QINQ];
- key = kdata + offset;
- msk = mdata + offset;
- for (i = 0; i < _CT_TYPE_MAX; i++) {
- nfp_flower_compile_vlan((struct nfp_flower_vlan *)key,
- (struct nfp_flower_vlan *)msk,
- rules[i]);
- }
- }
-
if (key_layer.key_layer & NFP_FLOWER_LAYER_VXLAN ||
key_layer.key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
offset = key_map[FLOW_PAY_UDP_TUN];
msk += sizeof(struct nfp_flower_ipv6);
}
+ if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) {
+ nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext,
+ (struct nfp_flower_vlan *)msk,
+ rule);
+ ext += sizeof(struct nfp_flower_vlan);
+ msk += sizeof(struct nfp_flower_vlan);
+ }
+
if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_GRE) {
if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
struct nfp_flower_ipv6_gre_tun *gre_match;
}
}
- if (NFP_FLOWER_LAYER2_QINQ & key_ls->key_layer_two) {
- nfp_flower_compile_vlan((struct nfp_flower_vlan *)ext,
- (struct nfp_flower_vlan *)msk,
- rule);
- ext += sizeof(struct nfp_flower_vlan);
- msk += sizeof(struct nfp_flower_vlan);
- }
-
if (key_ls->key_layer & NFP_FLOWER_LAYER_VXLAN ||
key_ls->key_layer_two & NFP_FLOWER_LAYER2_GENEVE) {
if (key_ls->key_layer_two & NFP_FLOWER_LAYER2_TUN_IPV6) {
static void
nfp_tun_write_neigh(struct net_device *netdev, struct nfp_app *app,
- void *flow, struct neighbour *neigh, bool is_ipv6)
+ void *flow, struct neighbour *neigh, bool is_ipv6,
+ bool override)
{
bool neigh_invalid = !(neigh->nud_state & NUD_VALID) || neigh->dead;
size_t neigh_size = is_ipv6 ? sizeof(struct nfp_tun_neigh_v6) :
if (nn_entry->flow)
list_del(&nn_entry->list_head);
kfree(nn_entry);
+ } else if (nn_entry && !neigh_invalid && override) {
+ mtype = is_ipv6 ? NFP_FLOWER_CMSG_TYPE_TUN_NEIGH_V6 :
+ NFP_FLOWER_CMSG_TYPE_TUN_NEIGH;
+ nfp_tun_link_predt_entries(app, nn_entry);
+ nfp_flower_xmit_tun_conf(app, mtype, neigh_size,
+ nn_entry->payload,
+ GFP_ATOMIC);
}
spin_unlock_bh(&priv->predt_lock);
dst_release(dst);
}
- nfp_tun_write_neigh(n->dev, app, &flow6, n, true);
+ nfp_tun_write_neigh(n->dev, app, &flow6, n, true, false);
#else
return NOTIFY_DONE;
#endif /* CONFIG_IPV6 */
ip_rt_put(rt);
}
- nfp_tun_write_neigh(n->dev, app, &flow4, n, false);
+ nfp_tun_write_neigh(n->dev, app, &flow4, n, false, false);
}
#else
return NOTIFY_DONE;
ip_rt_put(rt);
if (!n)
goto fail_rcu_unlock;
- nfp_tun_write_neigh(n->dev, app, &flow, n, false);
+ nfp_tun_write_neigh(n->dev, app, &flow, n, false, true);
neigh_release(n);
rcu_read_unlock();
return;
if (!n)
goto fail_rcu_unlock;
- nfp_tun_write_neigh(n->dev, app, &flow, n, true);
+ nfp_tun_write_neigh(n->dev, app, &flow, n, true, true);
neigh_release(n);
rcu_read_unlock();
return;
static int
nfp_nfdk_tx_maybe_close_block(struct nfp_net_tx_ring *tx_ring,
- unsigned int nr_frags, struct sk_buff *skb)
+ struct sk_buff *skb)
{
unsigned int n_descs, wr_p, nop_slots;
const skb_frag_t *frag, *fend;
struct nfp_nfdk_tx_desc *txd;
+ unsigned int nr_frags;
unsigned int wr_idx;
int err;
recount_descs:
n_descs = nfp_nfdk_headlen_to_segs(skb_headlen(skb));
-
+ nr_frags = skb_shinfo(skb)->nr_frags;
frag = skb_shinfo(skb)->frags;
fend = frag + nr_frags;
for (; frag < fend; frag++)
if (unlikely((int)metadata < 0))
goto err_flush;
- nr_frags = skb_shinfo(skb)->nr_frags;
- if (nfp_nfdk_tx_maybe_close_block(tx_ring, nr_frags, skb))
+ if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
goto err_flush;
+ /* nr_frags will change after skb_linearize so we get nr_frags after
+ * nfp_nfdk_tx_maybe_close_block function
+ */
+ nr_frags = skb_shinfo(skb)->nr_frags;
/* DMA map all */
wr_idx = D_IDX(tx_ring, tx_ring->wr_p);
txd = &tx_ring->ktxds[wr_idx];
/* FIELD_PREP() implicitly truncates to chunk */
dma_len -= 1;
- dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
+
+ /* We will do our best to pass as much data as we can in descriptor
+ * and we need to make sure the first descriptor includes whole head
+ * since there is limitation in firmware side. Sometimes the value of
+ * dma_len bitwise and NFDK_DESC_TX_DMA_LEN_HEAD will less than
+ * headlen.
+ */
+ dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
+ dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
+ NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
/* FIELD_PREP() implicitly truncates to chunk */
dma_len -= 1;
- dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
+ dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
+ dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
+ NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
skb_push(skb, 4));
}
- if (nfp_nfdk_tx_maybe_close_block(tx_ring, 0, skb))
+ if (nfp_nfdk_tx_maybe_close_block(tx_ring, skb))
goto err_free;
/* DMA map all */
txbuf++;
dma_len -= 1;
- dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD, dma_len) |
+ dlen_type = FIELD_PREP(NFDK_DESC_TX_DMA_LEN_HEAD,
+ dma_len > NFDK_DESC_TX_DMA_LEN_HEAD ?
+ NFDK_DESC_TX_DMA_LEN_HEAD : dma_len) |
FIELD_PREP(NFDK_DESC_TX_TYPE_HEAD, type);
txd->dma_len_type = cpu_to_le16(dlen_type);
#include "nfp_net_sriov.h"
static int
-nfp_net_sriov_check(struct nfp_app *app, int vf, u16 cap, const char *msg)
+nfp_net_sriov_check(struct nfp_app *app, int vf, u16 cap, const char *msg, bool warn)
{
u16 cap_vf;
cap_vf = readw(app->pf->vfcfg_tbl2 + NFP_NET_VF_CFG_MB_CAP);
if ((cap_vf & cap) != cap) {
- nfp_warn(app->pf->cpp, "ndo_set_vf_%s not supported\n", msg);
+ if (warn)
+ nfp_warn(app->pf->cpp, "ndo_set_vf_%s not supported\n", msg);
return -EOPNOTSUPP;
}
if (vf < 0 || vf >= app->pf->num_vfs) {
- nfp_warn(app->pf->cpp, "invalid VF id %d\n", vf);
+ if (warn)
+ nfp_warn(app->pf->cpp, "invalid VF id %d\n", vf);
return -EINVAL;
}
unsigned int vf_offset;
int err;
- err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_MAC, "mac");
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_MAC, "mac", true);
if (err)
return err;
u32 vlan_tag;
int err;
- err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN, "vlan");
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN, "vlan", true);
if (err)
return err;
}
/* Check if fw supports or not */
- err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO, "vlan_proto");
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO, "vlan_proto", true);
if (err)
is_proto_sup = false;
u32 vf_offset, ratevalue;
int err;
- err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate");
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate", true);
if (err)
return err;
int err;
err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_SPOOF,
- "spoofchk");
+ "spoofchk", true);
if (err)
return err;
int err;
err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_TRUST,
- "trust");
+ "trust", true);
if (err)
return err;
int err;
err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_LINK_STATE,
- "link_state");
+ "link_state", true);
if (err)
return err;
u8 flags;
int err;
- err = nfp_net_sriov_check(app, vf, 0, "");
+ err = nfp_net_sriov_check(app, vf, 0, "", true);
if (err)
return err;
ivi->vlan = FIELD_GET(NFP_NET_VF_CFG_VLAN_VID, vlan_tag);
ivi->qos = FIELD_GET(NFP_NET_VF_CFG_VLAN_QOS, vlan_tag);
- if (!nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO, "vlan_proto"))
+ if (!nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_VLAN_PROTO, "vlan_proto", false))
ivi->vlan_proto = htons(FIELD_GET(NFP_NET_VF_CFG_VLAN_PROT, vlan_tag));
ivi->spoofchk = FIELD_GET(NFP_NET_VF_CFG_CTRL_SPOOF, flags);
ivi->trusted = FIELD_GET(NFP_NET_VF_CFG_CTRL_TRUST, flags);
ivi->linkstate = FIELD_GET(NFP_NET_VF_CFG_CTRL_LINK_STATE, flags);
- err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate");
+ err = nfp_net_sriov_check(app, vf, NFP_NET_VF_CFG_MB_CAP_RATE, "rate", false);
if (!err) {
rate = readl(app->pf->vfcfg_tbl2 + vf_offset +
NFP_NET_VF_CFG_RATE);
static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
struct sk_buff *skb, u32 *opts)
{
- u32 transport_offset = (u32)skb_transport_offset(skb);
struct skb_shared_info *shinfo = skb_shinfo(skb);
u32 mss = shinfo->gso_size;
WARN_ON_ONCE(1);
}
- opts[0] |= transport_offset << GTTCPHO_SHIFT;
+ opts[0] |= skb_transport_offset(skb) << GTTCPHO_SHIFT;
opts[1] |= mss << TD1_MSS_SHIFT;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
u8 ip_protocol;
else
WARN_ON_ONCE(1);
- opts[1] |= transport_offset << TCPHO_SHIFT;
+ opts[1] |= skb_transport_offset(skb) << TCPHO_SHIFT;
} else {
unsigned int padto = rtl_quirk_packet_padto(tp, skb);
struct net_device *dev,
netdev_features_t features)
{
- int transport_offset = skb_transport_offset(skb);
struct rtl8169_private *tp = netdev_priv(dev);
if (skb_is_gso(skb)) {
if (tp->mac_version == RTL_GIGA_MAC_VER_34)
features = rtl8168evl_fix_tso(skb, features);
- if (transport_offset > GTTCPHO_MAX &&
+ if (skb_transport_offset(skb) > GTTCPHO_MAX &&
rtl_chip_supports_csum_v2(tp))
features &= ~NETIF_F_ALL_TSO;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
if (rtl_quirk_packet_padto(tp, skb))
features &= ~NETIF_F_CSUM_MASK;
- if (transport_offset > TCPHO_MAX &&
+ if (skb_transport_offset(skb) > TCPHO_MAX &&
rtl_chip_supports_csum_v2(tp))
features &= ~NETIF_F_CSUM_MASK;
}
efx_update_sw_stats(efx, stats);
out:
+ /* releasing a DMA coherent buffer with BH disabled can panic */
+ spin_unlock_bh(&efx->stats_lock);
efx_nic_free_buffer(efx, &stats_buf);
+ spin_lock_bh(&efx->stats_lock);
return rc;
}
static int efx_ef10_pci_sriov_disable(struct efx_nic *efx, bool force)
{
struct pci_dev *dev = efx->pci_dev;
+ struct efx_ef10_nic_data *nic_data = efx->nic_data;
unsigned int vfs_assigned = pci_vfs_assigned(dev);
- int rc = 0;
+ int i, rc = 0;
if (vfs_assigned && !force) {
netif_info(efx, drv, efx->net_dev, "VFs are assigned to guests; "
return -EBUSY;
}
- if (!vfs_assigned)
+ if (!vfs_assigned) {
+ for (i = 0; i < efx->vf_count; i++)
+ nic_data->vf[i].pci_dev = NULL;
pci_disable_sriov(dev);
- else
+ } else {
rc = -EBUSY;
+ }
efx_ef10_sriov_free_vf_vswitching(efx);
efx->vf_count = 0;
tx_queue = efx_channel_get_tx_queue(ptp_data->channel, type);
if (tx_queue && tx_queue->timestamping) {
+ /* This code invokes normal driver TX code which is always
+ * protected from softirqs when called from generic TX code,
+ * which in turn disables preemption. Look at __dev_queue_xmit
+ * which uses rcu_read_lock_bh disabling preemption for RCU
+ * plus disabling softirqs. We do not need RCU reader
+ * protection here.
+ *
+ * Although it is theoretically safe for current PTP TX/RX code
+ * running without disabling softirqs, there are three good
+ * reasond for doing so:
+ *
+ * 1) The code invoked is mainly implemented for non-PTP
+ * packets and it is always executed with softirqs
+ * disabled.
+ * 2) This being a single PTP packet, better to not
+ * interrupt its processing by softirqs which can lead
+ * to high latencies.
+ * 3) netdev_xmit_more checks preemption is disabled and
+ * triggers a BUG_ON if not.
+ */
+ local_bh_disable();
efx_enqueue_skb(tx_queue, skb);
+ local_bh_enable();
} else {
WARN_ONCE(1, "PTP channel has no timestamped tx queue\n");
dev_kfree_skb_any(skb);
struct net_device *dev = pci_get_drvdata(pdev);
struct epic_private *ep = netdev_priv(dev);
+ unregister_netdev(dev);
dma_free_coherent(&pdev->dev, TX_TOTAL_SIZE, ep->tx_ring,
ep->tx_ring_dma);
dma_free_coherent(&pdev->dev, RX_TOTAL_SIZE, ep->rx_ring,
ep->rx_ring_dma);
- unregister_netdev(dev);
pci_iounmap(pdev, ep->ioaddr);
- pci_release_regions(pdev);
free_netdev(dev);
+ pci_release_regions(pdev);
pci_disable_device(pdev);
/* pci_power_off(pdev, -1); */
}
data->fix_mac_speed = tegra_eqos_fix_speed;
data->init = tegra_eqos_init;
data->bsp_priv = eqos;
+ data->sph_disable = 1;
err = tegra_eqos_init(pdev, eqos);
if (err < 0)
mac->tx_delay = tx_delay_ps * 1000;
} else {
dev_err(&pdev->dev, "Invalid TX clock delay: %dps\n", tx_delay_ps);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_remove_config_dt;
}
}
mac->rx_delay = rx_delay_ps * 1000;
} else {
dev_err(&pdev->dev, "Invalid RX clock delay: %dps\n", rx_delay_ps);
- return -EINVAL;
+ ret = -EINVAL;
+ goto err_remove_config_dt;
}
}
*art_time = ns;
}
+static int stmmac_cross_ts_isr(struct stmmac_priv *priv)
+{
+ return (readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE);
+}
+
static int intel_crosststamp(ktime_t *device,
struct system_counterval_t *system,
void *ctx)
u32 num_snapshot;
u32 gpio_value;
u32 acr_value;
- int ret;
- u32 v;
int i;
if (!boot_cpu_has(X86_FEATURE_ART))
if (priv->plat->ext_snapshot_en)
return -EBUSY;
+ priv->plat->int_snapshot_en = 1;
+
mutex_lock(&priv->aux_ts_lock);
/* Enable Internal snapshot trigger */
acr_value = readl(ptpaddr + PTP_ACR);
break;
default:
mutex_unlock(&priv->aux_ts_lock);
+ priv->plat->int_snapshot_en = 0;
return -EINVAL;
}
writel(acr_value, ptpaddr + PTP_ACR);
gpio_value |= GMAC_GPO1;
writel(gpio_value, ioaddr + GMAC_GPIO_STATUS);
- /* Poll for time sync operation done */
- ret = readl_poll_timeout(priv->ioaddr + GMAC_INT_STATUS, v,
- (v & GMAC_INT_TSIE), 100, 10000);
-
- if (ret == -ETIMEDOUT) {
- pr_err("%s: Wait for time sync operation timeout\n", __func__);
- return ret;
+ /* Time sync done Indication - Interrupt method */
+ if (!wait_event_interruptible_timeout(priv->tstamp_busy_wait,
+ stmmac_cross_ts_isr(priv),
+ HZ / 100)) {
+ priv->plat->int_snapshot_en = 0;
+ return -ETIMEDOUT;
}
num_snapshot = (readl(ioaddr + GMAC_TIMESTAMP_STATUS) &
}
system->cycles *= intel_priv->crossts_adj;
+ priv->plat->int_snapshot_en = 0;
return 0;
}
plat->has_crossts = true;
plat->crosststamp = intel_crosststamp;
+ plat->int_snapshot_en = 0;
/* Setup MSI vector offset specific to Intel mGbE controller */
plat->msi_mac_vec = 29;
ret = stmmac_dvr_probe(&pdev->dev, plat, &res);
if (ret) {
- goto err_dvr_probe;
+ goto err_alloc_irq;
}
return 0;
-err_dvr_probe:
- pci_free_irq_vectors(pdev);
err_alloc_irq:
clk_disable_unprepare(plat->stmmac_clk);
clk_unregister_fixed_rate(plat->stmmac_clk);
}
}
- ret = clk_bulk_prepare_enable(variant->num_clks, plat->clks);
- if (ret) {
- dev_err(plat->dev, "failed to enable clks, err = %d\n", ret);
- return ret;
- }
-
- ret = clk_prepare_enable(plat->rmii_internal_clk);
- if (ret) {
- dev_err(plat->dev, "failed to enable rmii internal clk, err = %d\n", ret);
- goto err_clk;
- }
-
return 0;
-
-err_clk:
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
- return ret;
-}
-
-static void mediatek_dwmac_exit(struct platform_device *pdev, void *priv)
-{
- struct mediatek_dwmac_plat_data *plat = priv;
- const struct mediatek_dwmac_variant *variant = plat->variant;
-
- clk_disable_unprepare(plat->rmii_internal_clk);
- clk_bulk_disable_unprepare(variant->num_clks, plat->clks);
}
static int mediatek_dwmac_clks_config(void *priv, bool enabled)
plat->addr64 = priv_plat->variant->dma_bit_mask;
plat->bsp_priv = priv_plat;
plat->init = mediatek_dwmac_init;
- plat->exit = mediatek_dwmac_exit;
plat->clks_config = mediatek_dwmac_clks_config;
if (priv_plat->variant->dwmac_fix_mac_speed)
plat->fix_mac_speed = priv_plat->variant->dwmac_fix_mac_speed;
mediatek_dwmac_common_data(pdev, plat_dat, priv_plat);
mediatek_dwmac_init(pdev, priv_plat);
+ ret = mediatek_dwmac_clks_config(priv_plat, true);
+ if (ret)
+ goto err_remove_config_dt;
+
ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
- if (ret) {
- stmmac_remove_config_dt(pdev, plat_dat);
- return ret;
- }
+ if (ret)
+ goto err_drv_probe;
return 0;
+
+err_drv_probe:
+ mediatek_dwmac_clks_config(priv_plat, false);
+err_remove_config_dt:
+ stmmac_remove_config_dt(pdev, plat_dat);
+
+ return ret;
+}
+
+static int mediatek_dwmac_remove(struct platform_device *pdev)
+{
+ struct mediatek_dwmac_plat_data *priv_plat = get_stmmac_bsp_priv(&pdev->dev);
+ int ret;
+
+ ret = stmmac_pltfr_remove(pdev);
+ mediatek_dwmac_clks_config(priv_plat, false);
+
+ return ret;
}
static const struct of_device_id mediatek_dwmac_match[] = {
static struct platform_driver mediatek_dwmac_driver = {
.probe = mediatek_dwmac_probe,
- .remove = stmmac_pltfr_remove,
+ .remove = mediatek_dwmac_remove,
.driver = {
.name = "dwmac-mediatek",
.pm = &stmmac_pltfr_pm_ops,
#define GMAC_PCS_IRQ_DEFAULT (GMAC_INT_RGSMIIS | GMAC_INT_PCS_LINK | \
GMAC_INT_PCS_ANE)
-#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN)
+#define GMAC_INT_DEFAULT_ENABLE (GMAC_INT_PMT_EN | GMAC_INT_LPI_EN | \
+ GMAC_INT_TSIE)
enum dwmac4_irq_status {
time_stamp_irq = 0x00001000,
static void dwmac4_core_init(struct mac_device_info *hw,
struct net_device *dev)
{
+ struct stmmac_priv *priv = netdev_priv(dev);
void __iomem *ioaddr = hw->pcsr;
u32 value = readl(ioaddr + GMAC_CONFIG);
value |= GMAC_INT_FPE_EN;
writel(value, ioaddr + GMAC_INT_EN);
+
+ if (GMAC_INT_DEFAULT_ENABLE & GMAC_INT_TSIE)
+ init_waitqueue_head(&priv->tstamp_busy_wait);
}
static void dwmac4_rx_queue_enable(struct mac_device_info *hw,
if (queue == 0 || queue == 4) {
value &= ~MTL_RXQ_DMA_Q04MDMACH_MASK;
value |= MTL_RXQ_DMA_Q04MDMACH(chan);
+ } else if (queue > 4) {
+ value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue - 4);
+ value |= MTL_RXQ_DMA_QXMDMACH(chan, queue - 4);
} else {
value &= ~MTL_RXQ_DMA_QXMDMACH_MASK(queue);
value |= MTL_RXQ_DMA_QXMDMACH(chan, queue);
rwlock_t ptp_lock;
/* Protects auxiliary snapshot registers from concurrent access. */
struct mutex aux_ts_lock;
+ wait_queue_head_t tstamp_busy_wait;
void __iomem *mmcaddr;
void __iomem *ptpaddr;
netdev_warn(priv->dev,
"Setting EEE tx-lpi is not supported\n");
- if (priv->hw->xpcs) {
- ret = xpcs_config_eee(priv->hw->xpcs,
- priv->plat->mult_fact_100ns,
- edata->eee_enabled);
- if (ret)
- return ret;
- }
-
if (!edata->eee_enabled)
stmmac_disable_eee_mode(priv);
u64 ptp_time;
int i;
+ if (priv->plat->int_snapshot_en) {
+ wake_up(&priv->tstamp_busy_wait);
+ return;
+ }
+
tsync_int = readl(priv->ioaddr + GMAC_INT_STATUS) & GMAC_INT_TSIE;
if (!tsync_int)
struct timespec64 now;
u32 sec_inc = 0;
u64 temp = 0;
- int ret;
if (!(priv->dma_cap.time_stamp || priv->dma_cap.atime_stamp))
return -EOPNOTSUPP;
- ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
- if (ret < 0) {
- netdev_warn(priv->dev,
- "failed to enable PTP reference clock: %pe\n",
- ERR_PTR(ret));
- return ret;
- }
-
stmmac_config_hw_tstamping(priv, priv->ptpaddr, systime_flags);
priv->systime_flags = systime_flags;
stmmac_mmc_setup(priv);
+ if (ptp_register) {
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0)
+ netdev_warn(priv->dev,
+ "failed to enable PTP reference clock: %pe\n",
+ ERR_PTR(ret));
+ }
+
ret = stmmac_init_ptp(priv);
if (ret == -EOPNOTSUPP)
netdev_info(priv->dev, "PTP not supported by HW\n");
netdev_info(priv->dev, "%s: removing driver", __func__);
pm_runtime_get_sync(dev);
- pm_runtime_disable(dev);
- pm_runtime_put_noidle(dev);
stmmac_stop_all_dma(priv);
stmmac_mac_set(priv, priv->ioaddr, false);
mutex_destroy(&priv->lock);
bitmap_free(priv->af_xdp_zc_qps);
+ pm_runtime_disable(dev);
+ pm_runtime_put_noidle(dev);
+
return 0;
}
EXPORT_SYMBOL_GPL(stmmac_dvr_remove);
if (ret)
return ret;
- stmmac_init_tstamp_counter(priv, priv->systime_flags);
+ ret = clk_prepare_enable(priv->plat->clk_ptp_ref);
+ if (ret < 0) {
+ netdev_warn(priv->dev,
+ "failed to enable PTP reference clock: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
}
return 0;
struct stmmac_priv *priv =
container_of(ptp, struct stmmac_priv, ptp_clock_ops);
void __iomem *ptpaddr = priv->ptpaddr;
- void __iomem *ioaddr = priv->hw->pcsr;
struct stmmac_pps_cfg *cfg;
- u32 intr_value, acr_value;
int ret = -EOPNOTSUPP;
unsigned long flags;
+ u32 acr_value;
switch (rq->type) {
case PTP_CLK_REQ_PEROUT:
netdev_dbg(priv->dev, "Auxiliary Snapshot %d enabled.\n",
priv->plat->ext_snapshot_num >>
PTP_ACR_ATSEN_SHIFT);
- /* Enable Timestamp Interrupt */
- intr_value = readl(ioaddr + GMAC_INT_EN);
- intr_value |= GMAC_INT_TSIE;
- writel(intr_value, ioaddr + GMAC_INT_EN);
-
} else {
netdev_dbg(priv->dev, "Auxiliary Snapshot %d disabled.\n",
priv->plat->ext_snapshot_num >>
PTP_ACR_ATSEN_SHIFT);
- /* Disable Timestamp Interrupt */
- intr_value = readl(ioaddr + GMAC_INT_EN);
- intr_value &= ~GMAC_INT_TSIE;
- writel(intr_value, ioaddr + GMAC_INT_EN);
}
writel(acr_value, ptpaddr + PTP_ACR);
mutex_unlock(&priv->aux_ts_lock);
static void display_link_mode(struct happy_meal *hp, void __iomem *tregs)
{
- printk(KERN_INFO "%s: Link is up using ", hp->dev->name);
- if (hp->tcvr_type == external)
- printk("external ");
- else
- printk("internal ");
- printk("transceiver at ");
hp->sw_lpa = happy_meal_tcvr_read(hp, tregs, MII_LPA);
- if (hp->sw_lpa & (LPA_100HALF | LPA_100FULL)) {
- if (hp->sw_lpa & LPA_100FULL)
- printk("100Mb/s, Full Duplex.\n");
- else
- printk("100Mb/s, Half Duplex.\n");
- } else {
- if (hp->sw_lpa & LPA_10FULL)
- printk("10Mb/s, Full Duplex.\n");
- else
- printk("10Mb/s, Half Duplex.\n");
- }
+
+ netdev_info(hp->dev,
+ "Link is up using %s transceiver at %dMb/s, %s Duplex.\n",
+ hp->tcvr_type == external ? "external" : "internal",
+ hp->sw_lpa & (LPA_100HALF | LPA_100FULL) ? 100 : 10,
+ hp->sw_lpa & (LPA_100FULL | LPA_10FULL) ? "Full" : "Half");
}
static void display_forced_link_mode(struct happy_meal *hp, void __iomem *tregs)
{
- printk(KERN_INFO "%s: Link has been forced up using ", hp->dev->name);
- if (hp->tcvr_type == external)
- printk("external ");
- else
- printk("internal ");
- printk("transceiver at ");
hp->sw_bmcr = happy_meal_tcvr_read(hp, tregs, MII_BMCR);
- if (hp->sw_bmcr & BMCR_SPEED100)
- printk("100Mb/s, ");
- else
- printk("10Mb/s, ");
- if (hp->sw_bmcr & BMCR_FULLDPLX)
- printk("Full Duplex.\n");
- else
- printk("Half Duplex.\n");
+
+ netdev_info(hp->dev,
+ "Link has been forced up using %s transceiver at %dMb/s, %s Duplex.\n",
+ hp->tcvr_type == external ? "external" : "internal",
+ hp->sw_bmcr & BMCR_SPEED100 ? 100 : 10,
+ hp->sw_bmcr & BMCR_FULLDPLX ? "Full" : "Half");
}
static int set_happy_link_modes(struct happy_meal *hp, void __iomem *tregs)
port->port_id, ret);
goto dl_port_unreg;
}
- devlink_port_type_eth_set(dl_port, port->ndev);
}
devlink_register(common->devlink);
return ret;
static int am65_cpsw_nuss_register_ndevs(struct am65_cpsw_common *common)
{
struct device *dev = common->dev;
+ struct devlink_port *dl_port;
struct am65_cpsw_port *port;
int ret = 0, i;
return ret;
}
+ ret = am65_cpsw_nuss_register_devlink(common);
+ if (ret)
+ return ret;
+
for (i = 0; i < common->port_num; i++) {
port = &common->ports[i];
i, ret);
goto err_cleanup_ndev;
}
+
+ dl_port = &port->devlink_port;
+ devlink_port_type_eth_set(dl_port, port->ndev);
}
ret = am65_cpsw_register_notifiers(common);
if (ret)
goto err_cleanup_ndev;
- ret = am65_cpsw_nuss_register_devlink(common);
- if (ret)
- goto clean_unregister_notifiers;
-
/* can't auto unregister ndev using devm_add_action() due to
* devres release sequence in DD core for DMA
*/
return 0;
-clean_unregister_notifiers:
- am65_cpsw_unregister_notifiers(common);
+
err_cleanup_ndev:
am65_cpsw_nuss_cleanup_ndev(common);
+ am65_cpsw_unregister_devlink(common);
return ret;
}
iowrite32(value, lp->regs + offset);
}
+/**
+ * axienet_dma_out32 - Memory mapped Axi DMA register write.
+ * @lp: Pointer to axienet local structure
+ * @reg: Address offset from the base address of the Axi DMA core
+ * @value: Value to be written into the Axi DMA register
+ *
+ * This function writes the desired value into the corresponding Axi DMA
+ * register.
+ */
+
+static inline void axienet_dma_out32(struct axienet_local *lp,
+ off_t reg, u32 value)
+{
+ iowrite32(value, lp->dma_regs + reg);
+}
+
+#if defined(CONFIG_64BIT) && defined(iowrite64)
+/**
+ * axienet_dma_out64 - Memory mapped Axi DMA register write.
+ * @lp: Pointer to axienet local structure
+ * @reg: Address offset from the base address of the Axi DMA core
+ * @value: Value to be written into the Axi DMA register
+ *
+ * This function writes the desired value into the corresponding Axi DMA
+ * register.
+ */
+static inline void axienet_dma_out64(struct axienet_local *lp,
+ off_t reg, u64 value)
+{
+ iowrite64(value, lp->dma_regs + reg);
+}
+
+static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
+ dma_addr_t addr)
+{
+ if (lp->features & XAE_FEATURE_DMA_64BIT)
+ axienet_dma_out64(lp, reg, addr);
+ else
+ axienet_dma_out32(lp, reg, lower_32_bits(addr));
+}
+
+#else /* CONFIG_64BIT */
+
+static inline void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
+ dma_addr_t addr)
+{
+ axienet_dma_out32(lp, reg, lower_32_bits(addr));
+}
+
+#endif /* CONFIG_64BIT */
+
/* Function prototypes visible in xilinx_axienet_mdio.c for other files */
int axienet_mdio_enable(struct axienet_local *lp);
void axienet_mdio_disable(struct axienet_local *lp);
return ioread32(lp->dma_regs + reg);
}
-/**
- * axienet_dma_out32 - Memory mapped Axi DMA register write.
- * @lp: Pointer to axienet local structure
- * @reg: Address offset from the base address of the Axi DMA core
- * @value: Value to be written into the Axi DMA register
- *
- * This function writes the desired value into the corresponding Axi DMA
- * register.
- */
-static inline void axienet_dma_out32(struct axienet_local *lp,
- off_t reg, u32 value)
-{
- iowrite32(value, lp->dma_regs + reg);
-}
-
-static void axienet_dma_out_addr(struct axienet_local *lp, off_t reg,
- dma_addr_t addr)
-{
- axienet_dma_out32(lp, reg, lower_32_bits(addr));
-
- if (lp->features & XAE_FEATURE_DMA_64BIT)
- axienet_dma_out32(lp, reg + 4, upper_32_bits(addr));
-}
-
static void desc_set_phys_addr(struct axienet_local *lp, dma_addr_t addr,
struct axidma_bd *desc)
{
iowrite32(0x0, desc);
}
}
+ if (!IS_ENABLED(CONFIG_64BIT) && lp->features & XAE_FEATURE_DMA_64BIT) {
+ dev_err(&pdev->dev, "64-bit addressable DMA is not compatible with 32-bit archecture\n");
+ ret = -EINVAL;
+ goto cleanup_clk;
+ }
ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));
if (ret) {
unsigned int rx_count;
unsigned int rx_count_cooked;
+ spinlock_t rxlock;
int mtu; /* Our mtu (to spot changes!) */
int buffsize; /* Max buffers sizes */
sp->dev = dev;
spin_lock_init(&sp->lock);
+ spin_lock_init(&sp->rxlock);
refcount_set(&sp->refcnt, 1);
init_completion(&sp->dead);
sp->led_state = 0x60;
/* fill trailing bytes with zeroes */
sp->tty->ops->write(sp->tty, &sp->led_state, 1);
+ spin_lock_bh(&sp->rxlock);
rest = sp->rx_count;
if (rest != 0)
for (i = rest; i <= 3; i++)
sp_bump(sp, 0);
}
sp->rx_count_cooked = 0;
+ spin_unlock_bh(&sp->rxlock);
}
break;
case SIXP_TX_URUN: printk(KERN_DEBUG "6pack: TX underrun\n");
decode_prio_command(sp, inbyte);
else if ((inbyte & SIXP_STD_CMD_MASK) != 0)
decode_std_command(sp, inbyte);
- else if ((sp->status & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK)
+ else if ((sp->status & SIXP_RX_DCD_MASK) == SIXP_RX_DCD_MASK) {
+ spin_lock_bh(&sp->rxlock);
decode_data(sp, inbyte);
+ spin_unlock_bh(&sp->rxlock);
+ }
}
}
/* The response to a IPA_QMI_INIT_DRIVER request begins with a standard
* QMI response, but contains other information as well. Currently we
- * simply wait for the the INIT_DRIVER transaction to complete and
+ * simply wait for the INIT_DRIVER transaction to complete and
* ignore any other data that might be returned.
*/
struct ipa_init_modem_driver_rsp {
#define DEFAULT_SEND_SCI true
#define DEFAULT_ENCRYPT false
#define DEFAULT_ENCODING_SA 0
+#define MACSEC_XPN_MAX_REPLAY_WINDOW (((1 << 30) - 1))
static bool send_sci(const struct macsec_secy *secy)
{
return false;
if (attrs[MACSEC_SA_ATTR_PN] &&
- *(u64 *)nla_data(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
}
pn_len = secy->xpn ? MACSEC_XPN_PN_LEN : MACSEC_DEFAULT_PN_LEN;
- if (nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
+ if (tb_sa[MACSEC_SA_ATTR_PN] &&
+ nla_len(tb_sa[MACSEC_SA_ATTR_PN]) != pn_len) {
pr_notice("macsec: nl: add_rxsa: bad pn length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_PN]), pn_len);
rtnl_unlock();
if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
pr_notice("macsec: nl: add_rxsa: bad salt length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
- MACSEC_SA_ATTR_SALT);
+ MACSEC_SALT_LEN);
rtnl_unlock();
return -EINVAL;
}
return 0;
cleanup:
- kfree(rx_sa);
+ macsec_rxsa_put(rx_sa);
rtnl_unlock();
return err;
}
if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
return false;
- if (nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ if (nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
if (nla_len(tb_sa[MACSEC_SA_ATTR_SALT]) != MACSEC_SALT_LEN) {
pr_notice("macsec: nl: add_txsa: bad salt length: %d != %d\n",
nla_len(tb_sa[MACSEC_SA_ATTR_SALT]),
- MACSEC_SA_ATTR_SALT);
+ MACSEC_SALT_LEN);
rtnl_unlock();
return -EINVAL;
}
cleanup:
secy->operational = was_operational;
- kfree(tx_sa);
+ macsec_txsa_put(tx_sa);
rtnl_unlock();
return err;
}
if (nla_get_u8(attrs[MACSEC_SA_ATTR_AN]) >= MACSEC_NUM_AN)
return false;
- if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u32(attrs[MACSEC_SA_ATTR_PN]) == 0)
+ if (attrs[MACSEC_SA_ATTR_PN] && nla_get_u64(attrs[MACSEC_SA_ATTR_PN]) == 0)
return false;
if (attrs[MACSEC_SA_ATTR_ACTIVE]) {
secy->operational = tx_sa && tx_sa->active;
}
- if (data[IFLA_MACSEC_WINDOW])
- secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
-
if (data[IFLA_MACSEC_ENCRYPT])
tx_sc->encrypt = !!nla_get_u8(data[IFLA_MACSEC_ENCRYPT]);
}
}
+ if (data[IFLA_MACSEC_WINDOW]) {
+ secy->replay_window = nla_get_u32(data[IFLA_MACSEC_WINDOW]);
+
+ /* IEEE 802.1AEbw-2013 10.7.8 - maximum replay window
+ * for XPN cipher suites */
+ if (secy->xpn &&
+ secy->replay_window > MACSEC_XPN_MAX_REPLAY_WINDOW)
+ return -EINVAL;
+ }
+
return 0;
}
ret = macsec_changelink_common(dev, data);
if (ret)
- return ret;
+ goto cleanup;
/* If h/w offloading is available, propagate to the device */
if (macsec_is_offloaded(macsec)) {
*/
ret = xpcs_read(xpcs, MDIO_MMD_VEND2, DW_VR_MII_AN_INTR_STS);
if (ret < 0)
- return false;
+ return ret;
if (ret & DW_VR_MII_C37_ANSGM_SP_LNKSTS) {
int speed_value;
#define MDIO_AN_VEND_PROV 0xc400
#define MDIO_AN_VEND_PROV_1000BASET_FULL BIT(15)
#define MDIO_AN_VEND_PROV_1000BASET_HALF BIT(14)
+#define MDIO_AN_VEND_PROV_5000BASET_FULL BIT(11)
+#define MDIO_AN_VEND_PROV_2500BASET_FULL BIT(10)
#define MDIO_AN_VEND_PROV_DOWNSHIFT_EN BIT(4)
#define MDIO_AN_VEND_PROV_DOWNSHIFT_MASK GENMASK(3, 0)
#define MDIO_AN_VEND_PROV_DOWNSHIFT_DFLT 4
phydev->advertising))
reg |= MDIO_AN_VEND_PROV_1000BASET_HALF;
+ /* Handle the case when the 2.5G and 5G speeds are not advertised */
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_2500baseT_Full_BIT,
+ phydev->advertising))
+ reg |= MDIO_AN_VEND_PROV_2500BASET_FULL;
+
+ if (linkmode_test_bit(ETHTOOL_LINK_MODE_5000baseT_Full_BIT,
+ phydev->advertising))
+ reg |= MDIO_AN_VEND_PROV_5000BASET_FULL;
+
ret = phy_modify_mmd_changed(phydev, MDIO_MMD_AN, MDIO_AN_VEND_PROV,
MDIO_AN_VEND_PROV_1000BASET_HALF |
- MDIO_AN_VEND_PROV_1000BASET_FULL, reg);
+ MDIO_AN_VEND_PROV_1000BASET_FULL |
+ MDIO_AN_VEND_PROV_2500BASET_FULL |
+ MDIO_AN_VEND_PROV_5000BASET_FULL, reg);
if (ret < 0)
return ret;
if (ret > 0)
/* ATHEROS AR9331 */
PHY_ID_MATCH_EXACT(ATH9331_PHY_ID),
.name = "Qualcomm Atheros AR9331 built-in PHY",
+ .probe = at803x_probe,
+ .remove = at803x_remove,
.suspend = at803x_suspend,
.resume = at803x_resume,
.flags = PHY_POLL_CABLE_TEST,
/* Qualcomm Atheros QCA9561 */
PHY_ID_MATCH_EXACT(QCA9561_PHY_ID),
.name = "Qualcomm Atheros QCA9561 built-in PHY",
+ .probe = at803x_probe,
+ .remove = at803x_remove,
.suspend = at803x_suspend,
.resume = at803x_resume,
.flags = PHY_POLL_CABLE_TEST,
PHY_ID_MATCH_EXACT(QCA8081_PHY_ID),
.name = "Qualcomm QCA8081",
.flags = PHY_POLL_CABLE_TEST,
+ .probe = at803x_probe,
+ .remove = at803x_remove,
.config_intr = at803x_config_intr,
.handle_interrupt = at803x_handle_interrupt,
.get_tunable = at803x_get_tunable,
/* Reset PHY, otherwise MII_LPA will provide outdated information.
* This issue is reproducible only with some link partner PHYs
*/
- if (phydev->state == PHY_NOLINK && phydev->drv->soft_reset)
- phydev->drv->soft_reset(phydev);
+ if (phydev->state == PHY_NOLINK) {
+ phy_init_hw(phydev);
+ phy_start_aneg(phydev);
+ }
}
static struct phy_driver asix_driver[] = {
if (misr_status < 0)
return misr_status;
- misr_status |= (DP83822_RX_ERR_HF_INT_EN |
- DP83822_FALSE_CARRIER_HF_INT_EN |
- DP83822_LINK_STAT_INT_EN |
+ misr_status |= (DP83822_LINK_STAT_INT_EN |
DP83822_ENERGY_DET_INT_EN |
DP83822_LINK_QUAL_INT_EN);
#define DP83867_DOWNSHIFT_2_COUNT 2
#define DP83867_DOWNSHIFT_4_COUNT 4
#define DP83867_DOWNSHIFT_8_COUNT 8
+#define DP83867_SGMII_AUTONEG_EN BIT(7)
/* CFG3 bits */
#define DP83867_CFG3_INT_OE BIT(7)
DP83867_PHYCR_FORCE_LINK_GOOD, 0);
}
+static void dp83867_link_change_notify(struct phy_device *phydev)
+{
+ /* There is a limitation in DP83867 PHY device where SGMII AN is
+ * only triggered once after the device is booted up. Even after the
+ * PHY TPI is down and up again, SGMII AN is not triggered and
+ * hence no new in-band message from PHY to MAC side SGMII.
+ * This could cause an issue during power up, when PHY is up prior
+ * to MAC. At this condition, once MAC side SGMII is up, MAC side
+ * SGMII wouldn`t receive new in-band message from TI PHY with
+ * correct link status, speed and duplex info.
+ * Thus, implemented a SW solution here to retrigger SGMII Auto-Neg
+ * whenever there is a link change.
+ */
+ if (phydev->interface == PHY_INTERFACE_MODE_SGMII) {
+ int val = 0;
+
+ val = phy_clear_bits(phydev, DP83867_CFG2,
+ DP83867_SGMII_AUTONEG_EN);
+ if (val < 0)
+ return;
+
+ phy_set_bits(phydev, DP83867_CFG2,
+ DP83867_SGMII_AUTONEG_EN);
+ }
+}
+
static struct phy_driver dp83867_driver[] = {
{
.phy_id = DP83867_PHY_ID,
.suspend = genphy_suspend,
.resume = genphy_resume,
+
+ .link_change_notify = dp83867_link_change_notify,
},
};
module_phy_driver(dp83867_driver);
return ret;
}
-EXPORT_SYMBOL_GPL(mdio_bus_init);
#if IS_ENABLED(CONFIG_PHYLIB)
void mdio_bus_exit(void)
#include <linux/io.h>
#include <linux/uaccess.h>
#include <linux/atomic.h>
+#include <linux/suspend.h>
#include <net/netlink.h>
#include <net/genetlink.h>
#include <net/sock.h>
struct phy_driver *drv = phydev->drv;
irqreturn_t ret;
+ /* Wakeup interrupts may occur during a system sleep transition.
+ * Postpone handling until the PHY has resumed.
+ */
+ if (IS_ENABLED(CONFIG_PM_SLEEP) && phydev->irq_suspended) {
+ struct net_device *netdev = phydev->attached_dev;
+
+ if (netdev) {
+ struct device *parent = netdev->dev.parent;
+
+ if (netdev->wol_enabled)
+ pm_system_wakeup();
+ else if (device_may_wakeup(&netdev->dev))
+ pm_wakeup_dev_event(&netdev->dev, 0, true);
+ else if (parent && device_may_wakeup(parent))
+ pm_wakeup_dev_event(parent, 0, true);
+ }
+
+ phydev->irq_rerun = 1;
+ disable_irq_nosync(irq);
+ return IRQ_HANDLED;
+ }
+
mutex_lock(&phydev->lock);
ret = drv->handle_interrupt(phydev);
mutex_unlock(&phydev->lock);
if (phydev->mac_managed_pm)
return 0;
+ /* Wakeup interrupts may occur during the system sleep transition when
+ * the PHY is inaccessible. Set flag to postpone handling until the PHY
+ * has resumed. Wait for concurrent interrupt handler to complete.
+ */
+ if (phy_interrupt_is_valid(phydev)) {
+ phydev->irq_suspended = 1;
+ synchronize_irq(phydev->irq);
+ }
+
/* We must stop the state machine manually, otherwise it stops out of
* control, possibly with the phydev->lock held. Upon resume, netdev
* may call phy routines that try to grab the same lock, and that may
if (ret < 0)
return ret;
no_resume:
+ if (phy_interrupt_is_valid(phydev)) {
+ phydev->irq_suspended = 0;
+ synchronize_irq(phydev->irq);
+
+ /* Rerun interrupts which were postponed by phy_interrupt()
+ * because they occurred during the system sleep transition.
+ */
+ if (phydev->irq_rerun) {
+ phydev->irq_rerun = 0;
+ enable_irq(phydev->irq);
+ irq_wake_thread(phydev->irq, phydev);
+ }
+ }
+
if (phydev->attached_dev && phydev->adjust_link)
phy_start_machine(phydev);
platform_set_drvdata(pdev, sfp);
- err = devm_add_action(sfp->dev, sfp_cleanup, sfp);
+ err = devm_add_action_or_reset(sfp->dev, sfp_cleanup, sfp);
if (err < 0)
return err;
struct smsc_phy_priv *priv = phydev->priv;
int rc;
- if (!priv->energy_enable)
+ if (!priv->energy_enable || phydev->irq != PHY_POLL)
return 0;
rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
* response on link pulses to detect presence of plugged Ethernet cable.
* The Energy Detect Power-Down mode is enabled again in the end of procedure to
* save approximately 220 mW of power if cable is unplugged.
+ * The workaround is only applicable to poll mode. Energy Detect Power-Down may
+ * not be used in interrupt mode lest link change detection becomes unreliable.
*/
static int lan87xx_read_status(struct phy_device *phydev)
{
int err = genphy_read_status(phydev);
- if (!phydev->link && priv->energy_enable) {
+ if (!phydev->link && priv->energy_enable && phydev->irq == PHY_POLL) {
/* Disable EDPD to wake up PHY */
int rc = phy_read(phydev, MII_LAN83C185_CTRL_STATUS);
if (rc < 0)
int can_low_power = 1;
if (np == NULL || of_get_property(np, "no-autolowpower", NULL))
can_low_power = 0;
+ of_node_put(np);
if (can_low_power) {
/* Enable automatic low-power */
sungem_phy_write(phy, 0x1c, 0x9002);
}
}
+static void tun_napi_enable(struct tun_file *tfile)
+{
+ if (tfile->napi_enabled)
+ napi_enable(&tfile->napi);
+}
+
static void tun_napi_disable(struct tun_file *tfile)
{
if (tfile->napi_enabled)
tun = rtnl_dereference(tfile->tun);
if (tun && clean) {
- tun_napi_disable(tfile);
+ if (!tfile->detached)
+ tun_napi_disable(tfile);
tun_napi_del(tfile);
}
if (clean) {
RCU_INIT_POINTER(tfile->tun, NULL);
sock_put(&tfile->sk);
- } else
+ } else {
tun_disable_queue(tun, tfile);
+ tun_napi_disable(tfile);
+ }
synchronize_net();
tun_flow_delete_by_queue(tun, tun->numqueues + 1);
sock_put(&tfile->sk);
}
list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) {
+ tun_napi_del(tfile);
tun_enable_queue(tfile);
tun_queue_purge(tfile);
xdp_rxq_info_unreg(&tfile->xdp_rxq);
if (tfile->detached) {
tun_enable_queue(tfile);
+ tun_napi_enable(tfile);
} else {
sock_hold(&tfile->sk);
tun_napi_init(tun, tfile, napi, napi_frags);
AX_MEDIUM_RE)
#define AX88772_MEDIUM_DEFAULT \
- (AX_MEDIUM_FD | AX_MEDIUM_RFC | \
- AX_MEDIUM_TFC | AX_MEDIUM_PS | \
+ (AX_MEDIUM_FD | AX_MEDIUM_PS | \
AX_MEDIUM_AC | AX_MEDIUM_RE)
/* AX88772 & AX88178 RX_CTL values */
asix_write_medium_mode(dev, mode, 0);
phy_print_status(phydev);
+ usbnet_link_change(dev, phydev->link, 0);
}
int asix_write_gpio(struct usbnet *dev, u16 value, int sleep, int in_pm)
* are bundled into this buffer and where we can find an array of
* per-packet metadata (which contains elements encoded into u16).
*/
+
+ /* SKB contents for current firmware:
+ * <packet 1> <padding>
+ * ...
+ * <packet N> <padding>
+ * <per-packet metadata entry 1> <dummy header>
+ * ...
+ * <per-packet metadata entry N> <dummy header>
+ * <padding2> <rx_hdr>
+ *
+ * where:
+ * <packet N> contains pkt_len bytes:
+ * 2 bytes of IP alignment pseudo header
+ * packet received
+ * <per-packet metadata entry N> contains 4 bytes:
+ * pkt_len and fields AX_RXHDR_*
+ * <padding> 0-7 bytes to terminate at
+ * 8 bytes boundary (64-bit).
+ * <padding2> 4 bytes to make rx_hdr terminate at
+ * 8 bytes boundary (64-bit)
+ * <dummy-header> contains 4 bytes:
+ * pkt_len=0 and AX_RXHDR_DROP_ERR
+ * <rx-hdr> contains 4 bytes:
+ * pkt_cnt and hdr_off (offset of
+ * <per-packet metadata entry 1>)
+ *
+ * pkt_cnt is number of entrys in the per-packet metadata.
+ * In current firmware there is 2 entrys per packet.
+ * The first points to the packet and the
+ * second is a dummy header.
+ * This was done probably to align fields in 64-bit and
+ * maintain compatibility with old firmware.
+ * This code assumes that <dummy header> and <padding2> are
+ * optional.
+ */
+
if (skb->len < 4)
return 0;
skb_trim(skb, skb->len - 4);
/* Make sure that the bounds of the metadata array are inside the SKB
* (and in front of the counter at the end).
*/
- if (pkt_cnt * 2 + hdr_off > skb->len)
+ if (pkt_cnt * 4 + hdr_off > skb->len)
return 0;
pkt_hdr = (u32 *)(skb->data + hdr_off);
/* Packets must not overlap the metadata array */
skb_trim(skb, hdr_off);
- for (; ; pkt_cnt--, pkt_hdr++) {
+ for (; pkt_cnt > 0; pkt_cnt--, pkt_hdr++) {
+ u16 pkt_len_plus_padd;
u16 pkt_len;
le32_to_cpus(pkt_hdr);
pkt_len = (*pkt_hdr >> 16) & 0x1fff;
+ pkt_len_plus_padd = (pkt_len + 7) & 0xfff8;
- if (pkt_len > skb->len)
+ /* Skip dummy header used for alignment
+ */
+ if (pkt_len == 0)
+ continue;
+
+ if (pkt_len_plus_padd > skb->len)
return 0;
/* Check CRC or runt packet */
- if (((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) == 0) &&
- pkt_len >= 2 + ETH_HLEN) {
- bool last = (pkt_cnt == 0);
-
- if (last) {
- ax_skb = skb;
- } else {
- ax_skb = skb_clone(skb, GFP_ATOMIC);
- if (!ax_skb)
- return 0;
- }
- ax_skb->len = pkt_len;
- /* Skip IP alignment pseudo header */
- skb_pull(ax_skb, 2);
- skb_set_tail_pointer(ax_skb, ax_skb->len);
- ax_skb->truesize = pkt_len + sizeof(struct sk_buff);
- ax88179_rx_checksum(ax_skb, pkt_hdr);
+ if ((*pkt_hdr & (AX_RXHDR_CRC_ERR | AX_RXHDR_DROP_ERR)) ||
+ pkt_len < 2 + ETH_HLEN) {
+ dev->net->stats.rx_errors++;
+ skb_pull(skb, pkt_len_plus_padd);
+ continue;
+ }
- if (last)
- return 1;
+ /* last packet */
+ if (pkt_len_plus_padd == skb->len) {
+ skb_trim(skb, pkt_len);
- usbnet_skb_return(dev, ax_skb);
+ /* Skip IP alignment pseudo header */
+ skb_pull(skb, 2);
+
+ skb->truesize = SKB_TRUESIZE(pkt_len_plus_padd);
+ ax88179_rx_checksum(skb, pkt_hdr);
+ return 1;
}
- /* Trim this packet away from the SKB */
- if (!skb_pull(skb, (pkt_len + 7) & 0xFFF8))
+ ax_skb = skb_clone(skb, GFP_ATOMIC);
+ if (!ax_skb)
return 0;
+ skb_trim(ax_skb, pkt_len);
+
+ /* Skip IP alignment pseudo header */
+ skb_pull(ax_skb, 2);
+
+ skb->truesize = pkt_len_plus_padd +
+ SKB_DATA_ALIGN(sizeof(struct sk_buff));
+ ax88179_rx_checksum(ax_skb, pkt_hdr);
+ usbnet_skb_return(dev, ax_skb);
+
+ skb_pull(skb, pkt_len_plus_padd);
}
+
+ return 0;
}
static struct sk_buff *
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
.link_reset = ax88179_link_reset,
.reset = ax88179_reset,
.stop = ax88179_stop,
- .flags = FLAG_ETHER | FLAG_FRAMING_AX,
+ .flags = FLAG_ETHER | FLAG_FRAMING_AX | FLAG_SEND_ZLP,
.rx_fixup = ax88179_rx_fixup,
.tx_fixup = ax88179_tx_fixup,
};
intf->altsetting->desc.bInterfaceNumber, 1)) {
dev_err(dev, "Can't set altsetting 1.\n");
ret = -EIO;
- goto fail_mem;;
+ goto fail_mem;
}
netdev = alloc_etherdev(sizeof(struct catc));
#define NETNEXT_VERSION "12"
/* Information for net */
-#define NET_VERSION "12"
+#define NET_VERSION "13"
#define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION
#define DRIVER_AUTHOR "Realtek linux nic maintainers <nic_swsd@realtek.com>"
}
static int r8152_tx_csum(struct r8152 *tp, struct tx_desc *desc,
- struct sk_buff *skb, u32 len, u32 transport_offset)
+ struct sk_buff *skb, u32 len)
{
u32 mss = skb_shinfo(skb)->gso_size;
u32 opts1, opts2 = 0;
opts1 = len | TX_FS | TX_LS;
if (mss) {
+ u32 transport_offset = (u32)skb_transport_offset(skb);
+
if (transport_offset > GTTCPHO_MAX) {
netif_warn(tp, tx_err, tp->netdev,
"Invalid transport offset 0x%x for TSO\n",
opts1 |= transport_offset << GTTCPHO_SHIFT;
opts2 |= min(mss, MSS_MAX) << MSS_SHIFT;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) {
+ u32 transport_offset = (u32)skb_transport_offset(skb);
u8 ip_protocol;
if (transport_offset > TCPHO_MAX) {
struct tx_desc *tx_desc;
struct sk_buff *skb;
unsigned int len;
- u32 offset;
skb = __skb_dequeue(&skb_head);
if (!skb)
tx_data = tx_agg_align(tx_data);
tx_desc = (struct tx_desc *)tx_data;
- offset = (u32)skb_transport_offset(skb);
-
- if (r8152_tx_csum(tp, tx_desc, skb, skb->len, offset)) {
+ if (r8152_tx_csum(tp, tx_desc, skb, skb->len)) {
r8152_csum_workaround(tp, skb, &skb_head);
continue;
}
{
u32 mss = skb_shinfo(skb)->gso_size;
int max_offset = mss ? GTTCPHO_MAX : TCPHO_MAX;
- int offset = skb_transport_offset(skb);
- if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) && offset > max_offset)
+ if ((mss || skb->ip_summed == CHECKSUM_PARTIAL) &&
+ skb_transport_offset(skb) > max_offset)
features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK);
else if ((skb->len + sizeof(struct tx_desc)) > agg_buf_sz)
features &= ~NETIF_F_GSO_MASK;
wait_oob_link_list_ready(tp);
- ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, mtu_to_size(tp->netdev->mtu));
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
switch (tp->version) {
case RTL_VER_03:
ocp_data |= NOW_IS_OOB | DIS_MCU_CLROOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+ ocp_data |= MCU_BORW_EN;
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
rxdy_gated_en(tp, false);
ocp_data = ocp_read_dword(tp, MCU_TYPE_PLA, PLA_RCR);
rtl_disable(tp);
rtl_reset_bmu(tp);
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_RMS, 1522);
+ ocp_write_byte(tp, MCU_TYPE_PLA, PLA_MTPS, MTPS_DEFAULT);
+
/* Clear teredo wake event. bit[15:8] is the teredo wakeup
* type. Set it to zero. bits[7:0] are the W1C bits about
* the events. Set them to all 1 to clear them.
ocp_data |= NOW_IS_OOB;
ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
+ ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7);
+ ocp_data |= MCU_BORW_EN;
+ ocp_write_word(tp, MCU_TYPE_PLA, PLA_SFF_STS_7, ocp_data);
+
rtl_rx_vlan_en(tp, true);
rxdy_gated_en(tp, false);
cmd, reqtype, value, index, size);
if (size) {
- buf = kmalloc(size, GFP_KERNEL);
+ buf = kmalloc(size, GFP_NOIO);
if (!buf)
goto out;
}
cmd, reqtype, value, index, size);
if (data) {
- buf = kmemdup(data, size, GFP_KERNEL);
+ buf = kmemdup(data, size, GFP_NOIO);
if (!buf)
goto out;
} else {
int usbnet_write_cmd_async(struct usbnet *dev, u8 cmd, u8 reqtype,
u16 value, u16 index, const void *data, u16 size)
{
- struct usb_ctrlrequest *req = NULL;
+ struct usb_ctrlrequest *req;
struct urb *urb;
int err = -ENOMEM;
void *buf = NULL;
if (!buf) {
netdev_err(dev->net, "Error allocating buffer"
" in %s!\n", __func__);
- goto fail_free;
+ goto fail_free_urb;
}
}
if (err < 0) {
netdev_err(dev->net, "Error submitting the control"
" message: status=%d\n", err);
- goto fail_free;
+ goto fail_free_all;
}
return 0;
+fail_free_all:
+ kfree(req);
fail_free_buf:
kfree(buf);
-fail_free:
- kfree(req);
+ /*
+ * avoid a double free
+ * needed because the flag can be set only
+ * after filling the URB
+ */
+ urb->transfer_flags = 0;
+fail_free_urb:
usb_free_urb(urb);
fail:
return err;
static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct veth_priv *rcv_priv, *priv = netdev_priv(dev);
+ struct netdev_queue *queue = NULL;
struct veth_rq *rq = NULL;
struct net_device *rcv;
int length = skb->len;
rxq = skb_get_queue_mapping(skb);
if (rxq < rcv->real_num_rx_queues) {
rq = &rcv_priv->rq[rxq];
+ queue = netdev_get_tx_queue(dev, rxq);
/* The napi pointer is available when an XDP program is
* attached or when GRO is enabled
skb_tx_timestamp(skb);
if (likely(veth_forward_skb(rcv, skb, rq, use_napi) == NET_RX_SUCCESS)) {
+ if (queue)
+ txq_trans_cond_update(queue);
if (!use_napi)
dev_lstats_add(dev, length);
} else {
/* Packet virtio header size */
u8 hdr_len;
- /* Work struct for refilling if we run low on memory. */
+ /* Work struct for delayed refilling if we run low on memory. */
struct delayed_work refill;
+ /* Is delayed refill enabled? */
+ bool refill_enabled;
+
+ /* The lock to synchronize the access to refill_enabled */
+ spinlock_t refill_lock;
+
/* Work struct for config space updates */
struct work_struct config_work;
return p;
}
+static void enable_delayed_refill(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->refill_lock);
+ vi->refill_enabled = true;
+ spin_unlock_bh(&vi->refill_lock);
+}
+
+static void disable_delayed_refill(struct virtnet_info *vi)
+{
+ spin_lock_bh(&vi->refill_lock);
+ vi->refill_enabled = false;
+ spin_unlock_bh(&vi->refill_lock);
+}
+
static void virtqueue_napi_schedule(struct napi_struct *napi,
struct virtqueue *vq)
{
}
if (rq->vq->num_free > min((unsigned int)budget, virtqueue_get_vring_size(rq->vq)) / 2) {
- if (!try_fill_recv(vi, rq, GFP_ATOMIC))
- schedule_delayed_work(&vi->refill, 0);
+ if (!try_fill_recv(vi, rq, GFP_ATOMIC)) {
+ spin_lock(&vi->refill_lock);
+ if (vi->refill_enabled)
+ schedule_delayed_work(&vi->refill, 0);
+ spin_unlock(&vi->refill_lock);
+ }
}
u64_stats_update_begin(&rq->stats.syncp);
struct virtnet_info *vi = netdev_priv(dev);
int i, err;
+ enable_delayed_refill(vi);
+
for (i = 0; i < vi->max_queue_pairs; i++) {
if (i < vi->curr_queue_pairs)
/* Make sure we have some buffers: if oom use wq. */
struct virtnet_info *vi = netdev_priv(dev);
int i;
+ /* Make sure NAPI doesn't schedule refill work */
+ disable_delayed_refill(vi);
/* Make sure refill_work doesn't re-enable napi! */
cancel_delayed_work_sync(&vi->refill);
static void virtnet_freeze_down(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- int i;
/* Make sure no work handler is accessing the device */
flush_work(&vi->config_work);
netif_tx_lock_bh(vi->dev);
netif_device_detach(vi->dev);
netif_tx_unlock_bh(vi->dev);
- cancel_delayed_work_sync(&vi->refill);
-
- if (netif_running(vi->dev)) {
- for (i = 0; i < vi->max_queue_pairs; i++) {
- napi_disable(&vi->rq[i].napi);
- virtnet_napi_tx_disable(&vi->sq[i].napi);
- }
- }
+ if (netif_running(vi->dev))
+ virtnet_close(vi->dev);
}
static int init_vqs(struct virtnet_info *vi);
static int virtnet_restore_up(struct virtio_device *vdev)
{
struct virtnet_info *vi = vdev->priv;
- int err, i;
+ int err;
err = init_vqs(vi);
if (err)
virtio_device_ready(vdev);
- if (netif_running(vi->dev)) {
- for (i = 0; i < vi->curr_queue_pairs; i++)
- if (!try_fill_recv(vi, &vi->rq[i], GFP_KERNEL))
- schedule_delayed_work(&vi->refill, 0);
+ enable_delayed_refill(vi);
- for (i = 0; i < vi->max_queue_pairs; i++) {
- virtnet_napi_enable(vi->rq[i].vq, &vi->rq[i].napi);
- virtnet_napi_tx_enable(vi, vi->sq[i].vq,
- &vi->sq[i].napi);
- }
+ if (netif_running(vi->dev)) {
+ err = virtnet_open(vi->dev);
+ if (err)
+ return err;
}
netif_tx_lock_bh(vi->dev);
vdev->priv = vi;
INIT_WORK(&vi->config_work, virtnet_config_changed_work);
+ spin_lock_init(&vi->refill_lock);
/* If we can receive ANY GSO packets, we must allocate large ones. */
if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) ||
if (vi->has_rss || vi->has_rss_hash_report)
virtnet_init_default_rss(vi);
- err = register_netdev(dev);
+ /* serialize netdev register + virtio_device_ready() with ndo_open() */
+ rtnl_lock();
+
+ err = register_netdevice(dev);
if (err) {
pr_debug("virtio_net: registering device failed\n");
+ rtnl_unlock();
goto free_failover;
}
virtio_device_ready(vdev);
+ rtnl_unlock();
+
err = virtnet_cpu_notif_add(vi);
if (err) {
pr_debug("virtio_net: registering cpu notifier failed\n");
switch (ev->evt_type) {
case WMI_BSS_COLOR_COLLISION_DETECTION:
- ieeee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap);
+ ieeee80211_obss_color_collision_notify(arvif->vif, ev->obss_color_bitmap,
+ GFP_KERNEL);
ath11k_dbg(ab, ATH11K_DBG_WMI,
"OBSS color collision detected vdev:%d, event:%d, bitmap:%08llx\n",
ev->vdev_id, ev->evt_type, ev->obss_color_bitmap);
if (err)
return err;
+ virtio_device_ready(vdev);
+
err = fill_vq(hwsim_vqs[HWSIM_VQ_RX]);
if (err)
goto out_remove;
const struct wiphy_wowlan_support *wowlan_stub;
const u8 max_sched_scan_ssids;
- /* for 8821c set channel */
- u32 ch_param[3];
-
/* coex paras */
u32 coex_para_ver;
u8 bt_desired_ver;
enum rtw_sar_bands sar_band;
struct rtw_sar sar;
+
+ /* for 8821c set channel */
+ u32 ch_param[3];
};
struct rtw_path_div {
static void rtw8821c_phy_set_param(struct rtw_dev *rtwdev)
{
+ struct rtw_hal *hal = &rtwdev->hal;
u8 crystal_cap, val;
/* power on BB/RF domain */
/* post init after header files config */
rtw_write32_set(rtwdev, REG_RXPSEL, BIT_RX_PSEL_RST);
- rtwdev->chip->ch_param[0] = rtw_read32_mask(rtwdev, REG_TXSF2, MASKDWORD);
- rtwdev->chip->ch_param[1] = rtw_read32_mask(rtwdev, REG_TXSF6, MASKDWORD);
- rtwdev->chip->ch_param[2] = rtw_read32_mask(rtwdev, REG_TXFILTER, MASKDWORD);
+ hal->ch_param[0] = rtw_read32_mask(rtwdev, REG_TXSF2, MASKDWORD);
+ hal->ch_param[1] = rtw_read32_mask(rtwdev, REG_TXSF6, MASKDWORD);
+ hal->ch_param[2] = rtw_read32_mask(rtwdev, REG_TXFILTER, MASKDWORD);
rtw_phy_init(rtwdev);
rtwdev->dm_info.cck_pd_default = rtw_read8(rtwdev, REG_CSRATIO) & 0x1f;
static void rtw8821c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw,
u8 primary_ch_idx)
{
+ struct rtw_hal *hal = &rtwdev->hal;
u32 val32;
if (channel <= 14) {
rtw_write32_mask(rtwdev, REG_TXFILTER, MASKDWORD, 0x00003667);
} else {
rtw_write32_mask(rtwdev, REG_TXSF2, MASKDWORD,
- rtwdev->chip->ch_param[0]);
+ hal->ch_param[0]);
rtw_write32_mask(rtwdev, REG_TXSF6, MASKLWORD,
- rtwdev->chip->ch_param[1] & MASKLWORD);
+ hal->ch_param[1] & MASKLWORD);
rtw_write32_mask(rtwdev, REG_TXFILTER, MASKDWORD,
- rtwdev->chip->ch_param[2]);
+ hal->ch_param[2]);
}
} else if (channel > 35) {
rtw_write32_mask(rtwdev, REG_ENTXCCK, BIT(18), 0x1);
queue->rx_copy.completed = &completed_skbs;
while (xenvif_rx_ring_slots_available(queue) &&
+ !skb_queue_empty(&queue->rx_queue) &&
work_done < RX_BATCH_SIZE) {
xenvif_rx_skb(queue);
work_done++;
MODULE_PARM_DESC(max_queues,
"Maximum number of queues per virtual interface");
+static bool __read_mostly xennet_trusted = true;
+module_param_named(trusted, xennet_trusted, bool, 0644);
+MODULE_PARM_DESC(trusted, "Is the backend trusted");
+
#define XENNET_TIMEOUT (5 * HZ)
static const struct ethtool_ops xennet_ethtool_ops;
/* Is device behaving sane? */
bool broken;
+ /* Should skbs be bounced into a zeroed buffer? */
+ bool bounce;
+
atomic_t rx_gso_checksum_fixup;
};
if (unlikely(!skb))
return NULL;
- page = page_pool_dev_alloc_pages(queue->page_pool);
+ page = page_pool_alloc_pages(queue->page_pool,
+ GFP_ATOMIC | __GFP_NOWARN | __GFP_ZERO);
if (unlikely(!page)) {
kfree_skb(skb);
return NULL;
return nxmit;
}
+struct sk_buff *bounce_skb(const struct sk_buff *skb)
+{
+ unsigned int headerlen = skb_headroom(skb);
+ /* Align size to allocate full pages and avoid contiguous data leaks */
+ unsigned int size = ALIGN(skb_end_offset(skb) + skb->data_len,
+ XEN_PAGE_SIZE);
+ struct sk_buff *n = alloc_skb(size, GFP_ATOMIC | __GFP_ZERO);
+
+ if (!n)
+ return NULL;
+
+ if (!IS_ALIGNED((uintptr_t)n->head, XEN_PAGE_SIZE)) {
+ WARN_ONCE(1, "misaligned skb allocated\n");
+ kfree_skb(n);
+ return NULL;
+ }
+
+ /* Set the data pointer */
+ skb_reserve(n, headerlen);
+ /* Set the tail pointer and length */
+ skb_put(n, skb->len);
+
+ BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len));
+
+ skb_copy_header(n, skb);
+ return n;
+}
#define MAX_XEN_SKB_FRAGS (65536 / XEN_PAGE_SIZE + 1)
/* The first req should be at least ETH_HLEN size or the packet will be
* dropped by netback.
+ *
+ * If the backend is not trusted bounce all data to zeroed pages to
+ * avoid exposing contiguous data on the granted page not belonging to
+ * the skb.
*/
- if (unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
- nskb = skb_copy(skb, GFP_ATOMIC);
+ if (np->bounce || unlikely(PAGE_SIZE - offset < ETH_HLEN)) {
+ nskb = bounce_skb(skb);
if (!nskb)
goto drop;
dev_consume_skb_any(skb);
}
}
rcu_read_unlock();
-next:
+
__skb_queue_tail(list, skb);
+
+next:
if (!(rx->flags & XEN_NETRXF_more_data))
break;
info->netdev->irq = 0;
+ /* Check if backend is trusted. */
+ info->bounce = !xennet_trusted ||
+ !xenbus_read_unsigned(dev->nodename, "trusted", 1);
+
/* Check if backend supports multiple queues */
max_queues = xenbus_read_unsigned(info->xbdev->otherend,
"multi-queue-max-queues", 1);
return err;
if (np->netback_has_xdp_headroom)
pr_info("backend supports XDP headroom\n");
+ if (np->bounce)
+ dev_info(&np->xbdev->dev,
+ "bouncing transmitted data to zeroed pages\n");
/* talk_to_netback() sets the correct number of queues */
num_queues = dev->real_num_tx_queues;
pdata->irq_polarity = IRQF_TRIGGER_RISING;
ret = irq_of_parse_and_map(node, 0);
- if (ret < 0) {
- pr_err("Unable to get irq, error: %d\n", ret);
- return ret;
+ if (!ret) {
+ pr_err("Unable to get irq\n");
+ return -EINVAL;
}
pdata->irq = ret;
}
ret = irq_of_parse_and_map(node, 0);
- if (ret < 0) {
- pr_err("Unable to get irq, error: %d\n", ret);
- return ret;
+ if (!ret) {
+ pr_err("Unable to get irq\n");
+ return -EINVAL;
}
pdata->irq = ret;
int err;
while ((urb = usb_get_from_anchor(&drv_data->deferred))) {
+ usb_anchor_urb(urb, &drv_data->tx_anchor);
+
err = usb_submit_urb(urb, GFP_ATOMIC);
- if (err)
+ if (err) {
+ kfree(urb->setup_packet);
+ usb_unanchor_urb(urb);
+ usb_free_urb(urb);
break;
+ }
drv_data->tx_in_flight++;
+ usb_free_urb(urb);
+ }
+
+ /* Cleanup the rest deferred urbs. */
+ while ((urb = usb_get_from_anchor(&drv_data->deferred))) {
+ kfree(urb->setup_packet);
+ usb_free_urb(urb);
}
- usb_scuttle_anchored_urbs(&drv_data->deferred);
}
static int nfcmrvl_resume(struct usb_interface *intf)
skb_put_data(*skb, &header, NXP_NCI_FW_HDR_LEN);
r = i2c_master_recv(client, skb_put(*skb, frame_len), frame_len);
- if (r != frame_len) {
+ if (r < 0) {
+ goto fw_read_exit_free_skb;
+ } else if (r != frame_len) {
nfc_err(&client->dev,
"Invalid frame length: %u (expected %zu)\n",
r, frame_len);
skb_put_data(*skb, (void *)&header, NCI_CTRL_HDR_SIZE);
+ if (!header.plen)
+ return 0;
+
r = i2c_master_recv(client, skb_put(*skb, header.plen), header.plen);
- if (r != header.plen) {
+ if (r < 0) {
+ goto nci_read_exit_free_skb;
+ } else if (r != header.plen) {
nfc_err(&client->dev,
"Invalid frame payload length: %u (expected %u)\n",
r, header.plen);
int r = 0;
struct device *dev = &hdev->ndev->dev;
struct nfc_evt_transaction *transaction;
+ u32 aid_len;
+ u8 params_len;
pr_debug("connectivity gate event: %x\n", event);
r = nfc_se_connectivity(hdev->ndev, host);
break;
case ST21NFCA_EVT_TRANSACTION:
- /*
- * According to specification etsi 102 622
+ /* According to specification etsi 102 622
* 11.2.2.4 EVT_TRANSACTION Table 52
* Description Tag Length
* AID 81 5 to 16
* PARAMETERS 82 0 to 255
+ *
+ * The key differences are aid storage length is variably sized
+ * in the packet, but fixed in nfc_evt_transaction, and that the aid_len
+ * is u8 in the packet, but u32 in the structure, and the tags in
+ * the packet are not included in nfc_evt_transaction.
+ *
+ * size in bytes: 1 1 5-16 1 1 0-255
+ * offset: 0 1 2 aid_len + 2 aid_len + 3 aid_len + 4
+ * member name: aid_tag(M) aid_len aid params_tag(M) params_len params
+ * example: 0x81 5-16 X 0x82 0-255 X
*/
- if (skb->len < NFC_MIN_AID_LENGTH + 2 &&
- skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG)
+ if (skb->len < 2 || skb->data[0] != NFC_EVT_TRANSACTION_AID_TAG)
return -EPROTO;
- transaction = devm_kzalloc(dev, skb->len - 2, GFP_KERNEL);
- if (!transaction)
- return -ENOMEM;
-
- transaction->aid_len = skb->data[1];
+ aid_len = skb->data[1];
- /* Checking if the length of the AID is valid */
- if (transaction->aid_len > sizeof(transaction->aid))
- return -EINVAL;
+ if (skb->len < aid_len + 4 || aid_len > sizeof(transaction->aid))
+ return -EPROTO;
- memcpy(transaction->aid, &skb->data[2],
- transaction->aid_len);
+ params_len = skb->data[aid_len + 3];
- /* Check next byte is PARAMETERS tag (82) */
- if (skb->data[transaction->aid_len + 2] !=
- NFC_EVT_TRANSACTION_PARAMS_TAG)
+ /* Verify PARAMETERS tag is (82), and final check that there is enough
+ * space in the packet to read everything.
+ */
+ if ((skb->data[aid_len + 2] != NFC_EVT_TRANSACTION_PARAMS_TAG) ||
+ (skb->len < aid_len + 4 + params_len))
return -EPROTO;
- transaction->params_len = skb->data[transaction->aid_len + 3];
+ transaction = devm_kzalloc(dev, sizeof(*transaction) + params_len, GFP_KERNEL);
+ if (!transaction)
+ return -ENOMEM;
- /* Total size is allocated (skb->len - 2) minus fixed array members */
- if (transaction->params_len > ((skb->len - 2) - sizeof(struct nfc_evt_transaction)))
- return -EINVAL;
+ transaction->aid_len = aid_len;
+ transaction->params_len = params_len;
- memcpy(transaction->params, skb->data +
- transaction->aid_len + 4, transaction->params_len);
+ memcpy(transaction->aid, &skb->data[2], aid_len);
+ memcpy(transaction->params, &skb->data[aid_len + 4], params_len);
r = nfc_se_transaction(hdev->ndev, host, transaction);
break;
ndr_end = nd_region->ndr_start + nd_region->ndr_size - 1;
/* make sure we are in the region */
- if (ctx->phys < nd_region->ndr_start
- || (ctx->phys + ctx->cleared) > ndr_end)
+ if (ctx->phys < nd_region->ndr_start ||
+ (ctx->phys + ctx->cleared - 1) > ndr_end)
return 0;
sector = (ctx->phys - nd_region->ndr_start) / 512;
.vid = 0x1e0f,
.mn = "KCD6XVUL6T40",
.quirks = NVME_QUIRK_NO_APST,
+ },
+ {
+ /*
+ * The external Samsung X5 SSD fails initialization without a
+ * delay before checking if it is ready and has a whole set of
+ * other problems. To make this even more interesting, it
+ * shares the PCI ID with internal Samsung 970 Evo Plus that
+ * does not need or want these quirks.
+ */
+ .vid = 0x144d,
+ .mn = "Samsung Portable SSD X5",
+ .quirks = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
+ NVME_QUIRK_NO_DEEPEST_PS |
+ NVME_QUIRK_IGNORE_DEV_SUBNQN,
}
};
* we have no UUID set
*/
if (uuid_is_null(&ids->uuid)) {
- printk_ratelimited(KERN_WARNING
- "No UUID available providing old NGUID\n");
+ dev_warn_ratelimited(dev,
+ "No UUID available providing old NGUID\n");
return sysfs_emit(buf, "%pU\n", ids->nguid);
}
return sysfs_emit(buf, "%pU\n", &ids->uuid);
}
static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl,
- unsigned nsid, struct nvme_ns_ids *ids)
+ unsigned nsid, struct nvme_ns_ids *ids, bool is_shared)
{
struct nvme_ns_head *head;
size_t size = sizeof(*head);
head->subsys = ctrl->subsys;
head->ns_id = nsid;
head->ids = *ids;
+ head->shared = is_shared;
kref_init(&head->ref);
if (head->ids.csi) {
if (ret) {
dev_err(ctrl->device,
"globally duplicate IDs for nsid %d\n", nsid);
+ nvme_print_device_info(ctrl);
return ret;
}
nsid);
goto out_unlock;
}
- head = nvme_alloc_ns_head(ctrl, nsid, ids);
+ head = nvme_alloc_ns_head(ctrl, nsid, ids, is_shared);
if (IS_ERR(head)) {
ret = PTR_ERR(head);
goto out_unlock;
}
- head->shared = is_shared;
} else {
ret = -EINVAL;
if (!is_shared || !head->shared) {
nvme_stop_failfast_work(ctrl);
flush_work(&ctrl->async_event_work);
cancel_work_sync(&ctrl->fw_act_work);
+ if (ctrl->ops->stop_ctrl)
+ ctrl->ops->stop_ctrl(ctrl);
}
EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
+ void (*stop_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
+ void (*print_device_info)(struct nvme_ctrl *ctrl);
};
/*
return blk_mq_tag_to_rq(tags, nvme_tag_from_cid(command_id));
}
+/*
+ * Return the length of the string without the space padding
+ */
+static inline int nvme_strlen(char *s, int len)
+{
+ while (s[len - 1] == ' ')
+ len--;
+ return len;
+}
+
+static inline void nvme_print_device_info(struct nvme_ctrl *ctrl)
+{
+ struct nvme_subsystem *subsys = ctrl->subsys;
+
+ if (ctrl->ops->print_device_info) {
+ ctrl->ops->print_device_info(ctrl);
+ return;
+ }
+
+ dev_err(ctrl->device,
+ "VID:%04x model:%.*s firmware:%.*s\n", subsys->vendor_id,
+ nvme_strlen(subsys->model, sizeof(subsys->model)),
+ subsys->model, nvme_strlen(subsys->firmware_rev,
+ sizeof(subsys->firmware_rev)),
+ subsys->firmware_rev);
+}
+
#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
void nvme_fault_inject_init(struct nvme_fault_inject *fault_inj,
const char *dev_name);
dev_warn(dev->ctrl.device,
"controller is down; will reset: CSTS=0x%x, PCI_STATUS read failed (%d)\n",
csts, result);
+
+ if (csts != ~0)
+ return;
+
+ dev_warn(dev->ctrl.device,
+ "Does your device have a faulty power saving mode enabled?\n");
+ dev_warn(dev->ctrl.device,
+ "Try \"nvme_core.default_ps_max_latency_us=0 pcie_aspm=off\" and report a bug\n");
}
static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
struct pci_dev *pdev = to_pci_dev(dev->dev);
mutex_lock(&dev->shutdown_lock);
- if (pci_device_is_present(pdev) && pci_is_enabled(pdev)) {
- u32 csts = readl(dev->bar + NVME_REG_CSTS);
+ if (pci_is_enabled(pdev)) {
+ u32 csts;
+
+ if (pci_device_is_present(pdev))
+ csts = readl(dev->bar + NVME_REG_CSTS);
+ else
+ csts = ~0;
if (dev->ctrl.state == NVME_CTRL_LIVE ||
dev->ctrl.state == NVME_CTRL_RESETTING) {
return snprintf(buf, size, "%s\n", dev_name(&pdev->dev));
}
+
+static void nvme_pci_print_device_info(struct nvme_ctrl *ctrl)
+{
+ struct pci_dev *pdev = to_pci_dev(to_nvme_dev(ctrl)->dev);
+ struct nvme_subsystem *subsys = ctrl->subsys;
+
+ dev_err(ctrl->device,
+ "VID:DID %04x:%04x model:%.*s firmware:%.*s\n",
+ pdev->vendor, pdev->device,
+ nvme_strlen(subsys->model, sizeof(subsys->model)),
+ subsys->model, nvme_strlen(subsys->firmware_rev,
+ sizeof(subsys->firmware_rev)),
+ subsys->firmware_rev);
+}
+
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.name = "pcie",
.module = THIS_MODULE,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
.get_address = nvme_pci_get_address,
+ .print_device_info = nvme_pci_print_device_info,
};
static int nvme_dev_map(struct nvme_dev *dev)
{ PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
- .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
+ .driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
+ NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_NO_NS_DESC_LIST, },
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_DISABLE_WRITE_ZEROES|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
- .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
+ NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1b4b, 0x1092), /* Lexar 256 GB SSD */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x1cc1, 0x33f8), /* ADATA IM2P33F8ABR1 1 TB */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x10ec, 0x5762), /* ADATA SX6000LNP */
- .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN |
+ NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1cc1, 0x8201), /* ADATA SX8200PNP 512GB */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x1344, 0x5407), /* Micron Technology Inc NVMe SSD */
+ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN },
{ PCI_DEVICE(0x1c5c, 0x1504), /* SK Hynix PC400 */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1c5c, 0x174a), /* SK Hynix P31 SSD */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x15b7, 0x2001), /* Sandisk Skyhawk */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x1d97, 0x2263), /* SPCC */
.driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x144d, 0xa80b), /* Samsung PM9B1 256G and 512G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x144d, 0xa809), /* Samsung MZALQ256HBJD 256G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1cc4, 0x6303), /* UMIS RPJTJ512MGE1QDY 512G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+ { PCI_DEVICE(0x1cc4, 0x6302), /* UMIS RPJTJ256MGE1QDY 256G */
+ .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
{ PCI_DEVICE(0x2646, 0x2262), /* KINGSTON SKC2000 NVMe SSD */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
{ PCI_DEVICE(0x2646, 0x2263), /* KINGSTON A2000 NVMe SSD */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1e4B, 0x1202), /* MAXIO MAP1202 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1cc1, 0x5350), /* ADATA XPG GAMMIX S50 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x1e49, 0x0041), /* ZHITAI TiPro7000 NVMe SSD */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
+ { PCI_DEVICE(0xc0a9, 0x540a), /* Crucial P2 */
+ .driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
.driver_data = NVME_QUIRK_DMA_ADDRESS_BITS_48, },
{ PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0065),
NVME_QUIRK_128_BYTES_SQES |
NVME_QUIRK_SHARED_TAGS |
NVME_QUIRK_SKIP_CID_GEN },
- { PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */
- .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY|
- NVME_QUIRK_NO_DEEPEST_PS |
- NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) },
{ 0, }
};
}
}
+static void nvme_rdma_stop_ctrl(struct nvme_ctrl *nctrl)
+{
+ struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
+
+ cancel_work_sync(&ctrl->err_work);
+ cancel_delayed_work_sync(&ctrl->reconnect_work);
+}
+
static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl);
static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown)
{
- cancel_work_sync(&ctrl->err_work);
- cancel_delayed_work_sync(&ctrl->reconnect_work);
-
nvme_rdma_teardown_io_queues(ctrl, shutdown);
nvme_stop_admin_queue(&ctrl->ctrl);
if (shutdown)
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,
.get_address = nvmf_get_address,
+ .stop_ctrl = nvme_rdma_stop_ctrl,
};
/*
} else if (ret < 0) {
dev_err(queue->ctrl->ctrl.device,
"failed to send request %d\n", ret);
- if (ret != -EPIPE && ret != -ECONNRESET)
- nvme_tcp_fail_request(queue->request);
+ nvme_tcp_fail_request(queue->request);
nvme_tcp_done_send_req(queue);
}
return ret;
static void nvme_tcp_teardown_ctrl(struct nvme_ctrl *ctrl, bool shutdown)
{
- cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
- cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
-
nvme_tcp_teardown_io_queues(ctrl, shutdown);
nvme_stop_admin_queue(ctrl);
if (shutdown)
nvme_tcp_reconnect_or_remove(ctrl);
}
+static void nvme_tcp_stop_ctrl(struct nvme_ctrl *ctrl)
+{
+ cancel_work_sync(&to_tcp_ctrl(ctrl)->err_work);
+ cancel_delayed_work_sync(&to_tcp_ctrl(ctrl)->connect_work);
+}
+
static void nvme_tcp_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
.submit_async_event = nvme_tcp_submit_async_event,
.delete_ctrl = nvme_tcp_delete_ctrl,
.get_address = nvmf_get_address,
+ .stop_ctrl = nvme_tcp_stop_ctrl,
};
static bool
__entry->metadata = !!blk_integrity_rq(req);
__entry->fctype = cmd->fabrics.fctype;
__assign_disk_name(__entry->disk, req->q->disk);
- memcpy(__entry->cdw10, &cmd->common.cdw10,
+ memcpy(__entry->cdw10, &cmd->common.cdws,
sizeof(__entry->cdw10));
),
TP_printk("nvme%d: %sqid=%d, cmdid=%u, nsid=%u, flags=0x%x, meta=0x%x, cmd=(%s %s)",
}
CONFIGFS_ATTR(nvmet_passthru_, io_timeout);
+static ssize_t nvmet_passthru_clear_ids_show(struct config_item *item,
+ char *page)
+{
+ return sprintf(page, "%u\n", to_subsys(item->ci_parent)->clear_ids);
+}
+
+static ssize_t nvmet_passthru_clear_ids_store(struct config_item *item,
+ const char *page, size_t count)
+{
+ struct nvmet_subsys *subsys = to_subsys(item->ci_parent);
+ unsigned int clear_ids;
+
+ if (kstrtouint(page, 0, &clear_ids))
+ return -EINVAL;
+ subsys->clear_ids = clear_ids;
+ return count;
+}
+CONFIGFS_ATTR(nvmet_passthru_, clear_ids);
+
static struct configfs_attribute *nvmet_passthru_attrs[] = {
&nvmet_passthru_attr_device_path,
&nvmet_passthru_attr_enable,
&nvmet_passthru_attr_admin_timeout,
&nvmet_passthru_attr_io_timeout,
+ &nvmet_passthru_attr_clear_ids,
NULL,
};
ctrl->port = req->port;
ctrl->ops = req->ops;
+#ifdef CONFIG_NVME_TARGET_PASSTHRU
+ /* By default, set loop targets to clear IDS by default */
+ if (ctrl->port->disc_addr.trtype == NVMF_TRTYPE_LOOP)
+ subsys->clear_ids = 1;
+#endif
+
INIT_WORK(&ctrl->async_event_work, nvmet_async_event_work);
INIT_LIST_HEAD(&ctrl->async_events);
INIT_RADIX_TREE(&ctrl->p2p_ns_map, GFP_KERNEL);
struct config_group passthru_group;
unsigned int admin_timeout;
unsigned int io_timeout;
+ unsigned int clear_ids;
#endif /* CONFIG_NVME_TARGET_PASSTHRU */
#ifdef CONFIG_BLK_DEV_ZONED
ctrl->cap &= ~(1ULL << 43);
}
+static u16 nvmet_passthru_override_id_descs(struct nvmet_req *req)
+{
+ struct nvmet_ctrl *ctrl = req->sq->ctrl;
+ u16 status = NVME_SC_SUCCESS;
+ int pos, len;
+ bool csi_seen = false;
+ void *data;
+ u8 csi;
+
+ if (!ctrl->subsys->clear_ids)
+ return status;
+
+ data = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
+ if (!data)
+ return NVME_SC_INTERNAL;
+
+ status = nvmet_copy_from_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE);
+ if (status)
+ goto out_free;
+
+ for (pos = 0; pos < NVME_IDENTIFY_DATA_SIZE; pos += len) {
+ struct nvme_ns_id_desc *cur = data + pos;
+
+ if (cur->nidl == 0)
+ break;
+ if (cur->nidt == NVME_NIDT_CSI) {
+ memcpy(&csi, cur + 1, NVME_NIDT_CSI_LEN);
+ csi_seen = true;
+ break;
+ }
+ len = sizeof(struct nvme_ns_id_desc) + cur->nidl;
+ }
+
+ memset(data, 0, NVME_IDENTIFY_DATA_SIZE);
+ if (csi_seen) {
+ struct nvme_ns_id_desc *cur = data;
+
+ cur->nidt = NVME_NIDT_CSI;
+ cur->nidl = NVME_NIDT_CSI_LEN;
+ memcpy(cur + 1, &csi, NVME_NIDT_CSI_LEN);
+ }
+ status = nvmet_copy_to_sgl(req, 0, data, NVME_IDENTIFY_DATA_SIZE);
+out_free:
+ kfree(data);
+ return status;
+}
+
static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
{
struct nvmet_ctrl *ctrl = req->sq->ctrl;
*/
id->mc = 0;
+ if (req->sq->ctrl->subsys->clear_ids) {
+ memset(id->nguid, 0, NVME_NIDT_NGUID_LEN);
+ memset(id->eui64, 0, NVME_NIDT_EUI64_LEN);
+ }
+
status = nvmet_copy_to_sgl(req, 0, id, sizeof(*id));
out_free:
case NVME_ID_CNS_NS:
nvmet_passthru_override_id_ns(req);
break;
+ case NVME_ID_CNS_NS_DESC_LIST:
+ nvmet_passthru_override_id_descs(req);
+ break;
}
} else if (status < 0)
status = NVME_SC_INTERNAL;
return NVME_SC_INTERNAL;
}
-static void nvmet_tcp_send_ddgst(struct ahash_request *hash,
+static void nvmet_tcp_calc_ddgst(struct ahash_request *hash,
struct nvmet_tcp_cmd *cmd)
{
ahash_request_set_crypt(hash, cmd->req.sg,
crypto_ahash_digest(hash);
}
-static void nvmet_tcp_recv_ddgst(struct ahash_request *hash,
- struct nvmet_tcp_cmd *cmd)
-{
- struct scatterlist sg;
- struct kvec *iov;
- int i;
-
- crypto_ahash_init(hash);
- for (i = 0, iov = cmd->iov; i < cmd->nr_mapped; i++, iov++) {
- sg_init_one(&sg, iov->iov_base, iov->iov_len);
- ahash_request_set_crypt(hash, &sg, NULL, iov->iov_len);
- crypto_ahash_update(hash);
- }
- ahash_request_set_crypt(hash, NULL, (void *)&cmd->exp_ddgst, 0);
- crypto_ahash_final(hash);
-}
-
static void nvmet_setup_c2h_data_pdu(struct nvmet_tcp_cmd *cmd)
{
struct nvme_tcp_data_pdu *pdu = cmd->data_pdu;
if (queue->data_digest) {
pdu->hdr.flags |= NVME_TCP_F_DDGST;
- nvmet_tcp_send_ddgst(queue->snd_hash, cmd);
+ nvmet_tcp_calc_ddgst(queue->snd_hash, cmd);
}
if (cmd->queue->hdr_digest) {
{
struct nvmet_tcp_queue *queue = cmd->queue;
- nvmet_tcp_recv_ddgst(queue->rcv_hash, cmd);
+ nvmet_tcp_calc_ddgst(queue->rcv_hash, cmd);
queue->offset = 0;
queue->left = NVME_TCP_DIGEST_LENGTH;
queue->rcv_state = NVMET_TCP_RECV_DDGST;
Say Y here to add some extra checks and diagnostics to PINCTRL calls.
config PINCTRL_AMD
- tristate "AMD GPIO pin control"
+ bool "AMD GPIO pin control"
depends on HAS_IOMEM
depends on ACPI || COMPILE_TEST
select GPIOLIB
const struct aspeed_sig_expr **funcs;
const struct aspeed_sig_expr ***prios;
- pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name);
-
if (!pdesc)
return -EINVAL;
+ pr_debug("Muxing pin %s for %s\n", pdesc->name, pfunc->name);
+
prios = pdesc->prios;
if (!prios)
static const struct imx_pinctrl_soc_info imx93_pinctrl_info = {
.pins = imx93_pinctrl_pads,
.npins = ARRAY_SIZE(imx93_pinctrl_pads),
+ .flags = ZERO_OFFSET_VALID,
.gpr_compatible = "fsl,imx93-iomuxc-gpr",
};
struct device *dev;
struct gpio_chip gpio_chip;
struct irq_chip irq_chip;
- spinlock_t irq_lock;
+ raw_spinlock_t irq_lock;
struct pinctrl_desc pctl;
struct pinctrl_dev *pctl_dev;
struct armada_37xx_pin_group *groups;
unsigned long flags;
armada_37xx_irq_update_reg(®, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
writel(d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static void armada_37xx_irq_mask(struct irq_data *d)
unsigned long flags;
armada_37xx_irq_update_reg(®, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
writel(val & ~d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static void armada_37xx_irq_unmask(struct irq_data *d)
unsigned long flags;
armada_37xx_irq_update_reg(®, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
writel(val | d->mask, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
static int armada_37xx_irq_set_wake(struct irq_data *d, unsigned int on)
unsigned long flags;
armada_37xx_irq_update_reg(®, d);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
val = readl(info->base + reg);
if (on)
val |= (BIT(d->hwirq % GPIO_PER_REG));
else
val &= ~(BIT(d->hwirq % GPIO_PER_REG));
writel(val, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return 0;
}
u32 val, reg = IRQ_POL;
unsigned long flags;
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
armada_37xx_irq_update_reg(®, d);
val = readl(info->base + reg);
switch (type) {
break;
}
default:
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return -EINVAL;
}
writel(val, info->base + reg);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return 0;
}
regmap_read(info->regmap, INPUT_VAL + 4*reg_idx, &l);
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
p = readl(info->base + IRQ_POL + 4 * reg_idx);
if ((p ^ l) & (1 << bit_num)) {
/*
ret = -1;
}
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
return ret;
}
u32 status;
unsigned long flags;
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
status = readl_relaxed(info->base + IRQ_STATUS + 4 * i);
/* Manage only the interrupt that was enabled */
status &= readl_relaxed(info->base + IRQ_EN + 4 * i);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
while (status) {
u32 hwirq = ffs(status) - 1;
u32 virq = irq_find_mapping(d, hwirq +
update_status:
/* Update status in case a new IRQ appears */
- spin_lock_irqsave(&info->irq_lock, flags);
+ raw_spin_lock_irqsave(&info->irq_lock, flags);
status = readl_relaxed(info->base +
IRQ_STATUS + 4 * i);
/* Manage only the interrupt that was enabled */
status &= readl_relaxed(info->base + IRQ_EN + 4 * i);
- spin_unlock_irqrestore(&info->irq_lock, flags);
+ raw_spin_unlock_irqrestore(&info->irq_lock, flags);
}
}
chained_irq_exit(chip, desc);
struct device *dev = &pdev->dev;
unsigned int i, nr_irq_parent;
- spin_lock_init(&info->irq_lock);
+ raw_spin_lock_init(&info->irq_lock);
nr_irq_parent = of_irq_count(np);
if (!nr_irq_parent) {
{ },
};
+static const struct regmap_config armada_37xx_pinctrl_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_stride = 4,
+ .use_raw_spinlock = true,
+};
+
static int __init armada_37xx_pinctrl_probe(struct platform_device *pdev)
{
struct armada_37xx_pinctrl *info;
struct device *dev = &pdev->dev;
- struct device_node *np = dev->of_node;
struct regmap *regmap;
+ void __iomem *base;
int ret;
+ base = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
+ if (IS_ERR(base)) {
+ dev_err(dev, "failed to ioremap base address: %pe\n", base);
+ return PTR_ERR(base);
+ }
+
+ regmap = devm_regmap_init_mmio(dev, base,
+ &armada_37xx_pinctrl_regmap_config);
+ if (IS_ERR(regmap)) {
+ dev_err(dev, "failed to create regmap: %pe\n", regmap);
+ return PTR_ERR(regmap);
+ }
+
info = devm_kzalloc(dev, sizeof(*info), GFP_KERNEL);
if (!info)
return -ENOMEM;
info->dev = dev;
-
- regmap = syscon_node_to_regmap(np);
- if (IS_ERR(regmap))
- return dev_err_probe(dev, PTR_ERR(regmap), "cannot get regmap\n");
info->regmap = regmap;
-
info->data = of_device_get_match_data(dev);
ret = armada_37xx_pinctrl_register(pdev, info);
#define ocelot_clrsetbits(addr, clear, set) \
writel((readl(addr) & ~(clear)) | (set), (addr))
-/* PINCONFIG bits (sparx5 only) */
enum {
PINCONF_BIAS,
PINCONF_SCHMITT,
PINCONF_DRIVE_STRENGTH,
};
-#define BIAS_PD_BIT BIT(4)
-#define BIAS_PU_BIT BIT(3)
-#define BIAS_BITS (BIAS_PD_BIT|BIAS_PU_BIT)
-#define SCHMITT_BIT BIT(2)
-#define DRIVE_BITS GENMASK(1, 0)
-
/* GPIO standard registers */
#define OCELOT_GPIO_OUT_SET 0x0
#define OCELOT_GPIO_OUT_CLR 0x4
unsigned char a_functions[OCELOT_FUNC_PER_PIN]; /* Additional functions */
};
+struct ocelot_pincfg_data {
+ u8 pd_bit;
+ u8 pu_bit;
+ u8 drive_bits;
+ u8 schmitt_bit;
+};
+
struct ocelot_pinctrl {
struct device *dev;
struct pinctrl_dev *pctl;
struct regmap *map;
struct regmap *pincfg;
struct pinctrl_desc *desc;
+ const struct ocelot_pincfg_data *pincfg_data;
struct ocelot_pmx_func func[FUNC_MAX];
u8 stride;
};
+struct ocelot_match_data {
+ struct pinctrl_desc desc;
+ struct ocelot_pincfg_data pincfg_data;
+};
+
#define LUTON_P(p, f0, f1) \
static struct ocelot_pin_caps luton_pin_##p = { \
.pin = p, \
int ret = -EOPNOTSUPP;
if (info->pincfg) {
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
u32 regcfg;
- ret = regmap_read(info->pincfg, pin, ®cfg);
+ ret = regmap_read(info->pincfg,
+ pin * regmap_get_reg_stride(info->pincfg),
+ ®cfg);
if (ret)
return ret;
ret = 0;
switch (reg) {
case PINCONF_BIAS:
- *val = regcfg & BIAS_BITS;
+ *val = regcfg & (opd->pd_bit | opd->pu_bit);
break;
case PINCONF_SCHMITT:
- *val = regcfg & SCHMITT_BIT;
+ *val = regcfg & opd->schmitt_bit;
break;
case PINCONF_DRIVE_STRENGTH:
- *val = regcfg & DRIVE_BITS;
+ *val = regcfg & opd->drive_bits;
break;
default:
u32 val;
int ret;
- ret = regmap_read(info->pincfg, regaddr, &val);
+ ret = regmap_read(info->pincfg,
+ regaddr * regmap_get_reg_stride(info->pincfg),
+ &val);
if (ret)
return ret;
val &= ~clrbits;
val |= setbits;
- ret = regmap_write(info->pincfg, regaddr, val);
+ ret = regmap_write(info->pincfg,
+ regaddr * regmap_get_reg_stride(info->pincfg),
+ val);
return ret;
}
int ret = -EOPNOTSUPP;
if (info->pincfg) {
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
ret = 0;
switch (reg) {
case PINCONF_BIAS:
- ret = ocelot_pincfg_clrsetbits(info, pin, BIAS_BITS,
+ ret = ocelot_pincfg_clrsetbits(info, pin,
+ opd->pd_bit | opd->pu_bit,
val);
break;
case PINCONF_SCHMITT:
- ret = ocelot_pincfg_clrsetbits(info, pin, SCHMITT_BIT,
+ ret = ocelot_pincfg_clrsetbits(info, pin,
+ opd->schmitt_bit,
val);
break;
case PINCONF_DRIVE_STRENGTH:
if (val <= 3)
ret = ocelot_pincfg_clrsetbits(info, pin,
- DRIVE_BITS, val);
+ opd->drive_bits,
+ val);
else
ret = -EINVAL;
break;
if (param == PIN_CONFIG_BIAS_DISABLE)
val = (val == 0);
else if (param == PIN_CONFIG_BIAS_PULL_DOWN)
- val = (val & BIAS_PD_BIT ? true : false);
+ val = !!(val & info->pincfg_data->pd_bit);
else /* PIN_CONFIG_BIAS_PULL_UP */
- val = (val & BIAS_PU_BIT ? true : false);
+ val = !!(val & info->pincfg_data->pu_bit);
break;
case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+ if (!info->pincfg_data->schmitt_bit)
+ return -EOPNOTSUPP;
+
err = ocelot_hw_get_value(info, pin, PINCONF_SCHMITT, &val);
if (err)
return err;
- val = (val & SCHMITT_BIT ? true : false);
+ val = !!(val & info->pincfg_data->schmitt_bit);
break;
case PIN_CONFIG_DRIVE_STRENGTH:
unsigned long *configs, unsigned int num_configs)
{
struct ocelot_pinctrl *info = pinctrl_dev_get_drvdata(pctldev);
+ const struct ocelot_pincfg_data *opd = info->pincfg_data;
u32 param, arg, p;
int cfg, err = 0;
case PIN_CONFIG_BIAS_PULL_UP:
case PIN_CONFIG_BIAS_PULL_DOWN:
arg = (param == PIN_CONFIG_BIAS_DISABLE) ? 0 :
- (param == PIN_CONFIG_BIAS_PULL_UP) ? BIAS_PU_BIT :
- BIAS_PD_BIT;
+ (param == PIN_CONFIG_BIAS_PULL_UP) ?
+ opd->pu_bit : opd->pd_bit;
err = ocelot_hw_set_value(info, pin, PINCONF_BIAS, arg);
if (err)
break;
case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
- arg = arg ? SCHMITT_BIT : 0;
+ if (!opd->schmitt_bit)
+ return -EOPNOTSUPP;
+
+ arg = arg ? opd->schmitt_bit : 0;
err = ocelot_hw_set_value(info, pin, PINCONF_SCHMITT,
arg);
if (err)
.dt_free_map = pinconf_generic_dt_free_map,
};
-static struct pinctrl_desc luton_desc = {
- .name = "luton-pinctrl",
- .pins = luton_pins,
- .npins = ARRAY_SIZE(luton_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data luton_desc = {
+ .desc = {
+ .name = "luton-pinctrl",
+ .pins = luton_pins,
+ .npins = ARRAY_SIZE(luton_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc serval_desc = {
- .name = "serval-pinctrl",
- .pins = serval_pins,
- .npins = ARRAY_SIZE(serval_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data serval_desc = {
+ .desc = {
+ .name = "serval-pinctrl",
+ .pins = serval_pins,
+ .npins = ARRAY_SIZE(serval_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc ocelot_desc = {
- .name = "ocelot-pinctrl",
- .pins = ocelot_pins,
- .npins = ARRAY_SIZE(ocelot_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data ocelot_desc = {
+ .desc = {
+ .name = "ocelot-pinctrl",
+ .pins = ocelot_pins,
+ .npins = ARRAY_SIZE(ocelot_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc jaguar2_desc = {
- .name = "jaguar2-pinctrl",
- .pins = jaguar2_pins,
- .npins = ARRAY_SIZE(jaguar2_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data jaguar2_desc = {
+ .desc = {
+ .name = "jaguar2-pinctrl",
+ .pins = jaguar2_pins,
+ .npins = ARRAY_SIZE(jaguar2_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc servalt_desc = {
- .name = "servalt-pinctrl",
- .pins = servalt_pins,
- .npins = ARRAY_SIZE(servalt_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data servalt_desc = {
+ .desc = {
+ .name = "servalt-pinctrl",
+ .pins = servalt_pins,
+ .npins = ARRAY_SIZE(servalt_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .owner = THIS_MODULE,
+ },
};
-static struct pinctrl_desc sparx5_desc = {
- .name = "sparx5-pinctrl",
- .pins = sparx5_pins,
- .npins = ARRAY_SIZE(sparx5_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &ocelot_pmx_ops,
- .confops = &ocelot_confops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data sparx5_desc = {
+ .desc = {
+ .name = "sparx5-pinctrl",
+ .pins = sparx5_pins,
+ .npins = ARRAY_SIZE(sparx5_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &ocelot_pmx_ops,
+ .confops = &ocelot_confops,
+ .owner = THIS_MODULE,
+ },
+ .pincfg_data = {
+ .pd_bit = BIT(4),
+ .pu_bit = BIT(3),
+ .drive_bits = GENMASK(1, 0),
+ .schmitt_bit = BIT(2),
+ },
};
-static struct pinctrl_desc lan966x_desc = {
- .name = "lan966x-pinctrl",
- .pins = lan966x_pins,
- .npins = ARRAY_SIZE(lan966x_pins),
- .pctlops = &ocelot_pctl_ops,
- .pmxops = &lan966x_pmx_ops,
- .confops = &ocelot_confops,
- .owner = THIS_MODULE,
+static struct ocelot_match_data lan966x_desc = {
+ .desc = {
+ .name = "lan966x-pinctrl",
+ .pins = lan966x_pins,
+ .npins = ARRAY_SIZE(lan966x_pins),
+ .pctlops = &ocelot_pctl_ops,
+ .pmxops = &lan966x_pmx_ops,
+ .confops = &ocelot_confops,
+ .owner = THIS_MODULE,
+ },
+ .pincfg_data = {
+ .pd_bit = BIT(3),
+ .pu_bit = BIT(2),
+ .drive_bits = GENMASK(1, 0),
+ },
};
static int ocelot_create_group_func_map(struct device *dev,
{},
};
-static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev)
+static struct regmap *ocelot_pinctrl_create_pincfg(struct platform_device *pdev,
+ const struct ocelot_pinctrl *info)
{
void __iomem *base;
.reg_bits = 32,
.val_bits = 32,
.reg_stride = 4,
- .max_register = 32,
+ .max_register = info->desc->npins * 4,
.name = "pincfg",
};
static int ocelot_pinctrl_probe(struct platform_device *pdev)
{
+ const struct ocelot_match_data *data;
struct device *dev = &pdev->dev;
struct ocelot_pinctrl *info;
struct reset_control *reset;
if (!info)
return -ENOMEM;
- info->desc = (struct pinctrl_desc *)device_get_match_data(dev);
+ data = device_get_match_data(dev);
+ if (!data)
+ return -EINVAL;
+
+ info->desc = devm_kmemdup(dev, &data->desc, sizeof(*info->desc),
+ GFP_KERNEL);
+ if (!info->desc)
+ return -ENOMEM;
+
+ info->pincfg_data = &data->pincfg_data;
reset = devm_reset_control_get_optional_shared(dev, "switch");
if (IS_ERR(reset))
/* Pinconf registers */
if (info->desc->confops) {
- pincfg = ocelot_pinctrl_create_pincfg(pdev);
+ pincfg = ocelot_pinctrl_create_pincfg(pdev, info);
if (IS_ERR(pincfg))
dev_dbg(dev, "Failed to create pincfg regmap\n");
else
p->func[i]->pin_count,
sizeof(int),
GFP_KERNEL);
+ if (!p->func[i]->pins)
+ return -ENOMEM;
for (j = 0; j < p->func[i]->pin_count; j++)
p->func[i]->pins[j] = p->func[i]->pin_first + j;
bank->secure_control = pctl->match_data->secure_control;
spin_lock_init(&bank->lock);
- /* create irq hierarchical domain */
- bank->fwnode = fwnode;
+ if (pctl->domain) {
+ /* create irq hierarchical domain */
+ bank->fwnode = fwnode;
- bank->domain = irq_domain_create_hierarchy(pctl->domain, 0,
- STM32_GPIO_IRQ_LINE, bank->fwnode,
- &stm32_gpio_domain_ops, bank);
+ bank->domain = irq_domain_create_hierarchy(pctl->domain, 0, STM32_GPIO_IRQ_LINE,
+ bank->fwnode, &stm32_gpio_domain_ops,
+ bank);
- if (!bank->domain) {
- err = -ENODEV;
- goto err_clk;
+ if (!bank->domain) {
+ err = -ENODEV;
+ goto err_clk;
+ }
}
err = gpiochip_add_data(&bank->gpio_chip, bank);
pctl->domain = stm32_pctrl_get_irq_domain(pdev);
if (IS_ERR(pctl->domain))
return PTR_ERR(pctl->domain);
+ if (!pctl->domain)
+ dev_warn(dev, "pinctrl without interrupt support\n");
/* hwspinlock is optional */
hwlock_id = of_hwspin_lock_get_id(pdev->dev.of_node, 0);
}
*map = kcalloc(*num_maps + nmG, sizeof(**map), GFP_KERNEL);
+ if (*map == NULL)
+ return -ENOMEM;
+
for (i = 0; i < (*num_maps); i++) {
dt_pin = be32_to_cpu(list[i]);
pin_num = FIELD_GET(GENMASK(31, 24), dt_pin);
SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 14),
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "nand"), /* DQ6 */
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQ6 */
SUNXI_FUNCTION(0x3, "mmc2")), /* D6 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 15),
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "nand"), /* DQ7 */
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQ7 */
SUNXI_FUNCTION(0x3, "mmc2")), /* D7 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 16),
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "nand"), /* DQS */
+ SUNXI_FUNCTION(0x2, "nand0"), /* DQS */
SUNXI_FUNCTION(0x3, "mmc2")), /* RST */
SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 17),
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "nand")), /* CE2 */
+ SUNXI_FUNCTION(0x2, "nand0")), /* CE2 */
SUNXI_PIN(SUNXI_PINCTRL_PIN(C, 18),
SUNXI_FUNCTION(0x0, "gpio_in"),
SUNXI_FUNCTION(0x1, "gpio_out"),
- SUNXI_FUNCTION(0x2, "nand")), /* CE3 */
+ SUNXI_FUNCTION(0x2, "nand0")), /* CE3 */
/* Hole */
SUNXI_PIN(SUNXI_PINCTRL_PIN(D, 2),
SUNXI_FUNCTION(0x0, "gpio_in"),
struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev);
int i;
+ pin -= pctl->desc->pin_base;
+
for (i = 0; i < num_configs; i++) {
enum pin_config_param param;
unsigned long flags;
depends on I2C
depends on REGMAP_I2C
help
- This driver provides support for the Nvidia SN2201 platfom.
+ This driver provides support for the Nvidia SN2201 platform.
The SN2201 is a highly integrated for one rack unit system with
L3 management switches. It has 48 x 1Gbps RJ45 + 4 x 100G QSFP28
ports in a compact 1RU form factor. The system also including a
};
/* SN2201 I2C platform data. */
-struct mlxreg_core_hotplug_platform_data nvsw_sn2201_i2c_data = {
+static struct mlxreg_core_hotplug_platform_data nvsw_sn2201_i2c_data = {
.irq = NVSW_SN2201_CPLD_SYSIRQ,
};
int size)
{
struct mlxreg_hotplug_device *dev = devs;
+ int ret;
int i;
/* Create I2C static devices. */
dev->nr, dev->brdinfo->addr);
dev->adapter = NULL;
+ ret = PTR_ERR(dev->client);
goto fail_create_static_devices;
}
}
dev->client = NULL;
dev->adapter = NULL;
}
- return IS_ERR(dev->client);
+ return ret;
}
static void nvsw_sn2201_destroy_static_devices(struct nvsw_sn2201 *nvsw_sn2201,
if MIPS_PLATFORM_DEVICES
config CPU_HWMON
- tristate "Loongson-3 CPU HWMon Driver"
+ bool "Loongson-3 CPU HWMon Driver"
depends on MACH_LOONGSON64
select HWMON
default y
tristate "Panasonic Laptop Extras"
depends on INPUT && ACPI
depends on BACKLIGHT_CLASS_DEVICE
+ depends on ACPI_VIDEO=n || ACPI_VIDEO
+ depends on SERIO_I8042 || SERIO_I8042 = n
select INPUT_SPARSEKMAP
help
This driver adds support for access to backlight control and hotkeys
#define AMD_CPU_ID_PCO AMD_CPU_ID_RV
#define AMD_CPU_ID_CZN AMD_CPU_ID_RN
#define AMD_CPU_ID_YC 0x14B5
+#define AMD_CPU_ID_CB 0x14D8
+#define AMD_CPU_ID_PS 0x14E8
#define PMC_MSG_DELAY_MIN_US 50
#define RESPONSE_REGISTER_LOOP_MAX 20000
val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_CZN);
break;
case AMD_CPU_ID_YC:
+ case AMD_CPU_ID_CB:
+ case AMD_CPU_ID_PS:
val = amd_pmc_reg_read(pdev, AMD_PMC_SCRATCH_REG_YC);
break;
default:
&amd_pmc_idlemask_fops);
/* Enable STB only when the module_param is set */
if (enable_stb) {
- if (dev->cpu_id == AMD_CPU_ID_YC)
+ if (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB ||
+ dev->cpu_id == AMD_CPU_ID_PS)
debugfs_create_file("stb_read", 0644, dev->dbgfs_dir, dev,
&amd_pmc_stb_debugfs_fops_v2);
else
return MSG_OS_HINT_PCO;
case AMD_CPU_ID_RN:
case AMD_CPU_ID_YC:
+ case AMD_CPU_ID_CB:
+ case AMD_CPU_ID_PS:
return MSG_OS_HINT_RN;
}
return -EINVAL;
#endif
static const struct pci_device_id pmc_pci_ids[] = {
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_PS) },
+ { PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CB) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_YC) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_CZN) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD, AMD_CPU_ID_RN) },
mutex_init(&dev->lock);
- if (enable_stb && dev->cpu_id == AMD_CPU_ID_YC) {
+ if (enable_stb && (dev->cpu_id == AMD_CPU_ID_YC || dev->cpu_id == AMD_CPU_ID_CB)) {
err = amd_pmc_s2d_init(dev);
if (err)
return err;
{"AMDI0005", 0},
{"AMDI0006", 0},
{"AMDI0007", 0},
+ {"AMDI0008", 0},
{"AMD0004", 0},
{"AMD0005", 0},
{ }
{ KE_KEY, 0x31, { KEY_VOLUMEDOWN } },
{ KE_KEY, 0x32, { KEY_MUTE } },
{ KE_KEY, 0x35, { KEY_SCREENLOCK } },
+ { KE_KEY, 0x38, { KEY_PROG3 } }, /* Armoury Crate */
{ KE_KEY, 0x40, { KEY_PREVIOUSSONG } },
{ KE_KEY, 0x41, { KEY_NEXTSONG } },
{ KE_KEY, 0x43, { KEY_STOPCD } }, /* Stop/Eject */
{ KE_KEY, 0xA5, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + TV + HDMI */
{ KE_KEY, 0xA6, { KEY_SWITCHVIDEOMODE } }, /* SDSP CRT + TV + HDMI */
{ KE_KEY, 0xA7, { KEY_SWITCHVIDEOMODE } }, /* SDSP LCD + CRT + TV + HDMI */
+ { KE_KEY, 0xB3, { KEY_PROG4 } }, /* AURA */
{ KE_KEY, 0xB5, { KEY_CALC } },
{ KE_KEY, 0xC4, { KEY_KBDILLUMUP } },
{ KE_KEY, 0xC5, { KEY_KBDILLUMDOWN } },
static int __init p50_module_init(void)
{
struct resource res = DEFINE_RES_IO(P50_GPIO_IO_PORT_BASE, P50_PORT_CMD + 1);
+ int ret;
if (!dmi_first_match(dmi_ids))
return -ENODEV;
- platform_driver_register(&p50_gpio_driver);
+ ret = platform_driver_register(&p50_gpio_driver);
+ if (ret)
+ return ret;
gpio_pdev = platform_device_register_simple(DRIVER_NAME, PLATFORM_DEVID_NONE, &res, 1);
if (IS_ERR(gpio_pdev)) {
}}
static const struct dmi_system_id gigabyte_wmi_known_working_platforms[] = {
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M DS3H-CF"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B450M S2H V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE AX V2"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550 AORUS ELITE"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M AORUS PRO-P"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B550M DS3H"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660 GAMING X DDR4"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("B660I AORUS PRO DDR4"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z390 I AORUS PRO WIFI-CF"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z490 AORUS ELITE AC"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 AORUS ELITE"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 GAMING X"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 I AORUS PRO WIFI"),
DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("X570 UD"),
+ DMI_EXACT_MATCH_GIGABYTE_BOARD_NAME("Z690M AORUS ELITE AX DDR4"),
{ }
};
#define HPWMI_EVENT_GUID "95F24279-4D7B-4334-9387-ACCDC67EF61C"
#define HPWMI_BIOS_GUID "5FB7F034-2C63-45e9-BE91-3D44E2C707E4"
#define HP_OMEN_EC_THERMAL_PROFILE_OFFSET 0x95
+#define zero_if_sup(tmp) (zero_insize_support?0:sizeof(tmp)) // use when zero insize is required
/* DMI board names of devices that should use the omen specific path for
* thermal profiles.
HPWMI_BACKLIT_KB_BRIGHTNESS = 0x0D,
HPWMI_PEAKSHIFT_PERIOD = 0x0F,
HPWMI_BATTERY_CHARGE_PERIOD = 0x10,
+ HPWMI_SANITIZATION_MODE = 0x17,
};
/*
static struct platform_device *hp_wmi_platform_dev;
static struct platform_profile_handler platform_profile_handler;
static bool platform_profile_support;
+static bool zero_insize_support;
static struct rfkill *wifi_rfkill;
static struct rfkill *bluetooth_rfkill;
struct bios_return *bios_return;
union acpi_object *obj = NULL;
struct bios_args *args = NULL;
- int mid, actual_outsize, ret;
+ int mid, actual_insize, actual_outsize;
size_t bios_args_size;
+ int ret;
mid = encode_outsize_for_pvsz(outsize);
if (WARN_ON(mid < 0))
return mid;
- bios_args_size = struct_size(args, data, insize);
+ actual_insize = max(insize, 128);
+ bios_args_size = struct_size(args, data, actual_insize);
args = kmalloc(bios_args_size, GFP_KERNEL);
if (!args)
return -ENOMEM;
int val = 0, ret;
ret = hp_wmi_perform_query(query, HPWMI_READ, &val,
- 0, sizeof(val));
+ zero_if_sup(val), sizeof(val));
if (ret)
return ret < 0 ? ret : -EINVAL;
return -ENODEV;
ret = hp_wmi_perform_query(HPWMI_SYSTEM_DEVICE_MODE, HPWMI_READ,
- system_device_mode, 0, sizeof(system_device_mode));
+ system_device_mode, zero_if_sup(system_device_mode),
+ sizeof(system_device_mode));
if (ret < 0)
return ret;
int val = 0, ret;
ret = hp_wmi_perform_query(HPWMI_FAN_SPEED_MAX_GET_QUERY, HPWMI_GM,
- &val, 0, sizeof(val));
+ &val, zero_if_sup(val), sizeof(val));
if (ret)
return ret < 0 ? ret : -EINVAL;
{
int state = 0;
int ret = hp_wmi_perform_query(HPWMI_FEATURE_QUERY, HPWMI_READ, &state,
- 0, sizeof(state));
+ zero_if_sup(state), sizeof(state));
if (!ret)
return 1;
{
u8 state[128];
int ret = hp_wmi_perform_query(HPWMI_FEATURE2_QUERY, HPWMI_READ, &state,
- 0, sizeof(state));
+ zero_if_sup(state), sizeof(state));
if (!ret)
return 1;
int err, i;
err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state,
- 0, sizeof(state));
+ zero_if_sup(state), sizeof(state));
if (err)
return err;
break;
case HPWMI_BATTERY_CHARGE_PERIOD:
break;
+ case HPWMI_SANITIZATION_MODE:
+ break;
default:
pr_info("Unknown event_id - %d - 0x%x\n", event_id, event_data);
break;
int err, i;
err = hp_wmi_perform_query(HPWMI_WIRELESS2_QUERY, HPWMI_READ, &state,
- 0, sizeof(state));
+ zero_if_sup(state), sizeof(state));
if (err)
return err < 0 ? err : -EINVAL;
{
int event_capable = wmi_has_guid(HPWMI_EVENT_GUID);
int bios_capable = wmi_has_guid(HPWMI_BIOS_GUID);
- int err;
+ int err, tmp = 0;
if (!bios_capable && !event_capable)
return -ENODEV;
+ if (hp_wmi_perform_query(HPWMI_HARDWARE_QUERY, HPWMI_READ, &tmp,
+ sizeof(tmp), sizeof(tmp)) == HPWMI_RET_INVALID_PARAMETERS)
+ zero_insize_support = true;
+
if (event_capable) {
err = hp_wmi_input_setup();
if (err)
module_param(no_bt_rfkill, bool, 0444);
MODULE_PARM_DESC(no_bt_rfkill, "No rfkill for bluetooth.");
+static bool allow_v4_dytc;
+module_param(allow_v4_dytc, bool, 0444);
+MODULE_PARM_DESC(allow_v4_dytc, "Enable DYTC version 4 platform-profile support.");
+
/*
* ACPI Helpers
*/
static const struct dmi_system_id ideapad_dytc_v4_allow_table[] = {
{
/* Ideapad 5 Pro 16ACH6 */
- .ident = "LENOVO 82L5",
.matches = {
DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
DMI_MATCH(DMI_PRODUCT_NAME, "82L5")
}
},
+ {
+ /* Ideapad 5 15ITL05 */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+ DMI_MATCH(DMI_PRODUCT_VERSION, "IdeaPad 5 15ITL05")
+ }
+ },
{}
};
dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF;
- if (dytc_version < 5) {
- if (dytc_version < 4 || !dmi_check_system(ideapad_dytc_v4_allow_table)) {
- dev_info(&priv->platform_device->dev,
- "DYTC_VERSION is less than 4 or is not allowed: %d\n",
- dytc_version);
- return -ENODEV;
- }
+ if (dytc_version < 4) {
+ dev_info(&priv->platform_device->dev, "DYTC_VERSION < 4 is not supported\n");
+ return -ENODEV;
+ }
+
+ if (dytc_version < 5 &&
+ !(allow_v4_dytc || dmi_check_system(ideapad_dytc_v4_allow_table))) {
+ dev_info(&priv->platform_device->dev,
+ "DYTC_VERSION 4 support may not work. Pass ideapad_laptop.allow_v4_dytc=Y on the kernel commandline to enable\n");
+ return -ENODEV;
}
priv->dytc = kzalloc(sizeof(*priv->dytc), GFP_KERNEL);
{
.matches = {
DMI_EXACT_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
- DMI_EXACT_MATCH(DMI_PRODUCT_NAME, "T100TA"),
+ /* Non exact match to also match T100TAF */
+ DMI_MATCH(DMI_PRODUCT_NAME, "T100TA"),
},
.driver_data = &asus_t100ta_lookup,
},
DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x360 Convertible 15-df0xxx"),
},
},
+ {
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Microsoft Corporation"),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Surface Go"),
+ },
+ },
{ }
};
config INTEL_IFS
tristate "Intel In Field Scan"
depends on X86 && CPU_SUP_INTEL && 64BIT && SMP
+ # Discussion on the list has shown that the sysfs API needs a bit
+ # more work, mark this as broken for now
+ depends on BROKEN
select INTEL_IFS_DEVICE
help
Enable support for the In Field Scan capability in select
X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L, &icl_reg_map),
X86_MATCH_INTEL_FAM6_MODEL(ROCKETLAKE, &tgl_reg_map),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, &tgl_reg_map),
+ X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_N, &tgl_reg_map),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, &adl_reg_map),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE_P, &tgl_reg_map),
{}
};
auxiliary_set_drvdata(auxdev, priv);
for (i = 0; i < intel_vsec_dev->num_resources; i++) {
- struct intel_pmt_entry *entry = &priv->entry[i].entry;
+ struct intel_pmt_entry *entry = &priv->entry[priv->num_entries].entry;
ret = intel_pmt_dev_create(entry, &pmt_crashlog_ns, intel_vsec_dev, i);
if (ret < 0)
* - v0.1 start from toshiba_acpi driver written by John Belmonte
*/
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/types.h>
+#include <linux/acpi.h>
#include <linux/backlight.h>
#include <linux/ctype.h>
-#include <linux/seq_file.h>
-#include <linux/uaccess.h>
-#include <linux/slab.h>
-#include <linux/acpi.h>
+#include <linux/i8042.h>
+#include <linux/init.h>
#include <linux/input.h>
#include <linux/input/sparse-keymap.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/platform_device.h>
-
+#include <linux/seq_file.h>
+#include <linux/serio.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/uaccess.h>
+#include <acpi/video.h>
MODULE_AUTHOR("Hiroshi Miura <miura@da-cha.org>");
MODULE_AUTHOR("David Bronaugh <dbronaugh@linuxboxen.org>");
struct platform_device *platform;
};
+/*
+ * On some Panasonic models the volume up / down / mute keys send duplicate
+ * keypress events over the PS/2 kbd interface, filter these out.
+ */
+static bool panasonic_i8042_filter(unsigned char data, unsigned char str,
+ struct serio *port)
+{
+ static bool extended;
+
+ if (str & I8042_STR_AUXDATA)
+ return false;
+
+ if (data == 0xe0) {
+ extended = true;
+ return true;
+ } else if (extended) {
+ extended = false;
+
+ switch (data & 0x7f) {
+ case 0x20: /* e0 20 / e0 a0, Volume Mute press / release */
+ case 0x2e: /* e0 2e / e0 ae, Volume Down press / release */
+ case 0x30: /* e0 30 / e0 b0, Volume Up press / release */
+ return true;
+ default:
+ /*
+ * Report the previously filtered e0 before continuing
+ * with the next non-filtered byte.
+ */
+ serio_interrupt(port, 0xe0, 0);
+ return false;
+ }
+ }
+
+ return false;
+}
+
/* method access functions */
static int acpi_pcc_write_sset(struct pcc_acpi *pcc, int func, int val)
{
struct input_dev *hotk_input_dev = pcc->input_dev;
int rc;
unsigned long long result;
+ unsigned int key;
+ unsigned int updown;
rc = acpi_evaluate_integer(pcc->handle, METHOD_HKEY_QUERY,
NULL, &result);
return;
}
+ key = result & 0xf;
+ updown = result & 0x80; /* 0x80 == key down; 0x00 = key up */
+
/* hack: some firmware sends no key down for sleep / hibernate */
- if ((result & 0xf) == 0x7 || (result & 0xf) == 0xa) {
- if (result & 0x80)
+ if (key == 7 || key == 10) {
+ if (updown)
sleep_keydown_seen = 1;
if (!sleep_keydown_seen)
sparse_keymap_report_event(hotk_input_dev,
- result & 0xf, 0x80, false);
+ key, 0x80, false);
}
- if ((result & 0xf) == 0x7 || (result & 0xf) == 0x9 || (result & 0xf) == 0xa) {
- if (!sparse_keymap_report_event(hotk_input_dev,
- result & 0xf, result & 0x80, false))
- pr_err("Unknown hotkey event: 0x%04llx\n", result);
- }
+ /*
+ * Don't report brightness key-presses if they are also reported
+ * by the ACPI video bus.
+ */
+ if ((key == 1 || key == 2) && acpi_video_handles_brightness_key_presses())
+ return;
+
+ if (!sparse_keymap_report_event(hotk_input_dev, key, updown, false))
+ pr_err("Unknown hotkey event: 0x%04llx\n", result);
}
static void acpi_pcc_hotkey_notify(struct acpi_device *device, u32 event)
pcc->platform = NULL;
}
+ i8042_install_filter(panasonic_i8042_filter);
return 0;
out_platform:
if (!device || !pcc)
return -EINVAL;
+ i8042_remove_filter(panasonic_i8042_filter);
+
if (pcc->platform) {
device_remove_file(&pcc->platform->dev, &dev_attr_cdpower);
platform_device_unregister(pcc->platform);
iounmap(addr);
cleanup_resource:
release_resource(res);
+ kfree(res);
}
static struct acpi_s2idle_dev_ops thinkpad_acpi_s2idle_dev_ops = {
#define DYTC_DISABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 0)
#define DYTC_ENABLE_CQL DYTC_SET_COMMAND(DYTC_FUNCTION_CQL, DYTC_MODE_MMC_BALANCE, 1)
-enum dytc_profile_funcmode {
- DYTC_FUNCMODE_NONE = 0,
- DYTC_FUNCMODE_MMC,
- DYTC_FUNCMODE_PSC,
-};
-
-static enum dytc_profile_funcmode dytc_profile_available;
static enum platform_profile_option dytc_current_profile;
static atomic_t dytc_ignore_event = ATOMIC_INIT(0);
static DEFINE_MUTEX(dytc_mutex);
+static int dytc_capabilities;
static bool dytc_mmc_get_available;
static int convert_dytc_to_profile(int dytcmode, enum platform_profile_option *profile)
{
- if (dytc_profile_available == DYTC_FUNCMODE_MMC) {
+ if (dytc_capabilities & BIT(DYTC_FC_MMC)) {
switch (dytcmode) {
case DYTC_MODE_MMC_LOWPOWER:
*profile = PLATFORM_PROFILE_LOW_POWER;
}
return 0;
}
- if (dytc_profile_available == DYTC_FUNCMODE_PSC) {
+ if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
switch (dytcmode) {
case DYTC_MODE_PSC_LOWPOWER:
*profile = PLATFORM_PROFILE_LOW_POWER;
{
switch (profile) {
case PLATFORM_PROFILE_LOW_POWER:
- if (dytc_profile_available == DYTC_FUNCMODE_MMC)
+ if (dytc_capabilities & BIT(DYTC_FC_MMC))
*perfmode = DYTC_MODE_MMC_LOWPOWER;
- else if (dytc_profile_available == DYTC_FUNCMODE_PSC)
+ else if (dytc_capabilities & BIT(DYTC_FC_PSC))
*perfmode = DYTC_MODE_PSC_LOWPOWER;
break;
case PLATFORM_PROFILE_BALANCED:
- if (dytc_profile_available == DYTC_FUNCMODE_MMC)
+ if (dytc_capabilities & BIT(DYTC_FC_MMC))
*perfmode = DYTC_MODE_MMC_BALANCE;
- else if (dytc_profile_available == DYTC_FUNCMODE_PSC)
+ else if (dytc_capabilities & BIT(DYTC_FC_PSC))
*perfmode = DYTC_MODE_PSC_BALANCE;
break;
case PLATFORM_PROFILE_PERFORMANCE:
- if (dytc_profile_available == DYTC_FUNCMODE_MMC)
+ if (dytc_capabilities & BIT(DYTC_FC_MMC))
*perfmode = DYTC_MODE_MMC_PERFORM;
- else if (dytc_profile_available == DYTC_FUNCMODE_PSC)
+ else if (dytc_capabilities & BIT(DYTC_FC_PSC))
*perfmode = DYTC_MODE_PSC_PERFORM;
break;
default: /* Unknown profile */
if (err)
goto unlock;
- if (dytc_profile_available == DYTC_FUNCMODE_MMC) {
+ if (dytc_capabilities & BIT(DYTC_FC_MMC)) {
if (profile == PLATFORM_PROFILE_BALANCED) {
/*
* To get back to balanced mode we need to issue a reset command.
goto unlock;
}
}
- if (dytc_profile_available == DYTC_FUNCMODE_PSC) {
+ if (dytc_capabilities & BIT(DYTC_FC_PSC)) {
err = dytc_command(DYTC_SET_COMMAND(DYTC_FUNCTION_PSC, perfmode, 1), &output);
if (err)
goto unlock;
int perfmode;
mutex_lock(&dytc_mutex);
- if (dytc_profile_available == DYTC_FUNCMODE_MMC) {
+ if (dytc_capabilities & BIT(DYTC_FC_MMC)) {
if (dytc_mmc_get_available)
err = dytc_command(DYTC_CMD_MMC_GET, &output);
else
err = dytc_cql_command(DYTC_CMD_GET, &output);
- } else if (dytc_profile_available == DYTC_FUNCMODE_PSC)
+ } else if (dytc_capabilities & BIT(DYTC_FC_PSC))
err = dytc_command(DYTC_CMD_GET, &output);
mutex_unlock(&dytc_mutex);
set_bit(PLATFORM_PROFILE_BALANCED, dytc_profile.choices);
set_bit(PLATFORM_PROFILE_PERFORMANCE, dytc_profile.choices);
- dytc_profile_available = DYTC_FUNCMODE_NONE;
err = dytc_command(DYTC_CMD_QUERY, &output);
if (err)
return err;
return -ENODEV;
/* Check what capabilities are supported */
- err = dytc_command(DYTC_CMD_FUNC_CAP, &output);
+ err = dytc_command(DYTC_CMD_FUNC_CAP, &dytc_capabilities);
if (err)
return err;
- if (output & BIT(DYTC_FC_MMC)) { /* MMC MODE */
- dytc_profile_available = DYTC_FUNCMODE_MMC;
-
+ if (dytc_capabilities & BIT(DYTC_FC_MMC)) { /* MMC MODE */
+ pr_debug("MMC is supported\n");
/*
* Check if MMC_GET functionality available
* Version > 6 and return success from MMC_GET command
if (!err && ((output & DYTC_ERR_MASK) == DYTC_ERR_SUCCESS))
dytc_mmc_get_available = true;
}
- } else if (output & BIT(DYTC_FC_PSC)) { /* PSC MODE */
- dytc_profile_available = DYTC_FUNCMODE_PSC;
+ } else if (dytc_capabilities & BIT(DYTC_FC_PSC)) { /* PSC MODE */
+ /* Support for this only works on AMD platforms */
+ if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) {
+ dbg_printk(TPACPI_DBG_INIT, "PSC not support on Intel platforms\n");
+ return -ENODEV;
+ }
+ pr_debug("PSC is supported\n");
} else {
dbg_printk(TPACPI_DBG_INIT, "No DYTC support available\n");
return -ENODEV;
static void dytc_profile_exit(void)
{
- dytc_profile_available = DYTC_FUNCMODE_NONE;
platform_profile_remove();
}
#include <linux/pinctrl/machine.h>
#include <linux/platform_data/lp855x.h>
#include <linux/platform_device.h>
-#include <linux/pm.h>
#include <linux/power/bq24190_charger.h>
+#include <linux/reboot.h>
#include <linux/rmi.h>
#include <linux/serdev.h>
#include <linux/spi/spi.h>
"INT33FC:02", "pmu_clk2_grp", "pmu_clk");
static struct pinctrl *lenovo_yoga_tab2_830_1050_codec_pinctrl;
+static struct sys_off_handler *lenovo_yoga_tab2_830_1050_sys_off_handler;
static int __init lenovo_yoga_tab2_830_1050_init_codec(void)
{
* followed by a normal 3 second press to recover. Avoid this by doing an EFI
* poweroff instead.
*/
-static void lenovo_yoga_tab2_830_1050_power_off(void)
+static int lenovo_yoga_tab2_830_1050_power_off(struct sys_off_data *data)
{
efi.reset_system(EFI_RESET_SHUTDOWN, EFI_SUCCESS, 0, NULL);
+
+ return NOTIFY_DONE;
}
static int __init lenovo_yoga_tab2_830_1050_init(void)
if (ret)
return ret;
- pm_power_off = lenovo_yoga_tab2_830_1050_power_off;
+ /* SYS_OFF_PRIO_FIRMWARE + 1 so that it runs before acpi_power_off */
+ lenovo_yoga_tab2_830_1050_sys_off_handler =
+ register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, SYS_OFF_PRIO_FIRMWARE + 1,
+ lenovo_yoga_tab2_830_1050_power_off, NULL);
+ if (IS_ERR(lenovo_yoga_tab2_830_1050_sys_off_handler))
+ return PTR_ERR(lenovo_yoga_tab2_830_1050_sys_off_handler);
+
return 0;
}
static void lenovo_yoga_tab2_830_1050_exit(void)
{
- pm_power_off = NULL; /* Just turn poweroff into halt on module unload */
+ unregister_sys_off_handler(lenovo_yoga_tab2_830_1050_sys_off_handler);
if (lenovo_yoga_tab2_830_1050_codec_pinctrl) {
pinctrl_put(lenovo_yoga_tab2_830_1050_codec_pinctrl);
versatile_reboot_type = (enum versatile_reboot)reboot_id->data;
syscon_regmap = syscon_node_to_regmap(np);
+ of_node_put(np);
if (IS_ERR(syscon_regmap))
return PTR_ERR(syscon_regmap);
ret = ab8500_fg_init_hw_registers(di);
if (ret) {
dev_err(dev, "failed to initialize registers\n");
+ destroy_workqueue(di->fg_wq);
return ret;
}
di->fg_psy = devm_power_supply_register(dev, &ab8500_fg_desc, &psy_cfg);
if (IS_ERR(di->fg_psy)) {
dev_err(dev, "failed to register FG psy\n");
+ destroy_workqueue(di->fg_wq);
return PTR_ERR(di->fg_psy);
}
/* Register primary interrupt handlers */
for (i = 0; i < ARRAY_SIZE(ab8500_fg_irq); i++) {
irq = platform_get_irq_byname(pdev, ab8500_fg_irq[i].name);
- if (irq < 0)
+ if (irq < 0) {
+ destroy_workqueue(di->fg_wq);
return irq;
+ }
ret = devm_request_threaded_irq(dev, irq, NULL,
ab8500_fg_irq[i].isr,
if (ret != 0) {
dev_err(dev, "failed to request %s IRQ %d: %d\n",
ab8500_fg_irq[i].name, irq, ret);
+ destroy_workqueue(di->fg_wq);
return ret;
}
dev_dbg(dev, "Requested %s IRQ %d: %d\n",
ret = ab8500_fg_sysfs_init(di);
if (ret) {
dev_err(dev, "failed to create sysfs entry\n");
+ destroy_workqueue(di->fg_wq);
return ret;
}
if (ret) {
dev_err(dev, "failed to create FG psy\n");
ab8500_fg_sysfs_exit(di);
+ destroy_workqueue(di->fg_wq);
return ret;
}
{
int i, high, low;
- /* Break loop at table_len - 1 because that is the highest index */
- for (i = 0; i < table_len - 1; i++)
+ for (i = 0; i < table_len; i++)
if (temp > table[i].temp)
break;
/* The library function will deal with high == low */
- if ((i == 0) || (i == (table_len - 1)))
- high = i;
+ if (i == 0)
+ high = low = i;
+ else if (i == table_len)
+ high = low = i - 1;
else
- high = i - 1;
- low = i;
+ high = (low = i) - 1;
return fixp_linear_interpolate(table[low].temp,
table[low].resistance,
{
int i, high, low;
- /* Break loop at table_len - 1 because that is the highest index */
- for (i = 0; i < table_len - 1; i++)
+ for (i = 0; i < table_len; i++)
if (ocv > table[i].ocv)
break;
/* The library function will deal with high == low */
- if ((i == 0) || (i == (table_len - 1)))
- high = i - 1;
+ if (i == 0)
+ high = low = i;
+ else if (i == table_len)
+ high = low = i - 1;
else
- high = i; /* i.e. i == 0 */
- low = i;
+ high = (low = i) - 1;
return fixp_linear_interpolate(table[low].ocv,
table[low].capacity,
depends on !S390
depends on COMMON_CLK
select NET_DEVLINK
+ select CRC16
help
This driver adds support for an OpenCompute time card.
static const struct regulator_desc mp5496_smpa2 = {
.linear_ranges = (struct linear_range[]) {
- REGULATOR_LINEAR_RANGE(725000, 0, 27, 12500),
+ REGULATOR_LINEAR_RANGE(600000, 0, 127, 12500),
},
.n_linear_ranges = 1,
- .n_voltages = 28,
+ .n_voltages = 128,
.ops = &rpm_mp5496_ops,
};
static const struct regulator_desc mp5496_ldoa2 = {
.linear_ranges = (struct linear_range[]) {
- REGULATOR_LINEAR_RANGE(1800000, 0, 60, 25000),
+ REGULATOR_LINEAR_RANGE(800000, 0, 127, 25000),
},
.n_linear_ranges = 1,
- .n_voltages = 61,
+ .n_voltages = 128,
.ops = &rpm_mp5496_ops,
};
/* List of queued requests. */
static LIST_HEAD(sclp_req_queue);
-/* Data for read and and init requests. */
+/* Data for read and init requests. */
static struct sclp_req sclp_read_req;
static struct sclp_req sclp_init_req;
static void *sclp_read_sccb;
if (ap_drv->in_use) {
rc = ap_drv->in_use(ap_perms.apm, newaqm);
if (rc)
- return -EBUSY;
+ rc = -EBUSY;
}
/* release the driver's module */
if (!atomic_read(&queue->set_pci_flags_count)) {
/*
* there's no outstanding PCI any more, so we
- * have to request a PCI to be sure the the PCI
+ * have to request a PCI to be sure the PCI
* will wake at some time in the future then we
* can flush packed buffers that might still be
* hanging around, which can happen if no
vcdev->err = -EIO;
}
virtio_ccw_check_activity(vcdev, activity);
- /* Interrupts are disabled here */
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
+ /*
+ * Paired with virtio_ccw_synchronize_cbs() and interrupts are
+ * disabled here.
+ */
read_lock(&vcdev->irq_lock);
+#endif
for_each_set_bit(i, indicators(vcdev),
sizeof(*indicators(vcdev)) * BITS_PER_BYTE) {
/* The bit clear must happen before the vring kick. */
vq = virtio_ccw_vq_by_ind(vcdev, i);
vring_interrupt(0, vq);
}
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
read_unlock(&vcdev->irq_lock);
+#endif
if (test_bit(0, indicators2(vcdev))) {
virtio_config_changed(&vcdev->vdev);
clear_bit(0, indicators2(vcdev));
struct hisi_hba *hisi_hba = shost_priv(shost);
struct device *dev = hisi_hba->dev;
int ret = sas_slave_configure(sdev);
+ unsigned int max_sectors;
if (ret)
return ret;
}
}
+ /* Set according to IOMMU IOVA caching limit */
+ max_sectors = min_t(size_t, queue_max_hw_sectors(sdev->request_queue),
+ (PAGE_SIZE * 32) >> SECTOR_SHIFT);
+
+ blk_queue_max_hw_sectors(sdev->request_queue, max_sectors);
+
return 0;
}
static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
static void ibmvfc_tgt_move_login(struct ibmvfc_target *);
-static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
-static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
+static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *);
+static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *);
static const char *unknown_error = "unknown error";
struct vio_dev *vdev = to_vio_dev(vhost->dev);
unsigned long flags;
- ibmvfc_release_sub_crqs(vhost);
+ ibmvfc_dereg_sub_crqs(vhost);
/* Re-enable the CRQ */
do {
spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
- ibmvfc_init_sub_crqs(vhost);
+ ibmvfc_reg_sub_crqs(vhost);
return rc;
}
struct vio_dev *vdev = to_vio_dev(vhost->dev);
struct ibmvfc_queue *crq = &vhost->crq;
- ibmvfc_release_sub_crqs(vhost);
+ ibmvfc_dereg_sub_crqs(vhost);
/* Close the CRQ */
do {
spin_unlock(vhost->crq.q_lock);
spin_unlock_irqrestore(vhost->host->host_lock, flags);
- ibmvfc_init_sub_crqs(vhost);
+ ibmvfc_reg_sub_crqs(vhost);
return rc;
}
queue->cur = 0;
queue->fmt = fmt;
queue->size = PAGE_SIZE / fmt_size;
+
+ queue->vhost = vhost;
return 0;
}
ENTER;
- if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT))
- return -ENOMEM;
-
rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
&scrq->cookie, &scrq->hw_irq);
}
scrq->hwq_id = index;
- scrq->vhost = vhost;
LEAVE;
return 0;
rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
} while (rtas_busy_delay(rc));
reg_failed:
- ibmvfc_free_queue(vhost, scrq);
LEAVE;
return rc;
}
if (rc)
dev_err(dev, "Failed to free sub-crq[%d]: rc=%ld\n", index, rc);
- ibmvfc_free_queue(vhost, scrq);
+ /* Clean out the queue */
+ memset(scrq->msgs.crq, 0, PAGE_SIZE);
+ scrq->cur = 0;
+
+ LEAVE;
+}
+
+static void ibmvfc_reg_sub_crqs(struct ibmvfc_host *vhost)
+{
+ int i, j;
+
+ ENTER;
+ if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs)
+ return;
+
+ for (i = 0; i < nr_scsi_hw_queues; i++) {
+ if (ibmvfc_register_scsi_channel(vhost, i)) {
+ for (j = i; j > 0; j--)
+ ibmvfc_deregister_scsi_channel(vhost, j - 1);
+ vhost->do_enquiry = 0;
+ return;
+ }
+ }
+
+ LEAVE;
+}
+
+static void ibmvfc_dereg_sub_crqs(struct ibmvfc_host *vhost)
+{
+ int i;
+
+ ENTER;
+ if (!vhost->mq_enabled || !vhost->scsi_scrqs.scrqs)
+ return;
+
+ for (i = 0; i < nr_scsi_hw_queues; i++)
+ ibmvfc_deregister_scsi_channel(vhost, i);
+
LEAVE;
}
static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
{
+ struct ibmvfc_queue *scrq;
int i, j;
ENTER;
}
for (i = 0; i < nr_scsi_hw_queues; i++) {
- if (ibmvfc_register_scsi_channel(vhost, i)) {
- for (j = i; j > 0; j--)
- ibmvfc_deregister_scsi_channel(vhost, j - 1);
+ scrq = &vhost->scsi_scrqs.scrqs[i];
+ if (ibmvfc_alloc_queue(vhost, scrq, IBMVFC_SUB_CRQ_FMT)) {
+ for (j = i; j > 0; j--) {
+ scrq = &vhost->scsi_scrqs.scrqs[j - 1];
+ ibmvfc_free_queue(vhost, scrq);
+ }
kfree(vhost->scsi_scrqs.scrqs);
vhost->scsi_scrqs.scrqs = NULL;
vhost->scsi_scrqs.active_queues = 0;
vhost->do_enquiry = 0;
- break;
+ vhost->mq_enabled = 0;
+ return;
}
}
+ ibmvfc_reg_sub_crqs(vhost);
+
LEAVE;
}
static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
{
+ struct ibmvfc_queue *scrq;
int i;
ENTER;
if (!vhost->scsi_scrqs.scrqs)
return;
- for (i = 0; i < nr_scsi_hw_queues; i++)
- ibmvfc_deregister_scsi_channel(vhost, i);
+ ibmvfc_dereg_sub_crqs(vhost);
+
+ for (i = 0; i < nr_scsi_hw_queues; i++) {
+ scrq = &vhost->scsi_scrqs.scrqs[i];
+ ibmvfc_free_queue(vhost, scrq);
+ }
kfree(vhost->scsi_scrqs.scrqs);
vhost->scsi_scrqs.scrqs = NULL;
spinlock_t _lock;
spinlock_t *q_lock;
+ struct ibmvfc_host *vhost;
struct ibmvfc_event_pool evt_pool;
struct list_head sent;
struct list_head free;
union ibmvfc_iu cancel_rsp;
/* Sub-CRQ fields */
- struct ibmvfc_host *vhost;
unsigned long cookie;
unsigned long vios_cookie;
unsigned long hw_irq;
GFP_KERNEL);
if (!ioa_cfg->hrrq[i].host_rrq) {
- while (--i > 0)
+ while (--i >= 0)
dma_free_coherent(&pdev->dev,
sizeof(u32) * ioa_cfg->hrrq[i].size,
ioa_cfg->hrrq[i].host_rrq,
ioa_cfg->vectors_info[i].desc,
&ioa_cfg->hrrq[i]);
if (rc) {
- while (--i >= 0)
+ while (--i > 0)
free_irq(pci_irq_vector(pdev, i),
&ioa_cfg->hrrq[i]);
return rc;
uint32_t);
void lpfc_sli_abort_fcp_cmpl(struct lpfc_hba *, struct lpfc_iocbq *,
struct lpfc_iocbq *);
-void lpfc_sli4_abort_fcp_cmpl(struct lpfc_hba *h, struct lpfc_iocbq *i,
- struct lpfc_wcqe_complete *w);
void lpfc_sli_free_hbq(struct lpfc_hba *, struct hbq_dmabuf *);
struct lpfc_nodelist *ndlp);
void lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba,
struct lpfc_iocbq *cmdiocb,
- struct lpfc_wcqe_complete *abts_cmpl);
+ struct lpfc_iocbq *rspiocb);
void lpfc_create_multixri_pools(struct lpfc_hba *phba);
void lpfc_create_destroy_pools(struct lpfc_hba *phba);
void lpfc_move_xri_pvt_to_pbl(struct lpfc_hba *phba, u32 hwqid);
memset(bpl, 0, sizeof(struct ulp_bde64));
bpl->addrHigh = le32_to_cpu(putPaddrHigh(mp->phys));
bpl->addrLow = le32_to_cpu(putPaddrLow(mp->phys));
- bpl->tus.f.bdeFlags = BUFF_TYPE_BLP_64;
+ bpl->tus.f.bdeFlags = BUFF_TYPE_BDE_64;
bpl->tus.f.bdeSize = (LPFC_CT_PREAMBLE - 4);
bpl->tus.w = le32_to_cpu(bpl->tus.w);
ndlp->nlp_DID, ulp_status,
ulp_word4);
- /* Call NLP_EVT_DEVICE_RM if link is down or LOGO is aborted */
if (lpfc_error_lost_link(ulp_status, ulp_word4)) {
- lpfc_disc_state_machine(vport, ndlp, cmdiocb,
- NLP_EVT_DEVICE_RM);
skip_recovery = 1;
goto out;
}
spin_unlock_irq(&ndlp->lock);
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_DEVICE_RM);
- lpfc_els_free_iocb(phba, cmdiocb);
- lpfc_nlp_put(ndlp);
-
- /* Presume the node was released. */
- return;
+ goto out_rsrc_free;
}
out:
- /* Driver is done with the IO. */
- lpfc_els_free_iocb(phba, cmdiocb);
- lpfc_nlp_put(ndlp);
-
/* At this point, the LOGO processing is complete. NOTE: For a
* pt2pt topology, we are assuming the NPortID will only change
* on link up processing. For a LOGO / PLOGI initiated by the
ndlp->nlp_DID, ulp_status,
ulp_word4, tmo,
vport->num_disc_nodes);
+
+ lpfc_els_free_iocb(phba, cmdiocb);
+ lpfc_nlp_put(ndlp);
+
lpfc_disc_start(vport);
return;
}
lpfc_disc_state_machine(vport, ndlp, cmdiocb,
NLP_EVT_DEVICE_RM);
}
+out_rsrc_free:
+ /* Driver is done with the I/O. */
+ lpfc_els_free_iocb(phba, cmdiocb);
+ lpfc_nlp_put(ndlp);
}
/**
#define wqe_sup_SHIFT 6
#define wqe_sup_MASK 0x00000001
#define wqe_sup_WORD word11
+#define wqe_ffrq_SHIFT 6
+#define wqe_ffrq_MASK 0x00000001
+#define wqe_ffrq_WORD word11
#define wqe_wqec_SHIFT 7
#define wqe_wqec_MASK 0x00000001
#define wqe_wqec_WORD word11
rc = pci_enable_msi(phba->pcidev);
if (!rc)
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
- "0462 PCI enable MSI mode success.\n");
+ "0012 PCI enable MSI mode success.\n");
else {
lpfc_printf_log(phba, KERN_INFO, LOG_INIT,
"0471 PCI enable MSI mode failed (%d)\n", rc);
lpfc_nvmet_invalidate_host(phba, ndlp);
if (ndlp->nlp_DID == Fabric_DID) {
- if (vport->port_state <= LPFC_FDISC)
+ if (vport->port_state <= LPFC_FDISC ||
+ vport->fc_flag & FC_PT2PT)
goto out;
lpfc_linkdown_port(vport);
spin_lock_irq(shost->host_lock);
nCmd->rcv_rsplen = wcqe->parameter;
nCmd->status = 0;
+ /* Get the NVME cmd details for this unique error. */
+ cp = (struct nvme_fc_cmd_iu *)nCmd->cmdaddr;
+ ep = (struct nvme_fc_ersp_iu *)nCmd->rspaddr;
+
/* Check if this is really an ERSP */
if (nCmd->rcv_rsplen == LPFC_NVME_ERSP_LEN) {
lpfc_ncmd->status = IOSTAT_SUCCESS;
lpfc_ncmd->result = 0;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME,
- "6084 NVME Completion ERSP: "
- "xri %x placed x%x\n",
- lpfc_ncmd->cur_iocbq.sli4_xritag,
- wcqe->total_data_placed);
+ "6084 NVME FCP_ERR ERSP: "
+ "xri %x placed x%x opcode x%x cmd_id "
+ "x%x cqe_status x%x\n",
+ lpfc_ncmd->cur_iocbq.sli4_xritag,
+ wcqe->total_data_placed,
+ cp->sqe.common.opcode,
+ cp->sqe.common.command_id,
+ ep->cqe.status);
break;
}
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
"6081 NVME Completion Protocol Error: "
"xri %x status x%x result x%x "
- "placed x%x\n",
+ "placed x%x opcode x%x cmd_id x%x, "
+ "cqe_status x%x\n",
lpfc_ncmd->cur_iocbq.sli4_xritag,
lpfc_ncmd->status, lpfc_ncmd->result,
- wcqe->total_data_placed);
+ wcqe->total_data_placed,
+ cp->sqe.common.opcode,
+ cp->sqe.common.command_id,
+ ep->cqe.status);
break;
case IOSTAT_LOCAL_REJECT:
/* Let fall through to set command final state. */
{
struct lpfc_hba *phba = vport->phba;
struct nvmefc_fcp_req *nCmd = lpfc_ncmd->nvmeCmd;
- struct lpfc_iocbq *pwqeq = &(lpfc_ncmd->cur_iocbq);
+ struct nvme_common_command *sqe;
+ struct lpfc_iocbq *pwqeq = &lpfc_ncmd->cur_iocbq;
union lpfc_wqe128 *wqe = &pwqeq->wqe;
uint32_t req_len;
cstat->control_requests++;
}
- if (pnode->nlp_nvme_info & NLP_NVME_NSLER)
+ if (pnode->nlp_nvme_info & NLP_NVME_NSLER) {
bf_set(wqe_erp, &wqe->generic.wqe_com, 1);
+ sqe = &((struct nvme_fc_cmd_iu *)
+ nCmd->cmdaddr)->sqe.common;
+ if (sqe->opcode == nvme_admin_async_event)
+ bf_set(wqe_ffrq, &wqe->generic.wqe_com, 1);
+ }
+
/*
* Finish initializing those WQE fields that are independent
* of the nvme_cmnd request_buffer
* lpfc_nvme_abort_fcreq_cmpl - Complete an NVME FCP abort request.
* @phba: Pointer to HBA context object
* @cmdiocb: Pointer to command iocb object.
- * @abts_cmpl: Pointer to wcqe complete object.
+ * @rspiocb: Pointer to response iocb object.
*
* This is the callback function for any NVME FCP IO that was aborted.
*
**/
void
lpfc_nvme_abort_fcreq_cmpl(struct lpfc_hba *phba, struct lpfc_iocbq *cmdiocb,
- struct lpfc_wcqe_complete *abts_cmpl)
+ struct lpfc_iocbq *rspiocb)
{
+ struct lpfc_wcqe_complete *abts_cmpl = &rspiocb->wcqe_cmpl;
+
lpfc_printf_log(phba, KERN_INFO, LOG_NVME,
"6145 ABORT_XRI_CN completing on rpi x%x "
"original iotag x%x, abort cmd iotag x%x "
struct lpfc_nvme_fcpreq_priv *freqpriv;
unsigned long flags;
int ret_val;
+ struct nvme_fc_cmd_iu *cp;
/* Validate pointers. LLDD fault handling with transport does
* have timing races.
return;
}
+ /*
+ * Get Command Id from cmd to plug into response. This
+ * code is not needed in the next NVME Transport drop.
+ */
+ cp = (struct nvme_fc_cmd_iu *)lpfc_nbuf->nvmeCmd->cmdaddr;
lpfc_printf_vlog(vport, KERN_INFO, LOG_NVME_ABTS,
"6138 Transport Abort NVME Request Issued for "
- "ox_id x%x\n",
- nvmereq_wqe->sli4_xritag);
+ "ox_id x%x nvme opcode x%x nvme cmd_id x%x\n",
+ nvmereq_wqe->sli4_xritag, cp->sqe.common.opcode,
+ cp->sqe.common.command_id);
return;
out_unlock:
int status;
u32 logit = LOG_FCP;
+ if (!rport)
+ return FAILED;
+
rdata = rport->dd_data;
if (!rdata || !rdata->pnode) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
unsigned long flags;
DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waitq);
+ if (!rport)
+ return FAILED;
+
rdata = rport->dd_data;
if (!rdata || !rdata->pnode) {
lpfc_printf_vlog(vport, KERN_ERR, LOG_TRACE_EVENT,
sync_buf = __lpfc_sli_get_iocbq(phba);
if (!sync_buf) {
lpfc_printf_log(phba, KERN_ERR, LOG_CGN_MGMT,
- "6213 No available WQEs for CMF_SYNC_WQE\n");
+ "6244 No available WQEs for CMF_SYNC_WQE\n");
ret_val = ENOMEM;
goto out_unlock;
}
set_job_ulpword4(cmdiocbp,
IOERR_ABORT_REQUESTED);
/*
- * For SLI4, irsiocb contains
+ * For SLI4, irspiocb contains
* NO_XRI in sli_xritag, it
* shall not affect releasing
* sgl (xri) process.
}
}
}
- (cmdiocbp->cmd_cmpl) (phba, cmdiocbp, saveq);
+ cmdiocbp->cmd_cmpl(phba, cmdiocbp, saveq);
} else
lpfc_sli_release_iocbq(phba, cmdiocbp);
} else {
cmdiocbq->cmd_flag &= ~LPFC_DRIVER_ABORTED;
if (cmdiocbq->cmd_cmpl) {
spin_unlock_irqrestore(&phba->hbalock, iflag);
- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq,
- &rspiocbq);
+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, &rspiocbq);
spin_lock_irqsave(&phba->hbalock, iflag);
}
break;
* @flag: Flag indicating if this command can be put into txq.
*
* __lpfc_sli_issue_fcp_io_s3 is wrapper function to invoke lockless func to
- * send an iocb command to an HBA with SLI-4 interface spec.
+ * send an iocb command to an HBA with SLI-3 interface spec.
*
* This function takes the hbalock before invoking the lockless version.
* The function will return success after it successfully submit the wqe to
cmdiocbq->cmd_cmpl = cmdiocbq->wait_cmd_cmpl;
cmdiocbq->wait_cmd_cmpl = NULL;
if (cmdiocbq->cmd_cmpl)
- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, NULL);
+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, NULL);
else
lpfc_sli_release_iocbq(phba, cmdiocbq);
return;
/* Set the exchange busy flag for task management commands */
if ((cmdiocbq->cmd_flag & LPFC_IO_FCP) &&
- !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) {
+ !(cmdiocbq->cmd_flag & LPFC_IO_LIBDFC)) {
lpfc_cmd = container_of(cmdiocbq, struct lpfc_io_buf,
- cur_iocbq);
+ cur_iocbq);
if (rspiocbq && (rspiocbq->cmd_flag & LPFC_EXCHANGE_BUSY))
lpfc_cmd->flags |= LPFC_SBUF_XBUSY;
else
* @irspiocbq: Pointer to work-queue completion queue entry.
*
* This routine handles an ELS work-queue completion event and construct
- * a pseudo response ELS IODBQ from the SLI4 ELS WCQE for the common
+ * a pseudo response ELS IOCBQ from the SLI4 ELS WCQE for the common
* discovery engine to handle.
*
* Return: Pointer to the receive IOCBQ, NULL otherwise.
if (bf_get(lpfc_wcqe_c_xb, wcqe)) {
spin_lock_irqsave(&phba->hbalock, iflags);
- cmdiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY;
+ irspiocbq->cmd_flag |= LPFC_EXCHANGE_BUSY;
spin_unlock_irqrestore(&phba->hbalock, iflags);
}
/* Pass the cmd_iocb and the wcqe to the upper layer */
memcpy(&cmdiocbq->wcqe_cmpl, wcqe,
sizeof(struct lpfc_wcqe_complete));
- (cmdiocbq->cmd_cmpl)(phba, cmdiocbq, cmdiocbq);
+ cmdiocbq->cmd_cmpl(phba, cmdiocbq, cmdiocbq);
} else {
lpfc_printf_log(phba, KERN_WARNING, LOG_SLI,
"0375 FCP cmdiocb not callback function "
/* Free iocb created in lpfc_prep_seq */
list_for_each_entry_safe(curr_iocb, next_iocb,
- &iocbq->list, list) {
+ &iocbq->list, list) {
list_del_init(&curr_iocb->list);
lpfc_sli_release_iocbq(phba, curr_iocb);
}
* included with this package. *
*******************************************************************/
-#define LPFC_DRIVER_VERSION "14.2.0.3"
+#define LPFC_DRIVER_VERSION "14.2.0.4"
#define LPFC_DRIVER_NAME "lpfc"
/* Used for SLI 2/3 */
qoff += map->nr_queues;
offset += map->nr_queues;
+ /* we never use READ queue, so can't cheat blk-mq */
+ shost->tag_set.map[HCTX_TYPE_READ].nr_queues = 0;
+
/* Setup Poll hctx */
map = &shost->tag_set.map[HCTX_TYPE_POLL];
map->nr_queues = instance->iopoll_q_count;
Mpi2ConfigReply_t mpi_reply;
Mpi2SasIOUnitPage1_t *sas_iounit_pg1 = NULL;
Mpi26PCIeIOUnitPage1_t pcie_iounit_pg1;
+ u16 depth;
int sz;
int rc = 0;
goto out;
/* sas iounit page 1 */
sz = offsetof(Mpi2SasIOUnitPage1_t, PhyData);
- sas_iounit_pg1 = kzalloc(sz, GFP_KERNEL);
+ sas_iounit_pg1 = kzalloc(sizeof(Mpi2SasIOUnitPage1_t), GFP_KERNEL);
if (!sas_iounit_pg1) {
pr_err("%s: failure at %s:%d/%s()!\n",
ioc->name, __FILE__, __LINE__, __func__);
ioc->name, __FILE__, __LINE__, __func__);
goto out;
}
- ioc->max_wideport_qd =
- (le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth)) ?
- le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth) :
- MPT3SAS_SAS_QUEUE_DEPTH;
- ioc->max_narrowport_qd =
- (le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth)) ?
- le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth) :
- MPT3SAS_SAS_QUEUE_DEPTH;
- ioc->max_sata_qd = (sas_iounit_pg1->SATAMaxQDepth) ?
- sas_iounit_pg1->SATAMaxQDepth : MPT3SAS_SATA_QUEUE_DEPTH;
+
+ depth = le16_to_cpu(sas_iounit_pg1->SASWideMaxQueueDepth);
+ ioc->max_wideport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH);
+
+ depth = le16_to_cpu(sas_iounit_pg1->SASNarrowMaxQueueDepth);
+ ioc->max_narrowport_qd = (depth ? depth : MPT3SAS_SAS_QUEUE_DEPTH);
+
+ depth = sas_iounit_pg1->SATAMaxQDepth;
+ ioc->max_sata_qd = (depth ? depth : MPT3SAS_SATA_QUEUE_DEPTH);
+
/* pcie iounit page 1 */
rc = mpt3sas_config_get_pcie_iounit_pg1(ioc, &mpi_reply,
&pcie_iounit_pg1, sizeof(Mpi26PCIeIOUnitPage1_t));
_scsih_ir_shutdown(ioc);
_scsih_nvme_shutdown(ioc);
mpt3sas_base_mask_interrupts(ioc);
+ mpt3sas_base_stop_watchdog(ioc);
ioc->shost_recovery = 1;
mpt3sas_base_make_ioc_ready(ioc, SOFT_RESET);
ioc->shost_recovery = 0;
if (!phy->phy_attached)
return;
- if (sas_phy->phy) {
- struct sas_phy *sphy = sas_phy->phy;
- sphy->negotiated_linkrate = sas_phy->linkrate;
- sphy->minimum_linkrate = phy->minimum_linkrate;
- sphy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
- sphy->maximum_linkrate = phy->maximum_linkrate;
- sphy->maximum_linkrate_hw = phy->maximum_linkrate;
- }
-
if (phy->phy_type & PORT_TYPE_SAS) {
struct sas_identify_frame *id;
id = (struct sas_identify_frame *)phy->frame_rcvd;
switch (link_rate) {
case PHY_SPEED_120:
phy->sas_phy.linkrate = SAS_LINK_RATE_12_0_GBPS;
- phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_12_0_GBPS;
break;
case PHY_SPEED_60:
phy->sas_phy.linkrate = SAS_LINK_RATE_6_0_GBPS;
- phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_6_0_GBPS;
break;
case PHY_SPEED_30:
phy->sas_phy.linkrate = SAS_LINK_RATE_3_0_GBPS;
- phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_3_0_GBPS;
break;
case PHY_SPEED_15:
phy->sas_phy.linkrate = SAS_LINK_RATE_1_5_GBPS;
- phy->sas_phy.phy->negotiated_linkrate = SAS_LINK_RATE_1_5_GBPS;
break;
}
sas_phy->negotiated_linkrate = phy->sas_phy.linkrate;
- sas_phy->maximum_linkrate_hw = SAS_LINK_RATE_6_0_GBPS;
+ sas_phy->maximum_linkrate_hw = phy->maximum_linkrate;
sas_phy->minimum_linkrate_hw = SAS_LINK_RATE_1_5_GBPS;
- sas_phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS;
- sas_phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+ sas_phy->maximum_linkrate = phy->maximum_linkrate;
+ sas_phy->minimum_linkrate = phy->minimum_linkrate;
}
/**
struct asd_sas_phy *sas_phy = &phy->sas_phy;
phy->phy_state = PHY_LINK_DISABLE;
phy->pm8001_ha = pm8001_ha;
+ phy->minimum_linkrate = SAS_LINK_RATE_1_5_GBPS;
+ phy->maximum_linkrate = SAS_LINK_RATE_6_0_GBPS;
sas_phy->enabled = (phy_id < pm8001_ha->chip->n_phy) ? 1 : 0;
sas_phy->class = SAS;
sas_phy->iproto = SAS_PROTOCOL_ALL;
pm8001_dbg(pm8001_ha, MSG, "phy:0x%x status:0x%x\n",
phyid, status);
if (status == PHY_STOP_SUCCESS ||
- status == PHY_STOP_ERR_DEVICE_ATTACHED)
+ status == PHY_STOP_ERR_DEVICE_ATTACHED) {
phy->phy_state = PHY_LINK_DISABLE;
+ phy->sas_phy.phy->negotiated_linkrate = SAS_PHY_DISABLED;
+ phy->sas_phy.linkrate = SAS_PHY_DISABLED;
+ }
+
return 0;
}
return 0;
out_unwind:
- while (--i > 0)
+ while (--i >= 0)
free_irq(pci_irq_vector(pdev, i), &pinstance->hrrq_vector[i]);
pci_free_irq_vectors(pdev);
return rc;
}
}
+static inline void zbc_set_zone_full(struct sdebug_dev_info *devip,
+ struct sdeb_zone_state *zsp)
+{
+ switch (zsp->z_cond) {
+ case ZC2_IMPLICIT_OPEN:
+ devip->nr_imp_open--;
+ break;
+ case ZC3_EXPLICIT_OPEN:
+ devip->nr_exp_open--;
+ break;
+ default:
+ WARN_ONCE(true, "Invalid zone %llu condition %x\n",
+ zsp->z_start, zsp->z_cond);
+ break;
+ }
+ zsp->z_cond = ZC5_FULL;
+}
+
static void zbc_inc_wp(struct sdebug_dev_info *devip,
unsigned long long lba, unsigned int num)
{
if (zsp->z_type == ZBC_ZTYPE_SWR) {
zsp->z_wp += num;
if (zsp->z_wp >= zend)
- zsp->z_cond = ZC5_FULL;
+ zbc_set_zone_full(devip, zsp);
return;
}
n = num;
}
if (zsp->z_wp >= zend)
- zsp->z_cond = ZC5_FULL;
+ zbc_set_zone_full(devip, zsp);
num -= n;
lba += n;
goto out_put_request;
ret = 0;
- if (hdr->iovec_count) {
+ if (hdr->iovec_count && hdr->dxfer_len) {
struct iov_iter i;
struct iovec *iov = NULL;
return NULL;
mutex_lock(&iscsi_ep_idr_mutex);
- id = idr_alloc(&iscsi_ep_idr, ep, 0, -1, GFP_NOIO);
+
+ /*
+ * First endpoint id should be 1 to comply with user space
+ * applications (iscsid).
+ */
+ id = idr_alloc(&iscsi_ep_idr, ep, 1, -1, GFP_NOIO);
if (id < 0) {
mutex_unlock(&iscsi_ep_idr_mutex);
printk(KERN_ERR "Could not allocate endpoint ID. Error %d.\n",
goto out;
/* We must have at least a 64B header and one 32B range descriptor */
- vpd_len = get_unaligned_be16(&buffer[2]) + 3;
+ vpd_len = get_unaligned_be16(&buffer[2]) + 4;
if (vpd_len > buf_len || vpd_len < 64 + 32 || (vpd_len & 31)) {
sd_printk(KERN_ERR, sdkp,
"Invalid Concurrent Positioning Ranges VPD page\n");
.cmd_per_lun = 2048,
.this_id = -1,
/* Ensure there are no gaps in presented sgls */
- .virt_boundary_mask = PAGE_SIZE-1,
+ .virt_boundary_mask = HV_HYP_PAGE_SIZE - 1,
.no_write_same = 1,
.track_queue_depth = 1,
.change_queue_depth = storvsc_change_queue_depth,
int target = 0;
struct storvsc_device *stor_device;
int max_sub_channels = 0;
+ u32 max_xfer_bytes;
/*
* We support sub-channels for storage on SCSI and FC controllers.
}
/* max cmd length */
host->max_cmd_len = STORVSC_MAX_CMD_LEN;
-
/*
- * set the table size based on the info we got
- * from the host.
+ * Any reasonable Hyper-V configuration should provide
+ * max_transfer_bytes value aligning to HV_HYP_PAGE_SIZE,
+ * protecting it from any weird value.
+ */
+ max_xfer_bytes = round_down(stor_device->max_transfer_bytes, HV_HYP_PAGE_SIZE);
+ /* max_hw_sectors_kb */
+ host->max_sectors = max_xfer_bytes >> 9;
+ /*
+ * There are 2 requirements for Hyper-V storvsc sgl segments,
+ * based on which the below calculation for max segments is
+ * done:
+ *
+ * 1. Except for the first and last sgl segment, all sgl segments
+ * should be align to HV_HYP_PAGE_SIZE, that also means the
+ * maximum number of segments in a sgl can be calculated by
+ * dividing the total max transfer length by HV_HYP_PAGE_SIZE.
+ *
+ * 2. Except for the first and last, each entry in the SGL must
+ * have an offset that is a multiple of HV_HYP_PAGE_SIZE.
*/
- host->sg_tablesize = (stor_device->max_transfer_bytes >> PAGE_SHIFT);
+ host->sg_tablesize = (max_xfer_bytes >> HV_HYP_PAGE_SHIFT) + 1;
/*
* For non-IDE disks, the host supports multiple channels.
* Set the number of HW queues we are supporting.
u8 tag;
u8 bus;
u8 target;
- u8 vcpuHint;
- u8 unused[59];
+ u16 vcpuHint;
+ u8 unused[58];
} __packed;
/*
AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK,
AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH,
"sam9x60", "sam9x60"),
- AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D5M_EXID_MATCH,
- AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH,
+ AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK,
+ AT91_CIDR_VERSION_MASK, SAM9X60_D5M_EXID_MATCH,
"sam9x60 64MiB DDR2 SiP", "sam9x60"),
- AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D1G_EXID_MATCH,
- AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH,
+ AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK,
+ AT91_CIDR_VERSION_MASK, SAM9X60_D1G_EXID_MATCH,
"sam9x60 128MiB DDR2 SiP", "sam9x60"),
- AT91_SOC(SAM9X60_CIDR_MATCH, SAM9X60_D6K_EXID_MATCH,
- AT91_CIDR_VERSION_MASK, SAM9X60_EXID_MATCH,
+ AT91_SOC(SAM9X60_CIDR_MATCH, AT91_CIDR_MATCH_MASK,
+ AT91_CIDR_VERSION_MASK, SAM9X60_D6K_EXID_MATCH,
"sam9x60 8MiB SDRAM SiP", "sam9x60"),
#endif
#ifdef CONFIG_SOC_SAMA5
}
ret = brcmstb_init_sram(dn);
+ of_node_put(dn);
if (ret) {
pr_err("error setting up SRAM for PM\n");
return ret;
},
[IMX8MP_MEDIABLK_PD_LCDIF_2] = {
.name = "mediablk-lcdif-2",
- .clk_names = (const char *[]){ "disp1", "apb", "axi", },
+ .clk_names = (const char *[]){ "disp2", "apb", "axi", },
.num_clks = 3,
.gpc_name = "lcdif2",
.rst_mask = BIT(11) | BIT(12) | BIT(24),
static struct platform_driver ixp4xx_npe_driver = {
.driver = {
.name = "ixp4xx-npe",
- .of_match_table = of_match_ptr(ixp4xx_npe_of_match),
+ .of_match_table = ixp4xx_npe_of_match,
},
.probe = ixp4xx_npe_probe,
.remove = ixp4xx_npe_remove,
struct smem_partition_header *header;
struct smem_ptable_entry *entry;
struct smem_ptable *ptable;
- unsigned int remote_host;
+ u16 remote_host;
u16 host0, host1;
int i;
continue;
if (remote_host >= SMEM_HOST_COUNT) {
- dev_err(smem->dev, "bad host %hu\n", remote_host);
+ dev_err(smem->dev, "bad host %u\n", remote_host);
return -EINVAL;
}
if (smem->partitions[remote_host].virt_base) {
- dev_err(smem->dev, "duplicate host %hu\n", remote_host);
+ dev_err(smem->dev, "duplicate host %u\n", remote_host);
return -EINVAL;
}
#define AMD_SPI_RX_COUNT_REG 0x4B
#define AMD_SPI_STATUS_REG 0x4C
+#define AMD_SPI_FIFO_SIZE 70
#define AMD_SPI_MEM_SIZE 200
/* M_CMD OP codes for SPI */
return 0;
}
+static size_t amd_spi_max_transfer_size(struct spi_device *spi)
+{
+ return AMD_SPI_FIFO_SIZE;
+}
+
static int amd_spi_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
master->flags = SPI_MASTER_HALF_DUPLEX;
master->setup = amd_spi_master_setup;
master->transfer_one_message = amd_spi_master_transfer;
+ master->max_transfer_size = amd_spi_max_transfer_size;
+ master->max_message_size = amd_spi_max_transfer_size;
/* Register the controller with SPI framework */
err = devm_spi_register_master(dev, master);
u32 ctl_val;
int ret = 0;
+ dev_dbg(aspi->dev,
+ "CE%d %s dirmap [ 0x%.8llx - 0x%.8llx ] OP %#x mode:%d.%d.%d.%d naddr:%#x ndummies:%#x\n",
+ chip->cs, op->data.dir == SPI_MEM_DATA_IN ? "read" : "write",
+ desc->info.offset, desc->info.offset + desc->info.length,
+ op->cmd.opcode, op->cmd.buswidth, op->addr.buswidth,
+ op->dummy.buswidth, op->data.buswidth,
+ op->addr.nbytes, op->dummy.nbytes);
+
chip->clk_freq = desc->mem->spi->max_speed_hz;
/* Only for reads */
ctl_val = readl(chip->ctl) & ~CTRL_IO_CMD_MASK;
ctl_val |= aspeed_spi_get_io_mode(op) |
op->cmd.opcode << CTRL_COMMAND_SHIFT |
- CTRL_IO_DUMMY_SET(op->dummy.nbytes / op->dummy.buswidth) |
CTRL_IO_MODE_READ;
+ if (op->dummy.nbytes)
+ ctl_val |= CTRL_IO_DUMMY_SET(op->dummy.nbytes / op->dummy.buswidth);
+
/* Tune 4BYTE address mode */
if (op->addr.nbytes) {
u32 addr_mode = readl(aspi->regs + CE_CTRL_REG);
struct bcm2835_spi *bs = spi_controller_get_devdata(ctlr);
/* if an error occurred and we have an active dma, then terminate */
- dmaengine_terminate_sync(ctlr->dma_tx);
- bs->tx_dma_active = false;
- dmaengine_terminate_sync(ctlr->dma_rx);
- bs->rx_dma_active = false;
+ if (ctlr->dma_tx) {
+ dmaengine_terminate_sync(ctlr->dma_tx);
+ bs->tx_dma_active = false;
+ }
+ if (ctlr->dma_rx) {
+ dmaengine_terminate_sync(ctlr->dma_rx);
+ bs->rx_dma_active = false;
+ }
bcm2835_spi_undo_prologue(bs);
/* and reset */
ret = cqspi_of_get_pdata(cqspi);
if (ret) {
dev_err(dev, "Cannot get mandatory OF data.\n");
- ret = -ENODEV;
- goto probe_master_put;
+ return -ENODEV;
}
/* Obtain QSPI clock. */
if (IS_ERR(cqspi->clk)) {
dev_err(dev, "Cannot claim QSPI clock.\n");
ret = PTR_ERR(cqspi->clk);
- goto probe_master_put;
+ return ret;
}
/* Obtain and remap controller address. */
if (IS_ERR(cqspi->iobase)) {
dev_err(dev, "Cannot remap controller address.\n");
ret = PTR_ERR(cqspi->iobase);
- goto probe_master_put;
+ return ret;
}
/* Obtain and remap AHB address. */
if (IS_ERR(cqspi->ahb_base)) {
dev_err(dev, "Cannot remap AHB address.\n");
ret = PTR_ERR(cqspi->ahb_base);
- goto probe_master_put;
+ return ret;
}
cqspi->mmap_phys_base = (dma_addr_t)res_ahb->start;
cqspi->ahb_size = resource_size(res_ahb);
/* Obtain IRQ line. */
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- ret = -ENXIO;
- goto probe_master_put;
- }
+ if (irq < 0)
+ return -ENXIO;
pm_runtime_enable(dev);
ret = pm_runtime_resume_and_get(dev);
if (ret < 0)
- goto probe_master_put;
+ return ret;
ret = clk_prepare_enable(cqspi->clk);
if (ret) {
probe_clk_failed:
pm_runtime_put_sync(dev);
pm_runtime_disable(dev);
-probe_master_put:
- spi_master_put(master);
return ret;
}
#define CDNS_SPI_BAUD_DIV_SHIFT 3 /* Baud rate divisor shift in CR */
#define CDNS_SPI_SS_SHIFT 10 /* Slave Select field shift in CR */
#define CDNS_SPI_SS0 0x1 /* Slave Select zero */
+#define CDNS_SPI_NOSS 0xF /* No Slave select */
/*
* SPI Interrupt Registers bit Masks
#define CDNS_SPI_ER_ENABLE 0x00000001 /* SPI Enable Bit Mask */
#define CDNS_SPI_ER_DISABLE 0x0 /* SPI Disable Bit Mask */
-/* SPI FIFO depth in bytes */
-#define CDNS_SPI_FIFO_DEPTH 128
-
/* Default number of chip select lines */
#define CDNS_SPI_DEFAULT_NUM_CS 4
* @rx_bytes: Number of bytes requested
* @dev_busy: Device busy flag
* @is_decoded_cs: Flag for decoder property set or not
+ * @tx_fifo_depth: Depth of the TX FIFO
*/
struct cdns_spi {
void __iomem *regs;
int rx_bytes;
u8 dev_busy;
u32 is_decoded_cs;
+ unsigned int tx_fifo_depth;
};
/* Macros for the SPI controller read/write */
{
unsigned long trans_cnt = 0;
- while ((trans_cnt < CDNS_SPI_FIFO_DEPTH) &&
+ while ((trans_cnt < xspi->tx_fifo_depth) &&
(xspi->tx_bytes > 0)) {
/* When xspi in busy condition, bytes may send failed,
* @master: Pointer to the spi_master structure which provides
* information about the controller.
*
- * This function disables the SPI master controller.
+ * This function disables the SPI master controller when no slave selected.
*
* Return: 0 always
*/
static int cdns_unprepare_transfer_hardware(struct spi_master *master)
{
struct cdns_spi *xspi = spi_master_get_devdata(master);
+ u32 ctrl_reg;
- cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE);
+ /* Disable the SPI if slave is deselected */
+ ctrl_reg = cdns_spi_read(xspi, CDNS_SPI_CR);
+ ctrl_reg = (ctrl_reg & CDNS_SPI_CR_SSCTRL) >> CDNS_SPI_SS_SHIFT;
+ if (ctrl_reg == CDNS_SPI_NOSS)
+ cdns_spi_write(xspi, CDNS_SPI_ER, CDNS_SPI_ER_DISABLE);
return 0;
}
+/**
+ * cdns_spi_detect_fifo_depth - Detect the FIFO depth of the hardware
+ * @xspi: Pointer to the cdns_spi structure
+ *
+ * The depth of the TX FIFO is a synthesis configuration parameter of the SPI
+ * IP. The FIFO threshold register is sized so that its maximum value can be the
+ * FIFO size - 1. This is used to detect the size of the FIFO.
+ */
+static void cdns_spi_detect_fifo_depth(struct cdns_spi *xspi)
+{
+ /* The MSBs will get truncated giving us the size of the FIFO */
+ cdns_spi_write(xspi, CDNS_SPI_THLD, 0xffff);
+ xspi->tx_fifo_depth = cdns_spi_read(xspi, CDNS_SPI_THLD) + 1;
+
+ /* Reset to default */
+ cdns_spi_write(xspi, CDNS_SPI_THLD, 0x1);
+}
+
/**
* cdns_spi_probe - Probe method for the SPI driver
* @pdev: Pointer to the platform_device structure
if (ret < 0)
xspi->is_decoded_cs = 0;
+ cdns_spi_detect_fifo_depth(xspi);
+
/* SPI controller initializations */
cdns_spi_init_hw(xspi);
op->data.dir != SPI_MEM_DATA_IN)
return -EINVAL;
- if (ctlr->mem_ops && ctlr->mem_ops->poll_status) {
+ if (ctlr->mem_ops && ctlr->mem_ops->poll_status && !mem->spi->cs_gpiod) {
ret = spi_mem_access_start(mem);
if (ret)
return ret;
rs->tx_left = rs->tx ? xfer->len / rs->n_bytes : 0;
rs->rx_left = xfer->len / rs->n_bytes;
- if (rs->cs_inactive)
- writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR);
- else
- writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
+ writel_relaxed(0xffffffff, rs->regs + ROCKCHIP_SPI_ICR);
+
spi_enable_chip(rs, true);
if (rs->tx_left)
rockchip_spi_pio_writer(rs);
+ if (rs->cs_inactive)
+ writel_relaxed(INT_RF_FULL | INT_CS_INACTIVE, rs->regs + ROCKCHIP_SPI_IMR);
+ else
+ writel_relaxed(INT_RF_FULL, rs->regs + ROCKCHIP_SPI_IMR);
+
/* 1 means the transfer is in progress */
return 1;
}
rspi->dma_callbacked, HZ);
if (ret > 0 && rspi->dma_callbacked) {
ret = 0;
+ if (tx)
+ dmaengine_synchronize(rspi->ctlr->dma_tx);
+ if (rx)
+ dmaengine_synchronize(rspi->ctlr->dma_rx);
} else {
if (!ret) {
dev_err(&rspi->ctlr->dev, "DMA timeout\n");
# SPDX-License-Identifier: GPL-2.0
config FB_OLPC_DCON
tristate "One Laptop Per Child Display CONtroller support"
- depends on OLPC && FB
+ depends on OLPC && FB && BROKEN
depends on I2C
depends on GPIO_CS5535 && ACPI
select BACKLIGHT_CLASS_DEVICE
pxmitpriv->free_xmit_extbuf_cnt = num_xmit_extbuf;
- res = rtw_alloc_hwxmits(padapter);
- if (res) {
+ if (rtw_alloc_hwxmits(padapter)) {
res = _FAIL;
goto exit;
}
hwxmits = pxmitpriv->hwxmits;
- if (pxmitpriv->hwxmit_entry == 5) {
- hwxmits[0] .sta_queue = &pxmitpriv->bm_pending;
- hwxmits[1] .sta_queue = &pxmitpriv->vo_pending;
- hwxmits[2] .sta_queue = &pxmitpriv->vi_pending;
- hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
- hwxmits[4] .sta_queue = &pxmitpriv->be_pending;
- } else if (pxmitpriv->hwxmit_entry == 4) {
- hwxmits[0] .sta_queue = &pxmitpriv->vo_pending;
- hwxmits[1] .sta_queue = &pxmitpriv->vi_pending;
- hwxmits[2] .sta_queue = &pxmitpriv->be_pending;
- hwxmits[3] .sta_queue = &pxmitpriv->bk_pending;
- } else {
- }
+ hwxmits[0].sta_queue = &pxmitpriv->vo_pending;
+ hwxmits[1].sta_queue = &pxmitpriv->vi_pending;
+ hwxmits[2].sta_queue = &pxmitpriv->be_pending;
+ hwxmits[3].sta_queue = &pxmitpriv->bk_pending;
return 0;
}
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
- wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, KeyMaterial);
+ wep_total_len = wep_key_len + sizeof(*pwep);
pwep = kzalloc(wep_total_len, GFP_KERNEL);
if (!pwep)
goto exit;
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, key_material);
- pwep = kzalloc(wep_total_len, GFP_KERNEL);
+ /* Allocate a full structure to avoid potentially running off the end. */
+ pwep = kzalloc(sizeof(*pwep), GFP_KERNEL);
if (!pwep) {
ret = -ENOMEM;
goto exit;
if (wep_key_len > 0) {
wep_key_len = wep_key_len <= 5 ? 5 : 13;
wep_total_len = wep_key_len + FIELD_OFFSET(struct ndis_802_11_wep, key_material);
- pwep = kzalloc(wep_total_len, GFP_KERNEL);
+ /* Allocate a full structure to avoid potentially running off the end. */
+ pwep = kzalloc(sizeof(*pwep), GFP_KERNEL);
if (!pwep)
goto exit;
*/
static void hfa384x_usbctlx_completion_task(struct work_struct *work)
{
- struct hfa384x *hw = container_of(work, struct hfa384x, reaper_bh);
+ struct hfa384x *hw = container_of(work, struct hfa384x, completion_bh);
struct hfa384x_usbctlx *ctlx, *temp;
unsigned long flags;
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
+ if (!cmd->t_data_nents)
+ return TCM_INVALID_CDB_FIELD;
+
if (cmd->t_data_nents > 1 ||
cmd->t_data_sg[0].length != cmd->se_dev->dev_attrib.block_size) {
pr_err("WRITE_SAME: Illegal SGL t_data_nents: %u length: %u"
" backends not supported\n");
return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
}
+
+ if (!cmd->t_data_nents)
+ return TCM_INVALID_CDB_FIELD;
+
sg = &cmd->t_data_sg[0];
if (cmd->t_data_nents > 1 ||
pr_warn("WRITE SAME with ANCHOR not supported\n");
return TCM_INVALID_CDB_FIELD;
}
+
+ if (flags & 0x01) {
+ pr_warn("WRITE SAME with NDOB not supported\n");
+ return TCM_INVALID_CDB_FIELD;
+ }
+
/*
* Special case for WRITE_SAME w/ UNMAP=1 that ends up getting
* translated into block discard requests within backend code.
* Have config return register usage:
* a0 OPTEE_SMC_RETURN_OK
* a1 Physical address of start of SHM
- * a2 Size of of SHM
+ * a2 Size of SHM
* a3 Cache settings of memory, as defined by the
* OPTEE_SMC_SHM_* values above
* a4-7 Preserved
rpc_arg_offs = OPTEE_MSG_GET_ARG_SIZE(arg->num_params);
rpc_arg = tee_shm_get_va(shm, offs + rpc_arg_offs);
- if (IS_ERR(arg))
- return PTR_ERR(arg);
+ if (IS_ERR(rpc_arg))
+ return PTR_ERR(rpc_arg);
}
if (rpc_arg && tee_shm_is_dynamic(shm)) {
/**
* tee_get_drvdata() - Return driver_data pointer
* @teedev: Device containing the driver_data pointer
- * @returns the driver_data pointer supplied to tee_register().
+ * @returns the driver_data pointer supplied to tee_device_alloc().
*/
void *tee_get_drvdata(struct tee_device *teedev)
{
X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE, NULL),
X86_MATCH_INTEL_FAM6_MODEL(ALDERLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(RAPTORLAKE, NULL),
{}
};
tty_unregister_device(goldfish_tty_driver, qtty->console.index);
iounmap(qtty->base);
qtty->base = NULL;
- free_irq(qtty->irq, pdev);
+ free_irq(qtty->irq, qtty);
tty_port_destroy(&qtty->port);
goldfish_tty_current_line_count--;
if (goldfish_tty_current_line_count == 0)
return;
}
- prefix = kasprintf(GFP_KERNEL, "%s: ", fname);
+ prefix = kasprintf(GFP_ATOMIC, "%s: ", fname);
if (!prefix)
return;
print_hex_dump(KERN_INFO, prefix, DUMP_PREFIX_OFFSET, 16, 1, data, len,
static int pty_write(struct tty_struct *tty, const unsigned char *buf, int c)
{
struct tty_struct *to = tty->link;
- unsigned long flags;
- if (tty->flow.stopped)
+ if (tty->flow.stopped || !c)
return 0;
- if (c > 0) {
- spin_lock_irqsave(&to->port->lock, flags);
- /* Stuff the data into the input queue of the other end */
- c = tty_insert_flip_string(to->port, buf, c);
- spin_unlock_irqrestore(&to->port->lock, flags);
- /* And shovel */
- if (c)
- tty_flip_buffer_push(to->port);
- }
- return c;
+ return tty_insert_flip_string_and_push_buffer(to->port, buf, c);
}
/**
#include <linux/sysrq.h>
#include <linux/delay.h>
#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
#include <linux/tty.h>
#include <linux/ratelimit.h>
#include <linux/tty_flip.h>
up->port.dev = dev;
+ if (uart_console_enabled(&up->port))
+ pm_runtime_get_sync(up->port.dev);
+
serial8250_apply_quirks(up);
uart_add_one_port(drv, &up->port);
}
UART_XMIT_SIZE, DMA_TO_DEVICE);
dma_async_issue_pending(dma->txchan);
- if (dma->tx_err) {
+ serial8250_clear_THRI(p);
+ if (dma->tx_err)
dma->tx_err = 0;
- serial8250_clear_THRI(p);
- }
+
return 0;
err:
dma->tx_err = 1;
#define RZN1_UART_xDMACR_DMA_EN BIT(0)
#define RZN1_UART_xDMACR_1_WORD_BURST (0 << 1)
#define RZN1_UART_xDMACR_4_WORD_BURST (1 << 1)
-#define RZN1_UART_xDMACR_8_WORD_BURST (3 << 1)
+#define RZN1_UART_xDMACR_8_WORD_BURST (2 << 1)
#define RZN1_UART_xDMACR_BLK_SZ(x) ((x) << 3)
/* Quirks */
MODULE_DEVICE_TABLE(of, dw8250_of_match);
static const struct acpi_device_id dw8250_acpi_match[] = {
- { "INT33C4", 0 },
- { "INT33C5", 0 },
- { "INT3434", 0 },
- { "INT3435", 0 },
- { "80860F0A", 0 },
- { "8086228A", 0 },
- { "APMC0D08", 0},
- { "AMD0020", 0 },
- { "AMDI0020", 0 },
- { "AMDI0022", 0 },
- { "BRCM2032", 0 },
- { "HISI0031", 0 },
+ { "80860F0A", (kernel_ulong_t)&dw8250_dw_apb },
+ { "8086228A", (kernel_ulong_t)&dw8250_dw_apb },
+ { "AMD0020", (kernel_ulong_t)&dw8250_dw_apb },
+ { "AMDI0020", (kernel_ulong_t)&dw8250_dw_apb },
+ { "AMDI0022", (kernel_ulong_t)&dw8250_dw_apb },
+ { "APMC0D08", (kernel_ulong_t)&dw8250_dw_apb},
+ { "BRCM2032", (kernel_ulong_t)&dw8250_dw_apb },
+ { "HISI0031", (kernel_ulong_t)&dw8250_dw_apb },
+ { "INT33C4", (kernel_ulong_t)&dw8250_dw_apb },
+ { "INT33C5", (kernel_ulong_t)&dw8250_dw_apb },
+ { "INT3434", (kernel_ulong_t)&dw8250_dw_apb },
+ { "INT3435", (kernel_ulong_t)&dw8250_dw_apb },
{ },
};
MODULE_DEVICE_TABLE(acpi, dw8250_acpi_match);
unsigned char lsr = serial_in(p, UART_LSR);
u64 stop_delay = 0;
+ p->lsr_saved_flags |= lsr & LSR_SAVE_FLAGS;
+
if (!(lsr & UART_LSR_THRE))
return;
/*
if ((status & UART_LSR_THRE) && (up->ier & UART_IER_THRI)) {
if (!up->dma || up->dma->tx_err)
serial8250_tx_chars(up);
- else
+ else if (!up->dma->tx_running)
__stop_tx(up);
}
case UPIO_MEM32BE:
case UPIO_MEM16:
case UPIO_MEM:
- if (!port->mapbase)
+ if (!port->mapbase) {
+ ret = -EINVAL;
break;
+ }
if (!request_mem_region(port->mapbase, size, "serial")) {
ret = -EBUSY;
pl011_dma_rx_stop(uap);
}
+static void pl011_throttle_rx(struct uart_port *port)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ pl011_stop_rx(port);
+ spin_unlock_irqrestore(&port->lock, flags);
+}
+
static void pl011_enable_ms(struct uart_port *port)
{
struct uart_amba_port *uap =
*/
static void pl011_enable_interrupts(struct uart_amba_port *uap)
{
+ unsigned long flags;
unsigned int i;
- spin_lock_irq(&uap->port.lock);
+ spin_lock_irqsave(&uap->port.lock, flags);
/* Clear out any spuriously appearing RX interrupts */
pl011_write(UART011_RTIS | UART011_RXIS, uap, REG_ICR);
if (!pl011_dma_rx_running(uap))
uap->im |= UART011_RXIM;
pl011_write(uap->im, uap, REG_IMSC);
- spin_unlock_irq(&uap->port.lock);
+ spin_unlock_irqrestore(&uap->port.lock, flags);
+}
+
+static void pl011_unthrottle_rx(struct uart_port *port)
+{
+ struct uart_amba_port *uap = container_of(port, struct uart_amba_port, port);
+
+ pl011_enable_interrupts(uap);
}
static int pl011_startup(struct uart_port *port)
.stop_tx = pl011_stop_tx,
.start_tx = pl011_start_tx,
.stop_rx = pl011_stop_rx,
+ .throttle = pl011_throttle_rx,
+ .unthrottle = pl011_unthrottle_rx,
.enable_ms = pl011_enable_ms,
.break_ctl = pl011_break_ctl,
.startup = pl011_startup,
}
}
-static int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud)
+static unsigned int mvebu_uart_baud_rate_set(struct uart_port *port, unsigned int baud)
{
unsigned int d_divisor, m_divisor;
unsigned long flags;
u32 brdv, osamp;
if (!port->uartclk)
- return -EOPNOTSUPP;
+ return 0;
/*
* The baudrate is derived from the UART clock thanks to divisors:
(m_divisor << 16) | (m_divisor << 24);
writel(osamp, port->membase + UART_OSAMP);
- return 0;
+ return DIV_ROUND_CLOSEST(port->uartclk, d_divisor * m_divisor);
}
static void mvebu_uart_set_termios(struct uart_port *port,
max_baud = port->uartclk / 80;
baud = uart_get_baud_rate(port, termios, old, min_baud, max_baud);
- if (mvebu_uart_baud_rate_set(port, baud)) {
- /* No clock available, baudrate cannot be changed */
- if (old)
- baud = uart_get_baud_rate(port, old, NULL,
- min_baud, max_baud);
- } else {
- tty_termios_encode_baud_rate(termios, baud, baud);
- uart_update_timeout(port, termios->c_cflag, baud);
- }
+ baud = mvebu_uart_baud_rate_set(port, baud);
+
+ /* In case baudrate cannot be changed, report previous old value */
+ if (baud == 0 && old)
+ baud = tty_termios_baud_rate(old);
/* Only the following flag changes are supported */
if (old) {
termios->c_cflag |= CS8;
}
+ if (baud != 0) {
+ tty_termios_encode_baud_rate(termios, baud, baud);
+ uart_update_timeout(port, termios->c_cflag, baud);
+ }
+
spin_unlock_irqrestore(&port->lock, flags);
}
.stop_tx = qcom_geni_serial_stop_tx,
.start_tx = qcom_geni_serial_start_tx,
.stop_rx = qcom_geni_serial_stop_rx,
+ .start_rx = qcom_geni_serial_start_rx,
.set_termios = qcom_geni_serial_set_termios,
.startup = qcom_geni_serial_startup,
.request_port = qcom_geni_serial_request_port,
/* Enable tx dma mode */
ucon = rd_regl(port, S3C2410_UCON);
ucon &= ~(S3C64XX_UCON_TXBURST_MASK | S3C64XX_UCON_TXMODE_MASK);
- ucon |= (dma_get_cache_alignment() >= 16) ?
- S3C64XX_UCON_TXBURST_16 : S3C64XX_UCON_TXBURST_1;
+ ucon |= S3C64XX_UCON_TXBURST_1;
ucon |= S3C64XX_UCON_TXMODE_DMA;
wr_regl(port, S3C2410_UCON, ucon);
S3C64XX_UCON_DMASUS_EN |
S3C64XX_UCON_TIMEOUT_EN |
S3C64XX_UCON_RXMODE_MASK);
- ucon |= S3C64XX_UCON_RXBURST_16 |
+ ucon |= S3C64XX_UCON_RXBURST_1 |
0xf << S3C64XX_UCON_TIMEOUT_SHIFT |
S3C64XX_UCON_EMPTYINT_EN |
S3C64XX_UCON_TIMEOUT_EN |
}
#endif
-static inline bool uart_console_enabled(struct uart_port *port)
-{
- return uart_console(port) && (port->cons->flags & CON_ENABLED);
-}
-
static void uart_port_spin_lock_init(struct uart_port *port)
{
spin_lock_init(&port->lock);
/*
* Nothing to do if the console is not suspending
* except stop_rx to prevent any asynchronous data
- * over RX line. Re-start_rx, when required, is
- * done by set_termios in resume sequence
+ * over RX line. However ensure that we will be
+ * able to Re-start_rx later.
*/
if (!console_suspend_enabled && uart_console(uport)) {
- uport->ops->stop_rx(uport);
+ if (uport->ops->start_rx)
+ uport->ops->stop_rx(uport);
goto unlock;
}
if (console_suspend_enabled)
uart_change_pm(state, UART_PM_STATE_ON);
uport->ops->set_termios(uport, &termios, NULL);
+ if (!console_suspend_enabled && uport->ops->start_rx)
+ uport->ops->start_rx(uport);
if (console_suspend_enabled)
console_start(uport->cons);
}
*cr3 |= USART_CR3_DEM;
over8 = *cr1 & USART_CR1_OVER8;
+ *cr1 &= ~(USART_CR1_DEDT_MASK | USART_CR1_DEAT_MASK);
+
if (over8)
rs485_deat_dedt = delay_ADE * baud * 8;
else
rcu_sysrq_start();
rcu_read_lock();
- printk_prefer_direct_enter();
/*
* Raise the apparent loglevel to maximum so that the sysrq header
* is shown to provide the user with positive feedback. We do not
pr_cont("\n");
console_loglevel = orig_log_level;
}
- printk_prefer_direct_exit();
rcu_read_unlock();
rcu_sysrq_end();
ssize_t redirected_tty_write(struct kiocb *, struct iov_iter *);
+int tty_insert_flip_string_and_push_buffer(struct tty_port *port,
+ const unsigned char *chars, size_t cnt);
+
#endif
}
+static inline void tty_flip_buffer_commit(struct tty_buffer *tail)
+{
+ /*
+ * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees
+ * buffer data.
+ */
+ smp_store_release(&tail->commit, tail->used);
+}
+
/**
* tty_flip_buffer_push - push terminal buffers
* @port: tty port to push
{
struct tty_bufhead *buf = &port->buf;
- /*
- * Paired w/ acquire in flush_to_ldisc(); ensures flush_to_ldisc() sees
- * buffer data.
- */
- smp_store_release(&buf->tail->commit, buf->tail->used);
+ tty_flip_buffer_commit(buf->tail);
queue_work(system_unbound_wq, &buf->work);
}
EXPORT_SYMBOL(tty_flip_buffer_push);
+/**
+ * tty_insert_flip_string_and_push_buffer - add characters to the tty buffer and
+ * push
+ * @port: tty port
+ * @chars: characters
+ * @size: size
+ *
+ * The function combines tty_insert_flip_string() and tty_flip_buffer_push()
+ * with the exception of properly holding the @port->lock.
+ *
+ * To be used only internally (by pty currently).
+ *
+ * Returns: the number added.
+ */
+int tty_insert_flip_string_and_push_buffer(struct tty_port *port,
+ const unsigned char *chars, size_t size)
+{
+ struct tty_bufhead *buf = &port->buf;
+ unsigned long flags;
+
+ spin_lock_irqsave(&port->lock, flags);
+ size = tty_insert_flip_string(port, chars, size);
+ if (size)
+ tty_flip_buffer_commit(buf->tail);
+ spin_unlock_irqrestore(&port->lock, flags);
+
+ queue_work(system_unbound_wq, &buf->work);
+
+ return size;
+}
+
/**
* tty_buffer_init - prepare a tty buffer structure
* @port: tty port to initialise
unsigned short *p = (unsigned short *) vc->vc_pos;
vc_uniscr_delete(vc, nr);
- scr_memcpyw(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2);
+ scr_memmovew(p, p + nr, (vc->vc_cols - vc->state.x - nr) * 2);
scr_memsetw(p + vc->vc_cols - vc->state.x - nr, vc->vc_video_erase_char,
nr * 2);
vc->vc_need_wrap = 0;
}
/**
- * ufshcd_utrl_clear - Clear a bit in UTRLCLR register
+ * ufshcd_utrl_clear() - Clear requests from the controller request list.
* @hba: per adapter instance
- * @pos: position of the bit to be cleared
+ * @mask: mask with one bit set for each request to be cleared
*/
-static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 pos)
+static inline void ufshcd_utrl_clear(struct ufs_hba *hba, u32 mask)
{
if (hba->quirks & UFSHCI_QUIRK_BROKEN_REQ_LIST_CLR)
- ufshcd_writel(hba, (1 << pos), REG_UTP_TRANSFER_REQ_LIST_CLEAR);
- else
- ufshcd_writel(hba, ~(1 << pos),
- REG_UTP_TRANSFER_REQ_LIST_CLEAR);
+ mask = ~mask;
+ /*
+ * From the UFSHCI specification: "UTP Transfer Request List CLear
+ * Register (UTRLCLR): This field is bit significant. Each bit
+ * corresponds to a slot in the UTP Transfer Request List, where bit 0
+ * corresponds to request slot 0. A bit in this field is set to ‘0’
+ * by host software to indicate to the host controller that a transfer
+ * request slot is cleared. The host controller
+ * shall free up any resources associated to the request slot
+ * immediately, and shall set the associated bit in UTRLDBR to ‘0’. The
+ * host software indicates no change to request slots by setting the
+ * associated bits in this field to ‘1’. Bits in this field shall only
+ * be set ‘1’ or ‘0’ by host software when UTRLRSR is set to ‘1’."
+ */
+ ufshcd_writel(hba, ~mask, REG_UTP_TRANSFER_REQ_LIST_CLEAR);
}
/**
return ufshcd_compose_devman_upiu(hba, lrbp);
}
-static int
-ufshcd_clear_cmd(struct ufs_hba *hba, int tag)
+/*
+ * Clear all the requests from the controller for which a bit has been set in
+ * @mask and wait until the controller confirms that these requests have been
+ * cleared.
+ */
+static int ufshcd_clear_cmds(struct ufs_hba *hba, u32 mask)
{
- int err = 0;
unsigned long flags;
- u32 mask = 1 << tag;
/* clear outstanding transaction before retry */
spin_lock_irqsave(hba->host->host_lock, flags);
- ufshcd_utrl_clear(hba, tag);
+ ufshcd_utrl_clear(hba, mask);
spin_unlock_irqrestore(hba->host->host_lock, flags);
/*
* wait for h/w to clear corresponding bit in door-bell.
* max. wait is 1 sec.
*/
- err = ufshcd_wait_for_register(hba,
- REG_UTP_TRANSFER_REQ_DOOR_BELL,
- mask, ~mask, 1000, 1000);
-
- return err;
+ return ufshcd_wait_for_register(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL,
+ mask, ~mask, 1000, 1000);
}
static int
static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba,
struct ufshcd_lrb *lrbp, int max_timeout)
{
- int err = 0;
- unsigned long time_left;
+ unsigned long time_left = msecs_to_jiffies(max_timeout);
unsigned long flags;
+ bool pending;
+ int err;
+retry:
time_left = wait_for_completion_timeout(hba->dev_cmd.complete,
- msecs_to_jiffies(max_timeout));
+ time_left);
- spin_lock_irqsave(hba->host->host_lock, flags);
- hba->dev_cmd.complete = NULL;
if (likely(time_left)) {
+ /*
+ * The completion handler called complete() and the caller of
+ * this function still owns the @lrbp tag so the code below does
+ * not trigger any race conditions.
+ */
+ hba->dev_cmd.complete = NULL;
err = ufshcd_get_tr_ocs(lrbp);
if (!err)
err = ufshcd_dev_cmd_completion(hba, lrbp);
- }
- spin_unlock_irqrestore(hba->host->host_lock, flags);
-
- if (!time_left) {
+ } else {
err = -ETIMEDOUT;
dev_dbg(hba->dev, "%s: dev_cmd request timedout, tag %d\n",
__func__, lrbp->task_tag);
- if (!ufshcd_clear_cmd(hba, lrbp->task_tag))
+ if (ufshcd_clear_cmds(hba, 1U << lrbp->task_tag) == 0) {
/* successfully cleared the command, retry if needed */
err = -EAGAIN;
- /*
- * in case of an error, after clearing the doorbell,
- * we also need to clear the outstanding_request
- * field in hba
- */
- spin_lock_irqsave(&hba->outstanding_lock, flags);
- __clear_bit(lrbp->task_tag, &hba->outstanding_reqs);
- spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+ /*
+ * Since clearing the command succeeded we also need to
+ * clear the task tag bit from the outstanding_reqs
+ * variable.
+ */
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ pending = test_bit(lrbp->task_tag,
+ &hba->outstanding_reqs);
+ if (pending) {
+ hba->dev_cmd.complete = NULL;
+ __clear_bit(lrbp->task_tag,
+ &hba->outstanding_reqs);
+ }
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ if (!pending) {
+ /*
+ * The completion handler ran while we tried to
+ * clear the command.
+ */
+ time_left = 1;
+ goto retry;
+ }
+ } else {
+ dev_err(hba->dev, "%s: failed to clear tag %d\n",
+ __func__, lrbp->task_tag);
+ }
}
return err;
}
hba->dev_info.wb_enabled = enable;
- dev_info(hba->dev, "%s Write Booster %s\n",
+ dev_dbg(hba->dev, "%s Write Booster %s\n",
__func__, enable ? "enabled" : "disabled");
return ret;
}
/**
- * ufshcd_eh_device_reset_handler - device reset handler registered to
- * scsi layer.
+ * ufshcd_eh_device_reset_handler() - Reset a single logical unit.
* @cmd: SCSI command pointer
*
* Returns SUCCESS/FAILED
*/
static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
{
+ unsigned long flags, pending_reqs = 0, not_cleared = 0;
struct Scsi_Host *host;
struct ufs_hba *hba;
u32 pos;
}
/* clear the commands that were pending for corresponding LUN */
- for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
- if (hba->lrb[pos].lun == lun) {
- err = ufshcd_clear_cmd(hba, pos);
- if (err)
- break;
- __ufshcd_transfer_req_compl(hba, 1U << pos);
- }
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs)
+ if (hba->lrb[pos].lun == lun)
+ __set_bit(pos, &pending_reqs);
+ hba->outstanding_reqs &= ~pending_reqs;
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ if (ufshcd_clear_cmds(hba, pending_reqs) < 0) {
+ spin_lock_irqsave(&hba->outstanding_lock, flags);
+ not_cleared = pending_reqs &
+ ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+ hba->outstanding_reqs |= not_cleared;
+ spin_unlock_irqrestore(&hba->outstanding_lock, flags);
+
+ dev_err(hba->dev, "%s: failed to clear requests %#lx\n",
+ __func__, not_cleared);
}
+ __ufshcd_transfer_req_compl(hba, pending_reqs & ~not_cleared);
out:
hba->req_abort_count = 0;
goto out;
}
- err = ufshcd_clear_cmd(hba, tag);
+ err = ufshcd_clear_cmds(hba, 1U << tag);
if (err)
dev_err(hba->dev, "%s: Failed clearing cmd at tag %d, err %d\n",
__func__, tag, err);
hba->silence_err_logs = false;
/* scale up clocks to max frequency before full reinitialization */
- ufshcd_set_clk_freq(hba, true);
+ ufshcd_scale_clks(hba, true);
err = ufshcd_hba_enable(hba);
return ret;
}
+static bool phandle_exists(const struct device_node *np,
+ const char *phandle_name, int index)
+{
+ struct device_node *parse_np = of_parse_phandle(np, phandle_name, index);
+
+ if (parse_np)
+ of_node_put(parse_np);
+
+ return parse_np != NULL;
+}
+
#define MAX_PROP_SIZE 32
static int ufshcd_populate_vreg(struct device *dev, const char *name,
- struct ufs_vreg **out_vreg)
+ struct ufs_vreg **out_vreg)
{
char prop_name[MAX_PROP_SIZE];
struct ufs_vreg *vreg = NULL;
}
snprintf(prop_name, MAX_PROP_SIZE, "%s-supply", name);
- if (!of_parse_phandle(np, prop_name, 0)) {
+ if (!phandle_exists(np, prop_name, 0)) {
dev_info(dev, "%s: Unable to find %s regulator, assuming enabled\n",
__func__, prop_name);
goto out;
}
if (enqd_len + trb_buff_len >= full_len) {
- if (need_zero_pkt)
- zero_len_trb = !zero_len_trb;
-
- field &= ~TRB_CHAIN;
- field |= TRB_IOC;
- more_trbs_coming = false;
- preq->td.last_trb = ring->enqueue;
+ if (need_zero_pkt && !zero_len_trb) {
+ zero_len_trb = true;
+ } else {
+ zero_len_trb = false;
+ field &= ~TRB_CHAIN;
+ field |= TRB_IOC;
+ more_trbs_coming = false;
+ need_zero_pkt = false;
+ preq->td.last_trb = ring->enqueue;
+ }
}
/* Only set interrupt on short packet for OUT endpoints. */
length_field = TRB_LEN(trb_buff_len) | TRB_TD_SIZE(remainder) |
TRB_INTR_TARGET(0);
- cdnsp_queue_trb(pdev, ring, more_trbs_coming | zero_len_trb,
+ cdnsp_queue_trb(pdev, ring, more_trbs_coming,
lower_32_bits(send_addr),
upper_32_bits(send_addr),
length_field,
struct ci_hdrc *ci = req->context;
unsigned long flags;
+ if (req->status < 0)
+ return;
+
if (ci->setaddr) {
hw_usb_set_address(ci, ci->address);
ci->setaddr = false;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!res) {
retval = -EINVAL;
- goto error1;
+ goto error2;
}
hcd->rsrc_start = res->start;
hcd->rsrc_len = resource_size(res);
* This device property is for kernel internal use only and
* is expected to be set by the glue code.
*/
- if (device_property_read_string(dev, "linux,extcon-name", &name) == 0) {
- edev = extcon_get_extcon_dev(name);
- if (!edev)
- return ERR_PTR(-EPROBE_DEFER);
-
- return edev;
- }
+ if (device_property_read_string(dev, "linux,extcon-name", &name) == 0)
+ return extcon_get_extcon_dev(name);
/*
* Try to get an extcon device from the USB PHY controller's "port"
if (i == ARRAY_SIZE(dwc3_ti_rate_table)) {
dev_err(dev, "unsupported usb2_refclk rate: %lu KHz\n", rate);
- ret = -EINVAL;
- goto err_clk_disable;
+ return -EINVAL;
}
data->rate_code = i;
/* Read the syscon property and set the rate code */
ret = phy_syscon_pll_refclk(data);
if (ret)
- goto err_clk_disable;
+ return ret;
/* VBUS divider select */
data->vbus_divider = device_property_read_bool(dev, "ti,vbus-divider");
clk_disable_unprepare(data->usb2_refclk);
pm_runtime_disable(dev);
pm_runtime_set_suspended(dev);
-err_clk_disable:
- clk_put(data->usb2_refclk);
return ret;
}
pm_runtime_disable(dev);
pm_runtime_set_suspended(dev);
- clk_put(data->usb2_refclk);
platform_set_drvdata(pdev, NULL);
return 0;
}
PROPERTY_ENTRY_STRING("dr_mode", "peripheral"),
PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"),
PROPERTY_ENTRY_BOOL("linux,phy_charger_detect"),
+ PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"),
{}
};
struct dwc3 *dwc = dep->dwc;
u32 mdwidth;
int size;
+ int maxpacket;
mdwidth = dwc3_mdwidth(dwc);
else
size = DWC31_GTXFIFOSIZ_TXFDEP(size);
- /* FIFO Depth is in MDWDITH bytes. Multiply */
- size *= mdwidth;
-
/*
- * To meet performance requirement, a minimum TxFIFO size of 3x
- * MaxPacketSize is recommended for endpoints that support burst and a
- * minimum TxFIFO size of 2x MaxPacketSize for endpoints that don't
- * support burst. Use those numbers and we can calculate the max packet
- * limit as below.
+ * maxpacket size is determined as part of the following, after assuming
+ * a mult value of one maxpacket:
+ * DWC3 revision 280A and prior:
+ * fifo_size = mult * (max_packet / mdwidth) + 1;
+ * maxpacket = mdwidth * (fifo_size - 1);
+ *
+ * DWC3 revision 290A and onwards:
+ * fifo_size = mult * ((max_packet + mdwidth)/mdwidth + 1) + 1
+ * maxpacket = mdwidth * ((fifo_size - 1) - 1) - mdwidth;
*/
- if (dwc->maximum_speed >= USB_SPEED_SUPER)
- size /= 3;
+ if (DWC3_VER_IS_PRIOR(DWC3, 290A))
+ maxpacket = mdwidth * (size - 1);
else
- size /= 2;
+ maxpacket = mdwidth * ((size - 1) - 1) - mdwidth;
+ /* Functionally, space for one max packet is sufficient */
+ size = min_t(int, maxpacket, 1024);
usb_ep_set_maxpacket_limit(&dep->endpoint, size);
dep->endpoint.max_streams = 16;
}
evt->count = 0;
- evt->flags &= ~DWC3_EVENT_PENDING;
ret = IRQ_HANDLED;
/* Unmask interrupt */
dwc3_writel(dwc->regs, DWC3_DEV_IMOD(0), dwc->imod_interval);
}
+ /* Keep the clearing of DWC3_EVENT_PENDING at the end */
+ evt->flags &= ~DWC3_EVENT_PENDING;
+
return ret;
}
struct usb_endpoint_descriptor *descs[3];
u8 num;
-
- int status; /* P: epfile->mutex */
};
struct ffs_epfile {
bool use_sg;
struct ffs_data *ffs;
+
+ int status;
+ struct completion done;
};
struct ffs_desc_helper {
static void ffs_epfile_io_complete(struct usb_ep *_ep, struct usb_request *req)
{
+ struct ffs_io_data *io_data = req->context;
+
ENTER();
- if (req->context) {
- struct ffs_ep *ep = _ep->driver_data;
- ep->status = req->status ? req->status : req->actual;
- complete(req->context);
- }
+ if (req->status)
+ io_data->status = req->status;
+ else
+ io_data->status = req->actual;
+
+ complete(&io_data->done);
}
static ssize_t ffs_copy_to_iter(void *data, int data_len, struct iov_iter *iter)
WARN(1, "%s: data_len == -EINVAL\n", __func__);
ret = -EINVAL;
} else if (!io_data->aio) {
- DECLARE_COMPLETION_ONSTACK(done);
bool interrupted = false;
req = ep->req;
io_data->buf = data;
- req->context = &done;
+ init_completion(&io_data->done);
+ req->context = io_data;
req->complete = ffs_epfile_io_complete;
ret = usb_ep_queue(ep->ep, req, GFP_ATOMIC);
spin_unlock_irq(&epfile->ffs->eps_lock);
- if (wait_for_completion_interruptible(&done)) {
+ if (wait_for_completion_interruptible(&io_data->done)) {
+ spin_lock_irq(&epfile->ffs->eps_lock);
+ if (epfile->ep != ep) {
+ ret = -ESHUTDOWN;
+ goto error_lock;
+ }
/*
* To avoid race condition with ffs_epfile_io_complete,
* dequeue the request first then check
* condition with req->complete callback.
*/
usb_ep_dequeue(ep->ep, req);
- wait_for_completion(&done);
- interrupted = ep->status < 0;
+ spin_unlock_irq(&epfile->ffs->eps_lock);
+ wait_for_completion(&io_data->done);
+ interrupted = io_data->status < 0;
}
if (interrupted)
ret = -EINTR;
- else if (io_data->read && ep->status > 0)
- ret = __ffs_epfile_read_data(epfile, data, ep->status,
+ else if (io_data->read && io_data->status > 0)
+ ret = __ffs_epfile_read_data(epfile, data, io_data->status,
&io_data->data);
else
- ret = ep->status;
+ ret = io_data->status;
goto error_mutex;
} else if (!(req = usb_ep_alloc_request(ep->ep, GFP_ATOMIC))) {
ret = -ENOMEM;
dev->qmult = qmult;
snprintf(net->name, sizeof(net->name), "%s%%d", netname);
- if (get_ether_addr(dev_addr, addr))
+ if (get_ether_addr(dev_addr, addr)) {
+ net->addr_assign_type = NET_ADDR_RANDOM;
dev_warn(&g->dev,
"using random %s ethernet address\n", "self");
+ } else {
+ net->addr_assign_type = NET_ADDR_SET;
+ }
eth_hw_addr_set(net, addr);
if (get_ether_addr(host_addr, dev->host_mac))
dev_warn(&g->dev,
eth_random_addr(dev->dev_mac);
pr_warn("using random %s ethernet address\n", "self");
+
+ /* by default we always have a random MAC address */
+ net->addr_assign_type = NET_ADDR_RANDOM;
+
eth_random_addr(dev->host_mac);
pr_warn("using random %s ethernet address\n", "host");
dev = netdev_priv(net);
g = dev->gadget;
- net->addr_assign_type = NET_ADDR_RANDOM;
eth_hw_addr_set(net, dev->dev_mac);
status = register_netdev(net);
if (get_ether_addr(dev_addr, new_addr))
return -EINVAL;
memcpy(dev->dev_mac, new_addr, ETH_ALEN);
+ net->addr_assign_type = NET_ADDR_SET;
return 0;
}
EXPORT_SYMBOL_GPL(gether_set_dev_addr);
const char *page, size_t len) \
{ \
struct f_uvc_opts *opts = to_f_uvc_opts(item); \
+ int size = min(sizeof(opts->aname), len + 1); \
int ret = 0; \
\
mutex_lock(&opts->lock); \
goto end; \
} \
\
- ret = snprintf(opts->aname, min(sizeof(opts->aname), len), \
- "%s", page); \
+ ret = strscpy(opts->aname, page, size); \
+ if (ret == -E2BIG) \
+ ret = size - 1; \
\
end: \
mutex_unlock(&opts->lock); \
uvcg_queue_cancel(queue, 0);
break;
}
+
+ /* Endpoint now owns the request */
+ req = NULL;
video->req_int_count++;
}
#include <linux/ctype.h>
#include <linux/debugfs.h>
#include <linux/delay.h>
+#include <linux/idr.h>
#include <linux/kref.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
/*----------------------------------------------------------------------*/
+static DEFINE_IDA(driver_id_numbers);
+#define DRIVER_DRIVER_NAME_LENGTH_MAX 32
+
#define RAW_EVENT_QUEUE_SIZE 16
struct raw_event_queue {
/* Reference to misc device: */
struct device *dev;
+ /* Make driver names unique */
+ int driver_id_number;
+
/* Protected by lock: */
enum dev_state state;
bool gadget_registered;
spin_lock_init(&dev->lock);
init_completion(&dev->ep0_done);
raw_event_queue_init(&dev->queue);
+ dev->driver_id_number = -1;
return dev;
}
kfree(dev->udc_name);
kfree(dev->driver.udc_name);
+ kfree(dev->driver.driver.name);
+ if (dev->driver_id_number >= 0)
+ ida_free(&driver_id_numbers, dev->driver_id_number);
if (dev->req) {
if (dev->ep0_urb_queued)
usb_ep_dequeue(dev->gadget->ep0, dev->req);
static int raw_ioctl_init(struct raw_dev *dev, unsigned long value)
{
int ret = 0;
+ int driver_id_number;
struct usb_raw_init arg;
char *udc_driver_name;
char *udc_device_name;
+ char *driver_driver_name;
unsigned long flags;
if (copy_from_user(&arg, (void __user *)value, sizeof(arg)))
return -EINVAL;
}
+ driver_id_number = ida_alloc(&driver_id_numbers, GFP_KERNEL);
+ if (driver_id_number < 0)
+ return driver_id_number;
+
+ driver_driver_name = kmalloc(DRIVER_DRIVER_NAME_LENGTH_MAX, GFP_KERNEL);
+ if (!driver_driver_name) {
+ ret = -ENOMEM;
+ goto out_free_driver_id_number;
+ }
+ snprintf(driver_driver_name, DRIVER_DRIVER_NAME_LENGTH_MAX,
+ DRIVER_NAME ".%d", driver_id_number);
+
udc_driver_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL);
- if (!udc_driver_name)
- return -ENOMEM;
+ if (!udc_driver_name) {
+ ret = -ENOMEM;
+ goto out_free_driver_driver_name;
+ }
ret = strscpy(udc_driver_name, &arg.driver_name[0],
UDC_NAME_LENGTH_MAX);
- if (ret < 0) {
- kfree(udc_driver_name);
- return ret;
- }
+ if (ret < 0)
+ goto out_free_udc_driver_name;
ret = 0;
udc_device_name = kmalloc(UDC_NAME_LENGTH_MAX, GFP_KERNEL);
if (!udc_device_name) {
- kfree(udc_driver_name);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out_free_udc_driver_name;
}
ret = strscpy(udc_device_name, &arg.device_name[0],
UDC_NAME_LENGTH_MAX);
- if (ret < 0) {
- kfree(udc_driver_name);
- kfree(udc_device_name);
- return ret;
- }
+ if (ret < 0)
+ goto out_free_udc_device_name;
ret = 0;
spin_lock_irqsave(&dev->lock, flags);
if (dev->state != STATE_DEV_OPENED) {
dev_dbg(dev->dev, "fail, device is not opened\n");
- kfree(udc_driver_name);
- kfree(udc_device_name);
ret = -EINVAL;
goto out_unlock;
}
dev->driver.suspend = gadget_suspend;
dev->driver.resume = gadget_resume;
dev->driver.reset = gadget_reset;
- dev->driver.driver.name = DRIVER_NAME;
+ dev->driver.driver.name = driver_driver_name;
dev->driver.udc_name = udc_device_name;
dev->driver.match_existing_only = 1;
+ dev->driver_id_number = driver_id_number;
dev->state = STATE_DEV_INITIALIZED;
+ spin_unlock_irqrestore(&dev->lock, flags);
+ return ret;
out_unlock:
spin_unlock_irqrestore(&dev->lock, flags);
+out_free_udc_device_name:
+ kfree(udc_device_name);
+out_free_udc_driver_name:
+ kfree(udc_driver_name);
+out_free_driver_driver_name:
+ kfree(driver_driver_name);
+out_free_driver_id_number:
+ ida_free(&driver_id_numbers, driver_id_number);
return ret;
}
}
udc->isp1301_i2c_client = isp1301_get_client(isp1301_node);
+ of_node_put(isp1301_node);
if (!udc->isp1301_i2c_client) {
return -EPROBE_DEFER;
}
return -ENODEV;
}
- res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
- if (!res) {
- dev_err(&pdev->dev,
- "Found HC with no IRQ. Check %s setup!\n",
- dev_name(&pdev->dev));
- return -ENODEV;
- }
- irq = res->start;
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
hcd = __usb_create_hcd(&fsl_ehci_hc_driver, pdev->dev.parent,
&pdev->dev, dev_name(&pdev->dev), NULL);
goto error;
}
+ pdev->dev.of_node = ofdev->dev.of_node;
+ pdev->dev.of_node_reused = true;
+
retval = platform_device_add(pdev);
if (retval)
goto error;
* It will release and re-aquire the lock while calling ACPI
* method.
*/
-static void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd,
+void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd,
u16 index, bool on, unsigned long *flags)
__must_hold(&xhci->lock)
{
#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI 0x461e
#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI 0x464e
#define PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI 0x51ed
+#define PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI 0xa71e
+#define PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI 0x7ec0
#define PCI_DEVICE_ID_AMD_RENOIR_XHCI 0x1639
#define PCI_DEVICE_ID_AMD_PROMONTORYA_4 0x43b9
pdev->device == PCI_DEVICE_ID_INTEL_MAPLE_RIDGE_XHCI ||
pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_XHCI ||
pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_N_XHCI ||
- pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI))
+ pdev->device == PCI_DEVICE_ID_INTEL_ALDER_LAKE_PCH_XHCI ||
+ pdev->device == PCI_DEVICE_ID_INTEL_RAPTOR_LAKE_XHCI ||
+ pdev->device == PCI_DEVICE_ID_INTEL_METEOR_LAKE_XHCI))
xhci->quirks |= XHCI_DEFAULT_PM_RUNTIME_ALLOW;
if (pdev->vendor == PCI_VENDOR_ID_ETRON &&
static int xhci_run_finished(struct xhci_hcd *xhci)
{
+ unsigned long flags;
+ u32 temp;
+
+ /*
+ * Enable interrupts before starting the host (xhci 4.2 and 5.5.2).
+ * Protect the short window before host is running with a lock
+ */
+ spin_lock_irqsave(&xhci->lock, flags);
+
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Enable interrupts");
+ temp = readl(&xhci->op_regs->command);
+ temp |= (CMD_EIE);
+ writel(temp, &xhci->op_regs->command);
+
+ xhci_dbg_trace(xhci, trace_xhci_dbg_init, "Enable primary interrupter");
+ temp = readl(&xhci->ir_set->irq_pending);
+ writel(ER_IRQ_ENABLE(temp), &xhci->ir_set->irq_pending);
+
if (xhci_start(xhci)) {
xhci_halt(xhci);
+ spin_unlock_irqrestore(&xhci->lock, flags);
return -ENODEV;
}
+
xhci->cmd_ring_state = CMD_RING_STATE_RUNNING;
if (xhci->quirks & XHCI_NEC_HOST)
xhci_ring_cmd_db(xhci);
+ spin_unlock_irqrestore(&xhci->lock, flags);
+
return 0;
}
temp |= (xhci->imod_interval / 250) & ER_IRQ_INTERVAL_MASK;
writel(temp, &xhci->ir_set->irq_control);
- /* Set the HCD state before we enable the irqs */
- temp = readl(&xhci->op_regs->command);
- temp |= (CMD_EIE);
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Enable interrupts, cmd = 0x%x.", temp);
- writel(temp, &xhci->op_regs->command);
-
- temp = readl(&xhci->ir_set->irq_pending);
- xhci_dbg_trace(xhci, trace_xhci_dbg_init,
- "// Enabling event ring interrupter %p by writing 0x%x to irq_pending",
- xhci->ir_set, (unsigned int) ER_IRQ_ENABLE(temp));
- writel(ER_IRQ_ENABLE(temp), &xhci->ir_set->irq_pending);
-
if (xhci->quirks & XHCI_NEC_HOST) {
struct xhci_command *command;
void xhci_shutdown(struct usb_hcd *hcd)
{
struct xhci_hcd *xhci = hcd_to_xhci(hcd);
+ unsigned long flags;
+ int i;
if (xhci->quirks & XHCI_SPURIOUS_REBOOT)
usb_disable_xhci_ports(to_pci_dev(hcd->self.sysdev));
del_timer_sync(&xhci->shared_hcd->rh_timer);
}
- spin_lock_irq(&xhci->lock);
+ spin_lock_irqsave(&xhci->lock, flags);
xhci_halt(xhci);
+
+ /* Power off USB2 ports*/
+ for (i = 0; i < xhci->usb2_rhub.num_ports; i++)
+ xhci_set_port_power(xhci, xhci->main_hcd, i, false, &flags);
+
+ /* Power off USB3 ports*/
+ for (i = 0; i < xhci->usb3_rhub.num_ports; i++)
+ xhci_set_port_power(xhci, xhci->shared_hcd, i, false, &flags);
+
/* Workaround for spurious wakeups at shutdown with HSW */
if (xhci->quirks & XHCI_SPURIOUS_WAKEUP)
xhci_reset(xhci, XHCI_RESET_SHORT_USEC);
- spin_unlock_irq(&xhci->lock);
+ spin_unlock_irqrestore(&xhci->lock, flags);
xhci_cleanup_msix(xhci);
{
u32 command, temp = 0;
struct usb_hcd *hcd = xhci_to_hcd(xhci);
- struct usb_hcd *secondary_hcd;
int retval = 0;
bool comp_timer_running = false;
bool pending_portevent = false;
* first with the primary HCD, and then with the secondary HCD.
* If we don't do the same, the host will never be started.
*/
- if (!usb_hcd_is_primary_hcd(hcd))
- secondary_hcd = hcd;
- else
- secondary_hcd = xhci->shared_hcd;
-
xhci_dbg(xhci, "Initialize the xhci_hcd\n");
- retval = xhci_init(hcd->primary_hcd);
+ retval = xhci_init(hcd);
if (retval)
return retval;
comp_timer_running = true;
xhci_dbg(xhci, "Start the primary HCD\n");
- retval = xhci_run(hcd->primary_hcd);
- if (!retval && secondary_hcd) {
+ retval = xhci_run(hcd);
+ if (!retval && xhci->shared_hcd) {
xhci_dbg(xhci, "Start the secondary HCD\n");
- retval = xhci_run(secondary_hcd);
+ retval = xhci_run(xhci->shared_hcd);
}
+
hcd->state = HC_STATE_SUSPENDED;
if (xhci->shared_hcd)
xhci->shared_hcd->state = HC_STATE_SUSPENDED;
int xhci_hub_status_data(struct usb_hcd *hcd, char *buf);
int xhci_find_raw_port_number(struct usb_hcd *hcd, int port1);
struct xhci_hub *xhci_get_rhub(struct usb_hcd *hcd);
+void xhci_set_port_power(struct xhci_hcd *xhci, struct usb_hcd *hcd, u16 index,
+ bool on, unsigned long *flags);
void xhci_hc_died(struct xhci_hcd *xhci);
{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_DISPLAY_PID) },
{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_LITE_PID) },
{ USB_DEVICE(FTDI_VID, CHETCO_SEASMART_ANALOG_PID) },
+ /* Belimo Automation devices */
+ { USB_DEVICE(FTDI_VID, BELIMO_ZTH_PID) },
+ { USB_DEVICE(FTDI_VID, BELIMO_ZIP_PID) },
/* ICP DAS I-756xU devices */
{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7560U_PID) },
{ USB_DEVICE(ICPDAS_VID, ICPDAS_I7561U_PID) },
#define CHETCO_SEASMART_LITE_PID 0xA5AE /* SeaSmart Lite USB Adapter */
#define CHETCO_SEASMART_ANALOG_PID 0xA5AF /* SeaSmart Analog Adapter */
+/*
+ * Belimo Automation
+ */
+#define BELIMO_ZTH_PID 0x8050
+#define BELIMO_ZIP_PID 0xC811
+
/*
* Unjo AB
*/
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) },
{ }
};
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_8S) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416) },
{ USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_TI_EDGEPORT_416B) },
+ { USB_DEVICE(USB_VENDOR_ID_ION, ION_DEVICE_ID_E5805A) },
{ }
};
//
// Definitions for other product IDs
#define ION_DEVICE_ID_MT4X56USB 0x1403 // OEM device
+#define ION_DEVICE_ID_E5805A 0x1A01 // OEM device (rebranded Edgeport/4)
#define GENERATION_ID_FROM_USB_PRODUCT_ID(ProductId) \
#define QUECTEL_PRODUCT_EG95 0x0195
#define QUECTEL_PRODUCT_BG96 0x0296
#define QUECTEL_PRODUCT_EP06 0x0306
+#define QUECTEL_PRODUCT_EM05G 0x030a
#define QUECTEL_PRODUCT_EM12 0x0512
#define QUECTEL_PRODUCT_RM500Q 0x0800
#define QUECTEL_PRODUCT_EC200S_CN 0x6002
#define QUECTEL_PRODUCT_EC200T 0x6026
+#define QUECTEL_PRODUCT_RM500K 0x7001
#define CMOTECH_VENDOR_ID 0x16d8
#define CMOTECH_PRODUCT_6001 0x6001
#define CINTERION_PRODUCT_CLS8 0x00b0
#define CINTERION_PRODUCT_MV31_MBIM 0x00b3
#define CINTERION_PRODUCT_MV31_RMNET 0x00b7
+#define CINTERION_PRODUCT_MV31_2_MBIM 0x00b8
+#define CINTERION_PRODUCT_MV31_2_RMNET 0x00b9
#define CINTERION_PRODUCT_MV32_WA 0x00f1
#define CINTERION_PRODUCT_MV32_WB 0x00f2
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0xff, 0xff),
.driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EP06, 0xff, 0, 0) },
+ { USB_DEVICE_INTERFACE_CLASS(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM05G, 0xff),
+ .driver_info = RSVD(6) | ZLP },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0xff, 0xff),
.driver_info = RSVD(1) | RSVD(2) | RSVD(3) | RSVD(4) | NUMEP2 },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EM12, 0xff, 0, 0) },
.driver_info = ZLP },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200S_CN, 0xff, 0, 0) },
{ USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_EC200T, 0xff, 0, 0) },
+ { USB_DEVICE_AND_INTERFACE_INFO(QUECTEL_VENDOR_ID, QUECTEL_PRODUCT_RM500K, 0xff, 0x00, 0x00) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_6001) },
{ USB_DEVICE(CMOTECH_VENDOR_ID, CMOTECH_PRODUCT_CMU_300) },
.driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
{ USB_DEVICE_INTERFACE_CLASS(TELIT_VENDOR_ID, 0x1231, 0xff), /* Telit LE910Cx (RNDIS) */
.driver_info = NCTRL(2) | RSVD(3) },
+ { USB_DEVICE_AND_INTERFACE_INFO(TELIT_VENDOR_ID, 0x1250, 0xff, 0x00, 0x00) }, /* Telit LE910Cx (rmnet) */
{ USB_DEVICE(TELIT_VENDOR_ID, 0x1260),
.driver_info = NCTRL(0) | RSVD(1) | RSVD(2) },
{ USB_DEVICE(TELIT_VENDOR_ID, 0x1261),
.driver_info = RSVD(3)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_RMNET, 0xff),
.driver_info = RSVD(0)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_MBIM, 0xff),
+ .driver_info = RSVD(3)},
+ { USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV31_2_RMNET, 0xff),
+ .driver_info = RSVD(0)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WA, 0xff),
.driver_info = RSVD(3)},
{ USB_DEVICE_INTERFACE_CLASS(CINTERION_VENDOR_ID, CINTERION_PRODUCT_MV32_WB, 0xff),
break;
case 0x200:
switch (bcdDevice) {
- case 0x100:
+ case 0x100: /* GC */
case 0x105:
+ return TYPE_HXN;
+ case 0x300: /* GT / TA */
+ if (pl2303_supports_hx_status(serial))
+ return TYPE_TA;
+ fallthrough;
case 0x305:
+ case 0x400: /* GL */
case 0x405:
+ return TYPE_HXN;
+ case 0x500: /* GE / TB */
+ if (pl2303_supports_hx_status(serial))
+ return TYPE_TB;
+ fallthrough;
+ case 0x505:
+ case 0x600: /* GS */
case 0x605:
- /*
- * Assume it's an HXN-type if the device doesn't
- * support the old read request value.
- */
- if (!pl2303_supports_hx_status(serial))
- return TYPE_HXN;
- break;
- case 0x300:
- return TYPE_TA;
- case 0x500:
- return TYPE_TB;
+ case 0x700: /* GR */
+ case 0x705:
+ return TYPE_HXN;
}
break;
}
partner->usb_pd = 1;
sysfs_notify(&partner_dev->kobj, NULL,
"supports_usb_power_delivery");
+ kobject_uevent(&partner_dev->kobj, KOBJ_CHANGE);
}
put_device(partner_dev);
}
tristate "Intel WhiskeyCove PMIC USB Type-C PHY driver"
depends on ACPI
depends on MFD_INTEL_PMC_BXT
- depends on INTEL_SOC_PMIC
depends on BXT_WC_PMIC_OPREGION
help
This driver adds support for USB Type-C on Intel Broxton platforms
/* Resources for implementing the notification channel from the device
* to the driver. fwqp is the firmware end of an RC connection; the
- * other end is vqqp used by the driver. cq is is where completions are
+ * other end is vqqp used by the driver. cq is where completions are
* reported.
*/
struct mlx5_vdpa_cq cq;
id = mlx5vdpa16_to_cpu(mvdev, vlan);
mac_vlan_del(ndev, ndev->config.mac, id, true);
+ status = VIRTIO_NET_OK;
break;
default:
- break;
-}
+ break;
+ }
-return status;
+ return status;
}
static void mlx5_cvq_kick_handler(struct work_struct *work)
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
ndev->event_cbs[idx] = *cb;
+ if (is_ctrl_vq_idx(mvdev, idx))
+ mvdev->cvq.event_cb = *cb;
}
static void mlx5_cvq_notify(struct vringh *vring)
static int setup_virtqueues(struct mlx5_vdpa_dev *mvdev)
{
struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
- struct mlx5_control_vq *cvq = &mvdev->cvq;
int err;
int i;
goto err_vq;
}
- if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ)) {
- err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
- MLX5_CVQ_MAX_ENT, false,
- (struct vring_desc *)(uintptr_t)cvq->desc_addr,
- (struct vring_avail *)(uintptr_t)cvq->driver_addr,
- (struct vring_used *)(uintptr_t)cvq->device_addr);
- if (err)
- goto err_vq;
- }
-
return 0;
err_vq:
ndev->mvdev.cvq.ready = false;
}
+static int setup_cvq_vring(struct mlx5_vdpa_dev *mvdev)
+{
+ struct mlx5_control_vq *cvq = &mvdev->cvq;
+ int err = 0;
+
+ if (mvdev->actual_features & BIT_ULL(VIRTIO_NET_F_CTRL_VQ))
+ err = vringh_init_iotlb(&cvq->vring, mvdev->actual_features,
+ MLX5_CVQ_MAX_ENT, false,
+ (struct vring_desc *)(uintptr_t)cvq->desc_addr,
+ (struct vring_avail *)(uintptr_t)cvq->driver_addr,
+ (struct vring_used *)(uintptr_t)cvq->device_addr);
+
+ return err;
+}
+
static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
{
struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
if ((status ^ ndev->mvdev.status) & VIRTIO_CONFIG_S_DRIVER_OK) {
if (status & VIRTIO_CONFIG_S_DRIVER_OK) {
+ err = setup_cvq_vring(mvdev);
+ if (err) {
+ mlx5_vdpa_warn(mvdev, "failed to setup control VQ vring\n");
+ goto err_setup;
+ }
err = setup_driver(mvdev);
if (err) {
mlx5_vdpa_warn(mvdev, "failed to setup driver\n");
dev->minor = ret;
dev->msg_timeout = VDUSE_MSG_DEFAULT_TIMEOUT;
- dev->dev = device_create(vduse_class, NULL,
- MKDEV(MAJOR(vduse_major), dev->minor),
- dev, "%s", config->name);
+ dev->dev = device_create_with_groups(vduse_class, NULL,
+ MKDEV(MAJOR(vduse_major), dev->minor),
+ dev, vduse_dev_groups, "%s", config->name);
if (IS_ERR(dev->dev)) {
ret = PTR_ERR(dev->dev);
goto err_dev;
return kasprintf(GFP_KERNEL, "vduse/%s", dev_name(dev));
}
-static void vduse_mgmtdev_release(struct device *dev)
-{
-}
-
-static struct device vduse_mgmtdev = {
- .init_name = "vduse",
- .release = vduse_mgmtdev_release,
+struct vduse_mgmt_dev {
+ struct vdpa_mgmt_dev mgmt_dev;
+ struct device dev;
};
-static struct vdpa_mgmt_dev mgmt_dev;
+static struct vduse_mgmt_dev *vduse_mgmt;
static int vduse_dev_init_vdpa(struct vduse_dev *dev, const char *name)
{
}
set_dma_ops(&vdev->vdpa.dev, &vduse_dev_dma_ops);
vdev->vdpa.dma_dev = &vdev->vdpa.dev;
- vdev->vdpa.mdev = &mgmt_dev;
+ vdev->vdpa.mdev = &vduse_mgmt->mgmt_dev;
return 0;
}
{ 0 },
};
-static struct vdpa_mgmt_dev mgmt_dev = {
- .device = &vduse_mgmtdev,
- .id_table = id_table,
- .ops = &vdpa_dev_mgmtdev_ops,
-};
+static void vduse_mgmtdev_release(struct device *dev)
+{
+ struct vduse_mgmt_dev *mgmt_dev;
+
+ mgmt_dev = container_of(dev, struct vduse_mgmt_dev, dev);
+ kfree(mgmt_dev);
+}
static int vduse_mgmtdev_init(void)
{
int ret;
- ret = device_register(&vduse_mgmtdev);
- if (ret)
+ vduse_mgmt = kzalloc(sizeof(*vduse_mgmt), GFP_KERNEL);
+ if (!vduse_mgmt)
+ return -ENOMEM;
+
+ ret = dev_set_name(&vduse_mgmt->dev, "vduse");
+ if (ret) {
+ kfree(vduse_mgmt);
return ret;
+ }
- ret = vdpa_mgmtdev_register(&mgmt_dev);
+ vduse_mgmt->dev.release = vduse_mgmtdev_release;
+
+ ret = device_register(&vduse_mgmt->dev);
if (ret)
- goto err;
+ goto dev_reg_err;
- return 0;
-err:
- device_unregister(&vduse_mgmtdev);
+ vduse_mgmt->mgmt_dev.id_table = id_table;
+ vduse_mgmt->mgmt_dev.ops = &vdpa_dev_mgmtdev_ops;
+ vduse_mgmt->mgmt_dev.device = &vduse_mgmt->dev;
+ ret = vdpa_mgmtdev_register(&vduse_mgmt->mgmt_dev);
+ if (ret)
+ device_unregister(&vduse_mgmt->dev);
+
+ return ret;
+
+dev_reg_err:
+ put_device(&vduse_mgmt->dev);
return ret;
}
static void vduse_mgmtdev_exit(void)
{
- vdpa_mgmtdev_unregister(&mgmt_dev);
- device_unregister(&vduse_mgmtdev);
+ vdpa_mgmtdev_unregister(&vduse_mgmt->mgmt_dev);
+ device_unregister(&vduse_mgmt->dev);
}
static int vduse_init(void)
return PTR_ERR(vduse_class);
vduse_class->devnode = vduse_devnode;
- vduse_class->dev_groups = vduse_dev_groups;
ret = alloc_chrdev_region(&vduse_major, 0, VDUSE_DEV_MAX, "vduse");
if (ret)
if (!iommu_group)
return ERR_PTR(-EINVAL);
+ /*
+ * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
+ * restore cache coherency. It has to be checked here because it is only
+ * valid for cases where we are using iommu groups.
+ */
+ if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) {
+ iommu_group_put(iommu_group);
+ return ERR_PTR(-EINVAL);
+ }
+
group = vfio_group_get_from_iommu(iommu_group);
if (!group)
group = vfio_create_group(iommu_group, VFIO_IOMMU);
int vfio_register_group_dev(struct vfio_device *device)
{
- /*
- * VFIO always sets IOMMU_CACHE because we offer no way for userspace to
- * restore cache coherency.
- */
- if (!iommu_capable(device->dev->bus, IOMMU_CAP_CACHE_COHERENCY))
- return -EINVAL;
-
return __vfio_register_dev(device,
vfio_group_find_or_alloc(device->dev));
}
ops->set_vq_ready(vdpa, idx, s.num);
return 0;
case VHOST_VDPA_GET_VRING_GROUP:
+ if (!ops->get_vq_group)
+ return -EOPNOTSUPP;
s.index = idx;
s.num = ops->get_vq_group(vdpa, idx);
if (s.num >= vdpa->ngroups)
vhost_dev_stop(&v->vdev);
vhost_vdpa_free_domain(v);
vhost_vdpa_config_put(v);
- vhost_dev_cleanup(&v->vdev);
+ vhost_vdpa_cleanup(v);
mutex_unlock(&d->mutex);
atomic_dec(&v->opened);
int (*copy)(const struct vringh *vrh,
void *dst, const void *src, size_t len))
{
- int err, count = 0, up_next, desc_max;
+ int err, count = 0, indirect_count = 0, up_next, desc_max;
struct vring_desc desc, *descs;
struct vringh_range range = { -1ULL, 0 }, slowrange;
bool slow = false;
continue;
}
- if (count++ == vrh->vring.num) {
+ if (up_next == -1)
+ count++;
+ else
+ indirect_count++;
+
+ if (count > vrh->vring.num || indirect_count > desc_max) {
vringh_bad("Descriptor loop in %p", descs);
err = -ELOOP;
goto fail;
i = return_from_indirect(vrh, &up_next,
&descs, &desc_max);
slow = false;
+ indirect_count = 0;
} else
break;
}
return ret;
}
+#if defined(CONFIG_FB_STI)
/* check if given fb_info is the primary device */
int fb_is_primary_device(struct fb_info *info)
{
return (sti->info == info);
}
EXPORT_SYMBOL(fb_is_primary_device);
+#endif
MODULE_AUTHOR("Philipp Rumpf, Helge Deller, Thomas Bogendoerfer");
MODULE_DESCRIPTION("Core STI driver for HP's NGLE series graphics cards in HP PARISC machines");
/* Blank the LCD */
au1100fb_fb_blank(VESA_POWERDOWN, &fbdev->info);
- if (fbdev->lcdclk)
- clk_disable(fbdev->lcdclk);
+ clk_disable(fbdev->lcdclk);
memcpy(&fbregs, fbdev->regs, sizeof(struct au1100fb_regs));
memcpy(fbdev->regs, &fbregs, sizeof(struct au1100fb_regs));
- if (fbdev->lcdclk)
- clk_enable(fbdev->lcdclk);
+ clk_enable(fbdev->lcdclk);
/* Unblank the LCD */
au1100fb_fb_blank(VESA_NO_BLANKING, &fbdev->info);
.id_table = cirrusfb_pci_table,
.probe = cirrusfb_pci_register,
.remove = cirrusfb_pci_unregister,
-#ifdef CONFIG_PM
-#if 0
- .suspend = cirrusfb_pci_suspend,
- .resume = cirrusfb_pci_resume,
-#endif
-#endif
};
#endif /* CONFIG_PCI */
if (charcount != 256 && charcount != 512)
return -EINVAL;
+ /* font bigger than screen resolution ? */
+ if (w > FBCON_SWAP(info->var.rotate, info->var.xres, info->var.yres) ||
+ h > FBCON_SWAP(info->var.rotate, info->var.yres, info->var.xres))
+ return -EINVAL;
+
/* Make sure drawing engine can handle the font */
if (!(info->pixmap.blit_x & (1 << (font->width - 1))) ||
!(info->pixmap.blit_y & (1 << (font->height - 1))))
}
EXPORT_SYMBOL(fbcon_update_vcs);
+/* let fbcon check if it supports a new screen resolution */
+int fbcon_modechange_possible(struct fb_info *info, struct fb_var_screeninfo *var)
+{
+ struct fbcon_ops *ops = info->fbcon_par;
+ struct vc_data *vc;
+ unsigned int i;
+
+ WARN_CONSOLE_UNLOCKED();
+
+ if (!ops)
+ return 0;
+
+ /* prevent setting a screen size which is smaller than font size */
+ for (i = first_fb_vc; i <= last_fb_vc; i++) {
+ vc = vc_cons[i].d;
+ if (!vc || vc->vc_mode != KD_TEXT ||
+ fbcon_info_from_console(i) != info)
+ continue;
+
+ if (vc->vc_font.width > FBCON_SWAP(var->rotate, var->xres, var->yres) ||
+ vc->vc_font.height > FBCON_SWAP(var->rotate, var->yres, var->xres))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(fbcon_modechange_possible);
+
int fbcon_mode_deleted(struct fb_info *info,
struct fb_videomode *mode)
{
#include <linux/kernel.h>
#include <linux/major.h>
#include <linux/slab.h>
+#include <linux/sysfb.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/vt.h>
while (n && (n * (logo->width + 8) - 8 > xres))
--n;
- image.dx = (xres - n * (logo->width + 8) - 8) / 2;
+ image.dx = (xres - (n * (logo->width + 8) - 8)) / 2;
image.dy = y ?: (yres - logo->height) / 2;
} else {
image.dx = 0;
if (ret)
return ret;
+ /* verify that virtual resolution >= physical resolution */
+ if (var->xres_virtual < var->xres ||
+ var->yres_virtual < var->yres) {
+ pr_warn("WARNING: fbcon: Driver '%s' missed to adjust virtual screen size (%ux%u vs. %ux%u)\n",
+ info->fix.id,
+ var->xres_virtual, var->yres_virtual,
+ var->xres, var->yres);
+ return -EINVAL;
+ }
+
if ((var->activate & FB_ACTIVATE_MASK) != FB_ACTIVATE_NOW)
return 0;
return -EFAULT;
console_lock();
lock_fb_info(info);
- ret = fb_set_var(info, &var);
+ ret = fbcon_modechange_possible(info, &var);
+ if (!ret)
+ ret = fb_set_var(info, &var);
if (!ret)
fbcon_update_vcs(info, var.activate & FB_ACTIVATE_ALL);
unlock_fb_info(info);
do_free = true;
}
+ /*
+ * If a driver asked to unregister a platform device registered by
+ * sysfb, then can be assumed that this is a driver for a display
+ * that is set up by the system firmware and has a generic driver.
+ *
+ * Drivers for devices that don't have a generic driver will never
+ * ask for this, so let's assume that a real driver for the display
+ * was already probed and prevent sysfb to register devices later.
+ */
+ sysfb_disable();
+
mutex_lock(®istration_lock);
do_remove_conflicting_framebuffers(a, name, primary);
mutex_unlock(®istration_lock);
struct fb_info *info;
struct intelfb_info *dinfo;
int i, err, dvo;
- int aperture_size, stolen_size;
+ int aperture_size, stolen_size = 0;
struct agp_kern_info gtt_info;
int agp_memtype;
const char *s;
return -ENODEV;
}
- if (intelfbhw_get_memory(pdev, &aperture_size,&stolen_size)) {
+ if (intelfbhw_get_memory(pdev, &aperture_size, &stolen_size)) {
cleanup(dinfo);
return -ENODEV;
}
case PCI_DEVICE_ID_INTEL_945GME:
case PCI_DEVICE_ID_INTEL_965G:
case PCI_DEVICE_ID_INTEL_965GM:
- /* 915, 945 and 965 chipsets support a 256MB aperture.
- Aperture size is determined by inspected the
- base address of the aperture. */
- if (pci_resource_start(pdev, 2) & 0x08000000)
- *aperture_size = MB(128);
- else
- *aperture_size = MB(256);
+ /*
+ * 915, 945 and 965 chipsets support 64MB, 128MB or 256MB
+ * aperture. Determine size from PCI resource length.
+ */
+ *aperture_size = pci_resource_len(pdev, 2);
break;
default:
if ((tmp & INTEL_GMCH_MEM_MASK) == INTEL_GMCH_MEM_64M)
int bus_pick_count, bus_pick_width;
/*
- * We set explicitly the the bus_pick_count as well, although
+ * We set explicitly the bus_pick_count as well, although
* with remapping/reordering disabled it will be calculated by HW
* as (32 / bus_pick_width).
*/
/*
* In OMAP5+, the HFBITCLK must be divided by 2 before issuing the
* HDMI_PHYPWRCMD_LDOON command.
- */
+ */
if (phy_feat->bist_ctrl)
REG_FLD_MOD(phy->base, HDMI_TXPHY_BIST_CONTROL, 1, 11, 11);
struct pxa3xx_gcu_batch *buffer;
struct pxa3xx_gcu_priv *priv = to_pxa3xx_gcu_priv(file);
- int words = count / 4;
+ size_t words = count / 4;
/* Does not need to be atomic. There's a lock in user space,
* but anyhow, this is just for statistics. */
if (IS_ERR(clock)) {
if (PTR_ERR(clock) == -EPROBE_DEFER) {
while (--i >= 0) {
- if (par->clks[i])
- clk_put(par->clks[i]);
+ clk_put(par->clks[i]);
}
kfree(par->clks);
return -EPROBE_DEFER;
/*
* Modern graphical hardware not only supports pipelines but some
- * also support multiple monitors where each display can have its
+ * also support multiple monitors where each display can have
* its own unique data. In this case each display could be
* represented by a separate framebuffer device thus a separate
* struct fb_info. Now the struct xxx_par represents the graphics
*
* See Documentation/driver-api/pm/devices.rst for more information
*/
-static int xxxfb_suspend(struct pci_dev *dev, pm_message_t msg)
+static int xxxfb_suspend(struct device *dev)
{
- struct fb_info *info = pci_get_drvdata(dev);
+ struct fb_info *info = dev_get_drvdata(dev);
struct xxxfb_par *par = info->par;
/* suspend here */
*
* See Documentation/driver-api/pm/devices.rst for more information
*/
-static int xxxfb_resume(struct pci_dev *dev)
+static int xxxfb_resume(struct device *dev)
{
- struct fb_info *info = pci_get_drvdata(dev);
+ struct fb_info *info = dev_get_drvdata(dev);
struct xxxfb_par *par = info->par;
/* resume here */
{ 0, }
};
+static SIMPLE_DEV_PM_OPS(xxxfb_pm_ops, xxxfb_suspend, xxxfb_resume);
+
/* For PCI drivers */
static struct pci_driver xxxfb_driver = {
.name = "xxxfb",
.id_table = xxxfb_id_table,
.probe = xxxfb_probe,
.remove = xxxfb_remove,
- .suspend = xxxfb_suspend, /* optional but recommended */
- .resume = xxxfb_resume, /* optional but recommended */
+ .driver.pm = xxxfb_pm_ops, /* optional but recommended */
};
MODULE_DEVICE_TABLE(pci, xxxfb_id_table);
struct device *dev = &pdev->dev;
struct snp_guest_dev *snp_dev;
struct miscdevice *misc;
+ void __iomem *mapping;
int ret;
if (!dev->platform_data)
return -ENODEV;
data = (struct sev_guest_platform_data *)dev->platform_data;
- layout = (__force void *)ioremap_encrypted(data->secrets_gpa, PAGE_SIZE);
- if (!layout)
+ mapping = ioremap_encrypted(data->secrets_gpa, PAGE_SIZE);
+ if (!mapping)
return -ENODEV;
+ layout = (__force void *)mapping;
+
ret = -ENOMEM;
snp_dev = devm_kzalloc(&pdev->dev, sizeof(struct snp_guest_dev), GFP_KERNEL);
if (!snp_dev)
e_free_request:
free_shared_pages(snp_dev->request, sizeof(struct snp_guest_msg));
e_unmap:
- iounmap(layout);
+ iounmap(mapping);
return ret;
}
bus, such as CONFIG_VIRTIO_PCI, CONFIG_VIRTIO_MMIO, CONFIG_RPMSG
or CONFIG_S390_GUEST.
-config ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
- bool
- help
- This option is selected if the architecture may need to enforce
- VIRTIO_F_ACCESS_PLATFORM
-
config VIRTIO_PCI_LIB
tristate
help
if VIRTIO_MENU
+config VIRTIO_HARDEN_NOTIFICATION
+ bool "Harden virtio notification"
+ help
+ Enable this to harden the device notifications and suppress
+ those that happen at a time where notifications are illegal.
+
+ Experimental: Note that several drivers still have bugs that
+ may cause crashes or hangs when correct handling of
+ notifications is enforced; depending on the subset of
+ drivers and devices you use, this may or may not work.
+
+ If unsure, say N.
+
config VIRTIO_PCI
tristate "PCI driver for virtio devices"
depends on PCI
#include <linux/module.h>
#include <linux/idr.h>
#include <linux/of.h>
+#include <linux/platform-feature.h>
#include <uapi/linux/virtio_ids.h>
/* Unique numbering for virtio devices. */
static int virtio_features_ok(struct virtio_device *dev)
{
unsigned int status;
- int ret;
might_sleep();
- ret = arch_has_restricted_virtio_memory_access();
- if (ret) {
+ if (platform_has(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS)) {
if (!virtio_has_feature(dev, VIRTIO_F_VERSION_1)) {
dev_warn(&dev->dev,
"device must provide VIRTIO_F_VERSION_1\n");
* */
void virtio_reset_device(struct virtio_device *dev)
{
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
/*
* The below virtio_synchronize_cbs() guarantees that any
* interrupt for this line arriving after
*/
virtio_break_device(dev);
virtio_synchronize_cbs(dev);
+#endif
dev->config->reset(dev);
}
#include <linux/list.h>
#include <linux/module.h>
#include <linux/platform_device.h>
+#include <linux/pm.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/virtio.h>
/*
* Per memory-barriers.txt, wmb() is not needed to guarantee
- * that the the cache coherent memory writes have completed
+ * that the cache coherent memory writes have completed
* before writing to the MMIO region.
*/
writel(status, vm_dev->base + VIRTIO_MMIO_STATUS);
.synchronize_cbs = vm_synchronize_cbs,
};
+#ifdef CONFIG_PM_SLEEP
+static int virtio_mmio_freeze(struct device *dev)
+{
+ struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev);
+
+ return virtio_device_freeze(&vm_dev->vdev);
+}
+
+static int virtio_mmio_restore(struct device *dev)
+{
+ struct virtio_mmio_device *vm_dev = dev_get_drvdata(dev);
+
+ if (vm_dev->version == 1)
+ writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
+
+ return virtio_device_restore(&vm_dev->vdev);
+}
+
+static const struct dev_pm_ops virtio_mmio_pm_ops = {
+ SET_SYSTEM_SLEEP_PM_OPS(virtio_mmio_freeze, virtio_mmio_restore)
+};
+#endif
static void virtio_mmio_release_dev(struct device *_d)
{
if (!vm_cmdline_parent_registered) {
err = device_register(&vm_cmdline_parent);
if (err) {
+ put_device(&vm_cmdline_parent);
pr_err("Failed to register parent device!\n");
return err;
}
.name = "virtio-mmio",
.of_match_table = virtio_mmio_match,
.acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match),
+#ifdef CONFIG_PM_SLEEP
+ .pm = &virtio_mmio_pm_ops,
+#endif
},
};
check_offsets();
- mdev->pci_dev = pci_dev;
-
/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
return -ENODEV;
/*
* Per memory-barriers.txt, wmb() is not needed to guarantee
- * that the the cache coherent memory writes have completed
+ * that the cache coherent memory writes have completed
* before writing to the MMIO region.
*/
vp_iowrite8(status, &cfg->device_status);
/* Number we've added since last sync. */
unsigned int num_added;
- /* Last used index we've seen. */
+ /* Last used index we've seen.
+ * for split ring, it just contains last used index
+ * for packed ring:
+ * bits up to VRING_PACKED_EVENT_F_WRAP_CTR include the last used index.
+ * bits from VRING_PACKED_EVENT_F_WRAP_CTR include the used wrap counter.
+ */
u16 last_used_idx;
/* Hint for event idx: already triggered no need to disable. */
/* Driver ring wrap counter. */
bool avail_wrap_counter;
- /* Device ring wrap counter. */
- bool used_wrap_counter;
-
/* Avail used flags. */
u16 avail_used_flags;
for (; num && vring_size(num, vring_align) > PAGE_SIZE; num /= 2) {
queue = vring_alloc_queue(vdev, vring_size(num, vring_align),
&dma_addr,
- GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
+ GFP_KERNEL | __GFP_NOWARN | __GFP_ZERO);
if (queue)
break;
if (!may_reduce_num)
/*
* Packed ring specific functions - *_packed().
*/
+static inline bool packed_used_wrap_counter(u16 last_used_idx)
+{
+ return !!(last_used_idx & (1 << VRING_PACKED_EVENT_F_WRAP_CTR));
+}
+
+static inline u16 packed_last_used(u16 last_used_idx)
+{
+ return last_used_idx & ~(-(1 << VRING_PACKED_EVENT_F_WRAP_CTR));
+}
static void vring_unmap_extra_packed(const struct vring_virtqueue *vq,
struct vring_desc_extra *extra)
static inline bool more_used_packed(const struct vring_virtqueue *vq)
{
- return is_used_desc_packed(vq, vq->last_used_idx,
- vq->packed.used_wrap_counter);
+ u16 last_used;
+ u16 last_used_idx;
+ bool used_wrap_counter;
+
+ last_used_idx = READ_ONCE(vq->last_used_idx);
+ last_used = packed_last_used(last_used_idx);
+ used_wrap_counter = packed_used_wrap_counter(last_used_idx);
+ return is_used_desc_packed(vq, last_used, used_wrap_counter);
}
static void *virtqueue_get_buf_ctx_packed(struct virtqueue *_vq,
void **ctx)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- u16 last_used, id;
+ u16 last_used, id, last_used_idx;
+ bool used_wrap_counter;
void *ret;
START_USE(vq);
/* Only get used elements after they have been exposed by host. */
virtio_rmb(vq->weak_barriers);
- last_used = vq->last_used_idx;
+ last_used_idx = READ_ONCE(vq->last_used_idx);
+ used_wrap_counter = packed_used_wrap_counter(last_used_idx);
+ last_used = packed_last_used(last_used_idx);
id = le16_to_cpu(vq->packed.vring.desc[last_used].id);
*len = le32_to_cpu(vq->packed.vring.desc[last_used].len);
ret = vq->packed.desc_state[id].data;
detach_buf_packed(vq, id, ctx);
- vq->last_used_idx += vq->packed.desc_state[id].num;
- if (unlikely(vq->last_used_idx >= vq->packed.vring.num)) {
- vq->last_used_idx -= vq->packed.vring.num;
- vq->packed.used_wrap_counter ^= 1;
+ last_used += vq->packed.desc_state[id].num;
+ if (unlikely(last_used >= vq->packed.vring.num)) {
+ last_used -= vq->packed.vring.num;
+ used_wrap_counter ^= 1;
}
+ last_used = (last_used | (used_wrap_counter << VRING_PACKED_EVENT_F_WRAP_CTR));
+ WRITE_ONCE(vq->last_used_idx, last_used);
+
/*
* If we expect an interrupt for the next entry, tell host
* by writing event index and flush out the write before
if (vq->packed.event_flags_shadow == VRING_PACKED_EVENT_FLAG_DESC)
virtio_store_mb(vq->weak_barriers,
&vq->packed.vring.driver->off_wrap,
- cpu_to_le16(vq->last_used_idx |
- (vq->packed.used_wrap_counter <<
- VRING_PACKED_EVENT_F_WRAP_CTR)));
+ cpu_to_le16(vq->last_used_idx));
LAST_ADD_TIME_INVALID(vq);
if (vq->event) {
vq->packed.vring.driver->off_wrap =
- cpu_to_le16(vq->last_used_idx |
- (vq->packed.used_wrap_counter <<
- VRING_PACKED_EVENT_F_WRAP_CTR));
+ cpu_to_le16(vq->last_used_idx);
/*
* We need to update event offset and event wrap
* counter first before updating event flags.
}
END_USE(vq);
- return vq->last_used_idx | ((u16)vq->packed.used_wrap_counter <<
- VRING_PACKED_EVENT_F_WRAP_CTR);
+ return vq->last_used_idx;
}
static bool virtqueue_poll_packed(struct virtqueue *_vq, u16 off_wrap)
static bool virtqueue_enable_cb_delayed_packed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
- u16 used_idx, wrap_counter;
+ u16 used_idx, wrap_counter, last_used_idx;
u16 bufs;
START_USE(vq);
if (vq->event) {
/* TODO: tune this threshold */
bufs = (vq->packed.vring.num - vq->vq.num_free) * 3 / 4;
- wrap_counter = vq->packed.used_wrap_counter;
+ last_used_idx = READ_ONCE(vq->last_used_idx);
+ wrap_counter = packed_used_wrap_counter(last_used_idx);
- used_idx = vq->last_used_idx + bufs;
+ used_idx = packed_last_used(last_used_idx) + bufs;
if (used_idx >= vq->packed.vring.num) {
used_idx -= vq->packed.vring.num;
wrap_counter ^= 1;
*/
virtio_mb(vq->weak_barriers);
- if (is_used_desc_packed(vq,
- vq->last_used_idx,
- vq->packed.used_wrap_counter)) {
+ last_used_idx = READ_ONCE(vq->last_used_idx);
+ wrap_counter = packed_used_wrap_counter(last_used_idx);
+ used_idx = packed_last_used(last_used_idx);
+ if (is_used_desc_packed(vq, used_idx, wrap_counter)) {
END_USE(vq);
return false;
}
vq->we_own_ring = true;
vq->notify = notify;
vq->weak_barriers = weak_barriers;
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
vq->broken = true;
- vq->last_used_idx = 0;
+#else
+ vq->broken = false;
+#endif
+ vq->last_used_idx = 0 | (1 << VRING_PACKED_EVENT_F_WRAP_CTR);
vq->event_triggered = false;
vq->num_added = 0;
vq->packed_ring = true;
vq->packed.next_avail_idx = 0;
vq->packed.avail_wrap_counter = 1;
- vq->packed.used_wrap_counter = 1;
vq->packed.event_flags_shadow = 0;
vq->packed.avail_used_flags = 1 << VRING_PACKED_DESC_F_AVAIL;
}
if (unlikely(vq->broken)) {
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
dev_warn_once(&vq->vq.vdev->dev,
"virtio vring IRQ raised before DRIVER_OK");
return IRQ_NONE;
+#else
+ return IRQ_HANDLED;
+#endif
}
/* Just a hint for performance: so it's ok that this can be racy! */
vq->we_own_ring = false;
vq->notify = notify;
vq->weak_barriers = weak_barriers;
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
vq->broken = true;
+#else
+ vq->broken = false;
+#endif
vq->last_used_idx = 0;
vq->event_triggered = false;
vq->num_added = 0;
MODULE_AUTHOR("Nick Hawkins <nick.hawkins@hpe.com>");
MODULE_AUTHOR("Jean-Marie Verdun <verdun@hpe.com>");
MODULE_DESCRIPTION("Driver for GXP watchdog timer");
+MODULE_LICENSE("GPL");
having to balloon out RAM regions in order to obtain physical memory
space to create such mappings.
+config XEN_GRANT_DMA_IOMMU
+ bool
+ select IOMMU_API
+
+config XEN_GRANT_DMA_OPS
+ bool
+ select DMA_OPS
+
+config XEN_VIRTIO
+ bool "Xen virtio support"
+ depends on VIRTIO
+ select XEN_GRANT_DMA_OPS
+ select XEN_GRANT_DMA_IOMMU if OF
+ help
+ Enable virtio support for running as Xen guest. Depending on the
+ guest type this will require special support on the backend side
+ (qemu or kernel, depending on the virtio device types used).
+
+ If in doubt, say n.
+
endmenu
xen-privcmd-y := privcmd.o privcmd-buf.o
obj-$(CONFIG_XEN_FRONT_PGDIR_SHBUF) += xen-front-pgdir-shbuf.o
obj-$(CONFIG_XEN_UNPOPULATED_ALLOC) += unpopulated-alloc.o
+obj-$(CONFIG_XEN_GRANT_DMA_OPS) += grant-dma-ops.o
+obj-$(CONFIG_XEN_GRANT_DMA_IOMMU) += grant-dma-iommu.o
if (HYPERVISOR_xen_version(XENVER_get_features, &fi) < 0)
break;
for (j = 0; j < 32; j++)
- xen_features[i * 32 + j] = !!(fi.submap & 1<<j);
+ xen_features[i * 32 + j] = !!(fi.submap & 1U << j);
}
if (xen_pv_domain()) {
#include <linux/mmu_notifier.h>
#include <linux/types.h>
#include <xen/interface/event_channel.h>
+#include <xen/grant_table.h>
struct gntdev_dmabuf_priv;
struct gnttab_unmap_grant_ref *unmap_ops;
struct gnttab_map_grant_ref *kmap_ops;
struct gnttab_unmap_grant_ref *kunmap_ops;
+ bool *being_removed;
struct page **pages;
unsigned long pages_vm_start;
/* Needed to avoid allocation in gnttab_dma_free_pages(). */
xen_pfn_t *frames;
#endif
+
+ /* Number of live grants */
+ atomic_t live_grants;
+ /* Needed to avoid allocation in __unmap_grant_pages */
+ struct gntab_unmap_queue_data unmap_data;
};
struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/refcount.h>
+#include <linux/workqueue.h>
#include <xen/xen.h>
#include <xen/grant_table.h>
MODULE_PARM_DESC(limit,
"Maximum number of grants that may be mapped by one mapping request");
+/* True in PV mode, false otherwise */
static int use_ptemod;
-static int unmap_grant_pages(struct gntdev_grant_map *map,
- int offset, int pages);
+static void unmap_grant_pages(struct gntdev_grant_map *map,
+ int offset, int pages);
static struct miscdevice gntdev_miscdev;
kvfree(map->unmap_ops);
kvfree(map->kmap_ops);
kvfree(map->kunmap_ops);
+ kvfree(map->being_removed);
kfree(map);
}
add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
GFP_KERNEL);
add->pages = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
+ add->being_removed =
+ kvcalloc(count, sizeof(add->being_removed[0]), GFP_KERNEL);
if (NULL == add->grants ||
NULL == add->map_ops ||
NULL == add->unmap_ops ||
- NULL == add->pages)
+ NULL == add->pages ||
+ NULL == add->being_removed)
goto err;
if (use_ptemod) {
add->kmap_ops = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
if (!refcount_dec_and_test(&map->users))
return;
- if (map->pages && !use_ptemod)
+ if (map->pages && !use_ptemod) {
+ /*
+ * Increment the reference count. This ensures that the
+ * subsequent call to unmap_grant_pages() will not wind up
+ * re-entering itself. It *can* wind up calling
+ * gntdev_put_map() recursively, but such calls will be with a
+ * reference count greater than 1, so they will return before
+ * this code is reached. The recursion depth is thus limited to
+ * 1. Do NOT use refcount_inc() here, as it will detect that
+ * the reference count is zero and WARN().
+ */
+ refcount_set(&map->users, 1);
+
+ /*
+ * Unmap the grants. This may or may not be asynchronous, so it
+ * is possible that the reference count is 1 on return, but it
+ * could also be greater than 1.
+ */
unmap_grant_pages(map, 0, map->count);
+ /* Check if the memory now needs to be freed */
+ if (!refcount_dec_and_test(&map->users))
+ return;
+
+ /*
+ * All pages have been returned to the hypervisor, so free the
+ * map.
+ */
+ }
+
if (map->notify.flags & UNMAP_NOTIFY_SEND_EVENT) {
notify_remote_via_evtchn(map->notify.event);
evtchn_put(map->notify.event);
int gntdev_map_grant_pages(struct gntdev_grant_map *map)
{
+ size_t alloced = 0;
int i, err = 0;
if (!use_ptemod) {
map->count);
for (i = 0; i < map->count; i++) {
- if (map->map_ops[i].status == GNTST_okay)
+ if (map->map_ops[i].status == GNTST_okay) {
map->unmap_ops[i].handle = map->map_ops[i].handle;
- else if (!err)
+ if (!use_ptemod)
+ alloced++;
+ } else if (!err)
err = -EINVAL;
if (map->flags & GNTMAP_device_map)
map->unmap_ops[i].dev_bus_addr = map->map_ops[i].dev_bus_addr;
if (use_ptemod) {
- if (map->kmap_ops[i].status == GNTST_okay)
+ if (map->kmap_ops[i].status == GNTST_okay) {
+ if (map->map_ops[i].status == GNTST_okay)
+ alloced++;
map->kunmap_ops[i].handle = map->kmap_ops[i].handle;
- else if (!err)
+ } else if (!err)
err = -EINVAL;
}
}
+ atomic_add(alloced, &map->live_grants);
return err;
}
-static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void __unmap_grant_pages_done(int result,
+ struct gntab_unmap_queue_data *data)
{
- int i, err = 0;
- struct gntab_unmap_queue_data unmap_data;
-
- if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
- int pgno = (map->notify.addr >> PAGE_SHIFT);
- if (pgno >= offset && pgno < offset + pages) {
- /* No need for kmap, pages are in lowmem */
- uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
- tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
- map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
- }
- }
-
- unmap_data.unmap_ops = map->unmap_ops + offset;
- unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
- unmap_data.pages = map->pages + offset;
- unmap_data.count = pages;
-
- err = gnttab_unmap_refs_sync(&unmap_data);
- if (err)
- return err;
+ unsigned int i;
+ struct gntdev_grant_map *map = data->data;
+ unsigned int offset = data->unmap_ops - map->unmap_ops;
- for (i = 0; i < pages; i++) {
- if (map->unmap_ops[offset+i].status)
- err = -EINVAL;
+ for (i = 0; i < data->count; i++) {
+ WARN_ON(map->unmap_ops[offset + i].status != GNTST_okay &&
+ map->unmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("unmap handle=%d st=%d\n",
map->unmap_ops[offset+i].handle,
map->unmap_ops[offset+i].status);
map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
if (use_ptemod) {
- if (map->kunmap_ops[offset+i].status)
- err = -EINVAL;
+ WARN_ON(map->kunmap_ops[offset + i].status != GNTST_okay &&
+ map->kunmap_ops[offset + i].handle != INVALID_GRANT_HANDLE);
pr_debug("kunmap handle=%u st=%d\n",
map->kunmap_ops[offset+i].handle,
map->kunmap_ops[offset+i].status);
map->kunmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
}
}
- return err;
+ /*
+ * Decrease the live-grant counter. This must happen after the loop to
+ * prevent premature reuse of the grants by gnttab_mmap().
+ */
+ atomic_sub(data->count, &map->live_grants);
+
+ /* Release reference taken by __unmap_grant_pages */
+ gntdev_put_map(NULL, map);
+}
+
+static void __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
+{
+ if (map->notify.flags & UNMAP_NOTIFY_CLEAR_BYTE) {
+ int pgno = (map->notify.addr >> PAGE_SHIFT);
+
+ if (pgno >= offset && pgno < offset + pages) {
+ /* No need for kmap, pages are in lowmem */
+ uint8_t *tmp = pfn_to_kaddr(page_to_pfn(map->pages[pgno]));
+
+ tmp[map->notify.addr & (PAGE_SIZE-1)] = 0;
+ map->notify.flags &= ~UNMAP_NOTIFY_CLEAR_BYTE;
+ }
+ }
+
+ map->unmap_data.unmap_ops = map->unmap_ops + offset;
+ map->unmap_data.kunmap_ops = use_ptemod ? map->kunmap_ops + offset : NULL;
+ map->unmap_data.pages = map->pages + offset;
+ map->unmap_data.count = pages;
+ map->unmap_data.done = __unmap_grant_pages_done;
+ map->unmap_data.data = map;
+ refcount_inc(&map->users); /* to keep map alive during async call below */
+
+ gnttab_unmap_refs_async(&map->unmap_data);
}
-static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
- int pages)
+static void unmap_grant_pages(struct gntdev_grant_map *map, int offset,
+ int pages)
{
- int range, err = 0;
+ int range;
+
+ if (atomic_read(&map->live_grants) == 0)
+ return; /* Nothing to do */
pr_debug("unmap %d+%d [%d+%d]\n", map->index, map->count, offset, pages);
/* It is possible the requested range will have a "hole" where we
* already unmapped some of the grants. Only unmap valid ranges.
*/
- while (pages && !err) {
- while (pages &&
- map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
+ while (pages) {
+ while (pages && map->being_removed[offset]) {
offset++;
pages--;
}
range = 0;
while (range < pages) {
- if (map->unmap_ops[offset + range].handle ==
- INVALID_GRANT_HANDLE)
+ if (map->being_removed[offset + range])
break;
+ map->being_removed[offset + range] = true;
range++;
}
- err = __unmap_grant_pages(map, offset, range);
+ if (range)
+ __unmap_grant_pages(map, offset, range);
offset += range;
pages -= range;
}
-
- return err;
}
/* ------------------------------------------------------------------ */
struct gntdev_grant_map *map =
container_of(mn, struct gntdev_grant_map, notifier);
unsigned long mstart, mend;
- int err;
if (!mmu_notifier_range_blockable(range))
return false;
map->index, map->count,
map->vma->vm_start, map->vma->vm_end,
range->start, range->end, mstart, mend);
- err = unmap_grant_pages(map,
+ unmap_grant_pages(map,
(mstart - map->vma->vm_start) >> PAGE_SHIFT,
(mend - mstart) >> PAGE_SHIFT);
- WARN_ON(err);
return true;
}
goto unlock_out;
if (use_ptemod && map->vma)
goto unlock_out;
+ if (atomic_read(&map->live_grants)) {
+ err = -EAGAIN;
+ goto unlock_out;
+ }
refcount_inc(&map->users);
vma->vm_ops = &gntdev_vmops;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Stub IOMMU driver which does nothing.
+ * The main purpose of it being present is to reuse generic IOMMU device tree
+ * bindings by Xen grant DMA-mapping layer.
+ *
+ * Copyright (C) 2022 EPAM Systems Inc.
+ */
+
+#include <linux/iommu.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+
+struct grant_dma_iommu_device {
+ struct device *dev;
+ struct iommu_device iommu;
+};
+
+/* Nothing is really needed here */
+static const struct iommu_ops grant_dma_iommu_ops;
+
+static const struct of_device_id grant_dma_iommu_of_match[] = {
+ { .compatible = "xen,grant-dma" },
+ { },
+};
+
+static int grant_dma_iommu_probe(struct platform_device *pdev)
+{
+ struct grant_dma_iommu_device *mmu;
+ int ret;
+
+ mmu = devm_kzalloc(&pdev->dev, sizeof(*mmu), GFP_KERNEL);
+ if (!mmu)
+ return -ENOMEM;
+
+ mmu->dev = &pdev->dev;
+
+ ret = iommu_device_register(&mmu->iommu, &grant_dma_iommu_ops, &pdev->dev);
+ if (ret)
+ return ret;
+
+ platform_set_drvdata(pdev, mmu);
+
+ return 0;
+}
+
+static int grant_dma_iommu_remove(struct platform_device *pdev)
+{
+ struct grant_dma_iommu_device *mmu = platform_get_drvdata(pdev);
+
+ platform_set_drvdata(pdev, NULL);
+ iommu_device_unregister(&mmu->iommu);
+
+ return 0;
+}
+
+static struct platform_driver grant_dma_iommu_driver = {
+ .driver = {
+ .name = "grant-dma-iommu",
+ .of_match_table = grant_dma_iommu_of_match,
+ },
+ .probe = grant_dma_iommu_probe,
+ .remove = grant_dma_iommu_remove,
+};
+
+static int __init grant_dma_iommu_init(void)
+{
+ struct device_node *iommu_np;
+
+ iommu_np = of_find_matching_node(NULL, grant_dma_iommu_of_match);
+ if (!iommu_np)
+ return 0;
+
+ of_node_put(iommu_np);
+
+ return platform_driver_register(&grant_dma_iommu_driver);
+}
+subsys_initcall(grant_dma_iommu_init);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Xen grant DMA-mapping layer - contains special DMA-mapping routines
+ * for providing grant references as DMA addresses to be used by frontends
+ * (e.g. virtio) in Xen guests
+ *
+ * Copyright (c) 2021, Juergen Gross <jgross@suse.com>
+ */
+
+#include <linux/module.h>
+#include <linux/dma-map-ops.h>
+#include <linux/of.h>
+#include <linux/pfn.h>
+#include <linux/xarray.h>
+#include <xen/xen.h>
+#include <xen/xen-ops.h>
+#include <xen/grant_table.h>
+
+struct xen_grant_dma_data {
+ /* The ID of backend domain */
+ domid_t backend_domid;
+ /* Is device behaving sane? */
+ bool broken;
+};
+
+static DEFINE_XARRAY(xen_grant_dma_devices);
+
+#define XEN_GRANT_DMA_ADDR_OFF (1ULL << 63)
+
+static inline dma_addr_t grant_to_dma(grant_ref_t grant)
+{
+ return XEN_GRANT_DMA_ADDR_OFF | ((dma_addr_t)grant << PAGE_SHIFT);
+}
+
+static inline grant_ref_t dma_to_grant(dma_addr_t dma)
+{
+ return (grant_ref_t)((dma & ~XEN_GRANT_DMA_ADDR_OFF) >> PAGE_SHIFT);
+}
+
+static struct xen_grant_dma_data *find_xen_grant_dma_data(struct device *dev)
+{
+ struct xen_grant_dma_data *data;
+
+ xa_lock(&xen_grant_dma_devices);
+ data = xa_load(&xen_grant_dma_devices, (unsigned long)dev);
+ xa_unlock(&xen_grant_dma_devices);
+
+ return data;
+}
+
+/*
+ * DMA ops for Xen frontends (e.g. virtio).
+ *
+ * Used to act as a kind of software IOMMU for Xen guests by using grants as
+ * DMA addresses.
+ * Such a DMA address is formed by using the grant reference as a frame
+ * number and setting the highest address bit (this bit is for the backend
+ * to be able to distinguish it from e.g. a mmio address).
+ */
+static void *xen_grant_dma_alloc(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp,
+ unsigned long attrs)
+{
+ struct xen_grant_dma_data *data;
+ unsigned int i, n_pages = PFN_UP(size);
+ unsigned long pfn;
+ grant_ref_t grant;
+ void *ret;
+
+ data = find_xen_grant_dma_data(dev);
+ if (!data)
+ return NULL;
+
+ if (unlikely(data->broken))
+ return NULL;
+
+ ret = alloc_pages_exact(n_pages * PAGE_SIZE, gfp);
+ if (!ret)
+ return NULL;
+
+ pfn = virt_to_pfn(ret);
+
+ if (gnttab_alloc_grant_reference_seq(n_pages, &grant)) {
+ free_pages_exact(ret, n_pages * PAGE_SIZE);
+ return NULL;
+ }
+
+ for (i = 0; i < n_pages; i++) {
+ gnttab_grant_foreign_access_ref(grant + i, data->backend_domid,
+ pfn_to_gfn(pfn + i), 0);
+ }
+
+ *dma_handle = grant_to_dma(grant);
+
+ return ret;
+}
+
+static void xen_grant_dma_free(struct device *dev, size_t size, void *vaddr,
+ dma_addr_t dma_handle, unsigned long attrs)
+{
+ struct xen_grant_dma_data *data;
+ unsigned int i, n_pages = PFN_UP(size);
+ grant_ref_t grant;
+
+ data = find_xen_grant_dma_data(dev);
+ if (!data)
+ return;
+
+ if (unlikely(data->broken))
+ return;
+
+ grant = dma_to_grant(dma_handle);
+
+ for (i = 0; i < n_pages; i++) {
+ if (unlikely(!gnttab_end_foreign_access_ref(grant + i))) {
+ dev_alert(dev, "Grant still in use by backend domain, disabled for further use\n");
+ data->broken = true;
+ return;
+ }
+ }
+
+ gnttab_free_grant_reference_seq(grant, n_pages);
+
+ free_pages_exact(vaddr, n_pages * PAGE_SIZE);
+}
+
+static struct page *xen_grant_dma_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle,
+ enum dma_data_direction dir,
+ gfp_t gfp)
+{
+ void *vaddr;
+
+ vaddr = xen_grant_dma_alloc(dev, size, dma_handle, gfp, 0);
+ if (!vaddr)
+ return NULL;
+
+ return virt_to_page(vaddr);
+}
+
+static void xen_grant_dma_free_pages(struct device *dev, size_t size,
+ struct page *vaddr, dma_addr_t dma_handle,
+ enum dma_data_direction dir)
+{
+ xen_grant_dma_free(dev, size, page_to_virt(vaddr), dma_handle, 0);
+}
+
+static dma_addr_t xen_grant_dma_map_page(struct device *dev, struct page *page,
+ unsigned long offset, size_t size,
+ enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct xen_grant_dma_data *data;
+ unsigned int i, n_pages = PFN_UP(size);
+ grant_ref_t grant;
+ dma_addr_t dma_handle;
+
+ if (WARN_ON(dir == DMA_NONE))
+ return DMA_MAPPING_ERROR;
+
+ data = find_xen_grant_dma_data(dev);
+ if (!data)
+ return DMA_MAPPING_ERROR;
+
+ if (unlikely(data->broken))
+ return DMA_MAPPING_ERROR;
+
+ if (gnttab_alloc_grant_reference_seq(n_pages, &grant))
+ return DMA_MAPPING_ERROR;
+
+ for (i = 0; i < n_pages; i++) {
+ gnttab_grant_foreign_access_ref(grant + i, data->backend_domid,
+ xen_page_to_gfn(page) + i, dir == DMA_TO_DEVICE);
+ }
+
+ dma_handle = grant_to_dma(grant) + offset;
+
+ return dma_handle;
+}
+
+static void xen_grant_dma_unmap_page(struct device *dev, dma_addr_t dma_handle,
+ size_t size, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct xen_grant_dma_data *data;
+ unsigned int i, n_pages = PFN_UP(size);
+ grant_ref_t grant;
+
+ if (WARN_ON(dir == DMA_NONE))
+ return;
+
+ data = find_xen_grant_dma_data(dev);
+ if (!data)
+ return;
+
+ if (unlikely(data->broken))
+ return;
+
+ grant = dma_to_grant(dma_handle);
+
+ for (i = 0; i < n_pages; i++) {
+ if (unlikely(!gnttab_end_foreign_access_ref(grant + i))) {
+ dev_alert(dev, "Grant still in use by backend domain, disabled for further use\n");
+ data->broken = true;
+ return;
+ }
+ }
+
+ gnttab_free_grant_reference_seq(grant, n_pages);
+}
+
+static void xen_grant_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct scatterlist *s;
+ unsigned int i;
+
+ if (WARN_ON(dir == DMA_NONE))
+ return;
+
+ for_each_sg(sg, s, nents, i)
+ xen_grant_dma_unmap_page(dev, s->dma_address, sg_dma_len(s), dir,
+ attrs);
+}
+
+static int xen_grant_dma_map_sg(struct device *dev, struct scatterlist *sg,
+ int nents, enum dma_data_direction dir,
+ unsigned long attrs)
+{
+ struct scatterlist *s;
+ unsigned int i;
+
+ if (WARN_ON(dir == DMA_NONE))
+ return -EINVAL;
+
+ for_each_sg(sg, s, nents, i) {
+ s->dma_address = xen_grant_dma_map_page(dev, sg_page(s), s->offset,
+ s->length, dir, attrs);
+ if (s->dma_address == DMA_MAPPING_ERROR)
+ goto out;
+
+ sg_dma_len(s) = s->length;
+ }
+
+ return nents;
+
+out:
+ xen_grant_dma_unmap_sg(dev, sg, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
+ sg_dma_len(sg) = 0;
+
+ return -EIO;
+}
+
+static int xen_grant_dma_supported(struct device *dev, u64 mask)
+{
+ return mask == DMA_BIT_MASK(64);
+}
+
+static const struct dma_map_ops xen_grant_dma_ops = {
+ .alloc = xen_grant_dma_alloc,
+ .free = xen_grant_dma_free,
+ .alloc_pages = xen_grant_dma_alloc_pages,
+ .free_pages = xen_grant_dma_free_pages,
+ .mmap = dma_common_mmap,
+ .get_sgtable = dma_common_get_sgtable,
+ .map_page = xen_grant_dma_map_page,
+ .unmap_page = xen_grant_dma_unmap_page,
+ .map_sg = xen_grant_dma_map_sg,
+ .unmap_sg = xen_grant_dma_unmap_sg,
+ .dma_supported = xen_grant_dma_supported,
+};
+
+bool xen_is_grant_dma_device(struct device *dev)
+{
+ struct device_node *iommu_np;
+ bool has_iommu;
+
+ /* XXX Handle only DT devices for now */
+ if (!dev->of_node)
+ return false;
+
+ iommu_np = of_parse_phandle(dev->of_node, "iommus", 0);
+ has_iommu = iommu_np && of_device_is_compatible(iommu_np, "xen,grant-dma");
+ of_node_put(iommu_np);
+
+ return has_iommu;
+}
+
+void xen_grant_setup_dma_ops(struct device *dev)
+{
+ struct xen_grant_dma_data *data;
+ struct of_phandle_args iommu_spec;
+
+ data = find_xen_grant_dma_data(dev);
+ if (data) {
+ dev_err(dev, "Xen grant DMA data is already created\n");
+ return;
+ }
+
+ /* XXX ACPI device unsupported for now */
+ if (!dev->of_node)
+ goto err;
+
+ if (of_parse_phandle_with_args(dev->of_node, "iommus", "#iommu-cells",
+ 0, &iommu_spec)) {
+ dev_err(dev, "Cannot parse iommus property\n");
+ goto err;
+ }
+
+ if (!of_device_is_compatible(iommu_spec.np, "xen,grant-dma") ||
+ iommu_spec.args_count != 1) {
+ dev_err(dev, "Incompatible IOMMU node\n");
+ of_node_put(iommu_spec.np);
+ goto err;
+ }
+
+ of_node_put(iommu_spec.np);
+
+ data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+ if (!data)
+ goto err;
+
+ /*
+ * The endpoint ID here means the ID of the domain where the corresponding
+ * backend is running
+ */
+ data->backend_domid = iommu_spec.args[0];
+
+ if (xa_err(xa_store(&xen_grant_dma_devices, (unsigned long)dev, data,
+ GFP_KERNEL))) {
+ dev_err(dev, "Cannot store Xen grant DMA data\n");
+ goto err;
+ }
+
+ dev->dma_ops = &xen_grant_dma_ops;
+
+ return;
+
+err:
+ dev_err(dev, "Cannot set up Xen grant DMA ops, retain platform DMA ops\n");
+}
+
+MODULE_DESCRIPTION("Xen grant DMA-mapping layer");
+MODULE_AUTHOR("Juergen Gross <jgross@suse.com>");
+MODULE_LICENSE("GPL");
#define pr_fmt(fmt) "xen:" KBUILD_MODNAME ": " fmt
+#include <linux/bitmap.h>
#include <linux/memblock.h>
#include <linux/sched.h>
#include <linux/mm.h>
static grant_ref_t **gnttab_list;
static unsigned int nr_grant_frames;
+
+/*
+ * Handling of free grants:
+ *
+ * Free grants are in a simple list anchored in gnttab_free_head. They are
+ * linked by grant ref, the last element contains GNTTAB_LIST_END. The number
+ * of free entries is stored in gnttab_free_count.
+ * Additionally there is a bitmap of free entries anchored in
+ * gnttab_free_bitmap. This is being used for simplifying allocation of
+ * multiple consecutive grants, which is needed e.g. for support of virtio.
+ * gnttab_last_free is used to add free entries of new frames at the end of
+ * the free list.
+ * gnttab_free_tail_ptr specifies the variable which references the start
+ * of consecutive free grants ending with gnttab_last_free. This pointer is
+ * updated in a rather defensive way, in order to avoid performance hits in
+ * hot paths.
+ * All those variables are protected by gnttab_list_lock.
+ */
static int gnttab_free_count;
-static grant_ref_t gnttab_free_head;
+static unsigned int gnttab_size;
+static grant_ref_t gnttab_free_head = GNTTAB_LIST_END;
+static grant_ref_t gnttab_last_free = GNTTAB_LIST_END;
+static grant_ref_t *gnttab_free_tail_ptr;
+static unsigned long *gnttab_free_bitmap;
static DEFINE_SPINLOCK(gnttab_list_lock);
+
struct grant_frames xen_auto_xlat_grant_frames;
static unsigned int xen_gnttab_version;
module_param_named(version, xen_gnttab_version, uint, 0);
ref = head = gnttab_free_head;
gnttab_free_count -= count;
- while (count-- > 1)
- head = gnttab_entry(head);
+ while (count--) {
+ bitmap_clear(gnttab_free_bitmap, head, 1);
+ if (gnttab_free_tail_ptr == __gnttab_entry(head))
+ gnttab_free_tail_ptr = &gnttab_free_head;
+ if (count)
+ head = gnttab_entry(head);
+ }
gnttab_free_head = gnttab_entry(head);
gnttab_entry(head) = GNTTAB_LIST_END;
+ if (!gnttab_free_count) {
+ gnttab_last_free = GNTTAB_LIST_END;
+ gnttab_free_tail_ptr = NULL;
+ }
+
spin_unlock_irqrestore(&gnttab_list_lock, flags);
return ref;
}
+static int get_seq_entry_count(void)
+{
+ if (gnttab_last_free == GNTTAB_LIST_END || !gnttab_free_tail_ptr ||
+ *gnttab_free_tail_ptr == GNTTAB_LIST_END)
+ return 0;
+
+ return gnttab_last_free - *gnttab_free_tail_ptr + 1;
+}
+
+/* Rebuilds the free grant list and tries to find count consecutive entries. */
+static int get_free_seq(unsigned int count)
+{
+ int ret = -ENOSPC;
+ unsigned int from, to;
+ grant_ref_t *last;
+
+ gnttab_free_tail_ptr = &gnttab_free_head;
+ last = &gnttab_free_head;
+
+ for (from = find_first_bit(gnttab_free_bitmap, gnttab_size);
+ from < gnttab_size;
+ from = find_next_bit(gnttab_free_bitmap, gnttab_size, to + 1)) {
+ to = find_next_zero_bit(gnttab_free_bitmap, gnttab_size,
+ from + 1);
+ if (ret < 0 && to - from >= count) {
+ ret = from;
+ bitmap_clear(gnttab_free_bitmap, ret, count);
+ from += count;
+ gnttab_free_count -= count;
+ if (from == to)
+ continue;
+ }
+
+ /*
+ * Recreate the free list in order to have it properly sorted.
+ * This is needed to make sure that the free tail has the maximum
+ * possible size.
+ */
+ while (from < to) {
+ *last = from;
+ last = __gnttab_entry(from);
+ gnttab_last_free = from;
+ from++;
+ }
+ if (to < gnttab_size)
+ gnttab_free_tail_ptr = __gnttab_entry(to - 1);
+ }
+
+ *last = GNTTAB_LIST_END;
+ if (gnttab_last_free != gnttab_size - 1)
+ gnttab_free_tail_ptr = NULL;
+
+ return ret;
+}
+
+static int get_free_entries_seq(unsigned int count)
+{
+ unsigned long flags;
+ int ret = 0;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+
+ if (gnttab_free_count < count) {
+ ret = gnttab_expand(count - gnttab_free_count);
+ if (ret < 0)
+ goto out;
+ }
+
+ if (get_seq_entry_count() < count) {
+ ret = get_free_seq(count);
+ if (ret >= 0)
+ goto out;
+ ret = gnttab_expand(count - get_seq_entry_count());
+ if (ret < 0)
+ goto out;
+ }
+
+ ret = *gnttab_free_tail_ptr;
+ *gnttab_free_tail_ptr = gnttab_entry(ret + count - 1);
+ gnttab_free_count -= count;
+ if (!gnttab_free_count)
+ gnttab_free_tail_ptr = NULL;
+ bitmap_clear(gnttab_free_bitmap, ret, count);
+
+ out:
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+
+ return ret;
+}
+
static void do_free_callbacks(void)
{
struct gnttab_free_callback *callback, *next;
do_free_callbacks();
}
-static void put_free_entry(grant_ref_t ref)
+static void put_free_entry_locked(grant_ref_t ref)
{
- unsigned long flags;
-
if (unlikely(ref < GNTTAB_NR_RESERVED_ENTRIES))
return;
- spin_lock_irqsave(&gnttab_list_lock, flags);
gnttab_entry(ref) = gnttab_free_head;
gnttab_free_head = ref;
+ if (!gnttab_free_count)
+ gnttab_last_free = ref;
+ if (gnttab_free_tail_ptr == &gnttab_free_head)
+ gnttab_free_tail_ptr = __gnttab_entry(ref);
gnttab_free_count++;
+ bitmap_set(gnttab_free_bitmap, ref, 1);
+}
+
+static void put_free_entry(grant_ref_t ref)
+{
+ unsigned long flags;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ put_free_entry_locked(ref);
check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
+static void gnttab_set_free(unsigned int start, unsigned int n)
+{
+ unsigned int i;
+
+ for (i = start; i < start + n - 1; i++)
+ gnttab_entry(i) = i + 1;
+
+ gnttab_entry(i) = GNTTAB_LIST_END;
+ if (!gnttab_free_count) {
+ gnttab_free_head = start;
+ gnttab_free_tail_ptr = &gnttab_free_head;
+ } else {
+ gnttab_entry(gnttab_last_free) = start;
+ }
+ gnttab_free_count += n;
+ gnttab_last_free = i;
+
+ bitmap_set(gnttab_free_bitmap, start, n);
+}
+
/*
* Following applies to gnttab_update_entry_v1 and gnttab_update_entry_v2.
* Introducing a valid entry into the grant table:
{
grant_ref_t ref;
unsigned long flags;
- int count = 1;
- if (head == GNTTAB_LIST_END)
- return;
+
spin_lock_irqsave(&gnttab_list_lock, flags);
- ref = head;
- while (gnttab_entry(ref) != GNTTAB_LIST_END) {
- ref = gnttab_entry(ref);
- count++;
+ while (head != GNTTAB_LIST_END) {
+ ref = gnttab_entry(head);
+ put_free_entry_locked(head);
+ head = ref;
}
- gnttab_entry(ref) = gnttab_free_head;
- gnttab_free_head = head;
- gnttab_free_count += count;
check_free_callbacks();
spin_unlock_irqrestore(&gnttab_list_lock, flags);
}
EXPORT_SYMBOL_GPL(gnttab_free_grant_references);
+void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count)
+{
+ unsigned long flags;
+ unsigned int i;
+
+ spin_lock_irqsave(&gnttab_list_lock, flags);
+ for (i = count; i > 0; i--)
+ put_free_entry_locked(head + i - 1);
+ check_free_callbacks();
+ spin_unlock_irqrestore(&gnttab_list_lock, flags);
+}
+EXPORT_SYMBOL_GPL(gnttab_free_grant_reference_seq);
+
int gnttab_alloc_grant_references(u16 count, grant_ref_t *head)
{
int h = get_free_entries(count);
}
EXPORT_SYMBOL_GPL(gnttab_alloc_grant_references);
+int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first)
+{
+ int h;
+
+ if (count == 1)
+ h = get_free_entries(1);
+ else
+ h = get_free_entries_seq(count);
+
+ if (h < 0)
+ return -ENOSPC;
+
+ *first = h;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(gnttab_alloc_grant_reference_seq);
+
int gnttab_empty_grant_references(const grant_ref_t *private_head)
{
return (*private_head == GNTTAB_LIST_END);
goto grow_nomem;
}
+ gnttab_set_free(gnttab_size, extra_entries);
- for (i = grefs_per_frame * nr_grant_frames;
- i < grefs_per_frame * new_nr_grant_frames - 1; i++)
- gnttab_entry(i) = i + 1;
-
- gnttab_entry(i) = gnttab_free_head;
- gnttab_free_head = grefs_per_frame * nr_grant_frames;
- gnttab_free_count += extra_entries;
+ if (!gnttab_free_tail_ptr)
+ gnttab_free_tail_ptr = __gnttab_entry(gnttab_size);
nr_grant_frames = new_nr_grant_frames;
+ gnttab_size += extra_entries;
check_free_callbacks();
int gnttab_init(void)
{
int i;
- unsigned long max_nr_grant_frames;
+ unsigned long max_nr_grant_frames, max_nr_grefs;
unsigned int max_nr_glist_frames, nr_glist_frames;
- unsigned int nr_init_grefs;
int ret;
gnttab_request_version();
max_nr_grant_frames = gnttab_max_grant_frames();
+ max_nr_grefs = max_nr_grant_frames *
+ gnttab_interface->grefs_per_grant_frame;
nr_grant_frames = 1;
/* Determine the maximum number of frames required for the
* grant reference free list on the current hypervisor.
*/
- max_nr_glist_frames = (max_nr_grant_frames *
- gnttab_interface->grefs_per_grant_frame / RPP);
+ max_nr_glist_frames = max_nr_grefs / RPP;
gnttab_list = kmalloc_array(max_nr_glist_frames,
sizeof(grant_ref_t *),
}
}
+ gnttab_free_bitmap = bitmap_zalloc(max_nr_grefs, GFP_KERNEL);
+ if (!gnttab_free_bitmap) {
+ ret = -ENOMEM;
+ goto ini_nomem;
+ }
+
ret = arch_gnttab_init(max_nr_grant_frames,
nr_status_frames(max_nr_grant_frames));
if (ret < 0)
goto ini_nomem;
}
- nr_init_grefs = nr_grant_frames *
- gnttab_interface->grefs_per_grant_frame;
-
- for (i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_init_grefs - 1; i++)
- gnttab_entry(i) = i + 1;
+ gnttab_size = nr_grant_frames * gnttab_interface->grefs_per_grant_frame;
- gnttab_entry(nr_init_grefs - 1) = GNTTAB_LIST_END;
- gnttab_free_count = nr_init_grefs - GNTTAB_NR_RESERVED_ENTRIES;
- gnttab_free_head = GNTTAB_NR_RESERVED_ENTRIES;
+ gnttab_set_free(GNTTAB_NR_RESERVED_ENTRIES,
+ gnttab_size - GNTTAB_NR_RESERVED_ENTRIES);
printk("Grant table initialized\n");
return 0;
for (i--; i >= 0; i--)
free_page((unsigned long)gnttab_list[i]);
kfree(gnttab_list);
+ bitmap_free(gnttab_free_bitmap);
return ret;
}
EXPORT_SYMBOL_GPL(gnttab_init);
return 0;
}
-EXPORT_SYMBOL_GPL(xen_xlate_map_ballooned_pages);
struct remap_pfn {
struct mm_struct *mm;
version = cpu_to_le32(v9inode->qid.version);
path = cpu_to_le64(v9inode->qid.path);
v9ses = v9fs_inode2v9ses(inode);
- v9inode->netfs_ctx.cache =
+ v9inode->netfs.cache =
fscache_acquire_cookie(v9fs_session_cache(v9ses),
0,
&path, sizeof(path),
&version, sizeof(version),
- i_size_read(&v9inode->vfs_inode));
+ i_size_read(&v9inode->netfs.inode));
p9_debug(P9_DEBUG_FSC, "inode %p get cookie %p\n",
inode, v9fs_inode_cookie(v9inode));
const unsigned char **wnames, *uname;
int i, n, l, clone, access;
struct v9fs_session_info *v9ses;
- struct p9_fid *fid, *old_fid = NULL;
+ struct p9_fid *fid, *old_fid;
v9ses = v9fs_dentry2v9ses(dentry);
access = v9ses->flags & V9FS_ACCESS_MASK;
if (IS_ERR(fid))
return fid;
+ refcount_inc(&fid->count);
v9fs_fid_add(dentry->d_sb->s_root, fid);
}
/* If we are root ourself just return that */
- if (dentry->d_sb->s_root == dentry) {
- refcount_inc(&fid->count);
+ if (dentry->d_sb->s_root == dentry)
return fid;
- }
/*
* Do a multipath walk with attached root.
* When walking parent we need to make sure we
fid = ERR_PTR(n);
goto err_out;
}
+ old_fid = fid;
clone = 1;
i = 0;
while (i < n) {
* walk to ensure none of the patch component change
*/
fid = p9_client_walk(fid, l, &wnames[i], clone);
+ /* non-cloning walk will return the same fid */
+ if (fid != old_fid) {
+ p9_client_clunk(old_fid);
+ old_fid = fid;
+ }
if (IS_ERR(fid)) {
- if (old_fid) {
- /*
- * If we fail, clunk fid which are mapping
- * to path component and not the last component
- * of the path.
- */
- p9_client_clunk(old_fid);
- }
kfree(wnames);
goto err_out;
}
- old_fid = fid;
i += l;
clone = 0;
}
struct v9fs_inode *v9inode = (struct v9fs_inode *)foo;
memset(&v9inode->qid, 0, sizeof(v9inode->qid));
- inode_init_once(&v9inode->vfs_inode);
+ inode_init_once(&v9inode->netfs.inode);
}
/**
#define V9FS_INO_INVALID_ATTR 0x01
struct v9fs_inode {
- struct {
- /* These must be contiguous */
- struct inode vfs_inode; /* the VFS's inode record */
- struct netfs_i_context netfs_ctx; /* Netfslib context */
- };
+ struct netfs_inode netfs; /* Netfslib context and vfs inode */
struct p9_qid qid;
unsigned int cache_validity;
struct p9_fid *writeback_fid;
static inline struct v9fs_inode *V9FS_I(const struct inode *inode)
{
- return container_of(inode, struct v9fs_inode, vfs_inode);
+ return container_of(inode, struct v9fs_inode, netfs.inode);
}
static inline struct fscache_cookie *v9fs_inode_cookie(struct v9fs_inode *v9inode)
{
#ifdef CONFIG_9P_FSCACHE
- return netfs_i_cookie(&v9inode->vfs_inode);
+ return netfs_i_cookie(&v9inode->netfs);
#else
return NULL;
#endif
*/
static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
{
+ struct inode *inode = file_inode(file);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
struct p9_fid *fid = file->private_data;
+ BUG_ON(!fid);
+
+ /* we might need to read from a fid that was opened write-only
+ * for read-modify-write of page cache, use the writeback fid
+ * for that */
+ if (rreq->origin == NETFS_READ_FOR_WRITE &&
+ (fid->mode & O_ACCMODE) == O_WRONLY) {
+ fid = v9inode->writeback_fid;
+ BUG_ON(!fid);
+ }
+
refcount_inc(&fid->count);
rreq->netfs_priv = fid;
return 0;
}
/**
- * v9fs_req_cleanup - Cleanup request initialized by v9fs_init_request
- * @mapping: unused mapping of request to cleanup
- * @priv: private data to cleanup, a fid, guaranted non-null.
+ * v9fs_free_request - Cleanup request initialized by v9fs_init_rreq
+ * @rreq: The I/O request to clean up
*/
-static void v9fs_req_cleanup(struct address_space *mapping, void *priv)
+static void v9fs_free_request(struct netfs_io_request *rreq)
{
- struct p9_fid *fid = priv;
+ struct p9_fid *fid = rreq->netfs_priv;
p9_client_clunk(fid);
}
const struct netfs_request_ops v9fs_req_ops = {
.init_request = v9fs_init_request,
+ .free_request = v9fs_free_request,
.begin_cache_operation = v9fs_begin_cache_operation,
.issue_read = v9fs_issue_read,
- .cleanup = v9fs_req_cleanup,
};
/**
transferred_or_error != -ENOBUFS) {
version = cpu_to_le32(v9inode->qid.version);
fscache_invalidate(v9fs_inode_cookie(v9inode), &version,
- i_size_read(&v9inode->vfs_inode), 0);
+ i_size_read(&v9inode->netfs.inode), 0);
}
}
* file. We need to do this before we get a lock on the page in case
* there's more than one writer competing for the same cache block.
*/
- retval = netfs_write_begin(filp, mapping, pos, len, &folio, fsdata);
+ retval = netfs_write_begin(&v9inode->netfs, filp, mapping, pos, len, &folio, fsdata);
if (retval < 0)
return retval;
v9inode->writeback_fid = NULL;
v9inode->cache_validity = 0;
mutex_init(&v9inode->v_mutex);
- return &v9inode->vfs_inode;
+ return &v9inode->netfs.inode;
}
/**
*/
static void v9fs_set_netfs_context(struct inode *inode)
{
- netfs_i_context_init(inode, &v9fs_req_ops);
+ struct v9fs_inode *v9inode = V9FS_I(inode);
+ netfs_inode_init(&v9inode->netfs, &v9fs_req_ops);
}
int v9fs_init_inode(struct v9fs_session_info *v9ses,
return ERR_PTR(-ECHILD);
v9ses = v9fs_dentry2v9ses(dentry);
- fid = v9fs_fid_lookup(dentry);
+ if (!v9fs_proto_dotu(v9ses))
+ return ERR_PTR(-EBADF);
+
p9_debug(P9_DEBUG_VFS, "%pd\n", dentry);
+ fid = v9fs_fid_lookup(dentry);
if (IS_ERR(fid))
return ERR_CAST(fid);
- if (!v9fs_proto_dotu(v9ses))
- return ERR_PTR(-EBADF);
-
st = p9_client_stat(fid);
p9_client_clunk(fid);
if (IS_ERR(st))
if (IS_ERR(ofid)) {
err = PTR_ERR(ofid);
p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+ p9_client_clunk(dfid);
goto out;
}
if (err) {
p9_debug(P9_DEBUG_VFS, "Failed to get acl values in creat %d\n",
err);
+ p9_client_clunk(dfid);
goto error;
}
err = p9_client_create_dotl(ofid, name, v9fs_open_to_dotl_flags(flags),
if (err < 0) {
p9_debug(P9_DEBUG_VFS, "p9_client_open_dotl failed in creat %d\n",
err);
+ p9_client_clunk(dfid);
goto error;
}
v9fs_invalidate_inode_attr(dir);
{
struct afs_vnode *vnode = container_of(work, struct afs_vnode, cb_work);
- unmap_mapping_pages(vnode->vfs_inode.i_mapping, 0, 0, false);
+ unmap_mapping_pages(vnode->netfs.inode.i_mapping, 0, 0, false);
}
void afs_server_init_callback_work(struct work_struct *work)
*/
static void afs_dir_read_cleanup(struct afs_read *req)
{
- struct address_space *mapping = req->vnode->vfs_inode.i_mapping;
+ struct address_space *mapping = req->vnode->netfs.inode.i_mapping;
struct folio *folio;
pgoff_t last = req->nr_pages - 1;
block = kmap_local_folio(folio, offset);
if (block->hdr.magic != AFS_DIR_MAGIC) {
printk("kAFS: %s(%lx): [%llx] bad magic %zx/%zx is %04hx\n",
- __func__, dvnode->vfs_inode.i_ino,
+ __func__, dvnode->netfs.inode.i_ino,
pos, offset, size, ntohs(block->hdr.magic));
trace_afs_dir_check_failed(dvnode, pos + offset, i_size);
kunmap_local(block);
static void afs_dir_dump(struct afs_vnode *dvnode, struct afs_read *req)
{
union afs_xdr_dir_block *block;
- struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+ struct address_space *mapping = dvnode->netfs.inode.i_mapping;
struct folio *folio;
pgoff_t last = req->nr_pages - 1;
size_t offset, size;
*/
static int afs_dir_check(struct afs_vnode *dvnode, struct afs_read *req)
{
- struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+ struct address_space *mapping = dvnode->netfs.inode.i_mapping;
struct folio *folio;
pgoff_t last = req->nr_pages - 1;
int ret = 0;
static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key)
__acquires(&dvnode->validate_lock)
{
- struct address_space *mapping = dvnode->vfs_inode.i_mapping;
+ struct address_space *mapping = dvnode->netfs.inode.i_mapping;
struct afs_read *req;
loff_t i_size;
int nr_pages, i;
req->cleanup = afs_dir_read_cleanup;
expand:
- i_size = i_size_read(&dvnode->vfs_inode);
+ i_size = i_size_read(&dvnode->netfs.inode);
if (i_size < 2048) {
ret = afs_bad(dvnode, afs_file_error_dir_small);
goto error;
req->actual_len = i_size; /* May change */
req->len = nr_pages * PAGE_SIZE; /* We can ask for more than there is */
req->data_version = dvnode->status.data_version; /* May change */
- iov_iter_xarray(&req->def_iter, READ, &dvnode->vfs_inode.i_mapping->i_pages,
+ iov_iter_xarray(&req->def_iter, READ, &dvnode->netfs.inode.i_mapping->i_pages,
0, i_size);
req->iter = &req->def_iter;
out_op:
if (op->error == 0) {
- inode = &op->file[1].vnode->vfs_inode;
+ inode = &op->file[1].vnode->netfs.inode;
op->file[1].vnode = NULL;
}
afs_stat_v(dir, n_reval);
/* search the directory for this vnode */
- ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key, &dir_version);
+ ret = afs_do_lookup_one(&dir->netfs.inode, dentry, &fid, key, &dir_version);
switch (ret) {
case 0:
/* the filename maps to something */
_debug("%pd: file deleted (uq %u -> %u I:%u)",
dentry, fid.unique,
vnode->fid.unique,
- vnode->vfs_inode.i_generation);
+ vnode->netfs.inode.i_generation);
goto not_found;
}
goto out_valid;
if (d_really_is_positive(dentry)) {
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
- clear_nlink(&vnode->vfs_inode);
+ clear_nlink(&vnode->netfs.inode);
set_bit(AFS_VNODE_DELETED, &vnode->flags);
clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
/* Already done */
} else if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) {
write_seqlock(&vnode->cb_lock);
- drop_nlink(&vnode->vfs_inode);
- if (vnode->vfs_inode.i_nlink == 0) {
+ drop_nlink(&vnode->netfs.inode);
+ if (vnode->netfs.inode.i_nlink == 0) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
__afs_break_callback(vnode, afs_cb_break_for_unlink);
}
op->error = ret;
}
- _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, op->error);
+ _debug("nlink %d [val %d]", vnode->netfs.inode.i_nlink, op->error);
}
static void afs_unlink_success(struct afs_operation *op)
afs_update_dentry_version(op, dvp, op->dentry);
if (op->dentry_2->d_parent == op->dentry->d_parent)
afs_update_dentry_version(op, dvp, op->dentry_2);
- ihold(&vp->vnode->vfs_inode);
- d_instantiate(op->dentry, &vp->vnode->vfs_inode);
+ ihold(&vp->vnode->netfs.inode);
+ d_instantiate(op->dentry, &vp->vnode->netfs.inode);
}
static void afs_link_put(struct afs_operation *op)
*/
static struct folio *afs_dir_get_folio(struct afs_vnode *vnode, pgoff_t index)
{
- struct address_space *mapping = vnode->vfs_inode.i_mapping;
+ struct address_space *mapping = vnode->netfs.inode.i_mapping;
struct folio *folio;
folio = __filemap_get_folio(mapping, index,
_enter(",,{%d,%s},", name->len, name->name);
- i_size = i_size_read(&vnode->vfs_inode);
+ i_size = i_size_read(&vnode->netfs.inode);
if (i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
clear_bit(AFS_VNODE_DIR_VALID, &vnode->flags);
if (b < AFS_DIR_BLOCKS_WITH_CTR)
meta->meta.alloc_ctrs[b] -= need_slots;
- inode_inc_iversion_raw(&vnode->vfs_inode);
+ inode_inc_iversion_raw(&vnode->netfs.inode);
afs_stat_v(vnode, n_dir_cr);
_debug("Insert %s in %u[%u]", name->name, b, slot);
_enter(",,{%d,%s},", name->len, name->name);
- i_size = i_size_read(&vnode->vfs_inode);
+ i_size = i_size_read(&vnode->netfs.inode);
if (i_size < AFS_DIR_BLOCK_SIZE ||
i_size > AFS_DIR_BLOCK_SIZE * AFS_DIR_MAX_BLOCKS ||
(i_size & (AFS_DIR_BLOCK_SIZE - 1))) {
if (b < AFS_DIR_BLOCKS_WITH_CTR)
meta->meta.alloc_ctrs[b] += need_slots;
- inode_set_iversion_raw(&vnode->vfs_inode, vnode->status.data_version);
+ inode_set_iversion_raw(&vnode->netfs.inode, vnode->status.data_version);
afs_stat_v(vnode, n_dir_rm);
_debug("Remove %s from %u[%u]", name->name, b, slot);
goto out;
} while (!d_is_negative(sdentry));
- ihold(&vnode->vfs_inode);
+ ihold(&vnode->netfs.inode);
ret = afs_do_silly_rename(dvnode, vnode, dentry, sdentry, key);
switch (ret) {
d_drop(sdentry);
}
- iput(&vnode->vfs_inode);
+ iput(&vnode->netfs.inode);
dput(sdentry);
out:
_leave(" = %d", ret);
/* there shouldn't be an existing inode */
BUG_ON(!(inode->i_state & I_NEW));
- netfs_i_context_init(inode, NULL);
+ netfs_inode_init(&vnode->netfs, NULL);
inode->i_size = 0;
inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO;
if (root) {
afs_put_wb_key(af->wb);
if ((file->f_mode & FMODE_WRITE)) {
- i_size = i_size_read(&vnode->vfs_inode);
+ i_size = i_size_read(&vnode->netfs.inode);
afs_set_cache_aux(vnode, &aux);
fscache_unuse_cookie(afs_vnode_cache(vnode), &aux, &i_size);
} else {
fsreq->iter = &fsreq->def_iter;
iov_iter_xarray(&fsreq->def_iter, READ,
- &fsreq->vnode->vfs_inode.i_mapping->i_pages,
+ &fsreq->vnode->netfs.inode.i_mapping->i_pages,
fsreq->pos, fsreq->len);
afs_fetch_data(fsreq->vnode, fsreq);
}
static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
- struct folio *folio, void **_fsdata)
+ struct folio **foliop, void **_fsdata)
{
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
return test_bit(AFS_VNODE_DELETED, &vnode->flags) ? -ESTALE : 0;
}
-static void afs_priv_cleanup(struct address_space *mapping, void *netfs_priv)
+static void afs_free_request(struct netfs_io_request *rreq)
{
- key_put(netfs_priv);
+ key_put(rreq->netfs_priv);
}
const struct netfs_request_ops afs_req_ops = {
.init_request = afs_init_request,
+ .free_request = afs_free_request,
.begin_cache_operation = afs_begin_cache_operation,
.check_write_begin = afs_check_write_begin,
.issue_read = afs_issue_read,
- .cleanup = afs_priv_cleanup,
};
int afs_write_inode(struct inode *inode, struct writeback_control *wbc)
if (op->file[1].modification && op->file[1].vnode != op->file[0].vnode)
clear_bit(AFS_VNODE_MODIFYING, &op->file[1].vnode->flags);
if (op->file[0].put_vnode)
- iput(&op->file[0].vnode->vfs_inode);
+ iput(&op->file[0].vnode->netfs.inode);
if (op->file[1].put_vnode)
- iput(&op->file[1].vnode->vfs_inode);
+ iput(&op->file[1].vnode->netfs.inode);
if (op->more_files) {
for (i = 0; i < op->nr_files - 2; i++)
if (op->more_files[i].put_vnode)
- iput(&op->more_files[i].vnode->vfs_inode);
+ iput(&op->more_files[i].vnode->netfs.inode);
kfree(op->more_files);
}
*/
static void afs_set_netfs_context(struct afs_vnode *vnode)
{
- netfs_i_context_init(&vnode->vfs_inode, &afs_req_ops);
+ netfs_inode_init(&vnode->netfs, &afs_req_ops);
}
/*
inode->i_flags |= S_NOATIME;
inode->i_uid = make_kuid(&init_user_ns, status->owner);
inode->i_gid = make_kgid(&init_user_ns, status->group);
- set_nlink(&vnode->vfs_inode, status->nlink);
+ set_nlink(&vnode->netfs.inode, status->nlink);
switch (status->type) {
case AFS_FTYPE_FILE:
afs_set_netfs_context(vnode);
vnode->invalid_before = status->data_version;
- inode_set_iversion_raw(&vnode->vfs_inode, status->data_version);
+ inode_set_iversion_raw(&vnode->netfs.inode, status->data_version);
if (!vp->scb.have_cb) {
/* it's a symlink we just created (the fileserver
{
struct afs_file_status *status = &vp->scb.status;
struct afs_vnode *vnode = vp->vnode;
- struct inode *inode = &vnode->vfs_inode;
+ struct inode *inode = &vnode->netfs.inode;
struct timespec64 t;
umode_t mode;
bool data_changed = false;
* idea of what the size should be that's not the same as
* what's on the server.
*/
- vnode->netfs_ctx.remote_i_size = status->size;
+ vnode->netfs.remote_i_size = status->size;
if (change_size) {
afs_set_i_size(vnode, status->size);
inode->i_ctime = t;
*/
if (vp->scb.status.abort_code == VNOVNODE) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
- clear_nlink(&vnode->vfs_inode);
+ clear_nlink(&vnode->netfs.inode);
__afs_break_callback(vnode, afs_cb_break_for_deleted);
op->flags &= ~AFS_OPERATION_DIR_CONFLICT;
}
if (vp->scb.have_cb)
afs_apply_callback(op, vp);
} else if (vp->op_unlinked && !(op->flags & AFS_OPERATION_DIR_CONFLICT)) {
- drop_nlink(&vnode->vfs_inode);
- if (vnode->vfs_inode.i_nlink == 0) {
+ drop_nlink(&vnode->netfs.inode);
+ if (vnode->netfs.inode.i_nlink == 0) {
set_bit(AFS_VNODE_DELETED, &vnode->flags);
__afs_break_callback(vnode, afs_cb_break_for_deleted);
}
struct afs_vnode *vnode = vp->vnode;
int ret;
- if (vnode->vfs_inode.i_state & I_NEW) {
+ if (vnode->netfs.inode.i_state & I_NEW) {
ret = afs_inode_init_from_status(op, vp, vnode);
op->error = ret;
if (ret == 0)
struct afs_vnode_cache_aux aux;
if (vnode->status.type != AFS_FTYPE_FILE) {
- vnode->netfs_ctx.cache = NULL;
+ vnode->netfs.cache = NULL;
return;
}
struct inode *afs_iget(struct afs_operation *op, struct afs_vnode_param *vp)
{
struct afs_vnode_param *dvp = &op->file[0];
- struct super_block *sb = dvp->vnode->vfs_inode.i_sb;
+ struct super_block *sb = dvp->vnode->netfs.inode.i_sb;
struct afs_vnode *vnode;
struct inode *inode;
int ret;
/* nuke all the non-dirty pages that aren't locked, mapped or being
* written back in a regular file and completely discard the pages in a
* directory or symlink */
- if (S_ISREG(vnode->vfs_inode.i_mode))
- invalidate_remote_inode(&vnode->vfs_inode);
+ if (S_ISREG(vnode->netfs.inode.i_mode))
+ invalidate_remote_inode(&vnode->netfs.inode);
else
- invalidate_inode_pages2(vnode->vfs_inode.i_mapping);
+ invalidate_inode_pages2(vnode->netfs.inode.i_mapping);
}
/*
key_serial(key));
if (unlikely(test_bit(AFS_VNODE_DELETED, &vnode->flags))) {
- if (vnode->vfs_inode.i_nlink)
- clear_nlink(&vnode->vfs_inode);
+ if (vnode->netfs.inode.i_nlink)
+ clear_nlink(&vnode->netfs.inode);
goto valid;
}
_enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation);
- if (!(query_flags & AT_STATX_DONT_SYNC) &&
+ if (vnode->volume &&
+ !(query_flags & AT_STATX_DONT_SYNC) &&
!test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) {
key = afs_request_key(vnode->volume->cell);
if (IS_ERR(key))
static void afs_setattr_success(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
- struct inode *inode = &vp->vnode->vfs_inode;
+ struct inode *inode = &vp->vnode->netfs.inode;
loff_t old_i_size = i_size_read(inode);
op->setattr.old_i_size = old_i_size;
static void afs_setattr_edit_file(struct afs_operation *op)
{
struct afs_vnode_param *vp = &op->file[0];
- struct inode *inode = &vp->vnode->vfs_inode;
+ struct inode *inode = &vp->vnode->netfs.inode;
if (op->setattr.attr->ia_valid & ATTR_SIZE) {
loff_t size = op->setattr.attr->ia_size;
ATTR_MTIME | ATTR_MTIME_SET | ATTR_TIMES_SET | ATTR_TOUCH;
struct afs_operation *op;
struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
- struct inode *inode = &vnode->vfs_inode;
+ struct inode *inode = &vnode->netfs.inode;
loff_t i_size;
int ret;
* leak from one inode to another.
*/
struct afs_vnode {
- struct {
- /* These must be contiguous */
- struct inode vfs_inode; /* the VFS's inode record */
- struct netfs_i_context netfs_ctx; /* Netfslib context */
- };
-
+ struct netfs_inode netfs; /* Netfslib context and vfs inode */
struct afs_volume *volume; /* volume on which vnode resides */
struct afs_fid fid; /* the file identifier for this inode */
struct afs_file_status status; /* AFS status info for this file */
static inline struct fscache_cookie *afs_vnode_cache(struct afs_vnode *vnode)
{
#ifdef CONFIG_AFS_FSCACHE
- return netfs_i_cookie(&vnode->vfs_inode);
+ return netfs_i_cookie(&vnode->netfs);
#else
return NULL;
#endif
struct fscache_cookie *cookie)
{
#ifdef CONFIG_AFS_FSCACHE
- vnode->netfs_ctx.cache = cookie;
+ vnode->netfs.cache = cookie;
#endif
}
afs_set_cache_aux(vnode, &aux);
fscache_invalidate(afs_vnode_cache(vnode), &aux,
- i_size_read(&vnode->vfs_inode), flags);
+ i_size_read(&vnode->netfs.inode), flags);
}
/*
static inline struct afs_net *afs_v2net(struct afs_vnode *vnode)
{
- return afs_i2net(&vnode->vfs_inode);
+ return afs_i2net(&vnode->netfs.inode);
}
static inline struct afs_net *afs_sock2net(struct sock *sk)
*/
static inline struct afs_vnode *AFS_FS_I(struct inode *inode)
{
- return container_of(inode, struct afs_vnode, vfs_inode);
+ return container_of(inode, struct afs_vnode, netfs.inode);
}
static inline struct inode *AFS_VNODE_TO_I(struct afs_vnode *vnode)
{
- return &vnode->vfs_inode;
+ return &vnode->netfs.inode;
}
/*
*/
static inline void afs_set_i_size(struct afs_vnode *vnode, u64 size)
{
- i_size_write(&vnode->vfs_inode, size);
- vnode->vfs_inode.i_blocks = ((size + 1023) >> 10) << 1;
+ i_size_write(&vnode->netfs.inode, size);
+ vnode->netfs.inode.i_blocks = ((size + 1023) >> 10) << 1;
}
/*
struct afs_vnode *vnode = _vnode;
memset(vnode, 0, sizeof(*vnode));
- inode_init_once(&vnode->vfs_inode);
+ inode_init_once(&vnode->netfs.inode);
mutex_init(&vnode->io_lock);
init_rwsem(&vnode->validate_lock);
spin_lock_init(&vnode->wb_lock);
init_rwsem(&vnode->rmdir_lock);
INIT_WORK(&vnode->cb_work, afs_invalidate_mmap_work);
- _leave(" = %p", &vnode->vfs_inode);
- return &vnode->vfs_inode;
+ _leave(" = %p", &vnode->netfs.inode);
+ return &vnode->netfs.inode;
}
static void afs_free_inode(struct inode *inode)
#include <linux/slab.h>
#include "internal.h"
-unsigned __read_mostly afs_volume_gc_delay = 10;
-unsigned __read_mostly afs_volume_record_life = 60 * 60;
+static unsigned __read_mostly afs_volume_record_life = 60 * 60;
/*
* Insert a volume into a cell. If there's an existing volume record, that is
* file. We need to do this before we get a lock on the page in case
* there's more than one writer competing for the same cache block.
*/
- ret = netfs_write_begin(file, mapping, pos, len, &folio, fsdata);
+ ret = netfs_write_begin(&vnode->netfs, file, mapping, pos, len, &folio, fsdata);
if (ret < 0)
return ret;
write_end_pos = pos + copied;
- i_size = i_size_read(&vnode->vfs_inode);
+ i_size = i_size_read(&vnode->netfs.inode);
if (write_end_pos > i_size) {
write_seqlock(&vnode->cb_lock);
- i_size = i_size_read(&vnode->vfs_inode);
+ i_size = i_size_read(&vnode->netfs.inode);
if (write_end_pos > i_size)
afs_set_i_size(vnode, write_end_pos);
write_sequnlock(&vnode->cb_lock);
*/
static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsigned int len)
{
- struct address_space *mapping = vnode->vfs_inode.i_mapping;
+ struct address_space *mapping = vnode->netfs.inode.i_mapping;
struct folio *folio;
pgoff_t end;
static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
bool laundering)
{
- struct netfs_i_context *ictx = &vnode->netfs_ctx;
struct afs_operation *op;
struct afs_wb_key *wbk = NULL;
loff_t size = iov_iter_count(iter);
op->store.write_iter = iter;
op->store.pos = pos;
op->store.size = size;
- op->store.i_size = max(pos + size, ictx->remote_i_size);
+ op->store.i_size = max(pos + size, vnode->netfs.remote_i_size);
op->store.laundering = laundering;
- op->mtime = vnode->vfs_inode.i_mtime;
+ op->mtime = vnode->netfs.inode.i_mtime;
op->flags |= AFS_OPERATION_UNINTR;
op->ops = &afs_store_data_operation;
struct iov_iter iter;
unsigned long priv;
unsigned int offset, to, len, max_len;
- loff_t i_size = i_size_read(&vnode->vfs_inode);
+ loff_t i_size = i_size_read(&vnode->netfs.inode);
bool new_content = test_bit(AFS_VNODE_NEW_CONTENT, &vnode->flags);
bool caching = fscache_cookie_enabled(afs_vnode_cache(vnode));
long count = wbc->nr_to_write;
_enter("{%llx:%llu},{%zu},",
vnode->fid.vid, vnode->fid.vnode, count);
- if (IS_SWAPFILE(&vnode->vfs_inode)) {
+ if (IS_SWAPFILE(&vnode->netfs.inode)) {
printk(KERN_INFO
"AFS: Attempt to write to active swap file!\n");
return -EBUSY;
/* Discard unused keys */
spin_lock(&vnode->wb_lock);
- if (!mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
- !mapping_tagged(&vnode->vfs_inode.i_data, PAGECACHE_TAG_DIRTY)) {
+ if (!mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_WRITEBACK) &&
+ !mapping_tagged(&vnode->netfs.inode.i_data, PAGECACHE_TAG_DIRTY)) {
list_for_each_entry_safe(wbk, tmp, &vnode->wb_keys, vnode_link) {
if (refcount_read(&wbk->usage) == 1)
list_move(&wbk->vnode_link, &graveyard);
bool caching)
{
fscache_write_to_cache(afs_vnode_cache(vnode),
- vnode->vfs_inode.i_mapping, start, len, i_size,
+ vnode->netfs.inode.i_mapping, start, len, i_size,
afs_write_to_cache_done, vnode, caching);
}
const struct inode *inode, kgid_t gid)
{
kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
- if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
- (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
- return true;
+ if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
+ kgid_t mapped_gid;
+
+ if (gid_eq(gid, inode->i_gid))
+ return true;
+ mapped_gid = mapped_kgid_fs(mnt_userns, i_user_ns(inode), gid);
+ if (in_group_p(mapped_gid))
+ return true;
+ }
if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
return true;
if (gid_eq(kgid, INVALID_GID) &&
/* Make sure a caller can chmod. */
if (ia_valid & ATTR_MODE) {
+ kgid_t mapped_gid;
+
if (!inode_owner_or_capable(mnt_userns, inode))
return -EPERM;
+
+ if (ia_valid & ATTR_GID)
+ mapped_gid = mapped_kgid_fs(mnt_userns,
+ i_user_ns(inode), attr->ia_gid);
+ else
+ mapped_gid = i_gid_into_mnt(mnt_userns, inode);
+
/* Also check the setgid bit! */
- if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
- i_gid_into_mnt(mnt_userns, inode)) &&
- !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
+ if (!in_group_p(mapped_gid) &&
+ !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
attr->ia_mode &= ~S_ISGID;
}
unsigned int relocating_repair:1;
unsigned int chunk_item_inserted:1;
unsigned int zone_is_active:1;
+ unsigned int zoned_data_reloc_ongoing:1;
int disk_cache_state;
rwlock_t global_root_lock;
struct rb_root global_root_tree;
- /* The xarray that holds all the FS roots */
- spinlock_t fs_roots_lock;
- struct xarray fs_roots;
+ spinlock_t fs_roots_radix_lock;
+ struct radix_tree_root fs_roots_radix;
/* block group cache stuff */
rwlock_t block_group_cache_lock;
struct btrfs_delayed_root *delayed_root;
- /* Extent buffer xarray */
+ /* Extent buffer radix tree */
spinlock_t buffer_lock;
/* Entries are eb->start / sectorsize */
- struct xarray extent_buffers;
+ struct radix_tree_root buffer_radix;
/* next backup root to be overwritten */
int backup_root_index;
*/
BTRFS_ROOT_SHAREABLE,
BTRFS_ROOT_TRACK_DIRTY,
- /* The root is tracked in fs_info::fs_roots */
- BTRFS_ROOT_REGISTERED,
+ BTRFS_ROOT_IN_RADIX,
BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
BTRFS_ROOT_DEFRAG_RUNNING,
BTRFS_ROOT_FORCE_COW,
struct rb_root inode_tree;
/*
- * Xarray that keeps track of delayed nodes of every inode, protected
- * by inode_lock
+ * radix tree that keeps track of delayed nodes of every inode,
+ * protected by inode_lock
*/
- struct xarray delayed_nodes;
+ struct radix_tree_root delayed_nodes_tree;
/*
* right now this just gets used so that a root has its own devid
* for stat. It may be used for more later
* existing extent into a file range.
*/
bool is_new_extent;
+ /* Indicate if we should update the inode's mtime and ctime. */
+ bool update_times;
/* Meaningful only if is_new_extent is true. */
int qgroup_reserved;
/*
}
spin_lock(&root->inode_lock);
- node = xa_load(&root->delayed_nodes, ino);
+ node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
if (node) {
if (btrfs_inode->delayed_node) {
/*
* It's possible that we're racing into the middle of removing
- * this node from the xarray. In this case, the refcount
+ * this node from the radix tree. In this case, the refcount
* was zero and it should never go back to one. Just return
- * NULL like it was never in the xarray at all; our release
+ * NULL like it was never in the radix at all; our release
* function is in the process of removing it.
*
* Some implementations of refcount_inc refuse to bump the
* here, refcount_inc() may decide to just WARN_ONCE() instead
* of actually bumping the refcount.
*
- * If this node is properly in the xarray, we want to bump the
+ * If this node is properly in the radix, we want to bump the
* refcount twice, once for the inode and once for this get
* operation.
*/
u64 ino = btrfs_ino(btrfs_inode);
int ret;
- do {
- node = btrfs_get_delayed_node(btrfs_inode);
- if (node)
- return node;
+again:
+ node = btrfs_get_delayed_node(btrfs_inode);
+ if (node)
+ return node;
- node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
- if (!node)
- return ERR_PTR(-ENOMEM);
- btrfs_init_delayed_node(node, root, ino);
+ node = kmem_cache_zalloc(delayed_node_cache, GFP_NOFS);
+ if (!node)
+ return ERR_PTR(-ENOMEM);
+ btrfs_init_delayed_node(node, root, ino);
- /* Cached in the inode and can be accessed */
- refcount_set(&node->refs, 2);
+ /* cached in the btrfs inode and can be accessed */
+ refcount_set(&node->refs, 2);
- spin_lock(&root->inode_lock);
- ret = xa_insert(&root->delayed_nodes, ino, node, GFP_NOFS);
- if (ret) {
- spin_unlock(&root->inode_lock);
- kmem_cache_free(delayed_node_cache, node);
- if (ret != -EBUSY)
- return ERR_PTR(ret);
- }
- } while (ret);
+ ret = radix_tree_preload(GFP_NOFS);
+ if (ret) {
+ kmem_cache_free(delayed_node_cache, node);
+ return ERR_PTR(ret);
+ }
+
+ spin_lock(&root->inode_lock);
+ ret = radix_tree_insert(&root->delayed_nodes_tree, ino, node);
+ if (ret == -EEXIST) {
+ spin_unlock(&root->inode_lock);
+ kmem_cache_free(delayed_node_cache, node);
+ radix_tree_preload_end();
+ goto again;
+ }
btrfs_inode->delayed_node = node;
spin_unlock(&root->inode_lock);
+ radix_tree_preload_end();
return node;
}
* back up. We can delete it now.
*/
ASSERT(refcount_read(&delayed_node->refs) == 0);
- xa_erase(&root->delayed_nodes, delayed_node->inode_id);
+ radix_tree_delete(&root->delayed_nodes_tree,
+ delayed_node->inode_id);
spin_unlock(&root->inode_lock);
kmem_cache_free(delayed_node_cache, delayed_node);
}
void btrfs_kill_all_delayed_nodes(struct btrfs_root *root)
{
- unsigned long index = 0;
- struct btrfs_delayed_node *delayed_node;
+ u64 inode_id = 0;
struct btrfs_delayed_node *delayed_nodes[8];
+ int i, n;
while (1) {
- int n = 0;
-
spin_lock(&root->inode_lock);
- if (xa_empty(&root->delayed_nodes)) {
+ n = radix_tree_gang_lookup(&root->delayed_nodes_tree,
+ (void **)delayed_nodes, inode_id,
+ ARRAY_SIZE(delayed_nodes));
+ if (!n) {
spin_unlock(&root->inode_lock);
- return;
+ break;
}
- xa_for_each_start(&root->delayed_nodes, index, delayed_node, index) {
+ inode_id = delayed_nodes[n - 1]->inode_id + 1;
+ for (i = 0; i < n; i++) {
/*
* Don't increase refs in case the node is dead and
* about to be removed from the tree in the loop below
*/
- if (refcount_inc_not_zero(&delayed_node->refs)) {
- delayed_nodes[n] = delayed_node;
- n++;
- }
- if (n >= ARRAY_SIZE(delayed_nodes))
- break;
+ if (!refcount_inc_not_zero(&delayed_nodes[i]->refs))
+ delayed_nodes[i] = NULL;
}
- index++;
spin_unlock(&root->inode_lock);
- for (int i = 0; i < n; i++) {
+ for (i = 0; i < n; i++) {
+ if (!delayed_nodes[i])
+ continue;
__btrfs_kill_delayed_node(delayed_nodes[i]);
btrfs_release_delayed_node(delayed_nodes[i]);
}
#include <linux/fs.h>
#include <linux/blkdev.h>
+#include <linux/radix-tree.h>
#include <linux/writeback.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
uptodate = btrfs_subpage_test_uptodate(fs_info, page, cur,
fs_info->nodesize);
- /* A dirty eb shouldn't disappear from extent_buffers */
+ /* A dirty eb shouldn't disappear from buffer_radix */
if (WARN_ON(!eb))
return -EUCLEAN;
root->nr_delalloc_inodes = 0;
root->nr_ordered_extents = 0;
root->inode_tree = RB_ROOT;
- xa_init_flags(&root->delayed_nodes, GFP_ATOMIC);
+ INIT_RADIX_TREE(&root->delayed_nodes_tree, GFP_ATOMIC);
btrfs_init_root_block_rsv(root);
btrfs_qgroup_init_swapped_blocks(&root->swapped_blocks);
#ifdef CONFIG_BTRFS_DEBUG
INIT_LIST_HEAD(&root->leak_list);
- spin_lock(&fs_info->fs_roots_lock);
+ spin_lock(&fs_info->fs_roots_radix_lock);
list_add_tail(&root->leak_list, &fs_info->allocated_roots);
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
#endif
}
{
struct btrfs_root *root;
- spin_lock(&fs_info->fs_roots_lock);
- root = xa_load(&fs_info->fs_roots, (unsigned long)root_id);
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ root = radix_tree_lookup(&fs_info->fs_roots_radix,
+ (unsigned long)root_id);
if (root)
root = btrfs_grab_root(root);
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
return root;
}
{
int ret;
- spin_lock(&fs_info->fs_roots_lock);
- ret = xa_insert(&fs_info->fs_roots, (unsigned long)root->root_key.objectid,
- root, GFP_NOFS);
+ ret = radix_tree_preload(GFP_NOFS);
+ if (ret)
+ return ret;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ ret = radix_tree_insert(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ root);
if (ret == 0) {
btrfs_grab_root(root);
- set_bit(BTRFS_ROOT_REGISTERED, &root->state);
+ set_bit(BTRFS_ROOT_IN_RADIX, &root->state);
}
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ radix_tree_preload_end();
return ret;
}
btrfs_drew_lock_destroy(&root->snapshot_lock);
free_root_extent_buffers(root);
#ifdef CONFIG_BTRFS_DEBUG
- spin_lock(&root->fs_info->fs_roots_lock);
+ spin_lock(&root->fs_info->fs_roots_radix_lock);
list_del_init(&root->leak_list);
- spin_unlock(&root->fs_info->fs_roots_lock);
+ spin_unlock(&root->fs_info->fs_roots_radix_lock);
#endif
kfree(root);
}
void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *root;
- unsigned long index = 0;
+ int ret;
+ struct btrfs_root *gang[8];
+ int i;
while (!list_empty(&fs_info->dead_roots)) {
- root = list_entry(fs_info->dead_roots.next,
- struct btrfs_root, root_list);
- list_del(&root->root_list);
+ gang[0] = list_entry(fs_info->dead_roots.next,
+ struct btrfs_root, root_list);
+ list_del(&gang[0]->root_list);
- if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
- btrfs_drop_and_free_fs_root(fs_info, root);
- btrfs_put_root(root);
+ if (test_bit(BTRFS_ROOT_IN_RADIX, &gang[0]->state))
+ btrfs_drop_and_free_fs_root(fs_info, gang[0]);
+ btrfs_put_root(gang[0]);
}
- xa_for_each(&fs_info->fs_roots, index, root) {
- btrfs_drop_and_free_fs_root(fs_info, root);
+ while (1) {
+ ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, 0,
+ ARRAY_SIZE(gang));
+ if (!ret)
+ break;
+ for (i = 0; i < ret; i++)
+ btrfs_drop_and_free_fs_root(fs_info, gang[i]);
}
}
void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
{
- xa_init_flags(&fs_info->fs_roots, GFP_ATOMIC);
- xa_init_flags(&fs_info->extent_buffers, GFP_ATOMIC);
+ INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+ INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
INIT_LIST_HEAD(&fs_info->trans_list);
INIT_LIST_HEAD(&fs_info->dead_roots);
INIT_LIST_HEAD(&fs_info->delayed_iputs);
INIT_LIST_HEAD(&fs_info->caching_block_groups);
spin_lock_init(&fs_info->delalloc_root_lock);
spin_lock_init(&fs_info->trans_lock);
- spin_lock_init(&fs_info->fs_roots_lock);
+ spin_lock_init(&fs_info->fs_roots_radix_lock);
spin_lock_init(&fs_info->delayed_iput_lock);
spin_lock_init(&fs_info->defrag_inodes_lock);
spin_lock_init(&fs_info->super_lock);
/*
* btrfs_find_orphan_roots() is responsible for finding all the dead
* roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load
- * them into the fs_info->fs_roots. This must be done before
+ * them into the fs_info->fs_roots_radix tree. This must be done before
* calling btrfs_orphan_cleanup() on the tree root. If we don't do it
* first, then btrfs_orphan_cleanup() will delete a dead root's orphan
* item before the root's tree is deleted - this means that if we unmount
{
bool drop_ref = false;
- spin_lock(&fs_info->fs_roots_lock);
- xa_erase(&fs_info->fs_roots, (unsigned long)root->root_key.objectid);
- if (test_and_clear_bit(BTRFS_ROOT_REGISTERED, &root->state))
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ radix_tree_delete(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid);
+ if (test_and_clear_bit(BTRFS_ROOT_IN_RADIX, &root->state))
drop_ref = true;
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
if (BTRFS_FS_ERROR(fs_info)) {
ASSERT(root->log_root == NULL);
int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info)
{
- struct btrfs_root *roots[8];
- unsigned long index = 0;
- int i;
+ u64 root_objectid = 0;
+ struct btrfs_root *gang[8];
+ int i = 0;
int err = 0;
- int grabbed;
+ unsigned int ret = 0;
while (1) {
- struct btrfs_root *root;
-
- spin_lock(&fs_info->fs_roots_lock);
- if (!xa_find(&fs_info->fs_roots, &index, ULONG_MAX, XA_PRESENT)) {
- spin_unlock(&fs_info->fs_roots_lock);
- return err;
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, root_objectid,
+ ARRAY_SIZE(gang));
+ if (!ret) {
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ break;
}
+ root_objectid = gang[ret - 1]->root_key.objectid + 1;
- grabbed = 0;
- xa_for_each_start(&fs_info->fs_roots, index, root, index) {
- /* Avoid grabbing roots in dead_roots */
- if (btrfs_root_refs(&root->root_item) > 0)
- roots[grabbed++] = btrfs_grab_root(root);
- if (grabbed >= ARRAY_SIZE(roots))
- break;
+ for (i = 0; i < ret; i++) {
+ /* Avoid to grab roots in dead_roots */
+ if (btrfs_root_refs(&gang[i]->root_item) == 0) {
+ gang[i] = NULL;
+ continue;
+ }
+ /* grab all the search result for later use */
+ gang[i] = btrfs_grab_root(gang[i]);
}
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
- for (i = 0; i < grabbed; i++) {
- if (!roots[i])
+ for (i = 0; i < ret; i++) {
+ if (!gang[i])
continue;
- index = roots[i]->root_key.objectid;
- err = btrfs_orphan_cleanup(roots[i]);
+ root_objectid = gang[i]->root_key.objectid;
+ err = btrfs_orphan_cleanup(gang[i]);
if (err)
- goto out;
- btrfs_put_root(roots[i]);
+ break;
+ btrfs_put_root(gang[i]);
}
- index++;
+ root_objectid++;
}
-out:
- /* Release the roots that remain uncleaned due to error */
- for (; i < grabbed; i++) {
- if (roots[i])
- btrfs_put_root(roots[i]);
+ /* release the uncleaned roots due to error */
+ for (; i < ret; i++) {
+ if (gang[i])
+ btrfs_put_root(gang[i]);
}
return err;
}
int ret;
set_bit(BTRFS_FS_CLOSING_START, &fs_info->flags);
+
+ /*
+ * We may have the reclaim task running and relocating a data block group,
+ * in which case it may create delayed iputs. So stop it before we park
+ * the cleaner kthread otherwise we can get new delayed iputs after
+ * parking the cleaner, and that can make the async reclaim task to hang
+ * if it's waiting for delayed iputs to complete, since the cleaner is
+ * parked and can not run delayed iputs - this will make us hang when
+ * trying to stop the async reclaim task.
+ */
+ cancel_work_sync(&fs_info->reclaim_bgs_work);
/*
* We don't want the cleaner to start new transactions, add more delayed
* iputs, etc. while we're closing. We can't use kthread_stop() yet
cancel_work_sync(&fs_info->async_data_reclaim_work);
cancel_work_sync(&fs_info->preempt_reclaim_work);
- cancel_work_sync(&fs_info->reclaim_bgs_work);
-
/* Cancel or finish ongoing discard work */
btrfs_discard_cleanup(fs_info);
static void btrfs_drop_all_logs(struct btrfs_fs_info *fs_info)
{
- unsigned long index = 0;
- int grabbed = 0;
- struct btrfs_root *roots[8];
+ struct btrfs_root *gang[8];
+ u64 root_objectid = 0;
+ int ret;
+
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ while ((ret = radix_tree_gang_lookup(&fs_info->fs_roots_radix,
+ (void **)gang, root_objectid,
+ ARRAY_SIZE(gang))) != 0) {
+ int i;
- spin_lock(&fs_info->fs_roots_lock);
- while ((grabbed = xa_extract(&fs_info->fs_roots, (void **)roots, index,
- ULONG_MAX, 8, XA_PRESENT))) {
- for (int i = 0; i < grabbed; i++)
- roots[i] = btrfs_grab_root(roots[i]);
- spin_unlock(&fs_info->fs_roots_lock);
+ for (i = 0; i < ret; i++)
+ gang[i] = btrfs_grab_root(gang[i]);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
- for (int i = 0; i < grabbed; i++) {
- if (!roots[i])
+ for (i = 0; i < ret; i++) {
+ if (!gang[i])
continue;
- index = roots[i]->root_key.objectid;
- btrfs_free_log(NULL, roots[i]);
- btrfs_put_root(roots[i]);
+ root_objectid = gang[i]->root_key.objectid;
+ btrfs_free_log(NULL, gang[i]);
+ btrfs_put_root(gang[i]);
}
- index++;
- spin_lock(&fs_info->fs_roots_lock);
+ root_objectid++;
+ spin_lock(&fs_info->fs_roots_radix_lock);
}
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
btrfs_free_log_root_tree(NULL, fs_info);
}
block_group->start == fs_info->data_reloc_bg ||
fs_info->data_reloc_bg == 0);
- if (block_group->ro) {
+ if (block_group->ro || block_group->zoned_data_reloc_ongoing) {
ret = 1;
goto out;
}
out:
if (ret && ffe_ctl->for_treelog)
fs_info->treelog_bg = 0;
- if (ret && ffe_ctl->for_data_reloc)
+ if (ret && ffe_ctl->for_data_reloc &&
+ fs_info->data_reloc_bg == block_group->start) {
+ /*
+ * Do not allow further allocations from this block group.
+ * Compared to increasing the ->ro, setting the
+ * ->zoned_data_reloc_ongoing flag still allows nocow
+ * writers to come in. See btrfs_inc_nocow_writers().
+ *
+ * We need to disable an allocation to avoid an allocation of
+ * regular (non-relocation data) extent. With mix of relocation
+ * extents and regular extents, we can dispatch WRITE commands
+ * (for relocation extents) and ZONE APPEND commands (for
+ * regular extents) at the same time to the same zone, which
+ * easily break the write pointer.
+ */
+ block_group->zoned_data_reloc_ongoing = 1;
fs_info->data_reloc_bg = 0;
+ }
spin_unlock(&fs_info->relocation_bg_lock);
spin_unlock(&fs_info->treelog_bg_lock);
spin_unlock(&block_group->lock);
btrfs_qgroup_convert_reserved_meta(root, INT_MAX);
btrfs_qgroup_free_meta_all_pertrans(root);
- if (test_bit(BTRFS_ROOT_REGISTERED, &root->state))
+ if (test_bit(BTRFS_ROOT_IN_RADIX, &root->state))
btrfs_add_dropped_root(trans, root);
else
btrfs_put_root(root);
}
/*
- * Find extent buffer for a given bytenr.
+ * Find extent buffer for a givne bytenr.
*
* This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
* in endio context.
return (struct extent_buffer *)page->private;
}
- /* For subpage case, we need to lookup extent buffer xarray */
- eb = xa_load(&fs_info->extent_buffers,
- bytenr >> fs_info->sectorsize_bits);
+ /* For subpage case, we need to lookup buffer radix tree */
+ rcu_read_lock();
+ eb = radix_tree_lookup(&fs_info->buffer_radix,
+ bytenr >> fs_info->sectorsize_bits);
+ rcu_read_unlock();
ASSERT(eb);
return eb;
}
struct extent_buffer *eb;
rcu_read_lock();
- eb = xa_load(&fs_info->extent_buffers,
- start >> fs_info->sectorsize_bits);
+ eb = radix_tree_lookup(&fs_info->buffer_radix,
+ start >> fs_info->sectorsize_bits);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
return eb;
*/
btrfs_zoned_data_reloc_lock(BTRFS_I(inode));
ret = extent_write_cache_pages(mapping, wbc, &epd);
- btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
ASSERT(ret <= 0);
if (ret < 0) {
+ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
end_write_bio(&epd, ret);
return ret;
}
flush_write_bio(&epd);
+ btrfs_zoned_data_reloc_unlock(BTRFS_I(inode));
return ret;
}
if (!eb)
return ERR_PTR(-ENOMEM);
eb->fs_info = fs_info;
-
- do {
- ret = xa_insert(&fs_info->extent_buffers,
- start >> fs_info->sectorsize_bits,
- eb, GFP_NOFS);
- if (ret == -ENOMEM) {
- exists = ERR_PTR(ret);
+again:
+ ret = radix_tree_preload(GFP_NOFS);
+ if (ret) {
+ exists = ERR_PTR(ret);
+ goto free_eb;
+ }
+ spin_lock(&fs_info->buffer_lock);
+ ret = radix_tree_insert(&fs_info->buffer_radix,
+ start >> fs_info->sectorsize_bits, eb);
+ spin_unlock(&fs_info->buffer_lock);
+ radix_tree_preload_end();
+ if (ret == -EEXIST) {
+ exists = find_extent_buffer(fs_info, start);
+ if (exists)
goto free_eb;
- }
- if (ret == -EBUSY) {
- exists = find_extent_buffer(fs_info, start);
- if (exists)
- goto free_eb;
- }
- } while (ret);
-
+ else
+ goto again;
+ }
check_buffer_tree_ref(eb);
set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
}
if (uptodate)
set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-
- do {
- ret = xa_insert(&fs_info->extent_buffers,
- start >> fs_info->sectorsize_bits,
- eb, GFP_NOFS);
- if (ret == -ENOMEM) {
- exists = ERR_PTR(ret);
+again:
+ ret = radix_tree_preload(GFP_NOFS);
+ if (ret) {
+ exists = ERR_PTR(ret);
+ goto free_eb;
+ }
+
+ spin_lock(&fs_info->buffer_lock);
+ ret = radix_tree_insert(&fs_info->buffer_radix,
+ start >> fs_info->sectorsize_bits, eb);
+ spin_unlock(&fs_info->buffer_lock);
+ radix_tree_preload_end();
+ if (ret == -EEXIST) {
+ exists = find_extent_buffer(fs_info, start);
+ if (exists)
goto free_eb;
- }
- if (ret == -EBUSY) {
- exists = find_extent_buffer(fs_info, start);
- if (exists)
- goto free_eb;
- }
- } while (ret);
-
+ else
+ goto again;
+ }
/* add one reference for the tree */
check_buffer_tree_ref(eb);
set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
spin_unlock(&eb->refs_lock);
- xa_erase(&fs_info->extent_buffers,
- eb->start >> fs_info->sectorsize_bits);
+ spin_lock(&fs_info->buffer_lock);
+ radix_tree_delete(&fs_info->buffer_radix,
+ eb->start >> fs_info->sectorsize_bits);
+ spin_unlock(&fs_info->buffer_lock);
} else {
spin_unlock(&eb->refs_lock);
}
}
}
+#define GANG_LOOKUP_SIZE 16
static struct extent_buffer *get_next_extent_buffer(
struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
{
- struct extent_buffer *eb;
- unsigned long index;
+ struct extent_buffer *gang[GANG_LOOKUP_SIZE];
+ struct extent_buffer *found = NULL;
u64 page_start = page_offset(page);
+ u64 cur = page_start;
ASSERT(in_range(bytenr, page_start, PAGE_SIZE));
lockdep_assert_held(&fs_info->buffer_lock);
- xa_for_each_start(&fs_info->extent_buffers, index, eb,
- page_start >> fs_info->sectorsize_bits) {
- if (in_range(eb->start, page_start, PAGE_SIZE))
- return eb;
- else if (eb->start >= page_start + PAGE_SIZE)
- /* Already beyond page end */
- return NULL;
+ while (cur < page_start + PAGE_SIZE) {
+ int ret;
+ int i;
+
+ ret = radix_tree_gang_lookup(&fs_info->buffer_radix,
+ (void **)gang, cur >> fs_info->sectorsize_bits,
+ min_t(unsigned int, GANG_LOOKUP_SIZE,
+ PAGE_SIZE / fs_info->nodesize));
+ if (ret == 0)
+ goto out;
+ for (i = 0; i < ret; i++) {
+ /* Already beyond page end */
+ if (gang[i]->start >= page_start + PAGE_SIZE)
+ goto out;
+ /* Found one */
+ if (gang[i]->start >= bytenr) {
+ found = gang[i];
+ goto out;
+ }
+ }
+ cur = gang[ret - 1]->start + gang[ret - 1]->len;
}
- return NULL;
+out:
+ return found;
}
static int try_release_subpage_extent_buffer(struct page *page)
*/
btrfs_inode_unlock(inode, BTRFS_ILOCK_MMAP);
- if (ret != BTRFS_NO_LOG_SYNC) {
+ if (ret == BTRFS_NO_LOG_SYNC) {
+ ret = btrfs_end_transaction(trans);
+ goto out;
+ }
+
+ /* We successfully logged the inode, attempt to sync the log. */
+ if (!ret) {
+ ret = btrfs_sync_log(trans, root, &ctx);
if (!ret) {
- ret = btrfs_sync_log(trans, root, &ctx);
- if (!ret) {
- ret = btrfs_end_transaction(trans);
- goto out;
- }
- }
- if (!full_sync) {
- ret = btrfs_wait_ordered_range(inode, start, len);
- if (ret) {
- btrfs_end_transaction(trans);
- goto out;
- }
+ ret = btrfs_end_transaction(trans);
+ goto out;
}
- ret = btrfs_commit_transaction(trans);
- } else {
+ }
+
+ /*
+ * At this point we need to commit the transaction because we had
+ * btrfs_need_log_full_commit() or some other error.
+ *
+ * If we didn't do a full sync we have to stop the trans handle, wait on
+ * the ordered extents, start it again and commit the transaction. If
+ * we attempt to wait on the ordered extents here we could deadlock with
+ * something like fallocate() that is holding the extent lock trying to
+ * start a transaction while some other thread is trying to commit the
+ * transaction while we (fsync) are currently holding the transaction
+ * open.
+ */
+ if (!full_sync) {
ret = btrfs_end_transaction(trans);
+ if (ret)
+ goto out;
+ ret = btrfs_wait_ordered_range(inode, start, len);
+ if (ret)
+ goto out;
+
+ /*
+ * This is safe to use here because we're only interested in
+ * making sure the transaction that had the ordered extents is
+ * committed. We aren't waiting on anything past this point,
+ * we're purely getting the transaction and committing it.
+ */
+ trans = btrfs_attach_transaction_barrier(root);
+ if (IS_ERR(trans)) {
+ ret = PTR_ERR(trans);
+
+ /*
+ * We committed the transaction and there's no currently
+ * running transaction, this means everything we care
+ * about made it to disk and we are done.
+ */
+ if (ret == -ENOENT)
+ ret = 0;
+ goto out;
+ }
}
+
+ ret = btrfs_commit_transaction(trans);
out:
ASSERT(list_empty(&ctx.list));
err = file_check_and_advance_wb_err(file);
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv, rsv,
min_size, false);
- BUG_ON(ret);
+ if (WARN_ON(ret))
+ goto out_trans;
trans->block_rsv = rsv;
cur_offset = start;
extent_info->file_offset += replace_len;
}
+ /*
+ * We are releasing our handle on the transaction, balance the
+ * dirty pages of the btree inode and flush delayed items, and
+ * then get a new transaction handle, which may now point to a
+ * new transaction in case someone else may have committed the
+ * transaction we used to replace/drop file extent items. So
+ * bump the inode's iversion and update mtime and ctime except
+ * if we are called from a dedupe context. This is because a
+ * power failure/crash may happen after the transaction is
+ * committed and before we finish replacing/dropping all the
+ * file extent items we need.
+ */
+ inode_inc_iversion(&inode->vfs_inode);
+
+ if (!extent_info || extent_info->update_times) {
+ inode->vfs_inode.i_mtime = current_time(&inode->vfs_inode);
+ inode->vfs_inode.i_ctime = inode->vfs_inode.i_mtime;
+ }
+
ret = btrfs_update_inode(trans, root, inode);
if (ret)
break;
ret = btrfs_block_rsv_migrate(&fs_info->trans_block_rsv,
rsv, min_size, false);
- BUG_ON(ret); /* shouldn't happen */
+ if (WARN_ON(ret))
+ break;
trans->block_rsv = rsv;
cur_offset = drop_args.drop_end;
ordered_extent->file_offset,
ordered_extent->file_offset +
logical_len);
+ btrfs_zoned_release_data_reloc_bg(fs_info, ordered_extent->disk_bytenr,
+ ordered_extent->disk_num_bytes);
} else {
BUG_ON(root == fs_info->tree_root);
ret = insert_ordered_extent_file_extent(trans, ordered_extent);
u64 last_objectid = 0;
int ret = 0, nr_unlink = 0;
- /* Bail out if the cleanup is already running. */
if (test_and_set_bit(BTRFS_ROOT_ORPHAN_CLEANUP, &root->state))
return 0;
*
* btrfs_find_orphan_roots() ran before us, which has
* found all deleted roots and loaded them into
- * fs_info->fs_roots. So here we can find if an
+ * fs_info->fs_roots_radix. So here we can find if an
* orphan item corresponds to a deleted root by looking
- * up the root from that xarray.
+ * up the root from that radix tree.
*/
- spin_lock(&fs_info->fs_roots_lock);
- dead_root = xa_load(&fs_info->fs_roots,
- (unsigned long)found_key.objectid);
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ dead_root = radix_tree_lookup(&fs_info->fs_roots_radix,
+ (unsigned long)found_key.objectid);
if (dead_root && btrfs_root_refs(&dead_root->root_item) == 0)
is_dead_root = 1;
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
if (is_dead_root) {
/* prevent this orphan from being found again */
* cache.
*
* This is required for both inode re-read from disk and delayed inode
- * in the delayed_nodes xarray.
+ * in delayed_nodes_tree.
*/
if (BTRFS_I(inode)->last_trans == fs_info->generation)
set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
em->block_start == EXTENT_MAP_INLINE) {
free_extent_map(em);
- ret = -ENOTBLK;
+ /*
+ * If we are in a NOWAIT context, return -EAGAIN in order to
+ * fallback to buffered IO. This is not only because we can
+ * block with buffered IO (no support for NOWAIT semantics at
+ * the moment) but also to avoid returning short reads to user
+ * space - this happens if we were able to read some data from
+ * previous non-compressed extents and then when we fallback to
+ * buffered IO, at btrfs_file_read_iter() by calling
+ * filemap_read(), we fail to fault in pages for the read buffer,
+ * in which case filemap_read() returns a short read (the number
+ * of bytes previously read is > 0, so it does not return -EFAULT).
+ */
+ ret = (flags & IOMAP_NOWAIT) ? -EAGAIN : -ENOTBLK;
goto unlock_err;
}
extent_info.file_offset = file_offset;
extent_info.extent_buf = (char *)&stack_fi;
extent_info.is_new_extent = true;
+ extent_info.update_times = true;
extent_info.qgroup_reserved = qgroup_released;
extent_info.insertions = 0;
start_ns = ktime_get_ns();
down_read_nested(&eb->lock, nest);
- eb->lock_owner = current->pid;
trace_btrfs_tree_read_lock(eb, start_ns);
}
int btrfs_try_tree_read_lock(struct extent_buffer *eb)
{
if (down_read_trylock(&eb->lock)) {
- eb->lock_owner = current->pid;
trace_btrfs_try_tree_read_lock(eb);
return 1;
}
void btrfs_tree_read_unlock(struct extent_buffer *eb)
{
trace_btrfs_tree_read_unlock(eb);
- eb->lock_owner = 0;
up_read(&eb->lock);
}
int ret;
const u64 len = olen_aligned;
u64 last_dest_end = destoff;
+ u64 prev_extent_end = off;
ret = -ENOMEM;
buf = kvmalloc(fs_info->nodesize, GFP_KERNEL);
key.offset = off;
while (1) {
- u64 next_key_min_offset = key.offset + 1;
struct btrfs_file_extent_item *extent;
u64 extent_gen;
int type;
* The first search might have left us at an extent item that
* ends before our target range's start, can happen if we have
* holes and NO_HOLES feature enabled.
+ *
+ * Subsequent searches may leave us on a file range we have
+ * processed before - this happens due to a race with ordered
+ * extent completion for a file range that is outside our source
+ * range, but that range was part of a file extent item that
+ * also covered a leading part of our source range.
*/
- if (key.offset + datal <= off) {
+ if (key.offset + datal <= prev_extent_end) {
path->slots[0]++;
goto process_slot;
} else if (key.offset >= off + len) {
break;
}
- next_key_min_offset = key.offset + datal;
+
+ prev_extent_end = key.offset + datal;
size = btrfs_item_size(leaf, slot);
read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, slot),
size);
clone_info.file_offset = new_key.offset;
clone_info.extent_buf = buf;
clone_info.is_new_extent = false;
+ clone_info.update_times = !no_time_update;
ret = btrfs_replace_file_extents(BTRFS_I(inode), path,
drop_start, new_key.offset + datal - 1,
&clone_info, &trans);
break;
btrfs_release_path(path);
- key.offset = next_key_min_offset;
+ key.offset = prev_extent_end;
if (fatal_signal_pending(current)) {
ret = -EINTR;
#include <linux/mount.h>
#include <linux/xattr.h>
#include <linux/posix_acl_xattr.h>
+#include <linux/radix-tree.h>
#include <linux/vmalloc.h>
#include <linux/string.h>
#include <linux/compat.h>
struct list_head new_refs;
struct list_head deleted_refs;
- struct xarray name_cache;
+ struct radix_tree_root name_cache;
struct list_head name_cache_list;
int name_cache_size;
struct name_cache_entry {
struct list_head list;
/*
- * On 32bit kernels, xarray has only 32bit indices, but we need to
- * handle 64bit inums. We use the lower 32bit of the 64bit inum to store
- * it in the tree. If more than one inum would fall into the same entry,
- * we use inum_aliases to store the additional entries. inum_aliases is
- * also used to store entries with the same inum but different generations.
+ * radix_tree has only 32bit entries but we need to handle 64bit inums.
+ * We use the lower 32bit of the 64bit inum to store it in the tree. If
+ * more then one inum would fall into the same entry, we use radix_list
+ * to store the additional entries. radix_list is also used to store
+ * entries where two entries have the same inum but different
+ * generations.
*/
- struct list_head inum_aliases;
+ struct list_head radix_list;
u64 ino;
u64 gen;
u64 parent_ino;
}
/*
- * Insert a name cache entry. On 32bit kernels the xarray index is 32bit,
+ * Insert a name cache entry. On 32bit kernels the radix tree index is 32bit,
* so we need to do some special handling in case we have clashes. This function
- * takes care of this with the help of name_cache_entry::inum_aliases.
+ * takes care of this with the help of name_cache_entry::radix_list.
* In case of error, nce is kfreed.
*/
static int name_cache_insert(struct send_ctx *sctx,
int ret = 0;
struct list_head *nce_head;
- nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
+ nce_head = radix_tree_lookup(&sctx->name_cache,
+ (unsigned long)nce->ino);
if (!nce_head) {
nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
if (!nce_head) {
}
INIT_LIST_HEAD(nce_head);
- ret = xa_insert(&sctx->name_cache, nce->ino, nce_head, GFP_KERNEL);
+ ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
if (ret < 0) {
kfree(nce_head);
kfree(nce);
return ret;
}
}
- list_add_tail(&nce->inum_aliases, nce_head);
+ list_add_tail(&nce->radix_list, nce_head);
list_add_tail(&nce->list, &sctx->name_cache_list);
sctx->name_cache_size++;
{
struct list_head *nce_head;
- nce_head = xa_load(&sctx->name_cache, (unsigned long)nce->ino);
+ nce_head = radix_tree_lookup(&sctx->name_cache,
+ (unsigned long)nce->ino);
if (!nce_head) {
btrfs_err(sctx->send_root->fs_info,
"name_cache_delete lookup failed ino %llu cache size %d, leaking memory",
nce->ino, sctx->name_cache_size);
}
- list_del(&nce->inum_aliases);
+ list_del(&nce->radix_list);
list_del(&nce->list);
sctx->name_cache_size--;
* We may not get to the final release of nce_head if the lookup fails
*/
if (nce_head && list_empty(nce_head)) {
- xa_erase(&sctx->name_cache, (unsigned long)nce->ino);
+ radix_tree_delete(&sctx->name_cache, (unsigned long)nce->ino);
kfree(nce_head);
}
}
struct list_head *nce_head;
struct name_cache_entry *cur;
- nce_head = xa_load(&sctx->name_cache, (unsigned long)ino);
+ nce_head = radix_tree_lookup(&sctx->name_cache, (unsigned long)ino);
if (!nce_head)
return NULL;
- list_for_each_entry(cur, nce_head, inum_aliases) {
+ list_for_each_entry(cur, nce_head, radix_list) {
if (cur->ino == ino && cur->gen == gen)
return cur;
}
INIT_LIST_HEAD(&sctx->new_refs);
INIT_LIST_HEAD(&sctx->deleted_refs);
- xa_init_flags(&sctx->name_cache, GFP_KERNEL);
+ INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
INIT_LIST_HEAD(&sctx->name_cache_list);
sctx->flags = arg->flags;
compress_force = false;
no_compress++;
} else {
+ btrfs_err(info, "unrecognized compression value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
case Opt_thread_pool:
ret = match_int(&args[0], &intarg);
if (ret) {
+ btrfs_err(info, "unrecognized thread_pool value %s",
+ args[0].from);
goto out;
} else if (intarg == 0) {
+ btrfs_err(info, "invalid value 0 for thread_pool");
ret = -EINVAL;
goto out;
}
break;
case Opt_ratio:
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info, "unrecognized metadata_ratio value %s",
+ args[0].from);
goto out;
+ }
info->metadata_ratio = intarg;
btrfs_info(info, "metadata ratio %u",
info->metadata_ratio);
btrfs_set_and_info(info, DISCARD_ASYNC,
"turning on async discard");
} else {
+ btrfs_err(info, "unrecognized discard mode value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
btrfs_set_and_info(info, FREE_SPACE_TREE,
"enabling free space tree");
} else {
+ btrfs_err(info, "unrecognized space_cache value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
break;
case Opt_check_integrity_print_mask:
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info,
+ "unrecognized check_integrity_print_mask value %s",
+ args[0].from);
goto out;
+ }
info->check_integrity_print_mask = intarg;
btrfs_info(info, "check_integrity_print_mask 0x%x",
info->check_integrity_print_mask);
goto out;
#endif
case Opt_fatal_errors:
- if (strcmp(args[0].from, "panic") == 0)
+ if (strcmp(args[0].from, "panic") == 0) {
btrfs_set_opt(info->mount_opt,
PANIC_ON_FATAL_ERROR);
- else if (strcmp(args[0].from, "bug") == 0)
+ } else if (strcmp(args[0].from, "bug") == 0) {
btrfs_clear_opt(info->mount_opt,
PANIC_ON_FATAL_ERROR);
- else {
+ } else {
+ btrfs_err(info, "unrecognized fatal_errors value %s",
+ args[0].from);
ret = -EINVAL;
goto out;
}
case Opt_commit_interval:
intarg = 0;
ret = match_int(&args[0], &intarg);
- if (ret)
+ if (ret) {
+ btrfs_err(info, "unrecognized commit_interval value %s",
+ args[0].from);
+ ret = -EINVAL;
goto out;
+ }
if (intarg == 0) {
btrfs_info(info,
"using default commit interval %us",
break;
case Opt_rescue:
ret = parse_rescue_options(info, args[0].from);
- if (ret < 0)
+ if (ret < 0) {
+ btrfs_err(info, "unrecognized rescue value %s",
+ args[0].from);
goto out;
+ }
break;
#ifdef CONFIG_BTRFS_DEBUG
case Opt_fragment_all:
if (ret)
goto restore;
+ /* V1 cache is not supported for subpage mount. */
+ if (fs_info->sectorsize < PAGE_SIZE && btrfs_test_opt(fs_info, SPACE_CACHE)) {
+ btrfs_warn(fs_info,
+ "v1 space cache is not supported for page size %lu with sectorsize %u",
+ PAGE_SIZE, fs_info->sectorsize);
+ ret = -EINVAL;
+ goto restore;
+ }
btrfs_remount_begin(fs_info, old_opts, *flags);
btrfs_resize_thread_pool(fs_info,
fs_info->thread_pool_size, old_thread_pool_size);
void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
{
- unsigned long index;
- struct extent_buffer *eb;
+ struct radix_tree_iter iter;
+ void **slot;
struct btrfs_device *dev, *tmp;
if (!fs_info)
test_mnt->mnt_sb->s_fs_info = NULL;
- xa_for_each(&fs_info->extent_buffers, index, eb) {
+ spin_lock(&fs_info->buffer_lock);
+ radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
+ struct extent_buffer *eb;
+
+ eb = radix_tree_deref_slot_protected(slot, &fs_info->buffer_lock);
+ if (!eb)
+ continue;
+ /* Shouldn't happen but that kind of thinking creates CVE's */
+ if (radix_tree_exception(eb)) {
+ if (radix_tree_deref_retry(eb))
+ slot = radix_tree_iter_retry(&iter);
+ continue;
+ }
+ slot = radix_tree_iter_resume(slot, &iter);
+ spin_unlock(&fs_info->buffer_lock);
free_extent_buffer_stale(eb);
+ spin_lock(&fs_info->buffer_lock);
}
+ spin_unlock(&fs_info->buffer_lock);
btrfs_mapping_tree_free(&fs_info->mapping_tree);
list_for_each_entry_safe(dev, tmp, &fs_info->fs_devices->devices,
if (!root)
return;
/* Will be freed by btrfs_free_fs_roots */
- if (WARN_ON(test_bit(BTRFS_ROOT_REGISTERED, &root->state)))
+ if (WARN_ON(test_bit(BTRFS_ROOT_IN_RADIX, &root->state)))
return;
btrfs_global_root_delete(root);
btrfs_put_root(root);
#include "space-info.h"
#include "zoned.h"
-#define BTRFS_ROOT_TRANS_TAG XA_MARK_0
+#define BTRFS_ROOT_TRANS_TAG 0
/*
* Transaction states and transitions
*/
smp_wmb();
- spin_lock(&fs_info->fs_roots_lock);
+ spin_lock(&fs_info->fs_roots_radix_lock);
if (root->last_trans == trans->transid && !force) {
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
return 0;
}
- xa_set_mark(&fs_info->fs_roots,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
- spin_unlock(&fs_info->fs_roots_lock);
+ radix_tree_tag_set(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
root->last_trans = trans->transid;
/* this is pretty tricky. We don't want to
spin_unlock(&cur_trans->dropped_roots_lock);
/* Make sure we don't try to update the root at commit time */
- xa_clear_mark(&fs_info->fs_roots,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ radix_tree_tag_clear(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
}
int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans)
{
struct btrfs_fs_info *fs_info = trans->fs_info;
- struct btrfs_root *root;
- unsigned long index;
+ struct btrfs_root *gang[8];
+ int i;
+ int ret;
/*
* At this point no one can be using this transaction to modify any tree
*/
ASSERT(trans->transaction->state == TRANS_STATE_COMMIT_DOING);
- spin_lock(&fs_info->fs_roots_lock);
- xa_for_each_marked(&fs_info->fs_roots, index, root, BTRFS_ROOT_TRANS_TAG) {
- int ret;
-
- /*
- * At this point we can neither have tasks logging inodes
- * from a root nor trying to commit a log tree.
- */
- ASSERT(atomic_read(&root->log_writers) == 0);
- ASSERT(atomic_read(&root->log_commit[0]) == 0);
- ASSERT(atomic_read(&root->log_commit[1]) == 0);
-
- xa_clear_mark(&fs_info->fs_roots,
- (unsigned long)root->root_key.objectid,
- BTRFS_ROOT_TRANS_TAG);
- spin_unlock(&fs_info->fs_roots_lock);
-
- btrfs_free_log(trans, root);
- ret = btrfs_update_reloc_root(trans, root);
- if (ret)
- return ret;
-
- /* See comments in should_cow_block() */
- clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
- smp_mb__after_atomic();
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ while (1) {
+ ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
+ (void **)gang, 0,
+ ARRAY_SIZE(gang),
+ BTRFS_ROOT_TRANS_TAG);
+ if (ret == 0)
+ break;
+ for (i = 0; i < ret; i++) {
+ struct btrfs_root *root = gang[i];
+ int ret2;
+
+ /*
+ * At this point we can neither have tasks logging inodes
+ * from a root nor trying to commit a log tree.
+ */
+ ASSERT(atomic_read(&root->log_writers) == 0);
+ ASSERT(atomic_read(&root->log_commit[0]) == 0);
+ ASSERT(atomic_read(&root->log_commit[1]) == 0);
+
+ radix_tree_tag_clear(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ BTRFS_ROOT_TRANS_TAG);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+
+ btrfs_free_log(trans, root);
+ ret2 = btrfs_update_reloc_root(trans, root);
+ if (ret2)
+ return ret2;
+
+ /* see comments in should_cow_block() */
+ clear_bit(BTRFS_ROOT_FORCE_COW, &root->state);
+ smp_mb__after_atomic();
+
+ if (root->commit_root != root->node) {
+ list_add_tail(&root->dirty_list,
+ &trans->transaction->switch_commits);
+ btrfs_set_root_node(&root->root_item,
+ root->node);
+ }
- if (root->commit_root != root->node) {
- list_add_tail(&root->dirty_list,
- &trans->transaction->switch_commits);
- btrfs_set_root_node(&root->root_item, root->node);
+ ret2 = btrfs_update_root(trans, fs_info->tree_root,
+ &root->root_key,
+ &root->root_item);
+ if (ret2)
+ return ret2;
+ spin_lock(&fs_info->fs_roots_radix_lock);
+ btrfs_qgroup_free_meta_all_pertrans(root);
}
-
- ret = btrfs_update_root(trans, fs_info->tree_root,
- &root->root_key, &root->root_item);
- if (ret)
- return ret;
- spin_lock(&fs_info->fs_roots_lock);
- btrfs_qgroup_free_meta_all_pertrans(root);
}
- spin_unlock(&fs_info->fs_roots_lock);
+ spin_unlock(&fs_info->fs_roots_radix_lock);
return 0;
}
ret = btrfs_map_sblock(fs_info, BTRFS_MAP_GET_READ_MIRRORS, logical,
&mapped_length, &bioc);
if (ret || !bioc || mapped_length < PAGE_SIZE) {
- btrfs_put_bioc(bioc);
- return -EIO;
+ ret = -EIO;
+ goto out_put_bioc;
}
- if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK)
- return -EINVAL;
+ if (bioc->map_type & BTRFS_BLOCK_GROUP_RAID56_MASK) {
+ ret = -EINVAL;
+ goto out_put_bioc;
+ }
nofs_flag = memalloc_nofs_save();
nmirrors = (int)bioc->num_stripes;
break;
}
memalloc_nofs_restore(nofs_flag);
-
+out_put_bioc:
+ btrfs_put_bioc(bioc);
return ret;
}
{
struct btrfs_fs_info *fs_info = block_group->fs_info;
struct map_lookup *map;
- bool need_zone_finish;
int ret = 0;
int i;
}
}
- /*
- * The block group is not fully allocated, so not fully written yet. We
- * need to send ZONE_FINISH command to free up an active zone.
- */
- need_zone_finish = !btrfs_zoned_bg_is_full(block_group);
-
block_group->zone_is_active = 0;
block_group->alloc_offset = block_group->zone_capacity;
block_group->free_space_ctl->free_space = 0;
if (device->zone_info->max_active_zones == 0)
continue;
- if (need_zone_finish) {
- ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
- physical >> SECTOR_SHIFT,
- device->zone_info->zone_size >> SECTOR_SHIFT,
- GFP_NOFS);
+ ret = blkdev_zone_mgmt(device->bdev, REQ_OP_ZONE_FINISH,
+ physical >> SECTOR_SHIFT,
+ device->zone_info->zone_size >> SECTOR_SHIFT,
+ GFP_NOFS);
- if (ret)
- return ret;
- }
+ if (ret)
+ return ret;
btrfs_dev_clear_active_zone(device, physical);
}
factor = div64_u64(used * 100, total);
return factor >= fs_info->bg_reclaim_threshold;
}
+
+void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+ u64 length)
+{
+ struct btrfs_block_group *block_group;
+
+ if (!btrfs_is_zoned(fs_info))
+ return;
+
+ block_group = btrfs_lookup_block_group(fs_info, logical);
+ /* It should be called on a previous data relocation block group. */
+ ASSERT(block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA));
+
+ spin_lock(&block_group->lock);
+ if (!block_group->zoned_data_reloc_ongoing)
+ goto out;
+
+ /* All relocation extents are written. */
+ if (block_group->start + block_group->alloc_offset == logical + length) {
+ /* Now, release this block group for further allocations. */
+ block_group->zoned_data_reloc_ongoing = 0;
+ }
+
+out:
+ spin_unlock(&block_group->lock);
+ btrfs_put_block_group(block_group);
+}
void btrfs_clear_data_reloc_bg(struct btrfs_block_group *bg);
void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
+void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+ u64 length);
#else /* CONFIG_BLK_DEV_ZONED */
static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
struct blk_zone *zone)
{
return false;
}
+
+static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
+ u64 logical, u64 length) { }
#endif
static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
* anon_fd.
*/
xas_for_each(&xas, req, ULONG_MAX) {
- if (req->msg.opcode == CACHEFILES_OP_READ) {
+ if (req->msg.object_id == object_id &&
+ req->msg.opcode == CACHEFILES_OP_READ) {
req->error = -EIO;
complete(&req->done);
xas_store(&xas, NULL);
(CONGESTION_ON_THRESH(congestion_kb) >> 2))
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
- struct folio *folio, void **_fsdata);
+ struct folio **foliop, void **_fsdata);
static inline struct ceph_snap_context *page_snap_context(struct page *page)
{
return 0;
}
-static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
+static void ceph_netfs_free_request(struct netfs_io_request *rreq)
{
- struct inode *inode = mapping->host;
- struct ceph_inode_info *ci = ceph_inode(inode);
- int got = (uintptr_t)priv;
+ struct ceph_inode_info *ci = ceph_inode(rreq->inode);
+ int got = (uintptr_t)rreq->netfs_priv;
if (got)
ceph_put_cap_refs(ci, got);
const struct netfs_request_ops ceph_netfs_ops = {
.init_request = ceph_init_request,
+ .free_request = ceph_netfs_free_request,
.begin_cache_operation = ceph_begin_cache_operation,
.issue_read = ceph_netfs_issue_read,
.expand_readahead = ceph_netfs_expand_readahead,
.clamp_length = ceph_netfs_clamp_length,
.check_write_begin = ceph_netfs_check_write_begin,
- .cleanup = ceph_readahead_cleanup,
};
#ifdef CONFIG_CEPH_FSCACHE
}
static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
- struct folio *folio, void **_fsdata)
+ struct folio **foliop, void **_fsdata)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_snap_context *snapc;
- snapc = ceph_find_incompatible(folio_page(folio, 0));
+ snapc = ceph_find_incompatible(folio_page(*foliop, 0));
if (snapc) {
int r;
- folio_unlock(folio);
- folio_put(folio);
+ folio_unlock(*foliop);
+ folio_put(*foliop);
+ *foliop = NULL;
if (IS_ERR(snapc))
return PTR_ERR(snapc);
struct page **pagep, void **fsdata)
{
struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
struct folio *folio = NULL;
int r;
- r = netfs_write_begin(file, inode->i_mapping, pos, len, &folio, NULL);
+ r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, &folio, NULL);
if (r == 0)
folio_wait_fscache(folio);
if (r < 0) {
static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
s64 pool, struct ceph_string *pool_ns)
{
- struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->vfs_inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_client(&ci->netfs.inode);
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_osd_request *rd_req = NULL, *wr_req = NULL;
struct rb_node **p, *parent;
0, false, true);
err = ceph_osdc_start_request(&fsc->client->osdc, rd_req, false);
- wr_req->r_mtime = ci->vfs_inode.i_mtime;
+ wr_req->r_mtime = ci->netfs.inode.i_mtime;
err2 = ceph_osdc_start_request(&fsc->client->osdc, wr_req, false);
if (!err)
if (!(inode->i_state & I_NEW))
return;
- WARN_ON_ONCE(ci->netfs_ctx.cache);
+ WARN_ON_ONCE(ci->netfs.cache);
- ci->netfs_ctx.cache =
+ ci->netfs.cache =
fscache_acquire_cookie(fsc->fscache, 0,
&ci->i_vino, sizeof(ci->i_vino),
&ci->i_version, sizeof(ci->i_version),
static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci)
{
- return netfs_i_cookie(&ci->vfs_inode);
+ return netfs_i_cookie(&ci->netfs);
}
static inline void ceph_fscache_resize(struct inode *inode, loff_t to)
struct ceph_mount_options *opt = mdsc->fsc->mount_options;
ci->i_hold_caps_max = round_jiffies(jiffies +
opt->caps_wanted_delay_max * HZ);
- dout("__cap_set_timeouts %p %lu\n", &ci->vfs_inode,
+ dout("__cap_set_timeouts %p %lu\n", &ci->netfs.inode,
ci->i_hold_caps_max - jiffies);
}
static void __cap_delay_requeue(struct ceph_mds_client *mdsc,
struct ceph_inode_info *ci)
{
- dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->vfs_inode,
+ dout("__cap_delay_requeue %p flags 0x%lx at %lu\n", &ci->netfs.inode,
ci->i_ceph_flags, ci->i_hold_caps_max);
if (!mdsc->stopping) {
spin_lock(&mdsc->cap_delay_lock);
static void __cap_delay_requeue_front(struct ceph_mds_client *mdsc,
struct ceph_inode_info *ci)
{
- dout("__cap_delay_requeue_front %p\n", &ci->vfs_inode);
+ dout("__cap_delay_requeue_front %p\n", &ci->netfs.inode);
spin_lock(&mdsc->cap_delay_lock);
ci->i_ceph_flags |= CEPH_I_FLUSH;
if (!list_empty(&ci->i_cap_delay_list))
static void __cap_delay_cancel(struct ceph_mds_client *mdsc,
struct ceph_inode_info *ci)
{
- dout("__cap_delay_cancel %p\n", &ci->vfs_inode);
+ dout("__cap_delay_cancel %p\n", &ci->netfs.inode);
if (list_empty(&ci->i_cap_delay_list))
return;
spin_lock(&mdsc->cap_delay_lock);
* Each time we receive FILE_CACHE anew, we increment
* i_rdcache_gen.
*/
- if (S_ISREG(ci->vfs_inode.i_mode) &&
+ if (S_ISREG(ci->netfs.inode.i_mode) &&
(issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
(had & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0) {
ci->i_rdcache_gen++;
if ((issued & CEPH_CAP_FILE_SHARED) != (had & CEPH_CAP_FILE_SHARED)) {
if (issued & CEPH_CAP_FILE_SHARED)
atomic_inc(&ci->i_shared_gen);
- if (S_ISDIR(ci->vfs_inode.i_mode)) {
- dout(" marking %p NOT complete\n", &ci->vfs_inode);
+ if (S_ISDIR(ci->netfs.inode.i_mode)) {
+ dout(" marking %p NOT complete\n", &ci->netfs.inode);
__ceph_dir_clear_complete(ci);
}
}
/* Wipe saved layout if we're losing DIR_CREATE caps */
- if (S_ISDIR(ci->vfs_inode.i_mode) && (had & CEPH_CAP_DIR_CREATE) &&
+ if (S_ISDIR(ci->netfs.inode.i_mode) && (had & CEPH_CAP_DIR_CREATE) &&
!(issued & CEPH_CAP_DIR_CREATE)) {
ceph_put_string(rcu_dereference_raw(ci->i_cached_layout.pool_ns));
memset(&ci->i_cached_layout, 0, sizeof(ci->i_cached_layout));
if (cap->cap_gen < gen || time_after_eq(jiffies, ttl)) {
dout("__cap_is_valid %p cap %p issued %s "
- "but STALE (gen %u vs %u)\n", &cap->ci->vfs_inode,
+ "but STALE (gen %u vs %u)\n", &cap->ci->netfs.inode,
cap, ceph_cap_string(cap->issued), cap->cap_gen, gen);
return 0;
}
if (!__cap_is_valid(cap))
continue;
dout("__ceph_caps_issued %p cap %p issued %s\n",
- &ci->vfs_inode, cap, ceph_cap_string(cap->issued));
+ &ci->netfs.inode, cap, ceph_cap_string(cap->issued));
have |= cap->issued;
if (implemented)
*implemented |= cap->implemented;
spin_lock(&s->s_cap_lock);
if (!s->s_cap_iterator) {
- dout("__touch_cap %p cap %p mds%d\n", &cap->ci->vfs_inode, cap,
+ dout("__touch_cap %p cap %p mds%d\n", &cap->ci->netfs.inode, cap,
s->s_mds);
list_move_tail(&cap->session_caps, &s->s_caps);
} else {
dout("__touch_cap %p cap %p mds%d NOP, iterating over caps\n",
- &cap->ci->vfs_inode, cap, s->s_mds);
+ &cap->ci->netfs.inode, cap, s->s_mds);
}
spin_unlock(&s->s_cap_lock);
}
if ((have & mask) == mask) {
dout("__ceph_caps_issued_mask ino 0x%llx snap issued %s"
- " (mask %s)\n", ceph_ino(&ci->vfs_inode),
+ " (mask %s)\n", ceph_ino(&ci->netfs.inode),
ceph_cap_string(have),
ceph_cap_string(mask));
return 1;
continue;
if ((cap->issued & mask) == mask) {
dout("__ceph_caps_issued_mask ino 0x%llx cap %p issued %s"
- " (mask %s)\n", ceph_ino(&ci->vfs_inode), cap,
+ " (mask %s)\n", ceph_ino(&ci->netfs.inode), cap,
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch)
have |= cap->issued;
if ((have & mask) == mask) {
dout("__ceph_caps_issued_mask ino 0x%llx combo issued %s"
- " (mask %s)\n", ceph_ino(&ci->vfs_inode),
+ " (mask %s)\n", ceph_ino(&ci->netfs.inode),
ceph_cap_string(cap->issued),
ceph_cap_string(mask));
if (touch) {
int __ceph_caps_issued_mask_metric(struct ceph_inode_info *ci, int mask,
int touch)
{
- struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb);
int r;
r = __ceph_caps_issued_mask(ci, mask, touch);
int ceph_caps_revoking(struct ceph_inode_info *ci, int mask)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
int ret;
spin_lock(&ci->i_ceph_lock);
if (ci->i_rd_ref)
used |= CEPH_CAP_FILE_RD;
if (ci->i_rdcache_ref ||
- (S_ISREG(ci->vfs_inode.i_mode) &&
- ci->vfs_inode.i_data.nrpages))
+ (S_ISREG(ci->netfs.inode.i_mode) &&
+ ci->netfs.inode.i_data.nrpages))
used |= CEPH_CAP_FILE_CACHE;
if (ci->i_wr_ref)
used |= CEPH_CAP_FILE_WR;
const int WR_SHIFT = ffs(CEPH_FILE_MODE_WR);
const int LAZY_SHIFT = ffs(CEPH_FILE_MODE_LAZY);
struct ceph_mount_options *opt =
- ceph_inode_to_client(&ci->vfs_inode)->mount_options;
+ ceph_inode_to_client(&ci->netfs.inode)->mount_options;
unsigned long used_cutoff = jiffies - opt->caps_wanted_delay_max * HZ;
unsigned long idle_cutoff = jiffies - opt->caps_wanted_delay_min * HZ;
- if (S_ISDIR(ci->vfs_inode.i_mode)) {
+ if (S_ISDIR(ci->netfs.inode.i_mode)) {
int want = 0;
/* use used_cutoff here, to keep dir's wanted caps longer */
int __ceph_caps_wanted(struct ceph_inode_info *ci)
{
int w = __ceph_caps_file_wanted(ci) | __ceph_caps_used(ci);
- if (S_ISDIR(ci->vfs_inode.i_mode)) {
+ if (S_ISDIR(ci->netfs.inode.i_mode)) {
/* we want EXCL if holding caps of dir ops */
if (w & CEPH_CAP_ANY_DIR_OPS)
w |= CEPH_CAP_FILE_EXCL;
lockdep_assert_held(&ci->i_ceph_lock);
- dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
+ dout("__ceph_remove_cap %p from %p\n", cap, &ci->netfs.inode);
- mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
+ mdsc = ceph_inode_to_client(&ci->netfs.inode)->mdsc;
/* remove from inode's cap rbtree, and clear auth cap */
rb_erase(&cap->ci_node, &ci->i_caps);
* keep i_snap_realm.
*/
if (ci->i_wr_ref == 0 && ci->i_snap_realm)
- ceph_change_snap_realm(&ci->vfs_inode, NULL);
+ ceph_change_snap_realm(&ci->netfs.inode, NULL);
__cap_delay_cancel(mdsc, ci);
}
lockdep_assert_held(&ci->i_ceph_lock);
- fsc = ceph_inode_to_client(&ci->vfs_inode);
+ fsc = ceph_inode_to_client(&ci->netfs.inode);
WARN_ON_ONCE(ci->i_auth_cap == cap &&
!list_empty(&ci->i_dirty_item) &&
!fsc->blocklisted &&
- !ceph_inode_is_shutdown(&ci->vfs_inode));
+ !ceph_inode_is_shutdown(&ci->netfs.inode));
__ceph_remove_cap(cap, queue_release);
}
int flushing, u64 flush_tid, u64 oldest_flush_tid)
{
struct ceph_inode_info *ci = cap->ci;
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
int held, revoking;
lockdep_assert_held(&ci->i_ceph_lock);
static void __send_cap(struct cap_msg_args *arg, struct ceph_inode_info *ci)
{
struct ceph_msg *msg;
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPS, CAP_MSG_SIZE, GFP_NOFS, false);
if (!msg) {
__releases(ci->i_ceph_lock)
__acquires(ci->i_ceph_lock)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc = session->s_mdsc;
struct ceph_cap_snap *capsnap;
u64 oldest_flush_tid = 0;
void ceph_flush_snaps(struct ceph_inode_info *ci,
struct ceph_mds_session **psession)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc;
struct ceph_mds_session *session = NULL;
int mds;
struct ceph_cap_flush **pcf)
{
struct ceph_mds_client *mdsc =
- ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
- struct inode *inode = &ci->vfs_inode;
+ ceph_sb_to_client(ci->netfs.inode.i_sb)->mdsc;
+ struct inode *inode = &ci->netfs.inode;
int was = ci->i_dirty_caps;
int dirty = 0;
return 0;
}
- dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->vfs_inode,
+ dout("__mark_dirty_caps %p %s dirty %s -> %s\n", &ci->netfs.inode,
ceph_cap_string(mask), ceph_cap_string(was),
ceph_cap_string(was | mask));
ci->i_dirty_caps |= mask;
ci->i_snap_realm->cached_context);
}
dout(" inode %p now dirty snapc %p auth cap %p\n",
- &ci->vfs_inode, ci->i_head_snapc, ci->i_auth_cap);
+ &ci->netfs.inode, ci->i_head_snapc, ci->i_auth_cap);
BUG_ON(!list_empty(&ci->i_dirty_item));
spin_lock(&mdsc->cap_dirty_lock);
list_add(&ci->i_dirty_item, &session->s_cap_dirty);
bool __ceph_should_report_size(struct ceph_inode_info *ci)
{
- loff_t size = i_size_read(&ci->vfs_inode);
+ loff_t size = i_size_read(&ci->netfs.inode);
/* mds will adjust max size according to the reported size */
if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
return false;
void ceph_check_caps(struct ceph_inode_info *ci, int flags,
struct ceph_mds_session *session)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
struct ceph_cap *cap;
u64 flush_tid, oldest_flush_tid;
__releases(ci->i_ceph_lock)
__acquires(ci->i_ceph_lock)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_cap *cap;
struct ceph_cap_flush *cf;
int ret;
cap = ci->i_auth_cap;
if (!(cap && cap->session == session)) {
pr_err("%p auth cap %p not mds%d ???\n",
- &ci->vfs_inode, cap, session->s_mds);
+ &ci->netfs.inode, cap, session->s_mds);
spin_unlock(&ci->i_ceph_lock);
continue;
}
cap = ci->i_auth_cap;
if (!(cap && cap->session == session)) {
pr_err("%p auth cap %p not mds%d ???\n",
- &ci->vfs_inode, cap, session->s_mds);
+ &ci->netfs.inode, cap, session->s_mds);
spin_unlock(&ci->i_ceph_lock);
continue;
}
lockdep_assert_held(&ci->i_ceph_lock);
- dout("%s %p flushing %s\n", __func__, &ci->vfs_inode,
+ dout("%s %p flushing %s\n", __func__, &ci->netfs.inode,
ceph_cap_string(ci->i_flushing_caps));
if (!list_empty(&ci->i_cap_flush_list)) {
}
if (got & CEPH_CAP_FILE_BUFFER) {
if (ci->i_wb_ref == 0)
- ihold(&ci->vfs_inode);
+ ihold(&ci->netfs.inode);
ci->i_wb_ref++;
dout("%s %p wb %d -> %d (?)\n", __func__,
- &ci->vfs_inode, ci->i_wb_ref-1, ci->i_wb_ref);
+ &ci->netfs.inode, ci->i_wb_ref-1, ci->i_wb_ref);
}
}
return ret;
}
- if (S_ISREG(ci->vfs_inode.i_mode) &&
+ if (S_ISREG(ci->netfs.inode.i_mode) &&
ci->i_inline_version != CEPH_INLINE_NONE &&
(_got & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) &&
i_size_read(inode) > 0) {
static void __ceph_put_cap_refs(struct ceph_inode_info *ci, int had,
enum put_cap_refs_mode mode)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
int last = 0, put = 0, flushsnaps = 0, wake = 0;
bool check_flushsnaps = false;
void ceph_put_wrbuffer_cap_refs(struct ceph_inode_info *ci, int nr,
struct ceph_snap_context *snapc)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_cap_snap *capsnap = NULL, *iter;
int put = 0;
bool last = false;
session->s_mds,
&list_first_entry(&session->s_cap_flushing,
struct ceph_inode_info,
- i_flushing_item)->vfs_inode);
+ i_flushing_item)->netfs.inode);
}
}
mdsc->num_cap_flushing--;
break;
list_del_init(&ci->i_cap_delay_list);
- inode = igrab(&ci->vfs_inode);
+ inode = igrab(&ci->netfs.inode);
if (inode) {
spin_unlock(&mdsc->cap_delay_lock);
dout("check_delayed_caps on %p\n", inode);
while (!list_empty(&s->s_cap_dirty)) {
ci = list_first_entry(&s->s_cap_dirty, struct ceph_inode_info,
i_dirty_item);
- inode = &ci->vfs_inode;
+ inode = &ci->netfs.inode;
ihold(inode);
dout("flush_dirty_caps %llx.%llx\n", ceph_vinop(inode));
spin_unlock(&mdsc->cap_dirty_lock);
+ ceph_wait_on_async_create(inode);
ceph_check_caps(ci, CHECK_CAPS_FLUSH, NULL);
iput(inode);
spin_lock(&mdsc->cap_dirty_lock);
void ceph_get_fmode(struct ceph_inode_info *ci, int fmode, int count)
{
- struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->netfs.inode.i_sb);
int bits = (fmode << 1) | 1;
bool already_opened = false;
int i;
*/
void ceph_put_fmode(struct ceph_inode_info *ci, int fmode, int count)
{
- struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->vfs_inode.i_sb);
+ struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(ci->netfs.inode.i_sb);
int bits = (fmode << 1) | 1;
bool is_closed = true;
int i;
lockdep_assert_held(&ci->i_ceph_lock);
dout("removing cap %p, ci is %p, inode is %p\n",
- cap, ci, &ci->vfs_inode);
+ cap, ci, &ci->netfs.inode);
is_auth = (cap == ci->i_auth_cap);
__ceph_remove_cap(cap, false);
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_mount_options *opt =
- ceph_inode_to_client(&ci->vfs_inode)->mount_options;
+ ceph_inode_to_client(&ci->netfs.inode)->mount_options;
struct ceph_file_info *fi;
int ret;
rb_insert_color(&frag->node, &ci->i_fragtree);
dout("get_or_create_frag added %llx.%llx frag %x\n",
- ceph_vinop(&ci->vfs_inode), f);
+ ceph_vinop(&ci->netfs.inode), f);
return frag;
}
if (!ci)
return NULL;
- dout("alloc_inode %p\n", &ci->vfs_inode);
+ dout("alloc_inode %p\n", &ci->netfs.inode);
/* Set parameters for the netfs library */
- netfs_i_context_init(&ci->vfs_inode, &ceph_netfs_ops);
+ netfs_inode_init(&ci->netfs, &ceph_netfs_ops);
spin_lock_init(&ci->i_ceph_lock);
INIT_WORK(&ci->i_work, ceph_inode_work);
ci->i_work_mask = 0;
memset(&ci->i_btime, '\0', sizeof(ci->i_btime));
- return &ci->vfs_inode;
+ return &ci->netfs.inode;
}
void ceph_free_inode(struct inode *inode)
{
struct ceph_inode_info *ci = container_of(work, struct ceph_inode_info,
i_work);
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
if (test_and_clear_bit(CEPH_I_WORK_WRITEBACK, &ci->i_work_mask)) {
dout("writeback %p\n", inode);
p = session->s_caps.next;
while (p != &session->s_caps) {
cap = list_entry(p, struct ceph_cap, session_caps);
- inode = igrab(&cap->ci->vfs_inode);
+ inode = igrab(&cap->ci->netfs.inode);
if (!inode) {
p = p->next;
continue;
int iputs;
dout("removing cap %p, ci is %p, inode is %p\n",
- cap, ci, &ci->vfs_inode);
+ cap, ci, &ci->netfs.inode);
spin_lock(&ci->i_ceph_lock);
iputs = ceph_purge_inode_cap(inode, cap, &invalidate);
spin_unlock(&ci->i_ceph_lock);
static void ceph_queue_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap **pcapsnap)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_snap_context *old_snapc, *new_snapc;
struct ceph_cap_snap *capsnap = *pcapsnap;
struct ceph_buffer *old_blob = NULL;
int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap)
{
- struct inode *inode = &ci->vfs_inode;
+ struct inode *inode = &ci->netfs.inode;
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
BUG_ON(capsnap->writing);
spin_lock(&realm->inodes_with_caps_lock);
list_for_each_entry(ci, &realm->inodes_with_caps, i_snap_realm_item) {
- struct inode *inode = igrab(&ci->vfs_inode);
+ struct inode *inode = igrab(&ci->netfs.inode);
if (!inode)
continue;
spin_unlock(&realm->inodes_with_caps_lock);
while (!list_empty(&mdsc->snap_flush_list)) {
ci = list_first_entry(&mdsc->snap_flush_list,
struct ceph_inode_info, i_snap_flush_item);
- inode = &ci->vfs_inode;
+ inode = &ci->netfs.inode;
ihold(inode);
spin_unlock(&mdsc->snap_flush_lock);
ceph_flush_snaps(ci, &session);
static void ceph_inode_init_once(void *foo)
{
struct ceph_inode_info *ci = foo;
- inode_init_once(&ci->vfs_inode);
+ inode_init_once(&ci->netfs.inode);
}
static int __init init_caches(void)
* Ceph inode.
*/
struct ceph_inode_info {
- struct {
- /* These must be contiguous */
- struct inode vfs_inode;
- struct netfs_i_context netfs_ctx; /* Netfslib context */
- };
+ struct netfs_inode netfs; /* Netfslib context and vfs inode */
struct ceph_vino i_vino; /* ceph ino + snap */
spinlock_t i_ceph_lock;
static inline struct ceph_inode_info *
ceph_inode(const struct inode *inode)
{
- return container_of(inode, struct ceph_inode_info, vfs_inode);
+ return container_of(inode, struct ceph_inode_info, netfs.inode);
}
static inline struct ceph_fs_client *
has_quota = __ceph_has_quota(ci, QUOTA_GET_ANY);
if (had_quota != has_quota)
- ceph_adjust_quota_realms_count(&ci->vfs_inode, has_quota);
+ ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota);
}
extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
static ssize_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
size_t size)
{
- struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb);
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_string *pool_ns;
s64 pool = ci->i_layout.pool_id;
pool_ns = ceph_try_get_string(ci->i_layout.pool_ns);
- dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);
+ dout("ceph_vxattrcb_layout %p\n", &ci->netfs.inode);
down_read(&osdc->lock);
pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
if (pool_name) {
char *val, size_t size)
{
ssize_t ret;
- struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb);
struct ceph_osd_client *osdc = &fsc->client->osdc;
s64 pool = ci->i_layout.pool_id;
const char *pool_name;
static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
char *val, size_t size)
{
- struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb);
return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
}
static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
char *val, size_t size)
{
- struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+ struct ceph_fs_client *fsc = ceph_sb_to_client(ci->netfs.inode.i_sb);
return ceph_fmt_xattr(val, size, "client%lld",
ceph_client_gid(fsc->client));
}
dout("__set_xattr_val added %llx.%llx xattr %p %.*s=%.*s\n",
- ceph_vinop(&ci->vfs_inode), xattr, name_len, name, val_len, val);
+ ceph_vinop(&ci->netfs.inode), xattr, name_len, name, val_len, val);
return 0;
}
struct ceph_buffer *old_blob = NULL;
void *dest;
- dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
+ dout("__build_xattrs_blob %p\n", &ci->netfs.inode);
if (ci->i_xattrs.dirty) {
int need = __get_required_blob_size(ci, 0, 0);
seq_printf(m, "\t\tIPv4: %pI4\n", &ipv4->sin_addr);
else if (iface->sockaddr.ss_family == AF_INET6)
seq_printf(m, "\t\tIPv6: %pI6\n", &ipv6->sin6_addr);
+ if (!iface->is_active)
+ seq_puts(m, "\t\t[for-cleanup]\n");
}
static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
struct TCP_Server_Info *server;
struct cifs_ses *ses;
struct cifs_tcon *tcon;
+ struct cifs_server_iface *iface;
int c, i, j;
seq_puts(m,
if (ses->iface_count)
seq_printf(m, "\n\n\tServer interfaces: %zu",
ses->iface_count);
- for (j = 0; j < ses->iface_count; j++) {
- struct cifs_server_iface *iface;
-
- iface = &ses->iface_list[j];
- seq_printf(m, "\n\t%d)", j+1);
+ j = 0;
+ list_for_each_entry(iface, &ses->iface_list,
+ iface_head) {
+ seq_printf(m, "\n\t%d)", ++j);
cifs_dump_iface(m, iface);
if (is_ses_using_iface(ses, iface))
seq_puts(m, "\t\t[CONNECTED]\n");
cifs_inode->flags = 0;
spin_lock_init(&cifs_inode->writers_lock);
cifs_inode->writers = 0;
- cifs_inode->vfs_inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
+ cifs_inode->netfs.inode.i_blkbits = 14; /* 2**14 = CIFS_MAX_MSGSIZE */
cifs_inode->server_eof = 0;
cifs_inode->uniqueid = 0;
cifs_inode->createtime = 0;
* Can not set i_flags here - they get immediately overwritten to zero
* by the VFS.
*/
- /* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */
+ /* cifs_inode->netfs.inode.i_flags = S_NOATIME | S_NOCMTIME; */
INIT_LIST_HEAD(&cifs_inode->openFileList);
INIT_LIST_HEAD(&cifs_inode->llist);
INIT_LIST_HEAD(&cifs_inode->deferred_closes);
spin_lock_init(&cifs_inode->deferred_lock);
- return &cifs_inode->vfs_inode;
+ return &cifs_inode->netfs.inode;
}
static void
};
MODULE_ALIAS_FS("cifs");
-static struct file_system_type smb3_fs_type = {
+struct file_system_type smb3_fs_type = {
.owner = THIS_MODULE,
.name = "smb3",
.init_fs_context = smb3_init_fs_context,
{
struct cifsInodeInfo *cifsi = inode;
- inode_init_once(&cifsi->vfs_inode);
+ inode_init_once(&cifsi->netfs.inode);
init_rwsem(&cifsi->lock_sem);
}
return (unsigned long) dentry->d_fsdata;
}
-extern struct file_system_type cifs_fs_type;
+extern struct file_system_type cifs_fs_type, smb3_fs_type;
extern const struct address_space_operations cifs_addr_ops;
extern const struct address_space_operations cifs_addr_ops_smallbuf;
#define SMB_DNS_RESOLVE_INTERVAL_MIN 120
#define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600
+/* smb multichannel query server interfaces interval in seconds */
+#define SMB_INTERFACE_POLL_INTERVAL 600
+
/* maximum number of PDUs in one compound */
#define MAX_COMPOUND 5
#endif
struct cifs_server_iface {
+ struct list_head iface_head;
+ struct kref refcount;
size_t speed;
unsigned int rdma_capable : 1;
unsigned int rss_capable : 1;
+ unsigned int is_active : 1; /* unset if non existent */
struct sockaddr_storage sockaddr;
};
+/* release iface when last ref is dropped */
+static inline void
+release_iface(struct kref *ref)
+{
+ struct cifs_server_iface *iface = container_of(ref,
+ struct cifs_server_iface,
+ refcount);
+ list_del_init(&iface->iface_head);
+ kfree(iface);
+}
+
+/*
+ * compare two interfaces a and b
+ * return 0 if everything matches.
+ * return 1 if a has higher link speed, or rdma capable, or rss capable
+ * return -1 otherwise.
+ */
+static inline int
+iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b)
+{
+ int cmp_ret = 0;
+
+ WARN_ON(!a || !b);
+ if (a->speed == b->speed) {
+ if (a->rdma_capable == b->rdma_capable) {
+ if (a->rss_capable == b->rss_capable) {
+ cmp_ret = memcmp(&a->sockaddr, &b->sockaddr,
+ sizeof(a->sockaddr));
+ if (!cmp_ret)
+ return 0;
+ else if (cmp_ret > 0)
+ return 1;
+ else
+ return -1;
+ } else if (a->rss_capable > b->rss_capable)
+ return 1;
+ else
+ return -1;
+ } else if (a->rdma_capable > b->rdma_capable)
+ return 1;
+ else
+ return -1;
+ } else if (a->speed > b->speed)
+ return 1;
+ else
+ return -1;
+}
+
struct cifs_chan {
unsigned int in_reconnect : 1; /* if session setup in progress for this channel */
struct TCP_Server_Info *server;
+ struct cifs_server_iface *iface; /* interface in use */
__u8 signkey[SMB3_SIGN_KEY_SIZE];
};
*/
spinlock_t iface_lock;
/* ========= begin: protected by iface_lock ======== */
- struct cifs_server_iface *iface_list;
+ struct list_head iface_list;
size_t iface_count;
unsigned long iface_last_update; /* jiffies */
/* ========= end: protected by iface_lock ======== */
#ifdef CONFIG_CIFS_DFS_UPCALL
struct list_head ulist; /* cache update list */
#endif
+ struct delayed_work query_interfaces; /* query interfaces workqueue job */
};
/*
#define CIFS_CACHE_RW_FLG (CIFS_CACHE_READ_FLG | CIFS_CACHE_WRITE_FLG)
#define CIFS_CACHE_RHW_FLG (CIFS_CACHE_RW_FLG | CIFS_CACHE_HANDLE_FLG)
-#define CIFS_CACHE_READ(cinode) ((cinode->oplock & CIFS_CACHE_READ_FLG) || (CIFS_SB(cinode->vfs_inode.i_sb)->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE))
+#define CIFS_CACHE_READ(cinode) ((cinode->oplock & CIFS_CACHE_READ_FLG) || (CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE))
#define CIFS_CACHE_HANDLE(cinode) (cinode->oplock & CIFS_CACHE_HANDLE_FLG)
-#define CIFS_CACHE_WRITE(cinode) ((cinode->oplock & CIFS_CACHE_WRITE_FLG) || (CIFS_SB(cinode->vfs_inode.i_sb)->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE))
+#define CIFS_CACHE_WRITE(cinode) ((cinode->oplock & CIFS_CACHE_WRITE_FLG) || (CIFS_SB(cinode->netfs.inode.i_sb)->mnt_cifs_flags & CIFS_MOUNT_RW_CACHE))
/*
* One of these for each file inode
*/
struct cifsInodeInfo {
- struct {
- /* These must be contiguous */
- struct inode vfs_inode; /* the VFS's inode record */
- struct netfs_i_context netfs_ctx; /* Netfslib context */
- };
+ struct netfs_inode netfs; /* Netfslib context and vfs inode */
bool can_cache_brlcks;
struct list_head llist; /* locks helb by this inode */
/*
static inline struct cifsInodeInfo *
CIFS_I(struct inode *inode)
{
- return container_of(inode, struct cifsInodeInfo, vfs_inode);
+ return container_of(inode, struct cifsInodeInfo, netfs.inode);
}
static inline struct cifs_sb_info *
bool
cifs_chan_needs_reconnect(struct cifs_ses *ses,
struct TCP_Server_Info *server);
+bool
+cifs_chan_is_iface_active(struct cifs_ses *ses,
+ struct TCP_Server_Info *server);
+int
+cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server);
+int
+SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon);
void extract_unc_hostname(const char *unc, const char **h, size_t *len);
int copy_path_name(char *dst, const char *src);
if (!server->hostname)
return -EINVAL;
+ /* if server hostname isn't populated, there's nothing to do here */
+ if (server->hostname[0] == '\0')
+ return 0;
+
len = strlen(server->hostname) + 3;
unc = kmalloc(len, GFP_KERNEL);
return rc;
}
+static void smb2_query_server_interfaces(struct work_struct *work)
+{
+ int rc;
+ struct cifs_tcon *tcon = container_of(work,
+ struct cifs_tcon,
+ query_interfaces.work);
+
+ /*
+ * query server network interfaces, in case they change
+ */
+ rc = SMB3_request_interfaces(0, tcon);
+ if (rc) {
+ cifs_dbg(FYI, "%s: failed to query server interfaces: %d\n",
+ __func__, rc);
+ }
+
+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+ (SMB_INTERFACE_POLL_INTERVAL * HZ));
+}
static void cifs_resolve_server(struct work_struct *work)
{
bool mark_smb_session)
{
struct TCP_Server_Info *pserver;
- struct cifs_ses *ses;
+ struct cifs_ses *ses, *nses;
struct cifs_tcon *tcon;
/*
spin_lock(&cifs_tcp_ses_lock);
- list_for_each_entry(ses, &pserver->smb_ses_list, smb_ses_list) {
+ list_for_each_entry_safe(ses, nses, &pserver->smb_ses_list, smb_ses_list) {
+ /* check if iface is still active */
+ if (!cifs_chan_is_iface_active(ses, server)) {
+ /*
+ * HACK: drop the lock before calling
+ * cifs_chan_update_iface to avoid deadlock
+ */
+ ses->ses_count++;
+ spin_unlock(&cifs_tcp_ses_lock);
+ cifs_chan_update_iface(ses, server);
+ spin_lock(&cifs_tcp_ses_lock);
+ ses->ses_count--;
+ }
+
spin_lock(&ses->chan_lock);
if (!mark_smb_session && cifs_chan_needs_reconnect(ses, server))
goto next_session;
list_del_init(&ses->smb_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
- spin_lock(&ses->chan_lock);
chan_count = ses->chan_count;
/* close any extra channels */
int i;
for (i = 1; i < chan_count; i++) {
- spin_unlock(&ses->chan_lock);
+ if (ses->chans[i].iface) {
+ kref_put(&ses->chans[i].iface->refcount, release_iface);
+ ses->chans[i].iface = NULL;
+ }
cifs_put_tcp_session(ses->chans[i].server, 0);
- spin_lock(&ses->chan_lock);
ses->chans[i].server = NULL;
}
}
- spin_unlock(&ses->chan_lock);
sesInfoFree(ses);
cifs_put_tcp_session(server, 0);
list_del_init(&tcon->tcon_list);
spin_unlock(&cifs_tcp_ses_lock);
+ /* cancel polling of interfaces */
+ cancel_delayed_work_sync(&tcon->query_interfaces);
+
if (tcon->use_witness) {
int rc;
tcon->local_lease = ctx->local_lease;
INIT_LIST_HEAD(&tcon->pending_opens);
+ /* schedule query interfaces poll */
+ INIT_DELAYED_WORK(&tcon->query_interfaces,
+ smb2_query_server_interfaces);
+ queue_delayed_work(cifsiod_wq, &tcon->query_interfaces,
+ (SMB_INTERFACE_POLL_INTERVAL * HZ));
+
spin_lock(&cifs_tcp_ses_lock);
list_add(&tcon->tcon_list, &ses->tcon_list);
spin_unlock(&cifs_tcp_ses_lock);
struct nls_table *nls_info)
{
int rc = -ENOSYS;
+ struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr;
+ struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr;
bool is_binding = false;
-
spin_lock(&cifs_tcp_ses_lock);
+ if (server->dstaddr.ss_family == AF_INET6)
+ scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI6", &addr6->sin6_addr);
+ else
+ scnprintf(ses->ip_addr, sizeof(ses->ip_addr), "%pI4", &addr->sin_addr);
+
if (ses->ses_status != SES_GOOD &&
ses->ses_status != SES_NEW &&
ses->ses_status != SES_NEED_RECON) {
bool fsuid_only)
{
struct cifsFileInfo *open_file = NULL;
- struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
return rc;
}
- cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
+ cifs_sb = CIFS_SB(cifs_inode->netfs.inode.i_sb);
/* only filter by fsuid on multiuser mounts */
if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
/* This inode is open for write at least once */
struct cifs_sb_info *cifs_sb;
- cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
+ cifs_sb = CIFS_SB(cifsInode->netfs.inode.i_sb);
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
/* since no page cache to corrupt on directio
we can change size safely */
return true;
}
- if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
+ if (i_size_read(&cifsInode->netfs.inode) < end_of_file)
return true;
return false;
struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
- cifs_fscache_fill_coherency(&cifsi->vfs_inode, &cd);
+ cifs_fscache_fill_coherency(&cifsi->netfs.inode, &cd);
- cifsi->netfs_ctx.cache =
+ cifsi->netfs.cache =
fscache_acquire_cookie(tcon->fscache, 0,
&cifsi->uniqueid, sizeof(cifsi->uniqueid),
&cd, sizeof(cd),
- i_size_read(&cifsi->vfs_inode));
+ i_size_read(&cifsi->netfs.inode));
}
void cifs_fscache_unuse_inode_cookie(struct inode *inode, bool update)
if (cookie) {
cifs_dbg(FYI, "%s: (0x%p)\n", __func__, cookie);
fscache_relinquish_cookie(cookie, false);
- cifsi->netfs_ctx.cache = NULL;
+ cifsi->netfs.cache = NULL;
}
}
struct cifsInodeInfo *cifsi = CIFS_I(inode);
memset(cd, 0, sizeof(*cd));
- cd->last_write_time_sec = cpu_to_le64(cifsi->vfs_inode.i_mtime.tv_sec);
- cd->last_write_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_mtime.tv_nsec);
- cd->last_change_time_sec = cpu_to_le64(cifsi->vfs_inode.i_ctime.tv_sec);
- cd->last_change_time_nsec = cpu_to_le32(cifsi->vfs_inode.i_ctime.tv_nsec);
+ cd->last_write_time_sec = cpu_to_le64(cifsi->netfs.inode.i_mtime.tv_sec);
+ cd->last_write_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_mtime.tv_nsec);
+ cd->last_change_time_sec = cpu_to_le64(cifsi->netfs.inode.i_ctime.tv_sec);
+ cd->last_change_time_nsec = cpu_to_le32(cifsi->netfs.inode.i_ctime.tv_nsec);
}
static inline struct fscache_cookie *cifs_inode_cookie(struct inode *inode)
{
- return netfs_i_cookie(inode);
+ return netfs_i_cookie(&CIFS_I(inode)->netfs);
}
static inline void cifs_invalidate_cache(struct inode *inode, unsigned int flags)
__func__, cifs_i->uniqueid);
set_bit(CIFS_INO_INVALID_MAPPING, &cifs_i->flags);
/* Invalidate fscache cookie */
- cifs_fscache_fill_coherency(&cifs_i->vfs_inode, &cd);
+ cifs_fscache_fill_coherency(&cifs_i->netfs.inode, &cd);
fscache_invalidate(cifs_inode_cookie(inode), &cd, i_size_read(inode), 0);
}
u64 len)
{
struct cifsInodeInfo *cifs_i = CIFS_I(inode);
- struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_i->vfs_inode.i_sb);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_i->netfs.inode.i_sb);
struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
struct TCP_Server_Info *server = tcon->ses->server;
struct cifsFileInfo *cfile;
INIT_LIST_HEAD(&ret_buf->tcon_list);
mutex_init(&ret_buf->session_mutex);
spin_lock_init(&ret_buf->iface_lock);
+ INIT_LIST_HEAD(&ret_buf->iface_list);
spin_lock_init(&ret_buf->chan_lock);
}
return ret_buf;
void
sesInfoFree(struct cifs_ses *buf_to_free)
{
+ struct cifs_server_iface *iface = NULL, *niface = NULL;
+
if (buf_to_free == NULL) {
cifs_dbg(FYI, "Null buffer passed to sesInfoFree\n");
return;
kfree(buf_to_free->user_name);
kfree(buf_to_free->domainName);
kfree_sensitive(buf_to_free->auth_key.response);
- kfree(buf_to_free->iface_list);
+ spin_lock(&buf_to_free->iface_lock);
+ list_for_each_entry_safe(iface, niface, &buf_to_free->iface_list,
+ iface_head)
+ kref_put(&iface->refcount, release_iface);
+ spin_unlock(&buf_to_free->iface_lock);
kfree_sensitive(buf_to_free);
}
if (oplock == OPLOCK_EXCLUSIVE) {
cinode->oplock = CIFS_CACHE_WRITE_FLG | CIFS_CACHE_READ_FLG;
cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
- &cinode->vfs_inode);
+ &cinode->netfs.inode);
} else if (oplock == OPLOCK_READ) {
cinode->oplock = CIFS_CACHE_READ_FLG;
cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
- &cinode->vfs_inode);
+ &cinode->netfs.inode);
} else
cinode->oplock = 0;
}
.data = data,
.sb = NULL,
};
+ struct file_system_type **fs_type = (struct file_system_type *[]) {
+ &cifs_fs_type, &smb3_fs_type, NULL,
+ };
- iterate_supers_type(&cifs_fs_type, f, &sd);
-
- if (!sd.sb)
- return ERR_PTR(-EINVAL);
- /*
- * Grab an active reference in order to prevent automounts (DFS links)
- * of expiring and then freeing up our cifs superblock pointer while
- * we're doing failover.
- */
- cifs_sb_active(sd.sb);
- return sd.sb;
+ for (; *fs_type; fs_type++) {
+ iterate_supers_type(*fs_type, f, &sd);
+ if (sd.sb) {
+ /*
+ * Grab an active reference in order to prevent automounts (DFS links)
+ * of expiring and then freeing up our cifs superblock pointer while
+ * we're doing failover.
+ */
+ cifs_sb_active(sd.sb);
+ return sd.sb;
+ }
+ }
+ return ERR_PTR(-EINVAL);
}
static void __cifs_put_super(struct super_block *sb)
spin_lock(&ses->chan_lock);
for (i = 0; i < ses->chan_count; i++) {
- if (is_server_using_iface(ses->chans[i].server, iface)) {
+ if (ses->chans[i].iface == iface) {
spin_unlock(&ses->chan_lock);
return true;
}
}
/* If we didn't find the channel, it is likely a bug */
+ if (server)
+ cifs_dbg(VFS, "unable to get chan index for server: 0x%llx",
+ server->conn_id);
WARN_ON(1);
return 0;
}
return CIFS_CHAN_NEEDS_RECONNECT(ses, chan_index);
}
+bool
+cifs_chan_is_iface_active(struct cifs_ses *ses,
+ struct TCP_Server_Info *server)
+{
+ unsigned int chan_index = cifs_ses_get_chan_index(ses, server);
+
+ return ses->chans[chan_index].iface &&
+ ses->chans[chan_index].iface->is_active;
+}
+
/* returns number of channels added */
int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
{
int old_chan_count, new_chan_count;
int left;
- int i = 0;
int rc = 0;
int tries = 0;
- struct cifs_server_iface *ifaces = NULL;
- size_t iface_count;
+ struct cifs_server_iface *iface = NULL, *niface = NULL;
spin_lock(&ses->chan_lock);
}
spin_unlock(&ses->chan_lock);
- /*
- * Make a copy of the iface list at the time and use that
- * instead so as to not hold the iface spinlock for opening
- * channels
- */
- spin_lock(&ses->iface_lock);
- iface_count = ses->iface_count;
- if (iface_count <= 0) {
- spin_unlock(&ses->iface_lock);
- cifs_dbg(VFS, "no iface list available to open channels\n");
- return 0;
- }
- ifaces = kmemdup(ses->iface_list, iface_count*sizeof(*ifaces),
- GFP_ATOMIC);
- if (!ifaces) {
- spin_unlock(&ses->iface_lock);
- return 0;
- }
- spin_unlock(&ses->iface_lock);
-
/*
* Keep connecting to same, fastest, iface for all channels as
* long as its RSS. Try next fastest one if not RSS or channel
* creation fails.
*/
+ spin_lock(&ses->iface_lock);
+ iface = list_first_entry(&ses->iface_list, struct cifs_server_iface,
+ iface_head);
+ spin_unlock(&ses->iface_lock);
+
while (left > 0) {
- struct cifs_server_iface *iface;
tries++;
if (tries > 3*ses->chan_max) {
break;
}
- iface = &ifaces[i];
- if (is_ses_using_iface(ses, iface) && !iface->rss_capable) {
- i = (i+1) % iface_count;
- continue;
+ spin_lock(&ses->iface_lock);
+ if (!ses->iface_count) {
+ spin_unlock(&ses->iface_lock);
+ break;
}
- rc = cifs_ses_add_channel(cifs_sb, ses, iface);
- if (rc) {
- cifs_dbg(FYI, "failed to open extra channel on iface#%d rc=%d\n",
- i, rc);
- i = (i+1) % iface_count;
- continue;
+ list_for_each_entry_safe_from(iface, niface, &ses->iface_list,
+ iface_head) {
+ /* skip ifaces that are unusable */
+ if (!iface->is_active ||
+ (is_ses_using_iface(ses, iface) &&
+ !iface->rss_capable)) {
+ continue;
+ }
+
+ /* take ref before unlock */
+ kref_get(&iface->refcount);
+
+ spin_unlock(&ses->iface_lock);
+ rc = cifs_ses_add_channel(cifs_sb, ses, iface);
+ spin_lock(&ses->iface_lock);
+
+ if (rc) {
+ cifs_dbg(VFS, "failed to open extra channel on iface:%pIS rc=%d\n",
+ &iface->sockaddr,
+ rc);
+ kref_put(&iface->refcount, release_iface);
+ continue;
+ }
+
+ cifs_dbg(FYI, "successfully opened new channel on iface:%pIS\n",
+ &iface->sockaddr);
+ break;
}
+ spin_unlock(&ses->iface_lock);
- cifs_dbg(FYI, "successfully opened new channel on iface#%d\n",
- i);
left--;
new_chan_count++;
}
- kfree(ifaces);
return new_chan_count - old_chan_count;
}
+/*
+ * update the iface for the channel if necessary.
+ * will return 0 when iface is updated, 1 if removed, 2 otherwise
+ * Must be called with chan_lock held.
+ */
+int
+cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server)
+{
+ unsigned int chan_index;
+ struct cifs_server_iface *iface = NULL;
+ struct cifs_server_iface *old_iface = NULL;
+ int rc = 0;
+
+ spin_lock(&ses->chan_lock);
+ chan_index = cifs_ses_get_chan_index(ses, server);
+ if (!chan_index) {
+ spin_unlock(&ses->chan_lock);
+ return 0;
+ }
+
+ if (ses->chans[chan_index].iface) {
+ old_iface = ses->chans[chan_index].iface;
+ if (old_iface->is_active) {
+ spin_unlock(&ses->chan_lock);
+ return 1;
+ }
+ }
+ spin_unlock(&ses->chan_lock);
+
+ spin_lock(&ses->iface_lock);
+ /* then look for a new one */
+ list_for_each_entry(iface, &ses->iface_list, iface_head) {
+ if (!iface->is_active ||
+ (is_ses_using_iface(ses, iface) &&
+ !iface->rss_capable)) {
+ continue;
+ }
+ kref_get(&iface->refcount);
+ }
+
+ if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+ rc = 1;
+ iface = NULL;
+ cifs_dbg(FYI, "unable to find a suitable iface\n");
+ }
+
+ /* now drop the ref to the current iface */
+ if (old_iface && iface) {
+ kref_put(&old_iface->refcount, release_iface);
+ cifs_dbg(FYI, "replacing iface: %pIS with %pIS\n",
+ &old_iface->sockaddr,
+ &iface->sockaddr);
+ } else if (old_iface) {
+ kref_put(&old_iface->refcount, release_iface);
+ cifs_dbg(FYI, "releasing ref to iface: %pIS\n",
+ &old_iface->sockaddr);
+ } else {
+ WARN_ON(!iface);
+ cifs_dbg(FYI, "adding new iface: %pIS\n", &iface->sockaddr);
+ }
+ spin_unlock(&ses->iface_lock);
+
+ spin_lock(&ses->chan_lock);
+ chan_index = cifs_ses_get_chan_index(ses, server);
+ ses->chans[chan_index].iface = iface;
+
+ /* No iface is found. if secondary chan, drop connection */
+ if (!iface && CIFS_SERVER_IS_CHAN(server))
+ ses->chans[chan_index].server = NULL;
+
+ spin_unlock(&ses->chan_lock);
+
+ if (!iface && CIFS_SERVER_IS_CHAN(server))
+ cifs_put_tcp_session(server, false);
+
+ return rc;
+}
+
/*
* If server is a channel of ses, return the corresponding enclosing
* cifs_chan otherwise return NULL.
/* Auth */
ctx.domainauto = ses->domainAuto;
ctx.domainname = ses->domainName;
- ctx.server_hostname = ses->server->hostname;
+
+ /* no hostname for extra channels */
+ ctx.server_hostname = "";
+
ctx.username = ses->user_name;
ctx.password = ses->password;
ctx.sectype = ses->sectype;
spin_unlock(&ses->chan_lock);
goto out;
}
+ chan->iface = iface;
ses->chan_count++;
atomic_set(&ses->chan_seq, 0);
out:
if (rc && chan->server) {
+ /*
+ * we should avoid race with these delayed works before we
+ * remove this channel
+ */
+ cancel_delayed_work_sync(&chan->server->echo);
+ cancel_delayed_work_sync(&chan->server->resolve);
+ cancel_delayed_work_sync(&chan->server->reconnect);
+
spin_lock(&ses->chan_lock);
/* we rely on all bits beyond chan_count to be clear */
cifs_chan_clear_need_reconnect(ses, chan->server);
*/
WARN_ON(ses->chan_count < 1);
spin_unlock(&ses->chan_lock);
- }
- if (rc && chan->server)
cifs_put_tcp_session(chan->server, 0);
+ }
return rc;
}
static int
parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
size_t buf_len,
- struct cifs_server_iface **iface_list,
- size_t *iface_count)
+ struct cifs_ses *ses)
{
struct network_interface_info_ioctl_rsp *p;
struct sockaddr_in *addr4;
struct sockaddr_in6 *addr6;
struct iface_info_ipv4 *p4;
struct iface_info_ipv6 *p6;
- struct cifs_server_iface *info;
+ struct cifs_server_iface *info = NULL, *iface = NULL, *niface = NULL;
+ struct cifs_server_iface tmp_iface;
ssize_t bytes_left;
size_t next = 0;
int nb_iface = 0;
- int rc = 0;
-
- *iface_list = NULL;
- *iface_count = 0;
-
- /*
- * Fist pass: count and sanity check
- */
+ int rc = 0, ret = 0;
bytes_left = buf_len;
p = buf;
- while (bytes_left >= sizeof(*p)) {
- nb_iface++;
- next = le32_to_cpu(p->Next);
- if (!next) {
- bytes_left -= sizeof(*p);
- break;
- }
- p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next);
- bytes_left -= next;
- }
-
- if (!nb_iface) {
- cifs_dbg(VFS, "%s: malformed interface info\n", __func__);
- rc = -EINVAL;
- goto out;
- }
-
- /* Azure rounds the buffer size up 8, to a 16 byte boundary */
- if ((bytes_left > 8) || p->Next)
- cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
-
+ spin_lock(&ses->iface_lock);
/*
- * Second pass: extract info to internal structure
+ * Go through iface_list and do kref_put to remove
+ * any unused ifaces. ifaces in use will be removed
+ * when the last user calls a kref_put on it
*/
-
- *iface_list = kcalloc(nb_iface, sizeof(**iface_list), GFP_KERNEL);
- if (!*iface_list) {
- rc = -ENOMEM;
- goto out;
+ list_for_each_entry_safe(iface, niface, &ses->iface_list,
+ iface_head) {
+ iface->is_active = 0;
+ kref_put(&iface->refcount, release_iface);
}
+ spin_unlock(&ses->iface_lock);
- info = *iface_list;
- bytes_left = buf_len;
- p = buf;
while (bytes_left >= sizeof(*p)) {
- info->speed = le64_to_cpu(p->LinkSpeed);
- info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
- info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0;
-
- cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count);
- cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
- cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__,
- le32_to_cpu(p->Capability));
+ memset(&tmp_iface, 0, sizeof(tmp_iface));
+ tmp_iface.speed = le64_to_cpu(p->LinkSpeed);
+ tmp_iface.rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
+ tmp_iface.rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0;
switch (p->Family) {
/*
* conversion explicit in case either one changes.
*/
case INTERNETWORK:
- addr4 = (struct sockaddr_in *)&info->sockaddr;
+ addr4 = (struct sockaddr_in *)&tmp_iface.sockaddr;
p4 = (struct iface_info_ipv4 *)p->Buffer;
addr4->sin_family = AF_INET;
memcpy(&addr4->sin_addr, &p4->IPv4Address, 4);
&addr4->sin_addr);
break;
case INTERNETWORKV6:
- addr6 = (struct sockaddr_in6 *)&info->sockaddr;
+ addr6 = (struct sockaddr_in6 *)&tmp_iface.sockaddr;
p6 = (struct iface_info_ipv6 *)p->Buffer;
addr6->sin6_family = AF_INET6;
memcpy(&addr6->sin6_addr, &p6->IPv6Address, 16);
goto next_iface;
}
- (*iface_count)++;
- info++;
+ /*
+ * The iface_list is assumed to be sorted by speed.
+ * Check if the new interface exists in that list.
+ * NEVER change iface. it could be in use.
+ * Add a new one instead
+ */
+ spin_lock(&ses->iface_lock);
+ iface = niface = NULL;
+ list_for_each_entry_safe(iface, niface, &ses->iface_list,
+ iface_head) {
+ ret = iface_cmp(iface, &tmp_iface);
+ if (!ret) {
+ /* just get a ref so that it doesn't get picked/freed */
+ iface->is_active = 1;
+ kref_get(&iface->refcount);
+ spin_unlock(&ses->iface_lock);
+ goto next_iface;
+ } else if (ret < 0) {
+ /* all remaining ifaces are slower */
+ kref_get(&iface->refcount);
+ break;
+ }
+ }
+ spin_unlock(&ses->iface_lock);
+
+ /* no match. insert the entry in the list */
+ info = kmalloc(sizeof(struct cifs_server_iface),
+ GFP_KERNEL);
+ if (!info) {
+ rc = -ENOMEM;
+ goto out;
+ }
+ memcpy(info, &tmp_iface, sizeof(tmp_iface));
+
+ /* add this new entry to the list */
+ kref_init(&info->refcount);
+ info->is_active = 1;
+
+ cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, ses->iface_count);
+ cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
+ cifs_dbg(FYI, "%s: capabilities 0x%08x\n", __func__,
+ le32_to_cpu(p->Capability));
+
+ spin_lock(&ses->iface_lock);
+ if (!list_entry_is_head(iface, &ses->iface_list, iface_head)) {
+ list_add_tail(&info->iface_head, &iface->iface_head);
+ kref_put(&iface->refcount, release_iface);
+ } else
+ list_add_tail(&info->iface_head, &ses->iface_list);
+ spin_unlock(&ses->iface_lock);
+
+ ses->iface_count++;
+ ses->iface_last_update = jiffies;
next_iface:
+ nb_iface++;
next = le32_to_cpu(p->Next);
- if (!next)
+ if (!next) {
+ bytes_left -= sizeof(*p);
break;
+ }
p = (struct network_interface_info_ioctl_rsp *)((u8 *)p+next);
bytes_left -= next;
}
- if (!*iface_count) {
+ if (!nb_iface) {
+ cifs_dbg(VFS, "%s: malformed interface info\n", __func__);
rc = -EINVAL;
goto out;
}
-out:
- if (rc) {
- kfree(*iface_list);
- *iface_count = 0;
- *iface_list = NULL;
- }
- return rc;
-}
+ /* Azure rounds the buffer size up 8, to a 16 byte boundary */
+ if ((bytes_left > 8) || p->Next)
+ cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
-static int compare_iface(const void *ia, const void *ib)
-{
- const struct cifs_server_iface *a = (struct cifs_server_iface *)ia;
- const struct cifs_server_iface *b = (struct cifs_server_iface *)ib;
- return a->speed == b->speed ? 0 : (a->speed > b->speed ? -1 : 1);
+ if (!ses->iface_count) {
+ rc = -EINVAL;
+ goto out;
+ }
+
+out:
+ return rc;
}
-static int
+int
SMB3_request_interfaces(const unsigned int xid, struct cifs_tcon *tcon)
{
int rc;
unsigned int ret_data_len = 0;
struct network_interface_info_ioctl_rsp *out_buf = NULL;
- struct cifs_server_iface *iface_list;
- size_t iface_count;
struct cifs_ses *ses = tcon->ses;
rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID,
goto out;
}
- rc = parse_server_interfaces(out_buf, ret_data_len,
- &iface_list, &iface_count);
+ rc = parse_server_interfaces(out_buf, ret_data_len, ses);
if (rc)
goto out;
- /* sort interfaces from fastest to slowest */
- sort(iface_list, iface_count, sizeof(*iface_list), compare_iface, NULL);
-
- spin_lock(&ses->iface_lock);
- kfree(ses->iface_list);
- ses->iface_list = iface_list;
- ses->iface_count = iface_count;
- ses->iface_last_update = jiffies;
- spin_unlock(&ses->iface_lock);
-
out:
kfree(out_buf);
return rc;
if (oplock == SMB2_OPLOCK_LEVEL_BATCH) {
cinode->oplock = CIFS_CACHE_RHW_FLG;
cifs_dbg(FYI, "Batch Oplock granted on inode %p\n",
- &cinode->vfs_inode);
+ &cinode->netfs.inode);
} else if (oplock == SMB2_OPLOCK_LEVEL_EXCLUSIVE) {
cinode->oplock = CIFS_CACHE_RW_FLG;
cifs_dbg(FYI, "Exclusive Oplock granted on inode %p\n",
- &cinode->vfs_inode);
+ &cinode->netfs.inode);
} else if (oplock == SMB2_OPLOCK_LEVEL_II) {
cinode->oplock = CIFS_CACHE_READ_FLG;
cifs_dbg(FYI, "Level II Oplock granted on inode %p\n",
- &cinode->vfs_inode);
+ &cinode->netfs.inode);
} else
cinode->oplock = 0;
}
cinode->oplock = new_oplock;
cifs_dbg(FYI, "%s Lease granted on inode %p\n", message,
- &cinode->vfs_inode);
+ &cinode->netfs.inode);
}
static void
mutex_unlock(&ses->session_mutex);
rc = -EHOSTDOWN;
goto failed;
+ } else if (rc) {
+ mutex_unlock(&ses->session_mutex);
+ goto out;
}
} else {
mutex_unlock(&ses->session_mutex);
struct TCP_Server_Info *server, unsigned int *total_len)
{
char *pneg_ctxt;
+ char *hostname = NULL;
unsigned int ctxt_len, neg_context_count;
if (*total_len > 200) {
*total_len += ctxt_len;
pneg_ctxt += ctxt_len;
- ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt,
- server->hostname);
- *total_len += ctxt_len;
- pneg_ctxt += ctxt_len;
+ /*
+ * secondary channels don't have the hostname field populated
+ * use the hostname field in the primary channel instead
+ */
+ hostname = CIFS_SERVER_IS_CHAN(server) ?
+ server->primary_server->hostname : server->hostname;
+ if (hostname && (hostname[0] != 0)) {
+ ctxt_len = build_netname_ctxt((struct smb2_netname_neg_context *)pneg_ctxt,
+ hostname);
+ *total_len += ctxt_len;
+ pneg_ctxt += ctxt_len;
+ neg_context_count = 3;
+ } else
+ neg_context_count = 2;
build_posix_ctxt((struct smb2_posix_neg_context *)pneg_ctxt);
*total_len += sizeof(struct smb2_posix_neg_context);
pneg_ctxt += sizeof(struct smb2_posix_neg_context);
-
- neg_context_count = 4;
+ neg_context_count++;
if (server->compress_algorithm) {
build_compression_ctxt((struct smb2_compression_capabilities_context *)
data = &info;
size = sizeof(struct smb2_file_eof_info);
+ trace_smb3_set_eof(xid, persistent_fid, tcon->tid, tcon->ses->Suid, le64_to_cpu(*eof));
+
return send_set_info(xid, tcon, persistent_fid, volatile_fid,
pid, FILE_END_OF_FILE_INFORMATION, SMB2_O_INFO_FILE,
0, 1, &data, &size);
DEFINE_SMB3_RW_DONE_EVENT(zero_done);
DEFINE_SMB3_RW_DONE_EVENT(falloc_done);
+/* For logging successful set EOF (truncate) */
+DECLARE_EVENT_CLASS(smb3_eof_class,
+ TP_PROTO(unsigned int xid,
+ __u64 fid,
+ __u32 tid,
+ __u64 sesid,
+ __u64 offset),
+ TP_ARGS(xid, fid, tid, sesid, offset),
+ TP_STRUCT__entry(
+ __field(unsigned int, xid)
+ __field(__u64, fid)
+ __field(__u32, tid)
+ __field(__u64, sesid)
+ __field(__u64, offset)
+ ),
+ TP_fast_assign(
+ __entry->xid = xid;
+ __entry->fid = fid;
+ __entry->tid = tid;
+ __entry->sesid = sesid;
+ __entry->offset = offset;
+ ),
+ TP_printk("xid=%u sid=0x%llx tid=0x%x fid=0x%llx offset=0x%llx",
+ __entry->xid, __entry->sesid, __entry->tid, __entry->fid,
+ __entry->offset)
+)
+
+#define DEFINE_SMB3_EOF_EVENT(name) \
+DEFINE_EVENT(smb3_eof_class, smb3_##name, \
+ TP_PROTO(unsigned int xid, \
+ __u64 fid, \
+ __u32 tid, \
+ __u64 sesid, \
+ __u64 offset), \
+ TP_ARGS(xid, fid, tid, sesid, offset))
+
+DEFINE_SMB3_EOF_EVENT(set_eof);
+
/*
* For handle based calls other than read and write, and get/set info
*/
bprm->mm = NULL;
#ifdef CONFIG_POSIX_TIMERS
- exit_itimers(me->signal);
+ exit_itimers(me);
flush_itimer_signals();
#endif
return -ENOENT;
}
- exfat_chain_dup(&olddir, &ei->dir);
+ exfat_chain_set(&olddir, EXFAT_I(old_parent_inode)->start_clu,
+ EXFAT_B_TO_CLU_ROUND_UP(i_size_read(old_parent_inode), sbi),
+ EXFAT_I(old_parent_inode)->flags);
dentry = ei->entry;
ep = exfat_get_dentry(sb, &olddir, dentry, &old_bh);
void *page_addr = NULL;
struct page *page = NULL;
unsigned long i, npages = dir_pages(inode);
- int dir_has_error = 0;
for (i = 0; i < npages; i++) {
char *kaddr;
ext2_dirent * de;
- page = ext2_get_page(inode, i, dir_has_error, &page_addr);
+ page = ext2_get_page(inode, i, 0, &page_addr);
- if (IS_ERR(page)) {
- dir_has_error = 1;
- continue;
- }
+ if (IS_ERR(page))
+ goto not_empty;
kaddr = page_addr;
de = (ext2_dirent *)kaddr;
if (IS_ERR(raw_inode))
return -EIO;
- /* For fields not not tracking in the in-memory inode,
+ /* For fields not tracking in the in-memory inode,
* initialise them to zero for new inodes. */
if (ei->i_state & EXT2_STATE_NEW)
memset(raw_inode, 0, EXT2_SB(sb)->s_inode_size);
ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
inode->i_ino, create);
return _ext4_get_block(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_IO_CREATE_EXT);
+ EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT);
}
/* Maximum number of blocks we map for direct IO at once. */
size = size >> bsbits;
start = start_off >> bsbits;
+ /*
+ * For tiny groups (smaller than 8MB) the chosen allocation
+ * alignment may be larger than group size. Make sure the
+ * alignment does not move allocation to a different group which
+ * makes mballoc fail assertions later.
+ */
+ start = max(start, rounddown(ac->ac_o_ex.fe_logical,
+ (ext4_lblk_t)EXT4_BLOCKS_PER_GROUP(ac->ac_sb)));
+
/* don't cover already allocated blocks in selected range */
if (ar->pleft && start <= ar->lleft) {
size -= ar->lleft + 1 - start;
}
rcu_read_unlock();
- if (start + size <= ac->ac_o_ex.fe_logical &&
+ /*
+ * In this function "start" and "size" are normalized for better
+ * alignment and length such that we could preallocate more blocks.
+ * This normalization is done such that original request of
+ * ac->ac_o_ex.fe_logical & fe_len should always lie within "start" and
+ * "size" boundaries.
+ * (Note fe_len can be relaxed since FS block allocation API does not
+ * provide gurantee on number of contiguous blocks allocation since that
+ * depends upon free space left, etc).
+ * In case of inode pa, later we use the allocated blocks
+ * [pa_start + fe_logical - pa_lstart, fe_len/size] from the preallocated
+ * range of goal/best blocks [start, size] to put it at the
+ * ac_o_ex.fe_logical extent of this inode.
+ * (See ext4_mb_use_inode_pa() for more details)
+ */
+ if (start + size <= ac->ac_o_ex.fe_logical ||
start > ac->ac_o_ex.fe_logical) {
ext4_msg(ac->ac_sb, KERN_ERR,
"start %lu, size %lu, fe_logical %lu",
/*
* Worst case we can touch the allocation bitmaps and a block
- * group descriptor block. We do need need to worry about
+ * group descriptor block. We do need to worry about
* credits for modifying the quota inode.
*/
handle = ext4_journal_start(inode, EXT4_HT_MIGRATE,
struct dx_hash_info *hinfo)
{
unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
+ unsigned continued;
+ int count;
struct buffer_head *bh2;
ext4_lblk_t newblock;
u32 hash2;
/*
* In the first loop we prepare and mark buffers to submit. We have to
* mark all buffers in the page before submitting so that
- * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
+ * end_page_writeback() cannot be called from ext4_end_bio() when IO
* on the first buffer finishes and we are still working on submitting
* the second buffer.
*/
if (!capable(CAP_SYS_RESOURCE))
return -EPERM;
+ /*
+ * If the reserved GDT blocks is non-zero, the resize_inode feature
+ * should always be set.
+ */
+ if (EXT4_SB(sb)->s_es->s_reserved_gdt_blocks &&
+ !ext4_has_feature_resize_inode(sb)) {
+ ext4_error(sb, "resize_inode disabled but reserved GDT blocks non-zero");
+ return -EFSCORRUPTED;
+ }
+
/*
* If we are not using the primary superblock/GDT copy don't resize,
* because the user tools have no way of handling this. Probably a
static int ext4_validate_options(struct fs_context *fc);
static int ext4_check_opt_consistency(struct fs_context *fc,
struct super_block *sb);
-static int ext4_apply_options(struct fs_context *fc, struct super_block *sb);
+static void ext4_apply_options(struct fs_context *fc, struct super_block *sb);
static int ext4_parse_param(struct fs_context *fc, struct fs_parameter *param);
static int ext4_get_tree(struct fs_context *fc);
static int ext4_reconfigure(struct fs_context *fc);
}
#endif
-static int ext4_set_test_dummy_encryption(struct super_block *sb, char *arg)
-{
-#ifdef CONFIG_FS_ENCRYPTION
- struct ext4_sb_info *sbi = EXT4_SB(sb);
- int err;
-
- err = fscrypt_set_test_dummy_encryption(sb, arg,
- &sbi->s_dummy_enc_policy);
- if (err) {
- ext4_msg(sb, KERN_WARNING,
- "Error while setting test dummy encryption [%d]", err);
- return err;
- }
- ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
-#endif
- return 0;
-}
-
#define EXT4_SPEC_JQUOTA (1 << 0)
#define EXT4_SPEC_JQFMT (1 << 1)
#define EXT4_SPEC_DATAJ (1 << 2)
#define EXT4_SPEC_SB_BLOCK (1 << 3)
#define EXT4_SPEC_JOURNAL_DEV (1 << 4)
#define EXT4_SPEC_JOURNAL_IOPRIO (1 << 5)
-#define EXT4_SPEC_DUMMY_ENCRYPTION (1 << 6)
#define EXT4_SPEC_s_want_extra_isize (1 << 7)
#define EXT4_SPEC_s_max_batch_time (1 << 8)
#define EXT4_SPEC_s_min_batch_time (1 << 9)
struct ext4_fs_context {
char *s_qf_names[EXT4_MAXQUOTAS];
- char *test_dummy_enc_arg;
+ struct fscrypt_dummy_policy dummy_enc_policy;
int s_jquota_fmt; /* Format of quota to use */
#ifdef CONFIG_EXT4_DEBUG
int s_fc_debug_max_replay;
for (i = 0; i < EXT4_MAXQUOTAS; i++)
kfree(ctx->s_qf_names[i]);
- kfree(ctx->test_dummy_enc_arg);
+ fscrypt_free_dummy_policy(&ctx->dummy_enc_policy);
kfree(ctx);
}
}
#endif
+static int ext4_parse_test_dummy_encryption(const struct fs_parameter *param,
+ struct ext4_fs_context *ctx)
+{
+ int err;
+
+ if (!IS_ENABLED(CONFIG_FS_ENCRYPTION)) {
+ ext4_msg(NULL, KERN_WARNING,
+ "test_dummy_encryption option not supported");
+ return -EINVAL;
+ }
+ err = fscrypt_parse_test_dummy_encryption(param,
+ &ctx->dummy_enc_policy);
+ if (err == -EINVAL) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Value of option \"%s\" is unrecognized", param->key);
+ } else if (err == -EEXIST) {
+ ext4_msg(NULL, KERN_WARNING,
+ "Conflicting test_dummy_encryption options");
+ return -EINVAL;
+ }
+ return err;
+}
+
#define EXT4_SET_CTX(name) \
static inline void ctx_set_##name(struct ext4_fs_context *ctx, \
unsigned long flag) \
ctx->spec |= EXT4_SPEC_JOURNAL_IOPRIO;
return 0;
case Opt_test_dummy_encryption:
-#ifdef CONFIG_FS_ENCRYPTION
- if (param->type == fs_value_is_flag) {
- ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION;
- ctx->test_dummy_enc_arg = NULL;
- return 0;
- }
- if (*param->string &&
- !(!strcmp(param->string, "v1") ||
- !strcmp(param->string, "v2"))) {
- ext4_msg(NULL, KERN_WARNING,
- "Value of option \"%s\" is unrecognized",
- param->key);
- return -EINVAL;
- }
- ctx->spec |= EXT4_SPEC_DUMMY_ENCRYPTION;
- ctx->test_dummy_enc_arg = kmemdup_nul(param->string, param->size,
- GFP_KERNEL);
- return 0;
-#else
- ext4_msg(NULL, KERN_WARNING,
- "test_dummy_encryption option not supported");
- return -EINVAL;
-#endif
+ return ext4_parse_test_dummy_encryption(param, ctx);
case Opt_dax:
case Opt_dax_type:
#ifdef CONFIG_FS_DAX
if (s_ctx->spec & EXT4_SPEC_JOURNAL_IOPRIO)
m_ctx->journal_ioprio = s_ctx->journal_ioprio;
- ret = ext4_apply_options(fc, sb);
+ ext4_apply_options(fc, sb);
+ ret = 0;
out_free:
if (fc) {
static int ext4_check_test_dummy_encryption(const struct fs_context *fc,
struct super_block *sb)
{
-#ifdef CONFIG_FS_ENCRYPTION
const struct ext4_fs_context *ctx = fc->fs_private;
const struct ext4_sb_info *sbi = EXT4_SB(sb);
+ int err;
- if (!(ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION))
+ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy))
return 0;
if (!ext4_has_feature_encrypt(sb)) {
* needed to allow it to be set or changed during remount. We do allow
* it to be specified during remount, but only if there is no change.
*/
- if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE &&
- !sbi->s_dummy_enc_policy.policy) {
+ if (fc->purpose == FS_CONTEXT_FOR_RECONFIGURE) {
+ if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
+ &ctx->dummy_enc_policy))
+ return 0;
ext4_msg(NULL, KERN_WARNING,
- "Can't set test_dummy_encryption on remount");
+ "Can't set or change test_dummy_encryption on remount");
return -EINVAL;
}
-#endif /* CONFIG_FS_ENCRYPTION */
- return 0;
+ /* Also make sure s_mount_opts didn't contain a conflicting value. */
+ if (fscrypt_is_dummy_policy_set(&sbi->s_dummy_enc_policy)) {
+ if (fscrypt_dummy_policies_equal(&sbi->s_dummy_enc_policy,
+ &ctx->dummy_enc_policy))
+ return 0;
+ ext4_msg(NULL, KERN_WARNING,
+ "Conflicting test_dummy_encryption options");
+ return -EINVAL;
+ }
+ /*
+ * fscrypt_add_test_dummy_key() technically changes the super_block, so
+ * technically it should be delayed until ext4_apply_options() like the
+ * other changes. But since we never get here for remounts (see above),
+ * and this is the last chance to report errors, we do it here.
+ */
+ err = fscrypt_add_test_dummy_key(sb, &ctx->dummy_enc_policy);
+ if (err)
+ ext4_msg(NULL, KERN_WARNING,
+ "Error adding test dummy encryption key [%d]", err);
+ return err;
+}
+
+static void ext4_apply_test_dummy_encryption(struct ext4_fs_context *ctx,
+ struct super_block *sb)
+{
+ if (!fscrypt_is_dummy_policy_set(&ctx->dummy_enc_policy) ||
+ /* if already set, it was already verified to be the same */
+ fscrypt_is_dummy_policy_set(&EXT4_SB(sb)->s_dummy_enc_policy))
+ return;
+ EXT4_SB(sb)->s_dummy_enc_policy = ctx->dummy_enc_policy;
+ memset(&ctx->dummy_enc_policy, 0, sizeof(ctx->dummy_enc_policy));
+ ext4_msg(sb, KERN_WARNING, "Test dummy encryption mode enabled");
}
static int ext4_check_opt_consistency(struct fs_context *fc,
return ext4_check_quota_consistency(fc, sb);
}
-static int ext4_apply_options(struct fs_context *fc, struct super_block *sb)
+static void ext4_apply_options(struct fs_context *fc, struct super_block *sb)
{
struct ext4_fs_context *ctx = fc->fs_private;
struct ext4_sb_info *sbi = fc->s_fs_info;
- int ret = 0;
sbi->s_mount_opt &= ~ctx->mask_s_mount_opt;
sbi->s_mount_opt |= ctx->vals_s_mount_opt;
#endif
ext4_apply_quota_options(fc, sb);
-
- if (ctx->spec & EXT4_SPEC_DUMMY_ENCRYPTION)
- ret = ext4_set_test_dummy_encryption(sb, ctx->test_dummy_enc_arg);
-
- return ret;
+ ext4_apply_test_dummy_encryption(ctx, sb);
}
if (err < 0)
goto failed_mount;
- err = ext4_apply_options(fc, sb);
- if (err < 0)
- goto failed_mount;
+ ext4_apply_options(fc, sb);
#if IS_ENABLED(CONFIG_UNICODE)
if (ext4_has_feature_casefold(sb) && !sb->s_encoding) {
err = percpu_counter_init(&sbi->s_freeinodes_counter, freei,
GFP_KERNEL);
}
- /*
- * Update the checksum after updating free space/inode
- * counters. Otherwise the superblock can have an incorrect
- * checksum in the buffer cache until it is written out and
- * e2fsprogs programs trying to open a file system immediately
- * after it is mounted can fail.
- */
- ext4_superblock_csum_set(sb);
if (!err)
err = percpu_counter_init(&sbi->s_dirs_counter,
ext4_count_dirs(sb), GFP_KERNEL);
EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS;
ext4_orphan_cleanup(sb, es);
EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS;
+ /*
+ * Update the checksum after updating free space/inode counters and
+ * ext4_orphan_cleanup. Otherwise the superblock can have an incorrect
+ * checksum in the buffer cache until it is written out and
+ * e2fsprogs programs trying to open a file system immediately
+ * after it is mounted can fail.
+ */
+ ext4_superblock_csum_set(sb);
if (needs_recovery) {
ext4_msg(sb, KERN_INFO, "recovery complete");
err = ext4_mark_recovery_complete(sb, es);
static int ext4_commit_super(struct super_block *sb)
{
struct buffer_head *sbh = EXT4_SB(sb)->s_sbh;
- int error = 0;
if (!sbh)
return -EINVAL;
ext4_update_super(sb);
+ lock_buffer(sbh);
+ /* Buffer got discarded which means block device got invalidated */
+ if (!buffer_mapped(sbh)) {
+ unlock_buffer(sbh);
+ return -EIO;
+ }
+
if (buffer_write_io_error(sbh) || !buffer_uptodate(sbh)) {
/*
* Oh, dear. A previous attempt to write the
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
}
- BUFFER_TRACE(sbh, "marking dirty");
- mark_buffer_dirty(sbh);
- error = __sync_dirty_buffer(sbh,
- REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0));
+ get_bh(sbh);
+ /* Clear potential dirty bit if it was journalled update */
+ clear_buffer_dirty(sbh);
+ sbh->b_end_io = end_buffer_write_sync;
+ submit_bh(REQ_OP_WRITE,
+ REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh);
+ wait_on_buffer(sbh);
if (buffer_write_io_error(sbh)) {
ext4_msg(sb, KERN_ERR, "I/O error while writing "
"superblock");
clear_buffer_write_io_error(sbh);
set_buffer_uptodate(sbh);
+ return -EIO;
}
- return error;
+ return 0;
}
/*
unlock_buffer(bs->bh);
ea_bdebug(bs->bh, "cloning");
- s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
+ s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
error = -ENOMEM;
if (s->base == NULL)
goto cleanup;
- memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
s->first = ENTRY(header(s->base)+1);
header(s->base)->h_refcount = cpu_to_le32(1);
s->here = ENTRY(s->base + offset);
unsigned int cnt;
struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
+ unsigned long flags;
- spin_lock_bh(&sbi->iostat_lat_lock);
+ spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
for (idx = 0; idx < MAX_IO_TYPE; idx++) {
for (io = 0; io < NR_PAGE_TYPE; io++) {
cnt = io_lat->bio_cnt[idx][io];
io_lat->bio_cnt[idx][io] = 0;
}
}
- spin_unlock_bh(&sbi->iostat_lat_lock);
+ spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
trace_f2fs_iostat_latency(sbi, iostat_lat);
}
{
unsigned long long iostat_diff[NR_IO_TYPE];
int i;
+ unsigned long flags;
if (time_is_after_jiffies(sbi->iostat_next_period))
return;
/* Need double check under the lock */
- spin_lock_bh(&sbi->iostat_lock);
+ spin_lock_irqsave(&sbi->iostat_lock, flags);
if (time_is_after_jiffies(sbi->iostat_next_period)) {
- spin_unlock_bh(&sbi->iostat_lock);
+ spin_unlock_irqrestore(&sbi->iostat_lock, flags);
return;
}
sbi->iostat_next_period = jiffies +
sbi->prev_rw_iostat[i];
sbi->prev_rw_iostat[i] = sbi->rw_iostat[i];
}
- spin_unlock_bh(&sbi->iostat_lock);
+ spin_unlock_irqrestore(&sbi->iostat_lock, flags);
trace_f2fs_iostat(sbi, iostat_diff);
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int i;
- spin_lock_bh(&sbi->iostat_lock);
+ spin_lock_irq(&sbi->iostat_lock);
for (i = 0; i < NR_IO_TYPE; i++) {
sbi->rw_iostat[i] = 0;
sbi->prev_rw_iostat[i] = 0;
}
- spin_unlock_bh(&sbi->iostat_lock);
+ spin_unlock_irq(&sbi->iostat_lock);
- spin_lock_bh(&sbi->iostat_lat_lock);
+ spin_lock_irq(&sbi->iostat_lat_lock);
memset(io_lat, 0, sizeof(struct iostat_lat_info));
- spin_unlock_bh(&sbi->iostat_lat_lock);
+ spin_unlock_irq(&sbi->iostat_lat_lock);
}
void f2fs_update_iostat(struct f2fs_sb_info *sbi,
enum iostat_type type, unsigned long long io_bytes)
{
+ unsigned long flags;
+
if (!sbi->iostat_enable)
return;
- spin_lock_bh(&sbi->iostat_lock);
+ spin_lock_irqsave(&sbi->iostat_lock, flags);
sbi->rw_iostat[type] += io_bytes;
if (type == APP_BUFFERED_IO || type == APP_DIRECT_IO)
if (type == APP_BUFFERED_READ_IO || type == APP_DIRECT_READ_IO)
sbi->rw_iostat[APP_READ_IO] += io_bytes;
- spin_unlock_bh(&sbi->iostat_lock);
+ spin_unlock_irqrestore(&sbi->iostat_lock, flags);
f2fs_record_iostat(sbi);
}
struct f2fs_sb_info *sbi = iostat_ctx->sbi;
struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int idx;
+ unsigned long flags;
if (!sbi->iostat_enable)
return;
idx = WRITE_ASYNC_IO;
}
- spin_lock_bh(&sbi->iostat_lat_lock);
+ spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
io_lat->sum_lat[idx][iotype] += ts_diff;
io_lat->bio_cnt[idx][iotype]++;
if (ts_diff > io_lat->peak_lat[idx][iotype])
io_lat->peak_lat[idx][iotype] = ts_diff;
- spin_unlock_bh(&sbi->iostat_lat_lock);
+ spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
}
void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
if (test_opt(sbi, INLINE_XATTR))
set_inode_flag(inode, FI_INLINE_XATTR);
- if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
- set_inode_flag(inode, FI_INLINE_DATA);
if (f2fs_may_inline_dentry(inode))
set_inode_flag(inode, FI_INLINE_DENTRY);
f2fs_init_extent_tree(inode, NULL);
- stat_inc_inline_xattr(inode);
- stat_inc_inline_inode(inode);
- stat_inc_inline_dir(inode);
-
F2FS_I(inode)->i_flags =
f2fs_mask_flags(mode, F2FS_I(dir)->i_flags & F2FS_FL_INHERITED);
set_compress_context(inode);
}
+ /* Should enable inline_data after compression set */
+ if (test_opt(sbi, INLINE_DATA) && f2fs_may_inline_data(inode))
+ set_inode_flag(inode, FI_INLINE_DATA);
+
+ stat_inc_inline_xattr(inode);
+ stat_inc_inline_inode(inode);
+ stat_inc_inline_dir(inode);
+
f2fs_set_inode_flags(inode);
trace_f2fs_new_inode(inode, 0);
if (!is_extension_exist(name, ext[i], false))
continue;
+ /* Do not use inline_data with compression */
+ stat_dec_inline_inode(inode);
+ clear_inode_flag(inode, FI_INLINE_DATA);
set_compress_context(inode);
return;
}
out_err:
ClearPageUptodate(page);
out_put_err:
- f2fs_handle_page_eio(sbi, page->index, NODE);
+ /* ENOENT comes from read_node_page which is not an error. */
+ if (err != -ENOENT)
+ f2fs_handle_page_eio(sbi, page->index, NODE);
f2fs_put_page(page, 1);
return ERR_PTR(err);
}
struct list_head *head)
{
assert_spin_locked(&wb->list_lock);
+ assert_spin_locked(&inode->i_lock);
list_move(&inode->i_io_list, head);
inode = wb_inode(delaying_queue->prev);
if (inode_dirtied_after(inode, dirtied_before))
break;
+ spin_lock(&inode->i_lock);
list_move(&inode->i_io_list, &tmp);
moved++;
- spin_lock(&inode->i_lock);
inode->i_state |= I_SYNC_QUEUED;
spin_unlock(&inode->i_lock);
if (sb_is_blkdev_sb(inode->i_sb))
goto out;
}
- /* Move inodes from one superblock together */
+ /*
+ * Although inode's i_io_list is moved from 'tmp' to 'dispatch_queue',
+ * we don't take inode->i_lock here because it is just a pointless overhead.
+ * Inode is already marked as I_SYNC_QUEUED so writeback list handling is
+ * fully under our control.
+ */
while (!list_empty(&tmp)) {
sb = wb_inode(tmp.prev)->i_sb;
list_for_each_prev_safe(pos, node, &tmp) {
* We'll have another go at writing back this inode
* when we completed a full scan of b_io.
*/
- spin_unlock(&inode->i_lock);
requeue_io(inode, wb);
+ spin_unlock(&inode->i_lock);
trace_writeback_sb_inodes_requeue(inode);
continue;
}
{
struct super_block *sb = inode->i_sb;
int dirtytime = 0;
+ struct bdi_writeback *wb = NULL;
trace_writeback_mark_inode_dirty(inode, flags);
inode->i_state &= ~I_DIRTY_TIME;
inode->i_state |= flags;
+ /*
+ * Grab inode's wb early because it requires dropping i_lock and we
+ * need to make sure following checks happen atomically with dirty
+ * list handling so that we don't move inodes under flush worker's
+ * hands.
+ */
+ if (!was_dirty) {
+ wb = locked_inode_to_wb_and_lock_list(inode);
+ spin_lock(&inode->i_lock);
+ }
+
/*
* If the inode is queued for writeback by flush worker, just
* update its dirty state. Once the flush worker is done with
* list, based upon its state.
*/
if (inode->i_state & I_SYNC_QUEUED)
- goto out_unlock_inode;
+ goto out_unlock;
/*
* Only add valid (hashed) inodes to the superblock's
*/
if (!S_ISBLK(inode->i_mode)) {
if (inode_unhashed(inode))
- goto out_unlock_inode;
+ goto out_unlock;
}
if (inode->i_state & I_FREEING)
- goto out_unlock_inode;
+ goto out_unlock;
/*
* If the inode was already on b_dirty/b_io/b_more_io, don't
* reposition it (that would break b_dirty time-ordering).
*/
if (!was_dirty) {
- struct bdi_writeback *wb;
struct list_head *dirty_list;
bool wakeup_bdi = false;
- wb = locked_inode_to_wb_and_lock_list(inode);
-
inode->dirtied_when = jiffies;
if (dirtytime)
inode->dirtied_time_when = jiffies;
dirty_list);
spin_unlock(&wb->list_lock);
+ spin_unlock(&inode->i_lock);
trace_writeback_dirty_inode_enqueue(inode);
/*
return;
}
}
+out_unlock:
+ if (wb)
+ spin_unlock(&wb->list_lock);
out_unlock_inode:
spin_unlock(&inode->i_lock);
}
return NULL;
}
+static inline bool fscache_cookie_is_dropped(struct fscache_cookie *cookie)
+{
+ return READ_ONCE(cookie->state) == FSCACHE_COOKIE_STATE_DROPPED;
+}
+
static void fscache_wait_on_collision(struct fscache_cookie *candidate,
struct fscache_cookie *wait_for)
{
enum fscache_cookie_state *statep = &wait_for->state;
- wait_var_event_timeout(statep, READ_ONCE(*statep) == FSCACHE_COOKIE_STATE_DROPPED,
+ wait_var_event_timeout(statep, fscache_cookie_is_dropped(wait_for),
20 * HZ);
- if (READ_ONCE(*statep) != FSCACHE_COOKIE_STATE_DROPPED) {
+ if (!fscache_cookie_is_dropped(wait_for)) {
pr_notice("Potential collision c=%08x old: c=%08x",
candidate->debug_id, wait_for->debug_id);
- wait_var_event(statep, READ_ONCE(*statep) == FSCACHE_COOKIE_STATE_DROPPED);
+ wait_var_event(statep, fscache_cookie_is_dropped(wait_for));
}
}
}
fscache_see_cookie(cookie, fscache_cookie_see_active);
- fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_ACTIVE);
+ spin_lock(&cookie->lock);
+ if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags))
+ __fscache_set_cookie_state(cookie,
+ FSCACHE_COOKIE_STATE_INVALIDATING);
+ else
+ __fscache_set_cookie_state(cookie, FSCACHE_COOKIE_STATE_ACTIVE);
+ spin_unlock(&cookie->lock);
+ wake_up_cookie_state(cookie);
trace = fscache_access_lookup_cookie_end;
out:
spin_lock(&cookie->lock);
}
+ if (test_and_clear_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags))
+ fscache_end_cookie_access(cookie, fscache_access_invalidate_cookie_end);
+
switch (state) {
case FSCACHE_COOKIE_STATE_RELINQUISHING:
fscache_see_cookie(cookie, fscache_cookie_see_relinquish);
return;
case FSCACHE_COOKIE_STATE_LOOKING_UP:
+ __fscache_begin_cookie_access(cookie, fscache_access_invalidate_cookie);
+ set_bit(FSCACHE_COOKIE_DO_INVALIDATE, &cookie->flags);
+ fallthrough;
case FSCACHE_COOKIE_STATE_CREATING:
spin_unlock(&cookie->lock);
_leave(" [look %x]", cookie->inval_counter);
{
wait_var_event_timeout(&candidate->flags,
!fscache_is_acquire_pending(candidate), 20 * HZ);
- if (!fscache_is_acquire_pending(candidate)) {
+ if (fscache_is_acquire_pending(candidate)) {
pr_notice("Potential volume collision new=%08x old=%08x",
candidate->debug_id, collidee_debug_id);
fscache_stat(&fscache_n_volumes_collision);
hlist_bl_add_head(&candidate->hash_link, h);
hlist_bl_unlock(h);
- if (test_bit(FSCACHE_VOLUME_ACQUIRE_PENDING, &candidate->flags))
+ if (fscache_is_acquire_pending(candidate))
fscache_wait_on_volume_collision(candidate, collidee_debug_id);
return true;
remove_inode_hugepages(inode, offset, LLONG_MAX);
}
+static void hugetlbfs_zero_partial_page(struct hstate *h,
+ struct address_space *mapping,
+ loff_t start,
+ loff_t end)
+{
+ pgoff_t idx = start >> huge_page_shift(h);
+ struct folio *folio;
+
+ folio = filemap_lock_folio(mapping, idx);
+ if (!folio)
+ return;
+
+ start = start & ~huge_page_mask(h);
+ end = end & ~huge_page_mask(h);
+ if (!end)
+ end = huge_page_size(h);
+
+ folio_zero_segment(folio, (size_t)start, (size_t)end);
+
+ folio_unlock(folio);
+ folio_put(folio);
+}
+
static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
{
+ struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
+ struct address_space *mapping = inode->i_mapping;
struct hstate *h = hstate_inode(inode);
loff_t hpage_size = huge_page_size(h);
loff_t hole_start, hole_end;
/*
- * For hole punch round up the beginning offset of the hole and
- * round down the end.
+ * hole_start and hole_end indicate the full pages within the hole.
*/
hole_start = round_up(offset, hpage_size);
hole_end = round_down(offset + len, hpage_size);
- if (hole_end > hole_start) {
- struct address_space *mapping = inode->i_mapping;
- struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
+ inode_lock(inode);
- inode_lock(inode);
+ /* protected by i_rwsem */
+ if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
+ inode_unlock(inode);
+ return -EPERM;
+ }
- /* protected by i_rwsem */
- if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
- inode_unlock(inode);
- return -EPERM;
- }
+ i_mmap_lock_write(mapping);
+
+ /* If range starts before first full page, zero partial page. */
+ if (offset < hole_start)
+ hugetlbfs_zero_partial_page(h, mapping,
+ offset, min(offset + len, hole_start));
- i_mmap_lock_write(mapping);
+ /* Unmap users of full pages in the hole. */
+ if (hole_end > hole_start) {
if (!RB_EMPTY_ROOT(&mapping->i_mmap.rb_root))
hugetlb_vmdelete_list(&mapping->i_mmap,
hole_start >> PAGE_SHIFT,
hole_end >> PAGE_SHIFT, 0);
- i_mmap_unlock_write(mapping);
- remove_inode_hugepages(inode, hole_start, hole_end);
- inode_unlock(inode);
}
+ /* If range extends beyond last full page, zero partial page. */
+ if ((offset + len) > hole_end && (offset + len) > hole_start)
+ hugetlbfs_zero_partial_page(h, mapping,
+ hole_end, offset + len);
+
+ i_mmap_unlock_write(mapping);
+
+ /* Remove full pages from the file. */
+ if (hole_end > hole_start)
+ remove_inode_hugepages(inode, hole_start, hole_end);
+
+ inode_unlock(inode);
+
return 0;
}
* Inode locking rules:
*
* inode->i_lock protects:
- * inode->i_state, inode->i_hash, __iget()
+ * inode->i_state, inode->i_hash, __iget(), inode->i_io_list
* Inode LRU list locks protect:
* inode->i_sb->s_inode_lru, inode->i_lru
* inode->i_sb->s_inode_list_lock protects:
/* below is for ring provided buffers */
__u16 buf_nr_pages;
__u16 nr_entries;
- __u32 head;
- __u32 mask;
+ __u16 head;
+ __u16 mask;
};
struct io_buffer {
struct file *file;
int fd;
u32 file_slot;
- u32 flags;
};
struct io_timeout_data {
u32 len;
};
-struct io_nop {
- struct file *file;
- u64 extra1;
- u64 extra2;
-};
-
struct io_async_connect {
struct sockaddr_storage address;
};
REQ_F_SINGLE_POLL_BIT,
REQ_F_DOUBLE_POLL_BIT,
REQ_F_PARTIAL_IO_BIT,
+ REQ_F_CQE32_INIT_BIT,
REQ_F_APOLL_MULTISHOT_BIT,
/* keep async read/write and isreg together and in order */
REQ_F_SUPPORT_NOWAIT_BIT,
REQ_F_PARTIAL_IO = BIT(REQ_F_PARTIAL_IO_BIT),
/* fast poll multishot mode */
REQ_F_APOLL_MULTISHOT = BIT(REQ_F_APOLL_MULTISHOT_BIT),
+ /* ->extra1 and ->extra2 are initialised */
+ REQ_F_CQE32_INIT = BIT(REQ_F_CQE32_INIT_BIT),
};
struct async_poll {
struct io_msg msg;
struct io_xattr xattr;
struct io_socket sock;
- struct io_nop nop;
struct io_uring_cmd uring_cmd;
};
[IORING_OP_NOP] = {
.audit_skip = 1,
.iopoll = 1,
- .buffer_select = 1,
},
[IORING_OP_READV] = {
.needs_file = 1,
.unbound_nonreg_file = 1,
.pollout = 1,
.needs_async_setup = 1,
+ .ioprio = 1,
.async_size = sizeof(struct io_async_msghdr),
},
[IORING_OP_RECVMSG] = {
.pollin = 1,
.buffer_select = 1,
.needs_async_setup = 1,
+ .ioprio = 1,
.async_size = sizeof(struct io_async_msghdr),
},
[IORING_OP_TIMEOUT] = {
.unbound_nonreg_file = 1,
.pollout = 1,
.audit_skip = 1,
+ .ioprio = 1,
},
[IORING_OP_RECV] = {
.needs_file = 1,
.pollin = 1,
.buffer_select = 1,
.audit_skip = 1,
+ .ioprio = 1,
},
[IORING_OP_OPENAT2] = {
},
if (!(req->flags & (REQ_F_BUFFER_SELECTED|REQ_F_BUFFER_RING)))
return;
- /* don't recycle if we already did IO to this buffer */
- if (req->flags & REQ_F_PARTIAL_IO)
+ /*
+ * For legacy provided buffer mode, don't recycle if we already did
+ * IO to this buffer. For ring-mapped provided buffer mode, we should
+ * increment ring->head to explicitly monopolize the buffer to avoid
+ * multiple use.
+ */
+ if ((req->flags & REQ_F_BUFFER_SELECTED) &&
+ (req->flags & REQ_F_PARTIAL_IO))
+ return;
+
+ /*
+ * READV uses fields in `struct io_rw` (len/addr) to stash the selected
+ * buffer data. However if that buffer is recycled the original request
+ * data stored in addr is lost. Therefore forbid recycling for now.
+ */
+ if (req->opcode == IORING_OP_READV)
return;
+
/*
* We don't need to recycle for REQ_F_BUFFER_RING, we can just clear
* the flag and hence ensure that bl->head doesn't get incremented.
*/
if (req->flags & REQ_F_BUFFER_RING) {
if (req->buf_list) {
- req->buf_index = req->buf_list->bgid;
- req->flags &= ~REQ_F_BUFFER_RING;
+ if (req->flags & REQ_F_PARTIAL_IO) {
+ req->buf_list->head++;
+ req->buf_list = NULL;
+ } else {
+ req->buf_index = req->buf_list->bgid;
+ req->flags &= ~REQ_F_BUFFER_RING;
+ }
}
return;
}
{
if (!(req->flags & REQ_F_INFLIGHT)) {
req->flags |= REQ_F_INFLIGHT;
- atomic_inc(¤t->io_uring->inflight_tracked);
+ atomic_inc(&req->task->io_uring->inflight_tracked);
}
}
return true;
}
-static inline bool __io_fill_cqe(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
+static inline bool __io_fill_cqe_req(struct io_ring_ctx *ctx,
+ struct io_kiocb *req)
{
struct io_uring_cqe *cqe;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, user_data);
- WRITE_ONCE(cqe->res, res);
- WRITE_ONCE(cqe->flags, cflags);
- return true;
- }
- return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
-}
+ if (!(ctx->flags & IORING_SETUP_CQE32)) {
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, 0, 0);
-static inline bool __io_fill_cqe_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(*cqe));
+ return true;
+ }
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ 0, 0);
+ } else {
+ u64 extra1 = 0, extra2 = 0;
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(*cqe));
- return true;
- }
- return io_cqring_event_overflow(ctx, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, 0, 0);
-}
+ if (req->flags & REQ_F_CQE32_INIT) {
+ extra1 = req->extra1;
+ extra2 = req->extra2;
+ }
-static inline bool __io_fill_cqe32_req_filled(struct io_ring_ctx *ctx,
- struct io_kiocb *req)
-{
- struct io_uring_cqe *cqe;
- u64 extra1 = req->extra1;
- u64 extra2 = req->extra2;
+ trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags, extra1, extra2);
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data,
- req->cqe.res, req->cqe.flags, extra1, extra2);
+ /*
+ * If we can't get a cq entry, userspace overflowed the
+ * submission (by quite a lot). Increment the overflow count in
+ * the ring.
+ */
+ cqe = io_get_cqe(ctx);
+ if (likely(cqe)) {
+ memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
+ WRITE_ONCE(cqe->big_cqe[0], extra1);
+ WRITE_ONCE(cqe->big_cqe[1], extra2);
+ return true;
+ }
- /*
- * If we can't get a cq entry, userspace overflowed the
- * submission (by quite a lot). Increment the overflow count in
- * the ring.
- */
- cqe = io_get_cqe(ctx);
- if (likely(cqe)) {
- memcpy(cqe, &req->cqe, sizeof(struct io_uring_cqe));
- cqe->big_cqe[0] = extra1;
- cqe->big_cqe[1] = extra2;
- return true;
+ return io_cqring_event_overflow(ctx, req->cqe.user_data,
+ req->cqe.res, req->cqe.flags,
+ extra1, extra2);
}
-
- return io_cqring_event_overflow(ctx, req->cqe.user_data, req->cqe.res,
- req->cqe.flags, extra1, extra2);
-}
-
-static inline bool __io_fill_cqe_req(struct io_kiocb *req, s32 res, u32 cflags)
-{
- trace_io_uring_complete(req->ctx, req, req->cqe.user_data, res, cflags, 0, 0);
- return __io_fill_cqe(req->ctx, req->cqe.user_data, res, cflags);
}
-static inline void __io_fill_cqe32_req(struct io_kiocb *req, s32 res, u32 cflags,
- u64 extra1, u64 extra2)
+static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
+ s32 res, u32 cflags)
{
- struct io_ring_ctx *ctx = req->ctx;
struct io_uring_cqe *cqe;
- if (WARN_ON_ONCE(!(ctx->flags & IORING_SETUP_CQE32)))
- return;
- if (req->flags & REQ_F_CQE_SKIP)
- return;
-
- trace_io_uring_complete(ctx, req, req->cqe.user_data, res, cflags,
- extra1, extra2);
+ ctx->cq_extra++;
+ trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
/*
* If we can't get a cq entry, userspace overflowed the
*/
cqe = io_get_cqe(ctx);
if (likely(cqe)) {
- WRITE_ONCE(cqe->user_data, req->cqe.user_data);
+ WRITE_ONCE(cqe->user_data, user_data);
WRITE_ONCE(cqe->res, res);
WRITE_ONCE(cqe->flags, cflags);
- WRITE_ONCE(cqe->big_cqe[0], extra1);
- WRITE_ONCE(cqe->big_cqe[1], extra2);
- return;
- }
- io_cqring_event_overflow(ctx, req->cqe.user_data, res, cflags, extra1, extra2);
-}
-
-static noinline bool io_fill_cqe_aux(struct io_ring_ctx *ctx, u64 user_data,
- s32 res, u32 cflags)
-{
- ctx->cq_extra++;
- trace_io_uring_complete(ctx, NULL, user_data, res, cflags, 0, 0);
- return __io_fill_cqe(ctx, user_data, res, cflags);
+ if (ctx->flags & IORING_SETUP_CQE32) {
+ WRITE_ONCE(cqe->big_cqe[0], 0);
+ WRITE_ONCE(cqe->big_cqe[1], 0);
+ }
+ return true;
+ }
+ return io_cqring_event_overflow(ctx, user_data, res, cflags, 0, 0);
}
static void __io_req_complete_put(struct io_kiocb *req)
static void __io_req_complete_post(struct io_kiocb *req, s32 res,
u32 cflags)
{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe_req(req, res, cflags);
- __io_req_complete_put(req);
-}
-
-static void __io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (!(req->flags & REQ_F_CQE_SKIP))
- __io_fill_cqe32_req(req, res, cflags, extra1, extra2);
+ if (!(req->flags & REQ_F_CQE_SKIP)) {
+ req->cqe.res = res;
+ req->cqe.flags = cflags;
+ __io_fill_cqe_req(req->ctx, req);
+ }
__io_req_complete_put(req);
}
io_cqring_ev_posted(ctx);
}
-static void io_req_complete_post32(struct io_kiocb *req, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- struct io_ring_ctx *ctx = req->ctx;
-
- spin_lock(&ctx->completion_lock);
- __io_req_complete_post32(req, res, cflags, extra1, extra2);
- io_commit_cqring(ctx);
- spin_unlock(&ctx->completion_lock);
- io_cqring_ev_posted(ctx);
-}
-
static inline void io_req_complete_state(struct io_kiocb *req, s32 res,
u32 cflags)
{
io_req_complete_post(req, res, cflags);
}
-static inline void __io_req_complete32(struct io_kiocb *req,
- unsigned int issue_flags, s32 res,
- u32 cflags, u64 extra1, u64 extra2)
-{
- if (issue_flags & IO_URING_F_COMPLETE_DEFER) {
- io_req_complete_state(req, res, cflags);
- req->extra1 = extra1;
- req->extra2 = extra2;
- } else {
- io_req_complete_post32(req, res, cflags, extra1, extra2);
- }
-}
-
static inline void io_req_complete(struct io_kiocb *req, s32 res)
{
if (res < 0)
struct io_kiocb *req = container_of(node, struct io_kiocb,
comp_list);
- if (!(req->flags & REQ_F_CQE_SKIP)) {
- if (!(ctx->flags & IORING_SETUP_CQE32))
- __io_fill_cqe_req_filled(ctx, req);
- else
- __io_fill_cqe32_req_filled(ctx, req);
- }
+ if (!(req->flags & REQ_F_CQE_SKIP))
+ __io_fill_cqe_req(ctx, req);
}
io_commit_cqring(ctx);
nr_events++;
if (unlikely(req->flags & REQ_F_CQE_SKIP))
continue;
- __io_fill_cqe_req(req, req->cqe.res, io_put_kbuf(req, 0));
+
+ req->cqe.flags = io_put_kbuf(req, 0);
+ __io_fill_cqe_req(req->ctx, req);
}
if (unlikely(!nr_events))
if (unlikely(res != req->cqe.res)) {
if ((res == -EAGAIN || res == -EOPNOTSUPP) &&
io_rw_should_reissue(req)) {
- req->flags |= REQ_F_REISSUE;
+ req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
return true;
}
req_set_fail(req);
kiocb_end_write(req);
if (unlikely(res != req->cqe.res)) {
if (res == -EAGAIN && io_rw_should_reissue(req)) {
- req->flags |= REQ_F_REISSUE;
+ req->flags |= REQ_F_REISSUE | REQ_F_PARTIAL_IO;
return;
}
req->cqe.res = res;
int ret;
kiocb->ki_pos = READ_ONCE(sqe->off);
+ /* used for fixed read/write too - just read unconditionally */
+ req->buf_index = READ_ONCE(sqe->buf_index);
+
+ if (req->opcode == IORING_OP_READ_FIXED ||
+ req->opcode == IORING_OP_WRITE_FIXED) {
+ struct io_ring_ctx *ctx = req->ctx;
+ u16 index;
+
+ if (unlikely(req->buf_index >= ctx->nr_user_bufs))
+ return -EFAULT;
+ index = array_index_nospec(req->buf_index, ctx->nr_user_bufs);
+ req->imu = ctx->user_bufs[index];
+ io_req_set_rsrc_node(req, ctx, 0);
+ }
ioprio = READ_ONCE(sqe->ioprio);
if (ioprio) {
kiocb->ki_ioprio = get_current_ioprio();
}
- req->imu = NULL;
req->rw.addr = READ_ONCE(sqe->addr);
req->rw.len = READ_ONCE(sqe->len);
req->rw.flags = READ_ONCE(sqe->rw_flags);
- /* used for fixed read/write too - just read unconditionally */
- req->buf_index = READ_ONCE(sqe->buf_index);
return 0;
}
static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter,
unsigned int issue_flags)
{
- struct io_mapped_ubuf *imu = req->imu;
- u16 index, buf_index = req->buf_index;
-
- if (likely(!imu)) {
- struct io_ring_ctx *ctx = req->ctx;
-
- if (unlikely(buf_index >= ctx->nr_user_bufs))
- return -EFAULT;
- io_req_set_rsrc_node(req, ctx, issue_flags);
- index = array_index_nospec(buf_index, ctx->nr_user_bufs);
- imu = READ_ONCE(ctx->user_bufs[index]);
- req->imu = imu;
- }
- return __io_import_fixed(req, rw, iter, imu);
+ if (WARN_ON_ONCE(!req->imu))
+ return -EFAULT;
+ return __io_import_fixed(req, rw, iter, req->imu);
}
static int io_buffer_add_list(struct io_ring_ctx *ctx,
{
struct io_uring_buf_ring *br = bl->buf_ring;
struct io_uring_buf *buf;
- __u32 head = bl->head;
+ __u16 head = bl->head;
- if (unlikely(smp_load_acquire(&br->tail) == head)) {
- io_ring_submit_unlock(req->ctx, issue_flags);
+ if (unlikely(smp_load_acquire(&br->tail) == head))
return NULL;
- }
head &= bl->mask;
if (head < IO_BUFFER_LIST_BUF_PER_PAGE) {
buf = &br->bufs[head];
} else {
int off = head & (IO_BUFFER_LIST_BUF_PER_PAGE - 1);
- int index = head / IO_BUFFER_LIST_BUF_PER_PAGE - 1;
+ int index = head / IO_BUFFER_LIST_BUF_PER_PAGE;
buf = page_address(bl->buf_pages[index]);
buf += off;
}
req->buf_list = bl;
req->buf_index = buf->bid;
- if (issue_flags & IO_URING_F_UNLOCKED) {
+ if (issue_flags & IO_URING_F_UNLOCKED || !file_can_poll(req->file)) {
/*
* If we came in unlocked, we have no choice but to consume the
* buffer here. This does mean it'll be pinned until the IO
if (unlikely(ret < 0))
return ret;
} else {
+ rw = req->async_data;
+ s = &rw->s;
+
/*
* Safe and required to re-import if we're using provided
* buffers, as we dropped the selected one before retry.
*/
- if (req->flags & REQ_F_BUFFER_SELECT) {
+ if (io_do_buffer_select(req)) {
ret = io_import_iovec(READ, req, &iovec, s, issue_flags);
if (unlikely(ret < 0))
return ret;
}
- rw = req->async_data;
- s = &rw->s;
/*
* We come here from an earlier attempt, restore our state to
* match in case it doesn't. It's cheap enough that we don't
req->uring_cmd.task_work_cb = task_work_cb;
req->io_task_work.func = io_uring_cmd_work;
- io_req_task_prio_work_add(req);
+ io_req_task_work_add(req);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_complete_in_task);
+static inline void io_req_set_cqe32_extra(struct io_kiocb *req,
+ u64 extra1, u64 extra2)
+{
+ req->extra1 = extra1;
+ req->extra2 = extra2;
+ req->flags |= REQ_F_CQE32_INIT;
+}
+
/*
* Called by consumers of io_uring_cmd, if they originally returned
* -EIOCBQUEUED upon receiving the command.
if (ret < 0)
req_set_fail(req);
+
if (req->ctx->flags & IORING_SETUP_CQE32)
- __io_req_complete32(req, 0, ret, 0, res2, 0);
- else
- io_req_complete(req, ret);
+ io_req_set_cqe32_extra(req, res2, 0);
+ io_req_complete(req, ret);
}
EXPORT_SYMBOL_GPL(io_uring_cmd_done);
{
struct io_uring_cmd *ioucmd = &req->uring_cmd;
- if (sqe->rw_flags)
+ if (sqe->rw_flags || sqe->__pad1)
return -EINVAL;
ioucmd->cmd = sqe->cmd;
ioucmd->cmd_op = READ_ONCE(sqe->cmd_op);
static int io_nop_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- /*
- * If the ring is setup with CQE32, relay back addr/addr
- */
- if (req->ctx->flags & IORING_SETUP_CQE32) {
- req->nop.extra1 = READ_ONCE(sqe->addr);
- req->nop.extra2 = READ_ONCE(sqe->addr2);
- }
-
return 0;
}
*/
static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
{
- unsigned int cflags;
- void __user *buf;
-
- if (req->flags & REQ_F_BUFFER_SELECT) {
- size_t len = 1;
-
- buf = io_buffer_select(req, &len, issue_flags);
- if (!buf)
- return -ENOBUFS;
- }
-
- cflags = io_put_kbuf(req, issue_flags);
- if (!(req->ctx->flags & IORING_SETUP_CQE32))
- __io_req_complete(req, issue_flags, 0, cflags);
- else
- __io_req_complete32(req, issue_flags, 0, cflags,
- req->nop.extra1, req->nop.extra2);
+ __io_req_complete(req, issue_flags, 0, 0);
return 0;
}
static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
{
- if (sqe->off || sqe->addr || sqe->len || sqe->buf_index)
+ if (sqe->off || sqe->addr || sqe->len || sqe->rw_flags || sqe->buf_index)
return -EINVAL;
if (req->flags & REQ_F_FIXED_FILE)
return -EBADF;
req->close.fd = READ_ONCE(sqe->fd);
req->close.file_slot = READ_ONCE(sqe->file_index);
- req->close.flags = READ_ONCE(sqe->close_flags);
- if (req->close.flags & ~IORING_CLOSE_FD_AND_FILE_SLOT)
- return -EINVAL;
- if (!(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT) &&
- req->close.file_slot && req->close.fd)
+ if (req->close.file_slot && req->close.fd)
return -EINVAL;
return 0;
if (req->close.file_slot) {
ret = io_close_fixed(req, issue_flags);
- if (ret || !(req->close.flags & IORING_CLOSE_FD_AND_FILE_SLOT))
- goto err;
+ goto err;
}
spin_lock(&files->file_lock);
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(sqe->file_index))
- return -EINVAL;
- if (unlikely(sqe->addr2 || sqe->file_index))
+ if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
- sr->flags = READ_ONCE(sqe->addr2);
+ sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
{
struct io_sr_msg *sr = &req->sr_msg;
- if (unlikely(sqe->file_index))
- return -EINVAL;
- if (unlikely(sqe->addr2 || sqe->file_index))
+ if (unlikely(sqe->file_index || sqe->addr2))
return -EINVAL;
sr->umsg = u64_to_user_ptr(READ_ONCE(sqe->addr));
sr->len = READ_ONCE(sqe->len);
- sr->flags = READ_ONCE(sqe->addr2);
+ sr->flags = READ_ONCE(sqe->ioprio);
if (sr->flags & ~IORING_RECVSEND_POLL_FIRST)
return -EINVAL;
sr->msg_flags = READ_ONCE(sqe->msg_flags) | MSG_NOSIGNAL;
io_req_complete_failed(req, ret);
}
-static void __io_poll_execute(struct io_kiocb *req, int mask, __poll_t events)
+static void __io_poll_execute(struct io_kiocb *req, int mask,
+ __poll_t __maybe_unused events)
{
req->cqe.res = mask;
/*
* CPU. We want to avoid pulling in req->apoll->events for that
* case.
*/
- req->apoll_events = events;
if (req->opcode == IORING_OP_POLL_ADD)
req->io_task_work.func = io_poll_task_func;
else
io_init_poll_iocb(poll, mask, io_poll_wake);
poll->file = req->file;
+ req->apoll_events = poll->events;
+
ipt->pt._key = mask;
ipt->req = req;
ipt->error = 0;
if (mask) {
/* can't multishot if failed, just queue the event we've got */
- if (unlikely(ipt->error || !ipt->nr_entries))
+ if (unlikely(ipt->error || !ipt->nr_entries)) {
poll->events |= EPOLLONESHOT;
+ req->apoll_events |= EPOLLONESHOT;
+ ipt->error = 0;
+ }
__io_poll_execute(req, mask, poll->events);
return 0;
}
mask |= EPOLLEXCLUSIVE;
if (req->flags & REQ_F_POLLED) {
apoll = req->apoll;
+ kfree(apoll->double_poll);
} else if (!(issue_flags & IO_URING_F_UNLOCKED) &&
!list_empty(&ctx->apoll_cache)) {
apoll = list_first_entry(&ctx->apoll_cache, struct async_poll,
return -EINVAL;
io_req_set_refcount(req);
- req->apoll_events = poll->events = io_poll_parse_events(sqe, flags);
+ poll->events = io_poll_parse_events(sqe, flags);
return 0;
}
ipt.pt._qproc = io_poll_queue_proc;
ret = __io_arm_poll_handler(req, &req->poll, &ipt, poll->events);
+ if (!ret && ipt.error)
+ req_set_fail(req);
ret = ret ?: ipt.error;
if (ret)
__io_req_complete(req, issue_flags, ret, 0);
struct file *file;
int ret, fd;
+ if (!req->ctx->file_data)
+ return -ENXIO;
+
for (done = 0; done < req->rsrc_update.nr_args; done++) {
if (copy_from_user(&fd, &fds[done], sizeof(fd))) {
ret = -EFAULT;
if (ret < 0)
break;
if (copy_to_user(&fds[done], &ret, sizeof(ret))) {
- ret = -EFAULT;
__io_close_fixed(req, issue_flags, ret);
+ ret = -EFAULT;
break;
}
}
* Queued up for async execution, worker will release
* submit reference when the iocb is actually submitted.
*/
+ io_kbuf_recycle(req, 0);
io_queue_iowq(req, NULL);
break;
case IO_APOLL_OK:
static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_files;
int ret;
if (!ctx->file_data)
return -ENXIO;
+
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_files = 0;
ret = io_rsrc_ref_quiesce(ctx->file_data, ctx);
+ ctx->nr_user_files = nr;
if (!ret)
__io_sqe_files_unregister(ctx);
return ret;
static int io_sqe_buffers_unregister(struct io_ring_ctx *ctx)
{
+ unsigned nr = ctx->nr_user_bufs;
int ret;
if (!ctx->buf_data)
return -ENXIO;
+ /*
+ * Quiesce may unlock ->uring_lock, and while it's not held
+ * prevent new requests using the table.
+ */
+ ctx->nr_user_bufs = 0;
ret = io_rsrc_ref_quiesce(ctx->buf_data, ctx);
+ ctx->nr_user_bufs = nr;
if (!ret)
__io_sqe_buffers_unregister(ctx);
return ret;
{
struct io_uring_buf_ring *br;
struct io_uring_buf_reg reg;
- struct io_buffer_list *bl;
+ struct io_buffer_list *bl, *free_bl = NULL;
struct page **pages;
int nr_pages;
if (!is_power_of_2(reg.ring_entries))
return -EINVAL;
+ /* cannot disambiguate full vs empty due to head/tail size */
+ if (reg.ring_entries >= 65536)
+ return -EINVAL;
+
if (unlikely(reg.bgid < BGID_ARRAY && !ctx->io_bl)) {
int ret = io_init_bl_list(ctx);
if (ret)
if (bl->buf_nr_pages || !list_empty(&bl->buf_list))
return -EEXIST;
} else {
- bl = kzalloc(sizeof(*bl), GFP_KERNEL);
+ free_bl = bl = kzalloc(sizeof(*bl), GFP_KERNEL);
if (!bl)
return -ENOMEM;
}
struct_size(br, bufs, reg.ring_entries),
&nr_pages);
if (IS_ERR(pages)) {
- kfree(bl);
+ kfree(free_bl);
return PTR_ERR(pages);
}
/**
* jbd2_journal_try_to_free_buffers() - try to free page buffers.
* @journal: journal for operation
- * @page: to try and free
+ * @folio: Folio to detach data from.
*
* For all the buffers on this page,
* if they are fully written out ordered data, move them onto BUF_CLEAN
goto out;
}
+ ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
writethrough = true;
data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
le16_to_cpu(req->DataOffset));
- ksmbd_debug(SMB, "flags %u\n", le32_to_cpu(req->Flags));
- if (le32_to_cpu(req->Flags) & SMB2_WRITEFLAG_WRITE_THROUGH)
- writethrough = true;
-
ksmbd_debug(SMB, "filename %pd, offset %lld, len %zu\n",
fp->filp->f_path.dentry, offset, length);
err = ksmbd_vfs_write(work, fp, data_buf, length, &offset,
{
struct file_zero_data_information *zero_data;
struct ksmbd_file *fp;
- loff_t off, len;
+ loff_t off, len, bfz;
if (!test_tree_conn_flag(work->tcon, KSMBD_TREE_CONN_FLAG_WRITABLE)) {
ksmbd_debug(SMB,
zero_data =
(struct file_zero_data_information *)&req->Buffer[0];
- fp = ksmbd_lookup_fd_fast(work, id);
- if (!fp) {
- ret = -ENOENT;
+ off = le64_to_cpu(zero_data->FileOffset);
+ bfz = le64_to_cpu(zero_data->BeyondFinalZero);
+ if (off > bfz) {
+ ret = -EINVAL;
goto out;
}
- off = le64_to_cpu(zero_data->FileOffset);
- len = le64_to_cpu(zero_data->BeyondFinalZero) - off;
+ len = bfz - off;
+ if (len) {
+ fp = ksmbd_lookup_fd_fast(work, id);
+ if (!fp) {
+ ret = -ENOENT;
+ goto out;
+ }
- ret = ksmbd_vfs_zero_data(work, fp, off, len);
- ksmbd_fd_put(work, fp);
- if (ret < 0)
- goto out;
+ ret = ksmbd_vfs_zero_data(work, fp, off, len);
+ ksmbd_fd_put(work, fp);
+ if (ret < 0)
+ goto out;
+ }
break;
}
case FSCTL_QUERY_ALLOCATED_RANGES:
src_off = le64_to_cpu(dup_ext->SourceFileOffset);
dst_off = le64_to_cpu(dup_ext->TargetFileOffset);
length = le64_to_cpu(dup_ext->ByteCount);
- cloned = vfs_clone_file_range(fp_in->filp, src_off, fp_out->filp,
- dst_off, length, 0);
+ /*
+ * XXX: It is not clear if FSCTL_DUPLICATE_EXTENTS_TO_FILE
+ * should fall back to vfs_copy_file_range(). This could be
+ * beneficial when re-exporting nfs/smb mount, but note that
+ * this can result in partial copy that returns an error status.
+ * If/when FSCTL_DUPLICATE_EXTENTS_TO_FILE_EX is implemented,
+ * fall back to vfs_copy_file_range(), should be avoided when
+ * the flag DUPLICATE_EXTENTS_DATA_EX_SOURCE_ATOMIC is set.
+ */
+ cloned = vfs_clone_file_range(fp_in->filp, src_off,
+ fp_out->filp, dst_off, length, 0);
if (cloned == -EXDEV || cloned == -EOPNOTSUPP) {
ret = -EOPNOTSUPP;
goto dup_ext_out;
} else if (cloned != length) {
cloned = vfs_copy_file_range(fp_in->filp, src_off,
- fp_out->filp, dst_off, length, 0);
+ fp_out->filp, dst_off,
+ length, 0);
if (cloned != length) {
if (cloned < 0)
ret = cloned;
*
* Author(s): Long Li <longli@microsoft.com>,
* Hyunchul Lee <hyc.lee@gmail.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
*/
#define SUBMOD_NAME "smb_direct"
break;
}
ret = kernel_accept(iface->ksmbd_socket, &client_sk,
- O_NONBLOCK);
+ SOCK_NONBLOCK);
mutex_unlock(&iface->sock_release_lock);
if (ret) {
if (ret == -EAGAIN)
FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
off, len);
- return vfs_fallocate(fp->filp, FALLOC_FL_ZERO_RANGE, off, len);
+ return vfs_fallocate(fp->filp,
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE,
+ off, len);
}
int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length,
*out_count = 0;
end = start + length;
while (start < end && *out_count < in_count) {
- extent_start = f->f_op->llseek(f, start, SEEK_DATA);
+ extent_start = vfs_llseek(f, start, SEEK_DATA);
if (extent_start < 0) {
if (extent_start != -ENXIO)
ret = (int)extent_start;
if (extent_start >= end)
break;
- extent_end = f->f_op->llseek(f, extent_start, SEEK_HOLE);
+ extent_end = vfs_llseek(f, extent_start, SEEK_HOLE);
if (extent_end < 0) {
if (extent_end != -ENXIO)
ret = (int)extent_end;
ret = vfs_copy_file_range(src_fp->filp, src_off,
dst_fp->filp, dst_off, len, 0);
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src_fp->filp, src_off,
+ dst_fp->filp, dst_off,
+ len, 0);
if (ret < 0)
return ret;
}
}
-static int nlm_unlock_files(struct nlm_file *file)
+static int nlm_unlock_files(struct nlm_file *file, fl_owner_t owner)
{
struct file_lock lock;
lock.fl_type = F_UNLCK;
lock.fl_start = 0;
lock.fl_end = OFFSET_MAX;
+ lock.fl_owner = owner;
if (file->f_file[O_RDONLY] &&
vfs_lock_file(file->f_file[O_RDONLY], F_SETLK, &lock, NULL))
goto out_err;
if (match(lockhost, host)) {
spin_unlock(&flctx->flc_lock);
- if (nlm_unlock_files(file))
+ if (nlm_unlock_files(file, fl->fl_owner))
return 1;
goto again;
}
static void nlm_close_files(struct nlm_file *file)
{
- struct file *f;
-
- for (f = file->f_file[0]; f <= file->f_file[1]; f++)
- if (f)
- nlmsvc_ops->fclose(f);
+ if (file->f_file[O_RDONLY])
+ nlmsvc_ops->fclose(file->f_file[O_RDONLY]);
+ if (file->f_file[O_WRONLY])
+ nlmsvc_ops->fclose(file->f_file[O_WRONLY]);
}
/*
void netfs_readahead(struct readahead_control *ractl)
{
struct netfs_io_request *rreq;
- struct netfs_i_context *ctx = netfs_i_context(ractl->mapping->host);
+ struct netfs_inode *ctx = netfs_inode(ractl->mapping->host);
int ret;
_enter("%lx,%x", readahead_index(ractl), readahead_count(ractl));
{
struct address_space *mapping = folio_file_mapping(folio);
struct netfs_io_request *rreq;
- struct netfs_i_context *ctx = netfs_i_context(mapping->host);
+ struct netfs_inode *ctx = netfs_inode(mapping->host);
int ret;
_enter("%lx", folio_index(folio));
/**
* netfs_write_begin - Helper to prepare for writing
+ * @ctx: The netfs context
* @file: The file to read from
* @mapping: The mapping to read from
* @pos: File position at which the write will begin
* conflicting writes once the folio is grabbed and locked. It is passed a
* pointer to the fsdata cookie that gets returned to the VM to be passed to
* write_end. It is permitted to sleep. It should return 0 if the request
- * should go ahead; unlock the folio and return -EAGAIN to cause the folio to
- * be regot; or return an error.
+ * should go ahead or it may return an error. It may also unlock and put the
+ * folio, provided it sets ``*foliop`` to NULL, in which case a return of 0
+ * will cause the folio to be re-got and the process to be retried.
*
* The calling netfs must initialise a netfs context contiguous to the vfs
* inode before calling this.
*
* This is usable whether or not caching is enabled.
*/
-int netfs_write_begin(struct file *file, struct address_space *mapping,
+int netfs_write_begin(struct netfs_inode *ctx,
+ struct file *file, struct address_space *mapping,
loff_t pos, unsigned int len, struct folio **_folio,
void **_fsdata)
{
struct netfs_io_request *rreq;
- struct netfs_i_context *ctx = netfs_i_context(file_inode(file ));
struct folio *folio;
unsigned int fgp_flags = FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE;
pgoff_t index = pos >> PAGE_SHIFT;
if (ctx->ops->check_write_begin) {
/* Allow the netfs (eg. ceph) to flush conflicts. */
- ret = ctx->ops->check_write_begin(file, pos, len, folio, _fsdata);
+ ret = ctx->ops->check_write_begin(file, pos, len, &folio, _fsdata);
if (ret < 0) {
trace_netfs_failure(NULL, NULL, ret, netfs_fail_check_write_begin);
- if (ret == -EAGAIN)
- goto retry;
goto error;
}
+ if (!folio)
+ goto retry;
}
if (folio_test_uptodate(folio))
error_put:
netfs_put_request(rreq, false, netfs_rreq_trace_put_failed);
error:
- folio_unlock(folio);
- folio_put(folio);
+ if (folio) {
+ folio_unlock(folio);
+ folio_put(folio);
+ }
_leave(" = %d", ret);
return ret;
}
/*
* Miscellaneous functions.
*/
-static inline bool netfs_is_cache_enabled(struct netfs_i_context *ctx)
+static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
{
#if IS_ENABLED(CONFIG_FSCACHE)
struct fscache_cookie *cookie = ctx->cache;
{
static atomic_t debug_ids;
struct inode *inode = file ? file_inode(file) : mapping->host;
- struct netfs_i_context *ctx = netfs_i_context(inode);
+ struct netfs_inode *ctx = netfs_inode(inode);
struct netfs_io_request *rreq;
int ret;
struct netfs_io_request *rreq =
container_of(work, struct netfs_io_request, work);
- netfs_clear_subrequests(rreq, false);
- if (rreq->netfs_priv)
- rreq->netfs_ops->cleanup(rreq->mapping, rreq->netfs_priv);
trace_netfs_rreq(rreq, netfs_rreq_trace_free);
+ netfs_clear_subrequests(rreq, false);
+ if (rreq->netfs_ops->free_request)
+ rreq->netfs_ops->free_request(rreq);
if (rreq->cache_resources.ops)
rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
kfree(rreq);
rv = NFS4_OK;
break;
case -ENOENT:
+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
/* Embrace your forgetfulness! */
rv = NFS4ERR_NOMATCHING_LAYOUT;
}
goto out;
}
+ file->f_mode |= FMODE_CAN_ODIRECT;
err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
nfs_file_set_open_context(filp, ctx);
nfs_fscache_open_file(inode, filp);
err = 0;
+ filp->f_mode |= FMODE_CAN_ODIRECT;
out_put_ctx:
put_nfs_open_context(ctx);
}
page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
- if (page == NULL || locations == NULL)
- goto out;
+ if (!locations)
+ goto out_free;
+ locations->fattr = nfs_alloc_fattr();
+ if (!locations->fattr)
+ goto out_free_2;
status = nfs4_proc_get_locations(server, fhandle, locations, page,
cred);
if (status)
- goto out;
+ goto out_free_3;
for (i = 0; i < locations->nlocations; i++)
test_fs_location_for_trunking(&locations->locations[i], clp,
server);
-out:
- if (page)
- __free_page(page);
+out_free_3:
+ kfree(locations->fattr);
+out_free_2:
kfree(locations);
+out_free:
+ __free_page(page);
return status;
}
goto again;
nfs_put_client(clp);
+ module_put_and_kthread_exit(0);
return 0;
}
pnfs_clear_lseg_state(lseg, lseg_list);
pnfs_clear_layoutreturn_info(lo);
pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
+ set_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags);
if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
!test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
pnfs_clear_layoutreturn_waitbit(lo);
static void nfs_layoutget_end(struct pnfs_layout_hdr *lo)
{
- if (atomic_dec_and_test(&lo->plh_outstanding))
- wake_up_var(&lo->plh_outstanding);
+ if (atomic_dec_and_test(&lo->plh_outstanding) &&
+ test_and_clear_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags))
+ wake_up_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN);
}
static bool pnfs_is_first_layoutget(struct pnfs_layout_hdr *lo)
* If the layout segment list is empty, but there are outstanding
* layoutget calls, then they might be subject to a layoutrecall.
*/
- if ((list_empty(&lo->plh_segs) || !pnfs_layout_is_valid(lo)) &&
+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
atomic_read(&lo->plh_outstanding) != 0) {
spin_unlock(&ino->i_lock);
- lseg = ERR_PTR(wait_var_event_killable(&lo->plh_outstanding,
- !atomic_read(&lo->plh_outstanding)));
+ lseg = ERR_PTR(wait_on_bit(&lo->plh_flags, NFS_LAYOUT_DRAIN,
+ TASK_KILLABLE));
if (IS_ERR(lseg))
goto out_put_layout_hdr;
pnfs_put_layout_hdr(lo);
case -ERECALLCONFLICT:
case -EAGAIN:
break;
+ case -ENODATA:
+ /* The server returned NFS4ERR_LAYOUTUNAVAILABLE */
+ pnfs_layout_set_fail_bit(
+ lo, pnfs_iomode_to_fail_bit(iomode));
+ lseg = NULL;
+ goto out_put_layout_hdr;
default:
if (!nfs_error_is_fatal(PTR_ERR(lseg))) {
pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
goto out_forget;
}
- if (!pnfs_layout_is_valid(lo) && !pnfs_is_first_layoutget(lo))
+ if (test_bit(NFS_LAYOUT_DRAIN, &lo->plh_flags) &&
+ !pnfs_is_first_layoutget(lo))
goto out_forget;
if (nfs4_stateid_match_other(&lo->plh_stateid, &res->stateid)) {
NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */
NFS_LAYOUT_INODE_FREEING, /* The inode is being freed */
NFS_LAYOUT_HASHED, /* The layout visible */
+ NFS_LAYOUT_DRAIN,
};
enum layoutdriver_policy_flags {
if (test_bit(NFSD_FILE_HASHED, &nf->nf_flags) == 0) {
nfsd_file_flush(nf);
nfsd_file_put_noref(nf);
- } else {
+ } else if (nf->nf_file) {
nfsd_file_put_noref(nf);
- if (nf->nf_file)
- nfsd_file_schedule_laundrette();
- }
+ nfsd_file_schedule_laundrette();
+ } else
+ nfsd_file_put_noref(nf);
+
if (atomic_long_read(&nfsd_filecache_count) >= NFSD_FILE_LRU_LIMIT)
nfsd_file_gc();
}
return nfserr_bad_xdr;
}
}
+ if (bmval[1] & FATTR4_WORD1_TIME_CREATE) {
+ struct timespec64 ts;
+
+ /* No Linux filesystem supports setting this attribute. */
+ bmval[1] &= ~FATTR4_WORD1_TIME_CREATE;
+ status = nfsd4_decode_nfstime4(argp, &ts);
+ if (status)
+ return status;
+ }
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
u32 set_it;
(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL)
#define NFSD_WRITEABLE_ATTRS_WORD1 \
(FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
- | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_CREATE \
+ | FATTR4_WORD1_TIME_MODIFY_SET)
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#define MAYBE_FATTR4_WORD2_SECURITY_LABEL \
FATTR4_WORD2_SECURITY_LABEL
ssize_t nfsd_copy_file_range(struct file *src, u64 src_pos, struct file *dst,
u64 dst_pos, u64 count)
{
+ ssize_t ret;
/*
* Limit copy to 4MB to prevent indefinitely blocking an nfsd
* limit like this and pipeline multiple COPY requests.
*/
count = min_t(u64, count, 1 << 22);
- return vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+ ret = vfs_copy_file_range(src, src_pos, dst, dst_pos, count, 0);
+
+ if (ret == -EOPNOTSUPP || ret == -EXDEV)
+ ret = generic_copy_file_range(src, src_pos, dst, dst_pos,
+ count, 0);
+ return ret;
}
__be32 nfsd4_vfs_fallocate(struct svc_rqst *rqstp, struct svc_fh *fhp,
nfsd_copy_write_verifier(verf, nn);
err2 = filemap_check_wb_err(nf->nf_file->f_mapping,
since);
+ err = nfserrno(err2);
break;
case -EINVAL:
err = nfserr_notsupp;
default:
nfsd_reset_write_verifier(nn);
trace_nfsd_writeverf_reset(nn, rqstp, err2);
+ err = nfserrno(err2);
}
- err = nfserrno(err2);
} else
nfsd_copy_write_verifier(verf, nn);
static inline int nilfs_init_acl(struct inode *inode, struct inode *dir)
{
+ if (S_ISLNK(inode->i_mode))
+ return 0;
+
inode->i_mode &= ~current_umask();
return 0;
}
return 0;
}
-static int fanotify_events_supported(struct path *path, __u64 mask)
+static int fanotify_events_supported(struct fsnotify_group *group,
+ struct path *path, __u64 mask,
+ unsigned int flags)
{
+ unsigned int mark_type = flags & FANOTIFY_MARK_TYPE_BITS;
+ /* Strict validation of events in non-dir inode mask with v5.17+ APIs */
+ bool strict_dir_events = FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID) ||
+ (mask & FAN_RENAME);
+
/*
* Some filesystems such as 'proc' acquire unusual locks when opening
* files. For them fanotify permission events have high chances of
if (mask & FANOTIFY_PERM_EVENTS &&
path->mnt->mnt_sb->s_type->fs_flags & FS_DISALLOW_NOTIFY_PERM)
return -EINVAL;
+
+ /*
+ * We shouldn't have allowed setting dirent events and the directory
+ * flags FAN_ONDIR and FAN_EVENT_ON_CHILD in mask of non-dir inode,
+ * but because we always allowed it, error only when using new APIs.
+ */
+ if (strict_dir_events && mark_type == FAN_MARK_INODE &&
+ !d_is_dir(path->dentry) && (mask & FANOTIFY_DIRONLY_EVENT_BITS))
+ return -ENOTDIR;
+
return 0;
}
goto fput_and_out;
if (flags & FAN_MARK_ADD) {
- ret = fanotify_events_supported(&path, mask);
+ ret = fanotify_events_supported(group, &path, mask, flags);
if (ret)
goto path_put_and_out;
}
else
mnt = path.mnt;
- /*
- * FAN_RENAME is not allowed on non-dir (for now).
- * We shouldn't have allowed setting any dirent events in mask of
- * non-dir, but because we always allowed it, error only if group
- * was initialized with the new flag FAN_REPORT_TARGET_FID.
- */
- ret = -ENOTDIR;
- if (inode && !S_ISDIR(inode->i_mode) &&
- ((mask & FAN_RENAME) ||
- ((mask & FANOTIFY_DIRENT_EVENTS) &&
- FAN_GROUP_FLAG(group, FAN_REPORT_TARGET_FID))))
- goto path_put_and_out;
-
/* Mask out FAN_EVENT_ON_CHILD flag for sb/mount/non-dir marks */
if (mnt || !S_ISDIR(inode->i_mode)) {
mask &= ~FAN_EVENT_ON_CHILD;
a = (ATTR_RECORD*)((u8*)ctx->attr +
le32_to_cpu(ctx->attr->length));
for (;; a = (ATTR_RECORD*)((u8*)a + le32_to_cpu(a->length))) {
- if ((u8*)a < (u8*)ctx->mrec || (u8*)a > (u8*)ctx->mrec +
- le32_to_cpu(ctx->mrec->bytes_allocated))
+ u8 *mrec_end = (u8 *)ctx->mrec +
+ le32_to_cpu(ctx->mrec->bytes_allocated);
+ u8 *name_end = (u8 *)a + le16_to_cpu(a->name_offset) +
+ a->name_length * sizeof(ntfschar);
+ if ((u8*)a < (u8*)ctx->mrec || (u8*)a > mrec_end ||
+ name_end > mrec_end)
break;
ctx->attr = a;
if (unlikely(le32_to_cpu(a->type) > le32_to_cpu(type) ||
OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */
OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */
OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */
- OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */
};
#define OCFS2_OSB_SOFT_RO 0x0001
static inline int ocfs2_mount_local(struct ocfs2_super *osb)
{
- return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)
- || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER));
+ return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT);
}
static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb)
int i, ret = -ENOSPC;
if ((preferred >= 0) && (preferred < si->si_num_slots)) {
- if (!si->si_slots[preferred].sl_valid ||
- !si->si_slots[preferred].sl_node_num) {
+ if (!si->si_slots[preferred].sl_valid) {
ret = preferred;
goto out;
}
}
for(i = 0; i < si->si_num_slots; i++) {
- if (!si->si_slots[i].sl_valid ||
- !si->si_slots[i].sl_node_num) {
+ if (!si->si_slots[i].sl_valid) {
ret = i;
break;
}
spin_lock(&osb->osb_lock);
ocfs2_update_slot_info(si);
- if (ocfs2_mount_local(osb))
- /* use slot 0 directly in local mode */
- slot = 0;
- else {
- /* search for ourselves first and take the slot if it already
- * exists. Perhaps we need to mark this in a variable for our
- * own journal recovery? Possibly not, though we certainly
- * need to warn to the user */
- slot = __ocfs2_node_num_to_slot(si, osb->node_num);
+ /* search for ourselves first and take the slot if it already
+ * exists. Perhaps we need to mark this in a variable for our
+ * own journal recovery? Possibly not, though we certainly
+ * need to warn to the user */
+ slot = __ocfs2_node_num_to_slot(si, osb->node_num);
+ if (slot < 0) {
+ /* if no slot yet, then just take 1st available
+ * one. */
+ slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
if (slot < 0) {
- /* if no slot yet, then just take 1st available
- * one. */
- slot = __ocfs2_find_empty_slot(si, osb->preferred_slot);
- if (slot < 0) {
- spin_unlock(&osb->osb_lock);
- mlog(ML_ERROR, "no free slots available!\n");
- status = -EINVAL;
- goto bail;
- }
- } else
- printk(KERN_INFO "ocfs2: Slot %d on device (%s) was "
- "already allocated to this node!\n",
- slot, osb->dev_str);
- }
+ spin_unlock(&osb->osb_lock);
+ mlog(ML_ERROR, "no free slots available!\n");
+ status = -EINVAL;
+ goto bail;
+ }
+ } else
+ printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already "
+ "allocated to this node!\n", slot, osb->dev_str);
ocfs2_set_slot(si, slot, osb->node_num);
osb->slot_num = slot;
Opt_dir_resv_level,
Opt_journal_async_commit,
Opt_err_cont,
- Opt_nocluster,
Opt_err,
};
{Opt_dir_resv_level, "dir_resv_level=%u"},
{Opt_journal_async_commit, "journal_async_commit"},
{Opt_err_cont, "errors=continue"},
- {Opt_nocluster, "nocluster"},
{Opt_err, NULL}
};
goto out;
}
- tmp = OCFS2_MOUNT_NOCLUSTER;
- if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
- ret = -EINVAL;
- mlog(ML_ERROR, "Cannot change nocluster option on remount\n");
- goto out;
- }
-
tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL |
OCFS2_MOUNT_HB_NONE;
if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) {
}
if (ocfs2_userspace_stack(osb) &&
- !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
strncmp(osb->osb_cluster_stack, mopt->cluster_stack,
OCFS2_STACK_LABEL_LEN)) {
mlog(ML_ERROR,
osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" :
"ordered");
- if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) &&
- !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT))
- printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted "
- "without cluster aware mode.\n", osb->dev_str);
-
atomic_set(&osb->vol_state, VOLUME_MOUNTED);
wake_up(&osb->osb_mount_event);
case Opt_journal_async_commit:
mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT;
break;
- case Opt_nocluster:
- mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER;
- break;
default:
mlog(ML_ERROR,
"Unrecognized mount option \"%s\" "
if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT)
seq_printf(s, ",journal_async_commit");
- if (opts & OCFS2_MOUNT_NOCLUSTER)
- seq_printf(s, ",nocluster");
-
return 0;
}
struct dentry *dentry, struct inode *inode,
const char *name, void *buffer, size_t size)
{
+ if (!IS_POSIXACL(inode))
+ return -EOPNOTSUPP;
+
return ovl_xattr_get(dentry, inode, handler->name, buffer, size);
}
struct posix_acl *acl = NULL;
int err;
+ if (!IS_POSIXACL(inode))
+ return -EOPNOTSUPP;
+
/* Check that everything is OK before copy-up */
if (value) {
acl = posix_acl_from_xattr(&init_user_ns, value, size);
return root;
}
+static bool ovl_has_idmapped_layers(struct ovl_fs *ofs)
+{
+
+ unsigned int i;
+ const struct vfsmount *mnt;
+
+ for (i = 0; i < ofs->numlayer; i++) {
+ mnt = ofs->layers[i].mnt;
+ if (mnt && is_idmapped_mnt(mnt))
+ return true;
+ }
+ return false;
+}
+
static int ovl_fill_super(struct super_block *sb, void *data, int silent)
{
struct path upperpath = { };
sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
ovl_trusted_xattr_handlers;
sb->s_fs_info = ofs;
- sb->s_flags |= SB_POSIXACL;
+ if (ovl_has_idmapped_layers(ofs))
+ pr_warn("POSIX ACLs are not yet supported with idmapped layers, mounting without ACL support.\n");
+ else
+ sb->s_flags |= SB_POSIXACL;
sb->s_iflags |= SB_I_SKIP_SYNC;
err = -ENOMEM;
#include <linux/capability.h>
#include <linux/quotaops.h>
#include <linux/blkdev.h>
+#include <linux/sched/mm.h>
#include "../internal.h" /* ugh */
#include <linux/uaccess.h>
int dquot_acquire(struct dquot *dquot)
{
int ret = 0, ret2 = 0;
+ unsigned int memalloc;
struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
mutex_lock(&dquot->dq_lock);
+ memalloc = memalloc_nofs_save();
if (!test_bit(DQ_READ_B, &dquot->dq_flags)) {
ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
if (ret < 0)
smp_mb__before_atomic();
set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_iolock:
+ memalloc_nofs_restore(memalloc);
mutex_unlock(&dquot->dq_lock);
return ret;
}
int dquot_commit(struct dquot *dquot)
{
int ret = 0;
+ unsigned int memalloc;
struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
mutex_lock(&dquot->dq_lock);
+ memalloc = memalloc_nofs_save();
if (!clear_dquot_dirty(dquot))
goto out_lock;
/* Inactive dquot can be only if there was error during read/init
else
ret = -EIO;
out_lock:
+ memalloc_nofs_restore(memalloc);
mutex_unlock(&dquot->dq_lock);
return ret;
}
int dquot_release(struct dquot *dquot)
{
int ret = 0, ret2 = 0;
+ unsigned int memalloc;
struct quota_info *dqopt = sb_dqopt(dquot->dq_sb);
mutex_lock(&dquot->dq_lock);
+ memalloc = memalloc_nofs_save();
/* Check whether we are not racing with some other dqget() */
if (dquot_is_busy(dquot))
goto out_dqlock;
}
clear_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_dqlock:
+ memalloc_nofs_restore(memalloc);
mutex_unlock(&dquot->dq_lock);
return ret;
}
count, fl);
file_end_write(out.file);
} else {
+ if (out.file->f_flags & O_NONBLOCK)
+ fl |= SPLICE_F_NONBLOCK;
+
retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl);
}
}
EXPORT_SYMBOL(generic_copy_file_range);
-static ssize_t do_copy_file_range(struct file *file_in, loff_t pos_in,
- struct file *file_out, loff_t pos_out,
- size_t len, unsigned int flags)
-{
- /*
- * Although we now allow filesystems to handle cross sb copy, passing
- * a file of the wrong filesystem type to filesystem driver can result
- * in an attempt to dereference the wrong type of ->private_data, so
- * avoid doing that until we really have a good reason. NFS defines
- * several different file_system_type structures, but they all end up
- * using the same ->copy_file_range() function pointer.
- */
- if (file_out->f_op->copy_file_range &&
- file_out->f_op->copy_file_range == file_in->f_op->copy_file_range)
- return file_out->f_op->copy_file_range(file_in, pos_in,
- file_out, pos_out,
- len, flags);
-
- return generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
- flags);
-}
-
/*
* Performs necessary checks before doing a file copy
*
if (ret)
return ret;
+ /*
+ * We allow some filesystems to handle cross sb copy, but passing
+ * a file of the wrong filesystem type to filesystem driver can result
+ * in an attempt to dereference the wrong type of ->private_data, so
+ * avoid doing that until we really have a good reason.
+ *
+ * nfs and cifs define several different file_system_type structures
+ * and several different sets of file_operations, but they all end up
+ * using the same ->copy_file_range() function pointer.
+ */
+ if (file_out->f_op->copy_file_range) {
+ if (file_in->f_op->copy_file_range !=
+ file_out->f_op->copy_file_range)
+ return -EXDEV;
+ } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) {
+ return -EXDEV;
+ }
+
/* Don't touch certain kinds of inodes */
if (IS_IMMUTABLE(inode_out))
return -EPERM;
file_start_write(file_out);
/*
- * Try cloning first, this is supported by more file systems, and
- * more efficient if both clone and copy are supported (e.g. NFS).
+ * Cloning is supported by more file systems, so we implement copy on
+ * same sb using clone, but for filesystems where both clone and copy
+ * are supported (e.g. nfs,cifs), we only call the copy method.
*/
+ if (file_out->f_op->copy_file_range) {
+ ret = file_out->f_op->copy_file_range(file_in, pos_in,
+ file_out, pos_out,
+ len, flags);
+ goto done;
+ }
+
if (file_in->f_op->remap_file_range &&
file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) {
- loff_t cloned;
-
- cloned = file_in->f_op->remap_file_range(file_in, pos_in,
+ ret = file_in->f_op->remap_file_range(file_in, pos_in,
file_out, pos_out,
min_t(loff_t, MAX_RW_COUNT, len),
REMAP_FILE_CAN_SHORTEN);
- if (cloned > 0) {
- ret = cloned;
+ if (ret > 0)
goto done;
- }
}
- ret = do_copy_file_range(file_in, pos_in, file_out, pos_out, len,
- flags);
- WARN_ON_ONCE(ret == -EOPNOTSUPP);
+ /*
+ * We can get here for same sb copy of filesystems that do not implement
+ * ->copy_file_range() in case filesystem does not support clone or in
+ * case filesystem supports clone but rejected the clone request (e.g.
+ * because it was not block aligned).
+ *
+ * In both cases, fall back to kernel copy so we are able to maintain a
+ * consistent story about which filesystems support copy_file_range()
+ * and which filesystems do not, that will allow userspace tools to
+ * make consistent desicions w.r.t using copy_file_range().
+ */
+ ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len,
+ flags);
+
done:
if (ret > 0) {
fsnotify_access(file_in);
* Otherwise, make sure the count is also block-aligned, having
* already confirmed the starting offsets' block alignment.
*/
- if (pos_in + count == size_in) {
+ if (pos_in + count == size_in &&
+ (!(remap_flags & REMAP_FILE_DEDUP) || pos_out + count == size_out)) {
bcount = ALIGN(size_in, bs) - pos_in;
} else {
if (!IS_ALIGNED(count, bs))
*
* Only one instances directory is allowed.
*
- * The instances directory is special as it allows for mkdir and rmdir to
+ * The instances directory is special as it allows for mkdir and rmdir
* to be done by userspace. When a mkdir or rmdir is performed, the inode
* locks are released and the methods passed in (@mkdir and @rmdir) are
* called without locks and with the name of the directory being created
}
static inline struct uffd_msg userfault_msg(unsigned long address,
+ unsigned long real_address,
unsigned int flags,
unsigned long reason,
unsigned int features)
{
struct uffd_msg msg;
+
msg_init(&msg);
msg.event = UFFD_EVENT_PAGEFAULT;
- if (!(features & UFFD_FEATURE_EXACT_ADDRESS))
- address &= PAGE_MASK;
- msg.arg.pagefault.address = address;
+ msg.arg.pagefault.address = (features & UFFD_FEATURE_EXACT_ADDRESS) ?
+ real_address : address;
+
/*
* These flags indicate why the userfault occurred:
* - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault.
init_waitqueue_func_entry(&uwq.wq, userfaultfd_wake_function);
uwq.wq.private = current;
- uwq.msg = userfault_msg(vmf->real_address, vmf->flags, reason,
- ctx->features);
+ uwq.msg = userfault_msg(vmf->address, vmf->real_address, vmf->flags,
+ reason, ctx->features);
uwq.ctx = ctx;
uwq.waken = false;
STATIC int xfs_attr_leaf_get(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_removename(xfs_da_args_t *args);
STATIC int xfs_attr_leaf_hasname(struct xfs_da_args *args, struct xfs_buf **bp);
-STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args, struct xfs_buf *bp);
+STATIC int xfs_attr_leaf_try_add(struct xfs_da_args *args);
/*
* Internal routines when attribute list is more than one block.
* It won't fit in the shortform, transform to a leaf block. GROT:
* another possible req'mt for a double-split btree op.
*/
- error = xfs_attr_shortform_to_leaf(args, &attr->xattri_leaf_bp);
+ error = xfs_attr_shortform_to_leaf(args);
if (error)
return error;
- /*
- * Prevent the leaf buffer from being unlocked so that a concurrent AIL
- * push cannot grab the half-baked leaf buffer and run into problems
- * with the write verifier.
- */
- xfs_trans_bhold(args->trans, attr->xattri_leaf_bp);
attr->xattri_dela_state = XFS_DAS_LEAF_ADD;
out:
trace_xfs_attr_sf_addname_return(attr->xattri_dela_state, args->dp);
/*
* Use the leaf buffer we may already hold locked as a result of
- * a sf-to-leaf conversion. The held buffer is no longer valid
- * after this call, regardless of the result.
+ * a sf-to-leaf conversion.
*/
- error = xfs_attr_leaf_try_add(args, attr->xattri_leaf_bp);
- attr->xattri_leaf_bp = NULL;
+ error = xfs_attr_leaf_try_add(args);
if (error == -ENOSPC) {
error = xfs_attr3_leaf_to_node(args);
struct xfs_da_args *args = attr->xattri_da_args;
int error;
- ASSERT(!attr->xattri_leaf_bp);
-
error = xfs_attr_node_addname_find_attr(attr);
if (error)
return error;
/*
* We have no control over the attribute names that userspace passes us
* to remove, so we have to allow the name lookup prior to attribute
- * removal to fail as well.
+ * removal to fail as well. Preserve the logged flag, since we need
+ * to pass that through to the logging code.
*/
- args->op_flags = XFS_DA_OP_OKNOENT;
+ args->op_flags = XFS_DA_OP_OKNOENT |
+ (args->op_flags & XFS_DA_OP_LOGGED);
if (args->value) {
XFS_STATS_INC(mp, xs_attr_set);
*/
STATIC int
xfs_attr_leaf_try_add(
- struct xfs_da_args *args,
- struct xfs_buf *bp)
+ struct xfs_da_args *args)
{
+ struct xfs_buf *bp;
int error;
- /*
- * If the caller provided a buffer to us, it is locked and held in
- * the transaction because it just did a shortform to leaf conversion.
- * Hence we don't need to read it again. Otherwise read in the leaf
- * buffer.
- */
- if (bp) {
- xfs_trans_bhold_release(args->trans, bp);
- } else {
- error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
- if (error)
- return error;
- }
+ error = xfs_attr3_leaf_read(args->trans, args->dp, 0, &bp);
+ if (error)
+ return error;
/*
* Look up the xattr name to set the insertion point for the new xattr.
xfs_attr_node_try_addname(
struct xfs_attr_intent *attr)
{
- struct xfs_da_args *args = attr->xattri_da_args;
struct xfs_da_state *state = attr->xattri_da_state;
struct xfs_da_state_blk *blk;
int error;
- trace_xfs_attr_node_addname(args);
+ trace_xfs_attr_node_addname(state->args);
blk = &state->path.blk[state->path.active-1];
ASSERT(blk->magic == XFS_ATTR_LEAF_MAGIC);
*/
#define ATTR_MAX_VALUELEN (64*1024) /* max length of a value */
-static inline bool xfs_has_larp(struct xfs_mount *mp)
-{
-#ifdef DEBUG
- /* Logged xattrs require a V5 super for log_incompat */
- return xfs_has_crc(mp) && xfs_globals.larp;
-#else
- return false;
-#endif
-}
-
/*
* Kernel-internal version of the attrlist cursor.
*/
*/
struct xfs_attri_log_nameval *xattri_nameval;
- /*
- * Used by xfs_attr_set to hold a leaf buffer across a transaction roll
- */
- struct xfs_buf *xattri_leaf_bp;
-
/* Used to keep track of current state of delayed operation */
enum xfs_delattr_state xattri_dela_state;
xfs_attr_init_replace_state(struct xfs_da_args *args)
{
args->op_flags |= XFS_DA_OP_ADDNAME | XFS_DA_OP_REPLACE;
- if (xfs_has_larp(args->dp->i_mount))
+ if (args->op_flags & XFS_DA_OP_LOGGED)
return xfs_attr_init_remove_state(args);
return xfs_attr_init_add_state(args);
}
return NULL;
}
+/*
+ * Validate an attribute leaf block.
+ *
+ * Empty leaf blocks can occur under the following circumstances:
+ *
+ * 1. setxattr adds a new extended attribute to a file;
+ * 2. The file has zero existing attributes;
+ * 3. The attribute is too large to fit in the attribute fork;
+ * 4. The attribute is small enough to fit in a leaf block;
+ * 5. A log flush occurs after committing the transaction that creates
+ * the (empty) leaf block; and
+ * 6. The filesystem goes down after the log flush but before the new
+ * attribute can be committed to the leaf block.
+ *
+ * Hence we need to ensure that we don't fail the validation purely
+ * because the leaf is empty.
+ */
static xfs_failaddr_t
xfs_attr3_leaf_verify(
struct xfs_buf *bp)
if (fa)
return fa;
- /*
- * Empty leaf blocks should never occur; they imply the existence of a
- * software bug that needs fixing. xfs_repair also flags them as a
- * corruption that needs fixing, so we should never let these go to
- * disk.
- */
- if (ichdr.count == 0)
- return __this_address;
-
/*
* firstused is the block offset of the first name info structure.
* Make sure it doesn't go off the block or crash into the header.
return -ENOATTR;
}
-/*
- * Convert from using the shortform to the leaf. On success, return the
- * buffer so that we can keep it locked until we're totally done with it.
- */
+/* Convert from using the shortform to the leaf format. */
int
xfs_attr_shortform_to_leaf(
- struct xfs_da_args *args,
- struct xfs_buf **leaf_bp)
+ struct xfs_da_args *args)
{
struct xfs_inode *dp;
struct xfs_attr_shortform *sf;
sfe = xfs_attr_sf_nextentry(sfe);
}
error = 0;
- *leaf_bp = bp;
out:
kmem_free(tmpbuffer);
return error;
if (tmp)
entry->flags |= XFS_ATTR_LOCAL;
if (args->op_flags & XFS_DA_OP_REPLACE) {
- if (!xfs_has_larp(mp))
+ if (!(args->op_flags & XFS_DA_OP_LOGGED))
entry->flags |= XFS_ATTR_INCOMPLETE;
if ((args->blkno2 == args->blkno) &&
(args->index2 <= args->index)) {
void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
int xfs_attr_shortform_lookup(struct xfs_da_args *args);
int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
-int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
- struct xfs_buf **leaf_bp);
+int xfs_attr_shortform_to_leaf(struct xfs_da_args *args);
int xfs_attr_sf_removename(struct xfs_da_args *args);
int xfs_attr_sf_findname(struct xfs_da_args *args,
struct xfs_attr_sf_entry **sfep,
#define XFS_DA_OP_NOTIME (1u << 5) /* don't update inode timestamps */
#define XFS_DA_OP_REMOVE (1u << 6) /* this is a remove operation */
#define XFS_DA_OP_RECOVERY (1u << 7) /* Log recovery operation */
+#define XFS_DA_OP_LOGGED (1u << 8) /* Use intent items to track op */
#define XFS_DA_OP_FLAGS \
{ XFS_DA_OP_JUSTCHECK, "JUSTCHECK" }, \
{ XFS_DA_OP_CILOOKUP, "CILOOKUP" }, \
{ XFS_DA_OP_NOTIME, "NOTIME" }, \
{ XFS_DA_OP_REMOVE, "REMOVE" }, \
- { XFS_DA_OP_RECOVERY, "RECOVERY" }
+ { XFS_DA_OP_RECOVERY, "RECOVERY" }, \
+ { XFS_DA_OP_LOGGED, "LOGGED" }
/*
* Storage for holding state during Btree searches and split/join ops.
struct xfs_mount *mp = tp->t_mountp;
struct xfs_attri_log_item *attrip;
struct xfs_attr_intent *attr;
+ struct xfs_da_args *args;
ASSERT(count == 1);
- if (!xfs_sb_version_haslogxattrs(&mp->m_sb))
- return NULL;
-
/*
* Each attr item only performs one attribute operation at a time, so
* this is a list of one
*/
attr = list_first_entry_or_null(items, struct xfs_attr_intent,
xattri_list);
+ args = attr->xattri_da_args;
+
+ if (!(args->op_flags & XFS_DA_OP_LOGGED))
+ return NULL;
/*
* Create a buffer to store the attribute name and value. This buffer
* and the lower level xattr log items.
*/
if (!attr->xattri_nameval) {
- struct xfs_da_args *args = attr->xattri_da_args;
-
/*
* Transfer our reference to the name/value buffer to the
* deferred work state structure.
struct xfs_trans_res tres;
struct xfs_attri_log_format *attrp;
struct xfs_attri_log_nameval *nv = attrip->attri_nameval;
- int error, ret = 0;
+ int error;
int total;
int local;
struct xfs_attrd_log_item *done_item = NULL;
args->namelen = nv->name.i_len;
args->hashval = xfs_da_hashname(args->name, args->namelen);
args->attr_filter = attrp->alfi_attr_filter & XFS_ATTRI_FILTER_MASK;
- args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT;
+ args->op_flags = XFS_DA_OP_RECOVERY | XFS_DA_OP_OKNOENT |
+ XFS_DA_OP_LOGGED;
+
+ ASSERT(xfs_sb_version_haslogxattrs(&mp->m_sb));
switch (attr->xattri_op_flags) {
case XFS_ATTRI_OP_FLAGS_SET:
xfs_ilock(ip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(tp, ip, 0);
- ret = xfs_xattri_finish_update(attr, done_item);
- if (ret == -EAGAIN) {
- /* There's more work to do, so add it to this transaction */
+ error = xfs_xattri_finish_update(attr, done_item);
+ if (error == -EAGAIN) {
+ /*
+ * There's more work to do, so add the intent item to this
+ * transaction so that we can continue it later.
+ */
xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_ATTR, &attr->xattri_list);
- } else
- error = ret;
+ error = xfs_defer_ops_capture_and_commit(tp, capture_list);
+ if (error)
+ goto out_unlock;
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xfs_irele(ip);
+ return 0;
+ }
if (error) {
xfs_trans_cancel(tp);
goto out_unlock;
}
error = xfs_defer_ops_capture_and_commit(tp, capture_list);
-
out_unlock:
- if (attr->xattri_leaf_bp)
- xfs_buf_relse(attr->xattri_leaf_bp);
-
xfs_iunlock(ip, XFS_ILOCK_EXCL);
xfs_irele(ip);
out:
- if (ret != -EAGAIN)
- xfs_attr_free_item(attr);
+ xfs_attr_free_item(attr);
return error;
}
* forever.
*/
end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
+ if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
+ end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
if (last_fsb <= end_fsb)
return false;
for_each_online_cpu(cpu) {
gc = per_cpu_ptr(mp->m_inodegc, cpu);
if (!llist_empty(&gc->list))
- queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+ mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
}
}
xfs_inodegc_worker(
struct work_struct *work)
{
- struct xfs_inodegc *gc = container_of(work, struct xfs_inodegc,
- work);
+ struct xfs_inodegc *gc = container_of(to_delayed_work(work),
+ struct xfs_inodegc, work);
struct llist_node *node = llist_del_all(&gc->list);
struct xfs_inode *ip, *n;
}
/*
- * Force all currently queued inode inactivation work to run immediately and
- * wait for the work to finish.
+ * Expedite all pending inodegc work to run immediately. This does not wait for
+ * completion of the work.
*/
void
-xfs_inodegc_flush(
+xfs_inodegc_push(
struct xfs_mount *mp)
{
if (!xfs_is_inodegc_enabled(mp))
return;
+ trace_xfs_inodegc_push(mp, __return_address);
+ xfs_inodegc_queue_all(mp);
+}
+/*
+ * Force all currently queued inode inactivation work to run immediately and
+ * wait for the work to finish.
+ */
+void
+xfs_inodegc_flush(
+ struct xfs_mount *mp)
+{
+ xfs_inodegc_push(mp);
trace_xfs_inodegc_flush(mp, __return_address);
-
- xfs_inodegc_queue_all(mp);
flush_workqueue(mp->m_inodegc_wq);
}
struct xfs_inodegc *gc;
int items;
unsigned int shrinker_hits;
+ unsigned long queue_delay = 1;
trace_xfs_inode_set_need_inactive(ip);
spin_lock(&ip->i_flags_lock);
items = READ_ONCE(gc->items);
WRITE_ONCE(gc->items, items + 1);
shrinker_hits = READ_ONCE(gc->shrinker_hits);
- put_cpu_ptr(gc);
- if (!xfs_is_inodegc_enabled(mp))
+ /*
+ * We queue the work while holding the current CPU so that the work
+ * is scheduled to run on this CPU.
+ */
+ if (!xfs_is_inodegc_enabled(mp)) {
+ put_cpu_ptr(gc);
return;
-
- if (xfs_inodegc_want_queue_work(ip, items)) {
- trace_xfs_inodegc_queue(mp, __return_address);
- queue_work(mp->m_inodegc_wq, &gc->work);
}
+ if (xfs_inodegc_want_queue_work(ip, items))
+ queue_delay = 0;
+
+ trace_xfs_inodegc_queue(mp, __return_address);
+ mod_delayed_work(mp->m_inodegc_wq, &gc->work, queue_delay);
+ put_cpu_ptr(gc);
+
if (xfs_inodegc_want_flush_work(ip, items, shrinker_hits)) {
trace_xfs_inodegc_throttle(mp, __return_address);
- flush_work(&gc->work);
+ flush_delayed_work(&gc->work);
}
}
unsigned int count = 0;
dead_gc = per_cpu_ptr(mp->m_inodegc, dead_cpu);
- cancel_work_sync(&dead_gc->work);
+ cancel_delayed_work_sync(&dead_gc->work);
if (llist_empty(&dead_gc->list))
return;
llist_add_batch(first, last, &gc->list);
count += READ_ONCE(gc->items);
WRITE_ONCE(gc->items, count);
- put_cpu_ptr(gc);
if (xfs_is_inodegc_enabled(mp)) {
trace_xfs_inodegc_queue(mp, __return_address);
- queue_work(mp->m_inodegc_wq, &gc->work);
+ mod_delayed_work(mp->m_inodegc_wq, &gc->work, 0);
}
+ put_cpu_ptr(gc);
}
/*
unsigned int h = READ_ONCE(gc->shrinker_hits);
WRITE_ONCE(gc->shrinker_hits, h + 1);
- queue_work_on(cpu, mp->m_inodegc_wq, &gc->work);
+ mod_delayed_work_on(cpu, mp->m_inodegc_wq, &gc->work, 0);
no_items = false;
}
}
void xfs_blockgc_start(struct xfs_mount *mp);
void xfs_inodegc_worker(struct work_struct *work);
+void xfs_inodegc_push(struct xfs_mount *mp);
void xfs_inodegc_flush(struct xfs_mount *mp);
void xfs_inodegc_stop(struct xfs_mount *mp);
void xfs_inodegc_start(struct xfs_mount *mp);
return lock_mode;
}
+/*
+ * You can't set both SHARED and EXCL for the same lock,
+ * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_MMAPLOCK_SHARED,
+ * XFS_MMAPLOCK_EXCL, XFS_ILOCK_SHARED, XFS_ILOCK_EXCL are valid values
+ * to set in lock_flags.
+ */
+static inline void
+xfs_lock_flags_assert(
+ uint lock_flags)
+{
+ ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
+ (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
+ (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
+ ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
+ (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
+ ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+ ASSERT(lock_flags != 0);
+}
+
/*
* In addition to i_rwsem in the VFS inode, the xfs inode contains 2
* multi-reader locks: invalidate_lock and the i_lock. This routine allows
{
trace_xfs_ilock(ip, lock_flags, _RET_IP_);
- /*
- * You can't set both SHARED and EXCL for the same lock,
- * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
- * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
- */
- ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
- (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
- ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
- (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
- ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
- (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+ xfs_lock_flags_assert(lock_flags);
if (lock_flags & XFS_IOLOCK_EXCL) {
down_write_nested(&VFS_I(ip)->i_rwsem,
{
trace_xfs_ilock_nowait(ip, lock_flags, _RET_IP_);
- /*
- * You can't set both SHARED and EXCL for the same lock,
- * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
- * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
- */
- ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
- (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
- ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
- (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
- ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
- (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
+ xfs_lock_flags_assert(lock_flags);
if (lock_flags & XFS_IOLOCK_EXCL) {
if (!down_write_trylock(&VFS_I(ip)->i_rwsem))
xfs_inode_t *ip,
uint lock_flags)
{
- /*
- * You can't set both SHARED and EXCL for the same lock,
- * and only XFS_IOLOCK_SHARED, XFS_IOLOCK_EXCL, XFS_ILOCK_SHARED,
- * and XFS_ILOCK_EXCL are valid values to set in lock_flags.
- */
- ASSERT((lock_flags & (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL)) !=
- (XFS_IOLOCK_SHARED | XFS_IOLOCK_EXCL));
- ASSERT((lock_flags & (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL)) !=
- (XFS_MMAPLOCK_SHARED | XFS_MMAPLOCK_EXCL));
- ASSERT((lock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL)) !=
- (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL));
- ASSERT((lock_flags & ~(XFS_LOCK_MASK | XFS_LOCK_SUBCLASS_MASK)) == 0);
- ASSERT(lock_flags != 0);
+ xfs_lock_flags_assert(lock_flags);
if (lock_flags & XFS_IOLOCK_EXCL)
up_write(&VFS_I(ip)->i_rwsem);
}
if (lock_flags & (XFS_MMAPLOCK_EXCL|XFS_MMAPLOCK_SHARED)) {
- return __xfs_rwsem_islocked(&VFS_I(ip)->i_rwsem,
- (lock_flags & XFS_IOLOCK_SHARED));
+ return __xfs_rwsem_islocked(&VFS_I(ip)->i_mapping->invalidate_lock,
+ (lock_flags & XFS_MMAPLOCK_SHARED));
}
if (lock_flags & (XFS_IOLOCK_EXCL | XFS_IOLOCK_SHARED)) {
{
uint64_t di_flags2 =
(ip->i_diflags2 & (XFS_DIFLAG2_REFLINK |
- XFS_DIFLAG2_BIGTIME));
+ XFS_DIFLAG2_BIGTIME |
+ XFS_DIFLAG2_NREXT64));
if (xflags & FS_XFLAG_DAX)
di_flags2 |= XFS_DIFLAG2_DAX;
xlog_in_core_t *iclog, *next_iclog;
int i;
- xlog_cil_destroy(log);
-
/*
* Cycle all the iclogbuf locks to make sure all log IO completion
* is done before we tear down these buffers.
iclog = iclog->ic_next;
}
+ /*
+ * Destroy the CIL after waiting for iclog IO completion because an
+ * iclog EIO error will try to shut down the log, which accesses the
+ * CIL to wake up the waiters.
+ */
+ xlog_cil_destroy(log);
+
iclog = log->l_iclog;
for (i = 0; i < log->l_iclog_bufs; i++) {
next_iclog = iclog->ic_next;
*/
struct xfs_inodegc {
struct llist_head list;
- struct work_struct work;
+ struct delayed_work work;
/* approximate count of inodes in the list */
unsigned int items;
struct xfs_dquot *dqp;
int error;
- /* Flush inodegc work at the start of a quota reporting scan. */
+ /*
+ * Expedite pending inodegc work at the start of a quota reporting
+ * scan but don't block waiting for it to complete.
+ */
if (id == 0)
- xfs_inodegc_flush(mp);
+ xfs_inodegc_push(mp);
/*
* Try to get the dquot. We don't want it allocated on disk, so don't
/* Flush inodegc work at the start of a quota reporting scan. */
if (*id == 0)
- xfs_inodegc_flush(mp);
+ xfs_inodegc_push(mp);
error = xfs_qm_dqget_next(mp, *id, type, &dqp);
if (error)
xfs_extlen_t lsize;
int64_t ffree;
- /* Wait for whatever inactivations are in progress. */
- xfs_inodegc_flush(mp);
+ /*
+ * Expedite background inodegc but don't wait. We do not want to block
+ * here waiting hours for a billion extent file to be truncated.
+ */
+ xfs_inodegc_push(mp);
statp->f_type = XFS_SUPER_MAGIC;
statp->f_namelen = MAXNAMELEN - 1;
gc = per_cpu_ptr(mp->m_inodegc, cpu);
init_llist_head(&gc->list);
gc->items = 0;
- INIT_WORK(&gc->work, xfs_inodegc_worker);
+ INIT_DELAYED_WORK(&gc->work, xfs_inodegc_worker);
}
return 0;
}
TP_PROTO(struct xfs_mount *mp, void *caller_ip), \
TP_ARGS(mp, caller_ip))
DEFINE_FS_EVENT(xfs_inodegc_flush);
+DEFINE_FS_EVENT(xfs_inodegc_push);
DEFINE_FS_EVENT(xfs_inodegc_start);
DEFINE_FS_EVENT(xfs_inodegc_stop);
DEFINE_FS_EVENT(xfs_inodegc_queue);
xlog_drop_incompat_feat(mp->m_log);
}
+static inline bool
+xfs_attr_want_log_assist(
+ struct xfs_mount *mp)
+{
+#ifdef DEBUG
+ /* Logged xattrs require a V5 super for log_incompat */
+ return xfs_has_crc(mp) && xfs_globals.larp;
+#else
+ return false;
+#endif
+}
+
/*
* Set or remove an xattr, having grabbed the appropriate logging resources
* prior to calling libxfs.
bool use_logging = false;
int error;
- if (xfs_has_larp(mp)) {
+ ASSERT(!(args->op_flags & XFS_DA_OP_LOGGED));
+
+ if (xfs_attr_want_log_assist(mp)) {
error = xfs_attr_grab_log_assist(mp);
if (error)
return error;
+ args->op_flags |= XFS_DA_OP_LOGGED;
use_logging = true;
}
}
}
-static int zonefs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
- unsigned int flags, struct iomap *iomap,
- struct iomap *srcmap)
+static int zonefs_read_iomap_begin(struct inode *inode, loff_t offset,
+ loff_t length, unsigned int flags,
+ struct iomap *iomap, struct iomap *srcmap)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
struct super_block *sb = inode->i_sb;
loff_t isize;
- /* All I/Os should always be within the file maximum size */
+ /*
+ * All blocks are always mapped below EOF. If reading past EOF,
+ * act as if there is a hole up to the file maximum size.
+ */
+ mutex_lock(&zi->i_truncate_mutex);
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
+ isize = i_size_read(inode);
+ if (iomap->offset >= isize) {
+ iomap->type = IOMAP_HOLE;
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->length = length;
+ } else {
+ iomap->type = IOMAP_MAPPED;
+ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
+ iomap->length = isize - iomap->offset;
+ }
+ mutex_unlock(&zi->i_truncate_mutex);
+
+ trace_zonefs_iomap_begin(inode, iomap);
+
+ return 0;
+}
+
+static const struct iomap_ops zonefs_read_iomap_ops = {
+ .iomap_begin = zonefs_read_iomap_begin,
+};
+
+static int zonefs_write_iomap_begin(struct inode *inode, loff_t offset,
+ loff_t length, unsigned int flags,
+ struct iomap *iomap, struct iomap *srcmap)
+{
+ struct zonefs_inode_info *zi = ZONEFS_I(inode);
+ struct super_block *sb = inode->i_sb;
+ loff_t isize;
+
+ /* All write I/Os should always be within the file maximum size */
if (WARN_ON_ONCE(offset + length > zi->i_max_size))
return -EIO;
* operation.
*/
if (WARN_ON_ONCE(zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
- (flags & IOMAP_WRITE) && !(flags & IOMAP_DIRECT)))
+ !(flags & IOMAP_DIRECT)))
return -EIO;
/*
* write pointer) and unwriten beyond.
*/
mutex_lock(&zi->i_truncate_mutex);
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
+ iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
isize = i_size_read(inode);
- if (offset >= isize)
+ if (iomap->offset >= isize) {
iomap->type = IOMAP_UNWRITTEN;
- else
+ iomap->length = zi->i_max_size - iomap->offset;
+ } else {
iomap->type = IOMAP_MAPPED;
- if (flags & IOMAP_WRITE)
- length = zi->i_max_size - offset;
- else
- length = min(length, isize - offset);
+ iomap->length = isize - iomap->offset;
+ }
mutex_unlock(&zi->i_truncate_mutex);
- iomap->offset = ALIGN_DOWN(offset, sb->s_blocksize);
- iomap->length = ALIGN(offset + length, sb->s_blocksize) - iomap->offset;
- iomap->bdev = inode->i_sb->s_bdev;
- iomap->addr = (zi->i_zsector << SECTOR_SHIFT) + iomap->offset;
-
trace_zonefs_iomap_begin(inode, iomap);
return 0;
}
-static const struct iomap_ops zonefs_iomap_ops = {
- .iomap_begin = zonefs_iomap_begin,
+static const struct iomap_ops zonefs_write_iomap_ops = {
+ .iomap_begin = zonefs_write_iomap_begin,
};
static int zonefs_read_folio(struct file *unused, struct folio *folio)
{
- return iomap_read_folio(folio, &zonefs_iomap_ops);
+ return iomap_read_folio(folio, &zonefs_read_iomap_ops);
}
static void zonefs_readahead(struct readahead_control *rac)
{
- iomap_readahead(rac, &zonefs_iomap_ops);
+ iomap_readahead(rac, &zonefs_read_iomap_ops);
}
/*
* Map blocks for page writeback. This is used only on conventional zone files,
* which implies that the page range can only be within the fixed inode size.
*/
-static int zonefs_map_blocks(struct iomap_writepage_ctx *wpc,
- struct inode *inode, loff_t offset)
+static int zonefs_write_map_blocks(struct iomap_writepage_ctx *wpc,
+ struct inode *inode, loff_t offset)
{
struct zonefs_inode_info *zi = ZONEFS_I(inode);
offset < wpc->iomap.offset + wpc->iomap.length)
return 0;
- return zonefs_iomap_begin(inode, offset, zi->i_max_size - offset,
- IOMAP_WRITE, &wpc->iomap, NULL);
+ return zonefs_write_iomap_begin(inode, offset, zi->i_max_size - offset,
+ IOMAP_WRITE, &wpc->iomap, NULL);
}
static const struct iomap_writeback_ops zonefs_writeback_ops = {
- .map_blocks = zonefs_map_blocks,
+ .map_blocks = zonefs_write_map_blocks,
};
static int zonefs_writepage(struct page *page, struct writeback_control *wbc)
return -EINVAL;
}
- return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops);
+ return iomap_swapfile_activate(sis, swap_file, span,
+ &zonefs_read_iomap_ops);
}
static const struct address_space_operations zonefs_file_aops = {
/* Serialize against truncates */
filemap_invalidate_lock_shared(inode->i_mapping);
- ret = iomap_page_mkwrite(vmf, &zonefs_iomap_ops);
+ ret = iomap_page_mkwrite(vmf, &zonefs_write_iomap_ops);
filemap_invalidate_unlock_shared(inode->i_mapping);
sb_end_pagefault(inode->i_sb);
if (append)
ret = zonefs_file_dio_append(iocb, from);
else
- ret = iomap_dio_rw(iocb, from, &zonefs_iomap_ops,
+ ret = iomap_dio_rw(iocb, from, &zonefs_write_iomap_ops,
&zonefs_write_dio_ops, 0, NULL, 0);
if (zi->i_ztype == ZONEFS_ZTYPE_SEQ &&
(ret > 0 || ret == -EIOCBQUEUED)) {
if (ret <= 0)
goto inode_unlock;
- ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops);
+ ret = iomap_file_buffered_write(iocb, from, &zonefs_write_iomap_ops);
if (ret > 0)
iocb->ki_pos += ret;
else if (ret == -EIO)
goto inode_unlock;
}
file_accessed(iocb->ki_filp);
- ret = iomap_dio_rw(iocb, to, &zonefs_iomap_ops,
+ ret = iomap_dio_rw(iocb, to, &zonefs_read_iomap_ops,
&zonefs_read_dio_ops, 0, NULL, 0);
} else {
ret = generic_file_read_iter(iocb, to);
if (sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
- if (wro > sbi->s_max_wro_seq_files) {
+ if (sbi->s_max_wro_seq_files
+ && wro > sbi->s_max_wro_seq_files) {
atomic_dec(&sbi->s_wro_seq_files);
ret = -EBUSY;
goto unlock;
atomic_set(&sbi->s_wro_seq_files, 0);
sbi->s_max_wro_seq_files = bdev_max_open_zones(sb->s_bdev);
- if (!sbi->s_max_wro_seq_files &&
- sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
- zonefs_info(sb, "No open zones limit. Ignoring explicit_open mount option\n");
- sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
- }
-
atomic_set(&sbi->s_active_seq_files, 0);
sbi->s_max_active_seq_files = bdev_max_active_zones(sb->s_bdev);
zonefs_info(sb, "Mounting %u zones",
blkdev_nr_zones(sb->s_bdev->bd_disk));
+ if (!sbi->s_max_wro_seq_files &&
+ !sbi->s_max_active_seq_files &&
+ sbi->s_mount_opts & ZONEFS_MNTOPT_EXPLICIT_OPEN) {
+ zonefs_info(sb,
+ "No open and active zone limits. Ignoring explicit_open mount option\n");
+ sbi->s_mount_opts &= ~ZONEFS_MNTOPT_EXPLICIT_OPEN;
+ }
+
/* Create root directory inode */
ret = -ENOMEM;
inode = new_inode(sb);
extern int acpi_get_psd_map(unsigned int cpu, struct cppc_cpudata *cpu_data);
extern unsigned int cppc_get_transition_latency(int cpu);
extern bool cpc_ffh_supported(void);
+extern bool cpc_supported_by_cpu(void);
extern int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val);
extern int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val);
#else /* !CONFIG_ACPI_CPPC_LIB */
mandatory-y += pci.h
mandatory-y += percpu.h
mandatory-y += pgalloc.h
+mandatory-y += platform-feature.h
mandatory-y += preempt.h
mandatory-y += rwonce.h
mandatory-y += sections.h
}
#endif
-#ifndef CONFIG_GENERIC_DEVMEM_IS_ALLOWED
extern int devmem_is_allowed(unsigned long pfn);
-#endif
#endif /* __KERNEL__ */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_PLATFORM_FEATURE_H
+#define _ASM_GENERIC_PLATFORM_FEATURE_H
+
+/* Number of arch specific feature flags. */
+#define PLATFORM_ARCH_FEAT_N 0
+
+#endif /* _ASM_GENERIC_PLATFORM_FEATURE_H */
* Useful if your architecture doesn't use IPIs for remote TLB invalidates
* and therefore doesn't naturally serialize with software page-table walkers.
*
+ * MMU_GATHER_NO_FLUSH_CACHE
+ *
+ * Indicates the architecture has flush_cache_range() but it needs *NOT* be called
+ * before unmapping a VMA.
+ *
+ * NOTE: strictly speaking we shouldn't have this knob and instead rely on
+ * flush_cache_range() being a NOP, except Sparc64 seems to be
+ * different here.
+ *
+ * MMU_GATHER_MERGE_VMAS
+ *
+ * Indicates the architecture wants to merge ranges over VMAs; typical when
+ * multiple range invalidates are more expensive than a full invalidate.
+ *
* MMU_GATHER_NO_RANGE
*
- * Use this if your architecture lacks an efficient flush_tlb_range().
+ * Use this if your architecture lacks an efficient flush_tlb_range(). This
+ * option implies MMU_GATHER_MERGE_VMAS above.
*
* MMU_GATHER_NO_GATHER
*
*/
unsigned int vma_exec : 1;
unsigned int vma_huge : 1;
+ unsigned int vma_pfn : 1;
unsigned int batch_count;
#ifdef CONFIG_MMU_GATHER_NO_RANGE
-#if defined(tlb_flush) || defined(tlb_start_vma) || defined(tlb_end_vma)
-#error MMU_GATHER_NO_RANGE relies on default tlb_flush(), tlb_start_vma() and tlb_end_vma()
+#if defined(tlb_flush)
+#error MMU_GATHER_NO_RANGE relies on default tlb_flush()
#endif
/*
flush_tlb_mm(tlb->mm);
}
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#define tlb_end_vma tlb_end_vma
-static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
#else /* CONFIG_MMU_GATHER_NO_RANGE */
#ifndef tlb_flush
-
-#if defined(tlb_start_vma) || defined(tlb_end_vma)
-#error Default tlb_flush() relies on default tlb_start_vma() and tlb_end_vma()
-#endif
-
/*
* When an architecture does not provide its own tlb_flush() implementation
* but does have a reasonably efficient flush_vma_range() implementation
flush_tlb_range(&vma, tlb->start, tlb->end);
}
}
+#endif
+
+#endif /* CONFIG_MMU_GATHER_NO_RANGE */
static inline void
tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma)
*/
tlb->vma_huge = is_vm_hugetlb_page(vma);
tlb->vma_exec = !!(vma->vm_flags & VM_EXEC);
+ tlb->vma_pfn = !!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP));
}
-#else
-
-static inline void
-tlb_update_vma_flags(struct mmu_gather *tlb, struct vm_area_struct *vma) { }
-
-#endif
-
-#endif /* CONFIG_MMU_GATHER_NO_RANGE */
-
static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb)
{
/*
* case where we're doing a full MM flush. When we're doing a munmap,
* the vmas are adjusted to only cover the region to be torn down.
*/
-#ifndef tlb_start_vma
static inline void tlb_start_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
if (tlb->fullmm)
return;
tlb_update_vma_flags(tlb, vma);
+#ifndef CONFIG_MMU_GATHER_NO_FLUSH_CACHE
flush_cache_range(vma, vma->vm_start, vma->vm_end);
-}
#endif
+}
-#ifndef tlb_end_vma
static inline void tlb_end_vma(struct mmu_gather *tlb, struct vm_area_struct *vma)
{
if (tlb->fullmm)
return;
/*
- * Do a TLB flush and reset the range at VMA boundaries; this avoids
- * the ranges growing with the unused space between consecutive VMAs,
- * but also the mmu_gather::vma_* flags from tlb_start_vma() rely on
- * this.
+ * VM_PFNMAP is more fragile because the core mm will not track the
+ * page mapcount -- there might not be page-frames for these PFNs after
+ * all. Force flush TLBs for such ranges to avoid munmap() vs
+ * unmap_mapping_range() races.
*/
- tlb_flush_mmu_tlbonly(tlb);
+ if (tlb->vma_pfn || !IS_ENABLED(CONFIG_MMU_GATHER_MERGE_VMAS)) {
+ /*
+ * Do a TLB flush and reset the range at VMA boundaries; this avoids
+ * the ranges growing with the unused space between consecutive VMAs.
+ */
+ tlb_flush_mmu_tlbonly(tlb);
+ }
}
-#endif
/*
* tlb_flush_{pte|pmd|pud|p4d}_range() adjust the tlb->start and tlb->end,
for ((__i) = 0; \
(__i) < (__state)->num_private_objs && \
((obj) = (__state)->private_objs[__i].ptr, \
+ (void)(obj) /* Only to avoid unused-but-set-variable warning */, \
(new_obj_state) = (__state)->private_objs[__i].new_state, 1); \
(__i)++)
#include <linux/dma-fence.h>
#include <linux/completion.h>
#include <linux/xarray.h>
-#include <linux/irq_work.h>
+#include <linux/workqueue.h>
#define MAX_WAIT_SCHED_ENTITY_Q_EMPTY msecs_to_jiffies(1000)
*/
union {
struct dma_fence_cb finish_cb;
- struct irq_work work;
+ struct work_struct work;
};
uint64_t id;
}
void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk);
-void ttm_lru_bulk_move_add(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res);
-void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk,
- struct ttm_resource *res);
void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk);
+void ttm_resource_add_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo);
+void ttm_resource_del_bulk_move(struct ttm_resource *res,
+ struct ttm_buffer_object *bo);
void ttm_resource_move_to_lru_tail(struct ttm_resource *res);
void ttm_resource_init(struct ttm_buffer_object *bo,
#define R9A07G043_ADC_ADCLK 76
#define R9A07G043_ADC_PCLK 77
#define R9A07G043_TSU_PCLK 78
+#define R9A07G043_NCEPLDM_DM_CLK 79 /* RZ/Five Only */
+#define R9A07G043_NCEPLDM_ACLK 80 /* RZ/Five Only */
+#define R9A07G043_NCEPLDM_TCK 81 /* RZ/Five Only */
+#define R9A07G043_NCEPLMT_ACLK 82 /* RZ/Five Only */
+#define R9A07G043_NCEPLIC_ACLK 83 /* RZ/Five Only */
+#define R9A07G043_AX45MP_CORE0_CLK 84 /* RZ/Five Only */
+#define R9A07G043_AX45MP_ACLK 85 /* RZ/Five Only */
+#define R9A07G043_IAX45_CLK 86 /* RZ/Five Only */
+#define R9A07G043_IAX45_PCLK 87 /* RZ/Five Only */
/* R9A07G043 Resets */
#define R9A07G043_CA55_RST_1_0 0 /* RZ/G2UL Only */
#define R9A07G043_ADC_PRESETN 67
#define R9A07G043_ADC_ADRST_N 68
#define R9A07G043_TSU_PRESETN 69
+#define R9A07G043_NCEPLDM_DTM_PWR_RST_N 70 /* RZ/Five Only */
+#define R9A07G043_NCEPLDM_ARESETN 71 /* RZ/Five Only */
+#define R9A07G043_NCEPLMT_POR_RSTN 72 /* RZ/Five Only */
+#define R9A07G043_NCEPLMT_ARESETN 73 /* RZ/Five Only */
+#define R9A07G043_NCEPLIC_ARESETN 74 /* RZ/Five Only */
+#define R9A07G043_AX45MP_ARESETNM 75 /* RZ/Five Only */
+#define R9A07G043_AX45MP_ARESETNS 76 /* RZ/Five Only */
+#define R9A07G043_AX45MP_L2_RESETN 77 /* RZ/Five Only */
+#define R9A07G043_AX45MP_CORE0_RESETN 78 /* RZ/Five Only */
+#define R9A07G043_IAX45_RESETN 79 /* RZ/Five Only */
+
#endif /* __DT_BINDINGS_CLOCK_R9A07G043_CPG_H__ */
--- /dev/null
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/*
+ * Unisoc UMS512 SoC DTS file
+ *
+ * Copyright (C) 2022, Unisoc Inc.
+ */
+
+#ifndef _DT_BINDINGS_CLK_UMS512_H_
+#define _DT_BINDINGS_CLK_UMS512_H_
+
+#define CLK_26M_AUD 0
+#define CLK_13M 1
+#define CLK_6M5 2
+#define CLK_4M3 3
+#define CLK_2M 4
+#define CLK_1M 5
+#define CLK_250K 6
+#define CLK_RCO_25M 7
+#define CLK_RCO_4M 8
+#define CLK_RCO_2M 9
+#define CLK_ISPPLL_GATE 10
+#define CLK_DPLL0_GATE 11
+#define CLK_DPLL1_GATE 12
+#define CLK_LPLL_GATE 13
+#define CLK_TWPLL_GATE 14
+#define CLK_GPLL_GATE 15
+#define CLK_RPLL_GATE 16
+#define CLK_CPPLL_GATE 17
+#define CLK_MPLL0_GATE 18
+#define CLK_MPLL1_GATE 19
+#define CLK_MPLL2_GATE 20
+#define CLK_PMU_GATE_NUM (CLK_MPLL2_GATE + 1)
+
+#define CLK_DPLL0 0
+#define CLK_DPLL0_58M31 1
+#define CLK_ANLG_PHY_G0_NUM (CLK_DPLL0_58M31 + 1)
+
+#define CLK_MPLL1 0
+#define CLK_MPLL1_63M38 1
+#define CLK_ANLG_PHY_G2_NUM (CLK_MPLL1_63M38 + 1)
+
+#define CLK_RPLL 0
+#define CLK_AUDIO_GATE 1
+#define CLK_MPLL0 2
+#define CLK_MPLL0_56M88 3
+#define CLK_MPLL2 4
+#define CLK_MPLL2_47M13 5
+#define CLK_ANLG_PHY_G3_NUM (CLK_MPLL2_47M13 + 1)
+
+#define CLK_TWPLL 0
+#define CLK_TWPLL_768M 1
+#define CLK_TWPLL_384M 2
+#define CLK_TWPLL_192M 3
+#define CLK_TWPLL_96M 4
+#define CLK_TWPLL_48M 5
+#define CLK_TWPLL_24M 6
+#define CLK_TWPLL_12M 7
+#define CLK_TWPLL_512M 8
+#define CLK_TWPLL_256M 9
+#define CLK_TWPLL_128M 10
+#define CLK_TWPLL_64M 11
+#define CLK_TWPLL_307M2 12
+#define CLK_TWPLL_219M4 13
+#define CLK_TWPLL_170M6 14
+#define CLK_TWPLL_153M6 15
+#define CLK_TWPLL_76M8 16
+#define CLK_TWPLL_51M2 17
+#define CLK_TWPLL_38M4 18
+#define CLK_TWPLL_19M2 19
+#define CLK_TWPLL_12M29 20
+#define CLK_LPLL 21
+#define CLK_LPLL_614M4 22
+#define CLK_LPLL_409M6 23
+#define CLK_LPLL_245M76 24
+#define CLK_LPLL_30M72 25
+#define CLK_ISPPLL 26
+#define CLK_ISPPLL_468M 27
+#define CLK_ISPPLL_78M 28
+#define CLK_GPLL 29
+#define CLK_GPLL_40M 30
+#define CLK_CPPLL 31
+#define CLK_CPPLL_39M32 32
+#define CLK_ANLG_PHY_GC_NUM (CLK_CPPLL_39M32 + 1)
+
+#define CLK_AP_APB 0
+#define CLK_IPI 1
+#define CLK_AP_UART0 2
+#define CLK_AP_UART1 3
+#define CLK_AP_UART2 4
+#define CLK_AP_I2C0 5
+#define CLK_AP_I2C1 6
+#define CLK_AP_I2C2 7
+#define CLK_AP_I2C3 8
+#define CLK_AP_I2C4 9
+#define CLK_AP_SPI0 10
+#define CLK_AP_SPI1 11
+#define CLK_AP_SPI2 12
+#define CLK_AP_SPI3 13
+#define CLK_AP_IIS0 14
+#define CLK_AP_IIS1 15
+#define CLK_AP_IIS2 16
+#define CLK_AP_SIM 17
+#define CLK_AP_CE 18
+#define CLK_SDIO0_2X 19
+#define CLK_SDIO1_2X 20
+#define CLK_EMMC_2X 21
+#define CLK_VSP 22
+#define CLK_DISPC0 23
+#define CLK_DISPC0_DPI 24
+#define CLK_DSI_APB 25
+#define CLK_DSI_RXESC 26
+#define CLK_DSI_LANEBYTE 27
+#define CLK_VDSP 28
+#define CLK_VDSP_M 29
+#define CLK_AP_CLK_NUM (CLK_VDSP_M + 1)
+
+#define CLK_DSI_EB 0
+#define CLK_DISPC_EB 1
+#define CLK_VSP_EB 2
+#define CLK_VDMA_EB 3
+#define CLK_DMA_PUB_EB 4
+#define CLK_DMA_SEC_EB 5
+#define CLK_IPI_EB 6
+#define CLK_AHB_CKG_EB 7
+#define CLK_BM_CLK_EB 8
+#define CLK_AP_AHB_GATE_NUM (CLK_BM_CLK_EB + 1)
+
+#define CLK_AON_APB 0
+#define CLK_ADI 1
+#define CLK_AUX0 2
+#define CLK_AUX1 3
+#define CLK_AUX2 4
+#define CLK_PROBE 5
+#define CLK_PWM0 6
+#define CLK_PWM1 7
+#define CLK_PWM2 8
+#define CLK_PWM3 9
+#define CLK_EFUSE 10
+#define CLK_UART0 11
+#define CLK_UART1 12
+#define CLK_THM0 13
+#define CLK_THM1 14
+#define CLK_THM2 15
+#define CLK_THM3 16
+#define CLK_AON_I2C 17
+#define CLK_AON_IIS 18
+#define CLK_SCC 19
+#define CLK_APCPU_DAP 20
+#define CLK_APCPU_DAP_MTCK 21
+#define CLK_APCPU_TS 22
+#define CLK_DEBUG_TS 23
+#define CLK_DSI_TEST_S 24
+#define CLK_DJTAG_TCK 25
+#define CLK_DJTAG_TCK_HW 26
+#define CLK_AON_TMR 27
+#define CLK_AON_PMU 28
+#define CLK_DEBOUNCE 29
+#define CLK_APCPU_PMU 30
+#define CLK_TOP_DVFS 31
+#define CLK_OTG_UTMI 32
+#define CLK_OTG_REF 33
+#define CLK_CSSYS 34
+#define CLK_CSSYS_PUB 35
+#define CLK_CSSYS_APB 36
+#define CLK_AP_AXI 37
+#define CLK_AP_MM 38
+#define CLK_SDIO2_2X 39
+#define CLK_ANALOG_IO_APB 40
+#define CLK_DMC_REF_CLK 41
+#define CLK_EMC 42
+#define CLK_USB 43
+#define CLK_26M_PMU 44
+#define CLK_AON_APB_NUM (CLK_26M_PMU + 1)
+
+#define CLK_MM_AHB 0
+#define CLK_MM_MTX 1
+#define CLK_SENSOR0 2
+#define CLK_SENSOR1 3
+#define CLK_SENSOR2 4
+#define CLK_CPP 5
+#define CLK_JPG 6
+#define CLK_FD 7
+#define CLK_DCAM_IF 8
+#define CLK_DCAM_AXI 9
+#define CLK_ISP 10
+#define CLK_MIPI_CSI0 11
+#define CLK_MIPI_CSI1 12
+#define CLK_MIPI_CSI2 13
+#define CLK_MM_CLK_NUM (CLK_MIPI_CSI2 + 1)
+
+#define CLK_RC100M_CAL_EB 0
+#define CLK_DJTAG_TCK_EB 1
+#define CLK_DJTAG_EB 2
+#define CLK_AUX0_EB 3
+#define CLK_AUX1_EB 4
+#define CLK_AUX2_EB 5
+#define CLK_PROBE_EB 6
+#define CLK_MM_EB 7
+#define CLK_GPU_EB 8
+#define CLK_MSPI_EB 9
+#define CLK_APCPU_DAP_EB 10
+#define CLK_AON_CSSYS_EB 11
+#define CLK_CSSYS_APB_EB 12
+#define CLK_CSSYS_PUB_EB 13
+#define CLK_SDPHY_CFG_EB 14
+#define CLK_SDPHY_REF_EB 15
+#define CLK_EFUSE_EB 16
+#define CLK_GPIO_EB 17
+#define CLK_MBOX_EB 18
+#define CLK_KPD_EB 19
+#define CLK_AON_SYST_EB 20
+#define CLK_AP_SYST_EB 21
+#define CLK_AON_TMR_EB 22
+#define CLK_OTG_UTMI_EB 23
+#define CLK_OTG_PHY_EB 24
+#define CLK_SPLK_EB 25
+#define CLK_PIN_EB 26
+#define CLK_ANA_EB 27
+#define CLK_APCPU_TS0_EB 28
+#define CLK_APB_BUSMON_EB 29
+#define CLK_AON_IIS_EB 30
+#define CLK_SCC_EB 31
+#define CLK_THM0_EB 32
+#define CLK_THM1_EB 33
+#define CLK_THM2_EB 34
+#define CLK_ASIM_TOP_EB 35
+#define CLK_I2C_EB 36
+#define CLK_PMU_EB 37
+#define CLK_ADI_EB 38
+#define CLK_EIC_EB 39
+#define CLK_AP_INTC0_EB 40
+#define CLK_AP_INTC1_EB 41
+#define CLK_AP_INTC2_EB 42
+#define CLK_AP_INTC3_EB 43
+#define CLK_AP_INTC4_EB 44
+#define CLK_AP_INTC5_EB 45
+#define CLK_AUDCP_INTC_EB 46
+#define CLK_AP_TMR0_EB 47
+#define CLK_AP_TMR1_EB 48
+#define CLK_AP_TMR2_EB 49
+#define CLK_PWM0_EB 50
+#define CLK_PWM1_EB 51
+#define CLK_PWM2_EB 52
+#define CLK_PWM3_EB 53
+#define CLK_AP_WDG_EB 54
+#define CLK_APCPU_WDG_EB 55
+#define CLK_SERDES_EB 56
+#define CLK_ARCH_RTC_EB 57
+#define CLK_KPD_RTC_EB 58
+#define CLK_AON_SYST_RTC_EB 59
+#define CLK_AP_SYST_RTC_EB 60
+#define CLK_AON_TMR_RTC_EB 61
+#define CLK_EIC_RTC_EB 62
+#define CLK_EIC_RTCDV5_EB 63
+#define CLK_AP_WDG_RTC_EB 64
+#define CLK_AC_WDG_RTC_EB 65
+#define CLK_AP_TMR0_RTC_EB 66
+#define CLK_AP_TMR1_RTC_EB 67
+#define CLK_AP_TMR2_RTC_EB 68
+#define CLK_DCXO_LC_RTC_EB 69
+#define CLK_BB_CAL_RTC_EB 70
+#define CLK_AP_EMMC_RTC_EB 71
+#define CLK_AP_SDIO0_RTC_EB 72
+#define CLK_AP_SDIO1_RTC_EB 73
+#define CLK_AP_SDIO2_RTC_EB 74
+#define CLK_DSI_CSI_TEST_EB 75
+#define CLK_DJTAG_TCK_EN 76
+#define CLK_DPHY_REF_EB 77
+#define CLK_DMC_REF_EB 78
+#define CLK_OTG_REF_EB 79
+#define CLK_TSEN_EB 80
+#define CLK_TMR_EB 81
+#define CLK_RC100M_REF_EB 82
+#define CLK_RC100M_FDK_EB 83
+#define CLK_DEBOUNCE_EB 84
+#define CLK_DET_32K_EB 85
+#define CLK_TOP_CSSYS_EB 86
+#define CLK_AP_AXI_EN 87
+#define CLK_SDIO0_2X_EN 88
+#define CLK_SDIO0_1X_EN 89
+#define CLK_SDIO1_2X_EN 90
+#define CLK_SDIO1_1X_EN 91
+#define CLK_SDIO2_2X_EN 92
+#define CLK_SDIO2_1X_EN 93
+#define CLK_EMMC_2X_EN 94
+#define CLK_EMMC_1X_EN 95
+#define CLK_PLL_TEST_EN 96
+#define CLK_CPHY_CFG_EN 97
+#define CLK_DEBUG_TS_EN 98
+#define CLK_ACCESS_AUD_EN 99
+#define CLK_AON_APB_GATE_NUM (CLK_ACCESS_AUD_EN + 1)
+
+#define CLK_MM_CPP_EB 0
+#define CLK_MM_JPG_EB 1
+#define CLK_MM_DCAM_EB 2
+#define CLK_MM_ISP_EB 3
+#define CLK_MM_CSI2_EB 4
+#define CLK_MM_CSI1_EB 5
+#define CLK_MM_CSI0_EB 6
+#define CLK_MM_CKG_EB 7
+#define CLK_ISP_AHB_EB 8
+#define CLK_MM_DVFS_EB 9
+#define CLK_MM_FD_EB 10
+#define CLK_MM_SENSOR2_EB 11
+#define CLK_MM_SENSOR1_EB 12
+#define CLK_MM_SENSOR0_EB 13
+#define CLK_MM_MIPI_CSI2_EB 14
+#define CLK_MM_MIPI_CSI1_EB 15
+#define CLK_MM_MIPI_CSI0_EB 16
+#define CLK_DCAM_AXI_EB 17
+#define CLK_ISP_AXI_EB 18
+#define CLK_MM_CPHY_EB 19
+#define CLK_MM_GATE_CLK_NUM (CLK_MM_CPHY_EB + 1)
+
+#define CLK_SIM0_EB 0
+#define CLK_IIS0_EB 1
+#define CLK_IIS1_EB 2
+#define CLK_IIS2_EB 3
+#define CLK_APB_REG_EB 4
+#define CLK_SPI0_EB 5
+#define CLK_SPI1_EB 6
+#define CLK_SPI2_EB 7
+#define CLK_SPI3_EB 8
+#define CLK_I2C0_EB 9
+#define CLK_I2C1_EB 10
+#define CLK_I2C2_EB 11
+#define CLK_I2C3_EB 12
+#define CLK_I2C4_EB 13
+#define CLK_UART0_EB 14
+#define CLK_UART1_EB 15
+#define CLK_UART2_EB 16
+#define CLK_SIM0_32K_EB 17
+#define CLK_SPI0_LFIN_EB 18
+#define CLK_SPI1_LFIN_EB 19
+#define CLK_SPI2_LFIN_EB 20
+#define CLK_SPI3_LFIN_EB 21
+#define CLK_SDIO0_EB 22
+#define CLK_SDIO1_EB 23
+#define CLK_SDIO2_EB 24
+#define CLK_EMMC_EB 25
+#define CLK_SDIO0_32K_EB 26
+#define CLK_SDIO1_32K_EB 27
+#define CLK_SDIO2_32K_EB 28
+#define CLK_EMMC_32K_EB 29
+#define CLK_AP_APB_GATE_NUM (CLK_EMMC_32K_EB + 1)
+
+#define CLK_GPU_CORE_EB 0
+#define CLK_GPU_CORE 1
+#define CLK_GPU_MEM_EB 2
+#define CLK_GPU_MEM 3
+#define CLK_GPU_SYS_EB 4
+#define CLK_GPU_SYS 5
+#define CLK_GPU_CLK_NUM (CLK_GPU_SYS + 1)
+
+#define CLK_AUDCP_IIS0_EB 0
+#define CLK_AUDCP_IIS1_EB 1
+#define CLK_AUDCP_IIS2_EB 2
+#define CLK_AUDCP_UART_EB 3
+#define CLK_AUDCP_DMA_CP_EB 4
+#define CLK_AUDCP_DMA_AP_EB 5
+#define CLK_AUDCP_SRC48K_EB 6
+#define CLK_AUDCP_MCDT_EB 7
+#define CLK_AUDCP_VBCIFD_EB 8
+#define CLK_AUDCP_VBC_EB 9
+#define CLK_AUDCP_SPLK_EB 10
+#define CLK_AUDCP_ICU_EB 11
+#define CLK_AUDCP_DMA_AP_ASHB_EB 12
+#define CLK_AUDCP_DMA_CP_ASHB_EB 13
+#define CLK_AUDCP_AUD_EB 14
+#define CLK_AUDCP_VBC_24M_EB 15
+#define CLK_AUDCP_TMR_26M_EB 16
+#define CLK_AUDCP_DVFS_ASHB_EB 17
+#define CLK_AUDCP_AHB_GATE_NUM (CLK_AUDCP_DVFS_ASHB_EB + 1)
+
+#define CLK_AUDCP_WDG_EB 0
+#define CLK_AUDCP_RTC_WDG_EB 1
+#define CLK_AUDCP_TMR0_EB 2
+#define CLK_AUDCP_TMR1_EB 3
+#define CLK_AUDCP_APB_GATE_NUM (CLK_AUDCP_TMR1_EB + 1)
+
+#define CLK_ACORE0 0
+#define CLK_ACORE1 1
+#define CLK_ACORE2 2
+#define CLK_ACORE3 3
+#define CLK_ACORE4 4
+#define CLK_ACORE5 5
+#define CLK_PCORE0 6
+#define CLK_PCORE1 7
+#define CLK_SCU 8
+#define CLK_ACE 9
+#define CLK_PERIPH 10
+#define CLK_GIC 11
+#define CLK_ATB 12
+#define CLK_DEBUG_APB 13
+#define CLK_APCPU_SEC_NUM (CLK_DEBUG_APB + 1)
+
+#endif /* _DT_BINDINGS_CLK_UMS512_H_ */
#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8186
#define _DT_BINDINGS_RESET_CONTROLLER_MT8186
+/* TOPRGU resets */
#define MT8186_TOPRGU_INFRA_SW_RST 0
#define MT8186_TOPRGU_MM_SW_RST 1
#define MT8186_TOPRGU_MFG_SW_RST 2
/* MMSYS resets */
#define MT8186_MMSYS_SW0_RST_B_DISP_DSI0 19
+/* INFRA resets */
+#define MT8186_INFRA_THERMAL_CTRL_RST 0
+#define MT8186_INFRA_PTP_CTRL_RST 1
+
#endif /* _DT_BINDINGS_RESET_CONTROLLER_MT8186 */
#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8192
#define _DT_BINDINGS_RESET_CONTROLLER_MT8192
+/* TOPRGU resets */
#define MT8192_TOPRGU_MM_SW_RST 1
#define MT8192_TOPRGU_MFG_SW_RST 2
#define MT8192_TOPRGU_VENC_SW_RST 3
/* MMSYS resets */
#define MT8192_MMSYS_SW0_RST_B_DISP_DSI0 15
+/* INFRA resets */
+#define MT8192_INFRA_RST0_THERM_CTRL_SWRST 0
+#define MT8192_INFRA_RST2_PEXTP_PHY_SWRST 1
+#define MT8192_INFRA_RST3_THERM_CTRL_PTP_SWRST 2
+#define MT8192_INFRA_RST4_PCIE_TOP_SWRST 3
+#define MT8192_INFRA_RST4_THERM_CTRL_MCU_SWRST 4
+
#endif /* _DT_BINDINGS_RESET_CONTROLLER_MT8192 */
#ifndef _DT_BINDINGS_RESET_CONTROLLER_MT8195
#define _DT_BINDINGS_RESET_CONTROLLER_MT8195
+/* TOPRGU resets */
#define MT8195_TOPRGU_CONN_MCU_SW_RST 0
#define MT8195_TOPRGU_INFRA_GRST_SW_RST 1
#define MT8195_TOPRGU_APU_SW_RST 2
#define MT8195_TOPRGU_SW_RST_NUM 16
+/* INFRA resets */
+#define MT8195_INFRA_RST0_THERM_CTRL_SWRST 0
+#define MT8195_INFRA_RST3_THERM_CTRL_PTP_SWRST 1
+#define MT8195_INFRA_RST4_THERM_CTRL_MCU_SWRST 2
+
#endif /* _DT_BINDINGS_RESET_CONTROLLER_MT8195 */
const struct asymmetric_key_id *id_2,
bool partial);
+int x509_load_certificate_list(const u8 cert_list[], const unsigned long list_size,
+ const struct key *keyring);
+
/*
* The payload is at the discretion of the subtype.
*/
extern bool osc_sb_apei_support_acked;
extern bool osc_pc_lpi_support_confirmed;
extern bool osc_sb_native_usb4_support_confirmed;
-extern bool osc_sb_cppc_not_supported;
+extern bool osc_sb_cppc2_support_acked;
extern bool osc_cpc_flexible_adr_space_confirmed;
/* USB4 Capabilities */
extern struct backing_dev_info noop_backing_dev_info;
+int bdi_init(struct backing_dev_info *bdi);
+
/**
* writeback_in_progress - determine whether there is writeback in progress
* @wb: bdi_writeback of interest
extern int bioset_init(struct bio_set *, unsigned int, unsigned int, int flags);
extern void bioset_exit(struct bio_set *);
extern int biovec_init_pool(mempool_t *pool, int pool_entries);
-extern int bioset_init_from_src(struct bio_set *bs, struct bio_set *src);
struct bio *bio_alloc_bioset(struct block_device *bdev, unsigned short nr_vecs,
unsigned int opf, gfp_t gfp_mask,
*/
struct blk_independent_access_range {
struct kobject kobj;
- struct request_queue *queue;
sector_t sector;
sector_t nr_sectors;
};
#endif /* CONFIG_BLK_DEV_ZONED */
int node;
- struct mutex debugfs_mutex;
#ifdef CONFIG_BLK_DEV_IO_TRACE
struct blk_trace __rcu *blk_trace;
#endif
struct bio_set bio_split;
struct dentry *debugfs_dir;
-
-#ifdef CONFIG_BLK_DEBUG_FS
struct dentry *sched_debugfs_dir;
struct dentry *rqos_debugfs_dir;
-#endif
+ /*
+ * Serializes all debugfs metadata operations using the above dentries.
+ */
+ struct mutex debugfs_mutex;
bool mq_sysfs_init_done;
#define QUEUE_FLAG_RQ_ALLOC_TIME 27 /* record rq->alloc_time_ns */
#define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */
#define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */
+#define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */
#define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_SAME_COMP) | \
#define blk_queue_pm_only(q) atomic_read(&(q)->pm_only)
#define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags)
#define blk_queue_nowait(q) test_bit(QUEUE_FLAG_NOWAIT, &(q)->queue_flags)
+#define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags)
extern void blk_set_pm_only(struct request_queue *q);
extern void blk_clear_pm_only(struct request_queue *q);
*/
/* Supports zoned block devices sequential write constraint */
#define ELEVATOR_F_ZBD_SEQ_WRITE (1U << 0)
-/* Supports scheduling on multiple hardware queues */
-#define ELEVATOR_F_MQ_AWARE (1U << 1)
extern void blk_queue_required_elevator_features(struct request_queue *q,
unsigned int features);
* List of csets participating in the on-going migration either as
* source or destination. Protected by cgroup_mutex.
*/
- struct list_head mg_preload_node;
+ struct list_head mg_src_preload_node;
+ struct list_head mg_dst_preload_node;
struct list_head mg_node;
/*
__devm_clk_hw_register_divider((dev), NULL, (name), (parent_name), NULL, \
NULL, (flags), (reg), (shift), (width), \
(clk_divider_flags), NULL, (lock))
+/**
+ * devm_clk_hw_register_divider_parent_hw - register a divider clock with the clock framework
+ * @dev: device registering this clock
+ * @name: name of this clock
+ * @parent_hw: pointer to parent clk
+ * @flags: framework-specific flags
+ * @reg: register address to adjust divider
+ * @shift: number of bits to shift the bitfield
+ * @width: width of the bitfield
+ * @clk_divider_flags: divider-specific flags for this clock
+ * @lock: shared register lock for this clock
+ */
+#define devm_clk_hw_register_divider_parent_hw(dev, name, parent_hw, flags, \
+ reg, shift, width, \
+ clk_divider_flags, lock) \
+ __devm_clk_hw_register_divider((dev), NULL, (name), NULL, \
+ (parent_hw), NULL, (flags), (reg), \
+ (shift), (width), (clk_divider_flags), \
+ NULL, (lock))
/**
* devm_clk_hw_register_divider_table - register a table based divider clock
* with the clock framework (devres variant)
(parent_names), NULL, NULL, (flags), (reg), \
(shift), BIT((width)) - 1, (clk_mux_flags), \
NULL, (lock))
+#define devm_clk_hw_register_mux_parent_hws(dev, name, parent_hws, \
+ num_parents, flags, reg, shift, \
+ width, clk_mux_flags, lock) \
+ __devm_clk_hw_register_mux((dev), NULL, (name), (num_parents), NULL, \
+ (parent_hws), NULL, (flags), (reg), \
+ (shift), BIT((width)) - 1, \
+ (clk_mux_flags), NULL, (lock))
int clk_mux_val_to_index(struct clk_hw *hw, const u32 *table, unsigned int flags,
unsigned int val);
struct clk_hw *devm_clk_hw_register_fixed_factor_index(struct device *dev,
const char *name, unsigned int index, unsigned long flags,
unsigned int mult, unsigned int div);
+
+struct clk_hw *devm_clk_hw_register_fixed_factor_parent_hw(struct device *dev,
+ const char *name, const struct clk_hw *parent_hw,
+ unsigned long flags, unsigned int mult, unsigned int div);
+
+struct clk_hw *clk_hw_register_fixed_factor_parent_hw(struct device *dev,
+ const char *name, const struct clk_hw *parent_hw,
+ unsigned long flags, unsigned int mult, unsigned int div);
/**
* struct clk_fractional_divider - adjustable fractional divider clock
*
int __must_check of_clk_hw_register(struct device_node *node, struct clk_hw *hw);
void clk_unregister(struct clk *clk);
-void devm_clk_unregister(struct device *dev, struct clk *clk);
void clk_hw_unregister(struct clk_hw *hw);
-void devm_clk_hw_unregister(struct device *dev, struct clk_hw *hw);
/* helper functions */
const char *__clk_get_name(const struct clk *clk);
* @dev: device for clock "consumer"
* @id: clock consumer ID
*
- * Returns a struct clk corresponding to the clock producer, or
+ * Context: May sleep.
+ *
+ * Return: a struct clk corresponding to the clock producer, or
* valid IS_ERR() condition containing errno. The implementation
* uses @dev and @id to determine the clock consumer, and thereby
* the clock producer. (IOW, @id may be identical strings, but
* clk_get may return different clock producers depending on @dev.)
*
- * Drivers must assume that the clock source is not enabled.
- *
- * devm_clk_get should not be called from within interrupt context.
+ * Drivers must assume that the clock source is neither prepared nor
+ * enabled.
*
* The clock will automatically be freed when the device is unbound
* from the bus.
*/
struct clk *devm_clk_get(struct device *dev, const char *id);
+/**
+ * devm_clk_get_prepared - devm_clk_get() + clk_prepare()
+ * @dev: device for clock "consumer"
+ * @id: clock consumer ID
+ *
+ * Context: May sleep.
+ *
+ * Return: a struct clk corresponding to the clock producer, or
+ * valid IS_ERR() condition containing errno. The implementation
+ * uses @dev and @id to determine the clock consumer, and thereby
+ * the clock producer. (IOW, @id may be identical strings, but
+ * clk_get may return different clock producers depending on @dev.)
+ *
+ * The returned clk (if valid) is prepared. Drivers must however assume
+ * that the clock is not enabled.
+ *
+ * The clock will automatically be unprepared and freed when the device
+ * is unbound from the bus.
+ */
+struct clk *devm_clk_get_prepared(struct device *dev, const char *id);
+
+/**
+ * devm_clk_get_enabled - devm_clk_get() + clk_prepare_enable()
+ * @dev: device for clock "consumer"
+ * @id: clock consumer ID
+ *
+ * Context: May sleep.
+ *
+ * Return: a struct clk corresponding to the clock producer, or
+ * valid IS_ERR() condition containing errno. The implementation
+ * uses @dev and @id to determine the clock consumer, and thereby
+ * the clock producer. (IOW, @id may be identical strings, but
+ * clk_get may return different clock producers depending on @dev.)
+ *
+ * The returned clk (if valid) is prepared and enabled.
+ *
+ * The clock will automatically be disabled, unprepared and freed
+ * when the device is unbound from the bus.
+ */
+struct clk *devm_clk_get_enabled(struct device *dev, const char *id);
+
/**
* devm_clk_get_optional - lookup and obtain a managed reference to an optional
* clock producer.
* @dev: device for clock "consumer"
* @id: clock consumer ID
*
- * Behaves the same as devm_clk_get() except where there is no clock producer.
- * In this case, instead of returning -ENOENT, the function returns NULL.
+ * Context: May sleep.
+ *
+ * Return: a struct clk corresponding to the clock producer, or
+ * valid IS_ERR() condition containing errno. The implementation
+ * uses @dev and @id to determine the clock consumer, and thereby
+ * the clock producer. If no such clk is found, it returns NULL
+ * which serves as a dummy clk. That's the only difference compared
+ * to devm_clk_get().
+ *
+ * Drivers must assume that the clock source is neither prepared nor
+ * enabled.
+ *
+ * The clock will automatically be freed when the device is unbound
+ * from the bus.
*/
struct clk *devm_clk_get_optional(struct device *dev, const char *id);
+/**
+ * devm_clk_get_optional_prepared - devm_clk_get_optional() + clk_prepare()
+ * @dev: device for clock "consumer"
+ * @id: clock consumer ID
+ *
+ * Context: May sleep.
+ *
+ * Return: a struct clk corresponding to the clock producer, or
+ * valid IS_ERR() condition containing errno. The implementation
+ * uses @dev and @id to determine the clock consumer, and thereby
+ * the clock producer. If no such clk is found, it returns NULL
+ * which serves as a dummy clk. That's the only difference compared
+ * to devm_clk_get_prepared().
+ *
+ * The returned clk (if valid) is prepared. Drivers must however
+ * assume that the clock is not enabled.
+ *
+ * The clock will automatically be unprepared and freed when the
+ * device is unbound from the bus.
+ */
+struct clk *devm_clk_get_optional_prepared(struct device *dev, const char *id);
+
+/**
+ * devm_clk_get_optional_enabled - devm_clk_get_optional() +
+ * clk_prepare_enable()
+ * @dev: device for clock "consumer"
+ * @id: clock consumer ID
+ *
+ * Context: May sleep.
+ *
+ * Return: a struct clk corresponding to the clock producer, or
+ * valid IS_ERR() condition containing errno. The implementation
+ * uses @dev and @id to determine the clock consumer, and thereby
+ * the clock producer. If no such clk is found, it returns NULL
+ * which serves as a dummy clk. That's the only difference compared
+ * to devm_clk_get_enabled().
+ *
+ * The returned clk (if valid) is prepared and enabled.
+ *
+ * The clock will automatically be disabled, unprepared and freed
+ * when the device is unbound from the bus.
+ */
+struct clk *devm_clk_get_optional_enabled(struct device *dev, const char *id);
+
/**
* devm_get_clk_from_child - lookup and obtain a managed reference to a
* clock producer from child node.
return NULL;
}
+static inline struct clk *devm_clk_get_prepared(struct device *dev,
+ const char *id)
+{
+ return NULL;
+}
+
+static inline struct clk *devm_clk_get_enabled(struct device *dev,
+ const char *id)
+{
+ return NULL;
+}
+
static inline struct clk *devm_clk_get_optional(struct device *dev,
const char *id)
{
return NULL;
}
+static inline struct clk *devm_clk_get_optional_prepared(struct device *dev,
+ const char *id)
+{
+ return NULL;
+}
+
+static inline struct clk *devm_clk_get_optional_enabled(struct device *dev,
+ const char *id)
+{
+ return NULL;
+}
+
static inline int __must_check devm_clk_bulk_get(struct device *dev, int num_clks,
struct clk_bulk_data *clks)
{
/* context/locking */
# define __must_hold(x) __attribute__((context(x,1,1)))
# define __acquires(x) __attribute__((context(x,0,1)))
+# define __cond_acquires(x) __attribute__((context(x,0,-1)))
# define __releases(x) __attribute__((context(x,1,0)))
# define __acquire(x) __context__(x,1)
# define __release(x) __context__(x,-1)
/* context/locking */
# define __must_hold(x)
# define __acquires(x)
+# define __cond_acquires(x)
# define __releases(x)
# define __acquire(x) (void)0
# define __release(x) (void)0
#include <linux/atomic.h>
#include <linux/types.h>
-#include <linux/mutex.h>
struct vc_data;
struct console_font_op;
uint ospeed;
u64 seq;
unsigned long dropped;
- struct task_struct *thread;
- bool blocked;
-
- /*
- * The per-console lock is used by printing kthreads to synchronize
- * this console with callers of console_lock(). This is necessary in
- * order to allow printing kthreads to run in parallel to each other,
- * while each safely accessing the @blocked field and synchronizing
- * against direct printing via console_lock/console_unlock.
- *
- * Note: For synchronizing against direct printing via
- * console_trylock/console_unlock, see the static global
- * variable @console_kthreads_active.
- */
- struct mutex lock;
-
void *data;
struct console *next;
};
extern ssize_t cpu_show_itlb_multihit(struct device *dev,
struct device_attribute *attr, char *buf);
extern ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char *buf);
+extern ssize_t cpu_show_mmio_stale_data(struct device *dev,
+ struct device_attribute *attr,
+ char *buf);
+extern ssize_t cpu_show_retbleed(struct device *dev,
+ struct device_attribute *attr, char *buf);
extern __printf(4, 5)
struct device *cpu_device_create(struct device *parent, void *drvdata,
CPUHP_ZCOMP_PREPARE,
CPUHP_TIMERS_PREPARE,
CPUHP_MIPS_SOC_PREPARE,
- CPUHP_LOONGARCH_SOC_PREPARE,
CPUHP_BP_PREPARE_DYN,
CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20,
CPUHP_BRINGUP_CPU,
*
* Implements the standard CRC ITU-T V.41:
* Width 16
- * Poly 0x1021 (x^16 + x^12 + x^15 + 1)
+ * Poly 0x1021 (x^16 + x^12 + x^5 + 1)
* Init 0
*/
* reevaluate operable frequencies. Devfreq users may use
* devfreq.nb to the corresponding register notifier call chain.
* @work: delayed work for load monitoring.
+ * @freq_table: current frequency table used by the devfreq driver.
+ * @max_state: count of entry present in the frequency table.
* @previous_freq: previously configured frequency value.
* @last_status: devfreq user device info, performance statistics
* @data: Private data of the governor. The devfreq framework does not
struct notifier_block nb;
struct delayed_work work;
+ unsigned long *freq_table;
+ unsigned int max_state;
+
unsigned long previous_freq;
struct devfreq_dev_status last_status;
* We consider 10% difference as significant.
*/
#define IS_SIGNIFICANT_DIFF(val, ref) \
- (((100UL * abs((val) - (ref))) / (ref)) > 10)
+ ((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))
/*
* Calculate the gap between two values.
FANOTIFY_PERM_EVENTS | \
FAN_Q_OVERFLOW | FAN_ONDIR)
+/* Events and flags relevant only for directories */
+#define FANOTIFY_DIRONLY_EVENT_BITS (FANOTIFY_DIRENT_EVENTS | \
+ FAN_EVENT_ON_CHILD | FAN_ONDIR)
+
#define ALL_FANOTIFY_EVENT_BITS (FANOTIFY_OUTGOING_EVENTS | \
FANOTIFY_EVENT_FLAGS)
void fbcon_get_requirement(struct fb_info *info,
struct fb_blit_caps *caps);
void fbcon_fb_blanked(struct fb_info *info, int blank);
+int fbcon_modechange_possible(struct fb_info *info,
+ struct fb_var_screeninfo *var);
void fbcon_update_vcs(struct fb_info *info, bool all);
void fbcon_remap_all(struct fb_info *info);
int fbcon_set_con2fb_map_ioctl(void __user *argp);
static inline void fbcon_get_requirement(struct fb_info *info,
struct fb_blit_caps *caps) {}
static inline void fbcon_fb_blanked(struct fb_info *info, int blank) {}
+static inline int fbcon_modechange_possible(struct fb_info *info,
+ struct fb_var_screeninfo *var) { return 0; }
static inline void fbcon_update_vcs(struct fb_info *info, bool all) {}
static inline void fbcon_remap_all(struct fb_info *info) {}
static inline int fbcon_set_con2fb_map_ioctl(void __user *argp) { return 0; }
#define FSCACHE_COOKIE_DO_PREP_TO_WRITE 12 /* T if cookie needs write preparation */
#define FSCACHE_COOKIE_HAVE_DATA 13 /* T if this cookie has data stored */
#define FSCACHE_COOKIE_IS_HASHED 14 /* T if this cookie is hashed */
+#define FSCACHE_COOKIE_DO_INVALIDATE 15 /* T if cookie needs invalidation */
enum fscache_cookie_state state;
u8 advice; /* FSCACHE_ADV_* */
*/
irq_flow_handler_t parent_handler;
- /**
- * @parent_handler_data:
- *
- * If @per_parent_data is false, @parent_handler_data is a single
- * pointer used as the data associated with every parent interrupt.
- *
- * @parent_handler_data_array:
- *
- * If @per_parent_data is true, @parent_handler_data_array is
- * an array of @num_parents pointers, and is used to associate
- * different data for each parent. This cannot be NULL if
- * @per_parent_data is true.
- */
union {
+ /**
+ * @parent_handler_data:
+ *
+ * If @per_parent_data is false, @parent_handler_data is a
+ * single pointer used as the data associated with every
+ * parent interrupt.
+ */
void *parent_handler_data;
+
+ /**
+ * @parent_handler_data_array:
+ *
+ * If @per_parent_data is true, @parent_handler_data_array is
+ * an array of @num_parents pointers, and is used to associate
+ * different data for each parent. This cannot be NULL if
+ * @per_parent_data is true.
+ */
void **parent_handler_data_array;
};
* It is used in atomic context when code wants to access the contents of a
* page that might be allocated from high memory (see __GFP_HIGHMEM), for
* example a page in the pagecache. The API has two functions, and they
- * can be used in a manner similar to the following:
+ * can be used in a manner similar to the following::
*
- * -- Find the page of interest. --
- * struct page *page = find_get_page(mapping, offset);
+ * // Find the page of interest.
+ * struct page *page = find_get_page(mapping, offset);
*
- * -- Gain access to the contents of that page. --
- * void *vaddr = kmap_atomic(page);
+ * // Gain access to the contents of that page.
+ * void *vaddr = kmap_atomic(page);
*
- * -- Do something to the contents of that page. --
- * memset(vaddr, 0, PAGE_SIZE);
+ * // Do something to the contents of that page.
+ * memset(vaddr, 0, PAGE_SIZE);
*
- * -- Unmap that page. --
- * kunmap_atomic(vaddr);
+ * // Unmap that page.
+ * kunmap_atomic(vaddr);
*
* Note that the kunmap_atomic() call takes the result of the kmap_atomic()
* call, not the argument.
struct device_domain_info {
struct list_head link; /* link to domain siblings */
struct list_head global; /* link to global list */
- struct list_head table; /* link to pasid table */
u32 segment; /* PCI segment number */
u8 bus; /* PCI bus number */
u8 devfn; /* PCI devfn number */
void *alloc_pgtable_page(int node);
void free_pgtable_page(void *vaddr);
struct intel_iommu *domain_get_iommu(struct dmar_domain *domain);
-int for_each_device_domain(int (*fn)(struct device_domain_info *info,
- void *data), void *data);
void iommu_flush_write_buffer(struct intel_iommu *iommu);
int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct device *dev);
struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn);
#define kexec_in_progress false
#endif /* CONFIG_KEXEC_CORE */
+#ifdef CONFIG_KEXEC_SIG
+void set_kexec_sig_enforced(void);
+#else
+static inline void set_kexec_sig_enforced(void) {}
+#endif
+
#endif /* !defined(__ASSEBMLY__) */
#endif /* LINUX_KEXEC_H */
{
}
-static inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
+static __always_inline bool kvm_arch_has_assigned_device(struct kvm *kvm)
{
return false;
}
STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_NONE, \
KVM_STATS_BASE_POW10, 0)
+/* Instantaneous boolean value, read only */
+#define STATS_DESC_IBOOLEAN(SCOPE, name) \
+ STATS_DESC_INSTANT(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \
+ KVM_STATS_BASE_POW10, 0)
+/* Peak (sticky) boolean value, read/write */
+#define STATS_DESC_PBOOLEAN(SCOPE, name) \
+ STATS_DESC_PEAK(SCOPE, name, KVM_STATS_UNIT_BOOLEAN, \
+ KVM_STATS_BASE_POW10, 0)
+
/* Cumulative time in nanosecond */
#define STATS_DESC_TIME_NSEC(SCOPE, name) \
STATS_DESC_CUMULATIVE(SCOPE, name, KVM_STATS_UNIT_SECONDS, \
HALT_POLL_HIST_COUNT), \
STATS_DESC_LOGHIST_TIME_NSEC(VCPU_GENERIC, halt_wait_hist, \
HALT_POLL_HIST_COUNT), \
- STATS_DESC_ICOUNTER(VCPU_GENERIC, blocking)
+ STATS_DESC_IBOOLEAN(VCPU_GENERIC, blocking)
extern struct dentry *kvm_debugfs_dir;
struct ata_queued_cmd qcmd[ATA_MAX_QUEUE + 1];
u64 qc_active;
int nr_active_links; /* #links with active qcs */
- unsigned int sas_last_tag; /* track next tag hw expects */
struct ata_link link; /* host default link */
struct ata_link *slave_link; /* see ata_slave_link_init() */
extern int lockref_put_return(struct lockref *);
extern int lockref_get_not_zero(struct lockref *);
extern int lockref_put_not_zero(struct lockref *);
-extern int lockref_get_or_lock(struct lockref *);
extern int lockref_put_or_lock(struct lockref *);
extern void lockref_mark_dead(struct lockref *);
{
return -ENOMEM;
}
-void memregion_free(int id)
+static inline void memregion_free(int id)
{
}
#endif
#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
-bool __put_devmap_managed_page(struct page *page);
-static inline bool put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page_refs(struct page *page, int refs);
+static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
{
if (!static_branch_unlikely(&devmap_managed_key))
return false;
if (!is_zone_device_page(page))
return false;
- return __put_devmap_managed_page(page);
+ return __put_devmap_managed_page_refs(page, refs);
}
-
#else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page(struct page *page)
+static inline bool put_devmap_managed_page_refs(struct page *page, int refs)
{
return false;
}
#endif /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
+static inline bool put_devmap_managed_page(struct page *page)
+{
+ return put_devmap_managed_page_refs(page, 1);
+}
+
/* 127: arbitrary random number, small enough to assemble well */
#define folio_ref_zero_or_close_to_overflow(folio) \
((unsigned int) folio_ref_count(folio) + 127u <= 127u)
if (mt == MIGRATE_CMA || mt == MIGRATE_ISOLATE)
return false;
#endif
- return !(is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page)));
+ return !is_zone_movable_page(page) || is_zero_pfn(page_to_pfn(page));
}
#else
static inline bool is_pinnable_page(struct page *page)
MF_MUST_KILL = 1 << 2,
MF_SOFT_OFFLINE = 1 << 3,
MF_UNPOISON = 1 << 4,
+ MF_SW_SIMULATED = 1 << 5,
};
extern int memory_failure(unsigned long pfn, int flags);
extern void memory_failure_queue(unsigned long pfn, int flags);
* struct folio - Represents a contiguous set of bytes.
* @flags: Identical to the page flags.
* @lru: Least Recently Used list; tracks how recently this folio was used.
+ * @mlock_count: Number of times this folio has been pinned by mlock().
* @mapping: The file this page belongs to, or refers to the anon_vma for
* anonymous memory.
* @index: Offset within the file, in units of pages. For anonymous memory,
unsigned long flags;
union {
struct list_head lru;
+ /* private: avoid cluttering the output */
struct {
void *__filler;
+ /* public: */
unsigned int mlock_count;
+ /* private: */
};
+ /* public: */
};
struct address_space *mapping;
pgoff_t index;
IFF_FAILOVER_SLAVE = 1<<28,
IFF_L3MDEV_RX_HANDLER = 1<<29,
IFF_LIVE_RENAME_OK = 1<<30,
- IFF_TX_SKB_NO_LINEAR = 1<<31,
+ IFF_TX_SKB_NO_LINEAR = BIT_ULL(31),
IFF_CHANGE_PROTO_DOWN = BIT_ULL(32),
};
bool was_async);
/*
- * Per-inode description. This must be directly after the inode struct.
+ * Per-inode context. This wraps the VFS inode.
*/
-struct netfs_i_context {
+struct netfs_inode {
+ struct inode inode; /* The VFS inode */
const struct netfs_request_ops *ops;
#if IS_ENABLED(CONFIG_FSCACHE)
struct fscache_cookie *cache;
*/
struct netfs_request_ops {
int (*init_request)(struct netfs_io_request *rreq, struct file *file);
+ void (*free_request)(struct netfs_io_request *rreq);
int (*begin_cache_operation)(struct netfs_io_request *rreq);
+
void (*expand_readahead)(struct netfs_io_request *rreq);
bool (*clamp_length)(struct netfs_io_subrequest *subreq);
void (*issue_read)(struct netfs_io_subrequest *subreq);
bool (*is_still_valid)(struct netfs_io_request *rreq);
int (*check_write_begin)(struct file *file, loff_t pos, unsigned len,
- struct folio *folio, void **_fsdata);
+ struct folio **foliop, void **_fsdata);
void (*done)(struct netfs_io_request *rreq);
- void (*cleanup)(struct address_space *mapping, void *netfs_priv);
};
/*
* boundary as appropriate.
*/
enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq,
- loff_t i_size);
+ loff_t i_size);
/* Prepare a write operation, working out what part of the write we can
* actually do.
struct readahead_control;
extern void netfs_readahead(struct readahead_control *);
int netfs_read_folio(struct file *, struct folio *);
-extern int netfs_write_begin(struct file *, struct address_space *,
+extern int netfs_write_begin(struct netfs_inode *,
+ struct file *, struct address_space *,
loff_t, unsigned int, struct folio **,
void **);
extern void netfs_stats_show(struct seq_file *);
/**
- * netfs_i_context - Get the netfs inode context from the inode
+ * netfs_inode - Get the netfs inode context from the inode
* @inode: The inode to query
*
* Get the netfs lib inode context from the network filesystem's inode. The
* context struct is expected to directly follow on from the VFS inode struct.
*/
-static inline struct netfs_i_context *netfs_i_context(struct inode *inode)
-{
- return (void *)inode + sizeof(*inode);
-}
-
-/**
- * netfs_inode - Get the netfs inode from the inode context
- * @ctx: The context to query
- *
- * Get the netfs inode from the netfs library's inode context. The VFS inode
- * is expected to directly precede the context struct.
- */
-static inline struct inode *netfs_inode(struct netfs_i_context *ctx)
+static inline struct netfs_inode *netfs_inode(struct inode *inode)
{
- return (void *)ctx - sizeof(struct inode);
+ return container_of(inode, struct netfs_inode, inode);
}
/**
- * netfs_i_context_init - Initialise a netfs lib context
- * @inode: The inode with which the context is associated
+ * netfs_inode_init - Initialise a netfslib inode context
+ * @ctx: The netfs inode to initialise
* @ops: The netfs's operations list
*
* Initialise the netfs library context struct. This is expected to follow on
* directly from the VFS inode struct.
*/
-static inline void netfs_i_context_init(struct inode *inode,
- const struct netfs_request_ops *ops)
+static inline void netfs_inode_init(struct netfs_inode *ctx,
+ const struct netfs_request_ops *ops)
{
- struct netfs_i_context *ctx = netfs_i_context(inode);
-
- memset(ctx, 0, sizeof(*ctx));
ctx->ops = ops;
- ctx->remote_i_size = i_size_read(inode);
+ ctx->remote_i_size = i_size_read(&ctx->inode);
+#if IS_ENABLED(CONFIG_FSCACHE)
+ ctx->cache = NULL;
+#endif
}
/**
* netfs_resize_file - Note that a file got resized
- * @inode: The inode being resized
+ * @ctx: The netfs inode being resized
* @new_i_size: The new file size
*
* Inform the netfs lib that a file got resized so that it can adjust its state.
*/
-static inline void netfs_resize_file(struct inode *inode, loff_t new_i_size)
+static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size)
{
- struct netfs_i_context *ctx = netfs_i_context(inode);
-
ctx->remote_i_size = new_i_size;
}
/**
* netfs_i_cookie - Get the cache cookie from the inode
- * @inode: The inode to query
+ * @ctx: The netfs inode to query
*
* Get the caching cookie (if enabled) from the network filesystem's inode.
*/
-static inline struct fscache_cookie *netfs_i_cookie(struct inode *inode)
+static inline struct fscache_cookie *netfs_i_cookie(struct netfs_inode *ctx)
{
#if IS_ENABLED(CONFIG_FSCACHE)
- struct netfs_i_context *ctx = netfs_i_context(inode);
return ctx->cache;
#else
return NULL;
};
enum {
- NVME_CAP_CRMS_CRIMS = 1ULL << 59,
- NVME_CAP_CRMS_CRWMS = 1ULL << 60,
+ NVME_CAP_CRMS_CRWMS = 1ULL << 59,
+ NVME_CAP_CRMS_CRIMS = 1ULL << 60,
};
struct nvme_id_power_state {
__le32 cdw2[2];
__le64 metadata;
union nvme_data_ptr dptr;
+ struct_group(cdws,
__le32 cdw10;
__le32 cdw11;
__le32 cdw12;
__le32 cdw13;
__le32 cdw14;
__le32 cdw15;
+ );
};
struct nvme_rw_command {
*
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
* Useful for code which doesn't have an ELF function annotation.
+ *
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
#define UNWIND_HINT_TYPE_FUNC 3
+#define UNWIND_HINT_TYPE_ENTRY 4
+#define UNWIND_HINT_TYPE_SAVE 5
+#define UNWIND_HINT_TYPE_RESTORE 6
#ifdef CONFIG_OBJTOOL
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
/* struct unwind_hint */
.popsection
.endm
+.macro STACK_FRAME_NON_STANDARD_FP func:req
+#ifdef CONFIG_FRAME_POINTER
+ STACK_FRAME_NON_STANDARD \func
+#endif
+.endm
+
.macro ANNOTATE_NOENDBR
.Lhere_\@:
.pushsection .discard.noendbr
#define ASM_REACHABLE
#else
#define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.endm
.macro STACK_FRAME_NON_STANDARD func:req
.endm
* @mdix_ctrl: User setting of crossover
* @pma_extable: Cached value of PMA/PMD Extended Abilities Register
* @interrupts: Flag interrupts have been enabled
+ * @irq_suspended: Flag indicating PHY is suspended and therefore interrupt
+ * handling shall be postponed until PHY has resumed
+ * @irq_rerun: Flag indicating interrupts occurred while PHY was suspended,
+ * requiring a rerun of the interrupt handler after resume
* @interface: enum phy_interface_t value
* @skb: Netlink message for cable diagnostics
* @nest: Netlink nest used for cable diagnostics
/* Interrupts are enabled */
unsigned interrupts:1;
+ unsigned irq_suspended:1;
+ unsigned irq_rerun:1;
enum phy_state state;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _PLATFORM_FEATURE_H
+#define _PLATFORM_FEATURE_H
+
+#include <linux/bitops.h>
+#include <asm/platform-feature.h>
+
+/* The platform features are starting with the architecture specific ones. */
+
+/* Used to enable platform specific DMA handling for virtio devices. */
+#define PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS (0 + PLATFORM_ARCH_FEAT_N)
+
+#define PLATFORM_FEAT_N (1 + PLATFORM_ARCH_FEAT_N)
+
+void platform_set(unsigned int feature);
+void platform_clear(unsigned int feature);
+bool platform_has(unsigned int feature);
+
+#endif /* _PLATFORM_FEATURE_H */
extern void pm_runtime_put_suppliers(struct device *dev);
extern void pm_runtime_new_link(struct device *dev);
extern void pm_runtime_drop_link(struct device_link *link);
-extern void pm_runtime_release_supplier(struct device_link *link, bool check_idle);
+extern void pm_runtime_release_supplier(struct device_link *link);
extern int devm_pm_runtime_enable(struct device *dev);
static inline void pm_runtime_put_suppliers(struct device *dev) {}
static inline void pm_runtime_new_link(struct device *dev) {}
static inline void pm_runtime_drop_link(struct device_link *link) {}
-static inline void pm_runtime_release_supplier(struct device_link *link,
- bool check_idle) {}
+static inline void pm_runtime_release_supplier(struct device_link *link) {}
#endif /* !CONFIG_PM */
#define printk_deferred_enter __printk_safe_enter
#define printk_deferred_exit __printk_safe_exit
-extern void printk_prefer_direct_enter(void);
-extern void printk_prefer_direct_exit(void);
-
extern bool pr_flush(int timeout_ms, bool reset_on_progress);
/*
{
}
-static inline void printk_prefer_direct_enter(void)
-{
-}
-
-static inline void printk_prefer_direct_exit(void)
-{
-}
-
static inline bool pr_flush(int timeout_ms, bool reset_on_progress)
{
return true;
struct notifier_block;
void add_device_randomness(const void *buf, size_t len);
-void add_bootloader_randomness(const void *buf, size_t len);
+void __init add_bootloader_randomness(const void *buf, size_t len);
void add_input_randomness(unsigned int type, unsigned int code,
unsigned int value) __latent_entropy;
void add_interrupt_randomness(int irq) __latent_entropy;
int __init random_init(const char *command_line);
bool rng_is_initialized(void);
-bool rng_has_arch_random(void);
int wait_for_random_bytes(void);
/* Calls wait_for_random_bytes() and then calls get_random_bytes(buf, nbytes).
unsigned long flags;
};
-#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) { \
- .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
- .interval = interval_init, \
- .burst = burst_init, \
+#define RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, flags_init) { \
+ .lock = __RAW_SPIN_LOCK_UNLOCKED(name.lock), \
+ .interval = interval_init, \
+ .burst = burst_init, \
+ .flags = flags_init, \
}
+#define RATELIMIT_STATE_INIT(name, interval_init, burst_init) \
+ RATELIMIT_STATE_INIT_FLAGS(name, interval_init, burst_init, 0)
+
#define RATELIMIT_STATE_INIT_DISABLED \
RATELIMIT_STATE_INIT(ratelimit_state, 0, DEFAULT_RATELIMIT_BURST)
extern __must_check bool refcount_dec_if_one(refcount_t *r);
extern __must_check bool refcount_dec_not_one(refcount_t *r);
-extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock);
-extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock);
+extern __must_check bool refcount_dec_and_mutex_lock(refcount_t *r, struct mutex *lock) __cond_acquires(lock);
+extern __must_check bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) __cond_acquires(lock);
extern __must_check bool refcount_dec_and_lock_irqsave(refcount_t *r,
spinlock_t *lock,
- unsigned long *flags);
+ unsigned long *flags) __cond_acquires(lock);
#endif /* _LINUX_REFCOUNT_H */
devm_reset_control_bulk_get_optional_exclusive(struct device *dev, int num_rstcs,
struct reset_control_bulk_data *rstcs)
{
- return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, true, false, true);
+ return __devm_reset_control_bulk_get(dev, num_rstcs, rstcs, false, true, true);
}
/**
struct usb_device *pusb_dev;
struct usb_interface *pusb_intf;
struct usb_sg_request current_sg;
- unsigned char *iobuf;
- dma_addr_t iobuf_dma;
struct timer_list sg_timer;
struct mutex dev_mutex;
extern __noreturn void do_group_exit(int);
extern void exit_files(struct task_struct *);
-extern void exit_itimers(struct signal_struct *);
+extern void exit_itimers(struct task_struct *);
extern pid_t kernel_clone(struct kernel_clone_args *kargs);
struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
#include <linux/notifier.h>
#include <linux/types.h>
-#define SCMI_MAX_STR_SIZE 64
-#define SCMI_MAX_NUM_RATES 16
+#define SCMI_MAX_STR_SIZE 64
+#define SCMI_SHORT_NAME_MAX_SIZE 16
+#define SCMI_MAX_NUM_RATES 16
/**
* struct scmi_revision_info - version information structure
u8 num_protocols;
u8 num_agents;
u32 impl_ver;
- char vendor_id[SCMI_MAX_STR_SIZE];
- char sub_vendor_id[SCMI_MAX_STR_SIZE];
+ char vendor_id[SCMI_SHORT_NAME_MAX_SIZE];
+ char sub_vendor_id[SCMI_SHORT_NAME_MAX_SIZE];
};
struct scmi_clock_info {
void (*unthrottle)(struct uart_port *);
void (*send_xchar)(struct uart_port *, char ch);
void (*stop_rx)(struct uart_port *);
+ void (*start_rx)(struct uart_port *);
void (*enable_ms)(struct uart_port *);
void (*break_ctl)(struct uart_port *, int ctl);
int (*startup)(struct uart_port *);
static inline int setup_earlycon(char *buf) { return 0; }
#endif
+static inline bool uart_console_enabled(struct uart_port *port)
+{
+ return uart_console(port) && (port->cons->flags & CON_ENABLED);
+}
+
struct uart_port *uart_get_console(struct uart_port *ports, int nr,
struct console *c);
int uart_parse_earlycon(char *p, unsigned char *iotype, resource_size_t *addr,
bool has_crossts;
int int_snapshot_num;
int ext_snapshot_num;
+ bool int_snapshot_en;
bool ext_snapshot_en;
bool multi_msi_en;
int msi_mac_vec;
extern __be32 *xdr_reserve_space(struct xdr_stream *xdr, size_t nbytes);
extern int xdr_reserve_space_vec(struct xdr_stream *xdr, struct kvec *vec,
size_t nbytes);
-extern void xdr_commit_encode(struct xdr_stream *xdr);
+extern void __xdr_commit_encode(struct xdr_stream *xdr);
extern void xdr_truncate_encode(struct xdr_stream *xdr, size_t len);
extern int xdr_restrict_buflen(struct xdr_stream *xdr, int newbuflen);
extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
xdr_set_scratch_buffer(xdr, NULL, 0);
}
+/**
+ * xdr_commit_encode - Ensure all data is written to xdr->buf
+ * @xdr: pointer to xdr_stream
+ *
+ * Handle encoding across page boundaries by giving the caller a
+ * temporary location to write to, then later copying the data into
+ * place. __xdr_commit_encode() does that copying.
+ */
+static inline void xdr_commit_encode(struct xdr_stream *xdr)
+{
+ if (unlikely(xdr->scratch.iov_len))
+ __xdr_commit_encode(xdr);
+}
+
/**
* xdr_stream_remaining - Return the number of bytes remaining in the stream
* @xdr: pointer to struct xdr_stream
int flags;
};
+#ifdef CONFIG_SYSFB
+
+void sysfb_disable(void);
+
+#else /* CONFIG_SYSFB */
+
+static inline void sysfb_disable(void)
+{
+}
+
+#endif /* CONFIG_SYSFB */
+
#ifdef CONFIG_EFI
extern struct efifb_dmi_info efifb_dmi_list[];
bool sysfb_parse_mode(const struct screen_info *si,
struct simplefb_platform_data *mode);
-int sysfb_create_simplefb(const struct screen_info *si,
- const struct simplefb_platform_data *mode);
+struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
+ const struct simplefb_platform_data *mode);
#else /* CONFIG_SYSFB_SIMPLE */
return false;
}
-static inline int sysfb_create_simplefb(const struct screen_info *si,
- const struct simplefb_platform_data *mode)
+static inline struct platform_device *sysfb_create_simplefb(const struct screen_info *si,
+ const struct simplefb_platform_data *mode)
{
- return -EINVAL;
+ return ERR_PTR(-EINVAL);
}
#endif /* CONFIG_SYSFB_SIMPLE */
* for the device
* @vdev: vdpa device
* Returns virtqueue algin requirement
- * @get_vq_group: Get the group id for a specific virtqueue
+ * @get_vq_group: Get the group id for a specific
+ * virtqueue (optional)
* @vdev: vdpa device
* @idx: virtqueue index
* Returns u32: group id for this virtqueue
* Returns the iova range supported by
* the device.
* @set_group_asid: Set address space identifier for a
- * virtqueue group
+ * virtqueue group (optional)
* @vdev: vdpa device
* @group: virtqueue group
* @asid: address space id for this group
WARN_ON(status & VIRTIO_CONFIG_S_DRIVER_OK);
+#ifdef CONFIG_VIRTIO_HARDEN_NOTIFICATION
/*
* The virtio_synchronize_cbs() makes sure vring_interrupt()
* will see the driver specific setup if it sees vq->broken
*/
virtio_synchronize_cbs(dev);
__virtio_unbreak_device(dev);
+#endif
/*
* The transport should ensure the visibility of vq->broken
* before setting DRIVER_OK. See the comments for the transport
_r; \
})
-#ifdef CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
-int arch_has_restricted_virtio_memory_access(void);
-#else
-static inline int arch_has_restricted_virtio_memory_access(void)
-{
- return 0;
-}
-#endif /* CONFIG_ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS */
-
#endif /* _LINUX_VIRTIO_CONFIG_H */
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2010 - 2013 UNISYS CORPORATION
- * All rights reserved.
- */
-
-/*
- * This header file is to be included by other kernel mode components that
- * implement a particular kind of visor_device. Each of these other kernel
- * mode components is called a visor device driver. Refer to visortemplate
- * for a minimal sample visor device driver.
- *
- * There should be nothing in this file that is private to the visorbus
- * bus implementation itself.
- */
-
-#ifndef __VISORBUS_H__
-#define __VISORBUS_H__
-
-#include <linux/device.h>
-
-#define VISOR_CHANNEL_SIGNATURE ('L' << 24 | 'N' << 16 | 'C' << 8 | 'E')
-
-/*
- * enum channel_serverstate
- * @CHANNELSRV_UNINITIALIZED: Channel is in an undefined state.
- * @CHANNELSRV_READY: Channel has been initialized by server.
- */
-enum channel_serverstate {
- CHANNELSRV_UNINITIALIZED = 0,
- CHANNELSRV_READY = 1
-};
-
-/*
- * enum channel_clientstate
- * @CHANNELCLI_DETACHED:
- * @CHANNELCLI_DISABLED: Client can see channel but is NOT allowed to use it
- * unless given TBD* explicit request
- * (should actually be < DETACHED).
- * @CHANNELCLI_ATTACHING: Legacy EFI client request for EFI server to attach.
- * @CHANNELCLI_ATTACHED: Idle, but client may want to use channel any time.
- * @CHANNELCLI_BUSY: Client either wants to use or is using channel.
- * @CHANNELCLI_OWNED: "No worries" state - client can access channel
- * anytime.
- */
-enum channel_clientstate {
- CHANNELCLI_DETACHED = 0,
- CHANNELCLI_DISABLED = 1,
- CHANNELCLI_ATTACHING = 2,
- CHANNELCLI_ATTACHED = 3,
- CHANNELCLI_BUSY = 4,
- CHANNELCLI_OWNED = 5
-};
-
-/*
- * Values for VISOR_CHANNEL_PROTOCOL.Features: This define exists so that
- * a guest can look at the FeatureFlags in the io channel, and configure the
- * driver to use interrupts or not based on this setting. All feature bits for
- * all channels should be defined here. The io channel feature bits are defined
- * below.
- */
-#define VISOR_DRIVER_ENABLES_INTS (0x1ULL << 1)
-#define VISOR_CHANNEL_IS_POLLING (0x1ULL << 3)
-#define VISOR_IOVM_OK_DRIVER_DISABLING_INTS (0x1ULL << 4)
-#define VISOR_DRIVER_DISABLES_INTS (0x1ULL << 5)
-#define VISOR_DRIVER_ENHANCED_RCVBUF_CHECKING (0x1ULL << 6)
-
-/*
- * struct channel_header - Common Channel Header
- * @signature: Signature.
- * @legacy_state: DEPRECATED - being replaced by.
- * @header_size: sizeof(struct channel_header).
- * @size: Total size of this channel in bytes.
- * @features: Flags to modify behavior.
- * @chtype: Channel type: data, bus, control, etc..
- * @partition_handle: ID of guest partition.
- * @handle: Device number of this channel in client.
- * @ch_space_offset: Offset in bytes to channel specific area.
- * @version_id: Struct channel_header Version ID.
- * @partition_index: Index of guest partition.
- * @zone_uuid: Guid of Channel's zone.
- * @cli_str_offset: Offset from channel header to null-terminated
- * ClientString (0 if ClientString not present).
- * @cli_state_boot: CHANNEL_CLIENTSTATE of pre-boot EFI client of this
- * channel.
- * @cmd_state_cli: CHANNEL_COMMANDSTATE (overloaded in Windows drivers, see
- * ServerStateUp, ServerStateDown, etc).
- * @cli_state_os: CHANNEL_CLIENTSTATE of Guest OS client of this channel.
- * @ch_characteristic: CHANNEL_CHARACTERISTIC_<xxx>.
- * @cmd_state_srv: CHANNEL_COMMANDSTATE (overloaded in Windows drivers, see
- * ServerStateUp, ServerStateDown, etc).
- * @srv_state: CHANNEL_SERVERSTATE.
- * @cli_error_boot: Bits to indicate err states for boot clients, so err
- * messages can be throttled.
- * @cli_error_os: Bits to indicate err states for OS clients, so err
- * messages can be throttled.
- * @filler: Pad out to 128 byte cacheline.
- * @recover_channel: Please add all new single-byte values below here.
- */
-struct channel_header {
- u64 signature;
- u32 legacy_state;
- /* SrvState, CliStateBoot, and CliStateOS below */
- u32 header_size;
- u64 size;
- u64 features;
- guid_t chtype;
- u64 partition_handle;
- u64 handle;
- u64 ch_space_offset;
- u32 version_id;
- u32 partition_index;
- guid_t zone_guid;
- u32 cli_str_offset;
- u32 cli_state_boot;
- u32 cmd_state_cli;
- u32 cli_state_os;
- u32 ch_characteristic;
- u32 cmd_state_srv;
- u32 srv_state;
- u8 cli_error_boot;
- u8 cli_error_os;
- u8 filler[1];
- u8 recover_channel;
-} __packed;
-
-#define VISOR_CHANNEL_ENABLE_INTS (0x1ULL << 0)
-
-/*
- * struct signal_queue_header - Subheader for the Signal Type variation of the
- * Common Channel.
- * @version: SIGNAL_QUEUE_HEADER Version ID.
- * @chtype: Queue type: storage, network.
- * @size: Total size of this queue in bytes.
- * @sig_base_offset: Offset to signal queue area.
- * @features: Flags to modify behavior.
- * @num_sent: Total # of signals placed in this queue.
- * @num_overflows: Total # of inserts failed due to full queue.
- * @signal_size: Total size of a signal for this queue.
- * @max_slots: Max # of slots in queue, 1 slot is always empty.
- * @max_signals: Max # of signals in queue (MaxSignalSlots-1).
- * @head: Queue head signal #.
- * @num_received: Total # of signals removed from this queue.
- * @tail: Queue tail signal.
- * @reserved1: Reserved field.
- * @reserved2: Reserved field.
- * @client_queue:
- * @num_irq_received: Total # of Interrupts received. This is incremented by the
- * ISR in the guest windows driver.
- * @num_empty: Number of times that visor_signal_remove is called and
- * returned Empty Status.
- * @errorflags: Error bits set during SignalReinit to denote trouble with
- * client's fields.
- * @filler: Pad out to 64 byte cacheline.
- */
-struct signal_queue_header {
- /* 1st cache line */
- u32 version;
- u32 chtype;
- u64 size;
- u64 sig_base_offset;
- u64 features;
- u64 num_sent;
- u64 num_overflows;
- u32 signal_size;
- u32 max_slots;
- u32 max_signals;
- u32 head;
- /* 2nd cache line */
- u64 num_received;
- u32 tail;
- u32 reserved1;
- u64 reserved2;
- u64 client_queue;
- u64 num_irq_received;
- u64 num_empty;
- u32 errorflags;
- u8 filler[12];
-} __packed;
-
-/* VISORCHANNEL Guids */
-/* {414815ed-c58c-11da-95a9-00e08161165f} */
-#define VISOR_VHBA_CHANNEL_GUID \
- GUID_INIT(0x414815ed, 0xc58c, 0x11da, \
- 0x95, 0xa9, 0x0, 0xe0, 0x81, 0x61, 0x16, 0x5f)
-#define VISOR_VHBA_CHANNEL_GUID_STR \
- "414815ed-c58c-11da-95a9-00e08161165f"
-struct visorchipset_state {
- u32 created:1;
- u32 attached:1;
- u32 configured:1;
- u32 running:1;
- /* Remaining bits in this 32-bit word are reserved. */
-};
-
-/**
- * struct visor_device - A device type for things "plugged" into the visorbus
- * bus
- * @visorchannel: Points to the channel that the device is
- * associated with.
- * @channel_type_guid: Identifies the channel type to the bus driver.
- * @device: Device struct meant for use by the bus driver
- * only.
- * @list_all: Used by the bus driver to enumerate devices.
- * @timer: Timer fired periodically to do interrupt-type
- * activity.
- * @being_removed: Indicates that the device is being removed from
- * the bus. Private bus driver use only.
- * @visordriver_callback_lock: Used by the bus driver to lock when adding and
- * removing devices.
- * @pausing: Indicates that a change towards a paused state.
- * is in progress. Only modified by the bus driver.
- * @resuming: Indicates that a change towards a running state
- * is in progress. Only modified by the bus driver.
- * @chipset_bus_no: Private field used by the bus driver.
- * @chipset_dev_no: Private field used the bus driver.
- * @state: Used to indicate the current state of the
- * device.
- * @inst: Unique GUID for this instance of the device.
- * @name: Name of the device.
- * @pending_msg_hdr: For private use by bus driver to respond to
- * hypervisor requests.
- * @vbus_hdr_info: A pointer to header info. Private use by bus
- * driver.
- * @partition_guid: Indicates client partion id. This should be the
- * same across all visor_devices in the current
- * guest. Private use by bus driver only.
- */
-struct visor_device {
- struct visorchannel *visorchannel;
- guid_t channel_type_guid;
- /* These fields are for private use by the bus driver only. */
- struct device device;
- struct list_head list_all;
- struct timer_list timer;
- bool timer_active;
- bool being_removed;
- struct mutex visordriver_callback_lock; /* synchronize probe/remove */
- bool pausing;
- bool resuming;
- u32 chipset_bus_no;
- u32 chipset_dev_no;
- struct visorchipset_state state;
- guid_t inst;
- u8 *name;
- struct controlvm_message_header *pending_msg_hdr;
- void *vbus_hdr_info;
- guid_t partition_guid;
- struct dentry *debugfs_dir;
- struct dentry *debugfs_bus_info;
-};
-
-#define to_visor_device(x) container_of(x, struct visor_device, device)
-
-typedef void (*visorbus_state_complete_func) (struct visor_device *dev,
- int status);
-
-/*
- * This struct describes a specific visor channel, by providing its GUID, name,
- * and sizes.
- */
-struct visor_channeltype_descriptor {
- const guid_t guid;
- const char *name;
- u64 min_bytes;
- u32 version;
-};
-
-/**
- * struct visor_driver - Information provided by each visor driver when it
- * registers with the visorbus driver
- * @name: Name of the visor driver.
- * @owner: The module owner.
- * @channel_types: Types of channels handled by this driver, ending with
- * a zero GUID. Our specialized BUS.match() method knows
- * about this list, and uses it to determine whether this
- * driver will in fact handle a new device that it has
- * detected.
- * @probe: Called when a new device comes online, by our probe()
- * function specified by driver.probe() (triggered
- * ultimately by some call to driver_register(),
- * bus_add_driver(), or driver_attach()).
- * @remove: Called when a new device is removed, by our remove()
- * function specified by driver.remove() (triggered
- * ultimately by some call to device_release_driver()).
- * @channel_interrupt: Called periodically, whenever there is a possiblity
- * that "something interesting" may have happened to the
- * channel.
- * @pause: Called to initiate a change of the device's state. If
- * the return valu`e is < 0, there was an error and the
- * state transition will NOT occur. If the return value
- * is >= 0, then the state transition was INITIATED
- * successfully, and complete_func() will be called (or
- * was just called) with the final status when either the
- * state transition fails or completes successfully.
- * @resume: Behaves similar to pause.
- * @driver: Private reference to the device driver. For use by bus
- * driver only.
- */
-struct visor_driver {
- const char *name;
- struct module *owner;
- struct visor_channeltype_descriptor *channel_types;
- int (*probe)(struct visor_device *dev);
- void (*remove)(struct visor_device *dev);
- void (*channel_interrupt)(struct visor_device *dev);
- int (*pause)(struct visor_device *dev,
- visorbus_state_complete_func complete_func);
- int (*resume)(struct visor_device *dev,
- visorbus_state_complete_func complete_func);
-
- /* These fields are for private use by the bus driver only. */
- struct device_driver driver;
-};
-
-#define to_visor_driver(x) (container_of(x, struct visor_driver, driver))
-
-int visor_check_channel(struct channel_header *ch, struct device *dev,
- const guid_t *expected_uuid, char *chname,
- u64 expected_min_bytes, u32 expected_version,
- u64 expected_signature);
-
-int visorbus_register_visor_driver(struct visor_driver *drv);
-void visorbus_unregister_visor_driver(struct visor_driver *drv);
-int visorbus_read_channel(struct visor_device *dev,
- unsigned long offset, void *dest,
- unsigned long nbytes);
-int visorbus_write_channel(struct visor_device *dev,
- unsigned long offset, void *src,
- unsigned long nbytes);
-int visorbus_enable_channel_interrupts(struct visor_device *dev);
-void visorbus_disable_channel_interrupts(struct visor_device *dev);
-
-int visorchannel_signalremove(struct visorchannel *channel, u32 queue,
- void *msg);
-int visorchannel_signalinsert(struct visorchannel *channel, u32 queue,
- void *msg);
-bool visorchannel_signalempty(struct visorchannel *channel, u32 queue);
-const guid_t *visorchannel_get_guid(struct visorchannel *channel);
-
-#define BUS_ROOT_DEVICE UINT_MAX
-struct visor_device *visorbus_get_device_by_id(u32 bus_no, u32 dev_no,
- struct visor_device *from);
-#endif
void free_vm_area(struct vm_struct *area);
extern struct vm_struct *remove_vm_area(const void *addr);
extern struct vm_struct *find_vm_area(const void *addr);
+struct vmap_area *find_vmap_area(unsigned long addr);
static inline bool is_vm_area_hugepages(const void *addr)
{
* alloc_ordered_workqueue - allocate an ordered workqueue
* @fmt: printf format for the name of the workqueue
* @flags: WQ_* flags (only WQ_FREEZABLE and WQ_MEM_RECLAIM are meaningful)
- * @args...: args for @fmt
+ * @args: args for @fmt
*
* Allocate an ordered workqueue. An ordered workqueue executes at
* most one work item at any given time in the queued order. They are
struct delayed_work *dwork, unsigned long delay);
extern bool queue_rcu_work(struct workqueue_struct *wq, struct rcu_work *rwork);
-extern void flush_workqueue(struct workqueue_struct *wq);
+extern void __flush_workqueue(struct workqueue_struct *wq);
extern void drain_workqueue(struct workqueue_struct *wq);
extern int schedule_on_each_cpu(work_func_t func);
return queue_work(system_wq, work);
}
+/*
+ * Detect attempt to flush system-wide workqueues at compile time when possible.
+ *
+ * See https://lkml.kernel.org/r/49925af7-78a8-a3dd-bce6-cfc02e1a9236@I-love.SAKURA.ne.jp
+ * for reasons and steps for converting system-wide workqueues into local workqueues.
+ */
+extern void __warn_flushing_systemwide_wq(void)
+ __compiletime_warning("Please avoid flushing system-wide workqueues.");
+
/**
* flush_scheduled_work - ensure that any scheduled work has run to completion.
*
* Forces execution of the kernel-global workqueue and blocks until its
* completion.
*
- * Think twice before calling this function! It's very easy to get into
- * trouble if you don't take great care. Either of the following situations
- * will lead to deadlock:
+ * It's very easy to get into trouble if you don't take great care.
+ * Either of the following situations will lead to deadlock:
*
* One of the work items currently on the workqueue needs to acquire
* a lock held by your code or its caller.
* need to know that a particular work item isn't queued and isn't running.
* In such cases you should use cancel_delayed_work_sync() or
* cancel_work_sync() instead.
+ *
+ * Please stop calling this function! A conversion to stop flushing system-wide
+ * workqueues is in progress. This function will be removed after all in-tree
+ * users stopped calling this function.
*/
-static inline void flush_scheduled_work(void)
-{
- flush_workqueue(system_wq);
-}
+/*
+ * The background of commit 771c035372a036f8 ("deprecate the
+ * '__deprecated' attribute warnings entirely and for good") is that,
+ * since Linus builds all modules between every single pull he does,
+ * the standard kernel build needs to be _clean_ in order to be able to
+ * notice when new problems happen. Therefore, don't emit warning while
+ * there are in-tree users.
+ */
+#define flush_scheduled_work() \
+({ \
+ if (0) \
+ __warn_flushing_systemwide_wq(); \
+ __flush_workqueue(system_wq); \
+})
+
+/*
+ * Although there is no longer in-tree caller, for now just emit warning
+ * in order to give out-of-tree callers time to update.
+ */
+#define flush_workqueue(wq) \
+({ \
+ struct workqueue_struct *_wq = (wq); \
+ \
+ if ((__builtin_constant_p(_wq == system_wq) && \
+ _wq == system_wq) || \
+ (__builtin_constant_p(_wq == system_highpri_wq) && \
+ _wq == system_highpri_wq) || \
+ (__builtin_constant_p(_wq == system_long_wq) && \
+ _wq == system_long_wq) || \
+ (__builtin_constant_p(_wq == system_unbound_wq) && \
+ _wq == system_unbound_wq) || \
+ (__builtin_constant_p(_wq == system_freezable_wq) && \
+ _wq == system_freezable_wq) || \
+ (__builtin_constant_p(_wq == system_power_efficient_wq) && \
+ _wq == system_power_efficient_wq) || \
+ (__builtin_constant_p(_wq == system_freezable_power_efficient_wq) && \
+ _wq == system_freezable_power_efficient_wq)) \
+ __warn_flushing_systemwide_wq(); \
+ __flush_workqueue(_wq); \
+})
/**
* schedule_delayed_work_on - queue work in global workqueue on CPU after delay
void xas_init_marks(const struct xa_state *);
bool xas_nomem(struct xa_state *, gfp_t);
+void xas_destroy(struct xa_state *);
void xas_pause(struct xa_state *);
void xas_create_range(struct xa_state *);
{
const struct inet6_dev *idev = __in6_dev_get(dev);
+ if (unlikely(!idev))
+ return true;
+
return !!idev->cnf.ignore_routes_with_linkdown;
}
#define AMT_STATUS_MAX (__AMT_STATUS_MAX - 1)
+/* Gateway events only */
+enum amt_event {
+ AMT_EVENT_NONE,
+ AMT_EVENT_RECEIVE,
+ AMT_EVENT_SEND_DISCOVERY,
+ AMT_EVENT_SEND_REQUEST,
+ __AMT_EVENT_MAX,
+};
+
struct amt_header {
#if defined(__LITTLE_ENDIAN_BITFIELD)
u8 type:4,
struct hlist_head sources[];
};
+#define AMT_MAX_EVENTS 16
+struct amt_events {
+ enum amt_event event;
+ struct sk_buff *skb;
+};
+
struct amt_dev {
struct net_device *dev;
struct net_device *stream_dev;
struct delayed_work req_wq;
/* Protected by RTNL */
struct delayed_work secret_wq;
+ struct work_struct event_wq;
/* AMT status */
enum amt_status status;
/* Generated key */
/* Used only in gateway mode */
u64 mac:48,
reserved:16;
+ /* AMT gateway side message handler queue */
+ struct amt_events events[AMT_MAX_EVENTS];
+ u8 event_idx;
+ u8 nr_events;
};
#define AMT_TOS 0xc0
};
void l2cap_chan_hold(struct l2cap_chan *c);
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c);
void l2cap_chan_put(struct l2cap_chan *c);
static inline void l2cap_chan_lock(struct l2cap_chan *chan)
* cfg80211_obss_color_collision_notify - notify about bss color collision
* @dev: network device
* @color_bitmap: representations of the colors that the local BSS is aware of
+ * @gfp: allocation flags
*/
static inline int cfg80211_obss_color_collision_notify(struct net_device *dev,
- u64 color_bitmap)
+ u64 color_bitmap, gfp_t gfp)
{
- return cfg80211_bss_color_notify(dev, GFP_KERNEL,
+ return cfg80211_bss_color_notify(dev, gfp,
NL80211_CMD_OBSS_COLOR_COLLISION,
0, color_bitmap);
}
FLOW_ACTION_PIPE,
FLOW_ACTION_VLAN_PUSH_ETH,
FLOW_ACTION_VLAN_POP_ETH,
+ FLOW_ACTION_CONTINUE,
NUM_FLOW_ACTIONS,
};
enum tc_setup_type type, void *data,
struct flow_block_offload *bo,
void (*cleanup)(struct flow_block_cb *block_cb));
+bool flow_indr_dev_exists(void);
#endif /* _NET_FLOW_OFFLOAD_H */
#undef INET_CSK_CLEAR_TIMERS
struct inet_bind_bucket;
-struct inet_bind2_bucket;
struct tcp_congestion_ops;
/*
*
* @icsk_accept_queue: FIFO of established children
* @icsk_bind_hash: Bind node
- * @icsk_bind2_hash: Bind node in the bhash2 table
* @icsk_timeout: Timeout
* @icsk_retransmit_timer: Resend (no ack)
* @icsk_rto: Retransmit timeout
struct inet_sock icsk_inet;
struct request_sock_queue icsk_accept_queue;
struct inet_bind_bucket *icsk_bind_hash;
- struct inet_bind2_bucket *icsk_bind2_hash;
unsigned long icsk_timeout;
struct timer_list icsk_retransmit_timer;
struct timer_list icsk_delack_timer;
struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu);
-#define TCP_PINGPONG_THRESH 3
+#define TCP_PINGPONG_THRESH 1
static inline void inet_csk_enter_pingpong_mode(struct sock *sk)
{
return inet_csk(sk)->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
}
-static inline void inet_csk_inc_pingpong_cnt(struct sock *sk)
-{
- struct inet_connection_sock *icsk = inet_csk(sk);
-
- if (icsk->icsk_ack.pingpong < U8_MAX)
- icsk->icsk_ack.pingpong++;
-}
-
static inline bool inet_csk_has_ulp(struct sock *sk)
{
return inet_sk(sk)->is_icsk && !!inet_csk(sk)->icsk_ulp_ops;
struct hlist_head owners;
};
-struct inet_bind2_bucket {
- possible_net_t ib_net;
- int l3mdev;
- unsigned short port;
- union {
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr v6_rcv_saddr;
-#endif
- __be32 rcv_saddr;
- };
- /* Node in the inet2_bind_hashbucket chain */
- struct hlist_node node;
- /* List of sockets hashed to this bucket */
- struct hlist_head owners;
-};
-
static inline struct net *ib_net(struct inet_bind_bucket *ib)
{
return read_pnet(&ib->ib_net);
}
-static inline struct net *ib2_net(struct inet_bind2_bucket *ib)
-{
- return read_pnet(&ib->ib_net);
-}
-
#define inet_bind_bucket_for_each(tb, head) \
hlist_for_each_entry(tb, head, node)
struct hlist_head chain;
};
-/* This is synchronized using the inet_bind_hashbucket's spinlock.
- * Instead of having separate spinlocks, the inet_bind2_hashbucket can share
- * the inet_bind_hashbucket's given that in every case where the bhash2 table
- * is useful, a lookup in the bhash table also occurs.
- */
-struct inet_bind2_hashbucket {
- struct hlist_head chain;
-};
-
/* Sockets can be hashed in established or listening table.
* We must use different 'nulls' end-of-chain value for all hash buckets :
* A socket might transition from ESTABLISH to LISTEN state without
*/
struct kmem_cache *bind_bucket_cachep;
struct inet_bind_hashbucket *bhash;
- /* The 2nd binding table hashed by port and address.
- * This is used primarily for expediting the resolution of bind
- * conflicts.
- */
- struct kmem_cache *bind2_bucket_cachep;
- struct inet_bind2_hashbucket *bhash2;
unsigned int bhash_size;
/* The 2nd listener table hashed by local port and address */
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_tcp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
void inet_bind_bucket_destroy(struct kmem_cache *cachep,
struct inet_bind_bucket *tb);
-static inline bool check_bind_bucket_match(struct inet_bind_bucket *tb,
- struct net *net,
- const unsigned short port,
- int l3mdev)
-{
- return net_eq(ib_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev;
-}
-
-struct inet_bind2_bucket *
-inet_bind2_bucket_create(struct kmem_cache *cachep, struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port, int l3mdev,
- const struct sock *sk);
-
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep,
- struct inet_bind2_bucket *tb);
-
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev,
- struct sock *sk,
- struct inet_bind2_hashbucket **head);
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk);
-
static inline u32 inet_bhashfn(const struct net *net, const __u16 lport,
const u32 bhash_size)
{
}
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum);
+ const unsigned short snum);
/* Caller must disable local BH processing. */
int __inet_inherit_port(const struct sock *sk, struct sock *child);
static inline u32 inet_request_mark(const struct sock *sk, struct sk_buff *skb)
{
- if (!sk->sk_mark && sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept)
+ if (!sk->sk_mark &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fwmark_accept))
return skb->mark;
return sk->sk_mark;
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!bound_dev_if && net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!bound_dev_if && READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net, skb->skb_iif);
#endif
#ifdef CONFIG_NET_L3_MASTER_DEV
struct net *net = sock_net(sk);
- if (!net->ipv4.sysctl_tcp_l3mdev_accept)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_l3mdev_accept))
return l3mdev_master_ifindex_by_index(net,
sk->sk_bound_dev_if);
#endif
#define IP_CMSG_CHECKSUM BIT(7)
#define IP_CMSG_RECVFRAGSIZE BIT(8)
+static inline bool sk_is_inet(struct sock *sk)
+{
+ return sk->sk_family == AF_INET || sk->sk_family == AF_INET6;
+}
+
/**
* sk_to_full_sk - Access to a full socket
* @sk: pointer to a socket
static inline bool inet_can_nonlocal_bind(struct net *net,
struct inet_sock *inet)
{
- return net->ipv4.sysctl_ip_nonlocal_bind ||
+ return READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind) ||
inet->freebind || inet->transparent;
}
static inline bool inet_port_requires_bind_service(struct net *net, unsigned short port)
{
- return port < net->ipv4.sysctl_ip_prot_sock;
+ return port < READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
}
#else
void ip_static_sysctl_init(void);
#define IP4_REPLY_MARK(net, mark) \
- ((net)->ipv4.sysctl_fwmark_reflect ? (mark) : 0)
+ (READ_ONCE((net)->ipv4.sysctl_fwmark_reflect) ? (mark) : 0)
static inline bool ip_is_fragment(const struct iphdr *iph)
{
struct net *net = dev_net(dst->dev);
unsigned int mtu;
- if (net->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_use_pmtu) ||
ip_mtu_locked(dst) ||
!forwarding) {
mtu = rt->rt_pmtu;
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags);
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6,
struct rt6_info *rt, unsigned int flags,
struct inet_cork_full *cork);
* @vif: &struct ieee80211_vif pointer from the add_interface callback.
* @color_bitmap: a 64 bit bitmap representing the colors that the local BSS is
* aware of.
+ * @gfp: allocation flags
*/
void
ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap);
+ u64 color_bitmap, gfp_t gfp);
/**
* ieee80211_is_tx_data - check if frame is a data frame
tmpl->len = sizeof(struct nft_set_ext);
}
-static inline void nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
- unsigned int len)
+static inline int nft_set_ext_add_length(struct nft_set_ext_tmpl *tmpl, u8 id,
+ unsigned int len)
{
tmpl->len = ALIGN(tmpl->len, nft_set_ext_types[id].align);
- BUG_ON(tmpl->len > U8_MAX);
+ if (tmpl->len > U8_MAX)
+ return -EINVAL;
+
tmpl->offset[id] = tmpl->len;
tmpl->len += nft_set_ext_types[id].len + len;
+
+ return 0;
}
-static inline void nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
+static inline int nft_set_ext_add(struct nft_set_ext_tmpl *tmpl, u8 id)
{
- nft_set_ext_add_length(tmpl, id, 0);
+ return nft_set_ext_add_length(tmpl, id, 0);
}
static inline void nft_set_ext_init(struct nft_set_ext *ext,
struct nft_hook {
struct list_head list;
- bool inactive;
struct nf_hook_ops ops;
struct rcu_head rcu;
};
/**
* struct nft_traceinfo - nft tracing information and state
*
+ * @trace: other struct members are initialised
+ * @nf_trace: copy of skb->nf_trace before rule evaluation
+ * @type: event type (enum nft_trace_types)
+ * @skbid: hash of skb to be used as trace id
+ * @packet_dumped: packet headers sent in a previous traceinfo message
* @pkt: pktinfo currently processed
* @basechain: base chain currently processed
* @chain: chain currently processed
* @rule: rule that was evaluated
* @verdict: verdict given by rule
- * @type: event type (enum nft_trace_types)
- * @packet_dumped: packet headers sent in a previous traceinfo message
- * @trace: other struct members are initialised
*/
struct nft_traceinfo {
+ bool trace;
+ bool nf_trace;
+ bool packet_dumped;
+ enum nft_trace_types type:8;
+ u32 skbid;
const struct nft_pktinfo *pkt;
const struct nft_base_chain *basechain;
const struct nft_chain *chain;
const struct nft_rule_dp *rule;
const struct nft_verdict *verdict;
- enum nft_trace_types type;
- bool packet_dumped;
- bool trace;
};
void nft_trace_init(struct nft_traceinfo *info, const struct nft_pktinfo *pkt,
NFT_OFFLOAD_MATCH(__key, __base, __field, __len, __reg) \
memset(&(__reg)->mask, 0xff, (__reg)->len);
-int nft_chain_offload_priority(struct nft_base_chain *basechain);
+bool nft_chain_offload_support(const struct nft_base_chain *basechain);
int nft_offload_init(void);
void nft_offload_exit(void);
/* This is used to register protocols. */
struct net_protocol {
- int (*early_demux)(struct sk_buff *skb);
- int (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
#if IS_ENABLED(CONFIG_IPV6)
struct inet6_protocol {
- void (*early_demux)(struct sk_buff *skb);
- void (*early_demux_handler)(struct sk_buff *skb);
int (*handler)(struct sk_buff *skb);
/* This returns an error if we weren't able to handle the error. */
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_raw_l3mdev_accept,
+ return inet_bound_dev_eq(READ_ONCE(net->ipv4.sysctl_raw_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
struct net *net = dev_net(dst->dev);
if (hoplimit == 0)
- hoplimit = net->ipv4.sysctl_ip_default_ttl;
+ hoplimit = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
return hoplimit;
}
* @sk_txtime_report_errors: set report errors mode for SO_TXTIME
* @sk_txtime_unused: unused txtime flags
* @ns_tracker: tracker for netns reference
- * @sk_bind2_node: bind node in the bhash2 table
*/
struct sock {
/*
#endif
struct rcu_head sk_rcu;
netns_tracker ns_tracker;
- struct hlist_node sk_bind2_node;
};
enum sk_pacing {
hlist_add_head(&sk->sk_bind_node, list);
}
-static inline void __sk_del_bind2_node(struct sock *sk)
-{
- __hlist_del(&sk->sk_bind2_node);
-}
-
-static inline void sk_add_bind2_node(struct sock *sk, struct hlist_head *list)
-{
- hlist_add_head(&sk->sk_bind2_node, list);
-}
-
#define sk_for_each(__sk, list) \
hlist_for_each_entry(__sk, list, sk_node)
#define sk_for_each_rcu(__sk, list) \
hlist_for_each_entry_safe(__sk, tmp, list, sk_node)
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
-#define sk_for_each_bound_bhash2(__sk, list) \
- hlist_for_each_entry(__sk, list, sk_bind2_node)
/**
* sk_for_each_entry_offset_rcu - iterate over a list at a given struct offset
/* sysctl_mem values are in pages, we convert them in SK_MEM_QUANTUM units */
static inline long sk_prot_mem_limits(const struct sock *sk, int index)
{
- long val = sk->sk_prot->sysctl_mem[index];
+ long val = READ_ONCE(sk->sk_prot->sysctl_mem[index]);
#if PAGE_SIZE > SK_MEM_QUANTUM
val <<= PAGE_SHIFT - SK_MEM_QUANTUM_SHIFT;
{
/* Does this proto have per netns sysctl_wmem ? */
if (proto->sysctl_wmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_wmem_offset));
- return *proto->sysctl_wmem;
+ return READ_ONCE(*proto->sysctl_wmem);
}
static inline int sk_get_rmem0(const struct sock *sk, const struct proto *proto)
{
/* Does this proto have per netns sysctl_rmem ? */
if (proto->sysctl_rmem_offset)
- return *(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset);
+ return READ_ONCE(*(int *)((void *)sock_net(sk) + proto->sysctl_rmem_offset));
- return *proto->sysctl_rmem;
+ return READ_ONCE(*proto->sysctl_rmem);
}
/* Default TCP Small queue budget is ~1 ms of data (1sec >> 10)
INDIRECT_CALLABLE_DECLARE(void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb));
INDIRECT_CALLABLE_DECLARE(int tcp_v6_rcv(struct sk_buff *skb));
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *skb));
+void tcp_v6_early_demux(struct sk_buff *skb);
#endif
struct tcp_sock *tp = tcp_sk(sk);
s32 delta;
- if (!sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle || tp->packets_out ||
- ca_ops->cong_control)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) ||
+ tp->packets_out || ca_ops->cong_control)
return;
delta = tcp_jiffies32 - tp->lsndtime;
if (delta > inet_csk(sk)->icsk_rto)
static inline int tcp_win_from_space(const struct sock *sk, int space)
{
- int tcp_adv_win_scale = sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale;
+ int tcp_adv_win_scale = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_adv_win_scale);
return tcp_adv_win_scale <= 0 ?
(space>>(-tcp_adv_win_scale)) :
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_intvl ? : net->ipv4.sysctl_tcp_keepalive_intvl;
+ return tp->keepalive_intvl ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
}
static inline int keepalive_time_when(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_time ? : net->ipv4.sysctl_tcp_keepalive_time;
+ return tp->keepalive_time ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
static inline int keepalive_probes(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->keepalive_probes ? : net->ipv4.sysctl_tcp_keepalive_probes;
+ return tp->keepalive_probes ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
}
static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
static inline int tcp_fin_time(const struct sock *sk)
{
- int fin_timeout = tcp_sk(sk)->linger2 ? : sock_net(sk)->ipv4.sysctl_tcp_fin_timeout;
+ int fin_timeout = tcp_sk(sk)->linger2 ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fin_timeout);
const int rto = inet_csk(sk)->icsk_rto;
if (fin_timeout < (rto << 2) - (rto >> 1))
static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
{
struct net *net = sock_net((struct sock *)tp);
- return tp->notsent_lowat ?: net->ipv4.sysctl_tcp_notsent_lowat;
+ return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
}
bool tcp_stream_memory_free(const struct sock *sk, int wake);
struct tls_crypto_info *crypto_info);
#ifdef CONFIG_TLS_DEVICE
-void tls_device_init(void);
+int tls_device_init(void);
void tls_device_cleanup(void);
void tls_device_sk_destruct(struct sock *sk);
int tls_set_device_offload(struct sock *sk, struct tls_context *ctx);
return tls_get_ctx(sk)->rx_conf == TLS_HW;
}
#else
-static inline void tls_device_init(void) {}
+static inline int tls_device_init(void) { return 0; }
static inline void tls_device_cleanup(void) {}
static inline int
typedef struct sock *(*udp_lookup_t)(const struct sk_buff *skb, __be16 sport,
__be16 dport);
-INDIRECT_CALLABLE_DECLARE(void udp_v6_early_demux(struct sk_buff *));
+void udp_v6_early_demux(struct sk_buff *skb);
INDIRECT_CALLABLE_DECLARE(int udpv6_rcv(struct sk_buff *));
struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb,
int dif, int sdif)
{
#if IS_ENABLED(CONFIG_NET_L3_MASTER_DEV)
- return inet_bound_dev_eq(!!net->ipv4.sysctl_udp_l3mdev_accept,
+ return inet_bound_dev_eq(!!READ_ONCE(net->ipv4.sysctl_udp_l3mdev_accept),
bound_dev_if, dif, sdif);
#else
return inet_bound_dev_eq(true, bound_dev_if, dif, sdif);
struct snd_soc_jack_gpio;
-typedef int (*hw_write_t)(void *,const char* ,int);
-
enum snd_soc_pcm_subclass {
SND_SOC_PCM_CLASS_PCM = 0,
SND_SOC_PCM_CLASS_BE = 1,
__field( unsigned int, flags )
__field( struct io_wq_work *, work )
__field( int, rw )
+
+ __string( op_str, io_uring_get_opcode(opcode) )
),
TP_fast_assign(
__entry->opcode = opcode;
__entry->work = work;
__entry->rw = rw;
+
+ __assign_str(op_str, io_uring_get_opcode(opcode));
),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, flags 0x%x, %s queue, work %p",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode),
+ __get_str(op_str),
__entry->flags, __entry->rw ? "hashed" : "normal", __entry->work)
);
__field( void *, req )
__field( unsigned long long, data )
__field( u8, opcode )
+
+ __string( op_str, io_uring_get_opcode(opcode) )
),
TP_fast_assign(
__entry->req = req;
__entry->data = user_data;
__entry->opcode = opcode;
+
+ __assign_str(op_str, io_uring_get_opcode(opcode));
),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s",
__entry->ctx, __entry->req, __entry->data,
- io_uring_get_opcode(__entry->opcode))
+ __get_str(op_str))
);
/**
__field( unsigned long long, user_data )
__field( u8, opcode )
__field( void *, link )
+
+ __string( op_str, io_uring_get_opcode(opcode) )
),
TP_fast_assign(
__entry->user_data = user_data;
__entry->opcode = opcode;
__entry->link = link;
+
+ __assign_str(op_str, io_uring_get_opcode(opcode));
),
TP_printk("ring %p, request %p, user_data 0x%llx, opcode %s, link %p",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode), __entry->link)
+ __get_str(op_str), __entry->link)
);
/**
__field( u32, flags )
__field( bool, force_nonblock )
__field( bool, sq_thread )
+
+ __string( op_str, io_uring_get_opcode(opcode) )
),
TP_fast_assign(
__entry->flags = flags;
__entry->force_nonblock = force_nonblock;
__entry->sq_thread = sq_thread;
+
+ __assign_str(op_str, io_uring_get_opcode(opcode));
),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, flags 0x%x, "
"non block %d, sq_thread %d", __entry->ctx, __entry->req,
- __entry->user_data, io_uring_get_opcode(__entry->opcode),
+ __entry->user_data, __get_str(op_str),
__entry->flags, __entry->force_nonblock, __entry->sq_thread)
);
__field( u8, opcode )
__field( int, mask )
__field( int, events )
+
+ __string( op_str, io_uring_get_opcode(opcode) )
),
TP_fast_assign(
__entry->opcode = opcode;
__entry->mask = mask;
__entry->events = events;
+
+ __assign_str(op_str, io_uring_get_opcode(opcode));
),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask 0x%x, events 0x%x",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode),
+ __get_str(op_str),
__entry->mask, __entry->events)
);
__field( unsigned long long, user_data )
__field( u8, opcode )
__field( int, mask )
+
+ __string( op_str, io_uring_get_opcode(opcode) )
),
TP_fast_assign(
__entry->user_data = user_data;
__entry->opcode = opcode;
__entry->mask = mask;
+
+ __assign_str(op_str, io_uring_get_opcode(opcode));
),
TP_printk("ring %p, req %p, user_data 0x%llx, opcode %s, mask %x",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode),
+ __get_str(op_str),
__entry->mask)
);
__field( u64, pad1 )
__field( u64, addr3 )
__field( int, error )
+
+ __string( op_str, io_uring_get_opcode(sqe->opcode) )
),
TP_fast_assign(
__entry->pad1 = sqe->__pad2[0];
__entry->addr3 = sqe->addr3;
__entry->error = error;
+
+ __assign_str(op_str, io_uring_get_opcode(sqe->opcode));
),
TP_printk("ring %p, req %p, user_data 0x%llx, "
"personality=%d, file_index=%d, pad=0x%llx, addr3=%llx, "
"error=%d",
__entry->ctx, __entry->req, __entry->user_data,
- io_uring_get_opcode(__entry->opcode),
+ __get_str(op_str),
__entry->flags, __entry->ioprio,
(unsigned long long)__entry->off,
(unsigned long long) __entry->addr, __entry->len,
TP_fast_assign(
__assign_str(devname, ioc_name(ioc));
- __entry->old_vrate = atomic64_read(&ioc->vtime_rate);;
+ __entry->old_vrate = atomic64_read(&ioc->vtime_rate);
__entry->new_vrate = new_vrate;
__entry->busy_level = ioc->busy_level;
__entry->read_missed_ppm = missed_ppm[READ];
__entry->hob_feature = qc->result_tf.hob_feature;
__entry->nsect = qc->result_tf.nsect;
__entry->hob_nsect = qc->result_tf.hob_nsect;
+ __entry->flags = qc->flags;
),
TP_printk("ata_port=%u ata_dev=%u tag=%d flags=%s status=%s " \
TP_STRUCT__entry(
__array(char, name, 32)
- __field(long *, sysctl_mem)
+ __array(long, sysctl_mem, 3)
__field(long, allocated)
__field(int, sysctl_rmem)
__field(int, rmem_alloc)
TP_fast_assign(
strncpy(__entry->name, prot->name, 32);
- __entry->sysctl_mem = prot->sysctl_mem;
+ __entry->sysctl_mem[0] = READ_ONCE(prot->sysctl_mem[0]);
+ __entry->sysctl_mem[1] = READ_ONCE(prot->sysctl_mem[1]);
+ __entry->sysctl_mem[2] = READ_ONCE(prot->sysctl_mem[2]);
__entry->allocated = allocated;
__entry->sysctl_rmem = sk_get_rmem0(sk, prot);
__entry->rmem_alloc = atomic_read(&sk->sk_rmem_alloc);
*/
TRACE_EVENT(workqueue_queue_work,
- TP_PROTO(unsigned int req_cpu, struct pool_workqueue *pwq,
+ TP_PROTO(int req_cpu, struct pool_workqueue *pwq,
struct work_struct *work),
TP_ARGS(req_cpu, pwq, work),
__field( void *, work )
__field( void *, function)
__string( workqueue, pwq->wq->name)
- __field( unsigned int, req_cpu )
- __field( unsigned int, cpu )
+ __field( int, req_cpu )
+ __field( int, cpu )
),
TP_fast_assign(
__entry->cpu = pwq->pool->cpu;
),
- TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%u cpu=%u",
+ TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%d cpu=%d",
__entry->work, __entry->function, __get_str(workqueue),
__entry->req_cpu, __entry->cpu)
);
#define F_LINUX_SPECIFIC_BASE 1024
+#ifndef HAVE_ARCH_STRUCT_FLOCK
struct flock {
short l_type;
short l_whence;
__ARCH_FLOCK64_PAD
#endif
};
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
#endif /* _ASM_GENERIC_FCNTL_H */
#define AMD_FMT_MOD_PIPE_MASK 0x7
#define AMD_FMT_MOD_SET(field, value) \
- ((uint64_t)(value) << AMD_FMT_MOD_##field##_SHIFT)
+ ((__u64)(value) << AMD_FMT_MOD_##field##_SHIFT)
#define AMD_FMT_MOD_GET(field, value) \
(((value) >> AMD_FMT_MOD_##field##_SHIFT) & AMD_FMT_MOD_##field##_MASK)
#define AMD_FMT_MOD_CLEAR(field) \
- (~((uint64_t)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT))
+ (~((__u64)AMD_FMT_MOD_##field##_MASK << AMD_FMT_MOD_##field##_SHIFT))
#if defined(__cplusplus)
}
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset)
+ * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
- * of *src*'s data, -EINVAL if *src* is an invalid dynptr.
+ * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
+ * *flags* is not 0.
*
- * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len)
+ * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr.
+ * is a read-only dynptr or if *flags* is not 0.
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
* Note that input core does not clamp reported values to the
* [minimum, maximum] limits, such task is left to userspace.
*
- * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z)
- * is reported in units per millimeter (units/mm), resolution
- * for rotational axes (ABS_RX, ABS_RY, ABS_RZ) is reported
- * in units per radian.
+ * The default resolution for main axes (ABS_X, ABS_Y, ABS_Z,
+ * ABS_MT_POSITION_X, ABS_MT_POSITION_Y) is reported in units
+ * per millimeter (units/mm), resolution for rotational axes
+ * (ABS_RX, ABS_RY, ABS_RZ) is reported in units per radian.
+ * The resolution for the size axes (ABS_MT_TOUCH_MAJOR,
+ * ABS_MT_TOUCH_MINOR, ABS_MT_WIDTH_MAJOR, ABS_MT_WIDTH_MINOR)
+ * is reported in units per millimeter (units/mm).
* When INPUT_PROP_ACCELEROMETER is set the resolution changes.
* The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in
* units per g (units/g) and in units per degree per second
union {
__u64 off; /* offset into file */
__u64 addr2;
- __u32 cmd_op;
+ struct {
+ __u32 cmd_op;
+ __u32 __pad1;
+ };
};
union {
__u64 addr; /* pointer to buffer or iovecs */
__u32 unlink_flags;
__u32 hardlink_flags;
__u32 xattr_flags;
- __u32 close_flags;
};
__u64 user_data; /* data to be passed back at completion time */
/* pack this to avoid bogus arm OABI complaints */
#define IORING_ASYNC_CANCEL_ANY (1U << 2)
/*
- * send/sendmsg and recv/recvmsg flags (sqe->addr2)
+ * send/sendmsg and recv/recvmsg flags (sqe->ioprio)
*
* IORING_RECVSEND_POLL_FIRST If set, instead of first attempting to send
* or receive and arm poll if that yields an
*/
#define IORING_ACCEPT_MULTISHOT (1U << 0)
-/*
- * close flags, store in sqe->close_flags
- */
-#define IORING_CLOSE_FD_AND_FILE_SLOT (1U << 0)
-
/*
* IO completion data structure (Completion Queue Entry)
*/
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
#ifndef _UAPI_MPTCP_H
#define _UAPI_MPTCP_H
+#ifndef __KERNEL__
+#include <netinet/in.h> /* for sockaddr_in and sockaddr_in6 */
+#include <sys/socket.h> /* for struct sockaddr */
+#endif
+
#include <linux/const.h>
#include <linux/types.h>
#include <linux/in.h> /* for sockaddr_in */
#include <linux/in6.h> /* for sockaddr_in6 */
#include <linux/socket.h> /* for sockaddr_storage and sa_family */
-#ifndef __KERNEL__
-#include <sys/socket.h> /* for struct sockaddr */
-#endif
-
#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0)
#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1)
#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2)
/* TLS socket options */
#define TLS_TX 1 /* Set transmit parameters */
#define TLS_RX 2 /* Set receive parameters */
-#define TLS_TX_ZEROCOPY_SENDFILE 3 /* transmit zerocopy sendfile */
+#define TLS_TX_ZEROCOPY_RO 3 /* TX zerocopy (only sendfile now) */
/* Supported versions */
#define TLS_VERSION_MINOR(ver) ((ver) & 0xFF)
TLS_INFO_CIPHER,
TLS_INFO_TXCONF,
TLS_INFO_RXCONF,
- TLS_INFO_ZC_SENDFILE,
+ TLS_INFO_ZC_RO_TX,
__TLS_INFO_MAX,
};
#define TLS_INFO_MAX (__TLS_INFO_MAX - 1)
#define N_NULL 27 /* Null ldisc used for error handling */
#define N_MCTP 28 /* MCTP-over-serial */
#define N_DEVELOPMENT 29 /* Manual out-of-tree testing */
+#define N_CAN327 30 /* ELM327 based OBD-II interfaces */
/* Always the newest line discipline + 1 */
-#define NR_LDISCS 30
+#define NR_LDISCS 31
#endif /* _UAPI_LINUX_TTY_H */
#ifndef __LINUX_OF_DISPLAY_TIMING_H
#define __LINUX_OF_DISPLAY_TIMING_H
+#include <linux/errno.h>
+
struct device_node;
struct display_timing;
struct display_timings;
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_ARM_XEN_OPS_H
+#define _ASM_ARM_XEN_OPS_H
+
+#include <xen/swiotlb-xen.h>
+#include <xen/xen-ops.h>
+
+static inline void xen_setup_dma_ops(struct device *dev)
+{
+#ifdef CONFIG_XEN
+ if (xen_is_grant_dma_device(dev))
+ xen_grant_setup_dma_ops(dev);
+ else if (xen_swiotlb_detect())
+ dev->dma_ops = &xen_swiotlb_dma_ops;
+#endif
+}
+
+#endif /* _ASM_ARM_XEN_OPS_H */
*/
int gnttab_alloc_grant_references(u16 count, grant_ref_t *pprivate_head);
+int gnttab_alloc_grant_reference_seq(unsigned int count, grant_ref_t *first);
+
void gnttab_free_grant_reference(grant_ref_t ref);
void gnttab_free_grant_references(grant_ref_t head);
+void gnttab_free_grant_reference_seq(grant_ref_t head, unsigned int count);
+
int gnttab_empty_grant_references(const grant_ref_t *pprivate_head);
int gnttab_claim_grant_reference(grant_ref_t *pprivate_head);
#endif /* CONFIG_XEN_PV && !CONFIG_PREEMPTION */
+#ifdef CONFIG_XEN_GRANT_DMA_OPS
+void xen_grant_setup_dma_ops(struct device *dev);
+bool xen_is_grant_dma_device(struct device *dev);
+#else
+static inline void xen_grant_setup_dma_ops(struct device *dev)
+{
+}
+static inline bool xen_is_grant_dma_device(struct device *dev)
+{
+ return false;
+}
+#endif /* CONFIG_XEN_GRANT_DMA_OPS */
+
#endif /* INCLUDE_XEN_OPS_H */
extern u64 xen_saved_max_mem_size;
#endif
+#include <linux/platform-feature.h>
+
+static inline void xen_set_restricted_virtio_memory_access(void)
+{
+ if (IS_ENABLED(CONFIG_XEN_VIRTIO) && xen_domain())
+ platform_set(PLATFORM_VIRTIO_RESTRICTED_MEM_ACCESS);
+}
+
#ifdef CONFIG_XEN_UNPOPULATED_ALLOC
int xen_alloc_unpopulated_pages(unsigned int nr_pages, struct page **pages);
void xen_free_unpopulated_pages(unsigned int nr_pages, struct page **pages);
default "-Wimplicit-fallthrough=5" if CC_IS_GCC && $(cc-option,-Wimplicit-fallthrough=5)
default "-Wimplicit-fallthrough" if CC_IS_CLANG && $(cc-option,-Wunreachable-code-fallthrough)
+# Currently, disable gcc-12 array-bounds globally.
+# We may want to target only particular configurations some day.
+config GCC12_NO_ARRAY_BOUNDS
+ def_bool y
+
+config CC_NO_ARRAY_BOUNDS
+ bool
+ default y if CC_IS_GCC && GCC_VERSION >= 120000 && GCC_VERSION < 130000 && GCC12_NO_ARRAY_BOUNDS
+
#
# For architectures that know their GCC __int128 support is sound
#
goto fail_put;
if (!setup_ipc_sysctls(ns))
- goto fail_put;
+ goto fail_mq;
sem_init_ns(ns);
msg_init_ns(ns);
return ns;
+fail_mq:
+ retire_mq_sysctls(ns);
+
fail_put:
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
cpu.o exit.o softirq.o resource.o \
sysctl.o capability.o ptrace.o user.o \
signal.o sys.o umh.o workqueue.o pid.o task_work.o \
- extable.o params.o \
+ extable.o params.o platform-feature.o \
kthread.o sys_ni.o nsproxy.o \
notifier.o ksysfs.o cred.o reboot.o \
async.o range.o smpboot.o ucount.o regset.o
ctx->target_comm[0] = '\0';
unroll_tree_refs(ctx, NULL, 0);
WARN_ON(!list_empty(&ctx->killed_trees));
- ctx->type = 0;
audit_free_module(ctx);
ctx->fds[0] = -1;
audit_proctitle_free(ctx);
+ ctx->type = 0; /* reset last for audit_free_*() */
}
static inline struct audit_context *audit_alloc_context(enum audit_state state)
n = btf_nr_types(btf);
for (i = start_id; i < n; i++) {
const struct btf_type *t;
+ int chain_limit = 32;
u32 cur_id = i;
t = btf_type_by_id(btf, i);
in_tags = btf_type_is_type_tag(t);
while (btf_type_is_modifier(t)) {
+ if (!chain_limit--) {
+ btf_verifier_log(env, "Max chain length or cycle detected");
+ return -ELOOP;
+ }
if (btf_type_is_type_tag(t)) {
if (!in_tags) {
btf_verifier_log(env, "Type tags don't precede modifiers");
struct bpf_reg_state *regs,
bool ptr_to_mem_ok)
{
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
struct bpf_verifier_log *log = &env->log;
u32 i, nargs, ref_id, ref_obj_id = 0;
bool is_kfunc = btf_is_kernel(btf);
return -EINVAL;
}
/* rest of the arguments can be anything, like normal kfunc */
- } else if (btf_get_prog_ctx_type(log, btf, t, env->prog->type, i)) {
+ } else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {
/* If function expects ctx type in BTF check that caller
* is passing PTR_TO_CTX.
*/
{
u8 *ptr = NULL;
- if (k >= SKF_NET_OFF)
+ if (k >= SKF_NET_OFF) {
ptr = skb_network_header(skb) + k - SKF_NET_OFF;
- else if (k >= SKF_LL_OFF)
+ } else if (k >= SKF_LL_OFF) {
+ if (unlikely(!skb_mac_header_was_set(skb)))
+ return NULL;
ptr = skb_mac_header(skb) + k - SKF_LL_OFF;
-
+ }
if (ptr >= skb->head && ptr + size <= skb_tail_pointer(skb))
return ptr;
.arg4_type = ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL | MEM_UNINIT,
};
-BPF_CALL_4(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src, u32, offset)
+BPF_CALL_5(bpf_dynptr_read, void *, dst, u32, len, struct bpf_dynptr_kern *, src,
+ u32, offset, u64, flags)
{
int err;
- if (!src->data)
+ if (!src->data || flags)
return -EINVAL;
err = bpf_dynptr_check_off_len(src, offset, len);
.arg2_type = ARG_CONST_SIZE_OR_ZERO,
.arg3_type = ARG_PTR_TO_DYNPTR,
.arg4_type = ARG_ANYTHING,
+ .arg5_type = ARG_ANYTHING,
};
-BPF_CALL_4(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src, u32, len)
+BPF_CALL_5(bpf_dynptr_write, struct bpf_dynptr_kern *, dst, u32, offset, void *, src,
+ u32, len, u64, flags)
{
int err;
- if (!dst->data || bpf_dynptr_is_rdonly(dst))
+ if (!dst->data || flags || bpf_dynptr_is_rdonly(dst))
return -EINVAL;
err = bpf_dynptr_check_off_len(dst, offset, len);
.arg2_type = ARG_ANYTHING,
.arg3_type = ARG_PTR_TO_MEM | MEM_RDONLY,
.arg4_type = ARG_CONST_SIZE_OR_ZERO,
+ .arg5_type = ARG_ANYTHING,
};
BPF_CALL_3(bpf_dynptr_data, struct bpf_dynptr_kern *, ptr, u32, offset, u32, len)
reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
}
+static void reg_bounds_sync(struct bpf_reg_state *reg)
+{
+ /* We might have learned new bounds from the var_off. */
+ __update_reg_bounds(reg);
+ /* We might have learned something about the sign bit. */
+ __reg_deduce_bounds(reg);
+ /* We might have learned some bits from the bounds. */
+ __reg_bound_offset(reg);
+ /* Intersecting with the old var_off might have improved our bounds
+ * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
+ * then new var_off is (0; 0x7f...fc) which improves our umax.
+ */
+ __update_reg_bounds(reg);
+}
+
static bool __reg32_bound_s64(s32 a)
{
return a >= 0 && a <= S32_MAX;
* so they do not impact tnum bounds calculation.
*/
__mark_reg64_unbounded(reg);
- __update_reg_bounds(reg);
}
-
- /* Intersecting with the old var_off might have improved our bounds
- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
- * then new var_off is (0; 0x7f...fc) which improves our umax.
- */
- __reg_deduce_bounds(reg);
- __reg_bound_offset(reg);
- __update_reg_bounds(reg);
+ reg_bounds_sync(reg);
}
static bool __reg64_bound_s32(s64 a)
static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
{
__mark_reg32_unbounded(reg);
-
if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) {
reg->s32_min_value = (s32)reg->smin_value;
reg->s32_max_value = (s32)reg->smax_value;
reg->u32_min_value = (u32)reg->umin_value;
reg->u32_max_value = (u32)reg->umax_value;
}
-
- /* Intersecting with the old var_off might have improved our bounds
- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
- * then new var_off is (0; 0x7f...fc) which improves our umax.
- */
- __reg_deduce_bounds(reg);
- __reg_bound_offset(reg);
- __update_reg_bounds(reg);
+ reg_bounds_sync(reg);
}
/* Mark a register as having a completely unknown (scalar) value. */
ret_reg->s32_max_value = meta->msize_max_value;
ret_reg->smin_value = -MAX_ERRNO;
ret_reg->s32_min_value = -MAX_ERRNO;
- __reg_deduce_bounds(ret_reg);
- __reg_bound_offset(ret_reg);
- __update_reg_bounds(ret_reg);
+ reg_bounds_sync(ret_reg);
}
static int
if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type))
return -EINVAL;
-
- __update_reg_bounds(dst_reg);
- __reg_deduce_bounds(dst_reg);
- __reg_bound_offset(dst_reg);
-
+ reg_bounds_sync(dst_reg);
if (sanitize_check_bounds(env, insn, dst_reg) < 0)
return -EACCES;
if (sanitize_needed(opcode)) {
/* ALU32 ops are zero extended into 64bit register */
if (alu32)
zext_32_to_64(dst_reg);
-
- __update_reg_bounds(dst_reg);
- __reg_deduce_bounds(dst_reg);
- __reg_bound_offset(dst_reg);
+ reg_bounds_sync(dst_reg);
return 0;
}
insn->dst_reg);
}
zext_32_to_64(dst_reg);
-
- __update_reg_bounds(dst_reg);
- __reg_deduce_bounds(dst_reg);
- __reg_bound_offset(dst_reg);
+ reg_bounds_sync(dst_reg);
}
} else {
/* case: R = imm
return;
switch (opcode) {
+ /* JEQ/JNE comparison doesn't change the register equivalence.
+ *
+ * r1 = r2;
+ * if (r1 == 42) goto label;
+ * ...
+ * label: // here both r1 and r2 are known to be 42.
+ *
+ * Hence when marking register as known preserve it's ID.
+ */
case BPF_JEQ:
+ if (is_jmp32) {
+ __mark_reg32_known(true_reg, val32);
+ true_32off = tnum_subreg(true_reg->var_off);
+ } else {
+ ___mark_reg_known(true_reg, val);
+ true_64off = true_reg->var_off;
+ }
+ break;
case BPF_JNE:
- {
- struct bpf_reg_state *reg =
- opcode == BPF_JEQ ? true_reg : false_reg;
-
- /* JEQ/JNE comparison doesn't change the register equivalence.
- * r1 = r2;
- * if (r1 == 42) goto label;
- * ...
- * label: // here both r1 and r2 are known to be 42.
- *
- * Hence when marking register as known preserve it's ID.
- */
- if (is_jmp32)
- __mark_reg32_known(reg, val32);
- else
- ___mark_reg_known(reg, val);
+ if (is_jmp32) {
+ __mark_reg32_known(false_reg, val32);
+ false_32off = tnum_subreg(false_reg->var_off);
+ } else {
+ ___mark_reg_known(false_reg, val);
+ false_64off = false_reg->var_off;
+ }
break;
- }
case BPF_JSET:
if (is_jmp32) {
false_32off = tnum_and(false_32off, tnum_const(~val32));
dst_reg->smax_value);
src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off,
dst_reg->var_off);
- /* We might have learned new bounds from the var_off. */
- __update_reg_bounds(src_reg);
- __update_reg_bounds(dst_reg);
- /* We might have learned something about the sign bit. */
- __reg_deduce_bounds(src_reg);
- __reg_deduce_bounds(dst_reg);
- /* We might have learned some bits from the bounds. */
- __reg_bound_offset(src_reg);
- __reg_bound_offset(dst_reg);
- /* Intersecting with the old var_off might have improved our bounds
- * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
- * then new var_off is (0; 0x7f...fc) which improves our umax.
- */
- __update_reg_bounds(src_reg);
- __update_reg_bounds(dst_reg);
+ reg_bounds_sync(src_reg);
+ reg_bounds_sync(dst_reg);
}
static void reg_combine_min_max(struct bpf_reg_state *true_src,
static inline cfi_check_fn find_check_fn(unsigned long ptr)
{
cfi_check_fn fn = NULL;
+ unsigned long flags;
+ bool rcu_idle;
if (is_kernel_text(ptr))
return __cfi_check;
* the shadow and __module_address use RCU, so we need to wake it
* up if necessary.
*/
- RCU_NONIDLE({
- if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
- fn = find_shadow_check_fn(ptr);
+ rcu_idle = !rcu_is_watching();
+ if (rcu_idle) {
+ local_irq_save(flags);
+ rcu_irq_enter();
+ }
+
+ if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW))
+ fn = find_shadow_check_fn(ptr);
+ if (!fn)
+ fn = find_module_check_fn(ptr);
- if (!fn)
- fn = find_module_check_fn(ptr);
- });
+ if (rcu_idle) {
+ rcu_irq_exit();
+ local_irq_restore(flags);
+ }
return fn;
}
.task_iters = LIST_HEAD_INIT(init_css_set.task_iters),
.threaded_csets = LIST_HEAD_INIT(init_css_set.threaded_csets),
.cgrp_links = LIST_HEAD_INIT(init_css_set.cgrp_links),
- .mg_preload_node = LIST_HEAD_INIT(init_css_set.mg_preload_node),
+ .mg_src_preload_node = LIST_HEAD_INIT(init_css_set.mg_src_preload_node),
+ .mg_dst_preload_node = LIST_HEAD_INIT(init_css_set.mg_dst_preload_node),
.mg_node = LIST_HEAD_INIT(init_css_set.mg_node),
/*
INIT_LIST_HEAD(&cset->threaded_csets);
INIT_HLIST_NODE(&cset->hlist);
INIT_LIST_HEAD(&cset->cgrp_links);
- INIT_LIST_HEAD(&cset->mg_preload_node);
+ INIT_LIST_HEAD(&cset->mg_src_preload_node);
+ INIT_LIST_HEAD(&cset->mg_dst_preload_node);
INIT_LIST_HEAD(&cset->mg_node);
/* Copy the set of subsystem state objects generated in
*/
void cgroup_migrate_finish(struct cgroup_mgctx *mgctx)
{
- LIST_HEAD(preloaded);
struct css_set *cset, *tmp_cset;
lockdep_assert_held(&cgroup_mutex);
spin_lock_irq(&css_set_lock);
- list_splice_tail_init(&mgctx->preloaded_src_csets, &preloaded);
- list_splice_tail_init(&mgctx->preloaded_dst_csets, &preloaded);
+ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_src_csets,
+ mg_src_preload_node) {
+ cset->mg_src_cgrp = NULL;
+ cset->mg_dst_cgrp = NULL;
+ cset->mg_dst_cset = NULL;
+ list_del_init(&cset->mg_src_preload_node);
+ put_css_set_locked(cset);
+ }
- list_for_each_entry_safe(cset, tmp_cset, &preloaded, mg_preload_node) {
+ list_for_each_entry_safe(cset, tmp_cset, &mgctx->preloaded_dst_csets,
+ mg_dst_preload_node) {
cset->mg_src_cgrp = NULL;
cset->mg_dst_cgrp = NULL;
cset->mg_dst_cset = NULL;
- list_del_init(&cset->mg_preload_node);
+ list_del_init(&cset->mg_dst_preload_node);
put_css_set_locked(cset);
}
if (src_cset->dead)
return;
- if (!list_empty(&src_cset->mg_preload_node))
+ if (!list_empty(&src_cset->mg_src_preload_node))
return;
src_cgrp = cset_cgroup_from_root(src_cset, dst_cgrp->root);
src_cset->mg_src_cgrp = src_cgrp;
src_cset->mg_dst_cgrp = dst_cgrp;
get_css_set(src_cset);
- list_add_tail(&src_cset->mg_preload_node, &mgctx->preloaded_src_csets);
+ list_add_tail(&src_cset->mg_src_preload_node, &mgctx->preloaded_src_csets);
}
/**
/* look up the dst cset for each src cset and link it to src */
list_for_each_entry_safe(src_cset, tmp_cset, &mgctx->preloaded_src_csets,
- mg_preload_node) {
+ mg_src_preload_node) {
struct css_set *dst_cset;
struct cgroup_subsys *ss;
int ssid;
if (src_cset == dst_cset) {
src_cset->mg_src_cgrp = NULL;
src_cset->mg_dst_cgrp = NULL;
- list_del_init(&src_cset->mg_preload_node);
+ list_del_init(&src_cset->mg_src_preload_node);
put_css_set(src_cset);
put_css_set(dst_cset);
continue;
src_cset->mg_dst_cset = dst_cset;
- if (list_empty(&dst_cset->mg_preload_node))
- list_add_tail(&dst_cset->mg_preload_node,
+ if (list_empty(&dst_cset->mg_dst_preload_node))
+ list_add_tail(&dst_cset->mg_dst_preload_node,
&mgctx->preloaded_dst_csets);
else
put_css_set(dst_cset);
goto out_finish;
spin_lock_irq(&css_set_lock);
- list_for_each_entry(src_cset, &mgctx.preloaded_src_csets, mg_preload_node) {
+ list_for_each_entry(src_cset, &mgctx.preloaded_src_csets,
+ mg_src_preload_node) {
struct task_struct *task, *ntask;
/* all tasks in src_csets need to be migrated */
CONFIG_DEBUG_KMEMLEAK=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_SLUB_DEBUG_ON=y
-CONFIG_KMEMCHECK=y
CONFIG_DEBUG_OBJECTS=y
CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT=1
CONFIG_GCOV_KERNEL=y
CONFIG_LOCKDEP=y
CONFIG_PROVE_LOCKING=y
CONFIG_SCHEDSTATS=y
-CONFIG_VMLINUX_VALIDATION=y
+CONFIG_NOINSTR_VALIDATION=y
CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
rc = active_cacheline_insert(entry);
if (rc == -ENOMEM) {
- pr_err("cacheline tracking ENOMEM, dma-debug disabled\n");
+ pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n");
global_disable = true;
} else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {
err_printk(entry->dev, entry,
} else {
if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))
arch_dma_clear_uncached(cpu_addr, size);
- if (dma_set_encrypted(dev, cpu_addr, 1 << page_order))
+ if (dma_set_encrypted(dev, cpu_addr, size))
return;
}
struct page *page, dma_addr_t dma_addr,
enum dma_data_direction dir)
{
- unsigned int page_order = get_order(size);
void *vaddr = page_address(page);
/* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */
dma_free_from_pool(dev, vaddr, size))
return;
- if (dma_set_encrypted(dev, vaddr, 1 << page_order))
+ if (dma_set_encrypted(dev, vaddr, size))
return;
__dma_direct_free_pages(dev, page, size);
}
}
static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,
- unsigned long nslabs, bool late_alloc)
+ unsigned long nslabs, unsigned int flags, bool late_alloc)
{
void *vaddr = phys_to_virt(start);
unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
mem->index = 0;
mem->late_alloc = late_alloc;
- if (swiotlb_force_bounce)
- mem->force_bounce = true;
+ mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);
spin_lock_init(&mem->lock);
for (i = 0; i < mem->nslabs; i++) {
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
__func__, alloc_size, PAGE_SIZE);
- swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false);
- mem->force_bounce = flags & SWIOTLB_FORCE;
+ swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false);
if (flags & SWIOTLB_VERBOSE)
swiotlb_print_info();
set_memory_decrypted((unsigned long)vstart,
(nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT);
- swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true);
+ swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true);
swiotlb_print_info();
return 0;
set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),
rmem->size >> PAGE_SHIFT);
- swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false);
- mem->force_bounce = true;
+ swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE,
+ false);
mem->for_alloc = true;
rmem->priv = mem;
int ret;
if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) {
- clear_notify_signal();
- if (task_work_pending(current))
- task_work_run();
- }
-
- if (ti_work & _TIF_SIGPENDING) {
kvm_handle_signal_exit(vcpu);
return -EINTR;
}
if (!atomic_inc_not_zero(&event->rb->mmap_count)) {
/*
- * Raced against perf_mmap_close() through
- * perf_event_set_output(). Try again, hope for better
- * luck.
+ * Raced against perf_mmap_close(); remove the
+ * event and try again.
*/
+ ring_buffer_attach(event, NULL);
mutex_unlock(&event->mmap_mutex);
goto again;
}
goto out;
}
+static void mutex_lock_double(struct mutex *a, struct mutex *b)
+{
+ if (b < a)
+ swap(a, b);
+
+ mutex_lock(a);
+ mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
+}
+
static int
perf_event_set_output(struct perf_event *event, struct perf_event *output_event)
{
struct perf_buffer *rb = NULL;
int ret = -EINVAL;
- if (!output_event)
+ if (!output_event) {
+ mutex_lock(&event->mmap_mutex);
goto set;
+ }
/* don't allow circular references */
if (event == output_event)
event->pmu != output_event->pmu)
goto out;
+ /*
+ * Hold both mmap_mutex to serialize against perf_mmap_close(). Since
+ * output_event is already on rb->event_list, and the list iteration
+ * restarts after every removal, it is guaranteed this new event is
+ * observed *OR* if output_event is already removed, it's guaranteed we
+ * observe !rb->mmap_count.
+ */
+ mutex_lock_double(&event->mmap_mutex, &output_event->mmap_mutex);
set:
- mutex_lock(&event->mmap_mutex);
/* Can't redirect output if we've got an active mmap() */
if (atomic_read(&event->mmap_count))
goto unlock;
rb = ring_buffer_get(output_event);
if (!rb)
goto unlock;
+
+ /* did we race against perf_mmap_close() */
+ if (!atomic_read(&rb->mmap_count)) {
+ ring_buffer_put(rb);
+ goto unlock;
+ }
}
ring_buffer_attach(event, rb);
ret = 0;
unlock:
mutex_unlock(&event->mmap_mutex);
+ if (output_event)
+ mutex_unlock(&output_event->mmap_mutex);
out:
return ret;
}
-static void mutex_lock_double(struct mutex *a, struct mutex *b)
-{
- if (b < a)
- swap(a, b);
-
- mutex_lock(a);
- mutex_lock_nested(b, SINGLE_DEPTH_NESTING);
-}
-
static int perf_event_set_clock(struct perf_event *event, clockid_t clk_id)
{
bool nmi_safe = false;
#ifdef CONFIG_POSIX_TIMERS
hrtimer_cancel(&tsk->signal->real_timer);
- exit_itimers(tsk->signal);
+ exit_itimers(tsk);
#endif
if (tsk->mm)
setmax_mm_hiwater_rss(&tsk->signal->maxrss, tsk->mm);
* complain:
*/
if (sysctl_hung_task_warnings) {
- printk_prefer_direct_enter();
-
if (sysctl_hung_task_warnings > 0)
sysctl_hung_task_warnings--;
pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n",
if (sysctl_hung_task_all_cpu_backtrace)
hung_task_show_all_bt = true;
-
- printk_prefer_direct_exit();
}
touch_nmi_watchdog();
}
unlock:
rcu_read_unlock();
- if (hung_task_show_lock) {
- printk_prefer_direct_enter();
+ if (hung_task_show_lock)
debug_show_all_locks();
- printk_prefer_direct_exit();
- }
if (hung_task_show_all_bt) {
hung_task_show_all_bt = false;
- printk_prefer_direct_enter();
trigger_all_cpu_backtrace();
- printk_prefer_direct_exit();
}
if (hung_task_call_panic)
if (desc->irq_data.chip != &no_irq_chip)
mask_ack_irq(desc);
irq_state_set_disabled(desc);
- if (is_chained)
+ if (is_chained) {
desc->action = NULL;
+ WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc)));
+ }
desc->depth = 1;
}
desc->handle_irq = handle;
irq_settings_set_norequest(desc);
irq_settings_set_nothread(desc);
desc->action = &chained_action;
+ WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc)));
irq_activate_and_startup(desc, IRQ_RESEND);
}
}
#include <linux/vmalloc.h>
#include "kexec_internal.h"
+#ifdef CONFIG_KEXEC_SIG
+static bool sig_enforce = IS_ENABLED(CONFIG_KEXEC_SIG_FORCE);
+
+void set_kexec_sig_enforced(void)
+{
+ sig_enforce = true;
+}
+#endif
+
static int kexec_calculate_store_digests(struct kimage *image);
/*
image->kernel_buf_len);
if (ret) {
- if (IS_ENABLED(CONFIG_KEXEC_SIG_FORCE)) {
+ if (sig_enforce) {
pr_notice("Enforced kernel signature verification failed (%d).\n", ret);
return ret;
}
self = to_kthread(current);
- /* If user was SIGKILLed, I release the structure. */
+ /* Release the structure when caller killed by a fatal signal. */
done = xchg(&create->done, NULL);
if (!done) {
kfree(create);
/* We want our own signal handler (we take no signals by default). */
pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);
if (pid < 0) {
- /* If user was SIGKILLed, I release the structure. */
+ /* Release the structure when caller killed by a fatal signal. */
struct completion *done = xchg(&create->done, NULL);
if (!done) {
*/
if (unlikely(wait_for_completion_killable(&done))) {
/*
- * If I was SIGKILLed before kthreadd (or new kernel thread)
- * calls complete(), leave the cleanup of this structure to
- * that thread.
+ * If I was killed by a fatal signal before kthreadd (or new
+ * kernel thread) calls complete(), leave the cleanup of this
+ * structure to that thread.
*/
if (xchg(&create->done, NULL))
return ERR_PTR(-EINTR);
*
* Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
* when the needed structures could not get allocated, and ERR_PTR(-EINTR)
- * when the worker was SIGKILLed.
+ * when the caller was killed by a fatal signal.
*/
struct kthread_worker *
kthread_create_worker(unsigned int flags, const char namefmt[], ...)
* Return:
* The pointer to the allocated worker on success, ERR_PTR(-ENOMEM)
* when the needed structures could not get allocated, and ERR_PTR(-EINTR)
- * when the worker was SIGKILLed.
+ * when the caller was killed by a fatal signal.
*/
struct kthread_worker *
kthread_create_worker_on_cpu(int cpu, unsigned int flags,
* be guessable and still allows some pin nesting in
* our u32 pin_count.
*/
- cookie.val = 1 + (prandom_u32() >> 16);
+ cookie.val = 1 + (sched_clock() & 0xffff);
hlock->pin_count += cookie.val;
return cookie;
}
struct task_struct *task;
enum rwsem_waiter_type type;
unsigned long timeout;
-
- /* Writer only, not initialized in reader */
bool handoff_set;
};
#define rwsem_first_waiter(sem) \
* to give up the lock), request a HANDOFF to
* force the issue.
*/
- if (!(oldcount & RWSEM_FLAG_HANDOFF) &&
- time_after(jiffies, waiter->timeout)) {
- adjustment -= RWSEM_FLAG_HANDOFF;
- lockevent_inc(rwsem_rlock_handoff);
+ if (time_after(jiffies, waiter->timeout)) {
+ if (!(oldcount & RWSEM_FLAG_HANDOFF)) {
+ adjustment -= RWSEM_FLAG_HANDOFF;
+ lockevent_inc(rwsem_rlock_handoff);
+ }
+ waiter->handoff_set = true;
}
atomic_long_add(-adjustment, &sem->count);
static inline bool rwsem_try_write_lock(struct rw_semaphore *sem,
struct rwsem_waiter *waiter)
{
- bool first = rwsem_first_waiter(sem) == waiter;
+ struct rwsem_waiter *first = rwsem_first_waiter(sem);
long count, new;
lockdep_assert_held(&sem->wait_lock);
bool has_handoff = !!(count & RWSEM_FLAG_HANDOFF);
if (has_handoff) {
- if (!first)
+ /*
+ * Honor handoff bit and yield only when the first
+ * waiter is the one that set it. Otherwisee, we
+ * still try to acquire the rwsem.
+ */
+ if (first->handoff_set && (waiter != first))
return false;
- /* First waiter inherits a previously set handoff bit */
- waiter->handoff_set = true;
+ /*
+ * First waiter can inherit a previously set handoff
+ * bit and spin on rwsem if lock acquisition fails.
+ */
+ if (waiter == first)
+ waiter->handoff_set = true;
}
new = count;
waiter.task = current;
waiter.type = RWSEM_WAITING_FOR_READ;
waiter.timeout = jiffies + RWSEM_WAIT_TIMEOUT;
+ waiter.handoff_set = false;
raw_spin_lock_irq(&sem->wait_lock);
if (list_empty(&sem->wait_list)) {
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
+#include <linux/mm.h>
#ifndef ARCH_SHF_SMALL
#define ARCH_SHF_SMALL 0
* to ensure complete separation of code and data, but
* only when CONFIG_STRICT_MODULE_RWX=y
*/
-#ifdef CONFIG_STRICT_MODULE_RWX
-# define strict_align(X) PAGE_ALIGN(X)
-#else
-# define strict_align(X) (X)
-#endif
+static inline unsigned int strict_align(unsigned int size)
+{
+ if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX))
+ return PAGE_ALIGN(size);
+ else
+ return size;
+}
extern struct mutex module_mutex;
extern struct list_head modules;
info->symoffs = ALIGN(mod->data_layout.size, symsect->sh_addralign ?: 1);
info->stroffs = mod->data_layout.size = info->symoffs + ndst * sizeof(Elf_Sym);
mod->data_layout.size += strtab_size;
+ /* Note add_kallsyms() computes strtab_size as core_typeoffs - stroffs */
info->core_typeoffs = mod->data_layout.size;
mod->data_layout.size += ndst * sizeof(char);
mod->data_layout.size = strict_align(mod->data_layout.size);
Elf_Sym *dst;
char *s;
Elf_Shdr *symsec = &info->sechdrs[info->index.sym];
+ unsigned long strtab_size;
/* Set up to point into init section. */
mod->kallsyms = (void __rcu *)mod->init_layout.base +
info->mod_kallsyms_init_off;
- preempt_disable();
+ rcu_read_lock();
/* The following is safe since this pointer cannot change */
- rcu_dereference_sched(mod->kallsyms)->symtab = (void *)symsec->sh_addr;
- rcu_dereference_sched(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
+ rcu_dereference(mod->kallsyms)->symtab = (void *)symsec->sh_addr;
+ rcu_dereference(mod->kallsyms)->num_symtab = symsec->sh_size / sizeof(Elf_Sym);
/* Make sure we get permanent strtab: don't use info->strtab. */
- rcu_dereference_sched(mod->kallsyms)->strtab =
+ rcu_dereference(mod->kallsyms)->strtab =
(void *)info->sechdrs[info->index.str].sh_addr;
- rcu_dereference_sched(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs;
+ rcu_dereference(mod->kallsyms)->typetab = mod->init_layout.base + info->init_typeoffs;
/*
* Now populate the cut down core kallsyms for after init
mod->core_kallsyms.symtab = dst = mod->data_layout.base + info->symoffs;
mod->core_kallsyms.strtab = s = mod->data_layout.base + info->stroffs;
mod->core_kallsyms.typetab = mod->data_layout.base + info->core_typeoffs;
- src = rcu_dereference_sched(mod->kallsyms)->symtab;
- for (ndst = i = 0; i < rcu_dereference_sched(mod->kallsyms)->num_symtab; i++) {
- rcu_dereference_sched(mod->kallsyms)->typetab[i] = elf_type(src + i, info);
+ strtab_size = info->core_typeoffs - info->stroffs;
+ src = rcu_dereference(mod->kallsyms)->symtab;
+ for (ndst = i = 0; i < rcu_dereference(mod->kallsyms)->num_symtab; i++) {
+ rcu_dereference(mod->kallsyms)->typetab[i] = elf_type(src + i, info);
if (i == 0 || is_livepatch_module(mod) ||
is_core_symbol(src + i, info->sechdrs, info->hdr->e_shnum,
info->index.pcpu)) {
+ ssize_t ret;
+
mod->core_kallsyms.typetab[ndst] =
- rcu_dereference_sched(mod->kallsyms)->typetab[i];
+ rcu_dereference(mod->kallsyms)->typetab[i];
dst[ndst] = src[i];
dst[ndst++].st_name = s - mod->core_kallsyms.strtab;
- s += strscpy(s,
- &rcu_dereference_sched(mod->kallsyms)->strtab[src[i].st_name],
- KSYM_NAME_LEN) + 1;
+ ret = strscpy(s,
+ &rcu_dereference(mod->kallsyms)->strtab[src[i].st_name],
+ strtab_size);
+ if (ret < 0)
+ break;
+ s += ret + 1;
+ strtab_size -= ret + 1;
}
}
- preempt_enable();
+ rcu_read_unlock();
mod->core_kallsyms.num_symtab = ndst;
}
{
#ifdef CONFIG_CFI_CLANG
initcall_t *init;
+#ifdef CONFIG_MODULE_UNLOAD
exitcall_t *exit;
+#endif
rcu_read_lock_sched();
mod->cfi_check = (cfi_check_fn)
find_kallsyms_symbol_value(mod, "__cfi_check");
init = (initcall_t *)
find_kallsyms_symbol_value(mod, "__cfi_jt_init_module");
- exit = (exitcall_t *)
- find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module");
- rcu_read_unlock_sched();
-
/* Fix init/exit functions to point to the CFI jump table */
if (init)
mod->init = *init;
#ifdef CONFIG_MODULE_UNLOAD
+ exit = (exitcall_t *)
+ find_kallsyms_symbol_value(mod, "__cfi_jt_cleanup_module");
if (exit)
mod->exit = *exit;
#endif
+ rcu_read_unlock_sched();
cfi_module_add(mod, mod_tree.addr_min);
#endif
{
disable_trace_on_warning();
- printk_prefer_direct_enter();
-
if (file)
pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",
raw_smp_processor_id(), current->pid, file, line,
/* Just a warning, don't kill lockdep. */
add_taint(taint, LOCKDEP_STILL_OK);
-
- printk_prefer_direct_exit();
}
#ifndef __WARN_FLAGS
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bitops.h>
+#include <linux/cache.h>
+#include <linux/export.h>
+#include <linux/platform-feature.h>
+
+#define PLATFORM_FEAT_ARRAY_SZ BITS_TO_LONGS(PLATFORM_FEAT_N)
+static unsigned long __read_mostly platform_features[PLATFORM_FEAT_ARRAY_SZ];
+
+void platform_set(unsigned int feature)
+{
+ set_bit(feature, platform_features);
+}
+EXPORT_SYMBOL_GPL(platform_set);
+
+void platform_clear(unsigned int feature)
+{
+ clear_bit(feature, platform_features);
+}
+EXPORT_SYMBOL_GPL(platform_clear);
+
+bool platform_has(unsigned int feature)
+{
+ return test_bit(feature, platform_features);
+}
+EXPORT_SYMBOL_GPL(platform_has);
hibernation_platform_enter();
fallthrough;
case HIBERNATION_SHUTDOWN:
- if (pm_power_off)
+ if (kernel_can_power_off())
kernel_power_off();
break;
}
/* Number of registered extended console drivers. */
static int nr_ext_console_drivers;
-/*
- * Used to synchronize printing kthreads against direct printing via
- * console_trylock/console_unlock.
- *
- * Values:
- * -1 = console kthreads atomically blocked (via global trylock)
- * 0 = no kthread printing, console not locked (via trylock)
- * >0 = kthread(s) actively printing
- *
- * Note: For synchronizing against direct printing via
- * console_lock/console_unlock, see the @lock variable in
- * struct console.
- */
-static atomic_t console_kthreads_active = ATOMIC_INIT(0);
-
-#define console_kthreads_atomic_tryblock() \
- (atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0)
-#define console_kthreads_atomic_unblock() \
- atomic_cmpxchg(&console_kthreads_active, -1, 0)
-#define console_kthreads_atomically_blocked() \
- (atomic_read(&console_kthreads_active) == -1)
-
-#define console_kthread_printing_tryenter() \
- atomic_inc_unless_negative(&console_kthreads_active)
-#define console_kthread_printing_exit() \
- atomic_dec(&console_kthreads_active)
-
/*
* Helper macros to handle lockdep when locking/unlocking console_sem. We use
* macros instead of functions so that _RET_IP_ contains useful information.
}
/*
- * Tracks whether kthread printers are all blocked. A value of true implies
- * that the console is locked via console_lock() or the console is suspended.
- * Writing to this variable requires holding @console_sem.
- */
-static bool console_kthreads_blocked;
-
-/*
- * Block all kthread printers from a schedulable context.
- *
- * Requires holding @console_sem.
- */
-static void console_kthreads_block(void)
-{
- struct console *con;
-
- for_each_console(con) {
- mutex_lock(&con->lock);
- con->blocked = true;
- mutex_unlock(&con->lock);
- }
-
- console_kthreads_blocked = true;
-}
-
-/*
- * Unblock all kthread printers from a schedulable context.
- *
- * Requires holding @console_sem.
+ * This is used for debugging the mess that is the VT code by
+ * keeping track if we have the console semaphore held. It's
+ * definitely not the perfect debug tool (we don't know if _WE_
+ * hold it and are racing, but it helps tracking those weird code
+ * paths in the console code where we end up in places I want
+ * locked without the console semaphore held).
*/
-static void console_kthreads_unblock(void)
-{
- struct console *con;
-
- for_each_console(con) {
- mutex_lock(&con->lock);
- con->blocked = false;
- mutex_unlock(&con->lock);
- }
-
- console_kthreads_blocked = false;
-}
-
-static int console_suspended;
+static int console_locked, console_suspended;
/*
* Array of consoles built from command line options (console=)
/* syslog_lock protects syslog_* variables and write access to clear_seq. */
static DEFINE_MUTEX(syslog_lock);
-/*
- * A flag to signify if printk_activate_kthreads() has already started the
- * kthread printers. If true, any later registered consoles must start their
- * own kthread directly. The flag is write protected by the console_lock.
- */
-static bool printk_kthreads_available;
-
#ifdef CONFIG_PRINTK
-static atomic_t printk_prefer_direct = ATOMIC_INIT(0);
-
-/**
- * printk_prefer_direct_enter - cause printk() calls to attempt direct
- * printing to all enabled consoles
- *
- * Since it is not possible to call into the console printing code from any
- * context, there is no guarantee that direct printing will occur.
- *
- * This globally effects all printk() callers.
- *
- * Context: Any context.
- */
-void printk_prefer_direct_enter(void)
-{
- atomic_inc(&printk_prefer_direct);
-}
-
-/**
- * printk_prefer_direct_exit - restore printk() behavior
- *
- * Context: Any context.
- */
-void printk_prefer_direct_exit(void)
-{
- WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0);
-}
-
-/*
- * Calling printk() always wakes kthread printers so that they can
- * flush the new message to their respective consoles. Also, if direct
- * printing is allowed, printk() tries to flush the messages directly.
- *
- * Direct printing is allowed in situations when the kthreads
- * are not available or the system is in a problematic state.
- *
- * See the implementation about possible races.
- */
-static inline bool allow_direct_printing(void)
-{
- /*
- * Checking kthread availability is a possible race because the
- * kthread printers can become permanently disabled during runtime.
- * However, doing that requires holding the console_lock, so any
- * pending messages will be direct printed by console_unlock().
- */
- if (!printk_kthreads_available)
- return true;
-
- /*
- * Prefer direct printing when the system is in a problematic state.
- * The context that sets this state will always see the updated value.
- * The other contexts do not care. Anyway, direct printing is just a
- * best effort. The direct output is only possible when console_lock
- * is not already taken and no kthread printers are actively printing.
- */
- return (system_state > SYSTEM_RUNNING ||
- oops_in_progress ||
- atomic_read(&printk_prefer_direct));
-}
-
DECLARE_WAIT_QUEUE_HEAD(log_wait);
/* All 3 protected by @syslog_lock. */
/* the next printk record to read by syslog(READ) or /proc/kmsg */
printed_len = vprintk_store(facility, level, dev_info, fmt, args);
/* If called from the scheduler, we can not call up(). */
- if (!in_sched && allow_direct_printing()) {
+ if (!in_sched) {
/*
* The caller may be holding system-critical or
- * timing-sensitive locks. Disable preemption during direct
+ * timing-sensitive locks. Disable preemption during
* printing of all remaining records to all consoles so that
* this context can return as soon as possible. Hopefully
* another printk() caller will take over the printing.
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress);
-static void printk_start_kthread(struct console *con);
-
#else /* CONFIG_PRINTK */
#define CONSOLE_LOG_MAX 0
}
static bool suppress_message_printing(int level) { return false; }
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; }
-static void printk_start_kthread(struct console *con) { }
-static bool allow_direct_printing(void) { return true; }
#endif /* CONFIG_PRINTK */
/* If trylock fails, someone else is doing the printing */
if (console_trylock())
console_unlock();
- else {
- /*
- * If a new CPU comes online, the conditions for
- * printer_should_wake() may have changed for some
- * kthread printer with !CON_ANYTIME.
- */
- wake_up_klogd();
- }
}
return 0;
}
down_console_sem();
if (console_suspended)
return;
- console_kthreads_block();
+ console_locked = 1;
console_may_schedule = 1;
}
EXPORT_SYMBOL(console_lock);
up_console_sem();
return 0;
}
- if (!console_kthreads_atomic_tryblock()) {
- up_console_sem();
- return 0;
- }
+ console_locked = 1;
console_may_schedule = 0;
return 1;
}
EXPORT_SYMBOL(console_trylock);
-/*
- * This is used to help to make sure that certain paths within the VT code are
- * running with the console lock held. It is definitely not the perfect debug
- * tool (it is not known if the VT code is the task holding the console lock),
- * but it helps tracking those weird code paths in the console code such as
- * when the console is suspended: where the console is not locked but no
- * console printing may occur.
- *
- * Note: This returns true when the console is suspended but is not locked.
- * This is intentional because the VT code must consider that situation
- * the same as if the console was locked.
- */
int is_console_locked(void)
{
- return (console_kthreads_blocked || atomic_read(&console_kthreads_active));
+ return console_locked;
}
EXPORT_SYMBOL(is_console_locked);
return atomic_read(&panic_cpu) != raw_smp_processor_id();
}
-static inline bool __console_is_usable(short flags)
+/*
+ * Check if the given console is currently capable and allowed to print
+ * records.
+ *
+ * Requires the console_lock.
+ */
+static inline bool console_is_usable(struct console *con)
{
- if (!(flags & CON_ENABLED))
+ if (!(con->flags & CON_ENABLED))
+ return false;
+
+ if (!con->write)
return false;
/*
* cope (CON_ANYTIME) don't call them until this CPU is officially up.
*/
if (!cpu_online(raw_smp_processor_id()) &&
- !(flags & CON_ANYTIME))
+ !(con->flags & CON_ANYTIME))
return false;
return true;
}
-/*
- * Check if the given console is currently capable and allowed to print
- * records.
- *
- * Requires holding the console_lock.
- */
-static inline bool console_is_usable(struct console *con)
-{
- if (!con->write)
- return false;
-
- return __console_is_usable(con->flags);
-}
-
static void __console_unlock(void)
{
- /*
- * Depending on whether console_lock() or console_trylock() was used,
- * appropriately allow the kthread printers to continue.
- */
- if (console_kthreads_blocked)
- console_kthreads_unblock();
- else
- console_kthreads_atomic_unblock();
-
- /*
- * New records may have arrived while the console was locked.
- * Wake the kthread printers to print them.
- */
- wake_up_klogd();
-
+ console_locked = 0;
up_console_sem();
}
*
* @handover will be set to true if a printk waiter has taken over the
* console_lock, in which case the caller is no longer holding the
- * console_lock. Otherwise it is set to false. A NULL pointer may be provided
- * to disable allowing the console_lock to be taken over by a printk waiter.
+ * console_lock. Otherwise it is set to false.
*
* Returns false if the given console has no next record to print, otherwise
* true.
*
- * Requires the console_lock if @handover is non-NULL.
- * Requires con->lock otherwise.
+ * Requires the console_lock.
*/
-static bool __console_emit_next_record(struct console *con, char *text, char *ext_text,
- char *dropped_text, bool *handover)
+static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
+ char *dropped_text, bool *handover)
{
- static atomic_t panic_console_dropped = ATOMIC_INIT(0);
+ static int panic_console_dropped;
struct printk_info info;
struct printk_record r;
unsigned long flags;
prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX);
- if (handover)
- *handover = false;
+ *handover = false;
if (!prb_read_valid(prb, con->seq, &r))
return false;
if (con->seq != r.info->seq) {
con->dropped += r.info->seq - con->seq;
con->seq = r.info->seq;
- if (panic_in_progress() &&
- atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) {
+ if (panic_in_progress() && panic_console_dropped++ > 10) {
suppress_panic_printk = 1;
pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");
}
len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);
}
- if (handover) {
- /*
- * While actively printing out messages, if another printk()
- * were to occur on another CPU, it may wait for this one to
- * finish. This task can not be preempted if there is a
- * waiter waiting to take over.
- *
- * Interrupts are disabled because the hand over to a waiter
- * must not be interrupted until the hand over is completed
- * (@console_waiter is cleared).
- */
- printk_safe_enter_irqsave(flags);
- console_lock_spinning_enable();
-
- /* don't trace irqsoff print latency */
- stop_critical_timings();
- }
+ /*
+ * While actively printing out messages, if another printk()
+ * were to occur on another CPU, it may wait for this one to
+ * finish. This task can not be preempted if there is a
+ * waiter waiting to take over.
+ *
+ * Interrupts are disabled because the hand over to a waiter
+ * must not be interrupted until the hand over is completed
+ * (@console_waiter is cleared).
+ */
+ printk_safe_enter_irqsave(flags);
+ console_lock_spinning_enable();
+ stop_critical_timings(); /* don't trace print latency */
call_console_driver(con, write_text, len, dropped_text);
+ start_critical_timings();
con->seq++;
- if (handover) {
- start_critical_timings();
- *handover = console_lock_spinning_disable_and_check();
- printk_safe_exit_irqrestore(flags);
- }
+ *handover = console_lock_spinning_disable_and_check();
+ printk_safe_exit_irqrestore(flags);
skip:
return true;
}
-/*
- * Print a record for a given console, but allow another printk() caller to
- * take over the console_lock and continue printing.
- *
- * Requires the console_lock, but depending on @handover after the call, the
- * caller may no longer have the console_lock.
- *
- * See __console_emit_next_record() for argument and return details.
- */
-static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text,
- char *dropped_text, bool *handover)
-{
- /*
- * Handovers are only supported if threaded printers are atomically
- * blocked. The context taking over the console_lock may be atomic.
- */
- if (!console_kthreads_atomically_blocked()) {
- *handover = false;
- handover = NULL;
- }
-
- return __console_emit_next_record(con, text, ext_text, dropped_text, handover);
-}
-
/*
* Print out all remaining records to all consoles.
*
* were flushed to all usable consoles. A returned false informs the caller
* that everything was not flushed (either there were no usable consoles or
* another context has taken over printing or it is a panic situation and this
- * is not the panic CPU or direct printing is not preferred). Regardless the
- * reason, the caller should assume it is not useful to immediately try again.
+ * is not the panic CPU). Regardless the reason, the caller should assume it
+ * is not useful to immediately try again.
*
* Requires the console_lock.
*/
*handover = false;
do {
- /* Let the kthread printers do the work if they can. */
- if (!allow_direct_printing())
- return false;
-
any_progress = false;
for_each_console(con) {
if (con->flags & CON_EXTENDED) {
/* Extended consoles do not print "dropped messages". */
- progress = console_emit_next_record_transferable(con, &text[0],
- &ext_text[0], NULL, handover);
+ progress = console_emit_next_record(con, &text[0],
+ &ext_text[0], NULL,
+ handover);
} else {
- progress = console_emit_next_record_transferable(con, &text[0],
- NULL, &dropped_text[0], handover);
+ progress = console_emit_next_record(con, &text[0],
+ NULL, &dropped_text[0],
+ handover);
}
if (*handover)
return false;
if (oops_in_progress) {
if (down_trylock_console_sem() != 0)
return;
- if (!console_kthreads_atomic_tryblock()) {
- up_console_sem();
- return;
- }
} else
console_lock();
+ console_locked = 1;
console_may_schedule = 0;
for_each_console(c)
if ((c->flags & CON_ENABLED) && c->unblank)
nr_ext_console_drivers++;
newcon->dropped = 0;
- newcon->thread = NULL;
- newcon->blocked = true;
- mutex_init(&newcon->lock);
-
if (newcon->flags & CON_PRINTBUFFER) {
/* Get a consistent copy of @syslog_seq. */
mutex_lock(&syslog_lock);
/* Begin with next message. */
newcon->seq = prb_next_seq(prb);
}
-
- if (printk_kthreads_available)
- printk_start_kthread(newcon);
-
console_unlock();
console_sysfs_notify();
int unregister_console(struct console *console)
{
- struct task_struct *thd;
struct console *con;
int res;
console_drivers->flags |= CON_CONSDEV;
console->flags &= ~CON_ENABLED;
-
- /*
- * console->thread can only be cleared under the console lock. But
- * stopping the thread must be done without the console lock. The
- * task that clears @thread is the task that stops the kthread.
- */
- thd = console->thread;
- console->thread = NULL;
-
console_unlock();
-
- if (thd)
- kthread_stop(thd);
-
console_sysfs_notify();
if (console->exit)
}
late_initcall(printk_late_init);
-static int __init printk_activate_kthreads(void)
-{
- struct console *con;
-
- console_lock();
- printk_kthreads_available = true;
- for_each_console(con)
- printk_start_kthread(con);
- console_unlock();
-
- return 0;
-}
-early_initcall(printk_activate_kthreads);
-
#if defined CONFIG_PRINTK
/* If @con is specified, only wait for that console. Otherwise wait for all. */
static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress)
diff = 0;
console_lock();
+
for_each_console(c) {
if (con && con != c)
continue;
if (printk_seq < seq)
diff += seq - printk_seq;
}
- console_unlock();
- if (diff != last_diff && reset_on_progress)
+ /*
+ * If consoles are suspended, it cannot be expected that they
+ * make forward progress, so timeout immediately. @diff is
+ * still used to return a valid flush status.
+ */
+ if (console_suspended)
+ remaining = 0;
+ else if (diff != last_diff && reset_on_progress)
remaining = timeout_ms;
+ console_unlock();
+
if (diff == 0 || remaining == 0)
break;
}
EXPORT_SYMBOL(pr_flush);
-static void __printk_fallback_preferred_direct(void)
-{
- printk_prefer_direct_enter();
- pr_err("falling back to preferred direct printing\n");
- printk_kthreads_available = false;
-}
-
-/*
- * Enter preferred direct printing, but never exit. Mark console threads as
- * unavailable. The system is then forever in preferred direct printing and
- * any printing threads will exit.
- *
- * Must *not* be called under console_lock. Use
- * __printk_fallback_preferred_direct() if already holding console_lock.
- */
-static void printk_fallback_preferred_direct(void)
-{
- console_lock();
- __printk_fallback_preferred_direct();
- console_unlock();
-}
-
-/*
- * Print a record for a given console, not allowing another printk() caller
- * to take over. This is appropriate for contexts that do not have the
- * console_lock.
- *
- * See __console_emit_next_record() for argument and return details.
- */
-static bool console_emit_next_record(struct console *con, char *text, char *ext_text,
- char *dropped_text)
-{
- return __console_emit_next_record(con, text, ext_text, dropped_text, NULL);
-}
-
-static bool printer_should_wake(struct console *con, u64 seq)
-{
- short flags;
-
- if (kthread_should_stop() || !printk_kthreads_available)
- return true;
-
- if (con->blocked ||
- console_kthreads_atomically_blocked()) {
- return false;
- }
-
- /*
- * This is an unsafe read from con->flags, but a false positive is
- * not a problem. Worst case it would allow the printer to wake up
- * although it is disabled. But the printer will notice that when
- * attempting to print and instead go back to sleep.
- */
- flags = data_race(READ_ONCE(con->flags));
-
- if (!__console_is_usable(flags))
- return false;
-
- return prb_read_valid(prb, seq, NULL);
-}
-
-static int printk_kthread_func(void *data)
-{
- struct console *con = data;
- char *dropped_text = NULL;
- char *ext_text = NULL;
- u64 seq = 0;
- char *text;
- int error;
-
- text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL);
- if (!text) {
- con_printk(KERN_ERR, con, "failed to allocate text buffer\n");
- printk_fallback_preferred_direct();
- goto out;
- }
-
- if (con->flags & CON_EXTENDED) {
- ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL);
- if (!ext_text) {
- con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n");
- printk_fallback_preferred_direct();
- goto out;
- }
- } else {
- dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL);
- if (!dropped_text) {
- con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n");
- printk_fallback_preferred_direct();
- goto out;
- }
- }
-
- con_printk(KERN_INFO, con, "printing thread started\n");
-
- for (;;) {
- /*
- * Guarantee this task is visible on the waitqueue before
- * checking the wake condition.
- *
- * The full memory barrier within set_current_state() of
- * prepare_to_wait_event() pairs with the full memory barrier
- * within wq_has_sleeper().
- *
- * This pairs with __wake_up_klogd:A.
- */
- error = wait_event_interruptible(log_wait,
- printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */
-
- if (kthread_should_stop() || !printk_kthreads_available)
- break;
-
- if (error)
- continue;
-
- error = mutex_lock_interruptible(&con->lock);
- if (error)
- continue;
-
- if (con->blocked ||
- !console_kthread_printing_tryenter()) {
- /* Another context has locked the console_lock. */
- mutex_unlock(&con->lock);
- continue;
- }
-
- /*
- * Although this context has not locked the console_lock, it
- * is known that the console_lock is not locked and it is not
- * possible for any other context to lock the console_lock.
- * Therefore it is safe to read con->flags.
- */
-
- if (!__console_is_usable(con->flags)) {
- console_kthread_printing_exit();
- mutex_unlock(&con->lock);
- continue;
- }
-
- /*
- * Even though the printk kthread is always preemptible, it is
- * still not allowed to call cond_resched() from within
- * console drivers. The task may become non-preemptible in the
- * console driver call chain. For example, vt_console_print()
- * takes a spinlock and then can call into fbcon_redraw(),
- * which can conditionally invoke cond_resched().
- */
- console_may_schedule = 0;
- console_emit_next_record(con, text, ext_text, dropped_text);
-
- seq = con->seq;
-
- console_kthread_printing_exit();
-
- mutex_unlock(&con->lock);
- }
-
- con_printk(KERN_INFO, con, "printing thread stopped\n");
-out:
- kfree(dropped_text);
- kfree(ext_text);
- kfree(text);
-
- console_lock();
- /*
- * If this kthread is being stopped by another task, con->thread will
- * already be NULL. That is fine. The important thing is that it is
- * NULL after the kthread exits.
- */
- con->thread = NULL;
- console_unlock();
-
- return 0;
-}
-
-/* Must be called under console_lock. */
-static void printk_start_kthread(struct console *con)
-{
- /*
- * Do not start a kthread if there is no write() callback. The
- * kthreads assume the write() callback exists.
- */
- if (!con->write)
- return;
-
- con->thread = kthread_run(printk_kthread_func, con,
- "pr/%s%d", con->name, con->index);
- if (IS_ERR(con->thread)) {
- con->thread = NULL;
- con_printk(KERN_ERR, con, "unable to start printing thread\n");
- __printk_fallback_preferred_direct();
- return;
- }
-}
-
/*
* Delayed printk version, for scheduler-internal messages:
*/
-#define PRINTK_PENDING_WAKEUP 0x01
-#define PRINTK_PENDING_DIRECT_OUTPUT 0x02
+#define PRINTK_PENDING_WAKEUP 0x01
+#define PRINTK_PENDING_OUTPUT 0x02
static DEFINE_PER_CPU(int, printk_pending);
{
int pending = this_cpu_xchg(printk_pending, 0);
- if (pending & PRINTK_PENDING_DIRECT_OUTPUT) {
- printk_prefer_direct_enter();
-
+ if (pending & PRINTK_PENDING_OUTPUT) {
/* If trylock fails, someone else is doing the printing */
if (console_trylock())
console_unlock();
-
- printk_prefer_direct_exit();
}
if (pending & PRINTK_PENDING_WAKEUP)
* prepare_to_wait_event(), which is called after ___wait_event() adds
* the waiter but before it has checked the wait condition.
*
- * This pairs with devkmsg_read:A, syslog_print:A, and
- * printk_kthread_func:A.
+ * This pairs with devkmsg_read:A and syslog_print:A.
*/
if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */
- (val & PRINTK_PENDING_DIRECT_OUTPUT)) {
+ (val & PRINTK_PENDING_OUTPUT)) {
this_cpu_or(printk_pending, val);
irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));
}
* New messages may have been added directly to the ringbuffer
* using vprintk_store(), so wake any waiters as well.
*/
- int val = PRINTK_PENDING_WAKEUP;
-
- /*
- * Make sure that some context will print the messages when direct
- * printing is allowed. This happens in situations when the kthreads
- * may not be as reliable or perhaps unusable.
- */
- if (allow_direct_printing())
- val |= PRINTK_PENDING_DIRECT_OUTPUT;
-
- __wake_up_klogd(val);
+ __wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);
}
void printk_trigger_flush(void)
if (lock_task_sighand(task, &flags)) {
task->jobctl &= ~JOBCTL_PTRACE_FROZEN;
if (__fatal_signal_pending(task)) {
- task->jobctl &= ~TASK_TRACED;
+ task->jobctl &= ~JOBCTL_TRACED;
wake_up_state(task, __TASK_TRACED);
}
unlock_task_sighand(task, &flags);
return sum;
}
-#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
-#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
-#define SRCU_MAX_NODELAY_PHASE 1 // Maximum per-GP-phase consecutive no-delay instances.
-#define SRCU_MAX_NODELAY 100 // Maximum consecutive no-delay instances.
+/*
+ * We use an adaptive strategy for synchronize_srcu() and especially for
+ * synchronize_srcu_expedited(). We spin for a fixed time period
+ * (defined below, boot time configurable) to allow SRCU readers to exit
+ * their read-side critical sections. If there are still some readers
+ * after one jiffy, we repeatedly block for one jiffy time periods.
+ * The blocking time is increased as the grace-period age increases,
+ * with max blocking time capped at 10 jiffies.
+ */
+#define SRCU_DEFAULT_RETRY_CHECK_DELAY 5
+
+static ulong srcu_retry_check_delay = SRCU_DEFAULT_RETRY_CHECK_DELAY;
+module_param(srcu_retry_check_delay, ulong, 0444);
+
+#define SRCU_INTERVAL 1 // Base delay if no expedited GPs pending.
+#define SRCU_MAX_INTERVAL 10 // Maximum incremental delay from slow readers.
+
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_LO 3UL // Lowmark on default per-GP-phase
+ // no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_HI 1000UL // Highmark on default per-GP-phase
+ // no-delay instances.
+
+#define SRCU_UL_CLAMP_LO(val, low) ((val) > (low) ? (val) : (low))
+#define SRCU_UL_CLAMP_HI(val, high) ((val) < (high) ? (val) : (high))
+#define SRCU_UL_CLAMP(val, low, high) SRCU_UL_CLAMP_HI(SRCU_UL_CLAMP_LO((val), (low)), (high))
+// per-GP-phase no-delay instances adjusted to allow non-sleeping poll upto
+// one jiffies time duration. Mult by 2 is done to factor in the srcu_get_delay()
+// called from process_srcu().
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED \
+ (2UL * USEC_PER_SEC / HZ / SRCU_DEFAULT_RETRY_CHECK_DELAY)
+
+// Maximum per-GP-phase consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY_PHASE \
+ SRCU_UL_CLAMP(SRCU_DEFAULT_MAX_NODELAY_PHASE_ADJUSTED, \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE_LO, \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE_HI)
+
+static ulong srcu_max_nodelay_phase = SRCU_DEFAULT_MAX_NODELAY_PHASE;
+module_param(srcu_max_nodelay_phase, ulong, 0444);
+
+// Maximum consecutive no-delay instances.
+#define SRCU_DEFAULT_MAX_NODELAY (SRCU_DEFAULT_MAX_NODELAY_PHASE > 100 ? \
+ SRCU_DEFAULT_MAX_NODELAY_PHASE : 100)
+
+static ulong srcu_max_nodelay = SRCU_DEFAULT_MAX_NODELAY;
+module_param(srcu_max_nodelay, ulong, 0444);
/*
* Return grace-period delay, zero if there are expedited grace
*/
static unsigned long srcu_get_delay(struct srcu_struct *ssp)
{
+ unsigned long gpstart;
+ unsigned long j;
unsigned long jbase = SRCU_INTERVAL;
if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
jbase = 0;
- if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)))
- jbase += jiffies - READ_ONCE(ssp->srcu_gp_start);
- if (!jbase) {
- WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
- if (READ_ONCE(ssp->srcu_n_exp_nodelay) > SRCU_MAX_NODELAY_PHASE)
- jbase = 1;
+ if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))) {
+ j = jiffies - 1;
+ gpstart = READ_ONCE(ssp->srcu_gp_start);
+ if (time_after(j, gpstart))
+ jbase += j - gpstart;
+ if (!jbase) {
+ WRITE_ONCE(ssp->srcu_n_exp_nodelay, READ_ONCE(ssp->srcu_n_exp_nodelay) + 1);
+ if (READ_ONCE(ssp->srcu_n_exp_nodelay) > srcu_max_nodelay_phase)
+ jbase = 1;
+ }
}
return jbase > SRCU_MAX_INTERVAL ? SRCU_MAX_INTERVAL : jbase;
}
}
EXPORT_SYMBOL_GPL(__srcu_read_unlock);
-/*
- * We use an adaptive strategy for synchronize_srcu() and especially for
- * synchronize_srcu_expedited(). We spin for a fixed time period
- * (defined below) to allow SRCU readers to exit their read-side critical
- * sections. If there are still some readers after a few microseconds,
- * we repeatedly block for 1-millisecond time periods.
- */
-#define SRCU_RETRY_CHECK_DELAY 5
-
/*
* Start an SRCU grace period.
*/
*/
static void srcu_gp_end(struct srcu_struct *ssp)
{
- unsigned long cbdelay;
+ unsigned long cbdelay = 1;
bool cbs;
bool last_lvl;
int cpu;
spin_lock_irq_rcu_node(ssp);
idx = rcu_seq_state(ssp->srcu_gp_seq);
WARN_ON_ONCE(idx != SRCU_STATE_SCAN2);
- cbdelay = !!srcu_get_delay(ssp);
+ if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), READ_ONCE(ssp->srcu_gp_seq_needed_exp)))
+ cbdelay = 0;
+
WRITE_ONCE(ssp->srcu_last_gp_end, ktime_get_mono_fast_ns());
rcu_seq_end(&ssp->srcu_gp_seq);
gpseq = rcu_seq_current(&ssp->srcu_gp_seq);
*/
static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount)
{
+ unsigned long curdelay;
+
+ curdelay = !srcu_get_delay(ssp);
+
for (;;) {
if (srcu_readers_active_idx_check(ssp, idx))
return true;
- if (--trycount + !srcu_get_delay(ssp) <= 0)
+ if ((--trycount + curdelay) <= 0)
return false;
- udelay(SRCU_RETRY_CHECK_DELAY);
+ udelay(srcu_retry_check_delay);
}
}
j = jiffies;
if (READ_ONCE(ssp->reschedule_jiffies) == j) {
WRITE_ONCE(ssp->reschedule_count, READ_ONCE(ssp->reschedule_count) + 1);
- if (READ_ONCE(ssp->reschedule_count) > SRCU_MAX_NODELAY)
+ if (READ_ONCE(ssp->reschedule_count) > srcu_max_nodelay)
curdelay = 1;
} else {
WRITE_ONCE(ssp->reschedule_count, 1);
pr_info("Hierarchical SRCU implementation.\n");
if (exp_holdoff != DEFAULT_SRCU_EXP_HOLDOFF)
pr_info("\tNon-default auto-expedite holdoff of %lu ns.\n", exp_holdoff);
+ if (srcu_retry_check_delay != SRCU_DEFAULT_RETRY_CHECK_DELAY)
+ pr_info("\tNon-default retry check delay of %lu us.\n", srcu_retry_check_delay);
+ if (srcu_max_nodelay != SRCU_DEFAULT_MAX_NODELAY)
+ pr_info("\tNon-default max no-delay of %lu.\n", srcu_max_nodelay);
+ pr_info("\tMax phase no-delay instances is %lu.\n", srcu_max_nodelay_phase);
return 0;
}
early_initcall(srcu_bootup_announce);
* See Documentation/RCU/stallwarn.rst for info on how to debug
* RCU CPU stall warnings.
*/
- printk_prefer_direct_enter();
trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected"));
pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);
raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags);
*/
set_tsk_need_resched(current);
set_preempt_need_resched();
- printk_prefer_direct_exit();
}
static void check_cpu_stall(struct rcu_data *rdp)
return handler->sys_off_cb(&data);
}
+static struct sys_off_handler platform_sys_off_handler;
+
+static struct sys_off_handler *alloc_sys_off_handler(int priority)
+{
+ struct sys_off_handler *handler;
+ gfp_t flags;
+
+ /*
+ * Platforms like m68k can't allocate sys_off handler dynamically
+ * at the early boot time because memory allocator isn't available yet.
+ */
+ if (priority == SYS_OFF_PRIO_PLATFORM) {
+ handler = &platform_sys_off_handler;
+ if (handler->cb_data)
+ return ERR_PTR(-EBUSY);
+ } else {
+ if (system_state > SYSTEM_RUNNING)
+ flags = GFP_ATOMIC;
+ else
+ flags = GFP_KERNEL;
+
+ handler = kzalloc(sizeof(*handler), flags);
+ if (!handler)
+ return ERR_PTR(-ENOMEM);
+ }
+
+ return handler;
+}
+
+static void free_sys_off_handler(struct sys_off_handler *handler)
+{
+ if (handler == &platform_sys_off_handler)
+ memset(handler, 0, sizeof(*handler));
+ else
+ kfree(handler);
+}
+
/**
* register_sys_off_handler - Register sys-off handler
* @mode: Sys-off mode
struct sys_off_handler *handler;
int err;
- handler = kzalloc(sizeof(*handler), GFP_KERNEL);
- if (!handler)
- return ERR_PTR(-ENOMEM);
+ handler = alloc_sys_off_handler(priority);
+ if (IS_ERR(handler))
+ return handler;
switch (mode) {
case SYS_OFF_MODE_POWER_OFF_PREPARE:
break;
default:
- kfree(handler);
+ free_sys_off_handler(handler);
return ERR_PTR(-EINVAL);
}
}
if (err) {
- kfree(handler);
+ free_sys_off_handler(handler);
return ERR_PTR(err);
}
{
int err;
- if (!handler)
+ if (IS_ERR_OR_NULL(handler))
return;
if (handler->blocking)
/* sanity check, shall never happen */
WARN_ON(err);
- kfree(handler);
+ free_sys_off_handler(handler);
}
EXPORT_SYMBOL_GPL(unregister_sys_off_handler);
*/
void do_kernel_power_off(void)
{
+ struct sys_off_handler *sys_off = NULL;
+
+ /*
+ * Register sys-off handlers for legacy PM callback. This allows
+ * legacy PM callbacks temporary co-exist with the new sys-off API.
+ *
+ * TODO: Remove legacy handlers once all legacy PM users will be
+ * switched to the sys-off based APIs.
+ */
+ if (pm_power_off)
+ sys_off = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
+ SYS_OFF_PRIO_DEFAULT,
+ legacy_pm_power_off, NULL);
+
atomic_notifier_call_chain(&power_off_handler_list, 0, NULL);
+
+ unregister_sys_off_handler(sys_off);
}
/**
*/
bool kernel_can_power_off(void)
{
- return !atomic_notifier_call_chain_is_empty(&power_off_handler_list);
+ return !atomic_notifier_call_chain_is_empty(&power_off_handler_list) ||
+ pm_power_off;
}
EXPORT_SYMBOL_GPL(kernel_can_power_off);
void __user *, arg)
{
struct pid_namespace *pid_ns = task_active_pid_ns(current);
- struct sys_off_handler *sys_off = NULL;
char buffer[256];
int ret = 0;
if (ret)
return ret;
- /*
- * Register sys-off handlers for legacy PM callback. This allows
- * legacy PM callbacks temporary co-exist with the new sys-off API.
- *
- * TODO: Remove legacy handlers once all legacy PM users will be
- * switched to the sys-off based APIs.
- */
- if (pm_power_off) {
- sys_off = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF,
- SYS_OFF_PRIO_DEFAULT,
- legacy_pm_power_off, NULL);
- if (IS_ERR(sys_off))
- return PTR_ERR(sys_off);
- }
-
/* Instead of trying to make the power_off code look like
* halt when pm_power_off is not set do it the easy way.
*/
break;
}
mutex_unlock(&system_transition_mutex);
- unregister_sys_off_handler(sys_off);
return ret;
}
ret = run_cmd(reboot_cmd);
if (ret) {
- printk_prefer_direct_enter();
pr_warn("Failed to start orderly reboot: forcing the issue\n");
emergency_sync();
kernel_restart(NULL);
- printk_prefer_direct_exit();
}
return ret;
ret = run_cmd(poweroff_cmd);
if (ret && force) {
- printk_prefer_direct_enter();
pr_warn("Failed to start orderly shutdown: forcing the issue\n");
/*
*/
emergency_sync();
kernel_power_off();
- printk_prefer_direct_exit();
}
return ret;
*/
static void hw_failure_emergency_poweroff_func(struct work_struct *work)
{
- printk_prefer_direct_enter();
-
/*
* We have reached here after the emergency shutdown waiting period has
* expired. This means orderly_poweroff has not been able to shut off
*/
pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n");
emergency_restart();
-
- printk_prefer_direct_exit();
}
static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work,
{
static atomic_t allow_proceed = ATOMIC_INIT(1);
- printk_prefer_direct_enter();
-
pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason);
/* Shutdown should be initiated only once. */
if (!atomic_dec_and_test(&allow_proceed))
- goto out;
+ return;
/*
* Queue a backup emergency shutdown in the event of
*/
hw_failure_emergency_poweroff(ms_until_forced);
orderly_poweroff(true);
-out:
- printk_prefer_direct_exit();
}
EXPORT_SYMBOL_GPL(hw_protection_shutdown);
static void balance_push(struct rq *rq);
+/*
+ * balance_push_callback is a right abuse of the callback interface and plays
+ * by significantly different rules.
+ *
+ * Where the normal balance_callback's purpose is to be ran in the same context
+ * that queued it (only later, when it's safe to drop rq->lock again),
+ * balance_push_callback is specifically targeted at __schedule().
+ *
+ * This abuse is tolerated because it places all the unlikely/odd cases behind
+ * a single test, namely: rq->balance_callback == NULL.
+ */
struct callback_head balance_push_callback = {
.next = NULL,
.func = (void (*)(struct callback_head *))balance_push,
};
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+static inline struct callback_head *
+__splice_balance_callbacks(struct rq *rq, bool split)
{
struct callback_head *head = rq->balance_callback;
+ if (likely(!head))
+ return NULL;
+
lockdep_assert_rq_held(rq);
- if (head)
+ /*
+ * Must not take balance_push_callback off the list when
+ * splice_balance_callbacks() and balance_callbacks() are not
+ * in the same rq->lock section.
+ *
+ * In that case it would be possible for __schedule() to interleave
+ * and observe the list empty.
+ */
+ if (split && head == &balance_push_callback)
+ head = NULL;
+ else
rq->balance_callback = NULL;
return head;
}
+static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+{
+ return __splice_balance_callbacks(rq, true);
+}
+
static void __balance_callbacks(struct rq *rq)
{
- do_balance_callbacks(rq, splice_balance_callbacks(rq));
+ do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
}
static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
* the throttle.
*/
p->dl.dl_throttled = 0;
- BUG_ON(!is_dl_boosted(&p->dl) || flags != ENQUEUE_REPLENISH);
+ if (!(flags & ENQUEUE_REPLENISH))
+ printk_deferred_once("sched: DL de-boosted task PID %d: REPLENISH flag missing\n",
+ task_pid_nr(p));
+
return;
}
{
lockdep_assert_rq_held(rq);
+ /*
+ * Don't (re)queue an already queued item; nor queue anything when
+ * balance_push() is active, see the comment with
+ * balance_push_callback.
+ */
if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
return;
bool autoreap = false;
u64 utime, stime;
- BUG_ON(sig == -1);
+ WARN_ON_ONCE(sig == -1);
- /* do_notify_parent_cldstop should have been called instead. */
- BUG_ON(task_is_stopped_or_traced(tsk));
+ /* do_notify_parent_cldstop should have been called instead. */
+ WARN_ON_ONCE(task_is_stopped_or_traced(tsk));
- BUG_ON(!tsk->ptrace &&
+ WARN_ON_ONCE(!tsk->ptrace &&
(tsk->group_leader != tsk || !thread_group_empty(tsk)));
/* Wake up all pidfd waiters */
if (*negp) {
if (*lvalp > (unsigned long) INT_MAX + 1)
return -EINVAL;
- *valp = -*lvalp;
+ WRITE_ONCE(*valp, -*lvalp);
} else {
if (*lvalp > (unsigned long) INT_MAX)
return -EINVAL;
- *valp = *lvalp;
+ WRITE_ONCE(*valp, *lvalp);
}
} else {
- int val = *valp;
+ int val = READ_ONCE(*valp);
if (val < 0) {
*negp = true;
*lvalp = -(unsigned long)val;
if (write) {
if (*lvalp > UINT_MAX)
return -EINVAL;
- *valp = *lvalp;
+ WRITE_ONCE(*valp, *lvalp);
} else {
- unsigned int val = *valp;
+ unsigned int val = READ_ONCE(*valp);
*lvalp = (unsigned long)val;
}
return 0;
if ((param->min && *param->min > tmp) ||
(param->max && *param->max < tmp))
return -EINVAL;
- *valp = tmp;
+ WRITE_ONCE(*valp, tmp);
}
return 0;
(param->max && *param->max < tmp))
return -ERANGE;
- *valp = tmp;
+ WRITE_ONCE(*valp, tmp);
}
return 0;
tmp.maxlen = sizeof(val);
tmp.data = &val;
- val = *data;
+ val = READ_ONCE(*data);
res = do_proc_douintvec(&tmp, write, buffer, lenp, ppos,
do_proc_douintvec_minmax_conv, ¶m);
if (res)
return res;
if (write)
- *data = val;
+ WRITE_ONCE(*data, val);
return 0;
}
EXPORT_SYMBOL_GPL(proc_dou8vec_minmax);
err = -EINVAL;
break;
}
- *i = val;
+ WRITE_ONCE(*i, val);
} else {
- val = convdiv * (*i) / convmul;
+ val = convdiv * READ_ONCE(*i) / convmul;
if (!first)
proc_put_char(&buffer, &left, '\t');
proc_put_long(&buffer, &left, val, false);
if (write) {
if (*lvalp > INT_MAX / HZ)
return 1;
- *valp = *negp ? -(*lvalp*HZ) : (*lvalp*HZ);
+ if (*negp)
+ WRITE_ONCE(*valp, -*lvalp * HZ);
+ else
+ WRITE_ONCE(*valp, *lvalp * HZ);
} else {
- int val = *valp;
+ int val = READ_ONCE(*valp);
unsigned long lval;
if (val < 0) {
*negp = true;
if (jif > INT_MAX)
return 1;
- *valp = (int)jif;
+ WRITE_ONCE(*valp, (int)jif);
} else {
- int val = *valp;
+ int val = READ_ONCE(*valp);
unsigned long lval;
if (val < 0) {
*negp = true;
* @ppos: the current position in the file
*
* Reads/writes up to table->maxlen/sizeof(unsigned int) integer
- * values from/to the user buffer, treated as an ASCII string.
- * The values read are assumed to be in 1/1000 seconds, and
+ * values from/to the user buffer, treated as an ASCII string.
+ * The values read are assumed to be in 1/1000 seconds, and
* are converted into jiffies.
*
* Returns 0 on success.
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_TWO_HUNDRED,
},
+#ifdef CONFIG_NUMA
+ {
+ .procname = "numa_stat",
+ .data = &sysctl_vm_numa_stat,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = sysctl_vm_numa_stat_handler,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
+ },
+#endif
#ifdef CONFIG_HUGETLB_PAGE
{
.procname = "nr_hugepages",
.mode = 0644,
.proc_handler = &hugetlb_mempolicy_sysctl_handler,
},
- {
- .procname = "numa_stat",
- .data = &sysctl_vm_numa_stat,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = sysctl_vm_numa_stat_handler,
- .extra1 = SYSCTL_ZERO,
- .extra2 = SYSCTL_ONE,
- },
#endif
{
.procname = "hugetlb_shm_group",
}
/*
- * This is called by do_exit or de_thread, only when there are no more
- * references to the shared signal_struct.
+ * This is called by do_exit or de_thread, only when nobody else can
+ * modify the signal->posix_timers list. Yet we need sighand->siglock
+ * to prevent the race with /proc/pid/timers.
*/
-void exit_itimers(struct signal_struct *sig)
+void exit_itimers(struct task_struct *tsk)
{
+ struct list_head timers;
struct k_itimer *tmr;
- while (!list_empty(&sig->posix_timers)) {
- tmr = list_entry(sig->posix_timers.next, struct k_itimer, list);
+ if (list_empty(&tsk->signal->posix_timers))
+ return;
+
+ spin_lock_irq(&tsk->sighand->siglock);
+ list_replace_init(&tsk->signal->posix_timers, &timers);
+ spin_unlock_irq(&tsk->sighand->siglock);
+
+ while (!list_empty(&timers)) {
+ tmr = list_first_entry(&timers, struct k_itimer, list);
itimer_delete(tmr);
}
}
cpumask_copy(tick_nohz_full_mask, cpumask);
tick_nohz_full_running = true;
}
-EXPORT_SYMBOL_GPL(tick_nohz_full_setup);
static int tick_nohz_cpu_down(unsigned int cpu)
{
sequence is then dynamically patched into a tracer call when
tracing is enabled by the administrator. If it's runtime disabled
(the bootup default), then the overhead of the instructions is very
- small and not measurable even in micro-benchmarks.
+ small and not measurable even in micro-benchmarks (at least on
+ x86, but may have impact on other architectures).
config FUNCTION_GRAPH_TRACER
bool "Kernel Function Graph Tracer"
**/
void blk_trace_shutdown(struct request_queue *q)
{
- mutex_lock(&q->debugfs_mutex);
if (rcu_dereference_protected(q->blk_trace,
lockdep_is_held(&q->debugfs_mutex))) {
__blk_trace_startstop(q, 0);
__blk_trace_remove(q);
}
-
- mutex_unlock(&q->debugfs_mutex);
}
#ifdef CONFIG_BLK_CGROUP
int err = -ENOMEM;
unsigned int i;
- syms = kvmalloc(cnt * sizeof(*syms), GFP_KERNEL);
+ syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);
if (!syms)
goto error;
- buf = kvmalloc(cnt * KSYM_NAME_LEN, GFP_KERNEL);
+ buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);
if (!buf)
goto error;
kprobe_multi_link_prog_run(link, entry_ip, regs);
}
-static int symbols_cmp(const void *a, const void *b)
+static int symbols_cmp_r(const void *a, const void *b, const void *priv)
{
const char **str_a = (const char **) a;
const char **str_b = (const char **) b;
return strcmp(*str_a, *str_b);
}
+struct multi_symbols_sort {
+ const char **funcs;
+ u64 *cookies;
+};
+
+static void symbols_swap_r(void *a, void *b, int size, const void *priv)
+{
+ const struct multi_symbols_sort *data = priv;
+ const char **name_a = a, **name_b = b;
+
+ swap(*name_a, *name_b);
+
+ /* If defined, swap also related cookies. */
+ if (data->cookies) {
+ u64 *cookie_a, *cookie_b;
+
+ cookie_a = data->cookies + (name_a - data->funcs);
+ cookie_b = data->cookies + (name_b - data->funcs);
+ swap(*cookie_a, *cookie_b);
+ }
+}
+
int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
{
struct bpf_kprobe_multi_link *link = NULL;
return -EINVAL;
size = cnt * sizeof(*addrs);
- addrs = kvmalloc(size, GFP_KERNEL);
+ addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
if (!addrs)
return -ENOMEM;
+ ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
+ if (ucookies) {
+ cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);
+ if (!cookies) {
+ err = -ENOMEM;
+ goto error;
+ }
+ if (copy_from_user(cookies, ucookies, size)) {
+ err = -EFAULT;
+ goto error;
+ }
+ }
+
if (uaddrs) {
if (copy_from_user(addrs, uaddrs, size)) {
err = -EFAULT;
goto error;
}
} else {
+ struct multi_symbols_sort data = {
+ .cookies = cookies,
+ };
struct user_syms us;
err = copy_user_syms(&us, usyms, cnt);
if (err)
goto error;
- sort(us.syms, cnt, sizeof(*us.syms), symbols_cmp, NULL);
+ if (cookies)
+ data.funcs = us.syms;
+
+ sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r,
+ symbols_swap_r, &data);
+
err = ftrace_lookup_symbols(us.syms, cnt, addrs);
free_user_syms(&us);
if (err)
goto error;
}
- ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies);
- if (ucookies) {
- cookies = kvmalloc(size, GFP_KERNEL);
- if (!cookies) {
- err = -ENOMEM;
- goto error;
- }
- if (copy_from_user(cookies, ucookies, size)) {
- err = -EFAULT;
- goto error;
- }
- }
-
link = kzalloc(sizeof(*link), GFP_KERNEL);
if (!link) {
err = -ENOMEM;
struct module *mod, unsigned long addr)
{
struct kallsyms_data *args = data;
+ const char **sym;
+ int idx;
- if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp))
+ sym = bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp);
+ if (!sym)
+ return 0;
+
+ idx = sym - args->syms;
+ if (args->addrs[idx])
return 0;
addr = ftrace_location(addr);
if (!addr)
return 0;
- args->addrs[args->found++] = addr;
+ args->addrs[idx] = addr;
+ args->found++;
return args->found == args->cnt ? 1 : 0;
}
struct kallsyms_data args;
int err;
+ memset(addrs, 0, sizeof(*addrs) * cnt);
args.addrs = addrs;
args.syms = sorted_syms;
args.cnt = cnt;
if (unlikely(!handler))
return NULL;
+ /*
+ * This expects the caller will set up a rethook on a function entry.
+ * When the function returns, the rethook will eventually be reclaimed
+ * or released in the rethook_recycle() with call_rcu().
+ * This means the caller must be run in the RCU-availabe context.
+ */
+ if (unlikely(!rcu_is_watching()))
+ return NULL;
+
fn = freelist_try_get(&rh->pool);
if (!fn)
return NULL;
synchronize_rcu();
free_snapshot(tr);
}
-#endif
-#ifdef CONFIG_TRACER_MAX_TRACE
if (t->use_max_tr && !had_max_tr) {
ret = tracing_alloc_snapshot_instance(tr);
if (ret < 0)
/* Output in nanoseconds only if we are using a clock in nanoseconds. */
if (trace_clocks[iter->tr->clock_id].in_ns)
iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
+
+ /* Can not use kmalloc for iter.temp and iter.fmt */
+ iter->temp = static_temp_buf;
+ iter->temp_size = STATIC_TEMP_BUF_SIZE;
+ iter->fmt = static_fmt_buf;
+ iter->fmt_size = STATIC_FMT_BUF_SIZE;
}
void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
/* Simulate the iterator */
trace_init_global_iter(&iter);
- /* Can not use kmalloc for iter.temp and iter.fmt */
- iter.temp = static_temp_buf;
- iter.temp_size = STATIC_TEMP_BUF_SIZE;
- iter.fmt = static_fmt_buf;
- iter.fmt_size = STATIC_FMT_BUF_SIZE;
for_each_tracing_cpu(cpu) {
atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
s = kstrdup(field_str, GFP_KERNEL);
if (!s) {
+ kfree(hist_data->attrs->var_defs.name[n_vars]);
+ hist_data->attrs->var_defs.name[n_vars] = NULL;
ret = -ENOMEM;
goto free;
}
kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)
{
struct kretprobe *rp = get_kretprobe(ri);
- struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp);
+ struct trace_kprobe *tk;
+
+ /*
+ * There is a small chance that get_kretprobe(ri) returns NULL when
+ * the kretprobe is unregister on another CPU between kretprobe's
+ * trampoline_handler and this function.
+ */
+ if (unlikely(!rp))
+ return 0;
+ tk = container_of(rp, struct trace_kprobe, rp);
raw_cpu_inc(*tk->nhit);
if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE))
bool is_return = false;
int i, ret;
- ret = 0;
ref_ctr_offset = 0;
switch (argv[0][0]) {
#define WATCH_QUEUE_NOTE_SIZE 128
#define WATCH_QUEUE_NOTES_PER_PAGE (PAGE_SIZE / WATCH_QUEUE_NOTE_SIZE)
+/*
+ * This must be called under the RCU read-lock, which makes
+ * sure that the wqueue still exists. It can then take the lock,
+ * and check that the wqueue hasn't been destroyed, which in
+ * turn makes sure that the notification pipe still exists.
+ */
+static inline bool lock_wqueue(struct watch_queue *wqueue)
+{
+ spin_lock_bh(&wqueue->lock);
+ if (unlikely(wqueue->defunct)) {
+ spin_unlock_bh(&wqueue->lock);
+ return false;
+ }
+ return true;
+}
+
+static inline void unlock_wqueue(struct watch_queue *wqueue)
+{
+ spin_unlock_bh(&wqueue->lock);
+}
+
static void watch_queue_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
/*
* Post a notification to a watch queue.
+ *
+ * Must be called with the RCU lock for reading, and the
+ * watch_queue lock held, which guarantees that the pipe
+ * hasn't been released.
*/
static bool post_one_notification(struct watch_queue *wqueue,
struct watch_notification *n)
spin_lock_irq(&pipe->rd_wait.lock);
- if (wqueue->defunct)
- goto out;
-
mask = pipe->ring_size - 1;
head = pipe->head;
tail = pipe->tail;
if (security_post_notification(watch->cred, cred, n) < 0)
continue;
- post_one_notification(wqueue, n);
+ if (lock_wqueue(wqueue)) {
+ post_one_notification(wqueue, n);
+ unlock_wqueue(wqueue);
+ }
}
rcu_read_unlock();
rcu_assign_pointer(watch->queue, wqueue);
}
+static int add_one_watch(struct watch *watch, struct watch_list *wlist, struct watch_queue *wqueue)
+{
+ const struct cred *cred;
+ struct watch *w;
+
+ hlist_for_each_entry(w, &wlist->watchers, list_node) {
+ struct watch_queue *wq = rcu_access_pointer(w->queue);
+ if (wqueue == wq && watch->id == w->id)
+ return -EBUSY;
+ }
+
+ cred = current_cred();
+ if (atomic_inc_return(&cred->user->nr_watches) > task_rlimit(current, RLIMIT_NOFILE)) {
+ atomic_dec(&cred->user->nr_watches);
+ return -EAGAIN;
+ }
+
+ watch->cred = get_cred(cred);
+ rcu_assign_pointer(watch->watch_list, wlist);
+
+ kref_get(&wqueue->usage);
+ kref_get(&watch->usage);
+ hlist_add_head(&watch->queue_node, &wqueue->watches);
+ hlist_add_head_rcu(&watch->list_node, &wlist->watchers);
+ return 0;
+}
+
/**
* add_watch_to_object - Add a watch on an object to a watch list
* @watch: The watch to add
*/
int add_watch_to_object(struct watch *watch, struct watch_list *wlist)
{
- struct watch_queue *wqueue = rcu_access_pointer(watch->queue);
- struct watch *w;
-
- hlist_for_each_entry(w, &wlist->watchers, list_node) {
- struct watch_queue *wq = rcu_access_pointer(w->queue);
- if (wqueue == wq && watch->id == w->id)
- return -EBUSY;
- }
+ struct watch_queue *wqueue;
+ int ret = -ENOENT;
- watch->cred = get_current_cred();
- rcu_assign_pointer(watch->watch_list, wlist);
+ rcu_read_lock();
- if (atomic_inc_return(&watch->cred->user->nr_watches) >
- task_rlimit(current, RLIMIT_NOFILE)) {
- atomic_dec(&watch->cred->user->nr_watches);
- put_cred(watch->cred);
- return -EAGAIN;
+ wqueue = rcu_access_pointer(watch->queue);
+ if (lock_wqueue(wqueue)) {
+ spin_lock(&wlist->lock);
+ ret = add_one_watch(watch, wlist, wqueue);
+ spin_unlock(&wlist->lock);
+ unlock_wqueue(wqueue);
}
- spin_lock_bh(&wqueue->lock);
- kref_get(&wqueue->usage);
- kref_get(&watch->usage);
- hlist_add_head(&watch->queue_node, &wqueue->watches);
- spin_unlock_bh(&wqueue->lock);
-
- hlist_add_head(&watch->list_node, &wlist->watchers);
- return 0;
+ rcu_read_unlock();
+ return ret;
}
EXPORT_SYMBOL(add_watch_to_object);
wqueue = rcu_dereference(watch->queue);
- /* We don't need the watch list lock for the next bit as RCU is
- * protecting *wqueue from deallocation.
- */
- if (wqueue) {
+ if (lock_wqueue(wqueue)) {
post_one_notification(wqueue, &n.watch);
- spin_lock_bh(&wqueue->lock);
-
if (!hlist_unhashed(&watch->queue_node)) {
hlist_del_init_rcu(&watch->queue_node);
put_watch(watch);
}
- spin_unlock_bh(&wqueue->lock);
+ unlock_wqueue(wqueue);
}
if (wlist->release_watch) {
/* Start period for the next softlockup warning. */
update_report_ts();
- printk_prefer_direct_enter();
-
pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
smp_processor_id(), duration,
current->comm, task_pid_nr(current));
add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);
if (softlockup_panic)
panic("softlockup: hung tasks");
-
- printk_prefer_direct_exit();
}
return HRTIMER_RESTART;
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
- printk_prefer_direct_enter();
-
pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",
this_cpu);
print_modules();
if (hardlockup_panic)
nmi_panic(regs, "Hard LOCKUP");
- printk_prefer_direct_exit();
-
__this_cpu_write(hard_watchdog_warn, true);
return;
}
}
/**
- * flush_workqueue - ensure that any scheduled work has run to completion.
+ * __flush_workqueue - ensure that any scheduled work has run to completion.
* @wq: workqueue to flush
*
* This function sleeps until all work items which were queued on entry
* have finished execution, but it is not livelocked by new incoming ones.
*/
-void flush_workqueue(struct workqueue_struct *wq)
+void __flush_workqueue(struct workqueue_struct *wq)
{
struct wq_flusher this_flusher = {
.list = LIST_HEAD_INIT(this_flusher.list),
out_unlock:
mutex_unlock(&wq->mutex);
}
-EXPORT_SYMBOL(flush_workqueue);
+EXPORT_SYMBOL(__flush_workqueue);
/**
* drain_workqueue - drain a workqueue
wq->flags |= __WQ_DRAINING;
mutex_unlock(&wq->mutex);
reflush:
- flush_workqueue(wq);
+ __flush_workqueue(wq);
mutex_lock(&wq->mutex);
for_each_pool_worker(worker, pool) {
kthread_set_per_cpu(worker->task, -1);
- WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
+ if (cpumask_intersects(wq_unbound_cpumask, cpu_active_mask))
+ WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, wq_unbound_cpumask) < 0);
+ else
+ WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, cpu_possible_mask) < 0);
}
mutex_unlock(&wq_pool_attach_mutex);
wq_online = true;
wq_watchdog_init();
}
+
+/*
+ * Despite the naming, this is a no-op function which is here only for avoiding
+ * link error. Since compile-time warning may fail to catch, we will need to
+ * emit run-time warning from __flush_workqueue().
+ */
+void __warn_flushing_systemwide_wq(void) { }
+EXPORT_SYMBOL(__warn_flushing_systemwide_wq);
source "lib/crypto/Kconfig"
+config LIB_MEMNEQ
+ bool
+
config CRC_CCITT
tristate "CRC-CCITT functions"
help
config UBSAN_DIV_ZERO
bool "Perform checking for integer divide-by-zero"
depends on $(cc-option,-fsanitize=integer-divide-by-zero)
+ # https://github.com/ClangBuiltLinux/linux/issues/1657
+ # https://github.com/llvm/llvm-project/issues/56289
+ depends on !CC_IS_CLANG
help
This option enables -fsanitize=integer-divide-by-zero which checks
for integer division by zero. This is effectively redundant with the
bool "Perform checking for unreachable code"
# objtool already handles unreachable checking and gets angry about
# seeing UBSan instrumentation located in unreachable places.
- depends on !(OBJTOOL && (STACK_VALIDATION || UNWINDER_ORC || X86_SMAP))
+ depends on !(OBJTOOL && (STACK_VALIDATION || UNWINDER_ORC || HAVE_UACCESS_VALIDATION))
depends on $(cc-option,-fsanitize=unreachable)
help
This option enables -fsanitize=unreachable which checks for control
obj-$(CONFIG_SIGNATURE) += digsig.o
lib-$(CONFIG_CLZ_TAB) += clz_tab.o
+lib-$(CONFIG_LIB_MEMNEQ) += memneq.o
obj-$(CONFIG_GENERIC_STRNCPY_FROM_USER) += strncpy_from_user.o
obj-$(CONFIG_GENERIC_STRNLEN_USER) += strnlen_user.o
#include <linux/module.h>
#include <linux/crc-itu-t.h>
-/** CRC table for the CRC ITU-T V.41 0x1021 (x^16 + x^12 + x^15 + 1) */
+/* CRC table for the CRC ITU-T V.41 0x1021 (x^16 + x^12 + x^5 + 1) */
const u16 crc_itu_t_table[256] = {
0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
tristate "Curve25519 scalar multiplication library"
depends on CRYPTO_ARCH_HAVE_LIB_CURVE25519 || !CRYPTO_ARCH_HAVE_LIB_CURVE25519
select CRYPTO_LIB_CURVE25519_GENERIC if CRYPTO_ARCH_HAVE_LIB_CURVE25519=n
+ select LIB_MEMNEQ
help
Enable the Curve25519 library interface. This interface may be
fulfilled by either the generic implementation or an arch-specific
struct ida_bitmap *bitmap;
unsigned long flags;
- BUG_ON((int)id < 0);
+ if ((int)id < 0)
+ return;
xas_lock_irqsave(&xas, flags);
bitmap = xas_load(&xas);
{
unsigned nr, offset;
pgoff_t index, count;
- size_t size = maxsize, actual;
+ size_t size = maxsize;
loff_t pos;
if (!size || !maxpages)
if (nr == 0)
return 0;
- actual = PAGE_SIZE * nr;
- actual -= offset;
- if (nr == count && size > 0) {
- unsigned last_offset = (nr > 1) ? 0 : offset;
- actual -= PAGE_SIZE - (last_offset + size);
- }
- return actual;
+ return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
}
/* must be done on non-empty ITER_IOVEC one */
struct page **p;
unsigned nr, offset;
pgoff_t index, count;
- size_t size = maxsize, actual;
+ size_t size = maxsize;
loff_t pos;
if (!size)
if (nr == 0)
return 0;
- actual = PAGE_SIZE * nr;
- actual -= offset;
- if (nr == count && size > 0) {
- unsigned last_offset = (nr > 1) ? 0 : offset;
- actual -= PAGE_SIZE - (last_offset + size);
- }
- return actual;
+ return min_t(size_t, nr * PAGE_SIZE - offset, maxsize);
}
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
}
EXPORT_SYMBOL(lockref_put_not_zero);
-/**
- * lockref_get_or_lock - Increments count unless the count is 0 or dead
- * @lockref: pointer to lockref structure
- * Return: 1 if count updated successfully or 0 if count was zero
- * and we got the lock instead.
- */
-int lockref_get_or_lock(struct lockref *lockref)
-{
- CMPXCHG_LOOP(
- new.count++;
- if (old.count <= 0)
- break;
- ,
- return 1;
- );
-
- spin_lock(&lockref->lock);
- if (lockref->count <= 0)
- return 0;
- lockref->count++;
- spin_unlock(&lockref->lock);
- return 1;
-}
-EXPORT_SYMBOL(lockref_get_or_lock);
-
/**
* lockref_put_return - Decrement reference count if possible
* @lockref: pointer to lockref structure
--- /dev/null
+/*
+ * Constant-time equality testing of memory regions.
+ *
+ * Authors:
+ *
+ * James Yonan <james@openvpn.net>
+ * Daniel Borkmann <dborkman@redhat.com>
+ *
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ * The full GNU General Public License is included in this distribution
+ * in the file called LICENSE.GPL.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2013 OpenVPN Technologies, Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of OpenVPN Technologies nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <crypto/algapi.h>
+#include <asm/unaligned.h>
+
+#ifndef __HAVE_ARCH_CRYPTO_MEMNEQ
+
+/* Generic path for arbitrary size */
+static inline unsigned long
+__crypto_memneq_generic(const void *a, const void *b, size_t size)
+{
+ unsigned long neq = 0;
+
+#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
+ while (size >= sizeof(unsigned long)) {
+ neq |= get_unaligned((unsigned long *)a) ^
+ get_unaligned((unsigned long *)b);
+ OPTIMIZER_HIDE_VAR(neq);
+ a += sizeof(unsigned long);
+ b += sizeof(unsigned long);
+ size -= sizeof(unsigned long);
+ }
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+ while (size > 0) {
+ neq |= *(unsigned char *)a ^ *(unsigned char *)b;
+ OPTIMIZER_HIDE_VAR(neq);
+ a += 1;
+ b += 1;
+ size -= 1;
+ }
+ return neq;
+}
+
+/* Loop-free fast-path for frequently used 16-byte size */
+static inline unsigned long __crypto_memneq_16(const void *a, const void *b)
+{
+ unsigned long neq = 0;
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ if (sizeof(unsigned long) == 8) {
+ neq |= get_unaligned((unsigned long *)a) ^
+ get_unaligned((unsigned long *)b);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= get_unaligned((unsigned long *)(a + 8)) ^
+ get_unaligned((unsigned long *)(b + 8));
+ OPTIMIZER_HIDE_VAR(neq);
+ } else if (sizeof(unsigned int) == 4) {
+ neq |= get_unaligned((unsigned int *)a) ^
+ get_unaligned((unsigned int *)b);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= get_unaligned((unsigned int *)(a + 4)) ^
+ get_unaligned((unsigned int *)(b + 4));
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= get_unaligned((unsigned int *)(a + 8)) ^
+ get_unaligned((unsigned int *)(b + 8));
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= get_unaligned((unsigned int *)(a + 12)) ^
+ get_unaligned((unsigned int *)(b + 12));
+ OPTIMIZER_HIDE_VAR(neq);
+ } else
+#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */
+ {
+ neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14);
+ OPTIMIZER_HIDE_VAR(neq);
+ neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15);
+ OPTIMIZER_HIDE_VAR(neq);
+ }
+
+ return neq;
+}
+
+/* Compare two areas of memory without leaking timing information,
+ * and with special optimizations for common sizes. Users should
+ * not call this function directly, but should instead use
+ * crypto_memneq defined in crypto/algapi.h.
+ */
+noinline unsigned long __crypto_memneq(const void *a, const void *b,
+ size_t size)
+{
+ switch (size) {
+ case 16:
+ return __crypto_memneq_16(a, b);
+ default:
+ return __crypto_memneq_generic(a, b, size);
+ }
+}
+EXPORT_SYMBOL(__crypto_memneq);
+
+#endif /* __HAVE_ARCH_CRYPTO_MEMNEQ */
sbitmap_deferred_clear(map);
if (map->word == (1UL << (map_depth - 1)) - 1)
- continue;
+ goto next;
nr = find_first_zero_bit(&map->word, map_depth);
if (nr + nr_tags <= map_depth) {
get_mask = ((1UL << map_tags) - 1) << nr;
do {
val = READ_ONCE(map->word);
+ if ((val & ~get_mask) != val)
+ goto next;
ret = atomic_long_cmpxchg(ptr, val, get_mask | val);
} while (ret != val);
get_mask = (get_mask & ~ret) >> nr;
return get_mask;
}
}
+next:
/* Jump to next index. */
if (++index >= sb->map_nr)
index = 0;
static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn);
unsigned long flags;
- if (!system_unbound_wq ||
- (!rng_is_initialized() && !rng_has_arch_random()) ||
+ if (!system_unbound_wq || !rng_is_initialized() ||
!spin_trylock_irqsave(&filling, flags))
return -EAGAIN;
* xas_destroy() - Free any resources allocated during the XArray operation.
* @xas: XArray operation state.
*
- * This function is now internal-only.
+ * Most users will not need to call this function; it is called for you
+ * by xas_nomem().
*/
-static void xas_destroy(struct xa_state *xas)
+void xas_destroy(struct xa_state *xas)
{
struct xa_node *next, *node = xas->xa_alloc;
}
postcore_initcall(bdi_class_init);
-static int bdi_init(struct backing_dev_info *bdi);
-
static int __init default_bdi_init(void)
{
- int err;
-
bdi_wq = alloc_workqueue("writeback", WQ_MEM_RECLAIM | WQ_UNBOUND |
WQ_SYSFS, 0);
if (!bdi_wq)
return -ENOMEM;
-
- err = bdi_init(&noop_backing_dev_info);
-
- return err;
+ return 0;
}
subsys_initcall(default_bdi_init);
#endif /* CONFIG_CGROUP_WRITEBACK */
-static int bdi_init(struct backing_dev_info *bdi)
+int bdi_init(struct backing_dev_info *bdi)
{
int ret;
}
static DECLARE_DELAYED_WORK(damon_reclaim_timer, damon_reclaim_timer_fn);
+static bool damon_reclaim_initialized;
+
static int enabled_store(const char *val,
const struct kernel_param *kp)
{
if (rc < 0)
return rc;
+ /* system_wq might not initialized yet */
+ if (!damon_reclaim_initialized)
+ return rc;
+
if (enabled)
schedule_delayed_work(&damon_reclaim_timer, 0);
damon_add_target(ctx, target);
schedule_delayed_work(&damon_reclaim_timer, 0);
+
+ damon_reclaim_initialized = true;
return 0;
}
if (pte_young(entry)) {
referenced = true;
entry = pte_mkold(entry);
- huge_ptep_set_access_flags(vma, addr, pte, entry,
- vma->vm_flags & VM_WRITE);
+ set_huge_pte_at(mm, addr, pte, entry);
}
#ifdef CONFIG_MMU_NOTIFIER
continue;
if (xas.xa_index > max || xa_is_value(folio))
break;
+ if (xa_is_sibling(folio))
+ break;
if (!folio_try_get_rcu(folio))
goto retry;
return err;
}
+static inline bool pos_same_folio(loff_t pos1, loff_t pos2, struct folio *folio)
+{
+ unsigned int shift = folio_shift(folio);
+
+ return (pos1 >> shift == pos2 >> shift);
+}
+
/**
* filemap_read - Read data from the page cache.
* @iocb: The iocb to read.
writably_mapped = mapping_writably_mapped(mapping);
/*
- * When a sequential read accesses a page several times, only
+ * When a read accesses the same folio several times, only
* mark it as accessed the first time.
*/
- if (iocb->ki_pos >> PAGE_SHIFT !=
- ra->prev_pos >> PAGE_SHIFT)
+ if (!pos_same_folio(iocb->ki_pos, ra->prev_pos - 1,
+ fbatch.folios[0]))
folio_mark_accessed(fbatch.folios[0]);
for (i = 0; i < folio_batch_count(&fbatch); i++) {
struct address_space *mapping = file->f_mapping;
DEFINE_READAHEAD(ractl, file, ra, mapping, vmf->pgoff);
struct file *fpin = NULL;
+ unsigned long vm_flags = vmf->vma->vm_flags;
unsigned int mmap_miss;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
/* Use the readahead code, even if readahead is disabled */
- if (vmf->vma->vm_flags & VM_HUGEPAGE) {
+ if (vm_flags & VM_HUGEPAGE) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
ractl._index &= ~((unsigned long)HPAGE_PMD_NR - 1);
ra->size = HPAGE_PMD_NR;
* Fetch two PMD folios, so we get the chance to actually
* readahead, unless we've been told not to.
*/
- if (!(vmf->vma->vm_flags & VM_RAND_READ))
+ if (!(vm_flags & VM_RAND_READ))
ra->size *= 2;
ra->async_size = HPAGE_PMD_NR;
page_cache_ra_order(&ractl, ra, HPAGE_PMD_ORDER);
#endif
/* If we don't want any read-ahead, don't bother */
- if (vmf->vma->vm_flags & VM_RAND_READ)
+ if (vm_flags & VM_RAND_READ)
return fpin;
if (!ra->ra_pages)
return fpin;
- if (vmf->vma->vm_flags & VM_SEQ_READ) {
+ if (vm_flags & VM_SEQ_READ) {
fpin = maybe_unlock_mmap_for_io(vmf, fpin);
page_cache_sync_ra(&ractl, ra->ra_pages);
return fpin;
* belongs to this folio.
*/
if (unlikely(page_folio(page) != folio)) {
- folio_put_refs(folio, refs);
+ if (!put_devmap_managed_page_refs(&folio->page, refs))
+ folio_put_refs(folio, refs);
goto retry;
}
refs *= GUP_PIN_COUNTING_BIAS;
}
- folio_put_refs(folio, refs);
+ if (!put_devmap_managed_page_refs(&folio->page, refs))
+ folio_put_refs(folio, refs);
}
/**
unsigned long end, unsigned long hmm_pfns[], pmd_t pmd);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
-static inline bool hmm_is_device_private_entry(struct hmm_range *range,
- swp_entry_t entry)
-{
- return is_device_private_entry(entry) &&
- pfn_swap_entry_to_page(entry)->pgmap->owner ==
- range->dev_private_owner;
-}
-
static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
pte_t pte)
{
swp_entry_t entry = pte_to_swp_entry(pte);
/*
- * Never fault in device private pages, but just report
- * the PFN even if not present.
+ * Don't fault in device private pages owned by the caller,
+ * just report the PFN.
*/
- if (hmm_is_device_private_entry(range, entry)) {
+ if (is_device_private_entry(entry) &&
+ pfn_swap_entry_to_page(entry)->pgmap->owner ==
+ range->dev_private_owner) {
cpu_flags = HMM_PFN_VALID;
if (is_writable_device_private_entry(entry))
cpu_flags |= HMM_PFN_WRITE;
if (!non_swap_entry(entry))
goto fault;
+ if (is_device_private_entry(entry))
+ goto fault;
+
if (is_device_exclusive_entry(entry))
goto fault;
page_tail);
page_tail->mapping = head->mapping;
page_tail->index = head->index + tail;
+ page_tail->private = 0;
/* Page flags must be visible before we make the page non-compound. */
smp_wmb();
if (mapping)
i_mmap_unlock_read(mapping);
out:
- /* Free any memory we didn't use */
- xas_nomem(&xas, 0);
+ xas_destroy(&xas);
count_vm_event(!ret ? THP_SPLIT_PAGE : THP_SPLIT_PAGE_FAILED);
return ret;
}
* sharing with another vma.
*/
;
- } else if (unlikely(is_hugetlb_entry_migration(entry) ||
- is_hugetlb_entry_hwpoisoned(entry))) {
+ } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) {
+ bool uffd_wp = huge_pte_uffd_wp(entry);
+
+ if (!userfaultfd_wp(dst_vma) && uffd_wp)
+ entry = huge_pte_clear_uffd_wp(entry);
+ set_huge_pte_at(dst, addr, dst_pte, entry);
+ } else if (unlikely(is_hugetlb_entry_migration(entry))) {
swp_entry_t swp_entry = pte_to_swp_entry(entry);
bool uffd_wp = huge_pte_uffd_wp(entry);
page = alloc_huge_page(dst_vma, dst_addr, 0);
if (IS_ERR(page)) {
+ put_page(*pagep);
ret = -ENOMEM;
*pagep = NULL;
goto out;
inject:
pr_info("Injecting memory failure at pfn %#lx\n", pfn);
- err = memory_failure(pfn, 0);
+ err = memory_failure(pfn, MF_SW_SIMULATED);
return (err == -EOPNOTSUPP) ? 0 : err;
}
unsigned long flags;
struct slab *slab;
void *addr;
+ const bool random_right_allocate = prandom_u32_max(2);
+ const bool random_fault = CONFIG_KFENCE_STRESS_TEST_FAULTS &&
+ !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS);
/* Try to obtain a free object. */
raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
* is that the out-of-bounds accesses detected are deterministic for
* such allocations.
*/
- if (prandom_u32_max(2)) {
+ if (random_right_allocate) {
/* Allocate on the "right" side, re-calculate address. */
meta->addr += PAGE_SIZE - size;
meta->addr = ALIGN_DOWN(meta->addr, cache->align);
if (cache->ctor)
cache->ctor(addr);
- if (CONFIG_KFENCE_STRESS_TEST_FAULTS && !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS))
+ if (random_fault)
kfence_protect(meta->addr); /* Random "faults" by protecting the object. */
atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
addr += 2 * PAGE_SIZE;
}
- /*
- * The pool is live and will never be deallocated from this point on.
- * Remove the pool object from the kmemleak object tree, as it would
- * otherwise overlap with allocations returned by kfence_alloc(), which
- * are registered with kmemleak through the slab post-alloc hook.
- */
- kmemleak_free(__kfence_pool);
-
return 0;
}
addr = kfence_init_pool();
- if (!addr)
+ if (!addr) {
+ /*
+ * The pool is live and will never be deallocated from this point on.
+ * Ignore the pool object from the kmemleak phys object tree, as it would
+ * otherwise overlap with allocations returned by kfence_alloc(), which
+ * are registered with kmemleak through the slab post-alloc hook.
+ */
+ kmemleak_ignore_phys(__pa(__kfence_pool));
return true;
+ }
/*
* Only release unprotected pages, and do not try to go back and change
} else {
pr_info("Injecting memory failure for pfn %#lx at process virtual address %#lx\n",
pfn, start);
- ret = memory_failure(pfn, MF_COUNT_INCREASED);
+ ret = memory_failure(pfn, MF_COUNT_INCREASED | MF_SW_SIMULATED);
if (ret == -EOPNOTSUPP)
ret = 0;
}
{
/*
* Deprecated.
- * Please, take a look at tools/cgroup/slabinfo.py .
+ * Please, take a look at tools/cgroup/memcg_slabinfo.py .
*/
return 0;
}
atomic_long_t num_poisoned_pages __read_mostly = ATOMIC_LONG_INIT(0);
+static bool hw_memory_failure __read_mostly = false;
+
static bool __page_handle_poison(struct page *page)
{
int ret;
mutex_lock(&mf_mutex);
+ if (!(flags & MF_SW_SIMULATED))
+ hw_memory_failure = true;
+
p = pfn_to_online_page(pfn);
if (!p) {
res = arch_memory_failure(pfn, flags);
mutex_lock(&mf_mutex);
+ if (hw_memory_failure) {
+ unpoison_pr_info("Unpoison: Disabled after HW memory failure %#lx\n",
+ pfn, &unpoison_rs);
+ ret = -EOPNOTSUPP;
+ goto unlock_mutex;
+ }
+
if (!PageHWPoison(p)) {
unpoison_pr_info("Unpoison: Page was already unpoisoned %#lx\n",
pfn, &unpoison_rs);
pte_t entry;
VM_BUG_ON(!(vmf->flags & FAULT_FLAG_WRITE));
- VM_BUG_ON(PageAnon(page) && !PageAnonExclusive(page));
+ VM_BUG_ON(page && PageAnon(page) && !PageAnonExclusive(page));
/*
* Clear the pages cpupid information as the existing
return VM_FAULT_OOM;
}
- /* See comment in handle_pte_fault() */
+ /*
+ * See comment in handle_pte_fault() for how this scenario happens, we
+ * need to return NOPAGE so that we drop this page.
+ */
if (pmd_devmap_trans_unstable(vmf->pmd))
- return 0;
+ return VM_FAULT_NOPAGE;
vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd,
vmf->address, &vmf->ptl);
static vm_fault_t create_huge_pud(struct vm_fault *vmf)
{
+#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
+ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
+ /* No support for anonymous transparent PUD pages yet */
+ if (vma_is_anonymous(vmf->vma))
+ return VM_FAULT_FALLBACK;
+ if (vmf->vma->vm_ops->huge_fault)
+ return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+ return VM_FAULT_FALLBACK;
+}
+
+static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
+{
#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \
defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)
/* No support for anonymous transparent PUD pages yet */
split:
/* COW or write-notify not handled on PUD level: split pud.*/
__split_huge_pud(vmf->vma, vmf->pud, vmf->address);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
- return VM_FAULT_FALLBACK;
-}
-
-static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
-{
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- /* No support for anonymous transparent PUD pages yet */
- if (vma_is_anonymous(vmf->vma))
- return VM_FAULT_FALLBACK;
- if (vmf->vma->vm_ops->huge_fault)
- return vmf->vma->vm_ops->huge_fault(vmf, PE_SIZE_PUD);
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
+#endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
return VM_FAULT_FALLBACK;
}
}
#ifdef CONFIG_FS_DAX
-bool __put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page_refs(struct page *page, int refs)
{
if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
return false;
* refcount is 1, then the page is free and the refcount is
* stable because nobody holds a reference on the page.
*/
- if (page_ref_dec_return(page) == 1)
+ if (page_ref_sub_return(page, refs) == 1)
wake_up_var(&page->_refcount);
return true;
}
-EXPORT_SYMBOL(__put_devmap_managed_page);
+EXPORT_SYMBOL(__put_devmap_managed_page_refs);
#endif /* CONFIG_FS_DAX */
if (!newpage)
return -ENOMEM;
+ newpage->private = 0;
rc = __unmap_and_move(page, newpage, force, mode);
if (rc == MIGRATEPAGE_SUCCESS)
set_page_owner_migrate_reason(newpage, reason);
* need to be calculated.
*/
if (!order) {
- long fast_free;
+ long usable_free;
+ long reserved;
- fast_free = free_pages;
- fast_free -= __zone_watermark_unusable_free(z, 0, alloc_flags);
- if (fast_free > mark + z->lowmem_reserve[highest_zoneidx])
+ usable_free = free_pages;
+ reserved = __zone_watermark_unusable_free(z, 0, alloc_flags);
+
+ /* reserved may over estimate high-atomic reserves. */
+ usable_free -= min(usable_free, reserved);
+ if (usable_free > mark + z->lowmem_reserve[highest_zoneidx])
return true;
}
* @flags: isolation flags
* @gfp_flags: GFP flags used for migrating pages
* @isolate_before: isolate the pageblock before the boundary_pfn
+ * @skip_isolation: the flag to skip the pageblock isolation in second
+ * isolate_single_pageblock()
*
* Free and in-use pages can be as big as MAX_ORDER-1 and contain more than one
* pageblock. When not all pageblocks within a page are isolated at the same
while ((folio = readahead_folio(rac)) != NULL) {
unsigned long nr = folio_nr_pages(folio);
+ folio_get(folio);
rac->ra->size -= nr;
if (rac->ra->async_size >= nr) {
rac->ra->async_size -= nr;
filemap_remove_folio(folio);
}
folio_unlock(folio);
+ folio_put(folio);
}
} else {
while ((folio = readahead_folio(rac)) != NULL)
new_order--;
}
+ filemap_invalidate_lock_shared(mapping);
while (index <= limit) {
unsigned int order = new_order;
}
read_pages(ractl);
+ filemap_invalidate_unlock_shared(mapping);
/*
* If there were already pages in the page cache, then we may have
/* Unexpected PMD-mapped THP? */
VM_BUG_ON_FOLIO(!pvmw.pte, folio);
- subpage = folio_page(folio,
- pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ if (folio_is_zone_device(folio)) {
+ /*
+ * Our PTE is a non-present device exclusive entry and
+ * calculating the subpage as for the common case would
+ * result in an invalid pointer.
+ *
+ * Since only PAGE_SIZE pages can currently be
+ * migrated, just set it to page. This will need to be
+ * changed when hugepage migrations to device private
+ * memory are supported.
+ */
+ VM_BUG_ON_FOLIO(folio_nr_pages(folio) > 1, folio);
+ subpage = &folio->page;
+ } else {
+ subpage = folio_page(folio,
+ pte_pfn(*pvmw.pte) - folio_pfn(folio));
+ }
address = pvmw.address;
anon_exclusive = folio_test_anon(folio) &&
PageAnonExclusive(subpage);
/*
* No need to invalidate here it will synchronize on
* against the special swap migration pte.
- *
- * The assignment to subpage above was computed from a
- * swap PTE which results in an invalid pointer.
- * Since only PAGE_SIZE pages can currently be
- * migrated, just set it to page. This will need to be
- * changed when hugepage migrations to device private
- * memory are supported.
*/
- subpage = &folio->page;
} else if (PageHWPoison(subpage)) {
pteval = swp_entry_to_pte(make_hwpoison_entry(subpage));
if (folio_test_hugetlb(folio)) {
gfp_t gfp = vmf->gfp_mask;
unsigned long addr;
struct page *page;
+ vm_fault_t ret;
int err;
if (((loff_t)vmf->pgoff << PAGE_SHIFT) >= i_size_read(inode))
return vmf_error(-EINVAL);
+ filemap_invalidate_lock_shared(mapping);
+
retry:
page = find_lock_page(mapping, offset);
if (!page) {
page = alloc_page(gfp | __GFP_ZERO);
- if (!page)
- return VM_FAULT_OOM;
+ if (!page) {
+ ret = VM_FAULT_OOM;
+ goto out;
+ }
err = set_direct_map_invalid_noflush(page);
if (err) {
put_page(page);
- return vmf_error(err);
+ ret = vmf_error(err);
+ goto out;
}
__SetPageUptodate(page);
if (err == -EEXIST)
goto retry;
- return vmf_error(err);
+ ret = vmf_error(err);
+ goto out;
}
addr = (unsigned long)page_address(page);
}
vmf->page = page;
- return VM_FAULT_LOCKED;
+ ret = VM_FAULT_LOCKED;
+
+out:
+ filemap_invalidate_unlock_shared(mapping);
+ return ret;
}
static const struct vm_operations_struct secretmem_vm_ops = {
struct dentry *dentry, struct iattr *iattr)
{
struct inode *inode = d_inode(dentry);
+ struct address_space *mapping = inode->i_mapping;
unsigned int ia_valid = iattr->ia_valid;
+ int ret;
+
+ filemap_invalidate_lock(mapping);
if ((ia_valid & ATTR_SIZE) && inode->i_size)
- return -EINVAL;
+ ret = -EINVAL;
+ else
+ ret = simple_setattr(mnt_userns, dentry, iattr);
- return simple_setattr(mnt_userns, dentry, iattr);
+ filemap_invalidate_unlock(mapping);
+
+ return ret;
}
static const struct inode_operations secretmem_iops = {
break;
case Opt_nr_blocks:
ctx->blocks = memparse(param->string, &rest);
- if (*rest)
+ if (*rest || ctx->blocks > S64_MAX)
goto bad_value;
ctx->seen |= SHMEM_SEEN_BLOCKS;
break;
raw_spin_lock(&sbinfo->stat_lock);
inodes = sbinfo->max_inodes - sbinfo->free_inodes;
- if (ctx->blocks > S64_MAX) {
- err = "Number of blocks too large";
- goto out;
- }
+
if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
if (!sbinfo->max_blocks) {
err = "Cannot retroactively limit size";
return kasan_reset_tag(p + alloc);
}
-static void noinline set_track(struct kmem_cache *s, void *object,
- enum track_item alloc, unsigned long addr)
-{
- struct track *p = get_track(s, object, alloc);
-
#ifdef CONFIG_STACKDEPOT
+static noinline depot_stack_handle_t set_track_prepare(void)
+{
+ depot_stack_handle_t handle;
unsigned long entries[TRACK_ADDRS_COUNT];
unsigned int nr_entries;
nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 3);
- p->handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+ handle = stack_depot_save(entries, nr_entries, GFP_NOWAIT);
+
+ return handle;
+}
+#else
+static inline depot_stack_handle_t set_track_prepare(void)
+{
+ return 0;
+}
#endif
+static void set_track_update(struct kmem_cache *s, void *object,
+ enum track_item alloc, unsigned long addr,
+ depot_stack_handle_t handle)
+{
+ struct track *p = get_track(s, object, alloc);
+
+#ifdef CONFIG_STACKDEPOT
+ p->handle = handle;
+#endif
p->addr = addr;
p->cpu = smp_processor_id();
p->pid = current->pid;
p->when = jiffies;
}
+static __always_inline void set_track(struct kmem_cache *s, void *object,
+ enum track_item alloc, unsigned long addr)
+{
+ depot_stack_handle_t handle = set_track_prepare();
+
+ set_track_update(s, object, alloc, addr, handle);
+}
+
static void init_tracking(struct kmem_cache *s, void *object)
{
struct track *p;
int cnt = 0;
unsigned long flags, flags2;
int ret = 0;
+ depot_stack_handle_t handle = 0;
+
+ if (s->flags & SLAB_STORE_USER)
+ handle = set_track_prepare();
spin_lock_irqsave(&n->list_lock, flags);
slab_lock(slab, &flags2);
}
if (s->flags & SLAB_STORE_USER)
- set_track(s, object, TRACK_FREE, addr);
+ set_track_update(s, object, TRACK_FREE, addr, handle);
trace(s, slab, object, 0);
/* Freepointer not overwritten by init_object(), SLAB_POISON moved it */
init_object(s, object, SLUB_RED_INACTIVE);
if (!freelist) {
c->slab = NULL;
+ c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
stat(s, DEACTIVATE_BYPASS);
goto new_slab;
freelist = c->freelist;
c->slab = NULL;
c->freelist = NULL;
+ c->tid = next_tid(c->tid);
local_unlock_irqrestore(&s->cpu_slab->lock, flags);
deactivate_slab(s, slab, freelist);
spin_lock(&init_mm.page_table_lock);
if (likely(pmd_leaf(*pmd))) {
+ /*
+ * Higher order allocations from buddy allocator must be able to
+ * be treated as indepdenent small pages (as they can be freed
+ * individually).
+ */
+ if (!PageReserved(page))
+ split_page(page, get_order(PMD_SIZE));
+
/* Make pte visible before pmd. See comment in pmd_install(). */
smp_wmb();
pmd_populate_kernel(&init_mm, pmd, pgtable);
* lru_disable_count = 0 will have exited the critical
* section when synchronize_rcu() returns.
*/
- synchronize_rcu();
+ synchronize_rcu_expedited();
#ifdef CONFIG_SMP
__lru_add_drain_all(true);
#else
static inline void check_heap_object(const void *ptr, unsigned long n,
bool to_user)
{
+ uintptr_t addr = (uintptr_t)ptr;
+ unsigned long offset;
struct folio *folio;
if (is_kmap_addr(ptr)) {
- unsigned long page_end = (unsigned long)ptr | (PAGE_SIZE - 1);
-
- if ((unsigned long)ptr + n - 1 > page_end)
- usercopy_abort("kmap", NULL, to_user,
- offset_in_page(ptr), n);
+ offset = offset_in_page(ptr);
+ if (n > PAGE_SIZE - offset)
+ usercopy_abort("kmap", NULL, to_user, offset, n);
return;
}
if (is_vmalloc_addr(ptr)) {
- struct vm_struct *area = find_vm_area(ptr);
- unsigned long offset;
+ struct vmap_area *area = find_vmap_area(addr);
- if (!area) {
+ if (!area)
usercopy_abort("vmalloc", "no area", to_user, 0, n);
- return;
- }
- offset = ptr - area->addr;
- if (offset + n > get_vm_area_size(area))
+ if (n > area->va_end - addr) {
+ offset = addr - area->va_start;
usercopy_abort("vmalloc", NULL, to_user, offset, n);
+ }
return;
}
/* Check slab allocator for flags and size. */
__check_heap_object(ptr, n, folio_slab(folio), to_user);
} else if (folio_test_large(folio)) {
- unsigned long offset = ptr - folio_address(folio);
- if (offset + n > folio_size(folio))
+ offset = ptr - folio_address(folio);
+ if (n > folio_size(folio) - offset)
usercopy_abort("page alloc", NULL, to_user, offset, n);
}
}
struct page *page;
int ret;
- ret = shmem_getpage(inode, pgoff, &page, SGP_READ);
+ ret = shmem_getpage(inode, pgoff, &page, SGP_NOALLOC);
+ /* Our caller expects us to return -EFAULT if we failed to find page. */
+ if (ret == -ENOENT)
+ ret = -EFAULT;
if (ret)
goto out;
if (!page) {
free_vmap_area_noflush(va);
}
-static struct vmap_area *find_vmap_area(unsigned long addr)
+struct vmap_area *find_vmap_area(unsigned long addr)
{
struct vmap_area *va;
else if (dev->mtu > max_mtu)
return -EINVAL;
+ /* Note: If this initial vlan_changelink() fails, we need
+ * to call vlan_dev_free_egress_priority() to free memory.
+ */
err = vlan_changelink(dev, tb, data, extack);
- if (err)
- return err;
- err = register_vlan_dev(dev, extack);
+
+ if (!err)
+ err = register_vlan_dev(dev, extack);
+
if (err)
vlan_dev_free_egress_priority(dev);
return err;
int flags)
{
struct sock *sk = sock->sk;
- struct sk_buff *skb;
+ struct sk_buff *skb, *last;
+ struct sk_buff_head *sk_queue;
int copied;
int err = 0;
+ int off = 0;
+ long timeo;
lock_sock(sk);
/*
goto out;
}
- /* Now we can treat all alike */
- skb = skb_recv_datagram(sk, flags, &err);
- if (skb == NULL)
- goto out;
+ /* We need support for non-blocking reads. */
+ sk_queue = &sk->sk_receive_queue;
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off, &err, &last);
+ /* If no packet is available, release_sock(sk) and try again. */
+ if (!skb) {
+ if (err != -EAGAIN)
+ goto out;
+ release_sock(sk);
+ timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
+ while (timeo && !__skb_wait_for_more_packets(sk, sk_queue, &err,
+ &timeo, last)) {
+ skb = __skb_try_recv_datagram(sk, sk_queue, flags, &off,
+ &err, &last);
+ if (skb)
+ break;
+
+ if (err != -EAGAIN)
+ goto done;
+ }
+ if (!skb)
+ goto done;
+ lock_sock(sk);
+ }
if (!sk_to_ax25(sk)->pidincl)
skb_pull(skb, 1); /* Remove PID */
out:
release_sock(sk);
+done:
return err;
}
goto done;
}
+ cancel_work_sync(&hdev->power_on);
if (hci_dev_test_and_clear_flag(hdev, HCI_AUTO_OFF))
cancel_delayed_work(&hdev->power_off);
list_del(&hdev->list);
write_unlock(&hci_dev_list_lock);
+ cancel_work_sync(&hdev->power_on);
+
hci_cmd_sync_clear(hdev);
if (!test_bit(HCI_QUIRK_NO_SUSPEND_NOTIFIER, &hdev->quirks))
bt_dev_dbg(hdev, "");
- cancel_work_sync(&hdev->power_on);
cancel_delayed_work(&hdev->power_off);
cancel_delayed_work(&hdev->ncmd_timer);
return err;
}
+ /* Update event mask so only the allowed event can wakeup the host */
+ hci_set_event_mask_sync(hdev);
+
/* Only configure accept list if disconnect succeeded and wake
* isn't being prevented.
*/
/* Unpause to take care of updating scanning params */
hdev->scanning_paused = false;
- /* Update event mask so only the allowed event can wakeup the host */
- hci_set_event_mask_sync(hdev);
-
/* Enable event filter for paired devices */
hci_update_event_filter_sync(hdev);
}
/* Find channel with given SCID.
- * Returns locked channel. */
+ * Returns a reference locked channel.
+ */
static struct l2cap_chan *l2cap_get_chan_by_scid(struct l2cap_conn *conn,
u16 cid)
{
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_scid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
}
/* Find channel with given DCID.
- * Returns locked channel.
+ * Returns a reference locked channel.
*/
static struct l2cap_chan *l2cap_get_chan_by_dcid(struct l2cap_conn *conn,
u16 cid)
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_dcid(conn, cid);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
mutex_lock(&conn->chan_lock);
c = __l2cap_get_chan_by_ident(conn, ident);
- if (c)
- l2cap_chan_lock(c);
+ if (c) {
+ /* Only lock if chan reference is not 0 */
+ c = l2cap_chan_hold_unless_zero(c);
+ if (c)
+ l2cap_chan_lock(c);
+ }
mutex_unlock(&conn->chan_lock);
return c;
kref_get(&c->kref);
}
+struct l2cap_chan *l2cap_chan_hold_unless_zero(struct l2cap_chan *c)
+{
+ BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
+
+ if (!kref_get_unless_zero(&c->kref))
+ return NULL;
+
+ return c;
+}
+
void l2cap_chan_put(struct l2cap_chan *c)
{
BT_DBG("chan %p orig refcnt %u", c, kref_read(&c->kref));
src_match = !bacmp(&c->src, src);
dst_match = !bacmp(&c->dst, dst);
if (src_match && dst_match) {
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
+ if (!c)
+ continue;
+
read_unlock(&chan_list_lock);
return c;
}
}
if (c1)
- l2cap_chan_hold(c1);
+ c1 = l2cap_chan_hold_unless_zero(c1);
read_unlock(&chan_list_lock);
unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return err;
}
l2cap_send_move_chan_rsp(chan, result);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_move_fail(struct l2cap_conn *conn, u8 ident, u16 icid,
l2cap_send_move_chan_cfm(chan, L2CAP_MC_UNCONFIRMED);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static int l2cap_move_channel_rsp(struct l2cap_conn *conn,
l2cap_send_move_chan_cfm_rsp(conn, cmd->ident, icid);
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
}
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
if (credits > max_credits) {
BT_ERR("LE credits overflow");
l2cap_send_disconn_req(chan, ECONNRESET);
- l2cap_chan_unlock(chan);
/* Return 0 so that we don't trigger an unnecessary
* command reject packet.
*/
- return 0;
+ goto unlock;
}
chan->tx_credits += credits;
if (chan->tx_credits)
chan->ops->resume(chan);
+unlock:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
return 0;
}
done:
l2cap_chan_unlock(chan);
+ l2cap_chan_put(chan);
}
static void l2cap_conless_channel(struct l2cap_conn *conn, __le16 psm,
if (src_type != c->src_type)
continue;
- l2cap_chan_hold(c);
+ c = l2cap_chan_hold_unless_zero(c);
read_unlock(&chan_list_lock);
return c;
}
else
status = MGMT_STATUS_FAILED;
- mgmt_pending_remove(cmd);
goto unlock;
}
return okfn(net, sk, skb);
ops = nf_hook_entries_get_hook_ops(e);
- for (i = 0; i < e->num_hook_entries &&
- ops[i]->priority <= NF_BR_PRI_BRNF; i++)
- ;
+ for (i = 0; i < e->num_hook_entries; i++) {
+ /* These hooks have already been called */
+ if (ops[i]->priority < NF_BR_PRI_BRNF)
+ continue;
+
+ /* These hooks have not been called yet, run them. */
+ if (ops[i]->priority > NF_BR_PRI_BRNF)
+ break;
+
+ /* take a closer look at NF_BR_PRI_BRNF. */
+ if (ops[i]->hook == br_nf_pre_routing) {
+ /* This hook diverted the skb to this function,
+ * hooks after this have not been run yet.
+ */
+ i++;
+ break;
+ }
+ }
nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev,
sk, net, okfn);
}
done:
+ if (af) {
+ if (nlmsg_get_pos(skb) - (void *)af > nla_attr_size(0))
+ nla_nest_end(skb, af);
+ else
+ nla_nest_cancel(skb, af);
+ }
- if (af)
- nla_nest_end(skb, af);
nlmsg_end(skb, nlh);
return 0;
struct caifsock {
struct sock sk; /* must be first member */
struct cflayer layer;
- u32 flow_state;
+ unsigned long flow_state;
struct caif_connect_request conn_req;
struct mutex readlock;
struct dentry *debugfs_socket_dir;
static int rx_flow_is_on(struct caifsock *cf_sk)
{
- return test_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ return test_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static int tx_flow_is_on(struct caifsock *cf_sk)
{
- return test_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ return test_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_rx_flow_off(struct caifsock *cf_sk)
{
- clear_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ clear_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_rx_flow_on(struct caifsock *cf_sk)
{
- set_bit(RX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ set_bit(RX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_tx_flow_off(struct caifsock *cf_sk)
{
- clear_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ clear_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void set_tx_flow_on(struct caifsock *cf_sk)
{
- set_bit(TX_FLOW_ON_BIT,
- (void *) &cf_sk->flow_state);
+ set_bit(TX_FLOW_ON_BIT, &cf_sk->flow_state);
}
static void caif_read_lock(struct sock *sk)
struct bcm_op {
struct list_head list;
+ struct rcu_head rcu;
int ifindex;
canid_t can_id;
u32 flags;
return NULL;
}
-static void bcm_remove_op(struct bcm_op *op)
+static void bcm_free_op_rcu(struct rcu_head *rcu_head)
{
- hrtimer_cancel(&op->timer);
- hrtimer_cancel(&op->thrtimer);
+ struct bcm_op *op = container_of(rcu_head, struct bcm_op, rcu);
if ((op->frames) && (op->frames != &op->sframe))
kfree(op->frames);
kfree(op);
}
+static void bcm_remove_op(struct bcm_op *op)
+{
+ hrtimer_cancel(&op->timer);
+ hrtimer_cancel(&op->thrtimer);
+
+ call_rcu(&op->rcu, bcm_free_op_rcu);
+}
+
static void bcm_rx_unreg(struct net_device *dev, struct bcm_op *op)
{
if (op->rx_reg_dev == dev) {
if ((op->can_id == mh->can_id) && (op->ifindex == ifindex) &&
(op->flags & CAN_FD_FRAME) == (mh->flags & CAN_FD_FRAME)) {
+ /* disable automatic timer on frame reception */
+ op->flags |= RX_NO_AUTOTIMER;
+
/*
* Don't care if we're bound or not (due to netdev
* problems) can_rx_unregister() is always a save
bcm_rx_handler, op);
list_del(&op->list);
- synchronize_rcu();
bcm_remove_op(op);
return 1; /* done */
}
/* Device list removal
* caller must respect a RCU grace period before freeing/reusing dev
*/
-static void unlist_netdevice(struct net_device *dev)
+static void unlist_netdevice(struct net_device *dev, bool lock)
{
ASSERT_RTNL();
/* Unlink dev from the device chain */
- write_lock(&dev_base_lock);
+ if (lock)
+ write_lock(&dev_base_lock);
list_del_rcu(&dev->dev_list);
netdev_name_node_del(dev->name_node);
hlist_del_rcu(&dev->index_hlist);
- write_unlock(&dev_base_lock);
+ if (lock)
+ write_unlock(&dev_base_lock);
dev_base_seq_inc(dev_net(dev));
}
}
/* When doing generic XDP we have to bypass the qdisc layer and the
- * network taps in order to match in-driver-XDP behavior.
+ * network taps in order to match in-driver-XDP behavior. This also means
+ * that XDP packets are able to starve other packets going through a qdisc,
+ * and DDOS attacks will be more effective. In-driver-XDP use dedicated TX
+ * queues, so they do not have this starvation issue.
*/
void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog)
{
txq = netdev_core_pick_tx(dev, skb, NULL);
cpu = smp_processor_id();
HARD_TX_LOCK(dev, txq, cpu);
- if (!netif_xmit_stopped(txq)) {
+ if (!netif_xmit_frozen_or_drv_stopped(txq)) {
rc = netdev_start_xmit(skb, dev, txq, 0);
if (dev_xmit_complete(rc))
free_skb = false;
HARD_TX_UNLOCK(dev, txq);
if (free_skb) {
trace_xdp_exception(dev, xdp_prog, XDP_TX);
+ dev_core_stats_tx_dropped_inc(dev);
kfree_skb(skb);
}
}
goto err_uninit;
ret = netdev_register_kobject(dev);
- if (ret) {
- dev->reg_state = NETREG_UNREGISTERED;
+ write_lock(&dev_base_lock);
+ dev->reg_state = ret ? NETREG_UNREGISTERED : NETREG_REGISTERED;
+ write_unlock(&dev_base_lock);
+ if (ret)
goto err_uninit;
- }
- dev->reg_state = NETREG_REGISTERED;
__netdev_update_features(dev);
continue;
}
+ write_lock(&dev_base_lock);
dev->reg_state = NETREG_UNREGISTERED;
+ write_unlock(&dev_base_lock);
linkwatch_forget_dev(dev);
}
list_for_each_entry(dev, head, unreg_list) {
/* And unlink it from device chain. */
- unlist_netdevice(dev);
-
+ write_lock(&dev_base_lock);
+ unlist_netdevice(dev, false);
dev->reg_state = NETREG_UNREGISTERING;
+ write_unlock(&dev_base_lock);
}
flush_all_backlogs();
dev_close(dev);
/* And unlink it from device chain */
- unlist_netdevice(dev);
+ unlist_netdevice(dev, true);
synchronize_net();
if (err)
return err;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
return seg6_lookup_nexthop(skb, NULL, 0);
ifindex, proto, netns_id, flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
flags);
if (sk) {
- sk = sk_to_full_sk(sk);
- if (!sk_fullsock(sk)) {
+ struct sock *sk2 = sk_to_full_sk(sk);
+
+ /* sk_to_full_sk() may return (sk)->rsk_listener, so make sure the original sk
+ * sock refcnt is decremented to prevent a request_sock leak.
+ */
+ if (!sk_fullsock(sk2))
+ sk2 = NULL;
+ if (sk2 != sk) {
sock_gen_put(sk);
- return NULL;
+ /* Ensure there is no need to bump sk2 refcnt */
+ if (unlikely(sk2 && !sock_flag(sk2, SOCK_RCU_FREE))) {
+ WARN_ONCE(1, "Found non-RCU, unreferenced socket!");
+ return NULL;
+ }
+ sk = sk2;
}
}
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -EINVAL;
if (!th->ack || th->rst || th->syn)
if (sk->sk_protocol != IPPROTO_TCP || sk->sk_state != TCP_LISTEN)
return -EINVAL;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies))
return -ENOENT;
if (!th->syn || th->ack || th->fin || th->rst)
return (bo && list_empty(&bo->cb_list)) ? -EOPNOTSUPP : count;
}
EXPORT_SYMBOL(flow_indr_dev_setup_offload);
+
+bool flow_indr_dev_exists(void)
+{
+ return !list_empty(&flow_block_indr_dev_list);
+}
+EXPORT_SYMBOL(flow_indr_dev_exists);
static const char fmt_ulong[] = "%lu\n";
static const char fmt_u64[] = "%llu\n";
+/* Caller holds RTNL or dev_base_lock */
static inline int dev_isalive(const struct net_device *dev)
{
return dev->reg_state <= NETREG_REGISTERED;
.daddr = *(struct in6_addr *)daddr,
};
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
#ifdef CONFIG_INET
u32 secure_tcp_ts_off(const struct net *net, __be32 saddr, __be32 daddr)
{
- if (net->ipv4.sysctl_tcp_timestamps != 1)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_timestamps) != 1)
return 0;
ts_secret_init();
write_lock_bh(&sk->sk_callback_lock);
+ if (sk_is_inet(sk) && inet_csk_has_ulp(sk)) {
+ psock = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
if (sk->sk_user_data) {
psock = ERR_PTR(-EBUSY);
goto out;
prog = rcu_dereference_protected(reuse->prog,
lockdep_is_held(&reuseport_lock));
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req ||
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req) ||
(prog && prog->expected_attach_type == BPF_SK_REUSEPORT_SELECT_OR_MIGRATE)) {
/* Migration capable, move sk from the listening section
* to the closed section.
hash = migrating_sk->sk_hash;
prog = rcu_dereference(reuse->prog);
if (!prog || prog->expected_attach_type != BPF_SK_REUSEPORT_SELECT_OR_MIGRATE) {
- if (sock_net(sk)->ipv4.sysctl_tcp_migrate_req)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_migrate_req))
goto select_by_hash;
goto failure;
}
SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
if (!dccp_hashinfo.bind_bucket_cachep)
goto out_free_hashinfo2;
- dccp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("dccp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT, NULL);
- if (!dccp_hashinfo.bind2_bucket_cachep)
- goto out_free_bind_bucket_cachep;
/*
* Size and allocate the main established and bind bucket
if (!dccp_hashinfo.ehash) {
DCCP_CRIT("Failed to allocate DCCP established hash table");
- goto out_free_bind2_bucket_cachep;
+ goto out_free_bind_bucket_cachep;
}
for (i = 0; i <= dccp_hashinfo.ehash_mask; i++)
goto out_free_dccp_locks;
}
- dccp_hashinfo.bhash2 = (struct inet_bind2_hashbucket *)
- __get_free_pages(GFP_ATOMIC | __GFP_NOWARN, bhash_order);
-
- if (!dccp_hashinfo.bhash2) {
- DCCP_CRIT("Failed to allocate DCCP bind2 hash table");
- goto out_free_dccp_bhash;
- }
-
for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
spin_lock_init(&dccp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain);
}
rc = dccp_mib_init();
if (rc)
- goto out_free_dccp_bhash2;
+ goto out_free_dccp_bhash;
rc = dccp_ackvec_init();
if (rc)
dccp_ackvec_exit();
out_free_dccp_mib:
dccp_mib_exit();
-out_free_dccp_bhash2:
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
out_free_dccp_bhash:
free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
out_free_dccp_locks:
inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
-out_free_bind2_bucket_cachep:
- kmem_cache_destroy(dccp_hashinfo.bind2_bucket_cachep);
out_free_bind_bucket_cachep:
kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
out_free_hashinfo2:
inet_hashinfo2_free_mod(&dccp_hashinfo);
out_fail:
dccp_hashinfo.bhash = NULL;
- dccp_hashinfo.bhash2 = NULL;
dccp_hashinfo.ehash = NULL;
dccp_hashinfo.bind_bucket_cachep = NULL;
- dccp_hashinfo.bind2_bucket_cachep = NULL;
return rc;
}
static void __exit dccp_fini(void)
{
- int bhash_order = get_order(dccp_hashinfo.bhash_size *
- sizeof(struct inet_bind_hashbucket));
-
ccid_cleanup_builtins();
dccp_mib_exit();
- free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
- free_pages((unsigned long)dccp_hashinfo.bhash2, bhash_order);
+ free_pages((unsigned long)dccp_hashinfo.bhash,
+ get_order(dccp_hashinfo.bhash_size *
+ sizeof(struct inet_bind_hashbucket)));
free_pages((unsigned long)dccp_hashinfo.ehash,
get_order((dccp_hashinfo.ehash_mask + 1) *
sizeof(struct inet_ehash_bucket)));
sk->sk_family = PF_DECnet;
sk->sk_protocol = 0;
sk->sk_allocation = gfp;
- sk->sk_sndbuf = sysctl_decnet_wmem[1];
- sk->sk_rcvbuf = sysctl_decnet_rmem[1];
+ sk->sk_sndbuf = READ_ONCE(sysctl_decnet_wmem[1]);
+ sk->sk_rcvbuf = READ_ONCE(sysctl_decnet_rmem[1]);
/* Initialization of DECnet Session Control Port */
scp = DN_SK(sk);
struct netlink_ext_ack extack = {0};
bool change_vlan_filtering = false;
struct dsa_switch *ds = dp->ds;
+ struct dsa_port *other_dp;
bool vlan_filtering;
int err;
* VLAN-aware bridge.
*/
if (change_vlan_filtering && ds->vlan_filtering_is_global) {
- dsa_switch_for_each_port(dp, ds) {
- struct net_device *br = dsa_port_bridge_dev_get(dp);
+ dsa_switch_for_each_port(other_dp, ds) {
+ struct net_device *br = dsa_port_bridge_dev_get(other_dp);
if (br && br_vlan_enabled(br)) {
change_vlan_filtering = false;
ds->vlan_filtering = vlan_filtering;
dsa_switch_for_each_user_port(other_dp, ds) {
- struct net_device *slave = dp->slave;
+ struct net_device *slave = other_dp->slave;
/* We might be called in the unbind path, so not
* all slave devices might still be registered.
ether_addr_copy(a->addr, addr);
a->vid = vid;
+ a->db = db;
refcount_set(&a->refcount, 1);
list_add_tail(&a->list, &lag->fdbs);
if (request->page)
offset = request->page * ETH_MODULE_EEPROM_PAGE_LEN + offset;
- if (modinfo->type == ETH_MODULE_SFF_8079 &&
+ if (modinfo->type == ETH_MODULE_SFF_8472 &&
request->i2c_address == 0x51)
offset += ETH_MODULE_EEPROM_PAGE_LEN * 2;
* because the socket was in TCP_LISTEN state previously but
* was shutdown() rather than close().
*/
- tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) &&
(tcp_fastopen & TFO_SERVER_ENABLE) &&
!inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) {
inet->hdrincl = 1;
}
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
if (new_saddr == old_saddr)
return 0;
- if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) {
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) > 1) {
pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n",
__func__, &old_saddr, &new_saddr);
}
* Other protocols have to map its equivalent state to TCP_SYN_SENT.
* DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme
*/
- if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr ||
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_ip_dynaddr) ||
sk->sk_state != TCP_SYN_SENT ||
(sk->sk_userlocks & SOCK_BINDADDR_LOCK) ||
(err = inet_sk_reselect_saddr(sk)) != 0)
};
#endif
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol tcp_protocol = {
- .early_demux = tcp_v4_early_demux,
- .early_demux_handler = tcp_v4_early_demux,
+static const struct net_protocol tcp_protocol = {
.handler = tcp_v4_rcv,
.err_handler = tcp_v4_err,
.no_policy = 1,
.icmp_strict_tag_validation = 1,
};
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct net_protocol udp_protocol = {
- .early_demux = udp_v4_early_demux,
- .early_demux_handler = udp_v4_early_demux,
+static const struct net_protocol udp_protocol = {
.handler = udp_rcv,
.err_handler = udp_err,
.no_policy = 1,
if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
crypto_ahash_digestsize(ahash)) {
- pr_info("%s: %s digestsize %u != %hu\n",
+ pr_info("%s: %s digestsize %u != %u\n",
__func__, x->aalg->alg_name,
crypto_ahash_digestsize(ahash),
aalg_desc->uinfo.auth.icv_fullbits / 8);
struct cipso_v4_map_cache_entry *prev_entry = NULL;
u32 hash;
- if (!cipso_v4_cache_enabled)
+ if (!READ_ONCE(cipso_v4_cache_enabled))
return -ENOENT;
hash = cipso_v4_map_cache_hash(key, key_len);
int cipso_v4_cache_add(const unsigned char *cipso_ptr,
const struct netlbl_lsm_secattr *secattr)
{
+ int bkt_size = READ_ONCE(cipso_v4_cache_bucketsize);
int ret_val = -EPERM;
u32 bkt;
struct cipso_v4_map_cache_entry *entry = NULL;
struct cipso_v4_map_cache_entry *old_entry = NULL;
u32 cipso_ptr_len;
- if (!cipso_v4_cache_enabled || cipso_v4_cache_bucketsize <= 0)
+ if (!READ_ONCE(cipso_v4_cache_enabled) || bkt_size <= 0)
return 0;
cipso_ptr_len = cipso_ptr[1];
bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETS - 1);
spin_lock_bh(&cipso_v4_cache[bkt].lock);
- if (cipso_v4_cache[bkt].size < cipso_v4_cache_bucketsize) {
+ if (cipso_v4_cache[bkt].size < bkt_size) {
list_add(&entry->list, &cipso_v4_cache[bkt].list);
cipso_v4_cache[bkt].size += 1;
} else {
/* This will send packets using the "optimized" format when
* possible as specified in section 3.4.2.6 of the
* CIPSO draft. */
- if (cipso_v4_rbm_optfmt && ret_val > 0 && ret_val <= 10)
+ if (READ_ONCE(cipso_v4_rbm_optfmt) && ret_val > 0 &&
+ ret_val <= 10)
tag_len = 14;
else
tag_len = 4 + ret_val;
* all the CIPSO validations here but it doesn't
* really specify _exactly_ what we need to validate
* ... so, just make it a sysctl tunable. */
- if (cipso_v4_rbm_strictvalid) {
+ if (READ_ONCE(cipso_v4_rbm_strictvalid)) {
if (cipso_v4_map_lvl_valid(doi_def,
tag[3]) < 0) {
err_offset = opt_iter + 3;
err = -EINVAL;
if (aalg_desc->uinfo.auth.icv_fullbits / 8 !=
crypto_aead_authsize(aead)) {
- pr_info("ESP: %s digestsize %u != %hu\n",
+ pr_info("ESP: %s digestsize %u != %u\n",
x->aalg->alg_name,
crypto_aead_authsize(aead),
aalg_desc->uinfo.auth.icv_fullbits / 8);
nh->fib_nh_dev = in_dev->dev;
dev_hold_track(nh->fib_nh_dev, &nh->fib_nh_dev_tracker, GFP_ATOMIC);
- nh->fib_nh_scope = RT_SCOPE_HOST;
+ nh->fib_nh_scope = RT_SCOPE_LINK;
if (!netif_carrier_ok(nh->fib_nh_dev))
nh->fib_nh_flags |= RTNH_F_LINKDOWN;
err = 0;
goto nla_put_failure;
if (nexthop_is_blackhole(fi->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
+ if (!READ_ONCE(fi->fib_net->ipv4.sysctl_nexthop_compat_mode))
goto offload;
}
}
change_nexthops(fi) {
- if (net->ipv4.sysctl_fib_multipath_use_neigh) {
+ if (READ_ONCE(net->ipv4.sysctl_fib_multipath_use_neigh)) {
if (!fib_good_nh(nexthop_nh))
continue;
if (!first) {
tn = container_of(head, struct tnode, rcu)->kv;
}
- if (tnode_free_size >= sysctl_fib_sync_mem) {
+ if (tnode_free_size >= READ_ONCE(sysctl_fib_sync_mem)) {
tnode_free_size = 0;
synchronize_rcu();
}
void fib_alias_hw_flags_set(struct net *net, const struct fib_rt_info *fri)
{
+ u8 fib_notify_on_flag_change;
struct fib_alias *fa_match;
struct sk_buff *skb;
int err;
WRITE_ONCE(fa_match->offload, fri->offload);
WRITE_ONCE(fa_match->trap, fri->trap);
+ fib_notify_on_flag_change = READ_ONCE(net->ipv4.sysctl_fib_notify_on_flag_change);
+
/* 2 means send notifications only if offload_failed was changed. */
- if (net->ipv4.sysctl_fib_notify_on_flag_change == 2 &&
+ if (fib_notify_on_flag_change == 2 &&
READ_ONCE(fa_match->offload_failed) == fri->offload_failed)
goto out;
WRITE_ONCE(fa_match->offload_failed, fri->offload_failed);
- if (!net->ipv4.sysctl_fib_notify_on_flag_change)
+ if (!fib_notify_on_flag_change)
goto out;
skb = nlmsg_new(fib_nlmsg_size(fa_match->fa_info), GFP_ATOMIC);
spin_lock(&icmp_global.lock);
delta = min_t(u32, now - icmp_global.stamp, HZ);
if (delta >= HZ / 50) {
- incr = sysctl_icmp_msgs_per_sec * delta / HZ ;
+ incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ;
if (incr)
WRITE_ONCE(icmp_global.stamp, now);
}
- credit = min_t(u32, icmp_global.credit + incr, sysctl_icmp_msgs_burst);
+ credit = min_t(u32, icmp_global.credit + incr,
+ READ_ONCE(sysctl_icmp_msgs_burst));
if (credit) {
/* We want to use a credit of one in average, but need to randomize
* it for security reasons.
return true;
/* Limit if icmp type is enabled in ratemask. */
- if (!((1 << type) & net->ipv4.sysctl_icmp_ratemask))
+ if (!((1 << type) & READ_ONCE(net->ipv4.sysctl_icmp_ratemask)))
return true;
return false;
vif = l3mdev_master_ifindex(dst->dev);
peer = inet_getpeer_v4(net->ipv4.peers, fl4->daddr, vif, 1);
- rc = inet_peer_xrlim_allow(peer, net->ipv4.sysctl_icmp_ratelimit);
+ rc = inet_peer_xrlim_allow(peer,
+ READ_ONCE(net->ipv4.sysctl_icmp_ratelimit));
if (peer)
inet_putpeer(peer);
out:
rcu_read_lock();
if (rt_is_input_route(rt) &&
- net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr)
+ READ_ONCE(net->ipv4.sysctl_icmp_errors_use_inbound_ifaddr))
dev = dev_get_by_index_rcu(net, inet_iif(skb_in));
if (dev)
* values please see
* Documentation/networking/ip-sysctl.rst
*/
- switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
+ switch (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc)) {
default:
net_dbg_ratelimited("%pI4: fragmentation needed and DF set\n",
&iph->daddr);
* get the other vendor to fix their kit.
*/
- if (!net->ipv4.sysctl_icmp_ignore_bogus_error_responses &&
+ if (!READ_ONCE(net->ipv4.sysctl_icmp_ignore_bogus_error_responses) &&
inet_addr_type_dev_table(net, skb->dev, iph->daddr) == RTN_BROADCAST) {
net_warn_ratelimited("%pI4 sent an invalid ICMP type %u, code %u error to a broadcast: %pI4 on %s\n",
&ip_hdr(skb)->saddr,
net = dev_net(skb_dst(skb)->dev);
/* should there be an ICMP stat for ignored echos? */
- if (net->ipv4.sysctl_icmp_echo_ignore_all)
+ if (READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_all))
return SKB_NOT_DROPPED_YET;
icmp_param.data.icmph = *icmp_hdr(skb);
u16 ident_len;
u8 status;
- if (!net->ipv4.sysctl_icmp_echo_enable_probe)
+ if (!READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
return false;
/* We currently only support probing interfaces on the proxy node
*/
if ((icmph->type == ICMP_ECHO ||
icmph->type == ICMP_TIMESTAMP) &&
- net->ipv4.sysctl_icmp_echo_ignore_broadcasts) {
+ READ_ONCE(net->ipv4.sysctl_icmp_echo_ignore_broadcasts)) {
reason = SKB_DROP_REASON_INVALID_PROTO;
goto error;
}
if (pmc->multiaddr == IGMP_ALL_HOSTS)
return skb;
- if (ipv4_is_local_multicast(pmc->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(pmc->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return skb;
mtu = READ_ONCE(dev->mtu);
if (pmc->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(pmc->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&pmc->lock);
if (pmc->sfcount[MCAST_EXCLUDE])
if (type == IGMPV3_HOST_MEMBERSHIP_REPORT)
return igmpv3_send_report(in_dev, pmc);
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return 0;
if (type == IGMP_HOST_LEAVE_MESSAGE)
struct net *net = dev_net(in_dev->dev);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev))
return;
- WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv);
+ WRITE_ONCE(in_dev->mr_ifc_count, in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv));
igmp_ifc_start_timer(in_dev, 1);
}
if (group == IGMP_ALL_HOSTS)
return false;
- if (ipv4_is_local_multicast(group) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(group) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return false;
rcu_read_lock();
* received value was zero, use the default or statically
* configured value.
*/
- in_dev->mr_qrv = ih3->qrv ?: net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = ih3->qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
in_dev->mr_qi = IGMPV3_QQIC(ih3->qqic)*HZ ?: IGMP_QUERY_INTERVAL;
/* RFC3376, 8.3. Query Response Interval:
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
spin_lock_bh(&im->lock);
if (im->tm_running)
pmc->interface = im->interface;
in_dev_hold(in_dev);
pmc->multiaddr = im->multiaddr;
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
pmc->sfmode = im->sfmode;
if (pmc->sfmode == MCAST_INCLUDE) {
struct ip_sf_list *psf;
swap(im->tomb, pmc->tomb);
swap(im->sources, pmc->sources);
for (psf = im->sources; psf; psf = psf->sf_next)
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
} else {
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?:
+ READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
in_dev_put(pmc->interface);
kfree_pmc(pmc);
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
reporter = im->reporter;
#ifdef CONFIG_IP_MULTICAST
if (im->multiaddr == IGMP_ALL_HOSTS)
return;
- if (ipv4_is_local_multicast(im->multiaddr) && !net->ipv4.sysctl_igmp_llm_reports)
+ if (ipv4_is_local_multicast(im->multiaddr) &&
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
return;
if (in_dev->dead)
return;
- im->unsolicit_count = net->ipv4.sysctl_igmp_qrv;
+ im->unsolicit_count = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) {
spin_lock_bh(&im->lock);
igmp_start_timer(im, IGMP_INITIAL_REPORT_DELAY);
* IN() to IN(A).
*/
if (im->sfmode == MCAST_EXCLUDE)
- im->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ im->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
igmp_ifc_event(in_dev);
#endif
if (im->multiaddr == IGMP_ALL_HOSTS)
continue;
if (ipv4_is_local_multicast(im->multiaddr) &&
- !net->ipv4.sysctl_igmp_llm_reports)
+ !READ_ONCE(net->ipv4.sysctl_igmp_llm_reports))
continue;
/* a failover is happening and switches
in_dev->mr_qi = IGMP_QUERY_INTERVAL;
in_dev->mr_qri = IGMP_QUERY_RESPONSE_INTERVAL;
- in_dev->mr_qrv = net->ipv4.sysctl_igmp_qrv;
+ in_dev->mr_qrv = READ_ONCE(net->ipv4.sysctl_igmp_qrv);
}
#else
static void ip_mc_reset(struct in_device *in_dev)
#ifdef CONFIG_IP_MULTICAST
if (psf->sf_oldin &&
!IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) {
- psf->sf_crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ psf->sf_crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
psf->sf_next = pmc->tomb;
pmc->tomb = psf;
rv = 1;
/* filter mode change */
pmc->sfmode = MCAST_INCLUDE;
#ifdef CONFIG_IP_MULTICAST
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
#ifdef CONFIG_IP_MULTICAST
/* else no filters; keep old mode for reports */
- pmc->crcount = in_dev->mr_qrv ?: net->ipv4.sysctl_igmp_qrv;
+ pmc->crcount = in_dev->mr_qrv ?: READ_ONCE(net->ipv4.sysctl_igmp_qrv);
WRITE_ONCE(in_dev->mr_ifc_count, pmc->crcount);
for (psf = pmc->sources; psf; psf = psf->sf_next)
psf->sf_crcount = 0;
count++;
}
err = -ENOBUFS;
- if (count >= net->ipv4.sysctl_igmp_max_memberships)
+ if (count >= READ_ONCE(net->ipv4.sysctl_igmp_max_memberships))
goto done;
iml = sock_kmalloc(sk, sizeof(*iml), GFP_KERNEL);
if (!iml)
}
/* else, add a new source to the filter */
- if (psl && psl->sl_count >= net->ipv4.sysctl_igmp_max_msf) {
+ if (psl && psl->sl_count >= READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
err = -ENOBUFS;
goto done;
}
return !sk->sk_rcv_saddr;
}
-static bool use_bhash2_on_bind(const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- int addr_type;
-
- if (sk->sk_family == AF_INET6) {
- addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
- return addr_type != IPV6_ADDR_ANY &&
- addr_type != IPV6_ADDR_MAPPED;
- }
-#endif
- return sk->sk_rcv_saddr != htonl(INADDR_ANY);
-}
-
-static u32 get_bhash2_nulladdr_hash(const struct sock *sk, struct net *net,
- int port)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return ipv6_portaddr_hash(net, &nulladdr, port);
-#endif
- return ipv4_portaddr_hash(net, 0, port);
-}
-
void inet_get_local_port_range(struct net *net, int *low, int *high)
{
unsigned int seq;
}
EXPORT_SYMBOL(inet_get_local_port_range);
-static bool bind_conflict_exist(const struct sock *sk, struct sock *sk2,
- kuid_t sk_uid, bool relax,
- bool reuseport_cb_ok, bool reuseport_ok)
-{
- int bound_dev_if2;
-
- if (sk == sk2)
- return false;
-
- bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
-
- if (!sk->sk_bound_dev_if || !bound_dev_if2 ||
- sk->sk_bound_dev_if == bound_dev_if2) {
- if (sk->sk_reuse && sk2->sk_reuse &&
- sk2->sk_state != TCP_LISTEN) {
- if (!relax || (!reuseport_ok && sk->sk_reuseport &&
- sk2->sk_reuseport && reuseport_cb_ok &&
- (sk2->sk_state == TCP_TIME_WAIT ||
- uid_eq(sk_uid, sock_i_uid(sk2)))))
- return true;
- } else if (!reuseport_ok || !sk->sk_reuseport ||
- !sk2->sk_reuseport || !reuseport_cb_ok ||
- (sk2->sk_state != TCP_TIME_WAIT &&
- !uid_eq(sk_uid, sock_i_uid(sk2)))) {
- return true;
- }
- }
- return false;
-}
-
-static bool check_bhash2_conflict(const struct sock *sk,
- struct inet_bind2_bucket *tb2, kuid_t sk_uid,
- bool relax, bool reuseport_cb_ok,
- bool reuseport_ok)
-{
- struct sock *sk2;
-
- sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
-
- if (bind_conflict_exist(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
- return true;
- }
- return false;
-}
-
-/* This should be called only when the corresponding inet_bind_bucket spinlock
- * is held
- */
-static int inet_csk_bind_conflict(const struct sock *sk, int port,
- struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, /* may be null */
+static int inet_csk_bind_conflict(const struct sock *sk,
+ const struct inet_bind_bucket *tb,
bool relax, bool reuseport_ok)
{
- struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- kuid_t uid = sock_i_uid((struct sock *)sk);
- struct sock_reuseport *reuseport_cb;
- struct inet_bind2_hashbucket *head2;
- bool reuseport_cb_ok;
struct sock *sk2;
- struct net *net;
- int l3mdev;
- u32 hash;
+ bool reuseport_cb_ok;
+ bool reuse = sk->sk_reuse;
+ bool reuseport = !!sk->sk_reuseport;
+ struct sock_reuseport *reuseport_cb;
+ kuid_t uid = sock_i_uid((struct sock *)sk);
rcu_read_lock();
reuseport_cb = rcu_dereference(sk->sk_reuseport_cb);
/*
* Unlike other sk lookup places we do not check
* for sk_net here, since _all_ the socks listed
- * in tb->owners and tb2->owners list belong
- * to the same net
+ * in tb->owners list belong to the same net - the
+ * one this bucket belongs to.
*/
- if (!use_bhash2_on_bind(sk)) {
- sk_for_each_bound(sk2, &tb->owners)
- if (bind_conflict_exist(sk, sk2, uid, relax,
- reuseport_cb_ok, reuseport_ok) &&
- inet_rcv_saddr_equal(sk, sk2, true))
- return true;
+ sk_for_each_bound(sk2, &tb->owners) {
+ int bound_dev_if2;
- return false;
+ if (sk == sk2)
+ continue;
+ bound_dev_if2 = READ_ONCE(sk2->sk_bound_dev_if);
+ if ((!sk->sk_bound_dev_if ||
+ !bound_dev_if2 ||
+ sk->sk_bound_dev_if == bound_dev_if2)) {
+ if (reuse && sk2->sk_reuse &&
+ sk2->sk_state != TCP_LISTEN) {
+ if ((!relax ||
+ (!reuseport_ok &&
+ reuseport && sk2->sk_reuseport &&
+ reuseport_cb_ok &&
+ (sk2->sk_state == TCP_TIME_WAIT ||
+ uid_eq(uid, sock_i_uid(sk2))))) &&
+ inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ } else if (!reuseport_ok ||
+ !reuseport || !sk2->sk_reuseport ||
+ !reuseport_cb_ok ||
+ (sk2->sk_state != TCP_TIME_WAIT &&
+ !uid_eq(uid, sock_i_uid(sk2)))) {
+ if (inet_rcv_saddr_equal(sk, sk2, true))
+ break;
+ }
+ }
}
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- net = sock_net(sk);
-
- /* check there's no conflict with an existing IPV6_ADDR_ANY (if ipv6) or
- * INADDR_ANY (if ipv4) socket.
- */
- hash = get_bhash2_nulladdr_hash(sk, net, port);
- head2 = &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-
- l3mdev = inet_sk_bound_l3mdev(sk);
- inet_bind_bucket_for_each(tb2, &head2->chain)
- if (check_bind2_bucket_match_nulladdr(tb2, net, port, l3mdev, sk))
- break;
-
- if (tb2 && check_bhash2_conflict(sk, tb2, uid, relax, reuseport_cb_ok,
- reuseport_ok))
- return true;
-
- return false;
+ return sk2 != NULL;
}
/*
* inet_bind_hashbucket lock held.
*/
static struct inet_bind_hashbucket *
-inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret,
- struct inet_bind2_bucket **tb2_ret,
- struct inet_bind2_hashbucket **head2_ret, int *port_ret)
+inet_csk_find_open_port(struct sock *sk, struct inet_bind_bucket **tb_ret, int *port_ret)
{
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- struct inet_bind2_hashbucket *head2;
+ int port = 0;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
+ bool relax = false;
int i, low, high, attempt_half;
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
u32 remaining, offset;
- bool relax = false;
- int port = 0;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
head = &hinfo->bhash[inet_bhashfn(net, port,
hinfo->bhash_size)];
spin_lock_bh(&head->lock);
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
- if (!inet_csk_bind_conflict(sk, port, tb, tb2,
- relax, false))
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
+ if (!inet_csk_bind_conflict(sk, tb, relax, false))
goto success;
goto next_port;
}
goto other_half_scan;
}
- if (net->ipv4.sysctl_ip_autobind_reuse && !relax) {
+ if (READ_ONCE(net->ipv4.sysctl_ip_autobind_reuse) && !relax) {
/* We still have a chance to connect to different destinations */
relax = true;
goto ports_exhausted;
success:
*port_ret = port;
*tb_ret = tb;
- *tb2_ret = tb2;
- *head2_ret = head2;
return head;
}
{
bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN;
struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo;
- bool bhash_created = false, bhash2_created = false;
- struct inet_bind2_bucket *tb2 = NULL;
- struct inet_bind2_hashbucket *head2;
- struct inet_bind_bucket *tb = NULL;
+ int ret = 1, port = snum;
struct inet_bind_hashbucket *head;
struct net *net = sock_net(sk);
- int ret = 1, port = snum;
- bool found_port = false;
+ struct inet_bind_bucket *tb = NULL;
int l3mdev;
l3mdev = inet_sk_bound_l3mdev(sk);
if (!port) {
- head = inet_csk_find_open_port(sk, &tb, &tb2, &head2, &port);
+ head = inet_csk_find_open_port(sk, &tb, &port);
if (!head)
return ret;
- if (tb && tb2)
- goto success;
- found_port = true;
- } else {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- spin_lock_bh(&head->lock);
- inet_bind_bucket_for_each(tb, &head->chain)
- if (check_bind_bucket_match(tb, net, port, l3mdev))
- break;
-
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk,
- &head2);
- }
-
- if (!tb) {
- tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep, net,
- head, port, l3mdev);
if (!tb)
- goto fail_unlock;
- bhash_created = true;
- }
-
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep,
- net, head2, port, l3mdev, sk);
- if (!tb2)
- goto fail_unlock;
- bhash2_created = true;
+ goto tb_not_found;
+ goto success;
}
-
- /* If we had to find an open port, we already checked for conflicts */
- if (!found_port && !hlist_empty(&tb->owners)) {
+ head = &hinfo->bhash[inet_bhashfn(net, port,
+ hinfo->bhash_size)];
+ spin_lock_bh(&head->lock);
+ inet_bind_bucket_for_each(tb, &head->chain)
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port)
+ goto tb_found;
+tb_not_found:
+ tb = inet_bind_bucket_create(hinfo->bind_bucket_cachep,
+ net, head, port, l3mdev);
+ if (!tb)
+ goto fail_unlock;
+tb_found:
+ if (!hlist_empty(&tb->owners)) {
if (sk->sk_reuse == SK_FORCE_REUSE)
goto success;
if ((tb->fastreuse > 0 && reuse) ||
sk_reuseport_match(tb, sk))
goto success;
- if (inet_csk_bind_conflict(sk, port, tb, tb2, true, true))
+ if (inet_csk_bind_conflict(sk, tb, true, true))
goto fail_unlock;
}
success:
inet_csk_update_fastreuse(tb, sk);
if (!inet_csk(sk)->icsk_bind_hash)
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
WARN_ON(inet_csk(sk)->icsk_bind_hash != tb);
- WARN_ON(inet_csk(sk)->icsk_bind2_hash != tb2);
ret = 0;
fail_unlock:
- if (ret) {
- if (bhash_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- if (bhash2_created)
- inet_bind2_bucket_destroy(hinfo->bind2_bucket_cachep,
- tb2);
- }
spin_unlock_bh(&head->lock);
return ret;
}
icsk = inet_csk(sk_listener);
net = sock_net(sk_listener);
- max_syn_ack_retries = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_synack_retries;
+ max_syn_ack_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
/* Normally all the openreqs are young and become mature
* (i.e. converted to established socket) for first timeout.
* If synack was not acknowledged for 1 second, it means
inet_sk_set_state(newsk, TCP_SYN_RECV);
newicsk->icsk_bind_hash = NULL;
- newicsk->icsk_bind2_hash = NULL;
inet_sk(newsk)->inet_dport = inet_rsk(req)->ir_rmt_port;
inet_sk(newsk)->inet_num = inet_rsk(req)->ir_num;
return tb;
}
-struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
- struct net *net,
- struct inet_bind2_hashbucket *head,
- const unsigned short port,
- int l3mdev,
- const struct sock *sk)
-{
- struct inet_bind2_bucket *tb = kmem_cache_alloc(cachep, GFP_ATOMIC);
-
- if (tb) {
- write_pnet(&tb->ib_net, net);
- tb->l3mdev = l3mdev;
- tb->port = port;
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- else
-#endif
- tb->rcv_saddr = sk->sk_rcv_saddr;
- INIT_HLIST_HEAD(&tb->owners);
- hlist_add_head(&tb->node, &head->chain);
- }
- return tb;
-}
-
-static bool bind2_bucket_addr_match(struct inet_bind2_bucket *tb2, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return ipv6_addr_equal(&tb2->v6_rcv_saddr,
- &sk->sk_v6_rcv_saddr);
-#endif
- return tb2->rcv_saddr == sk->sk_rcv_saddr;
-}
-
/*
* Caller must hold hashbucket lock for this tb with local BH disabled
*/
}
}
-/* Caller must hold the lock for the corresponding hashbucket in the bhash table
- * with local BH disabled
- */
-void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
-{
- if (hlist_empty(&tb->owners)) {
- __hlist_del(&tb->node);
- kmem_cache_free(cachep, tb);
- }
-}
-
void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb,
- struct inet_bind2_bucket *tb2, const unsigned short snum)
+ const unsigned short snum)
{
inet_sk(sk)->inet_num = snum;
sk_add_bind_node(sk, &tb->owners);
inet_csk(sk)->icsk_bind_hash = tb;
- sk_add_bind2_node(sk, &tb2->owners);
- inet_csk(sk)->icsk_bind2_hash = tb2;
}
/*
const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num,
hashinfo->bhash_size);
struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash];
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
spin_lock(&head->lock);
inet_csk(sk)->icsk_bind_hash = NULL;
inet_sk(sk)->inet_num = 0;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
-
- if (inet_csk(sk)->icsk_bind2_hash) {
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- __sk_del_bind2_node(sk);
- inet_csk(sk)->icsk_bind2_hash = NULL;
- inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
- }
spin_unlock(&head->lock);
}
struct inet_hashinfo *table = sk->sk_prot->h.hashinfo;
unsigned short port = inet_sk(child)->inet_num;
const int bhash = inet_bhashfn(sock_net(sk), port,
- table->bhash_size);
+ table->bhash_size);
struct inet_bind_hashbucket *head = &table->bhash[bhash];
- struct inet_bind2_hashbucket *head_bhash2;
- bool created_inet_bind_bucket = false;
- struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
int l3mdev;
spin_lock(&head->lock);
tb = inet_csk(sk)->icsk_bind_hash;
- tb2 = inet_csk(sk)->icsk_bind2_hash;
- if (unlikely(!tb || !tb2)) {
+ if (unlikely(!tb)) {
spin_unlock(&head->lock);
return -ENOENT;
}
* as that of the child socket. We have to look up or
* create a new bind bucket for the child here. */
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev))
+ if (net_eq(ib_net(tb), sock_net(sk)) &&
+ tb->l3mdev == l3mdev && tb->port == port)
break;
}
if (!tb) {
tb = inet_bind_bucket_create(table->bind_bucket_cachep,
- net, head, port, l3mdev);
+ sock_net(sk), head, port,
+ l3mdev);
if (!tb) {
spin_unlock(&head->lock);
return -ENOMEM;
}
- created_inet_bind_bucket = true;
}
inet_csk_update_fastreuse(tb, child);
-
- goto bhash2_find;
- } else if (!bind2_bucket_addr_match(tb2, child)) {
- l3mdev = inet_sk_bound_l3mdev(sk);
-
-bhash2_find:
- tb2 = inet_bind2_bucket_find(table, net, port, l3mdev, child,
- &head_bhash2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(table->bind2_bucket_cachep,
- net, head_bhash2, port,
- l3mdev, child);
- if (!tb2)
- goto error;
- }
}
- inet_bind_hash(child, tb, tb2, port);
+ inet_bind_hash(child, tb, port);
spin_unlock(&head->lock);
return 0;
-
-error:
- if (created_inet_bind_bucket)
- inet_bind_bucket_destroy(table->bind_bucket_cachep, tb);
- spin_unlock(&head->lock);
- return -ENOMEM;
}
EXPORT_SYMBOL_GPL(__inet_inherit_port);
}
EXPORT_SYMBOL_GPL(inet_unhash);
-static bool check_bind2_bucket_match(struct inet_bind2_bucket *tb,
- struct net *net, unsigned short port,
- int l3mdev, struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &sk->sk_v6_rcv_saddr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == sk->sk_rcv_saddr;
-}
-
-bool check_bind2_bucket_match_nulladdr(struct inet_bind2_bucket *tb,
- struct net *net, const unsigned short port,
- int l3mdev, const struct sock *sk)
-{
-#if IS_ENABLED(CONFIG_IPV6)
- struct in6_addr nulladdr = {};
-
- if (sk->sk_family == AF_INET6)
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev &&
- ipv6_addr_equal(&tb->v6_rcv_saddr, &nulladdr);
- else
-#endif
- return net_eq(ib2_net(tb), net) && tb->port == port &&
- tb->l3mdev == l3mdev && tb->rcv_saddr == 0;
-}
-
-static struct inet_bind2_hashbucket *
-inet_bhashfn_portaddr(struct inet_hashinfo *hinfo, const struct sock *sk,
- const struct net *net, unsigned short port)
-{
- u32 hash;
-
-#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
- hash = ipv6_portaddr_hash(net, &sk->sk_v6_rcv_saddr, port);
- else
-#endif
- hash = ipv4_portaddr_hash(net, sk->sk_rcv_saddr, port);
- return &hinfo->bhash2[hash & (hinfo->bhash_size - 1)];
-}
-
-/* This should only be called when the spinlock for the socket's corresponding
- * bind_hashbucket is held
- */
-struct inet_bind2_bucket *
-inet_bind2_bucket_find(struct inet_hashinfo *hinfo, struct net *net,
- const unsigned short port, int l3mdev, struct sock *sk,
- struct inet_bind2_hashbucket **head)
-{
- struct inet_bind2_bucket *bhash2 = NULL;
- struct inet_bind2_hashbucket *h;
-
- h = inet_bhashfn_portaddr(hinfo, sk, net, port);
- inet_bind_bucket_for_each(bhash2, &h->chain) {
- if (check_bind2_bucket_match(bhash2, net, port, l3mdev, sk))
- break;
- }
-
- if (head)
- *head = h;
-
- return bhash2;
-}
-
/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
* Note that we use 32bit integers (vs RFC 'short integers')
* because 2^16 is not a multiple of num_ephemeral and this
{
struct inet_hashinfo *hinfo = death_row->hashinfo;
struct inet_timewait_sock *tw = NULL;
- struct inet_bind2_hashbucket *head2;
struct inet_bind_hashbucket *head;
int port = inet_sk(sk)->inet_num;
struct net *net = sock_net(sk);
- struct inet_bind2_bucket *tb2;
struct inet_bind_bucket *tb;
- bool tb_created = false;
u32 remaining, offset;
int ret, i, low, high;
int l3mdev;
* the established check is already unique enough.
*/
inet_bind_bucket_for_each(tb, &head->chain) {
- if (check_bind_bucket_match(tb, net, port, l3mdev)) {
+ if (net_eq(ib_net(tb), net) && tb->l3mdev == l3mdev &&
+ tb->port == port) {
if (tb->fastreuse >= 0 ||
tb->fastreuseport >= 0)
goto next_port;
spin_unlock_bh(&head->lock);
return -ENOMEM;
}
- tb_created = true;
tb->fastreuse = -1;
tb->fastreuseport = -1;
goto ok;
return -EADDRNOTAVAIL;
ok:
- /* Find the corresponding tb2 bucket since we need to
- * add the socket to the bhash2 table as well
- */
- tb2 = inet_bind2_bucket_find(hinfo, net, port, l3mdev, sk, &head2);
- if (!tb2) {
- tb2 = inet_bind2_bucket_create(hinfo->bind2_bucket_cachep, net,
- head2, port, l3mdev, sk);
- if (!tb2)
- goto error;
- }
-
/* Here we want to add a little bit of randomness to the next source
* port that will be chosen. We use a max() with a random here so that
* on low contention the randomness is maximal and on high contention
WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
/* Head lock still held and bh's disabled */
- inet_bind_hash(sk, tb, tb2, port);
+ inet_bind_hash(sk, tb, port);
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
inet_twsk_deschedule_put(tw);
local_bh_enable();
return 0;
-
-error:
- if (tb_created)
- inet_bind_bucket_destroy(hinfo->bind_bucket_cachep, tb);
- spin_unlock_bh(&head->lock);
- return -ENOMEM;
}
/*
init_hashinfo_lhash2(h);
/* this one is used for source ports of outgoing connections */
- table_perturb = kmalloc_array(INET_TABLE_PERTURB_SIZE,
- sizeof(*table_perturb), GFP_KERNEL);
- if (!table_perturb)
- panic("TCP: failed to alloc table_perturb");
+ table_perturb = alloc_large_system_hash("Table-perturb",
+ sizeof(*table_perturb),
+ INET_TABLE_PERTURB_SIZE,
+ 0, 0, NULL, NULL,
+ INET_TABLE_PERTURB_SIZE,
+ INET_TABLE_PERTURB_SIZE);
}
int inet_hashinfo2_init_mod(struct inet_hashinfo *h)
{
struct inet_timewait_sock *tw;
- if (refcount_read(&dr->tw_refcount) - 1 >= dr->sysctl_max_tw_buckets)
+ if (refcount_read(&dr->tw_refcount) - 1 >=
+ READ_ONCE(dr->sysctl_max_tw_buckets))
return NULL;
tw = kmem_cache_alloc(sk->sk_prot_creator->twsk_prot->twsk_slab,
struct inet_peer *gc_stack[],
unsigned int gc_cnt)
{
+ int peer_threshold, peer_maxttl, peer_minttl;
struct inet_peer *p;
__u32 delta, ttl;
int i;
- if (base->total >= inet_peer_threshold)
+ peer_threshold = READ_ONCE(inet_peer_threshold);
+ peer_maxttl = READ_ONCE(inet_peer_maxttl);
+ peer_minttl = READ_ONCE(inet_peer_minttl);
+
+ if (base->total >= peer_threshold)
ttl = 0; /* be aggressive */
else
- ttl = inet_peer_maxttl
- - (inet_peer_maxttl - inet_peer_minttl) / HZ *
- base->total / inet_peer_threshold * HZ;
+ ttl = peer_maxttl - (peer_maxttl - peer_minttl) / HZ *
+ base->total / peer_threshold * HZ;
for (i = 0; i < gc_cnt; i++) {
p = gc_stack[i];
!skb_sec_path(skb))
ip_rt_send_redirect(skb);
- if (net->ipv4.sysctl_ip_fwd_update_priority)
+ if (READ_ONCE(net->ipv4.sysctl_ip_fwd_update_priority))
skb->priority = rt_tos2priority(iph->tos);
return NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD,
int tunnel_hlen;
int version;
int nhoff;
- int thoff;
tun_info = skb_tunnel_info(skb);
if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (version == 1) {
erspan_build_header(skb, ntohl(tunnel_id_to_key32(key->tun_id)),
}
if (dev->header_ops) {
- const int pull_len = tunnel->hlen + sizeof(struct iphdr);
-
if (skb_cow_head(skb, 0))
goto free_skb;
tnl_params = (const struct iphdr *)skb->data;
- if (pull_len > skb_transport_offset(skb))
- goto free_skb;
-
/* Pull skb since ip_tunnel_xmit() needs skb->data pointing
* to gre header.
*/
- skb_pull(skb, pull_len);
+ skb_pull(skb, tunnel->hlen + sizeof(struct iphdr));
skb_reset_mac_header(skb);
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb_checksum_start(skb) < skb->data)
+ goto free_skb;
} else {
if (skb_cow_head(skb, dev->needed_headroom))
goto free_skb;
ip_hdr(hint)->tos == iph->tos;
}
-INDIRECT_CALLABLE_DECLARE(int udp_v4_early_demux(struct sk_buff *));
-INDIRECT_CALLABLE_DECLARE(int tcp_v4_early_demux(struct sk_buff *));
+int tcp_v4_early_demux(struct sk_buff *skb);
+int udp_v4_early_demux(struct sk_buff *skb);
static int ip_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb, struct net_device *dev,
const struct sk_buff *hint)
{
const struct iphdr *iph = ip_hdr(skb);
- int (*edemux)(struct sk_buff *skb);
int err, drop_reason;
struct rtable *rt;
goto drop_error;
}
- if (net->ipv4.sysctl_ip_early_demux &&
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
!skb_dst(skb) &&
!skb->sk &&
!ip_is_fragment(iph)) {
- const struct net_protocol *ipprot;
- int protocol = iph->protocol;
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) {
- err = INDIRECT_CALL_2(edemux, tcp_v4_early_demux,
- udp_v4_early_demux, skb);
- if (unlikely(err))
- goto drop_error;
- /* must reload iph, skb->head might have changed */
- iph = ip_hdr(skb);
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux)) {
+ tcp_v4_early_demux(skb);
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux)) {
+ err = udp_v4_early_demux(skb);
+ if (unlikely(err))
+ goto drop_error;
+
+ /* must reload iph, skb->head might have changed */
+ iph = ip_hdr(skb);
+ }
+ break;
}
}
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
if (gsf->gf_numsrc >= 0x1ffffff ||
- gsf->gf_numsrc > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ gsf->gf_numsrc > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = -EINVAL;
/* numsrc >= (4G-140)/128 overflow in 32 bits */
err = -ENOBUFS;
- if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+ if (n > READ_ONCE(sock_net(sk)->ipv4.sysctl_igmp_max_msf))
goto out_free_gsf;
err = set_mcast_msfilter(sk, gf32->gf_interface, n, gf32->gf_fmode,
&gf32->gf_group, gf32->gf_slist_flex);
}
/* numsrc >= (1G-4) overflow in 32 bits */
if (msf->imsf_numsrc >= 0x3ffffffcU ||
- msf->imsf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
+ msf->imsf_numsrc > READ_ONCE(net->ipv4.sysctl_igmp_max_msf)) {
kfree(msf);
err = -ENOBUFS;
break;
{
struct net *net = sock_net(sk);
val = (inet->uc_ttl == -1 ?
- net->ipv4.sysctl_ip_default_ttl :
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl) :
inet->uc_ttl);
break;
}
u32 mtu = dst_mtu(encap_dst) - headroom;
if ((skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu)) ||
- (!skb_is_gso(skb) && (skb->len - skb_mac_header_len(skb)) <= mtu))
+ (!skb_is_gso(skb) && (skb->len - skb_network_offset(skb)) <= mtu))
return 0;
skb_dst_update_pmtu_no_confirm(skb, mtu);
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
nf_reject_ip_tcphdr_put(nskb, oldskb, oth);
niph->tot_len = htons(nskb->len);
ip_send_check(niph);
skb_reserve(nskb, LL_MAX_HEADER);
niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
skb_reset_transport_header(nskb);
icmph = skb_put_zero(nskb, sizeof(struct icmphdr));
/* __ip6_del_rt does a release, so do a hold here */
fib6_info_hold(f6i);
ipv6_stub->ip6_del_rt(net, f6i,
- !net->ipv4.sysctl_nexthop_compat_mode);
+ !READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode));
}
}
if (!rc) {
nh_base_seq_inc(net);
nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
- if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
+ if (replace_notify &&
+ READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode))
nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
}
pr_debug("ping_check_bind_addr(sk=%p,addr=%pI4,port=%d)\n",
sk, &addr->sin_addr.s_addr, ntohs(addr->sin_port));
+ if (addr->sin_addr.s_addr == htonl(INADDR_ANY))
+ return 0;
+
tb_id = l3mdev_fib_table_by_index(net, sk->sk_bound_dev_if) ? : tb_id;
chk_addr_ret = inet_addr_type_table(net, addr->sin_addr.s_addr, tb_id);
- if (!inet_addr_valid_or_nonlocal(net, inet_sk(sk),
- addr->sin_addr.s_addr,
- chk_addr_ret))
+ if (chk_addr_ret == RTN_MULTICAST ||
+ chk_addr_ret == RTN_BROADCAST ||
+ (chk_addr_ret != RTN_LOCAL &&
+ !inet_can_nonlocal_bind(net, isk)))
return -EADDRNOTAVAIL;
#if IS_ENABLED(CONFIG_IPV6)
seq_printf(seq, "\nIp: %d %d",
IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2,
- net->ipv4.sysctl_ip_default_ttl);
+ READ_ONCE(net->ipv4.sysctl_ip_default_ttl));
BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0);
snmp_get_cpu_field64_batch(buff64, snmp4_ipstats_list,
struct fib_info *fi = res->fi;
u32 mtu = 0;
- if (dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu ||
+ if (READ_ONCE(dev_net(dev)->ipv4.sysctl_ip_fwd_use_pmtu) ||
fi->fib_metrics->metrics[RTAX_LOCK - 1] & (1 << RTAX_MTU))
mtu = fi->fib_mtu;
const struct sk_buff *skb,
bool *p_has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
const struct sk_buff *skb,
bool has_inner)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys keys, hash_keys;
/* We assume the packet carries an encapsulation, but if none was
static u32 fib_multipath_custom_hash_fl4(const struct net *net,
const struct flowi4 *fl4)
{
- u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields;
+ u32 hash_fields = READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_fields);
struct flow_keys hash_keys;
if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK))
struct flow_keys hash_keys;
u32 mhash = 0;
- switch (net->ipv4.sysctl_fib_multipath_hash_policy) {
+ switch (READ_ONCE(net->ipv4.sysctl_fib_multipath_hash_policy)) {
case 0:
memset(&hash_keys, 0, sizeof(hash_keys));
hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
return true;
}
- if (!net->ipv4.sysctl_tcp_timestamps)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_timestamps))
return false;
tcp_opt->sack_ok = (options & TS_OPT_SACK) ? TCP_SACK_SEEN : 0;
- if (tcp_opt->sack_ok && !net->ipv4.sysctl_tcp_sack)
+ if (tcp_opt->sack_ok && !READ_ONCE(net->ipv4.sysctl_tcp_sack))
return false;
if ((options & TS_OPT_WSCALE_MASK) == TS_OPT_WSCALE_MASK)
tcp_opt->wscale_ok = 1;
tcp_opt->snd_wscale = options & TS_OPT_WSCALE_MASK;
- return net->ipv4.sysctl_tcp_window_scaling != 0;
+ return READ_ONCE(net->ipv4.sysctl_tcp_window_scaling) != 0;
}
EXPORT_SYMBOL(cookie_timestamp_decode);
if (!ecn_ok)
return false;
- if (net->ipv4.sysctl_tcp_ecn)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_ecn))
return true;
return dst_feature(dst, RTAX_FEATURE_ECN);
struct flowi4 fl4;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
* port limit.
*/
if ((range[1] < range[0]) ||
- (range[0] < net->ipv4.sysctl_ip_prot_sock))
+ (range[0] < READ_ONCE(net->ipv4.sysctl_ip_prot_sock)))
ret = -EINVAL;
else
set_local_port_range(net, range);
.extra2 = &ip_privileged_port_max,
};
- pports = net->ipv4.sysctl_ip_prot_sock;
+ pports = READ_ONCE(net->ipv4.sysctl_ip_prot_sock);
ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
if (range[0] < pports)
ret = -EINVAL;
else
- net->ipv4.sysctl_ip_prot_sock = pports;
+ WRITE_ONCE(net->ipv4.sysctl_ip_prot_sock, pports);
}
return ret;
return ret;
}
-static void proc_configure_early_demux(int enabled, int protocol)
-{
- struct net_protocol *ipprot;
-#if IS_ENABLED(CONFIG_IPV6)
- struct inet6_protocol *ip6prot;
-#endif
-
- rcu_read_lock();
-
- ipprot = rcu_dereference(inet_protos[protocol]);
- if (ipprot)
- ipprot->early_demux = enabled ? ipprot->early_demux_handler :
- NULL;
-
-#if IS_ENABLED(CONFIG_IPV6)
- ip6prot = rcu_dereference(inet6_protos[protocol]);
- if (ip6prot)
- ip6prot->early_demux = enabled ? ip6prot->early_demux_handler :
- NULL;
-#endif
- rcu_read_unlock();
-}
-
-static int proc_tcp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_tcp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_TCP);
- }
-
- return ret;
-}
-
-static int proc_udp_early_demux(struct ctl_table *table, int write,
- void *buffer, size_t *lenp, loff_t *ppos)
-{
- int ret = 0;
-
- ret = proc_dou8vec_minmax(table, write, buffer, lenp, ppos);
-
- if (write && !ret) {
- int enabled = init_net.ipv4.sysctl_udp_early_demux;
-
- proc_configure_early_demux(enabled, IPPROTO_UDP);
- }
-
- return ret;
-}
-
static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
int write, void *buffer,
size_t *lenp, loff_t *ppos)
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_echo_enable_probe",
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_ignore_bogus_error_responses",
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_errors_use_inbound_ifaddr",
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE
},
{
.procname = "icmp_ratelimit",
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_TWO,
},
{
.procname = "tcp_ecn_fallback",
.maxlen = sizeof(u8),
.mode = 0644,
.proc_handler = proc_dou8vec_minmax,
+ .extra1 = SYSCTL_ZERO,
+ .extra2 = SYSCTL_ONE,
},
{
.procname = "ip_dynaddr",
.data = &init_net.ipv4.sysctl_udp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_udp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "tcp_early_demux",
.data = &init_net.ipv4.sysctl_tcp_early_demux,
.maxlen = sizeof(u8),
.mode = 0644,
- .proc_handler = proc_tcp_early_demux
+ .proc_handler = proc_dou8vec_minmax,
},
{
.procname = "nexthop_compat_mode",
tp->snd_cwnd_clamp = ~0;
tp->mss_cache = TCP_MSS_DEFAULT;
- tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering;
+ tp->reordering = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering);
tcp_assign_congestion_control(sk);
tp->tsoffset = 0;
icsk->icsk_sync_mss = tcp_sync_mss;
- WRITE_ONCE(sk->sk_sndbuf, sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
- WRITE_ONCE(sk->sk_rcvbuf, sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]));
+ WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]));
sk_sockets_allocated_inc(sk);
}
int size_goal)
{
return skb->len < size_goal &&
- sock_net(sk)->ipv4.sysctl_tcp_autocorking &&
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_autocorking) &&
!tcp_rtx_queue_empty(sk) &&
refcount_read(&sk->sk_wmem_alloc) > skb->truesize &&
tcp_skb_can_collapse_to(skb);
struct sockaddr *uaddr = msg->msg_name;
int err, flags;
- if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) ||
+ if (!(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) ||
(uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) &&
uaddr->sa_family == AF_UNSPEC))
return -EOPNOTSUPP;
if (sk->sk_userlocks & SOCK_RCVBUF_LOCK)
cap = sk->sk_rcvbuf >> 1;
else
- cap = sock_net(sk)->ipv4.sysctl_tcp_rmem[2] >> 1;
+ cap = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
val = min(val, cap);
WRITE_ONCE(sk->sk_rcvlowat, val ? : 1);
static bool tcp_too_many_orphans(int shift)
{
- return READ_ONCE(tcp_orphan_cache) << shift > sysctl_tcp_max_orphans;
+ return READ_ONCE(tcp_orphan_cache) << shift >
+ READ_ONCE(sysctl_tcp_max_orphans);
}
bool tcp_check_oom(struct sock *sk, int shift)
case TCP_FASTOPEN_CONNECT:
if (val > 1 || val < 0) {
err = -EINVAL;
- } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ } else if (READ_ONCE(net->ipv4.sysctl_tcp_fastopen) &
+ TFO_CLIENT_ENABLE) {
if (sk->sk_state == TCP_CLOSE)
tp->fastopen_connect = val;
else
val = keepalive_probes(tp);
break;
case TCP_SYNCNT:
- val = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ val = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
break;
case TCP_LINGER2:
val = tp->linger2;
if (val >= 0)
- val = (val ? : net->ipv4.sysctl_tcp_fin_timeout) / HZ;
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
break;
case TCP_DEFER_ACCEPT:
val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
return SKB_DROP_REASON_TCP_MD5UNEXPECTED;
}
- /* check the signature */
- genhash = tp->af_specific->calc_md5_hash(newhash, hash_expected,
- NULL, skb);
+ /* Check the signature.
+ * To support dual stack listeners, we need to handle
+ * IPv4-mapped case.
+ */
+ if (family == AF_INET)
+ genhash = tcp_v4_md5_hash_skb(newhash,
+ hash_expected,
+ NULL, skb);
+ else
+ genhash = tp->af_specific->calc_md5_hash(newhash,
+ hash_expected,
+ NULL, skb);
if (genhash || memcmp(hash_location, newhash, 16) != 0) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
SLAB_HWCACHE_ALIGN | SLAB_PANIC |
SLAB_ACCOUNT,
NULL);
- tcp_hashinfo.bind2_bucket_cachep =
- kmem_cache_create("tcp_bind2_bucket",
- sizeof(struct inet_bind2_bucket), 0,
- SLAB_HWCACHE_ALIGN | SLAB_PANIC |
- SLAB_ACCOUNT,
- NULL);
/* Size and allocate the main established and bind bucket
* hash tables.
if (inet_ehash_locks_alloc(&tcp_hashinfo))
panic("TCP: failed to alloc ehash_locks");
tcp_hashinfo.bhash =
- alloc_large_system_hash("TCP bind bhash tables",
- sizeof(struct inet_bind_hashbucket) +
- sizeof(struct inet_bind2_hashbucket),
+ alloc_large_system_hash("TCP bind",
+ sizeof(struct inet_bind_hashbucket),
tcp_hashinfo.ehash_mask + 1,
17, /* one slot per 128 KB of memory */
0,
0,
64 * 1024);
tcp_hashinfo.bhash_size = 1U << tcp_hashinfo.bhash_size;
- tcp_hashinfo.bhash2 =
- (struct inet_bind2_hashbucket *)(tcp_hashinfo.bhash + tcp_hashinfo.bhash_size);
for (i = 0; i < tcp_hashinfo.bhash_size; i++) {
spin_lock_init(&tcp_hashinfo.bhash[i].lock);
INIT_HLIST_HEAD(&tcp_hashinfo.bhash[i].chain);
- INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain);
}
return 0;
}
- if (inet_csk_has_ulp(sk))
- return -EINVAL;
-
if (sk->sk_family == AF_INET6) {
if (tcp_bpf_assert_proto_ops(psock->sk_proto))
return -EINVAL;
const struct dst_entry *dst,
int flag)
{
- return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) ||
+ return (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen) & flag) ||
tcp_sk(sk)->fastopen_no_cookie ||
(dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE));
}
const struct dst_entry *dst)
{
bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
- int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen;
+ int tcp_fastopen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen);
struct tcp_fastopen_cookie valid_foc = { .len = -1 };
struct sock *child;
int ret = 0;
{
struct net *net = sock_net(sk);
- if (!sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout))
return;
/* Paired with READ_ONCE() in tcp_fastopen_active_should_disable() */
*/
bool tcp_fastopen_active_should_disable(struct sock *sk)
{
- unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout;
+ unsigned int tfo_bh_timeout =
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout);
unsigned long timeout;
int tfo_da_times;
int multiplier;
if (sk->sk_sndbuf < sndmem)
WRITE_ONCE(sk->sk_sndbuf,
- min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]));
+ min(sndmem, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[2])));
}
/* 2. Tuning advertised window (window_clamp, rcv_ssthresh)
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
int truesize = tcp_win_from_space(sk, skbtruesize) >> 1;
- int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1;
+ int window = tcp_win_from_space(sk, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2])) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
*/
static void tcp_init_buffer_space(struct sock *sk)
{
- int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
+ int tcp_app_win = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_app_win);
struct tcp_sock *tp = tcp_sk(sk);
int maxwin;
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
+ int rmem2;
icsk->icsk_ack.quick = 0;
+ rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
- if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] &&
+ if (sk->sk_rcvbuf < rmem2 &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK) &&
!tcp_under_memory_pressure(sk) &&
sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) {
WRITE_ONCE(sk->sk_rcvbuf,
- min(atomic_read(&sk->sk_rmem_alloc),
- net->ipv4.sysctl_tcp_rmem[2]));
+ min(atomic_read(&sk->sk_rmem_alloc), rmem2));
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
* <prev RTT . ><current RTT .. ><next RTT .... >
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
do_div(rcvwin, tp->advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
WRITE_ONCE(sk->sk_rcvbuf, rcvbuf);
* end of slow start and should slow down.
*/
if (tcp_snd_cwnd(tp) < tp->snd_ssthresh / 2)
- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio;
+ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio);
else
- rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio;
+ rate *= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio);
rate *= max(tcp_snd_cwnd(tp), tp->packets_out);
tp->undo_marker ? tp->undo_retrans : 0);
#endif
tp->reordering = min_t(u32, (metric + mss - 1) / mss,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
}
/* This exciting event is worth to be remembered. 8) */
return;
tp->reordering = min_t(u32, tp->packets_out + addend,
- sock_net(sk)->ipv4.sysctl_tcp_max_reordering);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_max_reordering));
tp->reord_seen++;
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER);
}
static bool tcp_is_rack(const struct sock *sk)
{
- return sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION;
+ return READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_LOSS_DETECTION;
}
/* If we detect SACK reneging, forget all SACK information
struct tcp_sock *tp = tcp_sk(sk);
struct net *net = sock_net(sk);
bool new_recovery = icsk->icsk_ca_state < TCP_CA_Recovery;
+ u8 reordering;
tcp_timeout_mark_lost(sk);
/* Timeout in disordered state after receiving substantial DUPACKs
* suggests that the degree of reordering is over-estimated.
*/
+ reordering = READ_ONCE(net->ipv4.sysctl_tcp_reordering);
if (icsk->icsk_ca_state <= TCP_CA_Disorder &&
- tp->sacked_out >= net->ipv4.sysctl_tcp_reordering)
+ tp->sacked_out >= reordering)
tp->reordering = min_t(unsigned int, tp->reordering,
- net->ipv4.sysctl_tcp_reordering);
+ reordering);
+
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
tcp_ecn_queue_cwr(tp);
* loss recovery is underway except recurring timeout(s) on
* the same SND.UNA (sec 3.2). Disable F-RTO on path MTU probing
*/
- tp->frto = net->ipv4.sysctl_tcp_frto &&
+ tp->frto = READ_ONCE(net->ipv4.sysctl_tcp_frto) &&
(new_recovery || icsk->icsk_retransmits) &&
!inet_csk(sk)->icsk_mtup.probe_size;
}
static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us, const int flag)
{
- u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ;
+ u32 wlen = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen) * HZ;
struct tcp_sock *tp = tcp_sk(sk);
if ((flag & FLAG_ACK_MAYBE_DELAYED) && rtt_us > tcp_min_rtt(tp)) {
* new SACK or ECE mark may first advance cwnd here and later reduce
* cwnd in tcp_fastretrans_alert() based on more states.
*/
- if (tcp_sk(sk)->reordering > sock_net(sk)->ipv4.sysctl_tcp_reordering)
+ if (tcp_sk(sk)->reordering >
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reordering))
return flag & FLAG_FORWARD_PROGRESS;
return flag & FLAG_DATA_ACKED;
if (*last_oow_ack_time) {
s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time);
- if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) {
+ if (0 <= elapsed &&
+ elapsed < READ_ONCE(net->ipv4.sysctl_tcp_invalid_ratelimit)) {
NET_INC_STATS(net, mib_idx);
return true; /* rate-limited: don't send yet! */
}
/* Then check host-wide RFC 5961 rate limit. */
now = jiffies / HZ;
if (now != challenge_timestamp) {
- u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit;
+ u32 ack_limit = READ_ONCE(net->ipv4.sysctl_tcp_challenge_ack_limit);
u32 half = (ack_limit + 1) >> 1;
challenge_timestamp = now;
break;
case TCPOPT_WINDOW:
if (opsize == TCPOLEN_WINDOW && th->syn &&
- !estab && net->ipv4.sysctl_tcp_window_scaling) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_window_scaling)) {
__u8 snd_wscale = *(__u8 *)ptr;
opt_rx->wscale_ok = 1;
if (snd_wscale > TCP_MAX_WSCALE) {
case TCPOPT_TIMESTAMP:
if ((opsize == TCPOLEN_TIMESTAMP) &&
((estab && opt_rx->tstamp_ok) ||
- (!estab && net->ipv4.sysctl_tcp_timestamps))) {
+ (!estab && READ_ONCE(net->ipv4.sysctl_tcp_timestamps)))) {
opt_rx->saw_tstamp = 1;
opt_rx->rcv_tsval = get_unaligned_be32(ptr);
opt_rx->rcv_tsecr = get_unaligned_be32(ptr + 4);
break;
case TCPOPT_SACK_PERM:
if (opsize == TCPOLEN_SACK_PERM && th->syn &&
- !estab && net->ipv4.sysctl_tcp_sack) {
+ !estab && READ_ONCE(net->ipv4.sysctl_tcp_sack)) {
opt_rx->sack_ok = TCP_SACK_SEEN;
tcp_sack_reset(opt_rx);
}
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
int mib_idx;
if (before(seq, tp->rcv_nxt))
NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST);
tcp_enter_quickack_mode(sk, TCP_MAX_QUICKACKS);
- if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) {
+ if (tcp_is_sack(tp) && READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_dsack)) {
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
tcp_rcv_spurious_retrans(sk, skb);
}
if (!tcp_is_sack(tp) ||
- tp->compressed_ack >= sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr)
+ tp->compressed_ack >= READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_nr))
goto send_now;
if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
if (tp->srtt_us && tp->srtt_us < rtt)
rtt = tp->srtt_us;
- delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
+ delay = min_t(unsigned long,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns),
rtt * (NSEC_PER_USEC >> 3)/20);
sock_hold(sk);
hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
- sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns),
HRTIMER_MODE_REL_PINNED_SOFT);
}
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
- if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg)
+ if (ptr && !READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_stdurg))
ptr--;
ptr += ntohl(th->seq);
ect = !INET_ECN_is_not_ect(TCP_SKB_CB(skb)->ip_dsfield);
ecn_ok_dst = dst_feature(dst, DST_FEATURE_ECN_MASK);
- ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
+ ecn_ok = READ_ONCE(net->ipv4.sysctl_tcp_ecn) || ecn_ok_dst;
if (((!ect || th->res1) && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
(ecn_ok_dst & DST_FEATURE_ECN_CA) ||
{
struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
const char *msg = "Dropping request";
- bool want_cookie = false;
struct net *net = sock_net(sk);
+ bool want_cookie = false;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
#ifdef CONFIG_SYN_COOKIES
- if (net->ipv4.sysctl_tcp_syncookies) {
+ if (syncookies) {
msg = "Sending cookies";
want_cookie = true;
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDOCOOKIES);
#endif
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPREQQFULLDROP);
- if (!queue->synflood_warned &&
- net->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (!queue->synflood_warned && syncookies != 2 &&
xchg(&queue->synflood_warned, 1) == 0)
net_info_ratelimited("%s: Possible SYN flooding on port %d. %s. Check SNMP counters.\n",
proto, sk->sk_num, msg);
struct tcp_sock *tp = tcp_sk(sk);
u16 mss;
- if (sock_net(sk)->ipv4.sysctl_tcp_syncookies != 2 &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) != 2 &&
!inet_csk_reqsk_queue_is_full(sk))
return 0;
bool want_cookie = false;
struct dst_entry *dst;
struct flowi fl;
+ u8 syncookies;
+
+ syncookies = READ_ONCE(net->ipv4.sysctl_tcp_syncookies);
/* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
* evidently real one.
*/
- if ((net->ipv4.sysctl_tcp_syncookies == 2 ||
- inet_csk_reqsk_queue_is_full(sk)) && !isn) {
+ if ((syncookies == 2 || inet_csk_reqsk_queue_is_full(sk)) && !isn) {
want_cookie = tcp_syn_flood_action(sk, rsk_ops->slab_name);
if (!want_cookie)
goto drop;
tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb);
if (!want_cookie && !isn) {
+ int max_syn_backlog = READ_ONCE(net->ipv4.sysctl_max_syn_backlog);
+
/* Kill the following clause, if you dislike this way. */
- if (!net->ipv4.sysctl_tcp_syncookies &&
- (net->ipv4.sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
- (net->ipv4.sysctl_max_syn_backlog >> 2)) &&
+ if (!syncookies &&
+ (max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
+ (max_syn_backlog >> 2)) &&
!tcp_peer_is_proven(req, dst)) {
/* Without syncookies last quarter of
* backlog is filled with destinations,
int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp)
{
+ int reuse = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tw_reuse);
const struct inet_timewait_sock *tw = inet_twsk(sktw);
const struct tcp_timewait_sock *tcptw = tcp_twsk(sktw);
struct tcp_sock *tp = tcp_sk(sk);
- int reuse = sock_net(sk)->ipv4.sysctl_tcp_tw_reuse;
if (reuse == 2) {
/* Still does not detect *everything* that goes through
if (skb) {
__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
- tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tos = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(inet_sk(sk)->tos & INET_ECN_MASK) :
inet_sk(sk)->tos;
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
if (!dst) {
struct sock *nsk;
sk = req->rsk_listener;
- drop_reason = tcp_inbound_md5_hash(sk, skb,
+ if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
+ drop_reason = SKB_DROP_REASON_XFRM_POLICY;
+ else
+ drop_reason = tcp_inbound_md5_hash(sk, skb,
&iph->saddr, &iph->daddr,
AF_INET, dif, sdif);
if (unlikely(drop_reason)) {
}
goto discard_and_relse;
}
+ nf_reset_ct(skb);
if (nsk == sk) {
reqsk_put(req);
tcp_v4_restore_cb(skb);
int m;
sk_dst_confirm(sk);
- if (net->ipv4.sysctl_tcp_nometrics_save || !dst)
+ if (READ_ONCE(net->ipv4.sysctl_tcp_nometrics_save) || !dst)
return;
rcu_read_lock();
if (tcp_in_initial_slowstart(tp)) {
/* Slow start still did not finish. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && (tcp_snd_cwnd(tp) >> 1) > val)
} else if (!tcp_in_slow_start(tp) &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH))
tcp_metric_set(tm, TCP_METRIC_SSTHRESH,
max(tcp_snd_cwnd(tp) >> 1, tp->snd_ssthresh));
tcp_metric_set(tm, TCP_METRIC_CWND,
(val + tp->snd_ssthresh) >> 1);
}
- if (!net->ipv4.sysctl_tcp_no_ssthresh_metrics_save &&
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) &&
!tcp_metric_locked(tm, TCP_METRIC_SSTHRESH)) {
val = tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val && tp->snd_ssthresh > val)
if (!tcp_metric_locked(tm, TCP_METRIC_REORDERING)) {
val = tcp_metric_get(tm, TCP_METRIC_REORDERING);
if (val < tp->reordering &&
- tp->reordering != net->ipv4.sysctl_tcp_reordering)
+ tp->reordering !=
+ READ_ONCE(net->ipv4.sysctl_tcp_reordering))
tcp_metric_set(tm, TCP_METRIC_REORDERING,
tp->reordering);
}
if (tcp_metric_locked(tm, TCP_METRIC_CWND))
tp->snd_cwnd_clamp = tcp_metric_get(tm, TCP_METRIC_CWND);
- val = net->ipv4.sysctl_tcp_no_ssthresh_metrics_save ?
+ val = READ_ONCE(net->ipv4.sysctl_tcp_no_ssthresh_metrics_save) ?
0 : tcp_metric_get(tm, TCP_METRIC_SSTHRESH);
if (val) {
tp->snd_ssthresh = val;
* Oh well... nobody has a sufficient solution to this
* protocol bug yet.
*/
- if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) {
+ if (!READ_ONCE(twsk_net(tw)->ipv4.sysctl_tcp_rfc1337)) {
kill:
inet_twsk_deschedule_put(tw);
return TCP_TW_SUCCESS;
if (sk != req->rsk_listener)
__NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMIGRATEREQFAILURE);
- if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) {
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow)) {
inet_rsk(req)->acked = 1;
return NULL;
}
if (tcp_packets_in_flight(tp) == 0)
tcp_ca_event(sk, CA_EVENT_TX_START);
- /* If this is the first data packet sent in response to the
- * previous received data,
- * and it is a reply for ato after last received packet,
- * increase pingpong count.
- */
- if (before(tp->lsndtime, icsk->icsk_ack.lrcvtime) &&
- (u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
- inet_csk_inc_pingpong_cnt(sk);
-
tp->lsndtime = now;
+
+ /* If it is a reply for ato after last received
+ * packet, enter pingpong mode.
+ */
+ if ((u32)(now - icsk->icsk_ack.lrcvtime) < icsk->icsk_ack.ato)
+ inet_csk_enter_pingpong_mode(sk);
}
/* Account for an ACK we sent. */
* which we interpret as a sign the remote TCP is not
* misinterpreting the window field as a signed quantity.
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
(*rcv_wnd) = min(space, MAX_TCP_WINDOW);
else
(*rcv_wnd) = min_t(u32, space, U16_MAX);
*rcv_wscale = 0;
if (wscale_ok) {
/* Set window scaling on max possible window */
- space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ space = max_t(u32, space, READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
space = max_t(u32, space, sysctl_rmem_max);
space = min_t(u32, space, *window_clamp);
*rcv_wscale = clamp_t(int, ilog2(space) - 15,
* scaled window.
*/
if (!tp->rx_opt.rcv_wscale &&
- sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
{
struct tcp_sock *tp = tcp_sk(sk);
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
- bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
+ bool use_ecn = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn) == 1 ||
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
if (!use_ecn) {
static void tcp_ecn_clear_syn(struct sock *sk, struct sk_buff *skb)
{
- if (sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_fallback))
/* tp->ecn_flags are cleared at a later point in time when
* SYN ACK is ultimatively being received.
*/
opts->mss = tcp_advertise_mss(sk);
remaining -= TCPOLEN_MSS_ALIGNED;
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_timestamps && !*md5)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps) && !*md5)) {
opts->options |= OPTION_TS;
opts->tsval = tcp_skb_timestamp(skb) + tp->tsoffset;
opts->tsecr = tp->rx_opt.ts_recent;
remaining -= TCPOLEN_TSTAMP_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_window_scaling)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling))) {
opts->ws = tp->rx_opt.rcv_wscale;
opts->options |= OPTION_WSCALE;
remaining -= TCPOLEN_WSCALE_ALIGNED;
}
- if (likely(sock_net(sk)->ipv4.sysctl_tcp_sack)) {
+ if (likely(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_sack))) {
opts->options |= OPTION_SACK_ADVERTISE;
if (unlikely(!(OPTION_TS & opts->options)))
remaining -= TCPOLEN_SACKPERM_ALIGNED;
mss_now -= icsk->icsk_ext_hdr_len;
/* Then reserve room for full set of TCP options and 8 bytes of data */
- mss_now = max(mss_now, sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss);
+ mss_now = max(mss_now,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_snd_mss));
return mss_now;
}
struct inet_connection_sock *icsk = inet_csk(sk);
struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing) > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, READ_ONCE(net->ipv4.sysctl_tcp_base_mss));
icsk->icsk_mtup.probe_size = 0;
if (icsk->icsk_mtup.enabled)
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
if (tp->packets_out > tp->snd_cwnd_used)
tp->snd_cwnd_used = tp->packets_out;
- if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle) &&
(s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto &&
!ca_ops->cong_control)
tcp_cwnd_application_limited(sk);
bytes = sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift);
- r = tcp_min_rtt(tcp_sk(sk)) >> sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log;
+ r = tcp_min_rtt(tcp_sk(sk)) >> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_rtt_log);
if (r < BITS_PER_TYPE(sk->sk_gso_max_size))
bytes += sk->sk_gso_max_size >> r;
min_tso = ca_ops->min_tso_segs ?
ca_ops->min_tso_segs(sk) :
- sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs;
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs);
tso_segs = tcp_tso_autosize(sk, mss_now, min_tso);
return min_t(u32, tso_segs, sk->sk_gso_max_segs);
u32 interval;
s32 delta;
- interval = net->ipv4.sysctl_tcp_probe_interval;
+ interval = READ_ONCE(net->ipv4.sysctl_tcp_probe_interval);
delta = tcp_jiffies32 - icsk->icsk_mtup.probe_timestamp;
if (unlikely(delta >= interval * HZ)) {
int mss = tcp_current_mss(sk);
* probing process by not resetting search range to its orignal.
*/
if (probe_size > tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_high) ||
- interval < net->ipv4.sysctl_tcp_probe_threshold) {
+ interval < READ_ONCE(net->ipv4.sysctl_tcp_probe_threshold)) {
/* Check whether enough time has elaplased for
* another round of probing.
*/
sk->sk_pacing_rate >> READ_ONCE(sk->sk_pacing_shift));
if (sk->sk_pacing_status == SK_PACING_NONE)
limit = min_t(unsigned long, limit,
- sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes));
limit <<= factor;
if (static_branch_unlikely(&tcp_tx_delay_enabled) &&
if (rcu_access_pointer(tp->fastopen_rsk))
return false;
- early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans;
+ early_retrans = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_early_retrans);
/* Schedule a loss probe in 2*RTT for SACK capable connections
* not in loss recovery, that are either limited by cwnd or application.
*/
struct sk_buff *skb = to, *tmp;
bool first = true;
- if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse))
return;
if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)
return;
* See tcp_input.c:tcp_rcv_state_process case TCP_SYN_SENT.
*/
tp->tcp_header_len = sizeof(struct tcphdr);
- if (sock_net(sk)->ipv4.sysctl_tcp_timestamps)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_timestamps))
tp->tcp_header_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0),
&tp->rcv_wnd,
&tp->window_clamp,
- sock_net(sk)->ipv4.sysctl_tcp_window_scaling,
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_window_scaling),
&rcv_wscale,
rcv_wnd);
icsk->icsk_probes_out++;
if (err <= 0) {
- if (icsk->icsk_backoff < net->ipv4.sysctl_tcp_retries2)
+ if (icsk->icsk_backoff < READ_ONCE(net->ipv4.sysctl_tcp_retries2))
icsk->icsk_backoff++;
timeout = tcp_probe0_when(sk, TCP_RTO_MAX);
} else {
return 0;
if (tp->sacked_out >= tp->reordering &&
- !(sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_NO_DUPTHRESH))
+ !(READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_NO_DUPTHRESH))
return 0;
}
{
struct tcp_sock *tp = tcp_sk(sk);
- if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND ||
+ if ((READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_recovery) &
+ TCP_RACK_STATIC_REO_WND) ||
!rs->prior_delivered)
return;
*/
static int tcp_orphan_retries(struct sock *sk, bool alive)
{
- int retries = sock_net(sk)->ipv4.sysctl_tcp_orphan_retries; /* May be zero. */
+ int retries = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_orphan_retries); /* May be zero. */
/* We know from an ICMP that something is wrong. */
if (sk->sk_err_soft && !alive)
int mss;
/* Black hole detection */
- if (!net->ipv4.sysctl_tcp_mtu_probing)
+ if (!READ_ONCE(net->ipv4.sysctl_tcp_mtu_probing))
return;
if (!icsk->icsk_mtup.enabled) {
icsk->icsk_mtup.probe_timestamp = tcp_jiffies32;
} else {
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
- mss = max(mss, net->ipv4.sysctl_tcp_mtu_probe_floor);
- mss = max(mss, net->ipv4.sysctl_tcp_min_snd_mss);
+ mss = min(READ_ONCE(net->ipv4.sysctl_tcp_base_mss), mss);
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_mtu_probe_floor));
+ mss = max(mss, READ_ONCE(net->ipv4.sysctl_tcp_min_snd_mss));
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
}
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
if ((1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV)) {
if (icsk->icsk_retransmits)
__dst_negative_advice(sk);
- retry_until = icsk->icsk_syn_retries ? : net->ipv4.sysctl_tcp_syn_retries;
+ retry_until = icsk->icsk_syn_retries ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
expired = icsk->icsk_retransmits >= retry_until;
} else {
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1, 0)) {
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1), 0)) {
/* Black hole detection */
tcp_mtu_probing(icsk, sk);
__dst_negative_advice(sk);
}
- retry_until = net->ipv4.sysctl_tcp_retries2;
+ retry_until = READ_ONCE(net->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = icsk->icsk_rto < TCP_RTO_MAX;
msecs_to_jiffies(icsk->icsk_user_timeout))
goto abort;
- max_probes = sock_net(sk)->ipv4.sysctl_tcp_retries2;
+ max_probes = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_retries2);
if (sock_flag(sk, SOCK_DEAD)) {
const bool alive = inet_csk_rto_backoff(icsk, TCP_RTO_MAX) < TCP_RTO_MAX;
static void tcp_fastopen_synack_timer(struct sock *sk, struct request_sock *req)
{
struct inet_connection_sock *icsk = inet_csk(sk);
- int max_retries = icsk->icsk_syn_retries ? :
- sock_net(sk)->ipv4.sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */
struct tcp_sock *tp = tcp_sk(sk);
+ int max_retries;
req->rsk_ops->syn_ack_timeout(req);
+ /* add one more retry for fastopen */
+ max_retries = icsk->icsk_syn_retries ? :
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_synack_retries) + 1;
+
if (req->num_timeout >= max_retries) {
tcp_write_err(sk);
return;
* linear-timeout retransmissions into a black hole
*/
if (sk->sk_state == TCP_ESTABLISHED &&
- (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) &&
+ (tp->thin_lto || READ_ONCE(net->ipv4.sysctl_tcp_thin_linear_timeouts)) &&
tcp_stream_is_thin(tp) &&
icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) {
icsk->icsk_backoff = 0;
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
tcp_clamp_rto_to_user_timeout(sk), TCP_RTO_MAX);
- if (retransmits_timed_out(sk, net->ipv4.sysctl_tcp_retries1 + 1, 0))
+ if (retransmits_timed_out(sk, READ_ONCE(net->ipv4.sysctl_tcp_retries1) + 1, 0))
__sk_dst_reset(sk);
out:;
{
xfrm_input_register_afinfo(&xfrm4_input_afinfo);
}
-EXPORT_SYMBOL(xfrm4_protocol_init);
goto out;
}
- if (net->ipv6.devconf_all->disable_policy ||
- idev->cnf.disable_policy)
- f6i->dst_nopolicy = true;
-
neigh_parms_data_state_setall(idev->nd_parms);
ifa->addr = *cfg->pfx;
fillargs->event = RTM_GETMULTICAST;
/* multicast address */
- for (ifmca = rcu_dereference(idev->mc_list);
+ for (ifmca = rtnl_dereference(idev->mc_list);
ifmca;
- ifmca = rcu_dereference(ifmca->next), ip_idx++) {
+ ifmca = rtnl_dereference(ifmca->next), ip_idx++) {
if (ip_idx < s_ip_idx)
continue;
err = inet6_fill_ifmcaddr(skb, ifmca, fillargs);
RCU_INIT_POINTER(inet->mc_list, NULL);
inet->rcv_tos = 0;
- if (net->ipv4.sysctl_ip_no_pmtu_disc)
+ if (READ_ONCE(net->ipv4.sysctl_ip_no_pmtu_disc))
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
break;
case ICMPV6_EXT_ECHO_REQUEST:
if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
- net->ipv4.sysctl_icmp_echo_enable_probe)
+ READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
icmpv6_echo_reply(skb);
break;
__be16 proto;
__u32 mtu;
int nhoff;
- int thoff;
if (!pskb_inet_may_pull(skb))
goto tx_err;
(ntohs(ip_hdr(skb)->tot_len) > skb->len - nhoff))
truncate = true;
- thoff = skb_transport_header(skb) - skb_mac_header(skb);
- if (skb->protocol == htons(ETH_P_IPV6) &&
- (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff))
- truncate = true;
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ int thoff;
+
+ if (skb_transport_header_was_set(skb))
+ thoff = skb_transport_header(skb) - skb_mac_header(skb);
+ else
+ thoff = nhoff + sizeof(struct ipv6hdr);
+ if (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)
+ truncate = true;
+ }
if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen))
goto tx_err;
#include <net/inet_ecn.h>
#include <net/dst_metadata.h>
-INDIRECT_CALLABLE_DECLARE(void tcp_v6_early_demux(struct sk_buff *));
static void ip6_rcv_finish_core(struct net *net, struct sock *sk,
struct sk_buff *skb)
{
- void (*edemux)(struct sk_buff *skb);
-
- if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) {
- const struct inet6_protocol *ipprot;
-
- ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);
- if (ipprot && (edemux = READ_ONCE(ipprot->early_demux)))
- INDIRECT_CALL_2(edemux, tcp_v6_early_demux,
- udp_v6_early_demux, skb);
+ if (READ_ONCE(net->ipv4.sysctl_ip_early_demux) &&
+ !skb_dst(skb) && !skb->sk) {
+ switch (ipv6_hdr(skb)->nexthdr) {
+ case IPPROTO_TCP:
+ if (READ_ONCE(net->ipv4.sysctl_tcp_early_demux))
+ tcp_v6_early_demux(skb);
+ break;
+ case IPPROTO_UDP:
+ if (READ_ONCE(net->ipv4.sysctl_udp_early_demux))
+ udp_v6_early_demux(skb);
+ break;
+ }
}
+
if (!skb_valid_dst(skb))
ip6_route_input(skb);
}
struct page_frag *pfrag,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
unsigned int flags, struct ipcm6_cookie *ipc6)
{
struct sk_buff *skb, *skb_prev = NULL;
int ip6_append_data(struct sock *sk,
int getfrag(void *from, char *to, int offset, int len,
int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
struct rt6_info *rt, unsigned int flags)
{
struct sk_buff *ip6_make_skb(struct sock *sk,
int getfrag(void *from, char *to, int offset,
int len, int odd, struct sk_buff *skb),
- void *from, int length, int transhdrlen,
+ void *from, size_t length, int transhdrlen,
struct ipcm6_cookie *ipc6, struct rt6_info *rt,
unsigned int flags, struct inet_cork_full *cork)
{
if (++cnt >= MLD_MAX_QUEUE) {
rework = true;
- schedule_delayed_work(&idev->mc_query_work, 0);
break;
}
}
__mld_query_work(skb);
mutex_unlock(&idev->mc_lock);
- if (!rework)
- in6_dev_put(idev);
+ if (rework && queue_delayed_work(mld_wq, &idev->mc_query_work, 0))
+ return;
+
+ in6_dev_put(idev);
}
/* called with rcu_read_lock() */
if (++cnt >= MLD_MAX_QUEUE) {
rework = true;
- schedule_delayed_work(&idev->mc_report_work, 0);
break;
}
}
__mld_report_work(skb);
mutex_unlock(&idev->mc_lock);
- if (!rework)
- in6_dev_put(idev);
+ if (rework && queue_delayed_work(mld_wq, &idev->mc_report_work, 0))
+ return;
+
+ in6_dev_put(idev);
}
static bool is_in(struct ifmcaddr6 *pmc, struct ip6_sf_list *psf, int type,
#include <linux/proc_fs.h>
#include <net/ping.h>
+static void ping_v6_destroy(struct sock *sk)
+{
+ inet6_destroy_sock(sk);
+}
+
/* Compatibility glue so we can support IPv6 when it's compiled as a module */
static int dummy_ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len,
int *addr_len)
.owner = THIS_MODULE,
.init = ping_init_sock,
.close = ping_close,
+ .destroy = ping_v6_destroy,
.connect = ip6_datagram_connect_v6_only,
.disconnect = __udp_disconnect,
.setsockopt = ipv6_setsockopt,
}
f6i = ip6_route_info_create(&cfg, gfp_flags, NULL);
- if (!IS_ERR(f6i))
+ if (!IS_ERR(f6i)) {
f6i->dst_nocount = true;
+
+ if (!anycast &&
+ (net->ipv6.devconf_all->disable_policy ||
+ idev->cnf.disable_policy))
+ f6i->dst_nopolicy = true;
+ }
+
return f6i;
}
if (nexthop_is_blackhole(rt->nh))
rtm->rtm_type = RTN_BLACKHOLE;
- if (net->ipv4.sysctl_nexthop_compat_mode &&
+ if (READ_ONCE(net->ipv4.sysctl_nexthop_compat_mode) &&
rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
goto nla_put_failure;
{
return seg6_hmac_init_algo();
}
-EXPORT_SYMBOL(seg6_hmac_init);
int __net_init seg6_hmac_net_init(struct net *net)
{
return rhashtable_init(&sdata->hmac_infos, &rht_params);
}
-EXPORT_SYMBOL(seg6_hmac_net_init);
void seg6_hmac_exit(void)
{
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, tot_len);
return 0;
}
#endif
+ hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
skb_postpush_rcsum(skb, hdr, sizeof(struct ipv6hdr) + hdrlen);
return 0;
break;
}
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
nf_reset_ct(skb);
struct flowi6 fl6;
int dev_flags = 0;
+ memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_iif = skb->dev->ifindex;
fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
fl6.saddr = hdr->saddr;
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
if (err)
goto drop;
- ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
seg6_lookup_nexthop(skb, NULL, 0);
kcalloc(cmax, sizeof(*kp), GFP_KERNEL_ACCOUNT | __GFP_NOWARN) :
NULL;
- rcu_read_lock();
-
ca = min(t->prl_count, cmax);
if (!kp) {
}
}
- c = 0;
+ rcu_read_lock();
for_each_prl_rcu(t->prl) {
if (c >= cmax)
break;
if (kprl.addr != htonl(INADDR_ANY))
break;
}
-out:
+
rcu_read_unlock();
len = sizeof(*kp) * c;
ret = -EFAULT;
kfree(kp);
-
+out:
return ret;
}
__u8 rcv_wscale;
u32 tsoff = 0;
- if (!sock_net(sk)->ipv4.sysctl_tcp_syncookies || !th->ack || th->rst)
+ if (!READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_syncookies) ||
+ !th->ack || th->rst)
goto out;
if (tcp_synq_no_recent_overflow(sk))
if (np->repflow && ireq->pktopts)
fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
- tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ?
+ tclass = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) ?
(tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) |
(np->tclass & INET_ECN_MASK) :
np->tclass;
/* Set ToS of the new socket based upon the value of incoming SYN.
* ECT bits are set later in tcp_init_transfer().
*/
- if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos)
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_reflect_tos))
newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK;
/* Clone native IPv6 options from listening socket (if any)
goto discard_it;
}
-INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
+void tcp_v6_early_demux(struct sk_buff *skb)
{
const struct ipv6hdr *hdr;
const struct tcphdr *th;
};
EXPORT_SYMBOL_GPL(tcpv6_prot);
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol tcpv6_protocol = {
- .early_demux = tcp_v6_early_demux,
- .early_demux_handler = tcp_v6_early_demux,
+static const struct inet6_protocol tcpv6_protocol = {
.handler = tcp_v6_rcv,
.err_handler = tcp_v6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
return NULL;
}
-INDIRECT_CALLABLE_SCOPE void udp_v6_early_demux(struct sk_buff *skb)
+void udp_v6_early_demux(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
const struct udphdr *uh;
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
-/* thinking of making this const? Don't.
- * early_demux can change based on sysctl.
- */
-static struct inet6_protocol udpv6_protocol = {
- .early_demux = udp_v6_early_demux,
- .early_demux_handler = udp_v6_early_demux,
+static const struct inet6_protocol udpv6_protocol = {
.handler = udpv6_rcv,
.err_handler = udpv6_err,
.flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
struct ipcm6_cookie ipc6;
int addr_len = msg->msg_namelen;
int transhdrlen = 4; /* zero session-id */
- int ulen = len + transhdrlen;
+ int ulen;
int err;
/* Rough check on arithmetic overflow,
* better check is made in ip6_append_data().
*/
- if (len > INT_MAX)
+ if (len > INT_MAX - transhdrlen)
return -EMSGSIZE;
+ ulen = len + transhdrlen;
/* Mirror BSD error message compatibility */
if (msg->msg_flags & MSG_OOB)
void
ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
- u64 color_bitmap)
+ u64 color_bitmap, gfp_t gfp)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
if (sdata->vif.color_change_active || sdata->vif.csa_active)
return;
- cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap);
+ cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp);
}
EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify);
struct cfg80211_nan_func *func;
clear_bit(SDATA_STATE_RUNNING, &sdata->state);
+ synchronize_rcu(); /* flush _ieee80211_wake_txqs() */
cancel_scan = rcu_access_pointer(local->scan_sdata) == sdata;
if (cancel_scan)
IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
if (color == bss_conf->he_bss_color.color)
ieeee80211_obss_color_collision_notify(&rx->sdata->vif,
- BIT_ULL(color));
+ BIT_ULL(color),
+ GFP_ATOMIC);
}
}
/*
* If the skb is shared we need to obtain our own copy.
*/
- if (skb_shared(skb)) {
- struct sk_buff *tmp_skb = skb;
-
- /* can't happen -- skb is a clone if info_id != 0 */
- WARN_ON(info_id);
-
- skb = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(tmp_skb);
-
- if (!skb) {
- ret = -ENOMEM;
- goto free;
- }
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb)) {
+ ret = -ENOMEM;
+ goto free;
}
hdr.frame_control = fc;
/* after this point (skb is modified) we cannot return false */
- if (skb_shared(skb)) {
- struct sk_buff *tmp_skb = skb;
-
- skb = skb_clone(skb, GFP_ATOMIC);
- kfree_skb(tmp_skb);
-
- if (!skb)
- return true;
- }
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return true;
if ((hdr->frame_control & cpu_to_le16(IEEE80211_STYPE_QOS_DATA)) &&
ieee80211_amsdu_aggregate(sdata, sta, fast_tx, skb))
struct net_device *dev, struct sta_info *sta,
struct ieee80211_key *key, struct sk_buff *skb)
{
- struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+ struct ieee80211_tx_info *info;
struct ieee80211_local *local = sdata->local;
struct tid_ampdu_tx *tid_tx;
u8 tid;
test_bit(SDATA_STATE_OFFCHANNEL, &sdata->state))
goto out_free;
+ skb = skb_share_check(skb, GFP_ATOMIC);
+ if (unlikely(!skb))
+ return;
+
+ info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
ieee80211_aggr_check(sdata, sta, skb);
local_bh_disable();
spin_lock(&fq->lock);
+ if (!test_bit(SDATA_STATE_RUNNING, &sdata->state))
+ goto out;
+
if (sdata->vif.type == NL80211_IFTYPE_AP)
ps = &sdata->bss->ps;
bool qos;
/* all mesh/ocb stations are required to support WME */
- if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
- sdata->vif.type == NL80211_IFTYPE_OCB)
+ if (sta && (sdata->vif.type == NL80211_IFTYPE_MESH_POINT ||
+ sdata->vif.type == NL80211_IFTYPE_OCB))
qos = true;
else if (sta)
qos = sta->sta.wme;
opts->suboptions |= OPTION_MPTCP_RST;
opts->reset_transient = subflow->reset_transient;
opts->reset_reason = subflow->reset_reason;
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX);
return true;
}
opts->rcvr_key = msk->remote_key;
pr_debug("FASTCLOSE key=%llu", opts->rcvr_key);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
return true;
}
opts->fail_seq = subflow->map_seq;
pr_debug("MP_FAIL fail_seq=%llu", opts->fail_seq);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
return true;
}
mptcp_established_options_mp_fail(sk, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFASTCLOSETX);
}
/* MP_RST can be used with MP_FASTCLOSE and MP_FAIL if there is room */
if (mptcp_established_options_rst(sk, skb, &opt_size, remaining, opts)) {
*size += opt_size;
remaining -= opt_size;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPRSTTX);
}
return true;
}
goto reset;
subflow->mp_capable = 0;
pr_fallback(msk);
- __mptcp_do_fallback(msk);
+ mptcp_do_fallback(ssk);
return false;
}
if (unlikely(th->syn))
new_win = min(new_win, 65535U) << tp->rx_opt.rcv_wscale;
if (!tp->rx_opt.rcv_wscale &&
- sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows)
+ READ_ONCE(sock_net(ssk)->ipv4.sysctl_tcp_workaround_signed_windows))
new_win = min(new_win, MAX_TCP_WINDOW);
else
new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale));
*ptr++ = mptcp_option(MPTCPOPT_MP_PRIO,
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
+
+ MPTCP_INC_STATS(sock_net((const struct sock *)tp),
+ MPTCP_MIB_MPPRIOTX);
}
mp_capable_done:
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
- struct sock *s = (struct sock *)msk;
pr_debug("fail_seq=%llu", fail_seq);
if (!READ_ONCE(msk->allow_infinite_fallback))
return;
- if (!READ_ONCE(subflow->mp_fail_response_expect)) {
+ if (!subflow->fail_tout) {
pr_debug("send MP_FAIL response and infinite map");
subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPFAILTX);
subflow->send_infinite_map = 1;
- } else if (!sock_flag(sk, SOCK_DEAD)) {
+ tcp_send_ack(sk);
+ } else {
pr_debug("MP_FAIL response received");
-
- sk_stop_timer(s, &s->sk_timer);
+ WRITE_ONCE(subflow->fail_tout, 0);
}
}
}
}
-static int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
- struct mptcp_addr_info *addr,
- u8 bkup)
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ struct mptcp_addr_info *rem,
+ u8 bkup)
{
struct mptcp_subflow_context *subflow;
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
- struct sock *sk = (struct sock *)msk;
- struct mptcp_addr_info local;
+ struct mptcp_addr_info local, remote;
+ bool slow;
local_address((struct sock_common *)ssk, &local);
if (!mptcp_addresses_equal(&local, addr, addr->port))
continue;
+ if (rem && rem->family != AF_UNSPEC) {
+ remote_address((struct sock_common *)ssk, &remote);
+ if (!mptcp_addresses_equal(&remote, rem, rem->port))
+ continue;
+ }
+
+ slow = lock_sock_fast(ssk);
if (subflow->backup != bkup)
msk->last_snd = NULL;
subflow->backup = bkup;
subflow->send_mp_prio = 1;
subflow->request_bkup = bkup;
- __MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_MPPRIOTX);
- spin_unlock_bh(&msk->pm.lock);
pr_debug("send ack for mp_prio");
- mptcp_subflow_send_ack(ssk);
- spin_lock_bh(&msk->pm.lock);
+ __mptcp_subflow_send_ack(ssk);
+ unlock_sock_fast(ssk, slow);
return 0;
}
removed = true;
__MPTCP_INC_STATS(sock_net(sk), rm_type);
}
- __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
+ if (rm_type == MPTCP_MIB_RMSUBFLOW)
+ __set_bit(rm_list->ids[i], msk->pm.id_avail_bitmap);
if (!removed)
continue;
list.ids[list.nr++] = addr->id;
+ spin_lock_bh(&msk->pm.lock);
mptcp_pm_nl_rm_subflow_received(msk, &list);
mptcp_pm_create_subflow_or_signal_addr(msk);
+ spin_unlock_bh(&msk->pm.lock);
}
static int mptcp_nl_set_flags(struct net *net,
goto next;
lock_sock(sk);
- spin_lock_bh(&msk->pm.lock);
if (changed & MPTCP_PM_ADDR_FLAG_BACKUP)
- ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, bkup);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, addr, NULL, bkup);
if (changed & MPTCP_PM_ADDR_FLAG_FULLMESH)
mptcp_pm_nl_fullmesh(msk, addr);
- spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
next:
static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
{
struct mptcp_pm_addr_entry addr = { .addr = { .family = AF_UNSPEC }, }, *entry;
+ struct mptcp_pm_addr_entry remote = { .addr = { .family = AF_UNSPEC }, };
+ struct nlattr *attr_rem = info->attrs[MPTCP_PM_ATTR_ADDR_REMOTE];
+ struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct pm_nl_pernet *pernet = genl_info_pm_nl(info);
u8 changed, mask = MPTCP_PM_ADDR_FLAG_BACKUP |
if (ret < 0)
return ret;
+ if (attr_rem) {
+ ret = mptcp_pm_parse_entry(attr_rem, info, false, &remote);
+ if (ret < 0)
+ return ret;
+ }
+
if (addr.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
if (addr.addr.family == AF_UNSPEC) {
return -EOPNOTSUPP;
}
+ if (token)
+ return mptcp_userspace_pm_set_flags(sock_net(skb->sk),
+ token, &addr, &remote, bkup);
+
spin_lock_bh(&pernet->lock);
entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
if (!entry) {
*/
#include "protocol.h"
+#include "mib.h"
void mptcp_free_local_addr_list(struct mptcp_sock *msk)
{
const struct mptcp_addr_info *local,
const struct mptcp_addr_info *remote)
{
- struct sock *sk = &msk->sk.icsk_inet.sk;
struct mptcp_subflow_context *subflow;
- struct sock *found = NULL;
if (local->family != remote->family)
return NULL;
- lock_sock(sk);
-
mptcp_for_each_subflow(msk, subflow) {
const struct inet_sock *issk;
struct sock *ssk;
}
if (issk->inet_sport == local->port &&
- issk->inet_dport == remote->port) {
- found = ssk;
- goto found;
- }
+ issk->inet_dport == remote->port)
+ return ssk;
}
-found:
- release_sock(sk);
-
- return found;
+ return NULL;
}
int mptcp_nl_cmd_sf_destroy(struct sk_buff *skb, struct genl_info *info)
}
sk = &msk->sk.icsk_inet.sk;
+ lock_sock(sk);
ssk = mptcp_nl_find_ssk(msk, &addr_l, &addr_r);
if (ssk) {
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
mptcp_subflow_shutdown(sk, ssk, RCV_SHUTDOWN | SEND_SHUTDOWN);
mptcp_close_ssk(sk, ssk, subflow);
+ MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_RMSUBFLOW);
err = 0;
} else {
err = -ESRCH;
}
+ release_sock(sk);
- destroy_err:
+destroy_err:
sock_put((struct sock *)msk);
return err;
}
+
+int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup)
+{
+ struct mptcp_sock *msk;
+ int ret = -EINVAL;
+ u32 token_val;
+
+ token_val = nla_get_u32(token);
+
+ msk = mptcp_token_get_sock(net, token_val);
+ if (!msk)
+ return ret;
+
+ if (!mptcp_pm_is_userspace(msk))
+ goto set_flags_err;
+
+ if (loc->addr.family == AF_UNSPEC ||
+ rem->addr.family == AF_UNSPEC)
+ goto set_flags_err;
+
+ lock_sock((struct sock *)msk);
+ ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc->addr, &rem->addr, bkup);
+ release_sock((struct sock *)msk);
+
+set_flags_err:
+ sock_put((struct sock *)msk);
+ return ret;
+}
__mptcp_set_timeout(sk, tout);
}
-static bool tcp_can_send_ack(const struct sock *ssk)
+static inline bool tcp_can_send_ack(const struct sock *ssk)
{
return !((1 << inet_sk_state_load(ssk)) &
(TCPF_SYN_SENT | TCPF_SYN_RECV | TCPF_TIME_WAIT | TCPF_CLOSE | TCPF_LISTEN));
}
+void __mptcp_subflow_send_ack(struct sock *ssk)
+{
+ if (tcp_can_send_ack(ssk))
+ tcp_send_ack(ssk);
+}
+
void mptcp_subflow_send_ack(struct sock *ssk)
{
bool slow;
slow = lock_sock_fast(ssk);
- if (tcp_can_send_ack(ssk))
- tcp_send_ack(ssk);
+ __mptcp_subflow_send_ack(ssk);
unlock_sock_fast(ssk, slow);
}
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPTX);
mptcp_subflow_ctx(ssk)->send_infinite_map = 0;
pr_fallback(msk);
- __mptcp_do_fallback(msk);
+ mptcp_do_fallback(ssk);
}
static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
if (msk->rcvq_space.copied <= msk->rcvq_space.space)
goto new_measure;
- if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf &&
+ if (READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) &&
!(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) {
int rcvmem, rcvbuf;
u64 rcvwin, grow;
do_div(rcvwin, advmss);
rcvbuf = min_t(u64, rcvwin * rcvmem,
- sock_net(sk)->ipv4.sysctl_tcp_rmem[2]);
+ READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[2]));
if (rcvbuf > sk->sk_rcvbuf) {
u32 window_clamp;
sock_put(sk);
}
-static struct mptcp_subflow_context *
-mp_fail_response_expect_subflow(struct mptcp_sock *msk)
-{
- struct mptcp_subflow_context *subflow, *ret = NULL;
-
- mptcp_for_each_subflow(msk, subflow) {
- if (READ_ONCE(subflow->mp_fail_response_expect)) {
- ret = subflow;
- break;
- }
- }
-
- return ret;
-}
-
static void mptcp_timeout_timer(struct timer_list *t)
{
struct sock *sk = from_timer(sk, t, sk_timer);
kfree_rcu(subflow, rcu);
} else {
/* otherwise tcp will dispose of the ssk and subflow ctx */
+ if (ssk->sk_state == TCP_LISTEN) {
+ tcp_set_state(ssk, TCP_CLOSE);
+ mptcp_subflow_queue_clean(ssk);
+ inet_csk_listen_stop(ssk);
+ }
__tcp_close(ssk, 0);
/* close acquired an extra ref */
mptcp_reset_timer(sk);
}
+/* schedule the timeout timer for the relevant event: either close timeout
+ * or mp_fail timeout. The close timeout takes precedence on the mp_fail one
+ */
+void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout)
+{
+ struct sock *sk = (struct sock *)msk;
+ unsigned long timeout, close_timeout;
+
+ if (!fail_tout && !sock_flag(sk, SOCK_DEAD))
+ return;
+
+ close_timeout = inet_csk(sk)->icsk_mtup.probe_timestamp - tcp_jiffies32 + jiffies + TCP_TIMEWAIT_LEN;
+
+ /* the close timeout takes precedence on the fail one, and here at least one of
+ * them is active
+ */
+ timeout = sock_flag(sk, SOCK_DEAD) ? close_timeout : fail_tout;
+
+ sk_reset_timer(sk, &sk->sk_timer, timeout);
+}
+
static void mptcp_mp_fail_no_response(struct mptcp_sock *msk)
{
- struct mptcp_subflow_context *subflow;
- struct sock *ssk;
+ struct sock *ssk = msk->first;
bool slow;
- subflow = mp_fail_response_expect_subflow(msk);
- if (subflow) {
- pr_debug("MP_FAIL doesn't respond, reset the subflow");
+ if (!ssk)
+ return;
- ssk = mptcp_subflow_tcp_sock(subflow);
- slow = lock_sock_fast(ssk);
- mptcp_subflow_reset(ssk);
- unlock_sock_fast(ssk, slow);
- }
+ pr_debug("MP_FAIL doesn't respond, reset the subflow");
+
+ slow = lock_sock_fast(ssk);
+ mptcp_subflow_reset(ssk);
+ WRITE_ONCE(mptcp_subflow_ctx(ssk)->fail_tout, 0);
+ unlock_sock_fast(ssk, slow);
+
+ mptcp_reset_timeout(msk, 0);
}
static void mptcp_worker(struct work_struct *work)
{
struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
struct sock *sk = &msk->sk.icsk_inet.sk;
+ unsigned long fail_tout;
int state;
lock_sock(sk);
if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
__mptcp_retrans(sk);
- mptcp_mp_fail_no_response(msk);
+ fail_tout = msk->first ? READ_ONCE(mptcp_subflow_ctx(msk->first)->fail_tout) : 0;
+ if (fail_tout && time_after(jiffies, fail_tout))
+ mptcp_mp_fail_no_response(msk);
unlock:
release_sock(sk);
mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
- sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1];
- sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
+ sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
return 0;
}
static void mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow;
+ struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false;
lock_sock(sk);
cleanup:
/* orphan all the subflows */
inet_csk(sk)->icsk_mtup.probe_timestamp = tcp_jiffies32;
- mptcp_for_each_subflow(mptcp_sk(sk), subflow) {
+ mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ /* since the close timeout takes precedence on the fail one,
+ * cancel the latter
+ */
+ if (ssk == msk->first)
+ subflow->fail_tout = 0;
+
sock_orphan(ssk);
unlock_sock_fast(ssk, slow);
}
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (mptcp_sk(sk)->token)
- mptcp_event(MPTCP_EVENT_CLOSED, mptcp_sk(sk), NULL, GFP_KERNEL);
+ mptcp_event(MPTCP_EVENT_CLOSED, msk, NULL, GFP_KERNEL);
if (sk->sk_state == TCP_CLOSE) {
__mptcp_destroy_sock(sk);
do_cancel_work = true;
} else {
- sk_reset_timer(sk, &sk->sk_timer, jiffies + TCP_TIMEWAIT_LEN);
+ mptcp_reset_timeout(msk, 0);
}
release_sock(sk);
if (do_cancel_work)
static int mptcp_disconnect(struct sock *sk, int flags)
{
- struct mptcp_subflow_context *subflow;
+ struct mptcp_subflow_context *subflow, *tmp;
struct mptcp_sock *msk = mptcp_sk(sk);
inet_sk_state_store(sk, TCP_CLOSE);
- mptcp_for_each_subflow(msk, subflow) {
+ list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
__mptcp_close_ssk(sk, ssk, subflow, MPTCP_CF_FASTCLOSE);
u32 setsockopt_seq;
char ca_name[TCP_CA_NAME_MAX];
+ struct mptcp_sock *dl_next;
};
#define mptcp_data_lock(sk) spin_lock_bh(&(sk)->sk_lock.slock)
local_id_valid : 1, /* local_id is correctly initialized */
valid_csum_seen : 1; /* at least one csum validated */
enum mptcp_data_avail data_avail;
- bool mp_fail_response_expect;
u32 remote_nonce;
u64 thmac;
u32 local_nonce;
u8 stale_count;
long delegated_status;
+ unsigned long fail_tout;
);
void mptcp_subflow_shutdown(struct sock *sk, struct sock *ssk, int how);
void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
+void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
void mptcp_finish_connect(struct sock *sk);
void __mptcp_set_connected(struct sock *sk);
+void mptcp_reset_timeout(struct mptcp_sock *msk, unsigned long fail_tout);
static inline bool mptcp_is_fully_established(struct sock *sk)
{
return inet_sk_state_load(sk) == TCP_ESTABLISHED &&
const struct mptcp_rm_list *rm_list);
void mptcp_pm_mp_prio_received(struct sock *sk, u8 bkup);
void mptcp_pm_mp_fail_received(struct sock *sk, u64 fail_seq);
+int mptcp_pm_nl_mp_prio_send_ack(struct mptcp_sock *msk,
+ struct mptcp_addr_info *addr,
+ struct mptcp_addr_info *rem,
+ u8 bkup);
bool mptcp_pm_alloc_anno_list(struct mptcp_sock *msk,
const struct mptcp_pm_addr_entry *entry);
void mptcp_pm_free_anno_list(struct mptcp_sock *msk);
int mptcp_userspace_pm_get_flags_and_ifindex_by_id(struct mptcp_sock *msk,
unsigned int id,
u8 *flags, int *ifindex);
-
+int mptcp_userspace_pm_set_flags(struct net *net, struct nlattr *token,
+ struct mptcp_pm_addr_entry *loc,
+ struct mptcp_pm_addr_entry *rem, u8 bkup);
int mptcp_pm_announce_addr(struct mptcp_sock *msk,
const struct mptcp_addr_info *addr,
bool echo);
set_bit(MPTCP_FALLBACK_DONE, &msk->flags);
}
-static inline void mptcp_do_fallback(struct sock *sk)
+static inline void mptcp_do_fallback(struct sock *ssk)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
- struct mptcp_sock *msk = mptcp_sk(subflow->conn);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ struct sock *sk = subflow->conn;
+ struct mptcp_sock *msk;
+ msk = mptcp_sk(sk);
__mptcp_do_fallback(msk);
+ if (READ_ONCE(msk->snd_data_fin_enable) && !(ssk->sk_shutdown & SEND_SHUTDOWN)) {
+ gfp_t saved_allocation = ssk->sk_allocation;
+
+ /* we are in a atomic (BH) scope, override ssk default for data
+ * fin allocation
+ */
+ ssk->sk_allocation = GFP_ATOMIC;
+ ssk->sk_shutdown |= SEND_SHUTDOWN;
+ tcp_shutdown(ssk, SEND_SHUTDOWN);
+ ssk->sk_allocation = saved_allocation;
+ }
}
#define pr_fallback(a) pr_debug("%s:fallback to TCP (msk=%p)", __func__, a)
MAPPING_INVALID,
MAPPING_EMPTY,
MAPPING_DATA_FIN,
- MAPPING_DUMMY
+ MAPPING_DUMMY,
+ MAPPING_BAD_CSUM
};
static void dbg_bad_map(struct mptcp_subflow_context *subflow, u32 ssn)
subflow->map_data_csum);
if (unlikely(csum)) {
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_DATACSUMERR);
- if (subflow->mp_join || subflow->valid_csum_seen) {
- subflow->send_mp_fail = 1;
- MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPFAILTX);
- }
- return subflow->mp_join ? MAPPING_INVALID : MAPPING_DUMMY;
+ return MAPPING_BAD_CSUM;
}
subflow->valid_csum_seen = 1;
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
bool csum_reqd = READ_ONCE(msk->csum_enabled);
- struct sock *sk = (struct sock *)msk;
struct mptcp_ext *mpext;
struct sk_buff *skb;
u16 data_len;
pr_debug("infinite mapping received");
MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_INFINITEMAPRX);
subflow->map_data_len = 0;
- if (!sock_flag(ssk, SOCK_DEAD))
- sk_stop_timer(sk, &sk->sk_timer);
-
return MAPPING_INVALID;
}
return !subflow->fully_established;
}
+static void mptcp_subflow_fail(struct mptcp_sock *msk, struct sock *ssk)
+{
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+ unsigned long fail_tout;
+
+ /* greceful failure can happen only on the MPC subflow */
+ if (WARN_ON_ONCE(ssk != READ_ONCE(msk->first)))
+ return;
+
+ /* since the close timeout take precedence on the fail one,
+ * no need to start the latter when the first is already set
+ */
+ if (sock_flag((struct sock *)msk, SOCK_DEAD))
+ return;
+
+ /* we don't need extreme accuracy here, use a zero fail_tout as special
+ * value meaning no fail timeout at all;
+ */
+ fail_tout = jiffies + TCP_RTO_MAX;
+ if (!fail_tout)
+ fail_tout = 1;
+ WRITE_ONCE(subflow->fail_tout, fail_tout);
+ tcp_send_ack(ssk);
+
+ mptcp_reset_timeout(msk, subflow->fail_tout);
+}
+
static bool subflow_check_data_avail(struct sock *ssk)
{
struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
status = get_mapping_status(ssk, msk);
trace_subflow_check_data_avail(status, skb_peek(&ssk->sk_receive_queue));
- if (unlikely(status == MAPPING_INVALID))
- goto fallback;
-
- if (unlikely(status == MAPPING_DUMMY))
+ if (unlikely(status == MAPPING_INVALID || status == MAPPING_DUMMY ||
+ status == MAPPING_BAD_CSUM))
goto fallback;
if (status != MAPPING_OK)
fallback:
if (!__mptcp_check_fallback(msk)) {
/* RFC 8684 section 3.7. */
- if (subflow->send_mp_fail) {
+ if (status == MAPPING_BAD_CSUM &&
+ (subflow->mp_join || subflow->valid_csum_seen)) {
+ subflow->send_mp_fail = 1;
+
if (!READ_ONCE(msk->allow_infinite_fallback)) {
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMIDDLEBOX;
- tcp_send_active_reset(ssk, GFP_ATOMIC);
- while ((skb = skb_peek(&ssk->sk_receive_queue)))
- sk_eat_skb(ssk, skb);
- } else if (!sock_flag(ssk, SOCK_DEAD)) {
- WRITE_ONCE(subflow->mp_fail_response_expect, true);
- sk_reset_timer((struct sock *)msk,
- &((struct sock *)msk)->sk_timer,
- jiffies + TCP_RTO_MAX);
+ goto reset;
}
- WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
+ mptcp_subflow_fail(msk, ssk);
+ WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_DATA_AVAIL);
return true;
}
/* fatal protocol error, close the socket.
* subflow_error_report() will introduce the appropriate barriers
*/
- ssk->sk_err = EBADMSG;
- tcp_set_state(ssk, TCP_CLOSE);
subflow->reset_transient = 0;
subflow->reset_reason = MPTCP_RST_EMPTCP;
+
+reset:
+ ssk->sk_err = EBADMSG;
+ tcp_set_state(ssk, TCP_CLOSE);
+ while ((skb = skb_peek(&ssk->sk_receive_queue)))
+ sk_eat_skb(ssk, skb);
tcp_send_active_reset(ssk, GFP_ATOMIC);
WRITE_ONCE(subflow->data_avail, MPTCP_SUBFLOW_NODATA);
return false;
}
- __mptcp_do_fallback(msk);
+ mptcp_do_fallback(ssk);
}
skb = skb_peek(&ssk->sk_receive_queue);
mptcp_sock_graft(ssk, sk->sk_socket);
iput(SOCK_INODE(sf));
WRITE_ONCE(msk->allow_infinite_fallback, false);
- return err;
+ return 0;
failed_unlink:
list_del(&subflow->node);
}
}
+void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+{
+ struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
+ struct mptcp_sock *msk, *next, *head = NULL;
+ struct request_sock *req;
+
+ /* build a list of all unaccepted mptcp sockets */
+ spin_lock_bh(&queue->rskq_lock);
+ for (req = queue->rskq_accept_head; req; req = req->dl_next) {
+ struct mptcp_subflow_context *subflow;
+ struct sock *ssk = req->sk;
+ struct mptcp_sock *msk;
+
+ if (!sk_is_mptcp(ssk))
+ continue;
+
+ subflow = mptcp_subflow_ctx(ssk);
+ if (!subflow || !subflow->conn)
+ continue;
+
+ /* skip if already in list */
+ msk = mptcp_sk(subflow->conn);
+ if (msk->dl_next || msk == head)
+ continue;
+
+ msk->dl_next = head;
+ head = msk;
+ }
+ spin_unlock_bh(&queue->rskq_lock);
+ if (!head)
+ return;
+
+ /* can't acquire the msk socket lock under the subflow one,
+ * or will cause ABBA deadlock
+ */
+ release_sock(listener_ssk);
+
+ for (msk = head; msk; msk = next) {
+ struct sock *sk = (struct sock *)msk;
+ bool slow;
+
+ slow = lock_sock_fast_nested(sk);
+ next = msk->dl_next;
+ msk->first = NULL;
+ msk->dl_next = NULL;
+ unlock_sock_fast(sk, slow);
+ }
+
+ /* we are still under the listener msk socket lock */
+ lock_sock_nested(listener_ssk, SINGLE_DEPTH_NESTING);
+}
+
static int subflow_ulp_init(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
pdev = to_platform_device(dev->dev.parent);
if (pdev) {
np = pdev->dev.of_node;
- if (np && of_get_property(np, "mlx,multi-host", NULL))
+ if (np && (of_get_property(np, "mellanox,multi-host", NULL) ||
+ of_get_property(np, "mlx,multi-host", NULL)))
ndp->mlx_multi_host = true;
}
if (!refcount_inc_not_zero(&ct->ct_general.use))
return;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (nf_ct_should_gc(ct))
nf_ct_kill(ct);
*/
ct = nf_ct_tuplehash_to_ctrack(h);
if (likely(refcount_inc_not_zero(&ct->ct_general.use))) {
+ /* re-check key after refcount */
+ smp_acquire__after_ctrl_dep();
+
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
goto found;
if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
+ /* load ->ct_net and ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
/* kill only if still in same netns -- might have moved due to
* SLAB_TYPESAFE_BY_RCU rules.
*
if (!refcount_inc_not_zero(&tmp->ct_general.use))
continue;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (gc_worker_skip_ct(tmp)) {
nf_ct_put(tmp);
continue;
if (!exp)
__nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC);
+ /* Other CPU might have obtained a pointer to this object before it was
+ * released. Because refcount is 0, refcount_inc_not_zero() will fail.
+ *
+ * After refcount_set(1) it will succeed; ensure that zeroing of
+ * ct->status and the correct ct->net pointer are visible; else other
+ * core might observe CONFIRMED bit which means the entry is valid and
+ * in the hash table, but its not (anymore).
+ */
+ smp_wmb();
+
/* Now it is going to be associated with an sk_buff, set refcount to 1. */
refcount_set(&ct->ct_general.use, 1);
hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
if (nf_ct_is_expired(ct)) {
+ /* need to defer nf_ct_kill() until lock is released */
if (i < ARRAY_SIZE(nf_ct_evict) &&
refcount_inc_not_zero(&ct->ct_general.use))
nf_ct_evict[i++] = ct;
if (unlikely(!refcount_inc_not_zero(&ct->ct_general.use)))
return 0;
+ /* load ->status after refcount increase */
+ smp_acquire__after_ctrl_dep();
+
if (nf_ct_should_gc(ct)) {
nf_ct_kill(ct);
goto release;
#include <net/netfilter/nf_tables_offload.h>
#include <net/netfilter/nf_dup_netdev.h>
-static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev)
+#define NF_RECURSION_LIMIT 2
+
+static DEFINE_PER_CPU(u8, nf_dup_skb_recursion);
+
+static void nf_do_netdev_egress(struct sk_buff *skb, struct net_device *dev,
+ enum nf_dev_hooks hook)
{
- if (skb_mac_header_was_set(skb))
+ if (__this_cpu_read(nf_dup_skb_recursion) > NF_RECURSION_LIMIT)
+ goto err;
+
+ if (hook == NF_NETDEV_INGRESS && skb_mac_header_was_set(skb)) {
+ if (skb_cow_head(skb, skb->mac_len))
+ goto err;
+
skb_push(skb, skb->mac_len);
+ }
skb->dev = dev;
skb_clear_tstamp(skb);
+ __this_cpu_inc(nf_dup_skb_recursion);
dev_queue_xmit(skb);
+ __this_cpu_dec(nf_dup_skb_recursion);
+ return;
+err:
+ kfree_skb(skb);
}
void nf_fwd_netdev_egress(const struct nft_pktinfo *pkt, int oif)
return;
}
- nf_do_netdev_egress(pkt->skb, dev);
+ nf_do_netdev_egress(pkt->skb, dev, nft_hook(pkt));
}
EXPORT_SYMBOL_GPL(nf_fwd_netdev_egress);
skb = skb_clone(pkt->skb, GFP_ATOMIC);
if (skb)
- nf_do_netdev_egress(skb, dev);
+ nf_do_netdev_egress(skb, dev, nft_hook(pkt));
}
EXPORT_SYMBOL_GPL(nf_dup_netdev_egress);
unsigned int logflags;
struct arphdr _arph;
- ah = skb_header_pointer(skb, 0, sizeof(_arph), &_arph);
+ ah = skb_header_pointer(skb, nhoff, sizeof(_arph), &_arph);
if (!ah) {
nf_log_buf_add(m, "TRUNCATED");
return;
ah->ar_pln != sizeof(__be32))
return;
- ap = skb_header_pointer(skb, sizeof(_arph), sizeof(_arpp), &_arpp);
+ ap = skb_header_pointer(skb, nhoff + sizeof(_arph), sizeof(_arpp), &_arpp);
if (!ap) {
nf_log_buf_add(m, " INCOMPLETE [%zu bytes]",
skb->len - sizeof(_arph));
nf_log_dump_packet_common(m, pf, hooknum, skb, in, out, loginfo,
prefix);
- dump_arp_packet(m, loginfo, skb, 0);
+ dump_arp_packet(m, loginfo, skb, skb_network_offset(skb));
nf_log_buf_close(m);
}
if (in)
dump_mac_header(m, loginfo, skb);
- dump_ipv4_packet(net, m, loginfo, skb, 0);
+ dump_ipv4_packet(net, m, loginfo, skb, skb_network_offset(skb));
nf_log_buf_close(m);
}
iph->tos = 0;
iph->id = 0;
iph->frag_off = htons(IP_DF);
- iph->ttl = net->ipv4.sysctl_ip_default_ttl;
+ iph->ttl = READ_ONCE(net->ipv4.sysctl_ip_default_ttl);
iph->protocol = IPPROTO_TCP;
iph->check = 0;
iph->saddr = saddr;
if (msg_type == NFT_MSG_NEWFLOWTABLE)
nft_activate_next(ctx->net, flowtable);
+ INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
nft_trans_flowtable(trans) = flowtable;
nft_trans_commit_list_add_tail(ctx->net, trans);
goto err_hook_dev;
}
hook->ops.dev = dev;
- hook->inactive = false;
return hook;
chain->flags |= NFT_CHAIN_BASE | flags;
basechain->policy = NF_ACCEPT;
if (chain->flags & NFT_CHAIN_HW_OFFLOAD &&
- nft_chain_offload_priority(basechain) < 0)
+ !nft_chain_offload_support(basechain))
return -EOPNOTSUPP;
flow_block_init(&basechain->flow_block);
if (err < 0)
return err;
}
+
+ cond_resched();
}
return 0;
struct nft_data *data,
struct nlattr *attr)
{
+ u32 dtype;
int err;
err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
if (err < 0)
return err;
- if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) {
+ if (set->dtype == NFT_DATA_VERDICT)
+ dtype = NFT_DATA_VERDICT;
+ else
+ dtype = NFT_DATA_VALUE;
+
+ if (dtype != desc->type ||
+ set->dlen != desc->len) {
nft_data_release(data, desc->type);
return -EINVAL;
}
if (!nla[NFTA_SET_ELEM_KEY] && !(flags & NFT_SET_ELEM_CATCHALL))
return -EINVAL;
- if (flags != 0)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (flags != 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (err < 0)
+ return err;
+ }
if (set->flags & NFT_SET_MAP) {
if (nla[NFTA_SET_ELEM_DATA] == NULL &&
if (err < 0)
goto err_set_elem_expr;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (err < 0)
+ goto err_parse_key;
}
if (nla[NFTA_SET_ELEM_KEY_END]) {
if (err < 0)
goto err_parse_key;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (timeout > 0) {
- nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
- if (timeout != set->timeout)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_EXPIRATION);
+ if (err < 0)
+ goto err_parse_key_end;
+
+ if (timeout != set->timeout) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
+ if (err < 0)
+ goto err_parse_key_end;
+ }
}
if (num_exprs) {
for (i = 0; i < num_exprs; i++)
size += expr_array[i]->ops->size;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
- sizeof(struct nft_set_elem_expr) +
- size);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_EXPRESSIONS,
+ sizeof(struct nft_set_elem_expr) + size);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (nla[NFTA_SET_ELEM_OBJREF] != NULL) {
err = PTR_ERR(obj);
goto err_parse_key_end;
}
- nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
+ if (err < 0)
+ goto err_parse_key_end;
}
if (nla[NFTA_SET_ELEM_DATA] != NULL) {
NFT_VALIDATE_NEED);
}
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_DATA, desc.len);
+ if (err < 0)
+ goto err_parse_data;
}
/* The full maximum length of userdata can exceed the maximum
ulen = 0;
if (nla[NFTA_SET_ELEM_USERDATA] != NULL) {
ulen = nla_len(nla[NFTA_SET_ELEM_USERDATA]);
- if (ulen > 0)
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
- ulen);
+ if (ulen > 0) {
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_USERDATA,
+ ulen);
+ if (err < 0)
+ goto err_parse_data;
+ }
}
err = -ENOMEM;
nft_set_ext_prepare(&tmpl);
- if (flags != 0)
- nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (flags != 0) {
+ err = nft_set_ext_add(&tmpl, NFT_SET_EXT_FLAGS);
+ if (err < 0)
+ return err;
+ }
if (nla[NFTA_SET_ELEM_KEY]) {
err = nft_setelem_parse_key(ctx, set, &elem.key.val,
if (err < 0)
return err;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY, set->klen);
+ if (err < 0)
+ goto fail_elem;
}
if (nla[NFTA_SET_ELEM_KEY_END]) {
err = nft_setelem_parse_key(ctx, set, &elem.key_end.val,
nla[NFTA_SET_ELEM_KEY_END]);
if (err < 0)
- return err;
+ goto fail_elem;
- nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ err = nft_set_ext_add_length(&tmpl, NFT_SET_EXT_KEY_END, set->klen);
+ if (err < 0)
+ goto fail_elem_key_end;
}
err = -ENOMEM;
elem.key_end.val.data, NULL, 0, 0,
GFP_KERNEL_ACCOUNT);
if (elem.priv == NULL)
- goto fail_elem;
+ goto fail_elem_key_end;
ext = nft_set_elem_ext(set, elem.priv);
if (flags)
kfree(trans);
fail_trans:
kfree(elem.priv);
+fail_elem_key_end:
+ nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
fail_elem:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
return err;
nf_unregister_net_hook(net, &hook->ops);
if (release_netdev) {
list_del(&hook->list);
- kfree_rcu(hook);
+ kfree_rcu(hook, rcu);
}
}
}
if (nla[NFTA_FLOWTABLE_FLAGS]) {
flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
- if (flags & ~NFT_FLOWTABLE_MASK)
- return -EOPNOTSUPP;
+ if (flags & ~NFT_FLOWTABLE_MASK) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^
- (flags & NFT_FLOWTABLE_HW_OFFLOAD))
- return -EOPNOTSUPP;
+ (flags & NFT_FLOWTABLE_HW_OFFLOAD)) {
+ err = -EOPNOTSUPP;
+ goto err_flowtable_update_hook;
+ }
} else {
flags = flowtable->data.flags;
}
{
const struct nlattr * const *nla = ctx->nla;
struct nft_flowtable_hook flowtable_hook;
+ LIST_HEAD(flowtable_del_list);
struct nft_hook *this, *hook;
struct nft_trans *trans;
int err;
err = -ENOENT;
goto err_flowtable_del_hook;
}
- hook->inactive = true;
+ list_move(&hook->list, &flowtable_del_list);
}
trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
nft_trans_flowtable(trans) = flowtable;
nft_trans_flowtable_update(trans) = true;
INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+ list_splice(&flowtable_del_list, &nft_trans_flowtable_hooks(trans));
nft_flowtable_hook_release(&flowtable_hook);
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
err_flowtable_del_hook:
- list_for_each_entry(this, &flowtable_hook.list, list) {
- hook = nft_hook_list_find(&flowtable->hook_list, this);
- if (!hook)
- break;
-
- hook->inactive = false;
- }
+ list_splice(&flowtable_del_list, &flowtable->hook_list);
nft_flowtable_hook_release(&flowtable_hook);
return err;
nf_tables_chain_destroy(&trans->ctx);
break;
case NFT_MSG_DELRULE:
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELSET:
list_del_rcu(&chain->list);
}
-static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
- struct list_head *hook_list)
-{
- struct nft_hook *hook, *next;
-
- list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
- if (hook->inactive)
- list_move(&hook->list, hook_list);
- }
-}
-
static void nf_tables_module_autoload_cleanup(struct net *net)
{
struct nftables_pernet *nft_net = nft_pernet(net);
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
NFT_MSG_NEWRULE);
+ if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
+ nft_flow_rule_destroy(nft_trans_flow_rule(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_DELRULE:
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- nft_flowtable_hooks_del(nft_trans_flowtable(trans),
- &nft_trans_flowtable_hooks(trans));
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
struct nft_trans_elem *te;
- struct nft_hook *hook;
if (action == NFNL_ABORT_VALIDATE &&
nf_tables_validate(net) < 0)
break;
case NFT_MSG_DELFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
- list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list)
- hook->inactive = false;
+ list_splice(&nft_trans_flowtable_hooks(trans),
+ &nft_trans_flowtable(trans)->hook_list);
} else {
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
break;
}
}
+
+ cond_resched();
}
list_for_each_entry(set, &ctx->table->sets, list) {
+ cond_resched();
+
if (!nft_is_active_next(ctx->net, set))
continue;
if (!(set->flags & NFT_SET_MAP) ||
const struct nft_chain *chain,
enum nft_trace_types type)
{
- const struct nft_pktinfo *pkt = info->pkt;
-
- if (!info->trace || !pkt->skb->nf_trace)
+ if (!info->trace || !info->nf_trace)
return;
info->chain = chain;
enum nft_trace_types type)
{
if (static_branch_unlikely(&nft_trace_enabled)) {
+ const struct nft_pktinfo *pkt = info->pkt;
+
+ info->nf_trace = pkt->skb->nf_trace;
info->rule = rule;
__nft_trace_packet(info, chain, type);
}
}
+static inline void nft_trace_copy_nftrace(struct nft_traceinfo *info)
+{
+ if (static_branch_unlikely(&nft_trace_enabled)) {
+ const struct nft_pktinfo *pkt = info->pkt;
+
+ if (info->trace)
+ info->nf_trace = pkt->skb->nf_trace;
+ }
+}
+
static void nft_bitwise_fast_eval(const struct nft_expr *expr,
struct nft_regs *regs)
{
const struct nft_chain *chain,
const struct nft_regs *regs)
{
+ const struct nft_pktinfo *pkt = info->pkt;
enum nft_trace_types type;
switch (regs->verdict.code) {
case NFT_RETURN:
type = NFT_TRACETYPE_RETURN;
break;
+ case NF_STOLEN:
+ type = NFT_TRACETYPE_RULE;
+ /* can't access skb->nf_trace; use copy */
+ break;
default:
type = NFT_TRACETYPE_RULE;
+ info->nf_trace = pkt->skb->nf_trace;
break;
}
switch (regs.verdict.code) {
case NFT_BREAK:
regs.verdict.code = NFT_CONTINUE;
+ nft_trace_copy_nftrace(&info);
continue;
case NFT_CONTINUE:
nft_trace_packet(&info, chain, rule,
return 0;
}
-int nft_chain_offload_priority(struct nft_base_chain *basechain)
+static int nft_chain_offload_priority(const struct nft_base_chain *basechain)
{
if (basechain->ops.priority <= 0 ||
basechain->ops.priority > USHRT_MAX)
return 0;
}
+bool nft_chain_offload_support(const struct nft_base_chain *basechain)
+{
+ struct net_device *dev;
+ struct nft_hook *hook;
+
+ if (nft_chain_offload_priority(basechain) < 0)
+ return false;
+
+ list_for_each_entry(hook, &basechain->hook_list, list) {
+ if (hook->ops.pf != NFPROTO_NETDEV ||
+ hook->ops.hooknum != NF_NETDEV_INGRESS)
+ return false;
+
+ dev = hook->ops.dev;
+ if (!dev->netdev_ops->ndo_setup_tc && !flow_indr_dev_exists())
+ return false;
+ }
+
+ return true;
+}
+
static void nft_flow_cls_offload_setup(struct flow_cls_offload *cls_flow,
const struct nft_base_chain *basechain,
const struct nft_rule *rule,
#include <linux/module.h>
#include <linux/static_key.h>
#include <linux/hash.h>
-#include <linux/jhash.h>
+#include <linux/siphash.h>
#include <linux/if_vlan.h>
#include <linux/init.h>
#include <linux/skbuff.h>
DEFINE_STATIC_KEY_FALSE(nft_trace_enabled);
EXPORT_SYMBOL_GPL(nft_trace_enabled);
-static int trace_fill_id(struct sk_buff *nlskb, struct sk_buff *skb)
-{
- __be32 id;
-
- /* using skb address as ID results in a limited number of
- * values (and quick reuse).
- *
- * So we attempt to use as many skb members that will not
- * change while skb is with netfilter.
- */
- id = (__be32)jhash_2words(hash32_ptr(skb), skb_get_hash(skb),
- skb->skb_iif);
-
- return nla_put_be32(nlskb, NFTA_TRACE_ID, id);
-}
-
static int trace_fill_header(struct sk_buff *nlskb, u16 type,
const struct sk_buff *skb,
int off, unsigned int len)
struct nlmsghdr *nlh;
struct sk_buff *skb;
unsigned int size;
+ u32 mark = 0;
u16 event;
if (!nfnetlink_has_listeners(nft_net(pkt), NFNLGRP_NFTRACE))
if (nla_put_be32(skb, NFTA_TRACE_TYPE, htonl(info->type)))
goto nla_put_failure;
- if (trace_fill_id(skb, pkt->skb))
+ if (nla_put_u32(skb, NFTA_TRACE_ID, info->skbid))
goto nla_put_failure;
if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name))
case NFT_TRACETYPE_RULE:
if (nft_verdict_dump(skb, NFTA_TRACE_VERDICT, info->verdict))
goto nla_put_failure;
+
+ /* pkt->skb undefined iff NF_STOLEN, disable dump */
+ if (info->verdict->code == NF_STOLEN)
+ info->packet_dumped = true;
+ else
+ mark = pkt->skb->mark;
+
break;
case NFT_TRACETYPE_POLICY:
+ mark = pkt->skb->mark;
+
if (nla_put_be32(skb, NFTA_TRACE_POLICY,
htonl(info->basechain->policy)))
goto nla_put_failure;
break;
}
- if (pkt->skb->mark &&
- nla_put_be32(skb, NFTA_TRACE_MARK, htonl(pkt->skb->mark)))
+ if (mark && nla_put_be32(skb, NFTA_TRACE_MARK, htonl(mark)))
goto nla_put_failure;
if (!info->packet_dumped) {
const struct nft_verdict *verdict,
const struct nft_chain *chain)
{
+ static siphash_key_t trace_key __read_mostly;
+ struct sk_buff *skb = pkt->skb;
+
info->basechain = nft_base_chain(chain);
info->trace = true;
+ info->nf_trace = pkt->skb->nf_trace;
info->packet_dumped = false;
info->pkt = pkt;
info->verdict = verdict;
+
+ net_get_random_once(&trace_key, sizeof(trace_key));
+
+ info->skbid = (u32)siphash_3u32(hash32_ptr(skb),
+ skb_get_hash(skb),
+ skb->skb_iif,
+ &trace_key);
}
nf_ct_untimeout(net, NULL);
- list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, head) {
+ list_for_each_entry_safe(cur, tmp, &pernet->nfct_timeout_freelist, free_head) {
list_del(&cur->free_head);
if (refcount_dec_and_test(&cur->refcnt))
}
static int
-nfqnl_mangle(void *data, int data_len, struct nf_queue_entry *e, int diff)
+nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
{
struct sk_buff *nskb;
if (diff < 0) {
+ unsigned int min_len = skb_transport_offset(e->skb);
+
+ if (data_len < min_len)
+ return -EINVAL;
+
if (pskb_trim(e->skb, data_len))
return -ENOMEM;
} else if (diff > 0) {
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/random.h>
#include <linux/smp.h>
#include <linux/static_key.h>
#include <net/dst.h>
#define NFT_META_SECS_PER_DAY 86400
#define NFT_META_DAYS_PER_WEEK 7
-static DEFINE_PER_CPU(struct rnd_state, nft_prandom_state);
-
static u8 nft_meta_weekday(void)
{
time64_t secs = ktime_get_real_seconds();
return true;
}
-static noinline u32 nft_prandom_u32(void)
-{
- struct rnd_state *state = this_cpu_ptr(&nft_prandom_state);
-
- return prandom_u32_state(state);
-}
-
#ifdef CONFIG_IP_ROUTE_CLASSID
static noinline bool
nft_meta_get_eval_rtclassid(const struct sk_buff *skb, u32 *dest)
break;
#endif
case NFT_META_PRANDOM:
- *dest = nft_prandom_u32();
+ *dest = get_random_u32();
break;
#ifdef CONFIG_XFRM
case NFT_META_SECPATH:
len = IFNAMSIZ;
break;
case NFT_META_PRANDOM:
- prandom_init_once(&nft_prandom_state);
len = sizeof(u32);
break;
#ifdef CONFIG_XFRM
{
const struct nft_nat *priv = nft_expr_priv(expr);
- if (priv->family == nft_pf(pkt))
+ if (priv->family == nft_pf(pkt) ||
+ priv->family == NFPROTO_INET)
nft_nat_eval(expr, regs, pkt);
}
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/random.h>
#include <linux/static_key.h>
#include <net/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
-static DEFINE_PER_CPU(struct rnd_state, nft_numgen_prandom_state);
-
struct nft_ng_inc {
u8 dreg;
u32 modulus;
u32 offset;
};
-static u32 nft_ng_random_gen(struct nft_ng_random *priv)
+static u32 nft_ng_random_gen(const struct nft_ng_random *priv)
{
- struct rnd_state *state = this_cpu_ptr(&nft_numgen_prandom_state);
-
- return reciprocal_scale(prandom_u32_state(state), priv->modulus) +
- priv->offset;
+ return reciprocal_scale(get_random_u32(), priv->modulus) + priv->offset;
}
static void nft_ng_random_eval(const struct nft_expr *expr,
if (priv->offset + priv->modulus - 1 < priv->offset)
return -EOVERFLOW;
- prandom_init_once(&nft_numgen_prandom_state);
-
return nft_parse_register_store(ctx, tb[NFTA_NG_DREG], &priv->dreg,
NULL, NFT_DATA_VALUE, sizeof(u32));
}
regs->verdict.code = ret;
}
+static int nft_queue_validate(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nft_data **data)
+{
+ static const unsigned int supported_hooks = ((1 << NF_INET_PRE_ROUTING) |
+ (1 << NF_INET_LOCAL_IN) |
+ (1 << NF_INET_FORWARD) |
+ (1 << NF_INET_LOCAL_OUT) |
+ (1 << NF_INET_POST_ROUTING));
+
+ switch (ctx->family) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ case NFPROTO_INET:
+ case NFPROTO_BRIDGE:
+ break;
+ case NFPROTO_NETDEV: /* lacks okfn */
+ fallthrough;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ return nft_chain_validate_hooks(ctx->chain, supported_hooks);
+}
+
static const struct nla_policy nft_queue_policy[NFTA_QUEUE_MAX + 1] = {
[NFTA_QUEUE_NUM] = { .type = NLA_U16 },
[NFTA_QUEUE_TOTAL] = { .type = NLA_U16 },
.eval = nft_queue_eval,
.init = nft_queue_init,
.dump = nft_queue_dump,
+ .validate = nft_queue_validate,
.reduce = NFT_REDUCE_READONLY,
};
.eval = nft_queue_sreg_eval,
.init = nft_queue_sreg_init,
.dump = nft_queue_sreg_dump,
+ .validate = nft_queue_validate,
.reduce = NFT_REDUCE_READONLY,
};
/* Another cpu may race to insert the element with the same key */
if (prev) {
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
he = prev;
}
err2:
nft_set_elem_destroy(set, he, true);
+ atomic_dec(&set->nelems);
err1:
return false;
}
return err;
}
+/**
+ * nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @set: nftables API set representation
+ * @m: matching data pointing to key mapping array
+ */
+static void nft_set_pipapo_match_destroy(const struct nft_set *set,
+ struct nft_pipapo_match *m)
+{
+ struct nft_pipapo_field *f;
+ int i, r;
+
+ for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
+ ;
+
+ for (r = 0; r < f->rules; r++) {
+ struct nft_pipapo_elem *e;
+
+ if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
+ continue;
+
+ e = f->mt[r].e;
+
+ nft_set_elem_destroy(set, e, true);
+ }
+}
+
/**
* nft_pipapo_destroy() - Free private data for set and all committed elements
* @set: nftables API set representation
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
- struct nft_pipapo_field *f;
- int i, r, cpu;
+ int cpu;
m = rcu_dereference_protected(priv->match, true);
if (m) {
rcu_barrier();
- for (i = 0, f = m->f; i < m->field_count - 1; i++, f++)
- ;
-
- for (r = 0; r < f->rules; r++) {
- struct nft_pipapo_elem *e;
-
- if (r < f->rules - 1 && f->mt[r + 1].e == f->mt[r].e)
- continue;
-
- e = f->mt[r].e;
-
- nft_set_elem_destroy(set, e, true);
- }
+ nft_set_pipapo_match_destroy(set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
}
if (priv->clone) {
+ m = priv->clone;
+
+ if (priv->dirty)
+ nft_set_pipapo_match_destroy(set, m);
+
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
#endif
update_ip_l4_checksum(skb, nh, *addr, new_addr);
csum_replace4(&nh->check, *addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
*addr = new_addr;
}
update_ipv6_checksum(skb, l4_proto, addr, new_addr);
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
memcpy(addr, new_addr, sizeof(__be32[4]));
}
static void set_tp_port(struct sk_buff *skb, __be16 *port,
__be16 new_port, __sum16 *check)
{
+ ovs_ct_clear(skb, NULL);
inet_proto_csum_replace2(check, skb, *port, new_port, false);
*port = new_port;
}
uh->dest = dst;
flow_key->tp.src = src;
flow_key->tp.dst = dst;
+ ovs_ct_clear(skb, NULL);
}
skb_clear_hash(skb);
sh->checksum = old_csum ^ old_correct_csum ^ new_csum;
skb_clear_hash(skb);
+ ovs_ct_clear(skb, NULL);
+
flow_key->tp.src = sh->source;
flow_key->tp.dst = sh->dest;
nf_ct_put(ct);
nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
- ovs_ct_fill_key(skb, key, false);
+
+ if (key)
+ ovs_ct_fill_key(skb, key, false);
return 0;
}
if (flags & IP6_FH_F_FRAG) {
if (frag_off) {
key->ip.frag = OVS_FRAG_TYPE_LATER;
- key->ip.proto = nexthdr;
+ key->ip.proto = NEXTHDR_FRAGMENT;
return 0;
}
key->ip.frag = OVS_FRAG_TYPE_FIRST;
{
struct rose_neigh *s;
- rose_stop_ftimer(rose_neigh);
- rose_stop_t0timer(rose_neigh);
+ del_timer_sync(&rose_neigh->ftimer);
+ del_timer_sync(&rose_neigh->t0timer);
skb_queue_purge(&rose_neigh->queue);
void rose_start_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
sk->sk_timer.function = rose_heartbeat_expiry;
sk->sk_timer.expires = jiffies + 5 * HZ;
- add_timer(&sk->sk_timer);
+ sk_reset_timer(sk, &sk->sk_timer, sk->sk_timer.expires);
}
void rose_start_t1timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t1;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t2timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t2;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_t3timer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->t3;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_hbtimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->timer);
+ sk_stop_timer(sk, &rose->timer);
rose->timer.function = rose_timer_expiry;
rose->timer.expires = jiffies + rose->hb;
- add_timer(&rose->timer);
+ sk_reset_timer(sk, &rose->timer, rose->timer.expires);
}
void rose_start_idletimer(struct sock *sk)
{
struct rose_sock *rose = rose_sk(sk);
- del_timer(&rose->idletimer);
+ sk_stop_timer(sk, &rose->idletimer);
if (rose->idle > 0) {
rose->idletimer.function = rose_idletimer_expiry;
rose->idletimer.expires = jiffies + rose->idle;
- add_timer(&rose->idletimer);
+ sk_reset_timer(sk, &rose->idletimer, rose->idletimer.expires);
}
}
void rose_stop_heartbeat(struct sock *sk)
{
- del_timer(&sk->sk_timer);
+ sk_stop_timer(sk, &sk->sk_timer);
}
void rose_stop_timer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->timer);
+ sk_stop_timer(sk, &rose_sk(sk)->timer);
}
void rose_stop_idletimer(struct sock *sk)
{
- del_timer(&rose_sk(sk)->idletimer);
+ sk_stop_timer(sk, &rose_sk(sk)->idletimer);
}
static void rose_heartbeat_expiry(struct timer_list *t)
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
bh_unlock_sock(sk);
rose_destroy_socket(sk);
+ sock_put(sk);
return;
}
break;
rose_start_heartbeat(sk);
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_timer_expiry(struct timer_list *t)
break;
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
static void rose_idletimer_expiry(struct timer_list *t)
sock_set_flag(sk, SOCK_DEAD);
}
bh_unlock_sock(sk);
+ sock_put(sk);
}
}
static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
- const struct tc_action_ops *ops)
+ const struct tc_action_ops *ops,
+ struct netlink_ext_ack *extack)
{
struct nlattr *nest;
int n_i = 0;
if (nla_put_string(skb, TCA_KIND, ops->kind))
goto nla_put_failure;
+ ret = 0;
mutex_lock(&idrinfo->lock);
idr_for_each_entry_ul(idr, p, tmp, id) {
if (IS_ERR(p))
continue;
ret = tcf_idr_release_unsafe(p);
- if (ret == ACT_P_DELETED) {
+ if (ret == ACT_P_DELETED)
module_put(ops->owner);
- n_i++;
- } else if (ret < 0) {
- mutex_unlock(&idrinfo->lock);
- goto nla_put_failure;
- }
+ else if (ret < 0)
+ break;
+ n_i++;
}
mutex_unlock(&idrinfo->lock);
+ if (ret < 0) {
+ if (n_i)
+ NL_SET_ERR_MSG(extack, "Unable to flush all TC actions");
+ else
+ goto nla_put_failure;
+ }
ret = nla_put_u32(skb, TCA_FCNT, n_i);
if (ret)
struct tcf_idrinfo *idrinfo = tn->idrinfo;
if (type == RTM_DELACTION) {
- return tcf_del_walker(idrinfo, skb, ops);
+ return tcf_del_walker(idrinfo, skb, ops, extack);
} else if (type == RTM_GETACTION) {
return tcf_dump_walker(idrinfo, skb, cb);
} else {
act_id = FLOW_ACTION_JUMP;
*extval = tc_act & TC_ACT_EXT_VAL_MASK;
} else if (tc_act == TC_ACT_UNSPEC) {
- NL_SET_ERR_MSG_MOD(extack, "Offload not supported when conform/exceed action is \"continue\"");
+ act_id = FLOW_ACTION_CONTINUE;
} else {
NL_SET_ERR_MSG_MOD(extack, "Unsupported conform/exceed action offload");
}
struct tc_action *actions[],
struct netlink_ext_ack *extack)
{
- int i, j, index, err = 0;
+ int i, j, k, index, err = 0;
struct tc_action *act;
BUILD_BUG_ON(TCA_ACT_HW_STATS_ANY != FLOW_ACTION_HW_STATS_ANY);
if (err)
goto err_out_locked;
- entry->hw_stats = tc_act_hw_stats(act->hw_stats);
- entry->hw_index = act->tcfa_index;
index = 0;
err = tc_setup_offload_act(act, entry, &index, extack);
- if (!err)
- j += index;
- else
+ if (err)
goto err_out_locked;
+
+ for (k = 0; k < index ; k++) {
+ entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
+ entry[k].hw_index = act->tcfa_index;
+ }
+
+ j += index;
+
spin_unlock_bh(&act->tcfa_lock);
}
struct tc_netem_rate rate;
struct tc_netem_slot slot;
- qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency),
+ qopt.latency = min_t(psched_time_t, PSCHED_NS2TICKS(q->latency),
UINT_MAX);
- qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter),
+ qopt.jitter = min_t(psched_time_t, PSCHED_NS2TICKS(q->jitter),
UINT_MAX);
qopt.limit = q->limit;
qopt.loss = q->loss;
if (!sctp_ulpq_init(&asoc->ulpq, asoc))
goto fail_init;
- if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams,
- 0, gfp))
- goto fail_init;
+ if (sctp_stream_init(&asoc->stream, asoc->c.sinit_num_ostreams, 0, gfp))
+ goto stream_free;
/* Initialize default path MTU. */
asoc->pathmtu = sp->pathmtu;
if (addr->v4.sin_addr.s_addr != htonl(INADDR_ANY) &&
ret != RTN_LOCAL &&
!sp->inet.freebind &&
- !net->ipv4.sysctl_ip_nonlocal_bind)
+ !READ_ONCE(net->ipv4.sysctl_ip_nonlocal_bind))
return 0;
if (ipv6_only_sock(sctp_opt2sk(sp)))
ret = sctp_stream_alloc_out(stream, outcnt, gfp);
if (ret)
- goto out_err;
+ return ret;
for (i = 0; i < stream->outcnt; i++)
SCTP_SO(stream, i)->state = SCTP_STREAM_OPEN;
handle_in:
sctp_stream_interleave_init(stream);
if (!incnt)
- goto out;
-
- ret = sctp_stream_alloc_in(stream, incnt, gfp);
- if (ret)
- goto in_err;
-
- goto out;
+ return 0;
-in_err:
- sched->free(stream);
- genradix_free(&stream->in);
-out_err:
- genradix_free(&stream->out);
- stream->outcnt = 0;
-out:
- return ret;
+ return sctp_stream_alloc_in(stream, incnt, gfp);
}
int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid)
if (!SCTP_SO(&asoc->stream, i)->ext)
continue;
- ret = n->init_sid(&asoc->stream, i, GFP_KERNEL);
+ ret = n->init_sid(&asoc->stream, i, GFP_ATOMIC);
if (ret)
goto err;
}
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
mutex_init(&lgr->llc_conf_mutex);
- lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
+ lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
}
/* called after lgr was removed from lgr_list */
int __sys_recvfrom(int fd, void __user *ubuf, size_t size, unsigned int flags,
struct sockaddr __user *addr, int __user *addr_len)
{
+ struct sockaddr_storage address;
+ struct msghdr msg = {
+ /* Save some cycles and don't copy the address if not needed */
+ .msg_name = addr ? (struct sockaddr *)&address : NULL,
+ };
struct socket *sock;
struct iovec iov;
- struct msghdr msg;
- struct sockaddr_storage address;
int err, err2;
int fput_needed;
if (!sock)
goto out;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- /* Save some cycles and don't copy the address if not needed */
- msg.msg_name = addr ? (struct sockaddr *)&address : NULL;
- /* We assume all kernel code knows the size of sockaddr_storage */
- msg.msg_namelen = 0;
- msg.msg_iocb = NULL;
- msg.msg_flags = 0;
if (sock->file->f_flags & O_NONBLOCK)
flags |= MSG_DONTWAIT;
err = sock_recvmsg(sock, &msg, flags);
return -EFAULT;
kmsg->msg_control_is_user = true;
+ kmsg->msg_get_inq = 0;
kmsg->msg_control_user = msg.msg_control;
kmsg->msg_controllen = msg.msg_controllen;
kmsg->msg_flags = msg.msg_flags;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_principal = clnt->cl_principal;
+ new->cl_max_connect = clnt->cl_max_connect;
return new;
out_err:
EXPORT_SYMBOL_GPL(xdr_init_encode);
/**
- * xdr_commit_encode - Ensure all data is written to buffer
+ * __xdr_commit_encode - Ensure all data is written to buffer
* @xdr: pointer to xdr_stream
*
* We handle encoding across page boundaries by giving the caller a
* required at the end of encoding, or any other time when the xdr_buf
* data might be read.
*/
-inline void xdr_commit_encode(struct xdr_stream *xdr)
+void __xdr_commit_encode(struct xdr_stream *xdr)
{
- int shift = xdr->scratch.iov_len;
+ size_t shift = xdr->scratch.iov_len;
void *page;
- if (shift == 0)
- return;
page = page_address(*xdr->page_ptr);
memcpy(xdr->scratch.iov_base, page, shift);
memmove(page, page + shift, (void *)xdr->p - page);
xdr_reset_scratch_buffer(xdr);
}
-EXPORT_SYMBOL_GPL(xdr_commit_encode);
+EXPORT_SYMBOL_GPL(__xdr_commit_encode);
-static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
- size_t nbytes)
+/*
+ * The buffer space to be reserved crosses the boundary between
+ * xdr->buf->head and xdr->buf->pages, or between two pages
+ * in xdr->buf->pages.
+ */
+static noinline __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
+ size_t nbytes)
{
- __be32 *p;
int space_left;
int frag1bytes, frag2bytes;
+ void *p;
if (nbytes > PAGE_SIZE)
goto out_overflow; /* Bigger buffers require special handling */
xdr->buf->page_len += frag1bytes;
xdr->page_ptr++;
xdr->iov = NULL;
+
/*
* If the last encode didn't end exactly on a page boundary, the
* next one will straddle boundaries. Encode into the next
* space at the end of the previous buffer:
*/
xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
- p = page_address(*xdr->page_ptr);
+
/*
- * Note this is where the next encode will start after we've
- * shifted this one back:
+ * xdr->p is where the next encode will start after
+ * xdr_commit_encode() has shifted this one back:
*/
- xdr->p = (void *)p + frag2bytes;
+ p = page_address(*xdr->page_ptr);
+ xdr->p = p + frag2bytes;
space_left = xdr->buf->buflen - xdr->buf->len;
- xdr->end = (void *)p + min_t(int, space_left, PAGE_SIZE);
+ if (space_left - frag1bytes >= PAGE_SIZE)
+ xdr->end = p + PAGE_SIZE;
+ else
+ xdr->end = p + space_left - frag1bytes;
+
xdr->buf->page_len += frag2bytes;
xdr->buf->len += nbytes;
return p;
unsigned int write_len;
u64 offset;
- seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
- if (!seg)
+ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)
goto out_overflow;
+ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
write_len = min(remaining, seg->rs_length - info->wi_seg_off);
if (!write_len)
goto out_overflow;
struct tipc_net *tn = tipc_net(net);
tipc_detach_loopback(net);
+ tipc_net_stop(net);
/* Make sure the tipc_net_finalize_work() finished */
cancel_work_sync(&tn->work);
- tipc_net_stop(net);
-
tipc_bcast_stop(net);
tipc_nametbl_stop(net);
tipc_sk_rht_destroy(net);
bool preliminary)
{
struct tipc_net *tn = net_generic(net, tipc_net_id);
+ struct tipc_link *l, *snd_l = tipc_bc_sndlink(net);
struct tipc_node *n, *temp_node;
- struct tipc_link *l;
unsigned long intv;
int bearer_id;
int i;
goto exit;
/* A preliminary node becomes "real" now, refresh its data */
tipc_node_write_lock(n);
+ if (!tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link refresh failed, no memory\n");
+ tipc_node_write_unlock_fast(n);
+ tipc_node_put(n);
+ n = NULL;
+ goto exit;
+ }
n->preliminary = false;
n->addr = addr;
hlist_del_rcu(&n->hash);
n->signature = INVALID_NODE_SIG;
n->active_links[0] = INVALID_BEARER_ID;
n->active_links[1] = INVALID_BEARER_ID;
- n->bc_entry.link = NULL;
+ if (!preliminary &&
+ !tipc_link_bc_create(net, tipc_own_addr(net), addr, peer_id, U16_MAX,
+ tipc_link_min_win(snd_l), tipc_link_max_win(snd_l),
+ n->capabilities, &n->bc_entry.inputq1,
+ &n->bc_entry.namedq, snd_l, &n->bc_entry.link)) {
+ pr_warn("Broadcast rcv link creation failed, no memory\n");
+ kfree(n);
+ n = NULL;
+ goto exit;
+ }
tipc_node_get(n);
timer_setup(&n->timer, tipc_node_timeout, 0);
/* Start a slow timer anyway, crypto needs it */
bool *respond, bool *dupl_addr)
{
struct tipc_node *n;
- struct tipc_link *l, *snd_l;
+ struct tipc_link *l;
struct tipc_link_entry *le;
bool addr_match = false;
bool sign_match = false;
return;
tipc_node_write_lock(n);
- if (unlikely(!n->bc_entry.link)) {
- snd_l = tipc_bc_sndlink(net);
- if (!tipc_link_bc_create(net, tipc_own_addr(net),
- addr, peer_id, U16_MAX,
- tipc_link_min_win(snd_l),
- tipc_link_max_win(snd_l),
- n->capabilities,
- &n->bc_entry.inputq1,
- &n->bc_entry.namedq, snd_l,
- &n->bc_entry.link)) {
- pr_warn("Broadcast rcv link creation failed, no mem\n");
- tipc_node_write_unlock_fast(n);
- tipc_node_put(n);
- return;
- }
- }
le = &n->links[b->identity];
sock_init_data(sock, sk);
tipc_set_sk_state(sk, TIPC_OPEN);
if (tipc_sk_insert(tsk)) {
+ sk_free(sk);
pr_warn("Socket create failed; port number exhausted\n");
return -EINVAL;
}
timer_setup(&sk->sk_timer, tipc_sk_timeout, 0);
sk->sk_shutdown = 0;
sk->sk_backlog_rcv = tipc_sk_backlog_rcv;
- sk->sk_rcvbuf = sysctl_tipc_rmem[1];
+ sk->sk_rcvbuf = READ_ONCE(sysctl_tipc_rmem[1]);
sk->sk_data_ready = tipc_data_ready;
sk->sk_write_space = tipc_write_space;
sk->sk_destruct = tipc_sock_destruct;
unsigned long flags;
spin_lock_irqsave(&tls_device_lock, flags);
+ if (unlikely(!refcount_dec_and_test(&ctx->refcount)))
+ goto unlock;
+
list_move_tail(&ctx->list, &tls_device_gc_list);
/* schedule_work inside the spinlock
* to make sure tls_device_down waits for that work.
*/
schedule_work(&tls_device_gc_work);
-
+unlock:
spin_unlock_irqrestore(&tls_device_lock, flags);
}
clean_acked_data_disable(inet_csk(sk));
}
- if (refcount_dec_and_test(&tls_ctx->refcount))
- tls_device_queue_ctx_destruction(tls_ctx);
+ tls_device_queue_ctx_destruction(tls_ctx);
}
EXPORT_SYMBOL_GPL(tls_device_sk_destruct);
* by tls_device_free_ctx. rx_conf and tx_conf stay in TLS_HW.
* Now release the ref taken above.
*/
- if (refcount_dec_and_test(&ctx->refcount))
+ if (refcount_dec_and_test(&ctx->refcount)) {
+ /* sk_destruct ran after tls_device_down took a ref, and
+ * it returned early. Complete the destruction here.
+ */
+ list_del(&ctx->list);
tls_device_free_ctx(ctx);
+ }
}
up_write(&device_offload_lock);
.notifier_call = tls_dev_event,
};
-void __init tls_device_init(void)
+int __init tls_device_init(void)
{
- register_netdevice_notifier(&tls_dev_notifier);
+ return register_netdevice_notifier(&tls_dev_notifier);
}
void __exit tls_device_cleanup(void)
rc = do_tls_getsockopt_conf(sk, optval, optlen,
optname == TLS_TX);
break;
- case TLS_TX_ZEROCOPY_SENDFILE:
+ case TLS_TX_ZEROCOPY_RO:
rc = do_tls_getsockopt_tx_zc(sk, optval, optlen);
break;
default:
optname == TLS_TX);
release_sock(sk);
break;
- case TLS_TX_ZEROCOPY_SENDFILE:
+ case TLS_TX_ZEROCOPY_RO:
lock_sock(sk);
rc = do_tls_setsockopt_tx_zc(sk, optval, optlen);
release_sock(sk);
{
struct tls_context *ctx;
+ WARN_ON_ONCE(sk->sk_prot == p);
+
ctx = tls_get_ctx(sk);
if (likely(ctx)) {
ctx->sk_write_space = write_space;
goto nla_failure;
if (ctx->tx_conf == TLS_HW && ctx->zerocopy_sendfile) {
- err = nla_put_flag(skb, TLS_INFO_ZC_SENDFILE);
+ err = nla_put_flag(skb, TLS_INFO_ZC_RO_TX);
if (err)
goto nla_failure;
}
nla_total_size(sizeof(u16)) + /* TLS_INFO_CIPHER */
nla_total_size(sizeof(u16)) + /* TLS_INFO_RXCONF */
nla_total_size(sizeof(u16)) + /* TLS_INFO_TXCONF */
- nla_total_size(0) + /* TLS_INFO_ZC_SENDFILE */
+ nla_total_size(0) + /* TLS_INFO_ZC_RO_TX */
0;
return size;
if (err)
return err;
- tls_device_init();
+ err = tls_device_init();
+ if (err) {
+ unregister_pernet_subsys(&tls_proc_ops);
+ return err;
+ }
+
tcp_register_ulp(&tcp_tls_ulp_ops);
return 0;
}
darg->async = false;
- if (ret == -EBADMSG)
- TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
-
return ret;
}
}
err = decrypt_internal(sk, skb, dest, NULL, darg);
- if (err < 0)
+ if (err < 0) {
+ if (err == -EBADMSG)
+ TLS_INC_STATS(sock_net(sk), LINUX_MIB_TLSDECRYPTERROR);
return err;
+ }
if (darg->async)
goto decrypt_next;
* -ECONNREFUSED. Otherwise, if we haven't queued any skbs
* to other and its full, we will hang waiting for POLLOUT.
*/
- if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
+ if (unix_recvq_full_lockless(other) && !sock_flag(other, SOCK_DEAD))
return 1;
if (connected)
{
ASSERT_WDEV_LOCK(wdev);
- if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION))
+ if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION &&
+ wdev->iftype != NL80211_IFTYPE_P2P_CLIENT))
return;
if (WARN_ON(!wdev->current_bss) ||
goto out;
}
- nb_pkts = xskq_cons_peek_desc_batch(xs->tx, pool, max_entries);
+ max_entries = xskq_cons_nb_entries(xs->tx, max_entries);
+ nb_pkts = xskq_cons_read_desc_batch(xs->tx, pool, max_entries);
if (!nb_pkts) {
xs->tx->queue_empty_descs++;
goto out;
if (!nb_pkts)
goto out;
- xskq_cons_release_n(xs->tx, nb_pkts);
+ xskq_cons_release_n(xs->tx, max_entries);
__xskq_cons_release(xs->tx);
xs->sk.sk_write_space(&xs->sk);
goto out;
}
- skb = xsk_build_skb(xs, &desc);
- if (IS_ERR(skb)) {
- err = PTR_ERR(skb);
- goto out;
- }
-
/* This is the backpressure mechanism for the Tx path.
* Reserve space in the completion queue and only proceed
* if there is space in it. This avoids having to implement
spin_lock_irqsave(&xs->pool->cq_lock, flags);
if (xskq_prod_reserve(xs->pool->cq)) {
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
- kfree_skb(skb);
goto out;
}
spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ skb = xsk_build_skb(xs, &desc);
+ if (IS_ERR(skb)) {
+ err = PTR_ERR(skb);
+ spin_lock_irqsave(&xs->pool->cq_lock, flags);
+ xskq_prod_cancel(xs->pool->cq);
+ spin_unlock_irqrestore(&xs->pool->cq_lock, flags);
+ goto out;
+ }
+
err = __dev_direct_xmit(skb, xs->queue_id);
if (err == NETDEV_TX_BUSY) {
/* Tell user-space to retry the send */
for (i = 0; i < dma_map->dma_pages_cnt; i++) {
dma = &dma_map->dma_pages[i];
if (*dma) {
+ *dma &= ~XSK_NEXT_PG_CONTIG_MASK;
dma_unmap_page_attrs(dma_map->dev, *dma, PAGE_SIZE,
DMA_BIDIRECTIONAL, attrs);
*dma = 0;
return xskq_cons_read_desc(q, desc, pool);
}
-static inline u32 xskq_cons_peek_desc_batch(struct xsk_queue *q, struct xsk_buff_pool *pool,
- u32 max)
-{
- u32 entries = xskq_cons_nb_entries(q, max);
-
- return xskq_cons_read_desc_batch(q, pool, entries);
-}
-
/* To improve performance in the xskq_cons_release functions, only update local state here.
* Reflect this to global state when we get new entries from the ring in
* xskq_cons_get_entries() and whenever Rx or Tx processing are completed in the NAPI loop.
*num_xfrms = 0;
return 0;
}
- if (IS_ERR(pols[0]))
+ if (IS_ERR(pols[0])) {
+ *num_pols = 0;
return PTR_ERR(pols[0]);
+ }
*num_xfrms = pols[0]->xfrm_nr;
if (pols[1]) {
if (IS_ERR(pols[1])) {
xfrm_pols_put(pols, *num_pols);
+ *num_pols = 0;
return PTR_ERR(pols[1]);
}
(*num_pols)++;
int err;
if (family == AF_INET &&
- xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc)
+ READ_ONCE(xs_net(x)->ipv4.sysctl_ip_no_pmtu_disc))
x->props.flags |= XFRM_STATE_NOPMTUDISC;
err = -EPROTONOSUPPORT;
#define BACKTRACE_DEPTH 16
#define MAX_SYMBOL_LEN 4096
-struct fprobe sample_probe;
+static struct fprobe sample_probe;
+static unsigned long nhit;
static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
module_param_string(symbol, symbol, sizeof(symbol), 0644);
+MODULE_PARM_DESC(symbol, "Probed symbol(s), given by comma separated symbols or a wildcard pattern.");
+
static char nosymbol[MAX_SYMBOL_LEN] = "";
module_param_string(nosymbol, nosymbol, sizeof(nosymbol), 0644);
+MODULE_PARM_DESC(nosymbol, "Not-probed symbols, given by a wildcard pattern.");
+
static bool stackdump = true;
module_param(stackdump, bool, 0644);
+MODULE_PARM_DESC(stackdump, "Enable stackdump.");
+
+static bool use_trace = false;
+module_param(use_trace, bool, 0644);
+MODULE_PARM_DESC(use_trace, "Use trace_printk instead of printk. This is only for debugging.");
static void show_backtrace(void)
{
static void sample_entry_handler(struct fprobe *fp, unsigned long ip, struct pt_regs *regs)
{
- pr_info("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip);
+ if (use_trace)
+ /*
+ * This is just an example, no kernel code should call
+ * trace_printk() except when actively debugging.
+ */
+ trace_printk("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip);
+ else
+ pr_info("Enter <%pS> ip = 0x%p\n", (void *)ip, (void *)ip);
+ nhit++;
if (stackdump)
show_backtrace();
}
{
unsigned long rip = instruction_pointer(regs);
- pr_info("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n",
- (void *)ip, (void *)ip, (void *)rip, (void *)rip);
+ if (use_trace)
+ /*
+ * This is just an example, no kernel code should call
+ * trace_printk() except when actively debugging.
+ */
+ trace_printk("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n",
+ (void *)ip, (void *)ip, (void *)rip, (void *)rip);
+ else
+ pr_info("Return from <%pS> ip = 0x%p to rip = 0x%p (%pS)\n",
+ (void *)ip, (void *)ip, (void *)rip, (void *)rip);
+ nhit++;
if (stackdump)
show_backtrace();
}
{
unregister_fprobe(&sample_probe);
- pr_info("fprobe at %s unregistered\n", symbol);
+ pr_info("fprobe at %s unregistered. %ld times hit, %ld times missed\n",
+ symbol, nhit, sample_probe.nmissed);
}
module_init(fprobe_init)
#include <linux/module.h>
#include <linux/kprobes.h>
-#define MAX_SYMBOL_LEN 64
-static char symbol[MAX_SYMBOL_LEN] = "kernel_clone";
-module_param_string(symbol, symbol, sizeof(symbol), 0644);
+static char symbol[KSYM_NAME_LEN] = "kernel_clone";
+module_param_string(symbol, symbol, KSYM_NAME_LEN, 0644);
/* For each probe you need to allocate a kprobe structure */
static struct kprobe kp = {
#include <linux/module.h>
#include <linux/kprobes.h>
#include <linux/ktime.h>
-#include <linux/limits.h>
#include <linux/sched.h>
-static char func_name[NAME_MAX] = "kernel_clone";
-module_param_string(func, func_name, NAME_MAX, S_IRUGO);
+static char func_name[KSYM_NAME_LEN] = "kernel_clone";
+module_param_string(func, func_name, KSYM_NAME_LEN, 0644);
MODULE_PARM_DESC(func, "Function to kretprobe; this module will report the"
" function's execution time");
# To make this rule robust against "Argument list too long" error,
# ensure to add $(obj)/ prefix by a shell command.
-cmd_mod = echo $(call real-search, $*.o, .o, -objs -y -m) | \
- $(AWK) -v RS='( |\n)' '!x[$$0]++ { print("$(obj)/"$$0) }' > $@
+cmd_mod = printf '%s\n' $(call real-search, $*.o, .o, -objs -y -m) | \
+ $(AWK) '!x[$$0]++ { print("$(obj)/"$$0) }' > $@
$(obj)/%.mod: FORCE
$(call if_changed,mod)
$(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount) \
$(if $(CONFIG_UNWINDER_ORC), --orc) \
$(if $(CONFIG_RETPOLINE), --retpoline) \
+ $(if $(CONFIG_RETHUNK), --rethunk) \
$(if $(CONFIG_SLS), --sls) \
$(if $(CONFIG_STACK_VALIDATION), --stackval) \
$(if $(CONFIG_HAVE_STATIC_CALL_INLINE), --static-call) \
__modinst: $(modules)
@:
-quiet_cmd_none =
- cmd_none = :
-
#
# Installation
#
objtool_args := \
$(if $(delay-objtool),$(objtool_args)) \
- $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr) \
+ $(if $(CONFIG_NOINSTR_VALIDATION), --noinstr $(if $(CONFIG_CPU_UNRET_ENTRY), --unret)) \
$(if $(CONFIG_GCOV_KERNEL), --no-unreachable) \
--link
set -e
+# catch errors from ${NM}
+set -o pipefail
+
+# Run the last element of a pipeline in the current shell.
+# Without this, the while-loop would be executed in a subshell, and
+# the changes made to 'symbol_types' and 'export_symbols' would be lost.
+shopt -s lastpipe
+
declare -A symbol_types
declare -a export_symbols
exit_code=0
+# If there is no symbol in the object, ${NM} (both GNU nm and llvm-nm) shows
+# 'no symbols' diagnostic (but exits with 0). It is harmless and hidden by
+# '2>/dev/null'. However, it suppresses real error messages as well. Add a
+# hand-crafted error message here.
+#
+# TODO:
+# Use --quiet instead of 2>/dev/null when we upgrade the minimum version of
+# binutils to 2.37, llvm to 13.0.0.
+# Then, the following line will be really simple:
+# ${NM} --quiet ${1} |
+
+{ ${NM} ${1} 2>/dev/null || { echo "${0}: ${NM} failed" >&2; false; } } |
while read value type name
do
# Skip the line if the number of fields is less than 3.
if [[ ${name} == __ksymtab_* ]]; then
export_symbols+=(${name#__ksymtab_})
fi
-
- # If there is no symbol in the object, ${NM} (both GNU nm and llvm-nm)
- # shows 'no symbols' diagnostic (but exits with 0). It is harmless and
- # hidden by '2>/dev/null'. However, it suppresses real error messages
- # as well. Add a hand-crafted error message here.
- #
- # Use --quiet instead of 2>/dev/null when we upgrade the minimum version
- # of binutils to 2.37, llvm to 13.0.0.
- #
- # Then, the following line will be really simple:
- # done < <(${NM} --quiet ${1})
-done < <(${NM} ${1} 2>/dev/null || { echo "${0}: ${NM} failed" >&2; false; } )
-
-# Catch error in the process substitution
-wait $!
+done
for name in "${export_symbols[@]}"
do
if ext != '.ko':
sys.exit('{}: module path must end with .ko'.format(ko))
mod = base + '.mod'
- # The first line of *.mod lists the objects that compose the module.
+ # Read from *.mod, to get a list of objects that compose the module.
with open(mod) as m:
- for obj in m.readline().split():
- yield to_cmdfile(obj)
+ for mod_line in m:
+ yield to_cmdfile(mod_line.rstrip())
def process_line(root_directory, command_prefix, file_path):
local print_warnings=$4
local sym_name=${func_addr%+*}
- local offset=${func_addr#*+}
- offset=${offset%/*}
+ local func_offset=${func_addr#*+}
+ func_offset=${func_offset%/*}
local user_size=
+ local file_type
+ local is_vmlinux=0
[[ $func_addr =~ "/" ]] && user_size=${func_addr#*/}
- if [[ -z $sym_name ]] || [[ -z $offset ]] || [[ $sym_name = $func_addr ]]; then
+ if [[ -z $sym_name ]] || [[ -z $func_offset ]] || [[ $sym_name = $func_addr ]]; then
warn "bad func+offset $func_addr"
DONE=1
return
fi
+ # vmlinux uses absolute addresses in the section table rather than
+ # section offsets.
+ local file_type=$(${READELF} --file-header $objfile |
+ ${AWK} '$1 == "Type:" { print $2; exit }')
+ [[ $file_type = "EXEC" ]] && is_vmlinux=1
+
# Go through each of the object's symbols which match the func name.
# In rare cases there might be duplicates, in which case we print all
# matches.
local sym_addr=0x${fields[1]}
local sym_elf_size=${fields[2]}
local sym_sec=${fields[6]}
+ local sec_size
+ local sec_name
# Get the section size:
- local sec_size=$(${READELF} --section-headers --wide $objfile |
+ sec_size=$(${READELF} --section-headers --wide $objfile |
sed 's/\[ /\[/' |
${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print "0x" $6; exit }')
return
fi
+ # Get the section name:
+ sec_name=$(${READELF} --section-headers --wide $objfile |
+ sed 's/\[ /\[/' |
+ ${AWK} -v sec=$sym_sec '$1 == "[" sec "]" { print $2; exit }')
+
+ if [[ -z $sec_name ]]; then
+ warn "bad section name: section: $sym_sec"
+ DONE=1
+ return
+ fi
+
# Calculate the symbol size.
#
# Unfortunately we can't use the ELF size, because kallsyms
sym_size=0x$(printf %x $sym_size)
- # Calculate the section address from user-supplied offset:
- local addr=$(($sym_addr + $offset))
+ # Calculate the address from user-supplied offset:
+ local addr=$(($sym_addr + $func_offset))
if [[ -z $addr ]] || [[ $addr = 0 ]]; then
- warn "bad address: $sym_addr + $offset"
+ warn "bad address: $sym_addr + $func_offset"
DONE=1
return
fi
fi
# Make sure the provided offset is within the symbol's range:
- if [[ $offset -gt $sym_size ]]; then
+ if [[ $func_offset -gt $sym_size ]]; then
[[ $print_warnings = 1 ]] &&
- echo "skipping $sym_name address at $addr due to size mismatch ($offset > $sym_size)"
+ echo "skipping $sym_name address at $addr due to size mismatch ($func_offset > $sym_size)"
continue
fi
[[ $FIRST = 0 ]] && echo
FIRST=0
- echo "$sym_name+$offset/$sym_size:"
+ echo "$sym_name+$func_offset/$sym_size:"
# Pass section address to addr2line and strip absolute paths
# from the output:
- local output=$(${ADDR2LINE} -fpie $objfile $addr | sed "s; $dir_prefix\(\./\)*; ;")
+ local args="--functions --pretty-print --inlines --exe=$objfile"
+ [[ $is_vmlinux = 0 ]] && args="$args --section=$sec_name"
+ local output=$(${ADDR2LINE} $args $addr | sed "s; $dir_prefix\(\./\)*; ;")
[[ -z $output ]] && continue
# Default output (non --list):
filename = arg
try:
- py_config_ptr = gdb.parse_and_eval("kernel_config_data + 8")
- py_config_size = gdb.parse_and_eval(
- "sizeof(kernel_config_data) - 1 - 8 * 2")
+ py_config_ptr = gdb.parse_and_eval("&kernel_config_data")
+ py_config_ptr_end = gdb.parse_and_eval("&kernel_config_data_end")
+ py_config_size = py_config_ptr_end - py_config_ptr
except gdb.error as e:
raise gdb.GdbError("Can't find config, enable CONFIG_IKCONFIG?")
self.breakpoint.delete()
self.breakpoint = None
self.breakpoint = LoadModuleBreakpoint(
- "kernel/module.c:do_init_module", self)
+ "kernel/module/main.c:do_init_module", self)
else:
gdb.write("Note: symbol update on module loading not supported "
"with this gdb version\n")
# point addresses.
sed -e 's/^\.//' |
sort -u |
+# Ignore __this_module. It's not an exported symbol, and will be resolved
+# when the final .ko's are linked.
+grep -v '^__this_module$' |
sed -e 's/\(.*\)/#define __KSYM_\1 1/' >> "$output_file"
},
/* Do not export init/exit functions or data */
{
- .fromsec = { "__ksymtab*", NULL },
+ .fromsec = { "___ksymtab*", NULL },
.bad_tosec = { INIT_SECTIONS, EXIT_SECTIONS, NULL },
.mismatch = EXPORT_TO_INIT_EXIT,
.symbol_white_list = { DEFAULT_SYMBOL_WHITE_LIST, NULL },
local mod=${1%.ko:}
shift
local namespaces="$*"
- local mod_source_files="`cat $mod.mod | sed -n 1p \
- | sed -e 's/\.o/\.c/g' \
- | sed "s|[^ ]* *|${src_prefix}&|g"`"
+ local mod_source_files=$(sed "s|^\(.*\)\.o$|${src_prefix}\1.c|" $mod.mod)
+
for ns in $namespaces; do
echo "Adding namespace $ns to module $mod.ko."
generate_deps_for_ns $ns "$mod_source_files"
#include <openssl/err.h>
#include <openssl/engine.h>
+/*
+ * OpenSSL 3.0 deprecates the OpenSSL's ENGINE API.
+ *
+ * Remove this if/when that API is no longer used
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
/*
* Use CMS if we have openssl-1.0.0 or newer available - otherwise we have to
* assume that it's not available and its header file is missing and that we
implement socket and networking access controls.
If you are unsure how to answer this question, answer N.
-config PAGE_TABLE_ISOLATION
- bool "Remove the kernel mapping in user mode"
- default y
- depends on (X86_64 || X86_PAE) && !UML
- help
- This feature reduces the number of hardware side channels by
- ensuring that the majority of kernel addresses are not mapped
- into userspace.
-
- See Documentation/x86/pti.rst for more details.
-
config SECURITY_INFINIBAND
bool "Infiniband Security Hooks"
depends on SECURITY && INFINIBAND
{
long rc;
const char *algo;
- struct crypto_shash **tfm, *tmp_tfm = NULL;
+ struct crypto_shash **tfm, *tmp_tfm;
struct shash_desc *desc;
if (type == EVM_XATTR_HMAC) {
alloc:
desc = kmalloc(sizeof(*desc) + crypto_shash_descsize(*tfm),
GFP_KERNEL);
- if (!desc) {
- crypto_free_shash(tmp_tfm);
+ if (!desc)
return ERR_PTR(-ENOMEM);
- }
desc->tfm = *tfm;
rc = crypto_shash_init(desc);
if (rc) {
- crypto_free_shash(tmp_tfm);
kfree(desc);
return ERR_PTR(rc);
}
goto out;
}
- status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value, rc, iint);
+ status = evm_verifyxattr(dentry, XATTR_NAME_IMA, xattr_value,
+ rc < 0 ? 0 : rc, iint);
switch (status) {
case INTEGRITY_PASS:
case INTEGRITY_PASS_IMMUTABLE:
crypto_free_shash(ima_algo_array[i].tfm);
}
+ kfree(ima_algo_array);
out:
crypto_free_shash(ima_shash_tfm);
return rc;
if (IS_ENABLED(CONFIG_IMA_ARCH_POLICY) && arch_ima_get_secureboot()) {
if (IS_ENABLED(CONFIG_MODULE_SIG))
set_module_sig_enforced();
+ if (IS_ENABLED(CONFIG_KEXEC_SIG))
+ set_kexec_sig_enforced();
return sb_arch_rules;
}
return NULL;
if (id >= READING_MAX_ID)
return false;
+ if (id == READING_KEXEC_IMAGE && !(ima_appraise & IMA_APPRAISE_ENFORCE)
+ && security_locked_down(LOCKDOWN_KEXEC))
+ return false;
+
func = read_idmap[id] ?: FILE_CHECK;
rcu_read_lock();
else
/*
* If digest is NULL, the event being recorded is a violation.
- * Make room for the digest by increasing the offset of
- * IMA_DIGEST_SIZE.
+ * Make room for the digest by increasing the offset by the
+ * hash algorithm digest size.
*/
- offset += IMA_DIGEST_SIZE;
+ offset += hash_digest_size[hash_algo];
return ima_write_template_field_data(buffer, offset + digestsize,
fmt, field_data);
/* key properties */
flags = 0;
flags |= options->policydigest_len ? 0 : TPM2_OA_USER_WITH_AUTH;
- flags |= payload->migratable ? (TPM2_OA_FIXED_TPM |
- TPM2_OA_FIXED_PARENT) : 0;
+ flags |= payload->migratable ? 0 : (TPM2_OA_FIXED_TPM |
+ TPM2_OA_FIXED_PARENT);
tpm_buf_append_u32(&buf, flags);
/* policy */
}
}
rc = selinux_add_opt(token, arg, mnt_opts);
+ kfree(arg);
+ arg = NULL;
if (unlikely(rc)) {
- kfree(arg);
goto free_opt;
}
} else {
struct fs_parameter *param)
{
struct fs_parse_result result;
- int opt, rc;
+ int opt;
opt = fs_parse(fc, selinux_fs_parameters, param, &result);
if (opt < 0)
return opt;
- rc = selinux_add_opt(opt, param->string, &fc->security);
- if (!rc)
- param->string = NULL;
-
- return rc;
+ return selinux_add_opt(opt, param->string, &fc->security);
}
/* inode security operations */
*/
static void *snd_dma_dev_alloc(struct snd_dma_buffer *dmab, size_t size)
{
- void *p;
-
- p = dma_alloc_coherent(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP);
-#ifdef CONFIG_X86
- if (p && dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC)
- set_memory_wc((unsigned long)p, PAGE_ALIGN(size) >> PAGE_SHIFT);
-#endif
- return p;
+ return dma_alloc_coherent(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP);
}
static void snd_dma_dev_free(struct snd_dma_buffer *dmab)
{
-#ifdef CONFIG_X86
- if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC)
- set_memory_wb((unsigned long)dmab->area,
- PAGE_ALIGN(dmab->bytes) >> PAGE_SHIFT);
-#endif
dma_free_coherent(dmab->dev.dev, dmab->bytes, dmab->area, dmab->addr);
}
static int snd_dma_dev_mmap(struct snd_dma_buffer *dmab,
struct vm_area_struct *area)
{
-#ifdef CONFIG_X86
- if (dmab->dev.type == SNDRV_DMA_TYPE_DEV_WC)
- area->vm_page_prot = pgprot_writecombine(area->vm_page_prot);
-#endif
return dma_mmap_coherent(dmab->dev.dev, area,
dmab->area, dmab->addr, dmab->bytes);
}
/*
* Write-combined pages
*/
-#ifdef CONFIG_X86
-/* On x86, share the same ops as the standard dev ops */
-#define snd_dma_wc_ops snd_dma_dev_ops
-#else /* CONFIG_X86 */
static void *snd_dma_wc_alloc(struct snd_dma_buffer *dmab, size_t size)
{
return dma_alloc_wc(dmab->dev.dev, size, &dmab->addr, DEFAULT_GFP);
.free = snd_dma_wc_free,
.mmap = snd_dma_wc_mmap,
};
-#endif /* CONFIG_X86 */
#ifdef CONFIG_SND_DMA_SGBUF
static void *snd_dma_sg_fallback_alloc(struct snd_dma_buffer *dmab, size_t size);
{ 0x14f1, "Conexant" },
{ 0x17e8, "Chrontel" },
{ 0x1854, "LG" },
+ { 0x19e5, "Huawei" },
{ 0x1aec, "Wolfson Microelectronics" },
{ 0x1af4, "QEMU" },
{ 0x434d, "C-Media" },
/* check whether Intel graphics is present and reachable */
static int i915_gfx_present(struct pci_dev *hdac_pci)
{
- unsigned int class = PCI_BASE_CLASS_DISPLAY << 16;
struct pci_dev *display_dev = NULL;
- bool match = false;
- do {
- display_dev = pci_get_class(class, display_dev);
-
- if (display_dev && display_dev->vendor == PCI_VENDOR_ID_INTEL &&
+ for_each_pci_dev(display_dev) {
+ if (display_dev->vendor == PCI_VENDOR_ID_INTEL &&
+ (display_dev->class >> 16) == PCI_BASE_CLASS_DISPLAY &&
connectivity_check(display_dev, hdac_pci)) {
pci_dev_put(display_dev);
- match = true;
+ return true;
}
- } while (!match && display_dev);
+ }
- return match;
+ return false;
}
/**
DMI_MATCH(DMI_SYS_VENDOR, "Google"),
}
},
+ {
+ .ident = "UP-WHL",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+ }
+ },
{}
}
},
DMI_MATCH(DMI_SYS_VENDOR, "Google"),
}
},
+ {
+ .ident = "UPX-TGL",
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "AAEON"),
+ }
+ },
{}
}
},
/* find max number of channels based on format_configuration */
if (fmt_configs->fmt_count) {
- dev_dbg(dev, "%s: found %d format definitions\n",
- __func__, fmt_configs->fmt_count);
+ dev_dbg(dev, "found %d format definitions\n",
+ fmt_configs->fmt_count);
for (i = 0; i < fmt_configs->fmt_count; i++) {
struct wav_fmt_ext *fmt_ext;
if (fmt_ext->fmt.channels > max_ch)
max_ch = fmt_ext->fmt.channels;
}
- dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch);
+ dev_dbg(dev, "max channels found %d\n", max_ch);
} else {
- dev_dbg(dev, "%s: No format information found\n", __func__);
+ dev_dbg(dev, "No format information found\n");
}
if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) {
}
if (dmic_geo > 0) {
- dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo);
+ dev_dbg(dev, "Array with %d dmics\n", dmic_geo);
}
if (max_ch > dmic_geo) {
- dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n",
- __func__, max_ch, dmic_geo);
+ dev_dbg(dev, "max channels %d exceed dmic number %d\n",
+ max_ch, dmic_geo);
}
}
}
- dev_dbg(dev, "%s: dmic number %d max_ch %d\n",
- __func__, dmic_geo, max_ch);
+ dev_dbg(dev, "dmic number %d max_ch %d\n", dmic_geo, max_ch);
return dmic_geo;
}
err = snd_cs46xx_create(card, pci,
external_amp[dev], thinkpad[dev]);
if (err < 0)
- return err;
+ goto error;
card->private_data = chip;
chip->accept_valid = mmap_valid[dev];
err = snd_cs46xx_pcm(chip, 0);
if (err < 0)
- return err;
+ goto error;
#ifdef CONFIG_SND_CS46XX_NEW_DSP
err = snd_cs46xx_pcm_rear(chip, 1);
if (err < 0)
- return err;
+ goto error;
err = snd_cs46xx_pcm_iec958(chip, 2);
if (err < 0)
- return err;
+ goto error;
#endif
err = snd_cs46xx_mixer(chip, 2);
if (err < 0)
- return err;
+ goto error;
#ifdef CONFIG_SND_CS46XX_NEW_DSP
if (chip->nr_ac97_codecs ==2) {
err = snd_cs46xx_pcm_center_lfe(chip, 3);
if (err < 0)
- return err;
+ goto error;
}
#endif
err = snd_cs46xx_midi(chip, 0);
if (err < 0)
- return err;
+ goto error;
err = snd_cs46xx_start_dsp(chip);
if (err < 0)
- return err;
+ goto error;
snd_cs46xx_gameport(chip);
err = snd_card_register(card);
if (err < 0)
- return err;
+ goto error;
pci_set_drvdata(pci, card);
dev++;
return 0;
+
+ error:
+ snd_card_free(card);
+ return err;
}
static struct pci_driver cs46xx_driver = {
snd_hda_set_pin_ctl_cache(codec, cfg->nid, cfg->val);
}
-static void apply_fixup(struct hda_codec *codec, int id, int action, int depth)
+void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth)
{
const char *modelname = codec->fixup_name;
if (++depth > 10)
break;
if (fix->chained_before)
- apply_fixup(codec, fix->chain_id, action, depth + 1);
+ __snd_hda_apply_fixup(codec, fix->chain_id, action, depth + 1);
switch (fix->type) {
case HDA_FIXUP_PINS:
id = fix->chain_id;
}
}
+EXPORT_SYMBOL_GPL(__snd_hda_apply_fixup);
/**
* snd_hda_apply_fixup - Apply the fixup chain with the given action
void snd_hda_apply_fixup(struct hda_codec *codec, int action)
{
if (codec->fixup_list)
- apply_fixup(codec, codec->fixup_id, action, 0);
+ __snd_hda_apply_fixup(codec, codec->fixup_id, action, 0);
}
EXPORT_SYMBOL_GPL(snd_hda_apply_fixup);
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
{ PCI_DEVICE(0x8086, 0x51cf),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
+ /* Meteorlake-P */
+ { PCI_DEVICE(0x8086, 0x7e28),
+ .driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_SKYLAKE},
/* Broxton-P(Apollolake) */
{ PCI_DEVICE(0x8086, 0x5a98),
.driver_data = AZX_DRIVER_SKL | AZX_DCAPS_INTEL_BROXTON },
void snd_hda_apply_pincfgs(struct hda_codec *codec,
const struct hda_pintbl *cfg);
void snd_hda_apply_fixup(struct hda_codec *codec, int action);
+void __snd_hda_apply_fixup(struct hda_codec *codec, int id, int action, int depth);
void snd_hda_pick_fixup(struct hda_codec *codec,
const struct hda_model_fixup *models,
const struct snd_pci_quirk *quirk,
SND_PCI_QUIRK(0x103c, 0x828c, "HP EliteBook 840 G4", CXT_FIXUP_HP_DOCK),
SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x103c, 0x82b4, "HP ProDesk 600 G3", CXT_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x836e, "HP ProBook 455 G5", CXT_FIXUP_MUTE_LED_GPIO),
SND_PCI_QUIRK(0x103c, 0x837f, "HP ProBook 470 G5", CXT_FIXUP_MUTE_LED_GPIO),
SND_PCI_QUIRK(0x103c, 0x83b2, "HP EliteBook 840 G5", CXT_FIXUP_HP_DOCK),
snd_hda_pick_fixup(codec, cxt5051_fixup_models,
cxt5051_fixups, cxt_fixups);
break;
+ case 0x14f15098:
+ codec->pin_amp_workaround = 1;
+ spec->gen.mixer_nid = 0x22;
+ spec->gen.add_stereo_mix_input = HDA_HINT_STEREO_MIX_AUTO;
+ snd_hda_pick_fixup(codec, cxt5066_fixup_models,
+ cxt5066_fixups, cxt_fixups);
+ break;
case 0x14f150f2:
codec->power_save_node = 1;
fallthrough;
if (err < 0)
goto error;
- err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
+ err = cx_auto_parse_beep(codec);
if (err < 0)
goto error;
- err = cx_auto_parse_beep(codec);
+ err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
if (err < 0)
goto error;
HDA_CODEC_ENTRY(0x8086281b, "Elkhartlake HDMI", patch_i915_icl_hdmi),
HDA_CODEC_ENTRY(0x8086281c, "Alderlake-P HDMI", patch_i915_adlp_hdmi),
HDA_CODEC_ENTRY(0x8086281f, "Raptorlake-P HDMI", patch_i915_adlp_hdmi),
+HDA_CODEC_ENTRY(0x8086281d, "Meteorlake HDMI", patch_i915_adlp_hdmi),
HDA_CODEC_ENTRY(0x80862880, "CedarTrail HDMI", patch_generic_hdmi),
HDA_CODEC_ENTRY(0x80862882, "Valleyview2 HDMI", patch_i915_byt_hdmi),
HDA_CODEC_ENTRY(0x80862883, "Braswell HDMI", patch_i915_byt_hdmi),
case 0x10ec0245:
case 0x10ec0255:
case 0x10ec0256:
+ case 0x19e58326:
case 0x10ec0257:
case 0x10ec0282:
case 0x10ec0283:
switch (codec->core.vendor_id) {
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
case 0x10ec0283:
case 0x10ec0286:
case 0x10ec0288:
SND_PCI_QUIRK(0x1558, 0x67e1, "Clevo PB71[DE][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x67e5, "Clevo PC70D[PRS](?:-D|-G)?", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x67f1, "Clevo PC70H[PRS]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
+ SND_PCI_QUIRK(0x1558, 0x67f5, "Clevo PD70PN[NRT]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x70d1, "Clevo PC70[ER][CDF]", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x7714, "Clevo X170SM", ALC1220_FIXUP_CLEVO_PB51ED_PINS),
SND_PCI_QUIRK(0x1558, 0x7715, "Clevo X170KM-G", ALC1220_FIXUP_CLEVO_PB51ED),
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_write_coef_idx(codec, 0x48, 0x0);
alc_update_coef_idx(codec, 0x49, 0x0045, 0x0);
break;
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_write_coef_idx(codec, 0x48, 0xd011);
alc_update_coef_idx(codec, 0x49, 0x007f, 0x0045);
break;
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_process_coef_fw(codec, coef0256);
break;
case 0x10ec0234:
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_write_coef_idx(codec, 0x45, 0xc489);
snd_hda_set_pin_ctl_cache(codec, hp_pin, 0);
alc_process_coef_fw(codec, coef0256);
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_write_coef_idx(codec, 0x1b, 0x0e4b);
alc_write_coef_idx(codec, 0x45, 0xc089);
msleep(50);
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_process_coef_fw(codec, coef0256);
break;
case 0x10ec0234:
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_process_coef_fw(codec, coef0256);
break;
case 0x10ec0234:
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_write_coef_idx(codec, 0x1b, 0x0e4b);
alc_write_coef_idx(codec, 0x06, 0x6104);
alc_write_coefex_idx(codec, 0x57, 0x3, 0x09a3);
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
alc_process_coef_fw(codec, alc256fw);
break;
}
case 0x10ec0236:
case 0x10ec0255:
case 0x10ec0256:
+ case 0x19e58326:
alc_update_coef_idx(codec, 0x1b, 0x8000, 1 << 15); /* Reset HP JD */
alc_update_coef_idx(codec, 0x1b, 0x8000, 0 << 15);
break;
ALC298_FIXUP_LENOVO_SPK_VOLUME,
ALC256_FIXUP_DELL_INSPIRON_7559_SUBWOOFER,
ALC269_FIXUP_ATIV_BOOK_8,
+ ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE,
ALC221_FIXUP_HP_MIC_NO_PRESENCE,
ALC256_FIXUP_ASUS_HEADSET_MODE,
ALC256_FIXUP_ASUS_MIC,
ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS,
ALC287_FIXUP_LEGION_15IMHG05_AUTOMUTE,
ALC287_FIXUP_YOGA7_14ITL_SPEAKERS,
+ ALC298_FIXUP_LENOVO_C940_DUET7,
ALC287_FIXUP_13S_GEN2_SPEAKERS,
ALC256_FIXUP_SET_COEF_DEFAULTS,
ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE,
ALC295_FIXUP_FRAMEWORK_LAPTOP_MIC_NO_PRESENCE,
};
+/* A special fixup for Lenovo C940 and Yoga Duet 7;
+ * both have the very same PCI SSID, and we need to apply different fixups
+ * depending on the codec ID
+ */
+static void alc298_fixup_lenovo_c940_duet7(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ int id;
+
+ if (codec->core.vendor_id == 0x10ec0298)
+ id = ALC298_FIXUP_LENOVO_SPK_VOLUME; /* C940 */
+ else
+ id = ALC287_FIXUP_YOGA7_14ITL_SPEAKERS; /* Duet 7 */
+ __snd_hda_apply_fixup(codec, id, action, 0);
+}
+
static const struct hda_fixup alc269_fixups[] = {
[ALC269_FIXUP_GPIO2] = {
.type = HDA_FIXUP_FUNC,
.chained = true,
.chain_id = ALC269_FIXUP_NO_SHUTUP
},
+ [ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+ { 0x1a, 0x01813030 }, /* use as headphone mic, without its own jack detect */
+ { }
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HEADSET_MODE
+ },
[ALC221_FIXUP_HP_MIC_NO_PRESENCE] = {
.type = HDA_FIXUP_PINS,
.v.pins = (const struct hda_pintbl[]) {
.chained = true,
.chain_id = ALC269_FIXUP_HEADSET_MODE,
},
+ [ALC298_FIXUP_LENOVO_C940_DUET7] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc298_fixup_lenovo_c940_duet7,
+ },
[ALC287_FIXUP_13S_GEN2_SPEAKERS] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1025, 0x129d, "Acer SWIFT SF313-51", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1300, "Acer SWIFT SF314-56", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
SND_PCI_QUIRK(0x1025, 0x1430, "Acer TravelMate B311R-31", ALC256_FIXUP_ACER_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1025, 0x1466, "Acer Aspire A515-56", ALC255_FIXUP_ACER_HEADPHONE_AND_MIC),
SND_PCI_QUIRK(0x1028, 0x0470, "Dell M101z", ALC269_FIXUP_DELL_M101Z),
+ SND_PCI_QUIRK(0x1028, 0x053c, "Dell Latitude E5430", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x054b, "Dell XPS one 2710", ALC275_FIXUP_DELL_XPS),
SND_PCI_QUIRK(0x1028, 0x05bd, "Dell Latitude E6440", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x1028, 0x05be, "Dell Latitude E6540", ALC292_FIXUP_DELL_E7X),
SND_PCI_QUIRK(0x103c, 0x2335, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2336, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
SND_PCI_QUIRK(0x103c, 0x2337, "HP", ALC269_FIXUP_HP_MUTE_LED_MIC1),
+ SND_PCI_QUIRK(0x103c, 0x2b5e, "HP 288 Pro G2 MT", ALC221_FIXUP_HP_288PRO_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x802e, "HP Z240 SFF", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x802f, "HP Z240", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x8077, "HP", ALC256_FIXUP_HP_HEADSET_MIC),
ALC285_FIXUP_HP_GPIO_AMP_INIT),
SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation",
ALC285_FIXUP_HP_GPIO_AMP_INIT),
+ SND_PCI_QUIRK(0x103c, 0x8787, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x8788, "HP OMEN 15", ALC285_FIXUP_HP_MUTE_LED),
SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89c3, "Zbook Studio G9", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89c6, "Zbook Fury 17 G9", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x89ca, "HP", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
+ SND_PCI_QUIRK(0x103c, 0x8a78, "HP Dev One", ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST),
+ SND_PCI_QUIRK(0x103c, 0x8aa0, "HP ProBook 440 G9 (MB 8A9E)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aa3, "HP ProBook 450 G9 (MB 8AA1)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aa8, "HP EliteBook 640 G9 (MB 8AA6)", ALC236_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8aab, "HP EliteBook 650 G9 (MB 8AA9)", ALC236_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
SND_PCI_QUIRK(0x1558, 0x70f3, "Clevo NH77DPQ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70f4, "Clevo NH77EPY", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x70f6, "Clevo NH77DPQ-Y", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x7716, "Clevo NS50PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+ SND_PCI_QUIRK(0x1558, 0x7718, "Clevo L140PU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8228, "Clevo NR40BU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8520, "Clevo NH50D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1558, 0x8521, "Clevo NH77D[CD]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x17aa, 0x3176, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x17aa, 0x3178, "ThinkCentre Station", ALC283_FIXUP_HEADSET_MIC),
SND_PCI_QUIRK(0x17aa, 0x31af, "ThinkCentre Station", ALC623_FIXUP_LENOVO_THINKSTATION_P340),
+ SND_PCI_QUIRK(0x17aa, 0x3802, "Lenovo Yoga DuetITL 2021", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3813, "Legion 7i 15IMHG05", ALC287_FIXUP_LEGION_15IMHG05_SPEAKERS),
- SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940", ALC298_FIXUP_LENOVO_SPK_VOLUME),
+ SND_PCI_QUIRK(0x17aa, 0x3818, "Lenovo C940 / Yoga Duet 7", ALC298_FIXUP_LENOVO_C940_DUET7),
SND_PCI_QUIRK(0x17aa, 0x3819, "Lenovo 13s Gen2 ITL", ALC287_FIXUP_13S_GEN2_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3820, "Yoga Duet 7 13ITL6", ALC287_FIXUP_YOGA7_14ITL_SPEAKERS),
SND_PCI_QUIRK(0x17aa, 0x3824, "Legion Y9000X 2020", ALC285_FIXUP_LEGION_Y9000X_SPEAKERS),
SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
+ SND_PCI_QUIRK(0x1d72, 0x1945, "Redmi G", ALC256_FIXUP_ASUS_HEADSET_MIC),
SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
SND_PCI_QUIRK(0x8086, 0x2074, "Intel NUC 8", ALC233_FIXUP_INTEL_NUC8_DMIC),
SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
case 0x10ec0230:
case 0x10ec0236:
case 0x10ec0256:
+ case 0x19e58326:
spec->codec_variant = ALC269_TYPE_ALC256;
spec->shutup = alc256_shutup;
spec->init_hook = alc256_init;
ALC668_FIXUP_MIC_DET_COEF,
ALC897_FIXUP_LENOVO_HEADSET_MIC,
ALC897_FIXUP_HEADSET_MIC_PIN,
+ ALC897_FIXUP_HP_HSMIC_VERB,
};
static const struct hda_fixup alc662_fixups[] = {
.chained = true,
.chain_id = ALC897_FIXUP_LENOVO_HEADSET_MIC
},
+ [ALC897_FIXUP_HP_HSMIC_VERB] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+ { }
+ },
+ },
};
static const struct snd_pci_quirk alc662_fixup_tbl[] = {
SND_PCI_QUIRK(0x1028, 0x0698, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x1028, 0x069f, "Dell", ALC668_FIXUP_DELL_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x1632, "HP RP5800", ALC662_FIXUP_HP_RP5800),
+ SND_PCI_QUIRK(0x103c, 0x8719, "HP", ALC897_FIXUP_HP_HSMIC_VERB),
SND_PCI_QUIRK(0x103c, 0x873e, "HP", ALC671_FIXUP_HP_HEADSET_MIC2),
+ SND_PCI_QUIRK(0x103c, 0x877e, "HP 288 Pro G6", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x103c, 0x885f, "HP 288 Pro G8", ALC671_FIXUP_HP_HEADSET_MIC2),
SND_PCI_QUIRK(0x1043, 0x1080, "Asus UX501VW", ALC668_FIXUP_HEADSET_MODE),
SND_PCI_QUIRK(0x1043, 0x11cd, "Asus N550", ALC662_FIXUP_ASUS_Nx50),
HDA_CODEC_ENTRY(0x10ec0b00, "ALCS1200A", patch_alc882),
HDA_CODEC_ENTRY(0x10ec1168, "ALC1220", patch_alc882),
HDA_CODEC_ENTRY(0x10ec1220, "ALC1220", patch_alc882),
+ HDA_CODEC_ENTRY(0x19e58326, "HW8326", patch_alc269),
{} /* terminator */
};
MODULE_DEVICE_TABLE(hdaudio, snd_hda_id_realtek);
if (err < 0)
return err;
- err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
+ err = auto_parse_beep(codec);
if (err < 0)
return err;
- err = auto_parse_beep(codec);
+ err = snd_hda_gen_parse_auto_config(codec, &spec->gen.autocfg);
if (err < 0)
return err;
/*
* connected STDI
+ * TDM support is assuming it is probed via Audio-Graph-Card style here.
+ * Default is SDTIx1 if it was probed via Simple-Audio-Card for now.
*/
sdti_num = of_graph_get_endpoint_count(np);
- if (WARN_ON((sdti_num > 3) || (sdti_num < 1)))
- return;
+ if ((sdti_num >= SDTx_MAX) || (sdti_num < 1))
+ sdti_num = 1;
AK4613_CONFIG_SDTI_set(priv, sdti_num);
}
if (bclk != (val & ARIZONA_AIF1_BCLK_FREQ_MASK))
return true;
- val = snd_soc_component_read(component, base + ARIZONA_AIF_TX_BCLK_RATE);
- if (lrclk != (val & ARIZONA_AIF1TX_BCPF_MASK))
+ val = snd_soc_component_read(component, base + ARIZONA_AIF_RX_BCLK_RATE);
+ if (lrclk != (val & ARIZONA_AIF1RX_BCPF_MASK))
return true;
val = snd_soc_component_read(component, base + ARIZONA_AIF_FRAME_CTRL_1);
}
}
-static DECLARE_TLV_DB_SCALE(dig_vol_tlv, -10200, 25, 0);
+static const DECLARE_TLV_DB_RANGE(dig_vol_tlv, 0, 912,
+ TLV_DB_MINMAX_ITEM(-10200, 1200));
static DECLARE_TLV_DB_SCALE(amp_gain_tlv, 0, 1, 1);
static const char * const cs35l36_pcm_sftramp_text[] = {
{ CS35L41_DAC_PCM1_SRC, 0x00000008 },
{ CS35L41_ASP_TX1_SRC, 0x00000018 },
{ CS35L41_ASP_TX2_SRC, 0x00000019 },
- { CS35L41_ASP_TX3_SRC, 0x00000020 },
- { CS35L41_ASP_TX4_SRC, 0x00000021 },
+ { CS35L41_ASP_TX3_SRC, 0x00000000 },
+ { CS35L41_ASP_TX4_SRC, 0x00000000 },
{ CS35L41_DSP1_RX1_SRC, 0x00000008 },
{ CS35L41_DSP1_RX2_SRC, 0x00000009 },
{ CS35L41_DSP1_RX3_SRC, 0x00000018 },
{ CS35L41_DSP1_XM_ACCEL_PL0_PRI, 0x00000000 },
{ CS35L41_PWR_CTRL2, 0x00000000 },
{ CS35L41_AMP_GAIN_CTRL, 0x00000000 },
+ { CS35L41_ASP_TX3_SRC, 0x00000000 },
+ { CS35L41_ASP_TX4_SRC, 0x00000000 },
};
static const struct reg_sequence cs35l41_revb0_errata_patch[] = {
{ CS35L41_DSP1_XM_ACCEL_PL0_PRI, 0x00000000 },
{ CS35L41_PWR_CTRL2, 0x00000000 },
{ CS35L41_AMP_GAIN_CTRL, 0x00000000 },
+ { CS35L41_ASP_TX3_SRC, 0x00000000 },
+ { CS35L41_ASP_TX4_SRC, 0x00000000 },
};
static const struct reg_sequence cs35l41_revb2_errata_patch[] = {
{ CS35L41_DSP1_XM_ACCEL_PL0_PRI, 0x00000000 },
{ CS35L41_PWR_CTRL2, 0x00000000 },
{ CS35L41_AMP_GAIN_CTRL, 0x00000000 },
+ { CS35L41_ASP_TX3_SRC, 0x00000000 },
+ { CS35L41_ASP_TX4_SRC, 0x00000000 },
};
static const struct reg_sequence cs35l41_fs_errata_patch[] = {
SOC_SINGLE("HW Noise Gate Enable", CS35L41_NG_CFG, 8, 63, 0),
SOC_SINGLE("HW Noise Gate Delay", CS35L41_NG_CFG, 4, 7, 0),
SOC_SINGLE("HW Noise Gate Threshold", CS35L41_NG_CFG, 0, 7, 0),
- SOC_SINGLE("Aux Noise Gate CH1 Enable",
+ SOC_SINGLE("Aux Noise Gate CH1 Switch",
CS35L41_MIXER_NGATE_CH1_CFG, 16, 1, 0),
SOC_SINGLE("Aux Noise Gate CH1 Entry Delay",
CS35L41_MIXER_NGATE_CH1_CFG, 8, 15, 0),
CS35L41_MIXER_NGATE_CH1_CFG, 0, 7, 0),
SOC_SINGLE("Aux Noise Gate CH2 Entry Delay",
CS35L41_MIXER_NGATE_CH2_CFG, 8, 15, 0),
- SOC_SINGLE("Aux Noise Gate CH2 Enable",
+ SOC_SINGLE("Aux Noise Gate CH2 Switch",
CS35L41_MIXER_NGATE_CH2_CFG, 16, 1, 0),
SOC_SINGLE("Aux Noise Gate CH2 Threshold",
CS35L41_MIXER_NGATE_CH2_CFG, 0, 7, 0),
- SOC_SINGLE("SCLK Force", CS35L41_SP_FORMAT, CS35L41_SCLK_FRC_SHIFT, 1, 0),
- SOC_SINGLE("LRCLK Force", CS35L41_SP_FORMAT, CS35L41_LRCLK_FRC_SHIFT, 1, 0),
- SOC_SINGLE("Invert Class D", CS35L41_AMP_DIG_VOL_CTRL,
+ SOC_SINGLE("SCLK Force Switch", CS35L41_SP_FORMAT, CS35L41_SCLK_FRC_SHIFT, 1, 0),
+ SOC_SINGLE("LRCLK Force Switch", CS35L41_SP_FORMAT, CS35L41_LRCLK_FRC_SHIFT, 1, 0),
+ SOC_SINGLE("Invert Class D Switch", CS35L41_AMP_DIG_VOL_CTRL,
CS35L41_AMP_INV_PCM_SHIFT, 1, 0),
- SOC_SINGLE("Amp Gain ZC", CS35L41_AMP_GAIN_CTRL,
+ SOC_SINGLE("Amp Gain ZC Switch", CS35L41_AMP_GAIN_CTRL,
CS35L41_AMP_GAIN_ZC_SHIFT, 1, 0),
WM_ADSP2_PRELOAD_SWITCH("DSP1", 1),
WM_ADSP_FW_CONTROL("DSP1", 0),
0, 0xA0, 96, adc_att_tlv),
SOC_DOUBLE_R_SX_TLV("PGA Volume",
CS42L51_ALC_PGA_CTL, CS42L51_ALC_PGB_CTL,
- 0, 0x1A, 30, pga_tlv),
+ 0, 0x19, 30, pga_tlv),
SOC_SINGLE("Playback Deemphasis Switch", CS42L51_DAC_CTL, 3, 1, 0),
SOC_SINGLE("Auto-Mute Switch", CS42L51_DAC_CTL, 2, 1, 0),
SOC_SINGLE("Soft Ramp Switch", CS42L51_DAC_CTL, 1, 1, 0),
static DECLARE_TLV_DB_SCALE(pga_tlv, -600, 50, 0);
-static DECLARE_TLV_DB_SCALE(mix_tlv, -50, 50, 0);
+static DECLARE_TLV_DB_SCALE(pass_tlv, -6000, 50, 0);
+
+static DECLARE_TLV_DB_SCALE(mix_tlv, -5150, 50, 0);
static DECLARE_TLV_DB_SCALE(beep_tlv, -56, 200, 0);
CS42L52_SPKB_VOL, 0, 0x40, 0xC0, hl_tlv),
SOC_DOUBLE_R_SX_TLV("Bypass Volume", CS42L52_PASSTHRUA_VOL,
- CS42L52_PASSTHRUB_VOL, 0, 0x88, 0x90, pga_tlv),
+ CS42L52_PASSTHRUB_VOL, 0, 0x88, 0x90, pass_tlv),
SOC_DOUBLE("Bypass Mute", CS42L52_MISC_CTL, 4, 5, 1, 0),
CS42L52_ADCB_VOL, 0, 0xA0, 0x78, ipd_tlv),
SOC_DOUBLE_R_SX_TLV("ADC Mixer Volume",
CS42L52_ADCA_MIXER_VOL, CS42L52_ADCB_MIXER_VOL,
- 0, 0x19, 0x7F, ipd_tlv),
+ 0, 0x19, 0x7F, mix_tlv),
SOC_DOUBLE("ADC Switch", CS42L52_ADC_MISC_CTL, 0, 1, 1, 0),
SOC_DOUBLE("ADC Boost Switch", CS42L56_GAIN_BIAS_CTL, 3, 2, 1, 1),
SOC_DOUBLE_R_SX_TLV("Headphone Volume", CS42L56_HPA_VOLUME,
- CS42L56_HPB_VOLUME, 0, 0x84, 0x48, hl_tlv),
+ CS42L56_HPB_VOLUME, 0, 0x44, 0x48, hl_tlv),
SOC_DOUBLE_R_SX_TLV("LineOut Volume", CS42L56_LOA_VOLUME,
- CS42L56_LOB_VOLUME, 0, 0x84, 0x48, hl_tlv),
+ CS42L56_LOB_VOLUME, 0, 0x44, 0x48, hl_tlv),
SOC_SINGLE_TLV("Bass Shelving Volume", CS42L56_TONE_CTL,
0, 0x00, 1, tone_tlv),
snd_soc_kcontrol_component(kcontrol);
struct cs47l15 *cs47l15 = snd_soc_component_get_drvdata(component);
+ if (!!ucontrol->value.integer.value[0] == cs47l15->in1_lp_mode)
+ return 0;
+
switch (ucontrol->value.integer.value[0]) {
case 0:
/* Set IN1 to normal mode */
break;
}
- return 0;
+ return 1;
}
static const struct snd_kcontrol_new cs47l15_snd_controls[] = {
end:
snd_soc_dapm_mutex_unlock(dapm);
- return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+ ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+ if (ret < 0) {
+ dev_err(madera->dev, "Failed to update demux power state: %d\n", ret);
+ return ret;
+ }
+
+ return change;
}
static SOC_ENUM_SINGLE_DECL(cs47l92_outdemux_enum,
SOC_ENUM("ADC2 NG Delay", adc2_ng_delay_enum),
SOC_SINGLE_SX_TLV("ADC1A PGA Volume",
- CS53L30_ADC1A_AFE_CTL, 0, 0x34, 0x18, pga_tlv),
+ CS53L30_ADC1A_AFE_CTL, 0, 0x34, 0x24, pga_tlv),
SOC_SINGLE_SX_TLV("ADC1B PGA Volume",
- CS53L30_ADC1B_AFE_CTL, 0, 0x34, 0x18, pga_tlv),
+ CS53L30_ADC1B_AFE_CTL, 0, 0x34, 0x24, pga_tlv),
SOC_SINGLE_SX_TLV("ADC2A PGA Volume",
- CS53L30_ADC2A_AFE_CTL, 0, 0x34, 0x18, pga_tlv),
+ CS53L30_ADC2A_AFE_CTL, 0, 0x34, 0x24, pga_tlv),
SOC_SINGLE_SX_TLV("ADC2B PGA Volume",
- CS53L30_ADC2B_AFE_CTL, 0, 0x34, 0x18, pga_tlv),
+ CS53L30_ADC2B_AFE_CTL, 0, 0x34, 0x24, pga_tlv),
SOC_SINGLE_SX_TLV("ADC1A Digital Volume",
- CS53L30_ADC1A_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv),
+ CS53L30_ADC1A_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv),
SOC_SINGLE_SX_TLV("ADC1B Digital Volume",
- CS53L30_ADC1B_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv),
+ CS53L30_ADC1B_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv),
SOC_SINGLE_SX_TLV("ADC2A Digital Volume",
- CS53L30_ADC2A_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv),
+ CS53L30_ADC2A_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv),
SOC_SINGLE_SX_TLV("ADC2B Digital Volume",
- CS53L30_ADC2B_DIG_VOL, 0, 0xA0, 0x0C, dig_tlv),
+ CS53L30_ADC2B_DIG_VOL, 0, 0xA0, 0x6C, dig_tlv),
};
static const struct snd_soc_dapm_widget cs53l30_dapm_widgets[] = {
if (deemph > 1)
return -EINVAL;
+ if (es8328->deemph == deemph)
+ return 0;
+
ret = es8328_set_deemph(component);
if (ret < 0)
return ret;
es8328->deemph = deemph;
- return 0;
+ return 1;
}
end:
snd_soc_dapm_mutex_unlock(dapm);
- return snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+ ret = snd_soc_dapm_mux_update_power(dapm, kcontrol, mux, e, NULL);
+ if (ret < 0) {
+ dev_err(madera->dev, "Failed to update demux power state: %d\n", ret);
+ return ret;
+ }
+
+ return change;
}
EXPORT_SYMBOL_GPL(madera_out1_demux_put);
struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
const int adsp_num = e->shift_l;
const unsigned int item = ucontrol->value.enumerated.item[0];
- int ret;
+ int ret = 0;
if (item >= e->items)
return -EINVAL;
"Cannot change '%s' while in use by active audio paths\n",
kcontrol->id.name);
ret = -EBUSY;
- } else {
+ } else if (priv->adsp_rate_cache[adsp_num] != e->values[item]) {
/* Volatile register so defer until the codec is powered up */
priv->adsp_rate_cache[adsp_num] = e->values[item];
- ret = 0;
+ ret = 1;
}
mutex_unlock(&priv->rate_lock);
return max98373_init(slave, regmap);
}
+static int max98373_sdw_remove(struct sdw_slave *slave)
+{
+ struct max98373_priv *max98373 = dev_get_drvdata(&slave->dev);
+
+ if (max98373->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
+ return 0;
+}
+
#if defined(CONFIG_OF)
static const struct of_device_id max98373_of_match[] = {
{ .compatible = "maxim,max98373", },
.pm = &max98373_pm,
},
.probe = max98373_sdw_probe,
- .remove = NULL,
+ .remove = max98373_sdw_remove,
.ops = &max98373_slave_ops,
.id_table = max98373_id,
};
{
struct snd_soc_component *component = codec_dai->component;
struct max98396_priv *max98396 = snd_soc_component_get_drvdata(component);
- unsigned int format = 0;
+ unsigned int format_mask, format = 0;
unsigned int bclk_pol = 0;
int ret, status;
int reg;
bool update = false;
+ format_mask = MAX98396_PCM_MODE_CFG_FORMAT_MASK |
+ MAX98396_PCM_MODE_CFG_LRCLKEDGE;
+
dev_dbg(component->dev, "%s: fmt 0x%08X\n", __func__, fmt);
switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
ret = regmap_read(max98396->regmap, MAX98396_R2041_PCM_MODE_CFG, ®);
if (ret < 0)
return -EINVAL;
- if (format != (reg & MAX98396_PCM_BCLKEDGE_BSEL_MASK)) {
+ if (format != (reg & format_mask)) {
update = true;
} else {
ret = regmap_read(max98396->regmap,
regmap_update_bits(max98396->regmap,
MAX98396_R2041_PCM_MODE_CFG,
- MAX98396_PCM_BCLKEDGE_BSEL_MASK,
- format);
+ format_mask, format);
regmap_update_bits(max98396->regmap,
MAX98396_R2042_PCM_CLK_SETUP,
pll_param->pll_int, pll_param->pll_frac,
pll_param->mclk_scaler, pll_param->pre_factor);
+ snd_soc_component_update_bits(component,
+ NAU8822_REG_POWER_MANAGEMENT_1, NAU8822_PLL_EN_MASK, NAU8822_PLL_OFF);
snd_soc_component_update_bits(component,
NAU8822_REG_PLL_N, NAU8822_PLLMCLK_DIV2 | NAU8822_PLLN_MASK,
(pll_param->pre_factor ? NAU8822_PLLMCLK_DIV2 : 0) |
pll_param->mclk_scaler << NAU8822_MCLKSEL_SFT);
snd_soc_component_update_bits(component,
NAU8822_REG_CLOCKING, NAU8822_CLKM_MASK, NAU8822_CLKM_PLL);
+ snd_soc_component_update_bits(component,
+ NAU8822_REG_POWER_MANAGEMENT_1, NAU8822_PLL_EN_MASK, NAU8822_PLL_ON);
return 0;
}
#define NAU8822_REFIMP_3K 0x3
#define NAU8822_IOBUF_EN (0x1 << 2)
#define NAU8822_ABIAS_EN (0x1 << 3)
+#define NAU8822_PLL_EN_MASK (0x1 << 5)
+#define NAU8822_PLL_ON (0x1 << 5)
+#define NAU8822_PLL_OFF (0x0 << 5)
/* NAU8822_REG_AUDIO_INTERFACE (0x4) */
#define NAU8822_AIFMT_MASK (0x3 << 3)
return 0;
}
+static int rt1308_sdw_remove(struct sdw_slave *slave)
+{
+ struct rt1308_sdw_priv *rt1308 = dev_get_drvdata(&slave->dev);
+
+ if (rt1308->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
+ return 0;
+}
+
static const struct sdw_device_id rt1308_id[] = {
SDW_SLAVE_ENTRY_EXT(0x025d, 0x1308, 0x2, 0, 0),
{},
.pm = &rt1308_pm,
},
.probe = rt1308_sdw_probe,
+ .remove = rt1308_sdw_remove,
.ops = &rt1308_slave_ops,
.id_table = rt1308_id,
};
return rt1316_sdw_init(&slave->dev, regmap, slave);
}
+static int rt1316_sdw_remove(struct sdw_slave *slave)
+{
+ struct rt1316_sdw_priv *rt1316 = dev_get_drvdata(&slave->dev);
+
+ if (rt1316->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
+ return 0;
+}
+
static const struct sdw_device_id rt1316_id[] = {
SDW_SLAVE_ENTRY_EXT(0x025d, 0x1316, 0x3, 0x1, 0),
{},
.pm = &rt1316_pm,
},
.probe = rt1316_sdw_probe,
+ .remove = rt1316_sdw_remove,
.ops = &rt1316_slave_ops,
.id_table = rt1316_id,
};
snd_soc_component_write(component, RT5640_PWR_DIG2, 0x0000);
snd_soc_component_write(component, RT5640_PWR_VOL, 0x0000);
snd_soc_component_write(component, RT5640_PWR_MIXER, 0x0000);
- snd_soc_component_write(component, RT5640_PWR_ANLG1, 0x0000);
+ if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER)
+ snd_soc_component_write(component, RT5640_PWR_ANLG1,
+ 0x0018);
+ else
+ snd_soc_component_write(component, RT5640_PWR_ANLG1,
+ 0x0000);
snd_soc_component_write(component, RT5640_PWR_ANLG2, 0x0000);
break;
static irqreturn_t rt5640_irq(int irq, void *data)
{
struct rt5640_priv *rt5640 = data;
+ int delay = 0;
+
+ if (rt5640->jd_src == RT5640_JD_SRC_HDA_HEADER) {
+ cancel_delayed_work_sync(&rt5640->jack_work);
+ delay = 100;
+ }
if (rt5640->jack)
- queue_delayed_work(system_long_wq, &rt5640->jack_work, 0);
+ queue_delayed_work(system_long_wq, &rt5640->jack_work, delay);
return IRQ_HANDLED;
}
snd_soc_component_update_bits(component, RT5640_DUMMY1, 0x400, 0x0);
+ snd_soc_component_update_bits(component, RT5640_PWR_ANLG1,
+ RT5640_PWR_VREF2, RT5640_PWR_VREF2);
+ usleep_range(10000, 15000);
+ snd_soc_component_update_bits(component, RT5640_PWR_ANLG1,
+ RT5640_PWR_FV2, RT5640_PWR_FV2);
+
rt5640->jack = jack;
ret = request_irq(rt5640->irq, rt5640_irq,
if (device_property_read_u32(component->dev,
"realtek,jack-detect-source", &val) == 0) {
- if (val <= RT5640_JD_SRC_GPIO4) {
+ if (val <= RT5640_JD_SRC_GPIO4)
rt5640->jd_src = val << RT5640_JD_SFT;
- } else if (val == RT5640_JD_SRC_HDA_HEADER) {
+ else if (val == RT5640_JD_SRC_HDA_HEADER)
rt5640->jd_src = RT5640_JD_SRC_HDA_HEADER;
- snd_soc_component_update_bits(component, RT5640_DUMMY1,
- 0x0300, 0x0);
- } else {
+ else
dev_warn(component->dev, "Warning: Invalid jack-detect-source value: %d, leaving jack-detect disabled\n",
val);
- }
}
if (!device_property_read_bool(component->dev, "realtek,jack-detect-not-inverted"))
{
struct rt5682_priv *rt5682 = dev_get_drvdata(&slave->dev);
- if (rt5682 && rt5682->hw_init)
+ if (rt5682->hw_init)
cancel_delayed_work_sync(&rt5682->jack_detect_work);
+ if (rt5682->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
return 0;
}
#include <linux/soundwire/sdw_type.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/module.h>
+#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <sound/soc.h>
#include "rt700.h"
{
struct rt700_priv *rt700 = dev_get_drvdata(&slave->dev);
- if (rt700 && rt700->hw_init) {
+ if (rt700->hw_init) {
cancel_delayed_work_sync(&rt700->jack_detect_work);
cancel_delayed_work_sync(&rt700->jack_btn_check_work);
}
+ if (rt700->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
return 0;
}
if (!rt700->hs_jack)
return;
- if (!rt700->component->card->instantiated)
+ if (!rt700->component->card || !rt700->component->card->instantiated)
return;
reg = RT700_VERB_GET_PIN_SENSE | RT700_HP_OUT;
struct snd_soc_jack *hs_jack, void *data)
{
struct rt700_priv *rt700 = snd_soc_component_get_drvdata(component);
+ int ret;
rt700->hs_jack = hs_jack;
- if (!rt700->hw_init) {
- dev_dbg(&rt700->slave->dev,
- "%s hw_init not ready yet\n", __func__);
+ ret = pm_runtime_resume_and_get(component->dev);
+ if (ret < 0) {
+ if (ret != -EACCES) {
+ dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret);
+ return ret;
+ }
+
+ /* pm_runtime not enabled yet */
+ dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__);
return 0;
}
rt700_jack_init(rt700);
+ pm_runtime_mark_last_busy(component->dev);
+ pm_runtime_put_autosuspend(component->dev);
+
return 0;
}
mutex_init(&rt700->disable_irq_lock);
+ INIT_DELAYED_WORK(&rt700->jack_detect_work,
+ rt700_jack_detect_handler);
+ INIT_DELAYED_WORK(&rt700->jack_btn_check_work,
+ rt700_btn_check_handler);
+
/*
* Mark hw_init to false
* HW init will be performed when device reports present
/* Finish Initial Settings, set power to D3 */
regmap_write(rt700->regmap, RT700_SET_AUDIO_POWER_STATE, AC_PWRST_D3);
- if (!rt700->first_hw_init) {
- INIT_DELAYED_WORK(&rt700->jack_detect_work,
- rt700_jack_detect_handler);
- INIT_DELAYED_WORK(&rt700->jack_btn_check_work,
- rt700_btn_check_handler);
- }
-
/*
* if set_jack callback occurred early than io_init,
* we set up the jack detection function now
#include <linux/mod_devicetable.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/module.h>
+#include <linux/pm_runtime.h>
#include "rt711-sdca.h"
#include "rt711-sdca-sdw.h"
{
struct rt711_sdca_priv *rt711 = dev_get_drvdata(&slave->dev);
- if (rt711 && rt711->hw_init) {
+ if (rt711->hw_init) {
cancel_delayed_work_sync(&rt711->jack_detect_work);
cancel_delayed_work_sync(&rt711->jack_btn_check_work);
}
+ if (rt711->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
+ mutex_destroy(&rt711->calibrate_mutex);
+ mutex_destroy(&rt711->disable_irq_lock);
+
return 0;
}
ret = regmap_write(regmap, addr, value);
if (ret < 0)
- dev_err(rt711->component->dev,
+ dev_err(&rt711->slave->dev,
"Failed to set private value: %06x <= %04x ret=%d\n",
addr, value, ret);
ret = regmap_read(regmap, addr, value);
if (ret < 0)
- dev_err(rt711->component->dev,
+ dev_err(&rt711->slave->dev,
"Failed to get private value: %06x => %04x ret=%d\n",
addr, *value, ret);
if (!rt711->hs_jack)
return;
- if (!rt711->component->card->instantiated)
+ if (!rt711->component->card || !rt711->component->card->instantiated)
return;
/* SDW_SCP_SDCA_INT_SDCA_0 is used for jack detection */
struct snd_soc_jack *hs_jack, void *data)
{
struct rt711_sdca_priv *rt711 = snd_soc_component_get_drvdata(component);
+ int ret;
rt711->hs_jack = hs_jack;
- if (!rt711->hw_init) {
- dev_dbg(&rt711->slave->dev,
- "%s hw_init not ready yet\n", __func__);
+ ret = pm_runtime_resume_and_get(component->dev);
+ if (ret < 0) {
+ if (ret != -EACCES) {
+ dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret);
+ return ret;
+ }
+
+ /* pm_runtime not enabled yet */
+ dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__);
return 0;
}
rt711_sdca_jack_init(rt711);
+
+ pm_runtime_mark_last_busy(component->dev);
+ pm_runtime_put_autosuspend(component->dev);
+
return 0;
}
return 0;
}
-static void rt711_sdca_remove(struct snd_soc_component *component)
-{
- struct rt711_sdca_priv *rt711 = snd_soc_component_get_drvdata(component);
-
- regcache_cache_only(rt711->regmap, true);
- regcache_cache_only(rt711->mbq_regmap, true);
-}
-
static const struct snd_soc_component_driver soc_sdca_dev_rt711 = {
.probe = rt711_sdca_probe,
.controls = rt711_sdca_snd_controls,
.dapm_routes = rt711_sdca_audio_map,
.num_dapm_routes = ARRAY_SIZE(rt711_sdca_audio_map),
.set_jack = rt711_sdca_set_jack_detect,
- .remove = rt711_sdca_remove,
.endianness = 1,
};
rt711->regmap = regmap;
rt711->mbq_regmap = mbq_regmap;
+ mutex_init(&rt711->calibrate_mutex);
mutex_init(&rt711->disable_irq_lock);
+ INIT_DELAYED_WORK(&rt711->jack_detect_work, rt711_sdca_jack_detect_handler);
+ INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_sdca_btn_check_handler);
+
/*
* Mark hw_init to false
* HW init will be performed when device reports present
rt711_sdca_index_update_bits(rt711, RT711_VENDOR_HDA_CTL,
RT711_PUSH_BTN_INT_CTL0, 0x20, 0x00);
- if (!rt711->first_hw_init) {
- INIT_DELAYED_WORK(&rt711->jack_detect_work,
- rt711_sdca_jack_detect_handler);
- INIT_DELAYED_WORK(&rt711->jack_btn_check_work,
- rt711_sdca_btn_check_handler);
- mutex_init(&rt711->calibrate_mutex);
- }
-
/* calibration */
ret = rt711_sdca_calibration(rt711);
if (ret < 0)
#include <linux/soundwire/sdw_type.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/module.h>
+#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <sound/soc.h>
#include "rt711.h"
{
struct rt711_priv *rt711 = dev_get_drvdata(&slave->dev);
- if (rt711 && rt711->hw_init) {
+ if (rt711->hw_init) {
cancel_delayed_work_sync(&rt711->jack_detect_work);
cancel_delayed_work_sync(&rt711->jack_btn_check_work);
cancel_work_sync(&rt711->calibration_work);
}
+ if (rt711->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
+ mutex_destroy(&rt711->calibrate_mutex);
+ mutex_destroy(&rt711->disable_irq_lock);
+
return 0;
}
if (!rt711->hs_jack)
return;
- if (!rt711->component->card->instantiated)
+ if (!rt711->component->card || !rt711->component->card->instantiated)
return;
if (pm_runtime_status_suspended(rt711->slave->dev.parent)) {
struct snd_soc_jack *hs_jack, void *data)
{
struct rt711_priv *rt711 = snd_soc_component_get_drvdata(component);
+ int ret;
rt711->hs_jack = hs_jack;
- if (!rt711->hw_init) {
- dev_dbg(&rt711->slave->dev,
- "%s hw_init not ready yet\n", __func__);
+ ret = pm_runtime_resume_and_get(component->dev);
+ if (ret < 0) {
+ if (ret != -EACCES) {
+ dev_err(component->dev, "%s: failed to resume %d\n", __func__, ret);
+ return ret;
+ }
+
+ /* pm_runtime not enabled yet */
+ dev_dbg(component->dev, "%s: skipping jack init for now\n", __func__);
return 0;
}
rt711_jack_init(rt711);
+ pm_runtime_mark_last_busy(component->dev);
+ pm_runtime_put_autosuspend(component->dev);
+
return 0;
}
return 0;
}
-static void rt711_remove(struct snd_soc_component *component)
-{
- struct rt711_priv *rt711 = snd_soc_component_get_drvdata(component);
-
- regcache_cache_only(rt711->regmap, true);
-}
-
static const struct snd_soc_component_driver soc_codec_dev_rt711 = {
.probe = rt711_probe,
.set_bias_level = rt711_set_bias_level,
.dapm_routes = rt711_audio_map,
.num_dapm_routes = ARRAY_SIZE(rt711_audio_map),
.set_jack = rt711_set_jack_detect,
- .remove = rt711_remove,
.endianness = 1,
};
rt711->sdw_regmap = sdw_regmap;
rt711->regmap = regmap;
+ mutex_init(&rt711->calibrate_mutex);
mutex_init(&rt711->disable_irq_lock);
+ INIT_DELAYED_WORK(&rt711->jack_detect_work, rt711_jack_detect_handler);
+ INIT_DELAYED_WORK(&rt711->jack_btn_check_work, rt711_btn_check_handler);
+ INIT_WORK(&rt711->calibration_work, rt711_calibration_work);
+
/*
* Mark hw_init to false
* HW init will be performed when device reports present
if (rt711->first_hw_init)
rt711_calibration(rt711);
- else {
- INIT_DELAYED_WORK(&rt711->jack_detect_work,
- rt711_jack_detect_handler);
- INIT_DELAYED_WORK(&rt711->jack_btn_check_work,
- rt711_btn_check_handler);
- mutex_init(&rt711->calibrate_mutex);
- INIT_WORK(&rt711->calibration_work, rt711_calibration_work);
+ else
schedule_work(&rt711->calibration_work);
- }
/*
* if set_jack callback occurred early than io_init,
#include <linux/soundwire/sdw_type.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/module.h>
+#include <linux/pm_runtime.h>
#include <linux/regmap.h>
#include <sound/soc.h>
#include "rt715-sdca.h"
return rt715_sdca_init(&slave->dev, mbq_regmap, regmap, slave);
}
+static int rt715_sdca_sdw_remove(struct sdw_slave *slave)
+{
+ struct rt715_sdca_priv *rt715 = dev_get_drvdata(&slave->dev);
+
+ if (rt715->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
+ return 0;
+}
+
static const struct sdw_device_id rt715_sdca_id[] = {
SDW_SLAVE_ENTRY_EXT(0x025d, 0x715, 0x3, 0x1, 0),
SDW_SLAVE_ENTRY_EXT(0x025d, 0x714, 0x3, 0x1, 0),
.pm = &rt715_pm,
},
.probe = rt715_sdca_sdw_probe,
+ .remove = rt715_sdca_sdw_remove,
.ops = &rt715_sdca_slave_ops,
.id_table = rt715_sdca_id,
};
#include <linux/soundwire/sdw_type.h>
#include <linux/soundwire/sdw_registers.h>
#include <linux/module.h>
+#include <linux/pm_runtime.h>
#include <linux/of.h>
#include <linux/regmap.h>
#include <sound/soc.h>
return 0;
}
+static int rt715_sdw_remove(struct sdw_slave *slave)
+{
+ struct rt715_priv *rt715 = dev_get_drvdata(&slave->dev);
+
+ if (rt715->first_hw_init)
+ pm_runtime_disable(&slave->dev);
+
+ return 0;
+}
+
static const struct sdw_device_id rt715_id[] = {
SDW_SLAVE_ENTRY_EXT(0x025d, 0x714, 0x2, 0, 0),
SDW_SLAVE_ENTRY_EXT(0x025d, 0x715, 0x2, 0, 0),
.pm = &rt715_pm,
},
.probe = rt715_sdw_probe,
+ .remove = rt715_sdw_remove,
.ops = &rt715_slave_ops,
.id_table = rt715_id,
};
{
struct sgtl5000_priv *sgtl5000 = i2c_get_clientdata(client);
+ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_DIG_POWER, SGTL5000_DIG_POWER_DEFAULT);
+ regmap_write(sgtl5000->regmap, SGTL5000_CHIP_ANA_POWER, SGTL5000_ANA_POWER_DEFAULT);
+
clk_disable_unprepare(sgtl5000->mclk);
regulator_bulk_disable(sgtl5000->num_supplies, sgtl5000->supplies);
regulator_bulk_free(sgtl5000->num_supplies, sgtl5000->supplies);
return 0;
}
+static void sgtl5000_i2c_shutdown(struct i2c_client *client)
+{
+ sgtl5000_i2c_remove(client);
+}
+
static const struct i2c_device_id sgtl5000_id[] = {
{"sgtl5000", 0},
{},
},
.probe_new = sgtl5000_i2c_probe,
.remove = sgtl5000_i2c_remove,
+ .shutdown = sgtl5000_i2c_shutdown,
.id_table = sgtl5000_id,
};
/*
* SGTL5000_CHIP_DIG_POWER
*/
+#define SGTL5000_DIG_POWER_DEFAULT 0x0000
#define SGTL5000_ADC_EN 0x0040
#define SGTL5000_DAC_EN 0x0020
#define SGTL5000_DAP_POWERUP 0x0010
gpiod_set_value_cansleep(tas2764->reset_gpio, 0);
msleep(20);
gpiod_set_value_cansleep(tas2764->reset_gpio, 1);
+ usleep_range(1000, 2000);
}
snd_soc_component_write(tas2764->component, TAS2764_SW_RST,
TAS2764_RST);
+ usleep_range(1000, 2000);
}
static int tas2764_set_bias_level(struct snd_soc_component *component,
struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
int ret;
- if (tas2764->sdz_gpio)
+ if (tas2764->sdz_gpio) {
gpiod_set_value_cansleep(tas2764->sdz_gpio, 1);
+ usleep_range(1000, 2000);
+ }
ret = snd_soc_component_update_bits(component, TAS2764_PWR_CTRL,
TAS2764_PWR_CTRL_MASK,
};
static SOC_ENUM_SINGLE_DECL(
- tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, 4, tas2764_ASI1_src);
+ tas2764_ASI1_src_enum, TAS2764_TDM_CFG2, TAS2764_TDM_CFG2_SCFG_SHIFT,
+ tas2764_ASI1_src);
static const struct snd_kcontrol_new tas2764_asi1_mux =
SOC_DAPM_ENUM("ASI1 Source", tas2764_ASI1_src_enum);
{
struct snd_soc_component *component = dai->component;
struct tas2764_priv *tas2764 = snd_soc_component_get_drvdata(component);
- u8 tdm_rx_start_slot = 0, asi_cfg_1 = 0;
- int iface;
+ u8 tdm_rx_start_slot = 0, asi_cfg_0 = 0, asi_cfg_1 = 0;
int ret;
switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
+ case SND_SOC_DAIFMT_NB_IF:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_NB_NF:
asi_cfg_1 = TAS2764_TDM_CFG1_RX_RISING;
break;
+ case SND_SOC_DAIFMT_IB_IF:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_IB_NF:
asi_cfg_1 = TAS2764_TDM_CFG1_RX_FALLING;
break;
- default:
- dev_err(tas2764->dev, "ASI format Inverse is not found\n");
- return -EINVAL;
}
ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
switch (fmt & SND_SOC_DAIFMT_FORMAT_MASK) {
case SND_SOC_DAIFMT_I2S:
+ asi_cfg_0 ^= TAS2764_TDM_CFG0_FRAME_START;
+ fallthrough;
case SND_SOC_DAIFMT_DSP_A:
- iface = TAS2764_TDM_CFG2_SCFG_I2S;
tdm_rx_start_slot = 1;
break;
case SND_SOC_DAIFMT_DSP_B:
case SND_SOC_DAIFMT_LEFT_J:
- iface = TAS2764_TDM_CFG2_SCFG_LEFT_J;
tdm_rx_start_slot = 0;
break;
default:
return -EINVAL;
}
- ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
- TAS2764_TDM_CFG1_MASK,
- (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT));
+ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG0,
+ TAS2764_TDM_CFG0_FRAME_START,
+ asi_cfg_0);
if (ret < 0)
return ret;
- ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG2,
- TAS2764_TDM_CFG2_SCFG_MASK, iface);
+ ret = snd_soc_component_update_bits(component, TAS2764_TDM_CFG1,
+ TAS2764_TDM_CFG1_MASK,
+ (tdm_rx_start_slot << TAS2764_TDM_CFG1_51_SHIFT));
if (ret < 0)
return ret;
tas2764->component = component;
- if (tas2764->sdz_gpio)
+ if (tas2764->sdz_gpio) {
gpiod_set_value_cansleep(tas2764->sdz_gpio, 1);
+ usleep_range(1000, 2000);
+ }
tas2764_reset(tas2764);
}
static DECLARE_TLV_DB_SCALE(tas2764_digital_tlv, 1100, 50, 0);
-static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10000, 50, 0);
+static DECLARE_TLV_DB_SCALE(tas2764_playback_volume, -10050, 50, 1);
static const struct snd_kcontrol_new tas2764_snd_controls[] = {
SOC_SINGLE_TLV("Speaker Volume", TAS2764_DVC, 0,
TAS2764_DVC_MAX, 1, tas2764_playback_volume),
- SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 0, 0x14, 0,
+ SOC_SINGLE_TLV("Amp Gain Volume", TAS2764_CHNL_0, 1, 0x14, 0,
tas2764_digital_tlv),
};
{ TAS2764_SW_RST, 0x00 },
{ TAS2764_PWR_CTRL, 0x1a },
{ TAS2764_DVC, 0x00 },
- { TAS2764_CHNL_0, 0x00 },
+ { TAS2764_CHNL_0, 0x28 },
{ TAS2764_TDM_CFG0, 0x09 },
{ TAS2764_TDM_CFG1, 0x02 },
{ TAS2764_TDM_CFG2, 0x0a },
#define TAS2764_TDM_CFG0_MASK GENMASK(3, 1)
#define TAS2764_TDM_CFG0_44_1_48KHZ BIT(3)
#define TAS2764_TDM_CFG0_88_2_96KHZ (BIT(3) | BIT(1))
+#define TAS2764_TDM_CFG0_FRAME_START BIT(0)
/* TDM Configuration Reg1 */
#define TAS2764_TDM_CFG1 TAS2764_REG(0X0, 0x09)
#define TAS2764_TDM_CFG2_RXS_16BITS 0x0
#define TAS2764_TDM_CFG2_RXS_24BITS BIT(0)
#define TAS2764_TDM_CFG2_RXS_32BITS BIT(1)
-#define TAS2764_TDM_CFG2_SCFG_MASK GENMASK(5, 4)
-#define TAS2764_TDM_CFG2_SCFG_I2S 0x0
-#define TAS2764_TDM_CFG2_SCFG_LEFT_J BIT(4)
-#define TAS2764_TDM_CFG2_SCFG_RIGHT_J BIT(5)
+#define TAS2764_TDM_CFG2_SCFG_SHIFT 4
/* TDM Configuration Reg3 */
#define TAS2764_TDM_CFG3 TAS2764_REG(0X0, 0x0c)
bool micbias_vg;
unsigned int dai_fmt;
- unsigned int tdm_delay;
unsigned int slot_width;
};
{
struct snd_soc_component *component = codec_dai->component;
struct adcx140_priv *adcx140 = snd_soc_component_get_drvdata(component);
- unsigned int lsb;
- /* TDM based on DSP mode requires slots to be adjacent */
- lsb = __ffs(tx_mask);
- if ((lsb + 1) != __fls(tx_mask)) {
- dev_err(component->dev, "Invalid mask, slots must be adjacent\n");
+ /*
+ * The chip itself supports arbitrary masks, but the driver currently
+ * only supports adjacent slots beginning at the first slot.
+ */
+ if (tx_mask != GENMASK(__fls(tx_mask), 0)) {
+ dev_err(component->dev, "Only lower adjacent slots are supported\n");
return -EINVAL;
}
return -EINVAL;
}
- adcx140->tdm_delay = lsb;
adcx140->slot_width = slot_width;
return 0;
struct regulator_bulk_data supplies[WCD9335_MAX_SUPPLY];
unsigned int rx_port_value[WCD9335_RX_MAX];
- unsigned int tx_port_value;
+ unsigned int tx_port_value[WCD9335_TX_MAX];
int hph_l_gain;
int hph_r_gain;
u32 rx_bias_count;
struct snd_soc_dapm_update *update = NULL;
u32 port_id = w->shift;
+ if (wcd->rx_port_value[port_id] == ucontrol->value.enumerated.item[0])
+ return 0;
+
wcd->rx_port_value[port_id] = ucontrol->value.enumerated.item[0];
+ /* Remove channel from any list it's in before adding it to a new one */
+ list_del_init(&wcd->rx_chs[port_id].list);
+
switch (wcd->rx_port_value[port_id]) {
case 0:
- list_del_init(&wcd->rx_chs[port_id].list);
+ /* Channel already removed from lists. Nothing to do here */
break;
case 1:
list_add_tail(&wcd->rx_chs[port_id].list,
struct snd_soc_dapm_context *dapm = snd_soc_dapm_kcontrol_dapm(kc);
struct wcd9335_codec *wcd = dev_get_drvdata(dapm->dev);
+ struct snd_soc_dapm_widget *widget = snd_soc_dapm_kcontrol_widget(kc);
+ struct soc_mixer_control *mixer =
+ (struct soc_mixer_control *)kc->private_value;
+ int dai_id = widget->shift;
+ int port_id = mixer->shift;
- ucontrol->value.integer.value[0] = wcd->tx_port_value;
+ ucontrol->value.integer.value[0] = wcd->tx_port_value[port_id] == dai_id;
return 0;
}
case AIF2_CAP:
case AIF3_CAP:
/* only add to the list if value not set */
- if (enable && !(wcd->tx_port_value & BIT(port_id))) {
- wcd->tx_port_value |= BIT(port_id);
+ if (enable && wcd->tx_port_value[port_id] != dai_id) {
+ wcd->tx_port_value[port_id] = dai_id;
list_add_tail(&wcd->tx_chs[port_id].list,
&wcd->dai[dai_id].slim_ch_list);
- } else if (!enable && (wcd->tx_port_value & BIT(port_id))) {
- wcd->tx_port_value &= ~BIT(port_id);
+ } else if (!enable && wcd->tx_port_value[port_id] == dai_id) {
+ wcd->tx_port_value[port_id] = -1;
list_del_init(&wcd->tx_chs[port_id].list);
}
break;
struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
int path = e->shift_l;
+ if (wcd938x->tx_mode[path] == ucontrol->value.enumerated.item[0])
+ return 0;
+
wcd938x->tx_mode[path] = ucontrol->value.enumerated.item[0];
return 1;
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+ if (wcd938x->hph_mode == ucontrol->value.enumerated.item[0])
+ return 0;
+
wcd938x->hph_mode = ucontrol->value.enumerated.item[0];
return 1;
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+ if (wcd938x->ldoh == ucontrol->value.integer.value[0])
+ return 0;
+
wcd938x->ldoh = ucontrol->value.integer.value[0];
return 1;
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct wcd938x_priv *wcd938x = snd_soc_component_get_drvdata(component);
+ if (wcd938x->bcs_dis == ucontrol->value.integer.value[0])
+ return 0;
+
wcd938x->bcs_dis = ucontrol->value.integer.value[0];
return 1;
{
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct arizona *arizona = dev_get_drvdata(component->dev->parent);
+ uint16_t dac_comp_coeff = get_unaligned_be16(ucontrol->value.bytes.data);
+ int ret = 0;
mutex_lock(&arizona->dac_comp_lock);
- arizona->dac_comp_coeff = get_unaligned_be16(ucontrol->value.bytes.data);
+ if (arizona->dac_comp_coeff != dac_comp_coeff) {
+ arizona->dac_comp_coeff = dac_comp_coeff;
+ ret = 1;
+ }
mutex_unlock(&arizona->dac_comp_lock);
- return 0;
+ return ret;
}
static int wm5102_out_comp_switch_get(struct snd_kcontrol *kcontrol,
{
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct arizona *arizona = dev_get_drvdata(component->dev->parent);
+ struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value;
+ int ret = 0;
+
+ if (ucontrol->value.integer.value[0] > mc->max)
+ return -EINVAL;
mutex_lock(&arizona->dac_comp_lock);
- arizona->dac_comp_enabled = ucontrol->value.integer.value[0];
+ if (arizona->dac_comp_enabled != ucontrol->value.integer.value[0]) {
+ arizona->dac_comp_enabled = ucontrol->value.integer.value[0];
+ ret = 1;
+ }
mutex_unlock(&arizona->dac_comp_lock);
- return 0;
+ return ret;
}
static const char * const wm5102_osr_text[] = {
unsigned int rnew = (!!ucontrol->value.integer.value[1]) << mc->rshift;
unsigned int lold, rold;
unsigned int lena, rena;
+ bool change = false;
int ret;
snd_soc_dapm_mutex_lock(dapm);
goto err;
}
- ret = regmap_update_bits(arizona->regmap, ARIZONA_DRE_ENABLE,
- mask, lnew | rnew);
+ ret = regmap_update_bits_check(arizona->regmap, ARIZONA_DRE_ENABLE,
+ mask, lnew | rnew, &change);
if (ret) {
dev_err(arizona->dev, "Failed to set DRE: %d\n", ret);
goto err;
if (!rnew && rold)
wm5110_clear_pga_volume(arizona, mc->rshift);
+ if (change)
+ ret = 1;
+
err:
snd_soc_dapm_mutex_unlock(dapm);
#endif
static const struct dev_pm_ops wm8962_pm = {
+ SET_SYSTEM_SLEEP_PM_OPS(pm_runtime_force_suspend, pm_runtime_force_resume)
SET_RUNTIME_PM_OPS(wm8962_runtime_suspend, wm8962_runtime_resume, NULL)
};
struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
unsigned int mode_reg, mode_index;
unsigned int mux, inmode, src_val, mode_val;
+ int change, ret;
mux = ucontrol->value.enumerated.item[0];
if (mux > 1)
snd_soc_component_update_bits(component, mode_reg,
ARIZONA_IN1_MODE_MASK, mode_val);
- snd_soc_component_update_bits(component, e->reg,
- ARIZONA_IN1L_SRC_MASK |
- ARIZONA_IN1L_SRC_SE_MASK,
- src_val);
+ change = snd_soc_component_update_bits(component, e->reg,
+ ARIZONA_IN1L_SRC_MASK |
+ ARIZONA_IN1L_SRC_SE_MASK,
+ src_val);
- return snd_soc_dapm_mux_update_power(dapm, kcontrol,
- ucontrol->value.enumerated.item[0],
- e, NULL);
+ ret = snd_soc_dapm_mux_update_power(dapm, kcontrol,
+ ucontrol->value.enumerated.item[0],
+ e, NULL);
+ if (ret < 0) {
+ dev_err(arizona->dev, "Failed to update demux power state: %d\n", ret);
+ return ret;
+ }
+
+ return change;
}
static const char * const wm8998_inmux_texts[] = {
struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
struct soc_enum *e = (struct soc_enum *)kcontrol->private_value;
struct wm_adsp *dsp = snd_soc_component_get_drvdata(component);
- int ret = 0;
+ int ret = 1;
if (ucontrol->value.enumerated.item[0] == dsp[e->shift_l].fw)
return 0;
snd_soc_dapm_sync(dapm);
}
- return 0;
+ return 1;
}
EXPORT_SYMBOL_GPL(wm_adsp2_preloader_put);
{ .compatible = "fsl,imx8mm-sai", .data = &fsl_sai_imx8mm_data },
{ .compatible = "fsl,imx8mp-sai", .data = &fsl_sai_imx8mp_data },
{ .compatible = "fsl,imx8ulp-sai", .data = &fsl_sai_imx8ulp_data },
+ { .compatible = "fsl,imx8mn-sai", .data = &fsl_sai_imx8mp_data },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, fsl_sai_ids);
ports@0 {
(X) (A) mcpu: port@0 { mcpu0_ep: endpoint { remote-endpoint = <&mcodec0_ep>; }; };
(y) port@1 { mcpu1_ep: endpoint { remote-endpoint = <&cpu1_ep>; }; };
-(y) port@1 { mcpu2_ep: endpoint { remote-endpoint = <&cpu2_ep>; }; };
+(y) port@2 { mcpu2_ep: endpoint { remote-endpoint = <&cpu2_ep>; }; };
};
ports@1 {
(X) port@0 { mcodec0_ep: endpoint { remote-endpoint = <&mcpu0_ep>; }; };
-(y) port@0 { mcodec1_ep: endpoint { remote-endpoint = <&codec1_ep>; }; };
-(y) port@1 { mcodec2_ep: endpoint { remote-endpoint = <&codec2_ep>; }; };
+(y) port@1 { mcodec1_ep: endpoint { remote-endpoint = <&codec1_ep>; }; };
+(y) port@2 { mcodec2_ep: endpoint { remote-endpoint = <&codec2_ep>; }; };
};
};
};
static int
avs_parse_uuid_token(struct snd_soc_component *comp, void *elem, void *object, u32 offset)
{
- struct snd_soc_tplg_vendor_value_elem *tuple = elem;
+ struct snd_soc_tplg_vendor_uuid_elem *tuple = elem;
guid_t *val = (guid_t *)((u8 *)object + offset);
- guid_copy((guid_t *)val, (const guid_t *)&tuple->value);
+ guid_copy((guid_t *)val, (const guid_t *)&tuple->uuid);
return 0;
}
priv->spkvdd_en_gpio = gpiod_get(codec_dev, "wlf,spkvdd-ena", GPIOD_OUT_LOW);
put_device(codec_dev);
- if (IS_ERR(priv->spkvdd_en_gpio))
- return dev_err_probe(dev, PTR_ERR(priv->spkvdd_en_gpio), "getting spkvdd-GPIO\n");
+ if (IS_ERR(priv->spkvdd_en_gpio)) {
+ ret = PTR_ERR(priv->spkvdd_en_gpio);
+ /*
+ * The spkvdd gpio-lookup is registered by: drivers/mfd/arizona-spi.c,
+ * so -ENOENT means that arizona-spi hasn't probed yet.
+ */
+ if (ret == -ENOENT)
+ ret = -EPROBE_DEFER;
+
+ return dev_err_probe(dev, ret, "getting spkvdd-GPIO\n");
+ }
/* override platform name, if required */
byt_wm5102_card.dev = dev;
},
};
+/*
+ * Mapping between ACPI instance id and speaker position.
+ *
+ * Four speakers:
+ * 0: Tweeter left, 1: Woofer left
+ * 2: Tweeter right, 3: Woofer right
+ */
static struct snd_soc_codec_conf cs35l41_codec_conf[] = {
{
.dlc = COMP_CODEC_CONF(CS35L41_DEV0_NAME),
- .name_prefix = "WL",
+ .name_prefix = "TL",
},
{
.dlc = COMP_CODEC_CONF(CS35L41_DEV1_NAME),
- .name_prefix = "WR",
+ .name_prefix = "WL",
},
{
.dlc = COMP_CODEC_CONF(CS35L41_DEV2_NAME),
- .name_prefix = "TL",
+ .name_prefix = "TR",
},
{
.dlc = COMP_CODEC_CONF(CS35L41_DEV3_NAME),
- .name_prefix = "TR",
+ .name_prefix = "WR",
},
};
return ret;
}
+/*
+ * Channel map:
+ *
+ * TL/WL: ASPRX1 on slot 0, ASPRX2 on slot 1 (default)
+ * TR/WR: ASPRX1 on slot 1, ASPRX2 on slot 0
+ */
+static const struct {
+ unsigned int rx[2];
+} cs35l41_channel_map[] = {
+ {.rx = {0, 1}}, /* TL */
+ {.rx = {0, 1}}, /* WL */
+ {.rx = {1, 0}}, /* TR */
+ {.rx = {1, 0}}, /* WR */
+};
+
static int cs35l41_hw_params(struct snd_pcm_substream *substream,
struct snd_pcm_hw_params *params)
{
ret);
return ret;
}
+
+ /* setup channel map */
+ ret = snd_soc_dai_set_channel_map(codec_dai, 0, NULL,
+ ARRAY_SIZE(cs35l41_channel_map[i].rx),
+ (unsigned int *)cs35l41_channel_map[i].rx);
+ if (ret < 0) {
+ dev_err(codec_dai->dev, "fail to set channel map, ret %d\n",
+ ret);
+ return ret;
+ }
}
return 0;
static int is_legacy_cpu;
-static struct snd_soc_jack sof_hdmi[3];
-
struct sof_hdmi_pcm {
struct list_head head;
struct snd_soc_dai *codec_dai;
+ struct snd_soc_jack hdmi_jack;
int device;
};
char jack_name[NAME_SIZE];
struct sof_hdmi_pcm *pcm;
int err;
- int i = 0;
/* HDMI is not supported by SOF on Baytrail/CherryTrail */
if (is_legacy_cpu || !ctx->idisp_codec)
snprintf(jack_name, sizeof(jack_name),
"HDMI/DP, pcm=%d Jack", pcm->device);
err = snd_soc_card_jack_new(card, jack_name,
- SND_JACK_AVOUT, &sof_hdmi[i]);
+ SND_JACK_AVOUT, &pcm->hdmi_jack);
if (err)
return err;
err = hdac_hdmi_jack_init(pcm->codec_dai, pcm->device,
- &sof_hdmi[i]);
+ &pcm->hdmi_jack);
if (err < 0)
return err;
-
- i++;
}
if (sof_rt5682_quirk & SOF_MAX98373_SPEAKER_AMP_PRESENT) {
.late_probe = sof_sdw_card_late_probe,
};
+static void mc_dailink_exit_loop(struct snd_soc_card *card)
+{
+ struct snd_soc_dai_link *link;
+ int ret;
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) {
+ if (!codec_info_list[i].exit)
+ continue;
+ /*
+ * We don't need to call .exit function if there is no matched
+ * dai link found.
+ */
+ for_each_card_prelinks(card, j, link) {
+ if (!strcmp(link->codecs[0].dai_name,
+ codec_info_list[i].dai_name)) {
+ ret = codec_info_list[i].exit(card, link);
+ if (ret)
+ dev_warn(card->dev,
+ "codec exit failed %d\n",
+ ret);
+ break;
+ }
+ }
+ }
+}
+
static int mc_probe(struct platform_device *pdev)
{
struct snd_soc_card *card = &card_sof_sdw;
ret = devm_snd_soc_register_card(&pdev->dev, card);
if (ret) {
dev_err(card->dev, "snd_soc_register_card failed %d\n", ret);
+ mc_dailink_exit_loop(card);
return ret;
}
static int mc_remove(struct platform_device *pdev)
{
struct snd_soc_card *card = platform_get_drvdata(pdev);
- struct snd_soc_dai_link *link;
- int ret;
- int i, j;
- for (i = 0; i < ARRAY_SIZE(codec_info_list); i++) {
- if (!codec_info_list[i].exit)
- continue;
- /*
- * We don't need to call .exit function if there is no matched
- * dai link found.
- */
- for_each_card_prelinks(card, j, link) {
- if (!strcmp(link->codecs[0].dai_name,
- codec_info_list[i].dai_name)) {
- ret = codec_info_list[i].exit(card, link);
- if (ret)
- dev_warn(&pdev->dev,
- "codec exit failed %d\n",
- ret);
- break;
- }
- }
- }
+ mc_dailink_exit_loop(card);
return 0;
}
struct nhlt_fmt_cfg *fmt_cfg;
struct wav_fmt_ext *wav_fmt;
unsigned long rate;
- bool present = false;
int rate_index = 0;
u16 channels, bps;
u8 clk_src;
if (fmt->fmt_count == 0)
return;
+ fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
for (i = 0; i < fmt->fmt_count; i++) {
- fmt_cfg = &fmt->fmt_config[i];
- wav_fmt = &fmt_cfg->fmt_ext;
+ struct nhlt_fmt_cfg *saved_fmt_cfg = fmt_cfg;
+ bool present = false;
+
+ wav_fmt = &saved_fmt_cfg->fmt_ext;
channels = wav_fmt->fmt.channels;
bps = wav_fmt->fmt.bits_per_sample;
* derive the rate.
*/
for (j = i; j < fmt->fmt_count; j++) {
- fmt_cfg = &fmt->fmt_config[j];
- wav_fmt = &fmt_cfg->fmt_ext;
+ struct nhlt_fmt_cfg *tmp_fmt_cfg = fmt_cfg;
+
+ wav_fmt = &tmp_fmt_cfg->fmt_ext;
if ((fs == wav_fmt->fmt.samples_per_sec) &&
- (bps == wav_fmt->fmt.bits_per_sample))
+ (bps == wav_fmt->fmt.bits_per_sample)) {
channels = max_t(u16, channels,
wav_fmt->fmt.channels);
+ saved_fmt_cfg = tmp_fmt_cfg;
+ }
+ /* Move to the next nhlt_fmt_cfg */
+ tmp_fmt_cfg = (struct nhlt_fmt_cfg *)(tmp_fmt_cfg->config.caps +
+ tmp_fmt_cfg->config.size);
}
rate = channels * bps * fs;
/* Fill rate and parent for sclk/sclkfs */
if (!present) {
+ struct nhlt_fmt_cfg *first_fmt_cfg;
+
+ first_fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
i2s_config_ext = (struct skl_i2s_config_blob_ext *)
- fmt->fmt_config[0].config.caps;
+ first_fmt_cfg->config.caps;
/* MCLK Divider Source Select */
if (is_legacy_blob(i2s_config_ext->hdr.sig)) {
parent = skl_get_parent_clk(clk_src);
+ /* Move to the next nhlt_fmt_cfg */
+ fmt_cfg = (struct nhlt_fmt_cfg *)(fmt_cfg->config.caps +
+ fmt_cfg->config.size);
/*
* Do not copy the config data if there is no parent
* clock available for this clock source select
continue;
sclk[id].rate_cfg[rate_index].rate = rate;
- sclk[id].rate_cfg[rate_index].config = fmt_cfg;
+ sclk[id].rate_cfg[rate_index].config = saved_fmt_cfg;
sclkfs[id].rate_cfg[rate_index].rate = rate;
- sclkfs[id].rate_cfg[rate_index].config = fmt_cfg;
+ sclkfs[id].rate_cfg[rate_index].config = saved_fmt_cfg;
sclk[id].parent_name = parent->name;
sclkfs[id].parent_name = parent->name;
{
struct skl_i2s_config_blob_ext *i2s_config_ext;
struct skl_i2s_config_blob_legacy *i2s_config;
- struct nhlt_specific_cfg *fmt_cfg;
+ struct nhlt_fmt_cfg *fmt_cfg;
struct skl_clk_parent_src *parent;
u32 clkdiv, div_ratio;
u8 clk_src;
- fmt_cfg = &fmt->fmt_config[0].config;
- i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->caps;
+ fmt_cfg = (struct nhlt_fmt_cfg *)fmt->fmt_config;
+ i2s_config_ext = (struct skl_i2s_config_blob_ext *)fmt_cfg->config.caps;
/* MCLK Divider Source Select and divider */
if (is_legacy_blob(i2s_config_ext->hdr.sig)) {
return;
mclk[id].rate_cfg[0].rate = parent->rate/div_ratio;
- mclk[id].rate_cfg[0].config = &fmt->fmt_config[0];
+ mclk[id].rate_cfg[0].config = fmt_cfg;
mclk[id].parent_name = parent->name;
}
struct snd_pcm_runtime *runtime = substream->runtime;
unsigned long size, offset;
- vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
size = vma->vm_end - vma->vm_start;
offset = vma->vm_pgoff << PAGE_SHIFT;
return io_remap_pfn_range(vma, vma->vm_start,
cfg.num_channels = runtime->channels;
cfg.bit_width = prtd->bits_per_sample;
+ if (prtd->state) {
+ /* clear the previous setup if any */
+ q6apm_graph_stop(prtd->graph);
+ q6apm_unmap_memory_regions(prtd->graph, substream->stream);
+ }
+
prtd->pcm_count = snd_pcm_lib_period_bytes(substream);
prtd->pos = 0;
/* rate and channels are sent to audio driver */
id = idr_alloc(&apm->graph_idr, graph, graph_id, graph_id + 1, GFP_KERNEL);
if (id < 0) {
dev_err(apm->dev, "Unable to allocate graph id (%d)\n", graph_id);
+ kfree(graph->graph);
kfree(graph);
mutex_unlock(&apm->lock);
return ERR_PTR(id);
snd_soc_dapm_new_control_unlocked(struct snd_soc_dapm_context *dapm,
const struct snd_soc_dapm_widget *widget);
+static unsigned int soc_dapm_read(struct snd_soc_dapm_context *dapm, int reg);
+
/* dapm power sequences - make this per codec in the future */
static int dapm_up_seq[] = {
[snd_soc_dapm_pre] = 1,
snd_soc_dapm_add_path(widget->dapm, data->widget,
widget, NULL, NULL);
+ } else if (e->reg != SND_SOC_NOPM) {
+ data->value = soc_dapm_read(widget->dapm, e->reg) &
+ (e->mask << e->shift_l);
}
break;
default:
return -EINVAL;
if (mc->platform_max && tmp > mc->platform_max)
return -EINVAL;
- if (tmp > mc->max - mc->min + 1)
+ if (tmp > mc->max - mc->min)
return -EINVAL;
if (invert)
return -EINVAL;
if (mc->platform_max && tmp > mc->platform_max)
return -EINVAL;
- if (tmp > mc->max - mc->min + 1)
+ if (tmp > mc->max - mc->min)
return -EINVAL;
if (invert)
* Power Management.
*/
-static int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask)
+int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask)
{
+ struct sof_intel_hda_dev *hda = sdev->pdata->hw_pdata;
+ const struct sof_intel_dsp_desc *chip = hda->desc;
unsigned int cpa;
u32 adspcs;
int ret;
+ /* restrict core_mask to host managed cores mask */
+ core_mask &= chip->host_managed_cores_mask;
+ /* return if core_mask is not valid */
+ if (!core_mask)
+ return 0;
+
/* update bits */
snd_sof_dsp_update_bits(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPCS,
HDA_DSP_ADSPCS_SPA_MASK(core_mask),
}
/*
- * first boot sequence has some extra steps. core 0 waits for power
- * status on core 1, so power up core 1 also momentarily, keep it in
- * reset/stall and then turn it off
+ * first boot sequence has some extra steps.
+ * power on all host managed cores and only unstall/run the boot core to boot the
+ * DSP then turn off all non boot cores (if any) is powered on.
*/
static int cl_dsp_init(struct snd_sof_dev *sdev, int stream_tag, bool imr_boot)
{
int ret;
/* step 1: power up corex */
- ret = hda_dsp_enable_core(sdev, chip->host_managed_cores_mask);
+ ret = hda_dsp_core_power_up(sdev, chip->host_managed_cores_mask);
if (ret < 0) {
if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS)
dev_err(sdev->dev, "error: dsp core 0/1 power up failed\n");
snd_sof_dsp_write(sdev, HDA_DSP_BAR, chip->ipc_req, ipc_hdr);
/* step 3: unset core 0 reset state & unstall/run core 0 */
- ret = hda_dsp_core_run(sdev, BIT(0));
+ ret = hda_dsp_core_run(sdev, chip->init_core_mask);
if (ret < 0) {
if (hda->boot_iteration == HDA_FW_BOOT_ATTEMPTS)
dev_err(sdev->dev,
struct snd_dma_buffer dmab;
int ret, ret1, i;
- if (hda->imrboot_supported && !sdev->first_boot) {
+ if (sdev->system_suspend_target < SOF_SUSPEND_S4 &&
+ hda->imrboot_supported && !sdev->first_boot) {
dev_dbg(sdev->dev, "IMR restore supported, booting from IMR directly\n");
hda->boot_iteration = 0;
ret = hda_dsp_boot_imr(sdev);
goto found;
}
- switch (sof_hda_position_quirk) {
- case SOF_HDA_POSITION_QUIRK_USE_SKYLAKE_LEGACY:
- /*
- * This legacy code, inherited from the Skylake driver,
- * mixes DPIB registers and DPIB DDR updates and
- * does not seem to follow any known hardware recommendations.
- * It's not clear e.g. why there is a different flow
- * for capture and playback, the only information that matters is
- * what traffic class is used, and on all SOF-enabled platforms
- * only VC0 is supported so the work-around was likely not necessary
- * and quite possibly wrong.
- */
-
- /* DPIB/posbuf position mode:
- * For Playback, Use DPIB register from HDA space which
- * reflects the actual data transferred.
- * For Capture, Use the position buffer for pointer, as DPIB
- * is not accurate enough, its update may be completed
- * earlier than the data written to DDR.
- */
- if (substream->stream == SNDRV_PCM_STREAM_PLAYBACK) {
- pos = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR,
- AZX_REG_VS_SDXDPIB_XBASE +
- (AZX_REG_VS_SDXDPIB_XINTERVAL *
- hstream->index));
- } else {
- /*
- * For capture stream, we need more workaround to fix the
- * position incorrect issue:
- *
- * 1. Wait at least 20us before reading position buffer after
- * the interrupt generated(IOC), to make sure position update
- * happens on frame boundary i.e. 20.833uSec for 48KHz.
- * 2. Perform a dummy Read to DPIB register to flush DMA
- * position value.
- * 3. Read the DMA Position from posbuf. Now the readback
- * value should be >= period boundary.
- */
- usleep_range(20, 21);
- snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR,
- AZX_REG_VS_SDXDPIB_XBASE +
- (AZX_REG_VS_SDXDPIB_XINTERVAL *
- hstream->index));
- pos = snd_hdac_stream_get_pos_posbuf(hstream);
- }
- break;
- case SOF_HDA_POSITION_QUIRK_USE_DPIB_REGISTERS:
- /*
- * In case VC1 traffic is disabled this is the recommended option
- */
- pos = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR,
- AZX_REG_VS_SDXDPIB_XBASE +
- (AZX_REG_VS_SDXDPIB_XINTERVAL *
- hstream->index));
- break;
- case SOF_HDA_POSITION_QUIRK_USE_DPIB_DDR_UPDATE:
- /*
- * This is the recommended option when VC1 is enabled.
- * While this isn't needed for SOF platforms it's added for
- * consistency and debug.
- */
- pos = snd_hdac_stream_get_pos_posbuf(hstream);
- break;
- default:
- dev_err_once(sdev->dev, "hda_position_quirk value %d not supported\n",
- sof_hda_position_quirk);
- pos = 0;
- break;
- }
-
- if (pos >= hstream->bufsize)
- pos = 0;
-
+ pos = hda_dsp_stream_get_position(hstream, substream->stream, true);
found:
pos = bytes_to_frames(substream->runtime, pos);
}
static void
-hda_dsp_set_bytes_transferred(struct hdac_stream *hstream, u64 buffer_size)
+hda_dsp_compr_bytes_transferred(struct hdac_stream *hstream, int direction)
{
+ u64 buffer_size = hstream->bufsize;
u64 prev_pos, pos, num_bytes;
div64_u64_rem(hstream->curr_pos, buffer_size, &prev_pos);
- pos = snd_hdac_stream_get_pos_posbuf(hstream);
+ pos = hda_dsp_stream_get_position(hstream, direction, false);
if (pos < prev_pos)
num_bytes = (buffer_size - prev_pos) + pos;
if (s->substream && sof_hda->no_ipc_position) {
snd_sof_pcm_period_elapsed(s->substream);
} else if (s->cstream) {
- hda_dsp_set_bytes_transferred(s,
- s->cstream->runtime->buffer_size);
+ hda_dsp_compr_bytes_transferred(s, s->cstream->direction);
snd_compr_fragment_elapsed(s->cstream);
}
}
devm_kfree(sdev->dev, hda_stream);
}
}
+
+snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
+ int direction, bool can_sleep)
+{
+ struct hdac_ext_stream *hext_stream = stream_to_hdac_ext_stream(hstream);
+ struct sof_intel_hda_stream *hda_stream = hstream_to_sof_hda_stream(hext_stream);
+ struct snd_sof_dev *sdev = hda_stream->sdev;
+ snd_pcm_uframes_t pos;
+
+ switch (sof_hda_position_quirk) {
+ case SOF_HDA_POSITION_QUIRK_USE_SKYLAKE_LEGACY:
+ /*
+ * This legacy code, inherited from the Skylake driver,
+ * mixes DPIB registers and DPIB DDR updates and
+ * does not seem to follow any known hardware recommendations.
+ * It's not clear e.g. why there is a different flow
+ * for capture and playback, the only information that matters is
+ * what traffic class is used, and on all SOF-enabled platforms
+ * only VC0 is supported so the work-around was likely not necessary
+ * and quite possibly wrong.
+ */
+
+ /* DPIB/posbuf position mode:
+ * For Playback, Use DPIB register from HDA space which
+ * reflects the actual data transferred.
+ * For Capture, Use the position buffer for pointer, as DPIB
+ * is not accurate enough, its update may be completed
+ * earlier than the data written to DDR.
+ */
+ if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+ pos = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR,
+ AZX_REG_VS_SDXDPIB_XBASE +
+ (AZX_REG_VS_SDXDPIB_XINTERVAL *
+ hstream->index));
+ } else {
+ /*
+ * For capture stream, we need more workaround to fix the
+ * position incorrect issue:
+ *
+ * 1. Wait at least 20us before reading position buffer after
+ * the interrupt generated(IOC), to make sure position update
+ * happens on frame boundary i.e. 20.833uSec for 48KHz.
+ * 2. Perform a dummy Read to DPIB register to flush DMA
+ * position value.
+ * 3. Read the DMA Position from posbuf. Now the readback
+ * value should be >= period boundary.
+ */
+ if (can_sleep)
+ usleep_range(20, 21);
+
+ snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR,
+ AZX_REG_VS_SDXDPIB_XBASE +
+ (AZX_REG_VS_SDXDPIB_XINTERVAL *
+ hstream->index));
+ pos = snd_hdac_stream_get_pos_posbuf(hstream);
+ }
+ break;
+ case SOF_HDA_POSITION_QUIRK_USE_DPIB_REGISTERS:
+ /*
+ * In case VC1 traffic is disabled this is the recommended option
+ */
+ pos = snd_sof_dsp_read(sdev, HDA_DSP_HDA_BAR,
+ AZX_REG_VS_SDXDPIB_XBASE +
+ (AZX_REG_VS_SDXDPIB_XINTERVAL *
+ hstream->index));
+ break;
+ case SOF_HDA_POSITION_QUIRK_USE_DPIB_DDR_UPDATE:
+ /*
+ * This is the recommended option when VC1 is enabled.
+ * While this isn't needed for SOF platforms it's added for
+ * consistency and debug.
+ */
+ pos = snd_hdac_stream_get_pos_posbuf(hstream);
+ break;
+ default:
+ dev_err_once(sdev->dev, "hda_position_quirk value %d not supported\n",
+ sof_hda_position_quirk);
+ pos = 0;
+ break;
+ }
+
+ if (pos >= hstream->bufsize)
+ pos = 0;
+
+ return pos;
+}
*/
int hda_dsp_probe(struct snd_sof_dev *sdev);
int hda_dsp_remove(struct snd_sof_dev *sdev);
+int hda_dsp_core_power_up(struct snd_sof_dev *sdev, unsigned int core_mask);
int hda_dsp_core_run(struct snd_sof_dev *sdev, unsigned int core_mask);
int hda_dsp_enable_core(struct snd_sof_dev *sdev, unsigned int core_mask);
int hda_dsp_core_reset_power_down(struct snd_sof_dev *sdev,
bool hda_dsp_check_ipc_irq(struct snd_sof_dev *sdev);
bool hda_dsp_check_stream_irq(struct snd_sof_dev *sdev);
+snd_pcm_uframes_t hda_dsp_stream_get_position(struct hdac_stream *hstream,
+ int direction, bool can_sleep);
+
struct hdac_ext_stream *
hda_dsp_stream_get(struct snd_sof_dev *sdev, int direction, u32 flags);
int hda_dsp_stream_put(struct snd_sof_dev *sdev, int direction, int stream_tag);
struct sof_ipc_ctrl_data *cdata;
int ret;
- scontrol->ipc_control_data = kzalloc(scontrol->max_size, GFP_KERNEL);
- if (!scontrol->ipc_control_data)
- return -ENOMEM;
-
- if (scontrol->max_size < sizeof(*cdata) ||
- scontrol->max_size < sizeof(struct sof_abi_hdr)) {
- ret = -EINVAL;
- goto err;
+ if (scontrol->max_size < (sizeof(*cdata) + sizeof(struct sof_abi_hdr))) {
+ dev_err(sdev->dev, "%s: insufficient size for a bytes control: %zu.\n",
+ __func__, scontrol->max_size);
+ return -EINVAL;
}
- /* init the get/put bytes data */
if (scontrol->priv_size > scontrol->max_size - sizeof(*cdata)) {
- dev_err(sdev->dev, "err: bytes data size %zu exceeds max %zu.\n",
+ dev_err(sdev->dev,
+ "%s: bytes data size %zu exceeds max %zu.\n", __func__,
scontrol->priv_size, scontrol->max_size - sizeof(*cdata));
- ret = -EINVAL;
- goto err;
+ return -EINVAL;
}
+ scontrol->ipc_control_data = kzalloc(scontrol->max_size, GFP_KERNEL);
+ if (!scontrol->ipc_control_data)
+ return -ENOMEM;
+
scontrol->size = sizeof(struct sof_ipc_ctrl_data) + scontrol->priv_size;
cdata = scontrol->ipc_control_data;
PLATFORM_DEVID_NONE,
pdev, sizeof(*pdev));
if (IS_ERR(priv->ipc_dev)) {
- ret = IS_ERR(priv->ipc_dev);
+ ret = PTR_ERR(priv->ipc_dev);
dev_err(sdev->dev, "failed to create mtk-adsp-ipc device\n");
goto err_adsp_off;
}
u32 target_dsp_state;
switch (sdev->system_suspend_target) {
+ case SOF_SUSPEND_S5:
+ case SOF_SUSPEND_S4:
+ /* DSP should be in D3 if the system is suspending to S3+ */
case SOF_SUSPEND_S3:
/* DSP should be in D3 if the system is suspending to S3 */
target_dsp_state = SOF_DSP_PM_D3;
return 0;
#if defined(CONFIG_ACPI)
- if (acpi_target_system_state() == ACPI_STATE_S0)
+ switch (acpi_target_system_state()) {
+ case ACPI_STATE_S0:
sdev->system_suspend_target = SOF_SUSPEND_S0IX;
+ break;
+ case ACPI_STATE_S1:
+ case ACPI_STATE_S2:
+ case ACPI_STATE_S3:
+ sdev->system_suspend_target = SOF_SUSPEND_S3;
+ break;
+ case ACPI_STATE_S4:
+ sdev->system_suspend_target = SOF_SUSPEND_S4;
+ break;
+ case ACPI_STATE_S5:
+ sdev->system_suspend_target = SOF_SUSPEND_S5;
+ break;
+ default:
+ break;
+ }
#endif
return 0;
p->walking = false;
if (ret < 0) {
/* unprepare the source widget */
- if (!widget_ops[widget->id].ipc_unprepare && swidget->prepared) {
+ if (widget_ops[widget->id].ipc_unprepare && swidget->prepared) {
widget_ops[widget->id].ipc_unprepare(swidget);
swidget->prepared = false;
}
{
struct sof_client_dev *cdev = file->private_data;
struct sof_msg_inject_priv *priv = cdev->data;
- size_t size;
+ ssize_t size;
int ret;
if (*ppos)
size = simple_write_to_buffer(priv->tx_buffer, priv->max_msg_size,
ppos, buffer, count);
+ if (size < 0)
+ return size;
if (size != count)
- return size > 0 ? -EFAULT : size;
+ return -EFAULT;
memset(priv->rx_buffer, 0, priv->max_msg_size);
struct sof_client_dev *cdev = file->private_data;
struct sof_msg_inject_priv *priv = cdev->data;
struct sof_ipc4_msg *ipc4_msg = priv->tx_buffer;
- size_t size;
+ ssize_t size;
int ret;
if (*ppos)
size = simple_write_to_buffer(&ipc4_msg->header_u64,
sizeof(ipc4_msg->header_u64),
ppos, buffer, count);
+ if (size < 0)
+ return size;
if (size != sizeof(ipc4_msg->header_u64))
- return size > 0 ? -EFAULT : size;
+ return -EFAULT;
count -= size;
- if (!count) {
- /* Copy the payload */
- size = simple_write_to_buffer(ipc4_msg->data_ptr,
- priv->max_msg_size, ppos, buffer,
- count);
- if (size != count)
- return size > 0 ? -EFAULT : size;
- }
+ /* Copy the payload */
+ size = simple_write_to_buffer(ipc4_msg->data_ptr,
+ priv->max_msg_size, ppos, buffer,
+ count);
+ if (size < 0)
+ return size;
+ if (size != count)
+ return -EFAULT;
ipc4_msg->data_size = count;
SOF_SUSPEND_NONE = 0,
SOF_SUSPEND_S0IX,
SOF_SUSPEND_S3,
+ SOF_SUSPEND_S4,
+ SOF_SUSPEND_S5,
};
enum sof_dfsentry_type {
/* Sidetone specific API */
int omap_mcbsp_st_init(struct platform_device *pdev);
-void omap_mcbsp_st_cleanup(struct platform_device *pdev);
-
int omap_mcbsp_st_start(struct omap_mcbsp *mcbsp);
int omap_mcbsp_st_stop(struct omap_mcbsp *mcbsp);
if (!st_data)
return -ENOMEM;
- st_data->mcbsp_iclk = clk_get(mcbsp->dev, "ick");
+ st_data->mcbsp_iclk = devm_clk_get(mcbsp->dev, "ick");
if (IS_ERR(st_data->mcbsp_iclk)) {
dev_warn(mcbsp->dev,
"Failed to get ick, sidetone might be broken\n");
if (!st_data->io_base_st)
return -ENOMEM;
- ret = sysfs_create_group(&mcbsp->dev->kobj, &sidetone_attr_group);
+ ret = devm_device_add_group(mcbsp->dev, &sidetone_attr_group);
if (ret)
return ret;
return 0;
}
-void omap_mcbsp_st_cleanup(struct platform_device *pdev)
-{
- struct omap_mcbsp *mcbsp = platform_get_drvdata(pdev);
-
- if (mcbsp->st_data) {
- sysfs_remove_group(&mcbsp->dev->kobj, &sidetone_attr_group);
- clk_put(mcbsp->st_data->mcbsp_iclk);
- }
-}
-
static int omap_mcbsp_st_info_volsw(struct snd_kcontrol *kcontrol,
struct snd_ctl_elem_info *uinfo)
{
mcbsp->max_tx_thres = max_thres(mcbsp) - 0x10;
mcbsp->max_rx_thres = max_thres(mcbsp) - 0x10;
- ret = sysfs_create_group(&mcbsp->dev->kobj,
- &additional_attr_group);
+ ret = devm_device_add_group(mcbsp->dev, &additional_attr_group);
if (ret) {
dev_err(mcbsp->dev,
"Unable to create additional controls\n");
}
}
- ret = omap_mcbsp_st_init(pdev);
- if (ret)
- goto err_st;
-
- return 0;
-
-err_st:
- if (mcbsp->pdata->buffer_size)
- sysfs_remove_group(&mcbsp->dev->kobj, &additional_attr_group);
- return ret;
+ return omap_mcbsp_st_init(pdev);
}
/*
if (cpu_latency_qos_request_active(&mcbsp->pm_qos_req))
cpu_latency_qos_remove_request(&mcbsp->pm_qos_req);
- if (mcbsp->pdata->buffer_size)
- sysfs_remove_group(&mcbsp->dev->kobj, &additional_attr_group);
-
- omap_mcbsp_st_cleanup(pdev);
-
return 0;
}
}
} else {
/* skip channels with no compressor active */
- while (!store->comp_store->val[
+ while (store->comp_index <= SND_US16X08_MAX_CHANNELS
+ && !store->comp_store->val[
COMP_STORE_IDX(SND_US16X08_ID_COMP_SWITCH)]
- [store->comp_index - 1]
- && store->comp_index <= SND_US16X08_MAX_CHANNELS) {
+ [store->comp_index - 1]) {
store->comp_index++;
}
ret = store->comp_index++;
bool is_playback;
int err;
+ if (fmt->sync_ep)
+ return 0; /* already set up */
+
alts = snd_usb_get_host_interface(chip, fmt->iface, fmt->altsetting);
if (!alts)
return 0;
* Generic sync EP handling
*/
- if (altsd->bNumEndpoints < 2)
+ if (fmt->ep_idx > 0 || altsd->bNumEndpoints < 2)
return 0;
is_playback = !(get_endpoint(alts, 0)->bEndpointAddress & USB_DIR_IN);
.nr_rates = 2,
.rate_table = (unsigned int[]) {
44100, 48000
- }
+ },
+ .sync_ep = 0x82,
+ .sync_iface = 0,
+ .sync_altsetting = 1,
+ .sync_ep_idx = 1,
+ .implicit_fb = 1,
}
},
{
}
},
+/*
+ * MacroSilicon MS2100/MS2106 based AV capture cards
+ *
+ * These claim 96kHz 1ch in the descriptors, but are actually 48kHz 2ch.
+ * They also need QUIRK_FLAG_ALIGN_TRANSFER, which makes one wonder if
+ * they pretend to be 96kHz mono as a workaround for stereo being broken
+ * by that...
+ *
+ * They also have an issue with initial stream alignment that causes the
+ * channels to be swapped and out of phase, which is dealt with in quirks.c.
+ */
+{
+ USB_AUDIO_DEVICE(0x534d, 0x0021),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .vendor_name = "MacroSilicon",
+ .product_name = "MS210x",
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = &(const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_STANDARD_MIXER,
+ },
+ {
+ .ifnum = 3,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S16_LE,
+ .channels = 2,
+ .iface = 3,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .attributes = 0,
+ .endpoint = 0x82,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC,
+ .rates = SNDRV_PCM_RATE_CONTINUOUS,
+ .rate_min = 48000,
+ .rate_max = 48000,
+ }
+ },
+ {
+ .ifnum = -1
+ }
+ }
+ }
+},
+
/*
* MacroSilicon MS2109 based HDMI capture cards
*
}
}
},
+{
+ /*
+ * Fiero SC-01 (firmware v1.0.0 @ 48 kHz)
+ */
+ USB_DEVICE(0x2b53, 0x0023),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .vendor_name = "Fiero",
+ .product_name = "SC-01",
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = &(const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 0,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE
+ },
+ /* Playback */
+ {
+ .ifnum = 1,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S32_LE,
+ .channels = 2,
+ .fmt_bits = 24,
+ .iface = 1,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .endpoint = 0x01,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC,
+ .rates = SNDRV_PCM_RATE_48000,
+ .rate_min = 48000,
+ .rate_max = 48000,
+ .nr_rates = 1,
+ .rate_table = (unsigned int[]) { 48000 },
+ .clock = 0x29
+ }
+ },
+ /* Capture */
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S32_LE,
+ .channels = 2,
+ .fmt_bits = 24,
+ .iface = 2,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .endpoint = 0x82,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC |
+ USB_ENDPOINT_USAGE_IMPLICIT_FB,
+ .rates = SNDRV_PCM_RATE_48000,
+ .rate_min = 48000,
+ .rate_max = 48000,
+ .nr_rates = 1,
+ .rate_table = (unsigned int[]) { 48000 },
+ .clock = 0x29
+ }
+ },
+ {
+ .ifnum = -1
+ }
+ }
+ }
+},
+{
+ /*
+ * Fiero SC-01 (firmware v1.0.0 @ 96 kHz)
+ */
+ USB_DEVICE(0x2b53, 0x0024),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .vendor_name = "Fiero",
+ .product_name = "SC-01",
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = &(const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 0,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE
+ },
+ /* Playback */
+ {
+ .ifnum = 1,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S32_LE,
+ .channels = 2,
+ .fmt_bits = 24,
+ .iface = 1,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .endpoint = 0x01,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC,
+ .rates = SNDRV_PCM_RATE_96000,
+ .rate_min = 96000,
+ .rate_max = 96000,
+ .nr_rates = 1,
+ .rate_table = (unsigned int[]) { 96000 },
+ .clock = 0x29
+ }
+ },
+ /* Capture */
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S32_LE,
+ .channels = 2,
+ .fmt_bits = 24,
+ .iface = 2,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .endpoint = 0x82,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC |
+ USB_ENDPOINT_USAGE_IMPLICIT_FB,
+ .rates = SNDRV_PCM_RATE_96000,
+ .rate_min = 96000,
+ .rate_max = 96000,
+ .nr_rates = 1,
+ .rate_table = (unsigned int[]) { 96000 },
+ .clock = 0x29
+ }
+ },
+ {
+ .ifnum = -1
+ }
+ }
+ }
+},
+{
+ /*
+ * Fiero SC-01 (firmware v1.1.0)
+ */
+ USB_DEVICE(0x2b53, 0x0031),
+ .driver_info = (unsigned long) &(const struct snd_usb_audio_quirk) {
+ .vendor_name = "Fiero",
+ .product_name = "SC-01",
+ .ifnum = QUIRK_ANY_INTERFACE,
+ .type = QUIRK_COMPOSITE,
+ .data = &(const struct snd_usb_audio_quirk[]) {
+ {
+ .ifnum = 0,
+ .type = QUIRK_AUDIO_STANDARD_INTERFACE
+ },
+ /* Playback */
+ {
+ .ifnum = 1,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S32_LE,
+ .channels = 2,
+ .fmt_bits = 24,
+ .iface = 1,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .endpoint = 0x01,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC,
+ .rates = SNDRV_PCM_RATE_48000 |
+ SNDRV_PCM_RATE_96000,
+ .rate_min = 48000,
+ .rate_max = 96000,
+ .nr_rates = 2,
+ .rate_table = (unsigned int[]) { 48000, 96000 },
+ .clock = 0x29
+ }
+ },
+ /* Capture */
+ {
+ .ifnum = 2,
+ .type = QUIRK_AUDIO_FIXED_ENDPOINT,
+ .data = &(const struct audioformat) {
+ .formats = SNDRV_PCM_FMTBIT_S32_LE,
+ .channels = 2,
+ .fmt_bits = 24,
+ .iface = 2,
+ .altsetting = 1,
+ .altset_idx = 1,
+ .endpoint = 0x82,
+ .ep_attr = USB_ENDPOINT_XFER_ISOC |
+ USB_ENDPOINT_SYNC_ASYNC |
+ USB_ENDPOINT_USAGE_IMPLICIT_FB,
+ .rates = SNDRV_PCM_RATE_48000 |
+ SNDRV_PCM_RATE_96000,
+ .rate_min = 48000,
+ .rate_max = 96000,
+ .nr_rates = 2,
+ .rate_table = (unsigned int[]) { 48000, 96000 },
+ .clock = 0x29
+ }
+ },
+ {
+ .ifnum = -1
+ }
+ }
+ }
+},
#undef USB_DEVICE_VENDOR_SPEC
#undef USB_AUDIO_DEVICE
case USB_ID(0x041e, 0x3f19): /* E-Mu 0204 USB */
set_format_emu_quirk(subs, fmt);
break;
+ case USB_ID(0x534d, 0x0021): /* MacroSilicon MS2100/MS2106 */
case USB_ID(0x534d, 0x2109): /* MacroSilicon MS2109 */
subs->stream_offset_adj = 2;
break;
QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x1397, 0x0508, /* Behringer UMC204HD */
+ QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x1397, 0x0509, /* Behringer UMC404HD */
+ QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB),
DEVICE_FLG(0x13e5, 0x0001, /* Serato Phono */
QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x154e, 0x1002, /* Denon DCD-1500RE */
QUIRK_FLAG_IGNORE_CTL_ERROR),
DEVICE_FLG(0x413c, 0xa506, /* Dell AE515 sound bar */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x534d, 0x0021, /* MacroSilicon MS2100/MS2106 */
+ QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x534d, 0x2109, /* MacroSilicon MS2109 */
QUIRK_FLAG_ALIGN_TRANSFER),
DEVICE_FLG(0x1224, 0x2a25, /* Jieli Technology USB PHY 2.0 */
QUIRK_FLAG_GET_SAMPLE_RATE),
+ DEVICE_FLG(0x2b53, 0x0023, /* Fiero SC-01 (firmware v1.0.0 @ 48 kHz) */
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x2b53, 0x0024, /* Fiero SC-01 (firmware v1.0.0 @ 96 kHz) */
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+ DEVICE_FLG(0x2b53, 0x0031, /* Fiero SC-01 (firmware v1.1.0) */
+ QUIRK_FLAG_GENERIC_IMPLICIT_FB),
/* Vendor matches */
VENDOR_FLG(0x045e, /* MS Lifecam */
#include <drm/intel_lpe_audio.h>
#include "intel_hdmi_audio.h"
+#define INTEL_HDMI_AUDIO_SUSPEND_DELAY_MS 5000
+
#define for_each_pipe(card_ctx, pipe) \
for ((pipe) = 0; (pipe) < (card_ctx)->num_pipes; (pipe)++)
#define for_each_port(card_ctx, port) \
intelhaddata = snd_pcm_substream_chip(substream);
runtime = substream->runtime;
- pm_runtime_get_sync(intelhaddata->dev);
+ retval = pm_runtime_resume_and_get(intelhaddata->dev);
+ if (retval < 0)
+ return retval;
/* set the runtime hw parameter with local snd_pcm_hardware struct */
runtime->hw = had_pcm_hardware;
container_of(work, struct snd_intelhad, hdmi_audio_wq);
struct intel_hdmi_lpe_audio_pdata *pdata = ctx->dev->platform_data;
struct intel_hdmi_lpe_audio_port_pdata *ppdata = &pdata->port[ctx->port];
+ int ret;
+
+ ret = pm_runtime_resume_and_get(ctx->dev);
+ if (ret < 0)
+ return;
- pm_runtime_get_sync(ctx->dev);
mutex_lock(&ctx->mutex);
if (ppdata->pipe < 0) {
dev_dbg(ctx->dev, "%s: Event: HAD_NOTIFY_HOT_UNPLUG : port = %d\n",
pdata->notify_audio_lpe = notify_audio_lpe;
spin_unlock_irq(&pdata->lpe_audio_slock);
+ pm_runtime_set_autosuspend_delay(&pdev->dev, INTEL_HDMI_AUDIO_SUSPEND_DELAY_MS);
pm_runtime_use_autosuspend(&pdev->dev);
+ pm_runtime_enable(&pdev->dev);
pm_runtime_mark_last_busy(&pdev->dev);
+ pm_runtime_idle(&pdev->dev);
dev_dbg(&pdev->dev, "%s: handle pending notification\n", __func__);
for_each_port(card_ctx, port) {
#define MIDR_VARIANT(midr) \
(((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT)
#define MIDR_IMPLEMENTOR_SHIFT 24
-#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT)
+#define MIDR_IMPLEMENTOR_MASK (0xffU << MIDR_IMPLEMENTOR_SHIFT)
#define MIDR_IMPLEMENTOR(midr) \
(((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT)
#define APPLE_CPU_PART_M1_ICESTORM 0x022
#define APPLE_CPU_PART_M1_FIRESTORM 0x023
+#define APPLE_CPU_PART_M1_ICESTORM_PRO 0x024
+#define APPLE_CPU_PART_M1_FIRESTORM_PRO 0x025
+#define APPLE_CPU_PART_M1_ICESTORM_MAX 0x028
+#define APPLE_CPU_PART_M1_FIRESTORM_MAX 0x029
#define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53)
#define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57)
#define MIDR_HISI_TSV110 MIDR_CPU_MODEL(ARM_CPU_IMP_HISI, HISI_CPU_PART_TSV110)
#define MIDR_APPLE_M1_ICESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM)
#define MIDR_APPLE_M1_FIRESTORM MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM)
+#define MIDR_APPLE_M1_ICESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_PRO)
+#define MIDR_APPLE_M1_FIRESTORM_PRO MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_PRO)
+#define MIDR_APPLE_M1_ICESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_ICESTORM_MAX)
+#define MIDR_APPLE_M1_FIRESTORM_MAX MIDR_CPU_MODEL(ARM_CPU_IMP_APPLE, APPLE_CPU_PART_M1_FIRESTORM_MAX)
/* Fujitsu Erratum 010001 affects A64FX 1.0 and 1.1, (v0r0 and v1r0) */
#define MIDR_FUJITSU_ERRATUM_010001 MIDR_FUJITSU_A64FX
#ifndef __ASSEMBLY__
-#include "sysreg.h"
+#include <asm/sysreg.h>
#define read_cpuid(reg) read_sysreg_s(SYS_ ## reg)
__u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
};
+#define KVM_DEBUG_ARCH_HSR_HIGH_VALID (1 << 0)
struct kvm_debug_exit_arch {
__u32 hsr;
+ __u32 hsr_high; /* ESR_EL2[61:32] */
__u64 far; /* used for watchpoints */
};
#define KVM_ARM64_SVE_VLS_WORDS \
((KVM_ARM64_SVE_VQ_MAX - KVM_ARM64_SVE_VQ_MIN) / 64 + 1)
+/* Bitmap feature firmware registers */
+#define KVM_REG_ARM_FW_FEAT_BMAP (0x0016 << KVM_REG_ARM_COPROC_SHIFT)
+#define KVM_REG_ARM_FW_FEAT_BMAP_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
+ KVM_REG_ARM_FW_FEAT_BMAP | \
+ ((r) & 0xffff))
+
+#define KVM_REG_ARM_STD_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(0)
+
+enum {
+ KVM_REG_ARM_STD_BIT_TRNG_V1_0 = 0,
+#ifdef __KERNEL__
+ KVM_REG_ARM_STD_BMAP_BIT_COUNT,
+#endif
+};
+
+#define KVM_REG_ARM_STD_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(1)
+
+enum {
+ KVM_REG_ARM_STD_HYP_BIT_PV_TIME = 0,
+#ifdef __KERNEL__
+ KVM_REG_ARM_STD_HYP_BMAP_BIT_COUNT,
+#endif
+};
+
+#define KVM_REG_ARM_VENDOR_HYP_BMAP KVM_REG_ARM_FW_FEAT_BMAP_REG(2)
+
+enum {
+ KVM_REG_ARM_VENDOR_HYP_BIT_FUNC_FEAT = 0,
+ KVM_REG_ARM_VENDOR_HYP_BIT_PTP = 1,
+#ifdef __KERNEL__
+ KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_COUNT,
+#endif
+};
+
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0
#define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1
#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
#define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */
#define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-/* FREE! ( 7*32+10) */
+#define X86_FEATURE_XCOMPACTED ( 7*32+10) /* "" Use compacted XSTATE (XSAVES or XSAVEC) */
#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */
-#define X86_FEATURE_RETPOLINE ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
-#define X86_FEATURE_RETPOLINE_LFENCE ( 7*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_KERNEL_IBRS ( 7*32+12) /* "" Set/clear IBRS on kernel entry/exit */
+#define X86_FEATURE_RSB_VMEXIT ( 7*32+13) /* "" Fill RSB on VM-Exit */
#define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */
#define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */
#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */
#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */
#define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */
#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */
-/* FREE! ( 7*32+20) */
+#define X86_FEATURE_PERFMON_V2 ( 7*32+20) /* AMD Performance Monitoring Version 2 */
#define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
#define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */
#define X86_FEATURE_VMW_VMMCALL ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
#define X86_FEATURE_PVUNLOCK ( 8*32+20) /* "" PV unlock function */
#define X86_FEATURE_VCPUPREEMPT ( 8*32+21) /* "" PV vcpu_is_preempted function */
+#define X86_FEATURE_TDX_GUEST ( 8*32+22) /* Intel Trust Domain Extensions Guest */
/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
#define X86_FEATURE_FSGSBASE ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
#define X86_FEATURE_PER_THREAD_MBA (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
#define X86_FEATURE_SGX1 (11*32+ 8) /* "" Basic SGX */
#define X86_FEATURE_SGX2 (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
+#define X86_FEATURE_ENTRY_IBPB (11*32+10) /* "" Issue an IBPB on kernel entry */
+#define X86_FEATURE_RRSBA_CTRL (11*32+11) /* "" RET prediction control */
+#define X86_FEATURE_RETPOLINE (11*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
+#define X86_FEATURE_RETPOLINE_LFENCE (11*32+13) /* "" Use LFENCE for Spectre variant 2 */
+#define X86_FEATURE_RETHUNK (11*32+14) /* "" Use REturn THUNK */
+#define X86_FEATURE_UNRET (11*32+15) /* "" AMD BTB untrain return */
+#define X86_FEATURE_USE_IBPB_FW (11*32+16) /* "" Use IBPB during runtime firmware calls */
/* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
#define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */
#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */
#define X86_FEATURE_AMD_SSB_NO (13*32+26) /* "" Speculative Store Bypass is fixed in hardware. */
#define X86_FEATURE_CPPC (13*32+27) /* Collaborative Processor Performance Control */
+#define X86_FEATURE_BTC_NO (13*32+29) /* "" Not vulnerable to Branch Type Confusion */
+#define X86_FEATURE_BRS (13*32+31) /* Branch Sampling available */
/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
#define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */
#define X86_FEATURE_SEV (19*32+ 1) /* AMD Secure Encrypted Virtualization */
#define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* "" VM Page Flush MSR is supported */
#define X86_FEATURE_SEV_ES (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* "" Virtual TSC_AUX */
#define X86_FEATURE_SME_COHERENT (19*32+10) /* "" AMD hardware-enforced cache coherency */
/*
#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */
#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */
#define X86_BUG_SRBDS X86_BUG(24) /* CPU may leak RNG bits if not mitigated */
+#define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* CPU is affected by Processor MMIO Stale Data vulnerabilities */
+#define X86_BUG_RETBLEED X86_BUG(26) /* CPU is affected by RETBleed */
#endif /* _ASM_X86_CPUFEATURES_H */
# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31))
#endif
+#ifdef CONFIG_RETPOLINE
+# define DISABLE_RETPOLINE 0
+#else
+# define DISABLE_RETPOLINE ((1 << (X86_FEATURE_RETPOLINE & 31)) | \
+ (1 << (X86_FEATURE_RETPOLINE_LFENCE & 31)))
+#endif
+
+#ifdef CONFIG_RETHUNK
+# define DISABLE_RETHUNK 0
+#else
+# define DISABLE_RETHUNK (1 << (X86_FEATURE_RETHUNK & 31))
+#endif
+
+#ifdef CONFIG_CPU_UNRET_ENTRY
+# define DISABLE_UNRET 0
+#else
+# define DISABLE_UNRET (1 << (X86_FEATURE_UNRET & 31))
+#endif
+
#ifdef CONFIG_INTEL_IOMMU_SVM
# define DISABLE_ENQCMD 0
#else
# define DISABLE_SGX (1 << (X86_FEATURE_SGX & 31))
#endif
+#ifdef CONFIG_INTEL_TDX_GUEST
+# define DISABLE_TDX_GUEST 0
+#else
+# define DISABLE_TDX_GUEST (1 << (X86_FEATURE_TDX_GUEST & 31))
+#endif
+
/*
* Make sure to add features to the correct mask
*/
#define DISABLED_MASK5 0
#define DISABLED_MASK6 0
#define DISABLED_MASK7 (DISABLE_PTI)
-#define DISABLED_MASK8 0
+#define DISABLED_MASK8 (DISABLE_TDX_GUEST)
#define DISABLED_MASK9 (DISABLE_SGX)
#define DISABLED_MASK10 0
-#define DISABLED_MASK11 0
+#define DISABLED_MASK11 (DISABLE_RETPOLINE|DISABLE_RETHUNK|DISABLE_UNRET)
#define DISABLED_MASK12 0
#define DISABLED_MASK13 0
#define DISABLED_MASK14 0
#define SPEC_CTRL_STIBP BIT(SPEC_CTRL_STIBP_SHIFT) /* STIBP mask */
#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */
#define SPEC_CTRL_SSBD BIT(SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */
+#define SPEC_CTRL_RRSBA_DIS_S_SHIFT 6 /* Disable RRSBA behavior */
+#define SPEC_CTRL_RRSBA_DIS_S BIT(SPEC_CTRL_RRSBA_DIS_S_SHIFT)
#define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */
#define PRED_CMD_IBPB BIT(0) /* Indirect Branch Prediction Barrier */
#define MSR_IA32_ARCH_CAPABILITIES 0x0000010a
#define ARCH_CAP_RDCL_NO BIT(0) /* Not susceptible to Meltdown */
#define ARCH_CAP_IBRS_ALL BIT(1) /* Enhanced IBRS support */
+#define ARCH_CAP_RSBA BIT(2) /* RET may use alternative branch predictors */
#define ARCH_CAP_SKIP_VMENTRY_L1DFLUSH BIT(3) /* Skip L1D flush on vmentry */
#define ARCH_CAP_SSB_NO BIT(4) /*
* Not susceptible to Speculative Store Bypass
* Not susceptible to
* TSX Async Abort (TAA) vulnerabilities.
*/
+#define ARCH_CAP_SBDR_SSDP_NO BIT(13) /*
+ * Not susceptible to SBDR and SSDP
+ * variants of Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FBSDP_NO BIT(14) /*
+ * Not susceptible to FBSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_PSDP_NO BIT(15) /*
+ * Not susceptible to PSDP variant of
+ * Processor MMIO stale data
+ * vulnerabilities.
+ */
+#define ARCH_CAP_FB_CLEAR BIT(17) /*
+ * VERW clears CPU fill buffer
+ * even on MDS_NO CPUs.
+ */
+#define ARCH_CAP_FB_CLEAR_CTRL BIT(18) /*
+ * MSR_IA32_MCU_OPT_CTRL[FB_CLEAR_DIS]
+ * bit available to control VERW
+ * behavior.
+ */
+#define ARCH_CAP_RRSBA BIT(19) /*
+ * Indicates RET may use predictors
+ * other than the RSB. With eIBRS
+ * enabled predictions in kernel mode
+ * are restricted to targets in
+ * kernel.
+ */
#define MSR_IA32_FLUSH_CMD 0x0000010b
#define L1D_FLUSH BIT(0) /*
#define MSR_IA32_MCU_OPT_CTRL 0x00000123
#define RNGDS_MITG_DIS BIT(0) /* SRBDS support */
#define RTM_ALLOW BIT(1) /* TSX development mode */
+#define FB_CLEAR_DIS BIT(3) /* CPU Fill buffer clear disable */
#define MSR_IA32_SYSENTER_CS 0x00000174
#define MSR_IA32_SYSENTER_ESP 0x00000175
/* Fam 17h MSRs */
#define MSR_F17H_IRPERF 0xc00000e9
+#define MSR_ZEN2_SPECTRAL_CHICKEN 0xc00110e3
+#define MSR_ZEN2_SPECTRAL_CHICKEN_BIT BIT_ULL(1)
+
/* Fam 16h MSRs */
#define MSR_F16H_L2I_PERF_CTL 0xc0010230
#define MSR_F16H_L2I_PERF_CTR 0xc0010231
struct kvm_vcpu_events events;
};
-#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
-#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
-#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
-#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
-#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
+#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
+#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
+#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
+#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
+#define KVM_X86_QUIRK_FIX_HYPERCALL_INSN (1 << 5)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1
#define SVM_VMGEXIT_AP_JUMP_TABLE 0x80000005
#define SVM_VMGEXIT_SET_AP_JUMP_TABLE 0
#define SVM_VMGEXIT_GET_AP_JUMP_TABLE 1
+#define SVM_VMGEXIT_PSC 0x80000010
+#define SVM_VMGEXIT_GUEST_REQUEST 0x80000011
+#define SVM_VMGEXIT_EXT_GUEST_REQUEST 0x80000012
+#define SVM_VMGEXIT_AP_CREATION 0x80000013
+#define SVM_VMGEXIT_AP_CREATE_ON_INIT 0
+#define SVM_VMGEXIT_AP_CREATE 1
+#define SVM_VMGEXIT_AP_DESTROY 2
+#define SVM_VMGEXIT_HV_FEATURES 0x8000fffd
#define SVM_VMGEXIT_UNSUPPORTED_EVENT 0x8000ffff
/* Exit code reserved for hypervisor/software use */
{ SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \
{ SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \
{ SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \
+ { SVM_VMGEXIT_PSC, "vmgexit_page_state_change" }, \
+ { SVM_VMGEXIT_GUEST_REQUEST, "vmgexit_guest_request" }, \
+ { SVM_VMGEXIT_EXT_GUEST_REQUEST, "vmgexit_ext_guest_request" }, \
+ { SVM_VMGEXIT_AP_CREATION, "vmgexit_ap_creation" }, \
+ { SVM_VMGEXIT_HV_FEATURES, "vmgexit_hypervisor_feature" }, \
{ SVM_EXIT_ERR, "invalid_guest_state" }
*
* UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
* Useful for code which doesn't have an ELF function annotation.
+ *
+ * UNWIND_HINT_ENTRY: machine entry without stack, SYSCALL/SYSENTER etc.
*/
#define UNWIND_HINT_TYPE_CALL 0
#define UNWIND_HINT_TYPE_REGS 1
#define UNWIND_HINT_TYPE_REGS_PARTIAL 2
#define UNWIND_HINT_TYPE_FUNC 3
+#define UNWIND_HINT_TYPE_ENTRY 4
+#define UNWIND_HINT_TYPE_SAVE 5
+#define UNWIND_HINT_TYPE_RESTORE 6
#ifdef CONFIG_OBJTOOL
* the debuginfo as necessary. It will also warn if it sees any
* inconsistencies.
*/
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.Lunwind_hint_ip_\@:
.pushsection .discard.unwind_hints
/* struct unwind_hint */
.popsection
.endm
+.macro STACK_FRAME_NON_STANDARD_FP func:req
+#ifdef CONFIG_FRAME_POINTER
+ STACK_FRAME_NON_STANDARD \func
+#endif
+.endm
+
.macro ANNOTATE_NOENDBR
.Lhere_\@:
.pushsection .discard.noendbr
#define ASM_REACHABLE
#else
#define ANNOTATE_INTRA_FUNCTION_CALL
-.macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
+.macro UNWIND_HINT type:req sp_reg=0 sp_offset=0 end=0
.endm
.macro STACK_FRAME_NON_STANDARD func:req
.endm
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
#ifndef _ASM_GENERIC_FCNTL_H
#define _ASM_GENERIC_FCNTL_H
/* a horrid kludge trying to make sure that this will fail on old kernels */
#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
-#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
+#define O_TMPFILE_MASK (__O_TMPFILE | O_DIRECTORY | O_CREAT)
#ifndef O_NDELAY
#define O_NDELAY O_NONBLOCK
#define F_GETSIG 11 /* for sockets. */
#endif
+#if __BITS_PER_LONG == 32 || defined(__KERNEL__)
#ifndef F_GETLK64
#define F_GETLK64 12 /* using 'struct flock64' */
#define F_SETLK64 13
#define F_SETLKW64 14
#endif
+#endif /* __BITS_PER_LONG == 32 || defined(__KERNEL__) */
#ifndef F_SETOWN_EX
#define F_SETOWN_EX 15
blocking */
#define LOCK_UN 8 /* remove lock */
+/*
+ * LOCK_MAND support has been removed from the kernel. We leave the symbols
+ * here to not break legacy builds, but these should not be used in new code.
+ */
#define LOCK_MAND 32 /* This is a mandatory flock ... */
#define LOCK_READ 64 /* which allows concurrent read operations */
#define LOCK_WRITE 128 /* which allows concurrent write operations */
#define F_LINUX_SPECIFIC_BASE 1024
+#ifndef HAVE_ARCH_STRUCT_FLOCK
struct flock {
short l_type;
short l_whence;
__ARCH_FLOCK64_PAD
#endif
};
+#endif /* HAVE_ARCH_STRUCT_FLOCK */
#endif /* _ASM_GENERIC_FCNTL_H */
I915_MOCS_CACHED,
};
-/*
+/**
+ * enum drm_i915_gem_engine_class - uapi engine type enumeration
+ *
* Different engines serve different roles, and there may be more than one
- * engine serving each role. enum drm_i915_gem_engine_class provides a
- * classification of the role of the engine, which may be used when requesting
- * operations to be performed on a certain subset of engines, or for providing
- * information about that group.
+ * engine serving each role. This enum provides a classification of the role
+ * of the engine, which may be used when requesting operations to be performed
+ * on a certain subset of engines, or for providing information about that
+ * group.
*/
enum drm_i915_gem_engine_class {
+ /**
+ * @I915_ENGINE_CLASS_RENDER:
+ *
+ * Render engines support instructions used for 3D, Compute (GPGPU),
+ * and programmable media workloads. These instructions fetch data and
+ * dispatch individual work items to threads that operate in parallel.
+ * The threads run small programs (called "kernels" or "shaders") on
+ * the GPU's execution units (EUs).
+ */
I915_ENGINE_CLASS_RENDER = 0,
+
+ /**
+ * @I915_ENGINE_CLASS_COPY:
+ *
+ * Copy engines (also referred to as "blitters") support instructions
+ * that move blocks of data from one location in memory to another,
+ * or that fill a specified location of memory with fixed data.
+ * Copy engines can perform pre-defined logical or bitwise operations
+ * on the source, destination, or pattern data.
+ */
I915_ENGINE_CLASS_COPY = 1,
+
+ /**
+ * @I915_ENGINE_CLASS_VIDEO:
+ *
+ * Video engines (also referred to as "bit stream decode" (BSD) or
+ * "vdbox") support instructions that perform fixed-function media
+ * decode and encode.
+ */
I915_ENGINE_CLASS_VIDEO = 2,
+
+ /**
+ * @I915_ENGINE_CLASS_VIDEO_ENHANCE:
+ *
+ * Video enhancement engines (also referred to as "vebox") support
+ * instructions related to image enhancement.
+ */
I915_ENGINE_CLASS_VIDEO_ENHANCE = 3,
- /* should be kept compact */
+ /**
+ * @I915_ENGINE_CLASS_COMPUTE:
+ *
+ * Compute engines support a subset of the instructions available
+ * on render engines: compute engines support Compute (GPGPU) and
+ * programmable media workloads, but do not support the 3D pipeline.
+ */
+ I915_ENGINE_CLASS_COMPUTE = 4,
+
+ /* Values in this enum should be kept compact. */
+ /**
+ * @I915_ENGINE_CLASS_INVALID:
+ *
+ * Placeholder value to represent an invalid engine class assignment.
+ */
I915_ENGINE_CLASS_INVALID = -1
};
-/*
+/**
+ * struct i915_engine_class_instance - Engine class/instance identifier
+ *
* There may be more than one engine fulfilling any role within the system.
* Each engine of a class is given a unique instance number and therefore
* any engine can be specified by its class:instance tuplet. APIs that allow
* for this identification.
*/
struct i915_engine_class_instance {
- __u16 engine_class; /* see enum drm_i915_gem_engine_class */
- __u16 engine_instance;
+ /**
+ * @engine_class:
+ *
+ * Engine class from enum drm_i915_gem_engine_class
+ */
+ __u16 engine_class;
#define I915_ENGINE_CLASS_INVALID_NONE -1
#define I915_ENGINE_CLASS_INVALID_VIRTUAL -2
+
+ /**
+ * @engine_instance:
+ *
+ * Engine instance.
+ */
+ __u16 engine_instance;
};
/**
DRM_I915_PERF_RECORD_MAX /* non-ABI */
};
-/*
+/**
+ * struct drm_i915_perf_oa_config
+ *
* Structure to upload perf dynamic configuration into the kernel.
*/
struct drm_i915_perf_oa_config {
- /** String formatted like "%08x-%04x-%04x-%04x-%012x" */
+ /**
+ * @uuid:
+ *
+ * String formatted like "%\08x-%\04x-%\04x-%\04x-%\012x"
+ */
char uuid[36];
+ /**
+ * @n_mux_regs:
+ *
+ * Number of mux regs in &mux_regs_ptr.
+ */
__u32 n_mux_regs;
+
+ /**
+ * @n_boolean_regs:
+ *
+ * Number of boolean regs in &boolean_regs_ptr.
+ */
__u32 n_boolean_regs;
+
+ /**
+ * @n_flex_regs:
+ *
+ * Number of flex regs in &flex_regs_ptr.
+ */
__u32 n_flex_regs;
- /*
- * These fields are pointers to tuples of u32 values (register address,
- * value). For example the expected length of the buffer pointed by
- * mux_regs_ptr is (2 * sizeof(u32) * n_mux_regs).
+ /**
+ * @mux_regs_ptr:
+ *
+ * Pointer to tuples of u32 values (register address, value) for mux
+ * registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_mux_regs).
*/
__u64 mux_regs_ptr;
+
+ /**
+ * @boolean_regs_ptr:
+ *
+ * Pointer to tuples of u32 values (register address, value) for mux
+ * registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_boolean_regs).
+ */
__u64 boolean_regs_ptr;
+
+ /**
+ * @flex_regs_ptr:
+ *
+ * Pointer to tuples of u32 values (register address, value) for mux
+ * registers. Expected length of buffer is (2 * sizeof(u32) *
+ * &n_flex_regs).
+ */
__u64 flex_regs_ptr;
};
* @data_ptr is also depends on the specific @query_id.
*/
struct drm_i915_query_item {
- /** @query_id: The id for this query */
+ /**
+ * @query_id:
+ *
+ * The id for this query. Currently accepted query IDs are:
+ * - %DRM_I915_QUERY_TOPOLOGY_INFO (see struct drm_i915_query_topology_info)
+ * - %DRM_I915_QUERY_ENGINE_INFO (see struct drm_i915_engine_info)
+ * - %DRM_I915_QUERY_PERF_CONFIG (see struct drm_i915_query_perf_config)
+ * - %DRM_I915_QUERY_MEMORY_REGIONS (see struct drm_i915_query_memory_regions)
+ * - %DRM_I915_QUERY_HWCONFIG_BLOB (see `GuC HWCONFIG blob uAPI`)
+ * - %DRM_I915_QUERY_GEOMETRY_SUBSLICES (see struct drm_i915_query_topology_info)
+ */
__u64 query_id;
-#define DRM_I915_QUERY_TOPOLOGY_INFO 1
-#define DRM_I915_QUERY_ENGINE_INFO 2
-#define DRM_I915_QUERY_PERF_CONFIG 3
-#define DRM_I915_QUERY_MEMORY_REGIONS 4
+#define DRM_I915_QUERY_TOPOLOGY_INFO 1
+#define DRM_I915_QUERY_ENGINE_INFO 2
+#define DRM_I915_QUERY_PERF_CONFIG 3
+#define DRM_I915_QUERY_MEMORY_REGIONS 4
+#define DRM_I915_QUERY_HWCONFIG_BLOB 5
+#define DRM_I915_QUERY_GEOMETRY_SUBSLICES 6
/* Must be kept compact -- no holes and well documented */
/**
/**
* @flags:
*
- * When query_id == DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
+ * When &query_id == %DRM_I915_QUERY_TOPOLOGY_INFO, must be 0.
*
- * When query_id == DRM_I915_QUERY_PERF_CONFIG, must be one of the
+ * When &query_id == %DRM_I915_QUERY_PERF_CONFIG, must be one of the
* following:
*
- * - DRM_I915_QUERY_PERF_CONFIG_LIST
- * - DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
- * - DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
+ * - %DRM_I915_QUERY_PERF_CONFIG_LIST
+ * - %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID
+ * - %DRM_I915_QUERY_PERF_CONFIG_FOR_UUID
+ *
+ * When &query_id == %DRM_I915_QUERY_GEOMETRY_SUBSLICES must contain
+ * a struct i915_engine_class_instance that references a render engine.
*/
__u32 flags;
#define DRM_I915_QUERY_PERF_CONFIG_LIST 1
__u64 items_ptr;
};
-/*
- * Data written by the kernel with query DRM_I915_QUERY_TOPOLOGY_INFO :
- *
- * data: contains the 3 pieces of information :
- *
- * - the slice mask with one bit per slice telling whether a slice is
- * available. The availability of slice X can be queried with the following
- * formula :
- *
- * (data[X / 8] >> (X % 8)) & 1
- *
- * - the subslice mask for each slice with one bit per subslice telling
- * whether a subslice is available. Gen12 has dual-subslices, which are
- * similar to two gen11 subslices. For gen12, this array represents dual-
- * subslices. The availability of subslice Y in slice X can be queried
- * with the following formula :
- *
- * (data[subslice_offset +
- * X * subslice_stride +
- * Y / 8] >> (Y % 8)) & 1
- *
- * - the EU mask for each subslice in each slice with one bit per EU telling
- * whether an EU is available. The availability of EU Z in subslice Y in
- * slice X can be queried with the following formula :
+/**
+ * struct drm_i915_query_topology_info
*
- * (data[eu_offset +
- * (X * max_subslices + Y) * eu_stride +
- * Z / 8] >> (Z % 8)) & 1
+ * Describes slice/subslice/EU information queried by
+ * %DRM_I915_QUERY_TOPOLOGY_INFO
*/
struct drm_i915_query_topology_info {
- /*
+ /**
+ * @flags:
+ *
* Unused for now. Must be cleared to zero.
*/
__u16 flags;
+ /**
+ * @max_slices:
+ *
+ * The number of bits used to express the slice mask.
+ */
__u16 max_slices;
+
+ /**
+ * @max_subslices:
+ *
+ * The number of bits used to express the subslice mask.
+ */
__u16 max_subslices;
+
+ /**
+ * @max_eus_per_subslice:
+ *
+ * The number of bits in the EU mask that correspond to a single
+ * subslice's EUs.
+ */
__u16 max_eus_per_subslice;
- /*
+ /**
+ * @subslice_offset:
+ *
* Offset in data[] at which the subslice masks are stored.
*/
__u16 subslice_offset;
- /*
+ /**
+ * @subslice_stride:
+ *
* Stride at which each of the subslice masks for each slice are
* stored.
*/
__u16 subslice_stride;
- /*
+ /**
+ * @eu_offset:
+ *
* Offset in data[] at which the EU masks are stored.
*/
__u16 eu_offset;
- /*
+ /**
+ * @eu_stride:
+ *
* Stride at which each of the EU masks for each subslice are stored.
*/
__u16 eu_stride;
+ /**
+ * @data:
+ *
+ * Contains 3 pieces of information :
+ *
+ * - The slice mask with one bit per slice telling whether a slice is
+ * available. The availability of slice X can be queried with the
+ * following formula :
+ *
+ * .. code:: c
+ *
+ * (data[X / 8] >> (X % 8)) & 1
+ *
+ * Starting with Xe_HP platforms, Intel hardware no longer has
+ * traditional slices so i915 will always report a single slice
+ * (hardcoded slicemask = 0x1) which contains all of the platform's
+ * subslices. I.e., the mask here does not reflect any of the newer
+ * hardware concepts such as "gslices" or "cslices" since userspace
+ * is capable of inferring those from the subslice mask.
+ *
+ * - The subslice mask for each slice with one bit per subslice telling
+ * whether a subslice is available. Starting with Gen12 we use the
+ * term "subslice" to refer to what the hardware documentation
+ * describes as a "dual-subslices." The availability of subslice Y
+ * in slice X can be queried with the following formula :
+ *
+ * .. code:: c
+ *
+ * (data[subslice_offset + X * subslice_stride + Y / 8] >> (Y % 8)) & 1
+ *
+ * - The EU mask for each subslice in each slice, with one bit per EU
+ * telling whether an EU is available. The availability of EU Z in
+ * subslice Y in slice X can be queried with the following formula :
+ *
+ * .. code:: c
+ *
+ * (data[eu_offset +
+ * (X * max_subslices + Y) * eu_stride +
+ * Z / 8
+ * ] >> (Z % 8)) & 1
+ */
__u8 data[];
};
struct drm_i915_engine_info engines[];
};
-/*
- * Data written by the kernel with query DRM_I915_QUERY_PERF_CONFIG.
+/**
+ * struct drm_i915_query_perf_config
+ *
+ * Data written by the kernel with query %DRM_I915_QUERY_PERF_CONFIG and
+ * %DRM_I915_QUERY_GEOMETRY_SUBSLICES.
*/
struct drm_i915_query_perf_config {
union {
- /*
- * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets
- * this fields to the number of configurations available.
+ /**
+ * @n_configs:
+ *
+ * When &drm_i915_query_item.flags ==
+ * %DRM_I915_QUERY_PERF_CONFIG_LIST, i915 sets this fields to
+ * the number of configurations available.
*/
__u64 n_configs;
- /*
- * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID,
- * i915 will use the value in this field as configuration
- * identifier to decide what data to write into config_ptr.
+ /**
+ * @config:
+ *
+ * When &drm_i915_query_item.flags ==
+ * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_ID, i915 will use the
+ * value in this field as configuration identifier to decide
+ * what data to write into config_ptr.
*/
__u64 config;
- /*
- * When query_id == DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID,
- * i915 will use the value in this field as configuration
- * identifier to decide what data to write into config_ptr.
+ /**
+ * @uuid:
+ *
+ * When &drm_i915_query_item.flags ==
+ * %DRM_I915_QUERY_PERF_CONFIG_DATA_FOR_UUID, i915 will use the
+ * value in this field as configuration identifier to decide
+ * what data to write into config_ptr.
*
* String formatted like "%08x-%04x-%04x-%04x-%012x"
*/
char uuid[36];
};
- /*
+ /**
+ * @flags:
+ *
* Unused for now. Must be cleared to zero.
*/
__u32 flags;
- /*
- * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_LIST, i915 will
- * write an array of __u64 of configuration identifiers.
+ /**
+ * @data:
*
- * When query_item.flags == DRM_I915_QUERY_PERF_CONFIG_DATA, i915 will
- * write a struct drm_i915_perf_oa_config. If the following fields of
- * drm_i915_perf_oa_config are set not set to 0, i915 will write into
- * the associated pointers the values of submitted when the
+ * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_LIST,
+ * i915 will write an array of __u64 of configuration identifiers.
+ *
+ * When &drm_i915_query_item.flags == %DRM_I915_QUERY_PERF_CONFIG_DATA,
+ * i915 will write a struct drm_i915_perf_oa_config. If the following
+ * fields of struct drm_i915_perf_oa_config are not set to 0, i915 will
+ * write into the associated pointers the values of submitted when the
* configuration was created :
*
- * - n_mux_regs
- * - n_boolean_regs
- * - n_flex_regs
+ * - &drm_i915_perf_oa_config.n_mux_regs
+ * - &drm_i915_perf_oa_config.n_boolean_regs
+ * - &drm_i915_perf_oa_config.n_flex_regs
*/
__u8 data[];
};
struct drm_i915_memory_region_info regions[];
};
+/**
+ * DOC: GuC HWCONFIG blob uAPI
+ *
+ * The GuC produces a blob with information about the current device.
+ * i915 reads this blob from GuC and makes it available via this uAPI.
+ *
+ * The format and meaning of the blob content are documented in the
+ * Programmer's Reference Manual.
+ */
+
/**
* struct drm_i915_gem_create_ext - Existing gem_create behaviour, with added
* extension support using struct i915_user_extension.
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset)
+ * long bpf_dynptr_read(void *dst, u32 len, struct bpf_dynptr *src, u32 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
- * of *src*'s data, -EINVAL if *src* is an invalid dynptr.
+ * of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
+ * *flags* is not 0.
*
- * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len)
+ * long bpf_dynptr_write(struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
+ * *flags* is currently unused.
* Return
* 0 on success, -E2BIG if *offset* + *len* exceeds the length
* of *dst*'s data, -EINVAL if *dst* is an invalid dynptr or if *dst*
- * is a read-only dynptr.
+ * is a read-only dynptr or if *flags* is not 0.
*
* void *bpf_dynptr_data(struct bpf_dynptr *ptr, u32 offset, u32 len)
* Description
#define KVM_SYSTEM_EVENT_SHUTDOWN 1
#define KVM_SYSTEM_EVENT_RESET 2
#define KVM_SYSTEM_EVENT_CRASH 3
+#define KVM_SYSTEM_EVENT_WAKEUP 4
+#define KVM_SYSTEM_EVENT_SUSPEND 5
+#define KVM_SYSTEM_EVENT_SEV_TERM 6
__u32 type;
__u32 ndata;
union {
#define KVM_MP_STATE_OPERATING 7
#define KVM_MP_STATE_LOAD 8
#define KVM_MP_STATE_AP_RESET_HOLD 9
+#define KVM_MP_STATE_SUSPENDED 10
struct kvm_mp_state {
__u32 mp_state;
#define KVM_CAP_S390_MEM_OP_EXTENSION 211
#define KVM_CAP_PMU_CAPABILITY 212
#define KVM_CAP_DISABLE_QUIRKS2 213
-/* #define KVM_CAP_VM_TSC_CONTROL 214 */
+#define KVM_CAP_VM_TSC_CONTROL 214
#define KVM_CAP_SYSTEM_EVENT_DATA 215
+#define KVM_CAP_ARM_SYSTEM_SUSPEND 216
#ifdef KVM_CAP_IRQ_ROUTING
#define KVM_XEN_HVM_CONFIG_SHARED_INFO (1 << 2)
#define KVM_XEN_HVM_CONFIG_RUNSTATE (1 << 3)
#define KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL (1 << 4)
+#define KVM_XEN_HVM_CONFIG_EVTCHN_SEND (1 << 5)
struct kvm_xen_hvm_config {
__u32 flags;
#define KVM_SET_PIT2 _IOW(KVMIO, 0xa0, struct kvm_pit_state2)
/* Available with KVM_CAP_PPC_GET_PVINFO */
#define KVM_PPC_GET_PVINFO _IOW(KVMIO, 0xa1, struct kvm_ppc_pvinfo)
-/* Available with KVM_CAP_TSC_CONTROL */
+/* Available with KVM_CAP_TSC_CONTROL for a vCPU, or with
+* KVM_CAP_VM_TSC_CONTROL to set defaults for a VM */
#define KVM_SET_TSC_KHZ _IO(KVMIO, 0xa2)
#define KVM_GET_TSC_KHZ _IO(KVMIO, 0xa3)
/* Available with KVM_CAP_PCI_2_3 */
struct {
__u64 gfn;
} shared_info;
+ struct {
+ __u32 send_port;
+ __u32 type; /* EVTCHNSTAT_ipi / EVTCHNSTAT_interdomain */
+ __u32 flags;
+#define KVM_XEN_EVTCHN_DEASSIGN (1 << 0)
+#define KVM_XEN_EVTCHN_UPDATE (1 << 1)
+#define KVM_XEN_EVTCHN_RESET (1 << 2)
+ /*
+ * Events sent by the guest are either looped back to
+ * the guest itself (potentially on a different port#)
+ * or signalled via an eventfd.
+ */
+ union {
+ struct {
+ __u32 port;
+ __u32 vcpu;
+ __u32 priority;
+ } port;
+ struct {
+ __u32 port; /* Zero for eventfd */
+ __s32 fd;
+ } eventfd;
+ __u32 padding[4];
+ } deliver;
+ } evtchn;
+ __u32 xen_version;
__u64 pad[8];
} u;
};
#define KVM_XEN_ATTR_TYPE_LONG_MODE 0x0
#define KVM_XEN_ATTR_TYPE_SHARED_INFO 0x1
#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR 0x2
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_ATTR_TYPE_EVTCHN 0x3
+#define KVM_XEN_ATTR_TYPE_XEN_VERSION 0x4
/* Per-vCPU Xen attributes */
#define KVM_XEN_VCPU_GET_ATTR _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
#define KVM_XEN_VCPU_SET_ATTR _IOW(KVMIO, 0xcb, struct kvm_xen_vcpu_attr)
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_HVM_EVTCHN_SEND _IOW(KVMIO, 0xd0, struct kvm_irq_routing_xen_evtchn)
+
#define KVM_GET_SREGS2 _IOR(KVMIO, 0xcc, struct kvm_sregs2)
#define KVM_SET_SREGS2 _IOW(KVMIO, 0xcd, struct kvm_sregs2)
__u64 time_blocked;
__u64 time_offline;
} runstate;
+ __u32 vcpu_id;
+ struct {
+ __u32 port;
+ __u32 priority;
+ __u64 expires_ns;
+ } timer;
+ __u8 vector;
} u;
};
#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT 0x3
#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA 0x4
#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_EVTCHN_SEND */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_ID 0x6
+#define KVM_XEN_VCPU_ATTR_TYPE_TIMER 0x7
+#define KVM_XEN_VCPU_ATTR_TYPE_UPCALL_VECTOR 0x8
/* Secure Encrypted Virtualization command */
enum sev_cmd_id {
#define KVM_STATS_UNIT_BYTES (0x1 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_SECONDS (0x2 << KVM_STATS_UNIT_SHIFT)
#define KVM_STATS_UNIT_CYCLES (0x3 << KVM_STATS_UNIT_SHIFT)
-#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_CYCLES
+#define KVM_STATS_UNIT_BOOLEAN (0x4 << KVM_STATS_UNIT_SHIFT)
+#define KVM_STATS_UNIT_MAX KVM_STATS_UNIT_BOOLEAN
#define KVM_STATS_BASE_SHIFT 8
#define KVM_STATS_BASE_MASK (0xF << KVM_STATS_BASE_SHIFT)
# define PR_SCHED_CORE_SCOPE_THREAD_GROUP 1
# define PR_SCHED_CORE_SCOPE_PROCESS_GROUP 2
+/* arm64 Scalable Matrix Extension controls */
+/* Flag values must be in sync with SVE versions */
+#define PR_SME_SET_VL 63 /* set task vector length */
+# define PR_SME_SET_VL_ONEXEC (1 << 18) /* defer effect until exec */
+#define PR_SME_GET_VL 64 /* get task vector length */
+/* Bits common to PR_SME_SET_VL and PR_SME_GET_VL */
+# define PR_SME_VL_LEN_MASK 0xffff
+# define PR_SME_VL_INHERIT (1 << 17) /* inherit across exec */
+
#define PR_SET_VMA 0x53564d41
# define PR_SET_VMA_ANON_NAME 0
/* Set or get vhost backend capability */
-/* Use message type V2 */
-#define VHOST_BACKEND_F_IOTLB_MSG_V2 0x1
-/* IOTLB can accept batching hints */
-#define VHOST_BACKEND_F_IOTLB_BATCH 0x2
-
#define VHOST_SET_BACKEND_FEATURES _IOW(VHOST_VIRTIO, 0x25, __u64)
#define VHOST_GET_BACKEND_FEATURES _IOR(VHOST_VIRTIO, 0x26, __u64)
/* Get the valid iova range */
#define VHOST_VDPA_GET_IOVA_RANGE _IOR(VHOST_VIRTIO, 0x78, \
struct vhost_vdpa_iova_range)
-
/* Get the config size */
#define VHOST_VDPA_GET_CONFIG_SIZE _IOR(VHOST_VIRTIO, 0x79, __u32)
/* Get the count of all virtqueues */
#define VHOST_VDPA_GET_VQS_COUNT _IOR(VHOST_VIRTIO, 0x80, __u32)
+/* Get the number of virtqueue groups. */
+#define VHOST_VDPA_GET_GROUP_NUM _IOR(VHOST_VIRTIO, 0x81, __u32)
+
+/* Get the number of address spaces. */
+#define VHOST_VDPA_GET_AS_NUM _IOR(VHOST_VIRTIO, 0x7A, unsigned int)
+
+/* Get the group for a virtqueue: read index, write group in num,
+ * The virtqueue index is stored in the index field of
+ * vhost_vring_state. The group for this specific virtqueue is
+ * returned via num field of vhost_vring_state.
+ */
+#define VHOST_VDPA_GET_VRING_GROUP _IOWR(VHOST_VIRTIO, 0x7B, \
+ struct vhost_vring_state)
+/* Set the ASID for a virtqueue group. The group index is stored in
+ * the index field of vhost_vring_state, the ASID associated with this
+ * group is stored at num field of vhost_vring_state.
+ */
+#define VHOST_VDPA_SET_GROUP_ASID _IOW(VHOST_VIRTIO, 0x7C, \
+ struct vhost_vring_state)
+
#endif
.format(values))
if len(pids) > 1:
sys.exit('Error: Multiple processes found (pids: {}). Use "-p"'
- ' to specify the desired pid'.format(" ".join(pids)))
+ ' to specify the desired pid'
+ .format(" ".join(map(str, pids))))
namespace.pid = pids[0]
argparser = argparse.ArgumentParser(description=description_text,
int fd, group_fd, *evsel_fd;
evsel_fd = FD(evsel, idx, thread);
- if (evsel_fd == NULL)
- return -EINVAL;
+ if (evsel_fd == NULL) {
+ err = -EINVAL;
+ goto out;
+ }
err = get_group_fd(evsel, idx, thread, &group_fd);
if (err < 0)
- return err;
+ goto out;
fd = sys_perf_event_open(&evsel->attr,
threads->map[thread].pid,
cpu, group_fd, 0);
- if (fd < 0)
- return -errno;
+ if (fd < 0) {
+ err = -errno;
+ goto out;
+ }
*evsel_fd = fd;
}
}
+out:
+ if (err)
+ perf_evsel__close(evsel);
return err;
}
{
return !strncmp(sym->name, "__x86_indirect_", 15);
}
+
+bool arch_is_rethunk(struct symbol *sym)
+{
+ return !strcmp(sym->name, "__x86_return_thunk");
+}
OPT_BOOLEAN('n', "noinstr", &opts.noinstr, "validate noinstr rules"),
OPT_BOOLEAN('o', "orc", &opts.orc, "generate ORC metadata"),
OPT_BOOLEAN('r', "retpoline", &opts.retpoline, "validate and annotate retpoline usage"),
+ OPT_BOOLEAN(0, "rethunk", &opts.rethunk, "validate and annotate rethunk usage"),
+ OPT_BOOLEAN(0, "unret", &opts.unret, "validate entry unret placement"),
OPT_BOOLEAN('l', "sls", &opts.sls, "validate straight-line-speculation mitigations"),
OPT_BOOLEAN('s', "stackval", &opts.stackval, "validate frame pointer rules"),
OPT_BOOLEAN('t', "static-call", &opts.static_call, "annotate static calls"),
opts.noinstr ||
opts.orc ||
opts.retpoline ||
+ opts.rethunk ||
opts.sls ||
opts.stackval ||
opts.static_call ||
return true;
}
+ if (opts.unret && !opts.rethunk) {
+ ERROR("--unret requires --rethunk");
+ return false;
+ }
+
if (opts.dump_orc)
return true;
return false;
}
+ if (opts.unret) {
+ ERROR("--unret requires --link");
+ return false;
+ }
+
return true;
}
sec->text = true;
if (!strcmp(sec->name, ".noinstr.text") ||
- !strcmp(sec->name, ".entry.text"))
+ !strcmp(sec->name, ".entry.text") ||
+ !strncmp(sec->name, ".text.__x86.", 12))
sec->noinstr = true;
for (offset = 0; offset < sec->sh.sh_size; offset += insn->len) {
return 0;
}
+static int create_return_sites_sections(struct objtool_file *file)
+{
+ struct instruction *insn;
+ struct section *sec;
+ int idx;
+
+ sec = find_section_by_name(file->elf, ".return_sites");
+ if (sec) {
+ WARN("file already has .return_sites, skipping");
+ return 0;
+ }
+
+ idx = 0;
+ list_for_each_entry(insn, &file->return_thunk_list, call_node)
+ idx++;
+
+ if (!idx)
+ return 0;
+
+ sec = elf_create_section(file->elf, ".return_sites", 0,
+ sizeof(int), idx);
+ if (!sec) {
+ WARN("elf_create_section: .return_sites");
+ return -1;
+ }
+
+ idx = 0;
+ list_for_each_entry(insn, &file->return_thunk_list, call_node) {
+
+ int *site = (int *)sec->data->d_buf + idx;
+ *site = 0;
+
+ if (elf_add_reloc_to_insn(file->elf, sec,
+ idx * sizeof(int),
+ R_X86_64_PC32,
+ insn->sec, insn->offset)) {
+ WARN("elf_add_reloc_to_insn: .return_sites");
+ return -1;
+ }
+
+ idx++;
+ }
+
+ return 0;
+}
+
static int create_ibt_endbr_seal_sections(struct objtool_file *file)
{
struct instruction *insn;
return false;
}
+__weak bool arch_is_rethunk(struct symbol *sym)
+{
+ return false;
+}
+
#define NEGATIVE_RELOC ((void *)-1L)
static struct reloc *insn_reloc(struct objtool_file *file, struct instruction *insn)
annotate_call_site(file, insn, false);
}
+static void add_return_call(struct objtool_file *file, struct instruction *insn, bool add)
+{
+ /*
+ * Return thunk tail calls are really just returns in disguise,
+ * so convert them accordingly.
+ */
+ insn->type = INSN_RETURN;
+ insn->retpoline_safe = true;
+
+ if (add)
+ list_add_tail(&insn->call_node, &file->return_thunk_list);
+}
+
static bool same_function(struct instruction *insn1, struct instruction *insn2)
{
return insn1->func->pfunc == insn2->func->pfunc;
} else if (reloc->sym->retpoline_thunk) {
add_retpoline_call(file, insn);
continue;
+ } else if (reloc->sym->return_thunk) {
+ add_return_call(file, insn, true);
+ continue;
} else if (insn->func) {
/*
* External sibling call or internal sibling call with
jump_dest = find_insn(file, dest_sec, dest_off);
if (!jump_dest) {
+ struct symbol *sym = find_symbol_by_offset(dest_sec, dest_off);
+
+ /*
+ * This is a special case for zen_untrain_ret().
+ * It jumps to __x86_return_thunk(), but objtool
+ * can't find the thunk's starting RET
+ * instruction, because the RET is also in the
+ * middle of another instruction. Objtool only
+ * knows about the outer instruction.
+ */
+ if (sym && sym->return_thunk) {
+ add_return_call(file, insn, false);
+ continue;
+ }
+
WARN_FUNC("can't find jump dest instruction at %s+0x%lx",
insn->sec, insn->offset, dest_sec->name,
dest_off);
insn->hint = true;
- if (opts.ibt && hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
+ if (hint->type == UNWIND_HINT_TYPE_SAVE) {
+ insn->hint = false;
+ insn->save = true;
+ continue;
+ }
+
+ if (hint->type == UNWIND_HINT_TYPE_RESTORE) {
+ insn->restore = true;
+ continue;
+ }
+
+ if (hint->type == UNWIND_HINT_TYPE_REGS_PARTIAL) {
struct symbol *sym = find_symbol_by_offset(insn->sec, insn->offset);
- if (sym && sym->bind == STB_GLOBAL &&
- insn->type != INSN_ENDBR && !insn->noendbr) {
- WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
- insn->sec, insn->offset);
+ if (sym && sym->bind == STB_GLOBAL) {
+ if (opts.ibt && insn->type != INSN_ENDBR && !insn->noendbr) {
+ WARN_FUNC("UNWIND_HINT_IRET_REGS without ENDBR",
+ insn->sec, insn->offset);
+ }
+
+ insn->entry = 1;
}
}
+ if (hint->type == UNWIND_HINT_TYPE_ENTRY) {
+ hint->type = UNWIND_HINT_TYPE_CALL;
+ insn->entry = 1;
+ }
+
if (hint->type == UNWIND_HINT_TYPE_FUNC) {
insn->cfi = &func_cfi;
continue;
}
if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC) {
- WARN_FUNC("retpoline_safe hint not an indirect jump/call",
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN &&
+ insn->type != INSN_NOP) {
+ WARN_FUNC("retpoline_safe hint not an indirect jump/call/ret/nop",
insn->sec, insn->offset);
return -1;
}
if (arch_is_retpoline(func))
func->retpoline_thunk = true;
+ if (arch_is_rethunk(func))
+ func->return_thunk = true;
+
if (!strcmp(func->name, "__fentry__"))
func->fentry = true;
return 1;
}
- visited = 1 << state.uaccess;
- if (insn->visited) {
+ visited = VISITED_BRANCH << state.uaccess;
+ if (insn->visited & VISITED_BRANCH_MASK) {
if (!insn->hint && !insn_cfi_match(insn, &state.cfi))
return 1;
state.instr += insn->instr;
if (insn->hint) {
+ if (insn->restore) {
+ struct instruction *save_insn, *i;
+
+ i = insn;
+ save_insn = NULL;
+
+ sym_for_each_insn_continue_reverse(file, func, i) {
+ if (i->save) {
+ save_insn = i;
+ break;
+ }
+ }
+
+ if (!save_insn) {
+ WARN_FUNC("no corresponding CFI save for CFI restore",
+ sec, insn->offset);
+ return 1;
+ }
+
+ if (!save_insn->visited) {
+ WARN_FUNC("objtool isn't smart enough to handle this CFI save/restore combo",
+ sec, insn->offset);
+ return 1;
+ }
+
+ insn->cfi = save_insn->cfi;
+ nr_cfi_reused++;
+ }
+
state.cfi = *insn->cfi;
} else {
/* XXX track if we actually changed state.cfi */
return warnings;
}
+/*
+ * Validate rethunk entry constraint: must untrain RET before the first RET.
+ *
+ * Follow every branch (intra-function) and ensure ANNOTATE_UNRET_END comes
+ * before an actual RET instruction.
+ */
+static int validate_entry(struct objtool_file *file, struct instruction *insn)
+{
+ struct instruction *next, *dest;
+ int ret, warnings = 0;
+
+ for (;;) {
+ next = next_insn_to_validate(file, insn);
+
+ if (insn->visited & VISITED_ENTRY)
+ return 0;
+
+ insn->visited |= VISITED_ENTRY;
+
+ if (!insn->ignore_alts && !list_empty(&insn->alts)) {
+ struct alternative *alt;
+ bool skip_orig = false;
+
+ list_for_each_entry(alt, &insn->alts, list) {
+ if (alt->skip_orig)
+ skip_orig = true;
+
+ ret = validate_entry(file, alt->insn);
+ if (ret) {
+ if (opts.backtrace)
+ BT_FUNC("(alt)", insn);
+ return ret;
+ }
+ }
+
+ if (skip_orig)
+ return 0;
+ }
+
+ switch (insn->type) {
+
+ case INSN_CALL_DYNAMIC:
+ case INSN_JUMP_DYNAMIC:
+ case INSN_JUMP_DYNAMIC_CONDITIONAL:
+ WARN_FUNC("early indirect call", insn->sec, insn->offset);
+ return 1;
+
+ case INSN_JUMP_UNCONDITIONAL:
+ case INSN_JUMP_CONDITIONAL:
+ if (!is_sibling_call(insn)) {
+ if (!insn->jump_dest) {
+ WARN_FUNC("unresolved jump target after linking?!?",
+ insn->sec, insn->offset);
+ return -1;
+ }
+ ret = validate_entry(file, insn->jump_dest);
+ if (ret) {
+ if (opts.backtrace) {
+ BT_FUNC("(branch%s)", insn,
+ insn->type == INSN_JUMP_CONDITIONAL ? "-cond" : "");
+ }
+ return ret;
+ }
+
+ if (insn->type == INSN_JUMP_UNCONDITIONAL)
+ return 0;
+
+ break;
+ }
+
+ /* fallthrough */
+ case INSN_CALL:
+ dest = find_insn(file, insn->call_dest->sec,
+ insn->call_dest->offset);
+ if (!dest) {
+ WARN("Unresolved function after linking!?: %s",
+ insn->call_dest->name);
+ return -1;
+ }
+
+ ret = validate_entry(file, dest);
+ if (ret) {
+ if (opts.backtrace)
+ BT_FUNC("(call)", insn);
+ return ret;
+ }
+ /*
+ * If a call returns without error, it must have seen UNTRAIN_RET.
+ * Therefore any non-error return is a success.
+ */
+ return 0;
+
+ case INSN_RETURN:
+ WARN_FUNC("RET before UNTRAIN", insn->sec, insn->offset);
+ return 1;
+
+ case INSN_NOP:
+ if (insn->retpoline_safe)
+ return 0;
+ break;
+
+ default:
+ break;
+ }
+
+ if (!next) {
+ WARN_FUNC("teh end!", insn->sec, insn->offset);
+ return -1;
+ }
+ insn = next;
+ }
+
+ return warnings;
+}
+
+/*
+ * Validate that all branches starting at 'insn->entry' encounter UNRET_END
+ * before RET.
+ */
+static int validate_unret(struct objtool_file *file)
+{
+ struct instruction *insn;
+ int ret, warnings = 0;
+
+ for_each_insn(file, insn) {
+ if (!insn->entry)
+ continue;
+
+ ret = validate_entry(file, insn);
+ if (ret < 0) {
+ WARN_FUNC("Failed UNRET validation", insn->sec, insn->offset);
+ return ret;
+ }
+ warnings += ret;
+ }
+
+ return warnings;
+}
+
static int validate_retpoline(struct objtool_file *file)
{
struct instruction *insn;
for_each_insn(file, insn) {
if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC)
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN)
continue;
if (insn->retpoline_safe)
if (!strcmp(insn->sec->name, ".init.text") && !opts.module)
continue;
- WARN_FUNC("indirect %s found in RETPOLINE build",
- insn->sec, insn->offset,
- insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ if (insn->type == INSN_RETURN) {
+ if (opts.rethunk) {
+ WARN_FUNC("'naked' return found in RETHUNK build",
+ insn->sec, insn->offset);
+ } else
+ continue;
+ } else {
+ WARN_FUNC("indirect %s found in RETPOLINE build",
+ insn->sec, insn->offset,
+ insn->type == INSN_JUMP_DYNAMIC ? "jump" : "call");
+ }
warnings++;
}
!strcmp(sec->name, "__bug_table") ||
!strcmp(sec->name, "__ex_table") ||
!strcmp(sec->name, "__jump_table") ||
- !strcmp(sec->name, "__mcount_loc") ||
- !strcmp(sec->name, "__tracepoints"))
+ !strcmp(sec->name, "__mcount_loc"))
continue;
list_for_each_entry(reloc, &sec->reloc->reloc_list, list)
warnings += ret;
}
+ if (opts.unret) {
+ /*
+ * Must be after validate_branch() and friends, it plays
+ * further games with insn->visited.
+ */
+ ret = validate_unret(file);
+ if (ret < 0)
+ return ret;
+ warnings += ret;
+ }
+
if (opts.ibt) {
ret = validate_ibt(file);
if (ret < 0)
warnings += ret;
}
+ if (opts.rethunk) {
+ ret = create_return_sites_sections(file);
+ if (ret < 0)
+ goto out;
+ warnings += ret;
+ }
+
if (opts.mcount) {
ret = create_mcount_loc_sections(file);
if (ret < 0)
int arch_decode_hint_reg(u8 sp_reg, int *base);
bool arch_is_retpoline(struct symbol *sym);
+bool arch_is_rethunk(struct symbol *sym);
int arch_rewrite_retpolines(struct objtool_file *file);
bool noinstr;
bool orc;
bool retpoline;
+ bool rethunk;
+ bool unret;
bool sls;
bool stackval;
bool static_call;
enum insn_type type;
unsigned long immediate;
- u8 dead_end : 1,
- ignore : 1,
- ignore_alts : 1,
- hint : 1,
- retpoline_safe : 1,
- noendbr : 1;
- /* 2 bit hole */
+ u16 dead_end : 1,
+ ignore : 1,
+ ignore_alts : 1,
+ hint : 1,
+ save : 1,
+ restore : 1,
+ retpoline_safe : 1,
+ noendbr : 1,
+ entry : 1;
+ /* 7 bit hole */
+
s8 instr;
u8 visited;
- /* u8 hole */
struct alt_group *alt_group;
struct symbol *call_dest;
struct cfi_state *cfi;
};
+#define VISITED_BRANCH 0x01
+#define VISITED_BRANCH_UACCESS 0x02
+#define VISITED_BRANCH_MASK 0x03
+#define VISITED_ENTRY 0x04
+
static inline bool is_static_jump(struct instruction *insn)
{
return insn->type == INSN_JUMP_CONDITIONAL ||
u8 uaccess_safe : 1;
u8 static_call_tramp : 1;
u8 retpoline_thunk : 1;
+ u8 return_thunk : 1;
u8 fentry : 1;
u8 profiling_func : 1;
struct list_head pv_target;
struct list_head insn_list;
DECLARE_HASHTABLE(insn_hash, 20);
struct list_head retpoline_call_list;
+ struct list_head return_thunk_list;
struct list_head static_call_list;
struct list_head mcount_loc_list;
struct list_head endbr_list;
INIT_LIST_HEAD(&file.insn_list);
hash_init(file.insn_hash);
INIT_LIST_HEAD(&file.retpoline_call_list);
+ INIT_LIST_HEAD(&file.return_thunk_list);
INIT_LIST_HEAD(&file.static_call_list);
INIT_LIST_HEAD(&file.mcount_loc_list);
INIT_LIST_HEAD(&file.endbr_list);
if (ret < 0)
return ret;
pr_debug("%s\n", cmd);
- return system(cmd);
+ ret = system(cmd);
+ free(cmd);
+ return ret;
}
static int output_fd(struct perf_inject *inject)
inject->tool.tracing_data = perf_event__repipe_tracing_data;
}
- output_data_offset = session->header.data_offset;
+ output_data_offset = perf_session__data_offset(session->evlist);
if (inject->build_id_all) {
inject->tool.mmap = perf_event__repipe_buildid_mmap;
if (evlist__initialize_ctlfd(evsel_list, stat_config.ctl_fd, stat_config.ctl_fd_ack))
goto out;
+ /* Enable ignoring missing threads when -p option is defined. */
+ evlist__first(evsel_list)->ignore_missing_thread = target.pid;
status = 0;
for (run_idx = 0; forever || run_idx < stat_config.run_count; run_idx++) {
if (stat_config.run_count != 1 && verbose > 0)
goto out;
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_enter");
+ trace->syscalls.events.sys_enter = evsel;
/* older kernels have syscalls tp versus raw_syscalls */
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_enter");
}
evsel = evlist__find_tracepoint_by_name(session->evlist, "raw_syscalls:sys_exit");
+ trace->syscalls.events.sys_exit = evsel;
if (evsel == NULL)
evsel = evlist__find_tracepoint_by_name(session->evlist, "syscalls:sys_exit");
if (evsel &&
def get_offset(perf_dict, field):
if field in perf_dict:
- return f"+0x{perf_dict[field]:x}"
+ return "+%#x" % perf_dict[field]
return ""
def get_dso_file_path(dso_name, dso_build_id):
else:
append = "/elf"
- dso_path = f"{os.environ['PERF_BUILDID_DIR']}/{dso_name}/{dso_build_id}{append}"
+ dso_path = os.environ['PERF_BUILDID_DIR'] + "/" + dso_name + "/" + dso_build_id + append;
# Replace duplicate slash chars to single slash char
dso_path = dso_path.replace('//', '/', 1)
return dso_path
start_addr = start_addr - dso_start;
stop_addr = stop_addr - dso_start;
disasm = [ options.objdump_name, "-d", "-z",
- f"--start-address=0x{start_addr:x}",
- f"--stop-address=0x{stop_addr:x}" ]
+ "--start-address="+format(start_addr,"#x"),
+ "--stop-address="+format(stop_addr,"#x") ]
disasm += [ dso_fname ]
disasm_output = check_output(disasm).decode('utf-8').split('\n')
disasm_cache[addr_range] = disasm_output
m = disasm_re.search(line)
if m is None:
continue
- print(f"\t{line}")
+ print("\t" + line)
def print_sample(sample):
- print(f"Sample = {{ cpu: {sample['cpu']:04} addr: 0x{sample['addr']:016x} " \
- f"phys_addr: 0x{sample['phys_addr']:016x} ip: 0x{sample['ip']:016x} " \
- f"pid: {sample['pid']} tid: {sample['tid']} period: {sample['period']} time: {sample['time']} }}")
+ print("Sample = { cpu: %04d addr: 0x%016x phys_addr: 0x%016x ip: 0x%016x " \
+ "pid: %d tid: %d period: %d time: %d }" % \
+ (sample['cpu'], sample['addr'], sample['phys_addr'], \
+ sample['ip'], sample['pid'], sample['tid'], \
+ sample['period'], sample['time']))
def trace_begin():
print('ARM CoreSight Trace Data Assembler Dump')
cpu = sample["cpu"]
pid = sample["pid"]
tid = sample["tid"]
- return f"{comm:>16} {pid:>5}/{tid:<5} [{cpu:04}] {sec:9}.{ns:09} "
+ return "%16s %5u/%-5u [%04u] %9u.%09u " % (comm, pid, tid, cpu, sec, ns)
# This code is copied from intel-pt-events.py for printing source code
# line and symbols.
glb_line_number = line_number
glb_source_file_name = source_file_name
- print(f"{start_str}{src_str}")
+ print(start_str, src_str)
def process_event(param_dict):
global cache_size
symbol = get_optional(param_dict, "symbol")
if (options.verbose == True):
- print(f"Event type: {name}")
+ print("Event type: %s" % name)
print_sample(sample)
# If cannot find dso so cannot dump assembler, bail out
# Validate dso start and end addresses
if ((dso_start == '[unknown]') or (dso_end == '[unknown]')):
- print(f"Failed to find valid dso map for dso {dso}")
+ print("Failed to find valid dso map for dso %s" % dso)
return
if (name[0:12] == "instructions"):
# Handle CS_ETM_TRACE_ON packet if start_addr=0 and stop_addr=4
if (start_addr == 0 and stop_addr == 4):
- print(f"CPU{cpu}: CS_ETM_TRACE_ON packet is inserted")
+ print("CPU%d: CS_ETM_TRACE_ON packet is inserted" % cpu)
return
if (start_addr < int(dso_start) or start_addr > int(dso_end)):
- print(f"Start address 0x{start_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Start address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (start_addr, int(dso_start), int(dso_end), dso))
return
if (stop_addr < int(dso_start) or stop_addr > int(dso_end)):
- print(f"Stop address 0x{stop_addr:x} is out of range [ 0x{dso_start:x} .. 0x{dso_end:x} ] for dso {dso}")
+ print("Stop address 0x%x is out of range [ 0x%x .. 0x%x ] for dso %s" % (stop_addr, int(dso_start), int(dso_end), dso))
return
if (options.objdump_name != None):
if path.exists(dso_fname):
print_disam(dso_fname, dso_vm_start, start_addr, stop_addr)
else:
- print(f"Failed to find dso {dso} for address range [ 0x{start_addr:x} .. 0x{stop_addr:x} ]")
+ print("Failed to find dso %s for address range [ 0x%x .. 0x%x ]" % (dso, start_addr, stop_addr))
print_srccode(comm, param_dict, sample, symbol, dso)
static int detect_share(int wp_cnt, int bp_cnt)
{
struct perf_event_attr attr;
- int i, fd[wp_cnt + bp_cnt], ret;
+ int i, *fd = NULL, ret = -1;
+
+ if (wp_cnt + bp_cnt == 0)
+ return 0;
+
+ fd = malloc(sizeof(int) * (wp_cnt + bp_cnt));
+ if (!fd)
+ return -1;
for (i = 0; i < wp_cnt; i++) {
fd[i] = wp_event((void *)&the_var, &attr);
- TEST_ASSERT_VAL("failed to create wp\n", fd[i] != -1);
+ if (fd[i] == -1) {
+ pr_err("failed to create wp\n");
+ goto out;
+ }
}
for (; i < (bp_cnt + wp_cnt); i++) {
ret = i != (bp_cnt + wp_cnt);
+out:
while (i--)
close(fd[i]);
+ free(fd);
return ret;
}
ret |= test(ctx, "2.2 > 2.2", 0);
ret |= test(ctx, "2.2 < 1.1", 0);
ret |= test(ctx, "1.1 > 2.2", 0);
+ ret |= test(ctx, "1.1e10 < 1.1e100", 1);
+ ret |= test(ctx, "1.1e2 > 1.1e-2", 1);
if (ret) {
expr__ctx_free(ctx);
#include "tsc.h"
#include "mmap.h"
#include "tests.h"
-#include "pmu.h"
-#include "pmu-hybrid.h"
/*
* Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just
evlist__config(evlist, &opts, NULL);
- evsel = evlist__first(evlist);
-
- evsel->core.attr.comm = 1;
- evsel->core.attr.disabled = 1;
- evsel->core.attr.enable_on_exec = 0;
-
- /*
- * For hybrid "cycles:u", it creates two events.
- * Init the second evsel here.
- */
- if (perf_pmu__has_hybrid() && perf_pmu__hybrid_mounted("cpu_atom")) {
- evsel = evsel__next(evsel);
+ /* For hybrid "cycles:u", it creates two events */
+ evlist__for_each_entry(evlist, evsel) {
evsel->core.attr.comm = 1;
evsel->core.attr.disabled = 1;
evsel->core.attr.enable_on_exec = 0;
}
- if (evlist__open(evlist) == -ENOENT) {
- err = TEST_SKIP;
+ ret = evlist__open(evlist);
+ if (ret < 0) {
+ if (ret == -ENOENT)
+ err = TEST_SKIP;
+ else
+ pr_debug("evlist__open() failed\n");
goto out_err;
}
- CHECK__(evlist__open(evlist));
CHECK__(evlist__mmap(evlist, UINT_MAX));
goto next_event;
if (strcmp(event->comm.comm, comm1) == 0) {
+ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
CHECK__(evsel__parse_sample(evsel, event, &sample));
comm1_time = sample.time;
}
if (strcmp(event->comm.comm, comm2) == 0) {
+ CHECK_NOT_NULL__(evsel = evlist__event2evsel(evlist, event));
CHECK__(evsel__parse_sample(evsel, event, &sample));
comm2_time = sample.time;
}
+++ /dev/null
-#!/usr/bin/python
-# SPDX-License-Identifier: GPL-2.0
-
-import argparse
-import sys
-
-# Basic sanity check of perf CSV output as specified in the man page.
-# Currently just checks the number of fields per line in output.
-
-ap = argparse.ArgumentParser()
-ap.add_argument('--no-args', action='store_true')
-ap.add_argument('--interval', action='store_true')
-ap.add_argument('--system-wide-no-aggr', action='store_true')
-ap.add_argument('--system-wide', action='store_true')
-ap.add_argument('--event', action='store_true')
-ap.add_argument('--per-core', action='store_true')
-ap.add_argument('--per-thread', action='store_true')
-ap.add_argument('--per-die', action='store_true')
-ap.add_argument('--per-node', action='store_true')
-ap.add_argument('--per-socket', action='store_true')
-ap.add_argument('--separator', default=',', nargs='?')
-args = ap.parse_args()
-
-Lines = sys.stdin.readlines()
-
-def check_csv_output(exp):
- for line in Lines:
- if 'failed' not in line:
- count = line.count(args.separator)
- if count != exp:
- sys.stdout.write(''.join(Lines))
- raise RuntimeError(f'wrong number of fields. expected {exp} in {line}')
-
-try:
- if args.no_args or args.system_wide or args.event:
- expected_items = 6
- elif args.interval or args.per_thread or args.system_wide_no_aggr:
- expected_items = 7
- elif args.per_core or args.per_socket or args.per_node or args.per_die:
- expected_items = 8
- else:
- ap.print_help()
- raise RuntimeError('No checking option specified')
- check_csv_output(expected_items)
-
-except:
- sys.stdout.write('Test failed for input: ' + ''.join(Lines))
- raise
set -e
-pythonchecker=$(dirname $0)/lib/perf_csv_output_lint.py
-if [ "x$PYTHON" == "x" ]
-then
- if which python3 > /dev/null
- then
- PYTHON=python3
- elif which python > /dev/null
- then
- PYTHON=python
- else
- echo Skipping test, python not detected please set environment variable PYTHON.
- exit 2
- fi
-fi
+function commachecker()
+{
+ local -i cnt=0 exp=0
+
+ case "$1"
+ in "--no-args") exp=6
+ ;; "--system-wide") exp=6
+ ;; "--event") exp=6
+ ;; "--interval") exp=7
+ ;; "--per-thread") exp=7
+ ;; "--system-wide-no-aggr") exp=7
+ [ $(uname -m) = "s390x" ] && exp=6
+ ;; "--per-core") exp=8
+ ;; "--per-socket") exp=8
+ ;; "--per-node") exp=8
+ ;; "--per-die") exp=8
+ esac
+
+ while read line
+ do
+ # Check for lines beginning with Failed
+ x=${line:0:6}
+ [ "$x" = "Failed" ] && continue
+
+ # Count the number of commas
+ x=$(echo $line | tr -d -c ',')
+ cnt="${#x}"
+ # echo $line $cnt
+ [ "$cnt" -ne "$exp" ] && {
+ echo "wrong number of fields. expected $exp in $line" 1>&2
+ exit 1;
+ }
+ done
+ return 0
+}
# Return true if perf_event_paranoid is > $1 and not running as root.
function ParanoidAndNotRoot()
check_no_args()
{
echo -n "Checking CSV output: no args "
- perf stat -x, true 2>&1 | $PYTHON $pythonchecker --no-args
+ perf stat -x, true 2>&1 | commachecker --no-args
echo "[Success]"
}
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, -a true 2>&1 | $PYTHON $pythonchecker --system-wide
+ perf stat -x, -a true 2>&1 | commachecker --system-wide
echo "[Success]"
}
return
fi
echo -n "Checking CSV output: system wide no aggregation "
- perf stat -x, -A -a --no-merge true 2>&1 | $PYTHON $pythonchecker --system-wide-no-aggr
+ perf stat -x, -A -a --no-merge true 2>&1 | commachecker --system-wide-no-aggr
echo "[Success]"
}
check_interval()
{
echo -n "Checking CSV output: interval "
- perf stat -x, -I 1000 true 2>&1 | $PYTHON $pythonchecker --interval
+ perf stat -x, -I 1000 true 2>&1 | commachecker --interval
echo "[Success]"
}
check_event()
{
echo -n "Checking CSV output: event "
- perf stat -x, -e cpu-clock true 2>&1 | $PYTHON $pythonchecker --event
+ perf stat -x, -e cpu-clock true 2>&1 | commachecker --event
echo "[Success]"
}
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-core -a true 2>&1 | $PYTHON $pythonchecker --per-core
+ perf stat -x, --per-core -a true 2>&1 | commachecker --per-core
echo "[Success]"
}
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-thread -a true 2>&1 | $PYTHON $pythonchecker --per-thread
+ perf stat -x, --per-thread -a true 2>&1 | commachecker --per-thread
echo "[Success]"
}
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-die -a true 2>&1 | $PYTHON $pythonchecker --per-die
+ perf stat -x, --per-die -a true 2>&1 | commachecker --per-die
echo "[Success]"
}
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-node -a true 2>&1 | $PYTHON $pythonchecker --per-node
+ perf stat -x, --per-node -a true 2>&1 | commachecker --per-node
echo "[Success]"
}
echo "[Skip] paranoid and not root"
return
fi
- perf stat -x, --per-socket -a true 2>&1 | $PYTHON $pythonchecker --per-socket
+ perf stat -x, --per-socket -a true 2>&1 | commachecker --per-socket
echo "[Success]"
}
cc $CFLAGS $TEST_PROGRAM_SOURCE -o $TEST_PROGRAM || exit 1
# Add a 1 second delay to skip samples that are not in the leaf() function
-perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 -- $TEST_PROGRAM 2> /dev/null &
+perf record -o $PERF_DATA --call-graph fp -e cycles//u -D 1000 --user-callchains -- $TEST_PROGRAM 2> /dev/null &
PID=$!
echo " + Recording (PID=$PID)..."
* physical_package_id will be set to -1. Hence skip this
* test if physical_package_id returns -1 for cpu from perf_cpu_map.
*/
- if (strncmp(session->header.env.arch, "powerpc", 7)) {
+ if (!strncmp(session->header.env.arch, "ppc64le", 7)) {
if (cpu__get_socket_id(perf_cpu_map__cpu(map, 0)) == -1)
return TEST_SKIP;
}
local arch=$(arch_string "$1")
local nr name
- cat <<EoFuncBegin
-static const char *errno_to_name__$arch(int err)
-{
- switch (err) {
-EoFuncBegin
+ printf "static const char *errno_to_name__%s(int err)\n{\n\tswitch (err) {\n" $arch
while read name nr; do
printf '\tcase %d: return "%s";\n' $nr $name
done
- cat <<EoFuncEnd
- default:
- return "(unknown)";
- }
-}
-
-EoFuncEnd
+ printf '\tdefault: return "(unknown)";\n\t}\n}\n'
}
process_arch()
struct msghdr {
void *msg_name; /* ptr to socket address structure */
int msg_namelen; /* size of socket address structure */
+
+ int msg_inq; /* output, data left in socket */
+
struct iov_iter msg_iter; /* data */
/*
void __user *msg_control_user;
};
bool msg_control_is_user : 1;
- __kernel_size_t msg_controllen; /* ancillary data buffer length */
+ bool msg_get_inq : 1;/* return INQ after receive */
unsigned int msg_flags; /* flags on received message */
+ __kernel_size_t msg_controllen; /* ancillary data buffer length */
struct kiocb *msg_iocb; /* ptr to iocb for async requests */
};
extern int __sys_accept4(int fd, struct sockaddr __user *upeer_sockaddr,
int __user *upeer_addrlen, int flags);
extern int __sys_socket(int family, int type, int protocol);
+extern struct file *__sys_socket_file(int family, int type, int protocol);
extern int __sys_bind(int fd, struct sockaddr __user *umyaddr, int addrlen);
extern int __sys_connect_file(struct file *file, struct sockaddr_storage *addr,
int addrlen, int file_flags);
return arm_spe_deliver_synth_event(spe, speq, event, &sample);
}
-#define SPE_MEM_TYPE (ARM_SPE_L1D_ACCESS | ARM_SPE_L1D_MISS | \
- ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS | \
- ARM_SPE_REMOTE_ACCESS)
-
-static bool arm_spe__is_memory_event(enum arm_spe_sample_type type)
-{
- if (type & SPE_MEM_TYPE)
- return true;
-
- return false;
-}
-
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record)
{
union perf_mem_data_src data_src = { 0 };
if (record->op == ARM_SPE_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
- else
+ else if (record->op == ARM_SPE_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
+ else
+ return 0;
if (record->type & (ARM_SPE_LLC_ACCESS | ARM_SPE_LLC_MISS)) {
data_src.mem_lvl = PERF_MEM_LVL_L3;
return err;
}
- if (spe->sample_memory && arm_spe__is_memory_event(record->type)) {
+ /*
+ * When data_src is zero it means the record is not a memory operation,
+ * skip to synthesize memory sample for this case.
+ */
+ if (spe->sample_memory && data_src) {
err = arm_spe__synth_mem_sample(speq, spe->memory_id, data_src);
if (err)
return err;
static struct bpf_perf_object *
bpf_perf_object__next(struct bpf_perf_object *prev)
{
- struct bpf_perf_object *next;
-
- if (!prev)
- next = list_first_entry(&bpf_objects_list,
- struct bpf_perf_object,
- list);
- else
- next = list_next_entry(prev, list);
+ if (!prev) {
+ if (list_empty(&bpf_objects_list))
+ return NULL;
- /* Empty list is noticed here so don't need checking on entry. */
- if (&next->list == &bpf_objects_list)
+ return list_first_entry(&bpf_objects_list, struct bpf_perf_object, list);
+ }
+ if (list_is_last(&prev->list, &bpf_objects_list))
return NULL;
- return next;
+ return list_next_entry(prev, list);
}
#define bpf_perf_object__for_each(perf_obj, tmp) \
count = bpf_prog_info_read_offset_u32(&info, desc->count_offset);
size = bpf_prog_info_read_offset_u32(&info, desc->size_offset);
- data_len += count * size;
+ data_len += roundup(count * size, sizeof(__u64));
}
/* step 3: allocate continuous memory */
- data_len = roundup(data_len, sizeof(__u64));
info_linear = malloc(sizeof(struct perf_bpil) + data_len);
if (!info_linear)
return ERR_PTR(-ENOMEM);
bpf_prog_info_set_offset_u64(&info_linear->info,
desc->array_offset,
ptr_to_u64(ptr));
- ptr += count * size;
+ ptr += roundup(count * size, sizeof(__u64));
}
/* step 5: call syscall again to get required arrays */
sample_type = evsel->core.attr.sample_type;
+ if (sample_type & ~OFFCPU_SAMPLE_TYPES) {
+ pr_err("not supported sample type: %llx\n",
+ (unsigned long long)sample_type);
+ return -1;
+ }
+
if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
if (evsel->core.id)
sid = evsel->core.id[0];
}
if (sample_type & PERF_SAMPLE_CGROUP)
data.array[n++] = key.cgroup_id;
- /* TODO: handle more sample types */
size = n * sizeof(u64);
data.hdr.size = size;
__uint(max_entries, 1);
} cgroup_filter SEC(".maps");
+/* new kernel task_struct definition */
+struct task_struct___new {
+ long __state;
+} __attribute__((preserve_access_index));
+
/* old kernel task_struct definition */
struct task_struct___old {
long state;
*/
static inline int get_task_state(struct task_struct *t)
{
- if (bpf_core_field_exists(t->__state))
- return BPF_CORE_READ(t, __state);
+ /* recast pointer to capture new type for compiler */
+ struct task_struct___new *t_new = (void *)t;
- /* recast pointer to capture task_struct___old type for compiler */
- struct task_struct___old *t_old = (void *)t;
+ if (bpf_core_field_exists(t_new->__state)) {
+ return BPF_CORE_READ(t_new, __state);
+ } else {
+ /* recast pointer to capture old type for compiler */
+ struct task_struct___old *t_old = (void *)t;
- /* now use old "state" name of the field */
- return BPF_CORE_READ(t_old, state);
+ return BPF_CORE_READ(t_old, state);
+ }
}
static inline __u64 get_cgroup_id(struct task_struct *t)
return err;
}
+static int filename__read_build_id_ns(const char *filename,
+ struct build_id *bid,
+ struct nsinfo *nsi)
+{
+ struct nscookie nsc;
+ int ret;
+
+ nsinfo__mountns_enter(nsi, &nsc);
+ ret = filename__read_build_id(filename, bid);
+ nsinfo__mountns_exit(&nsc);
+
+ return ret;
+}
+
+static bool dso__build_id_mismatch(struct dso *dso, const char *name)
+{
+ struct build_id bid;
+
+ if (filename__read_build_id_ns(name, &bid, dso->nsinfo) < 0)
+ return false;
+
+ return !dso__build_id_equal(dso, &bid);
+}
+
static int dso__cache_build_id(struct dso *dso, struct machine *machine,
void *priv __maybe_unused)
{
is_kallsyms = true;
name = machine->mmap_name;
}
+
+ if (!is_kallsyms && dso__build_id_mismatch(dso, name))
+ return 0;
+
return build_id_cache__add_b(&dso->bid, name, dso->nsinfo,
is_kallsyms, is_vdso);
}
#include "util.h"
#include "hashmap.h"
#include "pmu-hybrid.h"
+#include "off_cpu.h"
#include "../perf-sys.h"
#include "util/parse-branch-options.h"
#include <internal/xyarray.h>
}
}
+static bool evsel__is_offcpu_event(struct evsel *evsel)
+{
+ return evsel__is_bpf_output(evsel) && !strcmp(evsel->name, OFFCPU_EVENT);
+}
+
/*
* The enable_on_exec/disabled value strategy:
*
*/
if (evsel__is_dummy_event(evsel))
evsel__reset_sample_bit(evsel, BRANCH_STACK);
+
+ if (evsel__is_offcpu_event(evsel))
+ evsel->core.attr.sample_type &= OFFCPU_SAMPLE_TYPES;
}
int evsel__set_filter(struct evsel *evsel, const char *filter)
}
%}
-number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)
+number ([0-9]+\.?[0-9]*|[0-9]*\.?[0-9]+)(e-?[0-9]+)?
sch [-,=]
spec \\{sch}
return perf_session__do_write_header(session, evlist, fd, at_exit, NULL);
}
+size_t perf_session__data_offset(const struct evlist *evlist)
+{
+ struct evsel *evsel;
+ size_t data_offset;
+
+ data_offset = sizeof(struct perf_file_header);
+ evlist__for_each_entry(evlist, evsel) {
+ data_offset += evsel->core.ids * sizeof(u64);
+ }
+ data_offset += evlist->core.nr_entries * sizeof(struct perf_file_attr);
+
+ return data_offset;
+}
+
int perf_session__inject_header(struct perf_session *session,
struct evlist *evlist,
int fd,
int fd,
struct feat_copier *fc);
+size_t perf_session__data_offset(const struct evlist *evlist);
+
void perf_header__set_feat(struct perf_header *header, int feat);
void perf_header__clear_feat(struct perf_header *header, int feat);
bool perf_header__has_feat(const struct perf_header *header, int feat);
*out_evlist = NULL;
if (!metric_no_merge || hashmap__size(ids->ids) == 0) {
+ bool added_event = false;
int i;
/*
* We may fail to share events between metrics because a tool
if (!tmp)
return -ENOMEM;
ids__insert(ids->ids, tmp);
+ added_event = true;
}
}
+ if (!added_event && hashmap__size(ids->ids) == 0) {
+ char *tmp = strdup("duration_time");
+
+ if (!tmp)
+ return -ENOMEM;
+ ids__insert(ids->ids, tmp);
+ }
}
ret = metricgroup__build_event_string(&events, ids, modifier,
has_constraint);
#ifndef PERF_UTIL_OFF_CPU_H
#define PERF_UTIL_OFF_CPU_H
+#include <linux/perf_event.h>
+
struct evlist;
struct target;
struct perf_session;
#define OFFCPU_EVENT "offcpu-time"
+#define OFFCPU_SAMPLE_TYPES (PERF_SAMPLE_IDENTIFIER | PERF_SAMPLE_IP | \
+ PERF_SAMPLE_TID | PERF_SAMPLE_TIME | \
+ PERF_SAMPLE_ID | PERF_SAMPLE_CPU | \
+ PERF_SAMPLE_PERIOD | PERF_SAMPLE_CALLCHAIN | \
+ PERF_SAMPLE_CGROUP)
+
+
#ifdef HAVE_BPF_SKEL
int off_cpu_prepare(struct evlist *evlist, struct target *target,
struct record_opts *opts);
return NULL;
}
+static int elf_read_program_header(Elf *elf, u64 vaddr, GElf_Phdr *phdr)
+{
+ size_t i, phdrnum;
+ u64 sz;
+
+ if (elf_getphdrnum(elf, &phdrnum))
+ return -1;
+
+ for (i = 0; i < phdrnum; i++) {
+ if (gelf_getphdr(elf, i, phdr) == NULL)
+ return -1;
+
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ sz = max(phdr->p_memsz, phdr->p_filesz);
+ if (!sz)
+ continue;
+
+ if (vaddr >= phdr->p_vaddr && (vaddr < phdr->p_vaddr + sz))
+ return 0;
+ }
+
+ /* Not found any valid program header */
+ return -1;
+}
+
static bool want_demangle(bool is_kernel_sym)
{
return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle;
sym.st_value);
used_opd = true;
}
+
/*
* When loading symbols in a data mapping, ABS symbols (which
* has a value of SHN_ABS in its st_shndx) failed at
gelf_getshdr(sec, &shdr);
+ /*
+ * If the attribute bit SHF_ALLOC is not set, the section
+ * doesn't occupy memory during process execution.
+ * E.g. ".gnu.warning.*" section is used by linker to generate
+ * warnings when calling deprecated functions, the symbols in
+ * the section aren't loaded to memory during process execution,
+ * so skip them.
+ */
+ if (!(shdr.sh_flags & SHF_ALLOC))
+ continue;
+
secstrs = secstrs_sym;
/*
goto out_elf_end;
} else if ((used_opd && runtime_ss->adjust_symbols) ||
(!used_opd && syms_ss->adjust_symbols)) {
+ GElf_Phdr phdr;
+
+ if (elf_read_program_header(syms_ss->elf,
+ (u64)sym.st_value, &phdr)) {
+ pr_warning("%s: failed to find program header for "
+ "symbol: %s st_value: %#" PRIx64 "\n",
+ __func__, elf_name, (u64)sym.st_value);
+ continue;
+ }
pr_debug4("%s: adjusting symbol: st_value: %#" PRIx64 " "
- "sh_addr: %#" PRIx64 " sh_offset: %#" PRIx64 "\n", __func__,
- (u64)sym.st_value, (u64)shdr.sh_addr,
- (u64)shdr.sh_offset);
- sym.st_value -= shdr.sh_addr - shdr.sh_offset;
+ "p_vaddr: %#" PRIx64 " p_offset: %#" PRIx64 "\n",
+ __func__, (u64)sym.st_value, (u64)phdr.p_vaddr,
+ (u64)phdr.p_offset);
+ sym.st_value -= phdr.p_vaddr - phdr.p_offset;
}
demangled = demangle_sym(dso, kmodule, elf_name);
snprintf(filename, sizeof(filename), "%s/proc/%d/task",
machine->root_dir, pid);
- n = scandir(filename, &dirent, filter_task, alphasort);
+ n = scandir(filename, &dirent, filter_task, NULL);
if (n < 0)
return n;
if (*end)
continue;
- rc = -1;
+ /* some threads may exit just after scan, ignore it */
if (perf_event__prepare_comm(comm_event, pid, _pid, machine,
&tgid, &ppid, &kernel_thread) != 0)
- break;
+ continue;
+ rc = -1;
if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid,
ppid, process, machine) < 0)
break;
return 0;
snprintf(proc_path, sizeof(proc_path), "%s/proc", machine->root_dir);
- n = scandir(proc_path, &dirent, filter_task, alphasort);
+ n = scandir(proc_path, &dirent, filter_task, NULL);
if (n < 0)
return err;
Elf *elf;
GElf_Ehdr ehdr;
GElf_Shdr shdr;
- int ret;
+ int ret = -1;
elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL);
if (elf == NULL)
#ifndef NO_LIBUNWIND_DEBUG_FRAME
static u64 elf_section_offset(int fd, const char *name)
{
- u64 address, offset;
+ u64 address, offset = 0;
if (elf_section_address_and_offset(fd, name, &address, &offset))
return 0;
})
GET_ADDR("bpf_fentry_test1", addrs[0]);
- GET_ADDR("bpf_fentry_test2", addrs[1]);
- GET_ADDR("bpf_fentry_test3", addrs[2]);
- GET_ADDR("bpf_fentry_test4", addrs[3]);
- GET_ADDR("bpf_fentry_test5", addrs[4]);
- GET_ADDR("bpf_fentry_test6", addrs[5]);
- GET_ADDR("bpf_fentry_test7", addrs[6]);
+ GET_ADDR("bpf_fentry_test3", addrs[1]);
+ GET_ADDR("bpf_fentry_test4", addrs[2]);
+ GET_ADDR("bpf_fentry_test5", addrs[3]);
+ GET_ADDR("bpf_fentry_test6", addrs[4]);
+ GET_ADDR("bpf_fentry_test7", addrs[5]);
+ GET_ADDR("bpf_fentry_test2", addrs[6]);
GET_ADDR("bpf_fentry_test8", addrs[7]);
#undef GET_ADDR
- cookies[0] = 1;
- cookies[1] = 2;
- cookies[2] = 3;
- cookies[3] = 4;
- cookies[4] = 5;
- cookies[5] = 6;
- cookies[6] = 7;
- cookies[7] = 8;
+ cookies[0] = 1; /* bpf_fentry_test1 */
+ cookies[1] = 2; /* bpf_fentry_test3 */
+ cookies[2] = 3; /* bpf_fentry_test4 */
+ cookies[3] = 4; /* bpf_fentry_test5 */
+ cookies[4] = 5; /* bpf_fentry_test6 */
+ cookies[5] = 6; /* bpf_fentry_test7 */
+ cookies[6] = 7; /* bpf_fentry_test2 */
+ cookies[7] = 8; /* bpf_fentry_test8 */
opts.kprobe_multi.addrs = (const unsigned long *) &addrs;
opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
goto cleanup;
- cookies[0] = 8;
- cookies[1] = 7;
- cookies[2] = 6;
- cookies[3] = 5;
- cookies[4] = 4;
- cookies[5] = 3;
- cookies[6] = 2;
- cookies[7] = 1;
+ cookies[0] = 8; /* bpf_fentry_test1 */
+ cookies[1] = 7; /* bpf_fentry_test3 */
+ cookies[2] = 6; /* bpf_fentry_test4 */
+ cookies[3] = 5; /* bpf_fentry_test5 */
+ cookies[4] = 4; /* bpf_fentry_test6 */
+ cookies[5] = 3; /* bpf_fentry_test7 */
+ cookies[6] = 2; /* bpf_fentry_test2 */
+ cookies[7] = 1; /* bpf_fentry_test8 */
opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
struct kprobe_multi *skel = NULL;
const char *syms[8] = {
"bpf_fentry_test1",
- "bpf_fentry_test2",
"bpf_fentry_test3",
"bpf_fentry_test4",
"bpf_fentry_test5",
"bpf_fentry_test6",
"bpf_fentry_test7",
+ "bpf_fentry_test2",
"bpf_fentry_test8",
};
__u64 cookies[8];
skel->bss->pid = getpid();
skel->bss->test_cookie = true;
- cookies[0] = 1;
- cookies[1] = 2;
- cookies[2] = 3;
- cookies[3] = 4;
- cookies[4] = 5;
- cookies[5] = 6;
- cookies[6] = 7;
- cookies[7] = 8;
+ cookies[0] = 1; /* bpf_fentry_test1 */
+ cookies[1] = 2; /* bpf_fentry_test3 */
+ cookies[2] = 3; /* bpf_fentry_test4 */
+ cookies[3] = 4; /* bpf_fentry_test5 */
+ cookies[4] = 5; /* bpf_fentry_test6 */
+ cookies[5] = 6; /* bpf_fentry_test7 */
+ cookies[6] = 7; /* bpf_fentry_test2 */
+ cookies[7] = 8; /* bpf_fentry_test8 */
opts.syms = syms;
opts.cnt = ARRAY_SIZE(syms);
if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
goto cleanup;
- cookies[0] = 8;
- cookies[1] = 7;
- cookies[2] = 6;
- cookies[3] = 5;
- cookies[4] = 4;
- cookies[5] = 3;
- cookies[6] = 2;
- cookies[7] = 1;
+ cookies[0] = 8; /* bpf_fentry_test1 */
+ cookies[1] = 7; /* bpf_fentry_test3 */
+ cookies[2] = 6; /* bpf_fentry_test4 */
+ cookies[3] = 5; /* bpf_fentry_test5 */
+ cookies[4] = 4; /* bpf_fentry_test6 */
+ cookies[5] = 3; /* bpf_fentry_test7 */
+ cookies[6] = 2; /* bpf_fentry_test2 */
+ cookies[7] = 1; /* bpf_fentry_test8 */
opts.retprobe = true;
"./test_attach_probe.o");
}
+static void test_func_replace_global_func(void)
+{
+ const char *prog_name[] = {
+ "freplace/test_pkt_access",
+ };
+
+ test_fexit_bpf2bpf_common("./freplace_global_func.o",
+ "./test_pkt_access.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false, NULL);
+}
+
/* NOTE: affect other tests, must run in serial mode */
void serial_test_fexit_bpf2bpf(void)
{
test_func_replace_multi();
if (test__start_subtest("fmod_ret_freplace"))
test_fmod_ret_freplace();
+ if (test__start_subtest("func_replace_global_func"))
+ test_func_replace_global_func();
}
continue;
if (!strncmp(name, "rcu_", 4))
continue;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ continue;
err = hashmap__add(map, name, NULL);
if (err) {
free(name);
* Tests for sockmap/sockhash holding kTLS sockets.
*/
+#include <netinet/tcp.h>
#include "test_progs.h"
#define MAX_TEST_NAME 80
close(srv);
}
+static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
+{
+ struct sockaddr_storage addr = {};
+ socklen_t len = sizeof(addr);
+ struct sockaddr_in6 *v6;
+ struct sockaddr_in *v4;
+ int err, s, zero = 0;
+
+ switch (family) {
+ case AF_INET:
+ v4 = (struct sockaddr_in *)&addr;
+ v4->sin_family = AF_INET;
+ break;
+ case AF_INET6:
+ v6 = (struct sockaddr_in6 *)&addr;
+ v6->sin6_family = AF_INET6;
+ break;
+ default:
+ PRINT_FAIL("unsupported socket family %d", family);
+ return;
+ }
+
+ s = socket(family, SOCK_STREAM, 0);
+ if (!ASSERT_GE(s, 0, "socket"))
+ return;
+
+ err = bind(s, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "bind"))
+ goto close;
+
+ err = getsockname(s, (struct sockaddr *)&addr, &len);
+ if (!ASSERT_OK(err, "getsockname"))
+ goto close;
+
+ err = connect(s, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "connect"))
+ goto close;
+
+ /* save sk->sk_prot and set it to tls_prots */
+ err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto close;
+
+ /* sockmap update should not affect saved sk_prot */
+ err = bpf_map_update_elem(map, &zero, &s, BPF_ANY);
+ if (!ASSERT_ERR(err, "sockmap update elem"))
+ goto close;
+
+ /* call sk->sk_prot->setsockopt to dispatch to saved sk_prot */
+ err = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+ ASSERT_OK(err, "setsockopt(TCP_NODELAY)");
+
+close:
+ close(s);
+}
+
+static const char *fmt_test_name(const char *subtest_name, int family,
+ enum bpf_map_type map_type)
+{
+ const char *map_type_str = BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH";
+ const char *family_str = AF_INET ? "IPv4" : "IPv6";
+ static char test_name[MAX_TEST_NAME];
+
+ snprintf(test_name, MAX_TEST_NAME,
+ "sockmap_ktls %s %s %s",
+ subtest_name, family_str, map_type_str);
+
+ return test_name;
+}
+
static void run_tests(int family, enum bpf_map_type map_type)
{
- char test_name[MAX_TEST_NAME];
int map;
map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
return;
}
- snprintf(test_name, MAX_TEST_NAME,
- "sockmap_ktls disconnect_after_delete %s %s",
- family == AF_INET ? "IPv4" : "IPv6",
- map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH");
- if (!test__start_subtest(test_name))
- return;
-
- test_sockmap_ktls_disconnect_after_delete(family, map);
+ if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
+ test_sockmap_ktls_disconnect_after_delete(family, map);
+ if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
+ test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
close(map);
}
bpf_object__close(obj);
}
+#include "tailcall_bpf2bpf6.skel.h"
+
+/* Tail call counting works even when there is data on stack which is
+ * not aligned to 8 bytes.
+ */
+static void test_tailcall_bpf2bpf_6(void)
+{
+ struct tailcall_bpf2bpf6 *obj;
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ obj = tailcall_bpf2bpf6__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "open and load"))
+ return;
+
+ main_fd = bpf_program__fd(obj->progs.entry);
+ if (!ASSERT_GE(main_fd, 0, "entry prog fd"))
+ goto out;
+
+ map_fd = bpf_map__fd(obj->maps.jmp_table);
+ if (!ASSERT_GE(map_fd, 0, "jmp_table map fd"))
+ goto out;
+
+ prog_fd = bpf_program__fd(obj->progs.classifier_0);
+ if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "jmp_table map update"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "entry prog test run");
+ ASSERT_EQ(topts.retval, 0, "tailcall retval");
+
+ data_fd = bpf_map__fd(obj->maps.bss);
+ if (!ASSERT_GE(map_fd, 0, "bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "bss map lookup");
+ ASSERT_EQ(val, 1, "done flag is set");
+
+out:
+ tailcall_bpf2bpf6__destroy(obj);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
test_tailcall_bpf2bpf_4(false);
if (test__start_subtest("tailcall_bpf2bpf_5"))
test_tailcall_bpf2bpf_4(true);
+ if (test__start_subtest("tailcall_bpf2bpf_6"))
+ test_tailcall_bpf2bpf_6();
}
bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr);
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
bpf_ringbuf_submit_dynptr(&ptr, 0);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
return 0;
}
get_map_val_dynptr(&ptr);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0);
return 0;
}
memcpy((void *)&ptr + 8, &x, sizeof(x));
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
bpf_ringbuf_submit_dynptr(&ptr, 0);
get_map_val_dynptr(&ptr);
/* this should fail */
- bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0);
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0);
return 0;
}
bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
/* Write data into the dynptr */
- err = err ?: bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data));
+ err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
/* Read the data that was written into the dynptr */
- err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0);
+ err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
/* Ensure the data we read matches the data we wrote */
for (i = 0; i < sizeof(read_data); i++) {
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline
+int test_ctx_global_func(struct __sk_buff *skb)
+{
+ volatile int retval = 1;
+ return retval;
+}
+
+SEC("freplace/test_pkt_access")
+int new_test_pkt_access(struct __sk_buff *skb)
+{
+ return test_ctx_global_func(skb);
+}
+
+char _license[] SEC("license") = "GPL";
if (is_return) {
SET(kretprobe_test1_result, &bpf_fentry_test1, 8);
- SET(kretprobe_test2_result, &bpf_fentry_test2, 7);
- SET(kretprobe_test3_result, &bpf_fentry_test3, 6);
- SET(kretprobe_test4_result, &bpf_fentry_test4, 5);
- SET(kretprobe_test5_result, &bpf_fentry_test5, 4);
- SET(kretprobe_test6_result, &bpf_fentry_test6, 3);
- SET(kretprobe_test7_result, &bpf_fentry_test7, 2);
+ SET(kretprobe_test2_result, &bpf_fentry_test2, 2);
+ SET(kretprobe_test3_result, &bpf_fentry_test3, 7);
+ SET(kretprobe_test4_result, &bpf_fentry_test4, 6);
+ SET(kretprobe_test5_result, &bpf_fentry_test5, 5);
+ SET(kretprobe_test6_result, &bpf_fentry_test6, 4);
+ SET(kretprobe_test7_result, &bpf_fentry_test7, 3);
SET(kretprobe_test8_result, &bpf_fentry_test8, 1);
} else {
SET(kprobe_test1_result, &bpf_fentry_test1, 1);
- SET(kprobe_test2_result, &bpf_fentry_test2, 2);
- SET(kprobe_test3_result, &bpf_fentry_test3, 3);
- SET(kprobe_test4_result, &bpf_fentry_test4, 4);
- SET(kprobe_test5_result, &bpf_fentry_test5, 5);
- SET(kprobe_test6_result, &bpf_fentry_test6, 6);
- SET(kprobe_test7_result, &bpf_fentry_test7, 7);
+ SET(kprobe_test2_result, &bpf_fentry_test2, 7);
+ SET(kprobe_test3_result, &bpf_fentry_test3, 2);
+ SET(kprobe_test4_result, &bpf_fentry_test4, 3);
+ SET(kprobe_test5_result, &bpf_fentry_test5, 4);
+ SET(kprobe_test6_result, &bpf_fentry_test6, 5);
+ SET(kprobe_test7_result, &bpf_fentry_test7, 6);
SET(kprobe_test8_result, &bpf_fentry_test8, 8);
}
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define __unused __attribute__((unused))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int done = 0;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb __unused)
+{
+ done = 1;
+ return 0;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ /* Don't propagate the constant to the caller */
+ volatile int ret = 1;
+
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return ret;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ /* Have data on stack which size is not a multiple of 8 */
+ volatile char arr[1] = {};
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "jeq32/jne32: bounds checking",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 563),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
+ BPF_ALU32_REG(BPF_OR, BPF_REG_2, BPF_REG_6),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_2, 8, 5),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_2, 500, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+},
.result = ACCEPT,
.retval = 3,
},
+{
+ "jump & dead code elimination",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 32767),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_3, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0x8000, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32767),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 0, 1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+},
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../include/
TEST_GEN_PROGS := dma_map_benchmark
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
-#include <linux/map_benchmark.h>
#include <linux/types.h>
+#include <linux/map_benchmark.h>
#define NSEC_PER_MSEC 1000000L
TEST_PROGS := gpio-mockup.sh gpio-sim.sh
TEST_FILES := gpio-mockup-sysfs.sh
TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
-CFLAGS += -O2 -g -Wall -I../../../../usr/include/
+CFLAGS += -O2 -g -Wall -I../../../../usr/include/ $(KHDR_INCLUDES)
include ../lib.mk
UNAME_M := riscv
endif
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
-LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
-LIBKVM_riscv = lib/riscv/processor.c lib/riscv/ucall.c
+LIBKVM += lib/assert.c
+LIBKVM += lib/elf.c
+LIBKVM += lib/guest_modes.c
+LIBKVM += lib/io.c
+LIBKVM += lib/kvm_util.c
+LIBKVM += lib/perf_test_util.c
+LIBKVM += lib/rbtree.c
+LIBKVM += lib/sparsebit.c
+LIBKVM += lib/test_util.c
+
+LIBKVM_x86_64 += lib/x86_64/apic.c
+LIBKVM_x86_64 += lib/x86_64/handlers.S
+LIBKVM_x86_64 += lib/x86_64/perf_test_util.c
+LIBKVM_x86_64 += lib/x86_64/processor.c
+LIBKVM_x86_64 += lib/x86_64/svm.c
+LIBKVM_x86_64 += lib/x86_64/ucall.c
+LIBKVM_x86_64 += lib/x86_64/vmx.c
+
+LIBKVM_aarch64 += lib/aarch64/gic.c
+LIBKVM_aarch64 += lib/aarch64/gic_v3.c
+LIBKVM_aarch64 += lib/aarch64/handlers.S
+LIBKVM_aarch64 += lib/aarch64/processor.c
+LIBKVM_aarch64 += lib/aarch64/spinlock.c
+LIBKVM_aarch64 += lib/aarch64/ucall.c
+LIBKVM_aarch64 += lib/aarch64/vgic.c
+
+LIBKVM_s390x += lib/s390x/diag318_test_handler.c
+LIBKVM_s390x += lib/s390x/processor.c
+LIBKVM_s390x += lib/s390x/ucall.c
+
+LIBKVM_riscv += lib/riscv/processor.c
+LIBKVM_riscv += lib/riscv/ucall.c
TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
include ../lib.mk
-STATIC_LIBS := $(OUTPUT)/libkvm.a
LIBKVM_C := $(filter %.c,$(LIBKVM))
LIBKVM_S := $(filter %.S,$(LIBKVM))
LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
-EXTRA_CLEAN += $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(STATIC_LIBS) cscope.*
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
+
+EXTRA_CLEAN += $(LIBKVM_OBJS) cscope.*
x := $(shell mkdir -p $(sort $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c
$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ)
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJS)
- $(AR) crs $@ $^
-
x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-all: $(STATIC_LIBS)
-$(TEST_GEN_PROGS): $(STATIC_LIBS)
+$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
cscope:
static void help(char *name)
{
puts("");
- printf("usage: %s [-h] [-i iterations] [-p offset] [-g]"
- "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
+ printf("usage: %s [-h] [-i iterations] [-p offset] [-g] "
+ "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
"[-x memslots]\n", name);
puts("");
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
guest_modes_help();
+ printf(" -n: Run the vCPUs in nested mode (L2)\n");
printf(" -b: specify the size of the memory region which should be\n"
" dirtied by each vCPU. e.g. 10M or 3G.\n"
" (default: 1G)\n");
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "ghi:p:m:b:f:v:os:x:")) != -1) {
+ while ((opt = getopt(argc, argv, "ghi:p:m:nb:f:v:os:x:")) != -1) {
switch (opt) {
case 'g':
dirty_log_manual_caps = 0;
case 'm':
guest_modes_cmdline(optarg);
break;
+ case 'n':
+ perf_test_args.nested = true;
+ break;
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
struct perf_test_args {
struct kvm_vm *vm;
+ /* The starting address and size of the guest test region. */
uint64_t gpa;
+ uint64_t size;
uint64_t guest_page_size;
int wr_fract;
+ /* Run vCPUs in L2 instead of L1, if the architecture supports it. */
+ bool nested;
+
struct perf_test_vcpu_args vcpu_args[KVM_MAX_VCPUS];
};
void perf_test_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct perf_test_vcpu_args *));
void perf_test_join_vcpu_threads(int vcpus);
+void perf_test_guest_code(uint32_t vcpu_id);
+
+uint64_t perf_test_nested_pages(int nr_vcpus);
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus);
#endif /* SELFTEST_KVM_PERF_TEST_UTIL_H */
struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vm *vm, uint32_t vcpuid);
void vm_xsave_req_perm(int bit);
-enum x86_page_size {
- X86_PAGE_SIZE_4K = 0,
- X86_PAGE_SIZE_2M,
- X86_PAGE_SIZE_1G,
+enum pg_level {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+ PG_LEVEL_512G,
+ PG_LEVEL_NUM
};
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- enum x86_page_size page_size);
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
+
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
/*
* Basic CPU control in CR0
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-/* VMX_EPT_VPID_CAP bits */
-#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
-
#define XSTATE_XTILE_CFG_BIT 17
#define XSTATE_XTILE_DATA_BIT 18
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
+#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
+
#define EXIT_REASON_FAILED_VMENTRY 0x80000000
#define EXIT_REASON_EXCEPTION_NMI 0
#define EXIT_REASON_EXTERNAL_INTERRUPT 1
bool nested_vmx_supported(void);
void nested_vmx_check_supported(void);
+bool ept_1g_pages_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
uint64_t nested_paddr, uint64_t paddr);
uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
void ucall(uint64_t cmd, int nargs, ...)
{
- struct ucall uc = {
- .cmd = cmd,
- };
+ struct ucall uc = {};
va_list va;
int i;
+ WRITE_ONCE(uc.cmd, cmd);
nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
va_start(va, nargs);
for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
+ WRITE_ONCE(uc.args[i], va_arg(va, uint64_t));
va_end(va);
- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
+ WRITE_ONCE(*ucall_exit_mmio_addr, (vm_vaddr_t)&uc);
}
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
* Continuously write to the first 8 bytes of each page in the
* specified region.
*/
-static void guest_code(uint32_t vcpu_id)
+void perf_test_guest_code(uint32_t vcpu_id)
{
struct perf_test_args *pta = &perf_test_args;
struct perf_test_vcpu_args *vcpu_args = &pta->vcpu_args[vcpu_id];
{
struct perf_test_args *pta = &perf_test_args;
struct kvm_vm *vm;
- uint64_t guest_num_pages;
+ uint64_t guest_num_pages, slot0_pages = DEFAULT_GUEST_PHY_PAGES;
uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
+ uint64_t region_end_gfn;
int i;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
"Guest memory cannot be evenly divided into %d slots.",
slots);
+ /*
+ * If using nested, allocate extra pages for the nested page tables and
+ * in-memory data structures.
+ */
+ if (pta->nested)
+ slot0_pages += perf_test_nested_pages(vcpus);
+
/*
* Pass guest_num_pages to populate the page tables for test memory.
* The memory is also added to memslot 0, but that's a benign side
* effect as KVM allows aliasing HVAs in meslots.
*/
- vm = vm_create_with_vcpus(mode, vcpus, DEFAULT_GUEST_PHY_PAGES,
- guest_num_pages, 0, guest_code, NULL);
+ vm = vm_create_with_vcpus(mode, vcpus, slot0_pages, guest_num_pages, 0,
+ perf_test_guest_code, NULL);
pta->vm = vm;
+ /* Put the test region at the top guest physical memory. */
+ region_end_gfn = vm_get_max_gfn(vm) + 1;
+
+#ifdef __x86_64__
+ /*
+ * When running vCPUs in L2, restrict the test region to 48 bits to
+ * avoid needing 5-level page tables to identity map L2.
+ */
+ if (pta->nested)
+ region_end_gfn = min(region_end_gfn, (1UL << 48) / pta->guest_page_size);
+#endif
/*
* If there should be more memory in the guest test region than there
* can be pages in the guest, it will definitely cause problems.
*/
- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
+ TEST_ASSERT(guest_num_pages < region_end_gfn,
"Requested more guest memory than address space allows.\n"
" guest pages: %" PRIx64 " max gfn: %" PRIx64
" vcpus: %d wss: %" PRIx64 "]\n",
- guest_num_pages, vm_get_max_gfn(vm), vcpus,
+ guest_num_pages, region_end_gfn - 1, vcpus,
vcpu_memory_bytes);
- pta->gpa = (vm_get_max_gfn(vm) - guest_num_pages) * pta->guest_page_size;
+ pta->gpa = (region_end_gfn - guest_num_pages) * pta->guest_page_size;
pta->gpa = align_down(pta->gpa, backing_src_pagesz);
#ifdef __s390x__
/* Align to 1M (segment size) */
pta->gpa = align_down(pta->gpa, 1 << 20);
#endif
- pr_info("guest physical test memory offset: 0x%lx\n", pta->gpa);
+ pta->size = guest_num_pages * pta->guest_page_size;
+ pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
+ pta->gpa, pta->gpa + pta->size);
/* Add extra memory slots for testing */
for (i = 0; i < slots; i++) {
perf_test_setup_vcpus(vm, vcpus, vcpu_memory_bytes, partition_vcpu_memory_access);
+ if (pta->nested) {
+ pr_info("Configuring vCPUs to run in L2 (nested).\n");
+ perf_test_setup_nested(vm, vcpus);
+ }
+
ucall_init(vm, NULL);
/* Export the shared variables to the guest. */
sync_global_to_guest(vm, perf_test_args);
}
+uint64_t __weak perf_test_nested_pages(int nr_vcpus)
+{
+ return 0;
+}
+
+void __weak perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+{
+ pr_info("%s() not support on this architecture, skipping.\n", __func__);
+ exit(KSFT_SKIP);
+}
+
static void *vcpu_thread_main(void *data)
{
struct vcpu_thread *vcpu = data;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86_64-specific extensions to perf_test_util.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "perf_test_util.h"
+#include "../kvm_util_internal.h"
+#include "processor.h"
+#include "vmx.h"
+
+void perf_test_l2_guest_code(uint64_t vcpu_id)
+{
+ perf_test_guest_code(vcpu_id);
+ vmcall();
+}
+
+extern char perf_test_l2_guest_entry[];
+__asm__(
+"perf_test_l2_guest_entry:"
+" mov (%rsp), %rdi;"
+" call perf_test_l2_guest_code;"
+" ud2;"
+);
+
+static void perf_test_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+ GUEST_ASSERT(ept_1g_pages_supported());
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ prepare_vmcs(vmx, perf_test_l2_guest_entry, rsp);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+uint64_t perf_test_nested_pages(int nr_vcpus)
+{
+ /*
+ * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+ * pages and 4-level paging, plus a few pages per-vCPU for data
+ * structures such as the VMCS.
+ */
+ return 513 + 10 * nr_vcpus;
+}
+
+void perf_test_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ uint64_t start, end;
+
+ prepare_eptp(vmx, vm, 0);
+
+ /*
+ * Identity map the first 4G and the test region with 1G pages so that
+ * KVM can shadow the EPT12 with the maximum huge page size supported
+ * by the backing source.
+ */
+ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+ start = align_down(perf_test_args.gpa, PG_SIZE_1G);
+ end = align_up(perf_test_args.gpa + perf_test_args.size, PG_SIZE_1G);
+ nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void perf_test_setup_nested(struct kvm_vm *vm, int nr_vcpus)
+{
+ struct vmx_pages *vmx, *vmx0 = NULL;
+ struct kvm_regs regs;
+ vm_vaddr_t vmx_gva;
+ int vcpu_id;
+
+ nested_vmx_check_supported();
+
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+ if (vcpu_id == 0) {
+ perf_test_setup_ept(vmx, vm);
+ vmx0 = vmx;
+ } else {
+ /* Share the same EPT table across all vCPUs. */
+ vmx->eptp = vmx0->eptp;
+ vmx->eptp_hva = vmx0->eptp_hva;
+ vmx->eptp_gpa = vmx0->eptp_gpa;
+ }
+
+ /*
+ * Override the vCPU to run perf_test_l1_guest_code() which will
+ * bounce it into L2 before calling perf_test_guest_code().
+ */
+ vcpu_regs_get(vm, vcpu_id, ®s);
+ regs.rip = (unsigned long) perf_test_l1_guest_code;
+ vcpu_regs_set(vm, vcpu_id, ®s);
+ vcpu_args_set(vm, vcpu_id, 2, vmx_gva, vcpu_id);
+ }
+}
int level)
{
uint64_t *page_table = addr_gpa2hva(vm, pt_pfn << vm->page_shift);
- int index = vaddr >> (vm->page_shift + level * 9) & 0x1ffu;
+ int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
return &page_table[index];
}
uint64_t pt_pfn,
uint64_t vaddr,
uint64_t paddr,
- int level,
- enum x86_page_size page_size)
+ int current_level,
+ int target_level)
{
- uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, level);
+ uint64_t *pte = virt_get_pte(vm, pt_pfn, vaddr, current_level);
if (!(*pte & PTE_PRESENT_MASK)) {
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
- if (level == page_size)
+ if (current_level == target_level)
*pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
else
*pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
* a hugepage at this level, and that there isn't a hugepage at
* this level.
*/
- TEST_ASSERT(level != page_size,
+ TEST_ASSERT(current_level != target_level,
"Cannot create hugepage at level: %u, vaddr: 0x%lx\n",
- page_size, vaddr);
+ current_level, vaddr);
TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
"Cannot create page table at level: %u, vaddr: 0x%lx\n",
- level, vaddr);
+ current_level, vaddr);
}
return pte;
}
-void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- enum x86_page_size page_size)
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
{
- const uint64_t pg_size = 1ull << ((page_size * 9) + 12);
+ const uint64_t pg_size = PG_LEVEL_SIZE(level);
uint64_t *pml4e, *pdpe, *pde;
uint64_t *pte;
* early if a hugepage was created.
*/
pml4e = virt_create_upper_pte(vm, vm->pgd >> vm->page_shift,
- vaddr, paddr, 3, page_size);
+ vaddr, paddr, PG_LEVEL_512G, level);
if (*pml4e & PTE_LARGE_MASK)
return;
- pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, 2, page_size);
+ pdpe = virt_create_upper_pte(vm, PTE_GET_PFN(*pml4e), vaddr, paddr, PG_LEVEL_1G, level);
if (*pdpe & PTE_LARGE_MASK)
return;
- pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, 1, page_size);
+ pde = virt_create_upper_pte(vm, PTE_GET_PFN(*pdpe), vaddr, paddr, PG_LEVEL_2M, level);
if (*pde & PTE_LARGE_MASK)
return;
/* Fill in page table entry. */
- pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, 0);
+ pte = virt_get_pte(vm, PTE_GET_PFN(*pde), vaddr, PG_LEVEL_4K);
TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
"PTE already present for 4k page at vaddr: 0x%lx\n", vaddr);
*pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
- __virt_pg_map(vm, vaddr, paddr, X86_PAGE_SIZE_4K);
+ __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
}
static uint64_t *_vm_get_page_table_entry(struct kvm_vm *vm, int vcpuid,
return true;
}
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+ return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+ return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
/*
* Initialize the control fields to the most basic settings possible.
*/
struct eptPageTablePointer eptp = {
.memory_type = VMX_BASIC_MEM_TYPE_WB,
.page_walk_length = 3, /* + 1 */
- .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
};
}
}
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr)
+static void nested_create_pte(struct kvm_vm *vm,
+ struct eptPageTableEntry *pte,
+ uint64_t nested_paddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
+{
+ if (!pte->readable) {
+ pte->writable = true;
+ pte->readable = true;
+ pte->executable = true;
+ pte->page_size = (current_level == target_level);
+ if (pte->page_size)
+ pte->address = paddr >> vm->page_shift;
+ else
+ pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, nested_paddr: 0x%lx\n",
+ current_level, nested_paddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, nested_paddr: 0x%lx\n",
+ current_level, nested_paddr);
+ }
+}
+
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, int target_level)
{
- uint16_t index[4];
- struct eptPageTableEntry *pml4e;
+ const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+ struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+ uint16_t index;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
- TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+ TEST_ASSERT((nested_paddr >> 48) == 0,
+ "Nested physical address 0x%lx requires 5-level paging",
+ nested_paddr);
+ TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx vm->page_size: 0x%x",
- nested_paddr, vm->page_size);
+ " nested_paddr: 0x%lx page_size: 0x%lx",
+ nested_paddr, page_size);
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % vm->page_size) == 0,
+ TEST_ASSERT((paddr % page_size) == 0,
"Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
+ " paddr: 0x%lx page_size: 0x%lx",
+ paddr, page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- index[0] = (nested_paddr >> 12) & 0x1ffu;
- index[1] = (nested_paddr >> 21) & 0x1ffu;
- index[2] = (nested_paddr >> 30) & 0x1ffu;
- index[3] = (nested_paddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = vmx->eptp_hva;
- if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].readable = true;
- pml4e[index[3]].executable = true;
- }
+ for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+ index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+ pte = &pt[index];
- /* Allocate page directory table if not present. */
- struct eptPageTableEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].readable = true;
- pdpe[index[2]].executable = true;
- }
+ nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
- /* Allocate page table if not present. */
- struct eptPageTableEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_alloc_page_table(vm) >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].readable = true;
- pde[index[1]].executable = true;
- }
+ if (pte->page_size)
+ break;
- /* Fill in page table entry. */
- struct eptPageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].readable = true;
- pte[index[0]].executable = true;
+ pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+ }
/*
* For now mark these as accessed and dirty because the only
* testcase we have needs that. Can be reconsidered later.
*/
- pte[index[0]].accessed = true;
- pte[index[0]].dirty = true;
+ pte->accessed = true;
+ pte->dirty = true;
+
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr)
+{
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
}
/*
* nested_paddr - Nested guest physical address to map
* paddr - VM Physical Address
* size - The size of the range to map
- * eptp_memslot - Memory region slot for new virtual translation tables
+ * level - The level at which to map the range
*
* Output Args: None
*
* Within the VM given by vm, creates a nested guest translation for the
* page range starting at nested_paddr to the page range starting at paddr.
*/
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ int level)
{
- size_t page_size = vm->page_size;
+ size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr);
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}
}
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+ __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
/* Prepare an identity extended page table that maps all the
* physical pages in VM.
*/
}
}
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size)
+{
+ __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
#ifdef __x86_64__
/* Identity map memory in the guest using 1gb pages. */
for (i = 0; i < slot_size; i += size_1gb)
- __virt_pg_map(vm, gpa + i, gpa + i, X86_PAGE_SIZE_1G);
+ __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
#else
for (i = 0; i < slot_size; i += vm_get_page_size(vm))
virt_pg_map(vm, gpa + i, gpa + i);
return cpu;
}
-static void *migration_worker(void *ign)
+static void *migration_worker(void *__rseq_tid)
{
+ pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
cpu_set_t allowed_mask;
int r, i, cpu;
* stable, i.e. while changing affinity is in-progress.
*/
smp_wmb();
- r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
errno, strerror(errno));
smp_wmb();
vm = vm_create_default(VCPU_ID, 0, guest_code);
ucall_init(vm, NULL);
- pthread_create(&migration_thread, NULL, migration_worker, 0);
+ pthread_create(&migration_thread, NULL, migration_worker,
+ (void *)(unsigned long)gettid());
for (i = 0; !done; i++) {
vcpu_run(vm, VCPU_ID);
{
int i;
- for (i = 0; i < 1000000; i++)
+ for (i = 0; i < 100000000; i++)
asm volatile("nop");
}
tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
GUEST_ASSERT(tsc_freq > 0);
- /* First, check MSR-based clocksource */
+ /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
r1 = rdtsc();
t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
nop_loop();
r2 = rdtsc();
t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
GUEST_ASSERT(r2 > r1 && t2 > t1);
tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
- /* First, check MSR-based clocksource */
+ /* For increased accuracy, take mean rdtsc() before and afrer ioctl */
r1 = rdtsc();
t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
nop_loop();
r2 = rdtsc();
t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
run = vcpu_state(vm, VCPU_ID);
/* Map 1gb page without a backing memlot. */
- __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, X86_PAGE_SIZE_1G);
+ __virt_pg_map(vm, MMIO_GPA, MMIO_GPA, PG_LEVEL_1G);
r = _vcpu_run(vm, VCPU_ID);
LLVM_SUFFIX := $(LLVM)
endif
-CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX)
+CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi
+CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu
+CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl
+CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu
+CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu
+CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu
+CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
+CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
+CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
+CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
+
+ifeq ($(CROSS_COMPILE),)
+ifeq ($(CLANG_TARGET_FLAGS),)
+$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk
+else
+CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS)
+endif # CLANG_TARGET_FLAGS
+else
+CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif # CROSS_COMPILE
+
+CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
else
CC := $(CROSS_COMPILE)gcc
-endif
+endif # LLVM
ifeq (0,$(MAKELEVEL))
ifeq ($(OUTPUT),)
gro
ioam6_parser
toeplitz
+tun
cmsg_sender
-bind_bhash_test
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
+TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
TEST_PROGS += route_localnet.sh
TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_GEN_FILES += ioam6_parser
TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
TEST_PROGS += test_vxlan_vnifiltering.sh
-TEST_GEN_FILES += bind_bhash_test
TEST_FILES := settings
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
-$(OUTPUT)/bind_bhash_test: LDLIBS += -lpthread
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * This times how long it takes to bind to a port when the port already
- * has multiple sockets in its bhash table.
- *
- * In the setup(), we populate the port's bhash table with
- * MAX_THREADS * MAX_CONNECTIONS number of entries.
- */
-
-#include <unistd.h>
-#include <stdio.h>
-#include <netdb.h>
-#include <pthread.h>
-
-#define MAX_THREADS 600
-#define MAX_CONNECTIONS 40
-
-static const char *bind_addr = "::1";
-static const char *port;
-
-static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
-
-static int bind_socket(int opt, const char *addr)
-{
- struct addrinfo *res, hint = {};
- int sock_fd, reuse = 1, err;
-
- sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (sock_fd < 0) {
- perror("socket fd err");
- return -1;
- }
-
- hint.ai_family = AF_INET6;
- hint.ai_socktype = SOCK_STREAM;
-
- err = getaddrinfo(addr, port, &hint, &res);
- if (err) {
- perror("getaddrinfo failed");
- return -1;
- }
-
- if (opt) {
- err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
- if (err) {
- perror("setsockopt failed");
- return -1;
- }
- }
-
- err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
- if (err) {
- perror("failed to bind to port");
- return -1;
- }
-
- return sock_fd;
-}
-
-static void *setup(void *arg)
-{
- int sock_fd, i;
- int *array = (int *)arg;
-
- for (i = 0; i < MAX_CONNECTIONS; i++) {
- sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
- if (sock_fd < 0)
- return NULL;
- array[i] = sock_fd;
- }
-
- return NULL;
-}
-
-int main(int argc, const char *argv[])
-{
- int listener_fd, sock_fd, i, j;
- pthread_t tid[MAX_THREADS];
- clock_t begin, end;
-
- if (argc != 2) {
- printf("Usage: listener <port>\n");
- return -1;
- }
-
- port = argv[1];
-
- listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, bind_addr);
- if (listen(listener_fd, 100) < 0) {
- perror("listen failed");
- return -1;
- }
-
- /* Set up threads to populate the bhash table entry for the port */
- for (i = 0; i < MAX_THREADS; i++)
- pthread_create(&tid[i], NULL, setup, fd_array[i]);
-
- for (i = 0; i < MAX_THREADS; i++)
- pthread_join(tid[i], NULL);
-
- begin = clock();
-
- /* Bind to the same port on a different address */
- sock_fd = bind_socket(0, "2001:0db8:0:f101::1");
-
- end = clock();
-
- printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
-
- /* clean up */
- close(sock_fd);
- close(listener_fd);
- for (i = 0; i < MAX_THREADS; i++) {
- for (j = 0; i < MAX_THREADS; i++)
- close(fd_array[i][j]);
- }
-
- return 0;
-}
CLANG ?= clang
CCINCLUDE += -I../../bpf
+CCINCLUDE += -I../../../../lib
CCINCLUDE += -I../../../../../usr/include/
TEST_CUSTOM_PROGS = $(OUTPUT)/bpf/nat6to4.o
$(OUTPUT)/%.o: %.c
$(CLANG) -O2 -target bpf -c $< $(CCINCLUDE) -o $@
-clean:
- rm -f $(TEST_CUSTOM_PROGS)
+EXTRA_CLEAN := $(TEST_CUSTOM_PROGS)
NL_IP=172.17.1.1
NL_IP6=2001:db8:4::1
+# multicast and broadcast addresses
+MCAST_IP=224.0.0.1
+BCAST_IP=255.255.255.255
+
MD5_PW=abc123
MD5_WRONG_PW=abc1234
127.0.0.1) echo "loopback";;
::1) echo "IPv6 loopback";;
+ ${BCAST_IP}) echo "broadcast";;
+ ${MCAST_IP}) echo "multicast";;
+
${NSA_IP}) echo "ns-A IP";;
${NSA_IP6}) echo "ns-A IPv6";;
${NSA_LO_IP}) echo "ns-A loopback IP";;
done
#
- # raw socket with nonlocal bind
+ # tests for nonlocal bind
#
a=${NL_IP}
log_start
- run_cmd nettest -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b
- log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after device bind"
+ run_cmd nettest -s -R -f -l ${a} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address"
+
+ log_start
+ run_cmd nettest -s -f -l ${a} -b
+ log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address"
+
+ log_start
+ run_cmd nettest -s -D -P icmp -f -l ${a} -b
+ log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address"
+
+ #
+ # check that ICMP sockets cannot bind to broadcast and multicast addresses
+ #
+ a=${BCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address"
+
+ a=${MCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to multicast address"
#
# tcp sockets
log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind"
#
- # raw socket with nonlocal bind
+ # tests for nonlocal bind
#
a=${NL_IP}
log_start
- run_cmd nettest -s -R -P icmp -f -l ${a} -I ${VRF} -b
+ run_cmd nettest -s -R -f -l ${a} -I ${VRF} -b
log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+ log_start
+ run_cmd nettest -s -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address after VRF bind"
+
+ log_start
+ run_cmd nettest -s -D -P icmp -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address after VRF bind"
+
+ #
+ # check that ICMP sockets cannot bind to broadcast and multicast addresses
+ #
+ a=${BCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind"
+
+ a=${MCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind"
+
#
# tcp sockets
#
log_subsection "No VRF"
setup
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
ipv4_addr_bind_novrf
log_subsection "With VRF"
setup "yes"
+ set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
ipv4_addr_bind_vrf
}
--- /dev/null
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# ns: h1 | ns: h2
+# 192.168.0.1/24 |
+# eth0 |
+# | 192.168.1.1/32
+# veth0 <---|---> veth1
+# Validate source address selection for route without gateway
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+################################################################################
+# helpers
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# config
+setup()
+{
+ ip netns add h1
+ ip -n h1 link set lo up
+ ip netns add h2
+ ip -n h2 link set lo up
+
+ # Add a fake eth0 to support an ip address
+ ip -n h1 link add name eth0 type dummy
+ ip -n h1 link set eth0 up
+ ip -n h1 address add 192.168.0.1/24 dev eth0
+
+ # Configure veths (same @mac, arp off)
+ ip -n h1 link add name veth0 type veth peer name veth1 netns h2
+ ip -n h1 link set veth0 up
+
+ ip -n h2 link set veth1 up
+
+ # Configure @IP in the peer netns
+ ip -n h2 address add 192.168.1.1/32 dev veth1
+ ip -n h2 route add default dev veth1
+
+ # Add a nexthop without @gw and use it in a route
+ ip -n h1 nexthop add id 1 dev veth0
+ ip -n h1 route add 192.168.1.1 nhid 1
+}
+
+cleanup()
+{
+ ip netns del h1 2>/dev/null
+ ip netns del h2 2>/dev/null
+}
+
+trap cleanup EXIT
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ esac
+done
+
+cleanup
+setup
+
+run_cmd ip -netns h1 route get 192.168.1.1
+log_test $? 0 "nexthop: get route with nexthop without gw"
+run_cmd ip netns exec h1 ping -c1 192.168.1.1
+log_test $? 0 "nexthop: ping through nexthop without gw"
+
+exit $ret
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
ipip_hier_gre.sh \
+ local_termination.sh \
loopback.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
mirror_vlan.sh \
+ no_forwarding.sh \
pedit_dsfield.sh \
pedit_ip.sh \
pedit_l4port.sh \
# FDB entry was installed.
bridge link set dev $br_port1 flood off
+ ip link set $host1_if promisc on
tc qdisc add dev $host1_if ingress
tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
flower dst_mac $mac action drop
tc -j -s filter show dev $host1_if ingress \
| jq -e ".[] | select(.options.handle == 101) \
| select(.options.actions[0].stats.packets == 1)" &> /dev/null
- check_fail $? "Packet reached second host when should not"
+ check_fail $? "Packet reached first host when should not"
$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
sleep 1
tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
tc qdisc del dev $host1_if ingress
+ ip link set $host1_if promisc off
bridge link set dev $br_port1 flood on
# Add an ACL on `host2_if` which will tell us whether the packet
# was flooded to it or not.
+ ip link set $host2_if promisc on
tc qdisc add dev $host2_if ingress
tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
flower dst_mac $mac action drop
tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
tc qdisc del dev $host2_if ingress
+ ip link set $host2_if promisc off
return $err
}
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
- simult_flows.sh mptcp_sockopt.sh
+ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
__chk_nr "grep -c token:" $*
}
+wait_msk_nr()
+{
+ local condition="grep -c token:"
+ local expected=$1
+ local timeout=20
+ local msg nr
+ local max=0
+ local i=0
+
+ shift 1
+ msg=$*
+
+ while [ $i -lt $timeout ]; do
+ nr=$(ss -inmHMN $ns | $condition)
+ [ $nr == $expected ] && break;
+ [ $nr -gt $max ] && max=$nr
+ i=$((i + 1))
+ sleep 1
+ done
+
+ printf "%-50s" "$msg"
+ if [ $i -ge $timeout ]; then
+ echo "[ fail ] timeout while expecting $expected max $max last $nr"
+ ret=$test_cnt
+ elif [ $nr != $expected ]; then
+ echo "[ fail ] expected $expected found $nr"
+ ret=$test_cnt
+ else
+ echo "[ ok ]"
+ fi
+ test_cnt=$((test_cnt+1))
+}
+
chk_msk_fallback_nr()
{
__chk_nr "grep -c fallback" $*
echo "a" | \
timeout ${timeout_test} \
ip netns exec $ns \
- ./mptcp_connect -p 10000 -l -t ${timeout_poll} \
+ ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \
0.0.0.0 >/dev/null &
wait_local_port_listen $ns 10000
chk_msk_nr 0 "no msk on netns creation"
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
- ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} \
+ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
127.0.0.1 >/dev/null &
wait_connected $ns 10000
chk_msk_nr 2 "after MPC handshake "
echo "a" | \
timeout ${timeout_test} \
ip netns exec $ns \
- ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} \
+ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \
0.0.0.0 >/dev/null &
wait_local_port_listen $ns 10001
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
- ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} \
+ ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \
127.0.0.1 >/dev/null &
wait_connected $ns 10001
chk_msk_fallback_nr 1 "check fallback"
echo "a" | \
timeout ${timeout_test} \
ip netns exec $ns \
- ./mptcp_connect -p $((I+10001)) -l -w 10 \
+ ./mptcp_connect -p $((I+10001)) -l -w 20 \
-t ${timeout_poll} 0.0.0.0 >/dev/null &
done
wait_local_port_listen $ns $((NR_CLIENTS + 10001))
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
- ./mptcp_connect -p $((I+10001)) -w 10 \
+ ./mptcp_connect -p $((I+10001)) -w 20 \
-t ${timeout_poll} 127.0.0.1 >/dev/null &
done
-sleep 1.5
-chk_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
flush_pids
exit $ret
static int sock_listen_mptcp(const char * const listenaddr,
const char * const port)
{
- int sock;
+ int sock = -1;
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
.ai_socktype = SOCK_STREAM,
static int sock_listen_mptcp(const char * const listenaddr,
const char * const port)
{
- int sock;
+ int sock = -1;
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
.ai_socktype = SOCK_STREAM,
static int sock_listen_mptcp(const char * const listenaddr,
const char * const port)
{
- int sock;
+ int sock = -1;
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
.ai_socktype = SOCK_STREAM,
fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n");
fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
- fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>]\n");
+ fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n");
fprintf(stderr, "\tflush\n");
fprintf(stderr, "\tdump\n");
fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
u_int32_t flags = 0;
+ u_int32_t token = 0;
+ u_int16_t rport = 0;
u_int16_t family;
+ void *rip = NULL;
int nest_start;
int use_id = 0;
u_int8_t id;
error(1, 0, " missing flags keyword");
for (; arg < argc; arg++) {
- if (!strcmp(argv[arg], "flags")) {
+ if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token value");
+
+ /* token */
+ token = atoi(argv[arg]);
+ } else if (!strcmp(argv[arg], "flags")) {
char *tok, *str;
/* flags */
rta->rta_len = RTA_LENGTH(2);
memcpy(RTA_DATA(rta), &port, 2);
off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "rport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote port");
+
+ rport = atoi(argv[arg]);
+ } else if (!strcmp(argv[arg], "rip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote ip");
+
+ rip = argv[arg];
} else {
error(1, 0, "unknown keyword %s", argv[arg]);
}
}
nest->rta_len = off - nest_start;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ /* remote addr/port */
+ if (rip) {
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, rip, RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else {
+ error(1, errno, "can't parse ip %s", (char *)rip);
+ }
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ if (rport) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &rport, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ nest->rta_len = off - nest_start;
+ }
+
do_nl_req(fd, nh, off, 0);
return 0;
}
rm -f "$evts"
}
+test_prio()
+{
+ local count
+
+ # Send MP_PRIO signal from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$server4_port"
+ sleep 0.5
+
+ # Check TX
+ stdbuf -o0 -e0 printf "MP_PRIO TX \t"
+ count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
+ [ -z "$count" ] && count=0
+ if [ $count != 1 ]; then
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
+ else
+ stdbuf -o0 -e0 printf "[OK]\n"
+ fi
+
+ # Check RX
+ stdbuf -o0 -e0 printf "MP_PRIO RX \t"
+ count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
+ [ -z "$count" ] && count=0
+ if [ $count != 1 ]; then
+ stdbuf -o0 -e0 printf "[FAIL]\n"
+ exit 1
+ else
+ stdbuf -o0 -e0 printf "[OK]\n"
+ fi
+}
+
make_connection
make_connection "v6"
test_announce
test_remove
test_subflows
+test_prio
exit 0
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+static int tun_attach(int fd, char *dev)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, dev);
+ ifr.ifr_flags = IFF_ATTACH_QUEUE;
+
+ return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+}
+
+static int tun_detach(int fd, char *dev)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, dev);
+ ifr.ifr_flags = IFF_DETACH_QUEUE;
+
+ return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+}
+
+static int tun_alloc(char *dev)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "can't open tun: %s\n", strerror(errno));
+ return fd;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, dev);
+ ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;
+
+ err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ if (err < 0) {
+ fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
+ close(fd);
+ return err;
+ }
+ strcpy(dev, ifr.ifr_name);
+ return fd;
+}
+
+static int tun_delete(char *dev)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifm;
+ unsigned char data[64];
+ } req;
+ struct rtattr *rta;
+ int ret, rtnl;
+
+ rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ if (rtnl < 0) {
+ fprintf(stderr, "can't open rtnl: %s\n", strerror(errno));
+ return 1;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm)));
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_type = RTM_DELLINK;
+
+ req.ifm.ifi_family = AF_UNSPEC;
+
+ rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
+ rta->rta_type = IFLA_IFNAME;
+ rta->rta_len = RTA_LENGTH(IFNAMSIZ);
+ req.nh.nlmsg_len += rta->rta_len;
+ memcpy(RTA_DATA(rta), dev, IFNAMSIZ);
+
+ ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+ if (ret < 0)
+ fprintf(stderr, "can't send: %s\n", strerror(errno));
+ ret = (unsigned int)ret != req.nh.nlmsg_len;
+
+ close(rtnl);
+ return ret;
+}
+
+FIXTURE(tun)
+{
+ char ifname[IFNAMSIZ];
+ int fd, fd2;
+};
+
+FIXTURE_SETUP(tun)
+{
+ memset(self->ifname, 0, sizeof(self->ifname));
+
+ self->fd = tun_alloc(self->ifname);
+ ASSERT_GE(self->fd, 0);
+
+ self->fd2 = tun_alloc(self->ifname);
+ ASSERT_GE(self->fd2, 0);
+}
+
+FIXTURE_TEARDOWN(tun)
+{
+ if (self->fd >= 0)
+ close(self->fd);
+ if (self->fd2 >= 0)
+ close(self->fd2);
+}
+
+TEST_F(tun, delete_detach_close) {
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
+ EXPECT_EQ(errno, 22);
+}
+
+TEST_F(tun, detach_delete_close) {
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, detach_close_delete) {
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+ close(self->fd);
+ self->fd = -1;
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, reattach_delete_close) {
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+ EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, reattach_close_delete) {
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+ EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
+ close(self->fd);
+ self->fd = -1;
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_HARNESS_MAIN
ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
}
run_one() {
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp
tc -n "${PEER_NS}" qdisc add dev veth1 clsact
tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file ../bpf/nat6to4.o section schedcls/ingress6/nat_6 direct-action
tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file ../bpf/nat6to4.o section schedcls/egress4/snat4 direct-action
ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
done
- ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+ ip -n $NS_DST link set veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null
}
create_vxlan_endpoint() {
run_udp "${ipv4_args}"
echo "ipv6"
- run_tcp "${ipv4_args}"
+ run_tcp "${ipv6_args}"
run_udp "${ipv6_args}"
}
ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
printf "%-60s" "bad setting: XDP with RX nr less than TX"
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
- section xdp_dummy 2>/dev/null &&\
+ section xdp 2>/dev/null &&\
echo "fail - set operation successful ?!?" || echo " ok "
# the following tests will run with multiple channels active
ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
ip netns exec $NS_DST ethtool -L veth$DST rx 2
ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o \
- section xdp_dummy 2>/dev/null
+ section xdp 2>/dev/null
printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
echo "fail - set operation successful ?!?" || echo " ok "
chk_channels "setting invalid channels nr" $DST 2 2
fi
-ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp_dummy 2>/dev/null
+ip -n $NS_DST link set dev veth$DST xdp object ../bpf/xdp_dummy.o section xdp 2>/dev/null
chk_gro_flag "with xdp attached - gro flag" $DST on
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC off
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
- ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
# Definition of set types:
return $lret
}
+test_local_dnat_portonly()
+{
+ local family=$1
+ local daddr=$2
+ local lret=0
+ local sr_s
+ local sr_r
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ meta l4proto tcp dnat to :2000
+
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ if [ $family = "inet" ];then
+ echo "SKIP: inet port test"
+ test_inet_nat=false
+ return
+ fi
+ echo "SKIP: Could not add $family dnat hook"
+ return
+ fi
+
+ echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
+ sc_s=$!
+
+ result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+
+ if [ "$result" = "SERVER-inet" ];then
+ echo "PASS: inet port rewrite without l3 address"
+ else
+ echo "ERROR: inet port rewrite"
+ ret=1
+ fi
+}
test_masquerade6()
{
reset_counters
test_local_dnat ip
test_local_dnat6 ip6
+
+reset_counters
+test_local_dnat_portonly inet 10.0.1.99
+
reset_counters
$test_inet_nat && test_local_dnat inet
$test_inet_nat && test_local_dnat6 inet
"teardown": [
"$TC actions flush action gact"
]
+ },
+ {
+ "id": "7f52",
+ "name": "Try to flush action which is referenced by filter",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC actions add action pass index 1",
+ "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 1"
+ ],
+ "cmdUnderTest": "$TC actions flush action gact",
+ "expExitCode": "1",
+ "verifyCmd": "$TC actions ls action gact",
+ "matchPattern": "total acts 1.*action order [0-9]*: gact action pass.*index 1 ref 2 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress",
+ [
+ "sleep 1; $TC actions flush action gact",
+ 0,
+ 1
+ ]
+ ]
+ },
+ {
+ "id": "ae1e",
+ "name": "Try to flush actions when last one is referenced by filter",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress",
+ [
+ "$TC actions add action pass index 1",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 2",
+ "$TC actions add action drop index 3",
+ "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 3"
+ ],
+ "cmdUnderTest": "$TC actions flush action gact",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action gact",
+ "matchPattern": "total acts 1.*action order [0-9]*: gact action drop.*index 3 ref 2 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress",
+ [
+ "sleep 1; $TC actions flush action gact",
+ 0,
+ 1
+ ]
+ ]
}
]
if (write)
gup.gup_flags |= FOLL_WRITE;
- gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ gup_fd = open(GUP_TEST_FILE, O_RDWR);
if (gup_fd == -1) {
switch (errno) {
case EACCES:
printf("check if CONFIG_GUP_TEST is enabled in kernel config\n");
break;
default:
- perror("failed to open /sys/kernel/debug/gup_test");
+ perror("failed to open " GUP_TEST_FILE);
break;
}
exit(KSFT_SKIP);
}
if (fprintf(f, "%lu", val) < 0) {
perror("fprintf");
+ fclose(f);
return 1;
}
fclose(f);
}
if (fscanf(f, "%lu", val) != 1) {
perror("fscanf");
+ fclose(f);
return 1;
}
fclose(f);
MIRROR := https://download.wireguard.com/qemu-test/distfiles/
KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
-rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
-WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
default: qemu
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a53 -machine virt
-CFLAGS += -march=armv8-a -mtune=cortex-a53
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv8-a
endif
else ifeq ($(ARCH),aarch64_be)
CHOST := aarch64_be-linux-musl
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a53 -machine virt
-CFLAGS += -march=armv8-a -mtune=cortex-a53
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv8-a
endif
else ifeq ($(ARCH),arm)
CHOST := arm-linux-musleabi
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a15 -machine virt
-CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux
endif
else ifeq ($(ARCH),armeb)
CHOST := armeb-linux-musleabi
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a15 -machine virt
-CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian.
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux
LDFLAGS += -Wl,--be8
endif
else ifeq ($(ARCH),x86_64)
QEMU_ARCH := x86_64
KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
else
-QEMU_MACHINE := -cpu Skylake-Server -machine q35
-CFLAGS += -march=skylake-avx512
+QEMU_MACHINE := -cpu max -machine microvm -no-acpi
endif
else ifeq ($(ARCH),i686)
CHOST := i686-linux-musl
QEMU_ARCH := i386
KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
-QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
else
-QEMU_MACHINE := -cpu coreduo -machine q35
-CFLAGS += -march=prescott
+QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi
endif
else ifeq ($(ARCH),mips64)
CHOST := mips64-linux-musl
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
else
-QEMU_MACHINE := -machine pseries
+QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng
endif
else ifeq ($(ARCH),powerpc64le)
CHOST := powerpc64le-linux-musl
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
else
-QEMU_MACHINE := -machine pseries
+QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng
endif
else ifeq ($(ARCH),powerpc)
CHOST := powerpc-linux-musl
KERNEL_ARCH := m68k
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt -append $(KERNEL_CMDLINE)
else
-QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -machine virt -smp 1 -append $(KERNEL_CMDLINE)
endif
else ifeq ($(ARCH),riscv64)
CHOST := riscv64-linux-musl
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
else
-QEMU_MACHINE := -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
endif
else
$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x)
cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
$(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
-$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
+.PHONY: $(KERNEL_BZIMAGE)
$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed
rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux
CONFIG_VIRTIO_MENU=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
CONFIG_VIRTIO_MENU=y
CONFIG_VIRTIO_MMIO=y
CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_CPU_BIG_ENDIAN=y
-CONFIG_ACPI=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1 reboot=t"
CONFIG_FRAME_WARN=1024
CONFIG_MMU=y
+CONFIG_VIRT=y
CONFIG_M68KCLASSIC=y
-CONFIG_M68040=y
-CONFIG_MAC=y
-CONFIG_SERIAL_PMACZILOG=y
-CONFIG_SERIAL_PMACZILOG_TTYS=y
-CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_CMDLINE="console=ttyGF0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
CONFIG_PHYS_64BIT=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_MATH_EMULATION=y
CONFIG_CMDLINE_BOOL=y
CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
-CONFIG_ACPI=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1 reboot=t"
CONFIG_FRAME_WARN=1280
#include <stdlib.h>
#include <stdbool.h>
#include <fcntl.h>
+#include <time.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/utsname.h>
#include <sys/sendfile.h>
#include <sys/sysmacros.h>
+#include <sys/random.h>
#include <linux/random.h>
#include <linux/version.h>
{
int bits = 256, fd;
+ if (!getrandom(NULL, 0, GRND_NONBLOCK))
+ return;
pretty_message("[+] Fake seeding RNG...");
fd = open("/dev/random", O_WRONLY);
if (fd < 0)
close(fd);
}
+static void set_time(void)
+{
+ if (time(NULL))
+ return;
+ pretty_message("[+] Setting fake time...");
+ if (stime(&(time_t){1433512680}) < 0)
+ panic("settimeofday()");
+}
+
static void mount_filesystems(void)
{
pretty_message("[+] Mounting filesystems...");
print_banner();
mount_filesystems();
seed_rng();
+ set_time();
kmod_selftests();
enable_logging();
clear_leaks();
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_SCRIPT=y
CONFIG_VDSO=y
+CONFIG_STRICT_KERNEL_RWX=y
CONFIG_VIRTUALIZATION=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
CONFIG_PROC_SYSCTL=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
+CONFIG_RANDOM_TRUST_CPU=y
+CONFIG_RANDOM_TRUST_BOOTLOADER=y
CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
CONFIG_LOG_BUF_SHIFT=18
CONFIG_PRINTK_TIME=y
vcpu->stat.generic.blocking = 1;
+ preempt_disable();
kvm_arch_vcpu_blocking(vcpu);
-
prepare_to_rcuwait(wait);
+ preempt_enable();
+
for (;;) {
set_current_state(TASK_INTERRUPTIBLE);
waited = true;
schedule();
}
- finish_rcuwait(wait);
+ preempt_disable();
+ finish_rcuwait(wait);
kvm_arch_vcpu_unblocking(vcpu);
+ preempt_enable();
vcpu->stat.generic.blocking = 0;
kvm_put_kvm_no_destroy(kvm);
mutex_lock(&kvm->lock);
list_del(&dev->vm_node);
+ if (ops->release)
+ ops->release(dev);
mutex_unlock(&kvm->lock);
- ops->destroy(dev);
+ if (ops->destroy)
+ ops->destroy(dev);
return ret;
}