*.xz
*.zst
Module.symvers
-modules.builtin
modules.order
#
# Top-level generic files
#
-/tags
-/TAGS
/linux
/modules-only.symvers
/vmlinux
/vmlinuz
/System.map
/Module.markers
+/modules.builtin
/modules.builtin.modinfo
/modules.nsdeps
patches
series
+# ctags files
+tags
+TAGS
+
# cscope files
cscope.*
ncscope.*
S: 181 00 Praha 8
S: Czech Republic
+N: Murali Karicheri
+E: m-karicheri2@ti.com
+D: Keystone NetCP driver
+D: Keystone PCIe host controller driver
+
N: Jan "Yenya" Kasprzak
E: kas@fi.muni.cz
D: Author of the COSA/SRP sync serial board driver.
--- /dev/null
+What: /sys/bus/event_source/devices/dsa*/format
+Date: April 2021
+KernelVersion: 5.13
+Contact: Tom Zanussi <tom.zanussi@linux.intel.com>
+Description: Read-only. Attribute group to describe the magic bits
+ that go into perf_event_attr.config or
+ perf_event_attr.config1 for the IDXD DSA pmu. (See also
+ ABI/testing/sysfs-bus-event_source-devices-format).
+
+ Each attribute in this group defines a bit range in
+ perf_event_attr.config or perf_event_attr.config1.
+ All supported attributes are listed below (See the
+ IDXD DSA Spec for possible attribute values)::
+
+ event_category = "config:0-3" - event category
+ event = "config:4-31" - event ID
+
+ filter_wq = "config1:0-31" - workqueue filter
+ filter_tc = "config1:32-39" - traffic class filter
+ filter_pgsz = "config1:40-43" - page size filter
+ filter_sz = "config1:44-51" - transfer size filter
+ filter_eng = "config1:52-59" - engine filter
+
+What: /sys/bus/event_source/devices/dsa*/cpumask
+Date: April 2021
+KernelVersion: 5.13
+Contact: Tom Zanussi <tom.zanussi@linux.intel.com>
+Description: Read-only. This file always returns the cpu to which the
+ IDXD DSA pmu is bound for access to all dsa pmu
+ performance monitoring events.
Indicates the mux id associated to the qmimux network interface
during its creation.
+
+What: /sys/class/net/<iface>/qmi/pass_through
+Date: January 2021
+KernelVersion: 5.12
+Contact: Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
+Description:
+ Boolean. Default: 'N'
+
+ Set this to 'Y' to enable 'pass-through' mode, allowing packets
+ in MAP format to be passed on to the stack.
+
+ Normally the rmnet driver (CONFIG_RMNET) is then used to process
+ and demultiplex these packets.
+
+ 'Pass-through' mode can be enabled when the device is in
+ 'raw-ip' mode only.
All AMD processors with L3 caches provide this functionality.
For details, see BKDGs at
- http://developer.amd.com/documentation/guides/Pages/default.aspx
+ https://www.amd.com/en/support/tech-docs?keyword=bios+kernel
What: /sys/devices/system/cpu/cpufreq/boost
Access: Read
Valid values: Represented as string
+
+What: /sys/bus/i2c/devices/xxx/type
+Date: Jan 2021
+Contact: linux-input@vger.kernel.org
+Description: Reports the type identification provided by the touchscreen, for example "PCAP82H80 Series"
+
+ Access: Read
+
+ Valid values: Represented as string
Contact: "Daniel Rosenberg" <drosen@google.com>
Description: If checkpoint=disable, it displays the number of blocks that
are unusable.
- If checkpoint=enable it displays the enumber of blocks that
+ If checkpoint=enable it displays the number of blocks that
would be unusable if checkpoint=disable were to be set.
What: /sys/fs/f2fs/<disk>/encoding
I/O priority "3". We can select the class between "rt" and "be",
and set the I/O priority within valid range of it. "," delimiter
is necessary in between I/O class and priority number.
+
+What: /sys/fs/f2fs/<disk>/ovp_segments
+Date: March 2021
+Contact: "Jaegeuk Kim" <jaegeuk@kernel.org>
+Description: Shows the number of overprovision segments.
+
+What: /sys/fs/f2fs/<disk>/compr_written_block
+Date: March 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the block count written after compression since mount. Note
+ that when the compressed blocks are deleted, this count doesn't
+ decrease. If you write "0" here, you can initialize
+ compr_written_block and compr_saved_block to "0".
+
+What: /sys/fs/f2fs/<disk>/compr_saved_block
+Date: March 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the saved block count with compression since mount. Note
+ that when the compressed blocks are deleted, this count doesn't
+ decrease. If you write "0" here, you can initialize
+ compr_written_block and compr_saved_block to "0".
+
+What: /sys/fs/f2fs/<disk>/compr_new_inode
+Date: March 2021
+Contact: "Daeho Jeong" <daehojeong@google.com>
+Description: Show the count of inode newly enabled for compression since mount.
+ Note that when the compression is disabled for the files, this count
+ doesn't decrease. If you write "0" here, you can initialize
+ compr_new_inode to "0".
--- /dev/null
+What: /sys/kernel/mm/cma/
+Date: Feb 2021
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ /sys/kernel/mm/cma/ contains a subdirectory for each CMA
+ heap name (also sometimes called CMA areas).
+
+ Each CMA heap subdirectory (that is, each
+ /sys/kernel/mm/cma/<cma-heap-name> directory) contains the
+ following items:
+
+ alloc_pages_success
+ alloc_pages_fail
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/alloc_pages_success
+Date: Feb 2021
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ the number of pages CMA API succeeded to allocate
+
+What: /sys/kernel/mm/cma/<cma-heap-name>/alloc_pages_fail
+Date: Feb 2021
+Contact: Minchan Kim <minchan@kernel.org>
+Description:
+ the number of pages CMA API failed to allocate
1 char Memory devices
1 = /dev/mem Physical memory access
- 2 = /dev/kmem Kernel virtual memory access
+ 2 = /dev/kmem OBSOLETE - replaced by /proc/kcore
3 = /dev/null Null device
4 = /dev/port I/O port access
5 = /dev/zero Null byte source
gpio_mockup_ranges
This parameter takes an argument in the form of an array of integer
- pairs. Each pair defines the base GPIO number (if any) and the number
- of lines exposed by the chip. If the base GPIO is -1, the gpiolib
- will assign it automatically.
+ pairs. Each pair defines the base GPIO number (non-negative integer)
+ and the first number after the last of this chip. If the base GPIO
+ is -1, the gpiolib will assign it automatically. while the following
+ parameter is the number of lines exposed by the chip.
- Example: gpio_mockup_ranges=-1,8,-1,16,405,4
+ Example: gpio_mockup_ranges=-1,8,-1,16,405,409
The line above creates three chips. The first one will expose 8 lines,
the second 16 and the third 4. The base GPIO for the third chip is set
to 405 while for two first chips it will be assigned automatically.
- gpio_named_lines
+ gpio_mockup_named_lines
This parameter doesn't take any arguments. It lets the driver know that
GPIO lines exposed by it should be named.
Don't use this when you are not running on the
android emulator
+ gpio-mockup.gpio_mockup_ranges
+ [HW] Sets the ranges of gpiochip of for this device.
+ Format: <start1>,<end1>,<start2>,<end2>...
+ gpio-mockup.gpio_mockup_named_lines
+ [HW] Let the driver know GPIO lines should be named.
+
gpt [EFI] Forces disk with valid GPT signature but
invalid Protective MBR to be treated as GPT. If the
primary GPT is corrupted, it enables the backup/alternate
Format: <unsigned int> such that (rxsize & ~0x1fffc0) == 0.
Default: 1024
- gpio-mockup.gpio_mockup_ranges
- [HW] Sets the ranges of gpiochip of for this device.
- Format: <start1>,<end1>,<start2>,<end2>...
-
hardlockup_all_cpu_backtrace=
[KNL] Should the hard-lockup detector generate
backtraces on all cpus.
initcall functions. Useful for debugging built-in
modules and initcalls.
+ initramfs_async= [KNL]
+ Format: <bool>
+ Default: 1
+ This parameter controls whether the initramfs
+ image is unpacked asynchronously, concurrently
+ with devices being probed and
+ initialized. This should normally just work,
+ but as a debugging aid, one can get the
+ historical behaviour of the initramfs
+ unpacking being completed before device_ and
+ late_ initcalls.
+
initrd= [BOOT] Specify the location of the initial ramdisk
initrdmem= [KNL] Specify a physical address and size from which to
seconds. Use this parameter to check at some
other rate. 0 disables periodic checking.
- memtest= [KNL,X86,ARM,PPC] Enable memtest
+ memory_hotplug.memmap_on_memory
+ [KNL,X86,ARM] Boolean flag to enable this feature.
+ Format: {on | off (default)}
+ When enabled, runtime hotplugged memory will
+ allocate its internal metadata (struct pages)
+ from the hotadded memory which will allow to
+ hotadd a lot of memory without requiring
+ additional memory to do so.
+ This feature is disabled by default because it
+ has some implication on large (e.g. GB)
+ allocations in some configurations (e.g. small
+ memory blocks).
+ The state of the flag can be read in
+ /sys/module/memory_hotplug/parameters/memmap_on_memory.
+ Note that even when enabled, there are a few cases where
+ the feature is not effective.
+
+ memtest= [KNL,X86,ARM,PPC,RISCV] Enable memtest
Format: <integer>
default : 0 <disable>
Specifies the number of memtest passes to be
nohugeiomap [KNL,X86,PPC,ARM64] Disable kernel huge I/O mappings.
+ nohugevmalloc [PPC] Disable kernel huge vmalloc mappings.
+
nosmt [KNL,S390] Disable symmetric multithreading (SMT).
Equivalent to smt=1.
Unfortunately, there is no information to show which memory block belongs
to ZONE_MOVABLE. This is TBD.
+.. note::
+ Techniques that rely on long-term pinnings of memory (especially, RDMA and
+ vfio) are fundamentally problematic with ZONE_MOVABLE and, therefore, memory
+ hot remove. Pinned pages cannot reside on ZONE_MOVABLE, to guarantee that
+ memory can still get hot removed - be aware that pinning can fail even if
+ there is plenty of free memory in ZONE_MOVABLE. In addition, using
+ ZONE_MOVABLE might make page pinning more expensive, because pages have to be
+ migrated off that zone first.
+
.. _memory_hotplug_how_to_offline_memory:
How to offline memory
The ``uffdio_api.features`` bitmask returned by the ``UFFDIO_API`` ioctl
defines what memory types are supported by the ``userfaultfd`` and what
-events, except page fault notifications, may be generated.
-
-If the kernel supports registering ``userfaultfd`` ranges on hugetlbfs
-virtual memory areas, ``UFFD_FEATURE_MISSING_HUGETLBFS`` will be set in
-``uffdio_api.features``. Similarly, ``UFFD_FEATURE_MISSING_SHMEM`` will be
-set if the kernel supports registering ``userfaultfd`` ranges on shared
-memory (covering all shmem APIs, i.e. tmpfs, ``IPCSHM``, ``/dev/zero``,
-``MAP_SHARED``, ``memfd_create``, etc).
-
-The userland application that wants to use ``userfaultfd`` with hugetlbfs
-or shared memory need to set the corresponding flag in
-``uffdio_api.features`` to enable those features.
-
-If the userland desires to receive notifications for events other than
-page faults, it has to verify that ``uffdio_api.features`` has appropriate
-``UFFD_FEATURE_EVENT_*`` bits set. These events are described in more
-detail below in `Non-cooperative userfaultfd`_ section.
-
-Once the ``userfaultfd`` has been enabled the ``UFFDIO_REGISTER`` ioctl should
-be invoked (if present in the returned ``uffdio_api.ioctls`` bitmask) to
-register a memory range in the ``userfaultfd`` by setting the
+events, except page fault notifications, may be generated:
+
+- The ``UFFD_FEATURE_EVENT_*`` flags indicate that various other events
+ other than page faults are supported. These events are described in more
+ detail below in the `Non-cooperative userfaultfd`_ section.
+
+- ``UFFD_FEATURE_MISSING_HUGETLBFS`` and ``UFFD_FEATURE_MISSING_SHMEM``
+ indicate that the kernel supports ``UFFDIO_REGISTER_MODE_MISSING``
+ registrations for hugetlbfs and shared memory (covering all shmem APIs,
+ i.e. tmpfs, ``IPCSHM``, ``/dev/zero``, ``MAP_SHARED``, ``memfd_create``,
+ etc) virtual memory areas, respectively.
+
+- ``UFFD_FEATURE_MINOR_HUGETLBFS`` indicates that the kernel supports
+ ``UFFDIO_REGISTER_MODE_MINOR`` registration for hugetlbfs virtual memory
+ areas.
+
+The userland application should set the feature flags it intends to use
+when invoking the ``UFFDIO_API`` ioctl, to request that those features be
+enabled if supported.
+
+Once the ``userfaultfd`` API has been enabled the ``UFFDIO_REGISTER``
+ioctl should be invoked (if present in the returned ``uffdio_api.ioctls``
+bitmask) to register a memory range in the ``userfaultfd`` by setting the
uffdio_register structure accordingly. The ``uffdio_register.mode``
bitmask will specify to the kernel which kind of faults to track for
-the range (``UFFDIO_REGISTER_MODE_MISSING`` would track missing
-pages). The ``UFFDIO_REGISTER`` ioctl will return the
+the range. The ``UFFDIO_REGISTER`` ioctl will return the
``uffdio_register.ioctls`` bitmask of ioctls that are suitable to resolve
userfaults on the range registered. Not all ioctls will necessarily be
-supported for all memory types depending on the underlying virtual
-memory backend (anonymous memory vs tmpfs vs real filebacked
-mappings).
+supported for all memory types (e.g. anonymous memory vs. shmem vs.
+hugetlbfs), or all types of intercepted faults.
Userland can use the ``uffdio_register.ioctls`` to manage the virtual
address space in the background (to add or potentially also remove
could be triggering just before userland maps in the background the
user-faulted page.
-The primary ioctl to resolve userfaults is ``UFFDIO_COPY``. That
-atomically copies a page into the userfault registered range and wakes
-up the blocked userfaults
-(unless ``uffdio_copy.mode & UFFDIO_COPY_MODE_DONTWAKE`` is set).
-Other ioctl works similarly to ``UFFDIO_COPY``. They're atomic as in
-guaranteeing that nothing can see an half copied page since it'll
-keep userfaulting until the copy has finished.
+Resolving Userfaults
+--------------------
+
+There are three basic ways to resolve userfaults:
+
+- ``UFFDIO_COPY`` atomically copies some existing page contents from
+ userspace.
+
+- ``UFFDIO_ZEROPAGE`` atomically zeros the new page.
+
+- ``UFFDIO_CONTINUE`` maps an existing, previously-populated page.
+
+These operations are atomic in the sense that they guarantee nothing can
+see a half-populated page, since readers will keep userfaulting until the
+operation has finished.
+
+By default, these wake up userfaults blocked on the range in question.
+They support a ``UFFDIO_*_MODE_DONTWAKE`` ``mode`` flag, which indicates
+that waking will be done separately at some later time.
+
+Which ioctl to choose depends on the kind of page fault, and what we'd
+like to do to resolve it:
+
+- For ``UFFDIO_REGISTER_MODE_MISSING`` faults, the fault needs to be
+ resolved by either providing a new page (``UFFDIO_COPY``), or mapping
+ the zero page (``UFFDIO_ZEROPAGE``). By default, the kernel would map
+ the zero page for a missing fault. With userfaultfd, userspace can
+ decide what content to provide before the faulting thread continues.
+
+- For ``UFFDIO_REGISTER_MODE_MINOR`` faults, there is an existing page (in
+ the page cache). Userspace has the option of modifying the page's
+ contents before resolving the fault. Once the contents are correct
+ (modified or not), userspace asks the kernel to map the page and let the
+ faulting thread continue with ``UFFDIO_CONTINUE``.
Notes:
-- If you requested ``UFFDIO_REGISTER_MODE_MISSING`` when registering then
- you must provide some kind of page in your thread after reading from
- the uffd. You must provide either ``UFFDIO_COPY`` or ``UFFDIO_ZEROPAGE``.
- The normal behavior of the OS automatically providing a zero page on
- an anonymous mmaping is not in place.
+- You can tell which kind of fault occurred by examining
+ ``pagefault.flags`` within the ``uffd_msg``, checking for the
+ ``UFFD_PAGEFAULT_FLAG_*`` flags.
- None of the page-delivering ioctls default to the range that you
registered with. You must fill in all fields for the appropriate
- You get the address of the access that triggered the missing page
event out of a struct uffd_msg that you read in the thread from the
- uffd. You can supply as many pages as you want with ``UFFDIO_COPY`` or
- ``UFFDIO_ZEROPAGE``. Keep in mind that unless you used DONTWAKE then
- the first of any of those IOCTLs wakes up the faulting thread.
+ uffd. You can supply as many pages as you want with these IOCTLs.
+ Keep in mind that unless you used DONTWAKE then the first of any of
+ those IOCTLs wakes up the faulting thread.
- Be sure to test for all errors including
(``pollfd[0].revents & POLLERR``). This can happen, e.g. when ranges
you don't find any, install `the latest release from that series
<https://kernel.org/>`_. If it still shows the issue, report it to the stable
mailing list (stable@vger.kernel.org) and CC the regressions list
-(regressions@lists.linux.dev).
+(regressions@lists.linux.dev); ideally also CC the maintainer and the mailing
+list for the subsystem in question.
In all other cases try your best guess which kernel part might be causing the
issue. Check the :ref:`MAINTAINERS <maintainers>` file for how its developers
If you are facing multiple issues with the Linux kernel at once, report each
separately. While writing your report, include all information relevant to the
issue, like the kernel and the distro used. In case of a regression, CC the
-regressions mailing list (regressions@lists.linux.dev) to your report; also try
-to include the commit-id of the change causing it, which a bisection can find.
+regressions mailing list (regressions@lists.linux.dev) to your report. Also try
+to pin-point the culprit with a bisection; if you succeed, include its
+commit-id and CC everyone in the sign-off-by chain.
Once the report is out, answer any questions that come up and help where you
can. That includes keeping the ball rolling by occasionally retesting with newer
* Send a short problem report to the Linux stable mailing list
(stable@vger.kernel.org) and CC the Linux regressions mailing list
- (regressions@lists.linux.dev). Roughly describe the issue and ideally
- explain how to reproduce it. Mention the first version that shows the
- problem and the last version that's working fine. Then wait for further
- instructions.
+ (regressions@lists.linux.dev); if you suspect the cause in a particular
+ subsystem, CC its maintainer and its mailing list. Roughly describe the
+ issue and ideally explain how to reproduce it. Mention the first version
+ that shows the problem and the last version that's working fine. Then
+ wait for further instructions.
The reference section below explains each of these steps in more detail.
the results to the archives at that URL.
It's also wise to check the internet, LKML and maybe bugzilla.kernel.org again
-at this point.
+at this point. If your report needs to be filed in a bug tracker, you may want
+to check the mailing list archives for the subsystem as well, as someone might
+have reported it only there.
For details how to search and what to do if you find matching reports see
"Search for existing reports, first run" above.
When sending the report by mail, CC the Linux regressions mailing list
(regressions@lists.linux.dev). In case the report needs to be filed to some web
-tracker, proceed to do so; once filed, forward the report by mail to the
-regressions list. Make sure to inline the forwarded report, hence do not attach
-it. Also add a short note at the top where you mention the URL to the ticket.
+tracker, proceed to do so. Once filed, forward the report by mail to the
+regressions list; CC the maintainer and the mailing list for the subsystem in
+question. Make sure to inline the forwarded report, hence do not attach it.
+Also add a short note at the top where you mention the URL to the ticket.
When mailing or forwarding the report, in case of a successful bisection add the
author of the culprit to the recipients; also CC everyone in the signed-off-by
*Send a short problem report to the Linux stable mailing list
(stable@vger.kernel.org) and CC the Linux regressions mailing list
- (regressions@lists.linux.dev). Roughly describe the issue and ideally
- explain how to reproduce it. Mention the first version that shows the
- problem and the last version that's working fine. Then wait for further
- instructions.*
+ (regressions@lists.linux.dev); if you suspect the cause in a particular
+ subsystem, CC its maintainer and its mailing list. Roughly describe the
+ issue and ideally explain how to reproduce it. Mention the first version
+ that shows the problem and the last version that's working fine. Then
+ wait for further instructions.*
When reporting a regression that happens within a stable or longterm kernel
line (say when updating from 5.10.4 to 5.10.5) a brief report is enough for
-the start to get the issue reported quickly. Hence a rough description is all
-it takes.
+the start to get the issue reported quickly. Hence a rough description to the
+stable and regressions mailing list is all it takes; but in case you suspect
+the cause in a particular subsystem, CC its maintainers and its mailing list
+as well, because that will speed things up.
-But note, it helps developers a great deal if you can specify the exact version
+And note, it helps developers a great deal if you can specify the exact version
that introduced the problem. Hence if possible within a reasonable time frame,
try to find that version using vanilla kernels. Lets assume something broke when
your distributor released a update from Linux kernel 5.10.5 to 5.10.8. Then as
reverted to fix the issue quickly). Hence consider to do a proper bisection
right away if time permits. See the section 'Special care for regressions' and
the document 'Documentation/admin-guide/bug-bisect.rst' for details how to
-perform one.
+perform one. In case of a successful bisection add the author of the culprit to
+the recipients; also CC everyone in the signed-off-by chain, which you find at
+the end of its commit message.
Reference for "Reporting issues only occurring in older kernel version lines"
- SCR_EL3.FGTEn (bit 27) must be initialised to 0b1.
+ For CPUs with Advanced SIMD and floating point support:
+
+ - If EL3 is present:
+
+ - CPTR_EL3.TFP (bit 10) must be initialised to 0b0.
+
+ - If EL2 is present and the kernel is entered at EL1:
+
+ - CPTR_EL2.TFP (bit 10) must be initialised to 0b0.
+
+ For CPUs with the Scalable Vector Extension (FEAT_SVE) present:
+
+ - if EL3 is present:
+
+ - CPTR_EL3.EZ (bit 8) must be initialised to 0b1.
+
+ - ZCR_EL3.LEN must be initialised to the same value for all CPUs the
+ kernel is executed on.
+
+ - If the kernel is entered at EL1 and EL2 is present:
+
+ - CPTR_EL2.TZ (bit 8) must be initialised to 0b0.
+
+ - CPTR_EL2.ZEN (bits 17:16) must be initialised to 0b11.
+
+ - ZCR_EL2.LEN must be initialised to the same value for all CPUs the
+ kernel will execute on.
+
The requirements described above for CPU mode, caches, MMUs, architected
timers, coherency and system registers apply to all CPUs. All CPUs must
-enter the kernel in the same exception level.
+enter the kernel in the same exception level. Where the values documented
+disable traps it is permissible for these traps to be enabled so long as
+those traps are handled transparently by higher exception levels as though
+the values documented were set.
The boot loader is expected to enter the kernel on each CPU in the
following manner:
HWCAP_EVTSTRM
The generic timer is configured to generate events at a frequency of
- approximately 100KHz.
+ approximately 10KHz.
HWCAP_AES
Functionality implied by ID_AA64ISAR0_EL1.AES == 0b0001.
- ``shmat()`` and ``shmdt()``.
+- ``brk()`` (since kernel v5.6).
+
+- ``mmap()`` (since kernel v5.6).
+
+- ``mremap()``, the ``new_address`` argument (since kernel v5.6).
+
Any attempt to use non-zero tagged pointers may result in an error code
being returned, a (fatal) signal being raised, or other modes of
failure.
dev, size, dma_handle and dir must all be the same as those passed into
dma_alloc_pages(). page must be the pointer returned by dma_alloc_pages().
+::
+
+ int
+ dma_mmap_pages(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct page *page)
+
+Map an allocation returned from dma_alloc_pages() into a user address space.
+dev and size must be the same as those passed into dma_alloc_pages().
+page must be the pointer returned by dma_alloc_pages().
+
::
void *
dma_alloc_noncoherent(). cpu_addr must be the virtual address returned by
dma_alloc_noncoherent().
+::
+
+ struct sg_table *
+ dma_alloc_noncontiguous(struct device *dev, size_t size,
+ enum dma_data_direction dir, gfp_t gfp,
+ unsigned long attrs);
+
+This routine allocates <size> bytes of non-coherent and possibly non-contiguous
+memory. It returns a pointer to struct sg_table that describes the allocated
+and DMA mapped memory, or NULL if the allocation failed. The resulting memory
+can be used for struct page mapped into a scatterlist are suitable for.
+
+The return sg_table is guaranteed to have 1 single DMA mapped segment as
+indicated by sgt->nents, but it might have multiple CPU side segments as
+indicated by sgt->orig_nents.
+
+The dir parameter specified if data is read and/or written by the device,
+see dma_map_single() for details.
+
+The gfp parameter allows the caller to specify the ``GFP_`` flags (see
+kmalloc()) for the allocation, but rejects flags used to specify a memory
+zone such as GFP_DMA or GFP_HIGHMEM.
+
+The attrs argument must be either 0 or DMA_ATTR_ALLOC_SINGLE_PAGES.
+
+Before giving the memory to the device, dma_sync_sgtable_for_device() needs
+to be called, and before reading memory written by the device,
+dma_sync_sgtable_for_cpu(), just like for streaming DMA mappings that are
+reused.
+
+::
+
+ void
+ dma_free_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt,
+ enum dma_data_direction dir)
+
+Free memory previously allocated using dma_alloc_noncontiguous(). dev, size,
+and dir must all be the same as those passed into dma_alloc_noncontiguous().
+sgt must be the pointer returned by dma_alloc_noncontiguous().
+
+::
+
+ void *
+ dma_vmap_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt)
+
+Return a contiguous kernel mapping for an allocation returned from
+dma_alloc_noncontiguous(). dev and size must be the same as those passed into
+dma_alloc_noncontiguous(). sgt must be the pointer returned by
+dma_alloc_noncontiguous().
+
+Once a non-contiguous allocation is mapped using this function, the
+flush_kernel_vmap_range() and invalidate_kernel_vmap_range() APIs must be used
+to manage the coherency between the kernel mapping, the device and user space
+mappings (if any).
+
+::
+
+ void
+ dma_vunmap_noncontiguous(struct device *dev, void *vaddr)
+
+Unmap a kernel mapping returned by dma_vmap_noncontiguous(). dev must be the
+same the one passed into dma_alloc_noncontiguous(). vaddr must be the pointer
+returned by dma_vmap_noncontiguous().
+
+
+::
+
+ int
+ dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct sg_table *sgt)
+
+Map an allocation returned from dma_alloc_noncontiguous() into a user address
+space. dev and size must be the same as those passed into
+dma_alloc_noncontiguous(). sgt must be the pointer returned by
+dma_alloc_noncontiguous().
+
::
int
================
An interrupt controller driver creates and registers an irq_domain by
-calling one of the irq_domain_add_*() functions (each mapping method
-has a different allocator function, more on that later). The function
-will return a pointer to the irq_domain on success. The caller must
-provide the allocator function with an irq_domain_ops structure.
+calling one of the irq_domain_add_*() or irq_domain_create_*() functions
+(each mapping method has a different allocator function, more on that later).
+The function will return a pointer to the irq_domain on success. The caller
+must provide the allocator function with an irq_domain_ops structure.
In most cases, the irq_domain will begin empty without any mappings
between hwirq and IRQ numbers. Mappings are added to the irq_domain
irq_domain_add_simple()
irq_domain_add_legacy()
irq_domain_add_legacy_isa()
+ irq_domain_create_simple()
irq_domain_create_legacy()
The Legacy mapping is a special case for drivers that already have a
mapping Linux IRQs 0-15 so that existing ISA drivers get the correct IRQ
numbers.
-Most users of legacy mappings should use irq_domain_add_simple() which
-will use a legacy domain only if an IRQ range is supplied by the
-system and will otherwise use a linear domain mapping. The semantics
-of this call are such that if an IRQ range is specified then
+Most users of legacy mappings should use irq_domain_add_simple() or
+irq_domain_create_simple() which will use a legacy domain only if an IRQ range
+is supplied by the system and will otherwise use a linear domain mapping.
+The semantics of this call are such that if an IRQ range is specified then
descriptors will be allocated on-the-fly for it, and if no range is
-specified it will fall through to irq_domain_add_linear() which means
-*no* irq descriptors will be allocated.
+specified it will fall through to irq_domain_add_linear() or
+irq_domain_create_linear() which means *no* irq descriptors will be allocated.
A typical use case for simple domains is where an irqchip provider
is supporting both dynamic and static IRQ assignments.
before any irq_find_mapping() since the latter will actually work
for the static IRQ assignment case.
+irq_domain_add_simple() and irq_domain_create_simple() as well as
irq_domain_add_legacy() and irq_domain_create_legacy() are functionally
equivalent, except for the first argument is different - the former
accepts an Open Firmware specific 'struct device_node', while the latter
available to export symbols into a certain namespace: EXPORT_SYMBOL_NS() and
EXPORT_SYMBOL_NS_GPL(). They take one additional argument: the namespace.
Please note that due to macro expansion that argument needs to be a
-preprocessor symbol. E.g. to export the symbol `usb_stor_suspend` into the
-namespace `USB_STORAGE`, use::
+preprocessor symbol. E.g. to export the symbol ``usb_stor_suspend`` into the
+namespace ``USB_STORAGE``, use::
EXPORT_SYMBOL_NS(usb_stor_suspend, USB_STORAGE);
-The corresponding ksymtab entry struct `kernel_symbol` will have the member
-`namespace` set accordingly. A symbol that is exported without a namespace will
-refer to `NULL`. There is no default namespace if none is defined. `modpost`
+The corresponding ksymtab entry struct ``kernel_symbol`` will have the member
+``namespace`` set accordingly. A symbol that is exported without a namespace will
+refer to ``NULL``. There is no default namespace if none is defined. ``modpost``
and kernel/module.c make use the namespace at build time or module load time,
respectively.
There are multiple ways of specifying this define and it depends on the
subsystem and the maintainer's preference, which one to use. The first option
-is to define the default namespace in the `Makefile` of the subsystem. E.g. to
+is to define the default namespace in the ``Makefile`` of the subsystem. E.g. to
export all symbols defined in usb-common into the namespace USB_COMMON, add a
line like this to drivers/usb/common/Makefile::
MODULE_IMPORT_NS(USB_STORAGE);
-This will create a `modinfo` tag in the module for each imported namespace.
+This will create a ``modinfo`` tag in the module for each imported namespace.
This has the side effect, that the imported namespaces of a module can be
inspected with modinfo::
4. Loading Modules that use namespaced Symbols
==============================================
-At module loading time (e.g. `insmod`), the kernel will check each symbol
+At module loading time (e.g. ``insmod``), the kernel will check each symbol
referenced from the module for its availability and whether the namespace it
might be exported to has been imported by the module. The default behaviour of
the kernel is to reject loading modules that don't specify sufficient imports.
A typical scenario for module authors would be::
- write code that depends on a symbol from a not imported namespace
- - `make`
+ - ``make``
- notice the warning of modpost telling about a missing import
- - run `make nsdeps` to add the import to the correct code location
+ - run ``make nsdeps`` to add the import to the correct code location
For subsystem maintainers introducing a namespace, the steps are very similar.
-Again, `make nsdeps` will eventually add the missing namespace imports for
+Again, ``make nsdeps`` will eventually add the missing namespace imports for
in-tree modules::
- move or add symbols to a namespace (e.g. with EXPORT_SYMBOL_NS())
- - `make` (preferably with an allmodconfig to cover all in-kernel
+ - ``make`` (preferably with an allmodconfig to cover all in-kernel
modules)
- notice the warning of modpost telling about a missing import
- - run `make nsdeps` to add the import to the correct code location
+ - run ``make nsdeps`` to add the import to the correct code location
You can also run nsdeps for external module builds. A typical usage is::
[ 0.000000] BIOS-e820: [mem 0x000000000009fc00-0x000000000009ffff] reserved
....
-- Examine fields of the current task struct::
+- Examine fields of the current task struct(supported by x86 and arm64 only)::
(gdb) p $lx_current().pid
$1 = 4998
# SPDX-License-Identifier: GPL-2.0-only
*.example.dts
-processed-schema*.yaml
-processed-schema*.json
+/processed-schema*.yaml
+/processed-schema*.json
- resets
- ddc
-unevaluatedProperties: false
+additionalProperties: false
examples:
- |
resets: true
reset-names: true
+ power-domains:
+ maxItems: 1
+
ports:
$ref: /schemas/graph.yaml#/properties/port
description: |
compatible:
enum:
- qcom,sdm845-gpi-dma
+ - qcom,sm8150-gpi-dma
reg:
maxItems: 1
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/fairchild,74hc595.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Generic 8-bit shift register
+
+maintainers:
+ - Maxime Ripard <mripard@kernel.org>
+
+properties:
+ compatible:
+ enum:
+ - fairchild,74hc595
+ - nxp,74lvc594
+
+ reg:
+ maxItems: 1
+
+ gpio-controller: true
+
+ '#gpio-cells':
+ description:
+ The second cell is only used to specify the GPIO polarity.
+ const: 2
+
+ registers-number:
+ description: Number of daisy-chained shift registers
+
+ enable-gpios:
+ description: GPIO connected to the OE (Output Enable) pin.
+ maxItems: 1
+
+ spi-max-frequency: true
+
+patternProperties:
+ "^(hog-[0-9]+|.+-hog(-[0-9]+)?)$":
+ type: object
+
+ properties:
+ gpio-hog: true
+ gpios: true
+ output-high: true
+ output-low: true
+ line-name: true
+
+ required:
+ - gpio-hog
+ - gpios
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - gpio-controller
+ - '#gpio-cells'
+ - registers-number
+
+additionalProperties: false
+
+examples:
+ - |
+ spi {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ gpio5: gpio5@0 {
+ compatible = "fairchild,74hc595";
+ reg = <0>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ registers-number = <4>;
+ spi-max-frequency = <100000>;
+ };
+ };
+++ /dev/null
-* Generic 8-bits shift register GPIO driver
-
-Required properties:
-- compatible: Should contain one of the following:
- "fairchild,74hc595"
- "nxp,74lvc594"
-- reg : chip select number
-- gpio-controller : Marks the device node as a gpio controller.
-- #gpio-cells : Should be two. The first cell is the pin number and
- the second cell is used to specify the gpio polarity:
- 0 = active high
- 1 = active low
-- registers-number: Number of daisy-chained shift registers
-
-Optional properties:
-- enable-gpios: GPIO connected to the OE (Output Enable) pin.
-
-Example:
-
-gpio5: gpio5@0 {
- compatible = "fairchild,74hc595";
- reg = <0>;
- gpio-controller;
- #gpio-cells = <2>;
- registers-number = <4>;
- spi-max-frequency = <100000>;
-};
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/realtek,otto-gpio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Realtek Otto GPIO controller
+
+maintainers:
+ - Sander Vanheule <sander@svanheule.net>
+ - Bert Vermeulen <bert@biot.com>
+
+description: |
+ Realtek's GPIO controller on their MIPS switch SoCs (Otto platform) consists
+ of two banks of 32 GPIOs. These GPIOs can generate edge-triggered interrupts.
+ Each bank's interrupts are cascased into one interrupt line on the parent
+ interrupt controller, if provided.
+ This binding allows defining a single bank in the devicetree. The interrupt
+ controller is not supported on the fallback compatible name, which only
+ allows for GPIO port use.
+
+properties:
+ $nodename:
+ pattern: "^gpio@[0-9a-f]+$"
+
+ compatible:
+ items:
+ - enum:
+ - realtek,rtl8380-gpio
+ - realtek,rtl8390-gpio
+ - const: realtek,otto-gpio
+
+ reg:
+ maxItems: 1
+
+ "#gpio-cells":
+ const: 2
+
+ gpio-controller: true
+
+ ngpios:
+ minimum: 1
+ maximum: 32
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ const: 2
+
+ interrupts:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - "#gpio-cells"
+ - gpio-controller
+
+additionalProperties: false
+
+dependencies:
+ interrupt-controller: [ interrupts ]
+
+examples:
+ - |
+ gpio@3500 {
+ compatible = "realtek,rtl8380-gpio", "realtek,otto-gpio";
+ reg = <0x3500 0x1c>;
+ gpio-controller;
+ #gpio-cells = <2>;
+ ngpios = <24>;
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ interrupt-parent = <&rtlintc>;
+ interrupts = <23>;
+ };
+
+...
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/rockchip,gpio-bank.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip GPIO bank
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ compatible:
+ enum:
+ - rockchip,gpio-bank
+ - rockchip,rk3188-gpio-bank0
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ gpio-controller: true
+
+ "#gpio-cells":
+ const: 2
+
+ interrupt-controller: true
+
+ "#interrupt-cells":
+ const: 2
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - gpio-controller
+ - "#gpio-cells"
+ - interrupt-controller
+ - "#interrupt-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ pinctrl: pinctrl {
+ #address-cells = <1>;
+ #size-cells = <1>;
+ ranges;
+
+ gpio0: gpio@2000a000 {
+ compatible = "rockchip,rk3188-gpio-bank0";
+ reg = <0x2000a000 0x100>;
+ interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_gates8 9>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+
+ gpio1: gpio@2003c000 {
+ compatible = "rockchip,gpio-bank";
+ reg = <0x2003c000 0x100>;
+ interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
+ clocks = <&clk_gates8 10>;
+
+ gpio-controller;
+ #gpio-cells = <2>;
+
+ interrupt-controller;
+ #interrupt-cells = <2>;
+ };
+ };
+++ /dev/null
-SIRF Hardware spinlock device Binding
------------------------------------------------
-
-Required properties :
-- compatible : shall contain only one of the following:
- "sirf,hwspinlock"
-
-- reg : the register address of hwspinlock
-
-- #hwlock-cells : hwlock users only use the hwlock id to represent a specific
- hwlock, so the number of cells should be <1> here.
-
-Please look at the generic hwlock binding for usage information for consumers,
-"Documentation/devicetree/bindings/hwlock/hwlock.txt"
-
-Example of hwlock provider:
- hwlock {
- compatible = "sirf,hwspinlock";
- reg = <0x13240000 0x00010000>;
- #hwlock-cells = <1>;
- };
-
-Example of hwlock users:
- node {
- ...
- hwlocks = <&hwlock 2>;
- ...
- };
examples:
- |
i3c-master@a0000000 {
- compatible = "silvaco,i3c-master";
+ compatible = "silvaco,i3c-master-v1";
clocks = <&zynqmp_clk 71>, <&fclk>, <&sclk>;
clock-names = "pclk", "fast_clk", "slow_clk";
interrupt-parent = <&gic>;
(active low). The line must be flagged with
GPIO_ACTIVE_LOW.
+ wake-gpios:
+ maxItems: 1
+ description:
+ Optional GPIO specifier for the touchscreen's wake pin
+ (active low). The line must be flagged with
+ GPIO_ACTIVE_LOW.
+
linux,gpio-keymap:
$ref: /schemas/types.yaml#/definitions/uint32-array
description: |
or experiment to determine which bit corresponds to which input. Use
KEY_RESERVED for unused padding values.
+ atmel,wakeup-method:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: |
+ The WAKE line is an active-low input that is used to wake up the touch
+ controller from deep-sleep mode before communication with the controller
+ could be started. This optional feature used to minimize current
+ consumption when the controller is in deep sleep mode. This feature is
+ relevant only to some controller families, like mXT1386 controller for
+ example.
+
+ The WAKE pin can be connected in one of the following ways:
+ 1) left permanently low
+ 2) connected to the I2C-compatible SCL pin
+ 3) connected to a GPIO pin on the host
+ enum:
+ - 0 # ATMEL_MXT_WAKEUP_NONE
+ - 1 # ATMEL_MXT_WAKEUP_I2C_SCL
+ - 2 # ATMEL_MXT_WAKEUP_GPIO
+ default: 0
+
+ wakeup-source:
+ type: boolean
+
required:
- compatible
- reg
examples:
- |
#include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/input/atmel-maxtouch.h>
#include <dt-bindings/gpio/gpio.h>
i2c {
#address-cells = <1>;
reset-gpios = <&gpio 27 GPIO_ACTIVE_LOW>;
vdda-supply = <&ab8500_ldo_aux2_reg>;
vdd-supply = <&ab8500_ldo_aux5_reg>;
+ atmel,wakeup-method = <ATMEL_MXT_WAKEUP_I2C_SCL>;
};
};
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/iqs626a.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Azoteq IQS626A Capacitive Touch Controller
+
+maintainers:
+ - Jeff LaBundy <jeff@labundy.com>
+
+description: |
+ The Azoteq IQS626A is a 14-channel capacitive touch controller that features
+ additional Hall-effect and inductive sensing capabilities.
+
+ Link to datasheet: https://www.azoteq.com/
+
+allOf:
+ - $ref: touchscreen/touchscreen.yaml#
+
+properties:
+ compatible:
+ const: azoteq,iqs626a
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ "#address-cells":
+ const: 1
+
+ "#size-cells":
+ const: 0
+
+ azoteq,suspend-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description: |
+ Specifies the power mode during suspend as follows:
+ 0: Automatic (same as normal runtime, i.e. suspend/resume disabled)
+ 1: Low power (all sensing at a reduced reporting rate)
+ 2: Ultra-low power (ULP channel proximity sensing)
+ 3: Halt (no sensing)
+
+ azoteq,clk-div:
+ type: boolean
+ description: Divides the device's core clock by a factor of 4.
+
+ azoteq,ulp-enable:
+ type: boolean
+ description:
+ Permits the device to automatically enter ultra-low-power mode from low-
+ power mode.
+
+ azoteq,ulp-update:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3, 4, 5, 6, 7]
+ default: 3
+ description: |
+ Specifies the rate at which the trackpad, generic and Hall channels are
+ updated during ultra-low-power mode as follows:
+ 0: 8
+ 1: 13
+ 2: 28
+ 3: 54
+ 4: 89
+ 5: 135
+ 6: 190
+ 7: 256
+
+ azoteq,ati-band-disable:
+ type: boolean
+ description: Disables the ATI band check.
+
+ azoteq,ati-lp-only:
+ type: boolean
+ description: Limits automatic ATI to low-power mode.
+
+ azoteq,gpio3-select:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3, 4, 5, 6, 7]
+ default: 1
+ description: |
+ Selects the channel or group of channels for which the GPIO3 pin
+ represents touch state as follows:
+ 0: None
+ 1: ULP channel
+ 2: Trackpad
+ 3: Trackpad
+ 4: Generic channel 0
+ 5: Generic channel 1
+ 6: Generic channel 2
+ 7: Hall channel
+
+ azoteq,reseed-select:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description: |
+ Specifies the event(s) that prompt the device to reseed (i.e. reset the
+ long-term average) of an associated channel as follows:
+ 0: None
+ 1: Proximity
+ 2: Proximity or touch
+ 3: Proximity, touch or deep touch
+
+ azoteq,thresh-extend:
+ type: boolean
+ description: Multiplies all touch and deep-touch thresholds by 4.
+
+ azoteq,tracking-enable:
+ type: boolean
+ description:
+ Enables all associated channels to track their respective reference
+ channels.
+
+ azoteq,reseed-offset:
+ type: boolean
+ description:
+ Applies an 8-count offset to all long-term averages upon either ATI or
+ reseed events.
+
+ azoteq,rate-np-ms:
+ minimum: 0
+ maximum: 255
+ default: 150
+ description: Specifies the report rate (in ms) during normal-power mode.
+
+ azoteq,rate-lp-ms:
+ minimum: 0
+ maximum: 255
+ default: 150
+ description: Specifies the report rate (in ms) during low-power mode.
+
+ azoteq,rate-ulp-ms:
+ multipleOf: 16
+ minimum: 0
+ maximum: 4080
+ default: 0
+ description: Specifies the report rate (in ms) during ultra-low-power mode.
+
+ azoteq,timeout-pwr-ms:
+ multipleOf: 512
+ minimum: 0
+ maximum: 130560
+ default: 2560
+ description:
+ Specifies the length of time (in ms) to wait for an event before moving
+ from normal-power mode to low-power mode, or (if 'azoteq,ulp-enable' is
+ present) from low-power mode to ultra-low-power mode.
+
+ azoteq,timeout-lta-ms:
+ multipleOf: 512
+ minimum: 0
+ maximum: 130560
+ default: 40960
+ description:
+ Specifies the length of time (in ms) to wait before resetting the long-
+ term average of all channels. Specify the maximum timeout to disable it
+ altogether.
+
+ touchscreen-inverted-x: true
+ touchscreen-inverted-y: true
+ touchscreen-swapped-x-y: true
+
+patternProperties:
+ "^ulp-0|generic-[0-2]|hall$":
+ type: object
+ description:
+ Represents a single sensing channel. A channel is active if defined and
+ inactive otherwise.
+
+ properties:
+ azoteq,ati-exclude:
+ type: boolean
+ description:
+ Prevents the channel from participating in an ATI event that is
+ manually triggered during initialization.
+
+ azoteq,reseed-disable:
+ type: boolean
+ description:
+ Prevents the channel from being reseeded if the long-term average
+ timeout (defined in 'azoteq,timeout-lta') expires.
+
+ azoteq,meas-cap-decrease:
+ type: boolean
+ description:
+ Decreases the internal measurement capacitance from 60 pF to 15 pF.
+
+ azoteq,rx-inactive:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2]
+ default: 0
+ description: |
+ Specifies how inactive CRX pins are to be terminated as follows:
+ 0: VSS
+ 1: Floating
+ 2: VREG (generic channels only)
+
+ azoteq,linearize:
+ type: boolean
+ description:
+ Enables linearization of the channel's counts (generic and Hall
+ channels) or inverts the polarity of the channel's proximity or
+ touch states (ULP channel).
+
+ azoteq,dual-direction:
+ type: boolean
+ description:
+ Specifies that the channel's long-term average is to freeze in the
+ presence of either increasing or decreasing counts, thereby permit-
+ ting events to be reported in either direction.
+
+ azoteq,filt-disable:
+ type: boolean
+ description: Disables raw count filtering for the channel.
+
+ azoteq,ati-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ description: |
+ Specifies the channel's ATI mode as follows:
+ 0: Disabled
+ 1: Semi-partial
+ 2: Partial
+ 3: Full
+
+ The default value is a function of the channel and the device's reset
+ user interface (RUI); reference the datasheet for further information
+ about the available RUI options.
+
+ azoteq,ati-base:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [75, 100, 150, 200]
+ description:
+ Specifies the channel's ATI base. The default value is a function
+ of the channel and the device's RUI.
+
+ azoteq,ati-target:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ multipleOf: 32
+ minimum: 0
+ maximum: 2016
+ description:
+ Specifies the channel's ATI target. The default value is a function
+ of the channel and the device's RUI.
+
+ azoteq,cct-increase:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 16
+ default: 0
+ description:
+ Specifies the degree to which the channel's charge cycle time is to
+ be increased, with 0 representing no increase. The maximum value is
+ limited to 4 in the case of the ULP channel, and the property is un-
+ available entirely in the case of the Hall channel.
+
+ azoteq,proj-bias:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description: |
+ Specifies the bias current applied during projected-capacitance
+ sensing as follows:
+ 0: 2.5 uA
+ 1: 5 uA
+ 2: 10 uA
+ 3: 20 uA
+
+ This property is unavailable in the case of the Hall channel.
+
+ azoteq,sense-freq:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ description: |
+ Specifies the channel's sensing frequency as follows (parenthesized
+ numbers represent the frequency if 'azoteq,clk-div' is present):
+ 0: 4 MHz (1 MHz)
+ 1: 2 MHz (500 kHz)
+ 2: 1 MHz (250 kHz)
+ 3: 500 kHz (125 kHz)
+
+ This property is unavailable in the case of the Hall channel. The
+ default value is a function of the channel and the device's RUI.
+
+ azoteq,ati-band-tighten:
+ type: boolean
+ description:
+ Tightens the ATI band from 1/8 to 1/16 of the desired target (ULP and
+ generic channels only).
+
+ azoteq,proj-enable:
+ type: boolean
+ description: Enables projected-capacitance sensing (ULP channel only).
+
+ azoteq,filt-str-np-cnt:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description:
+ Specifies the raw count filter strength during normal-power mode (ULP
+ and generic channels only).
+
+ azoteq,filt-str-lp-cnt:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description:
+ Specifies the raw count filter strength during low-power mode (ULP and
+ generic channels only).
+
+ azoteq,filt-str-np-lta:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description:
+ Specifies the long-term average filter strength during normal-power
+ mode (ULP and generic channels only).
+
+ azoteq,filt-str-lp-lta:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description:
+ Specifies the long-term average filter strength during low-power mode
+ (ULP and generic channels only).
+
+ azoteq,rx-enable:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 8
+ items:
+ minimum: 0
+ maximum: 7
+ description:
+ Specifies the CRX pin(s) associated with the channel.
+
+ This property is unavailable in the case of the Hall channel. The
+ default value is a function of the channel and the device's RUI.
+
+ azoteq,tx-enable:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 8
+ items:
+ minimum: 0
+ maximum: 7
+ description:
+ Specifies the TX pin(s) associated with the channel.
+
+ This property is unavailable in the case of the Hall channel. The
+ default value is a function of the channel and the device's RUI.
+
+ azoteq,local-cap-size:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3, 4]
+ default: 0
+ description: |
+ Specifies the capacitance to be added to the channel as follows:
+ 0: 0 pF
+ 1: 0.5 pF
+ 2: 1.0 pF
+ 3: 1.5 pF
+ 4: 2.0 pF
+
+ This property is unavailable in the case of the ULP or Hall channels.
+
+ azoteq,sense-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 8, 9, 12, 14, 15]
+ description: |
+ Specifies the channel's sensing mode as follows:
+ 0: Self capacitance
+ 1: Projected capacitance
+ 8: Self inductance
+ 9: Mutual inductance
+ 12: External
+ 14: Hall effect
+ 15: Temperature
+
+ This property is unavailable in the case of the ULP or Hall channels.
+ The default value is a function of the channel and the device's RUI.
+
+ azoteq,tx-freq:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description: |
+ Specifies the inductive sensing excitation frequency as follows
+ (parenthesized numbers represent the frequency if 'azoteq,clk-div'
+ is present):
+ 0: 16 MHz (4 MHz)
+ 1: 8 MHz (2 MHz)
+ 2: 4 MHz (1 MHz)
+ 3: 2 MHz (500 kHz)
+
+ This property is unavailable in the case of the ULP or Hall channels.
+
+ azoteq,invert-enable:
+ type: boolean
+ description:
+ Inverts the polarity of the states reported for proximity, touch and
+ deep-touch events relative to their respective thresholds (generic
+ channels only).
+
+ azoteq,comp-disable:
+ type: boolean
+ description:
+ Disables compensation for the channel (generic channels only).
+
+ azoteq,static-enable:
+ type: boolean
+ description:
+ Enables the static front-end for the channel (generic channels only).
+
+ azoteq,assoc-select:
+ $ref: /schemas/types.yaml#/definitions/string-array
+ minItems: 1
+ maxItems: 6
+ items:
+ enum:
+ - ulp-0
+ - trackpad-3x2
+ - trackpad-3x3
+ - generic-0
+ - generic-1
+ - generic-2
+ - hall
+ description:
+ Specifies the associated channels for which the channel serves as a
+ reference channel. By default, no channels are selected. This prop-
+ erty is only available for the generic channels.
+
+ azoteq,assoc-weight:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ default: 0
+ description:
+ Specifies the channel's impact weight if it acts as an associated
+ channel (0 = 0% impact, 255 = 200% impact). This property is only
+ available for the generic channels.
+
+ patternProperties:
+ "^event-(prox|touch|deep)(-alt)?$":
+ type: object
+ description:
+ Represents a proximity, touch or deep-touch event reported by the
+ channel in response to a decrease in counts. Node names suffixed with
+ '-alt' instead correspond to an increase in counts.
+
+ By default, the long-term average tracks an increase in counts such
+ that only events corresponding to a decrease in counts are reported
+ (refer to the datasheet for more information).
+
+ Specify 'azoteq,dual-direction' to freeze the long-term average when
+ the counts increase or decrease such that events of either direction
+ can be reported. Alternatively, specify 'azoteq,invert-enable' to in-
+ vert the polarity of the states reported by the channel.
+
+ Complementary events (e.g. event-touch and event-touch-alt) can both
+ be present and specify different key or switch codes, but not differ-
+ ent thresholds or hysteresis (if applicable).
+
+ Proximity events are unavailable in the case of the Hall channel, and
+ deep-touch events are only available for the generic channels. Unless
+ otherwise specified, default values are a function of the channel and
+ the device's RUI.
+
+ properties:
+ azoteq,thresh:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ description: Specifies the threshold for the event.
+
+ azoteq,hyst:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 15
+ description:
+ Specifies the hysteresis for the event (touch and deep-touch
+ events only).
+
+ linux,code:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ description: Numeric key or switch code associated with the event.
+
+ linux,input-type:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [1, 5]
+ description:
+ Specifies whether the event is to be interpreted as a key (1) or
+ a switch (5). By default, Hall-channel events are interpreted as
+ switches and all others are interpreted as keys.
+
+ dependencies:
+ linux,input-type: ["linux,code"]
+
+ additionalProperties: false
+
+ dependencies:
+ azoteq,assoc-weight: ["azoteq,assoc-select"]
+
+ additionalProperties: false
+
+ "^trackpad-3x[2-3]$":
+ type: object
+ description:
+ Represents all channels associated with the trackpad. The channels are
+ collectively active if the trackpad is defined and inactive otherwise.
+
+ properties:
+ azoteq,ati-exclude:
+ type: boolean
+ description:
+ Prevents the trackpad channels from participating in an ATI event
+ that is manually triggered during initialization.
+
+ azoteq,reseed-disable:
+ type: boolean
+ description:
+ Prevents the trackpad channels from being reseeded if the long-term
+ average timeout (defined in 'azoteq,timeout-lta') expires.
+
+ azoteq,meas-cap-decrease:
+ type: boolean
+ description:
+ Decreases the internal measurement capacitance from 60 pF to 15 pF.
+
+ azoteq,rx-inactive:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1]
+ default: 0
+ description: |
+ Specifies how inactive CRX pins are to be terminated as follows:
+ 0: VSS
+ 1: Floating
+
+ azoteq,linearize:
+ type: boolean
+ description: Inverts the polarity of the trackpad's touch state.
+
+ azoteq,dual-direction:
+ type: boolean
+ description:
+ Specifies that the trackpad's long-term averages are to freeze in
+ the presence of either increasing or decreasing counts, thereby
+ permitting events to be reported in either direction.
+
+ azoteq,filt-disable:
+ type: boolean
+ description: Disables raw count filtering for the trackpad channels.
+
+ azoteq,ati-mode:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description: |
+ Specifies the trackpad's ATI mode as follows:
+ 0: Disabled
+ 1: Semi-partial
+ 2: Partial
+ 3: Full
+
+ azoteq,ati-base:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 6
+ maxItems: 9
+ items:
+ minimum: 45
+ maximum: 300
+ default: [45, 45, 45, 45, 45, 45, 45, 45, 45]
+ description: Specifies each individual trackpad channel's ATI base.
+
+ azoteq,ati-target:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ multipleOf: 32
+ minimum: 0
+ maximum: 2016
+ default: 0
+ description: Specifies the trackpad's ATI target.
+
+ azoteq,cct-increase:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 4
+ default: 0
+ description:
+ Specifies the degree to which the trackpad's charge cycle time is to
+ be increased, with 0 representing no increase.
+
+ azoteq,proj-bias:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description: |
+ Specifies the bias current applied during projected-capacitance
+ sensing as follows:
+ 0: 2.5 uA
+ 1: 5 uA
+ 2: 10 uA
+ 3: 20 uA
+
+ azoteq,sense-freq:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description: |
+ Specifies the trackpad's sensing frequency as follows (parenthesized
+ numbers represent the frequency if 'azoteq,clk-div' is present):
+ 0: 4 MHz (1 MHz)
+ 1: 2 MHz (500 kHz)
+ 2: 1 MHz (250 kHz)
+ 3: 500 kHz (125 kHz)
+
+ azoteq,ati-band-tighten:
+ type: boolean
+ description:
+ Tightens the ATI band from 1/8 to 1/16 of the desired target.
+
+ azoteq,thresh:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 6
+ maxItems: 9
+ items:
+ minimum: 0
+ maximum: 255
+ default: [0, 0, 0, 0, 0, 0, 0, 0, 0]
+ description:
+ Specifies each individual trackpad channel's touch threshold.
+
+ azoteq,hyst:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 15
+ default: 0
+ description: Specifies the trackpad's touch hysteresis.
+
+ azoteq,lta-update:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3, 4, 5, 6, 7]
+ default: 0
+ description: |
+ Specifies the update rate of the trackpad's long-term average during
+ ultra-low-power mode as follows:
+ 0: 2
+ 1: 4
+ 2: 8
+ 3: 16
+ 4: 32
+ 5: 64
+ 6: 128
+ 7: 255
+
+ azoteq,filt-str-trackpad:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description: Specifies the trackpad coordinate filter strength.
+
+ azoteq,filt-str-np-cnt:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description:
+ Specifies the raw count filter strength during normal-power mode.
+
+ azoteq,filt-str-lp-cnt:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ enum: [0, 1, 2, 3]
+ default: 0
+ description:
+ Specifies the raw count filter strength during low-power mode.
+
+ linux,keycodes:
+ $ref: /schemas/types.yaml#/definitions/uint32-array
+ minItems: 1
+ maxItems: 6
+ description: |
+ Specifies the numeric keycodes associated with each available gesture
+ in the following order (enter 0 for unused gestures):
+ 0: Positive flick or swipe in X direction
+ 1: Negative flick or swipe in X direction
+ 2: Positive flick or swipe in Y direction
+ 3: Negative flick or swipe in Y direction
+ 4: Tap
+ 5: Hold
+
+ azoteq,gesture-swipe:
+ type: boolean
+ description:
+ Directs the device to interpret axial gestures as a swipe (finger
+ remains on trackpad) instead of a flick (finger leaves trackpad).
+
+ azoteq,timeout-tap-ms:
+ multipleOf: 16
+ minimum: 0
+ maximum: 4080
+ default: 0
+ description:
+ Specifies the length of time (in ms) within which a trackpad touch
+ must be released in order to be interpreted as a tap.
+
+ azoteq,timeout-swipe-ms:
+ multipleOf: 16
+ minimum: 0
+ maximum: 4080
+ default: 0
+ description:
+ Specifies the length of time (in ms) within which an axial gesture
+ must be completed in order to be interpreted as a flick or swipe.
+
+ azoteq,thresh-swipe:
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+ default: 0
+ description:
+ Specifies the number of points across which an axial gesture must
+ travel in order to be interpreted as a flick or swipe.
+
+ dependencies:
+ azoteq,gesture-swipe: ["linux,keycodes"]
+ azoteq,timeout-tap-ms: ["linux,keycodes"]
+ azoteq,timeout-swipe-ms: ["linux,keycodes"]
+ azoteq,thresh-swipe: ["linux,keycodes"]
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - "#address-cells"
+ - "#size-cells"
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/input/input.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ iqs626a@44 {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ compatible = "azoteq,iqs626a";
+ reg = <0x44>;
+ interrupt-parent = <&gpio>;
+ interrupts = <17 IRQ_TYPE_LEVEL_LOW>;
+
+ azoteq,rate-np-ms = <16>;
+ azoteq,rate-lp-ms = <160>;
+
+ azoteq,timeout-pwr-ms = <2560>;
+ azoteq,timeout-lta-ms = <32768>;
+
+ ulp-0 {
+ azoteq,meas-cap-decrease;
+
+ azoteq,ati-base = <75>;
+ azoteq,ati-target = <1024>;
+
+ azoteq,rx-enable = <2>, <3>, <4>,
+ <5>, <6>, <7>;
+
+ event-prox {
+ linux,code = <KEY_POWER>;
+ };
+ };
+
+ trackpad-3x3 {
+ azoteq,filt-str-np-cnt = <1>;
+ azoteq,filt-str-lp-cnt = <1>;
+
+ azoteq,hyst = <4>;
+ azoteq,thresh = <35>, <40>, <40>,
+ <38>, <33>, <38>,
+ <35>, <35>, <35>;
+
+ azoteq,ati-mode = <3>;
+ azoteq,ati-base = <195>, <195>, <195>,
+ <195>, <195>, <195>,
+ <195>, <195>, <195>;
+ azoteq,ati-target = <512>;
+
+ azoteq,proj-bias = <1>;
+ azoteq,sense-freq = <2>;
+
+ linux,keycodes = <KEY_VOLUMEUP>,
+ <KEY_VOLUMEDOWN>,
+ <KEY_NEXTSONG>,
+ <KEY_PREVIOUSSONG>,
+ <KEY_PLAYPAUSE>,
+ <KEY_STOPCD>;
+
+ azoteq,gesture-swipe;
+ azoteq,timeout-swipe-ms = <800>;
+ azoteq,timeout-tap-ms = <400>;
+ azoteq,thresh-swipe = <40>;
+ };
+
+ /*
+ * Preserve the default register settings for
+ * the temperature-tracking channel leveraged
+ * by reset user interface (RUI) 1.
+ *
+ * Scalar properties (e.g. ATI mode) are left
+ * untouched by simply omitting them; boolean
+ * properties must be specified explicitly as
+ * needed.
+ */
+ generic-2 {
+ azoteq,reseed-disable;
+ azoteq,meas-cap-decrease;
+ azoteq,dual-direction;
+ azoteq,comp-disable;
+ azoteq,static-enable;
+ };
+
+ hall {
+ azoteq,reseed-disable;
+ azoteq,meas-cap-decrease;
+
+ event-touch {
+ linux,code = <SW_LID>;
+ };
+ };
+ };
+ };
+
+...
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/azoteq,iqs5xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Azoteq IQS550/572/525 Trackpad/Touchscreen Controller
+
+maintainers:
+ - Jeff LaBundy <jeff@labundy.com>
+
+description: |
+ The Azoteq IQS550, IQS572 and IQS525 trackpad and touchscreen controllers
+ employ projected-capacitance sensing and can track up to five independent
+ contacts.
+
+ Link to datasheet: https://www.azoteq.com/
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+properties:
+ compatible:
+ enum:
+ - azoteq,iqs550
+ - azoteq,iqs572
+ - azoteq,iqs525
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+
+ wakeup-source: true
+
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+ touchscreen-inverted-x: true
+ touchscreen-inverted-y: true
+ touchscreen-swapped-x-y: true
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ touchscreen@74 {
+ compatible = "azoteq,iqs550";
+ reg = <0x74>;
+ interrupt-parent = <&gpio>;
+ interrupts = <27 IRQ_TYPE_LEVEL_HIGH>;
+ reset-gpios = <&gpio 22 (GPIO_ACTIVE_LOW |
+ GPIO_PUSH_PULL)>;
+
+ touchscreen-size-x = <800>;
+ touchscreen-size-y = <480>;
+ };
+ };
+
+...
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/hycon,hy46xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Hycon HY46XX series touchscreen controller bindings
+
+description: |
+ There are 6 variants of the chip for various touch panel sizes and cover lens material
+ Glass: 0.3mm--4.0mm
+ PET/PMMA: 0.2mm--2.0mm
+ HY4613(B)-N048 < 6"
+ HY4614(B)-N068 7" .. 10.1"
+ HY4621-NS32 < 5"
+ HY4623-NS48 5.1" .. 7"
+ Glass: 0.3mm--8.0mm
+ PET/PMMA: 0.2mm--4.0mm
+ HY4633(B)-N048 < 6"
+ HY4635(B)-N048 < 7" .. 10.1"
+
+maintainers:
+ - Giulio Benetti <giulio.benetti@benettiengineering.com>
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+properties:
+ compatible:
+ enum:
+ - hycon,hy4613
+ - hycon,hy4614
+ - hycon,hy4621
+ - hycon,hy4623
+ - hycon,hy4633
+ - hycon,hy4635
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+
+ vcc-supply: true
+
+ hycon,threshold:
+ description: Allows setting the sensitivity in the range from 0 to 255.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 255
+
+ hycon,glove-enable:
+ type: boolean
+ description: Allows enabling glove setting.
+
+ hycon,report-speed-hz:
+ description: Allows setting the report speed in Hertz.
+ minimum: 1
+ maximum: 255
+
+ hycon,noise-filter-enable:
+ type: boolean
+ description: Allows enabling power noise filter.
+
+ hycon,filter-data:
+ description: Allows setting how many samples throw before reporting touch
+ in the range from 0 to 5.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 5
+
+ hycon,gain:
+ description: Allows setting the sensitivity distance in the range from 0 to 5.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 5
+
+ hycon,edge-offset:
+ description: Allows setting the edge compensation in the range from 0 to 16.
+ $ref: /schemas/types.yaml#/definitions/uint32
+ minimum: 0
+ maximum: 16
+
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+ touchscreen-fuzz-x: true
+ touchscreen-fuzz-y: true
+ touchscreen-inverted-x: true
+ touchscreen-inverted-y: true
+ touchscreen-swapped-x-y: true
+ interrupt-controller: true
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ touchscreen@1c {
+ compatible = "hycon,hy4633";
+ reg = <0x1c>;
+ interrupt-parent = <&gpio2>;
+ interrupts = <5 IRQ_TYPE_EDGE_FALLING>;
+ reset-gpios = <&gpio2 6 GPIO_ACTIVE_LOW>;
+ };
+ };
+
+...
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/ilitek_ts_i2c.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Ilitek I2C Touchscreen Controller
+
+maintainers:
+ - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+properties:
+ compatible:
+ enum:
+ - ilitek,ili2130
+ - ilitek,ili2131
+ - ilitek,ili2132
+ - ilitek,ili2316
+ - ilitek,ili2322
+ - ilitek,ili2323
+ - ilitek,ili2326
+ - ilitek,ili2520
+ - ilitek,ili2521
+
+ reg:
+ const: 0x41
+
+ interrupts:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+
+ wakeup-source:
+ type: boolean
+ description: touchscreen can be used as a wakeup source.
+
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+ touchscreen-inverted-x: true
+ touchscreen-inverted-y: true
+ touchscreen-swapped-x-y: true
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - reset-gpios
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/gpio/gpio.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ touchscreen@41 {
+ compatible = "ilitek,ili2520";
+ reg = <0x41>;
+
+ interrupt-parent = <&gpio1>;
+ interrupts = <7 IRQ_TYPE_LEVEL_LOW>;
+ reset-gpios = <&gpio1 8 GPIO_ACTIVE_LOW>;
+ touchscreen-inverted-y;
+ wakeup-source;
+ };
+ };
+++ /dev/null
-Azoteq IQS550/572/525 Trackpad/Touchscreen Controller
-
-Required properties:
-
-- compatible : Must be equal to one of the following:
- "azoteq,iqs550"
- "azoteq,iqs572"
- "azoteq,iqs525"
-
-- reg : I2C slave address for the device.
-
-- interrupts : GPIO to which the device's active-high RDY
- output is connected (see [0]).
-
-- reset-gpios : GPIO to which the device's active-low NRST
- input is connected (see [1]).
-
-Optional properties:
-
-- touchscreen-min-x : See [2].
-
-- touchscreen-min-y : See [2].
-
-- touchscreen-size-x : See [2]. If this property is omitted, the
- maximum x-coordinate is specified by the
- device's "X Resolution" register.
-
-- touchscreen-size-y : See [2]. If this property is omitted, the
- maximum y-coordinate is specified by the
- device's "Y Resolution" register.
-
-- touchscreen-max-pressure : See [2]. Pressure is expressed as the sum of
- the deltas across all channels impacted by a
- touch event. A channel's delta is calculated
- as its count value minus a reference, where
- the count value is inversely proportional to
- the channel's capacitance.
-
-- touchscreen-fuzz-x : See [2].
-
-- touchscreen-fuzz-y : See [2].
-
-- touchscreen-fuzz-pressure : See [2].
-
-- touchscreen-inverted-x : See [2]. Inversion is applied relative to that
- which may already be specified by the device's
- FLIP_X and FLIP_Y register fields.
-
-- touchscreen-inverted-y : See [2]. Inversion is applied relative to that
- which may already be specified by the device's
- FLIP_X and FLIP_Y register fields.
-
-- touchscreen-swapped-x-y : See [2]. Swapping is applied relative to that
- which may already be specified by the device's
- SWITCH_XY_AXIS register field.
-
-[0]: Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
-[1]: Documentation/devicetree/bindings/gpio/gpio.txt
-[2]: Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
-
-Example:
-
- &i2c1 {
- /* ... */
-
- touchscreen@74 {
- compatible = "azoteq,iqs550";
- reg = <0x74>;
- interrupt-parent = <&gpio>;
- interrupts = <17 4>;
- reset-gpios = <&gpio 27 1>;
-
- touchscreen-size-x = <640>;
- touchscreen-size-y = <480>;
-
- touchscreen-max-pressure = <16000>;
- };
-
- /* ... */
- };
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/melfas,mms114.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Melfas MMS114 family touchscreen controller bindings
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+properties:
+ $nodename:
+ pattern: "^touchscreen(@.*)?$"
+
+ compatible:
+ items:
+ - enum:
+ - melfas,mms114
+ - melfas,mms134s
+ - melfas,mms136
+ - melfas,mms152
+ - melfas,mms345l
+
+ reg:
+ description: I2C address
+
+ clock-frequency:
+ description: I2C client clock frequency, defined for host
+ minimum: 100000
+ maximum: 400000
+
+ interrupts:
+ maxItems: 1
+
+ avdd-supply:
+ description: Analog power supply regulator on AVDD pin
+
+ vdd-supply:
+ description: Digital power supply regulator on VDD pin
+
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+ touchscreen-fuzz-x: true
+ touchscreen-fuzz-y: true
+ touchscreen-fuzz-pressure: true
+ touchscreen-inverted-x: true
+ touchscreen-inverted-y: true
+ touchscreen-swapped-x-y: true
+ touchscreen-max-pressure: true
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - touchscreen-size-x
+ - touchscreen-size-y
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ touchscreen@48 {
+ compatible = "melfas,mms114";
+ reg = <0x48>;
+ interrupt-parent = <&gpio>;
+ interrupts = <39 IRQ_TYPE_EDGE_FALLING>;
+ avdd-supply = <&ldo1_reg>;
+ vdd-supply = <&ldo2_reg>;
+ touchscreen-size-x = <720>;
+ touchscreen-size-y = <1280>;
+ touchscreen-fuzz-x = <10>;
+ touchscreen-fuzz-y = <10>;
+ touchscreen-fuzz-pressure = <10>;
+ touchscreen-inverted-x;
+ touchscreen-inverted-y;
+ };
+ };
+
+...
+++ /dev/null
-* MELFAS MMS114/MMS152/MMS345L touchscreen controller
-
-Required properties:
-- compatible: should be one of:
- - "melfas,mms114"
- - "melfas,mms152"
- - "melfas,mms345l"
-- reg: I2C address of the chip
-- interrupts: interrupt to which the chip is connected
-- touchscreen-size-x: See [1]
-- touchscreen-size-y: See [1]
-
-Optional properties:
-- touchscreen-fuzz-x: See [1]
-- touchscreen-fuzz-y: See [1]
-- touchscreen-fuzz-pressure: See [1]
-- touchscreen-inverted-x: See [1]
-- touchscreen-inverted-y: See [1]
-- touchscreen-swapped-x-y: See [1]
-
-[1]: Documentation/devicetree/bindings/input/touchscreen/touchscreen.txt
-
-Example:
-
- i2c@00000000 {
- /* ... */
-
- touchscreen@48 {
- compatible = "melfas,mms114";
- reg = <0x48>;
- interrupts = <39 0>;
- touchscreen-size-x = <720>;
- touchscreen-size-y = <1280>;
- touchscreen-fuzz-x = <10>;
- touchscreen-fuzz-y = <10>;
- touchscreen-fuzz-pressure = <10>;
- touchscreen-inverted-x;
- touchscreen-inverted-y;
- };
-
- /* ... */
- };
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/mstar,msg2638.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MStar msg2638 touchscreen controller Bindings
+
+maintainers:
+ - Vincent Knecht <vincent.knecht@mailoo.org>
+
+allOf:
+ - $ref: touchscreen.yaml#
+
+properties:
+ compatible:
+ const: mstar,msg2638
+
+ reg:
+ const: 0x26
+
+ interrupts:
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+
+ vdd-supply:
+ description: Power supply regulator for the chip
+
+ vddio-supply:
+ description: Power supply regulator for the I2C bus
+
+ touchscreen-size-x: true
+ touchscreen-size-y: true
+
+additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - reset-gpios
+ - touchscreen-size-x
+ - touchscreen-size-y
+
+examples:
+ - |
+ #include <dt-bindings/gpio/gpio.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+ i2c {
+ #address-cells = <1>;
+ #size-cells = <0>;
+ touchscreen@26 {
+ compatible = "mstar,msg2638";
+ reg = <0x26>;
+ interrupt-parent = <&msmgpio>;
+ interrupts = <13 IRQ_TYPE_EDGE_FALLING>;
+ reset-gpios = <&msmgpio 100 GPIO_ACTIVE_LOW>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&ts_int_reset_default>;
+ vdd-supply = <&pm8916_l17>;
+ vddio-supply = <&pm8916_l5>;
+ touchscreen-size-x = <2048>;
+ touchscreen-size-y = <2048>;
+ };
+ };
+
+...
reg:
maxItems: 1
+ interrupts:
+ maxItems: 1
+
interrupt-controller: true
required:
- compatible
- reg
- interrupt-controller
+ - interrupts
additionalProperties: false
- interrupts
- clocks
- power-domains
- - resets
-
-if:
- properties:
- compatible:
- contains:
- enum:
- - renesas,vin-r8a7778
- - renesas,vin-r8a7779
- - renesas,rcar-gen2-vin
-then:
- required:
- - port
-else:
- required:
- - renesas,id
- - ports
+
+allOf:
+ - if:
+ not:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,vin-r8a7778
+ - renesas,vin-r8a7779
+ then:
+ required:
+ - resets
+
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - renesas,vin-r8a7778
+ - renesas,vin-r8a7779
+ - renesas,rcar-gen2-vin
+ then:
+ required:
+ - port
+ else:
+ required:
+ - renesas,id
+ - ports
additionalProperties: false
+++ /dev/null
-Sigma Designs Tango4 NAND Flash Controller (NFC)
-
-Required properties:
-
-- compatible: "sigma,smp8758-nand"
-- reg: address/size of nfc_reg, nfc_mem, and pbus_reg
-- dmas: reference to the DMA channel used by the controller
-- dma-names: "rxtx"
-- clocks: reference to the system clock
-- #address-cells: <1>
-- #size-cells: <0>
-
-Children nodes represent the available NAND chips.
-See Documentation/devicetree/bindings/mtd/nand-controller.yaml for generic bindings.
-
-Example:
-
- nandc: nand-controller@2c000 {
- compatible = "sigma,smp8758-nand";
- reg = <0x2c000 0x30>, <0x2d000 0x800>, <0x20000 0x1000>;
- dmas = <&dma0 3>;
- dma-names = "rxtx";
- clocks = <&clkgen SYS_CLK>;
- #address-cells = <1>;
- #size-cells = <0>;
-
- nand@0 {
- reg = <0>; /* CS0 */
- nand-ecc-strength = <14>;
- nand-ecc-step-size = <1024>;
- };
-
- nand@1 {
- reg = <1>; /* CS1 */
- nand-ecc-strength = <14>;
- nand-ecc-step-size = <1024>;
- };
- };
clocks:
minItems: 1
- maxItems: 2
items:
- description: AVB functional clock
- description: Optional TXC reference clock
clock-names:
+ minItems: 1
items:
- const: fck
- const: refclk
+++ /dev/null
-HiSilicon Hip05 and Hip06 PCIe host bridge DT description
-
-HiSilicon PCIe host controller is based on the Synopsys DesignWare PCI core.
-It shares common functions with the PCIe DesignWare core driver and inherits
-common properties defined in
-Documentation/devicetree/bindings/pci/designware-pcie.txt.
-
-Additional properties are described here:
-
-Required properties
-- compatible: Should contain "hisilicon,hip05-pcie" or "hisilicon,hip06-pcie".
-- reg: Should contain rc_dbi, config registers location and length.
-- reg-names: Must include the following entries:
- "rc_dbi": controller configuration registers;
- "config": PCIe configuration space registers.
-- msi-parent: Should be its_pcie which is an ITS receiving MSI interrupts.
-- port-id: Should be 0, 1, 2 or 3.
-
-Optional properties:
-- status: Either "ok" or "disabled".
-- dma-coherent: Present if DMA operations are coherent.
-
-Hip05 Example (note that Hip06 is the same except compatible):
- pcie@b0080000 {
- compatible = "hisilicon,hip05-pcie", "snps,dw-pcie";
- reg = <0 0xb0080000 0 0x10000>, <0x220 0x00000000 0 0x2000>;
- reg-names = "rc_dbi", "config";
- bus-range = <0 15>;
- msi-parent = <&its_pcie>;
- #address-cells = <3>;
- #size-cells = <2>;
- device_type = "pci";
- dma-coherent;
- ranges = <0x82000000 0 0x00000000 0x220 0x00000000 0 0x10000000>;
- num-lanes = <8>;
- port-id = <1>;
- #interrupt-cells = <1>;
- interrupt-map-mask = <0xf800 0 0 7>;
- interrupt-map = <0x0 0 0 1 &mbigen_pcie 1 10
- 0x0 0 0 2 &mbigen_pcie 2 11
- 0x0 0 0 3 &mbigen_pcie 3 12
- 0x0 0 0 4 &mbigen_pcie 4 13>;
- };
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/mediatek-pcie-gen3.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Gen3 PCIe controller on MediaTek SoCs
+
+maintainers:
+ - Jianjun Wang <jianjun.wang@mediatek.com>
+
+description: |+
+ PCIe Gen3 MAC controller for MediaTek SoCs, it supports Gen3 speed
+ and compatible with Gen2, Gen1 speed.
+
+ This PCIe controller supports up to 256 MSI vectors, the MSI hardware
+ block diagram is as follows:
+
+ +-----+
+ | GIC |
+ +-----+
+ ^
+ |
+ port->irq
+ |
+ +-+-+-+-+-+-+-+-+
+ |0|1|2|3|4|5|6|7| (PCIe intc)
+ +-+-+-+-+-+-+-+-+
+ ^ ^ ^
+ | | ... |
+ +-------+ +------+ +-----------+
+ | | |
+ +-+-+---+--+--+ +-+-+---+--+--+ +-+-+---+--+--+
+ |0|1|...|30|31| |0|1|...|30|31| |0|1|...|30|31| (MSI sets)
+ +-+-+---+--+--+ +-+-+---+--+--+ +-+-+---+--+--+
+ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^
+ | | | | | | | | | | | | (MSI vectors)
+ | | | | | | | | | | | |
+
+ (MSI SET0) (MSI SET1) ... (MSI SET7)
+
+ With 256 MSI vectors supported, the MSI vectors are composed of 8 sets,
+ each set has its own address for MSI message, and supports 32 MSI vectors
+ to generate interrupt.
+
+allOf:
+ - $ref: /schemas/pci/pci-bus.yaml#
+
+properties:
+ compatible:
+ const: mediatek,mt8192-pcie
+
+ reg:
+ maxItems: 1
+
+ reg-names:
+ items:
+ - const: pcie-mac
+
+ interrupts:
+ maxItems: 1
+
+ ranges:
+ minItems: 1
+ maxItems: 8
+
+ resets:
+ minItems: 1
+ maxItems: 2
+
+ reset-names:
+ minItems: 1
+ maxItems: 2
+ items:
+ - const: phy
+ - const: mac
+
+ clocks:
+ maxItems: 6
+
+ clock-names:
+ items:
+ - const: pl_250m
+ - const: tl_26m
+ - const: tl_96m
+ - const: tl_32k
+ - const: peri_26m
+ - const: top_133m
+
+ assigned-clocks:
+ maxItems: 1
+
+ assigned-clock-parents:
+ maxItems: 1
+
+ phys:
+ maxItems: 1
+
+ '#interrupt-cells':
+ const: 1
+
+ interrupt-controller:
+ description: Interrupt controller node for handling legacy PCI interrupts.
+ type: object
+ properties:
+ '#address-cells':
+ const: 0
+ '#interrupt-cells':
+ const: 1
+ interrupt-controller: true
+
+ required:
+ - '#address-cells'
+ - '#interrupt-cells'
+ - interrupt-controller
+
+ additionalProperties: false
+
+required:
+ - compatible
+ - reg
+ - reg-names
+ - interrupts
+ - ranges
+ - clocks
+ - '#interrupt-cells'
+ - interrupt-controller
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/interrupt-controller/irq.h>
+
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ pcie: pcie@11230000 {
+ compatible = "mediatek,mt8192-pcie";
+ device_type = "pci";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ reg = <0x00 0x11230000 0x00 0x4000>;
+ reg-names = "pcie-mac";
+ interrupts = <GIC_SPI 251 IRQ_TYPE_LEVEL_HIGH 0>;
+ bus-range = <0x00 0xff>;
+ ranges = <0x82000000 0x00 0x12000000 0x00
+ 0x12000000 0x00 0x1000000>;
+ clocks = <&infracfg 44>,
+ <&infracfg 40>,
+ <&infracfg 43>,
+ <&infracfg 97>,
+ <&infracfg 99>,
+ <&infracfg 111>;
+ clock-names = "pl_250m", "tl_26m", "tl_96m",
+ "tl_32k", "peri_26m", "top_133m";
+ assigned-clocks = <&topckgen 50>;
+ assigned-clock-parents = <&topckgen 91>;
+
+ phys = <&pciephy>;
+ phy-names = "pcie-phy";
+
+ resets = <&infracfg_rst 2>,
+ <&infracfg_rst 3>;
+ reset-names = "phy", "mac";
+
+ #interrupt-cells = <1>;
+ interrupt-map-mask = <0 0 0 0x7>;
+ interrupt-map = <0 0 0 1 &pcie_intc 0>,
+ <0 0 0 2 &pcie_intc 1>,
+ <0 0 0 3 &pcie_intc 2>,
+ <0 0 0 4 &pcie_intc 3>;
+ pcie_intc: interrupt-controller {
+ #address-cells = <0>;
+ #interrupt-cells = <1>;
+ interrupt-controller;
+ };
+ };
+ };
properties:
compatible:
oneOf:
+ - const: renesas,pcie-r8a7779 # R-Car H1
- items:
- enum:
- renesas,pcie-r8a7742 # RZ/G1H
- clocks
- clock-names
- power-domains
- - resets
+
+if:
+ not:
+ properties:
+ compatible:
+ contains:
+ const: renesas,pcie-r8a7779
+then:
+ required:
+ - resets
unevaluatedProperties: false
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/sifive,fu740-pcie.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SiFive FU740 PCIe host controller
+
+description: |+
+ SiFive FU740 PCIe host controller is based on the Synopsys DesignWare
+ PCI core. It shares common features with the PCIe DesignWare core and
+ inherits common properties defined in
+ Documentation/devicetree/bindings/pci/designware-pcie.txt.
+
+maintainers:
+ - Paul Walmsley <paul.walmsley@sifive.com>
+ - Greentime Hu <greentime.hu@sifive.com>
+
+allOf:
+ - $ref: /schemas/pci/pci-bus.yaml#
+
+properties:
+ compatible:
+ const: sifive,fu740-pcie
+
+ reg:
+ maxItems: 3
+
+ reg-names:
+ items:
+ - const: dbi
+ - const: config
+ - const: mgmt
+
+ num-lanes:
+ const: 8
+
+ msi-parent: true
+
+ interrupt-names:
+ items:
+ - const: msi
+ - const: inta
+ - const: intb
+ - const: intc
+ - const: intd
+
+ resets:
+ description: A phandle to the PCIe power up reset line.
+ maxItems: 1
+
+ pwren-gpios:
+ description: Should specify the GPIO for controlling the PCI bus device power on.
+ maxItems: 1
+
+ reset-gpios:
+ maxItems: 1
+
+required:
+ - dma-coherent
+ - num-lanes
+ - interrupts
+ - interrupt-names
+ - interrupt-parent
+ - interrupt-map-mask
+ - interrupt-map
+ - clock-names
+ - clocks
+ - resets
+ - pwren-gpios
+ - reset-gpios
+
+unevaluatedProperties: false
+
+examples:
+ - |
+ bus {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ #include <dt-bindings/clock/sifive-fu740-prci.h>
+
+ pcie@e00000000 {
+ compatible = "sifive,fu740-pcie";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ reg = <0xe 0x00000000 0x0 0x80000000>,
+ <0xd 0xf0000000 0x0 0x10000000>,
+ <0x0 0x100d0000 0x0 0x1000>;
+ reg-names = "dbi", "config", "mgmt";
+ device_type = "pci";
+ dma-coherent;
+ bus-range = <0x0 0xff>;
+ ranges = <0x81000000 0x0 0x60080000 0x0 0x60080000 0x0 0x10000>, /* I/O */
+ <0x82000000 0x0 0x60090000 0x0 0x60090000 0x0 0xff70000>, /* mem */
+ <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x1000000>, /* mem */
+ <0xc3000000 0x20 0x00000000 0x20 0x00000000 0x20 0x00000000>; /* mem prefetchable */
+ num-lanes = <0x8>;
+ interrupts = <56>, <57>, <58>, <59>, <60>, <61>, <62>, <63>, <64>;
+ interrupt-names = "msi", "inta", "intb", "intc", "intd";
+ interrupt-parent = <&plic0>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+ interrupt-map = <0x0 0x0 0x0 0x1 &plic0 57>,
+ <0x0 0x0 0x0 0x2 &plic0 58>,
+ <0x0 0x0 0x0 0x3 &plic0 59>,
+ <0x0 0x0 0x0 0x4 &plic0 60>;
+ clock-names = "pcie_aux";
+ clocks = <&prci PRCI_CLK_PCIE_AUX>;
+ resets = <&prci 4>;
+ pwren-gpios = <&gpio 5 0>;
+ reset-gpios = <&gpio 8 0>;
+ };
+ };
+++ /dev/null
-Sigma Designs Tango PCIe controller
-
-Required properties:
-
-- compatible: "sigma,smp8759-pcie"
-- reg: address/size of PCI configuration space, address/size of register area
-- bus-range: defined by size of PCI configuration space
-- device_type: "pci"
-- #size-cells: <2>
-- #address-cells: <3>
-- msi-controller
-- ranges: translation from system to bus addresses
-- interrupts: spec for misc interrupts, spec for MSI
-
-Example:
-
- pcie@2e000 {
- compatible = "sigma,smp8759-pcie";
- reg = <0x50000000 0x400000>, <0x2e000 0x100>;
- bus-range = <0 3>;
- device_type = "pci";
- #size-cells = <2>;
- #address-cells = <3>;
- msi-controller;
- ranges = <0x02000000 0x0 0x00400000 0x50400000 0x0 0x3c00000>;
- interrupts =
- <54 IRQ_TYPE_LEVEL_HIGH>, /* misc interrupts */
- <55 IRQ_TYPE_LEVEL_HIGH>; /* MSI */
- };
properties:
compatible:
oneOf:
- - description: PCIe EP controller in J7200
+ - const: ti,j721e-pcie-ep
+ - description: PCIe EP controller in AM64
items:
- - const: ti,j7200-pcie-ep
+ - const: ti,am64-pcie-ep
- const: ti,j721e-pcie-ep
- - description: PCIe EP controller in J721E
+ - description: PCIe EP controller in J7200
items:
+ - const: ti,j7200-pcie-ep
- const: ti,j721e-pcie-ep
reg:
- power-domains
- clocks
- clock-names
- - dma-coherent
- max-functions
- phys
- phy-names
properties:
compatible:
oneOf:
- - description: PCIe controller in J7200
+ - const: ti,j721e-pcie-host
+ - description: PCIe controller in AM64
items:
- - const: ti,j7200-pcie-host
+ - const: ti,am64-pcie-host
- const: ti,j721e-pcie-host
- - description: PCIe controller in J721E
+ - description: PCIe controller in J7200
items:
+ - const: ti,j7200-pcie-host
- const: ti,j721e-pcie-host
reg:
maxItems: 1
clocks:
- maxItems: 1
- description: clock-specifier to represent input to the PCIe
+ minItems: 1
+ maxItems: 2
+ description: |+
+ clock-specifier to represent input to the PCIe for 1 item.
+ 2nd item if present represents reference clock to the connector.
clock-names:
+ minItems: 1
items:
- const: fck
+ - const: pcie_refclk
vendor-id:
const: 0x104c
- const: 0xb00d
- items:
- const: 0xb00f
+ - items:
+ - const: 0xb010
msi-map: true
- vendor-id
- device-id
- msi-map
- - dma-coherent
- dma-ranges
- ranges
- reset-gpios
- #address-cells: specifies the number of cells needed to encode an
address. The value must be 0.
+Optional properties:
+- dma-coherent: present if DMA operations are coherent
Example:
++++++++
Use rockchip,grf and rockchip,pmu described above instead.
Required properties for gpio sub nodes:
- - compatible: "rockchip,gpio-bank"
- - reg: register of the gpio bank (different than the iomux registerset)
- - interrupts: base interrupt of the gpio bank in the interrupt controller
- - clocks: clock that drives this bank
- - gpio-controller: identifies the node as a gpio controller and pin bank.
- - #gpio-cells: number of cells in GPIO specifier. Since the generic GPIO
- binding is used, the amount of cells must be specified as 2. See generic
- GPIO binding documentation for description of particular cells.
- - interrupt-controller: identifies the controller node as interrupt-parent.
- - #interrupt-cells: the value of this property should be 2 and the interrupt
- cells should use the standard two-cell scheme described in
- bindings/interrupt-controller/interrupts.txt
-
-Deprecated properties for gpio sub nodes:
- - compatible: "rockchip,rk3188-gpio-bank0"
- - reg: second element: separate pull register for rk3188 bank0, use
- rockchip,pmu described above instead
+See rockchip,gpio-bank.yaml
Required properties for pin configuration node:
- rockchip,pins: 3 integers array, represents a group of pins mux and config
pinctrl-names = "default";
pinctrl-0 = <&uart2_xfer>;
};
-
-Example for rk3188:
-
- pinctrl@20008000 {
- compatible = "rockchip,rk3188-pinctrl";
- rockchip,grf = <&grf>;
- rockchip,pmu = <&pmu>;
- #address-cells = <1>;
- #size-cells = <1>;
- ranges;
-
- gpio0: gpio0@2000a000 {
- compatible = "rockchip,rk3188-gpio-bank0";
- reg = <0x2000a000 0x100>;
- interrupts = <GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk_gates8 9>;
-
- gpio-controller;
- #gpio-cells = <2>;
-
- interrupt-controller;
- #interrupt-cells = <2>;
- };
-
- gpio1: gpio1@2003c000 {
- compatible = "rockchip,gpio-bank";
- reg = <0x2003c000 0x100>;
- interrupts = <GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk_gates8 10>;
-
- gpio-controller;
- #gpio-cells = <2>;
-
- interrupt-controller;
- #interrupt-cells = <2>;
- };
-
- ...
-
- };
+++ /dev/null
-Rockchip PWM controller
-
-Required properties:
- - compatible: should be "rockchip,<name>-pwm"
- "rockchip,rk2928-pwm": found on RK29XX,RK3066 and RK3188 SoCs
- "rockchip,rk3288-pwm": found on RK3288 SOC
- "rockchip,rv1108-pwm", "rockchip,rk3288-pwm": found on RV1108 SoC
- "rockchip,vop-pwm": found integrated in VOP on RK3288 SoC
- - reg: physical base address and length of the controller's registers
- - clocks: See ../clock/clock-bindings.txt
- - For older hardware (rk2928, rk3066, rk3188, rk3228, rk3288, rk3399):
- - There is one clock that's used both to derive the functional clock
- for the device and as the bus clock.
- - For newer hardware (rk3328 and future socs): specified by name
- - "pwm": This is used to derive the functional clock.
- - "pclk": This is the APB bus clock.
- - #pwm-cells: must be 2 (rk2928) or 3 (rk3288). See pwm.yaml in this directory
- for a description of the cell format.
-
-Example:
-
- pwm0: pwm@20030000 {
- compatible = "rockchip,rk2928-pwm";
- reg = <0x20030000 0x10>;
- clocks = <&cru PCLK_PWM01>;
- #pwm-cells = <2>;
- };
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/pwm-rockchip.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Rockchip PWM controller
+
+maintainers:
+ - Heiko Stuebner <heiko@sntech.de>
+
+properties:
+ compatible:
+ oneOf:
+ - const: rockchip,rk2928-pwm
+ - const: rockchip,rk3288-pwm
+ - const: rockchip,rk3328-pwm
+ - const: rockchip,vop-pwm
+ - items:
+ - const: rockchip,rk3036-pwm
+ - const: rockchip,rk2928-pwm
+ - items:
+ - enum:
+ - rockchip,rk3368-pwm
+ - rockchip,rk3399-pwm
+ - rockchip,rv1108-pwm
+ - const: rockchip,rk3288-pwm
+ - items:
+ - enum:
+ - rockchip,px30-pwm
+ - rockchip,rk3308-pwm
+ - const: rockchip,rk3328-pwm
+
+ reg:
+ maxItems: 1
+
+ clocks:
+ minItems: 1
+ maxItems: 2
+
+ clock-names:
+ maxItems: 2
+
+ "#pwm-cells":
+ enum: [2, 3]
+ description:
+ Must be 2 (rk2928) or 3 (rk3288 and later).
+ See pwm.yaml for a description of the cell format.
+
+required:
+ - compatible
+ - reg
+ - "#pwm-cells"
+
+if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - rockchip,rk3328-pwm
+ - rockchip,rv1108-pwm
+
+then:
+ properties:
+ clocks:
+ items:
+ - description: Used to derive the functional clock for the device.
+ - description: Used as the APB bus clock.
+
+ clock-names:
+ items:
+ - const: pwm
+ - const: pclk
+
+ required:
+ - clocks
+ - clock-names
+
+else:
+ properties:
+ clocks:
+ maxItems: 1
+ description:
+ Used both to derive the functional clock
+ for the device and as the bus clock.
+
+ required:
+ - clocks
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/rk3188-cru-common.h>
+ pwm0: pwm@20030000 {
+ compatible = "rockchip,rk2928-pwm";
+ reg = <0x20030000 0x10>;
+ clocks = <&cru PCLK_PWM01>;
+ #pwm-cells = <2>;
+ };
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pwm/toshiba,pwm-visconti.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Toshiba Visconti PWM Controller
+
+maintainers:
+ - Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+
+properties:
+ compatible:
+ items:
+ - const: toshiba,visconti-pwm
+
+ reg:
+ maxItems: 1
+
+ '#pwm-cells':
+ const: 2
+
+required:
+ - compatible
+ - reg
+ - '#pwm-cells'
+
+additionalProperties: false
+
+examples:
+ - |
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+
+ pwm: pwm@241c0000 {
+ compatible = "toshiba,visconti-pwm";
+ reg = <0 0x241c0000 0 0x1000>;
+ pinctrl-names = "default";
+ pinctrl-0 = <&pwm_mux>;
+ #pwm-cells = <2>;
+ };
+ };
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/remoteproc/fsl,imx-rproc.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: NXP i.MX Co-Processor Bindings
+
+description:
+ This binding provides support for ARM Cortex M4 Co-processor found on some NXP iMX SoCs.
+
+maintainers:
+ - Peng Fan <peng.fan@nxp.com>
+
+properties:
+ compatible:
+ enum:
+ - fsl,imx8mq-cm4
+ - fsl,imx8mm-cm4
+ - fsl,imx7d-cm4
+ - fsl,imx6sx-cm4
+
+ clocks:
+ maxItems: 1
+
+ syscon:
+ $ref: /schemas/types.yaml#/definitions/phandle
+ description:
+ Phandle to syscon block which provide access to System Reset Controller
+
+ mbox-names:
+ items:
+ - const: tx
+ - const: rx
+ - const: rxdb
+
+ mboxes:
+ description:
+ This property is required only if the rpmsg/virtio functionality is used.
+ List of <&phandle type channel> - 1 channel for TX, 1 channel for RX, 1 channel for RXDB.
+ (see mailbox/fsl,mu.yaml)
+ minItems: 1
+ maxItems: 3
+
+ memory-region:
+ description:
+ If present, a phandle for a reserved memory area that used for vdev buffer,
+ resource table, vring region and others used by remote processor.
+ minItems: 1
+ maxItems: 32
+
+required:
+ - compatible
+ - clocks
+ - syscon
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/clock/imx7d-clock.h>
+ m4_reserved_sysmem1: cm4@80000000 {
+ reg = <0x80000000 0x80000>;
+ };
+
+ m4_reserved_sysmem2: cm4@81000000 {
+ reg = <0x81000000 0x80000>;
+ };
+
+ imx7d-cm4 {
+ compatible = "fsl,imx7d-cm4";
+ memory-region = <&m4_reserved_sysmem1>, <&m4_reserved_sysmem2>;
+ syscon = <&src>;
+ clocks = <&clks IMX7D_ARM_M4_ROOT_CLK>;
+ };
+
+ - |
+ #include <dt-bindings/clock/imx8mm-clock.h>
+
+ imx8mm-cm4 {
+ compatible = "fsl,imx8mm-cm4";
+ clocks = <&clk IMX8MM_CLK_M4_DIV>;
+ mbox-names = "tx", "rx", "rxdb";
+ mboxes = <&mu 0 1
+ &mu 1 1
+ &mu 3 1>;
+ memory-region = <&vdev0buffer>, <&vdev0vring0>, <&vdev0vring1>, <&rsc_table>;
+ syscon = <&src>;
+ };
+...
+++ /dev/null
-NXP iMX6SX/iMX7D Co-Processor Bindings
-----------------------------------------
-
-This binding provides support for ARM Cortex M4 Co-processor found on some
-NXP iMX SoCs.
-
-Required properties:
-- compatible Should be one of:
- "fsl,imx7d-cm4"
- "fsl,imx6sx-cm4"
-- clocks Clock for co-processor (See: ../clock/clock-bindings.txt)
-- syscon Phandle to syscon block which provide access to
- System Reset Controller
-
-Optional properties:
-- memory-region list of phandels to the reserved memory regions.
- (See: ../reserved-memory/reserved-memory.txt)
-
-Example:
- m4_reserved_sysmem1: cm4@80000000 {
- reg = <0x80000000 0x80000>;
- };
-
- m4_reserved_sysmem2: cm4@81000000 {
- reg = <0x81000000 0x80000>;
- };
-
- imx7d-cm4 {
- compatible = "fsl,imx7d-cm4";
- memory-region = <&m4_reserved_sysmem1>, <&m4_reserved_sysmem2>;
- syscon = <&src>;
- clocks = <&clks IMX7D_ARM_M4_ROOT_CLK>;
- };
"qcom,sc7180-mpss-pas"
"qcom,sdm845-adsp-pas"
"qcom,sdm845-cdsp-pas"
+ "qcom,sdx55-mpss-pas"
"qcom,sm8150-adsp-pas"
"qcom,sm8150-cdsp-pas"
"qcom,sm8150-mpss-pas"
must be "wdog", "fatal", "ready", "handover", "stop-ack"
qcom,qcs404-wcss-pas:
qcom,sc7180-mpss-pas:
+ qcom,sdx55-mpss-pas:
qcom,sm8150-mpss-pas:
qcom,sm8350-mpss-pas:
must be "wdog", "fatal", "ready", "handover", "stop-ack",
qcom,sm8150-mpss-pas:
qcom,sm8350-mpss-pas:
must be "cx", "load_state", "mss"
+ qcom,sdx55-mpss-pas:
+ must be "cx", "mss"
qcom,sm8250-adsp-pas:
qcom,sm8350-adsp-pas:
qcom,sm8150-slpi-pas:
Definition: must be one of:
"qcom,q6v5-pil",
"qcom,ipq8074-wcss-pil"
+ "qcom,qcs404-wcss-pil"
"qcom,msm8916-mss-pil",
"qcom,msm8974-mss-pil"
"qcom,msm8996-mss-pil"
string:
qcom,q6v5-pil:
qcom,ipq8074-wcss-pil:
+ qcom,qcs404-wcss-pil:
qcom,msm8916-mss-pil:
qcom,msm8974-mss-pil:
must be "wdog", "fatal", "ready", "handover", "stop-ack"
Definition: The clocks needed depend on the compatible string:
qcom,ipq8074-wcss-pil:
no clock names required
+ qcom,qcs404-wcss-pil:
+ must be "xo", "gcc_abhs_cbcr", "gcc_abhs_cbcr",
+ "gcc_axim_cbcr", "lcc_ahbfabric_cbc", "tcsr_lcc_cbc",
+ "lcc_abhs_cbc", "lcc_tcm_slave_cbc", "lcc_abhm_cbc",
+ "lcc_axim_cbc", "lcc_bcr_sleep"
qcom,q6v5-pil:
qcom,msm8916-mss-pil:
qcom,msm8974-mss-pil:
Definition: reference to the regulators to be held on behalf of the
booting of the Hexagon core
+For the compatible string below the following supplies are required:
+ "qcom,qcs404-wcss-pil"
+- cx-supply:
+ Usage: required
+ Value type: <phandle>
+ Definition: reference to the regulators to be held on behalf of the
+ booting of the Hexagon core
+
For the compatible string below the following supplies are required:
"qcom,msm8996-mss-pil"
- pll-supply:
Definition: should be "wdog", "fatal", optionally followed by "ready",
"handover", "stop-ack"
+- firmware-name:
+ Usage: optional
+ Value type: <string>
+ Definition: must list the relative firmware image path for the
+ WCNSS core. Defaults to "wcnss.mdt".
+
- vddmx-supply: (deprecated for qcom,pronto-v1/2-pil)
- vddcx-supply: (deprecated for qcom,pronto-v1/2-pil)
- vddpx-supply:
Unidirectional channel:
- from local to remote, where ACK from the remote means that it is
ready for shutdown
+ - description: |
+ A channel (d) used by the local proc to notify the remote proc that it
+ has to stop interprocessor communnication.
+ Unidirectional channel:
+ - from local to remote, where ACK from the remote means that communnication
+ as been stopped on the remote side.
minItems: 1
- maxItems: 3
+ maxItems: 4
mbox-names:
items:
- const: vq0
- const: vq1
- const: shutdown
+ - const: detach
minItems: 1
- maxItems: 3
+ maxItems: 4
memory-region:
description:
--- /dev/null
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/microchip.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PolarFire SoC-based boards device tree bindings
+
+maintainers:
+ - Cyril Jean <Cyril.Jean@microchip.com>
+ - Lewis Hanly <lewis.hanly@microchip.com>
+
+description:
+ Microchip PolarFire SoC-based boards
+
+properties:
+ $nodename:
+ const: '/'
+ compatible:
+ items:
+ - enum:
+ - microchip,mpfs-icicle-kit
+ - const: microchip,mpfs
+
+additionalProperties: true
+
+...
- mediatek,mt7622-btif
- mediatek,mt7623-btif
- const: mediatek,mtk-btif
- - items:
- - enum:
- - mediatek,mt7622-btif
- - mediatek,mt7623-btif
- - const: mediatek,mtk-btif
- items:
- const: mrvl,mmp-uart
- const: intel,xscale-uart
+++ /dev/null
-* Broadcom Northstar Thermal
-
-This binding describes thermal sensor that is part of Northstar's DMU (Device
-Management Unit).
-
-Required properties:
-- compatible : Must be "brcm,ns-thermal"
-- reg : iomem address range of PVTMON registers
-- #thermal-sensor-cells : Should be <0>
-
-Example:
-
-thermal: thermal@1800c2c0 {
- compatible = "brcm,ns-thermal";
- reg = <0x1800c2c0 0x10>;
- #thermal-sensor-cells = <0>;
-};
-
-thermal-zones {
- cpu_thermal: cpu-thermal {
- polling-delay-passive = <0>;
- polling-delay = <1000>;
- coefficients = <(-556) 418000>;
- thermal-sensors = <&thermal>;
-
- trips {
- cpu-crit {
- temperature = <125000>;
- hysteresis = <0>;
- type = "critical";
- };
- };
-
- cooling-maps {
- };
- };
-};
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/brcm,ns-thermal.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom Northstar Thermal
+
+maintainers:
+ - Rafał Miłecki <rafal@milecki.pl>
+
+description:
+ Thermal sensor that is part of Northstar's DMU (Device Management Unit).
+
+allOf:
+ - $ref: thermal-sensor.yaml#
+
+properties:
+ compatible:
+ const: brcm,ns-thermal
+
+ reg:
+ description: PVTMON registers range
+ maxItems: 1
+
+ "#thermal-sensor-cells":
+ const: 0
+
+unevaluatedProperties: false
+
+required:
+ - reg
+
+examples:
+ - |
+ thermal: thermal@1800c2c0 {
+ compatible = "brcm,ns-thermal";
+ reg = <0x1800c2c0 0x10>;
+ #thermal-sensor-cells = <0>;
+ };
+
+ thermal-zones {
+ cpu-thermal {
+ polling-delay-passive = <0>;
+ polling-delay = <1000>;
+ coefficients = <(-556) 418000>;
+ thermal-sensors = <&thermal>;
+
+ trips {
+ cpu-crit {
+ temperature = <125000>;
+ hysteresis = <0>;
+ type = "critical";
+ };
+ };
+
+ cooling-maps {
+ };
+ };
+ };
properties:
compatible:
oneOf:
+ - description: msm9860 TSENS based
+ items:
+ - enum:
+ - qcom,ipq8064-tsens
+
- description: v0.1 of TSENS
items:
- enum:
+ - qcom,mdm9607-tsens
- qcom,msm8916-tsens
- qcom,msm8939-tsens
- qcom,msm8974-tsens
- qcom,sdm845-tsens
- qcom,sm8150-tsens
- qcom,sm8250-tsens
+ - qcom,sm8350-tsens
- const: qcom,tsens-v2
reg:
maxItems: 2
items:
- const: calib
- - const: calib_sel
+ - enum:
+ - calib_backup
+ - calib_sel
"#qcom,sensors":
description:
Number of cells required to uniquely identify the thermal sensors. Since
we have multiple sensors this is set to 1
+required:
+ - compatible
+ - interrupts
+ - interrupt-names
+ - "#thermal-sensor-cells"
+ - "#qcom,sensors"
+
allOf:
- if:
properties:
compatible:
contains:
enum:
+ - qcom,ipq8064-tsens
+ - qcom,mdm9607-tsens
- qcom,msm8916-tsens
- qcom,msm8974-tsens
- qcom,msm8976-tsens
interrupt-names:
minItems: 2
-required:
- - compatible
- - reg
- - "#qcom,sensors"
- - interrupts
- - interrupt-names
- - "#thermal-sensor-cells"
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - qcom,tsens-v0_1
+ - qcom,tsens-v1
+ - qcom,tsens-v2
+
+ then:
+ required:
+ - reg
additionalProperties: false
examples:
+ - |
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ // Example msm9860 based SoC (ipq8064):
+ gcc: clock-controller {
+
+ /* ... */
+
+ tsens: thermal-sensor {
+ compatible = "qcom,ipq8064-tsens";
+
+ nvmem-cells = <&tsens_calib>, <&tsens_calib_backup>;
+ nvmem-cell-names = "calib", "calib_backup";
+ interrupts = <GIC_SPI 178 IRQ_TYPE_LEVEL_HIGH>;
+ interrupt-names = "uplow";
+
+ #qcom,sensors = <11>;
+ #thermal-sensor-cells = <1>;
+ };
+ };
+
- |
#include <dt-bindings/interrupt-controller/arm-gic.h>
// Example 1 (legacy: for pre v1 IP):
- renesas,r8a77980-thermal # R-Car V3H
- renesas,r8a779a0-thermal # R-Car V3U
- reg:
- minItems: 2
- maxItems: 4
- items:
- - description: TSC1 registers
- - description: TSC2 registers
- - description: TSC3 registers
- - description: TSC4 registers
+ reg: true
interrupts:
items:
enum:
- renesas,r8a779a0-thermal
then:
+ properties:
+ reg:
+ minItems: 2
+ maxItems: 3
+ items:
+ - description: TSC1 registers
+ - description: TSC2 registers
+ - description: TSC3 registers
required:
- interrupts
+else:
+ properties:
+ reg:
+ items:
+ - description: TSC0 registers
+ - description: TSC1 registers
+ - description: TSC2 registers
+ - description: TSC3 registers
+ - description: TSC4 registers
additionalProperties: false
};
};
};
+ - |
+ #include <dt-bindings/clock/r8a779a0-cpg-mssr.h>
+ #include <dt-bindings/interrupt-controller/arm-gic.h>
+ #include <dt-bindings/power/r8a779a0-sysc.h>
+
+ tsc_r8a779a0: thermal@e6190000 {
+ compatible = "renesas,r8a779a0-thermal";
+ reg = <0xe6190000 0x200>,
+ <0xe6198000 0x200>,
+ <0xe61a0000 0x200>,
+ <0xe61a8000 0x200>,
+ <0xe61b0000 0x200>;
+ clocks = <&cpg CPG_MOD 919>;
+ power-domains = <&sysc R8A779A0_PD_ALWAYS_ON>;
+ resets = <&cpg 919>;
+ #thermal-sensor-cells = <1>;
+ };
containing several internal sensors.
enum: [0, 1]
+required:
+ - "#thermal-sensor-cells"
+
additionalProperties: true
examples:
description: Shenzhen Hugsun Technology Co. Ltd.
"^hwacom,.*":
description: HwaCom Systems Inc.
+ "^hycon,.*":
+ description: Hycon Technology Corp.
"^hydis,.*":
description: Hydis Technologies
"^hyundai,.*":
Drivers that can't work without standard GPIO calls should have Kconfig entries
that depend on GPIOLIB or select GPIOLIB. The functions that allow a driver to
-obtain and use GPIOs are available by including the following file:
+obtain and use GPIOs are available by including the following file::
#include <linux/gpio/consumer.h>
way to pass the charging parameters from hardware descriptions such as the
device tree.
+- gpio-mux: drivers/mux/gpio.c is used for controlling a multiplexer using
+ n GPIO lines such that you can mux in 2^n different devices by activating
+ different GPIO lines. Often the GPIOs are on a SoC and the devices are
+ some SoC-external entities, such as different components on a PCB that
+ can be selectively enabled.
+
Apart from this there are special GPIO drivers in subsystems like MMC/SD to
read card detect and write protect GPIO lines, and in the TTY serial subsystem
to emulate MCTRL (modem control) signals CTS/RTS by using two GPIO lines. The
pwm_{enable,disable}() calls in the same function, this probably means you
should switch to pwm_apply_state().
-The PWM user API also allows one to query the PWM state with pwm_get_state().
+The PWM user API also allows one to query the PWM state that was passed to the
+last invocation of pwm_apply_state() using pwm_get_state(). Note this is
+different to what the driver has actually implemented if the request cannot be
+satisfied exactly with the hardware in use. There is currently no way for
+consumers to get the actually implemented settings.
In addition to the PWM state, the PWM API also exposes PWM arguments, which
are the reference PWM config one should use on this PWM.
{thermal_zone, cooling_device, trip_point} combination. Returns NULL
if such an instance does not exist.
-4.3. thermal_notify_framework
------------------------------
-
-This function handles the trip events from sensor drivers. It starts
-throttling the cooling devices according to the policy configured.
-For CRITICAL and HOT trip points, this notifies the respective drivers,
-and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
-The throttling policy is based on the configured platform data; if no
-platform data is provided, this uses the step_wise throttling policy.
-
-4.4. thermal_cdev_update
+4.3. thermal_cdev_update
------------------------
This function serves as an arbitrator to set the state of a cooling
VFIO - "Virtual Function I/O" [1]_
==================================
-Many modern system now provide DMA and interrupt remapping facilities
+Many modern systems now provide DMA and interrupt remapping facilities
to help ensure I/O devices behave within the boundaries they've been
allotted. This includes x86 hardware with AMD-Vi and Intel VT-d,
POWER systems with Partitionable Endpoints (PEs) and embedded PowerPC
on synchronous garbage collection running in background.
Default value for this option is on. So garbage
collection is on by default.
+gc_merge When background_gc is on, this option can be enabled to
+ let background GC thread to handle foreground GC requests,
+ it can eliminate the sluggish issue caused by slow foreground
+ GC operation when GC is triggered from a process with limited
+ I/O and CPU resources.
+nogc_merge Disable GC merge feature.
disable_roll_forward Disable the roll-forward recovery routine
norecovery Disable the roll-forward recovery routine, mounted read-
only (i.e., -o ro,disable_roll_forward)
* chattr +c file
* chattr +c dir; touch dir/file
* mount w/ -o compress_extension=ext; touch file.ext
+ * mount w/ -o compress_extension=*; touch any_file
+
+- At this point, compression feature doesn't expose compressed space to user
+ directly in order to guarantee potential data updates later to the space.
+ Instead, the main goal is to reduce data writes to flash disk as much as
+ possible, resulting in extending disk life time as well as relaxing IO
+ congestion. Alternatively, we've added ioctl interface to reclaim compressed
+ space and show it to user after putting the immutable bit.
Compress metadata layout::
}
}
+Note, that historically ACPI has no means of the GPIO polarity and thus
+the SPISerialBus() resource defines it on the per-chip basis. In order
+to avoid a chain of negations, the GPIO polarity is considered being
+Active High. Even for the cases when _DSD() is involved (see the example
+above) the GPIO CS polarity must be defined Active High to avoid ambiguity.
+
Other supported properties
==========================
},
};
- static const struct property_entry rotary_encoder_properties[] __initconst = {
+ static const struct property_entry rotary_encoder_properties[] = {
PROPERTY_ENTRY_U32("rotary-encoder,steps-per-period", 24),
PROPERTY_ENTRY_U32("linux,axis", ABS_X),
PROPERTY_ENTRY_U32("rotary-encoder,relative_axis", 0),
{ },
};
+ static const struct software_node rotary_encoder_node = {
+ .properties = rotary_encoder_properties,
+ };
+
static struct platform_device rotary_encoder_device = {
.name = "rotary-encoder",
.id = 0,
...
gpiod_add_lookup_table(&rotary_encoder_gpios);
- device_add_properties(&rotary_encoder_device, rotary_encoder_properties);
+ device_add_software_node(&rotary_encoder_device.dev, &rotary_encoder_node);
platform_device_register(&rotary_encoder_device);
...
#define JS_EVENT_INIT 0x80 /* initial state of device */
As mentioned above, the driver will issue synthetic JS_EVENT_INIT ORed
-events on open. That is, if it's issuing a INIT BUTTON event, the
+events on open. That is, if it's issuing an INIT BUTTON event, the
current type value will be::
int type = JS_EVENT_BUTTON | JS_EVENT_INIT; /* 0x81 */
=============== =======
Hats vary from one joystick type to another. Some can be moved in 8
-directions, some only in 4, The driver, however, always reports a hat as two
-independent axis, even if the hardware doesn't allow independent movement.
+directions, some only in 4. The driver, however, always reports a hat as two
+independent axes, even if the hardware doesn't allow independent movement.
js_event.value
missing events since the queue is finite, and older events will get
overwritten.
-The other reason is that you want to know all what happened, and not
+The other reason is that you want to know all that happened, and not
delay the processing till later.
-Why can get the queue full? Because you don't empty the queue as
+Why can the queue get full? Because you don't empty the queue as
mentioned, or because too much time elapses from one read to another
and too many events to store in the queue get generated. Note that
high system load may contribute to space those reads even more.
warning in following releases of the driver.
Both JSIOCSCORR and JSIOCGCORR expect &js_corr to be able to hold
-information for all axis. That is, struct js_corr corr[MAX_AXIS];
+information for all axes. That is, struct js_corr corr[MAX_AXIS];
struct js_corr is defined as::
second_button_state = js.buttons & 2;
The axis values do not have a defined range in the original 0.x driver,
-except for that the values are non-negative. The 1.2.8+ drivers use a
+except that the values are non-negative. The 1.2.8+ drivers use a
fixed range for reporting the values, 1 being the minimum, 128 the
center, and 255 maximum value.
This way, after the next reboot your joystick will remain calibrated. You
can also add the ``jscal -p`` line to your shutdown script.
-HW specific driver information
-==============================
+Hardware-specific driver information
+====================================
In this section each of the separate hardware specific drivers is described.
Analog joysticks
----------------
-The analog.c uses the standard analog inputs of the gameport, and thus
+The analog.c driver uses the standard analog inputs of the gameport, and thus
supports all standard joysticks and gamepads. It uses a very advanced
routine for this, allowing for data precision that can't be found on any
other system.
* Logitech WingMan Extreme Digital 3D
ADI devices are autodetected, and the driver supports up to two (any
-combination of) devices on a single gameport, using an Y-cable or chained
+combination of) devices on a single gameport, using a Y-cable or chained
together.
Logitech WingMan Joystick, Logitech WingMan Attack, Logitech WingMan
* Gravis Xterminator DualControl
All these devices are autodetected, and you can even use any combination
-of up to two of these pads either chained together or using an Y-cable on a
+of up to two of these pads either chained together or using a Y-cable on a
single gameport.
GrIP MultiPort isn't supported yet. Gravis Stinger is a serial device and is
driver as well to handle the attached joysticks.
The trackball should work with USB mousedev module as a normal mouse. See
-the USB documentation for how to setup an USB mouse.
+the USB documentation for how to setup a USB mouse.
ThrustMaster DirectConnect (BSP)
--------------------------------
TMDC devices are autodetected, and thus no parameters to the module
are needed. Up to two TMDC devices can be connected to one gameport, using
-an Y-cable.
+a Y-cable.
Creative Labs Blaster
---------------------
* Creative Blaster GamePad Cobra
-Up to two of these can be used on a single gameport, using an Y-cable.
+Up to two of these can be used on a single gameport, using a Y-cable.
Genius Digital joysticks
------------------------
Trident 4DWave / Aureal Vortex
------------------------------
-Soundcards with a Trident 4DWave DX/NX or Aureal Vortex/Vortex2 chipsets
+Soundcards with a Trident 4DWave DX/NX or Aureal Vortex/Vortex2 chipset
provide an "Enhanced Game Port" mode where the soundcard handles polling the
joystick. This mode is supported by the pcigame.c module. Once loaded the
analog driver can use the enhanced features of these gameports..
* SpaceTec SpaceBall 4000 FLX
In addition to having the spaceorb/spaceball and serport modules in the
-kernel, you also need to attach a serial port to it. to do that, run the
+kernel, you also need to attach a serial port to it. To do that, run the
inputattach program::
inputattach --spaceorb /dev/tts/x &
where /dev/tts/x is the serial port which the device is connected to. After
doing this, the device will be reported and will start working.
-There is one caveat with the SpaceOrb. The button #6, the on the bottom
+There is one caveat with the SpaceOrb. The button #6, the one on the bottom
side of the orb, although reported as an ordinary button, causes internal
recentering of the spaceorb, moving the zero point to the position in which
the ball is at the moment of pressing the button. So, think first before
* Magellan 3D
* Space Mouse
-models, the additional buttons on the 'Plus' versions are not supported yet.
+models; the additional buttons on the 'Plus' versions are not supported yet.
To use it, you need to attach the serial port to the driver using the::
:A: The device files don't exist. Create them (see section 2.2).
:Q: Is it possible to connect my old Atari/Commodore/Amiga/console joystick
- or pad that uses a 9-pin D-type cannon connector to the serial port of my
+ or pad that uses a 9-pin D-type Cannon connector to the serial port of my
PC?
:A: Yes, it is possible, but it'll burn your serial port or the pad. It
won't work, of course.
PPP 2.4.0 pppd --version
nfs-utils 1.0.5 showmount --version
procps 3.2.0 ps --version
-oprofile 0.9 oprofiled --version
udev 081 udevd --version
grub 0.93 grub --version || grub-install --version
mcelog 0.6 mcelog --version
:maxdepth: 1
boot-image-header
+ vm-layout
pmu
patch-acceptance
--- /dev/null
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Virtual Memory Layout on RISC-V Linux
+=====================================
+
+:Author: Alexandre Ghiti <alex@ghiti.fr>
+:Date: 12 February 2021
+
+This document describes the virtual memory layout used by the RISC-V Linux
+Kernel.
+
+RISC-V Linux Kernel 32bit
+=========================
+
+RISC-V Linux Kernel SV32
+------------------------
+
+TODO
+
+RISC-V Linux Kernel 64bit
+=========================
+
+The RISC-V privileged architecture document states that the 64bit addresses
+"must have bits 63–48 all equal to bit 47, or else a page-fault exception will
+occur.": that splits the virtual address space into 2 halves separated by a very
+big hole, the lower half is where the userspace resides, the upper half is where
+the RISC-V Linux Kernel resides.
+
+RISC-V Linux Kernel SV39
+------------------------
+
+::
+
+ ========================================================================================================================
+ Start addr | Offset | End addr | Size | VM area description
+ ========================================================================================================================
+ | | | |
+ 0000000000000000 | 0 | 0000003fffffffff | 256 GB | user-space virtual memory, different per mm
+ __________________|____________|__________________|_________|___________________________________________________________
+ | | | |
+ 0000004000000000 | +256 GB | ffffffbfffffffff | ~16M TB | ... huge, almost 64 bits wide hole of non-canonical
+ | | | | virtual memory addresses up to the -256 GB
+ | | | | starting offset of kernel mappings.
+ __________________|____________|__________________|_________|___________________________________________________________
+ |
+ | Kernel-space virtual memory, shared between all processes:
+ ____________________________________________________________|___________________________________________________________
+ | | | |
+ ffffffc000000000 | -256 GB | ffffffc7ffffffff | 32 GB | kasan
+ ffffffcefee00000 | -196 GB | ffffffcefeffffff | 2 MB | fixmap
+ ffffffceff000000 | -196 GB | ffffffceffffffff | 16 MB | PCI io
+ ffffffcf00000000 | -196 GB | ffffffcfffffffff | 4 GB | vmemmap
+ ffffffd000000000 | -192 GB | ffffffdfffffffff | 64 GB | vmalloc/ioremap space
+ ffffffe000000000 | -128 GB | ffffffff7fffffff | 124 GB | direct mapping of all physical memory
+ __________________|____________|__________________|_________|____________________________________________________________
+ |
+ |
+ ____________________________________________________________|____________________________________________________________
+ | | | |
+ ffffffff00000000 | -4 GB | ffffffff7fffffff | 2 GB | modules
+ ffffffff80000000 | -2 GB | ffffffffffffffff | 2 GB | kernel, BPF
+ __________________|____________|__________________|_________|____________________________________________________________
PPP 2.4.0 pppd --version
nfs-utils 1.0.5 showmount --version
procps 3.2.0 ps --version
-oprofile 0.9 oprofiled --version
udev 081 udevd --version
grub 0.93 grub --version || grub-install --version
mcelog 0.6 mcelog --version
+.. SPDX-License-Identifier: GPL-2.0
+
.. raw:: latex
\renewcommand\thesection*
\renewcommand\thesubsection*
+.. _linux_doc_zh:
+
中文翻译
========
-这些手册包含有关如何开发内核的整体信息。内核社区非常庞大,一年下来有数千名开发
-人员做出贡献。 与任何大型社区一样,知道如何完成任务将使得更改合并的过程变得更
-加容易。
-翻译计划:
-内核中文文档欢迎任何翻译投稿,特别是关于内核用户和管理员指南部分。
+.. note::
+
+ **翻译计划:**
+ 内核中文文档欢迎任何翻译投稿,特别是关于内核用户和管理员指南部分。
+
+许可证文档
+----------
+
+下面的文档介绍了Linux内核源代码的许可证(GPLv2)、如何在源代码树中正确标记
+单个文件的许可证、以及指向完整许可证文本的链接。
+
+* Documentation/translations/zh_CN/process/license-rules.rst
+
+用户文档
+--------
+
+下面的手册是为内核用户编写的——即那些试图让它在给定系统上以最佳方式工作的
+用户。
.. toctree::
:maxdepth: 2
admin-guide/index
+
+TODOList:
+
+* kbuild/index
+
+固件相关文档
+------------
+
+下列文档描述了内核需要的平台固件相关信息。
+
+TODOList:
+
+* firmware-guide/index
+* devicetree/index
+
+应用程序开发人员文档
+--------------------
+
+用户空间API手册涵盖了描述应用程序开发人员可见内核接口方面的文档。
+
+TODOlist:
+
+* userspace-api/index
+
+内核开发简介
+------------
+
+这些手册包含有关如何开发内核的整体信息。内核社区非常庞大,一年下来有数千名
+开发人员做出贡献。与任何大型社区一样,知道如何完成任务将使得更改合并的过程
+变得更加容易。
+
+.. toctree::
+ :maxdepth: 2
+
process/index
dev-tools/index
doc-guide/index
kernel-hacking/index
- filesystems/index
- arm64/index
- sound/index
+
+TODOList:
+
+* trace/index
+* maintainer/index
+* fault-injection/index
+* livepatch/index
+* rust/index
+
+内核API文档
+-----------
+
+以下手册从内核开发人员的角度详细介绍了特定的内核子系统是如何工作的。这里的
+大部分信息都是直接从内核源代码获取的,并根据需要添加补充材料(或者至少是在
+我们设法添加的时候——可能不是所有的都是有需要的)。
+
+.. toctree::
+ :maxdepth: 2
+
+ core-api/index
cpu-freq/index
- mips/index
iio/index
+ sound/index
+ filesystems/index
+
+TODOList:
+
+* driver-api/index
+* locking/index
+* accounting/index
+* block/index
+* cdrom/index
+* ide/index
+* fb/index
+* fpga/index
+* hid/index
+* i2c/index
+* isdn/index
+* infiniband/index
+* leds/index
+* netlabel/index
+* networking/index
+* pcmcia/index
+* power/index
+* target/index
+* timers/index
+* spi/index
+* w1/index
+* watchdog/index
+* virt/index
+* input/index
+* hwmon/index
+* gpu/index
+* security/index
+* crypto/index
+* vm/index
+* bpf/index
+* usb/index
+* PCI/index
+* scsi/index
+* misc-devices/index
+* scheduler/index
+* mhi/index
+
+体系结构无关文档
+----------------
+
+TODOList:
+
+* asm-annotations
+
+特定体系结构文档
+----------------
+
+.. toctree::
+ :maxdepth: 2
+
+ mips/index
+ arm64/index
riscv/index
- core-api/index
openrisc/index
+TODOList:
+
+* arm/index
+* ia64/index
+* m68k/index
+* nios2/index
+* parisc/index
+* powerpc/index
+* s390/index
+* sh/index
+* sparc/index
+* x86/index
+* xtensa/index
+
+其他文档
+--------
+
+有几份未排序的文档似乎不适合放在文档的其他部分,或者可能需要进行一些调整和/或
+转换为reStructureText格式,也有可能太旧。
+
+TODOList:
+
+* staging/index
+* watch_queue
+
目录和表格
----------
Overview
========
-Original x86-64 was limited by 4-level paing to 256 TiB of virtual address
+Original x86-64 was limited by 4-level paging to 256 TiB of virtual address
space and 64 TiB of physical address space. We are already bumping into
-this limit: some vendors offers servers with 64 TiB of memory today.
+this limit: some vendors offer servers with 64 TiB of memory today.
To overcome the limitation upcoming hardware will introduce support for
5-level paging. It is a straight-forward extension of the current page
AFS FILESYSTEM
M: David Howells <dhowells@redhat.com>
+M: Marc Dionne <marc.dionne@auristor.com>
L: linux-afs@lists.infradead.org
S: Supported
W: https://www.infradead.org/~dhowells/kafs/
F: fs/bfs/
F: include/uapi/linux/bfs_fs.h
+BITMAP API
+M: Yury Norov <yury.norov@gmail.com>
+R: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+R: Rasmus Villemoes <linux@rasmusvillemoes.dk>
+S: Maintained
+F: include/asm-generic/bitops/find.h
+F: include/linux/bitmap.h
+F: lib/bitmap.c
+F: lib/find_bit.c
+F: lib/find_bit_benchmark.c
+F: lib/test_bitmap.c
+F: tools/include/asm-generic/bitops/find.h
+F: tools/include/linux/bitmap.h
+F: tools/lib/bitmap.c
+F: tools/lib/find_bit.c
+
BLINKM RGB LED DRIVER
M: Jan-Simon Moeller <jansimon.moeller@gmx.de>
S: Maintained
F: mm/hwpoison-inject.c
F: mm/memory-failure.c
+HYCON HY46XX TOUCHSCREEN SUPPORT
+M: Giulio Benetti <giulio.benetti@benettiengineering.com>
+L: linux-input@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/input/touchscreen/hycon,hy46xx.yaml
+F: drivers/input/touchscreen/hycon-hy46xx.c
+
HYGON PROCESSOR SUPPORT
M: Pu Wen <puwen@hygon.cn>
L: linux-kernel@vger.kernel.org
F: fs/io_uring.c
F: include/linux/io_uring.h
F: include/uapi/linux/io_uring.h
+F: tools/io_uring/
IPMI SUBSYSTEM
M: Corey Minyard <minyard@acm.org>
F: Documentation/devicetree/bindings/leds/backlight/kinetic,ktd253.yaml
F: drivers/video/backlight/ktd253-backlight.c
+KTEST
+M: Steven Rostedt <rostedt@goodmis.org>
+M: John Hawley <warthog9@eaglescrag.net>
+S: Maintained
+F: tools/testing/ktest
+
L3MDEV
M: David Ahern <dsahern@kernel.org>
L: netdev@vger.kernel.org
F: Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
F: drivers/pci/controller/dwc/*imx6*
+PCI DRIVER FOR FU740
+M: Paul Walmsley <paul.walmsley@sifive.com>
+M: Greentime Hu <greentime.hu@sifive.com>
+L: linux-pci@vger.kernel.org
+S: Maintained
+F: Documentation/devicetree/bindings/pci/sifive,fu740-pcie.yaml
+F: drivers/pci/controller/dwc/pcie-fu740.c
+
PCI DRIVER FOR INTEL VOLUME MANAGEMENT DEVICE (VMD)
M: Jonathan Derrick <jonathan.derrick@intel.com>
L: linux-pci@vger.kernel.org
F: drivers/pci/controller/cadence/pci-j721e.c
F: drivers/pci/controller/dwc/pci-dra7xx.c
-PCI DRIVER FOR TI KEYSTONE
-M: Murali Karicheri <m-karicheri2@ti.com>
-L: linux-pci@vger.kernel.org
-L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
-S: Maintained
-F: drivers/pci/controller/dwc/pci-keystone.c
-
PCI DRIVER FOR V3 SEMICONDUCTOR V360EPC
M: Linus Walleij <linus.walleij@linaro.org>
L: linux-pci@vger.kernel.org
M: Zhou Wang <wangzhou1@hisilicon.com>
L: linux-pci@vger.kernel.org
S: Maintained
-F: Documentation/devicetree/bindings/pci/hisilicon-pcie.txt
F: drivers/pci/controller/dwc/pcie-hisi.c
PCIE DRIVER FOR HISILICON KIRIN
PCIE DRIVER FOR MEDIATEK
M: Ryder Lee <ryder.lee@mediatek.com>
+M: Jianjun Wang <jianjun.wang@mediatek.com>
L: linux-pci@vger.kernel.org
L: linux-mediatek@lists.infradead.org
S: Supported
QUALCOMM TSENS THERMAL DRIVER
M: Amit Kucheria <amitk@kernel.org>
+M: Thara Gopinath <thara.gopinath@linaro.org>
L: linux-pm@vger.kernel.org
L: linux-arm-msm@vger.kernel.org
S: Maintained
RXRPC SOCKETS (AF_RXRPC)
M: David Howells <dhowells@redhat.com>
+M: Marc Dionne <marc.dionne@auristor.com>
L: linux-afs@lists.infradead.org
S: Supported
W: https://www.infradead.org/~dhowells/kafs/
M: Amit Daniel Kachhap <amit.kachhap@gmail.com>
M: Daniel Lezcano <daniel.lezcano@linaro.org>
M: Viresh Kumar <viresh.kumar@linaro.org>
-M: Javi Merino <javi.merino@kernel.org>
+R: Lukasz Luba <lukasz.luba@arm.com>
L: linux-pm@vger.kernel.org
S: Supported
F: Documentation/driver-api/thermal/cpu-cooling-api.rst
F: sound/soc/codecs/isabelle*
F: sound/soc/codecs/lm49453*
-TI NETCP ETHERNET DRIVER
-M: Wingman Kwok <w-kwok2@ti.com>
-M: Murali Karicheri <m-karicheri2@ti.com>
-L: netdev@vger.kernel.org
-S: Maintained
-F: drivers/net/ethernet/ti/netcp*
-
TI PCM3060 ASoC CODEC DRIVER
M: Kirill Marinushkin <kmarinushkin@birdec.com>
L: alsa-devel@alsa-project.org (moderated for non-subscribers)
SRCARCH := sparc
endif
-# Additional ARCH settings for sh
-ifeq ($(ARCH),sh64)
- SRCARCH := sh
-endif
-
export cross_compiling :=
ifneq ($(SRCARCH),$(SUBARCH))
cross_compiling := 1
KBUILD_CFLAGS += -mno-global-merge
else
-# These warnings generated too much noise in a regular build.
-# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
-KBUILD_CFLAGS += -Wno-unused-but-set-variable
-
# Warn about unmarked fall-throughs in switch statement.
# Disabled for clang while comment to attribute conversion happens and
# https://github.com/ClangBuiltLinux/linux/issues/636 is discussed.
KBUILD_CFLAGS += $(call cc-option,-Wimplicit-fallthrough,)
endif
+# These warnings generated too much noise in a regular build.
+# Use make W=1 to enable them (see scripts/Makefile.extrawarn)
+KBUILD_CFLAGS += $(call cc-disable-warning, unused-but-set-variable)
+
KBUILD_CFLAGS += $(call cc-disable-warning, unused-const-variable)
ifdef CONFIG_FRAME_POINTER
KBUILD_CFLAGS += -fno-omit-frame-pointer -fno-optimize-sibling-calls
archprepare: outputmakefile archheaders archscripts scripts include/config/kernel.release \
asm-generic $(version_h) $(autoksyms_h) include/generated/utsrelease.h \
- include/generated/autoconf.h
+ include/generated/autoconf.h remove-stale-files
prepare0: archprepare
$(Q)$(MAKE) $(build)=scripts/mod
# All the preparing..
prepare: prepare0 prepare-objtool prepare-resolve_btfids
+PHONY += remove-stale-files
+remove-stale-files:
+ $(Q)$(srctree)/scripts/remove-stale-files
+
# Support for using generic headers in asm-generic
asm-generic := -f $(srctree)/scripts/Makefile.asm-generic obj
vmlinux-gdb.py \
*.spec
-# Directories & files removed with 'make distclean'
-DISTCLEAN_FILES += tags TAGS cscope* GPATH GTAGS GRTAGS GSYMS
-
# clean - Delete most, but leave enough to build external modules
#
clean: rm-files := $(CLEAN_FILES)
# distclean
#
-distclean: rm-files := $(wildcard $(DISTCLEAN_FILES))
-
PHONY += distclean
distclean: mrproper
- $(call cmd,rmfiles)
- @find $(srctree) $(RCS_FIND_IGNORE) \
+ @find . $(RCS_FIND_IGNORE) \
\( -name '*.orig' -o -name '*.rej' -o -name '*~' \
-o -name '*.bak' -o -name '#*#' -o -name '*%' \
- -o -name 'core' \) \
+ -o -name 'core' -o -name tags -o -name TAGS -o -name 'cscope*' \
+ -o -name GPATH -o -name GRTAGS -o -name GSYMS -o -name GTAGS \) \
-type f -print | xargs rm -f
# When building external modules the kernel used as basis is considered
# read-only, and no consistency checks are made and the make
# system is not used on the basis kernel. If updates are required
-# in the basis kernel ordinary make commands (without M=...) must
-# be used.
-#
-# The following are the only valid targets when building external
-# modules.
-# make M=dir clean Delete all automatically generated files
-# make M=dir modules Make all modules in specified dir
-# make M=dir Same as 'make M=dir modules'
-# make M=dir modules_install
-# Install the modules built in the module directory
-# Assumes install directory is already created
+# in the basis kernel ordinary make commands (without M=...) must be used.
# We are always building only modules.
KBUILD_BUILTIN :=
# SPDX-License-Identifier: GPL-2.0-only
-i386
-x86_64
+/i386/
+/x86_64/
*/
#define xlate_dev_mem_ptr(p) __va(p)
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
#endif /* __KERNEL__ */
#endif /* __ALPHA_IO_H */
static unsigned int base, model;
-unsigned int __init pc873xx_get_base()
+unsigned int __init pc873xx_get_base(void)
{
return base;
}
-char *__init pc873xx_get_model()
+char *__init pc873xx_get_model(void)
{
return pc873xx_names[model];
}
#include <linux/types.h>
#include <linux/string.h>
#include <linux/uaccess.h>
+#include <net/checksum.h>
#define ldq_u(x,y) \
config ARC
def_bool y
select ARC_TIMERS
+ select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_PTE_SPECIAL
select GENERIC_SMP_IDLE_THREAD
select HAVE_ARCH_KGDB
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DEBUG_KMEMLEAK
select HAVE_FUTEX_CMPXCHG if FUTEX
select HAVE_ARCH_JUMP_LABEL if ISA_ARCV2 && !CPU_ENDIAN_BE32
select SET_FS
-config ARCH_HAS_CACHE_LINE_SIZE
- def_bool y
-
config TRACE_IRQFLAGS_SUPPORT
def_bool y
def_bool y
select STACKTRACE
-config HAVE_ARCH_TRANSPARENT_HUGEPAGE
- def_bool y
- depends on ARC_MMU_V4
-
menu "ARC Architecture Configuration"
menu "ARC Platform/SoC/Board"
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7
select ARCH_SUPPORTS_ATOMIC_RMW
+ select ARCH_SUPPORTS_HUGETLBFS if ARM_LPAE
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF
select ARCH_USE_MEMTEST
select HAVE_ARCH_SECCOMP_FILTER if AEABI && !OABI_COMPAT
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_TRACEHOOK
+ select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARM_LPAE
select HAVE_ARM_SMCCC if CPU_V7
select HAVE_EBPF_JIT if !CPU_ENDIAN_BE32
select HAVE_CONTEXT_TRACKING
def_bool y
depends on ARM_PMU
-config SYS_SUPPORTS_HUGETLBFS
- def_bool y
- depends on ARM_LPAE
-
-config HAVE_ARCH_TRANSPARENT_HUGEPAGE
- def_bool y
- depends on ARM_LPAE
-
config ARCH_WANT_GENERAL_HUGETLB
def_bool y
$(foreach o, $(libfdt_objs) atags_to_fdt.o fdt_check_mem_start.o, \
$(eval CFLAGS_$(o) := -I $(srctree)/scripts/dtc/libfdt -fno-stack-protector))
-# These were previously generated C files. When you are building the kernel
-# with O=, make sure to remove the stale files in the output tree. Otherwise,
-# the build system wrongly compiles the stale ones.
-ifdef building_out_of_srctree
-$(shell rm -f $(addprefix $(obj)/, fdt_rw.c fdt_ro.c fdt_wip.c fdt.c))
-endif
-
targets := vmlinux vmlinux.lds piggy_data piggy.o \
lib1funcs.o ashldi3.o bswapsdi2.o \
head.o $(OBJS)
# Supply kernel BSS size to the decompressor via a linker symbol.
KBSS_SZ = $(shell echo $$(($$($(NM) $(obj)/../../../../vmlinux | \
- sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \
- -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) )
+ sed -n -e 's/^\([^ ]*\) [ABD] __bss_start$$/-0x\1/p' \
+ -e 's/^\([^ ]*\) [ABD] __bss_stop$$/+0x\1/p') )) )
LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ)
# Supply ZRELADDR to the decompressor via a linker symbol.
ifneq ($(CONFIG_AUTO_ZRELADDR),y)
reg = <0x20050000 0x10>;
#pwm-cells = <3>;
clocks = <&cru PCLK_PWM>;
- clock-names = "pwm";
pinctrl-names = "default";
pinctrl-0 = <&pwm0_pin>;
status = "disabled";
reg = <0x20050010 0x10>;
#pwm-cells = <3>;
clocks = <&cru PCLK_PWM>;
- clock-names = "pwm";
pinctrl-names = "default";
pinctrl-0 = <&pwm1_pin>;
status = "disabled";
reg = <0x20050020 0x10>;
#pwm-cells = <3>;
clocks = <&cru PCLK_PWM>;
- clock-names = "pwm";
pinctrl-names = "default";
pinctrl-0 = <&pwm2_pin>;
status = "disabled";
reg = <0x20050030 0x10>;
#pwm-cells = <2>;
clocks = <&cru PCLK_PWM>;
- clock-names = "pwm";
pinctrl-names = "default";
pinctrl-0 = <&pwm3_pin>;
status = "disabled";
pinctrl-names = "default";
pinctrl-0 = <&pwm0_pin>;
clocks = <&cru PCLK_RKPWM>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm1_pin>;
clocks = <&cru PCLK_RKPWM>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm2_pin>;
clocks = <&cru PCLK_RKPWM>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm3_pin>;
clocks = <&cru PCLK_RKPWM>;
- clock-names = "pwm";
status = "disabled";
};
# CONFIG_MOUSE_PS2 is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=16
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_RUNTIME_UARTS=2
CONFIG_MODULES=y
CONFIG_ARCH_FOOTBRIDGE=y
CONFIG_ARCH_CATS=y
-CONFIG_ARCH_PERSONAL_SERVER=y
CONFIG_ARCH_EBSA285_HOST=y
CONFIG_ARCH_NETWINDER=y
CONFIG_LEDS=y
CONFIG_INPUT_MISC=y
CONFIG_INPUT_UINPUT=m
# CONFIG_SERIO is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_PXA=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
# CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_NR_UARTS=1
# CONFIG_VT is not set
# CONFIG_LEGACY_PTYS is not set
CONFIG_SERIAL_NONSTANDARD=y
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_MPS2_UART_CONSOLE=y
CONFIG_SERIAL_MPS2_UART=y
# CONFIG_HW_RANDOM is not set
CONFIG_KEYBOARD_GPIO=y
# CONFIG_INPUT_MOUSE is not set
CONFIG_LEGACY_PTY_COUNT=16
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_8250_RUNTIME_UARTS=2
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_PXA=y
CONFIG_SERIAL_PXA_CONSOLE=y
# CONFIG_LEGACY_PTYS is not set
struct mm_struct;
void show_pte(const char *lvl, struct mm_struct *mm, unsigned long addr);
extern void __show_regs(struct pt_regs *);
+extern void __show_regs_alloc_free(struct pt_regs *regs);
#endif
*/
#define xlate_dev_mem_ptr(p) __va(p)
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
#include <asm-generic/io.h>
#ifdef CONFIG_MMU
}
}
-/* Function pointer to optional machine-specific reinitialization */
-extern void (*kexec_reinit)(void);
-
static inline unsigned long phys_to_boot_phys(phys_addr_t phys)
{
return phys_to_idmap(phys);
*/
#define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET)
-#ifdef CONFIG_XIP_KERNEL
-/*
- * When referencing data in RAM from the XIP region in a relative manner
- * with the MMU off, we need the relative offset between the two physical
- * addresses. The macro below achieves this, which is:
- * __pa(v_data) - __xip_pa(v_text)
- */
-#define PHYS_RELATIVE(v_data, v_text) \
- (((v_data) - PAGE_OFFSET + PLAT_PHYS_OFFSET) - \
- ((v_text) - XIP_VIRT_ADDR(CONFIG_XIP_PHYS_ADDR) + \
- CONFIG_XIP_PHYS_ADDR))
-#else
-#define PHYS_RELATIVE(v_data, v_text) ((v_data) - (v_text))
-#endif
-
#ifndef __ASSEMBLY__
/*
static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
#endif
-#ifdef CONFIG_STRICT_KERNEL_RWX
-void set_kernel_text_rw(void);
-void set_kernel_text_ro(void);
-#else
-static inline void set_kernel_text_rw(void) { }
-static inline void set_kernel_text_ro(void) { }
-#endif
-
#endif
# SPDX-License-Identifier: GPL-2.0
-generated-y += unistd-common.h
generated-y += unistd-oabi.h
generated-y += unistd-eabi.h
generic-y += kvm_para.h
#include <asm/unistd-oabi.h>
#endif
-#include <asm/unistd-common.h>
#define __NR_sync_file_range2 __NR_arm_sync_file_range
/*
#include <asm/vdso_datapage.h>
#include <asm/hardware/cache-l2x0.h>
#include <linux/kbuild.h>
+#include <linux/arm-smccc.h>
#include "signal.h"
/*
DEFINE(SLEEP_SAVE_SP_PHYS, offsetof(struct sleep_save_sp, save_ptr_stash_phys));
DEFINE(SLEEP_SAVE_SP_VIRT, offsetof(struct sleep_save_sp, save_ptr_stash));
#endif
+ DEFINE(ARM_SMCCC_QUIRK_ID_OFFS, offsetof(struct arm_smccc_quirk, id));
+ DEFINE(ARM_SMCCC_QUIRK_STATE_OFFS, offsetof(struct arm_smccc_quirk, state));
BLANK();
DEFINE(DMA_BIDIRECTIONAL, DMA_BIDIRECTIONAL);
DEFINE(DMA_TO_DEVICE, DMA_TO_DEVICE);
.size \sym, . - \sym
.endm
-#define NATIVE(nr, func) syscall nr, func
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+#define __SYSCALL(nr, func) syscall nr, func
/*
* This is the syscall table declaration for native ABI syscalls.
* With EABI a couple syscalls are obsolete and defined as sys_ni_syscall.
*/
syscall_table_start sys_call_table
-#define COMPAT(nr, native, compat) syscall nr, native
#ifdef CONFIG_AEABI
#include <calls-eabi.S>
#else
#include <calls-oabi.S>
#endif
-#undef COMPAT
syscall_table_end sys_call_table
/*============================================================================
* using the compatibility syscall entries.
*/
syscall_table_start sys_oabi_call_table
-#define COMPAT(nr, native, compat) syscall nr, compat
+#undef __SYSCALL_WITH_COMPAT
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
#include <calls-oabi.S>
syscall_table_end sys_oabi_call_table
info->trigger = addr;
pr_debug("breakpoint fired: address = 0x%x\n", addr);
perf_bp_event(bp, regs);
- if (!bp->overflow_handler)
+ if (is_default_overflow_handler(bp))
enable_single_step(bp, addr);
goto unlock;
}
pr_info("Loading crashdump kernel...\n");
}
-/*
- * Function pointer to optional machine-specific reinitialization
- */
-void (*kexec_reinit)(void);
-
void machine_kexec(struct kimage *image)
{
unsigned long page_list, reboot_entry_phys;
pr_info("Bye!\n");
- if (kexec_reinit)
- kexec_reinit();
-
soft_restart(reboot_entry_phys);
}
ledtrig_cpu(CPU_LED_IDLE_END);
}
+void __show_regs_alloc_free(struct pt_regs *regs)
+{
+ int i;
+
+ /* check for r0 - r12 only */
+ for (i = 0; i < 13; i++) {
+ pr_alert("Register r%d information:", i);
+ mem_dump_obj((void *)regs->uregs[i]);
+ }
+}
+
void __show_regs(struct pt_regs *regs)
{
unsigned long flags;
* Copyright (c) 2015, Linaro Limited
*/
#include <linux/linkage.h>
+#include <linux/arm-smccc.h>
+#include <asm/asm-offsets.h>
#include <asm/opcodes-sec.h>
#include <asm/opcodes-virt.h>
#include <asm/unwind.h>
UNWIND( .save {r4-r7})
ldm r12, {r4-r7}
\instr
- pop {r4-r7}
+ ldr r4, [sp, #36]
+ cmp r4, #0
+ beq 1f // No quirk structure
+ ldr r5, [r4, #ARM_SMCCC_QUIRK_ID_OFFS]
+ cmp r5, #ARM_SMCCC_QUIRK_QCOM_A6
+ bne 1f // No quirk present
+ str r6, [r4, #ARM_SMCCC_QUIRK_STATE_OFFS]
+1: pop {r4-r7}
ldr r12, [sp, #(4 * 4)]
stm r12, {r0-r3}
bx lr
// SPDX-License-Identifier: GPL-2.0
+#include <linux/ftrace.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mm_types.h>
if (!idmap_pgd)
return -EINVAL;
+ /*
+ * Function graph tracer state gets incosistent when the kernel
+ * calls functions that never return (aka suspend finishers) hence
+ * disable graph tracing during their execution.
+ */
+ pause_graph_tracing();
+
/*
* Provide a temporary page table with an identity mapping for
* the MMU-enable code, required for resuming. On successful
* back to the correct page tables.
*/
ret = __cpu_suspend(arg, fn, __mpidr);
+
+ unpause_graph_tracing();
+
if (ret == 0) {
cpu_switch_mm(mm->pgd, mm);
local_flush_bp_all();
int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
{
u32 __mpidr = cpu_logical_map(smp_processor_id());
- return __cpu_suspend(arg, fn, __mpidr);
+ int ret;
+
+ pause_graph_tracing();
+ ret = __cpu_suspend(arg, fn, __mpidr);
+ unpause_graph_tracing();
+
+ return ret;
}
#define idmap_pgd NULL
#endif
print_modules();
__show_regs(regs);
+ __show_regs_alloc_free(regs);
pr_emerg("Process %.*s (pid: %d, stack limit = 0x%p)\n",
TASK_COMM_LEN, tsk->comm, task_pid_nr(tsk), end_of_stack(tsk));
Saying N will reduce the size of the Footbridge kernel.
-config ARCH_PERSONAL_SERVER
- bool "Compaq Personal Server"
- select FOOTBRIDGE_HOST
- select ISA
- select ISA_DMA
- select FORCE_PCI
- help
- Say Y here if you intend to run this kernel on the Compaq
- Personal Server.
-
- Saying N will reduce the size of the Footbridge kernel.
-
- The Compaq Personal Server is not available for purchase.
- There are no product plans beyond the current research
- prototypes at this time. Information is available at:
-
- <http://www.crl.hpl.hp.com/projects/personalserver/>
-
- If you have any questions or comments about the Compaq Personal
- Server, send e-mail to <skiff@crl.dec.com>.
-
config ARCH_EBSA285_ADDIN
bool "EBSA285 (addin mode)"
select ARCH_EBSA285
pci-$(CONFIG_ARCH_CATS) += cats-pci.o
pci-$(CONFIG_ARCH_EBSA285_HOST) += ebsa285-pci.o
pci-$(CONFIG_ARCH_NETWINDER) += netwinder-pci.o
-pci-$(CONFIG_ARCH_PERSONAL_SERVER) += personal-pci.o
obj-$(CONFIG_ARCH_CATS) += cats-hw.o isa-timer.o
obj-$(CONFIG_ARCH_EBSA285) += ebsa285.o dc21285-timer.o
obj-$(CONFIG_ARCH_NETWINDER) += netwinder-hw.o isa-timer.o
-obj-$(CONFIG_ARCH_PERSONAL_SERVER) += personal.o dc21285-timer.o
obj-$(CONFIG_PCI) +=$(pci-y)
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/arch/arm/mach-footbridge/personal-pci.c
- *
- * PCI bios-type initialisation for PCI machines
- *
- * Bits taken from various places.
- */
-#include <linux/kernel.h>
-#include <linux/pci.h>
-#include <linux/init.h>
-
-#include <asm/irq.h>
-#include <asm/mach/pci.h>
-#include <asm/mach-types.h>
-
-static int irqmap_personal_server[] = {
- IRQ_IN0, IRQ_IN1, IRQ_IN2, IRQ_IN3, 0, 0, 0,
- IRQ_DOORBELLHOST, IRQ_DMA1, IRQ_DMA2, IRQ_PCI
-};
-
-static int personal_server_map_irq(const struct pci_dev *dev, u8 slot, u8 pin)
-{
- unsigned char line;
-
- pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line);
-
- if (line > 0x40 && line <= 0x5f) {
- /* line corresponds to the bit controlling this interrupt
- * in the footbridge. Ignore the first 8 interrupt bits,
- * look up the rest in the map. IN0 is bit number 8
- */
- return irqmap_personal_server[(line & 0x1f) - 8];
- } else if (line == 0) {
- /* no interrupt */
- return 0;
- } else
- return irqmap_personal_server[(line - 1) & 3];
-}
-
-static struct hw_pci personal_server_pci __initdata = {
- .map_irq = personal_server_map_irq,
- .nr_controllers = 1,
- .ops = &dc21285_ops,
- .setup = dc21285_setup,
- .preinit = dc21285_preinit,
- .postinit = dc21285_postinit,
-};
-
-static int __init personal_pci_init(void)
-{
- if (machine_is_personal_server())
- pci_common_init(&personal_server_pci);
- return 0;
-}
-
-subsys_initcall(personal_pci_init);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/arch/arm/mach-footbridge/personal.c
- *
- * Personal server (Skiff) machine fixup
- */
-#include <linux/init.h>
-#include <linux/spinlock.h>
-
-#include <asm/hardware/dec21285.h>
-#include <asm/mach-types.h>
-
-#include <asm/mach/arch.h>
-
-#include "common.h"
-
-MACHINE_START(PERSONAL_SERVER, "Compaq-PersonalServer")
- /* Maintainer: Jamey Hicks / George France */
- .atag_offset = 0x100,
- .map_io = footbridge_map_io,
- .init_irq = footbridge_init_irq,
- .init_time = footbridge_timer_init,
- .restart = footbridge_restart,
-MACHINE_END
-
};
/*
- * Both r8169 chips on the n2100 exhibit PCI parity problems. Set
- * the ->broken_parity_status flag for both ports so that the r8169
- * driver knows it should ignore error interrupts.
+ * Both r8169 chips on the n2100 exhibit PCI parity problems. Turn
+ * off parity reporting for both ports so we don't get error interrupts
+ * for them.
*/
static void n2100_fixup_r8169(struct pci_dev *dev)
{
if (dev->bus->number == 0 &&
(dev->devfn == PCI_DEVFN(1, 0) ||
dev->devfn == PCI_DEVFN(2, 0)))
- dev->broken_parity_status = 1;
+ pci_disable_parity(dev);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_REALTEK, PCI_ANY_ID, n2100_fixup_r8169);
* processor. We fix this by performing an invalidate, rather than a
* clean + invalidate, before jumping into the kernel.
*
- * This function is cloned from arch/arm/mach-tegra/headsmp.S, and needs
- * to be called for both secondary cores startup and primary core resume
- * procedures.
+ * This function needs to be called for both secondary cores startup and
+ * primary core resume procedures.
*/
ENTRY(v7_invalidate_l1)
- mov r0, #0
- mcr p15, 2, r0, c0, c0, 0
- mrc p15, 1, r0, c0, c0, 0
-
- movw r1, #0x7fff
- and r2, r1, r0, lsr #13
+ mov r0, #0
+ mcr p15, 2, r0, c0, c0, 0 @ select L1 data cache in CSSELR
+ isb
+ mrc p15, 1, r0, c0, c0, 0 @ read cache geometry from CCSIDR
- movw r1, #0x3ff
+ movw r3, #0x3ff
+ and r3, r3, r0, lsr #3 @ 'Associativity' in CCSIDR[12:3]
+ clz r1, r3 @ WayShift
+ mov r2, #1
+ mov r3, r3, lsl r1 @ NumWays-1 shifted into bits [31:...]
+ movs r1, r2, lsl r1 @ #1 shifted left by same amount
+ moveq r1, #1 @ r1 needs value > 0 even if only 1 way
- and r3, r1, r0, lsr #3 @ NumWays - 1
- add r2, r2, #1 @ NumSets
+ and r2, r0, #0x7
+ add r2, r2, #4 @ SetShift
- and r0, r0, #0x7
- add r0, r0, #4 @ SetShift
+1: movw ip, #0x7fff
+ and r0, ip, r0, lsr #13 @ 'NumSets' in CCSIDR[27:13]
- clz r1, r3 @ WayShift
- add r4, r3, #1 @ NumWays
-1: sub r2, r2, #1 @ NumSets--
- mov r3, r4 @ Temp = NumWays
-2: subs r3, r3, #1 @ Temp--
- mov r5, r3, lsl r1
- mov r6, r2, lsl r0
- orr r5, r5, r6 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
- mcr p15, 0, r5, c7, c6, 2
- bgt 2b
- cmp r2, #0
- bgt 1b
- dsb st
- isb
- ret lr
+2: mov ip, r0, lsl r2 @ NumSet << SetShift
+ orr ip, ip, r3 @ Reg = (Temp<<WayShift)|(NumSets<<SetShift)
+ mcr p15, 0, ip, c7, c6, 2
+ subs r0, r0, #1 @ Set--
+ bpl 2b
+ subs r3, r3, r1 @ Way--
+ bcc 3f
+ mrc p15, 1, r0, c0, c0, 0 @ re-read cache geometry from CCSIDR
+ b 1b
+3: dsb st
+ isb
+ ret lr
ENDPROC(v7_invalidate_l1)
/*
note_page(&st, 0, 0, 0, NULL);
}
-static void ptdump_initialize(void)
+static void __init ptdump_initialize(void)
{
unsigned i, j;
pr_info("Checked W+X mappings: passed, no W+X pages found\n");
}
-static int ptdump_init(void)
+static int __init ptdump_init(void)
{
ptdump_initialize();
ptdump_debugfs_register(&kernel_ptdump_info, "kernel_page_tables");
void __init mem_init(void)
{
#ifdef CONFIG_ARM_LPAE
- swiotlb_init(1);
+ if (swiotlb_force == SWIOTLB_FORCE ||
+ max_pfn > arm_dma_pfn_limit)
+ swiotlb_init(1);
+ else
+ swiotlb_force = SWIOTLB_NO_FORCE;
#endif
set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
return 0;
}
-static int kernel_set_to_readonly __read_mostly;
-
void mark_rodata_ro(void)
{
- kernel_set_to_readonly = 1;
stop_machine(__mark_rodata_ro, NULL, NULL);
debug_checkwx();
}
-void set_kernel_text_rw(void)
-{
- if (!kernel_set_to_readonly)
- return;
-
- set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false,
- current->active_mm);
-}
-
-void set_kernel_text_ro(void)
-{
- if (!kernel_set_to_readonly)
- return;
-
- set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true,
- current->active_mm);
-}
-
#else
static inline void fix_kernmem_perms(void) { }
#endif /* CONFIG_STRICT_KERNEL_RWX */
#endif
+ @
+ @ Invoke the v7_invalidate_l1() function, which adheres to the AAPCS
+ @ rules, and so it may corrupt registers that we need to preserve.
+ @
+ .macro do_invalidate_l1
+ mov r6, r1
+ mov r7, r2
+ mov r10, lr
+ bl v7_invalidate_l1 @ corrupts {r0-r3, ip, lr}
+ mov r1, r6
+ mov r2, r7
+ mov lr, r10
+ .endm
+
/*
* __v7_setup
*
__v7_ca9mp_setup:
__v7_cr7mp_setup:
__v7_cr8mp_setup:
+ do_invalidate_l1
mov r10, #(1 << 0) @ Cache/TLB ops broadcasting
b 1f
__v7_ca7mp_setup:
__v7_ca15mp_setup:
__v7_b15mp_setup:
__v7_ca17mp_setup:
+ do_invalidate_l1
mov r10, #0
-1: adr r0, __v7_setup_stack_ptr
- ldr r12, [r0]
- add r12, r12, r0 @ the local stack
- stmia r12, {r1-r6, lr} @ v7_invalidate_l1 touches r0-r6
- bl v7_invalidate_l1
- ldmia r12, {r1-r6, lr}
+1:
#ifdef CONFIG_SMP
orr r10, r10, #(1 << 6) @ Enable SMP/nAMP mode
ALT_SMP(mrc p15, 0, r0, c1, c0, 1)
#endif /* CONFIG_CPU_PJ4B */
__v7_setup:
- adr r0, __v7_setup_stack_ptr
- ldr r12, [r0]
- add r12, r12, r0 @ the local stack
- stmia r12, {r1-r6, lr} @ v7_invalidate_l1 touches r0-r6
- bl v7_invalidate_l1
- ldmia r12, {r1-r6, lr}
+ do_invalidate_l1
__v7_setup_cont:
and r0, r9, #0xff000000 @ ARM?
orr r0, r0, r6 @ set them
THUMB( orr r0, r0, #1 << 30 ) @ Thumb exceptions
ret lr @ return to head.S:__ret
-
- .align 2
-__v7_setup_stack_ptr:
- .word PHYS_RELATIVE(__v7_setup_stack, .)
ENDPROC(__v7_setup)
- .bss
- .align 2
-__v7_setup_stack:
- .space 4 * 7 @ 7 registers
-
__INITDATA
.weak cpu_v7_bugs_init
ptdump_walk_pgd(m, info);
return 0;
}
+DEFINE_SHOW_ATTRIBUTE(ptdump);
-static int ptdump_open(struct inode *inode, struct file *file)
-{
- return single_open(file, ptdump_show, inode->i_private);
-}
-
-static const struct file_operations ptdump_fops = {
- .open = ptdump_open,
- .read = seq_read,
- .llseek = seq_lseek,
- .release = single_release,
-};
-
-void ptdump_debugfs_register(struct ptdump_info *info, const char *name)
+void __init ptdump_debugfs_register(struct ptdump_info *info, const char *name)
{
debugfs_create_file(name, 0400, NULL, info, &ptdump_fops);
}
TEST_GROUP("Data-processing (register), (register-shifted register), (immediate)")
#define _DATA_PROCESSING_DNM(op,s,val) \
- TEST_RR( op "eq" s " r0, r",1, VAL1,", r",2, val, "") \
- TEST_RR( op "ne" s " r1, r",1, VAL1,", r",2, val, ", lsl #3") \
- TEST_RR( op "cs" s " r2, r",3, VAL1,", r",2, val, ", lsr #4") \
- TEST_RR( op "cc" s " r3, r",3, VAL1,", r",2, val, ", asr #5") \
- TEST_RR( op "mi" s " r4, r",5, VAL1,", r",2, N(val),", asr #6") \
- TEST_RR( op "pl" s " r5, r",5, VAL1,", r",2, val, ", ror #7") \
- TEST_RR( op "vs" s " r6, r",7, VAL1,", r",2, val, ", rrx") \
- TEST_R( op "vc" s " r6, r",7, VAL1,", pc, lsl #3") \
- TEST_R( op "vc" s " r6, r",7, VAL1,", sp, lsr #4") \
- TEST_R( op "vc" s " r6, pc, r",7, VAL1,", asr #5") \
- TEST_R( op "vc" s " r6, sp, r",7, VAL1,", ror #6") \
- TEST_RRR( op "hi" s " r8, r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")\
- TEST_RRR( op "ls" s " r9, r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")\
- TEST_RRR( op "ge" s " r10, r",11,VAL1,", r",14,val, ", asr r",7, 5,"")\
- TEST_RRR( op "lt" s " r11, r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")\
- TEST_RR( op "gt" s " r12, r13" ", r",14,val, ", ror r",14,7,"")\
- TEST_RR( op "le" s " r14, r",0, val, ", r13" ", lsl r",14,8,"")\
- TEST_R( op "eq" s " r0, r",11,VAL1,", #0xf5") \
- TEST_R( op "ne" s " r11, r",0, VAL1,", #0xf5000000") \
+ TEST_RR( op s "eq r0, r",1, VAL1,", r",2, val, "") \
+ TEST_RR( op s "ne r1, r",1, VAL1,", r",2, val, ", lsl #3") \
+ TEST_RR( op s "cs r2, r",3, VAL1,", r",2, val, ", lsr #4") \
+ TEST_RR( op s "cc r3, r",3, VAL1,", r",2, val, ", asr #5") \
+ TEST_RR( op s "mi r4, r",5, VAL1,", r",2, N(val),", asr #6") \
+ TEST_RR( op s "pl r5, r",5, VAL1,", r",2, val, ", ror #7") \
+ TEST_RR( op s "vs r6, r",7, VAL1,", r",2, val, ", rrx") \
+ TEST_R( op s "vc r6, r",7, VAL1,", pc, lsl #3") \
+ TEST_R( op s "vc r6, r",7, VAL1,", sp, lsr #4") \
+ TEST_R( op s "vc r6, pc, r",7, VAL1,", asr #5") \
+ TEST_R( op s "vc r6, sp, r",7, VAL1,", ror #6") \
+ TEST_RRR( op s "hi r8, r",9, VAL1,", r",14,val, ", lsl r",0, 3,"")\
+ TEST_RRR( op s "ls r9, r",9, VAL1,", r",14,val, ", lsr r",7, 4,"")\
+ TEST_RRR( op s "ge r10, r",11,VAL1,", r",14,val, ", asr r",7, 5,"")\
+ TEST_RRR( op s "lt r11, r",11,VAL1,", r",14,N(val),", asr r",7, 6,"")\
+ TEST_RR( op s "gt r12, r13" ", r",14,val, ", ror r",14,7,"")\
+ TEST_RR( op s "le r14, r",0, val, ", r13" ", lsl r",14,8,"")\
+ TEST_R( op s "eq r0, r",11,VAL1,", #0xf5") \
+ TEST_R( op s "ne r11, r",0, VAL1,", #0xf5000000") \
TEST_R( op s " r7, r",8, VAL2,", #0x000af000") \
TEST( op s " r4, pc" ", #0x00005a00")
TEST_R( op " r",8, VAL2,", #0x000af000")
#define _DATA_PROCESSING_DM(op,s,val) \
- TEST_R( op "eq" s " r0, r",1, val, "") \
- TEST_R( op "ne" s " r1, r",1, val, ", lsl #3") \
- TEST_R( op "cs" s " r2, r",3, val, ", lsr #4") \
- TEST_R( op "cc" s " r3, r",3, val, ", asr #5") \
- TEST_R( op "mi" s " r4, r",5, N(val),", asr #6") \
- TEST_R( op "pl" s " r5, r",5, val, ", ror #7") \
- TEST_R( op "vs" s " r6, r",10,val, ", rrx") \
- TEST( op "vs" s " r7, pc, lsl #3") \
- TEST( op "vs" s " r7, sp, lsr #4") \
- TEST_RR( op "vc" s " r8, r",7, val, ", lsl r",0, 3,"") \
- TEST_RR( op "hi" s " r9, r",9, val, ", lsr r",7, 4,"") \
- TEST_RR( op "ls" s " r10, r",9, val, ", asr r",7, 5,"") \
- TEST_RR( op "ge" s " r11, r",11,N(val),", asr r",7, 6,"") \
- TEST_RR( op "lt" s " r12, r",11,val, ", ror r",14,7,"") \
- TEST_R( op "gt" s " r14, r13" ", lsl r",14,8,"") \
- TEST( op "eq" s " r0, #0xf5") \
- TEST( op "ne" s " r11, #0xf5000000") \
+ TEST_R( op s "eq r0, r",1, val, "") \
+ TEST_R( op s "ne r1, r",1, val, ", lsl #3") \
+ TEST_R( op s "cs r2, r",3, val, ", lsr #4") \
+ TEST_R( op s "cc r3, r",3, val, ", asr #5") \
+ TEST_R( op s "mi r4, r",5, N(val),", asr #6") \
+ TEST_R( op s "pl r5, r",5, val, ", ror #7") \
+ TEST_R( op s "vs r6, r",10,val, ", rrx") \
+ TEST( op s "vs r7, pc, lsl #3") \
+ TEST( op s "vs r7, sp, lsr #4") \
+ TEST_RR( op s "vc r8, r",7, val, ", lsl r",0, 3,"") \
+ TEST_RR( op s "hi r9, r",9, val, ", lsr r",7, 4,"") \
+ TEST_RR( op s "ls r10, r",9, val, ", asr r",7, 5,"") \
+ TEST_RR( op s "ge r11, r",11,N(val),", asr r",7, 6,"") \
+ TEST_RR( op s "lt r12, r",11,val, ", ror r",14,7,"") \
+ TEST_R( op s "gt r14, r13" ", lsl r",14,8,"") \
+ TEST( op s "eq r0, #0xf5") \
+ TEST( op s "ne r11, #0xf5000000") \
TEST( op s " r7, #0x000af000") \
TEST( op s " r4, #0x00005a00")
/* Data-processing with PC as a target and status registers updated */
TEST_UNSUPPORTED("movs pc, r1")
- TEST_UNSUPPORTED("movs pc, r1, lsl r2")
+ TEST_UNSUPPORTED(__inst_arm(0xe1b0f211) " @movs pc, r1, lsl r2")
TEST_UNSUPPORTED("movs pc, #0x10000")
TEST_UNSUPPORTED("adds pc, lr, r1")
- TEST_UNSUPPORTED("adds pc, lr, r1, lsl r2")
+ TEST_UNSUPPORTED(__inst_arm(0xe09ef211) " @adds pc, lr, r1, lsl r2")
TEST_UNSUPPORTED("adds pc, lr, #4")
/* Data-processing with SP as target */
TEST_UNSUPPORTED(__inst_arm(0xe000029f) " @ mul r0, pc, r2")
TEST_UNSUPPORTED(__inst_arm(0xe0000f91) " @ mul r0, r1, pc")
TEST_RR( "muls r0, r",1, VAL1,", r",2, VAL2,"")
- TEST_RR( "mullss r7, r",8, VAL2,", r",9, VAL2,"")
+ TEST_RR( "mulsls r7, r",8, VAL2,", r",9, VAL2,"")
TEST_R( "muls lr, r",4, VAL3,", r13")
TEST_UNSUPPORTED(__inst_arm(0xe01f0291) " @ muls pc, r1, r2")
TEST_RR( "mla lr, r",1, VAL2,", r",2, VAL3,", r13")
TEST_UNSUPPORTED(__inst_arm(0xe02f3291) " @ mla pc, r1, r2, r3")
TEST_RRR( "mlas r0, r",1, VAL1,", r",2, VAL2,", r",3, VAL3,"")
- TEST_RRR( "mlahis r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
+ TEST_RRR( "mlashi r7, r",8, VAL3,", r",9, VAL1,", r",10, VAL2,"")
TEST_RR( "mlas lr, r",1, VAL2,", r",2, VAL3,", r13")
TEST_UNSUPPORTED(__inst_arm(0xe03f3291) " @ mlas pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe081f392) " @ umull pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe08f1392) " @ umull r1, pc, r2, r3")
TEST_RR( "umulls r0, r1, r",2, VAL1,", r",3, VAL2,"")
- TEST_RR( "umulllss r7, r8, r",9, VAL2,", r",10, VAL1,"")
+ TEST_RR( "umullsls r7, r8, r",9, VAL2,", r",10, VAL1,"")
TEST_R( "umulls lr, r12, r",11,VAL3,", r13")
TEST_UNSUPPORTED(__inst_arm(0xe091f392) " @ umulls pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe09f1392) " @ umulls r1, pc, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0af1392) " @ umlal pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0a1f392) " @ umlal r1, pc, r2, r3")
TEST_RRRR( "umlals r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
- TEST_RRRR( "umlalles r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+ TEST_RRRR( "umlalsle r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
TEST_RRR( "umlals r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
TEST_UNSUPPORTED(__inst_arm(0xe0bf1392) " @ umlals pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0b1f392) " @ umlals r1, pc, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0c1f392) " @ smull pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0cf1392) " @ smull r1, pc, r2, r3")
TEST_RR( "smulls r0, r1, r",2, VAL1,", r",3, VAL2,"")
- TEST_RR( "smulllss r7, r8, r",9, VAL2,", r",10, VAL1,"")
+ TEST_RR( "smullsls r7, r8, r",9, VAL2,", r",10, VAL1,"")
TEST_R( "smulls lr, r12, r",11,VAL3,", r13")
TEST_UNSUPPORTED(__inst_arm(0xe0d1f392) " @ smulls pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0df1392) " @ smulls r1, pc, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0ef1392) " @ smlal pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0e1f392) " @ smlal r1, pc, r2, r3")
TEST_RRRR( "smlals r",0, VAL1,", r",1, VAL2,", r",2, VAL3,", r",3, VAL4)
- TEST_RRRR( "smlalles r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
+ TEST_RRRR( "smlalsle r",8, VAL4,", r",9, VAL1,", r",10,VAL2,", r",11,VAL3)
TEST_RRR( "smlals r",14,VAL3,", r",7, VAL4,", r",5, VAL1,", r13")
TEST_UNSUPPORTED(__inst_arm(0xe0ff1392) " @ smlals pc, r1, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe0f0f392) " @ smlals r0, pc, r2, r3")
TEST_UNSUPPORTED(__inst_arm(0xe10f0091) " @ swp r0, r1, [pc]")
#if __LINUX_ARM_ARCH__ < 6
TEST_RP("swpb lr, r",7,VAL2,", [r",8,0,"]")
- TEST_R( "swpvsb r0, r",1,VAL1,", [sp]")
+ TEST_R( "swpbvs r0, r",1,VAL1,", [sp]")
#else
TEST_UNSUPPORTED(__inst_arm(0xe148e097) " @ swpb lr, r7, [r8]")
TEST_UNSUPPORTED(__inst_arm(0x614d0091) " @ swpvsb r0, r1, [sp]")
TEST_GROUP("Extra load/store instructions")
TEST_RPR( "strh r",0, VAL1,", [r",1, 48,", -r",2, 24,"]")
- TEST_RPR( "streqh r",14,VAL2,", [r",11,0, ", r",12, 48,"]")
- TEST_UNSUPPORTED( "streqh r14, [r13, r12]")
- TEST_UNSUPPORTED( "streqh r14, [r12, r13]")
+ TEST_RPR( "strheq r",14,VAL2,", [r",11,0, ", r",12, 48,"]")
+ TEST_UNSUPPORTED( "strheq r14, [r13, r12]")
+ TEST_UNSUPPORTED( "strheq r14, [r12, r13]")
TEST_RPR( "strh r",1, VAL1,", [r",2, 24,", r",3, 48,"]!")
- TEST_RPR( "strneh r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
+ TEST_RPR( "strhne r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
TEST_RPR( "strh r",2, VAL1,", [r",3, 24,"], r",4, 48,"")
TEST_RPR( "strh r",10,VAL2,", [r",9, 48,"], -r",11,24,"")
TEST_UNSUPPORTED(__inst_arm(0xe1afc0ba) " @ strh r12, [pc, r10]!")
TEST_UNSUPPORTED(__inst_arm(0xe089a0bf) " @ strh r10, [r9], pc")
TEST_PR( "ldrh r0, [r",0, 48,", -r",2, 24,"]")
- TEST_PR( "ldrcsh r14, [r",13,0, ", r",12, 48,"]")
+ TEST_PR( "ldrhcs r14, [r",13,0, ", r",12, 48,"]")
TEST_PR( "ldrh r1, [r",2, 24,", r",3, 48,"]!")
- TEST_PR( "ldrcch r12, [r",11,48,", -r",10,24,"]!")
+ TEST_PR( "ldrhcc r12, [r",11,48,", -r",10,24,"]!")
TEST_PR( "ldrh r2, [r",3, 24,"], r",4, 48,"")
TEST_PR( "ldrh r10, [r",9, 48,"], -r",11,24,"")
TEST_UNSUPPORTED(__inst_arm(0xe1bfc0ba) " @ ldrh r12, [pc, r10]!")
TEST_UNSUPPORTED(__inst_arm(0xe099a0bf) " @ ldrh r10, [r9], pc")
TEST_RP( "strh r",0, VAL1,", [r",1, 24,", #-2]")
- TEST_RP( "strmih r",14,VAL2,", [r",13,0, ", #2]")
+ TEST_RP( "strhmi r",14,VAL2,", [r",13,0, ", #2]")
TEST_RP( "strh r",1, VAL1,", [r",2, 24,", #4]!")
- TEST_RP( "strplh r",12,VAL2,", [r",11,24,", #-4]!")
+ TEST_RP( "strhpl r",12,VAL2,", [r",11,24,", #-4]!")
TEST_RP( "strh r",2, VAL1,", [r",3, 24,"], #48")
TEST_RP( "strh r",10,VAL2,", [r",9, 64,"], #-48")
TEST_RP( "strh r",3, VAL1,", [r",13,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!")
TEST_UNSUPPORTED(__inst_arm(0xe0c9f3b0) " @ strh pc, [r9], #48")
TEST_P( "ldrh r0, [r",0, 24,", #-2]")
- TEST_P( "ldrvsh r14, [r",13,0, ", #2]")
+ TEST_P( "ldrhvs r14, [r",13,0, ", #2]")
TEST_P( "ldrh r1, [r",2, 24,", #4]!")
- TEST_P( "ldrvch r12, [r",11,24,", #-4]!")
+ TEST_P( "ldrhvc r12, [r",11,24,", #-4]!")
TEST_P( "ldrh r2, [r",3, 24,"], #48")
TEST_P( "ldrh r10, [r",9, 64,"], #-48")
TEST( "ldrh r0, [pc, #0]")
TEST_UNSUPPORTED(__inst_arm(0xe0d9f3b0) " @ ldrh pc, [r9], #48")
TEST_PR( "ldrsb r0, [r",0, 48,", -r",2, 24,"]")
- TEST_PR( "ldrhisb r14, [r",13,0,", r",12, 48,"]")
+ TEST_PR( "ldrsbhi r14, [r",13,0,", r",12, 48,"]")
TEST_PR( "ldrsb r1, [r",2, 24,", r",3, 48,"]!")
- TEST_PR( "ldrlssb r12, [r",11,48,", -r",10,24,"]!")
+ TEST_PR( "ldrsbls r12, [r",11,48,", -r",10,24,"]!")
TEST_PR( "ldrsb r2, [r",3, 24,"], r",4, 48,"")
TEST_PR( "ldrsb r10, [r",9, 48,"], -r",11,24,"")
TEST_UNSUPPORTED(__inst_arm(0xe1bfc0da) " @ ldrsb r12, [pc, r10]!")
TEST_UNSUPPORTED(__inst_arm(0xe099f0db) " @ ldrsb pc, [r9], r11")
TEST_P( "ldrsb r0, [r",0, 24,", #-1]")
- TEST_P( "ldrgesb r14, [r",13,0, ", #1]")
+ TEST_P( "ldrsbge r14, [r",13,0, ", #1]")
TEST_P( "ldrsb r1, [r",2, 24,", #4]!")
- TEST_P( "ldrltsb r12, [r",11,24,", #-4]!")
+ TEST_P( "ldrsblt r12, [r",11,24,", #-4]!")
TEST_P( "ldrsb r2, [r",3, 24,"], #48")
TEST_P( "ldrsb r10, [r",9, 64,"], #-48")
TEST( "ldrsb r0, [pc, #0]")
TEST_UNSUPPORTED(__inst_arm(0xe0d9f3d0) " @ ldrsb pc, [r9], #48")
TEST_PR( "ldrsh r0, [r",0, 48,", -r",2, 24,"]")
- TEST_PR( "ldrgtsh r14, [r",13,0, ", r",12, 48,"]")
+ TEST_PR( "ldrshgt r14, [r",13,0, ", r",12, 48,"]")
TEST_PR( "ldrsh r1, [r",2, 24,", r",3, 48,"]!")
- TEST_PR( "ldrlesh r12, [r",11,48,", -r",10,24,"]!")
+ TEST_PR( "ldrshle r12, [r",11,48,", -r",10,24,"]!")
TEST_PR( "ldrsh r2, [r",3, 24,"], r",4, 48,"")
TEST_PR( "ldrsh r10, [r",9, 48,"], -r",11,24,"")
TEST_UNSUPPORTED(__inst_arm(0xe1bfc0fa) " @ ldrsh r12, [pc, r10]!")
TEST_UNSUPPORTED(__inst_arm(0xe099f0fb) " @ ldrsh pc, [r9], r11")
TEST_P( "ldrsh r0, [r",0, 24,", #-1]")
- TEST_P( "ldreqsh r14, [r",13,0 ,", #1]")
+ TEST_P( "ldrsheq r14, [r",13,0 ,", #1]")
TEST_P( "ldrsh r1, [r",2, 24,", #4]!")
- TEST_P( "ldrnesh r12, [r",11,24,", #-4]!")
+ TEST_P( "ldrshne r12, [r",11,24,", #-4]!")
TEST_P( "ldrsh r2, [r",3, 24,"], #48")
TEST_P( "ldrsh r10, [r",9, 64,"], #-48")
TEST( "ldrsh r0, [pc, #0]")
#if __LINUX_ARM_ARCH__ >= 5
TEST_RPR( "strd r",0, VAL1,", [r",1, 48,", -r",2,24,"]")
- TEST_RPR( "strccd r",8, VAL2,", [r",11,0, ", r",12,48,"]")
- TEST_UNSUPPORTED( "strccd r8, [r13, r12]")
- TEST_UNSUPPORTED( "strccd r8, [r12, r13]")
+ TEST_RPR( "strdcc r",8, VAL2,", [r",11,0, ", r",12,48,"]")
+ TEST_UNSUPPORTED( "strdcc r8, [r13, r12]")
+ TEST_UNSUPPORTED( "strdcc r8, [r12, r13]")
TEST_RPR( "strd r",4, VAL1,", [r",2, 24,", r",3, 48,"]!")
- TEST_RPR( "strcsd r",12,VAL2,", [r",11,48,", -r",10,24,"]!")
- TEST_RPR( "strd r",2, VAL1,", [r",5, 24,"], r",4,48,"")
- TEST_RPR( "strd r",10,VAL2,", [r",9, 48,"], -r",7,24,"")
+ TEST_RPR( "strdcs r",12,VAL2,", r13, [r",11,48,", -r",10,24,"]!")
+ TEST_RPR( "strd r",2, VAL1,", r3, [r",5, 24,"], r",4,48,"")
+ TEST_RPR( "strd r",10,VAL2,", r11, [r",9, 48,"], -r",7,24,"")
TEST_UNSUPPORTED(__inst_arm(0xe1afc0fa) " @ strd r12, [pc, r10]!")
TEST_PR( "ldrd r0, [r",0, 48,", -r",2,24,"]")
- TEST_PR( "ldrmid r8, [r",13,0, ", r",12,48,"]")
+ TEST_PR( "ldrdmi r8, [r",13,0, ", r",12,48,"]")
TEST_PR( "ldrd r4, [r",2, 24,", r",3, 48,"]!")
- TEST_PR( "ldrpld r6, [r",11,48,", -r",10,24,"]!")
- TEST_PR( "ldrd r2, [r",5, 24,"], r",4,48,"")
- TEST_PR( "ldrd r10, [r",9,48,"], -r",7,24,"")
+ TEST_PR( "ldrdpl r6, [r",11,48,", -r",10,24,"]!")
+ TEST_PR( "ldrd r2, r3, [r",5, 24,"], r",4,48,"")
+ TEST_PR( "ldrd r10, r11, [r",9,48,"], -r",7,24,"")
TEST_UNSUPPORTED(__inst_arm(0xe1afc0da) " @ ldrd r12, [pc, r10]!")
TEST_UNSUPPORTED(__inst_arm(0xe089f0db) " @ ldrd pc, [r9], r11")
TEST_UNSUPPORTED(__inst_arm(0xe089e0db) " @ ldrd lr, [r9], r11")
TEST_UNSUPPORTED(__inst_arm(0xe089c0df) " @ ldrd r12, [r9], pc")
TEST_RP( "strd r",0, VAL1,", [r",1, 24,", #-8]")
- TEST_RP( "strvsd r",8, VAL2,", [r",13,0, ", #8]")
+ TEST_RP( "strdvs r",8, VAL2,", [r",13,0, ", #8]")
TEST_RP( "strd r",4, VAL1,", [r",2, 24,", #16]!")
- TEST_RP( "strvcd r",12,VAL2,", [r",11,24,", #-16]!")
+ TEST_RP( "strdvc r",12,VAL2,", r13, [r",11,24,", #-16]!")
TEST_RP( "strd r",2, VAL1,", [r",4, 24,"], #48")
TEST_RP( "strd r",10,VAL2,", [r",9, 64,"], #-48")
TEST_RP( "strd r",6, VAL1,", [r",13,TEST_MEMORY_SIZE,", #-"__stringify(MAX_STACK_SIZE)"]!")
TEST_UNSUPPORTED(__inst_arm(0xe1efc3f0) " @ strd r12, [pc, #48]!")
TEST_P( "ldrd r0, [r",0, 24,", #-8]")
- TEST_P( "ldrhid r8, [r",13,0, ", #8]")
+ TEST_P( "ldrdhi r8, [r",13,0, ", #8]")
TEST_P( "ldrd r4, [r",2, 24,", #16]!")
- TEST_P( "ldrlsd r6, [r",11,24,", #-16]!")
+ TEST_P( "ldrdls r6, [r",11,24,", #-16]!")
TEST_P( "ldrd r2, [r",5, 24,"], #48")
TEST_P( "ldrd r10, [r",9,6,"], #-48")
TEST_UNSUPPORTED(__inst_arm(0xe1efc3d0) " @ ldrd r12, [pc, #48]!")
TEST_GROUP("Branch, branch with link, and block data transfer")
TEST_P( "stmda r",0, 16*4,", {r0}")
- TEST_P( "stmeqda r",4, 16*4,", {r0-r15}")
- TEST_P( "stmneda r",8, 16*4,"!, {r8-r15}")
+ TEST_P( "stmdaeq r",4, 16*4,", {r0-r15}")
+ TEST_P( "stmdane r",8, 16*4,"!, {r8-r15}")
TEST_P( "stmda r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
TEST_P( "stmda r",13,0, "!, {pc}")
TEST_P( "ldmda r",0, 16*4,", {r0}")
- TEST_BF_P("ldmcsda r",4, 15*4,", {r0-r15}")
- TEST_BF_P("ldmccda r",7, 15*4,"!, {r8-r15}")
+ TEST_BF_P("ldmdacs r",4, 15*4,", {r0-r15}")
+ TEST_BF_P("ldmdacc r",7, 15*4,"!, {r8-r15}")
TEST_P( "ldmda r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
TEST_BF_P("ldmda r",14,15*4,"!, {pc}")
TEST_P( "stmia r",0, 16*4,", {r0}")
- TEST_P( "stmmiia r",4, 16*4,", {r0-r15}")
- TEST_P( "stmplia r",8, 16*4,"!, {r8-r15}")
+ TEST_P( "stmiami r",4, 16*4,", {r0-r15}")
+ TEST_P( "stmiapl r",8, 16*4,"!, {r8-r15}")
TEST_P( "stmia r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
TEST_P( "stmia r",14,0, "!, {pc}")
TEST_P( "ldmia r",0, 16*4,", {r0}")
- TEST_BF_P("ldmvsia r",4, 0, ", {r0-r15}")
- TEST_BF_P("ldmvcia r",7, 8*4, "!, {r8-r15}")
+ TEST_BF_P("ldmiavs r",4, 0, ", {r0-r15}")
+ TEST_BF_P("ldmiavc r",7, 8*4, "!, {r8-r15}")
TEST_P( "ldmia r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
TEST_BF_P("ldmia r",14,15*4,"!, {pc}")
TEST_P( "stmdb r",0, 16*4,", {r0}")
- TEST_P( "stmhidb r",4, 16*4,", {r0-r15}")
- TEST_P( "stmlsdb r",8, 16*4,"!, {r8-r15}")
+ TEST_P( "stmdbhi r",4, 16*4,", {r0-r15}")
+ TEST_P( "stmdbls r",8, 16*4,"!, {r8-r15}")
TEST_P( "stmdb r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
TEST_P( "stmdb r",13,4, "!, {pc}")
TEST_P( "ldmdb r",0, 16*4,", {r0}")
- TEST_BF_P("ldmgedb r",4, 16*4,", {r0-r15}")
- TEST_BF_P("ldmltdb r",7, 16*4,"!, {r8-r15}")
+ TEST_BF_P("ldmdbge r",4, 16*4,", {r0-r15}")
+ TEST_BF_P("ldmdblt r",7, 16*4,"!, {r8-r15}")
TEST_P( "ldmdb r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
TEST_BF_P("ldmdb r",14,16*4,"!, {pc}")
TEST_P( "stmib r",0, 16*4,", {r0}")
- TEST_P( "stmgtib r",4, 16*4,", {r0-r15}")
- TEST_P( "stmleib r",8, 16*4,"!, {r8-r15}")
+ TEST_P( "stmibgt r",4, 16*4,", {r0-r15}")
+ TEST_P( "stmible r",8, 16*4,"!, {r8-r15}")
TEST_P( "stmib r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
TEST_P( "stmib r",13,-4, "!, {pc}")
TEST_P( "ldmib r",0, 16*4,", {r0}")
- TEST_BF_P("ldmeqib r",4, -4,", {r0-r15}")
- TEST_BF_P("ldmneib r",7, 7*4,"!, {r8-r15}")
+ TEST_BF_P("ldmibeq r",4, -4,", {r0-r15}")
+ TEST_BF_P("ldmibne r",7, 7*4,"!, {r8-r15}")
TEST_P( "ldmib r",12,16*4,"!, {r1,r3,r5,r7,r8-r11,r14}")
TEST_BF_P("ldmib r",14,14*4,"!, {pc}")
TEST_P( "stmdb r",13,16*4,"!, {r3-r12,lr}")
- TEST_P( "stmeqdb r",13,16*4,"!, {r3-r12}")
- TEST_P( "stmnedb r",2, 16*4,", {r3-r12,lr}")
+ TEST_P( "stmdbeq r",13,16*4,"!, {r3-r12}")
+ TEST_P( "stmdbne r",2, 16*4,", {r3-r12,lr}")
TEST_P( "stmdb r",13,16*4,"!, {r2-r12,lr}")
TEST_P( "stmdb r",0, 16*4,", {r0-r12}")
TEST_P( "stmdb r",0, 16*4,", {r0-r12,lr}")
TEST_BF_P("ldmia r",13,5*4, "!, {r3-r12,pc}")
- TEST_P( "ldmccia r",13,5*4, "!, {r3-r12}")
- TEST_BF_P("ldmcsia r",2, 5*4, "!, {r3-r12,pc}")
+ TEST_P( "ldmiacc r",13,5*4, "!, {r3-r12}")
+ TEST_BF_P("ldmiacs r",2, 5*4, "!, {r3-r12,pc}")
TEST_BF_P("ldmia r",13,4*4, "!, {r2-r12,pc}")
TEST_P( "ldmia r",0, 16*4,", {r0-r12}")
TEST_P( "ldmia r",0, 16*4,", {r0-r12,lr}")
#define TEST_COPROCESSOR(code) TEST_UNSUPPORTED(code)
#define COPROCESSOR_INSTRUCTIONS_ST_LD(two,cc) \
- TEST_COPROCESSOR("stc"two" 0, cr0, [r13, #4]") \
- TEST_COPROCESSOR("stc"two" 0, cr0, [r13, #-4]") \
- TEST_COPROCESSOR("stc"two" 0, cr0, [r13, #4]!") \
- TEST_COPROCESSOR("stc"two" 0, cr0, [r13, #-4]!") \
- TEST_COPROCESSOR("stc"two" 0, cr0, [r13], #4") \
- TEST_COPROCESSOR("stc"two" 0, cr0, [r13], #-4") \
- TEST_COPROCESSOR("stc"two" 0, cr0, [r13], {1}") \
- TEST_COPROCESSOR("stc"two"l 0, cr0, [r13, #4]") \
- TEST_COPROCESSOR("stc"two"l 0, cr0, [r13, #-4]") \
- TEST_COPROCESSOR("stc"two"l 0, cr0, [r13, #4]!") \
- TEST_COPROCESSOR("stc"two"l 0, cr0, [r13, #-4]!") \
- TEST_COPROCESSOR("stc"two"l 0, cr0, [r13], #4") \
- TEST_COPROCESSOR("stc"two"l 0, cr0, [r13], #-4") \
- TEST_COPROCESSOR("stc"two"l 0, cr0, [r13], {1}") \
- TEST_COPROCESSOR("ldc"two" 0, cr0, [r13, #4]") \
- TEST_COPROCESSOR("ldc"two" 0, cr0, [r13, #-4]") \
- TEST_COPROCESSOR("ldc"two" 0, cr0, [r13, #4]!") \
- TEST_COPROCESSOR("ldc"two" 0, cr0, [r13, #-4]!") \
- TEST_COPROCESSOR("ldc"two" 0, cr0, [r13], #4") \
- TEST_COPROCESSOR("ldc"two" 0, cr0, [r13], #-4") \
- TEST_COPROCESSOR("ldc"two" 0, cr0, [r13], {1}") \
- TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13, #4]") \
- TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13, #-4]") \
- TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13, #4]!") \
- TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13, #-4]!") \
- TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13], #4") \
- TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13], #-4") \
- TEST_COPROCESSOR("ldc"two"l 0, cr0, [r13], {1}") \
+ TEST_COPROCESSOR("stc"two" p0, cr0, [r13, #4]") \
+ TEST_COPROCESSOR("stc"two" p0, cr0, [r13, #-4]") \
+ TEST_COPROCESSOR("stc"two" p0, cr0, [r13, #4]!") \
+ TEST_COPROCESSOR("stc"two" p0, cr0, [r13, #-4]!") \
+ TEST_COPROCESSOR("stc"two" p0, cr0, [r13], #4") \
+ TEST_COPROCESSOR("stc"two" p0, cr0, [r13], #-4") \
+ TEST_COPROCESSOR("stc"two" p0, cr0, [r13], {1}") \
+ TEST_COPROCESSOR("stc"two"l p0, cr0, [r13, #4]") \
+ TEST_COPROCESSOR("stc"two"l p0, cr0, [r13, #-4]") \
+ TEST_COPROCESSOR("stc"two"l p0, cr0, [r13, #4]!") \
+ TEST_COPROCESSOR("stc"two"l p0, cr0, [r13, #-4]!") \
+ TEST_COPROCESSOR("stc"two"l p0, cr0, [r13], #4") \
+ TEST_COPROCESSOR("stc"two"l p0, cr0, [r13], #-4") \
+ TEST_COPROCESSOR("stc"two"l p0, cr0, [r13], {1}") \
+ TEST_COPROCESSOR("ldc"two" p0, cr0, [r13, #4]") \
+ TEST_COPROCESSOR("ldc"two" p0, cr0, [r13, #-4]") \
+ TEST_COPROCESSOR("ldc"two" p0, cr0, [r13, #4]!") \
+ TEST_COPROCESSOR("ldc"two" p0, cr0, [r13, #-4]!") \
+ TEST_COPROCESSOR("ldc"two" p0, cr0, [r13], #4") \
+ TEST_COPROCESSOR("ldc"two" p0, cr0, [r13], #-4") \
+ TEST_COPROCESSOR("ldc"two" p0, cr0, [r13], {1}") \
+ TEST_COPROCESSOR("ldc"two"l p0, cr0, [r13, #4]") \
+ TEST_COPROCESSOR("ldc"two"l p0, cr0, [r13, #-4]") \
+ TEST_COPROCESSOR("ldc"two"l p0, cr0, [r13, #4]!") \
+ TEST_COPROCESSOR("ldc"two"l p0, cr0, [r13, #-4]!") \
+ TEST_COPROCESSOR("ldc"two"l p0, cr0, [r13], #4") \
+ TEST_COPROCESSOR("ldc"two"l p0, cr0, [r13], #-4") \
+ TEST_COPROCESSOR("ldc"two"l p0, cr0, [r13], {1}") \
\
- TEST_COPROCESSOR( "stc"two" 0, cr0, [r15, #4]") \
- TEST_COPROCESSOR( "stc"two" 0, cr0, [r15, #-4]") \
+ TEST_COPROCESSOR( "stc"two" p0, cr0, [r15, #4]") \
+ TEST_COPROCESSOR( "stc"two" p0, cr0, [r15, #-4]") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##daf0001) " @ stc"two" 0, cr0, [r15, #4]!") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##d2f0001) " @ stc"two" 0, cr0, [r15, #-4]!") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##caf0001) " @ stc"two" 0, cr0, [r15], #4") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##c2f0001) " @ stc"two" 0, cr0, [r15], #-4") \
- TEST_COPROCESSOR( "stc"two" 0, cr0, [r15], {1}") \
- TEST_COPROCESSOR( "stc"two"l 0, cr0, [r15, #4]") \
- TEST_COPROCESSOR( "stc"two"l 0, cr0, [r15, #-4]") \
+ TEST_COPROCESSOR( "stc"two" p0, cr0, [r15], {1}") \
+ TEST_COPROCESSOR( "stc"two"l p0, cr0, [r15, #4]") \
+ TEST_COPROCESSOR( "stc"two"l p0, cr0, [r15, #-4]") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##def0001) " @ stc"two"l 0, cr0, [r15, #4]!") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##d6f0001) " @ stc"two"l 0, cr0, [r15, #-4]!") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##cef0001) " @ stc"two"l 0, cr0, [r15], #4") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##c6f0001) " @ stc"two"l 0, cr0, [r15], #-4") \
- TEST_COPROCESSOR( "stc"two"l 0, cr0, [r15], {1}") \
- TEST_COPROCESSOR( "ldc"two" 0, cr0, [r15, #4]") \
- TEST_COPROCESSOR( "ldc"two" 0, cr0, [r15, #-4]") \
+ TEST_COPROCESSOR( "stc"two"l p0, cr0, [r15], {1}") \
+ TEST_COPROCESSOR( "ldc"two" p0, cr0, [r15, #4]") \
+ TEST_COPROCESSOR( "ldc"two" p0, cr0, [r15, #-4]") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##dbf0001) " @ ldc"two" 0, cr0, [r15, #4]!") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##d3f0001) " @ ldc"two" 0, cr0, [r15, #-4]!") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##cbf0001) " @ ldc"two" 0, cr0, [r15], #4") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##c3f0001) " @ ldc"two" 0, cr0, [r15], #-4") \
- TEST_COPROCESSOR( "ldc"two" 0, cr0, [r15], {1}") \
- TEST_COPROCESSOR( "ldc"two"l 0, cr0, [r15, #4]") \
- TEST_COPROCESSOR( "ldc"two"l 0, cr0, [r15, #-4]") \
+ TEST_COPROCESSOR( "ldc"two" p0, cr0, [r15], {1}") \
+ TEST_COPROCESSOR( "ldc"two"l p0, cr0, [r15, #4]") \
+ TEST_COPROCESSOR( "ldc"two"l p0, cr0, [r15, #-4]") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##dff0001) " @ ldc"two"l 0, cr0, [r15, #4]!") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##d7f0001) " @ ldc"two"l 0, cr0, [r15, #-4]!") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##cff0001) " @ ldc"two"l 0, cr0, [r15], #4") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##c7f0001) " @ ldc"two"l 0, cr0, [r15], #-4") \
- TEST_COPROCESSOR( "ldc"two"l 0, cr0, [r15], {1}")
+ TEST_COPROCESSOR( "ldc"two"l p0, cr0, [r15], {1}")
#define COPROCESSOR_INSTRUCTIONS_MC_MR(two,cc) \
\
- TEST_COPROCESSOR( "mcrr"two" 0, 15, r0, r14, cr0") \
- TEST_COPROCESSOR( "mcrr"two" 15, 0, r14, r0, cr15") \
+ TEST_COPROCESSOR( "mcrr"two" p0, 15, r0, r14, cr0") \
+ TEST_COPROCESSOR( "mcrr"two" p15, 0, r14, r0, cr15") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##c4f00f0) " @ mcrr"two" 0, 15, r0, r15, cr0") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##c40ff0f) " @ mcrr"two" 15, 0, r15, r0, cr15") \
- TEST_COPROCESSOR( "mrrc"two" 0, 15, r0, r14, cr0") \
- TEST_COPROCESSOR( "mrrc"two" 15, 0, r14, r0, cr15") \
+ TEST_COPROCESSOR( "mrrc"two" p0, 15, r0, r14, cr0") \
+ TEST_COPROCESSOR( "mrrc"two" p15, 0, r14, r0, cr15") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##c5f00f0) " @ mrrc"two" 0, 15, r0, r15, cr0") \
TEST_UNSUPPORTED(__inst_arm(0x##cc##c50ff0f) " @ mrrc"two" 15, 0, r15, r0, cr15") \
- TEST_COPROCESSOR( "cdp"two" 15, 15, cr15, cr15, cr15, 7") \
- TEST_COPROCESSOR( "cdp"two" 0, 0, cr0, cr0, cr0, 0") \
- TEST_COPROCESSOR( "mcr"two" 15, 7, r15, cr15, cr15, 7") \
- TEST_COPROCESSOR( "mcr"two" 0, 0, r0, cr0, cr0, 0") \
- TEST_COPROCESSOR( "mrc"two" 15, 7, r15, cr15, cr15, 7") \
- TEST_COPROCESSOR( "mrc"two" 0, 0, r0, cr0, cr0, 0")
+ TEST_COPROCESSOR( "cdp"two" p15, 15, cr15, cr15, cr15, 7") \
+ TEST_COPROCESSOR( "cdp"two" p0, 0, cr0, cr0, cr0, 0") \
+ TEST_COPROCESSOR( "mcr"two" p15, 7, r15, cr15, cr15, 7") \
+ TEST_COPROCESSOR( "mcr"two" p0, 0, r0, cr0, cr0, 0") \
+ TEST_COPROCESSOR( "mrc"two" p15, 7, r14, cr15, cr15, 7") \
+ TEST_COPROCESSOR( "mrc"two" p0, 0, r0, cr0, cr0, 0")
COPROCESSOR_INSTRUCTIONS_ST_LD("",e)
#if __LINUX_ARM_ARCH__ >= 5
#define TESTCASE_START(title) \
__asm__ __volatile__ ( \
+ ".syntax unified \n\t" \
"bl __kprobes_test_case_start \n\t" \
".pushsection .rodata \n\t" \
"10: \n\t" \
gen := arch/$(ARCH)/include/generated
kapi := $(gen)/asm
uapi := $(gen)/uapi/asm
-syshdr := $(srctree)/$(src)/syscallhdr.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
sysnr := $(srctree)/$(src)/syscallnr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
syscall := $(src)/syscall.tbl
gen-y := $(gen)/calls-oabi.S
gen-y += $(gen)/calls-eabi.S
kapi-hdrs-y := $(kapi)/unistd-nr.h
kapi-hdrs-y += $(kapi)/mach-types.h
-uapi-hdrs-y := $(uapi)/unistd-common.h
uapi-hdrs-y += $(uapi)/unistd-oabi.h
uapi-hdrs-y += $(uapi)/unistd-eabi.h
$(call if_changed,gen_mach)
quiet_cmd_syshdr = SYSHDR $@
- cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
- '$(syshdr_abi_$(basetarget))' \
- '$(syshdr_pfx_$(basetarget))' \
- '__NR_SYSCALL_BASE'
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --abis $(abis) \
+ --offset __NR_SYSCALL_BASE $< $@
quiet_cmd_systbl = SYSTBL $@
- cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
- '$(systbl_abi_$(basetarget))'
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
quiet_cmd_sysnr = SYSNR $@
cmd_sysnr = $(CONFIG_SHELL) '$(sysnr)' '$<' '$@' \
'$(syshdr_abi_$(basetarget))'
-syshdr_abi_unistd-common := common
-$(uapi)/unistd-common.h: $(syscall) $(syshdr) FORCE
- $(call if_changed,syshdr)
-
-syshdr_abi_unistd-oabi := oabi
+$(uapi)/unistd-oabi.h: abis := common,oabi
$(uapi)/unistd-oabi.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
-syshdr_abi_unistd-eabi := eabi
+$(uapi)/unistd-eabi.h: abis := common,eabi
$(uapi)/unistd-eabi.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
$(kapi)/unistd-nr.h: $(syscall) $(sysnr) FORCE
$(call if_changed,sysnr)
-systbl_abi_calls-oabi := common,oabi
+$(gen)/calls-oabi.S: abis := common,oabi
$(gen)/calls-oabi.S: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
-systbl_abi_calls-eabi := common,eabi
+$(gen)/calls-eabi.S: abis := common,eabi
$(gen)/calls-eabi.S: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
+++ /dev/null
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_ASM_ARM_`basename "$out" | sed \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
- -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-if echo $out | grep -q uapi; then
- fileguard="_UAPI$fileguard"
-fi
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- echo "#ifndef ${fileguard}"
- echo "#define ${fileguard} 1"
- echo ""
-
- while read nr abi name entry ; do
- if [ -z "$offset" ]; then
- echo "#define __NR_${prefix}${name} $nr"
- else
- echo "#define __NR_${prefix}${name} ($offset + $nr)"
- fi
- done
-
- echo ""
- echo "#endif /* ${fileguard} */"
-) > "$out"
+++ /dev/null
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- while read nr abi name entry compat; do
- if [ "$abi" = "eabi" -a -n "$compat" ]; then
- echo "$in: error: a compat entry for an EABI syscall ($name) makes no sense" >&2
- exit 1
- fi
-
- if [ -n "$entry" ]; then
- if [ -z "$compat" ]; then
- echo "NATIVE($nr, $entry)"
- else
- echo "COMPAT($nr, $entry, $compat)"
- fi
- fi
- done
-) > "$out"
struct gnttab_cache_flush cflush;
if (!xen_swiotlb_detect())
return 0;
- xen_swiotlb_init(1, false);
+ xen_swiotlb_init();
cflush.op = 0;
cflush.a.dev_bus_addr = 0;
select ACPI_PPTT if ACPI
select ARCH_HAS_DEBUG_WX
select ARCH_BINFMT_ELF_STATE
+ select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
+ select ARCH_ENABLE_MEMORY_HOTPLUG
+ select ARCH_ENABLE_MEMORY_HOTREMOVE
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2
+ select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
+ select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_USE_QUEUED_SPINLOCKS
select ARCH_USE_SYM_ANNOTATIONS
select ARCH_SUPPORTS_DEBUG_PAGEALLOC
+ select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_MEMORY_FAILURE
select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
select ARCH_SUPPORTS_LTO_CLANG if CPU_LITTLE_ENDIAN
select HAVE_CMPXCHG_DOUBLE
select HAVE_CMPXCHG_LOCAL
select HAVE_CONTEXT_TRACKING
- select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DMA_CONTIGUOUS
select HAVE_DYNAMIC_FTRACE
select SWIOTLB
select SYSCTL_EXCEPTION_TRACE
select THREAD_INFO_IN_TASK
+ select HAVE_ARCH_USERFAULTFD_MINOR if USERFAULTFD
help
ARM 64-bit (AArch64) Linux support.
bool "Support DMA32 zone" if EXPERT
default y
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
+config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
def_bool y
config SMP
config ARCH_SPARSEMEM_ENABLE
def_bool y
select SPARSEMEM_VMEMMAP_ENABLE
-
-config ARCH_SPARSEMEM_DEFAULT
- def_bool ARCH_SPARSEMEM_ENABLE
-
-config ARCH_SELECT_MEMORY_MODEL
- def_bool ARCH_SPARSEMEM_ENABLE
-
-config ARCH_FLATMEM_ENABLE
- def_bool !NUMA
+ select SPARSEMEM_VMEMMAP
config HW_PERF_EVENTS
def_bool y
depends on ARM_PMU
-config SYS_SUPPORTS_HUGETLBFS
- def_bool y
-
-config ARCH_HAS_CACHE_LINE_SIZE
- def_bool y
-
config ARCH_HAS_FILTER_PGPROT
def_bool y
-config ARCH_ENABLE_SPLIT_PMD_PTLOCK
- def_bool y if PGTABLE_LEVELS > 2
-
# Supported by clang >= 7.0
config CC_HAVE_SHADOW_CALL_STACK
def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18)
def_bool y
depends on COMPAT && SYSVIPC
-config ARCH_ENABLE_HUGEPAGE_MIGRATION
- def_bool y
- depends on HUGETLB_PAGE && MIGRATION
-
-config ARCH_ENABLE_THP_MIGRATION
- def_bool y
- depends on TRANSPARENT_HUGEPAGE
-
menu "Power management options"
source "kernel/power/Kconfig"
pinctrl-names = "default";
pinctrl-0 = <&pwm0_pin>;
clocks = <&cru PCLK_PWM1>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm1_pin>;
clocks = <&cru PCLK_PWM1>;
- clock-names = "pwm";
status = "disabled";
};
reg = <0x0 0xff680020 0x0 0x10>;
#pwm-cells = <3>;
clocks = <&cru PCLK_PWM1>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm3_pin>;
clocks = <&cru PCLK_PWM1>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm0_pin>;
clocks = <&pmucru PCLK_RKPWM_PMU>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm1_pin>;
clocks = <&pmucru PCLK_RKPWM_PMU>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm2_pin>;
clocks = <&pmucru PCLK_RKPWM_PMU>;
- clock-names = "pwm";
status = "disabled";
};
pinctrl-names = "default";
pinctrl-0 = <&pwm3a_pin>;
clocks = <&pmucru PCLK_RKPWM_PMU>;
- clock-names = "pwm";
status = "disabled";
};
if (interrupts_enabled(regs))
trace_hardirqs_on();
+ if (system_uses_irq_prio_masking())
+ gic_write_pmr(regs->pmr_save);
+
/*
* We can't use local_daif_restore(regs->pstate) here as
* system_has_prio_mask_debugging() won't restore the I bit if it can
* has a direct correspondence, and needs to appear sufficiently aligned
* in the virtual address space.
*/
-#if defined(CONFIG_SPARSEMEM_VMEMMAP) && ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
+#if ARM64_MEMSTART_SHIFT < SECTION_SIZE_BITS
#define ARM64_MEMSTART_ALIGN (1UL << SECTION_SIZE_BITS)
#else
#define ARM64_MEMSTART_ALIGN (1UL << ARM64_MEMSTART_SHIFT)
*/
#define ARCH_PFN_OFFSET ((unsigned long)PHYS_PFN_OFFSET)
-#if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL)
+#if defined(CONFIG_DEBUG_VIRTUAL)
#define page_to_virt(x) ({ \
__typeof__(x) __page = x; \
void *__addr = __va(page_to_phys(__page)); \
u64 __addr = VMEMMAP_START + (__idx * sizeof(struct page)); \
(struct page *)__addr; \
})
-#endif /* !CONFIG_SPARSEMEM_VMEMMAP || CONFIG_DEBUG_VIRTUAL */
+#endif /* CONFIG_DEBUG_VIRTUAL */
#define virt_addr_valid(addr) ({ \
__typeof__(addr) __addr = __tag_reset(addr); \
#ifndef __ASM_SPARSEMEM_H
#define __ASM_SPARSEMEM_H
-#ifdef CONFIG_SPARSEMEM
#define MAX_PHYSMEM_BITS CONFIG_ARM64_PA_BITS
/*
#define SECTION_SIZE_BITS 27
#endif /* CONFIG_ARM64_64K_PAGES */
-#endif /* CONFIG_SPARSEMEM*/
-
#endif
} while (cur += d_size, cur < end);
}
-static void __nocfi __apply_alternatives(void *alt_region, bool is_module,
- unsigned long *feature_mask)
+static void __nocfi __apply_alternatives(struct alt_region *region, bool is_module,
+ unsigned long *feature_mask)
{
struct alt_instr *alt;
- struct alt_region *region = alt_region;
__le32 *origptr, *updptr;
alternative_cb_t alt_cb;
#include <linux/sort.h>
#include <linux/stop_machine.h>
#include <linux/types.h>
+#include <linux/minmax.h>
#include <linux/mm.h>
#include <linux/cpu.h>
#include <linux/kasan.h>
ret = ftrp->safe_val;
break;
case FTR_LOWER_SAFE:
- ret = new < cur ? new : cur;
+ ret = min(new, cur);
break;
case FTR_HIGHER_OR_ZERO_SAFE:
if (!cur || !new)
break;
fallthrough;
case FTR_HIGHER_SAFE:
- ret = new > cur ? new : cur;
+ ret = max(new, cur);
break;
default:
BUG();
/**
* arm_cpuidle_suspend() - function to enter a low-power idle state
- * @arg: argument to pass to CPU suspend operations
+ * @index: argument to pass to CPU suspend operations
*
* Return: 0 on success, -EOPNOTSUPP if CPU suspend hook not initialized, CPU
* operations back-end error code otherwise.
{
unsigned long far = read_sysreg(far_el1);
- /*
- * The CPU masked interrupts, and we are leaving them masked during
- * do_debug_exception(). Update PMR as if we had called
- * local_daif_mask().
- */
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
arm64_enter_el1_dbg(regs);
if (!cortex_a76_erratum_1463225_debug_handler(regs))
do_debug_exception(far, esr, regs);
/* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */
unsigned long far = read_sysreg(far_el1);
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
enter_from_user_mode();
do_debug_exception(far, esr, regs);
local_daif_restore(DAIF_PROCCTX_NOIRQ);
static void noinstr el0_svc(struct pt_regs *regs)
{
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
enter_from_user_mode();
cortex_a76_erratum_1463225_svc_handler();
do_el0_svc(regs);
static void noinstr el0_svc_compat(struct pt_regs *regs)
{
- if (system_uses_irq_prio_masking())
- gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET);
-
enter_from_user_mode();
cortex_a76_erratum_1463225_svc_handler();
do_el0_svc_compat(regs);
stp lr, x21, [sp, #S_LR]
/*
- * For exceptions from EL0, terminate the callchain here.
+ * For exceptions from EL0, create a terminal frame record.
* For exceptions from EL1, create a synthetic frame record so the
* interrupted code shows up in the backtrace.
*/
.if \el == 0
- mov x29, xzr
+ stp xzr, xzr, [sp, #S_STACKFRAME]
.else
stp x29, x22, [sp, #S_STACKFRAME]
- add x29, sp, #S_STACKFRAME
.endif
+ add x29, sp, #S_STACKFRAME
#ifdef CONFIG_ARM64_SW_TTBR0_PAN
alternative_if_not ARM64_HAS_PAN
alternative_if ARM64_HAS_IRQ_PRIO_MASKING
mrs_s x20, SYS_ICC_PMR_EL1
str x20, [sp, #S_PMR_SAVE]
+ mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET
+ msr_s SYS_ICC_PMR_EL1, x20
alternative_else_nop_endif
/* Re-enable tag checking (TCO set on exception entry) */
#endif
.endm
- .macro gic_prio_irq_setup, pmr:req, tmp:req
-#ifdef CONFIG_ARM64_PSEUDO_NMI
- alternative_if ARM64_HAS_IRQ_PRIO_MASKING
- orr \tmp, \pmr, #GIC_PRIO_PSR_I_SET
- msr_s SYS_ICC_PMR_EL1, \tmp
- alternative_else_nop_endif
-#endif
- .endm
-
.macro el1_interrupt_handler, handler:req
- gic_prio_irq_setup pmr=x20, tmp=x1
enable_da
mov x0, sp
.endm
.macro el0_interrupt_handler, handler:req
- gic_prio_irq_setup pmr=x20, tmp=x0
user_exit_irqoff
enable_da
SYM_CODE_START_LOCAL(el1_error)
kernel_entry 1
mrs x1, esr_el1
- gic_prio_kentry_setup tmp=x2
enable_dbg
mov x0, sp
bl do_serror
kernel_entry 0
el0_error_naked:
mrs x25, esr_el1
- gic_prio_kentry_setup tmp=x2
user_exit_irqoff
enable_dbg
mov x0, sp
i = top_reg;
while (i >= 0) {
- printk("x%-2d: %016llx ", i, regs->regs[i]);
- i--;
+ printk("x%-2d: %016llx", i, regs->regs[i]);
- if (i % 2 == 0) {
- pr_cont("x%-2d: %016llx ", i, regs->regs[i]);
- i--;
- }
+ while (i-- % 3)
+ pr_cont(" x%-2d: %016llx", i, regs->regs[i]);
pr_cont("\n");
}
unsigned long fp = frame->fp;
struct stack_info info;
- /* Terminal record; nothing to unwind */
- if (!fp)
- return -ENOENT;
-
if (fp & 0xf)
return -EINVAL;
frame->pc = ptrauth_strip_insn_pac(frame->pc);
+ /*
+ * This is a terminal record, so we have finished unwinding.
+ */
+ if (!frame->fp && !frame->pc)
+ return -ENOENT;
+
return 0;
}
NOKPROBE_SYMBOL(unwind_frame);
.gnu.version_d : { *(.gnu.version_d) }
.gnu.version_r : { *(.gnu.version_r) }
+ /*
+ * Discard .note.gnu.property sections which are unused and have
+ * different alignment requirement from vDSO note sections.
+ */
+ /DISCARD/ : {
+ *(.note.GNU-stack .note.gnu.property)
+ }
.note : { *(.note.*) } :text :note
. = ALIGN(16);
PROVIDE(end = .);
/DISCARD/ : {
- *(.note.GNU-stack)
*(.data .data.* .gnu.linkonce.d.* .sdata*)
*(.bss .sbss .dynbss .dynsbss)
*(.eh_frame .eh_frame_hdr)
# Same as cc-*option, but using CC_COMPAT instead of CC
ifeq ($(CONFIG_CC_IS_CLANG), y)
-COMPAT_GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE_COMPAT)elfedit))
-COMPAT_GCC_TOOLCHAIN := $(realpath $(COMPAT_GCC_TOOLCHAIN_DIR)/..)
-
CC_COMPAT_CLANG_FLAGS := --target=$(notdir $(CROSS_COMPILE_COMPAT:%-=%))
-CC_COMPAT_CLANG_FLAGS += --prefix=$(COMPAT_GCC_TOOLCHAIN_DIR)$(notdir $(CROSS_COMPILE_COMPAT))
-CC_COMPAT_CLANG_FLAGS += -no-integrated-as -Qunused-arguments
-ifneq ($(COMPAT_GCC_TOOLCHAIN),)
-CC_COMPAT_CLANG_FLAGS += --gcc-toolchain=$(COMPAT_GCC_TOOLCHAIN)
-endif
CC_COMPAT ?= $(CC)
CC_COMPAT += $(CC_COMPAT_CLANG_FLAGS)
set_pte(ptep, pte);
}
-pte_t *huge_pte_alloc(struct mm_struct *mm,
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz)
{
pgd_t *pgdp;
*/
ptep = pte_alloc_map(mm, pmdp, addr);
} else if (sz == PMD_SIZE) {
- if (IS_ENABLED(CONFIG_ARCH_WANT_HUGE_PMD_SHARE) &&
- pud_none(READ_ONCE(*pudp)))
- ptep = huge_pmd_share(mm, addr, pudp);
+ if (want_pmd_share(vma, addr) && pud_none(READ_ONCE(*pudp)))
+ ptep = huge_pmd_share(mm, vma, addr, pudp);
else
ptep = (pte_t *)pmd_alloc(mm, pudp, addr);
} else if (sz == (CONT_PMD_SIZE)) {
int pfn_valid(unsigned long pfn)
{
phys_addr_t addr = PFN_PHYS(pfn);
+ struct mem_section *ms;
/*
* Ensure the upper PAGE_SHIFT bits are clear in the
if (PHYS_PFN(addr) != pfn)
return 0;
-#ifdef CONFIG_SPARSEMEM
-{
- struct mem_section *ms;
-
if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
return 0;
*/
if (!early_section(ms))
return pfn_section_valid(ms, pfn);
-}
-#endif
+
return memblock_is_map_memory(addr);
}
EXPORT_SYMBOL(pfn_valid);
}
#endif
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
#if !ARM64_SWAPPER_USES_SECTION_MAPS
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
struct vmem_altmap *altmap)
free_empty_tables(start, end, VMEMMAP_START, VMEMMAP_END);
#endif
}
-#endif /* CONFIG_SPARSEMEM_VMEMMAP */
static inline pud_t *fixmap_pud(unsigned long addr)
{
{ FIXADDR_TOP, "Fixmap end" },
{ PCI_IO_START, "PCI I/O start" },
{ PCI_IO_END, "PCI I/O end" },
-#ifdef CONFIG_SPARSEMEM_VMEMMAP
{ VMEMMAP_START, "vmemmap start" },
{ VMEMMAP_START + VMEMMAP_SIZE, "vmemmap end" },
-#endif
{ -1, NULL },
};
#include <linux/compiler.h>
+#include <asm-generic/bitops/fls.h>
+#include <asm-generic/bitops/__fls.h>
+#include <asm-generic/bitops/fls64.h>
+
#ifdef __KERNEL__
#ifndef _LINUX_BITOPS_H
#endif /* __KERNEL__ */
-#include <asm-generic/bitops/fls.h>
-#include <asm-generic/bitops/__fls.h>
-#include <asm-generic/bitops/fls64.h>
-
#endif /* _H8300_BITOPS_H */
# Do not use single-byte enums; these will overflow.
KBUILD_CFLAGS += -fno-short-enums
+# We must use long-calls:
+KBUILD_CFLAGS += -mlong-calls
+
# Modules must use either long-calls, or use pic/plt.
# Use long-calls for now, it's easier. And faster.
# KBUILD_CFLAGS_MODULE += -fPIC
KBUILD_CFLAGS += -ffixed-$(TIR_NAME) -DTHREADINFO_REG=$(TIR_NAME) -D__linux__
KBUILD_AFLAGS += -DTHREADINFO_REG=$(TIR_NAME)
-LIBGCC := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name 2>/dev/null)
-libs-y += $(LIBGCC)
-
head-y := arch/hexagon/kernel/head.o
core-y += arch/hexagon/kernel/ \
# CONFIG_SERIO is not set
# CONFIG_CONSOLE_TRANSLATIONS is not set
CONFIG_LEGACY_PTY_COUNT=64
-# CONFIG_DEVKMEM is not set
# CONFIG_HW_RANDOM is not set
CONFIG_SPI=y
CONFIG_SPI_DEBUG=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_FS=y
# CONFIG_SCHED_DEBUG is not set
-CONFIG_DEBUG_INFO=y
"3:\n" \
".section .fixup,\"ax\"\n" \
"4: %1 = #%5;\n" \
- " jump 3b\n" \
+ " jump ##3b\n" \
".previous\n" \
".section __ex_table,\"a\"\n" \
".long 1b,4b,2b,4b\n" \
"3:\n"
".section .fixup,\"ax\"\n"
"4: %0 = #%6\n"
- " jump 3b\n"
+ " jump ##3b\n"
".previous\n"
".section __ex_table,\"a\"\n"
".long 1b,4b,2b,4b\n"
* convert a physical pointer to a virtual kernel pointer for
* /dev/mem access.
*/
-#define xlate_dev_kmem_ptr(p) __va(p)
#define xlate_dev_mem_ptr(p) __va(p)
/*
#include <asm-generic/timex.h>
#include <asm/timer-regs.h>
+#include <asm/hexagon_vm.h>
/* Using TCX0 as our clock. CLOCK_TICK_RATE scheduled to be removed. */
#define CLOCK_TICK_RATE TCX0_CLK_RATE
static inline int read_current_timer(unsigned long *timer_val)
{
- *timer_val = (unsigned long) __vmgettime();
+ *timer_val = __vmgettime();
return 0;
}
DECLARE_EXPORT(__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes);
/* Additional functions */
-DECLARE_EXPORT(__divsi3);
-DECLARE_EXPORT(__modsi3);
-DECLARE_EXPORT(__udivsi3);
-DECLARE_EXPORT(__umodsi3);
+DECLARE_EXPORT(__hexagon_divsi3);
+DECLARE_EXPORT(__hexagon_modsi3);
+DECLARE_EXPORT(__hexagon_udivsi3);
+DECLARE_EXPORT(__hexagon_umodsi3);
DECLARE_EXPORT(csum_tcpudp_magic);
static int genregs_get(struct task_struct *target,
const struct user_regset *regset,
- srtuct membuf to)
+ struct membuf to)
{
struct pt_regs *regs = task_pt_regs(target);
membuf_store(&to, regs->m0);
membuf_store(&to, regs->m1);
membuf_store(&to, regs->usr);
- membuf_store(&to, regs->p3_0);
+ membuf_store(&to, regs->preds);
membuf_store(&to, regs->gp);
membuf_store(&to, regs->ugp);
membuf_store(&to, pt_elr(regs)); // pc
#
# Makefile for hexagon-specific library files.
#
-obj-y = checksum.o io.o memcpy.o memset.o
+obj-y = checksum.o io.o memcpy.o memset.o memcpy_likely_aligned.o \
+ divsi3.o modsi3.o udivsi3.o umodsi3.o
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+SYM_FUNC_START(__hexagon_divsi3)
+ {
+ p0 = cmp.gt(r0,#-1)
+ p1 = cmp.gt(r1,#-1)
+ r3:2 = vabsw(r1:0)
+ }
+ {
+ p3 = xor(p0,p1)
+ r4 = sub(r2,r3)
+ r6 = cl0(r2)
+ p0 = cmp.gtu(r3,r2)
+ }
+ {
+ r0 = mux(p3,#-1,#1)
+ r7 = cl0(r3)
+ p1 = cmp.gtu(r3,r4)
+ }
+ {
+ r0 = mux(p0,#0,r0)
+ p0 = or(p0,p1)
+ if (p0.new) jumpr:nt r31
+ r6 = sub(r7,r6)
+ }
+ {
+ r7 = r6
+ r5:4 = combine(#1,r3)
+ r6 = add(#1,lsr(r6,#1))
+ p0 = cmp.gtu(r6,#4)
+ }
+ {
+ r5:4 = vaslw(r5:4,r7)
+ if (!p0) r6 = #3
+ }
+ {
+ loop0(1f,r6)
+ r7:6 = vlsrw(r5:4,#1)
+ r1:0 = #0
+ }
+ .falign
+1:
+ {
+ r5:4 = vlsrw(r5:4,#2)
+ if (!p0.new) r0 = add(r0,r5)
+ if (!p0.new) r2 = sub(r2,r4)
+ p0 = cmp.gtu(r4,r2)
+ }
+ {
+ r7:6 = vlsrw(r7:6,#2)
+ if (!p0.new) r0 = add(r0,r7)
+ if (!p0.new) r2 = sub(r2,r6)
+ p0 = cmp.gtu(r6,r2)
+ }:endloop0
+ {
+ if (!p0) r0 = add(r0,r7)
+ }
+ {
+ if (p3) r0 = sub(r1,r0)
+ jumpr r31
+ }
+SYM_FUNC_END(__hexagon_divsi3)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+SYM_FUNC_START(__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes)
+ {
+ p0 = bitsclr(r1,#7)
+ p0 = bitsclr(r0,#7)
+ if (p0.new) r5:4 = memd(r1)
+ if (p0.new) r7:6 = memd(r1+#8)
+ }
+ {
+ if (!p0) jump:nt .Lmemcpy_call
+ if (p0) r9:8 = memd(r1+#16)
+ if (p0) r11:10 = memd(r1+#24)
+ p0 = cmp.gtu(r2,#64)
+ }
+ {
+ if (p0) jump:nt .Lmemcpy_call
+ if (!p0) memd(r0) = r5:4
+ if (!p0) memd(r0+#8) = r7:6
+ p0 = cmp.gtu(r2,#32)
+ }
+ {
+ p1 = cmp.gtu(r2,#40)
+ p2 = cmp.gtu(r2,#48)
+ if (p0) r13:12 = memd(r1+#32)
+ if (p1.new) r15:14 = memd(r1+#40)
+ }
+ {
+ memd(r0+#16) = r9:8
+ memd(r0+#24) = r11:10
+ }
+ {
+ if (p0) memd(r0+#32) = r13:12
+ if (p1) memd(r0+#40) = r15:14
+ if (!p2) jumpr:t r31
+ }
+ {
+ p0 = cmp.gtu(r2,#56)
+ r5:4 = memd(r1+#48)
+ if (p0.new) r7:6 = memd(r1+#56)
+ }
+ {
+ memd(r0+#48) = r5:4
+ if (p0) memd(r0+#56) = r7:6
+ jumpr r31
+ }
+
+.Lmemcpy_call:
+ jump memcpy
+
+SYM_FUNC_END(__hexagon_memcpy_likely_aligned_min32bytes_mult8bytes)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+SYM_FUNC_START(__hexagon_modsi3)
+ {
+ p2 = cmp.ge(r0,#0)
+ r2 = abs(r0)
+ r1 = abs(r1)
+ }
+ {
+ r3 = cl0(r2)
+ r4 = cl0(r1)
+ p0 = cmp.gtu(r1,r2)
+ }
+ {
+ r3 = sub(r4,r3)
+ if (p0) jumpr r31
+ }
+ {
+ p1 = cmp.eq(r3,#0)
+ loop0(1f,r3)
+ r0 = r2
+ r2 = lsl(r1,r3)
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r2,r0)
+ if (!p0.new) r0 = sub(r0,r2)
+ r2 = lsr(r2,#1)
+ if (p1) r1 = #0
+ }:endloop0
+ {
+ p0 = cmp.gtu(r2,r0)
+ if (!p0.new) r0 = sub(r0,r1)
+ if (p2) jumpr r31
+ }
+ {
+ r0 = neg(r0)
+ jumpr r31
+ }
+SYM_FUNC_END(__hexagon_modsi3)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+SYM_FUNC_START(__hexagon_udivsi3)
+ {
+ r2 = cl0(r0)
+ r3 = cl0(r1)
+ r5:4 = combine(#1,#0)
+ p0 = cmp.gtu(r1,r0)
+ }
+ {
+ r6 = sub(r3,r2)
+ r4 = r1
+ r1:0 = combine(r0,r4)
+ if (p0) jumpr r31
+ }
+ {
+ r3:2 = vlslw(r5:4,r6)
+ loop0(1f,r6)
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r2,r1)
+ if (!p0.new) r1 = sub(r1,r2)
+ if (!p0.new) r0 = add(r0,r3)
+ r3:2 = vlsrw(r3:2,#1)
+ }:endloop0
+ {
+ p0 = cmp.gtu(r2,r1)
+ if (!p0.new) r0 = add(r0,r3)
+ jumpr r31
+ }
+SYM_FUNC_END(__hexagon_udivsi3)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/linkage.h>
+
+SYM_FUNC_START(__hexagon_umodsi3)
+ {
+ r2 = cl0(r0)
+ r3 = cl0(r1)
+ p0 = cmp.gtu(r1,r0)
+ }
+ {
+ r2 = sub(r3,r2)
+ if (p0) jumpr r31
+ }
+ {
+ loop0(1f,r2)
+ p1 = cmp.eq(r2,#0)
+ r2 = lsl(r1,r2)
+ }
+ .falign
+1:
+ {
+ p0 = cmp.gtu(r2,r0)
+ if (!p0.new) r0 = sub(r0,r2)
+ r2 = lsr(r2,#1)
+ if (p1) r1 = #0
+ }:endloop0
+ {
+ p0 = cmp.gtu(r2,r0)
+ if (!p0.new) r0 = sub(r0,r1)
+ jumpr r31
+ }
+SYM_FUNC_END(__hexagon_umodsi3)
select ARCH_MIGHT_HAVE_PC_SERIO
select ACPI
select ACPI_NUMA if NUMA
+ select ARCH_ENABLE_MEMORY_HOTPLUG
+ select ARCH_ENABLE_MEMORY_HOTREMOVE
select ARCH_SUPPORTS_ACPI
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI
select TTY
select HAVE_ARCH_TRACEHOOK
select HAVE_VIRT_CPU_ACCOUNTING
+ select HUGETLB_PAGE_SIZE_VARIABLE if HUGETLB_PAGE
select VIRT_TO_BUS
select GENERIC_IRQ_PROBE
select GENERIC_PENDING_IRQ if SMP
config GENERIC_LOCKBREAK
def_bool n
-config HUGETLB_PAGE_SIZE_VARIABLE
- bool
- depends on HUGETLB_PAGE
- default y
-
config GENERIC_CALIBRATE_DELAY
bool
default y
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
- def_bool y
-
config SCHED_SMT
bool "SMT scheduler support"
depends on SMP
#define memcpy_fromio memcpy_fromio
#define memcpy_toio memcpy_toio
#define memset_io memset_io
-#define xlate_dev_kmem_ptr xlate_dev_kmem_ptr
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
#include <asm-generic/io.h>
#undef PCI_IOBASE
return ptr;
}
-/*
- * Convert a virtual cached kernel memory pointer to an uncached pointer
- */
-static __inline__ void *
-xlate_dev_kmem_ptr(void *p)
-{
- struct page *page;
- void *ptr;
-
- page = virt_to_page((unsigned long)p);
- if (PageUncached(page))
- ptr = (void *)__pa(p) + __IA64_UNCACHED_OFFSET;
- else
- ptr = p;
-
- return ptr;
-}
-
#endif /* _ASM_IA64_UACCESS_H */
EXPORT_SYMBOL(hpage_shift);
pte_t *
-huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long sz)
{
unsigned long taddr = htlbpage_to_page(addr);
pgd_t *pgd;
KBUILD_DEFCONFIG := multi_defconfig
-ifneq ($(SUBARCH),$(ARCH))
+ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := $(call cc-cross-prefix, \
m68k-linux-gnu- m68k-linux- m68k-unknown-linux-gnu-)
return( 0 );
}
-
-/*
- * Local variables:
- * c-indent-level: 4
- * tab-width: 8
- * End:
- */
{
unsigned int irq = d->irq - MCFINT_VECBASE;
- if (MCFINTC2_SIMR && (irq > 128))
+ if (MCFINTC2_SIMR && (irq > 127))
__raw_writeb(irq - 128, MCFINTC2_SIMR);
- else if (MCFINTC1_SIMR && (irq > 64))
+ else if (MCFINTC1_SIMR && (irq > 63))
__raw_writeb(irq - 64, MCFINTC1_SIMR);
else
__raw_writeb(irq, MCFINTC0_SIMR);
{
unsigned int irq = d->irq - MCFINT_VECBASE;
- if (MCFINTC2_CIMR && (irq > 128))
+ if (MCFINTC2_CIMR && (irq > 127))
__raw_writeb(irq - 128, MCFINTC2_CIMR);
- else if (MCFINTC1_CIMR && (irq > 64))
+ else if (MCFINTC1_CIMR && (irq > 63))
__raw_writeb(irq - 64, MCFINTC1_CIMR);
else
__raw_writeb(irq, MCFINTC0_CIMR);
}
irq -= MCFINT_VECBASE;
- if (MCFINTC2_ICR0 && (irq > 128))
+ if (MCFINTC2_ICR0 && (irq > 127))
__raw_writeb(5, MCFINTC2_ICR0 + irq - 128);
- else if (MCFINTC1_ICR0 && (irq > 64))
+ else if (MCFINTC1_ICR0 && (irq > 63))
__raw_writeb(5, MCFINTC1_ICR0 + irq - 64);
else
__raw_writeb(5, MCFINTC0_ICR0 + irq);
# CONFIG_VT is not set
# CONFIG_UNIX98_PTYS is not set
# CONFIG_DEVMEM is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_MCF=y
CONFIG_SERIAL_MCF_BAUDRATE=115200
CONFIG_SERIAL_MCF_CONSOLE=y
#endif
-#include <asm-generic/bitops/find.h>
-
#ifdef __KERNEL__
#if defined(CONFIG_CPU_HAS_NO_BITFIELDS)
#define __clear_bit_unlock clear_bit_unlock
#include <asm-generic/bitops/ext2-atomic.h>
-#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/fls64.h>
#include <asm-generic/bitops/sched.h>
#include <asm-generic/bitops/hweight.h>
+#include <asm-generic/bitops/le.h>
#endif /* __KERNEL__ */
+#include <asm-generic/bitops/find.h>
+
#endif /* _M68K_BITOPS_H */
*/
#define xlate_dev_mem_ptr(p) __va(p)
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
#define readb_relaxed(addr) readb(addr)
#define readw_relaxed(addr) readw(addr)
#define readl_relaxed(addr) readl(addr)
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS
select ARCH_USE_QUEUED_SPINLOCKS
+ select ARCH_SUPPORTS_HUGETLBFS if CPU_SUPPORTS_HUGEPAGES
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_LD_ORPHAN_WARN
config SYS_SUPPORTS_LITTLE_ENDIAN
bool
-config SYS_SUPPORTS_HUGETLBFS
- bool
- depends on CPU_SUPPORTS_HUGEPAGES
- default y
-
config MIPS_HUGE_TLB_SUPPORT
def_bool HUGETLB_PAGE || TRANSPARENT_HUGEPAGE
UTS_MACHINE := mips64
endif
-ifneq ($(SUBARCH),$(ARCH))
+ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := $(call cc-cross-prefix, $(tool-archpref)-linux- $(tool-archpref)-linux-gnu- $(tool-archpref)-unknown-linux-gnu-)
endif
*/
#define xlate_dev_mem_ptr(p) __va(p)
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
void __ioread64_copy(void *to, const void __iomem *from, size_t count);
#endif /* _ASM_IO_H */
#include <asm/tlb.h>
#include <asm/tlbflush.h>
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr,
- unsigned long sz)
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
p4d_t *p4d;
# CONFIG_SERIO is not set
# CONFIG_VT is not set
# CONFIG_LEGACY_PTYS is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_SERIAL_OF_PLATFORM=y
select ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_NO_SG_CHAIN
+ select ARCH_SUPPORTS_HUGETLBFS if PA20
select ARCH_SUPPORTS_MEMORY_FAILURE
select DMA_OPS
select RTC_CLASS
default 3 if 64BIT && PARISC_PAGE_SIZE_4KB
default 2
-config SYS_SUPPORTS_HUGETLBFS
- def_bool y if PA20
-
-
menu "Processor type and features"
choice
export LD_BFD
-ifneq ($(SUBARCH),$(UTS_MACHINE))
+ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CC_SUFFIXES = linux linux-gnu unknown-linux-gnu
CROSS_COMPILE := $(call cc-cross-prefix, \
*/
#define xlate_dev_mem_ptr(p) __va(p)
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
extern int devmem_is_allowed(unsigned long pfn);
#endif
PDC_CHASSIS_EOM_SET )
#endif /* _PARISC_PDC_CHASSIS_H */
-/* vim: set ts=8 */
}
-pte_t *huge_pte_alloc(struct mm_struct *mm,
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
# Please keep this list sorted alphabetically.
#
select ARCH_32BIT_OFF_T if PPC32
+ select ARCH_ENABLE_MEMORY_HOTPLUG
+ select ARCH_ENABLE_MEMORY_HOTREMOVE
+ select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEVMEM_IS_ALLOWED
+ select ARCH_HAS_DMA_MAP_DIRECT if PPC_PSERIES
select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
- select ARCH_HAS_KCOV
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
+ select ARCH_HAS_KCOV
+ select ARCH_HAS_MEMBARRIER_CALLBACKS
+ select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
select ARCH_HAS_MMIOWB if PPC64
+ select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API
- select ARCH_HAS_NON_OVERLAPPING_ADDRESS_SPACE
select ARCH_HAS_PTE_DEVMAP if PPC_BOOK3S_64
select ARCH_HAS_PTE_SPECIAL
- select ARCH_HAS_MEMBARRIER_CALLBACKS
- select ARCH_HAS_MEMBARRIER_SYNC_CORE
select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
- select ARCH_HAS_COPY_MC if PPC64
select ARCH_HAS_UBSAN_SANITIZE_ALL
select ARCH_HAVE_NMI_SAFE_CMPXCHG
select ARCH_KEEP_MEMBLOCK
select BUILDTIME_TABLE_SORT
select CLONE_BACKWARDS
select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN
- select DMA_OPS if PPC64
select DMA_OPS_BYPASS if PPC64
- select ARCH_HAS_DMA_MAP_DIRECT if PPC64 && PPC_PSERIES
+ select DMA_OPS if PPC64
select DYNAMIC_FTRACE if FUNCTION_TRACER
select EDAC_ATOMIC_SCRUB
select EDAC_SUPPORT
select GENERIC_TIME_VSYSCALL
select GENERIC_VDSO_TIME_NS
select HAVE_ARCH_AUDITSYSCALL
+ select HAVE_ARCH_HUGE_VMALLOC if HAVE_ARCH_HUGE_VMAP
select HAVE_ARCH_HUGE_VMAP if PPC_BOOK3S_64 && PPC_RADIX_MMU
select HAVE_ARCH_JUMP_LABEL
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN if PPC32 && PPC_PAGE_SHIFT <= 14
select HAVE_ARCH_KASAN_VMALLOC if PPC32 && PPC_PAGE_SHIFT <= 14
- select HAVE_ARCH_KGDB
select HAVE_ARCH_KFENCE if PPC32
+ select HAVE_ARCH_KGDB
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
select HAVE_ARCH_NVRAM_OPS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ASM_MODVERSIONS
- select HAVE_C_RECORDMCOUNT
- select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
- select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_CONTEXT_TRACKING if PPC64
+ select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
select HAVE_DYNAMIC_FTRACE
select HAVE_FUNCTION_TRACER
select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC
select HAVE_GENERIC_VDSO
+ select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP
+ select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx)
select HAVE_IDE
select HAVE_IOREMAP_PROT
select HAVE_IRQ_EXIT_ON_IRQ_STACK
+ select HAVE_IRQ_TIME_ACCOUNTING
select HAVE_KERNEL_GZIP
select HAVE_KERNEL_LZMA if DEFAULT_UIMAGE
select HAVE_KERNEL_LZO if DEFAULT_UIMAGE
select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS
select HAVE_MOD_ARCH_SPECIFIC
select HAVE_NMI if PERF_EVENTS || (PPC64 && PPC_BOOK3S)
- select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC64 && PPC_BOOK3S && SMP
select HAVE_OPTPROBES
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if PPC64
- select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI && !HAVE_HARDLOCKUP_DETECTOR_ARCH
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
- select MMU_GATHER_RCU_TABLE_FREE
- select MMU_GATHER_PAGE_SIZE
select HAVE_REGS_AND_STACK_ACCESS_API
select HAVE_RELIABLE_STACKTRACE
+ select HAVE_RSEQ
select HAVE_SOFTIRQ_ON_OWN_STACK
+ select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
+ select HAVE_STACKPROTECTOR if PPC64 && $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
- select HAVE_IRQ_TIME_ACCOUNTING
- select HAVE_RSEQ
+ select HUGETLB_PAGE_SIZE_VARIABLE if PPC_BOOK3S_64 && HUGETLB_PAGE
select IOMMU_HELPER if PPC64
select IRQ_DOMAIN
select IRQ_FORCED_THREADING
+ select MMU_GATHER_PAGE_SIZE
+ select MMU_GATHER_RCU_TABLE_FREE
select MODULES_USE_ELF_RELA
select NEED_DMA_MAP_STATE if PPC64 || NOT_COHERENT_CACHE
select NEED_SG_DMA_LENGTH
source "kernel/Kconfig.hz"
-config HUGETLB_PAGE_SIZE_VARIABLE
- bool
- depends on HUGETLB_PAGE && PPC_BOOK3S_64
- default y
-
config MATH_EMULATION
bool "Math emulation"
depends on 4xx || PPC_8xx || PPC_MPC832x || BOOKE
def_bool y
depends on HOTPLUG_CPU
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
- def_bool y
-
config PPC64_SUPPORTS_MEMORY_FAILURE
bool "Add support for memory hwpoison"
depends on PPC_BOOK3S_64
def_bool y
depends on PPC_BOOK3S_64
-config SYS_SUPPORTS_HUGETLBFS
- bool
-
config ILLEGAL_POINTER_VALUE
hex
# This is roughly half way between the top of user space and the bottom
kernel=vmlinux
fi
-LANG=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+LC_ALL=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
case "$elfformat" in
elf64-powerpcle) format=elf64lppc ;;
elf64-powerpc) format=elf32ppc ;;
*/
#define xlate_dev_mem_ptr(p) __va(p)
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
/*
* We don't do relaxed operations yet, at least not with this semantic
*/
unsigned int lpid);
extern int kvmppc_radix_init(void);
extern void kvmppc_radix_exit(void);
-extern bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+extern void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
extern bool kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn);
/* API functions */
int64_t opal_invalid_call(void);
-int64_t opal_npu_destroy_context(uint64_t phb_id, uint64_t pid, uint64_t bdf);
-int64_t opal_npu_init_context(uint64_t phb_id, int pasid, uint64_t msr,
- uint64_t bdf);
int64_t opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
uint64_t lpcr);
int64_t opal_npu_spa_setup(uint64_t phb_id, uint32_t bdfn,
#endif /* CONFIG_PPC64 */
void *private_data;
- struct npu *npu;
};
/* These are used for config access before all the PCI probing
#endif /* __KERNEL__ */
-extern struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev);
-extern struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index);
-extern int pnv_npu2_init(struct pci_controller *hose);
-extern int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
- unsigned long msr);
-extern int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev);
-
#endif /* __ASM_POWERPC_PCI_H */
#include <linux/moduleloader.h>
#include <linux/err.h>
#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include <linux/bug.h>
#include <asm/module.h>
#include <linux/uaccess.h>
return 0;
}
-#ifdef MODULES_VADDR
static __always_inline void *
__module_alloc(unsigned long size, unsigned long start, unsigned long end)
{
- return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL,
- PAGE_KERNEL_EXEC, VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
- __builtin_return_address(0));
+ /*
+ * Don't do huge page allocations for modules yet until more testing
+ * is done. STRICT_MODULE_RWX may require extra work to support this
+ * too.
+ */
+ return __vmalloc_node_range(size, 1, start, end, GFP_KERNEL, PAGE_KERNEL_EXEC,
+ VM_FLUSH_RESET_PERMS | VM_NO_HUGE_VMAP,
+ NUMA_NO_NODE, __builtin_return_address(0));
}
void *module_alloc(unsigned long size)
{
+#ifdef MODULES_VADDR
unsigned long limit = (unsigned long)_etext - SZ_32M;
void *ptr = NULL;
ptr = __module_alloc(size, MODULES_VADDR, MODULES_END);
return ptr;
-}
+#else
+ return __module_alloc(size, VMALLOC_START, VMALLOC_END);
#endif
+}
return (unsigned int)(usm_entries * sizeof(u64));
}
+/**
+ * add_node_props - Reads node properties from device node structure and add
+ * them to fdt.
+ * @fdt: Flattened device tree of the kernel
+ * @node_offset: offset of the node to add a property at
+ * @dn: device node pointer
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int add_node_props(void *fdt, int node_offset, const struct device_node *dn)
+{
+ int ret = 0;
+ struct property *pp;
+
+ if (!dn)
+ return -EINVAL;
+
+ for_each_property_of_node(dn, pp) {
+ ret = fdt_setprop(fdt, node_offset, pp->name, pp->value, pp->length);
+ if (ret < 0) {
+ pr_err("Unable to add %s property: %s\n", pp->name, fdt_strerror(ret));
+ return ret;
+ }
+ }
+ return ret;
+}
+
+/**
+ * update_cpus_node - Update cpus node of flattened device tree using of_root
+ * device node.
+ * @fdt: Flattened device tree of the kernel.
+ *
+ * Returns 0 on success, negative errno on error.
+ */
+static int update_cpus_node(void *fdt)
+{
+ struct device_node *cpus_node, *dn;
+ int cpus_offset, cpus_subnode_offset, ret = 0;
+
+ cpus_offset = fdt_path_offset(fdt, "/cpus");
+ if (cpus_offset < 0 && cpus_offset != -FDT_ERR_NOTFOUND) {
+ pr_err("Malformed device tree: error reading /cpus node: %s\n",
+ fdt_strerror(cpus_offset));
+ return cpus_offset;
+ }
+
+ if (cpus_offset > 0) {
+ ret = fdt_del_node(fdt, cpus_offset);
+ if (ret < 0) {
+ pr_err("Error deleting /cpus node: %s\n", fdt_strerror(ret));
+ return -EINVAL;
+ }
+ }
+
+ /* Add cpus node to fdt */
+ cpus_offset = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"), "cpus");
+ if (cpus_offset < 0) {
+ pr_err("Error creating /cpus node: %s\n", fdt_strerror(cpus_offset));
+ return -EINVAL;
+ }
+
+ /* Add cpus node properties */
+ cpus_node = of_find_node_by_path("/cpus");
+ ret = add_node_props(fdt, cpus_offset, cpus_node);
+ of_node_put(cpus_node);
+ if (ret < 0)
+ return ret;
+
+ /* Loop through all subnodes of cpus and add them to fdt */
+ for_each_node_by_type(dn, "cpu") {
+ cpus_subnode_offset = fdt_add_subnode(fdt, cpus_offset, dn->full_name);
+ if (cpus_subnode_offset < 0) {
+ pr_err("Unable to add %s subnode: %s\n", dn->full_name,
+ fdt_strerror(cpus_subnode_offset));
+ ret = cpus_subnode_offset;
+ goto out;
+ }
+
+ ret = add_node_props(fdt, cpus_subnode_offset, dn);
+ if (ret < 0)
+ goto out;
+ }
+out:
+ of_node_put(dn);
+ return ret;
+}
+
/**
* setup_new_fdt_ppc64 - Update the flattend device-tree of the kernel
* being loaded.
}
}
+ /* Update cpus nodes information to account hotplug CPUs. */
+ ret = update_cpus_node(fdt);
+ if (ret < 0)
+ goto out;
+
/* Update memory reserve map */
ret = get_reserved_memory_ranges(&rmem);
if (ret)
}
}
-static bool kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
+static void kvm_unmap_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
unsigned long i;
unlock_rmap(rmapp);
__unlock_hpte(hptep, be64_to_cpu(hptep[0]));
}
- return false;
}
bool kvm_unmap_gfn_range_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (kvm_is_radix(kvm))
- return kvm_unmap_radix(kvm, range->slot, range->start);
+ gfn_t gfn;
+
+ if (kvm_is_radix(kvm)) {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ kvm_unmap_radix(kvm, range->slot, gfn);
+ } else {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ kvm_unmap_rmapp(kvm, range->slot, range->start);
+ }
- return kvm_unmap_rmapp(kvm, range->slot, range->start);
+ return false;
}
void kvmppc_core_flush_memslot_hv(struct kvm *kvm,
bool kvm_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (kvm_is_radix(kvm))
- kvm_age_radix(kvm, range->slot, range->start);
+ gfn_t gfn;
+ bool ret = false;
- return kvm_age_rmapp(kvm, range->slot, range->start);
+ if (kvm_is_radix(kvm)) {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ ret |= kvm_age_radix(kvm, range->slot, gfn);
+ } else {
+ for (gfn = range->start; gfn < range->end; gfn++)
+ ret |= kvm_age_rmapp(kvm, range->slot, gfn);
+ }
+
+ return ret;
}
static bool kvm_test_age_rmapp(struct kvm *kvm, struct kvm_memory_slot *memslot,
bool kvm_test_age_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
- if (kvm_is_radix(kvm))
- kvm_test_age_radix(kvm, range->slot, range->start);
+ WARN_ON(range->start + 1 != range->end);
- return kvm_test_age_rmapp(kvm, range->slot, range->start);
+ if (kvm_is_radix(kvm))
+ return kvm_test_age_radix(kvm, range->slot, range->start);
+ else
+ return kvm_test_age_rmapp(kvm, range->slot, range->start);
}
bool kvm_set_spte_gfn_hv(struct kvm *kvm, struct kvm_gfn_range *range)
{
+ WARN_ON(range->start + 1 != range->end);
+
if (kvm_is_radix(kvm))
- return kvm_unmap_radix(kvm, range->slot, range->start);
+ kvm_unmap_radix(kvm, range->slot, range->start);
+ else
+ kvm_unmap_rmapp(kvm, range->slot, range->start);
- return kvm_unmap_rmapp(kvm, range->slot, range->start);
+ return false;
}
static int vcpus_running(struct kvm *kvm)
}
/* Called with kvm->mmu_lock held */
-bool kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
+void kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot,
unsigned long gfn)
{
pte_t *ptep;
if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) {
uv_page_inval(kvm->arch.lpid, gpa, PAGE_SHIFT);
- return false;
+ return;
}
ptep = find_kvm_secondary_pte(kvm, gpa, &shift);
if (ptep && pte_present(*ptep))
kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot,
kvm->arch.lpid);
- return false;
}
/* Called with kvm->mmu_lock held */
ccflags-$(CONFIG_PPC64) := $(NO_MINIMAL_TOC)
+CFLAGS_code-patching.o += -fno-stack-protector
+CFLAGS_feature-fixups.o += -fno-stack-protector
+
CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
* At this point we do the placement change only for BOOK3S 64. This would
* possibly work on other subarchs.
*/
-pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz)
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, unsigned long sz)
{
pgd_t *pg;
p4d_t *p4;
config PPC_8xx
bool "Freescale 8xx"
+ select ARCH_SUPPORTS_HUGETLBFS
select FSL_SOC
- select SYS_SUPPORTS_HUGETLBFS
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
select HAVE_ARCH_VMAP_STACK
bool "Server processors"
select PPC_FPU
select PPC_HAVE_PMU_SUPPORT
- select SYS_SUPPORTS_HUGETLBFS
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
+ select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
+ select ARCH_ENABLE_PMD_SPLIT_PTLOCK
select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE
+ select ARCH_SUPPORTS_HUGETLBFS
select ARCH_SUPPORTS_NUMA_BALANCING
select IRQ_WORK
select PPC_MM_SLICES
# this is for common code between PPC32 & PPC64 FSL BOOKE
config PPC_FSL_BOOK3E
bool
+ select ARCH_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
select FSL_EMB_PERFMON
select PPC_SMP_MUXED_IPI
- select SYS_SUPPORTS_HUGETLBFS if PHYS_64BIT || PPC64
select PPC_DOORBELL
default y if FSL_BOOKE
If in doubt, say Y here.
-config ARCH_ENABLE_SPLIT_PMD_PTLOCK
- def_bool y
- depends on PPC_BOOK3S_64
-
config PPC_RADIX_MMU
bool "Radix MMU Support"
depends on PPC_BOOK3S_64
depends on PPC_BOOK3S_64
depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP
-config ARCH_ENABLE_HUGEPAGE_MIGRATION
- def_bool y
- depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
-
config PPC_MMU_NOHASH
def_bool y
obj-$(CONFIG_FA_DUMP) += opal-fadump.o
obj-$(CONFIG_PRESERVE_FA_DUMP) += opal-fadump.o
obj-$(CONFIG_OPAL_CORE) += opal-core.o
-obj-$(CONFIG_PCI) += pci.o pci-ioda.o npu-dma.o pci-ioda-tce.o
+obj-$(CONFIG_PCI) += pci.o pci-ioda.o pci-ioda-tce.o
obj-$(CONFIG_PCI_IOV) += pci-sriov.o
obj-$(CONFIG_CXL_BASE) += pci-cxl.o
obj-$(CONFIG_EEH) += eeh-powernv.o
* Before we go ahead and use this range as cache inhibited range
* flush the cache.
*/
- flush_dcache_range_chunked(PFN_PHYS(start_pfn),
- PFN_PHYS(start_pfn + nr_pages),
+ flush_dcache_range_chunked((unsigned long)pfn_to_kaddr(start_pfn),
+ (unsigned long)pfn_to_kaddr(start_pfn + nr_pages),
FLUSH_CHUNK_SIZE);
}
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * This file implements the DMA operations for NVLink devices. The NPU
- * devices all point to the same iommu table as the parent PCI device.
- *
- * Copyright Alistair Popple, IBM Corporation 2015.
- */
-
-#include <linux/mmu_notifier.h>
-#include <linux/mmu_context.h>
-#include <linux/of.h>
-#include <linux/pci.h>
-#include <linux/memblock.h>
-#include <linux/sizes.h>
-
-#include <asm/debugfs.h>
-#include <asm/powernv.h>
-#include <asm/ppc-pci.h>
-#include <asm/opal.h>
-
-#include "pci.h"
-
-static struct pci_dev *get_pci_dev(struct device_node *dn)
-{
- struct pci_dn *pdn = PCI_DN(dn);
- struct pci_dev *pdev;
-
- pdev = pci_get_domain_bus_and_slot(pci_domain_nr(pdn->phb->bus),
- pdn->busno, pdn->devfn);
-
- /*
- * pci_get_domain_bus_and_slot() increased the reference count of
- * the PCI device, but callers don't need that actually as the PE
- * already holds a reference to the device. Since callers aren't
- * aware of the reference count change, call pci_dev_put() now to
- * avoid leaks.
- */
- if (pdev)
- pci_dev_put(pdev);
-
- return pdev;
-}
-
-/* Given a NPU device get the associated PCI device. */
-struct pci_dev *pnv_pci_get_gpu_dev(struct pci_dev *npdev)
-{
- struct device_node *dn;
- struct pci_dev *gpdev;
-
- if (WARN_ON(!npdev))
- return NULL;
-
- if (WARN_ON(!npdev->dev.of_node))
- return NULL;
-
- /* Get assoicated PCI device */
- dn = of_parse_phandle(npdev->dev.of_node, "ibm,gpu", 0);
- if (!dn)
- return NULL;
-
- gpdev = get_pci_dev(dn);
- of_node_put(dn);
-
- return gpdev;
-}
-EXPORT_SYMBOL(pnv_pci_get_gpu_dev);
-
-/* Given the real PCI device get a linked NPU device. */
-struct pci_dev *pnv_pci_get_npu_dev(struct pci_dev *gpdev, int index)
-{
- struct device_node *dn;
- struct pci_dev *npdev;
-
- if (WARN_ON(!gpdev))
- return NULL;
-
- /* Not all PCI devices have device-tree nodes */
- if (!gpdev->dev.of_node)
- return NULL;
-
- /* Get assoicated PCI device */
- dn = of_parse_phandle(gpdev->dev.of_node, "ibm,npu", index);
- if (!dn)
- return NULL;
-
- npdev = get_pci_dev(dn);
- of_node_put(dn);
-
- return npdev;
-}
-EXPORT_SYMBOL(pnv_pci_get_npu_dev);
-
-#ifdef CONFIG_IOMMU_API
-/*
- * Returns the PE assoicated with the PCI device of the given
- * NPU. Returns the linked pci device if pci_dev != NULL.
- */
-static struct pnv_ioda_pe *get_gpu_pci_dev_and_pe(struct pnv_ioda_pe *npe,
- struct pci_dev **gpdev)
-{
- struct pnv_phb *phb;
- struct pci_controller *hose;
- struct pci_dev *pdev;
- struct pnv_ioda_pe *pe;
- struct pci_dn *pdn;
-
- pdev = pnv_pci_get_gpu_dev(npe->pdev);
- if (!pdev)
- return NULL;
-
- pdn = pci_get_pdn(pdev);
- if (WARN_ON(!pdn || pdn->pe_number == IODA_INVALID_PE))
- return NULL;
-
- hose = pci_bus_to_host(pdev->bus);
- phb = hose->private_data;
- pe = &phb->ioda.pe_array[pdn->pe_number];
-
- if (gpdev)
- *gpdev = pdev;
-
- return pe;
-}
-
-static long pnv_npu_unset_window(struct iommu_table_group *table_group,
- int num);
-
-static long pnv_npu_set_window(struct iommu_table_group *table_group, int num,
- struct iommu_table *tbl)
-{
- struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
- table_group);
- struct pnv_phb *phb = npe->phb;
- int64_t rc;
- const unsigned long size = tbl->it_indirect_levels ?
- tbl->it_level_size : tbl->it_size;
- const __u64 start_addr = tbl->it_offset << tbl->it_page_shift;
- const __u64 win_size = tbl->it_size << tbl->it_page_shift;
- int num2 = (num == 0) ? 1 : 0;
-
- /* NPU has just one TVE so if there is another table, remove it first */
- if (npe->table_group.tables[num2])
- pnv_npu_unset_window(&npe->table_group, num2);
-
- pe_info(npe, "Setting up window %llx..%llx pg=%lx\n",
- start_addr, start_addr + win_size - 1,
- IOMMU_PAGE_SIZE(tbl));
-
- rc = opal_pci_map_pe_dma_window(phb->opal_id,
- npe->pe_number,
- npe->pe_number,
- tbl->it_indirect_levels + 1,
- __pa(tbl->it_base),
- size << 3,
- IOMMU_PAGE_SIZE(tbl));
- if (rc) {
- pe_err(npe, "Failed to configure TCE table, err %lld\n", rc);
- return rc;
- }
- pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
- /* Add the table to the list so its TCE cache will get invalidated */
- pnv_pci_link_table_and_group(phb->hose->node, num,
- tbl, &npe->table_group);
-
- return 0;
-}
-
-static long pnv_npu_unset_window(struct iommu_table_group *table_group, int num)
-{
- struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
- table_group);
- struct pnv_phb *phb = npe->phb;
- int64_t rc;
-
- if (!npe->table_group.tables[num])
- return 0;
-
- pe_info(npe, "Removing DMA window\n");
-
- rc = opal_pci_map_pe_dma_window(phb->opal_id, npe->pe_number,
- npe->pe_number,
- 0/* levels */, 0/* table address */,
- 0/* table size */, 0/* page size */);
- if (rc) {
- pe_err(npe, "Unmapping failed, ret = %lld\n", rc);
- return rc;
- }
- pnv_pci_ioda2_tce_invalidate_entire(phb, false);
-
- pnv_pci_unlink_table_and_group(npe->table_group.tables[num],
- &npe->table_group);
-
- return 0;
-}
-
-/* Switch ownership from platform code to external user (e.g. VFIO) */
-static void pnv_npu_take_ownership(struct iommu_table_group *table_group)
-{
- struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
- table_group);
- struct pnv_phb *phb = npe->phb;
- int64_t rc;
- struct pci_dev *gpdev = NULL;
-
- /*
- * Note: NPU has just a single TVE in the hardware which means that
- * while used by the kernel, it can have either 32bit window or
- * DMA bypass but never both. So we deconfigure 32bit window only
- * if it was enabled at the moment of ownership change.
- */
- if (npe->table_group.tables[0]) {
- pnv_npu_unset_window(&npe->table_group, 0);
- return;
- }
-
- /* Disable bypass */
- rc = opal_pci_map_pe_dma_window_real(phb->opal_id,
- npe->pe_number, npe->pe_number,
- 0 /* bypass base */, 0);
- if (rc) {
- pe_err(npe, "Failed to disable bypass, err %lld\n", rc);
- return;
- }
- pnv_pci_ioda2_tce_invalidate_entire(npe->phb, false);
-
- get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (gpdev)
- pnv_npu2_unmap_lpar_dev(gpdev);
-}
-
-static void pnv_npu_release_ownership(struct iommu_table_group *table_group)
-{
- struct pnv_ioda_pe *npe = container_of(table_group, struct pnv_ioda_pe,
- table_group);
- struct pci_dev *gpdev = NULL;
-
- get_gpu_pci_dev_and_pe(npe, &gpdev);
- if (gpdev)
- pnv_npu2_map_lpar_dev(gpdev, 0, MSR_DR | MSR_PR | MSR_HV);
-}
-
-static struct iommu_table_group_ops pnv_pci_npu_ops = {
- .set_window = pnv_npu_set_window,
- .unset_window = pnv_npu_unset_window,
- .take_ownership = pnv_npu_take_ownership,
- .release_ownership = pnv_npu_release_ownership,
-};
-#endif /* !CONFIG_IOMMU_API */
-
-/*
- * NPU2 ATS
- */
-/* Maximum possible number of ATSD MMIO registers per NPU */
-#define NV_NMMU_ATSD_REGS 8
-#define NV_NPU_MAX_PE_NUM 16
-
-/*
- * A compound NPU IOMMU group which might consist of 1 GPU + 2xNPUs (POWER8) or
- * up to 3 x (GPU + 2xNPUs) (POWER9).
- */
-struct npu_comp {
- struct iommu_table_group table_group;
- int pe_num;
- struct pnv_ioda_pe *pe[NV_NPU_MAX_PE_NUM];
-};
-
-/* An NPU descriptor, valid for POWER9 only */
-struct npu {
- int index;
- struct npu_comp npucomp;
-};
-
-#ifdef CONFIG_IOMMU_API
-static long pnv_npu_peers_create_table_userspace(
- struct iommu_table_group *table_group,
- int num, __u32 page_shift, __u64 window_size, __u32 levels,
- struct iommu_table **ptbl)
-{
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- if (!npucomp->pe_num || !npucomp->pe[0] ||
- !npucomp->pe[0]->table_group.ops ||
- !npucomp->pe[0]->table_group.ops->create_table)
- return -EFAULT;
-
- return npucomp->pe[0]->table_group.ops->create_table(
- &npucomp->pe[0]->table_group, num, page_shift,
- window_size, levels, ptbl);
-}
-
-static long pnv_npu_peers_set_window(struct iommu_table_group *table_group,
- int num, struct iommu_table *tbl)
-{
- int i, j;
- long ret = 0;
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- for (i = 0; i < npucomp->pe_num; ++i) {
- struct pnv_ioda_pe *pe = npucomp->pe[i];
-
- if (!pe->table_group.ops->set_window)
- continue;
-
- ret = pe->table_group.ops->set_window(&pe->table_group,
- num, tbl);
- if (ret)
- break;
- }
-
- if (ret) {
- for (j = 0; j < i; ++j) {
- struct pnv_ioda_pe *pe = npucomp->pe[j];
-
- if (!pe->table_group.ops->unset_window)
- continue;
-
- ret = pe->table_group.ops->unset_window(
- &pe->table_group, num);
- if (ret)
- break;
- }
- } else {
- table_group->tables[num] = iommu_tce_table_get(tbl);
- }
-
- return ret;
-}
-
-static long pnv_npu_peers_unset_window(struct iommu_table_group *table_group,
- int num)
-{
- int i, j;
- long ret = 0;
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- for (i = 0; i < npucomp->pe_num; ++i) {
- struct pnv_ioda_pe *pe = npucomp->pe[i];
-
- WARN_ON(npucomp->table_group.tables[num] !=
- table_group->tables[num]);
- if (!npucomp->table_group.tables[num])
- continue;
-
- if (!pe->table_group.ops->unset_window)
- continue;
-
- ret = pe->table_group.ops->unset_window(&pe->table_group, num);
- if (ret)
- break;
- }
-
- if (ret) {
- for (j = 0; j < i; ++j) {
- struct pnv_ioda_pe *pe = npucomp->pe[j];
-
- if (!npucomp->table_group.tables[num])
- continue;
-
- if (!pe->table_group.ops->set_window)
- continue;
-
- ret = pe->table_group.ops->set_window(&pe->table_group,
- num, table_group->tables[num]);
- if (ret)
- break;
- }
- } else if (table_group->tables[num]) {
- iommu_tce_table_put(table_group->tables[num]);
- table_group->tables[num] = NULL;
- }
-
- return ret;
-}
-
-static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
-{
- int i;
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- for (i = 0; i < npucomp->pe_num; ++i) {
- struct pnv_ioda_pe *pe = npucomp->pe[i];
-
- if (!pe->table_group.ops ||
- !pe->table_group.ops->take_ownership)
- continue;
- pe->table_group.ops->take_ownership(&pe->table_group);
- }
-}
-
-static void pnv_npu_peers_release_ownership(
- struct iommu_table_group *table_group)
-{
- int i;
- struct npu_comp *npucomp = container_of(table_group, struct npu_comp,
- table_group);
-
- for (i = 0; i < npucomp->pe_num; ++i) {
- struct pnv_ioda_pe *pe = npucomp->pe[i];
-
- if (!pe->table_group.ops ||
- !pe->table_group.ops->release_ownership)
- continue;
- pe->table_group.ops->release_ownership(&pe->table_group);
- }
-}
-
-static struct iommu_table_group_ops pnv_npu_peers_ops = {
- .get_table_size = pnv_pci_ioda2_get_table_size,
- .create_table = pnv_npu_peers_create_table_userspace,
- .set_window = pnv_npu_peers_set_window,
- .unset_window = pnv_npu_peers_unset_window,
- .take_ownership = pnv_npu_peers_take_ownership,
- .release_ownership = pnv_npu_peers_release_ownership,
-};
-
-static void pnv_comp_attach_table_group(struct npu_comp *npucomp,
- struct pnv_ioda_pe *pe)
-{
- if (WARN_ON(npucomp->pe_num == NV_NPU_MAX_PE_NUM))
- return;
-
- npucomp->pe[npucomp->pe_num] = pe;
- ++npucomp->pe_num;
-}
-
-static struct iommu_table_group *
- pnv_try_setup_npu_table_group(struct pnv_ioda_pe *pe)
-{
- struct iommu_table_group *compound_group;
- struct npu_comp *npucomp;
- struct pci_dev *gpdev = NULL;
- struct pci_controller *hose;
- struct pci_dev *npdev = NULL;
-
- list_for_each_entry(gpdev, &pe->pbus->devices, bus_list) {
- npdev = pnv_pci_get_npu_dev(gpdev, 0);
- if (npdev)
- break;
- }
-
- if (!npdev)
- /* It is not an NPU attached device, skip */
- return NULL;
-
- hose = pci_bus_to_host(npdev->bus);
-
- if (hose->npu) {
- /* P9 case: compound group is per-NPU (all gpus, all links) */
- npucomp = &hose->npu->npucomp;
- } else {
- /* P8 case: Compound group is per-GPU (1 gpu, 2 links) */
- npucomp = pe->npucomp = kzalloc(sizeof(*npucomp), GFP_KERNEL);
- }
-
- compound_group = &npucomp->table_group;
- if (!compound_group->group) {
- compound_group->ops = &pnv_npu_peers_ops;
- iommu_register_group(compound_group, hose->global_number,
- pe->pe_number);
-
- /* Steal capabilities from a GPU PE */
- compound_group->max_dynamic_windows_supported =
- pe->table_group.max_dynamic_windows_supported;
- compound_group->tce32_start = pe->table_group.tce32_start;
- compound_group->tce32_size = pe->table_group.tce32_size;
- compound_group->max_levels = pe->table_group.max_levels;
- if (!compound_group->pgsizes)
- compound_group->pgsizes = pe->table_group.pgsizes;
- }
-
- /*
- * The gpu would have been added to the iommu group that's created
- * for the PE. Pull it out now.
- */
- iommu_del_device(&gpdev->dev);
-
- /*
- * I'm not sure this is strictly required, but it's probably a good idea
- * since the table_group for the PE is going to be attached to the
- * compound table group. If we leave the PE's iommu group active then
- * we might have the same table_group being modifiable via two sepeate
- * iommu groups.
- */
- iommu_group_put(pe->table_group.group);
-
- /* now put the GPU into the compound group */
- pnv_comp_attach_table_group(npucomp, pe);
- iommu_add_device(compound_group, &gpdev->dev);
-
- return compound_group;
-}
-
-static struct iommu_table_group *pnv_npu_compound_attach(struct pnv_ioda_pe *pe)
-{
- struct iommu_table_group *table_group;
- struct npu_comp *npucomp;
- struct pci_dev *gpdev = NULL;
- struct pci_dev *npdev;
- struct pnv_ioda_pe *gpe = get_gpu_pci_dev_and_pe(pe, &gpdev);
-
- WARN_ON(!(pe->flags & PNV_IODA_PE_DEV));
- if (!gpe)
- return NULL;
-
- /*
- * IODA2 bridges get this set up from pci_controller_ops::setup_bridge
- * but NPU bridges do not have this hook defined so we do it here.
- * We do not setup other table group parameters as they won't be used
- * anyway - NVLink bridges are subordinate PEs.
- */
- pe->table_group.ops = &pnv_pci_npu_ops;
-
- table_group = iommu_group_get_iommudata(
- iommu_group_get(&gpdev->dev));
-
- /*
- * On P9 NPU PHB and PCI PHB support different page sizes,
- * keep only matching. We expect here that NVLink bridge PE pgsizes is
- * initialized by the caller.
- */
- table_group->pgsizes &= pe->table_group.pgsizes;
- npucomp = container_of(table_group, struct npu_comp, table_group);
- pnv_comp_attach_table_group(npucomp, pe);
-
- list_for_each_entry(npdev, &pe->phb->hose->bus->devices, bus_list) {
- struct pci_dev *gpdevtmp = pnv_pci_get_gpu_dev(npdev);
-
- if (gpdevtmp != gpdev)
- continue;
-
- iommu_add_device(table_group, &npdev->dev);
- }
-
- return table_group;
-}
-
-void pnv_pci_npu_setup_iommu_groups(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- /*
- * For non-nvlink devices the IOMMU group is registered when the PE is
- * configured and devices are added to the group when the per-device
- * DMA setup is run. That's done in hose->ops.dma_dev_setup() which is
- * only initialise for "normal" IODA PHBs.
- *
- * For NVLink devices we need to ensure the NVLinks and the GPU end up
- * in the same IOMMU group, so that's handled here.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
-
- if (phb->type == PNV_PHB_IODA2)
- list_for_each_entry(pe, &phb->ioda.pe_list, list)
- pnv_try_setup_npu_table_group(pe);
- }
-
- /*
- * Now we have all PHBs discovered, time to add NPU devices to
- * the corresponding IOMMU groups.
- */
- list_for_each_entry(hose, &hose_list, list_node) {
- unsigned long pgsizes;
-
- phb = hose->private_data;
-
- if (phb->type != PNV_PHB_NPU_NVLINK)
- continue;
-
- pgsizes = pnv_ioda_parse_tce_sizes(phb);
- list_for_each_entry(pe, &phb->ioda.pe_list, list) {
- /*
- * IODA2 bridges get this set up from
- * pci_controller_ops::setup_bridge but NPU bridges
- * do not have this hook defined so we do it here.
- */
- pe->table_group.pgsizes = pgsizes;
- pnv_npu_compound_attach(pe);
- }
- }
-}
-#endif /* CONFIG_IOMMU_API */
-
-int pnv_npu2_init(struct pci_controller *hose)
-{
- static int npu_index;
- struct npu *npu;
- int ret;
-
- npu = kzalloc(sizeof(*npu), GFP_KERNEL);
- if (!npu)
- return -ENOMEM;
-
- npu_index++;
- if (WARN_ON(npu_index >= NV_MAX_NPUS)) {
- ret = -ENOSPC;
- goto fail_exit;
- }
- npu->index = npu_index;
- hose->npu = npu;
-
- return 0;
-
-fail_exit:
- kfree(npu);
- return ret;
-}
-
-int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
- unsigned long msr)
-{
- int ret;
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
- struct pci_controller *hose;
- struct pnv_phb *nphb;
-
- if (!npdev)
- return -ENODEV;
-
- hose = pci_bus_to_host(npdev->bus);
- if (hose->npu == NULL) {
- dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
- return 0;
- }
-
- nphb = hose->private_data;
-
- dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
- nphb->opal_id, lparid);
- /*
- * Currently we only support radix and non-zero LPCR only makes sense
- * for hash tables so skiboot expects the LPCR parameter to be a zero.
- */
- ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), lparid,
- 0 /* LPCR bits */);
- if (ret) {
- dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
- return ret;
- }
-
- dev_dbg(&gpdev->dev, "init context opalid=%llu msr=%lx\n",
- nphb->opal_id, msr);
- ret = opal_npu_init_context(nphb->opal_id, 0/*__unused*/, msr,
- pci_dev_id(gpdev));
- if (ret < 0)
- dev_err(&gpdev->dev, "Failed to init context: %d\n", ret);
- else
- ret = 0;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(pnv_npu2_map_lpar_dev);
-
-void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr)
-{
- struct pci_dev *gpdev;
-
- list_for_each_entry(gpdev, &gpe->pbus->devices, bus_list)
- pnv_npu2_map_lpar_dev(gpdev, 0, msr);
-}
-
-int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
-{
- int ret;
- struct pci_dev *npdev = pnv_pci_get_npu_dev(gpdev, 0);
- struct pci_controller *hose;
- struct pnv_phb *nphb;
-
- if (!npdev)
- return -ENODEV;
-
- hose = pci_bus_to_host(npdev->bus);
- if (hose->npu == NULL) {
- dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
- return 0;
- }
-
- nphb = hose->private_data;
-
- dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
- nphb->opal_id);
- ret = opal_npu_destroy_context(nphb->opal_id, 0/*__unused*/,
- pci_dev_id(gpdev));
- if (ret < 0) {
- dev_err(&gpdev->dev, "Failed to destroy context: %d\n", ret);
- return ret;
- }
-
- /* Set LPID to 0 anyway, just to be safe */
- dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=0\n", nphb->opal_id);
- ret = opal_npu_map_lpar(nphb->opal_id, pci_dev_id(gpdev), 0 /*LPID*/,
- 0 /* LPCR bits */);
- if (ret)
- dev_err(&gpdev->dev, "Error %d mapping device to LPAR\n", ret);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(pnv_npu2_unmap_lpar_dev);
OPAL_CALL(opal_xive_set_queue_state, OPAL_XIVE_SET_QUEUE_STATE);
OPAL_CALL(opal_xive_get_vp_state, OPAL_XIVE_GET_VP_STATE);
OPAL_CALL(opal_signal_system_reset, OPAL_SIGNAL_SYSTEM_RESET);
-OPAL_CALL(opal_npu_init_context, OPAL_NPU_INIT_CONTEXT);
-OPAL_CALL(opal_npu_destroy_context, OPAL_NPU_DESTROY_CONTEXT);
OPAL_CALL(opal_npu_map_lpar, OPAL_NPU_MAP_LPAR);
OPAL_CALL(opal_imc_counters_init, OPAL_IMC_COUNTERS_INIT);
OPAL_CALL(opal_imc_counters_start, OPAL_IMC_COUNTERS_START);
#define PNV_IODA1_M64_SEGS 8 /* Segments per M64 BAR */
#define PNV_IODA1_DMA32_SEGSIZE 0x10000000
-static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_NVLINK",
- "NPU_OCAPI" };
+static const char * const pnv_phb_names[] = { "IODA1", "IODA2", "NPU_OCAPI" };
static void pnv_pci_ioda2_set_bypass(struct pnv_ioda_pe *pe, bool enable);
static void pnv_pci_configure_bus(struct pci_bus *bus);
unsigned int pe_num = pe->pe_number;
WARN_ON(pe->pdev);
- WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
- kfree(pe->npucomp);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
mutex_lock(&phb->ioda.pe_alloc_mutex);
* Release from all parents PELT-V. NPUs don't have a PELTV
* table
*/
- if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+ if (phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_unset_peltv(phb, pe, parent);
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
* Configure PELTV. NPUs don't have a PELTV table so skip
* configuration on them.
*/
- if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+ if (phb->type != PNV_PHB_NPU_OCAPI)
pnv_ioda_set_peltv(phb, pe, true);
/* Setup reverse map */
/* NOTE: We don't get a reference for the pointer in the PE
* data structure, both the device and PE structures should be
- * destroyed at the same time. However, removing nvlink
- * devices will need some work.
+ * destroyed at the same time.
*
* At some point we want to remove the PDN completely anyways
*/
return pe;
}
-static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev)
-{
- int pe_num, found_pe = false, rc;
- long rid;
- struct pnv_ioda_pe *pe;
- struct pci_dev *gpu_pdev;
- struct pci_dn *npu_pdn;
- struct pnv_phb *phb = pci_bus_to_pnvhb(npu_pdev->bus);
-
- /*
- * Intentionally leak a reference on the npu device (for
- * nvlink only; this is not an opencapi path) to make sure it
- * never goes away, as it's been the case all along and some
- * work is needed otherwise.
- */
- pci_dev_get(npu_pdev);
-
- /*
- * Due to a hardware errata PE#0 on the NPU is reserved for
- * error handling. This means we only have three PEs remaining
- * which need to be assigned to four links, implying some
- * links must share PEs.
- *
- * To achieve this we assign PEs such that NPUs linking the
- * same GPU get assigned the same PE.
- */
- gpu_pdev = pnv_pci_get_gpu_dev(npu_pdev);
- for (pe_num = 0; pe_num < phb->ioda.total_pe_num; pe_num++) {
- pe = &phb->ioda.pe_array[pe_num];
- if (!pe->pdev)
- continue;
-
- if (pnv_pci_get_gpu_dev(pe->pdev) == gpu_pdev) {
- /*
- * This device has the same peer GPU so should
- * be assigned the same PE as the existing
- * peer NPU.
- */
- dev_info(&npu_pdev->dev,
- "Associating to existing PE %x\n", pe_num);
- npu_pdn = pci_get_pdn(npu_pdev);
- rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
- npu_pdn->pe_number = pe_num;
- phb->ioda.pe_rmap[rid] = pe->pe_number;
- pe->device_count++;
-
- /* Map the PE to this link */
- rc = opal_pci_set_pe(phb->opal_id, pe_num, rid,
- OpalPciBusAll,
- OPAL_COMPARE_RID_DEVICE_NUMBER,
- OPAL_COMPARE_RID_FUNCTION_NUMBER,
- OPAL_MAP_PE);
- WARN_ON(rc != OPAL_SUCCESS);
- found_pe = true;
- break;
- }
- }
-
- if (!found_pe)
- /*
- * Could not find an existing PE so allocate a new
- * one.
- */
- return pnv_ioda_setup_dev_PE(npu_pdev);
- else
- return pe;
-}
-
-static void pnv_ioda_setup_npu_PEs(struct pci_bus *bus)
-{
- struct pci_dev *pdev;
-
- list_for_each_entry(pdev, &bus->devices, bus_list)
- pnv_ioda_setup_npu_PE(pdev);
-}
-
-static void pnv_pci_ioda_setup_nvlink(void)
-{
- struct pci_controller *hose;
- struct pnv_phb *phb;
- struct pnv_ioda_pe *pe;
-
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type == PNV_PHB_NPU_NVLINK) {
- /* PE#0 is needed for error reporting */
- pnv_ioda_reserve_pe(phb, 0);
- pnv_ioda_setup_npu_PEs(hose->bus);
- if (phb->model == PNV_PHB_MODEL_NPU2)
- WARN_ON_ONCE(pnv_npu2_init(hose));
- }
- }
- list_for_each_entry(hose, &hose_list, list_node) {
- phb = hose->private_data;
- if (phb->type != PNV_PHB_IODA2)
- continue;
-
- list_for_each_entry(pe, &phb->ioda.pe_list, list)
- pnv_npu2_map_lpar(pe, MSR_DR | MSR_PR | MSR_HV);
- }
-
-#ifdef CONFIG_IOMMU_API
- /* setup iommu groups so we can do nvlink pass-thru */
- pnv_pci_npu_setup_iommu_groups();
-#endif
-}
-
static void pnv_pci_ioda1_setup_dma_pe(struct pnv_phb *phb,
struct pnv_ioda_pe *pe);
#define PHB3_TCE_KILL_INVAL_PE PPC_BIT(1)
#define PHB3_TCE_KILL_INVAL_ONE PPC_BIT(2)
-static void pnv_pci_phb3_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
- __be64 __iomem *invalidate = pnv_ioda_get_inval_reg(phb, rm);
- const unsigned long val = PHB3_TCE_KILL_INVAL_ALL;
-
- mb(); /* Ensure previous TCE table stores are visible */
- if (rm)
- __raw_rm_writeq_be(val, invalidate);
- else
- __raw_writeq_be(val, invalidate);
-}
-
static inline void pnv_pci_phb3_tce_invalidate_pe(struct pnv_ioda_pe *pe)
{
/* 01xb - invalidate TCEs that match the specified PE# */
struct pnv_phb *phb = pe->phb;
unsigned int shift = tbl->it_page_shift;
- /*
- * NVLink1 can use the TCE kill register directly as
- * it's the same as PHB3. NVLink2 is different and
- * should go via the OPAL call.
- */
- if (phb->model == PNV_PHB_MODEL_NPU) {
- /*
- * The NVLink hardware does not support TCE kill
- * per TCE entry so we have to invalidate
- * the entire cache for it.
- */
- pnv_pci_phb3_tce_invalidate_entire(phb, rm);
- continue;
- }
if (phb->model == PNV_PHB_MODEL_PHB3 && phb->regs)
pnv_pci_phb3_tce_invalidate(pe, rm, shift,
index, npages);
}
}
-void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm)
-{
- if (phb->model == PNV_PHB_MODEL_NPU || phb->model == PNV_PHB_MODEL_PHB3)
- pnv_pci_phb3_tce_invalidate_entire(phb, rm);
- else
- opal_pci_tce_kill(phb->opal_id, OPAL_PCI_TCE_KILL, 0, 0, 0, 0);
-}
-
static int pnv_ioda2_tce_build(struct iommu_table *tbl, long index,
long npages, unsigned long uaddr,
enum dma_data_direction direction,
static void pnv_pci_ioda_fixup(void)
{
- pnv_pci_ioda_setup_nvlink();
pnv_pci_ioda_create_dbgfs();
pnv_pci_enable_bridges();
pnv_ioda_release_pe(pe);
}
-static void pnv_npu_disable_device(struct pci_dev *pdev)
-{
- struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
- struct eeh_pe *eehpe = edev ? edev->pe : NULL;
-
- if (eehpe && eeh_ops && eeh_ops->reset)
- eeh_ops->reset(eehpe, EEH_RESET_HOT);
-}
-
static void pnv_pci_ioda_shutdown(struct pci_controller *hose)
{
struct pnv_phb *phb = hose->private_data;
.shutdown = pnv_pci_ioda_shutdown,
};
-static const struct pci_controller_ops pnv_npu_ioda_controller_ops = {
- .setup_msi_irqs = pnv_setup_msi_irqs,
- .teardown_msi_irqs = pnv_teardown_msi_irqs,
- .enable_device_hook = pnv_pci_enable_device_hook,
- .window_alignment = pnv_pci_window_alignment,
- .reset_secondary_bus = pnv_pci_reset_secondary_bus,
- .shutdown = pnv_pci_ioda_shutdown,
- .disable_device = pnv_npu_disable_device,
-};
-
static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
.enable_device_hook = pnv_ocapi_enable_device_hook,
.release_device = pnv_pci_release_device,
phb->model = PNV_PHB_MODEL_P7IOC;
else if (of_device_is_compatible(np, "ibm,power8-pciex"))
phb->model = PNV_PHB_MODEL_PHB3;
- else if (of_device_is_compatible(np, "ibm,power8-npu-pciex"))
- phb->model = PNV_PHB_MODEL_NPU;
- else if (of_device_is_compatible(np, "ibm,power9-npu-pciex"))
- phb->model = PNV_PHB_MODEL_NPU2;
else
phb->model = PNV_PHB_MODEL_UNKNOWN;
ppc_md.pcibios_fixup = pnv_pci_ioda_fixup;
switch (phb->type) {
- case PNV_PHB_NPU_NVLINK:
- hose->controller_ops = pnv_npu_ioda_controller_ops;
- break;
case PNV_PHB_NPU_OCAPI:
hose->controller_ops = pnv_npu_ocapi_ioda_controller_ops;
break;
pnv_pci_init_ioda_phb(np, 0, PNV_PHB_IODA2);
}
-void __init pnv_pci_init_npu_phb(struct device_node *np)
-{
- pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_NVLINK);
-}
-
void __init pnv_pci_init_npu2_opencapi_phb(struct device_node *np)
{
pnv_pci_init_ioda_phb(np, 0, PNV_PHB_NPU_OCAPI);
for_each_compatible_node(np, NULL, "ibm,ioda3-phb")
pnv_pci_init_ioda2_phb(np);
- /* Look for NPU PHBs */
- for_each_compatible_node(np, NULL, "ibm,ioda2-npu-phb")
- pnv_pci_init_npu_phb(np);
-
- /*
- * Look for NPU2 PHBs which we treat mostly as NPU PHBs with
- * the exception of TCE kill which requires an OPAL call.
- */
- for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-phb")
- pnv_pci_init_npu_phb(np);
-
/* Look for NPU2 OpenCAPI PHBs */
for_each_compatible_node(np, NULL, "ibm,ioda2-npu2-opencapi-phb")
pnv_pci_init_npu2_opencapi_phb(np);
struct pci_dn;
enum pnv_phb_type {
- PNV_PHB_IODA1 = 0,
- PNV_PHB_IODA2 = 1,
- PNV_PHB_NPU_NVLINK = 2,
- PNV_PHB_NPU_OCAPI = 3,
+ PNV_PHB_IODA1,
+ PNV_PHB_IODA2,
+ PNV_PHB_NPU_OCAPI,
};
/* Precise PHB model for error management */
PNV_PHB_MODEL_UNKNOWN,
PNV_PHB_MODEL_P7IOC,
PNV_PHB_MODEL_PHB3,
- PNV_PHB_MODEL_NPU,
- PNV_PHB_MODEL_NPU2,
};
#define PNV_PCI_DIAG_BUF_SIZE 8192
/* "Base" iommu table, ie, 4K TCEs, 32-bit DMA */
struct iommu_table_group table_group;
- struct npu_comp *npucomp;
/* 64-bit TCE bypass region */
bool tce_bypass_enabled;
extern void pnv_pci_init_ioda_hub(struct device_node *np);
extern void pnv_pci_init_ioda2_phb(struct device_node *np);
-extern void pnv_pci_init_npu_phb(struct device_node *np);
extern void pnv_pci_init_npu2_opencapi_phb(struct device_node *np);
-extern void pnv_npu2_map_lpar(struct pnv_ioda_pe *gpe, unsigned long msr);
extern void pnv_pci_reset_secondary_bus(struct pci_dev *dev);
extern int pnv_eeh_phb_reset(struct pci_controller *hose, int option);
#define pe_info(pe, fmt, ...) \
pe_level_printk(pe, KERN_INFO, fmt, ##__VA_ARGS__)
-/* Nvlink functions */
-extern void pnv_npu_try_dma_set_bypass(struct pci_dev *gpdev, bool bypass);
-extern void pnv_pci_ioda2_tce_invalidate_entire(struct pnv_phb *phb, bool rm);
-extern void pnv_pci_npu_setup_iommu_groups(void);
-
/* pci-ioda-tce.c */
#define POWERNV_IOMMU_DEFAULT_LEVELS 2
#define POWERNV_IOMMU_MAX_LEVELS 5
void __init pSeries_final_fixup(void)
{
- struct pci_controller *hose;
-
pSeries_request_regions();
eeh_show_enabled();
ppc_md.pcibios_sriov_enable = pseries_pcibios_sriov_enable;
ppc_md.pcibios_sriov_disable = pseries_pcibios_sriov_disable;
#endif
- list_for_each_entry(hose, &hose_list, list_node) {
- struct device_node *dn = hose->dn, *nvdn;
-
- while (1) {
- dn = of_find_all_nodes(dn);
- if (!dn)
- break;
- nvdn = of_parse_phandle(dn, "ibm,nvlink", 0);
- if (!nvdn)
- continue;
- if (!of_device_is_compatible(nvdn, "ibm,npu-link"))
- continue;
- if (!of_device_is_compatible(nvdn->parent,
- "ibm,power9-npu"))
- continue;
-#ifdef CONFIG_PPC_POWERNV
- WARN_ON_ONCE(pnv_npu2_init(hose));
-#endif
- break;
- }
- }
}
/*
if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, false))
return;
- if (io_tlb_start)
- memblock_free_early(io_tlb_start,
- PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
+
+ memblock_free_early(__pa(vstart),
+ PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
panic("SVM: Cannot allocate SWIOTLB buffer");
}
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_VIRTUAL if MMU
select ARCH_HAS_DEBUG_WX
+ select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_GIGANTIC_PAGE
select ARCH_HAS_KCOV
select ARCH_HAS_PTE_SPECIAL
select ARCH_HAS_SET_DIRECT_MAP
select ARCH_HAS_SET_MEMORY
- select ARCH_HAS_STRICT_KERNEL_RWX if MMU
+ select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
+ select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
+ select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
+ select ARCH_SUPPORTS_HUGETLBFS if MMU
select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
select ARCH_WANT_FRAME_POINTERS
select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
+ select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
select CLONE_BACKWARDS
select CLINT_TIMER if !MMU
select COMMON_CLK
select EDAC_SUPPORT
select GENERIC_ARCH_TOPOLOGY if SMP
select GENERIC_ATOMIC64 if !64BIT
+ select GENERIC_CLOCKEVENTS_BROADCAST if SMP
select GENERIC_EARLY_IOREMAP
select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
select GENERIC_IOREMAP
config ARCH_SUPPORTS_UPROBES
def_bool y
-config SYS_SUPPORTS_HUGETLBFS
- depends on MMU
- def_bool y
-
config STACKTRACE_SUPPORT
def_bool y
def_bool y
source "arch/riscv/Kconfig.socs"
+source "arch/riscv/Kconfig.erratas"
menu "Platform type"
bool "RV64I"
select 64BIT
select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000
- select HAVE_DYNAMIC_FTRACE if MMU
+ select HAVE_DYNAMIC_FTRACE if MMU && $(cc-option,-fpatchable-function-entry=8)
select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE
select HAVE_FTRACE_MCOUNT_RECORD
select HAVE_FUNCTION_GRAPH_TRACER
help
This config allows kernel to use SBI v0.1 APIs. This will be
deprecated in future once legacy M-mode software are no longer in use.
+
+config KEXEC
+ bool "Kexec system call"
+ select KEXEC_CORE
+ select HOTPLUG_CPU if SMP
+ depends on MMU
+ help
+ kexec is a system call that implements the ability to shutdown your
+ current kernel, and to start another kernel. It is like a reboot
+ but it is independent of the system firmware. And like a reboot
+ you can start any kernel with it, not just Linux.
+
+ The name comes from the similarity to the exec system call.
+
+config CRASH_DUMP
+ bool "Build kdump crash kernel"
+ help
+ Generate crash dump after being started by kexec. This should
+ be normally only set in special crash dump kernels which are
+ loaded in the main kernel with kexec-tools into a specially
+ reserved region and then later executed after a crash by
+ kdump/kexec.
+
+ For more details see Documentation/admin-guide/kdump/kdump.rst
+
endmenu
menu "Boot options"
config EFI
bool "UEFI runtime support"
- depends on OF
+ depends on OF && !XIP_KERNEL
select LIBFDT
select UCS2_STRING
select EFI_PARAMS_FROM_FDT
def_bool y
depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
+config PHYS_RAM_BASE_FIXED
+ bool "Explicitly specified physical RAM address"
+ default n
+
+config PHYS_RAM_BASE
+ hex "Platform Physical RAM address"
+ depends on PHYS_RAM_BASE_FIXED
+ default "0x80000000"
+ help
+ This is the physical address of RAM in the system. It has to be
+ explicitly specified to run early relocations of read-write data
+ from flash to RAM.
+
+config XIP_KERNEL
+ bool "Kernel Execute-In-Place from ROM"
+ depends on MMU && SPARSEMEM
+ # This prevents XIP from being enabled by all{yes,mod}config, which
+ # fail to build since XIP doesn't support large kernels.
+ depends on !COMPILE_TEST
+ select PHYS_RAM_BASE_FIXED
+ help
+ Execute-In-Place allows the kernel to run from non-volatile storage
+ directly addressable by the CPU, such as NOR flash. This saves RAM
+ space since the text section of the kernel is not loaded from flash
+ to RAM. Read-write sections, such as the data section and stack,
+ are still copied to RAM. The XIP kernel is not compressed since
+ it has to run directly from flash, so it will take more space to
+ store it. The flash address used to link the kernel object files,
+ and for storing it, is configuration dependent. Therefore, if you
+ say Y here, you must know the proper physical address where to
+ store the kernel image depending on your own flash memory usage.
+
+ Also note that the make target becomes "make xipImage" rather than
+ "make zImage" or "make Image". The final kernel binary to put in
+ ROM memory will be arch/riscv/boot/xipImage.
+
+ SPARSEMEM is required because the kernel text and rodata that are
+ flash resident are not backed by memmap, then any attempt to get
+ a struct page on those regions will trigger a fault.
+
+ If unsure, say N.
+
+config XIP_PHYS_ADDR
+ hex "XIP Kernel Physical Location"
+ depends on XIP_KERNEL
+ default "0x21000000"
+ help
+ This is the physical address in your flash memory the kernel will
+ be linked for and stored to. This address is dependent on your
+ own flash usage.
+
endmenu
config BUILTIN_DTB
- def_bool n
+ bool
depends on OF
+ default y if XIP_KERNEL
menu "Power management options"
--- /dev/null
+menu "CPU errata selection"
+
+config RISCV_ERRATA_ALTERNATIVE
+ bool "RISC-V alternative scheme"
+ default y
+ help
+ This Kconfig allows the kernel to automatically patch the
+ errata required by the execution platform at run time. The
+ code patching is performed once in the boot stages. It means
+ that the overhead from this mechanism is just taken once.
+
+config ERRATA_SIFIVE
+ bool "SiFive errata"
+ depends on RISCV_ERRATA_ALTERNATIVE
+ help
+ All SiFive errata Kconfig depend on this Kconfig. Disabling
+ this Kconfig will disable all SiFive errata. Please say "Y"
+ here if your platform uses SiFive CPU cores.
+
+ Otherwise, please say "N" here to avoid unnecessary overhead.
+
+config ERRATA_SIFIVE_CIP_453
+ bool "Apply SiFive errata CIP-453"
+ depends on ERRATA_SIFIVE && 64BIT
+ default y
+ help
+ This will apply the SiFive CIP-453 errata to add sign extension
+ to the $badaddr when exception type is instruction page fault
+ and instruction access fault.
+
+ If you don't know what to do here, say "Y".
+
+config ERRATA_SIFIVE_CIP_1200
+ bool "Apply SiFive errata CIP-1200"
+ depends on ERRATA_SIFIVE && 64BIT
+ default y
+ help
+ This will apply the SiFive CIP-1200 errata to repalce all
+ "sfence.vma addr" with "sfence.vma" to ensure that the addr
+ has been flushed from TLB.
+
+ If you don't know what to do here, say "Y".
+
+endmenu
menu "SoC selection"
+config SOC_MICROCHIP_POLARFIRE
+ bool "Microchip PolarFire SoCs"
+ select MCHP_CLK_MPFS
+ select SIFIVE_PLIC
+ help
+ This enables support for Microchip PolarFire SoC platforms.
+
config SOC_SIFIVE
bool "SiFive SoCs"
select SERIAL_SIFIVE if TTY
select CLK_SIFIVE
select CLK_SIFIVE_PRCI
select SIFIVE_PLIC
+ select ERRATA_SIFIVE
help
This enables support for SiFive SoC platform hardware.
# Default target when executing plain make
boot := arch/riscv/boot
+ifeq ($(CONFIG_XIP_KERNEL),y)
+KBUILD_IMAGE := $(boot)/xipImage
+else
KBUILD_IMAGE := $(boot)/Image.gz
+endif
head-y := arch/riscv/kernel/head.o
core-y += arch/riscv/
+core-$(CONFIG_RISCV_ERRATA_ALTERNATIVE) += arch/riscv/errata/
libs-y += arch/riscv/lib/
libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a
vdso_install:
$(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
+ifneq ($(CONFIG_XIP_KERNEL),y)
ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
KBUILD_IMAGE := $(boot)/loader.bin
else
KBUILD_IMAGE := $(boot)/Image.gz
endif
-BOOT_TARGETS := Image Image.gz loader loader.bin
+endif
+BOOT_TARGETS := Image Image.gz loader loader.bin xipImage
all: $(notdir $(KBUILD_IMAGE))
KCOV_INSTRUMENT := n
OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
+OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S
targets := Image Image.* loader loader.o loader.lds loader.bin
+targets := Image Image.* loader loader.o loader.lds loader.bin xipImage
+
+ifeq ($(CONFIG_XIP_KERNEL),y)
+
+quiet_cmd_mkxip = $(quiet_cmd_objcopy)
+cmd_mkxip = $(cmd_objcopy)
+
+$(obj)/xipImage: vmlinux FORCE
+ $(call if_changed,mkxip)
+ @$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)'
+
+endif
$(obj)/Image: vmlinux FORCE
$(call if_changed,objcopy)
# SPDX-License-Identifier: GPL-2.0
subdir-y += sifive
subdir-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += canaan
+subdir-y += microchip
obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y))
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += microchip-mpfs-icicle-kit.dtb
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020 Microchip Technology Inc */
+
+/dts-v1/;
+
+#include "microchip-mpfs.dtsi"
+
+/* Clock frequency (in Hz) of the rtcclk */
+#define RTCCLK_FREQ 1000000
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ model = "Microchip PolarFire-SoC Icicle Kit";
+ compatible = "microchip,mpfs-icicle-kit";
+
+ chosen {
+ stdout-path = &serial0;
+ };
+
+ cpus {
+ timebase-frequency = <RTCCLK_FREQ>;
+ };
+
+ memory@80000000 {
+ device_type = "memory";
+ reg = <0x0 0x80000000 0x0 0x40000000>;
+ clocks = <&clkcfg 26>;
+ };
+
+ soc {
+ };
+};
+
+&serial0 {
+ status = "okay";
+};
+
+&serial1 {
+ status = "okay";
+};
+
+&serial2 {
+ status = "okay";
+};
+
+&serial3 {
+ status = "okay";
+};
+
+&sdcard {
+ status = "okay";
+};
+
+&emac0 {
+ phy-mode = "sgmii";
+ phy-handle = <&phy0>;
+ phy0: ethernet-phy@8 {
+ reg = <8>;
+ ti,fifo-depth = <0x01>;
+ };
+};
+
+&emac1 {
+ status = "okay";
+ phy-mode = "sgmii";
+ phy-handle = <&phy1>;
+ phy1: ethernet-phy@9 {
+ reg = <9>;
+ ti,fifo-depth = <0x01>;
+ };
+};
--- /dev/null
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020 Microchip Technology Inc */
+
+/dts-v1/;
+
+/ {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ model = "Microchip MPFS Icicle Kit";
+ compatible = "microchip,mpfs-icicle-kit";
+
+ chosen {
+ };
+
+ cpus {
+ #address-cells = <1>;
+ #size-cells = <0>;
+
+ cpu@0 {
+ clock-frequency = <0>;
+ compatible = "sifive,e51", "sifive,rocket0", "riscv";
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <128>;
+ i-cache-size = <16384>;
+ reg = <0>;
+ riscv,isa = "rv64imac";
+ status = "disabled";
+
+ cpu0_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu@1 {
+ clock-frequency = <0>;
+ compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ reg = <1>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ status = "okay";
+
+ cpu1_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu@2 {
+ clock-frequency = <0>;
+ compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ reg = <2>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ status = "okay";
+
+ cpu2_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu@3 {
+ clock-frequency = <0>;
+ compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ reg = <3>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ status = "okay";
+
+ cpu3_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+
+ cpu@4 {
+ clock-frequency = <0>;
+ compatible = "sifive,u54-mc", "sifive,rocket0", "riscv";
+ d-cache-block-size = <64>;
+ d-cache-sets = <64>;
+ d-cache-size = <32768>;
+ d-tlb-sets = <1>;
+ d-tlb-size = <32>;
+ device_type = "cpu";
+ i-cache-block-size = <64>;
+ i-cache-sets = <64>;
+ i-cache-size = <32768>;
+ i-tlb-sets = <1>;
+ i-tlb-size = <32>;
+ mmu-type = "riscv,sv39";
+ reg = <4>;
+ riscv,isa = "rv64imafdc";
+ tlb-split;
+ status = "okay";
+ cpu4_intc: interrupt-controller {
+ #interrupt-cells = <1>;
+ compatible = "riscv,cpu-intc";
+ interrupt-controller;
+ };
+ };
+ };
+
+ soc {
+ #address-cells = <2>;
+ #size-cells = <2>;
+ compatible = "simple-bus";
+ ranges;
+
+ cache-controller@2010000 {
+ compatible = "sifive,fu540-c000-ccache", "cache";
+ cache-block-size = <64>;
+ cache-level = <2>;
+ cache-sets = <1024>;
+ cache-size = <2097152>;
+ cache-unified;
+ interrupt-parent = <&plic>;
+ interrupts = <1 2 3>;
+ reg = <0x0 0x2010000 0x0 0x1000>;
+ };
+
+ clint@2000000 {
+ compatible = "sifive,clint0";
+ reg = <0x0 0x2000000 0x0 0xC000>;
+ interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
+ &cpu1_intc 3 &cpu1_intc 7
+ &cpu2_intc 3 &cpu2_intc 7
+ &cpu3_intc 3 &cpu3_intc 7
+ &cpu4_intc 3 &cpu4_intc 7>;
+ };
+
+ plic: interrupt-controller@c000000 {
+ #interrupt-cells = <1>;
+ compatible = "sifive,plic-1.0.0";
+ reg = <0x0 0xc000000 0x0 0x4000000>;
+ riscv,ndev = <186>;
+ interrupt-controller;
+ interrupts-extended = <&cpu0_intc 11
+ &cpu1_intc 11 &cpu1_intc 9
+ &cpu2_intc 11 &cpu2_intc 9
+ &cpu3_intc 11 &cpu3_intc 9
+ &cpu4_intc 11 &cpu4_intc 9>;
+ };
+
+ dma@3000000 {
+ compatible = "sifive,fu540-c000-pdma";
+ reg = <0x0 0x3000000 0x0 0x8000>;
+ interrupt-parent = <&plic>;
+ interrupts = <23 24 25 26 27 28 29 30>;
+ #dma-cells = <1>;
+ };
+
+ refclk: refclk {
+ compatible = "fixed-clock";
+ #clock-cells = <0>;
+ clock-frequency = <600000000>;
+ clock-output-names = "msspllclk";
+ };
+
+ clkcfg: clkcfg@20002000 {
+ compatible = "microchip,mpfs-clkcfg";
+ reg = <0x0 0x20002000 0x0 0x1000>;
+ reg-names = "mss_sysreg";
+ clocks = <&refclk>;
+ #clock-cells = <1>;
+ clock-output-names = "cpu", "axi", "ahb", "envm", /* 0-3 */
+ "mac0", "mac1", "mmc", "timer", /* 4-7 */
+ "mmuart0", "mmuart1", "mmuart2", "mmuart3", /* 8-11 */
+ "mmuart4", "spi0", "spi1", "i2c0", /* 12-15 */
+ "i2c1", "can0", "can1", "usb", /* 16-19 */
+ "rsvd", "rtc", "qspi", "gpio0", /* 20-23 */
+ "gpio1", "gpio2", "ddrc", "fic0", /* 24-27 */
+ "fic1", "fic2", "fic3", "athena", "cfm"; /* 28-32 */
+ };
+
+ serial0: serial@20000000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20000000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <90>;
+ current-speed = <115200>;
+ clocks = <&clkcfg 8>;
+ status = "disabled";
+ };
+
+ serial1: serial@20100000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20100000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <91>;
+ current-speed = <115200>;
+ clocks = <&clkcfg 9>;
+ status = "disabled";
+ };
+
+ serial2: serial@20102000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20102000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <92>;
+ current-speed = <115200>;
+ clocks = <&clkcfg 10>;
+ status = "disabled";
+ };
+
+ serial3: serial@20104000 {
+ compatible = "ns16550a";
+ reg = <0x0 0x20104000 0x0 0x400>;
+ reg-io-width = <4>;
+ reg-shift = <2>;
+ interrupt-parent = <&plic>;
+ interrupts = <93>;
+ current-speed = <115200>;
+ clocks = <&clkcfg 11>;
+ status = "disabled";
+ };
+
+ emmc: mmc@20008000 {
+ compatible = "cdns,sd4hc";
+ reg = <0x0 0x20008000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <88 89>;
+ pinctrl-names = "default";
+ clocks = <&clkcfg 6>;
+ bus-width = <4>;
+ cap-mmc-highspeed;
+ mmc-ddr-3_3v;
+ max-frequency = <200000000>;
+ non-removable;
+ no-sd;
+ no-sdio;
+ voltage-ranges = <3300 3300>;
+ status = "disabled";
+ };
+
+ sdcard: sdhc@20008000 {
+ compatible = "cdns,sd4hc";
+ reg = <0x0 0x20008000 0x0 0x1000>;
+ interrupt-parent = <&plic>;
+ interrupts = <88>;
+ pinctrl-names = "default";
+ clocks = <&clkcfg 6>;
+ bus-width = <4>;
+ disable-wp;
+ cap-sd-highspeed;
+ card-detect-delay = <200>;
+ sd-uhs-sdr12;
+ sd-uhs-sdr25;
+ sd-uhs-sdr50;
+ sd-uhs-sdr104;
+ max-frequency = <200000000>;
+ status = "disabled";
+ };
+
+ emac0: ethernet@20110000 {
+ compatible = "cdns,macb";
+ reg = <0x0 0x20110000 0x0 0x2000>;
+ interrupt-parent = <&plic>;
+ interrupts = <64 65 66 67>;
+ local-mac-address = [00 00 00 00 00 00];
+ clocks = <&clkcfg 4>, <&clkcfg 2>;
+ clock-names = "pclk", "hclk";
+ status = "disabled";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ emac1: ethernet@20112000 {
+ compatible = "cdns,macb";
+ reg = <0x0 0x20112000 0x0 0x2000>;
+ interrupt-parent = <&plic>;
+ interrupts = <70 71 72 73>;
+ mac-address = [00 00 00 00 00 00];
+ clocks = <&clkcfg 5>, <&clkcfg 2>;
+ status = "disabled";
+ clock-names = "pclk", "hclk";
+ #address-cells = <1>;
+ #size-cells = <0>;
+ };
+
+ };
+};
reg = <0x0 0x10000000 0x0 0x1000>;
clocks = <&hfclk>, <&rtcclk>;
#clock-cells = <1>;
+ #reset-cells = <1>;
};
uart0: serial@10010000 {
compatible = "sifive,fu740-c000-uart", "sifive,uart0";
clocks = <&prci PRCI_CLK_PCLK>;
status = "disabled";
};
+ pcie@e00000000 {
+ compatible = "sifive,fu740-pcie";
+ #address-cells = <3>;
+ #size-cells = <2>;
+ #interrupt-cells = <1>;
+ reg = <0xe 0x00000000 0x0 0x80000000>,
+ <0xd 0xf0000000 0x0 0x10000000>,
+ <0x0 0x100d0000 0x0 0x1000>;
+ reg-names = "dbi", "config", "mgmt";
+ device_type = "pci";
+ dma-coherent;
+ bus-range = <0x0 0xff>;
+ ranges = <0x81000000 0x0 0x60080000 0x0 0x60080000 0x0 0x10000>, /* I/O */
+ <0x82000000 0x0 0x60090000 0x0 0x60090000 0x0 0xff70000>, /* mem */
+ <0x82000000 0x0 0x70000000 0x0 0x70000000 0x0 0x1000000>, /* mem */
+ <0xc3000000 0x20 0x00000000 0x20 0x00000000 0x20 0x00000000>; /* mem prefetchable */
+ num-lanes = <0x8>;
+ interrupts = <56>, <57>, <58>, <59>, <60>, <61>, <62>, <63>, <64>;
+ interrupt-names = "msi", "inta", "intb", "intc", "intd";
+ interrupt-parent = <&plic0>;
+ interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+ interrupt-map = <0x0 0x0 0x0 0x1 &plic0 57>,
+ <0x0 0x0 0x0 0x2 &plic0 58>,
+ <0x0 0x0 0x0 0x3 &plic0 59>,
+ <0x0 0x0 0x0 0x4 &plic0 60>;
+ clock-names = "pcie_aux";
+ clocks = <&prci PRCI_CLK_PCIE_AUX>;
+ pwren-gpios = <&gpio 5 0>;
+ reset-gpios = <&gpio 8 0>;
+ resets = <&prci 4>;
+ status = "okay";
+ };
};
};
/* SPDX-License-Identifier: GPL-2.0 */
#include <asm/page.h>
+#include <asm/pgtable.h>
OUTPUT_ARCH(riscv)
ENTRY(_start)
SECTIONS
{
- . = PAGE_OFFSET;
+ . = KERNEL_LINK_ADDR;
.payload : {
*(.payload)
CONFIG_BPF_SYSCALL=y
CONFIG_SOC_SIFIVE=y
CONFIG_SOC_VIRT=y
+CONFIG_SOC_MICROCHIP_POLARFIRE=y
CONFIG_SMP=y
CONFIG_HOTPLUG_CPU=y
CONFIG_JUMP_LABEL=y
CONFIG_USB_OHCI_HCD_PLATFORM=y
CONFIG_USB_STORAGE=y
CONFIG_USB_UAS=y
+CONFIG_MMC_SDHCI=y
+CONFIG_MMC_SDHCI_PLTFM=y
+CONFIG_MMC_SDHCI_CADENCE=y
CONFIG_MMC=y
CONFIG_MMC_SPI=y
CONFIG_RTC_CLASS=y
--- /dev/null
+obj-y += alternative.o
+obj-$(CONFIG_ERRATA_SIFIVE) += sifive/
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * alternative runtime patching
+ * inspired by the ARM64 and x86 version
+ *
+ * Copyright (C) 2021 Sifive.
+ */
+
+#include <linux/init.h>
+#include <linux/cpu.h>
+#include <linux/uaccess.h>
+#include <asm/alternative.h>
+#include <asm/sections.h>
+#include <asm/vendorid_list.h>
+#include <asm/sbi.h>
+#include <asm/csr.h>
+
+static struct cpu_manufacturer_info_t {
+ unsigned long vendor_id;
+ unsigned long arch_id;
+ unsigned long imp_id;
+} cpu_mfr_info;
+
+static void (*vendor_patch_func)(struct alt_entry *begin, struct alt_entry *end,
+ unsigned long archid, unsigned long impid);
+
+static inline void __init riscv_fill_cpu_mfr_info(void)
+{
+#ifdef CONFIG_RISCV_M_MODE
+ cpu_mfr_info.vendor_id = csr_read(CSR_MVENDORID);
+ cpu_mfr_info.arch_id = csr_read(CSR_MARCHID);
+ cpu_mfr_info.imp_id = csr_read(CSR_MIMPID);
+#else
+ cpu_mfr_info.vendor_id = sbi_get_mvendorid();
+ cpu_mfr_info.arch_id = sbi_get_marchid();
+ cpu_mfr_info.imp_id = sbi_get_mimpid();
+#endif
+}
+
+static void __init init_alternative(void)
+{
+ riscv_fill_cpu_mfr_info();
+
+ switch (cpu_mfr_info.vendor_id) {
+#ifdef CONFIG_ERRATA_SIFIVE
+ case SIFIVE_VENDOR_ID:
+ vendor_patch_func = sifive_errata_patch_func;
+ break;
+#endif
+ default:
+ vendor_patch_func = NULL;
+ }
+}
+
+/*
+ * This is called very early in the boot process (directly after we run
+ * a feature detect on the boot CPU). No need to worry about other CPUs
+ * here.
+ */
+void __init apply_boot_alternatives(void)
+{
+ /* If called on non-boot cpu things could go wrong */
+ WARN_ON(smp_processor_id() != 0);
+
+ init_alternative();
+
+ if (!vendor_patch_func)
+ return;
+
+ vendor_patch_func((struct alt_entry *)__alt_start,
+ (struct alt_entry *)__alt_end,
+ cpu_mfr_info.arch_id, cpu_mfr_info.imp_id);
+}
+
--- /dev/null
+obj-y += errata_cip_453.o
+obj-y += errata.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Sifive.
+ */
+
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/bug.h>
+#include <asm/patch.h>
+#include <asm/alternative.h>
+#include <asm/vendorid_list.h>
+#include <asm/errata_list.h>
+
+struct errata_info_t {
+ char name[ERRATA_STRING_LENGTH_MAX];
+ bool (*check_func)(unsigned long arch_id, unsigned long impid);
+};
+
+static bool errata_cip_453_check_func(unsigned long arch_id, unsigned long impid)
+{
+ /*
+ * Affected cores:
+ * Architecture ID: 0x8000000000000007
+ * Implement ID: 0x20181004 <= impid <= 0x20191105
+ */
+ if (arch_id != 0x8000000000000007 ||
+ (impid < 0x20181004 || impid > 0x20191105))
+ return false;
+ return true;
+}
+
+static bool errata_cip_1200_check_func(unsigned long arch_id, unsigned long impid)
+{
+ /*
+ * Affected cores:
+ * Architecture ID: 0x8000000000000007 or 0x1
+ * Implement ID: mimpid[23:0] <= 0x200630 and mimpid != 0x01200626
+ */
+ if (arch_id != 0x8000000000000007 && arch_id != 0x1)
+ return false;
+ if ((impid & 0xffffff) > 0x200630 || impid == 0x1200626)
+ return false;
+ return true;
+}
+
+static struct errata_info_t errata_list[ERRATA_SIFIVE_NUMBER] = {
+ {
+ .name = "cip-453",
+ .check_func = errata_cip_453_check_func
+ },
+ {
+ .name = "cip-1200",
+ .check_func = errata_cip_1200_check_func
+ },
+};
+
+static u32 __init sifive_errata_probe(unsigned long archid, unsigned long impid)
+{
+ int idx;
+ u32 cpu_req_errata = 0;
+
+ for (idx = 0; idx < ERRATA_SIFIVE_NUMBER; idx++)
+ if (errata_list[idx].check_func(archid, impid))
+ cpu_req_errata |= (1U << idx);
+
+ return cpu_req_errata;
+}
+
+static void __init warn_miss_errata(u32 miss_errata)
+{
+ int i;
+
+ pr_warn("----------------------------------------------------------------\n");
+ pr_warn("WARNING: Missing the following errata may cause potential issues\n");
+ for (i = 0; i < ERRATA_SIFIVE_NUMBER; i++)
+ if (miss_errata & 0x1 << i)
+ pr_warn("\tSiFive Errata[%d]:%s\n", i, errata_list[i].name);
+ pr_warn("Please enable the corresponding Kconfig to apply them\n");
+ pr_warn("----------------------------------------------------------------\n");
+}
+
+void __init sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
+ unsigned long archid, unsigned long impid)
+{
+ struct alt_entry *alt;
+ u32 cpu_req_errata = sifive_errata_probe(archid, impid);
+ u32 cpu_apply_errata = 0;
+ u32 tmp;
+
+ for (alt = begin; alt < end; alt++) {
+ if (alt->vendor_id != SIFIVE_VENDOR_ID)
+ continue;
+ if (alt->errata_id >= ERRATA_SIFIVE_NUMBER) {
+ WARN(1, "This errata id:%d is not in kernel errata list", alt->errata_id);
+ continue;
+ }
+
+ tmp = (1U << alt->errata_id);
+ if (cpu_req_errata & tmp) {
+ patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len);
+ cpu_apply_errata |= tmp;
+ }
+ }
+ if (cpu_apply_errata != cpu_req_errata)
+ warn_miss_errata(cpu_req_errata - cpu_apply_errata);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+
+#include <linux/linkage.h>
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+#include <asm/alternative.h>
+
+.macro ADD_SIGN_EXT pt_reg badaddr tmp_reg
+ REG_L \badaddr, PT_BADADDR(\pt_reg)
+ li \tmp_reg,1
+ slli \tmp_reg,\tmp_reg,0x26
+ and \tmp_reg,\tmp_reg,\badaddr
+ beqz \tmp_reg, 1f
+ li \tmp_reg,-1
+ slli \tmp_reg,\tmp_reg,0x27
+ or \badaddr,\tmp_reg,\badaddr
+ REG_S \badaddr, PT_BADADDR(\pt_reg)
+1:
+.endm
+
+ENTRY(sifive_cip_453_page_fault_trp)
+ ADD_SIGN_EXT a0, t0, t1
+#ifdef CONFIG_MMU
+ la t0, do_page_fault
+#else
+ la t0, do_trap_unknown
+#endif
+ jr t0
+END(sifive_cip_453_page_fault_trp)
+
+ENTRY(sifive_cip_453_insn_fault_trp)
+ ADD_SIGN_EXT a0, t0, t1
+ la t0, do_trap_insn_fault
+ jr t0
+END(sifive_cip_453_insn_fault_trp)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_ALTERNATIVE_MACROS_H
+#define __ASM_ALTERNATIVE_MACROS_H
+
+#ifdef CONFIG_RISCV_ERRATA_ALTERNATIVE
+
+#ifdef __ASSEMBLY__
+
+.macro ALT_ENTRY oldptr newptr vendor_id errata_id new_len
+ RISCV_PTR \oldptr
+ RISCV_PTR \newptr
+ REG_ASM \vendor_id
+ REG_ASM \new_len
+ .word \errata_id
+.endm
+
+.macro ALT_NEW_CONTENT vendor_id, errata_id, enable = 1, new_c : vararg
+ .if \enable
+ .pushsection .alternative, "a"
+ ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f
+ .popsection
+ .subsection 1
+888 :
+ \new_c
+889 :
+ .previous
+ .org . - (889b - 888b) + (887b - 886b)
+ .org . - (887b - 886b) + (889b - 888b)
+ .endif
+.endm
+
+.macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable
+886 :
+ \old_c
+887 :
+ ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c
+.endm
+
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+ __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)
+
+#else /* !__ASSEMBLY__ */
+
+#include <asm/asm.h>
+#include <linux/stringify.h>
+
+#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen) \
+ RISCV_PTR " " oldptr "\n" \
+ RISCV_PTR " " newptr "\n" \
+ REG_ASM " " vendor_id "\n" \
+ REG_ASM " " newlen "\n" \
+ ".word " errata_id "\n"
+
+#define ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) \
+ ".if " __stringify(enable) " == 1\n" \
+ ".pushsection .alternative, \"a\"\n" \
+ ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \
+ ".popsection\n" \
+ ".subsection 1\n" \
+ "888 :\n" \
+ new_c "\n" \
+ "889 :\n" \
+ ".previous\n" \
+ ".org . - (887b - 886b) + (889b - 888b)\n" \
+ ".org . - (889b - 888b) + (887b - 886b)\n" \
+ ".endif\n"
+
+#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \
+ "886 :\n" \
+ old_c "\n" \
+ "887 :\n" \
+ ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c)
+
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+ __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k))
+
+#endif /* __ASSEMBLY__ */
+
+#else /* !CONFIG_RISCV_ERRATA_ALTERNATIVE*/
+#ifdef __ASSEMBLY__
+
+.macro __ALTERNATIVE_CFG old_c
+ \old_c
+.endm
+
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+ __ALTERNATIVE_CFG old_c
+
+#else /* !__ASSEMBLY__ */
+
+#define __ALTERNATIVE_CFG(old_c) \
+ old_c "\n"
+
+#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \
+ __ALTERNATIVE_CFG(old_c)
+
+#endif /* __ASSEMBLY__ */
+#endif /* CONFIG_RISCV_ERRATA_ALTERNATIVE */
+/*
+ * Usage:
+ * ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k)
+ * in the assembly code. Otherwise,
+ * asm(ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k));
+ *
+ * old_content: The old content which is probably replaced with new content.
+ * new_content: The new content.
+ * vendor_id: The CPU vendor ID.
+ * errata_id: The errata ID.
+ * CONFIG_k: The Kconfig of this errata. When Kconfig is disabled, the old
+ * content will alwyas be executed.
+ */
+#define ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) \
+ _ALTERNATIVE_CFG(old_content, new_content, vendor_id, errata_id, CONFIG_k)
+
+/*
+ * A vendor wants to replace an old_content, but another vendor has used
+ * ALTERNATIVE() to patch its customized content at the same location. In
+ * this case, this vendor can create a new macro ALTERNATIVE_2() based
+ * on the following sample code and then replace ALTERNATIVE() with
+ * ALTERNATIVE_2() to append its customized content.
+ *
+ * .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \
+ * new_c_2, vendor_id_2, errata_id_2, enable_2
+ * 886 :
+ * \old_c
+ * 887 :
+ * ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1
+ * ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2
+ * .endm
+ *
+ * #define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
+ * new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \
+ * __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \
+ * new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2) \
+ *
+ * #define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
+ * new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \
+ * _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \
+ * new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2)
+ *
+ */
+#endif
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Sifive.
+ */
+
+#ifndef __ASM_ALTERNATIVE_H
+#define __ASM_ALTERNATIVE_H
+
+#define ERRATA_STRING_LENGTH_MAX 32
+
+#include <asm/alternative-macros.h>
+
+#ifndef __ASSEMBLY__
+
+#include <linux/init.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <asm/hwcap.h>
+
+void __init apply_boot_alternatives(void);
+
+struct alt_entry {
+ void *old_ptr; /* address of original instruciton or data */
+ void *alt_ptr; /* address of replacement instruction or data */
+ unsigned long vendor_id; /* cpu vendor id */
+ unsigned long alt_len; /* The replacement size */
+ unsigned int errata_id; /* The errata id */
+} __packed;
+
+struct errata_checkfunc_id {
+ unsigned long vendor_id;
+ bool (*func)(struct alt_entry *alt);
+};
+
+void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end,
+ unsigned long archid, unsigned long impid);
+
+#endif
+#endif
#define REG_L __REG_SEL(ld, lw)
#define REG_S __REG_SEL(sd, sw)
#define REG_SC __REG_SEL(sc.d, sc.w)
+#define REG_ASM __REG_SEL(.dword, .word)
#define SZREG __REG_SEL(8, 4)
#define LGREG __REG_SEL(3, 2)
#define CSR_MIP 0x344
#define CSR_PMPCFG0 0x3a0
#define CSR_PMPADDR0 0x3b0
+#define CSR_MVENDORID 0xf11
+#define CSR_MARCHID 0xf12
+#define CSR_MIMPID 0xf13
#define CSR_MHARTID 0xf14
#ifdef CONFIG_RISCV_M_MODE
int uses_interp);
#endif /* CONFIG_MMU */
+#define ELF_CORE_COPY_REGS(dest, regs) \
+do { \
+ *(struct user_regs_struct *)&(dest) = \
+ *(struct user_regs_struct *)regs; \
+} while (0);
+
#endif /* _ASM_RISCV_ELF_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 Sifive.
+ */
+#ifndef ASM_ERRATA_LIST_H
+#define ASM_ERRATA_LIST_H
+
+#include <asm/alternative.h>
+#include <asm/vendorid_list.h>
+
+#ifdef CONFIG_ERRATA_SIFIVE
+#define ERRATA_SIFIVE_CIP_453 0
+#define ERRATA_SIFIVE_CIP_1200 1
+#define ERRATA_SIFIVE_NUMBER 2
+#endif
+
+#ifdef __ASSEMBLY__
+
+#define ALT_INSN_FAULT(x) \
+ALTERNATIVE(__stringify(RISCV_PTR do_trap_insn_fault), \
+ __stringify(RISCV_PTR sifive_cip_453_insn_fault_trp), \
+ SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \
+ CONFIG_ERRATA_SIFIVE_CIP_453)
+
+#define ALT_PAGE_FAULT(x) \
+ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \
+ __stringify(RISCV_PTR sifive_cip_453_page_fault_trp), \
+ SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \
+ CONFIG_ERRATA_SIFIVE_CIP_453)
+#else /* !__ASSEMBLY__ */
+
+#define ALT_FLUSH_TLB_PAGE(x) \
+asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \
+ ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \
+ : : "r" (addr) : "memory")
+
+#endif /* __ASSEMBLY__ */
+
+#endif
#endif
#define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
+/*
+ * Clang prior to 13 had "mcount" instead of "_mcount":
+ * https://reviews.llvm.org/D98881
+ */
+#if defined(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 130000
+#define MCOUNT_NAME _mcount
+#else
+#define MCOUNT_NAME mcount
+#endif
+
#define ARCH_SUPPORTS_FTRACE_OPS 1
#ifndef __ASSEMBLY__
-void _mcount(void);
+void MCOUNT_NAME(void);
static inline unsigned long ftrace_call_adjust(unsigned long addr)
{
return addr;
* both auipc and jalr at the same time.
*/
-#define MCOUNT_ADDR ((unsigned long)_mcount)
+#define MCOUNT_ADDR ((unsigned long)MCOUNT_NAME)
#define JALR_SIGN_MASK (0x00000800)
#define JALR_OFFSET_MASK (0x00000fff)
#define AUIPC_OFFSET_MASK (0xfffff000)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
+ */
+
+#ifndef _RISCV_KEXEC_H
+#define _RISCV_KEXEC_H
+
+#include <asm/page.h> /* For PAGE_SIZE */
+
+/* Maximum physical address we can use pages from */
+#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can reach in physical address mode */
+#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL)
+
+/* Maximum address we can use for the control code buffer */
+#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL)
+
+/* Reserve a page for the control code buffer */
+#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE
+
+#define KEXEC_ARCH KEXEC_ARCH_RISCV
+
+extern void riscv_crash_save_regs(struct pt_regs *newregs);
+
+static inline void
+crash_setup_regs(struct pt_regs *newregs,
+ struct pt_regs *oldregs)
+{
+ if (oldregs)
+ memcpy(newregs, oldregs, sizeof(struct pt_regs));
+ else
+ riscv_crash_save_regs(newregs);
+}
+
+
+#define ARCH_HAS_KIMAGE_ARCH
+
+struct kimage_arch {
+ unsigned long fdt_addr;
+};
+
+const extern unsigned char riscv_kexec_relocate[];
+const extern unsigned int riscv_kexec_relocate_size;
+
+typedef void (*riscv_kexec_method)(unsigned long first_ind_entry,
+ unsigned long jump_addr,
+ unsigned long fdt_addr,
+ unsigned long hartid,
+ unsigned long va_pa_off);
+
+extern riscv_kexec_method riscv_kexec_norelocate;
+
+#endif
#ifdef CONFIG_MMU
extern unsigned long va_pa_offset;
+#ifdef CONFIG_64BIT
+extern unsigned long va_kernel_pa_offset;
+#endif
+#ifdef CONFIG_XIP_KERNEL
+extern unsigned long va_kernel_xip_pa_offset;
+#endif
extern unsigned long pfn_base;
#define ARCH_PFN_OFFSET (pfn_base)
#else
#define va_pa_offset 0
+#ifdef CONFIG_64BIT
+#define va_kernel_pa_offset 0
+#endif
#define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT)
#endif /* CONFIG_MMU */
-#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
-#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
+extern unsigned long kernel_virt_addr;
+
+#ifdef CONFIG_64BIT
+#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_pa_offset))
+#ifdef CONFIG_XIP_KERNEL
+#define kernel_mapping_pa_to_va(y) ({ \
+ unsigned long _y = y; \
+ (_y >= CONFIG_PHYS_RAM_BASE) ? \
+ (void *)((unsigned long)(_y) + va_kernel_pa_offset + XIP_OFFSET) : \
+ (void *)((unsigned long)(_y) + va_kernel_xip_pa_offset); \
+ })
+#else
+#define kernel_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_kernel_pa_offset))
+#endif
+#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x)
+
+#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset)
+#ifdef CONFIG_XIP_KERNEL
+#define kernel_mapping_va_to_pa(y) ({ \
+ unsigned long _y = y; \
+ (_y < kernel_virt_addr + XIP_OFFSET) ? \
+ ((unsigned long)(_y) - va_kernel_xip_pa_offset) : \
+ ((unsigned long)(_y) - va_kernel_pa_offset - XIP_OFFSET); \
+ })
+#else
+#define kernel_mapping_va_to_pa(x) ((unsigned long)(x) - va_kernel_pa_offset)
+#endif
+#define __va_to_pa_nodebug(x) ({ \
+ unsigned long _x = x; \
+ (_x < kernel_virt_addr) ? \
+ linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \
+ })
+#else
+#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset))
+#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset)
+#endif
#ifdef CONFIG_DEBUG_VIRTUAL
extern phys_addr_t __virt_to_phys(unsigned long x);
#include <asm/pgtable-bits.h>
-#ifndef __ASSEMBLY__
+#ifndef CONFIG_MMU
+#define KERNEL_LINK_ADDR PAGE_OFFSET
+#else
-/* Page Upper Directory not used in RISC-V */
-#include <asm-generic/pgtable-nopud.h>
-#include <asm/page.h>
-#include <asm/tlbflush.h>
-#include <linux/mm_types.h>
+#define ADDRESS_SPACE_END (UL(-1))
-#ifdef CONFIG_MMU
+#ifdef CONFIG_64BIT
+/* Leave 2GB for kernel and BPF at the end of the address space */
+#define KERNEL_LINK_ADDR (ADDRESS_SPACE_END - SZ_2G + 1)
+#else
+#define KERNEL_LINK_ADDR PAGE_OFFSET
+#endif
#define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1)
#define VMALLOC_END (PAGE_OFFSET - 1)
#define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE)
#define BPF_JIT_REGION_SIZE (SZ_128M)
+#ifdef CONFIG_64BIT
+/* KASLR should leave at least 128MB for BPF after the kernel */
+#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end)
+#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE)
+#else
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END (VMALLOC_END)
+#endif
+
+/* Modules always live before the kernel */
+#ifdef CONFIG_64BIT
+#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G)
+#define MODULES_END (PFN_ALIGN((unsigned long)&_start))
+#endif
/*
* Roughly size the vmemmap space to be large enough to fit enough
#endif
+#ifdef CONFIG_XIP_KERNEL
+#define XIP_OFFSET SZ_8M
+#endif
+
+#ifndef __ASSEMBLY__
+
+/* Page Upper Directory not used in RISC-V */
+#include <asm-generic/pgtable-nopud.h>
+#include <asm/page.h>
+#include <asm/tlbflush.h>
+#include <linux/mm_types.h>
+
#ifdef CONFIG_64BIT
#include <asm/pgtable-64.h>
#else
#include <asm/pgtable-32.h>
#endif /* CONFIG_64BIT */
+#ifdef CONFIG_XIP_KERNEL
+#define XIP_FIXUP(addr) ({ \
+ uintptr_t __a = (uintptr_t)(addr); \
+ (__a >= CONFIG_XIP_PHYS_ADDR && __a < CONFIG_XIP_PHYS_ADDR + SZ_16M) ? \
+ __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\
+ __a; \
+ })
+#else
+#define XIP_FIXUP(addr) (addr)
+#endif /* CONFIG_XIP_KERNEL */
+
#ifdef CONFIG_MMU
/* Number of entries in the page global directory */
#define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t))
#define kern_addr_valid(addr) (1) /* FIXME */
-extern void *dtb_early_va;
-extern uintptr_t dtb_early_pa;
+extern char _start[];
+extern void *_dtb_early_va;
+extern uintptr_t _dtb_early_pa;
+#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_MMU)
+#define dtb_early_va (*(void **)XIP_FIXUP(&_dtb_early_va))
+#define dtb_early_pa (*(uintptr_t *)XIP_FIXUP(&_dtb_early_pa))
+#else
+#define dtb_early_va _dtb_early_va
+#define dtb_early_pa _dtb_early_pa
+#endif /* CONFIG_XIP_KERNEL */
+
void setup_bootmem(void);
void paging_init(void);
void misc_mem_init(void);
void sbi_console_putchar(int ch);
int sbi_console_getchar(void);
+long sbi_get_mvendorid(void);
+long sbi_get_marchid(void);
+long sbi_get_mimpid(void);
void sbi_set_timer(uint64_t stime_value);
void sbi_shutdown(void);
void sbi_clear_ipi(void);
extern char _start_kernel[];
extern char __init_data_begin[], __init_data_end[];
extern char __init_text_begin[], __init_text_end[];
+extern char __alt_start[], __alt_end[];
#endif /* __ASM_SECTIONS_H */
static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
#endif
+#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
+void protect_kernel_linear_mapping_text_rodata(void);
+#else
+static inline void protect_kernel_linear_mapping_text_rodata(void) {}
+#endif
+
int set_direct_map_invalid_noflush(struct page *page);
int set_direct_map_default_noflush(struct page *page);
bool kernel_page_present(struct page *page);
void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out);
/* Set custom IPI operations */
-void riscv_set_ipi_ops(struct riscv_ipi_ops *ops);
+void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops);
/* Clear IPI for current CPU */
void riscv_clear_ipi(void);
cpumask_set_cpu(boot_cpu_hartid, out);
}
-static inline void riscv_set_ipi_ops(struct riscv_ipi_ops *ops)
+static inline void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
{
}
#define memcpy(dst, src, len) __memcpy(dst, src, len)
#define memset(s, c, n) __memset(s, c, n)
#define memmove(dst, src, len) __memmove(dst, src, len)
+
+#ifndef __NO_FORTIFY
+#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */
+#endif
+
#endif
#endif /* _ASM_RISCV_STRING_H */
#include <linux/err.h>
/* The array of function pointers for syscalls. */
-extern void *sys_call_table[];
+extern void * const sys_call_table[];
/*
* Only the low 32 bits of orig_r0 are meaningful, so we return int.
#include <linux/mm_types.h>
#include <asm/smp.h>
+#include <asm/errata_list.h>
#ifdef CONFIG_MMU
static inline void local_flush_tlb_all(void)
/* Flush one page from local TLB */
static inline void local_flush_tlb_page(unsigned long addr)
{
- __asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory");
+ ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"));
}
#else /* CONFIG_MMU */
#define local_flush_tlb_all() do { } while (0)
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 SiFive
+ */
+#ifndef ASM_VENDOR_LIST_H
+#define ASM_VENDOR_LIST_H
+
+#define SIFIVE_VENDOR_ID 0x489
+
+#endif
endif
CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
+ifdef CONFIG_KEXEC
+AFLAGS_kexec_relocate.o := -mcmodel=medany -mno-relax
+endif
+
extra-y += head.o
extra-y += vmlinux.lds
endif
obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o
obj-$(CONFIG_KGDB) += kgdb.o
+obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o
+obj-$(CONFIG_CRASH_DUMP) += crash_dump.o
obj-$(CONFIG_JUMP_LABEL) += jump_label.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This code comes from arch/arm64/kernel/crash_dump.c
+ * Created by: AKASHI Takahiro <takahiro.akashi@linaro.org>
+ * Copyright (C) 2017 Linaro Limited
+ */
+
+#include <linux/crash_dump.h>
+#include <linux/io.h>
+
+/**
+ * copy_oldmem_page() - copy one page from old kernel memory
+ * @pfn: page frame number to be copied
+ * @buf: buffer where the copied page is placed
+ * @csize: number of bytes to copy
+ * @offset: offset in bytes into the page
+ * @userbuf: if set, @buf is in a user address space
+ *
+ * This function copies one page from old kernel memory into buffer pointed by
+ * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes
+ * copied or negative error in case of failure.
+ */
+ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
+ size_t csize, unsigned long offset,
+ int userbuf)
+{
+ void *vaddr;
+
+ if (!csize)
+ return 0;
+
+ vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB);
+ if (!vaddr)
+ return -ENOMEM;
+
+ if (userbuf) {
+ if (copy_to_user((char __user *)buf, vaddr + offset, csize)) {
+ memunmap(vaddr);
+ return -EFAULT;
+ }
+ } else
+ memcpy(buf, vaddr + offset, csize);
+
+ memunmap(vaddr);
+ return csize;
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
+ */
+
+#include <asm/asm.h> /* For RISCV_* and REG_* macros */
+#include <asm/csr.h> /* For CSR_* macros */
+#include <asm/asm-offsets.h> /* For offsets on pt_regs */
+#include <linux/linkage.h> /* For SYM_* macros */
+
+.section ".text"
+SYM_CODE_START(riscv_crash_save_regs)
+ REG_S ra, PT_RA(a0) /* x1 */
+ REG_S sp, PT_SP(a0) /* x2 */
+ REG_S gp, PT_GP(a0) /* x3 */
+ REG_S tp, PT_TP(a0) /* x4 */
+ REG_S t0, PT_T0(a0) /* x5 */
+ REG_S t1, PT_T1(a0) /* x6 */
+ REG_S t2, PT_T2(a0) /* x7 */
+ REG_S s0, PT_S0(a0) /* x8/fp */
+ REG_S s1, PT_S1(a0) /* x9 */
+ REG_S a0, PT_A0(a0) /* x10 */
+ REG_S a1, PT_A1(a0) /* x11 */
+ REG_S a2, PT_A2(a0) /* x12 */
+ REG_S a3, PT_A3(a0) /* x13 */
+ REG_S a4, PT_A4(a0) /* x14 */
+ REG_S a5, PT_A5(a0) /* x15 */
+ REG_S a6, PT_A6(a0) /* x16 */
+ REG_S a7, PT_A7(a0) /* x17 */
+ REG_S s2, PT_S2(a0) /* x18 */
+ REG_S s3, PT_S3(a0) /* x19 */
+ REG_S s4, PT_S4(a0) /* x20 */
+ REG_S s5, PT_S5(a0) /* x21 */
+ REG_S s6, PT_S6(a0) /* x22 */
+ REG_S s7, PT_S7(a0) /* x23 */
+ REG_S s8, PT_S8(a0) /* x24 */
+ REG_S s9, PT_S9(a0) /* x25 */
+ REG_S s10, PT_S10(a0) /* x26 */
+ REG_S s11, PT_S11(a0) /* x27 */
+ REG_S t3, PT_T3(a0) /* x28 */
+ REG_S t4, PT_T4(a0) /* x29 */
+ REG_S t5, PT_T5(a0) /* x30 */
+ REG_S t6, PT_T6(a0) /* x31 */
+
+ csrr t1, CSR_STATUS
+ csrr t2, CSR_EPC
+ csrr t3, CSR_TVAL
+ csrr t4, CSR_CAUSE
+
+ REG_S t1, PT_STATUS(a0)
+ REG_S t2, PT_EPC(a0)
+ REG_S t3, PT_BADADDR(a0)
+ REG_S t4, PT_CAUSE(a0)
+ ret
+SYM_CODE_END(riscv_crash_save_regs)
#include <asm/unistd.h>
#include <asm/thread_info.h>
#include <asm/asm-offsets.h>
+#include <asm/errata_list.h>
#if !IS_ENABLED(CONFIG_PREEMPTION)
.set resume_kernel, restore_all
/* Exception vector table */
ENTRY(excp_vect_table)
RISCV_PTR do_trap_insn_misaligned
- RISCV_PTR do_trap_insn_fault
+ ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault)
RISCV_PTR do_trap_insn_illegal
RISCV_PTR do_trap_break
RISCV_PTR do_trap_load_misaligned
RISCV_PTR do_trap_ecall_s
RISCV_PTR do_trap_unknown
RISCV_PTR do_trap_ecall_m
- RISCV_PTR do_page_fault /* instruction page fault */
+ /* instruciton page fault */
+ ALT_PAGE_FAULT(RISCV_PTR do_page_fault)
RISCV_PTR do_page_fault /* load page fault */
RISCV_PTR do_trap_unknown
RISCV_PTR do_page_fault /* store page fault */
#include <linux/linkage.h>
#include <asm/thread_info.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/csr.h>
#include <asm/hwcap.h>
#include <asm/image.h>
#include "efi-header.S"
+#ifdef CONFIG_XIP_KERNEL
+.macro XIP_FIXUP_OFFSET reg
+ REG_L t0, _xip_fixup
+ add \reg, \reg, t0
+.endm
+_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET
+#else
+.macro XIP_FIXUP_OFFSET reg
+.endm
+#endif /* CONFIG_XIP_KERNEL */
+
__HEAD
ENTRY(_start)
/*
#ifdef CONFIG_MMU
relocate:
/* Relocate return address */
- li a1, PAGE_OFFSET
+ la a1, kernel_virt_addr
+ XIP_FIXUP_OFFSET a1
+ REG_L a1, 0(a1)
la a2, _start
sub a1, a1, a2
add ra, ra, a1
* to ensure the new translations are in use.
*/
la a0, trampoline_pg_dir
+ XIP_FIXUP_OFFSET a0
srl a0, a0, PAGE_SHIFT
or a0, a0, a1
sfence.vma
slli a3, a0, LGREG
la a4, __cpu_up_stack_pointer
+ XIP_FIXUP_OFFSET a4
la a5, __cpu_up_task_pointer
+ XIP_FIXUP_OFFSET a5
add a4, a3, a4
add a5, a3, a5
REG_L sp, (a4)
#ifdef CONFIG_MMU
/* Enable virtual memory and relocate to virtual address */
la a0, swapper_pg_dir
+ XIP_FIXUP_OFFSET a0
call relocate
#endif
call setup_trap_vector
.Lgood_cores:
#endif
+#ifndef CONFIG_XIP_KERNEL
/* Pick one hart to run the main boot sequence */
la a3, hart_lottery
li a2, 1
amoadd.w a3, a2, (a3)
bnez a3, .Lsecondary_start
+#else
+ /* hart_lottery in flash contains a magic number */
+ la a3, hart_lottery
+ mv a2, a3
+ XIP_FIXUP_OFFSET a2
+ lw t1, (a3)
+ amoswap.w t0, t1, (a2)
+ /* first time here if hart_lottery in RAM is not set */
+ beq t0, t1, .Lsecondary_start
+
+ la sp, _end + THREAD_SIZE
+ XIP_FIXUP_OFFSET sp
+ mv s0, a0
+ call __copy_data
+
+ /* Restore a0 copy */
+ mv a0, s0
+#endif
+
+#ifndef CONFIG_XIP_KERNEL
/* Clear BSS for flat non-ELF images */
la a3, __bss_start
la a4, __bss_stop
add a3, a3, RISCV_SZPTR
blt a3, a4, clear_bss
clear_bss_done:
-
+#endif
/* Save hart ID and DTB physical address */
mv s0, a0
mv s1, a1
+
la a2, boot_cpu_hartid
+ XIP_FIXUP_OFFSET a2
REG_S a0, (a2)
/* Initialize page tables and relocate to virtual addresses */
la sp, init_thread_union + THREAD_SIZE
+ XIP_FIXUP_OFFSET sp
#ifdef CONFIG_BUILTIN_DTB
la a0, __dtb_start
#else
call setup_vm
#ifdef CONFIG_MMU
la a0, early_pg_dir
+ XIP_FIXUP_OFFSET a0
call relocate
#endif /* CONFIG_MMU */
slli a3, a0, LGREG
la a1, __cpu_up_stack_pointer
+ XIP_FIXUP_OFFSET a1
la a2, __cpu_up_task_pointer
+ XIP_FIXUP_OFFSET a2
add a1, a3, a1
add a2, a3, a2
asmlinkage void do_page_fault(struct pt_regs *regs);
asmlinkage void __init setup_vm(uintptr_t dtb_pa);
+#ifdef CONFIG_XIP_KERNEL
+asmlinkage void __init __copy_data(void);
+#endif
extern void *__cpu_up_stack_pointer[];
extern void *__cpu_up_task_pointer[];
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
+ */
+
+#include <asm/asm.h> /* For RISCV_* and REG_* macros */
+#include <asm/csr.h> /* For CSR_* macros */
+#include <asm/page.h> /* For PAGE_SIZE */
+#include <linux/linkage.h> /* For SYM_* macros */
+
+.section ".rodata"
+SYM_CODE_START(riscv_kexec_relocate)
+
+ /*
+ * s0: Pointer to the current entry
+ * s1: (const) Phys address to jump to after relocation
+ * s2: (const) Phys address of the FDT image
+ * s3: (const) The hartid of the current hart
+ * s4: Pointer to the destination address for the relocation
+ * s5: (const) Number of words per page
+ * s6: (const) 1, used for subtraction
+ * s7: (const) va_pa_offset, used when switching MMU off
+ * s8: (const) Physical address of the main loop
+ * s9: (debug) indirection page counter
+ * s10: (debug) entry counter
+ * s11: (debug) copied words counter
+ */
+ mv s0, a0
+ mv s1, a1
+ mv s2, a2
+ mv s3, a3
+ mv s4, zero
+ li s5, (PAGE_SIZE / RISCV_SZPTR)
+ li s6, 1
+ mv s7, a4
+ mv s8, zero
+ mv s9, zero
+ mv s10, zero
+ mv s11, zero
+
+ /* Disable / cleanup interrupts */
+ csrw CSR_SIE, zero
+ csrw CSR_SIP, zero
+
+ /*
+ * When we switch SATP.MODE to "Bare" we'll only
+ * play with physical addresses. However the first time
+ * we try to jump somewhere, the offset on the jump
+ * will be relative to pc which will still be on VA. To
+ * deal with this we set stvec to the physical address at
+ * the start of the loop below so that we jump there in
+ * any case.
+ */
+ la s8, 1f
+ sub s8, s8, s7
+ csrw CSR_STVEC, s8
+
+ /* Process entries in a loop */
+.align 2
+1:
+ addi s10, s10, 1
+ REG_L t0, 0(s0) /* t0 = *image->entry */
+ addi s0, s0, RISCV_SZPTR /* image->entry++ */
+
+ /* IND_DESTINATION entry ? -> save destination address */
+ andi t1, t0, 0x1
+ beqz t1, 2f
+ andi s4, t0, ~0x1
+ j 1b
+
+2:
+ /* IND_INDIRECTION entry ? -> update next entry ptr (PA) */
+ andi t1, t0, 0x2
+ beqz t1, 2f
+ andi s0, t0, ~0x2
+ addi s9, s9, 1
+ csrw CSR_SATP, zero
+ jalr zero, s8, 0
+
+2:
+ /* IND_DONE entry ? -> jump to done label */
+ andi t1, t0, 0x4
+ beqz t1, 2f
+ j 4f
+
+2:
+ /*
+ * IND_SOURCE entry ? -> copy page word by word to the
+ * destination address we got from IND_DESTINATION
+ */
+ andi t1, t0, 0x8
+ beqz t1, 1b /* Unknown entry type, ignore it */
+ andi t0, t0, ~0x8
+ mv t3, s5 /* i = num words per page */
+3: /* copy loop */
+ REG_L t1, (t0) /* t1 = *src_ptr */
+ REG_S t1, (s4) /* *dst_ptr = *src_ptr */
+ addi t0, t0, RISCV_SZPTR /* stc_ptr++ */
+ addi s4, s4, RISCV_SZPTR /* dst_ptr++ */
+ sub t3, t3, s6 /* i-- */
+ addi s11, s11, 1 /* c++ */
+ beqz t3, 1b /* copy done ? */
+ j 3b
+
+4:
+ /* Pass the arguments to the next kernel / Cleanup*/
+ mv a0, s3
+ mv a1, s2
+ mv a2, s1
+
+ /* Cleanup */
+ mv a3, zero
+ mv a4, zero
+ mv a5, zero
+ mv a6, zero
+ mv a7, zero
+
+ mv s0, zero
+ mv s1, zero
+ mv s2, zero
+ mv s3, zero
+ mv s4, zero
+ mv s5, zero
+ mv s6, zero
+ mv s7, zero
+ mv s8, zero
+ mv s9, zero
+ mv s10, zero
+ mv s11, zero
+
+ mv t0, zero
+ mv t1, zero
+ mv t2, zero
+ mv t3, zero
+ mv t4, zero
+ mv t5, zero
+ mv t6, zero
+ csrw CSR_SEPC, zero
+ csrw CSR_SCAUSE, zero
+ csrw CSR_SSCRATCH, zero
+
+ /*
+ * Make sure the relocated code is visible
+ * and jump to the new kernel
+ */
+ fence.i
+
+ jalr zero, a2, 0
+
+SYM_CODE_END(riscv_kexec_relocate)
+riscv_kexec_relocate_end:
+
+
+/* Used for jumping to crashkernel */
+.section ".text"
+SYM_CODE_START(riscv_kexec_norelocate)
+ /*
+ * s0: (const) Phys address to jump to
+ * s1: (const) Phys address of the FDT image
+ * s2: (const) The hartid of the current hart
+ * s3: (const) va_pa_offset, used when switching MMU off
+ */
+ mv s0, a1
+ mv s1, a2
+ mv s2, a3
+ mv s3, a4
+
+ /* Disable / cleanup interrupts */
+ csrw CSR_SIE, zero
+ csrw CSR_SIP, zero
+
+ /* Switch to physical addressing */
+ la s4, 1f
+ sub s4, s4, s3
+ csrw CSR_STVEC, s4
+ csrw CSR_SATP, zero
+
+.align 2
+1:
+ /* Pass the arguments to the next kernel / Cleanup*/
+ mv a0, s2
+ mv a1, s1
+ mv a2, s0
+
+ /* Cleanup */
+ mv a3, zero
+ mv a4, zero
+ mv a5, zero
+ mv a6, zero
+ mv a7, zero
+
+ mv s0, zero
+ mv s1, zero
+ mv s2, zero
+ mv s3, zero
+ mv s4, zero
+ mv s5, zero
+ mv s6, zero
+ mv s7, zero
+ mv s8, zero
+ mv s9, zero
+ mv s10, zero
+ mv s11, zero
+
+ mv t0, zero
+ mv t1, zero
+ mv t2, zero
+ mv t3, zero
+ mv t4, zero
+ mv t5, zero
+ mv t6, zero
+ csrw CSR_SEPC, zero
+ csrw CSR_SCAUSE, zero
+ csrw CSR_SSCRATCH, zero
+
+ jalr zero, a2, 0
+SYM_CODE_END(riscv_kexec_norelocate)
+
+.section ".rodata"
+SYM_DATA(riscv_kexec_relocate_size,
+ .long riscv_kexec_relocate_end - riscv_kexec_relocate)
+
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
+ */
+
+#include <linux/kexec.h>
+#include <asm/kexec.h> /* For riscv_kexec_* symbol defines */
+#include <linux/smp.h> /* For smp_send_stop () */
+#include <asm/cacheflush.h> /* For local_flush_icache_all() */
+#include <asm/barrier.h> /* For smp_wmb() */
+#include <asm/page.h> /* For PAGE_MASK */
+#include <linux/libfdt.h> /* For fdt_check_header() */
+#include <asm/set_memory.h> /* For set_memory_x() */
+#include <linux/compiler.h> /* For unreachable() */
+#include <linux/cpu.h> /* For cpu_down() */
+
+/**
+ * kexec_image_info - Print received image details
+ */
+static void
+kexec_image_info(const struct kimage *image)
+{
+ unsigned long i;
+
+ pr_debug("Kexec image info:\n");
+ pr_debug("\ttype: %d\n", image->type);
+ pr_debug("\tstart: %lx\n", image->start);
+ pr_debug("\thead: %lx\n", image->head);
+ pr_debug("\tnr_segments: %lu\n", image->nr_segments);
+
+ for (i = 0; i < image->nr_segments; i++) {
+ pr_debug("\t segment[%lu]: %016lx - %016lx", i,
+ image->segment[i].mem,
+ image->segment[i].mem + image->segment[i].memsz);
+ pr_debug("\t\t0x%lx bytes, %lu pages\n",
+ (unsigned long) image->segment[i].memsz,
+ (unsigned long) image->segment[i].memsz / PAGE_SIZE);
+ }
+}
+
+/**
+ * machine_kexec_prepare - Initialize kexec
+ *
+ * This function is called from do_kexec_load, when the user has
+ * provided us with an image to be loaded. Its goal is to validate
+ * the image and prepare the control code buffer as needed.
+ * Note that kimage_alloc_init has already been called and the
+ * control buffer has already been allocated.
+ */
+int
+machine_kexec_prepare(struct kimage *image)
+{
+ struct kimage_arch *internal = &image->arch;
+ struct fdt_header fdt = {0};
+ void *control_code_buffer = NULL;
+ unsigned int control_code_buffer_sz = 0;
+ int i = 0;
+
+ kexec_image_info(image);
+
+ /* Find the Flattened Device Tree and save its physical address */
+ for (i = 0; i < image->nr_segments; i++) {
+ if (image->segment[i].memsz <= sizeof(fdt))
+ continue;
+
+ if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt)))
+ continue;
+
+ if (fdt_check_header(&fdt))
+ continue;
+
+ internal->fdt_addr = (unsigned long) image->segment[i].mem;
+ break;
+ }
+
+ if (!internal->fdt_addr) {
+ pr_err("Device tree not included in the provided image\n");
+ return -EINVAL;
+ }
+
+ /* Copy the assembler code for relocation to the control page */
+ if (image->type != KEXEC_TYPE_CRASH) {
+ control_code_buffer = page_address(image->control_code_page);
+ control_code_buffer_sz = page_size(image->control_code_page);
+
+ if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) {
+ pr_err("Relocation code doesn't fit within a control page\n");
+ return -EINVAL;
+ }
+
+ memcpy(control_code_buffer, riscv_kexec_relocate,
+ riscv_kexec_relocate_size);
+
+ /* Mark the control page executable */
+ set_memory_x((unsigned long) control_code_buffer, 1);
+ }
+
+ return 0;
+}
+
+
+/**
+ * machine_kexec_cleanup - Cleanup any leftovers from
+ * machine_kexec_prepare
+ *
+ * This function is called by kimage_free to handle any arch-specific
+ * allocations done on machine_kexec_prepare. Since we didn't do any
+ * allocations there, this is just an empty function. Note that the
+ * control buffer is freed by kimage_free.
+ */
+void
+machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+
+/*
+ * machine_shutdown - Prepare for a kexec reboot
+ *
+ * This function is called by kernel_kexec just before machine_kexec
+ * below. Its goal is to prepare the rest of the system (the other
+ * harts and possibly devices etc) for a kexec reboot.
+ */
+void machine_shutdown(void)
+{
+ /*
+ * No more interrupts on this hart
+ * until we are back up.
+ */
+ local_irq_disable();
+
+#if defined(CONFIG_HOTPLUG_CPU)
+ smp_shutdown_nonboot_cpus(smp_processor_id());
+#endif
+}
+
+/**
+ * machine_crash_shutdown - Prepare to kexec after a kernel crash
+ *
+ * This function is called by crash_kexec just before machine_kexec
+ * below and its goal is similar to machine_shutdown, but in case of
+ * a kernel crash. Since we don't handle such cases yet, this function
+ * is empty.
+ */
+void
+machine_crash_shutdown(struct pt_regs *regs)
+{
+ crash_save_cpu(regs, smp_processor_id());
+ machine_shutdown();
+ pr_info("Starting crashdump kernel...\n");
+}
+
+/**
+ * machine_kexec - Jump to the loaded kimage
+ *
+ * This function is called by kernel_kexec which is called by the
+ * reboot system call when the reboot cmd is LINUX_REBOOT_CMD_KEXEC,
+ * or by crash_kernel which is called by the kernel's arch-specific
+ * trap handler in case of a kernel panic. It's the final stage of
+ * the kexec process where the pre-loaded kimage is ready to be
+ * executed. We assume at this point that all other harts are
+ * suspended and this hart will be the new boot hart.
+ */
+void __noreturn
+machine_kexec(struct kimage *image)
+{
+ struct kimage_arch *internal = &image->arch;
+ unsigned long jump_addr = (unsigned long) image->start;
+ unsigned long first_ind_entry = (unsigned long) &image->head;
+ unsigned long this_hart_id = raw_smp_processor_id();
+ unsigned long fdt_addr = internal->fdt_addr;
+ void *control_code_buffer = page_address(image->control_code_page);
+ riscv_kexec_method kexec_method = NULL;
+
+ if (image->type != KEXEC_TYPE_CRASH)
+ kexec_method = control_code_buffer;
+ else
+ kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate;
+
+ pr_notice("Will call new kernel at %08lx from hart id %lx\n",
+ jump_addr, this_hart_id);
+ pr_notice("FDT image at %08lx\n", fdt_addr);
+
+ /* Make sure the relocation code is visible to the hart */
+ local_flush_icache_all();
+
+ /* Jump to the relocation code */
+ pr_notice("Bye...\n");
+ kexec_method(first_ind_entry, jump_addr, fdt_addr,
+ this_hart_id, va_pa_offset);
+ unreachable();
+}
ENTRY(ftrace_stub)
#ifdef CONFIG_DYNAMIC_FTRACE
- .global _mcount
- .set _mcount, ftrace_stub
+ .global MCOUNT_NAME
+ .set MCOUNT_NAME, ftrace_stub
#endif
ret
ENDPROC(ftrace_stub)
#endif
#ifndef CONFIG_DYNAMIC_FTRACE
-ENTRY(_mcount)
+ENTRY(MCOUNT_NAME)
la t4, ftrace_stub
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
la t0, ftrace_graph_return
jalr t5
RESTORE_ABI_STATE
ret
-ENDPROC(_mcount)
+ENDPROC(MCOUNT_NAME)
#endif
-EXPORT_SYMBOL(_mcount)
+EXPORT_SYMBOL(MCOUNT_NAME)
}
#if defined(CONFIG_MMU) && defined(CONFIG_64BIT)
-#define VMALLOC_MODULE_START \
- max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START)
void *module_alloc(unsigned long size)
{
- return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START,
- VMALLOC_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+ return __vmalloc_node_range(size, 1, MODULES_VADDR,
+ MODULES_END, GFP_KERNEL,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
#endif
return 0;
}
+void *alloc_insn_page(void)
+{
+ return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
+ GFP_KERNEL, PAGE_KERNEL_READ_EXEC,
+ VM_FLUSH_RESET_PERMS, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+
/* install breakpoint in text */
void __kprobes arch_arm_kprobe(struct kprobe *p)
{
if (kcb->kprobe_status == KPROBE_REENTER)
restore_previous_kprobe(kcb);
- else
+ else {
+ kprobes_restore_local_irqflag(kcb, regs);
reset_current_kprobe();
+ }
break;
case KPROBE_HIT_ACTIVE:
#include <asm/smp.h>
/* default SBI version is 0.1 */
-unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT;
+unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT;
EXPORT_SYMBOL(sbi_spec_version);
-static void (*__sbi_set_timer)(uint64_t stime);
-static int (*__sbi_send_ipi)(const unsigned long *hart_mask);
+static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init;
+static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init;
static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask,
unsigned long start, unsigned long size,
- unsigned long arg4, unsigned long arg5);
+ unsigned long arg4, unsigned long arg5) __ro_after_init;
struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
unsigned long arg1, unsigned long arg2,
return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_VERSION);
}
+long sbi_get_mvendorid(void)
+{
+ return __sbi_base_ecall(SBI_EXT_BASE_GET_MVENDORID);
+}
+
+long sbi_get_marchid(void)
+{
+ return __sbi_base_ecall(SBI_EXT_BASE_GET_MARCHID);
+}
+
+long sbi_get_mimpid(void)
+{
+ return __sbi_base_ecall(SBI_EXT_BASE_GET_MIMPID);
+}
+
static void sbi_send_cpumask_ipi(const struct cpumask *target)
{
struct cpumask hartid_mask;
sbi_send_ipi(cpumask_bits(&hartid_mask));
}
-static struct riscv_ipi_ops sbi_ipi_ops = {
+static const struct riscv_ipi_ops sbi_ipi_ops = {
.ipi_inject = sbi_send_cpumask_ipi
};
sbi_get_firmware_id(), sbi_get_firmware_version());
if (sbi_probe_extension(SBI_EXT_TIME) > 0) {
__sbi_set_timer = __sbi_set_timer_v02;
- pr_info("SBI v0.2 TIME extension detected\n");
+ pr_info("SBI TIME extension detected\n");
} else {
__sbi_set_timer = __sbi_set_timer_v01;
}
if (sbi_probe_extension(SBI_EXT_IPI) > 0) {
__sbi_send_ipi = __sbi_send_ipi_v02;
- pr_info("SBI v0.2 IPI extension detected\n");
+ pr_info("SBI IPI extension detected\n");
} else {
__sbi_send_ipi = __sbi_send_ipi_v01;
}
if (sbi_probe_extension(SBI_EXT_RFENCE) > 0) {
__sbi_rfence = __sbi_rfence_v02;
- pr_info("SBI v0.2 RFENCE extension detected\n");
+ pr_info("SBI RFENCE extension detected\n");
} else {
__sbi_rfence = __sbi_rfence_v01;
}
#include <linux/swiotlb.h>
#include <linux/smp.h>
#include <linux/efi.h>
+#include <linux/crash_dump.h>
#include <asm/cpu_ops.h>
#include <asm/early_ioremap.h>
+#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/set_memory.h>
#include <asm/sections.h>
* This is used before the kernel initializes the BSS so it can't be in the
* BSS.
*/
-atomic_t hart_lottery __section(".sdata");
+atomic_t hart_lottery __section(".sdata")
+#ifdef CONFIG_XIP_KERNEL
+= ATOMIC_INIT(0xC001BEEF)
+#endif
+;
unsigned long boot_cpu_hartid;
static DEFINE_PER_CPU(struct cpu, cpu_devices);
* also add "System RAM" regions for compatibility with other
* archs, and the rest of the known regions for completeness.
*/
+static struct resource kimage_res = { .name = "Kernel image", };
static struct resource code_res = { .name = "Kernel code", };
static struct resource data_res = { .name = "Kernel data", };
static struct resource rodata_res = { .name = "Kernel rodata", };
static struct resource bss_res = { .name = "Kernel bss", };
+#ifdef CONFIG_CRASH_DUMP
+static struct resource elfcorehdr_res = { .name = "ELF Core hdr", };
+#endif
static int __init add_resource(struct resource *parent,
struct resource *res)
return 1;
}
-static int __init add_kernel_resources(struct resource *res)
+static int __init add_kernel_resources(void)
{
int ret = 0;
/*
* The memory region of the kernel image is continuous and
- * was reserved on setup_bootmem, find it here and register
- * it as a resource, then register the various segments of
- * the image as child nodes
+ * was reserved on setup_bootmem, register it here as a
+ * resource, with the various segments of the image as
+ * child nodes.
*/
- if (!(res->start <= code_res.start && res->end >= data_res.end))
- return 0;
- res->name = "Kernel image";
- res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ code_res.start = __pa_symbol(_text);
+ code_res.end = __pa_symbol(_etext) - 1;
+ code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
- /*
- * We removed a part of this region on setup_bootmem so
- * we need to expand the resource for the bss to fit in.
- */
- res->end = bss_res.end;
+ rodata_res.start = __pa_symbol(__start_rodata);
+ rodata_res.end = __pa_symbol(__end_rodata) - 1;
+ rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
- ret = add_resource(&iomem_resource, res);
+ data_res.start = __pa_symbol(_data);
+ data_res.end = __pa_symbol(_edata) - 1;
+ data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ bss_res.start = __pa_symbol(__bss_start);
+ bss_res.end = __pa_symbol(__bss_stop) - 1;
+ bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ kimage_res.start = code_res.start;
+ kimage_res.end = bss_res.end;
+ kimage_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+
+ ret = add_resource(&iomem_resource, &kimage_res);
if (ret < 0)
return ret;
- ret = add_resource(res, &code_res);
+ ret = add_resource(&kimage_res, &code_res);
if (ret < 0)
return ret;
- ret = add_resource(res, &rodata_res);
+ ret = add_resource(&kimage_res, &rodata_res);
if (ret < 0)
return ret;
- ret = add_resource(res, &data_res);
+ ret = add_resource(&kimage_res, &data_res);
if (ret < 0)
return ret;
- ret = add_resource(res, &bss_res);
+ ret = add_resource(&kimage_res, &bss_res);
return ret;
}
struct resource *res = NULL;
struct resource *mem_res = NULL;
size_t mem_res_sz = 0;
- int ret = 0, i = 0;
-
- code_res.start = __pa_symbol(_text);
- code_res.end = __pa_symbol(_etext) - 1;
- code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-
- rodata_res.start = __pa_symbol(__start_rodata);
- rodata_res.end = __pa_symbol(__end_rodata) - 1;
- rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-
- data_res.start = __pa_symbol(_data);
- data_res.end = __pa_symbol(_edata) - 1;
- data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
-
- bss_res.start = __pa_symbol(__bss_start);
- bss_res.end = __pa_symbol(__bss_stop) - 1;
- bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ int num_resources = 0, res_idx = 0;
+ int ret = 0;
/* + 1 as memblock_alloc() might increase memblock.reserved.cnt */
- mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt + 1) * sizeof(*mem_res);
+ num_resources = memblock.memory.cnt + memblock.reserved.cnt + 1;
+ res_idx = num_resources - 1;
+
+ mem_res_sz = num_resources * sizeof(*mem_res);
mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES);
if (!mem_res)
panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz);
+
/*
* Start by adding the reserved regions, if they overlap
* with /memory regions, insert_resource later on will take
* care of it.
*/
+ ret = add_kernel_resources();
+ if (ret < 0)
+ goto error;
+
+#ifdef CONFIG_KEXEC_CORE
+ if (crashk_res.start != crashk_res.end) {
+ ret = add_resource(&iomem_resource, &crashk_res);
+ if (ret < 0)
+ goto error;
+ }
+#endif
+
+#ifdef CONFIG_CRASH_DUMP
+ if (elfcorehdr_size > 0) {
+ elfcorehdr_res.start = elfcorehdr_addr;
+ elfcorehdr_res.end = elfcorehdr_addr + elfcorehdr_size - 1;
+ elfcorehdr_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
+ add_resource(&iomem_resource, &elfcorehdr_res);
+ }
+#endif
+
for_each_reserved_mem_region(region) {
- res = &mem_res[i++];
+ res = &mem_res[res_idx--];
res->name = "Reserved";
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region));
res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1;
- ret = add_kernel_resources(res);
- if (ret < 0)
- goto error;
- else if (ret)
- continue;
-
/*
* Ignore any other reserved regions within
* system memory.
*/
if (memblock_is_memory(res->start)) {
- memblock_free((phys_addr_t) res, sizeof(struct resource));
+ /* Re-use this pre-allocated resource */
+ res_idx++;
continue;
}
/* Add /memory regions to the resource tree */
for_each_mem_region(region) {
- res = &mem_res[i++];
+ res = &mem_res[res_idx--];
if (unlikely(memblock_is_nomap(region))) {
res->name = "Reserved";
goto error;
}
+ /* Clean-up any unused pre-allocated resources */
+ mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res);
+ memblock_free((phys_addr_t) mem_res, mem_res_sz);
return;
error:
efi_init();
setup_bootmem();
paging_init();
- init_resources();
#if IS_ENABLED(CONFIG_BUILTIN_DTB)
unflatten_and_copy_device_tree();
#else
- if (early_init_dt_verify(__va(dtb_early_pa)))
+ if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa))))
unflatten_device_tree();
else
pr_err("No DTB found in kernel mappings\n");
#endif
misc_mem_init();
+ init_resources();
sbi_init();
- if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
+ if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
protect_kernel_text_data();
+ protect_kernel_linear_mapping_text_rodata();
+ }
+
#ifdef CONFIG_SWIOTLB
swiotlb_init(1);
#endif
*/
#include <linux/cpu.h>
+#include <linux/clockchips.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/profile.h>
IPI_CALL_FUNC,
IPI_CPU_STOP,
IPI_IRQ_WORK,
+ IPI_TIMER,
IPI_MAX
};
-unsigned long __cpuid_to_hartid_map[NR_CPUS] = {
+unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = {
[0 ... NR_CPUS-1] = INVALID_HARTID
};
return i;
pr_err("Couldn't find cpu id for hartid [%d]\n", hartid);
- return i;
+ return -ENOENT;
}
void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out)
wait_for_interrupt();
}
-static struct riscv_ipi_ops *ipi_ops;
+static const struct riscv_ipi_ops *ipi_ops __ro_after_init;
-void riscv_set_ipi_ops(struct riscv_ipi_ops *ops)
+void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops)
{
ipi_ops = ops;
}
irq_work_run();
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+ if (ops & (1 << IPI_TIMER)) {
+ stats[IPI_TIMER]++;
+ tick_receive_broadcast();
+ }
+#endif
BUG_ON((ops >> IPI_MAX) != 0);
/* Order data access and bit testing. */
[IPI_CALL_FUNC] = "Function call interrupts",
[IPI_CPU_STOP] = "CPU stop interrupts",
[IPI_IRQ_WORK] = "IRQ work interrupts",
+ [IPI_TIMER] = "Timer broadcast interrupts",
};
void show_ipi_stats(struct seq_file *p, int prec)
send_ipi_single(cpu, IPI_CALL_FUNC);
}
+#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
+void tick_broadcast(const struct cpumask *mask)
+{
+ send_ipi_mask(mask, IPI_TIMER);
+}
+#endif
+
void smp_send_stop(void)
{
unsigned long timeout;
#include <asm/sections.h>
#include <asm/sbi.h>
#include <asm/smp.h>
+#include <asm/alternative.h>
#include "head.h"
void __init smp_prepare_boot_cpu(void)
{
init_cpu_topology();
+#ifdef CONFIG_RISCV_ERRATA_ALTERNATIVE
+ apply_boot_alternatives();
+#endif
}
void __init smp_prepare_cpus(unsigned int max_cpus)
#undef __SYSCALL
#define __SYSCALL(nr, call) [nr] = (call),
-void *sys_call_table[__NR_syscalls] = {
+void * const sys_call_table[__NR_syscalls] = {
[0 ... __NR_syscalls - 1] = sys_ni_syscall,
#include <asm/unistd.h>
};
#include <asm/processor.h>
#include <asm/timex.h>
-unsigned long riscv_timebase;
+unsigned long riscv_timebase __ro_after_init;
EXPORT_SYMBOL_GPL(riscv_timebase);
void __init time_init(void)
int show_unhandled_signals = 1;
-extern asmlinkage void handle_exception(void);
-
static DEFINE_SPINLOCK(die_lock);
void die(struct pt_regs *regs, const char *str)
#endif /* CONFIG_GENERIC_BUG */
/* stvec & scratch is already set from head.S */
-void trap_init(void)
+void __init trap_init(void)
{
}
extern char vdso_start[], vdso_end[];
-static unsigned int vdso_pages;
-static struct page **vdso_pagelist;
+static unsigned int vdso_pages __ro_after_init;
+static struct page **vdso_pagelist __ro_after_init;
/*
* The vDSO data page.
endif
# Build rules
-targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-syms.S
obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
obj-y += vdso.o vdso-syms.o
$(obj)/vdso.o: $(obj)/vdso.so
# link rule for the .so file, .lds has to be first
-SYSCFLAGS_vdso.so.dbg = $(c_flags)
-$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE
+$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE
$(call if_changed,vdsold)
-SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
- -Wl,--build-id=sha1 -Wl,--hash-style=both
+LDFLAGS_vdso.so.dbg = -shared -s -soname=linux-vdso.so.1 \
+ --build-id=sha1 --hash-style=both --eh-frame-hdr
# We also create a special relocatable object that should mirror the symbol
# table and layout of the linked DSO. With ld --just-symbols we can then
# actual build commands
# The DSO images are built using a special linker script
-# Add -lgcc so rv32 gets static muldi3 and lshrdi3 definitions.
# Make sure only to export the intended __vdso_xxx symbol offsets.
quiet_cmd_vdsold = VDSOLD $@
- cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \
- -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \
- $(CROSS_COMPILE)objcopy \
- $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
+ cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \
+ $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
rm $@.tmp
# Extracts symbol offsets from the VDSO, converting them into an assembly file
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2012 Regents of the University of California
+ * Copyright (C) 2017 SiFive
+ * Copyright (C) 2020 Vitaly Wool, Konsulko AB
+ */
+
+#include <asm/pgtable.h>
+#define LOAD_OFFSET KERNEL_LINK_ADDR
+/* No __ro_after_init data in the .rodata section - which will always be ro */
+#define RO_AFTER_INIT_DATA
+
+#include <asm/vmlinux.lds.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/cache.h>
+#include <asm/thread_info.h>
+
+OUTPUT_ARCH(riscv)
+ENTRY(_start)
+
+jiffies = jiffies_64;
+
+SECTIONS
+{
+ /* Beginning of code and text segment */
+ . = LOAD_OFFSET;
+ _xiprom = .;
+ _start = .;
+ HEAD_TEXT_SECTION
+ INIT_TEXT_SECTION(PAGE_SIZE)
+ /* we have to discard exit text and such at runtime, not link time */
+ .exit.text :
+ {
+ EXIT_TEXT
+ }
+
+ .text : {
+ _text = .;
+ _stext = .;
+ TEXT_TEXT
+ SCHED_TEXT
+ CPUIDLE_TEXT
+ LOCK_TEXT
+ KPROBES_TEXT
+ ENTRY_TEXT
+ IRQENTRY_TEXT
+ SOFTIRQENTRY_TEXT
+ *(.fixup)
+ _etext = .;
+ }
+ RO_DATA(L1_CACHE_BYTES)
+ .srodata : {
+ *(.srodata*)
+ }
+ .init.rodata : {
+ INIT_SETUP(16)
+ INIT_CALLS
+ CON_INITCALL
+ INIT_RAM_FS
+ }
+ _exiprom = .; /* End of XIP ROM area */
+
+
+/*
+ * From this point, stuff is considered writable and will be copied to RAM
+ */
+ __data_loc = ALIGN(16); /* location in file */
+ . = LOAD_OFFSET + XIP_OFFSET; /* location in memory */
+
+ _sdata = .; /* Start of data section */
+ _data = .;
+ RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE)
+ _edata = .;
+ __start_ro_after_init = .;
+ .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) {
+ *(.data..ro_after_init)
+ }
+ __end_ro_after_init = .;
+
+ . = ALIGN(PAGE_SIZE);
+ __init_begin = .;
+ .init.data : {
+ INIT_DATA
+ }
+ .exit.data : {
+ EXIT_DATA
+ }
+ . = ALIGN(8);
+ __soc_early_init_table : {
+ __soc_early_init_table_start = .;
+ KEEP(*(__soc_early_init_table))
+ __soc_early_init_table_end = .;
+ }
+ __soc_builtin_dtb_table : {
+ __soc_builtin_dtb_table_start = .;
+ KEEP(*(__soc_builtin_dtb_table))
+ __soc_builtin_dtb_table_end = .;
+ }
+ PERCPU_SECTION(L1_CACHE_BYTES)
+
+ . = ALIGN(PAGE_SIZE);
+ __init_end = .;
+
+ .sdata : {
+ __global_pointer$ = . + 0x800;
+ *(.sdata*)
+ *(.sbss*)
+ }
+
+ BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
+ EXCEPTION_TABLE(0x10)
+
+ .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) {
+ *(.rel.dyn*)
+ }
+
+ /*
+ * End of copied data. We need a dummy section to get its LMA.
+ * Also located before final ALIGN() as trailing padding is not stored
+ * in the resulting binary file and useless to copy.
+ */
+ .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { }
+ _edata_loc = LOADADDR(.data.endmark);
+
+ . = ALIGN(PAGE_SIZE);
+ _end = .;
+
+ STABS_DEBUG
+ DWARF_DEBUG
+
+ DISCARDS
+}
* Copyright (C) 2017 SiFive
*/
-#define LOAD_OFFSET PAGE_OFFSET
+#ifdef CONFIG_XIP_KERNEL
+#include "vmlinux-xip.lds.S"
+#else
+
+#include <asm/pgtable.h>
+#define LOAD_OFFSET KERNEL_LINK_ADDR
+
#include <asm/vmlinux.lds.h>
#include <asm/page.h>
#include <asm/cache.h>
}
__init_data_end = .;
+
+ . = ALIGN(8);
+ .alternative : {
+ __alt_start = .;
+ *(.alternative)
+ __alt_end = .;
+ }
__init_end = .;
/* Start of data section */
DISCARDS
}
+#endif /* CONFIG_XIP_KERNEL */
return;
}
+#ifdef CONFIG_64BIT
+ /*
+ * Modules in 64bit kernels lie in their own virtual region which is not
+ * in the vmalloc region, but dealing with page faults in this region
+ * or the vmalloc region amounts to doing the same thing: checking that
+ * the mapping exists in init_mm.pgd and updating user page table, so
+ * just use vmalloc_fault.
+ */
+ if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) {
+ vmalloc_fault(regs, code, addr);
+ return;
+ }
+#endif
/* Enable interrupts if they were enabled in the parent context. */
if (likely(regs->status & SR_PIE))
local_irq_enable();
/*
* Copyright (C) 2012 Regents of the University of California
* Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ * Copyright (C) 2020 FORTH-ICS/CARV
+ * Nick Kossifidis <mick@ics.forth.gr>
*/
#include <linux/init.h>
#include <linux/swap.h>
#include <linux/sizes.h>
#include <linux/of_fdt.h>
+#include <linux/of_reserved_mem.h>
#include <linux/libfdt.h>
#include <linux/set_memory.h>
#include <linux/dma-map-ops.h>
+#include <linux/crash_dump.h>
#include <asm/fixmap.h>
#include <asm/tlbflush.h>
#include "../kernel/head.h"
+unsigned long kernel_virt_addr = KERNEL_LINK_ADDR;
+EXPORT_SYMBOL(kernel_virt_addr);
+#ifdef CONFIG_XIP_KERNEL
+#define kernel_virt_addr (*((unsigned long *)XIP_FIXUP(&kernel_virt_addr)))
+#endif
+
unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
__page_aligned_bss;
EXPORT_SYMBOL(empty_zero_page);
extern char _start[];
#define DTB_EARLY_BASE_VA PGDIR_SIZE
-void *dtb_early_va __initdata;
-uintptr_t dtb_early_pa __initdata;
+void *_dtb_early_va __initdata;
+uintptr_t _dtb_early_pa __initdata;
struct pt_alloc_ops {
pte_t *(*get_pte_virt)(phys_addr_t pa);
free_area_init(max_zone_pfns);
}
-static void setup_zero_page(void)
+static void __init setup_zero_page(void)
{
memset((void *)empty_zero_page, 0, PAGE_SIZE);
}
(((t) - (b)) >> 20));
}
-static void print_vm_layout(void)
+static void __init print_vm_layout(void)
{
pr_notice("Virtual kernel memory layout:\n");
print_mlk("fixmap", (unsigned long)FIXADDR_START,
(unsigned long)VMALLOC_END);
print_mlm("lowmem", (unsigned long)PAGE_OFFSET,
(unsigned long)high_memory);
+#ifdef CONFIG_64BIT
+ print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR,
+ (unsigned long)ADDRESS_SPACE_END);
+#endif
}
#else
static void print_vm_layout(void) { }
phys_addr_t dram_end = memblock_end_of_DRAM();
phys_addr_t max_mapped_addr = __pa(~(ulong)0);
+#ifdef CONFIG_XIP_KERNEL
+ vmlinux_start = __pa_symbol(&_sdata);
+#endif
+
/* The maximal physical memory size is -PAGE_OFFSET. */
memblock_enforce_memory_limit(-PAGE_OFFSET);
- /* Reserve from the start of the kernel to the end of the kernel */
+ /*
+ * Reserve from the start of the kernel to the end of the kernel
+ */
+#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
+ /*
+ * Make sure we align the reservation on PMD_SIZE since we will
+ * map the kernel in the linear mapping as read-only: we do not want
+ * any allocation to happen between _end and the next pmd aligned page.
+ */
+ vmlinux_end = (vmlinux_end + PMD_SIZE - 1) & PMD_MASK;
+#endif
memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
/*
if (max_mapped_addr == (dram_end - 1))
memblock_set_current_limit(max_mapped_addr - 4096);
- max_pfn = PFN_DOWN(dram_end);
- max_low_pfn = max_pfn;
+ min_low_pfn = PFN_UP(memblock_start_of_DRAM());
+ max_low_pfn = max_pfn = PFN_DOWN(dram_end);
+
dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
memblock_allow_resize();
}
+#ifdef CONFIG_XIP_KERNEL
+
+extern char _xiprom[], _exiprom[];
+extern char _sdata[], _edata[];
+
+#endif /* CONFIG_XIP_KERNEL */
+
#ifdef CONFIG_MMU
-static struct pt_alloc_ops pt_ops;
+static struct pt_alloc_ops _pt_ops __ro_after_init;
-unsigned long va_pa_offset;
+#ifdef CONFIG_XIP_KERNEL
+#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops))
+#else
+#define pt_ops _pt_ops
+#endif
+
+/* Offset between linear mapping virtual address and kernel load address */
+unsigned long va_pa_offset __ro_after_init;
EXPORT_SYMBOL(va_pa_offset);
-unsigned long pfn_base;
+#ifdef CONFIG_XIP_KERNEL
+#define va_pa_offset (*((unsigned long *)XIP_FIXUP(&va_pa_offset)))
+#endif
+/* Offset between kernel mapping virtual address and kernel load address */
+#ifdef CONFIG_64BIT
+unsigned long va_kernel_pa_offset;
+EXPORT_SYMBOL(va_kernel_pa_offset);
+#endif
+#ifdef CONFIG_XIP_KERNEL
+#define va_kernel_pa_offset (*((unsigned long *)XIP_FIXUP(&va_kernel_pa_offset)))
+#endif
+unsigned long va_kernel_xip_pa_offset;
+EXPORT_SYMBOL(va_kernel_xip_pa_offset);
+#ifdef CONFIG_XIP_KERNEL
+#define va_kernel_xip_pa_offset (*((unsigned long *)XIP_FIXUP(&va_kernel_xip_pa_offset)))
+#endif
+unsigned long pfn_base __ro_after_init;
EXPORT_SYMBOL(pfn_base);
pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir))
+#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte))
+#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir))
+#endif /* CONFIG_XIP_KERNEL */
+
void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
{
unsigned long addr = __fix_to_virt(idx);
unsigned long vaddr;
vaddr = __get_free_page(GFP_KERNEL);
- if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)))
- BUG();
+ BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr)));
+
return __pa(vaddr);
}
pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
+#ifdef CONFIG_XIP_KERNEL
+#define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd))
+#define fixmap_pmd ((pmd_t *)XIP_FIXUP(fixmap_pmd))
+#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd))
+#endif /* CONFIG_XIP_KERNEL */
+
static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
{
/* Before MMU is enabled */
static phys_addr_t __init alloc_pmd_early(uintptr_t va)
{
- BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT);
+ BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT);
return (uintptr_t)early_pmd;
}
return PMD_SIZE;
}
+#ifdef CONFIG_XIP_KERNEL
+/* called from head.S with MMU off */
+asmlinkage void __init __copy_data(void)
+{
+ void *from = (void *)(&_sdata);
+ void *end = (void *)(&_end);
+ void *to = (void *)CONFIG_PHYS_RAM_BASE;
+ size_t sz = (size_t)(end - from + 1);
+
+ memcpy(to, from, sz);
+}
+#endif
+
/*
* setup_vm() is called from head.S with MMU-off.
*
#error "setup_vm() is called from head.S before relocate so it should not use absolute addressing."
#endif
+uintptr_t load_pa, load_sz;
+#ifdef CONFIG_XIP_KERNEL
+#define load_pa (*((uintptr_t *)XIP_FIXUP(&load_pa)))
+#define load_sz (*((uintptr_t *)XIP_FIXUP(&load_sz)))
+#endif
+
+#ifdef CONFIG_XIP_KERNEL
+uintptr_t xiprom, xiprom_sz;
+#define xiprom_sz (*((uintptr_t *)XIP_FIXUP(&xiprom_sz)))
+#define xiprom (*((uintptr_t *)XIP_FIXUP(&xiprom)))
+
+static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size)
+{
+ uintptr_t va, end_va;
+
+ /* Map the flash resident part */
+ end_va = kernel_virt_addr + xiprom_sz;
+ for (va = kernel_virt_addr; va < end_va; va += map_size)
+ create_pgd_mapping(pgdir, va,
+ xiprom + (va - kernel_virt_addr),
+ map_size, PAGE_KERNEL_EXEC);
+
+ /* Map the data in RAM */
+ end_va = kernel_virt_addr + XIP_OFFSET + load_sz;
+ for (va = kernel_virt_addr + XIP_OFFSET; va < end_va; va += map_size)
+ create_pgd_mapping(pgdir, va,
+ load_pa + (va - (kernel_virt_addr + XIP_OFFSET)),
+ map_size, PAGE_KERNEL);
+}
+#else
+static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size)
+{
+ uintptr_t va, end_va;
+
+ end_va = kernel_virt_addr + load_sz;
+ for (va = kernel_virt_addr; va < end_va; va += map_size)
+ create_pgd_mapping(pgdir, va,
+ load_pa + (va - kernel_virt_addr),
+ map_size, PAGE_KERNEL_EXEC);
+}
+#endif
+
asmlinkage void __init setup_vm(uintptr_t dtb_pa)
{
- uintptr_t va, pa, end_va;
- uintptr_t load_pa = (uintptr_t)(&_start);
- uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
+ uintptr_t __maybe_unused pa;
uintptr_t map_size;
#ifndef __PAGETABLE_PMD_FOLDED
pmd_t fix_bmap_spmd, fix_bmap_epmd;
#endif
+#ifdef CONFIG_XIP_KERNEL
+ xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR;
+ xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom);
+
+ load_pa = (uintptr_t)CONFIG_PHYS_RAM_BASE;
+ load_sz = (uintptr_t)(&_end) - (uintptr_t)(&_sdata);
+
+ va_kernel_xip_pa_offset = kernel_virt_addr - xiprom;
+#else
+ load_pa = (uintptr_t)(&_start);
+ load_sz = (uintptr_t)(&_end) - load_pa;
+#endif
+
va_pa_offset = PAGE_OFFSET - load_pa;
+#ifdef CONFIG_64BIT
+ va_kernel_pa_offset = kernel_virt_addr - load_pa;
+#endif
+
pfn_base = PFN_DOWN(load_pa);
/*
create_pmd_mapping(fixmap_pmd, FIXADDR_START,
(uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE);
/* Setup trampoline PGD and PMD */
- create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+ create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
(uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE);
- create_pmd_mapping(trampoline_pmd, PAGE_OFFSET,
+#ifdef CONFIG_XIP_KERNEL
+ create_pmd_mapping(trampoline_pmd, kernel_virt_addr,
+ xiprom, PMD_SIZE, PAGE_KERNEL_EXEC);
+#else
+ create_pmd_mapping(trampoline_pmd, kernel_virt_addr,
load_pa, PMD_SIZE, PAGE_KERNEL_EXEC);
+#endif
#else
/* Setup trampoline PGD */
- create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET,
+ create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr,
load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC);
#endif
/*
- * Setup early PGD covering entire kernel which will allows
+ * Setup early PGD covering entire kernel which will allow
* us to reach paging_init(). We map all memory banks later
* in setup_vm_final() below.
*/
- end_va = PAGE_OFFSET + load_sz;
- for (va = PAGE_OFFSET; va < end_va; va += map_size)
- create_pgd_mapping(early_pg_dir, va,
- load_pa + (va - PAGE_OFFSET),
- map_size, PAGE_KERNEL_EXEC);
+ create_kernel_page_table(early_pg_dir, map_size);
#ifndef __PAGETABLE_PMD_FOLDED
/* Setup early PMD for DTB */
pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
#else /* CONFIG_BUILTIN_DTB */
+#ifdef CONFIG_64BIT
+ /*
+ * __va can't be used since it would return a linear mapping address
+ * whereas dtb_early_va will be used before setup_vm_final installs
+ * the linear mapping.
+ */
+ dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
+#else
dtb_early_va = __va(dtb_pa);
+#endif /* CONFIG_64BIT */
#endif /* CONFIG_BUILTIN_DTB */
#else
#ifndef CONFIG_BUILTIN_DTB
pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
#else /* CONFIG_BUILTIN_DTB */
+#ifdef CONFIG_64BIT
+ dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa));
+#else
dtb_early_va = __va(dtb_pa);
+#endif /* CONFIG_64BIT */
#endif /* CONFIG_BUILTIN_DTB */
#endif
dtb_early_pa = dtb_pa;
#endif
}
+#if defined(CONFIG_64BIT) && defined(CONFIG_STRICT_KERNEL_RWX)
+void protect_kernel_linear_mapping_text_rodata(void)
+{
+ unsigned long text_start = (unsigned long)lm_alias(_start);
+ unsigned long init_text_start = (unsigned long)lm_alias(__init_text_begin);
+ unsigned long rodata_start = (unsigned long)lm_alias(__start_rodata);
+ unsigned long data_start = (unsigned long)lm_alias(_data);
+
+ set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT);
+ set_memory_nx(text_start, (init_text_start - text_start) >> PAGE_SHIFT);
+
+ set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+ set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT);
+}
+#endif
+
static void __init setup_vm_final(void)
{
uintptr_t va, map_size;
__pa_symbol(fixmap_pgd_next),
PGDIR_SIZE, PAGE_TABLE);
- /* Map all memory banks */
+ /* Map all memory banks in the linear mapping */
for_each_mem_range(i, &start, &end) {
if (start >= end)
break;
for (pa = start; pa < end; pa += map_size) {
va = (uintptr_t)__va(pa);
create_pgd_mapping(swapper_pg_dir, va, pa,
- map_size, PAGE_KERNEL_EXEC);
+ map_size,
+#ifdef CONFIG_64BIT
+ PAGE_KERNEL
+#else
+ PAGE_KERNEL_EXEC
+#endif
+ );
+
}
}
+#ifdef CONFIG_64BIT
+ /* Map the kernel */
+ create_kernel_page_table(swapper_pg_dir, PMD_SIZE);
+#endif
+
/* Clear fixmap PTE and PMD mappings */
clear_fixmap(FIX_PTE);
clear_fixmap(FIX_PMD);
#endif /* CONFIG_MMU */
#ifdef CONFIG_STRICT_KERNEL_RWX
-void protect_kernel_text_data(void)
+void __init protect_kernel_text_data(void)
{
unsigned long text_start = (unsigned long)_start;
unsigned long init_text_start = (unsigned long)__init_text_begin;
}
#endif
+#ifdef CONFIG_KEXEC_CORE
+/*
+ * reserve_crashkernel() - reserves memory for crash kernel
+ *
+ * This function reserves memory area given in "crashkernel=" kernel command
+ * line parameter. The memory reserved is used by dump capture kernel when
+ * primary kernel is crashing.
+ */
+static void __init reserve_crashkernel(void)
+{
+ unsigned long long crash_base = 0;
+ unsigned long long crash_size = 0;
+ unsigned long search_start = memblock_start_of_DRAM();
+ unsigned long search_end = memblock_end_of_DRAM();
+
+ int ret = 0;
+
+ /*
+ * Don't reserve a region for a crash kernel on a crash kernel
+ * since it doesn't make much sense and we have limited memory
+ * resources.
+ */
+#ifdef CONFIG_CRASH_DUMP
+ if (is_kdump_kernel()) {
+ pr_info("crashkernel: ignoring reservation request\n");
+ return;
+ }
+#endif
+
+ ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+ &crash_size, &crash_base);
+ if (ret || !crash_size)
+ return;
+
+ crash_size = PAGE_ALIGN(crash_size);
+
+ if (crash_base == 0) {
+ /*
+ * Current riscv boot protocol requires 2MB alignment for
+ * RV64 and 4MB alignment for RV32 (hugepage size)
+ */
+ crash_base = memblock_find_in_range(search_start, search_end,
+ crash_size, PMD_SIZE);
+
+ if (crash_base == 0) {
+ pr_warn("crashkernel: couldn't allocate %lldKB\n",
+ crash_size >> 10);
+ return;
+ }
+ } else {
+ /* User specifies base address explicitly. */
+ if (!memblock_is_region_memory(crash_base, crash_size)) {
+ pr_warn("crashkernel: requested region is not memory\n");
+ return;
+ }
+
+ if (memblock_is_region_reserved(crash_base, crash_size)) {
+ pr_warn("crashkernel: requested region is reserved\n");
+ return;
+ }
+
+
+ if (!IS_ALIGNED(crash_base, PMD_SIZE)) {
+ pr_warn("crashkernel: requested region is misaligned\n");
+ return;
+ }
+ }
+ memblock_reserve(crash_base, crash_size);
+
+ pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
+ crash_base, crash_base + crash_size, crash_size >> 20);
+
+ crashk_res.start = crash_base;
+ crashk_res.end = crash_base + crash_size - 1;
+}
+#endif /* CONFIG_KEXEC_CORE */
+
+#ifdef CONFIG_CRASH_DUMP
+/*
+ * We keep track of the ELF core header of the crashed
+ * kernel with a reserved-memory region with compatible
+ * string "linux,elfcorehdr". Here we register a callback
+ * to populate elfcorehdr_addr/size when this region is
+ * present. Note that this region will be marked as
+ * reserved once we call early_init_fdt_scan_reserved_mem()
+ * later on.
+ */
+static int elfcore_hdr_setup(struct reserved_mem *rmem)
+{
+ elfcorehdr_addr = rmem->base;
+ elfcorehdr_size = rmem->size;
+ return 0;
+}
+
+RESERVEDMEM_OF_DECLARE(elfcorehdr, "linux,elfcorehdr", elfcore_hdr_setup);
+#endif
+
void __init paging_init(void)
{
setup_vm_final();
void __init misc_mem_init(void)
{
+ early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT);
arch_numa_init();
sparse_init();
zone_sizes_init();
+#ifdef CONFIG_KEXEC_CORE
+ reserve_crashkernel();
+#endif
memblock_dump_all();
}
#include <asm/fixmap.h>
#include <asm/pgalloc.h>
-static __init void *early_alloc(size_t size, int node)
-{
- void *ptr = memblock_alloc_try_nid(size, size,
- __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
-
- if (!ptr)
- panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d from=%llx\n",
- __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
-
- return ptr;
-}
-
extern pgd_t early_pg_dir[PTRS_PER_PGD];
asmlinkage void __init kasan_early_init(void)
{
local_flush_tlb_all();
}
-static void kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
+static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
pte_t *ptep, *base_pte;
set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
}
-static void kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
+static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
pmd_t *pmdp, *base_pmd;
set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
}
-static void kasan_populate_pgd(unsigned long vaddr, unsigned long end)
+static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end)
{
phys_addr_t phys_addr;
pgd_t *pgdp = pgd_offset_k(vaddr);
memset(start, KASAN_SHADOW_INIT, end - start);
}
+static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end)
+{
+ unsigned long next;
+ void *p;
+ pgd_t *pgd_k = pgd_offset_k(vaddr);
+
+ do {
+ next = pgd_addr_end(vaddr, end);
+ if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) {
+ p = memblock_alloc(PAGE_SIZE, PAGE_SIZE);
+ set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE));
+ }
+ } while (pgd_k++, vaddr = next, vaddr != end);
+}
+
static void __init kasan_shallow_populate(void *start, void *end)
{
unsigned long vaddr = (unsigned long)start & PAGE_MASK;
unsigned long vend = PAGE_ALIGN((unsigned long)end);
- unsigned long pfn;
- int index;
- void *p;
- pud_t *pud_dir, *pud_k;
- pgd_t *pgd_dir, *pgd_k;
- p4d_t *p4d_dir, *p4d_k;
-
- while (vaddr < vend) {
- index = pgd_index(vaddr);
- pfn = csr_read(CSR_SATP) & SATP_PPN;
- pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
- pgd_k = init_mm.pgd + index;
- pgd_dir = pgd_offset_k(vaddr);
- set_pgd(pgd_dir, *pgd_k);
-
- p4d_dir = p4d_offset(pgd_dir, vaddr);
- p4d_k = p4d_offset(pgd_k, vaddr);
-
- vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
- pud_dir = pud_offset(p4d_dir, vaddr);
- pud_k = pud_offset(p4d_k, vaddr);
-
- if (pud_present(*pud_dir)) {
- p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
- pud_populate(&init_mm, pud_dir, p);
- }
- vaddr += PAGE_SIZE;
- }
+ kasan_shallow_populate_pgd(vaddr, vend);
local_flush_tlb_all();
}
phys_addr_t _start, _end;
u64 i;
+ /*
+ * Populate all kernel virtual address space with kasan_early_shadow_page
+ * except for the linear mapping and the modules/kernel/BPF mapping.
+ */
kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
(void *)kasan_mem_to_shadow((void *)
VMEMMAP_END));
(void *)kasan_mem_to_shadow((void *)VMALLOC_START),
(void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+ /* Populate the linear mapping */
for_each_mem_range(i, &_start, &_end) {
void *start = (void *)__va(_start);
void *end = (void *)__va(_end);
kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
}
+ /* Populate kernel, BPF, modules mapping */
+ kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR),
+ kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END));
+
for (i = 0; i < PTRS_PER_PTE; i++)
set_pte(&kasan_early_shadow_pte[i],
mk_pte(virt_to_page(kasan_early_shadow_page),
phys_addr_t __phys_addr_symbol(unsigned long x)
{
- unsigned long kernel_start = (unsigned long)PAGE_OFFSET;
+ unsigned long kernel_start = (unsigned long)kernel_virt_addr;
unsigned long kernel_end = (unsigned long)_end;
/*
unsigned long end;
};
+enum address_markers_idx {
+#ifdef CONFIG_KASAN
+ KASAN_SHADOW_START_NR,
+ KASAN_SHADOW_END_NR,
+#endif
+ FIXMAP_START_NR,
+ FIXMAP_END_NR,
+ PCI_IO_START_NR,
+ PCI_IO_END_NR,
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ VMEMMAP_START_NR,
+ VMEMMAP_END_NR,
+#endif
+ VMALLOC_START_NR,
+ VMALLOC_END_NR,
+ PAGE_OFFSET_NR,
+#ifdef CONFIG_64BIT
+ MODULES_MAPPING_NR,
+ KERNEL_MAPPING_NR,
+#endif
+ END_OF_SPACE_NR
+};
+
static struct addr_marker address_markers[] = {
#ifdef CONFIG_KASAN
- {KASAN_SHADOW_START, "Kasan shadow start"},
- {KASAN_SHADOW_END, "Kasan shadow end"},
+ {0, "Kasan shadow start"},
+ {0, "Kasan shadow end"},
#endif
- {FIXADDR_START, "Fixmap start"},
- {FIXADDR_TOP, "Fixmap end"},
- {PCI_IO_START, "PCI I/O start"},
- {PCI_IO_END, "PCI I/O end"},
+ {0, "Fixmap start"},
+ {0, "Fixmap end"},
+ {0, "PCI I/O start"},
+ {0, "PCI I/O end"},
#ifdef CONFIG_SPARSEMEM_VMEMMAP
- {VMEMMAP_START, "vmemmap start"},
- {VMEMMAP_END, "vmemmap end"},
+ {0, "vmemmap start"},
+ {0, "vmemmap end"},
+#endif
+ {0, "vmalloc() area"},
+ {0, "vmalloc() end"},
+ {0, "Linear mapping"},
+#ifdef CONFIG_64BIT
+ {0, "Modules mapping"},
+ {0, "Kernel mapping (kernel, BPF)"},
#endif
- {VMALLOC_START, "vmalloc() area"},
- {VMALLOC_END, "vmalloc() end"},
- {PAGE_OFFSET, "Linear mapping"},
{-1, NULL},
};
static struct ptd_mm_info kernel_ptd_info = {
.mm = &init_mm,
.markers = address_markers,
- .base_addr = KERN_VIRT_START,
+ .base_addr = 0,
.end = ULONG_MAX,
};
DEFINE_SHOW_ATTRIBUTE(ptdump);
-static int ptdump_init(void)
+static int __init ptdump_init(void)
{
unsigned int i, j;
+#ifdef CONFIG_KASAN
+ address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START;
+ address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END;
+#endif
+ address_markers[FIXMAP_START_NR].start_address = FIXADDR_START;
+ address_markers[FIXMAP_END_NR].start_address = FIXADDR_TOP;
+ address_markers[PCI_IO_START_NR].start_address = PCI_IO_START;
+ address_markers[PCI_IO_END_NR].start_address = PCI_IO_END;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START;
+ address_markers[VMEMMAP_END_NR].start_address = VMEMMAP_END;
+#endif
+ address_markers[VMALLOC_START_NR].start_address = VMALLOC_START;
+ address_markers[VMALLOC_END_NR].start_address = VMALLOC_END;
+ address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET;
+#ifdef CONFIG_64BIT
+ address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR;
+ address_markers[KERNEL_MAPPING_NR].start_address = kernel_virt_addr;
+#endif
+
+ kernel_ptd_info.base_addr = KERN_VIRT_START;
+
for (i = 0; i < ARRAY_SIZE(pg_level); i++)
for (j = 0; j < ARRAY_SIZE(pte_bits); j++)
pg_level[i].mask |= pte_bits[j].mask;
{
__build_epilogue(false, ctx);
}
-
-void *bpf_jit_alloc_exec(unsigned long size)
-{
- return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
- BPF_JIT_REGION_END, GFP_KERNEL,
- PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
- __builtin_return_address(0));
-}
-
-void bpf_jit_free_exec(void *addr)
-{
- return vfree(addr);
-}
bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns);
if (!prog->is_func || extra_pass) {
+ bpf_jit_binary_lock_ro(jit_data->header);
out_offset:
kfree(ctx->offset);
kfree(jit_data);
tmp : orig_prog);
return prog;
}
+
+void *bpf_jit_alloc_exec(unsigned long size)
+{
+ return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+ BPF_JIT_REGION_END, GFP_KERNEL,
+ PAGE_KERNEL, 0, NUMA_NO_NODE,
+ __builtin_return_address(0));
+}
+
+void bpf_jit_free_exec(void *addr)
+{
+ return vfree(addr);
+}
imply IMA_SECURE_AND_OR_TRUSTED_BOOT
select ARCH_32BIT_USTAT_F_TINODE
select ARCH_BINFMT_ELF_STATE
+ select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM
+ select ARCH_ENABLE_MEMORY_HOTREMOVE
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK
select ARCH_HAS_DEBUG_VM_PGTABLE
select ARCH_HAS_DEBUG_WX
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_JUMP_LABEL_RELATIVE
select HAVE_ARCH_KASAN
select HAVE_ARCH_KASAN_VMALLOC
+ select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_SOFT_DIRTY
select HAVE_ARCH_TRACEHOOK
config ARCH_SPARSEMEM_DEFAULT
def_bool y
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y if SPARSEMEM
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
- def_bool y
-
-config ARCH_ENABLE_SPLIT_PMD_PTLOCK
- def_bool y
-
config MAX_PHYSMEM_BITS
int "Maximum size of supported physical memory in bits (42-53)"
range 42 53
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
+CONFIG_PCI_IOV=y
# CONFIG_PCIEASPM is not set
CONFIG_PCI_DEBUG=y
CONFIG_HOTPLUG_PCI=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
-CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_PCI=y
+CONFIG_PCI_IOV=y
# CONFIG_PCIEASPM is not set
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
-CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_CONSOLE=m
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
return test_facility(142);
}
+size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
+ struct cpumf_ctr_info *info);
#endif /* _ASM_S390_CPU_MCF_H */
#include <linux/sched.h>
#include <linux/audit.h>
+#include <linux/randomize_kstack.h>
#include <linux/tracehook.h>
#include <linux/processor.h>
#include <linux/uaccess.h>
+#include <asm/timex.h>
#include <asm/fpu/api.h>
#define ARCH_EXIT_TO_USER_MODE_WORK (_TIF_GUARDED_STORAGE | _TIF_PER_TRAP)
#define arch_exit_to_user_mode arch_exit_to_user_mode
+static inline void arch_exit_to_user_mode_prepare(struct pt_regs *regs,
+ unsigned long ti_work)
+{
+ choose_random_kstack_offset(get_tod_clock_fast() & 0xff);
+}
+
+#define arch_exit_to_user_mode_prepare arch_exit_to_user_mode_prepare
+
static inline bool on_thread_stack(void)
{
return !(((unsigned long)(current->stack) ^ current_stack_pointer()) & ~(THREAD_SIZE - 1));
#define unxlate_dev_mem_ptr unxlate_dev_mem_ptr
void unxlate_dev_mem_ptr(phys_addr_t phys, void *addr);
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
#define IO_SPACE_LIMIT 0
void __iomem *ioremap_prot(phys_addr_t addr, size_t size, unsigned long prot);
struct zpci_dev *zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
int zpci_enable_device(struct zpci_dev *);
int zpci_disable_device(struct zpci_dev *);
-int zpci_configure_device(struct zpci_dev *zdev, u32 fh);
+int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh);
int zpci_deconfigure_device(struct zpci_dev *zdev);
int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
/* No support for kernel space counters only */
} else if (!attr->exclude_kernel && attr->exclude_user) {
return -EOPNOTSUPP;
-
- /* Count user and kernel space */
- } else {
+ } else { /* Count user and kernel space */
if (ev >= ARRAY_SIZE(cpumf_generic_events_basic))
return -EOPNOTSUPP;
ev = cpumf_generic_events_basic[ev];
*/
if (!atomic_dec_return(&cpuhw->ctr_set[hwc->config_base]))
ctr_set_stop(&cpuhw->state, hwc->config_base);
- event->hw.state |= PERF_HES_STOPPED;
+ hwc->state |= PERF_HES_STOPPED;
}
if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) {
hw_perf_event_update(event);
- event->hw.state |= PERF_HES_UPTODATE;
+ hwc->state |= PERF_HES_UPTODATE;
}
}
if (flags & PERF_EF_START)
cpumf_pmu_start(event, PERF_EF_RELOAD);
- perf_event_update_userpage(event);
-
return 0;
}
*/
if (!atomic_read(&cpuhw->ctr_set[event->hw.config_base]))
ctr_set_disable(&cpuhw->state, event->hw.config_base);
-
- perf_event_update_userpage(event);
}
/*
return cpum_cf_setup(cpu, PMC_RELEASE);
}
+/* Return the maximum possible counter set size (in number of 8 byte counters)
+ * depending on type and model number.
+ */
+size_t cpum_cf_ctrset_size(enum cpumf_ctr_set ctrset,
+ struct cpumf_ctr_info *info)
+{
+ size_t ctrset_size = 0;
+
+ switch (ctrset) {
+ case CPUMF_CTR_SET_BASIC:
+ if (info->cfvn >= 1)
+ ctrset_size = 6;
+ break;
+ case CPUMF_CTR_SET_USER:
+ if (info->cfvn == 1)
+ ctrset_size = 6;
+ else if (info->cfvn >= 3)
+ ctrset_size = 2;
+ break;
+ case CPUMF_CTR_SET_CRYPTO:
+ if (info->csvn >= 1 && info->csvn <= 5)
+ ctrset_size = 16;
+ else if (info->csvn == 6)
+ ctrset_size = 20;
+ break;
+ case CPUMF_CTR_SET_EXT:
+ if (info->csvn == 1)
+ ctrset_size = 32;
+ else if (info->csvn == 2)
+ ctrset_size = 48;
+ else if (info->csvn >= 3 && info->csvn <= 5)
+ ctrset_size = 128;
+ else if (info->csvn == 6)
+ ctrset_size = 160;
+ break;
+ case CPUMF_CTR_SET_MT_DIAG:
+ if (info->csvn > 3)
+ ctrset_size = 48;
+ break;
+ case CPUMF_CTR_SET_MAX:
+ break;
+ }
+
+ return ctrset_size;
+}
+
static int __init cpum_cf_init(void)
{
int rc;
debug_sprintf_event(cf_diag_dbg, 5, "%s event %p\n", __func__, event);
}
-/* Return the maximum possible counter set size (in number of 8 byte counters)
- * depending on type and model number.
- */
-static size_t cf_diag_ctrset_size(enum cpumf_ctr_set ctrset,
- struct cpumf_ctr_info *info)
-{
- size_t ctrset_size = 0;
-
- switch (ctrset) {
- case CPUMF_CTR_SET_BASIC:
- if (info->cfvn >= 1)
- ctrset_size = 6;
- break;
- case CPUMF_CTR_SET_USER:
- if (info->cfvn == 1)
- ctrset_size = 6;
- else if (info->cfvn >= 3)
- ctrset_size = 2;
- break;
- case CPUMF_CTR_SET_CRYPTO:
- if (info->csvn >= 1 && info->csvn <= 5)
- ctrset_size = 16;
- else if (info->csvn == 6)
- ctrset_size = 20;
- break;
- case CPUMF_CTR_SET_EXT:
- if (info->csvn == 1)
- ctrset_size = 32;
- else if (info->csvn == 2)
- ctrset_size = 48;
- else if (info->csvn >= 3 && info->csvn <= 5)
- ctrset_size = 128;
- else if (info->csvn == 6)
- ctrset_size = 160;
- break;
- case CPUMF_CTR_SET_MT_DIAG:
- if (info->csvn > 3)
- ctrset_size = 48;
- break;
- case CPUMF_CTR_SET_MAX:
- break;
- }
-
- return ctrset_size;
-}
-
/* Calculate memory needed to store all counter sets together with header and
* trailer data. This is independend of the counter set authorization which
* can vary depending on the configuration.
enum cpumf_ctr_set i;
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
- size_t size = cf_diag_ctrset_size(i, info);
+ size_t size = cpum_cf_ctrset_size(i, info);
if (size)
max_size += size * sizeof(u64) +
ctrdata->def = CF_DIAG_CTRSET_DEF;
ctrdata->set = ctrset;
ctrdata->res1 = 0;
- ctrset_size = cf_diag_ctrset_size(ctrset, &cpuhw->info);
+ ctrset_size = cpum_cf_ctrset_size(ctrset, &cpuhw->info);
if (ctrset_size) { /* Save data */
need = ctrset_size * sizeof(u64) + sizeof(*ctrdata);
if (!(p->sets & cpumf_ctr_ctl[set]))
continue; /* Counter set not in list */
- set_size = cf_diag_ctrset_size(set, &cpuhw->info);
+ set_size = cpum_cf_ctrset_size(set, &cpuhw->info);
space = sizeof(csd->data) - csd->used;
space = cf_diag_cpuset_read(sp, set, set_size, space);
if (space) {
for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
if (!(sets & cpumf_ctr_ctl[i]))
continue;
- bytes += cf_diag_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
+ bytes += cpum_cf_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
sizeof(((struct s390_ctrset_setdata *)0)->set) +
sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
}
if (MACHINE_HAS_VX) {
elf_hwcap |= HWCAP_S390_VXRS;
if (test_facility(134))
- elf_hwcap |= HWCAP_S390_VXRS_EXT;
- if (test_facility(135))
elf_hwcap |= HWCAP_S390_VXRS_BCD;
+ if (test_facility(135))
+ elf_hwcap |= HWCAP_S390_VXRS_EXT;
if (test_facility(148))
elf_hwcap |= HWCAP_S390_VXRS_EXT2;
if (test_facility(152))
void noinstr __do_syscall(struct pt_regs *regs, int per_trap)
{
+ add_random_kstack_offset();
enter_from_user_mode(regs);
memcpy(®s->gprs[8], S390_lowcore.save_area_sync, 8 * sizeof(unsigned long));
#include "asm/ptrace.h"
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+#include <linux/randomize_kstack.h>
#include <linux/extable.h>
#include <linux/ptrace.h>
#include <linux/sched.h>
unsigned int trapnr, syscall_redirect = 0;
irqentry_state_t state;
+ add_random_kstack_offset();
regs->int_code = *(u32 *)&S390_lowcore.pgm_ilc;
regs->int_parm_long = S390_lowcore.trans_exc_code;
return pte;
}
-pte_t *huge_pte_alloc(struct mm_struct *mm,
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz)
{
pgd_t *pgdp;
}
/**
- * zpci_configure_device() - Configure a zpci_dev
+ * zpci_scan_configured_device() - Scan a freshly configured zpci_dev
* @zdev: The zpci_dev to be configured
* @fh: The general function handle supplied by the platform
*
* Given a device in the configuration state Configured, enables, scans and
- * adds it to the common code PCI subsystem. If any failure occurs, the
- * zpci_dev is left disabled.
+ * adds it to the common code PCI subsystem if possible. If the PCI device is
+ * parked because we can not yet create a PCI bus because we have not seen
+ * function 0, it is ignored but will be scanned once function 0 appears.
+ * If any failure occurs, the zpci_dev is left disabled.
*
* Return: 0 on success, or an error code otherwise
*/
-int zpci_configure_device(struct zpci_dev *zdev, u32 fh)
+int zpci_scan_configured_device(struct zpci_dev *zdev, u32 fh)
{
int rc;
static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
- enum zpci_state state;
-
zdev->fh = fh;
/* Give the driver a hint that the function is
* already unusable.
*/
zpci_disable_device(zdev);
zdev->state = ZPCI_FN_STATE_STANDBY;
- if (!clp_get_state(zdev->fid, &state) &&
- state == ZPCI_FN_STATE_RESERVED) {
- zpci_zdev_put(zdev);
- }
}
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
+ enum zpci_state state;
zpci_err("avail CCDF:\n");
zpci_err_hex(ccdf, sizeof(*ccdf));
break;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
}
- zpci_configure_device(zdev, ccdf->fh);
+ zpci_scan_configured_device(zdev, ccdf->fh);
break;
case 0x0302: /* Reserved -> Standby */
if (!zdev)
break;
case 0x0303: /* Deconfiguration requested */
if (zdev) {
+ /* The event may have been queued before we confirgured
+ * the device.
+ */
+ if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
+ break;
zdev->fh = ccdf->fh;
zpci_deconfigure_device(zdev);
}
break;
case 0x0304: /* Configured -> Standby|Reserved */
- if (zdev)
- zpci_event_hard_deconfigured(zdev, ccdf->fh);
+ if (zdev) {
+ /* The event may have been queued before we confirgured
+ * the device.:
+ */
+ if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
+ zpci_event_hard_deconfigured(zdev, ccdf->fh);
+ /* The 0x0304 event may immediately reserve the device */
+ if (!clp_get_state(zdev->fid, &state) &&
+ state == ZPCI_FN_STATE_RESERVED) {
+ zpci_zdev_put(zdev);
+ }
+ }
break;
case 0x0306: /* 0x308 or 0x302 for multiple devices */
zpci_remove_reserved_devices();
config SUPERH
def_bool y
select ARCH_32BIT_OFF_T
+ select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM && MMU
+ select ARCH_ENABLE_MEMORY_HOTREMOVE if SPARSEMEM && MMU
select ARCH_HAVE_CUSTOM_GPIO_H
select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A)
select ARCH_HAS_BINFMT_FLAT if !MMU
bool
select ARCH_SUSPEND_POSSIBLE
-config SYS_SUPPORTS_HUGETLBFS
- bool
-
config SYS_SUPPORTS_SMP
bool
config CPU_SH4
bool
+ select ARCH_SUPPORTS_HUGETLBFS if MMU
select CPU_HAS_INTEVT
select CPU_HAS_SR_RB
select CPU_HAS_FPU if !CPU_SH4AL_DSP
select SH_INTC
select SYS_SUPPORTS_SH_TMU
- select SYS_SUPPORTS_HUGETLBFS if MMU
config CPU_SH4A
bool
# License. See the file "COPYING" in the main directory of this archive
# for more details.
#
-ifneq ($(SUBARCH),$(ARCH))
+ifdef cross_compiling
ifeq ($(CROSS_COMPILE),)
CROSS_COMPILE := $(call cc-cross-prefix, sh-linux- sh-linux-gnu- sh-unknown-linux-gnu-)
endif
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
# CONFIG_UNIX98_PTYS is not set
# CONFIG_LEGACY_PTYS is not set
# CONFIG_HW_RANDOM is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_NR_UARTS=4
CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_VT_HW_CONSOLE_BINDING=y
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_LEGACY_PTYS is not set
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_NR_UARTS=6
CONFIG_SERIAL_SH_SCI_CONSOLE=y
# CONFIG_INPUT is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_NR_UARTS=6
CONFIG_SERIAL_SH_SCI_CONSOLE=y
CONFIG_INPUT_EVDEV=y
CONFIG_INPUT_EVBUG=m
CONFIG_VT_HW_CONSOLE_BINDING=y
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_SH_SCI=y
CONFIG_SERIAL_SH_SCI_NR_UARTS=6
CONFIG_SERIAL_SH_SCI_CONSOLE=y
return result;
}
-#include <asm-generic/bitops/find.h>
#include <asm-generic/bitops/ffs.h>
#include <asm-generic/bitops/hweight.h>
#include <asm-generic/bitops/lock.h>
#include <asm-generic/bitops/sched.h>
-#include <asm-generic/bitops/le.h>
#include <asm-generic/bitops/ext2-atomic.h>
#include <asm-generic/bitops/fls.h>
#include <asm-generic/bitops/__fls.h>
#include <asm-generic/bitops/fls64.h>
+#include <asm-generic/bitops/le.h>
+#include <asm-generic/bitops/find.h>
+
#endif /* __ASM_SH_BITOPS_H */
*/
#define xlate_dev_mem_ptr(p) __va(p)
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
#define ARCH_HAS_VALID_PHYS_ADDR_RANGE
int valid_phys_addr_range(phys_addr_t addr, size_t size);
int valid_mmap_phys_addr_range(unsigned long pfn, size_t size);
$(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
syscall := $(src)/syscall.tbl
-syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
quiet_cmd_syshdr = SYSHDR $@
- cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
- '$(syshdr_abis_$(basetarget))' \
- '$(syshdr_pfx_$(basetarget))' \
- '$(syshdr_offset_$(basetarget))'
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr $< $@
quiet_cmd_systbl = SYSTBL $@
- cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
- '$(systbl_abis_$(basetarget))' \
- '$(systbl_abi_$(basetarget))' \
- '$(systbl_offset_$(basetarget))'
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) $< $@
$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
+++ /dev/null
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_UAPI_ASM_SH_`basename "$out" | sed \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
- -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- printf "#ifndef %s\n" "${fileguard}"
- printf "#define %s\n" "${fileguard}"
- printf "\n"
-
- nxt=0
- while read nr abi name entry ; do
- if [ -z "$offset" ]; then
- printf "#define __NR_%s%s\t%s\n" \
- "${prefix}" "${name}" "${nr}"
- else
- printf "#define __NR_%s%s\t(%s + %s)\n" \
- "${prefix}" "${name}" "${offset}" "${nr}"
- fi
- nxt=$((nr+1))
- done
-
- printf "\n"
- printf "#ifdef __KERNEL__\n"
- printf "#define __NR_syscalls\t%s\n" "${nxt}"
- printf "#endif\n"
- printf "\n"
- printf "#endif /* %s */\n" "${fileguard}"
-) > "$out"
+++ /dev/null
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-my_abi="$4"
-offset="$5"
-
-emit() {
- t_nxt="$1"
- t_nr="$2"
- t_entry="$3"
-
- while [ $t_nxt -lt $t_nr ]; do
- printf "__SYSCALL(%s,sys_ni_syscall)\n" "${t_nxt}"
- t_nxt=$((t_nxt+1))
- done
- printf "__SYSCALL(%s,%s)\n" "${t_nxt}" "${t_entry}"
-}
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- nxt=0
- if [ -z "$offset" ]; then
- offset=0
- fi
-
- while read nr abi name entry ; do
- emit $((nxt+offset)) $((nr+offset)) $entry
- nxt=$((nr+1))
- done
-) > "$out"
config ARCH_SELECT_MEMORY_MODEL
def_bool y
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
- depends on SPARSEMEM && MMU
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
- def_bool y
- depends on SPARSEMEM && MMU
-
config ARCH_MEMORY_PROBE
def_bool y
depends on MEMORY_HOTPLUG
#include <asm/tlbflush.h>
#include <asm/cacheflush.h>
-pte_t *huge_pte_alloc(struct mm_struct *mm,
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
# CONFIG_SERIO_SERPORT is not set
CONFIG_SERIO_PCIPS2=m
CONFIG_SERIO_RAW=m
-# CONFIG_DEVKMEM is not set
CONFIG_SERIAL_SUNSU=y
CONFIG_SERIAL_SUNSU_CONSOLE=y
CONFIG_SERIAL_SUNSAB=y
# SPDX-License-Identifier: GPL-2.0
generated-y += syscall_table_32.h
generated-y += syscall_table_64.h
-generated-y += syscall_table_c32.h
generic-y += export.h
generic-y += kvm_para.h
generic-y += mcs_spinlock.h
*/
#define xlate_dev_mem_ptr(p) __va(p)
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#define xlate_dev_kmem_ptr(p) p
-
#endif
#endif /* !(__SPARC64_IO_H) */
$(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
syscall := $(src)/syscall.tbl
-syshdr := $(srctree)/$(src)/syscallhdr.sh
-systbl := $(srctree)/$(src)/syscalltbl.sh
+syshdr := $(srctree)/scripts/syscallhdr.sh
+systbl := $(srctree)/scripts/syscalltbl.sh
quiet_cmd_syshdr = SYSHDR $@
- cmd_syshdr = $(CONFIG_SHELL) '$(syshdr)' '$<' '$@' \
- '$(syshdr_abis_$(basetarget))' \
- '$(syshdr_pfx_$(basetarget))' \
- '$(syshdr_offset_$(basetarget))'
+ cmd_syshdr = $(CONFIG_SHELL) $(syshdr) --emit-nr --abis $(abis) $< $@
quiet_cmd_systbl = SYSTBL $@
- cmd_systbl = $(CONFIG_SHELL) '$(systbl)' '$<' '$@' \
- '$(systbl_abis_$(basetarget))' \
- '$(systbl_abi_$(basetarget))' \
- '$(systbl_offset_$(basetarget))'
+ cmd_systbl = $(CONFIG_SHELL) $(systbl) --abis $(abis) $< $@
-syshdr_abis_unistd_32 := common,32
+$(uapi)/unistd_32.h: abis := common,32
$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
-syshdr_abis_unistd_64 := common,64
+$(uapi)/unistd_64.h: abis := common,64
$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
$(call if_changed,syshdr)
-systbl_abis_syscall_table_32 := common,32
+$(kapi)/syscall_table_32.h: abis := common,32
$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
-systbl_abis_syscall_table_64 := common,64
+$(kapi)/syscall_table_64.h: abis := common,64
$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
$(call if_changed,systbl)
-systbl_abis_syscall_table_c32 := common,32
-systbl_abi_syscall_table_c32 := c32
-$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE
- $(call if_changed,systbl)
-
uapisyshdr-y += unistd_32.h unistd_64.h
kapisyshdr-y += syscall_table_32.h \
- syscall_table_64.h \
- syscall_table_c32.h
+ syscall_table_64.h
uapisyshdr-y := $(addprefix $(uapi)/, $(uapisyshdr-y))
kapisyshdr-y := $(addprefix $(kapi)/, $(kapisyshdr-y))
+++ /dev/null
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-prefix="$4"
-offset="$5"
-
-fileguard=_UAPI_ASM_SPARC_`basename "$out" | sed \
- -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
- -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g'`
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- printf "#ifndef %s\n" "${fileguard}"
- printf "#define %s\n" "${fileguard}"
- printf "\n"
-
- nxt=0
- while read nr abi name entry compat ; do
- if [ -z "$offset" ]; then
- printf "#define __NR_%s%s\t%s\n" \
- "${prefix}" "${name}" "${nr}"
- else
- printf "#define __NR_%s%s\t(%s + %s)\n" \
- "${prefix}" "${name}" "${offset}" "${nr}"
- fi
- nxt=$((nr+1))
- done
-
- printf "\n"
- printf "#ifdef __KERNEL__\n"
- printf "#define __NR_syscalls\t%s\n" "${nxt}"
- printf "#endif\n"
- printf "\n"
- printf "#endif /* %s */\n" "${fileguard}"
-) > "$out"
+++ /dev/null
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-in="$1"
-out="$2"
-my_abis=`echo "($3)" | tr ',' '|'`
-my_abi="$4"
-offset="$5"
-
-emit() {
- t_nxt="$1"
- t_nr="$2"
- t_entry="$3"
-
- while [ $t_nxt -lt $t_nr ]; do
- printf "__SYSCALL(%s, sys_nis_syscall, )\n" "${t_nxt}"
- t_nxt=$((t_nxt+1))
- done
- printf "__SYSCALL(%s, %s, )\n" "${t_nxt}" "${t_entry}"
-}
-
-grep -E "^[0-9A-Fa-fXx]+[[:space:]]+${my_abis}" "$in" | sort -n | (
- nxt=0
- if [ -z "$offset" ]; then
- offset=0
- fi
-
- while read nr abi name entry compat ; do
- if [ "$my_abi" = "c32" ] && [ ! -z "$compat" ]; then
- emit $((nxt+offset)) $((nr+offset)) $compat
- else
- emit $((nxt+offset)) $((nr+offset)) $entry
- fi
- nxt=$((nr+1))
- done
-) > "$out"
* Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
*/
-#define __SYSCALL(nr, entry, nargs) .long entry
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
+#define __SYSCALL(nr, entry) .long entry
.data
.align 4
.globl sys_call_table
sys_call_table:
#include <asm/syscall_table_32.h> /* 32-bit native syscalls */
-#undef __SYSCALL
* Copyright (C) 1995 Adrian M. Rodriguez (adrian@remus.rutgers.edu)
*/
-#define __SYSCALL(nr, entry, nargs) .word entry
+#define __SYSCALL(nr, entry) .word entry
.text
.align 4
#ifdef CONFIG_COMPAT
.globl sys_call_table32
sys_call_table32:
-#include <asm/syscall_table_c32.h> /* Compat syscalls */
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, compat)
+#include <asm/syscall_table_32.h> /* Compat syscalls */
+#undef __SYSCALL_WITH_COMPAT
#endif /* CONFIG_COMPAT */
.align 4
.globl sys_call_table64, sys_call_table
sys_call_table64:
sys_call_table:
+#define __SYSCALL_WITH_COMPAT(nr, native, compat) __SYSCALL(nr, native)
#include <asm/syscall_table_64.h> /* 64-bit native syscalls */
-#undef __SYSCALL
unsigned long pmd_leaf_size(pmd_t pmd) { return 1UL << tte_to_shift(*(pte_t *)&pmd); }
unsigned long pte_leaf_size(pte_t pte) { return 1UL << tte_to_shift(pte); }
-pte_t *huge_pte_alloc(struct mm_struct *mm,
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
bool "Enable gcov support"
depends on DEBUG_INFO
depends on !KCOV
+ depends on !MODULES
help
This option allows developers to retrieve coverage data from a UML
session.
int *data_offset_out);
#endif
-
-/*
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-file-style: "linux"
- * End:
- */
static __poll_t hostaudio_poll(struct file *file,
struct poll_table_struct *wait)
{
- __poll_t mask = 0;
-
#ifdef DEBUG
printk(KERN_DEBUG "hostaudio: poll called (unimplemented)\n");
#endif
- return mask;
+ return 0;
}
static long hostaudio_ioctl(struct file *file,
* Copyright (C) 2001 by various other people who didn't put their name here.
*/
-#include <linux/version.h>
#include <linux/memblock.h>
#include <linux/etherdevice.h>
#include <linux/ethtool.h>
struct mm_struct;
extern pte_t *virt_to_pte(struct mm_struct *mm, unsigned long addr);
-#define update_mmu_cache(vma,address,ptep) do ; while (0)
+#define update_mmu_cache(vma,address,ptep) do {} while (0)
/* Encode and de-code a swap entry */
#define __swp_type(x) (((x).val >> 5) & 0x1f)
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_BLK_DEV_INITRD) += initrd.o
obj-$(CONFIG_GPROF) += gprof_syms.o
-obj-$(CONFIG_GCOV) += gmon_syms.o
obj-$(CONFIG_EARLY_PRINTK) += early_printk.o
obj-$(CONFIG_STACKTRACE) += stacktrace.o
ENTRY(_start)
jiffies = jiffies_64;
+VERSION {
+ {
+ local: *;
+ };
+}
+
SECTIONS
{
PROVIDE (__executable_start = START);
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/module.h>
-
-extern void __bb_init_func(void *) __attribute__((weak));
-EXPORT_SYMBOL(__bb_init_func);
-
-extern void __gcov_init(void *) __attribute__((weak));
-EXPORT_SYMBOL(__gcov_init);
-extern void __gcov_merge_add(void *, unsigned int) __attribute__((weak));
-EXPORT_SYMBOL(__gcov_merge_add);
-extern void __gcov_exit(void) __attribute__((weak));
-EXPORT_SYMBOL(__gcov_exit);
set_pmd(pmd, __pmd(_KERNPG_TABLE +
(unsigned long) __pa(pte)));
- if (pte != pte_offset_kernel(pmd, 0))
- BUG();
+ BUG_ON(pte != pte_offset_kernel(pmd, 0));
}
}
ENTRY(_start)
jiffies = jiffies_64;
+VERSION {
+ {
+ local: *;
+ };
+}
+
SECTIONS
{
/* This must contain the right address - not quite the default ELF one.*/
select ACPI_SYSTEM_POWER_STATES_SUPPORT if ACPI
select ARCH_32BIT_OFF_T if X86_32
select ARCH_CLOCKSOURCE_INIT
+ select ARCH_ENABLE_HUGEPAGE_MIGRATION if X86_64 && HUGETLB_PAGE && MIGRATION
+ select ARCH_ENABLE_MEMORY_HOTPLUG if X86_64 || (X86_32 && HIGHMEM)
+ select ARCH_ENABLE_MEMORY_HOTREMOVE if MEMORY_HOTPLUG
+ select ARCH_ENABLE_SPLIT_PMD_PTLOCK if X86_64 || X86_PAE
+ select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE
select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI
+ select ARCH_HAS_CACHE_LINE_SIZE
select ARCH_HAS_DEBUG_VIRTUAL
select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE
select ARCH_HAS_DEVMEM_IS_ALLOWED
select HAVE_ARCH_TRANSPARENT_HUGEPAGE
select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64
select HAVE_ARCH_USERFAULTFD_WP if X86_64 && USERFAULTFD
+ select HAVE_ARCH_USERFAULTFD_MINOR if X86_64 && USERFAULTFD
select HAVE_ARCH_VMAP_STACK if X86_64
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_WITHIN_STACK_FRAMES
config ARCH_HAS_CPU_RELAX
def_bool y
-config ARCH_HAS_CACHE_LINE_SIZE
- def_bool y
-
config ARCH_HAS_FILTER_PGPROT
def_bool y
def_bool y
depends on X86_64 && ARCH_ENABLE_MEMORY_HOTPLUG
-config ARCH_ENABLE_MEMORY_HOTPLUG
- def_bool y
- depends on X86_64 || (X86_32 && HIGHMEM)
-
-config ARCH_ENABLE_MEMORY_HOTREMOVE
+config ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
def_bool y
- depends on MEMORY_HOTPLUG
config USE_PERCPU_NUMA_NODE_ID
def_bool y
depends on NUMA
-config ARCH_ENABLE_SPLIT_PMD_PTLOCK
- def_bool y
- depends on X86_64 || X86_PAE
-
-config ARCH_ENABLE_HUGEPAGE_MIGRATION
- def_bool y
- depends on X86_64 && HUGETLB_PAGE && MIGRATION
-
-config ARCH_ENABLE_THP_MIGRATION
- def_bool y
- depends on X86_64 && TRANSPARENT_HUGEPAGE
-
menu "Power management and ACPI options"
config ARCH_HIBERNATION_HEADER
#endif
/* Kernel thread ? */
- if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
+ if (unlikely(p->flags & PF_KTHREAD)) {
memset(childregs, 0, sizeof(struct pt_regs));
kthread_frame_init(frame, sp, arg);
return 0;
task_user_gs(p) = get_user_gs(current_pt_regs());
#endif
+ if (unlikely(p->flags & PF_IO_WORKER)) {
+ /*
+ * An IO thread is a user space thread, but it doesn't
+ * return to ret_after_fork().
+ *
+ * In order to indicate that to tools like gdb,
+ * we reset the stack and instruction pointers.
+ *
+ * It does the same kernel frame setup to return to a kernel
+ * function that a kernel thread does.
+ */
+ childregs->sp = 0;
+ childregs->ip = 0;
+ kthread_frame_init(frame, sp, arg);
+ return 0;
+ }
+
/* Set a new TLS for the child thread? */
if (clone_flags & CLONE_SETTLS)
ret = set_new_tls(p, tls);
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <linux/libnvdimm.h>
+#include <linux/vmstat.h>
+#include <linux/kernel.h>
#include <asm/e820/api.h>
#include <asm/processor.h>
return;
direct_pages_count[level]--;
+ if (system_state == SYSTEM_RUNNING) {
+ if (level == PG_LEVEL_2M)
+ count_vm_event(DIRECT_MAP_LEVEL2_SPLIT);
+ else if (level == PG_LEVEL_1G)
+ count_vm_event(DIRECT_MAP_LEVEL3_SPLIT);
+ }
direct_pages_count[level - 1] += PTRS_PER_PTE;
}
node = (reg >> 4) & 0x07;
link = (reg >> 8) & 0x03;
- info = alloc_pci_root_info(min_bus, max_bus, node, link);
+ alloc_pci_root_info(min_bus, max_bus, node, link);
}
/*
obj-$(CONFIG_ELF_CORE) += elfcore.o
subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
+subarch-y += ../lib/cmpxchg8b_emu.o ../lib/atomic64_386_32.o
subarch-y += ../kernel/sys_ia32.o
else
extern long elf_aux_hwcap;
#define ELF_HWCAP (elf_aux_hwcap)
-#define SET_PERSONALITY(ex) do ; while(0)
+#define SET_PERSONALITY(ex) do {} while(0)
#endif
__asm("int3");
}
-static void inline remap_stack_and_trap(void)
+static inline void remap_stack_and_trap(void)
{
__asm__ volatile (
"movl %%esp,%%ebx ;"
void __init pci_xen_swiotlb_init(void)
{
if (xen_swiotlb) {
- xen_swiotlb_init(1, true /* early */);
+ xen_swiotlb_init_early();
dma_ops = &xen_swiotlb_dma_ops;
#ifdef CONFIG_PCI
if (xen_swiotlb)
return 0;
- rc = xen_swiotlb_init(1, false /* late */);
+ rc = xen_swiotlb_init();
if (rc)
return rc;
VARIANT = $(variant-y)
export VARIANT
-# Test for cross compiling
-
ifneq ($(VARIANT),)
- COMPILE_ARCH = $(shell uname -m)
-
- ifneq ($(COMPILE_ARCH), xtensa)
+ ifdef cross_compiling
ifndef CROSS_COMPILE
CROSS_COMPILE = xtensa_$(VARIANT)-
endif
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
-CONFIG_DEVKMEM=y
CONFIG_SERIAL_8250=y
# CONFIG_SERIAL_8250_DEPRECATED_OPTIONS is not set
CONFIG_SERIAL_8250_CONSOLE=y
}
EXPORT_SYMBOL(bio_init);
+unsigned int bio_max_size(struct bio *bio)
+{
+ struct block_device *bdev = bio->bi_bdev;
+
+ return bdev ? bdev->bd_disk->queue->limits.bio_max_bytes : UINT_MAX;
+}
+
/**
* bio_reset - reinitialize a bio
* @bio: bio to reset
struct bio_vec *bv = &bio->bi_io_vec[bio->bi_vcnt - 1];
if (page_is_mergeable(bv, page, len, off, same_page)) {
- if (bio->bi_iter.bi_size > UINT_MAX - len) {
+ if (bio->bi_iter.bi_size > bio_max_size(bio) - len) {
*same_page = false;
return false;
}
{
unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt;
unsigned short entries_left = bio->bi_max_vecs - bio->bi_vcnt;
+ unsigned int bytes_left = bio_max_size(bio) - bio->bi_iter.bi_size;
struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt;
struct page **pages = (struct page **)bv;
bool same_page = false;
BUILD_BUG_ON(PAGE_PTRS_PER_BVEC < 2);
pages += entries_left * (PAGE_PTRS_PER_BVEC - 1);
- size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset);
+ size = iov_iter_get_pages(iter, pages, bytes_left, nr_pages,
+ &offset);
if (unlikely(size <= 0))
return size ? size : -EFAULT;
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/pagemap.h>
#include <linux/gcd.h>
#include <linux/lcm.h>
#include <linux/jiffies.h>
*/
void blk_set_default_limits(struct queue_limits *lim)
{
+ lim->bio_max_bytes = UINT_MAX;
lim->max_segments = BLK_MAX_SEGMENTS;
lim->max_discard_segments = 1;
lim->max_integrity_segments = 0;
limits->logical_block_size >> SECTOR_SHIFT);
limits->max_sectors = max_sectors;
+ if (check_shl_overflow(max_sectors, SECTOR_SHIFT,
+ &limits->bio_max_bytes))
+ limits->bio_max_bytes = UINT_MAX;
+
q->backing_dev_info->io_pages = max_sectors >> (PAGE_SHIFT - 9);
}
EXPORT_SYMBOL(blk_queue_max_hw_sectors);
# SPDX-License-Identifier: GPL-2.0-only
-x509_certificate_list
-x509_revocation_list
+/x509_certificate_list
+/x509_revocation_list
obj-y += soc/
obj-$(CONFIG_VIRTIO) += virtio/
+obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio/
obj-$(CONFIG_VDPA) += vdpa/
obj-$(CONFIG_XEN) += xen/
acpi_handle handle = mem_device->device->handle;
int result, num_enabled = 0;
struct acpi_memory_info *info;
+ mhp_t mhp_flags = MHP_NONE;
int node;
node = acpi_get_node(handle);
if (node < 0)
node = memory_add_physaddr_to_nid(info->start_addr);
+ if (mhp_supports_memmap_on_memory(info->length))
+ mhp_flags |= MHP_MEMMAP_ON_MEMORY;
result = __add_memory(node, info->start_addr, info->length,
- MHP_NONE);
+ mhp_flags);
/*
* If the memory block has been used by the kernel, add_memory()
int index)
{
struct platform_device *pdev;
- int irq = map_gt_gsi(wd->timer_interrupt, wd->timer_flags);
+ int irq;
/*
* According to SBSA specification the size of refresh and control
struct resource res[] = {
DEFINE_RES_MEM(wd->control_frame_address, SZ_4K),
DEFINE_RES_MEM(wd->refresh_frame_address, SZ_4K),
- DEFINE_RES_IRQ(irq),
+ {},
};
int nr_res = ARRAY_SIZE(res);
if (!(wd->refresh_frame_address && wd->control_frame_address)) {
pr_err(FW_BUG "failed to get the Watchdog base address.\n");
- acpi_unregister_gsi(wd->timer_interrupt);
return -EINVAL;
}
+ irq = map_gt_gsi(wd->timer_interrupt, wd->timer_flags);
+ res[2] = (struct resource)DEFINE_RES_IRQ(irq);
if (irq <= 0) {
pr_warn("failed to map the Watchdog interrupt.\n");
nr_res--;
*/
pdev = platform_device_register_simple("sbsa-gwdt", index, res, nr_res);
if (IS_ERR(pdev)) {
- acpi_unregister_gsi(wd->timer_interrupt);
+ if (irq > 0)
+ acpi_unregister_gsi(wd->timer_interrupt);
return PTR_ERR(pdev);
}
sizeof(struct acpi_table_header)))
return -EFAULT;
uncopied_bytes = max_size = table.length;
+ /* make sure the buf is not allocated */
+ kfree(buf);
buf = kzalloc(max_size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
(*ppos + count < count) ||
(count > uncopied_bytes)) {
kfree(buf);
+ buf = NULL;
return -EINVAL;
}
add_taint(TAINT_OVERRIDDEN_ACPI_TABLE, LOCKDEP_NOW_UNRELIABLE);
}
- kfree(buf);
return count;
}
int acpi_power_get_inferred_state(struct acpi_device *device, int *state);
int acpi_power_on_resources(struct acpi_device *device, int state);
int acpi_power_transition(struct acpi_device *device, int state);
-void acpi_turn_off_unused_power_resources(void);
/* --------------------------------------------------------------------------
Device Power Management
{
struct irq_domain *d = irq_find_matching_fwnode(acpi_gsi_domain_id,
DOMAIN_BUS_ANY);
- int irq = irq_find_mapping(d, gsi);
+ int irq;
+ if (WARN_ON(acpi_irq_model == ACPI_IRQ_MODEL_GIC && gsi < 16))
+ return;
+
+ irq = irq_find_mapping(d, gsi);
irq_dispose_mapping(irq);
}
EXPORT_SYMBOL_GPL(acpi_unregister_gsi);
THUNDER_ECAM_QUIRK(2, 12),
THUNDER_ECAM_QUIRK(2, 13),
+ { "NVIDIA", "TEGRA194", 1, 0, MCFG_BUS_ANY, &tegra194_pcie_ops},
+ { "NVIDIA", "TEGRA194", 1, 1, MCFG_BUS_ANY, &tegra194_pcie_ops},
+ { "NVIDIA", "TEGRA194", 1, 2, MCFG_BUS_ANY, &tegra194_pcie_ops},
+ { "NVIDIA", "TEGRA194", 1, 3, MCFG_BUS_ANY, &tegra194_pcie_ops},
+ { "NVIDIA", "TEGRA194", 1, 4, MCFG_BUS_ANY, &tegra194_pcie_ops},
+ { "NVIDIA", "TEGRA194", 1, 5, MCFG_BUS_ANY, &tegra194_pcie_ops},
+
#define XGENE_V1_ECAM_MCFG(rev, seg) \
{"APM ", "XGENE ", rev, seg, MCFG_BUS_ANY, \
&xgene_v1_pcie_ecam_ops }
mutex_unlock(&power_resource_list_lock);
}
-#endif
void acpi_turn_off_unused_power_resources(void)
{
mutex_unlock(&power_resource_list_lock);
}
+#endif
}
}
- acpi_turn_off_unused_power_resources();
-
acpi_scan_initialized = true;
out:
extern struct mutex acpi_device_lock;
extern void acpi_resume_power_resources(void);
+extern void acpi_turn_off_unused_power_resources(void);
static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
{
u32 port_mask;
u32 quirks;
enum brcm_ahci_version version;
- struct reset_control *rcdev;
+ struct reset_control *rcdev_rescal;
+ struct reset_control *rcdev_ahci;
};
static inline u32 brcm_sata_readreg(void __iomem *addr)
else
ret = 0;
- if (priv->version != BRCM_SATA_BCM7216)
- reset_control_assert(priv->rcdev);
+ reset_control_assert(priv->rcdev_ahci);
+ reset_control_rearm(priv->rcdev_rescal);
return ret;
}
struct brcm_ahci_priv *priv = hpriv->plat_data;
int ret = 0;
- if (priv->version == BRCM_SATA_BCM7216)
- ret = reset_control_reset(priv->rcdev);
- else
- ret = reset_control_deassert(priv->rcdev);
+ ret = reset_control_deassert(priv->rcdev_ahci);
+ if (ret)
+ return ret;
+ ret = reset_control_reset(priv->rcdev_rescal);
if (ret)
return ret;
{
const struct of_device_id *of_id;
struct device *dev = &pdev->dev;
- const char *reset_name = NULL;
struct brcm_ahci_priv *priv;
struct ahci_host_priv *hpriv;
struct resource *res;
if (IS_ERR(priv->top_ctrl))
return PTR_ERR(priv->top_ctrl);
- /* Reset is optional depending on platform and named differently */
- if (priv->version == BRCM_SATA_BCM7216)
- reset_name = "rescal";
- else
- reset_name = "ahci";
-
- priv->rcdev = devm_reset_control_get_optional(&pdev->dev, reset_name);
- if (IS_ERR(priv->rcdev))
- return PTR_ERR(priv->rcdev);
+ if (priv->version == BRCM_SATA_BCM7216) {
+ priv->rcdev_rescal = devm_reset_control_get_optional_shared(
+ &pdev->dev, "rescal");
+ if (IS_ERR(priv->rcdev_rescal))
+ return PTR_ERR(priv->rcdev_rescal);
+ }
+ priv->rcdev_ahci = devm_reset_control_get_optional(&pdev->dev, "ahci");
+ if (IS_ERR(priv->rcdev_ahci))
+ return PTR_ERR(priv->rcdev_ahci);
hpriv = ahci_platform_get_resources(pdev, 0);
if (IS_ERR(hpriv))
break;
}
- if (priv->version == BRCM_SATA_BCM7216)
- ret = reset_control_reset(priv->rcdev);
- else
- ret = reset_control_deassert(priv->rcdev);
+ ret = reset_control_reset(priv->rcdev_rescal);
+ if (ret)
+ return ret;
+ ret = reset_control_deassert(priv->rcdev_ahci);
if (ret)
return ret;
out_disable_clks:
ahci_platform_disable_clks(hpriv);
out_reset:
- if (priv->version != BRCM_SATA_BCM7216)
- reset_control_assert(priv->rcdev);
+ reset_control_assert(priv->rcdev_ahci);
+ reset_control_rearm(priv->rcdev_rescal);
return ret;
}
switch (STATUS_CODE (qe)) {
case 0x1:
/* Fall through for streaming mode */
+ fallthrough;
case 0x2:/* Packet received OK.... */
if (atm_vcc) {
skb = pe->skb;
module_exit(panel_cleanup_module);
MODULE_AUTHOR("Willy Tarreau");
MODULE_LICENSE("GPL");
-
-/*
- * Local variables:
- * c-indent-level: 4
- * tab-width: 8
- * End:
- */
#include <linux/kernel_read_file.h>
#include <linux/module.h>
#include <linux/init.h>
+#include <linux/initrd.h>
#include <linux/timer.h>
#include <linux/vmalloc.h>
#include <linux/interrupt.h>
if (!path)
return -ENOMEM;
+ wait_for_initramfs();
for (i = 0; i < ARRAY_SIZE(fw_path); i++) {
size_t file_size = 0;
size_t *file_size_ptr = NULL;
return blocking_notifier_call_chain(&memory_chain, val, v);
}
+static int memory_block_online(struct memory_block *mem)
+{
+ unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
+ unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
+ unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+ struct zone *zone;
+ int ret;
+
+ zone = zone_for_pfn_range(mem->online_type, mem->nid, start_pfn, nr_pages);
+
+ /*
+ * Although vmemmap pages have a different lifecycle than the pages
+ * they describe (they remain until the memory is unplugged), doing
+ * their initialization and accounting at memory onlining/offlining
+ * stage helps to keep accounting easier to follow - e.g vmemmaps
+ * belong to the same zone as the memory they backed.
+ */
+ if (nr_vmemmap_pages) {
+ ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
+ if (ret)
+ return ret;
+ }
+
+ ret = online_pages(start_pfn + nr_vmemmap_pages,
+ nr_pages - nr_vmemmap_pages, zone);
+ if (ret) {
+ if (nr_vmemmap_pages)
+ mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
+ return ret;
+ }
+
+ /*
+ * Account once onlining succeeded. If the zone was unpopulated, it is
+ * now already properly populated.
+ */
+ if (nr_vmemmap_pages)
+ adjust_present_page_count(zone, nr_vmemmap_pages);
+
+ return ret;
+}
+
+static int memory_block_offline(struct memory_block *mem)
+{
+ unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
+ unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
+ unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
+ struct zone *zone;
+ int ret;
+
+ zone = page_zone(pfn_to_page(start_pfn));
+
+ /*
+ * Unaccount before offlining, such that unpopulated zone and kthreads
+ * can properly be torn down in offline_pages().
+ */
+ if (nr_vmemmap_pages)
+ adjust_present_page_count(zone, -nr_vmemmap_pages);
+
+ ret = offline_pages(start_pfn + nr_vmemmap_pages,
+ nr_pages - nr_vmemmap_pages);
+ if (ret) {
+ /* offline_pages() failed. Account back. */
+ if (nr_vmemmap_pages)
+ adjust_present_page_count(zone, nr_vmemmap_pages);
+ return ret;
+ }
+
+ if (nr_vmemmap_pages)
+ mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
+
+ return ret;
+}
+
/*
* MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
* OK to have direct references to sparsemem variables in here.
*/
static int
-memory_block_action(unsigned long start_section_nr, unsigned long action,
- int online_type, int nid)
+memory_block_action(struct memory_block *mem, unsigned long action)
{
- unsigned long start_pfn;
- unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
int ret;
- start_pfn = section_nr_to_pfn(start_section_nr);
-
switch (action) {
case MEM_ONLINE:
- ret = online_pages(start_pfn, nr_pages, online_type, nid);
+ ret = memory_block_online(mem);
break;
case MEM_OFFLINE:
- ret = offline_pages(start_pfn, nr_pages);
+ ret = memory_block_offline(mem);
break;
default:
WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
- "%ld\n", __func__, start_section_nr, action, action);
+ "%ld\n", __func__, mem->start_section_nr, action, action);
ret = -EINVAL;
}
if (to_state == MEM_OFFLINE)
mem->state = MEM_GOING_OFFLINE;
- ret = memory_block_action(mem->start_section_nr, to_state,
- mem->online_type, mem->nid);
-
+ ret = memory_block_action(mem, to_state);
mem->state = ret ? from_state_req : to_state;
return ret;
return ret;
}
-static int init_memory_block(unsigned long block_id, unsigned long state)
+static int init_memory_block(unsigned long block_id, unsigned long state,
+ unsigned long nr_vmemmap_pages)
{
struct memory_block *mem;
int ret = 0;
mem->start_section_nr = block_id * sections_per_block;
mem->state = state;
mem->nid = NUMA_NO_NODE;
+ mem->nr_vmemmap_pages = nr_vmemmap_pages;
ret = register_memory(mem);
if (section_count == 0)
return 0;
return init_memory_block(memory_block_id(base_section_nr),
- MEM_ONLINE);
+ MEM_ONLINE, 0);
}
static void unregister_memory(struct memory_block *memory)
*
* Called under device_hotplug_lock.
*/
-int create_memory_block_devices(unsigned long start, unsigned long size)
+int create_memory_block_devices(unsigned long start, unsigned long size,
+ unsigned long vmemmap_pages)
{
const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
return -EINVAL;
for (block_id = start_block_id; block_id != end_block_id; block_id++) {
- ret = init_memory_block(block_id, MEM_OFFLINE);
+ ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages);
if (ret)
break;
}
#include <linux/bio.h>
#include <linux/highmem.h>
#include <linux/mutex.h>
+#include <linux/pagemap.h>
#include <linux/radix-tree.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/moduleparam.h>
#include <linux/sched.h>
#include <linux/fs.h>
+#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/stat.h>
#include <linux/errno.h>
dev->discard_alignment = le32_to_cpu(rsp->discard_alignment);
dev->secure_discard = le16_to_cpu(rsp->secure_discard);
dev->rotational = rsp->rotational;
- dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
+ dev->wc = !!(rsp->cache_policy & RNBD_WRITEBACK);
dev->fua = !!(rsp->cache_policy & RNBD_FUA);
dev->max_hw_sectors = sess->max_io_size / SECTOR_SIZE;
cpu_q = rnbd_get_cpu_qlist(sess, nxt_cpu(cpu_q->cpu))) {
if (!spin_trylock_irqsave(&cpu_q->requeue_lock, flags))
continue;
- if (unlikely(!test_bit(cpu_q->cpu, sess->cpu_queues_bm)))
+ if (!test_bit(cpu_q->cpu, sess->cpu_queues_bm))
goto unlock;
q = list_first_entry_or_null(&cpu_q->requeue_list,
typeof(*q), requeue_list);
struct rtrs_permit *permit;
permit = rtrs_clt_get_permit(sess->rtrs, con_type, wait);
- if (likely(permit))
+ if (permit)
/* We have a subtle rare case here, when all permits can be
* consumed before busy counter increased. This is safe,
* because loser will get NULL as a permit, observe 0 busy
struct rtrs_permit *permit;
iu = kzalloc(sizeof(*iu), GFP_KERNEL);
- if (!iu) {
+ if (!iu)
return NULL;
- }
permit = rnbd_get_permit(sess, con_type, wait);
- if (unlikely(!permit)) {
+ if (!permit) {
kfree(iu);
return NULL;
}
return;
}
- rtrs_clt_query(sess->rtrs, &attrs);
+ err = rtrs_clt_query(sess->rtrs, &attrs);
+ if (err) {
+ pr_err("rtrs_clt_query(\"%s\"): %d\n", sess->sessname, err);
+ return;
+ }
mutex_lock(&sess->lock);
sess->max_io_size = attrs.max_io_size;
mutex_init(&sess->lock);
INIT_LIST_HEAD(&sess->devs_list);
INIT_LIST_HEAD(&sess->list);
- bitmap_zero(sess->cpu_queues_bm, NR_CPUS);
+ bitmap_zero(sess->cpu_queues_bm, num_possible_cpus());
init_waitqueue_head(&sess->rtrs_waitq);
refcount_set(&sess->refcount, 1);
};
err = rtrs_clt_request(rq_data_dir(rq), &req_ops, rtrs, permit,
&vec, 1, size, iu->sgt.sgl, sg_cnt);
- if (unlikely(err)) {
+ if (err) {
rnbd_clt_err_rl(dev, "RTRS failed to transfer IO, err: %d\n",
err);
return err;
cpu_q = get_cpu_ptr(sess->cpu_queues);
spin_lock_irqsave(&cpu_q->requeue_lock, flags);
- if (likely(!test_and_set_bit_lock(0, &q->in_list))) {
+ if (!test_and_set_bit_lock(0, &q->in_list)) {
if (WARN_ON(!list_empty(&q->requeue_list)))
goto unlock;
*/
smp_mb__before_atomic();
}
- if (likely(atomic_read(&sess->busy))) {
+ if (atomic_read(&sess->busy)) {
list_add_tail(&q->requeue_list, &cpu_q->requeue_list);
} else {
/* Very unlikely, but possible: busy counter was
if (delay != RNBD_DELAY_IFBUSY)
blk_mq_delay_run_hw_queue(hctx, delay);
- else if (unlikely(!rnbd_clt_dev_add_to_requeue(dev, q)))
+ else if (!rnbd_clt_dev_add_to_requeue(dev, q))
/*
* If session is not busy we have to restart
* the queue ourselves.
int err;
blk_status_t ret = BLK_STS_IOERR;
- if (unlikely(dev->dev_state != DEV_STATE_MAPPED))
+ if (dev->dev_state != DEV_STATE_MAPPED)
return BLK_STS_IOERR;
iu->permit = rnbd_get_permit(dev->sess, RTRS_IO_CON,
RTRS_PERMIT_NOWAIT);
- if (unlikely(!iu->permit)) {
+ if (!iu->permit) {
rnbd_clt_dev_kick_mq_queue(dev, hctx, RNBD_DELAY_IFBUSY);
return BLK_STS_RESOURCE;
}
iu->sgt.sgl = iu->first_sgl;
err = sg_alloc_table_chained(&iu->sgt,
/* Even-if the request has no segment,
- * sglist must have one entry at least */
+ * sglist must have one entry at least.
+ */
blk_rq_nr_phys_segments(rq) ? : 1,
iu->sgt.sgl,
RNBD_INLINE_SG_CNT);
blk_mq_start_request(rq);
err = rnbd_client_xfer_request(dev, rq, iu);
- if (likely(err == 0))
+ if (err == 0)
return BLK_STS_OK;
- if (unlikely(err == -EAGAIN || err == -ENOMEM)) {
+ if (err == -EAGAIN || err == -ENOMEM) {
rnbd_clt_dev_kick_mq_queue(dev, hctx, 10/*ms*/);
ret = BLK_STS_RESOURCE;
}
err = PTR_ERR(sess->rtrs);
goto wake_up_and_put;
}
- rtrs_clt_query(sess->rtrs, &attrs);
+
+ err = rtrs_clt_query(sess->rtrs, &attrs);
+ if (err)
+ goto close_rtrs;
+
sess->max_io_size = attrs.max_io_size;
sess->queue_depth = attrs.queue_depth;
sess->nr_poll_queues = nr_poll_queues;
struct rnbd_clt_dev *dev;
int ret;
- if (unlikely(exists_devpath(pathname, sessname)))
+ if (exists_devpath(pathname, sessname))
return ERR_PTR(-EEXIST);
sess = find_and_get_or_create_sess(sessname, paths, path_cnt, port_nr, nr_poll_queues);
DECLARE_BITMAP(cpu_queues_bm, NR_CPUS);
int __percpu *cpu_rr; /* per-cpu var for CPU round-robin */
atomic_t busy;
- int queue_depth;
+ size_t queue_depth;
u32 max_io_size;
struct blk_mq_tag_set tag_set;
u32 nr_poll_queues;
rcu_read_lock();
sess_dev = xa_load(&srv_sess->index_idr, dev_id);
- if (likely(sess_dev))
+ if (sess_dev)
ret = kref_get_unless_zero(&sess_dev->kref);
rcu_read_unlock();
memory.
When in doubt, say "Y".
-config DEVKMEM
- bool "/dev/kmem virtual device support"
- # On arm64, VMALLOC_START < PAGE_OFFSET, which confuses kmem read/write
- depends on !ARM64
- help
- Say Y here if you want to support the /dev/kmem device. The
- /dev/kmem device is rarely used, but can be used for certain
- kind of kernel debugging operations.
- When in doubt, say "N".
-
config NVRAM
tristate "/dev/nvram support"
depends on X86 || HAVE_ARCH_NVRAM_OPS
return 0;
}
-static int mmap_kmem(struct file *file, struct vm_area_struct *vma)
-{
- unsigned long pfn;
-
- /* Turn a kernel-virtual address into a physical page frame */
- pfn = __pa((u64)vma->vm_pgoff << PAGE_SHIFT) >> PAGE_SHIFT;
-
- /*
- * RED-PEN: on some architectures there is more mapped memory than
- * available in mem_map which pfn_valid checks for. Perhaps should add a
- * new macro here.
- *
- * RED-PEN: vmalloc is not supported right now.
- */
- if (!pfn_valid(pfn))
- return -EIO;
-
- vma->vm_pgoff = pfn;
- return mmap_mem(file, vma);
-}
-
-/*
- * This function reads the *virtual* memory as seen by the kernel.
- */
-static ssize_t read_kmem(struct file *file, char __user *buf,
- size_t count, loff_t *ppos)
-{
- unsigned long p = *ppos;
- ssize_t low_count, read, sz;
- char *kbuf; /* k-addr because vread() takes vmlist_lock rwlock */
- int err = 0;
-
- read = 0;
- if (p < (unsigned long) high_memory) {
- low_count = count;
- if (count > (unsigned long)high_memory - p)
- low_count = (unsigned long)high_memory - p;
-
-#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (p < PAGE_SIZE && low_count > 0) {
- sz = size_inside_page(p, low_count);
- if (clear_user(buf, sz))
- return -EFAULT;
- buf += sz;
- p += sz;
- read += sz;
- low_count -= sz;
- count -= sz;
- }
-#endif
- while (low_count > 0) {
- sz = size_inside_page(p, low_count);
-
- /*
- * On ia64 if a page has been mapped somewhere as
- * uncached, then it must also be accessed uncached
- * by the kernel or data corruption may occur
- */
- kbuf = xlate_dev_kmem_ptr((void *)p);
- if (!virt_addr_valid(kbuf))
- return -ENXIO;
-
- if (copy_to_user(buf, kbuf, sz))
- return -EFAULT;
- buf += sz;
- p += sz;
- read += sz;
- low_count -= sz;
- count -= sz;
- if (should_stop_iteration()) {
- count = 0;
- break;
- }
- }
- }
-
- if (count > 0) {
- kbuf = (char *)__get_free_page(GFP_KERNEL);
- if (!kbuf)
- return -ENOMEM;
- while (count > 0) {
- sz = size_inside_page(p, count);
- if (!is_vmalloc_or_module_addr((void *)p)) {
- err = -ENXIO;
- break;
- }
- sz = vread(kbuf, (char *)p, sz);
- if (!sz)
- break;
- if (copy_to_user(buf, kbuf, sz)) {
- err = -EFAULT;
- break;
- }
- count -= sz;
- buf += sz;
- read += sz;
- p += sz;
- if (should_stop_iteration())
- break;
- }
- free_page((unsigned long)kbuf);
- }
- *ppos = p;
- return read ? read : err;
-}
-
-
-static ssize_t do_write_kmem(unsigned long p, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- ssize_t written, sz;
- unsigned long copied;
-
- written = 0;
-#ifdef __ARCH_HAS_NO_PAGE_ZERO_MAPPED
- /* we don't have page 0 mapped on sparc and m68k.. */
- if (p < PAGE_SIZE) {
- sz = size_inside_page(p, count);
- /* Hmm. Do something? */
- buf += sz;
- p += sz;
- count -= sz;
- written += sz;
- }
-#endif
-
- while (count > 0) {
- void *ptr;
-
- sz = size_inside_page(p, count);
-
- /*
- * On ia64 if a page has been mapped somewhere as uncached, then
- * it must also be accessed uncached by the kernel or data
- * corruption may occur.
- */
- ptr = xlate_dev_kmem_ptr((void *)p);
- if (!virt_addr_valid(ptr))
- return -ENXIO;
-
- copied = copy_from_user(ptr, buf, sz);
- if (copied) {
- written += sz - copied;
- if (written)
- break;
- return -EFAULT;
- }
- buf += sz;
- p += sz;
- count -= sz;
- written += sz;
- if (should_stop_iteration())
- break;
- }
-
- *ppos += written;
- return written;
-}
-
-/*
- * This function writes to the *virtual* memory as seen by the kernel.
- */
-static ssize_t write_kmem(struct file *file, const char __user *buf,
- size_t count, loff_t *ppos)
-{
- unsigned long p = *ppos;
- ssize_t wrote = 0;
- ssize_t virtr = 0;
- char *kbuf; /* k-addr because vwrite() takes vmlist_lock rwlock */
- int err = 0;
-
- if (p < (unsigned long) high_memory) {
- unsigned long to_write = min_t(unsigned long, count,
- (unsigned long)high_memory - p);
- wrote = do_write_kmem(p, buf, to_write, ppos);
- if (wrote != to_write)
- return wrote;
- p += wrote;
- buf += wrote;
- count -= wrote;
- }
-
- if (count > 0) {
- kbuf = (char *)__get_free_page(GFP_KERNEL);
- if (!kbuf)
- return wrote ? wrote : -ENOMEM;
- while (count > 0) {
- unsigned long sz = size_inside_page(p, count);
- unsigned long n;
-
- if (!is_vmalloc_or_module_addr((void *)p)) {
- err = -ENXIO;
- break;
- }
- n = copy_from_user(kbuf, buf, sz);
- if (n) {
- err = -EFAULT;
- break;
- }
- vwrite(kbuf, (char *)p, sz);
- count -= sz;
- buf += sz;
- virtr += sz;
- p += sz;
- if (should_stop_iteration())
- break;
- }
- free_page((unsigned long)kbuf);
- }
-
- *ppos = p;
- return virtr + wrote ? : err;
-}
-
static ssize_t read_port(struct file *file, char __user *buf,
size_t count, loff_t *ppos)
{
#define write_zero write_null
#define write_iter_zero write_iter_null
#define open_mem open_port
-#define open_kmem open_mem
static const struct file_operations __maybe_unused mem_fops = {
.llseek = memory_lseek,
#endif
};
-static const struct file_operations __maybe_unused kmem_fops = {
- .llseek = memory_lseek,
- .read = read_kmem,
- .write = write_kmem,
- .mmap = mmap_kmem,
- .open = open_kmem,
-#ifndef CONFIG_MMU
- .get_unmapped_area = get_unmapped_area_mem,
- .mmap_capabilities = memory_mmap_capabilities,
-#endif
-};
-
static const struct file_operations null_fops = {
.llseek = null_lseek,
.read = read_null,
} devlist[] = {
#ifdef CONFIG_DEVMEM
[DEVMEM_MINOR] = { "mem", 0, &mem_fops, FMODE_UNSIGNED_OFFSET },
-#endif
-#ifdef CONFIG_DEVKMEM
- [2] = { "kmem", 0, &kmem_fops, FMODE_UNSIGNED_OFFSET },
#endif
[3] = { "null", 0666, &null_fops, 0 },
#ifdef CONFIG_DEVPORT
config CLK_SIFIVE_PRCI
bool "PRCI driver for SiFive SoCs"
+ select RESET_CONTROLLER
+ select RESET_SIMPLE
select CLK_ANALOGBITS_WRPLL_CLN28HPC
help
Supports the Power Reset Clock interface (PRCI) IP block found in
.recalc_rate = sifive_prci_hfpclkplldiv_recalc_rate,
};
+static const struct clk_ops sifive_fu740_prci_pcie_aux_clk_ops = {
+ .enable = sifive_prci_pcie_aux_clock_enable,
+ .disable = sifive_prci_pcie_aux_clock_disable,
+ .is_enabled = sifive_prci_pcie_aux_clock_is_enabled,
+};
+
/* List of clock controls provided by the PRCI */
struct __prci_clock __prci_init_clocks_fu740[] = {
[PRCI_CLK_COREPLL] = {
.parent_name = "hfpclkpll",
.ops = &sifive_fu740_prci_hfpclkplldiv_clk_ops,
},
+ [PRCI_CLK_PCIE_AUX] = {
+ .name = "pcie_aux",
+ .parent_name = "hfclk",
+ .ops = &sifive_fu740_prci_pcie_aux_clk_ops,
+ },
};
#include "sifive-prci.h"
-#define NUM_CLOCK_FU740 8
+#define NUM_CLOCK_FU740 9
extern struct __prci_clock __prci_init_clocks_fu740[NUM_CLOCK_FU740];
r = __prci_readl(pd, PRCI_HFPCLKPLLSEL_OFFSET); /* barrier */
}
+/* PCIE AUX clock APIs for enable, disable. */
+int sifive_prci_pcie_aux_clock_is_enabled(struct clk_hw *hw)
+{
+ struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+ struct __prci_data *pd = pc->pd;
+ u32 r;
+
+ r = __prci_readl(pd, PRCI_PCIE_AUX_OFFSET);
+
+ if (r & PRCI_PCIE_AUX_EN_MASK)
+ return 1;
+ else
+ return 0;
+}
+
+int sifive_prci_pcie_aux_clock_enable(struct clk_hw *hw)
+{
+ struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+ struct __prci_data *pd = pc->pd;
+ u32 r __maybe_unused;
+
+ if (sifive_prci_pcie_aux_clock_is_enabled(hw))
+ return 0;
+
+ __prci_writel(1, PRCI_PCIE_AUX_OFFSET, pd);
+ r = __prci_readl(pd, PRCI_PCIE_AUX_OFFSET); /* barrier */
+
+ return 0;
+}
+
+void sifive_prci_pcie_aux_clock_disable(struct clk_hw *hw)
+{
+ struct __prci_clock *pc = clk_hw_to_prci_clock(hw);
+ struct __prci_data *pd = pc->pd;
+ u32 r __maybe_unused;
+
+ __prci_writel(0, PRCI_PCIE_AUX_OFFSET, pd);
+ r = __prci_readl(pd, PRCI_PCIE_AUX_OFFSET); /* barrier */
+
+}
+
/**
* __prci_register_clocks() - register clock controls in the PRCI
* @dev: Linux struct device
if (IS_ERR(pd->va))
return PTR_ERR(pd->va);
+ pd->reset.rcdev.owner = THIS_MODULE;
+ pd->reset.rcdev.nr_resets = PRCI_RST_NR;
+ pd->reset.rcdev.ops = &reset_simple_ops;
+ pd->reset.rcdev.of_node = pdev->dev.of_node;
+ pd->reset.active_low = true;
+ pd->reset.membase = pd->va + PRCI_DEVICESRESETREG_OFFSET;
+ spin_lock_init(&pd->reset.lock);
+
+ r = devm_reset_controller_register(&pdev->dev, &pd->reset.rcdev);
+ if (r) {
+ dev_err(dev, "could not register reset controller: %d\n", r);
+ return r;
+ }
r = __prci_register_clocks(dev, pd, desc);
if (r) {
dev_err(dev, "could not register clocks: %d\n", r);
#include <linux/clk/analogbits-wrpll-cln28hpc.h>
#include <linux/clk-provider.h>
+#include <linux/reset/reset-simple.h>
#include <linux/platform_device.h>
/*
#define PRCI_DDRPLLCFG1_CKE_SHIFT 31
#define PRCI_DDRPLLCFG1_CKE_MASK (0x1 << PRCI_DDRPLLCFG1_CKE_SHIFT)
+/* PCIEAUX */
+#define PRCI_PCIE_AUX_OFFSET 0x14
+#define PRCI_PCIE_AUX_EN_SHIFT 0
+#define PRCI_PCIE_AUX_EN_MASK (0x1 << PRCI_PCIE_AUX_EN_SHIFT)
+
/* GEMGXLPLLCFG0 */
#define PRCI_GEMGXLPLLCFG0_OFFSET 0x1c
#define PRCI_GEMGXLPLLCFG0_DIVR_SHIFT 0
#define PRCI_DEVICESRESETREG_CHIPLINK_RST_N_MASK \
(0x1 << PRCI_DEVICESRESETREG_CHIPLINK_RST_N_SHIFT)
+#define PRCI_RST_NR 7
+
/* CLKMUXSTATUSREG */
#define PRCI_CLKMUXSTATUSREG_OFFSET 0x2c
#define PRCI_CLKMUXSTATUSREG_TLCLKSEL_STATUS_SHIFT 1
*/
struct __prci_data {
void __iomem *va;
+ struct reset_simple_data reset;
struct clk_hw_onecell_data hw_clks;
};
unsigned long sifive_prci_hfpclkplldiv_recalc_rate(struct clk_hw *hw,
unsigned long parent_rate);
+int sifive_prci_pcie_aux_clock_is_enabled(struct clk_hw *hw);
+int sifive_prci_pcie_aux_clock_enable(struct clk_hw *hw);
+void sifive_prci_pcie_aux_clock_disable(struct clk_hw *hw);
+
#endif /* __SIFIVE_CLK_SIFIVE_PRCI_H */
depends on PCI_PASID
depends on PCI_IOV
+config INTEL_IDXD_PERFMON
+ bool "Intel Data Accelerators performance monitor support"
+ depends on INTEL_IDXD
+ help
+ Enable performance monitor (pmu) support for the Intel(R)
+ data accelerators present in Intel Xeon CPU. With this
+ enabled, perf can be used to monitor the DSA (Intel Data
+ Streaming Accelerator) events described in the Intel DSA
+ spec.
+
+ If unsure, say N.
+
config INTEL_IOATDMA
tristate "Intel I/OAT DMA support"
depends on PCI && X86_64
return test_bit(AT_XDMAC_CHAN_IS_PAUSED, &atchan->status);
}
-static inline int at_xdmac_csize(u32 maxburst)
-{
- int csize;
-
- csize = ffs(maxburst) - 1;
- if (csize > 4)
- csize = -EINVAL;
-
- return csize;
-};
-
static inline bool at_xdmac_chan_is_peripheral_xfer(u32 cfg)
{
return cfg & AT_XDMAC_CC_TYPE_PER_TRAN;
* - Even chunks originate CB equal to 1
*/
chunk->cb = !(desc->chunks_alloc % 2);
- chunk->ll_region.paddr = dw->ll_region.paddr + chan->ll_off;
- chunk->ll_region.vaddr = dw->ll_region.vaddr + chan->ll_off;
+ if (chan->dir == EDMA_DIR_WRITE) {
+ chunk->ll_region.paddr = dw->ll_region_wr[chan->id].paddr;
+ chunk->ll_region.vaddr = dw->ll_region_wr[chan->id].vaddr;
+ } else {
+ chunk->ll_region.paddr = dw->ll_region_rd[chan->id].paddr;
+ chunk->ll_region.vaddr = dw->ll_region_rd[chan->id].vaddr;
+ }
if (desc->chunk) {
/* Create and add new element into the linked list */
struct dw_edma_chunk *chunk;
struct dw_edma_burst *burst;
struct dw_edma_desc *desc;
- u32 cnt;
+ u32 cnt = 0;
int i;
if (!chan->configured)
return NULL;
switch (chan->config.direction) {
- case DMA_DEV_TO_MEM: /* local dma */
+ case DMA_DEV_TO_MEM: /* local DMA */
if (dir == DMA_DEV_TO_MEM && chan->dir == EDMA_DIR_READ)
break;
return NULL;
- case DMA_MEM_TO_DEV: /* local dma */
+ case DMA_MEM_TO_DEV: /* local DMA */
if (dir == DMA_MEM_TO_DEV && chan->dir == EDMA_DIR_WRITE)
break;
return NULL;
- default: /* remote dma */
+ default: /* remote DMA */
if (dir == DMA_MEM_TO_DEV && chan->dir == EDMA_DIR_READ)
break;
if (dir == DMA_DEV_TO_MEM && chan->dir == EDMA_DIR_WRITE)
return NULL;
}
- if (xfer->cyclic) {
+ if (xfer->type == EDMA_XFER_CYCLIC) {
if (!xfer->xfer.cyclic.len || !xfer->xfer.cyclic.cnt)
return NULL;
- } else {
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
if (xfer->xfer.sg.len < 1)
return NULL;
+ } else if (xfer->type == EDMA_XFER_INTERLEAVED) {
+ if (!xfer->xfer.il->numf)
+ return NULL;
+ if (xfer->xfer.il->numf > 0 && xfer->xfer.il->frame_size > 0)
+ return NULL;
+ } else {
+ return NULL;
}
desc = dw_edma_alloc_desc(chan);
if (unlikely(!chunk))
goto err_alloc;
- src_addr = chan->config.src_addr;
- dst_addr = chan->config.dst_addr;
+ if (xfer->type == EDMA_XFER_INTERLEAVED) {
+ src_addr = xfer->xfer.il->src_start;
+ dst_addr = xfer->xfer.il->dst_start;
+ } else {
+ src_addr = chan->config.src_addr;
+ dst_addr = chan->config.dst_addr;
+ }
- if (xfer->cyclic) {
+ if (xfer->type == EDMA_XFER_CYCLIC) {
cnt = xfer->xfer.cyclic.cnt;
- } else {
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
cnt = xfer->xfer.sg.len;
sg = xfer->xfer.sg.sgl;
+ } else if (xfer->type == EDMA_XFER_INTERLEAVED) {
+ if (xfer->xfer.il->numf > 0)
+ cnt = xfer->xfer.il->numf;
+ else
+ cnt = xfer->xfer.il->frame_size;
}
for (i = 0; i < cnt; i++) {
- if (!xfer->cyclic && !sg)
+ if (xfer->type == EDMA_XFER_SCATTER_GATHER && !sg)
break;
if (chunk->bursts_alloc == chan->ll_max) {
if (unlikely(!burst))
goto err_alloc;
- if (xfer->cyclic)
+ if (xfer->type == EDMA_XFER_CYCLIC)
burst->sz = xfer->xfer.cyclic.len;
- else
+ else if (xfer->type == EDMA_XFER_SCATTER_GATHER)
burst->sz = sg_dma_len(sg);
+ else if (xfer->type == EDMA_XFER_INTERLEAVED)
+ burst->sz = xfer->xfer.il->sgl[i].size;
chunk->ll_region.sz += burst->sz;
desc->alloc_sz += burst->sz;
if (chan->dir == EDMA_DIR_WRITE) {
burst->sar = src_addr;
- if (xfer->cyclic) {
+ if (xfer->type == EDMA_XFER_CYCLIC) {
burst->dar = xfer->xfer.cyclic.paddr;
- } else {
- burst->dar = dst_addr;
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ src_addr += sg_dma_len(sg);
+ burst->dar = sg_dma_address(sg);
/* Unlike the typical assumption by other
* drivers/IPs the peripheral memory isn't
* a FIFO memory, in this case, it's a
}
} else {
burst->dar = dst_addr;
- if (xfer->cyclic) {
+ if (xfer->type == EDMA_XFER_CYCLIC) {
burst->sar = xfer->xfer.cyclic.paddr;
- } else {
- burst->sar = src_addr;
+ } else if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
+ dst_addr += sg_dma_len(sg);
+ burst->sar = sg_dma_address(sg);
/* Unlike the typical assumption by other
* drivers/IPs the peripheral memory isn't
* a FIFO memory, in this case, it's a
}
}
- if (!xfer->cyclic) {
- src_addr += sg_dma_len(sg);
- dst_addr += sg_dma_len(sg);
+ if (xfer->type == EDMA_XFER_SCATTER_GATHER) {
sg = sg_next(sg);
+ } else if (xfer->type == EDMA_XFER_INTERLEAVED &&
+ xfer->xfer.il->frame_size > 0) {
+ struct dma_interleaved_template *il = xfer->xfer.il;
+ struct data_chunk *dc = &il->sgl[i];
+
+ if (il->src_sgl) {
+ src_addr += burst->sz;
+ src_addr += dmaengine_get_src_icg(il, dc);
+ }
+
+ if (il->dst_sgl) {
+ dst_addr += burst->sz;
+ dst_addr += dmaengine_get_dst_icg(il, dc);
+ }
}
}
xfer.xfer.sg.sgl = sgl;
xfer.xfer.sg.len = len;
xfer.flags = flags;
- xfer.cyclic = false;
+ xfer.type = EDMA_XFER_SCATTER_GATHER;
return dw_edma_device_transfer(&xfer);
}
xfer.xfer.cyclic.len = len;
xfer.xfer.cyclic.cnt = count;
xfer.flags = flags;
- xfer.cyclic = true;
+ xfer.type = EDMA_XFER_CYCLIC;
+
+ return dw_edma_device_transfer(&xfer);
+}
+
+static struct dma_async_tx_descriptor *
+dw_edma_device_prep_interleaved_dma(struct dma_chan *dchan,
+ struct dma_interleaved_template *ilt,
+ unsigned long flags)
+{
+ struct dw_edma_transfer xfer;
+
+ xfer.dchan = dchan;
+ xfer.direction = ilt->dir;
+ xfer.xfer.il = ilt;
+ xfer.flags = flags;
+ xfer.type = EDMA_XFER_INTERLEAVED;
return dw_edma_device_transfer(&xfer);
}
struct device *dev = chip->dev;
struct dw_edma *dw = chip->dw;
struct dw_edma_chan *chan;
- size_t ll_chunk, dt_chunk;
struct dw_edma_irq *irq;
struct dma_device *dma;
- u32 i, j, cnt, ch_cnt;
u32 alloc, off_alloc;
+ u32 i, j, cnt;
int err = 0;
u32 pos;
- ch_cnt = dw->wr_ch_cnt + dw->rd_ch_cnt;
- ll_chunk = dw->ll_region.sz;
- dt_chunk = dw->dt_region.sz;
-
- /* Calculate linked list chunk for each channel */
- ll_chunk /= roundup_pow_of_two(ch_cnt);
-
- /* Calculate linked list chunk for each channel */
- dt_chunk /= roundup_pow_of_two(ch_cnt);
-
if (write) {
i = 0;
cnt = dw->wr_ch_cnt;
chan->request = EDMA_REQ_NONE;
chan->status = EDMA_ST_IDLE;
- chan->ll_off = (ll_chunk * i);
- chan->ll_max = (ll_chunk / EDMA_LL_SZ) - 1;
-
- chan->dt_off = (dt_chunk * i);
+ if (write)
+ chan->ll_max = (dw->ll_region_wr[j].sz / EDMA_LL_SZ);
+ else
+ chan->ll_max = (dw->ll_region_rd[j].sz / EDMA_LL_SZ);
+ chan->ll_max -= 1;
- dev_vdbg(dev, "L. List:\tChannel %s[%u] off=0x%.8lx, max_cnt=%u\n",
- write ? "write" : "read", j,
- chan->ll_off, chan->ll_max);
+ dev_vdbg(dev, "L. List:\tChannel %s[%u] max_cnt=%u\n",
+ write ? "write" : "read", j, chan->ll_max);
if (dw->nr_irqs == 1)
pos = 0;
chan->vc.desc_free = vchan_free_desc;
vchan_init(&chan->vc, dma);
- dt_region->paddr = dw->dt_region.paddr + chan->dt_off;
- dt_region->vaddr = dw->dt_region.vaddr + chan->dt_off;
- dt_region->sz = dt_chunk;
-
- dev_vdbg(dev, "Data:\tChannel %s[%u] off=0x%.8lx\n",
- write ? "write" : "read", j, chan->dt_off);
+ if (write) {
+ dt_region->paddr = dw->dt_region_wr[j].paddr;
+ dt_region->vaddr = dw->dt_region_wr[j].vaddr;
+ dt_region->sz = dw->dt_region_wr[j].sz;
+ } else {
+ dt_region->paddr = dw->dt_region_rd[j].paddr;
+ dt_region->vaddr = dw->dt_region_rd[j].vaddr;
+ dt_region->sz = dw->dt_region_rd[j].sz;
+ }
dw_edma_v0_core_device_config(chan);
}
dma_cap_set(DMA_SLAVE, dma->cap_mask);
dma_cap_set(DMA_CYCLIC, dma->cap_mask);
dma_cap_set(DMA_PRIVATE, dma->cap_mask);
+ dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
dma->directions = BIT(write ? DMA_DEV_TO_MEM : DMA_MEM_TO_DEV);
dma->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
dma->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
dma->device_tx_status = dw_edma_device_tx_status;
dma->device_prep_slave_sg = dw_edma_device_prep_slave_sg;
dma->device_prep_dma_cyclic = dw_edma_device_prep_dma_cyclic;
+ dma->device_prep_interleaved_dma = dw_edma_device_prep_interleaved_dma;
dma_set_max_seg_size(dma->dev, U32_MAX);
raw_spin_lock_init(&dw->lock);
- /* Find out how many write channels are supported by hardware */
- dw->wr_ch_cnt = dw_edma_v0_core_ch_count(dw, EDMA_DIR_WRITE);
- if (!dw->wr_ch_cnt)
- return -EINVAL;
+ dw->wr_ch_cnt = min_t(u16, dw->wr_ch_cnt,
+ dw_edma_v0_core_ch_count(dw, EDMA_DIR_WRITE));
+ dw->wr_ch_cnt = min_t(u16, dw->wr_ch_cnt, EDMA_MAX_WR_CH);
- /* Find out how many read channels are supported by hardware */
- dw->rd_ch_cnt = dw_edma_v0_core_ch_count(dw, EDMA_DIR_READ);
- if (!dw->rd_ch_cnt)
+ dw->rd_ch_cnt = min_t(u16, dw->rd_ch_cnt,
+ dw_edma_v0_core_ch_count(dw, EDMA_DIR_READ));
+ dw->rd_ch_cnt = min_t(u16, dw->rd_ch_cnt, EDMA_MAX_RD_CH);
+
+ if (!dw->wr_ch_cnt && !dw->rd_ch_cnt)
return -EINVAL;
dev_vdbg(dev, "Channels:\twrite=%d, read=%d\n",
/* Power management */
pm_runtime_disable(dev);
+ /* Deregister eDMA device */
+ dma_async_device_unregister(&dw->wr_edma);
list_for_each_entry_safe(chan, _chan, &dw->wr_edma.channels,
vc.chan.device_node) {
- list_del(&chan->vc.chan.device_node);
tasklet_kill(&chan->vc.task);
+ list_del(&chan->vc.chan.device_node);
}
+ dma_async_device_unregister(&dw->rd_edma);
list_for_each_entry_safe(chan, _chan, &dw->rd_edma.channels,
vc.chan.device_node) {
- list_del(&chan->vc.chan.device_node);
tasklet_kill(&chan->vc.task);
+ list_del(&chan->vc.chan.device_node);
}
- /* Deregister eDMA device */
- dma_async_device_unregister(&dw->wr_edma);
- dma_async_device_unregister(&dw->rd_edma);
-
/* Turn debugfs off */
- dw_edma_v0_core_debugfs_off();
+ dw_edma_v0_core_debugfs_off(chip);
return 0;
}
#include "../virt-dma.h"
#define EDMA_LL_SZ 24
+#define EDMA_MAX_WR_CH 8
+#define EDMA_MAX_RD_CH 8
enum dw_edma_dir {
EDMA_DIR_WRITE = 0,
EDMA_DIR_READ
};
-enum dw_edma_mode {
- EDMA_MODE_LEGACY = 0,
- EDMA_MODE_UNROLL
+enum dw_edma_map_format {
+ EDMA_MF_EDMA_LEGACY = 0x0,
+ EDMA_MF_EDMA_UNROLL = 0x1,
+ EDMA_MF_HDMA_COMPAT = 0x5
};
enum dw_edma_request {
EDMA_ST_BUSY
};
+enum dw_edma_xfer_type {
+ EDMA_XFER_SCATTER_GATHER = 0,
+ EDMA_XFER_CYCLIC,
+ EDMA_XFER_INTERLEAVED
+};
+
struct dw_edma_chan;
struct dw_edma_chunk;
int id;
enum dw_edma_dir dir;
- off_t ll_off;
u32 ll_max;
- off_t dt_off;
-
struct msi_msg msi;
enum dw_edma_request request;
u16 rd_ch_cnt;
struct dw_edma_region rg_region; /* Registers */
- struct dw_edma_region ll_region; /* Linked list */
- struct dw_edma_region dt_region; /* Data */
+ struct dw_edma_region ll_region_wr[EDMA_MAX_WR_CH];
+ struct dw_edma_region ll_region_rd[EDMA_MAX_RD_CH];
+ struct dw_edma_region dt_region_wr[EDMA_MAX_WR_CH];
+ struct dw_edma_region dt_region_rd[EDMA_MAX_RD_CH];
struct dw_edma_irq *irq;
int nr_irqs;
- u32 version;
- enum dw_edma_mode mode;
+ enum dw_edma_map_format mf;
struct dw_edma_chan *chan;
const struct dw_edma_core_ops *ops;
raw_spinlock_t lock; /* Only for legacy */
+#ifdef CONFIG_DEBUG_FS
+ struct dentry *debugfs;
+#endif /* CONFIG_DEBUG_FS */
};
struct dw_edma_sg {
struct dw_edma_transfer {
struct dma_chan *dchan;
union dw_edma_xfer {
- struct dw_edma_sg sg;
- struct dw_edma_cyclic cyclic;
+ struct dw_edma_sg sg;
+ struct dw_edma_cyclic cyclic;
+ struct dma_interleaved_template *il;
} xfer;
enum dma_transfer_direction direction;
unsigned long flags;
- bool cyclic;
+ enum dw_edma_xfer_type type;
};
static inline
#include <linux/dma/edma.h>
#include <linux/pci-epf.h>
#include <linux/msi.h>
+#include <linux/bitfield.h>
#include "dw-edma-core.h"
+#define DW_PCIE_VSEC_DMA_ID 0x6
+#define DW_PCIE_VSEC_DMA_BAR GENMASK(10, 8)
+#define DW_PCIE_VSEC_DMA_MAP GENMASK(2, 0)
+#define DW_PCIE_VSEC_DMA_WR_CH GENMASK(9, 0)
+#define DW_PCIE_VSEC_DMA_RD_CH GENMASK(25, 16)
+
+#define DW_BLOCK(a, b, c) \
+ { \
+ .bar = a, \
+ .off = b, \
+ .sz = c, \
+ },
+
+struct dw_edma_block {
+ enum pci_barno bar;
+ off_t off;
+ size_t sz;
+};
+
struct dw_edma_pcie_data {
/* eDMA registers location */
- enum pci_barno rg_bar;
- off_t rg_off;
- size_t rg_sz;
+ struct dw_edma_block rg;
/* eDMA memory linked list location */
- enum pci_barno ll_bar;
- off_t ll_off;
- size_t ll_sz;
+ struct dw_edma_block ll_wr[EDMA_MAX_WR_CH];
+ struct dw_edma_block ll_rd[EDMA_MAX_RD_CH];
/* eDMA memory data location */
- enum pci_barno dt_bar;
- off_t dt_off;
- size_t dt_sz;
+ struct dw_edma_block dt_wr[EDMA_MAX_WR_CH];
+ struct dw_edma_block dt_rd[EDMA_MAX_RD_CH];
/* Other */
- u32 version;
- enum dw_edma_mode mode;
+ enum dw_edma_map_format mf;
u8 irqs;
+ u16 wr_ch_cnt;
+ u16 rd_ch_cnt;
};
static const struct dw_edma_pcie_data snps_edda_data = {
/* eDMA registers location */
- .rg_bar = BAR_0,
- .rg_off = 0x00001000, /* 4 Kbytes */
- .rg_sz = 0x00002000, /* 8 Kbytes */
+ .rg.bar = BAR_0,
+ .rg.off = 0x00001000, /* 4 Kbytes */
+ .rg.sz = 0x00002000, /* 8 Kbytes */
/* eDMA memory linked list location */
- .ll_bar = BAR_2,
- .ll_off = 0x00000000, /* 0 Kbytes */
- .ll_sz = 0x00800000, /* 8 Mbytes */
+ .ll_wr = {
+ /* Channel 0 - BAR 2, offset 0 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00000000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 2 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00200000, 0x00000800)
+ },
+ .ll_rd = {
+ /* Channel 0 - BAR 2, offset 4 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00400000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 6 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00600000, 0x00000800)
+ },
/* eDMA memory data location */
- .dt_bar = BAR_2,
- .dt_off = 0x00800000, /* 8 Mbytes */
- .dt_sz = 0x03800000, /* 56 Mbytes */
+ .dt_wr = {
+ /* Channel 0 - BAR 2, offset 8 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00800000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 9 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00900000, 0x00000800)
+ },
+ .dt_rd = {
+ /* Channel 0 - BAR 2, offset 10 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00a00000, 0x00000800)
+ /* Channel 1 - BAR 2, offset 11 Mbytes, size 2 Kbytes */
+ DW_BLOCK(BAR_2, 0x00b00000, 0x00000800)
+ },
/* Other */
- .version = 0,
- .mode = EDMA_MODE_UNROLL,
+ .mf = EDMA_MF_EDMA_UNROLL,
.irqs = 1,
+ .wr_ch_cnt = 2,
+ .rd_ch_cnt = 2,
};
static int dw_edma_pcie_irq_vector(struct device *dev, unsigned int nr)
.irq_vector = dw_edma_pcie_irq_vector,
};
+static void dw_edma_pcie_get_vsec_dma_data(struct pci_dev *pdev,
+ struct dw_edma_pcie_data *pdata)
+{
+ u32 val, map;
+ u16 vsec;
+ u64 off;
+
+ vsec = pci_find_vsec_capability(pdev, PCI_VENDOR_ID_SYNOPSYS,
+ DW_PCIE_VSEC_DMA_ID);
+ if (!vsec)
+ return;
+
+ pci_read_config_dword(pdev, vsec + PCI_VNDR_HEADER, &val);
+ if (PCI_VNDR_HEADER_REV(val) != 0x00 ||
+ PCI_VNDR_HEADER_LEN(val) != 0x18)
+ return;
+
+ pci_dbg(pdev, "Detected PCIe Vendor-Specific Extended Capability DMA\n");
+ pci_read_config_dword(pdev, vsec + 0x8, &val);
+ map = FIELD_GET(DW_PCIE_VSEC_DMA_MAP, val);
+ if (map != EDMA_MF_EDMA_LEGACY &&
+ map != EDMA_MF_EDMA_UNROLL &&
+ map != EDMA_MF_HDMA_COMPAT)
+ return;
+
+ pdata->mf = map;
+ pdata->rg.bar = FIELD_GET(DW_PCIE_VSEC_DMA_BAR, val);
+
+ pci_read_config_dword(pdev, vsec + 0xc, &val);
+ pdata->wr_ch_cnt = min_t(u16, pdata->wr_ch_cnt,
+ FIELD_GET(DW_PCIE_VSEC_DMA_WR_CH, val));
+ pdata->rd_ch_cnt = min_t(u16, pdata->rd_ch_cnt,
+ FIELD_GET(DW_PCIE_VSEC_DMA_RD_CH, val));
+
+ pci_read_config_dword(pdev, vsec + 0x14, &val);
+ off = val;
+ pci_read_config_dword(pdev, vsec + 0x10, &val);
+ off <<= 32;
+ off |= val;
+ pdata->rg.off = off;
+}
+
static int dw_edma_pcie_probe(struct pci_dev *pdev,
const struct pci_device_id *pid)
{
- const struct dw_edma_pcie_data *pdata = (void *)pid->driver_data;
+ struct dw_edma_pcie_data *pdata = (void *)pid->driver_data;
+ struct dw_edma_pcie_data vsec_data;
struct device *dev = &pdev->dev;
struct dw_edma_chip *chip;
- int err, nr_irqs;
struct dw_edma *dw;
+ int err, nr_irqs;
+ int i, mask;
/* Enable PCI device */
err = pcim_enable_device(pdev);
return err;
}
+ memcpy(&vsec_data, pdata, sizeof(struct dw_edma_pcie_data));
+
+ /*
+ * Tries to find if exists a PCIe Vendor-Specific Extended Capability
+ * for the DMA, if one exists, then reconfigures it.
+ */
+ dw_edma_pcie_get_vsec_dma_data(pdev, &vsec_data);
+
/* Mapping PCI BAR regions */
- err = pcim_iomap_regions(pdev, BIT(pdata->rg_bar) |
- BIT(pdata->ll_bar) |
- BIT(pdata->dt_bar),
- pci_name(pdev));
+ mask = BIT(vsec_data.rg.bar);
+ for (i = 0; i < vsec_data.wr_ch_cnt; i++) {
+ mask |= BIT(vsec_data.ll_wr[i].bar);
+ mask |= BIT(vsec_data.dt_wr[i].bar);
+ }
+ for (i = 0; i < vsec_data.rd_ch_cnt; i++) {
+ mask |= BIT(vsec_data.ll_rd[i].bar);
+ mask |= BIT(vsec_data.dt_rd[i].bar);
+ }
+ err = pcim_iomap_regions(pdev, mask, pci_name(pdev));
if (err) {
pci_err(pdev, "eDMA BAR I/O remapping failed\n");
return err;
return -ENOMEM;
/* IRQs allocation */
- nr_irqs = pci_alloc_irq_vectors(pdev, 1, pdata->irqs,
+ nr_irqs = pci_alloc_irq_vectors(pdev, 1, vsec_data.irqs,
PCI_IRQ_MSI | PCI_IRQ_MSIX);
if (nr_irqs < 1) {
pci_err(pdev, "fail to alloc IRQ vector (number of IRQs=%u)\n",
chip->id = pdev->devfn;
chip->irq = pdev->irq;
- dw->rg_region.vaddr = pcim_iomap_table(pdev)[pdata->rg_bar];
- dw->rg_region.vaddr += pdata->rg_off;
- dw->rg_region.paddr = pdev->resource[pdata->rg_bar].start;
- dw->rg_region.paddr += pdata->rg_off;
- dw->rg_region.sz = pdata->rg_sz;
-
- dw->ll_region.vaddr = pcim_iomap_table(pdev)[pdata->ll_bar];
- dw->ll_region.vaddr += pdata->ll_off;
- dw->ll_region.paddr = pdev->resource[pdata->ll_bar].start;
- dw->ll_region.paddr += pdata->ll_off;
- dw->ll_region.sz = pdata->ll_sz;
-
- dw->dt_region.vaddr = pcim_iomap_table(pdev)[pdata->dt_bar];
- dw->dt_region.vaddr += pdata->dt_off;
- dw->dt_region.paddr = pdev->resource[pdata->dt_bar].start;
- dw->dt_region.paddr += pdata->dt_off;
- dw->dt_region.sz = pdata->dt_sz;
-
- dw->version = pdata->version;
- dw->mode = pdata->mode;
+ dw->mf = vsec_data.mf;
dw->nr_irqs = nr_irqs;
dw->ops = &dw_edma_pcie_core_ops;
+ dw->wr_ch_cnt = vsec_data.wr_ch_cnt;
+ dw->rd_ch_cnt = vsec_data.rd_ch_cnt;
- /* Debug info */
- pci_dbg(pdev, "Version:\t%u\n", dw->version);
+ dw->rg_region.vaddr = pcim_iomap_table(pdev)[vsec_data.rg.bar];
+ if (!dw->rg_region.vaddr)
+ return -ENOMEM;
+
+ dw->rg_region.vaddr += vsec_data.rg.off;
+ dw->rg_region.paddr = pdev->resource[vsec_data.rg.bar].start;
+ dw->rg_region.paddr += vsec_data.rg.off;
+ dw->rg_region.sz = vsec_data.rg.sz;
+
+ for (i = 0; i < dw->wr_ch_cnt; i++) {
+ struct dw_edma_region *ll_region = &dw->ll_region_wr[i];
+ struct dw_edma_region *dt_region = &dw->dt_region_wr[i];
+ struct dw_edma_block *ll_block = &vsec_data.ll_wr[i];
+ struct dw_edma_block *dt_block = &vsec_data.dt_wr[i];
+
+ ll_region->vaddr = pcim_iomap_table(pdev)[ll_block->bar];
+ if (!ll_region->vaddr)
+ return -ENOMEM;
+
+ ll_region->vaddr += ll_block->off;
+ ll_region->paddr = pdev->resource[ll_block->bar].start;
+ ll_region->paddr += ll_block->off;
+ ll_region->sz = ll_block->sz;
+
+ dt_region->vaddr = pcim_iomap_table(pdev)[dt_block->bar];
+ if (!dt_region->vaddr)
+ return -ENOMEM;
+
+ dt_region->vaddr += dt_block->off;
+ dt_region->paddr = pdev->resource[dt_block->bar].start;
+ dt_region->paddr += dt_block->off;
+ dt_region->sz = dt_block->sz;
+ }
- pci_dbg(pdev, "Mode:\t%s\n",
- dw->mode == EDMA_MODE_LEGACY ? "Legacy" : "Unroll");
+ for (i = 0; i < dw->rd_ch_cnt; i++) {
+ struct dw_edma_region *ll_region = &dw->ll_region_rd[i];
+ struct dw_edma_region *dt_region = &dw->dt_region_rd[i];
+ struct dw_edma_block *ll_block = &vsec_data.ll_rd[i];
+ struct dw_edma_block *dt_block = &vsec_data.dt_rd[i];
+
+ ll_region->vaddr = pcim_iomap_table(pdev)[ll_block->bar];
+ if (!ll_region->vaddr)
+ return -ENOMEM;
+
+ ll_region->vaddr += ll_block->off;
+ ll_region->paddr = pdev->resource[ll_block->bar].start;
+ ll_region->paddr += ll_block->off;
+ ll_region->sz = ll_block->sz;
+
+ dt_region->vaddr = pcim_iomap_table(pdev)[dt_block->bar];
+ if (!dt_region->vaddr)
+ return -ENOMEM;
+
+ dt_region->vaddr += dt_block->off;
+ dt_region->paddr = pdev->resource[dt_block->bar].start;
+ dt_region->paddr += dt_block->off;
+ dt_region->sz = dt_block->sz;
+ }
+
+ /* Debug info */
+ if (dw->mf == EDMA_MF_EDMA_LEGACY)
+ pci_dbg(pdev, "Version:\teDMA Port Logic (0x%x)\n", dw->mf);
+ else if (dw->mf == EDMA_MF_EDMA_UNROLL)
+ pci_dbg(pdev, "Version:\teDMA Unroll (0x%x)\n", dw->mf);
+ else if (dw->mf == EDMA_MF_HDMA_COMPAT)
+ pci_dbg(pdev, "Version:\tHDMA Compatible (0x%x)\n", dw->mf);
+ else
+ pci_dbg(pdev, "Version:\tUnknown (0x%x)\n", dw->mf);
pci_dbg(pdev, "Registers:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
- pdata->rg_bar, pdata->rg_off, pdata->rg_sz,
+ vsec_data.rg.bar, vsec_data.rg.off, vsec_data.rg.sz,
dw->rg_region.vaddr, &dw->rg_region.paddr);
- pci_dbg(pdev, "L. List:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
- pdata->ll_bar, pdata->ll_off, pdata->ll_sz,
- dw->ll_region.vaddr, &dw->ll_region.paddr);
- pci_dbg(pdev, "Data:\tBAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
- pdata->dt_bar, pdata->dt_off, pdata->dt_sz,
- dw->dt_region.vaddr, &dw->dt_region.paddr);
+ for (i = 0; i < dw->wr_ch_cnt; i++) {
+ pci_dbg(pdev, "L. List:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
+ i, vsec_data.ll_wr[i].bar,
+ vsec_data.ll_wr[i].off, dw->ll_region_wr[i].sz,
+ dw->ll_region_wr[i].vaddr, &dw->ll_region_wr[i].paddr);
+
+ pci_dbg(pdev, "Data:\tWRITE CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
+ i, vsec_data.dt_wr[i].bar,
+ vsec_data.dt_wr[i].off, dw->dt_region_wr[i].sz,
+ dw->dt_region_wr[i].vaddr, &dw->dt_region_wr[i].paddr);
+ }
+
+ for (i = 0; i < dw->rd_ch_cnt; i++) {
+ pci_dbg(pdev, "L. List:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
+ i, vsec_data.ll_rd[i].bar,
+ vsec_data.ll_rd[i].off, dw->ll_region_rd[i].sz,
+ dw->ll_region_rd[i].vaddr, &dw->ll_region_rd[i].paddr);
+
+ pci_dbg(pdev, "Data:\tREAD CH%.2u, BAR=%u, off=0x%.8lx, sz=0x%zx bytes, addr(v=%p, p=%pa)\n",
+ i, vsec_data.dt_rd[i].bar,
+ vsec_data.dt_rd[i].off, dw->dt_region_rd[i].sz,
+ dw->dt_region_rd[i].vaddr, &dw->dt_region_rd[i].paddr);
+ }
pci_dbg(pdev, "Nr. IRQs:\t%u\n", dw->nr_irqs);
return dw->rg_region.vaddr;
}
-#define SET(dw, name, value) \
+#define SET_32(dw, name, value) \
writel(value, &(__dw_regs(dw)->name))
-#define GET(dw, name) \
+#define GET_32(dw, name) \
readl(&(__dw_regs(dw)->name))
-#define SET_RW(dw, dir, name, value) \
+#define SET_RW_32(dw, dir, name, value) \
do { \
if ((dir) == EDMA_DIR_WRITE) \
- SET(dw, wr_##name, value); \
+ SET_32(dw, wr_##name, value); \
else \
- SET(dw, rd_##name, value); \
+ SET_32(dw, rd_##name, value); \
} while (0)
-#define GET_RW(dw, dir, name) \
+#define GET_RW_32(dw, dir, name) \
((dir) == EDMA_DIR_WRITE \
- ? GET(dw, wr_##name) \
- : GET(dw, rd_##name))
+ ? GET_32(dw, wr_##name) \
+ : GET_32(dw, rd_##name))
-#define SET_BOTH(dw, name, value) \
+#define SET_BOTH_32(dw, name, value) \
do { \
- SET(dw, wr_##name, value); \
- SET(dw, rd_##name, value); \
+ SET_32(dw, wr_##name, value); \
+ SET_32(dw, rd_##name, value); \
+ } while (0)
+
+#ifdef CONFIG_64BIT
+
+#define SET_64(dw, name, value) \
+ writeq(value, &(__dw_regs(dw)->name))
+
+#define GET_64(dw, name) \
+ readq(&(__dw_regs(dw)->name))
+
+#define SET_RW_64(dw, dir, name, value) \
+ do { \
+ if ((dir) == EDMA_DIR_WRITE) \
+ SET_64(dw, wr_##name, value); \
+ else \
+ SET_64(dw, rd_##name, value); \
+ } while (0)
+
+#define GET_RW_64(dw, dir, name) \
+ ((dir) == EDMA_DIR_WRITE \
+ ? GET_64(dw, wr_##name) \
+ : GET_64(dw, rd_##name))
+
+#define SET_BOTH_64(dw, name, value) \
+ do { \
+ SET_64(dw, wr_##name, value); \
+ SET_64(dw, rd_##name, value); \
+ } while (0)
+
+#endif /* CONFIG_64BIT */
+
+#define SET_COMPAT(dw, name, value) \
+ writel(value, &(__dw_regs(dw)->type.unroll.name))
+
+#define SET_RW_COMPAT(dw, dir, name, value) \
+ do { \
+ if ((dir) == EDMA_DIR_WRITE) \
+ SET_COMPAT(dw, wr_##name, value); \
+ else \
+ SET_COMPAT(dw, rd_##name, value); \
} while (0)
static inline struct dw_edma_v0_ch_regs __iomem *
__dw_ch_regs(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch)
{
- if (dw->mode == EDMA_MODE_LEGACY)
+ if (dw->mf == EDMA_MF_EDMA_LEGACY)
return &(__dw_regs(dw)->type.legacy.ch);
if (dir == EDMA_DIR_WRITE)
static inline void writel_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
u32 value, void __iomem *addr)
{
- if (dw->mode == EDMA_MODE_LEGACY) {
+ if (dw->mf == EDMA_MF_EDMA_LEGACY) {
u32 viewport_sel;
unsigned long flags;
{
u32 value;
- if (dw->mode == EDMA_MODE_LEGACY) {
+ if (dw->mf == EDMA_MF_EDMA_LEGACY) {
u32 viewport_sel;
unsigned long flags;
return value;
}
-#define SET_CH(dw, dir, ch, name, value) \
+#define SET_CH_32(dw, dir, ch, name, value) \
writel_ch(dw, dir, ch, value, &(__dw_ch_regs(dw, dir, ch)->name))
-#define GET_CH(dw, dir, ch, name) \
+#define GET_CH_32(dw, dir, ch, name) \
readl_ch(dw, dir, ch, &(__dw_ch_regs(dw, dir, ch)->name))
-#define SET_LL(ll, value) \
+#define SET_LL_32(ll, value) \
writel(value, ll)
+#ifdef CONFIG_64BIT
+
+static inline void writeq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
+ u64 value, void __iomem *addr)
+{
+ if (dw->mf == EDMA_MF_EDMA_LEGACY) {
+ u32 viewport_sel;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&dw->lock, flags);
+
+ viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch);
+ if (dir == EDMA_DIR_READ)
+ viewport_sel |= BIT(31);
+
+ writel(viewport_sel,
+ &(__dw_regs(dw)->type.legacy.viewport_sel));
+ writeq(value, addr);
+
+ raw_spin_unlock_irqrestore(&dw->lock, flags);
+ } else {
+ writeq(value, addr);
+ }
+}
+
+static inline u64 readq_ch(struct dw_edma *dw, enum dw_edma_dir dir, u16 ch,
+ const void __iomem *addr)
+{
+ u32 value;
+
+ if (dw->mf == EDMA_MF_EDMA_LEGACY) {
+ u32 viewport_sel;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&dw->lock, flags);
+
+ viewport_sel = FIELD_PREP(EDMA_V0_VIEWPORT_MASK, ch);
+ if (dir == EDMA_DIR_READ)
+ viewport_sel |= BIT(31);
+
+ writel(viewport_sel,
+ &(__dw_regs(dw)->type.legacy.viewport_sel));
+ value = readq(addr);
+
+ raw_spin_unlock_irqrestore(&dw->lock, flags);
+ } else {
+ value = readq(addr);
+ }
+
+ return value;
+}
+
+#define SET_CH_64(dw, dir, ch, name, value) \
+ writeq_ch(dw, dir, ch, value, &(__dw_ch_regs(dw, dir, ch)->name))
+
+#define GET_CH_64(dw, dir, ch, name) \
+ readq_ch(dw, dir, ch, &(__dw_ch_regs(dw, dir, ch)->name))
+
+#define SET_LL_64(ll, value) \
+ writeq(value, ll)
+
+#endif /* CONFIG_64BIT */
+
/* eDMA management callbacks */
void dw_edma_v0_core_off(struct dw_edma *dw)
{
- SET_BOTH(dw, int_mask, EDMA_V0_DONE_INT_MASK | EDMA_V0_ABORT_INT_MASK);
- SET_BOTH(dw, int_clear, EDMA_V0_DONE_INT_MASK | EDMA_V0_ABORT_INT_MASK);
- SET_BOTH(dw, engine_en, 0);
+ SET_BOTH_32(dw, int_mask,
+ EDMA_V0_DONE_INT_MASK | EDMA_V0_ABORT_INT_MASK);
+ SET_BOTH_32(dw, int_clear,
+ EDMA_V0_DONE_INT_MASK | EDMA_V0_ABORT_INT_MASK);
+ SET_BOTH_32(dw, engine_en, 0);
}
u16 dw_edma_v0_core_ch_count(struct dw_edma *dw, enum dw_edma_dir dir)
u32 num_ch;
if (dir == EDMA_DIR_WRITE)
- num_ch = FIELD_GET(EDMA_V0_WRITE_CH_COUNT_MASK, GET(dw, ctrl));
+ num_ch = FIELD_GET(EDMA_V0_WRITE_CH_COUNT_MASK,
+ GET_32(dw, ctrl));
else
- num_ch = FIELD_GET(EDMA_V0_READ_CH_COUNT_MASK, GET(dw, ctrl));
+ num_ch = FIELD_GET(EDMA_V0_READ_CH_COUNT_MASK,
+ GET_32(dw, ctrl));
if (num_ch > EDMA_V0_MAX_NR_CH)
num_ch = EDMA_V0_MAX_NR_CH;
u32 tmp;
tmp = FIELD_GET(EDMA_V0_CH_STATUS_MASK,
- GET_CH(dw, chan->dir, chan->id, ch_control1));
+ GET_CH_32(dw, chan->dir, chan->id, ch_control1));
if (tmp == 1)
return DMA_IN_PROGRESS;
{
struct dw_edma *dw = chan->chip->dw;
- SET_RW(dw, chan->dir, int_clear,
- FIELD_PREP(EDMA_V0_DONE_INT_MASK, BIT(chan->id)));
+ SET_RW_32(dw, chan->dir, int_clear,
+ FIELD_PREP(EDMA_V0_DONE_INT_MASK, BIT(chan->id)));
}
void dw_edma_v0_core_clear_abort_int(struct dw_edma_chan *chan)
{
struct dw_edma *dw = chan->chip->dw;
- SET_RW(dw, chan->dir, int_clear,
- FIELD_PREP(EDMA_V0_ABORT_INT_MASK, BIT(chan->id)));
+ SET_RW_32(dw, chan->dir, int_clear,
+ FIELD_PREP(EDMA_V0_ABORT_INT_MASK, BIT(chan->id)));
}
u32 dw_edma_v0_core_status_done_int(struct dw_edma *dw, enum dw_edma_dir dir)
{
- return FIELD_GET(EDMA_V0_DONE_INT_MASK, GET_RW(dw, dir, int_status));
+ return FIELD_GET(EDMA_V0_DONE_INT_MASK,
+ GET_RW_32(dw, dir, int_status));
}
u32 dw_edma_v0_core_status_abort_int(struct dw_edma *dw, enum dw_edma_dir dir)
{
- return FIELD_GET(EDMA_V0_ABORT_INT_MASK, GET_RW(dw, dir, int_status));
+ return FIELD_GET(EDMA_V0_ABORT_INT_MASK,
+ GET_RW_32(dw, dir, int_status));
}
static void dw_edma_v0_core_write_chunk(struct dw_edma_chunk *chunk)
control |= (DW_EDMA_V0_LIE | DW_EDMA_V0_RIE);
/* Channel control */
- SET_LL(&lli[i].control, control);
+ SET_LL_32(&lli[i].control, control);
/* Transfer size */
- SET_LL(&lli[i].transfer_size, child->sz);
- /* SAR - low, high */
- SET_LL(&lli[i].sar_low, lower_32_bits(child->sar));
- SET_LL(&lli[i].sar_high, upper_32_bits(child->sar));
- /* DAR - low, high */
- SET_LL(&lli[i].dar_low, lower_32_bits(child->dar));
- SET_LL(&lli[i].dar_high, upper_32_bits(child->dar));
+ SET_LL_32(&lli[i].transfer_size, child->sz);
+ /* SAR */
+ #ifdef CONFIG_64BIT
+ SET_LL_64(&lli[i].sar.reg, child->sar);
+ #else /* CONFIG_64BIT */
+ SET_LL_32(&lli[i].sar.lsb, lower_32_bits(child->sar));
+ SET_LL_32(&lli[i].sar.msb, upper_32_bits(child->sar));
+ #endif /* CONFIG_64BIT */
+ /* DAR */
+ #ifdef CONFIG_64BIT
+ SET_LL_64(&lli[i].dar.reg, child->dar);
+ #else /* CONFIG_64BIT */
+ SET_LL_32(&lli[i].dar.lsb, lower_32_bits(child->dar));
+ SET_LL_32(&lli[i].dar.msb, upper_32_bits(child->dar));
+ #endif /* CONFIG_64BIT */
i++;
}
control |= DW_EDMA_V0_CB;
/* Channel control */
- SET_LL(&llp->control, control);
- /* Linked list - low, high */
- SET_LL(&llp->llp_low, lower_32_bits(chunk->ll_region.paddr));
- SET_LL(&llp->llp_high, upper_32_bits(chunk->ll_region.paddr));
+ SET_LL_32(&llp->control, control);
+ /* Linked list */
+ #ifdef CONFIG_64BIT
+ SET_LL_64(&llp->llp.reg, chunk->ll_region.paddr);
+ #else /* CONFIG_64BIT */
+ SET_LL_32(&llp->llp.lsb, lower_32_bits(chunk->ll_region.paddr));
+ SET_LL_32(&llp->llp.msb, upper_32_bits(chunk->ll_region.paddr));
+ #endif /* CONFIG_64BIT */
}
void dw_edma_v0_core_start(struct dw_edma_chunk *chunk, bool first)
if (first) {
/* Enable engine */
- SET_RW(dw, chan->dir, engine_en, BIT(0));
+ SET_RW_32(dw, chan->dir, engine_en, BIT(0));
+ if (dw->mf == EDMA_MF_HDMA_COMPAT) {
+ switch (chan->id) {
+ case 0:
+ SET_RW_COMPAT(dw, chan->dir, ch0_pwr_en,
+ BIT(0));
+ break;
+ case 1:
+ SET_RW_COMPAT(dw, chan->dir, ch1_pwr_en,
+ BIT(0));
+ break;
+ case 2:
+ SET_RW_COMPAT(dw, chan->dir, ch2_pwr_en,
+ BIT(0));
+ break;
+ case 3:
+ SET_RW_COMPAT(dw, chan->dir, ch3_pwr_en,
+ BIT(0));
+ break;
+ case 4:
+ SET_RW_COMPAT(dw, chan->dir, ch4_pwr_en,
+ BIT(0));
+ break;
+ case 5:
+ SET_RW_COMPAT(dw, chan->dir, ch5_pwr_en,
+ BIT(0));
+ break;
+ case 6:
+ SET_RW_COMPAT(dw, chan->dir, ch6_pwr_en,
+ BIT(0));
+ break;
+ case 7:
+ SET_RW_COMPAT(dw, chan->dir, ch7_pwr_en,
+ BIT(0));
+ break;
+ }
+ }
/* Interrupt unmask - done, abort */
- tmp = GET_RW(dw, chan->dir, int_mask);
+ tmp = GET_RW_32(dw, chan->dir, int_mask);
tmp &= ~FIELD_PREP(EDMA_V0_DONE_INT_MASK, BIT(chan->id));
tmp &= ~FIELD_PREP(EDMA_V0_ABORT_INT_MASK, BIT(chan->id));
- SET_RW(dw, chan->dir, int_mask, tmp);
+ SET_RW_32(dw, chan->dir, int_mask, tmp);
/* Linked list error */
- tmp = GET_RW(dw, chan->dir, linked_list_err_en);
+ tmp = GET_RW_32(dw, chan->dir, linked_list_err_en);
tmp |= FIELD_PREP(EDMA_V0_LINKED_LIST_ERR_MASK, BIT(chan->id));
- SET_RW(dw, chan->dir, linked_list_err_en, tmp);
+ SET_RW_32(dw, chan->dir, linked_list_err_en, tmp);
/* Channel control */
- SET_CH(dw, chan->dir, chan->id, ch_control1,
- (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE));
- /* Linked list - low, high */
- SET_CH(dw, chan->dir, chan->id, llp_low,
- lower_32_bits(chunk->ll_region.paddr));
- SET_CH(dw, chan->dir, chan->id, llp_high,
- upper_32_bits(chunk->ll_region.paddr));
+ SET_CH_32(dw, chan->dir, chan->id, ch_control1,
+ (DW_EDMA_V0_CCS | DW_EDMA_V0_LLE));
+ /* Linked list */
+ #ifdef CONFIG_64BIT
+ SET_CH_64(dw, chan->dir, chan->id, llp.reg,
+ chunk->ll_region.paddr);
+ #else /* CONFIG_64BIT */
+ SET_CH_32(dw, chan->dir, chan->id, llp.lsb,
+ lower_32_bits(chunk->ll_region.paddr));
+ SET_CH_32(dw, chan->dir, chan->id, llp.msb,
+ upper_32_bits(chunk->ll_region.paddr));
+ #endif /* CONFIG_64BIT */
}
/* Doorbell */
- SET_RW(dw, chan->dir, doorbell,
- FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id));
+ SET_RW_32(dw, chan->dir, doorbell,
+ FIELD_PREP(EDMA_V0_DOORBELL_CH_MASK, chan->id));
}
int dw_edma_v0_core_device_config(struct dw_edma_chan *chan)
u32 tmp = 0;
/* MSI done addr - low, high */
- SET_RW(dw, chan->dir, done_imwr_low, chan->msi.address_lo);
- SET_RW(dw, chan->dir, done_imwr_high, chan->msi.address_hi);
+ SET_RW_32(dw, chan->dir, done_imwr.lsb, chan->msi.address_lo);
+ SET_RW_32(dw, chan->dir, done_imwr.msb, chan->msi.address_hi);
/* MSI abort addr - low, high */
- SET_RW(dw, chan->dir, abort_imwr_low, chan->msi.address_lo);
- SET_RW(dw, chan->dir, abort_imwr_high, chan->msi.address_hi);
+ SET_RW_32(dw, chan->dir, abort_imwr.lsb, chan->msi.address_lo);
+ SET_RW_32(dw, chan->dir, abort_imwr.msb, chan->msi.address_hi);
/* MSI data - low, high */
switch (chan->id) {
case 0:
case 1:
- tmp = GET_RW(dw, chan->dir, ch01_imwr_data);
+ tmp = GET_RW_32(dw, chan->dir, ch01_imwr_data);
break;
case 2:
case 3:
- tmp = GET_RW(dw, chan->dir, ch23_imwr_data);
+ tmp = GET_RW_32(dw, chan->dir, ch23_imwr_data);
break;
case 4:
case 5:
- tmp = GET_RW(dw, chan->dir, ch45_imwr_data);
+ tmp = GET_RW_32(dw, chan->dir, ch45_imwr_data);
break;
case 6:
case 7:
- tmp = GET_RW(dw, chan->dir, ch67_imwr_data);
+ tmp = GET_RW_32(dw, chan->dir, ch67_imwr_data);
break;
}
switch (chan->id) {
case 0:
case 1:
- SET_RW(dw, chan->dir, ch01_imwr_data, tmp);
+ SET_RW_32(dw, chan->dir, ch01_imwr_data, tmp);
break;
case 2:
case 3:
- SET_RW(dw, chan->dir, ch23_imwr_data, tmp);
+ SET_RW_32(dw, chan->dir, ch23_imwr_data, tmp);
break;
case 4:
case 5:
- SET_RW(dw, chan->dir, ch45_imwr_data, tmp);
+ SET_RW_32(dw, chan->dir, ch45_imwr_data, tmp);
break;
case 6:
case 7:
- SET_RW(dw, chan->dir, ch67_imwr_data, tmp);
+ SET_RW_32(dw, chan->dir, ch67_imwr_data, tmp);
break;
}
dw_edma_v0_debugfs_on(chip);
}
-void dw_edma_v0_core_debugfs_off(void)
+void dw_edma_v0_core_debugfs_off(struct dw_edma_chip *chip)
{
- dw_edma_v0_debugfs_off();
+ dw_edma_v0_debugfs_off(chip);
}
int dw_edma_v0_core_device_config(struct dw_edma_chan *chan);
/* eDMA debug fs callbacks */
void dw_edma_v0_core_debugfs_on(struct dw_edma_chip *chip);
-void dw_edma_v0_core_debugfs_off(void);
+void dw_edma_v0_core_debugfs_off(struct dw_edma_chip *chip);
#endif /* _DW_EDMA_V0_CORE_H */
#define CHANNEL_STR "channel"
#define REGISTERS_STR "registers"
-static struct dentry *base_dir;
static struct dw_edma *dw;
static struct dw_edma_v0_regs __iomem *regs;
static int dw_edma_debugfs_u32_get(void *data, u64 *val)
{
void __iomem *reg = (void __force __iomem *)data;
- if (dw->mode == EDMA_MODE_LEGACY &&
+ if (dw->mf == EDMA_MF_EDMA_LEGACY &&
reg >= (void __iomem *)®s->type.legacy.ch) {
void __iomem *ptr = ®s->type.legacy.ch;
u32 viewport_sel = 0;
REGISTER(ch_control1),
REGISTER(ch_control2),
REGISTER(transfer_size),
- REGISTER(sar_low),
- REGISTER(sar_high),
- REGISTER(dar_low),
- REGISTER(dar_high),
- REGISTER(llp_low),
- REGISTER(llp_high),
+ REGISTER(sar.lsb),
+ REGISTER(sar.msb),
+ REGISTER(dar.lsb),
+ REGISTER(dar.msb),
+ REGISTER(llp.lsb),
+ REGISTER(llp.msb),
};
nr_entries = ARRAY_SIZE(debugfs_regs);
/* eDMA global registers */
WR_REGISTER(engine_en),
WR_REGISTER(doorbell),
- WR_REGISTER(ch_arb_weight_low),
- WR_REGISTER(ch_arb_weight_high),
+ WR_REGISTER(ch_arb_weight.lsb),
+ WR_REGISTER(ch_arb_weight.msb),
/* eDMA interrupts registers */
WR_REGISTER(int_status),
WR_REGISTER(int_mask),
WR_REGISTER(int_clear),
WR_REGISTER(err_status),
- WR_REGISTER(done_imwr_low),
- WR_REGISTER(done_imwr_high),
- WR_REGISTER(abort_imwr_low),
- WR_REGISTER(abort_imwr_high),
+ WR_REGISTER(done_imwr.lsb),
+ WR_REGISTER(done_imwr.msb),
+ WR_REGISTER(abort_imwr.lsb),
+ WR_REGISTER(abort_imwr.msb),
WR_REGISTER(ch01_imwr_data),
WR_REGISTER(ch23_imwr_data),
WR_REGISTER(ch45_imwr_data),
const struct debugfs_entries debugfs_unroll_regs[] = {
/* eDMA channel context grouping */
WR_REGISTER_UNROLL(engine_chgroup),
- WR_REGISTER_UNROLL(engine_hshake_cnt_low),
- WR_REGISTER_UNROLL(engine_hshake_cnt_high),
+ WR_REGISTER_UNROLL(engine_hshake_cnt.lsb),
+ WR_REGISTER_UNROLL(engine_hshake_cnt.msb),
WR_REGISTER_UNROLL(ch0_pwr_en),
WR_REGISTER_UNROLL(ch1_pwr_en),
WR_REGISTER_UNROLL(ch2_pwr_en),
nr_entries = ARRAY_SIZE(debugfs_regs);
dw_edma_debugfs_create_x32(debugfs_regs, nr_entries, regs_dir);
- if (dw->mode == EDMA_MODE_UNROLL) {
+ if (dw->mf == EDMA_MF_HDMA_COMPAT) {
nr_entries = ARRAY_SIZE(debugfs_unroll_regs);
dw_edma_debugfs_create_x32(debugfs_unroll_regs, nr_entries,
regs_dir);
/* eDMA global registers */
RD_REGISTER(engine_en),
RD_REGISTER(doorbell),
- RD_REGISTER(ch_arb_weight_low),
- RD_REGISTER(ch_arb_weight_high),
+ RD_REGISTER(ch_arb_weight.lsb),
+ RD_REGISTER(ch_arb_weight.msb),
/* eDMA interrupts registers */
RD_REGISTER(int_status),
RD_REGISTER(int_mask),
RD_REGISTER(int_clear),
- RD_REGISTER(err_status_low),
- RD_REGISTER(err_status_high),
+ RD_REGISTER(err_status.lsb),
+ RD_REGISTER(err_status.msb),
RD_REGISTER(linked_list_err_en),
- RD_REGISTER(done_imwr_low),
- RD_REGISTER(done_imwr_high),
- RD_REGISTER(abort_imwr_low),
- RD_REGISTER(abort_imwr_high),
+ RD_REGISTER(done_imwr.lsb),
+ RD_REGISTER(done_imwr.msb),
+ RD_REGISTER(abort_imwr.lsb),
+ RD_REGISTER(abort_imwr.msb),
RD_REGISTER(ch01_imwr_data),
RD_REGISTER(ch23_imwr_data),
RD_REGISTER(ch45_imwr_data),
const struct debugfs_entries debugfs_unroll_regs[] = {
/* eDMA channel context grouping */
RD_REGISTER_UNROLL(engine_chgroup),
- RD_REGISTER_UNROLL(engine_hshake_cnt_low),
- RD_REGISTER_UNROLL(engine_hshake_cnt_high),
+ RD_REGISTER_UNROLL(engine_hshake_cnt.lsb),
+ RD_REGISTER_UNROLL(engine_hshake_cnt.msb),
RD_REGISTER_UNROLL(ch0_pwr_en),
RD_REGISTER_UNROLL(ch1_pwr_en),
RD_REGISTER_UNROLL(ch2_pwr_en),
nr_entries = ARRAY_SIZE(debugfs_regs);
dw_edma_debugfs_create_x32(debugfs_regs, nr_entries, regs_dir);
- if (dw->mode == EDMA_MODE_UNROLL) {
+ if (dw->mf == EDMA_MF_HDMA_COMPAT) {
nr_entries = ARRAY_SIZE(debugfs_unroll_regs);
dw_edma_debugfs_create_x32(debugfs_unroll_regs, nr_entries,
regs_dir);
struct dentry *regs_dir;
int nr_entries;
- regs_dir = debugfs_create_dir(REGISTERS_STR, base_dir);
+ regs_dir = debugfs_create_dir(REGISTERS_STR, dw->debugfs);
if (!regs_dir)
return;
if (!regs)
return;
- base_dir = debugfs_create_dir(dw->name, NULL);
- if (!base_dir)
+ dw->debugfs = debugfs_create_dir(dw->name, NULL);
+ if (!dw->debugfs)
return;
- debugfs_create_u32("version", 0444, base_dir, &dw->version);
- debugfs_create_u32("mode", 0444, base_dir, &dw->mode);
- debugfs_create_u16("wr_ch_cnt", 0444, base_dir, &dw->wr_ch_cnt);
- debugfs_create_u16("rd_ch_cnt", 0444, base_dir, &dw->rd_ch_cnt);
+ debugfs_create_u32("mf", 0444, dw->debugfs, &dw->mf);
+ debugfs_create_u16("wr_ch_cnt", 0444, dw->debugfs, &dw->wr_ch_cnt);
+ debugfs_create_u16("rd_ch_cnt", 0444, dw->debugfs, &dw->rd_ch_cnt);
dw_edma_debugfs_regs();
}
-void dw_edma_v0_debugfs_off(void)
+void dw_edma_v0_debugfs_off(struct dw_edma_chip *chip)
{
- debugfs_remove_recursive(base_dir);
+ dw = chip->dw;
+ if (!dw)
+ return;
+
+ debugfs_remove_recursive(dw->debugfs);
+ dw->debugfs = NULL;
}
#ifdef CONFIG_DEBUG_FS
void dw_edma_v0_debugfs_on(struct dw_edma_chip *chip);
-void dw_edma_v0_debugfs_off(void);
+void dw_edma_v0_debugfs_off(struct dw_edma_chip *chip);
#else
static inline void dw_edma_v0_debugfs_on(struct dw_edma_chip *chip)
{
}
-static inline void dw_edma_v0_debugfs_off(void)
+static inline void dw_edma_v0_debugfs_off(struct dw_edma_chip *chip)
{
}
#endif /* CONFIG_DEBUG_FS */
#define EDMA_V0_CH_EVEN_MSI_DATA_MASK GENMASK(15, 0)
struct dw_edma_v0_ch_regs {
- u32 ch_control1; /* 0x000 */
- u32 ch_control2; /* 0x004 */
- u32 transfer_size; /* 0x008 */
- u32 sar_low; /* 0x00c */
- u32 sar_high; /* 0x010 */
- u32 dar_low; /* 0x014 */
- u32 dar_high; /* 0x018 */
- u32 llp_low; /* 0x01c */
- u32 llp_high; /* 0x020 */
-};
+ u32 ch_control1; /* 0x0000 */
+ u32 ch_control2; /* 0x0004 */
+ u32 transfer_size; /* 0x0008 */
+ union {
+ u64 reg; /* 0x000c..0x0010 */
+ struct {
+ u32 lsb; /* 0x000c */
+ u32 msb; /* 0x0010 */
+ };
+ } sar;
+ union {
+ u64 reg; /* 0x0014..0x0018 */
+ struct {
+ u32 lsb; /* 0x0014 */
+ u32 msb; /* 0x0018 */
+ };
+ } dar;
+ union {
+ u64 reg; /* 0x001c..0x0020 */
+ struct {
+ u32 lsb; /* 0x001c */
+ u32 msb; /* 0x0020 */
+ };
+ } llp;
+} __packed;
struct dw_edma_v0_ch {
- struct dw_edma_v0_ch_regs wr; /* 0x200 */
- u32 padding_1[55]; /* [0x224..0x2fc] */
- struct dw_edma_v0_ch_regs rd; /* 0x300 */
- u32 padding_2[55]; /* [0x324..0x3fc] */
-};
+ struct dw_edma_v0_ch_regs wr; /* 0x0200 */
+ u32 padding_1[55]; /* 0x0224..0x02fc */
+ struct dw_edma_v0_ch_regs rd; /* 0x0300 */
+ u32 padding_2[55]; /* 0x0324..0x03fc */
+} __packed;
struct dw_edma_v0_unroll {
- u32 padding_1; /* 0x0f8 */
- u32 wr_engine_chgroup; /* 0x100 */
- u32 rd_engine_chgroup; /* 0x104 */
- u32 wr_engine_hshake_cnt_low; /* 0x108 */
- u32 wr_engine_hshake_cnt_high; /* 0x10c */
- u32 padding_2[2]; /* [0x110..0x114] */
- u32 rd_engine_hshake_cnt_low; /* 0x118 */
- u32 rd_engine_hshake_cnt_high; /* 0x11c */
- u32 padding_3[2]; /* [0x120..0x124] */
- u32 wr_ch0_pwr_en; /* 0x128 */
- u32 wr_ch1_pwr_en; /* 0x12c */
- u32 wr_ch2_pwr_en; /* 0x130 */
- u32 wr_ch3_pwr_en; /* 0x134 */
- u32 wr_ch4_pwr_en; /* 0x138 */
- u32 wr_ch5_pwr_en; /* 0x13c */
- u32 wr_ch6_pwr_en; /* 0x140 */
- u32 wr_ch7_pwr_en; /* 0x144 */
- u32 padding_4[8]; /* [0x148..0x164] */
- u32 rd_ch0_pwr_en; /* 0x168 */
- u32 rd_ch1_pwr_en; /* 0x16c */
- u32 rd_ch2_pwr_en; /* 0x170 */
- u32 rd_ch3_pwr_en; /* 0x174 */
- u32 rd_ch4_pwr_en; /* 0x178 */
- u32 rd_ch5_pwr_en; /* 0x18c */
- u32 rd_ch6_pwr_en; /* 0x180 */
- u32 rd_ch7_pwr_en; /* 0x184 */
- u32 padding_5[30]; /* [0x188..0x1fc] */
- struct dw_edma_v0_ch ch[EDMA_V0_MAX_NR_CH]; /* [0x200..0x1120] */
-};
+ u32 padding_1; /* 0x00f8 */
+ u32 wr_engine_chgroup; /* 0x0100 */
+ u32 rd_engine_chgroup; /* 0x0104 */
+ union {
+ u64 reg; /* 0x0108..0x010c */
+ struct {
+ u32 lsb; /* 0x0108 */
+ u32 msb; /* 0x010c */
+ };
+ } wr_engine_hshake_cnt;
+ u32 padding_2[2]; /* 0x0110..0x0114 */
+ union {
+ u64 reg; /* 0x0120..0x0124 */
+ struct {
+ u32 lsb; /* 0x0120 */
+ u32 msb; /* 0x0124 */
+ };
+ } rd_engine_hshake_cnt;
+ u32 padding_3[2]; /* 0x0120..0x0124 */
+ u32 wr_ch0_pwr_en; /* 0x0128 */
+ u32 wr_ch1_pwr_en; /* 0x012c */
+ u32 wr_ch2_pwr_en; /* 0x0130 */
+ u32 wr_ch3_pwr_en; /* 0x0134 */
+ u32 wr_ch4_pwr_en; /* 0x0138 */
+ u32 wr_ch5_pwr_en; /* 0x013c */
+ u32 wr_ch6_pwr_en; /* 0x0140 */
+ u32 wr_ch7_pwr_en; /* 0x0144 */
+ u32 padding_4[8]; /* 0x0148..0x0164 */
+ u32 rd_ch0_pwr_en; /* 0x0168 */
+ u32 rd_ch1_pwr_en; /* 0x016c */
+ u32 rd_ch2_pwr_en; /* 0x0170 */
+ u32 rd_ch3_pwr_en; /* 0x0174 */
+ u32 rd_ch4_pwr_en; /* 0x0178 */
+ u32 rd_ch5_pwr_en; /* 0x018c */
+ u32 rd_ch6_pwr_en; /* 0x0180 */
+ u32 rd_ch7_pwr_en; /* 0x0184 */
+ u32 padding_5[30]; /* 0x0188..0x01fc */
+ struct dw_edma_v0_ch ch[EDMA_V0_MAX_NR_CH]; /* 0x0200..0x1120 */
+} __packed;
struct dw_edma_v0_legacy {
- u32 viewport_sel; /* 0x0f8 */
- struct dw_edma_v0_ch_regs ch; /* [0x100..0x120] */
-};
+ u32 viewport_sel; /* 0x00f8 */
+ struct dw_edma_v0_ch_regs ch; /* 0x0100..0x0120 */
+} __packed;
struct dw_edma_v0_regs {
/* eDMA global registers */
- u32 ctrl_data_arb_prior; /* 0x000 */
- u32 padding_1; /* 0x004 */
- u32 ctrl; /* 0x008 */
- u32 wr_engine_en; /* 0x00c */
- u32 wr_doorbell; /* 0x010 */
- u32 padding_2; /* 0x014 */
- u32 wr_ch_arb_weight_low; /* 0x018 */
- u32 wr_ch_arb_weight_high; /* 0x01c */
- u32 padding_3[3]; /* [0x020..0x028] */
- u32 rd_engine_en; /* 0x02c */
- u32 rd_doorbell; /* 0x030 */
- u32 padding_4; /* 0x034 */
- u32 rd_ch_arb_weight_low; /* 0x038 */
- u32 rd_ch_arb_weight_high; /* 0x03c */
- u32 padding_5[3]; /* [0x040..0x048] */
+ u32 ctrl_data_arb_prior; /* 0x0000 */
+ u32 padding_1; /* 0x0004 */
+ u32 ctrl; /* 0x0008 */
+ u32 wr_engine_en; /* 0x000c */
+ u32 wr_doorbell; /* 0x0010 */
+ u32 padding_2; /* 0x0014 */
+ union {
+ u64 reg; /* 0x0018..0x001c */
+ struct {
+ u32 lsb; /* 0x0018 */
+ u32 msb; /* 0x001c */
+ };
+ } wr_ch_arb_weight;
+ u32 padding_3[3]; /* 0x0020..0x0028 */
+ u32 rd_engine_en; /* 0x002c */
+ u32 rd_doorbell; /* 0x0030 */
+ u32 padding_4; /* 0x0034 */
+ union {
+ u64 reg; /* 0x0038..0x003c */
+ struct {
+ u32 lsb; /* 0x0038 */
+ u32 msb; /* 0x003c */
+ };
+ } rd_ch_arb_weight;
+ u32 padding_5[3]; /* 0x0040..0x0048 */
/* eDMA interrupts registers */
- u32 wr_int_status; /* 0x04c */
- u32 padding_6; /* 0x050 */
- u32 wr_int_mask; /* 0x054 */
- u32 wr_int_clear; /* 0x058 */
- u32 wr_err_status; /* 0x05c */
- u32 wr_done_imwr_low; /* 0x060 */
- u32 wr_done_imwr_high; /* 0x064 */
- u32 wr_abort_imwr_low; /* 0x068 */
- u32 wr_abort_imwr_high; /* 0x06c */
- u32 wr_ch01_imwr_data; /* 0x070 */
- u32 wr_ch23_imwr_data; /* 0x074 */
- u32 wr_ch45_imwr_data; /* 0x078 */
- u32 wr_ch67_imwr_data; /* 0x07c */
- u32 padding_7[4]; /* [0x080..0x08c] */
- u32 wr_linked_list_err_en; /* 0x090 */
- u32 padding_8[3]; /* [0x094..0x09c] */
- u32 rd_int_status; /* 0x0a0 */
- u32 padding_9; /* 0x0a4 */
- u32 rd_int_mask; /* 0x0a8 */
- u32 rd_int_clear; /* 0x0ac */
- u32 padding_10; /* 0x0b0 */
- u32 rd_err_status_low; /* 0x0b4 */
- u32 rd_err_status_high; /* 0x0b8 */
- u32 padding_11[2]; /* [0x0bc..0x0c0] */
- u32 rd_linked_list_err_en; /* 0x0c4 */
- u32 padding_12; /* 0x0c8 */
- u32 rd_done_imwr_low; /* 0x0cc */
- u32 rd_done_imwr_high; /* 0x0d0 */
- u32 rd_abort_imwr_low; /* 0x0d4 */
- u32 rd_abort_imwr_high; /* 0x0d8 */
- u32 rd_ch01_imwr_data; /* 0x0dc */
- u32 rd_ch23_imwr_data; /* 0x0e0 */
- u32 rd_ch45_imwr_data; /* 0x0e4 */
- u32 rd_ch67_imwr_data; /* 0x0e8 */
- u32 padding_13[4]; /* [0x0ec..0x0f8] */
+ u32 wr_int_status; /* 0x004c */
+ u32 padding_6; /* 0x0050 */
+ u32 wr_int_mask; /* 0x0054 */
+ u32 wr_int_clear; /* 0x0058 */
+ u32 wr_err_status; /* 0x005c */
+ union {
+ u64 reg; /* 0x0060..0x0064 */
+ struct {
+ u32 lsb; /* 0x0060 */
+ u32 msb; /* 0x0064 */
+ };
+ } wr_done_imwr;
+ union {
+ u64 reg; /* 0x0068..0x006c */
+ struct {
+ u32 lsb; /* 0x0068 */
+ u32 msb; /* 0x006c */
+ };
+ } wr_abort_imwr;
+ u32 wr_ch01_imwr_data; /* 0x0070 */
+ u32 wr_ch23_imwr_data; /* 0x0074 */
+ u32 wr_ch45_imwr_data; /* 0x0078 */
+ u32 wr_ch67_imwr_data; /* 0x007c */
+ u32 padding_7[4]; /* 0x0080..0x008c */
+ u32 wr_linked_list_err_en; /* 0x0090 */
+ u32 padding_8[3]; /* 0x0094..0x009c */
+ u32 rd_int_status; /* 0x00a0 */
+ u32 padding_9; /* 0x00a4 */
+ u32 rd_int_mask; /* 0x00a8 */
+ u32 rd_int_clear; /* 0x00ac */
+ u32 padding_10; /* 0x00b0 */
+ union {
+ u64 reg; /* 0x00b4..0x00b8 */
+ struct {
+ u32 lsb; /* 0x00b4 */
+ u32 msb; /* 0x00b8 */
+ };
+ } rd_err_status;
+ u32 padding_11[2]; /* 0x00bc..0x00c0 */
+ u32 rd_linked_list_err_en; /* 0x00c4 */
+ u32 padding_12; /* 0x00c8 */
+ union {
+ u64 reg; /* 0x00cc..0x00d0 */
+ struct {
+ u32 lsb; /* 0x00cc */
+ u32 msb; /* 0x00d0 */
+ };
+ } rd_done_imwr;
+ union {
+ u64 reg; /* 0x00d4..0x00d8 */
+ struct {
+ u32 lsb; /* 0x00d4 */
+ u32 msb; /* 0x00d8 */
+ };
+ } rd_abort_imwr;
+ u32 rd_ch01_imwr_data; /* 0x00dc */
+ u32 rd_ch23_imwr_data; /* 0x00e0 */
+ u32 rd_ch45_imwr_data; /* 0x00e4 */
+ u32 rd_ch67_imwr_data; /* 0x00e8 */
+ u32 padding_13[4]; /* 0x00ec..0x00f8 */
/* eDMA channel context grouping */
union dw_edma_v0_type {
- struct dw_edma_v0_legacy legacy; /* [0x0f8..0x120] */
- struct dw_edma_v0_unroll unroll; /* [0x0f8..0x1120] */
+ struct dw_edma_v0_legacy legacy; /* 0x00f8..0x0120 */
+ struct dw_edma_v0_unroll unroll; /* 0x00f8..0x1120 */
} type;
-};
+} __packed;
struct dw_edma_v0_lli {
u32 control;
u32 transfer_size;
- u32 sar_low;
- u32 sar_high;
- u32 dar_low;
- u32 dar_high;
-};
+ union {
+ u64 reg;
+ struct {
+ u32 lsb;
+ u32 msb;
+ };
+ } sar;
+ union {
+ u64 reg;
+ struct {
+ u32 lsb;
+ u32 msb;
+ };
+ } dar;
+} __packed;
struct dw_edma_v0_llp {
u32 control;
u32 reserved;
- u32 llp_low;
- u32 llp_high;
-};
+ union {
+ u64 reg;
+ struct {
+ u32 lsb;
+ u32 msb;
+ };
+ } llp;
+} __packed;
#endif /* _DW_EDMA_V0_REGS_H */
obj-$(CONFIG_INTEL_IDXD) += idxd.o
idxd-y := init.o irq.o device.o sysfs.o submit.o dma.o cdev.o
+
+idxd-$(CONFIG_INTEL_IDXD_PERFMON) += perfmon.o
struct iommu_sva *sva;
};
-enum idxd_cdev_cleanup {
- CDEV_NORMAL = 0,
- CDEV_FAILED,
-};
-
static void idxd_cdev_dev_release(struct device *dev)
{
- dev_dbg(dev, "releasing cdev device\n");
- kfree(dev);
+ struct idxd_cdev *idxd_cdev = container_of(dev, struct idxd_cdev, dev);
+ struct idxd_cdev_context *cdev_ctx;
+ struct idxd_wq *wq = idxd_cdev->wq;
+
+ cdev_ctx = &ictx[wq->idxd->data->type];
+ ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
+ kfree(idxd_cdev);
}
static struct device_type idxd_cdev_device_type = {
return container_of(cdev, struct idxd_cdev, cdev);
}
-static inline struct idxd_wq *idxd_cdev_wq(struct idxd_cdev *idxd_cdev)
-{
- return container_of(idxd_cdev, struct idxd_wq, idxd_cdev);
-}
-
static inline struct idxd_wq *inode_wq(struct inode *inode)
{
- return idxd_cdev_wq(inode_idxd_cdev(inode));
+ struct idxd_cdev *idxd_cdev = inode_idxd_cdev(inode);
+
+ return idxd_cdev->wq;
}
static int idxd_cdev_open(struct inode *inode, struct file *filp)
struct idxd_user_context *ctx = filp->private_data;
struct idxd_wq *wq = ctx->wq;
struct idxd_device *idxd = wq->idxd;
- struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
unsigned long flags;
__poll_t out = 0;
- poll_wait(filp, &idxd_cdev->err_queue, wait);
+ poll_wait(filp, &wq->err_queue, wait);
spin_lock_irqsave(&idxd->dev_lock, flags);
if (idxd->sw_err.valid)
out = EPOLLIN | EPOLLRDNORM;
int idxd_cdev_get_major(struct idxd_device *idxd)
{
- return MAJOR(ictx[idxd->type].devt);
+ return MAJOR(ictx[idxd->data->type].devt);
}
-static int idxd_wq_cdev_dev_setup(struct idxd_wq *wq)
+int idxd_wq_add_cdev(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
- struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
- struct idxd_cdev_context *cdev_ctx;
+ struct idxd_cdev *idxd_cdev;
+ struct cdev *cdev;
struct device *dev;
- int minor, rc;
+ struct idxd_cdev_context *cdev_ctx;
+ int rc, minor;
- idxd_cdev->dev = kzalloc(sizeof(*idxd_cdev->dev), GFP_KERNEL);
- if (!idxd_cdev->dev)
+ idxd_cdev = kzalloc(sizeof(*idxd_cdev), GFP_KERNEL);
+ if (!idxd_cdev)
return -ENOMEM;
- dev = idxd_cdev->dev;
- dev->parent = &idxd->pdev->dev;
- dev_set_name(dev, "%s/wq%u.%u", idxd_get_dev_name(idxd),
- idxd->id, wq->id);
- dev->bus = idxd_get_bus_type(idxd);
-
- cdev_ctx = &ictx[wq->idxd->type];
+ idxd_cdev->wq = wq;
+ cdev = &idxd_cdev->cdev;
+ dev = &idxd_cdev->dev;
+ cdev_ctx = &ictx[wq->idxd->data->type];
minor = ida_simple_get(&cdev_ctx->minor_ida, 0, MINORMASK, GFP_KERNEL);
if (minor < 0) {
- rc = minor;
- kfree(dev);
- goto ida_err;
- }
-
- dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
- dev->type = &idxd_cdev_device_type;
- rc = device_register(dev);
- if (rc < 0) {
- dev_err(&idxd->pdev->dev, "device register failed\n");
- goto dev_reg_err;
+ kfree(idxd_cdev);
+ return minor;
}
idxd_cdev->minor = minor;
- return 0;
-
- dev_reg_err:
- ida_simple_remove(&cdev_ctx->minor_ida, MINOR(dev->devt));
- put_device(dev);
- ida_err:
- idxd_cdev->dev = NULL;
- return rc;
-}
-
-static void idxd_wq_cdev_cleanup(struct idxd_wq *wq,
- enum idxd_cdev_cleanup cdev_state)
-{
- struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
- struct idxd_cdev_context *cdev_ctx;
-
- cdev_ctx = &ictx[wq->idxd->type];
- if (cdev_state == CDEV_NORMAL)
- cdev_del(&idxd_cdev->cdev);
- device_unregister(idxd_cdev->dev);
- /*
- * The device_type->release() will be called on the device and free
- * the allocated struct device. We can just forget it.
- */
- ida_simple_remove(&cdev_ctx->minor_ida, idxd_cdev->minor);
- idxd_cdev->dev = NULL;
- idxd_cdev->minor = -1;
-}
-
-int idxd_wq_add_cdev(struct idxd_wq *wq)
-{
- struct idxd_cdev *idxd_cdev = &wq->idxd_cdev;
- struct cdev *cdev = &idxd_cdev->cdev;
- struct device *dev;
- int rc;
+ device_initialize(dev);
+ dev->parent = &wq->conf_dev;
+ dev->bus = &dsa_bus_type;
+ dev->type = &idxd_cdev_device_type;
+ dev->devt = MKDEV(MAJOR(cdev_ctx->devt), minor);
- rc = idxd_wq_cdev_dev_setup(wq);
+ rc = dev_set_name(dev, "%s/wq%u.%u", idxd->data->name_prefix, idxd->id, wq->id);
if (rc < 0)
- return rc;
+ goto err;
- dev = idxd_cdev->dev;
+ wq->idxd_cdev = idxd_cdev;
cdev_init(cdev, &idxd_cdev_fops);
- cdev_set_parent(cdev, &dev->kobj);
- rc = cdev_add(cdev, dev->devt, 1);
+ rc = cdev_device_add(cdev, dev);
if (rc) {
dev_dbg(&wq->idxd->pdev->dev, "cdev_add failed: %d\n", rc);
- idxd_wq_cdev_cleanup(wq, CDEV_FAILED);
- return rc;
+ goto err;
}
- init_waitqueue_head(&idxd_cdev->err_queue);
return 0;
+
+ err:
+ put_device(dev);
+ wq->idxd_cdev = NULL;
+ return rc;
}
void idxd_wq_del_cdev(struct idxd_wq *wq)
{
- idxd_wq_cdev_cleanup(wq, CDEV_NORMAL);
+ struct idxd_cdev *idxd_cdev;
+ struct idxd_cdev_context *cdev_ctx;
+
+ cdev_ctx = &ictx[wq->idxd->data->type];
+ idxd_cdev = wq->idxd_cdev;
+ wq->idxd_cdev = NULL;
+ cdev_device_del(&idxd_cdev->cdev, &idxd_cdev->dev);
+ put_device(&idxd_cdev->dev);
}
int idxd_cdev_register(void)
/* Interrupt control bits */
void idxd_mask_msix_vector(struct idxd_device *idxd, int vec_id)
{
- struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector);
+ struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
pci_msi_mask_irq(data);
}
void idxd_unmask_msix_vector(struct idxd_device *idxd, int vec_id)
{
- struct irq_data *data = irq_get_irq_data(idxd->msix_entries[vec_id].vector);
+ struct irq_data *data = irq_get_irq_data(idxd->irq_entries[vec_id].vector);
pci_msi_unmask_irq(data);
}
genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
genctrl.softerr_int_en = 1;
+ genctrl.halt_int_en = 1;
iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
}
genctrl.bits = ioread32(idxd->reg_base + IDXD_GENCTRL_OFFSET);
genctrl.softerr_int_en = 0;
+ genctrl.halt_int_en = 0;
iowrite32(genctrl.bits, idxd->reg_base + IDXD_GENCTRL_OFFSET);
}
if (rc < 0)
return rc;
- if (idxd->type == IDXD_TYPE_DSA)
- align = 32;
- else if (idxd->type == IDXD_TYPE_IAX)
- align = 64;
- else
- return -ENODEV;
-
- wq->compls_size = num_descs * idxd->compl_size + align;
+ align = idxd->data->align;
+ wq->compls_size = num_descs * idxd->data->compl_size + align;
wq->compls_raw = dma_alloc_coherent(dev, wq->compls_size,
&wq->compls_addr_raw, GFP_KERNEL);
if (!wq->compls_raw) {
struct idxd_desc *desc = wq->descs[i];
desc->hw = wq->hw_descs[i];
- if (idxd->type == IDXD_TYPE_DSA)
+ if (idxd->data->type == IDXD_TYPE_DSA)
desc->completion = &wq->compls[i];
- else if (idxd->type == IDXD_TYPE_IAX)
+ else if (idxd->data->type == IDXD_TYPE_IAX)
desc->iax_completion = &wq->iax_compls[i];
- desc->compl_dma = wq->compls_addr + idxd->compl_size * i;
+ desc->compl_dma = wq->compls_addr + idxd->data->compl_size * i;
desc->id = i;
desc->wq = wq;
desc->cpu = -1;
- dma_async_tx_descriptor_init(&desc->txd, &wq->dma_chan);
- desc->txd.tx_submit = idxd_dma_tx_submit;
}
return 0;
struct device *dev = &wq->idxd->pdev->dev;
devm_iounmap(dev, wq->portal);
+ wq->portal = NULL;
+}
+
+void idxd_wqs_unmap_portal(struct idxd_device *idxd)
+{
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ if (wq->portal)
+ idxd_wq_unmap_portal(wq);
+ }
}
int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid)
memset(wq->name, 0, WQ_NAME_SIZE);
}
+static void idxd_wq_ref_release(struct percpu_ref *ref)
+{
+ struct idxd_wq *wq = container_of(ref, struct idxd_wq, wq_active);
+
+ complete(&wq->wq_dead);
+}
+
+int idxd_wq_init_percpu_ref(struct idxd_wq *wq)
+{
+ int rc;
+
+ memset(&wq->wq_active, 0, sizeof(wq->wq_active));
+ rc = percpu_ref_init(&wq->wq_active, idxd_wq_ref_release, 0, GFP_KERNEL);
+ if (rc < 0)
+ return rc;
+ reinit_completion(&wq->wq_dead);
+ return 0;
+}
+
+void idxd_wq_quiesce(struct idxd_wq *wq)
+{
+ percpu_ref_kill(&wq->wq_active);
+ wait_for_completion(&wq->wq_dead);
+ percpu_ref_exit(&wq->wq_active);
+}
+
/* Device control bits */
static inline bool idxd_is_enabled(struct idxd_device *idxd)
{
memset(&cmd, 0, sizeof(cmd));
cmd.cmd = IDXD_CMD_RESET_DEVICE;
dev_dbg(dev, "%s: sending reset for init.\n", __func__);
- spin_lock_irqsave(&idxd->dev_lock, flags);
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) &
IDXD_CMDSTS_ACTIVE)
cpu_relax();
- spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
return 0;
}
if (idxd_device_is_halted(idxd)) {
dev_warn(&idxd->pdev->dev, "Device is HALTED!\n");
- *status = IDXD_CMDSTS_HW_ERR;
+ if (status)
+ *status = IDXD_CMDSTS_HW_ERR;
return;
}
cmd.operand = operand;
cmd.int_req = 1;
- spin_lock_irqsave(&idxd->dev_lock, flags);
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
wait_event_lock_irq(idxd->cmd_waitq,
!test_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags),
- idxd->dev_lock);
+ idxd->cmd_lock);
dev_dbg(&idxd->pdev->dev, "%s: sending cmd: %#x op: %#x\n",
__func__, cmd_code, operand);
* After command submitted, release lock and go to sleep until
* the command completes via interrupt.
*/
- spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
wait_for_completion(&done);
- spin_lock_irqsave(&idxd->dev_lock, flags);
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
if (status) {
*status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
idxd->cmd_status = *status & GENMASK(7, 0);
__clear_bit(IDXD_FLAG_CMD_RUNNING, &idxd->flags);
/* Wake up other pending commands */
wake_up(&idxd->cmd_waitq);
- spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
}
int idxd_device_enable(struct idxd_device *idxd)
lockdep_assert_held(&idxd->dev_lock);
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (wq->state == IDXD_WQ_ENABLED) {
idxd_wq_disable_cleanup(wq);
dev_dbg(dev, "pasid %d drained\n", pasid);
}
+int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
+ enum idxd_interrupt_type irq_type)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand, status;
+
+ if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)))
+ return -EOPNOTSUPP;
+
+ dev_dbg(dev, "get int handle, idx %d\n", idx);
+
+ operand = idx & GENMASK(15, 0);
+ if (irq_type == IDXD_IRQ_IMS)
+ operand |= CMD_INT_HANDLE_IMS;
+
+ dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_REQUEST_INT_HANDLE, operand);
+
+ idxd_cmd_exec(idxd, IDXD_CMD_REQUEST_INT_HANDLE, operand, &status);
+
+ if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
+ dev_dbg(dev, "request int handle failed: %#x\n", status);
+ return -ENXIO;
+ }
+
+ *handle = (status >> IDXD_CMDSTS_RES_SHIFT) & GENMASK(15, 0);
+
+ dev_dbg(dev, "int handle acquired: %u\n", *handle);
+ return 0;
+}
+
+int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
+ enum idxd_interrupt_type irq_type)
+{
+ struct device *dev = &idxd->pdev->dev;
+ u32 operand, status;
+ union idxd_command_reg cmd;
+ unsigned long flags;
+
+ if (!(idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)))
+ return -EOPNOTSUPP;
+
+ dev_dbg(dev, "release int handle, handle %d\n", handle);
+
+ memset(&cmd, 0, sizeof(cmd));
+ operand = handle & GENMASK(15, 0);
+
+ if (irq_type == IDXD_IRQ_IMS)
+ operand |= CMD_INT_HANDLE_IMS;
+
+ cmd.cmd = IDXD_CMD_RELEASE_INT_HANDLE;
+ cmd.operand = operand;
+
+ dev_dbg(dev, "cmd: %u operand: %#x\n", IDXD_CMD_RELEASE_INT_HANDLE, operand);
+
+ spin_lock_irqsave(&idxd->cmd_lock, flags);
+ iowrite32(cmd.bits, idxd->reg_base + IDXD_CMD_OFFSET);
+
+ while (ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET) & IDXD_CMDSTS_ACTIVE)
+ cpu_relax();
+ status = ioread32(idxd->reg_base + IDXD_CMDSTS_OFFSET);
+ spin_unlock_irqrestore(&idxd->cmd_lock, flags);
+
+ if ((status & IDXD_CMDSTS_ERR_MASK) != IDXD_CMDSTS_SUCCESS) {
+ dev_dbg(dev, "release int handle failed: %#x\n", status);
+ return -ENXIO;
+ }
+
+ dev_dbg(dev, "int handle released.\n");
+ return 0;
+}
+
/* Device configuration bits */
void idxd_msix_perm_setup(struct idxd_device *idxd)
{
ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET));
for (i = 0; i < idxd->max_groups; i++) {
- struct idxd_group *group = &idxd->groups[i];
+ struct idxd_group *group = idxd->groups[i];
idxd_group_config_write(group);
}
int i, rc;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
rc = idxd_wq_config_write(wq);
if (rc < 0)
/* TC-A 0 and TC-B 1 should be defaults */
for (i = 0; i < idxd->max_groups; i++) {
- struct idxd_group *group = &idxd->groups[i];
+ struct idxd_group *group = idxd->groups[i];
if (group->tc_a == -1)
group->tc_a = group->grpcfg.flags.tc_a = 0;
struct idxd_group *group;
for (i = 0; i < idxd->max_groups; i++) {
- group = &idxd->groups[i];
+ group = idxd->groups[i];
group->grpcfg.engines = 0;
}
for (i = 0; i < idxd->max_engines; i++) {
- eng = &idxd->engines[i];
+ eng = idxd->engines[i];
group = eng->group;
if (!group)
struct device *dev = &idxd->pdev->dev;
for (i = 0; i < idxd->max_groups; i++) {
- group = &idxd->groups[i];
+ group = idxd->groups[i];
for (j = 0; j < 4; j++)
group->grpcfg.wqs[j] = 0;
}
for (i = 0; i < idxd->max_wqs; i++) {
- wq = &idxd->wqs[i];
+ wq = idxd->wqs[i];
group = wq->group;
if (!wq->group)
return 0;
}
+
+static int idxd_wq_load_config(struct idxd_wq *wq)
+{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int wqcfg_offset;
+ int i;
+
+ wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, 0);
+ memcpy_fromio(wq->wqcfg, idxd->reg_base + wqcfg_offset, idxd->wqcfg_size);
+
+ wq->size = wq->wqcfg->wq_size;
+ wq->threshold = wq->wqcfg->wq_thresh;
+ if (wq->wqcfg->priv)
+ wq->type = IDXD_WQT_KERNEL;
+
+ /* The driver does not support shared WQ mode in read-only config yet */
+ if (wq->wqcfg->mode == 0 || wq->wqcfg->pasid_en)
+ return -EOPNOTSUPP;
+
+ set_bit(WQ_FLAG_DEDICATED, &wq->flags);
+
+ wq->priority = wq->wqcfg->priority;
+
+ for (i = 0; i < WQCFG_STRIDES(idxd); i++) {
+ wqcfg_offset = WQCFG_OFFSET(idxd, wq->id, i);
+ dev_dbg(dev, "WQ[%d][%d][%#x]: %#x\n", wq->id, i, wqcfg_offset, wq->wqcfg->bits[i]);
+ }
+
+ return 0;
+}
+
+static void idxd_group_load_config(struct idxd_group *group)
+{
+ struct idxd_device *idxd = group->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ int i, j, grpcfg_offset;
+
+ /*
+ * Load WQS bit fields
+ * Iterate through all 256 bits 64 bits at a time
+ */
+ for (i = 0; i < GRPWQCFG_STRIDES; i++) {
+ struct idxd_wq *wq;
+
+ grpcfg_offset = GRPWQCFG_OFFSET(idxd, group->id, i);
+ group->grpcfg.wqs[i] = ioread64(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPCFG wq[%d:%d: %#x]: %#llx\n",
+ group->id, i, grpcfg_offset, group->grpcfg.wqs[i]);
+
+ if (i * 64 >= idxd->max_wqs)
+ break;
+
+ /* Iterate through all 64 bits and check for wq set */
+ for (j = 0; j < 64; j++) {
+ int id = i * 64 + j;
+
+ /* No need to check beyond max wqs */
+ if (id >= idxd->max_wqs)
+ break;
+
+ /* Set group assignment for wq if wq bit is set */
+ if (group->grpcfg.wqs[i] & BIT(j)) {
+ wq = idxd->wqs[id];
+ wq->group = group;
+ }
+ }
+ }
+
+ grpcfg_offset = GRPENGCFG_OFFSET(idxd, group->id);
+ group->grpcfg.engines = ioread64(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPCFG engs[%d: %#x]: %#llx\n", group->id,
+ grpcfg_offset, group->grpcfg.engines);
+
+ /* Iterate through all 64 bits to check engines set */
+ for (i = 0; i < 64; i++) {
+ if (i >= idxd->max_engines)
+ break;
+
+ if (group->grpcfg.engines & BIT(i)) {
+ struct idxd_engine *engine = idxd->engines[i];
+
+ engine->group = group;
+ }
+ }
+
+ grpcfg_offset = GRPFLGCFG_OFFSET(idxd, group->id);
+ group->grpcfg.flags.bits = ioread32(idxd->reg_base + grpcfg_offset);
+ dev_dbg(dev, "GRPFLAGS flags[%d: %#x]: %#x\n",
+ group->id, grpcfg_offset, group->grpcfg.flags.bits);
+}
+
+int idxd_device_load_config(struct idxd_device *idxd)
+{
+ union gencfg_reg reg;
+ int i, rc;
+
+ reg.bits = ioread32(idxd->reg_base + IDXD_GENCFG_OFFSET);
+ idxd->token_limit = reg.token_limit;
+
+ for (i = 0; i < idxd->max_groups; i++) {
+ struct idxd_group *group = idxd->groups[i];
+
+ idxd_group_load_config(group);
+ }
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ rc = idxd_wq_load_config(wq);
+ if (rc < 0)
+ return rc;
+ }
+
+ return 0;
+}
static inline struct idxd_wq *to_idxd_wq(struct dma_chan *c)
{
- return container_of(c, struct idxd_wq, dma_chan);
+ struct idxd_dma_chan *idxd_chan;
+
+ idxd_chan = container_of(c, struct idxd_dma_chan, chan);
+ return idxd_chan->wq;
}
void idxd_dma_complete_txd(struct idxd_desc *desc,
{
}
-dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+static dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx)
{
struct dma_chan *c = tx->chan;
struct idxd_wq *wq = to_idxd_wq(c);
static void idxd_dma_release(struct dma_device *device)
{
+ struct idxd_dma_dev *idxd_dma = container_of(device, struct idxd_dma_dev, dma);
+
+ kfree(idxd_dma);
}
int idxd_register_dma_device(struct idxd_device *idxd)
{
- struct dma_device *dma = &idxd->dma_dev;
+ struct idxd_dma_dev *idxd_dma;
+ struct dma_device *dma;
+ struct device *dev = &idxd->pdev->dev;
+ int rc;
+ idxd_dma = kzalloc_node(sizeof(*idxd_dma), GFP_KERNEL, dev_to_node(dev));
+ if (!idxd_dma)
+ return -ENOMEM;
+
+ dma = &idxd_dma->dma;
INIT_LIST_HEAD(&dma->channels);
- dma->dev = &idxd->pdev->dev;
+ dma->dev = dev;
dma_cap_set(DMA_PRIVATE, dma->cap_mask);
dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask);
dma->device_alloc_chan_resources = idxd_dma_alloc_chan_resources;
dma->device_free_chan_resources = idxd_dma_free_chan_resources;
- return dma_async_device_register(&idxd->dma_dev);
+ rc = dma_async_device_register(dma);
+ if (rc < 0) {
+ kfree(idxd_dma);
+ return rc;
+ }
+
+ idxd_dma->idxd = idxd;
+ /*
+ * This pointer is protected by the refs taken by the dma_chan. It will remain valid
+ * as long as there are outstanding channels.
+ */
+ idxd->idxd_dma = idxd_dma;
+ return 0;
}
void idxd_unregister_dma_device(struct idxd_device *idxd)
{
- dma_async_device_unregister(&idxd->dma_dev);
+ dma_async_device_unregister(&idxd->idxd_dma->dma);
}
int idxd_register_dma_channel(struct idxd_wq *wq)
{
struct idxd_device *idxd = wq->idxd;
- struct dma_device *dma = &idxd->dma_dev;
- struct dma_chan *chan = &wq->dma_chan;
- int rc;
+ struct dma_device *dma = &idxd->idxd_dma->dma;
+ struct device *dev = &idxd->pdev->dev;
+ struct idxd_dma_chan *idxd_chan;
+ struct dma_chan *chan;
+ int rc, i;
+
+ idxd_chan = kzalloc_node(sizeof(*idxd_chan), GFP_KERNEL, dev_to_node(dev));
+ if (!idxd_chan)
+ return -ENOMEM;
- memset(&wq->dma_chan, 0, sizeof(struct dma_chan));
+ chan = &idxd_chan->chan;
chan->device = dma;
list_add_tail(&chan->device_node, &dma->channels);
+
+ for (i = 0; i < wq->num_descs; i++) {
+ struct idxd_desc *desc = wq->descs[i];
+
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = idxd_dma_tx_submit;
+ }
+
rc = dma_async_device_channel_register(dma, chan);
- if (rc < 0)
+ if (rc < 0) {
+ kfree(idxd_chan);
return rc;
+ }
+
+ wq->idxd_chan = idxd_chan;
+ idxd_chan->wq = wq;
+ get_device(&wq->conf_dev);
return 0;
}
void idxd_unregister_dma_channel(struct idxd_wq *wq)
{
- struct dma_chan *chan = &wq->dma_chan;
+ struct idxd_dma_chan *idxd_chan = wq->idxd_chan;
+ struct dma_chan *chan = &idxd_chan->chan;
+ struct idxd_dma_dev *idxd_dma = wq->idxd->idxd_dma;
- dma_async_device_channel_unregister(&wq->idxd->dma_dev, chan);
+ dma_async_device_channel_unregister(&idxd_dma->dma, chan);
list_del(&chan->device_node);
+ kfree(wq->idxd_chan);
+ wq->idxd_chan = NULL;
+ put_device(&wq->conf_dev);
}
#include <linux/percpu-rwsem.h>
#include <linux/wait.h>
#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/perf_event.h>
#include "registers.h"
#define IDXD_DRIVER_VERSION "1.00"
extern struct kmem_cache *idxd_desc_pool;
+struct idxd_device;
+struct idxd_wq;
+
#define IDXD_REG_TIMEOUT 50
#define IDXD_DRAIN_TIMEOUT 5000
};
#define IDXD_NAME_SIZE 128
+#define IDXD_PMU_EVENT_MAX 64
struct idxd_device_driver {
struct device_driver drv;
struct idxd_irq_entry {
struct idxd_device *idxd;
int id;
+ int vector;
struct llist_head pending_llist;
struct list_head work_list;
/*
int tc_b;
};
+struct idxd_pmu {
+ struct idxd_device *idxd;
+
+ struct perf_event *event_list[IDXD_PMU_EVENT_MAX];
+ int n_events;
+
+ DECLARE_BITMAP(used_mask, IDXD_PMU_EVENT_MAX);
+
+ struct pmu pmu;
+ char name[IDXD_NAME_SIZE];
+ int cpu;
+
+ int n_counters;
+ int counter_width;
+ int n_event_categories;
+
+ bool per_counter_caps_supported;
+ unsigned long supported_event_categories;
+
+ unsigned long supported_filters;
+ int n_filters;
+
+ struct hlist_node cpuhp_node;
+};
+
#define IDXD_MAX_PRIORITY 0xf
enum idxd_wq_state {
};
struct idxd_cdev {
+ struct idxd_wq *wq;
struct cdev cdev;
- struct device *dev;
+ struct device dev;
int minor;
- struct wait_queue_head err_queue;
};
#define IDXD_ALLOCATED_BATCH_SIZE 128U
IDXD_COMPLETE_DEV_FAIL,
};
+struct idxd_dma_chan {
+ struct dma_chan chan;
+ struct idxd_wq *wq;
+};
+
struct idxd_wq {
void __iomem *portal;
+ struct percpu_ref wq_active;
+ struct completion wq_dead;
struct device conf_dev;
- struct idxd_cdev idxd_cdev;
+ struct idxd_cdev *idxd_cdev;
+ struct wait_queue_head err_queue;
struct idxd_device *idxd;
int id;
enum idxd_wq_type type;
int compls_size;
struct idxd_desc **descs;
struct sbitmap_queue sbq;
- struct dma_chan dma_chan;
+ struct idxd_dma_chan *idxd_chan;
char name[WQ_NAME_SIZE + 1];
u64 max_xfer_bytes;
u32 max_batch_size;
union group_cap_reg group_cap;
union engine_cap_reg engine_cap;
struct opcap opcap;
+ u32 cmd_cap;
};
enum idxd_device_state {
IDXD_FLAG_PASID_ENABLED,
};
-struct idxd_device {
+struct idxd_dma_dev {
+ struct idxd_device *idxd;
+ struct dma_device dma;
+};
+
+struct idxd_driver_data {
+ const char *name_prefix;
enum idxd_type type;
+ struct device_type *dev_type;
+ int compl_size;
+ int align;
+};
+
+struct idxd_device {
struct device conf_dev;
+ struct idxd_driver_data *data;
struct list_head list;
struct idxd_hw hw;
enum idxd_device_state state;
void __iomem *reg_base;
spinlock_t dev_lock; /* spinlock for device */
+ spinlock_t cmd_lock; /* spinlock for device commands */
struct completion *cmd_done;
- struct idxd_group *groups;
- struct idxd_wq *wqs;
- struct idxd_engine *engines;
+ struct idxd_group **groups;
+ struct idxd_wq **wqs;
+ struct idxd_engine **engines;
struct iommu_sva *sva;
unsigned int pasid;
int token_limit;
int nr_tokens; /* non-reserved tokens */
unsigned int wqcfg_size;
- int compl_size;
union sw_err_reg sw_err;
wait_queue_head_t cmd_waitq;
- struct msix_entry *msix_entries;
int num_wq_irqs;
struct idxd_irq_entry *irq_entries;
- struct dma_device dma_dev;
+ struct idxd_dma_dev *idxd_dma;
struct workqueue_struct *wq;
struct work_struct work;
+
+ int *int_handles;
+
+ struct idxd_pmu *idxd_pmu;
};
/* IDXD software descriptor */
struct list_head list;
int id;
int cpu;
+ unsigned int vector;
struct idxd_wq *wq;
};
extern struct bus_type iax_bus_type;
extern bool support_enqcmd;
+extern struct ida idxd_ida;
+extern struct device_type dsa_device_type;
+extern struct device_type iax_device_type;
+extern struct device_type idxd_wq_device_type;
+extern struct device_type idxd_engine_device_type;
+extern struct device_type idxd_group_device_type;
+
+static inline bool is_dsa_dev(struct device *dev)
+{
+ return dev->type == &dsa_device_type;
+}
+
+static inline bool is_iax_dev(struct device *dev)
+{
+ return dev->type == &iax_device_type;
+}
+
+static inline bool is_idxd_dev(struct device *dev)
+{
+ return is_dsa_dev(dev) || is_iax_dev(dev);
+}
+
+static inline bool is_idxd_wq_dev(struct device *dev)
+{
+ return dev->type == &idxd_wq_device_type;
+}
+
+static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
+{
+ if (wq->type == IDXD_WQT_KERNEL && strcmp(wq->name, "dmaengine") == 0)
+ return true;
+ return false;
+}
+
+static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
+{
+ return wq->type == IDXD_WQT_USER;
+}
static inline bool wq_dedicated(struct idxd_wq *wq)
{
IDXD_PORTAL_LIMITED,
};
+enum idxd_interrupt_type {
+ IDXD_IRQ_MSIX = 0,
+ IDXD_IRQ_IMS,
+};
+
static inline int idxd_get_wq_portal_offset(enum idxd_portal_prot prot)
{
return prot * 0x1000;
return ((wq_id * 4) << PAGE_SHIFT) + idxd_get_wq_portal_offset(prot);
}
-static inline void idxd_set_type(struct idxd_device *idxd)
-{
- struct pci_dev *pdev = idxd->pdev;
-
- if (pdev->device == PCI_DEVICE_ID_INTEL_DSA_SPR0)
- idxd->type = IDXD_TYPE_DSA;
- else if (pdev->device == PCI_DEVICE_ID_INTEL_IAX_SPR0)
- idxd->type = IDXD_TYPE_IAX;
- else
- idxd->type = IDXD_TYPE_UNKNOWN;
-}
-
static inline void idxd_wq_get(struct idxd_wq *wq)
{
wq->client_count++;
return wq->client_count;
};
-const char *idxd_get_dev_name(struct idxd_device *idxd);
int idxd_register_bus_type(void);
void idxd_unregister_bus_type(void);
-int idxd_setup_sysfs(struct idxd_device *idxd);
-void idxd_cleanup_sysfs(struct idxd_device *idxd);
+int idxd_register_devices(struct idxd_device *idxd);
+void idxd_unregister_devices(struct idxd_device *idxd);
int idxd_register_driver(void);
void idxd_unregister_driver(void);
-struct bus_type *idxd_get_bus_type(struct idxd_device *idxd);
+void idxd_wqs_quiesce(struct idxd_device *idxd);
/* device interrupt control */
void idxd_msix_perm_setup(struct idxd_device *idxd);
void idxd_msix_perm_clear(struct idxd_device *idxd);
-irqreturn_t idxd_irq_handler(int vec, void *data);
irqreturn_t idxd_misc_thread(int vec, void *data);
irqreturn_t idxd_wq_thread(int irq, void *data);
void idxd_mask_error_interrupts(struct idxd_device *idxd);
int idxd_device_config(struct idxd_device *idxd);
void idxd_device_wqs_clear_state(struct idxd_device *idxd);
void idxd_device_drain_pasid(struct idxd_device *idxd, int pasid);
+int idxd_device_load_config(struct idxd_device *idxd);
+int idxd_device_request_int_handle(struct idxd_device *idxd, int idx, int *handle,
+ enum idxd_interrupt_type irq_type);
+int idxd_device_release_int_handle(struct idxd_device *idxd, int handle,
+ enum idxd_interrupt_type irq_type);
/* work queue control */
+void idxd_wqs_unmap_portal(struct idxd_device *idxd);
int idxd_wq_alloc_resources(struct idxd_wq *wq);
void idxd_wq_free_resources(struct idxd_wq *wq);
int idxd_wq_enable(struct idxd_wq *wq);
void idxd_wq_disable_cleanup(struct idxd_wq *wq);
int idxd_wq_set_pasid(struct idxd_wq *wq, int pasid);
int idxd_wq_disable_pasid(struct idxd_wq *wq);
+void idxd_wq_quiesce(struct idxd_wq *wq);
+int idxd_wq_init_percpu_ref(struct idxd_wq *wq);
/* submission */
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc);
void idxd_parse_completion_status(u8 status, enum dmaengine_tx_result *res);
void idxd_dma_complete_txd(struct idxd_desc *desc,
enum idxd_complete_type comp_type);
-dma_cookie_t idxd_dma_tx_submit(struct dma_async_tx_descriptor *tx);
/* cdev */
int idxd_cdev_register(void);
int idxd_wq_add_cdev(struct idxd_wq *wq);
void idxd_wq_del_cdev(struct idxd_wq *wq);
+/* perfmon */
+#if IS_ENABLED(CONFIG_INTEL_IDXD_PERFMON)
+int perfmon_pmu_init(struct idxd_device *idxd);
+void perfmon_pmu_remove(struct idxd_device *idxd);
+void perfmon_counter_overflow(struct idxd_device *idxd);
+void perfmon_init(void);
+void perfmon_exit(void);
+#else
+static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; }
+static inline void perfmon_pmu_remove(struct idxd_device *idxd) {}
+static inline void perfmon_counter_overflow(struct idxd_device *idxd) {}
+static inline void perfmon_init(void) {}
+static inline void perfmon_exit(void) {}
+#endif
+
#endif
#include "../dmaengine.h"
#include "registers.h"
#include "idxd.h"
+#include "perfmon.h"
MODULE_VERSION(IDXD_DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
#define DRV_NAME "idxd"
bool support_enqcmd;
-
-static struct idr idxd_idrs[IDXD_TYPE_MAX];
-static DEFINE_MUTEX(idxd_idr_lock);
+DEFINE_IDA(idxd_ida);
+
+static struct idxd_driver_data idxd_driver_data[] = {
+ [IDXD_TYPE_DSA] = {
+ .name_prefix = "dsa",
+ .type = IDXD_TYPE_DSA,
+ .compl_size = sizeof(struct dsa_completion_record),
+ .align = 32,
+ .dev_type = &dsa_device_type,
+ },
+ [IDXD_TYPE_IAX] = {
+ .name_prefix = "iax",
+ .type = IDXD_TYPE_IAX,
+ .compl_size = sizeof(struct iax_completion_record),
+ .align = 64,
+ .dev_type = &iax_device_type,
+ },
+};
static struct pci_device_id idxd_pci_tbl[] = {
/* DSA ver 1.0 platforms */
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_DSA_SPR0) },
+ { PCI_DEVICE_DATA(INTEL, DSA_SPR0, &idxd_driver_data[IDXD_TYPE_DSA]) },
/* IAX ver 1.0 platforms */
- { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_IAX_SPR0) },
+ { PCI_DEVICE_DATA(INTEL, IAX_SPR0, &idxd_driver_data[IDXD_TYPE_IAX]) },
{ 0, }
};
MODULE_DEVICE_TABLE(pci, idxd_pci_tbl);
-static char *idxd_name[] = {
- "dsa",
- "iax"
-};
-
-const char *idxd_get_dev_name(struct idxd_device *idxd)
-{
- return idxd_name[idxd->type];
-}
-
static int idxd_setup_interrupts(struct idxd_device *idxd)
{
struct pci_dev *pdev = idxd->pdev;
struct device *dev = &pdev->dev;
- struct msix_entry *msix;
struct idxd_irq_entry *irq_entry;
int i, msixcnt;
int rc = 0;
msixcnt = pci_msix_vec_count(pdev);
if (msixcnt < 0) {
dev_err(dev, "Not MSI-X interrupt capable.\n");
- goto err_no_irq;
+ return -ENOSPC;
}
- idxd->msix_entries = devm_kzalloc(dev, sizeof(struct msix_entry) *
- msixcnt, GFP_KERNEL);
- if (!idxd->msix_entries) {
- rc = -ENOMEM;
- goto err_no_irq;
- }
-
- for (i = 0; i < msixcnt; i++)
- idxd->msix_entries[i].entry = i;
-
- rc = pci_enable_msix_exact(pdev, idxd->msix_entries, msixcnt);
- if (rc) {
- dev_err(dev, "Failed enabling %d MSIX entries.\n", msixcnt);
- goto err_no_irq;
+ rc = pci_alloc_irq_vectors(pdev, msixcnt, msixcnt, PCI_IRQ_MSIX);
+ if (rc != msixcnt) {
+ dev_err(dev, "Failed enabling %d MSIX entries: %d\n", msixcnt, rc);
+ return -ENOSPC;
}
dev_dbg(dev, "Enabled %d msix vectors\n", msixcnt);
* We implement 1 completion list per MSI-X entry except for
* entry 0, which is for errors and others.
*/
- idxd->irq_entries = devm_kcalloc(dev, msixcnt,
- sizeof(struct idxd_irq_entry),
- GFP_KERNEL);
+ idxd->irq_entries = kcalloc_node(msixcnt, sizeof(struct idxd_irq_entry),
+ GFP_KERNEL, dev_to_node(dev));
if (!idxd->irq_entries) {
rc = -ENOMEM;
- goto err_no_irq;
+ goto err_irq_entries;
}
for (i = 0; i < msixcnt; i++) {
idxd->irq_entries[i].id = i;
idxd->irq_entries[i].idxd = idxd;
+ idxd->irq_entries[i].vector = pci_irq_vector(pdev, i);
spin_lock_init(&idxd->irq_entries[i].list_lock);
}
- msix = &idxd->msix_entries[0];
irq_entry = &idxd->irq_entries[0];
- rc = devm_request_threaded_irq(dev, msix->vector, idxd_irq_handler,
- idxd_misc_thread, 0, "idxd-misc",
- irq_entry);
+ rc = request_threaded_irq(irq_entry->vector, NULL, idxd_misc_thread,
+ 0, "idxd-misc", irq_entry);
if (rc < 0) {
dev_err(dev, "Failed to allocate misc interrupt.\n");
- goto err_no_irq;
+ goto err_misc_irq;
}
- dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n",
- msix->vector);
+ dev_dbg(dev, "Allocated idxd-misc handler on msix vector %d\n", irq_entry->vector);
/* first MSI-X entry is not for wq interrupts */
idxd->num_wq_irqs = msixcnt - 1;
for (i = 1; i < msixcnt; i++) {
- msix = &idxd->msix_entries[i];
irq_entry = &idxd->irq_entries[i];
init_llist_head(&idxd->irq_entries[i].pending_llist);
INIT_LIST_HEAD(&idxd->irq_entries[i].work_list);
- rc = devm_request_threaded_irq(dev, msix->vector,
- idxd_irq_handler,
- idxd_wq_thread, 0,
- "idxd-portal", irq_entry);
+ rc = request_threaded_irq(irq_entry->vector, NULL,
+ idxd_wq_thread, 0, "idxd-portal", irq_entry);
if (rc < 0) {
- dev_err(dev, "Failed to allocate irq %d.\n",
- msix->vector);
- goto err_no_irq;
+ dev_err(dev, "Failed to allocate irq %d.\n", irq_entry->vector);
+ goto err_wq_irqs;
+ }
+
+ dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n", i, irq_entry->vector);
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
+ /*
+ * The MSIX vector enumeration starts at 1 with vector 0 being the
+ * misc interrupt that handles non I/O completion events. The
+ * interrupt handles are for IMS enumeration on guest. The misc
+ * interrupt vector does not require a handle and therefore we start
+ * the int_handles at index 0. Since 'i' starts at 1, the first
+ * int_handles index will be 0.
+ */
+ rc = idxd_device_request_int_handle(idxd, i, &idxd->int_handles[i - 1],
+ IDXD_IRQ_MSIX);
+ if (rc < 0) {
+ free_irq(irq_entry->vector, irq_entry);
+ goto err_wq_irqs;
+ }
+ dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i - 1]);
}
- dev_dbg(dev, "Allocated idxd-msix %d for vector %d\n",
- i, msix->vector);
}
idxd_unmask_error_interrupts(idxd);
idxd_msix_perm_setup(idxd);
return 0;
- err_no_irq:
+ err_wq_irqs:
+ while (--i >= 0) {
+ irq_entry = &idxd->irq_entries[i];
+ free_irq(irq_entry->vector, irq_entry);
+ if (i != 0)
+ idxd_device_release_int_handle(idxd,
+ idxd->int_handles[i], IDXD_IRQ_MSIX);
+ }
+ err_misc_irq:
/* Disable error interrupt generation */
idxd_mask_error_interrupts(idxd);
- pci_disable_msix(pdev);
+ err_irq_entries:
+ pci_free_irq_vectors(pdev);
dev_err(dev, "No usable interrupts\n");
return rc;
}
-static int idxd_setup_internals(struct idxd_device *idxd)
+static int idxd_setup_wqs(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
- int i;
+ struct idxd_wq *wq;
+ int i, rc;
- init_waitqueue_head(&idxd->cmd_waitq);
- idxd->groups = devm_kcalloc(dev, idxd->max_groups,
- sizeof(struct idxd_group), GFP_KERNEL);
- if (!idxd->groups)
- return -ENOMEM;
-
- for (i = 0; i < idxd->max_groups; i++) {
- idxd->groups[i].idxd = idxd;
- idxd->groups[i].id = i;
- idxd->groups[i].tc_a = -1;
- idxd->groups[i].tc_b = -1;
- }
-
- idxd->wqs = devm_kcalloc(dev, idxd->max_wqs, sizeof(struct idxd_wq),
- GFP_KERNEL);
+ idxd->wqs = kcalloc_node(idxd->max_wqs, sizeof(struct idxd_wq *),
+ GFP_KERNEL, dev_to_node(dev));
if (!idxd->wqs)
return -ENOMEM;
- idxd->engines = devm_kcalloc(dev, idxd->max_engines,
- sizeof(struct idxd_engine), GFP_KERNEL);
- if (!idxd->engines)
- return -ENOMEM;
-
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ wq = kzalloc_node(sizeof(*wq), GFP_KERNEL, dev_to_node(dev));
+ if (!wq) {
+ rc = -ENOMEM;
+ goto err;
+ }
wq->id = i;
wq->idxd = idxd;
+ device_initialize(&wq->conf_dev);
+ wq->conf_dev.parent = &idxd->conf_dev;
+ wq->conf_dev.bus = &dsa_bus_type;
+ wq->conf_dev.type = &idxd_wq_device_type;
+ rc = dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
+ if (rc < 0) {
+ put_device(&wq->conf_dev);
+ goto err;
+ }
+
mutex_init(&wq->wq_lock);
- wq->idxd_cdev.minor = -1;
+ init_waitqueue_head(&wq->err_queue);
+ init_completion(&wq->wq_dead);
wq->max_xfer_bytes = idxd->max_xfer_bytes;
wq->max_batch_size = idxd->max_batch_size;
- wq->wqcfg = devm_kzalloc(dev, idxd->wqcfg_size, GFP_KERNEL);
- if (!wq->wqcfg)
- return -ENOMEM;
+ wq->wqcfg = kzalloc_node(idxd->wqcfg_size, GFP_KERNEL, dev_to_node(dev));
+ if (!wq->wqcfg) {
+ put_device(&wq->conf_dev);
+ rc = -ENOMEM;
+ goto err;
+ }
+ idxd->wqs[i] = wq;
}
+ return 0;
+
+ err:
+ while (--i >= 0)
+ put_device(&idxd->wqs[i]->conf_dev);
+ return rc;
+}
+
+static int idxd_setup_engines(struct idxd_device *idxd)
+{
+ struct idxd_engine *engine;
+ struct device *dev = &idxd->pdev->dev;
+ int i, rc;
+
+ idxd->engines = kcalloc_node(idxd->max_engines, sizeof(struct idxd_engine *),
+ GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->engines)
+ return -ENOMEM;
+
for (i = 0; i < idxd->max_engines; i++) {
- idxd->engines[i].idxd = idxd;
- idxd->engines[i].id = i;
+ engine = kzalloc_node(sizeof(*engine), GFP_KERNEL, dev_to_node(dev));
+ if (!engine) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ engine->id = i;
+ engine->idxd = idxd;
+ device_initialize(&engine->conf_dev);
+ engine->conf_dev.parent = &idxd->conf_dev;
+ engine->conf_dev.type = &idxd_engine_device_type;
+ rc = dev_set_name(&engine->conf_dev, "engine%d.%d", idxd->id, engine->id);
+ if (rc < 0) {
+ put_device(&engine->conf_dev);
+ goto err;
+ }
+
+ idxd->engines[i] = engine;
}
- idxd->wq = create_workqueue(dev_name(dev));
- if (!idxd->wq)
+ return 0;
+
+ err:
+ while (--i >= 0)
+ put_device(&idxd->engines[i]->conf_dev);
+ return rc;
+}
+
+static int idxd_setup_groups(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ struct idxd_group *group;
+ int i, rc;
+
+ idxd->groups = kcalloc_node(idxd->max_groups, sizeof(struct idxd_group *),
+ GFP_KERNEL, dev_to_node(dev));
+ if (!idxd->groups)
return -ENOMEM;
+ for (i = 0; i < idxd->max_groups; i++) {
+ group = kzalloc_node(sizeof(*group), GFP_KERNEL, dev_to_node(dev));
+ if (!group) {
+ rc = -ENOMEM;
+ goto err;
+ }
+
+ group->id = i;
+ group->idxd = idxd;
+ device_initialize(&group->conf_dev);
+ group->conf_dev.parent = &idxd->conf_dev;
+ group->conf_dev.bus = &dsa_bus_type;
+ group->conf_dev.type = &idxd_group_device_type;
+ rc = dev_set_name(&group->conf_dev, "group%d.%d", idxd->id, group->id);
+ if (rc < 0) {
+ put_device(&group->conf_dev);
+ goto err;
+ }
+
+ idxd->groups[i] = group;
+ group->tc_a = -1;
+ group->tc_b = -1;
+ }
+
+ return 0;
+
+ err:
+ while (--i >= 0)
+ put_device(&idxd->groups[i]->conf_dev);
+ return rc;
+}
+
+static int idxd_setup_internals(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ int rc, i;
+
+ init_waitqueue_head(&idxd->cmd_waitq);
+
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_REQUEST_INT_HANDLE)) {
+ idxd->int_handles = devm_kcalloc(dev, idxd->max_wqs, sizeof(int), GFP_KERNEL);
+ if (!idxd->int_handles)
+ return -ENOMEM;
+ }
+
+ rc = idxd_setup_wqs(idxd);
+ if (rc < 0)
+ goto err_wqs;
+
+ rc = idxd_setup_engines(idxd);
+ if (rc < 0)
+ goto err_engine;
+
+ rc = idxd_setup_groups(idxd);
+ if (rc < 0)
+ goto err_group;
+
+ idxd->wq = create_workqueue(dev_name(dev));
+ if (!idxd->wq) {
+ rc = -ENOMEM;
+ goto err_wkq_create;
+ }
+
return 0;
+
+ err_wkq_create:
+ for (i = 0; i < idxd->max_groups; i++)
+ put_device(&idxd->groups[i]->conf_dev);
+ err_group:
+ for (i = 0; i < idxd->max_engines; i++)
+ put_device(&idxd->engines[i]->conf_dev);
+ err_engine:
+ for (i = 0; i < idxd->max_wqs; i++)
+ put_device(&idxd->wqs[i]->conf_dev);
+ err_wqs:
+ kfree(idxd->int_handles);
+ return rc;
}
static void idxd_read_table_offsets(struct idxd_device *idxd)
/* reading generic capabilities */
idxd->hw.gen_cap.bits = ioread64(idxd->reg_base + IDXD_GENCAP_OFFSET);
dev_dbg(dev, "gen_cap: %#llx\n", idxd->hw.gen_cap.bits);
+
+ if (idxd->hw.gen_cap.cmd_cap) {
+ idxd->hw.cmd_cap = ioread32(idxd->reg_base + IDXD_CMDCAP_OFFSET);
+ dev_dbg(dev, "cmd_cap: %#x\n", idxd->hw.cmd_cap);
+ }
+
idxd->max_xfer_bytes = 1ULL << idxd->hw.gen_cap.max_xfer_shift;
dev_dbg(dev, "max xfer size: %llu bytes\n", idxd->max_xfer_bytes);
idxd->max_batch_size = 1U << idxd->hw.gen_cap.max_batch_shift;
}
}
-static struct idxd_device *idxd_alloc(struct pci_dev *pdev)
+static struct idxd_device *idxd_alloc(struct pci_dev *pdev, struct idxd_driver_data *data)
{
struct device *dev = &pdev->dev;
struct idxd_device *idxd;
+ int rc;
- idxd = devm_kzalloc(dev, sizeof(struct idxd_device), GFP_KERNEL);
+ idxd = kzalloc_node(sizeof(*idxd), GFP_KERNEL, dev_to_node(dev));
if (!idxd)
return NULL;
idxd->pdev = pdev;
+ idxd->data = data;
+ idxd->id = ida_alloc(&idxd_ida, GFP_KERNEL);
+ if (idxd->id < 0)
+ return NULL;
+
+ device_initialize(&idxd->conf_dev);
+ idxd->conf_dev.parent = dev;
+ idxd->conf_dev.bus = &dsa_bus_type;
+ idxd->conf_dev.type = idxd->data->dev_type;
+ rc = dev_set_name(&idxd->conf_dev, "%s%d", idxd->data->name_prefix, idxd->id);
+ if (rc < 0) {
+ put_device(&idxd->conf_dev);
+ return NULL;
+ }
+
spin_lock_init(&idxd->dev_lock);
+ spin_lock_init(&idxd->cmd_lock);
return idxd;
}
dev_dbg(dev, "IDXD reset complete\n");
if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
- rc = idxd_enable_system_pasid(idxd);
- if (rc < 0)
- dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
- else
- set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+ rc = iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA);
+ if (rc == 0) {
+ rc = idxd_enable_system_pasid(idxd);
+ if (rc < 0) {
+ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
+ dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
+ } else {
+ set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+ }
+ } else {
+ dev_warn(dev, "Unable to turn on SVA feature.\n");
+ }
} else if (!sva) {
dev_warn(dev, "User forced SVA off via module param.\n");
}
rc = idxd_setup_internals(idxd);
if (rc)
- goto err_setup;
+ goto err;
+
+ /* If the configs are readonly, then load them from device */
+ if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+ dev_dbg(dev, "Loading RO device config\n");
+ rc = idxd_device_load_config(idxd);
+ if (rc < 0)
+ goto err;
+ }
rc = idxd_setup_interrupts(idxd);
if (rc)
- goto err_setup;
+ goto err;
dev_dbg(dev, "IDXD interrupt setup complete.\n");
- mutex_lock(&idxd_idr_lock);
- idxd->id = idr_alloc(&idxd_idrs[idxd->type], idxd, 0, 0, GFP_KERNEL);
- mutex_unlock(&idxd_idr_lock);
- if (idxd->id < 0) {
- rc = -ENOMEM;
- goto err_idr_fail;
- }
-
idxd->major = idxd_cdev_get_major(idxd);
+ rc = perfmon_pmu_init(idxd);
+ if (rc < 0)
+ dev_warn(dev, "Failed to initialize perfmon. No PMU support: %d\n", rc);
+
dev_dbg(dev, "IDXD device %d probed successfully\n", idxd->id);
return 0;
- err_idr_fail:
- idxd_mask_error_interrupts(idxd);
- idxd_mask_msix_vectors(idxd);
- err_setup:
+ err:
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
+ iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
return rc;
}
-static void idxd_type_init(struct idxd_device *idxd)
-{
- if (idxd->type == IDXD_TYPE_DSA)
- idxd->compl_size = sizeof(struct dsa_completion_record);
- else if (idxd->type == IDXD_TYPE_IAX)
- idxd->compl_size = sizeof(struct iax_completion_record);
-}
-
static int idxd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
struct device *dev = &pdev->dev;
struct idxd_device *idxd;
+ struct idxd_driver_data *data = (struct idxd_driver_data *)id->driver_data;
int rc;
- rc = pcim_enable_device(pdev);
+ rc = pci_enable_device(pdev);
if (rc)
return rc;
dev_dbg(dev, "Alloc IDXD context\n");
- idxd = idxd_alloc(pdev);
- if (!idxd)
- return -ENOMEM;
+ idxd = idxd_alloc(pdev, data);
+ if (!idxd) {
+ rc = -ENOMEM;
+ goto err_idxd_alloc;
+ }
dev_dbg(dev, "Mapping BARs\n");
- idxd->reg_base = pcim_iomap(pdev, IDXD_MMIO_BAR, 0);
- if (!idxd->reg_base)
- return -ENOMEM;
+ idxd->reg_base = pci_iomap(pdev, IDXD_MMIO_BAR, 0);
+ if (!idxd->reg_base) {
+ rc = -ENOMEM;
+ goto err_iomap;
+ }
dev_dbg(dev, "Set DMA masks\n");
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
if (rc)
rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc)
- return rc;
+ goto err;
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
if (rc)
rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
if (rc)
- return rc;
-
- idxd_set_type(idxd);
-
- idxd_type_init(idxd);
+ goto err;
dev_dbg(dev, "Set PCI master\n");
pci_set_master(pdev);
rc = idxd_probe(idxd);
if (rc) {
dev_err(dev, "Intel(R) IDXD DMA Engine init failed\n");
- return -ENODEV;
+ goto err;
}
- rc = idxd_setup_sysfs(idxd);
+ rc = idxd_register_devices(idxd);
if (rc) {
dev_err(dev, "IDXD sysfs setup failed\n");
- return -ENODEV;
+ goto err;
}
idxd->state = IDXD_DEV_CONF_READY;
idxd->hw.version);
return 0;
+
+ err:
+ pci_iounmap(pdev, idxd->reg_base);
+ err_iomap:
+ put_device(&idxd->conf_dev);
+ err_idxd_alloc:
+ pci_disable_device(pdev);
+ return rc;
}
static void idxd_flush_pending_llist(struct idxd_irq_entry *ie)
}
}
+void idxd_wqs_quiesce(struct idxd_device *idxd)
+{
+ struct idxd_wq *wq;
+ int i;
+
+ for (i = 0; i < idxd->max_wqs; i++) {
+ wq = idxd->wqs[i];
+ if (wq->state == IDXD_WQ_ENABLED && wq->type == IDXD_WQT_KERNEL)
+ idxd_wq_quiesce(wq);
+ }
+}
+
+static void idxd_release_int_handles(struct idxd_device *idxd)
+{
+ struct device *dev = &idxd->pdev->dev;
+ int i, rc;
+
+ for (i = 0; i < idxd->num_wq_irqs; i++) {
+ if (idxd->hw.cmd_cap & BIT(IDXD_CMD_RELEASE_INT_HANDLE)) {
+ rc = idxd_device_release_int_handle(idxd, idxd->int_handles[i],
+ IDXD_IRQ_MSIX);
+ if (rc < 0)
+ dev_warn(dev, "irq handle %d release failed\n",
+ idxd->int_handles[i]);
+ else
+ dev_dbg(dev, "int handle requested: %u\n", idxd->int_handles[i]);
+ }
+ }
+}
+
static void idxd_shutdown(struct pci_dev *pdev)
{
struct idxd_device *idxd = pci_get_drvdata(pdev);
for (i = 0; i < msixcnt; i++) {
irq_entry = &idxd->irq_entries[i];
- synchronize_irq(idxd->msix_entries[i].vector);
+ synchronize_irq(irq_entry->vector);
+ free_irq(irq_entry->vector, irq_entry);
if (i == 0)
continue;
idxd_flush_pending_llist(irq_entry);
}
idxd_msix_perm_clear(idxd);
+ idxd_release_int_handles(idxd);
+ pci_free_irq_vectors(pdev);
+ pci_iounmap(pdev, idxd->reg_base);
+ pci_disable_device(pdev);
destroy_workqueue(idxd->wq);
}
struct idxd_device *idxd = pci_get_drvdata(pdev);
dev_dbg(&pdev->dev, "%s called\n", __func__);
- idxd_cleanup_sysfs(idxd);
idxd_shutdown(pdev);
if (device_pasid_enabled(idxd))
idxd_disable_system_pasid(idxd);
- mutex_lock(&idxd_idr_lock);
- idr_remove(&idxd_idrs[idxd->type], idxd->id);
- mutex_unlock(&idxd_idr_lock);
+ idxd_unregister_devices(idxd);
+ perfmon_pmu_remove(idxd);
+ iommu_dev_disable_feature(&pdev->dev, IOMMU_DEV_FEAT_SVA);
}
static struct pci_driver idxd_pci_driver = {
static int __init idxd_init_module(void)
{
- int err, i;
+ int err;
/*
* If the CPU does not support MOVDIR64B or ENQCMDS, there's no point in
else
support_enqcmd = true;
- for (i = 0; i < IDXD_TYPE_MAX; i++)
- idr_init(&idxd_idrs[i]);
+ perfmon_init();
err = idxd_register_bus_type();
if (err < 0)
pci_unregister_driver(&idxd_pci_driver);
idxd_cdev_remove();
idxd_unregister_bus_type();
+ perfmon_exit();
}
module_exit(idxd_exit_module);
goto out;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (wq->state == IDXD_WQ_ENABLED) {
rc = idxd_wq_enable(wq);
return 0;
}
-irqreturn_t idxd_irq_handler(int vec, void *data)
-{
- struct idxd_irq_entry *irq_entry = data;
- struct idxd_device *idxd = irq_entry->idxd;
-
- idxd_mask_msix_vector(idxd, irq_entry->id);
- return IRQ_WAKE_THREAD;
-}
-
static int process_misc_interrupts(struct idxd_device *idxd, u32 cause)
{
struct device *dev = &idxd->pdev->dev;
if (idxd->sw_err.valid && idxd->sw_err.wq_idx_valid) {
int id = idxd->sw_err.wq_idx;
- struct idxd_wq *wq = &idxd->wqs[id];
+ struct idxd_wq *wq = idxd->wqs[id];
if (wq->type == IDXD_WQT_USER)
- wake_up_interruptible(&wq->idxd_cdev.err_queue);
+ wake_up_interruptible(&wq->err_queue);
} else {
int i;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (wq->type == IDXD_WQT_USER)
- wake_up_interruptible(&wq->idxd_cdev.err_queue);
+ wake_up_interruptible(&wq->err_queue);
}
}
}
if (cause & IDXD_INTC_PERFMON_OVFL) {
- /*
- * Driver does not utilize perfmon counter overflow interrupt
- * yet.
- */
val |= IDXD_INTC_PERFMON_OVFL;
+ perfmon_counter_overflow(idxd);
}
val ^= cause;
queue_work(idxd->wq, &idxd->work);
} else {
spin_lock_bh(&idxd->dev_lock);
+ idxd_wqs_quiesce(idxd);
+ idxd_wqs_unmap_portal(idxd);
idxd_device_wqs_clear_state(idxd);
dev_err(&idxd->pdev->dev,
"idxd halted, need %s.\n",
iowrite32(cause, idxd->reg_base + IDXD_INTCAUSE_OFFSET);
}
- idxd_unmask_msix_vector(idxd, irq_entry->id);
return IRQ_HANDLED;
}
int processed;
processed = idxd_desc_process(irq_entry);
- idxd_unmask_msix_vector(irq_entry->idxd, irq_entry->id);
-
if (processed == 0)
return IRQ_NONE;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */
+
+#include <linux/sched/task.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include "idxd.h"
+#include "perfmon.h"
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf);
+
+static cpumask_t perfmon_dsa_cpu_mask;
+static bool cpuhp_set_up;
+static enum cpuhp_state cpuhp_slot;
+
+/*
+ * perf userspace reads this attribute to determine which cpus to open
+ * counters on. It's connected to perfmon_dsa_cpu_mask, which is
+ * maintained by the cpu hotplug handlers.
+ */
+static DEVICE_ATTR_RO(cpumask);
+
+static struct attribute *perfmon_cpumask_attrs[] = {
+ &dev_attr_cpumask.attr,
+ NULL,
+};
+
+static struct attribute_group cpumask_attr_group = {
+ .attrs = perfmon_cpumask_attrs,
+};
+
+/*
+ * These attributes specify the bits in the config word that the perf
+ * syscall uses to pass the event ids and categories to perfmon.
+ */
+DEFINE_PERFMON_FORMAT_ATTR(event_category, "config:0-3");
+DEFINE_PERFMON_FORMAT_ATTR(event, "config:4-31");
+
+/*
+ * These attributes specify the bits in the config1 word that the perf
+ * syscall uses to pass filter data to perfmon.
+ */
+DEFINE_PERFMON_FORMAT_ATTR(filter_wq, "config1:0-31");
+DEFINE_PERFMON_FORMAT_ATTR(filter_tc, "config1:32-39");
+DEFINE_PERFMON_FORMAT_ATTR(filter_pgsz, "config1:40-43");
+DEFINE_PERFMON_FORMAT_ATTR(filter_sz, "config1:44-51");
+DEFINE_PERFMON_FORMAT_ATTR(filter_eng, "config1:52-59");
+
+#define PERFMON_FILTERS_START 2
+#define PERFMON_FILTERS_MAX 5
+
+static struct attribute *perfmon_format_attrs[] = {
+ &format_attr_idxd_event_category.attr,
+ &format_attr_idxd_event.attr,
+ &format_attr_idxd_filter_wq.attr,
+ &format_attr_idxd_filter_tc.attr,
+ &format_attr_idxd_filter_pgsz.attr,
+ &format_attr_idxd_filter_sz.attr,
+ &format_attr_idxd_filter_eng.attr,
+ NULL,
+};
+
+static struct attribute_group perfmon_format_attr_group = {
+ .name = "format",
+ .attrs = perfmon_format_attrs,
+};
+
+static const struct attribute_group *perfmon_attr_groups[] = {
+ &perfmon_format_attr_group,
+ &cpumask_attr_group,
+ NULL,
+};
+
+static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask);
+}
+
+static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event)
+{
+ return &idxd_pmu->pmu == event->pmu;
+}
+
+static int perfmon_collect_events(struct idxd_pmu *idxd_pmu,
+ struct perf_event *leader,
+ bool do_grp)
+{
+ struct perf_event *event;
+ int n, max_count;
+
+ max_count = idxd_pmu->n_counters;
+ n = idxd_pmu->n_events;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ if (is_idxd_event(idxd_pmu, leader)) {
+ idxd_pmu->event_list[n] = leader;
+ idxd_pmu->event_list[n]->hw.idx = n;
+ n++;
+ }
+
+ if (!do_grp)
+ return n;
+
+ for_each_sibling_event(event, leader) {
+ if (!is_idxd_event(idxd_pmu, event) ||
+ event->state <= PERF_EVENT_STATE_OFF)
+ continue;
+
+ if (n >= max_count)
+ return -EINVAL;
+
+ idxd_pmu->event_list[n] = event;
+ idxd_pmu->event_list[n]->hw.idx = n;
+ n++;
+ }
+
+ return n;
+}
+
+static void perfmon_assign_hw_event(struct idxd_pmu *idxd_pmu,
+ struct perf_event *event, int idx)
+{
+ struct idxd_device *idxd = idxd_pmu->idxd;
+ struct hw_perf_event *hwc = &event->hw;
+
+ hwc->idx = idx;
+ hwc->config_base = ioread64(CNTRCFG_REG(idxd, idx));
+ hwc->event_base = ioread64(CNTRCFG_REG(idxd, idx));
+}
+
+static int perfmon_assign_event(struct idxd_pmu *idxd_pmu,
+ struct perf_event *event)
+{
+ int i;
+
+ for (i = 0; i < IDXD_PMU_EVENT_MAX; i++)
+ if (!test_and_set_bit(i, idxd_pmu->used_mask))
+ return i;
+
+ return -EINVAL;
+}
+
+/*
+ * Check whether there are enough counters to satisfy that all the
+ * events in the group can actually be scheduled at the same time.
+ *
+ * To do this, create a fake idxd_pmu object so the event collection
+ * and assignment functions can be used without affecting the internal
+ * state of the real idxd_pmu object.
+ */
+static int perfmon_validate_group(struct idxd_pmu *pmu,
+ struct perf_event *event)
+{
+ struct perf_event *leader = event->group_leader;
+ struct idxd_pmu *fake_pmu;
+ int i, ret = 0, n, idx;
+
+ fake_pmu = kzalloc(sizeof(*fake_pmu), GFP_KERNEL);
+ if (!fake_pmu)
+ return -ENOMEM;
+
+ fake_pmu->pmu.name = pmu->pmu.name;
+ fake_pmu->n_counters = pmu->n_counters;
+
+ n = perfmon_collect_events(fake_pmu, leader, true);
+ if (n < 0) {
+ ret = n;
+ goto out;
+ }
+
+ fake_pmu->n_events = n;
+ n = perfmon_collect_events(fake_pmu, event, false);
+ if (n < 0) {
+ ret = n;
+ goto out;
+ }
+
+ fake_pmu->n_events = n;
+
+ for (i = 0; i < n; i++) {
+ event = fake_pmu->event_list[i];
+
+ idx = perfmon_assign_event(fake_pmu, event);
+ if (idx < 0) {
+ ret = idx;
+ goto out;
+ }
+ }
+out:
+ kfree(fake_pmu);
+
+ return ret;
+}
+
+static int perfmon_pmu_event_init(struct perf_event *event)
+{
+ struct idxd_device *idxd;
+ int ret = 0;
+
+ idxd = event_to_idxd(event);
+ event->hw.idx = -1;
+
+ if (event->attr.type != event->pmu->type)
+ return -ENOENT;
+
+ /* sampling not supported */
+ if (event->attr.sample_period)
+ return -EINVAL;
+
+ if (event->cpu < 0)
+ return -EINVAL;
+
+ if (event->pmu != &idxd->idxd_pmu->pmu)
+ return -EINVAL;
+
+ event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd));
+ event->cpu = idxd->idxd_pmu->cpu;
+ event->hw.config = event->attr.config;
+
+ if (event->group_leader != event)
+ /* non-group events have themselves as leader */
+ ret = perfmon_validate_group(idxd->idxd_pmu, event);
+
+ return ret;
+}
+
+static inline u64 perfmon_pmu_read_counter(struct perf_event *event)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct idxd_device *idxd;
+ int cntr = hwc->idx;
+
+ idxd = event_to_idxd(event);
+
+ return ioread64(CNTRDATA_REG(idxd, cntr));
+}
+
+static void perfmon_pmu_event_update(struct perf_event *event)
+{
+ struct idxd_device *idxd = event_to_idxd(event);
+ u64 prev_raw_count, new_raw_count, delta, p, n;
+ int shift = 64 - idxd->idxd_pmu->counter_width;
+ struct hw_perf_event *hwc = &event->hw;
+
+ do {
+ prev_raw_count = local64_read(&hwc->prev_count);
+ new_raw_count = perfmon_pmu_read_counter(event);
+ } while (local64_cmpxchg(&hwc->prev_count, prev_raw_count,
+ new_raw_count) != prev_raw_count);
+
+ n = (new_raw_count << shift);
+ p = (prev_raw_count << shift);
+
+ delta = ((n - p) >> shift);
+
+ local64_add(delta, &event->count);
+}
+
+void perfmon_counter_overflow(struct idxd_device *idxd)
+{
+ int i, n_counters, max_loop = OVERFLOW_SIZE;
+ struct perf_event *event;
+ unsigned long ovfstatus;
+
+ n_counters = min(idxd->idxd_pmu->n_counters, OVERFLOW_SIZE);
+
+ ovfstatus = ioread32(OVFSTATUS_REG(idxd));
+
+ /*
+ * While updating overflowed counters, other counters behind
+ * them could overflow and be missed in a given pass.
+ * Normally this could happen at most n_counters times, but in
+ * theory a tiny counter width could result in continual
+ * overflows and endless looping. max_loop provides a
+ * failsafe in that highly unlikely case.
+ */
+ while (ovfstatus && max_loop--) {
+ /* Figure out which counter(s) overflowed */
+ for_each_set_bit(i, &ovfstatus, n_counters) {
+ unsigned long ovfstatus_clear = 0;
+
+ /* Update event->count for overflowed counter */
+ event = idxd->idxd_pmu->event_list[i];
+ perfmon_pmu_event_update(event);
+ /* Writing 1 to OVFSTATUS bit clears it */
+ set_bit(i, &ovfstatus_clear);
+ iowrite32(ovfstatus_clear, OVFSTATUS_REG(idxd));
+ }
+
+ ovfstatus = ioread32(OVFSTATUS_REG(idxd));
+ }
+
+ /*
+ * Should never happen. If so, it means a counter(s) looped
+ * around twice while this handler was running.
+ */
+ WARN_ON_ONCE(ovfstatus);
+}
+
+static inline void perfmon_reset_config(struct idxd_device *idxd)
+{
+ iowrite32(CONFIG_RESET, PERFRST_REG(idxd));
+ iowrite32(0, OVFSTATUS_REG(idxd));
+ iowrite32(0, PERFFRZ_REG(idxd));
+}
+
+static inline void perfmon_reset_counters(struct idxd_device *idxd)
+{
+ iowrite32(CNTR_RESET, PERFRST_REG(idxd));
+}
+
+static inline void perfmon_reset(struct idxd_device *idxd)
+{
+ perfmon_reset_config(idxd);
+ perfmon_reset_counters(idxd);
+}
+
+static void perfmon_pmu_event_start(struct perf_event *event, int mode)
+{
+ u32 flt_wq, flt_tc, flt_pg_sz, flt_xfer_sz, flt_eng = 0;
+ u64 cntr_cfg, cntrdata, event_enc, event_cat = 0;
+ struct hw_perf_event *hwc = &event->hw;
+ union filter_cfg flt_cfg;
+ union event_cfg event_cfg;
+ struct idxd_device *idxd;
+ int cntr;
+
+ idxd = event_to_idxd(event);
+
+ event->hw.idx = hwc->idx;
+ cntr = hwc->idx;
+
+ /* Obtain event category and event value from user space */
+ event_cfg.val = event->attr.config;
+ flt_cfg.val = event->attr.config1;
+ event_cat = event_cfg.event_cat;
+ event_enc = event_cfg.event_enc;
+
+ /* Obtain filter configuration from user space */
+ flt_wq = flt_cfg.wq;
+ flt_tc = flt_cfg.tc;
+ flt_pg_sz = flt_cfg.pg_sz;
+ flt_xfer_sz = flt_cfg.xfer_sz;
+ flt_eng = flt_cfg.eng;
+
+ if (flt_wq && test_bit(FLT_WQ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_wq, FLTCFG_REG(idxd, cntr, FLT_WQ));
+ if (flt_tc && test_bit(FLT_TC, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_tc, FLTCFG_REG(idxd, cntr, FLT_TC));
+ if (flt_pg_sz && test_bit(FLT_PG_SZ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_pg_sz, FLTCFG_REG(idxd, cntr, FLT_PG_SZ));
+ if (flt_xfer_sz && test_bit(FLT_XFER_SZ, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_xfer_sz, FLTCFG_REG(idxd, cntr, FLT_XFER_SZ));
+ if (flt_eng && test_bit(FLT_ENG, &idxd->idxd_pmu->supported_filters))
+ iowrite32(flt_eng, FLTCFG_REG(idxd, cntr, FLT_ENG));
+
+ /* Read the start value */
+ cntrdata = ioread64(CNTRDATA_REG(idxd, cntr));
+ local64_set(&event->hw.prev_count, cntrdata);
+
+ /* Set counter to event/category */
+ cntr_cfg = event_cat << CNTRCFG_CATEGORY_SHIFT;
+ cntr_cfg |= event_enc << CNTRCFG_EVENT_SHIFT;
+ /* Set interrupt on overflow and counter enable bits */
+ cntr_cfg |= (CNTRCFG_IRQ_OVERFLOW | CNTRCFG_ENABLE);
+
+ iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));
+}
+
+static void perfmon_pmu_event_stop(struct perf_event *event, int mode)
+{
+ struct hw_perf_event *hwc = &event->hw;
+ struct idxd_device *idxd;
+ int i, cntr = hwc->idx;
+ u64 cntr_cfg;
+
+ idxd = event_to_idxd(event);
+
+ /* remove this event from event list */
+ for (i = 0; i < idxd->idxd_pmu->n_events; i++) {
+ if (event != idxd->idxd_pmu->event_list[i])
+ continue;
+
+ for (++i; i < idxd->idxd_pmu->n_events; i++)
+ idxd->idxd_pmu->event_list[i - 1] = idxd->idxd_pmu->event_list[i];
+ --idxd->idxd_pmu->n_events;
+ break;
+ }
+
+ cntr_cfg = ioread64(CNTRCFG_REG(idxd, cntr));
+ cntr_cfg &= ~CNTRCFG_ENABLE;
+ iowrite64(cntr_cfg, CNTRCFG_REG(idxd, cntr));
+
+ if (mode == PERF_EF_UPDATE)
+ perfmon_pmu_event_update(event);
+
+ event->hw.idx = -1;
+ clear_bit(cntr, idxd->idxd_pmu->used_mask);
+}
+
+static void perfmon_pmu_event_del(struct perf_event *event, int mode)
+{
+ perfmon_pmu_event_stop(event, PERF_EF_UPDATE);
+}
+
+static int perfmon_pmu_event_add(struct perf_event *event, int flags)
+{
+ struct idxd_device *idxd = event_to_idxd(event);
+ struct idxd_pmu *idxd_pmu = idxd->idxd_pmu;
+ struct hw_perf_event *hwc = &event->hw;
+ int idx, n;
+
+ n = perfmon_collect_events(idxd_pmu, event, false);
+ if (n < 0)
+ return n;
+
+ hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED;
+ if (!(flags & PERF_EF_START))
+ hwc->state |= PERF_HES_ARCH;
+
+ idx = perfmon_assign_event(idxd_pmu, event);
+ if (idx < 0)
+ return idx;
+
+ perfmon_assign_hw_event(idxd_pmu, event, idx);
+
+ if (flags & PERF_EF_START)
+ perfmon_pmu_event_start(event, 0);
+
+ idxd_pmu->n_events = n;
+
+ return 0;
+}
+
+static void enable_perfmon_pmu(struct idxd_device *idxd)
+{
+ iowrite32(COUNTER_UNFREEZE, PERFFRZ_REG(idxd));
+}
+
+static void disable_perfmon_pmu(struct idxd_device *idxd)
+{
+ iowrite32(COUNTER_FREEZE, PERFFRZ_REG(idxd));
+}
+
+static void perfmon_pmu_enable(struct pmu *pmu)
+{
+ struct idxd_device *idxd = pmu_to_idxd(pmu);
+
+ enable_perfmon_pmu(idxd);
+}
+
+static void perfmon_pmu_disable(struct pmu *pmu)
+{
+ struct idxd_device *idxd = pmu_to_idxd(pmu);
+
+ disable_perfmon_pmu(idxd);
+}
+
+static void skip_filter(int i)
+{
+ int j;
+
+ for (j = i; j < PERFMON_FILTERS_MAX; j++)
+ perfmon_format_attrs[PERFMON_FILTERS_START + j] =
+ perfmon_format_attrs[PERFMON_FILTERS_START + j + 1];
+}
+
+static void idxd_pmu_init(struct idxd_pmu *idxd_pmu)
+{
+ int i;
+
+ for (i = 0 ; i < PERFMON_FILTERS_MAX; i++) {
+ if (!test_bit(i, &idxd_pmu->supported_filters))
+ skip_filter(i);
+ }
+
+ idxd_pmu->pmu.name = idxd_pmu->name;
+ idxd_pmu->pmu.attr_groups = perfmon_attr_groups;
+ idxd_pmu->pmu.task_ctx_nr = perf_invalid_context;
+ idxd_pmu->pmu.event_init = perfmon_pmu_event_init;
+ idxd_pmu->pmu.pmu_enable = perfmon_pmu_enable,
+ idxd_pmu->pmu.pmu_disable = perfmon_pmu_disable,
+ idxd_pmu->pmu.add = perfmon_pmu_event_add;
+ idxd_pmu->pmu.del = perfmon_pmu_event_del;
+ idxd_pmu->pmu.start = perfmon_pmu_event_start;
+ idxd_pmu->pmu.stop = perfmon_pmu_event_stop;
+ idxd_pmu->pmu.read = perfmon_pmu_event_update;
+ idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE;
+ idxd_pmu->pmu.module = THIS_MODULE;
+}
+
+void perfmon_pmu_remove(struct idxd_device *idxd)
+{
+ if (!idxd->idxd_pmu)
+ return;
+
+ cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node);
+ perf_pmu_unregister(&idxd->idxd_pmu->pmu);
+ kfree(idxd->idxd_pmu);
+ idxd->idxd_pmu = NULL;
+}
+
+static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node)
+{
+ struct idxd_pmu *idxd_pmu;
+
+ idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
+
+ /* select the first online CPU as the designated reader */
+ if (cpumask_empty(&perfmon_dsa_cpu_mask)) {
+ cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask);
+ idxd_pmu->cpu = cpu;
+ }
+
+ return 0;
+}
+
+static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node)
+{
+ struct idxd_pmu *idxd_pmu;
+ unsigned int target;
+
+ idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node);
+
+ if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask))
+ return 0;
+
+ target = cpumask_any_but(cpu_online_mask, cpu);
+
+ /* migrate events if there is a valid target */
+ if (target < nr_cpu_ids)
+ cpumask_set_cpu(target, &perfmon_dsa_cpu_mask);
+ else
+ target = -1;
+
+ perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target);
+
+ return 0;
+}
+
+int perfmon_pmu_init(struct idxd_device *idxd)
+{
+ union idxd_perfcap perfcap;
+ struct idxd_pmu *idxd_pmu;
+ int rc = -ENODEV;
+
+ /*
+ * perfmon module initialization failed, nothing to do
+ */
+ if (!cpuhp_set_up)
+ return -ENODEV;
+
+ /*
+ * If perfmon_offset or num_counters is 0, it means perfmon is
+ * not supported on this hardware.
+ */
+ if (idxd->perfmon_offset == 0)
+ return -ENODEV;
+
+ idxd_pmu = kzalloc(sizeof(*idxd_pmu), GFP_KERNEL);
+ if (!idxd_pmu)
+ return -ENOMEM;
+
+ idxd_pmu->idxd = idxd;
+ idxd->idxd_pmu = idxd_pmu;
+
+ if (idxd->data->type == IDXD_TYPE_DSA) {
+ rc = sprintf(idxd_pmu->name, "dsa%d", idxd->id);
+ if (rc < 0)
+ goto free;
+ } else if (idxd->data->type == IDXD_TYPE_IAX) {
+ rc = sprintf(idxd_pmu->name, "iax%d", idxd->id);
+ if (rc < 0)
+ goto free;
+ } else {
+ goto free;
+ }
+
+ perfmon_reset(idxd);
+
+ perfcap.bits = ioread64(PERFCAP_REG(idxd));
+
+ /*
+ * If total perf counter is 0, stop further registration.
+ * This is necessary in order to support driver running on
+ * guest which does not have pmon support.
+ */
+ if (perfcap.num_perf_counter == 0)
+ goto free;
+
+ /* A counter width of 0 means it can't count */
+ if (perfcap.counter_width == 0)
+ goto free;
+
+ /* Overflow interrupt and counter freeze support must be available */
+ if (!perfcap.overflow_interrupt || !perfcap.counter_freeze)
+ goto free;
+
+ /* Number of event categories cannot be 0 */
+ if (perfcap.num_event_category == 0)
+ goto free;
+
+ /*
+ * We don't support per-counter capabilities for now.
+ */
+ if (perfcap.cap_per_counter)
+ goto free;
+
+ idxd_pmu->n_event_categories = perfcap.num_event_category;
+ idxd_pmu->supported_event_categories = perfcap.global_event_category;
+ idxd_pmu->per_counter_caps_supported = perfcap.cap_per_counter;
+
+ /* check filter capability. If 0, then filters are not supported */
+ idxd_pmu->supported_filters = perfcap.filter;
+ if (perfcap.filter)
+ idxd_pmu->n_filters = hweight8(perfcap.filter);
+
+ /* Store the total number of counters categories, and counter width */
+ idxd_pmu->n_counters = perfcap.num_perf_counter;
+ idxd_pmu->counter_width = perfcap.counter_width;
+
+ idxd_pmu_init(idxd_pmu);
+
+ rc = perf_pmu_register(&idxd_pmu->pmu, idxd_pmu->name, -1);
+ if (rc)
+ goto free;
+
+ rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node);
+ if (rc) {
+ perf_pmu_unregister(&idxd->idxd_pmu->pmu);
+ goto free;
+ }
+out:
+ return rc;
+free:
+ kfree(idxd_pmu);
+ idxd->idxd_pmu = NULL;
+
+ goto out;
+}
+
+void __init perfmon_init(void)
+{
+ int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN,
+ "driver/dma/idxd/perf:online",
+ perf_event_cpu_online,
+ perf_event_cpu_offline);
+ if (WARN_ON(rc < 0))
+ return;
+
+ cpuhp_slot = rc;
+ cpuhp_set_up = true;
+}
+
+void __exit perfmon_exit(void)
+{
+ if (cpuhp_set_up)
+ cpuhp_remove_multi_state(cpuhp_slot);
+}
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2020 Intel Corporation. All rights rsvd. */
+
+#ifndef _PERFMON_H_
+#define _PERFMON_H_
+
+#include <linux/slab.h>
+#include <linux/pci.h>
+#include <linux/sbitmap.h>
+#include <linux/dmaengine.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/wait.h>
+#include <linux/cdev.h>
+#include <linux/uuid.h>
+#include <linux/idxd.h>
+#include <linux/perf_event.h>
+#include "registers.h"
+
+static inline struct idxd_pmu *event_to_pmu(struct perf_event *event)
+{
+ struct idxd_pmu *idxd_pmu;
+ struct pmu *pmu;
+
+ pmu = event->pmu;
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu;
+}
+
+static inline struct idxd_device *event_to_idxd(struct perf_event *event)
+{
+ struct idxd_pmu *idxd_pmu;
+ struct pmu *pmu;
+
+ pmu = event->pmu;
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu->idxd;
+}
+
+static inline struct idxd_device *pmu_to_idxd(struct pmu *pmu)
+{
+ struct idxd_pmu *idxd_pmu;
+
+ idxd_pmu = container_of(pmu, struct idxd_pmu, pmu);
+
+ return idxd_pmu->idxd;
+}
+
+enum dsa_perf_events {
+ DSA_PERF_EVENT_WQ = 0,
+ DSA_PERF_EVENT_ENGINE,
+ DSA_PERF_EVENT_ADDR_TRANS,
+ DSA_PERF_EVENT_OP,
+ DSA_PERF_EVENT_COMPL,
+ DSA_PERF_EVENT_MAX,
+};
+
+enum filter_enc {
+ FLT_WQ = 0,
+ FLT_TC,
+ FLT_PG_SZ,
+ FLT_XFER_SZ,
+ FLT_ENG,
+ FLT_MAX,
+};
+
+#define CONFIG_RESET 0x0000000000000001
+#define CNTR_RESET 0x0000000000000002
+#define CNTR_ENABLE 0x0000000000000001
+#define INTR_OVFL 0x0000000000000002
+
+#define COUNTER_FREEZE 0x00000000FFFFFFFF
+#define COUNTER_UNFREEZE 0x0000000000000000
+#define OVERFLOW_SIZE 32
+
+#define CNTRCFG_ENABLE BIT(0)
+#define CNTRCFG_IRQ_OVERFLOW BIT(1)
+#define CNTRCFG_CATEGORY_SHIFT 8
+#define CNTRCFG_EVENT_SHIFT 32
+
+#define PERFMON_TABLE_OFFSET(_idxd) \
+({ \
+ typeof(_idxd) __idxd = (_idxd); \
+ ((__idxd)->reg_base + (__idxd)->perfmon_offset); \
+})
+#define PERFMON_REG_OFFSET(idxd, offset) \
+ (PERFMON_TABLE_OFFSET(idxd) + (offset))
+
+#define PERFCAP_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFCAP_OFFSET))
+#define PERFRST_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFRST_OFFSET))
+#define OVFSTATUS_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_OVFSTATUS_OFFSET))
+#define PERFFRZ_REG(idxd) (PERFMON_REG_OFFSET(idxd, IDXD_PERFFRZ_OFFSET))
+
+#define FLTCFG_REG(idxd, cntr, flt) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_FLTCFG_OFFSET) + ((cntr) * 32) + ((flt) * 4))
+
+#define CNTRCFG_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCFG_OFFSET) + ((cntr) * 8))
+#define CNTRDATA_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRDATA_OFFSET) + ((cntr) * 8))
+#define CNTRCAP_REG(idxd, cntr) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_CNTRCAP_OFFSET) + ((cntr) * 8))
+
+#define EVNTCAP_REG(idxd, category) \
+ (PERFMON_REG_OFFSET(idxd, IDXD_EVNTCAP_OFFSET) + ((category) * 8))
+
+#define DEFINE_PERFMON_FORMAT_ATTR(_name, _format) \
+static ssize_t __perfmon_idxd_##_name##_show(struct kobject *kobj, \
+ struct kobj_attribute *attr, \
+ char *page) \
+{ \
+ BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \
+ return sprintf(page, _format "\n"); \
+} \
+static struct kobj_attribute format_attr_idxd_##_name = \
+ __ATTR(_name, 0444, __perfmon_idxd_##_name##_show, NULL)
+
+#endif
u64 overlap_copy:1;
u64 cache_control_mem:1;
u64 cache_control_cache:1;
+ u64 cmd_cap:1;
u64 rsvd:3;
- u64 int_handle_req:1;
u64 dest_readback:1;
u64 drain_readback:1;
u64 rsvd2:6;
union genctrl_reg {
struct {
u32 softerr_int_en:1;
- u32 rsvd:31;
+ u32 halt_int_en:1;
+ u32 rsvd:30;
};
u32 bits;
} __packed;
IDXD_CMD_DRAIN_PASID,
IDXD_CMD_ABORT_PASID,
IDXD_CMD_REQUEST_INT_HANDLE,
+ IDXD_CMD_RELEASE_INT_HANDLE,
};
+#define CMD_INT_HANDLE_IMS 0x10000
+
#define IDXD_CMDSTS_OFFSET 0xa8
union cmdsts_reg {
struct {
u32 bits;
} __packed;
#define IDXD_CMDSTS_ACTIVE 0x80000000
+#define IDXD_CMDSTS_ERR_MASK 0xff
+#define IDXD_CMDSTS_RES_SHIFT 8
enum idxd_cmdsts_err {
IDXD_CMDSTS_SUCCESS = 0,
IDXD_CMDSTS_ERR_NO_HANDLE,
};
+#define IDXD_CMDCAP_OFFSET 0xb0
+
#define IDXD_SWERR_OFFSET 0xc0
#define IDXD_SWERR_VALID 0x00000001
#define IDXD_SWERR_OVERFLOW 0x00000002
#define GRPENGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 32)
#define GRPFLGCFG_OFFSET(idxd_dev, n) ((idxd_dev)->grpcfg_offset + (n) * GRPCFG_SIZE + 40)
+/* Following is performance monitor registers */
+#define IDXD_PERFCAP_OFFSET 0x0
+union idxd_perfcap {
+ struct {
+ u64 num_perf_counter:6;
+ u64 rsvd1:2;
+ u64 counter_width:8;
+ u64 num_event_category:4;
+ u64 global_event_category:16;
+ u64 filter:8;
+ u64 rsvd2:8;
+ u64 cap_per_counter:1;
+ u64 writeable_counter:1;
+ u64 counter_freeze:1;
+ u64 overflow_interrupt:1;
+ u64 rsvd3:8;
+ };
+ u64 bits;
+} __packed;
+
+#define IDXD_EVNTCAP_OFFSET 0x80
+union idxd_evntcap {
+ struct {
+ u64 events:28;
+ u64 rsvd:36;
+ };
+ u64 bits;
+} __packed;
+
+struct idxd_event {
+ union {
+ struct {
+ u32 event_category:4;
+ u32 events:28;
+ };
+ u32 val;
+ };
+} __packed;
+
+#define IDXD_CNTRCAP_OFFSET 0x800
+struct idxd_cntrcap {
+ union {
+ struct {
+ u32 counter_width:8;
+ u32 rsvd:20;
+ u32 num_events:4;
+ };
+ u32 val;
+ };
+ struct idxd_event events[];
+} __packed;
+
+#define IDXD_PERFRST_OFFSET 0x10
+union idxd_perfrst {
+ struct {
+ u32 perfrst_config:1;
+ u32 perfrst_counter:1;
+ u32 rsvd:30;
+ };
+ u32 val;
+} __packed;
+
+#define IDXD_OVFSTATUS_OFFSET 0x30
+#define IDXD_PERFFRZ_OFFSET 0x20
+#define IDXD_CNTRCFG_OFFSET 0x100
+union idxd_cntrcfg {
+ struct {
+ u64 enable:1;
+ u64 interrupt_ovf:1;
+ u64 global_freeze_ovf:1;
+ u64 rsvd1:5;
+ u64 event_category:4;
+ u64 rsvd2:20;
+ u64 events:28;
+ u64 rsvd3:4;
+ };
+ u64 val;
+} __packed;
+
+#define IDXD_FLTCFG_OFFSET 0x300
+
+#define IDXD_CNTRDATA_OFFSET 0x200
+union idxd_cntrdata {
+ struct {
+ u64 event_count_value;
+ };
+ u64 val;
+} __packed;
+
+union event_cfg {
+ struct {
+ u64 event_cat:4;
+ u64 event_enc:28;
+ };
+ u64 val;
+} __packed;
+
+union filter_cfg {
+ struct {
+ u64 wq:32;
+ u64 tc:8;
+ u64 pg_sz:4;
+ u64 xfer_sz:8;
+ u64 eng:8;
+ };
+ u64 val;
+} __packed;
+
#endif
desc = wq->descs[idx];
memset(desc->hw, 0, sizeof(struct dsa_hw_desc));
- memset(desc->completion, 0, idxd->compl_size);
+ memset(desc->completion, 0, idxd->data->compl_size);
desc->cpu = cpu;
if (device_pasid_enabled(idxd))
desc->hw->pasid = idxd->pasid;
/*
- * Descriptor completion vectors are 1-8 for MSIX. We will round
- * robin through the 8 vectors.
+ * Descriptor completion vectors are 1...N for MSIX. We will round
+ * robin through the N vectors.
*/
wq->vec_ptr = (wq->vec_ptr % idxd->num_wq_irqs) + 1;
- desc->hw->int_handle = wq->vec_ptr;
+ if (!idxd->int_handles) {
+ desc->hw->int_handle = wq->vec_ptr;
+ } else {
+ desc->vector = wq->vec_ptr;
+ /*
+ * int_handles are only for descriptor completion. However for device
+ * MSIX enumeration, vec 0 is used for misc interrupts. Therefore even
+ * though we are rotating through 1...N for descriptor interrupts, we
+ * need to acqurie the int_handles from 0..N-1.
+ */
+ desc->hw->int_handle = idxd->int_handles[desc->vector - 1];
+ }
+
return desc;
}
int idxd_submit_desc(struct idxd_wq *wq, struct idxd_desc *desc)
{
struct idxd_device *idxd = wq->idxd;
- int vec = desc->hw->int_handle;
void __iomem *portal;
int rc;
if (idxd->state != IDXD_DEV_ENABLED)
return -EIO;
+ if (!percpu_ref_tryget_live(&wq->wq_active))
+ return -ENXIO;
+
portal = wq->portal;
/*
return rc;
}
+ percpu_ref_put(&wq->wq_active);
+
/*
* Pending the descriptor to the lockless list for the irq_entry
* that we designated the descriptor to.
*/
- if (desc->hw->flags & IDXD_OP_FLAG_RCI)
- llist_add(&desc->llnode,
- &idxd->irq_entries[vec].pending_llist);
+ if (desc->hw->flags & IDXD_OP_FLAG_RCI) {
+ int vec;
+
+ /*
+ * If the driver is on host kernel, it would be the value
+ * assigned to interrupt handle, which is index for MSIX
+ * vector. If it's guest then can't use the int_handle since
+ * that is the index to IMS for the entire device. The guest
+ * device local index will be used.
+ */
+ vec = !idxd->int_handles ? desc->hw->int_handle : desc->vector;
+ llist_add(&desc->llnode, &idxd->irq_entries[vec].pending_llist);
+ }
return 0;
}
[IDXD_WQT_USER] = "user",
};
-static void idxd_conf_device_release(struct device *dev)
-{
- dev_dbg(dev, "%s for %s\n", __func__, dev_name(dev));
-}
-
-static struct device_type idxd_group_device_type = {
- .name = "group",
- .release = idxd_conf_device_release,
-};
-
-static struct device_type idxd_wq_device_type = {
- .name = "wq",
- .release = idxd_conf_device_release,
-};
-
-static struct device_type idxd_engine_device_type = {
- .name = "engine",
- .release = idxd_conf_device_release,
-};
-
-static struct device_type dsa_device_type = {
- .name = "dsa",
- .release = idxd_conf_device_release,
-};
-
-static struct device_type iax_device_type = {
- .name = "iax",
- .release = idxd_conf_device_release,
-};
-
-static inline bool is_dsa_dev(struct device *dev)
-{
- return dev ? dev->type == &dsa_device_type : false;
-}
-
-static inline bool is_iax_dev(struct device *dev)
-{
- return dev ? dev->type == &iax_device_type : false;
-}
-
-static inline bool is_idxd_dev(struct device *dev)
-{
- return is_dsa_dev(dev) || is_iax_dev(dev);
-}
-
-static inline bool is_idxd_wq_dev(struct device *dev)
-{
- return dev ? dev->type == &idxd_wq_device_type : false;
-}
-
-static inline bool is_idxd_wq_dmaengine(struct idxd_wq *wq)
-{
- if (wq->type == IDXD_WQT_KERNEL &&
- strcmp(wq->name, "dmaengine") == 0)
- return true;
- return false;
-}
-
-static inline bool is_idxd_wq_cdev(struct idxd_wq *wq)
-{
- return wq->type == IDXD_WQT_USER;
-}
-
static int idxd_config_bus_match(struct device *dev,
struct device_driver *drv)
{
return matched;
}
-static int idxd_config_bus_probe(struct device *dev)
+static int enable_wq(struct idxd_wq *wq)
{
+ struct idxd_device *idxd = wq->idxd;
+ struct device *dev = &idxd->pdev->dev;
+ unsigned long flags;
int rc;
+
+ mutex_lock(&wq->wq_lock);
+
+ if (idxd->state != IDXD_DEV_ENABLED) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "Enabling while device not enabled.\n");
+ return -EPERM;
+ }
+
+ if (wq->state != IDXD_WQ_DISABLED) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ %d already enabled.\n", wq->id);
+ return -EBUSY;
+ }
+
+ if (!wq->group) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ not attached to group.\n");
+ return -EINVAL;
+ }
+
+ if (strlen(wq->name) == 0) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ name not set.\n");
+ return -EINVAL;
+ }
+
+ /* Shared WQ checks */
+ if (wq_shared(wq)) {
+ if (!device_swq_supported(idxd)) {
+ dev_warn(dev, "PASID not enabled and shared WQ.\n");
+ mutex_unlock(&wq->wq_lock);
+ return -ENXIO;
+ }
+ /*
+ * Shared wq with the threshold set to 0 means the user
+ * did not set the threshold or transitioned from a
+ * dedicated wq but did not set threshold. A value
+ * of 0 would effectively disable the shared wq. The
+ * driver does not allow a value of 0 to be set for
+ * threshold via sysfs.
+ */
+ if (wq->threshold == 0) {
+ dev_warn(dev, "Shared WQ and threshold 0.\n");
+ mutex_unlock(&wq->wq_lock);
+ return -EINVAL;
+ }
+ }
+
+ rc = idxd_wq_alloc_resources(wq);
+ if (rc < 0) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ resource alloc failed\n");
+ return rc;
+ }
+
+ spin_lock_irqsave(&idxd->dev_lock, flags);
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ rc = idxd_device_config(idxd);
+ spin_unlock_irqrestore(&idxd->dev_lock, flags);
+ if (rc < 0) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "Writing WQ %d config failed: %d\n", wq->id, rc);
+ return rc;
+ }
+
+ rc = idxd_wq_enable(wq);
+ if (rc < 0) {
+ mutex_unlock(&wq->wq_lock);
+ dev_warn(dev, "WQ %d enabling failed: %d\n", wq->id, rc);
+ return rc;
+ }
+
+ rc = idxd_wq_map_portal(wq);
+ if (rc < 0) {
+ dev_warn(dev, "wq portal mapping failed: %d\n", rc);
+ rc = idxd_wq_disable(wq);
+ if (rc < 0)
+ dev_warn(dev, "IDXD wq disable failed\n");
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+ }
+
+ wq->client_count = 0;
+
+ if (wq->type == IDXD_WQT_KERNEL) {
+ rc = idxd_wq_init_percpu_ref(wq);
+ if (rc < 0) {
+ dev_dbg(dev, "percpu_ref setup failed\n");
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+ }
+ }
+
+ if (is_idxd_wq_dmaengine(wq)) {
+ rc = idxd_register_dma_channel(wq);
+ if (rc < 0) {
+ dev_dbg(dev, "DMA channel register failed\n");
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+ }
+ } else if (is_idxd_wq_cdev(wq)) {
+ rc = idxd_wq_add_cdev(wq);
+ if (rc < 0) {
+ dev_dbg(dev, "Cdev creation failed\n");
+ mutex_unlock(&wq->wq_lock);
+ return rc;
+ }
+ }
+
+ mutex_unlock(&wq->wq_lock);
+ dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
+
+ return 0;
+}
+
+static int idxd_config_bus_probe(struct device *dev)
+{
+ int rc = 0;
unsigned long flags;
dev_dbg(dev, "%s called\n", __func__);
/* Perform IDXD configuration and enabling */
spin_lock_irqsave(&idxd->dev_lock, flags);
- rc = idxd_device_config(idxd);
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
+ rc = idxd_device_config(idxd);
spin_unlock_irqrestore(&idxd->dev_lock, flags);
if (rc < 0) {
module_put(THIS_MODULE);
return 0;
} else if (is_idxd_wq_dev(dev)) {
struct idxd_wq *wq = confdev_to_wq(dev);
- struct idxd_device *idxd = wq->idxd;
-
- mutex_lock(&wq->wq_lock);
-
- if (idxd->state != IDXD_DEV_ENABLED) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "Enabling while device not enabled.\n");
- return -EPERM;
- }
-
- if (wq->state != IDXD_WQ_DISABLED) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ %d already enabled.\n", wq->id);
- return -EBUSY;
- }
-
- if (!wq->group) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ not attached to group.\n");
- return -EINVAL;
- }
-
- if (strlen(wq->name) == 0) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ name not set.\n");
- return -EINVAL;
- }
-
- /* Shared WQ checks */
- if (wq_shared(wq)) {
- if (!device_swq_supported(idxd)) {
- dev_warn(dev,
- "PASID not enabled and shared WQ.\n");
- mutex_unlock(&wq->wq_lock);
- return -ENXIO;
- }
- /*
- * Shared wq with the threshold set to 0 means the user
- * did not set the threshold or transitioned from a
- * dedicated wq but did not set threshold. A value
- * of 0 would effectively disable the shared wq. The
- * driver does not allow a value of 0 to be set for
- * threshold via sysfs.
- */
- if (wq->threshold == 0) {
- dev_warn(dev,
- "Shared WQ and threshold 0.\n");
- mutex_unlock(&wq->wq_lock);
- return -EINVAL;
- }
- }
-
- rc = idxd_wq_alloc_resources(wq);
- if (rc < 0) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ resource alloc failed\n");
- return rc;
- }
-
- spin_lock_irqsave(&idxd->dev_lock, flags);
- rc = idxd_device_config(idxd);
- spin_unlock_irqrestore(&idxd->dev_lock, flags);
- if (rc < 0) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "Writing WQ %d config failed: %d\n",
- wq->id, rc);
- return rc;
- }
-
- rc = idxd_wq_enable(wq);
- if (rc < 0) {
- mutex_unlock(&wq->wq_lock);
- dev_warn(dev, "WQ %d enabling failed: %d\n",
- wq->id, rc);
- return rc;
- }
-
- rc = idxd_wq_map_portal(wq);
- if (rc < 0) {
- dev_warn(dev, "wq portal mapping failed: %d\n", rc);
- rc = idxd_wq_disable(wq);
- if (rc < 0)
- dev_warn(dev, "IDXD wq disable failed\n");
- mutex_unlock(&wq->wq_lock);
- return rc;
- }
-
- wq->client_count = 0;
-
- dev_info(dev, "wq %s enabled\n", dev_name(&wq->conf_dev));
- if (is_idxd_wq_dmaengine(wq)) {
- rc = idxd_register_dma_channel(wq);
- if (rc < 0) {
- dev_dbg(dev, "DMA channel register failed\n");
- mutex_unlock(&wq->wq_lock);
- return rc;
- }
- } else if (is_idxd_wq_cdev(wq)) {
- rc = idxd_wq_add_cdev(wq);
- if (rc < 0) {
- dev_dbg(dev, "Cdev creation failed\n");
- mutex_unlock(&wq->wq_lock);
- return rc;
- }
- }
-
- mutex_unlock(&wq->wq_lock);
- return 0;
+ return enable_wq(wq);
}
return -ENODEV;
return;
}
+ if (wq->type == IDXD_WQT_KERNEL)
+ idxd_wq_quiesce(wq);
+
if (is_idxd_wq_dmaengine(wq))
idxd_unregister_dma_channel(wq);
else if (is_idxd_wq_cdev(wq))
dev_dbg(dev, "%s removing dev %s\n", __func__,
dev_name(&idxd->conf_dev));
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (wq->state == IDXD_WQ_DISABLED)
continue;
idxd_unregister_dma_device(idxd);
rc = idxd_device_disable(idxd);
- for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ if (test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags)) {
+ for (i = 0; i < idxd->max_wqs; i++) {
+ struct idxd_wq *wq = idxd->wqs[i];
- mutex_lock(&wq->wq_lock);
- idxd_wq_disable_cleanup(wq);
- mutex_unlock(&wq->wq_lock);
+ mutex_lock(&wq->wq_lock);
+ idxd_wq_disable_cleanup(wq);
+ mutex_unlock(&wq->wq_lock);
+ }
}
module_put(THIS_MODULE);
if (rc < 0)
.shutdown = idxd_config_bus_shutdown,
};
-struct bus_type iax_bus_type = {
- .name = "iax",
- .match = idxd_config_bus_match,
- .probe = idxd_config_bus_probe,
- .remove = idxd_config_bus_remove,
- .shutdown = idxd_config_bus_shutdown,
-};
-
-static struct bus_type *idxd_bus_types[] = {
- &dsa_bus_type,
- &iax_bus_type
-};
-
static struct idxd_device_driver dsa_drv = {
.drv = {
.name = "dsa",
},
};
-static struct idxd_device_driver iax_drv = {
- .drv = {
- .name = "iax",
- .bus = &iax_bus_type,
- .owner = THIS_MODULE,
- .mod_name = KBUILD_MODNAME,
- },
-};
-
-static struct idxd_device_driver *idxd_drvs[] = {
- &dsa_drv,
- &iax_drv
-};
-
-struct bus_type *idxd_get_bus_type(struct idxd_device *idxd)
-{
- return idxd_bus_types[idxd->type];
-}
-
-static struct device_type *idxd_get_device_type(struct idxd_device *idxd)
-{
- if (idxd->type == IDXD_TYPE_DSA)
- return &dsa_device_type;
- else if (idxd->type == IDXD_TYPE_IAX)
- return &iax_device_type;
- else
- return NULL;
-}
-
/* IDXD generic driver setup */
int idxd_register_driver(void)
{
- int i, rc;
-
- for (i = 0; i < IDXD_TYPE_MAX; i++) {
- rc = driver_register(&idxd_drvs[i]->drv);
- if (rc < 0)
- goto drv_fail;
- }
-
- return 0;
-
-drv_fail:
- while (--i >= 0)
- driver_unregister(&idxd_drvs[i]->drv);
- return rc;
+ return driver_register(&dsa_drv.drv);
}
void idxd_unregister_driver(void)
{
- int i;
-
- for (i = 0; i < IDXD_TYPE_MAX; i++)
- driver_unregister(&idxd_drvs[i]->drv);
+ driver_unregister(&dsa_drv.drv);
}
/* IDXD engine attributes */
container_of(dev, struct idxd_engine, conf_dev);
if (engine->group)
- return sprintf(buf, "%d\n", engine->group->id);
+ return sysfs_emit(buf, "%d\n", engine->group->id);
else
- return sprintf(buf, "%d\n", -1);
+ return sysfs_emit(buf, "%d\n", -1);
}
static ssize_t engine_group_id_store(struct device *dev,
if (prevg)
prevg->num_engines--;
- engine->group = &idxd->groups[id];
+ engine->group = idxd->groups[id];
engine->group->num_engines++;
return count;
NULL,
};
+static void idxd_conf_engine_release(struct device *dev)
+{
+ struct idxd_engine *engine = container_of(dev, struct idxd_engine, conf_dev);
+
+ kfree(engine);
+}
+
+struct device_type idxd_engine_device_type = {
+ .name = "engine",
+ .release = idxd_conf_engine_release,
+ .groups = idxd_engine_attribute_groups,
+};
+
/* Group attributes */
static void idxd_set_free_tokens(struct idxd_device *idxd)
int i, tokens;
for (i = 0, tokens = 0; i < idxd->max_groups; i++) {
- struct idxd_group *g = &idxd->groups[i];
+ struct idxd_group *g = idxd->groups[i];
tokens += g->tokens_reserved;
}
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%u\n", group->tokens_reserved);
+ return sysfs_emit(buf, "%u\n", group->tokens_reserved);
}
static ssize_t group_tokens_reserved_store(struct device *dev,
if (rc < 0)
return -EINVAL;
- if (idxd->type == IDXD_TYPE_IAX)
+ if (idxd->data->type == IDXD_TYPE_IAX)
return -EOPNOTSUPP;
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%u\n", group->tokens_allowed);
+ return sysfs_emit(buf, "%u\n", group->tokens_allowed);
}
static ssize_t group_tokens_allowed_store(struct device *dev,
if (rc < 0)
return -EINVAL;
- if (idxd->type == IDXD_TYPE_IAX)
+ if (idxd->data->type == IDXD_TYPE_IAX)
return -EOPNOTSUPP;
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%u\n", group->use_token_limit);
+ return sysfs_emit(buf, "%u\n", group->use_token_limit);
}
static ssize_t group_use_token_limit_store(struct device *dev,
if (rc < 0)
return -EINVAL;
- if (idxd->type == IDXD_TYPE_IAX)
+ if (idxd->data->type == IDXD_TYPE_IAX)
return -EOPNOTSUPP;
if (!test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags))
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
int i, rc = 0;
- char *tmp = buf;
struct idxd_device *idxd = group->idxd;
for (i = 0; i < idxd->max_engines; i++) {
- struct idxd_engine *engine = &idxd->engines[i];
+ struct idxd_engine *engine = idxd->engines[i];
if (!engine->group)
continue;
if (engine->group->id == group->id)
- rc += sprintf(tmp + rc, "engine%d.%d ",
- idxd->id, engine->id);
+ rc += sysfs_emit_at(buf, rc, "engine%d.%d ", idxd->id, engine->id);
}
+ if (!rc)
+ return 0;
rc--;
- rc += sprintf(tmp + rc, "\n");
+ rc += sysfs_emit_at(buf, rc, "\n");
return rc;
}
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
int i, rc = 0;
- char *tmp = buf;
struct idxd_device *idxd = group->idxd;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
if (!wq->group)
continue;
if (wq->group->id == group->id)
- rc += sprintf(tmp + rc, "wq%d.%d ",
- idxd->id, wq->id);
+ rc += sysfs_emit_at(buf, rc, "wq%d.%d ", idxd->id, wq->id);
}
+ if (!rc)
+ return 0;
rc--;
- rc += sprintf(tmp + rc, "\n");
+ rc += sysfs_emit_at(buf, rc, "\n");
return rc;
}
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%d\n", group->tc_a);
+ return sysfs_emit(buf, "%d\n", group->tc_a);
}
static ssize_t group_traffic_class_a_store(struct device *dev,
struct idxd_group *group =
container_of(dev, struct idxd_group, conf_dev);
- return sprintf(buf, "%d\n", group->tc_b);
+ return sysfs_emit(buf, "%d\n", group->tc_b);
}
static ssize_t group_traffic_class_b_store(struct device *dev,
NULL,
};
+static void idxd_conf_group_release(struct device *dev)
+{
+ struct idxd_group *group = container_of(dev, struct idxd_group, conf_dev);
+
+ kfree(group);
+}
+
+struct device_type idxd_group_device_type = {
+ .name = "group",
+ .release = idxd_conf_group_release,
+ .groups = idxd_group_attribute_groups,
+};
+
/* IDXD work queue attribs */
static ssize_t wq_clients_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%d\n", wq->client_count);
+ return sysfs_emit(buf, "%d\n", wq->client_count);
}
static struct device_attribute dev_attr_wq_clients =
switch (wq->state) {
case IDXD_WQ_DISABLED:
- return sprintf(buf, "disabled\n");
+ return sysfs_emit(buf, "disabled\n");
case IDXD_WQ_ENABLED:
- return sprintf(buf, "enabled\n");
+ return sysfs_emit(buf, "enabled\n");
}
- return sprintf(buf, "unknown\n");
+ return sysfs_emit(buf, "unknown\n");
}
static struct device_attribute dev_attr_wq_state =
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
if (wq->group)
- return sprintf(buf, "%u\n", wq->group->id);
+ return sysfs_emit(buf, "%u\n", wq->group->id);
else
- return sprintf(buf, "-1\n");
+ return sysfs_emit(buf, "-1\n");
}
static ssize_t wq_group_id_store(struct device *dev,
return count;
}
- group = &idxd->groups[id];
+ group = idxd->groups[id];
prevg = wq->group;
if (prevg)
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%s\n",
- wq_dedicated(wq) ? "dedicated" : "shared");
+ return sysfs_emit(buf, "%s\n", wq_dedicated(wq) ? "dedicated" : "shared");
}
static ssize_t wq_mode_store(struct device *dev,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->size);
+ return sysfs_emit(buf, "%u\n", wq->size);
}
static int total_claimed_wq_size(struct idxd_device *idxd)
int wq_size = 0;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
wq_size += wq->size;
}
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->priority);
+ return sysfs_emit(buf, "%u\n", wq->priority);
}
static ssize_t wq_priority_store(struct device *dev,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n",
- test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags));
+ return sysfs_emit(buf, "%u\n", test_bit(WQ_FLAG_BLOCK_ON_FAULT, &wq->flags));
}
static ssize_t wq_block_on_fault_store(struct device *dev,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->threshold);
+ return sysfs_emit(buf, "%u\n", wq->threshold);
}
static ssize_t wq_threshold_store(struct device *dev,
switch (wq->type) {
case IDXD_WQT_KERNEL:
- return sprintf(buf, "%s\n",
- idxd_wq_type_names[IDXD_WQT_KERNEL]);
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_KERNEL]);
case IDXD_WQT_USER:
- return sprintf(buf, "%s\n",
- idxd_wq_type_names[IDXD_WQT_USER]);
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_USER]);
case IDXD_WQT_NONE:
default:
- return sprintf(buf, "%s\n",
- idxd_wq_type_names[IDXD_WQT_NONE]);
+ return sysfs_emit(buf, "%s\n", idxd_wq_type_names[IDXD_WQT_NONE]);
}
return -EINVAL;
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%s\n", wq->name);
+ return sysfs_emit(buf, "%s\n", wq->name);
}
static ssize_t wq_name_store(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+ int minor = -1;
- return sprintf(buf, "%d\n", wq->idxd_cdev.minor);
+ mutex_lock(&wq->wq_lock);
+ if (wq->idxd_cdev)
+ minor = wq->idxd_cdev->minor;
+ mutex_unlock(&wq->wq_lock);
+
+ if (minor == -1)
+ return -ENXIO;
+ return sysfs_emit(buf, "%d\n", minor);
}
static struct device_attribute dev_attr_wq_cdev_minor =
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%llu\n", wq->max_xfer_bytes);
+ return sysfs_emit(buf, "%llu\n", wq->max_xfer_bytes);
}
static ssize_t wq_max_transfer_size_store(struct device *dev, struct device_attribute *attr,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->max_batch_size);
+ return sysfs_emit(buf, "%u\n", wq->max_batch_size);
}
static ssize_t wq_max_batch_size_store(struct device *dev, struct device_attribute *attr,
{
struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
- return sprintf(buf, "%u\n", wq->ats_dis);
+ return sysfs_emit(buf, "%u\n", wq->ats_dis);
}
static ssize_t wq_ats_disable_store(struct device *dev, struct device_attribute *attr,
NULL,
};
+static void idxd_conf_wq_release(struct device *dev)
+{
+ struct idxd_wq *wq = container_of(dev, struct idxd_wq, conf_dev);
+
+ kfree(wq->wqcfg);
+ kfree(wq);
+}
+
+struct device_type idxd_wq_device_type = {
+ .name = "wq",
+ .release = idxd_conf_wq_release,
+ .groups = idxd_wq_attribute_groups,
+};
+
/* IDXD device attribs */
static ssize_t version_show(struct device *dev, struct device_attribute *attr,
char *buf)
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%#x\n", idxd->hw.version);
+ return sysfs_emit(buf, "%#x\n", idxd->hw.version);
}
static DEVICE_ATTR_RO(version);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_wq_size);
+ return sysfs_emit(buf, "%u\n", idxd->max_wq_size);
}
static DEVICE_ATTR_RO(max_work_queues_size);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_groups);
+ return sysfs_emit(buf, "%u\n", idxd->max_groups);
}
static DEVICE_ATTR_RO(max_groups);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_wqs);
+ return sysfs_emit(buf, "%u\n", idxd->max_wqs);
}
static DEVICE_ATTR_RO(max_work_queues);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_engines);
+ return sysfs_emit(buf, "%u\n", idxd->max_engines);
}
static DEVICE_ATTR_RO(max_engines);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
+ return sysfs_emit(buf, "%d\n", dev_to_node(&idxd->pdev->dev));
}
static DEVICE_ATTR_RO(numa_node);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_batch_size);
+ return sysfs_emit(buf, "%u\n", idxd->max_batch_size);
}
static DEVICE_ATTR_RO(max_batch_size);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%llu\n", idxd->max_xfer_bytes);
+ return sysfs_emit(buf, "%llu\n", idxd->max_xfer_bytes);
}
static DEVICE_ATTR_RO(max_transfer_size);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%#llx\n", idxd->hw.gen_cap.bits);
+ return sysfs_emit(buf, "%#llx\n", idxd->hw.gen_cap.bits);
}
static DEVICE_ATTR_RO(gen_cap);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n",
- test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
+ return sysfs_emit(buf, "%u\n", test_bit(IDXD_FLAG_CONFIGURABLE, &idxd->flags));
}
static DEVICE_ATTR_RO(configurable);
spin_lock_irqsave(&idxd->dev_lock, flags);
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
count += wq->client_count;
}
spin_unlock_irqrestore(&idxd->dev_lock, flags);
- return sprintf(buf, "%d\n", count);
+ return sysfs_emit(buf, "%d\n", count);
}
static DEVICE_ATTR_RO(clients);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", device_pasid_enabled(idxd));
+ return sysfs_emit(buf, "%u\n", device_pasid_enabled(idxd));
}
static DEVICE_ATTR_RO(pasid_enabled);
switch (idxd->state) {
case IDXD_DEV_DISABLED:
case IDXD_DEV_CONF_READY:
- return sprintf(buf, "disabled\n");
+ return sysfs_emit(buf, "disabled\n");
case IDXD_DEV_ENABLED:
- return sprintf(buf, "enabled\n");
+ return sysfs_emit(buf, "enabled\n");
case IDXD_DEV_HALTED:
- return sprintf(buf, "halted\n");
+ return sysfs_emit(buf, "halted\n");
}
- return sprintf(buf, "unknown\n");
+ return sysfs_emit(buf, "unknown\n");
}
static DEVICE_ATTR_RO(state);
spin_lock_irqsave(&idxd->dev_lock, flags);
for (i = 0; i < 4; i++)
- out += sprintf(buf + out, "%#018llx ", idxd->sw_err.bits[i]);
+ out += sysfs_emit_at(buf, out, "%#018llx ", idxd->sw_err.bits[i]);
spin_unlock_irqrestore(&idxd->dev_lock, flags);
out--;
- out += sprintf(buf + out, "\n");
+ out += sysfs_emit_at(buf, out, "\n");
return out;
}
static DEVICE_ATTR_RO(errors);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->max_tokens);
+ return sysfs_emit(buf, "%u\n", idxd->max_tokens);
}
static DEVICE_ATTR_RO(max_tokens);
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->token_limit);
+ return sysfs_emit(buf, "%u\n", idxd->token_limit);
}
static ssize_t token_limit_store(struct device *dev,
struct idxd_device *idxd =
container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%u\n", idxd->major);
+ return sysfs_emit(buf, "%u\n", idxd->major);
}
static DEVICE_ATTR_RO(cdev_major);
{
struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev);
- return sprintf(buf, "%#x\n", idxd->cmd_status);
+ return sysfs_emit(buf, "%#x\n", idxd->cmd_status);
}
static DEVICE_ATTR_RO(cmd_status);
NULL,
};
-static int idxd_setup_engine_sysfs(struct idxd_device *idxd)
+static void idxd_conf_device_release(struct device *dev)
{
- struct device *dev = &idxd->pdev->dev;
- int i, rc;
+ struct idxd_device *idxd = container_of(dev, struct idxd_device, conf_dev);
+
+ kfree(idxd->groups);
+ kfree(idxd->wqs);
+ kfree(idxd->engines);
+ kfree(idxd->irq_entries);
+ kfree(idxd->int_handles);
+ ida_free(&idxd_ida, idxd->id);
+ kfree(idxd);
+}
+
+struct device_type dsa_device_type = {
+ .name = "dsa",
+ .release = idxd_conf_device_release,
+ .groups = idxd_attribute_groups,
+};
+
+struct device_type iax_device_type = {
+ .name = "iax",
+ .release = idxd_conf_device_release,
+ .groups = idxd_attribute_groups,
+};
+
+static int idxd_register_engine_devices(struct idxd_device *idxd)
+{
+ int i, j, rc;
for (i = 0; i < idxd->max_engines; i++) {
- struct idxd_engine *engine = &idxd->engines[i];
-
- engine->conf_dev.parent = &idxd->conf_dev;
- dev_set_name(&engine->conf_dev, "engine%d.%d",
- idxd->id, engine->id);
- engine->conf_dev.bus = idxd_get_bus_type(idxd);
- engine->conf_dev.groups = idxd_engine_attribute_groups;
- engine->conf_dev.type = &idxd_engine_device_type;
- dev_dbg(dev, "Engine device register: %s\n",
- dev_name(&engine->conf_dev));
- rc = device_register(&engine->conf_dev);
- if (rc < 0) {
- put_device(&engine->conf_dev);
+ struct idxd_engine *engine = idxd->engines[i];
+
+ rc = device_add(&engine->conf_dev);
+ if (rc < 0)
goto cleanup;
- }
}
return 0;
cleanup:
- while (i--) {
- struct idxd_engine *engine = &idxd->engines[i];
+ j = i - 1;
+ for (; i < idxd->max_engines; i++)
+ put_device(&idxd->engines[i]->conf_dev);
- device_unregister(&engine->conf_dev);
- }
+ while (j--)
+ device_unregister(&idxd->engines[j]->conf_dev);
return rc;
}
-static int idxd_setup_group_sysfs(struct idxd_device *idxd)
+static int idxd_register_group_devices(struct idxd_device *idxd)
{
- struct device *dev = &idxd->pdev->dev;
- int i, rc;
+ int i, j, rc;
for (i = 0; i < idxd->max_groups; i++) {
- struct idxd_group *group = &idxd->groups[i];
-
- group->conf_dev.parent = &idxd->conf_dev;
- dev_set_name(&group->conf_dev, "group%d.%d",
- idxd->id, group->id);
- group->conf_dev.bus = idxd_get_bus_type(idxd);
- group->conf_dev.groups = idxd_group_attribute_groups;
- group->conf_dev.type = &idxd_group_device_type;
- dev_dbg(dev, "Group device register: %s\n",
- dev_name(&group->conf_dev));
- rc = device_register(&group->conf_dev);
- if (rc < 0) {
- put_device(&group->conf_dev);
+ struct idxd_group *group = idxd->groups[i];
+
+ rc = device_add(&group->conf_dev);
+ if (rc < 0)
goto cleanup;
- }
}
return 0;
cleanup:
- while (i--) {
- struct idxd_group *group = &idxd->groups[i];
+ j = i - 1;
+ for (; i < idxd->max_groups; i++)
+ put_device(&idxd->groups[i]->conf_dev);
- device_unregister(&group->conf_dev);
- }
+ while (j--)
+ device_unregister(&idxd->groups[j]->conf_dev);
return rc;
}
-static int idxd_setup_wq_sysfs(struct idxd_device *idxd)
+static int idxd_register_wq_devices(struct idxd_device *idxd)
{
- struct device *dev = &idxd->pdev->dev;
- int i, rc;
+ int i, rc, j;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
-
- wq->conf_dev.parent = &idxd->conf_dev;
- dev_set_name(&wq->conf_dev, "wq%d.%d", idxd->id, wq->id);
- wq->conf_dev.bus = idxd_get_bus_type(idxd);
- wq->conf_dev.groups = idxd_wq_attribute_groups;
- wq->conf_dev.type = &idxd_wq_device_type;
- dev_dbg(dev, "WQ device register: %s\n",
- dev_name(&wq->conf_dev));
- rc = device_register(&wq->conf_dev);
- if (rc < 0) {
- put_device(&wq->conf_dev);
+ struct idxd_wq *wq = idxd->wqs[i];
+
+ rc = device_add(&wq->conf_dev);
+ if (rc < 0)
goto cleanup;
- }
}
return 0;
cleanup:
- while (i--) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ j = i - 1;
+ for (; i < idxd->max_wqs; i++)
+ put_device(&idxd->wqs[i]->conf_dev);
- device_unregister(&wq->conf_dev);
- }
+ while (j--)
+ device_unregister(&idxd->wqs[j]->conf_dev);
return rc;
}
-static int idxd_setup_device_sysfs(struct idxd_device *idxd)
+int idxd_register_devices(struct idxd_device *idxd)
{
struct device *dev = &idxd->pdev->dev;
- int rc;
- char devname[IDXD_NAME_SIZE];
+ int rc, i;
- sprintf(devname, "%s%d", idxd_get_dev_name(idxd), idxd->id);
- idxd->conf_dev.parent = dev;
- dev_set_name(&idxd->conf_dev, "%s", devname);
- idxd->conf_dev.bus = idxd_get_bus_type(idxd);
- idxd->conf_dev.groups = idxd_attribute_groups;
- idxd->conf_dev.type = idxd_get_device_type(idxd);
-
- dev_dbg(dev, "IDXD device register: %s\n", dev_name(&idxd->conf_dev));
- rc = device_register(&idxd->conf_dev);
- if (rc < 0) {
- put_device(&idxd->conf_dev);
- return rc;
- }
-
- return 0;
-}
-
-int idxd_setup_sysfs(struct idxd_device *idxd)
-{
- struct device *dev = &idxd->pdev->dev;
- int rc;
-
- rc = idxd_setup_device_sysfs(idxd);
- if (rc < 0) {
- dev_dbg(dev, "Device sysfs registering failed: %d\n", rc);
+ rc = device_add(&idxd->conf_dev);
+ if (rc < 0)
return rc;
- }
- rc = idxd_setup_wq_sysfs(idxd);
+ rc = idxd_register_wq_devices(idxd);
if (rc < 0) {
- /* unregister conf dev */
- dev_dbg(dev, "Work Queue sysfs registering failed: %d\n", rc);
- return rc;
+ dev_dbg(dev, "WQ devices registering failed: %d\n", rc);
+ goto err_wq;
}
- rc = idxd_setup_group_sysfs(idxd);
+ rc = idxd_register_engine_devices(idxd);
if (rc < 0) {
- /* unregister conf dev */
- dev_dbg(dev, "Group sysfs registering failed: %d\n", rc);
- return rc;
+ dev_dbg(dev, "Engine devices registering failed: %d\n", rc);
+ goto err_engine;
}
- rc = idxd_setup_engine_sysfs(idxd);
+ rc = idxd_register_group_devices(idxd);
if (rc < 0) {
- /* unregister conf dev */
- dev_dbg(dev, "Engine sysfs registering failed: %d\n", rc);
- return rc;
+ dev_dbg(dev, "Group device registering failed: %d\n", rc);
+ goto err_group;
}
return 0;
+
+ err_group:
+ for (i = 0; i < idxd->max_engines; i++)
+ device_unregister(&idxd->engines[i]->conf_dev);
+ err_engine:
+ for (i = 0; i < idxd->max_wqs; i++)
+ device_unregister(&idxd->wqs[i]->conf_dev);
+ err_wq:
+ device_del(&idxd->conf_dev);
+ return rc;
}
-void idxd_cleanup_sysfs(struct idxd_device *idxd)
+void idxd_unregister_devices(struct idxd_device *idxd)
{
int i;
for (i = 0; i < idxd->max_wqs; i++) {
- struct idxd_wq *wq = &idxd->wqs[i];
+ struct idxd_wq *wq = idxd->wqs[i];
device_unregister(&wq->conf_dev);
}
for (i = 0; i < idxd->max_engines; i++) {
- struct idxd_engine *engine = &idxd->engines[i];
+ struct idxd_engine *engine = idxd->engines[i];
device_unregister(&engine->conf_dev);
}
for (i = 0; i < idxd->max_groups; i++) {
- struct idxd_group *group = &idxd->groups[i];
+ struct idxd_group *group = idxd->groups[i];
device_unregister(&group->conf_dev);
}
int idxd_register_bus_type(void)
{
- int i, rc;
-
- for (i = 0; i < IDXD_TYPE_MAX; i++) {
- rc = bus_register(idxd_bus_types[i]);
- if (rc < 0)
- goto bus_err;
- }
-
- return 0;
-
-bus_err:
- while (--i >= 0)
- bus_unregister(idxd_bus_types[i]);
- return rc;
+ return bus_register(&dsa_bus_type);
}
void idxd_unregister_bus_type(void)
{
- int i;
-
- for (i = 0; i < IDXD_TYPE_MAX; i++)
- bus_unregister(idxd_bus_types[i]);
+ bus_unregister(&dsa_bus_type);
}
// SPDX-License-Identifier: GPL-2.0-only
/*
* Copyright (c) 2013 - 2015 Linaro Ltd.
- * Copyright (c) 2013 Hisilicon Limited.
+ * Copyright (c) 2013 HiSilicon Limited.
*/
#include <linux/sched.h>
#include <linux/device.h>
module_platform_driver(k3_pdma_driver);
-MODULE_DESCRIPTION("Hisilicon k3 DMA Driver");
+MODULE_DESCRIPTION("HiSilicon k3 DMA Driver");
MODULE_ALIAS("platform:k3dma");
MODULE_LICENSE("GPL v2");
static const struct of_device_id gpi_of_match[] = {
{ .compatible = "qcom,sdm845-gpi-dma" },
+ { .compatible = "qcom,sm8150-gpi-dma" },
{ },
};
MODULE_DEVICE_TABLE(of, gpi_of_match);
return container_of(dmach, struct hidma_chan, chan);
}
-static inline
-struct hidma_desc *to_hidma_desc(struct dma_async_tx_descriptor *t)
-{
- return container_of(t, struct hidma_desc, desc);
-}
-
static void hidma_free(struct hidma_dev *dmadev)
{
INIT_LIST_HEAD(&dmadev->ddev.channels);
return 0;
}
+static void xilinx_dma_synchronize(struct dma_chan *dchan)
+{
+ struct xilinx_dma_chan *chan = to_xilinx_chan(dchan);
+
+ tasklet_kill(&chan->tasklet);
+}
+
/**
* xilinx_dma_channel_set_config - Configure VDMA channel
* Run-time configuration for Axi VDMA, supports:
xdev->common.device_free_chan_resources =
xilinx_dma_free_chan_resources;
xdev->common.device_terminate_all = xilinx_dma_terminate_all;
+ xdev->common.device_synchronize = xilinx_dma_synchronize;
xdev->common.device_tx_status = xilinx_dma_tx_status;
xdev->common.device_issue_pending = xilinx_dma_issue_pending;
if (xdev->dma_config->dmatype == XDMA_TYPE_AXIDMA) {
return -EINVAL;
case PSCI_RET_DENIED:
return -EPERM;
- };
+ }
return -EINVAL;
}
config GPIO_ICH
tristate "Intel ICH GPIO"
- depends on PCI && X86
- select MFD_CORE
- select LPC_ICH
+ depends on X86
+ depends on LPC_ICH
help
Say yes here to support the GPIO functionality of a number of Intel
ICH-based chipsets. Currently supported devices: ICH6, ICH7, ICH8
help
Say Y here to support RDA Micro GPIO controller.
+config GPIO_REALTEK_OTTO
+ tristate "Realtek Otto GPIO support"
+ depends on MACH_REALTEK_RTL
+ default MACH_REALTEK_RTL
+ select GPIO_GENERIC
+ select GPIOLIB_IRQCHIP
+ help
+ The GPIO controller on the Otto MIPS platform supports up to two
+ banks of 32 GPIOs, with edge triggered interrupts. The 32 GPIOs
+ are grouped in four 8-bit wide ports.
+
+ When built as a module, the module will be called realtek_otto_gpio.
+
config GPIO_REG
bool
help
config GPIO_SCH
tristate "Intel SCH/TunnelCreek/Centerton/Quark X1000 GPIO"
- depends on (X86 || COMPILE_TEST) && PCI
- select MFD_CORE
- select LPC_SCH
+ depends on (X86 || COMPILE_TEST) && ACPI
+ depends on LPC_SCH
+ select GPIOLIB_IRQCHIP
help
Say yes here to support GPIO interface on Intel Poulsbo SCH,
Intel Tunnel Creek processor, Intel Centerton processor or
obj-$(CONFIG_GPIO_RCAR) += gpio-rcar.o
obj-$(CONFIG_GPIO_RDA) += gpio-rda.o
obj-$(CONFIG_GPIO_RDC321X) += gpio-rdc321x.o
+obj-$(CONFIG_GPIO_REALTEK_OTTO) += gpio-realtek-otto.o
obj-$(CONFIG_GPIO_REG) += gpio-reg.o
obj-$(CONFIG_ARCH_SA1100) += gpio-sa1100.o
obj-$(CONFIG_GPIO_SAMA5D2_PIOBU) += gpio-sama5d2-piobu.o
unsigned char out_state[6];
unsigned char control[2];
raw_spinlock_t lock;
- unsigned base;
+ unsigned int base;
unsigned char irq_mask;
};
-static int dio48e_gpio_get_direction(struct gpio_chip *chip, unsigned offset)
+static int dio48e_gpio_get_direction(struct gpio_chip *chip, unsigned int offset)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- const unsigned port = offset / 8;
- const unsigned mask = BIT(offset % 8);
+ const unsigned int port = offset / 8;
+ const unsigned int mask = BIT(offset % 8);
if (dio48egpio->io_state[port] & mask)
return GPIO_LINE_DIRECTION_IN;
return GPIO_LINE_DIRECTION_OUT;
}
-static int dio48e_gpio_direction_input(struct gpio_chip *chip, unsigned offset)
+static int dio48e_gpio_direction_input(struct gpio_chip *chip, unsigned int offset)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- const unsigned io_port = offset / 8;
+ const unsigned int io_port = offset / 8;
const unsigned int control_port = io_port / 3;
- const unsigned control_addr = dio48egpio->base + 3 + control_port*4;
+ const unsigned int control_addr = dio48egpio->base + 3 + control_port * 4;
unsigned long flags;
- unsigned control;
+ unsigned int control;
raw_spin_lock_irqsave(&dio48egpio->lock, flags);
return 0;
}
-static int dio48e_gpio_direction_output(struct gpio_chip *chip, unsigned offset,
- int value)
+static int dio48e_gpio_direction_output(struct gpio_chip *chip, unsigned int offset,
+ int value)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- const unsigned io_port = offset / 8;
+ const unsigned int io_port = offset / 8;
const unsigned int control_port = io_port / 3;
- const unsigned mask = BIT(offset % 8);
- const unsigned control_addr = dio48egpio->base + 3 + control_port*4;
- const unsigned out_port = (io_port > 2) ? io_port + 1 : io_port;
+ const unsigned int mask = BIT(offset % 8);
+ const unsigned int control_addr = dio48egpio->base + 3 + control_port * 4;
+ const unsigned int out_port = (io_port > 2) ? io_port + 1 : io_port;
unsigned long flags;
- unsigned control;
+ unsigned int control;
raw_spin_lock_irqsave(&dio48egpio->lock, flags);
return 0;
}
-static int dio48e_gpio_get(struct gpio_chip *chip, unsigned offset)
+static int dio48e_gpio_get(struct gpio_chip *chip, unsigned int offset)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- const unsigned port = offset / 8;
- const unsigned mask = BIT(offset % 8);
- const unsigned in_port = (port > 2) ? port + 1 : port;
+ const unsigned int port = offset / 8;
+ const unsigned int mask = BIT(offset % 8);
+ const unsigned int in_port = (port > 2) ? port + 1 : port;
unsigned long flags;
- unsigned port_state;
+ unsigned int port_state;
raw_spin_lock_irqsave(&dio48egpio->lock, flags);
return 0;
}
-static void dio48e_gpio_set(struct gpio_chip *chip, unsigned offset, int value)
+static void dio48e_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
{
struct dio48e_gpio *const dio48egpio = gpiochip_get_data(chip);
- const unsigned port = offset / 8;
- const unsigned mask = BIT(offset % 8);
- const unsigned out_port = (port > 2) ? port + 1 : port;
+ const unsigned int port = offset / 8;
+ const unsigned int mask = BIT(offset % 8);
+ const unsigned int out_port = (port > 2) ? port + 1 : port;
unsigned long flags;
raw_spin_lock_irqsave(&dio48egpio->lock, flags);
raw_spin_unlock_irqrestore(&dio48egpio->lock, flags);
}
-static int dio48e_irq_set_type(struct irq_data *data, unsigned flow_type)
+static int dio48e_irq_set_type(struct irq_data *data, unsigned int flow_type)
{
const unsigned long offset = irqd_to_hwirq(data);
static DEFINE_MUTEX(gpio_aggregator_lock); /* protects idr */
static DEFINE_IDR(gpio_aggregator_idr);
-static char *get_arg(char **args)
-{
- char *start, *end;
-
- start = skip_spaces(*args);
- if (!*start)
- return NULL;
-
- if (*start == '"') {
- /* Quoted arg */
- end = strchr(++start, '"');
- if (!end)
- return ERR_PTR(-EINVAL);
- } else {
- /* Unquoted arg */
- for (end = start; *end && !isspace(*end); end++) ;
- }
-
- if (*end)
- *end++ = '\0';
-
- *args = end;
- return start;
-}
-
static int aggr_add_gpio(struct gpio_aggregator *aggr, const char *key,
int hwnum, unsigned int *n)
{
static int aggr_parse(struct gpio_aggregator *aggr)
{
+ char *args = skip_spaces(aggr->args);
char *name, *offsets, *p;
- char *args = aggr->args;
unsigned long *bitmap;
unsigned int i, n = 0;
int error = 0;
if (!bitmap)
return -ENOMEM;
- for (name = get_arg(&args), offsets = get_arg(&args); name;
- offsets = get_arg(&args)) {
- if (IS_ERR(name)) {
- pr_err("Cannot get GPIO specifier: %pe\n", name);
- error = PTR_ERR(name);
- goto free_bitmap;
- }
+ args = next_arg(args, &name, &p);
+ while (*args) {
+ args = next_arg(args, &offsets, &p);
p = get_options(offsets, 0, &error);
if (error == 0 || *p) {
goto free_bitmap;
}
- name = get_arg(&args);
+ args = next_arg(args, &name, &p);
}
if (!n) {
* Copyright (C) 2010 Extreme Engineering Solutions.
*/
-
#include <linux/bitops.h>
#include <linux/gpio/driver.h>
#include <linux/ioport.h>
#include <linux/mfd/lpc_ich.h>
#include <linux/module.h>
-#include <linux/pci.h>
#include <linux/platform_device.h>
#define DRV_NAME "gpio_ich"
return val;
}
-static inline void superio_outw(int val, int reg)
-{
- outb(reg++, REG);
- outb(val >> 8, VAL);
- outb(reg, REG);
- outb(val, VAL);
-}
-
static inline void superio_set_mask(int mask, int reg)
{
u8 curr_val = superio_inb(reg);
static void gpio_mockup_unregister_pdevs(void)
{
- struct platform_device *pdev;
int i;
- for (i = 0; i < GPIO_MOCKUP_MAX_GC; i++) {
- pdev = gpio_mockup_pdevs[i];
-
- if (pdev)
- platform_device_unregister(pdev);
- }
+ for (i = 0; i < GPIO_MOCKUP_MAX_GC; i++)
+ platform_device_unregister(gpio_mockup_pdevs[i]);
}
static __init char **gpio_mockup_make_line_names(const char *label,
* kind, whether express or implied.
*/
+#include <linux/acpi.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/spinlock.h>
#include <linux/of_address.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
+#include <linux/property.h>
+#include <linux/mod_devicetable.h>
#include <linux/slab.h>
#include <linux/irq.h>
#include <linux/gpio/driver.h>
struct device_node *np = pdev->dev.of_node;
struct mpc8xxx_gpio_chip *mpc8xxx_gc;
struct gpio_chip *gc;
- const struct mpc8xxx_gpio_devtype *devtype =
- of_device_get_match_data(&pdev->dev);
+ const struct mpc8xxx_gpio_devtype *devtype = NULL;
+ struct fwnode_handle *fwnode;
int ret;
mpc8xxx_gc = devm_kzalloc(&pdev->dev, sizeof(*mpc8xxx_gc), GFP_KERNEL);
raw_spin_lock_init(&mpc8xxx_gc->lock);
- mpc8xxx_gc->regs = of_iomap(np, 0);
- if (!mpc8xxx_gc->regs)
- return -ENOMEM;
+ mpc8xxx_gc->regs = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(mpc8xxx_gc->regs))
+ return PTR_ERR(mpc8xxx_gc->regs);
gc = &mpc8xxx_gc->gc;
gc->parent = &pdev->dev;
- if (of_property_read_bool(np, "little-endian")) {
+ if (device_property_read_bool(&pdev->dev, "little-endian")) {
ret = bgpio_init(gc, &pdev->dev, 4,
mpc8xxx_gc->regs + GPIO_DAT,
NULL, NULL,
mpc8xxx_gc->direction_output = gc->direction_output;
+ devtype = device_get_match_data(&pdev->dev);
if (!devtype)
devtype = &mpc8xxx_gpio_devtype_default;
* associated input enable must be set (GPIOxGPIE[IEn]=1) to propagate
* the port value to the GPIO Data Register.
*/
+ fwnode = dev_fwnode(&pdev->dev);
if (of_device_is_compatible(np, "fsl,qoriq-gpio") ||
of_device_is_compatible(np, "fsl,ls1028a-gpio") ||
- of_device_is_compatible(np, "fsl,ls1088a-gpio"))
+ of_device_is_compatible(np, "fsl,ls1088a-gpio") ||
+ is_acpi_node(fwnode))
gc->write_reg(mpc8xxx_gc->regs + GPIO_IBE, 0xffffffff);
ret = gpiochip_add_data(gc, mpc8xxx_gc);
if (ret) {
- pr_err("%pOF: GPIO chip registration failed with status %d\n",
- np, ret);
+ dev_err(&pdev->dev,
+ "GPIO chip registration failed with status %d\n", ret);
goto err;
}
- mpc8xxx_gc->irqn = irq_of_parse_and_map(np, 0);
+ mpc8xxx_gc->irqn = platform_get_irq(pdev, 0);
if (!mpc8xxx_gc->irqn)
return 0;
- mpc8xxx_gc->irq = irq_domain_add_linear(np, MPC8XXX_GPIO_PINS,
- &mpc8xxx_gpio_irq_ops, mpc8xxx_gc);
+ mpc8xxx_gc->irq = irq_domain_create_linear(fwnode,
+ MPC8XXX_GPIO_PINS,
+ &mpc8xxx_gpio_irq_ops,
+ mpc8xxx_gc);
+
if (!mpc8xxx_gc->irq)
return 0;
IRQF_SHARED, "gpio-cascade",
mpc8xxx_gc);
if (ret) {
- dev_err(&pdev->dev, "%s: failed to devm_request_irq(%d), ret = %d\n",
- np->full_name, mpc8xxx_gc->irqn, ret);
+ dev_err(&pdev->dev,
+ "failed to devm_request_irq(%d), ret = %d\n",
+ mpc8xxx_gc->irqn, ret);
goto err;
}
return 0;
}
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id gpio_acpi_ids[] = {
+ {"NXP0031",},
+ { }
+};
+MODULE_DEVICE_TABLE(acpi, gpio_acpi_ids);
+#endif
+
static struct platform_driver mpc8xxx_plat_driver = {
.probe = mpc8xxx_probe,
.remove = mpc8xxx_remove,
.driver = {
.name = "gpio-mpc8xxx",
.of_match_table = mpc8xxx_gpio_ids,
+ .acpi_match_table = ACPI_PTR(gpio_acpi_ids),
},
};
return port->devid == IMX23_GPIO;
}
-static inline int is_imx28_gpio(struct mxs_gpio_port *port)
-{
- return port->devid == IMX28_GPIO;
-}
-
/* Note: This driver assumes 32 GPIOs are handled in one register */
static int mxs_gpio_set_irq_type(struct irq_data *d, unsigned int type)
{
struct device *dev = &pdev->dev;
struct device_node *node = dev->of_node;
- const struct of_device_id *match;
const struct omap_gpio_platform_data *pdata;
struct gpio_bank *bank;
struct irq_chip *irqc;
int ret;
- match = of_match_device(of_match_ptr(omap_gpio_match), dev);
+ pdata = device_get_match_data(dev);
- pdata = match ? match->data : dev_get_platdata(dev);
+ pdata = pdata ?: dev_get_platdata(dev);
if (!pdata)
return -EINVAL;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/gpio/driver.h>
+#include <linux/irq.h>
+#include <linux/minmax.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+
+/*
+ * Total register block size is 0x1C for one bank of four ports (A, B, C, D).
+ * An optional second bank, with ports E, F, G, and H, may be present, starting
+ * at register offset 0x1C.
+ */
+
+/*
+ * Pin select: (0) "normal", (1) "dedicate peripheral"
+ * Not used on RTL8380/RTL8390, peripheral selection is managed by control bits
+ * in the peripheral registers.
+ */
+#define REALTEK_GPIO_REG_CNR 0x00
+/* Clear bit (0) for input, set bit (1) for output */
+#define REALTEK_GPIO_REG_DIR 0x08
+#define REALTEK_GPIO_REG_DATA 0x0C
+/* Read bit for IRQ status, write 1 to clear IRQ */
+#define REALTEK_GPIO_REG_ISR 0x10
+/* Two bits per GPIO in IMR registers */
+#define REALTEK_GPIO_REG_IMR 0x14
+#define REALTEK_GPIO_REG_IMR_AB 0x14
+#define REALTEK_GPIO_REG_IMR_CD 0x18
+#define REALTEK_GPIO_IMR_LINE_MASK GENMASK(1, 0)
+#define REALTEK_GPIO_IRQ_EDGE_FALLING 1
+#define REALTEK_GPIO_IRQ_EDGE_RISING 2
+#define REALTEK_GPIO_IRQ_EDGE_BOTH 3
+
+#define REALTEK_GPIO_MAX 32
+#define REALTEK_GPIO_PORTS_PER_BANK 4
+
+/**
+ * realtek_gpio_ctrl - Realtek Otto GPIO driver data
+ *
+ * @gc: Associated gpio_chip instance
+ * @base: Base address of the register block for a GPIO bank
+ * @lock: Lock for accessing the IRQ registers and values
+ * @intr_mask: Mask for interrupts lines
+ * @intr_type: Interrupt type selection
+ *
+ * Because the interrupt mask register (IMR) combines the function of IRQ type
+ * selection and masking, two extra values are stored. @intr_mask is used to
+ * mask/unmask the interrupts for a GPIO port, and @intr_type is used to store
+ * the selected interrupt types. The logical AND of these values is written to
+ * IMR on changes.
+ */
+struct realtek_gpio_ctrl {
+ struct gpio_chip gc;
+ void __iomem *base;
+ raw_spinlock_t lock;
+ u16 intr_mask[REALTEK_GPIO_PORTS_PER_BANK];
+ u16 intr_type[REALTEK_GPIO_PORTS_PER_BANK];
+};
+
+/* Expand with more flags as devices with other quirks are added */
+enum realtek_gpio_flags {
+ /*
+ * Allow disabling interrupts, for cases where the port order is
+ * unknown. This may result in a port mismatch between ISR and IMR.
+ * An interrupt would appear to come from a different line than the
+ * line the IRQ handler was assigned to, causing uncaught interrupts.
+ */
+ GPIO_INTERRUPTS_DISABLED = BIT(0),
+};
+
+static struct realtek_gpio_ctrl *irq_data_to_ctrl(struct irq_data *data)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(data);
+
+ return container_of(gc, struct realtek_gpio_ctrl, gc);
+}
+
+/*
+ * Normal port order register access
+ *
+ * Port information is stored with the first port at offset 0, followed by the
+ * second, etc. Most registers store one bit per GPIO and use a u8 value per
+ * port. The two interrupt mask registers store two bits per GPIO, so use u16
+ * values.
+ */
+static void realtek_gpio_write_imr(struct realtek_gpio_ctrl *ctrl,
+ unsigned int port, u16 irq_type, u16 irq_mask)
+{
+ iowrite16(irq_type & irq_mask, ctrl->base + REALTEK_GPIO_REG_IMR + 2 * port);
+}
+
+static void realtek_gpio_clear_isr(struct realtek_gpio_ctrl *ctrl,
+ unsigned int port, u8 mask)
+{
+ iowrite8(mask, ctrl->base + REALTEK_GPIO_REG_ISR + port);
+}
+
+static u8 realtek_gpio_read_isr(struct realtek_gpio_ctrl *ctrl, unsigned int port)
+{
+ return ioread8(ctrl->base + REALTEK_GPIO_REG_ISR + port);
+}
+
+/* Set the rising and falling edge mask bits for a GPIO port pin */
+static u16 realtek_gpio_imr_bits(unsigned int pin, u16 value)
+{
+ return (value & REALTEK_GPIO_IMR_LINE_MASK) << 2 * pin;
+}
+
+static void realtek_gpio_irq_ack(struct irq_data *data)
+{
+ struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data);
+ irq_hw_number_t line = irqd_to_hwirq(data);
+ unsigned int port = line / 8;
+ unsigned int port_pin = line % 8;
+
+ realtek_gpio_clear_isr(ctrl, port, BIT(port_pin));
+}
+
+static void realtek_gpio_irq_unmask(struct irq_data *data)
+{
+ struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data);
+ unsigned int line = irqd_to_hwirq(data);
+ unsigned int port = line / 8;
+ unsigned int port_pin = line % 8;
+ unsigned long flags;
+ u16 m;
+
+ raw_spin_lock_irqsave(&ctrl->lock, flags);
+ m = ctrl->intr_mask[port];
+ m |= realtek_gpio_imr_bits(port_pin, REALTEK_GPIO_IMR_LINE_MASK);
+ ctrl->intr_mask[port] = m;
+ realtek_gpio_write_imr(ctrl, port, ctrl->intr_type[port], m);
+ raw_spin_unlock_irqrestore(&ctrl->lock, flags);
+}
+
+static void realtek_gpio_irq_mask(struct irq_data *data)
+{
+ struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data);
+ unsigned int line = irqd_to_hwirq(data);
+ unsigned int port = line / 8;
+ unsigned int port_pin = line % 8;
+ unsigned long flags;
+ u16 m;
+
+ raw_spin_lock_irqsave(&ctrl->lock, flags);
+ m = ctrl->intr_mask[port];
+ m &= ~realtek_gpio_imr_bits(port_pin, REALTEK_GPIO_IMR_LINE_MASK);
+ ctrl->intr_mask[port] = m;
+ realtek_gpio_write_imr(ctrl, port, ctrl->intr_type[port], m);
+ raw_spin_unlock_irqrestore(&ctrl->lock, flags);
+}
+
+static int realtek_gpio_irq_set_type(struct irq_data *data, unsigned int flow_type)
+{
+ struct realtek_gpio_ctrl *ctrl = irq_data_to_ctrl(data);
+ unsigned int line = irqd_to_hwirq(data);
+ unsigned int port = line / 8;
+ unsigned int port_pin = line % 8;
+ unsigned long flags;
+ u16 type, t;
+
+ switch (flow_type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_EDGE_FALLING:
+ type = REALTEK_GPIO_IRQ_EDGE_FALLING;
+ break;
+ case IRQ_TYPE_EDGE_RISING:
+ type = REALTEK_GPIO_IRQ_EDGE_RISING;
+ break;
+ case IRQ_TYPE_EDGE_BOTH:
+ type = REALTEK_GPIO_IRQ_EDGE_BOTH;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ irq_set_handler_locked(data, handle_edge_irq);
+
+ raw_spin_lock_irqsave(&ctrl->lock, flags);
+ t = ctrl->intr_type[port];
+ t &= ~realtek_gpio_imr_bits(port_pin, REALTEK_GPIO_IMR_LINE_MASK);
+ t |= realtek_gpio_imr_bits(port_pin, type);
+ ctrl->intr_type[port] = t;
+ realtek_gpio_write_imr(ctrl, port, t, ctrl->intr_mask[port]);
+ raw_spin_unlock_irqrestore(&ctrl->lock, flags);
+
+ return 0;
+}
+
+static void realtek_gpio_irq_handler(struct irq_desc *desc)
+{
+ struct gpio_chip *gc = irq_desc_get_handler_data(desc);
+ struct realtek_gpio_ctrl *ctrl = gpiochip_get_data(gc);
+ struct irq_chip *irq_chip = irq_desc_get_chip(desc);
+ unsigned int lines_done;
+ unsigned int port_pin_count;
+ unsigned int irq;
+ unsigned long status;
+ int offset;
+
+ chained_irq_enter(irq_chip, desc);
+
+ for (lines_done = 0; lines_done < gc->ngpio; lines_done += 8) {
+ status = realtek_gpio_read_isr(ctrl, lines_done / 8);
+ port_pin_count = min(gc->ngpio - lines_done, 8U);
+ for_each_set_bit(offset, &status, port_pin_count) {
+ irq = irq_find_mapping(gc->irq.domain, offset);
+ generic_handle_irq(irq);
+ }
+ }
+
+ chained_irq_exit(irq_chip, desc);
+}
+
+static int realtek_gpio_irq_init(struct gpio_chip *gc)
+{
+ struct realtek_gpio_ctrl *ctrl = gpiochip_get_data(gc);
+ unsigned int port;
+
+ for (port = 0; (port * 8) < gc->ngpio; port++) {
+ realtek_gpio_write_imr(ctrl, port, 0, 0);
+ realtek_gpio_clear_isr(ctrl, port, GENMASK(7, 0));
+ }
+
+ return 0;
+}
+
+static struct irq_chip realtek_gpio_irq_chip = {
+ .name = "realtek-otto-gpio",
+ .irq_ack = realtek_gpio_irq_ack,
+ .irq_mask = realtek_gpio_irq_mask,
+ .irq_unmask = realtek_gpio_irq_unmask,
+ .irq_set_type = realtek_gpio_irq_set_type,
+};
+
+static const struct of_device_id realtek_gpio_of_match[] = {
+ {
+ .compatible = "realtek,otto-gpio",
+ .data = (void *)GPIO_INTERRUPTS_DISABLED,
+ },
+ {
+ .compatible = "realtek,rtl8380-gpio",
+ },
+ {
+ .compatible = "realtek,rtl8390-gpio",
+ },
+ {}
+};
+MODULE_DEVICE_TABLE(of, realtek_gpio_of_match);
+
+static int realtek_gpio_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ unsigned int dev_flags;
+ struct gpio_irq_chip *girq;
+ struct realtek_gpio_ctrl *ctrl;
+ u32 ngpios;
+ int err, irq;
+
+ ctrl = devm_kzalloc(dev, sizeof(*ctrl), GFP_KERNEL);
+ if (!ctrl)
+ return -ENOMEM;
+
+ dev_flags = (unsigned int) device_get_match_data(dev);
+
+ ngpios = REALTEK_GPIO_MAX;
+ device_property_read_u32(dev, "ngpios", &ngpios);
+
+ if (ngpios > REALTEK_GPIO_MAX) {
+ dev_err(&pdev->dev, "invalid ngpios (max. %d)\n",
+ REALTEK_GPIO_MAX);
+ return -EINVAL;
+ }
+
+ ctrl->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(ctrl->base))
+ return PTR_ERR(ctrl->base);
+
+ raw_spin_lock_init(&ctrl->lock);
+
+ err = bgpio_init(&ctrl->gc, dev, 4,
+ ctrl->base + REALTEK_GPIO_REG_DATA, NULL, NULL,
+ ctrl->base + REALTEK_GPIO_REG_DIR, NULL,
+ BGPIOF_BIG_ENDIAN_BYTE_ORDER);
+ if (err) {
+ dev_err(dev, "unable to init generic GPIO");
+ return err;
+ }
+
+ ctrl->gc.ngpio = ngpios;
+ ctrl->gc.owner = THIS_MODULE;
+
+ irq = platform_get_irq_optional(pdev, 0);
+ if (!(dev_flags & GPIO_INTERRUPTS_DISABLED) && irq > 0) {
+ girq = &ctrl->gc.irq;
+ girq->chip = &realtek_gpio_irq_chip;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_bad_irq;
+ girq->parent_handler = realtek_gpio_irq_handler;
+ girq->num_parents = 1;
+ girq->parents = devm_kcalloc(dev, girq->num_parents,
+ sizeof(*girq->parents), GFP_KERNEL);
+ if (!girq->parents)
+ return -ENOMEM;
+ girq->parents[0] = irq;
+ girq->init_hw = realtek_gpio_irq_init;
+ }
+
+ return devm_gpiochip_add_data(dev, &ctrl->gc, ctrl);
+}
+
+static struct platform_driver realtek_gpio_driver = {
+ .driver = {
+ .name = "realtek-otto-gpio",
+ .of_match_table = realtek_gpio_of_match,
+ },
+ .probe = realtek_gpio_probe,
+};
+module_platform_driver(realtek_gpio_driver);
+
+MODULE_DESCRIPTION("Realtek Otto GPIO support");
+MODULE_AUTHOR("Sander Vanheule <sander@svanheule.net>");
+MODULE_LICENSE("GPL v2");
*/
#include <linux/acpi.h>
+#include <linux/bitops.h>
#include <linux/errno.h>
#include <linux/gpio/driver.h>
#include <linux/io.h>
+#include <linux/irq.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/pci_ids.h>
#include <linux/platform_device.h>
+#include <linux/types.h>
#define GEN 0x00
#define GIO 0x04
#define GLV 0x08
+#define GTPE 0x0c
+#define GTNE 0x10
+#define GGPE 0x14
+#define GSMI 0x18
+#define GTS 0x1c
+
+#define CORE_BANK_OFFSET 0x00
+#define RESUME_BANK_OFFSET 0x20
+
+/*
+ * iLB datasheet describes GPE0BLK registers, in particular GPE0E.GPIO bit.
+ * Document Number: 328195-001
+ */
+#define GPE0E_GPIO 14
struct sch_gpio {
struct gpio_chip chip;
+ struct irq_chip irqchip;
spinlock_t lock;
unsigned short iobase;
unsigned short resume_base;
+
+ /* GPE handling */
+ u32 gpe;
+ acpi_gpe_handler gpe_handler;
};
static unsigned int sch_gpio_offset(struct sch_gpio *sch, unsigned int gpio,
unsigned int reg)
{
- unsigned int base = 0;
+ unsigned int base = CORE_BANK_OFFSET;
if (gpio >= sch->resume_base) {
gpio -= sch->resume_base;
- base += 0x20;
+ base = RESUME_BANK_OFFSET;
}
return base + reg + gpio / 8;
static int sch_gpio_direction_in(struct gpio_chip *gc, unsigned int gpio_num)
{
struct sch_gpio *sch = gpiochip_get_data(gc);
+ unsigned long flags;
- spin_lock(&sch->lock);
+ spin_lock_irqsave(&sch->lock, flags);
sch_gpio_reg_set(sch, gpio_num, GIO, 1);
- spin_unlock(&sch->lock);
+ spin_unlock_irqrestore(&sch->lock, flags);
return 0;
}
static void sch_gpio_set(struct gpio_chip *gc, unsigned int gpio_num, int val)
{
struct sch_gpio *sch = gpiochip_get_data(gc);
+ unsigned long flags;
- spin_lock(&sch->lock);
+ spin_lock_irqsave(&sch->lock, flags);
sch_gpio_reg_set(sch, gpio_num, GLV, val);
- spin_unlock(&sch->lock);
+ spin_unlock_irqrestore(&sch->lock, flags);
}
static int sch_gpio_direction_out(struct gpio_chip *gc, unsigned int gpio_num,
int val)
{
struct sch_gpio *sch = gpiochip_get_data(gc);
+ unsigned long flags;
- spin_lock(&sch->lock);
+ spin_lock_irqsave(&sch->lock, flags);
sch_gpio_reg_set(sch, gpio_num, GIO, 0);
- spin_unlock(&sch->lock);
+ spin_unlock_irqrestore(&sch->lock, flags);
/*
* according to the datasheet, writing to the level register has no
.get_direction = sch_gpio_get_direction,
};
+static int sch_irq_type(struct irq_data *d, unsigned int type)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct sch_gpio *sch = gpiochip_get_data(gc);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+ unsigned long flags;
+ int rising, falling;
+
+ switch (type & IRQ_TYPE_SENSE_MASK) {
+ case IRQ_TYPE_EDGE_RISING:
+ rising = 1;
+ falling = 0;
+ break;
+ case IRQ_TYPE_EDGE_FALLING:
+ rising = 0;
+ falling = 1;
+ break;
+ case IRQ_TYPE_EDGE_BOTH:
+ rising = 1;
+ falling = 1;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ spin_lock_irqsave(&sch->lock, flags);
+
+ sch_gpio_reg_set(sch, gpio_num, GTPE, rising);
+ sch_gpio_reg_set(sch, gpio_num, GTNE, falling);
+
+ irq_set_handler_locked(d, handle_edge_irq);
+
+ spin_unlock_irqrestore(&sch->lock, flags);
+
+ return 0;
+}
+
+static void sch_irq_ack(struct irq_data *d)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct sch_gpio *sch = gpiochip_get_data(gc);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sch->lock, flags);
+ sch_gpio_reg_set(sch, gpio_num, GTS, 1);
+ spin_unlock_irqrestore(&sch->lock, flags);
+}
+
+static void sch_irq_mask_unmask(struct irq_data *d, int val)
+{
+ struct gpio_chip *gc = irq_data_get_irq_chip_data(d);
+ struct sch_gpio *sch = gpiochip_get_data(gc);
+ irq_hw_number_t gpio_num = irqd_to_hwirq(d);
+ unsigned long flags;
+
+ spin_lock_irqsave(&sch->lock, flags);
+ sch_gpio_reg_set(sch, gpio_num, GGPE, val);
+ spin_unlock_irqrestore(&sch->lock, flags);
+}
+
+static void sch_irq_mask(struct irq_data *d)
+{
+ sch_irq_mask_unmask(d, 0);
+}
+
+static void sch_irq_unmask(struct irq_data *d)
+{
+ sch_irq_mask_unmask(d, 1);
+}
+
+static u32 sch_gpio_gpe_handler(acpi_handle gpe_device, u32 gpe, void *context)
+{
+ struct sch_gpio *sch = context;
+ struct gpio_chip *gc = &sch->chip;
+ unsigned long core_status, resume_status;
+ unsigned long pending;
+ unsigned long flags;
+ int offset;
+ u32 ret;
+
+ spin_lock_irqsave(&sch->lock, flags);
+
+ core_status = inl(sch->iobase + CORE_BANK_OFFSET + GTS);
+ resume_status = inl(sch->iobase + RESUME_BANK_OFFSET + GTS);
+
+ spin_unlock_irqrestore(&sch->lock, flags);
+
+ pending = (resume_status << sch->resume_base) | core_status;
+ for_each_set_bit(offset, &pending, sch->chip.ngpio)
+ generic_handle_irq(irq_find_mapping(gc->irq.domain, offset));
+
+ /* Set returning value depending on whether we handled an interrupt */
+ ret = pending ? ACPI_INTERRUPT_HANDLED : ACPI_INTERRUPT_NOT_HANDLED;
+
+ /* Acknowledge GPE to ACPICA */
+ ret |= ACPI_REENABLE_GPE;
+
+ return ret;
+}
+
+static void sch_gpio_remove_gpe_handler(void *data)
+{
+ struct sch_gpio *sch = data;
+
+ acpi_disable_gpe(NULL, sch->gpe);
+ acpi_remove_gpe_handler(NULL, sch->gpe, sch->gpe_handler);
+}
+
+static int sch_gpio_install_gpe_handler(struct sch_gpio *sch)
+{
+ struct device *dev = sch->chip.parent;
+ acpi_status status;
+
+ status = acpi_install_gpe_handler(NULL, sch->gpe, ACPI_GPE_LEVEL_TRIGGERED,
+ sch->gpe_handler, sch);
+ if (ACPI_FAILURE(status)) {
+ dev_err(dev, "Failed to install GPE handler for %u: %s\n",
+ sch->gpe, acpi_format_exception(status));
+ return -ENODEV;
+ }
+
+ status = acpi_enable_gpe(NULL, sch->gpe);
+ if (ACPI_FAILURE(status)) {
+ dev_err(dev, "Failed to enable GPE handler for %u: %s\n",
+ sch->gpe, acpi_format_exception(status));
+ acpi_remove_gpe_handler(NULL, sch->gpe, sch->gpe_handler);
+ return -ENODEV;
+ }
+
+ return devm_add_action_or_reset(dev, sch_gpio_remove_gpe_handler, sch);
+}
+
static int sch_gpio_probe(struct platform_device *pdev)
{
+ struct gpio_irq_chip *girq;
struct sch_gpio *sch;
struct resource *res;
+ int ret;
sch = devm_kzalloc(&pdev->dev, sizeof(*sch), GFP_KERNEL);
if (!sch)
platform_set_drvdata(pdev, sch);
+ sch->irqchip.name = "sch_gpio";
+ sch->irqchip.irq_ack = sch_irq_ack;
+ sch->irqchip.irq_mask = sch_irq_mask;
+ sch->irqchip.irq_unmask = sch_irq_unmask;
+ sch->irqchip.irq_set_type = sch_irq_type;
+
+ girq = &sch->chip.irq;
+ girq->chip = &sch->irqchip;
+ girq->num_parents = 0;
+ girq->parents = NULL;
+ girq->parent_handler = NULL;
+ girq->default_type = IRQ_TYPE_NONE;
+ girq->handler = handle_bad_irq;
+
+ /* GPE setup is optional */
+ sch->gpe = GPE0E_GPIO;
+ sch->gpe_handler = sch_gpio_gpe_handler;
+
+ ret = sch_gpio_install_gpe_handler(sch);
+ if (ret)
+ dev_warn(&pdev->dev, "Can't setup GPE, no IRQ support\n");
+
return devm_gpiochip_add_data(&pdev->dev, &sch->chip, sch);
}
kfree(acpi_gpio);
}
+void acpi_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev)
+{
+ /* Set default fwnode to parent's one if present */
+ if (gc->parent)
+ ACPI_COMPANION_SET(&gdev->dev, ACPI_COMPANION(gc->parent));
+}
+
static int acpi_gpio_package_count(const union acpi_object *obj)
{
const union acpi_object *element = obj->package.elements;
.no_edge_events_on_boot = true,
},
},
+ {
+ /*
+ * The Dell Venue 10 Pro 5055, with Bay Trail SoC + TI PMIC uses an
+ * external embedded-controller connected via I2C + an ACPI GPIO
+ * event handler on INT33FFC:02 pin 12, causing spurious wakeups.
+ */
+ .matches = {
+ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ DMI_MATCH(DMI_PRODUCT_NAME, "Venue 10 Pro 5055"),
+ },
+ .driver_data = &(struct acpi_gpiolib_dmi_quirk) {
+ .ignore_wake = "INT33FC:02@12",
+ },
+ },
{
/*
* HP X2 10 models with Cherry Trail SoC + TI PMIC use an
void acpi_gpiochip_add(struct gpio_chip *chip);
void acpi_gpiochip_remove(struct gpio_chip *chip);
+void acpi_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev);
+
void acpi_gpiochip_request_interrupts(struct gpio_chip *chip);
void acpi_gpiochip_free_interrupts(struct gpio_chip *chip);
static inline void acpi_gpiochip_add(struct gpio_chip *chip) { }
static inline void acpi_gpiochip_remove(struct gpio_chip *chip) { }
+static inline void acpi_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev) { }
+
static inline void
acpi_gpiochip_request_interrupts(struct gpio_chip *chip) { }
void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev)
{
+ /* Set default OF node to parent's one if present */
+ if (gc->parent)
+ gdev->dev.of_node = gc->parent->of_node;
+
/* If the gpiochip has an assigned OF node this takes precedence */
if (gc->of_node)
gdev->dev.of_node = gc->of_node;
else
gc->of_node = gdev->dev.of_node;
- if (gdev->dev.of_node)
- gdev->dev.fwnode = of_fwnode_handle(gdev->dev.of_node);
}
if (!gdev)
return -ENOMEM;
gdev->dev.bus = &gpio_bus_type;
+ gdev->dev.parent = gc->parent;
gdev->chip = gc;
gc->gpiodev = gdev;
- if (gc->parent) {
- gdev->dev.parent = gc->parent;
- gdev->dev.of_node = gc->parent->of_node;
- }
of_gpio_dev_init(gc, gdev);
+ acpi_gpio_dev_init(gc, gdev);
/*
* Assign fwnode depending on the result of the previous calls,
struct lock_class_key *lock_key,
struct lock_class_key *request_key)
{
+ struct fwnode_handle *fwnode = dev_fwnode(&gc->gpiodev->dev);
struct irq_chip *irqchip = gc->irq.chip;
- const struct irq_domain_ops *ops = NULL;
- struct device_node *np;
unsigned int type;
unsigned int i;
return -EINVAL;
}
- np = gc->gpiodev->dev.of_node;
type = gc->irq.default_type;
/*
* used to configure the interrupts, as you may end up with
* conflicting triggers. Tell the user, and reset to NONE.
*/
- if (WARN(np && type != IRQ_TYPE_NONE,
- "%s: Ignoring %u default trigger\n", np->full_name, type))
- type = IRQ_TYPE_NONE;
-
- if (has_acpi_companion(gc->parent) && type != IRQ_TYPE_NONE) {
- acpi_handle_warn(ACPI_HANDLE(gc->parent),
- "Ignoring %u default trigger\n", type);
+ if (WARN(fwnode && type != IRQ_TYPE_NONE,
+ "%pfw: Ignoring %u default trigger\n", fwnode, type))
type = IRQ_TYPE_NONE;
- }
if (gc->to_irq)
chip_warn(gc, "to_irq is redefined in %s and you shouldn't rely on it\n", __func__);
return ret;
} else {
/* Some drivers provide custom irqdomain ops */
- if (gc->irq.domain_ops)
- ops = gc->irq.domain_ops;
-
- if (!ops)
- ops = &gpiochip_domain_ops;
- gc->irq.domain = irq_domain_add_simple(np,
+ gc->irq.domain = irq_domain_create_simple(fwnode,
gc->ngpio,
gc->irq.first,
- ops, gc);
+ gc->irq.domain_ops ?: &gpiochip_domain_ops,
+ gc);
if (!gc->irq.domain)
return -EINVAL;
}
*/
int gpiod_count(struct device *dev, const char *con_id)
{
+ const struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL;
int count = -ENOENT;
- if (IS_ENABLED(CONFIG_OF) && dev && dev->of_node)
+ if (is_of_node(fwnode))
count = of_gpio_get_count(dev, con_id);
- else if (IS_ENABLED(CONFIG_ACPI) && dev && ACPI_HANDLE(dev))
+ else if (is_acpi_node(fwnode))
count = acpi_gpio_count(dev, con_id);
if (count < 0)
int ret;
/* Maybe we have a device name, maybe not */
const char *devname = dev ? dev_name(dev) : "?";
+ const struct fwnode_handle *fwnode = dev ? dev_fwnode(dev) : NULL;
dev_dbg(dev, "GPIO lookup for consumer %s\n", con_id);
- if (dev) {
- /* Using device tree? */
- if (IS_ENABLED(CONFIG_OF) && dev->of_node) {
- dev_dbg(dev, "using device tree for GPIO lookup\n");
- desc = of_find_gpio(dev, con_id, idx, &lookupflags);
- } else if (ACPI_COMPANION(dev)) {
- dev_dbg(dev, "using ACPI for GPIO lookup\n");
- desc = acpi_find_gpio(dev, con_id, idx, &flags, &lookupflags);
- }
+ /* Using device tree? */
+ if (is_of_node(fwnode)) {
+ dev_dbg(dev, "using device tree for GPIO lookup\n");
+ desc = of_find_gpio(dev, con_id, idx, &lookupflags);
+ } else if (is_acpi_node(fwnode)) {
+ dev_dbg(dev, "using ACPI for GPIO lookup\n");
+ desc = acpi_find_gpio(dev, con_id, idx, &flags, &lookupflags);
}
/*
struct gpio_desc *desc = ERR_PTR(-ENODEV);
int ret;
- if (!fwnode)
- return ERR_PTR(-EINVAL);
-
if (is_of_node(fwnode)) {
desc = gpiod_get_from_of_node(to_of_node(fwnode),
propname, index,
acpi_gpio_update_gpiod_flags(&dflags, &info);
acpi_gpio_update_gpiod_lookup_flags(&lflags, &info);
- }
+ } else
+ return ERR_PTR(-EINVAL);
/* Currently only ACPI takes this path */
ret = gpiod_request(desc, label);
static int gpio_bus_match(struct device *dev, struct device_driver *drv)
{
+ struct fwnode_handle *fwnode = dev_fwnode(dev);
+
/*
* Only match if the fwnode doesn't already have a proper struct device
* created for it.
*/
- if (dev->fwnode && dev->fwnode->dev != dev)
+ if (fwnode && fwnode->dev != dev)
return 0;
return 1;
}
drm_WARN_ON_ONCE(&i915->drm, intel_dp->active_mst_links < 0);
for (;;) {
- u8 esi[DP_DPRX_ESI_LEN] = {};
+ /*
+ * The +2 is because DP_DPRX_ESI_LEN is 14, but we then
+ * pass in "esi+10" to drm_dp_channel_eq_ok(), which
+ * takes a 6-byte array. So we actually need 16 bytes
+ * here.
+ *
+ * Somebody who knows what the limits actually are
+ * should check this, but for now this is at least
+ * harmless and avoids a valid compiler warning about
+ * using more of the array than we have allocated.
+ */
+ u8 esi[DP_DPRX_ESI_LEN+2] = {};
bool handled;
int retry;
max_order = MAX_ORDER;
#ifdef CONFIG_SWIOTLB
- if (swiotlb_nr_tbl()) {
+ if (is_swiotlb_active()) {
unsigned int max_segment;
max_segment = swiotlb_max_segment();
}
#if IS_ENABLED(CONFIG_SWIOTLB) && IS_ENABLED(CONFIG_X86)
- need_swiotlb = !!swiotlb_nr_tbl();
+ need_swiotlb = is_swiotlb_active();
#endif
ret = ttm_device_init(&drm->ttm.bdev, &nouveau_bo_driver, drm->dev->dev,
-/* vim: set ts=8 sw=8 tw=78 ai noexpandtab */
/* qxl_drv.c -- QXL driver -*- linux-c -*-
*
* Copyright 2011 Red Hat, Inc.
If unsure, say N.
-config HWSPINLOCK_SIRF
- tristate "SIRF Hardware Spinlock device"
- depends on ARCH_SIRF || COMPILE_TEST
- help
- Say y here to support the SIRF Hardware Spinlock device, which
- provides a synchronisation mechanism for the various processors
- on the SoC.
-
- It's safe to say n here if you're not interested in SIRF hardware
- spinlock or just want a bare minimum kernel.
-
config HWSPINLOCK_SPRD
tristate "SPRD Hardware Spinlock device"
depends on ARCH_SPRD || COMPILE_TEST
obj-$(CONFIG_HWSPINLOCK) += hwspinlock_core.o
obj-$(CONFIG_HWSPINLOCK_OMAP) += omap_hwspinlock.o
obj-$(CONFIG_HWSPINLOCK_QCOM) += qcom_hwspinlock.o
-obj-$(CONFIG_HWSPINLOCK_SIRF) += sirf_hwspinlock.o
obj-$(CONFIG_HWSPINLOCK_SPRD) += sprd_hwspinlock.o
obj-$(CONFIG_HWSPINLOCK_STM32) += stm32_hwspinlock.o
obj-$(CONFIG_HSEM_U8500) += u8500_hsem.o
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0
-/*
- * SIRF hardware spinlock driver
- *
- * Copyright (c) 2015 Cambridge Silicon Radio Limited, a CSR plc group company.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/hwspinlock.h>
-#include <linux/platform_device.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-
-#include "hwspinlock_internal.h"
-
-struct sirf_hwspinlock {
- void __iomem *io_base;
- struct hwspinlock_device bank;
-};
-
-/* Number of Hardware Spinlocks*/
-#define HW_SPINLOCK_NUMBER 30
-
-/* Hardware spinlock register offsets */
-#define HW_SPINLOCK_BASE 0x404
-#define HW_SPINLOCK_OFFSET(x) (HW_SPINLOCK_BASE + 0x4 * (x))
-
-static int sirf_hwspinlock_trylock(struct hwspinlock *lock)
-{
- void __iomem *lock_addr = lock->priv;
-
- /* attempt to acquire the lock by reading value == 1 from it */
- return !!readl(lock_addr);
-}
-
-static void sirf_hwspinlock_unlock(struct hwspinlock *lock)
-{
- void __iomem *lock_addr = lock->priv;
-
- /* release the lock by writing 0 to it */
- writel(0, lock_addr);
-}
-
-static const struct hwspinlock_ops sirf_hwspinlock_ops = {
- .trylock = sirf_hwspinlock_trylock,
- .unlock = sirf_hwspinlock_unlock,
-};
-
-static int sirf_hwspinlock_probe(struct platform_device *pdev)
-{
- struct sirf_hwspinlock *hwspin;
- struct hwspinlock *hwlock;
- int idx;
-
- if (!pdev->dev.of_node)
- return -ENODEV;
-
- hwspin = devm_kzalloc(&pdev->dev,
- struct_size(hwspin, bank.lock,
- HW_SPINLOCK_NUMBER),
- GFP_KERNEL);
- if (!hwspin)
- return -ENOMEM;
-
- /* retrieve io base */
- hwspin->io_base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(hwspin->io_base))
- return PTR_ERR(hwspin->io_base);
-
- for (idx = 0; idx < HW_SPINLOCK_NUMBER; idx++) {
- hwlock = &hwspin->bank.lock[idx];
- hwlock->priv = hwspin->io_base + HW_SPINLOCK_OFFSET(idx);
- }
-
- platform_set_drvdata(pdev, hwspin);
-
- return devm_hwspin_lock_register(&pdev->dev, &hwspin->bank,
- &sirf_hwspinlock_ops, 0,
- HW_SPINLOCK_NUMBER);
-}
-
-static const struct of_device_id sirf_hwpinlock_ids[] = {
- { .compatible = "sirf,hwspinlock", },
- {},
-};
-MODULE_DEVICE_TABLE(of, sirf_hwpinlock_ids);
-
-static struct platform_driver sirf_hwspinlock_driver = {
- .probe = sirf_hwspinlock_probe,
- .driver = {
- .name = "atlas7_hwspinlock",
- .of_match_table = sirf_hwpinlock_ids,
- },
-};
-
-module_platform_driver(sirf_hwspinlock_driver);
-
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("SIRF Hardware spinlock driver");
-MODULE_AUTHOR("Wei Chen <wei.chen@csr.com>");
ret = i3c_master_bus_init(master);
if (ret)
- goto err_destroy_wq;
+ goto err_put_dev;
ret = device_add(&master->dev);
if (ret)
err_cleanup_bus:
i3c_master_bus_cleanup(master);
-err_destroy_wq:
- destroy_workqueue(master->wq);
-
err_put_dev:
put_device(&master->dev);
cmd->in = NULL;
cmd->out = &ccc->id;
cmd->len = 1;
- cmd->read_len = xfer_len;
cmd->read_len = 0;
cmd->continued = true;
int rtrs_clt_rdma_cq_direct(struct rtrs_clt *clt, unsigned int index)
{
- int cnt;
+ /* If no path, return -1 for block layer not to try again */
+ int cnt = -1;
struct rtrs_con *con;
struct rtrs_clt_sess *sess;
struct path_it it;
obj-$(CONFIG_INPUT) += input-core.o
input-core-y := input.o input-compat.o input-mt.o input-poller.o ff-core.o
+input-core-y += touchscreen.o
obj-$(CONFIG_INPUT_FF_MEMLESS) += ff-memless.o
obj-$(CONFIG_INPUT_SPARSEKMAP) += sparse-keymap.o
{ 0x1689, 0xfd00, "Razer Onza Tournament Edition", 0, XTYPE_XBOX360 },
{ 0x1689, 0xfd01, "Razer Onza Classic Edition", 0, XTYPE_XBOX360 },
{ 0x1689, 0xfe00, "Razer Sabertooth", 0, XTYPE_XBOX360 },
+ { 0x1949, 0x041a, "Amazon Game Controller", 0, XTYPE_XBOX360 },
{ 0x1bad, 0x0002, "Harmonix Rock Band Guitar", 0, XTYPE_XBOX360 },
{ 0x1bad, 0x0003, "Harmonix Rock Band Drumkit", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
{ 0x1bad, 0x0130, "Ion Drum Rocker", MAP_DPAD_TO_BUTTONS, XTYPE_XBOX360 },
XPAD_XBOX360_VENDOR(0x15e4), /* Numark X-Box 360 controllers */
XPAD_XBOX360_VENDOR(0x162e), /* Joytech X-Box 360 controllers */
XPAD_XBOX360_VENDOR(0x1689), /* Razer Onza */
+ XPAD_XBOX360_VENDOR(0x1949), /* Amazon controllers */
XPAD_XBOX360_VENDOR(0x1bad), /* Harminix Rock Band Guitar and Drums */
XPAD_XBOX360_VENDOR(0x20d6), /* PowerA Controllers */
XPAD_XBOXONE_VENDOR(0x20d6), /* PowerA Controllers */
#include <linux/module.h>
+#include <linux/hrtimer.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/interrupt.h>
unsigned short *code;
- struct timer_list release_timer;
+ struct hrtimer release_timer;
unsigned int release_delay; /* in msecs, for IRQ-only buttons */
struct delayed_work work;
+ struct hrtimer debounce_timer;
unsigned int software_debounce; /* in msecs, for GPIO-driven buttons */
unsigned int irq;
bool disabled;
bool key_pressed;
bool suspended;
+ bool debounce_use_hrtimer;
};
struct gpio_keys_drvdata {
return (type == EV_KEY) ? dev->keybit : dev->swbit;
}
+static void gpio_keys_quiesce_key(void *data)
+{
+ struct gpio_button_data *bdata = data;
+
+ if (!bdata->gpiod)
+ hrtimer_cancel(&bdata->release_timer);
+ if (bdata->debounce_use_hrtimer)
+ hrtimer_cancel(&bdata->debounce_timer);
+ else
+ cancel_delayed_work_sync(&bdata->work);
+}
+
/**
* gpio_keys_disable_button() - disables given GPIO button
* @bdata: button data for button to be disabled
* Disable IRQ and associated timer/work structure.
*/
disable_irq(bdata->irq);
-
- if (bdata->gpiod)
- cancel_delayed_work_sync(&bdata->work);
- else
- del_timer_sync(&bdata->release_timer);
-
+ gpio_keys_quiesce_key(bdata);
bdata->disabled = true;
}
}
unsigned int type = button->type ?: EV_KEY;
int state;
- state = gpiod_get_value_cansleep(bdata->gpiod);
+ state = bdata->debounce_use_hrtimer ?
+ gpiod_get_value(bdata->gpiod) :
+ gpiod_get_value_cansleep(bdata->gpiod);
if (state < 0) {
dev_err(input->dev.parent,
"failed to get gpio state: %d\n", state);
} else {
input_event(input, type, *bdata->code, state);
}
- input_sync(input);
+}
+
+static void gpio_keys_debounce_event(struct gpio_button_data *bdata)
+{
+ gpio_keys_gpio_report_event(bdata);
+ input_sync(bdata->input);
+
+ if (bdata->button->wakeup)
+ pm_relax(bdata->input->dev.parent);
}
static void gpio_keys_gpio_work_func(struct work_struct *work)
struct gpio_button_data *bdata =
container_of(work, struct gpio_button_data, work.work);
- gpio_keys_gpio_report_event(bdata);
+ gpio_keys_debounce_event(bdata);
+}
- if (bdata->button->wakeup)
- pm_relax(bdata->input->dev.parent);
+static enum hrtimer_restart gpio_keys_debounce_timer(struct hrtimer *t)
+{
+ struct gpio_button_data *bdata =
+ container_of(t, struct gpio_button_data, debounce_timer);
+
+ gpio_keys_debounce_event(bdata);
+
+ return HRTIMER_NORESTART;
}
static irqreturn_t gpio_keys_gpio_isr(int irq, void *dev_id)
}
}
- mod_delayed_work(system_wq,
- &bdata->work,
- msecs_to_jiffies(bdata->software_debounce));
+ if (bdata->debounce_use_hrtimer) {
+ hrtimer_start(&bdata->debounce_timer,
+ ms_to_ktime(bdata->software_debounce),
+ HRTIMER_MODE_REL);
+ } else {
+ mod_delayed_work(system_wq,
+ &bdata->work,
+ msecs_to_jiffies(bdata->software_debounce));
+ }
return IRQ_HANDLED;
}
-static void gpio_keys_irq_timer(struct timer_list *t)
+static enum hrtimer_restart gpio_keys_irq_timer(struct hrtimer *t)
{
- struct gpio_button_data *bdata = from_timer(bdata, t, release_timer);
+ struct gpio_button_data *bdata = container_of(t,
+ struct gpio_button_data,
+ release_timer);
struct input_dev *input = bdata->input;
- unsigned long flags;
- spin_lock_irqsave(&bdata->lock, flags);
if (bdata->key_pressed) {
input_event(input, EV_KEY, *bdata->code, 0);
input_sync(input);
bdata->key_pressed = false;
}
- spin_unlock_irqrestore(&bdata->lock, flags);
+
+ return HRTIMER_NORESTART;
}
static irqreturn_t gpio_keys_irq_isr(int irq, void *dev_id)
}
if (bdata->release_delay)
- mod_timer(&bdata->release_timer,
- jiffies + msecs_to_jiffies(bdata->release_delay));
+ hrtimer_start(&bdata->release_timer,
+ ms_to_ktime(bdata->release_delay),
+ HRTIMER_MODE_REL_HARD);
out:
spin_unlock_irqrestore(&bdata->lock, flags);
return IRQ_HANDLED;
}
-static void gpio_keys_quiesce_key(void *data)
-{
- struct gpio_button_data *bdata = data;
-
- if (bdata->gpiod)
- cancel_delayed_work_sync(&bdata->work);
- else
- del_timer_sync(&bdata->release_timer);
-}
-
static int gpio_keys_setup_key(struct platform_device *pdev,
struct input_dev *input,
struct gpio_keys_drvdata *ddata,
if (error < 0)
bdata->software_debounce =
button->debounce_interval;
+
+ /*
+ * If reading the GPIO won't sleep, we can use a
+ * hrtimer instead of a standard timer for the software
+ * debounce, to reduce the latency as much as possible.
+ */
+ bdata->debounce_use_hrtimer =
+ !gpiod_cansleep(bdata->gpiod);
}
if (button->irq) {
INIT_DELAYED_WORK(&bdata->work, gpio_keys_gpio_work_func);
+ hrtimer_init(&bdata->debounce_timer,
+ CLOCK_REALTIME, HRTIMER_MODE_REL);
+ bdata->debounce_timer.function = gpio_keys_debounce_timer;
+
isr = gpio_keys_gpio_isr;
irqflags = IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING;
}
bdata->release_delay = button->debounce_interval;
- timer_setup(&bdata->release_timer, gpio_keys_irq_timer, 0);
+ hrtimer_init(&bdata->release_timer,
+ CLOCK_REALTIME, HRTIMER_MODE_REL_HARD);
+ bdata->release_timer.function = gpio_keys_irq_timer;
isr = gpio_keys_irq_isr;
irqflags = 0;
return -EIO;
}
-#ifdef CONFIG_OF
static const struct of_device_id imx_keypad_of_match[] = {
{ .compatible = "fsl,imx21-kpp", },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, imx_keypad_of_match);
-#endif
static int imx_keypad_probe(struct platform_device *pdev)
{
- const struct matrix_keymap_data *keymap_data =
- dev_get_platdata(&pdev->dev);
struct imx_keypad *keypad;
struct input_dev *input_dev;
int irq, error, i, row, col;
- if (!keymap_data && !pdev->dev.of_node) {
- dev_err(&pdev->dev, "no keymap defined\n");
- return -EINVAL;
- }
-
irq = platform_get_irq(pdev, 0);
if (irq < 0)
return irq;
input_dev->open = imx_keypad_open;
input_dev->close = imx_keypad_close;
- error = matrix_keypad_build_keymap(keymap_data, NULL,
+ error = matrix_keypad_build_keymap(NULL, NULL,
MAX_MATRIX_KEY_ROWS,
MAX_MATRIX_KEY_COLS,
keypad->keycodes, input_dev);
.driver = {
.name = "imx-keypad",
.pm = &imx_kbd_pm_ops,
- .of_match_table = of_match_ptr(imx_keypad_of_match),
+ .of_match_table = imx_keypad_of_match,
},
.probe = imx_keypad_probe,
};
error = request_threaded_irq(chip->irqnum, NULL,
tca6416_keys_isr,
IRQF_TRIGGER_FALLING |
- IRQF_ONESHOT,
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
"tca6416-keypad", chip);
if (error) {
dev_dbg(&client->dev,
chip->irqnum, error);
goto fail1;
}
- disable_irq(chip->irqnum);
}
error = input_register_device(input);
input_set_drvdata(kbc->idev, kbc);
err = devm_request_irq(&pdev->dev, kbc->irq, tegra_kbc_isr,
- IRQF_TRIGGER_HIGH, pdev->name, kbc);
+ IRQF_TRIGGER_HIGH | IRQF_NO_AUTOEN,
+ pdev->name, kbc);
if (err) {
dev_err(&pdev->dev, "failed to request keyboard IRQ\n");
return err;
}
- disable_irq(kbc->irq);
-
err = input_register_device(kbc->idev);
if (err) {
dev_err(&pdev->dev, "failed to register input device\n");
To compile this driver as a module, choose M here: the
module will be called iqs269a.
+config INPUT_IQS626A
+ tristate "Azoteq IQS626A capacitive touch controller"
+ depends on I2C
+ select REGMAP_I2C
+ help
+ Say Y to enable support for the Azoteq IQS626A capacitive
+ touch controller.
+
+ To compile this driver as a module, choose M here: the
+ module will be called iqs626a.
+
config INPUT_CMA3000
tristate "VTI CMA3000 Tri-axis accelerometer"
help
obj-$(CONFIG_HP_SDC_RTC) += hp_sdc_rtc.o
obj-$(CONFIG_INPUT_IMS_PCU) += ims-pcu.o
obj-$(CONFIG_INPUT_IQS269A) += iqs269a.o
+obj-$(CONFIG_INPUT_IQS626A) += iqs626a.o
obj-$(CONFIG_INPUT_IXP4XX_BEEPER) += ixp4xx-beeper.o
obj-$(CONFIG_INPUT_KEYSPAN_REMOTE) += keyspan_remote.o
obj-$(CONFIG_INPUT_KXTJ9) += kxtj9.o
}
usb_set_intfdata(pcu->ctrl_intf, pcu);
- usb_set_intfdata(pcu->data_intf, pcu);
error = ims_pcu_buffers_alloc(pcu);
if (error)
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Azoteq IQS626A Capacitive Touch Controller
+ *
+ * Copyright (C) 2020 Jeff LaBundy <jeff@labundy.com>
+ *
+ * This driver registers up to 2 input devices: one representing capacitive or
+ * inductive keys as well as Hall-effect switches, and one for a trackpad that
+ * can express various gestures.
+ */
+
+#include <linux/bits.h>
+#include <linux/completion.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/touchscreen.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+
+#define IQS626_VER_INFO 0x00
+#define IQS626_VER_INFO_PROD_NUM 0x51
+
+#define IQS626_SYS_FLAGS 0x02
+#define IQS626_SYS_FLAGS_SHOW_RESET BIT(15)
+#define IQS626_SYS_FLAGS_IN_ATI BIT(12)
+#define IQS626_SYS_FLAGS_PWR_MODE_MASK GENMASK(9, 8)
+#define IQS626_SYS_FLAGS_PWR_MODE_SHIFT 8
+
+#define IQS626_HALL_OUTPUT 0x23
+
+#define IQS626_SYS_SETTINGS 0x80
+#define IQS626_SYS_SETTINGS_CLK_DIV BIT(15)
+#define IQS626_SYS_SETTINGS_ULP_AUTO BIT(14)
+#define IQS626_SYS_SETTINGS_DIS_AUTO BIT(13)
+#define IQS626_SYS_SETTINGS_PWR_MODE_MASK GENMASK(12, 11)
+#define IQS626_SYS_SETTINGS_PWR_MODE_SHIFT 11
+#define IQS626_SYS_SETTINGS_PWR_MODE_MAX 3
+#define IQS626_SYS_SETTINGS_ULP_UPDATE_MASK GENMASK(10, 8)
+#define IQS626_SYS_SETTINGS_ULP_UPDATE_SHIFT 8
+#define IQS626_SYS_SETTINGS_ULP_UPDATE_MAX 7
+#define IQS626_SYS_SETTINGS_EVENT_MODE BIT(5)
+#define IQS626_SYS_SETTINGS_EVENT_MODE_LP BIT(4)
+#define IQS626_SYS_SETTINGS_REDO_ATI BIT(2)
+#define IQS626_SYS_SETTINGS_ACK_RESET BIT(0)
+
+#define IQS626_MISC_A_ATI_BAND_DISABLE BIT(7)
+#define IQS626_MISC_A_TPx_LTA_UPDATE_MASK GENMASK(6, 4)
+#define IQS626_MISC_A_TPx_LTA_UPDATE_SHIFT 4
+#define IQS626_MISC_A_TPx_LTA_UPDATE_MAX 7
+#define IQS626_MISC_A_ATI_LP_ONLY BIT(3)
+#define IQS626_MISC_A_GPIO3_SELECT_MASK GENMASK(2, 0)
+#define IQS626_MISC_A_GPIO3_SELECT_MAX 7
+
+#define IQS626_EVENT_MASK_SYS BIT(6)
+#define IQS626_EVENT_MASK_GESTURE BIT(3)
+#define IQS626_EVENT_MASK_DEEP BIT(2)
+#define IQS626_EVENT_MASK_TOUCH BIT(1)
+#define IQS626_EVENT_MASK_PROX BIT(0)
+
+#define IQS626_RATE_NP_MS_MAX 255
+#define IQS626_RATE_LP_MS_MAX 255
+#define IQS626_RATE_ULP_MS_MAX 4080
+#define IQS626_TIMEOUT_PWR_MS_MAX 130560
+#define IQS626_TIMEOUT_LTA_MS_MAX 130560
+
+#define IQS626_MISC_B_RESEED_UI_SEL_MASK GENMASK(7, 6)
+#define IQS626_MISC_B_RESEED_UI_SEL_SHIFT 6
+#define IQS626_MISC_B_RESEED_UI_SEL_MAX 3
+#define IQS626_MISC_B_THRESH_EXTEND BIT(5)
+#define IQS626_MISC_B_TRACKING_UI_ENABLE BIT(4)
+#define IQS626_MISC_B_TPx_SWIPE BIT(3)
+#define IQS626_MISC_B_RESEED_OFFSET BIT(2)
+#define IQS626_MISC_B_FILT_STR_TPx GENMASK(1, 0)
+
+#define IQS626_THRESH_SWIPE_MAX 255
+#define IQS626_TIMEOUT_TAP_MS_MAX 4080
+#define IQS626_TIMEOUT_SWIPE_MS_MAX 4080
+
+#define IQS626_CHx_ENG_0_MEAS_CAP_SIZE BIT(7)
+#define IQS626_CHx_ENG_0_RX_TERM_VSS BIT(5)
+#define IQS626_CHx_ENG_0_LINEARIZE BIT(4)
+#define IQS626_CHx_ENG_0_DUAL_DIR BIT(3)
+#define IQS626_CHx_ENG_0_FILT_DISABLE BIT(2)
+#define IQS626_CHx_ENG_0_ATI_MODE_MASK GENMASK(1, 0)
+#define IQS626_CHx_ENG_0_ATI_MODE_MAX 3
+
+#define IQS626_CHx_ENG_1_CCT_HIGH_1 BIT(7)
+#define IQS626_CHx_ENG_1_CCT_HIGH_0 BIT(6)
+#define IQS626_CHx_ENG_1_PROJ_BIAS_MASK GENMASK(5, 4)
+#define IQS626_CHx_ENG_1_PROJ_BIAS_SHIFT 4
+#define IQS626_CHx_ENG_1_PROJ_BIAS_MAX 3
+#define IQS626_CHx_ENG_1_CCT_ENABLE BIT(3)
+#define IQS626_CHx_ENG_1_SENSE_FREQ_MASK GENMASK(2, 1)
+#define IQS626_CHx_ENG_1_SENSE_FREQ_SHIFT 1
+#define IQS626_CHx_ENG_1_SENSE_FREQ_MAX 3
+#define IQS626_CHx_ENG_1_ATI_BAND_TIGHTEN BIT(0)
+
+#define IQS626_CHx_ENG_2_LOCAL_CAP_MASK GENMASK(7, 6)
+#define IQS626_CHx_ENG_2_LOCAL_CAP_SHIFT 6
+#define IQS626_CHx_ENG_2_LOCAL_CAP_MAX 3
+#define IQS626_CHx_ENG_2_LOCAL_CAP_ENABLE BIT(5)
+#define IQS626_CHx_ENG_2_SENSE_MODE_MASK GENMASK(3, 0)
+#define IQS626_CHx_ENG_2_SENSE_MODE_MAX 15
+
+#define IQS626_CHx_ENG_3_TX_FREQ_MASK GENMASK(5, 4)
+#define IQS626_CHx_ENG_3_TX_FREQ_SHIFT 4
+#define IQS626_CHx_ENG_3_TX_FREQ_MAX 3
+#define IQS626_CHx_ENG_3_INV_LOGIC BIT(0)
+
+#define IQS626_CHx_ENG_4_RX_TERM_VREG BIT(6)
+#define IQS626_CHx_ENG_4_CCT_LOW_1 BIT(5)
+#define IQS626_CHx_ENG_4_CCT_LOW_0 BIT(4)
+#define IQS626_CHx_ENG_4_COMP_DISABLE BIT(1)
+#define IQS626_CHx_ENG_4_STATIC_ENABLE BIT(0)
+
+#define IQS626_TPx_ATI_BASE_MIN 45
+#define IQS626_TPx_ATI_BASE_MAX 300
+#define IQS626_CHx_ATI_BASE_MASK GENMASK(7, 6)
+#define IQS626_CHx_ATI_BASE_75 0x00
+#define IQS626_CHx_ATI_BASE_100 0x40
+#define IQS626_CHx_ATI_BASE_150 0x80
+#define IQS626_CHx_ATI_BASE_200 0xC0
+#define IQS626_CHx_ATI_TARGET_MASK GENMASK(5, 0)
+#define IQS626_CHx_ATI_TARGET_MAX 2016
+
+#define IQS626_CHx_THRESH_MAX 255
+#define IQS626_CHx_HYST_DEEP_MASK GENMASK(7, 4)
+#define IQS626_CHx_HYST_DEEP_SHIFT 4
+#define IQS626_CHx_HYST_TOUCH_MASK GENMASK(3, 0)
+#define IQS626_CHx_HYST_MAX 15
+
+#define IQS626_FILT_STR_NP_TPx_MASK GENMASK(7, 6)
+#define IQS626_FILT_STR_NP_TPx_SHIFT 6
+#define IQS626_FILT_STR_LP_TPx_MASK GENMASK(5, 4)
+#define IQS626_FILT_STR_LP_TPx_SHIFT 4
+
+#define IQS626_FILT_STR_NP_CNT_MASK GENMASK(7, 6)
+#define IQS626_FILT_STR_NP_CNT_SHIFT 6
+#define IQS626_FILT_STR_LP_CNT_MASK GENMASK(5, 4)
+#define IQS626_FILT_STR_LP_CNT_SHIFT 4
+#define IQS626_FILT_STR_NP_LTA_MASK GENMASK(3, 2)
+#define IQS626_FILT_STR_NP_LTA_SHIFT 2
+#define IQS626_FILT_STR_LP_LTA_MASK GENMASK(1, 0)
+#define IQS626_FILT_STR_MAX 3
+
+#define IQS626_ULP_PROJ_ENABLE BIT(4)
+#define IQS626_GEN_WEIGHT_MAX 255
+
+#define IQS626_MAX_REG 0xFF
+
+#define IQS626_NUM_CH_TP_3 9
+#define IQS626_NUM_CH_TP_2 6
+#define IQS626_NUM_CH_GEN 3
+#define IQS626_NUM_CRx_TX 8
+
+#define IQS626_PWR_MODE_POLL_SLEEP_US 50000
+#define IQS626_PWR_MODE_POLL_TIMEOUT_US 500000
+
+#define iqs626_irq_wait() usleep_range(350, 400)
+
+enum iqs626_ch_id {
+ IQS626_CH_ULP_0,
+ IQS626_CH_TP_2,
+ IQS626_CH_TP_3,
+ IQS626_CH_GEN_0,
+ IQS626_CH_GEN_1,
+ IQS626_CH_GEN_2,
+ IQS626_CH_HALL,
+};
+
+enum iqs626_rx_inactive {
+ IQS626_RX_INACTIVE_VSS,
+ IQS626_RX_INACTIVE_FLOAT,
+ IQS626_RX_INACTIVE_VREG,
+};
+
+enum iqs626_st_offs {
+ IQS626_ST_OFFS_PROX,
+ IQS626_ST_OFFS_DIR,
+ IQS626_ST_OFFS_TOUCH,
+ IQS626_ST_OFFS_DEEP,
+};
+
+enum iqs626_th_offs {
+ IQS626_TH_OFFS_PROX,
+ IQS626_TH_OFFS_TOUCH,
+ IQS626_TH_OFFS_DEEP,
+};
+
+enum iqs626_event_id {
+ IQS626_EVENT_PROX_DN,
+ IQS626_EVENT_PROX_UP,
+ IQS626_EVENT_TOUCH_DN,
+ IQS626_EVENT_TOUCH_UP,
+ IQS626_EVENT_DEEP_DN,
+ IQS626_EVENT_DEEP_UP,
+};
+
+enum iqs626_gesture_id {
+ IQS626_GESTURE_FLICK_X_POS,
+ IQS626_GESTURE_FLICK_X_NEG,
+ IQS626_GESTURE_FLICK_Y_POS,
+ IQS626_GESTURE_FLICK_Y_NEG,
+ IQS626_GESTURE_TAP,
+ IQS626_GESTURE_HOLD,
+ IQS626_NUM_GESTURES,
+};
+
+struct iqs626_event_desc {
+ const char *name;
+ enum iqs626_st_offs st_offs;
+ enum iqs626_th_offs th_offs;
+ bool dir_up;
+ u8 mask;
+};
+
+static const struct iqs626_event_desc iqs626_events[] = {
+ [IQS626_EVENT_PROX_DN] = {
+ .name = "event-prox",
+ .st_offs = IQS626_ST_OFFS_PROX,
+ .th_offs = IQS626_TH_OFFS_PROX,
+ .mask = IQS626_EVENT_MASK_PROX,
+ },
+ [IQS626_EVENT_PROX_UP] = {
+ .name = "event-prox-alt",
+ .st_offs = IQS626_ST_OFFS_PROX,
+ .th_offs = IQS626_TH_OFFS_PROX,
+ .dir_up = true,
+ .mask = IQS626_EVENT_MASK_PROX,
+ },
+ [IQS626_EVENT_TOUCH_DN] = {
+ .name = "event-touch",
+ .st_offs = IQS626_ST_OFFS_TOUCH,
+ .th_offs = IQS626_TH_OFFS_TOUCH,
+ .mask = IQS626_EVENT_MASK_TOUCH,
+ },
+ [IQS626_EVENT_TOUCH_UP] = {
+ .name = "event-touch-alt",
+ .st_offs = IQS626_ST_OFFS_TOUCH,
+ .th_offs = IQS626_TH_OFFS_TOUCH,
+ .dir_up = true,
+ .mask = IQS626_EVENT_MASK_TOUCH,
+ },
+ [IQS626_EVENT_DEEP_DN] = {
+ .name = "event-deep",
+ .st_offs = IQS626_ST_OFFS_DEEP,
+ .th_offs = IQS626_TH_OFFS_DEEP,
+ .mask = IQS626_EVENT_MASK_DEEP,
+ },
+ [IQS626_EVENT_DEEP_UP] = {
+ .name = "event-deep-alt",
+ .st_offs = IQS626_ST_OFFS_DEEP,
+ .th_offs = IQS626_TH_OFFS_DEEP,
+ .dir_up = true,
+ .mask = IQS626_EVENT_MASK_DEEP,
+ },
+};
+
+struct iqs626_ver_info {
+ u8 prod_num;
+ u8 sw_num;
+ u8 hw_num;
+ u8 padding;
+} __packed;
+
+struct iqs626_flags {
+ __be16 system;
+ u8 gesture;
+ u8 padding_a;
+ u8 states[4];
+ u8 ref_active;
+ u8 padding_b;
+ u8 comp_min;
+ u8 comp_max;
+ u8 trackpad_x;
+ u8 trackpad_y;
+} __packed;
+
+struct iqs626_ch_reg_ulp {
+ u8 thresh[2];
+ u8 hyst;
+ u8 filter;
+ u8 engine[2];
+ u8 ati_target;
+ u8 padding;
+ __be16 ati_comp;
+ u8 rx_enable;
+ u8 tx_enable;
+} __packed;
+
+struct iqs626_ch_reg_tp {
+ u8 thresh;
+ u8 ati_base;
+ __be16 ati_comp;
+} __packed;
+
+struct iqs626_tp_grp_reg {
+ u8 hyst;
+ u8 ati_target;
+ u8 engine[2];
+ struct iqs626_ch_reg_tp ch_reg_tp[IQS626_NUM_CH_TP_3];
+} __packed;
+
+struct iqs626_ch_reg_gen {
+ u8 thresh[3];
+ u8 padding;
+ u8 hyst;
+ u8 ati_target;
+ __be16 ati_comp;
+ u8 engine[5];
+ u8 filter;
+ u8 rx_enable;
+ u8 tx_enable;
+ u8 assoc_select;
+ u8 assoc_weight;
+} __packed;
+
+struct iqs626_ch_reg_hall {
+ u8 engine;
+ u8 thresh;
+ u8 hyst;
+ u8 ati_target;
+ __be16 ati_comp;
+} __packed;
+
+struct iqs626_sys_reg {
+ __be16 general;
+ u8 misc_a;
+ u8 event_mask;
+ u8 active;
+ u8 reseed;
+ u8 rate_np;
+ u8 rate_lp;
+ u8 rate_ulp;
+ u8 timeout_pwr;
+ u8 timeout_rdy;
+ u8 timeout_lta;
+ u8 misc_b;
+ u8 thresh_swipe;
+ u8 timeout_tap;
+ u8 timeout_swipe;
+ u8 redo_ati;
+ u8 padding;
+ struct iqs626_ch_reg_ulp ch_reg_ulp;
+ struct iqs626_tp_grp_reg tp_grp_reg;
+ struct iqs626_ch_reg_gen ch_reg_gen[IQS626_NUM_CH_GEN];
+ struct iqs626_ch_reg_hall ch_reg_hall;
+} __packed;
+
+struct iqs626_channel_desc {
+ const char *name;
+ int num_ch;
+ u8 active;
+ bool events[ARRAY_SIZE(iqs626_events)];
+};
+
+static const struct iqs626_channel_desc iqs626_channels[] = {
+ [IQS626_CH_ULP_0] = {
+ .name = "ulp-0",
+ .num_ch = 1,
+ .active = BIT(0),
+ .events = {
+ [IQS626_EVENT_PROX_DN] = true,
+ [IQS626_EVENT_PROX_UP] = true,
+ [IQS626_EVENT_TOUCH_DN] = true,
+ [IQS626_EVENT_TOUCH_UP] = true,
+ },
+ },
+ [IQS626_CH_TP_2] = {
+ .name = "trackpad-3x2",
+ .num_ch = IQS626_NUM_CH_TP_2,
+ .active = BIT(1),
+ .events = {
+ [IQS626_EVENT_TOUCH_DN] = true,
+ },
+ },
+ [IQS626_CH_TP_3] = {
+ .name = "trackpad-3x3",
+ .num_ch = IQS626_NUM_CH_TP_3,
+ .active = BIT(2) | BIT(1),
+ .events = {
+ [IQS626_EVENT_TOUCH_DN] = true,
+ },
+ },
+ [IQS626_CH_GEN_0] = {
+ .name = "generic-0",
+ .num_ch = 1,
+ .active = BIT(4),
+ .events = {
+ [IQS626_EVENT_PROX_DN] = true,
+ [IQS626_EVENT_PROX_UP] = true,
+ [IQS626_EVENT_TOUCH_DN] = true,
+ [IQS626_EVENT_TOUCH_UP] = true,
+ [IQS626_EVENT_DEEP_DN] = true,
+ [IQS626_EVENT_DEEP_UP] = true,
+ },
+ },
+ [IQS626_CH_GEN_1] = {
+ .name = "generic-1",
+ .num_ch = 1,
+ .active = BIT(5),
+ .events = {
+ [IQS626_EVENT_PROX_DN] = true,
+ [IQS626_EVENT_PROX_UP] = true,
+ [IQS626_EVENT_TOUCH_DN] = true,
+ [IQS626_EVENT_TOUCH_UP] = true,
+ [IQS626_EVENT_DEEP_DN] = true,
+ [IQS626_EVENT_DEEP_UP] = true,
+ },
+ },
+ [IQS626_CH_GEN_2] = {
+ .name = "generic-2",
+ .num_ch = 1,
+ .active = BIT(6),
+ .events = {
+ [IQS626_EVENT_PROX_DN] = true,
+ [IQS626_EVENT_PROX_UP] = true,
+ [IQS626_EVENT_TOUCH_DN] = true,
+ [IQS626_EVENT_TOUCH_UP] = true,
+ [IQS626_EVENT_DEEP_DN] = true,
+ [IQS626_EVENT_DEEP_UP] = true,
+ },
+ },
+ [IQS626_CH_HALL] = {
+ .name = "hall",
+ .num_ch = 1,
+ .active = BIT(7),
+ .events = {
+ [IQS626_EVENT_TOUCH_DN] = true,
+ [IQS626_EVENT_TOUCH_UP] = true,
+ },
+ },
+};
+
+struct iqs626_private {
+ struct i2c_client *client;
+ struct regmap *regmap;
+ struct iqs626_sys_reg sys_reg;
+ struct completion ati_done;
+ struct input_dev *keypad;
+ struct input_dev *trackpad;
+ struct touchscreen_properties prop;
+ unsigned int kp_type[ARRAY_SIZE(iqs626_channels)]
+ [ARRAY_SIZE(iqs626_events)];
+ unsigned int kp_code[ARRAY_SIZE(iqs626_channels)]
+ [ARRAY_SIZE(iqs626_events)];
+ unsigned int tp_code[IQS626_NUM_GESTURES];
+ unsigned int suspend_mode;
+};
+
+static int iqs626_parse_events(struct iqs626_private *iqs626,
+ const struct fwnode_handle *ch_node,
+ enum iqs626_ch_id ch_id)
+{
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+ const struct fwnode_handle *ev_node;
+ const char *ev_name;
+ u8 *thresh, *hyst;
+ unsigned int thresh_tp[IQS626_NUM_CH_TP_3];
+ unsigned int val;
+ int num_ch = iqs626_channels[ch_id].num_ch;
+ int error, i, j;
+
+ switch (ch_id) {
+ case IQS626_CH_ULP_0:
+ thresh = sys_reg->ch_reg_ulp.thresh;
+ hyst = &sys_reg->ch_reg_ulp.hyst;
+ break;
+
+ case IQS626_CH_TP_2:
+ case IQS626_CH_TP_3:
+ thresh = &sys_reg->tp_grp_reg.ch_reg_tp[0].thresh;
+ hyst = &sys_reg->tp_grp_reg.hyst;
+ break;
+
+ case IQS626_CH_GEN_0:
+ case IQS626_CH_GEN_1:
+ case IQS626_CH_GEN_2:
+ i = ch_id - IQS626_CH_GEN_0;
+ thresh = sys_reg->ch_reg_gen[i].thresh;
+ hyst = &sys_reg->ch_reg_gen[i].hyst;
+ break;
+
+ case IQS626_CH_HALL:
+ thresh = &sys_reg->ch_reg_hall.thresh;
+ hyst = &sys_reg->ch_reg_hall.hyst;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(iqs626_events); i++) {
+ if (!iqs626_channels[ch_id].events[i])
+ continue;
+
+ if (ch_id == IQS626_CH_TP_2 || ch_id == IQS626_CH_TP_3) {
+ /*
+ * Trackpad touch events are simply described under the
+ * trackpad child node.
+ */
+ ev_node = ch_node;
+ } else {
+ ev_name = iqs626_events[i].name;
+ ev_node = fwnode_get_named_child_node(ch_node, ev_name);
+ if (!ev_node)
+ continue;
+
+ if (!fwnode_property_read_u32(ev_node, "linux,code",
+ &val)) {
+ iqs626->kp_code[ch_id][i] = val;
+
+ if (fwnode_property_read_u32(ev_node,
+ "linux,input-type",
+ &val)) {
+ if (ch_id == IQS626_CH_HALL)
+ val = EV_SW;
+ else
+ val = EV_KEY;
+ }
+
+ if (val != EV_KEY && val != EV_SW) {
+ dev_err(&client->dev,
+ "Invalid input type: %u\n",
+ val);
+ return -EINVAL;
+ }
+
+ iqs626->kp_type[ch_id][i] = val;
+
+ sys_reg->event_mask &= ~iqs626_events[i].mask;
+ }
+ }
+
+ if (!fwnode_property_read_u32(ev_node, "azoteq,hyst", &val)) {
+ if (val > IQS626_CHx_HYST_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel hysteresis: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ if (i == IQS626_EVENT_DEEP_DN ||
+ i == IQS626_EVENT_DEEP_UP) {
+ *hyst &= ~IQS626_CHx_HYST_DEEP_MASK;
+ *hyst |= (val << IQS626_CHx_HYST_DEEP_SHIFT);
+ } else if (i == IQS626_EVENT_TOUCH_DN ||
+ i == IQS626_EVENT_TOUCH_UP) {
+ *hyst &= ~IQS626_CHx_HYST_TOUCH_MASK;
+ *hyst |= val;
+ }
+ }
+
+ if (ch_id != IQS626_CH_TP_2 && ch_id != IQS626_CH_TP_3 &&
+ !fwnode_property_read_u32(ev_node, "azoteq,thresh", &val)) {
+ if (val > IQS626_CHx_THRESH_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel threshold: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ if (ch_id == IQS626_CH_HALL)
+ *thresh = val;
+ else
+ *(thresh + iqs626_events[i].th_offs) = val;
+
+ continue;
+ }
+
+ if (!fwnode_property_present(ev_node, "azoteq,thresh"))
+ continue;
+
+ error = fwnode_property_read_u32_array(ev_node, "azoteq,thresh",
+ thresh_tp, num_ch);
+ if (error) {
+ dev_err(&client->dev,
+ "Failed to read %s channel thresholds: %d\n",
+ fwnode_get_name(ch_node), error);
+ return error;
+ }
+
+ for (j = 0; j < num_ch; j++) {
+ if (thresh_tp[j] > IQS626_CHx_THRESH_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel threshold: %u\n",
+ fwnode_get_name(ch_node), thresh_tp[j]);
+ return -EINVAL;
+ }
+
+ sys_reg->tp_grp_reg.ch_reg_tp[j].thresh = thresh_tp[j];
+ }
+ }
+
+ return 0;
+}
+
+static int iqs626_parse_ati_target(struct iqs626_private *iqs626,
+ const struct fwnode_handle *ch_node,
+ enum iqs626_ch_id ch_id)
+{
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+ unsigned int ati_base[IQS626_NUM_CH_TP_3];
+ unsigned int val;
+ u8 *ati_target;
+ int num_ch = iqs626_channels[ch_id].num_ch;
+ int error, i;
+
+ switch (ch_id) {
+ case IQS626_CH_ULP_0:
+ ati_target = &sys_reg->ch_reg_ulp.ati_target;
+ break;
+
+ case IQS626_CH_TP_2:
+ case IQS626_CH_TP_3:
+ ati_target = &sys_reg->tp_grp_reg.ati_target;
+ break;
+
+ case IQS626_CH_GEN_0:
+ case IQS626_CH_GEN_1:
+ case IQS626_CH_GEN_2:
+ i = ch_id - IQS626_CH_GEN_0;
+ ati_target = &sys_reg->ch_reg_gen[i].ati_target;
+ break;
+
+ case IQS626_CH_HALL:
+ ati_target = &sys_reg->ch_reg_hall.ati_target;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,ati-target", &val)) {
+ if (val > IQS626_CHx_ATI_TARGET_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel ATI target: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *ati_target &= ~IQS626_CHx_ATI_TARGET_MASK;
+ *ati_target |= (val / 32);
+ }
+
+ if (ch_id != IQS626_CH_TP_2 && ch_id != IQS626_CH_TP_3 &&
+ !fwnode_property_read_u32(ch_node, "azoteq,ati-base", &val)) {
+ switch (val) {
+ case 75:
+ val = IQS626_CHx_ATI_BASE_75;
+ break;
+
+ case 100:
+ val = IQS626_CHx_ATI_BASE_100;
+ break;
+
+ case 150:
+ val = IQS626_CHx_ATI_BASE_150;
+ break;
+
+ case 200:
+ val = IQS626_CHx_ATI_BASE_200;
+ break;
+
+ default:
+ dev_err(&client->dev,
+ "Invalid %s channel ATI base: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *ati_target &= ~IQS626_CHx_ATI_BASE_MASK;
+ *ati_target |= val;
+
+ return 0;
+ }
+
+ if (!fwnode_property_present(ch_node, "azoteq,ati-base"))
+ return 0;
+
+ error = fwnode_property_read_u32_array(ch_node, "azoteq,ati-base",
+ ati_base, num_ch);
+ if (error) {
+ dev_err(&client->dev,
+ "Failed to read %s channel ATI bases: %d\n",
+ fwnode_get_name(ch_node), error);
+ return error;
+ }
+
+ for (i = 0; i < num_ch; i++) {
+ if (ati_base[i] < IQS626_TPx_ATI_BASE_MIN ||
+ ati_base[i] > IQS626_TPx_ATI_BASE_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel ATI base: %u\n",
+ fwnode_get_name(ch_node), ati_base[i]);
+ return -EINVAL;
+ }
+
+ ati_base[i] -= IQS626_TPx_ATI_BASE_MIN;
+ sys_reg->tp_grp_reg.ch_reg_tp[i].ati_base = ati_base[i];
+ }
+
+ return 0;
+}
+
+static int iqs626_parse_pins(struct iqs626_private *iqs626,
+ const struct fwnode_handle *ch_node,
+ const char *propname, u8 *enable)
+{
+ struct i2c_client *client = iqs626->client;
+ unsigned int val[IQS626_NUM_CRx_TX];
+ int error, count, i;
+
+ if (!fwnode_property_present(ch_node, propname))
+ return 0;
+
+ count = fwnode_property_count_u32(ch_node, propname);
+ if (count > IQS626_NUM_CRx_TX) {
+ dev_err(&client->dev,
+ "Too many %s channel CRX/TX pins present\n",
+ fwnode_get_name(ch_node));
+ return -EINVAL;
+ } else if (count < 0) {
+ dev_err(&client->dev,
+ "Failed to count %s channel CRX/TX pins: %d\n",
+ fwnode_get_name(ch_node), count);
+ return count;
+ }
+
+ error = fwnode_property_read_u32_array(ch_node, propname, val, count);
+ if (error) {
+ dev_err(&client->dev,
+ "Failed to read %s channel CRX/TX pins: %d\n",
+ fwnode_get_name(ch_node), error);
+ return error;
+ }
+
+ *enable = 0;
+
+ for (i = 0; i < count; i++) {
+ if (val[i] >= IQS626_NUM_CRx_TX) {
+ dev_err(&client->dev,
+ "Invalid %s channel CRX/TX pin: %u\n",
+ fwnode_get_name(ch_node), val[i]);
+ return -EINVAL;
+ }
+
+ *enable |= BIT(val[i]);
+ }
+
+ return 0;
+}
+
+static int iqs626_parse_trackpad(struct iqs626_private *iqs626,
+ const struct fwnode_handle *ch_node)
+{
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+ u8 *hyst = &sys_reg->tp_grp_reg.hyst;
+ unsigned int val;
+ int error, count;
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,lta-update", &val)) {
+ if (val > IQS626_MISC_A_TPx_LTA_UPDATE_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel update rate: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ sys_reg->misc_a &= ~IQS626_MISC_A_TPx_LTA_UPDATE_MASK;
+ sys_reg->misc_a |= (val << IQS626_MISC_A_TPx_LTA_UPDATE_SHIFT);
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,filt-str-trackpad",
+ &val)) {
+ if (val > IQS626_FILT_STR_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel filter strength: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ sys_reg->misc_b &= ~IQS626_MISC_B_FILT_STR_TPx;
+ sys_reg->misc_b |= val;
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,filt-str-np-cnt",
+ &val)) {
+ if (val > IQS626_FILT_STR_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel filter strength: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *hyst &= ~IQS626_FILT_STR_NP_TPx_MASK;
+ *hyst |= (val << IQS626_FILT_STR_NP_TPx_SHIFT);
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,filt-str-lp-cnt",
+ &val)) {
+ if (val > IQS626_FILT_STR_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel filter strength: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *hyst &= ~IQS626_FILT_STR_LP_TPx_MASK;
+ *hyst |= (val << IQS626_FILT_STR_LP_TPx_SHIFT);
+ }
+
+ if (!fwnode_property_present(ch_node, "linux,keycodes"))
+ return 0;
+
+ count = fwnode_property_count_u32(ch_node, "linux,keycodes");
+ if (count > IQS626_NUM_GESTURES) {
+ dev_err(&client->dev, "Too many keycodes present\n");
+ return -EINVAL;
+ } else if (count < 0) {
+ dev_err(&client->dev, "Failed to count keycodes: %d\n", count);
+ return count;
+ }
+
+ error = fwnode_property_read_u32_array(ch_node, "linux,keycodes",
+ iqs626->tp_code, count);
+ if (error) {
+ dev_err(&client->dev, "Failed to read keycodes: %d\n", error);
+ return error;
+ }
+
+ sys_reg->misc_b &= ~IQS626_MISC_B_TPx_SWIPE;
+ if (fwnode_property_present(ch_node, "azoteq,gesture-swipe"))
+ sys_reg->misc_b |= IQS626_MISC_B_TPx_SWIPE;
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,timeout-tap-ms",
+ &val)) {
+ if (val > IQS626_TIMEOUT_TAP_MS_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel timeout: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ sys_reg->timeout_tap = val / 16;
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,timeout-swipe-ms",
+ &val)) {
+ if (val > IQS626_TIMEOUT_SWIPE_MS_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel timeout: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ sys_reg->timeout_swipe = val / 16;
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,thresh-swipe",
+ &val)) {
+ if (val > IQS626_THRESH_SWIPE_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel threshold: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ sys_reg->thresh_swipe = val;
+ }
+
+ sys_reg->event_mask &= ~IQS626_EVENT_MASK_GESTURE;
+
+ return 0;
+}
+
+static int iqs626_parse_channel(struct iqs626_private *iqs626,
+ const struct fwnode_handle *ch_node,
+ enum iqs626_ch_id ch_id)
+{
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+ u8 *engine, *filter, *rx_enable, *tx_enable;
+ u8 *assoc_select, *assoc_weight;
+ unsigned int val;
+ int error, i;
+
+ switch (ch_id) {
+ case IQS626_CH_ULP_0:
+ engine = sys_reg->ch_reg_ulp.engine;
+ break;
+
+ case IQS626_CH_TP_2:
+ case IQS626_CH_TP_3:
+ engine = sys_reg->tp_grp_reg.engine;
+ break;
+
+ case IQS626_CH_GEN_0:
+ case IQS626_CH_GEN_1:
+ case IQS626_CH_GEN_2:
+ i = ch_id - IQS626_CH_GEN_0;
+ engine = sys_reg->ch_reg_gen[i].engine;
+ break;
+
+ case IQS626_CH_HALL:
+ engine = &sys_reg->ch_reg_hall.engine;
+ break;
+
+ default:
+ return -EINVAL;
+ }
+
+ *engine |= IQS626_CHx_ENG_0_MEAS_CAP_SIZE;
+ if (fwnode_property_present(ch_node, "azoteq,meas-cap-decrease"))
+ *engine &= ~IQS626_CHx_ENG_0_MEAS_CAP_SIZE;
+
+ *engine |= IQS626_CHx_ENG_0_RX_TERM_VSS;
+ if (!fwnode_property_read_u32(ch_node, "azoteq,rx-inactive", &val)) {
+ switch (val) {
+ case IQS626_RX_INACTIVE_VSS:
+ break;
+
+ case IQS626_RX_INACTIVE_FLOAT:
+ *engine &= ~IQS626_CHx_ENG_0_RX_TERM_VSS;
+ if (ch_id == IQS626_CH_GEN_0 ||
+ ch_id == IQS626_CH_GEN_1 ||
+ ch_id == IQS626_CH_GEN_2)
+ *(engine + 4) &= ~IQS626_CHx_ENG_4_RX_TERM_VREG;
+ break;
+
+ case IQS626_RX_INACTIVE_VREG:
+ if (ch_id == IQS626_CH_GEN_0 ||
+ ch_id == IQS626_CH_GEN_1 ||
+ ch_id == IQS626_CH_GEN_2) {
+ *engine &= ~IQS626_CHx_ENG_0_RX_TERM_VSS;
+ *(engine + 4) |= IQS626_CHx_ENG_4_RX_TERM_VREG;
+ break;
+ }
+ fallthrough;
+
+ default:
+ dev_err(&client->dev,
+ "Invalid %s channel CRX pin termination: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+ }
+
+ *engine &= ~IQS626_CHx_ENG_0_LINEARIZE;
+ if (fwnode_property_present(ch_node, "azoteq,linearize"))
+ *engine |= IQS626_CHx_ENG_0_LINEARIZE;
+
+ *engine &= ~IQS626_CHx_ENG_0_DUAL_DIR;
+ if (fwnode_property_present(ch_node, "azoteq,dual-direction"))
+ *engine |= IQS626_CHx_ENG_0_DUAL_DIR;
+
+ *engine &= ~IQS626_CHx_ENG_0_FILT_DISABLE;
+ if (fwnode_property_present(ch_node, "azoteq,filt-disable"))
+ *engine |= IQS626_CHx_ENG_0_FILT_DISABLE;
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,ati-mode", &val)) {
+ if (val > IQS626_CHx_ENG_0_ATI_MODE_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel ATI mode: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *engine &= ~IQS626_CHx_ENG_0_ATI_MODE_MASK;
+ *engine |= val;
+ }
+
+ if (ch_id == IQS626_CH_HALL)
+ return 0;
+
+ *(engine + 1) &= ~IQS626_CHx_ENG_1_CCT_ENABLE;
+ if (!fwnode_property_read_u32(ch_node, "azoteq,cct-increase",
+ &val) && val) {
+ unsigned int orig_val = val--;
+
+ /*
+ * In the case of the generic channels, the charge cycle time
+ * field doubles in size and straddles two separate registers.
+ */
+ if (ch_id == IQS626_CH_GEN_0 ||
+ ch_id == IQS626_CH_GEN_1 ||
+ ch_id == IQS626_CH_GEN_2) {
+ *(engine + 4) &= ~IQS626_CHx_ENG_4_CCT_LOW_1;
+ if (val & BIT(1))
+ *(engine + 4) |= IQS626_CHx_ENG_4_CCT_LOW_1;
+
+ *(engine + 4) &= ~IQS626_CHx_ENG_4_CCT_LOW_0;
+ if (val & BIT(0))
+ *(engine + 4) |= IQS626_CHx_ENG_4_CCT_LOW_0;
+
+ val >>= 2;
+ }
+
+ if (val & ~GENMASK(1, 0)) {
+ dev_err(&client->dev,
+ "Invalid %s channel charge cycle time: %u\n",
+ fwnode_get_name(ch_node), orig_val);
+ return -EINVAL;
+ }
+
+ *(engine + 1) &= ~IQS626_CHx_ENG_1_CCT_HIGH_1;
+ if (val & BIT(1))
+ *(engine + 1) |= IQS626_CHx_ENG_1_CCT_HIGH_1;
+
+ *(engine + 1) &= ~IQS626_CHx_ENG_1_CCT_HIGH_0;
+ if (val & BIT(0))
+ *(engine + 1) |= IQS626_CHx_ENG_1_CCT_HIGH_0;
+
+ *(engine + 1) |= IQS626_CHx_ENG_1_CCT_ENABLE;
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,proj-bias", &val)) {
+ if (val > IQS626_CHx_ENG_1_PROJ_BIAS_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel bias current: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *(engine + 1) &= ~IQS626_CHx_ENG_1_PROJ_BIAS_MASK;
+ *(engine + 1) |= (val << IQS626_CHx_ENG_1_PROJ_BIAS_SHIFT);
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,sense-freq", &val)) {
+ if (val > IQS626_CHx_ENG_1_SENSE_FREQ_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel sensing frequency: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *(engine + 1) &= ~IQS626_CHx_ENG_1_SENSE_FREQ_MASK;
+ *(engine + 1) |= (val << IQS626_CHx_ENG_1_SENSE_FREQ_SHIFT);
+ }
+
+ *(engine + 1) &= ~IQS626_CHx_ENG_1_ATI_BAND_TIGHTEN;
+ if (fwnode_property_present(ch_node, "azoteq,ati-band-tighten"))
+ *(engine + 1) |= IQS626_CHx_ENG_1_ATI_BAND_TIGHTEN;
+
+ if (ch_id == IQS626_CH_TP_2 || ch_id == IQS626_CH_TP_3)
+ return iqs626_parse_trackpad(iqs626, ch_node);
+
+ if (ch_id == IQS626_CH_ULP_0) {
+ sys_reg->ch_reg_ulp.hyst &= ~IQS626_ULP_PROJ_ENABLE;
+ if (fwnode_property_present(ch_node, "azoteq,proj-enable"))
+ sys_reg->ch_reg_ulp.hyst |= IQS626_ULP_PROJ_ENABLE;
+
+ filter = &sys_reg->ch_reg_ulp.filter;
+
+ rx_enable = &sys_reg->ch_reg_ulp.rx_enable;
+ tx_enable = &sys_reg->ch_reg_ulp.tx_enable;
+ } else {
+ i = ch_id - IQS626_CH_GEN_0;
+ filter = &sys_reg->ch_reg_gen[i].filter;
+
+ rx_enable = &sys_reg->ch_reg_gen[i].rx_enable;
+ tx_enable = &sys_reg->ch_reg_gen[i].tx_enable;
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,filt-str-np-cnt",
+ &val)) {
+ if (val > IQS626_FILT_STR_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel filter strength: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *filter &= ~IQS626_FILT_STR_NP_CNT_MASK;
+ *filter |= (val << IQS626_FILT_STR_NP_CNT_SHIFT);
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,filt-str-lp-cnt",
+ &val)) {
+ if (val > IQS626_FILT_STR_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel filter strength: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *filter &= ~IQS626_FILT_STR_LP_CNT_MASK;
+ *filter |= (val << IQS626_FILT_STR_LP_CNT_SHIFT);
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,filt-str-np-lta",
+ &val)) {
+ if (val > IQS626_FILT_STR_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel filter strength: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *filter &= ~IQS626_FILT_STR_NP_LTA_MASK;
+ *filter |= (val << IQS626_FILT_STR_NP_LTA_SHIFT);
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,filt-str-lp-lta",
+ &val)) {
+ if (val > IQS626_FILT_STR_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel filter strength: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *filter &= ~IQS626_FILT_STR_LP_LTA_MASK;
+ *filter |= val;
+ }
+
+ error = iqs626_parse_pins(iqs626, ch_node, "azoteq,rx-enable",
+ rx_enable);
+ if (error)
+ return error;
+
+ error = iqs626_parse_pins(iqs626, ch_node, "azoteq,tx-enable",
+ tx_enable);
+ if (error)
+ return error;
+
+ if (ch_id == IQS626_CH_ULP_0)
+ return 0;
+
+ *(engine + 2) &= ~IQS626_CHx_ENG_2_LOCAL_CAP_ENABLE;
+ if (!fwnode_property_read_u32(ch_node, "azoteq,local-cap-size",
+ &val) && val) {
+ unsigned int orig_val = val--;
+
+ if (val > IQS626_CHx_ENG_2_LOCAL_CAP_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel local cap. size: %u\n",
+ fwnode_get_name(ch_node), orig_val);
+ return -EINVAL;
+ }
+
+ *(engine + 2) &= ~IQS626_CHx_ENG_2_LOCAL_CAP_MASK;
+ *(engine + 2) |= (val << IQS626_CHx_ENG_2_LOCAL_CAP_SHIFT);
+
+ *(engine + 2) |= IQS626_CHx_ENG_2_LOCAL_CAP_ENABLE;
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,sense-mode", &val)) {
+ if (val > IQS626_CHx_ENG_2_SENSE_MODE_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel sensing mode: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *(engine + 2) &= ~IQS626_CHx_ENG_2_SENSE_MODE_MASK;
+ *(engine + 2) |= val;
+ }
+
+ if (!fwnode_property_read_u32(ch_node, "azoteq,tx-freq", &val)) {
+ if (val > IQS626_CHx_ENG_3_TX_FREQ_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel excitation frequency: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *(engine + 3) &= ~IQS626_CHx_ENG_3_TX_FREQ_MASK;
+ *(engine + 3) |= (val << IQS626_CHx_ENG_3_TX_FREQ_SHIFT);
+ }
+
+ *(engine + 3) &= ~IQS626_CHx_ENG_3_INV_LOGIC;
+ if (fwnode_property_present(ch_node, "azoteq,invert-enable"))
+ *(engine + 3) |= IQS626_CHx_ENG_3_INV_LOGIC;
+
+ *(engine + 4) &= ~IQS626_CHx_ENG_4_COMP_DISABLE;
+ if (fwnode_property_present(ch_node, "azoteq,comp-disable"))
+ *(engine + 4) |= IQS626_CHx_ENG_4_COMP_DISABLE;
+
+ *(engine + 4) &= ~IQS626_CHx_ENG_4_STATIC_ENABLE;
+ if (fwnode_property_present(ch_node, "azoteq,static-enable"))
+ *(engine + 4) |= IQS626_CHx_ENG_4_STATIC_ENABLE;
+
+ i = ch_id - IQS626_CH_GEN_0;
+ assoc_select = &sys_reg->ch_reg_gen[i].assoc_select;
+ assoc_weight = &sys_reg->ch_reg_gen[i].assoc_weight;
+
+ *assoc_select = 0;
+ if (!fwnode_property_present(ch_node, "azoteq,assoc-select"))
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(iqs626_channels); i++) {
+ if (fwnode_property_match_string(ch_node, "azoteq,assoc-select",
+ iqs626_channels[i].name) < 0)
+ continue;
+
+ *assoc_select |= iqs626_channels[i].active;
+ }
+
+ if (fwnode_property_read_u32(ch_node, "azoteq,assoc-weight", &val))
+ return 0;
+
+ if (val > IQS626_GEN_WEIGHT_MAX) {
+ dev_err(&client->dev,
+ "Invalid %s channel associated weight: %u\n",
+ fwnode_get_name(ch_node), val);
+ return -EINVAL;
+ }
+
+ *assoc_weight = val;
+
+ return 0;
+}
+
+static int iqs626_parse_prop(struct iqs626_private *iqs626)
+{
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+ struct fwnode_handle *ch_node;
+ unsigned int val;
+ int error, i;
+ u16 general;
+
+ if (!device_property_read_u32(&client->dev, "azoteq,suspend-mode",
+ &val)) {
+ if (val > IQS626_SYS_SETTINGS_PWR_MODE_MAX) {
+ dev_err(&client->dev, "Invalid suspend mode: %u\n",
+ val);
+ return -EINVAL;
+ }
+
+ iqs626->suspend_mode = val;
+ }
+
+ error = regmap_raw_read(iqs626->regmap, IQS626_SYS_SETTINGS, sys_reg,
+ sizeof(*sys_reg));
+ if (error)
+ return error;
+
+ general = be16_to_cpu(sys_reg->general);
+ general &= IQS626_SYS_SETTINGS_ULP_UPDATE_MASK;
+
+ if (device_property_present(&client->dev, "azoteq,clk-div"))
+ general |= IQS626_SYS_SETTINGS_CLK_DIV;
+
+ if (device_property_present(&client->dev, "azoteq,ulp-enable"))
+ general |= IQS626_SYS_SETTINGS_ULP_AUTO;
+
+ if (!device_property_read_u32(&client->dev, "azoteq,ulp-update",
+ &val)) {
+ if (val > IQS626_SYS_SETTINGS_ULP_UPDATE_MAX) {
+ dev_err(&client->dev, "Invalid update rate: %u\n", val);
+ return -EINVAL;
+ }
+
+ general &= ~IQS626_SYS_SETTINGS_ULP_UPDATE_MASK;
+ general |= (val << IQS626_SYS_SETTINGS_ULP_UPDATE_SHIFT);
+ }
+
+ sys_reg->misc_a &= ~IQS626_MISC_A_ATI_BAND_DISABLE;
+ if (device_property_present(&client->dev, "azoteq,ati-band-disable"))
+ sys_reg->misc_a |= IQS626_MISC_A_ATI_BAND_DISABLE;
+
+ sys_reg->misc_a &= ~IQS626_MISC_A_ATI_LP_ONLY;
+ if (device_property_present(&client->dev, "azoteq,ati-lp-only"))
+ sys_reg->misc_a |= IQS626_MISC_A_ATI_LP_ONLY;
+
+ if (!device_property_read_u32(&client->dev, "azoteq,gpio3-select",
+ &val)) {
+ if (val > IQS626_MISC_A_GPIO3_SELECT_MAX) {
+ dev_err(&client->dev, "Invalid GPIO3 selection: %u\n",
+ val);
+ return -EINVAL;
+ }
+
+ sys_reg->misc_a &= ~IQS626_MISC_A_GPIO3_SELECT_MASK;
+ sys_reg->misc_a |= val;
+ }
+
+ if (!device_property_read_u32(&client->dev, "azoteq,reseed-select",
+ &val)) {
+ if (val > IQS626_MISC_B_RESEED_UI_SEL_MAX) {
+ dev_err(&client->dev, "Invalid reseed selection: %u\n",
+ val);
+ return -EINVAL;
+ }
+
+ sys_reg->misc_b &= ~IQS626_MISC_B_RESEED_UI_SEL_MASK;
+ sys_reg->misc_b |= (val << IQS626_MISC_B_RESEED_UI_SEL_SHIFT);
+ }
+
+ sys_reg->misc_b &= ~IQS626_MISC_B_THRESH_EXTEND;
+ if (device_property_present(&client->dev, "azoteq,thresh-extend"))
+ sys_reg->misc_b |= IQS626_MISC_B_THRESH_EXTEND;
+
+ sys_reg->misc_b &= ~IQS626_MISC_B_TRACKING_UI_ENABLE;
+ if (device_property_present(&client->dev, "azoteq,tracking-enable"))
+ sys_reg->misc_b |= IQS626_MISC_B_TRACKING_UI_ENABLE;
+
+ sys_reg->misc_b &= ~IQS626_MISC_B_RESEED_OFFSET;
+ if (device_property_present(&client->dev, "azoteq,reseed-offset"))
+ sys_reg->misc_b |= IQS626_MISC_B_RESEED_OFFSET;
+
+ if (!device_property_read_u32(&client->dev, "azoteq,rate-np-ms",
+ &val)) {
+ if (val > IQS626_RATE_NP_MS_MAX) {
+ dev_err(&client->dev, "Invalid report rate: %u\n", val);
+ return -EINVAL;
+ }
+
+ sys_reg->rate_np = val;
+ }
+
+ if (!device_property_read_u32(&client->dev, "azoteq,rate-lp-ms",
+ &val)) {
+ if (val > IQS626_RATE_LP_MS_MAX) {
+ dev_err(&client->dev, "Invalid report rate: %u\n", val);
+ return -EINVAL;
+ }
+
+ sys_reg->rate_lp = val;
+ }
+
+ if (!device_property_read_u32(&client->dev, "azoteq,rate-ulp-ms",
+ &val)) {
+ if (val > IQS626_RATE_ULP_MS_MAX) {
+ dev_err(&client->dev, "Invalid report rate: %u\n", val);
+ return -EINVAL;
+ }
+
+ sys_reg->rate_ulp = val / 16;
+ }
+
+ if (!device_property_read_u32(&client->dev, "azoteq,timeout-pwr-ms",
+ &val)) {
+ if (val > IQS626_TIMEOUT_PWR_MS_MAX) {
+ dev_err(&client->dev, "Invalid timeout: %u\n", val);
+ return -EINVAL;
+ }
+
+ sys_reg->timeout_pwr = val / 512;
+ }
+
+ if (!device_property_read_u32(&client->dev, "azoteq,timeout-lta-ms",
+ &val)) {
+ if (val > IQS626_TIMEOUT_LTA_MS_MAX) {
+ dev_err(&client->dev, "Invalid timeout: %u\n", val);
+ return -EINVAL;
+ }
+
+ sys_reg->timeout_lta = val / 512;
+ }
+
+ sys_reg->event_mask = ~((u8)IQS626_EVENT_MASK_SYS);
+ sys_reg->redo_ati = 0;
+
+ sys_reg->reseed = 0;
+ sys_reg->active = 0;
+
+ for (i = 0; i < ARRAY_SIZE(iqs626_channels); i++) {
+ ch_node = device_get_named_child_node(&client->dev,
+ iqs626_channels[i].name);
+ if (!ch_node)
+ continue;
+
+ error = iqs626_parse_channel(iqs626, ch_node, i);
+ if (error)
+ return error;
+
+ error = iqs626_parse_ati_target(iqs626, ch_node, i);
+ if (error)
+ return error;
+
+ error = iqs626_parse_events(iqs626, ch_node, i);
+ if (error)
+ return error;
+
+ if (!fwnode_property_present(ch_node, "azoteq,ati-exclude"))
+ sys_reg->redo_ati |= iqs626_channels[i].active;
+
+ if (!fwnode_property_present(ch_node, "azoteq,reseed-disable"))
+ sys_reg->reseed |= iqs626_channels[i].active;
+
+ sys_reg->active |= iqs626_channels[i].active;
+ }
+
+ general |= IQS626_SYS_SETTINGS_EVENT_MODE;
+
+ /*
+ * Enable streaming during normal-power mode if the trackpad is used to
+ * report raw coordinates instead of gestures. In that case, the device
+ * returns to event mode during low-power mode.
+ */
+ if (sys_reg->active & iqs626_channels[IQS626_CH_TP_2].active &&
+ sys_reg->event_mask & IQS626_EVENT_MASK_GESTURE)
+ general |= IQS626_SYS_SETTINGS_EVENT_MODE_LP;
+
+ general |= IQS626_SYS_SETTINGS_REDO_ATI;
+ general |= IQS626_SYS_SETTINGS_ACK_RESET;
+
+ sys_reg->general = cpu_to_be16(general);
+
+ error = regmap_raw_write(iqs626->regmap, IQS626_SYS_SETTINGS,
+ &iqs626->sys_reg, sizeof(iqs626->sys_reg));
+ if (error)
+ return error;
+
+ iqs626_irq_wait();
+
+ return 0;
+}
+
+static int iqs626_input_init(struct iqs626_private *iqs626)
+{
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+ int error, i, j;
+
+ iqs626->keypad = devm_input_allocate_device(&client->dev);
+ if (!iqs626->keypad)
+ return -ENOMEM;
+
+ iqs626->keypad->keycodemax = ARRAY_SIZE(iqs626->kp_code);
+ iqs626->keypad->keycode = iqs626->kp_code;
+ iqs626->keypad->keycodesize = sizeof(**iqs626->kp_code);
+
+ iqs626->keypad->name = "iqs626a_keypad";
+ iqs626->keypad->id.bustype = BUS_I2C;
+
+ for (i = 0; i < ARRAY_SIZE(iqs626_channels); i++) {
+ if (!(sys_reg->active & iqs626_channels[i].active))
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(iqs626_events); j++) {
+ if (!iqs626->kp_type[i][j])
+ continue;
+
+ input_set_capability(iqs626->keypad,
+ iqs626->kp_type[i][j],
+ iqs626->kp_code[i][j]);
+ }
+ }
+
+ if (!(sys_reg->active & iqs626_channels[IQS626_CH_TP_2].active))
+ return 0;
+
+ iqs626->trackpad = devm_input_allocate_device(&client->dev);
+ if (!iqs626->trackpad)
+ return -ENOMEM;
+
+ iqs626->trackpad->keycodemax = ARRAY_SIZE(iqs626->tp_code);
+ iqs626->trackpad->keycode = iqs626->tp_code;
+ iqs626->trackpad->keycodesize = sizeof(*iqs626->tp_code);
+
+ iqs626->trackpad->name = "iqs626a_trackpad";
+ iqs626->trackpad->id.bustype = BUS_I2C;
+
+ /*
+ * Present the trackpad as a traditional pointing device if no gestures
+ * have been mapped to a keycode.
+ */
+ if (sys_reg->event_mask & IQS626_EVENT_MASK_GESTURE) {
+ u8 tp_mask = iqs626_channels[IQS626_CH_TP_3].active;
+
+ input_set_capability(iqs626->trackpad, EV_KEY, BTN_TOUCH);
+ input_set_abs_params(iqs626->trackpad, ABS_Y, 0, 255, 0, 0);
+
+ if ((sys_reg->active & tp_mask) == tp_mask)
+ input_set_abs_params(iqs626->trackpad,
+ ABS_X, 0, 255, 0, 0);
+ else
+ input_set_abs_params(iqs626->trackpad,
+ ABS_X, 0, 128, 0, 0);
+
+ touchscreen_parse_properties(iqs626->trackpad, false,
+ &iqs626->prop);
+ } else {
+ for (i = 0; i < IQS626_NUM_GESTURES; i++)
+ if (iqs626->tp_code[i] != KEY_RESERVED)
+ input_set_capability(iqs626->trackpad, EV_KEY,
+ iqs626->tp_code[i]);
+ }
+
+ error = input_register_device(iqs626->trackpad);
+ if (error)
+ dev_err(&client->dev, "Failed to register trackpad: %d\n",
+ error);
+
+ return error;
+}
+
+static int iqs626_report(struct iqs626_private *iqs626)
+{
+ struct iqs626_sys_reg *sys_reg = &iqs626->sys_reg;
+ struct i2c_client *client = iqs626->client;
+ struct iqs626_flags flags;
+ __le16 hall_output;
+ int error, i, j;
+ u8 state;
+ u8 *dir_mask = &flags.states[IQS626_ST_OFFS_DIR];
+
+ error = regmap_raw_read(iqs626->regmap, IQS626_SYS_FLAGS, &flags,
+ sizeof(flags));
+ if (error) {
+ dev_err(&client->dev, "Failed to read device status: %d\n",
+ error);
+ return error;
+ }
+
+ /*
+ * The device resets itself if its own watchdog bites, which can happen
+ * in the event of an I2C communication error. In this case, the device
+ * asserts a SHOW_RESET interrupt and all registers must be restored.
+ */
+ if (be16_to_cpu(flags.system) & IQS626_SYS_FLAGS_SHOW_RESET) {
+ dev_err(&client->dev, "Unexpected device reset\n");
+
+ error = regmap_raw_write(iqs626->regmap, IQS626_SYS_SETTINGS,
+ sys_reg, sizeof(*sys_reg));
+ if (error)
+ dev_err(&client->dev,
+ "Failed to re-initialize device: %d\n", error);
+
+ return error;
+ }
+
+ if (be16_to_cpu(flags.system) & IQS626_SYS_FLAGS_IN_ATI)
+ return 0;
+
+ /*
+ * Unlike the ULP or generic channels, the Hall channel does not have a
+ * direction flag. Instead, the direction (i.e. magnet polarity) can be
+ * derived based on the sign of the 2's complement differential output.
+ */
+ if (sys_reg->active & iqs626_channels[IQS626_CH_HALL].active) {
+ error = regmap_raw_read(iqs626->regmap, IQS626_HALL_OUTPUT,
+ &hall_output, sizeof(hall_output));
+ if (error) {
+ dev_err(&client->dev,
+ "Failed to read Hall output: %d\n", error);
+ return error;
+ }
+
+ *dir_mask &= ~iqs626_channels[IQS626_CH_HALL].active;
+ if (le16_to_cpu(hall_output) < 0x8000)
+ *dir_mask |= iqs626_channels[IQS626_CH_HALL].active;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(iqs626_channels); i++) {
+ if (!(sys_reg->active & iqs626_channels[i].active))
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(iqs626_events); j++) {
+ if (!iqs626->kp_type[i][j])
+ continue;
+
+ state = flags.states[iqs626_events[j].st_offs];
+ state &= iqs626_events[j].dir_up ? *dir_mask
+ : ~(*dir_mask);
+ state &= iqs626_channels[i].active;
+
+ input_event(iqs626->keypad, iqs626->kp_type[i][j],
+ iqs626->kp_code[i][j], !!state);
+ }
+ }
+
+ input_sync(iqs626->keypad);
+
+ /*
+ * The following completion signals that ATI has finished, any initial
+ * switch states have been reported and the keypad can be registered.
+ */
+ complete_all(&iqs626->ati_done);
+
+ if (!(sys_reg->active & iqs626_channels[IQS626_CH_TP_2].active))
+ return 0;
+
+ if (sys_reg->event_mask & IQS626_EVENT_MASK_GESTURE) {
+ state = flags.states[IQS626_ST_OFFS_TOUCH];
+ state &= iqs626_channels[IQS626_CH_TP_2].active;
+
+ input_report_key(iqs626->trackpad, BTN_TOUCH, state);
+
+ if (state)
+ touchscreen_report_pos(iqs626->trackpad, &iqs626->prop,
+ flags.trackpad_x,
+ flags.trackpad_y, false);
+ } else {
+ for (i = 0; i < IQS626_NUM_GESTURES; i++)
+ input_report_key(iqs626->trackpad, iqs626->tp_code[i],
+ flags.gesture & BIT(i));
+
+ if (flags.gesture & GENMASK(IQS626_GESTURE_TAP, 0)) {
+ input_sync(iqs626->trackpad);
+
+ /*
+ * Momentary gestures are followed by a complementary
+ * release cycle so as to emulate a full keystroke.
+ */
+ for (i = 0; i < IQS626_GESTURE_HOLD; i++)
+ input_report_key(iqs626->trackpad,
+ iqs626->tp_code[i], 0);
+ }
+ }
+
+ input_sync(iqs626->trackpad);
+
+ return 0;
+}
+
+static irqreturn_t iqs626_irq(int irq, void *context)
+{
+ struct iqs626_private *iqs626 = context;
+
+ if (iqs626_report(iqs626))
+ return IRQ_NONE;
+
+ /*
+ * The device does not deassert its interrupt (RDY) pin until shortly
+ * after receiving an I2C stop condition; the following delay ensures
+ * the interrupt handler does not return before this time.
+ */
+ iqs626_irq_wait();
+
+ return IRQ_HANDLED;
+}
+
+static const struct regmap_config iqs626_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 16,
+ .max_register = IQS626_MAX_REG,
+};
+
+static int iqs626_probe(struct i2c_client *client)
+{
+ struct iqs626_ver_info ver_info;
+ struct iqs626_private *iqs626;
+ int error;
+
+ iqs626 = devm_kzalloc(&client->dev, sizeof(*iqs626), GFP_KERNEL);
+ if (!iqs626)
+ return -ENOMEM;
+
+ i2c_set_clientdata(client, iqs626);
+ iqs626->client = client;
+
+ iqs626->regmap = devm_regmap_init_i2c(client, &iqs626_regmap_config);
+ if (IS_ERR(iqs626->regmap)) {
+ error = PTR_ERR(iqs626->regmap);
+ dev_err(&client->dev, "Failed to initialize register map: %d\n",
+ error);
+ return error;
+ }
+
+ init_completion(&iqs626->ati_done);
+
+ error = regmap_raw_read(iqs626->regmap, IQS626_VER_INFO, &ver_info,
+ sizeof(ver_info));
+ if (error)
+ return error;
+
+ if (ver_info.prod_num != IQS626_VER_INFO_PROD_NUM) {
+ dev_err(&client->dev, "Unrecognized product number: 0x%02X\n",
+ ver_info.prod_num);
+ return -EINVAL;
+ }
+
+ error = iqs626_parse_prop(iqs626);
+ if (error)
+ return error;
+
+ error = iqs626_input_init(iqs626);
+ if (error)
+ return error;
+
+ error = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, iqs626_irq, IRQF_ONESHOT,
+ client->name, iqs626);
+ if (error) {
+ dev_err(&client->dev, "Failed to request IRQ: %d\n", error);
+ return error;
+ }
+
+ if (!wait_for_completion_timeout(&iqs626->ati_done,
+ msecs_to_jiffies(2000))) {
+ dev_err(&client->dev, "Failed to complete ATI\n");
+ return -ETIMEDOUT;
+ }
+
+ /*
+ * The keypad may include one or more switches and is not registered
+ * until ATI is complete and the initial switch states are read.
+ */
+ error = input_register_device(iqs626->keypad);
+ if (error)
+ dev_err(&client->dev, "Failed to register keypad: %d\n", error);
+
+ return error;
+}
+
+static int __maybe_unused iqs626_suspend(struct device *dev)
+{
+ struct iqs626_private *iqs626 = dev_get_drvdata(dev);
+ struct i2c_client *client = iqs626->client;
+ unsigned int val;
+ int error;
+
+ if (!iqs626->suspend_mode)
+ return 0;
+
+ disable_irq(client->irq);
+
+ /*
+ * Automatic power mode switching must be disabled before the device is
+ * forced into any particular power mode. In this case, the device will
+ * transition into normal-power mode.
+ */
+ error = regmap_update_bits(iqs626->regmap, IQS626_SYS_SETTINGS,
+ IQS626_SYS_SETTINGS_DIS_AUTO, ~0);
+ if (error)
+ goto err_irq;
+
+ /*
+ * The following check ensures the device has completed its transition
+ * into normal-power mode before a manual mode switch is performed.
+ */
+ error = regmap_read_poll_timeout(iqs626->regmap, IQS626_SYS_FLAGS, val,
+ !(val & IQS626_SYS_FLAGS_PWR_MODE_MASK),
+ IQS626_PWR_MODE_POLL_SLEEP_US,
+ IQS626_PWR_MODE_POLL_TIMEOUT_US);
+ if (error)
+ goto err_irq;
+
+ error = regmap_update_bits(iqs626->regmap, IQS626_SYS_SETTINGS,
+ IQS626_SYS_SETTINGS_PWR_MODE_MASK,
+ iqs626->suspend_mode <<
+ IQS626_SYS_SETTINGS_PWR_MODE_SHIFT);
+ if (error)
+ goto err_irq;
+
+ /*
+ * This last check ensures the device has completed its transition into
+ * the desired power mode to prevent any spurious interrupts from being
+ * triggered after iqs626_suspend has already returned.
+ */
+ error = regmap_read_poll_timeout(iqs626->regmap, IQS626_SYS_FLAGS, val,
+ (val & IQS626_SYS_FLAGS_PWR_MODE_MASK)
+ == (iqs626->suspend_mode <<
+ IQS626_SYS_FLAGS_PWR_MODE_SHIFT),
+ IQS626_PWR_MODE_POLL_SLEEP_US,
+ IQS626_PWR_MODE_POLL_TIMEOUT_US);
+
+err_irq:
+ iqs626_irq_wait();
+ enable_irq(client->irq);
+
+ return error;
+}
+
+static int __maybe_unused iqs626_resume(struct device *dev)
+{
+ struct iqs626_private *iqs626 = dev_get_drvdata(dev);
+ struct i2c_client *client = iqs626->client;
+ unsigned int val;
+ int error;
+
+ if (!iqs626->suspend_mode)
+ return 0;
+
+ disable_irq(client->irq);
+
+ error = regmap_update_bits(iqs626->regmap, IQS626_SYS_SETTINGS,
+ IQS626_SYS_SETTINGS_PWR_MODE_MASK, 0);
+ if (error)
+ goto err_irq;
+
+ /*
+ * This check ensures the device has returned to normal-power mode
+ * before automatic power mode switching is re-enabled.
+ */
+ error = regmap_read_poll_timeout(iqs626->regmap, IQS626_SYS_FLAGS, val,
+ !(val & IQS626_SYS_FLAGS_PWR_MODE_MASK),
+ IQS626_PWR_MODE_POLL_SLEEP_US,
+ IQS626_PWR_MODE_POLL_TIMEOUT_US);
+ if (error)
+ goto err_irq;
+
+ error = regmap_update_bits(iqs626->regmap, IQS626_SYS_SETTINGS,
+ IQS626_SYS_SETTINGS_DIS_AUTO, 0);
+ if (error)
+ goto err_irq;
+
+ /*
+ * This step reports any events that may have been "swallowed" as a
+ * result of polling PWR_MODE (which automatically acknowledges any
+ * pending interrupts).
+ */
+ error = iqs626_report(iqs626);
+
+err_irq:
+ iqs626_irq_wait();
+ enable_irq(client->irq);
+
+ return error;
+}
+
+static SIMPLE_DEV_PM_OPS(iqs626_pm, iqs626_suspend, iqs626_resume);
+
+static const struct of_device_id iqs626_of_match[] = {
+ { .compatible = "azoteq,iqs626a" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, iqs626_of_match);
+
+static struct i2c_driver iqs626_i2c_driver = {
+ .driver = {
+ .name = "iqs626a",
+ .of_match_table = iqs626_of_match,
+ .pm = &iqs626_pm,
+ },
+ .probe_new = iqs626_probe,
+};
+module_i2c_driver(iqs626_i2c_driver);
+
+MODULE_AUTHOR("Jeff LaBundy <jeff@labundy.com>");
+MODULE_DESCRIPTION("Azoteq IQS626A Capacitive Touch Controller");
+MODULE_LICENSE("GPL");
unsigned int duty = chip->pwm_period * chip->level / 100;
ret = pwm_config(chip->pwm, duty, chip->pwm_period);
} else {
- int i;
u8 duty_index = 0;
- for (i = 0; i <= 64; i++) {
- if (chip->level <= i * 100 / 64) {
- duty_index = i;
- break;
- }
- }
+ duty_index = DIV_ROUND_UP(chip->level * 64, 100);
+
switch (chip->internal_mode_pattern) {
case 0:
max8997_write_reg(chip->client,
#define ETP_FW_PAGE_SIZE_512 512
#define ETP_FW_SIGNATURE_SIZE 6
+#define ETP_PRODUCT_ID_DELBIN 0x00C2
+#define ETP_PRODUCT_ID_VOXEL 0x00BF
+#define ETP_PRODUCT_ID_MAGPIE 0x0120
+#define ETP_PRODUCT_ID_BOBBA 0x0121
+
struct i2c_client;
struct completion;
int (*calibrate_result)(struct i2c_client *client, u8 *val);
int (*get_baseline_data)(struct i2c_client *client,
- bool max_baseliune, u8 *value);
+ bool max_baseline, u8 *value);
int (*get_version)(struct i2c_client *client, u8 pattern, bool iap,
u8 *version);
#define ETP_FINGER_WIDTH 15
#define ETP_RETRY_COUNT 3
+/* quirks to control the device */
+#define ETP_QUIRK_QUICK_WAKEUP BIT(0)
+
/* The main device structure */
struct elan_tp_data {
struct i2c_client *client;
bool baseline_ready;
u8 clickpad;
bool middle_button;
+
+ u32 quirks; /* Various quirks */
};
+static u32 elan_i2c_lookup_quirks(u16 ic_type, u16 product_id)
+{
+ static const struct {
+ u16 ic_type;
+ u16 product_id;
+ u32 quirks;
+ } elan_i2c_quirks[] = {
+ { 0x0D, ETP_PRODUCT_ID_DELBIN, ETP_QUIRK_QUICK_WAKEUP },
+ { 0x10, ETP_PRODUCT_ID_VOXEL, ETP_QUIRK_QUICK_WAKEUP },
+ { 0x14, ETP_PRODUCT_ID_MAGPIE, ETP_QUIRK_QUICK_WAKEUP },
+ { 0x14, ETP_PRODUCT_ID_BOBBA, ETP_QUIRK_QUICK_WAKEUP },
+ };
+ u32 quirks = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(elan_i2c_quirks); i++) {
+ if (elan_i2c_quirks[i].ic_type == ic_type &&
+ elan_i2c_quirks[i].product_id == product_id) {
+ quirks = elan_i2c_quirks[i].quirks;
+ }
+ }
+
+ if (ic_type >= 0x0D && product_id >= 0x123)
+ quirks |= ETP_QUIRK_QUICK_WAKEUP;
+
+ return quirks;
+}
+
static int elan_get_fwinfo(u16 ic_type, u8 iap_version, u16 *validpage_count,
u32 *signature_address, u16 *page_size)
{
return false;
}
-static int __elan_initialize(struct elan_tp_data *data)
+static int __elan_initialize(struct elan_tp_data *data, bool skip_reset)
{
struct i2c_client *client = data->client;
bool woken_up = false;
int error;
- error = data->ops->initialize(client);
- if (error) {
- dev_err(&client->dev, "device initialize failed: %d\n", error);
- return error;
+ if (!skip_reset) {
+ error = data->ops->initialize(client);
+ if (error) {
+ dev_err(&client->dev, "device initialize failed: %d\n", error);
+ return error;
+ }
}
error = elan_query_product(data);
return 0;
}
-static int elan_initialize(struct elan_tp_data *data)
+static int elan_initialize(struct elan_tp_data *data, bool skip_reset)
{
int repeat = ETP_RETRY_COUNT;
int error;
do {
- error = __elan_initialize(data);
+ error = __elan_initialize(data, skip_reset);
if (!error)
return 0;
+ skip_reset = false;
msleep(30);
} while (--repeat > 0);
if (error)
return error;
+ data->quirks = elan_i2c_lookup_quirks(data->ic_type, data->product_id);
+
error = elan_get_fwinfo(data->ic_type, data->iap_version,
&data->fw_validpage_count,
&data->fw_signature_address,
data->ops->iap_reset(client);
} else {
/* Reinitialize TP after fw is updated */
- elan_initialize(data);
+ elan_initialize(data, false);
elan_query_device_info(data);
}
}
/* Initialize the touchpad. */
- error = elan_initialize(data);
+ error = elan_initialize(data, false);
if (error)
return error;
goto err;
}
- error = elan_initialize(data);
+ error = elan_initialize(data, data->quirks & ETP_QUIRK_QUICK_WAKEUP);
if (error)
dev_err(dev, "initialize when resuming failed: %d\n", error);
{
struct apbps2_priv *priv = io->port_data;
int limit;
- unsigned long tmp;
/* clear error flags */
iowrite32be(0, &priv->regs->status);
/* Clear old data if available (unlikely) */
limit = 1024;
while ((ioread32be(&priv->regs->status) & APBPS2_STATUS_DR) && --limit)
- tmp = ioread32be(&priv->regs->data);
+ ioread32be(&priv->regs->data);
/* Enable reciever and it's interrupt */
iowrite32be(APBPS2_CTRL_RE | APBPS2_CTRL_RI, &priv->regs->ctrl);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Generic helper functions for touchscreens and other two-dimensional
+ * pointing devices
+ *
+ * Copyright (c) 2014 Sebastian Reichel <sre@kernel.org>
+ */
+
+#include <linux/property.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/module.h>
+
+static bool touchscreen_get_prop_u32(struct device *dev,
+ const char *property,
+ unsigned int default_value,
+ unsigned int *value)
+{
+ u32 val;
+ int error;
+
+ error = device_property_read_u32(dev, property, &val);
+ if (error) {
+ *value = default_value;
+ return false;
+ }
+
+ *value = val;
+ return true;
+}
+
+static void touchscreen_set_params(struct input_dev *dev,
+ unsigned long axis,
+ int min, int max, int fuzz)
+{
+ struct input_absinfo *absinfo;
+
+ if (!test_bit(axis, dev->absbit)) {
+ dev_warn(&dev->dev,
+ "Parameters are specified but the axis %lu is not set up\n",
+ axis);
+ return;
+ }
+
+ absinfo = &dev->absinfo[axis];
+ absinfo->minimum = min;
+ absinfo->maximum = max;
+ absinfo->fuzz = fuzz;
+}
+
+/**
+ * touchscreen_parse_properties - parse common touchscreen properties
+ * @input: input device that should be parsed
+ * @multitouch: specifies whether parsed properties should be applied to
+ * single-touch or multi-touch axes
+ * @prop: pointer to a struct touchscreen_properties into which to store
+ * axis swap and invert info for use with touchscreen_report_x_y();
+ * or %NULL
+ *
+ * This function parses common properties for touchscreens and sets up the
+ * input device accordingly. The function keeps previously set up default
+ * values if no value is specified.
+ */
+void touchscreen_parse_properties(struct input_dev *input, bool multitouch,
+ struct touchscreen_properties *prop)
+{
+ struct device *dev = input->dev.parent;
+ struct input_absinfo *absinfo;
+ unsigned int axis, axis_x, axis_y;
+ unsigned int minimum, maximum, fuzz;
+ bool data_present;
+
+ input_alloc_absinfo(input);
+ if (!input->absinfo)
+ return;
+
+ axis_x = multitouch ? ABS_MT_POSITION_X : ABS_X;
+ axis_y = multitouch ? ABS_MT_POSITION_Y : ABS_Y;
+
+ data_present = touchscreen_get_prop_u32(dev, "touchscreen-min-x",
+ input_abs_get_min(input, axis_x),
+ &minimum) |
+ touchscreen_get_prop_u32(dev, "touchscreen-size-x",
+ input_abs_get_max(input,
+ axis_x) + 1,
+ &maximum) |
+ touchscreen_get_prop_u32(dev, "touchscreen-fuzz-x",
+ input_abs_get_fuzz(input, axis_x),
+ &fuzz);
+ if (data_present)
+ touchscreen_set_params(input, axis_x, minimum, maximum - 1, fuzz);
+
+ data_present = touchscreen_get_prop_u32(dev, "touchscreen-min-y",
+ input_abs_get_min(input, axis_y),
+ &minimum) |
+ touchscreen_get_prop_u32(dev, "touchscreen-size-y",
+ input_abs_get_max(input,
+ axis_y) + 1,
+ &maximum) |
+ touchscreen_get_prop_u32(dev, "touchscreen-fuzz-y",
+ input_abs_get_fuzz(input, axis_y),
+ &fuzz);
+ if (data_present)
+ touchscreen_set_params(input, axis_y, minimum, maximum - 1, fuzz);
+
+ axis = multitouch ? ABS_MT_PRESSURE : ABS_PRESSURE;
+ data_present = touchscreen_get_prop_u32(dev,
+ "touchscreen-max-pressure",
+ input_abs_get_max(input, axis),
+ &maximum) |
+ touchscreen_get_prop_u32(dev,
+ "touchscreen-fuzz-pressure",
+ input_abs_get_fuzz(input, axis),
+ &fuzz);
+ if (data_present)
+ touchscreen_set_params(input, axis, 0, maximum, fuzz);
+
+ if (!prop)
+ return;
+
+ prop->max_x = input_abs_get_max(input, axis_x);
+ prop->max_y = input_abs_get_max(input, axis_y);
+
+ prop->invert_x =
+ device_property_read_bool(dev, "touchscreen-inverted-x");
+ if (prop->invert_x) {
+ absinfo = &input->absinfo[axis_x];
+ absinfo->maximum -= absinfo->minimum;
+ absinfo->minimum = 0;
+ }
+
+ prop->invert_y =
+ device_property_read_bool(dev, "touchscreen-inverted-y");
+ if (prop->invert_y) {
+ absinfo = &input->absinfo[axis_y];
+ absinfo->maximum -= absinfo->minimum;
+ absinfo->minimum = 0;
+ }
+
+ prop->swap_x_y =
+ device_property_read_bool(dev, "touchscreen-swapped-x-y");
+ if (prop->swap_x_y)
+ swap(input->absinfo[axis_x], input->absinfo[axis_y]);
+}
+EXPORT_SYMBOL(touchscreen_parse_properties);
+
+static void
+touchscreen_apply_prop_to_x_y(const struct touchscreen_properties *prop,
+ unsigned int *x, unsigned int *y)
+{
+ if (prop->invert_x)
+ *x = prop->max_x - *x;
+
+ if (prop->invert_y)
+ *y = prop->max_y - *y;
+
+ if (prop->swap_x_y)
+ swap(*x, *y);
+}
+
+/**
+ * touchscreen_set_mt_pos - Set input_mt_pos coordinates
+ * @pos: input_mt_pos to set coordinates of
+ * @prop: pointer to a struct touchscreen_properties
+ * @x: X coordinate to store in pos
+ * @y: Y coordinate to store in pos
+ *
+ * Adjust the passed in x and y values applying any axis inversion and
+ * swapping requested in the passed in touchscreen_properties and store
+ * the result in a struct input_mt_pos.
+ */
+void touchscreen_set_mt_pos(struct input_mt_pos *pos,
+ const struct touchscreen_properties *prop,
+ unsigned int x, unsigned int y)
+{
+ touchscreen_apply_prop_to_x_y(prop, &x, &y);
+ pos->x = x;
+ pos->y = y;
+}
+EXPORT_SYMBOL(touchscreen_set_mt_pos);
+
+/**
+ * touchscreen_report_pos - Report touchscreen coordinates
+ * @input: input_device to report coordinates for
+ * @prop: pointer to a struct touchscreen_properties
+ * @x: X coordinate to report
+ * @y: Y coordinate to report
+ * @multitouch: Report coordinates on single-touch or multi-touch axes
+ *
+ * Adjust the passed in x and y values applying any axis inversion and
+ * swapping requested in the passed in touchscreen_properties and then
+ * report the resulting coordinates on the input_dev's x and y axis.
+ */
+void touchscreen_report_pos(struct input_dev *input,
+ const struct touchscreen_properties *prop,
+ unsigned int x, unsigned int y,
+ bool multitouch)
+{
+ touchscreen_apply_prop_to_x_y(prop, &x, &y);
+ input_report_abs(input, multitouch ? ABS_MT_POSITION_X : ABS_X, x);
+ input_report_abs(input, multitouch ? ABS_MT_POSITION_Y : ABS_Y, y);
+}
+EXPORT_SYMBOL(touchscreen_report_pos);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Helper functions for touchscreens and other devices");
if INPUT_TOUCHSCREEN
-config TOUCHSCREEN_PROPERTIES
- def_tristate INPUT
- depends on INPUT
-
config TOUCHSCREEN_88PM860X
tristate "Marvell 88PM860x touchscreen"
depends on MFD_88PM860X
To compile this driver as a module, choose M here : the
module will be called hideep_ts.
+config TOUCHSCREEN_HYCON_HY46XX
+ tristate "Hycon hy46xx touchscreen support"
+ depends on I2C
+ help
+ Say Y here if you have a touchscreen using Hycon hy46xx
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called hycon-hy46xx.
+
config TOUCHSCREEN_ILI210X
tristate "Ilitek ILI210X based touchscreen"
depends on I2C
To compile this driver as a module, choose M here: the
module will be called ili210x.
+config TOUCHSCREEN_ILITEK
+ tristate "Ilitek I2C 213X/23XX/25XX/Lego Series Touch ICs"
+ depends on I2C
+ help
+ Say Y here if you have touchscreen with ILITEK touch IC,
+ it supports 213X/23XX/25XX and other Lego series.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called ilitek_ts_i2c.
+
config TOUCHSCREEN_IPROC
tristate "IPROC touch panel driver support"
depends on ARCH_BCM_IPROC || COMPILE_TEST
To compile this driver as a module, choose M here:
the module will be called melfas_mip4.
+config TOUCHSCREEN_MSG2638
+ tristate "MStar msg2638 touchscreen support"
+ depends on I2C
+ depends on GPIOLIB || COMPILE_TEST
+ help
+ Say Y here if you have an I2C touchscreen using MStar msg2638.
+
+ If unsure, say N.
+
+ To compile this driver as a module, choose M here: the
+ module will be called msg2638.
+
config TOUCHSCREEN_MTOUCH
tristate "MicroTouch serial touchscreens"
select SERIO
wm97xx-ts-y := wm97xx-core.o
-obj-$(CONFIG_TOUCHSCREEN_PROPERTIES) += of_touchscreen.o
obj-$(CONFIG_TOUCHSCREEN_88PM860X) += 88pm860x-ts.o
obj-$(CONFIG_TOUCHSCREEN_AD7877) += ad7877.o
obj-$(CONFIG_TOUCHSCREEN_AD7879) += ad7879.o
obj-$(CONFIG_TOUCHSCREEN_DYNAPRO) += dynapro.o
obj-$(CONFIG_TOUCHSCREEN_EDT_FT5X06) += edt-ft5x06.o
obj-$(CONFIG_TOUCHSCREEN_HAMPSHIRE) += hampshire.o
+obj-$(CONFIG_TOUCHSCREEN_HYCON_HY46XX) += hycon-hy46xx.o
obj-$(CONFIG_TOUCHSCREEN_GUNZE) += gunze.o
obj-$(CONFIG_TOUCHSCREEN_EETI) += eeti_ts.o
obj-$(CONFIG_TOUCHSCREEN_EKTF2127) += ektf2127.o
obj-$(CONFIG_TOUCHSCREEN_GOODIX) += goodix.o
obj-$(CONFIG_TOUCHSCREEN_HIDEEP) += hideep.o
obj-$(CONFIG_TOUCHSCREEN_ILI210X) += ili210x.o
+obj-$(CONFIG_TOUCHSCREEN_ILITEK) += ilitek_ts_i2c.o
obj-$(CONFIG_TOUCHSCREEN_IMX6UL_TSC) += imx6ul_tsc.o
obj-$(CONFIG_TOUCHSCREEN_INEXIO) += inexio.o
obj-$(CONFIG_TOUCHSCREEN_IPROC) += bcm_iproc_tsc.o
obj-$(CONFIG_TOUCHSCREEN_MELFAS_MIP4) += melfas_mip4.o
obj-$(CONFIG_TOUCHSCREEN_MIGOR) += migor_ts.o
obj-$(CONFIG_TOUCHSCREEN_MMS114) += mms114.o
+obj-$(CONFIG_TOUCHSCREEN_MSG2638) += msg2638.o
obj-$(CONFIG_TOUCHSCREEN_MTOUCH) += mtouch.o
obj-$(CONFIG_TOUCHSCREEN_MK712) += mk712.o
obj-$(CONFIG_TOUCHSCREEN_HP600) += hp680_ts_input.o
error = devm_request_threaded_irq(&client->dev, client->irq,
NULL, ar1021_i2c_irq,
- IRQF_ONESHOT,
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
"ar1021_i2c", ar1021);
if (error) {
dev_err(&client->dev,
return error;
}
- /* Disable the IRQ, we'll enable it in ar1021_i2c_open() */
- disable_irq(client->irq);
-
error = input_register_device(ar1021->input);
if (error) {
dev_err(&client->dev,
#include <media/v4l2-ioctl.h>
#include <media/videobuf2-v4l2.h>
#include <media/videobuf2-vmalloc.h>
+#include <dt-bindings/input/atmel-maxtouch.h>
/* Firmware files */
#define MXT_FW_NAME "maxtouch.fw"
#define MXT_CRC_TIMEOUT 1000 /* msec */
#define MXT_FW_RESET_TIME 3000 /* msec */
#define MXT_FW_CHG_TIMEOUT 300 /* msec */
+#define MXT_WAKEUP_TIME 25 /* msec */
/* Command to unlock bootloader */
#define MXT_UNLOCK_CMD_MSB 0xaa
struct mxt_dbg dbg;
struct regulator_bulk_data regulators[2];
struct gpio_desc *reset_gpio;
+ struct gpio_desc *wake_gpio;
bool use_retrigen_workaround;
/* Cached parameters from object table */
unsigned int t19_num_keys;
enum mxt_suspend_mode suspend_mode;
+
+ u32 wakeup_method;
};
struct mxt_vb2_buffer {
return mxt_bootloader_write(data, buf, sizeof(buf));
}
+static bool mxt_wakeup_toggle(struct i2c_client *client,
+ bool wake_up, bool in_i2c)
+{
+ struct mxt_data *data = i2c_get_clientdata(client);
+
+ switch (data->wakeup_method) {
+ case ATMEL_MXT_WAKEUP_I2C_SCL:
+ if (!in_i2c)
+ return false;
+ break;
+
+ case ATMEL_MXT_WAKEUP_GPIO:
+ if (in_i2c)
+ return false;
+
+ gpiod_set_value(data->wake_gpio, wake_up);
+ break;
+
+ default:
+ return false;
+ }
+
+ if (wake_up) {
+ dev_dbg(&client->dev, "waking up controller\n");
+
+ msleep(MXT_WAKEUP_TIME);
+ }
+
+ return true;
+}
+
static int __mxt_read_reg(struct i2c_client *client,
u16 reg, u16 len, void *val)
{
struct i2c_msg xfer[2];
+ bool retried = false;
u8 buf[2];
int ret;
xfer[1].len = len;
xfer[1].buf = val;
+retry:
ret = i2c_transfer(client->adapter, xfer, 2);
if (ret == 2) {
ret = 0;
+ } else if (!retried && mxt_wakeup_toggle(client, true, true)) {
+ retried = true;
+ goto retry;
} else {
if (ret >= 0)
ret = -EIO;
static int __mxt_write_reg(struct i2c_client *client, u16 reg, u16 len,
const void *val)
{
+ bool retried = false;
u8 *buf;
size_t count;
int ret;
buf[1] = (reg >> 8) & 0xff;
memcpy(&buf[2], val, len);
+retry:
ret = i2c_master_send(client, buf, count);
if (ret == count) {
ret = 0;
+ } else if (!retried && mxt_wakeup_toggle(client, true, true)) {
+ retried = true;
+ goto retry;
} else {
if (ret >= 0)
ret = -EIO;
static void mxt_start(struct mxt_data *data)
{
+ mxt_wakeup_toggle(data->client, true, false);
+
switch (data->suspend_mode) {
case MXT_SUSPEND_T9_CTRL:
mxt_soft_reset(data);
mxt_set_t7_power_cfg(data, MXT_POWER_CFG_DEEPSLEEP);
break;
}
+
+ mxt_wakeup_toggle(data->client, false, false);
}
static int mxt_input_open(struct input_dev *dev)
return error;
}
+ /* Request the WAKE line as asserted so we go out of sleep */
+ data->wake_gpio = devm_gpiod_get_optional(&client->dev,
+ "wake", GPIOD_OUT_HIGH);
+ if (IS_ERR(data->wake_gpio)) {
+ error = PTR_ERR(data->wake_gpio);
+ dev_err(&client->dev, "Failed to get wake gpio: %d\n", error);
+ return error;
+ }
+
error = devm_request_threaded_irq(&client->dev, client->irq,
- NULL, mxt_interrupt, IRQF_ONESHOT,
+ NULL, mxt_interrupt,
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
client->name, data);
if (error) {
dev_err(&client->dev, "Failed to register interrupt\n");
return error;
}
- disable_irq(client->irq);
-
error = regulator_bulk_enable(ARRAY_SIZE(data->regulators),
data->regulators);
if (error) {
msleep(MXT_RESET_INVALID_CHG);
}
+ /*
+ * Controllers like mXT1386 have a dedicated WAKE line that could be
+ * connected to a GPIO or to I2C SCL pin, or permanently asserted low.
+ *
+ * This WAKE line is used for waking controller from a deep-sleep and
+ * it needs to be asserted low for 25 milliseconds before I2C transfers
+ * could be accepted by controller if it was in a deep-sleep mode.
+ * Controller will go into sleep automatically after 2 seconds of
+ * inactivity if WAKE line is deasserted and deep sleep is activated.
+ *
+ * If WAKE line is connected to I2C SCL pin, then the first I2C transfer
+ * will get an instant NAK and transfer needs to be retried after 25ms.
+ *
+ * If WAKE line is connected to a GPIO line, the line must be asserted
+ * 25ms before the host attempts to communicate with the controller.
+ */
+ device_property_read_u32(&client->dev, "atmel,wakeup-method",
+ &data->wakeup_method);
+
error = mxt_initialize(data);
if (error)
goto err_disable_regulators;
input_set_drvdata(in_dev, bu21029);
- irq_set_status_flags(client->irq, IRQ_NOAUTOEN);
error = devm_request_threaded_irq(&client->dev, client->irq,
NULL, bu21029_touch_soft_irq,
- IRQF_ONESHOT, DRIVER_NAME, bu21029);
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
+ DRIVER_NAME, bu21029);
if (error) {
dev_err(&client->dev,
"unable to request touch irq: %d\n", error);
static void cyttsp_hard_reset(struct cyttsp *ts)
{
if (ts->reset_gpio) {
+ /*
+ * According to the CY8CTMA340 datasheet page 21, the external
+ * reset pulse width should be >= 1 ms. The datasheet does not
+ * specify how long we have to wait after reset but a vendor
+ * tree specifies 5 ms here.
+ */
gpiod_set_value_cansleep(ts->reset_gpio, 1);
- msleep(CY_DELAY_DFLT);
+ usleep_range(1000, 2000);
gpiod_set_value_cansleep(ts->reset_gpio, 0);
- msleep(CY_DELAY_DFLT);
+ usleep_range(5000, 6000);
}
}
static int cyttsp_soft_reset(struct cyttsp *ts)
{
- unsigned long timeout;
int retval;
/* wait for interrupt to set ready completion */
enable_irq(ts->irq);
retval = ttsp_send_command(ts, CY_SOFT_RESET_MODE);
- if (retval)
+ if (retval) {
+ dev_err(ts->dev, "failed to send soft reset\n");
goto out;
+ }
- timeout = wait_for_completion_timeout(&ts->bl_ready,
- msecs_to_jiffies(CY_DELAY_DFLT * CY_DELAY_MAX));
- retval = timeout ? 0 : -EIO;
+ if (!wait_for_completion_timeout(&ts->bl_ready,
+ msecs_to_jiffies(CY_DELAY_DFLT * CY_DELAY_MAX))) {
+ dev_err(ts->dev, "timeout waiting for soft reset\n");
+ retval = -EIO;
+ }
out:
ts->state = CY_IDLE_STATE;
if (GET_BOOTLOADERMODE(ts->bl_data.bl_status) &&
IS_VALID_APP(ts->bl_data.bl_status)) {
error = cyttsp_exit_bl_mode(ts);
- if (error)
+ if (error) {
+ dev_err(ts->dev, "failed to exit bootloader mode\n");
return error;
+ }
}
if (GET_HSTMODE(ts->bl_data.bl_file) != CY_OPERATE_MODE ||
return ERR_PTR(error);
init_completion(&ts->bl_ready);
- snprintf(ts->phys, sizeof(ts->phys), "%s/input0", dev_name(dev));
input_dev->name = "Cypress TTSP TouchScreen";
- input_dev->phys = ts->phys;
input_dev->id.bustype = bus_ops->bustype;
input_dev->dev.parent = ts->dev;
input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X);
input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y);
+ /* One byte for width 0..255 so this is the limit */
+ input_set_abs_params(input_dev, ABS_MT_TOUCH_MAJOR, 0, 255, 0, 0);
+
touchscreen_parse_properties(input_dev, true, NULL);
- error = input_mt_init_slots(input_dev, CY_MAX_ID, 0);
+ error = input_mt_init_slots(input_dev, CY_MAX_ID, INPUT_MT_DIRECT);
if (error) {
dev_err(dev, "Unable to init MT slots.\n");
return ERR_PTR(error);
}
error = devm_request_threaded_irq(dev, ts->irq, NULL, cyttsp_irq,
- IRQF_TRIGGER_FALLING | IRQF_ONESHOT,
+ IRQF_TRIGGER_FALLING | IRQF_ONESHOT |
+ IRQF_NO_AUTOEN,
"cyttsp", ts);
if (error) {
dev_err(ts->dev, "failed to request IRQ %d, err: %d\n",
return ERR_PTR(error);
}
- disable_irq(ts->irq);
-
cyttsp_hard_reset(ts);
error = cyttsp_power_on(ts);
struct device *dev;
int irq;
struct input_dev *input;
- char phys[32];
const struct cyttsp_bus_ops *bus_ops;
struct cyttsp_bootloader_data bl_data;
struct cyttsp_sysinfo_data sysinfo_data;
#include <linux/of.h>
#include <linux/gpio/consumer.h>
#include <linux/regulator/consumer.h>
+#include <linux/uuid.h>
#include <asm/unaligned.h>
/* Device, Driver information */
}
}
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id i2c_hid_ids[] = {
+ {"ACPI0C50", 0 },
+ {"PNP0C50", 0 },
+ { },
+};
+
+static const guid_t i2c_hid_guid =
+ GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
+ 0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);
+
+static bool elants_acpi_is_hid_device(struct device *dev)
+{
+ acpi_handle handle = ACPI_HANDLE(dev);
+ union acpi_object *obj;
+
+ if (acpi_match_device_ids(ACPI_COMPANION(dev), i2c_hid_ids))
+ return false;
+
+ obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL, ACPI_TYPE_INTEGER);
+ if (obj) {
+ ACPI_FREE(obj);
+ return true;
+ }
+
+ return false;
+}
+#else
+static bool elants_acpi_is_hid_device(struct device *dev)
+{
+ return false;
+}
+#endif
+
static int elants_i2c_probe(struct i2c_client *client,
const struct i2c_device_id *id)
{
unsigned long irqflags;
int error;
+ /* Don't bind to i2c-hid compatible devices, these are handled by the i2c-hid drv. */
+ if (elants_acpi_is_hid_device(&client->dev)) {
+ dev_warn(&client->dev, "This device appears to be an I2C-HID device, not binding\n");
+ return -ENODEV;
+ }
+
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
- dev_err(&client->dev,
- "%s: i2c check functionality error\n", DEVICE_NAME);
+ dev_err(&client->dev, "I2C check functionality error\n");
return -ENXIO;
}
#define EXC3000_NUM_SLOTS 10
#define EXC3000_SLOTS_PER_FRAME 5
#define EXC3000_LEN_FRAME 66
+#define EXC3000_LEN_VENDOR_REQUEST 68
#define EXC3000_LEN_POINT 10
#define EXC3000_LEN_MODEL_NAME 16
#define EXC3000_LEN_FW_VERSION 16
+#define EXC3000_VENDOR_EVENT 0x03
#define EXC3000_MT1_EVENT 0x06
#define EXC3000_MT2_EVENT 0x18
u8 buf[2 * EXC3000_LEN_FRAME];
struct completion wait_event;
struct mutex query_lock;
- int query_result;
- char model[EXC3000_LEN_MODEL_NAME];
- char fw_version[EXC3000_LEN_FW_VERSION];
};
static void exc3000_report_slots(struct input_dev *input,
input_sync(data->input);
}
+static inline void exc3000_schedule_timer(struct exc3000_data *data)
+{
+ mod_timer(&data->timer, jiffies + msecs_to_jiffies(EXC3000_TIMEOUT_MS));
+}
+
static int exc3000_read_frame(struct exc3000_data *data, u8 *buf)
{
struct i2c_client *client = data->client;
- u8 expected_event = EXC3000_MT1_EVENT;
int ret;
- if (data->info->max_xy == SZ_16K - 1)
- expected_event = EXC3000_MT2_EVENT;
-
ret = i2c_master_send(client, "'", 2);
if (ret < 0)
return ret;
if (get_unaligned_le16(buf) != EXC3000_LEN_FRAME)
return -EINVAL;
- if (buf[2] != expected_event)
- return -EINVAL;
-
return 0;
}
-static int exc3000_read_data(struct exc3000_data *data,
- u8 *buf, int *n_slots)
+static int exc3000_handle_mt_event(struct exc3000_data *data)
{
- int error;
-
- error = exc3000_read_frame(data, buf);
- if (error)
- return error;
+ struct input_dev *input = data->input;
+ int ret, total_slots;
+ u8 *buf = data->buf;
- *n_slots = buf[3];
- if (!*n_slots || *n_slots > EXC3000_NUM_SLOTS)
- return -EINVAL;
+ total_slots = buf[3];
+ if (!total_slots || total_slots > EXC3000_NUM_SLOTS) {
+ ret = -EINVAL;
+ goto out_fail;
+ }
- if (*n_slots > EXC3000_SLOTS_PER_FRAME) {
+ if (total_slots > EXC3000_SLOTS_PER_FRAME) {
/* Read 2nd frame to get the rest of the contacts. */
- error = exc3000_read_frame(data, buf + EXC3000_LEN_FRAME);
- if (error)
- return error;
+ ret = exc3000_read_frame(data, buf + EXC3000_LEN_FRAME);
+ if (ret)
+ goto out_fail;
/* 2nd chunk must have number of contacts set to 0. */
- if (buf[EXC3000_LEN_FRAME + 3] != 0)
- return -EINVAL;
+ if (buf[EXC3000_LEN_FRAME + 3] != 0) {
+ ret = -EINVAL;
+ goto out_fail;
+ }
}
- return 0;
-}
-
-static int exc3000_query_interrupt(struct exc3000_data *data)
-{
- u8 *buf = data->buf;
- int error;
+ /*
+ * We read full state successfully, no contacts will be "stuck".
+ */
+ del_timer_sync(&data->timer);
- error = i2c_master_recv(data->client, buf, EXC3000_LEN_FRAME);
- if (error < 0)
- return error;
+ while (total_slots > 0) {
+ int slots = min(total_slots, EXC3000_SLOTS_PER_FRAME);
- if (buf[0] != 'B')
- return -EPROTO;
+ exc3000_report_slots(input, &data->prop, buf + 4, slots);
+ total_slots -= slots;
+ buf += EXC3000_LEN_FRAME;
+ }
- if (buf[4] == 'E')
- strlcpy(data->model, buf + 5, sizeof(data->model));
- else if (buf[4] == 'D')
- strlcpy(data->fw_version, buf + 5, sizeof(data->fw_version));
- else
- return -EPROTO;
+ input_mt_sync_frame(input);
+ input_sync(input);
return 0;
+
+out_fail:
+ /* Schedule a timer to release "stuck" contacts */
+ exc3000_schedule_timer(data);
+
+ return ret;
}
static irqreturn_t exc3000_interrupt(int irq, void *dev_id)
{
struct exc3000_data *data = dev_id;
- struct input_dev *input = data->input;
u8 *buf = data->buf;
- int slots, total_slots;
- int error;
-
- if (mutex_is_locked(&data->query_lock)) {
- data->query_result = exc3000_query_interrupt(data);
- complete(&data->wait_event);
- goto out;
- }
+ int ret;
- error = exc3000_read_data(data, buf, &total_slots);
- if (error) {
+ ret = exc3000_read_frame(data, buf);
+ if (ret) {
/* Schedule a timer to release "stuck" contacts */
- mod_timer(&data->timer,
- jiffies + msecs_to_jiffies(EXC3000_TIMEOUT_MS));
+ exc3000_schedule_timer(data);
goto out;
}
- /*
- * We read full state successfully, no contacts will be "stuck".
- */
- del_timer_sync(&data->timer);
+ switch (buf[2]) {
+ case EXC3000_VENDOR_EVENT:
+ complete(&data->wait_event);
+ break;
- while (total_slots > 0) {
- slots = min(total_slots, EXC3000_SLOTS_PER_FRAME);
- exc3000_report_slots(input, &data->prop, buf + 4, slots);
- total_slots -= slots;
- buf += EXC3000_LEN_FRAME;
- }
+ case EXC3000_MT1_EVENT:
+ case EXC3000_MT2_EVENT:
+ exc3000_handle_mt_event(data);
+ break;
- input_mt_sync_frame(input);
- input_sync(input);
+ default:
+ break;
+ }
out:
return IRQ_HANDLED;
}
-static ssize_t fw_version_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static int exc3000_vendor_data_request(struct exc3000_data *data, u8 *request,
+ u8 request_len, u8 *response, int timeout)
{
- struct i2c_client *client = to_i2c_client(dev);
- struct exc3000_data *data = i2c_get_clientdata(client);
- static const u8 request[68] = {
- 0x67, 0x00, 0x42, 0x00, 0x03, 0x01, 'D', 0x00
- };
- int error;
+ u8 buf[EXC3000_LEN_VENDOR_REQUEST] = { 0x67, 0x00, 0x42, 0x00, 0x03 };
+ int ret;
mutex_lock(&data->query_lock);
- data->query_result = -ETIMEDOUT;
reinit_completion(&data->wait_event);
- error = i2c_master_send(client, request, sizeof(request));
- if (error < 0) {
- mutex_unlock(&data->query_lock);
- return error;
+ buf[5] = request_len;
+ memcpy(&buf[6], request, request_len);
+
+ ret = i2c_master_send(data->client, buf, EXC3000_LEN_VENDOR_REQUEST);
+ if (ret < 0)
+ goto out_unlock;
+
+ if (response) {
+ ret = wait_for_completion_timeout(&data->wait_event,
+ timeout * HZ);
+ if (ret <= 0) {
+ ret = -ETIMEDOUT;
+ goto out_unlock;
+ }
+
+ if (data->buf[3] >= EXC3000_LEN_FRAME) {
+ ret = -ENOSPC;
+ goto out_unlock;
+ }
+
+ memcpy(response, &data->buf[4], data->buf[3]);
+ ret = data->buf[3];
}
- wait_for_completion_interruptible_timeout(&data->wait_event, 1 * HZ);
+out_unlock:
mutex_unlock(&data->query_lock);
- if (data->query_result < 0)
- return data->query_result;
-
- return sprintf(buf, "%s\n", data->fw_version);
+ return ret;
}
-static DEVICE_ATTR_RO(fw_version);
-static ssize_t exc3000_get_model(struct exc3000_data *data)
+static ssize_t fw_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- static const u8 request[68] = {
- 0x67, 0x00, 0x42, 0x00, 0x03, 0x01, 'E', 0x00
- };
- struct i2c_client *client = data->client;
- int error;
+ struct i2c_client *client = to_i2c_client(dev);
+ struct exc3000_data *data = i2c_get_clientdata(client);
+ u8 response[EXC3000_LEN_FRAME];
+ int ret;
- mutex_lock(&data->query_lock);
- data->query_result = -ETIMEDOUT;
- reinit_completion(&data->wait_event);
+ /* query bootloader info */
+ ret = exc3000_vendor_data_request(data,
+ (u8[]){0x39, 0x02}, 2, response, 1);
+ if (ret < 0)
+ return ret;
- error = i2c_master_send(client, request, sizeof(request));
- if (error < 0) {
- mutex_unlock(&data->query_lock);
- return error;
- }
+ /*
+ * If the bootloader version is non-zero then the device is in
+ * bootloader mode and won't answer a query for the application FW
+ * version, so we just use the bootloader version info.
+ */
+ if (response[2] || response[3])
+ return sprintf(buf, "%d.%d\n", response[2], response[3]);
- wait_for_completion_interruptible_timeout(&data->wait_event, 1 * HZ);
- mutex_unlock(&data->query_lock);
+ ret = exc3000_vendor_data_request(data, (u8[]){'D'}, 1, response, 1);
+ if (ret < 0)
+ return ret;
- return data->query_result;
+ return sprintf(buf, "%s\n", &response[1]);
}
+static DEVICE_ATTR_RO(fw_version);
static ssize_t model_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct i2c_client *client = to_i2c_client(dev);
struct exc3000_data *data = i2c_get_clientdata(client);
- int error;
+ u8 response[EXC3000_LEN_FRAME];
+ int ret;
- error = exc3000_get_model(data);
- if (error < 0)
- return error;
+ ret = exc3000_vendor_data_request(data, (u8[]){'E'}, 1, response, 1);
+ if (ret < 0)
+ return ret;
- return sprintf(buf, "%s\n", data->model);
+ return sprintf(buf, "%s\n", &response[1]);
}
static DEVICE_ATTR_RO(model);
+static ssize_t type_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct exc3000_data *data = i2c_get_clientdata(client);
+ u8 response[EXC3000_LEN_FRAME];
+ int ret;
+
+ ret = exc3000_vendor_data_request(data, (u8[]){'F'}, 1, response, 1);
+ if (ret < 0)
+ return ret;
+
+ return sprintf(buf, "%s\n", &response[1]);
+}
+static DEVICE_ATTR_RO(type);
+
static struct attribute *sysfs_attrs[] = {
&dev_attr_fw_version.attr,
&dev_attr_model.attr,
+ &dev_attr_type.attr,
NULL
};
* or two touch events anyways).
*/
for (retry = 0; retry < 3; retry++) {
- error = exc3000_get_model(data);
- if (!error)
+ u8 response[EXC3000_LEN_FRAME];
+
+ error = exc3000_vendor_data_request(data, (u8[]){'E'}, 1,
+ response, 1);
+ if (error > 0) {
+ dev_dbg(&client->dev, "TS Model: %s", &response[1]);
+ error = 0;
break;
+ }
dev_warn(&client->dev, "Retry %d get EETI EXC3000 model: %d\n",
retry + 1, error);
}
if (error)
return error;
- dev_dbg(&client->dev, "TS Model: %s", data->model);
-
i2c_set_clientdata(client, data);
error = devm_device_add_group(&client->dev, &exc3000_attribute_group);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021
+ * Author(s): Giulio Benetti <giulio.benetti@benettiengineering.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/interrupt.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/irq.h>
+#include <linux/regulator/consumer.h>
+#include <linux/regmap.h>
+
+#include <asm/unaligned.h>
+
+#define HY46XX_CHKSUM_CODE 0x1
+#define HY46XX_FINGER_NUM 0x2
+#define HY46XX_CHKSUM_LEN 0x7
+#define HY46XX_THRESHOLD 0x80
+#define HY46XX_GLOVE_EN 0x84
+#define HY46XX_REPORT_SPEED 0x88
+#define HY46XX_PWR_NOISE_EN 0x89
+#define HY46XX_FILTER_DATA 0x8A
+#define HY46XX_GAIN 0x92
+#define HY46XX_EDGE_OFFSET 0x93
+#define HY46XX_RX_NR_USED 0x94
+#define HY46XX_TX_NR_USED 0x95
+#define HY46XX_PWR_MODE 0xA5
+#define HY46XX_FW_VERSION 0xA6
+#define HY46XX_LIB_VERSION 0xA7
+#define HY46XX_TP_INFO 0xA8
+#define HY46XX_TP_CHIP_ID 0xA9
+#define HY46XX_BOOT_VER 0xB0
+
+#define HY46XX_TPLEN 0x6
+#define HY46XX_REPORT_PKT_LEN 0x44
+
+#define HY46XX_MAX_SUPPORTED_POINTS 11
+
+#define TOUCH_EVENT_DOWN 0x00
+#define TOUCH_EVENT_UP 0x01
+#define TOUCH_EVENT_CONTACT 0x02
+#define TOUCH_EVENT_RESERVED 0x03
+
+struct hycon_hy46xx_data {
+ struct i2c_client *client;
+ struct input_dev *input;
+ struct touchscreen_properties prop;
+ struct regulator *vcc;
+
+ struct gpio_desc *reset_gpio;
+
+ struct mutex mutex;
+ struct regmap *regmap;
+
+ int threshold;
+ bool glove_enable;
+ int report_speed;
+ bool noise_filter_enable;
+ int filter_data;
+ int gain;
+ int edge_offset;
+ int rx_number_used;
+ int tx_number_used;
+ int power_mode;
+ int fw_version;
+ int lib_version;
+ int tp_information;
+ int tp_chip_id;
+ int bootloader_version;
+};
+
+static const struct regmap_config hycon_hy46xx_i2c_regmap_config = {
+ .reg_bits = 8,
+ .val_bits = 8,
+};
+
+static bool hycon_hy46xx_check_checksum(struct hycon_hy46xx_data *tsdata, u8 *buf)
+{
+ u8 chksum = 0;
+ int i;
+
+ for (i = 2; i < buf[HY46XX_CHKSUM_LEN]; i++)
+ chksum += buf[i];
+
+ if (chksum == buf[HY46XX_CHKSUM_CODE])
+ return true;
+
+ dev_err_ratelimited(&tsdata->client->dev,
+ "checksum error: 0x%02x expected, got 0x%02x\n",
+ chksum, buf[HY46XX_CHKSUM_CODE]);
+
+ return false;
+}
+
+static irqreturn_t hycon_hy46xx_isr(int irq, void *dev_id)
+{
+ struct hycon_hy46xx_data *tsdata = dev_id;
+ struct device *dev = &tsdata->client->dev;
+ u8 rdbuf[HY46XX_REPORT_PKT_LEN];
+ int i, x, y, id;
+ int error;
+
+ memset(rdbuf, 0, sizeof(rdbuf));
+
+ error = regmap_bulk_read(tsdata->regmap, 0, rdbuf, sizeof(rdbuf));
+ if (error) {
+ dev_err_ratelimited(dev, "Unable to fetch data, error: %d\n",
+ error);
+ goto out;
+ }
+
+ if (!hycon_hy46xx_check_checksum(tsdata, rdbuf))
+ goto out;
+
+ for (i = 0; i < HY46XX_MAX_SUPPORTED_POINTS; i++) {
+ u8 *buf = &rdbuf[3 + (HY46XX_TPLEN * i)];
+ int type = buf[0] >> 6;
+
+ if (type == TOUCH_EVENT_RESERVED)
+ continue;
+
+ x = get_unaligned_be16(buf) & 0x0fff;
+ y = get_unaligned_be16(buf + 2) & 0x0fff;
+
+ id = buf[2] >> 4;
+
+ input_mt_slot(tsdata->input, id);
+ if (input_mt_report_slot_state(tsdata->input, MT_TOOL_FINGER,
+ type != TOUCH_EVENT_UP))
+ touchscreen_report_pos(tsdata->input, &tsdata->prop,
+ x, y, true);
+ }
+
+ input_mt_report_pointer_emulation(tsdata->input, false);
+ input_sync(tsdata->input);
+
+out:
+ return IRQ_HANDLED;
+}
+
+struct hycon_hy46xx_attribute {
+ struct device_attribute dattr;
+ size_t field_offset;
+ u8 address;
+ u8 limit_low;
+ u8 limit_high;
+};
+
+#define HYCON_ATTR_U8(_field, _mode, _address, _limit_low, _limit_high) \
+ struct hycon_hy46xx_attribute hycon_hy46xx_attr_##_field = { \
+ .dattr = __ATTR(_field, _mode, \
+ hycon_hy46xx_setting_show, \
+ hycon_hy46xx_setting_store), \
+ .field_offset = offsetof(struct hycon_hy46xx_data, _field), \
+ .address = _address, \
+ .limit_low = _limit_low, \
+ .limit_high = _limit_high, \
+ }
+
+#define HYCON_ATTR_BOOL(_field, _mode, _address) \
+ struct hycon_hy46xx_attribute hycon_hy46xx_attr_##_field = { \
+ .dattr = __ATTR(_field, _mode, \
+ hycon_hy46xx_setting_show, \
+ hycon_hy46xx_setting_store), \
+ .field_offset = offsetof(struct hycon_hy46xx_data, _field), \
+ .address = _address, \
+ .limit_low = false, \
+ .limit_high = true, \
+ }
+
+static ssize_t hycon_hy46xx_setting_show(struct device *dev,
+ struct device_attribute *dattr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct hycon_hy46xx_data *tsdata = i2c_get_clientdata(client);
+ struct hycon_hy46xx_attribute *attr =
+ container_of(dattr, struct hycon_hy46xx_attribute, dattr);
+ u8 *field = (u8 *)tsdata + attr->field_offset;
+ size_t count = 0;
+ int error = 0;
+ int val;
+
+ mutex_lock(&tsdata->mutex);
+
+ error = regmap_read(tsdata->regmap, attr->address, &val);
+ if (error < 0) {
+ dev_err(&tsdata->client->dev,
+ "Failed to fetch attribute %s, error %d\n",
+ dattr->attr.name, error);
+ goto out;
+ }
+
+ if (val != *field) {
+ dev_warn(&tsdata->client->dev,
+ "%s: read (%d) and stored value (%d) differ\n",
+ dattr->attr.name, val, *field);
+ *field = val;
+ }
+
+ count = scnprintf(buf, PAGE_SIZE, "%d\n", val);
+
+out:
+ mutex_unlock(&tsdata->mutex);
+ return error ?: count;
+}
+
+static ssize_t hycon_hy46xx_setting_store(struct device *dev,
+ struct device_attribute *dattr,
+ const char *buf, size_t count)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct hycon_hy46xx_data *tsdata = i2c_get_clientdata(client);
+ struct hycon_hy46xx_attribute *attr =
+ container_of(dattr, struct hycon_hy46xx_attribute, dattr);
+ u8 *field = (u8 *)tsdata + attr->field_offset;
+ unsigned int val;
+ int error;
+
+ mutex_lock(&tsdata->mutex);
+
+ error = kstrtouint(buf, 0, &val);
+ if (error)
+ goto out;
+
+ if (val < attr->limit_low || val > attr->limit_high) {
+ error = -ERANGE;
+ goto out;
+ }
+
+ error = regmap_write(tsdata->regmap, attr->address, val);
+ if (error < 0) {
+ dev_err(&tsdata->client->dev,
+ "Failed to update attribute %s, error: %d\n",
+ dattr->attr.name, error);
+ goto out;
+ }
+ *field = val;
+
+out:
+ mutex_unlock(&tsdata->mutex);
+ return error ?: count;
+}
+
+static HYCON_ATTR_U8(threshold, 0644, HY46XX_THRESHOLD, 0, 255);
+static HYCON_ATTR_BOOL(glove_enable, 0644, HY46XX_GLOVE_EN);
+static HYCON_ATTR_U8(report_speed, 0644, HY46XX_REPORT_SPEED, 0, 255);
+static HYCON_ATTR_BOOL(noise_filter_enable, 0644, HY46XX_PWR_NOISE_EN);
+static HYCON_ATTR_U8(filter_data, 0644, HY46XX_FILTER_DATA, 0, 5);
+static HYCON_ATTR_U8(gain, 0644, HY46XX_GAIN, 0, 5);
+static HYCON_ATTR_U8(edge_offset, 0644, HY46XX_EDGE_OFFSET, 0, 5);
+static HYCON_ATTR_U8(fw_version, 0444, HY46XX_FW_VERSION, 0, 255);
+static HYCON_ATTR_U8(lib_version, 0444, HY46XX_LIB_VERSION, 0, 255);
+static HYCON_ATTR_U8(tp_information, 0444, HY46XX_TP_INFO, 0, 255);
+static HYCON_ATTR_U8(tp_chip_id, 0444, HY46XX_TP_CHIP_ID, 0, 255);
+static HYCON_ATTR_U8(bootloader_version, 0444, HY46XX_BOOT_VER, 0, 255);
+
+static struct attribute *hycon_hy46xx_attrs[] = {
+ &hycon_hy46xx_attr_threshold.dattr.attr,
+ &hycon_hy46xx_attr_glove_enable.dattr.attr,
+ &hycon_hy46xx_attr_report_speed.dattr.attr,
+ &hycon_hy46xx_attr_noise_filter_enable.dattr.attr,
+ &hycon_hy46xx_attr_filter_data.dattr.attr,
+ &hycon_hy46xx_attr_gain.dattr.attr,
+ &hycon_hy46xx_attr_edge_offset.dattr.attr,
+ &hycon_hy46xx_attr_fw_version.dattr.attr,
+ &hycon_hy46xx_attr_lib_version.dattr.attr,
+ &hycon_hy46xx_attr_tp_information.dattr.attr,
+ &hycon_hy46xx_attr_tp_chip_id.dattr.attr,
+ &hycon_hy46xx_attr_bootloader_version.dattr.attr,
+ NULL
+};
+
+static const struct attribute_group hycon_hy46xx_attr_group = {
+ .attrs = hycon_hy46xx_attrs,
+};
+
+static void hycon_hy46xx_get_defaults(struct device *dev, struct hycon_hy46xx_data *tsdata)
+{
+ bool val_bool;
+ int error;
+ u32 val;
+
+ error = device_property_read_u32(dev, "hycon,threshold", &val);
+ if (!error) {
+ error = regmap_write(tsdata->regmap, HY46XX_THRESHOLD, val);
+ if (error < 0)
+ goto out;
+
+ tsdata->threshold = val;
+ }
+
+ val_bool = device_property_read_bool(dev, "hycon,glove-enable");
+ error = regmap_write(tsdata->regmap, HY46XX_GLOVE_EN, val_bool);
+ if (error < 0)
+ goto out;
+ tsdata->glove_enable = val_bool;
+
+ error = device_property_read_u32(dev, "hycon,report-speed-hz", &val);
+ if (!error) {
+ error = regmap_write(tsdata->regmap, HY46XX_REPORT_SPEED, val);
+ if (error < 0)
+ goto out;
+
+ tsdata->report_speed = val;
+ }
+
+ val_bool = device_property_read_bool(dev, "hycon,noise-filter-enable");
+ error = regmap_write(tsdata->regmap, HY46XX_PWR_NOISE_EN, val_bool);
+ if (error < 0)
+ goto out;
+ tsdata->noise_filter_enable = val_bool;
+
+ error = device_property_read_u32(dev, "hycon,filter-data", &val);
+ if (!error) {
+ error = regmap_write(tsdata->regmap, HY46XX_FILTER_DATA, val);
+ if (error < 0)
+ goto out;
+
+ tsdata->filter_data = val;
+ }
+
+ error = device_property_read_u32(dev, "hycon,gain", &val);
+ if (!error) {
+ error = regmap_write(tsdata->regmap, HY46XX_GAIN, val);
+ if (error < 0)
+ goto out;
+
+ tsdata->gain = val;
+ }
+
+ error = device_property_read_u32(dev, "hycon,edge-offset", &val);
+ if (!error) {
+ error = regmap_write(tsdata->regmap, HY46XX_EDGE_OFFSET, val);
+ if (error < 0)
+ goto out;
+
+ tsdata->edge_offset = val;
+ }
+
+ return;
+out:
+ dev_err(&tsdata->client->dev, "Failed to set default settings");
+}
+
+static void hycon_hy46xx_get_parameters(struct hycon_hy46xx_data *tsdata)
+{
+ int error;
+ u32 val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_THRESHOLD, &val);
+ if (error < 0)
+ goto out;
+ tsdata->threshold = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_GLOVE_EN, &val);
+ if (error < 0)
+ goto out;
+ tsdata->glove_enable = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_REPORT_SPEED, &val);
+ if (error < 0)
+ goto out;
+ tsdata->report_speed = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_PWR_NOISE_EN, &val);
+ if (error < 0)
+ goto out;
+ tsdata->noise_filter_enable = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_FILTER_DATA, &val);
+ if (error < 0)
+ goto out;
+ tsdata->filter_data = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_GAIN, &val);
+ if (error < 0)
+ goto out;
+ tsdata->gain = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_EDGE_OFFSET, &val);
+ if (error < 0)
+ goto out;
+ tsdata->edge_offset = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_RX_NR_USED, &val);
+ if (error < 0)
+ goto out;
+ tsdata->rx_number_used = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_TX_NR_USED, &val);
+ if (error < 0)
+ goto out;
+ tsdata->tx_number_used = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_PWR_MODE, &val);
+ if (error < 0)
+ goto out;
+ tsdata->power_mode = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_FW_VERSION, &val);
+ if (error < 0)
+ goto out;
+ tsdata->fw_version = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_LIB_VERSION, &val);
+ if (error < 0)
+ goto out;
+ tsdata->lib_version = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_TP_INFO, &val);
+ if (error < 0)
+ goto out;
+ tsdata->tp_information = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_TP_CHIP_ID, &val);
+ if (error < 0)
+ goto out;
+ tsdata->tp_chip_id = val;
+
+ error = regmap_read(tsdata->regmap, HY46XX_BOOT_VER, &val);
+ if (error < 0)
+ goto out;
+ tsdata->bootloader_version = val;
+
+ return;
+out:
+ dev_err(&tsdata->client->dev, "Failed to read default settings");
+}
+
+static void hycon_hy46xx_disable_regulator(void *arg)
+{
+ struct hycon_hy46xx_data *data = arg;
+
+ regulator_disable(data->vcc);
+}
+
+static int hycon_hy46xx_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct hycon_hy46xx_data *tsdata;
+ struct input_dev *input;
+ int error;
+
+ dev_dbg(&client->dev, "probing for HYCON HY46XX I2C\n");
+
+ tsdata = devm_kzalloc(&client->dev, sizeof(*tsdata), GFP_KERNEL);
+ if (!tsdata)
+ return -ENOMEM;
+
+ tsdata->vcc = devm_regulator_get(&client->dev, "vcc");
+ if (IS_ERR(tsdata->vcc)) {
+ error = PTR_ERR(tsdata->vcc);
+ if (error != -EPROBE_DEFER)
+ dev_err(&client->dev,
+ "failed to request regulator: %d\n", error);
+ return error;
+ }
+
+ error = regulator_enable(tsdata->vcc);
+ if (error < 0) {
+ dev_err(&client->dev, "failed to enable vcc: %d\n", error);
+ return error;
+ }
+
+ error = devm_add_action_or_reset(&client->dev,
+ hycon_hy46xx_disable_regulator,
+ tsdata);
+ if (error)
+ return error;
+
+ tsdata->reset_gpio = devm_gpiod_get_optional(&client->dev,
+ "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(tsdata->reset_gpio)) {
+ error = PTR_ERR(tsdata->reset_gpio);
+ dev_err(&client->dev,
+ "Failed to request GPIO reset pin, error %d\n", error);
+ return error;
+ }
+
+ if (tsdata->reset_gpio) {
+ usleep_range(5000, 6000);
+ gpiod_set_value_cansleep(tsdata->reset_gpio, 1);
+ usleep_range(5000, 6000);
+ gpiod_set_value_cansleep(tsdata->reset_gpio, 0);
+ msleep(1000);
+ }
+
+ input = devm_input_allocate_device(&client->dev);
+ if (!input) {
+ dev_err(&client->dev, "failed to allocate input device.\n");
+ return -ENOMEM;
+ }
+
+ mutex_init(&tsdata->mutex);
+ tsdata->client = client;
+ tsdata->input = input;
+
+ tsdata->regmap = devm_regmap_init_i2c(client,
+ &hycon_hy46xx_i2c_regmap_config);
+ if (IS_ERR(tsdata->regmap)) {
+ dev_err(&client->dev, "regmap allocation failed\n");
+ return PTR_ERR(tsdata->regmap);
+ }
+
+ hycon_hy46xx_get_defaults(&client->dev, tsdata);
+ hycon_hy46xx_get_parameters(tsdata);
+
+ input->name = "Hycon Capacitive Touch";
+ input->id.bustype = BUS_I2C;
+ input->dev.parent = &client->dev;
+
+ input_set_abs_params(input, ABS_MT_POSITION_X, 0, -1, 0, 0);
+ input_set_abs_params(input, ABS_MT_POSITION_Y, 0, -1, 0, 0);
+
+ touchscreen_parse_properties(input, true, &tsdata->prop);
+
+ error = input_mt_init_slots(input, HY46XX_MAX_SUPPORTED_POINTS,
+ INPUT_MT_DIRECT);
+ if (error) {
+ dev_err(&client->dev, "Unable to init MT slots.\n");
+ return error;
+ }
+
+ i2c_set_clientdata(client, tsdata);
+
+ error = devm_request_threaded_irq(&client->dev, client->irq,
+ NULL, hycon_hy46xx_isr, IRQF_ONESHOT,
+ client->name, tsdata);
+ if (error) {
+ dev_err(&client->dev, "Unable to request touchscreen IRQ.\n");
+ return error;
+ }
+
+ error = devm_device_add_group(&client->dev, &hycon_hy46xx_attr_group);
+ if (error)
+ return error;
+
+ error = input_register_device(input);
+ if (error)
+ return error;
+
+ dev_dbg(&client->dev,
+ "HYCON HY46XX initialized: IRQ %d, Reset pin %d.\n",
+ client->irq,
+ tsdata->reset_gpio ? desc_to_gpio(tsdata->reset_gpio) : -1);
+
+ return 0;
+}
+
+static const struct i2c_device_id hycon_hy46xx_id[] = {
+ { .name = "hy4613" },
+ { .name = "hy4614" },
+ { .name = "hy4621" },
+ { .name = "hy4623" },
+ { .name = "hy4633" },
+ { .name = "hy4635" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(i2c, hycon_hy46xx_id);
+
+static const struct of_device_id hycon_hy46xx_of_match[] = {
+ { .compatible = "hycon,hy4613" },
+ { .compatible = "hycon,hy4614" },
+ { .compatible = "hycon,hy4621" },
+ { .compatible = "hycon,hy4623" },
+ { .compatible = "hycon,hy4633" },
+ { .compatible = "hycon,hy4635" },
+ { /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, hycon_hy46xx_of_match);
+
+static struct i2c_driver hycon_hy46xx_driver = {
+ .driver = {
+ .name = "hycon_hy46xx",
+ .of_match_table = hycon_hy46xx_of_match,
+ .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+ },
+ .id_table = hycon_hy46xx_id,
+ .probe = hycon_hy46xx_probe,
+};
+
+module_i2c_driver(hycon_hy46xx_driver);
+
+MODULE_AUTHOR("Giulio Benetti <giulio.benetti@benettiengineering.com>");
+MODULE_DESCRIPTION("HYCON HY46XX I2C Touchscreen Driver");
+MODULE_LICENSE("GPL v2");
unsigned int *x, unsigned int *y,
unsigned int *z)
{
- if (touchdata[0] & BIT(finger))
+ if (!(touchdata[0] & BIT(finger)))
return false;
*x = get_unaligned_be16(touchdata + 1 + (finger * 4) + 0);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ILITEK Touch IC driver for 23XX, 25XX and Lego series
+ *
+ * Copyright (C) 2011 ILI Technology Corporation.
+ * Copyright (C) 2020 Luca Hsu <luca_hsu@ilitek.com>
+ * Copyright (C) 2021 Joe Hung <joe_hung@ilitek.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/i2c.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/interrupt.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/errno.h>
+#include <linux/acpi.h>
+#include <linux/input/touchscreen.h>
+#include <asm/unaligned.h>
+
+
+#define ILITEK_TS_NAME "ilitek_ts"
+#define BL_V1_8 0x108
+#define BL_V1_7 0x107
+#define BL_V1_6 0x106
+
+#define ILITEK_TP_CMD_GET_TP_RES 0x20
+#define ILITEK_TP_CMD_GET_SCRN_RES 0x21
+#define ILITEK_TP_CMD_SET_IC_SLEEP 0x30
+#define ILITEK_TP_CMD_SET_IC_WAKE 0x31
+#define ILITEK_TP_CMD_GET_FW_VER 0x40
+#define ILITEK_TP_CMD_GET_PRL_VER 0x42
+#define ILITEK_TP_CMD_GET_MCU_VER 0x61
+#define ILITEK_TP_CMD_GET_IC_MODE 0xC0
+
+#define REPORT_COUNT_ADDRESS 61
+#define ILITEK_SUPPORT_MAX_POINT 40
+
+struct ilitek_protocol_info {
+ u16 ver;
+ u8 ver_major;
+};
+
+struct ilitek_ts_data {
+ struct i2c_client *client;
+ struct gpio_desc *reset_gpio;
+ struct input_dev *input_dev;
+ struct touchscreen_properties prop;
+
+ const struct ilitek_protocol_map *ptl_cb_func;
+ struct ilitek_protocol_info ptl;
+
+ char product_id[30];
+ u16 mcu_ver;
+ u8 ic_mode;
+ u8 firmware_ver[8];
+
+ s32 reset_time;
+ s32 screen_max_x;
+ s32 screen_max_y;
+ s32 screen_min_x;
+ s32 screen_min_y;
+ s32 max_tp;
+};
+
+struct ilitek_protocol_map {
+ u16 cmd;
+ const char *name;
+ int (*func)(struct ilitek_ts_data *ts, u16 cmd, u8 *inbuf, u8 *outbuf);
+};
+
+enum ilitek_cmds {
+ /* common cmds */
+ GET_PTL_VER = 0,
+ GET_FW_VER,
+ GET_SCRN_RES,
+ GET_TP_RES,
+ GET_IC_MODE,
+ GET_MCU_VER,
+ SET_IC_SLEEP,
+ SET_IC_WAKE,
+
+ /* ALWAYS keep at the end */
+ MAX_CMD_CNT
+};
+
+/* ILITEK I2C R/W APIs */
+static int ilitek_i2c_write_and_read(struct ilitek_ts_data *ts,
+ u8 *cmd, int write_len, int delay,
+ u8 *data, int read_len)
+{
+ int error;
+ struct i2c_client *client = ts->client;
+ struct i2c_msg msgs[] = {
+ {
+ .addr = client->addr,
+ .flags = 0,
+ .len = write_len,
+ .buf = cmd,
+ },
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = read_len,
+ .buf = data,
+ },
+ };
+
+ if (delay == 0 && write_len > 0 && read_len > 0) {
+ error = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs));
+ if (error < 0)
+ return error;
+ } else {
+ if (write_len > 0) {
+ error = i2c_transfer(client->adapter, msgs, 1);
+ if (error < 0)
+ return error;
+ }
+ if (delay > 0)
+ mdelay(delay);
+
+ if (read_len > 0) {
+ error = i2c_transfer(client->adapter, msgs + 1, 1);
+ if (error < 0)
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+/* ILITEK ISR APIs */
+static void ilitek_touch_down(struct ilitek_ts_data *ts, unsigned int id,
+ unsigned int x, unsigned int y)
+{
+ struct input_dev *input = ts->input_dev;
+
+ input_mt_slot(input, id);
+ input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
+
+ touchscreen_report_pos(input, &ts->prop, x, y, true);
+}
+
+static int ilitek_process_and_report_v6(struct ilitek_ts_data *ts)
+{
+ int error = 0;
+ u8 buf[512];
+ int packet_len = 5;
+ int packet_max_point = 10;
+ int report_max_point;
+ int i, count;
+ struct input_dev *input = ts->input_dev;
+ struct device *dev = &ts->client->dev;
+ unsigned int x, y, status, id;
+
+ error = ilitek_i2c_write_and_read(ts, NULL, 0, 0, buf, 64);
+ if (error) {
+ dev_err(dev, "get touch info failed, err:%d\n", error);
+ goto err_sync_frame;
+ }
+
+ report_max_point = buf[REPORT_COUNT_ADDRESS];
+ if (report_max_point > ts->max_tp) {
+ dev_err(dev, "FW report max point:%d > panel info. max:%d\n",
+ report_max_point, ts->max_tp);
+ error = -EINVAL;
+ goto err_sync_frame;
+ }
+
+ count = DIV_ROUND_UP(report_max_point, packet_max_point);
+ for (i = 1; i < count; i++) {
+ error = ilitek_i2c_write_and_read(ts, NULL, 0, 0,
+ buf + i * 64, 64);
+ if (error) {
+ dev_err(dev, "get touch info. failed, cnt:%d, err:%d\n",
+ count, error);
+ goto err_sync_frame;
+ }
+ }
+
+ for (i = 0; i < report_max_point; i++) {
+ status = buf[i * packet_len + 1] & 0x40;
+ if (!status)
+ continue;
+
+ id = buf[i * packet_len + 1] & 0x3F;
+
+ x = get_unaligned_le16(buf + i * packet_len + 2);
+ y = get_unaligned_le16(buf + i * packet_len + 4);
+
+ if (x > ts->screen_max_x || x < ts->screen_min_x ||
+ y > ts->screen_max_y || y < ts->screen_min_y) {
+ dev_warn(dev, "invalid position, X[%d,%u,%d], Y[%d,%u,%d]\n",
+ ts->screen_min_x, x, ts->screen_max_x,
+ ts->screen_min_y, y, ts->screen_max_y);
+ continue;
+ }
+
+ ilitek_touch_down(ts, id, x, y);
+ }
+
+err_sync_frame:
+ input_mt_sync_frame(input);
+ input_sync(input);
+ return error;
+}
+
+/* APIs of cmds for ILITEK Touch IC */
+static int api_protocol_set_cmd(struct ilitek_ts_data *ts,
+ u16 idx, u8 *inbuf, u8 *outbuf)
+{
+ u16 cmd;
+ int error;
+
+ if (idx >= MAX_CMD_CNT)
+ return -EINVAL;
+
+ cmd = ts->ptl_cb_func[idx].cmd;
+ error = ts->ptl_cb_func[idx].func(ts, cmd, inbuf, outbuf);
+ if (error)
+ return error;
+
+ return 0;
+}
+
+static int api_protocol_get_ptl_ver(struct ilitek_ts_data *ts,
+ u16 cmd, u8 *inbuf, u8 *outbuf)
+{
+ int error;
+ u8 buf[64];
+
+ buf[0] = cmd;
+ error = ilitek_i2c_write_and_read(ts, buf, 1, 5, outbuf, 3);
+ if (error)
+ return error;
+
+ ts->ptl.ver = get_unaligned_be16(outbuf);
+ ts->ptl.ver_major = outbuf[0];
+
+ return 0;
+}
+
+static int api_protocol_get_mcu_ver(struct ilitek_ts_data *ts,
+ u16 cmd, u8 *inbuf, u8 *outbuf)
+{
+ int error;
+ u8 buf[64];
+
+ buf[0] = cmd;
+ error = ilitek_i2c_write_and_read(ts, buf, 1, 5, outbuf, 32);
+ if (error)
+ return error;
+
+ ts->mcu_ver = get_unaligned_le16(outbuf);
+ memset(ts->product_id, 0, sizeof(ts->product_id));
+ memcpy(ts->product_id, outbuf + 6, 26);
+
+ return 0;
+}
+
+static int api_protocol_get_fw_ver(struct ilitek_ts_data *ts,
+ u16 cmd, u8 *inbuf, u8 *outbuf)
+{
+ int error;
+ u8 buf[64];
+
+ buf[0] = cmd;
+ error = ilitek_i2c_write_and_read(ts, buf, 1, 5, outbuf, 8);
+ if (error)
+ return error;
+
+ memcpy(ts->firmware_ver, outbuf, 8);
+
+ return 0;
+}
+
+static int api_protocol_get_scrn_res(struct ilitek_ts_data *ts,
+ u16 cmd, u8 *inbuf, u8 *outbuf)
+{
+ int error;
+ u8 buf[64];
+
+ buf[0] = cmd;
+ error = ilitek_i2c_write_and_read(ts, buf, 1, 5, outbuf, 8);
+ if (error)
+ return error;
+
+ ts->screen_min_x = get_unaligned_le16(outbuf);
+ ts->screen_min_y = get_unaligned_le16(outbuf + 2);
+ ts->screen_max_x = get_unaligned_le16(outbuf + 4);
+ ts->screen_max_y = get_unaligned_le16(outbuf + 6);
+
+ return 0;
+}
+
+static int api_protocol_get_tp_res(struct ilitek_ts_data *ts,
+ u16 cmd, u8 *inbuf, u8 *outbuf)
+{
+ int error;
+ u8 buf[64];
+
+ buf[0] = cmd;
+ error = ilitek_i2c_write_and_read(ts, buf, 1, 5, outbuf, 15);
+ if (error)
+ return error;
+
+ ts->max_tp = outbuf[8];
+ if (ts->max_tp > ILITEK_SUPPORT_MAX_POINT) {
+ dev_err(&ts->client->dev, "Invalid MAX_TP:%d from FW\n",
+ ts->max_tp);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int api_protocol_get_ic_mode(struct ilitek_ts_data *ts,
+ u16 cmd, u8 *inbuf, u8 *outbuf)
+{
+ int error;
+ u8 buf[64];
+
+ buf[0] = cmd;
+ error = ilitek_i2c_write_and_read(ts, buf, 1, 5, outbuf, 2);
+ if (error)
+ return error;
+
+ ts->ic_mode = outbuf[0];
+ return 0;
+}
+
+static int api_protocol_set_ic_sleep(struct ilitek_ts_data *ts,
+ u16 cmd, u8 *inbuf, u8 *outbuf)
+{
+ u8 buf[64];
+
+ buf[0] = cmd;
+ return ilitek_i2c_write_and_read(ts, buf, 1, 0, NULL, 0);
+}
+
+static int api_protocol_set_ic_wake(struct ilitek_ts_data *ts,
+ u16 cmd, u8 *inbuf, u8 *outbuf)
+{
+ u8 buf[64];
+
+ buf[0] = cmd;
+ return ilitek_i2c_write_and_read(ts, buf, 1, 0, NULL, 0);
+}
+
+static const struct ilitek_protocol_map ptl_func_map[] = {
+ /* common cmds */
+ [GET_PTL_VER] = {
+ ILITEK_TP_CMD_GET_PRL_VER, "GET_PTL_VER",
+ api_protocol_get_ptl_ver
+ },
+ [GET_FW_VER] = {
+ ILITEK_TP_CMD_GET_FW_VER, "GET_FW_VER",
+ api_protocol_get_fw_ver
+ },
+ [GET_SCRN_RES] = {
+ ILITEK_TP_CMD_GET_SCRN_RES, "GET_SCRN_RES",
+ api_protocol_get_scrn_res
+ },
+ [GET_TP_RES] = {
+ ILITEK_TP_CMD_GET_TP_RES, "GET_TP_RES",
+ api_protocol_get_tp_res
+ },
+ [GET_IC_MODE] = {
+ ILITEK_TP_CMD_GET_IC_MODE, "GET_IC_MODE",
+ api_protocol_get_ic_mode
+ },
+ [GET_MCU_VER] = {
+ ILITEK_TP_CMD_GET_MCU_VER, "GET_MOD_VER",
+ api_protocol_get_mcu_ver
+ },
+ [SET_IC_SLEEP] = {
+ ILITEK_TP_CMD_SET_IC_SLEEP, "SET_IC_SLEEP",
+ api_protocol_set_ic_sleep
+ },
+ [SET_IC_WAKE] = {
+ ILITEK_TP_CMD_SET_IC_WAKE, "SET_IC_WAKE",
+ api_protocol_set_ic_wake
+ },
+};
+
+/* Probe APIs */
+static void ilitek_reset(struct ilitek_ts_data *ts, int delay)
+{
+ if (ts->reset_gpio) {
+ gpiod_set_value(ts->reset_gpio, 1);
+ mdelay(10);
+ gpiod_set_value(ts->reset_gpio, 0);
+ mdelay(delay);
+ }
+}
+
+static int ilitek_protocol_init(struct ilitek_ts_data *ts)
+{
+ int error;
+ u8 outbuf[64];
+
+ ts->ptl_cb_func = ptl_func_map;
+ ts->reset_time = 600;
+
+ error = api_protocol_set_cmd(ts, GET_PTL_VER, NULL, outbuf);
+ if (error)
+ return error;
+
+ /* Protocol v3 is not support currently */
+ if (ts->ptl.ver_major == 0x3 ||
+ ts->ptl.ver == BL_V1_6 ||
+ ts->ptl.ver == BL_V1_7)
+ return -EINVAL;
+
+ return 0;
+}
+
+static int ilitek_read_tp_info(struct ilitek_ts_data *ts, bool boot)
+{
+ u8 outbuf[256];
+ int error;
+
+ error = api_protocol_set_cmd(ts, GET_PTL_VER, NULL, outbuf);
+ if (error)
+ return error;
+
+ error = api_protocol_set_cmd(ts, GET_MCU_VER, NULL, outbuf);
+ if (error)
+ return error;
+
+ error = api_protocol_set_cmd(ts, GET_FW_VER, NULL, outbuf);
+ if (error)
+ return error;
+
+ if (boot) {
+ error = api_protocol_set_cmd(ts, GET_SCRN_RES, NULL,
+ outbuf);
+ if (error)
+ return error;
+ }
+
+ error = api_protocol_set_cmd(ts, GET_TP_RES, NULL, outbuf);
+ if (error)
+ return error;
+
+ error = api_protocol_set_cmd(ts, GET_IC_MODE, NULL, outbuf);
+ if (error)
+ return error;
+
+ return 0;
+}
+
+static int ilitek_input_dev_init(struct device *dev, struct ilitek_ts_data *ts)
+{
+ int error;
+ struct input_dev *input;
+
+ input = devm_input_allocate_device(dev);
+ if (!input)
+ return -ENOMEM;
+
+ ts->input_dev = input;
+ input->name = ILITEK_TS_NAME;
+ input->id.bustype = BUS_I2C;
+
+ __set_bit(INPUT_PROP_DIRECT, input->propbit);
+
+ input_set_abs_params(input, ABS_MT_POSITION_X,
+ ts->screen_min_x, ts->screen_max_x, 0, 0);
+ input_set_abs_params(input, ABS_MT_POSITION_Y,
+ ts->screen_min_y, ts->screen_max_y, 0, 0);
+
+ touchscreen_parse_properties(input, true, &ts->prop);
+
+ error = input_mt_init_slots(input, ts->max_tp,
+ INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
+ if (error) {
+ dev_err(dev, "initialize MT slots failed, err:%d\n", error);
+ return error;
+ }
+
+ error = input_register_device(input);
+ if (error) {
+ dev_err(dev, "register input device failed, err:%d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static irqreturn_t ilitek_i2c_isr(int irq, void *dev_id)
+{
+ struct ilitek_ts_data *ts = dev_id;
+ int error;
+
+ error = ilitek_process_and_report_v6(ts);
+ if (error < 0) {
+ dev_err(&ts->client->dev, "[%s] err:%d\n", __func__, error);
+ return IRQ_NONE;
+ }
+
+ return IRQ_HANDLED;
+}
+
+static ssize_t firmware_version_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct ilitek_ts_data *ts = i2c_get_clientdata(client);
+
+ return scnprintf(buf, PAGE_SIZE,
+ "fw version: [%02X%02X.%02X%02X.%02X%02X.%02X%02X]\n",
+ ts->firmware_ver[0], ts->firmware_ver[1],
+ ts->firmware_ver[2], ts->firmware_ver[3],
+ ts->firmware_ver[4], ts->firmware_ver[5],
+ ts->firmware_ver[6], ts->firmware_ver[7]);
+}
+static DEVICE_ATTR_RO(firmware_version);
+
+static ssize_t product_id_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct ilitek_ts_data *ts = i2c_get_clientdata(client);
+
+ return scnprintf(buf, PAGE_SIZE, "product id: [%04X], module: [%s]\n",
+ ts->mcu_ver, ts->product_id);
+}
+static DEVICE_ATTR_RO(product_id);
+
+static struct attribute *ilitek_sysfs_attrs[] = {
+ &dev_attr_firmware_version.attr,
+ &dev_attr_product_id.attr,
+ NULL
+};
+
+static struct attribute_group ilitek_attrs_group = {
+ .attrs = ilitek_sysfs_attrs,
+};
+
+static int ilitek_ts_i2c_probe(struct i2c_client *client,
+ const struct i2c_device_id *id)
+{
+ struct ilitek_ts_data *ts;
+ struct device *dev = &client->dev;
+ int error;
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+ dev_err(dev, "i2c check functionality failed\n");
+ return -ENXIO;
+ }
+
+ ts = devm_kzalloc(dev, sizeof(*ts), GFP_KERNEL);
+ if (!ts)
+ return -ENOMEM;
+
+ ts->client = client;
+ i2c_set_clientdata(client, ts);
+
+ ts->reset_gpio = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(ts->reset_gpio)) {
+ error = PTR_ERR(ts->reset_gpio);
+ dev_err(dev, "request gpiod failed: %d", error);
+ return error;
+ }
+
+ ilitek_reset(ts, 1000);
+
+ error = ilitek_protocol_init(ts);
+ if (error) {
+ dev_err(dev, "protocol init failed: %d", error);
+ return error;
+ }
+
+ error = ilitek_read_tp_info(ts, true);
+ if (error) {
+ dev_err(dev, "read tp info failed: %d", error);
+ return error;
+ }
+
+ error = ilitek_input_dev_init(dev, ts);
+ if (error) {
+ dev_err(dev, "input dev init failed: %d", error);
+ return error;
+ }
+
+ error = devm_request_threaded_irq(dev, ts->client->irq,
+ NULL, ilitek_i2c_isr, IRQF_ONESHOT,
+ "ilitek_touch_irq", ts);
+ if (error) {
+ dev_err(dev, "request threaded irq failed: %d\n", error);
+ return error;
+ }
+
+ error = devm_device_add_group(dev, &ilitek_attrs_group);
+ if (error) {
+ dev_err(dev, "sysfs create group failed: %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static int __maybe_unused ilitek_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct ilitek_ts_data *ts = i2c_get_clientdata(client);
+ int error;
+
+ disable_irq(client->irq);
+
+ if (!device_may_wakeup(dev)) {
+ error = api_protocol_set_cmd(ts, SET_IC_SLEEP, NULL, NULL);
+ if (error)
+ return error;
+ }
+
+ return 0;
+}
+
+static int __maybe_unused ilitek_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct ilitek_ts_data *ts = i2c_get_clientdata(client);
+ int error;
+
+ if (!device_may_wakeup(dev)) {
+ error = api_protocol_set_cmd(ts, SET_IC_WAKE, NULL, NULL);
+ if (error)
+ return error;
+
+ ilitek_reset(ts, ts->reset_time);
+ }
+
+ enable_irq(client->irq);
+
+ return 0;
+}
+
+static SIMPLE_DEV_PM_OPS(ilitek_pm_ops, ilitek_suspend, ilitek_resume);
+
+static const struct i2c_device_id ilitek_ts_i2c_id[] = {
+ { ILITEK_TS_NAME, 0 },
+ { },
+};
+MODULE_DEVICE_TABLE(i2c, ilitek_ts_i2c_id);
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id ilitekts_acpi_id[] = {
+ { "ILTK0001", 0 },
+ { },
+};
+MODULE_DEVICE_TABLE(acpi, ilitekts_acpi_id);
+#endif
+
+#ifdef CONFIG_OF
+static const struct of_device_id ilitek_ts_i2c_match[] = {
+ {.compatible = "ilitek,ili2130",},
+ {.compatible = "ilitek,ili2131",},
+ {.compatible = "ilitek,ili2132",},
+ {.compatible = "ilitek,ili2316",},
+ {.compatible = "ilitek,ili2322",},
+ {.compatible = "ilitek,ili2323",},
+ {.compatible = "ilitek,ili2326",},
+ {.compatible = "ilitek,ili2520",},
+ {.compatible = "ilitek,ili2521",},
+ { },
+};
+MODULE_DEVICE_TABLE(of, ilitek_ts_i2c_match);
+#endif
+
+static struct i2c_driver ilitek_ts_i2c_driver = {
+ .driver = {
+ .name = ILITEK_TS_NAME,
+ .pm = &ilitek_pm_ops,
+ .of_match_table = of_match_ptr(ilitek_ts_i2c_match),
+ .acpi_match_table = ACPI_PTR(ilitekts_acpi_id),
+ },
+ .probe = ilitek_ts_i2c_probe,
+ .id_table = ilitek_ts_i2c_id,
+};
+module_i2c_driver(ilitek_ts_i2c_driver);
+
+MODULE_AUTHOR("ILITEK");
+MODULE_DESCRIPTION("ILITEK I2C Touchscreen Driver");
+MODULE_LICENSE("GPL");
* made available by the vendor. Firmware files may be pushed to the device's
* nonvolatile memory by writing the filename to the 'fw_file' sysfs control.
*
- * Link to PC-based configuration tool and data sheet: http://www.azoteq.com/
+ * Link to PC-based configuration tool and datasheet: https://www.azoteq.com/
*/
#include <linux/bits.h>
#define IQS5XX_NUM_RETRIES 10
#define IQS5XX_NUM_CONTACTS 5
#define IQS5XX_WR_BYTES_MAX 2
-#define IQS5XX_XY_RES_MAX 0xFFFE
#define IQS5XX_PROD_NUM_IQS550 40
#define IQS5XX_PROD_NUM_IQS572 58
#define IQS5XX_PROD_NUM_IQS525 52
-#define IQS5XX_PROJ_NUM_A000 0
-#define IQS5XX_PROJ_NUM_B000 15
-#define IQS5XX_MAJOR_VER_MIN 2
#define IQS5XX_SHOW_RESET BIT(7)
#define IQS5XX_ACK_RESET BIT(7)
#define IQS5XX_SYS_CFG1 0x058F
#define IQS5XX_X_RES 0x066E
#define IQS5XX_Y_RES 0x0670
+#define IQS5XX_EXP_FILE 0x0677
#define IQS5XX_CHKSM 0x83C0
#define IQS5XX_APP 0x8400
#define IQS5XX_CSTM 0xBE00
#define IQS5XX_BL_CMD_CRC 0x03
#define IQS5XX_BL_BLK_LEN_MAX 64
#define IQS5XX_BL_ID 0x0200
-#define IQS5XX_BL_STATUS_RESET 0x00
-#define IQS5XX_BL_STATUS_AVAIL 0xA5
#define IQS5XX_BL_STATUS_NONE 0xEE
#define IQS5XX_BL_CRC_PASS 0x00
#define IQS5XX_BL_CRC_FAIL 0x01
#define IQS5XX_BL_ATTEMPTS 3
-struct iqs5xx_private {
- struct i2c_client *client;
- struct input_dev *input;
- struct gpio_desc *reset_gpio;
- struct touchscreen_properties prop;
- struct mutex lock;
- u8 bl_status;
-};
-
struct iqs5xx_dev_id_info {
__be16 prod_num;
__be16 proj_num;
struct iqs5xx_touch_data touch_data[IQS5XX_NUM_CONTACTS];
} __packed;
+struct iqs5xx_private {
+ struct i2c_client *client;
+ struct input_dev *input;
+ struct gpio_desc *reset_gpio;
+ struct touchscreen_properties prop;
+ struct mutex lock;
+ struct iqs5xx_dev_id_info dev_id_info;
+ u8 exp_file[2];
+};
+
static int iqs5xx_read_burst(struct i2c_client *client,
u16 reg, void *val, u16 len)
{
struct iqs5xx_private *iqs5xx = i2c_get_clientdata(client);
int error1, error2;
- if (iqs5xx->bl_status == IQS5XX_BL_STATUS_RESET)
+ if (!iqs5xx->dev_id_info.bl_status)
return 0;
mutex_lock(&iqs5xx->lock);
input->open = iqs5xx_open;
input->close = iqs5xx_close;
- input_set_capability(input, EV_ABS, ABS_MT_POSITION_X);
- input_set_capability(input, EV_ABS, ABS_MT_POSITION_Y);
- input_set_capability(input, EV_ABS, ABS_MT_PRESSURE);
-
input_set_drvdata(input, iqs5xx);
iqs5xx->input = input;
}
if (error)
return error;
- input_abs_set_max(iqs5xx->input, ABS_MT_POSITION_X, max_x);
- input_abs_set_max(iqs5xx->input, ABS_MT_POSITION_Y, max_y);
+ input_set_abs_params(iqs5xx->input, ABS_MT_POSITION_X, 0, max_x, 0, 0);
+ input_set_abs_params(iqs5xx->input, ABS_MT_POSITION_Y, 0, max_y, 0, 0);
+ input_set_abs_params(iqs5xx->input, ABS_MT_PRESSURE, 0, U16_MAX, 0, 0);
touchscreen_parse_properties(iqs5xx->input, true, prop);
- if (prop->max_x > IQS5XX_XY_RES_MAX) {
- dev_err(&client->dev, "Invalid maximum x-coordinate: %u > %u\n",
- prop->max_x, IQS5XX_XY_RES_MAX);
+ /*
+ * The device reserves 0xFFFF for coordinates that correspond to slots
+ * which are not in a state of touch.
+ */
+ if (prop->max_x >= U16_MAX || prop->max_y >= U16_MAX) {
+ dev_err(&client->dev, "Invalid touchscreen size: %u*%u\n",
+ prop->max_x, prop->max_y);
return -EINVAL;
- } else if (prop->max_x != max_x) {
+ }
+
+ if (prop->max_x != max_x) {
error = iqs5xx_write_word(client, IQS5XX_X_RES, prop->max_x);
if (error)
return error;
}
- if (prop->max_y > IQS5XX_XY_RES_MAX) {
- dev_err(&client->dev, "Invalid maximum y-coordinate: %u > %u\n",
- prop->max_y, IQS5XX_XY_RES_MAX);
- return -EINVAL;
- } else if (prop->max_y != max_y) {
+ if (prop->max_y != max_y) {
error = iqs5xx_write_word(client, IQS5XX_Y_RES, prop->max_y);
if (error)
return error;
* the missing zero is prepended).
*/
buf[0] = 0;
- dev_id_info = (struct iqs5xx_dev_id_info *)&buf[(buf[1] > 0) ? 0 : 1];
+ dev_id_info = (struct iqs5xx_dev_id_info *)&buf[buf[1] ? 0 : 1];
switch (be16_to_cpu(dev_id_info->prod_num)) {
case IQS5XX_PROD_NUM_IQS550:
return -EINVAL;
}
- switch (be16_to_cpu(dev_id_info->proj_num)) {
- case IQS5XX_PROJ_NUM_A000:
- dev_err(&client->dev, "Unsupported project number: %u\n",
- be16_to_cpu(dev_id_info->proj_num));
- return iqs5xx_bl_open(client);
- case IQS5XX_PROJ_NUM_B000:
- break;
- default:
- dev_err(&client->dev, "Unrecognized project number: %u\n",
- be16_to_cpu(dev_id_info->proj_num));
- return -EINVAL;
- }
-
- if (dev_id_info->major_ver < IQS5XX_MAJOR_VER_MIN) {
- dev_err(&client->dev, "Unsupported major version: %u\n",
- dev_id_info->major_ver);
+ /*
+ * With the product number recognized yet shifted by one byte, open the
+ * bootloader and wait for user space to convert the A000 device into a
+ * B000 device via new firmware.
+ */
+ if (buf[1]) {
+ dev_err(&client->dev, "Opening bootloader for A000 device\n");
return iqs5xx_bl_open(client);
}
- switch (dev_id_info->bl_status) {
- case IQS5XX_BL_STATUS_AVAIL:
- case IQS5XX_BL_STATUS_NONE:
- break;
- default:
- dev_err(&client->dev,
- "Unrecognized bootloader status: 0x%02X\n",
- dev_id_info->bl_status);
- return -EINVAL;
- }
+ error = iqs5xx_read_burst(client, IQS5XX_EXP_FILE,
+ iqs5xx->exp_file, sizeof(iqs5xx->exp_file));
+ if (error)
+ return error;
error = iqs5xx_axis_init(client);
if (error)
if (error)
return error;
- iqs5xx->bl_status = dev_id_info->bl_status;
+ iqs5xx->dev_id_info = *dev_id_info;
/*
* The following delay allows ATI to complete before the open and close
* RDY output during bootloader mode. If the device operates outside of
* bootloader mode, the input device is guaranteed to be allocated.
*/
- if (iqs5xx->bl_status == IQS5XX_BL_STATUS_RESET)
+ if (!iqs5xx->dev_id_info.bl_status)
return IRQ_NONE;
error = iqs5xx_read_burst(client, IQS5XX_SYS_INFO0,
static int iqs5xx_fw_file_write(struct i2c_client *client, const char *fw_file)
{
struct iqs5xx_private *iqs5xx = i2c_get_clientdata(client);
- int error, error_bl = 0;
+ int error, error_init = 0;
u8 *pmap;
- if (iqs5xx->bl_status == IQS5XX_BL_STATUS_NONE)
- return -EPERM;
-
pmap = kzalloc(IQS5XX_PMAP_LEN, GFP_KERNEL);
if (!pmap)
return -ENOMEM;
*/
disable_irq(client->irq);
- iqs5xx->bl_status = IQS5XX_BL_STATUS_RESET;
+ iqs5xx->dev_id_info.bl_status = 0;
error = iqs5xx_bl_cmd(client, IQS5XX_BL_CMD_VER, 0);
if (error) {
error = iqs5xx_bl_verify(client, IQS5XX_CSTM,
pmap + IQS5XX_CHKSM_LEN + IQS5XX_APP_LEN,
IQS5XX_CSTM_LEN);
- if (error)
- goto err_reset;
-
- error = iqs5xx_bl_cmd(client, IQS5XX_BL_CMD_EXEC, 0);
err_reset:
- if (error) {
- iqs5xx_reset(client);
- usleep_range(10000, 10100);
- }
+ iqs5xx_reset(client);
+ usleep_range(15000, 15100);
- error_bl = error;
- error = iqs5xx_dev_init(client);
- if (!error && iqs5xx->bl_status == IQS5XX_BL_STATUS_RESET)
- error = -EINVAL;
+ error_init = iqs5xx_dev_init(client);
+ if (!iqs5xx->dev_id_info.bl_status)
+ error_init = error_init ? : -EINVAL;
enable_irq(client->irq);
err_kfree:
kfree(pmap);
- if (error_bl)
- return error_bl;
-
- return error;
+ return error ? : error_init;
}
static ssize_t fw_file_store(struct device *dev,
return count;
}
+static ssize_t fw_info_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct iqs5xx_private *iqs5xx = dev_get_drvdata(dev);
+
+ if (!iqs5xx->dev_id_info.bl_status)
+ return -ENODATA;
+
+ return scnprintf(buf, PAGE_SIZE, "%u.%u.%u.%u:%u.%u\n",
+ be16_to_cpu(iqs5xx->dev_id_info.prod_num),
+ be16_to_cpu(iqs5xx->dev_id_info.proj_num),
+ iqs5xx->dev_id_info.major_ver,
+ iqs5xx->dev_id_info.minor_ver,
+ iqs5xx->exp_file[0], iqs5xx->exp_file[1]);
+}
+
static DEVICE_ATTR_WO(fw_file);
+static DEVICE_ATTR_RO(fw_info);
static struct attribute *iqs5xx_attrs[] = {
&dev_attr_fw_file.attr,
+ &dev_attr_fw_info.attr,
NULL,
};
+static umode_t iqs5xx_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int i)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct iqs5xx_private *iqs5xx = dev_get_drvdata(dev);
+
+ if (attr == &dev_attr_fw_file.attr &&
+ (iqs5xx->dev_id_info.bl_status == IQS5XX_BL_STATUS_NONE ||
+ !iqs5xx->reset_gpio))
+ return 0;
+
+ return attr->mode;
+}
+
static const struct attribute_group iqs5xx_attr_group = {
+ .is_visible = iqs5xx_attr_is_visible,
.attrs = iqs5xx_attrs,
};
i2c_set_clientdata(client, iqs5xx);
iqs5xx->client = client;
- iqs5xx->reset_gpio = devm_gpiod_get(&client->dev,
- "reset", GPIOD_OUT_LOW);
+ iqs5xx->reset_gpio = devm_gpiod_get_optional(&client->dev,
+ "reset", GPIOD_OUT_LOW);
if (IS_ERR(iqs5xx->reset_gpio)) {
error = PTR_ERR(iqs5xx->reset_gpio);
dev_err(&client->dev, "Failed to request GPIO: %d\n", error);
mutex_init(&iqs5xx->lock);
- iqs5xx_reset(client);
- usleep_range(10000, 10100);
-
error = iqs5xx_dev_init(client);
if (error)
return error;
#define LPC32XX_TSC_AUX_MIN 0x38
#define LPC32XX_TSC_AUX_MAX 0x3C
-#define LPC32XX_TSC_STAT_FIFO_OVRRN (1 << 8)
-#define LPC32XX_TSC_STAT_FIFO_EMPTY (1 << 7)
+#define LPC32XX_TSC_STAT_FIFO_OVRRN BIT(8)
+#define LPC32XX_TSC_STAT_FIFO_EMPTY BIT(7)
#define LPC32XX_TSC_SEL_DEFVAL 0x0284
#define LPC32XX_TSC_ADCCON_IRQ_TO_FIFO_4 (0x1 << 11)
#define LPC32XX_TSC_ADCCON_X_SAMPLE_SIZE(s) ((10 - (s)) << 7)
#define LPC32XX_TSC_ADCCON_Y_SAMPLE_SIZE(s) ((10 - (s)) << 4)
-#define LPC32XX_TSC_ADCCON_POWER_UP (1 << 2)
-#define LPC32XX_TSC_ADCCON_AUTO_EN (1 << 0)
+#define LPC32XX_TSC_ADCCON_POWER_UP BIT(2)
+#define LPC32XX_TSC_ADCCON_AUTO_EN BIT(0)
-#define LPC32XX_TSC_FIFO_TS_P_LEVEL (1 << 31)
+#define LPC32XX_TSC_FIFO_TS_P_LEVEL BIT(31)
#define LPC32XX_TSC_FIFO_NORMALIZE_X_VAL(x) (((x) & 0x03FF0000) >> 16)
#define LPC32XX_TSC_FIFO_NORMALIZE_Y_VAL(y) ((y) & 0x000003FF)
error = devm_request_threaded_irq(&client->dev, client->irq,
NULL, mip4_interrupt,
- IRQF_ONESHOT, MIP4_DEVICE_NAME, ts);
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
+ MIP4_DEVICE_NAME, ts);
if (error) {
dev_err(&client->dev,
"Failed to request interrupt %d: %d\n",
return error;
}
- disable_irq(client->irq);
-
error = input_register_device(input);
if (error) {
dev_err(&client->dev,
// SPDX-License-Identifier: GPL-2.0
-// Melfas MMS114/MMS152 touchscreen device driver
+// Melfas MMS114/MMS136/MMS152 touchscreen device driver
//
// Copyright (c) 2012 Samsung Electronics Co., Ltd.
// Author: Joonyoung Shim <jy0922.shim@samsung.com>
#define MMS114_MAX_AREA 0xff
#define MMS114_MAX_TOUCH 10
-#define MMS114_PACKET_NUM 8
+#define MMS114_EVENT_SIZE 8
+#define MMS136_EVENT_SIZE 6
/* Touch type */
#define MMS114_TYPE_NONE 0
enum mms_type {
TYPE_MMS114 = 114,
+ TYPE_MMS136 = 136,
TYPE_MMS152 = 152,
TYPE_MMS345L = 345,
};
if (packet_size <= 0)
goto out;
- touch_size = packet_size / MMS114_PACKET_NUM;
+ /* MMS136 has slightly different event size */
+ if (data->type == TYPE_MMS136)
+ touch_size = packet_size / MMS136_EVENT_SIZE;
+ else
+ touch_size = packet_size / MMS114_EVENT_SIZE;
error = __mms114_read_reg(data, MMS114_INFORMATION, packet_size,
(u8 *)touch);
break;
case TYPE_MMS114:
+ case TYPE_MMS136:
error = __mms114_read_reg(data, MMS114_TSP_REV, 6, buf);
if (error)
return error;
if (error < 0)
return error;
- /* Only MMS114 has configuration and power on registers */
- if (data->type != TYPE_MMS114)
+ /* Only MMS114 and MMS136 have configuration and power on registers */
+ if (data->type != TYPE_MMS114 && data->type != TYPE_MMS136)
return 0;
error = mms114_set_active(data, true);
0, data->props.max_y, 0, 0);
}
- if (data->type == TYPE_MMS114) {
+ if (data->type == TYPE_MMS114 || data->type == TYPE_MMS136) {
/*
* The firmware handles movement and pressure fuzz, so
* don't duplicate that in software.
}
error = devm_request_threaded_irq(&client->dev, client->irq,
- NULL, mms114_interrupt, IRQF_ONESHOT,
+ NULL, mms114_interrupt,
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
dev_name(&client->dev), data);
if (error) {
dev_err(&client->dev, "Failed to register interrupt\n");
return error;
}
- disable_irq(client->irq);
error = input_register_device(data->input_dev);
if (error) {
{
.compatible = "melfas,mms114",
.data = (void *)TYPE_MMS114,
+ }, {
+ .compatible = "melfas,mms136",
+ .data = (void *)TYPE_MMS136,
}, {
.compatible = "melfas,mms152",
.data = (void *)TYPE_MMS152,
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Driver for MStar msg2638 touchscreens
+ *
+ * Copyright (c) 2021 Vincent Knecht <vincent.knecht@mailoo.org>
+ *
+ * Checksum and IRQ handler based on mstar_drv_common.c and
+ * mstar_drv_mutual_fw_control.c
+ * Copyright (c) 2006-2012 MStar Semiconductor, Inc.
+ *
+ * Driver structure based on zinitix.c by Michael Srba <Michael.Srba@seznam.cz>
+ */
+
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/input.h>
+#include <linux/input/mt.h>
+#include <linux/input/touchscreen.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/regulator/consumer.h>
+#include <linux/slab.h>
+
+#define MODE_DATA_RAW 0x5A
+
+#define MAX_SUPPORTED_FINGER_NUM 5
+
+#define CHIP_ON_DELAY_MS 15
+#define FIRMWARE_ON_DELAY_MS 50
+#define RESET_DELAY_MIN_US 10000
+#define RESET_DELAY_MAX_US 11000
+
+struct packet {
+ u8 xy_hi; /* higher bits of x and y coordinates */
+ u8 x_low;
+ u8 y_low;
+ u8 pressure;
+};
+
+struct touch_event {
+ u8 mode;
+ struct packet pkt[MAX_SUPPORTED_FINGER_NUM];
+ u8 proximity;
+ u8 checksum;
+};
+
+struct msg2638_ts_data {
+ struct i2c_client *client;
+ struct input_dev *input_dev;
+ struct touchscreen_properties prop;
+ struct regulator_bulk_data supplies[2];
+ struct gpio_desc *reset_gpiod;
+};
+
+static u8 msg2638_checksum(u8 *data, u32 length)
+{
+ s32 sum = 0;
+ u32 i;
+
+ for (i = 0; i < length; i++)
+ sum += data[i];
+
+ return (u8)((-sum) & 0xFF);
+}
+
+static irqreturn_t msg2638_ts_irq_handler(int irq, void *msg2638_handler)
+{
+ struct msg2638_ts_data *msg2638 = msg2638_handler;
+ struct i2c_client *client = msg2638->client;
+ struct input_dev *input = msg2638->input_dev;
+ struct touch_event touch_event;
+ u32 len = sizeof(touch_event);
+ struct i2c_msg msg[] = {
+ {
+ .addr = client->addr,
+ .flags = I2C_M_RD,
+ .len = sizeof(touch_event),
+ .buf = (u8 *)&touch_event,
+ },
+ };
+ struct packet *p;
+ u16 x, y;
+ int ret;
+ int i;
+
+ ret = i2c_transfer(client->adapter, msg, ARRAY_SIZE(msg));
+ if (ret != ARRAY_SIZE(msg)) {
+ dev_err(&client->dev,
+ "Failed I2C transfer in irq handler: %d\n",
+ ret < 0 ? ret : -EIO);
+ goto out;
+ }
+
+ if (touch_event.mode != MODE_DATA_RAW)
+ goto out;
+
+ if (msg2638_checksum((u8 *)&touch_event, len - 1) !=
+ touch_event.checksum) {
+ dev_err(&client->dev, "Failed checksum!\n");
+ goto out;
+ }
+
+ for (i = 0; i < MAX_SUPPORTED_FINGER_NUM; i++) {
+ p = &touch_event.pkt[i];
+
+ /* Ignore non-pressed finger data */
+ if (p->xy_hi == 0xFF && p->x_low == 0xFF && p->y_low == 0xFF)
+ continue;
+
+ x = (((p->xy_hi & 0xF0) << 4) | p->x_low);
+ y = (((p->xy_hi & 0x0F) << 8) | p->y_low);
+
+ input_mt_slot(input, i);
+ input_mt_report_slot_state(input, MT_TOOL_FINGER, true);
+ touchscreen_report_pos(input, &msg2638->prop, x, y, true);
+ }
+
+ input_mt_sync_frame(msg2638->input_dev);
+ input_sync(msg2638->input_dev);
+
+out:
+ return IRQ_HANDLED;
+}
+
+static void msg2638_reset(struct msg2638_ts_data *msg2638)
+{
+ gpiod_set_value_cansleep(msg2638->reset_gpiod, 1);
+ usleep_range(RESET_DELAY_MIN_US, RESET_DELAY_MAX_US);
+ gpiod_set_value_cansleep(msg2638->reset_gpiod, 0);
+ msleep(FIRMWARE_ON_DELAY_MS);
+}
+
+static int msg2638_start(struct msg2638_ts_data *msg2638)
+{
+ int error;
+
+ error = regulator_bulk_enable(ARRAY_SIZE(msg2638->supplies),
+ msg2638->supplies);
+ if (error) {
+ dev_err(&msg2638->client->dev,
+ "Failed to enable regulators: %d\n", error);
+ return error;
+ }
+
+ msleep(CHIP_ON_DELAY_MS);
+
+ msg2638_reset(msg2638);
+
+ enable_irq(msg2638->client->irq);
+
+ return 0;
+}
+
+static int msg2638_stop(struct msg2638_ts_data *msg2638)
+{
+ int error;
+
+ disable_irq(msg2638->client->irq);
+
+ error = regulator_bulk_disable(ARRAY_SIZE(msg2638->supplies),
+ msg2638->supplies);
+ if (error) {
+ dev_err(&msg2638->client->dev,
+ "Failed to disable regulators: %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static int msg2638_input_open(struct input_dev *dev)
+{
+ struct msg2638_ts_data *msg2638 = input_get_drvdata(dev);
+
+ return msg2638_start(msg2638);
+}
+
+static void msg2638_input_close(struct input_dev *dev)
+{
+ struct msg2638_ts_data *msg2638 = input_get_drvdata(dev);
+
+ msg2638_stop(msg2638);
+}
+
+static int msg2638_init_input_dev(struct msg2638_ts_data *msg2638)
+{
+ struct device *dev = &msg2638->client->dev;
+ struct input_dev *input_dev;
+ int error;
+
+ input_dev = devm_input_allocate_device(dev);
+ if (!input_dev) {
+ dev_err(dev, "Failed to allocate input device.\n");
+ return -ENOMEM;
+ }
+
+ input_set_drvdata(input_dev, msg2638);
+ msg2638->input_dev = input_dev;
+
+ input_dev->name = "MStar TouchScreen";
+ input_dev->phys = "input/ts";
+ input_dev->id.bustype = BUS_I2C;
+ input_dev->open = msg2638_input_open;
+ input_dev->close = msg2638_input_close;
+
+ input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_X);
+ input_set_capability(input_dev, EV_ABS, ABS_MT_POSITION_Y);
+
+ touchscreen_parse_properties(input_dev, true, &msg2638->prop);
+ if (!msg2638->prop.max_x || !msg2638->prop.max_y) {
+ dev_err(dev, "touchscreen-size-x and/or touchscreen-size-y not set in properties\n");
+ return -EINVAL;
+ }
+
+ error = input_mt_init_slots(input_dev, MAX_SUPPORTED_FINGER_NUM,
+ INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
+ if (error) {
+ dev_err(dev, "Failed to initialize MT slots: %d\n", error);
+ return error;
+ }
+
+ error = input_register_device(input_dev);
+ if (error) {
+ dev_err(dev, "Failed to register input device: %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static int msg2638_ts_probe(struct i2c_client *client)
+{
+ struct device *dev = &client->dev;
+ struct msg2638_ts_data *msg2638;
+ int error;
+
+ if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
+ dev_err(dev, "Failed to assert adapter's support for plain I2C.\n");
+ return -ENXIO;
+ }
+
+ msg2638 = devm_kzalloc(dev, sizeof(*msg2638), GFP_KERNEL);
+ if (!msg2638)
+ return -ENOMEM;
+
+ msg2638->client = client;
+ i2c_set_clientdata(client, msg2638);
+
+ msg2638->supplies[0].supply = "vdd";
+ msg2638->supplies[1].supply = "vddio";
+ error = devm_regulator_bulk_get(dev, ARRAY_SIZE(msg2638->supplies),
+ msg2638->supplies);
+ if (error) {
+ dev_err(dev, "Failed to get regulators: %d\n", error);
+ return error;
+ }
+
+ msg2638->reset_gpiod = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+ if (IS_ERR(msg2638->reset_gpiod)) {
+ error = PTR_ERR(msg2638->reset_gpiod);
+ dev_err(dev, "Failed to request reset GPIO: %d\n", error);
+ return error;
+ }
+
+ error = msg2638_init_input_dev(msg2638);
+ if (error) {
+ dev_err(dev, "Failed to initialize input device: %d\n", error);
+ return error;
+ }
+
+ error = devm_request_threaded_irq(dev, client->irq,
+ NULL, msg2638_ts_irq_handler,
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
+ client->name, msg2638);
+ if (error) {
+ dev_err(dev, "Failed to request IRQ: %d\n", error);
+ return error;
+ }
+
+ return 0;
+}
+
+static int __maybe_unused msg2638_suspend(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct msg2638_ts_data *msg2638 = i2c_get_clientdata(client);
+
+ mutex_lock(&msg2638->input_dev->mutex);
+
+ if (input_device_enabled(msg2638->input_dev))
+ msg2638_stop(msg2638);
+
+ mutex_unlock(&msg2638->input_dev->mutex);
+
+ return 0;
+}
+
+static int __maybe_unused msg2638_resume(struct device *dev)
+{
+ struct i2c_client *client = to_i2c_client(dev);
+ struct msg2638_ts_data *msg2638 = i2c_get_clientdata(client);
+ int ret = 0;
+
+ mutex_lock(&msg2638->input_dev->mutex);
+
+ if (input_device_enabled(msg2638->input_dev))
+ ret = msg2638_start(msg2638);
+
+ mutex_unlock(&msg2638->input_dev->mutex);
+
+ return ret;
+}
+
+static SIMPLE_DEV_PM_OPS(msg2638_pm_ops, msg2638_suspend, msg2638_resume);
+
+static const struct of_device_id msg2638_of_match[] = {
+ { .compatible = "mstar,msg2638" },
+ { }
+};
+MODULE_DEVICE_TABLE(of, msg2638_of_match);
+
+static struct i2c_driver msg2638_ts_driver = {
+ .probe_new = msg2638_ts_probe,
+ .driver = {
+ .name = "MStar-TS",
+ .pm = &msg2638_pm_ops,
+ .of_match_table = msg2638_of_match,
+ },
+};
+module_i2c_driver(msg2638_ts_driver);
+
+MODULE_AUTHOR("Vincent Knecht <vincent.knecht@mailoo.org>");
+MODULE_DESCRIPTION("MStar MSG2638 touchscreen driver");
+MODULE_LICENSE("GPL v2");
+++ /dev/null
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Generic DT helper functions for touchscreen devices
- *
- * Copyright (c) 2014 Sebastian Reichel <sre@kernel.org>
- */
-
-#include <linux/property.h>
-#include <linux/input.h>
-#include <linux/input/mt.h>
-#include <linux/input/touchscreen.h>
-#include <linux/module.h>
-
-static bool touchscreen_get_prop_u32(struct device *dev,
- const char *property,
- unsigned int default_value,
- unsigned int *value)
-{
- u32 val;
- int error;
-
- error = device_property_read_u32(dev, property, &val);
- if (error) {
- *value = default_value;
- return false;
- }
-
- *value = val;
- return true;
-}
-
-static void touchscreen_set_params(struct input_dev *dev,
- unsigned long axis,
- int min, int max, int fuzz)
-{
- struct input_absinfo *absinfo;
-
- if (!test_bit(axis, dev->absbit)) {
- dev_warn(&dev->dev,
- "DT specifies parameters but the axis %lu is not set up\n",
- axis);
- return;
- }
-
- absinfo = &dev->absinfo[axis];
- absinfo->minimum = min;
- absinfo->maximum = max;
- absinfo->fuzz = fuzz;
-}
-
-/**
- * touchscreen_parse_properties - parse common touchscreen DT properties
- * @input: input device that should be parsed
- * @multitouch: specifies whether parsed properties should be applied to
- * single-touch or multi-touch axes
- * @prop: pointer to a struct touchscreen_properties into which to store
- * axis swap and invert info for use with touchscreen_report_x_y();
- * or %NULL
- *
- * This function parses common DT properties for touchscreens and setups the
- * input device accordingly. The function keeps previously set up default
- * values if no value is specified via DT.
- */
-void touchscreen_parse_properties(struct input_dev *input, bool multitouch,
- struct touchscreen_properties *prop)
-{
- struct device *dev = input->dev.parent;
- struct input_absinfo *absinfo;
- unsigned int axis, axis_x, axis_y;
- unsigned int minimum, maximum, fuzz;
- bool data_present;
-
- input_alloc_absinfo(input);
- if (!input->absinfo)
- return;
-
- axis_x = multitouch ? ABS_MT_POSITION_X : ABS_X;
- axis_y = multitouch ? ABS_MT_POSITION_Y : ABS_Y;
-
- data_present = touchscreen_get_prop_u32(dev, "touchscreen-min-x",
- input_abs_get_min(input, axis_x),
- &minimum) |
- touchscreen_get_prop_u32(dev, "touchscreen-size-x",
- input_abs_get_max(input,
- axis_x) + 1,
- &maximum) |
- touchscreen_get_prop_u32(dev, "touchscreen-fuzz-x",
- input_abs_get_fuzz(input, axis_x),
- &fuzz);
- if (data_present)
- touchscreen_set_params(input, axis_x, minimum, maximum - 1, fuzz);
-
- data_present = touchscreen_get_prop_u32(dev, "touchscreen-min-y",
- input_abs_get_min(input, axis_y),
- &minimum) |
- touchscreen_get_prop_u32(dev, "touchscreen-size-y",
- input_abs_get_max(input,
- axis_y) + 1,
- &maximum) |
- touchscreen_get_prop_u32(dev, "touchscreen-fuzz-y",
- input_abs_get_fuzz(input, axis_y),
- &fuzz);
- if (data_present)
- touchscreen_set_params(input, axis_y, minimum, maximum - 1, fuzz);
-
- axis = multitouch ? ABS_MT_PRESSURE : ABS_PRESSURE;
- data_present = touchscreen_get_prop_u32(dev,
- "touchscreen-max-pressure",
- input_abs_get_max(input, axis),
- &maximum) |
- touchscreen_get_prop_u32(dev,
- "touchscreen-fuzz-pressure",
- input_abs_get_fuzz(input, axis),
- &fuzz);
- if (data_present)
- touchscreen_set_params(input, axis, 0, maximum, fuzz);
-
- if (!prop)
- return;
-
- prop->max_x = input_abs_get_max(input, axis_x);
- prop->max_y = input_abs_get_max(input, axis_y);
-
- prop->invert_x =
- device_property_read_bool(dev, "touchscreen-inverted-x");
- if (prop->invert_x) {
- absinfo = &input->absinfo[axis_x];
- absinfo->maximum -= absinfo->minimum;
- absinfo->minimum = 0;
- }
-
- prop->invert_y =
- device_property_read_bool(dev, "touchscreen-inverted-y");
- if (prop->invert_y) {
- absinfo = &input->absinfo[axis_y];
- absinfo->maximum -= absinfo->minimum;
- absinfo->minimum = 0;
- }
-
- prop->swap_x_y =
- device_property_read_bool(dev, "touchscreen-swapped-x-y");
- if (prop->swap_x_y)
- swap(input->absinfo[axis_x], input->absinfo[axis_y]);
-}
-EXPORT_SYMBOL(touchscreen_parse_properties);
-
-static void
-touchscreen_apply_prop_to_x_y(const struct touchscreen_properties *prop,
- unsigned int *x, unsigned int *y)
-{
- if (prop->invert_x)
- *x = prop->max_x - *x;
-
- if (prop->invert_y)
- *y = prop->max_y - *y;
-
- if (prop->swap_x_y)
- swap(*x, *y);
-}
-
-/**
- * touchscreen_set_mt_pos - Set input_mt_pos coordinates
- * @pos: input_mt_pos to set coordinates of
- * @prop: pointer to a struct touchscreen_properties
- * @x: X coordinate to store in pos
- * @y: Y coordinate to store in pos
- *
- * Adjust the passed in x and y values applying any axis inversion and
- * swapping requested in the passed in touchscreen_properties and store
- * the result in a struct input_mt_pos.
- */
-void touchscreen_set_mt_pos(struct input_mt_pos *pos,
- const struct touchscreen_properties *prop,
- unsigned int x, unsigned int y)
-{
- touchscreen_apply_prop_to_x_y(prop, &x, &y);
- pos->x = x;
- pos->y = y;
-}
-EXPORT_SYMBOL(touchscreen_set_mt_pos);
-
-/**
- * touchscreen_report_pos - Report touchscreen coordinates
- * @input: input_device to report coordinates for
- * @prop: pointer to a struct touchscreen_properties
- * @x: X coordinate to report
- * @y: Y coordinate to report
- * @multitouch: Report coordinates on single-touch or multi-touch axes
- *
- * Adjust the passed in x and y values applying any axis inversion and
- * swapping requested in the passed in touchscreen_properties and then
- * report the resulting coordinates on the input_dev's x and y axis.
- */
-void touchscreen_report_pos(struct input_dev *input,
- const struct touchscreen_properties *prop,
- unsigned int x, unsigned int y,
- bool multitouch)
-{
- touchscreen_apply_prop_to_x_y(prop, &x, &y);
- input_report_abs(input, multitouch ? ABS_MT_POSITION_X : ABS_X, x);
- input_report_abs(input, multitouch ? ABS_MT_POSITION_Y : ABS_Y, y);
-}
-EXPORT_SYMBOL(touchscreen_report_pos);
-
-MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("Device-tree helpers functions for touchscreen devices");
#include <linux/input/mt.h>
#include <linux/input/touchscreen.h>
#include <linux/pm.h>
+#include <linux/pm_runtime.h>
#include <linux/irq.h>
#include <linux/regulator/consumer.h>
error = i2c_smbus_read_i2c_block_data(client, SILEAD_REG_ID,
sizeof(chip_id), (u8 *)&chip_id);
- if (error < 0) {
- dev_err(&client->dev, "Chip ID read error %d\n", error);
+ if (error < 0)
return error;
- }
data->chip_id = le32_to_cpu(chip_id);
dev_info(&client->dev, "Silead chip ID: 0x%8X", data->chip_id);
int error;
u32 status;
+ /*
+ * Some buggy BIOS-es bring up the chip in a stuck state where it
+ * blocks the I2C bus. The following steps are necessary to
+ * unstuck the chip / bus:
+ * 1. Turn off the Silead chip.
+ * 2. Try to do an I2C transfer with the chip, this will fail in
+ * response to which the I2C-bus-driver will call:
+ * i2c_recover_bus() which will unstuck the I2C-bus. Note the
+ * unstuck-ing of the I2C bus only works if we first drop the
+ * chip off the bus by turning it off.
+ * 3. Turn the chip back on.
+ *
+ * On the x86/ACPI systems were this problem is seen, step 1. and
+ * 3. require making ACPI calls and dealing with ACPI Power
+ * Resources. The workaround below runtime-suspends the chip to
+ * turn it off, leaving it up to the ACPI subsystem to deal with
+ * this.
+ */
+
+ if (device_property_read_bool(&client->dev,
+ "silead,stuck-controller-bug")) {
+ pm_runtime_set_active(&client->dev);
+ pm_runtime_enable(&client->dev);
+ pm_runtime_allow(&client->dev);
+
+ pm_runtime_suspend(&client->dev);
+
+ dev_warn(&client->dev, FW_BUG "Stuck I2C bus: please ignore the next 'controller timed out' error\n");
+ silead_ts_get_id(client);
+
+ /* The forbid will also resume the device */
+ pm_runtime_forbid(&client->dev);
+ pm_runtime_disable(&client->dev);
+ }
+
silead_ts_set_power(client, SILEAD_POWER_OFF);
silead_ts_set_power(client, SILEAD_POWER_ON);
error = silead_ts_get_id(client);
- if (error)
+ if (error) {
+ dev_err(&client->dev, "Chip ID read error %d\n", error);
return error;
+ }
error = silead_ts_init(client);
if (error)
silead_ts_read_props(client);
- /* We must have the IRQ provided by DT or ACPI subsytem */
+ /* We must have the IRQ provided by DT or ACPI subsystem */
if (client->irq <= 0)
return -ENODEV;
* interrupts. To be on the safe side it's better to not enable
* the interrupts during their request.
*/
- irq_set_status_flags(client->irq, IRQ_NOAUTOEN);
err = devm_request_threaded_irq(&client->dev, client->irq,
NULL, stmfts_irq_handler,
- IRQF_ONESHOT,
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
"stmfts_irq", sdata);
if (err)
return err;
#ifndef _TSC2007_H
#define _TSC2007_H
+struct gpio_desc;
+
#define TSC2007_MEASURE_TEMP0 (0x0 << 4)
#define TSC2007_MEASURE_AUX (0x2 << 4)
#define TSC2007_MEASURE_TEMP1 (0x4 << 4)
int fuzzy;
int fuzzz;
- unsigned int gpio;
+ struct gpio_desc *gpiod;
int irq;
wait_queue_head_t wait;
#include <linux/module.h>
#include <linux/slab.h>
+#include <linux/gpio/consumer.h>
#include <linux/input.h>
#include <linux/interrupt.h>
#include <linux/i2c.h>
-#include <linux/of_device.h>
-#include <linux/of_gpio.h>
+#include <linux/mod_devicetable.h>
+#include <linux/property.h>
#include <linux/platform_data/tsc2007.h>
#include "tsc2007.h"
tsc2007_stop(ts);
}
-#ifdef CONFIG_OF
static int tsc2007_get_pendown_state_gpio(struct device *dev)
{
struct i2c_client *client = to_i2c_client(dev);
struct tsc2007 *ts = i2c_get_clientdata(client);
- return !gpio_get_value(ts->gpio);
+ return gpiod_get_value(ts->gpiod);
}
-static int tsc2007_probe_dt(struct i2c_client *client, struct tsc2007 *ts)
+static int tsc2007_probe_properties(struct device *dev, struct tsc2007 *ts)
{
- struct device_node *np = client->dev.of_node;
u32 val32;
u64 val64;
- if (!np) {
- dev_err(&client->dev, "missing device tree data\n");
- return -EINVAL;
- }
-
- if (!of_property_read_u32(np, "ti,max-rt", &val32))
+ if (!device_property_read_u32(dev, "ti,max-rt", &val32))
ts->max_rt = val32;
else
ts->max_rt = MAX_12BIT;
- if (!of_property_read_u32(np, "ti,fuzzx", &val32))
+ if (!device_property_read_u32(dev, "ti,fuzzx", &val32))
ts->fuzzx = val32;
- if (!of_property_read_u32(np, "ti,fuzzy", &val32))
+ if (!device_property_read_u32(dev, "ti,fuzzy", &val32))
ts->fuzzy = val32;
- if (!of_property_read_u32(np, "ti,fuzzz", &val32))
+ if (!device_property_read_u32(dev, "ti,fuzzz", &val32))
ts->fuzzz = val32;
- if (!of_property_read_u64(np, "ti,poll-period", &val64))
+ if (!device_property_read_u64(dev, "ti,poll-period", &val64))
ts->poll_period = msecs_to_jiffies(val64);
else
ts->poll_period = msecs_to_jiffies(1);
- if (!of_property_read_u32(np, "ti,x-plate-ohms", &val32)) {
+ if (!device_property_read_u32(dev, "ti,x-plate-ohms", &val32)) {
ts->x_plate_ohms = val32;
} else {
- dev_err(&client->dev, "missing ti,x-plate-ohms devicetree property.");
+ dev_err(dev, "Missing ti,x-plate-ohms device property\n");
return -EINVAL;
}
- ts->gpio = of_get_gpio(np, 0);
- if (gpio_is_valid(ts->gpio))
+ ts->gpiod = devm_gpiod_get_optional(dev, NULL, GPIOD_IN);
+ if (IS_ERR(ts->gpiod))
+ return PTR_ERR(ts->gpiod);
+
+ if (ts->gpiod)
ts->get_pendown_state = tsc2007_get_pendown_state_gpio;
else
- dev_warn(&client->dev,
- "GPIO not specified in DT (of_get_gpio returned %d)\n",
- ts->gpio);
+ dev_warn(dev, "Pen down GPIO is not specified in properties\n");
return 0;
}
-#else
-static int tsc2007_probe_dt(struct i2c_client *client, struct tsc2007 *ts)
-{
- dev_err(&client->dev, "platform data is required!\n");
- return -EINVAL;
-}
-#endif
-static int tsc2007_probe_pdev(struct i2c_client *client, struct tsc2007 *ts,
+static int tsc2007_probe_pdev(struct device *dev, struct tsc2007 *ts,
const struct tsc2007_platform_data *pdata,
const struct i2c_device_id *id)
{
ts->fuzzz = pdata->fuzzz;
if (pdata->x_plate_ohms == 0) {
- dev_err(&client->dev, "x_plate_ohms is not set up in platform data");
+ dev_err(dev, "x_plate_ohms is not set up in platform data\n");
return -EINVAL;
}
return -ENOMEM;
if (pdata)
- err = tsc2007_probe_pdev(client, ts, pdata, id);
+ err = tsc2007_probe_pdev(&client->dev, ts, pdata, id);
else
- err = tsc2007_probe_dt(client, ts);
+ err = tsc2007_probe_properties(&client->dev, ts);
if (err)
return err;
MODULE_DEVICE_TABLE(i2c, tsc2007_idtable);
-#ifdef CONFIG_OF
static const struct of_device_id tsc2007_of_match[] = {
{ .compatible = "ti,tsc2007" },
{ /* sentinel */ }
};
MODULE_DEVICE_TABLE(of, tsc2007_of_match);
-#endif
static struct i2c_driver tsc2007_driver = {
.driver = {
.name = "tsc2007",
- .of_match_table = of_match_ptr(tsc2007_of_match),
+ .of_match_table = tsc2007_of_match,
},
.id_table = tsc2007_idtable,
.probe = tsc2007_probe,
}
static int wacom_i2c_probe(struct i2c_client *client,
- const struct i2c_device_id *id)
+ const struct i2c_device_id *id)
{
+ struct device *dev = &client->dev;
struct wacom_i2c *wac_i2c;
struct input_dev *input;
struct wacom_features features = { 0 };
int error;
if (!i2c_check_functionality(client->adapter, I2C_FUNC_I2C)) {
- dev_err(&client->dev, "i2c_check_functionality error\n");
+ dev_err(dev, "i2c_check_functionality error\n");
return -EIO;
}
if (error)
return error;
- wac_i2c = kzalloc(sizeof(*wac_i2c), GFP_KERNEL);
- input = input_allocate_device();
- if (!wac_i2c || !input) {
- error = -ENOMEM;
- goto err_free_mem;
- }
+ wac_i2c = devm_kzalloc(dev, sizeof(*wac_i2c), GFP_KERNEL);
+ if (!wac_i2c)
+ return -ENOMEM;
wac_i2c->client = client;
+
+ input = devm_input_allocate_device(dev);
+ if (!input)
+ return -ENOMEM;
+
wac_i2c->input = input;
input->name = "Wacom I2C Digitizer";
input->id.bustype = BUS_I2C;
input->id.vendor = 0x56a;
input->id.version = features.fw_version;
- input->dev.parent = &client->dev;
input->open = wacom_i2c_open;
input->close = wacom_i2c_close;
input_set_drvdata(input, wac_i2c);
- error = request_threaded_irq(client->irq, NULL, wacom_i2c_irq,
- IRQF_TRIGGER_LOW | IRQF_ONESHOT,
- "wacom_i2c", wac_i2c);
+ error = devm_request_threaded_irq(dev, client->irq, NULL, wacom_i2c_irq,
+ IRQF_ONESHOT, "wacom_i2c", wac_i2c);
if (error) {
- dev_err(&client->dev,
- "Failed to enable IRQ, error: %d\n", error);
- goto err_free_mem;
+ dev_err(dev, "Failed to request IRQ: %d\n", error);
+ return error;
}
/* Disable the IRQ, we'll enable it in wac_i2c_open() */
error = input_register_device(wac_i2c->input);
if (error) {
- dev_err(&client->dev,
- "Failed to register input device, error: %d\n", error);
- goto err_free_irq;
+ dev_err(dev, "Failed to register input device: %d\n", error);
+ return error;
}
- i2c_set_clientdata(client, wac_i2c);
- return 0;
-
-err_free_irq:
- free_irq(client->irq, wac_i2c);
-err_free_mem:
- input_free_device(input);
- kfree(wac_i2c);
-
- return error;
-}
-
-static int wacom_i2c_remove(struct i2c_client *client)
-{
- struct wacom_i2c *wac_i2c = i2c_get_clientdata(client);
-
- free_irq(client->irq, wac_i2c);
- input_unregister_device(wac_i2c->input);
- kfree(wac_i2c);
-
return 0;
}
},
.probe = wacom_i2c_probe,
- .remove = wacom_i2c_remove,
.id_table = wacom_i2c_id,
};
module_i2c_driver(wacom_i2c_driver);
error = request_threaded_irq(wm831x_ts->data_irq,
NULL, wm831x_ts_data_irq,
- irqf | IRQF_ONESHOT,
+ irqf | IRQF_ONESHOT | IRQF_NO_AUTOEN,
"Touchscreen data", wm831x_ts);
if (error) {
dev_err(&pdev->dev, "Failed to request data IRQ %d: %d\n",
wm831x_ts->data_irq, error);
goto err_alloc;
}
- disable_irq(wm831x_ts->data_irq);
if (pdata && pdata->pd_irqf)
irqf = pdata->pd_irqf;
return -EINVAL;
}
- irq_set_status_flags(client->irq, IRQ_NOAUTOEN);
error = devm_request_threaded_irq(&client->dev, client->irq,
NULL, zinitix_ts_irq_handler,
- IRQF_ONESHOT, client->name, bt541);
+ IRQF_ONESHOT | IRQF_NO_AUTOEN,
+ client->name, bt541);
if (error) {
dev_err(&client->dev, "Failed to request IRQ: %d\n", error);
return error;
unsigned long attrs)
{
struct iommu_domain *domain = iommu_get_dma_domain(dev);
- struct iommu_dma_cookie *cookie = domain->iova_cookie;
- struct iova_domain *iovad = &cookie->iovad;
phys_addr_t phys;
phys = iommu_iova_to_phys(domain, dma_addr);
__iommu_dma_unmap(dev, dma_addr, size);
if (unlikely(is_swiotlb_buffer(phys)))
- swiotlb_tbl_unmap_single(dev, phys, size,
- iova_align(iovad, size), dir, attrs);
+ swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}
static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
}
iova = __iommu_dma_map(dev, phys, aligned_size, prot, dma_mask);
- if ((iova == DMA_MAPPING_ERROR) && is_swiotlb_buffer(phys))
- swiotlb_tbl_unmap_single(dev, phys, org_size,
- aligned_size, dir, attrs);
-
+ if (iova == DMA_MAPPING_ERROR && is_swiotlb_buffer(phys))
+ swiotlb_tbl_unmap_single(dev, phys, org_size, dir, attrs);
return iova;
}
return pages;
}
-/**
- * iommu_dma_alloc_remap - Allocate and map a buffer contiguous in IOVA space
- * @dev: Device to allocate memory for. Must be a real device
- * attached to an iommu_dma_domain
- * @size: Size of buffer in bytes
- * @dma_handle: Out argument for allocated DMA handle
- * @gfp: Allocation flags
- * @prot: pgprot_t to use for the remapped mapping
- * @attrs: DMA attributes for this allocation
- *
- * If @size is less than PAGE_SIZE, then a full CPU page will be allocated,
+/*
+ * If size is less than PAGE_SIZE, then a full CPU page will be allocated,
* but an IOMMU which supports smaller pages might not map the whole thing.
- *
- * Return: Mapped virtual address, or NULL on failure.
*/
-static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
+static struct page **__iommu_dma_alloc_noncontiguous(struct device *dev,
+ size_t size, struct sg_table *sgt, gfp_t gfp, pgprot_t prot,
unsigned long attrs)
{
struct iommu_domain *domain = iommu_get_dma_domain(dev);
int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs);
unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap;
struct page **pages;
- struct sg_table sgt;
dma_addr_t iova;
- void *vaddr;
-
- *dma_handle = DMA_MAPPING_ERROR;
if (static_branch_unlikely(&iommu_deferred_attach_enabled) &&
iommu_deferred_attach(dev, domain))
if (!iova)
goto out_free_pages;
- if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL))
+ if (sg_alloc_table_from_pages(sgt, pages, count, 0, size, GFP_KERNEL))
goto out_free_iova;
if (!(ioprot & IOMMU_CACHE)) {
struct scatterlist *sg;
int i;
- for_each_sg(sgt.sgl, sg, sgt.orig_nents, i)
+ for_each_sg(sgt->sgl, sg, sgt->orig_nents, i)
arch_dma_prep_coherent(sg_page(sg), sg->length);
}
- if (iommu_map_sg_atomic(domain, iova, sgt.sgl, sgt.orig_nents, ioprot)
+ if (iommu_map_sg_atomic(domain, iova, sgt->sgl, sgt->orig_nents, ioprot)
< size)
goto out_free_sg;
+ sgt->sgl->dma_address = iova;
+ sgt->sgl->dma_length = size;
+ return pages;
+
+out_free_sg:
+ sg_free_table(sgt);
+out_free_iova:
+ iommu_dma_free_iova(cookie, iova, size, NULL);
+out_free_pages:
+ __iommu_dma_free_pages(pages, count);
+ return NULL;
+}
+
+static void *iommu_dma_alloc_remap(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, gfp_t gfp, pgprot_t prot,
+ unsigned long attrs)
+{
+ struct page **pages;
+ struct sg_table sgt;
+ void *vaddr;
+
+ pages = __iommu_dma_alloc_noncontiguous(dev, size, &sgt, gfp, prot,
+ attrs);
+ if (!pages)
+ return NULL;
+ *dma_handle = sgt.sgl->dma_address;
+ sg_free_table(&sgt);
vaddr = dma_common_pages_remap(pages, size, prot,
__builtin_return_address(0));
if (!vaddr)
goto out_unmap;
-
- *dma_handle = iova;
- sg_free_table(&sgt);
return vaddr;
out_unmap:
- __iommu_dma_unmap(dev, iova, size);
-out_free_sg:
- sg_free_table(&sgt);
-out_free_iova:
- iommu_dma_free_iova(cookie, iova, size, NULL);
-out_free_pages:
- __iommu_dma_free_pages(pages, count);
+ __iommu_dma_unmap(dev, *dma_handle, size);
+ __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
return NULL;
}
+#ifdef CONFIG_DMA_REMAP
+static struct sg_table *iommu_dma_alloc_noncontiguous(struct device *dev,
+ size_t size, enum dma_data_direction dir, gfp_t gfp,
+ unsigned long attrs)
+{
+ struct dma_sgt_handle *sh;
+
+ sh = kmalloc(sizeof(*sh), gfp);
+ if (!sh)
+ return NULL;
+
+ sh->pages = __iommu_dma_alloc_noncontiguous(dev, size, &sh->sgt, gfp,
+ PAGE_KERNEL, attrs);
+ if (!sh->pages) {
+ kfree(sh);
+ return NULL;
+ }
+ return &sh->sgt;
+}
+
+static void iommu_dma_free_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt, enum dma_data_direction dir)
+{
+ struct dma_sgt_handle *sh = sgt_handle(sgt);
+
+ __iommu_dma_unmap(dev, sgt->sgl->dma_address, size);
+ __iommu_dma_free_pages(sh->pages, PAGE_ALIGN(size) >> PAGE_SHIFT);
+ sg_free_table(&sh->sgt);
+}
+#endif /* CONFIG_DMA_REMAP */
+
static void iommu_dma_sync_single_for_cpu(struct device *dev,
dma_addr_t dma_handle, size_t size, enum dma_data_direction dir)
{
arch_sync_dma_for_cpu(phys, size, dir);
if (is_swiotlb_buffer(phys))
- swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_CPU);
+ swiotlb_sync_single_for_cpu(dev, phys, size, dir);
}
static void iommu_dma_sync_single_for_device(struct device *dev,
phys = iommu_iova_to_phys(iommu_get_dma_domain(dev), dma_handle);
if (is_swiotlb_buffer(phys))
- swiotlb_tbl_sync_single(dev, phys, size, dir, SYNC_FOR_DEVICE);
+ swiotlb_sync_single_for_device(dev, phys, size, dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(phys, size, dir);
arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir);
if (is_swiotlb_buffer(sg_phys(sg)))
- swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
- dir, SYNC_FOR_CPU);
+ swiotlb_sync_single_for_cpu(dev, sg_phys(sg),
+ sg->length, dir);
}
}
for_each_sg(sgl, sg, nelems, i) {
if (is_swiotlb_buffer(sg_phys(sg)))
- swiotlb_tbl_sync_single(dev, sg_phys(sg), sg->length,
- dir, SYNC_FOR_DEVICE);
+ swiotlb_sync_single_for_device(dev, sg_phys(sg),
+ sg->length, dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(sg_phys(sg), sg->length, dir);
.free = iommu_dma_free,
.alloc_pages = dma_common_alloc_pages,
.free_pages = dma_common_free_pages,
+#ifdef CONFIG_DMA_REMAP
+ .alloc_noncontiguous = iommu_dma_alloc_noncontiguous,
+ .free_noncontiguous = iommu_dma_free_noncontiguous,
+#endif
.mmap = iommu_dma_mmap,
.get_sgtable = iommu_dma_get_sgtable,
.map_page = iommu_dma_map_page,
break;
case X86_IRQ_ALLOC_TYPE_PCI_MSI:
case X86_IRQ_ALLOC_TYPE_PCI_MSIX:
- set_msi_sid(irte, msi_desc_to_pci_dev(info->desc));
+ set_msi_sid(irte,
+ pci_real_dma_dev(msi_desc_to_pci_dev(info->desc)));
break;
default:
BUG_ON(1);
static const struct proc_ops empty_proc_ops = {
.proc_read = empty_read,
+ .proc_lseek = default_llseek,
};
// ---------------------------------------------------------------------------
#include "features.h"
#include <linux/blkdev.h>
+#include <linux/pagemap.h>
#include <linux/debugfs.h>
#include <linux/genhd.h>
#include <linux/idr.h>
Please send bug reports and support requests to <luc@saillard.org>.
The decompression routines have been implemented by reverse-engineering the
Nemosoft binary pwcx module. Caveat emptor.
-
-
- vim: set ts=8:
*/
#include <asm/current.h>
* Laurent Pinchart (laurent.pinchart@ideasonboard.com)
*/
+#include <linux/dma-mapping.h>
+#include <linux/highmem.h>
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/usb.h>
+#include <linux/usb/hcd.h>
#include <linux/videodev2.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
return data[0];
}
+static inline enum dma_data_direction uvc_stream_dir(
+ struct uvc_streaming *stream)
+{
+ if (stream->type == V4L2_BUF_TYPE_VIDEO_CAPTURE)
+ return DMA_FROM_DEVICE;
+ else
+ return DMA_TO_DEVICE;
+}
+
+static inline struct device *uvc_stream_to_dmadev(struct uvc_streaming *stream)
+{
+ return bus_to_hcd(stream->dev->udev->bus)->self.sysdev;
+}
+
+static int uvc_submit_urb(struct uvc_urb *uvc_urb, gfp_t mem_flags)
+{
+ /* Sync DMA. */
+ dma_sync_sgtable_for_device(uvc_stream_to_dmadev(uvc_urb->stream),
+ uvc_urb->sgt,
+ uvc_stream_dir(uvc_urb->stream));
+ return usb_submit_urb(uvc_urb->urb, mem_flags);
+}
+
/*
* uvc_video_decode_data_work: Asynchronous memcpy processing
*
uvc_queue_buffer_release(op->buf);
}
- ret = usb_submit_urb(uvc_urb->urb, GFP_KERNEL);
+ ret = uvc_submit_urb(uvc_urb, GFP_KERNEL);
if (ret < 0)
dev_err(&uvc_urb->stream->intf->dev,
"Failed to resubmit video URB (%d).\n", ret);
/* Re-initialise the URB async work. */
uvc_urb->async_operations = 0;
+ /* Sync DMA and invalidate vmap range. */
+ dma_sync_sgtable_for_cpu(uvc_stream_to_dmadev(uvc_urb->stream),
+ uvc_urb->sgt, uvc_stream_dir(stream));
+ invalidate_kernel_vmap_range(uvc_urb->buffer,
+ uvc_urb->stream->urb_size);
+
/*
* Process the URB headers, and optionally queue expensive memcpy tasks
* to be deferred to a work queue.
/* If no async work is needed, resubmit the URB immediately. */
if (!uvc_urb->async_operations) {
- ret = usb_submit_urb(uvc_urb->urb, GFP_ATOMIC);
+ ret = uvc_submit_urb(uvc_urb, GFP_ATOMIC);
if (ret < 0)
dev_err(&stream->intf->dev,
"Failed to resubmit video URB (%d).\n", ret);
*/
static void uvc_free_urb_buffers(struct uvc_streaming *stream)
{
+ struct device *dma_dev = uvc_stream_to_dmadev(stream);
struct uvc_urb *uvc_urb;
for_each_uvc_urb(uvc_urb, stream) {
if (!uvc_urb->buffer)
continue;
-#ifndef CONFIG_DMA_NONCOHERENT
- usb_free_coherent(stream->dev->udev, stream->urb_size,
- uvc_urb->buffer, uvc_urb->dma);
-#else
- kfree(uvc_urb->buffer);
-#endif
+ dma_vunmap_noncontiguous(dma_dev, uvc_urb->buffer);
+ dma_free_noncontiguous(dma_dev, stream->urb_size, uvc_urb->sgt,
+ uvc_stream_dir(stream));
+
uvc_urb->buffer = NULL;
+ uvc_urb->sgt = NULL;
}
stream->urb_size = 0;
}
+static bool uvc_alloc_urb_buffer(struct uvc_streaming *stream,
+ struct uvc_urb *uvc_urb, gfp_t gfp_flags)
+{
+ struct device *dma_dev = uvc_stream_to_dmadev(stream);
+
+ uvc_urb->sgt = dma_alloc_noncontiguous(dma_dev, stream->urb_size,
+ uvc_stream_dir(stream),
+ gfp_flags, 0);
+ if (!uvc_urb->sgt)
+ return false;
+ uvc_urb->dma = uvc_urb->sgt->sgl->dma_address;
+
+ uvc_urb->buffer = dma_vmap_noncontiguous(dma_dev, stream->urb_size,
+ uvc_urb->sgt);
+ if (!uvc_urb->buffer) {
+ dma_free_noncontiguous(dma_dev, stream->urb_size,
+ uvc_urb->sgt,
+ uvc_stream_dir(stream));
+ uvc_urb->sgt = NULL;
+ return false;
+ }
+
+ return true;
+}
+
/*
* Allocate transfer buffers. This function can be called with buffers
* already allocated when resuming from suspend, in which case it will
/* Retry allocations until one succeed. */
for (; npackets > 1; npackets /= 2) {
+ stream->urb_size = psize * npackets;
+
for (i = 0; i < UVC_URBS; ++i) {
struct uvc_urb *uvc_urb = &stream->uvc_urb[i];
- stream->urb_size = psize * npackets;
-#ifndef CONFIG_DMA_NONCOHERENT
- uvc_urb->buffer = usb_alloc_coherent(
- stream->dev->udev, stream->urb_size,
- gfp_flags | __GFP_NOWARN, &uvc_urb->dma);
-#else
- uvc_urb->buffer =
- kmalloc(stream->urb_size, gfp_flags | __GFP_NOWARN);
-#endif
- if (!uvc_urb->buffer) {
+ if (!uvc_alloc_urb_buffer(stream, uvc_urb, gfp_flags)) {
uvc_free_urb_buffers(stream);
break;
}
urb->context = uvc_urb;
urb->pipe = usb_rcvisocpipe(stream->dev->udev,
ep->desc.bEndpointAddress);
-#ifndef CONFIG_DMA_NONCOHERENT
urb->transfer_flags = URB_ISO_ASAP | URB_NO_TRANSFER_DMA_MAP;
urb->transfer_dma = uvc_urb->dma;
-#else
- urb->transfer_flags = URB_ISO_ASAP;
-#endif
urb->interval = ep->desc.bInterval;
urb->transfer_buffer = uvc_urb->buffer;
urb->complete = uvc_video_complete;
usb_fill_bulk_urb(urb, stream->dev->udev, pipe, uvc_urb->buffer,
size, uvc_video_complete, uvc_urb);
-#ifndef CONFIG_DMA_NONCOHERENT
urb->transfer_flags = URB_NO_TRANSFER_DMA_MAP;
urb->transfer_dma = uvc_urb->dma;
-#endif
uvc_urb->urb = urb;
}
/* Submit the URBs. */
for_each_uvc_urb(uvc_urb, stream) {
- ret = usb_submit_urb(uvc_urb->urb, gfp_flags);
+ ret = uvc_submit_urb(uvc_urb, gfp_flags);
if (ret < 0) {
dev_err(&stream->intf->dev,
"Failed to submit URB %u (%d).\n",
*/
struct gpio_desc;
+struct sg_table;
struct uvc_device;
/* TODO: Put the most frequently accessed fields at the beginning of
* @urb: the URB described by this context structure
* @stream: UVC streaming context
* @buffer: memory storage for the URB
- * @dma: DMA coherent addressing for the urb_buffer
+ * @dma: Allocated DMA handle
+ * @sgt: sgt_table with the urb locations in memory
* @async_operations: counter to indicate the number of copy operations
* @copy_operations: work descriptors for asynchronous copy operations
* @work: work queue entry for asynchronous decode
char *buffer;
dma_addr_t dma;
+ struct sg_table *sgt;
unsigned int async_operations;
struct uvc_copy_op copy_operations[UVC_MAX_PACKETS];
# SPDX-License-Identifier: GPL-2.0-only
-ti-emif-asm-offsets.h
+/ti-emif-asm-offsets.h
fallthrough;
case 'K':
result *= 1024;
+ break;
case '\0':
break;
default:
int mode;
};
-struct ubi_wl_entry;
-
/**
* struct ubi_debug_info - debugging information for an UBI device.
*
int i;
int putidx;
+ cdev->tx_skb = NULL;
+
/* Generate ID field for TX buffer Element */
/* Common to all supported M_CAN versions */
if (cf->can_id & CAN_EFF_FLAG) {
tx_work);
m_can_tx_handler(cdev);
- cdev->tx_skb = NULL;
}
static netdev_tx_t m_can_start_xmit(struct sk_buff *skb,
priv->force_quit = 1;
free_irq(spi->irq, priv);
- destroy_workqueue(priv->wq);
- priv->wq = NULL;
mutex_lock(&priv->mcp_lock);
goto out_close;
}
- priv->wq = alloc_workqueue("mcp251x_wq", WQ_FREEZABLE | WQ_MEM_RECLAIM,
- 0);
- if (!priv->wq) {
- ret = -ENOMEM;
- goto out_clean;
- }
- INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler);
- INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler);
-
ret = mcp251x_hw_wake(spi);
if (ret)
- goto out_free_wq;
+ goto out_free_irq;
ret = mcp251x_setup(net, spi);
if (ret)
- goto out_free_wq;
+ goto out_free_irq;
ret = mcp251x_set_normal_mode(spi);
if (ret)
- goto out_free_wq;
+ goto out_free_irq;
can_led_event(net, CAN_LED_EVENT_OPEN);
return 0;
-out_free_wq:
- destroy_workqueue(priv->wq);
-out_clean:
+out_free_irq:
free_irq(spi->irq, priv);
mcp251x_hw_sleep(spi);
out_close:
if (ret)
goto out_clk;
+ priv->wq = alloc_workqueue("mcp251x_wq", WQ_FREEZABLE | WQ_MEM_RECLAIM,
+ 0);
+ if (!priv->wq) {
+ ret = -ENOMEM;
+ goto out_clk;
+ }
+ INIT_WORK(&priv->tx_work, mcp251x_tx_work_handler);
+ INIT_WORK(&priv->restart_work, mcp251x_restart_work_handler);
+
priv->spi = spi;
mutex_init(&priv->mcp_lock);
return 0;
error_probe:
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
mcp251x_power_enable(priv->power, 0);
out_clk:
mcp251x_power_enable(priv->power, 0);
+ destroy_workqueue(priv->wq);
+ priv->wq = NULL;
+
clk_disable_unprepare(priv->clk);
free_candev(net);
clk = devm_clk_get(&spi->dev, NULL);
if (IS_ERR(clk))
- dev_err_probe(&spi->dev, PTR_ERR(clk),
- "Failed to get Oscillator (clock)!\n");
+ return dev_err_probe(&spi->dev, PTR_ERR(clk),
+ "Failed to get Oscillator (clock)!\n");
freq = clk_get_rate(clk);
/* Sanity check */
err = mcp251xfd_register(priv);
if (err)
- goto out_free_candev;
+ goto out_can_rx_offload_del;
return 0;
+ out_can_rx_offload_del:
+ can_rx_offload_del(&priv->offload);
out_free_candev:
spi->max_speed_hz = priv->spi_max_speed_hz_orig;
int i, ret = 0;
ksz8 = devm_kzalloc(&spi->dev, sizeof(struct ksz8), GFP_KERNEL);
+ if (!ksz8)
+ return -ENOMEM;
+
ksz8->priv = spi;
dev = ksz_switch_alloc(&spi->dev, ksz8);
int i;
ksz8 = devm_kzalloc(&mdiodev->dev, sizeof(struct ksz8), GFP_KERNEL);
+ if (!ksz8)
+ return -ENOMEM;
+
ksz8->priv = mdiodev;
dev = ksz_switch_alloc(&mdiodev->dev, ksz8);
if (!dev)
- return -EINVAL;
+ return -ENOMEM;
for (i = 0; i < ARRAY_SIZE(ksz8863_regmap_config); i++) {
rc = ksz8863_regmap_config[i];
module_init(starfire_init);
module_exit(starfire_cleanup);
-
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
module_init(atarilance_module_init);
module_exit(atarilance_module_exit);
#endif /* MODULE */
-
-
-/*
- * Local variables:
- * c-indent-level: 4
- * tab-width: 4
- * End:
- */
module_init(pcnet32_init_module);
module_exit(pcnet32_cleanup_module);
-
-/*
- * Local variables:
- * c-indent-level: 4
- * tab-width: 8
- * End:
- */
module_pci_driver(alx_driver);
MODULE_DEVICE_TABLE(pci, alx_pci_tbl);
MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
-MODULE_AUTHOR("Qualcomm Corporation, <nic-devel@qualcomm.com>");
+MODULE_AUTHOR("Qualcomm Corporation");
MODULE_DESCRIPTION(
"Qualcomm Atheros(R) AR816x/AR817x PCI-E Ethernet Network Driver");
MODULE_LICENSE("GPL");
MODULE_DEVICE_TABLE(pci, atl1c_pci_tbl);
MODULE_AUTHOR("Jie Yang");
-MODULE_AUTHOR("Qualcomm Atheros Inc., <nic-devel@qualcomm.com>");
+MODULE_AUTHOR("Qualcomm Atheros Inc.");
MODULE_DESCRIPTION("Qualcomm Atheros 100/1000M Ethernet Network Driver");
MODULE_LICENSE("GPL");
data[i + 3] = data[i + BNX2_VPD_LEN];
}
- i = pci_vpd_find_tag(data, 0, BNX2_VPD_LEN, PCI_VPD_LRDT_RO_DATA);
+ i = pci_vpd_find_tag(data, BNX2_VPD_LEN, PCI_VPD_LRDT_RO_DATA);
if (i < 0)
goto vpd_done;
/* VPD RO tag should be first tag after identifier string, hence
* we should be able to find it in first BNX2X_VPD_LEN chars
*/
- i = pci_vpd_find_tag(vpd_start, 0, BNX2X_VPD_LEN,
- PCI_VPD_LRDT_RO_DATA);
+ i = pci_vpd_find_tag(vpd_start, BNX2X_VPD_LEN, PCI_VPD_LRDT_RO_DATA);
if (i < 0)
goto out_not_found;
return 0;
}
- err = -EIO;
/* verify ari is enabled */
if (!pci_ari_enabled(bp->pdev->bus)) {
BNX2X_ERR("ARI not supported (check pci bridge ARI forwarding), SRIOV can not be enabled\n");
goto exit;
}
- i = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
+ i = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
if (i < 0) {
netdev_err(bp->dev, "VPD READ-Only not found\n");
goto exit;
if (!buf)
return -ENOMEM;
- i = pci_vpd_find_tag((u8 *)buf, 0, len, PCI_VPD_LRDT_RO_DATA);
+ i = pci_vpd_find_tag((u8 *)buf, len, PCI_VPD_LRDT_RO_DATA);
if (i > 0) {
j = pci_vpd_lrdt_size(&((u8 *)buf)[i]);
if (j < 0)
if (!vpd_data)
goto out_no_vpd;
- i = pci_vpd_find_tag(vpd_data, 0, vpdlen, PCI_VPD_LRDT_RO_DATA);
+ i = pci_vpd_find_tag(vpd_data, vpdlen, PCI_VPD_LRDT_RO_DATA);
if (i < 0)
goto out_not_found;
}
}
- /* Check for BNAD_CF_DIM_ENABLED, does not eleminate a race */
+ /* Check for BNAD_CF_DIM_ENABLED, does not eliminate a race */
if (test_bit(BNAD_RF_DIM_TIMER_RUNNING, &bnad->run_flags))
mod_timer(&bnad->dim_timer,
jiffies + msecs_to_jiffies(BNAD_DIM_TIMER_FREQ));
{
struct net_device *netdev = dev_get_drvdata(dev);
struct macb *bp = netdev_priv(netdev);
- struct macb_queue *queue = bp->queues;
+ struct macb_queue *queue;
unsigned long flags;
unsigned int q;
int err;
{
struct net_device *netdev = dev_get_drvdata(dev);
struct macb *bp = netdev_priv(netdev);
- struct macb_queue *queue = bp->queues;
+ struct macb_queue *queue;
unsigned long flags;
unsigned int q;
int err;
spin_lock_bh(&eosw_txq->lock);
if (tc != FW_SCHED_CLS_NONE) {
if (eosw_txq->state != CXGB4_EO_STATE_CLOSED)
- goto out_unlock;
+ goto out_free_skb;
next_state = CXGB4_EO_STATE_FLOWC_OPEN_SEND;
} else {
if (eosw_txq->state != CXGB4_EO_STATE_ACTIVE)
- goto out_unlock;
+ goto out_free_skb;
next_state = CXGB4_EO_STATE_FLOWC_CLOSE_SEND;
}
eosw_txq_flush_pending_skbs(eosw_txq);
ret = eosw_txq_enqueue(eosw_txq, skb);
- if (ret) {
- dev_consume_skb_any(skb);
- goto out_unlock;
- }
+ if (ret)
+ goto out_free_skb;
eosw_txq->state = next_state;
eosw_txq->flowc_idx = eosw_txq->pidx;
eosw_txq_advance(eosw_txq, 1);
ethofld_xmit(dev, eosw_txq);
-out_unlock:
+ spin_unlock_bh(&eosw_txq->lock);
+ return 0;
+
+out_free_skb:
+ dev_consume_skb_any(skb);
spin_unlock_bh(&eosw_txq->lock);
return ret;
}
if (id_len > ID_LEN)
id_len = ID_LEN;
- i = pci_vpd_find_tag(vpd, 0, VPD_LEN, PCI_VPD_LRDT_RO_DATA);
+ i = pci_vpd_find_tag(vpd, VPD_LEN, PCI_VPD_LRDT_RO_DATA);
if (i < 0) {
dev_err(adapter->pdev_dev, "missing VPD-R section\n");
ret = -EINVAL;
return err;
}
-static inline void enic_queue_wq_skb(struct enic *enic,
+static inline int enic_queue_wq_skb(struct enic *enic,
struct vnic_wq *wq, struct sk_buff *skb)
{
unsigned int mss = skb_shinfo(skb)->gso_size;
wq->to_use = buf->next;
dev_kfree_skb(skb);
}
+ return err;
}
/* netif_tx_lock held, process context with BHs disabled, or BH */
return NETDEV_TX_BUSY;
}
- enic_queue_wq_skb(enic, wq, skb);
+ if (enic_queue_wq_skb(enic, wq, skb))
+ goto error;
if (vnic_wq_desc_avail(wq) < MAX_SKB_FRAGS + ENIC_DESC_MAX_SPLITS)
netif_tx_stop_queue(txq);
if (!netdev_xmit_more() || netif_xmit_stopped(txq))
vnic_wq_doorbell(wq);
+error:
spin_unlock(&enic->wq_lock[txq_map]);
return NETDEV_TX_OK;
if (h->ae_algo->ops->set_timer_task)
h->ae_algo->ops->set_timer_task(priv->ae_handle, false);
- netif_tx_stop_all_queues(netdev);
netif_carrier_off(netdev);
+ netif_tx_disable(netdev);
hns3_nic_net_down(netdev);
* and it is udp packet, which has a dest port as the IANA assigned.
* the hardware is expected to do the checksum offload, but the
* hardware will not do the checksum offload when udp dest port is
- * 4789 or 6081.
+ * 4789, 4790 or 6081.
*/
static bool hns3_tunnel_csum_bug(struct sk_buff *skb)
{
if (!(!skb->encapsulation &&
(l4.udp->dest == htons(IANA_VXLAN_UDP_PORT) ||
- l4.udp->dest == htons(GENEVE_UDP_PORT))))
+ l4.udp->dest == htons(GENEVE_UDP_PORT) ||
+ l4.udp->dest == htons(4790))))
return false;
skb_checksum_help(skb);
struct hns3_nic_priv *priv = netdev_priv(kinfo->netdev);
int ret = 0;
+ if (!test_bit(HNS3_NIC_STATE_INITED, &priv->state)) {
+ netdev_err(kinfo->netdev, "device is not initialized yet\n");
+ return -EFAULT;
+ }
+
clear_bit(HNS3_NIC_STATE_RESETTING, &priv->state);
if (netif_running(kinfo->netdev)) {
/* configure IGU,EGU error interrupts */
hclge_cmd_setup_basic_desc(&desc, HCLGE_IGU_COMMON_INT_EN, false);
+ desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_TYPE);
if (en)
- desc.data[0] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN);
+ desc.data[0] |= cpu_to_le32(HCLGE_IGU_ERR_INT_EN);
desc.data[1] = cpu_to_le32(HCLGE_IGU_ERR_INT_EN_MASK);
#define HCLGE_TQP_ECC_ERR_INT_EN_MASK 0x0FFF
#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN_MASK 0x0F000000
#define HCLGE_MSIX_SRAM_ECC_ERR_INT_EN 0x0F000000
-#define HCLGE_IGU_ERR_INT_EN 0x0000066F
+#define HCLGE_IGU_ERR_INT_EN 0x0000000F
+#define HCLGE_IGU_ERR_INT_TYPE 0x00000660
#define HCLGE_IGU_ERR_INT_EN_MASK 0x000F
#define HCLGE_IGU_TNL_ERR_INT_EN 0x0002AABF
#define HCLGE_IGU_TNL_ERR_INT_EN_MASK 0x003F
struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
enum hnae3_reset_type reset_level;
+ /* reset request will not be set during reset, so clear
+ * pending reset request to avoid unnecessary reset
+ * caused by the same reason.
+ */
+ hclge_get_reset_level(ae_dev, &hdev->reset_request);
+
/* if default_reset_request has a higher level reset request,
* it should be handled as soon as possible. since some errors
* need this kind of reset to fix.
unsigned long advertising;
unsigned long supported;
unsigned long send_data;
- u8 msg_data[10];
+ u8 msg_data[10] = {};
u8 dest_vfid;
advertising = hdev->hw.mac.advertising[0];
if (!phydev)
return;
+ phy_loopback(phydev, false);
+
phy_start(phydev);
}
return !!(pf->flags & I40E_FLAG_DISABLE_FW_LLDP);
}
-void i40e_set_lldp_forwarding(struct i40e_pf *pf, bool enable);
#ifdef CONFIG_I40E_DCB
void i40e_dcbnl_flush_apps(struct i40e_pf *pf,
struct i40e_dcbx_config *old_cfg,
I40E_PHY_TYPE_25GBASE_LR = 0x22,
I40E_PHY_TYPE_25GBASE_AOC = 0x23,
I40E_PHY_TYPE_25GBASE_ACC = 0x24,
- I40E_PHY_TYPE_2_5GBASE_T = 0x30,
- I40E_PHY_TYPE_5GBASE_T = 0x31,
+ I40E_PHY_TYPE_2_5GBASE_T = 0x26,
+ I40E_PHY_TYPE_5GBASE_T = 0x27,
+ I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS = 0x30,
+ I40E_PHY_TYPE_5GBASE_T_LINK_STATUS = 0x31,
I40E_PHY_TYPE_MAX,
I40E_PHY_TYPE_NOT_SUPPORTED_HIGH_TEMP = 0xFD,
I40E_PHY_TYPE_EMPTY = 0xFE,
clear_bit(__I40E_CLIENT_INSTANCE_OPENED,
&cdev->state);
i40e_client_del_instance(pf);
+ return;
}
}
}
break;
case I40E_PHY_TYPE_100BASE_TX:
case I40E_PHY_TYPE_1000BASE_T:
- case I40E_PHY_TYPE_2_5GBASE_T:
- case I40E_PHY_TYPE_5GBASE_T:
+ case I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS:
+ case I40E_PHY_TYPE_5GBASE_T_LINK_STATUS:
case I40E_PHY_TYPE_10GBASE_T:
media = I40E_MEDIA_TYPE_BASET;
break;
10000baseT_Full);
break;
case I40E_PHY_TYPE_10GBASE_T:
- case I40E_PHY_TYPE_5GBASE_T:
- case I40E_PHY_TYPE_2_5GBASE_T:
+ case I40E_PHY_TYPE_5GBASE_T_LINK_STATUS:
+ case I40E_PHY_TYPE_2_5GBASE_T_LINK_STATUS:
case I40E_PHY_TYPE_1000BASE_T:
case I40E_PHY_TYPE_100BASE_TX:
ethtool_link_ksettings_add_link_mode(ks, supported, Autoneg);
memset(&config, 0, sizeof(config));
config.phy_type = abilities.phy_type;
- config.abilities = abilities.abilities;
+ config.abilities = abilities.abilities |
+ I40E_AQ_PHY_ENABLE_ATOMIC_LINK;
config.phy_type_ext = abilities.phy_type_ext;
config.link_speed = abilities.link_speed;
config.eee_capability = abilities.eee_capability;
i40e_aq_cfg_lldp_mib_change_event(&pf->hw, false, NULL);
i40e_aq_stop_lldp(&pf->hw, true, false, NULL);
} else {
- i40e_set_lldp_forwarding(pf, false);
status = i40e_aq_start_lldp(&pf->hw, false, NULL);
if (status) {
adq_err = pf->hw.aq.asq_last_status;
}
#endif /* CONFIG_I40E_DCB */
-/**
- * i40e_set_lldp_forwarding - set forwarding of lldp frames
- * @pf: PF being configured
- * @enable: if forwarding to OS shall be enabled
- *
- * Toggle forwarding of lldp frames behavior,
- * When passing DCB control from firmware to software
- * lldp frames must be forwarded to the software based
- * lldp agent.
- */
-void i40e_set_lldp_forwarding(struct i40e_pf *pf, bool enable)
-{
- if (pf->lan_vsi == I40E_NO_VSI)
- return;
-
- if (!pf->vsi[pf->lan_vsi])
- return;
-
- /* No need to check the outcome, commands may fail
- * if desired value is already set
- */
- i40e_aq_add_rem_control_packet_filter(&pf->hw, NULL, ETH_P_LLDP,
- I40E_AQC_ADD_CONTROL_PACKET_FLAGS_TX |
- I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC,
- pf->vsi[pf->lan_vsi]->seid, 0,
- enable, NULL, NULL);
-
- i40e_aq_add_rem_control_packet_filter(&pf->hw, NULL, ETH_P_LLDP,
- I40E_AQC_ADD_CONTROL_PACKET_FLAGS_RX |
- I40E_AQC_ADD_CONTROL_PACKET_FLAGS_IGNORE_MAC,
- pf->vsi[pf->lan_vsi]->seid, 0,
- enable, NULL, NULL);
-}
-
/**
* i40e_print_link_message - print link up or down
* @vsi: the VSI for which link needs a message
*/
i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
pf->main_vsi_seid);
-#ifdef CONFIG_I40E_DCB
- if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)
- i40e_set_lldp_forwarding(pf, true);
-#endif /* CONFIG_I40E_DCB */
/* restart the VSIs that were rebuilt and running before the reset */
i40e_pf_unquiesce_all_vsi(pf);
*/
i40e_add_filter_to_drop_tx_flow_control_frames(&pf->hw,
pf->main_vsi_seid);
-#ifdef CONFIG_I40E_DCB
- if (pf->flags & I40E_FLAG_DISABLE_FW_LLDP)
- i40e_set_lldp_forwarding(pf, true);
-#endif /* CONFIG_I40E_DCB */
if ((pf->hw.device_id == I40E_DEV_ID_10G_BASE_T) ||
(pf->hw.device_id == I40E_DEV_ID_10G_BASE_T4))
union i40e_rx_desc *rx_desc)
{
- /* XDP packets use error pointer so abort at this point */
- if (IS_ERR(skb))
- return true;
-
/* ERR_MASK will only have valid bits if EOP set, and
* what we are doing here is actually checking
* I40E_RX_DESC_ERROR_RXE_SHIFT, since it is the zeroth bit in
}
/* exit if we failed to retrieve a buffer */
- if (!skb) {
+ if (!xdp_res && !skb) {
rx_ring->rx_stats.alloc_buff_failed++;
rx_buffer->pagecnt_bias++;
break;
if (i40e_is_non_eop(rx_ring, rx_desc))
continue;
- if (i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
+ if (xdp_res || i40e_cleanup_headers(rx_ring, skb, rx_desc)) {
skb = NULL;
continue;
}
#define I40E_CAP_PHY_TYPE_25GBASE_ACC BIT_ULL(I40E_PHY_TYPE_25GBASE_ACC + \
I40E_PHY_TYPE_OFFSET)
/* Offset for 2.5G/5G PHY Types value to bit number conversion */
-#define I40E_PHY_TYPE_OFFSET2 (-10)
-#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T + \
- I40E_PHY_TYPE_OFFSET2)
-#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T + \
- I40E_PHY_TYPE_OFFSET2)
+#define I40E_CAP_PHY_TYPE_2_5GBASE_T BIT_ULL(I40E_PHY_TYPE_2_5GBASE_T)
+#define I40E_CAP_PHY_TYPE_5GBASE_T BIT_ULL(I40E_PHY_TYPE_5GBASE_T)
#define I40E_HW_CAP_MAX_GPIO 30
/* Capabilities of a PF or a VF or the whole device */
struct i40e_hw_capabilities {
/* Allow mlxsw thermal zone binding to an external cooling device */
for (i = 0; i < ARRAY_SIZE(mlxsw_thermal_external_allowed_cdev); i++) {
if (strnstr(cdev->type, mlxsw_thermal_external_allowed_cdev[i],
- sizeof(cdev->type)))
+ strlen(cdev->type)))
return 0;
}
u16 erif_index = 0;
int err;
+ /* Add the eRIF */
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
+ erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
+ err = mr->mr_ops->route_erif_add(mlxsw_sp,
+ rve->mr_route->route_priv,
+ erif_index);
+ if (err)
+ return err;
+ }
+
/* Update the route action, as the new eVIF can be a tunnel or a pimreg
* device which will require updating the action.
*/
rve->mr_route->route_priv,
route_action);
if (err)
- return err;
- }
-
- /* Add the eRIF */
- if (mlxsw_sp_mr_vif_valid(rve->mr_vif)) {
- erif_index = mlxsw_sp_rif_index(rve->mr_vif->rif);
- err = mr->mr_ops->route_erif_add(mlxsw_sp,
- rve->mr_route->route_priv,
- erif_index);
- if (err)
- goto err_route_erif_add;
+ goto err_route_action_update;
}
/* Update the minimum MTU */
return 0;
err_route_min_mtu_update:
- if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
- mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
- erif_index);
-err_route_erif_add:
if (route_action != rve->mr_route->route_action)
mr->mr_ops->route_action_update(mlxsw_sp,
rve->mr_route->route_priv,
rve->mr_route->route_action);
+err_route_action_update:
+ if (mlxsw_sp_mr_vif_valid(rve->mr_vif))
+ mr->mr_ops->route_erif_del(mlxsw_sp, rve->mr_route->route_priv,
+ erif_index);
return err;
}
if (net_ratelimit())
netdev_err(dev, "PCI error (cmd = 0x%04x, status_errs = 0x%04x)\n",
pci_cmd, pci_status_errs);
- /*
- * The recovery sequence below admits a very elaborated explanation:
- * - it seems to work;
- * - I did not see what else could be done;
- * - it makes iop3xx happy.
- *
- * Feel free to adjust to your needs.
- */
- if (pdev->broken_parity_status)
- pci_cmd &= ~PCI_COMMAND_PARITY;
- else
- pci_cmd |= PCI_COMMAND_SERR | PCI_COMMAND_PARITY;
-
- pci_write_config_word(pdev, PCI_COMMAND, pci_cmd);
rtl_schedule_task(tp, RTL_FLAG_TASK_RESET_PENDING);
}
}
/* Get the Read only section */
- ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
+ ro_start = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
if (ro_start < 0) {
netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
return;
}
/* Get the Read only section */
- ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size, PCI_VPD_LRDT_RO_DATA);
+ ro_start = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
if (ro_start < 0) {
netif_err(efx, drv, efx->net_dev, "VPD Read-only not found\n");
return;
value &= ~GMAC_PACKET_FILTER_PCF;
value &= ~GMAC_PACKET_FILTER_PM;
value &= ~GMAC_PACKET_FILTER_PR;
+ value &= ~GMAC_PACKET_FILTER_RA;
if (dev->flags & IFF_PROMISC) {
/* VLAN Tag Filter Fail Packets Queuing */
if (hw->vlan_fail_q_en) {
u32 channel, int fifosz, u8 qmode)
{
unsigned int rqs = fifosz / 256 - 1;
- u32 mtl_rx_op, mtl_rx_int;
+ u32 mtl_rx_op;
mtl_rx_op = readl(ioaddr + MTL_CHAN_RX_OP_MODE(channel));
}
writel(mtl_rx_op, ioaddr + MTL_CHAN_RX_OP_MODE(channel));
-
- /* Enable MTL RX overflow */
- mtl_rx_int = readl(ioaddr + MTL_CHAN_INT_CTRL(channel));
- writel(mtl_rx_int | MTL_RX_OVERFLOW_INT_EN,
- ioaddr + MTL_CHAN_INT_CTRL(channel));
}
static void dwmac4_dma_tx_chan_op_mode(void __iomem *ioaddr, int mode,
#define stmmac_clean_desc3(__priv, __args...) \
stmmac_do_void_callback(__priv, mode, clean_desc3, __args)
-struct stmmac_priv;
struct tc_cls_u32_offload;
struct tc_cbs_qopt_offload;
struct flow_cls_offload;
char *name;
clear_bit(__FPE_TASK_SCHED, &priv->fpe_task_state);
+ clear_bit(__FPE_REMOVING, &priv->fpe_task_state);
name = priv->wq_name;
sprintf(name, "%s-fpe", priv->dev->name);
/* To handle GMAC own interrupts */
if ((priv->plat->has_gmac) || xmac) {
int status = stmmac_host_irq_status(priv, priv->hw, &priv->xstats);
- int mtl_status;
if (unlikely(status)) {
/* For LPI we need to save the tx status */
}
for (queue = 0; queue < queues_count; queue++) {
- struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
-
- mtl_status = stmmac_host_mtl_irq_status(priv, priv->hw,
- queue);
- if (mtl_status != -EINVAL)
- status |= mtl_status;
-
- if (status & CORE_IRQ_MTL_RX_OVERFLOW)
- stmmac_set_rx_tail_ptr(priv, priv->ioaddr,
- rx_q->rx_tail_addr,
- queue);
+ status = stmmac_host_mtl_irq_status(priv, priv->hw,
+ queue);
}
/* PCS link status */
iowrite32(0, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
/* The inter-EE registers are in the non-adjusted address range */
- iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_CH_IRQ_OFFSET);
- iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_EV_CH_IRQ_OFFSET);
+ iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET);
+ iowrite32(0, gsi->virt_raw + GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET);
iowrite32(0, gsi->virt + GSI_CNTXT_GSI_IRQ_EN_OFFSET);
}
#define GSI_EE_REG_ADJUST 0x0000d000 /* IPA v4.5+ */
/* The two inter-EE IRQ register offsets are relative to gsi->virt_raw */
-#define GSI_INTER_EE_SRC_CH_IRQ_OFFSET \
- GSI_INTER_EE_N_SRC_CH_IRQ_OFFSET(GSI_EE_AP)
-#define GSI_INTER_EE_N_SRC_CH_IRQ_OFFSET(ee) \
- (0x0000c018 + 0x1000 * (ee))
-
-#define GSI_INTER_EE_SRC_EV_CH_IRQ_OFFSET \
- GSI_INTER_EE_N_SRC_EV_CH_IRQ_OFFSET(GSI_EE_AP)
-#define GSI_INTER_EE_N_SRC_EV_CH_IRQ_OFFSET(ee) \
- (0x0000c01c + 0x1000 * (ee))
+#define GSI_INTER_EE_SRC_CH_IRQ_MSK_OFFSET \
+ GSI_INTER_EE_N_SRC_CH_IRQ_MSK_OFFSET(GSI_EE_AP)
+#define GSI_INTER_EE_N_SRC_CH_IRQ_MSK_OFFSET(ee) \
+ (0x0000c020 + 0x1000 * (ee))
+
+#define GSI_INTER_EE_SRC_EV_CH_IRQ_MSK_OFFSET \
+ GSI_INTER_EE_N_SRC_EV_CH_IRQ_MSK_OFFSET(GSI_EE_AP)
+#define GSI_INTER_EE_N_SRC_EV_CH_IRQ_MSK_OFFSET(ee) \
+ (0x0000c024 + 0x1000 * (ee))
/* All other register offsets are relative to gsi->virt */
}
}
+static int m88e1112_config_init(struct phy_device *phydev)
+{
+ int err;
+
+ err = m88e1011_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
+
+ return m88e1111_config_init(phydev);
+}
+
+static int m88e1111gbe_config_init(struct phy_device *phydev)
+{
+ int err;
+
+ err = m88e1111_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
+
+ return m88e1111_config_init(phydev);
+}
+
+static int marvell_1011gbe_config_init(struct phy_device *phydev)
+{
+ int err;
+
+ err = m88e1011_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
+
+ return marvell_config_init(phydev);
+}
static int m88e1116r_config_init(struct phy_device *phydev)
{
int err;
if (err < 0)
return err;
}
+ err = m88e1011_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
return m88e1318_config_init(phydev);
}
if (err < 0)
return err;
}
+ err = m88e1111_set_downshift(phydev, 3);
+ if (err < 0)
+ return err;
err = marvell_of_reg_init(phydev);
if (err < 0)
.name = "Marvell 88E1112",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = m88e1111_config_init,
+ .config_init = m88e1112_config_init,
.config_aneg = marvell_config_aneg,
.config_intr = marvell_config_intr,
.handle_interrupt = marvell_handle_interrupt,
.name = "Marvell 88E1111",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = m88e1111_config_init,
+ .config_init = m88e1111gbe_config_init,
.config_aneg = m88e1111_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
.name = "Marvell 88E1111 (Finisar)",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = m88e1111_config_init,
+ .config_init = m88e1111gbe_config_init,
.config_aneg = m88e1111_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
.driver_data = DEF_MARVELL_HWMON_OPS(m88e1121_hwmon_ops),
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1121_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
.name = "Marvell 88E1240",
/* PHY_GBIT_FEATURES */
.probe = marvell_probe,
- .config_init = m88e1111_config_init,
+ .config_init = m88e1112_config_init,
.config_aneg = marvell_config_aneg,
.config_intr = marvell_config_intr,
.handle_interrupt = marvell_handle_interrupt,
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
.probe = marvell_probe,
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e6390_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e6390_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
/* PHY_GBIT_FEATURES */
.flags = PHY_POLL_CABLE_TEST,
.probe = marvell_probe,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
.probe = marvell_probe,
/* PHY_GBIT_FEATURES */
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
.driver_data = DEF_MARVELL_HWMON_OPS(m88e1510_hwmon_ops),
.probe = marvell_probe,
.features = PHY_GBIT_FIBRE_FEATURES,
- .config_init = marvell_config_init,
+ .config_init = marvell_1011gbe_config_init,
.config_aneg = m88e1510_config_aneg,
.read_status = marvell_read_status,
.config_intr = marvell_config_intr,
{
int i;
- vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
- if (!vi->ctrl)
- goto err_ctrl;
+ if (vi->has_cvq) {
+ vi->ctrl = kzalloc(sizeof(*vi->ctrl), GFP_KERNEL);
+ if (!vi->ctrl)
+ goto err_ctrl;
+ } else {
+ vi->ctrl = NULL;
+ }
vi->sq = kcalloc(vi->max_queue_pairs, sizeof(*vi->sq), GFP_KERNEL);
if (!vi->sq)
goto err_sq;
if (pad > 0) { /* Pad the frame with zeros */
if (__skb_pad(skb, pad, false))
- goto out;
+ goto drop;
skb_put(skb, pad);
}
}
return NETDEV_TX_OK;
drop:
- kfree_skb(skb);
-out:
dev->stats.tx_dropped++;
+ kfree_skb(skb);
return NETDEV_TX_OK;
}
if (mvm->tz_device.tzone) {
struct iwl_mvm_thermal_device *tz_dev = &mvm->tz_device;
- thermal_notify_framework(tz_dev->tzone,
- tz_dev->fw_trips_index[ths_crossed]);
+ thermal_zone_device_update(tz_dev->tzone,
+ THERMAL_TRIP_VIOLATED);
}
#endif /* CONFIG_THERMAL */
}
static const struct proc_ops prism2_aux_dump_proc_ops = {
.proc_read = prism2_aux_dump_proc_no_read,
+ .proc_lseek = default_llseek,
};
module_init(orinoco_nortel_init);
module_exit(orinoco_nortel_exit);
-
-/*
- * Local variables:
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
module_init(orinoco_pci_init);
module_exit(orinoco_pci_exit);
-
-/*
- * Local variables:
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
module_init(orinoco_plx_init);
module_exit(orinoco_plx_exit);
-
-/*
- * Local variables:
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
module_init(orinoco_tmd_init);
module_exit(orinoco_tmd_exit);
-
-/*
- * Local variables:
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
#include <linux/highmem.h>
#include <linux/debugfs.h>
#include <linux/blkdev.h>
+#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/mutex.h>
*/
#include <linux/blkdev.h>
+#include <linux/pagemap.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
kfree(ns);
}
+static inline bool nvme_get_ns(struct nvme_ns *ns)
+{
+ return kref_get_unless_zero(&ns->kref);
+}
+
void nvme_put_ns(struct nvme_ns *ns)
{
kref_put(&ns->kref, nvme_free_ns);
static inline void nvme_clear_nvme_request(struct request *req)
{
- struct nvme_command *cmd = nvme_req(req)->cmd;
-
- memset(cmd, 0, sizeof(*cmd));
nvme_req(req)->retries = 0;
nvme_req(req)->flags = 0;
req->rq_flags |= RQF_DONTPREP;
return req;
}
+/*
+ * For something we're not in a state to send to the device the default action
+ * is to busy it and retry it after the controller state is recovered. However,
+ * if the controller is deleting or if anything is marked for failfast or
+ * nvme multipath it is immediately failed.
+ *
+ * Note: commands used to initialize the controller will be marked for failfast.
+ * Note: nvme cli/ioctl commands are marked for failfast.
+ */
+blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
+ struct request *rq)
+{
+ if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
+ ctrl->state != NVME_CTRL_DEAD &&
+ !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
+ !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
+ return BLK_STS_RESOURCE;
+ return nvme_host_path_error(rq);
+}
+EXPORT_SYMBOL_GPL(nvme_fail_nonready_command);
+
+bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+ bool queue_live)
+{
+ struct nvme_request *req = nvme_req(rq);
+
+ /*
+ * currently we have a problem sending passthru commands
+ * on the admin_q if the controller is not LIVE because we can't
+ * make sure that they are going out after the admin connect,
+ * controller enable and/or other commands in the initialization
+ * sequence. until the controller will be LIVE, fail with
+ * BLK_STS_RESOURCE so that they will be rescheduled.
+ */
+ if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD))
+ return false;
+
+ if (ctrl->ops->flags & NVME_F_FABRICS) {
+ /*
+ * Only allow commands on a live queue, except for the connect
+ * command, which is require to set the queue live in the
+ * appropinquate states.
+ */
+ switch (ctrl->state) {
+ case NVME_CTRL_CONNECTING:
+ if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
+ req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
+ return true;
+ break;
+ default:
+ break;
+ case NVME_CTRL_DEAD:
+ return false;
+ }
+ }
+
+ return queue_live;
+}
+EXPORT_SYMBOL_GPL(__nvme_check_ready);
+
static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable)
{
struct nvme_command c;
struct nvme_command *cmd = nvme_req(req)->cmd;
blk_status_t ret = BLK_STS_OK;
- if (!(req->rq_flags & RQF_DONTPREP))
+ if (!(req->rq_flags & RQF_DONTPREP)) {
nvme_clear_nvme_request(req);
+ memset(cmd, 0, sizeof(*cmd));
+ }
switch (req_op(req)) {
case REQ_OP_DRV_IN:
/* should never be called due to GENHD_FL_HIDDEN */
if (WARN_ON_ONCE(nvme_ns_head_multipath(ns->head)))
goto fail;
- if (!kref_get_unless_zero(&ns->kref))
+ if (!nvme_get_ns(ns))
goto fail;
if (!try_module_get(ns->ctrl->ops->module))
goto fail_put_ns;
.pr_ops = &nvme_pr_ops,
};
-#ifdef CONFIG_NVME_MULTIPATH
-struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys)
-{
- struct nvme_ctrl *ctrl;
- int ret;
-
- ret = mutex_lock_killable(&nvme_subsystems_lock);
- if (ret)
- return ERR_PTR(ret);
- list_for_each_entry(ctrl, &subsys->ctrls, subsys_entry) {
- if (ctrl->state == NVME_CTRL_LIVE)
- goto found;
- }
- mutex_unlock(&nvme_subsystems_lock);
- return ERR_PTR(-EWOULDBLOCK);
-found:
- nvme_get_ctrl(ctrl);
- mutex_unlock(&nvme_subsystems_lock);
- return ctrl;
-}
-#endif /* CONFIG_NVME_MULTIPATH */
-
static int nvme_wait_ready(struct nvme_ctrl *ctrl, u64 cap, bool enabled)
{
unsigned long timeout =
down_read(&ctrl->namespaces_rwsem);
list_for_each_entry(ns, &ctrl->namespaces, list) {
if (ns->head->ns_id == nsid) {
- if (!kref_get_unless_zero(&ns->kref))
+ if (!nvme_get_ns(ns))
continue;
ret = ns;
break;
return NULL;
}
-/*
- * For something we're not in a state to send to the device the default action
- * is to busy it and retry it after the controller state is recovered. However,
- * if the controller is deleting or if anything is marked for failfast or
- * nvme multipath it is immediately failed.
- *
- * Note: commands used to initialize the controller will be marked for failfast.
- * Note: nvme cli/ioctl commands are marked for failfast.
- */
-blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
- struct request *rq)
-{
- if (ctrl->state != NVME_CTRL_DELETING_NOIO &&
- ctrl->state != NVME_CTRL_DEAD &&
- !test_bit(NVME_CTRL_FAILFAST_EXPIRED, &ctrl->flags) &&
- !blk_noretry_request(rq) && !(rq->cmd_flags & REQ_NVME_MPATH))
- return BLK_STS_RESOURCE;
- return nvme_host_path_error(rq);
-}
-EXPORT_SYMBOL_GPL(nvmf_fail_nonready_command);
-
-bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
- bool queue_live)
-{
- struct nvme_request *req = nvme_req(rq);
-
- /*
- * currently we have a problem sending passthru commands
- * on the admin_q if the controller is not LIVE because we can't
- * make sure that they are going out after the admin connect,
- * controller enable and/or other commands in the initialization
- * sequence. until the controller will be LIVE, fail with
- * BLK_STS_RESOURCE so that they will be rescheduled.
- */
- if (rq->q == ctrl->admin_q && (req->flags & NVME_REQ_USERCMD))
- return false;
-
- /*
- * Only allow commands on a live queue, except for the connect command,
- * which is require to set the queue live in the appropinquate states.
- */
- switch (ctrl->state) {
- case NVME_CTRL_CONNECTING:
- if (blk_rq_is_passthrough(rq) && nvme_is_fabrics(req->cmd) &&
- req->cmd->fabrics.fctype == nvme_fabrics_type_connect)
- return true;
- break;
- default:
- break;
- case NVME_CTRL_DEAD:
- return false;
- }
-
- return queue_live;
-}
-EXPORT_SYMBOL_GPL(__nvmf_check_ready);
-
static const match_table_t opt_tokens = {
{ NVMF_OPT_TRANSPORT, "transport=%s" },
{ NVMF_OPT_TRADDR, "traddr=%s" },
void nvmf_free_options(struct nvmf_ctrl_options *opts);
int nvmf_get_address(struct nvme_ctrl *ctrl, char *buf, int size);
bool nvmf_should_reconnect(struct nvme_ctrl *ctrl);
-blk_status_t nvmf_fail_nonready_command(struct nvme_ctrl *ctrl,
- struct request *rq);
-bool __nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
- bool queue_live);
bool nvmf_ip_options_match(struct nvme_ctrl *ctrl,
struct nvmf_ctrl_options *opts);
-static inline bool nvmf_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
- bool queue_live)
-{
- if (likely(ctrl->state == NVME_CTRL_LIVE ||
- ctrl->state == NVME_CTRL_DELETING))
- return true;
- return __nvmf_check_ready(ctrl, rq, queue_live);
-}
-
#endif /* _NVME_FABRICS_H */
blk_status_t ret;
if (ctrl->rport->remoteport.port_state != FC_OBJSTATE_ONLINE ||
- !nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
- return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+ !nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+ return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
ret = nvme_setup_cmd(ns, rq);
if (ret)
}
#ifdef CONFIG_NVME_MULTIPATH
-static int nvme_ns_head_ctrl_ioctl(struct nvme_ns_head *head,
- unsigned int cmd, void __user *argp)
+static int nvme_ns_head_ctrl_ioctl(struct nvme_ns *ns, unsigned int cmd,
+ void __user *argp, struct nvme_ns_head *head, int srcu_idx)
{
- struct nvme_ctrl *ctrl = nvme_find_get_live_ctrl(head->subsys);
+ struct nvme_ctrl *ctrl = ns->ctrl;
int ret;
- if (IS_ERR(ctrl))
- return PTR_ERR(ctrl);
- ret = nvme_ctrl_ioctl(ctrl, cmd, argp);
- nvme_put_ctrl(ctrl);
- return ret;
-}
+ nvme_get_ctrl(ns->ctrl);
+ nvme_put_ns_from_disk(head, srcu_idx);
+ ret = nvme_ctrl_ioctl(ns->ctrl, cmd, argp);
-static int nvme_ns_head_ns_ioctl(struct nvme_ns_head *head,
- unsigned int cmd, void __user *argp)
-{
- int srcu_idx = srcu_read_lock(&head->srcu);
- struct nvme_ns *ns = nvme_find_path(head);
- int ret = -EWOULDBLOCK;
-
- if (ns)
- ret = nvme_ns_ioctl(ns, cmd, argp);
- srcu_read_unlock(&head->srcu, srcu_idx);
+ nvme_put_ctrl(ctrl);
return ret;
}
int nvme_ns_head_ioctl(struct block_device *bdev, fmode_t mode,
unsigned int cmd, unsigned long arg)
{
- struct nvme_ns_head *head = bdev->bd_disk->private_data;
+ struct nvme_ns_head *head = NULL;
void __user *argp = (void __user *)arg;
+ struct nvme_ns *ns;
+ int srcu_idx, ret;
+
+ ns = nvme_get_ns_from_disk(bdev->bd_disk, &head, &srcu_idx);
+ if (unlikely(!ns))
+ return -EWOULDBLOCK;
+ /*
+ * Handle ioctls that apply to the controller instead of the namespace
+ * seperately and drop the ns SRCU reference early. This avoids a
+ * deadlock when deleting namespaces using the passthrough interface.
+ */
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
- return nvme_ns_head_ns_ioctl(head, cmd, argp);
+ ret = nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+ else {
+ ret = nvme_ns_ioctl(ns, cmd, argp);
+ nvme_put_ns_from_disk(head, srcu_idx);
+ }
+
+ return ret;
}
long nvme_ns_head_chr_ioctl(struct file *file, unsigned int cmd,
struct nvme_ns_head *head =
container_of(cdev, struct nvme_ns_head, cdev);
void __user *argp = (void __user *)arg;
+ struct nvme_ns *ns;
+ int srcu_idx, ret;
+
+ srcu_idx = srcu_read_lock(&head->srcu);
+ ns = nvme_find_path(head);
+ if (!ns) {
+ srcu_read_unlock(&head->srcu, srcu_idx);
+ return -EWOULDBLOCK;
+ }
if (is_ctrl_ioctl(cmd))
- return nvme_ns_head_ctrl_ioctl(head, cmd, argp);
- return nvme_ns_head_ns_ioctl(head, cmd, argp);
+ return nvme_ns_head_ctrl_ioctl(ns, cmd, argp, head, srcu_idx);
+
+ ret = nvme_ns_ioctl(ns, cmd, argp);
+ nvme_put_ns_from_disk(head, srcu_idx);
+
+ return ret;
}
#endif /* CONFIG_NVME_MULTIPATH */
struct nvme_ns *ns = req->q->queuedata;
u16 status = nvme_req(req)->status & 0x7ff;
unsigned long flags;
+ struct bio *bio;
nvme_mpath_clear_current_path(ns);
}
spin_lock_irqsave(&ns->head->requeue_lock, flags);
+ for (bio = req->bio; bio; bio = bio->bi_next)
+ bio_set_dev(bio, ns->head->disk->part0);
blk_steal_bios(&ns->head->requeue_list, req);
spin_unlock_irqrestore(&ns->head->requeue_lock, flags);
struct nvme_command *cmd, blk_mq_req_flags_t flags);
void nvme_cleanup_cmd(struct request *req);
blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req);
+blk_status_t nvme_fail_nonready_command(struct nvme_ctrl *ctrl,
+ struct request *req);
+bool __nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+ bool queue_live);
+
+static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq,
+ bool queue_live)
+{
+ if (likely(ctrl->state == NVME_CTRL_LIVE))
+ return true;
+ if (ctrl->ops->flags & NVME_F_FABRICS &&
+ ctrl->state == NVME_CTRL_DELETING)
+ return true;
+ return __nvme_check_ready(ctrl, rq, queue_live);
+}
int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void *buf, unsigned bufflen);
int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd,
void nvme_put_ns_from_disk(struct nvme_ns_head *head, int idx);
bool nvme_tryget_ns_head(struct nvme_ns_head *head);
void nvme_put_ns_head(struct nvme_ns_head *head);
-struct nvme_ctrl *nvme_find_get_live_ctrl(struct nvme_subsystem *subsys);
int nvme_cdev_add(struct cdev *cdev, struct device *cdev_device,
const struct file_operations *fops, struct module *owner);
void nvme_cdev_del(struct cdev *cdev, struct device *cdev_device);
if (unlikely(!test_bit(NVMEQ_ENABLED, &nvmeq->flags)))
return BLK_STS_IOERR;
+ if (!nvme_check_ready(&dev->ctrl, req, true))
+ return nvme_fail_nonready_command(&dev->ctrl, req);
+
ret = nvme_setup_cmd(ns, req);
if (ret)
return ret;
WARN_ON_ONCE(rq->tag < 0);
- if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
- return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+ if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+ return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
dev = queue->device->dev;
bool queue_ready = test_bit(NVME_TCP_Q_LIVE, &queue->flags);
blk_status_t ret;
- if (!nvmf_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
- return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
+ if (!nvme_check_ready(&queue->ctrl->ctrl, rq, queue_ready))
+ return nvme_fail_nonready_command(&queue->ctrl->ctrl, rq);
ret = nvme_tcp_setup_cmd_pdu(ns, rq);
if (unlikely(ret))
case NVME_LOG_ANA:
return nvmet_execute_get_log_page_ana(req);
}
- pr_err("unhandled lid %d on qid %d\n",
+ pr_debug("unhandled lid %d on qid %d\n",
req->cmd->get_log_page.lid, req->sq->qid);
req->error_loc = offsetof(struct nvme_get_log_page_command, lid);
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
return nvmet_execute_identify_desclist(req);
}
- pr_err("unhandled identify cns %d on qid %d\n",
+ pr_debug("unhandled identify cns %d on qid %d\n",
req->cmd->identify.cns, req->sq->qid);
req->error_loc = offsetof(struct nvme_identify, cns);
nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
return 0;
}
- pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
+ pr_debug("unhandled cmd %d on qid %d\n", cmd->common.opcode,
req->sq->qid);
req->error_loc = offsetof(struct nvme_common_command, opcode);
return NVME_SC_INVALID_OPCODE | NVME_SC_DNR;
bool queue_ready = test_bit(NVME_LOOP_Q_LIVE, &queue->flags);
blk_status_t ret;
- if (!nvmf_check_ready(&queue->ctrl->ctrl, req, queue_ready))
- return nvmf_fail_nonready_command(&queue->ctrl->ctrl, req);
+ if (!nvme_check_ready(&queue->ctrl->ctrl, req, queue_ready))
+ return nvme_fail_nonready_command(&queue->ctrl->ctrl, req);
ret = nvme_setup_cmd(ns, req);
if (ret)
struct device_node *overlay_root = NULL;
*ovcs_id = 0;
- ret = 0;
if (overlay_fdt_size < sizeof(struct fdt_header) ||
fdt_check_header(overlay_fdt)) {
struct overlay_changeset *ovcs;
int ret, ret_apply, ret_tmp;
- ret = 0;
-
if (devicetree_corrupt()) {
pr_err("suspect devicetree state, refuse to remove overlay\n");
ret = -EBUSY;
", bit 2: hardware SPP mode"
", bit 3: hardware EPP mode"
", bit 4: hardware ECP mode");
-
-/*--- Inform (X)Emacs about preferred coding style ---------------------*/
-/*
- * Local Variables:
- * mode: c
- * c-file-style: "linux"
- * indent-tabs-mode: t
- * tab-width: 8
- * fill-column: 78
- * ispell-local-dictionary: "american"
- * End:
- */
#define PASID_NUMBER_SHIFT 8
#define PASID_NUMBER_MASK (0x1f << PASID_NUMBER_SHIFT)
/**
- * pci_max_pasid - Get maximum number of PASIDs supported by device
+ * pci_max_pasids - Get maximum number of PASIDs supported by device
* @pdev: PCI device structure
*
* Returns negative value when PASID capability is not present.
bool "NVIDIA Tegra PCIe controller"
depends on ARCH_TEGRA || COMPILE_TEST
depends on PCI_MSI_IRQ_DOMAIN
- select PCI_MSI_ARCH_FALLBACKS
help
Say Y here if you want support for the PCIe host controller found
on NVIDIA Tegra SoCs.
bool "Renesas R-Car PCIe host controller"
depends on ARCH_RENESAS || COMPILE_TEST
depends on PCI_MSI_IRQ_DOMAIN
- select PCI_MSI_ARCH_FALLBACKS
help
Say Y here if you want PCIe controller support on R-Car SoCs in host
mode.
config PCIE_XILINX
bool "Xilinx AXI PCIe host bridge support"
depends on OF || COMPILE_TEST
- select PCI_MSI_ARCH_FALLBACKS
+ depends on PCI_MSI_IRQ_DOMAIN
help
Say 'Y' here if you want kernel to support the Xilinx AXI PCIe
Host Bridge driver.
Say Y here if you want to enable PCIe controller support on
MediaTek SoCs.
+config PCIE_MEDIATEK_GEN3
+ tristate "MediaTek Gen3 PCIe controller"
+ depends on ARCH_MEDIATEK || COMPILE_TEST
+ depends on PCI_MSI_IRQ_DOMAIN
+ help
+ Adds support for PCIe Gen3 MAC controller for MediaTek SoCs.
+ This PCIe controller is compatible with Gen3, Gen2 and Gen1 speed,
+ and support up to 256 MSI interrupt numbers for
+ multi-function devices.
+
+ Say Y here if you want to enable Gen3 PCIe controller support on
+ MediaTek SoCs.
+
config VMD
depends on PCI_MSI && X86_64 && SRCU
tristate "Intel Volume Management Device Driver"
obj-$(CONFIG_PCIE_RCAR_EP) += pcie-rcar.o pcie-rcar-ep.o
obj-$(CONFIG_PCI_HOST_COMMON) += pci-host-common.o
obj-$(CONFIG_PCI_HOST_GENERIC) += pci-host-generic.o
+obj-$(CONFIG_PCI_HOST_THUNDER_ECAM) += pci-thunder-ecam.o
+obj-$(CONFIG_PCI_HOST_THUNDER_PEM) += pci-thunder-pem.o
obj-$(CONFIG_PCIE_XILINX) += pcie-xilinx.o
obj-$(CONFIG_PCIE_XILINX_NWL) += pcie-xilinx-nwl.o
obj-$(CONFIG_PCIE_XILINX_CPM) += pcie-xilinx-cpm.o
obj-$(CONFIG_PCI_V3_SEMI) += pci-v3-semi.o
+obj-$(CONFIG_PCI_XGENE) += pci-xgene.o
obj-$(CONFIG_PCI_XGENE_MSI) += pci-xgene-msi.o
obj-$(CONFIG_PCI_VERSATILE) += pci-versatile.o
obj-$(CONFIG_PCIE_IPROC) += pcie-iproc.o
obj-$(CONFIG_PCIE_ROCKCHIP_EP) += pcie-rockchip-ep.o
obj-$(CONFIG_PCIE_ROCKCHIP_HOST) += pcie-rockchip-host.o
obj-$(CONFIG_PCIE_MEDIATEK) += pcie-mediatek.o
+obj-$(CONFIG_PCIE_MEDIATEK_GEN3) += pcie-mediatek-gen3.o
obj-$(CONFIG_PCIE_MICROCHIP_HOST) += pcie-microchip-host.o
obj-$(CONFIG_VMD) += vmd.o
obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o
# ARM64 and use internal ifdefs to only build the pieces we need
# depending on whether ACPI, the DT driver, or both are enabled.
-ifdef CONFIG_PCI
+ifdef CONFIG_ACPI
+ifdef CONFIG_PCI_QUIRKS
obj-$(CONFIG_ARM64) += pci-thunder-ecam.o
obj-$(CONFIG_ARM64) += pci-thunder-pem.o
obj-$(CONFIG_ARM64) += pci-xgene.o
endif
+endif
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* pci-j721e - PCIe controller driver for TI's J721E SoCs
*
* Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com
* Author: Kishon Vijay Abraham I <kishon@ti.com>
*/
+#include <linux/clk.h>
#include <linux/delay.h>
#include <linux/gpio/consumer.h>
#include <linux/io.h>
struct j721e_pcie {
struct device *dev;
+ struct clk *refclk;
u32 mode;
u32 num_lanes;
struct cdns_pcie *cdns_pcie;
struct cdns_pcie_ep *ep;
struct gpio_desc *gpiod;
void __iomem *base;
+ struct clk *clk;
u32 num_lanes;
u32 mode;
int ret;
goto err_get_sync;
}
+ clk = devm_clk_get_optional(dev, "pcie_refclk");
+ if (IS_ERR(clk)) {
+ ret = PTR_ERR(clk);
+ dev_err(dev, "failed to get pcie_refclk\n");
+ goto err_pcie_setup;
+ }
+
+ ret = clk_prepare_enable(clk);
+ if (ret) {
+ dev_err(dev, "failed to enable pcie_refclk\n");
+ goto err_get_sync;
+ }
+ pcie->refclk = clk;
+
/*
* "Power Sequencing and Reset Signal Timings" table in
* PCI EXPRESS CARD ELECTROMECHANICAL SPECIFICATION, REV. 3.0
}
ret = cdns_pcie_host_setup(rc);
- if (ret < 0)
+ if (ret < 0) {
+ clk_disable_unprepare(pcie->refclk);
goto err_pcie_setup;
+ }
break;
case PCI_MODE_EP:
struct cdns_pcie *cdns_pcie = pcie->cdns_pcie;
struct device *dev = &pdev->dev;
+ clk_disable_unprepare(pcie->refclk);
cdns_pcie_disable_phy(cdns_pcie);
pm_runtime_put(dev);
pm_runtime_disable(dev);
select PCIE_TEGRA194
help
Enables support for the PCIe controller in the NVIDIA Tegra194 SoC to
- work in host mode. There are two instances of PCIe controllers in
+ work in endpoint mode. There are two instances of PCIe controllers in
Tegra194. This controller can work either as EP or RC. In order to
enable host-specific features PCIE_TEGRA194_HOST must be selected and
in order to enable device-specific features PCIE_TEGRA194_EP must be
depends on OF && (ARM64 || COMPILE_TEST)
depends on PCI_MSI_IRQ_DOMAIN
select PCIE_DW_HOST
+ select PCI_ECAM
help
Say Y here to enable support of the Amazon's Annapurna Labs PCIe
controller IP on Amazon SoCs. The PCIe controller uses the DesignWare
required only for DT-based platforms. ACPI platforms with the
Annapurna Labs PCIe controller don't need to enable this.
+config PCIE_FU740
+ bool "SiFive FU740 PCIe host controller"
+ depends on PCI_MSI_IRQ_DOMAIN
+ depends on SOC_SIFIVE || COMPILE_TEST
+ select PCIE_DW_HOST
+ help
+ Say Y here if you want PCIe controller support for the SiFive
+ FU740.
+
endmenu
obj-$(CONFIG_PCIE_DW_PLAT) += pcie-designware-plat.o
obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
+obj-$(CONFIG_PCIE_FU740) += pcie-fu740.o
obj-$(CONFIG_PCI_IMX6) += pci-imx6.o
obj-$(CONFIG_PCIE_SPEAR13XX) += pcie-spear13xx.o
obj-$(CONFIG_PCI_KEYSTONE) += pci-keystone.o
obj-$(CONFIG_PCIE_KIRIN) += pcie-kirin.o
obj-$(CONFIG_PCIE_HISI_STB) += pcie-histb.o
obj-$(CONFIG_PCI_MESON) += pci-meson.o
-obj-$(CONFIG_PCIE_TEGRA194) += pcie-tegra194.o
obj-$(CONFIG_PCIE_UNIPHIER) += pcie-uniphier.o
obj-$(CONFIG_PCIE_UNIPHIER_EP) += pcie-uniphier-ep.o
# ARM64 and use internal ifdefs to only build the pieces we need
# depending on whether ACPI, the DT driver, or both are enabled.
-ifdef CONFIG_PCI
+obj-$(CONFIG_PCIE_AL) += pcie-al.o
+obj-$(CONFIG_PCI_HISI) += pcie-hisi.o
+
+ifdef CONFIG_ACPI
+ifdef CONFIG_PCI_QUIRKS
obj-$(CONFIG_ARM64) += pcie-al.o
obj-$(CONFIG_ARM64) += pcie-hisi.o
+obj-$(CONFIG_ARM64) += pcie-tegra194.o
+endif
endif
};
/**
- * ks_pcie_set_dbi_mode() - Set DBI mode to access overlaid BAR mask
- * registers
+ * ks_pcie_set_dbi_mode() - Set DBI mode to access overlaid BAR mask registers
+ * @ks_pcie: A pointer to the keystone_pcie structure which holds the KeyStone
+ * PCIe host controller driver information.
*
* Since modification of dbi_cs2 involves different clock domain, read the
* status back to ensure the transition is complete.
/**
* ks_pcie_clear_dbi_mode() - Disable DBI mode
+ * @ks_pcie: A pointer to the keystone_pcie structure which holds the KeyStone
+ * PCIe host controller driver information.
*
* Since modification of dbi_cs2 involves different clock domain, read the
* status back to ensure the transition is complete.
/**
* ks_pcie_v3_65_add_bus() - keystone add_bus post initialization
+ * @bus: A pointer to the PCI bus structure.
*
* This sets BAR0 to enable inbound access for MSI_IRQ register
*/
/**
* ks_pcie_link_up() - Check if link up
+ * @pci: A pointer to the dw_pcie structure which holds the DesignWare PCIe host
+ * controller driver information.
*/
static int ks_pcie_link_up(struct dw_pcie *pci)
{
/**
* ks_pcie_legacy_irq_handler() - Handle legacy interrupt
- * @irq: IRQ line for legacy interrupts
* @desc: Pointer to irq descriptor
*
* Traverse through pending legacy interrupts and invoke handler for each. Also
int ret;
pp->bridge->ops = &ks_pcie_ops;
- pp->bridge->child_ops = &ks_child_pcie_ops;
+ if (!ks_pcie->is_am6)
+ pp->bridge->child_ops = &ks_child_pcie_ops;
ret = ks_pcie_config_legacy_irq(ks_pcie);
if (ret)
pci->dev = dev;
pci->ops = pcie->drvdata->dw_pcie_ops;
- ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4),
+ ls_epc->bar_fixed_64bit = (1 << BAR_2) | (1 << BAR_4);
pcie->pci = pci;
pcie->ls_epc = ls_epc;
}
}
+ dw_pcie_iatu_detect(pci);
+
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "addr_space");
if (!res)
return -EINVAL;
if (ret)
goto err_free_msi;
}
+ dw_pcie_iatu_detect(pci);
dw_pcie_setup_rc(pp);
- dw_pcie_msi_init(pp);
if (!dw_pcie_link_up(pci) && pci->ops && pci->ops->start_link) {
ret = pci->ops->start_link(pci);
}
}
+ dw_pcie_msi_init(pp);
+
/* Setup RC BARs */
dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_0, 0x00000004);
dw_pcie_writel_dbi(pci, PCI_BASE_ADDRESS_1, 0x00000000);
pci->num_ob_windows = ob;
}
-void dw_pcie_setup(struct dw_pcie *pci)
+void dw_pcie_iatu_detect(struct dw_pcie *pci)
{
- u32 val;
struct device *dev = pci->dev;
- struct device_node *np = dev->of_node;
struct platform_device *pdev = to_platform_device(dev);
if (pci->version >= 0x480A || (!pci->version &&
dev_info(pci->dev, "Detected iATU regions: %u outbound, %u inbound",
pci->num_ob_windows, pci->num_ib_windows);
+}
+
+void dw_pcie_setup(struct dw_pcie *pci)
+{
+ u32 val;
+ struct device *dev = pci->dev;
+ struct device_node *np = dev->of_node;
if (pci->link_gen > 0)
dw_pcie_link_set_max_speed(pci, pci->link_gen);
void dw_pcie_disable_atu(struct dw_pcie *pci, int index,
enum dw_pcie_region_type type);
void dw_pcie_setup(struct dw_pcie *pci);
+void dw_pcie_iatu_detect(struct dw_pcie *pci);
static inline void dw_pcie_writel_dbi(struct dw_pcie *pci, u32 reg, u32 val)
{
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * FU740 DesignWare PCIe Controller integration
+ * Copyright (C) 2019-2021 SiFive, Inc.
+ * Paul Walmsley
+ * Greentime Hu
+ *
+ * Based in part on the i.MX6 PCIe host controller shim which is:
+ *
+ * Copyright (C) 2013 Kosagi
+ * https://www.kosagi.com
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/gpio.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/regulator/consumer.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/reset.h>
+
+#include "pcie-designware.h"
+
+#define to_fu740_pcie(x) dev_get_drvdata((x)->dev)
+
+struct fu740_pcie {
+ struct dw_pcie pci;
+ void __iomem *mgmt_base;
+ struct gpio_desc *reset;
+ struct gpio_desc *pwren;
+ struct clk *pcie_aux;
+ struct reset_control *rst;
+};
+
+#define SIFIVE_DEVICESRESETREG 0x28
+
+#define PCIEX8MGMT_PERST_N 0x0
+#define PCIEX8MGMT_APP_LTSSM_ENABLE 0x10
+#define PCIEX8MGMT_APP_HOLD_PHY_RST 0x18
+#define PCIEX8MGMT_DEVICE_TYPE 0x708
+#define PCIEX8MGMT_PHY0_CR_PARA_ADDR 0x860
+#define PCIEX8MGMT_PHY0_CR_PARA_RD_EN 0x870
+#define PCIEX8MGMT_PHY0_CR_PARA_RD_DATA 0x878
+#define PCIEX8MGMT_PHY0_CR_PARA_SEL 0x880
+#define PCIEX8MGMT_PHY0_CR_PARA_WR_DATA 0x888
+#define PCIEX8MGMT_PHY0_CR_PARA_WR_EN 0x890
+#define PCIEX8MGMT_PHY0_CR_PARA_ACK 0x898
+#define PCIEX8MGMT_PHY1_CR_PARA_ADDR 0x8a0
+#define PCIEX8MGMT_PHY1_CR_PARA_RD_EN 0x8b0
+#define PCIEX8MGMT_PHY1_CR_PARA_RD_DATA 0x8b8
+#define PCIEX8MGMT_PHY1_CR_PARA_SEL 0x8c0
+#define PCIEX8MGMT_PHY1_CR_PARA_WR_DATA 0x8c8
+#define PCIEX8MGMT_PHY1_CR_PARA_WR_EN 0x8d0
+#define PCIEX8MGMT_PHY1_CR_PARA_ACK 0x8d8
+
+#define PCIEX8MGMT_PHY_CDR_TRACK_EN BIT(0)
+#define PCIEX8MGMT_PHY_LOS_THRSHLD BIT(5)
+#define PCIEX8MGMT_PHY_TERM_EN BIT(9)
+#define PCIEX8MGMT_PHY_TERM_ACDC BIT(10)
+#define PCIEX8MGMT_PHY_EN BIT(11)
+#define PCIEX8MGMT_PHY_INIT_VAL (PCIEX8MGMT_PHY_CDR_TRACK_EN|\
+ PCIEX8MGMT_PHY_LOS_THRSHLD|\
+ PCIEX8MGMT_PHY_TERM_EN|\
+ PCIEX8MGMT_PHY_TERM_ACDC|\
+ PCIEX8MGMT_PHY_EN)
+
+#define PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 0x1008
+#define PCIEX8MGMT_PHY_LANE_OFF 0x100
+#define PCIEX8MGMT_PHY_LANE0_BASE (PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 + 0x100 * 0)
+#define PCIEX8MGMT_PHY_LANE1_BASE (PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 + 0x100 * 1)
+#define PCIEX8MGMT_PHY_LANE2_BASE (PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 + 0x100 * 2)
+#define PCIEX8MGMT_PHY_LANE3_BASE (PCIEX8MGMT_PHY_LANEN_DIG_ASIC_RX_OVRD_IN_3 + 0x100 * 3)
+
+static void fu740_pcie_assert_reset(struct fu740_pcie *afp)
+{
+ /* Assert PERST_N GPIO */
+ gpiod_set_value_cansleep(afp->reset, 0);
+ /* Assert controller PERST_N */
+ writel_relaxed(0x0, afp->mgmt_base + PCIEX8MGMT_PERST_N);
+}
+
+static void fu740_pcie_deassert_reset(struct fu740_pcie *afp)
+{
+ /* Deassert controller PERST_N */
+ writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_PERST_N);
+ /* Deassert PERST_N GPIO */
+ gpiod_set_value_cansleep(afp->reset, 1);
+}
+
+static void fu740_pcie_power_on(struct fu740_pcie *afp)
+{
+ gpiod_set_value_cansleep(afp->pwren, 1);
+ /*
+ * Ensure that PERST has been asserted for at least 100 ms.
+ * Section 2.2 of PCI Express Card Electromechanical Specification
+ * Revision 3.0
+ */
+ msleep(100);
+}
+
+static void fu740_pcie_drive_reset(struct fu740_pcie *afp)
+{
+ fu740_pcie_assert_reset(afp);
+ fu740_pcie_power_on(afp);
+ fu740_pcie_deassert_reset(afp);
+}
+
+static void fu740_phyregwrite(const uint8_t phy, const uint16_t addr,
+ const uint16_t wrdata, struct fu740_pcie *afp)
+{
+ struct device *dev = afp->pci.dev;
+ void __iomem *phy_cr_para_addr;
+ void __iomem *phy_cr_para_wr_data;
+ void __iomem *phy_cr_para_wr_en;
+ void __iomem *phy_cr_para_ack;
+ int ret, val;
+
+ /* Setup */
+ if (phy) {
+ phy_cr_para_addr = afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_ADDR;
+ phy_cr_para_wr_data = afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_WR_DATA;
+ phy_cr_para_wr_en = afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_WR_EN;
+ phy_cr_para_ack = afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_ACK;
+ } else {
+ phy_cr_para_addr = afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_ADDR;
+ phy_cr_para_wr_data = afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_WR_DATA;
+ phy_cr_para_wr_en = afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_WR_EN;
+ phy_cr_para_ack = afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_ACK;
+ }
+
+ writel_relaxed(addr, phy_cr_para_addr);
+ writel_relaxed(wrdata, phy_cr_para_wr_data);
+ writel_relaxed(1, phy_cr_para_wr_en);
+
+ /* Wait for wait_idle */
+ ret = readl_poll_timeout(phy_cr_para_ack, val, val, 10, 5000);
+ if (ret)
+ dev_warn(dev, "Wait for wait_idle state failed!\n");
+
+ /* Clear */
+ writel_relaxed(0, phy_cr_para_wr_en);
+
+ /* Wait for ~wait_idle */
+ ret = readl_poll_timeout(phy_cr_para_ack, val, !val, 10, 5000);
+ if (ret)
+ dev_warn(dev, "Wait for !wait_idle state failed!\n");
+}
+
+static void fu740_pcie_init_phy(struct fu740_pcie *afp)
+{
+ /* Enable phy cr_para_sel interfaces */
+ writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_PHY0_CR_PARA_SEL);
+ writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_PHY1_CR_PARA_SEL);
+
+ /*
+ * Wait 10 cr_para cycles to guarantee that the registers are ready
+ * to be edited.
+ */
+ ndelay(10);
+
+ /* Set PHY AC termination mode */
+ fu740_phyregwrite(0, PCIEX8MGMT_PHY_LANE0_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+ fu740_phyregwrite(0, PCIEX8MGMT_PHY_LANE1_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+ fu740_phyregwrite(0, PCIEX8MGMT_PHY_LANE2_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+ fu740_phyregwrite(0, PCIEX8MGMT_PHY_LANE3_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+ fu740_phyregwrite(1, PCIEX8MGMT_PHY_LANE0_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+ fu740_phyregwrite(1, PCIEX8MGMT_PHY_LANE1_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+ fu740_phyregwrite(1, PCIEX8MGMT_PHY_LANE2_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+ fu740_phyregwrite(1, PCIEX8MGMT_PHY_LANE3_BASE, PCIEX8MGMT_PHY_INIT_VAL, afp);
+}
+
+static int fu740_pcie_start_link(struct dw_pcie *pci)
+{
+ struct device *dev = pci->dev;
+ struct fu740_pcie *afp = dev_get_drvdata(dev);
+
+ /* Enable LTSSM */
+ writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_LTSSM_ENABLE);
+ return 0;
+}
+
+static int fu740_pcie_host_init(struct pcie_port *pp)
+{
+ struct dw_pcie *pci = to_dw_pcie_from_pp(pp);
+ struct fu740_pcie *afp = to_fu740_pcie(pci);
+ struct device *dev = pci->dev;
+ int ret;
+
+ /* Power on reset */
+ fu740_pcie_drive_reset(afp);
+
+ /* Enable pcieauxclk */
+ ret = clk_prepare_enable(afp->pcie_aux);
+ if (ret) {
+ dev_err(dev, "unable to enable pcie_aux clock\n");
+ return ret;
+ }
+
+ /*
+ * Assert hold_phy_rst (hold the controller LTSSM in reset after
+ * power_up_rst_n for register programming with cr_para)
+ */
+ writel_relaxed(0x1, afp->mgmt_base + PCIEX8MGMT_APP_HOLD_PHY_RST);
+
+ /* Deassert power_up_rst_n */
+ ret = reset_control_deassert(afp->rst);
+ if (ret) {
+ dev_err(dev, "unable to deassert pcie_power_up_rst_n\n");
+ return ret;
+ }
+
+ fu740_pcie_init_phy(afp);
+
+ /* Disable pcieauxclk */
+ clk_disable_unprepare(afp->pcie_aux);
+ /* Clear hold_phy_rst */
+ writel_relaxed(0x0, afp->mgmt_base + PCIEX8MGMT_APP_HOLD_PHY_RST);
+ /* Enable pcieauxclk */
+ ret = clk_prepare_enable(afp->pcie_aux);
+ /* Set RC mode */
+ writel_relaxed(0x4, afp->mgmt_base + PCIEX8MGMT_DEVICE_TYPE);
+
+ return 0;
+}
+
+static const struct dw_pcie_host_ops fu740_pcie_host_ops = {
+ .host_init = fu740_pcie_host_init,
+};
+
+static const struct dw_pcie_ops dw_pcie_ops = {
+ .start_link = fu740_pcie_start_link,
+};
+
+static int fu740_pcie_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct dw_pcie *pci;
+ struct fu740_pcie *afp;
+
+ afp = devm_kzalloc(dev, sizeof(*afp), GFP_KERNEL);
+ if (!afp)
+ return -ENOMEM;
+ pci = &afp->pci;
+ pci->dev = dev;
+ pci->ops = &dw_pcie_ops;
+ pci->pp.ops = &fu740_pcie_host_ops;
+
+ /* SiFive specific region: mgmt */
+ afp->mgmt_base = devm_platform_ioremap_resource_byname(pdev, "mgmt");
+ if (IS_ERR(afp->mgmt_base))
+ return PTR_ERR(afp->mgmt_base);
+
+ /* Fetch GPIOs */
+ afp->reset = devm_gpiod_get_optional(dev, "reset-gpios", GPIOD_OUT_LOW);
+ if (IS_ERR(afp->reset))
+ return dev_err_probe(dev, PTR_ERR(afp->reset), "unable to get reset-gpios\n");
+
+ afp->pwren = devm_gpiod_get_optional(dev, "pwren-gpios", GPIOD_OUT_LOW);
+ if (IS_ERR(afp->pwren))
+ return dev_err_probe(dev, PTR_ERR(afp->pwren), "unable to get pwren-gpios\n");
+
+ /* Fetch clocks */
+ afp->pcie_aux = devm_clk_get(dev, "pcie_aux");
+ if (IS_ERR(afp->pcie_aux))
+ return dev_err_probe(dev, PTR_ERR(afp->pcie_aux),
+ "pcie_aux clock source missing or invalid\n");
+
+ /* Fetch reset */
+ afp->rst = devm_reset_control_get_exclusive(dev, NULL);
+ if (IS_ERR(afp->rst))
+ return dev_err_probe(dev, PTR_ERR(afp->rst), "unable to get reset\n");
+
+ platform_set_drvdata(pdev, afp);
+
+ return dw_pcie_host_init(&pci->pp);
+}
+
+static void fu740_pcie_shutdown(struct platform_device *pdev)
+{
+ struct fu740_pcie *afp = platform_get_drvdata(pdev);
+
+ /* Bring down link, so bootloader gets clean state in case of reboot */
+ fu740_pcie_assert_reset(afp);
+}
+
+static const struct of_device_id fu740_pcie_of_match[] = {
+ { .compatible = "sifive,fu740-pcie", },
+ {},
+};
+
+static struct platform_driver fu740_pcie_driver = {
+ .driver = {
+ .name = "fu740-pcie",
+ .of_match_table = fu740_pcie_of_match,
+ .suppress_bind_attrs = true,
+ },
+ .probe = fu740_pcie_probe,
+ .shutdown = fu740_pcie_shutdown,
+};
+
+builtin_platform_driver(fu740_pcie_driver);
writel(val, base + ofs);
}
-static inline u32 pcie_app_rd(struct intel_pcie_port *lpp, u32 ofs)
-{
- return readl(lpp->app_base + ofs);
-}
-
static inline void pcie_app_wr(struct intel_pcie_port *lpp, u32 ofs, u32 val)
{
writel(val, lpp->app_base + ofs);
#include <linux/of_irq.h>
#include <linux/of_pci.h>
#include <linux/pci.h>
+#include <linux/pci-acpi.h>
+#include <linux/pci-ecam.h>
#include <linux/phy/phy.h>
#include <linux/pinctrl/consumer.h>
#include <linux/platform_device.h>
enum dw_pcie_device_mode mode;
};
+#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
+struct tegra194_pcie_ecam {
+ void __iomem *config_base;
+ void __iomem *iatu_base;
+ void __iomem *dbi_base;
+};
+
+static int tegra194_acpi_init(struct pci_config_window *cfg)
+{
+ struct device *dev = cfg->parent;
+ struct tegra194_pcie_ecam *pcie_ecam;
+
+ pcie_ecam = devm_kzalloc(dev, sizeof(*pcie_ecam), GFP_KERNEL);
+ if (!pcie_ecam)
+ return -ENOMEM;
+
+ pcie_ecam->config_base = cfg->win;
+ pcie_ecam->iatu_base = cfg->win + SZ_256K;
+ pcie_ecam->dbi_base = cfg->win + SZ_512K;
+ cfg->priv = pcie_ecam;
+
+ return 0;
+}
+
+static void atu_reg_write(struct tegra194_pcie_ecam *pcie_ecam, int index,
+ u32 val, u32 reg)
+{
+ u32 offset = PCIE_GET_ATU_OUTB_UNR_REG_OFFSET(index);
+
+ writel(val, pcie_ecam->iatu_base + offset + reg);
+}
+
+static void program_outbound_atu(struct tegra194_pcie_ecam *pcie_ecam,
+ int index, int type, u64 cpu_addr,
+ u64 pci_addr, u64 size)
+{
+ atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr),
+ PCIE_ATU_LOWER_BASE);
+ atu_reg_write(pcie_ecam, index, upper_32_bits(cpu_addr),
+ PCIE_ATU_UPPER_BASE);
+ atu_reg_write(pcie_ecam, index, lower_32_bits(pci_addr),
+ PCIE_ATU_LOWER_TARGET);
+ atu_reg_write(pcie_ecam, index, lower_32_bits(cpu_addr + size - 1),
+ PCIE_ATU_LIMIT);
+ atu_reg_write(pcie_ecam, index, upper_32_bits(pci_addr),
+ PCIE_ATU_UPPER_TARGET);
+ atu_reg_write(pcie_ecam, index, type, PCIE_ATU_CR1);
+ atu_reg_write(pcie_ecam, index, PCIE_ATU_ENABLE, PCIE_ATU_CR2);
+}
+
+static void __iomem *tegra194_map_bus(struct pci_bus *bus,
+ unsigned int devfn, int where)
+{
+ struct pci_config_window *cfg = bus->sysdata;
+ struct tegra194_pcie_ecam *pcie_ecam = cfg->priv;
+ u32 busdev;
+ int type;
+
+ if (bus->number < cfg->busr.start || bus->number > cfg->busr.end)
+ return NULL;
+
+ if (bus->number == cfg->busr.start) {
+ if (PCI_SLOT(devfn) == 0)
+ return pcie_ecam->dbi_base + where;
+ else
+ return NULL;
+ }
+
+ busdev = PCIE_ATU_BUS(bus->number) | PCIE_ATU_DEV(PCI_SLOT(devfn)) |
+ PCIE_ATU_FUNC(PCI_FUNC(devfn));
+
+ if (bus->parent->number == cfg->busr.start) {
+ if (PCI_SLOT(devfn) == 0)
+ type = PCIE_ATU_TYPE_CFG0;
+ else
+ return NULL;
+ } else {
+ type = PCIE_ATU_TYPE_CFG1;
+ }
+
+ program_outbound_atu(pcie_ecam, 0, type, cfg->res.start, busdev,
+ SZ_256K);
+
+ return pcie_ecam->config_base + where;
+}
+
+const struct pci_ecam_ops tegra194_pcie_ops = {
+ .init = tegra194_acpi_init,
+ .pci_ops = {
+ .map_bus = tegra194_map_bus,
+ .read = pci_generic_config_read,
+ .write = pci_generic_config_write,
+ }
+};
+#endif /* defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS) */
+
+#ifdef CONFIG_PCIE_TEGRA194
+
static inline struct tegra_pcie_dw *to_tegra_pcie(struct dw_pcie *pci)
{
return container_of(pci, struct tegra_pcie_dw, pci);
.stop_link = tegra_pcie_dw_stop_link,
};
-static struct dw_pcie_host_ops tegra_pcie_dw_host_ops = {
+static const struct dw_pcie_host_ops tegra_pcie_dw_host_ops = {
.host_init = tegra_pcie_dw_host_init,
};
if (pcie->ep_state == EP_STATE_ENABLED)
return;
- ret = pm_runtime_get_sync(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret < 0) {
dev_err(dev, "Failed to get runtime sync for PCIe dev: %d\n",
ret);
return &tegra_pcie_epc_features;
}
-static struct dw_pcie_ep_ops pcie_ep_ops = {
+static const struct dw_pcie_ep_ops pcie_ep_ops = {
.raise_irq = tegra_pcie_ep_raise_irq,
.get_features = tegra_pcie_ep_get_features,
};
MODULE_AUTHOR("Vidya Sagar <vidyas@nvidia.com>");
MODULE_DESCRIPTION("NVIDIA PCIe host controller driver");
MODULE_LICENSE("GPL v2");
+
+#endif /* CONFIG_PCIE_TEGRA194 */
config PCIE_LAYERSCAPE_GEN4
bool "Freescale Layerscape PCIe Gen4 controller"
- depends on PCI
- depends on OF && (ARM64 || ARCH_LAYERSCAPE)
+ depends on ARCH_LAYERSCAPE || COMPILE_TEST
depends on PCI_MSI_IRQ_DOMAIN
select PCIE_MOBIVEIL_HOST
help
bridge->sysdata = cfg;
bridge->ops = (struct pci_ops *)&ops->pci_ops;
+ bridge->msi_domain = true;
return pci_host_probe(bridge);
}
struct list_head dr_list;
struct msi_domain_info msi_info;
- struct msi_controller msi_chip;
struct irq_domain *irq_domain;
spinlock_t retarget_msi_interrupt_lock;
if (!hbus->pci_bus)
return -ENODEV;
- hbus->pci_bus->msi = &hbus->msi_chip;
- hbus->pci_bus->msi->dev = &hbus->hdev->device;
-
pci_lock_rescan_remove();
pci_scan_child_bus(hbus->pci_bus);
hv_pci_assign_numa_node(hbus);
#include <linux/interrupt.h>
#include <linux/iopoll.h>
#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
#include <linux/irqdomain.h>
#include <linux/kernel.h>
#include <linux/init.h>
#define AFI_MSI_FPCI_BAR_ST 0x64
#define AFI_MSI_AXI_BAR_ST 0x68
-#define AFI_MSI_VEC0 0x6c
-#define AFI_MSI_VEC1 0x70
-#define AFI_MSI_VEC2 0x74
-#define AFI_MSI_VEC3 0x78
-#define AFI_MSI_VEC4 0x7c
-#define AFI_MSI_VEC5 0x80
-#define AFI_MSI_VEC6 0x84
-#define AFI_MSI_VEC7 0x88
-
-#define AFI_MSI_EN_VEC0 0x8c
-#define AFI_MSI_EN_VEC1 0x90
-#define AFI_MSI_EN_VEC2 0x94
-#define AFI_MSI_EN_VEC3 0x98
-#define AFI_MSI_EN_VEC4 0x9c
-#define AFI_MSI_EN_VEC5 0xa0
-#define AFI_MSI_EN_VEC6 0xa4
-#define AFI_MSI_EN_VEC7 0xa8
+#define AFI_MSI_VEC(x) (0x6c + ((x) * 4))
+#define AFI_MSI_EN_VEC(x) (0x8c + ((x) * 4))
#define AFI_CONFIGURATION 0xac
#define AFI_CONFIGURATION_EN_FPCI (1 << 0)
#define LINK_RETRAIN_TIMEOUT 100000 /* in usec */
struct tegra_msi {
- struct msi_controller chip;
DECLARE_BITMAP(used, INT_PCI_MSI_NR);
struct irq_domain *domain;
- struct mutex lock;
+ struct mutex map_lock;
+ spinlock_t mask_lock;
void *virt;
dma_addr_t phys;
int irq;
} ectl;
};
-static inline struct tegra_msi *to_tegra_msi(struct msi_controller *chip)
-{
- return container_of(chip, struct tegra_msi, chip);
-}
-
struct tegra_pcie {
struct device *dev;
struct dentry *debugfs;
};
+static inline struct tegra_pcie *msi_to_pcie(struct tegra_msi *msi)
+{
+ return container_of(msi, struct tegra_pcie, msi);
+}
+
struct tegra_pcie_port {
struct tegra_pcie *pcie;
struct device_node *np;
}
}
-
static int tegra_pcie_get_resources(struct tegra_pcie *pcie)
{
struct device *dev = pcie->dev;
phys_put:
if (soc->program_uphy)
tegra_pcie_phys_put(pcie);
+
return err;
}
afi_writel(pcie, val, AFI_PCIE_PME);
}
-static int tegra_msi_alloc(struct tegra_msi *chip)
-{
- int msi;
-
- mutex_lock(&chip->lock);
-
- msi = find_first_zero_bit(chip->used, INT_PCI_MSI_NR);
- if (msi < INT_PCI_MSI_NR)
- set_bit(msi, chip->used);
- else
- msi = -ENOSPC;
-
- mutex_unlock(&chip->lock);
-
- return msi;
-}
-
-static void tegra_msi_free(struct tegra_msi *chip, unsigned long irq)
+static void tegra_pcie_msi_irq(struct irq_desc *desc)
{
- struct device *dev = chip->chip.dev;
-
- mutex_lock(&chip->lock);
-
- if (!test_bit(irq, chip->used))
- dev_err(dev, "trying to free unused MSI#%lu\n", irq);
- else
- clear_bit(irq, chip->used);
-
- mutex_unlock(&chip->lock);
-}
-
-static irqreturn_t tegra_pcie_msi_irq(int irq, void *data)
-{
- struct tegra_pcie *pcie = data;
- struct device *dev = pcie->dev;
+ struct tegra_pcie *pcie = irq_desc_get_handler_data(desc);
+ struct irq_chip *chip = irq_desc_get_chip(desc);
struct tegra_msi *msi = &pcie->msi;
- unsigned int i, processed = 0;
+ struct device *dev = pcie->dev;
+ unsigned int i;
+
+ chained_irq_enter(chip, desc);
for (i = 0; i < 8; i++) {
- unsigned long reg = afi_readl(pcie, AFI_MSI_VEC0 + i * 4);
+ unsigned long reg = afi_readl(pcie, AFI_MSI_VEC(i));
while (reg) {
unsigned int offset = find_first_bit(®, 32);
unsigned int index = i * 32 + offset;
unsigned int irq;
- /* clear the interrupt */
- afi_writel(pcie, 1 << offset, AFI_MSI_VEC0 + i * 4);
-
- irq = irq_find_mapping(msi->domain, index);
+ irq = irq_find_mapping(msi->domain->parent, index);
if (irq) {
- if (test_bit(index, msi->used))
- generic_handle_irq(irq);
- else
- dev_info(dev, "unhandled MSI\n");
+ generic_handle_irq(irq);
} else {
/*
* that's weird who triggered this?
* just clear it
*/
dev_info(dev, "unexpected MSI\n");
+ afi_writel(pcie, BIT(index % 32), AFI_MSI_VEC(index));
}
/* see if there's any more pending in this vector */
- reg = afi_readl(pcie, AFI_MSI_VEC0 + i * 4);
-
- processed++;
+ reg = afi_readl(pcie, AFI_MSI_VEC(i));
}
}
- return processed > 0 ? IRQ_HANDLED : IRQ_NONE;
+ chained_irq_exit(chip, desc);
}
-static int tegra_msi_setup_irq(struct msi_controller *chip,
- struct pci_dev *pdev, struct msi_desc *desc)
+static void tegra_msi_top_irq_ack(struct irq_data *d)
{
- struct tegra_msi *msi = to_tegra_msi(chip);
- struct msi_msg msg;
- unsigned int irq;
- int hwirq;
+ irq_chip_ack_parent(d);
+}
- hwirq = tegra_msi_alloc(msi);
- if (hwirq < 0)
- return hwirq;
+static void tegra_msi_top_irq_mask(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
- irq = irq_create_mapping(msi->domain, hwirq);
- if (!irq) {
- tegra_msi_free(msi, hwirq);
- return -EINVAL;
- }
+static void tegra_msi_top_irq_unmask(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
+
+static struct irq_chip tegra_msi_top_chip = {
+ .name = "Tegra PCIe MSI",
+ .irq_ack = tegra_msi_top_irq_ack,
+ .irq_mask = tegra_msi_top_irq_mask,
+ .irq_unmask = tegra_msi_top_irq_unmask,
+};
- irq_set_msi_desc(irq, desc);
+static void tegra_msi_irq_ack(struct irq_data *d)
+{
+ struct tegra_msi *msi = irq_data_get_irq_chip_data(d);
+ struct tegra_pcie *pcie = msi_to_pcie(msi);
+ unsigned int index = d->hwirq / 32;
- msg.address_lo = lower_32_bits(msi->phys);
- msg.address_hi = upper_32_bits(msi->phys);
- msg.data = hwirq;
+ /* clear the interrupt */
+ afi_writel(pcie, BIT(d->hwirq % 32), AFI_MSI_VEC(index));
+}
- pci_write_msi_msg(irq, &msg);
+static void tegra_msi_irq_mask(struct irq_data *d)
+{
+ struct tegra_msi *msi = irq_data_get_irq_chip_data(d);
+ struct tegra_pcie *pcie = msi_to_pcie(msi);
+ unsigned int index = d->hwirq / 32;
+ unsigned long flags;
+ u32 value;
- return 0;
+ spin_lock_irqsave(&msi->mask_lock, flags);
+ value = afi_readl(pcie, AFI_MSI_EN_VEC(index));
+ value &= ~BIT(d->hwirq % 32);
+ afi_writel(pcie, value, AFI_MSI_EN_VEC(index));
+ spin_unlock_irqrestore(&msi->mask_lock, flags);
}
-static void tegra_msi_teardown_irq(struct msi_controller *chip,
- unsigned int irq)
+static void tegra_msi_irq_unmask(struct irq_data *d)
{
- struct tegra_msi *msi = to_tegra_msi(chip);
- struct irq_data *d = irq_get_irq_data(irq);
- irq_hw_number_t hwirq = irqd_to_hwirq(d);
+ struct tegra_msi *msi = irq_data_get_irq_chip_data(d);
+ struct tegra_pcie *pcie = msi_to_pcie(msi);
+ unsigned int index = d->hwirq / 32;
+ unsigned long flags;
+ u32 value;
- irq_dispose_mapping(irq);
- tegra_msi_free(msi, hwirq);
+ spin_lock_irqsave(&msi->mask_lock, flags);
+ value = afi_readl(pcie, AFI_MSI_EN_VEC(index));
+ value |= BIT(d->hwirq % 32);
+ afi_writel(pcie, value, AFI_MSI_EN_VEC(index));
+ spin_unlock_irqrestore(&msi->mask_lock, flags);
}
-static struct irq_chip tegra_msi_irq_chip = {
- .name = "Tegra PCIe MSI",
- .irq_enable = pci_msi_unmask_irq,
- .irq_disable = pci_msi_mask_irq,
- .irq_mask = pci_msi_mask_irq,
- .irq_unmask = pci_msi_unmask_irq,
+static int tegra_msi_set_affinity(struct irq_data *d, const struct cpumask *mask, bool force)
+{
+ return -EINVAL;
+}
+
+static void tegra_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ struct tegra_msi *msi = irq_data_get_irq_chip_data(data);
+
+ msg->address_lo = lower_32_bits(msi->phys);
+ msg->address_hi = upper_32_bits(msi->phys);
+ msg->data = data->hwirq;
+}
+
+static struct irq_chip tegra_msi_bottom_chip = {
+ .name = "Tegra MSI",
+ .irq_ack = tegra_msi_irq_ack,
+ .irq_mask = tegra_msi_irq_mask,
+ .irq_unmask = tegra_msi_irq_unmask,
+ .irq_set_affinity = tegra_msi_set_affinity,
+ .irq_compose_msi_msg = tegra_compose_msi_msg,
};
-static int tegra_msi_map(struct irq_domain *domain, unsigned int irq,
- irq_hw_number_t hwirq)
+static int tegra_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *args)
{
- irq_set_chip_and_handler(irq, &tegra_msi_irq_chip, handle_simple_irq);
- irq_set_chip_data(irq, domain->host_data);
+ struct tegra_msi *msi = domain->host_data;
+ unsigned int i;
+ int hwirq;
+
+ mutex_lock(&msi->map_lock);
+
+ hwirq = bitmap_find_free_region(msi->used, INT_PCI_MSI_NR, order_base_2(nr_irqs));
+
+ mutex_unlock(&msi->map_lock);
+
+ if (hwirq < 0)
+ return -ENOSPC;
+
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_set_info(domain, virq + i, hwirq + i,
+ &tegra_msi_bottom_chip, domain->host_data,
+ handle_edge_irq, NULL, NULL);
tegra_cpuidle_pcie_irqs_in_use();
return 0;
}
-static const struct irq_domain_ops msi_domain_ops = {
- .map = tegra_msi_map,
+static void tegra_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct tegra_msi *msi = domain->host_data;
+
+ mutex_lock(&msi->map_lock);
+
+ bitmap_release_region(msi->used, d->hwirq, order_base_2(nr_irqs));
+
+ mutex_unlock(&msi->map_lock);
+}
+
+static const struct irq_domain_ops tegra_msi_domain_ops = {
+ .alloc = tegra_msi_domain_alloc,
+ .free = tegra_msi_domain_free,
+};
+
+static struct msi_domain_info tegra_msi_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSIX),
+ .chip = &tegra_msi_top_chip,
};
+static int tegra_allocate_domains(struct tegra_msi *msi)
+{
+ struct tegra_pcie *pcie = msi_to_pcie(msi);
+ struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
+ struct irq_domain *parent;
+
+ parent = irq_domain_create_linear(fwnode, INT_PCI_MSI_NR,
+ &tegra_msi_domain_ops, msi);
+ if (!parent) {
+ dev_err(pcie->dev, "failed to create IRQ domain\n");
+ return -ENOMEM;
+ }
+ irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
+
+ msi->domain = pci_msi_create_irq_domain(fwnode, &tegra_msi_info, parent);
+ if (!msi->domain) {
+ dev_err(pcie->dev, "failed to create MSI domain\n");
+ irq_domain_remove(parent);
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+static void tegra_free_domains(struct tegra_msi *msi)
+{
+ struct irq_domain *parent = msi->domain->parent;
+
+ irq_domain_remove(msi->domain);
+ irq_domain_remove(parent);
+}
+
static int tegra_pcie_msi_setup(struct tegra_pcie *pcie)
{
- struct pci_host_bridge *host = pci_host_bridge_from_priv(pcie);
struct platform_device *pdev = to_platform_device(pcie->dev);
struct tegra_msi *msi = &pcie->msi;
struct device *dev = pcie->dev;
int err;
- mutex_init(&msi->lock);
-
- msi->chip.dev = dev;
- msi->chip.setup_irq = tegra_msi_setup_irq;
- msi->chip.teardown_irq = tegra_msi_teardown_irq;
+ mutex_init(&msi->map_lock);
+ spin_lock_init(&msi->mask_lock);
- msi->domain = irq_domain_add_linear(dev->of_node, INT_PCI_MSI_NR,
- &msi_domain_ops, &msi->chip);
- if (!msi->domain) {
- dev_err(dev, "failed to create IRQ domain\n");
- return -ENOMEM;
+ if (IS_ENABLED(CONFIG_PCI_MSI)) {
+ err = tegra_allocate_domains(msi);
+ if (err)
+ return err;
}
err = platform_get_irq_byname(pdev, "msi");
msi->irq = err;
- err = request_irq(msi->irq, tegra_pcie_msi_irq, IRQF_NO_THREAD,
- tegra_msi_irq_chip.name, pcie);
- if (err < 0) {
- dev_err(dev, "failed to request IRQ: %d\n", err);
- goto free_irq_domain;
- }
+ irq_set_chained_handler_and_data(msi->irq, tegra_pcie_msi_irq, pcie);
/* Though the PCIe controller can address >32-bit address space, to
* facilitate endpoints that support only 32-bit MSI target address,
goto free_irq;
}
- host->msi = &msi->chip;
-
return 0;
free_irq:
- free_irq(msi->irq, pcie);
+ irq_set_chained_handler_and_data(msi->irq, NULL, NULL);
free_irq_domain:
- irq_domain_remove(msi->domain);
+ if (IS_ENABLED(CONFIG_PCI_MSI))
+ tegra_free_domains(msi);
+
return err;
}
{
const struct tegra_pcie_soc *soc = pcie->soc;
struct tegra_msi *msi = &pcie->msi;
- u32 reg;
+ u32 reg, msi_state[INT_PCI_MSI_NR / 32];
+ int i;
afi_writel(pcie, msi->phys >> soc->msi_base_shift, AFI_MSI_FPCI_BAR_ST);
afi_writel(pcie, msi->phys, AFI_MSI_AXI_BAR_ST);
/* this register is in 4K increments */
afi_writel(pcie, 1, AFI_MSI_BAR_SZ);
- /* enable all MSI vectors */
- afi_writel(pcie, 0xffffffff, AFI_MSI_EN_VEC0);
- afi_writel(pcie, 0xffffffff, AFI_MSI_EN_VEC1);
- afi_writel(pcie, 0xffffffff, AFI_MSI_EN_VEC2);
- afi_writel(pcie, 0xffffffff, AFI_MSI_EN_VEC3);
- afi_writel(pcie, 0xffffffff, AFI_MSI_EN_VEC4);
- afi_writel(pcie, 0xffffffff, AFI_MSI_EN_VEC5);
- afi_writel(pcie, 0xffffffff, AFI_MSI_EN_VEC6);
- afi_writel(pcie, 0xffffffff, AFI_MSI_EN_VEC7);
+ /* Restore the MSI allocation state */
+ bitmap_to_arr32(msi_state, msi->used, INT_PCI_MSI_NR);
+ for (i = 0; i < ARRAY_SIZE(msi_state); i++)
+ afi_writel(pcie, msi_state[i], AFI_MSI_EN_VEC(i));
/* and unmask the MSI interrupt */
reg = afi_readl(pcie, AFI_INTR_MASK);
dma_free_attrs(pcie->dev, PAGE_SIZE, msi->virt, msi->phys,
DMA_ATTR_NO_KERNEL_MAPPING);
- if (msi->irq > 0)
- free_irq(msi->irq, pcie);
-
for (i = 0; i < INT_PCI_MSI_NR; i++) {
irq = irq_find_mapping(msi->domain, i);
if (irq > 0)
- irq_dispose_mapping(irq);
+ irq_domain_free_irqs(irq, 1);
}
- irq_domain_remove(msi->domain);
+ irq_set_chained_handler_and_data(msi->irq, NULL, NULL);
+
+ if (IS_ENABLED(CONFIG_PCI_MSI))
+ tegra_free_domains(msi);
}
static int tegra_pcie_disable_msi(struct tegra_pcie *pcie)
value &= ~AFI_INTR_MASK_MSI_MASK;
afi_writel(pcie, value, AFI_INTR_MASK);
- /* disable all MSI vectors */
- afi_writel(pcie, 0, AFI_MSI_EN_VEC0);
- afi_writel(pcie, 0, AFI_MSI_EN_VEC1);
- afi_writel(pcie, 0, AFI_MSI_EN_VEC2);
- afi_writel(pcie, 0, AFI_MSI_EN_VEC3);
- afi_writel(pcie, 0, AFI_MSI_EN_VEC4);
- afi_writel(pcie, 0, AFI_MSI_EN_VEC5);
- afi_writel(pcie, 0, AFI_MSI_EN_VEC6);
- afi_writel(pcie, 0, AFI_MSI_EN_VEC7);
-
return 0;
}
* the config space access window. Since we are working with
* the high-order 32 bits, shift everything down by 32 bits.
*/
- node_bits = (cfg->res.start >> 32) & (1 << 12);
+ node_bits = upper_32_bits(cfg->res.start) & (1 << 12);
v |= node_bits;
set_val(v, where, size, val);
#include <linux/pci-acpi.h>
#include <linux/pci-ecam.h>
#include <linux/platform_device.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
#include "../pci.h"
#if defined(CONFIG_PCI_HOST_THUNDER_PEM) || (defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS))
* structure here for the BAR.
*/
bar4_start = res_pem->start + 0xf00000;
- pem_pci->ea_entry[0] = (u32)bar4_start | 2;
- pem_pci->ea_entry[1] = (u32)(res_pem->end - bar4_start) & ~3u;
- pem_pci->ea_entry[2] = (u32)(bar4_start >> 32);
+ pem_pci->ea_entry[0] = lower_32_bits(bar4_start) | 2;
+ pem_pci->ea_entry[1] = lower_32_bits(res_pem->end - bar4_start) & ~3u;
+ pem_pci->ea_entry[2] = upper_32_bits(bar4_start);
cfg->priv = pem_pci;
return 0;
#if defined(CONFIG_ACPI) && defined(CONFIG_PCI_QUIRKS)
-#define PEM_RES_BASE 0x87e0c0000000UL
-#define PEM_NODE_MASK GENMASK(45, 44)
-#define PEM_INDX_MASK GENMASK(26, 24)
+#define PEM_RES_BASE 0x87e0c0000000ULL
+#define PEM_NODE_MASK GENMASK_ULL(45, 44)
+#define PEM_INDX_MASK GENMASK_ULL(26, 24)
#define PEM_MIN_DOM_IN_NODE 4
#define PEM_MAX_DOM_IN_NODE 10
if (IS_ERR(port->csr_base))
return PTR_ERR(port->csr_base);
- port->cfg_base = devm_platform_ioremap_resource_byname(pdev, "cfg");
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg");
+ port->cfg_base = devm_ioremap_resource(dev, res);
if (IS_ERR(port->cfg_base))
return PTR_ERR(port->cfg_base);
port->cfg_addr = res->start;
res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
"vector_slave");
msi->vector_base = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(msi->vector_base)) {
- dev_err(&pdev->dev, "failed to map vector_slave memory\n");
+ if (IS_ERR(msi->vector_base))
return PTR_ERR(msi->vector_base);
- }
msi->vector_phy = res->start;
brcm_pcie_turn_off(pcie);
ret = brcm_phy_stop(pcie);
+ reset_control_rearm(pcie->rescal);
clk_disable_unprepare(pcie->clk);
return ret;
base = pcie->base;
clk_prepare_enable(pcie->clk);
+ ret = reset_control_reset(pcie->rescal);
+ if (ret)
+ goto err_disable_clk;
+
ret = brcm_phy_start(pcie);
if (ret)
- goto err;
+ goto err_reset;
/* Take bridge out of reset so we can access the SERDES reg */
pcie->bridge_sw_init_set(pcie, 0);
ret = brcm_pcie_setup(pcie);
if (ret)
- goto err;
+ goto err_reset;
if (pcie->msi)
brcm_msi_set_regs(pcie->msi);
return 0;
-err:
+err_reset:
+ reset_control_rearm(pcie->rescal);
+err_disable_clk:
clk_disable_unprepare(pcie->clk);
return ret;
}
brcm_msi_remove(pcie);
brcm_pcie_turn_off(pcie);
brcm_phy_stop(pcie);
- reset_control_assert(pcie->rescal);
+ reset_control_rearm(pcie->rescal);
clk_disable_unprepare(pcie->clk);
}
return PTR_ERR(pcie->perst_reset);
}
- ret = reset_control_deassert(pcie->rescal);
+ ret = reset_control_reset(pcie->rescal);
if (ret)
dev_err(&pdev->dev, "failed to deassert 'rescal'\n");
ret = brcm_phy_start(pcie);
if (ret) {
- reset_control_assert(pcie->rescal);
+ reset_control_rearm(pcie->rescal);
clk_disable_unprepare(pcie->clk);
return ret;
}
pcie->hw_rev = readl(pcie->base + PCIE_MISC_REVISION);
if (pcie->type == BCM4908 && pcie->hw_rev >= BRCM_PCIE_HW_REV_3_20) {
dev_err(pcie->dev, "hardware revision with unsupported PERST# setup\n");
+ ret = -ENODEV;
goto fail;
}
NULL, NULL);
}
- return hwirq;
+ return 0;
}
static void iproc_msi_irq_domain_free(struct irq_domain *domain,
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * MediaTek PCIe host controller driver.
+ *
+ * Copyright (c) 2020 MediaTek Inc.
+ * Author: Jianjun Wang <jianjun.wang@mediatek.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/irq.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_domain.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+
+#include "../pci.h"
+
+#define PCIE_SETTING_REG 0x80
+#define PCIE_PCI_IDS_1 0x9c
+#define PCI_CLASS(class) (class << 8)
+#define PCIE_RC_MODE BIT(0)
+
+#define PCIE_CFGNUM_REG 0x140
+#define PCIE_CFG_DEVFN(devfn) ((devfn) & GENMASK(7, 0))
+#define PCIE_CFG_BUS(bus) (((bus) << 8) & GENMASK(15, 8))
+#define PCIE_CFG_BYTE_EN(bytes) (((bytes) << 16) & GENMASK(19, 16))
+#define PCIE_CFG_FORCE_BYTE_EN BIT(20)
+#define PCIE_CFG_OFFSET_ADDR 0x1000
+#define PCIE_CFG_HEADER(bus, devfn) \
+ (PCIE_CFG_BUS(bus) | PCIE_CFG_DEVFN(devfn))
+
+#define PCIE_RST_CTRL_REG 0x148
+#define PCIE_MAC_RSTB BIT(0)
+#define PCIE_PHY_RSTB BIT(1)
+#define PCIE_BRG_RSTB BIT(2)
+#define PCIE_PE_RSTB BIT(3)
+
+#define PCIE_LTSSM_STATUS_REG 0x150
+#define PCIE_LTSSM_STATE_MASK GENMASK(28, 24)
+#define PCIE_LTSSM_STATE(val) ((val & PCIE_LTSSM_STATE_MASK) >> 24)
+#define PCIE_LTSSM_STATE_L2_IDLE 0x14
+
+#define PCIE_LINK_STATUS_REG 0x154
+#define PCIE_PORT_LINKUP BIT(8)
+
+#define PCIE_MSI_SET_NUM 8
+#define PCIE_MSI_IRQS_PER_SET 32
+#define PCIE_MSI_IRQS_NUM \
+ (PCIE_MSI_IRQS_PER_SET * PCIE_MSI_SET_NUM)
+
+#define PCIE_INT_ENABLE_REG 0x180
+#define PCIE_MSI_ENABLE GENMASK(PCIE_MSI_SET_NUM + 8 - 1, 8)
+#define PCIE_MSI_SHIFT 8
+#define PCIE_INTX_SHIFT 24
+#define PCIE_INTX_ENABLE \
+ GENMASK(PCIE_INTX_SHIFT + PCI_NUM_INTX - 1, PCIE_INTX_SHIFT)
+
+#define PCIE_INT_STATUS_REG 0x184
+#define PCIE_MSI_SET_ENABLE_REG 0x190
+#define PCIE_MSI_SET_ENABLE GENMASK(PCIE_MSI_SET_NUM - 1, 0)
+
+#define PCIE_MSI_SET_BASE_REG 0xc00
+#define PCIE_MSI_SET_OFFSET 0x10
+#define PCIE_MSI_SET_STATUS_OFFSET 0x04
+#define PCIE_MSI_SET_ENABLE_OFFSET 0x08
+
+#define PCIE_MSI_SET_ADDR_HI_BASE 0xc80
+#define PCIE_MSI_SET_ADDR_HI_OFFSET 0x04
+
+#define PCIE_ICMD_PM_REG 0x198
+#define PCIE_TURN_OFF_LINK BIT(4)
+
+#define PCIE_TRANS_TABLE_BASE_REG 0x800
+#define PCIE_ATR_SRC_ADDR_MSB_OFFSET 0x4
+#define PCIE_ATR_TRSL_ADDR_LSB_OFFSET 0x8
+#define PCIE_ATR_TRSL_ADDR_MSB_OFFSET 0xc
+#define PCIE_ATR_TRSL_PARAM_OFFSET 0x10
+#define PCIE_ATR_TLB_SET_OFFSET 0x20
+
+#define PCIE_MAX_TRANS_TABLES 8
+#define PCIE_ATR_EN BIT(0)
+#define PCIE_ATR_SIZE(size) \
+ (((((size) - 1) << 1) & GENMASK(6, 1)) | PCIE_ATR_EN)
+#define PCIE_ATR_ID(id) ((id) & GENMASK(3, 0))
+#define PCIE_ATR_TYPE_MEM PCIE_ATR_ID(0)
+#define PCIE_ATR_TYPE_IO PCIE_ATR_ID(1)
+#define PCIE_ATR_TLP_TYPE(type) (((type) << 16) & GENMASK(18, 16))
+#define PCIE_ATR_TLP_TYPE_MEM PCIE_ATR_TLP_TYPE(0)
+#define PCIE_ATR_TLP_TYPE_IO PCIE_ATR_TLP_TYPE(2)
+
+/**
+ * struct mtk_msi_set - MSI information for each set
+ * @base: IO mapped register base
+ * @msg_addr: MSI message address
+ * @saved_irq_state: IRQ enable state saved at suspend time
+ */
+struct mtk_msi_set {
+ void __iomem *base;
+ phys_addr_t msg_addr;
+ u32 saved_irq_state;
+};
+
+/**
+ * struct mtk_pcie_port - PCIe port information
+ * @dev: pointer to PCIe device
+ * @base: IO mapped register base
+ * @reg_base: physical register base
+ * @mac_reset: MAC reset control
+ * @phy_reset: PHY reset control
+ * @phy: PHY controller block
+ * @clks: PCIe clocks
+ * @num_clks: PCIe clocks count for this port
+ * @irq: PCIe controller interrupt number
+ * @saved_irq_state: IRQ enable state saved at suspend time
+ * @irq_lock: lock protecting IRQ register access
+ * @intx_domain: legacy INTx IRQ domain
+ * @msi_domain: MSI IRQ domain
+ * @msi_bottom_domain: MSI IRQ bottom domain
+ * @msi_sets: MSI sets information
+ * @lock: lock protecting IRQ bit map
+ * @msi_irq_in_use: bit map for assigned MSI IRQ
+ */
+struct mtk_pcie_port {
+ struct device *dev;
+ void __iomem *base;
+ phys_addr_t reg_base;
+ struct reset_control *mac_reset;
+ struct reset_control *phy_reset;
+ struct phy *phy;
+ struct clk_bulk_data *clks;
+ int num_clks;
+
+ int irq;
+ u32 saved_irq_state;
+ raw_spinlock_t irq_lock;
+ struct irq_domain *intx_domain;
+ struct irq_domain *msi_domain;
+ struct irq_domain *msi_bottom_domain;
+ struct mtk_msi_set msi_sets[PCIE_MSI_SET_NUM];
+ struct mutex lock;
+ DECLARE_BITMAP(msi_irq_in_use, PCIE_MSI_IRQS_NUM);
+};
+
+/**
+ * mtk_pcie_config_tlp_header() - Configure a configuration TLP header
+ * @bus: PCI bus to query
+ * @devfn: device/function number
+ * @where: offset in config space
+ * @size: data size in TLP header
+ *
+ * Set byte enable field and device information in configuration TLP header.
+ */
+static void mtk_pcie_config_tlp_header(struct pci_bus *bus, unsigned int devfn,
+ int where, int size)
+{
+ struct mtk_pcie_port *port = bus->sysdata;
+ int bytes;
+ u32 val;
+
+ bytes = (GENMASK(size - 1, 0) & 0xf) << (where & 0x3);
+
+ val = PCIE_CFG_FORCE_BYTE_EN | PCIE_CFG_BYTE_EN(bytes) |
+ PCIE_CFG_HEADER(bus->number, devfn);
+
+ writel_relaxed(val, port->base + PCIE_CFGNUM_REG);
+}
+
+static void __iomem *mtk_pcie_map_bus(struct pci_bus *bus, unsigned int devfn,
+ int where)
+{
+ struct mtk_pcie_port *port = bus->sysdata;
+
+ return port->base + PCIE_CFG_OFFSET_ADDR + where;
+}
+
+static int mtk_pcie_config_read(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 *val)
+{
+ mtk_pcie_config_tlp_header(bus, devfn, where, size);
+
+ return pci_generic_config_read32(bus, devfn, where, size, val);
+}
+
+static int mtk_pcie_config_write(struct pci_bus *bus, unsigned int devfn,
+ int where, int size, u32 val)
+{
+ mtk_pcie_config_tlp_header(bus, devfn, where, size);
+
+ if (size <= 2)
+ val <<= (where & 0x3) * 8;
+
+ return pci_generic_config_write32(bus, devfn, where, 4, val);
+}
+
+static struct pci_ops mtk_pcie_ops = {
+ .map_bus = mtk_pcie_map_bus,
+ .read = mtk_pcie_config_read,
+ .write = mtk_pcie_config_write,
+};
+
+static int mtk_pcie_set_trans_table(struct mtk_pcie_port *port,
+ resource_size_t cpu_addr,
+ resource_size_t pci_addr,
+ resource_size_t size,
+ unsigned long type, int num)
+{
+ void __iomem *table;
+ u32 val;
+
+ if (num >= PCIE_MAX_TRANS_TABLES) {
+ dev_err(port->dev, "not enough translate table for addr: %#llx, limited to [%d]\n",
+ (unsigned long long)cpu_addr, PCIE_MAX_TRANS_TABLES);
+ return -ENODEV;
+ }
+
+ table = port->base + PCIE_TRANS_TABLE_BASE_REG +
+ num * PCIE_ATR_TLB_SET_OFFSET;
+
+ writel_relaxed(lower_32_bits(cpu_addr) | PCIE_ATR_SIZE(fls(size) - 1),
+ table);
+ writel_relaxed(upper_32_bits(cpu_addr),
+ table + PCIE_ATR_SRC_ADDR_MSB_OFFSET);
+ writel_relaxed(lower_32_bits(pci_addr),
+ table + PCIE_ATR_TRSL_ADDR_LSB_OFFSET);
+ writel_relaxed(upper_32_bits(pci_addr),
+ table + PCIE_ATR_TRSL_ADDR_MSB_OFFSET);
+
+ if (type == IORESOURCE_IO)
+ val = PCIE_ATR_TYPE_IO | PCIE_ATR_TLP_TYPE_IO;
+ else
+ val = PCIE_ATR_TYPE_MEM | PCIE_ATR_TLP_TYPE_MEM;
+
+ writel_relaxed(val, table + PCIE_ATR_TRSL_PARAM_OFFSET);
+
+ return 0;
+}
+
+static void mtk_pcie_enable_msi(struct mtk_pcie_port *port)
+{
+ int i;
+ u32 val;
+
+ for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
+ struct mtk_msi_set *msi_set = &port->msi_sets[i];
+
+ msi_set->base = port->base + PCIE_MSI_SET_BASE_REG +
+ i * PCIE_MSI_SET_OFFSET;
+ msi_set->msg_addr = port->reg_base + PCIE_MSI_SET_BASE_REG +
+ i * PCIE_MSI_SET_OFFSET;
+
+ /* Configure the MSI capture address */
+ writel_relaxed(lower_32_bits(msi_set->msg_addr), msi_set->base);
+ writel_relaxed(upper_32_bits(msi_set->msg_addr),
+ port->base + PCIE_MSI_SET_ADDR_HI_BASE +
+ i * PCIE_MSI_SET_ADDR_HI_OFFSET);
+ }
+
+ val = readl_relaxed(port->base + PCIE_MSI_SET_ENABLE_REG);
+ val |= PCIE_MSI_SET_ENABLE;
+ writel_relaxed(val, port->base + PCIE_MSI_SET_ENABLE_REG);
+
+ val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ val |= PCIE_MSI_ENABLE;
+ writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
+}
+
+static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
+{
+ struct resource_entry *entry;
+ struct pci_host_bridge *host = pci_host_bridge_from_priv(port);
+ unsigned int table_index = 0;
+ int err;
+ u32 val;
+
+ /* Set as RC mode */
+ val = readl_relaxed(port->base + PCIE_SETTING_REG);
+ val |= PCIE_RC_MODE;
+ writel_relaxed(val, port->base + PCIE_SETTING_REG);
+
+ /* Set class code */
+ val = readl_relaxed(port->base + PCIE_PCI_IDS_1);
+ val &= ~GENMASK(31, 8);
+ val |= PCI_CLASS(PCI_CLASS_BRIDGE_PCI << 8);
+ writel_relaxed(val, port->base + PCIE_PCI_IDS_1);
+
+ /* Mask all INTx interrupts */
+ val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ val &= ~PCIE_INTX_ENABLE;
+ writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
+
+ /* Assert all reset signals */
+ val = readl_relaxed(port->base + PCIE_RST_CTRL_REG);
+ val |= PCIE_MAC_RSTB | PCIE_PHY_RSTB | PCIE_BRG_RSTB | PCIE_PE_RSTB;
+ writel_relaxed(val, port->base + PCIE_RST_CTRL_REG);
+
+ /*
+ * Described in PCIe CEM specification setctions 2.2 (PERST# Signal)
+ * and 2.2.1 (Initial Power-Up (G3 to S0)).
+ * The deassertion of PERST# should be delayed 100ms (TPVPERL)
+ * for the power and clock to become stable.
+ */
+ msleep(100);
+
+ /* De-assert reset signals */
+ val &= ~(PCIE_MAC_RSTB | PCIE_PHY_RSTB | PCIE_BRG_RSTB | PCIE_PE_RSTB);
+ writel_relaxed(val, port->base + PCIE_RST_CTRL_REG);
+
+ /* Check if the link is up or not */
+ err = readl_poll_timeout(port->base + PCIE_LINK_STATUS_REG, val,
+ !!(val & PCIE_PORT_LINKUP), 20,
+ PCI_PM_D3COLD_WAIT * USEC_PER_MSEC);
+ if (err) {
+ val = readl_relaxed(port->base + PCIE_LTSSM_STATUS_REG);
+ dev_err(port->dev, "PCIe link down, ltssm reg val: %#x\n", val);
+ return err;
+ }
+
+ mtk_pcie_enable_msi(port);
+
+ /* Set PCIe translation windows */
+ resource_list_for_each_entry(entry, &host->windows) {
+ struct resource *res = entry->res;
+ unsigned long type = resource_type(res);
+ resource_size_t cpu_addr;
+ resource_size_t pci_addr;
+ resource_size_t size;
+ const char *range_type;
+
+ if (type == IORESOURCE_IO) {
+ cpu_addr = pci_pio_to_address(res->start);
+ range_type = "IO";
+ } else if (type == IORESOURCE_MEM) {
+ cpu_addr = res->start;
+ range_type = "MEM";
+ } else {
+ continue;
+ }
+
+ pci_addr = res->start - entry->offset;
+ size = resource_size(res);
+ err = mtk_pcie_set_trans_table(port, cpu_addr, pci_addr, size,
+ type, table_index);
+ if (err)
+ return err;
+
+ dev_dbg(port->dev, "set %s trans window[%d]: cpu_addr = %#llx, pci_addr = %#llx, size = %#llx\n",
+ range_type, table_index, (unsigned long long)cpu_addr,
+ (unsigned long long)pci_addr, (unsigned long long)size);
+
+ table_index++;
+ }
+
+ return 0;
+}
+
+static int mtk_pcie_set_affinity(struct irq_data *data,
+ const struct cpumask *mask, bool force)
+{
+ return -EINVAL;
+}
+
+static void mtk_pcie_msi_irq_mask(struct irq_data *data)
+{
+ pci_msi_mask_irq(data);
+ irq_chip_mask_parent(data);
+}
+
+static void mtk_pcie_msi_irq_unmask(struct irq_data *data)
+{
+ pci_msi_unmask_irq(data);
+ irq_chip_unmask_parent(data);
+}
+
+static struct irq_chip mtk_msi_irq_chip = {
+ .irq_ack = irq_chip_ack_parent,
+ .irq_mask = mtk_pcie_msi_irq_mask,
+ .irq_unmask = mtk_pcie_msi_irq_unmask,
+ .name = "MSI",
+};
+
+static struct msi_domain_info mtk_msi_domain_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_PCI_MSIX | MSI_FLAG_MULTI_PCI_MSI),
+ .chip = &mtk_msi_irq_chip,
+};
+
+static void mtk_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
+ struct mtk_pcie_port *port = data->domain->host_data;
+ unsigned long hwirq;
+
+ hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET;
+
+ msg->address_hi = upper_32_bits(msi_set->msg_addr);
+ msg->address_lo = lower_32_bits(msi_set->msg_addr);
+ msg->data = hwirq;
+ dev_dbg(port->dev, "msi#%#lx address_hi %#x address_lo %#x data %d\n",
+ hwirq, msg->address_hi, msg->address_lo, msg->data);
+}
+
+static void mtk_msi_bottom_irq_ack(struct irq_data *data)
+{
+ struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
+ unsigned long hwirq;
+
+ hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET;
+
+ writel_relaxed(BIT(hwirq), msi_set->base + PCIE_MSI_SET_STATUS_OFFSET);
+}
+
+static void mtk_msi_bottom_irq_mask(struct irq_data *data)
+{
+ struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
+ struct mtk_pcie_port *port = data->domain->host_data;
+ unsigned long hwirq, flags;
+ u32 val;
+
+ hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET;
+
+ raw_spin_lock_irqsave(&port->irq_lock, flags);
+ val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+ val &= ~BIT(hwirq);
+ writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+ raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static void mtk_msi_bottom_irq_unmask(struct irq_data *data)
+{
+ struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data);
+ struct mtk_pcie_port *port = data->domain->host_data;
+ unsigned long hwirq, flags;
+ u32 val;
+
+ hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET;
+
+ raw_spin_lock_irqsave(&port->irq_lock, flags);
+ val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+ val |= BIT(hwirq);
+ writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+ raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static struct irq_chip mtk_msi_bottom_irq_chip = {
+ .irq_ack = mtk_msi_bottom_irq_ack,
+ .irq_mask = mtk_msi_bottom_irq_mask,
+ .irq_unmask = mtk_msi_bottom_irq_unmask,
+ .irq_compose_msi_msg = mtk_compose_msi_msg,
+ .irq_set_affinity = mtk_pcie_set_affinity,
+ .name = "MSI",
+};
+
+static int mtk_msi_bottom_domain_alloc(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs,
+ void *arg)
+{
+ struct mtk_pcie_port *port = domain->host_data;
+ struct mtk_msi_set *msi_set;
+ int i, hwirq, set_idx;
+
+ mutex_lock(&port->lock);
+
+ hwirq = bitmap_find_free_region(port->msi_irq_in_use, PCIE_MSI_IRQS_NUM,
+ order_base_2(nr_irqs));
+
+ mutex_unlock(&port->lock);
+
+ if (hwirq < 0)
+ return -ENOSPC;
+
+ set_idx = hwirq / PCIE_MSI_IRQS_PER_SET;
+ msi_set = &port->msi_sets[set_idx];
+
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_set_info(domain, virq + i, hwirq + i,
+ &mtk_msi_bottom_irq_chip, msi_set,
+ handle_edge_irq, NULL, NULL);
+
+ return 0;
+}
+
+static void mtk_msi_bottom_domain_free(struct irq_domain *domain,
+ unsigned int virq, unsigned int nr_irqs)
+{
+ struct mtk_pcie_port *port = domain->host_data;
+ struct irq_data *data = irq_domain_get_irq_data(domain, virq);
+
+ mutex_lock(&port->lock);
+
+ bitmap_release_region(port->msi_irq_in_use, data->hwirq,
+ order_base_2(nr_irqs));
+
+ mutex_unlock(&port->lock);
+
+ irq_domain_free_irqs_common(domain, virq, nr_irqs);
+}
+
+static const struct irq_domain_ops mtk_msi_bottom_domain_ops = {
+ .alloc = mtk_msi_bottom_domain_alloc,
+ .free = mtk_msi_bottom_domain_free,
+};
+
+static void mtk_intx_mask(struct irq_data *data)
+{
+ struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
+ unsigned long flags;
+ u32 val;
+
+ raw_spin_lock_irqsave(&port->irq_lock, flags);
+ val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ val &= ~BIT(data->hwirq + PCIE_INTX_SHIFT);
+ writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
+ raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+static void mtk_intx_unmask(struct irq_data *data)
+{
+ struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
+ unsigned long flags;
+ u32 val;
+
+ raw_spin_lock_irqsave(&port->irq_lock, flags);
+ val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+ val |= BIT(data->hwirq + PCIE_INTX_SHIFT);
+ writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG);
+ raw_spin_unlock_irqrestore(&port->irq_lock, flags);
+}
+
+/**
+ * mtk_intx_eoi() - Clear INTx IRQ status at the end of interrupt
+ * @data: pointer to chip specific data
+ *
+ * As an emulated level IRQ, its interrupt status will remain
+ * until the corresponding de-assert message is received; hence that
+ * the status can only be cleared when the interrupt has been serviced.
+ */
+static void mtk_intx_eoi(struct irq_data *data)
+{
+ struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data);
+ unsigned long hwirq;
+
+ hwirq = data->hwirq + PCIE_INTX_SHIFT;
+ writel_relaxed(BIT(hwirq), port->base + PCIE_INT_STATUS_REG);
+}
+
+static struct irq_chip mtk_intx_irq_chip = {
+ .irq_mask = mtk_intx_mask,
+ .irq_unmask = mtk_intx_unmask,
+ .irq_eoi = mtk_intx_eoi,
+ .irq_set_affinity = mtk_pcie_set_affinity,
+ .name = "INTx",
+};
+
+static int mtk_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+ irq_hw_number_t hwirq)
+{
+ irq_set_chip_data(irq, domain->host_data);
+ irq_set_chip_and_handler_name(irq, &mtk_intx_irq_chip,
+ handle_fasteoi_irq, "INTx");
+ return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+ .map = mtk_pcie_intx_map,
+};
+
+static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port)
+{
+ struct device *dev = port->dev;
+ struct device_node *intc_node, *node = dev->of_node;
+ int ret;
+
+ raw_spin_lock_init(&port->irq_lock);
+
+ /* Setup INTx */
+ intc_node = of_get_child_by_name(node, "interrupt-controller");
+ if (!intc_node) {
+ dev_err(dev, "missing interrupt-controller node\n");
+ return -ENODEV;
+ }
+
+ port->intx_domain = irq_domain_add_linear(intc_node, PCI_NUM_INTX,
+ &intx_domain_ops, port);
+ if (!port->intx_domain) {
+ dev_err(dev, "failed to create INTx IRQ domain\n");
+ return -ENODEV;
+ }
+
+ /* Setup MSI */
+ mutex_init(&port->lock);
+
+ port->msi_bottom_domain = irq_domain_add_linear(node, PCIE_MSI_IRQS_NUM,
+ &mtk_msi_bottom_domain_ops, port);
+ if (!port->msi_bottom_domain) {
+ dev_err(dev, "failed to create MSI bottom domain\n");
+ ret = -ENODEV;
+ goto err_msi_bottom_domain;
+ }
+
+ port->msi_domain = pci_msi_create_irq_domain(dev->fwnode,
+ &mtk_msi_domain_info,
+ port->msi_bottom_domain);
+ if (!port->msi_domain) {
+ dev_err(dev, "failed to create MSI domain\n");
+ ret = -ENODEV;
+ goto err_msi_domain;
+ }
+
+ return 0;
+
+err_msi_domain:
+ irq_domain_remove(port->msi_bottom_domain);
+err_msi_bottom_domain:
+ irq_domain_remove(port->intx_domain);
+
+ return ret;
+}
+
+static void mtk_pcie_irq_teardown(struct mtk_pcie_port *port)
+{
+ irq_set_chained_handler_and_data(port->irq, NULL, NULL);
+
+ if (port->intx_domain)
+ irq_domain_remove(port->intx_domain);
+
+ if (port->msi_domain)
+ irq_domain_remove(port->msi_domain);
+
+ if (port->msi_bottom_domain)
+ irq_domain_remove(port->msi_bottom_domain);
+
+ irq_dispose_mapping(port->irq);
+}
+
+static void mtk_pcie_msi_handler(struct mtk_pcie_port *port, int set_idx)
+{
+ struct mtk_msi_set *msi_set = &port->msi_sets[set_idx];
+ unsigned long msi_enable, msi_status;
+ unsigned int virq;
+ irq_hw_number_t bit, hwirq;
+
+ msi_enable = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+
+ do {
+ msi_status = readl_relaxed(msi_set->base +
+ PCIE_MSI_SET_STATUS_OFFSET);
+ msi_status &= msi_enable;
+ if (!msi_status)
+ break;
+
+ for_each_set_bit(bit, &msi_status, PCIE_MSI_IRQS_PER_SET) {
+ hwirq = bit + set_idx * PCIE_MSI_IRQS_PER_SET;
+ virq = irq_find_mapping(port->msi_bottom_domain, hwirq);
+ generic_handle_irq(virq);
+ }
+ } while (true);
+}
+
+static void mtk_pcie_irq_handler(struct irq_desc *desc)
+{
+ struct mtk_pcie_port *port = irq_desc_get_handler_data(desc);
+ struct irq_chip *irqchip = irq_desc_get_chip(desc);
+ unsigned long status;
+ unsigned int virq;
+ irq_hw_number_t irq_bit = PCIE_INTX_SHIFT;
+
+ chained_irq_enter(irqchip, desc);
+
+ status = readl_relaxed(port->base + PCIE_INT_STATUS_REG);
+ for_each_set_bit_from(irq_bit, &status, PCI_NUM_INTX +
+ PCIE_INTX_SHIFT) {
+ virq = irq_find_mapping(port->intx_domain,
+ irq_bit - PCIE_INTX_SHIFT);
+ generic_handle_irq(virq);
+ }
+
+ irq_bit = PCIE_MSI_SHIFT;
+ for_each_set_bit_from(irq_bit, &status, PCIE_MSI_SET_NUM +
+ PCIE_MSI_SHIFT) {
+ mtk_pcie_msi_handler(port, irq_bit - PCIE_MSI_SHIFT);
+
+ writel_relaxed(BIT(irq_bit), port->base + PCIE_INT_STATUS_REG);
+ }
+
+ chained_irq_exit(irqchip, desc);
+}
+
+static int mtk_pcie_setup_irq(struct mtk_pcie_port *port)
+{
+ struct device *dev = port->dev;
+ struct platform_device *pdev = to_platform_device(dev);
+ int err;
+
+ err = mtk_pcie_init_irq_domains(port);
+ if (err)
+ return err;
+
+ port->irq = platform_get_irq(pdev, 0);
+ if (port->irq < 0)
+ return port->irq;
+
+ irq_set_chained_handler_and_data(port->irq, mtk_pcie_irq_handler, port);
+
+ return 0;
+}
+
+static int mtk_pcie_parse_port(struct mtk_pcie_port *port)
+{
+ struct device *dev = port->dev;
+ struct platform_device *pdev = to_platform_device(dev);
+ struct resource *regs;
+ int ret;
+
+ regs = platform_get_resource_byname(pdev, IORESOURCE_MEM, "pcie-mac");
+ if (!regs)
+ return -EINVAL;
+ port->base = devm_ioremap_resource(dev, regs);
+ if (IS_ERR(port->base)) {
+ dev_err(dev, "failed to map register base\n");
+ return PTR_ERR(port->base);
+ }
+
+ port->reg_base = regs->start;
+
+ port->phy_reset = devm_reset_control_get_optional_exclusive(dev, "phy");
+ if (IS_ERR(port->phy_reset)) {
+ ret = PTR_ERR(port->phy_reset);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "failed to get PHY reset\n");
+
+ return ret;
+ }
+
+ port->mac_reset = devm_reset_control_get_optional_exclusive(dev, "mac");
+ if (IS_ERR(port->mac_reset)) {
+ ret = PTR_ERR(port->mac_reset);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "failed to get MAC reset\n");
+
+ return ret;
+ }
+
+ port->phy = devm_phy_optional_get(dev, "pcie-phy");
+ if (IS_ERR(port->phy)) {
+ ret = PTR_ERR(port->phy);
+ if (ret != -EPROBE_DEFER)
+ dev_err(dev, "failed to get PHY\n");
+
+ return ret;
+ }
+
+ port->num_clks = devm_clk_bulk_get_all(dev, &port->clks);
+ if (port->num_clks < 0) {
+ dev_err(dev, "failed to get clocks\n");
+ return port->num_clks;
+ }
+
+ return 0;
+}
+
+static int mtk_pcie_power_up(struct mtk_pcie_port *port)
+{
+ struct device *dev = port->dev;
+ int err;
+
+ /* PHY power on and enable pipe clock */
+ reset_control_deassert(port->phy_reset);
+
+ err = phy_init(port->phy);
+ if (err) {
+ dev_err(dev, "failed to initialize PHY\n");
+ goto err_phy_init;
+ }
+
+ err = phy_power_on(port->phy);
+ if (err) {
+ dev_err(dev, "failed to power on PHY\n");
+ goto err_phy_on;
+ }
+
+ /* MAC power on and enable transaction layer clocks */
+ reset_control_deassert(port->mac_reset);
+
+ pm_runtime_enable(dev);
+ pm_runtime_get_sync(dev);
+
+ err = clk_bulk_prepare_enable(port->num_clks, port->clks);
+ if (err) {
+ dev_err(dev, "failed to enable clocks\n");
+ goto err_clk_init;
+ }
+
+ return 0;
+
+err_clk_init:
+ pm_runtime_put_sync(dev);
+ pm_runtime_disable(dev);
+ reset_control_assert(port->mac_reset);
+ phy_power_off(port->phy);
+err_phy_on:
+ phy_exit(port->phy);
+err_phy_init:
+ reset_control_assert(port->phy_reset);
+
+ return err;
+}
+
+static void mtk_pcie_power_down(struct mtk_pcie_port *port)
+{
+ clk_bulk_disable_unprepare(port->num_clks, port->clks);
+
+ pm_runtime_put_sync(port->dev);
+ pm_runtime_disable(port->dev);
+ reset_control_assert(port->mac_reset);
+
+ phy_power_off(port->phy);
+ phy_exit(port->phy);
+ reset_control_assert(port->phy_reset);
+}
+
+static int mtk_pcie_setup(struct mtk_pcie_port *port)
+{
+ int err;
+
+ err = mtk_pcie_parse_port(port);
+ if (err)
+ return err;
+
+ /* Don't touch the hardware registers before power up */
+ err = mtk_pcie_power_up(port);
+ if (err)
+ return err;
+
+ /* Try link up */
+ err = mtk_pcie_startup_port(port);
+ if (err)
+ goto err_setup;
+
+ err = mtk_pcie_setup_irq(port);
+ if (err)
+ goto err_setup;
+
+ return 0;
+
+err_setup:
+ mtk_pcie_power_down(port);
+
+ return err;
+}
+
+static int mtk_pcie_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct mtk_pcie_port *port;
+ struct pci_host_bridge *host;
+ int err;
+
+ host = devm_pci_alloc_host_bridge(dev, sizeof(*port));
+ if (!host)
+ return -ENOMEM;
+
+ port = pci_host_bridge_priv(host);
+
+ port->dev = dev;
+ platform_set_drvdata(pdev, port);
+
+ err = mtk_pcie_setup(port);
+ if (err)
+ return err;
+
+ host->ops = &mtk_pcie_ops;
+ host->sysdata = port;
+
+ err = pci_host_probe(host);
+ if (err) {
+ mtk_pcie_irq_teardown(port);
+ mtk_pcie_power_down(port);
+ return err;
+ }
+
+ return 0;
+}
+
+static int mtk_pcie_remove(struct platform_device *pdev)
+{
+ struct mtk_pcie_port *port = platform_get_drvdata(pdev);
+ struct pci_host_bridge *host = pci_host_bridge_from_priv(port);
+
+ pci_lock_rescan_remove();
+ pci_stop_root_bus(host->bus);
+ pci_remove_root_bus(host->bus);
+ pci_unlock_rescan_remove();
+
+ mtk_pcie_irq_teardown(port);
+ mtk_pcie_power_down(port);
+
+ return 0;
+}
+
+static void __maybe_unused mtk_pcie_irq_save(struct mtk_pcie_port *port)
+{
+ int i;
+
+ raw_spin_lock(&port->irq_lock);
+
+ port->saved_irq_state = readl_relaxed(port->base + PCIE_INT_ENABLE_REG);
+
+ for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
+ struct mtk_msi_set *msi_set = &port->msi_sets[i];
+
+ msi_set->saved_irq_state = readl_relaxed(msi_set->base +
+ PCIE_MSI_SET_ENABLE_OFFSET);
+ }
+
+ raw_spin_unlock(&port->irq_lock);
+}
+
+static void __maybe_unused mtk_pcie_irq_restore(struct mtk_pcie_port *port)
+{
+ int i;
+
+ raw_spin_lock(&port->irq_lock);
+
+ writel_relaxed(port->saved_irq_state, port->base + PCIE_INT_ENABLE_REG);
+
+ for (i = 0; i < PCIE_MSI_SET_NUM; i++) {
+ struct mtk_msi_set *msi_set = &port->msi_sets[i];
+
+ writel_relaxed(msi_set->saved_irq_state,
+ msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET);
+ }
+
+ raw_spin_unlock(&port->irq_lock);
+}
+
+static int __maybe_unused mtk_pcie_turn_off_link(struct mtk_pcie_port *port)
+{
+ u32 val;
+
+ val = readl_relaxed(port->base + PCIE_ICMD_PM_REG);
+ val |= PCIE_TURN_OFF_LINK;
+ writel_relaxed(val, port->base + PCIE_ICMD_PM_REG);
+
+ /* Check the link is L2 */
+ return readl_poll_timeout(port->base + PCIE_LTSSM_STATUS_REG, val,
+ (PCIE_LTSSM_STATE(val) ==
+ PCIE_LTSSM_STATE_L2_IDLE), 20,
+ 50 * USEC_PER_MSEC);
+}
+
+static int __maybe_unused mtk_pcie_suspend_noirq(struct device *dev)
+{
+ struct mtk_pcie_port *port = dev_get_drvdata(dev);
+ int err;
+ u32 val;
+
+ /* Trigger link to L2 state */
+ err = mtk_pcie_turn_off_link(port);
+ if (err) {
+ dev_err(port->dev, "cannot enter L2 state\n");
+ return err;
+ }
+
+ /* Pull down the PERST# pin */
+ val = readl_relaxed(port->base + PCIE_RST_CTRL_REG);
+ val |= PCIE_PE_RSTB;
+ writel_relaxed(val, port->base + PCIE_RST_CTRL_REG);
+
+ dev_dbg(port->dev, "entered L2 states successfully");
+
+ mtk_pcie_irq_save(port);
+ mtk_pcie_power_down(port);
+
+ return 0;
+}
+
+static int __maybe_unused mtk_pcie_resume_noirq(struct device *dev)
+{
+ struct mtk_pcie_port *port = dev_get_drvdata(dev);
+ int err;
+
+ err = mtk_pcie_power_up(port);
+ if (err)
+ return err;
+
+ err = mtk_pcie_startup_port(port);
+ if (err) {
+ mtk_pcie_power_down(port);
+ return err;
+ }
+
+ mtk_pcie_irq_restore(port);
+
+ return 0;
+}
+
+static const struct dev_pm_ops mtk_pcie_pm_ops = {
+ SET_NOIRQ_SYSTEM_SLEEP_PM_OPS(mtk_pcie_suspend_noirq,
+ mtk_pcie_resume_noirq)
+};
+
+static const struct of_device_id mtk_pcie_of_match[] = {
+ { .compatible = "mediatek,mt8192-pcie" },
+ {},
+};
+
+static struct platform_driver mtk_pcie_driver = {
+ .probe = mtk_pcie_probe,
+ .remove = mtk_pcie_remove,
+ .driver = {
+ .name = "mtk-pcie",
+ .of_match_table = mtk_pcie_of_match,
+ .pm = &mtk_pcie_pm_ops,
+ },
+};
+
+module_platform_driver(mtk_pcie_driver);
+MODULE_LICENSE("GPL v2");
* struct mtk_pcie_soc - differentiate between host generations
* @need_fix_class_id: whether this host's class ID needed to be fixed or not
* @need_fix_device_id: whether this host's device ID needed to be fixed or not
+ * @no_msi: Bridge has no MSI support, and relies on an external block
* @device_id: device ID which this host need to be fixed
* @ops: pointer to configuration access functions
* @startup: pointer to controller setting functions
struct mtk_pcie_soc {
bool need_fix_class_id;
bool need_fix_device_id;
+ bool no_msi;
unsigned int device_id;
struct pci_ops *ops;
int (*startup)(struct mtk_pcie_port *port);
static int mtk_pcie_startup_port(struct mtk_pcie_port *port)
{
struct mtk_pcie *pcie = port->pcie;
- u32 func = PCI_FUNC(port->slot << 3);
+ u32 func = PCI_FUNC(port->slot);
u32 slot = PCI_SLOT(port->slot << 3);
u32 val;
int err;
host->ops = pcie->soc->ops;
host->sysdata = pcie;
+ host->msi_domain = pcie->soc->no_msi;
err = pci_host_probe(host);
if (err)
};
static const struct mtk_pcie_soc mtk_pcie_soc_v1 = {
+ .no_msi = true,
.ops = &mtk_pcie_ops,
.startup = mtk_pcie_startup_port,
};
{ .compatible = "mediatek,mt7629-pcie", .data = &mtk_pcie_soc_mt7629 },
{},
};
+MODULE_DEVICE_TABLE(of, mtk_pcie_ids);
static struct platform_driver mtk_pcie_driver = {
.probe = mtk_pcie_probe,
LOCAL_EVENT_CAUSE(PM_MSI_INT_SYS_ERR, "system error"),
};
-struct event_map pcie_event_to_event[] = {
+static struct event_map pcie_event_to_event[] = {
PCIE_EVENT_TO_EVENT_MAP(L2_EXIT),
PCIE_EVENT_TO_EVENT_MAP(HOTRST_EXIT),
PCIE_EVENT_TO_EVENT_MAP(DLUP_EXIT),
};
-struct event_map sec_error_to_event[] = {
+static struct event_map sec_error_to_event[] = {
SEC_ERROR_TO_EVENT_MAP(TX_RAM_SEC_ERR),
SEC_ERROR_TO_EVENT_MAP(RX_RAM_SEC_ERR),
SEC_ERROR_TO_EVENT_MAP(PCIE2AXI_RAM_SEC_ERR),
SEC_ERROR_TO_EVENT_MAP(AXI2PCIE_RAM_SEC_ERR),
};
-struct event_map ded_error_to_event[] = {
+static struct event_map ded_error_to_event[] = {
DED_ERROR_TO_EVENT_MAP(TX_RAM_DED_ERR),
DED_ERROR_TO_EVENT_MAP(RX_RAM_DED_ERR),
DED_ERROR_TO_EVENT_MAP(PCIE2AXI_RAM_DED_ERR),
DED_ERROR_TO_EVENT_MAP(AXI2PCIE_RAM_DED_ERR),
};
-struct event_map local_status_to_event[] = {
+static struct event_map local_status_to_event[] = {
LOCAL_STATUS_TO_EVENT_MAP(DMA_END_ENGINE_0),
LOCAL_STATUS_TO_EVENT_MAP(DMA_END_ENGINE_1),
LOCAL_STATUS_TO_EVENT_MAP(DMA_ERROR_ENGINE_0),
}
irq = platform_get_irq(pdev, 0);
- if (irq < 0) {
- dev_err(dev, "unable to request IRQ%d\n", irq);
+ if (irq < 0)
return -ENODEV;
- }
for (i = 0; i < NUM_EVENTS; i++) {
event_irq = irq_create_mapping(port->event_domain, i);
struct rcar_msi {
DECLARE_BITMAP(used, INT_PCI_MSI_NR);
struct irq_domain *domain;
- struct msi_controller chip;
- unsigned long pages;
- struct mutex lock;
+ struct mutex map_lock;
+ spinlock_t mask_lock;
int irq1;
int irq2;
};
-static inline struct rcar_msi *to_rcar_msi(struct msi_controller *chip)
-{
- return container_of(chip, struct rcar_msi, chip);
-}
-
/* Structure representing the PCIe interface */
struct rcar_pcie_host {
struct rcar_pcie pcie;
int (*phy_init_fn)(struct rcar_pcie_host *host);
};
+static struct rcar_pcie_host *msi_to_host(struct rcar_msi *msi)
+{
+ return container_of(msi, struct rcar_pcie_host, msi);
+}
+
static u32 rcar_read_conf(struct rcar_pcie *pcie, int where)
{
unsigned int shift = BITS_PER_BYTE * (where & 3);
bridge->sysdata = host;
bridge->ops = &rcar_pcie_ops;
- if (IS_ENABLED(CONFIG_PCI_MSI))
- bridge->msi = &host->msi.chip;
return pci_host_probe(bridge);
}
return err;
}
-static int rcar_msi_alloc(struct rcar_msi *chip)
-{
- int msi;
-
- mutex_lock(&chip->lock);
-
- msi = find_first_zero_bit(chip->used, INT_PCI_MSI_NR);
- if (msi < INT_PCI_MSI_NR)
- set_bit(msi, chip->used);
- else
- msi = -ENOSPC;
-
- mutex_unlock(&chip->lock);
-
- return msi;
-}
-
-static int rcar_msi_alloc_region(struct rcar_msi *chip, int no_irqs)
-{
- int msi;
-
- mutex_lock(&chip->lock);
- msi = bitmap_find_free_region(chip->used, INT_PCI_MSI_NR,
- order_base_2(no_irqs));
- mutex_unlock(&chip->lock);
-
- return msi;
-}
-
-static void rcar_msi_free(struct rcar_msi *chip, unsigned long irq)
-{
- mutex_lock(&chip->lock);
- clear_bit(irq, chip->used);
- mutex_unlock(&chip->lock);
-}
-
static irqreturn_t rcar_pcie_msi_irq(int irq, void *data)
{
struct rcar_pcie_host *host = data;
unsigned int index = find_first_bit(®, 32);
unsigned int msi_irq;
- /* clear the interrupt */
- rcar_pci_write_reg(pcie, 1 << index, PCIEMSIFR);
-
- msi_irq = irq_find_mapping(msi->domain, index);
+ msi_irq = irq_find_mapping(msi->domain->parent, index);
if (msi_irq) {
- if (test_bit(index, msi->used))
- generic_handle_irq(msi_irq);
- else
- dev_info(dev, "unhandled MSI\n");
+ generic_handle_irq(msi_irq);
} else {
/* Unknown MSI, just clear it */
dev_dbg(dev, "unexpected MSI\n");
+ rcar_pci_write_reg(pcie, BIT(index), PCIEMSIFR);
}
/* see if there's any more pending in this vector */
return IRQ_HANDLED;
}
-static int rcar_msi_setup_irq(struct msi_controller *chip, struct pci_dev *pdev,
- struct msi_desc *desc)
+static void rcar_msi_top_irq_ack(struct irq_data *d)
{
- struct rcar_msi *msi = to_rcar_msi(chip);
- struct rcar_pcie_host *host = container_of(chip, struct rcar_pcie_host,
- msi.chip);
- struct rcar_pcie *pcie = &host->pcie;
- struct msi_msg msg;
- unsigned int irq;
- int hwirq;
+ irq_chip_ack_parent(d);
+}
- hwirq = rcar_msi_alloc(msi);
- if (hwirq < 0)
- return hwirq;
+static void rcar_msi_top_irq_mask(struct irq_data *d)
+{
+ pci_msi_mask_irq(d);
+ irq_chip_mask_parent(d);
+}
- irq = irq_find_mapping(msi->domain, hwirq);
- if (!irq) {
- rcar_msi_free(msi, hwirq);
- return -EINVAL;
- }
+static void rcar_msi_top_irq_unmask(struct irq_data *d)
+{
+ pci_msi_unmask_irq(d);
+ irq_chip_unmask_parent(d);
+}
- irq_set_msi_desc(irq, desc);
+static struct irq_chip rcar_msi_top_chip = {
+ .name = "PCIe MSI",
+ .irq_ack = rcar_msi_top_irq_ack,
+ .irq_mask = rcar_msi_top_irq_mask,
+ .irq_unmask = rcar_msi_top_irq_unmask,
+};
- msg.address_lo = rcar_pci_read_reg(pcie, PCIEMSIALR) & ~MSIFE;
- msg.address_hi = rcar_pci_read_reg(pcie, PCIEMSIAUR);
- msg.data = hwirq;
+static void rcar_msi_irq_ack(struct irq_data *d)
+{
+ struct rcar_msi *msi = irq_data_get_irq_chip_data(d);
+ struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
- pci_write_msi_msg(irq, &msg);
+ /* clear the interrupt */
+ rcar_pci_write_reg(pcie, BIT(d->hwirq), PCIEMSIFR);
+}
- return 0;
+static void rcar_msi_irq_mask(struct irq_data *d)
+{
+ struct rcar_msi *msi = irq_data_get_irq_chip_data(d);
+ struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
+ unsigned long flags;
+ u32 value;
+
+ spin_lock_irqsave(&msi->mask_lock, flags);
+ value = rcar_pci_read_reg(pcie, PCIEMSIIER);
+ value &= ~BIT(d->hwirq);
+ rcar_pci_write_reg(pcie, value, PCIEMSIIER);
+ spin_unlock_irqrestore(&msi->mask_lock, flags);
}
-static int rcar_msi_setup_irqs(struct msi_controller *chip,
- struct pci_dev *pdev, int nvec, int type)
+static void rcar_msi_irq_unmask(struct irq_data *d)
{
- struct rcar_msi *msi = to_rcar_msi(chip);
- struct rcar_pcie_host *host = container_of(chip, struct rcar_pcie_host,
- msi.chip);
- struct rcar_pcie *pcie = &host->pcie;
- struct msi_desc *desc;
- struct msi_msg msg;
- unsigned int irq;
- int hwirq;
- int i;
+ struct rcar_msi *msi = irq_data_get_irq_chip_data(d);
+ struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
+ unsigned long flags;
+ u32 value;
+
+ spin_lock_irqsave(&msi->mask_lock, flags);
+ value = rcar_pci_read_reg(pcie, PCIEMSIIER);
+ value |= BIT(d->hwirq);
+ rcar_pci_write_reg(pcie, value, PCIEMSIIER);
+ spin_unlock_irqrestore(&msi->mask_lock, flags);
+}
- /* MSI-X interrupts are not supported */
- if (type == PCI_CAP_ID_MSIX)
- return -EINVAL;
+static int rcar_msi_set_affinity(struct irq_data *d, const struct cpumask *mask, bool force)
+{
+ return -EINVAL;
+}
- WARN_ON(!list_is_singular(&pdev->dev.msi_list));
- desc = list_entry(pdev->dev.msi_list.next, struct msi_desc, list);
+static void rcar_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+ struct rcar_msi *msi = irq_data_get_irq_chip_data(data);
+ struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
- hwirq = rcar_msi_alloc_region(msi, nvec);
- if (hwirq < 0)
- return -ENOSPC;
+ msg->address_lo = rcar_pci_read_reg(pcie, PCIEMSIALR) & ~MSIFE;
+ msg->address_hi = rcar_pci_read_reg(pcie, PCIEMSIAUR);
+ msg->data = data->hwirq;
+}
- irq = irq_find_mapping(msi->domain, hwirq);
- if (!irq)
- return -ENOSPC;
+static struct irq_chip rcar_msi_bottom_chip = {
+ .name = "Rcar MSI",
+ .irq_ack = rcar_msi_irq_ack,
+ .irq_mask = rcar_msi_irq_mask,
+ .irq_unmask = rcar_msi_irq_unmask,
+ .irq_set_affinity = rcar_msi_set_affinity,
+ .irq_compose_msi_msg = rcar_compose_msi_msg,
+};
- for (i = 0; i < nvec; i++) {
- /*
- * irq_create_mapping() called from rcar_pcie_probe() pre-
- * allocates descs, so there is no need to allocate descs here.
- * We can therefore assume that if irq_find_mapping() above
- * returns non-zero, then the descs are also successfully
- * allocated.
- */
- if (irq_set_msi_desc_off(irq, i, desc)) {
- /* TODO: clear */
- return -EINVAL;
- }
- }
+static int rcar_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *args)
+{
+ struct rcar_msi *msi = domain->host_data;
+ unsigned int i;
+ int hwirq;
- desc->nvec_used = nvec;
- desc->msi_attrib.multiple = order_base_2(nvec);
+ mutex_lock(&msi->map_lock);
- msg.address_lo = rcar_pci_read_reg(pcie, PCIEMSIALR) & ~MSIFE;
- msg.address_hi = rcar_pci_read_reg(pcie, PCIEMSIAUR);
- msg.data = hwirq;
+ hwirq = bitmap_find_free_region(msi->used, INT_PCI_MSI_NR, order_base_2(nr_irqs));
- pci_write_msi_msg(irq, &msg);
+ mutex_unlock(&msi->map_lock);
+
+ if (hwirq < 0)
+ return -ENOSPC;
+
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_set_info(domain, virq + i, hwirq + i,
+ &rcar_msi_bottom_chip, domain->host_data,
+ handle_edge_irq, NULL, NULL);
return 0;
}
-static void rcar_msi_teardown_irq(struct msi_controller *chip, unsigned int irq)
+static void rcar_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
{
- struct rcar_msi *msi = to_rcar_msi(chip);
- struct irq_data *d = irq_get_irq_data(irq);
-
- rcar_msi_free(msi, d->hwirq);
-}
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct rcar_msi *msi = domain->host_data;
-static struct irq_chip rcar_msi_irq_chip = {
- .name = "R-Car PCIe MSI",
- .irq_enable = pci_msi_unmask_irq,
- .irq_disable = pci_msi_mask_irq,
- .irq_mask = pci_msi_mask_irq,
- .irq_unmask = pci_msi_unmask_irq,
-};
+ mutex_lock(&msi->map_lock);
-static int rcar_msi_map(struct irq_domain *domain, unsigned int irq,
- irq_hw_number_t hwirq)
-{
- irq_set_chip_and_handler(irq, &rcar_msi_irq_chip, handle_simple_irq);
- irq_set_chip_data(irq, domain->host_data);
+ bitmap_release_region(msi->used, d->hwirq, order_base_2(nr_irqs));
- return 0;
+ mutex_unlock(&msi->map_lock);
}
-static const struct irq_domain_ops msi_domain_ops = {
- .map = rcar_msi_map,
+static const struct irq_domain_ops rcar_msi_domain_ops = {
+ .alloc = rcar_msi_domain_alloc,
+ .free = rcar_msi_domain_free,
+};
+
+static struct msi_domain_info rcar_msi_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+ MSI_FLAG_MULTI_PCI_MSI),
+ .chip = &rcar_msi_top_chip,
};
-static void rcar_pcie_unmap_msi(struct rcar_pcie_host *host)
+static int rcar_allocate_domains(struct rcar_msi *msi)
{
- struct rcar_msi *msi = &host->msi;
- int i, irq;
+ struct rcar_pcie *pcie = &msi_to_host(msi)->pcie;
+ struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
+ struct irq_domain *parent;
+
+ parent = irq_domain_create_linear(fwnode, INT_PCI_MSI_NR,
+ &rcar_msi_domain_ops, msi);
+ if (!parent) {
+ dev_err(pcie->dev, "failed to create IRQ domain\n");
+ return -ENOMEM;
+ }
+ irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
- for (i = 0; i < INT_PCI_MSI_NR; i++) {
- irq = irq_find_mapping(msi->domain, i);
- if (irq > 0)
- irq_dispose_mapping(irq);
+ msi->domain = pci_msi_create_irq_domain(fwnode, &rcar_msi_info, parent);
+ if (!msi->domain) {
+ dev_err(pcie->dev, "failed to create MSI domain\n");
+ irq_domain_remove(parent);
+ return -ENOMEM;
}
- irq_domain_remove(msi->domain);
+ return 0;
}
-static void rcar_pcie_hw_enable_msi(struct rcar_pcie_host *host)
+static void rcar_free_domains(struct rcar_msi *msi)
{
- struct rcar_pcie *pcie = &host->pcie;
- struct rcar_msi *msi = &host->msi;
- unsigned long base;
-
- /* setup MSI data target */
- base = virt_to_phys((void *)msi->pages);
+ struct irq_domain *parent = msi->domain->parent;
- rcar_pci_write_reg(pcie, lower_32_bits(base) | MSIFE, PCIEMSIALR);
- rcar_pci_write_reg(pcie, upper_32_bits(base), PCIEMSIAUR);
-
- /* enable all MSI interrupts */
- rcar_pci_write_reg(pcie, 0xffffffff, PCIEMSIIER);
+ irq_domain_remove(msi->domain);
+ irq_domain_remove(parent);
}
static int rcar_pcie_enable_msi(struct rcar_pcie_host *host)
struct rcar_pcie *pcie = &host->pcie;
struct device *dev = pcie->dev;
struct rcar_msi *msi = &host->msi;
- int err, i;
-
- mutex_init(&msi->lock);
+ struct resource res;
+ int err;
- msi->chip.dev = dev;
- msi->chip.setup_irq = rcar_msi_setup_irq;
- msi->chip.setup_irqs = rcar_msi_setup_irqs;
- msi->chip.teardown_irq = rcar_msi_teardown_irq;
+ mutex_init(&msi->map_lock);
+ spin_lock_init(&msi->mask_lock);
- msi->domain = irq_domain_add_linear(dev->of_node, INT_PCI_MSI_NR,
- &msi_domain_ops, &msi->chip);
- if (!msi->domain) {
- dev_err(dev, "failed to create IRQ domain\n");
- return -ENOMEM;
- }
+ err = of_address_to_resource(dev->of_node, 0, &res);
+ if (err)
+ return err;
- for (i = 0; i < INT_PCI_MSI_NR; i++)
- irq_create_mapping(msi->domain, i);
+ err = rcar_allocate_domains(msi);
+ if (err)
+ return err;
/* Two irqs are for MSI, but they are also used for non-MSI irqs */
err = devm_request_irq(dev, msi->irq1, rcar_pcie_msi_irq,
IRQF_SHARED | IRQF_NO_THREAD,
- rcar_msi_irq_chip.name, host);
+ rcar_msi_bottom_chip.name, host);
if (err < 0) {
dev_err(dev, "failed to request IRQ: %d\n", err);
goto err;
err = devm_request_irq(dev, msi->irq2, rcar_pcie_msi_irq,
IRQF_SHARED | IRQF_NO_THREAD,
- rcar_msi_irq_chip.name, host);
+ rcar_msi_bottom_chip.name, host);
if (err < 0) {
dev_err(dev, "failed to request IRQ: %d\n", err);
goto err;
}
- /* setup MSI data target */
- msi->pages = __get_free_pages(GFP_KERNEL | GFP_DMA32, 0);
- rcar_pcie_hw_enable_msi(host);
+ /* disable all MSIs */
+ rcar_pci_write_reg(pcie, 0, PCIEMSIIER);
+
+ /*
+ * Setup MSI data target using RC base address address, which
+ * is guaranteed to be in the low 32bit range on any RCar HW.
+ */
+ rcar_pci_write_reg(pcie, lower_32_bits(res.start) | MSIFE, PCIEMSIALR);
+ rcar_pci_write_reg(pcie, upper_32_bits(res.start), PCIEMSIAUR);
return 0;
err:
- rcar_pcie_unmap_msi(host);
+ rcar_free_domains(msi);
return err;
}
static void rcar_pcie_teardown_msi(struct rcar_pcie_host *host)
{
struct rcar_pcie *pcie = &host->pcie;
- struct rcar_msi *msi = &host->msi;
/* Disable all MSI interrupts */
rcar_pci_write_reg(pcie, 0, PCIEMSIIER);
/* Disable address decoding of the MSI interrupt, MSIFE */
rcar_pci_write_reg(pcie, 0, PCIEMSIALR);
- free_pages(msi->pages, 0);
-
- rcar_pcie_unmap_msi(host);
+ rcar_free_domains(&host->msi);
}
static int rcar_pcie_get_resources(struct rcar_pcie_host *host)
dev_info(dev, "PCIe x%d: link up\n", (data >> 20) & 0x3f);
/* Enable MSI */
- if (IS_ENABLED(CONFIG_PCI_MSI))
- rcar_pcie_hw_enable_msi(host);
+ if (IS_ENABLED(CONFIG_PCI_MSI)) {
+ struct resource res;
+ u32 val;
+
+ of_address_to_resource(dev->of_node, 0, &res);
+ rcar_pci_write_reg(pcie, upper_32_bits(res.start), PCIEMSIAUR);
+ rcar_pci_write_reg(pcie, lower_32_bits(res.start) | MSIFE, PCIEMSIALR);
+
+ bitmap_to_arr32(&val, host->msi.used, INT_PCI_MSI_NR);
+ rcar_pci_write_reg(pcie, val, PCIEMSIIER);
+ }
rcar_pcie_hw_enable(host);
/* Bridge core config registers */
#define BRCFG_PCIE_RX0 0x00000000
+#define BRCFG_PCIE_RX1 0x00000004
#define BRCFG_INTERRUPT 0x00000010
#define BRCFG_PCIE_RX_MSG_FILTER 0x00000020
#define NWL_ECAM_VALUE_DEFAULT 12
#define CFG_DMA_REG_BAR GENMASK(2, 0)
+#define CFG_PCIE_CACHE GENMASK(7, 0)
#define INT_PCI_MSI_NR (2 * 32)
nwl_bridge_writel(pcie, CFG_ENABLE_MSG_FILTER_MASK,
BRCFG_PCIE_RX_MSG_FILTER);
+ /* This routes the PCIe DMA traffic to go through CCI path */
+ if (of_dma_is_coherent(dev->of_node))
+ nwl_bridge_writel(pcie, nwl_bridge_readl(pcie, BRCFG_PCIE_RX1) |
+ CFG_PCIE_CACHE, BRCFG_PCIE_RX1);
+
err = nwl_wait_for_link(pcie);
if (err)
return err;
/**
* struct xilinx_pcie_port - PCIe port information
* @reg_base: IO Mapped Register Base
- * @irq: Interrupt number
- * @msi_pages: MSI pages
* @dev: Device pointer
+ * @msi_map: Bitmap of allocated MSIs
+ * @map_lock: Mutex protecting the MSI allocation
* @msi_domain: MSI IRQ domain pointer
* @leg_domain: Legacy IRQ domain pointer
* @resources: Bus Resources
*/
struct xilinx_pcie_port {
void __iomem *reg_base;
- u32 irq;
- unsigned long msi_pages;
struct device *dev;
+ unsigned long msi_map[BITS_TO_LONGS(XILINX_NUM_MSI_IRQS)];
+ struct mutex map_lock;
struct irq_domain *msi_domain;
struct irq_domain *leg_domain;
struct list_head resources;
};
-static DECLARE_BITMAP(msi_irq_in_use, XILINX_NUM_MSI_IRQS);
-
static inline u32 pcie_read(struct xilinx_pcie_port *port, u32 reg)
{
return readl(port->reg_base + reg);
/* MSI functions */
-/**
- * xilinx_pcie_destroy_msi - Free MSI number
- * @irq: IRQ to be freed
- */
-static void xilinx_pcie_destroy_msi(unsigned int irq)
+static void xilinx_msi_top_irq_ack(struct irq_data *d)
{
- struct msi_desc *msi;
- struct xilinx_pcie_port *port;
- struct irq_data *d = irq_get_irq_data(irq);
- irq_hw_number_t hwirq = irqd_to_hwirq(d);
-
- if (!test_bit(hwirq, msi_irq_in_use)) {
- msi = irq_get_msi_desc(irq);
- port = msi_desc_to_pci_sysdata(msi);
- dev_err(port->dev, "Trying to free unused MSI#%d\n", irq);
- } else {
- clear_bit(hwirq, msi_irq_in_use);
- }
+ /*
+ * xilinx_pcie_intr_handler() will have performed the Ack.
+ * Eventually, this should be fixed and the Ack be moved in
+ * the respective callbacks for INTx and MSI.
+ */
}
-/**
- * xilinx_pcie_assign_msi - Allocate MSI number
- *
- * Return: A valid IRQ on success and error value on failure.
- */
-static int xilinx_pcie_assign_msi(void)
-{
- int pos;
-
- pos = find_first_zero_bit(msi_irq_in_use, XILINX_NUM_MSI_IRQS);
- if (pos < XILINX_NUM_MSI_IRQS)
- set_bit(pos, msi_irq_in_use);
- else
- return -ENOSPC;
+static struct irq_chip xilinx_msi_top_chip = {
+ .name = "PCIe MSI",
+ .irq_ack = xilinx_msi_top_irq_ack,
+};
- return pos;
+static int xilinx_msi_set_affinity(struct irq_data *d, const struct cpumask *mask, bool force)
+{
+ return -EINVAL;
}
-/**
- * xilinx_msi_teardown_irq - Destroy the MSI
- * @chip: MSI Chip descriptor
- * @irq: MSI IRQ to destroy
- */
-static void xilinx_msi_teardown_irq(struct msi_controller *chip,
- unsigned int irq)
+static void xilinx_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
{
- xilinx_pcie_destroy_msi(irq);
- irq_dispose_mapping(irq);
+ struct xilinx_pcie_port *pcie = irq_data_get_irq_chip_data(data);
+ phys_addr_t pa = ALIGN_DOWN(virt_to_phys(pcie), SZ_4K);
+
+ msg->address_lo = lower_32_bits(pa);
+ msg->address_hi = upper_32_bits(pa);
+ msg->data = data->hwirq;
}
-/**
- * xilinx_pcie_msi_setup_irq - Setup MSI request
- * @chip: MSI chip pointer
- * @pdev: PCIe device pointer
- * @desc: MSI descriptor pointer
- *
- * Return: '0' on success and error value on failure
- */
-static int xilinx_pcie_msi_setup_irq(struct msi_controller *chip,
- struct pci_dev *pdev,
- struct msi_desc *desc)
-{
- struct xilinx_pcie_port *port = pdev->bus->sysdata;
- unsigned int irq;
- int hwirq;
- struct msi_msg msg;
- phys_addr_t msg_addr;
+static struct irq_chip xilinx_msi_bottom_chip = {
+ .name = "Xilinx MSI",
+ .irq_set_affinity = xilinx_msi_set_affinity,
+ .irq_compose_msi_msg = xilinx_compose_msi_msg,
+};
- hwirq = xilinx_pcie_assign_msi();
- if (hwirq < 0)
- return hwirq;
+static int xilinx_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs, void *args)
+{
+ struct xilinx_pcie_port *port = domain->host_data;
+ int hwirq, i;
- irq = irq_create_mapping(port->msi_domain, hwirq);
- if (!irq)
- return -EINVAL;
+ mutex_lock(&port->map_lock);
- irq_set_msi_desc(irq, desc);
+ hwirq = bitmap_find_free_region(port->msi_map, XILINX_NUM_MSI_IRQS, order_base_2(nr_irqs));
- msg_addr = virt_to_phys((void *)port->msi_pages);
+ mutex_unlock(&port->map_lock);
- msg.address_hi = 0;
- msg.address_lo = msg_addr;
- msg.data = irq;
+ if (hwirq < 0)
+ return -ENOSPC;
- pci_write_msi_msg(irq, &msg);
+ for (i = 0; i < nr_irqs; i++)
+ irq_domain_set_info(domain, virq + i, hwirq + i,
+ &xilinx_msi_bottom_chip, domain->host_data,
+ handle_edge_irq, NULL, NULL);
return 0;
}
-/* MSI Chip Descriptor */
-static struct msi_controller xilinx_pcie_msi_chip = {
- .setup_irq = xilinx_pcie_msi_setup_irq,
- .teardown_irq = xilinx_msi_teardown_irq,
-};
+static void xilinx_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+ unsigned int nr_irqs)
+{
+ struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+ struct xilinx_pcie_port *port = domain->host_data;
-/* HW Interrupt Chip Descriptor */
-static struct irq_chip xilinx_msi_irq_chip = {
- .name = "Xilinx PCIe MSI",
- .irq_enable = pci_msi_unmask_irq,
- .irq_disable = pci_msi_mask_irq,
- .irq_mask = pci_msi_mask_irq,
- .irq_unmask = pci_msi_unmask_irq,
-};
+ mutex_lock(&port->map_lock);
-/**
- * xilinx_pcie_msi_map - Set the handler for the MSI and mark IRQ as valid
- * @domain: IRQ domain
- * @irq: Virtual IRQ number
- * @hwirq: HW interrupt number
- *
- * Return: Always returns 0.
- */
-static int xilinx_pcie_msi_map(struct irq_domain *domain, unsigned int irq,
- irq_hw_number_t hwirq)
-{
- irq_set_chip_and_handler(irq, &xilinx_msi_irq_chip, handle_simple_irq);
- irq_set_chip_data(irq, domain->host_data);
+ bitmap_release_region(port->msi_map, d->hwirq, order_base_2(nr_irqs));
- return 0;
+ mutex_unlock(&port->map_lock);
}
-/* IRQ Domain operations */
-static const struct irq_domain_ops msi_domain_ops = {
- .map = xilinx_pcie_msi_map,
+static const struct irq_domain_ops xilinx_msi_domain_ops = {
+ .alloc = xilinx_msi_domain_alloc,
+ .free = xilinx_msi_domain_free,
};
-/**
- * xilinx_pcie_enable_msi - Enable MSI support
- * @port: PCIe port information
- */
-static int xilinx_pcie_enable_msi(struct xilinx_pcie_port *port)
+static struct msi_domain_info xilinx_msi_info = {
+ .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS),
+ .chip = &xilinx_msi_top_chip,
+};
+
+static int xilinx_allocate_msi_domains(struct xilinx_pcie_port *pcie)
{
- phys_addr_t msg_addr;
+ struct fwnode_handle *fwnode = dev_fwnode(pcie->dev);
+ struct irq_domain *parent;
- port->msi_pages = __get_free_pages(GFP_KERNEL, 0);
- if (!port->msi_pages)
+ parent = irq_domain_create_linear(fwnode, XILINX_NUM_MSI_IRQS,
+ &xilinx_msi_domain_ops, pcie);
+ if (!parent) {
+ dev_err(pcie->dev, "failed to create IRQ domain\n");
return -ENOMEM;
+ }
+ irq_domain_update_bus_token(parent, DOMAIN_BUS_NEXUS);
- msg_addr = virt_to_phys((void *)port->msi_pages);
- pcie_write(port, 0x0, XILINX_PCIE_REG_MSIBASE1);
- pcie_write(port, msg_addr, XILINX_PCIE_REG_MSIBASE2);
+ pcie->msi_domain = pci_msi_create_irq_domain(fwnode, &xilinx_msi_info, parent);
+ if (!pcie->msi_domain) {
+ dev_err(pcie->dev, "failed to create MSI domain\n");
+ irq_domain_remove(parent);
+ return -ENOMEM;
+ }
return 0;
}
+static void xilinx_free_msi_domains(struct xilinx_pcie_port *pcie)
+{
+ struct irq_domain *parent = pcie->msi_domain->parent;
+
+ irq_domain_remove(pcie->msi_domain);
+ irq_domain_remove(parent);
+}
+
/* INTx Functions */
/**
}
if (status & (XILINX_PCIE_INTR_INTX | XILINX_PCIE_INTR_MSI)) {
+ unsigned int irq;
+
val = pcie_read(port, XILINX_PCIE_REG_RPIFR1);
/* Check whether interrupt valid */
if (val & XILINX_PCIE_RPIFR1_MSI_INTR) {
val = pcie_read(port, XILINX_PCIE_REG_RPIFR2) &
XILINX_PCIE_RPIFR2_MSG_DATA;
+ irq = irq_find_mapping(port->msi_domain->parent, val);
} else {
val = (val & XILINX_PCIE_RPIFR1_INTR_MASK) >>
XILINX_PCIE_RPIFR1_INTR_SHIFT;
- val = irq_find_mapping(port->leg_domain, val);
+ irq = irq_find_mapping(port->leg_domain, val);
}
/* Clear interrupt FIFO register 1 */
pcie_write(port, XILINX_PCIE_RPIFR1_ALL_MASK,
XILINX_PCIE_REG_RPIFR1);
- /* Handle the interrupt */
- if (IS_ENABLED(CONFIG_PCI_MSI) ||
- !(val & XILINX_PCIE_RPIFR1_MSI_INTR))
- generic_handle_irq(val);
+ if (irq)
+ generic_handle_irq(irq);
}
if (status & XILINX_PCIE_INTR_SLV_UNSUPP)
static int xilinx_pcie_init_irq_domain(struct xilinx_pcie_port *port)
{
struct device *dev = port->dev;
- struct device_node *node = dev->of_node;
struct device_node *pcie_intc_node;
int ret;
/* Setup INTx */
- pcie_intc_node = of_get_next_child(node, NULL);
+ pcie_intc_node = of_get_next_child(dev->of_node, NULL);
if (!pcie_intc_node) {
dev_err(dev, "No PCIe Intc node found\n");
return -ENODEV;
/* Setup MSI */
if (IS_ENABLED(CONFIG_PCI_MSI)) {
- port->msi_domain = irq_domain_add_linear(node,
- XILINX_NUM_MSI_IRQS,
- &msi_domain_ops,
- &xilinx_pcie_msi_chip);
- if (!port->msi_domain) {
- dev_err(dev, "Failed to get a MSI IRQ domain\n");
- return -ENODEV;
- }
+ phys_addr_t pa = ALIGN_DOWN(virt_to_phys(port), SZ_4K);
- ret = xilinx_pcie_enable_msi(port);
+ ret = xilinx_allocate_msi_domains(port);
if (ret)
return ret;
+
+ pcie_write(port, upper_32_bits(pa), XILINX_PCIE_REG_MSIBASE1);
+ pcie_write(port, lower_32_bits(pa), XILINX_PCIE_REG_MSIBASE2);
}
return 0;
struct device *dev = port->dev;
struct device_node *node = dev->of_node;
struct resource regs;
+ unsigned int irq;
int err;
err = of_address_to_resource(node, 0, ®s);
if (IS_ERR(port->reg_base))
return PTR_ERR(port->reg_base);
- port->irq = irq_of_parse_and_map(node, 0);
- err = devm_request_irq(dev, port->irq, xilinx_pcie_intr_handler,
+ irq = irq_of_parse_and_map(node, 0);
+ err = devm_request_irq(dev, irq, xilinx_pcie_intr_handler,
IRQF_SHARED | IRQF_NO_THREAD,
"xilinx-pcie", port);
if (err) {
- dev_err(dev, "unable to request irq %d\n", port->irq);
+ dev_err(dev, "unable to request irq %d\n", irq);
return err;
}
return -ENODEV;
port = pci_host_bridge_priv(bridge);
-
+ mutex_init(&port->map_lock);
port->dev = dev;
err = xilinx_pcie_parse_dt(port);
bridge->sysdata = port;
bridge->ops = &xilinx_pcie_ops;
-#ifdef CONFIG_PCI_MSI
- xilinx_pcie_msi_chip.dev = dev;
- bridge->msi = &xilinx_pcie_msi_chip;
-#endif
- return pci_host_probe(bridge);
+ err = pci_host_probe(bridge);
+ if (err)
+ xilinx_free_msi_domains(port);
+
+ return err;
}
static const struct of_device_id xilinx_pcie_of_match[] = {
#define BUS_RESTRICT_CAP(vmcap) (vmcap & 0x1)
#define PCI_REG_VMCONFIG 0x44
#define BUS_RESTRICT_CFG(vmcfg) ((vmcfg >> 8) & 0x3)
+#define VMCONFIG_MSI_REMAP 0x2
#define PCI_REG_VMLOCK 0x70
#define MB2_SHADOW_EN(vmlock) (vmlock & 0x2)
* be used for MSI remapping
*/
VMD_FEAT_OFFSET_FIRST_VECTOR = (1 << 3),
+
+ /*
+ * Device can bypass remapping MSI-X transactions into its MSI-X table,
+ * avoiding the requirement of a VMD MSI domain for child device
+ * interrupt handling.
+ */
+ VMD_FEAT_CAN_BYPASS_MSI_REMAP = (1 << 4),
};
/*
.chip = &vmd_msi_controller,
};
+static void vmd_set_msi_remapping(struct vmd_dev *vmd, bool enable)
+{
+ u16 reg;
+
+ pci_read_config_word(vmd->dev, PCI_REG_VMCONFIG, ®);
+ reg = enable ? (reg & ~VMCONFIG_MSI_REMAP) :
+ (reg | VMCONFIG_MSI_REMAP);
+ pci_write_config_word(vmd->dev, PCI_REG_VMCONFIG, reg);
+}
+
static int vmd_create_irq_domain(struct vmd_dev *vmd)
{
struct fwnode_handle *fn;
static void vmd_remove_irq_domain(struct vmd_dev *vmd)
{
+ /*
+ * Some production BIOS won't enable remapping between soft reboots.
+ * Ensure remapping is restored before unloading the driver.
+ */
+ if (!vmd->msix_count)
+ vmd_set_msi_remapping(vmd, true);
+
if (vmd->irq_domain) {
struct fwnode_handle *fn = vmd->irq_domain->fwnode;
sd->node = pcibus_to_node(vmd->dev->bus);
- ret = vmd_create_irq_domain(vmd);
- if (ret)
- return ret;
-
/*
- * Override the irq domain bus token so the domain can be distinguished
- * from a regular PCI/MSI domain.
+ * Currently MSI remapping must be enabled in guest passthrough mode
+ * due to some missing interrupt remapping plumbing. This is probably
+ * acceptable because the guest is usually CPU-limited and MSI
+ * remapping doesn't become a performance bottleneck.
*/
- irq_domain_update_bus_token(vmd->irq_domain, DOMAIN_BUS_VMD_MSI);
+ if (!(features & VMD_FEAT_CAN_BYPASS_MSI_REMAP) ||
+ offset[0] || offset[1]) {
+ ret = vmd_alloc_irqs(vmd);
+ if (ret)
+ return ret;
+
+ vmd_set_msi_remapping(vmd, true);
+
+ ret = vmd_create_irq_domain(vmd);
+ if (ret)
+ return ret;
+
+ /*
+ * Override the IRQ domain bus token so the domain can be
+ * distinguished from a regular PCI/MSI domain.
+ */
+ irq_domain_update_bus_token(vmd->irq_domain, DOMAIN_BUS_VMD_MSI);
+ } else {
+ vmd_set_msi_remapping(vmd, false);
+ }
pci_add_resource(&resources, &vmd->resources[0]);
pci_add_resource_offset(&resources, &vmd->resources[1], offset[0]);
if (features & VMD_FEAT_OFFSET_FIRST_VECTOR)
vmd->first_vec = 1;
- err = vmd_alloc_irqs(vmd);
- if (err)
- return err;
-
spin_lock_init(&vmd->cfg_lock);
pci_set_drvdata(dev, vmd);
err = vmd_enable_domain(vmd, features);
.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP,},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_VMD_28C0),
.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW |
- VMD_FEAT_HAS_BUS_RESTRICTIONS,},
+ VMD_FEAT_HAS_BUS_RESTRICTIONS |
+ VMD_FEAT_CAN_BYPASS_MSI_REMAP,},
{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x467f),
.driver_data = VMD_FEAT_HAS_MEMBAR_SHADOW_VSCAP |
VMD_FEAT_HAS_BUS_RESTRICTIONS |
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* Endpoint Function Driver to implement Non-Transparent Bridge functionality
*
* Copyright (C) 2020 Texas Instruments
/**
* epf_ntb_peer_spad_bar_clear() - Clear Peer Scratchpad BAR
- * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound
+ * address.
*
*+-----------------+------->+------------------+ +-----------------+
*| BAR0 | | CONFIG REGION | | BAR0 |
/**
* epf_ntb_peer_spad_bar_set() - Set peer scratchpad BAR
* @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
*
*+-----------------+------->+------------------+ +-----------------+
*| BAR0 | | CONFIG REGION | | BAR0 |
/**
* epf_ntb_config_sspad_bar_clear() - Clear Config + Self scratchpad BAR
- * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound
+ * address.
*
* +-----------------+------->+------------------+ +-----------------+
* | BAR0 | | CONFIG REGION | | BAR0 |
/**
* epf_ntb_config_sspad_bar_set() - Set Config + Self scratchpad BAR
- * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @ntb_epc: EPC associated with one of the HOST which holds peer's outbound
+ * address.
*
* +-----------------+------->+------------------+ +-----------------+
* | BAR0 | | CONFIG REGION | | BAR0 |
/**
* epf_ntb_alloc_peer_mem() - Allocate memory in peer's outbound address space
+ * @dev: The PCI device.
* @ntb_epc: EPC associated with one of the HOST whose BAR holds peer's outbound
* address
* @bar: BAR of @ntb_epc in for which memory has to be allocated (could be
* epf_ntb_init_epc_bar() - Identify BARs to be used for each of the NTB
* constructs (scratchpad region, doorbell, memorywindow)
* @ntb: NTB device that facilitates communication between HOST1 and HOST2
- * @type: PRIMARY interface or SECONDARY interface
*
* Wrapper to epf_ntb_init_epc_bar_interface() to identify the free BARs
* to be used for each of BAR_CONFIG, BAR_PEER_SPAD, BAR_DB_MW1, BAR_MW2,
/**
* epf_ntb_add_cfs() - Add configfs directory specific to NTB
* @epf: NTB endpoint function device
+ * @group: A pointer to the config_group structure referencing a group of
+ * config_items of a specific type that belong to a specific sub-system.
*
* Add configfs directory specific to NTB. This directory will hold
* NTB specific properties like db_count, spad_count, num_mws etc.,
// SPDX-License-Identifier: GPL-2.0
-/**
+/*
* Test driver to test endpoint functionality
*
* Copyright (C) 2017 Texas Instruments
return -EINVAL;
epc_features = pci_epc_get_features(epc, epf->func_no);
- if (epc_features) {
- linkup_notifier = epc_features->linkup_notifier;
- core_init_notifier = epc_features->core_init_notifier;
- test_reg_bar = pci_epc_get_first_free_bar(epc_features);
- if (test_reg_bar < 0)
- return -EINVAL;
- pci_epf_configure_bar(epf, epc_features);
+ if (!epc_features) {
+ dev_err(&epf->dev, "epc_features not implemented\n");
+ return -EOPNOTSUPP;
}
+ linkup_notifier = epc_features->linkup_notifier;
+ core_init_notifier = epc_features->core_init_notifier;
+ test_reg_bar = pci_epc_get_first_free_bar(epc_features);
+ if (test_reg_bar < 0)
+ return -EINVAL;
+ pci_epf_configure_bar(epf, epc_features);
+
epf_test->test_reg_bar = test_reg_bar;
epf_test->epc_features = epc_features;
ret = pci_epf_register_driver(&test_driver);
if (ret) {
+ destroy_workqueue(kpcitest_workqueue);
pr_err("Failed to register pci epf test driver --> %d\n", ret);
return ret;
}
static void __exit pci_epf_test_exit(void)
{
+ if (kpcitest_workqueue)
+ destroy_workqueue(kpcitest_workqueue);
pci_epf_unregister_driver(&test_driver);
}
module_exit(pci_epf_test_exit);
* pci_epc_remove_epf() - remove PCI endpoint function from endpoint controller
* @epc: the EPC device from which the endpoint function should be removed
* @epf: the endpoint function to be removed
+ * @type: identifies if the EPC is connected to the primary or secondary
+ * interface of EPF
*
* Invoke to remove PCI endpoint function from the endpoint controller.
*/
void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar,
enum pci_epc_interface_type type)
{
- struct device *dev = epf->epc->dev.parent;
+ struct device *dev;
struct pci_epf_bar *epf_bar;
struct pci_epc *epc;
}
/**
- * acpi_pcihp_check_ejectable - check if handle is ejectable ACPI PCI slot
+ * acpi_pci_check_ejectable - check if handle is ejectable ACPI PCI slot
* @pbus: the PCI bus of the PCI slot corresponding to 'handle'
* @handle: ACPI handle to check
*
* ACPI has no generic method of setting/getting attention status
* this allows for device specific driver registration
*/
-struct acpiphp_attention_info
-{
+struct acpiphp_attention_info {
int (*set_attn)(struct hotplug_slot *slot, u8 status);
int (*get_attn)(struct hotplug_slot *slot, u8 *status);
struct module *owner;
slot->flags &= ~SLOT_ENABLED;
continue;
}
+ pci_dev_put(dev);
}
}
static void __iomem *compaq_int15_entry_point;
/* lock for ordering int15_bios_call() */
-static spinlock_t int15_lock;
+static DEFINE_SPINLOCK(int15_lock);
/* This is a series of function that deals with
compaq_int15_entry_point = (rom_start + ROM_INT15_PHY_ADDR - ROM_PHY_ADDR);
dbg("int15 entry = %p\n", compaq_int15_entry_point);
-
- /* initialize our int15 lock */
- spin_lock_init(&int15_lock);
}
return rc;
zdev->state = ZPCI_FN_STATE_CONFIGURED;
- return zpci_configure_device(zdev, zdev->fh);
+ return zpci_scan_configured_device(zdev, zdev->fh);
}
static int disable_slot(struct hotplug_slot *hotplug_slot)
return readb(ctrl->creg + reg);
}
-static inline void shpc_writeb(struct controller *ctrl, int reg, u8 val)
-{
- writeb(val, ctrl->creg + reg);
-}
-
static inline u16 shpc_readw(struct controller *ctrl, int reg)
{
return readw(ctrl->creg + reg);
/* Arch hooks */
int __weak arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
- struct msi_controller *chip = dev->bus->msi;
- int err;
-
- if (!chip || !chip->setup_irq)
- return -EINVAL;
-
- err = chip->setup_irq(chip, dev, desc);
- if (err < 0)
- return err;
-
- irq_set_chip_data(desc->irq, chip);
-
- return 0;
+ return -EINVAL;
}
void __weak arch_teardown_msi_irq(unsigned int irq)
{
- struct msi_controller *chip = irq_get_chip_data(irq);
-
- if (!chip || !chip->teardown_irq)
- return;
-
- chip->teardown_irq(chip, irq);
}
int __weak arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
- struct msi_controller *chip = dev->bus->msi;
struct msi_desc *entry;
int ret;
- if (chip && chip->setup_irqs)
- return chip->setup_irqs(chip, dev, nvec, type);
/*
* If an architecture wants to support multiple MSI, it needs to
* override arch_setup_msi_irqs()
return 0;
}
-/*
- * We have a default implementation available as a separate non-weak
- * function, as it is used by the Xen x86 PCI code
- */
-void default_teardown_msi_irqs(struct pci_dev *dev)
+void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
{
int i;
struct msi_desc *entry;
for (i = 0; i < entry->nvec_used; i++)
arch_teardown_msi_irq(entry->irq + i);
}
-
-void __weak arch_teardown_msi_irqs(struct pci_dev *dev)
-{
- return default_teardown_msi_irqs(dev);
-}
#endif /* CONFIG_PCI_MSI_ARCH_FALLBACKS */
static void default_restore_msi_irq(struct pci_dev *dev, int irq)
* Any bridge which does NOT route MSI transactions from its
* secondary bus to its primary bus must set NO_MSI flag on
* the secondary pci_bus.
- * We expect only arch-specific PCI host bus controller driver
- * or quirks for specific PCI bridges to be setting NO_MSI.
+ *
+ * The NO_MSI flag can either be set directly by:
+ * - arch-specific PCI host bus controller drivers (deprecated)
+ * - quirks for specific PCI bridges
+ *
+ * or indirectly by platform-specific PCI host bridge drivers by
+ * advertising the 'msi_domain' property, which results in
+ * the NO_MSI flag when no MSI domain is found for this bridge
+ * at probe time.
*/
for (bus = dev->bus; bus; bus = bus->parent)
if (bus->bus_flags & PCI_BUS_FLAGS_NO_MSI)
EXPORT_SYMBOL_GPL(of_pci_parse_bus_range);
/**
- * This function will try to obtain the host bridge domain number by
- * finding a property called "linux,pci-domain" of the given device node.
+ * of_get_pci_domain_nr - Find the host bridge domain number
+ * of the given device node.
+ * @node: Device tree node with the domain information.
*
- * @node: device tree node with the domain information
+ * This function will try to obtain the host bridge domain number by finding
+ * a property called "linux,pci-domain" of the given device node.
+ *
+ * Return:
+ * * > 0 - On success, an associated domain number.
+ * * -EINVAL - The property "linux,pci-domain" does not exist.
+ * * -ENODATA - The linux,pci-domain" property does not have value.
+ * * -EOVERFLOW - Invalid "linux,pci-domain" property value.
*
* Returns the associated domain number from DT in the range [0-0xffff], or
* a negative value if the required property is not found.
#endif /* CONFIG_PCI */
/**
+ * of_pci_get_max_link_speed - Find the maximum link speed of the given device node.
+ * @node: Device tree node with the maximum link speed information.
+ *
* This function will try to find the limitation of link speed by finding
* a property called "max-link-speed" of the given device node.
*
- * @node: device tree node with the max link speed information
+ * Return:
+ * * > 0 - On success, a maximum link speed.
+ * * -EINVAL - Invalid "max-link-speed" property value, or failure to access
+ * the property of the device tree node.
*
* Returns the associated max link speed from DT, or a negative value if the
* required property is not found or is invalid.
if (!error)
pci_dbg(dev, "power state changed by ACPI to %s\n",
- acpi_power_state_string(state_conv[state]));
+ acpi_power_state_string(adev->power.state));
return error;
}
#include <linux/pci-acpi.h>
#include "pci.h"
+static bool device_has_acpi_name(struct device *dev)
+{
+#ifdef CONFIG_ACPI
+ acpi_handle handle = ACPI_HANDLE(dev);
+
+ if (!handle)
+ return false;
+
+ return acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2,
+ 1 << DSM_PCI_DEVICE_NAME);
+#else
+ return false;
+#endif
+}
+
#ifdef CONFIG_DMI
enum smbios_attr_enum {
SMBIOS_ATTR_NONE = 0,
{
const struct dmi_device *dmi;
struct dmi_dev_onboard *donboard;
- int domain_nr;
- int bus;
- int devfn;
-
- domain_nr = pci_domain_nr(pdev->bus);
- bus = pdev->bus->number;
- devfn = pdev->devfn;
+ int domain_nr = pci_domain_nr(pdev->bus);
+ int bus = pdev->bus->number;
+ int devfn = pdev->devfn;
dmi = NULL;
while ((dmi = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD,
donboard->devfn == devfn) {
if (buf) {
if (attribute == SMBIOS_ATTR_INSTANCE_SHOW)
- return scnprintf(buf, PAGE_SIZE,
- "%d\n",
- donboard->instance);
+ return sysfs_emit(buf, "%d\n",
+ donboard->instance);
else if (attribute == SMBIOS_ATTR_LABEL_SHOW)
- return scnprintf(buf, PAGE_SIZE,
- "%s\n",
- dmi->name);
+ return sysfs_emit(buf, "%s\n",
+ dmi->name);
}
return strlen(dmi->name);
}
return 0;
}
-static umode_t smbios_instance_string_exist(struct kobject *kobj,
- struct attribute *attr, int n)
+static ssize_t smbios_label_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
{
- struct device *dev;
- struct pci_dev *pdev;
-
- dev = kobj_to_dev(kobj);
- pdev = to_pci_dev(dev);
-
- return find_smbios_instance_string(pdev, NULL, SMBIOS_ATTR_NONE) ?
- S_IRUGO : 0;
-}
-
-static ssize_t smbioslabel_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct pci_dev *pdev;
- pdev = to_pci_dev(dev);
+ struct pci_dev *pdev = to_pci_dev(dev);
return find_smbios_instance_string(pdev, buf,
SMBIOS_ATTR_LABEL_SHOW);
}
+static struct device_attribute dev_attr_smbios_label = __ATTR(label, 0444,
+ smbios_label_show, NULL);
-static ssize_t smbiosinstance_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t index_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
- struct pci_dev *pdev;
- pdev = to_pci_dev(dev);
+ struct pci_dev *pdev = to_pci_dev(dev);
return find_smbios_instance_string(pdev, buf,
SMBIOS_ATTR_INSTANCE_SHOW);
}
+static DEVICE_ATTR_RO(index);
-static struct device_attribute smbios_attr_label = {
- .attr = {.name = "label", .mode = 0444},
- .show = smbioslabel_show,
-};
-
-static struct device_attribute smbios_attr_instance = {
- .attr = {.name = "index", .mode = 0444},
- .show = smbiosinstance_show,
-};
-
-static struct attribute *smbios_attributes[] = {
- &smbios_attr_label.attr,
- &smbios_attr_instance.attr,
+static struct attribute *smbios_attrs[] = {
+ &dev_attr_smbios_label.attr,
+ &dev_attr_index.attr,
NULL,
};
-static const struct attribute_group smbios_attr_group = {
- .attrs = smbios_attributes,
- .is_visible = smbios_instance_string_exist,
-};
-
-static int pci_create_smbiosname_file(struct pci_dev *pdev)
+static umode_t smbios_attr_is_visible(struct kobject *kobj, struct attribute *a,
+ int n)
{
- return sysfs_create_group(&pdev->dev.kobj, &smbios_attr_group);
-}
+ struct device *dev = kobj_to_dev(kobj);
+ struct pci_dev *pdev = to_pci_dev(dev);
-static void pci_remove_smbiosname_file(struct pci_dev *pdev)
-{
- sysfs_remove_group(&pdev->dev.kobj, &smbios_attr_group);
-}
-#else
-static inline int pci_create_smbiosname_file(struct pci_dev *pdev)
-{
- return -1;
-}
+ if (device_has_acpi_name(dev))
+ return 0;
-static inline void pci_remove_smbiosname_file(struct pci_dev *pdev)
-{
+ if (!find_smbios_instance_string(pdev, NULL, SMBIOS_ATTR_NONE))
+ return 0;
+
+ return a->mode;
}
+
+const struct attribute_group pci_dev_smbios_attr_group = {
+ .attrs = smbios_attrs,
+ .is_visible = smbios_attr_is_visible,
+};
#endif
#ifdef CONFIG_ACPI
static int dsm_get_label(struct device *dev, char *buf,
enum acpi_attr_enum attr)
{
- acpi_handle handle;
+ acpi_handle handle = ACPI_HANDLE(dev);
union acpi_object *obj, *tmp;
int len = -1;
- handle = ACPI_HANDLE(dev);
if (!handle)
return -1;
return len;
}
-static bool device_has_dsm(struct device *dev)
-{
- acpi_handle handle;
-
- handle = ACPI_HANDLE(dev);
- if (!handle)
- return false;
-
- return !!acpi_check_dsm(handle, &pci_acpi_dsm_guid, 0x2,
- 1 << DSM_PCI_DEVICE_NAME);
-}
-
-static umode_t acpi_index_string_exist(struct kobject *kobj,
- struct attribute *attr, int n)
-{
- struct device *dev;
-
- dev = kobj_to_dev(kobj);
-
- if (device_has_dsm(dev))
- return S_IRUGO;
-
- return 0;
-}
-
-static ssize_t acpilabel_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static ssize_t label_show(struct device *dev, struct device_attribute *attr,
+ char *buf)
{
return dsm_get_label(dev, buf, ACPI_ATTR_LABEL_SHOW);
}
+static DEVICE_ATTR_RO(label);
-static ssize_t acpiindex_show(struct device *dev,
+static ssize_t acpi_index_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
return dsm_get_label(dev, buf, ACPI_ATTR_INDEX_SHOW);
}
+static DEVICE_ATTR_RO(acpi_index);
-static struct device_attribute acpi_attr_label = {
- .attr = {.name = "label", .mode = 0444},
- .show = acpilabel_show,
-};
-
-static struct device_attribute acpi_attr_index = {
- .attr = {.name = "acpi_index", .mode = 0444},
- .show = acpiindex_show,
-};
-
-static struct attribute *acpi_attributes[] = {
- &acpi_attr_label.attr,
- &acpi_attr_index.attr,
+static struct attribute *acpi_attrs[] = {
+ &dev_attr_label.attr,
+ &dev_attr_acpi_index.attr,
NULL,
};
-static const struct attribute_group acpi_attr_group = {
- .attrs = acpi_attributes,
- .is_visible = acpi_index_string_exist,
-};
-
-static int pci_create_acpi_index_label_files(struct pci_dev *pdev)
+static umode_t acpi_attr_is_visible(struct kobject *kobj, struct attribute *a,
+ int n)
{
- return sysfs_create_group(&pdev->dev.kobj, &acpi_attr_group);
-}
+ struct device *dev = kobj_to_dev(kobj);
-static int pci_remove_acpi_index_label_files(struct pci_dev *pdev)
-{
- sysfs_remove_group(&pdev->dev.kobj, &acpi_attr_group);
- return 0;
-}
-#else
-static inline int pci_create_acpi_index_label_files(struct pci_dev *pdev)
-{
- return -1;
-}
+ if (!device_has_acpi_name(dev))
+ return 0;
-static inline int pci_remove_acpi_index_label_files(struct pci_dev *pdev)
-{
- return -1;
+ return a->mode;
}
-static inline bool device_has_dsm(struct device *dev)
-{
- return false;
-}
+const struct attribute_group pci_dev_acpi_attr_group = {
+ .attrs = acpi_attrs,
+ .is_visible = acpi_attr_is_visible,
+};
#endif
-
-void pci_create_firmware_label_files(struct pci_dev *pdev)
-{
- if (device_has_dsm(&pdev->dev))
- pci_create_acpi_index_label_files(pdev);
- else
- pci_create_smbiosname_file(pdev);
-}
-
-void pci_remove_firmware_label_files(struct pci_dev *pdev)
-{
- if (device_has_dsm(&pdev->dev))
- pci_remove_acpi_index_label_files(pdev);
- else
- pci_remove_smbiosname_file(pdev);
-}
struct pci_dev *pdev; \
\
pdev = to_pci_dev(dev); \
- return sprintf(buf, format_string, pdev->field); \
+ return sysfs_emit(buf, format_string, pdev->field); \
} \
static DEVICE_ATTR_RO(field)
char *buf)
{
struct pci_dev *pdev = to_pci_dev(dev);
- return sprintf(buf, "%u\n", pdev->broken_parity_status);
+ return sysfs_emit(buf, "%u\n", pdev->broken_parity_status);
}
static ssize_t broken_parity_status_store(struct device *dev,
{
struct pci_dev *pdev = to_pci_dev(dev);
- return sprintf(buf, "%s\n", pci_power_name(pdev->current_state));
+ return sysfs_emit(buf, "%s\n", pci_power_name(pdev->current_state));
}
static DEVICE_ATTR_RO(power_state);
char *buf)
{
struct pci_dev *pci_dev = to_pci_dev(dev);
- char *str = buf;
int i;
int max;
resource_size_t start, end;
+ size_t len = 0;
if (pci_dev->subordinate)
max = DEVICE_COUNT_RESOURCE;
for (i = 0; i < max; i++) {
struct resource *res = &pci_dev->resource[i];
pci_resource_to_user(pci_dev, i, res, &start, &end);
- str += sprintf(str, "0x%016llx 0x%016llx 0x%016llx\n",
- (unsigned long long)start,
- (unsigned long long)end,
- (unsigned long long)res->flags);
+ len += sysfs_emit_at(buf, len, "0x%016llx 0x%016llx 0x%016llx\n",
+ (unsigned long long)start,
+ (unsigned long long)end,
+ (unsigned long long)res->flags);
}
- return (str - buf);
+ return len;
}
static DEVICE_ATTR_RO(resource);
{
struct pci_dev *pdev = to_pci_dev(dev);
- return sprintf(buf, "%s\n",
- pci_speed_string(pcie_get_speed_cap(pdev)));
+ return sysfs_emit(buf, "%s\n",
+ pci_speed_string(pcie_get_speed_cap(pdev)));
}
static DEVICE_ATTR_RO(max_link_speed);
{
struct pci_dev *pdev = to_pci_dev(dev);
- return sprintf(buf, "%u\n", pcie_get_width_cap(pdev));
+ return sysfs_emit(buf, "%u\n", pcie_get_width_cap(pdev));
}
static DEVICE_ATTR_RO(max_link_width);
speed = pcie_link_speed[linkstat & PCI_EXP_LNKSTA_CLS];
- return sprintf(buf, "%s\n", pci_speed_string(speed));
+ return sysfs_emit(buf, "%s\n", pci_speed_string(speed));
}
static DEVICE_ATTR_RO(current_link_speed);
if (err)
return -EINVAL;
- return sprintf(buf, "%u\n",
+ return sysfs_emit(buf, "%u\n",
(linkstat & PCI_EXP_LNKSTA_NLW) >> PCI_EXP_LNKSTA_NLW_SHIFT);
}
static DEVICE_ATTR_RO(current_link_width);
if (err)
return -EINVAL;
- return sprintf(buf, "%u\n", sec_bus);
+ return sysfs_emit(buf, "%u\n", sec_bus);
}
static DEVICE_ATTR_RO(secondary_bus_number);
if (err)
return -EINVAL;
- return sprintf(buf, "%u\n", sub_bus);
+ return sysfs_emit(buf, "%u\n", sub_bus);
}
static DEVICE_ATTR_RO(subordinate_bus_number);
{
struct pci_dev *pci_dev = to_pci_dev(dev);
- return sprintf(buf, "%u\n", pci_ari_enabled(pci_dev->bus));
+ return sysfs_emit(buf, "%u\n", pci_ari_enabled(pci_dev->bus));
}
static DEVICE_ATTR_RO(ari_enabled);
{
struct pci_dev *pci_dev = to_pci_dev(dev);
- return sprintf(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X\n",
- pci_dev->vendor, pci_dev->device,
- pci_dev->subsystem_vendor, pci_dev->subsystem_device,
- (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8),
- (u8)(pci_dev->class));
+ return sysfs_emit(buf, "pci:v%08Xd%08Xsv%08Xsd%08Xbc%02Xsc%02Xi%02X\n",
+ pci_dev->vendor, pci_dev->device,
+ pci_dev->subsystem_vendor, pci_dev->subsystem_device,
+ (u8)(pci_dev->class >> 16), (u8)(pci_dev->class >> 8),
+ (u8)(pci_dev->class));
}
static DEVICE_ATTR_RO(modalias);
struct pci_dev *pdev;
pdev = to_pci_dev(dev);
- return sprintf(buf, "%u\n", atomic_read(&pdev->enable_cnt));
+ return sysfs_emit(buf, "%u\n", atomic_read(&pdev->enable_cnt));
}
static DEVICE_ATTR_RW(enable);
static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", dev->numa_node);
+ return sysfs_emit(buf, "%d\n", dev->numa_node);
}
static DEVICE_ATTR_RW(numa_node);
#endif
{
struct pci_dev *pdev = to_pci_dev(dev);
- return sprintf(buf, "%d\n", fls64(pdev->dma_mask));
+ return sysfs_emit(buf, "%d\n", fls64(pdev->dma_mask));
}
static DEVICE_ATTR_RO(dma_mask_bits);
struct device_attribute *attr,
char *buf)
{
- return sprintf(buf, "%d\n", fls64(dev->coherent_dma_mask));
+ return sysfs_emit(buf, "%d\n", fls64(dev->coherent_dma_mask));
}
static DEVICE_ATTR_RO(consistent_dma_mask_bits);
struct pci_dev *pdev = to_pci_dev(dev);
struct pci_bus *subordinate = pdev->subordinate;
- return sprintf(buf, "%u\n", subordinate ?
- !(subordinate->bus_flags & PCI_BUS_FLAGS_NO_MSI)
- : !pdev->no_msi);
+ return sysfs_emit(buf, "%u\n", subordinate ?
+ !(subordinate->bus_flags & PCI_BUS_FLAGS_NO_MSI)
+ : !pdev->no_msi);
}
static ssize_t msi_bus_store(struct device *dev, struct device_attribute *attr,
struct device_attribute *attr, char *buf)
{
struct pci_dev *pdev = to_pci_dev(dev);
- return sprintf(buf, "%u\n", pdev->d3cold_allowed);
+ return sysfs_emit(buf, "%u\n", pdev->d3cold_allowed);
}
static DEVICE_ATTR_RW(d3cold_allowed);
#endif
if (np == NULL)
return 0;
- return sprintf(buf, "%pOF", np);
+ return sysfs_emit(buf, "%pOF", np);
}
static DEVICE_ATTR_RO(devspec);
#endif
ssize_t len;
device_lock(dev);
- len = scnprintf(buf, PAGE_SIZE, "%s\n", pdev->driver_override);
+ len = sysfs_emit(buf, "%s\n", pdev->driver_override);
device_unlock(dev);
return len;
}
struct pci_dev *vga_dev = vga_default_device();
if (vga_dev)
- return sprintf(buf, "%u\n", (pdev == vga_dev));
+ return sysfs_emit(buf, "%u\n", (pdev == vga_dev));
- return sprintf(buf, "%u\n",
- !!(pdev->resource[PCI_ROM_RESOURCE].flags &
- IORESOURCE_ROM_SHADOW));
+ return sysfs_emit(buf, "%u\n",
+ !!(pdev->resource[PCI_ROM_RESOURCE].flags &
+ IORESOURCE_ROM_SHADOW));
}
static DEVICE_ATTR_RO(boot_vga);
return count;
}
+static BIN_ATTR(config, 0644, pci_read_config, pci_write_config, 0);
+
+static struct bin_attribute *pci_dev_config_attrs[] = {
+ &bin_attr_config,
+ NULL,
+};
+
+static umode_t pci_dev_config_attr_is_visible(struct kobject *kobj,
+ struct bin_attribute *a, int n)
+{
+ struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+
+ a->size = PCI_CFG_SPACE_SIZE;
+ if (pdev->cfg_size > PCI_CFG_SPACE_SIZE)
+ a->size = PCI_CFG_SPACE_EXP_SIZE;
+
+ return a->attr.mode;
+}
+
+static const struct attribute_group pci_dev_config_attr_group = {
+ .bin_attrs = pci_dev_config_attrs,
+ .is_bin_visible = pci_dev_config_attr_is_visible,
+};
#ifdef HAVE_PCI_LEGACY
/**
return count;
}
+static BIN_ATTR(rom, 0600, pci_read_rom, pci_write_rom, 0);
-static const struct bin_attribute pci_config_attr = {
- .attr = {
- .name = "config",
- .mode = 0644,
- },
- .size = PCI_CFG_SPACE_SIZE,
- .read = pci_read_config,
- .write = pci_write_config,
+static struct bin_attribute *pci_dev_rom_attrs[] = {
+ &bin_attr_rom,
+ NULL,
};
-static const struct bin_attribute pcie_config_attr = {
- .attr = {
- .name = "config",
- .mode = 0644,
- },
- .size = PCI_CFG_SPACE_EXP_SIZE,
- .read = pci_read_config,
- .write = pci_write_config,
+static umode_t pci_dev_rom_attr_is_visible(struct kobject *kobj,
+ struct bin_attribute *a, int n)
+{
+ struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
+ size_t rom_size;
+
+ /* If the device has a ROM, try to expose it in sysfs. */
+ rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
+ if (!rom_size)
+ return 0;
+
+ a->size = rom_size;
+
+ return a->attr.mode;
+}
+
+static const struct attribute_group pci_dev_rom_attr_group = {
+ .bin_attrs = pci_dev_rom_attrs,
+ .is_bin_visible = pci_dev_rom_attr_is_visible,
};
static ssize_t reset_store(struct device *dev, struct device_attribute *attr,
return count;
}
+static DEVICE_ATTR_WO(reset);
-static DEVICE_ATTR(reset, 0200, NULL, reset_store);
+static struct attribute *pci_dev_reset_attrs[] = {
+ &dev_attr_reset.attr,
+ NULL,
+};
-static int pci_create_capabilities_sysfs(struct pci_dev *dev)
+static umode_t pci_dev_reset_attr_is_visible(struct kobject *kobj,
+ struct attribute *a, int n)
{
- int retval;
-
- pcie_vpd_create_sysfs_dev_files(dev);
+ struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
- if (dev->reset_fn) {
- retval = device_create_file(&dev->dev, &dev_attr_reset);
- if (retval)
- goto error;
- }
- return 0;
+ if (!pdev->reset_fn)
+ return 0;
-error:
- pcie_vpd_remove_sysfs_dev_files(dev);
- return retval;
+ return a->mode;
}
+static const struct attribute_group pci_dev_reset_attr_group = {
+ .attrs = pci_dev_reset_attrs,
+ .is_visible = pci_dev_reset_attr_is_visible,
+};
+
int __must_check pci_create_sysfs_dev_files(struct pci_dev *pdev)
{
- int retval;
- int rom_size;
- struct bin_attribute *attr;
-
if (!sysfs_initialized)
return -EACCES;
- if (pdev->cfg_size > PCI_CFG_SPACE_SIZE)
- retval = sysfs_create_bin_file(&pdev->dev.kobj, &pcie_config_attr);
- else
- retval = sysfs_create_bin_file(&pdev->dev.kobj, &pci_config_attr);
- if (retval)
- goto err;
-
- retval = pci_create_resource_files(pdev);
- if (retval)
- goto err_config_file;
-
- /* If the device has a ROM, try to expose it in sysfs. */
- rom_size = pci_resource_len(pdev, PCI_ROM_RESOURCE);
- if (rom_size) {
- attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
- if (!attr) {
- retval = -ENOMEM;
- goto err_resource_files;
- }
- sysfs_bin_attr_init(attr);
- attr->size = rom_size;
- attr->attr.name = "rom";
- attr->attr.mode = 0600;
- attr->read = pci_read_rom;
- attr->write = pci_write_rom;
- retval = sysfs_create_bin_file(&pdev->dev.kobj, attr);
- if (retval) {
- kfree(attr);
- goto err_resource_files;
- }
- pdev->rom_attr = attr;
- }
-
- /* add sysfs entries for various capabilities */
- retval = pci_create_capabilities_sysfs(pdev);
- if (retval)
- goto err_rom_file;
-
- pci_create_firmware_label_files(pdev);
-
- return 0;
-
-err_rom_file:
- if (pdev->rom_attr) {
- sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
- kfree(pdev->rom_attr);
- pdev->rom_attr = NULL;
- }
-err_resource_files:
- pci_remove_resource_files(pdev);
-err_config_file:
- if (pdev->cfg_size > PCI_CFG_SPACE_SIZE)
- sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
- else
- sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
-err:
- return retval;
-}
-
-static void pci_remove_capabilities_sysfs(struct pci_dev *dev)
-{
- pcie_vpd_remove_sysfs_dev_files(dev);
- if (dev->reset_fn) {
- device_remove_file(&dev->dev, &dev_attr_reset);
- dev->reset_fn = 0;
- }
+ return pci_create_resource_files(pdev);
}
/**
if (!sysfs_initialized)
return;
- pci_remove_capabilities_sysfs(pdev);
-
- if (pdev->cfg_size > PCI_CFG_SPACE_SIZE)
- sysfs_remove_bin_file(&pdev->dev.kobj, &pcie_config_attr);
- else
- sysfs_remove_bin_file(&pdev->dev.kobj, &pci_config_attr);
-
pci_remove_resource_files(pdev);
-
- if (pdev->rom_attr) {
- sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr);
- kfree(pdev->rom_attr);
- pdev->rom_attr = NULL;
- }
-
- pci_remove_firmware_label_files(pdev);
}
static int __init pci_sysfs_init(void)
const struct attribute_group *pci_dev_groups[] = {
&pci_dev_group,
+ &pci_dev_config_attr_group,
+ &pci_dev_rom_attr_group,
+ &pci_dev_reset_attr_group,
+ &pci_dev_vpd_attr_group,
+#ifdef CONFIG_DMI
+ &pci_dev_smbios_attr_group,
+#endif
+#ifdef CONFIG_ACPI
+ &pci_dev_acpi_attr_group,
+#endif
NULL,
};
}
EXPORT_SYMBOL_GPL(pci_find_ht_capability);
+/**
+ * pci_find_vsec_capability - Find a vendor-specific extended capability
+ * @dev: PCI device to query
+ * @vendor: Vendor ID for which capability is defined
+ * @cap: Vendor-specific capability ID
+ *
+ * If @dev has Vendor ID @vendor, search for a VSEC capability with
+ * VSEC ID @cap. If found, return the capability offset in
+ * config space; otherwise return 0.
+ */
+u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap)
+{
+ u16 vsec = 0;
+ u32 header;
+
+ if (vendor != dev->vendor)
+ return 0;
+
+ while ((vsec = pci_find_next_ext_capability(dev, vsec,
+ PCI_EXT_CAP_ID_VNDR))) {
+ if (pci_read_config_dword(dev, vsec + PCI_VNDR_HEADER,
+ &header) == PCIBIOS_SUCCESSFUL &&
+ PCI_VNDR_HEADER_ID(header) == cap)
+ return vsec;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_find_vsec_capability);
+
/**
* pci_find_parent_resource - return resource region of parent bus of given
* region
return address;
}
+EXPORT_SYMBOL_GPL(pci_pio_to_address);
unsigned long __weak pci_address_to_pio(phys_addr_t address)
{
}
EXPORT_SYMBOL(pci_clear_mwi);
+/**
+ * pci_disable_parity - disable parity checking for device
+ * @dev: the PCI device to operate on
+ *
+ * Disable parity checking for device @dev
+ */
+void pci_disable_parity(struct pci_dev *dev)
+{
+ u16 cmd;
+
+ pci_read_config_word(dev, PCI_COMMAND, &cmd);
+ if (cmd & PCI_COMMAND_PARITY) {
+ cmd &= ~PCI_COMMAND_PARITY;
+ pci_write_config_word(dev, PCI_COMMAND, cmd);
+ }
+}
+
/**
* pci_intx - enables/disables PCI INTx for device dev
* @pdev: the PCI device to operate on
int pci_create_sysfs_dev_files(struct pci_dev *pdev);
void pci_remove_sysfs_dev_files(struct pci_dev *pdev);
-#if !defined(CONFIG_DMI) && !defined(CONFIG_ACPI)
-static inline void pci_create_firmware_label_files(struct pci_dev *pdev)
-{ return; }
-static inline void pci_remove_firmware_label_files(struct pci_dev *pdev)
-{ return; }
-#else
-void pci_create_firmware_label_files(struct pci_dev *pdev);
-void pci_remove_firmware_label_files(struct pci_dev *pdev);
-#endif
void pci_cleanup_rom(struct pci_dev *dev);
+#ifdef CONFIG_DMI
+extern const struct attribute_group pci_dev_smbios_attr_group;
+#endif
enum pci_mmap_api {
PCI_MMAP_SYSFS, /* mmap on /sys/bus/pci/devices/<BDF>/resource<N> */
type == PCI_EXP_TYPE_PCIE_BRIDGE;
}
-int pci_vpd_init(struct pci_dev *dev);
+void pci_vpd_init(struct pci_dev *dev);
void pci_vpd_release(struct pci_dev *dev);
-void pcie_vpd_create_sysfs_dev_files(struct pci_dev *dev);
-void pcie_vpd_remove_sysfs_dev_files(struct pci_dev *dev);
+extern const struct attribute_group pci_dev_vpd_attr_group;
/* PCI Virtual Channel */
int pci_save_vc_state(struct pci_dev *dev);
#if defined(CONFIG_PCI_QUIRKS) && defined(CONFIG_ARM64)
int acpi_get_rc_resources(struct device *dev, const char *hid, u16 segment,
struct resource *res);
+#else
+static inline int acpi_get_rc_resources(struct device *dev, const char *hid,
+ u16 segment, struct resource *res)
+{
+ return -ENODEV;
+}
#endif
int pci_rebar_get_current_size(struct pci_dev *pdev, int bar);
#ifdef CONFIG_ACPI
int pci_acpi_program_hp_params(struct pci_dev *dev);
+extern const struct attribute_group pci_dev_acpi_attr_group;
#else
static inline int pci_acpi_program_hp_params(struct pci_dev *dev)
{
};
/**
- * enable_ercr_checking - enable PCIe ECRC checking for a device
+ * enable_ecrc_checking - enable PCIe ECRC checking for a device
* @dev: the PCI device
*
* Returns 0 on success, or negative on failure.
}
/**
- * disable_ercr_checking - disables PCIe ECRC checking for a device
+ * disable_ecrc_checking - disables PCIe ECRC checking for a device
* @dev: the PCI device
*
* Returns 0 on success, or negative on failure.
};
/**
- * aer_service_init - register AER root service driver
+ * pcie_aer_init - register AER root service driver
*
* Invoked when AER root service driver is loaded.
*/
};
/**
- * pcie_pme_service_init - Register the PCIe PME service driver.
+ * pcie_pme_init - Register the PCIe PME service driver.
*/
int __init pcie_pme_init(void)
{
/* Same bus, so check bitmap */
for_each_set_bit(devn, &bitmap, 32)
- if (devn == rciep->devfn)
+ if (devn == PCI_SLOT(rciep->devfn))
return true;
return false;
/* Temporarily move resources off the list */
list_splice_init(&bridge->windows, &resources);
bus->sysdata = bridge->sysdata;
- bus->msi = bridge->msi;
bus->ops = bridge->ops;
bus->number = bus->busn_res.start = bridge->busnr;
#ifdef CONFIG_PCI_DOMAINS_GENERIC
device_enable_async_suspend(bus->bridge);
pci_set_bus_of_node(bus);
pci_set_bus_msi_domain(bus);
+ if (bridge->msi_domain && !dev_get_msi_domain(&bus->dev))
+ bus->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
if (!parent)
set_dev_node(bus->bridge, pcibus_to_node(bus));
return NULL;
child->parent = parent;
- child->msi = parent->msi;
child->sysdata = parent->sysdata;
child->bus_flags = parent->bus_flags;
pci_set_of_node(dev);
if (pci_setup_device(dev)) {
+ pci_release_of_node(dev);
pci_bus_put(dev->bus);
kfree(dev);
return NULL;
PCI_CLASS_BRIDGE_HOST, 8, quirk_mmio_always_on);
/*
- * The Mellanox Tavor device gives false positive parity errors. Mark this
- * device with a broken_parity_status to allow PCI scanning code to "skip"
- * this now blacklisted device.
+ * The Mellanox Tavor device gives false positive parity errors. Disable
+ * parity error reporting.
*/
-static void quirk_mellanox_tavor(struct pci_dev *dev)
-{
- dev->broken_parity_status = 1; /* This device gives false positives */
-}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR, quirk_mellanox_tavor);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE, quirk_mellanox_tavor);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR, pci_disable_parity);
+DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_MELLANOX, PCI_DEVICE_ID_MELLANOX_TAVOR_BRIDGE, pci_disable_parity);
/*
* Deal with broken BIOSes that neglect to enable passive release,
/* Check the HyperTransport MSI mapping to know whether MSI is enabled or not */
static void quirk_msi_ht_cap(struct pci_dev *dev)
{
- if (dev->subordinate && !msi_ht_cap_enabled(dev)) {
- pci_warn(dev, "MSI quirk detected; subordinate MSI disabled\n");
- dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
- }
+ if (!msi_ht_cap_enabled(dev))
+ quirk_disable_msi(dev);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SERVERWORKS, PCI_DEVICE_ID_SERVERWORKS_HT2000_PCIE,
quirk_msi_ht_cap);
{
struct pci_dev *pdev;
- if (!dev->subordinate)
- return;
-
/*
* Check HT MSI cap on this chipset and the root one. A single one
* having MSI is enough to be sure that MSI is supported.
pdev = pci_get_slot(dev->bus, 0);
if (!pdev)
return;
- if (!msi_ht_cap_enabled(dev) && !msi_ht_cap_enabled(pdev)) {
- pci_warn(dev, "MSI quirk detected; subordinate MSI disabled\n");
- dev->subordinate->bus_flags |= PCI_BUS_FLAGS_NO_MSI;
- }
+ if (!msi_ht_cap_enabled(pdev))
+ quirk_msi_ht_cap(dev);
pci_dev_put(pdev);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_NVIDIA, PCI_DEVICE_ID_NVIDIA_CK804_PCIE,
reset_ivb_igd },
{ PCI_VENDOR_ID_SAMSUNG, 0xa804, nvme_disable_and_flr },
{ PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr },
+ { PCI_VENDOR_ID_INTEL, 0x0a54, delay_250ms_after_flr },
{ PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
reset_chelsio_generic_dev },
{ 0 }
pci_pme_active(dev, false);
if (pci_dev_is_added(dev)) {
+ dev->reset_fn = 0;
+
device_release_driver(&dev->dev);
pci_proc_detach_device(dev);
pci_remove_sysfs_dev_files(dev);
struct pci_vpd_ops {
ssize_t (*read)(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
ssize_t (*write)(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
- int (*set_size)(struct pci_dev *dev, size_t len);
};
struct pci_vpd {
const struct pci_vpd_ops *ops;
- struct bin_attribute *attr; /* Descriptor for sysfs VPD entry */
struct mutex lock;
unsigned int len;
u16 flag;
unsigned int valid:1;
};
+static struct pci_dev *pci_get_func0_dev(struct pci_dev *dev)
+{
+ return pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+}
+
/**
* pci_read_vpd - Read one entry from Vital Product Data
* @dev: pci device struct
}
EXPORT_SYMBOL(pci_write_vpd);
-/**
- * pci_set_vpd_size - Set size of Vital Product Data space
- * @dev: pci device struct
- * @len: size of vpd space
- */
-int pci_set_vpd_size(struct pci_dev *dev, size_t len)
-{
- if (!dev->vpd || !dev->vpd->ops)
- return -ENODEV;
- return dev->vpd->ops->set_size(dev, len);
-}
-EXPORT_SYMBOL(pci_set_vpd_size);
-
#define PCI_VPD_MAX_SIZE (PCI_VPD_ADDR_MASK + 1)
/**
size_t off = 0;
unsigned char header[1+2]; /* 1 byte tag, 2 bytes length */
- while (off < old_size &&
- pci_read_vpd(dev, off, 1, header) == 1) {
+ while (off < old_size && pci_read_vpd(dev, off, 1, header) == 1) {
unsigned char tag;
+ if (!header[0] && !off) {
+ pci_info(dev, "Invalid VPD tag 00, assume missing optional VPD EPROM\n");
+ return 0;
+ }
+
if (header[0] & PCI_VPD_LRDT) {
/* Large Resource Data Type Tag */
tag = pci_vpd_lrdt_tag(header);
return ret ? ret : count;
}
-static int pci_vpd_set_size(struct pci_dev *dev, size_t len)
-{
- struct pci_vpd *vpd = dev->vpd;
-
- if (len == 0 || len > PCI_VPD_MAX_SIZE)
- return -EIO;
-
- vpd->valid = 1;
- vpd->len = len;
-
- return 0;
-}
-
static const struct pci_vpd_ops pci_vpd_ops = {
.read = pci_vpd_read,
.write = pci_vpd_write,
- .set_size = pci_vpd_set_size,
};
static ssize_t pci_vpd_f0_read(struct pci_dev *dev, loff_t pos, size_t count,
void *arg)
{
- struct pci_dev *tdev = pci_get_slot(dev->bus,
- PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+ struct pci_dev *tdev = pci_get_func0_dev(dev);
ssize_t ret;
if (!tdev)
static ssize_t pci_vpd_f0_write(struct pci_dev *dev, loff_t pos, size_t count,
const void *arg)
{
- struct pci_dev *tdev = pci_get_slot(dev->bus,
- PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+ struct pci_dev *tdev = pci_get_func0_dev(dev);
ssize_t ret;
if (!tdev)
return ret;
}
-static int pci_vpd_f0_set_size(struct pci_dev *dev, size_t len)
-{
- struct pci_dev *tdev = pci_get_slot(dev->bus,
- PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
- int ret;
-
- if (!tdev)
- return -ENODEV;
-
- ret = pci_set_vpd_size(tdev, len);
- pci_dev_put(tdev);
- return ret;
-}
-
static const struct pci_vpd_ops pci_vpd_f0_ops = {
.read = pci_vpd_f0_read,
.write = pci_vpd_f0_write,
- .set_size = pci_vpd_f0_set_size,
};
-int pci_vpd_init(struct pci_dev *dev)
+void pci_vpd_init(struct pci_dev *dev)
{
struct pci_vpd *vpd;
u8 cap;
cap = pci_find_capability(dev, PCI_CAP_ID_VPD);
if (!cap)
- return -ENODEV;
+ return;
vpd = kzalloc(sizeof(*vpd), GFP_ATOMIC);
if (!vpd)
- return -ENOMEM;
+ return;
vpd->len = PCI_VPD_MAX_SIZE;
if (dev->dev_flags & PCI_DEV_FLAGS_VPD_REF_F0)
vpd->busy = 0;
vpd->valid = 0;
dev->vpd = vpd;
- return 0;
}
void pci_vpd_release(struct pci_dev *dev)
kfree(dev->vpd);
}
-static ssize_t read_vpd_attr(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t off, size_t count)
+static ssize_t vpd_read(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off,
+ size_t count)
{
struct pci_dev *dev = to_pci_dev(kobj_to_dev(kobj));
- if (bin_attr->size > 0) {
- if (off > bin_attr->size)
- count = 0;
- else if (count > bin_attr->size - off)
- count = bin_attr->size - off;
- }
-
return pci_read_vpd(dev, off, count, buf);
}
-static ssize_t write_vpd_attr(struct file *filp, struct kobject *kobj,
- struct bin_attribute *bin_attr, char *buf,
- loff_t off, size_t count)
+static ssize_t vpd_write(struct file *filp, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off,
+ size_t count)
{
struct pci_dev *dev = to_pci_dev(kobj_to_dev(kobj));
- if (bin_attr->size > 0) {
- if (off > bin_attr->size)
- count = 0;
- else if (count > bin_attr->size - off)
- count = bin_attr->size - off;
- }
-
return pci_write_vpd(dev, off, count, buf);
}
+static BIN_ATTR(vpd, 0600, vpd_read, vpd_write, 0);
-void pcie_vpd_create_sysfs_dev_files(struct pci_dev *dev)
-{
- int retval;
- struct bin_attribute *attr;
-
- if (!dev->vpd)
- return;
+static struct bin_attribute *vpd_attrs[] = {
+ &bin_attr_vpd,
+ NULL,
+};
- attr = kzalloc(sizeof(*attr), GFP_ATOMIC);
- if (!attr)
- return;
+static umode_t vpd_attr_is_visible(struct kobject *kobj,
+ struct bin_attribute *a, int n)
+{
+ struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
- sysfs_bin_attr_init(attr);
- attr->size = 0;
- attr->attr.name = "vpd";
- attr->attr.mode = S_IRUSR | S_IWUSR;
- attr->read = read_vpd_attr;
- attr->write = write_vpd_attr;
- retval = sysfs_create_bin_file(&dev->dev.kobj, attr);
- if (retval) {
- kfree(attr);
- return;
- }
+ if (!pdev->vpd)
+ return 0;
- dev->vpd->attr = attr;
+ return a->attr.mode;
}
-void pcie_vpd_remove_sysfs_dev_files(struct pci_dev *dev)
-{
- if (dev->vpd && dev->vpd->attr) {
- sysfs_remove_bin_file(&dev->dev.kobj, dev->vpd->attr);
- kfree(dev->vpd->attr);
- }
-}
+const struct attribute_group pci_dev_vpd_attr_group = {
+ .bin_attrs = vpd_attrs,
+ .is_bin_visible = vpd_attr_is_visible,
+};
-int pci_vpd_find_tag(const u8 *buf, unsigned int off, unsigned int len, u8 rdt)
+int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt)
{
- int i;
+ int i = 0;
- for (i = off; i < len; ) {
- u8 val = buf[i];
-
- if (val & PCI_VPD_LRDT) {
- /* Don't return success of the tag isn't complete */
- if (i + PCI_VPD_LRDT_TAG_SIZE > len)
- break;
-
- if (val == rdt)
- return i;
-
- i += PCI_VPD_LRDT_TAG_SIZE +
- pci_vpd_lrdt_size(&buf[i]);
- } else {
- u8 tag = val & ~PCI_VPD_SRDT_LEN_MASK;
-
- if (tag == rdt)
- return i;
-
- if (tag == PCI_VPD_SRDT_END)
- break;
+ /* look for LRDT tags only, end tag is the only SRDT tag */
+ while (i + PCI_VPD_LRDT_TAG_SIZE <= len && buf[i] & PCI_VPD_LRDT) {
+ if (buf[i] == rdt)
+ return i;
- i += PCI_VPD_SRDT_TAG_SIZE +
- pci_vpd_srdt_size(&buf[i]);
- }
+ i += PCI_VPD_LRDT_TAG_SIZE + pci_vpd_lrdt_size(buf + i);
}
return -ENOENT;
if (!PCI_FUNC(dev->devfn))
return;
- f0 = pci_get_slot(dev->bus, PCI_DEVFN(PCI_SLOT(dev->devfn), 0));
+ f0 = pci_get_func0_dev(dev);
if (!f0)
return;
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_LSI_LOGIC, 0x005f, quirk_blacklist_vpd);
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATTANSIC, PCI_ANY_ID,
quirk_blacklist_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_QLOGIC, 0x2261, quirk_blacklist_vpd);
/*
* The Amazon Annapurna Labs 0x0031 device id is reused for other non Root Port
* device types, so the quirk is registered for the PCI_CLASS_BRIDGE_PCI class.
DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS, 0x0031,
PCI_CLASS_BRIDGE_PCI, 8, quirk_blacklist_vpd);
-/*
- * For Broadcom 5706, 5708, 5709 rev. A nics, any read beyond the
- * VPD end tag will hang the device. This problem was initially
- * observed when a vpd entry was created in sysfs
- * ('/sys/bus/pci/devices/<id>/vpd'). A read to this sysfs entry
- * will dump 32k of data. Reading a full 32k will cause an access
- * beyond the VPD end tag causing the device to hang. Once the device
- * is hung, the bnx2 driver will not be able to reset the device.
- * We believe that it is legal to read beyond the end tag and
- * therefore the solution is to limit the read/write length.
- */
-static void quirk_brcm_570x_limit_vpd(struct pci_dev *dev)
+static void pci_vpd_set_size(struct pci_dev *dev, size_t len)
{
- /*
- * Only disable the VPD capability for 5706, 5706S, 5708,
- * 5708S and 5709 rev. A
- */
- if ((dev->device == PCI_DEVICE_ID_NX2_5706) ||
- (dev->device == PCI_DEVICE_ID_NX2_5706S) ||
- (dev->device == PCI_DEVICE_ID_NX2_5708) ||
- (dev->device == PCI_DEVICE_ID_NX2_5708S) ||
- ((dev->device == PCI_DEVICE_ID_NX2_5709) &&
- (dev->revision & 0xf0) == 0x0)) {
- if (dev->vpd)
- dev->vpd->len = 0x80;
- }
+ struct pci_vpd *vpd = dev->vpd;
+
+ if (!vpd || len == 0 || len > PCI_VPD_MAX_SIZE)
+ return;
+
+ vpd->valid = 1;
+ vpd->len = len;
}
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
- PCI_DEVICE_ID_NX2_5706,
- quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
- PCI_DEVICE_ID_NX2_5706S,
- quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
- PCI_DEVICE_ID_NX2_5708,
- quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
- PCI_DEVICE_ID_NX2_5708S,
- quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
- PCI_DEVICE_ID_NX2_5709,
- quirk_brcm_570x_limit_vpd);
-DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_BROADCOM,
- PCI_DEVICE_ID_NX2_5709S,
- quirk_brcm_570x_limit_vpd);
static void quirk_chelsio_extend_vpd(struct pci_dev *dev)
{
* limits.
*/
if (chip == 0x0 && prod >= 0x20)
- pci_set_vpd_size(dev, 8192);
+ pci_vpd_set_size(dev, 8192);
else if (chip >= 0x4 && func < 0x8)
- pci_set_vpd_size(dev, 2048);
+ pci_vpd_set_size(dev, 2048);
}
DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID,
spin_unlock(&pcifront_dev_lock);
- if (!err && !swiotlb_nr_tbl()) {
+ if (!err && !is_swiotlb_active()) {
err = pci_xen_swiotlb_init_late();
if (err)
dev_err(&pdev->xdev->dev, "Could not setup SWIOTLB!\n");
mutex_unlock(&s->ops_mutex);
}
-/**
+/*
* set_cis_map() - map the card memory at "card_offset" into virtual space.
*
* If flags & MAP_ATTRIB, map the attribute space, otherwise
#define IS_ATTR 1
#define IS_INDIRECT 8
-/**
+/*
* pcmcia_read_cis_mem() - low-level function to read CIS memory
*
* must be called with ops_mutex held
}
-/**
+/*
* pcmcia_write_cis_mem() - low-level function to write CIS memory
*
* Probably only useful for writing one-byte registers. Must be called
}
-/**
+/*
* read_cis_cache() - read CIS memory or its associated cache
*
* This is a wrapper around read_cis_mem, with the same interface,
}
}
-/**
+/*
* verify_cis_cache() - does the CIS match what is in the CIS cache?
*/
int verify_cis_cache(struct pcmcia_socket *s)
return 0;
}
-/**
+/*
* pcmcia_replace_cis() - use a replacement CIS instead of the card's CIS
*
* For really bad cards, we provide a facility for uploading a
};
/**
- * pcmcia_store_new_id - add a new PCMCIA device ID to this driver and re-probe devices
+ * new_id_store() - add a new PCMCIA device ID to this driver and re-probe devices
* @driver: target device driver
* @buf: buffer for scanning device ID data
* @count: input size
pcmcia_card_remove(p_dev->socket, p_dev);
/* detach the "instance" */
- if (!p_drv)
- return 0;
-
if (p_drv->remove)
p_drv->remove(p_dev);
"pcmcia: driver %s did not release window properly\n",
p_drv->name);
- /* references from pcmcia_probe_device */
+ /* references from pcmcia_device_probe */
pcmcia_put_dev(p_dev);
module_put(p_drv->owner);
}
-/**
+/*
* pcmcia_io_cfg_data_width() - convert cfgtable to data path width parameter
*/
static int pcmcia_io_cfg_data_width(unsigned int flags)
cistpl_cftable_entry_t dflt;
};
-/**
+/*
* pcmcia_do_loop_config() - internal helper for pcmcia_loop_config()
*
* pcmcia_do_loop_config() is the internal callback for the call from
void *priv_data);
};
-/**
+/*
* pcmcia_do_loop_tuple() - internal helper for pcmcia_loop_config()
*
* pcmcia_do_loop_tuple() is the internal callback for the call from
cisdata_t **buf;
};
-/**
+/*
* pcmcia_do_get_tuple() - internal helper for pcmcia_get_tuple()
*
* pcmcia_do_get_tuple() is the internal callback for the call from
EXPORT_SYMBOL(pcmcia_get_tuple);
-/**
+/*
* pcmcia_do_get_mac() - internal helper for pcmcia_get_mac_from_cis()
*
* pcmcia_do_get_mac() is the internal callback for the call from
}
-/**
+/*
* pcmcia_access_config() - read or write card configuration registers
*
* pcmcia_access_config() reads and writes configuration registers in
}
-/**
+/*
* pcmcia_read_config_byte() - read a byte from a card configuration register
*
* pcmcia_read_config_byte() reads a byte from a configuration register in
EXPORT_SYMBOL(pcmcia_read_config_byte);
-/**
+/*
* pcmcia_write_config_byte() - write a byte to a card configuration register
*
* pcmcia_write_config_byte() writes a byte to a configuration register in
/**
* pcmcia_setup_isa_irq() - determine whether an ISA IRQ can be used
- * @p_dev - the associated PCMCIA device
+ * @p_dev: the associated PCMCIA device
+ * @type: IRQ type (flags)
*
* locking note: must be called with ops_mutex locked.
*/
/**
* pcmcia_setup_irq() - determine IRQ to be used for device
- * @p_dev - the associated PCMCIA device
+ * @p_dev: the associated PCMCIA device
*
* locking note: must be called with ops_mutex locked.
*/
/*======================================================================*/
-/**
+/*
* readable() - iomem validation function for cards with a valid CIS
*/
static int readable(struct pcmcia_socket *s, struct resource *res,
return 0;
}
-/**
+/*
* checksum() - iomem validation function for simple memory cards
*/
static int checksum(struct pcmcia_socket *s, struct resource *res,
*/
static int do_validate_mem(struct pcmcia_socket *s,
unsigned long base, unsigned long size,
- int validate (struct pcmcia_socket *s,
- struct resource *res,
- unsigned int *value))
+ int (*validate)(struct pcmcia_socket *s,
+ struct resource *res,
+ unsigned int *value))
{
struct socket_data *s_data = s->resource_data;
struct resource *res1, *res2;
* function returns the size of the usable memory area.
*/
static int do_mem_probe(struct pcmcia_socket *s, u_long base, u_long num,
- int validate (struct pcmcia_socket *s,
- struct resource *res,
- unsigned int *value),
- int fallback (struct pcmcia_socket *s,
- struct resource *res,
- unsigned int *value))
+ int (*validate)(struct pcmcia_socket *s,
+ struct resource *res,
+ unsigned int *value),
+ int (*fallback)(struct pcmcia_socket *s,
+ struct resource *res,
+ unsigned int *value))
{
struct socket_data *s_data = s->resource_data;
u_long i, j, bad, fail, step;
#include <linux/gpio/driver.h>
#include <linux/interrupt.h>
#include <linux/io.h>
+#include <linux/kernel.h>
#include <linux/of_device.h>
#include <linux/of_irq.h>
#include <linux/of_platform.h>
return 0;
}
+#define IF_ENABLED(cfg, ptr) PTR_IF(IS_ENABLED(cfg), (ptr))
+
static const struct of_device_id ingenic_pinctrl_of_match[] = {
{
.compatible = "ingenic,jz4730-pinctrl",
* This mutex must be held while accessing the EMI unit. We can't rely on the
* EC mutex because memmap data may be accessed without it being held.
*/
-static struct mutex io_mutex;
+static DEFINE_MUTEX(io_mutex);
static u16 mec_emi_base, mec_emi_end;
/**
void cros_ec_lpc_mec_init(unsigned int base, unsigned int end)
{
- mutex_init(&io_mutex);
mec_emi_base = base;
mec_emi_end = end;
}
/* Variables keeping track of switch state. */
struct typec_mux_state state;
uint8_t mux_flags;
+ uint8_t role;
/* Port alt modes. */
struct typec_altmode p_altmode[CROS_EC_ALTMODE_MAX];
{
struct cros_typec_port *port = typec->ports[port_num];
+ if (!port->partner)
+ return;
+
cros_typec_unregister_altmodes(typec, port_num, true);
cros_typec_usb_disconnect_state(port);
{
struct cros_typec_port *port = typec->ports[port_num];
+ if (!port->cable)
+ return;
+
cros_typec_unregister_altmodes(typec, port_num, false);
typec_unregister_plug(port->plug);
if (!typec->ports[i])
continue;
- if (typec->ports[i]->partner)
- cros_typec_remove_partner(typec, i);
-
- if (typec->ports[i]->cable)
- cros_typec_remove_cable(typec, i);
+ cros_typec_remove_partner(typec, i);
+ cros_typec_remove_cable(typec, i);
usb_role_switch_put(typec->ports[i]->role_sw);
typec_switch_put(typec->ports[i]->ori_sw);
return -ENOTSUPP;
}
+ if (!pd_ctrl->dp_mode) {
+ dev_err(typec->dev, "No valid DP mode provided.\n");
+ return -EINVAL;
+ }
+
/* Status VDO. */
dp_data.status = DP_STATUS_ENABLED;
if (port->mux_flags & USB_PD_MUX_HPD_IRQ)
"Failed to register partner on port: %d\n",
port_num);
} else {
- if (typec->ports[port_num]->partner)
- cros_typec_remove_partner(typec, port_num);
-
- if (typec->ports[port_num]->cable)
- cros_typec_remove_cable(typec, port_num);
+ cros_typec_remove_partner(typec, port_num);
+ cros_typec_remove_cable(typec, port_num);
}
}
return;
}
+ /* If we got a hard reset, unregister everything and return. */
+ if (resp.events & PD_STATUS_EVENT_HARD_RESET) {
+ cros_typec_remove_partner(typec, port_num);
+ cros_typec_remove_cable(typec, port_num);
+
+ ret = cros_typec_send_clear_event(typec, port_num,
+ PD_STATUS_EVENT_HARD_RESET);
+ if (ret < 0)
+ dev_warn(typec->dev,
+ "Failed hard reset event clear, port: %d\n", port_num);
+ return;
+ }
+
/* Handle any events appropriately. */
if (resp.events & PD_STATUS_EVENT_SOP_DISC_DONE && !typec->ports[port_num]->sop_disc_done) {
u16 sop_revision;
}
/* No change needs to be made, let's exit early. */
- if (typec->ports[port_num]->mux_flags == mux_resp.flags)
+ if (typec->ports[port_num]->mux_flags == mux_resp.flags &&
+ typec->ports[port_num]->role == resp.role)
return 0;
typec->ports[port_num]->mux_flags = mux_resp.flags;
+ typec->ports[port_num]->role = resp.role;
ret = cros_typec_configure_mux(typec, port_num, mux_resp.flags, &resp);
if (ret)
dev_warn(typec->dev, "Configure muxes failed, err = %d\n", ret);
else
typec->pd_ctrl_ver = 0;
- dev_dbg(typec->dev, "PD Control has version mask 0x%hhx\n",
- typec->pd_ctrl_ver);
+ dev_dbg(typec->dev, "PD Control has version mask 0x%02x\n",
+ typec->pd_ctrl_ver & 0xff);
return 0;
}
if (!host_event)
return NOTIFY_DONE;
- if (host_event & EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU)) {
+ if (host_event & (EC_HOST_EVENT_MASK(EC_HOST_EVENT_PD_MCU) |
+ EC_HOST_EVENT_MASK(EC_HOST_EVENT_USB_MUX))) {
cros_usbpd_get_event_and_notify(pdnotify->dev, ec_dev);
return NOTIFY_OK;
}
sess_data->dev_data = dev_data;
sess_data->has_msg = false;
- nonseekable_open(inode, filp);
+ stream_open(inode, filp);
filp->private_data = sess_data;
return 0;
module_exit(dcdrbu_exit);
module_init(dcdrbu_init);
-
-/* vim:noet:ts=8:sw=8
-*/
To compile this driver as a module, choose M here: the module
will be called pwm-twl-led.
+config PWM_VISCONTI
+ tristate "Toshiba Visconti PWM support"
+ depends on ARCH_VISCONTI || COMPILE_TEST
+ help
+ PWM Subsystem driver support for Toshiba Visconti SoCs.
+
+ To compile this driver as a module, choose M here: the module
+ will be called pwm-visconti.
+
config PWM_VT8500
tristate "vt8500 PWM support"
depends on ARCH_VT8500 || COMPILE_TEST
obj-$(CONFIG_PWM_TIEHRPWM) += pwm-tiehrpwm.o
obj-$(CONFIG_PWM_TWL) += pwm-twl.o
obj-$(CONFIG_PWM_TWL_LED) += pwm-twl-led.o
+obj-$(CONFIG_PWM_VISCONTI) += pwm-visconti.o
obj-$(CONFIG_PWM_VT8500) += pwm-vt8500.o
return radix_tree_lookup(&pwm_tree, pwm);
}
-static int alloc_pwms(int pwm, unsigned int count)
+static int alloc_pwms(unsigned int count)
{
- unsigned int from = 0;
unsigned int start;
- if (pwm >= MAX_PWMS)
- return -EINVAL;
-
- if (pwm >= 0)
- from = pwm;
-
- start = bitmap_find_next_zero_area(allocated_pwms, MAX_PWMS, from,
+ start = bitmap_find_next_zero_area(allocated_pwms, MAX_PWMS, 0,
count, 0);
- if (pwm >= 0 && start != pwm)
- return -EEXIST;
-
if (start + count > MAX_PWMS)
return -ENOSPC;
}
/**
- * pwmchip_add_with_polarity() - register a new PWM chip
+ * pwmchip_add() - register a new PWM chip
* @chip: the PWM chip to add
- * @polarity: initial polarity of PWM channels
*
- * Register a new PWM chip. If chip->base < 0 then a dynamically assigned base
- * will be used. The initial polarity for all channels is specified by the
- * @polarity parameter.
+ * Register a new PWM chip.
*
* Returns: 0 on success or a negative error code on failure.
*/
-int pwmchip_add_with_polarity(struct pwm_chip *chip,
- enum pwm_polarity polarity)
+int pwmchip_add(struct pwm_chip *chip)
{
struct pwm_device *pwm;
unsigned int i;
mutex_lock(&pwm_lock);
- ret = alloc_pwms(chip->base, chip->npwm);
+ ret = alloc_pwms(chip->npwm);
if (ret < 0)
goto out;
+ chip->base = ret;
+
chip->pwms = kcalloc(chip->npwm, sizeof(*pwm), GFP_KERNEL);
if (!chip->pwms) {
ret = -ENOMEM;
goto out;
}
- chip->base = ret;
-
for (i = 0; i < chip->npwm; i++) {
pwm = &chip->pwms[i];
pwm->chip = chip;
pwm->pwm = chip->base + i;
pwm->hwpwm = i;
- pwm->state.polarity = polarity;
radix_tree_insert(&pwm_tree, pwm->pwm, pwm);
}
return ret;
}
-EXPORT_SYMBOL_GPL(pwmchip_add_with_polarity);
-
-/**
- * pwmchip_add() - register a new PWM chip
- * @chip: the PWM chip to add
- *
- * Register a new PWM chip. If chip->base < 0 then a dynamically assigned base
- * will be used. The initial polarity for all channels is normal.
- *
- * Returns: 0 on success or a negative error code on failure.
- */
-int pwmchip_add(struct pwm_chip *chip)
-{
- return pwmchip_add_with_polarity(chip, PWM_POLARITY_NORMAL);
-}
EXPORT_SYMBOL_GPL(pwmchip_add);
/**
*/
if (state->polarity != pwm->state.polarity) {
if (!chip->ops->set_polarity)
- return -ENOTSUPP;
+ return -EINVAL;
/*
* Changing the polarity of a running PWM is
struct pwm_chip chip;
};
-static int ab8500_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+static int ab8500_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
{
- int ret = 0;
- unsigned int higher_val, lower_val;
+ int ret;
u8 reg;
+ unsigned int higher_val, lower_val;
+
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
+ if (!state->enabled) {
+ ret = abx500_mask_and_set_register_interruptible(chip->dev,
+ AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
+ 1 << (chip->base - 1), 0);
+
+ if (ret < 0)
+ dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n",
+ pwm->label, ret);
+ return ret;
+ }
/*
* get the first 8 bits that are be written to
* AB8500_PWM_OUT_CTRL1_REG[0:7]
*/
- lower_val = duty_ns & 0x00FF;
+ lower_val = state->duty_cycle & 0x00FF;
/*
* get bits [9:10] that are to be written to
* AB8500_PWM_OUT_CTRL2_REG[0:1]
*/
- higher_val = ((duty_ns & 0x0300) >> 8);
+ higher_val = ((state->duty_cycle & 0x0300) >> 8);
reg = AB8500_PWM_OUT_CTRL1_REG + ((chip->base - 1) * 2);
reg, (u8)lower_val);
if (ret < 0)
return ret;
+
ret = abx500_set_register_interruptible(chip->dev, AB8500_MISC,
(reg + 1), (u8)higher_val);
-
- return ret;
-}
-
-static int ab8500_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- int ret;
+ if (ret < 0)
+ return ret;
ret = abx500_mask_and_set_register_interruptible(chip->dev,
AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
if (ret < 0)
dev_err(chip->dev, "%s: Failed to enable PWM, Error %d\n",
pwm->label, ret);
- return ret;
-}
-static void ab8500_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- int ret;
-
- ret = abx500_mask_and_set_register_interruptible(chip->dev,
- AB8500_MISC, AB8500_PWM_OUT_CTRL7_REG,
- 1 << (chip->base - 1), 0);
- if (ret < 0)
- dev_err(chip->dev, "%s: Failed to disable PWM, Error %d\n",
- pwm->label, ret);
+ return ret;
}
static const struct pwm_ops ab8500_pwm_ops = {
- .config = ab8500_pwm_config,
- .enable = ab8500_pwm_enable,
- .disable = ab8500_pwm_disable,
+ .apply = ab8500_pwm_apply,
.owner = THIS_MODULE,
};
ab8500->chip.dev = &pdev->dev;
ab8500->chip.ops = &ab8500_pwm_ops;
- ab8500->chip.base = -1;
ab8500->chip.npwm = 1;
err = pwmchip_add(&ab8500->chip);
chip->hlcdc = hlcdc;
chip->chip.ops = &atmel_hlcdc_pwm_ops;
chip->chip.dev = dev;
- chip->chip.base = -1;
chip->chip.npwm = 1;
chip->chip.of_xlate = of_pwm_xlate_with_flags;
chip->chip.of_pwm_n_cells = 3;
- ret = pwmchip_add_with_polarity(&chip->chip, PWM_POLARITY_INVERSED);
+ ret = pwmchip_add(&chip->chip);
if (ret) {
clk_disable_unprepare(hlcdc->periph_clk);
return ret;
tcbpwm->div = i;
tcbpwm->duty = duty;
- /* If the PWM is enabled, call enable to apply the new conf */
- if (pwm_is_enabled(pwm))
- atmel_tcb_pwm_enable(chip, pwm);
-
return 0;
}
+static int atmel_tcb_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ int duty_cycle, period;
+ int ret;
+
+ /* This function only sets a flag in driver data */
+ atmel_tcb_pwm_set_polarity(chip, pwm, state->polarity);
+
+ if (!state->enabled) {
+ atmel_tcb_pwm_disable(chip, pwm);
+ return 0;
+ }
+
+ period = state->period < INT_MAX ? state->period : INT_MAX;
+ duty_cycle = state->duty_cycle < INT_MAX ? state->duty_cycle : INT_MAX;
+
+ ret = atmel_tcb_pwm_config(chip, pwm, duty_cycle, period);
+ if (ret)
+ return ret;
+
+ return atmel_tcb_pwm_enable(chip, pwm);
+}
+
static const struct pwm_ops atmel_tcb_pwm_ops = {
.request = atmel_tcb_pwm_request,
.free = atmel_tcb_pwm_free,
- .config = atmel_tcb_pwm_config,
- .set_polarity = atmel_tcb_pwm_set_polarity,
- .enable = atmel_tcb_pwm_enable,
- .disable = atmel_tcb_pwm_disable,
+ .apply = atmel_tcb_pwm_apply,
.owner = THIS_MODULE,
};
tcbpwm->chip.ops = &atmel_tcb_pwm_ops;
tcbpwm->chip.of_xlate = of_pwm_xlate_with_flags;
tcbpwm->chip.of_pwm_n_cells = 3;
- tcbpwm->chip.base = -1;
tcbpwm->chip.npwm = NPWM;
tcbpwm->channel = channel;
tcbpwm->regmap = regmap;
struct atmel_tcb_pwm_chip *tcbpwm = platform_get_drvdata(pdev);
int err;
- clk_disable_unprepare(tcbpwm->slow_clk);
- clk_put(tcbpwm->slow_clk);
- clk_put(tcbpwm->clk);
-
err = pwmchip_remove(&tcbpwm->chip);
if (err < 0)
return err;
+ clk_disable_unprepare(tcbpwm->slow_clk);
+ clk_put(tcbpwm->slow_clk);
+ clk_put(tcbpwm->clk);
+
return 0;
}
}
static int atmel_pwm_calculate_cprd_and_pres(struct pwm_chip *chip,
+ unsigned long clkrate,
const struct pwm_state *state,
unsigned long *cprd, u32 *pres)
{
int shift;
/* Calculate the period cycles and prescale value */
- cycles *= clk_get_rate(atmel_pwm->clk);
+ cycles *= clkrate;
do_div(cycles, NSEC_PER_SEC);
/*
}
static void atmel_pwm_calculate_cdty(const struct pwm_state *state,
- unsigned long cprd, unsigned long *cdty)
+ unsigned long clkrate, unsigned long cprd,
+ u32 pres, unsigned long *cdty)
{
unsigned long long cycles = state->duty_cycle;
- cycles *= cprd;
- do_div(cycles, state->period);
+ cycles *= clkrate;
+ do_div(cycles, NSEC_PER_SEC);
+ cycles >>= pres;
*cdty = cprd - cycles;
}
pwm_get_state(pwm, &cstate);
if (state->enabled) {
+ unsigned long clkrate = clk_get_rate(atmel_pwm->clk);
+
if (cstate.enabled &&
cstate.polarity == state->polarity &&
cstate.period == state->period) {
+ u32 cmr = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm, PWM_CMR);
+
cprd = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm,
atmel_pwm->data->regs.period);
- atmel_pwm_calculate_cdty(state, cprd, &cdty);
+ pres = cmr & PWM_CMR_CPRE_MSK;
+
+ atmel_pwm_calculate_cdty(state, clkrate, cprd, pres, &cdty);
atmel_pwm_update_cdty(chip, pwm, cdty);
return 0;
}
- ret = atmel_pwm_calculate_cprd_and_pres(chip, state, &cprd,
+ ret = atmel_pwm_calculate_cprd_and_pres(chip, clkrate, state, &cprd,
&pres);
if (ret) {
dev_err(chip->dev,
return ret;
}
- atmel_pwm_calculate_cdty(state, cprd, &cdty);
+ atmel_pwm_calculate_cdty(state, clkrate, cprd, pres, &cdty);
if (cstate.enabled) {
atmel_pwm_disable(chip, pwm, false);
cdty = atmel_pwm_ch_readl(atmel_pwm, pwm->hwpwm,
atmel_pwm->data->regs.duty);
- tmp = (u64)cdty * NSEC_PER_SEC;
+ tmp = (u64)(cprd - cdty) * NSEC_PER_SEC;
tmp <<= pres;
state->duty_cycle = DIV64_U64_ROUND_UP(tmp, rate);
atmel_pwm->chip.ops = &atmel_pwm_ops;
atmel_pwm->chip.of_xlate = of_pwm_xlate_with_flags;
atmel_pwm->chip.of_pwm_n_cells = 3;
- atmel_pwm->chip.base = -1;
atmel_pwm->chip.npwm = 4;
ret = pwmchip_add(&atmel_pwm->chip);
{
struct atmel_pwm_chip *atmel_pwm = platform_get_drvdata(pdev);
+ pwmchip_remove(&atmel_pwm->chip);
+
clk_unprepare(atmel_pwm->clk);
mutex_destroy(&atmel_pwm->isr_lock);
- return pwmchip_remove(&atmel_pwm->chip);
+ return 0;
}
static struct platform_driver atmel_pwm_driver = {
ip->chip.dev = &pdev->dev;
ip->chip.ops = &iproc_pwm_ops;
- ip->chip.base = -1;
ip->chip.npwm = 4;
ip->chip.of_xlate = of_pwm_xlate_with_flags;
ip->chip.of_pwm_n_cells = 3;
{
struct iproc_pwmc *ip = platform_get_drvdata(pdev);
+ pwmchip_remove(&ip->chip);
+
clk_disable_unprepare(ip->clk);
- return pwmchip_remove(&ip->chip);
+ return 0;
}
static const struct of_device_id bcm_iproc_pwmc_dt[] = {
kp->chip.dev = &pdev->dev;
kp->chip.ops = &kona_pwm_ops;
- kp->chip.base = -1;
kp->chip.npwm = 6;
kp->chip.of_xlate = of_pwm_xlate_with_flags;
kp->chip.of_pwm_n_cells = 3;
clk_disable_unprepare(kp->clk);
- ret = pwmchip_add_with_polarity(&kp->chip, PWM_POLARITY_INVERSED);
+ ret = pwmchip_add(&kp->chip);
if (ret < 0)
dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
static int kona_pwmc_remove(struct platform_device *pdev)
{
struct kona_pwmc *kp = platform_get_drvdata(pdev);
- unsigned int chan;
-
- for (chan = 0; chan < kp->chip.npwm; chan++)
- if (pwm_is_enabled(&kp->chip.pwms[chan]))
- clk_disable_unprepare(kp->clk);
return pwmchip_remove(&kp->chip);
}
struct bcm2835_pwm *pc = to_bcm2835_pwm(chip);
unsigned long rate = clk_get_rate(pc->clk);
- unsigned long long period;
- unsigned long scaler;
+ unsigned long long period_cycles;
+ u64 max_period;
+
u32 val;
if (!rate) {
return -EINVAL;
}
- scaler = DIV_ROUND_CLOSEST(NSEC_PER_SEC, rate);
+ /*
+ * period_cycles must be a 32 bit value, so period * rate / NSEC_PER_SEC
+ * must be <= U32_MAX. As U32_MAX * NSEC_PER_SEC < U64_MAX the
+ * multiplication period * rate doesn't overflow.
+ * To calculate the maximal possible period that guarantees the
+ * above inequality:
+ *
+ * round(period * rate / NSEC_PER_SEC) <= U32_MAX
+ * <=> period * rate / NSEC_PER_SEC < U32_MAX + 0.5
+ * <=> period * rate < (U32_MAX + 0.5) * NSEC_PER_SEC
+ * <=> period < ((U32_MAX + 0.5) * NSEC_PER_SEC) / rate
+ * <=> period < ((U32_MAX * NSEC_PER_SEC + NSEC_PER_SEC/2) / rate
+ * <=> period <= ceil((U32_MAX * NSEC_PER_SEC + NSEC_PER_SEC/2) / rate) - 1
+ */
+ max_period = DIV_ROUND_UP_ULL((u64)U32_MAX * NSEC_PER_SEC + NSEC_PER_SEC / 2, rate) - 1;
+
+ if (state->period > max_period)
+ return -EINVAL;
+
/* set period */
- period = DIV_ROUND_CLOSEST_ULL(state->period, scaler);
+ period_cycles = DIV_ROUND_CLOSEST_ULL(state->period * rate, NSEC_PER_SEC);
- /* dont accept a period that is too small or has been truncated */
- if ((period < PERIOD_MIN) || (period > U32_MAX))
+ /* don't accept a period that is too small */
+ if (period_cycles < PERIOD_MIN)
return -EINVAL;
- writel(period, pc->base + PERIOD(pwm->hwpwm));
+ writel(period_cycles, pc->base + PERIOD(pwm->hwpwm));
/* set duty cycle */
- val = DIV_ROUND_CLOSEST_ULL(state->duty_cycle, scaler);
+ val = DIV_ROUND_CLOSEST_ULL(state->duty_cycle * rate, NSEC_PER_SEC);
writel(val, pc->base + DUTY(pwm->hwpwm));
/* set polarity */
pc->chip.dev = &pdev->dev;
pc->chip.ops = &bcm2835_pwm_ops;
- pc->chip.base = -1;
pc->chip.npwm = 2;
pc->chip.of_xlate = of_pwm_xlate_with_flags;
pc->chip.of_pwm_n_cells = 3;
{
struct bcm2835_pwm *pc = platform_get_drvdata(pdev);
+ pwmchip_remove(&pc->chip);
+
clk_disable_unprepare(pc->clk);
- return pwmchip_remove(&pc->chip);
+ return 0;
}
static const struct of_device_id bcm2835_pwm_of_match[] = {
pwm->chip.dev = &pdev->dev;
pwm->chip.ops = &berlin_pwm_ops;
- pwm->chip.base = -1;
pwm->chip.npwm = 4;
pwm->chip.of_xlate = of_pwm_xlate_with_flags;
pwm->chip.of_pwm_n_cells = 3;
p->chip.dev = &pdev->dev;
p->chip.ops = &brcmstb_pwm_ops;
- p->chip.base = -1;
p->chip.npwm = 2;
p->base = devm_platform_ioremap_resource(pdev, 0);
priv->chip.ops = &clps711x_pwm_ops;
priv->chip.dev = &pdev->dev;
- priv->chip.base = -1;
priv->chip.npwm = 2;
priv->chip.of_xlate = clps711x_pwm_xlate;
priv->chip.of_pwm_n_cells = 1;
pwm->chip.dev = &pdev->dev;
pwm->chip.ops = &crc_pwm_ops;
- pwm->chip.base = -1;
pwm->chip.npwm = 1;
/* get the PMIC regmap */
if (state->period != EC_PWM_MAX_DUTY)
return -EINVAL;
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
/*
* EC doesn't separate the concept of duty cycle and enabled, but
* kernel does. Translate.
chip->ops = &cros_ec_pwm_ops;
chip->of_xlate = cros_ec_pwm_xlate;
chip->of_pwm_n_cells = 1;
- chip->base = -1;
ret = cros_ec_num_pwms(ec);
if (ret < 0) {
dev_err(dev, "Couldn't find PWMs: %d\n", ret);
dwc->chip.dev = dev;
dwc->chip.ops = &dwc_pwm_ops;
dwc->chip.npwm = DWC_TIMERS_TOTAL;
- dwc->chip.base = -1;
ret = pwmchip_add(&dwc->chip);
if (ret)
ep93xx_pwm->chip.dev = &pdev->dev;
ep93xx_pwm->chip.ops = &ep93xx_pwm_ops;
- ep93xx_pwm->chip.base = -1;
ep93xx_pwm->chip.npwm = 1;
ret = pwmchip_add(&ep93xx_pwm->chip);
fpc->chip.ops = &fsl_pwm_ops;
fpc->chip.of_xlate = of_pwm_xlate_with_flags;
fpc->chip.of_pwm_n_cells = 3;
- fpc->chip.base = -1;
fpc->chip.npwm = 8;
ret = pwmchip_add(&fpc->chip);
pwm_chip->chip.ops = &hibvt_pwm_ops;
pwm_chip->chip.dev = &pdev->dev;
- pwm_chip->chip.base = -1;
pwm_chip->chip.npwm = soc->num_pwms;
pwm_chip->chip.of_xlate = of_pwm_xlate_with_flags;
pwm_chip->chip.of_pwm_n_cells = 3;
pwm->chip.dev = &pdev->dev;
pwm->chip.ops = &img_pwm_ops;
- pwm->chip.base = -1;
pwm->chip.npwm = IMG_PWM_NPWM;
ret = pwmchip_add(&pwm->chip);
tpm->chip.dev = &pdev->dev;
tpm->chip.ops = &imx_tpm_pwm_ops;
- tpm->chip.base = -1;
tpm->chip.of_xlate = of_pwm_xlate_with_flags;
tpm->chip.of_pwm_n_cells = 3;
ret = clk_prepare_enable(tpm->clk);
if (ret)
- dev_err(dev,
- "failed to prepare or enable clock: %d\n",
- ret);
+ dev_err(dev, "failed to prepare or enable clock: %d\n", ret);
return ret;
}
imx->chip.ops = &pwm_imx1_ops;
imx->chip.dev = &pdev->dev;
- imx->chip.base = -1;
imx->chip.npwm = 1;
imx->mmio_base = devm_platform_ioremap_resource(pdev, 0);
imx->chip.ops = &pwm_imx27_ops;
imx->chip.dev = &pdev->dev;
- imx->chip.base = -1;
imx->chip.npwm = 1;
imx->chip.of_xlate = of_pwm_xlate_with_flags;
pc->chip.dev = dev;
pc->chip.ops = &lgm_pwm_ops;
pc->chip.npwm = 1;
- pc->chip.base = -1;
lgm_pwm_init(pc);
iqs620_pwm->chip.dev = &pdev->dev;
iqs620_pwm->chip.ops = &iqs620_pwm_ops;
- iqs620_pwm->chip.base = -1;
iqs620_pwm->chip.npwm = 1;
mutex_init(&iqs620_pwm->lock);
jz4740->chip.dev = dev;
jz4740->chip.ops = &jz4740_pwm_ops;
jz4740->chip.npwm = info->num_pwms;
- jz4740->chip.base = -1;
jz4740->chip.of_xlate = of_pwm_xlate_with_flags;
jz4740->chip.of_pwm_n_cells = 3;
if (ret)
return ret;
- priv->chip.base = -1;
priv->chip.dev = dev;
priv->chip.ops = &keembay_pwm_ops;
priv->chip.npwm = KMB_TOTAL_PWM_CHANNELS;
lp3943_pwm->chip.dev = &pdev->dev;
lp3943_pwm->chip.ops = &lp3943_pwm_ops;
lp3943_pwm->chip.npwm = LP3943_NUM_PWMS;
- lp3943_pwm->chip.base = -1;
platform_set_drvdata(pdev, lp3943_pwm);
lpc18xx_pwm->chip.dev = &pdev->dev;
lpc18xx_pwm->chip.ops = &lpc18xx_pwm_ops;
- lpc18xx_pwm->chip.base = -1;
lpc18xx_pwm->chip.npwm = 16;
lpc18xx_pwm->chip.of_xlate = of_pwm_xlate_with_flags;
lpc18xx_pwm->chip.of_pwm_n_cells = 3;
struct lpc18xx_pwm_chip *lpc18xx_pwm = platform_get_drvdata(pdev);
u32 val;
+ pwmchip_remove(&lpc18xx_pwm->chip);
+
val = lpc18xx_pwm_readl(lpc18xx_pwm, LPC18XX_PWM_CTRL);
lpc18xx_pwm_writel(lpc18xx_pwm, LPC18XX_PWM_CTRL,
val | LPC18XX_PWM_CTRL_HALT);
clk_disable_unprepare(lpc18xx_pwm->pwm_clk);
- return pwmchip_remove(&lpc18xx_pwm->chip);
+ return 0;
}
static struct platform_driver lpc18xx_pwm_driver = {
lpc32xx->chip.dev = &pdev->dev;
lpc32xx->chip.ops = &lpc32xx_pwm_ops;
lpc32xx->chip.npwm = 1;
- lpc32xx->chip.base = -1;
ret = pwmchip_add(&lpc32xx->chip);
if (ret < 0) {
static int lpc32xx_pwm_remove(struct platform_device *pdev)
{
struct lpc32xx_pwm_chip *lpc32xx = platform_get_drvdata(pdev);
- unsigned int i;
-
- for (i = 0; i < lpc32xx->chip.npwm; i++)
- pwm_disable(&lpc32xx->chip.pwms[i]);
return pwmchip_remove(&lpc32xx->chip);
}
lpwm->chip.dev = dev;
lpwm->chip.ops = &pwm_lpss_ops;
- lpwm->chip.base = -1;
lpwm->chip.npwm = info->npwm;
ret = pwmchip_add(&lpwm->chip);
int pwm_lpss_remove(struct pwm_lpss_chip *lpwm)
{
- int i;
-
- for (i = 0; i < lpwm->info->npwm; i++) {
- if (pwm_is_enabled(&lpwm->chip.pwms[i]))
- pm_runtime_put(lpwm->chip.dev);
- }
return pwmchip_remove(&lpwm->chip);
}
EXPORT_SYMBOL_GPL(pwm_lpss_remove);
clk_disable_unprepare(pc->clk_top);
}
-static inline u32 pwm_mediatek_readl(struct pwm_mediatek_chip *chip,
- unsigned int num, unsigned int offset)
-{
- return readl(chip->regs + pwm_mediatek_reg_offset[num] + offset);
-}
-
static inline void pwm_mediatek_writel(struct pwm_mediatek_chip *chip,
unsigned int num, unsigned int offset,
u32 value)
pc->chip.dev = &pdev->dev;
pc->chip.ops = &pwm_mediatek_ops;
- pc->chip.base = -1;
pc->chip.npwm = pc->soc->num_pwms;
ret = pwmchip_add(&pc->chip);
spin_lock_init(&meson->lock);
meson->chip.dev = &pdev->dev;
meson->chip.ops = &meson_pwm_ops;
- meson->chip.base = -1;
meson->chip.npwm = MESON_NUM_PWMS;
meson->chip.of_xlate = of_pwm_xlate_with_flags;
meson->chip.of_pwm_n_cells = 3;
mdp->chip.dev = &pdev->dev;
mdp->chip.ops = &mtk_disp_pwm_ops;
- mdp->chip.base = -1;
mdp->chip.npwm = 1;
ret = pwmchip_add(&mdp->chip);
mxs->chip.ops = &mxs_pwm_ops;
mxs->chip.of_xlate = of_pwm_xlate_with_flags;
mxs->chip.of_pwm_n_cells = 3;
- mxs->chip.base = -1;
ret = of_property_read_u32(np, "fsl,pwm-number", &mxs->chip.npwm);
if (ret < 0) {
omap->chip.dev = &pdev->dev;
omap->chip.ops = &pwm_omap_dmtimer_ops;
- omap->chip.base = -1;
omap->chip.npwm = 1;
omap->chip.of_xlate = of_pwm_xlate_with_flags;
omap->chip.of_pwm_n_cells = 3;
#define PCA9685_PRESCALE_MAX 0xFF /* => min. frequency of 24 Hz */
#define PCA9685_COUNTER_RANGE 4096
-#define PCA9685_DEFAULT_PERIOD 5000000 /* Default period_ns = 1/200 Hz */
#define PCA9685_OSC_CLOCK_MHZ 25 /* Internal oscillator with 25 MHz */
#define PCA9685_NUMREGS 0xFF
#define LED_N_OFF_H(N) (PCA9685_LEDX_OFF_H + (4 * (N)))
#define LED_N_OFF_L(N) (PCA9685_LEDX_OFF_L + (4 * (N)))
+#define REG_ON_H(C) ((C) >= PCA9685_MAXCHAN ? PCA9685_ALL_LED_ON_H : LED_N_ON_H((C)))
+#define REG_ON_L(C) ((C) >= PCA9685_MAXCHAN ? PCA9685_ALL_LED_ON_L : LED_N_ON_L((C)))
+#define REG_OFF_H(C) ((C) >= PCA9685_MAXCHAN ? PCA9685_ALL_LED_OFF_H : LED_N_OFF_H((C)))
+#define REG_OFF_L(C) ((C) >= PCA9685_MAXCHAN ? PCA9685_ALL_LED_OFF_L : LED_N_OFF_L((C)))
+
struct pca9685 {
struct pwm_chip chip;
struct regmap *regmap;
- int period_ns;
#if IS_ENABLED(CONFIG_GPIOLIB)
struct mutex lock;
struct gpio_chip gpio;
return container_of(chip, struct pca9685, chip);
}
+/* Helper function to set the duty cycle ratio to duty/4096 (e.g. duty=2048 -> 50%) */
+static void pca9685_pwm_set_duty(struct pca9685 *pca, int channel, unsigned int duty)
+{
+ if (duty == 0) {
+ /* Set the full OFF bit, which has the highest precedence */
+ regmap_write(pca->regmap, REG_OFF_H(channel), LED_FULL);
+ } else if (duty >= PCA9685_COUNTER_RANGE) {
+ /* Set the full ON bit and clear the full OFF bit */
+ regmap_write(pca->regmap, REG_ON_H(channel), LED_FULL);
+ regmap_write(pca->regmap, REG_OFF_H(channel), 0);
+ } else {
+ /* Set OFF time (clears the full OFF bit) */
+ regmap_write(pca->regmap, REG_OFF_L(channel), duty & 0xff);
+ regmap_write(pca->regmap, REG_OFF_H(channel), (duty >> 8) & 0xf);
+ /* Clear the full ON bit */
+ regmap_write(pca->regmap, REG_ON_H(channel), 0);
+ }
+}
+
+static unsigned int pca9685_pwm_get_duty(struct pca9685 *pca, int channel)
+{
+ unsigned int off_h = 0, val = 0;
+
+ if (WARN_ON(channel >= PCA9685_MAXCHAN)) {
+ /* HW does not support reading state of "all LEDs" channel */
+ return 0;
+ }
+
+ regmap_read(pca->regmap, LED_N_OFF_H(channel), &off_h);
+ if (off_h & LED_FULL) {
+ /* Full OFF bit is set */
+ return 0;
+ }
+
+ regmap_read(pca->regmap, LED_N_ON_H(channel), &val);
+ if (val & LED_FULL) {
+ /* Full ON bit is set */
+ return PCA9685_COUNTER_RANGE;
+ }
+
+ if (regmap_read(pca->regmap, LED_N_OFF_L(channel), &val)) {
+ /* Reset val to 0 in case reading LED_N_OFF_L failed */
+ val = 0;
+ }
+ return ((off_h & 0xf) << 8) | (val & 0xff);
+}
+
#if IS_ENABLED(CONFIG_GPIOLIB)
static bool pca9685_pwm_test_and_set_inuse(struct pca9685 *pca, int pwm_idx)
{
static int pca9685_pwm_gpio_get(struct gpio_chip *gpio, unsigned int offset)
{
struct pca9685 *pca = gpiochip_get_data(gpio);
- struct pwm_device *pwm = &pca->chip.pwms[offset];
- unsigned int value;
-
- regmap_read(pca->regmap, LED_N_ON_H(pwm->hwpwm), &value);
- return value & LED_FULL;
+ return pca9685_pwm_get_duty(pca, offset) != 0;
}
static void pca9685_pwm_gpio_set(struct gpio_chip *gpio, unsigned int offset,
int value)
{
struct pca9685 *pca = gpiochip_get_data(gpio);
- struct pwm_device *pwm = &pca->chip.pwms[offset];
- unsigned int on = value ? LED_FULL : 0;
- /* Clear both OFF registers */
- regmap_write(pca->regmap, LED_N_OFF_L(pwm->hwpwm), 0);
- regmap_write(pca->regmap, LED_N_OFF_H(pwm->hwpwm), 0);
-
- /* Set the full ON bit */
- regmap_write(pca->regmap, LED_N_ON_H(pwm->hwpwm), on);
+ pca9685_pwm_set_duty(pca, offset, value ? PCA9685_COUNTER_RANGE : 0);
}
static void pca9685_pwm_gpio_free(struct gpio_chip *gpio, unsigned int offset)
{
struct pca9685 *pca = gpiochip_get_data(gpio);
- pca9685_pwm_gpio_set(gpio, offset, 0);
+ pca9685_pwm_set_duty(pca, offset, 0);
pm_runtime_put(pca->chip.dev);
pca9685_pwm_clear_inuse(pca, offset);
}
}
}
-static int pca9685_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
- int duty_ns, int period_ns)
+static int pca9685_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
{
struct pca9685 *pca = to_pca(chip);
- unsigned long long duty;
- unsigned int reg;
- int prescale;
-
- if (period_ns != pca->period_ns) {
- prescale = DIV_ROUND_CLOSEST(PCA9685_OSC_CLOCK_MHZ * period_ns,
- PCA9685_COUNTER_RANGE * 1000) - 1;
-
- if (prescale >= PCA9685_PRESCALE_MIN &&
- prescale <= PCA9685_PRESCALE_MAX) {
- /*
- * Putting the chip briefly into SLEEP mode
- * at this point won't interfere with the
- * pm_runtime framework, because the pm_runtime
- * state is guaranteed active here.
- */
- /* Put chip into sleep mode */
- pca9685_set_sleep_mode(pca, true);
-
- /* Change the chip-wide output frequency */
- regmap_write(pca->regmap, PCA9685_PRESCALE, prescale);
-
- /* Wake the chip up */
- pca9685_set_sleep_mode(pca, false);
-
- pca->period_ns = period_ns;
- } else {
- dev_err(chip->dev,
- "prescaler not set: period out of bounds!\n");
- return -EINVAL;
- }
- }
+ unsigned long long duty, prescale;
+ unsigned int val = 0;
- if (duty_ns < 1) {
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_OFF_H;
- else
- reg = LED_N_OFF_H(pwm->hwpwm);
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
- regmap_write(pca->regmap, reg, LED_FULL);
-
- return 0;
+ prescale = DIV_ROUND_CLOSEST_ULL(PCA9685_OSC_CLOCK_MHZ * state->period,
+ PCA9685_COUNTER_RANGE * 1000) - 1;
+ if (prescale < PCA9685_PRESCALE_MIN || prescale > PCA9685_PRESCALE_MAX) {
+ dev_err(chip->dev, "pwm not changed: period out of bounds!\n");
+ return -EINVAL;
}
- if (duty_ns == period_ns) {
- /* Clear both OFF registers */
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_OFF_L;
- else
- reg = LED_N_OFF_L(pwm->hwpwm);
-
- regmap_write(pca->regmap, reg, 0x0);
-
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_OFF_H;
- else
- reg = LED_N_OFF_H(pwm->hwpwm);
-
- regmap_write(pca->regmap, reg, 0x0);
-
- /* Set the full ON bit */
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_ON_H;
- else
- reg = LED_N_ON_H(pwm->hwpwm);
-
- regmap_write(pca->regmap, reg, LED_FULL);
-
+ if (!state->enabled) {
+ pca9685_pwm_set_duty(pca, pwm->hwpwm, 0);
return 0;
}
- duty = PCA9685_COUNTER_RANGE * (unsigned long long)duty_ns;
- duty = DIV_ROUND_UP_ULL(duty, period_ns);
-
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_OFF_L;
- else
- reg = LED_N_OFF_L(pwm->hwpwm);
-
- regmap_write(pca->regmap, reg, (int)duty & 0xff);
-
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_OFF_H;
- else
- reg = LED_N_OFF_H(pwm->hwpwm);
-
- regmap_write(pca->regmap, reg, ((int)duty >> 8) & 0xf);
+ regmap_read(pca->regmap, PCA9685_PRESCALE, &val);
+ if (prescale != val) {
+ /*
+ * Putting the chip briefly into SLEEP mode
+ * at this point won't interfere with the
+ * pm_runtime framework, because the pm_runtime
+ * state is guaranteed active here.
+ */
+ /* Put chip into sleep mode */
+ pca9685_set_sleep_mode(pca, true);
- /* Clear the full ON bit, otherwise the set OFF time has no effect */
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_ON_H;
- else
- reg = LED_N_ON_H(pwm->hwpwm);
+ /* Change the chip-wide output frequency */
+ regmap_write(pca->regmap, PCA9685_PRESCALE, prescale);
- regmap_write(pca->regmap, reg, 0);
+ /* Wake the chip up */
+ pca9685_set_sleep_mode(pca, false);
+ }
+ duty = PCA9685_COUNTER_RANGE * state->duty_cycle;
+ duty = DIV_ROUND_UP_ULL(duty, state->period);
+ pca9685_pwm_set_duty(pca, pwm->hwpwm, duty);
return 0;
}
-static int pca9685_pwm_enable(struct pwm_chip *chip, struct pwm_device *pwm)
+static void pca9685_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
{
struct pca9685 *pca = to_pca(chip);
- unsigned int reg;
-
- /*
- * The PWM subsystem does not support a pre-delay.
- * So, set the ON-timeout to 0
- */
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_ON_L;
- else
- reg = LED_N_ON_L(pwm->hwpwm);
-
- regmap_write(pca->regmap, reg, 0);
-
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_ON_H;
- else
- reg = LED_N_ON_H(pwm->hwpwm);
-
- regmap_write(pca->regmap, reg, 0);
+ unsigned long long duty;
+ unsigned int val = 0;
+ /* Calculate (chip-wide) period from prescale value */
+ regmap_read(pca->regmap, PCA9685_PRESCALE, &val);
/*
- * Clear the full-off bit.
- * It has precedence over the others and must be off.
+ * PCA9685_OSC_CLOCK_MHZ is 25, i.e. an integer divider of 1000.
+ * The following calculation is therefore only a multiplication
+ * and we are not losing precision.
*/
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_OFF_H;
- else
- reg = LED_N_OFF_H(pwm->hwpwm);
-
- regmap_update_bits(pca->regmap, reg, LED_FULL, 0x0);
+ state->period = (PCA9685_COUNTER_RANGE * 1000 / PCA9685_OSC_CLOCK_MHZ) *
+ (val + 1);
- return 0;
-}
+ /* The (per-channel) polarity is fixed */
+ state->polarity = PWM_POLARITY_NORMAL;
-static void pca9685_pwm_disable(struct pwm_chip *chip, struct pwm_device *pwm)
-{
- struct pca9685 *pca = to_pca(chip);
- unsigned int reg;
-
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_OFF_H;
- else
- reg = LED_N_OFF_H(pwm->hwpwm);
-
- regmap_write(pca->regmap, reg, LED_FULL);
-
- /* Clear the LED_OFF counter. */
- if (pwm->hwpwm >= PCA9685_MAXCHAN)
- reg = PCA9685_ALL_LED_OFF_L;
- else
- reg = LED_N_OFF_L(pwm->hwpwm);
+ if (pwm->hwpwm >= PCA9685_MAXCHAN) {
+ /*
+ * The "all LEDs" channel does not support HW readout
+ * Return 0 and disabled for backwards compatibility
+ */
+ state->duty_cycle = 0;
+ state->enabled = false;
+ return;
+ }
- regmap_write(pca->regmap, reg, 0x0);
+ state->enabled = true;
+ duty = pca9685_pwm_get_duty(pca, pwm->hwpwm);
+ state->duty_cycle = DIV_ROUND_DOWN_ULL(duty * state->period, PCA9685_COUNTER_RANGE);
}
static int pca9685_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
{
struct pca9685 *pca = to_pca(chip);
- pca9685_pwm_disable(chip, pwm);
+ pca9685_pwm_set_duty(pca, pwm->hwpwm, 0);
pm_runtime_put(chip->dev);
pca9685_pwm_clear_inuse(pca, pwm->hwpwm);
}
static const struct pwm_ops pca9685_pwm_ops = {
- .enable = pca9685_pwm_enable,
- .disable = pca9685_pwm_disable,
- .config = pca9685_pwm_config,
+ .apply = pca9685_pwm_apply,
+ .get_state = pca9685_pwm_get_state,
.request = pca9685_pwm_request,
.free = pca9685_pwm_free,
.owner = THIS_MODULE,
ret);
return ret;
}
- pca->period_ns = PCA9685_DEFAULT_PERIOD;
i2c_set_clientdata(client, pca);
reg &= ~(MODE1_ALLCALL | MODE1_SUB1 | MODE1_SUB2 | MODE1_SUB3);
regmap_write(pca->regmap, PCA9685_MODE1, reg);
- /* Clear all "full off" bits */
- regmap_write(pca->regmap, PCA9685_ALL_LED_OFF_L, 0);
- regmap_write(pca->regmap, PCA9685_ALL_LED_OFF_H, 0);
+ /* Reset OFF registers to POR default */
+ regmap_write(pca->regmap, PCA9685_ALL_LED_OFF_L, LED_FULL);
+ regmap_write(pca->regmap, PCA9685_ALL_LED_OFF_H, LED_FULL);
pca->chip.ops = &pca9685_pwm_ops;
/* Add an extra channel for ALL_LED */
pca->chip.npwm = PCA9685_MAXCHAN + 1;
pca->chip.dev = &client->dev;
- pca->chip.base = -1;
ret = pwmchip_add(&pca->chip);
if (ret < 0)
return ret;
}
- /* The chip comes out of power-up in the active state */
- pm_runtime_set_active(&client->dev);
- /*
- * Enable will put the chip into suspend, which is what we
- * want as all outputs are disabled at this point
- */
pm_runtime_enable(&client->dev);
+ if (pm_runtime_enabled(&client->dev)) {
+ /*
+ * Although the chip comes out of power-up in the sleep state,
+ * we force it to sleep in case it was woken up before
+ */
+ pca9685_set_sleep_mode(pca, true);
+ pm_runtime_set_suspended(&client->dev);
+ } else {
+ /* Wake the chip up if runtime PM is disabled */
+ pca9685_set_sleep_mode(pca, false);
+ }
+
return 0;
}
ret = pwmchip_remove(&pca->chip);
if (ret)
return ret;
+
+ if (!pm_runtime_enabled(&client->dev)) {
+ /* Put chip in sleep state if runtime PM is disabled */
+ pca9685_set_sleep_mode(pca, true);
+ }
+
pm_runtime_disable(&client->dev);
+
return 0;
}
pwm->chip.dev = &pdev->dev;
pwm->chip.ops = &pxa_pwm_ops;
- pwm->chip.base = -1;
pwm->chip.npwm = (id->driver_data & HAS_SECONDARY_PWM) ? 2 : 1;
if (IS_ENABLED(CONFIG_OF)) {
rcar_pwm->chip.dev = &pdev->dev;
rcar_pwm->chip.ops = &rcar_pwm_ops;
- rcar_pwm->chip.base = -1;
rcar_pwm->chip.npwm = 1;
pm_runtime_enable(&pdev->dev);
tpu->chip.ops = &tpu_pwm_ops;
tpu->chip.of_xlate = of_pwm_xlate_with_flags;
tpu->chip.of_pwm_n_cells = 3;
- tpu->chip.base = -1;
tpu->chip.npwm = TPU_CHANNEL_MAX;
pm_runtime_enable(&pdev->dev);
pc->data = id->data;
pc->chip.dev = &pdev->dev;
pc->chip.ops = &rockchip_pwm_ops;
- pc->chip.base = -1;
pc->chip.npwm = 1;
if (pc->data->supports_polarity) {
chip->chip.dev = &pdev->dev;
chip->chip.ops = &pwm_samsung_ops;
- chip->chip.base = -1;
chip->chip.npwm = SAMSUNG_PWM_NUM;
chip->inverter_mask = BIT(SAMSUNG_PWM_NUM) - 1;
chip->ops = &pwm_sifive_ops;
chip->of_xlate = of_pwm_xlate_with_flags;
chip->of_pwm_n_cells = 3;
- chip->base = -1;
chip->npwm = 4;
ddata->regs = devm_platform_ioremap_resource(pdev, 0);
chip = &priv->pwm_chip;
chip->dev = &pdev->dev;
chip->ops = &sl28cpld_pwm_ops;
- chip->base = -1;
chip->npwm = 1;
platform_set_drvdata(pdev, priv);
pc->chip.dev = &pdev->dev;
pc->chip.ops = &spear_pwm_ops;
- pc->chip.base = -1;
pc->chip.npwm = NUM_PWM;
ret = clk_prepare(pc->clk);
struct pwm_state *cstate = &pwm->state;
int ret;
+ if (state->polarity != PWM_POLARITY_NORMAL)
+ return -EINVAL;
+
if (state->enabled) {
if (!cstate->enabled) {
/*
spc->chip.dev = &pdev->dev;
spc->chip.ops = &sprd_pwm_ops;
- spc->chip.base = -1;
spc->chip.npwm = spc->num_pwms;
ret = pwmchip_add(&spc->chip);
pc->chip.dev = dev;
pc->chip.ops = &sti_pwm_ops;
- pc->chip.base = -1;
pc->chip.npwm = pc->cdata->pwm_num_devs;
ret = pwmchip_add(&pc->chip);
static int sti_pwm_remove(struct platform_device *pdev)
{
struct sti_pwm_chip *pc = platform_get_drvdata(pdev);
- unsigned int i;
- for (i = 0; i < pc->cdata->pwm_num_devs; i++)
- pwm_disable(&pc->chip.pwms[i]);
+ pwmchip_remove(&pc->chip);
clk_unprepare(pc->pwm_clk);
clk_unprepare(pc->cpt_clk);
- return pwmchip_remove(&pc->chip);
+ return 0;
}
static const struct of_device_id sti_pwm_of_match[] = {
priv->regmap = ddata->regmap;
priv->clk = ddata->clk;
- priv->chip.base = -1;
priv->chip.dev = &pdev->dev;
priv->chip.ops = &stm32_pwm_lp_ops;
priv->chip.npwm = 1;
stm32_pwm_detect_complementary(priv);
- priv->chip.base = -1;
priv->chip.dev = dev;
priv->chip.ops = &stm32pwm_ops;
priv->chip.npwm = stm32_pwm_detect_channels(priv);
pwm->stmpe = stmpe;
pwm->chip.dev = &pdev->dev;
- pwm->chip.base = -1;
if (stmpe->partnum == STMPE2401 || stmpe->partnum == STMPE2403) {
pwm->chip.ops = &stmpe_24xx_pwm_ops;
pwm->chip.dev = &pdev->dev;
pwm->chip.ops = &sun4i_pwm_ops;
- pwm->chip.base = -1;
pwm->chip.npwm = pwm->data->npwm;
pwm->chip.of_xlate = of_pwm_xlate_with_flags;
pwm->chip.of_pwm_n_cells = 3;
pwm->chip.dev = &pdev->dev;
pwm->chip.ops = &tegra_pwm_ops;
- pwm->chip.base = -1;
pwm->chip.npwm = pwm->soc->num_channels;
ret = pwmchip_add(&pwm->chip);
pc->chip.ops = &ecap_pwm_ops;
pc->chip.of_xlate = of_pwm_xlate_with_flags;
pc->chip.of_pwm_n_cells = 3;
- pc->chip.base = -1;
pc->chip.npwm = 1;
pc->mmio_base = devm_platform_ioremap_resource(pdev, 0);
pc->chip.ops = &ehrpwm_pwm_ops;
pc->chip.of_xlate = of_pwm_xlate_with_flags;
pc->chip.of_pwm_n_cells = 3;
- pc->chip.base = -1;
pc->chip.npwm = NUM_PWM_CHANNEL;
pc->mmio_base = devm_platform_ioremap_resource(pdev, 0);
}
twl->chip.dev = &pdev->dev;
- twl->chip.base = -1;
mutex_init(&twl->mutex);
twl->chip.ops = &twl6030_pwm_ops;
twl->chip.dev = &pdev->dev;
- twl->chip.base = -1;
twl->chip.npwm = 2;
mutex_init(&twl->mutex);
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Toshiba Visconti pulse-width-modulation controller driver
+ *
+ * Copyright (c) 2020 - 2021 TOSHIBA CORPORATION
+ * Copyright (c) 2020 - 2021 Toshiba Electronic Devices & Storage Corporation
+ *
+ * Authors: Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>
+ *
+ * Limitations:
+ * - The fixed input clock is running at 1 MHz and is divided by either 1,
+ * 2, 4 or 8.
+ * - When the settings of the PWM are modified, the new values are shadowed
+ * in hardware until the PIPGM_PCSR register is written and the currently
+ * running period is completed. This way the hardware switches atomically
+ * from the old setting to the new.
+ * - Disabling the hardware completes the currently running period and keeps
+ * the output at low level at all times.
+ */
+
+#include <linux/err.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pwm.h>
+
+#define PIPGM_PCSR(ch) (0x400 + 4 * (ch))
+#define PIPGM_PDUT(ch) (0x420 + 4 * (ch))
+#define PIPGM_PWMC(ch) (0x440 + 4 * (ch))
+
+#define PIPGM_PWMC_PWMACT BIT(5)
+#define PIPGM_PWMC_CLK_MASK GENMASK(1, 0)
+#define PIPGM_PWMC_POLARITY_MASK GENMASK(5, 5)
+
+struct visconti_pwm_chip {
+ struct pwm_chip chip;
+ void __iomem *base;
+};
+
+static inline struct visconti_pwm_chip *visconti_pwm_from_chip(struct pwm_chip *chip)
+{
+ return container_of(chip, struct visconti_pwm_chip, chip);
+}
+
+static int visconti_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
+ const struct pwm_state *state)
+{
+ struct visconti_pwm_chip *priv = visconti_pwm_from_chip(chip);
+ u32 period, duty_cycle, pwmc0;
+
+ if (!state->enabled) {
+ writel(0, priv->base + PIPGM_PCSR(pwm->hwpwm));
+ return 0;
+ }
+
+ /*
+ * The biggest period the hardware can provide is
+ * (0xffff << 3) * 1000 ns
+ * This value fits easily in an u32, so simplify the maths by
+ * capping the values to 32 bit integers.
+ */
+ if (state->period > (0xffff << 3) * 1000)
+ period = (0xffff << 3) * 1000;
+ else
+ period = state->period;
+
+ if (state->duty_cycle > period)
+ duty_cycle = period;
+ else
+ duty_cycle = state->duty_cycle;
+
+ /*
+ * The input clock runs fixed at 1 MHz, so we have only
+ * microsecond resolution and so can divide by
+ * NSEC_PER_SEC / CLKFREQ = 1000 without losing precision.
+ */
+ period /= 1000;
+ duty_cycle /= 1000;
+
+ if (!period)
+ return -ERANGE;
+
+ /*
+ * PWMC controls a divider that divides the input clk by a
+ * power of two between 1 and 8. As a smaller divider yields
+ * higher precision, pick the smallest possible one.
+ */
+ if (period > 0xffff) {
+ pwmc0 = ilog2(period >> 16);
+ if (WARN_ON(pwmc0 > 3))
+ return -EINVAL;
+ } else {
+ pwmc0 = 0;
+ }
+
+ period >>= pwmc0;
+ duty_cycle >>= pwmc0;
+
+ if (state->polarity == PWM_POLARITY_INVERSED)
+ pwmc0 |= PIPGM_PWMC_PWMACT;
+ writel(pwmc0, priv->base + PIPGM_PWMC(pwm->hwpwm));
+ writel(duty_cycle, priv->base + PIPGM_PDUT(pwm->hwpwm));
+ writel(period, priv->base + PIPGM_PCSR(pwm->hwpwm));
+
+ return 0;
+}
+
+static void visconti_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
+ struct pwm_state *state)
+{
+ struct visconti_pwm_chip *priv = visconti_pwm_from_chip(chip);
+ u32 period, duty, pwmc0, pwmc0_clk;
+
+ period = readl(priv->base + PIPGM_PCSR(pwm->hwpwm));
+ duty = readl(priv->base + PIPGM_PDUT(pwm->hwpwm));
+ pwmc0 = readl(priv->base + PIPGM_PWMC(pwm->hwpwm));
+ pwmc0_clk = pwmc0 & PIPGM_PWMC_CLK_MASK;
+
+ state->period = (period << pwmc0_clk) * NSEC_PER_USEC;
+ state->duty_cycle = (duty << pwmc0_clk) * NSEC_PER_USEC;
+ if (pwmc0 & PIPGM_PWMC_POLARITY_MASK)
+ state->polarity = PWM_POLARITY_INVERSED;
+ else
+ state->polarity = PWM_POLARITY_NORMAL;
+
+ state->enabled = true;
+}
+
+static const struct pwm_ops visconti_pwm_ops = {
+ .apply = visconti_pwm_apply,
+ .get_state = visconti_pwm_get_state,
+ .owner = THIS_MODULE,
+};
+
+static int visconti_pwm_probe(struct platform_device *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct visconti_pwm_chip *priv;
+ int ret;
+
+ priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ priv->base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(priv->base))
+ return PTR_ERR(priv->base);
+
+ platform_set_drvdata(pdev, priv);
+
+ priv->chip.dev = dev;
+ priv->chip.ops = &visconti_pwm_ops;
+ priv->chip.npwm = 4;
+
+ ret = pwmchip_add(&priv->chip);
+ if (ret < 0)
+ return dev_err_probe(&pdev->dev, ret, "Cannot register visconti PWM\n");
+
+ return 0;
+}
+
+static int visconti_pwm_remove(struct platform_device *pdev)
+{
+ struct visconti_pwm_chip *priv = platform_get_drvdata(pdev);
+
+ pwmchip_remove(&priv->chip);
+
+ return 0;
+}
+
+static const struct of_device_id visconti_pwm_of_match[] = {
+ { .compatible = "toshiba,visconti-pwm", },
+ { }
+};
+MODULE_DEVICE_TABLE(of, visconti_pwm_of_match);
+
+static struct platform_driver visconti_pwm_driver = {
+ .driver = {
+ .name = "pwm-visconti",
+ .of_match_table = visconti_pwm_of_match,
+ },
+ .probe = visconti_pwm_probe,
+ .remove = visconti_pwm_remove,
+};
+module_platform_driver(visconti_pwm_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Nobuhiro Iwamatsu <nobuhiro1.iwamatsu@toshiba.co.jp>");
+MODULE_ALIAS("platform:pwm-visconti");
chip->chip.ops = &vt8500_pwm_ops;
chip->chip.of_xlate = of_pwm_xlate_with_flags;
chip->chip.of_pwm_n_cells = 3;
- chip->chip.base = -1;
chip->chip.npwm = VT8500_NR_PWMS;
chip->clk = devm_clk_get(&pdev->dev, NULL);
It's safe to say N if you don't want to use this interface.
config IMX_REMOTEPROC
- tristate "IMX6/7 remoteproc support"
+ tristate "i.MX remoteproc support"
depends on ARCH_MXC
+ select MAILBOX
help
- Say y here to support iMX's remote processors (Cortex M4
- on iMX7D) via the remote processor framework.
+ Say y here to support iMX's remote processors via the remote
+ processor framework.
It's safe to say N here.
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/kernel.h>
+#include <linux/mailbox_client.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
#include <linux/of_address.h>
+#include <linux/of_reserved_mem.h>
#include <linux/of_device.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
#include <linux/remoteproc.h>
+#include <linux/workqueue.h>
+
+#include "remoteproc_internal.h"
#define IMX7D_SRC_SCR 0x0C
#define IMX7D_ENABLE_M4 BIT(3)
| IMX6SX_SW_M4C_NON_SCLR_RST \
| IMX6SX_SW_M4C_RST)
-#define IMX7D_RPROC_MEM_MAX 8
+#define IMX_RPROC_MEM_MAX 32
/**
* struct imx_rproc_mem - slim internal memory structure
struct regmap *regmap;
struct rproc *rproc;
const struct imx_rproc_dcfg *dcfg;
- struct imx_rproc_mem mem[IMX7D_RPROC_MEM_MAX];
+ struct imx_rproc_mem mem[IMX_RPROC_MEM_MAX];
struct clk *clk;
+ struct mbox_client cl;
+ struct mbox_chan *tx_ch;
+ struct mbox_chan *rx_ch;
+ struct work_struct rproc_work;
+ struct workqueue_struct *workqueue;
+ void __iomem *rsc_table;
+};
+
+static const struct imx_rproc_att imx_rproc_att_imx8mq[] = {
+ /* dev addr , sys addr , size , flags */
+ /* TCML - alias */
+ { 0x00000000, 0x007e0000, 0x00020000, 0 },
+ /* OCRAM_S */
+ { 0x00180000, 0x00180000, 0x00008000, 0 },
+ /* OCRAM */
+ { 0x00900000, 0x00900000, 0x00020000, 0 },
+ /* OCRAM */
+ { 0x00920000, 0x00920000, 0x00020000, 0 },
+ /* QSPI Code - alias */
+ { 0x08000000, 0x08000000, 0x08000000, 0 },
+ /* DDR (Code) - alias */
+ { 0x10000000, 0x80000000, 0x0FFE0000, 0 },
+ /* TCML */
+ { 0x1FFE0000, 0x007E0000, 0x00020000, ATT_OWN },
+ /* TCMU */
+ { 0x20000000, 0x00800000, 0x00020000, ATT_OWN },
+ /* OCRAM_S */
+ { 0x20180000, 0x00180000, 0x00008000, ATT_OWN },
+ /* OCRAM */
+ { 0x20200000, 0x00900000, 0x00020000, ATT_OWN },
+ /* OCRAM */
+ { 0x20220000, 0x00920000, 0x00020000, ATT_OWN },
+ /* DDR (Data) */
+ { 0x40000000, 0x40000000, 0x80000000, 0 },
};
static const struct imx_rproc_att imx_rproc_att_imx7d[] = {
{ 0x80000000, 0x80000000, 0x60000000, 0 },
};
+static const struct imx_rproc_dcfg imx_rproc_cfg_imx8mq = {
+ .src_reg = IMX7D_SRC_SCR,
+ .src_mask = IMX7D_M4_RST_MASK,
+ .src_start = IMX7D_M4_START,
+ .src_stop = IMX7D_M4_STOP,
+ .att = imx_rproc_att_imx8mq,
+ .att_size = ARRAY_SIZE(imx_rproc_att_imx8mq),
+};
+
static const struct imx_rproc_dcfg imx_rproc_cfg_imx7d = {
.src_reg = IMX7D_SRC_SCR,
.src_mask = IMX7D_M4_RST_MASK,
return -ENOENT;
}
-static void *imx_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *imx_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct imx_rproc *priv = rproc->priv;
void *va = NULL;
if (imx_rproc_da_to_sys(priv, da, len, &sys))
return NULL;
- for (i = 0; i < IMX7D_RPROC_MEM_MAX; i++) {
+ for (i = 0; i < IMX_RPROC_MEM_MAX; i++) {
if (sys >= priv->mem[i].sys_addr && sys + len <
priv->mem[i].sys_addr + priv->mem[i].size) {
unsigned int offset = sys - priv->mem[i].sys_addr;
return va;
}
+static int imx_rproc_mem_alloc(struct rproc *rproc,
+ struct rproc_mem_entry *mem)
+{
+ struct device *dev = rproc->dev.parent;
+ void *va;
+
+ dev_dbg(dev, "map memory: %p+%zx\n", &mem->dma, mem->len);
+ va = ioremap_wc(mem->dma, mem->len);
+ if (IS_ERR_OR_NULL(va)) {
+ dev_err(dev, "Unable to map memory region: %p+%zx\n",
+ &mem->dma, mem->len);
+ return -ENOMEM;
+ }
+
+ /* Update memory entry va */
+ mem->va = va;
+
+ return 0;
+}
+
+static int imx_rproc_mem_release(struct rproc *rproc,
+ struct rproc_mem_entry *mem)
+{
+ dev_dbg(rproc->dev.parent, "unmap memory: %pa\n", &mem->dma);
+ iounmap(mem->va);
+
+ return 0;
+}
+
+static int imx_rproc_prepare(struct rproc *rproc)
+{
+ struct imx_rproc *priv = rproc->priv;
+ struct device_node *np = priv->dev->of_node;
+ struct of_phandle_iterator it;
+ struct rproc_mem_entry *mem;
+ struct reserved_mem *rmem;
+ u32 da;
+
+ /* Register associated reserved memory regions */
+ of_phandle_iterator_init(&it, np, "memory-region", NULL, 0);
+ while (of_phandle_iterator_next(&it) == 0) {
+ /*
+ * Ignore the first memory region which will be used vdev buffer.
+ * No need to do extra handlings, rproc_add_virtio_dev will handle it.
+ */
+ if (!strcmp(it.node->name, "vdev0buffer"))
+ continue;
+
+ rmem = of_reserved_mem_lookup(it.node);
+ if (!rmem) {
+ dev_err(priv->dev, "unable to acquire memory-region\n");
+ return -EINVAL;
+ }
+
+ /* No need to translate pa to da, i.MX use same map */
+ da = rmem->base;
+
+ /* Register memory region */
+ mem = rproc_mem_entry_init(priv->dev, NULL, (dma_addr_t)rmem->base, rmem->size, da,
+ imx_rproc_mem_alloc, imx_rproc_mem_release,
+ it.node->name);
+
+ if (mem)
+ rproc_coredump_add_segment(rproc, da, rmem->size);
+ else
+ return -ENOMEM;
+
+ rproc_add_carveout(rproc, mem);
+ }
+
+ return 0;
+}
+
+static int imx_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
+{
+ int ret;
+
+ ret = rproc_elf_load_rsc_table(rproc, fw);
+ if (ret)
+ dev_info(&rproc->dev, "No resource table in elf\n");
+
+ return 0;
+}
+
+static void imx_rproc_kick(struct rproc *rproc, int vqid)
+{
+ struct imx_rproc *priv = rproc->priv;
+ int err;
+ __u32 mmsg;
+
+ if (!priv->tx_ch) {
+ dev_err(priv->dev, "No initialized mbox tx channel\n");
+ return;
+ }
+
+ /*
+ * Send the index of the triggered virtqueue as the mu payload.
+ * Let remote processor know which virtqueue is used.
+ */
+ mmsg = vqid << 16;
+
+ err = mbox_send_message(priv->tx_ch, (void *)&mmsg);
+ if (err < 0)
+ dev_err(priv->dev, "%s: failed (%d, err:%d)\n",
+ __func__, vqid, err);
+}
+
+static int imx_rproc_attach(struct rproc *rproc)
+{
+ return 0;
+}
+
+static struct resource_table *imx_rproc_get_loaded_rsc_table(struct rproc *rproc, size_t *table_sz)
+{
+ struct imx_rproc *priv = rproc->priv;
+
+ /* The resource table has already been mapped in imx_rproc_addr_init */
+ if (!priv->rsc_table)
+ return NULL;
+
+ *table_sz = SZ_1K;
+ return (struct resource_table *)priv->rsc_table;
+}
+
static const struct rproc_ops imx_rproc_ops = {
+ .prepare = imx_rproc_prepare,
+ .attach = imx_rproc_attach,
.start = imx_rproc_start,
.stop = imx_rproc_stop,
+ .kick = imx_rproc_kick,
.da_to_va = imx_rproc_da_to_va,
+ .load = rproc_elf_load_segments,
+ .parse_fw = imx_rproc_parse_fw,
+ .find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table,
+ .get_loaded_rsc_table = imx_rproc_get_loaded_rsc_table,
+ .sanity_check = rproc_elf_sanity_check,
+ .get_boot_addr = rproc_elf_get_boot_addr,
};
static int imx_rproc_addr_init(struct imx_rproc *priv,
if (!(att->flags & ATT_OWN))
continue;
- if (b >= IMX7D_RPROC_MEM_MAX)
+ if (b >= IMX_RPROC_MEM_MAX)
break;
priv->mem[b].cpu_addr = devm_ioremap(&pdev->dev,
att->sa, att->size);
if (!priv->mem[b].cpu_addr) {
- dev_err(dev, "devm_ioremap_resource failed\n");
+ dev_err(dev, "failed to remap %#x bytes from %#x\n", att->size, att->sa);
return -ENOMEM;
}
priv->mem[b].sys_addr = att->sa;
struct resource res;
node = of_parse_phandle(np, "memory-region", a);
+ /* Not map vdev region */
+ if (!strcmp(node->name, "vdev"))
+ continue;
err = of_address_to_resource(node, 0, &res);
if (err) {
dev_err(dev, "unable to resolve memory region\n");
return err;
}
- if (b >= IMX7D_RPROC_MEM_MAX)
+ of_node_put(node);
+
+ if (b >= IMX_RPROC_MEM_MAX)
break;
- priv->mem[b].cpu_addr = devm_ioremap_resource(&pdev->dev, &res);
- if (IS_ERR(priv->mem[b].cpu_addr)) {
- dev_err(dev, "devm_ioremap_resource failed\n");
- err = PTR_ERR(priv->mem[b].cpu_addr);
- return err;
+ /* Not use resource version, because we might share region */
+ priv->mem[b].cpu_addr = devm_ioremap(&pdev->dev, res.start, resource_size(&res));
+ if (!priv->mem[b].cpu_addr) {
+ dev_err(dev, "failed to remap %pr\n", &res);
+ return -ENOMEM;
}
priv->mem[b].sys_addr = res.start;
priv->mem[b].size = resource_size(&res);
+ if (!strcmp(node->name, "rsc_table"))
+ priv->rsc_table = priv->mem[b].cpu_addr;
b++;
}
return 0;
}
+static void imx_rproc_vq_work(struct work_struct *work)
+{
+ struct imx_rproc *priv = container_of(work, struct imx_rproc,
+ rproc_work);
+
+ rproc_vq_interrupt(priv->rproc, 0);
+ rproc_vq_interrupt(priv->rproc, 1);
+}
+
+static void imx_rproc_rx_callback(struct mbox_client *cl, void *msg)
+{
+ struct rproc *rproc = dev_get_drvdata(cl->dev);
+ struct imx_rproc *priv = rproc->priv;
+
+ queue_work(priv->workqueue, &priv->rproc_work);
+}
+
+static int imx_rproc_xtr_mbox_init(struct rproc *rproc)
+{
+ struct imx_rproc *priv = rproc->priv;
+ struct device *dev = priv->dev;
+ struct mbox_client *cl;
+ int ret;
+
+ if (!of_get_property(dev->of_node, "mbox-names", NULL))
+ return 0;
+
+ cl = &priv->cl;
+ cl->dev = dev;
+ cl->tx_block = true;
+ cl->tx_tout = 100;
+ cl->knows_txdone = false;
+ cl->rx_callback = imx_rproc_rx_callback;
+
+ priv->tx_ch = mbox_request_channel_byname(cl, "tx");
+ if (IS_ERR(priv->tx_ch)) {
+ ret = PTR_ERR(priv->tx_ch);
+ return dev_err_probe(cl->dev, ret,
+ "failed to request tx mailbox channel: %d\n", ret);
+ }
+
+ priv->rx_ch = mbox_request_channel_byname(cl, "rx");
+ if (IS_ERR(priv->rx_ch)) {
+ mbox_free_channel(priv->tx_ch);
+ ret = PTR_ERR(priv->rx_ch);
+ return dev_err_probe(cl->dev, ret,
+ "failed to request rx mailbox channel: %d\n", ret);
+ }
+
+ return 0;
+}
+
+static void imx_rproc_free_mbox(struct rproc *rproc)
+{
+ struct imx_rproc *priv = rproc->priv;
+
+ mbox_free_channel(priv->tx_ch);
+ mbox_free_channel(priv->rx_ch);
+}
+
+static int imx_rproc_detect_mode(struct imx_rproc *priv)
+{
+ const struct imx_rproc_dcfg *dcfg = priv->dcfg;
+ struct device *dev = priv->dev;
+ int ret;
+ u32 val;
+
+ ret = regmap_read(priv->regmap, dcfg->src_reg, &val);
+ if (ret) {
+ dev_err(dev, "Failed to read src\n");
+ return ret;
+ }
+
+ if (!(val & dcfg->src_stop))
+ priv->rproc->state = RPROC_DETACHED;
+
+ return 0;
+}
+
static int imx_rproc_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
priv->dev = dev;
dev_set_drvdata(dev, rproc);
+ priv->workqueue = create_workqueue(dev_name(dev));
+ if (!priv->workqueue) {
+ dev_err(dev, "cannot create workqueue\n");
+ ret = -ENOMEM;
+ goto err_put_rproc;
+ }
+
+ ret = imx_rproc_xtr_mbox_init(rproc);
+ if (ret)
+ goto err_put_wkq;
ret = imx_rproc_addr_init(priv, pdev);
if (ret) {
dev_err(dev, "failed on imx_rproc_addr_init\n");
- goto err_put_rproc;
+ goto err_put_mbox;
}
+ ret = imx_rproc_detect_mode(priv);
+ if (ret)
+ goto err_put_mbox;
+
priv->clk = devm_clk_get(dev, NULL);
if (IS_ERR(priv->clk)) {
dev_err(dev, "Failed to get clock\n");
ret = PTR_ERR(priv->clk);
- goto err_put_rproc;
+ goto err_put_mbox;
}
/*
ret = clk_prepare_enable(priv->clk);
if (ret) {
dev_err(&rproc->dev, "Failed to enable clock\n");
- goto err_put_rproc;
+ goto err_put_mbox;
}
+ INIT_WORK(&priv->rproc_work, imx_rproc_vq_work);
+
ret = rproc_add(rproc);
if (ret) {
dev_err(dev, "rproc_add failed\n");
err_put_clk:
clk_disable_unprepare(priv->clk);
+err_put_mbox:
+ imx_rproc_free_mbox(rproc);
+err_put_wkq:
+ destroy_workqueue(priv->workqueue);
err_put_rproc:
rproc_free(rproc);
clk_disable_unprepare(priv->clk);
rproc_del(rproc);
+ imx_rproc_free_mbox(rproc);
rproc_free(rproc);
return 0;
static const struct of_device_id imx_rproc_of_match[] = {
{ .compatible = "fsl,imx7d-cm4", .data = &imx_rproc_cfg_imx7d },
{ .compatible = "fsl,imx6sx-cm4", .data = &imx_rproc_cfg_imx6sx },
+ { .compatible = "fsl,imx8mq-cm4", .data = &imx_rproc_cfg_imx8mq },
+ { .compatible = "fsl,imx8mm-cm4", .data = &imx_rproc_cfg_imx8mq },
{},
};
MODULE_DEVICE_TABLE(of, imx_rproc_of_match);
module_platform_driver(imx_rproc_driver);
MODULE_LICENSE("GPL v2");
-MODULE_DESCRIPTION("IMX6SX/7D remote processor control driver");
+MODULE_DESCRIPTION("i.MX remote processor control driver");
MODULE_AUTHOR("Oleksij Rempel <o.rempel@pengutronix.de>");
writel(vqid, vpu->aux_base + REG_CORE_MSG);
}
-static void *ingenic_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *ingenic_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct vpu *vpu = rproc->priv;
void __iomem *va = NULL;
* can be used either by the remoteproc core for loading (when using kernel
* remoteproc loader), or by any rpmsg bus drivers.
*/
-static void *keystone_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *keystone_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct keystone_rproc *ksproc = rproc->priv;
void __iomem *va = NULL;
}
/* grab the kernel address for this device address */
- ptr = (void __iomem *)rproc_da_to_va(rproc, da, memsz);
+ ptr = (void __iomem *)rproc_da_to_va(rproc, da, memsz, NULL);
if (!ptr) {
dev_err(dev, "bad phdr da 0x%x mem 0x%x\n", da, memsz);
ret = -EINVAL;
return NULL;
}
-static void *scp_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *scp_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct mtk_scp *scp = (struct mtk_scp *)rproc->priv;
{
void *ptr;
- ptr = scp_da_to_va(scp->rproc, mem_addr, 0);
+ ptr = scp_da_to_va(scp->rproc, mem_addr, 0, NULL);
if (!ptr)
return ERR_PTR(-EINVAL);
* Return: translated virtual address in kernel memory space on success,
* or NULL on failure.
*/
-static void *omap_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *omap_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct omap_rproc *oproc = rproc->priv;
int i;
return 0;
}
-DEFINE_SIMPLE_ATTRIBUTE(pru_rproc_debug_ss_fops, pru_rproc_debug_ss_get,
- pru_rproc_debug_ss_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(pru_rproc_debug_ss_fops, pru_rproc_debug_ss_get,
+ pru_rproc_debug_ss_set, "%llu\n");
/*
* Create PRU-specific debugfs entries
static void pru_dispose_irq_mapping(struct pru_rproc *pru)
{
- while (pru->evt_count--) {
+ if (!pru->mapped_irq)
+ return;
+
+ while (pru->evt_count) {
+ pru->evt_count--;
if (pru->mapped_irq[pru->evt_count] > 0)
irq_dispose_mapping(pru->mapped_irq[pru->evt_count]);
}
kfree(pru->mapped_irq);
+ pru->mapped_irq = NULL;
}
/*
struct pru_rproc *pru = rproc->priv;
struct pru_irq_rsc *rsc = pru->pru_interrupt_map;
struct irq_fwspec fwspec;
- struct device_node *irq_parent;
+ struct device_node *parent, *irq_parent;
int i, ret = 0;
/* not having pru_interrupt_map is not an error */
pru->evt_count = rsc->num_evts;
pru->mapped_irq = kcalloc(pru->evt_count, sizeof(unsigned int),
GFP_KERNEL);
- if (!pru->mapped_irq)
+ if (!pru->mapped_irq) {
+ pru->evt_count = 0;
return -ENOMEM;
+ }
/*
* parse and fill in system event to interrupt channel and
- * channel-to-host mapping
+ * channel-to-host mapping. The interrupt controller to be used
+ * for these mappings for a given PRU remoteproc is always its
+ * corresponding sibling PRUSS INTC node.
*/
- irq_parent = of_irq_find_parent(pru->dev->of_node);
+ parent = of_get_parent(dev_of_node(pru->dev));
+ if (!parent) {
+ kfree(pru->mapped_irq);
+ pru->mapped_irq = NULL;
+ pru->evt_count = 0;
+ return -ENODEV;
+ }
+
+ irq_parent = of_get_child_by_name(parent, "interrupt-controller");
+ of_node_put(parent);
if (!irq_parent) {
kfree(pru->mapped_irq);
+ pru->mapped_irq = NULL;
+ pru->evt_count = 0;
return -ENODEV;
}
pru->mapped_irq[i] = irq_create_fwspec_mapping(&fwspec);
if (!pru->mapped_irq[i]) {
- dev_err(dev, "failed to get virq\n");
- ret = pru->mapped_irq[i];
+ dev_err(dev, "failed to get virq for fw mapping %d: event %d chnl %d host %d\n",
+ i, fwspec.param[0], fwspec.param[1],
+ fwspec.param[2]);
+ ret = -EINVAL;
goto map_fail;
}
}
+ of_node_put(irq_parent);
return ret;
map_fail:
pru_dispose_irq_mapping(pru);
+ of_node_put(irq_parent);
return ret;
}
pru_control_write_reg(pru, PRU_CTRL_CTRL, val);
/* dispose irq mapping - new firmware can provide new mapping */
- if (pru->mapped_irq)
- pru_dispose_irq_mapping(pru);
+ pru_dispose_irq_mapping(pru);
return 0;
}
* core for any PRU client drivers. The PRU Instruction RAM access is restricted
* only to the PRU loader code.
*/
-static void *pru_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *pru_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct pru_rproc *pru = rproc->priv;
return ret;
}
-static void *adsp_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *adsp_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
int offset;
goto release_firmware;
}
+ if (phdr->p_filesz > phdr->p_memsz) {
+ dev_err(qproc->dev,
+ "refusing to load segment %d with p_filesz > p_memsz\n",
+ i);
+ ret = -EINVAL;
+ goto release_firmware;
+ }
+
ptr = memremap(qproc->mpss_phys + offset, phdr->p_memsz, MEMREMAP_WC);
if (!ptr) {
dev_err(qproc->dev,
goto release_firmware;
}
+ if (seg_fw->size != phdr->p_filesz) {
+ dev_err(qproc->dev,
+ "failed to load segment %d from truncated file %s\n",
+ i, fw_name);
+ ret = -EINVAL;
+ release_firmware(seg_fw);
+ memunmap(ptr);
+ goto release_firmware;
+ }
+
release_firmware(seg_fw);
}
mba_image = desc->hexagon_mba_image;
ret = of_property_read_string_index(pdev->dev.of_node, "firmware-name",
0, &mba_image);
- if (ret < 0 && ret != -EINVAL)
+ if (ret < 0 && ret != -EINVAL) {
+ dev_err(&pdev->dev, "unable to read mba firmware-name\n");
return ret;
+ }
rproc = rproc_alloc(&pdev->dev, pdev->name, &q6v5_ops,
mba_image, sizeof(*qproc));
qproc->hexagon_mdt_image = "modem.mdt";
ret = of_property_read_string_index(pdev->dev.of_node, "firmware-name",
1, &qproc->hexagon_mdt_image);
- if (ret < 0 && ret != -EINVAL)
+ if (ret < 0 && ret != -EINVAL) {
+ dev_err(&pdev->dev, "unable to read mpss firmware-name\n");
goto free_rproc;
+ }
platform_set_drvdata(pdev, qproc);
return ret;
}
-static void *adsp_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *adsp_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct qcom_adsp *adsp = (struct qcom_adsp *)rproc->priv;
int offset;
.ssctl_id = 0x12,
};
+static const struct adsp_data sdx55_mpss_resource = {
+ .crash_reason_smem = 421,
+ .firmware_name = "modem.mdt",
+ .pas_id = 4,
+ .has_aggre2_clk = false,
+ .auto_boot = true,
+ .proxy_pd_names = (char*[]){
+ "cx",
+ "mss",
+ NULL
+ },
+ .ssr_name = "mpss",
+ .sysmon_name = "modem",
+ .ssctl_id = 0x22,
+};
+
static const struct of_device_id adsp_of_match[] = {
{ .compatible = "qcom,msm8974-adsp-pil", .data = &adsp_resource_init},
{ .compatible = "qcom,msm8996-adsp-pil", .data = &adsp_resource_init},
{ .compatible = "qcom,sc7180-mpss-pas", .data = &mpss_resource_init},
{ .compatible = "qcom,sdm845-adsp-pas", .data = &adsp_resource_init},
{ .compatible = "qcom,sdm845-cdsp-pas", .data = &cdsp_resource_init},
+ { .compatible = "qcom,sdx55-mpss-pas", .data = &sdx55_mpss_resource},
{ .compatible = "qcom,sm8150-adsp-pas", .data = &sm8150_adsp_resource},
{ .compatible = "qcom,sm8150-cdsp-pas", .data = &sm8150_cdsp_resource},
{ .compatible = "qcom,sm8150-mpss-pas", .data = &mpss_resource_init},
* Copyright (C) 2014 Sony Mobile Communications AB
* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved.
*/
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/io.h>
#include <linux/iopoll.h>
#include <linux/kernel.h>
#include <linux/mfd/syscon.h>
#include <linux/module.h>
+#include <linux/of_address.h>
#include <linux/of_reserved_mem.h>
#include <linux/platform_device.h>
#include <linux/regmap.h>
+#include <linux/regulator/consumer.h>
#include <linux/reset.h>
#include <linux/soc/qcom/mdt_loader.h>
#include "qcom_common.h"
#define Q6SS_GFMUX_CTL_REG 0x020
#define Q6SS_PWR_CTL_REG 0x030
#define Q6SS_MEM_PWR_CTL 0x0B0
+#define Q6SS_STRAP_ACC 0x110
+#define Q6SS_CGC_OVERRIDE 0x034
+#define Q6SS_BCR_REG 0x6000
/* AXI Halt Register Offsets */
#define AXI_HALTREQ_REG 0x0
#define Q6SS_CORE_ARES BIT(1)
#define Q6SS_BUS_ARES_ENABLE BIT(2)
+/* Q6SS_BRC_RESET */
+#define Q6SS_BRC_BLK_ARES BIT(0)
+
/* Q6SS_GFMUX_CTL */
#define Q6SS_CLK_ENABLE BIT(1)
+#define Q6SS_SWITCH_CLK_SRC BIT(8)
/* Q6SS_PWR_CTL */
#define Q6SS_L2DATA_STBY_N BIT(18)
#define Q6SS_SLP_RET_N BIT(19)
#define Q6SS_CLAMP_IO BIT(20)
#define QDSS_BHS_ON BIT(21)
+#define QDSS_Q6_MEMORIES GENMASK(15, 0)
/* Q6SS parameters */
#define Q6SS_LDO_BYP BIT(25)
#define Q6SS_CLAMP_QMC_MEM BIT(22)
#define HALT_CHECK_MAX_LOOPS 200
#define Q6SS_XO_CBCR GENMASK(5, 3)
+#define Q6SS_SLEEP_CBCR GENMASK(5, 2)
/* Q6SS config/status registers */
#define TCSR_GLOBAL_CFG0 0x0
#define TCSR_WCSS_CLK_MASK 0x1F
#define TCSR_WCSS_CLK_ENABLE 0x14
+#define MAX_HALT_REG 3
+enum {
+ WCSS_IPQ8074,
+ WCSS_QCS404,
+};
+
+struct wcss_data {
+ const char *firmware_name;
+ unsigned int crash_reason_smem;
+ u32 version;
+ bool aon_reset_required;
+ bool wcss_q6_reset_required;
+ const char *ssr_name;
+ const char *sysmon_name;
+ int ssctl_id;
+ const struct rproc_ops *ops;
+ bool requires_force_stop;
+};
+
struct q6v5_wcss {
struct device *dev;
u32 halt_wcss;
u32 halt_nc;
+ struct clk *xo;
+ struct clk *ahbfabric_cbcr_clk;
+ struct clk *gcc_abhs_cbcr;
+ struct clk *gcc_axim_cbcr;
+ struct clk *lcc_csr_cbcr;
+ struct clk *ahbs_cbcr;
+ struct clk *tcm_slave_cbcr;
+ struct clk *qdsp6ss_abhm_cbcr;
+ struct clk *qdsp6ss_sleep_cbcr;
+ struct clk *qdsp6ss_axim_cbcr;
+ struct clk *qdsp6ss_xo_cbcr;
+ struct clk *qdsp6ss_core_gfmux;
+ struct clk *lcc_bcr_sleep;
+ struct regulator *cx_supply;
+ struct qcom_sysmon *sysmon;
+
struct reset_control *wcss_aon_reset;
struct reset_control *wcss_reset;
struct reset_control *wcss_q6_reset;
+ struct reset_control *wcss_q6_bcr_reset;
struct qcom_q6v5 q6v5;
void *mem_region;
size_t mem_size;
+ unsigned int crash_reason_smem;
+ u32 version;
+ bool requires_force_stop;
+
struct qcom_rproc_glink glink_subdev;
struct qcom_rproc_ssr ssr_subdev;
};
return ret;
}
+static int q6v5_wcss_qcs404_power_on(struct q6v5_wcss *wcss)
+{
+ unsigned long val;
+ int ret, idx;
+
+ /* Toggle the restart */
+ reset_control_assert(wcss->wcss_reset);
+ usleep_range(200, 300);
+ reset_control_deassert(wcss->wcss_reset);
+ usleep_range(200, 300);
+
+ /* Enable GCC_WDSP_Q6SS_AHBS_CBCR clock */
+ ret = clk_prepare_enable(wcss->gcc_abhs_cbcr);
+ if (ret)
+ return ret;
+
+ /* Remove reset to the WCNSS QDSP6SS */
+ reset_control_deassert(wcss->wcss_q6_bcr_reset);
+
+ /* Enable Q6SSTOP_AHBFABRIC_CBCR clock */
+ ret = clk_prepare_enable(wcss->ahbfabric_cbcr_clk);
+ if (ret)
+ goto disable_gcc_abhs_cbcr_clk;
+
+ /* Enable the LCCCSR CBC clock, Q6SSTOP_Q6SSTOP_LCC_CSR_CBCR clock */
+ ret = clk_prepare_enable(wcss->lcc_csr_cbcr);
+ if (ret)
+ goto disable_ahbfabric_cbcr_clk;
+
+ /* Enable the Q6AHBS CBC, Q6SSTOP_Q6SS_AHBS_CBCR clock */
+ ret = clk_prepare_enable(wcss->ahbs_cbcr);
+ if (ret)
+ goto disable_csr_cbcr_clk;
+
+ /* Enable the TCM slave CBC, Q6SSTOP_Q6SS_TCM_SLAVE_CBCR clock */
+ ret = clk_prepare_enable(wcss->tcm_slave_cbcr);
+ if (ret)
+ goto disable_ahbs_cbcr_clk;
+
+ /* Enable the Q6SS AHB master CBC, Q6SSTOP_Q6SS_AHBM_CBCR clock */
+ ret = clk_prepare_enable(wcss->qdsp6ss_abhm_cbcr);
+ if (ret)
+ goto disable_tcm_slave_cbcr_clk;
+
+ /* Enable the Q6SS AXI master CBC, Q6SSTOP_Q6SS_AXIM_CBCR clock */
+ ret = clk_prepare_enable(wcss->qdsp6ss_axim_cbcr);
+ if (ret)
+ goto disable_abhm_cbcr_clk;
+
+ /* Enable the Q6SS XO CBC */
+ val = readl(wcss->reg_base + Q6SS_XO_CBCR);
+ val |= BIT(0);
+ writel(val, wcss->reg_base + Q6SS_XO_CBCR);
+ /* Read CLKOFF bit to go low indicating CLK is enabled */
+ ret = readl_poll_timeout(wcss->reg_base + Q6SS_XO_CBCR,
+ val, !(val & BIT(31)), 1,
+ HALT_CHECK_MAX_LOOPS);
+ if (ret) {
+ dev_err(wcss->dev,
+ "xo cbcr enabling timed out (rc:%d)\n", ret);
+ return ret;
+ }
+
+ writel(0, wcss->reg_base + Q6SS_CGC_OVERRIDE);
+
+ /* Enable QDSP6 sleep clock clock */
+ val = readl(wcss->reg_base + Q6SS_SLEEP_CBCR);
+ val |= BIT(0);
+ writel(val, wcss->reg_base + Q6SS_SLEEP_CBCR);
+
+ /* Enable the Enable the Q6 AXI clock, GCC_WDSP_Q6SS_AXIM_CBCR*/
+ ret = clk_prepare_enable(wcss->gcc_axim_cbcr);
+ if (ret)
+ goto disable_sleep_cbcr_clk;
+
+ /* Assert resets, stop core */
+ val = readl(wcss->reg_base + Q6SS_RESET_REG);
+ val |= Q6SS_CORE_ARES | Q6SS_BUS_ARES_ENABLE | Q6SS_STOP_CORE;
+ writel(val, wcss->reg_base + Q6SS_RESET_REG);
+
+ /* Program the QDSP6SS PWR_CTL register */
+ writel(0x01700000, wcss->reg_base + Q6SS_PWR_CTL_REG);
+
+ writel(0x03700000, wcss->reg_base + Q6SS_PWR_CTL_REG);
+
+ writel(0x03300000, wcss->reg_base + Q6SS_PWR_CTL_REG);
+
+ writel(0x033C0000, wcss->reg_base + Q6SS_PWR_CTL_REG);
+
+ /*
+ * Enable memories by turning on the QDSP6 memory foot/head switch, one
+ * bank at a time to avoid in-rush current
+ */
+ for (idx = 28; idx >= 0; idx--) {
+ writel((readl(wcss->reg_base + Q6SS_MEM_PWR_CTL) |
+ (1 << idx)), wcss->reg_base + Q6SS_MEM_PWR_CTL);
+ }
+
+ writel(0x031C0000, wcss->reg_base + Q6SS_PWR_CTL_REG);
+ writel(0x030C0000, wcss->reg_base + Q6SS_PWR_CTL_REG);
+
+ val = readl(wcss->reg_base + Q6SS_RESET_REG);
+ val &= ~Q6SS_CORE_ARES;
+ writel(val, wcss->reg_base + Q6SS_RESET_REG);
+
+ /* Enable the Q6 core clock at the GFM, Q6SSTOP_QDSP6SS_GFMUX_CTL */
+ val = readl(wcss->reg_base + Q6SS_GFMUX_CTL_REG);
+ val |= Q6SS_CLK_ENABLE | Q6SS_SWITCH_CLK_SRC;
+ writel(val, wcss->reg_base + Q6SS_GFMUX_CTL_REG);
+
+ /* Enable sleep clock branch needed for BCR circuit */
+ ret = clk_prepare_enable(wcss->lcc_bcr_sleep);
+ if (ret)
+ goto disable_core_gfmux_clk;
+
+ return 0;
+
+disable_core_gfmux_clk:
+ val = readl(wcss->reg_base + Q6SS_GFMUX_CTL_REG);
+ val &= ~(Q6SS_CLK_ENABLE | Q6SS_SWITCH_CLK_SRC);
+ writel(val, wcss->reg_base + Q6SS_GFMUX_CTL_REG);
+ clk_disable_unprepare(wcss->gcc_axim_cbcr);
+disable_sleep_cbcr_clk:
+ val = readl(wcss->reg_base + Q6SS_SLEEP_CBCR);
+ val &= ~Q6SS_CLK_ENABLE;
+ writel(val, wcss->reg_base + Q6SS_SLEEP_CBCR);
+ val = readl(wcss->reg_base + Q6SS_XO_CBCR);
+ val &= ~Q6SS_CLK_ENABLE;
+ writel(val, wcss->reg_base + Q6SS_XO_CBCR);
+ clk_disable_unprepare(wcss->qdsp6ss_axim_cbcr);
+disable_abhm_cbcr_clk:
+ clk_disable_unprepare(wcss->qdsp6ss_abhm_cbcr);
+disable_tcm_slave_cbcr_clk:
+ clk_disable_unprepare(wcss->tcm_slave_cbcr);
+disable_ahbs_cbcr_clk:
+ clk_disable_unprepare(wcss->ahbs_cbcr);
+disable_csr_cbcr_clk:
+ clk_disable_unprepare(wcss->lcc_csr_cbcr);
+disable_ahbfabric_cbcr_clk:
+ clk_disable_unprepare(wcss->ahbfabric_cbcr_clk);
+disable_gcc_abhs_cbcr_clk:
+ clk_disable_unprepare(wcss->gcc_abhs_cbcr);
+
+ return ret;
+}
+
+static inline int q6v5_wcss_qcs404_reset(struct q6v5_wcss *wcss)
+{
+ unsigned long val;
+
+ writel(0x80800000, wcss->reg_base + Q6SS_STRAP_ACC);
+
+ /* Start core execution */
+ val = readl(wcss->reg_base + Q6SS_RESET_REG);
+ val &= ~Q6SS_STOP_CORE;
+ writel(val, wcss->reg_base + Q6SS_RESET_REG);
+
+ return 0;
+}
+
+static int q6v5_qcs404_wcss_start(struct rproc *rproc)
+{
+ struct q6v5_wcss *wcss = rproc->priv;
+ int ret;
+
+ ret = clk_prepare_enable(wcss->xo);
+ if (ret)
+ return ret;
+
+ ret = regulator_enable(wcss->cx_supply);
+ if (ret)
+ goto disable_xo_clk;
+
+ qcom_q6v5_prepare(&wcss->q6v5);
+
+ ret = q6v5_wcss_qcs404_power_on(wcss);
+ if (ret) {
+ dev_err(wcss->dev, "wcss clk_enable failed\n");
+ goto disable_cx_supply;
+ }
+
+ writel(rproc->bootaddr >> 4, wcss->reg_base + Q6SS_RST_EVB);
+
+ q6v5_wcss_qcs404_reset(wcss);
+
+ ret = qcom_q6v5_wait_for_start(&wcss->q6v5, 5 * HZ);
+ if (ret == -ETIMEDOUT) {
+ dev_err(wcss->dev, "start timed out\n");
+ goto disable_cx_supply;
+ }
+
+ return 0;
+
+disable_cx_supply:
+ regulator_disable(wcss->cx_supply);
+disable_xo_clk:
+ clk_disable_unprepare(wcss->xo);
+
+ return ret;
+}
+
static void q6v5_wcss_halt_axi_port(struct q6v5_wcss *wcss,
struct regmap *halt_map,
u32 offset)
regmap_write(halt_map, offset + AXI_HALTREQ_REG, 0);
}
+static int q6v5_qcs404_wcss_shutdown(struct q6v5_wcss *wcss)
+{
+ unsigned long val;
+ int ret;
+
+ q6v5_wcss_halt_axi_port(wcss, wcss->halt_map, wcss->halt_wcss);
+
+ /* assert clamps to avoid MX current inrush */
+ val = readl(wcss->reg_base + Q6SS_PWR_CTL_REG);
+ val |= (Q6SS_CLAMP_IO | Q6SS_CLAMP_WL | Q6SS_CLAMP_QMC_MEM);
+ writel(val, wcss->reg_base + Q6SS_PWR_CTL_REG);
+
+ /* Disable memories by turning off memory foot/headswitch */
+ writel((readl(wcss->reg_base + Q6SS_MEM_PWR_CTL) &
+ ~QDSS_Q6_MEMORIES),
+ wcss->reg_base + Q6SS_MEM_PWR_CTL);
+
+ /* Clear the BHS_ON bit */
+ val = readl(wcss->reg_base + Q6SS_PWR_CTL_REG);
+ val &= ~Q6SS_BHS_ON;
+ writel(val, wcss->reg_base + Q6SS_PWR_CTL_REG);
+
+ clk_disable_unprepare(wcss->ahbfabric_cbcr_clk);
+ clk_disable_unprepare(wcss->lcc_csr_cbcr);
+ clk_disable_unprepare(wcss->tcm_slave_cbcr);
+ clk_disable_unprepare(wcss->qdsp6ss_abhm_cbcr);
+ clk_disable_unprepare(wcss->qdsp6ss_axim_cbcr);
+
+ val = readl(wcss->reg_base + Q6SS_SLEEP_CBCR);
+ val &= ~BIT(0);
+ writel(val, wcss->reg_base + Q6SS_SLEEP_CBCR);
+
+ val = readl(wcss->reg_base + Q6SS_XO_CBCR);
+ val &= ~BIT(0);
+ writel(val, wcss->reg_base + Q6SS_XO_CBCR);
+
+ clk_disable_unprepare(wcss->ahbs_cbcr);
+ clk_disable_unprepare(wcss->lcc_bcr_sleep);
+
+ val = readl(wcss->reg_base + Q6SS_GFMUX_CTL_REG);
+ val &= ~(Q6SS_CLK_ENABLE | Q6SS_SWITCH_CLK_SRC);
+ writel(val, wcss->reg_base + Q6SS_GFMUX_CTL_REG);
+
+ clk_disable_unprepare(wcss->gcc_abhs_cbcr);
+
+ ret = reset_control_assert(wcss->wcss_reset);
+ if (ret) {
+ dev_err(wcss->dev, "wcss_reset failed\n");
+ return ret;
+ }
+ usleep_range(200, 300);
+
+ ret = reset_control_deassert(wcss->wcss_reset);
+ if (ret) {
+ dev_err(wcss->dev, "wcss_reset failed\n");
+ return ret;
+ }
+ usleep_range(200, 300);
+
+ clk_disable_unprepare(wcss->gcc_axim_cbcr);
+
+ return 0;
+}
+
static int q6v5_wcss_powerdown(struct q6v5_wcss *wcss)
{
int ret;
int ret;
/* WCSS powerdown */
- ret = qcom_q6v5_request_stop(&wcss->q6v5, NULL);
- if (ret == -ETIMEDOUT) {
- dev_err(wcss->dev, "timed out on wait\n");
- return ret;
+ if (wcss->requires_force_stop) {
+ ret = qcom_q6v5_request_stop(&wcss->q6v5, NULL);
+ if (ret == -ETIMEDOUT) {
+ dev_err(wcss->dev, "timed out on wait\n");
+ return ret;
+ }
}
- ret = q6v5_wcss_powerdown(wcss);
- if (ret)
- return ret;
-
- /* Q6 Power down */
- ret = q6v5_q6_powerdown(wcss);
- if (ret)
- return ret;
+ if (wcss->version == WCSS_QCS404) {
+ ret = q6v5_qcs404_wcss_shutdown(wcss);
+ if (ret)
+ return ret;
+ } else {
+ ret = q6v5_wcss_powerdown(wcss);
+ if (ret)
+ return ret;
+
+ /* Q6 Power down */
+ ret = q6v5_q6_powerdown(wcss);
+ if (ret)
+ return ret;
+ }
qcom_q6v5_unprepare(&wcss->q6v5);
return 0;
}
-static void *q6v5_wcss_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *q6v5_wcss_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct q6v5_wcss *wcss = rproc->priv;
int offset;
return ret;
}
-static const struct rproc_ops q6v5_wcss_ops = {
+static const struct rproc_ops q6v5_wcss_ipq8074_ops = {
.start = q6v5_wcss_start,
.stop = q6v5_wcss_stop,
.da_to_va = q6v5_wcss_da_to_va,
.get_boot_addr = rproc_elf_get_boot_addr,
};
-static int q6v5_wcss_init_reset(struct q6v5_wcss *wcss)
+static const struct rproc_ops q6v5_wcss_qcs404_ops = {
+ .start = q6v5_qcs404_wcss_start,
+ .stop = q6v5_wcss_stop,
+ .da_to_va = q6v5_wcss_da_to_va,
+ .load = q6v5_wcss_load,
+ .get_boot_addr = rproc_elf_get_boot_addr,
+ .parse_fw = qcom_register_dump_segments,
+};
+
+static int q6v5_wcss_init_reset(struct q6v5_wcss *wcss,
+ const struct wcss_data *desc)
{
struct device *dev = wcss->dev;
- wcss->wcss_aon_reset = devm_reset_control_get(dev, "wcss_aon_reset");
- if (IS_ERR(wcss->wcss_aon_reset)) {
- dev_err(wcss->dev, "unable to acquire wcss_aon_reset\n");
- return PTR_ERR(wcss->wcss_aon_reset);
+ if (desc->aon_reset_required) {
+ wcss->wcss_aon_reset = devm_reset_control_get_exclusive(dev, "wcss_aon_reset");
+ if (IS_ERR(wcss->wcss_aon_reset)) {
+ dev_err(wcss->dev, "fail to acquire wcss_aon_reset\n");
+ return PTR_ERR(wcss->wcss_aon_reset);
+ }
}
- wcss->wcss_reset = devm_reset_control_get(dev, "wcss_reset");
+ wcss->wcss_reset = devm_reset_control_get_exclusive(dev, "wcss_reset");
if (IS_ERR(wcss->wcss_reset)) {
dev_err(wcss->dev, "unable to acquire wcss_reset\n");
return PTR_ERR(wcss->wcss_reset);
}
- wcss->wcss_q6_reset = devm_reset_control_get(dev, "wcss_q6_reset");
- if (IS_ERR(wcss->wcss_q6_reset)) {
- dev_err(wcss->dev, "unable to acquire wcss_q6_reset\n");
- return PTR_ERR(wcss->wcss_q6_reset);
+ if (desc->wcss_q6_reset_required) {
+ wcss->wcss_q6_reset = devm_reset_control_get_exclusive(dev, "wcss_q6_reset");
+ if (IS_ERR(wcss->wcss_q6_reset)) {
+ dev_err(wcss->dev, "unable to acquire wcss_q6_reset\n");
+ return PTR_ERR(wcss->wcss_q6_reset);
+ }
+ }
+
+ wcss->wcss_q6_bcr_reset = devm_reset_control_get_exclusive(dev, "wcss_q6_bcr_reset");
+ if (IS_ERR(wcss->wcss_q6_bcr_reset)) {
+ dev_err(wcss->dev, "unable to acquire wcss_q6_bcr_reset\n");
+ return PTR_ERR(wcss->wcss_q6_bcr_reset);
}
return 0;
static int q6v5_wcss_init_mmio(struct q6v5_wcss *wcss,
struct platform_device *pdev)
{
- struct of_phandle_args args;
+ unsigned int halt_reg[MAX_HALT_REG] = {0};
+ struct device_node *syscon;
struct resource *res;
int ret;
res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "qdsp6");
- wcss->reg_base = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(wcss->reg_base))
- return PTR_ERR(wcss->reg_base);
+ wcss->reg_base = devm_ioremap(&pdev->dev, res->start,
+ resource_size(res));
+ if (!wcss->reg_base)
+ return -ENOMEM;
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rmb");
- wcss->rmb_base = devm_ioremap_resource(&pdev->dev, res);
- if (IS_ERR(wcss->rmb_base))
- return PTR_ERR(wcss->rmb_base);
+ if (wcss->version == WCSS_IPQ8074) {
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rmb");
+ wcss->rmb_base = devm_ioremap_resource(&pdev->dev, res);
+ if (IS_ERR(wcss->rmb_base))
+ return PTR_ERR(wcss->rmb_base);
+ }
- ret = of_parse_phandle_with_fixed_args(pdev->dev.of_node,
- "qcom,halt-regs", 3, 0, &args);
- if (ret < 0) {
+ syscon = of_parse_phandle(pdev->dev.of_node,
+ "qcom,halt-regs", 0);
+ if (!syscon) {
dev_err(&pdev->dev, "failed to parse qcom,halt-regs\n");
return -EINVAL;
}
- wcss->halt_map = syscon_node_to_regmap(args.np);
- of_node_put(args.np);
+ wcss->halt_map = syscon_node_to_regmap(syscon);
+ of_node_put(syscon);
if (IS_ERR(wcss->halt_map))
return PTR_ERR(wcss->halt_map);
- wcss->halt_q6 = args.args[0];
- wcss->halt_wcss = args.args[1];
- wcss->halt_nc = args.args[2];
+ ret = of_property_read_variable_u32_array(pdev->dev.of_node,
+ "qcom,halt-regs",
+ halt_reg, 0,
+ MAX_HALT_REG);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "failed to parse qcom,halt-regs\n");
+ return -EINVAL;
+ }
+
+ wcss->halt_q6 = halt_reg[0];
+ wcss->halt_wcss = halt_reg[1];
+ wcss->halt_nc = halt_reg[2];
return 0;
}
return 0;
}
+static int q6v5_wcss_init_clock(struct q6v5_wcss *wcss)
+{
+ int ret;
+
+ wcss->xo = devm_clk_get(wcss->dev, "xo");
+ if (IS_ERR(wcss->xo)) {
+ ret = PTR_ERR(wcss->xo);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get xo clock");
+ return ret;
+ }
+
+ wcss->gcc_abhs_cbcr = devm_clk_get(wcss->dev, "gcc_abhs_cbcr");
+ if (IS_ERR(wcss->gcc_abhs_cbcr)) {
+ ret = PTR_ERR(wcss->gcc_abhs_cbcr);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get gcc abhs clock");
+ return ret;
+ }
+
+ wcss->gcc_axim_cbcr = devm_clk_get(wcss->dev, "gcc_axim_cbcr");
+ if (IS_ERR(wcss->gcc_axim_cbcr)) {
+ ret = PTR_ERR(wcss->gcc_axim_cbcr);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get gcc axim clock\n");
+ return ret;
+ }
+
+ wcss->ahbfabric_cbcr_clk = devm_clk_get(wcss->dev,
+ "lcc_ahbfabric_cbc");
+ if (IS_ERR(wcss->ahbfabric_cbcr_clk)) {
+ ret = PTR_ERR(wcss->ahbfabric_cbcr_clk);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get ahbfabric clock\n");
+ return ret;
+ }
+
+ wcss->lcc_csr_cbcr = devm_clk_get(wcss->dev, "tcsr_lcc_cbc");
+ if (IS_ERR(wcss->lcc_csr_cbcr)) {
+ ret = PTR_ERR(wcss->lcc_csr_cbcr);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get csr cbcr clk\n");
+ return ret;
+ }
+
+ wcss->ahbs_cbcr = devm_clk_get(wcss->dev,
+ "lcc_abhs_cbc");
+ if (IS_ERR(wcss->ahbs_cbcr)) {
+ ret = PTR_ERR(wcss->ahbs_cbcr);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get ahbs_cbcr clk\n");
+ return ret;
+ }
+
+ wcss->tcm_slave_cbcr = devm_clk_get(wcss->dev,
+ "lcc_tcm_slave_cbc");
+ if (IS_ERR(wcss->tcm_slave_cbcr)) {
+ ret = PTR_ERR(wcss->tcm_slave_cbcr);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get tcm cbcr clk\n");
+ return ret;
+ }
+
+ wcss->qdsp6ss_abhm_cbcr = devm_clk_get(wcss->dev, "lcc_abhm_cbc");
+ if (IS_ERR(wcss->qdsp6ss_abhm_cbcr)) {
+ ret = PTR_ERR(wcss->qdsp6ss_abhm_cbcr);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get abhm cbcr clk\n");
+ return ret;
+ }
+
+ wcss->qdsp6ss_axim_cbcr = devm_clk_get(wcss->dev, "lcc_axim_cbc");
+ if (IS_ERR(wcss->qdsp6ss_axim_cbcr)) {
+ ret = PTR_ERR(wcss->qdsp6ss_axim_cbcr);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get axim cbcr clk\n");
+ return ret;
+ }
+
+ wcss->lcc_bcr_sleep = devm_clk_get(wcss->dev, "lcc_bcr_sleep");
+ if (IS_ERR(wcss->lcc_bcr_sleep)) {
+ ret = PTR_ERR(wcss->lcc_bcr_sleep);
+ if (ret != -EPROBE_DEFER)
+ dev_err(wcss->dev, "failed to get bcr cbcr clk\n");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int q6v5_wcss_init_regulator(struct q6v5_wcss *wcss)
+{
+ wcss->cx_supply = devm_regulator_get(wcss->dev, "cx");
+ if (IS_ERR(wcss->cx_supply))
+ return PTR_ERR(wcss->cx_supply);
+
+ regulator_set_load(wcss->cx_supply, 100000);
+
+ return 0;
+}
+
static int q6v5_wcss_probe(struct platform_device *pdev)
{
+ const struct wcss_data *desc;
struct q6v5_wcss *wcss;
struct rproc *rproc;
int ret;
- rproc = rproc_alloc(&pdev->dev, pdev->name, &q6v5_wcss_ops,
- "IPQ8074/q6_fw.mdt", sizeof(*wcss));
+ desc = device_get_match_data(&pdev->dev);
+ if (!desc)
+ return -EINVAL;
+
+ rproc = rproc_alloc(&pdev->dev, pdev->name, desc->ops,
+ desc->firmware_name, sizeof(*wcss));
if (!rproc) {
dev_err(&pdev->dev, "failed to allocate rproc\n");
return -ENOMEM;
wcss = rproc->priv;
wcss->dev = &pdev->dev;
+ wcss->version = desc->version;
+
+ wcss->version = desc->version;
+ wcss->requires_force_stop = desc->requires_force_stop;
ret = q6v5_wcss_init_mmio(wcss, pdev);
if (ret)
if (ret)
goto free_rproc;
- ret = q6v5_wcss_init_reset(wcss);
+ if (wcss->version == WCSS_QCS404) {
+ ret = q6v5_wcss_init_clock(wcss);
+ if (ret)
+ goto free_rproc;
+
+ ret = q6v5_wcss_init_regulator(wcss);
+ if (ret)
+ goto free_rproc;
+ }
+
+ ret = q6v5_wcss_init_reset(wcss, desc);
if (ret)
goto free_rproc;
- ret = qcom_q6v5_init(&wcss->q6v5, pdev, rproc, WCSS_CRASH_REASON, NULL);
+ ret = qcom_q6v5_init(&wcss->q6v5, pdev, rproc, desc->crash_reason_smem,
+ NULL);
if (ret)
goto free_rproc;
qcom_add_glink_subdev(rproc, &wcss->glink_subdev, "q6wcss");
qcom_add_ssr_subdev(rproc, &wcss->ssr_subdev, "q6wcss");
+ if (desc->ssctl_id)
+ wcss->sysmon = qcom_add_sysmon_subdev(rproc,
+ desc->sysmon_name,
+ desc->ssctl_id);
+
ret = rproc_add(rproc);
if (ret)
goto free_rproc;
return 0;
}
+static const struct wcss_data wcss_ipq8074_res_init = {
+ .firmware_name = "IPQ8074/q6_fw.mdt",
+ .crash_reason_smem = WCSS_CRASH_REASON,
+ .aon_reset_required = true,
+ .wcss_q6_reset_required = true,
+ .ops = &q6v5_wcss_ipq8074_ops,
+ .requires_force_stop = true,
+};
+
+static const struct wcss_data wcss_qcs404_res_init = {
+ .crash_reason_smem = WCSS_CRASH_REASON,
+ .firmware_name = "wcnss.mdt",
+ .version = WCSS_QCS404,
+ .aon_reset_required = false,
+ .wcss_q6_reset_required = false,
+ .ssr_name = "mpss",
+ .sysmon_name = "wcnss",
+ .ssctl_id = 0x12,
+ .ops = &q6v5_wcss_qcs404_ops,
+ .requires_force_stop = false,
+};
+
static const struct of_device_id q6v5_wcss_of_match[] = {
- { .compatible = "qcom,ipq8074-wcss-pil" },
+ { .compatible = "qcom,ipq8074-wcss-pil", .data = &wcss_ipq8074_res_init },
+ { .compatible = "qcom,qcs404-wcss-pil", .data = &wcss_qcs404_res_init },
{ },
};
MODULE_DEVICE_TABLE(of, q6v5_wcss_of_match);
return ret;
}
-static void *wcnss_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *wcnss_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct qcom_wcnss *wcnss = (struct qcom_wcnss *)rproc->priv;
int offset;
static int wcnss_probe(struct platform_device *pdev)
{
+ const char *fw_name = WCNSS_FIRMWARE_NAME;
const struct wcnss_data *data;
struct qcom_wcnss *wcnss;
struct resource *res;
return -ENXIO;
}
+ ret = of_property_read_string(pdev->dev.of_node, "firmware-name",
+ &fw_name);
+ if (ret < 0 && ret != -EINVAL)
+ return ret;
+
rproc = rproc_alloc(&pdev->dev, pdev->name, &wcnss_ops,
- WCNSS_FIRMWARE_NAME, sizeof(*wcnss));
+ fw_name, sizeof(*wcnss));
if (!rproc) {
dev_err(&pdev->dev, "unable to allocate remoteproc\n");
return -ENOMEM;
return -EFAULT;
if (!strncmp(cmd, "start", len)) {
- if (rproc->state == RPROC_RUNNING)
+ if (rproc->state == RPROC_RUNNING ||
+ rproc->state == RPROC_ATTACHED)
return -EBUSY;
ret = rproc_boot(rproc);
} else if (!strncmp(cmd, "stop", len)) {
- if (rproc->state != RPROC_RUNNING)
+ if (rproc->state != RPROC_RUNNING &&
+ rproc->state != RPROC_ATTACHED)
return -EINVAL;
rproc_shutdown(rproc);
+ } else if (!strncmp(cmd, "detach", len)) {
+ if (rproc->state != RPROC_ATTACHED)
+ return -EINVAL;
+
+ ret = rproc_detach(rproc);
} else {
dev_err(&rproc->dev, "Unrecognized option\n");
ret = -EINVAL;
static int rproc_cdev_release(struct inode *inode, struct file *filp)
{
struct rproc *rproc = container_of(inode->i_cdev, struct rproc, cdev);
+ int ret = 0;
- if (rproc->cdev_put_on_release && rproc->state == RPROC_RUNNING)
+ if (!rproc->cdev_put_on_release)
+ return 0;
+
+ if (rproc->state == RPROC_RUNNING)
rproc_shutdown(rproc);
+ else if (rproc->state == RPROC_ATTACHED)
+ ret = rproc_detach(rproc);
- return 0;
+ return ret;
}
static const struct file_operations rproc_fops = {
* here the output of the DMA API for the carveouts, which should be more
* correct.
*/
-void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct rproc_mem_entry *carveout;
void *ptr = NULL;
if (rproc->ops->da_to_va) {
- ptr = rproc->ops->da_to_va(rproc, da, len);
+ ptr = rproc->ops->da_to_va(rproc, da, len, is_iomem);
if (ptr)
goto out;
}
ptr = carveout->va + offset;
+ if (is_iomem)
+ *is_iomem = carveout->is_iomem;
+
break;
}
/**
* rproc_handle_vdev() - handle a vdev fw resource
* @rproc: the remote processor
- * @rsc: the vring resource descriptor
+ * @ptr: the vring resource descriptor
* @offset: offset of the resource entry
* @avail: size of available data (for sanity checking the image)
*
*
* Returns 0 on success, or an appropriate error code otherwise
*/
-static int rproc_handle_vdev(struct rproc *rproc, struct fw_rsc_vdev *rsc,
+static int rproc_handle_vdev(struct rproc *rproc, void *ptr,
int offset, int avail)
{
+ struct fw_rsc_vdev *rsc = ptr;
struct device *dev = &rproc->dev;
struct rproc_vdev *rvdev;
int i, ret;
/**
* rproc_handle_trace() - handle a shared trace buffer resource
* @rproc: the remote processor
- * @rsc: the trace resource descriptor
+ * @ptr: the trace resource descriptor
* @offset: offset of the resource entry
* @avail: size of available data (for sanity checking the image)
*
*
* Returns 0 on success, or an appropriate error code otherwise
*/
-static int rproc_handle_trace(struct rproc *rproc, struct fw_rsc_trace *rsc,
+static int rproc_handle_trace(struct rproc *rproc, void *ptr,
int offset, int avail)
{
+ struct fw_rsc_trace *rsc = ptr;
struct rproc_debug_trace *trace;
struct device *dev = &rproc->dev;
char name[15];
/**
* rproc_handle_devmem() - handle devmem resource entry
* @rproc: remote processor handle
- * @rsc: the devmem resource entry
+ * @ptr: the devmem resource entry
* @offset: offset of the resource entry
* @avail: size of available data (for sanity checking the image)
*
* and not allow firmwares to request access to physical addresses that
* are outside those ranges.
*/
-static int rproc_handle_devmem(struct rproc *rproc, struct fw_rsc_devmem *rsc,
+static int rproc_handle_devmem(struct rproc *rproc, void *ptr,
int offset, int avail)
{
+ struct fw_rsc_devmem *rsc = ptr;
struct rproc_mem_entry *mapping;
struct device *dev = &rproc->dev;
int ret;
/**
* rproc_handle_carveout() - handle phys contig memory allocation requests
* @rproc: rproc handle
- * @rsc: the resource entry
+ * @ptr: the resource entry
* @offset: offset of the resource entry
* @avail: size of available data (for image validation)
*
* pressure is important; it may have a substantial impact on performance.
*/
static int rproc_handle_carveout(struct rproc *rproc,
- struct fw_rsc_carveout *rsc,
- int offset, int avail)
+ void *ptr, int offset, int avail)
{
+ struct fw_rsc_carveout *rsc = ptr;
struct rproc_mem_entry *carveout;
struct device *dev = &rproc->dev;
* enum fw_resource_type.
*/
static rproc_handle_resource_t rproc_loading_handlers[RSC_LAST] = {
- [RSC_CARVEOUT] = (rproc_handle_resource_t)rproc_handle_carveout,
- [RSC_DEVMEM] = (rproc_handle_resource_t)rproc_handle_devmem,
- [RSC_TRACE] = (rproc_handle_resource_t)rproc_handle_trace,
- [RSC_VDEV] = (rproc_handle_resource_t)rproc_handle_vdev,
+ [RSC_CARVEOUT] = rproc_handle_carveout,
+ [RSC_DEVMEM] = rproc_handle_devmem,
+ [RSC_TRACE] = rproc_handle_trace,
+ [RSC_VDEV] = rproc_handle_vdev,
};
/* handle firmware resource entries before booting the remote processor */
return ret;
}
-static int rproc_attach(struct rproc *rproc)
+static int __rproc_attach(struct rproc *rproc)
{
struct device *dev = &rproc->dev;
int ret;
goto stop_rproc;
}
- rproc->state = RPROC_RUNNING;
+ rproc->state = RPROC_ATTACHED;
dev_info(dev, "remote processor %s is now attached\n", rproc->name);
return ret;
}
+static int rproc_set_rsc_table(struct rproc *rproc)
+{
+ struct resource_table *table_ptr;
+ struct device *dev = &rproc->dev;
+ size_t table_sz;
+ int ret;
+
+ table_ptr = rproc_get_loaded_rsc_table(rproc, &table_sz);
+ if (!table_ptr) {
+ /* Not having a resource table is acceptable */
+ return 0;
+ }
+
+ if (IS_ERR(table_ptr)) {
+ ret = PTR_ERR(table_ptr);
+ dev_err(dev, "can't load resource table: %d\n", ret);
+ return ret;
+ }
+
+ /*
+ * If it is possible to detach the remote processor, keep an untouched
+ * copy of the resource table. That way we can start fresh again when
+ * the remote processor is re-attached, that is:
+ *
+ * DETACHED -> ATTACHED -> DETACHED -> ATTACHED
+ *
+ * Free'd in rproc_reset_rsc_table_on_detach() and
+ * rproc_reset_rsc_table_on_stop().
+ */
+ if (rproc->ops->detach) {
+ rproc->clean_table = kmemdup(table_ptr, table_sz, GFP_KERNEL);
+ if (!rproc->clean_table)
+ return -ENOMEM;
+ } else {
+ rproc->clean_table = NULL;
+ }
+
+ rproc->cached_table = NULL;
+ rproc->table_ptr = table_ptr;
+ rproc->table_sz = table_sz;
+
+ return 0;
+}
+
+static int rproc_reset_rsc_table_on_detach(struct rproc *rproc)
+{
+ struct resource_table *table_ptr;
+
+ /* A resource table was never retrieved, nothing to do here */
+ if (!rproc->table_ptr)
+ return 0;
+
+ /*
+ * If we made it to this point a clean_table _must_ have been
+ * allocated in rproc_set_rsc_table(). If one isn't present
+ * something went really wrong and we must complain.
+ */
+ if (WARN_ON(!rproc->clean_table))
+ return -EINVAL;
+
+ /* Remember where the external entity installed the resource table */
+ table_ptr = rproc->table_ptr;
+
+ /*
+ * If we made it here the remote processor was started by another
+ * entity and a cache table doesn't exist. As such make a copy of
+ * the resource table currently used by the remote processor and
+ * use that for the rest of the shutdown process. The memory
+ * allocated here is free'd in rproc_detach().
+ */
+ rproc->cached_table = kmemdup(rproc->table_ptr,
+ rproc->table_sz, GFP_KERNEL);
+ if (!rproc->cached_table)
+ return -ENOMEM;
+
+ /*
+ * Use a copy of the resource table for the remainder of the
+ * shutdown process.
+ */
+ rproc->table_ptr = rproc->cached_table;
+
+ /*
+ * Reset the memory area where the firmware loaded the resource table
+ * to its original value. That way when we re-attach the remote
+ * processor the resource table is clean and ready to be used again.
+ */
+ memcpy(table_ptr, rproc->clean_table, rproc->table_sz);
+
+ /*
+ * The clean resource table is no longer needed. Allocated in
+ * rproc_set_rsc_table().
+ */
+ kfree(rproc->clean_table);
+
+ return 0;
+}
+
+static int rproc_reset_rsc_table_on_stop(struct rproc *rproc)
+{
+ /* A resource table was never retrieved, nothing to do here */
+ if (!rproc->table_ptr)
+ return 0;
+
+ /*
+ * If a cache table exists the remote processor was started by
+ * the remoteproc core. That cache table should be used for
+ * the rest of the shutdown process.
+ */
+ if (rproc->cached_table)
+ goto out;
+
+ /*
+ * If we made it here the remote processor was started by another
+ * entity and a cache table doesn't exist. As such make a copy of
+ * the resource table currently used by the remote processor and
+ * use that for the rest of the shutdown process. The memory
+ * allocated here is free'd in rproc_shutdown().
+ */
+ rproc->cached_table = kmemdup(rproc->table_ptr,
+ rproc->table_sz, GFP_KERNEL);
+ if (!rproc->cached_table)
+ return -ENOMEM;
+
+ /*
+ * Since the remote processor is being switched off the clean table
+ * won't be needed. Allocated in rproc_set_rsc_table().
+ */
+ kfree(rproc->clean_table);
+
+out:
+ /*
+ * Use a copy of the resource table for the remainder of the
+ * shutdown process.
+ */
+ rproc->table_ptr = rproc->cached_table;
+ return 0;
+}
+
/*
* Attach to remote processor - similar to rproc_fw_boot() but without
* the steps that deal with the firmware image.
*/
-static int rproc_actuate(struct rproc *rproc)
+static int rproc_attach(struct rproc *rproc)
{
struct device *dev = &rproc->dev;
int ret;
return ret;
}
+ /* Do anything that is needed to boot the remote processor */
+ ret = rproc_prepare_device(rproc);
+ if (ret) {
+ dev_err(dev, "can't prepare rproc %s: %d\n", rproc->name, ret);
+ goto disable_iommu;
+ }
+
+ ret = rproc_set_rsc_table(rproc);
+ if (ret) {
+ dev_err(dev, "can't load resource table: %d\n", ret);
+ goto unprepare_device;
+ }
+
/* reset max_notifyid */
rproc->max_notifyid = -1;
ret = rproc_handle_resources(rproc, rproc_loading_handlers);
if (ret) {
dev_err(dev, "Failed to process resources: %d\n", ret);
- goto disable_iommu;
+ goto unprepare_device;
}
/* Allocate carveout resources associated to rproc */
goto clean_up_resources;
}
- ret = rproc_attach(rproc);
+ ret = __rproc_attach(rproc);
if (ret)
goto clean_up_resources;
clean_up_resources:
rproc_resource_cleanup(rproc);
+unprepare_device:
+ /* release HW resources if needed */
+ rproc_unprepare_device(rproc);
disable_iommu:
rproc_disable_iommu(rproc);
return ret;
struct device *dev = &rproc->dev;
int ret;
+ /* No need to continue if a stop() operation has not been provided */
+ if (!rproc->ops->stop)
+ return -EINVAL;
+
/* Stop any subdevices for the remote processor */
rproc_stop_subdevices(rproc, crashed);
/* the installed resource table is no longer accessible */
- rproc->table_ptr = rproc->cached_table;
+ ret = rproc_reset_rsc_table_on_stop(rproc);
+ if (ret) {
+ dev_err(dev, "can't reset resource table: %d\n", ret);
+ return ret;
+ }
+
/* power off the remote processor */
ret = rproc->ops->stop(rproc);
rproc->state = RPROC_OFFLINE;
- /*
- * The remote processor has been stopped and is now offline, which means
- * that the next time it is brought back online the remoteproc core will
- * be responsible to load its firmware. As such it is no longer
- * autonomous.
- */
- rproc->autonomous = false;
-
dev_info(dev, "stopped remote processor %s\n", rproc->name);
return 0;
}
+/*
+ * __rproc_detach(): Does the opposite of __rproc_attach()
+ */
+static int __rproc_detach(struct rproc *rproc)
+{
+ struct device *dev = &rproc->dev;
+ int ret;
+
+ /* No need to continue if a detach() operation has not been provided */
+ if (!rproc->ops->detach)
+ return -EINVAL;
+
+ /* Stop any subdevices for the remote processor */
+ rproc_stop_subdevices(rproc, false);
+
+ /* the installed resource table is no longer accessible */
+ ret = rproc_reset_rsc_table_on_detach(rproc);
+ if (ret) {
+ dev_err(dev, "can't reset resource table: %d\n", ret);
+ return ret;
+ }
+
+ /* Tell the remote processor the core isn't available anymore */
+ ret = rproc->ops->detach(rproc);
+ if (ret) {
+ dev_err(dev, "can't detach from rproc: %d\n", ret);
+ return ret;
+ }
+
+ rproc_unprepare_subdevices(rproc);
+
+ rproc->state = RPROC_DETACHED;
+
+ dev_info(dev, "detached remote processor %s\n", rproc->name);
+
+ return 0;
+}
/**
* rproc_trigger_recovery() - recover a remoteproc
if (rproc->state == RPROC_DETACHED) {
dev_info(dev, "attaching to %s\n", rproc->name);
- ret = rproc_actuate(rproc);
+ ret = rproc_attach(rproc);
} else {
dev_info(dev, "powering up %s\n", rproc->name);
}
EXPORT_SYMBOL(rproc_shutdown);
+/**
+ * rproc_detach() - Detach the remote processor from the
+ * remoteproc core
+ *
+ * @rproc: the remote processor
+ *
+ * Detach a remote processor (previously attached to with rproc_attach()).
+ *
+ * In case @rproc is still being used by an additional user(s), then
+ * this function will just decrement the power refcount and exit,
+ * without disconnecting the device.
+ *
+ * Function rproc_detach() calls __rproc_detach() in order to let a remote
+ * processor know that services provided by the application processor are
+ * no longer available. From there it should be possible to remove the
+ * platform driver and even power cycle the application processor (if the HW
+ * supports it) without needing to switch off the remote processor.
+ */
+int rproc_detach(struct rproc *rproc)
+{
+ struct device *dev = &rproc->dev;
+ int ret;
+
+ ret = mutex_lock_interruptible(&rproc->lock);
+ if (ret) {
+ dev_err(dev, "can't lock rproc %s: %d\n", rproc->name, ret);
+ return ret;
+ }
+
+ /* if the remote proc is still needed, bail out */
+ if (!atomic_dec_and_test(&rproc->power)) {
+ ret = 0;
+ goto out;
+ }
+
+ ret = __rproc_detach(rproc);
+ if (ret) {
+ atomic_inc(&rproc->power);
+ goto out;
+ }
+
+ /* clean up all acquired resources */
+ rproc_resource_cleanup(rproc);
+
+ /* release HW resources if needed */
+ rproc_unprepare_device(rproc);
+
+ rproc_disable_iommu(rproc);
+
+ /* Free the copy of the resource table */
+ kfree(rproc->cached_table);
+ rproc->cached_table = NULL;
+ rproc->table_ptr = NULL;
+out:
+ mutex_unlock(&rproc->lock);
+ return ret;
+}
+EXPORT_SYMBOL(rproc_detach);
+
/**
* rproc_get_by_phandle() - find a remote processor by phandle
* @phandle: phandle to the rproc
if (ret < 0)
return ret;
- /*
- * Remind ourselves the remote processor has been attached to rather
- * than booted by the remoteproc core. This is important because the
- * RPROC_DETACHED state will be lost as soon as the remote processor
- * has been attached to. Used in firmware_show() and reset in
- * rproc_stop().
- */
- if (rproc->state == RPROC_DETACHED)
- rproc->autonomous = true;
-
/* if rproc is marked always-on, request it to boot */
if (rproc->auto_boot) {
ret = rproc_trigger_auto_boot(rproc);
if (!rproc)
return -EINVAL;
- /* if rproc is marked always-on, rproc_add() booted it */
/* TODO: make sure this works with rproc->power > 1 */
- if (rproc->auto_boot)
- rproc_shutdown(rproc);
+ rproc_shutdown(rproc);
mutex_lock(&rproc->lock);
rproc->state = RPROC_DELETED;
rcu_read_lock();
list_for_each_entry_rcu(rproc, &rproc_list, node) {
- if (!rproc->ops->panic || rproc->state != RPROC_RUNNING)
+ if (!rproc->ops->panic)
+ continue;
+
+ if (rproc->state != RPROC_RUNNING &&
+ rproc->state != RPROC_ATTACHED)
continue;
d = rproc->ops->panic(rproc);
size_t offset, size_t size)
{
void *ptr;
+ bool is_iomem;
if (segment->dump) {
segment->dump(rproc, segment, dest, offset, size);
} else {
- ptr = rproc_da_to_va(rproc, segment->da + offset, size);
+ ptr = rproc_da_to_va(rproc, segment->da + offset, size, &is_iomem);
if (!ptr) {
dev_err(&rproc->dev,
"invalid copy request for segment %pad with offset %zu and size %zu)\n",
&segment->da, offset, size);
memset(dest, 0xff, size);
} else {
- memcpy(dest, ptr, size);
+ if (is_iomem)
+ memcpy_fromio(dest, ptr, size);
+ else
+ memcpy(dest, ptr, size);
}
}
}
char buf[100];
int len;
- va = rproc_da_to_va(data->rproc, trace->da, trace->len);
+ va = rproc_da_to_va(data->rproc, trace->da, trace->len, NULL);
if (!va) {
len = scnprintf(buf, sizeof(buf), "Trace %s not available\n",
u64 offset = elf_phdr_get_p_offset(class, phdr);
u32 type = elf_phdr_get_p_type(class, phdr);
void *ptr;
+ bool is_iomem;
if (type != PT_LOAD)
continue;
}
/* grab the kernel address for this device address */
- ptr = rproc_da_to_va(rproc, da, memsz);
+ ptr = rproc_da_to_va(rproc, da, memsz, &is_iomem);
if (!ptr) {
dev_err(dev, "bad phdr da 0x%llx mem 0x%llx\n", da,
memsz);
}
/* put the segment where the remote processor expects it */
- if (filesz)
- memcpy(ptr, elf_data + offset, filesz);
+ if (filesz) {
+ if (is_iomem)
+ memcpy_fromio(ptr, (void __iomem *)(elf_data + offset), filesz);
+ else
+ memcpy(ptr, elf_data + offset, filesz);
+ }
/*
* Zero out remaining memory for this segment.
* did this for us. albeit harmless, we may consider removing
* this.
*/
- if (memsz > filesz)
- memset(ptr + filesz, 0, memsz - filesz);
+ if (memsz > filesz) {
+ if (is_iomem)
+ memset_io((void __iomem *)(ptr + filesz), 0, memsz - filesz);
+ else
+ memset(ptr + filesz, 0, memsz - filesz);
+ }
}
return ret;
return NULL;
}
- return rproc_da_to_va(rproc, sh_addr, sh_size);
+ return rproc_da_to_va(rproc, sh_addr, sh_size, NULL);
}
EXPORT_SYMBOL(rproc_elf_find_loaded_rsc_table);
void rproc_free_vring(struct rproc_vring *rvring);
int rproc_alloc_vring(struct rproc_vdev *rvdev, int i);
-void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len);
+void *rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem);
phys_addr_t rproc_va_to_pa(void *cpu_addr);
int rproc_trigger_recovery(struct rproc *rproc);
return NULL;
}
+static inline
+struct resource_table *rproc_get_loaded_rsc_table(struct rproc *rproc,
+ size_t *size)
+{
+ if (rproc->ops->get_loaded_rsc_table)
+ return rproc->ops->get_loaded_rsc_table(rproc, size);
+
+ return NULL;
+}
+
static inline
bool rproc_u64_fit_in_size_t(u64 val)
{
{
struct rproc *rproc = to_rproc(dev);
- return sprintf(buf, "%s", rproc->recovery_disabled ? "disabled\n" : "enabled\n");
+ return sysfs_emit(buf, "%s", rproc->recovery_disabled ? "disabled\n" : "enabled\n");
}
/*
{
struct rproc *rproc = to_rproc(dev);
- return sprintf(buf, "%s\n", rproc_coredump_str[rproc->dump_conf]);
+ return sysfs_emit(buf, "%s\n", rproc_coredump_str[rproc->dump_conf]);
}
/*
* If the remote processor has been started by an external
* entity we have no idea of what image it is running. As such
* simply display a generic string rather then rproc->firmware.
- *
- * Here we rely on the autonomous flag because a remote processor
- * may have been attached to and currently in a running state.
*/
- if (rproc->autonomous)
+ if (rproc->state == RPROC_ATTACHED)
firmware = "unknown";
return sprintf(buf, "%s\n", firmware);
[RPROC_RUNNING] = "running",
[RPROC_CRASHED] = "crashed",
[RPROC_DELETED] = "deleted",
+ [RPROC_ATTACHED] = "attached",
[RPROC_DETACHED] = "detached",
[RPROC_LAST] = "invalid",
};
int ret = 0;
if (sysfs_streq(buf, "start")) {
- if (rproc->state == RPROC_RUNNING)
+ if (rproc->state == RPROC_RUNNING ||
+ rproc->state == RPROC_ATTACHED)
return -EBUSY;
ret = rproc_boot(rproc);
if (ret)
dev_err(&rproc->dev, "Boot failed: %d\n", ret);
} else if (sysfs_streq(buf, "stop")) {
- if (rproc->state != RPROC_RUNNING)
+ if (rproc->state != RPROC_RUNNING &&
+ rproc->state != RPROC_ATTACHED)
return -EINVAL;
rproc_shutdown(rproc);
+ } else if (sysfs_streq(buf, "detach")) {
+ if (rproc->state != RPROC_ATTACHED)
+ return -EINVAL;
+
+ ret = rproc_detach(rproc);
} else {
dev_err(&rproc->dev, "Unrecognised option: %s\n", buf);
ret = -EINVAL;
return 0;
}
-static void *slim_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *slim_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct st_slim_rproc *slim_rproc = rproc->priv;
void *va = NULL;
#define RELEASE_BOOT 1
#define MBOX_NB_VQ 2
-#define MBOX_NB_MBX 3
+#define MBOX_NB_MBX 4
#define STM32_SMC_RCC 0x82001000
#define STM32_SMC_REG_WRITE 0x1
#define STM32_MBX_VQ1 "vq1"
#define STM32_MBX_VQ1_ID 1
#define STM32_MBX_SHUTDOWN "shutdown"
+#define STM32_MBX_DETACH "detach"
#define RSC_TBL_SIZE 1024
return -EINVAL;
}
-static int stm32_rproc_elf_load_rsc_table(struct rproc *rproc,
- const struct firmware *fw)
-{
- if (rproc_elf_load_rsc_table(rproc, fw))
- dev_warn(&rproc->dev, "no resource table found for this firmware\n");
-
- return 0;
-}
-
-static int stm32_rproc_parse_memory_regions(struct rproc *rproc)
+static int stm32_rproc_prepare(struct rproc *rproc)
{
struct device *dev = rproc->dev.parent;
struct device_node *np = dev->of_node;
static int stm32_rproc_parse_fw(struct rproc *rproc, const struct firmware *fw)
{
- int ret = stm32_rproc_parse_memory_regions(rproc);
-
- if (ret)
- return ret;
+ if (rproc_elf_load_rsc_table(rproc, fw))
+ dev_warn(&rproc->dev, "no resource table found for this firmware\n");
- return stm32_rproc_elf_load_rsc_table(rproc, fw);
+ return 0;
}
static irqreturn_t stm32_rproc_wdg(int irq, void *data)
.tx_done = NULL,
.tx_tout = 500, /* 500 ms time out */
},
+ },
+ {
+ .name = STM32_MBX_DETACH,
+ .vq_id = -1,
+ .client = {
+ .tx_block = true,
+ .tx_done = NULL,
+ .tx_tout = 200, /* 200 ms time out to detach should be fair enough */
+ },
}
};
return stm32_rproc_set_hold_boot(rproc, true);
}
+static int stm32_rproc_detach(struct rproc *rproc)
+{
+ struct stm32_rproc *ddata = rproc->priv;
+ int err, dummy_data, idx;
+
+ /* Inform the remote processor of the detach */
+ idx = stm32_rproc_mbox_idx(rproc, STM32_MBX_DETACH);
+ if (idx >= 0 && ddata->mb[idx].chan) {
+ /* A dummy data is sent to allow to block on transmit */
+ err = mbox_send_message(ddata->mb[idx].chan,
+ &dummy_data);
+ if (err < 0)
+ dev_warn(&rproc->dev, "warning: remote FW detach without ack\n");
+ }
+
+ /* Allow remote processor to auto-reboot */
+ return stm32_rproc_set_hold_boot(rproc, false);
+}
+
static int stm32_rproc_stop(struct rproc *rproc)
{
struct stm32_rproc *ddata = rproc->priv;
}
}
+static int stm32_rproc_da_to_pa(struct rproc *rproc,
+ u64 da, phys_addr_t *pa)
+{
+ struct stm32_rproc *ddata = rproc->priv;
+ struct device *dev = rproc->dev.parent;
+ struct stm32_rproc_mem *p_mem;
+ unsigned int i;
+
+ for (i = 0; i < ddata->nb_rmems; i++) {
+ p_mem = &ddata->rmems[i];
+
+ if (da < p_mem->dev_addr ||
+ da >= p_mem->dev_addr + p_mem->size)
+ continue;
+
+ *pa = da - p_mem->dev_addr + p_mem->bus_addr;
+ dev_dbg(dev, "da %llx to pa %#x\n", da, *pa);
+
+ return 0;
+ }
+
+ dev_err(dev, "can't translate da %llx\n", da);
+
+ return -EINVAL;
+}
+
+static struct resource_table *
+stm32_rproc_get_loaded_rsc_table(struct rproc *rproc, size_t *table_sz)
+{
+ struct stm32_rproc *ddata = rproc->priv;
+ struct device *dev = rproc->dev.parent;
+ phys_addr_t rsc_pa;
+ u32 rsc_da;
+ int err;
+
+ /* The resource table has already been mapped, nothing to do */
+ if (ddata->rsc_va)
+ goto done;
+
+ err = regmap_read(ddata->rsctbl.map, ddata->rsctbl.reg, &rsc_da);
+ if (err) {
+ dev_err(dev, "failed to read rsc tbl addr\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (!rsc_da)
+ /* no rsc table */
+ return ERR_PTR(-ENOENT);
+
+ err = stm32_rproc_da_to_pa(rproc, rsc_da, &rsc_pa);
+ if (err)
+ return ERR_PTR(err);
+
+ ddata->rsc_va = devm_ioremap_wc(dev, rsc_pa, RSC_TBL_SIZE);
+ if (IS_ERR_OR_NULL(ddata->rsc_va)) {
+ dev_err(dev, "Unable to map memory region: %pa+%zx\n",
+ &rsc_pa, RSC_TBL_SIZE);
+ ddata->rsc_va = NULL;
+ return ERR_PTR(-ENOMEM);
+ }
+
+done:
+ /*
+ * Assuming the resource table fits in 1kB is fair.
+ * Notice for the detach, that this 1 kB memory area has to be reserved in the coprocessor
+ * firmware for the resource table. On detach, the remoteproc core re-initializes this
+ * entire area by overwriting it with the initial values stored in rproc->clean_table.
+ */
+ *table_sz = RSC_TBL_SIZE;
+ return (struct resource_table *)ddata->rsc_va;
+}
+
static const struct rproc_ops st_rproc_ops = {
+ .prepare = stm32_rproc_prepare,
.start = stm32_rproc_start,
.stop = stm32_rproc_stop,
.attach = stm32_rproc_attach,
+ .detach = stm32_rproc_detach,
.kick = stm32_rproc_kick,
.load = rproc_elf_load_segments,
.parse_fw = stm32_rproc_parse_fw,
.find_loaded_rsc_table = rproc_elf_find_loaded_rsc_table,
+ .get_loaded_rsc_table = stm32_rproc_get_loaded_rsc_table,
.sanity_check = rproc_elf_sanity_check,
.get_boot_addr = rproc_elf_get_boot_addr,
};
return regmap_read(ddata->m4_state.map, ddata->m4_state.reg, state);
}
-static int stm32_rproc_da_to_pa(struct platform_device *pdev,
- struct stm32_rproc *ddata,
- u64 da, phys_addr_t *pa)
-{
- struct device *dev = &pdev->dev;
- struct stm32_rproc_mem *p_mem;
- unsigned int i;
-
- for (i = 0; i < ddata->nb_rmems; i++) {
- p_mem = &ddata->rmems[i];
-
- if (da < p_mem->dev_addr ||
- da >= p_mem->dev_addr + p_mem->size)
- continue;
-
- *pa = da - p_mem->dev_addr + p_mem->bus_addr;
- dev_dbg(dev, "da %llx to pa %#x\n", da, *pa);
-
- return 0;
- }
-
- dev_err(dev, "can't translate da %llx\n", da);
-
- return -EINVAL;
-}
-
-static int stm32_rproc_get_loaded_rsc_table(struct platform_device *pdev,
- struct rproc *rproc,
- struct stm32_rproc *ddata)
-{
- struct device *dev = &pdev->dev;
- phys_addr_t rsc_pa;
- u32 rsc_da;
- int err;
-
- err = regmap_read(ddata->rsctbl.map, ddata->rsctbl.reg, &rsc_da);
- if (err) {
- dev_err(dev, "failed to read rsc tbl addr\n");
- return err;
- }
-
- if (!rsc_da)
- /* no rsc table */
- return 0;
-
- err = stm32_rproc_da_to_pa(pdev, ddata, rsc_da, &rsc_pa);
- if (err)
- return err;
-
- ddata->rsc_va = devm_ioremap_wc(dev, rsc_pa, RSC_TBL_SIZE);
- if (IS_ERR_OR_NULL(ddata->rsc_va)) {
- dev_err(dev, "Unable to map memory region: %pa+%zx\n",
- &rsc_pa, RSC_TBL_SIZE);
- ddata->rsc_va = NULL;
- return -ENOMEM;
- }
-
- /*
- * The resource table is already loaded in device memory, no need
- * to work with a cached table.
- */
- rproc->cached_table = NULL;
- /* Assuming the resource table fits in 1kB is fair */
- rproc->table_sz = RSC_TBL_SIZE;
- rproc->table_ptr = (struct resource_table *)ddata->rsc_va;
-
- return 0;
-}
-
static int stm32_rproc_probe(struct platform_device *pdev)
{
struct device *dev = &pdev->dev;
if (ret)
goto free_rproc;
- if (state == M4_STATE_CRUN) {
+ if (state == M4_STATE_CRUN)
rproc->state = RPROC_DETACHED;
- ret = stm32_rproc_parse_memory_regions(rproc);
- if (ret)
- goto free_resources;
-
- ret = stm32_rproc_get_loaded_rsc_table(pdev, rproc, ddata);
- if (ret)
- goto free_resources;
- }
-
rproc->has_iommu = false;
ddata->workqueue = create_workqueue(dev_name(dev));
if (!ddata->workqueue) {
* can be used either by the remoteproc core for loading (when using kernel
* remoteproc loader), or by any rpmsg bus drivers.
*/
-static void *k3_dsp_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *k3_dsp_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct k3_dsp_rproc *kproc = rproc->priv;
void __iomem *va = NULL;
* present in a DSP or IPU device). The translated addresses can be used
* either by the remoteproc core for loading, or by any rpmsg bus drivers.
*/
-static void *k3_r5_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *k3_r5_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct k3_r5_rproc *kproc = rproc->priv;
struct k3_r5_core *core = kproc->core;
return error;
}
-static void *wkup_m3_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *wkup_m3_rproc_da_to_va(struct rproc *rproc, u64 da, size_t len, bool *is_iomem)
{
struct wkup_m3_rproc *wkupm3 = rproc->priv;
void *va = NULL;
- RCC reset controller in STM32 MCUs
- Allwinner SoCs
- ZTE's zx2967 family
+ - SiFive FU740 SoCs
config RESET_STM32MP157
bool "STM32MP157 Reset Driver" if COMPILE_TEST
dev_err(glink->dev,
"no intent found for channel %s intent %d",
channel->name, liid);
+ ret = -ENOENT;
goto advance_rx;
}
}
return __qcom_glink_send(channel, data, len, false);
}
+static int qcom_glink_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+{
+ struct glink_channel *channel = to_glink_channel(ept);
+
+ return __qcom_glink_send(channel, data, len, true);
+}
+
+static int qcom_glink_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+{
+ struct glink_channel *channel = to_glink_channel(ept);
+
+ return __qcom_glink_send(channel, data, len, false);
+}
+
/*
* Finds the device_node for the glink child interested in this channel.
*/
static const struct rpmsg_endpoint_ops glink_endpoint_ops = {
.destroy_ept = qcom_glink_destroy_ept,
.send = qcom_glink_send,
+ .sendto = qcom_glink_sendto,
.trysend = qcom_glink_trysend,
+ .trysendto = qcom_glink_trysendto,
};
static void qcom_glink_rpdev_release(struct device *dev)
return __qcom_smd_send(qsept->qsch, data, len, false);
}
+static int qcom_smd_sendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+{
+ struct qcom_smd_endpoint *qsept = to_smd_endpoint(ept);
+
+ return __qcom_smd_send(qsept->qsch, data, len, true);
+}
+
+static int qcom_smd_trysendto(struct rpmsg_endpoint *ept, void *data, int len, u32 dst)
+{
+ struct qcom_smd_endpoint *qsept = to_smd_endpoint(ept);
+
+ return __qcom_smd_send(qsept->qsch, data, len, false);
+}
+
static __poll_t qcom_smd_poll(struct rpmsg_endpoint *ept,
struct file *filp, poll_table *wait)
{
static const struct rpmsg_endpoint_ops qcom_smd_endpoint_ops = {
.destroy_ept = qcom_smd_destroy_ept,
.send = qcom_smd_send,
+ .sendto = qcom_smd_sendto,
.trysend = qcom_smd_trysend,
+ .trysendto = qcom_smd_trysendto,
.poll = qcom_smd_poll,
};
struct rpmsg_device *rpdev = eptdev->rpdev;
struct device *dev = &eptdev->dev;
+ if (eptdev->ept)
+ return -EBUSY;
+
get_device(dev);
ept = rpmsg_create_ept(rpdev, rpmsg_ept_cb, eptdev, eptdev->chinfo);
}
if (filp->f_flags & O_NONBLOCK)
- ret = rpmsg_trysend(eptdev->ept, kbuf, len);
+ ret = rpmsg_trysendto(eptdev->ept, kbuf, len, eptdev->chinfo.dst);
else
- ret = rpmsg_send(eptdev->ept, kbuf, len);
+ ret = rpmsg_sendto(eptdev->ept, kbuf, len, eptdev->chinfo.dst);
unlock_eptdev:
mutex_unlock(&eptdev->ept_lock);
},
};
-static int rpmsg_char_init(void)
+static int rpmsg_chrdev_init(void)
{
int ret;
return ret;
}
-postcore_initcall(rpmsg_char_init);
+postcore_initcall(rpmsg_chrdev_init);
static void rpmsg_chrdev_exit(void)
{
wake_up_interruptible(&vrp->sendq);
}
+/*
+ * Called to expose to user a /dev/rpmsg_ctrlX interface allowing to
+ * create endpoint-to-endpoint communication without associated RPMsg channel.
+ * The endpoints are rattached to the ctrldev RPMsg device.
+ */
+static struct rpmsg_device *rpmsg_virtio_add_ctrl_dev(struct virtio_device *vdev)
+{
+ struct virtproc_info *vrp = vdev->priv;
+ struct virtio_rpmsg_channel *vch;
+ struct rpmsg_device *rpdev_ctrl;
+ int err = 0;
+
+ vch = kzalloc(sizeof(*vch), GFP_KERNEL);
+ if (!vch)
+ return ERR_PTR(-ENOMEM);
+
+ /* Link the channel to the vrp */
+ vch->vrp = vrp;
+
+ /* Assign public information to the rpmsg_device */
+ rpdev_ctrl = &vch->rpdev;
+ rpdev_ctrl->ops = &virtio_rpmsg_ops;
+
+ rpdev_ctrl->dev.parent = &vrp->vdev->dev;
+ rpdev_ctrl->dev.release = virtio_rpmsg_release_device;
+ rpdev_ctrl->little_endian = virtio_is_little_endian(vrp->vdev);
+
+ err = rpmsg_chrdev_register_device(rpdev_ctrl);
+ if (err) {
+ kfree(vch);
+ return ERR_PTR(err);
+ }
+
+ return rpdev_ctrl;
+}
+
+static void rpmsg_virtio_del_ctrl_dev(struct rpmsg_device *rpdev_ctrl)
+{
+ if (!rpdev_ctrl)
+ return;
+ kfree(to_virtio_rpmsg_channel(rpdev_ctrl));
+}
+
static int rpmsg_probe(struct virtio_device *vdev)
{
vq_callback_t *vq_cbs[] = { rpmsg_recv_done, rpmsg_xmit_done };
static const char * const names[] = { "input", "output" };
struct virtqueue *vqs[2];
struct virtproc_info *vrp;
- struct virtio_rpmsg_channel *vch;
- struct rpmsg_device *rpdev_ns;
+ struct virtio_rpmsg_channel *vch = NULL;
+ struct rpmsg_device *rpdev_ns, *rpdev_ctrl;
void *bufs_va;
int err = 0, i;
size_t total_buf_space;
vdev->priv = vrp;
+ rpdev_ctrl = rpmsg_virtio_add_ctrl_dev(vdev);
+ if (IS_ERR(rpdev_ctrl)) {
+ err = PTR_ERR(rpdev_ctrl);
+ goto free_coherent;
+ }
+
/* if supported by the remote processor, enable the name service */
if (virtio_has_feature(vdev, VIRTIO_RPMSG_F_NS)) {
vch = kzalloc(sizeof(*vch), GFP_KERNEL);
if (!vch) {
err = -ENOMEM;
- goto free_coherent;
+ goto free_ctrldev;
}
/* Link the channel to our vrp */
err = rpmsg_ns_register_device(rpdev_ns);
if (err)
- goto free_coherent;
+ goto free_vch;
}
/*
return 0;
-free_coherent:
+free_vch:
kfree(vch);
+free_ctrldev:
+ rpmsg_virtio_del_ctrl_dev(rpdev_ctrl);
+free_coherent:
dma_free_coherent(vdev->dev.parent, total_buf_space,
bufs_va, vrp->bufs_dma);
vqs_del:
#define DASD_ECKD_CCW_RCD 0xFA
#define DASD_ECKD_CCW_DSO 0xF7
-/* Define Subssystem Function / Orders */
+/* Define Subsystem Function / Orders */
#define DSO_ORDER_RAS 0x81
/*
#define DASD_ECKD_PG_GROUPED 0x10
/*
- * Size that is reportet for large volumes in the old 16-bit no_cyl field
+ * Size that is reported for large volumes in the old 16-bit no_cyl field
*/
#define LV_COMPAT_CYL 0xFFFE
} __packed;
/*
- * Define Subsytem Operation - Release Allocated Space
+ * Define Subsystem Operation - Release Allocated Space
*/
struct dasd_dso_ras_data {
__u8 order;
struct dasd_ext_pool_sum eps;
u32 real_cyl;
- /* alias managemnet */
+ /* alias management */
struct dasd_uid uid;
struct alias_pav_group *pavgroup;
struct alias_lcu *lcu;
switch (action) {
case IO_SCH_ORPH_UNREG:
case IO_SCH_UNREG:
- if (!cdev)
- css_sch_device_unregister(sch);
+ css_sch_device_unregister(sch);
break;
case IO_SCH_ORPH_ATTACH:
case IO_SCH_UNREG_ATTACH:
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8 -*- */
/* NCR (or Symbios) 53c700 and 53c700-66 Driver
*
/* SPDX-License-Identifier: GPL-2.0 */
-/* -*- mode: c; c-basic-offset: 8 -*- */
/* Driver for 53c700 and 53c700-66 chips from NCR and Symbios
*
module_init(init_ch_module);
module_exit(exit_ch_module);
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
}
/* Get the read only section offset */
- ro_start = pci_vpd_find_tag(vpd_data, 0, vpd_size,
- PCI_VPD_LRDT_RO_DATA);
+ ro_start = pci_vpd_find_tag(vpd_data, vpd_size, PCI_VPD_LRDT_RO_DATA);
if (unlikely(ro_start < 0)) {
dev_err(dev, "%s: VPD Read-only data not found\n", __func__);
rc = -ENODEV;
};
static const struct proc_ops esas2r_proc_ops = {
+ .proc_lseek = default_llseek,
.proc_ioctl = esas2r_proc_ioctl,
#ifdef CONFIG_COMPAT
.proc_compat_ioctl = compat_ptr_ioctl,
return fcpio_status_str[status];
}
-static void fnic_cleanup_io(struct fnic *fnic, int exclude_id);
+static void fnic_cleanup_io(struct fnic *fnic);
static inline spinlock_t *fnic_io_lock_hash(struct fnic *fnic,
struct scsi_cmnd *sc)
atomic64_inc(&reset_stats->fw_reset_completions);
/* Clean up all outstanding io requests */
- fnic_cleanup_io(fnic, SCSI_NO_TAG);
+ fnic_cleanup_io(fnic);
atomic64_set(&fnic->fnic_stats.fw_stats.active_fw_reqs, 0);
atomic64_set(&fnic->fnic_stats.io_stats.active_ios, 0);
return wq_work_done;
}
-static void fnic_cleanup_io(struct fnic *fnic, int exclude_id)
+static bool fnic_cleanup_io_iter(struct scsi_cmnd *sc, void *data,
+ bool reserved)
{
- int i;
+ struct fnic *fnic = data;
struct fnic_io_req *io_req;
unsigned long flags = 0;
- struct scsi_cmnd *sc;
spinlock_t *io_lock;
unsigned long start_time = 0;
struct fnic_stats *fnic_stats = &fnic->fnic_stats;
- for (i = 0; i < fnic->fnic_max_tag_id; i++) {
- if (i == exclude_id)
- continue;
-
- io_lock = fnic_io_lock_tag(fnic, i);
- spin_lock_irqsave(io_lock, flags);
- sc = scsi_host_find_tag(fnic->lport->host, i);
- if (!sc) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- io_req = (struct fnic_io_req *)CMD_SP(sc);
- if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
- !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
- /*
- * We will be here only when FW completes reset
- * without sending completions for outstanding ios.
- */
- CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
- if (io_req && io_req->dr_done)
- complete(io_req->dr_done);
- else if (io_req && io_req->abts_done)
- complete(io_req->abts_done);
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- } else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- if (!io_req) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- CMD_SP(sc) = NULL;
-
- spin_unlock_irqrestore(io_lock, flags);
+ io_lock = fnic_io_lock_tag(fnic, sc->request->tag);
+ spin_lock_irqsave(io_lock, flags);
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+ !(CMD_FLAGS(sc) & FNIC_DEV_RST_DONE)) {
/*
- * If there is a scsi_cmnd associated with this io_req, then
- * free the corresponding state
+ * We will be here only when FW completes reset
+ * without sending completions for outstanding ios.
*/
- start_time = io_req->start_time;
- fnic_release_ioreq_buf(fnic, io_req, sc);
- mempool_free(io_req, fnic->io_req_pool);
+ CMD_FLAGS(sc) |= FNIC_DEV_RST_DONE;
+ if (io_req && io_req->dr_done)
+ complete(io_req->dr_done);
+ else if (io_req && io_req->abts_done)
+ complete(io_req->abts_done);
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ } else if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+ if (!io_req) {
+ spin_unlock_irqrestore(io_lock, flags);
+ goto cleanup_scsi_cmd;
+ }
- sc->result = DID_TRANSPORT_DISRUPTED << 16;
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "%s: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n",
- __func__, sc->request->tag, sc,
- (jiffies - start_time));
+ CMD_SP(sc) = NULL;
- if (atomic64_read(&fnic->io_cmpl_skip))
- atomic64_dec(&fnic->io_cmpl_skip);
- else
- atomic64_inc(&fnic_stats->io_stats.io_completions);
+ spin_unlock_irqrestore(io_lock, flags);
- /* Complete the command to SCSI */
- if (sc->scsi_done) {
- if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED))
- shost_printk(KERN_ERR, fnic->lport->host,
- "Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n",
- sc->request->tag, sc);
+ /*
+ * If there is a scsi_cmnd associated with this io_req, then
+ * free the corresponding state
+ */
+ start_time = io_req->start_time;
+ fnic_release_ioreq_buf(fnic, io_req, sc);
+ mempool_free(io_req, fnic->io_req_pool);
- FNIC_TRACE(fnic_cleanup_io,
- sc->device->host->host_no, i, sc,
- jiffies_to_msecs(jiffies - start_time),
- 0, ((u64)sc->cmnd[0] << 32 |
- (u64)sc->cmnd[2] << 24 |
- (u64)sc->cmnd[3] << 16 |
- (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
- (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+cleanup_scsi_cmd:
+ sc->result = DID_TRANSPORT_DISRUPTED << 16;
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "fnic_cleanup_io: tag:0x%x : sc:0x%p duration = %lu DID_TRANSPORT_DISRUPTED\n",
+ sc->request->tag, sc, (jiffies - start_time));
- sc->scsi_done(sc);
- }
+ if (atomic64_read(&fnic->io_cmpl_skip))
+ atomic64_dec(&fnic->io_cmpl_skip);
+ else
+ atomic64_inc(&fnic_stats->io_stats.io_completions);
+
+ /* Complete the command to SCSI */
+ if (sc->scsi_done) {
+ if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED))
+ shost_printk(KERN_ERR, fnic->lport->host,
+ "Calling done for IO not issued to fw: tag:0x%x sc:0x%p\n",
+ sc->request->tag, sc);
+
+ FNIC_TRACE(fnic_cleanup_io,
+ sc->device->host->host_no, sc->request->tag, sc,
+ jiffies_to_msecs(jiffies - start_time),
+ 0, ((u64)sc->cmnd[0] << 32 |
+ (u64)sc->cmnd[2] << 24 |
+ (u64)sc->cmnd[3] << 16 |
+ (u64)sc->cmnd[4] << 8 | sc->cmnd[5]),
+ (((u64)CMD_FLAGS(sc) << 32) | CMD_STATE(sc)));
+
+ sc->scsi_done(sc);
}
+ return true;
+}
+
+static void fnic_cleanup_io(struct fnic *fnic)
+{
+ scsi_host_busy_iter(fnic->lport->host,
+ fnic_cleanup_io_iter, fnic);
}
void fnic_wq_copy_cleanup_handler(struct vnic_wq_copy *wq,
return 0;
}
-static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
+struct fnic_rport_abort_io_iter_data {
+ struct fnic *fnic;
+ u32 port_id;
+ int term_cnt;
+};
+
+static bool fnic_rport_abort_io_iter(struct scsi_cmnd *sc, void *data,
+ bool reserved)
{
- int tag;
- int abt_tag;
- int term_cnt = 0;
+ struct fnic_rport_abort_io_iter_data *iter_data = data;
+ struct fnic *fnic = iter_data->fnic;
+ int abt_tag = sc->request->tag;
struct fnic_io_req *io_req;
spinlock_t *io_lock;
unsigned long flags;
- struct scsi_cmnd *sc;
struct reset_stats *reset_stats = &fnic->fnic_stats.reset_stats;
struct terminate_stats *term_stats = &fnic->fnic_stats.term_stats;
struct scsi_lun fc_lun;
enum fnic_ioreq_state old_ioreq_state;
- FNIC_SCSI_DBG(KERN_DEBUG,
- fnic->lport->host,
- "fnic_rport_exch_reset called portid 0x%06x\n",
- port_id);
-
- if (fnic->in_remove)
- return;
-
- for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
- abt_tag = tag;
- io_lock = fnic_io_lock_tag(fnic, tag);
- spin_lock_irqsave(io_lock, flags);
- sc = scsi_host_find_tag(fnic->lport->host, tag);
- if (!sc) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
+ io_lock = fnic_io_lock_tag(fnic, abt_tag);
+ spin_lock_irqsave(io_lock, flags);
- io_req = (struct fnic_io_req *)CMD_SP(sc);
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
- if (!io_req || io_req->port_id != port_id) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
+ if (!io_req || io_req->port_id != iter_data->port_id) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
- if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
- (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+ (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
"fnic_rport_exch_reset dev rst not pending sc 0x%p\n",
sc);
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
- /*
- * Found IO that is still pending with firmware and
- * belongs to rport that went away
- */
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- if (io_req->abts_done) {
- shost_printk(KERN_ERR, fnic->lport->host,
+ /*
+ * Found IO that is still pending with firmware and
+ * belongs to rport that went away
+ */
+ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+ if (io_req->abts_done) {
+ shost_printk(KERN_ERR, fnic->lport->host,
"fnic_rport_exch_reset: io_req->abts_done is set "
"state is %s\n",
fnic_ioreq_state_to_str(CMD_STATE(sc)));
- }
+ }
- if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
- shost_printk(KERN_ERR, fnic->lport->host,
- "rport_exch_reset "
- "IO not yet issued %p tag 0x%x flags "
- "%x state %d\n",
- sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
- }
- old_ioreq_state = CMD_STATE(sc);
- CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
- CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
- atomic64_inc(&reset_stats->device_reset_terminates);
- abt_tag = (tag | FNIC_TAG_DEV_RST);
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_rport_exch_reset dev rst sc 0x%p\n",
- sc);
- }
+ if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
+ shost_printk(KERN_ERR, fnic->lport->host,
+ "rport_exch_reset "
+ "IO not yet issued %p tag 0x%x flags "
+ "%x state %d\n",
+ sc, abt_tag, CMD_FLAGS(sc), CMD_STATE(sc));
+ }
+ old_ioreq_state = CMD_STATE(sc);
+ CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+ CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+ if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+ atomic64_inc(&reset_stats->device_reset_terminates);
+ abt_tag |= FNIC_TAG_DEV_RST;
+ }
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "fnic_rport_exch_reset dev rst sc 0x%p\n", sc);
+ BUG_ON(io_req->abts_done);
- BUG_ON(io_req->abts_done);
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "fnic_rport_reset_exch: Issuing abts\n");
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_rport_reset_exch: Issuing abts\n");
+ spin_unlock_irqrestore(io_lock, flags);
+ /* Now queue the abort command to firmware */
+ int_to_scsilun(sc->device->lun, &fc_lun);
+
+ if (fnic_queue_abort_io_req(fnic, abt_tag,
+ FCPIO_ITMF_ABT_TASK_TERM,
+ fc_lun.scsi_lun, io_req)) {
+ /*
+ * Revert the cmd state back to old state, if
+ * it hasn't changed in between. This cmd will get
+ * aborted later by scsi_eh, or cleaned up during
+ * lun reset
+ */
+ spin_lock_irqsave(io_lock, flags);
+ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+ CMD_STATE(sc) = old_ioreq_state;
spin_unlock_irqrestore(io_lock, flags);
+ } else {
+ spin_lock_irqsave(io_lock, flags);
+ if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+ CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+ else
+ CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+ spin_unlock_irqrestore(io_lock, flags);
+ atomic64_inc(&term_stats->terminates);
+ iter_data->term_cnt++;
+ }
+ return true;
+}
- /* Now queue the abort command to firmware */
- int_to_scsilun(sc->device->lun, &fc_lun);
+static void fnic_rport_exch_reset(struct fnic *fnic, u32 port_id)
+{
+ struct terminate_stats *term_stats = &fnic->fnic_stats.term_stats;
+ struct fnic_rport_abort_io_iter_data iter_data = {
+ .fnic = fnic,
+ .port_id = port_id,
+ .term_cnt = 0,
+ };
- if (fnic_queue_abort_io_req(fnic, abt_tag,
- FCPIO_ITMF_ABT_TASK_TERM,
- fc_lun.scsi_lun, io_req)) {
- /*
- * Revert the cmd state back to old state, if
- * it hasn't changed in between. This cmd will get
- * aborted later by scsi_eh, or cleaned up during
- * lun reset
- */
- spin_lock_irqsave(io_lock, flags);
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
- CMD_STATE(sc) = old_ioreq_state;
- spin_unlock_irqrestore(io_lock, flags);
- } else {
- spin_lock_irqsave(io_lock, flags);
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
- CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
- else
- CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
- spin_unlock_irqrestore(io_lock, flags);
- atomic64_inc(&term_stats->terminates);
- term_cnt++;
- }
- }
- if (term_cnt > atomic64_read(&term_stats->max_terminates))
- atomic64_set(&term_stats->max_terminates, term_cnt);
+ FNIC_SCSI_DBG(KERN_DEBUG,
+ fnic->lport->host,
+ "fnic_rport_exch_reset called portid 0x%06x\n",
+ port_id);
+
+ if (fnic->in_remove)
+ return;
+
+ scsi_host_busy_iter(fnic->lport->host, fnic_rport_abort_io_iter,
+ &iter_data);
+ if (iter_data.term_cnt > atomic64_read(&term_stats->max_terminates))
+ atomic64_set(&term_stats->max_terminates, iter_data.term_cnt);
}
void fnic_terminate_rport_io(struct fc_rport *rport)
{
- int tag;
- int abt_tag;
- int term_cnt = 0;
- struct fnic_io_req *io_req;
- spinlock_t *io_lock;
- unsigned long flags;
- struct scsi_cmnd *sc;
- struct scsi_lun fc_lun;
struct fc_rport_libfc_priv *rdata;
struct fc_lport *lport;
struct fnic *fnic;
- struct fc_rport *cmd_rport;
- struct reset_stats *reset_stats;
- struct terminate_stats *term_stats;
- enum fnic_ioreq_state old_ioreq_state;
if (!rport) {
printk(KERN_ERR "fnic_terminate_rport_io: rport is NULL\n");
if (fnic->in_remove)
return;
- reset_stats = &fnic->fnic_stats.reset_stats;
- term_stats = &fnic->fnic_stats.term_stats;
-
- for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
- abt_tag = tag;
- io_lock = fnic_io_lock_tag(fnic, tag);
- spin_lock_irqsave(io_lock, flags);
- sc = scsi_host_find_tag(fnic->lport->host, tag);
- if (!sc) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- io_req = (struct fnic_io_req *)CMD_SP(sc);
- if (!io_req) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- cmd_rport = starget_to_rport(scsi_target(sc->device));
- if (rport != cmd_rport) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
- (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_terminate_rport_io dev rst not pending sc 0x%p\n",
- sc);
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- /*
- * Found IO that is still pending with firmware and
- * belongs to rport that went away
- */
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- if (io_req->abts_done) {
- shost_printk(KERN_ERR, fnic->lport->host,
- "fnic_terminate_rport_io: io_req->abts_done is set "
- "state is %s\n",
- fnic_ioreq_state_to_str(CMD_STATE(sc)));
- }
- if (!(CMD_FLAGS(sc) & FNIC_IO_ISSUED)) {
- FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
- "fnic_terminate_rport_io "
- "IO not yet issued %p tag 0x%x flags "
- "%x state %d\n",
- sc, tag, CMD_FLAGS(sc), CMD_STATE(sc));
- }
- old_ioreq_state = CMD_STATE(sc);
- CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
- CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
- atomic64_inc(&reset_stats->device_reset_terminates);
- abt_tag = (tag | FNIC_TAG_DEV_RST);
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "fnic_terminate_rport_io dev rst sc 0x%p\n", sc);
- }
-
- BUG_ON(io_req->abts_done);
-
- FNIC_SCSI_DBG(KERN_DEBUG,
- fnic->lport->host,
- "fnic_terminate_rport_io: Issuing abts\n");
-
- spin_unlock_irqrestore(io_lock, flags);
-
- /* Now queue the abort command to firmware */
- int_to_scsilun(sc->device->lun, &fc_lun);
-
- if (fnic_queue_abort_io_req(fnic, abt_tag,
- FCPIO_ITMF_ABT_TASK_TERM,
- fc_lun.scsi_lun, io_req)) {
- /*
- * Revert the cmd state back to old state, if
- * it hasn't changed in between. This cmd will get
- * aborted later by scsi_eh, or cleaned up during
- * lun reset
- */
- spin_lock_irqsave(io_lock, flags);
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
- CMD_STATE(sc) = old_ioreq_state;
- spin_unlock_irqrestore(io_lock, flags);
- } else {
- spin_lock_irqsave(io_lock, flags);
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
- CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
- else
- CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
- spin_unlock_irqrestore(io_lock, flags);
- atomic64_inc(&term_stats->terminates);
- term_cnt++;
- }
- }
- if (term_cnt > atomic64_read(&term_stats->max_terminates))
- atomic64_set(&term_stats->max_terminates, term_cnt);
-
+ fnic_rport_exch_reset(fnic, rport->port_id);
}
/*
return ret;
}
-/*
- * Clean up any pending aborts on the lun
- * For each outstanding IO on this lun, whose abort is not completed by fw,
- * issue a local abort. Wait for abort to complete. Return 0 if all commands
- * successfully aborted, 1 otherwise
- */
-static int fnic_clean_pending_aborts(struct fnic *fnic,
- struct scsi_cmnd *lr_sc,
- bool new_sc)
+struct fnic_pending_aborts_iter_data {
+ struct fnic *fnic;
+ struct scsi_cmnd *lr_sc;
+ struct scsi_device *lun_dev;
+ int ret;
+};
+static bool fnic_pending_aborts_iter(struct scsi_cmnd *sc,
+ void *data, bool reserved)
{
- int tag, abt_tag;
+ struct fnic_pending_aborts_iter_data *iter_data = data;
+ struct fnic *fnic = iter_data->fnic;
+ struct scsi_device *lun_dev = iter_data->lun_dev;
+ int abt_tag = sc->request->tag;
struct fnic_io_req *io_req;
spinlock_t *io_lock;
unsigned long flags;
- int ret = 0;
- struct scsi_cmnd *sc;
struct scsi_lun fc_lun;
- struct scsi_device *lun_dev = lr_sc->device;
DECLARE_COMPLETION_ONSTACK(tm_done);
enum fnic_ioreq_state old_ioreq_state;
- for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
- io_lock = fnic_io_lock_tag(fnic, tag);
- spin_lock_irqsave(io_lock, flags);
- sc = scsi_host_find_tag(fnic->lport->host, tag);
- /*
- * ignore this lun reset cmd if issued using new SC
- * or cmds that do not belong to this lun
- */
- if (!sc || ((sc == lr_sc) && new_sc) || sc->device != lun_dev) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if (sc == iter_data->lr_sc || sc->device != lun_dev)
+ return true;
+ if (reserved)
+ return true;
- if (!io_req || sc->device != lun_dev) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
-
- /*
- * Found IO that is still pending with firmware and
- * belongs to the LUN that we are resetting
- */
- FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
- "Found IO in %s on lun\n",
- fnic_ioreq_state_to_str(CMD_STATE(sc)));
-
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
- if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
- (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
- FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
- "%s dev rst not pending sc 0x%p\n", __func__,
- sc);
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
+ io_lock = fnic_io_lock_tag(fnic, abt_tag);
+ spin_lock_irqsave(io_lock, flags);
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if (!io_req) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
- if (io_req->abts_done)
- shost_printk(KERN_ERR, fnic->lport->host,
- "%s: io_req->abts_done is set state is %s\n",
- __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
- old_ioreq_state = CMD_STATE(sc);
- /*
- * Any pending IO issued prior to reset is expected to be
- * in abts pending state, if not we need to set
- * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
- * When IO is completed, the IO will be handed over and
- * handled in this function.
- */
- CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
+ /*
+ * Found IO that is still pending with firmware and
+ * belongs to the LUN that we are resetting
+ */
+ FNIC_SCSI_DBG(KERN_DEBUG, fnic->lport->host,
+ "Found IO in %s on lun\n",
+ fnic_ioreq_state_to_str(CMD_STATE(sc)));
- BUG_ON(io_req->abts_done);
+ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
+ if ((CMD_FLAGS(sc) & FNIC_DEVICE_RESET) &&
+ (!(CMD_FLAGS(sc) & FNIC_DEV_RST_ISSUED))) {
+ FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+ "%s dev rst not pending sc 0x%p\n", __func__,
+ sc);
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
- abt_tag = tag;
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
- abt_tag |= FNIC_TAG_DEV_RST;
- FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
- "%s: dev rst sc 0x%p\n", __func__, sc);
- }
+ if (io_req->abts_done)
+ shost_printk(KERN_ERR, fnic->lport->host,
+ "%s: io_req->abts_done is set state is %s\n",
+ __func__, fnic_ioreq_state_to_str(CMD_STATE(sc)));
+ old_ioreq_state = CMD_STATE(sc);
+ /*
+ * Any pending IO issued prior to reset is expected to be
+ * in abts pending state, if not we need to set
+ * FNIC_IOREQ_ABTS_PENDING to indicate the IO is abort pending.
+ * When IO is completed, the IO will be handed over and
+ * handled in this function.
+ */
+ CMD_STATE(sc) = FNIC_IOREQ_ABTS_PENDING;
- CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
- io_req->abts_done = &tm_done;
- spin_unlock_irqrestore(io_lock, flags);
+ BUG_ON(io_req->abts_done);
- /* Now queue the abort command to firmware */
- int_to_scsilun(sc->device->lun, &fc_lun);
+ if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET) {
+ abt_tag |= FNIC_TAG_DEV_RST;
+ FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+ "%s: dev rst sc 0x%p\n", __func__, sc);
+ }
- if (fnic_queue_abort_io_req(fnic, abt_tag,
- FCPIO_ITMF_ABT_TASK_TERM,
- fc_lun.scsi_lun, io_req)) {
- spin_lock_irqsave(io_lock, flags);
- io_req = (struct fnic_io_req *)CMD_SP(sc);
- if (io_req)
- io_req->abts_done = NULL;
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
- CMD_STATE(sc) = old_ioreq_state;
- spin_unlock_irqrestore(io_lock, flags);
- ret = 1;
- goto clean_pending_aborts_end;
- } else {
- spin_lock_irqsave(io_lock, flags);
- if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
- CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
- spin_unlock_irqrestore(io_lock, flags);
- }
- CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
+ CMD_ABTS_STATUS(sc) = FCPIO_INVALID_CODE;
+ io_req->abts_done = &tm_done;
+ spin_unlock_irqrestore(io_lock, flags);
- wait_for_completion_timeout(&tm_done,
- msecs_to_jiffies
- (fnic->config.ed_tov));
+ /* Now queue the abort command to firmware */
+ int_to_scsilun(sc->device->lun, &fc_lun);
- /* Recheck cmd state to check if it is now aborted */
+ if (fnic_queue_abort_io_req(fnic, abt_tag,
+ FCPIO_ITMF_ABT_TASK_TERM,
+ fc_lun.scsi_lun, io_req)) {
spin_lock_irqsave(io_lock, flags);
io_req = (struct fnic_io_req *)CMD_SP(sc);
- if (!io_req) {
- spin_unlock_irqrestore(io_lock, flags);
- CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
- continue;
- }
+ if (io_req)
+ io_req->abts_done = NULL;
+ if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
+ CMD_STATE(sc) = old_ioreq_state;
+ spin_unlock_irqrestore(io_lock, flags);
+ iter_data->ret = FAILED;
+ return false;
+ } else {
+ spin_lock_irqsave(io_lock, flags);
+ if (CMD_FLAGS(sc) & FNIC_DEVICE_RESET)
+ CMD_FLAGS(sc) |= FNIC_DEV_RST_TERM_ISSUED;
+ spin_unlock_irqrestore(io_lock, flags);
+ }
+ CMD_FLAGS(sc) |= FNIC_IO_INTERNAL_TERM_ISSUED;
- io_req->abts_done = NULL;
+ wait_for_completion_timeout(&tm_done, msecs_to_jiffies
+ (fnic->config.ed_tov));
- /* if abort is still pending with fw, fail */
- if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
- spin_unlock_irqrestore(io_lock, flags);
- CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
- ret = 1;
- goto clean_pending_aborts_end;
- }
- CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
+ /* Recheck cmd state to check if it is now aborted */
+ spin_lock_irqsave(io_lock, flags);
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if (!io_req) {
+ spin_unlock_irqrestore(io_lock, flags);
+ CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_REQ_NULL;
+ return true;
+ }
- /* original sc used for lr is handled by dev reset code */
- if (sc != lr_sc)
- CMD_SP(sc) = NULL;
+ io_req->abts_done = NULL;
+
+ /* if abort is still pending with fw, fail */
+ if (CMD_ABTS_STATUS(sc) == FCPIO_INVALID_CODE) {
spin_unlock_irqrestore(io_lock, flags);
+ CMD_FLAGS(sc) |= FNIC_IO_ABT_TERM_DONE;
+ iter_data->ret = FAILED;
+ return false;
+ }
+ CMD_STATE(sc) = FNIC_IOREQ_ABTS_COMPLETE;
- /* original sc used for lr is handled by dev reset code */
- if (sc != lr_sc) {
- fnic_release_ioreq_buf(fnic, io_req, sc);
- mempool_free(io_req, fnic->io_req_pool);
- }
+ /* original sc used for lr is handled by dev reset code */
+ if (sc != iter_data->lr_sc)
+ CMD_SP(sc) = NULL;
+ spin_unlock_irqrestore(io_lock, flags);
- /*
- * Any IO is returned during reset, it needs to call scsi_done
- * to return the scsi_cmnd to upper layer.
- */
- if (sc->scsi_done) {
- /* Set result to let upper SCSI layer retry */
- sc->result = DID_RESET << 16;
- sc->scsi_done(sc);
- }
+ /* original sc used for lr is handled by dev reset code */
+ if (sc != iter_data->lr_sc) {
+ fnic_release_ioreq_buf(fnic, io_req, sc);
+ mempool_free(io_req, fnic->io_req_pool);
}
+ /*
+ * Any IO is returned during reset, it needs to call scsi_done
+ * to return the scsi_cmnd to upper layer.
+ */
+ if (sc->scsi_done) {
+ /* Set result to let upper SCSI layer retry */
+ sc->result = DID_RESET << 16;
+ sc->scsi_done(sc);
+ }
+ return true;
+}
+
+/*
+ * Clean up any pending aborts on the lun
+ * For each outstanding IO on this lun, whose abort is not completed by fw,
+ * issue a local abort. Wait for abort to complete. Return 0 if all commands
+ * successfully aborted, 1 otherwise
+ */
+static int fnic_clean_pending_aborts(struct fnic *fnic,
+ struct scsi_cmnd *lr_sc,
+ bool new_sc)
+
+{
+ int ret = SUCCESS;
+ struct fnic_pending_aborts_iter_data iter_data = {
+ .fnic = fnic,
+ .lun_dev = lr_sc->device,
+ .ret = SUCCESS,
+ };
+
+ if (new_sc)
+ iter_data.lr_sc = lr_sc;
+
+ scsi_host_busy_iter(fnic->lport->host,
+ fnic_pending_aborts_iter, &iter_data);
+ if (iter_data.ret == FAILED) {
+ ret = iter_data.ret;
+ goto clean_pending_aborts_end;
+ }
schedule_timeout(msecs_to_jiffies(2 * fnic->config.ed_tov));
/* walk again to check, if IOs are still pending in fw */
}
-/*
- * fnic_is_abts_pending() is a helper function that
- * walks through tag map to check if there is any IOs pending,if there is one,
- * then it returns 1 (true), otherwise 0 (false)
- * if @lr_sc is non NULL, then it checks IOs specific to particular LUN,
- * otherwise, it checks for all IOs.
- */
-int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
+static bool fnic_abts_pending_iter(struct scsi_cmnd *sc, void *data,
+ bool reserved)
{
- int tag;
+ struct fnic_pending_aborts_iter_data *iter_data = data;
+ struct fnic *fnic = iter_data->fnic;
+ int cmd_state;
struct fnic_io_req *io_req;
spinlock_t *io_lock;
unsigned long flags;
- int ret = 0;
- struct scsi_cmnd *sc;
- struct scsi_device *lun_dev = NULL;
- if (lr_sc)
- lun_dev = lr_sc->device;
+ /*
+ * ignore this lun reset cmd or cmds that do not belong to
+ * this lun
+ */
+ if (iter_data->lr_sc && sc == iter_data->lr_sc)
+ return true;
+ if (iter_data->lun_dev && sc->device != iter_data->lun_dev)
+ return true;
- /* walk again to check, if IOs are still pending in fw */
- for (tag = 0; tag < fnic->fnic_max_tag_id; tag++) {
- sc = scsi_host_find_tag(fnic->lport->host, tag);
- /*
- * ignore this lun reset cmd or cmds that do not belong to
- * this lun
- */
- if (!sc || (lr_sc && (sc->device != lun_dev || sc == lr_sc)))
- continue;
+ io_lock = fnic_io_lock_hash(fnic, sc);
+ spin_lock_irqsave(io_lock, flags);
- io_lock = fnic_io_lock_hash(fnic, sc);
- spin_lock_irqsave(io_lock, flags);
+ io_req = (struct fnic_io_req *)CMD_SP(sc);
+ if (!io_req) {
+ spin_unlock_irqrestore(io_lock, flags);
+ return true;
+ }
- io_req = (struct fnic_io_req *)CMD_SP(sc);
+ /*
+ * Found IO that is still pending with firmware and
+ * belongs to the LUN that we are resetting
+ */
+ FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
+ "Found IO in %s on lun\n",
+ fnic_ioreq_state_to_str(CMD_STATE(sc)));
+ cmd_state = CMD_STATE(sc);
+ spin_unlock_irqrestore(io_lock, flags);
+ if (cmd_state == FNIC_IOREQ_ABTS_PENDING)
+ iter_data->ret = 1;
- if (!io_req || sc->device != lun_dev) {
- spin_unlock_irqrestore(io_lock, flags);
- continue;
- }
+ return iter_data->ret ? false : true;
+}
- /*
- * Found IO that is still pending with firmware and
- * belongs to the LUN that we are resetting
- */
- FNIC_SCSI_DBG(KERN_INFO, fnic->lport->host,
- "Found IO in %s on lun\n",
- fnic_ioreq_state_to_str(CMD_STATE(sc)));
+/*
+ * fnic_is_abts_pending() is a helper function that
+ * walks through tag map to check if there is any IOs pending,if there is one,
+ * then it returns 1 (true), otherwise 0 (false)
+ * if @lr_sc is non NULL, then it checks IOs specific to particular LUN,
+ * otherwise, it checks for all IOs.
+ */
+int fnic_is_abts_pending(struct fnic *fnic, struct scsi_cmnd *lr_sc)
+{
+ struct fnic_pending_aborts_iter_data iter_data = {
+ .fnic = fnic,
+ .lun_dev = NULL,
+ .ret = 0,
+ };
- if (CMD_STATE(sc) == FNIC_IOREQ_ABTS_PENDING)
- ret = 1;
- spin_unlock_irqrestore(io_lock, flags);
+ if (lr_sc) {
+ iter_data.lun_dev = lr_sc->device;
+ iter_data.lr_sc = lr_sc;
}
- return ret;
+ /* walk again to check, if IOs are still pending in fw */
+ scsi_host_busy_iter(fnic->lport->host,
+ fnic_abts_pending_iter, &iter_data);
+
+ return iter_data.ret;
}
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("IBM ServeRAID Adapter Driver " IPS_VER_STRING);
MODULE_VERSION(IPS_VER_STRING);
-
-
-/*
- * Overrides for Emacs so that we almost follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-indent-level: 2
- * c-brace-imaginary-offset: 0
- * c-brace-offset: -2
- * c-argdecl-indent: 2
- * c-label-offset: -2
- * c-continued-statement-offset: 2
- * c-continued-brace-offset: 0
- * indent-tabs-mode: nil
- * tab-width: 8
- * End:
- */
IPS_COMPAT_TAMPA, \
IPS_COMPAT_KEYWEST \
}
-
-
-/*
- * Overrides for Emacs so that we almost follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-indent-level: 2
- * c-brace-imaginary-offset: 0
- * c-brace-offset: -2
- * c-argdecl-indent: 2
- * c-label-offset: -2
- * c-continued-statement-offset: 2
- * c-continued-brace-offset: 0
- * indent-tabs-mode: nil
- * tab-width: 8
- * End:
- */
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8 -*- */
/* PARISC LASI driver for the 53c700 chip
*
INIT_LIST_HEAD(&head);
list_add_tail(&head, &piocbq->list);
- ct_req = (struct lpfc_sli_ct_request *)bdeBuf1;
+ ct_req = (struct lpfc_sli_ct_request *)bdeBuf1->virt;
evt_req_id = ct_req->FsType;
cmd = ct_req->CommandResponse.bits.CmdRsp;
if (mb->un.varDmp.word_cnt == 0)
break;
- i = mb->un.varDmp.word_cnt * sizeof(uint32_t);
- if (offset + i > DMP_VPD_SIZE)
- i = DMP_VPD_SIZE - offset;
+ if (mb->un.varDmp.word_cnt > DMP_VPD_SIZE - offset)
+ mb->un.varDmp.word_cnt = DMP_VPD_SIZE - offset;
lpfc_sli_pcimem_bcopy(((uint8_t *)mb) + DMP_RSP_OFFSET,
- lpfc_vpd_data + offset, i);
- offset += i;
- } while (offset < DMP_VPD_SIZE);
+ lpfc_vpd_data + offset,
+ mb->un.varDmp.word_cnt);
+ offset += mb->un.varDmp.word_cnt;
+ } while (mb->un.varDmp.word_cnt && offset < DMP_VPD_SIZE);
lpfc_parse_vpd(phba, lpfc_vpd_data, offset);
lpfc_ctx_cmd ctx_cmd)
{
struct lpfc_io_buf *lpfc_cmd;
+ IOCB_t *icmd = NULL;
int rc = 1;
if (!iocbq || iocbq->vport != vport)
return rc;
- if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
- !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ))
+ if (!(iocbq->iocb_flag & LPFC_IO_FCP) ||
+ !(iocbq->iocb_flag & LPFC_IO_ON_TXCMPLQ) ||
+ iocbq->iocb_flag & LPFC_DRIVER_ABORTED)
+ return rc;
+
+ icmd = &iocbq->iocb;
+ if (icmd->ulpCommand == CMD_ABORT_XRI_CN ||
+ icmd->ulpCommand == CMD_CLOSE_XRI_CN)
return rc;
lpfc_cmd = container_of(iocbq, struct lpfc_io_buf, cur_iocbq);
LPFC_MBOXQ_t *pmb = NULL;
MAILBOX_t *mb;
uint32_t offset = 0;
- int i, rc;
+ int rc;
if (!rgn23_data)
return 0;
if (mb->un.varDmp.word_cnt == 0)
break;
- i = mb->un.varDmp.word_cnt * sizeof(uint32_t);
- if (offset + i > DMP_RGN23_SIZE)
- i = DMP_RGN23_SIZE - offset;
+ if (mb->un.varDmp.word_cnt > DMP_RGN23_SIZE - offset)
+ mb->un.varDmp.word_cnt = DMP_RGN23_SIZE - offset;
+
lpfc_sli_pcimem_bcopy(((uint8_t *)mb) + DMP_RSP_OFFSET,
- rgn23_data + offset, i);
- offset += i;
- } while (offset < DMP_RGN23_SIZE);
+ rgn23_data + offset,
+ mb->un.varDmp.word_cnt);
+ offset += mb->un.varDmp.word_cnt;
+ } while (mb->un.varDmp.word_cnt && offset < DMP_RGN23_SIZE);
mempool_free(pmb, phba->mbox_mem_pool);
return offset;
} __attribute__ ((packed)) mbox_sgl32;
#endif // _MRAID_MBOX_DEFS_H_
-
-/* vim: set ts=8 sw=8 tw=78: */
};
#endif // _MEGA_COMMON_H_
-
-// vim: set ts=8 sw=8 tw=78:
*/
module_init(megaraid_init);
module_exit(megaraid_exit);
-
-/* vim: set ts=8 sw=8 tw=78 ai si: */
#define WROUTDOOR(rdev, value) writel(value, (rdev)->baseaddr + 0x2C)
#endif // _MEGARAID_H_
-
-// vim: set ts=8 sw=8 tw=78:
MODULE_FIRMWARE("qlogic/1280.bin");
MODULE_FIRMWARE("qlogic/12160.bin");
MODULE_VERSION(QLA1280_VERSION);
-
-/*
- * Overrides for Emacs so that we almost follow Linus's tabbing style.
- * Emacs will notice this stuff at the end of the file and automatically
- * adjust the settings for this buffer only. This must remain at the end
- * of the file.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
{
struct qla_work_evt *e;
+ if (vha->host->active_mode == MODE_TARGET)
+ return QLA_FUNCTION_FAILED;
+
e = qla2x00_alloc_work(vha, QLA_EVT_PRLI);
if (!e)
return QLA_FUNCTION_FAILED;
.eh_timed_out = fc_eh_timed_out,
.eh_abort_handler = qla2xxx_eh_abort,
+ .eh_should_retry_cmd = fc_eh_should_retry_cmd,
.eh_device_reset_handler = qla2xxx_eh_device_reset,
.eh_target_reset_handler = qla2xxx_eh_target_reset,
.eh_bus_reset_handler = qla2xxx_eh_bus_reset,
*/
#define SDEBUG_CANQUEUE_WORDS 3 /* a WORD is bits in a long */
#define SDEBUG_CANQUEUE (SDEBUG_CANQUEUE_WORDS * BITS_PER_LONG)
-#define DEF_CMD_PER_LUN 255
+#define DEF_CMD_PER_LUN SDEBUG_CANQUEUE
/* UA - Unit Attention; SA - Service Action; SSU - Start Stop Unit */
#define F_D_IN 1 /* Data-in command (e.g. READ) */
MODULE_PARM_DESC(lbpws, "enable LBP, support WRITE SAME(16) with UNMAP bit (def=0)");
MODULE_PARM_DESC(lbpws10, "enable LBP, support WRITE SAME(10) with UNMAP bit (def=0)");
MODULE_PARM_DESC(lowest_aligned, "lowest aligned lba (def=0)");
-MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
MODULE_PARM_DESC(lun_format, "LUN format: 0->peripheral (def); 1 --> flat address method");
+MODULE_PARM_DESC(max_luns, "number of LUNs per target to simulate(def=1)");
MODULE_PARM_DESC(max_queue, "max number of queued commands (1 to max(def))");
MODULE_PARM_DESC(medium_error_count, "count of sectors to return follow on MEDIUM error");
MODULE_PARM_DESC(medium_error_start, "starting sector number to return MEDIUM error");
MODULE_PARM_DESC(opts, "1->noise, 2->medium_err, 4->timeout, 8->recovered_err... (def=0)");
MODULE_PARM_DESC(per_host_store, "If set, next positive add_host will get new store (def=0)");
MODULE_PARM_DESC(physblk_exp, "physical block exponent (def=0)");
-MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues (1 to max(submit_queues - 1)");
+MODULE_PARM_DESC(poll_queues, "support for iouring iopoll queues (1 to max(submit_queues - 1))");
MODULE_PARM_DESC(ptype, "SCSI peripheral type(def=0[disk])");
MODULE_PARM_DESC(random, "If set, uniformly randomize command duration between 0 and delay_in_ns");
MODULE_PARM_DESC(removable, "claim to have removable media (def=0)");
}
num_in_q = atomic_read(&devip->num_in_q);
+ if (qdepth > SDEBUG_CANQUEUE) {
+ qdepth = SDEBUG_CANQUEUE;
+ pr_warn("%s: requested qdepth [%d] exceeds canqueue [%d], trim\n", __func__,
+ qdepth, SDEBUG_CANQUEUE);
+ }
if (qdepth < 1)
qdepth = 1;
- /* allow to exceed max host qc_arr elements for testing */
- if (qdepth > SDEBUG_CANQUEUE + 10)
- qdepth = SDEBUG_CANQUEUE + 10;
- scsi_change_queue_depth(sdev, qdepth);
+ if (qdepth != sdev->queue_depth)
+ scsi_change_queue_depth(sdev, qdepth);
if (SDEBUG_OPT_Q_NOISE & sdebug_opts) {
sdev_printk(KERN_INFO, sdev, "%s: qdepth=%d, num_in_q=%d\n",
sdbg_host = to_sdebug_host(dev);
sdebug_driver_template.can_queue = sdebug_max_queue;
+ sdebug_driver_template.cmd_per_lun = sdebug_max_queue;
if (!sdebug_clustering)
sdebug_driver_template.dma_boundary = PAGE_SIZE - 1;
* If condition not met, trim poll_queues to 1 (just for simplicity).
*/
if (poll_queues >= submit_queues) {
- pr_warn("%s: trim poll_queues to 1\n", my_name);
+ if (submit_queues < 3)
+ pr_warn("%s: trim poll_queues to 1\n", my_name);
+ else
+ pr_warn("%s: trim poll_queues to 1. Perhaps try poll_queues=%d\n",
+ my_name, submit_queues - 1);
poll_queues = 1;
}
if (poll_queues)
#include <linux/genhd.h>
#include <linux/kernel.h>
#include <linux/blkdev.h>
+#include <linux/pagemap.h>
#include <linux/msdos_partition.h>
#include <asm/unaligned.h>
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8 -*- */
/* SNI RM driver
*
#include "ufs.h"
#include "ufs-sysfs.h"
-static const char *ufschd_uic_link_state_to_string(
+static const char *ufshcd_uic_link_state_to_string(
enum uic_link_state state)
{
switch (state) {
}
}
-static const char *ufschd_ufs_dev_pwr_mode_to_string(
+static const char *ufshcd_ufs_dev_pwr_mode_to_string(
enum ufs_dev_pwr_mode state)
{
switch (state) {
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return sysfs_emit(buf, "%s\n", ufschd_ufs_dev_pwr_mode_to_string(
+ return sysfs_emit(buf, "%s\n", ufshcd_ufs_dev_pwr_mode_to_string(
ufs_pm_lvl_states[hba->rpm_lvl].dev_state));
}
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return sysfs_emit(buf, "%s\n", ufschd_uic_link_state_to_string(
+ return sysfs_emit(buf, "%s\n", ufshcd_uic_link_state_to_string(
ufs_pm_lvl_states[hba->rpm_lvl].link_state));
}
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return sysfs_emit(buf, "%s\n", ufschd_ufs_dev_pwr_mode_to_string(
+ return sysfs_emit(buf, "%s\n", ufshcd_ufs_dev_pwr_mode_to_string(
ufs_pm_lvl_states[hba->spm_lvl].dev_state));
}
{
struct ufs_hba *hba = dev_get_drvdata(dev);
- return sysfs_emit(buf, "%s\n", ufschd_uic_link_state_to_string(
+ return sysfs_emit(buf, "%s\n", ufshcd_uic_link_state_to_string(
ufs_pm_lvl_states[hba->spm_lvl].link_state));
}
} else if (!ufshcd_is_ufs_dev_active(hba)) {
ufshcd_toggle_vreg(hba->dev, hba->vreg_info.vcc, false);
vcc_off = true;
- if (!ufshcd_is_link_active(hba)) {
+ if (ufshcd_is_link_hibern8(hba) || ufshcd_is_link_off(hba)) {
ufshcd_config_vreg_lpm(hba, hba->vreg_info.vccq);
ufshcd_config_vreg_lpm(hba, hba->vreg_info.vccq2);
}
!hba->dev_info.is_lu_power_on_wp) {
ret = ufshcd_setup_vreg(hba, true);
} else if (!ufshcd_is_ufs_dev_active(hba)) {
- if (!ret && !ufshcd_is_link_active(hba)) {
+ if (!ufshcd_is_link_active(hba)) {
ret = ufshcd_config_vreg_hpm(hba, hba->vreg_info.vccq);
if (ret)
goto vcc_disable;
if (!hba->is_powered)
return 0;
+ cancel_delayed_work_sync(&hba->rpm_dev_flush_recheck_work);
+
if ((ufs_get_pm_lvl_to_dev_pwr_mode(hba->spm_lvl) ==
hba->curr_dev_pwr_mode) &&
(ufs_get_pm_lvl_to_link_pwr_state(hba->spm_lvl) ==
hba->uic_link_state) &&
+ pm_runtime_suspended(hba->dev) &&
!hba->dev_info.b_rpm_dev_flush_capable)
goto out;
return 1;
}
-static unsigned int tcmu_handle_completions(struct tcmu_dev *udev)
+static bool tcmu_handle_completions(struct tcmu_dev *udev)
{
struct tcmu_mailbox *mb;
struct tcmu_cmd *cmd;
pr_err("cmd_id %u not found, ring is broken\n",
entry->hdr.cmd_id);
set_bit(TCMU_DEV_BIT_BROKEN, &udev->flags);
- break;
+ return false;
}
tcmu_handle_completion(cmd, entry);
platform_set_drvdata(pdev, pdata);
base = devm_platform_ioremap_resource(pdev, 0);
- if (IS_ERR(base)) {
- dev_err(dev, "failed to get io address\n");
+ if (IS_ERR(base))
return PTR_ERR(base);
- }
pdata->regmap = devm_regmap_init_mmio(dev, base,
pdata->data->regmap_config);
data->regs = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(data->regs)) {
err = PTR_ERR(data->regs);
- dev_err(&pdev->dev, "Could not get registers: %d\n", err);
return err;
}
#include <linux/cpu.h>
#include <linux/cpufreq.h>
#include <linux/cpu_cooling.h>
+#include <linux/device.h>
#include <linux/energy_model.h>
#include <linux/err.h>
#include <linux/export.h>
-#include <linux/idr.h>
#include <linux/pm_opp.h>
#include <linux/pm_qos.h>
#include <linux/slab.h>
/**
* struct cpufreq_cooling_device - data for cooling device with cpufreq
- * @id: unique integer value corresponding to each cpufreq_cooling_device
- * registered.
* @last_load: load measured by the latest call to cpufreq_get_requested_power()
* @cpufreq_state: integer value representing the current state of cpufreq
* cooling devices.
* @cdev: thermal_cooling_device pointer to keep track of the
* registered cooling device.
* @policy: cpufreq policy.
- * @node: list_head to link all cpufreq_cooling_device together.
* @idle_time: idle time stats
* @qos_req: PM QoS contraint to apply
*
* cpufreq_cooling_device.
*/
struct cpufreq_cooling_device {
- int id;
u32 last_load;
unsigned int cpufreq_state;
unsigned int max_level;
struct em_perf_domain *em;
struct cpufreq_policy *policy;
- struct list_head node;
#ifndef CONFIG_SMP
struct time_in_idle *idle_time;
#endif
struct freq_qos_request qos_req;
};
-static DEFINE_IDA(cpufreq_ida);
-static DEFINE_MUTEX(cooling_list_lock);
-static LIST_HEAD(cpufreq_cdev_list);
-
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
/**
* get_level: Find the level for a particular frequency
{
int i;
- for (i = cpufreq_cdev->max_level; i >= 0; i--) {
+ for (i = cpufreq_cdev->max_level; i > 0; i--) {
if (power >= cpufreq_cdev->em->table[i].power)
break;
}
{
struct thermal_cooling_device *cdev;
struct cpufreq_cooling_device *cpufreq_cdev;
- char dev_name[THERMAL_NAME_LENGTH];
unsigned int i;
struct device *dev;
int ret;
struct thermal_cooling_device_ops *cooling_ops;
+ char *name;
dev = get_cpu_device(policy->cpu);
if (unlikely(!dev)) {
/* max_level is an index, not a counter */
cpufreq_cdev->max_level = i - 1;
- ret = ida_simple_get(&cpufreq_ida, 0, 0, GFP_KERNEL);
- if (ret < 0) {
- cdev = ERR_PTR(ret);
- goto free_idle_time;
- }
- cpufreq_cdev->id = ret;
-
- snprintf(dev_name, sizeof(dev_name), "thermal-cpufreq-%d",
- cpufreq_cdev->id);
-
cooling_ops = &cpufreq_cooling_ops;
#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR
pr_err("%s: unsorted frequency tables are not supported\n",
__func__);
cdev = ERR_PTR(-EINVAL);
- goto remove_ida;
+ goto free_idle_time;
}
ret = freq_qos_add_request(&policy->constraints,
pr_err("%s: Failed to add freq constraint (%d)\n", __func__,
ret);
cdev = ERR_PTR(ret);
- goto remove_ida;
+ goto free_idle_time;
}
- cdev = thermal_of_cooling_device_register(np, dev_name, cpufreq_cdev,
+ cdev = ERR_PTR(-ENOMEM);
+ name = kasprintf(GFP_KERNEL, "cpufreq-%s", dev_name(dev));
+ if (!name)
+ goto remove_qos_req;
+
+ cdev = thermal_of_cooling_device_register(np, name, cpufreq_cdev,
cooling_ops);
+ kfree(name);
+
if (IS_ERR(cdev))
goto remove_qos_req;
- mutex_lock(&cooling_list_lock);
- list_add(&cpufreq_cdev->node, &cpufreq_cdev_list);
- mutex_unlock(&cooling_list_lock);
-
return cdev;
remove_qos_req:
freq_qos_remove_request(&cpufreq_cdev->qos_req);
-remove_ida:
- ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
free_idle_time:
free_idle_time(cpufreq_cdev);
free_cdev:
cpufreq_cdev = cdev->devdata;
- mutex_lock(&cooling_list_lock);
- list_del(&cpufreq_cdev->node);
- mutex_unlock(&cooling_list_lock);
-
thermal_cooling_device_unregister(cdev);
freq_qos_remove_request(&cpufreq_cdev->qos_req);
- ida_simple_remove(&cpufreq_ida, cpufreq_cdev->id);
free_idle_time(cpufreq_cdev);
kfree(cpufreq_cdev);
}
#include <linux/cpu_cooling.h>
#include <linux/cpuidle.h>
+#include <linux/device.h>
#include <linux/err.h>
#include <linux/idle_inject.h>
-#include <linux/idr.h>
#include <linux/of_device.h>
#include <linux/slab.h>
#include <linux/thermal.h>
unsigned long state;
};
-static DEFINE_IDA(cpuidle_ida);
-
/**
* cpuidle_cooling_runtime - Running time computation
* @idle_duration_us: CPU idle time to inject in microseconds
struct idle_inject_device *ii_dev;
struct cpuidle_cooling_device *idle_cdev;
struct thermal_cooling_device *cdev;
+ struct device *dev;
unsigned int idle_duration_us = TICK_USEC;
unsigned int latency_us = UINT_MAX;
- char dev_name[THERMAL_NAME_LENGTH];
- int id, ret;
+ char *name;
+ int ret;
idle_cdev = kzalloc(sizeof(*idle_cdev), GFP_KERNEL);
if (!idle_cdev) {
goto out;
}
- id = ida_simple_get(&cpuidle_ida, 0, 0, GFP_KERNEL);
- if (id < 0) {
- ret = id;
- goto out_kfree;
- }
-
ii_dev = idle_inject_register(drv->cpumask);
if (!ii_dev) {
ret = -EINVAL;
- goto out_id;
+ goto out_kfree;
}
of_property_read_u32(np, "duration-us", &idle_duration_us);
idle_cdev->ii_dev = ii_dev;
- snprintf(dev_name, sizeof(dev_name), "thermal-idle-%d", id);
+ dev = get_cpu_device(cpumask_first(drv->cpumask));
- cdev = thermal_of_cooling_device_register(np, dev_name, idle_cdev,
+ name = kasprintf(GFP_KERNEL, "idle-%s", dev_name(dev));
+ if (!name) {
+ ret = -ENOMEM;
+ goto out_unregister;
+ }
+
+ cdev = thermal_of_cooling_device_register(np, name, idle_cdev,
&cpuidle_cooling_ops);
if (IS_ERR(cdev)) {
ret = PTR_ERR(cdev);
- goto out_unregister;
+ goto out_kfree_name;
}
pr_debug("%s: Idle injection set with idle duration=%u, latency=%u\n",
- dev_name, idle_duration_us, latency_us);
+ name, idle_duration_us, latency_us);
+
+ kfree(name);
return 0;
+out_kfree_name:
+ kfree(name);
out_unregister:
idle_inject_unregister(ii_dev);
-out_id:
- ida_simple_remove(&cpuidle_ida, id);
out_kfree:
kfree(idle_cdev);
out:
#include <linux/devfreq_cooling.h>
#include <linux/energy_model.h>
#include <linux/export.h>
-#include <linux/idr.h>
#include <linux/slab.h>
#include <linux/pm_opp.h>
#include <linux/pm_qos.h>
#define HZ_PER_KHZ 1000
#define SCALE_ERROR_MITIGATION 100
-static DEFINE_IDA(devfreq_ida);
-
/**
* struct devfreq_cooling_device - Devfreq cooling device
- * @id: unique integer value corresponding to each
* devfreq_cooling_device registered.
* @cdev: Pointer to associated thermal cooling device.
* @devfreq: Pointer to associated devfreq device.
* @em_pd: Energy Model for the associated Devfreq device
*/
struct devfreq_cooling_device {
- int id;
struct thermal_cooling_device *cdev;
struct devfreq *devfreq;
unsigned long cooling_state;
struct thermal_cooling_device *cdev;
struct device *dev = df->dev.parent;
struct devfreq_cooling_device *dfc;
- char dev_name[THERMAL_NAME_LENGTH];
+ char *name;
int err, num_opps;
dfc = kzalloc(sizeof(*dfc), GFP_KERNEL);
if (err < 0)
goto free_table;
- err = ida_simple_get(&devfreq_ida, 0, 0, GFP_KERNEL);
- if (err < 0)
+ err = -ENOMEM;
+ name = kasprintf(GFP_KERNEL, "devfreq-%s", dev_name(dev));
+ if (!name)
goto remove_qos_req;
- dfc->id = err;
-
- snprintf(dev_name, sizeof(dev_name), "thermal-devfreq-%d", dfc->id);
-
- cdev = thermal_of_cooling_device_register(np, dev_name, dfc,
+ cdev = thermal_of_cooling_device_register(np, name, dfc,
&devfreq_cooling_ops);
+ kfree(name);
+
if (IS_ERR(cdev)) {
err = PTR_ERR(cdev);
dev_err(dev,
"Failed to register devfreq cooling device (%d)\n",
err);
- goto release_ida;
+ goto remove_qos_req;
}
dfc->cdev = cdev;
return cdev;
-release_ida:
- ida_simple_remove(&devfreq_ida, dfc->id);
remove_qos_req:
dev_pm_qos_remove_request(&dfc->req_max_freq);
free_table:
dev = dfc->devfreq->dev.parent;
thermal_cooling_device_unregister(dfc->cdev);
- ida_simple_remove(&devfreq_ida, dfc->id);
dev_pm_qos_remove_request(&dfc->req_max_freq);
em_dev_unregister_perf_domain(dev);
int total_instance = 0;
int cur_trip_level = get_trip_level(tz);
+ mutex_lock(&tz->lock);
+
list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
if (instance->trip != trip)
continue;
instance->target = get_target_state(tz, cdev, percentage,
cur_trip_level);
- mutex_lock(&instance->cdev->lock);
- instance->cdev->updated = false;
- mutex_unlock(&instance->cdev->lock);
- thermal_cdev_update(cdev);
+ mutex_lock(&cdev->lock);
+ __thermal_cdev_update(cdev);
+ mutex_unlock(&cdev->lock);
}
+
+ mutex_unlock(&tz->lock);
return 0;
}
instance->target = clamp_val(state, instance->lower, instance->upper);
mutex_lock(&cdev->lock);
- cdev->updated = false;
+ __thermal_cdev_update(cdev);
mutex_unlock(&cdev->lock);
- thermal_cdev_update(cdev);
return 0;
}
*/
extra_power = min(extra_power, capped_extra_power);
if (capped_extra_power > 0)
- for (i = 0; i < num_actors; i++)
- granted_power[i] += (extra_actor_power[i] *
- extra_power) / capped_extra_power;
+ for (i = 0; i < num_actors; i++) {
+ u64 extra_range = (u64)extra_actor_power[i] * extra_power;
+ granted_power[i] += DIV_ROUND_CLOSEST_ULL(extra_range,
+ capped_extra_power);
+ }
}
static int allocate_power(struct thermal_zone_device *tz,
params->prev_err = 0;
}
-static void allow_maximum_power(struct thermal_zone_device *tz)
+static void allow_maximum_power(struct thermal_zone_device *tz, bool update)
{
struct thermal_instance *instance;
struct power_allocator_params *params = tz->governor_data;
+ u32 req_power;
mutex_lock(&tz->lock);
list_for_each_entry(instance, &tz->thermal_instances, tz_node) {
+ struct thermal_cooling_device *cdev = instance->cdev;
+
if ((instance->trip != params->trip_max_desired_temperature) ||
(!cdev_is_power_actor(instance->cdev)))
continue;
instance->target = 0;
mutex_lock(&instance->cdev->lock);
- instance->cdev->updated = false;
+ /*
+ * Call for updating the cooling devices local stats and avoid
+ * periods of dozen of seconds when those have not been
+ * maintained.
+ */
+ cdev->ops->get_requested_power(cdev, &req_power);
+
+ if (update)
+ __thermal_cdev_update(instance->cdev);
+
mutex_unlock(&instance->cdev->lock);
- thermal_cdev_update(instance->cdev);
}
mutex_unlock(&tz->lock);
}
int ret;
int switch_on_temp, control_temp;
struct power_allocator_params *params = tz->governor_data;
+ bool update;
/*
* We get called for every trip point but we only need to do
ret = tz->ops->get_trip_temp(tz, params->trip_switch_on,
&switch_on_temp);
if (!ret && (tz->temperature < switch_on_temp)) {
+ update = (tz->last_temperature >= switch_on_temp);
tz->passive = 0;
reset_pid_controller(params);
- allow_maximum_power(tz);
+ allow_maximum_power(tz, update);
return 0;
}
/*
- * Hisilicon thermal sensor driver
+ * HiSilicon thermal sensor driver
*
- * Copyright (c) 2014-2015 Hisilicon Limited.
+ * Copyright (c) 2014-2015 HiSilicon Limited.
* Copyright (c) 2014-2015 Linaro Limited.
*
* Xinwei Kong <kong.kongxinwei@hisilicon.com>
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
data->regs = devm_ioremap_resource(dev, res);
- if (IS_ERR(data->regs)) {
- dev_err(dev, "failed to get io address\n");
+ if (IS_ERR(data->regs))
return PTR_ERR(data->regs);
- }
ret = data->ops->probe(data);
if (ret)
MODULE_AUTHOR("Xinwei Kong <kong.kongxinwei@hisilicon.com>");
MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>");
-MODULE_DESCRIPTION("Hisilicon thermal driver");
+MODULE_DESCRIPTION("HiSilicon thermal driver");
MODULE_LICENSE("GPL v2");
Enable this to support thermal reporting on certain intel PCHs.
Thermal reporting device will provide temperature reading,
programmable trip points and other information.
+
+config INTEL_TCC_COOLING
+ tristate "Intel TCC offset cooling Driver"
+ depends on X86
+ help
+ Enable this to support system cooling by adjusting the effective TCC
+ activation temperature via the TCC Offset register, which is widely
+ supported on modern Intel platforms.
+ Note that, on different platforms, the behavior might be different
+ on how fast the setting takes effect, and how much the CPU frequency
+ is reduced.
obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/
obj-$(CONFIG_INTEL_BXT_PMIC_THERMAL) += intel_bxt_pmic_thermal.o
obj-$(CONFIG_INTEL_PCH_THERMAL) += intel_pch_thermal.o
+obj-$(CONFIG_INTEL_TCC_COOLING) += intel_tcc_cooling.o
obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * cooling device driver that activates the processor throttling by
+ * programming the TCC Offset register.
+ * Copyright (c) 2021, Intel Corporation.
+ */
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/device.h>
+#include <linux/module.h>
+#include <linux/thermal.h>
+#include <asm/cpu_device_id.h>
+
+#define TCC_SHIFT 24
+#define TCC_MASK (0x3fULL<<24)
+#define TCC_PROGRAMMABLE BIT(30)
+
+static struct thermal_cooling_device *tcc_cdev;
+
+static int tcc_get_max_state(struct thermal_cooling_device *cdev, unsigned long
+ *state)
+{
+ *state = TCC_MASK >> TCC_SHIFT;
+ return 0;
+}
+
+static int tcc_offset_update(int tcc)
+{
+ u64 val;
+ int err;
+
+ err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
+ if (err)
+ return err;
+
+ val &= ~TCC_MASK;
+ val |= tcc << TCC_SHIFT;
+
+ err = wrmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, val);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int tcc_get_cur_state(struct thermal_cooling_device *cdev, unsigned long
+ *state)
+{
+ u64 val;
+ int err;
+
+ err = rdmsrl_safe(MSR_IA32_TEMPERATURE_TARGET, &val);
+ if (err)
+ return err;
+
+ *state = (val & TCC_MASK) >> TCC_SHIFT;
+ return 0;
+}
+
+static int tcc_set_cur_state(struct thermal_cooling_device *cdev, unsigned long
+ state)
+{
+ return tcc_offset_update(state);
+}
+
+static const struct thermal_cooling_device_ops tcc_cooling_ops = {
+ .get_max_state = tcc_get_max_state,
+ .get_cur_state = tcc_get_cur_state,
+ .set_cur_state = tcc_set_cur_state,
+};
+
+static const struct x86_cpu_id tcc_ids[] __initconst = {
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(KABYLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L, NULL),
+ X86_MATCH_INTEL_FAM6_MODEL(COMETLAKE, NULL),
+ {}
+};
+
+MODULE_DEVICE_TABLE(x86cpu, tcc_ids);
+
+static int __init tcc_cooling_init(void)
+{
+ int ret;
+ u64 val;
+ const struct x86_cpu_id *id;
+
+ int err;
+
+ id = x86_match_cpu(tcc_ids);
+ if (!id)
+ return -ENODEV;
+
+ err = rdmsrl_safe(MSR_PLATFORM_INFO, &val);
+ if (err)
+ return err;
+
+ if (!(val & TCC_PROGRAMMABLE))
+ return -ENODEV;
+
+ pr_info("Programmable TCC Offset detected\n");
+
+ tcc_cdev =
+ thermal_cooling_device_register("TCC Offset", NULL,
+ &tcc_cooling_ops);
+ if (IS_ERR(tcc_cdev)) {
+ ret = PTR_ERR(tcc_cdev);
+ return ret;
+ }
+ return 0;
+}
+
+module_init(tcc_cooling_init)
+
+static void __exit tcc_cooling_exit(void)
+{
+ thermal_cooling_device_unregister(tcc_cdev);
+}
+
+module_exit(tcc_cooling_exit)
+
+MODULE_DESCRIPTION("TCC offset cooling device Driver");
+MODULE_AUTHOR("Zhang Rui <rui.zhang@intel.com>");
+MODULE_LICENSE("GPL v2");
static int raw_to_mcelsius_v2(struct mtk_thermal *mt, int sensno, s32 raw)
{
- s32 format_1 = 0;
- s32 format_2 = 0;
- s32 g_oe = 1;
- s32 g_gain = 1;
- s32 g_x_roomt = 0;
- s32 tmp = 0;
+ s32 format_1;
+ s32 format_2;
+ s32 g_oe;
+ s32 g_gain;
+ s32 g_x_roomt;
+ s32 tmp;
if (raw == 0)
return 0;
#include "../thermal_core.h"
+#define QPNP_TM_REG_DIG_MAJOR 0x01
#define QPNP_TM_REG_TYPE 0x04
#define QPNP_TM_REG_SUBTYPE 0x05
#define QPNP_TM_REG_STATUS 0x08
#define ALARM_CTRL_FORCE_ENABLE BIT(7)
-/*
- * Trip point values based on threshold control
- * 0 = {105 C, 125 C, 145 C}
- * 1 = {110 C, 130 C, 150 C}
- * 2 = {115 C, 135 C, 155 C}
- * 3 = {120 C, 140 C, 160 C}
-*/
-#define TEMP_STAGE_STEP 20000 /* Stage step: 20.000 C */
-#define TEMP_STAGE_HYSTERESIS 2000
+#define THRESH_COUNT 4
+#define STAGE_COUNT 3
+
+/* Over-temperature trip point values in mC */
+static const long temp_map_gen1[THRESH_COUNT][STAGE_COUNT] = {
+ { 105000, 125000, 145000 },
+ { 110000, 130000, 150000 },
+ { 115000, 135000, 155000 },
+ { 120000, 140000, 160000 },
+};
+
+static const long temp_map_gen2_v1[THRESH_COUNT][STAGE_COUNT] = {
+ { 90000, 110000, 140000 },
+ { 95000, 115000, 145000 },
+ { 100000, 120000, 150000 },
+ { 105000, 125000, 155000 },
+};
-#define TEMP_THRESH_MIN 105000 /* Threshold Min: 105 C */
-#define TEMP_THRESH_STEP 5000 /* Threshold step: 5 C */
+#define TEMP_THRESH_STEP 5000 /* Threshold step: 5 C */
#define THRESH_MIN 0
#define THRESH_MAX 3
-/* Stage 2 Threshold Min: 125 C */
-#define STAGE2_THRESHOLD_MIN 125000
-/* Stage 2 Threshold Max: 140 C */
-#define STAGE2_THRESHOLD_MAX 140000
+#define TEMP_STAGE_HYSTERESIS 2000
/* Temperature in Milli Celsius reported during stage 0 if no ADC is present */
#define DEFAULT_TEMP 37000
bool initialized;
struct iio_channel *adc;
+ const long (*temp_map)[THRESH_COUNT][STAGE_COUNT];
};
/* This array maps from GEN2 alarm state to GEN1 alarm stage */
return regmap_write(chip->map, chip->base + addr, data);
}
+/**
+ * qpnp_tm_decode_temp() - return temperature in mC corresponding to the
+ * specified over-temperature stage
+ * @chip: Pointer to the qpnp_tm chip
+ * @stage: Over-temperature stage
+ *
+ * Return: temperature in mC
+ */
+static long qpnp_tm_decode_temp(struct qpnp_tm_chip *chip, unsigned int stage)
+{
+ if (!chip->temp_map || chip->thresh >= THRESH_COUNT || stage == 0 ||
+ stage > STAGE_COUNT)
+ return 0;
+
+ return (*chip->temp_map)[chip->thresh][stage - 1];
+}
+
/**
* qpnp_tm_get_temp_stage() - return over-temperature stage
* @chip: Pointer to the qpnp_tm chip
if (stage_new > stage_old) {
/* increasing stage, use lower bound */
- chip->temp = (stage_new - 1) * TEMP_STAGE_STEP +
- chip->thresh * TEMP_THRESH_STEP +
- TEMP_STAGE_HYSTERESIS + TEMP_THRESH_MIN;
+ chip->temp = qpnp_tm_decode_temp(chip, stage_new)
+ + TEMP_STAGE_HYSTERESIS;
} else if (stage_new < stage_old) {
/* decreasing stage, use upper bound */
- chip->temp = stage_new * TEMP_STAGE_STEP +
- chip->thresh * TEMP_THRESH_STEP -
- TEMP_STAGE_HYSTERESIS + TEMP_THRESH_MIN;
+ chip->temp = qpnp_tm_decode_temp(chip, stage_new + 1)
+ - TEMP_STAGE_HYSTERESIS;
}
chip->stage = stage;
static int qpnp_tm_update_critical_trip_temp(struct qpnp_tm_chip *chip,
int temp)
{
- u8 reg;
+ long stage2_threshold_min = (*chip->temp_map)[THRESH_MIN][1];
+ long stage2_threshold_max = (*chip->temp_map)[THRESH_MAX][1];
bool disable_s2_shutdown = false;
+ u8 reg;
WARN_ON(!mutex_is_locked(&chip->lock));
/*
* Default: S2 and S3 shutdown enabled, thresholds at
- * 105C/125C/145C, monitoring at 25Hz
+ * lowest threshold set, monitoring at 25Hz
*/
reg = SHUTDOWN_CTRL1_RATE_25HZ;
if (temp == THERMAL_TEMP_INVALID ||
- temp < STAGE2_THRESHOLD_MIN) {
+ temp < stage2_threshold_min) {
chip->thresh = THRESH_MIN;
goto skip;
}
- if (temp <= STAGE2_THRESHOLD_MAX) {
+ if (temp <= stage2_threshold_max) {
chip->thresh = THRESH_MAX -
- ((STAGE2_THRESHOLD_MAX - temp) /
+ ((stage2_threshold_max - temp) /
TEMP_THRESH_STEP);
disable_s2_shutdown = true;
} else {
? chip->stage : alarm_state_map[chip->stage];
if (stage)
- chip->temp = chip->thresh * TEMP_THRESH_STEP +
- (stage - 1) * TEMP_STAGE_STEP +
- TEMP_THRESH_MIN;
+ chip->temp = qpnp_tm_decode_temp(chip, stage);
crit_temp = qpnp_tm_get_critical_trip_temp(chip);
ret = qpnp_tm_update_critical_trip_temp(chip, crit_temp);
{
struct qpnp_tm_chip *chip;
struct device_node *node;
- u8 type, subtype;
+ u8 type, subtype, dig_major;
u32 res;
int ret, irq;
return ret;
}
+ ret = qpnp_tm_read(chip, QPNP_TM_REG_DIG_MAJOR, &dig_major);
+ if (ret < 0) {
+ dev_err(&pdev->dev, "could not read dig_major\n");
+ return ret;
+ }
+
if (type != QPNP_TM_TYPE || (subtype != QPNP_TM_SUBTYPE_GEN1
&& subtype != QPNP_TM_SUBTYPE_GEN2)) {
dev_err(&pdev->dev, "invalid type 0x%02x or subtype 0x%02x\n",
}
chip->subtype = subtype;
+ if (subtype == QPNP_TM_SUBTYPE_GEN2 && dig_major >= 1)
+ chip->temp_map = &temp_map_gen2_v1;
+ else
+ chip->temp_map = &temp_map_gen1;
/*
* Register the sensor before initializing the hardware to be able to
#include <linux/thermal.h>
#include "tsens.h"
-#define CAL_MDEGC 30000
-
#define CONFIG_ADDR 0x3640
#define CONFIG_ADDR_8660 0x3620
/* CONFIG_ADDR bitmasks */
#define CONFIG_SHIFT_8660 28
#define CONFIG_MASK_8660 (3 << CONFIG_SHIFT_8660)
-#define STATUS_CNTL_ADDR_8064 0x3660
#define CNTL_ADDR 0x3620
/* CNTL_ADDR bitmasks */
#define EN BIT(0)
#define SW_RST BIT(1)
-#define SENSOR0_EN BIT(3)
+
+#define MEASURE_PERIOD BIT(18)
#define SLP_CLK_ENA BIT(26)
#define SLP_CLK_ENA_8660 BIT(24)
-#define MEASURE_PERIOD 1
#define SENSOR0_SHIFT 3
-/* INT_STATUS_ADDR bitmasks */
-#define MIN_STATUS_MASK BIT(0)
-#define LOWER_STATUS_CLR BIT(1)
-#define UPPER_STATUS_CLR BIT(2)
-#define MAX_STATUS_MASK BIT(3)
-
#define THRESHOLD_ADDR 0x3624
-/* THRESHOLD_ADDR bitmasks */
-#define THRESHOLD_MAX_LIMIT_SHIFT 24
-#define THRESHOLD_MIN_LIMIT_SHIFT 16
-#define THRESHOLD_UPPER_LIMIT_SHIFT 8
-#define THRESHOLD_LOWER_LIMIT_SHIFT 0
-
-/* Initial temperature threshold values */
-#define LOWER_LIMIT_TH 0x50
-#define UPPER_LIMIT_TH 0xdf
-#define MIN_LIMIT_TH 0x0
-#define MAX_LIMIT_TH 0xff
-
-#define S0_STATUS_ADDR 0x3628
+
#define INT_STATUS_ADDR 0x363c
-#define TRDY_MASK BIT(7)
-#define TIMEOUT_US 100
+
+#define S0_STATUS_OFF 0x3628
+#define S1_STATUS_OFF 0x362c
+#define S2_STATUS_OFF 0x3630
+#define S3_STATUS_OFF 0x3634
+#define S4_STATUS_OFF 0x3638
+#define S5_STATUS_OFF 0x3664 /* Sensors 5-10 found on apq8064/msm8960 */
+#define S6_STATUS_OFF 0x3668
+#define S7_STATUS_OFF 0x366c
+#define S8_STATUS_OFF 0x3670
+#define S9_STATUS_OFF 0x3674
+#define S10_STATUS_OFF 0x3678
+
+/* Original slope - 350 to compensate mC to C inaccuracy */
+static u32 tsens_msm8960_slope[] = {
+ 826, 826, 804, 826,
+ 761, 782, 782, 849,
+ 782, 849, 782
+ };
static int suspend_8960(struct tsens_priv *priv)
{
static int enable_8960(struct tsens_priv *priv, int id)
{
int ret;
- u32 reg, mask;
+ u32 reg, mask = BIT(id);
ret = regmap_read(priv->tm_map, CNTL_ADDR, ®);
if (ret)
return ret;
- mask = BIT(id + SENSOR0_SHIFT);
+ /* HARDWARE BUG:
+ * On platforms with more than 6 sensors, all remaining sensors
+ * must be enabled together, otherwise undefined results are expected.
+ * (Sensor 6-7 disabled, Sensor 3 disabled...) In the original driver,
+ * all the sensors are enabled in one step hence this bug is not
+ * triggered.
+ */
+ if (id > 5)
+ mask = GENMASK(10, 6);
+
+ mask <<= SENSOR0_SHIFT;
+
+ /* Sensors already enabled. Skip. */
+ if ((reg & mask) == mask)
+ return 0;
+
ret = regmap_write(priv->tm_map, CNTL_ADDR, reg | SW_RST);
if (ret)
return ret;
+ reg |= MEASURE_PERIOD;
+
if (priv->num_sensors > 1)
reg |= mask | SLP_CLK_ENA | EN;
else
regmap_write(priv->tm_map, CNTL_ADDR, reg_cntl);
}
-static int init_8960(struct tsens_priv *priv)
-{
- int ret, i;
- u32 reg_cntl;
-
- priv->tm_map = dev_get_regmap(priv->dev, NULL);
- if (!priv->tm_map)
- return -ENODEV;
-
- /*
- * The status registers for each sensor are discontiguous
- * because some SoCs have 5 sensors while others have more
- * but the control registers stay in the same place, i.e
- * directly after the first 5 status registers.
- */
- for (i = 0; i < priv->num_sensors; i++) {
- if (i >= 5)
- priv->sensor[i].status = S0_STATUS_ADDR + 40;
- priv->sensor[i].status += i * 4;
- }
-
- reg_cntl = SW_RST;
- ret = regmap_update_bits(priv->tm_map, CNTL_ADDR, SW_RST, reg_cntl);
- if (ret)
- return ret;
-
- if (priv->num_sensors > 1) {
- reg_cntl |= SLP_CLK_ENA | (MEASURE_PERIOD << 18);
- reg_cntl &= ~SW_RST;
- ret = regmap_update_bits(priv->tm_map, CONFIG_ADDR,
- CONFIG_MASK, CONFIG);
- } else {
- reg_cntl |= SLP_CLK_ENA_8660 | (MEASURE_PERIOD << 16);
- reg_cntl &= ~CONFIG_MASK_8660;
- reg_cntl |= CONFIG_8660 << CONFIG_SHIFT_8660;
- }
-
- reg_cntl |= GENMASK(priv->num_sensors - 1, 0) << SENSOR0_SHIFT;
- ret = regmap_write(priv->tm_map, CNTL_ADDR, reg_cntl);
- if (ret)
- return ret;
-
- reg_cntl |= EN;
- ret = regmap_write(priv->tm_map, CNTL_ADDR, reg_cntl);
- if (ret)
- return ret;
-
- return 0;
-}
-
static int calibrate_8960(struct tsens_priv *priv)
{
int i;
char *data;
-
- ssize_t num_read = priv->num_sensors;
- struct tsens_sensor *s = priv->sensor;
+ u32 p1[11];
data = qfprom_read(priv->dev, "calib");
if (IS_ERR(data))
if (IS_ERR(data))
return PTR_ERR(data);
- for (i = 0; i < num_read; i++, s++)
- s->offset = data[i];
+ for (i = 0; i < priv->num_sensors; i++) {
+ p1[i] = data[i];
+ priv->sensor[i].slope = tsens_msm8960_slope[i];
+ }
+
+ compute_intercept_slope(priv, p1, NULL, ONE_PT_CALIB);
kfree(data);
return 0;
}
-/* Temperature on y axis and ADC-code on x-axis */
-static inline int code_to_mdegC(u32 adc_code, const struct tsens_sensor *s)
-{
- int slope, offset;
-
- slope = thermal_zone_get_slope(s->tzd);
- offset = CAL_MDEGC - slope * s->offset;
-
- return adc_code * slope + offset;
-}
-
-static int get_temp_8960(const struct tsens_sensor *s, int *temp)
-{
- int ret;
- u32 code, trdy;
- struct tsens_priv *priv = s->priv;
- unsigned long timeout;
-
- timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
- do {
- ret = regmap_read(priv->tm_map, INT_STATUS_ADDR, &trdy);
- if (ret)
- return ret;
- if (!(trdy & TRDY_MASK))
- continue;
- ret = regmap_read(priv->tm_map, s->status, &code);
- if (ret)
- return ret;
- *temp = code_to_mdegC(code, s);
- return 0;
- } while (time_before(jiffies, timeout));
-
- return -ETIMEDOUT;
-}
+static const struct reg_field tsens_8960_regfields[MAX_REGFIELDS] = {
+ /* ----- SROT ------ */
+ /* No VERSION information */
+
+ /* CNTL */
+ [TSENS_EN] = REG_FIELD(CNTL_ADDR, 0, 0),
+ [TSENS_SW_RST] = REG_FIELD(CNTL_ADDR, 1, 1),
+ /* 8960 has 5 sensors, 8660 has 11, we only handle 5 */
+ [SENSOR_EN] = REG_FIELD(CNTL_ADDR, 3, 7),
+
+ /* ----- TM ------ */
+ /* INTERRUPT ENABLE */
+ /* NO INTERRUPT ENABLE */
+
+ /* Single UPPER/LOWER TEMPERATURE THRESHOLD for all sensors */
+ [LOW_THRESH_0] = REG_FIELD(THRESHOLD_ADDR, 0, 7),
+ [UP_THRESH_0] = REG_FIELD(THRESHOLD_ADDR, 8, 15),
+ /* MIN_THRESH_0 and MAX_THRESH_0 are not present in the regfield
+ * Recycle CRIT_THRESH_0 and 1 to set the required regs to hardcoded temp
+ * MIN_THRESH_0 -> CRIT_THRESH_1
+ * MAX_THRESH_0 -> CRIT_THRESH_0
+ */
+ [CRIT_THRESH_1] = REG_FIELD(THRESHOLD_ADDR, 16, 23),
+ [CRIT_THRESH_0] = REG_FIELD(THRESHOLD_ADDR, 24, 31),
+
+ /* UPPER/LOWER INTERRUPT [CLEAR/STATUS] */
+ /* 1 == clear, 0 == normal operation */
+ [LOW_INT_CLEAR_0] = REG_FIELD(CNTL_ADDR, 9, 9),
+ [UP_INT_CLEAR_0] = REG_FIELD(CNTL_ADDR, 10, 10),
+
+ /* NO CRITICAL INTERRUPT SUPPORT on 8960 */
+
+ /* Sn_STATUS */
+ [LAST_TEMP_0] = REG_FIELD(S0_STATUS_OFF, 0, 7),
+ [LAST_TEMP_1] = REG_FIELD(S1_STATUS_OFF, 0, 7),
+ [LAST_TEMP_2] = REG_FIELD(S2_STATUS_OFF, 0, 7),
+ [LAST_TEMP_3] = REG_FIELD(S3_STATUS_OFF, 0, 7),
+ [LAST_TEMP_4] = REG_FIELD(S4_STATUS_OFF, 0, 7),
+ [LAST_TEMP_5] = REG_FIELD(S5_STATUS_OFF, 0, 7),
+ [LAST_TEMP_6] = REG_FIELD(S6_STATUS_OFF, 0, 7),
+ [LAST_TEMP_7] = REG_FIELD(S7_STATUS_OFF, 0, 7),
+ [LAST_TEMP_8] = REG_FIELD(S8_STATUS_OFF, 0, 7),
+ [LAST_TEMP_9] = REG_FIELD(S9_STATUS_OFF, 0, 7),
+ [LAST_TEMP_10] = REG_FIELD(S10_STATUS_OFF, 0, 7),
+
+ /* No VALID field on 8960 */
+ /* TSENS_INT_STATUS bits: 1 == threshold violated */
+ [MIN_STATUS_0] = REG_FIELD(INT_STATUS_ADDR, 0, 0),
+ [LOWER_STATUS_0] = REG_FIELD(INT_STATUS_ADDR, 1, 1),
+ [UPPER_STATUS_0] = REG_FIELD(INT_STATUS_ADDR, 2, 2),
+ /* No CRITICAL field on 8960 */
+ [MAX_STATUS_0] = REG_FIELD(INT_STATUS_ADDR, 3, 3),
+
+ /* TRDY: 1=ready, 0=in progress */
+ [TRDY] = REG_FIELD(INT_STATUS_ADDR, 7, 7),
+};
static const struct tsens_ops ops_8960 = {
- .init = init_8960,
+ .init = init_common,
.calibrate = calibrate_8960,
- .get_temp = get_temp_8960,
+ .get_temp = get_temp_common,
.enable = enable_8960,
.disable = disable_8960,
.suspend = suspend_8960,
.resume = resume_8960,
};
+static struct tsens_features tsens_8960_feat = {
+ .ver_major = VER_0,
+ .crit_int = 0,
+ .adc = 1,
+ .srot_split = 0,
+ .max_sensors = 11,
+};
+
struct tsens_plat_data data_8960 = {
.num_sensors = 11,
.ops = &ops_8960,
+ .feat = &tsens_8960_feat,
+ .fields = tsens_8960_regfields,
};
#define BIT_APPEND 0x3
+/* eeprom layout data for mdm9607 */
+#define MDM9607_BASE0_MASK 0x000000ff
+#define MDM9607_BASE1_MASK 0x000ff000
+#define MDM9607_BASE0_SHIFT 0
+#define MDM9607_BASE1_SHIFT 12
+
+#define MDM9607_S0_P1_MASK 0x00003f00
+#define MDM9607_S1_P1_MASK 0x03f00000
+#define MDM9607_S2_P1_MASK 0x0000003f
+#define MDM9607_S3_P1_MASK 0x0003f000
+#define MDM9607_S4_P1_MASK 0x0000003f
+
+#define MDM9607_S0_P2_MASK 0x000fc000
+#define MDM9607_S1_P2_MASK 0xfc000000
+#define MDM9607_S2_P2_MASK 0x00000fc0
+#define MDM9607_S3_P2_MASK 0x00fc0000
+#define MDM9607_S4_P2_MASK 0x00000fc0
+
+#define MDM9607_S0_P1_SHIFT 8
+#define MDM9607_S1_P1_SHIFT 20
+#define MDM9607_S2_P1_SHIFT 0
+#define MDM9607_S3_P1_SHIFT 12
+#define MDM9607_S4_P1_SHIFT 0
+
+#define MDM9607_S0_P2_SHIFT 14
+#define MDM9607_S1_P2_SHIFT 26
+#define MDM9607_S2_P2_SHIFT 6
+#define MDM9607_S3_P2_SHIFT 18
+#define MDM9607_S4_P2_SHIFT 6
+
+#define MDM9607_CAL_SEL_MASK 0x00700000
+#define MDM9607_CAL_SEL_SHIFT 20
+
static int calibrate_8916(struct tsens_priv *priv)
{
int base0 = 0, base1 = 0, i;
return 0;
}
-/* v0.1: 8916, 8939, 8974 */
+static int calibrate_9607(struct tsens_priv *priv)
+{
+ int base, i;
+ u32 p1[5], p2[5];
+ int mode = 0;
+ u32 *qfprom_cdata;
+
+ qfprom_cdata = (u32 *)qfprom_read(priv->dev, "calib");
+ if (IS_ERR(qfprom_cdata))
+ return PTR_ERR(qfprom_cdata);
+
+ mode = (qfprom_cdata[2] & MDM9607_CAL_SEL_MASK) >> MDM9607_CAL_SEL_SHIFT;
+ dev_dbg(priv->dev, "calibration mode is %d\n", mode);
+
+ switch (mode) {
+ case TWO_PT_CALIB:
+ base = (qfprom_cdata[2] & MDM9607_BASE1_MASK) >> MDM9607_BASE1_SHIFT;
+ p2[0] = (qfprom_cdata[0] & MDM9607_S0_P2_MASK) >> MDM9607_S0_P2_SHIFT;
+ p2[1] = (qfprom_cdata[0] & MDM9607_S1_P2_MASK) >> MDM9607_S1_P2_SHIFT;
+ p2[2] = (qfprom_cdata[1] & MDM9607_S2_P2_MASK) >> MDM9607_S2_P2_SHIFT;
+ p2[3] = (qfprom_cdata[1] & MDM9607_S3_P2_MASK) >> MDM9607_S3_P2_SHIFT;
+ p2[4] = (qfprom_cdata[2] & MDM9607_S4_P2_MASK) >> MDM9607_S4_P2_SHIFT;
+ for (i = 0; i < priv->num_sensors; i++)
+ p2[i] = ((base + p2[i]) << 2);
+ fallthrough;
+ case ONE_PT_CALIB2:
+ base = (qfprom_cdata[0] & MDM9607_BASE0_MASK);
+ p1[0] = (qfprom_cdata[0] & MDM9607_S0_P1_MASK) >> MDM9607_S0_P1_SHIFT;
+ p1[1] = (qfprom_cdata[0] & MDM9607_S1_P1_MASK) >> MDM9607_S1_P1_SHIFT;
+ p1[2] = (qfprom_cdata[1] & MDM9607_S2_P1_MASK) >> MDM9607_S2_P1_SHIFT;
+ p1[3] = (qfprom_cdata[1] & MDM9607_S3_P1_MASK) >> MDM9607_S3_P1_SHIFT;
+ p1[4] = (qfprom_cdata[2] & MDM9607_S4_P1_MASK) >> MDM9607_S4_P1_SHIFT;
+ for (i = 0; i < priv->num_sensors; i++)
+ p1[i] = ((base + p1[i]) << 2);
+ break;
+ default:
+ for (i = 0; i < priv->num_sensors; i++) {
+ p1[i] = 500;
+ p2[i] = 780;
+ }
+ break;
+ }
+
+ compute_intercept_slope(priv, p1, p2, mode);
+ kfree(qfprom_cdata);
+
+ return 0;
+}
+
+/* v0.1: 8916, 8939, 8974, 9607 */
static struct tsens_features tsens_v0_1_feat = {
.ver_major = VER_0_1,
.feat = &tsens_v0_1_feat,
.fields = tsens_v0_1_regfields,
};
+
+static const struct tsens_ops ops_9607 = {
+ .init = init_common,
+ .calibrate = calibrate_9607,
+ .get_temp = get_temp_common,
+};
+
+struct tsens_plat_data data_9607 = {
+ .num_sensors = 5,
+ .ops = &ops_9607,
+ .hw_ids = (unsigned int []){ 0, 1, 2, 3, 4 },
+ .feat = &tsens_v0_1_feat,
+ .fields = tsens_v0_1_regfields,
+};
.get_temp = get_temp_tsens_valid,
};
-/* Valid for both MSM8956 and MSM8976. Sensor ID 3 is unused. */
+/* Valid for both MSM8956 and MSM8976. */
struct tsens_plat_data data_8976 = {
.num_sensors = 11,
.ops = &ops_8976,
- .hw_ids = (unsigned int[]){0, 1, 2, 4, 5, 6, 7, 8, 9, 10},
+ .hw_ids = (unsigned int[]){0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10},
.feat = &tsens_v1_feat,
.fields = tsens_v1_regfields,
};
#include <linux/of.h>
#include <linux/of_address.h>
#include <linux/of_platform.h>
+#include <linux/mfd/syscon.h>
#include <linux/platform_device.h>
#include <linux/pm.h>
#include <linux/regmap.h>
"%s: sensor%d - data_point1:%#x data_point2:%#x\n",
__func__, i, p1[i], p2[i]);
- priv->sensor[i].slope = SLOPE_DEFAULT;
+ if (!priv->sensor[i].slope)
+ priv->sensor[i].slope = SLOPE_DEFAULT;
if (mode == TWO_PT_CALIB) {
/*
* slope (m) = adc_code2 - adc_code1 (y2 - y1)/
dev_dbg(priv->dev, "[%u] %s: no violation: %d\n",
hw_id, __func__, temp);
}
+
+ if (tsens_version(priv) < VER_0_1) {
+ /* Constraint: There is only 1 interrupt control register for all
+ * 11 temperature sensor. So monitoring more than 1 sensor based
+ * on interrupts will yield inconsistent result. To overcome this
+ * issue we will monitor only sensor 0 which is the master sensor.
+ */
+ break;
+ }
}
return IRQ_HANDLED;
int high_val, low_val, cl_high, cl_low;
u32 hw_id = s->hw_id;
+ if (tsens_version(priv) < VER_0_1) {
+ /* Pre v0.1 IP had a single register for each type of interrupt
+ * and thresholds
+ */
+ hw_id = 0;
+ }
+
dev_dbg(dev, "[%u] %s: proposed thresholds: (%d:%d)\n",
hw_id, __func__, low, high);
u32 valid;
int ret;
- ret = regmap_field_read(priv->rf[valid_idx], &valid);
- if (ret)
- return ret;
- while (!valid) {
- /* Valid bit is 0 for 6 AHB clock cycles.
- * At 19.2MHz, 1 AHB clock is ~60ns.
- * We should enter this loop very, very rarely.
- */
- ndelay(400);
+ /* VER_0 doesn't have VALID bit */
+ if (tsens_version(priv) >= VER_0_1) {
ret = regmap_field_read(priv->rf[valid_idx], &valid);
if (ret)
return ret;
+ while (!valid) {
+ /* Valid bit is 0 for 6 AHB clock cycles.
+ * At 19.2MHz, 1 AHB clock is ~60ns.
+ * We should enter this loop very, very rarely.
+ */
+ ndelay(400);
+ ret = regmap_field_read(priv->rf[valid_idx], &valid);
+ if (ret)
+ return ret;
+ }
}
/* Valid bit is set, OK to read the temperature */
{
struct tsens_priv *priv = s->priv;
int hw_id = s->hw_id;
- int last_temp = 0, ret;
+ int last_temp = 0, ret, trdy;
+ unsigned long timeout;
- ret = regmap_field_read(priv->rf[LAST_TEMP_0 + hw_id], &last_temp);
- if (ret)
- return ret;
+ timeout = jiffies + usecs_to_jiffies(TIMEOUT_US);
+ do {
+ if (tsens_version(priv) == VER_0) {
+ ret = regmap_field_read(priv->rf[TRDY], &trdy);
+ if (ret)
+ return ret;
+ if (!trdy)
+ continue;
+ }
- *temp = code_to_degc(last_temp, s) * 1000;
+ ret = regmap_field_read(priv->rf[LAST_TEMP_0 + hw_id], &last_temp);
+ if (ret)
+ return ret;
- return 0;
+ *temp = code_to_degc(last_temp, s) * 1000;
+
+ return 0;
+ } while (time_before(jiffies, timeout));
+
+ return -ETIMEDOUT;
}
#ifdef CONFIG_DEBUG_FS
priv->tm_offset = 0x1000;
}
- res = platform_get_resource(op, IORESOURCE_MEM, 0);
- tm_base = devm_ioremap_resource(dev, res);
- if (IS_ERR(tm_base)) {
- ret = PTR_ERR(tm_base);
- goto err_put_device;
+ if (tsens_version(priv) >= VER_0_1) {
+ res = platform_get_resource(op, IORESOURCE_MEM, 0);
+ tm_base = devm_ioremap_resource(dev, res);
+ if (IS_ERR(tm_base)) {
+ ret = PTR_ERR(tm_base);
+ goto err_put_device;
+ }
+
+ priv->tm_map = devm_regmap_init_mmio(dev, tm_base, &tsens_config);
+ } else { /* VER_0 share the same gcc regs using a syscon */
+ struct device *parent = priv->dev->parent;
+
+ if (parent)
+ priv->tm_map = syscon_node_to_regmap(parent->of_node);
}
- priv->tm_map = devm_regmap_init_mmio(dev, tm_base, &tsens_config);
- if (IS_ERR(priv->tm_map)) {
- ret = PTR_ERR(priv->tm_map);
+ if (IS_ERR_OR_NULL(priv->tm_map)) {
+ if (!priv->tm_map)
+ ret = -ENODEV;
+ else
+ ret = PTR_ERR(priv->tm_map);
goto err_put_device;
}
+ /* VER_0 have only tm_map */
+ if (!priv->srot_map)
+ priv->srot_map = priv->tm_map;
+
if (tsens_version(priv) > VER_0_1) {
for (i = VER_MAJOR; i <= VER_STEP; i++) {
priv->rf[i] = devm_regmap_field_alloc(dev, priv->srot_map,
priv->fields[i]);
- if (IS_ERR(priv->rf[i]))
- return PTR_ERR(priv->rf[i]);
+ if (IS_ERR(priv->rf[i])) {
+ ret = PTR_ERR(priv->rf[i]);
+ goto err_put_device;
+ }
}
ret = regmap_field_read(priv->rf[VER_MINOR], &ver_minor);
if (ret)
ret = PTR_ERR(priv->rf[TSENS_EN]);
goto err_put_device;
}
+ /* in VER_0 TSENS need to be explicitly enabled */
+ if (tsens_version(priv) == VER_0)
+ regmap_field_write(priv->rf[TSENS_EN], 1);
+
ret = regmap_field_read(priv->rf[TSENS_EN], &enabled);
if (ret)
goto err_put_device;
goto err_put_device;
}
+ priv->rf[TSENS_SW_RST] =
+ devm_regmap_field_alloc(dev, priv->srot_map, priv->fields[TSENS_SW_RST]);
+ if (IS_ERR(priv->rf[TSENS_SW_RST])) {
+ ret = PTR_ERR(priv->rf[TSENS_SW_RST]);
+ goto err_put_device;
+ }
+
+ priv->rf[TRDY] = devm_regmap_field_alloc(dev, priv->tm_map, priv->fields[TRDY]);
+ if (IS_ERR(priv->rf[TRDY])) {
+ ret = PTR_ERR(priv->rf[TRDY]);
+ goto err_put_device;
+ }
+
/* This loop might need changes if enum regfield_ids is reordered */
for (j = LAST_TEMP_0; j <= UP_THRESH_15; j += 16) {
for (i = 0; i < priv->feat->max_sensors; i++) {
}
}
- if (priv->feat->crit_int) {
+ if (priv->feat->crit_int || tsens_version(priv) < VER_0_1) {
/* Loop might need changes if enum regfield_ids is reordered */
for (j = CRITICAL_STATUS_0; j <= CRIT_THRESH_15; j += 16) {
for (i = 0; i < priv->feat->max_sensors; i++) {
}
spin_lock_init(&priv->ul_lock);
- tsens_enable_irq(priv);
+
+ /* VER_0 interrupt doesn't need to be enabled */
+ if (tsens_version(priv) >= VER_0_1)
+ tsens_enable_irq(priv);
+
tsens_debug_init(op);
err_put_device:
static const struct of_device_id tsens_table[] = {
{
+ .compatible = "qcom,ipq8064-tsens",
+ .data = &data_8960,
+ }, {
+ .compatible = "qcom,mdm9607-tsens",
+ .data = &data_9607,
+ }, {
.compatible = "qcom,msm8916-tsens",
.data = &data_8916,
}, {
if (irq == -ENXIO)
ret = 0;
} else {
- ret = devm_request_threaded_irq(&pdev->dev, irq,
- NULL, thread_fn,
- IRQF_ONESHOT,
- dev_name(&pdev->dev), priv);
+ /* VER_0 interrupt is TRIGGER_RISING, VER_0_1 and up is ONESHOT */
+ if (tsens_version(priv) == VER_0)
+ ret = devm_request_threaded_irq(&pdev->dev, irq,
+ thread_fn, NULL,
+ IRQF_TRIGGER_RISING,
+ dev_name(&pdev->dev),
+ priv);
+ else
+ ret = devm_request_threaded_irq(&pdev->dev, irq, NULL,
+ thread_fn, IRQF_ONESHOT,
+ dev_name(&pdev->dev),
+ priv);
+
if (ret)
dev_err(&pdev->dev, "%s: failed to get irq\n",
__func__);
priv->ops->enable(priv, i);
}
+ /* VER_0 require to set MIN and MAX THRESH
+ * These 2 regs are set using the:
+ * - CRIT_THRESH_0 for MAX THRESH hardcoded to 120°C
+ * - CRIT_THRESH_1 for MIN THRESH hardcoded to 0°C
+ */
+ if (tsens_version(priv) < VER_0_1) {
+ regmap_field_write(priv->rf[CRIT_THRESH_0],
+ tsens_mC_to_hw(priv->sensor, 120000));
+
+ regmap_field_write(priv->rf[CRIT_THRESH_1],
+ tsens_mC_to_hw(priv->sensor, 0));
+ }
+
ret = tsens_register_irq(priv, "uplow", tsens_irq_thread);
if (ret < 0)
return ret;
#define CAL_DEGC_PT2 120
#define SLOPE_FACTOR 1000
#define SLOPE_DEFAULT 3200
+#define TIMEOUT_US 100
#define THRESHOLD_MAX_ADC_CODE 0x3ff
#define THRESHOLD_MIN_ADC_CODE 0x0
/* IP version numbers in ascending order */
enum tsens_ver {
- VER_0_1 = 0,
+ VER_0 = 0,
+ VER_0_1,
VER_1_X,
VER_2_X,
};
extern struct tsens_plat_data data_8960;
/* TSENS v0.1 targets */
-extern struct tsens_plat_data data_8916, data_8939, data_8974;
+extern struct tsens_plat_data data_8916, data_8939, data_8974, data_9607;
/* TSENS v1 targets */
extern struct tsens_plat_data data_tsens_v1, data_8976;
#define MCELSIUS(temp) ((temp) * 1000)
#define GEN3_FUSE_MASK 0xFFF
-#define TSC_MAX_NUM 4
+#define TSC_MAX_NUM 5
/* default THCODE values if FUSEs are missing */
static const int thcodes[TSC_MAX_NUM][3] = {
{ 3393, 2795, 2216 },
{ 3389, 2805, 2237 },
{ 3415, 2694, 2195 },
+ { 3356, 2724, 2244 },
};
/* Structure for thermal temperature calculation */
* or 0x8xx, so they won't be away from the default value
* for a lot.
*
- * So here we do not return error if the calibartion data is
+ * So here we do not return error if the calibration data is
* not available, except the probe needs deferring.
*/
goto out;
}
/*
- * Without this undocummented value, the returned temperatures would
+ * Without this undocumented value, the returned temperatures would
* be higher than real ones by about 20C.
*/
#define SUN50I_H6_CTRL0_UNK 0x0000002f
struct tegra_soctherm *tegra;
struct thermal_zone_device *z;
struct tsensor_shared_calib shared_calib;
- struct resource *res;
struct tegra_soctherm_soc *soc;
unsigned int i;
int err;
tegra->soc = soc;
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- "soctherm-reg");
- tegra->regs = devm_ioremap_resource(&pdev->dev, res);
+ tegra->regs = devm_platform_ioremap_resource_byname(pdev, "soctherm-reg");
if (IS_ERR(tegra->regs)) {
dev_err(&pdev->dev, "can't get soctherm registers");
return PTR_ERR(tegra->regs);
}
if (!tegra->soc->use_ccroc) {
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- "car-reg");
- tegra->clk_regs = devm_ioremap_resource(&pdev->dev, res);
+ tegra->clk_regs = devm_platform_ioremap_resource_byname(pdev, "car-reg");
if (IS_ERR(tegra->clk_regs)) {
dev_err(&pdev->dev, "can't get car clk registers");
return PTR_ERR(tegra->clk_regs);
}
} else {
- res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
- "ccroc-reg");
- tegra->ccroc_regs = devm_ioremap_resource(&pdev->dev, res);
+ tegra->ccroc_regs = devm_platform_ioremap_resource_byname(pdev, "ccroc-reg");
if (IS_ERR(tegra->ccroc_regs)) {
dev_err(&pdev->dev, "can't get ccroc registers");
return PTR_ERR(tegra->ccroc_regs);
if (err)
return err;
- /* calculate tsensor calibaration data */
+ /* calculate tsensor calibration data */
for (i = 0; i < soc->num_tsensors; ++i) {
err = tegra_calc_tsensor_calib(&soc->tsensors[i],
&shared_calib,
}
EXPORT_SYMBOL_GPL(thermal_zone_device_update);
-/**
- * thermal_notify_framework - Sensor drivers use this API to notify framework
- * @tz: thermal zone device
- * @trip: indicates which trip point has been crossed
- *
- * This function handles the trip events from sensor drivers. It starts
- * throttling the cooling devices according to the policy configured.
- * For CRITICAL and HOT trip points, this notifies the respective drivers,
- * and does actual throttling for other trip points i.e ACTIVE and PASSIVE.
- * The throttling policy is based on the configured platform data; if no
- * platform data is provided, this uses the step_wise throttling policy.
- */
-void thermal_notify_framework(struct thermal_zone_device *tz, int trip)
-{
- handle_thermal_trip(tz, trip);
-}
-EXPORT_SYMBOL_GPL(thermal_notify_framework);
-
static void thermal_zone_device_check(struct work_struct *work)
{
struct thermal_zone_device *tz = container_of(work, struct
{
struct thermal_cooling_device *cdev;
struct thermal_zone_device *pos = NULL;
- int result;
-
- if (type && strlen(type) >= THERMAL_NAME_LENGTH)
- return ERR_PTR(-EINVAL);
+ int ret;
if (!ops || !ops->get_max_state || !ops->get_cur_state ||
!ops->set_cur_state)
if (!cdev)
return ERR_PTR(-ENOMEM);
- result = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
- if (result < 0) {
- kfree(cdev);
- return ERR_PTR(result);
+ ret = ida_simple_get(&thermal_cdev_ida, 0, 0, GFP_KERNEL);
+ if (ret < 0)
+ goto out_kfree_cdev;
+ cdev->id = ret;
+
+ cdev->type = kstrdup(type ? type : "", GFP_KERNEL);
+ if (!cdev->type) {
+ ret = -ENOMEM;
+ goto out_ida_remove;
}
- cdev->id = result;
- strlcpy(cdev->type, type ? : "", sizeof(cdev->type));
mutex_init(&cdev->lock);
INIT_LIST_HEAD(&cdev->thermal_instances);
cdev->np = np;
cdev->devdata = devdata;
thermal_cooling_device_setup_sysfs(cdev);
dev_set_name(&cdev->device, "cooling_device%d", cdev->id);
- result = device_register(&cdev->device);
- if (result) {
- ida_simple_remove(&thermal_cdev_ida, cdev->id);
- put_device(&cdev->device);
- return ERR_PTR(result);
- }
+ ret = device_register(&cdev->device);
+ if (ret)
+ goto out_kfree_type;
/* Add 'this' new cdev to the global cdev list */
mutex_lock(&thermal_list_lock);
mutex_unlock(&thermal_list_lock);
return cdev;
+
+out_kfree_type:
+ kfree(cdev->type);
+ put_device(&cdev->device);
+out_ida_remove:
+ ida_simple_remove(&thermal_cdev_ida, cdev->id);
+out_kfree_cdev:
+ kfree(cdev);
+ return ERR_PTR(ret);
}
/**
ida_simple_remove(&thermal_cdev_ida, cdev->id);
device_del(&cdev->device);
thermal_cooling_device_destroy_sysfs(cdev);
+ kfree(cdev->type);
put_device(&cdev->device);
}
EXPORT_SYMBOL_GPL(thermal_cooling_device_unregister);
}
void thermal_cdev_update(struct thermal_cooling_device *);
+void __thermal_cdev_update(struct thermal_cooling_device *cdev);
/**
* struct thermal_trip - representation of a point in temperature domain
thermal_cooling_device_stats_update(cdev, target);
}
-void thermal_cdev_update(struct thermal_cooling_device *cdev)
+void __thermal_cdev_update(struct thermal_cooling_device *cdev)
{
struct thermal_instance *instance;
unsigned long target = 0;
- mutex_lock(&cdev->lock);
- /* cooling device is updated*/
- if (cdev->updated) {
- mutex_unlock(&cdev->lock);
- return;
- }
-
/* Make sure cdev enters the deepest cooling state */
list_for_each_entry(instance, &cdev->thermal_instances, cdev_node) {
dev_dbg(&cdev->device, "zone%d->target=%lu\n",
thermal_cdev_set_cur_state(cdev, target);
- cdev->updated = true;
- mutex_unlock(&cdev->lock);
trace_cdev_update(cdev, target);
dev_dbg(&cdev->device, "set to state %lu\n", target);
}
+
+/**
+ * thermal_cdev_update - update cooling device state if needed
+ * @cdev: pointer to struct thermal_cooling_device
+ *
+ * Update the cooling device state if there is a need.
+ */
+void thermal_cdev_update(struct thermal_cooling_device *cdev)
+{
+ mutex_lock(&cdev->lock);
+ if (!cdev->updated) {
+ __thermal_cdev_update(cdev);
+ cdev->updated = true;
+ }
+ mutex_unlock(&cdev->lock);
+}
EXPORT_SYMBOL(thermal_cdev_update);
/**
resource = platform_get_resource(pdev, IORESOURCE_MEM, 0);
sensor->mmio_base = devm_ioremap_resource(&pdev->dev, resource);
- if (IS_ERR(sensor->mmio_base)) {
- dev_err(&pdev->dev, "failed to ioremap memory (%ld)\n",
- PTR_ERR(sensor->mmio_base));
+ if (IS_ERR(sensor->mmio_base))
return PTR_ERR(sensor->mmio_base);
- }
sensor_init_func = device_get_match_data(&pdev->dev);
if (sensor_init_func) {
count = of_count_phandle_with_args(np, "cooling-device",
"#cooling-cells");
- if (!count) {
+ if (count <= 0) {
pr_err("Add a cooling_device property with at least one device\n");
+ ret = -ENOENT;
goto end;
}
__tcbp = kcalloc(count, sizeof(*__tcbp), GFP_KERNEL);
- if (!__tcbp)
+ if (!__tcbp) {
+ ret = -ENOMEM;
goto end;
+ }
for (i = 0; i < count; i++) {
ret = of_parse_phandle_with_args(np, "cooling-device",
* Eduardo Valentin <eduardo.valentin@ti.com>
*/
-#include <linux/module.h>
+#include <linux/clk.h>
+#include <linux/cpu_pm.h>
+#include <linux/device.h>
+#include <linux/err.h>
#include <linux/export.h>
+#include <linux/gpio/consumer.h>
#include <linux/init.h>
-#include <linux/kernel.h>
#include <linux/interrupt.h>
-#include <linux/clk.h>
-#include <linux/gpio/consumer.h>
-#include <linux/platform_device.h>
-#include <linux/err.h>
-#include <linux/types.h>
-#include <linux/spinlock.h>
-#include <linux/sys_soc.h>
-#include <linux/reboot.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/of_irq.h>
#include <linux/io.h>
#include <linux/iopoll.h>
-#include <linux/cpu_pm.h>
-#include <linux/device.h>
-#include <linux/pm_runtime.h>
-#include <linux/pm.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
+#include <linux/of_irq.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/pm_runtime.h>
+#include <linux/reboot.h>
+#include <linux/spinlock.h>
+#include <linux/sys_soc.h>
+#include <linux/types.h>
#include "ti-bandgap.h"
for (i = 0; i < bgp->conf->sensor_count; i++) {
struct temp_sensor_registers *tsr;
struct temp_sensor_regval *rval;
- u32 val = 0;
rval = &bgp->regval[i];
tsr = bgp->conf->sensors[i].registers;
- if (TI_BANDGAP_HAS(bgp, COUNTER))
- val = ti_bandgap_readl(bgp, tsr->bgap_counter);
-
if (TI_BANDGAP_HAS(bgp, TSHUT_CONFIG))
ti_bandgap_writel(bgp, rval->tshut_threshold,
tsr->tshut_threshold);
# SPDX-License-Identifier: GPL-2.0
-conmakehash
-consolemap_deftbl.c
-defkeymap.c
+/conmakehash
+/consolemap_deftbl.c
+/defkeymap.c
depends on RUNTIME_TESTING_MENU && HAS_DMA
select DMA_OPS
select VHOST_RING
+ select IOMMU_IOVA
help
Enable this module to support vDPA device simulators. These devices
are used for testing, prototyping and development of vDPA.
help
vDPA networking device simulator which loops TX traffic back to RX.
+config VDPA_SIM_BLOCK
+ tristate "vDPA simulator for block device"
+ depends on VDPA_SIM
+ help
+ vDPA block device simulator which terminates IO request in a
+ memory buffer.
+
config IFCVF
tristate "Intel IFC VF vDPA driver"
depends on PCI_MSI
be executed by the hardware. It also supports a variety of stateless
offloads depending on the actual device used and firmware version.
+config VP_VDPA
+ tristate "Virtio PCI bridge vDPA driver"
+ select VIRTIO_PCI_LIB
+ depends on PCI_MSI
+ help
+ This kernel module bridges virtio PCI device to vDPA bus.
+
endif # VDPA
obj-$(CONFIG_VDPA_SIM) += vdpa_sim/
obj-$(CONFIG_IFCVF) += ifcvf/
obj-$(CONFIG_MLX5_VDPA) += mlx5/
+obj-$(CONFIG_VP_VDPA) += virtio_pci/
ifcvf_get_status(hw);
}
-u64 ifcvf_get_features(struct ifcvf_hw *hw)
+u64 ifcvf_get_hw_features(struct ifcvf_hw *hw)
{
struct virtio_pci_common_cfg __iomem *cfg = hw->common_cfg;
u32 features_lo, features_hi;
+ u64 features;
ifc_iowrite32(0, &cfg->device_feature_select);
features_lo = ifc_ioread32(&cfg->device_feature);
ifc_iowrite32(1, &cfg->device_feature_select);
features_hi = ifc_ioread32(&cfg->device_feature);
- return ((u64)features_hi << 32) | features_lo;
+ features = ((u64)features_hi << 32) | features_lo;
+
+ return features;
+}
+
+u64 ifcvf_get_features(struct ifcvf_hw *hw)
+{
+ return hw->hw_features;
+}
+
+int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features)
+{
+ struct ifcvf_adapter *ifcvf = vf_to_adapter(hw);
+
+ if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)) && features) {
+ IFCVF_ERR(ifcvf->pdev, "VIRTIO_F_ACCESS_PLATFORM is not negotiated\n");
+ return -EINVAL;
+ }
+
+ return 0;
}
void ifcvf_read_net_config(struct ifcvf_hw *hw, u64 offset,
#include <linux/pci_regs.h>
#include <linux/vdpa.h>
#include <uapi/linux/virtio_net.h>
+#include <uapi/linux/virtio_blk.h>
#include <uapi/linux/virtio_config.h>
#include <uapi/linux/virtio_pci.h>
-#define IFCVF_VENDOR_ID 0x1AF4
-#define IFCVF_DEVICE_ID 0x1041
-#define IFCVF_SUBSYS_VENDOR_ID 0x8086
-#define IFCVF_SUBSYS_DEVICE_ID 0x001A
+#define N3000_VENDOR_ID 0x1AF4
+#define N3000_DEVICE_ID 0x1041
+#define N3000_SUBSYS_VENDOR_ID 0x8086
+#define N3000_SUBSYS_DEVICE_ID 0x001A
-#define IFCVF_SUPPORTED_FEATURES \
+#define C5000X_PL_VENDOR_ID 0x1AF4
+#define C5000X_PL_DEVICE_ID 0x1000
+#define C5000X_PL_SUBSYS_VENDOR_ID 0x8086
+#define C5000X_PL_SUBSYS_DEVICE_ID 0x0001
+
+#define C5000X_PL_BLK_VENDOR_ID 0x1AF4
+#define C5000X_PL_BLK_DEVICE_ID 0x1001
+#define C5000X_PL_BLK_SUBSYS_VENDOR_ID 0x8086
+#define C5000X_PL_BLK_SUBSYS_DEVICE_ID 0x0002
+
+#define IFCVF_NET_SUPPORTED_FEATURES \
((1ULL << VIRTIO_NET_F_MAC) | \
(1ULL << VIRTIO_F_ANY_LAYOUT) | \
(1ULL << VIRTIO_F_VERSION_1) | \
void __iomem *notify_base;
u32 notify_off_multiplier;
u64 req_features;
+ u64 hw_features;
+ u32 dev_type;
struct virtio_pci_common_cfg __iomem *common_cfg;
void __iomem *net_cfg;
struct vring_info vring[IFCVF_MAX_QUEUE_PAIRS * 2];
void io_write64_twopart(u64 val, u32 *lo, u32 *hi);
void ifcvf_reset(struct ifcvf_hw *hw);
u64 ifcvf_get_features(struct ifcvf_hw *hw);
+u64 ifcvf_get_hw_features(struct ifcvf_hw *hw);
+int ifcvf_verify_min_features(struct ifcvf_hw *hw, u64 features);
u16 ifcvf_get_vq_state(struct ifcvf_hw *hw, u16 qid);
int ifcvf_set_vq_state(struct ifcvf_hw *hw, u16 qid, u16 num);
struct ifcvf_adapter *vf_to_adapter(struct ifcvf_hw *hw);
+int ifcvf_probed_virtio_net(struct ifcvf_hw *hw);
#endif /* _IFCVF_H_ */
#include <linux/sysfs.h>
#include "ifcvf_base.h"
-#define VERSION_STRING "0.1"
#define DRIVER_AUTHOR "Intel Corporation"
#define IFCVF_DRIVER_NAME "ifcvf"
static u64 ifcvf_vdpa_get_features(struct vdpa_device *vdpa_dev)
{
+ struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+ struct pci_dev *pdev = adapter->pdev;
+
u64 features;
- features = ifcvf_get_features(vf) & IFCVF_SUPPORTED_FEATURES;
+ switch (vf->dev_type) {
+ case VIRTIO_ID_NET:
+ features = ifcvf_get_features(vf) & IFCVF_NET_SUPPORTED_FEATURES;
+ break;
+ case VIRTIO_ID_BLOCK:
+ features = ifcvf_get_features(vf);
+ break;
+ default:
+ features = 0;
+ IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
+ }
return features;
}
static int ifcvf_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features)
{
struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+ int ret;
+
+ ret = ifcvf_verify_min_features(vf, features);
+ if (ret)
+ return ret;
vf->req_features = features;
static u32 ifcvf_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
{
- return VIRTIO_ID_NET;
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+
+ return vf->dev_type;
}
static u32 ifcvf_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
{
- return IFCVF_SUBSYS_VENDOR_ID;
+ struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
+ struct pci_dev *pdev = adapter->pdev;
+
+ return pdev->subsystem_vendor;
}
static u32 ifcvf_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
return IFCVF_QUEUE_ALIGNMENT;
}
+static size_t ifcvf_vdpa_get_config_size(struct vdpa_device *vdpa_dev)
+{
+ struct ifcvf_adapter *adapter = vdpa_to_adapter(vdpa_dev);
+ struct ifcvf_hw *vf = vdpa_to_vf(vdpa_dev);
+ struct pci_dev *pdev = adapter->pdev;
+ size_t size;
+
+ switch (vf->dev_type) {
+ case VIRTIO_ID_NET:
+ size = sizeof(struct virtio_net_config);
+ break;
+ case VIRTIO_ID_BLOCK:
+ size = sizeof(struct virtio_blk_config);
+ break;
+ default:
+ size = 0;
+ IFCVF_ERR(pdev, "VIRTIO ID %u not supported\n", vf->dev_type);
+ }
+
+ return size;
+}
+
static void ifcvf_vdpa_get_config(struct vdpa_device *vdpa_dev,
unsigned int offset,
void *buf, unsigned int len)
.get_device_id = ifcvf_vdpa_get_device_id,
.get_vendor_id = ifcvf_vdpa_get_vendor_id,
.get_vq_align = ifcvf_vdpa_get_vq_align,
+ .get_config_size = ifcvf_vdpa_get_config_size,
.get_config = ifcvf_vdpa_get_config,
.set_config = ifcvf_vdpa_set_config,
.set_config_cb = ifcvf_vdpa_set_config_cb,
pci_set_drvdata(pdev, adapter);
vf = &adapter->vf;
+
+ /* This drirver drives both modern virtio devices and transitional
+ * devices in modern mode.
+ * vDPA requires feature bit VIRTIO_F_ACCESS_PLATFORM,
+ * so legacy devices and transitional devices in legacy
+ * mode will not work for vDPA, this driver will not
+ * drive devices with legacy interface.
+ */
+ if (pdev->device < 0x1040)
+ vf->dev_type = pdev->subsystem_device;
+ else
+ vf->dev_type = pdev->device - 0x1040;
+
vf->base = pcim_iomap_table(pdev);
adapter->pdev = pdev;
for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
vf->vring[i].irq = -EINVAL;
+ vf->hw_features = ifcvf_get_hw_features(vf);
+
ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2);
if (ret) {
IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
}
static struct pci_device_id ifcvf_pci_ids[] = {
- { PCI_DEVICE_SUB(IFCVF_VENDOR_ID,
- IFCVF_DEVICE_ID,
- IFCVF_SUBSYS_VENDOR_ID,
- IFCVF_SUBSYS_DEVICE_ID) },
+ { PCI_DEVICE_SUB(N3000_VENDOR_ID,
+ N3000_DEVICE_ID,
+ N3000_SUBSYS_VENDOR_ID,
+ N3000_SUBSYS_DEVICE_ID) },
+ { PCI_DEVICE_SUB(C5000X_PL_VENDOR_ID,
+ C5000X_PL_DEVICE_ID,
+ C5000X_PL_SUBSYS_VENDOR_ID,
+ C5000X_PL_SUBSYS_DEVICE_ID) },
+ { PCI_DEVICE_SUB(C5000X_PL_BLK_VENDOR_ID,
+ C5000X_PL_BLK_DEVICE_ID,
+ C5000X_PL_BLK_SUBSYS_VENDOR_ID,
+ C5000X_PL_BLK_SUBSYS_DEVICE_ID) },
+
{ 0 },
};
MODULE_DEVICE_TABLE(pci, ifcvf_pci_ids);
module_pci_driver(ifcvf_driver);
MODULE_LICENSE("GPL v2");
-MODULE_VERSION(VERSION_STRING);
ndev->mvdev.status |= VIRTIO_CONFIG_S_FAILED;
}
+static size_t mlx5_vdpa_get_config_size(struct vdpa_device *vdev)
+{
+ return sizeof(struct virtio_net_config);
+}
+
static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset, void *buf,
unsigned int len)
{
.get_vendor_id = mlx5_vdpa_get_vendor_id,
.get_status = mlx5_vdpa_get_status,
.set_status = mlx5_vdpa_set_status,
+ .get_config_size = mlx5_vdpa_get_config_size,
.get_config = mlx5_vdpa_get_config,
.set_config = mlx5_vdpa_set_config,
.get_generation = mlx5_vdpa_get_generation,
}
}
-static int mlx5v_probe(struct auxiliary_device *adev,
- const struct auxiliary_device_id *id)
+struct mlx5_vdpa_mgmtdev {
+ struct vdpa_mgmt_dev mgtdev;
+ struct mlx5_adev *madev;
+ struct mlx5_vdpa_net *ndev;
+};
+
+static int mlx5_vdpa_dev_add(struct vdpa_mgmt_dev *v_mdev, const char *name)
{
- struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
- struct mlx5_core_dev *mdev = madev->mdev;
+ struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
struct virtio_net_config *config;
struct mlx5_vdpa_dev *mvdev;
struct mlx5_vdpa_net *ndev;
+ struct mlx5_core_dev *mdev;
u32 max_vqs;
int err;
+ if (mgtdev->ndev)
+ return -ENOSPC;
+
+ mdev = mgtdev->madev->mdev;
/* we save one virtqueue for control virtqueue should we require it */
max_vqs = MLX5_CAP_DEV_VDPA_EMULATION(mdev, max_num_virtio_queues);
max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
- NULL);
+ name);
if (IS_ERR(ndev))
return PTR_ERR(ndev);
if (err)
goto err_res;
- err = vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
+ mvdev->vdev.mdev = &mgtdev->mgtdev;
+ err = _vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
if (err)
goto err_reg;
- dev_set_drvdata(&adev->dev, ndev);
+ mgtdev->ndev = ndev;
return 0;
err_reg:
return err;
}
+static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *dev)
+{
+ struct mlx5_vdpa_mgmtdev *mgtdev = container_of(v_mdev, struct mlx5_vdpa_mgmtdev, mgtdev);
+
+ _vdpa_unregister_device(dev);
+ mgtdev->ndev = NULL;
+}
+
+static const struct vdpa_mgmtdev_ops mdev_ops = {
+ .dev_add = mlx5_vdpa_dev_add,
+ .dev_del = mlx5_vdpa_dev_del,
+};
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static int mlx5v_probe(struct auxiliary_device *adev,
+ const struct auxiliary_device_id *id)
+
+{
+ struct mlx5_adev *madev = container_of(adev, struct mlx5_adev, adev);
+ struct mlx5_core_dev *mdev = madev->mdev;
+ struct mlx5_vdpa_mgmtdev *mgtdev;
+ int err;
+
+ mgtdev = kzalloc(sizeof(*mgtdev), GFP_KERNEL);
+ if (!mgtdev)
+ return -ENOMEM;
+
+ mgtdev->mgtdev.ops = &mdev_ops;
+ mgtdev->mgtdev.device = mdev->device;
+ mgtdev->mgtdev.id_table = id_table;
+ mgtdev->madev = madev;
+
+ err = vdpa_mgmtdev_register(&mgtdev->mgtdev);
+ if (err)
+ goto reg_err;
+
+ dev_set_drvdata(&adev->dev, mgtdev);
+
+ return 0;
+
+reg_err:
+ kfree(mgtdev);
+ return err;
+}
+
static void mlx5v_remove(struct auxiliary_device *adev)
{
- struct mlx5_vdpa_dev *mvdev = dev_get_drvdata(&adev->dev);
+ struct mlx5_vdpa_mgmtdev *mgtdev;
- vdpa_unregister_device(&mvdev->vdev);
+ mgtdev = dev_get_drvdata(&adev->dev);
+ vdpa_mgmtdev_unregister(&mgtdev->mgtdev);
+ kfree(mgtdev);
}
static const struct auxiliary_device_id mlx5v_id_table[] = {
* Driver should use vdpa_alloc_device() wrapper macro instead of
* using this directly.
*
- * Returns an error when parent/config/dma_dev is not set or fail to get
- * ida.
+ * Return: Returns an error when parent/config/dma_dev is not set or fail to get
+ * ida.
*/
struct vdpa_device *__vdpa_alloc_device(struct device *parent,
const struct vdpa_config_ops *config,
* @vdev: the vdpa device to be registered to vDPA bus
* @nvqs: number of virtqueues supported by this device
*
- * Returns an error when fail to add device to vDPA bus
+ * Return: Returns an error when fail to add device to vDPA bus
*/
int _vdpa_register_device(struct vdpa_device *vdev, int nvqs)
{
* @vdev: the vdpa device to be registered to vDPA bus
* @nvqs: number of virtqueues supported by this device
*
- * Returns an error when fail to add to vDPA bus
+ * Return: Returns an error when fail to add to vDPA bus
*/
int vdpa_register_device(struct vdpa_device *vdev, int nvqs)
{
* @drv: the vdpa device driver to be registered
* @owner: module owner of the driver
*
- * Returns an err when fail to do the registration
+ * Return: Returns an err when fail to do the registration
*/
int __vdpa_register_driver(struct vdpa_driver *drv, struct module *owner)
{
* @mdev: Pointer to vdpa management device
* vdpa_mgmtdev_register() register a vdpa management device which supports
* vdpa device management.
+ * Return: Returns 0 on success or failure when required callback ops are not
+ * initialized.
*/
int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev)
{
# SPDX-License-Identifier: GPL-2.0
obj-$(CONFIG_VDPA_SIM) += vdpa_sim.o
obj-$(CONFIG_VDPA_SIM_NET) += vdpa_sim_net.o
+obj-$(CONFIG_VDPA_SIM_BLOCK) += vdpa_sim_blk.o
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/vhost_iotlb.h>
+#include <linux/iova.h>
#include "vdpa_sim.h"
return perm;
}
+static dma_addr_t vdpasim_map_range(struct vdpasim *vdpasim, phys_addr_t paddr,
+ size_t size, unsigned int perm)
+{
+ struct iova *iova;
+ dma_addr_t dma_addr;
+ int ret;
+
+ /* We set the limit_pfn to the maximum (ULONG_MAX - 1) */
+ iova = alloc_iova(&vdpasim->iova, size, ULONG_MAX - 1, true);
+ if (!iova)
+ return DMA_MAPPING_ERROR;
+
+ dma_addr = iova_dma_addr(&vdpasim->iova, iova);
+
+ spin_lock(&vdpasim->iommu_lock);
+ ret = vhost_iotlb_add_range(vdpasim->iommu, (u64)dma_addr,
+ (u64)dma_addr + size - 1, (u64)paddr, perm);
+ spin_unlock(&vdpasim->iommu_lock);
+
+ if (ret) {
+ __free_iova(&vdpasim->iova, iova);
+ return DMA_MAPPING_ERROR;
+ }
+
+ return dma_addr;
+}
+
+static void vdpasim_unmap_range(struct vdpasim *vdpasim, dma_addr_t dma_addr,
+ size_t size)
+{
+ spin_lock(&vdpasim->iommu_lock);
+ vhost_iotlb_del_range(vdpasim->iommu, (u64)dma_addr,
+ (u64)dma_addr + size - 1);
+ spin_unlock(&vdpasim->iommu_lock);
+
+ free_iova(&vdpasim->iova, iova_pfn(&vdpasim->iova, dma_addr));
+}
+
static dma_addr_t vdpasim_map_page(struct device *dev, struct page *page,
unsigned long offset, size_t size,
enum dma_data_direction dir,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
- struct vhost_iotlb *iommu = vdpasim->iommu;
- u64 pa = (page_to_pfn(page) << PAGE_SHIFT) + offset;
- int ret, perm = dir_to_perm(dir);
+ phys_addr_t paddr = page_to_phys(page) + offset;
+ int perm = dir_to_perm(dir);
if (perm < 0)
return DMA_MAPPING_ERROR;
- /* For simplicity, use identical mapping to avoid e.g iova
- * allocator.
- */
- spin_lock(&vdpasim->iommu_lock);
- ret = vhost_iotlb_add_range(iommu, pa, pa + size - 1,
- pa, dir_to_perm(dir));
- spin_unlock(&vdpasim->iommu_lock);
- if (ret)
- return DMA_MAPPING_ERROR;
-
- return (dma_addr_t)(pa);
+ return vdpasim_map_range(vdpasim, paddr, size, perm);
}
static void vdpasim_unmap_page(struct device *dev, dma_addr_t dma_addr,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
- struct vhost_iotlb *iommu = vdpasim->iommu;
- spin_lock(&vdpasim->iommu_lock);
- vhost_iotlb_del_range(iommu, (u64)dma_addr,
- (u64)dma_addr + size - 1);
- spin_unlock(&vdpasim->iommu_lock);
+ vdpasim_unmap_range(vdpasim, dma_addr, size);
}
static void *vdpasim_alloc_coherent(struct device *dev, size_t size,
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
- struct vhost_iotlb *iommu = vdpasim->iommu;
- void *addr = kmalloc(size, flag);
- int ret;
+ phys_addr_t paddr;
+ void *addr;
- spin_lock(&vdpasim->iommu_lock);
+ addr = kmalloc(size, flag);
if (!addr) {
*dma_addr = DMA_MAPPING_ERROR;
- } else {
- u64 pa = virt_to_phys(addr);
-
- ret = vhost_iotlb_add_range(iommu, (u64)pa,
- (u64)pa + size - 1,
- pa, VHOST_MAP_RW);
- if (ret) {
- *dma_addr = DMA_MAPPING_ERROR;
- kfree(addr);
- addr = NULL;
- } else
- *dma_addr = (dma_addr_t)pa;
+ return NULL;
+ }
+
+ paddr = virt_to_phys(addr);
+
+ *dma_addr = vdpasim_map_range(vdpasim, paddr, size, VHOST_MAP_RW);
+ if (*dma_addr == DMA_MAPPING_ERROR) {
+ kfree(addr);
+ return NULL;
}
- spin_unlock(&vdpasim->iommu_lock);
return addr;
}
unsigned long attrs)
{
struct vdpasim *vdpasim = dev_to_sim(dev);
- struct vhost_iotlb *iommu = vdpasim->iommu;
- spin_lock(&vdpasim->iommu_lock);
- vhost_iotlb_del_range(iommu, (u64)dma_addr,
- (u64)dma_addr + size - 1);
- spin_unlock(&vdpasim->iommu_lock);
+ vdpasim_unmap_range(vdpasim, dma_addr, size);
- kfree(phys_to_virt((uintptr_t)dma_addr));
+ kfree(vaddr);
}
static const struct dma_map_ops vdpasim_dma_ops = {
goto err_iommu;
for (i = 0; i < dev_attr->nvqs; i++)
- vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu);
+ vringh_set_iotlb(&vdpasim->vqs[i].vring, vdpasim->iommu,
+ &vdpasim->iommu_lock);
+
+ ret = iova_cache_get();
+ if (ret)
+ goto err_iommu;
+
+ /* For simplicity we use an IOVA allocator with byte granularity */
+ init_iova_domain(&vdpasim->iova, 1, 0);
vdpasim->vdpa.dma_dev = dev;
spin_unlock(&vdpasim->lock);
}
+static size_t vdpasim_get_config_size(struct vdpa_device *vdpa)
+{
+ struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+
+ return vdpasim->dev_attr.config_size;
+}
+
static void vdpasim_get_config(struct vdpa_device *vdpa, unsigned int offset,
void *buf, unsigned int len)
{
static void vdpasim_free(struct vdpa_device *vdpa)
{
struct vdpasim *vdpasim = vdpa_to_sim(vdpa);
+ int i;
cancel_work_sync(&vdpasim->work);
+
+ for (i = 0; i < vdpasim->dev_attr.nvqs; i++) {
+ vringh_kiov_cleanup(&vdpasim->vqs[i].out_iov);
+ vringh_kiov_cleanup(&vdpasim->vqs[i].in_iov);
+ }
+
+ put_iova_domain(&vdpasim->iova);
+ iova_cache_put();
kvfree(vdpasim->buffer);
if (vdpasim->iommu)
vhost_iotlb_free(vdpasim->iommu);
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
+ .get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
.get_vendor_id = vdpasim_get_vendor_id,
.get_status = vdpasim_get_status,
.set_status = vdpasim_set_status,
+ .get_config_size = vdpasim_get_config_size,
.get_config = vdpasim_get_config,
.set_config = vdpasim_set_config,
.get_generation = vdpasim_get_generation,
#ifndef _VDPA_SIM_H
#define _VDPA_SIM_H
+#include <linux/iova.h>
#include <linux/vringh.h>
#include <linux/vdpa.h>
#include <linux/virtio_byteorder.h>
/* virtio config according to device type */
void *config;
struct vhost_iotlb *iommu;
+ struct iova_domain iova;
void *buffer;
u32 status;
u32 generation;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VDPA simulator for block device.
+ *
+ * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2021, Red Hat Inc. All rights reserved.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/device.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/blkdev.h>
+#include <linux/vringh.h>
+#include <linux/vdpa.h>
+#include <linux/blkdev.h>
+#include <uapi/linux/virtio_blk.h>
+
+#include "vdpa_sim.h"
+
+#define DRV_VERSION "0.1"
+#define DRV_AUTHOR "Max Gurtovoy <mgurtovoy@nvidia.com>"
+#define DRV_DESC "vDPA Device Simulator for block device"
+#define DRV_LICENSE "GPL v2"
+
+#define VDPASIM_BLK_FEATURES (VDPASIM_FEATURES | \
+ (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
+ (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
+ (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
+ (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
+ (1ULL << VIRTIO_BLK_F_MQ))
+
+#define VDPASIM_BLK_CAPACITY 0x40000
+#define VDPASIM_BLK_SIZE_MAX 0x1000
+#define VDPASIM_BLK_SEG_MAX 32
+#define VDPASIM_BLK_VQ_NUM 1
+
+static char vdpasim_blk_id[VIRTIO_BLK_ID_BYTES] = "vdpa_blk_sim";
+
+static bool vdpasim_blk_check_range(u64 start_sector, size_t range_size)
+{
+ u64 range_sectors = range_size >> SECTOR_SHIFT;
+
+ if (range_size > VDPASIM_BLK_SIZE_MAX * VDPASIM_BLK_SEG_MAX)
+ return false;
+
+ if (start_sector > VDPASIM_BLK_CAPACITY)
+ return false;
+
+ if (range_sectors > VDPASIM_BLK_CAPACITY - start_sector)
+ return false;
+
+ return true;
+}
+
+/* Returns 'true' if the request is handled (with or without an I/O error)
+ * and the status is correctly written in the last byte of the 'in iov',
+ * 'false' otherwise.
+ */
+static bool vdpasim_blk_handle_req(struct vdpasim *vdpasim,
+ struct vdpasim_virtqueue *vq)
+{
+ size_t pushed = 0, to_pull, to_push;
+ struct virtio_blk_outhdr hdr;
+ ssize_t bytes;
+ loff_t offset;
+ u64 sector;
+ u8 status;
+ u32 type;
+ int ret;
+
+ ret = vringh_getdesc_iotlb(&vq->vring, &vq->out_iov, &vq->in_iov,
+ &vq->head, GFP_ATOMIC);
+ if (ret != 1)
+ return false;
+
+ if (vq->out_iov.used < 1 || vq->in_iov.used < 1) {
+ dev_err(&vdpasim->vdpa.dev, "missing headers - out_iov: %u in_iov %u\n",
+ vq->out_iov.used, vq->in_iov.used);
+ return false;
+ }
+
+ if (vq->in_iov.iov[vq->in_iov.used - 1].iov_len < 1) {
+ dev_err(&vdpasim->vdpa.dev, "request in header too short\n");
+ return false;
+ }
+
+ /* The last byte is the status and we checked if the last iov has
+ * enough room for it.
+ */
+ to_push = vringh_kiov_length(&vq->in_iov) - 1;
+
+ to_pull = vringh_kiov_length(&vq->out_iov);
+
+ bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov, &hdr,
+ sizeof(hdr));
+ if (bytes != sizeof(hdr)) {
+ dev_err(&vdpasim->vdpa.dev, "request out header too short\n");
+ return false;
+ }
+
+ to_pull -= bytes;
+
+ type = vdpasim32_to_cpu(vdpasim, hdr.type);
+ sector = vdpasim64_to_cpu(vdpasim, hdr.sector);
+ offset = sector << SECTOR_SHIFT;
+ status = VIRTIO_BLK_S_OK;
+
+ switch (type) {
+ case VIRTIO_BLK_T_IN:
+ if (!vdpasim_blk_check_range(sector, to_push)) {
+ dev_err(&vdpasim->vdpa.dev,
+ "reading over the capacity - offset: 0x%llx len: 0x%zx\n",
+ offset, to_push);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
+ vdpasim->buffer + offset,
+ to_push);
+ if (bytes < 0) {
+ dev_err(&vdpasim->vdpa.dev,
+ "vringh_iov_push_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
+ bytes, offset, to_push);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ pushed += bytes;
+ break;
+
+ case VIRTIO_BLK_T_OUT:
+ if (!vdpasim_blk_check_range(sector, to_pull)) {
+ dev_err(&vdpasim->vdpa.dev,
+ "writing over the capacity - offset: 0x%llx len: 0x%zx\n",
+ offset, to_pull);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ bytes = vringh_iov_pull_iotlb(&vq->vring, &vq->out_iov,
+ vdpasim->buffer + offset,
+ to_pull);
+ if (bytes < 0) {
+ dev_err(&vdpasim->vdpa.dev,
+ "vringh_iov_pull_iotlb() error: %zd offset: 0x%llx len: 0x%zx\n",
+ bytes, offset, to_pull);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+ break;
+
+ case VIRTIO_BLK_T_GET_ID:
+ bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov,
+ vdpasim_blk_id,
+ VIRTIO_BLK_ID_BYTES);
+ if (bytes < 0) {
+ dev_err(&vdpasim->vdpa.dev,
+ "vringh_iov_push_iotlb() error: %zd\n", bytes);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ pushed += bytes;
+ break;
+
+ default:
+ dev_warn(&vdpasim->vdpa.dev,
+ "Unsupported request type %d\n", type);
+ status = VIRTIO_BLK_S_IOERR;
+ break;
+ }
+
+ /* If some operations fail, we need to skip the remaining bytes
+ * to put the status in the last byte
+ */
+ if (to_push - pushed > 0)
+ vringh_kiov_advance(&vq->in_iov, to_push - pushed);
+
+ /* Last byte is the status */
+ bytes = vringh_iov_push_iotlb(&vq->vring, &vq->in_iov, &status, 1);
+ if (bytes != 1)
+ return false;
+
+ pushed += bytes;
+
+ /* Make sure data is wrote before advancing index */
+ smp_wmb();
+
+ vringh_complete_iotlb(&vq->vring, vq->head, pushed);
+
+ return true;
+}
+
+static void vdpasim_blk_work(struct work_struct *work)
+{
+ struct vdpasim *vdpasim = container_of(work, struct vdpasim, work);
+ int i;
+
+ spin_lock(&vdpasim->lock);
+
+ if (!(vdpasim->status & VIRTIO_CONFIG_S_DRIVER_OK))
+ goto out;
+
+ for (i = 0; i < VDPASIM_BLK_VQ_NUM; i++) {
+ struct vdpasim_virtqueue *vq = &vdpasim->vqs[i];
+
+ if (!vq->ready)
+ continue;
+
+ while (vdpasim_blk_handle_req(vdpasim, vq)) {
+ /* Make sure used is visible before rasing the interrupt. */
+ smp_wmb();
+
+ local_bh_disable();
+ if (vringh_need_notify_iotlb(&vq->vring) > 0)
+ vringh_notify(&vq->vring);
+ local_bh_enable();
+ }
+ }
+out:
+ spin_unlock(&vdpasim->lock);
+}
+
+static void vdpasim_blk_get_config(struct vdpasim *vdpasim, void *config)
+{
+ struct virtio_blk_config *blk_config = config;
+
+ memset(config, 0, sizeof(struct virtio_blk_config));
+
+ blk_config->capacity = cpu_to_vdpasim64(vdpasim, VDPASIM_BLK_CAPACITY);
+ blk_config->size_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SIZE_MAX);
+ blk_config->seg_max = cpu_to_vdpasim32(vdpasim, VDPASIM_BLK_SEG_MAX);
+ blk_config->num_queues = cpu_to_vdpasim16(vdpasim, VDPASIM_BLK_VQ_NUM);
+ blk_config->min_io_size = cpu_to_vdpasim16(vdpasim, 1);
+ blk_config->opt_io_size = cpu_to_vdpasim32(vdpasim, 1);
+ blk_config->blk_size = cpu_to_vdpasim32(vdpasim, SECTOR_SIZE);
+}
+
+static void vdpasim_blk_mgmtdev_release(struct device *dev)
+{
+}
+
+static struct device vdpasim_blk_mgmtdev = {
+ .init_name = "vdpasim_blk",
+ .release = vdpasim_blk_mgmtdev_release,
+};
+
+static int vdpasim_blk_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
+{
+ struct vdpasim_dev_attr dev_attr = {};
+ struct vdpasim *simdev;
+ int ret;
+
+ dev_attr.mgmt_dev = mdev;
+ dev_attr.name = name;
+ dev_attr.id = VIRTIO_ID_BLOCK;
+ dev_attr.supported_features = VDPASIM_BLK_FEATURES;
+ dev_attr.nvqs = VDPASIM_BLK_VQ_NUM;
+ dev_attr.config_size = sizeof(struct virtio_blk_config);
+ dev_attr.get_config = vdpasim_blk_get_config;
+ dev_attr.work_fn = vdpasim_blk_work;
+ dev_attr.buffer_size = VDPASIM_BLK_CAPACITY << SECTOR_SHIFT;
+
+ simdev = vdpasim_create(&dev_attr);
+ if (IS_ERR(simdev))
+ return PTR_ERR(simdev);
+
+ ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_BLK_VQ_NUM);
+ if (ret)
+ goto put_dev;
+
+ return 0;
+
+put_dev:
+ put_device(&simdev->vdpa.dev);
+ return ret;
+}
+
+static void vdpasim_blk_dev_del(struct vdpa_mgmt_dev *mdev,
+ struct vdpa_device *dev)
+{
+ struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);
+
+ _vdpa_unregister_device(&simdev->vdpa);
+}
+
+static const struct vdpa_mgmtdev_ops vdpasim_blk_mgmtdev_ops = {
+ .dev_add = vdpasim_blk_dev_add,
+ .dev_del = vdpasim_blk_dev_del
+};
+
+static struct virtio_device_id id_table[] = {
+ { VIRTIO_ID_BLOCK, VIRTIO_DEV_ANY_ID },
+ { 0 },
+};
+
+static struct vdpa_mgmt_dev mgmt_dev = {
+ .device = &vdpasim_blk_mgmtdev,
+ .id_table = id_table,
+ .ops = &vdpasim_blk_mgmtdev_ops,
+};
+
+static int __init vdpasim_blk_init(void)
+{
+ int ret;
+
+ ret = device_register(&vdpasim_blk_mgmtdev);
+ if (ret)
+ return ret;
+
+ ret = vdpa_mgmtdev_register(&mgmt_dev);
+ if (ret)
+ goto parent_err;
+
+ return 0;
+
+parent_err:
+ device_unregister(&vdpasim_blk_mgmtdev);
+ return ret;
+}
+
+static void __exit vdpasim_blk_exit(void)
+{
+ vdpa_mgmtdev_unregister(&mgmt_dev);
+ device_unregister(&vdpasim_blk_mgmtdev);
+}
+
+module_init(vdpasim_blk_init)
+module_exit(vdpasim_blk_exit)
+
+MODULE_VERSION(DRV_VERSION);
+MODULE_LICENSE(DRV_LICENSE);
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESC);
--- /dev/null
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_VP_VDPA) += vp_vdpa.o
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vDPA bridge driver for modern virtio-pci device
+ *
+ * Copyright (c) 2020, Red Hat Inc. All rights reserved.
+ * Author: Jason Wang <jasowang@redhat.com>
+ *
+ * Based on virtio_pci_modern.c.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/vdpa.h>
+#include <linux/virtio.h>
+#include <linux/virtio_config.h>
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+#include <linux/virtio_pci_modern.h>
+
+#define VP_VDPA_QUEUE_MAX 256
+#define VP_VDPA_DRIVER_NAME "vp_vdpa"
+#define VP_VDPA_NAME_SIZE 256
+
+struct vp_vring {
+ void __iomem *notify;
+ char msix_name[VP_VDPA_NAME_SIZE];
+ struct vdpa_callback cb;
+ resource_size_t notify_pa;
+ int irq;
+};
+
+struct vp_vdpa {
+ struct vdpa_device vdpa;
+ struct virtio_pci_modern_device mdev;
+ struct vp_vring *vring;
+ struct vdpa_callback config_cb;
+ char msix_name[VP_VDPA_NAME_SIZE];
+ int config_irq;
+ int queues;
+ int vectors;
+};
+
+static struct vp_vdpa *vdpa_to_vp(struct vdpa_device *vdpa)
+{
+ return container_of(vdpa, struct vp_vdpa, vdpa);
+}
+
+static struct virtio_pci_modern_device *vdpa_to_mdev(struct vdpa_device *vdpa)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+
+ return &vp_vdpa->mdev;
+}
+
+static u64 vp_vdpa_get_features(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_get_features(mdev);
+}
+
+static int vp_vdpa_set_features(struct vdpa_device *vdpa, u64 features)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ vp_modern_set_features(mdev, features);
+
+ return 0;
+}
+
+static u8 vp_vdpa_get_status(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_get_status(mdev);
+}
+
+static void vp_vdpa_free_irq(struct vp_vdpa *vp_vdpa)
+{
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ struct pci_dev *pdev = mdev->pci_dev;
+ int i;
+
+ for (i = 0; i < vp_vdpa->queues; i++) {
+ if (vp_vdpa->vring[i].irq != VIRTIO_MSI_NO_VECTOR) {
+ vp_modern_queue_vector(mdev, i, VIRTIO_MSI_NO_VECTOR);
+ devm_free_irq(&pdev->dev, vp_vdpa->vring[i].irq,
+ &vp_vdpa->vring[i]);
+ vp_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
+ }
+ }
+
+ if (vp_vdpa->config_irq != VIRTIO_MSI_NO_VECTOR) {
+ vp_modern_config_vector(mdev, VIRTIO_MSI_NO_VECTOR);
+ devm_free_irq(&pdev->dev, vp_vdpa->config_irq, vp_vdpa);
+ vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
+ }
+
+ if (vp_vdpa->vectors) {
+ pci_free_irq_vectors(pdev);
+ vp_vdpa->vectors = 0;
+ }
+}
+
+static irqreturn_t vp_vdpa_vq_handler(int irq, void *arg)
+{
+ struct vp_vring *vring = arg;
+
+ if (vring->cb.callback)
+ return vring->cb.callback(vring->cb.private);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t vp_vdpa_config_handler(int irq, void *arg)
+{
+ struct vp_vdpa *vp_vdpa = arg;
+
+ if (vp_vdpa->config_cb.callback)
+ return vp_vdpa->config_cb.callback(vp_vdpa->config_cb.private);
+
+ return IRQ_HANDLED;
+}
+
+static int vp_vdpa_request_irq(struct vp_vdpa *vp_vdpa)
+{
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ struct pci_dev *pdev = mdev->pci_dev;
+ int i, ret, irq;
+ int queues = vp_vdpa->queues;
+ int vectors = queues + 1;
+
+ ret = pci_alloc_irq_vectors(pdev, vectors, vectors, PCI_IRQ_MSIX);
+ if (ret != vectors) {
+ dev_err(&pdev->dev,
+ "vp_vdpa: fail to allocate irq vectors want %d but %d\n",
+ vectors, ret);
+ return ret;
+ }
+
+ vp_vdpa->vectors = vectors;
+
+ for (i = 0; i < queues; i++) {
+ snprintf(vp_vdpa->vring[i].msix_name, VP_VDPA_NAME_SIZE,
+ "vp-vdpa[%s]-%d\n", pci_name(pdev), i);
+ irq = pci_irq_vector(pdev, i);
+ ret = devm_request_irq(&pdev->dev, irq,
+ vp_vdpa_vq_handler,
+ 0, vp_vdpa->vring[i].msix_name,
+ &vp_vdpa->vring[i]);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "vp_vdpa: fail to request irq for vq %d\n", i);
+ goto err;
+ }
+ vp_modern_queue_vector(mdev, i, i);
+ vp_vdpa->vring[i].irq = irq;
+ }
+
+ snprintf(vp_vdpa->msix_name, VP_VDPA_NAME_SIZE, "vp-vdpa[%s]-config\n",
+ pci_name(pdev));
+ irq = pci_irq_vector(pdev, queues);
+ ret = devm_request_irq(&pdev->dev, irq, vp_vdpa_config_handler, 0,
+ vp_vdpa->msix_name, vp_vdpa);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "vp_vdpa: fail to request irq for vq %d\n", i);
+ goto err;
+ }
+ vp_modern_config_vector(mdev, queues);
+ vp_vdpa->config_irq = irq;
+
+ return 0;
+err:
+ vp_vdpa_free_irq(vp_vdpa);
+ return ret;
+}
+
+static void vp_vdpa_set_status(struct vdpa_device *vdpa, u8 status)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ u8 s = vp_vdpa_get_status(vdpa);
+
+ if (status & VIRTIO_CONFIG_S_DRIVER_OK &&
+ !(s & VIRTIO_CONFIG_S_DRIVER_OK)) {
+ vp_vdpa_request_irq(vp_vdpa);
+ }
+
+ vp_modern_set_status(mdev, status);
+
+ if (!(status & VIRTIO_CONFIG_S_DRIVER_OK) &&
+ (s & VIRTIO_CONFIG_S_DRIVER_OK))
+ vp_vdpa_free_irq(vp_vdpa);
+}
+
+static u16 vp_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
+{
+ return VP_VDPA_QUEUE_MAX;
+}
+
+static int vp_vdpa_get_vq_state(struct vdpa_device *vdpa, u16 qid,
+ struct vdpa_vq_state *state)
+{
+ /* Note that this is not supported by virtio specification, so
+ * we return -EOPNOTSUPP here. This means we can't support live
+ * migration, vhost device start/stop.
+ */
+ return -EOPNOTSUPP;
+}
+
+static int vp_vdpa_set_vq_state(struct vdpa_device *vdpa, u16 qid,
+ const struct vdpa_vq_state *state)
+{
+ /* Note that this is not supported by virtio specification, so
+ * we return -ENOPOTSUPP here. This means we can't support live
+ * migration, vhost device start/stop.
+ */
+ return -EOPNOTSUPP;
+}
+
+static void vp_vdpa_set_vq_cb(struct vdpa_device *vdpa, u16 qid,
+ struct vdpa_callback *cb)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+
+ vp_vdpa->vring[qid].cb = *cb;
+}
+
+static void vp_vdpa_set_vq_ready(struct vdpa_device *vdpa,
+ u16 qid, bool ready)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ vp_modern_set_queue_enable(mdev, qid, ready);
+}
+
+static bool vp_vdpa_get_vq_ready(struct vdpa_device *vdpa, u16 qid)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_get_queue_enable(mdev, qid);
+}
+
+static void vp_vdpa_set_vq_num(struct vdpa_device *vdpa, u16 qid,
+ u32 num)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ vp_modern_set_queue_size(mdev, qid, num);
+}
+
+static int vp_vdpa_set_vq_address(struct vdpa_device *vdpa, u16 qid,
+ u64 desc_area, u64 driver_area,
+ u64 device_area)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ vp_modern_queue_address(mdev, qid, desc_area,
+ driver_area, device_area);
+
+ return 0;
+}
+
+static void vp_vdpa_kick_vq(struct vdpa_device *vdpa, u16 qid)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+
+ vp_iowrite16(qid, vp_vdpa->vring[qid].notify);
+}
+
+static u32 vp_vdpa_get_generation(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return vp_modern_generation(mdev);
+}
+
+static u32 vp_vdpa_get_device_id(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return mdev->id.device;
+}
+
+static u32 vp_vdpa_get_vendor_id(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return mdev->id.vendor;
+}
+
+static u32 vp_vdpa_get_vq_align(struct vdpa_device *vdpa)
+{
+ return PAGE_SIZE;
+}
+
+static size_t vp_vdpa_get_config_size(struct vdpa_device *vdpa)
+{
+ struct virtio_pci_modern_device *mdev = vdpa_to_mdev(vdpa);
+
+ return mdev->device_len;
+}
+
+static void vp_vdpa_get_config(struct vdpa_device *vdpa,
+ unsigned int offset,
+ void *buf, unsigned int len)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ u8 old, new;
+ u8 *p;
+ int i;
+
+ do {
+ old = vp_ioread8(&mdev->common->config_generation);
+ p = buf;
+ for (i = 0; i < len; i++)
+ *p++ = vp_ioread8(mdev->device + offset + i);
+
+ new = vp_ioread8(&mdev->common->config_generation);
+ } while (old != new);
+}
+
+static void vp_vdpa_set_config(struct vdpa_device *vdpa,
+ unsigned int offset, const void *buf,
+ unsigned int len)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ const u8 *p = buf;
+ int i;
+
+ for (i = 0; i < len; i++)
+ vp_iowrite8(*p++, mdev->device + offset + i);
+}
+
+static void vp_vdpa_set_config_cb(struct vdpa_device *vdpa,
+ struct vdpa_callback *cb)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+
+ vp_vdpa->config_cb = *cb;
+}
+
+static struct vdpa_notification_area
+vp_vdpa_get_vq_notification(struct vdpa_device *vdpa, u16 qid)
+{
+ struct vp_vdpa *vp_vdpa = vdpa_to_vp(vdpa);
+ struct virtio_pci_modern_device *mdev = &vp_vdpa->mdev;
+ struct vdpa_notification_area notify;
+
+ notify.addr = vp_vdpa->vring[qid].notify_pa;
+ notify.size = mdev->notify_offset_multiplier;
+
+ return notify;
+}
+
+static const struct vdpa_config_ops vp_vdpa_ops = {
+ .get_features = vp_vdpa_get_features,
+ .set_features = vp_vdpa_set_features,
+ .get_status = vp_vdpa_get_status,
+ .set_status = vp_vdpa_set_status,
+ .get_vq_num_max = vp_vdpa_get_vq_num_max,
+ .get_vq_state = vp_vdpa_get_vq_state,
+ .get_vq_notification = vp_vdpa_get_vq_notification,
+ .set_vq_state = vp_vdpa_set_vq_state,
+ .set_vq_cb = vp_vdpa_set_vq_cb,
+ .set_vq_ready = vp_vdpa_set_vq_ready,
+ .get_vq_ready = vp_vdpa_get_vq_ready,
+ .set_vq_num = vp_vdpa_set_vq_num,
+ .set_vq_address = vp_vdpa_set_vq_address,
+ .kick_vq = vp_vdpa_kick_vq,
+ .get_generation = vp_vdpa_get_generation,
+ .get_device_id = vp_vdpa_get_device_id,
+ .get_vendor_id = vp_vdpa_get_vendor_id,
+ .get_vq_align = vp_vdpa_get_vq_align,
+ .get_config_size = vp_vdpa_get_config_size,
+ .get_config = vp_vdpa_get_config,
+ .set_config = vp_vdpa_set_config,
+ .set_config_cb = vp_vdpa_set_config_cb,
+};
+
+static void vp_vdpa_free_irq_vectors(void *data)
+{
+ pci_free_irq_vectors(data);
+}
+
+static int vp_vdpa_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+ struct virtio_pci_modern_device *mdev;
+ struct device *dev = &pdev->dev;
+ struct vp_vdpa *vp_vdpa;
+ int ret, i;
+
+ ret = pcim_enable_device(pdev);
+ if (ret)
+ return ret;
+
+ vp_vdpa = vdpa_alloc_device(struct vp_vdpa, vdpa,
+ dev, &vp_vdpa_ops, NULL);
+ if (vp_vdpa == NULL) {
+ dev_err(dev, "vp_vdpa: Failed to allocate vDPA structure\n");
+ return -ENOMEM;
+ }
+
+ mdev = &vp_vdpa->mdev;
+ mdev->pci_dev = pdev;
+
+ ret = vp_modern_probe(mdev);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to probe modern PCI device\n");
+ goto err;
+ }
+
+ pci_set_master(pdev);
+ pci_set_drvdata(pdev, vp_vdpa);
+
+ vp_vdpa->vdpa.dma_dev = &pdev->dev;
+ vp_vdpa->queues = vp_modern_get_num_queues(mdev);
+
+ ret = devm_add_action_or_reset(dev, vp_vdpa_free_irq_vectors, pdev);
+ if (ret) {
+ dev_err(&pdev->dev,
+ "Failed for adding devres for freeing irq vectors\n");
+ goto err;
+ }
+
+ vp_vdpa->vring = devm_kcalloc(&pdev->dev, vp_vdpa->queues,
+ sizeof(*vp_vdpa->vring),
+ GFP_KERNEL);
+ if (!vp_vdpa->vring) {
+ ret = -ENOMEM;
+ dev_err(&pdev->dev, "Fail to allocate virtqueues\n");
+ goto err;
+ }
+
+ for (i = 0; i < vp_vdpa->queues; i++) {
+ vp_vdpa->vring[i].irq = VIRTIO_MSI_NO_VECTOR;
+ vp_vdpa->vring[i].notify =
+ vp_modern_map_vq_notify(mdev, i,
+ &vp_vdpa->vring[i].notify_pa);
+ if (!vp_vdpa->vring[i].notify) {
+ dev_warn(&pdev->dev, "Fail to map vq notify %d\n", i);
+ goto err;
+ }
+ }
+ vp_vdpa->config_irq = VIRTIO_MSI_NO_VECTOR;
+
+ ret = vdpa_register_device(&vp_vdpa->vdpa, vp_vdpa->queues);
+ if (ret) {
+ dev_err(&pdev->dev, "Failed to register to vdpa bus\n");
+ goto err;
+ }
+
+ return 0;
+
+err:
+ put_device(&vp_vdpa->vdpa.dev);
+ return ret;
+}
+
+static void vp_vdpa_remove(struct pci_dev *pdev)
+{
+ struct vp_vdpa *vp_vdpa = pci_get_drvdata(pdev);
+
+ vdpa_unregister_device(&vp_vdpa->vdpa);
+ vp_modern_remove(&vp_vdpa->mdev);
+}
+
+static struct pci_driver vp_vdpa_driver = {
+ .name = "vp-vdpa",
+ .id_table = NULL, /* only dynamic ids */
+ .probe = vp_vdpa_probe,
+ .remove = vp_vdpa_remove,
+};
+
+module_pci_driver(vp_vdpa_driver);
+
+MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
+MODULE_DESCRIPTION("vp-vdpa");
+MODULE_LICENSE("GPL");
+MODULE_VERSION("1");
#include <linux/cdev.h>
#include <linux/device.h>
#include <linux/mm.h>
+#include <linux/slab.h>
#include <linux/iommu.h>
#include <linux/uuid.h>
#include <linux/vdpa.h>
#include <linux/nospec.h>
#include <linux/vhost.h>
-#include <linux/virtio_net.h>
#include "vhost.h"
static int vhost_vdpa_config_validate(struct vhost_vdpa *v,
struct vhost_vdpa_config *c)
{
- long size = 0;
-
- switch (v->virtio_id) {
- case VIRTIO_ID_NET:
- size = sizeof(struct virtio_net_config);
- break;
- }
+ struct vdpa_device *vdpa = v->vdpa;
+ long size = vdpa->config->get_config_size(vdpa);
if (c->len == 0)
return -EINVAL;
if (vma->vm_end - vma->vm_start != notify.size)
return -ENOTSUPP;
+ vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
vma->vm_ops = &vhost_vdpa_vm_ops;
return 0;
}
int minor;
int r;
- /* Currently, we only accept the network devices. */
- if (ops->get_device_id(vdpa) != VIRTIO_ID_NET)
- return -ENOTSUPP;
-
v = kzalloc(sizeof(*v), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
if (!v)
return -ENOMEM;
return head;
}
+/**
+ * vringh_kiov_advance - skip bytes from vring_kiov
+ * @iov: an iov passed to vringh_getdesc_*() (updated as we consume)
+ * @len: the maximum length to advance
+ */
+void vringh_kiov_advance(struct vringh_kiov *iov, size_t len)
+{
+ while (len && iov->i < iov->used) {
+ size_t partlen = min(iov->iov[iov->i].iov_len, len);
+
+ iov->consumed += partlen;
+ iov->iov[iov->i].iov_len -= partlen;
+ iov->iov[iov->i].iov_base += partlen;
+
+ if (!iov->iov[iov->i].iov_len) {
+ /* Fix up old iov element then increment. */
+ iov->iov[iov->i].iov_len = iov->consumed;
+ iov->iov[iov->i].iov_base -= iov->consumed;
+
+ iov->consumed = 0;
+ iov->i++;
+ }
+
+ len -= partlen;
+ }
+}
+EXPORT_SYMBOL(vringh_kiov_advance);
+
/* Copy some bytes to/from the iovec. Returns num copied. */
static inline ssize_t vringh_iov_xfer(struct vringh *vrh,
struct vringh_kiov *iov,
done += partlen;
len -= partlen;
ptr += partlen;
- iov->consumed += partlen;
- iov->iov[iov->i].iov_len -= partlen;
- iov->iov[iov->i].iov_base += partlen;
- if (!iov->iov[iov->i].iov_len) {
- /* Fix up old iov element then increment. */
- iov->iov[iov->i].iov_len = iov->consumed;
- iov->iov[iov->i].iov_base -= iov->consumed;
-
-
- iov->consumed = 0;
- iov->i++;
- }
+ vringh_kiov_advance(iov, partlen);
}
return done;
}
return -EINVAL;
if (riov)
- riov->i = riov->used = 0;
+ riov->i = riov->used = riov->consumed = 0;
if (wiov)
- wiov->i = wiov->used = 0;
+ wiov->i = wiov->used = wiov->consumed = 0;
for (;;) {
void *addr;
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_iov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_user(struct vringh *vrh,
struct vringh_iov *riov,
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_kiov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
int ret = 0;
u64 s = 0;
+ spin_lock(vrh->iotlb_lock);
+
while (len > s) {
u64 size, pa, pfn;
++ret;
}
+ spin_unlock(vrh->iotlb_lock);
+
return ret;
}
* vringh_set_iotlb - initialize a vringh for a ring with IOTLB.
* @vrh: the vring
* @iotlb: iotlb associated with this vring
+ * @iotlb_lock: spinlock to synchronize the iotlb accesses
*/
-void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb)
+void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
+ spinlock_t *iotlb_lock)
{
vrh->iotlb = iotlb;
+ vrh->iotlb_lock = iotlb_lock;
}
EXPORT_SYMBOL(vringh_set_iotlb);
* *head will be vrh->vring.num. You may be able to ignore an invalid
* descriptor, but there's not much you can do with an invalid ring.
*
- * Note that you may need to clean up riov and wiov, even on error!
+ * Note that you can reuse riov and wiov with subsequent calls. Content is
+ * overwritten and memory reallocated if more space is needed.
+ * When you don't have to use riov and wiov anymore, you should clean up them
+ * calling vringh_kiov_cleanup() to release the memory, even on error!
*/
int vringh_getdesc_iotlb(struct vringh *vrh,
struct vringh_kiov *riov,
EXPORT_SYMBOL(matroxfb_unregister_driver);
EXPORT_SYMBOL(matroxfb_wait_for_sync);
EXPORT_SYMBOL(matroxfb_enable_irq);
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
-
MODULE_LICENSE("GPL");
module_init(vga16fb_init);
module_exit(vga16fb_exit);
-
-
-/*
- * Overrides for Emacs so that we follow Linus's tabbing style.
- * ---------------------------------------------------------------------------
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
-
* enclave file descriptor to be further used for enclave
* resources handling e.g. memory regions and CPUs.
* @ne_pci_dev : Private data associated with the PCI device.
- * @slot_uid: Generated unique slot id associated with an enclave.
+ * @slot_uid: User pointer to store the generated unique slot id
+ * associated with an enclave to.
*
* Context: Process context. This function is called with the ne_pci_dev enclave
* mutex held.
* * Enclave fd on success.
* * Negative return value on failure.
*/
-static int ne_create_vm_ioctl(struct ne_pci_dev *ne_pci_dev, u64 *slot_uid)
+static int ne_create_vm_ioctl(struct ne_pci_dev *ne_pci_dev, u64 __user *slot_uid)
{
struct ne_pci_dev_cmd_reply cmd_reply = {};
int enclave_fd = -1;
list_add(&ne_enclave->enclave_list_entry, &ne_pci_dev->enclaves_list);
- *slot_uid = ne_enclave->slot_uid;
+ if (copy_to_user(slot_uid, &ne_enclave->slot_uid, sizeof(ne_enclave->slot_uid))) {
+ /*
+ * As we're holding the only reference to 'enclave_file', fput()
+ * will call ne_enclave_release() which will do a proper cleanup
+ * of all so far allocated resources, leaving only the unused fd
+ * for us to free.
+ */
+ fput(enclave_file);
+ put_unused_fd(enclave_fd);
+
+ return -EFAULT;
+ }
fd_install(enclave_fd, enclave_file);
switch (cmd) {
case NE_CREATE_VM: {
int enclave_fd = -1;
- struct file *enclave_file = NULL;
struct ne_pci_dev *ne_pci_dev = ne_devs.ne_pci_dev;
- int rc = -EINVAL;
- u64 slot_uid = 0;
+ u64 __user *slot_uid = (void __user *)arg;
mutex_lock(&ne_pci_dev->enclaves_list_mutex);
-
- enclave_fd = ne_create_vm_ioctl(ne_pci_dev, &slot_uid);
- if (enclave_fd < 0) {
- rc = enclave_fd;
-
- mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
-
- return rc;
- }
-
+ enclave_fd = ne_create_vm_ioctl(ne_pci_dev, slot_uid);
mutex_unlock(&ne_pci_dev->enclaves_list_mutex);
- if (copy_to_user((void __user *)arg, &slot_uid, sizeof(slot_uid))) {
- enclave_file = fget(enclave_fd);
- /* Decrement file refs to have release() called. */
- fput(enclave_file);
- fput(enclave_file);
- put_unused_fd(enclave_fd);
-
- return -EFAULT;
- }
-
return enclave_fd;
}
#ifdef CONFIG_BALLOON_COMPACTION
/*
* virtballoon_migratepage - perform the balloon page migration on behalf of
- * a compation thread. (called under page lock)
+ * a compaction thread. (called under page lock)
* @vb_dev_info: the balloon device
* @newpage: page that will replace the isolated page after migration finishes.
* @page : the isolated (old) page that is about to be migrated to newpage.
struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
struct virtqueue *vq;
- u16 num, off;
+ u16 num;
int err;
if (index >= vp_modern_get_num_queues(mdev))
return ERR_PTR(-EINVAL);
}
- /* get offset of notification word for this vq */
- off = vp_modern_get_queue_notify_off(mdev, index);
-
info->msix_vector = msix_vec;
/* create the vring */
virtqueue_get_avail_addr(vq),
virtqueue_get_used_addr(vq));
- if (mdev->notify_base) {
- /* offset should not wrap */
- if ((u64)off * mdev->notify_offset_multiplier + 2
- > mdev->notify_len) {
- dev_warn(&mdev->pci_dev->dev,
- "bad notification offset %u (x %u) "
- "for queue %u > %zd",
- off, mdev->notify_offset_multiplier,
- index, mdev->notify_len);
- err = -EINVAL;
- goto err_map_notify;
- }
- vq->priv = (void __force *)mdev->notify_base +
- off * mdev->notify_offset_multiplier;
- } else {
- vq->priv = (void __force *)vp_modern_map_capability(mdev,
- mdev->notify_map_cap, 2, 2,
- off * mdev->notify_offset_multiplier, 2,
- NULL);
- }
-
+ vq->priv = (void __force *)vp_modern_map_vq_notify(mdev, index, NULL);
if (!vq->priv) {
err = -ENOMEM;
goto err_map_notify;
* @start: start from the capability
* @size: map size
* @len: the length that is actually mapped
+ * @pa: physical address of the capability
*
* Returns the io address of for the part of the capability
*/
-void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
- size_t minlen,
- u32 align,
- u32 start, u32 size,
- size_t *len)
+static void __iomem *
+vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
+ size_t minlen, u32 align, u32 start, u32 size,
+ size_t *len, resource_size_t *pa)
{
struct pci_dev *dev = mdev->pci_dev;
u8 bar;
dev_err(&dev->dev,
"virtio_pci: unable to map virtio %u@%u on bar %i\n",
length, offset, bar);
+ else if (pa)
+ *pa = pci_resource_start(dev, bar) + offset;
+
return p;
}
-EXPORT_SYMBOL_GPL(vp_modern_map_capability);
/**
* virtio_pci_find_capability - walk capabilities to find device info.
mdev->common = vp_modern_map_capability(mdev, common,
sizeof(struct virtio_pci_common_cfg), 4,
0, sizeof(struct virtio_pci_common_cfg),
- NULL);
+ NULL, NULL);
if (!mdev->common)
goto err_map_common;
mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
0, 1,
- NULL);
+ NULL, NULL);
if (!mdev->isr)
goto err_map_isr;
mdev->notify_base = vp_modern_map_capability(mdev, notify,
2, 2,
0, notify_length,
- &mdev->notify_len);
+ &mdev->notify_len,
+ &mdev->notify_pa);
if (!mdev->notify_base)
goto err_map_notify;
} else {
if (device) {
mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
0, PAGE_SIZE,
- &mdev->device_len);
+ &mdev->device_len,
+ NULL);
if (!mdev->device)
goto err_map_device;
}
*
* Returns the notification offset for a virtqueue
*/
-u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
- u16 index)
+static u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
+ u16 index)
{
vp_iowrite16(index, &mdev->common->queue_select);
return vp_ioread16(&mdev->common->queue_notify_off);
}
-EXPORT_SYMBOL_GPL(vp_modern_get_queue_notify_off);
+
+/*
+ * vp_modern_map_vq_notify - map notification area for a
+ * specific virtqueue
+ * @mdev: the modern virtio-pci device
+ * @index: the queue index
+ * @pa: the pointer to the physical address of the nofity area
+ *
+ * Returns the address of the notification area
+ */
+void __iomem *vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
+ u16 index, resource_size_t *pa)
+{
+ u16 off = vp_modern_get_queue_notify_off(mdev, index);
+
+ if (mdev->notify_base) {
+ /* offset should not wrap */
+ if ((u64)off * mdev->notify_offset_multiplier + 2
+ > mdev->notify_len) {
+ dev_warn(&mdev->pci_dev->dev,
+ "bad notification offset %u (x %u) "
+ "for queue %u > %zd",
+ off, mdev->notify_offset_multiplier,
+ index, mdev->notify_len);
+ return NULL;
+ }
+ if (pa)
+ *pa = mdev->notify_pa +
+ off * mdev->notify_offset_multiplier;
+ return mdev->notify_base + off * mdev->notify_offset_multiplier;
+ } else {
+ return vp_modern_map_capability(mdev,
+ mdev->notify_map_cap, 2, 2,
+ off * mdev->notify_offset_multiplier, 2,
+ NULL, pa);
+ }
+}
+EXPORT_SYMBOL_GPL(vp_modern_map_vq_notify);
MODULE_VERSION("0.1");
MODULE_DESCRIPTION("Modern Virtio PCI Device");
#include <trace/events/swiotlb.h>
#define MAX_DMA_BITS 32
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-static char *xen_io_tlb_start, *xen_io_tlb_end;
-static unsigned long xen_io_tlb_nslabs;
/*
* Quick lookup value of the bus address of the IOTLB.
*/
return xen_bus_to_phys(dev, dma_to_phys(dev, dma_addr));
}
-static inline dma_addr_t xen_virt_to_bus(struct device *dev, void *address)
-{
- return xen_phys_to_dma(dev, virt_to_phys(address));
-}
-
static inline int range_straddles_page_boundary(phys_addr_t p, size_t size)
{
unsigned long next_bfn, xen_pfn = XEN_PFN_DOWN(p);
* have the same virtual address as another address
* in our domain. Therefore _only_ check address within our domain.
*/
- if (pfn_valid(PFN_DOWN(paddr))) {
- return paddr >= virt_to_phys(xen_io_tlb_start) &&
- paddr < virt_to_phys(xen_io_tlb_end);
- }
+ if (pfn_valid(PFN_DOWN(paddr)))
+ return is_swiotlb_buffer(paddr);
return 0;
}
-static int
-xen_swiotlb_fixup(void *buf, size_t size, unsigned long nslabs)
+static int xen_swiotlb_fixup(void *buf, unsigned long nslabs)
{
int i, rc;
int dma_bits;
} while (i < nslabs);
return 0;
}
-static unsigned long xen_set_nslabs(unsigned long nr_tbl)
-{
- if (!nr_tbl) {
- xen_io_tlb_nslabs = (64 * 1024 * 1024 >> IO_TLB_SHIFT);
- xen_io_tlb_nslabs = ALIGN(xen_io_tlb_nslabs, IO_TLB_SEGSIZE);
- } else
- xen_io_tlb_nslabs = nr_tbl;
-
- return xen_io_tlb_nslabs << IO_TLB_SHIFT;
-}
enum xen_swiotlb_err {
XEN_SWIOTLB_UNKNOWN = 0,
}
return "";
}
-int __ref xen_swiotlb_init(int verbose, bool early)
+
+#define DEFAULT_NSLABS ALIGN(SZ_64M >> IO_TLB_SHIFT, IO_TLB_SEGSIZE)
+
+int __ref xen_swiotlb_init(void)
{
- unsigned long bytes, order;
- int rc = -ENOMEM;
enum xen_swiotlb_err m_ret = XEN_SWIOTLB_UNKNOWN;
- unsigned int repeat = 3;
+ unsigned long bytes = swiotlb_size_or_default();
+ unsigned long nslabs = bytes >> IO_TLB_SHIFT;
+ unsigned int order, repeat = 3;
+ int rc = -ENOMEM;
+ char *start;
- xen_io_tlb_nslabs = swiotlb_nr_tbl();
retry:
- bytes = xen_set_nslabs(xen_io_tlb_nslabs);
- order = get_order(xen_io_tlb_nslabs << IO_TLB_SHIFT);
-
- /*
- * IO TLB memory already allocated. Just use it.
- */
- if (io_tlb_start != 0) {
- xen_io_tlb_start = phys_to_virt(io_tlb_start);
- goto end;
- }
+ m_ret = XEN_SWIOTLB_ENOMEM;
+ order = get_order(bytes);
/*
* Get IO TLB memory from any location.
*/
- if (early) {
- xen_io_tlb_start = memblock_alloc(PAGE_ALIGN(bytes),
- PAGE_SIZE);
- if (!xen_io_tlb_start)
- panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
- __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
- } else {
#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
- while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
- xen_io_tlb_start = (void *)xen_get_swiotlb_free_pages(order);
- if (xen_io_tlb_start)
- break;
- order--;
- }
- if (order != get_order(bytes)) {
- pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
- (PAGE_SIZE << order) >> 20);
- xen_io_tlb_nslabs = SLABS_PER_PAGE << order;
- bytes = xen_io_tlb_nslabs << IO_TLB_SHIFT;
- }
+ while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
+ start = (void *)xen_get_swiotlb_free_pages(order);
+ if (start)
+ break;
+ order--;
}
- if (!xen_io_tlb_start) {
- m_ret = XEN_SWIOTLB_ENOMEM;
+ if (!start)
goto error;
+ if (order != get_order(bytes)) {
+ pr_warn("Warning: only able to allocate %ld MB for software IO TLB\n",
+ (PAGE_SIZE << order) >> 20);
+ nslabs = SLABS_PER_PAGE << order;
+ bytes = nslabs << IO_TLB_SHIFT;
}
+
/*
* And replace that memory with pages under 4GB.
*/
- rc = xen_swiotlb_fixup(xen_io_tlb_start,
- bytes,
- xen_io_tlb_nslabs);
+ rc = xen_swiotlb_fixup(start, nslabs);
if (rc) {
- if (early)
- memblock_free(__pa(xen_io_tlb_start),
- PAGE_ALIGN(bytes));
- else {
- free_pages((unsigned long)xen_io_tlb_start, order);
- xen_io_tlb_start = NULL;
- }
+ free_pages((unsigned long)start, order);
m_ret = XEN_SWIOTLB_EFIXUP;
goto error;
}
- if (early) {
- if (swiotlb_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs,
- verbose))
- panic("Cannot allocate SWIOTLB buffer");
- rc = 0;
- } else
- rc = swiotlb_late_init_with_tbl(xen_io_tlb_start, xen_io_tlb_nslabs);
-
-end:
- xen_io_tlb_end = xen_io_tlb_start + bytes;
- if (!rc)
- swiotlb_set_max_segment(PAGE_SIZE);
-
- return rc;
+ rc = swiotlb_late_init_with_tbl(start, nslabs);
+ if (rc)
+ return rc;
+ swiotlb_set_max_segment(PAGE_SIZE);
+ return 0;
error:
if (repeat--) {
- xen_io_tlb_nslabs = max(1024UL, /* Min is 2MB */
- (xen_io_tlb_nslabs >> 1));
+ /* Min is 2MB */
+ nslabs = max(1024UL, (nslabs >> 1));
pr_info("Lowering to %luMB\n",
- (xen_io_tlb_nslabs << IO_TLB_SHIFT) >> 20);
+ (nslabs << IO_TLB_SHIFT) >> 20);
goto retry;
}
pr_err("%s (rc:%d)\n", xen_swiotlb_error(m_ret), rc);
- if (early)
- panic("%s (rc:%d)", xen_swiotlb_error(m_ret), rc);
- else
- free_pages((unsigned long)xen_io_tlb_start, order);
+ free_pages((unsigned long)start, order);
return rc;
}
+#ifdef CONFIG_X86
+void __init xen_swiotlb_init_early(void)
+{
+ unsigned long bytes = swiotlb_size_or_default();
+ unsigned long nslabs = bytes >> IO_TLB_SHIFT;
+ unsigned int repeat = 3;
+ char *start;
+ int rc;
+
+retry:
+ /*
+ * Get IO TLB memory from any location.
+ */
+ start = memblock_alloc(PAGE_ALIGN(bytes), PAGE_SIZE);
+ if (!start)
+ panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
+ __func__, PAGE_ALIGN(bytes), PAGE_SIZE);
+
+ /*
+ * And replace that memory with pages under 4GB.
+ */
+ rc = xen_swiotlb_fixup(start, nslabs);
+ if (rc) {
+ memblock_free(__pa(start), PAGE_ALIGN(bytes));
+ if (repeat--) {
+ /* Min is 2MB */
+ nslabs = max(1024UL, (nslabs >> 1));
+ bytes = nslabs << IO_TLB_SHIFT;
+ pr_info("Lowering to %luMB\n", bytes >> 20);
+ goto retry;
+ }
+ panic("%s (rc:%d)", xen_swiotlb_error(XEN_SWIOTLB_EFIXUP), rc);
+ }
+
+ if (swiotlb_init_with_tbl(start, nslabs, false))
+ panic("Cannot allocate SWIOTLB buffer");
+ swiotlb_set_max_segment(PAGE_SIZE);
+}
+#endif /* CONFIG_X86 */
+
static void *
xen_swiotlb_alloc_coherent(struct device *hwdev, size_t size,
dma_addr_t *dma_handle, gfp_t flags,
* Ensure that the address returned is DMA'ble
*/
if (unlikely(!dma_capable(dev, dev_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, map, size, size, dir,
+ swiotlb_tbl_unmap_single(dev, map, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
return DMA_MAPPING_ERROR;
}
/* NOTE: We use dev_addr here, not paddr! */
if (is_xen_swiotlb_buffer(hwdev, dev_addr))
- swiotlb_tbl_unmap_single(hwdev, paddr, size, size, dir, attrs);
+ swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs);
}
static void
}
if (is_xen_swiotlb_buffer(dev, dma_addr))
- swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
+ swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
}
static void
phys_addr_t paddr = xen_dma_to_phys(dev, dma_addr);
if (is_xen_swiotlb_buffer(dev, dma_addr))
- swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
+ swiotlb_sync_single_for_device(dev, paddr, size, dir);
if (!dev_is_dma_coherent(dev)) {
if (pfn_valid(PFN_DOWN(dma_to_phys(dev, dma_addr))))
static int
xen_swiotlb_dma_supported(struct device *hwdev, u64 mask)
{
- return xen_virt_to_bus(hwdev, xen_io_tlb_end - 1) <= mask;
+ return xen_phys_to_dma(hwdev, io_tlb_default_mem->end - 1) <= mask;
}
const struct dma_map_ops xen_swiotlb_dma_ops = {
NULL,
};
-static struct attribute_group v9fs_attr_group = {
+static const struct attribute_group v9fs_attr_group = {
.attrs = v9fs_attrs,
};
* to work.
*/
writeback_fid = v9fs_writeback_fid(file_dentry(file));
- if (IS_ERR(fid)) {
- err = PTR_ERR(fid);
+ if (IS_ERR(writeback_fid)) {
+ err = PTR_ERR(writeback_fid);
mutex_unlock(&v9inode->v_mutex);
goto out_error;
}
If unsure, say N.
+config ARCH_SUPPORTS_HUGETLBFS
+ def_bool n
+
config HUGETLBFS
bool "HugeTLB file system support"
depends on X86 || IA64 || SPARC64 || (S390 && 64BIT) || \
- SYS_SUPPORTS_HUGETLBFS || BROKEN
+ ARCH_SUPPORTS_HUGETLBFS || BROKEN
help
hugetlbfs is a filesystem backing for HugeTLB pages, based on
ramfs. For architectures that support it, say Y here and read
default y
config NFS_V4_2_SSC_HELPER
- tristate
- default y if NFS_V4=y || NFS_FS=y
+ bool
+ default y if NFS_V4_2
source "net/sunrpc/Kconfig"
source "fs/ceph/Kconfig"
config BINFMT_FLAT_OLD_ALWAYS_RAM
bool
+config BINFMT_FLAT_NO_DATA_START_OFFSET
+ bool
+
config BINFMT_FLAT_OLD
bool "Enable support for very old legacy flat binaries"
depends on BINFMT_FLAT
#define MAX_SHARED_LIBS (1)
#endif
+#ifdef CONFIG_BINFMT_FLAT_NO_DATA_START_OFFSET
+#define DATA_START_OFFSET_WORDS (0)
+#else
+#define DATA_START_OFFSET_WORDS (MAX_SHARED_LIBS)
+#endif
+
struct lib_info {
struct {
unsigned long start_code; /* Start of text segment */
goto err;
}
- len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long);
+ len = data_len + extra +
+ DATA_START_OFFSET_WORDS * sizeof(unsigned long);
len = PAGE_ALIGN(len);
realdatastart = vm_mmap(NULL, 0, len,
PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0);
goto err;
}
datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(unsigned long),
+ DATA_START_OFFSET_WORDS * sizeof(unsigned long),
FLAT_DATA_ALIGN);
pr_debug("Allocated data+bss+stack (%u bytes): %lx\n",
memp_size = len;
} else {
- len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(u32);
+ len = text_len + data_len + extra +
+ DATA_START_OFFSET_WORDS * sizeof(u32);
len = PAGE_ALIGN(len);
textpos = vm_mmap(NULL, 0, len,
PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0);
realdatastart = textpos + ntohl(hdr->data_start);
datapos = ALIGN(realdatastart +
- MAX_SHARED_LIBS * sizeof(u32),
+ DATA_START_OFFSET_WORDS * sizeof(u32),
FLAT_DATA_ALIGN);
reloc = (__be32 __user *)
ret = result;
pr_err("Unable to read code+data+bss, errno %d\n", ret);
vm_munmap(textpos, text_len + data_len + extra +
- MAX_SHARED_LIBS * sizeof(u32));
+ DATA_START_OFFSET_WORDS * sizeof(u32));
goto err;
}
}
{
struct address_space *mapping = bdev->bd_inode->i_mapping;
- if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
+ if (mapping_empty(mapping))
return;
invalidate_bh_lrus();
struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
struct blk_plug plug;
+ size_t shorted = 0;
ssize_t ret;
if (bdev_read_only(I_BDEV(bd_inode)))
if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
return -EOPNOTSUPP;
- iov_iter_truncate(from, size - iocb->ki_pos);
+ size -= iocb->ki_pos;
+ if (iov_iter_count(from) > size) {
+ shorted = iov_iter_count(from) - size;
+ iov_iter_truncate(from, size);
+ }
blk_start_plug(&plug);
ret = __generic_file_write_iter(iocb, from);
if (ret > 0)
ret = generic_write_sync(iocb, ret);
+ iov_iter_reexpand(from, iov_iter_count(from) + shorted);
blk_finish_plug(&plug);
return ret;
}
struct inode *bd_inode = bdev_file_inode(file);
loff_t size = i_size_read(bd_inode);
loff_t pos = iocb->ki_pos;
+ size_t shorted = 0;
+ ssize_t ret;
if (pos >= size)
return 0;
size -= pos;
- iov_iter_truncate(to, size);
- return generic_file_read_iter(iocb, to);
+ if (iov_iter_count(to) > size) {
+ shorted = iov_iter_count(to) - size;
+ iov_iter_truncate(to, size);
+ }
+
+ ret = generic_file_read_iter(iocb, to);
+ iov_iter_reexpand(to, iov_iter_count(to) + shorted);
+ return ret;
}
EXPORT_SYMBOL_GPL(blkdev_read_iter);
free_extent_map(em);
if (page->index == end_index) {
- char *userpage;
size_t zero_offset = offset_in_page(isize);
if (zero_offset) {
int zeros;
zeros = PAGE_SIZE - zero_offset;
- userpage = kmap_atomic(page);
- memset(userpage + zero_offset, 0, zeros);
+ memzero_page(page, zero_offset, zeros);
flush_dcache_page(page);
- kunmap_atomic(userpage);
}
}
}
if (page->index == last_byte >> PAGE_SHIFT) {
- char *userpage;
size_t zero_offset = offset_in_page(last_byte);
if (zero_offset) {
iosize = PAGE_SIZE - zero_offset;
- userpage = kmap_atomic(page);
- memset(userpage + zero_offset, 0, iosize);
+ memzero_page(page, zero_offset, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage);
}
}
begin_page_read(fs_info, page);
u64 disk_bytenr;
if (cur >= last_byte) {
- char *userpage;
struct extent_state *cached = NULL;
iosize = PAGE_SIZE - pg_offset;
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0, iosize);
+ memzero_page(page, pg_offset, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
unlock_extent_cached(tree, cur,
/* we've found a hole, just zero and go on */
if (block_start == EXTENT_MAP_HOLE) {
- char *userpage;
struct extent_state *cached = NULL;
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0, iosize);
+ memzero_page(page, pg_offset, iosize);
flush_dcache_page(page);
- kunmap_atomic(userpage);
set_extent_uptodate(tree, cur, cur + iosize - 1,
&cached, GFP_NOFS);
}
if (page->index == end_index) {
- char *userpage;
-
- userpage = kmap_atomic(page);
- memset(userpage + pg_offset, 0,
- PAGE_SIZE - pg_offset);
- kunmap_atomic(userpage);
+ memzero_page(page, pg_offset, PAGE_SIZE - pg_offset);
flush_dcache_page(page);
}
if (!ret) {
unsigned long offset = offset_in_page(total_compressed);
struct page *page = pages[nr_pages - 1];
- char *kaddr;
/* zero the tail end of the last page, we might be
* sending it down to disk
*/
- if (offset) {
- kaddr = kmap_atomic(page);
- memset(kaddr + offset, 0,
- PAGE_SIZE - offset);
- kunmap_atomic(kaddr);
- }
+ if (offset)
+ memzero_page(page, offset, PAGE_SIZE - offset);
will_compress = 1;
}
}
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
- char *kaddr;
bool only_release_metadata = false;
u32 blocksize = fs_info->sectorsize;
pgoff_t index = from >> PAGE_SHIFT;
if (offset != blocksize) {
if (!len)
len = blocksize - offset;
- kaddr = kmap(page);
if (front)
- memset(kaddr + (block_start - page_offset(page)),
- 0, offset);
+ memzero_page(page, (block_start - page_offset(page)),
+ offset);
else
- memset(kaddr + (block_start - page_offset(page)) + offset,
- 0, len);
+ memzero_page(page, (block_start - page_offset(page)) + offset,
+ len);
flush_dcache_page(page);
- kunmap(page);
}
ClearPageChecked(page);
set_page_dirty(page);
* cover that region here.
*/
- if (max_size + pg_offset < PAGE_SIZE) {
- char *map = kmap(page);
- memset(map + pg_offset + max_size, 0, PAGE_SIZE - max_size - pg_offset);
- kunmap(page);
- }
+ if (max_size + pg_offset < PAGE_SIZE)
+ memzero_page(page, pg_offset + max_size,
+ PAGE_SIZE - max_size - pg_offset);
kfree(tmp);
return ret;
}
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
struct extent_changeset *data_reserved = NULL;
- char *kaddr;
unsigned long zero_start;
loff_t size;
vm_fault_t ret;
zero_start = PAGE_SIZE;
if (zero_start != PAGE_SIZE) {
- kaddr = kmap(page);
- memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
+ memzero_page(page, zero_start, PAGE_SIZE - zero_start);
flush_dcache_page(page);
- kunmap(page);
}
ClearPageChecked(page);
set_page_dirty(page);
* So what's in the range [500, 4095] corresponds to zeroes.
*/
if (datal < block_size) {
- char *map;
-
- map = kmap(page);
- memset(map + datal, 0, block_size - datal);
+ memzero_page(page, datal, block_size - datal);
flush_dcache_page(page);
- kunmap(page);
}
SetPageUptodate(page);
unsigned long bytes_left;
unsigned long total_out = 0;
unsigned long pg_offset = 0;
- char *kaddr;
destlen = min_t(unsigned long, destlen, PAGE_SIZE);
bytes_left = destlen;
* end of the inline extent (destlen) to the end of the page
*/
if (pg_offset < destlen) {
- kaddr = kmap_atomic(dest_page);
- memset(kaddr + pg_offset, 0, destlen - pg_offset);
- kunmap_atomic(kaddr);
+ memzero_page(dest_page, pg_offset, destlen - pg_offset);
}
return ret;
}
size_t ret2;
unsigned long total_out = 0;
unsigned long pg_offset = 0;
- char *kaddr;
stream = ZSTD_initDStream(
ZSTD_BTRFS_MAX_INPUT, workspace->mem, workspace->size);
ret = 0;
finish:
if (pg_offset < destlen) {
- kaddr = kmap_atomic(dest_page);
- memset(kaddr + pg_offset, 0, destlen - pg_offset);
- kunmap_atomic(kaddr);
+ memzero_page(dest_page, pg_offset, destlen - pg_offset);
}
return ret;
}
int i;
check_irqs_on();
+ /*
+ * the refcount of buffer_head in bh_lru prevents dropping the
+ * attached page(i.e., try_to_free_buffers) so it could cause
+ * failing page migration.
+ * Skip putting upcoming bh into bh_lru until migration is done.
+ */
+ if (lru_cache_disabled())
+ return;
+
bh_lru_lock();
b = this_cpu_ptr(&bh_lrus);
}
EXPORT_SYMBOL(__bread_gfp);
+static void __invalidate_bh_lrus(struct bh_lru *b)
+{
+ int i;
+
+ for (i = 0; i < BH_LRU_SIZE; i++) {
+ brelse(b->bhs[i]);
+ b->bhs[i] = NULL;
+ }
+}
/*
* invalidate_bh_lrus() is called rarely - but not only at unmount.
* This doesn't race because it runs in each cpu either in irq
static void invalidate_bh_lru(void *arg)
{
struct bh_lru *b = &get_cpu_var(bh_lrus);
- int i;
- for (i = 0; i < BH_LRU_SIZE; i++) {
- brelse(b->bhs[i]);
- b->bhs[i] = NULL;
- }
+ __invalidate_bh_lrus(b);
put_cpu_var(bh_lrus);
}
-static bool has_bh_in_lru(int cpu, void *dummy)
+bool has_bh_in_lru(int cpu, void *dummy)
{
struct bh_lru *b = per_cpu_ptr(&bh_lrus, cpu);
int i;
}
EXPORT_SYMBOL_GPL(invalidate_bh_lrus);
+void invalidate_bh_lrus_cpu(int cpu)
+{
+ struct bh_lru *b;
+
+ bh_lru_lock();
+ b = per_cpu_ptr(&bh_lrus, cpu);
+ __invalidate_bh_lrus(b);
+ bh_lru_unlock();
+}
+
void set_bh_page(struct buffer_head *bh,
struct page *page, unsigned long offset)
{
select LIBCRC32C
select CRYPTO_AES
select CRYPTO
+ select NETFS_SUPPORT
default n
help
Choose Y or M here to include support for mounting the
#include <linux/signal.h>
#include <linux/iversion.h>
#include <linux/ktime.h>
+#include <linux/netfs.h>
#include "super.h"
#include "mds_client.h"
(CONGESTION_ON_THRESH(congestion_kb) - \
(CONGESTION_ON_THRESH(congestion_kb) >> 2))
+static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
+ struct page *page, void **_fsdata);
+
static inline struct ceph_snap_context *page_snap_context(struct page *page)
{
if (PagePrivate(page))
* PagePrivate so that we get invalidatepage callback.
*/
BUG_ON(PagePrivate(page));
- page->private = (unsigned long)snapc;
- SetPagePrivate(page);
+ attach_page_private(page, snapc);
ret = __set_page_dirty_nobuffers(page);
WARN_ON(!PageLocked(page));
{
struct inode *inode;
struct ceph_inode_info *ci;
- struct ceph_snap_context *snapc = page_snap_context(page);
+ struct ceph_snap_context *snapc;
+
+ wait_on_page_fscache(page);
inode = page->mapping->host;
ci = ceph_inode(inode);
- if (offset != 0 || length != PAGE_SIZE) {
+ if (offset != 0 || length != thp_size(page)) {
dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
inode, page, page->index, offset, length);
return;
}
- ceph_invalidate_fscache_page(inode, page);
-
WARN_ON(!PageLocked(page));
if (!PagePrivate(page))
return;
dout("%p invalidatepage %p idx %lu full dirty page\n",
inode, page, page->index);
+ snapc = detach_page_private(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc);
- page->private = 0;
- ClearPagePrivate(page);
}
-static int ceph_releasepage(struct page *page, gfp_t g)
+static int ceph_releasepage(struct page *page, gfp_t gfp)
{
dout("%p releasepage %p idx %lu (%sdirty)\n", page->mapping->host,
page, page->index, PageDirty(page) ? "" : "not ");
- /* Can we release the page from the cache? */
- if (!ceph_release_fscache_page(page, g))
- return 0;
-
+ if (PageFsCache(page)) {
+ if (!(gfp & __GFP_DIRECT_RECLAIM) || !(gfp & __GFP_FS))
+ return 0;
+ wait_on_page_fscache(page);
+ }
return !PagePrivate(page);
}
-/* read a single page, without unlocking it. */
-static int ceph_do_readpage(struct file *filp, struct page *page)
+static void ceph_netfs_expand_readahead(struct netfs_read_request *rreq)
{
- struct inode *inode = file_inode(filp);
+ struct inode *inode = rreq->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_osd_client *osdc = &fsc->client->osdc;
- struct ceph_osd_request *req;
- struct ceph_vino vino = ceph_vino(inode);
- int err = 0;
- u64 off = page_offset(page);
- u64 len = PAGE_SIZE;
-
- if (off >= i_size_read(inode)) {
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- return 0;
- }
-
- if (ci->i_inline_version != CEPH_INLINE_NONE) {
- /*
- * Uptodate inline data should have been added
- * into page cache while getting Fcr caps.
- */
- if (off == 0)
- return -EINVAL;
- zero_user_segment(page, 0, PAGE_SIZE);
- SetPageUptodate(page);
- return 0;
- }
-
- err = ceph_readpage_from_fscache(inode, page);
- if (err == 0)
- return -EINPROGRESS;
-
- dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
- vino.ino, vino.snap, filp, off, len, page, page->index);
- req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len, 0, 1,
- CEPH_OSD_OP_READ, CEPH_OSD_FLAG_READ, NULL,
- ci->i_truncate_seq, ci->i_truncate_size,
- false);
- if (IS_ERR(req))
- return PTR_ERR(req);
+ struct ceph_file_layout *lo = &ci->i_layout;
+ u32 blockoff;
+ u64 blockno;
- osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
+ /* Expand the start downward */
+ blockno = div_u64_rem(rreq->start, lo->stripe_unit, &blockoff);
+ rreq->start = blockno * lo->stripe_unit;
+ rreq->len += blockoff;
- err = ceph_osdc_start_request(osdc, req, false);
- if (!err)
- err = ceph_osdc_wait_request(osdc, req);
-
- ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
- req->r_end_latency, err);
-
- ceph_osdc_put_request(req);
- dout("readpage result %d\n", err);
-
- if (err == -ENOENT)
- err = 0;
- if (err < 0) {
- ceph_fscache_readpage_cancel(inode, page);
- if (err == -EBLOCKLISTED)
- fsc->blocklisted = true;
- goto out;
- }
- if (err < PAGE_SIZE)
- /* zero fill remainder of page */
- zero_user_segment(page, err, PAGE_SIZE);
- else
- flush_dcache_page(page);
-
- SetPageUptodate(page);
- ceph_readpage_to_fscache(inode, page);
-
-out:
- return err < 0 ? err : 0;
+ /* Now, round up the length to the next block */
+ rreq->len = roundup(rreq->len, lo->stripe_unit);
}
-static int ceph_readpage(struct file *filp, struct page *page)
+static bool ceph_netfs_clamp_length(struct netfs_read_subrequest *subreq)
{
- int r = ceph_do_readpage(filp, page);
- if (r != -EINPROGRESS)
- unlock_page(page);
- else
- r = 0;
- return r;
+ struct inode *inode = subreq->rreq->mapping->host;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ u64 objno, objoff;
+ u32 xlen;
+
+ /* Truncate the extent at the end of the current block */
+ ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
+ &objno, &objoff, &xlen);
+ subreq->len = min(xlen, fsc->mount_options->rsize);
+ return true;
}
-/*
- * Finish an async read(ahead) op.
- */
-static void finish_read(struct ceph_osd_request *req)
+static void finish_netfs_read(struct ceph_osd_request *req)
{
- struct inode *inode = req->r_inode;
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_osd_data *osd_data;
- int rc = req->r_result <= 0 ? req->r_result : 0;
- int bytes = req->r_result >= 0 ? req->r_result : 0;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(req->r_inode);
+ struct ceph_osd_data *osd_data = osd_req_op_extent_osd_data(req, 0);
+ struct netfs_read_subrequest *subreq = req->r_priv;
int num_pages;
- int i;
+ int err = req->r_result;
- dout("finish_read %p req %p rc %d bytes %d\n", inode, req, rc, bytes);
- if (rc == -EBLOCKLISTED)
- ceph_inode_to_client(inode)->blocklisted = true;
+ ceph_update_read_metrics(&fsc->mdsc->metric, req->r_start_latency,
+ req->r_end_latency, err);
- /* unlock all pages, zeroing any data we didn't read */
- osd_data = osd_req_op_extent_osd_data(req, 0);
- BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
- num_pages = calc_pages_for((u64)osd_data->alignment,
- (u64)osd_data->length);
- for (i = 0; i < num_pages; i++) {
- struct page *page = osd_data->pages[i];
-
- if (rc < 0 && rc != -ENOENT) {
- ceph_fscache_readpage_cancel(inode, page);
- goto unlock;
- }
- if (bytes < (int)PAGE_SIZE) {
- /* zero (remainder of) page */
- int s = bytes < 0 ? 0 : bytes;
- zero_user_segment(page, s, PAGE_SIZE);
- }
- dout("finish_read %p uptodate %p idx %lu\n", inode, page,
- page->index);
- flush_dcache_page(page);
- SetPageUptodate(page);
- ceph_readpage_to_fscache(inode, page);
-unlock:
- unlock_page(page);
- put_page(page);
- bytes -= PAGE_SIZE;
- }
+ dout("%s: result %d subreq->len=%zu i_size=%lld\n", __func__, req->r_result,
+ subreq->len, i_size_read(req->r_inode));
- ceph_update_read_latency(&fsc->mdsc->metric, req->r_start_latency,
- req->r_end_latency, rc);
+ /* no object means success but no data */
+ if (err == -ENOENT)
+ err = 0;
+ else if (err == -EBLOCKLISTED)
+ fsc->blocklisted = true;
+
+ if (err >= 0 && err < subreq->len)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+
+ netfs_subreq_terminated(subreq, err, true);
- kfree(osd_data->pages);
+ num_pages = calc_pages_for(osd_data->alignment, osd_data->length);
+ ceph_put_page_vector(osd_data->pages, num_pages, false);
+ iput(req->r_inode);
}
-/*
- * start an async read(ahead) operation. return nr_pages we submitted
- * a read for on success, or negative error code.
- */
-static int start_read(struct inode *inode, struct ceph_rw_context *rw_ctx,
- struct list_head *page_list, int max)
+static void ceph_netfs_issue_op(struct netfs_read_subrequest *subreq)
{
- struct ceph_osd_client *osdc =
- &ceph_inode_to_client(inode)->client->osdc;
+ struct netfs_read_request *rreq = subreq->rreq;
+ struct inode *inode = rreq->mapping->host;
struct ceph_inode_info *ci = ceph_inode(inode);
- struct page *page = lru_to_page(page_list);
- struct ceph_vino vino;
+ struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
struct ceph_osd_request *req;
- u64 off;
- u64 len;
- int i;
+ struct ceph_vino vino = ceph_vino(inode);
+ struct iov_iter iter;
struct page **pages;
- pgoff_t next_index;
- int nr_pages = 0;
- int got = 0;
- int ret = 0;
-
- if (!rw_ctx) {
- /* caller of readpages does not hold buffer and read caps
- * (fadvise, madvise and readahead cases) */
- int want = CEPH_CAP_FILE_CACHE;
- ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want,
- true, &got);
- if (ret < 0) {
- dout("start_read %p, error getting cap\n", inode);
- } else if (!(got & want)) {
- dout("start_read %p, no cache cap\n", inode);
- ret = 0;
- }
- if (ret <= 0) {
- if (got)
- ceph_put_cap_refs(ci, got);
- while (!list_empty(page_list)) {
- page = lru_to_page(page_list);
- list_del(&page->lru);
- put_page(page);
- }
- return ret;
- }
- }
-
- off = (u64) page_offset(page);
+ size_t page_off;
+ int err = 0;
+ u64 len = subreq->len;
- /* count pages */
- next_index = page->index;
- list_for_each_entry_reverse(page, page_list, lru) {
- if (page->index != next_index)
- break;
- nr_pages++;
- next_index++;
- if (max && nr_pages == max)
- break;
- }
- len = nr_pages << PAGE_SHIFT;
- dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
- off, len);
- vino = ceph_vino(inode);
- req = ceph_osdc_new_request(osdc, &ci->i_layout, vino, off, &len,
- 0, 1, CEPH_OSD_OP_READ,
- CEPH_OSD_FLAG_READ, NULL,
- ci->i_truncate_seq, ci->i_truncate_size,
- false);
+ req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, subreq->start, &len,
+ 0, 1, CEPH_OSD_OP_READ,
+ CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica,
+ NULL, ci->i_truncate_seq, ci->i_truncate_size, false);
if (IS_ERR(req)) {
- ret = PTR_ERR(req);
+ err = PTR_ERR(req);
+ req = NULL;
goto out;
}
- /* build page vector */
- nr_pages = calc_pages_for(0, len);
- pages = kmalloc_array(nr_pages, sizeof(*pages), GFP_KERNEL);
- if (!pages) {
- ret = -ENOMEM;
- goto out_put;
- }
- for (i = 0; i < nr_pages; ++i) {
- page = list_entry(page_list->prev, struct page, lru);
- BUG_ON(PageLocked(page));
- list_del(&page->lru);
-
- dout("start_read %p adding %p idx %lu\n", inode, page,
- page->index);
- if (add_to_page_cache_lru(page, &inode->i_data, page->index,
- GFP_KERNEL)) {
- ceph_fscache_uncache_page(inode, page);
- put_page(page);
- dout("start_read %p add_to_page_cache failed %p\n",
- inode, page);
- nr_pages = i;
- if (nr_pages > 0) {
- len = nr_pages << PAGE_SHIFT;
- osd_req_op_extent_update(req, 0, len);
- break;
- }
- goto out_pages;
- }
- pages[i] = page;
+ dout("%s: pos=%llu orig_len=%zu len=%llu\n", __func__, subreq->start, subreq->len, len);
+ iov_iter_xarray(&iter, READ, &rreq->mapping->i_pages, subreq->start, len);
+ err = iov_iter_get_pages_alloc(&iter, &pages, len, &page_off);
+ if (err < 0) {
+ dout("%s: iov_ter_get_pages_alloc returned %d\n", __func__, err);
+ goto out;
}
+
+ /* should always give us a page-aligned read */
+ WARN_ON_ONCE(page_off);
+ len = err;
+
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false, false);
- req->r_callback = finish_read;
+ req->r_callback = finish_netfs_read;
+ req->r_priv = subreq;
req->r_inode = inode;
+ ihold(inode);
- dout("start_read %p starting %p %lld~%lld\n", inode, req, off, len);
- ret = ceph_osdc_start_request(osdc, req, false);
- if (ret < 0)
- goto out_pages;
+ err = ceph_osdc_start_request(req->r_osdc, req, false);
+ if (err)
+ iput(inode);
+out:
ceph_osdc_put_request(req);
+ if (err)
+ netfs_subreq_terminated(subreq, err, false);
+ dout("%s: result %d\n", __func__, err);
+}
- /* After adding locked pages to page cache, the inode holds cache cap.
- * So we can drop our cap refs. */
- if (got)
- ceph_put_cap_refs(ci, got);
+static void ceph_init_rreq(struct netfs_read_request *rreq, struct file *file)
+{
+}
- return nr_pages;
+static void ceph_readahead_cleanup(struct address_space *mapping, void *priv)
+{
+ struct inode *inode = mapping->host;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ int got = (uintptr_t)priv;
-out_pages:
- for (i = 0; i < nr_pages; ++i) {
- ceph_fscache_readpage_cancel(inode, pages[i]);
- unlock_page(pages[i]);
- }
- ceph_put_page_vector(pages, nr_pages, false);
-out_put:
- ceph_osdc_put_request(req);
-out:
if (got)
ceph_put_cap_refs(ci, got);
- return ret;
}
+const struct netfs_read_request_ops ceph_netfs_read_ops = {
+ .init_rreq = ceph_init_rreq,
+ .is_cache_enabled = ceph_is_cache_enabled,
+ .begin_cache_operation = ceph_begin_cache_operation,
+ .issue_op = ceph_netfs_issue_op,
+ .expand_readahead = ceph_netfs_expand_readahead,
+ .clamp_length = ceph_netfs_clamp_length,
+ .check_write_begin = ceph_netfs_check_write_begin,
+ .cleanup = ceph_readahead_cleanup,
+};
-/*
- * Read multiple pages. Leave pages we don't read + unlock in page_list;
- * the caller (VM) cleans them up.
- */
-static int ceph_readpages(struct file *file, struct address_space *mapping,
- struct list_head *page_list, unsigned nr_pages)
+/* read a single page, without unlocking it. */
+static int ceph_readpage(struct file *file, struct page *page)
{
struct inode *inode = file_inode(file);
- struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- struct ceph_file_info *fi = file->private_data;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_vino vino = ceph_vino(inode);
+ u64 off = page_offset(page);
+ u64 len = thp_size(page);
+
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
+ /*
+ * Uptodate inline data should have been added
+ * into page cache while getting Fcr caps.
+ */
+ if (off == 0) {
+ unlock_page(page);
+ return -EINVAL;
+ }
+ zero_user_segment(page, 0, thp_size(page));
+ SetPageUptodate(page);
+ unlock_page(page);
+ return 0;
+ }
+
+ dout("readpage ino %llx.%llx file %p off %llu len %llu page %p index %lu\n",
+ vino.ino, vino.snap, file, off, len, page, page->index);
+
+ return netfs_readpage(file, page, &ceph_netfs_read_ops, NULL);
+}
+
+static void ceph_readahead(struct readahead_control *ractl)
+{
+ struct inode *inode = file_inode(ractl->file);
+ struct ceph_file_info *fi = ractl->file->private_data;
struct ceph_rw_context *rw_ctx;
- int rc = 0;
- int max = 0;
+ int got = 0;
+ int ret = 0;
if (ceph_inode(inode)->i_inline_version != CEPH_INLINE_NONE)
- return -EINVAL;
+ return;
- rc = ceph_readpages_from_fscache(mapping->host, mapping, page_list,
- &nr_pages);
+ rw_ctx = ceph_find_rw_context(fi);
+ if (!rw_ctx) {
+ /*
+ * readahead callers do not necessarily hold Fcb caps
+ * (e.g. fadvise, madvise).
+ */
+ int want = CEPH_CAP_FILE_CACHE;
- if (rc == 0)
- goto out;
+ ret = ceph_try_get_caps(inode, CEPH_CAP_FILE_RD, want, true, &got);
+ if (ret < 0)
+ dout("start_read %p, error getting cap\n", inode);
+ else if (!(got & want))
+ dout("start_read %p, no cache cap\n", inode);
- rw_ctx = ceph_find_rw_context(fi);
- max = fsc->mount_options->rsize >> PAGE_SHIFT;
- dout("readpages %p file %p ctx %p nr_pages %d max %d\n",
- inode, file, rw_ctx, nr_pages, max);
- while (!list_empty(page_list)) {
- rc = start_read(inode, rw_ctx, page_list, max);
- if (rc < 0)
- goto out;
+ if (ret <= 0)
+ return;
}
-out:
- ceph_fscache_readpages_cancel(inode, page_list);
-
- dout("readpages %p file %p ret %d\n", inode, file, rc);
- return rc;
+ netfs_readahead(ractl, &ceph_netfs_read_ops, (void *)(uintptr_t)got);
}
struct ceph_writeback_ctl
spin_unlock(&ci->i_ceph_lock);
WARN_ON(!found);
}
- if (end > page_offset(page) + PAGE_SIZE)
- end = page_offset(page) + PAGE_SIZE;
+ if (end > page_offset(page) + thp_size(page))
+ end = page_offset(page) + thp_size(page);
return end > start ? end - start : 0;
}
struct ceph_snap_context *snapc, *oldest;
loff_t page_off = page_offset(page);
int err;
- loff_t len = PAGE_SIZE;
+ loff_t len = thp_size(page);
struct ceph_writeback_ctl ceph_wbc;
struct ceph_osd_client *osdc = &fsc->client->osdc;
struct ceph_osd_request *req;
/* is this a partial page at end of file? */
if (page_off >= ceph_wbc.i_size) {
dout("%p page eof %llu\n", page, ceph_wbc.i_size);
- page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
+ page->mapping->a_ops->invalidatepage(page, 0, thp_size(page));
return 0;
}
}
/* it may be a short write due to an object boundary */
- WARN_ON_ONCE(len > PAGE_SIZE);
+ WARN_ON_ONCE(len > thp_size(page));
osd_req_op_extent_osd_data_pages(req, 0, &page, len, 0, false, false);
dout("writepage %llu~%llu (%llu bytes)\n", page_off, len, len);
if (!err)
err = ceph_osdc_wait_request(osdc, req);
- ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+ ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
ceph_osdc_put_request(req);
dout("writepage cleaned page %p\n", page);
err = 0; /* vfs expects us to return 0 */
}
- page->private = 0;
- ClearPagePrivate(page);
+ oldest = detach_page_private(page);
+ WARN_ON_ONCE(oldest != snapc);
end_page_writeback(page);
ceph_put_wrbuffer_cap_refs(ci, 1, snapc);
ceph_put_snap_context(snapc); /* page's reference */
ceph_clear_error_write(ci);
}
- ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+ ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, rc);
/*
clear_bdi_congested(inode_to_bdi(inode),
BLK_RW_ASYNC);
- ceph_put_snap_context(page_snap_context(page));
- page->private = 0;
- ClearPagePrivate(page);
- dout("unlocking %p\n", page);
+ ceph_put_snap_context(detach_page_private(page));
end_page_writeback(page);
+ dout("unlocking %p\n", page);
if (remove_page)
generic_error_remove_page(inode->i_mapping,
page_offset(page) >= i_size_read(inode)) &&
clear_page_dirty_for_io(page))
mapping->a_ops->invalidatepage(page,
- 0, PAGE_SIZE);
+ 0, thp_size(page));
unlock_page(page);
continue;
}
pages[locked_pages++] = page;
pvec.pages[i] = NULL;
- len += PAGE_SIZE;
+ len += thp_size(page);
}
/* did we get anything? */
BUG_ON(IS_ERR(req));
}
BUG_ON(len < page_offset(pages[locked_pages - 1]) +
- PAGE_SIZE - offset);
+ thp_size(page) - offset);
req->r_callback = writepages_finish;
req->r_inode = inode;
}
set_page_writeback(pages[i]);
- len += PAGE_SIZE;
+ len += thp_size(page);
}
if (ceph_wbc.size_stable) {
/* writepages_finish() clears writeback pages
* according to the data length, so make sure
* data length covers all locked pages */
- u64 min_len = len + 1 - PAGE_SIZE;
+ u64 min_len = len + 1 - thp_size(page);
len = get_writepages_data_length(inode, pages[i - 1],
offset);
len = max(len, min_len);
return NULL;
}
+static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned int len,
+ struct page *page, void **_fsdata)
+{
+ struct inode *inode = file_inode(file);
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_snap_context *snapc;
+
+ snapc = ceph_find_incompatible(page);
+ if (snapc) {
+ int r;
+
+ unlock_page(page);
+ put_page(page);
+ if (IS_ERR(snapc))
+ return PTR_ERR(snapc);
+
+ ceph_queue_writeback(inode);
+ r = wait_event_killable(ci->i_cap_wq,
+ context_is_writeable_or_written(inode, snapc));
+ ceph_put_snap_context(snapc);
+ return r == 0 ? -EAGAIN : r;
+ }
+ return 0;
+}
+
/*
* We are only allowed to write into/dirty the page if the page is
* clean, or already dirty within the same snap context.
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct ceph_snap_context *snapc;
struct page *page = NULL;
pgoff_t index = pos >> PAGE_SHIFT;
- int pos_in_page = pos & ~PAGE_MASK;
- int r = 0;
+ int r;
- dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
-
- for (;;) {
+ /*
+ * Uninlining should have already been done and everything updated, EXCEPT
+ * for inline_version sent to the MDS.
+ */
+ if (ci->i_inline_version != CEPH_INLINE_NONE) {
page = grab_cache_page_write_begin(mapping, index, flags);
- if (!page) {
- r = -ENOMEM;
- break;
- }
-
- snapc = ceph_find_incompatible(page);
- if (snapc) {
- if (IS_ERR(snapc)) {
- r = PTR_ERR(snapc);
- break;
- }
- unlock_page(page);
- put_page(page);
- page = NULL;
- ceph_queue_writeback(inode);
- r = wait_event_killable(ci->i_cap_wq,
- context_is_writeable_or_written(inode, snapc));
- ceph_put_snap_context(snapc);
- if (r != 0)
- break;
- continue;
- }
-
- if (PageUptodate(page)) {
- dout(" page %p already uptodate\n", page);
- break;
- }
+ if (!page)
+ return -ENOMEM;
/*
- * In some cases we don't need to read at all:
- * - full page write
- * - write that lies completely beyond EOF
- * - write that covers the the page from start to EOF or beyond it
+ * The inline_version on a new inode is set to 1. If that's the
+ * case, then the page is brand new and isn't yet Uptodate.
*/
- if ((pos_in_page == 0 && len == PAGE_SIZE) ||
- (pos >= i_size_read(inode)) ||
- (pos_in_page == 0 && (pos + len) >= i_size_read(inode))) {
- zero_user_segments(page, 0, pos_in_page,
- pos_in_page + len, PAGE_SIZE);
- break;
+ r = 0;
+ if (index == 0 && ci->i_inline_version != 1) {
+ if (!PageUptodate(page)) {
+ WARN_ONCE(1, "ceph: write_begin called on still-inlined inode (inline_version %llu)!\n",
+ ci->i_inline_version);
+ r = -EINVAL;
+ }
+ goto out;
}
-
- /*
- * We need to read it. If we get back -EINPROGRESS, then the page was
- * handed off to fscache and it will be unlocked when the read completes.
- * Refind the page in that case so we can reacquire the page lock. Otherwise
- * we got a hard error or the read was completed synchronously.
- */
- r = ceph_do_readpage(file, page);
- if (r != -EINPROGRESS)
- break;
+ zero_user_segment(page, 0, thp_size(page));
+ SetPageUptodate(page);
+ goto out;
}
+ r = netfs_write_begin(file, inode->i_mapping, pos, len, 0, &page, NULL,
+ &ceph_netfs_read_ops, NULL);
+out:
+ if (r == 0)
+ wait_on_page_fscache(page);
if (r < 0) {
- if (page) {
- unlock_page(page);
+ if (page)
put_page(page);
- }
} else {
+ WARN_ON_ONCE(!PageLocked(page));
*pagep = page;
}
return r;
const struct address_space_operations ceph_aops = {
.readpage = ceph_readpage,
- .readpages = ceph_readpages,
+ .readahead = ceph_readahead,
.writepage = ceph_writepage,
.writepages = ceph_writepages_start,
.write_begin = ceph_write_begin,
struct inode *inode = file_inode(vma->vm_file);
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
- struct page *pinned_page = NULL;
loff_t off = (loff_t)vmf->pgoff << PAGE_SHIFT;
int want, got, err;
sigset_t oldset;
ceph_block_sigs(&oldset);
- dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
- inode, ceph_vinop(inode), off, (size_t)PAGE_SIZE);
+ dout("filemap_fault %p %llx.%llx %llu trying to get caps\n",
+ inode, ceph_vinop(inode), off);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
got = 0;
- err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1,
- &got, &pinned_page);
+ err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_RD, want, -1, &got);
if (err < 0)
goto out_restore;
- dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
- inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
+ dout("filemap_fault %p %llu got cap refs on %s\n",
+ inode, off, ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE) {
ceph_add_rw_context(fi, &rw_ctx);
ret = filemap_fault(vmf);
ceph_del_rw_context(fi, &rw_ctx);
- dout("filemap_fault %p %llu~%zd drop cap refs %s ret %x\n",
- inode, off, (size_t)PAGE_SIZE,
- ceph_cap_string(got), ret);
+ dout("filemap_fault %p %llu drop cap refs %s ret %x\n",
+ inode, off, ceph_cap_string(got), ret);
} else
err = -EAGAIN;
- if (pinned_page)
- put_page(pinned_page);
ceph_put_cap_refs(ci, got);
if (err != -EAGAIN)
vmf->page = page;
ret = VM_FAULT_MAJOR | VM_FAULT_LOCKED;
out_inline:
- dout("filemap_fault %p %llu~%zd read inline data ret %x\n",
- inode, off, (size_t)PAGE_SIZE, ret);
+ dout("filemap_fault %p %llu read inline data ret %x\n",
+ inode, off, ret);
}
out_restore:
ceph_restore_sigs(&oldset);
return ret;
}
-/*
- * Reuse write_begin here for simplicity.
- */
static vm_fault_t ceph_page_mkwrite(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
goto out_free;
}
- if (off + PAGE_SIZE <= size)
- len = PAGE_SIZE;
+ if (off + thp_size(page) <= size)
+ len = thp_size(page);
else
- len = size & ~PAGE_MASK;
+ len = offset_in_thp(page, size);
dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
inode, ceph_vinop(inode), off, len, size);
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len,
- &got, NULL);
+ err = ceph_get_caps(vma->vm_file, CEPH_CAP_FILE_WR, want, off + len, &got);
if (err < 0)
goto out_free;
if (!err)
err = ceph_osdc_wait_request(&fsc->client->osdc, req);
- ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+ ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
out_put:
s64 pool;
int ret, flags;
+ /* Only need to do this for regular files */
+ if (!S_ISREG(inode->i_mode))
+ return 0;
+
if (ci->i_vino.snap != CEPH_NOSNAP) {
/*
* Pool permission check needs to write to the first object.
ci->fscache = NULL;
- fscache_uncache_all_inode_pages(cookie, &ci->vfs_inode);
fscache_relinquish_cookie(cookie, &ci->i_vino, false);
}
dout("fscache_file_set_cookie %p %p disabling cache\n",
inode, filp);
fscache_disable_cookie(ci->fscache, &ci->i_vino, false);
- fscache_uncache_all_inode_pages(ci->fscache, inode);
} else {
fscache_enable_cookie(ci->fscache, &ci->i_vino, i_size_read(inode),
ceph_fscache_can_enable, inode);
}
}
-static void ceph_readpage_from_fscache_complete(struct page *page, void *data, int error)
-{
- if (!error)
- SetPageUptodate(page);
-
- unlock_page(page);
-}
-
-static inline bool cache_valid(struct ceph_inode_info *ci)
-{
- return ci->i_fscache_gen == ci->i_rdcache_gen;
-}
-
-
-/* Atempt to read from the fscache,
- *
- * This function is called from the readpage_nounlock context. DO NOT attempt to
- * unlock the page here (or in the callback).
- */
-int ceph_readpage_from_fscache(struct inode *inode, struct page *page)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int ret;
-
- if (!cache_valid(ci))
- return -ENOBUFS;
-
- ret = fscache_read_or_alloc_page(ci->fscache, page,
- ceph_readpage_from_fscache_complete, NULL,
- GFP_KERNEL);
-
- switch (ret) {
- case 0: /* Page found */
- dout("page read submitted\n");
- return 0;
- case -ENOBUFS: /* Pages were not found, and can't be */
- case -ENODATA: /* Pages were not found */
- dout("page/inode not in cache\n");
- return ret;
- default:
- dout("%s: unknown error ret = %i\n", __func__, ret);
- return ret;
- }
-}
-
-int ceph_readpages_from_fscache(struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int ret;
-
- if (!cache_valid(ci))
- return -ENOBUFS;
-
- ret = fscache_read_or_alloc_pages(ci->fscache, mapping, pages, nr_pages,
- ceph_readpage_from_fscache_complete,
- NULL, mapping_gfp_mask(mapping));
-
- switch (ret) {
- case 0: /* All pages found */
- dout("all-page read submitted\n");
- return 0;
- case -ENOBUFS: /* Some pages were not found, and can't be */
- case -ENODATA: /* some pages were not found */
- dout("page/inode not in cache\n");
- return ret;
- default:
- dout("%s: unknown error ret = %i\n", __func__, ret);
- return ret;
- }
-}
-
-void ceph_readpage_to_fscache(struct inode *inode, struct page *page)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- int ret;
-
- if (!PageFsCache(page))
- return;
-
- if (!cache_valid(ci))
- return;
-
- ret = fscache_write_page(ci->fscache, page, i_size_read(inode),
- GFP_KERNEL);
- if (ret)
- fscache_uncache_page(ci->fscache, page);
-}
-
-void ceph_invalidate_fscache_page(struct inode* inode, struct page *page)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
-
- if (!PageFsCache(page))
- return;
-
- fscache_wait_on_page_write(ci->fscache, page);
- fscache_uncache_page(ci->fscache, page);
-}
-
void ceph_fscache_unregister_fs(struct ceph_fs_client* fsc)
{
if (fscache_cookie_valid(fsc->fscache)) {
}
fsc->fscache = NULL;
}
-
-/*
- * caller should hold CEPH_CAP_FILE_{RD,CACHE}
- */
-void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
-{
- if (cache_valid(ci))
- return;
-
- /* resue i_truncate_mutex. There should be no pending
- * truncate while the caller holds CEPH_CAP_FILE_RD */
- mutex_lock(&ci->i_truncate_mutex);
- if (!cache_valid(ci)) {
- if (fscache_check_consistency(ci->fscache, &ci->i_vino))
- fscache_invalidate(ci->fscache);
- spin_lock(&ci->i_ceph_lock);
- ci->i_fscache_gen = ci->i_rdcache_gen;
- spin_unlock(&ci->i_ceph_lock);
- }
- mutex_unlock(&ci->i_truncate_mutex);
-}
#ifndef _CEPH_CACHE_H
#define _CEPH_CACHE_H
+#include <linux/netfs.h>
+
#ifdef CONFIG_CEPH_FSCACHE
extern struct fscache_netfs ceph_cache_netfs;
struct address_space *mapping,
struct list_head *pages,
unsigned *nr_pages);
-void ceph_readpage_to_fscache(struct inode *inode, struct page *page);
-void ceph_invalidate_fscache_page(struct inode* inode, struct page *page);
static inline void ceph_fscache_inode_init(struct ceph_inode_info *ci)
{
ci->fscache = NULL;
- ci->i_fscache_gen = 0;
}
-static inline void ceph_fscache_invalidate(struct inode *inode)
+static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci)
{
- fscache_invalidate(ceph_inode(inode)->fscache);
+ return ci->fscache;
}
-static inline void ceph_fscache_uncache_page(struct inode *inode,
- struct page *page)
+static inline void ceph_fscache_invalidate(struct inode *inode)
{
- struct ceph_inode_info *ci = ceph_inode(inode);
- return fscache_uncache_page(ci->fscache, page);
+ fscache_invalidate(ceph_inode(inode)->fscache);
}
-static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
+static inline bool ceph_is_cache_enabled(struct inode *inode)
{
- struct inode* inode = page->mapping->host;
- struct ceph_inode_info *ci = ceph_inode(inode);
- return fscache_maybe_release_page(ci->fscache, page, gfp);
-}
+ struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(inode));
-static inline void ceph_fscache_readpage_cancel(struct inode *inode,
- struct page *page)
-{
- struct ceph_inode_info *ci = ceph_inode(inode);
- if (fscache_cookie_valid(ci->fscache) && PageFsCache(page))
- __fscache_uncache_page(ci->fscache, page);
+ if (!cookie)
+ return false;
+ return fscache_cookie_enabled(cookie);
}
-static inline void ceph_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
+static inline int ceph_begin_cache_operation(struct netfs_read_request *rreq)
{
- struct ceph_inode_info *ci = ceph_inode(inode);
- return fscache_readpages_cancel(ci->fscache, pages);
-}
+ struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode));
-static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
-{
- ci->i_fscache_gen = ci->i_rdcache_gen - 1;
+ return fscache_begin_read_operation(rreq, cookie);
}
-
#else
static inline int ceph_fscache_register(void)
{
}
+static inline struct fscache_cookie *ceph_fscache_cookie(struct ceph_inode_info *ci)
+{
+ return NULL;
+}
+
static inline void ceph_fscache_register_inode_cookie(struct inode *inode)
{
}
{
}
-static inline void ceph_fscache_revalidate_cookie(struct ceph_inode_info *ci)
-{
-}
-
-static inline void ceph_fscache_uncache_page(struct inode *inode,
- struct page *pages)
-{
-}
-
-static inline int ceph_readpage_from_fscache(struct inode* inode,
- struct page *page)
-{
- return -ENOBUFS;
-}
-
-static inline int ceph_readpages_from_fscache(struct inode *inode,
- struct address_space *mapping,
- struct list_head *pages,
- unsigned *nr_pages)
-{
- return -ENOBUFS;
-}
-
-static inline void ceph_readpage_to_fscache(struct inode *inode,
- struct page *page)
-{
-}
-
static inline void ceph_fscache_invalidate(struct inode *inode)
{
}
-static inline void ceph_invalidate_fscache_page(struct inode *inode,
- struct page *page)
+static inline bool ceph_is_cache_enabled(struct inode *inode)
{
+ return false;
}
-static inline int ceph_release_fscache_page(struct page *page, gfp_t gfp)
-{
- return 1;
-}
-
-static inline void ceph_fscache_readpage_cancel(struct inode *inode,
- struct page *page)
-{
-}
-
-static inline void ceph_fscache_readpages_cancel(struct inode *inode,
- struct list_head *pages)
-{
-}
-
-static inline void ceph_disable_fscache_readpage(struct ceph_inode_info *ci)
+static inline int ceph_begin_cache_operation(struct netfs_read_request *rreq)
{
+ return -ENOBUFS;
}
-
#endif
-#endif
+#endif /* _CEPH_CACHE_H */
arg->flush_tid = flush_tid;
arg->oldest_flush_tid = oldest_flush_tid;
- arg->size = inode->i_size;
+ arg->size = i_size_read(inode);
ci->i_reported_size = arg->size;
arg->max_size = ci->i_wanted_max_size;
if (cap == ci->i_auth_cap) {
u32 invalidating_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
+ ceph_fscache_invalidate(inode);
invalidate_mapping_pages(&inode->i_data, 0, -1);
spin_lock(&ci->i_ceph_lock);
bool __ceph_should_report_size(struct ceph_inode_info *ci)
{
- loff_t size = ci->vfs_inode.i_size;
+ loff_t size = i_size_read(&ci->vfs_inode);
/* mds will adjust max size according to the reported size */
if (ci->i_flushing_caps & CEPH_CAP_FILE_WR)
return false;
*got = need | want;
else
*got = need;
- if (S_ISREG(inode->i_mode) &&
- (need & CEPH_CAP_FILE_RD) &&
- !(*got & CEPH_CAP_FILE_CACHE))
- ceph_disable_fscache_readpage(ci);
ceph_take_cap_refs(ci, *got, true);
ret = 1;
}
* due to a small max_size, make sure we check_max_size (and possibly
* ask the mds) so we don't get hung up indefinitely.
*/
-int ceph_get_caps(struct file *filp, int need, int want,
- loff_t endoff, int *got, struct page **pinned_page)
+int ceph_get_caps(struct file *filp, int need, int want, loff_t endoff, int *got)
{
struct ceph_file_info *fi = filp->private_data;
struct inode *inode = file_inode(filp);
struct page *page =
find_get_page(inode->i_mapping, 0);
if (page) {
- if (PageUptodate(page)) {
- *pinned_page = page;
- break;
- }
+ bool uptodate = PageUptodate(page);
+
put_page(page);
+ if (uptodate)
+ break;
}
/*
* drop cap refs first because getattr while
}
break;
}
-
- if (S_ISREG(ci->vfs_inode.i_mode) &&
- (_got & CEPH_CAP_FILE_RD) && (_got & CEPH_CAP_FILE_CACHE))
- ceph_fscache_revalidate_cookie(ci);
-
*got = _got;
return 0;
}
dout("handle_cap_grant inode %p cap %p mds%d seq %d %s\n",
inode, cap, session->s_mds, seq, ceph_cap_string(newcaps));
dout(" size %llu max_size %llu, i_size %llu\n", size, max_size,
- inode->i_size);
+ i_size_read(inode));
/*
seq_printf(s, "item total avg_lat(us) min_lat(us) max_lat(us) stdev(us)\n");
seq_printf(s, "-----------------------------------------------------------------------------------\n");
- spin_lock(&m->read_latency_lock);
+ spin_lock(&m->read_metric_lock);
total = m->total_reads;
sum = m->read_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->read_latency_min;
max = m->read_latency_max;
sq = m->read_latency_sq_sum;
- spin_unlock(&m->read_latency_lock);
+ spin_unlock(&m->read_metric_lock);
CEPH_METRIC_SHOW("read", total, avg, min, max, sq);
- spin_lock(&m->write_latency_lock);
+ spin_lock(&m->write_metric_lock);
total = m->total_writes;
sum = m->write_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->write_latency_min;
max = m->write_latency_max;
sq = m->write_latency_sq_sum;
- spin_unlock(&m->write_latency_lock);
+ spin_unlock(&m->write_metric_lock);
CEPH_METRIC_SHOW("write", total, avg, min, max, sq);
- spin_lock(&m->metadata_latency_lock);
+ spin_lock(&m->metadata_metric_lock);
total = m->total_metadatas;
sum = m->metadata_latency_sum;
avg = total > 0 ? DIV64_U64_ROUND_CLOSEST(sum, total) : 0;
min = m->metadata_latency_min;
max = m->metadata_latency_max;
sq = m->metadata_latency_sq_sum;
- spin_unlock(&m->metadata_latency_lock);
+ spin_unlock(&m->metadata_metric_lock);
CEPH_METRIC_SHOW("metadata", total, avg, min, max, sq);
seq_printf(s, "\n");
switch (whence) {
case SEEK_CUR:
offset += file->f_pos;
+ break;
case SEEK_SET:
break;
case SEEK_END:
retval = -EOPNOTSUPP;
+ goto out;
default:
goto out;
}
/*
* Handle lookups for the hidden .snap directory.
*/
-int ceph_handle_snapdir(struct ceph_mds_request *req,
- struct dentry *dentry, int err)
+struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
+ struct dentry *dentry, int err)
{
struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
struct inode *parent = d_inode(dentry->d_parent); /* we hold i_mutex */
/* .snap dir? */
if (err == -ENOENT &&
ceph_snap(parent) == CEPH_NOSNAP &&
- strcmp(dentry->d_name.name,
- fsc->mount_options->snapdir_name) == 0) {
+ strcmp(dentry->d_name.name, fsc->mount_options->snapdir_name) == 0) {
+ struct dentry *res;
struct inode *inode = ceph_get_snapdir(parent);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
- dout("ENOENT on snapdir %p '%pd', linking to snapdir %p\n",
- dentry, dentry, inode);
- BUG_ON(!d_unhashed(dentry));
- d_add(dentry, inode);
- err = 0;
+
+ res = d_splice_alias(inode, dentry);
+ dout("ENOENT on snapdir %p '%pd', linking to snapdir %p. Spliced dentry %p\n",
+ dentry, dentry, inode, res);
+ if (res)
+ dentry = res;
}
- return err;
+ return dentry;
}
/*
struct ceph_fs_client *fsc = ceph_sb_to_client(dir->i_sb);
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
struct ceph_mds_request *req;
+ struct dentry *res;
int op;
int mask;
int err;
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc, NULL, req);
- err = ceph_handle_snapdir(req, dentry, err);
+ res = ceph_handle_snapdir(req, dentry, err);
+ if (IS_ERR(res)) {
+ err = PTR_ERR(res);
+ } else {
+ dentry = res;
+ err = 0;
+ }
dentry = ceph_finish_lookup(req, dentry, err);
ceph_mdsc_put_request(req); /* will dput(dentry) */
dout("lookup result=%p\n", dentry);
vino.ino = ino;
vino.snap = CEPH_NOSNAP;
+
+ if (ceph_vino_is_reserved(vino))
+ return ERR_PTR(-ESTALE);
+
inode = ceph_find_inode(sb, vino);
if (!inode) {
struct ceph_mds_request *req;
return ERR_CAST(inode);
/* We need LINK caps to reliably check i_nlink */
err = ceph_do_getattr(inode, CEPH_CAP_LINK_SHARED, false);
- if (err)
+ if (err) {
+ iput(inode);
return ERR_PTR(err);
+ }
/* -ESTALE if inode as been unlinked and no file is open */
if ((inode->i_nlink == 0) && (atomic_read(&inode->i_count) == 1)) {
iput(inode);
vino.ino = sfh->ino;
vino.snap = sfh->snapid;
}
+
+ if (ceph_vino_is_reserved(vino))
+ return ERR_PTR(-ESTALE);
+
inode = ceph_find_inode(sb, vino);
if (inode)
return d_obtain_alias(inode);
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
- err = ceph_handle_snapdir(req, dentry, err);
- if (err)
+ dentry = ceph_handle_snapdir(req, dentry, err);
+ if (IS_ERR(dentry)) {
+ err = PTR_ERR(dentry);
goto out_req;
+ }
+ err = 0;
if ((flags & O_CREAT) && !req->r_reply_info.head->is_dentry)
err = ceph_handle_notrace_create(dir, dentry);
if (!ret)
ret = ceph_osdc_wait_request(osdc, req);
- ceph_update_read_latency(&fsc->mdsc->metric,
+ ceph_update_read_metrics(&fsc->mdsc->metric,
req->r_start_latency,
req->r_end_latency,
ret);
dout("ceph_aio_complete_req %p rc %d bytes %u\n",
inode, rc, osd_data->bvec_pos.iter.bi_size);
- /* r_start_latency == 0 means the request was not submitted */
- if (req->r_start_latency) {
- if (aio_req->write)
- ceph_update_write_latency(metric, req->r_start_latency,
- req->r_end_latency, rc);
- else
- ceph_update_read_latency(metric, req->r_start_latency,
- req->r_end_latency, rc);
- }
-
if (rc == -EOLDSNAPC) {
struct ceph_aio_work *aio_work;
BUG_ON(!aio_req->write);
}
}
+ /* r_start_latency == 0 means the request was not submitted */
+ if (req->r_start_latency) {
+ if (aio_req->write)
+ ceph_update_write_metrics(metric, req->r_start_latency,
+ req->r_end_latency, rc);
+ else
+ ceph_update_read_metrics(metric, req->r_start_latency,
+ req->r_end_latency, rc);
+ }
+
put_bvecs(osd_data->bvec_pos.bvecs, osd_data->num_bvecs,
aio_req->should_dirty);
ceph_osdc_put_request(req);
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
if (write)
- ceph_update_write_latency(metric, req->r_start_latency,
+ ceph_update_write_metrics(metric, req->r_start_latency,
req->r_end_latency, ret);
else
- ceph_update_read_latency(metric, req->r_start_latency,
+ ceph_update_read_metrics(metric, req->r_start_latency,
req->r_end_latency, ret);
size = i_size_read(inode);
if (!ret)
ret = ceph_osdc_wait_request(&fsc->client->osdc, req);
- ceph_update_write_latency(&fsc->mdsc->metric, req->r_start_latency,
+ ceph_update_write_metrics(&fsc->mdsc->metric, req->r_start_latency,
req->r_end_latency, ret);
out:
ceph_osdc_put_request(req);
size_t len = iov_iter_count(to);
struct inode *inode = file_inode(filp);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct page *pinned_page = NULL;
bool direct_lock = iocb->ki_flags & IOCB_DIRECT;
ssize_t ret;
int want, got = 0;
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
want = CEPH_CAP_FILE_CACHE;
- ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1,
- &got, &pinned_page);
+ ret = ceph_get_caps(filp, CEPH_CAP_FILE_RD, want, -1, &got);
if (ret < 0) {
if (iocb->ki_flags & IOCB_DIRECT)
ceph_end_io_direct(inode);
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
- if (pinned_page) {
- put_page(pinned_page);
- pinned_page = NULL;
- }
ceph_put_cap_refs(ci, got);
if (direct_lock)
else
want = CEPH_CAP_FILE_BUFFER;
got = 0;
- err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count,
- &got, NULL);
+ err = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, pos + count, &got);
if (err < 0)
goto out;
else
want = CEPH_CAP_FILE_BUFFER;
- ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got, NULL);
+ ret = ceph_get_caps(file, CEPH_CAP_FILE_WR, want, endoff, &got);
if (ret < 0)
goto unlock;
retry_caps:
ret = ceph_get_caps(dst_filp, CEPH_CAP_FILE_WR, CEPH_CAP_FILE_BUFFER,
- dst_endoff, dst_got, NULL);
+ dst_endoff, dst_got);
if (ret < 0)
return ret;
return ret;
}
ret = ceph_get_caps(src_filp, CEPH_CAP_FILE_RD,
- CEPH_CAP_FILE_SHARED, -1, src_got, NULL);
+ CEPH_CAP_FILE_SHARED, -1, src_got);
if (ret < 0)
return ret;
/*... drop src_ci caps too, and retry */
{
struct inode *inode;
+ if (ceph_vino_is_reserved(vino))
+ return ERR_PTR(-EREMOTEIO);
+
inode = iget5_locked(sb, (unsigned long)vino.ino, ceph_ino_compare,
ceph_set_ino_cb, &vino);
if (!inode)
inode->i_mtime = parent->i_mtime;
inode->i_ctime = parent->i_ctime;
inode->i_atime = parent->i_atime;
- inode->i_op = &ceph_snapdir_iops;
- inode->i_fop = &ceph_snapdir_fops;
- ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
ci->i_rbytes = 0;
ci->i_btime = ceph_inode(parent)->i_btime;
- if (inode->i_state & I_NEW)
+ if (inode->i_state & I_NEW) {
+ inode->i_op = &ceph_snapdir_iops;
+ inode->i_fop = &ceph_snapdir_fops;
+ ci->i_snap_caps = CEPH_CAP_PIN; /* so we can open */
unlock_new_inode(inode);
+ }
return inode;
}
{
struct ceph_inode_info *ci = ceph_inode(inode);
int queue_trunc = 0;
+ loff_t isize = i_size_read(inode);
if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
- (truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
- dout("size %lld -> %llu\n", inode->i_size, size);
+ (truncate_seq == ci->i_truncate_seq && size > isize)) {
+ dout("size %lld -> %llu\n", isize, size);
if (size > 0 && S_ISDIR(inode->i_mode)) {
pr_err("fill_file_size non-zero size for directory\n");
size = 0;
ci->i_rfiles = le64_to_cpu(info->rfiles);
ci->i_rsubdirs = le64_to_cpu(info->rsubdirs);
ci->i_dir_pin = iinfo->dir_pin;
+ ci->i_rsnaps = iinfo->rsnaps;
ceph_decode_timespec64(&ci->i_rctime, &info->rctime);
}
}
bool ret;
spin_lock(&ci->i_ceph_lock);
- dout("set_size %p %llu -> %llu\n", inode, inode->i_size, size);
+ dout("set_size %p %llu -> %llu\n", inode, i_size_read(inode), size);
i_size_write(inode, size);
inode->i_blocks = calc_inode_blocks(size);
orig_gen = ci->i_rdcache_gen;
spin_unlock(&ci->i_ceph_lock);
+ ceph_fscache_invalidate(inode);
if (invalidate_inode_pages2(inode->i_mapping) < 0) {
pr_err("invalidate_pages %p fails\n", inode);
}
}
}
if (ia_valid & ATTR_SIZE) {
- dout("setattr %p size %lld -> %lld\n", inode,
- inode->i_size, attr->ia_size);
- if ((issued & CEPH_CAP_FILE_EXCL) &&
- attr->ia_size > inode->i_size) {
+ loff_t isize = i_size_read(inode);
+
+ dout("setattr %p size %lld -> %lld\n", inode, isize, attr->ia_size);
+ if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size > isize) {
i_size_write(inode, attr->ia_size);
inode->i_blocks = calc_inode_blocks(attr->ia_size);
ci->i_reported_size = attr->ia_size;
dirtied |= CEPH_CAP_FILE_EXCL;
ia_valid |= ATTR_MTIME;
} else if ((issued & CEPH_CAP_FILE_SHARED) == 0 ||
- attr->ia_size != inode->i_size) {
+ attr->ia_size != isize) {
req->r_args.setattr.size = cpu_to_le64(attr->ia_size);
- req->r_args.setattr.old_size =
- cpu_to_le64(inode->i_size);
+ req->r_args.setattr.old_size = cpu_to_le64(isize);
mask |= CEPH_SETATTR_SIZE;
release |= CEPH_CAP_FILE_SHARED | CEPH_CAP_FILE_EXCL |
CEPH_CAP_FILE_RD | CEPH_CAP_FILE_WR;
return err;
if ((attr->ia_valid & ATTR_SIZE) &&
- attr->ia_size > max(inode->i_size, fsc->max_file_size))
+ attr->ia_size > max(i_size_read(inode), fsc->max_file_size))
return -EFBIG;
if ((attr->ia_valid & ATTR_SIZE) &&
}
/**
- * ceph_end_io_direct - declare the file is being used for direct i/o
+ * ceph_start_io_direct - declare the file is being used for direct i/o
* @inode: file inode
*
* Declare that a direct I/O operation is about to start, and ensure
memset(&info->snap_btime, 0, sizeof(info->snap_btime));
}
+ /* snapshot count, remains zero for v<=3 */
+ if (struct_v >= 4) {
+ ceph_decode_64_safe(p, end, info->rsnaps, bad);
+ } else {
+ info->rsnaps = 0;
+ }
+
*p = end;
} else {
if (features & CEPH_FEATURE_MDS_INLINE_DATA) {
}
info->dir_pin = -ENODATA;
- /* info->snap_btime remains zero */
+ /* info->snap_btime and info->rsnaps remain zero */
}
return 0;
bad:
ceph_decode_64_safe(p, end, start, bad);
ceph_decode_64_safe(p, end, len, bad);
+
+ /* Don't accept a delegation of system inodes */
+ if (start < CEPH_INO_SYSTEM_BASE) {
+ pr_warn_ratelimited("ceph: ignoring reserved inode range delegation (start=0x%llx len=0x%llx)\n",
+ start, len);
+ continue;
+ }
while (len--) {
int err = xa_insert(&s->s_delegated_inos, ino = start++,
DELEGATED_INO_AVAILABLE,
/* kick calling process */
complete_request(mdsc, req);
- ceph_update_metadata_latency(&mdsc->metric, req->r_start_latency,
+ ceph_update_metadata_metrics(&mdsc->metric, req->r_start_latency,
req->r_end_latency, err);
out:
ceph_mdsc_put_request(req);
rec.v1.cap_id = cpu_to_le64(cap->cap_id);
rec.v1.wanted = cpu_to_le32(__ceph_caps_wanted(ci));
rec.v1.issued = cpu_to_le32(cap->issued);
- rec.v1.size = cpu_to_le64(inode->i_size);
+ rec.v1.size = cpu_to_le64(i_size_read(inode));
ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime);
ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime);
rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino);
s32 dir_pin;
struct ceph_timespec btime;
struct ceph_timespec snap_btime;
+ u64 rsnaps;
u64 change_attr;
};
struct ceph_metric_write_latency *write;
struct ceph_metric_metadata_latency *meta;
struct ceph_metric_dlease *dlease;
+ struct ceph_opened_files *files;
+ struct ceph_pinned_icaps *icaps;
+ struct ceph_opened_inodes *inodes;
struct ceph_client_metric *m = &mdsc->metric;
u64 nr_caps = atomic64_read(&m->total_caps);
struct ceph_msg *msg;
s32 len;
len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
- + sizeof(*meta) + sizeof(*dlease);
+ + sizeof(*meta) + sizeof(*dlease) + sizeof(*files)
+ + sizeof(*icaps) + sizeof(*inodes);
msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
if (!msg) {
dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
items++;
+ sum = percpu_counter_sum(&m->total_inodes);
+
+ /* encode the opened files metric */
+ files = (struct ceph_opened_files *)(dlease + 1);
+ files->type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_FILES);
+ files->ver = 1;
+ files->compat = 1;
+ files->data_len = cpu_to_le32(sizeof(*files) - 10);
+ files->opened_files = cpu_to_le64(atomic64_read(&m->opened_files));
+ files->total = cpu_to_le64(sum);
+ items++;
+
+ /* encode the pinned icaps metric */
+ icaps = (struct ceph_pinned_icaps *)(files + 1);
+ icaps->type = cpu_to_le32(CLIENT_METRIC_TYPE_PINNED_ICAPS);
+ icaps->ver = 1;
+ icaps->compat = 1;
+ icaps->data_len = cpu_to_le32(sizeof(*icaps) - 10);
+ icaps->pinned_icaps = cpu_to_le64(nr_caps);
+ icaps->total = cpu_to_le64(sum);
+ items++;
+
+ /* encode the opened inodes metric */
+ inodes = (struct ceph_opened_inodes *)(icaps + 1);
+ inodes->type = cpu_to_le32(CLIENT_METRIC_TYPE_OPENED_INODES);
+ inodes->ver = 1;
+ inodes->compat = 1;
+ inodes->data_len = cpu_to_le32(sizeof(*inodes) - 10);
+ inodes->opened_inodes = cpu_to_le64(percpu_counter_sum(&m->opened_inodes));
+ inodes->total = cpu_to_le64(sum);
+ items++;
+
put_unaligned_le32(items, &head->num);
msg->front.iov_len = len;
msg->hdr.version = cpu_to_le16(1);
if (ret)
goto err_i_caps_mis;
- spin_lock_init(&m->read_latency_lock);
+ spin_lock_init(&m->read_metric_lock);
m->read_latency_sq_sum = 0;
m->read_latency_min = KTIME_MAX;
m->read_latency_max = 0;
m->total_reads = 0;
m->read_latency_sum = 0;
- spin_lock_init(&m->write_latency_lock);
+ spin_lock_init(&m->write_metric_lock);
m->write_latency_sq_sum = 0;
m->write_latency_min = KTIME_MAX;
m->write_latency_max = 0;
m->total_writes = 0;
m->write_latency_sum = 0;
- spin_lock_init(&m->metadata_latency_lock);
+ spin_lock_init(&m->metadata_metric_lock);
m->metadata_latency_sq_sum = 0;
m->metadata_latency_min = KTIME_MAX;
m->metadata_latency_max = 0;
*sq_sump += sq;
}
-void ceph_update_read_latency(struct ceph_client_metric *m,
+void ceph_update_read_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
if (unlikely(rc < 0 && rc != -ENOENT && rc != -ETIMEDOUT))
return;
- spin_lock(&m->read_latency_lock);
+ spin_lock(&m->read_metric_lock);
__update_latency(&m->total_reads, &m->read_latency_sum,
&m->read_latency_min, &m->read_latency_max,
&m->read_latency_sq_sum, lat);
- spin_unlock(&m->read_latency_lock);
+ spin_unlock(&m->read_metric_lock);
}
-void ceph_update_write_latency(struct ceph_client_metric *m,
+void ceph_update_write_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
if (unlikely(rc && rc != -ETIMEDOUT))
return;
- spin_lock(&m->write_latency_lock);
+ spin_lock(&m->write_metric_lock);
__update_latency(&m->total_writes, &m->write_latency_sum,
&m->write_latency_min, &m->write_latency_max,
&m->write_latency_sq_sum, lat);
- spin_unlock(&m->write_latency_lock);
+ spin_unlock(&m->write_metric_lock);
}
-void ceph_update_metadata_latency(struct ceph_client_metric *m,
+void ceph_update_metadata_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc)
{
if (unlikely(rc && rc != -ENOENT))
return;
- spin_lock(&m->metadata_latency_lock);
+ spin_lock(&m->metadata_metric_lock);
__update_latency(&m->total_metadatas, &m->metadata_latency_sum,
&m->metadata_latency_min, &m->metadata_latency_max,
&m->metadata_latency_sq_sum, lat);
- spin_unlock(&m->metadata_latency_lock);
+ spin_unlock(&m->metadata_metric_lock);
}
CLIENT_METRIC_TYPE_WRITE_LATENCY,
CLIENT_METRIC_TYPE_METADATA_LATENCY,
CLIENT_METRIC_TYPE_DENTRY_LEASE,
+ CLIENT_METRIC_TYPE_OPENED_FILES,
+ CLIENT_METRIC_TYPE_PINNED_ICAPS,
+ CLIENT_METRIC_TYPE_OPENED_INODES,
- CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_DENTRY_LEASE,
+ CLIENT_METRIC_TYPE_MAX = CLIENT_METRIC_TYPE_OPENED_INODES,
};
/*
CLIENT_METRIC_TYPE_WRITE_LATENCY, \
CLIENT_METRIC_TYPE_METADATA_LATENCY, \
CLIENT_METRIC_TYPE_DENTRY_LEASE, \
+ CLIENT_METRIC_TYPE_OPENED_FILES, \
+ CLIENT_METRIC_TYPE_PINNED_ICAPS, \
+ CLIENT_METRIC_TYPE_OPENED_INODES, \
\
CLIENT_METRIC_TYPE_MAX, \
}
__le64 total;
} __packed;
+/* metric opened files header */
+struct ceph_opened_files {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(opened_files + total) */
+ __le64 opened_files;
+ __le64 total;
+} __packed;
+
+/* metric pinned i_caps header */
+struct ceph_pinned_icaps {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(pinned_icaps + total) */
+ __le64 pinned_icaps;
+ __le64 total;
+} __packed;
+
+/* metric opened inodes header */
+struct ceph_opened_inodes {
+ __le32 type; /* ceph metric type */
+
+ __u8 ver;
+ __u8 compat;
+
+ __le32 data_len; /* length of sizeof(opened_inodes + total) */
+ __le64 opened_inodes;
+ __le64 total;
+} __packed;
+
struct ceph_metric_head {
__le32 num; /* the number of metrics that will be sent */
} __packed;
struct percpu_counter i_caps_hit;
struct percpu_counter i_caps_mis;
- spinlock_t read_latency_lock;
+ spinlock_t read_metric_lock;
u64 total_reads;
ktime_t read_latency_sum;
ktime_t read_latency_sq_sum;
ktime_t read_latency_min;
ktime_t read_latency_max;
- spinlock_t write_latency_lock;
+ spinlock_t write_metric_lock;
u64 total_writes;
ktime_t write_latency_sum;
ktime_t write_latency_sq_sum;
ktime_t write_latency_min;
ktime_t write_latency_max;
- spinlock_t metadata_latency_lock;
+ spinlock_t metadata_metric_lock;
u64 total_metadatas;
ktime_t metadata_latency_sum;
ktime_t metadata_latency_sq_sum;
percpu_counter_inc(&m->i_caps_mis);
}
-extern void ceph_update_read_latency(struct ceph_client_metric *m,
+extern void ceph_update_read_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
-extern void ceph_update_write_latency(struct ceph_client_metric *m,
+extern void ceph_update_write_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
-extern void ceph_update_metadata_latency(struct ceph_client_metric *m,
+extern void ceph_update_metadata_metrics(struct ceph_client_metric *m,
ktime_t r_start, ktime_t r_end,
int rc);
#endif /* _FS_CEPH_MDS_METRIC_H */
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
BUG_ON(capsnap->writing);
- capsnap->size = inode->i_size;
+ capsnap->size = i_size_read(inode);
capsnap->mtime = inode->i_mtime;
capsnap->atime = inode->i_atime;
capsnap->ctime = inode->i_ctime;
#include <linux/ceph/libceph.h>
#ifdef CONFIG_CEPH_FSCACHE
+#define FSCACHE_USE_NEW_IO_API
#include <linux/fscache.h>
#endif
/* for dirs */
struct timespec64 i_rctime;
- u64 i_rbytes, i_rfiles, i_rsubdirs;
+ u64 i_rbytes, i_rfiles, i_rsubdirs, i_rsnaps;
u64 i_files, i_subdirs;
/* quotas */
#ifdef CONFIG_CEPH_FSCACHE
struct fscache_cookie *fscache;
- u32 i_fscache_gen;
#endif
errseq_t i_meta_err;
ci->i_vino.snap == pvino->snap;
}
+/*
+ * The MDS reserves a set of inodes for its own usage. These should never
+ * be accessible by clients, and so the MDS has no reason to ever hand these
+ * out. The range is CEPH_MDS_INO_MDSDIR_OFFSET..CEPH_INO_SYSTEM_BASE.
+ *
+ * These come from src/mds/mdstypes.h in the ceph sources.
+ */
+#define CEPH_MAX_MDS 0x100
+#define CEPH_NUM_STRAY 10
+#define CEPH_MDS_INO_MDSDIR_OFFSET (1 * CEPH_MAX_MDS)
+#define CEPH_INO_SYSTEM_BASE ((6*CEPH_MAX_MDS) + (CEPH_MAX_MDS * CEPH_NUM_STRAY))
+
+static inline bool ceph_vino_is_reserved(const struct ceph_vino vino)
+{
+ if (vino.ino < CEPH_INO_SYSTEM_BASE &&
+ vino.ino >= CEPH_MDS_INO_MDSDIR_OFFSET) {
+ WARN_RATELIMIT(1, "Attempt to access reserved inode number 0x%llx", vino.ino);
+ return true;
+ }
+ return false;
+}
static inline struct inode *ceph_find_inode(struct super_block *sb,
struct ceph_vino vino)
{
+ if (ceph_vino_is_reserved(vino))
+ return NULL;
+
/*
* NB: The hashval will be run through the fs/inode.c hash function
* anyway, so there is no need to squash the inode number down to
int mds, int drop, int unless);
extern int ceph_get_caps(struct file *filp, int need, int want,
- loff_t endoff, int *got, struct page **pinned_page);
+ loff_t endoff, int *got);
extern int ceph_try_get_caps(struct inode *inode,
int need, int want, bool nonblock, int *got);
extern loff_t ceph_make_fpos(unsigned high, unsigned off, bool hash_order);
extern int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry);
-extern int ceph_handle_snapdir(struct ceph_mds_request *req,
+extern struct dentry *ceph_handle_snapdir(struct ceph_mds_request *req,
struct dentry *dentry, int err);
extern struct dentry *ceph_finish_lookup(struct ceph_mds_request *req,
struct dentry *dentry, int err);
return ceph_fmt_xattr(val, size, "%lld", ci->i_rsubdirs);
}
+static ssize_t ceph_vxattrcb_dir_rsnaps(struct ceph_inode_info *ci, char *val,
+ size_t size)
+{
+ return ceph_fmt_xattr(val, size, "%lld", ci->i_rsnaps);
+}
+
static ssize_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
size_t size)
{
XATTR_RSTAT_FIELD(dir, rentries),
XATTR_RSTAT_FIELD(dir, rfiles),
XATTR_RSTAT_FIELD(dir, rsubdirs),
+ XATTR_RSTAT_FIELD(dir, rsnaps),
XATTR_RSTAT_FIELD(dir, rbytes),
XATTR_RSTAT_FIELD(dir, rctime),
{
#define CIFS_MOUNT_MODE_FROM_SID 0x10000000 /* retrieve mode from special ACE */
#define CIFS_MOUNT_RO_CACHE 0x20000000 /* assumes share will not change */
#define CIFS_MOUNT_RW_CACHE 0x40000000 /* assumes only client accessing */
+#define CIFS_MOUNT_SHUTDOWN 0x80000000
struct cifs_sb_info {
struct rb_root tlink_tree;
/* char buffer[]; */
} __packed;
+/*
+ * Dumping the commonly used 16 byte (e.g. CCM and GCM128) keys still supported
+ * for backlevel compatibility, but is not sufficient for dumping the less
+ * frequently used GCM256 (32 byte) keys (see the newer "CIFS_DUMP_FULL_KEY"
+ * ioctl for dumping decryption info for GCM256 mounts)
+ */
struct smb3_key_debug_info {
__u64 Suid;
__u16 cipher_type;
__u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
} __packed;
+/*
+ * Dump full key (32 byte encrypt/decrypt keys instead of 16 bytes)
+ * is needed if GCM256 (stronger encryption) negotiated
+ */
+struct smb3_full_key_debug_info {
+ __u64 Suid;
+ __u16 cipher_type;
+ __u8 auth_key[16]; /* SMB2_NTLMV2_SESSKEY_SIZE */
+ __u8 smb3encryptionkey[32]; /* SMB3_ENC_DEC_KEY_SIZE */
+ __u8 smb3decryptionkey[32]; /* SMB3_ENC_DEC_KEY_SIZE */
+} __packed;
+
struct smb3_notify {
__u32 completion_filter;
bool watch_tree;
#define CIFS_QUERY_INFO _IOWR(CIFS_IOCTL_MAGIC, 7, struct smb_query_info)
#define CIFS_DUMP_KEY _IOWR(CIFS_IOCTL_MAGIC, 8, struct smb3_key_debug_info)
#define CIFS_IOC_NOTIFY _IOW(CIFS_IOCTL_MAGIC, 9, struct smb3_notify)
+#define CIFS_DUMP_FULL_KEY _IOWR(CIFS_IOCTL_MAGIC, 10, struct smb3_full_key_debug_info)
+#define CIFS_IOC_SHUTDOWN _IOR ('X', 125, __u32)
+
+/*
+ * Flags for going down operation
+ */
+#define CIFS_GOING_FLAGS_DEFAULT 0x0 /* going down */
+#define CIFS_GOING_FLAGS_LOGFLUSH 0x1 /* flush log but not data */
+#define CIFS_GOING_FLAGS_NOLOGFLUSH 0x2 /* don't flush log nor data */
+
+static inline bool cifs_forced_shutdown(struct cifs_sb_info *sbi)
+{
+ if (CIFS_MOUNT_SHUTDOWN & sbi->mnt_cifs_flags)
+ return true;
+ else
+ return false;
+}
bool linuxExtEnabled = true;
bool lookupCacheEnabled = true;
bool disable_legacy_dialects; /* false by default */
-bool enable_gcm_256; /* false by default, change when more servers support it */
+bool enable_gcm_256 = true;
bool require_gcm_256; /* false by default */
unsigned int global_secflags = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
struct workqueue_struct *decrypt_wq;
struct workqueue_struct *fileinfo_put_wq;
struct workqueue_struct *cifsoplockd_wq;
+struct workqueue_struct *deferredclose_wq;
__u32 cifs_lock_secret;
/*
/* cifs_inode->vfs_inode.i_flags = S_NOATIME | S_NOCMTIME; */
INIT_LIST_HEAD(&cifs_inode->openFileList);
INIT_LIST_HEAD(&cifs_inode->llist);
+ INIT_LIST_HEAD(&cifs_inode->deferred_closes);
+ spin_lock_init(&cifs_inode->deferred_lock);
return &cifs_inode->vfs_inode;
}
goto out;
}
- /* cifs_setup_volume_info->smb3_parse_devname() redups UNC & prepath */
- kfree(cifs_sb->ctx->UNC);
- cifs_sb->ctx->UNC = NULL;
- kfree(cifs_sb->ctx->prepath);
- cifs_sb->ctx->prepath = NULL;
-
- rc = cifs_setup_volume_info(cifs_sb->ctx, NULL, old_ctx->UNC);
+ rc = cifs_setup_volume_info(cifs_sb->ctx, NULL, NULL);
if (rc) {
root = ERR_PTR(rc);
goto out;
goto out_destroy_fileinfo_put_wq;
}
+ deferredclose_wq = alloc_workqueue("deferredclose",
+ WQ_FREEZABLE|WQ_MEM_RECLAIM, 0);
+ if (!deferredclose_wq) {
+ rc = -ENOMEM;
+ goto out_destroy_cifsoplockd_wq;
+ }
+
rc = cifs_fscache_register();
if (rc)
- goto out_destroy_cifsoplockd_wq;
+ goto out_destroy_deferredclose_wq;
rc = cifs_init_inodecache();
if (rc)
cifs_destroy_inodecache();
out_unreg_fscache:
cifs_fscache_unregister();
+out_destroy_deferredclose_wq:
+ destroy_workqueue(deferredclose_wq);
out_destroy_cifsoplockd_wq:
destroy_workqueue(cifsoplockd_wq);
out_destroy_fileinfo_put_wq:
cifs_destroy_mids();
cifs_destroy_inodecache();
cifs_fscache_unregister();
+ destroy_workqueue(deferredclose_wq);
destroy_workqueue(cifsoplockd_wq);
destroy_workqueue(decrypt_wq);
destroy_workqueue(fileinfo_put_wq);
__u32 oplock;
};
+struct cifs_deferred_close {
+ struct list_head dlist;
+ struct tcon_link *tlink;
+ __u16 netfid;
+ __u64 persistent_fid;
+ __u64 volatile_fid;
+};
+
/*
* This info hangs off the cifsFileInfo structure, pointed to by llist.
* This is used to track byte stream locks on the file
struct cifs_search_info srch_inf;
struct work_struct oplock_break; /* work for oplock breaks */
struct work_struct put; /* work for the final part of _put */
+ struct delayed_work deferred;
+ bool oplock_break_received; /* Flag to indicate oplock break */
+ bool deferred_scheduled;
};
struct cifs_io_parms {
#define CIFS_INO_DELETE_PENDING (3) /* delete pending on server */
#define CIFS_INO_INVALID_MAPPING (4) /* pagecache is invalid */
#define CIFS_INO_LOCK (5) /* lock bit for synchronization */
+#define CIFS_INO_MODIFIED_ATTR (6) /* Indicate change in mtime/ctime */
unsigned long flags;
spinlock_t writers_lock;
unsigned int writers; /* Number of writers on this inode */
struct fscache_cookie *fscache;
#endif
struct inode vfs_inode;
+ struct list_head deferred_closes; /* list of deferred closes */
+ spinlock_t deferred_lock; /* protection on deferred list */
};
static inline struct cifsInodeInfo *
void cifs_oplock_break(struct work_struct *work);
void cifs_queue_oplock_break(struct cifsFileInfo *cfile);
+void smb2_deferred_work_close(struct work_struct *work);
+extern const struct slow_work_ops cifs_oplock_break_ops;
extern struct workqueue_struct *cifsiod_wq;
extern struct workqueue_struct *decrypt_wq;
extern struct workqueue_struct *fileinfo_put_wq;
extern struct workqueue_struct *cifsoplockd_wq;
+extern struct workqueue_struct *deferredclose_wq;
extern __u32 cifs_lock_secret;
extern mempool_t *cifs_mid_poolp;
struct tcon_link *tlink,
struct cifs_pending_open *open);
extern void cifs_del_pending_open(struct cifs_pending_open *open);
+
+extern bool cifs_is_deferred_close(struct cifsFileInfo *cfile,
+ struct cifs_deferred_close **dclose);
+
+extern void cifs_add_deferred_close(struct cifsFileInfo *cfile,
+ struct cifs_deferred_close *dclose);
+
+extern void cifs_del_deferred_close(struct cifsFileInfo *cfile);
+
+extern void cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode);
+
+extern void cifs_close_all_deferred_files(struct cifs_tcon *cifs_tcon);
+
extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
int from_reconnect);
int rc;
struct TCP_Server_Info *server = container_of(work,
struct TCP_Server_Info, echo.work);
- unsigned long echo_interval;
-
- /*
- * If we need to renegotiate, set echo interval to zero to
- * immediately call echo service where we can renegotiate.
- */
- if (server->tcpStatus == CifsNeedNegotiate)
- echo_interval = 0;
- else
- echo_interval = server->echo_interval;
/*
* We cannot send an echo if it is disabled.
server->tcpStatus == CifsExiting ||
server->tcpStatus == CifsNew ||
(server->ops->can_echo && !server->ops->can_echo(server)) ||
- time_before(jiffies, server->lstrp + echo_interval - HZ))
+ time_before(jiffies, server->lstrp + server->echo_interval - HZ))
goto requeue_echo;
rc = server->ops->echo ? server->ops->echo(server) : -ENOSYS;
*/
if ((server->tcpStatus == CifsGood ||
server->tcpStatus == CifsNeedNegotiate) &&
+ (!server->ops->can_echo || server->ops->can_echo(server)) &&
time_after(jiffies, server->lstrp + 3 * server->echo_interval)) {
cifs_server_dbg(VFS, "has not responded in %lu seconds. Reconnecting...\n",
(3 * server->echo_interval) / HZ);
int
cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname)
{
- int rc = 0;
+ int rc;
- smb3_parse_devname(devname, ctx);
+ if (devname) {
+ cifs_dbg(FYI, "%s: devname=%s\n", __func__, devname);
+ rc = smb3_parse_devname(devname, ctx);
+ if (rc) {
+ cifs_dbg(VFS, "%s: failed to parse %s: %d\n", __func__, devname, rc);
+ return rc;
+ }
+ }
if (mntopts) {
char *ip;
- cifs_dbg(FYI, "%s: mntopts=%s\n", __func__, mntopts);
rc = smb3_parse_opt(mntopts, "ip", &ip);
- if (!rc && !cifs_convert_address((struct sockaddr *)&ctx->dstaddr, ip,
- strlen(ip))) {
+ if (rc) {
+ cifs_dbg(VFS, "%s: failed to parse ip options: %d\n", __func__, rc);
+ return rc;
+ }
+
+ rc = cifs_convert_address((struct sockaddr *)&ctx->dstaddr, ip, strlen(ip));
+ kfree(ip);
+ if (!rc) {
cifs_dbg(VFS, "%s: failed to convert ip address\n", __func__);
return -EINVAL;
}
return -EINVAL;
}
- return rc;
+ return 0;
}
static int
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
#include "fs_context.h"
+#include "cifs_ioctl.h"
static void
renew_parental_timestamps(struct dentry *direntry)
__u32 oplock;
struct cifsFileInfo *file_info;
+ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
+ return -EIO;
+
/*
* Posix open is only called (at lookup time) for file create now. For
* opens (rather than creates), because we do not know if it is a file
cifs_dbg(FYI, "cifs_create parent inode = 0x%p name is: %pd and dentry = 0x%p\n",
inode, direntry, direntry);
+ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
+ return -EIO;
+
tlink = cifs_sb_tlink(CIFS_SB(inode->i_sb));
rc = PTR_ERR(tlink);
if (IS_ERR(tlink))
return -EINVAL;
cifs_sb = CIFS_SB(inode->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
#include "fscache.h"
#include "smbdirect.h"
#include "fs_context.h"
+#include "cifs_ioctl.h"
static inline int cifs_convert_flags(unsigned int flags)
{
cfile->dentry = dget(dentry);
cfile->f_flags = file->f_flags;
cfile->invalidHandle = false;
+ cfile->oplock_break_received = false;
+ cfile->deferred_scheduled = false;
cfile->tlink = cifs_get_tlink(tlink);
INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
INIT_WORK(&cfile->put, cifsFileInfo_put_work);
+ INIT_DELAYED_WORK(&cfile->deferred, smb2_deferred_work_close);
mutex_init(&cfile->fh_mutex);
spin_lock_init(&cfile->file_info_lock);
xid = get_xid();
cifs_sb = CIFS_SB(inode->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb))) {
+ free_xid(xid);
+ return -EIO;
+ }
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
free_xid(xid);
file->f_op = &cifs_file_direct_ops;
}
+ spin_lock(&CIFS_I(inode)->deferred_lock);
+ /* Get the cached handle as SMB2 close is deferred */
+ rc = cifs_get_readable_path(tcon, full_path, &cfile);
+ if (rc == 0) {
+ if (file->f_flags == cfile->f_flags) {
+ file->private_data = cfile;
+ cifs_del_deferred_close(cfile);
+ spin_unlock(&CIFS_I(inode)->deferred_lock);
+ goto out;
+ } else {
+ spin_unlock(&CIFS_I(inode)->deferred_lock);
+ _cifsFileInfo_put(cfile, true, false);
+ }
+ } else {
+ spin_unlock(&CIFS_I(inode)->deferred_lock);
+ }
+
if (server->oplocks)
oplock = REQ_OPLOCK;
else
return rc;
}
+void smb2_deferred_work_close(struct work_struct *work)
+{
+ struct cifsFileInfo *cfile = container_of(work,
+ struct cifsFileInfo, deferred.work);
+
+ spin_lock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+ if (!cfile->deferred_scheduled) {
+ spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+ return;
+ }
+ cifs_del_deferred_close(cfile);
+ cfile->deferred_scheduled = false;
+ spin_unlock(&CIFS_I(d_inode(cfile->dentry))->deferred_lock);
+ _cifsFileInfo_put(cfile, true, false);
+}
+
int cifs_close(struct inode *inode, struct file *file)
{
+ struct cifsFileInfo *cfile;
+ struct cifsInodeInfo *cinode = CIFS_I(inode);
+ struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
+ struct cifs_deferred_close *dclose;
+
if (file->private_data != NULL) {
- _cifsFileInfo_put(file->private_data, true, false);
+ cfile = file->private_data;
file->private_data = NULL;
+ dclose = kmalloc(sizeof(struct cifs_deferred_close), GFP_KERNEL);
+ if ((cinode->oplock == CIFS_CACHE_RHW_FLG) &&
+ dclose) {
+ if (test_bit(CIFS_INO_MODIFIED_ATTR, &cinode->flags))
+ inode->i_ctime = inode->i_mtime = current_time(inode);
+ spin_lock(&cinode->deferred_lock);
+ cifs_add_deferred_close(cfile, dclose);
+ if (cfile->deferred_scheduled) {
+ mod_delayed_work(deferredclose_wq,
+ &cfile->deferred, cifs_sb->ctx->acregmax);
+ } else {
+ /* Deferred close for files */
+ queue_delayed_work(deferredclose_wq,
+ &cfile->deferred, cifs_sb->ctx->acregmax);
+ cfile->deferred_scheduled = true;
+ spin_unlock(&cinode->deferred_lock);
+ return 0;
+ }
+ spin_unlock(&cinode->deferred_lock);
+ _cifsFileInfo_put(cfile, true, false);
+ } else {
+ _cifsFileInfo_put(cfile, true, false);
+ kfree(dclose);
+ }
}
/* return code from the ->release op is always ignored */
if (total_written > 0) {
spin_lock(&d_inode(dentry)->i_lock);
- if (*offset > d_inode(dentry)->i_size)
+ if (*offset > d_inode(dentry)->i_size) {
i_size_write(d_inode(dentry), *offset);
+ d_inode(dentry)->i_blocks = (512 - 1 + *offset) >> 9;
+ }
spin_unlock(&d_inode(dentry)->i_lock);
}
mark_inode_dirty_sync(d_inode(dentry));
if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
continue;
if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
- if (!open_file->invalidHandle) {
+ if ((!open_file->invalidHandle) &&
+ (!open_file->oplock_break_received)) {
/* found a good file */
/* lock it so it will not be closed on us */
cifsFileInfo_get(open_file);
if (cfile)
cifsFileInfo_put(cfile);
free_xid(xid);
+ /* Indication to update ctime and mtime as close is deferred */
+ set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
return rc;
}
if (rc > 0) {
spin_lock(&inode->i_lock);
- if (pos > inode->i_size)
+ if (pos > inode->i_size) {
i_size_write(inode, pos);
+ inode->i_blocks = (512 - 1 + pos) >> 9;
+ }
spin_unlock(&inode->i_lock);
}
unlock_page(page);
put_page(page);
+ /* Indication to update ctime and mtime as close is deferred */
+ set_bit(CIFS_INO_MODIFIED_ATTR, &CIFS_I(inode)->flags);
return rc;
}
struct TCP_Server_Info *server = tcon->ses->server;
int rc = 0;
bool purge_cache = false;
+ bool is_deferred = false;
+ struct cifs_deferred_close *dclose;
wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
TASK_UNINTERRUPTIBLE);
cinode);
cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
}
+ /*
+ * When oplock break is received and there are no active
+ * file handles but cached, then set the flag oplock_break_received.
+ * So, new open will not use cached handle.
+ */
+ spin_lock(&CIFS_I(inode)->deferred_lock);
+ is_deferred = cifs_is_deferred_close(cfile, &dclose);
+ if (is_deferred && cfile->deferred_scheduled) {
+ cfile->oplock_break_received = true;
+ mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+ }
+ spin_unlock(&CIFS_I(inode)->deferred_lock);
_cifsFileInfo_put(cfile, false /* do not wait for ourself */, false);
cifs_done_oplock_break(cinode);
}
/* move "pos" up to delimiter or NULL */
pos += len;
+ kfree(ctx->UNC);
ctx->UNC = kstrndup(devname, pos - devname, GFP_KERNEL);
if (!ctx->UNC)
return -ENOMEM;
if (*pos == '/' || *pos == '\\')
pos++;
+ kfree(ctx->prepath);
+ ctx->prepath = NULL;
+
/* If pos is NULL then no prepath */
if (!*pos)
return 0;
cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n");
}
}
+ cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SHUTDOWN;
return;
}
#include <linux/sched/signal.h>
#include <linux/wait_bit.h>
#include <linux/fiemap.h>
-
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifs_unicode.h"
#include "fscache.h"
#include "fs_context.h"
-
+#include "cifs_ioctl.h"
static void cifs_set_ops(struct inode *inode)
{
cifs_dbg(FYI, "cifs_unlink, dir=0x%p, dentry=0x%p\n", dir, dentry);
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
goto unlink_out;
}
+ cifs_close_all_deferred_files(tcon);
if (cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
le64_to_cpu(tcon->fsUnixInfo.Capability))) {
rc = CIFSPOSIXDelFile(xid, tcon, full_path,
mode, inode);
cifs_sb = CIFS_SB(inode->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
}
cifs_sb = CIFS_SB(inode->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb))) {
+ rc = -EIO;
+ goto rmdir_exit;
+ }
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink)) {
rc = PTR_ERR(tlink);
return -EINVAL;
cifs_sb = CIFS_SB(source_dir->i_sb);
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
goto cifs_rename_exit;
}
+ cifs_close_all_deferred_files(tcon);
rc = cifs_do_rename(xid, source_dentry, from_name, target_dentry,
to_name);
struct inode *inode = d_inode(dentry);
int rc;
+ if (unlikely(cifs_forced_shutdown(CIFS_SB(inode->i_sb))))
+ return -EIO;
+
/*
* We need to be sure that all dirty pages are written and the server
* has actual ctime, mtime and file length.
struct cifsFileInfo *cfile;
int rc;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
/*
* We need to be sure that all dirty pages are written as they
* might fill holes on the server.
struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
int rc, retries = 0;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
do {
if (pTcon->unix_ext)
rc = cifs_setattr_unix(direntry, attrs);
return rc;
}
+static int cifs_shutdown(struct super_block *sb, unsigned long arg)
+{
+ struct cifs_sb_info *sbi = CIFS_SB(sb);
+ __u32 flags;
+
+ if (!capable(CAP_SYS_ADMIN))
+ return -EPERM;
+
+ if (get_user(flags, (__u32 __user *)arg))
+ return -EFAULT;
+
+ if (flags > CIFS_GOING_FLAGS_NOLOGFLUSH)
+ return -EINVAL;
+
+ if (cifs_forced_shutdown(sbi))
+ return 0;
+
+ cifs_dbg(VFS, "shut down requested (%d)", flags);
+/* trace_cifs_shutdown(sb, flags);*/
+
+ /*
+ * see:
+ * https://man7.org/linux/man-pages/man2/ioctl_xfs_goingdown.2.html
+ * for more information and description of original intent of the flags
+ */
+ switch (flags) {
+ /*
+ * We could add support later for default flag which requires:
+ * "Flush all dirty data and metadata to disk"
+ * would need to call syncfs or equivalent to flush page cache for
+ * the mount and then issue fsync to server (if nostrictsync not set)
+ */
+ case CIFS_GOING_FLAGS_DEFAULT:
+ cifs_dbg(FYI, "shutdown with default flag not supported\n");
+ return -EINVAL;
+ /*
+ * FLAGS_LOGFLUSH is easy since it asks to write out metadata (not
+ * data) but metadata writes are not cached on the client, so can treat
+ * it similarly to NOLOGFLUSH
+ */
+ case CIFS_GOING_FLAGS_LOGFLUSH:
+ case CIFS_GOING_FLAGS_NOLOGFLUSH:
+ sbi->mnt_cifs_flags |= CIFS_MOUNT_SHUTDOWN;
+ return 0;
+ default:
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int cifs_dump_full_key(struct cifs_tcon *tcon, unsigned long arg)
+{
+ struct smb3_full_key_debug_info pfull_key_inf;
+ __u64 suid;
+ struct list_head *tmp;
+ struct cifs_ses *ses;
+ bool found = false;
+
+ if (!smb3_encryption_required(tcon))
+ return -EOPNOTSUPP;
+
+ ses = tcon->ses; /* default to user id for current user */
+ if (get_user(suid, (__u64 __user *)arg))
+ suid = 0;
+ if (suid) {
+ /* search to see if there is a session with a matching SMB UID */
+ spin_lock(&cifs_tcp_ses_lock);
+ list_for_each(tmp, &tcon->ses->server->smb_ses_list) {
+ ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+ if (ses->Suid == suid) {
+ found = true;
+ break;
+ }
+ }
+ spin_unlock(&cifs_tcp_ses_lock);
+ if (found == false)
+ return -EINVAL;
+ } /* else uses default user's SMB UID (ie current user) */
+
+ pfull_key_inf.cipher_type = le16_to_cpu(ses->server->cipher_type);
+ pfull_key_inf.Suid = ses->Suid;
+ memcpy(pfull_key_inf.auth_key, ses->auth_key.response,
+ 16 /* SMB2_NTLMV2_SESSKEY_SIZE */);
+ memcpy(pfull_key_inf.smb3decryptionkey, ses->smb3decryptionkey,
+ 32 /* SMB3_ENC_DEC_KEY_SIZE */);
+ memcpy(pfull_key_inf.smb3encryptionkey,
+ ses->smb3encryptionkey, 32 /* SMB3_ENC_DEC_KEY_SIZE */);
+ if (copy_to_user((void __user *)arg, &pfull_key_inf,
+ sizeof(struct smb3_full_key_debug_info)))
+ return -EFAULT;
+
+ return 0;
+}
+
long cifs_ioctl(struct file *filep, unsigned int command, unsigned long arg)
{
struct inode *inode = file_inode(filep);
else
rc = 0;
break;
+ /*
+ * Dump full key (32 bytes instead of 16 bytes) is
+ * needed if GCM256 (stronger encryption) negotiated
+ */
+ case CIFS_DUMP_FULL_KEY:
+ if (pSMBFile == NULL)
+ break;
+ if (!capable(CAP_SYS_ADMIN)) {
+ rc = -EACCES;
+ break;
+ }
+ tcon = tlink_tcon(pSMBFile->tlink);
+ rc = cifs_dump_full_key(tcon, arg);
+
+ break;
case CIFS_IOC_NOTIFY:
if (!S_ISDIR(inode->i_mode)) {
/* Notify can only be done on directories */
rc = -EOPNOTSUPP;
cifs_put_tlink(tlink);
break;
+ case CIFS_IOC_SHUTDOWN:
+ rc = cifs_shutdown(inode->i_sb, arg);
+ break;
default:
cifs_dbg(FYI, "unsupported ioctl\n");
break;
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
#include "smb2proto.h"
+#include "cifs_ioctl.h"
/*
* M-F Symlink Functions - Begin
struct TCP_Server_Info *server;
struct cifsInodeInfo *cifsInode;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
tlink = cifs_sb_tlink(cifs_sb);
if (IS_ERR(tlink))
return PTR_ERR(tlink);
struct tcon_link *tlink;
struct cifs_tcon *pTcon;
const char *full_path;
- void *page = alloc_dentry_path();
+ void *page;
struct inode *newinode = NULL;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
+ page = alloc_dentry_path();
+ if (!page)
+ return -ENOMEM;
+
xid = get_xid();
tlink = cifs_sb_tlink(cifs_sb);
spin_unlock(&tlink_tcon(open->tlink)->open_file_lock);
}
+bool
+cifs_is_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close **pdclose)
+{
+ struct cifs_deferred_close *dclose;
+
+ list_for_each_entry(dclose, &CIFS_I(d_inode(cfile->dentry))->deferred_closes, dlist) {
+ if ((dclose->netfid == cfile->fid.netfid) &&
+ (dclose->persistent_fid == cfile->fid.persistent_fid) &&
+ (dclose->volatile_fid == cfile->fid.volatile_fid)) {
+ *pdclose = dclose;
+ return true;
+ }
+ }
+ return false;
+}
+
+void
+cifs_add_deferred_close(struct cifsFileInfo *cfile, struct cifs_deferred_close *dclose)
+{
+ bool is_deferred = false;
+ struct cifs_deferred_close *pdclose;
+
+ is_deferred = cifs_is_deferred_close(cfile, &pdclose);
+ if (is_deferred) {
+ kfree(dclose);
+ return;
+ }
+
+ dclose->tlink = cfile->tlink;
+ dclose->netfid = cfile->fid.netfid;
+ dclose->persistent_fid = cfile->fid.persistent_fid;
+ dclose->volatile_fid = cfile->fid.volatile_fid;
+ list_add_tail(&dclose->dlist, &CIFS_I(d_inode(cfile->dentry))->deferred_closes);
+}
+
+void
+cifs_del_deferred_close(struct cifsFileInfo *cfile)
+{
+ bool is_deferred = false;
+ struct cifs_deferred_close *dclose;
+
+ is_deferred = cifs_is_deferred_close(cfile, &dclose);
+ if (!is_deferred)
+ return;
+ list_del(&dclose->dlist);
+ kfree(dclose);
+}
+
+void
+cifs_close_deferred_file(struct cifsInodeInfo *cifs_inode)
+{
+ struct cifsFileInfo *cfile = NULL;
+ struct cifs_deferred_close *dclose;
+
+ list_for_each_entry(cfile, &cifs_inode->openFileList, flist) {
+ spin_lock(&cifs_inode->deferred_lock);
+ if (cifs_is_deferred_close(cfile, &dclose))
+ mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+ spin_unlock(&cifs_inode->deferred_lock);
+ }
+}
+
+void
+cifs_close_all_deferred_files(struct cifs_tcon *tcon)
+{
+ struct cifsFileInfo *cfile;
+ struct cifsInodeInfo *cinode;
+ struct list_head *tmp;
+
+ spin_lock(&tcon->open_file_lock);
+ list_for_each(tmp, &tcon->openFileList) {
+ cfile = list_entry(tmp, struct cifsFileInfo, tlist);
+ cinode = CIFS_I(d_inode(cfile->dentry));
+ if (delayed_work_pending(&cfile->deferred))
+ mod_delayed_work(deferredclose_wq, &cfile->deferred, 0);
+ }
+ spin_unlock(&tcon->open_file_lock);
+}
+
/* parses DFS refferal V3 structure
* caller is responsible for freeing target_nodes
* returns:
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "cifs_unicode.h"
+#include "cifs_ioctl.h"
#define MAX_EA_VALUE_SIZE CIFSMaxBufSize
#define CIFS_XATTR_CIFS_ACL "system.cifs_acl" /* DACL only */
const char *full_path;
void *page;
+ if (unlikely(cifs_forced_shutdown(cifs_sb)))
+ return -EIO;
+
if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_XATTR)
return -EOPNOTSUPP;
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset:8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* configfs_internal.h - Internal stuff for configfs
*
* Based on sysfs:
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dir.c - Operations for configfs directories.
*
* Based on sysfs:
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* file.c - operations for regular (text) files.
*
* Based on sysfs:
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* inode.c - basic inode and dentry operations.
*
* Based on sysfs:
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* item.c - library routines for handling generic config items
*
* Based on kobject:
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* mount.c - operations for initializing and mounting configfs.
*
* Based on sysfs:
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* symlink.c - operations for configfs symlinks.
*
* Based on sysfs:
dax_disassociate_entry(entry, mapping, false);
xas_store(xas, NULL); /* undo the PMD join */
dax_wake_entry(xas, entry, true);
- mapping->nrexceptional--;
+ mapping->nrpages -= PG_PMD_NR;
entry = NULL;
xas_set(xas, index);
}
dax_lock_entry(xas, entry);
if (xas_error(xas))
goto out_unlock;
- mapping->nrexceptional++;
+ mapping->nrpages += 1UL << order;
}
out_unlock:
goto out;
dax_disassociate_entry(entry, mapping, trunc);
xas_store(&xas, NULL);
- mapping->nrexceptional--;
+ mapping->nrpages -= 1UL << dax_entry_order(entry);
ret = 1;
out:
put_unlocked_entry(&xas, entry);
if (WARN_ON_ONCE(inode->i_blkbits != PAGE_SHIFT))
return -EIO;
- if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
+ if (mapping_empty(mapping) || wbc->sync_mode != WB_SYNC_ALL)
return 0;
trace_dax_writeback_range(inode, xas.xa_index, end_index);
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 1997-2004 Erez Zadok
return rc;
}
-/**
+/*
* lower_offset_for_page
*
* Convert an eCryptfs page index into a lower byte offset
rc = crypt_extent(crypt_stat, page, page,
extent_offset, DECRYPT);
if (rc) {
- printk(KERN_ERR "%s: Error encrypting extent; "
+ printk(KERN_ERR "%s: Error decrypting extent; "
"rc = [%d]\n", __func__, rc);
goto out;
}
}
}
-/**
+/*
* ecryptfs_compute_root_iv
- * @crypt_stats
*
* On error, sets the root IV to all 0's.
*/
return rc;
}
-/**
+/*
* ecryptfs_read_metadata
*
* Common entry point for reading file metadata. From here, we could
return rc;
}
-/**
+/*
* ecryptfs_encrypt_filename - encrypt filename
*
* CBC-encrypts the filename. We do not want to encrypt the same
struct kmem_cache *ecryptfs_key_tfm_cache;
static struct list_head key_tfm_list;
-struct mutex key_tfm_list_mutex;
+DEFINE_MUTEX(key_tfm_list_mutex);
int __init ecryptfs_init_crypto(void)
{
- mutex_init(&key_tfm_list_mutex);
INIT_LIST_HEAD(&key_tfm_list);
return 0;
}
/**
* ecryptfs_encrypt_and_encode_filename - converts a plaintext file name to cipher text
- * @crypt_stat: The crypt_stat struct associated with the file anem to encode
+ * @encoded_name: The encrypted name
+ * @encoded_name_size: Length of the encrypted name
+ * @mount_crypt_stat: The crypt_stat struct associated with the file name to encode
* @name: The plaintext name
- * @length: The length of the plaintext
- * @encoded_name: The encypted name
+ * @name_size: The length of the plaintext name
*
* Encrypts and encodes a filename into something that constitutes a
* valid filename for a filesystem, with printable characters.
* ecryptfs_decode_and_decrypt_filename - converts the encoded cipher text name to decoded plaintext
* @plaintext_name: The plaintext name
* @plaintext_name_size: The plaintext name size
- * @ecryptfs_dir_dentry: eCryptfs directory dentry
+ * @sb: Ecryptfs's super_block
* @name: The filename in cipher text
* @name_size: The cipher text name size
*
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
* Functions only useful for debugging.
*
#include "ecryptfs_kernel.h"
-/**
+/*
* ecryptfs_dump_auth_tok - debug function to print auth toks
*
* This function will print the contents of an ecryptfs authentication
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 1997-2003 Erez Zadok
}
#define ecryptfs_printk(type, fmt, arg...) \
- __ecryptfs_printk(type "%s: " fmt, __func__, ## arg);
+ __ecryptfs_printk(type "%s: " fmt, __func__, ## arg)
__printf(1, 2)
void __ecryptfs_printk(const char *fmt, ...);
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 1997-2004 Erez Zadok
#include <linux/fs_stack.h>
#include "ecryptfs_kernel.h"
-/**
+/*
* ecryptfs_read_update_atime
*
* generic_file_read updates the atime of upper layer inode. But, it
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 1997-2004 Erez Zadok
return inode;
}
-/**
+/*
* ecryptfs_initialize_file
*
* Cause the file to be changed from a basic empty file to an ecryptfs
return rc;
}
-/**
+/*
* ecryptfs_create
- * @dir: The inode of the directory in which to create the file.
- * @dentry: The eCryptfs dentry
* @mode: The mode of the new file.
*
* Creates a new file.
return 0;
}
-/**
+/*
* ecryptfs_lookup_interpose - Dentry interposition for a lookup
*/
static struct dentry *ecryptfs_lookup_interpose(struct dentry *dentry,
/**
* ecryptfs_setattr
+ * @mnt_userns: user namespace of the target mount
* @dentry: dentry handle to the inode to modify
* @ia: Structure with flags of what to change and values
*
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
* In-kernel key management code. Includes functions to parse and
* write authentication token-related packets with the underlying
#include <linux/slab.h>
#include "ecryptfs_kernel.h"
-/**
+/*
* request_key returned an error instead of a valid key address;
* determine the type of error, make appropriate log entries, and
* return an error code.
/**
* ecryptfs_find_auth_tok_for_sig
+ * @auth_tok_key: key containing the authentication token
* @auth_tok: Set to the matching auth_tok; NULL if not found
- * @crypt_stat: inode crypt_stat crypto context
+ * @mount_crypt_stat: inode crypt_stat crypto context
* @sig: Sig of auth_tok to find
*
* For now, this function simply looks at the registered auth_tok's
return rc;
}
-/**
+/*
* write_tag_70_packet can gobble a lot of stack space. We stuff most
* of the function's parameters in a kmalloc'd struct to help reduce
* eCryptfs' overall stack usage.
struct shash_desc *hash_desc;
};
-/**
+/*
* write_tag_70_packet - Write encrypted filename (EFN) packet against FNEK
* @filename: NULL-terminated filename string
*
};
/**
- * parse_tag_70_packet - Parse and process FNEK-encrypted passphrase packet
+ * ecryptfs_parse_tag_70_packet - Parse and process FNEK-encrypted passphrase packet
* @filename: This function kmalloc's the memory for the filename
* @filename_size: This function sets this to the amount of memory
* kmalloc'd for the filename
rc = ecryptfs_cipher_code_to_string(crypt_stat->cipher, cipher_code);
if (rc) {
ecryptfs_printk(KERN_ERR, "Cipher code [%d] is invalid\n",
- cipher_code)
+ cipher_code);
goto out;
}
crypt_stat->flags |= ECRYPTFS_KEY_VALID;
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 2008 International Business Machines Corp.
* @lower_file: Result of dentry_open by root on lower dentry
* @lower_dentry: Lower dentry for file to open
* @lower_mnt: Lower vfsmount for file to open
+ * @cred: credential to use for this call
*
* This function gets a r/w file opened against the lower dentry.
*
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 1997-2003 Erez Zadok
#include <linux/magic.h>
#include "ecryptfs_kernel.h"
-/**
+/*
* Module parameter that defines the ecryptfs_verbosity level.
*/
int ecryptfs_verbosity = 0;
"Initial verbosity level (0 or 1; defaults to "
"0, which is Quiet)");
-/**
+/*
* Module parameter that defines the number of message buffer elements
*/
unsigned int ecryptfs_message_buf_len = ECRYPTFS_DEFAULT_MSG_CTX_ELEMS;
MODULE_PARM_DESC(ecryptfs_message_buf_len,
"Number of message buffer elements");
-/**
+/*
* Module parameter that defines the maximum guaranteed amount of time to wait
* for a response from ecryptfsd. The actual sleep time will be, more than
* likely, a small amount greater than this specified value, but only less if
"sleep while waiting for a message response from "
"userspace");
-/**
+/*
* Module parameter that is an estimate of the maximum number of users
* that will be concurrently using eCryptfs. Set this to the right
* value to balance performance and memory use.
va_end(args);
}
-/**
+/*
* ecryptfs_init_lower_file
* @ecryptfs_dentry: Fully initialized eCryptfs dentry object, with
* the lower dentry and the lower mount set
/**
* ecryptfs_parse_options
- * @sb: The ecryptfs super block
+ * @sbi: The ecryptfs super block
* @options: The options passed to the kernel
* @check_ruid: set to 1 if device uid should be checked against the ruid
*
struct kmem_cache *ecryptfs_sb_info_cache;
static struct file_system_type ecryptfs_fs_type;
-/**
- * ecryptfs_get_sb
- * @fs_type
- * @flags
+/*
+ * ecryptfs_mount
+ * @fs_type: The filesystem type that the superblock should belong to
+ * @flags: The flags associated with the mount
* @dev_name: The path to mount over
* @raw_data: The options passed into the kernel
*/
goto out;
}
+ if (!dev_name) {
+ rc = -EINVAL;
+ err = "Device name cannot be null";
+ goto out;
+ }
+
rc = ecryptfs_parse_options(sbi, raw_data, &check_ruid);
if (rc) {
err = "Error parsing options";
};
MODULE_ALIAS_FS("ecryptfs");
-/**
+/*
* inode_info_init_once
*
* Initializes the ecryptfs_inode_info_cache when it is created
// SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 2004-2008 International Business Machines Corp.
static LIST_HEAD(ecryptfs_msg_ctx_free_list);
static LIST_HEAD(ecryptfs_msg_ctx_alloc_list);
-static struct mutex ecryptfs_msg_ctx_lists_mux;
+static DEFINE_MUTEX(ecryptfs_msg_ctx_lists_mux);
static struct hlist_head *ecryptfs_daemon_hash;
-struct mutex ecryptfs_daemon_hash_mux;
+DEFINE_MUTEX(ecryptfs_daemon_hash_mux);
static int ecryptfs_hash_bits;
#define ecryptfs_current_euid_hash(uid) \
hash_long((unsigned long)from_kuid(&init_user_ns, current_euid()), ecryptfs_hash_bits)
return rc;
}
-/**
+/*
* ecryptfs_exorcise_daemon - Destroy the daemon struct
*
* Must be called ceremoniously while in possession of
}
/**
- * ecryptfs_process_reponse
+ * ecryptfs_process_response
+ * @daemon: eCryptfs daemon object
* @msg: The ecryptfs message received; the caller should sanity check
* msg->data_len and free the memory
* @seq: The sequence number of the message; must match the sequence
* ecryptfs_send_message_locked
* @data: The data to send
* @data_len: The length of data
+ * @msg_type: Type of message
* @msg_ctx: The message context allocated for the send
*
* Must be called with ecryptfs_daemon_hash_mux held.
"too large, defaulting to [%d] users\n", __func__,
ecryptfs_number_of_users);
}
- mutex_init(&ecryptfs_daemon_hash_mux);
mutex_lock(&ecryptfs_daemon_hash_mux);
ecryptfs_hash_bits = 1;
while (ecryptfs_number_of_users >> ecryptfs_hash_bits)
rc = -ENOMEM;
goto out;
}
- mutex_init(&ecryptfs_msg_ctx_lists_mux);
mutex_lock(&ecryptfs_msg_ctx_lists_mux);
ecryptfs_msg_counter = 0;
for (i = 0; i < ecryptfs_message_buf_len; i++) {
// SPDX-License-Identifier: GPL-2.0-only
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 2008 International Business Machines Corp.
/**
* ecryptfs_miscdev_response - miscdevess response to message previously sent to daemon
+ * @daemon: eCryptfs daemon object
* @data: Bytes comprising struct ecryptfs_message
* @data_size: sizeof(struct ecryptfs_message) + data len
* @seq: Sequence number for miscdev response packet
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
* This is where eCryptfs coordinates the symmetric encryption and
* decryption of the file data as it passes between the lower
#include <asm/unaligned.h>
#include "ecryptfs_kernel.h"
-/**
+/*
* ecryptfs_get_locked_page
*
* Get one page from cache or lower f/s, return error otherwise.
/**
* ecryptfs_writepage
* @page: Page that is locked before this call is made
+ * @wbc: Write-back control structure
*
* Returns zero on success; non-zero otherwise
*
}
}
-/**
+/*
* Header Extent:
* Octets 0-7: Unencrypted file size (big-endian)
* Octets 8-15: eCryptfs special marker
return rc;
}
-/**
+/*
* Called with lower inode mutex held.
*/
static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
return rc;
}
-/**
+/*
* ecryptfs_write_inode_size_to_header
*
* Writes the lower file size to the first 8 bytes of the header.
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 2007 International Business Machines Corp.
* ecryptfs_read_lower_page_segment
* @page_for_ecryptfs: The page into which data for eCryptfs will be
* written
+ * @page_index: Page index in @page_for_ecryptfs from which to start
+ * writing
* @offset_in_page: Offset in @page_for_ecryptfs from which to start
* writing
* @size: The number of bytes to write into @page_for_ecryptfs
// SPDX-License-Identifier: GPL-2.0-or-later
-/**
+/*
* eCryptfs: Linux filesystem encryption layer
*
* Copyright (C) 1997-2003 Erez Zadok
/**
* ecryptfs_statfs
- * @sb: The ecryptfs super block
+ * @dentry: The ecryptfs dentry
* @buf: The struct kstatfs to fill in with stats
*
* Get the filesystem statistics. Currently, we let this pass right through
/**
* ecryptfs_evict_inode
- * @inode - The ecryptfs inode
+ * @inode: The ecryptfs inode
*
* Called by iput() when the inode reference count reached zero
* and the inode is not hashed anywhere. Used to clear anything
iput(ecryptfs_inode_to_lower(inode));
}
-/**
+/*
* ecryptfs_show_options
*
* Prints the mount options for a given superblock.
*/
list_splice(txlist, &ep->rdllist);
__pm_relax(ep->ws);
+
+ if (!list_empty(&ep->rdllist)) {
+ if (waitqueue_active(&ep->wq))
+ wake_up(&ep->wq);
+ }
+
write_unlock_irq(&ep->lock);
}
select CRYPTO_CRC32
select F2FS_FS_XATTR if FS_ENCRYPTION
select FS_ENCRYPTION_ALGS if FS_ENCRYPTION
+ select LZ4_COMPRESS if F2FS_FS_LZ4
+ select LZ4_DECOMPRESS if F2FS_FS_LZ4
+ select LZ4HC_COMPRESS if F2FS_FS_LZ4HC
+ select LZO_COMPRESS if F2FS_FS_LZO
+ select LZO_DECOMPRESS if F2FS_FS_LZO
+ select ZSTD_COMPRESS if F2FS_FS_ZSTD
+ select ZSTD_DECOMPRESS if F2FS_FS_ZSTD
help
F2FS is based on Log-structured File System (LFS), which supports
versatile "flash-friendly" features. The design has been focused on
config F2FS_FS_LZO
bool "LZO compression support"
depends on F2FS_FS_COMPRESSION
- select LZO_COMPRESS
- select LZO_DECOMPRESS
default y
help
Support LZO compress algorithm, if unsure, say Y.
config F2FS_FS_LZ4
bool "LZ4 compression support"
depends on F2FS_FS_COMPRESSION
- select LZ4_COMPRESS
- select LZ4_DECOMPRESS
default y
help
Support LZ4 compress algorithm, if unsure, say Y.
bool "LZ4HC compression support"
depends on F2FS_FS_COMPRESSION
depends on F2FS_FS_LZ4
- select LZ4HC_COMPRESS
default y
help
Support LZ4HC compress algorithm, LZ4HC has compatible on-disk
config F2FS_FS_ZSTD
bool "ZSTD compression support"
depends on F2FS_FS_COMPRESSION
- select ZSTD_COMPRESS
- select ZSTD_DECOMPRESS
default y
help
Support ZSTD compress algorithm, if unsure, say Y.
bool "LZO-RLE compression support"
depends on F2FS_FS_COMPRESSION
depends on F2FS_FS_LZO
- select LZO_COMPRESS
- select LZO_DECOMPRESS
default y
help
Support LZO-RLE compress algorithm, if unsure, say Y.
static inline int f2fs_acl_count(size_t size)
{
ssize_t s;
+
size -= sizeof(struct f2fs_acl_header);
s = size - 4 * sizeof(struct f2fs_acl_entry_short);
if (s < 0) {
orphan_blk = (struct f2fs_orphan_block *)page_address(page);
for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
+
err = recover_orphan_inode(sbi, ino);
if (err) {
f2fs_put_page(page, 1);
orphan_blocks);
if (__remain_node_summaries(cpc->reason))
- ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
+ ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
cp_payload_blks + data_sum_blocks +
orphan_blocks + NR_CURSEG_NODE_TYPE);
else
llist_add(&req.llnode, &cprc->issue_list);
atomic_inc(&cprc->queued_ckpt);
- /* update issue_list before we wake up issue_checkpoint thread */
+ /*
+ * update issue_list before we wake up issue_checkpoint thread,
+ * this smp_mb() pairs with another barrier in ___wait_event(),
+ * see more details in comments of waitqueue_active().
+ */
smp_mb();
if (waitqueue_active(&cprc->ckpt_wait_queue))
return false;
if (IS_ATOMIC_WRITTEN_PAGE(page) || IS_DUMMY_WRITTEN_PAGE(page))
return false;
- /*
- * page->private may be set with pid.
- * pid_max is enough to check if it is traced.
- */
- if (IS_IO_TRACED_PAGE(page))
- return false;
f2fs_bug_on(F2FS_M_SB(page->mapping),
*((u32 *)page_private(page)) != F2FS_COMPRESSED_PAGE_MAGIC);
static bool __cluster_may_compress(struct compress_ctx *cc)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(cc->inode);
loff_t i_size = i_size_read(cc->inode);
unsigned nr_pages = DIV_ROUND_UP(i_size, PAGE_SIZE);
int i;
for (i = 0; i < cc->cluster_size; i++) {
struct page *page = cc->rpages[i];
- f2fs_bug_on(sbi, !page);
-
- if (unlikely(f2fs_cp_error(sbi)))
- return false;
- if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
- return false;
+ f2fs_bug_on(F2FS_I_SB(cc->inode), !page);
/* beyond EOF */
if (page->index >= nr_pages)
if (fio.compr_blocks)
f2fs_i_compr_blocks_update(inode, fio.compr_blocks - 1, false);
f2fs_i_compr_blocks_update(inode, cc->nr_cpages, true);
+ add_compr_block_stat(inode, cc->nr_cpages);
set_inode_flag(cc->inode, FI_APPEND_WRITE);
if (cc->cluster_idx == 0)
for (; count > 0; dn->ofs_in_node++) {
block_t blkaddr = f2fs_data_blkaddr(dn);
+
if (blkaddr == NULL_ADDR) {
dn->data_blkaddr = NEW_ADDR;
__set_data_blkaddr(dn);
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_DIO, NULL,
f2fs_rw_hint_to_seg_type(inode->i_write_hint),
- IS_SWAPFILE(inode) ? false : true);
+ true);
}
static int get_data_block_dio(struct inode *inode, sector_t iblock,
int ret = 0;
bool compr_cluster = false;
unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
+ loff_t maxbytes;
if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
ret = f2fs_precache_extents(inode);
inode_lock(inode);
+ maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
+ if (start > maxbytes) {
+ ret = -EFBIG;
+ goto out;
+ }
+
+ if (len > maxbytes || (maxbytes - len) < start)
+ len = maxbytes - start;
+
if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
ret = f2fs_xattr_fiemap(inode, fieinfo);
goto out;
if (atomic_written) {
struct inmem_pages *cur;
+
list_for_each_entry(cur, &fi->inmem_pages, list)
if (cur->page == page) {
cur->page = newpage;
#endif
#ifdef CONFIG_SWAP
+static int f2fs_is_file_aligned(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ block_t main_blkaddr = SM_I(sbi)->main_blkaddr;
+ block_t cur_lblock;
+ block_t last_lblock;
+ block_t pblock;
+ unsigned long nr_pblocks;
+ unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+ int ret = 0;
+
+ cur_lblock = 0;
+ last_lblock = bytes_to_blks(inode, i_size_read(inode));
+
+ while (cur_lblock < last_lblock) {
+ struct f2fs_map_blocks map;
+
+ memset(&map, 0, sizeof(map));
+ map.m_lblk = cur_lblock;
+ map.m_len = last_lblock - cur_lblock;
+ map.m_next_pgofs = NULL;
+ map.m_next_extent = NULL;
+ map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
+
+ ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
+ if (ret)
+ goto out;
+
+ /* hole */
+ if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+ f2fs_err(sbi, "Swapfile has holes\n");
+ ret = -ENOENT;
+ goto out;
+ }
+
+ pblock = map.m_pblk;
+ nr_pblocks = map.m_len;
+
+ if ((pblock - main_blkaddr) & (blocks_per_sec - 1) ||
+ nr_pblocks & (blocks_per_sec - 1)) {
+ f2fs_err(sbi, "Swapfile does not align to section");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ cur_lblock += nr_pblocks;
+ }
+out:
+ return ret;
+}
+
static int check_swap_activate_fast(struct swap_info_struct *sis,
struct file *swap_file, sector_t *span)
{
struct address_space *mapping = swap_file->f_mapping;
struct inode *inode = mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
sector_t cur_lblock;
sector_t last_lblock;
sector_t pblock;
sector_t highest_pblock = 0;
int nr_extents = 0;
unsigned long nr_pblocks;
- u64 len;
- int ret;
+ unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+ int ret = 0;
/*
* Map all the blocks into the extent list. This code doesn't try
*/
cur_lblock = 0;
last_lblock = bytes_to_blks(inode, i_size_read(inode));
- len = i_size_read(inode);
- while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
+ while (cur_lblock < last_lblock && cur_lblock < sis->max) {
struct f2fs_map_blocks map;
- pgoff_t next_pgofs;
cond_resched();
memset(&map, 0, sizeof(map));
map.m_lblk = cur_lblock;
- map.m_len = bytes_to_blks(inode, len) - cur_lblock;
- map.m_next_pgofs = &next_pgofs;
+ map.m_len = last_lblock - cur_lblock;
+ map.m_next_pgofs = NULL;
+ map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+ map.m_may_create = false;
ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
if (ret)
- goto err_out;
+ goto out;
/* hole */
- if (!(map.m_flags & F2FS_MAP_FLAGS))
- goto err_out;
+ if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+ f2fs_err(sbi, "Swapfile has holes\n");
+ ret = -ENOENT;
+ goto out;
+ }
pblock = map.m_pblk;
nr_pblocks = map.m_len;
+ if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) ||
+ nr_pblocks & (blocks_per_sec - 1)) {
+ f2fs_err(sbi, "Swapfile does not align to section");
+ ret = -EINVAL;
+ goto out;
+ }
+
if (cur_lblock + nr_pblocks >= sis->max)
nr_pblocks = sis->max - cur_lblock;
sis->highest_bit = cur_lblock - 1;
out:
return ret;
-err_out:
- pr_err("swapon: swapfile has holes\n");
- return -EINVAL;
}
/* Copied from generic_swapfile_activate() to check any holes */
{
struct address_space *mapping = swap_file->f_mapping;
struct inode *inode = mapping->host;
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
unsigned blocks_per_page;
unsigned long page_no;
sector_t probe_block;
sector_t lowest_block = -1;
sector_t highest_block = 0;
int nr_extents = 0;
- int ret;
+ int ret = 0;
if (PAGE_SIZE == F2FS_BLKSIZE)
return check_swap_activate_fast(sis, swap_file, span);
+ ret = f2fs_is_file_aligned(inode);
+ if (ret)
+ goto out;
+
blocks_per_page = bytes_to_blks(inode, PAGE_SIZE);
/*
unsigned block_in_page;
sector_t first_block;
sector_t block = 0;
- int err = 0;
cond_resched();
block = probe_block;
- err = bmap(inode, &block);
- if (err || !block)
+ ret = bmap(inode, &block);
+ if (ret)
+ goto out;
+ if (!block)
goto bad_bmap;
first_block = block;
block_in_page++) {
block = probe_block + block_in_page;
- err = bmap(inode, &block);
-
- if (err || !block)
+ ret = bmap(inode, &block);
+ if (ret)
+ goto out;
+ if (!block)
goto bad_bmap;
if (block != first_block + block_in_page) {
out:
return ret;
bad_bmap:
- pr_err("swapon: swapfile has holes\n");
- return -EINVAL;
+ f2fs_err(sbi, "Swapfile has holes\n");
+ return -ENOENT;
}
static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
si->util_invalid = 50 - si->util_free - si->util_valid;
for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
+
si->curseg[i] = curseg->segno;
si->cursec[i] = GET_SEC_FROM_SEG(sbi, curseg->segno);
si->curzone[i] = GET_ZONE_FROM_SEC(sbi, si->cursec[i]);
si->page_mem = 0;
if (sbi->node_inode) {
unsigned npages = NODE_MAPPING(sbi)->nrpages;
+
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
if (sbi->meta_inode) {
unsigned npages = META_MAPPING(sbi)->nrpages;
+
si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
}
struct page *page, struct inode *inode)
{
enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
+
lock_page(page);
f2fs_wait_on_page_writeback(page, type, true, true);
de->ino = cpu_to_le32(inode->i_ino);
#define F2FS_MOUNT_NORECOVERY 0x04000000
#define F2FS_MOUNT_ATGC 0x08000000
#define F2FS_MOUNT_MERGE_CHECKPOINT 0x10000000
+#define F2FS_MOUNT_GC_MERGE 0x20000000
#define F2FS_OPTION(sbi) ((sbi)->mount_opt)
#define clear_opt(sbi, option) (F2FS_OPTION(sbi).opt &= ~F2FS_MOUNT_##option)
#define FADVISE_MODIFIABLE_BITS (FADVISE_COLD_BIT | FADVISE_HOT_BIT)
#define file_is_cold(inode) is_file(inode, FADVISE_COLD_BIT)
-#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
#define file_set_cold(inode) set_file(inode, FADVISE_COLD_BIT)
-#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT)
#define file_clear_cold(inode) clear_file(inode, FADVISE_COLD_BIT)
+
+#define file_wrong_pino(inode) is_file(inode, FADVISE_LOST_PINO_BIT)
+#define file_lost_pino(inode) set_file(inode, FADVISE_LOST_PINO_BIT)
#define file_got_pino(inode) clear_file(inode, FADVISE_LOST_PINO_BIT)
+
#define file_is_encrypt(inode) is_file(inode, FADVISE_ENCRYPT_BIT)
#define file_set_encrypt(inode) set_file(inode, FADVISE_ENCRYPT_BIT)
-#define file_clear_encrypt(inode) clear_file(inode, FADVISE_ENCRYPT_BIT)
+
#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT)
#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
+
#define file_keep_isize(inode) is_file(inode, FADVISE_KEEP_SIZE_BIT)
#define file_set_keep_isize(inode) set_file(inode, FADVISE_KEEP_SIZE_BIT)
+
#define file_is_hot(inode) is_file(inode, FADVISE_HOT_BIT)
#define file_set_hot(inode) set_file(inode, FADVISE_HOT_BIT)
#define file_clear_hot(inode) clear_file(inode, FADVISE_HOT_BIT)
+
#define file_is_verity(inode) is_file(inode, FADVISE_VERITY_BIT)
#define file_set_verity(inode) set_file(inode, FADVISE_VERITY_BIT)
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
struct radix_tree_root nat_set_root;/* root of the nat set cache */
- struct rw_semaphore nat_tree_lock; /* protect nat_tree_lock */
+ struct rw_semaphore nat_tree_lock; /* protect nat entry tree */
struct list_head nat_entries; /* cached nat entry list (clean) */
spinlock_t nat_list_lock; /* protect clean nat entry list */
unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
#define IS_DUMMY_WRITTEN_PAGE(page) \
(page_private(page) == DUMMY_WRITTEN_PAGE)
-#ifdef CONFIG_F2FS_IO_TRACE
-#define IS_IO_TRACED_PAGE(page) \
- (page_private(page) > 0 && \
- page_private(page) < (unsigned long)PID_MAX_LIMIT)
-#else
-#define IS_IO_TRACED_PAGE(page) (0)
-#endif
-
/* For compression */
enum compress_algorithm_type {
COMPRESS_LZO,
#ifdef CONFIG_F2FS_FS_COMPRESSION
struct kmem_cache *page_array_slab; /* page array entry */
unsigned int page_array_slab_size; /* default page array slab size */
+
+ /* For runtime compression statistics */
+ u64 compr_written_block;
+ u64 compr_saved_block;
+ u32 compr_new_inode;
#endif
};
static inline void *__bitmap_ptr(struct f2fs_sb_info *sbi, int flag)
{
struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
+ void *tmp_ptr = &ckpt->sit_nat_version_bitmap;
int offset;
if (is_set_ckpt_flags(sbi, CP_LARGE_NAT_BITMAP_FLAG)) {
* if large_nat_bitmap feature is enabled, leave checksum
* protection for all nat/sit bitmaps.
*/
- return &ckpt->sit_nat_version_bitmap + offset + sizeof(__le32);
+ return tmp_ptr + offset + sizeof(__le32);
}
if (__cp_payload(sbi) > 0) {
} else {
offset = (flag == NAT_BITMAP) ?
le32_to_cpu(ckpt->sit_ver_bitmap_bytesize) : 0;
- return &ckpt->sit_nat_version_bitmap + offset;
+ return tmp_ptr + offset;
}
}
/*
* node.c
*/
-struct dnode_of_data;
struct node_info;
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid);
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi, block_t unusable);
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi);
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra);
+bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno);
void f2fs_init_inmem_curseg(struct f2fs_sb_info *sbi);
void f2fs_save_inmem_curseg(struct f2fs_sb_info *sbi);
void f2fs_restore_inmem_curseg(struct f2fs_sb_info *sbi);
unsigned int *newseg, bool new_sec, int dir);
void f2fs_allocate_segment_for_resize(struct f2fs_sb_info *sbi, int type,
unsigned int start, unsigned int end);
-void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type);
+void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force);
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi);
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range);
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
int f2fs_start_gc_thread(struct f2fs_sb_info *sbi);
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi);
block_t f2fs_start_bidx_of_node(unsigned int node_ofs, struct inode *inode);
-int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background,
+int f2fs_gc(struct f2fs_sb_info *sbi, bool sync, bool background, bool force,
unsigned int segno);
void f2fs_build_gc_manager(struct f2fs_sb_info *sbi);
int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count);
void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi);
int __init f2fs_init_compress_cache(void);
void f2fs_destroy_compress_cache(void);
+#define inc_compr_inode_stat(inode) \
+ do { \
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \
+ sbi->compr_new_inode++; \
+ } while (0)
+#define add_compr_block_stat(inode, blocks) \
+ do { \
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode); \
+ int diff = F2FS_I(inode)->i_cluster_size - blocks; \
+ sbi->compr_written_block += blocks; \
+ sbi->compr_saved_block += diff; \
+ } while (0)
#else
static inline bool f2fs_is_compressed_page(struct page *page) { return false; }
static inline bool f2fs_is_compress_backend_ready(struct inode *inode)
static inline void f2fs_destroy_page_array_cache(struct f2fs_sb_info *sbi) { }
static inline int __init f2fs_init_compress_cache(void) { return 0; }
static inline void f2fs_destroy_compress_cache(void) { }
+#define inc_compr_inode_stat(inode) do { } while (0)
#endif
static inline void set_compress_context(struct inode *inode)
F2FS_I(inode)->i_flags |= F2FS_COMPR_FL;
set_inode_flag(inode, FI_COMPRESSED_FILE);
stat_inc_compr_inode(inode);
+ inc_compr_inode_stat(inode);
f2fs_mark_inode_dirty_sync(inode, true);
}
if (F2FS_IO_ALIGNED(sbi))
return true;
}
- if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
- !IS_SWAPFILE(inode))
+ if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
return true;
return false;
struct f2fs_map_blocks map = { .m_next_pgofs = NULL,
.m_next_extent = NULL, .m_seg_type = NO_CHECK_TYPE,
.m_may_create = true };
- pgoff_t pg_end;
+ pgoff_t pg_start, pg_end;
loff_t new_size = i_size_read(inode);
loff_t off_end;
+ block_t expanded = 0;
int err;
err = inode_newsize_ok(inode, (len + offset));
f2fs_balance_fs(sbi, true);
+ pg_start = ((unsigned long long)offset) >> PAGE_SHIFT;
pg_end = ((unsigned long long)offset + len) >> PAGE_SHIFT;
off_end = (offset + len) & (PAGE_SIZE - 1);
- map.m_lblk = ((unsigned long long)offset) >> PAGE_SHIFT;
- map.m_len = pg_end - map.m_lblk;
+ map.m_lblk = pg_start;
+ map.m_len = pg_end - pg_start;
if (off_end)
map.m_len++;
return 0;
if (f2fs_is_pinned_file(inode)) {
- block_t len = (map.m_len >> sbi->log_blocks_per_seg) <<
- sbi->log_blocks_per_seg;
- block_t done = 0;
+ block_t sec_blks = BLKS_PER_SEC(sbi);
+ block_t sec_len = roundup(map.m_len, sec_blks);
- if (map.m_len % sbi->blocks_per_seg)
- len += sbi->blocks_per_seg;
-
- map.m_len = sbi->blocks_per_seg;
+ map.m_len = sec_blks;
next_alloc:
if (has_not_enough_free_secs(sbi, 0,
GET_SEC_FROM_SEG(sbi, overprovision_segments(sbi)))) {
down_write(&sbi->gc_lock);
- err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+ err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
if (err && err != -ENODATA && err != -EAGAIN)
goto out_err;
}
down_write(&sbi->pin_sem);
f2fs_lock_op(sbi);
- f2fs_allocate_new_segment(sbi, CURSEG_COLD_DATA_PINNED);
+ f2fs_allocate_new_section(sbi, CURSEG_COLD_DATA_PINNED, false);
f2fs_unlock_op(sbi);
map.m_seg_type = CURSEG_COLD_DATA_PINNED;
up_write(&sbi->pin_sem);
- done += map.m_len;
- len -= map.m_len;
+ expanded += map.m_len;
+ sec_len -= map.m_len;
map.m_lblk += map.m_len;
- if (!err && len)
+ if (!err && sec_len)
goto next_alloc;
- map.m_len = done;
+ map.m_len = expanded;
} else {
err = f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+ expanded = map.m_len;
}
out_err:
if (err) {
pgoff_t last_off;
- if (!map.m_len)
+ if (!expanded)
return err;
- last_off = map.m_lblk + map.m_len - 1;
+ last_off = pg_start + expanded - 1;
/* update new size to the failed position */
new_size = (last_off == pg_end) ? offset + len :
down_write(&sbi->gc_lock);
}
- ret = f2fs_gc(sbi, sync, true, NULL_SEGNO);
+ ret = f2fs_gc(sbi, sync, true, false, NULL_SEGNO);
out:
mnt_drop_write_file(filp);
return ret;
down_write(&sbi->gc_lock);
}
- ret = f2fs_gc(sbi, range->sync, true, GET_SEGNO(sbi, range->start));
+ ret = f2fs_gc(sbi, range->sync, true, false,
+ GET_SEGNO(sbi, range->start));
if (ret) {
if (ret == -EBUSY)
ret = -EAGAIN;
{
struct inode *inode = file_inode(filp);
struct f2fs_map_blocks map = { .m_next_extent = NULL,
- .m_seg_type = NO_CHECK_TYPE ,
+ .m_seg_type = NO_CHECK_TYPE,
.m_may_create = false };
struct extent_info ei = {0, 0, 0};
pgoff_t pg_start, pg_end, next_pgofs;
sm->last_victim[GC_CB] = end_segno + 1;
sm->last_victim[GC_GREEDY] = end_segno + 1;
sm->last_victim[ALLOC_NEXT] = end_segno + 1;
- ret = f2fs_gc(sbi, true, true, start_segno);
+ ret = f2fs_gc(sbi, true, true, true, start_segno);
if (ret == -EAGAIN)
ret = 0;
else if (ret < 0)
clear_inode_flag(inode, FI_NO_PREALLOC);
/* if we couldn't write data, we should deallocate blocks. */
- if (preallocated && i_size_read(inode) < target_size)
+ if (preallocated && i_size_read(inode) < target_size) {
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ down_write(&F2FS_I(inode)->i_mmap_sem);
f2fs_truncate(inode);
+ up_write(&F2FS_I(inode)->i_mmap_sem);
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ }
if (ret > 0)
f2fs_update_iostat(F2FS_I_SB(inode), APP_WRITE_IO, ret);
struct f2fs_sb_info *sbi = data;
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
+ wait_queue_head_t *fggc_wq = &sbi->gc_thread->fggc_wq;
unsigned int wait_ms;
wait_ms = gc_th->min_sleep_time;
set_freezable();
do {
- bool sync_mode;
+ bool sync_mode, foreground = false;
wait_event_interruptible_timeout(*wq,
kthread_should_stop() || freezing(current) ||
+ waitqueue_active(fggc_wq) ||
gc_th->gc_wake,
msecs_to_jiffies(wait_ms));
+ if (test_opt(sbi, GC_MERGE) && waitqueue_active(fggc_wq))
+ foreground = true;
+
/* give it a try one time */
if (gc_th->gc_wake)
gc_th->gc_wake = 0;
goto do_gc;
}
- if (!down_write_trylock(&sbi->gc_lock)) {
+ if (foreground) {
+ down_write(&sbi->gc_lock);
+ goto do_gc;
+ } else if (!down_write_trylock(&sbi->gc_lock)) {
stat_other_skip_bggc_count(sbi);
goto next;
}
else
increase_sleep_time(gc_th, &wait_ms);
do_gc:
- stat_inc_bggc_count(sbi->stat_info);
+ if (!foreground)
+ stat_inc_bggc_count(sbi->stat_info);
sync_mode = F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_SYNC;
+ /* foreground GC was been triggered via f2fs_balance_fs() */
+ if (foreground)
+ sync_mode = false;
+
/* if return value is not zero, no victim was selected */
- if (f2fs_gc(sbi, sync_mode, true, NULL_SEGNO))
+ if (f2fs_gc(sbi, sync_mode, !foreground, false, NULL_SEGNO))
wait_ms = gc_th->no_gc_sleep_time;
+ if (foreground)
+ wake_up_all(&gc_th->fggc_wq);
+
trace_f2fs_background_gc(sbi->sb, wait_ms,
prefree_segments(sbi), free_segments(sbi));
gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;
- gc_th->gc_wake= 0;
+ gc_th->gc_wake = 0;
sbi->gc_thread = gc_th;
init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
+ init_waitqueue_head(&sbi->gc_thread->fggc_wq);
sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
if (IS_ERR(gc_th->f2fs_gc_task)) {
void f2fs_stop_gc_thread(struct f2fs_sb_info *sbi)
{
struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
+
if (!gc_th)
return;
kthread_stop(gc_th->f2fs_gc_task);
+ wake_up_all(&gc_th->fggc_wq);
kfree(gc_th);
sbi->gc_thread = NULL;
}
if (p->gc_mode == GC_AT &&
get_valid_blocks(sbi, segno, true) == 0)
return;
-
- if (p->alloc_mode == AT_SSR &&
- get_seg_entry(sbi, segno)->ckpt_valid_blocks == 0)
- return;
}
for (i = 0; i < sbi->segs_per_sec; i++)
if (sec_usage_check(sbi, secno))
goto next;
+
/* Don't touch checkpointed data */
- if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
- get_ckpt_valid_blocks(sbi, segno) &&
- p.alloc_mode == LFS))
- goto next;
+ if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
+ if (p.alloc_mode == LFS) {
+ /*
+ * LFS is set to find source section during GC.
+ * The victim should have no checkpointed data.
+ */
+ if (get_ckpt_valid_blocks(sbi, segno, true))
+ goto next;
+ } else {
+ /*
+ * SSR | AT_SSR are set to find target segment
+ * for writes which can be full by checkpointed
+ * and newly written blocks.
+ */
+ if (!f2fs_segment_has_free_slot(sbi, segno))
+ goto next;
+ }
+ }
+
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
goto next;
static void put_gc_inode(struct gc_inode_list *gc_list)
{
struct inode_entry *ie, *next_ie;
+
list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
iput(ie->inode);
bidx = node_ofs - 1;
} else if (node_ofs <= indirect_blks) {
int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
+
bidx = node_ofs - 2 - dec;
} else {
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
+
bidx = node_ofs - 5 - dec;
}
return bidx * ADDRS_PER_BLOCK(inode) + ADDRS_PER_INODE(inode);
block_t newaddr;
int err = 0;
bool lfs_mode = f2fs_lfs_mode(fio.sbi);
- int type = fio.sbi->am.atgc_enabled ?
+ int type = fio.sbi->am.atgc_enabled && (gc_type == BG_GC) &&
+ (fio.sbi->gc_mode != GC_URGENT_HIGH) ?
CURSEG_ALL_DATA_ATGC : CURSEG_COLD_DATA;
/* do not read out */
* the victim data block is ignored.
*/
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
- struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
+ struct gc_inode_list *gc_list, unsigned int segno, int gc_type,
+ bool force_migrate)
{
struct super_block *sb = sbi->sb;
struct f2fs_summary *entry;
* race condition along with SSR block allocation.
*/
if ((gc_type == BG_GC && has_not_enough_free_secs(sbi, 0, 0)) ||
- get_valid_blocks(sbi, segno, true) ==
- BLKS_PER_SEC(sbi))
+ (!force_migrate && get_valid_blocks(sbi, segno, true) ==
+ BLKS_PER_SEC(sbi)))
return submitted;
if (check_valid_map(sbi, segno, off) == 0)
static int do_garbage_collect(struct f2fs_sb_info *sbi,
unsigned int start_segno,
- struct gc_inode_list *gc_list, int gc_type)
+ struct gc_inode_list *gc_list, int gc_type,
+ bool force_migrate)
{
struct page *sum_page;
struct f2fs_summary_block *sum;
gc_type);
else
submitted += gc_data_segment(sbi, sum->entries, gc_list,
- segno, gc_type);
+ segno, gc_type,
+ force_migrate);
stat_inc_seg_count(sbi, type, gc_type);
migrated++;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync,
- bool background, unsigned int segno)
+ bool background, bool force, unsigned int segno)
{
int gc_type = sync ? FG_GC : BG_GC;
int sec_freed = 0, seg_freed = 0, total_freed = 0;
if (ret)
goto stop;
- seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
+ seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type, force);
if (gc_type == FG_GC &&
seg_freed == f2fs_usable_segs_in_sec(sbi, segno))
sec_freed++;
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
- do_garbage_collect(sbi, segno, &gc_list, FG_GC);
+ do_garbage_collect(sbi, segno, &gc_list, FG_GC, true);
put_gc_inode(&gc_list);
if (!gc_only && get_valid_blocks(sbi, segno, true)) {
/* stop CP to protect MAIN_SEC in free_segment_range */
f2fs_lock_op(sbi);
+
+ spin_lock(&sbi->stat_lock);
+ if (shrunk_blocks + valid_user_blocks(sbi) +
+ sbi->current_reserved_blocks + sbi->unusable_block_count +
+ F2FS_OPTION(sbi).root_reserved_blocks > sbi->user_block_count)
+ err = -ENOSPC;
+ spin_unlock(&sbi->stat_lock);
+
+ if (err)
+ goto out_unlock;
+
err = free_segment_range(sbi, secs, true);
+
+out_unlock:
f2fs_unlock_op(sbi);
up_write(&sbi->gc_lock);
if (err)
/* for changing gc mode */
unsigned int gc_wake;
+
+ /* for GC_MERGE mount option */
+ wait_queue_head_t fggc_wq; /*
+ * caller of f2fs_balance_fs()
+ * will wait on this wait queue.
+ */
};
struct gc_inode_list {
f2fs_put_page(page, 1);
- f2fs_balance_fs(sbi, dn.node_changed);
+ if (!err)
+ f2fs_balance_fs(sbi, dn.node_changed);
return err;
}
node_page = f2fs_get_node_page(sbi, inode->i_ino);
if (IS_ERR(node_page)) {
int err = PTR_ERR(node_page);
+
if (err == -ENOMEM) {
cond_resched();
goto retry;
/*
* We need to balance fs here to prevent from producing dirty node pages
- * during the urgent cleaning time when runing out of free sections.
+ * during the urgent cleaning time when running out of free sections.
*/
f2fs_update_inode_page(inode);
if (wbc && wbc->nr_to_write)
{
struct page *page;
unsigned long ino = f2fs_inode_by_name(d_inode(child), &dotdot_name, &page);
+
if (!ino) {
if (IS_ERR(page))
return ERR_CAST(page);
struct delayed_call *done)
{
const char *link = page_get_link(dentry, inode, done);
+
if (!IS_ERR(link) && !*link) {
/* this is broken symlink case */
do_delayed_call(done);
static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
{
struct inode *inode = d_inode(dentry);
+
if (f2fs_empty_dir(inode))
return f2fs_unlink(dir, dentry);
return -ENOTEMPTY;
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
struct sysinfo val;
unsigned long avail_ram;
unsigned long mem_size = 0;
bool res = false;
+ if (!nm_i)
+ return true;
+
si_meminfo(&val);
/* only uses low memory */
/* it allows 20% / total_ram for inmemory pages */
mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
res = mem_size < (val.totalram / 5);
+ } else if (type == DISCARD_CACHE) {
+ mem_size = (atomic_read(&dcc->discard_cmd_cnt) *
+ sizeof(struct discard_cmd)) >> PAGE_SHIFT;
+ res = mem_size < (avail_ram * nm_i->ram_thresh / 100);
} else {
if (!sbi->sb->s_bdi->wb.dirty_exceeded)
return true;
/* increment version no as node is removed */
if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
unsigned char version = nat_get_version(e);
+
nat_set_version(e, inc_node_version(version));
}
goto out_err;
}
page_hit:
- if(unlikely(nid != nid_of_node(page))) {
+ if (unlikely(nid != nid_of_node(page))) {
f2fs_warn(sbi, "inconsistent node block, nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
nid, nid_of_node(page), ino_of_node(page),
ofs_of_node(page), cpver_of_node(page),
out:
if (nwritten)
f2fs_submit_merged_write_cond(sbi, NULL, NULL, ino, NODE);
- return ret ? -EIO: 0;
+ return ret ? -EIO : 0;
}
static int f2fs_match_ino(struct inode *inode, unsigned long ino, void *data)
struct free_nid *i)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
-
int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
+
if (err)
return err;
struct f2fs_nat_entry raw_ne;
nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
+ if (f2fs_check_nid_range(sbi, nid))
+ continue;
+
raw_ne = nat_in_journal(journal, i);
ne = __lookup_nat_cache(nm_i, nid);
while ((found = __gang_lookup_nat_set(nm_i,
set_idx, SETVEC_SIZE, setvec))) {
unsigned idx;
+
set_idx = setvec[found - 1]->set + 1;
for (idx = 0; idx < found; idx++)
__adjust_nat_entry_set(setvec[idx], &sets,
INO_ENTRIES, /* indicates inode entries */
EXTENT_CACHE, /* indicates extent cache */
INMEM_PAGES, /* indicates inmemory pages */
+ DISCARD_CACHE, /* indicates memory of cached discard cmds */
BASE_CHECK, /* check kernel status */
};
/* Get the previous summary */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
struct curseg_info *curseg = CURSEG_I(sbi, i);
+
if (curseg->segno == segno) {
sum = curseg->sum_blk->entries[blkoff];
goto got_it;
#endif
sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
- return ret ? ret: err;
+ return ret ? ret : err;
}
{
struct inmem_pages *new;
- f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
+ if (PagePrivate(page))
+ set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
+ else
+ f2fs_set_page_private(page, ATOMIC_WRITTEN_PAGE);
new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
- while (!list_empty(&fi->inmem_pages)) {
+ do {
mutex_lock(&fi->inmem_lock);
+ if (list_empty(&fi->inmem_pages)) {
+ fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
+
+ spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
+ if (!list_empty(&fi->inmem_ilist))
+ list_del_init(&fi->inmem_ilist);
+ if (f2fs_is_atomic_file(inode)) {
+ clear_inode_flag(inode, FI_ATOMIC_FILE);
+ sbi->atomic_files--;
+ }
+ spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+
+ mutex_unlock(&fi->inmem_lock);
+ break;
+ }
__revoke_inmem_pages(inode, &fi->inmem_pages,
true, false, true);
mutex_unlock(&fi->inmem_lock);
- }
-
- fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
-
- spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
- if (!list_empty(&fi->inmem_ilist))
- list_del_init(&fi->inmem_ilist);
- if (f2fs_is_atomic_file(inode)) {
- clear_inode_flag(inode, FI_ATOMIC_FILE);
- sbi->atomic_files--;
- }
- spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
+ } while (1);
}
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
* dir/node pages without enough free segments.
*/
if (has_not_enough_free_secs(sbi, 0, 0)) {
- down_write(&sbi->gc_lock);
- f2fs_gc(sbi, false, false, NULL_SEGNO);
+ if (test_opt(sbi, GC_MERGE) && sbi->gc_thread &&
+ sbi->gc_thread->f2fs_gc_task) {
+ DEFINE_WAIT(wait);
+
+ prepare_to_wait(&sbi->gc_thread->fggc_wq, &wait,
+ TASK_UNINTERRUPTIBLE);
+ wake_up(&sbi->gc_thread->gc_wait_queue_head);
+ io_schedule();
+ finish_wait(&sbi->gc_thread->fggc_wq, &wait);
+ } else {
+ down_write(&sbi->gc_lock);
+ f2fs_gc(sbi, false, false, false, NULL_SEGNO);
+ }
}
}
llist_add(&cmd.llnode, &fcc->issue_list);
- /* update issue_list before we wake up issue_flush thread */
+ /*
+ * update issue_list before we wake up issue_flush thread, this
+ * smp_mb() pairs with another barrier in ___wait_event(), see
+ * more details in comments of waitqueue_active().
+ */
smp_mb();
if (waitqueue_active(&fcc->flush_wait_queue))
mutex_lock(&dirty_i->seglist_lock);
valid_blocks = get_valid_blocks(sbi, segno, false);
- ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
+ ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno, false);
if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
ckpt_valid_blocks == usable_blocks)) {
for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
if (get_valid_blocks(sbi, segno, false))
continue;
- if (get_ckpt_valid_blocks(sbi, segno))
+ if (get_ckpt_valid_blocks(sbi, segno, false))
continue;
mutex_unlock(&dirty_i->seglist_lock);
return segno;
struct discard_policy *dpolicy,
int discard_type, unsigned int granularity)
{
+ struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
+
/* common policy */
dpolicy->type = discard_type;
dpolicy->sync = true;
dpolicy->ordered = true;
if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
dpolicy->granularity = 1;
- dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
+ if (atomic_read(&dcc->discard_cmd_cnt))
+ dpolicy->max_interval =
+ DEF_MIN_DISCARD_ISSUE_TIME;
}
} else if (discard_type == DPOLICY_FORCE) {
dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
set_freezable();
do {
- __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
- dcc->discard_granularity);
+ if (sbi->gc_mode == GC_URGENT_HIGH ||
+ !f2fs_available_free_memory(sbi, DISCARD_CACHE))
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+ else
+ __init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
+ dcc->discard_granularity);
+
+ if (!atomic_read(&dcc->discard_cmd_cnt))
+ wait_ms = dpolicy.max_interval;
wait_event_interruptible_timeout(*q,
kthread_should_stop() || freezing(current) ||
wait_ms = dpolicy.max_interval;
continue;
}
-
- if (sbi->gc_mode == GC_URGENT_HIGH)
- __init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
+ if (!atomic_read(&dcc->discard_cmd_cnt))
+ continue;
sb_start_intwrite(sbi->sb);
if (issued > 0) {
__wait_all_discard_cmd(sbi, &dpolicy);
wait_ms = dpolicy.min_interval;
- } else if (issued == -1){
+ } else if (issued == -1) {
wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
if (!wait_ms)
wait_ms = dpolicy.mid_interval;
unsigned int segno, int modified)
{
struct seg_entry *se = get_seg_entry(sbi, segno);
+
se->type = type;
if (modified)
__mark_sit_entry_dirty(sbi, segno);
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
void *addr = curseg->sum_blk;
+
addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
memcpy(addr, sum, sizeof(struct f2fs_summary));
}
curseg->alloc_type = LFS;
}
-static void __next_free_blkoff(struct f2fs_sb_info *sbi,
- struct curseg_info *seg, block_t start)
+static int __next_free_blkoff(struct f2fs_sb_info *sbi,
+ int segno, block_t start)
{
- struct seg_entry *se = get_seg_entry(sbi, seg->segno);
+ struct seg_entry *se = get_seg_entry(sbi, segno);
int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
unsigned long *target_map = SIT_I(sbi)->tmp_map;
unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
- int i, pos;
+ int i;
for (i = 0; i < entries; i++)
target_map[i] = ckpt_map[i] | cur_map[i];
- pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
-
- seg->next_blkoff = pos;
+ return __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);
}
/*
struct curseg_info *seg)
{
if (seg->alloc_type == SSR)
- __next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
+ seg->next_blkoff =
+ __next_free_blkoff(sbi, seg->segno,
+ seg->next_blkoff + 1);
else
seg->next_blkoff++;
}
+bool f2fs_segment_has_free_slot(struct f2fs_sb_info *sbi, int segno)
+{
+ return __next_free_blkoff(sbi, segno, 0) < sbi->blocks_per_seg;
+}
+
/*
* This function always allocates a used segment(from dirty seglist) by SSR
* manner, so it should recover the existing segment information of valid blocks
reset_curseg(sbi, type, 1);
curseg->alloc_type = SSR;
- __next_free_blkoff(sbi, curseg, 0);
+ curseg->next_blkoff = __next_free_blkoff(sbi, curseg->segno, 0);
sum_page = f2fs_get_sum_page(sbi, new_segno);
if (IS_ERR(sum_page)) {
up_read(&SM_I(sbi)->curseg_lock);
}
-static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type)
+static void __allocate_new_segment(struct f2fs_sb_info *sbi, int type,
+ bool new_sec, bool force)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
unsigned int old_segno;
if (!curseg->inited)
goto alloc;
- if (!curseg->next_blkoff &&
- !get_valid_blocks(sbi, curseg->segno, false) &&
- !get_ckpt_valid_blocks(sbi, curseg->segno))
- return;
+ if (force || curseg->next_blkoff ||
+ get_valid_blocks(sbi, curseg->segno, new_sec))
+ goto alloc;
+ if (!get_ckpt_valid_blocks(sbi, curseg->segno, new_sec))
+ return;
alloc:
old_segno = curseg->segno;
SIT_I(sbi)->s_ops->allocate_segment(sbi, type, true);
locate_dirty_segment(sbi, old_segno);
}
-void f2fs_allocate_new_segment(struct f2fs_sb_info *sbi, int type)
+static void __allocate_new_section(struct f2fs_sb_info *sbi,
+ int type, bool force)
+{
+ __allocate_new_segment(sbi, type, true, force);
+}
+
+void f2fs_allocate_new_section(struct f2fs_sb_info *sbi, int type, bool force)
{
+ down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
- __allocate_new_segment(sbi, type);
+ __allocate_new_section(sbi, type, force);
up_write(&SIT_I(sbi)->sentry_lock);
+ up_read(&SM_I(sbi)->curseg_lock);
}
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
int i;
+ down_read(&SM_I(sbi)->curseg_lock);
down_write(&SIT_I(sbi)->sentry_lock);
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++)
- __allocate_new_segment(sbi, i);
+ __allocate_new_segment(sbi, i, false, false);
up_write(&SIT_I(sbi)->sentry_lock);
+ up_read(&SM_I(sbi)->curseg_lock);
}
static const struct segment_allocation default_salloc_ops = {
struct inode *inode = fio->page->mapping->host;
if (is_cold_data(fio->page)) {
- if (fio->sbi->am.atgc_enabled)
+ if (fio->sbi->am.atgc_enabled &&
+ (fio->io_type == FS_DATA_IO) &&
+ (fio->sbi->gc_mode != GC_URGENT_HIGH))
return CURSEG_ALL_DATA_ATGC;
else
return CURSEG_COLD_DATA;
f2fs_inode_chksum_set(sbi, page);
}
- if (F2FS_IO_ALIGNED(sbi))
- fio->retry = false;
-
if (fio) {
struct f2fs_bio_info *io;
+ if (F2FS_IO_ALIGNED(sbi))
+ fio->retry = false;
+
INIT_LIST_HEAD(&fio->list);
fio->in_list = true;
io = sbi->write_io[fio->type] + fio->temp;
set_sbi_flag(sbi, SBI_NEED_FSCK);
f2fs_warn(sbi, "%s: incorrect segment(%u) type, run fsck to fix.",
__func__, segno);
- return -EFSCORRUPTED;
+ err = -EFSCORRUPTED;
+ goto drop_bio;
+ }
+
+ if (is_sbi_flag_set(sbi, SBI_NEED_FSCK) || f2fs_cp_error(sbi)) {
+ err = -EIO;
+ goto drop_bio;
}
stat_inc_inplace_blocks(fio->sbi);
f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
}
+ return err;
+drop_bio:
+ if (fio->bio) {
+ struct bio *bio = *(fio->bio);
+
+ bio->bi_status = BLK_STS_IOERR;
+ bio_endio(bio);
+ fio->bio = NULL;
+ }
return err;
}
struct seg_entry *se;
int type;
unsigned short old_blkoff;
+ unsigned char old_alloc_type;
segno = GET_SEGNO(sbi, new_blkaddr);
se = get_seg_entry(sbi, segno);
old_cursegno = curseg->segno;
old_blkoff = curseg->next_blkoff;
+ old_alloc_type = curseg->alloc_type;
/* change the current segment */
if (segno != curseg->segno) {
change_curseg(sbi, type, true);
}
curseg->next_blkoff = old_blkoff;
+ curseg->alloc_type = old_alloc_type;
}
up_write(&sit_i->sentry_lock);
for (j = 0; j < blk_off; j++) {
struct f2fs_summary *s;
+
s = (struct f2fs_summary *)(kaddr + offset);
seg_i->sum_blk->entries[j] = *s;
offset += SUMMARY_SIZE;
if (__exist_node_summaries(sbi)) {
struct f2fs_summary *ns = &sum->entries[0];
int i;
+
for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
ns->version = 0;
ns->ofs_in_node = 0;
/* Step 3: write summary entries */
for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
unsigned short blkoff;
+
seg_i = CURSEG_I(sbi, i);
if (sbi->ckpt->alloc_type[i] == SSR)
blkoff = sbi->blocks_per_seg;
block_t blkaddr, int type)
{
int i, end;
+
if (IS_DATASEG(type))
end = type + NR_CURSEG_DATA_TYPE;
else
/* set use the current segments */
for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
struct curseg_info *curseg_t = CURSEG_I(sbi, type);
+
__set_test_and_inuse(sbi, curseg_t->segno);
}
}
}
static int report_one_zone_cb(struct blk_zone *zone, unsigned int idx,
- void *data) {
+ void *data)
+{
memcpy(data, zone, sizeof(struct blk_zone));
return 0;
}
f2fs_notice(sbi, "Assign new section to curseg[%d]: "
"curseg[0x%x,0x%x]", type, cs->segno, cs->next_blkoff);
- allocate_segment_by_default(sbi, type, true);
+
+ f2fs_allocate_new_section(sbi, type, true);
/* check consistency of the zone curseg pointed to */
if (check_zone_write_pointer(sbi, zbd, &zone))
};
static int check_zone_write_pointer_cb(struct blk_zone *zone, unsigned int idx,
- void *data) {
+ void *data)
+{
struct check_zone_write_pointer_args *args;
+
args = (struct check_zone_write_pointer_args *)data;
return check_zone_write_pointer(args->sbi, args->fdev, zone);
static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
+
kvfree(dirty_i->victim_secmap);
}
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
struct free_segmap_info *free_i = SM_I(sbi)->free_info;
+
if (!free_i)
return;
SM_I(sbi)->free_info = NULL;
/*
* BG_GC means the background cleaning job.
* FG_GC means the on-demand cleaning job.
- * FORCE_FG_GC means on-demand cleaning job in background.
*/
enum {
BG_GC = 0,
FG_GC,
- FORCE_FG_GC,
};
/* for a function parameter to select a victim segment */
}
static inline unsigned int get_ckpt_valid_blocks(struct f2fs_sb_info *sbi,
- unsigned int segno)
+ unsigned int segno, bool use_section)
{
+ if (use_section && __is_large_section(sbi)) {
+ unsigned int start_segno = START_SEGNO(segno);
+ unsigned int blocks = 0;
+ int i;
+
+ for (i = 0; i < sbi->segs_per_sec; i++, start_segno++) {
+ struct seg_entry *se = get_seg_entry(sbi, start_segno);
+
+ blocks += se->ckpt_valid_blocks;
+ }
+ return blocks;
+ }
return get_seg_entry(sbi, segno)->ckpt_valid_blocks;
}
Opt_compress_chksum,
Opt_compress_mode,
Opt_atgc,
+ Opt_gc_merge,
+ Opt_nogc_merge,
Opt_err,
};
{Opt_compress_chksum, "compress_chksum"},
{Opt_compress_mode, "compress_mode=%s"},
{Opt_atgc, "atgc"},
+ {Opt_gc_merge, "gc_merge"},
+ {Opt_nogc_merge, "nogc_merge"},
{Opt_err, NULL},
};
while ((p = strsep(&options, ",")) != NULL) {
int token;
+
if (!*p)
continue;
/*
case Opt_atgc:
set_opt(sbi, ATGC);
break;
+ case Opt_gc_merge:
+ set_opt(sbi, GC_MERGE);
+ break;
+ case Opt_nogc_merge:
+ clear_opt(sbi, GC_MERGE);
+ break;
default:
f2fs_err(sbi, "Unrecognized mount option \"%s\" or missing value",
p);
#endif
}
+#ifdef CONFIG_F2FS_FS_COMPRESSION
static inline void f2fs_show_compress_options(struct seq_file *seq,
struct super_block *sb)
{
else if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER)
seq_printf(seq, ",compress_mode=%s", "user");
}
+#endif
static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
{
else if (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF)
seq_printf(seq, ",background_gc=%s", "off");
+ if (test_opt(sbi, GC_MERGE))
+ seq_puts(seq, ",gc_merge");
+
if (test_opt(sbi, DISABLE_ROLL_FORWARD))
seq_puts(seq, ",disable_roll_forward");
if (test_opt(sbi, NORECOVERY))
set_opt(sbi, EXTENT_CACHE);
set_opt(sbi, NOHEAP);
clear_opt(sbi, DISABLE_CHECKPOINT);
+ set_opt(sbi, MERGE_CHECKPOINT);
F2FS_OPTION(sbi).unusable_cap = 0;
sbi->sb->s_flags |= SB_LAZYTIME;
set_opt(sbi, FLUSH_MERGE);
while (!f2fs_time_over(sbi, DISABLE_TIME)) {
down_write(&sbi->gc_lock);
- err = f2fs_gc(sbi, true, false, NULL_SEGNO);
+ err = f2fs_gc(sbi, true, false, false, NULL_SEGNO);
if (err == -ENODATA) {
err = 0;
break;
ret = sync_filesystem(sbi->sb);
if (ret || err) {
- err = ret ? ret: err;
+ err = ret ? ret : err;
goto restore_flag;
}
struct f2fs_mount_info org_mount_opt;
unsigned long old_sb_flags;
int err;
- bool need_restart_gc = false;
- bool need_stop_gc = false;
+ bool need_restart_gc = false, need_stop_gc = false;
+ bool need_restart_ckpt = false, need_stop_ckpt = false;
+ bool need_restart_flush = false, need_stop_flush = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
bool disable_checkpoint = test_opt(sbi, DISABLE_CHECKPOINT);
bool no_io_align = !F2FS_IO_ALIGNED(sbi);
* option. Also sync the filesystem.
*/
if ((*flags & SB_RDONLY) ||
- F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF) {
+ (F2FS_OPTION(sbi).bggc_mode == BGGC_MODE_OFF &&
+ !test_opt(sbi, GC_MERGE))) {
if (sbi->gc_thread) {
f2fs_stop_gc_thread(sbi);
need_restart_gc = true;
clear_sbi_flag(sbi, SBI_IS_CLOSE);
}
- if (checkpoint_changed) {
- if (test_opt(sbi, DISABLE_CHECKPOINT)) {
- err = f2fs_disable_checkpoint(sbi);
- if (err)
- goto restore_gc;
- } else {
- f2fs_enable_checkpoint(sbi);
- }
- }
-
- if (!test_opt(sbi, DISABLE_CHECKPOINT) &&
- test_opt(sbi, MERGE_CHECKPOINT)) {
+ if ((*flags & SB_RDONLY) || test_opt(sbi, DISABLE_CHECKPOINT) ||
+ !test_opt(sbi, MERGE_CHECKPOINT)) {
+ f2fs_stop_ckpt_thread(sbi);
+ need_restart_ckpt = true;
+ } else {
err = f2fs_start_ckpt_thread(sbi);
if (err) {
f2fs_err(sbi,
err);
goto restore_gc;
}
- } else {
- f2fs_stop_ckpt_thread(sbi);
+ need_stop_ckpt = true;
}
/*
if ((*flags & SB_RDONLY) || !test_opt(sbi, FLUSH_MERGE)) {
clear_opt(sbi, FLUSH_MERGE);
f2fs_destroy_flush_cmd_control(sbi, false);
+ need_restart_flush = true;
} else {
err = f2fs_create_flush_cmd_control(sbi);
if (err)
- goto restore_gc;
+ goto restore_ckpt;
+ need_stop_flush = true;
}
+
+ if (checkpoint_changed) {
+ if (test_opt(sbi, DISABLE_CHECKPOINT)) {
+ err = f2fs_disable_checkpoint(sbi);
+ if (err)
+ goto restore_flush;
+ } else {
+ f2fs_enable_checkpoint(sbi);
+ }
+ }
+
skip:
#ifdef CONFIG_QUOTA
/* Release old quota file names */
adjust_unusable_cap_perc(sbi);
*flags = (*flags & ~SB_LAZYTIME) | (sb->s_flags & SB_LAZYTIME);
return 0;
+restore_flush:
+ if (need_restart_flush) {
+ if (f2fs_create_flush_cmd_control(sbi))
+ f2fs_warn(sbi, "background flush thread has stopped");
+ } else if (need_stop_flush) {
+ clear_opt(sbi, FLUSH_MERGE);
+ f2fs_destroy_flush_cmd_control(sbi, false);
+ }
+restore_ckpt:
+ if (need_restart_ckpt) {
+ if (f2fs_start_ckpt_thread(sbi))
+ f2fs_warn(sbi, "background ckpt thread has stopped");
+ } else if (need_stop_ckpt) {
+ f2fs_stop_ckpt_thread(sbi);
+ }
restore_gc:
if (need_restart_gc) {
if (f2fs_start_gc_thread(sbi))
sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
for (i = 0; i < NR_PAGE_TYPE; i++) {
- int n = (i == META) ? 1: NR_TEMP_TYPE;
+ int n = (i == META) ? 1 : NR_TEMP_TYPE;
int j;
sbi->write_io[i] =
/* setup checkpoint request control and start checkpoint issue thread */
f2fs_init_ckpt_req_control(sbi);
- if (!test_opt(sbi, DISABLE_CHECKPOINT) &&
+ if (!f2fs_readonly(sb) && !test_opt(sbi, DISABLE_CHECKPOINT) &&
test_opt(sbi, MERGE_CHECKPOINT)) {
err = f2fs_start_ckpt_thread(sbi);
if (err) {
* previous checkpoint was not done by clean system shutdown.
*/
if (f2fs_hw_is_readonly(sbi)) {
- if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG))
- f2fs_err(sbi, "Need to recover fsync data, but write access unavailable");
- else
- f2fs_info(sbi, "write access unavailable, skipping recovery");
+ if (!is_set_ckpt_flags(sbi, CP_UMOUNT_FLAG)) {
+ err = f2fs_recover_fsync_data(sbi, true);
+ if (err > 0) {
+ err = -EROFS;
+ f2fs_err(sbi, "Need to recover fsync data, but "
+ "write access unavailable, please try "
+ "mount w/ disable_roll_forward or norecovery");
+ }
+ if (err < 0)
+ goto free_meta;
+ }
+ f2fs_info(sbi, "write access unavailable, skipping recovery");
goto reset_checkpoint;
}
* If filesystem is not mounted as read-only then
* do start the gc_thread.
*/
- if (F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF && !f2fs_readonly(sb)) {
+ if ((F2FS_OPTION(sbi).bggc_mode != BGGC_MODE_OFF ||
+ test_opt(sbi, GC_MERGE)) && !f2fs_readonly(sb)) {
/* After POR, we can run background GC thread.*/
err = f2fs_start_gc_thread(sbi);
if (err)
#include <linux/seq_file.h>
#include <linux/unicode.h>
#include <linux/ioprio.h>
+#include <linux/sysfs.h>
#include "f2fs.h"
#include "segment.h"
(unsigned long long)(free_segments(sbi)));
}
+static ssize_t ovp_segments_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ return sprintf(buf, "%llu\n",
+ (unsigned long long)(overprovision_segments(sbi)));
+}
+
static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
return len;
}
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (!strcmp(a->attr.name, "compr_written_block"))
+ return sysfs_emit(buf, "%llu\n", sbi->compr_written_block);
+
+ if (!strcmp(a->attr.name, "compr_saved_block"))
+ return sysfs_emit(buf, "%llu\n", sbi->compr_saved_block);
+
+ if (!strcmp(a->attr.name, "compr_new_inode"))
+ return sysfs_emit(buf, "%u\n", sbi->compr_new_inode);
+#endif
+
ui = (unsigned int *)(ptr + a->offset);
return sprintf(buf, "%u\n", *ui);
return count;
}
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ if (!strcmp(a->attr.name, "compr_written_block") ||
+ !strcmp(a->attr.name, "compr_saved_block")) {
+ if (t != 0)
+ return -EINVAL;
+ sbi->compr_written_block = 0;
+ sbi->compr_saved_block = 0;
+ return count;
+ }
+
+ if (!strcmp(a->attr.name, "compr_new_inode")) {
+ if (t != 0)
+ return -EINVAL;
+ sbi->compr_new_inode = 0;
+ return count;
+ }
+#endif
+
*ui = (unsigned int)t;
return count;
F2FS_RW_ATTR(CPRC_INFO, ckpt_req_control, ckpt_thread_ioprio, ckpt_thread_ioprio);
F2FS_GENERAL_RO_ATTR(dirty_segments);
F2FS_GENERAL_RO_ATTR(free_segments);
+F2FS_GENERAL_RO_ATTR(ovp_segments);
F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
F2FS_GENERAL_RO_ATTR(features);
F2FS_GENERAL_RO_ATTR(current_reserved_blocks);
F2FS_FEATURE_RO_ATTR(casefold, FEAT_CASEFOLD);
#ifdef CONFIG_F2FS_FS_COMPRESSION
F2FS_FEATURE_RO_ATTR(compression, FEAT_COMPRESSION);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_written_block, compr_written_block);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_saved_block, compr_saved_block);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, compr_new_inode, compr_new_inode);
#endif
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
ATTR_LIST(ckpt_thread_ioprio),
ATTR_LIST(dirty_segments),
ATTR_LIST(free_segments),
+ ATTR_LIST(ovp_segments),
ATTR_LIST(unusable),
ATTR_LIST(lifetime_write_kbytes),
ATTR_LIST(features),
ATTR_LIST(moved_blocks_foreground),
ATTR_LIST(moved_blocks_background),
ATTR_LIST(avg_vblocks),
+#endif
+#ifdef CONFIG_F2FS_FS_COMPRESSION
+ ATTR_LIST(compr_written_block),
+ ATTR_LIST(compr_saved_block),
+ ATTR_LIST(compr_new_inode),
#endif
NULL,
};
size_t desc_size, u64 merkle_tree_size)
{
struct inode *inode = file_inode(filp);
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
u64 desc_pos = f2fs_verity_metadata_pos(inode) + merkle_tree_size;
struct fsverity_descriptor_location dloc = {
.version = cpu_to_le32(F2FS_VERIFY_VER),
.size = cpu_to_le32(desc_size),
.pos = cpu_to_le64(desc_pos),
};
- int err = 0;
+ int err = 0, err2 = 0;
- if (desc != NULL) {
- /* Succeeded; write the verity descriptor. */
- err = pagecache_write(inode, desc, desc_size, desc_pos);
+ /*
+ * If an error already occurred (which fs/verity/ signals by passing
+ * desc == NULL), then only clean-up is needed.
+ */
+ if (desc == NULL)
+ goto cleanup;
- /* Write all pages before clearing FI_VERITY_IN_PROGRESS. */
- if (!err)
- err = filemap_write_and_wait(inode->i_mapping);
- }
+ /* Append the verity descriptor. */
+ err = pagecache_write(inode, desc, desc_size, desc_pos);
+ if (err)
+ goto cleanup;
+
+ /*
+ * Write all pages (both data and verity metadata). Note that this must
+ * happen before clearing FI_VERITY_IN_PROGRESS; otherwise pages beyond
+ * i_size won't be written properly. For crash consistency, this also
+ * must happen before the verity inode flag gets persisted.
+ */
+ err = filemap_write_and_wait(inode->i_mapping);
+ if (err)
+ goto cleanup;
+
+ /* Set the verity xattr. */
+ err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
+ F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
+ NULL, XATTR_CREATE);
+ if (err)
+ goto cleanup;
- /* If we failed, truncate anything we wrote past i_size. */
- if (desc == NULL || err)
- f2fs_truncate(inode);
+ /* Finally, set the verity inode flag. */
+ file_set_verity(inode);
+ f2fs_set_inode_flags(inode);
+ f2fs_mark_inode_dirty_sync(inode, true);
clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+ return 0;
- if (desc != NULL && !err) {
- err = f2fs_setxattr(inode, F2FS_XATTR_INDEX_VERITY,
- F2FS_XATTR_NAME_VERITY, &dloc, sizeof(dloc),
- NULL, XATTR_CREATE);
- if (!err) {
- file_set_verity(inode);
- f2fs_set_inode_flags(inode);
- f2fs_mark_inode_dirty_sync(inode, true);
- }
+cleanup:
+ /*
+ * Verity failed to be enabled, so clean up by truncating any verity
+ * metadata that was written beyond i_size (both from cache and from
+ * disk) and clearing FI_VERITY_IN_PROGRESS.
+ *
+ * Taking i_gc_rwsem[WRITE] is needed to stop f2fs garbage collection
+ * from re-instantiating cached pages we are truncating (since unlike
+ * normal file accesses, garbage collection isn't limited by i_size).
+ */
+ down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ err2 = f2fs_truncate(inode);
+ if (err2) {
+ f2fs_err(sbi, "Truncating verity metadata failed (errno=%d)",
+ err2);
+ set_sbi_flag(sbi, SBI_NEED_FSCK);
}
- return err;
+ up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
+ clear_inode_flag(inode, FI_VERITY_IN_PROGRESS);
+ return err ?: err2;
}
static int f2fs_get_verity_descriptor(struct inode *inode, void *buf,
f2fs_wait_on_page_writeback(xpage, NODE, true, true);
} else {
struct dnode_of_data dn;
+
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
if (IS_ERR(xpage)) {
/*
* FAT data is organized as clusters, trim at the granulary of cluster.
*
- * fstrim_range is in byte, convert vaules to cluster index.
+ * fstrim_range is in byte, convert values to cluster index.
* Treat sectors before data region as all used, not to trim them.
*/
ent_start = max_t(u64, range->start>>sbi->cluster_bits, FAT_START_ENT);
if (mapping) {
truncate_inode_pages_final(mapping);
if (!gfs2_withdrawn(sdp))
- GLOCK_BUG_ON(gl, mapping->nrpages ||
- mapping->nrexceptional);
+ GLOCK_BUG_ON(gl, !mapping_empty(mapping));
}
trace_gfs2_glock_put(gl);
sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
u8 no_of_acls; /* number of ACL's (low 3 bits) */
u8 ix; /* code page index (of filename), see
struct code_page_data */
- u8 namelen, name[1]; /* file name */
+ u8 namelen; /* file name length */
+ u8 name[]; /* file name */
/* dnode_secno down; btree down pointer, if present,
follows name on next word boundary, or maybe it
precedes next dirent, which is on a word boundary. */
struct address_space *mapping = &inode->i_data;
const pgoff_t start = lstart >> huge_page_shift(h);
const pgoff_t end = lend >> huge_page_shift(h);
- struct vm_area_struct pseudo_vma;
struct pagevec pvec;
pgoff_t next, index;
int i, freed = 0;
bool truncate_op = (lend == LLONG_MAX);
- vma_init(&pseudo_vma, current->mm);
- pseudo_vma.vm_flags = (VM_HUGETLB | VM_MAYSHARE | VM_SHARED);
pagevec_init(&pvec);
next = start;
while (next < end) {
for (i = 0; i < pagevec_count(&pvec); ++i) {
struct page *page = pvec.pages[i];
- u32 hash;
+ u32 hash = 0;
index = page->index;
- hash = hugetlb_fault_mutex_hash(mapping, index);
if (!truncate_op) {
/*
* Only need to hold the fault mutex in the
* page faults. Races are not possible in the
* case of truncation.
*/
+ hash = hugetlb_fault_mutex_hash(mapping, index);
mutex_lock(&hugetlb_fault_mutex_table[hash]);
}
if (!h)
return -1;
- return h - hstates;
+ return hstate_index(h);
}
/*
*/
xa_lock_irq(&inode->i_data.i_pages);
BUG_ON(inode->i_data.nrpages);
- BUG_ON(inode->i_data.nrexceptional);
+ /*
+ * Almost always, mapping_empty(&inode->i_data) here; but there are
+ * two known and long-standing ways in which nodes may get left behind
+ * (when deep radix-tree node allocation failed partway; or when THP
+ * collapse_file() failed). Until those two known cases are cleaned up,
+ * or a cleanup function is called here, do not BUG_ON(!mapping_empty),
+ * nor even WARN_ON(!mapping_empty).
+ */
xa_unlock_irq(&inode->i_data.i_pages);
BUG_ON(!list_empty(&inode->i_data.private_list));
BUG_ON(!(inode->i_state & I_FREEING));
struct io_buffer {
struct list_head list;
__u64 addr;
- __s32 len;
+ __u32 len;
__u16 bid;
};
spinlock_t rsrc_ref_lock;
struct io_rsrc_node *rsrc_node;
struct io_rsrc_node *rsrc_backup_node;
+ struct io_mapped_ubuf *dummy_ubuf;
struct io_restriction restrictions;
REQ_F_FORCE_ASYNC_BIT = IOSQE_ASYNC_BIT,
REQ_F_BUFFER_SELECT_BIT = IOSQE_BUFFER_SELECT_BIT,
- REQ_F_FAIL_LINK_BIT,
+ /* first byte is taken by user flags, shift it to not overlap */
+ REQ_F_FAIL_LINK_BIT = 8,
REQ_F_INFLIGHT_BIT,
REQ_F_CUR_POS_BIT,
REQ_F_NOWAIT_BIT,
goto err;
__hash_init(ctx->cancel_hash, 1U << hash_bits);
+ ctx->dummy_ubuf = kzalloc(sizeof(*ctx->dummy_ubuf), GFP_KERNEL);
+ if (!ctx->dummy_ubuf)
+ goto err;
+ /* set invalid range, so io_import_fixed() fails meeting it */
+ ctx->dummy_ubuf->ubuf = -1UL;
+
if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL))
goto err;
INIT_LIST_HEAD(&ctx->submit_state.comp.locked_free_list);
return ctx;
err:
+ kfree(ctx->dummy_ubuf);
kfree(ctx->cancel_hash);
kfree(ctx);
return NULL;
break;
buf->addr = addr;
- buf->len = pbuf->len;
+ buf->len = min_t(__u32, pbuf->len, MAX_RW_COUNT);
buf->bid = bid;
addr += pbuf->len;
bid++;
req->work.creds = NULL;
/* enforce forwards compatibility on users */
- if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) {
- req->flags = 0;
+ if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
return -EINVAL;
- }
-
if (unlikely(req->opcode >= IORING_OP_LAST))
return -EINVAL;
-
if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
return -EACCES;
io_ring_submit_lock(ctx, lock_ring);
spin_lock_irqsave(&ctx->completion_lock, flags);
io_cqring_fill_event(ctx, prsrc->tag, 0, 0);
+ ctx->cq_extra++;
io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
io_cqring_ev_posted(ctx);
struct io_mapped_ubuf *imu = *slot;
unsigned int i;
- for (i = 0; i < imu->nr_bvecs; i++)
- unpin_user_page(imu->bvec[i].bv_page);
- if (imu->acct_pages)
- io_unaccount_mem(ctx, imu->acct_pages);
- kvfree(imu);
+ if (imu != ctx->dummy_ubuf) {
+ for (i = 0; i < imu->nr_bvecs; i++)
+ unpin_user_page(imu->bvec[i].bv_page);
+ if (imu->acct_pages)
+ io_unaccount_mem(ctx, imu->acct_pages);
+ kvfree(imu);
+ }
*slot = NULL;
}
for (i = 0; i < ctx->nr_user_bufs; i++)
io_buffer_unmap(ctx, &ctx->user_bufs[i]);
kfree(ctx->user_bufs);
- kfree(ctx->buf_data);
+ io_rsrc_data_free(ctx->buf_data);
ctx->user_bufs = NULL;
ctx->buf_data = NULL;
ctx->nr_user_bufs = 0;
size_t size;
int ret, pret, nr_pages, i;
+ if (!iov->iov_base) {
+ *pimu = ctx->dummy_ubuf;
+ return 0;
+ }
+
ubuf = (unsigned long) iov->iov_base;
end = (ubuf + iov->iov_len + PAGE_SIZE - 1) >> PAGE_SHIFT;
start = ubuf >> PAGE_SHIFT;
* constraints here, we'll -EINVAL later when IO is
* submitted if they are wrong.
*/
- if (!iov->iov_base || !iov->iov_len)
+ if (!iov->iov_base)
+ return iov->iov_len ? -EFAULT : 0;
+ if (!iov->iov_len)
return -EFAULT;
/* arbitrary limit, but we need something */
return -ENOMEM;
ret = io_buffers_map_alloc(ctx, nr_args);
if (ret) {
- kfree(data);
+ io_rsrc_data_free(data);
return ret;
}
ret = io_buffer_validate(&iov);
if (ret)
break;
+ if (!iov.iov_base && tag) {
+ ret = -EINVAL;
+ break;
+ }
ret = io_sqe_buffer_register(ctx, &iov, &ctx->user_bufs[i],
&last_hpage);
err = io_buffer_validate(&iov);
if (err)
break;
+ if (!iov.iov_base && tag) {
+ err = -EINVAL;
+ break;
+ }
err = io_sqe_buffer_register(ctx, &iov, &imu, &last_hpage);
if (err)
break;
i = array_index_nospec(offset, ctx->nr_user_bufs);
- if (ctx->user_bufs[i]) {
+ if (ctx->user_bufs[i] != ctx->dummy_ubuf) {
err = io_queue_rsrc_removal(ctx->buf_data, offset,
ctx->rsrc_node, ctx->user_bufs[i]);
if (unlikely(err)) {
if (ctx->hash_map)
io_wq_put_hash(ctx->hash_map);
kfree(ctx->cancel_hash);
+ kfree(ctx->dummy_ubuf);
kfree(ctx);
}
if (ret)
goto err;
/* always set a rsrc node */
- io_rsrc_node_switch_start(ctx);
+ ret = io_rsrc_node_switch_start(ctx);
+ if (ret)
+ goto err;
io_rsrc_node_switch(ctx, NULL);
memset(&p->sq_off, 0, sizeof(p->sq_off));
BUILD_BUG_SQE_ELEM(42, __u16, personality);
BUILD_BUG_SQE_ELEM(44, __s32, splice_fd_in);
+ BUILD_BUG_ON(sizeof(struct io_uring_files_update) !=
+ sizeof(struct io_uring_rsrc_update));
+ BUILD_BUG_ON(sizeof(struct io_uring_rsrc_update) >
+ sizeof(struct io_uring_rsrc_update2));
+ /* should fit into one byte */
+ BUILD_BUG_ON(SQE_VALID_FLAGS >= (1 << 8));
+
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC |
}
void
-iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends,
- void (*merge_private)(struct iomap_ioend *ioend,
- struct iomap_ioend *next))
+iomap_ioend_try_merge(struct iomap_ioend *ioend, struct list_head *more_ioends)
{
struct iomap_ioend *next;
break;
list_move_tail(&next->io_list, &ioend->io_list);
ioend->io_size += next->io_size;
- if (next->io_private && merge_private)
- merge_private(ioend, next);
}
}
EXPORT_SYMBOL_GPL(iomap_ioend_try_merge);
ioend->io_inode = inode;
ioend->io_size = 0;
ioend->io_offset = offset;
- ioend->io_private = NULL;
ioend->io_bio = bio;
return ioend;
}
rs.cont_extent = isonum_733(rr->u.CE.extent);
rs.cont_offset = isonum_733(rr->u.CE.offset);
rs.cont_size = isonum_733(rr->u.CE.size);
+ break;
default:
break;
}
.mmap = generic_file_readonly_mmap,
.fsync = jffs2_fsync,
.splice_read = generic_file_splice_read,
+ .splice_write = iter_file_splice_write,
};
/* jffs2_file_inode_operations */
memcpy(&fd->name, rd->name, checkedlen);
fd->name[checkedlen] = 0;
- crc = crc32(0, fd->name, rd->nsize);
+ crc = crc32(0, fd->name, checkedlen);
if (crc != je32_to_cpu(rd->name_crc)) {
pr_notice("%s(): Name CRC failed on node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
__func__, ofs, je32_to_cpu(rd->name_crc), crc);
#define jffs2_sum_active() (0)
#define jffs2_sum_init(a) (0)
-#define jffs2_sum_exit(a)
+#define jffs2_sum_exit(a) do { } while (0)
#define jffs2_sum_disable_collecting(a)
#define jffs2_sum_is_disabled(a) (0)
-#define jffs2_sum_reset_collected(a)
+#define jffs2_sum_reset_collected(a) do { } while (0)
#define jffs2_sum_add_kvec(a,b,c,d) (0)
-#define jffs2_sum_move_collected(a,b)
+#define jffs2_sum_move_collected(a,b) do { } while (0)
#define jffs2_sum_write_sumnode(a) (0)
-#define jffs2_sum_add_padding_mem(a,b)
-#define jffs2_sum_add_inode_mem(a,b,c)
-#define jffs2_sum_add_dirent_mem(a,b,c)
-#define jffs2_sum_add_xattr_mem(a,b,c)
-#define jffs2_sum_add_xref_mem(a,b,c)
+#define jffs2_sum_add_padding_mem(a,b) do { } while (0)
+#define jffs2_sum_add_inode_mem(a,b,c) do { } while (0)
+#define jffs2_sum_add_dirent_mem(a,b,c) do { } while (0)
+#define jffs2_sum_add_xattr_mem(a,b,c) do { } while (0)
+#define jffs2_sum_add_xref_mem(a,b,c) do { } while (0)
#define jffs2_sum_scan_sumnode(a,b,c,d,e) (0)
#endif /* CONFIG_JFFS2_SUMMARY */
if (flags & FL_LAYOUT)
return 0;
+ if (flags & FL_DELEG)
+ /* We leave these checks to the caller */
+ return 0;
if (arg == F_RDLCK)
return inode_is_open_for_write(inode) ? -EAGAIN : 0;
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
if (!pnfs_layout_is_valid(lo))
continue;
- if (stateid != NULL &&
- !nfs4_stateid_match_other(stateid, &lo->plh_stateid))
+ if (!nfs4_stateid_match_other(stateid, &lo->plh_stateid))
continue;
- if (!nfs_sb_active(server->super))
- continue;
- inode = igrab(lo->plh_inode);
+ if (nfs_sb_active(server->super))
+ inode = igrab(lo->plh_inode);
+ else
+ inode = ERR_PTR(-EAGAIN);
rcu_read_unlock();
if (inode)
return inode;
continue;
if (nfsi->layout != lo)
continue;
- if (!nfs_sb_active(server->super))
- continue;
- inode = igrab(lo->plh_inode);
+ if (nfs_sb_active(server->super))
+ inode = igrab(lo->plh_inode);
+ else
+ inode = ERR_PTR(-EAGAIN);
rcu_read_unlock();
if (inode)
return inode;
to->to_maxval = to->to_initval;
to->to_exponential = 0;
break;
-#ifndef CONFIG_NFS_DISABLE_UDP_SUPPORT
case XPRT_TRANSPORT_UDP:
if (retrans == NFS_UNSPEC_RETRANS)
to->to_retries = NFS_DEF_UDP_RETRANS;
to->to_maxval = NFS_MAX_UDP_TIMEOUT;
to->to_exponential = 1;
break;
-#endif
default:
BUG();
}
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
server->options = ctx->options;
- server->caps |= NFS_CAP_HARDLINKS|NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
- NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|NFS_CAP_OWNER_GROUP|
- NFS_CAP_ATIME|NFS_CAP_CTIME|NFS_CAP_MTIME;
+ server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+
+ switch (clp->rpc_ops->version) {
+ case 2:
+ server->fattr_valid = NFS_ATTR_FATTR_V2;
+ break;
+ case 3:
+ server->fattr_valid = NFS_ATTR_FATTR_V3;
+ break;
+ default:
+ server->fattr_valid = NFS_ATTR_FATTR_V4;
+ }
if (ctx->rsize)
server->rsize = nfs_block_size(ctx->rsize, NULL);
server->maxfilesize = fsinfo->maxfilesize;
server->time_delta = fsinfo->time_delta;
+ server->change_attr_type = fsinfo->change_attr_type;
server->clone_blksize = fsinfo->clone_blksize;
/* We're airborne Set socket buffersize */
return NULL;
}
+ server->change_attr_type = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
ida_init(&server->openowner_id);
ida_init(&server->lockowner_id);
pnfs_init_server(server);
return ret;
}
/**
- * nfs_have_delegation - check if inode has a delegation, mark it
+ * nfs4_have_delegation - check if inode has a delegation, mark it
* NFS_DELEGATION_REFERENCED if there is one.
* @inode: inode to check
* @flags: delegation types to check for
if (freeme == NULL)
goto out;
add_new:
+ /*
+ * If we didn't revalidate the change attribute before setting
+ * the delegation, then pre-emptively ask for a full attribute
+ * cache revalidation.
+ */
+ spin_lock(&inode->i_lock);
+ if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_CHANGE)
+ nfs_set_cache_invalid(inode,
+ NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_NLINK |
+ NFS_INO_INVALID_OTHER | NFS_INO_INVALID_DATA |
+ NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL |
+ NFS_INO_INVALID_XATTR);
+ spin_unlock(&inode->i_lock);
+
list_add_tail_rcu(&delegation->super_list, &server->delegations);
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
atomic_long_inc(&nfs_active_delegations);
trace_nfs4_set_delegation(inode, type);
-
- spin_lock(&inode->i_lock);
- if (NFS_I(inode)->cache_validity & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME))
- NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED;
- spin_unlock(&inode->i_lock);
out:
spin_unlock(&clp->cl_lock);
if (delegation != NULL)
}
/**
- * nfs_inode_return_delegation - synchronously return a delegation
+ * nfs4_inode_return_delegation - synchronously return a delegation
* @inode: inode to process
*
* This routine will always flush any dirty data to disk on the
}
/**
- * nfs_inode_return_delegation_on_close - asynchronously return a delegation
+ * nfs4_inode_return_delegation_on_close - asynchronously return a delegation
* @inode: inode to process
*
* This routine is called on file close in order to determine if the
}
/**
- * nfs_super_return_all_delegations - return delegations for one superblock
+ * nfs_server_return_all_delegations - return delegations for one superblock
* @server: pointer to nfs_server to process
*
*/
static inline int nfs_have_delegated_attributes(struct inode *inode)
{
- return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ) &&
- !(NFS_I(inode)->cache_validity & NFS_INO_REVAL_FORCED);
+ return NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
}
#endif
break;
}
+ verf_arg = verf_res;
+
status = nfs_readdir_page_filler(desc, entry, pages, pglen,
arrays, narrays);
} while (!status && nfs_readdir_page_needs_filling(page));
}
return res;
}
- memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
+ /*
+ * Set the cookie verifier if the page cache was empty
+ */
+ if (desc->page_index == 0)
+ memcpy(nfsi->cookieverf, verf,
+ sizeof(nfsi->cookieverf));
}
res = nfs_readdir_search_array(desc);
if (res == 0) {
/*
* Once we've found the start of the dirent within a page: fill 'er up...
*/
-static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
+static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
+ const __be32 *verf)
{
struct file *file = desc->file;
- struct nfs_inode *nfsi = NFS_I(file_inode(file));
struct nfs_cache_array *array;
unsigned int i = 0;
desc->eof = true;
break;
}
- memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
+ memcpy(desc->verf, verf, sizeof(desc->verf));
if (i < (array->size-1))
desc->dir_cookie = array->array[i+1].cookie;
else
for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
desc->page = arrays[i];
- nfs_do_filldir(desc);
+ nfs_do_filldir(desc, verf);
}
desc->page = NULL;
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
+ struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_open_dir_context *dir_ctx = file->private_data;
struct nfs_readdir_descriptor *desc;
int res;
break;
}
if (res == -ETOOSMALL && desc->plus) {
- clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
+ clear_bit(NFS_INO_ADVISE_RDPLUS, &nfsi->flags);
nfs_zap_caches(inode);
desc->page_index = 0;
desc->plus = false;
if (res < 0)
break;
- nfs_do_filldir(desc);
+ nfs_do_filldir(desc, nfsi->cookieverf);
nfs_readdir_page_unlock_and_put_cached(desc);
} while (!desc->eof);
NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
nfs_set_cache_invalid(
inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
- NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED);
+ NFS_INO_INVALID_NLINK);
spin_unlock(&inode->i_lock);
}
if (S_ISDIR(inode->i_mode))
return 0;
- if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_OTHER)) {
+ if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
ret = __nfs_revalidate_inode(server, inode);
if (mask & MAY_NOT_BLOCK)
return -ECHILD;
- res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
+ NFS_INO_INVALID_OTHER);
if (res == 0)
res = generic_permission(&init_user_ns, inode, mask);
goto out;
}
EXPORT_SYMBOL_GPL(nfs_permission);
-
-/*
- * Local variables:
- * version-control: t
- * kept-new-versions: 5
- * End:
- */
static u64 nfs_fetch_iversion(struct inode *inode)
{
- struct nfs_server *server = NFS_SERVER(inode);
-
- /* Is this the right call?: */
- nfs_revalidate_inode(server, inode);
- /*
- * Also, note we're ignoring any returned error. That seems to be
- * the practice for cache consistency information elsewhere in
- * the server, but I'm not sure why.
- */
- if (server->nfs_client->rpc_ops->version >= 4)
- return inode_peek_iversion_raw(inode);
- else
- return time_to_chattr(&inode->i_ctime);
+ nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
+ return inode_peek_iversion_raw(inode);
}
const struct export_operations nfs_export_ops = {
if (filp->f_flags & O_DIRECT)
goto force_reval;
- if (nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE))
+ if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
goto force_reval;
return 0;
force_reval:
if (unlikely(!p))
return -ENOBUFS;
fh->size = be32_to_cpup(p++);
- if (fh->size > sizeof(struct nfs_fh)) {
+ if (fh->size > NFS_MAXFHSIZE) {
printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n",
fh->size);
return -EOVERFLOW;
return 0;
}
+#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+ return true;
+}
+#else
+static bool nfs_server_transport_udp_invalid(const struct nfs_fs_context *ctx)
+{
+ if (ctx->version == 4)
+ return true;
+ return false;
+}
+#endif
+
/*
* Sanity check the NFS transport protocol.
- *
*/
-static void nfs_validate_transport_protocol(struct nfs_fs_context *ctx)
+static int nfs_validate_transport_protocol(struct fs_context *fc,
+ struct nfs_fs_context *ctx)
{
switch (ctx->nfs_server.protocol) {
case XPRT_TRANSPORT_UDP:
+ if (nfs_server_transport_udp_invalid(ctx))
+ goto out_invalid_transport_udp;
+ break;
case XPRT_TRANSPORT_TCP:
case XPRT_TRANSPORT_RDMA:
break;
default:
ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
}
+ return 0;
+out_invalid_transport_udp:
+ return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
}
/*
*/
static void nfs_set_mount_transport_protocol(struct nfs_fs_context *ctx)
{
- nfs_validate_transport_protocol(ctx);
-
if (ctx->mount_server.protocol == XPRT_TRANSPORT_UDP ||
ctx->mount_server.protocol == XPRT_TRANSPORT_TCP)
return;
struct nfs_fh *mntfh = ctx->mntfh;
struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
int extra_flags = NFS_MOUNT_LEGACY_INTERFACE;
+ int ret;
if (data == NULL)
goto out_no_data;
memset(mntfh->data + mntfh->size, 0,
sizeof(mntfh->data) - mntfh->size);
+ /*
+ * for proto == XPRT_TRANSPORT_UDP, which is what uses
+ * to_exponential, implying shift: limit the shift value
+ * to BITS_PER_LONG (majortimeo is unsigned long)
+ */
+ if (!(data->flags & NFS_MOUNT_TCP)) /* this will be UDP */
+ if (data->retrans >= 64) /* shift value is too large */
+ goto out_invalid_data;
+
/*
* Translate to nfs_fs_context, which nfs_fill_super
* can deal with.
goto generic;
}
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
+
ctx->skip_reconfig_option_check = true;
return 0;
out_invalid_fh:
return nfs_invalf(fc, "NFS: invalid root filehandle");
+
+out_invalid_data:
+ return nfs_invalf(fc, "NFS: invalid binary mount data");
}
#if IS_ENABLED(CONFIG_NFS_V4)
{
struct nfs_fs_context *ctx = nfs_fc2context(fc);
struct sockaddr *sap = (struct sockaddr *)&ctx->nfs_server.address;
+ int ret;
char *c;
if (!data) {
ctx->acdirmin = data->acdirmin;
ctx->acdirmax = data->acdirmax;
ctx->nfs_server.protocol = data->proto;
- nfs_validate_transport_protocol(ctx);
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
done:
ctx->skip_reconfig_option_check = true;
return 0;
out_no_address:
return nfs_invalf(fc, "NFS4: mount program didn't pass remote address");
-
-out_invalid_transport_udp:
- return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
}
#endif
if (!nfs_verify_server_address(sap))
goto out_no_address;
+ ret = nfs_validate_transport_protocol(fc, ctx);
+ if (ret)
+ return ret;
+
if (ctx->version == 4) {
if (IS_ENABLED(CONFIG_NFS_V4)) {
if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
port = NFS_PORT;
max_namelen = NFS4_MAXNAMLEN;
max_pathlen = NFS4_MAXPATHLEN;
- nfs_validate_transport_protocol(ctx);
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
ctx->flags &= ~(NFS_MOUNT_NONLM | NFS_MOUNT_NOACL |
NFS_MOUNT_VER3 | NFS_MOUNT_LOCAL_FLOCK |
NFS_MOUNT_LOCAL_FCNTL);
}
} else {
nfs_set_mount_transport_protocol(ctx);
-#ifdef CONFIG_NFS_DISABLE_UDP_SUPPORT
- if (ctx->nfs_server.protocol == XPRT_TRANSPORT_UDP)
- goto out_invalid_transport_udp;
-#endif
if (ctx->nfs_server.protocol == XPRT_TRANSPORT_RDMA)
port = NFS_RDMA_PORT;
}
out_v4_not_compiled:
nfs_errorf(fc, "NFS: NFSv4 is not compiled into kernel");
return -EPROTONOSUPPORT;
-out_invalid_transport_udp:
- return nfs_invalf(fc, "NFS: Unsupported transport protocol udp");
out_no_address:
return nfs_invalf(fc, "NFS: mount program didn't pass remote address");
out_mountproto_mismatch:
return !time_in_range_open(jiffies, nfsi->read_cache_jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo);
}
-static bool nfs_check_cache_invalid_delegated(struct inode *inode, unsigned long flags)
+static bool nfs_check_cache_flags_invalid(struct inode *inode,
+ unsigned long flags)
{
unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- /* Special case for the pagecache or access cache */
- if (flags == NFS_INO_REVAL_PAGECACHE &&
- !(cache_validity & NFS_INO_REVAL_FORCED))
- return false;
return (cache_validity & flags) != 0;
}
-static bool nfs_check_cache_invalid_not_delegated(struct inode *inode, unsigned long flags)
-{
- unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
-
- if ((cache_validity & flags) != 0)
- return true;
- if (nfs_attribute_timeout(inode))
- return true;
- return false;
-}
-
bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
{
- if (NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
- return nfs_check_cache_invalid_delegated(inode, flags);
-
- return nfs_check_cache_invalid_not_delegated(inode, flags);
+ if (nfs_check_cache_flags_invalid(inode, flags))
+ return true;
+ return nfs_attribute_cache_expired(inode);
}
EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
if (have_delegation) {
if (!(flags & NFS_INO_REVAL_FORCED))
- flags &= ~NFS_INO_INVALID_OTHER;
- flags &= ~(NFS_INO_INVALID_CHANGE
- | NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_INVALID_XATTR);
- }
+ flags &= ~(NFS_INO_INVALID_MODE |
+ NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_XATTR);
+ flags &= ~(NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
+ } else if (flags & NFS_INO_REVAL_PAGECACHE)
+ flags |= NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE;
if (!nfs_has_xattr_cache(nfsi))
flags &= ~NFS_INO_INVALID_XATTR;
+ if (flags & NFS_INO_INVALID_DATA)
+ nfs_fscache_invalidate(inode);
if (inode->i_mapping->nrpages == 0)
flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
+ flags &= ~(NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED);
nfsi->cache_validity |= flags;
- if (flags & NFS_INO_INVALID_DATA)
- nfs_fscache_invalidate(inode);
}
EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
.fattr = fattr
};
struct inode *inode = ERR_PTR(-ENOENT);
+ u64 fattr_supported = NFS_SB(sb)->fattr_valid;
unsigned long hash;
nfs_attr_check_mountpoint(sb, fattr);
inode->i_mode = fattr->mode;
nfsi->cache_validity = 0;
if ((fattr->valid & NFS_ATTR_FATTR_MODE) == 0
- && nfs_server_capable(inode, NFS_CAP_MODE))
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ && (fattr_supported & NFS_ATTR_FATTR_MODE))
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
/* Why so? Because we want revalidate for devices/FIFOs, and
* that's precisely what we have in nfs_file_inode_operations.
*/
nfsi->attr_gencount = fattr->gencount;
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
- else if (nfs_server_capable(inode, NFS_CAP_ATIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_ATIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATIME);
if (fattr->valid & NFS_ATTR_FATTR_MTIME)
inode->i_mtime = fattr->mtime;
- else if (nfs_server_capable(inode, NFS_CAP_MTIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_MTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
if (fattr->valid & NFS_ATTR_FATTR_CTIME)
inode->i_ctime = fattr->ctime;
- else if (nfs_server_capable(inode, NFS_CAP_CTIME))
+ else if (fattr_supported & NFS_ATTR_FATTR_CTIME)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_CTIME);
if (fattr->valid & NFS_ATTR_FATTR_CHANGE)
inode_set_iversion_raw(inode, fattr->change_attr);
nfs_set_cache_invalid(inode, NFS_INO_INVALID_SIZE);
if (fattr->valid & NFS_ATTR_FATTR_NLINK)
set_nlink(inode, fattr->nlink);
- else if (nfs_server_capable(inode, NFS_CAP_NLINK))
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ else if (fattr_supported & NFS_ATTR_FATTR_NLINK)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_NLINK);
if (fattr->valid & NFS_ATTR_FATTR_OWNER)
inode->i_uid = fattr->uid;
- else if (nfs_server_capable(inode, NFS_CAP_OWNER))
+ else if (fattr_supported & NFS_ATTR_FATTR_OWNER)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
if (fattr->valid & NFS_ATTR_FATTR_GROUP)
inode->i_gid = fattr->gid;
- else if (nfs_server_capable(inode, NFS_CAP_OWNER_GROUP))
+ else if (fattr_supported & NFS_ATTR_FATTR_GROUP)
nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
if (nfs_server_capable(inode, NFS_CAP_XATTR))
nfs_set_cache_invalid(inode, NFS_INO_INVALID_XATTR);
if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
inode->i_blocks = fattr->du.nfs2.blocks;
+ else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED &&
+ fattr->size != 0)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
if (fattr->valid & NFS_ATTR_FATTR_SPACE_USED) {
/*
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- }
-
- if (nfsi->cache_validity != 0)
- nfsi->cache_validity |= NFS_INO_REVAL_FORCED;
+ } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED &&
+ fattr->size != 0)
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_BLOCKS);
nfs_setsecurity(inode, fattr, label);
}
/* Optimization: if the end result is no change, don't RPC */
- attr->ia_valid &= NFS_VALID_ATTRS;
- if ((attr->ia_valid & ~(ATTR_FILE|ATTR_OPEN)) == 0)
+ if (((attr->ia_valid & NFS_VALID_ATTRS) & ~(ATTR_FILE|ATTR_OPEN)) == 0)
return 0;
trace_nfs_setattr_enter(inode);
spin_lock(&inode->i_lock);
NFS_I(inode)->attr_gencount = fattr->gencount;
if ((attr->ia_valid & ATTR_SIZE) != 0) {
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MTIME |
+ NFS_INO_INVALID_BLOCKS);
nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC);
nfs_vmtruncate(inode, attr->ia_size);
}
if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_CTIME;
+ if ((attr->ia_valid & ATTR_KILL_SUID) != 0 &&
+ inode->i_mode & S_ISUID)
+ inode->i_mode &= ~S_ISUID;
+ if ((attr->ia_valid & ATTR_KILL_SGID) != 0 &&
+ (inode->i_mode & (S_ISGID | S_IXGRP)) ==
+ (S_ISGID | S_IXGRP))
+ inode->i_mode &= ~S_ISGID;
if ((attr->ia_valid & ATTR_MODE) != 0) {
int mode = attr->ia_mode & S_IALLUGO;
mode |= inode->i_mode & ~S_IALLUGO;
dput(parent);
}
-static bool nfs_need_revalidate_inode(struct inode *inode)
+static u32 nfs_get_valid_attrmask(struct inode *inode)
{
- if (NFS_I(inode)->cache_validity &
- (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL))
- return true;
- if (nfs_attribute_cache_expired(inode))
- return true;
- return false;
+ unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ u32 reply_mask = STATX_INO | STATX_TYPE;
+
+ if (!(cache_validity & NFS_INO_INVALID_ATIME))
+ reply_mask |= STATX_ATIME;
+ if (!(cache_validity & NFS_INO_INVALID_CTIME))
+ reply_mask |= STATX_CTIME;
+ if (!(cache_validity & NFS_INO_INVALID_MTIME))
+ reply_mask |= STATX_MTIME;
+ if (!(cache_validity & NFS_INO_INVALID_SIZE))
+ reply_mask |= STATX_SIZE;
+ if (!(cache_validity & NFS_INO_INVALID_NLINK))
+ reply_mask |= STATX_NLINK;
+ if (!(cache_validity & NFS_INO_INVALID_MODE))
+ reply_mask |= STATX_MODE;
+ if (!(cache_validity & NFS_INO_INVALID_OTHER))
+ reply_mask |= STATX_UID | STATX_GID;
+ if (!(cache_validity & NFS_INO_INVALID_BLOCKS))
+ reply_mask |= STATX_BLOCKS;
+ return reply_mask;
}
int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
trace_nfs_getattr_enter(inode);
+ request_mask &= STATX_TYPE | STATX_MODE | STATX_NLINK | STATX_UID |
+ STATX_GID | STATX_ATIME | STATX_MTIME | STATX_CTIME |
+ STATX_INO | STATX_SIZE | STATX_BLOCKS;
+
if ((query_flags & AT_STATX_DONT_SYNC) && !force_sync) {
nfs_readdirplus_parent_cache_hit(path->dentry);
- goto out_no_update;
+ goto out_no_revalidate;
}
/* Flush out writes to the server in order to update c/mtime. */
/* Check whether the cached attributes are stale */
do_update |= force_sync || nfs_attribute_cache_expired(inode);
cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- do_update |= cache_validity &
- (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL);
+ do_update |= cache_validity & NFS_INO_INVALID_CHANGE;
if (request_mask & STATX_ATIME)
do_update |= cache_validity & NFS_INO_INVALID_ATIME;
- if (request_mask & (STATX_CTIME|STATX_MTIME))
- do_update |= cache_validity & NFS_INO_REVAL_PAGECACHE;
+ if (request_mask & STATX_CTIME)
+ do_update |= cache_validity & NFS_INO_INVALID_CTIME;
+ if (request_mask & STATX_MTIME)
+ do_update |= cache_validity & NFS_INO_INVALID_MTIME;
+ if (request_mask & STATX_SIZE)
+ do_update |= cache_validity & NFS_INO_INVALID_SIZE;
+ if (request_mask & STATX_NLINK)
+ do_update |= cache_validity & NFS_INO_INVALID_NLINK;
+ if (request_mask & STATX_MODE)
+ do_update |= cache_validity & NFS_INO_INVALID_MODE;
+ if (request_mask & (STATX_UID | STATX_GID))
+ do_update |= cache_validity & NFS_INO_INVALID_OTHER;
if (request_mask & STATX_BLOCKS)
do_update |= cache_validity & NFS_INO_INVALID_BLOCKS;
+
if (do_update) {
/* Update the attribute cache */
if (!(server->flags & NFS_MOUNT_NOAC))
nfs_readdirplus_parent_cache_hit(path->dentry);
out_no_revalidate:
/* Only return attributes that were revalidated. */
- stat->result_mask &= request_mask;
-out_no_update:
+ stat->result_mask = nfs_get_valid_attrmask(inode) | request_mask;
+
generic_fillattr(&init_user_ns, inode, stat);
stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
if (S_ISDIR(inode->i_mode))
{
struct nfs_inode *nfsi;
struct inode *inode;
- struct nfs_server *server;
if (!(ctx->mode & FMODE_WRITE))
return;
return;
if (!list_empty(&nfsi->open_files))
return;
- server = NFS_SERVER(inode);
- if (server->flags & NFS_MOUNT_NOCTO)
+ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOCTO)
return;
- nfs_revalidate_inode(server, inode);
+ nfs_revalidate_inode(inode,
+ NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE);
}
EXPORT_SYMBOL_GPL(nfs_close_context);
/**
* nfs_revalidate_inode - Revalidate the inode attributes
- * @server: pointer to nfs_server struct
* @inode: pointer to inode struct
+ * @flags: cache flags to check
*
* Updates inode attribute information by retrieving the data from the server.
*/
-int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+int nfs_revalidate_inode(struct inode *inode, unsigned long flags)
{
- if (!nfs_need_revalidate_inode(inode))
+ if (!nfs_check_cache_invalid(inode, flags))
return NFS_STALE(inode) ? -ESTALE : 0;
- return __nfs_revalidate_inode(server, inode);
+ return __nfs_revalidate_inode(NFS_SERVER(inode), inode);
}
EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
bool nfs_mapping_need_revalidate_inode(struct inode *inode)
{
- return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+ return nfs_check_cache_invalid(inode, NFS_INO_INVALID_CHANGE) ||
NFS_STALE(inode);
}
if (!nfs_file_has_buffered_writers(nfsi)) {
/* Verify a few of the more important attributes */
if ((fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 && !inode_eq_iversion_raw(inode, fattr->change_attr))
- invalid |= NFS_INO_INVALID_CHANGE
- | NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_CHANGE;
ts = inode->i_mtime;
if ((fattr->valid & NFS_ATTR_FATTR_MTIME) && !timespec64_equal(&ts, &fattr->mtime))
cur_size = i_size_read(inode);
new_isize = nfs_size_to_loff_t(fattr->size);
if (cur_size != new_isize)
- invalid |= NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE;
+ invalid |= NFS_INO_INVALID_SIZE;
}
}
/* Have any file permissions changed? */
if ((fattr->valid & NFS_ATTR_FATTR_MODE) && (inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_MODE;
if ((fattr->valid & NFS_ATTR_FATTR_OWNER) && !uid_eq(inode->i_uid, fattr->uid))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_OTHER;
if ((fattr->valid & NFS_ATTR_FATTR_GROUP) && !gid_eq(inode->i_gid, fattr->gid))
- invalid |= NFS_INO_INVALID_ACCESS
- | NFS_INO_INVALID_ACL
- | NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_OTHER;
/* Has the link count changed? */
if ((fattr->valid & NFS_ATTR_FATTR_NLINK) && inode->i_nlink != fattr->nlink)
- invalid |= NFS_INO_INVALID_OTHER;
+ invalid |= NFS_INO_INVALID_NLINK;
ts = inode->i_atime;
if ((fattr->valid & NFS_ATTR_FATTR_ATIME) && !timespec64_equal(&ts, &fattr->atime))
#endif
/**
- * nfs_inode_attrs_need_update - check if the inode attributes need updating
+ * nfs_inode_attrs_cmp_generic - compare attributes
+ * @fattr: attributes
* @inode: pointer to inode
+ *
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
+ * Note also the check for wraparound of 'attr_gencount'
+ *
+ * The function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. Otherwise it returns
+ * the value '0'.
+ */
+static int nfs_inode_attrs_cmp_generic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ unsigned long attr_gencount = NFS_I(inode)->attr_gencount;
+
+ return (long)(fattr->gencount - attr_gencount) > 0 ||
+ (long)(attr_gencount - nfs_read_attr_generation_counter()) > 0;
+}
+
+/**
+ * nfs_inode_attrs_cmp_monotonic - compare attributes
* @fattr: attributes
+ * @inode: pointer to inode
*
* Attempt to divine whether or not an RPC call reply carrying stale
* attributes got scheduled after another call carrying updated ones.
*
- * To do so, the function first assumes that a more recent ctime means
- * that the attributes in fattr are newer, however it also attempt to
- * catch the case where ctime either didn't change, or went backwards
- * (if someone reset the clock on the server) by looking at whether
- * or not this RPC call was started after the inode was last updated.
- * Note also the check for wraparound of 'attr_gencount'
+ * We assume that the server observes monotonic semantics for
+ * the change attribute, so a larger value means that the attributes in
+ * @fattr are more recent, in which case the function returns the
+ * value '1'.
+ * A return value of '0' indicates no measurable change
+ * A return value of '-1' means that the attributes in @inode are
+ * more recent.
+ */
+static int nfs_inode_attrs_cmp_monotonic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ s64 diff = fattr->change_attr - inode_peek_iversion_raw(inode);
+ if (diff > 0)
+ return 1;
+ return diff == 0 ? 0 : -1;
+}
+
+/**
+ * nfs_inode_attrs_cmp_strict_monotonic - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
*
- * The function returns 'true' if it thinks the attributes in 'fattr' are
- * more recent than the ones cached in the inode.
+ * Attempt to divine whether or not an RPC call reply carrying stale
+ * attributes got scheduled after another call carrying updated ones.
*
+ * We assume that the server observes strictly monotonic semantics for
+ * the change attribute, so a larger value means that the attributes in
+ * @fattr are more recent, in which case the function returns the
+ * value '1'.
+ * A return value of '-1' means that the attributes in @inode are
+ * more recent or unchanged.
*/
-static int nfs_inode_attrs_need_update(const struct inode *inode, const struct nfs_fattr *fattr)
+static int nfs_inode_attrs_cmp_strict_monotonic(const struct nfs_fattr *fattr,
+ const struct inode *inode)
{
- const struct nfs_inode *nfsi = NFS_I(inode);
+ return nfs_inode_attrs_cmp_monotonic(fattr, inode) > 0 ? 1 : -1;
+}
- return ((long)fattr->gencount - (long)nfsi->attr_gencount) > 0 ||
- ((long)nfsi->attr_gencount - (long)nfs_read_attr_generation_counter() > 0);
+/**
+ * nfs_inode_attrs_cmp - compare attributes
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * This function returns '1' if it thinks the attributes in @fattr are
+ * more recent than the ones cached in @inode. It returns '-1' if
+ * the attributes in @inode are more recent than the ones in @fattr,
+ * and it returns 0 if not sure.
+ */
+static int nfs_inode_attrs_cmp(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ if (nfs_inode_attrs_cmp_generic(fattr, inode) > 0)
+ return 1;
+ switch (NFS_SERVER(inode)->change_attr_type) {
+ case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+ break;
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+ break;
+ return nfs_inode_attrs_cmp_monotonic(fattr, inode);
+ default:
+ if (!(fattr->valid & NFS_ATTR_FATTR_CHANGE))
+ break;
+ return nfs_inode_attrs_cmp_strict_monotonic(fattr, inode);
+ }
+ return 0;
+}
+
+/**
+ * nfs_inode_finish_partial_attr_update - complete a previous inode update
+ * @fattr: attributes
+ * @inode: pointer to inode
+ *
+ * Returns '1' if the last attribute update left the inode cached
+ * attributes in a partially unrevalidated state, and @fattr
+ * matches the change attribute of that partial update.
+ * Otherwise returns '0'.
+ */
+static int nfs_inode_finish_partial_attr_update(const struct nfs_fattr *fattr,
+ const struct inode *inode)
+{
+ const unsigned long check_valid =
+ NFS_INO_INVALID_ATIME | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS | NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK;
+ unsigned long cache_validity = NFS_I(inode)->cache_validity;
+
+ if (!(cache_validity & NFS_INO_INVALID_CHANGE) &&
+ (cache_validity & check_valid) != 0 &&
+ (fattr->valid & NFS_ATTR_FATTR_CHANGE) != 0 &&
+ nfs_inode_attrs_cmp_monotonic(fattr, inode) == 0)
+ return 1;
+ return 0;
}
-static int nfs_refresh_inode_locked(struct inode *inode, struct nfs_fattr *fattr)
+static int nfs_refresh_inode_locked(struct inode *inode,
+ struct nfs_fattr *fattr)
{
- int ret;
+ int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
+ int ret = 0;
trace_nfs_refresh_inode_enter(inode);
- if (nfs_inode_attrs_need_update(inode, fattr))
+ if (attr_cmp > 0 || nfs_inode_finish_partial_attr_update(fattr, inode))
ret = nfs_update_inode(inode, fattr);
- else
+ else if (attr_cmp == 0)
ret = nfs_check_inode_attributes(inode, fattr);
trace_nfs_refresh_inode_exit(inode, ret);
*/
int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr)
{
+ int attr_cmp = nfs_inode_attrs_cmp(fattr, inode);
int status;
/* Don't do a WCC update if these attributes are already stale */
- if ((fattr->valid & NFS_ATTR_FATTR) == 0 ||
- !nfs_inode_attrs_need_update(inode, fattr)) {
+ if (attr_cmp < 0)
+ return 0;
+ if ((fattr->valid & NFS_ATTR_FATTR) == 0 || !attr_cmp) {
fattr->valid &= ~(NFS_ATTR_FATTR_PRECHANGE
| NFS_ATTR_FATTR_PRESIZE
| NFS_ATTR_FATTR_PREMTIME
*/
static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr)
{
- struct nfs_server *server;
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs_inode *nfsi = NFS_I(inode);
loff_t cur_isize, new_isize;
+ u64 fattr_supported = server->fattr_valid;
unsigned long invalid = 0;
unsigned long now = jiffies;
unsigned long save_cache_validity;
goto out_err;
}
- server = NFS_SERVER(inode);
/* Update the fsid? */
if (S_ISDIR(inode->i_mode) && (fattr->valid & NFS_ATTR_FATTR_FSID) &&
!nfs_fsid_equal(&server->fsid, &fattr->fsid) &&
nfsi->cache_validity &= ~(NFS_INO_INVALID_ATTR
| NFS_INO_INVALID_ATIME
| NFS_INO_REVAL_FORCED
- | NFS_INO_REVAL_PAGECACHE
| NFS_INO_INVALID_BLOCKS);
/* Do atomic weak cache consistency updates */
nfs_wcc_update_inode(inode, fattr);
if (pnfs_layoutcommit_outstanding(inode)) {
- nfsi->cache_validity |= save_cache_validity & NFS_INO_INVALID_ATTR;
+ nfsi->cache_validity |=
+ save_cache_validity &
+ (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME | NFS_INO_INVALID_SIZE |
+ NFS_INO_INVALID_BLOCKS);
cache_revalidated = false;
}
save_cache_validity |= NFS_INO_INVALID_CTIME
| NFS_INO_INVALID_MTIME
| NFS_INO_INVALID_SIZE
+ | NFS_INO_INVALID_BLOCKS
+ | NFS_INO_INVALID_NLINK
+ | NFS_INO_INVALID_MODE
| NFS_INO_INVALID_OTHER;
if (S_ISDIR(inode->i_mode))
nfs_force_lookup_revalidate(inode);
attr_changed = true;
}
} else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_CHANGE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_REVAL_FORCED);
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_CHANGE;
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_MTIME) {
inode->i_mtime = fattr->mtime;
- } else if (server->caps & NFS_CAP_MTIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_MTIME
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_MTIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_MTIME;
cache_revalidated = false;
}
if (fattr->valid & NFS_ATTR_FATTR_CTIME) {
inode->i_ctime = fattr->ctime;
- } else if (server->caps & NFS_CAP_CTIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_CTIME
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_CTIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_CTIME;
cache_revalidated = false;
}
(long long)cur_isize,
(long long)new_isize);
}
+ if (new_isize == 0 &&
+ !(fattr->valid & (NFS_ATTR_FATTR_SPACE_USED |
+ NFS_ATTR_FATTR_BLOCKS_USED))) {
+ fattr->du.nfs3.used = 0;
+ fattr->valid |= NFS_ATTR_FATTR_SPACE_USED;
+ }
} else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_SIZE
- | NFS_INO_REVAL_PAGECACHE
- | NFS_INO_REVAL_FORCED);
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_SIZE;
cache_revalidated = false;
}
-
if (fattr->valid & NFS_ATTR_FATTR_ATIME)
inode->i_atime = fattr->atime;
- else if (server->caps & NFS_CAP_ATIME) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_ATIME
- | NFS_INO_REVAL_FORCED);
+ else if (fattr_supported & NFS_ATTR_FATTR_ATIME) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_ATIME;
cache_revalidated = false;
}
| NFS_INO_INVALID_ACL;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_MODE) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_MODE) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_MODE;
cache_revalidated = false;
}
inode->i_uid = fattr->uid;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_OWNER) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_OWNER) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_OTHER;
cache_revalidated = false;
}
inode->i_gid = fattr->gid;
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_OWNER_GROUP) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_GROUP) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_OTHER;
cache_revalidated = false;
}
set_nlink(inode, fattr->nlink);
attr_changed = true;
}
- } else if (server->caps & NFS_CAP_NLINK) {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_OTHER
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_NLINK) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_NLINK;
cache_revalidated = false;
}
* report the blocks in 512byte units
*/
inode->i_blocks = nfs_calc_block_size(fattr->du.nfs3.used);
- } else if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED)
+ } else if (fattr_supported & NFS_ATTR_FATTR_SPACE_USED) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_BLOCKS;
+ cache_revalidated = false;
+ }
+
+ if (fattr->valid & NFS_ATTR_FATTR_BLOCKS_USED) {
inode->i_blocks = fattr->du.nfs2.blocks;
- else {
- nfsi->cache_validity |= save_cache_validity &
- (NFS_INO_INVALID_BLOCKS
- | NFS_INO_REVAL_FORCED);
+ } else if (fattr_supported & NFS_ATTR_FATTR_BLOCKS_USED) {
+ nfsi->cache_validity |=
+ save_cache_validity & NFS_INO_INVALID_BLOCKS;
cache_revalidated = false;
}
/* Update attrtimeo value if we're out of the unstable period */
if (attr_changed) {
- invalid &= ~NFS_INO_INVALID_ATTR;
nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE);
nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
nfsi->attrtimeo_timestamp = now;
nfsi->attrtimeo_timestamp = now;
}
/* Set the barrier to be more recent than this fattr */
- if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0)
+ if ((long)(fattr->gencount - nfsi->attr_gencount) > 0)
nfsi->attr_gencount = fattr->gencount;
}
struct net *net;
};
-extern int nfs_mount(struct nfs_mount_request *info);
+extern int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans);
extern void nfs_umount(const struct nfs_mount_request *info);
/* client.c */
}
/**
- * nfs_end_io_direct - declare the file is being used for direct i/o
+ * nfs_start_io_direct - declare the file is being used for direct i/o
* @inode: file inode
*
* Declare that a direct I/O operation is about to start, and ensure
/**
* nfs_mount - Obtain an NFS file handle for the given host and path
* @info: pointer to mount request arguments
+ * @timeo: deciseconds the mount waits for a response before it retries
+ * @retrans: number of times the mount retries a request
*
- * Uses default timeout parameters specified by underlying transport. On
- * successful return, the auth_flavs list and auth_flav_len will be populated
- * with the list from the server or a faked-up list if the server didn't
- * provide one.
+ * Uses timeout parameters specified by caller. On successful return, the
+ * auth_flavs list and auth_flav_len will be populated with the list from the
+ * server or a faked-up list if the server didn't provide one.
*/
-int nfs_mount(struct nfs_mount_request *info)
+int nfs_mount(struct nfs_mount_request *info, int timeo, int retrans)
{
+ struct rpc_timeout mnt_timeout;
struct mountres result = {
.fh = info->fh,
.auth_count = info->auth_flav_len,
.protocol = info->protocol,
.address = info->sap,
.addrsize = info->salen,
+ .timeout = &mnt_timeout,
.servername = info->hostname,
.program = &mnt_program,
.version = info->version,
if (info->noresvport)
args.flags |= RPC_CLNT_CREATE_NONPRIVPORT;
+ nfs_init_timeout_values(&mnt_timeout, info->protocol, timeo, retrans);
mnt_clnt = rpc_create(&args);
if (IS_ERR(mnt_clnt))
goto out_clnt_err;
if (!nfs_server_capable(inode, NFS_CAP_ACLS))
return ERR_PTR(-EOPNOTSUPP);
- status = nfs_revalidate_inode(server, inode);
+ status = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (status < 0)
return ERR_PTR(status);
if (unlikely(!p))
return -EIO;
length = be32_to_cpup(p++);
- if (unlikely(length > NFS3_FHSIZE))
+ if (unlikely(length > NFS3_FHSIZE || length == 0))
goto out_toobig;
p = xdr_inline_decode(xdr, length);
if (unlikely(!p))
memcpy(fh->data, p, length);
return 0;
out_toobig:
- dprintk("NFS: file handle size (%u) too big\n", length);
+ trace_nfs_xdr_bad_filehandle(xdr, NFSERR_BADHANDLE);
return -E2BIG;
}
/* ignore properties */
result->lease_time = 0;
+ result->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
return 0;
}
{
struct inode *inode = file_inode(filep);
struct nfs_server *server = NFS_SERVER(inode);
+ u32 bitmask[3];
struct nfs42_falloc_args args = {
.falloc_fh = NFS_FH(inode),
.falloc_offset = offset,
.falloc_length = len,
- .falloc_bitmask = nfs4_fattr_bitmap,
+ .falloc_bitmask = bitmask,
};
struct nfs42_falloc_res res = {
.falloc_server = server,
return status;
}
+ memcpy(bitmask, server->cache_consistency_bitmask, sizeof(bitmask));
+ if (server->attr_bitmask[1] & FATTR4_WORD1_SPACE_USED)
+ bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
res.falloc_fattr = nfs_alloc_fattr();
if (!res.falloc_fattr)
return -ENOMEM;
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
if (status == 0)
- status = nfs_post_op_update_inode(inode, res.falloc_fattr);
+ status = nfs_post_op_update_inode_force_wcc(inode,
+ res.falloc_fattr);
kfree(res.falloc_fattr);
return status;
static int nfs42_proc_fallocate(struct rpc_message *msg, struct file *filep,
loff_t offset, loff_t len)
{
- struct nfs_server *server = NFS_SERVER(file_inode(filep));
+ struct inode *inode = file_inode(filep);
+ struct nfs_server *server = NFS_SERVER(inode);
struct nfs4_exception exception = { };
struct nfs_lock_context *lock;
int err;
if (IS_ERR(lock))
return PTR_ERR(lock);
- exception.inode = file_inode(filep);
+ exception.inode = inode;
exception.state = lock->open_context->state;
+ err = nfs_sync_inode(inode);
+ if (err)
+ goto out;
+
do {
err = _nfs42_proc_fallocate(msg, filep, lock, offset, len);
if (err == -ENOTSUPP) {
}
err = nfs4_handle_exception(server, err, &exception);
} while (exception.retry);
-
+out:
nfs_put_lock_context(lock);
return err;
}
return -EOPNOTSUPP;
inode_lock(inode);
- err = nfs_sync_inode(inode);
- if (err)
- goto out_unlock;
err = nfs42_proc_fallocate(&msg, filep, offset, len);
if (err == 0)
truncate_pagecache_range(inode, offset, (offset + len) -1);
if (err == -EOPNOTSUPP)
NFS_SERVER(inode)->caps &= ~NFS_CAP_DEALLOCATE;
-out_unlock:
+
inode_unlock(inode);
return err;
}
return status;
}
+/**
+ * nfs42_copy_dest_done - perform inode cache updates after clone/copy offload
+ * @inode: pointer to destination inode
+ * @pos: destination offset
+ * @len: copy length
+ *
+ * Punch a hole in the inode page cache, so that the NFS client will
+ * know to retrieve new data.
+ * Update the file size if necessary, and then mark the inode as having
+ * invalid cached values for change attribute, ctime, mtime and space used.
+ */
+static void nfs42_copy_dest_done(struct inode *inode, loff_t pos, loff_t len)
+{
+ loff_t newsize = pos + len;
+ loff_t end = newsize - 1;
+
+ truncate_pagecache_range(inode, pos, end);
+ spin_lock(&inode->i_lock);
+ if (newsize > i_size_read(inode))
+ i_size_write(inode, newsize);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_MTIME |
+ NFS_INO_INVALID_BLOCKS);
+ spin_unlock(&inode->i_lock);
+}
+
static ssize_t _nfs42_proc_copy(struct file *src,
struct nfs_lock_context *src_lock,
struct file *dst,
goto out;
}
- truncate_pagecache_range(dst_inode, pos_dst,
- pos_dst + res->write_res.count);
- spin_lock(&dst_inode->i_lock);
- nfs_set_cache_invalid(
- dst_inode, NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED |
- NFS_INO_INVALID_SIZE | NFS_INO_INVALID_ATTR |
- NFS_INO_INVALID_DATA);
- spin_unlock(&dst_inode->i_lock);
- spin_lock(&src_inode->i_lock);
- nfs_set_cache_invalid(src_inode, NFS_INO_REVAL_PAGECACHE |
- NFS_INO_REVAL_FORCED |
- NFS_INO_INVALID_ATIME);
- spin_unlock(&src_inode->i_lock);
+ nfs42_copy_dest_done(dst_inode, pos_dst, res->write_res.count);
+ nfs_invalidate_atime(src_inode);
status = res->write_res.count;
out:
if (args->sync)
if (status)
return status;
- return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
+ if (whence == SEEK_DATA && res.sr_eof)
+ return -NFS4ERR_NXIO;
+ else
+ return vfs_setpos(filep, res.sr_offset, inode->i_sb->s_maxbytes);
}
loff_t nfs42_proc_llseek(struct file *filep, loff_t offset, int whence)
status = nfs4_call_sync(server->client, server, msg,
&args.seq_args, &res.seq_res, 0);
- if (status == 0)
+ if (status == 0) {
+ nfs42_copy_dest_done(dst_inode, dst_offset, count);
status = nfs_post_op_update_inode(dst_inode, res.dst_fattr);
+ }
kfree(res.dst_fattr);
return status;
* make it easier to copy the value after an RPC, even if
* the value will not be passed up to application (e.g.
* for a 'query' getxattr with NULL buffer).
- * @len: Length of the value. Can be 0 for zero-length attribues.
+ * @len: Length of the value. Can be 0 for zero-length attributes.
* @value and @pages will be NULL if @len is 0.
*/
static struct nfs4_xattr_entry *
*/
void nfs42_ssc_register_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs42_ssc_register(&nfs4_ssc_clnt_ops_tbl);
-#endif
}
/**
*/
void nfs42_ssc_unregister_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs42_ssc_unregister(&nfs4_ssc_clnt_ops_tbl);
-#endif
}
#endif /* CONFIG_NFS_V4_2 */
static int nfs41_free_stateid(struct nfs_server *, const nfs4_stateid *,
const struct cred *, bool);
#endif
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label);
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ],
+ const __u32 *src, struct inode *inode,
+ struct nfs_server *server,
+ struct nfs4_label *label);
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
static inline struct nfs4_label *
| FATTR4_WORD1_FS_LAYOUT_TYPES,
FATTR4_WORD2_LAYOUT_BLKSIZE
| FATTR4_WORD2_CLONE_BLKSIZE
+ | FATTR4_WORD2_CHANGE_ATTR_TYPE
| FATTR4_WORD2_XATTR_SUPPORT
};
};
static void nfs4_bitmap_copy_adjust(__u32 *dst, const __u32 *src,
- struct inode *inode)
+ struct inode *inode, unsigned long flags)
{
unsigned long cache_validity;
if (!inode || !nfs4_have_delegation(inode, FMODE_READ))
return;
- cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
- if (!(cache_validity & NFS_INO_REVAL_FORCED))
- cache_validity &= ~(NFS_INO_INVALID_CHANGE
- | NFS_INO_INVALID_SIZE);
+ cache_validity = READ_ONCE(NFS_I(inode)->cache_validity) | flags;
+ /* Remove the attributes over which we have full control */
+ dst[1] &= ~FATTR4_WORD1_RAWDEV;
if (!(cache_validity & NFS_INO_INVALID_SIZE))
dst[0] &= ~FATTR4_WORD0_SIZE;
if (!(cache_validity & NFS_INO_INVALID_CHANGE))
dst[0] &= ~FATTR4_WORD0_CHANGE;
-}
-static void nfs4_bitmap_copy_adjust_setattr(__u32 *dst,
- const __u32 *src, struct inode *inode)
-{
- nfs4_bitmap_copy_adjust(dst, src, inode);
+ if (!(cache_validity & NFS_INO_INVALID_MODE))
+ dst[1] &= ~FATTR4_WORD1_MODE;
+ if (!(cache_validity & NFS_INO_INVALID_OTHER))
+ dst[1] &= ~(FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP);
}
static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry,
static void
nfs4_inc_nlink_locked(struct inode *inode)
{
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_NLINK);
inc_nlink(inode);
}
+static void
+nfs4_inc_nlink(struct inode *inode)
+{
+ spin_lock(&inode->i_lock);
+ nfs4_inc_nlink_locked(inode);
+ spin_unlock(&inode->i_lock);
+}
+
static void
nfs4_dec_nlink_locked(struct inode *inode)
{
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+ NFS_INO_INVALID_CTIME |
+ NFS_INO_INVALID_NLINK);
drop_nlink(inode);
}
unsigned long timestamp, unsigned long cache_validity)
{
struct nfs_inode *nfsi = NFS_I(inode);
+ u64 change_attr = inode_peek_iversion_raw(inode);
cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
- if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) {
- nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
+ switch (NFS_SERVER(inode)->change_attr_type) {
+ case NFS4_CHANGE_TYPE_IS_UNDEFINED:
+ break;
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ if ((s64)(change_attr - cinfo->after) > 0)
+ goto out;
+ break;
+ default:
+ if ((s64)(change_attr - cinfo->after) >= 0)
+ goto out;
+ }
+
+ if (cinfo->atomic && cinfo->before == change_attr) {
nfsi->attrtimeo_timestamp = jiffies;
} else {
if (S_ISDIR(inode->i_mode)) {
cache_validity |= NFS_INO_REVAL_PAGECACHE;
}
- if (cinfo->before != inode_peek_iversion_raw(inode))
+ if (cinfo->before != change_attr)
cache_validity |= NFS_INO_INVALID_ACCESS |
NFS_INO_INVALID_ACL |
NFS_INO_INVALID_XATTR;
inode_set_iversion_raw(inode, cinfo->after);
nfsi->read_cache_jiffies = timestamp;
nfsi->attr_gencount = nfs_inc_attr_generation_counter();
- nfs_set_cache_invalid(inode, cache_validity);
nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
+out:
+ nfs_set_cache_invalid(inode, cache_validity);
}
void
.inode = inode,
.stateid = &arg.stateid,
};
+ unsigned long adjust_flags = NFS_INO_INVALID_CHANGE;
int err;
+ if (sattr->ia_valid & (ATTR_MODE | ATTR_KILL_SUID | ATTR_KILL_SGID))
+ adjust_flags |= NFS_INO_INVALID_MODE;
+ if (sattr->ia_valid & (ATTR_UID | ATTR_GID))
+ adjust_flags |= NFS_INO_INVALID_OTHER;
+
do {
- nfs4_bitmap_copy_adjust_setattr(bitmask,
- nfs4_bitmask(server, olabel),
- inode);
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, olabel),
+ inode, adjust_flags);
err = _nfs4_do_setattr(inode, &arg, &res, cred, ctx);
switch (err) {
struct nfs4_closedata *calldata = data;
struct nfs4_state *state = calldata->state;
struct inode *inode = calldata->inode;
+ struct nfs_server *server = NFS_SERVER(inode);
struct pnfs_layout_hdr *lo;
bool is_rdonly, is_wronly, is_rdwr;
int call_close = 0;
if (calldata->arg.fmode == 0 || calldata->arg.fmode == FMODE_READ) {
/* Close-to-open cache consistency revalidation */
if (!nfs4_have_delegation(inode, FMODE_READ)) {
- calldata->arg.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
- nfs4_bitmask_adjust(calldata->arg.bitmask, inode, NFS_SERVER(inode), NULL);
+ nfs4_bitmask_set(calldata->arg.bitmask_store,
+ server->cache_consistency_bitmask,
+ inode, server, NULL);
+ calldata->arg.bitmask = calldata->arg.bitmask_store;
} else
calldata->arg.bitmask = NULL;
}
res.attr_bitmask[2] &= FATTR4_WORD2_NFS42_MASK;
}
memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
- server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
- NFS_CAP_SYMLINKS|NFS_CAP_FILEID|
- NFS_CAP_MODE|NFS_CAP_NLINK|NFS_CAP_OWNER|
- NFS_CAP_OWNER_GROUP|NFS_CAP_ATIME|
- NFS_CAP_CTIME|NFS_CAP_MTIME|
- NFS_CAP_SECURITY_LABEL);
+ server->caps &= ~(NFS_CAP_ACLS | NFS_CAP_HARDLINKS |
+ NFS_CAP_SYMLINKS| NFS_CAP_SECURITY_LABEL);
+ server->fattr_valid = NFS_ATTR_FATTR_V4;
if (res.attr_bitmask[0] & FATTR4_WORD0_ACL &&
res.acl_bitmask & ACL4_SUPPORT_ALLOW_ACL)
server->caps |= NFS_CAP_ACLS;
server->caps |= NFS_CAP_HARDLINKS;
if (res.has_symlinks != 0)
server->caps |= NFS_CAP_SYMLINKS;
- if (res.attr_bitmask[0] & FATTR4_WORD0_FILEID)
- server->caps |= NFS_CAP_FILEID;
- if (res.attr_bitmask[1] & FATTR4_WORD1_MODE)
- server->caps |= NFS_CAP_MODE;
- if (res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS)
- server->caps |= NFS_CAP_NLINK;
- if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER)
- server->caps |= NFS_CAP_OWNER;
- if (res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP)
- server->caps |= NFS_CAP_OWNER_GROUP;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS)
- server->caps |= NFS_CAP_ATIME;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA)
- server->caps |= NFS_CAP_CTIME;
- if (res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY)
- server->caps |= NFS_CAP_MTIME;
+ if (!(res.attr_bitmask[0] & FATTR4_WORD0_FILEID))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_FILEID;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_MODE))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_MODE;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_NUMLINKS))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_NLINK;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER))
+ server->fattr_valid &= ~(NFS_ATTR_FATTR_OWNER |
+ NFS_ATTR_FATTR_OWNER_NAME);
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_OWNER_GROUP))
+ server->fattr_valid &= ~(NFS_ATTR_FATTR_GROUP |
+ NFS_ATTR_FATTR_GROUP_NAME);
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_SPACE_USED))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_SPACE_USED;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_ACCESS))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_ATIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_METADATA))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_CTIME;
+ if (!(res.attr_bitmask[1] & FATTR4_WORD1_TIME_MODIFY))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_MTIME;
#ifdef CONFIG_NFS_V4_SECURITY_LABEL
- if (res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL)
- server->caps |= NFS_CAP_SECURITY_LABEL;
+ if (!(res.attr_bitmask[2] & FATTR4_WORD2_SECURITY_LABEL))
+ server->fattr_valid &= ~NFS_ATTR_FATTR_V4_SECURITY_LABEL;
#endif
memcpy(server->attr_bitmask_nl, res.attr_bitmask,
sizeof(server->attr_bitmask));
if (inode && (server->flags & NFS_MOUNT_SOFTREVAL))
task_flags |= RPC_TASK_TIMEOUT;
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
-
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode, 0);
nfs_fattr_init(fattr);
nfs4_init_sequence(&args.seq_args, &res.seq_res, 0, 0);
return nfs4_do_call_sync(server->client, server, &msg,
status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
- NFS_INO_INVALID_DATA);
/* Removing a directory decrements nlink in the parent */
if (ftype == NF4DIR && dir->i_nlink > 2)
nfs4_dec_nlink_locked(dir);
+ nfs4_update_changeattr_locked(dir, &res.cinfo, timestamp,
+ NFS_INO_INVALID_DATA);
spin_unlock(&dir->i_lock);
}
return status;
/* Note: If we moved a directory, nlink will change */
nfs4_update_changeattr(old_dir, &res->old_cinfo,
res->old_fattr->time_start,
- NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK |
NFS_INO_INVALID_DATA);
nfs4_update_changeattr(new_dir, &res->new_cinfo,
res->new_fattr->time_start,
- NFS_INO_INVALID_OTHER |
+ NFS_INO_INVALID_NLINK |
NFS_INO_INVALID_DATA);
} else
nfs4_update_changeattr(old_dir, &res->old_cinfo,
}
nfs4_inode_make_writeable(inode);
- nfs4_bitmap_copy_adjust_setattr(bitmask, nfs4_bitmask(server, res.label), inode);
-
+ nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, res.label), inode,
+ NFS_INO_INVALID_CHANGE);
status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
if (!status) {
nfs4_update_changeattr(dir, &res.cinfo, res.fattr->time_start,
NFS_INO_INVALID_DATA);
+ nfs4_inc_nlink(inode);
status = nfs_post_op_update_inode(inode, res.fattr);
if (!status)
nfs_setsecurity(inode, res.fattr, res.label);
&data->arg.seq_args, &data->res.seq_res, 1);
if (status == 0) {
spin_lock(&dir->i_lock);
- nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
- data->res.fattr->time_start,
- NFS_INO_INVALID_DATA);
/* Creating a directory bumps nlink in the parent */
if (data->arg.ftype == NF4DIR)
nfs4_inc_nlink_locked(dir);
+ nfs4_update_changeattr_locked(dir, &data->res.dir_cinfo,
+ data->res.fattr->time_start,
+ NFS_INO_INVALID_DATA);
spin_unlock(&dir->i_lock);
status = nfs_instantiate(dentry, data->res.fh, data->res.fattr, data->res.label);
}
return nfs4_have_delegation(hdr->inode, FMODE_READ) == 0;
}
-static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
- struct nfs_server *server,
- struct nfs4_label *label)
+static void nfs4_bitmask_set(__u32 bitmask[NFS4_BITMASK_SZ], const __u32 *src,
+ struct inode *inode, struct nfs_server *server,
+ struct nfs4_label *label)
{
-
unsigned long cache_validity = READ_ONCE(NFS_I(inode)->cache_validity);
+ unsigned int i;
- if ((cache_validity & NFS_INO_INVALID_DATA) ||
- (cache_validity & NFS_INO_REVAL_PAGECACHE) ||
- (cache_validity & NFS_INO_REVAL_FORCED) ||
- (cache_validity & NFS_INO_INVALID_OTHER))
- nfs4_bitmap_copy_adjust(bitmask, nfs4_bitmask(server, label), inode);
+ memcpy(bitmask, src, sizeof(*bitmask) * NFS4_BITMASK_SZ);
+ if (cache_validity & NFS_INO_INVALID_CHANGE)
+ bitmask[0] |= FATTR4_WORD0_CHANGE;
if (cache_validity & NFS_INO_INVALID_ATIME)
bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
+ if (cache_validity & NFS_INO_INVALID_MODE)
+ bitmask[1] |= FATTR4_WORD1_MODE;
if (cache_validity & NFS_INO_INVALID_OTHER)
- bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
- FATTR4_WORD1_OWNER_GROUP |
- FATTR4_WORD1_NUMLINKS;
+ bitmask[1] |= FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP;
+ if (cache_validity & NFS_INO_INVALID_NLINK)
+ bitmask[1] |= FATTR4_WORD1_NUMLINKS;
if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
- if (cache_validity & NFS_INO_INVALID_CHANGE)
- bitmask[0] |= FATTR4_WORD0_CHANGE;
if (cache_validity & NFS_INO_INVALID_CTIME)
bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
if (cache_validity & NFS_INO_INVALID_MTIME)
bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
- if (cache_validity & NFS_INO_INVALID_SIZE)
- bitmask[0] |= FATTR4_WORD0_SIZE;
if (cache_validity & NFS_INO_INVALID_BLOCKS)
bitmask[1] |= FATTR4_WORD1_SPACE_USED;
+
+ if (cache_validity & NFS_INO_INVALID_SIZE)
+ bitmask[0] |= FATTR4_WORD0_SIZE;
+
+ for (i = 0; i < NFS4_BITMASK_SZ; i++)
+ bitmask[i] &= server->attr_bitmask[i];
}
static void nfs4_proc_write_setup(struct nfs_pgio_header *hdr,
hdr->args.bitmask = NULL;
hdr->res.fattr = NULL;
} else {
- hdr->args.bitmask = server->cache_consistency_bitmask;
- nfs4_bitmask_adjust(hdr->args.bitmask, hdr->inode, server, NULL);
+ nfs4_bitmask_set(hdr->args.bitmask_store,
+ server->cache_consistency_bitmask,
+ hdr->inode, server, NULL);
+ hdr->args.bitmask = hdr->args.bitmask_store;
}
if (!hdr->pgio_done_cb)
if (!nfs4_server_supports_acls(server))
return -EOPNOTSUPP;
- ret = nfs_revalidate_inode(server, inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret < 0)
return ret;
if (NFS_I(inode)->cache_validity & NFS_INO_INVALID_ACL)
data->args.fhandle = &data->fh;
data->args.stateid = &data->stateid;
- data->args.bitmask = server->cache_consistency_bitmask;
- nfs4_bitmask_adjust(data->args.bitmask, inode, server, NULL);
+ nfs4_bitmask_set(data->args.bitmask_store,
+ server->cache_consistency_bitmask, inode, server,
+ NULL);
+ data->args.bitmask = data->args.bitmask_store;
nfs_copy_fh(&data->fh, NFS_FH(inode));
nfs4_stateid_copy(&data->stateid, stateid);
data->res.fattr = &data->fattr;
#ifdef CONFIG_NFS_V4_1
struct nfs4_lock_waiter {
- struct task_struct *task;
struct inode *inode;
- struct nfs_lowner *owner;
+ struct nfs_lowner owner;
+ wait_queue_entry_t wait;
};
static int
nfs4_wake_lock_waiter(wait_queue_entry_t *wait, unsigned int mode, int flags, void *key)
{
- int ret;
- struct nfs4_lock_waiter *waiter = wait->private;
+ struct nfs4_lock_waiter *waiter =
+ container_of(wait, struct nfs4_lock_waiter, wait);
/* NULL key means to wake up everyone */
if (key) {
struct cb_notify_lock_args *cbnl = key;
struct nfs_lowner *lowner = &cbnl->cbnl_owner,
- *wowner = waiter->owner;
+ *wowner = &waiter->owner;
/* Only wake if the callback was for the same owner. */
if (lowner->id != wowner->id || lowner->s_dev != wowner->s_dev)
return 0;
}
- /* override "private" so we can use default_wake_function */
- wait->private = waiter->task;
- ret = woken_wake_function(wait, mode, flags, key);
- if (ret)
- list_del_init(&wait->entry);
- wait->private = waiter;
- return ret;
+ return woken_wake_function(wait, mode, flags, key);
}
static int
nfs4_retry_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
{
- int status = -ERESTARTSYS;
struct nfs4_lock_state *lsp = request->fl_u.nfs4_fl.owner;
struct nfs_server *server = NFS_SERVER(state->inode);
struct nfs_client *clp = server->nfs_client;
wait_queue_head_t *q = &clp->cl_lock_waitq;
- struct nfs_lowner owner = { .clientid = clp->cl_clientid,
- .id = lsp->ls_seqid.owner_id,
- .s_dev = server->s_dev };
- struct nfs4_lock_waiter waiter = { .task = current,
- .inode = state->inode,
- .owner = &owner};
- wait_queue_entry_t wait;
+ struct nfs4_lock_waiter waiter = {
+ .inode = state->inode,
+ .owner = { .clientid = clp->cl_clientid,
+ .id = lsp->ls_seqid.owner_id,
+ .s_dev = server->s_dev },
+ };
+ int status;
/* Don't bother with waitqueue if we don't expect a callback */
if (!test_bit(NFS_STATE_MAY_NOTIFY_LOCK, &state->flags))
return nfs4_retry_setlk_simple(state, cmd, request);
- init_wait(&wait);
- wait.private = &waiter;
- wait.func = nfs4_wake_lock_waiter;
+ init_wait(&waiter.wait);
+ waiter.wait.func = nfs4_wake_lock_waiter;
+ add_wait_queue(q, &waiter.wait);
- while(!signalled()) {
- add_wait_queue(q, &wait);
+ do {
status = nfs4_proc_setlk(state, cmd, request);
- if ((status != -EAGAIN) || IS_SETLK(cmd)) {
- finish_wait(q, &wait);
+ if (status != -EAGAIN || IS_SETLK(cmd))
break;
- }
status = -ERESTARTSYS;
freezer_do_not_count();
- wait_woken(&wait, TASK_INTERRUPTIBLE, NFS4_LOCK_MAXTIMEOUT);
+ wait_woken(&waiter.wait, TASK_INTERRUPTIBLE,
+ NFS4_LOCK_MAXTIMEOUT);
freezer_count();
- finish_wait(q, &wait);
- }
+ } while (!signalled());
+
+ remove_wait_queue(q, &waiter.wait);
return status;
}
return -EACCES;
}
- ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret)
return ret;
return 0;
}
- ret = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE);
if (ret)
return ret;
#endif
NULL
};
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
/* Cap maximum reconnect timeout at 1/2 lease period */
rpc_set_connect_timeout(clp->cl_rpcclient, lease, lease >> 1);
}
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
}
/**
- * nfs4_purge_state_owners - Release all cached state owners
+ * nfs4_free_state_owners - Release all cached state owners
* @head: resulting list of state owners
*
* Frees a list of state owners that was generated by
module_put_and_exit(0);
return 0;
}
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
)
)
-TRACE_EVENT(nfs4_xdr_status,
+TRACE_EVENT(nfs4_xdr_bad_operation,
+ TP_PROTO(
+ const struct xdr_stream *xdr,
+ u32 op,
+ u32 expected
+ ),
+
+ TP_ARGS(xdr, op, expected),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(u32, op)
+ __field(u32, expected)
+ ),
+
+ TP_fast_assign(
+ const struct rpc_rqst *rqstp = xdr->rqst;
+ const struct rpc_task *task = rqstp->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client->cl_clid;
+ __entry->xid = be32_to_cpu(rqstp->rq_xid);
+ __entry->op = op;
+ __entry->expected = expected;
+ ),
+
+ TP_printk(
+ "task:%u@%d xid=0x%08x operation=%u, expected=%u",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __entry->op, __entry->expected
+ )
+);
+
+DECLARE_EVENT_CLASS(nfs4_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
u32 op,
__entry->op
)
);
+#define DEFINE_NFS4_XDR_EVENT(name) \
+ DEFINE_EVENT(nfs4_xdr_event, name, \
+ TP_PROTO( \
+ const struct xdr_stream *xdr, \
+ u32 op, \
+ u32 error \
+ ), \
+ TP_ARGS(xdr, op, error))
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_status);
+DEFINE_NFS4_XDR_EVENT(nfs4_xdr_bad_filehandle);
DECLARE_EVENT_CLASS(nfs4_cb_error_class,
TP_PROTO(
* layout types will be returned.
*/
#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + \
- nfs4_fattr_bitmap_maxsz + 4 + 8 + 5)
+ nfs4_fattr_bitmap_maxsz + 1 + \
+ 1 /* lease time */ + \
+ 2 /* max filesize */ + \
+ 2 /* max read */ + \
+ 2 /* max write */ + \
+ nfstime4_maxsz /* time delta */ + \
+ 5 /* fs layout types */ + \
+ 1 /* layout blksize */ + \
+ 1 /* clone blksize */ + \
+ 1 /* change attr type */ + \
+ 1 /* xattr support */)
#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
#define decode_renew_maxsz (op_decode_hdr_maxsz)
#define encode_setclientid_maxsz \
*nfs_retval = nfs4_stat_to_errno(nfserr);
return true;
out_bad_operation:
- dprintk("nfs: Server returned operation"
- " %d but we issued a request for %d\n",
- opnum, expected);
+ trace_nfs4_xdr_bad_operation(xdr, opnum, expected);
*nfs_retval = -EREMOTEIO;
return false;
out_overflow:
if (unlikely(!p))
return -EIO;
len = be32_to_cpup(p);
- if (len > NFS4_FHSIZE)
- return -EIO;
+ if (len > NFS4_FHSIZE || len == 0) {
+ trace_nfs4_xdr_bad_filehandle(xdr, OP_READDIR,
+ NFS4ERR_BADHANDLE);
+ return -EREMOTEIO;
+ }
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
return -EIO;
return 0;
}
+static int decode_attr_change_attr_type(struct xdr_stream *xdr,
+ uint32_t *bitmap,
+ enum nfs4_change_attr_type *res)
+{
+ u32 tmp = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+
+ dprintk("%s: bitmap is %x\n", __func__, bitmap[2]);
+ if (bitmap[2] & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
+ if (xdr_stream_decode_u32(xdr, &tmp))
+ return -EIO;
+ bitmap[2] &= ~FATTR4_WORD2_CHANGE_ATTR_TYPE;
+ }
+
+ switch(tmp) {
+ case NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR:
+ case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER:
+ case NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS:
+ case NFS4_CHANGE_TYPE_IS_TIME_METADATA:
+ *res = tmp;
+ break;
+ default:
+ *res = NFS4_CHANGE_TYPE_IS_UNDEFINED;
+ }
+ return 0;
+}
+
static int decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
{
unsigned int savep;
if (status)
goto xdr_error;
+ status = decode_attr_change_attr_type(xdr, bitmap,
+ &fsinfo->change_attr_type);
+ if (status)
+ goto xdr_error;
+
status = decode_attr_xattrsupport(xdr, bitmap,
&fsinfo->xattr_support);
if (status)
if (unlikely(!p))
return -EIO;
len = be32_to_cpup(p);
- if (len > NFS4_FHSIZE)
- return -EIO;
+ if (len > NFS4_FHSIZE || len == 0) {
+ trace_nfs4_xdr_bad_filehandle(xdr, OP_GETFH, NFS4ERR_BADHANDLE);
+ return -EREMOTEIO;
+ }
fh->size = len;
p = xdr_inline_decode(xdr, len);
if (unlikely(!p))
.procs = nfs4_procedures,
.counts = nfs_version4_counts,
};
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_enter);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_fsync_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_status);
+EXPORT_TRACEPOINT_SYMBOL_GPL(nfs_xdr_bad_filehandle);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_MTIME);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_SIZE);
TRACE_DEFINE_ENUM(NFS_INO_INVALID_OTHER);
+TRACE_DEFINE_ENUM(NFS_INO_DATA_INVAL_DEFER);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_BLOCKS);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_XATTR);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_NLINK);
+TRACE_DEFINE_ENUM(NFS_INO_INVALID_MODE);
#define nfs_show_cache_validity(v) \
__print_flags(v, "|", \
{ NFS_INO_INVALID_MTIME, "INVALID_MTIME" }, \
{ NFS_INO_INVALID_SIZE, "INVALID_SIZE" }, \
{ NFS_INO_INVALID_OTHER, "INVALID_OTHER" }, \
- { NFS_INO_INVALID_XATTR, "INVALID_XATTR" })
+ { NFS_INO_DATA_INVAL_DEFER, "DATA_INVAL_DEFER" }, \
+ { NFS_INO_INVALID_BLOCKS, "INVALID_BLOCKS" }, \
+ { NFS_INO_INVALID_XATTR, "INVALID_XATTR" }, \
+ { NFS_INO_INVALID_NLINK, "INVALID_NLINK" }, \
+ { NFS_INO_INVALID_MODE, "INVALID_MODE" })
TRACE_DEFINE_ENUM(NFS_INO_ADVISE_RDPLUS);
TRACE_DEFINE_ENUM(NFS_INO_STALE);
{ NFSERR_BADTYPE, "BADTYPE" }, \
{ NFSERR_JUKEBOX, "JUKEBOX" })
-TRACE_EVENT(nfs_xdr_status,
+DECLARE_EVENT_CLASS(nfs_xdr_event,
TP_PROTO(
const struct xdr_stream *xdr,
int error
nfs_show_status(__entry->error)
)
);
+#define DEFINE_NFS_XDR_EVENT(name) \
+ DEFINE_EVENT(nfs_xdr_event, name, \
+ TP_PROTO( \
+ const struct xdr_stream *xdr, \
+ int error \
+ ), \
+ TP_ARGS(xdr, error))
+DEFINE_NFS_XDR_EVENT(nfs_xdr_status);
+DEFINE_NFS_XDR_EVENT(nfs_xdr_bad_filehandle);
#endif /* _TRACE_NFS_H */
}
/**
- * nfs_release_request - Release the count on an NFS read/write request
+ * nfs_free_request - Release the count on an NFS read/write request
* @req: request to release
*
* Note: Should never be called with the spinlock held!
}
/**
- * nfs_pageio_add_request - Attempt to coalesce a request into a page list.
+ * __nfs_pageio_add_request - Attempt to coalesce a request into a page list.
* @desc: destination io descriptor
* @req: request
*
}
valid_layout = pnfs_layout_is_valid(lo);
pnfs_clear_layoutcommit(ino, &tmp_list);
- pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL, 0);
+ pnfs_mark_matching_lsegs_return(lo, &tmp_list, NULL, 0);
if (NFS_SERVER(ino)->pnfs_curr_ld->return_range) {
struct pnfs_layout_range range = {
.iomode = IOMODE_ANY,
.length = NFS4_MAX_UINT64,
};
- pnfs_set_plh_return_info(lo, IOMODE_ANY, 0);
- pnfs_mark_matching_lsegs_return(lo, &lo->plh_return_segs,
- &range, 0);
+ pnfs_mark_matching_lsegs_return(lo, &free_me, &range, 0);
goto out_forget;
} else {
/* We have a completely new layout */
assert_spin_locked(&lo->plh_inode->i_lock);
+ if (test_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags))
+ tmp_list = &lo->plh_return_segs;
+
list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
if (pnfs_match_lseg_recall(lseg, return_range, seq)) {
dprintk("%s: marking lseg %p iomode %d "
lseg, lseg->pls_range.iomode,
lseg->pls_range.offset,
lseg->pls_range.length);
+ if (test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
+ tmp_list = &lo->plh_return_segs;
if (mark_lseg_invalid(lseg, tmp_list))
continue;
remaining++;
info->dtpref = fsinfo.tsize;
info->maxfilesize = 0x7FFFFFFF;
info->lease_time = 0;
+ info->change_attr_type = NFS4_CHANGE_TYPE_IS_TIME_METADATA;
return 0;
}
#ifdef CONFIG_NFS_V4_2
static void nfs_ssc_register_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs_ssc_register(&nfs_ssc_clnt_ops_tbl);
-#endif
}
static void nfs_ssc_unregister_ops(void)
{
-#ifdef CONFIG_NFSD_V4
nfs_ssc_unregister(&nfs_ssc_clnt_ops_tbl);
-#endif
}
#endif /* CONFIG_NFS_V4_2 */
* Now ask the mount server to map our export path
* to a file handle.
*/
- status = nfs_mount(&request);
+ status = nfs_mount(&request, ctx->timeo, ctx->retrans);
if (status != 0) {
dfprintk(MOUNT, "NFS: unable to mount server %s, error %d\n",
request.hostname, status);
* with invalidate/truncate.
*/
spin_lock(&mapping->private_lock);
- if (!nfs_have_writebacks(inode) &&
- NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
- inode_inc_iversion_raw(inode);
if (likely(!PageSwapCache(req->wb_page))) {
set_bit(PG_MAPPED, &req->wb_flags);
SetPagePrivate(req->wb_page);
if (nfs_have_delegated_attributes(inode))
goto out;
if (nfsi->cache_validity &
- (NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
+ (NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_SIZE))
return false;
smp_rmb();
if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
/* Deal with the suid/sgid bit corner case */
if (nfs_should_remove_suid(inode)) {
spin_lock(&inode->i_lock);
- nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
+ nfs_set_cache_invalid(inode, NFS_INO_INVALID_MODE);
spin_unlock(&inode->i_lock);
}
return 0;
config NFSD_V4_2_INTER_SSC
bool "NFSv4.2 inter server to server COPY"
- depends on NFSD_V4 && NFS_V4_1 && NFS_V4_2
+ depends on NFSD_V4 && NFS_V4_2
help
This option enables support for NFSv4.2 inter server to
server copy where the destination server calls the NFSv4.2
.vs_rpcb_optnl = true,
.vs_need_cong_ctrl = true,
};
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
.release = nfsd4_cb_notify_lock_release,
};
+/*
+ * We store the NONE, READ, WRITE, and BOTH bits separately in the
+ * st_{access,deny}_bmap field of the stateid, in order to track not
+ * only what share bits are currently in force, but also what
+ * combinations of share bits previous opens have used. This allows us
+ * to enforce the recommendation of rfc 3530 14.2.19 that the server
+ * return an error if the client attempt to downgrade to a combination
+ * of share bits not explicable by closing some of its previous opens.
+ *
+ * XXX: This enforcement is actually incomplete, since we don't keep
+ * track of access/deny bit combinations; so, e.g., we allow:
+ *
+ * OPEN allow read, deny write
+ * OPEN allow both, deny none
+ * DOWNGRADE allow read, deny none
+ *
+ * which we should reject.
+ */
+static unsigned int
+bmap_to_share_mode(unsigned long bmap)
+{
+ int i;
+ unsigned int access = 0;
+
+ for (i = 1; i < 4; i++) {
+ if (test_bit(i, &bmap))
+ access |= i;
+ }
+ return access;
+}
+
+/* set share access for a given stateid */
+static inline void
+set_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << access;
+
+ WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+ stp->st_access_bmap |= mask;
+}
+
+/* clear share access for a given stateid */
+static inline void
+clear_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << access;
+
+ WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
+ stp->st_access_bmap &= ~mask;
+}
+
+/* test whether a given stateid has access */
+static inline bool
+test_access(u32 access, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << access;
+
+ return (bool)(stp->st_access_bmap & mask);
+}
+
+/* set share deny for a given stateid */
+static inline void
+set_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << deny;
+
+ WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+ stp->st_deny_bmap |= mask;
+}
+
+/* clear share deny for a given stateid */
+static inline void
+clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << deny;
+
+ WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
+ stp->st_deny_bmap &= ~mask;
+}
+
+/* test whether a given stateid is denying specific access */
+static inline bool
+test_deny(u32 deny, struct nfs4_ol_stateid *stp)
+{
+ unsigned char mask = 1 << deny;
+
+ return (bool)(stp->st_deny_bmap & mask);
+}
+
+static int nfs4_access_to_omode(u32 access)
+{
+ switch (access & NFS4_SHARE_ACCESS_BOTH) {
+ case NFS4_SHARE_ACCESS_READ:
+ return O_RDONLY;
+ case NFS4_SHARE_ACCESS_WRITE:
+ return O_WRONLY;
+ case NFS4_SHARE_ACCESS_BOTH:
+ return O_RDWR;
+ }
+ WARN_ON_ONCE(1);
+ return O_RDONLY;
+}
+
+static inline int
+access_permit_read(struct nfs4_ol_stateid *stp)
+{
+ return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
+ test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
+ test_access(NFS4_SHARE_ACCESS_WRITE, stp);
+}
+
+static inline int
+access_permit_write(struct nfs4_ol_stateid *stp)
+{
+ return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
+ test_access(NFS4_SHARE_ACCESS_BOTH, stp);
+}
+
static inline struct nfs4_stateowner *
nfs4_get_stateowner(struct nfs4_stateowner *sop)
{
#define FILE_HASH_BITS 8
#define FILE_HASH_SIZE (1 << FILE_HASH_BITS)
-static unsigned int nfsd_fh_hashval(struct knfsd_fh *fh)
+static unsigned int file_hashval(struct svc_fh *fh)
{
- return jhash2(fh->fh_base.fh_pad, XDR_QUADLEN(fh->fh_size), 0);
-}
+ struct inode *inode = d_inode(fh->fh_dentry);
-static unsigned int file_hashval(struct knfsd_fh *fh)
-{
- return nfsd_fh_hashval(fh) & (FILE_HASH_SIZE - 1);
+ /* XXX: why not (here & in file cache) use inode? */
+ return (unsigned int)hash_long(inode->i_ino, FILE_HASH_BITS);
}
static struct hlist_head file_hashtbl[FILE_HASH_SIZE];
return opaque_hashval(name.data, 8) & CLIENT_HASH_MASK;
}
-/*
- * We store the NONE, READ, WRITE, and BOTH bits separately in the
- * st_{access,deny}_bmap field of the stateid, in order to track not
- * only what share bits are currently in force, but also what
- * combinations of share bits previous opens have used. This allows us
- * to enforce the recommendation of rfc 3530 14.2.19 that the server
- * return an error if the client attempt to downgrade to a combination
- * of share bits not explicable by closing some of its previous opens.
- *
- * XXX: This enforcement is actually incomplete, since we don't keep
- * track of access/deny bit combinations; so, e.g., we allow:
- *
- * OPEN allow read, deny write
- * OPEN allow both, deny none
- * DOWNGRADE allow read, deny none
- *
- * which we should reject.
- */
-static unsigned int
-bmap_to_share_mode(unsigned long bmap) {
- int i;
- unsigned int access = 0;
-
- for (i = 1; i < 4; i++) {
- if (test_bit(i, &bmap))
- access |= i;
- }
- return access;
-}
-
-/* set share access for a given stateid */
-static inline void
-set_access(u32 access, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << access;
-
- WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
- stp->st_access_bmap |= mask;
-}
-
-/* clear share access for a given stateid */
-static inline void
-clear_access(u32 access, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << access;
-
- WARN_ON_ONCE(access > NFS4_SHARE_ACCESS_BOTH);
- stp->st_access_bmap &= ~mask;
-}
-
-/* test whether a given stateid has access */
-static inline bool
-test_access(u32 access, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << access;
-
- return (bool)(stp->st_access_bmap & mask);
-}
-
-/* set share deny for a given stateid */
-static inline void
-set_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << deny;
-
- WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
- stp->st_deny_bmap |= mask;
-}
-
-/* clear share deny for a given stateid */
-static inline void
-clear_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << deny;
-
- WARN_ON_ONCE(deny > NFS4_SHARE_DENY_BOTH);
- stp->st_deny_bmap &= ~mask;
-}
-
-/* test whether a given stateid is denying specific access */
-static inline bool
-test_deny(u32 deny, struct nfs4_ol_stateid *stp)
-{
- unsigned char mask = 1 << deny;
-
- return (bool)(stp->st_deny_bmap & mask);
-}
-
-static int nfs4_access_to_omode(u32 access)
-{
- switch (access & NFS4_SHARE_ACCESS_BOTH) {
- case NFS4_SHARE_ACCESS_READ:
- return O_RDONLY;
- case NFS4_SHARE_ACCESS_WRITE:
- return O_WRONLY;
- case NFS4_SHARE_ACCESS_BOTH:
- return O_RDWR;
- }
- WARN_ON_ONCE(1);
- return O_RDONLY;
-}
-
/*
* A stateid that had a deny mode associated with it is being released
* or downgraded. Recalculate the deny mode on the file.
goto out_nolock;
}
new->cl_mach_cred = true;
+ break;
case SP4_NONE:
break;
default: /* checked by xdr code */
}
/* OPEN Share state helper functions */
-static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
+static void nfsd4_init_file(struct svc_fh *fh, unsigned int hashval,
struct nfs4_file *fp)
{
lockdep_assert_held(&state_lock);
INIT_LIST_HEAD(&fp->fi_stateids);
INIT_LIST_HEAD(&fp->fi_delegations);
INIT_LIST_HEAD(&fp->fi_clnt_odstate);
- fh_copy_shallow(&fp->fi_fhandle, fh);
+ fh_copy_shallow(&fp->fi_fhandle, &fh->fh_handle);
fp->fi_deleg_file = NULL;
fp->fi_had_conflict = false;
fp->fi_share_deny = 0;
memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
memset(fp->fi_access, 0, sizeof(fp->fi_access));
+ fp->fi_aliased = false;
+ fp->fi_inode = d_inode(fh->fh_dentry);
#ifdef CONFIG_NFSD_PNFS
INIT_LIST_HEAD(&fp->fi_lo_states);
atomic_set(&fp->fi_lo_recalls, 0);
/* search file_hashtbl[] for file */
static struct nfs4_file *
-find_file_locked(struct knfsd_fh *fh, unsigned int hashval)
+find_file_locked(struct svc_fh *fh, unsigned int hashval)
{
struct nfs4_file *fp;
hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
lockdep_is_held(&state_lock)) {
- if (fh_match(&fp->fi_fhandle, fh)) {
+ if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
if (refcount_inc_not_zero(&fp->fi_ref))
return fp;
}
return NULL;
}
-struct nfs4_file *
-find_file(struct knfsd_fh *fh)
+static struct nfs4_file *insert_file(struct nfs4_file *new, struct svc_fh *fh,
+ unsigned int hashval)
+{
+ struct nfs4_file *fp;
+ struct nfs4_file *ret = NULL;
+ bool alias_found = false;
+
+ spin_lock(&state_lock);
+ hlist_for_each_entry_rcu(fp, &file_hashtbl[hashval], fi_hash,
+ lockdep_is_held(&state_lock)) {
+ if (fh_match(&fp->fi_fhandle, &fh->fh_handle)) {
+ if (refcount_inc_not_zero(&fp->fi_ref))
+ ret = fp;
+ } else if (d_inode(fh->fh_dentry) == fp->fi_inode)
+ fp->fi_aliased = alias_found = true;
+ }
+ if (likely(ret == NULL)) {
+ nfsd4_init_file(fh, hashval, new);
+ new->fi_aliased = alias_found;
+ ret = new;
+ }
+ spin_unlock(&state_lock);
+ return ret;
+}
+
+static struct nfs4_file * find_file(struct svc_fh *fh)
{
struct nfs4_file *fp;
unsigned int hashval = file_hashval(fh);
}
static struct nfs4_file *
-find_or_add_file(struct nfs4_file *new, struct knfsd_fh *fh)
+find_or_add_file(struct nfs4_file *new, struct svc_fh *fh)
{
struct nfs4_file *fp;
unsigned int hashval = file_hashval(fh);
if (fp)
return fp;
- spin_lock(&state_lock);
- fp = find_file_locked(fh, hashval);
- if (likely(fp == NULL)) {
- nfsd4_init_file(fh, hashval, new);
- fp = new;
- }
- spin_unlock(&state_lock);
-
- return fp;
+ return insert_file(new, fh, hashval);
}
/*
struct nfs4_file *fp;
__be32 ret = nfs_ok;
- fp = find_file(¤t_fh->fh_handle);
+ fp = find_file(current_fh);
if (!fp)
return ret;
/* Check for conflicting share reservations */
if (nf)
nfsd_file_put(nf);
+ status = nfserrno(nfsd_open_break_lease(cur_fh->fh_dentry->d_inode,
+ access));
+ if (status)
+ goto out_put_access;
+
status = nfsd4_truncate(rqstp, cur_fh, open);
if (status)
goto out_put_access;
return fl;
}
+static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
+ struct nfs4_file *fp)
+{
+ struct nfs4_ol_stateid *st;
+ struct file *f = fp->fi_deleg_file->nf_file;
+ struct inode *ino = locks_inode(f);
+ int writes;
+
+ writes = atomic_read(&ino->i_writecount);
+ if (!writes)
+ return 0;
+ /*
+ * There could be multiple filehandles (hence multiple
+ * nfs4_files) referencing this file, but that's not too
+ * common; let's just give up in that case rather than
+ * trying to go look up all the clients using that other
+ * nfs4_file as well:
+ */
+ if (fp->fi_aliased)
+ return -EAGAIN;
+ /*
+ * If there's a close in progress, make sure that we see it
+ * clear any fi_fds[] entries before we see it decrement
+ * i_writecount:
+ */
+ smp_mb__after_atomic();
+
+ if (fp->fi_fds[O_WRONLY])
+ writes--;
+ if (fp->fi_fds[O_RDWR])
+ writes--;
+ if (writes > 0)
+ return -EAGAIN; /* There may be non-NFSv4 writers */
+ /*
+ * It's possible there are non-NFSv4 write opens in progress,
+ * but if they haven't incremented i_writecount yet then they
+ * also haven't called break lease yet; so, they'll break this
+ * lease soon enough. So, all that's left to check for is NFSv4
+ * opens:
+ */
+ spin_lock(&fp->fi_lock);
+ list_for_each_entry(st, &fp->fi_stateids, st_perfile) {
+ if (st->st_openstp == NULL /* it's an open */ &&
+ access_permit_write(st) &&
+ st->st_stid.sc_client != clp) {
+ spin_unlock(&fp->fi_lock);
+ return -EAGAIN;
+ }
+ }
+ spin_unlock(&fp->fi_lock);
+ /*
+ * There's a small chance that we could be racing with another
+ * NFSv4 open. However, any open that hasn't added itself to
+ * the fi_stateids list also hasn't called break_lease yet; so,
+ * they'll break this lease soon enough.
+ */
+ return 0;
+}
+
static struct nfs4_delegation *
nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
nf = find_readable_file(fp);
if (!nf) {
- /* We should always have a readable file here */
- WARN_ON_ONCE(1);
- return ERR_PTR(-EBADF);
+ /*
+ * We probably could attempt another open and get a read
+ * delegation, but for now, don't bother until the
+ * client actually sends us one.
+ */
+ return ERR_PTR(-EAGAIN);
}
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
locks_free_lock(fl);
if (status)
goto out_clnt_odstate;
+ status = nfsd4_check_conflicting_opens(clp, fp);
+ if (status)
+ goto out_unlock;
spin_lock(&state_lock);
spin_lock(&fp->fi_lock);
goto out_no_deleg;
if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
goto out_no_deleg;
- /*
- * Also, if the file was opened for write or
- * create, there's a good chance the client's
- * about to write to it, resulting in an
- * immediate recall (since we don't support
- * write delegations):
- */
- if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
- goto out_no_deleg;
- if (open->op_create == NFS4_OPEN_CREATE)
- goto out_no_deleg;
break;
default:
goto out_no_deleg;
* and check for delegations in the process of being recalled.
* If not found, create the nfs4_file struct
*/
- fp = find_or_add_file(open->op_file, ¤t_fh->fh_handle);
+ fp = find_or_add_file(open->op_file, current_fh);
if (fp != open->op_file) {
status = nfs4_check_deleg(cl, open, &dp);
if (status)
return nfs_ok;
}
-static inline int
-access_permit_read(struct nfs4_ol_stateid *stp)
-{
- return test_access(NFS4_SHARE_ACCESS_READ, stp) ||
- test_access(NFS4_SHARE_ACCESS_BOTH, stp) ||
- test_access(NFS4_SHARE_ACCESS_WRITE, stp);
-}
-
-static inline int
-access_permit_write(struct nfs4_ol_stateid *stp)
-{
- return test_access(NFS4_SHARE_ACCESS_WRITE, stp) ||
- test_access(NFS4_SHARE_ACCESS_BOTH, stp);
-}
-
static
__be32 nfs4_check_openmode(struct nfs4_ol_stateid *stp, int flags)
{
return status;
}
-static inline u64
-end_offset(u64 start, u64 len)
-{
- u64 end;
-
- end = start + len;
- return end >= start ? end: NFS4_MAX_UINT64;
-}
-
/* last octet in a range */
static inline u64
last_byte_offset(u64 start, u64 len)
static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock)
{
struct nfsd_file *nf;
- __be32 err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
- if (!err) {
- err = nfserrno(vfs_test_lock(nf->nf_file, lock));
- nfsd_file_put(nf);
- }
+ __be32 err;
+
+ err = nfsd_file_acquire(rqstp, fhp, NFSD_MAY_READ, &nf);
+ if (err)
+ return err;
+ fh_lock(fhp); /* to block new leases till after test_lock: */
+ err = nfserrno(nfsd_open_break_lease(fhp->fh_dentry->d_inode,
+ NFSD_MAY_READ));
+ if (err)
+ goto out;
+ err = nfserrno(vfs_test_lock(nf->nf_file, lock));
+out:
+ fh_unlock(fhp);
+ nfsd_file_put(nf);
return err;
}
nfsd4_sequence_done(resp);
return 1;
}
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
inode->i_fop = &simple_dir_operations;
inode->i_op = &simple_dir_inode_operations;
inc_nlink(inode);
+ break;
default:
break;
}
static int nfsd_users = 0;
-static int nfsd_startup_generic(int nrservs)
+static int nfsd_startup_generic(void)
{
int ret;
write_sequnlock(&nn->boot_lock);
}
-static int nfsd_startup_net(int nrservs, struct net *net, const struct cred *cred)
+static int nfsd_startup_net(struct net *net, const struct cred *cred)
{
struct nfsd_net *nn = net_generic(net, nfsd_net_id);
int ret;
if (nn->nfsd_net_up)
return 0;
- ret = nfsd_startup_generic(nrservs);
+ ret = nfsd_startup_generic();
if (ret)
return ret;
ret = nfsd_init_socks(net, cred);
nfsd_up_before = nn->nfsd_net_up;
- error = nfsd_startup_net(nrservs, net, cred);
+ error = nfsd_startup_net(net, cred);
if (error)
goto out_destroy;
error = nn->nfsd_serv->sv_ops->svo_setup(nn->nfsd_serv,
*/
struct nfs4_file {
refcount_t fi_ref;
+ struct inode * fi_inode;
+ bool fi_aliased;
spinlock_t fi_lock;
struct hlist_node fi_hash; /* hash on fi_fhandle */
struct list_head fi_stateids;
struct xdr_netobj princhash, struct nfsd_net *nn);
extern bool nfs4_has_reclaimed_state(struct xdr_netobj name, struct nfsd_net *nn);
-struct nfs4_file *find_file(struct knfsd_fh *fh);
void put_nfs4_file(struct nfs4_file *fi);
extern void nfs4_put_copy(struct nfsd4_copy *copy);
extern struct nfsd4_copy *
#endif
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
* nilfs_cpfile_delete_checkpoints - delete checkpoints
* @cpfile: inode of checkpoint file
* @start: start checkpoint number
- * @end: end checkpoint numer
+ * @end: end checkpoint number
*
* Description: nilfs_cpfile_delete_checkpoints() deletes the checkpoints in
* the period from @start to @end, excluding @end itself. The checkpoints
* @inode: inode object
* @argp: pointer on argument from userspace
*
- * Decription: nilfs_ioctl_trim_fs is the FITRIM ioctl handle function. It
+ * Description: nilfs_ioctl_trim_fs is the FITRIM ioctl handle function. It
* checks the arguments from userspace and calls nilfs_sufile_trim_fs, which
* performs the actual trim operation.
*
* @inode: inode object
* @argp: pointer on argument from userspace
*
- * Decription: nilfs_ioctl_set_alloc_range() function defines lower limit
+ * Description: nilfs_ioctl_set_alloc_range() function defines lower limit
* of segments in bytes and upper limit of segments in bytes.
* The NILFS_IOCTL_SET_ALLOC_RANGE is used by nilfs_resize utility.
*
* nilfs_construct_segment - construct a logical segment
* @sb: super block
*
- * Return Value: On success, 0 is retured. On errors, one of the following
+ * Return Value: On success, 0 is returned. On errors, one of the following
* negative error code is returned.
*
* %-EROFS - Read only filesystem.
* @start: start byte offset
* @end: end byte offset (inclusive)
*
- * Return Value: On success, 0 is retured. On errors, one of the following
+ * Return Value: On success, 0 is returned. On errors, one of the following
* negative error code is returned.
*
* %-EROFS - Read only filesystem.
/**
* load_nilfs - load and recover the nilfs
* @nilfs: the_nilfs structure to be released
- * @sb: super block isntance used to recover past segment
+ * @sb: super block instance used to recover past segment
*
* load_nilfs() searches and load the latest super root,
* attaches the last segment, and does recovery if needed.
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* acl.c
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* acl.h
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* alloc.c
*
* Extent allocs and frees
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* alloc.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
*/
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* Copyright (C) 2002, 2004, 2005 Oracle. All rights reserved.
*/
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* blockcheck.c
*
* Checksum and ECC codes for the OCFS2 userspace library.
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* blockcheck.h
*
* Checksum and ECC codes for the OCFS2 userspace library.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* io.c
*
* Buffer cache handling
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs2_buffer_head.h
*
* Buffer cache handling functions defined
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* Copyright (C) 2004, 2005 Oracle. All rights reserved.
*/
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* heartbeat.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* Copyright (C) 2004, 2005 Oracle. All rights reserved.
*/
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* Copyright (C) 2005 Oracle. All rights reserved.
*/
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* netdebug.c
*
* debug functionality for o2net
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* Copyright (C) 2004, 2005 Oracle. All rights reserved.
*/
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* nodemanager.h
*
* Function prototypes
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs2_heartbeat.h
*
* On-disk structures for ocfs2_heartbeat
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs2_nodemanager.h
*
* Header describing the interface between userspace and the kernel
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- *
- * vim: noexpandtab sw=8 ts=8 sts=0:
+/*
*
* Copyright (C) 2005 Oracle. All rights reserved.
*/
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* Copyright (C) 2005 Oracle. All rights reserved.
*/
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* sys.c
*
* OCFS2 cluster sysfs interface
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* sys.h
*
* Function prototypes for o2cb sysfs interface
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- *
- * vim: noexpandtab sw=8 ts=8 sts=0:
+/*
*
* Copyright (C) 2004 Oracle. All rights reserved.
*
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* tcp.h
*
* Function prototypes
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* Copyright (C) 2005 Oracle. All rights reserved.
*/
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dcache.c
*
* dentry cache handling code
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dcache.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dir.c
*
* Creates, reads, walks and deletes directory-nodes
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dir.h
*
* Function prototypes
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmapi.h
*
* externally exported dlm interfaces
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmast.c
*
* AST and BAST functionality for local and remote nodes
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmcommon.h
*
* Copyright (C) 2004 Oracle. All rights reserved.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmconvert.c
*
* underlying calls for lock conversion
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmconvert.h
*
* Copyright (C) 2004 Oracle. All rights reserved.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmdebug.c
*
* debug functionality for the dlm
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmdebug.h
*
* Copyright (C) 2008 Oracle. All rights reserved.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmdomain.c
*
* defines domain join / leave apis
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmdomain.h
*
* Copyright (C) 2004 Oracle. All rights reserved.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmlock.c
*
* underlying calls for lock creation
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmmod.c
*
* standalone DLM module
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmrecovery.c
*
* recovery stuff
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmthread.c
*
* standalone DLM module
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmunlock.c
*
* underlying calls for unlocking locks
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmfs.c
*
* Code which implements the kernel side of a minimal userspace
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* userdlm.c
*
* Code which implements the kernel side of a minimal userspace
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* userdlm.h
*
* Userspace dlm defines
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmglue.c
*
* Code which implements an OCFS2 specific interface to our DLM.
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* dlmglue.h
*
* description here
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* export.c
*
* Functions to facilitate NFS exporting
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* export.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* extent_map.c
*
* Block/Cluster mapping functions
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* extent_map.h
*
* In-memory file extent mappings for OCFS2.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* file.c
*
* File open, close, extend, truncate
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* file.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* filecheck.c
*
* Code which implements online file check.
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* filecheck.h
*
* Online file check.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* heartbeat.c
*
* Register ourselves with the heartbaet service, keep our node maps
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* heartbeat.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* inode.c
*
* vfs' aops, fops, dops and iops
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* inode.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* journal.c
*
* Defines functions of journalling api
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* journal.h
*
* Defines journalling api and structures.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* localalloc.c
*
* Node local data allocation
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* localalloc.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* locks.c
*
* Userspace file locking support
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* locks.h
*
* Function prototypes for Userspace file locking support
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* mmap.c
*
* Code to deal with the mess that is clustered mmap.
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* move_extents.c
*
* Copyright (C) 2011 Oracle. All rights reserved.
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* move_extents.h
*
* Copyright (C) 2011 Oracle. All rights reserved.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* namei.c
*
* Create and rename file, directory, symlinks
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* namei.h
*
* Function prototypes
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs1_fs_compat.h
*
* OCFS1 volume header definitions. OCFS2 creates valid but unmountable
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs2.h
*
* Defines macros and structures used in OCFS2
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs2_fs.h
*
* On-disk structures for OCFS2.
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs2_ioctl.h
*
* Defines OCFS2 ioctls.
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs2_lockid.h
*
* Defines OCFS2 lockid bits.
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* ocfs2_lockingver.h
*
* Defines OCFS2 Locking version values.
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* refcounttree.c
*
* Copyright (C) 2009 Oracle. All rights reserved.
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* refcounttree.h
*
* Copyright (C) 2009 Oracle. All rights reserved.
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* reservations.c
*
* Allocation reservations implementation
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* reservations.h
*
* Allocation reservations function prototypes and structures.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* resize.c
*
* volume resize.
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* resize.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* slot_map.c
*
* Copyright (C) 2002, 2004 Oracle. All rights reserved.
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* slotmap.h
*
* description here
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* stack_o2cb.c
*
* Code which interfaces ocfs2 with the o2cb stack.
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* stack_user.c
*
* Code which interfaces ocfs2 with fs/dlm and a userspace stack.
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* stackglue.c
*
* Code which implements an OCFS2 specific interface to underlying
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* stackglue.h
*
* Glue to the underlying cluster stack.
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* suballoc.c
*
* metadata alloc and free
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* suballoc.h
*
* Defines sub allocator api
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* super.c
*
* load/unload driver, mount/dismount volumes
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* super.h
*
* Function prototypes
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* linux/cluster/ssi/cfs/symlink.c
*
* This program is free software; you can redistribute it and/or
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* symlink.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* sysfile.c
*
* Initialize, read, write, etc. system files.
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* sysfile.h
*
* Function prototypes
// SPDX-License-Identifier: GPL-2.0-or-later
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* uptodate.c
*
* Tracking the up-to-date-ness of a local buffer_head with respect to
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* uptodate.h
*
* Cluster uptodate tracking
// SPDX-License-Identifier: GPL-2.0-only
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* xattr.c
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
/* SPDX-License-Identifier: GPL-2.0-only */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* xattr.h
*
* Copyright (C) 2004, 2008 Oracle. All rights reserved.
const char *cp = name, *next;
struct proc_dir_entry *de;
- de = *ret;
- if (!de)
- de = &proc_root;
-
- while (1) {
- next = strchr(cp, '/');
- if (!next)
- break;
-
+ de = *ret ?: &proc_root;
+ while ((next = strchr(cp, '/')) != NULL) {
de = pde_subdir_find(de, cp, next - cp);
if (!de) {
WARN(1, "name '%s'\n", name);
while (1) {
next = pde_subdir_first(de);
if (next) {
- if (unlikely(pde_is_permanent(root))) {
+ if (unlikely(pde_is_permanent(next))) {
write_unlock(&proc_subdir_lock);
WARN(1, "removing permanent /proc entry '%s/%s'",
next->parent->name, next->name);
spin_unlock(&de->pde_unload_lock);
}
-static loff_t pde_lseek(struct proc_dir_entry *pde, struct file *file, loff_t offset, int whence)
-{
- typeof_member(struct proc_ops, proc_lseek) lseek;
-
- lseek = pde->proc_ops->proc_lseek;
- if (!lseek)
- lseek = default_llseek;
- return lseek(file, offset, whence);
-}
-
static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
{
struct proc_dir_entry *pde = PDE(file_inode(file));
loff_t rv = -EINVAL;
if (pde_is_permanent(pde)) {
- return pde_lseek(pde, file, offset, whence);
+ return pde->proc_ops->proc_lseek(file, offset, whence);
} else if (use_pde(pde)) {
- rv = pde_lseek(pde, file, offset, whence);
+ rv = pde->proc_ops->proc_lseek(file, offset, whence);
unuse_pde(pde);
}
return rv;
static int proc_reg_open(struct inode *inode, struct file *file)
{
- struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
struct proc_dir_entry *pde = PDE(inode);
int rv = 0;
typeof_member(struct proc_ops, proc_open) open;
return rv;
}
- if (fs_info->pidonly == PROC_PIDONLY_ON)
- return -ENOENT;
-
/*
* Ensure that
* 1) PDE's ->release hook will be called no matter what
}
/**
- * register_sysctl_table_path - register a sysctl table hierarchy
+ * register_sysctl_paths - register a sysctl table hierarchy
* @path: The path to the directory the sysctl table is in.
* @table: the top-level table structure
*
[ilog2(VM_PKEY_BIT4)] = "",
#endif
#endif /* CONFIG_ARCH_HAS_PKEYS */
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+ [ilog2(VM_UFFD_MINOR)] = "ui",
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
};
size_t i;
* (available at http://www.namesys.com/legalese.html)
*
*/
-
-/*
- * Make Linus happy.
- * Local variables:
- * c-indentation-style: "K&R"
- * mode-name: "LC"
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
*/
list_for_each_entry_reverse(r, &c->replay_list, list) {
ubifs_assert(c, r->sqnum >= rino->sqnum);
- if (key_inum(c, &r->key) == key_inum(c, &rino->key))
+ if (key_inum(c, &r->key) == key_inum(c, &rino->key) &&
+ key_type(c, &r->key) == UBIFS_INO_KEY)
return r->deletion == 0;
}
static int get_default_compressor(struct ubifs_info *c)
{
+ if (ubifs_compr_present(c, UBIFS_COMPR_ZSTD))
+ return UBIFS_COMPR_ZSTD;
+
if (ubifs_compr_present(c, UBIFS_COMPR_LZO))
return UBIFS_COMPR_LZO;
ubifs_msg(c, "LEB size: %d bytes (%d KiB), min./max. I/O unit sizes: %d bytes/%d bytes",
c->leb_size, c->leb_size >> 10, c->min_io_size,
c->max_write_size);
- ubifs_msg(c, "FS size: %lld bytes (%lld MiB, %d LEBs), journal size %lld bytes (%lld MiB, %d LEBs)",
- x, x >> 20, c->main_lebs,
+ ubifs_msg(c, "FS size: %lld bytes (%lld MiB, %d LEBs), max %d LEBs, journal size %lld bytes (%lld MiB, %d LEBs)",
+ x, x >> 20, c->main_lebs, c->max_leb_cnt,
y, y >> 20, c->log_lebs + c->max_bud_cnt);
ubifs_msg(c, "reserved for root: %llu bytes (%llu KiB)",
c->report_rp_size, c->report_rp_size >> 10);
goto out_umount;
}
+ import_uuid(&sb->s_uuid, c->uuid);
+
mutex_unlock(&c->umount_mutex);
return 0;
# SPDX-License-Identifier: GPL-2.0-only
-mkutf8data
-utf8data.h
+/mkutf8data
+/utf8data.h
#include <linux/sched/signal.h>
#include <linux/sched/mm.h>
#include <linux/mm.h>
+#include <linux/mmu_notifier.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/seq_file.h>
msg_init(&msg);
msg.event = UFFD_EVENT_PAGEFAULT;
msg.arg.pagefault.address = address;
+ /*
+ * These flags indicate why the userfault occurred:
+ * - UFFD_PAGEFAULT_FLAG_WP indicates a write protect fault.
+ * - UFFD_PAGEFAULT_FLAG_MINOR indicates a minor fault.
+ * - Neither of these flags being set indicates a MISSING fault.
+ *
+ * Separately, UFFD_PAGEFAULT_FLAG_WRITE indicates it was a write
+ * fault. Otherwise, it was a read fault.
+ */
if (flags & FAULT_FLAG_WRITE)
- /*
- * If UFFD_FEATURE_PAGEFAULT_FLAG_WP was set in the
- * uffdio_api.features and UFFD_PAGEFAULT_FLAG_WRITE
- * was not set in a UFFD_EVENT_PAGEFAULT, it means it
- * was a read fault, otherwise if set it means it's
- * a write fault.
- */
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WRITE;
if (reason & VM_UFFD_WP)
- /*
- * If UFFD_FEATURE_PAGEFAULT_FLAG_WP was set in the
- * uffdio_api.features and UFFD_PAGEFAULT_FLAG_WP was
- * not set in a UFFD_EVENT_PAGEFAULT, it means it was
- * a missing fault, otherwise if set it means it's a
- * write protect fault.
- */
msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_WP;
+ if (reason & VM_UFFD_MINOR)
+ msg.arg.pagefault.flags |= UFFD_PAGEFAULT_FLAG_MINOR;
if (features & UFFD_FEATURE_THREAD_ID)
msg.arg.pagefault.feat.ptid = task_pid_vnr(current);
return msg;
BUG_ON(ctx->mm != mm);
- VM_BUG_ON(reason & ~(VM_UFFD_MISSING|VM_UFFD_WP));
- VM_BUG_ON(!(reason & VM_UFFD_MISSING) ^ !!(reason & VM_UFFD_WP));
+ /* Any unrecognized flag is a bug. */
+ VM_BUG_ON(reason & ~__VM_UFFD_FLAGS);
+ /* 0 or > 1 flags set is a bug; we expect exactly 1. */
+ VM_BUG_ON(!reason || (reason & (reason - 1)));
if (ctx->features & UFFD_FEATURE_SIGBUS)
goto out;
for (vma = mm->mmap; vma; vma = vma->vm_next)
if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
+ vma->vm_flags &= ~__VM_UFFD_FLAGS;
}
mmap_write_unlock(mm);
octx = vma->vm_userfaultfd_ctx.ctx;
if (!octx || !(octx->features & UFFD_FEATURE_EVENT_FORK)) {
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
+ vma->vm_flags &= ~__VM_UFFD_FLAGS;
return 0;
}
} else {
/* Drop uffd context if remap feature not enabled */
vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
- vma->vm_flags &= ~(VM_UFFD_WP | VM_UFFD_MISSING);
+ vma->vm_flags &= ~__VM_UFFD_FLAGS;
}
}
for (vma = mm->mmap; vma; vma = vma->vm_next) {
cond_resched();
BUG_ON(!!vma->vm_userfaultfd_ctx.ctx ^
- !!(vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP)));
+ !!(vma->vm_flags & __VM_UFFD_FLAGS));
if (vma->vm_userfaultfd_ctx.ctx != ctx) {
prev = vma;
continue;
}
- new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
+ new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
prev = vma_merge(mm, prev, vma->vm_start, vma->vm_end,
new_flags, vma->anon_vma,
vma->vm_file, vma->vm_pgoff,
unsigned long vm_flags)
{
/* FIXME: add WP support to hugetlbfs and shmem */
- return vma_is_anonymous(vma) ||
- ((is_vm_hugetlb_page(vma) || vma_is_shmem(vma)) &&
- !(vm_flags & VM_UFFD_WP));
+ if (vm_flags & VM_UFFD_WP) {
+ if (is_vm_hugetlb_page(vma) || vma_is_shmem(vma))
+ return false;
+ }
+
+ if (vm_flags & VM_UFFD_MINOR) {
+ /* FIXME: Add minor fault interception for shmem. */
+ if (!is_vm_hugetlb_page(vma))
+ return false;
+ }
+
+ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) ||
+ vma_is_shmem(vma);
}
static int userfaultfd_register(struct userfaultfd_ctx *ctx,
ret = -EINVAL;
if (!uffdio_register.mode)
goto out;
- if (uffdio_register.mode & ~(UFFDIO_REGISTER_MODE_MISSING|
- UFFDIO_REGISTER_MODE_WP))
+ if (uffdio_register.mode & ~UFFD_API_REGISTER_MODES)
goto out;
vm_flags = 0;
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING)
vm_flags |= VM_UFFD_MISSING;
if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP)
vm_flags |= VM_UFFD_WP;
+ if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) {
+#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+ goto out;
+#endif
+ vm_flags |= VM_UFFD_MINOR;
+ }
ret = validate_range(mm, &uffdio_register.range.start,
uffdio_register.range.len);
cond_resched();
BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
- !!(cur->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP)));
+ !!(cur->vm_flags & __VM_UFFD_FLAGS));
/* check not compatible vmas */
ret = -EINVAL;
start = vma->vm_start;
vma_end = min(end, vma->vm_end);
- new_flags = (vma->vm_flags &
- ~(VM_UFFD_MISSING|VM_UFFD_WP)) | vm_flags;
+ new_flags = (vma->vm_flags & ~__VM_UFFD_FLAGS) | vm_flags;
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
vma->vm_flags = new_flags;
vma->vm_userfaultfd_ctx.ctx = ctx;
+ if (is_vm_hugetlb_page(vma) && uffd_disable_huge_pmd_share(vma))
+ hugetlb_unshare_all_pmds(vma);
+
skip:
prev = vma;
start = vma->vm_end;
if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_WP))
ioctls_out &= ~((__u64)1 << _UFFDIO_WRITEPROTECT);
+ /* CONTINUE ioctl is only supported for MINOR ranges. */
+ if (!(uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR))
+ ioctls_out &= ~((__u64)1 << _UFFDIO_CONTINUE);
+
/*
* Now that we scanned all vmas we can already tell
* userland which ioctls methods are guaranteed to
cond_resched();
BUG_ON(!!cur->vm_userfaultfd_ctx.ctx ^
- !!(cur->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP)));
+ !!(cur->vm_flags & __VM_UFFD_FLAGS));
/*
* Check not compatible vmas, not strictly required
wake_userfault(vma->vm_userfaultfd_ctx.ctx, &range);
}
- new_flags = vma->vm_flags & ~(VM_UFFD_MISSING | VM_UFFD_WP);
+ new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS;
prev = vma_merge(mm, prev, start, vma_end, new_flags,
vma->anon_vma, vma->vm_file, vma->vm_pgoff,
vma_policy(vma),
return ret;
}
+static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
+{
+ __s64 ret;
+ struct uffdio_continue uffdio_continue;
+ struct uffdio_continue __user *user_uffdio_continue;
+ struct userfaultfd_wake_range range;
+
+ user_uffdio_continue = (struct uffdio_continue __user *)arg;
+
+ ret = -EAGAIN;
+ if (READ_ONCE(ctx->mmap_changing))
+ goto out;
+
+ ret = -EFAULT;
+ if (copy_from_user(&uffdio_continue, user_uffdio_continue,
+ /* don't copy the output fields */
+ sizeof(uffdio_continue) - (sizeof(__s64))))
+ goto out;
+
+ ret = validate_range(ctx->mm, &uffdio_continue.range.start,
+ uffdio_continue.range.len);
+ if (ret)
+ goto out;
+
+ ret = -EINVAL;
+ /* double check for wraparound just in case. */
+ if (uffdio_continue.range.start + uffdio_continue.range.len <=
+ uffdio_continue.range.start) {
+ goto out;
+ }
+ if (uffdio_continue.mode & ~UFFDIO_CONTINUE_MODE_DONTWAKE)
+ goto out;
+
+ if (mmget_not_zero(ctx->mm)) {
+ ret = mcopy_continue(ctx->mm, uffdio_continue.range.start,
+ uffdio_continue.range.len,
+ &ctx->mmap_changing);
+ mmput(ctx->mm);
+ } else {
+ return -ESRCH;
+ }
+
+ if (unlikely(put_user(ret, &user_uffdio_continue->mapped)))
+ return -EFAULT;
+ if (ret < 0)
+ goto out;
+
+ /* len == 0 would wake all */
+ BUG_ON(!ret);
+ range.len = ret;
+ if (!(uffdio_continue.mode & UFFDIO_CONTINUE_MODE_DONTWAKE)) {
+ range.start = uffdio_continue.range.start;
+ wake_userfault(ctx, &range);
+ }
+ ret = range.len == uffdio_continue.range.len ? 0 : -EAGAIN;
+
+out:
+ return ret;
+}
+
static inline unsigned int uffd_ctx_features(__u64 user_features)
{
/*
goto err_out;
/* report all available features and ioctls to userland */
uffdio_api.features = UFFD_API_FEATURES;
+#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+ uffdio_api.features &= ~UFFD_FEATURE_MINOR_HUGETLBFS;
+#endif
uffdio_api.ioctls = UFFD_API_IOCTLS;
ret = -EFAULT;
if (copy_to_user(buf, &uffdio_api, sizeof(uffdio_api)))
case UFFDIO_WRITEPROTECT:
ret = userfaultfd_writeprotect(ctx, arg);
break;
+ case UFFDIO_CONTINUE:
+ ret = userfaultfd_continue(ctx, arg);
+ break;
}
return ret;
}
xfs_agnumber_t agno = pag->pag_agno;
xfs_extlen_t ask;
xfs_extlen_t used;
- int error = 0;
+ int error = 0, error2;
+ bool has_resv = false;
/* Create the metadata reservation. */
if (pag->pag_meta_resv.ar_asked == 0) {
if (error)
goto out;
}
+ if (ask)
+ has_resv = true;
}
/* Create the RMAPBT metadata reservation */
error = __xfs_ag_resv_init(pag, XFS_AG_RESV_RMAPBT, ask, used);
if (error)
goto out;
+ if (ask)
+ has_resv = true;
}
-#ifdef DEBUG
- /* need to read in the AGF for the ASSERT below to work */
- error = xfs_alloc_pagf_init(pag->pag_mount, tp, pag->pag_agno, 0);
- if (error)
- return error;
-
- ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
- xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
- pag->pagf_freeblks + pag->pagf_flcount);
-#endif
out:
+ /*
+ * Initialize the pagf if we have at least one active reservation on the
+ * AG. This may have occurred already via reservation calculation, but
+ * fall back to an explicit init to ensure the in-core allocbt usage
+ * counters are initialized as soon as possible. This is important
+ * because filesystems with large perag reservations are susceptible to
+ * free space reservation problems that the allocbt counter is used to
+ * address.
+ */
+ if (has_resv) {
+ error2 = xfs_alloc_pagf_init(mp, tp, pag->pag_agno, 0);
+ if (error2)
+ return error2;
+ ASSERT(xfs_perag_resv(pag, XFS_AG_RESV_METADATA)->ar_reserved +
+ xfs_perag_resv(pag, XFS_AG_RESV_RMAPBT)->ar_reserved <=
+ pag->pagf_freeblks + pag->pagf_flcount);
+ }
return error;
}
agbp->b_pag->pagf_freeblks += len;
be32_add_cpu(&agf->agf_freeblks, len);
- xfs_trans_agblocks_delta(tp, len);
if (unlikely(be32_to_cpu(agf->agf_freeblks) >
be32_to_cpu(agf->agf_length))) {
xfs_buf_mark_corrupt(agbp);
pag = agbp->b_pag;
ASSERT(!pag->pagf_agflreset);
be32_add_cpu(&agf->agf_flcount, -1);
- xfs_trans_agflist_delta(tp, -1);
pag->pagf_flcount--;
logflags = XFS_AGF_FLFIRST | XFS_AGF_FLCOUNT;
pag = agbp->b_pag;
ASSERT(!pag->pagf_agflreset);
be32_add_cpu(&agf->agf_flcount, 1);
- xfs_trans_agflist_delta(tp, 1);
pag->pagf_flcount++;
logflags = XFS_AGF_FLLAST | XFS_AGF_FLCOUNT;
struct xfs_agf *agf; /* ag freelist header */
struct xfs_perag *pag; /* per allocation group data */
int error;
+ int allocbt_blks;
trace_xfs_alloc_read_agf(mp, agno);
pag->pagf_refcount_level = be32_to_cpu(agf->agf_refcount_level);
pag->pagf_init = 1;
pag->pagf_agflreset = xfs_agfl_needs_reset(mp, agf);
+
+ /*
+ * Update the in-core allocbt counter. Filter out the rmapbt
+ * subset of the btreeblks counter because the rmapbt is managed
+ * by perag reservation. Subtract one for the rmapbt root block
+ * because the rmap counter includes it while the btreeblks
+ * counter only tracks non-root blocks.
+ */
+ allocbt_blks = pag->pagf_btreeblks;
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb))
+ allocbt_blks -= be32_to_cpu(agf->agf_rmap_blocks) - 1;
+ if (allocbt_blks > 0)
+ atomic64_add(allocbt_blks, &mp->m_allocbt_blks);
}
#ifdef DEBUG
else if (!XFS_FORCED_SHUTDOWN(mp)) {
return 0;
}
+ atomic64_inc(&cur->bc_mp->m_allocbt_blks);
xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1, false);
- xfs_trans_agbtree_delta(cur->bc_tp, 1);
new->s = cpu_to_be32(bno);
*stat = 1;
if (error)
return error;
+ atomic64_dec(&cur->bc_mp->m_allocbt_blks);
xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
- xfs_trans_agbtree_delta(cur->bc_tp, -1);
return 0;
}
* directly mirrors the xfs_dinode structure as it must contain all the same
* information.
*/
-typedef uint64_t xfs_ictimestamp_t;
+typedef uint64_t xfs_log_timestamp_t;
/* Legacy timestamp encoding format. */
-struct xfs_legacy_ictimestamp {
+struct xfs_log_legacy_timestamp {
int32_t t_sec; /* timestamp seconds */
int32_t t_nsec; /* timestamp nanoseconds */
};
uint16_t di_projid_hi; /* higher part of owner's project id */
uint8_t di_pad[6]; /* unused, zeroed space */
uint16_t di_flushiter; /* incremented on flush */
- xfs_ictimestamp_t di_atime; /* time last accessed */
- xfs_ictimestamp_t di_mtime; /* time last modified */
- xfs_ictimestamp_t di_ctime; /* time created/inode modified */
+ xfs_log_timestamp_t di_atime; /* time last accessed */
+ xfs_log_timestamp_t di_mtime; /* time last modified */
+ xfs_log_timestamp_t di_ctime; /* time created/inode modified */
xfs_fsize_t di_size; /* number of bytes in file */
xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
uint8_t di_pad2[12]; /* more padding for future expansion */
/* fields only written to during inode creation */
- xfs_ictimestamp_t di_crtime; /* time created */
+ xfs_log_timestamp_t di_crtime; /* time created */
xfs_ino_t di_ino; /* inode number */
uuid_t di_uuid; /* UUID of the filesystem */
xfs_extent_busy_reuse(cur->bc_mp, cur->bc_ag.agno, bno, 1,
false);
- xfs_trans_agbtree_delta(cur->bc_tp, 1);
new->s = cpu_to_be32(bno);
be32_add_cpu(&agf->agf_rmap_blocks, 1);
xfs_alloc_log_agf(cur->bc_tp, agbp, XFS_AGF_RMAP_BLOCKS);
xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
- xfs_trans_agbtree_delta(cur->bc_tp, -1);
pag = cur->bc_ag.agbp->b_pag;
xfs_ag_resv_free_extent(pag, XFS_AG_RESV_RMAPBT, NULL, 1);
struct xfs_mount *mp = tp->t_mountp;
struct xfs_buf *bp = xfs_trans_getsb(tp);
- mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
- mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
- mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ /*
+ * Lazy sb counters don't update the in-core superblock so do that now.
+ * If this is at unmount, the counters will be exactly correct, but at
+ * any other time they will only be ballpark correct because of
+ * reservations that have been taken out percpu counters. If we have an
+ * unclean shutdown, this will be corrected by log recovery rebuilding
+ * the counters from the AGF block counts.
+ */
+ if (xfs_sb_version_haslazysbcount(&mp->m_sb)) {
+ mp->m_sb.sb_icount = percpu_counter_sum(&mp->m_icount);
+ mp->m_sb.sb_ifree = percpu_counter_sum(&mp->m_ifree);
+ mp->m_sb.sb_fdblocks = percpu_counter_sum(&mp->m_fdblocks);
+ }
xfs_sb_to_disk(bp->b_addr, &mp->m_sb);
xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
xfs_agblock_t btreeblks;
int error;
+ /* agf_btreeblks didn't exist before lazysbcount */
+ if (!xfs_sb_version_haslazysbcount(&sc->mp->m_sb))
+ return;
+
/* Check agf_rmap_blocks; set up for agf_btreeblks check */
if (sc->sa.rmap_cur) {
error = xfs_btree_count_blocks(sc->sa.rmap_cur, &blocks);
xchk_block_set_corrupt(sc, sc->sa.agf_bp);
if (pag->pagf_flcount != be32_to_cpu(agf->agf_flcount))
xchk_block_set_corrupt(sc, sc->sa.agf_bp);
- if (pag->pagf_btreeblks != be32_to_cpu(agf->agf_btreeblks))
+ if (xfs_sb_version_haslazysbcount(&sc->mp->m_sb) &&
+ pag->pagf_btreeblks != be32_to_cpu(agf->agf_btreeblks))
xchk_block_set_corrupt(sc, sc->sa.agf_bp);
xfs_perag_put(pag);
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_health.h"
+#include "xfs_btree.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
return xchk_trans_alloc(sc, 0);
}
+/* Count free space btree blocks manually for pre-lazysbcount filesystems. */
+static int
+xchk_fscount_btreeblks(
+ struct xfs_scrub *sc,
+ struct xchk_fscounters *fsc,
+ xfs_agnumber_t agno)
+{
+ xfs_extlen_t blocks;
+ int error;
+
+ error = xchk_ag_init(sc, agno, &sc->sa);
+ if (error)
+ return error;
+
+ error = xfs_btree_count_blocks(sc->sa.bno_cur, &blocks);
+ if (error)
+ goto out_free;
+ fsc->fdblocks += blocks - 1;
+
+ error = xfs_btree_count_blocks(sc->sa.cnt_cur, &blocks);
+ if (error)
+ goto out_free;
+ fsc->fdblocks += blocks - 1;
+
+out_free:
+ xchk_ag_free(sc, &sc->sa);
+ return error;
+}
+
/*
* Calculate what the global in-core counters ought to be from the incore
* per-AG structure. Callers can compare this to the actual in-core counters
/* Add up the free/freelist/bnobt/cntbt blocks */
fsc->fdblocks += pag->pagf_freeblks;
fsc->fdblocks += pag->pagf_flcount;
- fsc->fdblocks += pag->pagf_btreeblks;
+ if (xfs_sb_version_haslazysbcount(&sc->mp->m_sb)) {
+ fsc->fdblocks += pag->pagf_btreeblks;
+ } else {
+ error = xchk_fscount_btreeblks(sc, fsc, agno);
+ if (error) {
+ xfs_perag_put(pag);
+ break;
+ }
+ }
/*
* Per-AG reservations are taken out of the incore counters,
while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
io_list))) {
list_del_init(&ioend->io_list);
- iomap_ioend_try_merge(ioend, &tmp, NULL);
+ iomap_ioend_try_merge(ioend, &tmp);
xfs_end_ioend(ioend);
}
}
if (error)
return error;
- xfs_trans_agblocks_delta(tp, id->nfree);
-
if (delta) {
*lastag_extended = true;
error = xfs_ag_extend_space(mp, tp, id, delta);
* Convert an incore timestamp to a log timestamp. Note that the log format
* specifies host endian format!
*/
-static inline xfs_ictimestamp_t
+static inline xfs_log_timestamp_t
xfs_inode_to_log_dinode_ts(
struct xfs_inode *ip,
const struct timespec64 tv)
{
- struct xfs_legacy_ictimestamp *lits;
- xfs_ictimestamp_t its;
+ struct xfs_log_legacy_timestamp *lits;
+ xfs_log_timestamp_t its;
if (xfs_inode_has_bigtime(ip))
return xfs_inode_encode_bigtime(tv);
- lits = (struct xfs_legacy_ictimestamp *)&its;
+ lits = (struct xfs_log_legacy_timestamp *)&its;
lits->t_sec = tv.tv_sec;
lits->t_nsec = tv.tv_nsec;
static inline xfs_timestamp_t
xfs_log_dinode_to_disk_ts(
struct xfs_log_dinode *from,
- const xfs_ictimestamp_t its)
+ const xfs_log_timestamp_t its)
{
struct xfs_legacy_timestamp *lts;
- struct xfs_legacy_ictimestamp *lits;
+ struct xfs_log_legacy_timestamp *lits;
xfs_timestamp_t ts;
if (xfs_log_dinode_has_bigtime(from))
return cpu_to_be64(its);
lts = (struct xfs_legacy_timestamp *)&ts;
- lits = (struct xfs_legacy_ictimestamp *)&its;
+ lits = (struct xfs_log_legacy_timestamp *)&its;
lts->t_sec = cpu_to_be32(lits->t_sec);
lts->t_nsec = cpu_to_be32(lits->t_nsec);
struct xfs_mount *mp)
{
/*
- * Never write to the log on norecovery mounts, if the block device is
- * read-only, or if the filesystem is shutdown. Read-only mounts still
- * allow internal writes for log recovery and unmount purposes, so don't
- * restrict that case here.
+ * Do not write to the log on norecovery mounts, if the data or log
+ * devices are read-only, or if the filesystem is shutdown. Read-only
+ * mounts allow internal writes for log recovery and unmount purposes,
+ * so don't restrict that case.
*/
if (mp->m_flags & XFS_MOUNT_NORECOVERY)
return false;
+ if (xfs_readonly_buftarg(mp->m_ddev_targp))
+ return false;
if (xfs_readonly_buftarg(mp->m_log->l_targ))
return false;
if (XFS_FORCED_SHUTDOWN(mp))
int64_t lcounter;
long long res_used;
s32 batch;
+ uint64_t set_aside;
if (delta > 0) {
/*
else
batch = XFS_FDBLOCKS_BATCH;
+ /*
+ * Set aside allocbt blocks because these blocks are tracked as free
+ * space but not available for allocation. Technically this means that a
+ * single reservation cannot consume all remaining free space, but the
+ * ratio of allocbt blocks to usable free blocks should be rather small.
+ * The tradeoff without this is that filesystems that maintain high
+ * perag block reservations can over reserve physical block availability
+ * and fail physical allocation, which leads to much more serious
+ * problems (i.e. transaction abort, pagecache discards, etc.) than
+ * slightly premature -ENOSPC.
+ */
+ set_aside = mp->m_alloc_set_aside + atomic64_read(&mp->m_allocbt_blks);
percpu_counter_add_batch(&mp->m_fdblocks, delta, batch);
- if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside,
+ if (__percpu_counter_compare(&mp->m_fdblocks, set_aside,
XFS_FDBLOCKS_BATCH) >= 0) {
/* we had space! */
return 0;
* extents or anything related to the rt device.
*/
struct percpu_counter m_delalloc_blks;
+ /*
+ * Global count of allocation btree blocks in use across all AGs. Only
+ * used when perag reservation is enabled. Helps prevent block
+ * reservation from attempting to reserve allocation btree blocks.
+ */
+ atomic64_t m_allocbt_blks;
struct radix_tree_root m_perag_tree; /* per-ag accounting info */
spinlock_t m_perag_lock; /* lock for m_perag_tree */
XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16);
XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176);
XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28);
- XFS_CHECK_STRUCT_SIZE(xfs_ictimestamp_t, 8);
- XFS_CHECK_STRUCT_SIZE(struct xfs_legacy_ictimestamp, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_log_timestamp_t, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_log_legacy_timestamp, 8);
XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52);
XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20);
if (error)
goto out;
- error = filemap_write_and_wait_range(inode->i_mapping, offset, len);
+ error = filemap_write_and_wait_range(inode->i_mapping, offset,
+ offset + len - 1);
if (error)
goto out;
bp = xfs_trans_getsb(tp);
sbp = bp->b_addr;
- /*
- * Check that superblock mods match the mods made to AGF counters.
- */
- ASSERT((tp->t_fdblocks_delta + tp->t_res_fdblocks_delta) ==
- (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta +
- tp->t_ag_btree_delta));
-
/*
* Only update the superblock counters if we are logging them
*/
/* apply remaining deltas */
spin_lock(&mp->m_sb_lock);
+ mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta;
+ mp->m_sb.sb_icount += idelta;
+ mp->m_sb.sb_ifree += ifreedelta;
mp->m_sb.sb_frextents += rtxdelta;
mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
mp->m_sb.sb_agcount += tp->t_agcount_delta;
int64_t t_res_fdblocks_delta; /* on-disk only chg */
int64_t t_frextents_delta;/* superblock freextents chg*/
int64_t t_res_frextents_delta; /* on-disk only chg */
-#if defined(DEBUG) || defined(XFS_WARN)
- int64_t t_ag_freeblks_delta; /* debugging counter */
- int64_t t_ag_flist_delta; /* debugging counter */
- int64_t t_ag_btree_delta; /* debugging counter */
-#endif
int64_t t_dblocks_delta;/* superblock dblocks change */
int64_t t_agcount_delta;/* superblock agcount change */
int64_t t_imaxpct_delta;/* superblock imaxpct change */
*/
#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
-#if defined(DEBUG) || defined(XFS_WARN)
-#define xfs_trans_agblocks_delta(tp, d) ((tp)->t_ag_freeblks_delta += (int64_t)d)
-#define xfs_trans_agflist_delta(tp, d) ((tp)->t_ag_flist_delta += (int64_t)d)
-#define xfs_trans_agbtree_delta(tp, d) ((tp)->t_ag_btree_delta += (int64_t)d)
-#else
-#define xfs_trans_agblocks_delta(tp, d)
-#define xfs_trans_agflist_delta(tp, d)
-#define xfs_trans_agbtree_delta(tp, d)
-#endif
-
/*
* XFS transaction mechanism exported interfaces.
*/
#ifndef _ASM_GENERIC_BITOPS_FIND_H_
#define _ASM_GENERIC_BITOPS_FIND_H_
+extern unsigned long _find_next_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long nbits,
+ unsigned long start, unsigned long invert, unsigned long le);
+extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
+
#ifndef find_next_bit
/**
* find_next_bit - find the next set bit in a memory region
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
- size, unsigned long offset);
+static inline
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
+}
#endif
#ifndef find_next_and_bit
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-extern unsigned long find_next_and_bit(const unsigned long *addr1,
+static inline
+unsigned long find_next_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
- unsigned long offset);
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr1 & *addr2 & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
+}
#endif
#ifndef find_next_zero_bit
* Returns the bit number of the next zero bit
* If no bits are zero, returns @size.
*/
-extern unsigned long find_next_zero_bit(const unsigned long *addr, unsigned
- long size, unsigned long offset);
+static inline
+unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
+}
#endif
#ifdef CONFIG_GENERIC_FIND_FIRST_BIT
* Returns the bit number of the first set bit.
* If no bits are set, returns @size.
*/
-extern unsigned long find_first_bit(const unsigned long *addr,
- unsigned long size);
+static inline
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_bit(addr, size);
+}
/**
* find_first_zero_bit - find the first cleared bit in a memory region
* Returns the bit number of the first cleared bit.
* If no bits are zero, returns @size.
*/
-extern unsigned long find_first_zero_bit(const unsigned long *addr,
- unsigned long size);
+static inline
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr | ~GENMASK(size - 1, 0);
+
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_first_zero_bit(addr, size);
+}
#else /* CONFIG_GENERIC_FIND_FIRST_BIT */
#ifndef find_first_bit
#endif /* CONFIG_GENERIC_FIND_FIRST_BIT */
+#ifndef find_last_bit
+/**
+ * find_last_bit - find the last set bit in a memory region
+ * @addr: The address to start the search at
+ * @size: The number of bits to search
+ *
+ * Returns the bit number of the last set bit, or size.
+ */
+static inline
+unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? __fls(val) : size;
+ }
+
+ return _find_last_bit(addr, size);
+}
+#endif
+
/**
* find_next_clump8 - find next 8-bit clump with set bits in a memory region
* @clump: location to store copy of found clump
#ifndef _ASM_GENERIC_BITOPS_LE_H_
#define _ASM_GENERIC_BITOPS_LE_H_
+#include <asm-generic/bitops/find.h>
#include <asm/types.h>
#include <asm/byteorder.h>
+#include <linux/swab.h>
#if defined(__LITTLE_ENDIAN)
#define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
#ifndef find_next_zero_bit_le
-extern unsigned long find_next_zero_bit_le(const void *addr,
- unsigned long size, unsigned long offset);
+static inline
+unsigned long find_next_zero_bit_le(const void *addr, unsigned
+ long size, unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *(const unsigned long *)addr;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = swab(val) | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
+}
#endif
#ifndef find_next_bit_le
-extern unsigned long find_next_bit_le(const void *addr,
- unsigned long size, unsigned long offset);
+static inline
+unsigned long find_next_bit_le(const void *addr, unsigned
+ long size, unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *(const unsigned long *)addr;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = swab(val) & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
+}
#endif
#ifndef find_first_zero_bit_le
#define BITS_PER_LONG_LONG 64
#endif
+/*
+ * small_const_nbits(n) is true precisely when it is known at compile-time
+ * that BITMAP_SIZE(n) is 1, i.e. 1 <= n <= BITS_PER_LONG. This allows
+ * various bit/bitmap APIs to provide a fast inline implementation. Bitmaps
+ * of size 0 are very rare, and a compile-time-known-size 0 is most likely
+ * a sign of error. They will be handled correctly by the bit/bitmap APIs,
+ * but using the out-of-line functions, so that the inline implementations
+ * can unconditionally dereference the pointer(s).
+ */
+#define small_const_nbits(nbits) \
+ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)
+
#endif /* __ASM_GENERIC_BITS_PER_LONG */
#endif
#endif /* CONFIG_GENERIC_IOMAP */
-/*
- * Convert a virtual cached pointer to an uncached pointer
- */
-#ifndef xlate_dev_kmem_ptr
-#define xlate_dev_kmem_ptr xlate_dev_kmem_ptr
-static inline void *xlate_dev_kmem_ptr(void *addr)
-{
- return addr;
-}
-#endif
-
#ifndef xlate_dev_mem_ptr
#define xlate_dev_mem_ptr xlate_dev_mem_ptr
static inline void *xlate_dev_mem_ptr(phys_addr_t addr)
#define PRCI_CLK_CLTXPLL 5
#define PRCI_CLK_TLCLK 6
#define PRCI_CLK_PCLK 7
+#define PRCI_CLK_PCIE_AUX 8
#endif /* __DT_BINDINGS_CLOCK_SIFIVE_FU740_PRCI_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _DT_BINDINGS_ATMEL_MAXTOUCH_H
+#define _DT_BINDINGS_ATMEL_MAXTOUCH_H
+
+#define ATMEL_MXT_WAKEUP_NONE 0
+#define ATMEL_MXT_WAKEUP_I2C_SCL 1
+#define ATMEL_MXT_WAKEUP_GPIO 2
+
+#endif /* _DT_BINDINGS_ATMEL_MAXTOUCH_H */
--- /dev/null
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_ALIGN_H
+#define _LINUX_ALIGN_H
+
+#include <linux/const.h>
+
+/* @a is a power of 2 value */
+#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
+#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a))
+#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask))
+#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
+#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a)))
+#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
+
+#endif /* _LINUX_ALIGN_H */
return async_schedule_node_domain(func, dev, dev_to_node(dev), domain);
}
-void async_unregister_domain(struct async_domain *domain);
extern void async_synchronize_full(void);
extern void async_synchronize_full_domain(struct async_domain *domain);
extern void async_synchronize_cookie(async_cookie_t cookie);
return NULL;
}
+extern unsigned int bio_max_size(struct bio *bio);
+
/**
* bio_full - check if the bio is full
* @bio: bio to check
if (bio->bi_vcnt >= bio->bi_max_vecs)
return true;
- if (bio->bi_iter.bi_size > UINT_MAX - len)
+ if (bio->bi_iter.bi_size > bio_max_size(bio) - len)
return true;
return false;
#ifndef __ASSEMBLY__
-#include <linux/types.h>
+#include <linux/align.h>
#include <linux/bitops.h>
+#include <linux/limits.h>
#include <linux/string.h>
-#include <linux/kernel.h>
+#include <linux/types.h>
+
+struct device;
/*
* bitmaps provide bit arrays that consume one or more unsigned
* Allocation and deallocation of bitmap.
* Provided in lib/bitmap.c to avoid circular dependency.
*/
-extern unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags);
-extern unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags);
-extern void bitmap_free(const unsigned long *bitmap);
+unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags);
+unsigned long *bitmap_zalloc(unsigned int nbits, gfp_t flags);
+void bitmap_free(const unsigned long *bitmap);
+
+/* Managed variants of the above. */
+unsigned long *devm_bitmap_alloc(struct device *dev,
+ unsigned int nbits, gfp_t flags);
+unsigned long *devm_bitmap_zalloc(struct device *dev,
+ unsigned int nbits, gfp_t flags);
/*
* lib/bitmap.c provides these functions:
*/
-extern int __bitmap_equal(const unsigned long *bitmap1,
- const unsigned long *bitmap2, unsigned int nbits);
-extern bool __pure __bitmap_or_equal(const unsigned long *src1,
- const unsigned long *src2,
- const unsigned long *src3,
- unsigned int nbits);
-extern void __bitmap_complement(unsigned long *dst, const unsigned long *src,
- unsigned int nbits);
-extern void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
- unsigned int shift, unsigned int nbits);
-extern void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
- unsigned int shift, unsigned int nbits);
-extern void bitmap_cut(unsigned long *dst, const unsigned long *src,
- unsigned int first, unsigned int cut,
- unsigned int nbits);
-extern int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, unsigned int nbits);
-extern void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, unsigned int nbits);
-extern void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
- const unsigned long *bitmap2, unsigned int nbits);
-extern int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+int __bitmap_equal(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
+bool __pure __bitmap_or_equal(const unsigned long *src1,
+ const unsigned long *src2,
+ const unsigned long *src3,
+ unsigned int nbits);
+void __bitmap_complement(unsigned long *dst, const unsigned long *src,
+ unsigned int nbits);
+void __bitmap_shift_right(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits);
+void __bitmap_shift_left(unsigned long *dst, const unsigned long *src,
+ unsigned int shift, unsigned int nbits);
+void bitmap_cut(unsigned long *dst, const unsigned long *src,
+ unsigned int first, unsigned int cut, unsigned int nbits);
+int __bitmap_and(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
+void __bitmap_or(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
+void __bitmap_xor(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
+int __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
+void __bitmap_replace(unsigned long *dst,
+ const unsigned long *old, const unsigned long *new,
+ const unsigned long *mask, unsigned int nbits);
+int __bitmap_intersects(const unsigned long *bitmap1,
const unsigned long *bitmap2, unsigned int nbits);
-extern void __bitmap_replace(unsigned long *dst,
- const unsigned long *old, const unsigned long *new,
- const unsigned long *mask, unsigned int nbits);
-extern int __bitmap_intersects(const unsigned long *bitmap1,
- const unsigned long *bitmap2, unsigned int nbits);
-extern int __bitmap_subset(const unsigned long *bitmap1,
- const unsigned long *bitmap2, unsigned int nbits);
-extern int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
-extern void __bitmap_set(unsigned long *map, unsigned int start, int len);
-extern void __bitmap_clear(unsigned long *map, unsigned int start, int len);
-
-extern unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
- unsigned long size,
- unsigned long start,
- unsigned int nr,
- unsigned long align_mask,
- unsigned long align_offset);
+int __bitmap_subset(const unsigned long *bitmap1,
+ const unsigned long *bitmap2, unsigned int nbits);
+int __bitmap_weight(const unsigned long *bitmap, unsigned int nbits);
+void __bitmap_set(unsigned long *map, unsigned int start, int len);
+void __bitmap_clear(unsigned long *map, unsigned int start, int len);
+
+unsigned long bitmap_find_next_zero_area_off(unsigned long *map,
+ unsigned long size,
+ unsigned long start,
+ unsigned int nr,
+ unsigned long align_mask,
+ unsigned long align_offset);
/**
* bitmap_find_next_zero_area - find a contiguous aligned zero area
align_mask, 0);
}
-extern int bitmap_parse(const char *buf, unsigned int buflen,
+int bitmap_parse(const char *buf, unsigned int buflen,
unsigned long *dst, int nbits);
-extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
+int bitmap_parse_user(const char __user *ubuf, unsigned int ulen,
unsigned long *dst, int nbits);
-extern int bitmap_parselist(const char *buf, unsigned long *maskp,
+int bitmap_parselist(const char *buf, unsigned long *maskp,
int nmaskbits);
-extern int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
+int bitmap_parselist_user(const char __user *ubuf, unsigned int ulen,
unsigned long *dst, int nbits);
-extern void bitmap_remap(unsigned long *dst, const unsigned long *src,
+void bitmap_remap(unsigned long *dst, const unsigned long *src,
const unsigned long *old, const unsigned long *new, unsigned int nbits);
-extern int bitmap_bitremap(int oldbit,
+int bitmap_bitremap(int oldbit,
const unsigned long *old, const unsigned long *new, int bits);
-extern void bitmap_onto(unsigned long *dst, const unsigned long *orig,
+void bitmap_onto(unsigned long *dst, const unsigned long *orig,
const unsigned long *relmap, unsigned int bits);
-extern void bitmap_fold(unsigned long *dst, const unsigned long *orig,
+void bitmap_fold(unsigned long *dst, const unsigned long *orig,
unsigned int sz, unsigned int nbits);
-extern int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
-extern void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
-extern int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
+int bitmap_find_free_region(unsigned long *bitmap, unsigned int bits, int order);
+void bitmap_release_region(unsigned long *bitmap, unsigned int pos, int order);
+int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order);
#ifdef __BIG_ENDIAN
-extern void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits);
+void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits);
#else
#define bitmap_copy_le bitmap_copy
#endif
-extern unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits);
-extern int bitmap_print_to_pagebuf(bool list, char *buf,
+unsigned int bitmap_ord_to_pos(const unsigned long *bitmap, unsigned int ord, unsigned int nbits);
+int bitmap_print_to_pagebuf(bool list, char *buf,
const unsigned long *maskp, int nmaskbits);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
-/*
- * The static inlines below do not handle constant nbits==0 correctly,
- * so make such users (should any ever turn up) call the out-of-line
- * versions.
- */
-#define small_const_nbits(nbits) \
- (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)
-
static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
* therefore conversion is not needed when copying data from/to arrays of u32.
*/
#if BITS_PER_LONG == 64
-extern void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf,
+void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf,
unsigned int nbits);
-extern void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap,
+void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap,
unsigned int nbits);
#else
#define bitmap_from_arr32(bitmap, buf, nbits) \
})
#endif
-#ifndef find_last_bit
-/**
- * find_last_bit - find the last set bit in a memory region
- * @addr: The address to start the search at
- * @size: The number of bits to search
- *
- * Returns the bit number of the last set bit, or size.
- */
-extern unsigned long find_last_bit(const unsigned long *addr,
- unsigned long size);
-#endif
-
#endif /* __KERNEL__ */
#endif
*/
void (*put_budget)(struct request_queue *, int);
- /*
- * @set_rq_budget_toekn: store rq's budget token
+ /**
+ * @set_rq_budget_token: store rq's budget token
*/
void (*set_rq_budget_token)(struct request *, int);
- /*
- * @get_rq_budget_toekn: retrieve rq's budget token
+ /**
+ * @get_rq_budget_token: retrieve rq's budget token
*/
int (*get_rq_budget_token)(struct request *);
#include <linux/minmax.h>
#include <linux/timer.h>
#include <linux/workqueue.h>
-#include <linux/pagemap.h>
#include <linux/backing-dev-defs.h>
#include <linux/wait.h>
#include <linux/mempool.h>
};
struct queue_limits {
+ unsigned int bio_max_bytes;
+
enum blk_bounce bounce;
unsigned long seg_boundary_mask;
unsigned long virt_boundary_mask;
};
/* Possible states for alu_state member. */
-#define BPF_ALU_SANITIZE_SRC 1U
-#define BPF_ALU_SANITIZE_DST 2U
+#define BPF_ALU_SANITIZE_SRC (1U << 0)
+#define BPF_ALU_SANITIZE_DST (1U << 1)
#define BPF_ALU_NEG_VALUE (1U << 2)
#define BPF_ALU_NON_POINTER (1U << 3)
+#define BPF_ALU_IMMEDIATE (1U << 4)
#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \
BPF_ALU_SANITIZE_DST)
struct buffer_head *__bread_gfp(struct block_device *,
sector_t block, unsigned size, gfp_t gfp);
void invalidate_bh_lrus(void);
+void invalidate_bh_lrus_cpu(int cpu);
+bool has_bh_in_lru(int cpu, void *dummy);
struct buffer_head *alloc_buffer_head(gfp_t gfp_flags);
void free_buffer_head(struct buffer_head * bh);
void unlock_buffer(struct buffer_head *bh);
static inline void invalidate_inode_buffers(struct inode *inode) {}
static inline int remove_inode_buffers(struct inode *inode) { return 1; }
static inline int sync_mapping_buffers(struct address_space *mapping) { return 0; }
+static inline void invalidate_bh_lrus_cpu(int cpu) {}
+static inline bool has_bh_in_lru(int cpu, void *dummy) { return 0; }
#define buffer_heads_over_limit 0
#endif /* CONFIG_BLOCK */
unsigned int order_per_bit,
const char *name,
struct cma **res_cma);
-extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
+extern struct page *cma_alloc(struct cma *cma, unsigned long count, unsigned int align,
bool no_warn);
-extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count);
+extern bool cma_release(struct cma *cma, const struct page *pages, unsigned long count);
extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data);
#endif
}
#ifdef CONFIG_COMPACTION
-extern int sysctl_compact_memory;
extern unsigned int sysctl_compaction_proactiveness;
extern int sysctl_compaction_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos);
__diag_push(); \
__diag_ignore(GCC, 8, "-Wattribute-alias", \
"Type aliasing is used to sanitize syscall arguments");\
- asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)); \
asmlinkage long compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) \
__attribute__((alias(__stringify(__se_compat_sys##name)))); \
ALLOW_ERROR_INJECTION(compat_sys##name, ERRNO); \
/* SPDX-License-Identifier: GPL-2.0-or-later */
-/* -*- mode: c; c-basic-offset: 8; -*-
- * vim: noexpandtab sw=8 ts=8 sts=0:
- *
+/*
* configfs.h - definitions for the device driver filesystem
*
* Based on sysfs:
CPUHP_AP_PERF_X86_RAPL_ONLINE,
CPUHP_AP_PERF_X86_CQM_ONLINE,
CPUHP_AP_PERF_X86_CSTATE_ONLINE,
+ CPUHP_AP_PERF_X86_IDXD_ONLINE,
CPUHP_AP_PERF_S390_CF_ONLINE,
CPUHP_AP_PERF_S390_CFD_ONLINE,
CPUHP_AP_PERF_S390_SF_ONLINE,
* Williams, Ross N., ross<at>ross.net
* (see URL http://www.ross.net/crc/download/crc_v3.txt).
*/
-u8 crc8(const u8 table[CRC8_TABLE_SIZE], u8 *pdata, size_t nbytes, u8 crc);
+u8 crc8(const u8 table[CRC8_TABLE_SIZE], const u8 *pdata, size_t nbytes, u8 crc);
#endif /* __CRC8_H_ */
groups_free(group_info); \
} while (0)
-extern struct group_info init_groups;
#ifdef CONFIG_MULTIUSER
extern struct group_info *groups_alloc(int);
extern void groups_free(struct group_info *);
return 0;
}
-static inline void delayacct_set_flag(int flag)
+static inline void delayacct_set_flag(struct task_struct *p, int flag)
{
- if (current->delays)
- current->delays->flags |= flag;
+ if (p->delays)
+ p->delays->flags |= flag;
}
-static inline void delayacct_clear_flag(int flag)
+static inline void delayacct_clear_flag(struct task_struct *p, int flag)
{
- if (current->delays)
- current->delays->flags &= ~flag;
+ if (p->delays)
+ p->delays->flags &= ~flag;
}
static inline void delayacct_tsk_init(struct task_struct *tsk)
static inline void delayacct_blkio_start(void)
{
- delayacct_set_flag(DELAYACCT_PF_BLKIO);
+ delayacct_set_flag(current, DELAYACCT_PF_BLKIO);
if (current->delays)
__delayacct_blkio_start();
}
{
if (p->delays)
__delayacct_blkio_end(p);
- delayacct_clear_flag(DELAYACCT_PF_BLKIO);
+ delayacct_clear_flag(p, DELAYACCT_PF_BLKIO);
}
static inline int delayacct_add_tsk(struct taskstats *d,
}
#else
-static inline void delayacct_set_flag(int flag)
+static inline void delayacct_set_flag(struct task_struct *p, int flag)
{}
-static inline void delayacct_clear_flag(int flag)
+static inline void delayacct_clear_flag(struct task_struct *p, int flag)
{}
static inline void delayacct_init(void)
{}
gfp_t gfp);
void (*free_pages)(struct device *dev, size_t size, struct page *vaddr,
dma_addr_t dma_handle, enum dma_data_direction dir);
+ struct sg_table *(*alloc_noncontiguous)(struct device *dev, size_t size,
+ enum dma_data_direction dir, gfp_t gfp,
+ unsigned long attrs);
+ void (*free_noncontiguous)(struct device *dev, size_t size,
+ struct sg_table *sgt, enum dma_data_direction dir);
int (*mmap)(struct device *, struct vm_area_struct *,
void *, dma_addr_t, size_t, unsigned long attrs);
}
#endif /* CONFIG_DMA_DECLARE_COHERENT */
+/*
+ * This is the actual return value from the ->alloc_noncontiguous method.
+ * The users of the DMA API should only care about the sg_table, but to make
+ * the DMA-API internal vmaping and freeing easier we stash away the page
+ * array as well (except for the fallback case). This can go away any time,
+ * e.g. when a vmap-variant that takes a scatterlist comes along.
+ */
+struct dma_sgt_handle {
+ struct sg_table sgt;
+ struct page **pages;
+};
+#define sgt_handle(sgt) \
+ container_of((sgt), struct dma_sgt_handle, sgt)
+
int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt,
void *cpu_addr, dma_addr_t dma_addr, size_t size,
unsigned long attrs);
{
debug_dma_mapping_error(dev, dma_addr);
- if (dma_addr == DMA_MAPPING_ERROR)
+ if (unlikely(dma_addr == DMA_MAPPING_ERROR))
return -ENOMEM;
return 0;
}
size_t dma_max_mapping_size(struct device *dev);
bool dma_need_sync(struct device *dev, dma_addr_t dma_addr);
unsigned long dma_get_merge_boundary(struct device *dev);
+struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
+ enum dma_data_direction dir, gfp_t gfp, unsigned long attrs);
+void dma_free_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt, enum dma_data_direction dir);
+void *dma_vmap_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt);
+void dma_vunmap_noncontiguous(struct device *dev, void *vaddr);
+int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct sg_table *sgt);
#else /* CONFIG_HAS_DMA */
static inline dma_addr_t dma_map_page_attrs(struct device *dev,
struct page *page, size_t offset, size_t size,
{
return 0;
}
+static inline struct sg_table *dma_alloc_noncontiguous(struct device *dev,
+ size_t size, enum dma_data_direction dir, gfp_t gfp,
+ unsigned long attrs)
+{
+ return NULL;
+}
+static inline void dma_free_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt, enum dma_data_direction dir)
+{
+}
+static inline void *dma_vmap_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt)
+{
+ return NULL;
+}
+static inline void dma_vunmap_noncontiguous(struct device *dev, void *vaddr)
+{
+}
+static inline int dma_mmap_noncontiguous(struct device *dev,
+ struct vm_area_struct *vma, size_t size, struct sg_table *sgt)
+{
+ return -EINVAL;
+}
#endif /* CONFIG_HAS_DMA */
struct page *dma_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp);
void dma_free_pages(struct device *dev, size_t size, struct page *page,
dma_addr_t dma_handle, enum dma_data_direction dir);
+int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct page *page);
static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
static inline void *dma_alloc_coherent(struct device *dev, size_t size,
dma_addr_t *dma_handle, gfp_t gfp)
{
-
return dma_alloc_attrs(dev, size, dma_handle, gfp,
(gfp & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0);
}
unsigned char alloc_type[MAX_ACTIVE_LOGS];
/* SIT and NAT version bitmap */
- unsigned char sit_nat_version_bitmap[1];
+ unsigned char sit_nat_version_bitmap[];
} __packed;
#define CP_CHKSUM_OFFSET 4092 /* default chksum offset in checkpoint */
/* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)0x1000)
-/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
+/* File is huge (eg. /dev/mem): treat loff_t as unsigned */
#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
/* File is opened with O_PATH; almost nothing can be done with it */
* @i_mmap: Tree of private and shared mappings.
* @i_mmap_rwsem: Protects @i_mmap and @i_mmap_writable.
* @nrpages: Number of page entries, protected by the i_pages lock.
- * @nrexceptional: Shadow or DAX entries, protected by the i_pages lock.
* @writeback_index: Writeback starts here.
* @a_ops: Methods.
* @flags: Error bits and flags (AS_*).
struct rb_root_cached i_mmap;
struct rw_semaphore i_mmap_rwsem;
unsigned long nrpages;
- unsigned long nrexceptional;
pgoff_t writeback_index;
const struct address_space_operations *a_ops;
unsigned long flags;
/* }}}1 */
#endif /* GENL_MAGIC_FUNC_H */
-/* vim: set foldmethod=marker foldlevel=1 nofoldenable : */
/* }}}1 */
#endif /* GENL_MAGIC_STRUCT_H */
-/* vim: set foldmethod=marker nofoldenable : */
/*
* We get the zone list from the current node and the gfp_mask.
- * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
+ * This zone list contains a maximum of MAX_NUMNODES*MAX_NR_ZONES zones.
* There are two zonelists per node, one for all zones with memory and
* one containing just zones from the node the zonelist belongs to.
*
extern struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask,
int nid, nodemask_t *nodemask);
#endif
-void free_contig_range(unsigned long pfn, unsigned int nr_pages);
+void free_contig_range(unsigned long pfn, unsigned long nr_pages);
#ifdef CONFIG_CMA
/* CMA stuff */
/**
* @valid_mask:
*
- * If not %NULL holds bitmask of GPIOs which are valid to be included
+ * If not %NULL, holds bitmask of GPIOs which are valid to be included
* in IRQ domain of the chip.
*/
unsigned long *valid_mask;
* output.
*
* A gpio_chip can help platforms abstract various sources of GPIOs so
- * they can all be accessed through a common programing interface.
+ * they can all be accessed through a common programming interface.
* Example sources would be SOC controllers, FPGAs, multifunction
* chips, dedicated GPIO expanders, and so on.
*
/**
* @valid_mask:
*
- * If not %NULL holds bitmask of GPIOs which are valid to be used
+ * If not %NULL, holds bitmask of GPIOs which are valid to be used
* from the chip.
*/
unsigned long *valid_mask;
#if defined(CONFIG_OF_GPIO)
/*
- * If CONFIG_OF is enabled, then all GPIO controllers described in the
- * device tree automatically may have an OF translation
+ * If CONFIG_OF_GPIO is enabled, then all GPIO controllers described in
+ * the device tree automatically may have an OF translation
*/
/**
* for GPIOs will fail rudely.
*
* gpiochip_add_data() must only be called after gpiolib initialization,
- * ie after core_initcall().
+ * i.e. after core_initcall().
*
* If gc->base is negative, this requests dynamic assignment of
* a range of valid GPIOs.
kunmap_local(to);
}
+static inline void memzero_page(struct page *page, size_t offset, size_t len)
+{
+ char *addr = kmap_atomic(page);
+ memset(addr + offset, 0, len);
+ kunmap_atomic(addr);
+}
+
#endif /* _LINUX_HIGHMEM_H */
TRANSPARENT_HUGEPAGE_DEFRAG_REQ_MADV_FLAG,
TRANSPARENT_HUGEPAGE_DEFRAG_KHUGEPAGED_FLAG,
TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG,
-#ifdef CONFIG_DEBUG_VM
- TRANSPARENT_HUGEPAGE_DEBUG_COW_FLAG,
-#endif
};
struct kobject;
#include <linux/kref.h>
#include <linux/pgtable.h>
#include <linux/gfp.h>
+#include <linux/userfaultfd_k.h>
struct ctl_table;
struct user_struct;
unsigned long hugetlb_total_pages(void);
vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, unsigned int flags);
+#ifdef CONFIG_USERFAULTFD
int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
+ enum mcopy_atomic_mode mode,
struct page **pagep);
+#endif /* CONFIG_USERFAULTFD */
bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
struct vm_area_struct *vma,
vm_flags_t vm_flags);
extern struct mutex *hugetlb_fault_mutex_table;
u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx);
-pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud);
+pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud);
struct address_space *hugetlb_page_mapping_lock_write(struct page *hpage);
/* arch callbacks */
-pte_t *huge_pte_alloc(struct mm_struct *mm,
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz);
pte_t *huge_pte_offset(struct mm_struct *mm,
unsigned long addr, unsigned long sz);
unsigned long address, unsigned long end, pgprot_t newprot);
bool is_hugetlb_entry_migration(pte_t pte);
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma);
#else /* !CONFIG_HUGETLB_PAGE */
BUG();
}
+#ifdef CONFIG_USERFAULTFD
static inline int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
pte_t *dst_pte,
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
+ enum mcopy_atomic_mode mode,
struct page **pagep)
{
BUG();
return 0;
}
+#endif /* CONFIG_USERFAULTFD */
static inline pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr,
unsigned long sz)
return 0;
}
+static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { }
+
#endif /* !CONFIG_HUGETLB_PAGE */
/*
* hugepages at page global directory. If arch support
#define HSTATE_NAME_LEN 32
/* Defines one hugetlb page size */
struct hstate {
+ struct mutex resize_lock;
int next_nid_to_alloc;
int next_nid_to_free;
unsigned int order;
struct hstate *hstate;
};
+int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list);
struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve);
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
#else /* CONFIG_HUGETLB_PAGE */
struct hstate {};
+static inline int isolate_or_dissolve_huge_page(struct page *page,
+ struct list_head *list)
+{
+ return -ENOMEM;
+}
+
static inline struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr,
int avoid_reserve)
}
#endif
+bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr);
+
+#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
+/*
+ * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
+ * implement this.
+ */
+#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
+#endif
+
#endif /* _LINUX_HUGETLB_H */
extern struct files_struct init_files;
extern struct fs_struct init_fs;
extern struct nsproxy init_nsproxy;
-extern struct group_info init_groups;
extern struct cred init_cred;
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
#ifdef CONFIG_BLK_DEV_INITRD
extern void __init reserve_initrd_mem(void);
+extern void wait_for_initramfs(void);
#else
static inline void __init reserve_initrd_mem(void) {}
+static inline void wait_for_initramfs(void) {}
#endif
extern phys_addr_t phys_initrd_start;
struct inode *io_inode; /* file being written to */
size_t io_size; /* size of the extent */
loff_t io_offset; /* offset in the file */
- void *io_private; /* file system private data */
struct bio *io_bio; /* bio being built */
struct bio io_inline_bio; /* MUST BE LAST! */
};
void iomap_finish_ioends(struct iomap_ioend *ioend, int error);
void iomap_ioend_try_merge(struct iomap_ioend *ioend,
- struct list_head *more_ioends,
- void (*merge_private)(struct iomap_ioend *ioend,
- struct iomap_ioend *next));
+ struct list_head *more_ioends);
void iomap_sort_ioends(struct list_head *ioend_list);
int iomap_writepage(struct page *page, struct writeback_control *wbc,
struct iomap_writepage_ctx *wpc,
irq_hw_number_t hwirq_max, int direct_max,
const struct irq_domain_ops *ops,
void *host_data);
-struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
- unsigned int size,
- unsigned int first_irq,
- const struct irq_domain_ops *ops,
- void *host_data);
+struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
+ unsigned int size,
+ unsigned int first_irq,
+ const struct irq_domain_ops *ops,
+ void *host_data);
struct irq_domain *irq_domain_add_legacy(struct device_node *of_node,
unsigned int size,
unsigned int first_irq,
return d;
}
+static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
+ unsigned int size,
+ unsigned int first_irq,
+ const struct irq_domain_ops *ops,
+ void *host_data)
+{
+ return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data);
+}
+
/**
* irq_domain_add_linear() - Allocate and register a linear revmap irq_domain.
* @of_node: pointer to interrupt controller's device tree node.
*/
#define IS_ENABLED(option) __or(IS_BUILTIN(option), IS_MODULE(option))
-/*
- * IF_ENABLED(CONFIG_FOO, ptr) evaluates to (ptr) if CONFIG_FOO is set to 'y'
- * or 'm', NULL otherwise.
- */
-#define IF_ENABLED(option, ptr) (IS_ENABLED(option) ? (ptr) : NULL)
-
#endif /* __LINUX_KCONFIG_H */
#define _LINUX_KERNEL_H
#include <stdarg.h>
+#include <linux/align.h>
#include <linux/limits.h>
#include <linux/linkage.h>
#include <linux/stddef.h>
*/
#define REPEAT_BYTE(x) ((~0ul / 0xff) * (x))
-/* @a is a power of 2 value */
-#define ALIGN(x, a) __ALIGN_KERNEL((x), (a))
-#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a))
-#define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask))
-#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a)))
-#define PTR_ALIGN_DOWN(p, a) ((typeof(p))ALIGN_DOWN((unsigned long)(p), (a)))
-#define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0)
-
/* generic data direction definitions */
#define READ 0
#define WRITE 1
*/
#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]) + __must_be_array(arr))
+#define PTR_IF(cond, ptr) ((cond) ? (ptr) : NULL)
+
#define u64_to_user_ptr(x) ( \
{ \
typecheck(u64, (x)); \
};
/*
- * Bitmap of shrinker::id corresponding to memcg-aware shrinkers,
- * which have elements charged to this memcg.
+ * Bitmap and deferred work of shrinker::id corresponding to memcg-aware
+ * shrinkers, which have elements charged to this memcg.
*/
-struct memcg_shrinker_map {
+struct shrinker_info {
struct rcu_head rcu;
- unsigned long map[];
+ atomic_long_t *nr_deferred;
+ unsigned long *map;
};
/*
struct mem_cgroup_reclaim_iter iter;
- struct memcg_shrinker_map __rcu *shrinker_map;
+ struct shrinker_info __rcu *shrinker_info;
struct rb_node tree_node; /* RB tree node */
unsigned long usage_in_excess;/* Set to the value by which */
return false;
}
-extern int memcg_expand_shrinker_maps(int new_id);
-
-extern void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
- int nid, int shrinker_id);
+int alloc_shrinker_info(struct mem_cgroup *memcg);
+void free_shrinker_info(struct mem_cgroup *memcg);
+void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id);
+void reparent_shrinker_deferred(struct mem_cgroup *memcg);
#else
#define mem_cgroup_sockets_enabled 0
static inline void mem_cgroup_sk_alloc(struct sock *sk) { };
return false;
}
-static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
- int nid, int shrinker_id)
+static inline void set_shrinker_bit(struct mem_cgroup *memcg,
+ int nid, int shrinker_id)
{
}
#endif
int online_type; /* for passing data to online routine */
int nid; /* NID for this memory block */
struct device dev;
+ /*
+ * Number of vmemmap pages. These pages
+ * lay at the beginning of the memory block.
+ */
+ unsigned long nr_vmemmap_pages;
};
int arch_get_memory_phys_device(unsigned long start_pfn);
#else
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
-int create_memory_block_devices(unsigned long start, unsigned long size);
+int create_memory_block_devices(unsigned long start, unsigned long size,
+ unsigned long vmemmap_pages);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
*/
#define MHP_MERGE_RESOURCE ((__force mhp_t)BIT(0))
+/*
+ * We want memmap (struct page array) to be self contained.
+ * To do so, we will use the beginning of the hot-added range to build
+ * the page tables for the memmap array that describes the entire range.
+ * Only selected architectures support it with SPARSE_VMEMMAP.
+ */
+#define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1))
+
/*
* Extended parameters for memory hotplug:
* altmap: alternative allocator for memmap array (optional)
extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
+extern void adjust_present_page_count(struct zone *zone, long nr_pages);
/* VM interface that may be used by firmware interface */
+extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
+ struct zone *zone);
+extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
extern int online_pages(unsigned long pfn, unsigned long nr_pages,
- int online_type, int nid);
+ struct zone *zone);
extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
unsigned long end_pfn);
extern void __offline_isolated_pages(unsigned long start_pfn,
extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
struct mhp_params *params);
void arch_remove_linear_mapping(u64 start, u64 size);
+extern bool mhp_supports_memmap_on_memory(unsigned long size);
#endif /* CONFIG_MEMORY_HOTPLUG */
#endif /* __LINUX_MEMORY_HOTPLUG_H */
* @alloc: track pages consumed, private to vmemmap_populate()
*/
struct vmem_altmap {
- const unsigned long base_pfn;
+ unsigned long base_pfn;
const unsigned long end_pfn;
const unsigned long reserve;
unsigned long free;
MR_MEMPOLICY_MBIND,
MR_NUMA_MISPLACED,
MR_CONTIG_RANGE,
+ MR_LONGTERM_PIN,
MR_TYPES
};
unsigned long private, enum migrate_mode mode, int reason);
extern struct page *alloc_migration_target(struct page *page, unsigned long private);
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
-extern void putback_movable_page(struct page *page);
-extern void migrate_prep(void);
-extern void migrate_prep_local(void);
extern void migrate_page_states(struct page *newpage, struct page *page);
extern void migrate_page_copy(struct page *newpage, struct page *page);
extern int migrate_huge_page_move_mapping(struct address_space *mapping,
static inline int isolate_movable_page(struct page *page, isolate_mode_t mode)
{ return -EBUSY; }
-static inline int migrate_prep(void) { return -ENOSYS; }
-static inline int migrate_prep_local(void) { return -ENOSYS; }
-
static inline void migrate_page_states(struct page *newpage, struct page *page)
{
}
* embedding these tags into addresses that point to these memory regions, and
* checking that the memory and the pointer tags match on memory accesses)
* redefine this macro to strip tags from pointers.
- * It's defined as noop for arcitectures that don't support memory tagging.
+ * It's defined as noop for architectures that don't support memory tagging.
*/
#ifndef untagged_addr
#define untagged_addr(addr) (addr)
# define VM_GROWSUP VM_NONE
#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+# define VM_UFFD_MINOR_BIT 37
+# define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */
+#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+# define VM_UFFD_MINOR VM_NONE
+#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+
/* Bits set in the VMA until the stack is in its final location */
#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ)
}
#endif
+static inline bool is_zone_movable_page(const struct page *page)
+{
+ return page_zonenum(page) == ZONE_MOVABLE;
+}
+
#ifdef CONFIG_DEV_PAGEMAP_OPS
void free_devmap_managed_page(struct page *page);
DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
}
#endif
+/* MIGRATE_CMA and ZONE_MOVABLE do not allow pin pages */
+#ifdef CONFIG_MIGRATION
+static inline bool is_pinnable_page(struct page *page)
+{
+ return !(is_zone_movable_page(page) || is_migrate_cma_page(page)) ||
+ is_zero_pfn(page_to_pfn(page));
+}
+#else
+static inline bool is_pinnable_page(struct page *page)
+{
+ return true;
+}
+#endif
+
static inline void set_page_zone(struct page *page, enum zone_type zone)
{
page->flags &= ~(ZONES_MASK << ZONES_PGSHIFT);
* pageblocks to MIGRATE_CMA which can be done by
* __free_pageblock_cma() function. What is important though
* is that a range of pageblocks must be aligned to
- * MAX_ORDER_NR_PAGES should biggest page be bigger then
+ * MAX_ORDER_NR_PAGES should biggest page be bigger than
* a single pageblock.
*/
MIGRATE_CMA,
* to increase the number of THP/huge pages. Notable special cases are:
*
* 1. Pinned pages: (long-term) pinning of movable pages might
- * essentially turn such pages unmovable. Memory offlining might
- * retry a long time.
+ * essentially turn such pages unmovable. Therefore, we do not allow
+ * pinning long-term pages in ZONE_MOVABLE. When pages are pinned and
+ * faulted, they come from the right zone right away. However, it is
+ * still possible that address space already has pages in
+ * ZONE_MOVABLE at the time when pages are pinned (i.e. user has
+ * touches that memory before pinning). In such case we migrate them
+ * to a different zone. When migration fails - pinning fails.
* 2. memblock allocations: kernelcore/movablecore setups might create
* situations where ZONE_MOVABLE contains unmovable allocations
* after boot. Memory offlining and allocations fail early.
* techniques might use alloc_contig_range() to hide previously
* exposed pages from the buddy again (e.g., to implement some sort
* of memory unplug in virtio-mem).
+ * 6. ZERO_PAGE(0), kernelcore/movablecore setups might create
+ * situations where ZERO_PAGE(0) which is allocated differently
+ * on different platforms may end up in a movable zone. ZERO_PAGE(0)
+ * cannot be migrated.
+ * 7. Memory-hotplug: when using memmap_on_memory and onlining the
+ * memory to the MOVABLE zone, the vmemmap pages are also placed in
+ * such zone. Such pages cannot be really moved around as they are
+ * self-stored in the range, but they are treated as movable when
+ * the range they describe is about to be offlined.
*
* In general, no unmovable allocations that degrade memory offlining
* should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range())
#ifdef CONFIG_MEMORY_HOTPLUG
void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
-#ifdef CONFIG_MEMORY_HOTREMOVE
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn);
#endif
-#endif
static inline struct mem_section *__pfn_to_section(unsigned long pfn)
{
/*
* The arch hooks to setup up msi irqs. Default functions are implemented
* as weak symbols so that they /can/ be overriden by architecture specific
- * code if needed. These hooks must be enabled by the architecture or by
- * drivers which depend on them via msi_controller based MSI handling.
+ * code if needed. These hooks can only be enabled by the architecture.
*
* If CONFIG_PCI_MSI_ARCH_FALLBACKS is not selected they are replaced by
* stubs with warnings.
void arch_teardown_msi_irq(unsigned int irq);
int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type);
void arch_teardown_msi_irqs(struct pci_dev *dev);
-void default_teardown_msi_irqs(struct pci_dev *dev);
#else
static inline int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
{
void arch_restore_msi_irqs(struct pci_dev *dev);
void default_restore_msi_irqs(struct pci_dev *dev);
-struct msi_controller {
- struct module *owner;
- struct device *dev;
- struct device_node *of_node;
- struct list_head list;
-
- int (*setup_irq)(struct msi_controller *chip, struct pci_dev *dev,
- struct msi_desc *desc);
- int (*setup_irqs)(struct msi_controller *chip, struct pci_dev *dev,
- int nvec, int type);
- void (*teardown_irq)(struct msi_controller *chip, unsigned int irq);
-};
-
#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
#include <linux/irqhandler.h>
const struct arpt_replace *repl,
const struct nf_hook_ops *ops);
void arpt_unregister_table(struct net *net, const char *name);
-void arpt_unregister_table_pre_exit(struct net *net, const char *name,
- const struct nf_hook_ops *ops);
+void arpt_unregister_table_pre_exit(struct net *net, const char *name);
extern unsigned int arpt_do_table(struct sk_buff *skb,
const struct nf_hook_state *state,
struct xt_table *table);
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13)
+#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
#define FATTR4_WORD2_MODE_UMASK (1UL << 17)
#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18)
} u;
};
+enum nfs4_change_attr_type {
+ NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
+ NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
+ NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
+ NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
+ NFS4_CHANGE_TYPE_IS_UNDEFINED = 4,
+};
+
/*
* Options for setxattr. These match the flags for setxattr(2).
*/
BIT(13) /* Deferred cache invalidation */
#define NFS_INO_INVALID_BLOCKS BIT(14) /* cached blocks are invalid */
#define NFS_INO_INVALID_XATTR BIT(15) /* xattrs are invalid */
+#define NFS_INO_INVALID_NLINK BIT(16) /* cached nlinks is invalid */
+#define NFS_INO_INVALID_MODE BIT(17) /* cached mode is invalid */
#define NFS_INO_INVALID_ATTR (NFS_INO_INVALID_CHANGE \
| NFS_INO_INVALID_CTIME \
| NFS_INO_INVALID_MTIME \
| NFS_INO_INVALID_SIZE \
+ | NFS_INO_INVALID_NLINK \
+ | NFS_INO_INVALID_MODE \
| NFS_INO_INVALID_OTHER) /* inode metadata is invalid */
/*
extern int nfs_permission(struct user_namespace *, struct inode *, int);
extern int nfs_open(struct inode *, struct file *);
extern int nfs_attribute_cache_expired(struct inode *inode);
-extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
+extern int nfs_revalidate_inode(struct inode *inode, unsigned long flags);
extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
extern int nfs_clear_invalid_mapping(struct address_space *mapping);
extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
#define NFS_MOUNT_WRITE_EAGER 0x01000000
#define NFS_MOUNT_WRITE_WAIT 0x02000000
+ unsigned int fattr_valid; /* Valid attributes */
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */
unsigned int rpages; /* read size (in pages) */
#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
+ enum nfs4_change_attr_type
+ change_attr_type;/* Description of change attribute */
+
struct nfs_fsid fsid;
__u64 maxfilesize; /* maximum file size */
struct timespec64 time_delta; /* smallest time granularity */
#define NFS_CAP_SYMLINKS (1U << 2)
#define NFS_CAP_ACLS (1U << 3)
#define NFS_CAP_ATOMIC_OPEN (1U << 4)
-/* #define NFS_CAP_CHANGE_ATTR (1U << 5) */
#define NFS_CAP_LGOPEN (1U << 5)
-#define NFS_CAP_FILEID (1U << 6)
-#define NFS_CAP_MODE (1U << 7)
-#define NFS_CAP_NLINK (1U << 8)
-#define NFS_CAP_OWNER (1U << 9)
-#define NFS_CAP_OWNER_GROUP (1U << 10)
-#define NFS_CAP_ATIME (1U << 11)
-#define NFS_CAP_CTIME (1U << 12)
-#define NFS_CAP_MTIME (1U << 13)
#define NFS_CAP_POSIX_LOCK (1U << 14)
#define NFS_CAP_UIDGID_NOMAP (1U << 15)
#define NFS_CAP_STATEID_NFSV41 (1U << 16)
#define NFS_DEF_FILE_IO_SIZE (4096U)
#define NFS_MIN_FILE_IO_SIZE (1024U)
+#define NFS_BITMASK_SZ 3
+
struct nfs4_string {
unsigned int len;
char *data;
__u32 layouttype[NFS_MAX_LAYOUT_TYPES]; /* supported pnfs layout driver */
__u32 blksize; /* preferred pnfs io block size */
__u32 clone_blksize; /* granularity of a CLONE operation */
+ enum nfs4_change_attr_type
+ change_attr_type; /* Info about change attr */
__u32 xattr_support; /* User xattrs supported */
};
struct nfs_seqid * seqid;
fmode_t fmode;
u32 share_access;
- u32 * bitmask;
+ const u32 * bitmask;
+ u32 bitmask_store[NFS_BITMASK_SZ];
struct nfs4_layoutreturn_args *lr_args;
};
struct nfs4_sequence_args seq_args;
const struct nfs_fh *fhandle;
const nfs4_stateid *stateid;
- u32 * bitmask;
+ const u32 *bitmask;
+ u32 bitmask_store[NFS_BITMASK_SZ];
struct nfs4_layoutreturn_args *lr_args;
};
union {
unsigned int replen; /* used by read */
struct {
- u32 * bitmask; /* used by write */
+ const u32 * bitmask; /* used by write */
+ u32 bitmask_store[NFS_BITMASK_SZ]; /* used by write */
enum nfs3_stable_how stable; /* used by write */
};
};
struct pagevec;
+static inline bool mapping_empty(struct address_space *mapping)
+{
+ return xa_empty(&mapping->i_pages);
+}
+
/*
* Bits in mapping->flags.
*/
extern const struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */
extern const struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */
extern const struct pci_ecam_ops al_pcie_ops; /* Amazon Annapurna Labs PCIe */
+extern const struct pci_ecam_ops tegra194_pcie_ops; /* Tegra194 PCIe */
#endif
#if IS_ENABLED(CONFIG_PCI_HOST_COMMON)
u32 saved_config_space[16]; /* Config space saved at suspend time */
struct hlist_head saved_cap_space;
- struct bin_attribute *rom_attr; /* Attribute descriptor for sysfs ROM entry */
int rom_attr_enabled; /* Display of ROM attribute enabled? */
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
struct bin_attribute *res_attr_wc[DEVICE_COUNT_RESOURCE]; /* sysfs file for WC mapping of resources */
int (*map_irq)(const struct pci_dev *, u8, u8);
void (*release_fn)(struct pci_host_bridge *);
void *release_data;
- struct msi_controller *msi;
unsigned int ignore_reset_delay:1; /* For entire hierarchy */
unsigned int no_ext_tags:1; /* No Extended Tags */
unsigned int native_aer:1; /* OS may use PCIe AER */
unsigned int native_dpc:1; /* OS may use PCIe DPC */
unsigned int preserve_config:1; /* Preserve FW resource setup */
unsigned int size_windows:1; /* Enable root bus sizing */
+ unsigned int msi_domain:1; /* Bridge wants MSI domain */
/* Resource alignment requirements */
resource_size_t (*align_resource)(struct pci_dev *dev,
struct resource busn_res; /* Bus numbers routed to this bus */
struct pci_ops *ops; /* Configuration access functions */
- struct msi_controller *msi; /* MSI controller */
void *sysdata; /* Hook for sys-specific extension */
struct proc_dir_entry *procdir; /* Directory entry in /proc/bus/pci */
u16 pci_find_ext_capability(struct pci_dev *dev, int cap);
u16 pci_find_next_ext_capability(struct pci_dev *dev, u16 pos, int cap);
struct pci_bus *pci_find_next_bus(const struct pci_bus *from);
+u16 pci_find_vsec_capability(struct pci_dev *dev, u16 vendor, int cap);
u64 pci_get_dsn(struct pci_dev *dev);
int __must_check pcim_set_mwi(struct pci_dev *dev);
int pci_try_set_mwi(struct pci_dev *dev);
void pci_clear_mwi(struct pci_dev *dev);
+void pci_disable_parity(struct pci_dev *dev);
void pci_intx(struct pci_dev *dev, int enable);
bool pci_check_and_mask_intx(struct pci_dev *dev);
bool pci_check_and_unmask_intx(struct pci_dev *dev);
/* Vital Product Data routines */
ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf);
ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf);
-int pci_set_vpd_size(struct pci_dev *dev, size_t len);
/* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */
resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx);
/**
* pci_vpd_find_tag - Locates the Resource Data Type tag provided
* @buf: Pointer to buffered vpd data
- * @off: The offset into the buffer at which to begin the search
* @len: The length of the vpd buffer
* @rdt: The Resource Data Type to search for
*
* Returns the index where the Resource Data Type was found or
* -ENOENT otherwise.
*/
-int pci_vpd_find_tag(const u8 *buf, unsigned int off, unsigned int len, u8 rdt);
+int pci_vpd_find_tag(const u8 *buf, unsigned int len, u8 rdt);
/**
* pci_vpd_find_info_keyword - Locates an information field keyword in the VPD
/*
* On some architectures hardware does not set page access bit when accessing
- * memory page, it is responsibilty of software setting this bit. It brings
+ * memory page, it is responsibility of software setting this bit. It brings
* out extra page fault penalty to track page access bit. For optimization page
* access bit can be set during all page fault flow on these arches.
* To be differentiate with macro pte_mkyoung, this macro is used on platforms
/*
* This is an implementation of pmdp_establish() that is only suitable for an
* architecture that doesn't have hardware dirty/accessed bits. In this case we
- * can't race with CPU which sets these bits and non-atomic aproach is fine.
+ * can't race with CPU which sets these bits and non-atomic approach is fine.
*/
static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma,
unsigned long address, pmd_t *pmdp, pmd_t pmd)
* updates, but to prevent any updates it may make from being lost.
*
* This does not protect against other software modifications of the
- * pte; the appropriate pte lock must be held over the transation.
+ * pte; the appropriate pte lock must be held over the transaction.
*
* Note that this interface is intended to be batchable, meaning that
* ptep_modify_prot_commit may not actually update the pte, but merely
extern void untrack_pfn_moved(struct vm_area_struct *vma);
#endif
+#ifdef CONFIG_MMU
#ifdef __HAVE_COLOR_ZERO_PAGE
static inline int is_zero_pfn(unsigned long pfn)
{
return zero_pfn;
}
#endif
+#else
+static inline int is_zero_pfn(unsigned long pfn)
+{
+ return 0;
+}
+
+static inline unsigned long my_zero_pfn(unsigned long addr)
+{
+ return 0;
+}
+#endif /* CONFIG_MMU */
#ifdef CONFIG_MMU
*
* The complete check uses is_pmd_migration_entry() in linux/swapops.h
* But using that requires moving current function and pmd_trans_unstable()
- * to linux/swapops.h to resovle dependency, which is too much code move.
+ * to linux/swapops.h to resolve dependency, which is too much code move.
*
* !pmd_present() is equivalent to is_pmd_migration_entry() currently,
* because !pmd_present() pages can only be under migration not swapped
* out.
*
- * pmd_none() is preseved for future condition checks on pmd migration
+ * pmd_none() is preserved for future condition checks on pmd migration
* entries and not confusing with this function name, although it is
* redundant with !pmd_present().
*/
#define PD_STATUS_EVENT_SOP_DISC_DONE BIT(0)
#define PD_STATUS_EVENT_SOP_PRIME_DISC_DONE BIT(1)
+#define PD_STATUS_EVENT_HARD_RESET BIT(2)
struct ec_params_typec_status {
uint8_t port;
ssize_t (*proc_read)(struct file *, char __user *, size_t, loff_t *);
ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *);
ssize_t (*proc_write)(struct file *, const char __user *, size_t, loff_t *);
+ /* mandatory unless nonseekable_open() or equivalent is used */
loff_t (*proc_lseek)(struct file *, loff_t, int);
int (*proc_release)(struct inode *, struct file *);
__poll_t (*proc_poll)(struct file *, struct poll_table_struct *);
#define KVM_PROFILING 4
struct proc_dir_entry;
-struct pt_regs;
struct notifier_block;
#if defined(CONFIG_PROFILING) && defined(CONFIG_PROC_FS)
int profile_event_register(enum profile_type, struct notifier_block * n);
int profile_event_unregister(enum profile_type, struct notifier_block * n);
-struct pt_regs;
-
#else
#define prof_on 0
* pwm_get_state() - retrieve the current PWM state
* @pwm: PWM device
* @state: state to fill with the current PWM state
+ *
+ * The returned PWM state represents the state that was applied by a previous call to
+ * pwm_apply_state(). Drivers may have to slightly tweak that state before programming it to
+ * hardware. If pwm_apply_state() was never called, this returns either the current hardware
+ * state (if supported) or the default settings.
*/
static inline void pwm_get_state(const struct pwm_device *pwm,
struct pwm_state *state)
int pwm_set_chip_data(struct pwm_device *pwm, void *data);
void *pwm_get_chip_data(struct pwm_device *pwm);
-int pwmchip_add_with_polarity(struct pwm_chip *chip,
- enum pwm_polarity polarity);
int pwmchip_add(struct pwm_chip *chip);
int pwmchip_remove(struct pwm_chip *chip);
struct pwm_device *pwm_request_from_chip(struct pwm_chip *chip,
/**
* struct rproc_mem_entry - memory entry descriptor
* @va: virtual address
+ * @is_iomem: io memory
* @dma: dma address
* @len: length, in bytes
* @da: device address
*/
struct rproc_mem_entry {
void *va;
+ bool is_iomem;
dma_addr_t dma;
size_t len;
u32 da;
* @start: power on the device and boot it
* @stop: power off the device
* @attach: attach to a device that his already powered up
+ * @detach: detach from a device, leaving it powered up
* @kick: kick a virtqueue (virtqueue id given as a parameter)
* @da_to_va: optional platform hook to perform address translations
* @parse_fw: parse firmware to extract information (e.g. resource table)
* RSC_HANDLED if resource was handled, RSC_IGNORED if not handled and a
* negative value on error
* @load_rsc_table: load resource table from firmware image
- * @find_loaded_rsc_table: find the loaded resouce table
+ * @find_loaded_rsc_table: find the loaded resource table from firmware image
+ * @get_loaded_rsc_table: get resource table installed in memory
+ * by external entity
* @load: load firmware to memory, where the remote processor
* expects to find it
* @sanity_check: sanity check the fw image
int (*start)(struct rproc *rproc);
int (*stop)(struct rproc *rproc);
int (*attach)(struct rproc *rproc);
+ int (*detach)(struct rproc *rproc);
void (*kick)(struct rproc *rproc, int vqid);
- void * (*da_to_va)(struct rproc *rproc, u64 da, size_t len);
+ void * (*da_to_va)(struct rproc *rproc, u64 da, size_t len, bool *is_iomem);
int (*parse_fw)(struct rproc *rproc, const struct firmware *fw);
int (*handle_rsc)(struct rproc *rproc, u32 rsc_type, void *rsc,
int offset, int avail);
struct resource_table *(*find_loaded_rsc_table)(
struct rproc *rproc, const struct firmware *fw);
+ struct resource_table *(*get_loaded_rsc_table)(
+ struct rproc *rproc, size_t *size);
int (*load)(struct rproc *rproc, const struct firmware *fw);
int (*sanity_check)(struct rproc *rproc, const struct firmware *fw);
u64 (*get_boot_addr)(struct rproc *rproc, const struct firmware *fw);
* @RPROC_RUNNING: device is up and running
* @RPROC_CRASHED: device has crashed; need to start recovery
* @RPROC_DELETED: device is deleted
+ * @RPROC_ATTACHED: device has been booted by another entity and the core
+ * has attached to it
* @RPROC_DETACHED: device has been booted by another entity and waiting
* for the core to attach to it
* @RPROC_LAST: just keep this one at the end
RPROC_RUNNING = 2,
RPROC_CRASHED = 3,
RPROC_DELETED = 4,
- RPROC_DETACHED = 5,
- RPROC_LAST = 6,
+ RPROC_ATTACHED = 5,
+ RPROC_DETACHED = 6,
+ RPROC_LAST = 7,
};
/**
* @recovery_disabled: flag that state if recovery was disabled
* @max_notifyid: largest allocated notify id.
* @table_ptr: pointer to the resource table in effect
+ * @clean_table: copy of the resource table without modifications. Used
+ * when a remote processor is attached or detached from the core
* @cached_table: copy of the resource table
* @table_sz: size of @cached_table
* @has_iommu: flag to indicate if remote processor is behind an MMU
* @auto_boot: flag to indicate if remote processor should be auto-started
- * @autonomous: true if an external entity has booted the remote processor
* @dump_segments: list of segments in the firmware
* @nb_vdev: number of vdev currently handled by rproc
* @char_dev: character device of the rproc
bool recovery_disabled;
int max_notifyid;
struct resource_table *table_ptr;
+ struct resource_table *clean_table;
struct resource_table *cached_table;
size_t table_sz;
bool has_iommu;
bool auto_boot;
- bool autonomous;
struct list_head dump_segments;
int nb_vdev;
u8 elf_class;
int rproc_boot(struct rproc *rproc);
void rproc_shutdown(struct rproc *rproc);
+int rproc_detach(struct rproc *rproc);
int rproc_set_firmware(struct rproc *rproc, const char *fw_name);
void rproc_report_crash(struct rproc *rproc, enum rproc_crash_type type);
void rproc_coredump_using_sections(struct rproc *rproc);
return 0;
}
+static inline int reset_control_rearm(struct reset_control *rstc)
+{
+ return 0;
+}
+
static inline int reset_control_assert(struct reset_control *rstc)
{
return 0;
#include <linux/mutex.h>
#include <linux/poll.h>
#include <linux/rpmsg/byteorder.h>
-
-#define RPMSG_ADDR_ANY 0xFFFFFFFF
+#include <uapi/linux/rpmsg.h>
struct rpmsg_device;
struct rpmsg_endpoint;
#define PF_SWAPWRITE 0x00800000 /* Allowed to write to swap */
#define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_mask */
#define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */
-#define PF_MEMALLOC_NOCMA 0x10000000 /* All allocation request will have _GFP_MOVABLE cleared */
+#define PF_MEMALLOC_PIN 0x10000000 /* Allocation context constrained to zones which allow long term pinning. */
#define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */
* Applies per-task gfp context to the given allocation flags.
* PF_MEMALLOC_NOIO implies GFP_NOIO
* PF_MEMALLOC_NOFS implies GFP_NOFS
+ * PF_MEMALLOC_PIN implies !GFP_MOVABLE
*/
static inline gfp_t current_gfp_context(gfp_t flags)
{
unsigned int pflags = READ_ONCE(current->flags);
- if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS))) {
+ if (unlikely(pflags & (PF_MEMALLOC_NOIO | PF_MEMALLOC_NOFS | PF_MEMALLOC_PIN))) {
/*
* NOIO implies both NOIO and NOFS and it is a weaker context
* so always make sure it makes precedence
flags &= ~(__GFP_IO | __GFP_FS);
else if (pflags & PF_MEMALLOC_NOFS)
flags &= ~__GFP_FS;
+
+ if (pflags & PF_MEMALLOC_PIN)
+ flags &= ~__GFP_MOVABLE;
}
return flags;
}
current->flags = (current->flags & ~PF_MEMALLOC) | flags;
}
-#ifdef CONFIG_CMA
-static inline unsigned int memalloc_nocma_save(void)
+static inline unsigned int memalloc_pin_save(void)
{
- unsigned int flags = current->flags & PF_MEMALLOC_NOCMA;
+ unsigned int flags = current->flags & PF_MEMALLOC_PIN;
- current->flags |= PF_MEMALLOC_NOCMA;
+ current->flags |= PF_MEMALLOC_PIN;
return flags;
}
-static inline void memalloc_nocma_restore(unsigned int flags)
-{
- current->flags = (current->flags & ~PF_MEMALLOC_NOCMA) | flags;
-}
-#else
-static inline unsigned int memalloc_nocma_save(void)
-{
- return 0;
-}
-
-static inline void memalloc_nocma_restore(unsigned int flags)
+static inline void memalloc_pin_restore(unsigned int flags)
{
+ current->flags = (current->flags & ~PF_MEMALLOC_PIN) | flags;
}
-#endif
#ifdef CONFIG_MEMCG
DECLARE_PER_CPU(struct mem_cgroup *, int_active_memcg);
#define DEFAULT_SEEKS 2 /* A good number if you don't know better. */
/* Flags */
-#define SHRINKER_NUMA_AWARE (1 << 0)
-#define SHRINKER_MEMCG_AWARE (1 << 1)
+#define SHRINKER_REGISTERED (1 << 0)
+#define SHRINKER_NUMA_AWARE (1 << 1)
+#define SHRINKER_MEMCG_AWARE (1 << 2)
/*
* It just makes sense when the shrinker is also MEMCG_AWARE for now,
* non-MEMCG_AWARE shrinker should not have this flag set.
*/
-#define SHRINKER_NONSLAB (1 << 2)
+#define SHRINKER_NONSLAB (1 << 3)
extern int prealloc_shrinker(struct shrinker *shrinker);
extern void register_shrinker_prepared(struct shrinker *shrinker);
int smp_call_function_single_async(int cpu, call_single_data_t *csd);
+/*
+ * Cpus stopping functions in panic. All have default weak definitions.
+ * Architecture-dependent code may override them.
+ */
+void panic_smp_self_stop(void);
+void nmi_panic_self_stop(struct pt_regs *regs);
+void crash_smp_send_stop(void);
+
/*
* Call a function on all processors
*/
struct rpc_task * snd_task; /* Task blocked in send */
struct list_head xmit_queue; /* Send queue */
+ atomic_long_t xmit_queuelen;
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
#include <linux/sched.h>
#include <linux/node.h>
#include <linux/fs.h>
+#include <linux/pagemap.h>
#include <linux/atomic.h>
#include <linux/page-flags.h>
+#include <uapi/linux/mempolicy.h>
#include <asm/page.h>
struct notifier_block;
extern void lru_note_cost_page(struct page *);
extern void lru_cache_add(struct page *);
extern void mark_page_accessed(struct page *);
+
+extern atomic_t lru_disable_count;
+
+static inline bool lru_cache_disabled(void)
+{
+ return atomic_read(&lru_disable_count);
+}
+
+static inline void lru_cache_enable(void)
+{
+ atomic_dec(&lru_disable_count);
+}
+
+extern void lru_cache_disable(void);
extern void lru_add_drain(void);
extern void lru_add_drain_cpu(int cpu);
extern void lru_add_drain_cpu_zone(struct zone *zone);
#define node_reclaim_mode 0
#endif
+static inline bool node_reclaim_enabled(void)
+{
+ /* Is any node_reclaim_mode bit set? */
+ return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP);
+}
+
extern void check_move_unevictable_pages(struct pagevec *pvec);
extern int kswapd_run(int nid);
#include <linux/init.h>
#include <linux/types.h>
#include <linux/limits.h>
+#include <linux/spinlock.h>
struct device;
struct page;
extern void swiotlb_init(int verbose);
int swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose);
-extern unsigned long swiotlb_nr_tbl(void);
unsigned long swiotlb_size_or_default(void);
extern int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs);
extern int swiotlb_late_init_with_default_size(size_t default_size);
extern void __init swiotlb_update_mem_attributes(void);
-/*
- * Enumeration for sync targets
- */
-enum dma_sync_target {
- SYNC_FOR_CPU = 0,
- SYNC_FOR_DEVICE = 1,
-};
-
phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t phys,
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs);
extern void swiotlb_tbl_unmap_single(struct device *hwdev,
phys_addr_t tlb_addr,
size_t mapping_size,
- size_t alloc_size,
enum dma_data_direction dir,
unsigned long attrs);
-extern void swiotlb_tbl_sync_single(struct device *hwdev,
- phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir,
- enum dma_sync_target target);
-
+void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir);
+void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir);
dma_addr_t swiotlb_map(struct device *dev, phys_addr_t phys,
size_t size, enum dma_data_direction dir, unsigned long attrs);
#ifdef CONFIG_SWIOTLB
extern enum swiotlb_force swiotlb_force;
-extern phys_addr_t io_tlb_start, io_tlb_end;
+
+/**
+ * struct io_tlb_mem - IO TLB Memory Pool Descriptor
+ *
+ * @start: The start address of the swiotlb memory pool. Used to do a quick
+ * range check to see if the memory was in fact allocated by this
+ * API.
+ * @end: The end address of the swiotlb memory pool. Used to do a quick
+ * range check to see if the memory was in fact allocated by this
+ * API.
+ * @nslabs: The number of IO TLB blocks (in groups of 64) between @start and
+ * @end. This is command line adjustable via setup_io_tlb_npages.
+ * @used: The number of used IO TLB block.
+ * @list: The free list describing the number of free entries available
+ * from each index.
+ * @index: The index to start searching in the next round.
+ * @orig_addr: The original address corresponding to a mapped entry.
+ * @alloc_size: Size of the allocated buffer.
+ * @lock: The lock to protect the above data structures in the map and
+ * unmap calls.
+ * @debugfs: The dentry to debugfs.
+ * @late_alloc: %true if allocated using the page allocator
+ */
+struct io_tlb_mem {
+ phys_addr_t start;
+ phys_addr_t end;
+ unsigned long nslabs;
+ unsigned long used;
+ unsigned int index;
+ spinlock_t lock;
+ struct dentry *debugfs;
+ bool late_alloc;
+ struct io_tlb_slot {
+ phys_addr_t orig_addr;
+ size_t alloc_size;
+ unsigned int list;
+ } slots[];
+};
+extern struct io_tlb_mem *io_tlb_default_mem;
static inline bool is_swiotlb_buffer(phys_addr_t paddr)
{
- return paddr >= io_tlb_start && paddr < io_tlb_end;
+ struct io_tlb_mem *mem = io_tlb_default_mem;
+
+ return mem && paddr >= mem->start && paddr < mem->end;
}
void __init swiotlb_exit(void);
unsigned int swiotlb_max_segment(void);
size_t swiotlb_max_mapping_size(struct device *dev);
bool is_swiotlb_active(void);
-void __init swiotlb_adjust_size(unsigned long new_size);
+void __init swiotlb_adjust_size(unsigned long size);
#else
#define swiotlb_force SWIOTLB_NO_FORCE
static inline bool is_swiotlb_buffer(phys_addr_t paddr)
return false;
}
-static inline void swiotlb_adjust_size(unsigned long new_size)
+static inline void swiotlb_adjust_size(unsigned long size)
{
}
#endif /* CONFIG_SWIOTLB */
struct thermal_cooling_device {
int id;
- char type[THERMAL_NAME_LENGTH];
+ char *type;
struct device device;
struct device_node *np;
void *devdata;
int thermal_zone_get_slope(struct thermal_zone_device *tz);
int thermal_zone_get_offset(struct thermal_zone_device *tz);
-void thermal_notify_framework(struct thermal_zone_device *, int);
int thermal_zone_device_enable(struct thermal_zone_device *tz);
int thermal_zone_device_disable(struct thermal_zone_device *tz);
void thermal_zone_device_critical(struct thermal_zone_device *tz);
struct thermal_zone_device *tz)
{ return -ENODEV; }
-static inline void thermal_notify_framework(struct thermal_zone_device *tz,
- int trip)
-{ }
-
static inline int thermal_zone_device_enable(struct thermal_zone_device *tz)
{ return -ENODEV; }
#include <linux/mm.h>
#include <asm-generic/pgtable_uffd.h>
+/* The set of all possible UFFD-related VM flags. */
+#define __VM_UFFD_FLAGS (VM_UFFD_MISSING | VM_UFFD_WP | VM_UFFD_MINOR)
+
/*
* CAREFUL: Check include/uapi/asm-generic/fcntl.h when defining
* new flags, since they might collide with O_* ones. We want
extern vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason);
+/*
+ * The mode of operation for __mcopy_atomic and its helpers.
+ *
+ * This is almost an implementation detail (mcopy_atomic below doesn't take this
+ * as a parameter), but it's exposed here because memory-kind-specific
+ * implementations (e.g. hugetlbfs) need to know the mode of operation.
+ */
+enum mcopy_atomic_mode {
+ /* A normal copy_from_user into the destination range. */
+ MCOPY_ATOMIC_NORMAL,
+ /* Don't copy; map the destination range to the zero page. */
+ MCOPY_ATOMIC_ZEROPAGE,
+ /* Just install pte(s) with the existing page(s) in the page cache. */
+ MCOPY_ATOMIC_CONTINUE,
+};
+
extern ssize_t mcopy_atomic(struct mm_struct *dst_mm, unsigned long dst_start,
unsigned long src_start, unsigned long len,
bool *mmap_changing, __u64 mode);
unsigned long dst_start,
unsigned long len,
bool *mmap_changing);
+extern ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long dst_start,
+ unsigned long len, bool *mmap_changing);
extern int mwriteprotect_range(struct mm_struct *dst_mm,
unsigned long start, unsigned long len,
bool enable_wp, bool *mmap_changing);
return vma->vm_userfaultfd_ctx.ctx == vm_ctx.ctx;
}
+/*
+ * Never enable huge pmd sharing on some uffd registered vmas:
+ *
+ * - VM_UFFD_WP VMAs, because write protect information is per pgtable entry.
+ *
+ * - VM_UFFD_MINOR VMAs, because otherwise we would never get minor faults for
+ * VMAs which share huge pmds. (If you have two mappings to the same
+ * underlying pages, and fault in the non-UFFD-registered one with a write,
+ * with huge pmd sharing this would *also* setup the second UFFD-registered
+ * mapping, and we'd not get minor faults.)
+ */
+static inline bool uffd_disable_huge_pmd_share(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & (VM_UFFD_WP | VM_UFFD_MINOR);
+}
+
static inline bool userfaultfd_missing(struct vm_area_struct *vma)
{
return vma->vm_flags & VM_UFFD_MISSING;
return vma->vm_flags & VM_UFFD_WP;
}
+static inline bool userfaultfd_minor(struct vm_area_struct *vma)
+{
+ return vma->vm_flags & VM_UFFD_MINOR;
+}
+
static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
pte_t pte)
{
static inline bool userfaultfd_armed(struct vm_area_struct *vma)
{
- return vma->vm_flags & (VM_UFFD_MISSING | VM_UFFD_WP);
+ return vma->vm_flags & __VM_UFFD_FLAGS;
}
extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
return false;
}
+static inline bool userfaultfd_minor(struct vm_area_struct *vma)
+{
+ return false;
+}
+
static inline bool userfaultfd_pte_wp(struct vm_area_struct *vma,
pte_t pte)
{
#include <linux/vhost_iotlb.h>
/**
- * vDPA callback definition.
+ * struct vdpa_calllback - vDPA callback definition.
* @callback: interrupt callback function
* @private: the data passed to the callback function
*/
};
/**
- * vDPA notification area
+ * struct vdpa_notification_area - vDPA notification area
* @addr: base address of the notification area
* @size: size of the notification area
*/
};
/**
- * vDPA vq_state definition
+ * struct vdpa_vq_state - vDPA vq_state definition
* @avail_index: available index
*/
struct vdpa_vq_state {
struct vdpa_mgmt_dev;
/**
- * vDPA device - representation of a vDPA device
+ * struct vdpa_device - representation of a vDPA device
* @dev: underlying device
* @dma_dev: the actual device that is performing DMA
* @config: the configuration ops for this device.
};
/**
- * vDPA IOVA range - the IOVA range support by the device
+ * struct vdpa_iova_range - the IOVA range support by the device
* @first: start of the IOVA range
* @last: end of the IOVA range
*/
};
/**
- * vDPA_config_ops - operations for configuring a vDPA device.
+ * struct vdpa_config_ops - operations for configuring a vDPA device.
* Note: vDPA device drivers are required to implement all of the
* operations unless it is mentioned to be optional in the following
* list.
* @set_status: Set the device status
* @vdev: vdpa device
* @status: virtio device status
+ * @get_config_size: Get the size of the configuration space
+ * @vdev: vdpa device
+ * Returns size_t: configuration size
* @get_config: Read from device specific configuration space
* @vdev: vdpa device
* @offset: offset from the beginning of
u32 (*get_vendor_id)(struct vdpa_device *vdev);
u8 (*get_status)(struct vdpa_device *vdev);
void (*set_status)(struct vdpa_device *vdev, u8 status);
+ size_t (*get_config_size)(struct vdpa_device *vdev);
void (*get_config)(struct vdpa_device *vdev, unsigned int offset,
void *buf, unsigned int len);
void (*set_config)(struct vdpa_device *vdev, unsigned int offset,
void _vdpa_unregister_device(struct vdpa_device *vdev);
/**
- * vdpa_driver - operations for a vDPA driver
+ * struct vdpa_driver - operations for a vDPA driver
* @driver: underlying device driver
* @probe: the function to call when a device is found. Returns 0 or -errno.
* @remove: the function to call when a device is removed.
}
/**
- * vdpa_mgmtdev_ops - vdpa device ops
- * @dev_add: Add a vdpa device using alloc and register
- * @mdev: parent device to use for device addition
- * @name: name of the new vdpa device
- * Driver need to add a new device using _vdpa_register_device()
- * after fully initializing the vdpa device. Driver must return 0
- * on success or appropriate error code.
- * @dev_del: Remove a vdpa device using unregister
- * @mdev: parent device to use for device removal
- * @dev: vdpa device to remove
- * Driver need to remove the specified device by calling
- * _vdpa_unregister_device().
+ * struct vdpa_mgmtdev_ops - vdpa device ops
+ * @dev_add: Add a vdpa device using alloc and register
+ * @mdev: parent device to use for device addition
+ * @name: name of the new vdpa device
+ * Driver need to add a new device using _vdpa_register_device()
+ * after fully initializing the vdpa device. Driver must return 0
+ * on success or appropriate error code.
+ * @dev_del: Remove a vdpa device using unregister
+ * @mdev: parent device to use for device removal
+ * @dev: vdpa device to remove
+ * Driver need to remove the specified device by calling
+ * _vdpa_unregister_device().
*/
struct vdpa_mgmtdev_ops {
int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name);
void __iomem *device;
/* Base of vq notifications (non-legacy mode). */
void __iomem *notify_base;
+ /* Physical base of vq notifications */
+ resource_size_t notify_pa;
/* Where to read and clear interrupt */
u8 __iomem *isr;
u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
u16 idx);
u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev);
-u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
- u16 idx);
-void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
- size_t minlen,
- u32 align,
- u32 start, u32 size,
- size_t *len);
+void __iomem * vp_modern_map_vq_notify(struct virtio_pci_modern_device *mdev,
+ u16 index, resource_size_t *pa);
int vp_modern_probe(struct virtio_pci_modern_device *mdev);
void vp_modern_remove(struct virtio_pci_modern_device *mdev);
#endif
#endif
#ifdef CONFIG_HUGETLB_PAGE
HTLB_BUDDY_PGALLOC, HTLB_BUDDY_PGALLOC_FAIL,
+#endif
+#ifdef CONFIG_CMA
+ CMA_ALLOC_SUCCESS,
+ CMA_ALLOC_FAIL,
#endif
UNEVICTABLE_PGCULLED, /* culled to noreclaim list */
UNEVICTABLE_PGSCANNED, /* scanned for reclaimability */
#ifdef CONFIG_SWAP
SWAP_RA,
SWAP_RA_HIT,
+#endif
+#ifdef CONFIG_X86
+ DIRECT_MAP_LEVEL2_SPLIT,
+ DIRECT_MAP_LEVEL3_SPLIT,
#endif
NR_VM_EVENT_ITEMS
};
*
* If IS_ENABLED(CONFIG_KASAN_VMALLOC), VM_KASAN is set on a vm_struct after
* shadow memory has been mapped. It's used to handle allocation errors so that
- * we don't try to poision shadow on free if it was never allocated.
+ * we don't try to poison shadow on free if it was never allocated.
*
* Otherwise, VM_KASAN is set for kasan_module_alloc() allocations and used to
* determine which allocations need the module shadow freed.
/*
* Maximum alignment for ioremap() regions.
- * Can be overriden by arch-specific value.
+ * Can be overridden by arch-specific value.
*/
#ifndef IOREMAP_MAX_ORDER
#define IOREMAP_MAX_ORDER (7 + PAGE_SHIFT) /* 128 pages */
}
#endif
-/* for /dev/kmem */
+/* for /proc/kcore */
extern long vread(char *buf, char *addr, unsigned long count);
-extern long vwrite(char *buf, char *addr, unsigned long count);
/*
* Internals. Dont't use..
/* IOTLB for this vring */
struct vhost_iotlb *iotlb;
+ /* spinlock to synchronize IOTLB accesses */
+ spinlock_t *iotlb_lock;
+
/* The function to call to notify the guest about added buffers */
void (*notify)(struct vringh *);
};
kiov->iov = NULL;
}
+static inline size_t vringh_kiov_length(struct vringh_kiov *kiov)
+{
+ size_t len = 0;
+ int i;
+
+ for (i = kiov->i; i < kiov->used; i++)
+ len += kiov->iov[i].iov_len;
+
+ return len;
+}
+
+void vringh_kiov_advance(struct vringh_kiov *kiov, size_t len);
+
int vringh_getdesc_kern(struct vringh *vrh,
struct vringh_kiov *riov,
struct vringh_kiov *wiov,
#if IS_REACHABLE(CONFIG_VHOST_IOTLB)
-void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb);
+void vringh_set_iotlb(struct vringh *vrh, struct vhost_iotlb *iotlb,
+ spinlock_t *iotlb_lock);
int vringh_init_iotlb(struct vringh *vrh, u64 features,
unsigned int num, bool weak_barriers,
SCTP_CMD_ASSOC_FAILED, /* Handle association failure. */
SCTP_CMD_DISCARD_PACKET, /* Discard the whole packet. */
SCTP_CMD_GEN_SHUTDOWN, /* Generate a SHUTDOWN chunk. */
- SCTP_CMD_UPDATE_ASSOC, /* Update association information. */
SCTP_CMD_PURGE_OUTQUEUE, /* Purge all data waiting to be sent. */
SCTP_CMD_SETUP_T2, /* Hi-level, setup T2-shutdown parms. */
SCTP_CMD_RTO_PENDING, /* Set transport's rto_pending. */
#include <linux/types.h>
#include <linux/tracepoint.h>
-TRACE_EVENT(cma_alloc,
+DECLARE_EVENT_CLASS(cma_alloc_class,
- TP_PROTO(unsigned long pfn, const struct page *page,
- unsigned int count, unsigned int align),
+ TP_PROTO(const char *name, unsigned long pfn, const struct page *page,
+ unsigned long count, unsigned int align),
- TP_ARGS(pfn, page, count, align),
+ TP_ARGS(name, pfn, page, count, align),
TP_STRUCT__entry(
+ __string(name, name)
__field(unsigned long, pfn)
__field(const struct page *, page)
- __field(unsigned int, count)
+ __field(unsigned long, count)
__field(unsigned int, align)
),
TP_fast_assign(
+ __assign_str(name, name);
__entry->pfn = pfn;
__entry->page = page;
__entry->count = count;
__entry->align = align;
),
- TP_printk("pfn=%lx page=%p count=%u align=%u",
+ TP_printk("name=%s pfn=%lx page=%p count=%lu align=%u",
+ __get_str(name),
__entry->pfn,
__entry->page,
__entry->count,
TRACE_EVENT(cma_release,
- TP_PROTO(unsigned long pfn, const struct page *page,
- unsigned int count),
+ TP_PROTO(const char *name, unsigned long pfn, const struct page *page,
+ unsigned long count),
- TP_ARGS(pfn, page, count),
+ TP_ARGS(name, pfn, page, count),
TP_STRUCT__entry(
+ __string(name, name)
__field(unsigned long, pfn)
__field(const struct page *, page)
- __field(unsigned int, count)
+ __field(unsigned long, count)
),
TP_fast_assign(
+ __assign_str(name, name);
__entry->pfn = pfn;
__entry->page = page;
__entry->count = count;
),
- TP_printk("pfn=%lx page=%p count=%u",
+ TP_printk("name=%s pfn=%lx page=%p count=%lu",
+ __get_str(name),
__entry->pfn,
__entry->page,
__entry->count)
);
+TRACE_EVENT(cma_alloc_start,
+
+ TP_PROTO(const char *name, unsigned long count, unsigned int align),
+
+ TP_ARGS(name, count, align),
+
+ TP_STRUCT__entry(
+ __string(name, name)
+ __field(unsigned long, count)
+ __field(unsigned int, align)
+ ),
+
+ TP_fast_assign(
+ __assign_str(name, name);
+ __entry->count = count;
+ __entry->align = align;
+ ),
+
+ TP_printk("name=%s count=%lu align=%u",
+ __get_str(name),
+ __entry->count,
+ __entry->align)
+);
+
+DEFINE_EVENT(cma_alloc_class, cma_alloc_finish,
+
+ TP_PROTO(const char *name, unsigned long pfn, const struct page *page,
+ unsigned long count, unsigned int align),
+
+ TP_ARGS(name, pfn, page, count, align)
+);
+
+DEFINE_EVENT(cma_alloc_class, cma_alloc_busy_retry,
+
+ TP_PROTO(const char *name, unsigned long pfn, const struct page *page,
+ unsigned long count, unsigned int align),
+
+ TP_ARGS(name, pfn, page, count, align)
+);
+
#endif /* _TRACE_CMA_H */
/* This part must be outside protection */
EM( MR_SYSCALL, "syscall_or_cpuset") \
EM( MR_MEMPOLICY_MBIND, "mempolicy_mbind") \
EM( MR_NUMA_MISPLACED, "numa_misplaced") \
- EMe(MR_CONTIG_RANGE, "contig_range")
+ EM( MR_CONTIG_RANGE, "contig_range") \
+ EMe(MR_LONGTERM_PIN, "longterm_pin")
/*
* First define the enums in the above macros to be exported to userspace
__print_symbolic(__entry->mode, MIGRATE_MODE),
__print_symbolic(__entry->reason, MIGRATE_REASON))
);
+
+TRACE_EVENT(mm_migrate_pages_start,
+
+ TP_PROTO(enum migrate_mode mode, int reason),
+
+ TP_ARGS(mode, reason),
+
+ TP_STRUCT__entry(
+ __field(enum migrate_mode, mode)
+ __field(int, reason)
+ ),
+
+ TP_fast_assign(
+ __entry->mode = mode;
+ __entry->reason = reason;
+ ),
+
+ TP_printk("mode=%s reason=%s",
+ __print_symbolic(__entry->mode, MIGRATE_MODE),
+ __print_symbolic(__entry->reason, MIGRATE_REASON))
+);
+
#endif /* _TRACE_MIGRATE_H */
/* This part must be outside protection */
#define IF_HAVE_VM_SOFTDIRTY(flag,name)
#endif
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+# define IF_HAVE_UFFD_MINOR(flag, name) {flag, name},
+#else
+# define IF_HAVE_UFFD_MINOR(flag, name)
+#endif
+
#define __def_vmaflag_names \
{VM_READ, "read" }, \
{VM_WRITE, "write" }, \
{VM_MAYSHARE, "mayshare" }, \
{VM_GROWSDOWN, "growsdown" }, \
{VM_UFFD_MISSING, "uffd_missing" }, \
+IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR, "uffd_minor" ) \
{VM_PFNMAP, "pfnmap" }, \
{VM_DENYWRITE, "denywrite" }, \
{VM_UFFD_WP, "uffd_wp" }, \
), \
TP_ARGS(wc, cid))
+DECLARE_EVENT_CLASS(rpcrdma_mr_completion_class,
+ TP_PROTO(
+ const struct ib_wc *wc,
+ const struct rpc_rdma_cid *cid
+ ),
+
+ TP_ARGS(wc, cid),
+
+ TP_STRUCT__entry(
+ __field(u32, cq_id)
+ __field(int, completion_id)
+ __field(unsigned long, status)
+ __field(unsigned int, vendor_err)
+ ),
+
+ TP_fast_assign(
+ __entry->cq_id = cid->ci_queue_id;
+ __entry->completion_id = cid->ci_completion_id;
+ __entry->status = wc->status;
+ if (wc->status)
+ __entry->vendor_err = wc->vendor_err;
+ else
+ __entry->vendor_err = 0;
+ ),
+
+ TP_printk("cq.id=%u mr.id=%d status=%s (%lu/0x%x)",
+ __entry->cq_id, __entry->completion_id,
+ rdma_show_wc_status(__entry->status),
+ __entry->status, __entry->vendor_err
+ )
+);
+
+#define DEFINE_MR_COMPLETION_EVENT(name) \
+ DEFINE_EVENT(rpcrdma_mr_completion_class, name, \
+ TP_PROTO( \
+ const struct ib_wc *wc, \
+ const struct rpc_rdma_cid *cid \
+ ), \
+ TP_ARGS(wc, cid))
+
DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class,
TP_PROTO(
const struct ib_wc *wc,
TP_ARGS(r_xprt),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p",
- __get_str(addr), __get_str(port), __entry->r_xprt
+ TP_printk("peer=[%s]:%s",
+ __get_str(addr), __get_str(port)
)
);
TP_ARGS(r_xprt, rc),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(int, rc)
__field(int, connect_status)
__string(addr, rpcrdma_addrstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->rc = rc;
__entry->connect_status = r_xprt->rx_ep->re_connect_status;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s rc=%d connection status=%d",
+ __get_str(addr), __get_str(port),
__entry->rc, __entry->connect_status
)
);
__entry->task_id = task->tk_pid;
__entry->client_id = task->tk_client->cl_clid;
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
TP_ARGS(r_xprt, delay),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned long, delay)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->delay = delay;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p delay=%lu",
- __get_str(addr), __get_str(port), __entry->r_xprt,
- __entry->delay
+ TP_printk("peer=[%s]:%s delay=%lu",
+ __get_str(addr), __get_str(port), __entry->delay
)
);
TP_ARGS(r_xprt, connect, reconnect),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned long, connect)
__field(unsigned long, reconnect)
__string(addr, rpcrdma_addrstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->connect = connect;
__entry->reconnect = reconnect;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: connect=%lu reconnect=%lu",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s connect=%lu reconnect=%lu",
+ __get_str(addr), __get_str(port),
__entry->connect / HZ, __entry->reconnect / HZ
)
);
-TRACE_EVENT(xprtrdma_qp_event,
- TP_PROTO(
- const struct rpcrdma_ep *ep,
- const struct ib_event *event
- ),
-
- TP_ARGS(ep, event),
-
- TP_STRUCT__entry(
- __field(unsigned long, event)
- __string(name, event->device->name)
- __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6))
- __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6))
- ),
-
- TP_fast_assign(
- const struct rdma_cm_id *id = ep->re_id;
-
- __entry->event = event->event;
- __assign_str(name, event->device->name);
- memcpy(__entry->srcaddr, &id->route.addr.src_addr,
- sizeof(struct sockaddr_in6));
- memcpy(__entry->dstaddr, &id->route.addr.dst_addr,
- sizeof(struct sockaddr_in6));
- ),
-
- TP_printk("%pISpc -> %pISpc device=%s %s (%lu)",
- __entry->srcaddr, __entry->dstaddr, __get_str(name),
- rdma_show_ib_event(__entry->event), __entry->event
- )
-);
-
/**
** Call events
**/
TP_ARGS(r_xprt, count),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
__field(unsigned int, count)
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->count = count;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: created %u MRs",
- __get_str(addr), __get_str(port), __entry->r_xprt,
- __entry->count
+ TP_printk("peer=[%s]:%s created %u MRs",
+ __get_str(addr), __get_str(port), __entry->count
)
);
TP_ARGS(r_xprt, count, status),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
+ __field(u32, cq_id)
__field(unsigned int, count)
__field(int, status)
__field(int, posted)
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
+ const struct rpcrdma_ep *ep = r_xprt->rx_ep;
+
+ __entry->cq_id = ep->re_attr.recv_cq->res.id;
__entry->count = count;
__entry->status = status;
- __entry->posted = r_xprt->rx_ep->re_receive_count;
+ __entry->posted = ep->re_receive_count;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: %u new recvs, %d active (rc %d)",
- __get_str(addr), __get_str(port), __entry->r_xprt,
+ TP_printk("peer=[%s]:%s cq.id=%d %u new recvs, %d active (rc %d)",
+ __get_str(addr), __get_str(port), __entry->cq_id,
__entry->count, __entry->posted, __entry->status
)
);
DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive);
DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_wake);
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_li_done);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_fastreg);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_wake);
+DEFINE_MR_COMPLETION_EVENT(xprtrdma_wc_li_done);
TRACE_EVENT(xprtrdma_frwr_alloc,
TP_PROTO(
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->rc = rc;
),
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->nents = mr->mr_nents;
__entry->handle = mr->mr_handle;
__entry->length = mr->mr_length;
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->addr = mr->mr_sg->dma_address;
__entry->dir = mr->mr_dir;
__entry->nents = sg_nents;
),
TP_fast_assign(
- __entry->mr_id = mr->frwr.fr_mr->res.id;
+ __entry->mr_id = mr->mr_ibmr->res.id;
__entry->addr = mr->mr_sg->dma_address;
__entry->dir = mr->mr_dir;
__entry->num_mapped = num_mapped;
)
);
+DEFINE_MR_EVENT(fastreg);
DEFINE_MR_EVENT(localinv);
+DEFINE_MR_EVENT(reminv);
DEFINE_MR_EVENT(map);
DEFINE_ANON_MR_EVENT(unmap);
-DEFINE_ANON_MR_EVENT(recycle);
TRACE_EVENT(xprtrdma_dma_maperr,
TP_PROTO(
TP_ARGS(r_xprt, reqs),
TP_STRUCT__entry(
- __field(const void *, r_xprt)
__field(unsigned int, reqs)
__string(addr, rpcrdma_addrstr(r_xprt))
__string(port, rpcrdma_portstr(r_xprt))
),
TP_fast_assign(
- __entry->r_xprt = r_xprt;
__entry->reqs = reqs;
__assign_str(addr, rpcrdma_addrstr(r_xprt));
__assign_str(port, rpcrdma_portstr(r_xprt));
),
- TP_printk("peer=[%s]:%s r_xprt=%p: %u reqs",
- __get_str(addr), __get_str(port),
- __entry->r_xprt, __entry->reqs
+ TP_printk("peer=[%s]:%s %u reqs",
+ __get_str(addr), __get_str(port), __entry->reqs
)
);
__entry->seqno, __entry->status)
);
+TRACE_EVENT(xprt_retransmit,
+ TP_PROTO(
+ const struct rpc_rqst *rqst
+ ),
+
+ TP_ARGS(rqst),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, task_id)
+ __field(unsigned int, client_id)
+ __field(u32, xid)
+ __field(int, ntrans)
+ __field(int, version)
+ __string(progname,
+ rqst->rq_task->tk_client->cl_program->name)
+ __string(procedure,
+ rqst->rq_task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_fast_assign(
+ struct rpc_task *task = rqst->rq_task;
+
+ __entry->task_id = task->tk_pid;
+ __entry->client_id = task->tk_client ?
+ task->tk_client->cl_clid : -1;
+ __entry->xid = be32_to_cpu(rqst->rq_xid);
+ __entry->ntrans = rqst->rq_ntrans;
+ __assign_str(progname,
+ task->tk_client->cl_program->name)
+ __entry->version = task->tk_client->cl_vers;
+ __assign_str(procedure, task->tk_msg.rpc_proc->p_name)
+ ),
+
+ TP_printk(
+ "task:%u@%u xid=0x%08x %sv%d %s ntrans=%d",
+ __entry->task_id, __entry->client_id, __entry->xid,
+ __get_str(progname), __entry->version, __get_str(procedure),
+ __entry->ntrans)
+);
+
TRACE_EVENT(xprt_ping,
TP_PROTO(const struct rpc_xprt *xprt, int status),
DEFINE_WRITELOCK_EVENT(reserve_xprt);
DEFINE_WRITELOCK_EVENT(release_xprt);
-DEFINE_WRITELOCK_EVENT(transmit_queued);
DECLARE_EVENT_CLASS(xprt_cong_event,
TP_PROTO(
#define BOND_3AD_STAT_MAX (__BOND_3AD_STAT_MAX - 1)
#endif /* _LINUX_IF_BONDING_H */
-
-/*
- * Local variables:
- * version-control: t
- * kept-new-versions: 5
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
-
#define KEXEC_ARCH_MIPS_LE (10 << 16)
#define KEXEC_ARCH_MIPS ( 8 << 16)
#define KEXEC_ARCH_AARCH64 (183 << 16)
+#define KEXEC_ARCH_RISCV (243 << 16)
/* The artificial cap on the number of segments passed to kexec_load. */
#define KEXEC_SEGMENT_MAX 16
#define MPOL_F_MOF (1 << 3) /* this policy wants migrate on fault */
#define MPOL_F_MORON (1 << 4) /* Migrate On protnone Reference On Node */
+/*
+ * These bit locations are exposed in the vm.zone_reclaim_mode sysctl
+ * ABI. New bits are OK, but existing bits can never change.
+ */
+#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
+#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
+#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */
#endif /* _UAPI_LINUX_MEMPOLICY_H */
char secctx[SECMARK_SECCTX_MAX];
};
+struct xt_secmark_target_info_v1 {
+ __u8 mode;
+ char secctx[SECMARK_SECCTX_MAX];
+ __u32 secid;
+};
+
#endif /*_XT_SECMARK_H_target */
#define NFS4_MAX_BACK_CHANNEL_OPS 2
#endif /* _UAPI_LINUX_NFS4_H */
-
-/*
- * Local variables:
- * c-basic-offset: 8
- * End:
- */
#include <linux/ioctl.h>
#include <linux/types.h>
+#define RPMSG_ADDR_ANY 0xFFFFFFFF
+
/**
* struct rpmsg_endpoint_info - endpoint info representation
* @name: name of service
- * @src: local address
- * @dst: destination address
+ * @src: local address. To set to RPMSG_ADDR_ANY if not used.
+ * @dst: destination address. To set to RPMSG_ADDR_ANY if not used.
*/
struct rpmsg_endpoint_info {
char name[32];
__u32 dst;
};
+/**
+ * Instantiate a new rmpsg char device endpoint.
+ */
#define RPMSG_CREATE_EPT_IOCTL _IOW(0xb5, 0x1, struct rpmsg_endpoint_info)
+
+/**
+ * Destroy a rpmsg char device endpoint created by the RPMSG_CREATE_EPT_IOCTL.
+ */
#define RPMSG_DESTROY_EPT_IOCTL _IO(0xb5, 0x2)
#endif
SEG6_LOCAL_OIF,
SEG6_LOCAL_BPF,
SEG6_LOCAL_VRFTABLE,
+ SEG6_LOCAL_COUNTERS,
__SEG6_LOCAL_MAX,
};
#define SEG6_LOCAL_MAX (__SEG6_LOCAL_MAX - 1)
#define SEG6_LOCAL_BPF_PROG_MAX (__SEG6_LOCAL_BPF_PROG_MAX - 1)
+/* SRv6 Behavior counters are encoded as netlink attributes guaranteeing the
+ * correct alignment.
+ * Each counter is identified by a different attribute type (i.e.
+ * SEG6_LOCAL_CNT_PACKETS).
+ *
+ * - SEG6_LOCAL_CNT_PACKETS: identifies a counter that counts the number of
+ * packets that have been CORRECTLY processed by an SRv6 Behavior instance
+ * (i.e., packets that generate errors or are dropped are NOT counted).
+ *
+ * - SEG6_LOCAL_CNT_BYTES: identifies a counter that counts the total amount
+ * of traffic in bytes of all packets that have been CORRECTLY processed by
+ * an SRv6 Behavior instance (i.e., packets that generate errors or are
+ * dropped are NOT counted).
+ *
+ * - SEG6_LOCAL_CNT_ERRORS: identifies a counter that counts the number of
+ * packets that have NOT been properly processed by an SRv6 Behavior instance
+ * (i.e., packets that generate errors or are dropped).
+ */
+enum {
+ SEG6_LOCAL_CNT_UNSPEC,
+ SEG6_LOCAL_CNT_PAD, /* pad for 64 bits values */
+ SEG6_LOCAL_CNT_PACKETS,
+ SEG6_LOCAL_CNT_BYTES,
+ SEG6_LOCAL_CNT_ERRORS,
+ __SEG6_LOCAL_CNT_MAX,
+};
+
+#define SEG6_LOCAL_CNT_MAX (__SEG6_LOCAL_CNT_MAX - 1)
+
#endif
THERMAL_GENL_EVENT_UNSPEC,
THERMAL_GENL_EVENT_TZ_CREATE, /* Thermal zone creation */
THERMAL_GENL_EVENT_TZ_DELETE, /* Thermal zone deletion */
- THERMAL_GENL_EVENT_TZ_DISABLE, /* Thermal zone disabed */
+ THERMAL_GENL_EVENT_TZ_DISABLE, /* Thermal zone disabled */
THERMAL_GENL_EVENT_TZ_ENABLE, /* Thermal zone enabled */
THERMAL_GENL_EVENT_TZ_TRIP_UP, /* Trip point crossed the way up */
THERMAL_GENL_EVENT_TZ_TRIP_DOWN, /* Trip point crossed the way down */
* means the userland is reading).
*/
#define UFFD_API ((__u64)0xAA)
+#define UFFD_API_REGISTER_MODES (UFFDIO_REGISTER_MODE_MISSING | \
+ UFFDIO_REGISTER_MODE_WP | \
+ UFFDIO_REGISTER_MODE_MINOR)
#define UFFD_API_FEATURES (UFFD_FEATURE_PAGEFAULT_FLAG_WP | \
UFFD_FEATURE_EVENT_FORK | \
UFFD_FEATURE_EVENT_REMAP | \
- UFFD_FEATURE_EVENT_REMOVE | \
+ UFFD_FEATURE_EVENT_REMOVE | \
UFFD_FEATURE_EVENT_UNMAP | \
UFFD_FEATURE_MISSING_HUGETLBFS | \
UFFD_FEATURE_MISSING_SHMEM | \
UFFD_FEATURE_SIGBUS | \
- UFFD_FEATURE_THREAD_ID)
+ UFFD_FEATURE_THREAD_ID | \
+ UFFD_FEATURE_MINOR_HUGETLBFS)
#define UFFD_API_IOCTLS \
((__u64)1 << _UFFDIO_REGISTER | \
(__u64)1 << _UFFDIO_UNREGISTER | \
((__u64)1 << _UFFDIO_WAKE | \
(__u64)1 << _UFFDIO_COPY | \
(__u64)1 << _UFFDIO_ZEROPAGE | \
- (__u64)1 << _UFFDIO_WRITEPROTECT)
+ (__u64)1 << _UFFDIO_WRITEPROTECT | \
+ (__u64)1 << _UFFDIO_CONTINUE)
#define UFFD_API_RANGE_IOCTLS_BASIC \
((__u64)1 << _UFFDIO_WAKE | \
- (__u64)1 << _UFFDIO_COPY)
+ (__u64)1 << _UFFDIO_COPY | \
+ (__u64)1 << _UFFDIO_CONTINUE)
/*
* Valid ioctl command number range with this API is from 0x00 to
#define _UFFDIO_COPY (0x03)
#define _UFFDIO_ZEROPAGE (0x04)
#define _UFFDIO_WRITEPROTECT (0x06)
+#define _UFFDIO_CONTINUE (0x07)
#define _UFFDIO_API (0x3F)
/* userfaultfd ioctl ids */
struct uffdio_zeropage)
#define UFFDIO_WRITEPROTECT _IOWR(UFFDIO, _UFFDIO_WRITEPROTECT, \
struct uffdio_writeprotect)
+#define UFFDIO_CONTINUE _IOR(UFFDIO, _UFFDIO_CONTINUE, \
+ struct uffdio_continue)
/* read() structure */
struct uffd_msg {
/* flags for UFFD_EVENT_PAGEFAULT */
#define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */
#define UFFD_PAGEFAULT_FLAG_WP (1<<1) /* If reason is VM_UFFD_WP */
+#define UFFD_PAGEFAULT_FLAG_MINOR (1<<2) /* If reason is VM_UFFD_MINOR */
struct uffdio_api {
/* userland asks for an API number and the features to enable */
*
* UFFD_FEATURE_THREAD_ID pid of the page faulted task_struct will
* be returned, if feature is not requested 0 will be returned.
+ *
+ * UFFD_FEATURE_MINOR_HUGETLBFS indicates that minor faults
+ * can be intercepted (via REGISTER_MODE_MINOR) for
+ * hugetlbfs-backed pages.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
#define UFFD_FEATURE_EVENT_UNMAP (1<<6)
#define UFFD_FEATURE_SIGBUS (1<<7)
#define UFFD_FEATURE_THREAD_ID (1<<8)
+#define UFFD_FEATURE_MINOR_HUGETLBFS (1<<9)
__u64 features;
__u64 ioctls;
struct uffdio_range range;
#define UFFDIO_REGISTER_MODE_MISSING ((__u64)1<<0)
#define UFFDIO_REGISTER_MODE_WP ((__u64)1<<1)
+#define UFFDIO_REGISTER_MODE_MINOR ((__u64)1<<2)
__u64 mode;
/*
__u64 mode;
};
+struct uffdio_continue {
+ struct uffdio_range range;
+#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0)
+ __u64 mode;
+
+ /*
+ * Fields below here are written by the ioctl and must be at the end:
+ * the copy_from_user will not read past here.
+ */
+ __s64 mapped;
+};
+
/*
* Flags for the userfaultfd(2) system call itself.
*/
#define VFIO_REGION_SUBTYPE_INTEL_IGD_LPC_CFG (3)
/* 10de vendor PCI sub-types */
-/* subtype 1 was VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM, don't use */
+/*
+ * NVIDIA GPU NVlink2 RAM is coherent RAM mapped onto the host address space.
+ *
+ * Deprecated, region no longer provided
+ */
+#define VFIO_REGION_SUBTYPE_NVIDIA_NVLINK2_RAM (1)
/* 1014 vendor PCI sub-types */
-/* subtype 1 was VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD, don't use */
+/*
+ * IBM NPU NVlink2 ATSD (Address Translation Shootdown) register of NPU
+ * to do TLB invalidation on a GPU.
+ *
+ * Deprecated, region no longer provided
+ */
+#define VFIO_REGION_SUBTYPE_IBM_NVLINK2_ATSD (1)
/* sub-types for VFIO_REGION_TYPE_GFX */
#define VFIO_REGION_SUBTYPE_GFX_EDID (1)
*/
#define VFIO_REGION_INFO_CAP_MSIX_MAPPABLE 3
-/* subtype 4 was VFIO_REGION_INFO_CAP_NVLINK2_SSATGT, don't use */
+/*
+ * Capability with compressed real address (aka SSA - small system address)
+ * where GPU RAM is mapped on a system bus. Used by a GPU for DMA routing
+ * and by the userspace to associate a NVLink bridge with a GPU.
+ *
+ * Deprecated, capability no longer provided
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_SSATGT 4
+
+struct vfio_region_info_cap_nvlink2_ssatgt {
+ struct vfio_info_cap_header header;
+ __u64 tgt;
+};
-/* subtype 5 was VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD, don't use */
+/*
+ * Capability with an NVLink link speed. The value is read by
+ * the NVlink2 bridge driver from the bridge's "ibm,nvlink-speed"
+ * property in the device tree. The value is fixed in the hardware
+ * and failing to provide the correct value results in the link
+ * not working with no indication from the driver why.
+ *
+ * Deprecated, capability no longer provided
+ */
+#define VFIO_REGION_INFO_CAP_NVLINK2_LNKSPD 5
+
+struct vfio_region_info_cap_nvlink2_lnkspd {
+ struct vfio_info_cap_header header;
+ __u32 link_speed;
+ __u32 __pad;
+};
/**
* VFIO_DEVICE_GET_IRQ_INFO - _IOWR(VFIO_TYPE, VFIO_BASE + 9,
#define XEN_ELFNOTE_MAX XEN_ELFNOTE_PHYS32_ENTRY
#endif /* __XEN_PUBLIC_ELFNOTE_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-set-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
typedef struct vcpu_hvm_context vcpu_hvm_context_t;
#endif /* __XEN_PUBLIC_HVM_HVM_VCPU_H__ */
-
-/*
- * Local variables:
- * mode: C
- * c-file-style: "BSD"
- * c-basic-offset: 4
- * tab-width: 4
- * indent-tabs-mode: nil
- * End:
- */
};
#endif /* _XEN_PUBLIC_IO_XENBUS_H */
-
-/*
- * Local variables:
- * c-file-style: "linux"
- * indent-tabs-mode: t
- * c-indent-level: 8
- * c-basic-offset: 8
- * tab-width: 8
- * End:
- */
void xen_dma_sync_for_device(struct device *dev, dma_addr_t handle,
size_t size, enum dma_data_direction dir);
-extern int xen_swiotlb_init(int verbose, bool early);
+int xen_swiotlb_init(void);
+void __init xen_swiotlb_init_early(void);
extern const struct dma_map_ops xen_swiotlb_dma_ops;
#endif /* __LINUX_SWIOTLB_XEN_H */
help
Arch has userfaultfd write protection support
+config HAVE_ARCH_USERFAULTFD_MINOR
+ bool
+ help
+ Arch has userfaultfd minor fault support
+
config MEMBARRIER
bool "Enable membarrier() system call" if EXPERT
default y
If unsure, say N.
+config MODPROBE_PATH
+ string "Path to modprobe binary"
+ default "/sbin/modprobe"
+ help
+ When kernel code requests a module, it does so by calling
+ the "modprobe" userspace utility. This option allows you to
+ set the path where that binary is found. This can be changed
+ at runtime via the sysctl file
+ /proc/sys/kernel/modprobe. Setting this to the empty string
+ removes the kernel's ability to request modules (but
+ userspace can still load modules explicitly).
+
config TRIM_UNUSED_KSYMS
bool "Trim unused exported kernel symbols" if EXPERT
depends on !COMPILE_TEST
// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
+#include <linux/async.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/types.h>
__setup("keepinitrd", keepinitrd_setup);
#endif
+static bool __initdata initramfs_async = true;
+static int __init initramfs_async_setup(char *str)
+{
+ strtobool(str, &initramfs_async);
+ return 1;
+}
+__setup("initramfs_async=", initramfs_async_setup);
+
extern char __initramfs_start[];
extern unsigned long __initramfs_size;
#include <linux/initrd.h>
}
#endif /* CONFIG_BLK_DEV_RAM */
-static int __init populate_rootfs(void)
+static void __init do_populate_rootfs(void *unused, async_cookie_t cookie)
{
/* Load the built in initramfs */
char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
initrd_end = 0;
flush_delayed_fput();
+}
+
+static ASYNC_DOMAIN_EXCLUSIVE(initramfs_domain);
+static async_cookie_t initramfs_cookie;
+
+void wait_for_initramfs(void)
+{
+ if (!initramfs_cookie) {
+ /*
+ * Something before rootfs_initcall wants to access
+ * the filesystem/initramfs. Probably a bug. Make a
+ * note, avoid deadlocking the machine, and let the
+ * caller's access fail as it used to.
+ */
+ pr_warn_once("wait_for_initramfs() called before rootfs_initcalls\n");
+ return;
+ }
+ async_synchronize_cookie_domain(initramfs_cookie + 1, &initramfs_domain);
+}
+EXPORT_SYMBOL_GPL(wait_for_initramfs);
+
+static int __init populate_rootfs(void)
+{
+ initramfs_cookie = async_schedule_domain(do_populate_rootfs, NULL,
+ &initramfs_domain);
+ if (!initramfs_async)
+ wait_for_initramfs();
return 0;
}
rootfs_initcall(populate_rootfs);
kunit_run_all_tests();
+ wait_for_initramfs();
console_on_rootfs();
/*
* - two Linux specific semctl() commands: SEM_STAT, SEM_INFO.
* - undo adjustments at process exit are limited to 0..SEMVMX.
* - namespace are supported.
- * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtine by writing
+ * - SEMMSL, SEMMNS, SEMOPM and SEMMNI can be configured at runtime by writing
* to /proc/sys/kernel/sem.
* - statistics about the usage are reported in /proc/sysvipc/sem.
*
* Setting it to a result code is a RELEASE, this is ensured by both a
* smp_store_release() (for case a) and while holding sem_lock()
* (for case b).
- * The AQUIRE when reading the result code without holding sem_lock() is
+ * The ACQUIRE when reading the result code without holding sem_lock() is
* achieved by using READ_ONCE() + smp_acquire__after_ctrl_dep().
* (case a above).
* Reading the result code while holding sem_lock() needs no further barriers,
{
get_task_struct(q->sleeper);
- /* see SEM_BARRIER_2 for purpuse/pairing */
+ /* see SEM_BARRIER_2 for purpose/pairing */
smp_store_release(&q->status, error);
wake_q_add_safe(wake_q, q->sleeper);
/* It is impossible that someone waits for the new value:
* - complex operations always restart.
- * - wait-for-zero are handled seperately.
+ * - wait-for-zero are handled separately.
* - q is a previously sleeping simple operation that
* altered the array. It must be a decrement, because
* simple increments never sleep.
* - No complex ops, thus all sleeping ops are
* decrease.
* - if we decreased the value, then any sleeping
- * semaphore ops wont be able to run: If the
+ * semaphore ops won't be able to run: If the
* previous value was too small, then the new
* value will be too small, too.
*/
queue.dupsop = dupsop;
error = perform_atomic_semop(sma, &queue);
- if (error == 0) { /* non-blocking succesfull path */
+ if (error == 0) { /* non-blocking successful path */
DEFINE_WAKE_Q(wake_q);
/*
# SPDX-License-Identifier: GPL-2.0-only
-kheaders.md5
-timeconst.h
-hz.bc
+/config_data
+/kheaders.md5
$(obj)/configs.o: $(obj)/config_data.gz
-targets += config_data.gz
-$(obj)/config_data.gz: $(KCONFIG_CONFIG) FORCE
+targets += config_data config_data.gz
+$(obj)/config_data.gz: $(obj)/config_data FORCE
$(call if_changed,gzip)
+filechk_cat = cat $<
+
+$(obj)/config_data: $(KCONFIG_CONFIG) FORCE
+ $(call filechk,cat)
+
$(obj)/kheaders.o: $(obj)/kheaders_data.tar.xz
quiet_cmd_genikh = CHK $(obj)/kheaders_data.tar.xz
static atomic_t entry_count;
+static long long microseconds_since(ktime_t start)
+{
+ ktime_t now = ktime_get();
+ return ktime_to_ns(ktime_sub(now, start)) >> 10;
+}
+
static async_cookie_t lowest_in_progress(struct async_domain *domain)
{
struct async_entry *first = NULL;
struct async_entry *entry =
container_of(work, struct async_entry, work);
unsigned long flags;
- ktime_t calltime, delta, rettime;
+ ktime_t calltime;
/* 1) run (and print duration) */
- if (initcall_debug && system_state < SYSTEM_RUNNING) {
- pr_debug("calling %lli_%pS @ %i\n",
- (long long)entry->cookie,
- entry->func, task_pid_nr(current));
- calltime = ktime_get();
- }
+ pr_debug("calling %lli_%pS @ %i\n", (long long)entry->cookie,
+ entry->func, task_pid_nr(current));
+ calltime = ktime_get();
+
entry->func(entry->data, entry->cookie);
- if (initcall_debug && system_state < SYSTEM_RUNNING) {
- rettime = ktime_get();
- delta = ktime_sub(rettime, calltime);
- pr_debug("initcall %lli_%pS returned 0 after %lld usecs\n",
- (long long)entry->cookie,
- entry->func,
- (long long)ktime_to_ns(delta) >> 10);
- }
+
+ pr_debug("initcall %lli_%pS returned after %lld usecs\n",
+ (long long)entry->cookie, entry->func,
+ microseconds_since(calltime));
/* 2) remove self from the pending queues */
spin_lock_irqsave(&async_lock, flags);
}
EXPORT_SYMBOL_GPL(async_synchronize_full);
-/**
- * async_unregister_domain - ensure no more anonymous waiters on this domain
- * @domain: idle domain to flush out of any async_synchronize_full instances
- *
- * async_synchronize_{cookie|full}_domain() are not flushed since callers
- * of these routines should know the lifetime of @domain
- *
- * Prefer ASYNC_DOMAIN_EXCLUSIVE() declarations over flushing
- */
-void async_unregister_domain(struct async_domain *domain)
-{
- spin_lock_irq(&async_lock);
- WARN_ON(!domain->registered || !list_empty(&domain->pending));
- domain->registered = 0;
- spin_unlock_irq(&async_lock);
-}
-EXPORT_SYMBOL_GPL(async_unregister_domain);
-
/**
* async_synchronize_full_domain - synchronize all asynchronous function within a certain domain
* @domain: the domain to synchronize
*/
void async_synchronize_cookie_domain(async_cookie_t cookie, struct async_domain *domain)
{
- ktime_t starttime, delta, endtime;
+ ktime_t starttime;
- if (initcall_debug && system_state < SYSTEM_RUNNING) {
- pr_debug("async_waiting @ %i\n", task_pid_nr(current));
- starttime = ktime_get();
- }
+ pr_debug("async_waiting @ %i\n", task_pid_nr(current));
+ starttime = ktime_get();
wait_event(async_done, lowest_in_progress(domain) >= cookie);
- if (initcall_debug && system_state < SYSTEM_RUNNING) {
- endtime = ktime_get();
- delta = ktime_sub(endtime, starttime);
-
- pr_debug("async_continuing @ %i after %lli usec\n",
- task_pid_nr(current),
- (long long)ktime_to_ns(delta) >> 10);
- }
+ pr_debug("async_continuing @ %i after %lli usec\n", task_pid_nr(current),
+ microseconds_since(starttime));
}
EXPORT_SYMBOL_GPL(async_synchronize_cookie_domain);
{
struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : tmp_aux;
struct bpf_verifier_state *vstate = env->cur_state;
+ bool off_is_imm = tnum_is_const(off_reg->var_off);
bool off_is_neg = off_reg->smin_value < 0;
bool ptr_is_dst_reg = ptr_reg == dst_reg;
u8 opcode = BPF_OP(insn->code);
alu_limit = abs(tmp_aux->alu_limit - alu_limit);
} else {
alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
+ alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
alu_state |= ptr_is_dst_reg ?
BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
}
const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
struct bpf_insn *patch = &insn_buf[0];
- bool issrc, isneg;
+ bool issrc, isneg, isimm;
u32 off_reg;
aux = &env->insn_aux_data[i + delta];
isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
issrc = (aux->alu_state & BPF_ALU_SANITIZE) ==
BPF_ALU_SANITIZE_SRC;
+ isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
off_reg = issrc ? insn->src_reg : insn->dst_reg;
- if (isneg)
- *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
- *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
- *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
- *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
- *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
- if (issrc) {
- *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX,
- off_reg);
- insn->src_reg = BPF_REG_AX;
+ if (isimm) {
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
} else {
- *patch++ = BPF_ALU64_REG(BPF_AND, off_reg,
- BPF_REG_AX);
+ if (isneg)
+ *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
+ *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
+ *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
+ *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
+ *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
+ *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
}
+ if (!issrc)
+ *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
+ insn->src_reg = BPF_REG_AX;
if (isneg)
insn->code = insn->code == code_add ?
code_sub : code_add;
*patch++ = *insn;
- if (issrc && isneg)
+ if (issrc && isneg && !isimm)
*patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
cnt = patch - insn_buf;
# KEEP ALPHABETICALLY SORTED
-# CONFIG_DEVKMEM is not set
# CONFIG_DEVMEM is not set
# CONFIG_FHANDLE is not set
# CONFIG_INET_LRO is not set
static struct kmem_cache *cred_jar;
/* init to 2 - one for init_task, one to ensure it is never freed */
-struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
+static struct group_info init_groups = { .usage = ATOMIC_INIT(2) };
/*
* The initial credentials for the initial task
phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));
if (unlikely(is_swiotlb_buffer(paddr)))
- swiotlb_tbl_sync_single(dev, paddr, sg->length,
- dir, SYNC_FOR_DEVICE);
+ swiotlb_sync_single_for_device(dev, paddr, sg->length,
+ dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(paddr, sg->length,
arch_sync_dma_for_cpu(paddr, sg->length, dir);
if (unlikely(is_swiotlb_buffer(paddr)))
- swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
- SYNC_FOR_CPU);
+ swiotlb_sync_single_for_cpu(dev, paddr, sg->length,
+ dir);
if (dir == DMA_FROM_DEVICE)
arch_dma_mark_clean(paddr, sg->length);
phys_addr_t paddr = dma_to_phys(dev, addr);
if (unlikely(is_swiotlb_buffer(paddr)))
- swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);
+ swiotlb_sync_single_for_device(dev, paddr, size, dir);
if (!dev_is_dma_coherent(dev))
arch_sync_dma_for_device(paddr, size, dir);
}
if (unlikely(is_swiotlb_buffer(paddr)))
- swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
+ swiotlb_sync_single_for_cpu(dev, paddr, size, dir);
if (dir == DMA_FROM_DEVICE)
arch_dma_mark_clean(paddr, size);
dma_direct_sync_single_for_cpu(dev, addr, size, dir);
if (unlikely(is_swiotlb_buffer(phys)))
- swiotlb_tbl_unmap_single(dev, phys, size, size, dir, attrs);
+ swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}
#endif /* _KERNEL_DMA_DIRECT_H */
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020 Hisilicon Limited.
+ * Copyright (C) 2020 HiSilicon Limited.
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
__u32 dma_bits; /* DMA addressing capability */
__u32 dma_dir; /* DMA data direction */
__u32 dma_trans_ns; /* time for DMA transmission in ns */
- __u8 expansion[80]; /* For future use */
+ __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
+ __u8 expansion[76]; /* For future use */
};
struct map_benchmark_data {
void *buf;
dma_addr_t dma_addr;
struct map_benchmark_data *map = data;
+ int npages = map->bparam.granule;
+ u64 size = npages * PAGE_SIZE;
int ret = 0;
- buf = (void *)__get_free_page(GFP_KERNEL);
+ buf = alloc_pages_exact(size, GFP_KERNEL);
if (!buf)
return -ENOMEM;
* 66 means evertything goes well! 66 is lucky.
*/
if (map->dir != DMA_FROM_DEVICE)
- memset(buf, 0x66, PAGE_SIZE);
+ memset(buf, 0x66, size);
map_stime = ktime_get();
- dma_addr = dma_map_single(map->dev, buf, PAGE_SIZE, map->dir);
+ dma_addr = dma_map_single(map->dev, buf, size, map->dir);
if (unlikely(dma_mapping_error(map->dev, dma_addr))) {
pr_err("dma_map_single failed on %s\n",
dev_name(map->dev));
ndelay(map->bparam.dma_trans_ns);
unmap_stime = ktime_get();
- dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
+ dma_unmap_single(map->dev, dma_addr, size, map->dir);
unmap_etime = ktime_get();
unmap_delta = ktime_sub(unmap_etime, unmap_stime);
}
out:
- free_page((unsigned long)buf);
+ free_pages_exact(buf, size);
return ret;
}
struct map_benchmark_data *map = file->private_data;
void __user *argp = (void __user *)arg;
u64 old_dma_mask;
-
int ret;
if (copy_from_user(&map->bparam, argp, sizeof(map->bparam)))
return -EINVAL;
}
+ if (map->bparam.granule < 1 || map->bparam.granule > 1024) {
+ pr_err("invalid granule size\n");
+ return -EINVAL;
+ }
+
switch (map->bparam.dma_dir) {
case DMA_MAP_BIDIRECTIONAL:
map->dir = DMA_BIDIRECTIONAL;
}
EXPORT_SYMBOL(dma_free_attrs);
-struct page *dma_alloc_pages(struct device *dev, size_t size,
+static struct page *__dma_alloc_pages(struct device *dev, size_t size,
dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
- struct page *page;
if (WARN_ON_ONCE(!dev->coherent_dma_mask))
return NULL;
size = PAGE_ALIGN(size);
if (dma_alloc_direct(dev, ops))
- page = dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp);
- else if (ops->alloc_pages)
- page = ops->alloc_pages(dev, size, dma_handle, dir, gfp);
- else
+ return dma_direct_alloc_pages(dev, size, dma_handle, dir, gfp);
+ if (!ops->alloc_pages)
return NULL;
+ return ops->alloc_pages(dev, size, dma_handle, dir, gfp);
+}
- debug_dma_map_page(dev, page, 0, size, dir, *dma_handle);
+struct page *dma_alloc_pages(struct device *dev, size_t size,
+ dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
+{
+ struct page *page = __dma_alloc_pages(dev, size, dma_handle, dir, gfp);
+ if (page)
+ debug_dma_map_page(dev, page, 0, size, dir, *dma_handle);
return page;
}
EXPORT_SYMBOL_GPL(dma_alloc_pages);
-void dma_free_pages(struct device *dev, size_t size, struct page *page,
+static void __dma_free_pages(struct device *dev, size_t size, struct page *page,
dma_addr_t dma_handle, enum dma_data_direction dir)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
size = PAGE_ALIGN(size);
- debug_dma_unmap_page(dev, dma_handle, size, dir);
-
if (dma_alloc_direct(dev, ops))
dma_direct_free_pages(dev, size, page, dma_handle, dir);
else if (ops->free_pages)
ops->free_pages(dev, size, page, dma_handle, dir);
}
+
+void dma_free_pages(struct device *dev, size_t size, struct page *page,
+ dma_addr_t dma_handle, enum dma_data_direction dir)
+{
+ debug_dma_unmap_page(dev, dma_handle, size, dir);
+ __dma_free_pages(dev, size, page, dma_handle, dir);
+}
EXPORT_SYMBOL_GPL(dma_free_pages);
+int dma_mmap_pages(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct page *page)
+{
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (vma->vm_pgoff >= count || vma_pages(vma) > count - vma->vm_pgoff)
+ return -ENXIO;
+ return remap_pfn_range(vma, vma->vm_start,
+ page_to_pfn(page) + vma->vm_pgoff,
+ vma_pages(vma) << PAGE_SHIFT, vma->vm_page_prot);
+}
+EXPORT_SYMBOL_GPL(dma_mmap_pages);
+
+static struct sg_table *alloc_single_sgt(struct device *dev, size_t size,
+ enum dma_data_direction dir, gfp_t gfp)
+{
+ struct sg_table *sgt;
+ struct page *page;
+
+ sgt = kmalloc(sizeof(*sgt), gfp);
+ if (!sgt)
+ return NULL;
+ if (sg_alloc_table(sgt, 1, gfp))
+ goto out_free_sgt;
+ page = __dma_alloc_pages(dev, size, &sgt->sgl->dma_address, dir, gfp);
+ if (!page)
+ goto out_free_table;
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ sg_dma_len(sgt->sgl) = sgt->sgl->length;
+ return sgt;
+out_free_table:
+ sg_free_table(sgt);
+out_free_sgt:
+ kfree(sgt);
+ return NULL;
+}
+
+struct sg_table *dma_alloc_noncontiguous(struct device *dev, size_t size,
+ enum dma_data_direction dir, gfp_t gfp, unsigned long attrs)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ struct sg_table *sgt;
+
+ if (WARN_ON_ONCE(attrs & ~DMA_ATTR_ALLOC_SINGLE_PAGES))
+ return NULL;
+
+ if (ops && ops->alloc_noncontiguous)
+ sgt = ops->alloc_noncontiguous(dev, size, dir, gfp, attrs);
+ else
+ sgt = alloc_single_sgt(dev, size, dir, gfp);
+
+ if (sgt) {
+ sgt->nents = 1;
+ debug_dma_map_sg(dev, sgt->sgl, sgt->orig_nents, 1, dir);
+ }
+ return sgt;
+}
+EXPORT_SYMBOL_GPL(dma_alloc_noncontiguous);
+
+static void free_single_sgt(struct device *dev, size_t size,
+ struct sg_table *sgt, enum dma_data_direction dir)
+{
+ __dma_free_pages(dev, size, sg_page(sgt->sgl), sgt->sgl->dma_address,
+ dir);
+ sg_free_table(sgt);
+ kfree(sgt);
+}
+
+void dma_free_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt, enum dma_data_direction dir)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ debug_dma_unmap_sg(dev, sgt->sgl, sgt->orig_nents, dir);
+ if (ops && ops->free_noncontiguous)
+ ops->free_noncontiguous(dev, size, sgt, dir);
+ else
+ free_single_sgt(dev, size, sgt, dir);
+}
+EXPORT_SYMBOL_GPL(dma_free_noncontiguous);
+
+void *dma_vmap_noncontiguous(struct device *dev, size_t size,
+ struct sg_table *sgt)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (ops && ops->alloc_noncontiguous)
+ return vmap(sgt_handle(sgt)->pages, count, VM_MAP, PAGE_KERNEL);
+ return page_address(sg_page(sgt->sgl));
+}
+EXPORT_SYMBOL_GPL(dma_vmap_noncontiguous);
+
+void dma_vunmap_noncontiguous(struct device *dev, void *vaddr)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (ops && ops->alloc_noncontiguous)
+ vunmap(vaddr);
+}
+EXPORT_SYMBOL_GPL(dma_vunmap_noncontiguous);
+
+int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma,
+ size_t size, struct sg_table *sgt)
+{
+ const struct dma_map_ops *ops = get_dma_ops(dev);
+
+ if (ops && ops->alloc_noncontiguous) {
+ unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ if (vma->vm_pgoff >= count ||
+ vma_pages(vma) > count - vma->vm_pgoff)
+ return -ENXIO;
+ return vm_map_pages(vma, sgt_handle(sgt)->pages, count);
+ }
+ return dma_mmap_pages(dev, vma, size, sg_page(sgt->sgl));
+}
+EXPORT_SYMBOL_GPL(dma_mmap_noncontiguous);
+
int dma_supported(struct device *dev, u64 mask)
{
const struct dma_map_ops *ops = get_dma_ops(dev);
*/
#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT)
-enum swiotlb_force swiotlb_force;
-
-/*
- * Used to do a quick range check in swiotlb_tbl_unmap_single and
- * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this
- * API.
- */
-phys_addr_t io_tlb_start, io_tlb_end;
-
-/*
- * The number of IO TLB blocks (in groups of 64) between io_tlb_start and
- * io_tlb_end. This is command line adjustable via setup_io_tlb_npages.
- */
-static unsigned long io_tlb_nslabs;
+#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-/*
- * The number of used IO TLB block
- */
-static unsigned long io_tlb_used;
+enum swiotlb_force swiotlb_force;
-/*
- * This is a free list describing the number of free entries available from
- * each index
- */
-static unsigned int *io_tlb_list;
-static unsigned int io_tlb_index;
+struct io_tlb_mem *io_tlb_default_mem;
/*
* Max segment that we can provide which (if pages are contingous) will
*/
static unsigned int max_segment;
-/*
- * We need to save away the original address corresponding to a mapped entry
- * for the sync operations.
- */
-#define INVALID_PHYS_ADDR (~(phys_addr_t)0)
-static phys_addr_t *io_tlb_orig_addr;
-
-/*
- * The mapped buffer's size should be validated during a sync operation.
- */
-static size_t *io_tlb_orig_size;
-
-/*
- * Protect the above data structures in the map and unmap calls
- */
-static DEFINE_SPINLOCK(io_tlb_lock);
-
-static int late_alloc;
+static unsigned long default_nslabs = IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT;
static int __init
setup_io_tlb_npages(char *str)
{
if (isdigit(*str)) {
- io_tlb_nslabs = simple_strtoul(str, &str, 0);
/* avoid tail segment of size < IO_TLB_SEGSIZE */
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
+ default_nslabs =
+ ALIGN(simple_strtoul(str, &str, 0), IO_TLB_SEGSIZE);
}
if (*str == ',')
++str;
- if (!strcmp(str, "force")) {
+ if (!strcmp(str, "force"))
swiotlb_force = SWIOTLB_FORCE;
- } else if (!strcmp(str, "noforce")) {
+ else if (!strcmp(str, "noforce"))
swiotlb_force = SWIOTLB_NO_FORCE;
- io_tlb_nslabs = 1;
- }
return 0;
}
early_param("swiotlb", setup_io_tlb_npages);
-static bool no_iotlb_memory;
-
-unsigned long swiotlb_nr_tbl(void)
-{
- return unlikely(no_iotlb_memory) ? 0 : io_tlb_nslabs;
-}
-EXPORT_SYMBOL_GPL(swiotlb_nr_tbl);
-
unsigned int swiotlb_max_segment(void)
{
- return unlikely(no_iotlb_memory) ? 0 : max_segment;
+ return io_tlb_default_mem ? max_segment : 0;
}
EXPORT_SYMBOL_GPL(swiotlb_max_segment);
unsigned long swiotlb_size_or_default(void)
{
- unsigned long size;
-
- size = io_tlb_nslabs << IO_TLB_SHIFT;
-
- return size ? size : (IO_TLB_DEFAULT_SIZE);
+ return default_nslabs << IO_TLB_SHIFT;
}
-void __init swiotlb_adjust_size(unsigned long new_size)
+void __init swiotlb_adjust_size(unsigned long size)
{
- unsigned long size;
-
/*
* If swiotlb parameter has not been specified, give a chance to
* architectures such as those supporting memory encryption to
* adjust/expand SWIOTLB size for their use.
*/
- if (!io_tlb_nslabs) {
- size = ALIGN(new_size, IO_TLB_SIZE);
- io_tlb_nslabs = size >> IO_TLB_SHIFT;
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
-
- pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
- }
+ if (default_nslabs != IO_TLB_DEFAULT_SIZE >> IO_TLB_SHIFT)
+ return;
+ size = ALIGN(size, IO_TLB_SIZE);
+ default_nslabs = ALIGN(size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+ pr_info("SWIOTLB bounce buffer size adjusted to %luMB", size >> 20);
}
void swiotlb_print_info(void)
{
- unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ struct io_tlb_mem *mem = io_tlb_default_mem;
- if (no_iotlb_memory) {
+ if (!mem) {
pr_warn("No low mem\n");
return;
}
- pr_info("mapped [mem %pa-%pa] (%luMB)\n", &io_tlb_start, &io_tlb_end,
- bytes >> 20);
+ pr_info("mapped [mem %pa-%pa] (%luMB)\n", &mem->start, &mem->end,
+ (mem->nslabs << IO_TLB_SHIFT) >> 20);
}
static inline unsigned long io_tlb_offset(unsigned long val)
*/
void __init swiotlb_update_mem_attributes(void)
{
+ struct io_tlb_mem *mem = io_tlb_default_mem;
void *vaddr;
unsigned long bytes;
- if (no_iotlb_memory || late_alloc)
+ if (!mem || mem->late_alloc)
return;
-
- vaddr = phys_to_virt(io_tlb_start);
- bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
+ vaddr = phys_to_virt(mem->start);
+ bytes = PAGE_ALIGN(mem->nslabs << IO_TLB_SHIFT);
set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
memset(vaddr, 0, bytes);
}
int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
{
- unsigned long i, bytes;
+ unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
+ struct io_tlb_mem *mem;
size_t alloc_size;
- bytes = nslabs << IO_TLB_SHIFT;
-
- io_tlb_nslabs = nslabs;
- io_tlb_start = __pa(tlb);
- io_tlb_end = io_tlb_start + bytes;
-
- /*
- * Allocate and initialize the free list array. This array is used
- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
- */
- alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(int));
- io_tlb_list = memblock_alloc(alloc_size, PAGE_SIZE);
- if (!io_tlb_list)
- panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
- __func__, alloc_size, PAGE_SIZE);
+ if (swiotlb_force == SWIOTLB_NO_FORCE)
+ return 0;
- alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t));
- io_tlb_orig_addr = memblock_alloc(alloc_size, PAGE_SIZE);
- if (!io_tlb_orig_addr)
- panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
- __func__, alloc_size, PAGE_SIZE);
+ /* protect against double initialization */
+ if (WARN_ON_ONCE(io_tlb_default_mem))
+ return -ENOMEM;
- alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
- io_tlb_orig_size = memblock_alloc(alloc_size, PAGE_SIZE);
- if (!io_tlb_orig_size)
+ alloc_size = PAGE_ALIGN(struct_size(mem, slots, nslabs));
+ mem = memblock_alloc(alloc_size, PAGE_SIZE);
+ if (!mem)
panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
__func__, alloc_size, PAGE_SIZE);
-
- for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
- io_tlb_orig_size[i] = 0;
+ mem->nslabs = nslabs;
+ mem->start = __pa(tlb);
+ mem->end = mem->start + bytes;
+ mem->index = 0;
+ spin_lock_init(&mem->lock);
+ for (i = 0; i < mem->nslabs; i++) {
+ mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
+ mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
+ mem->slots[i].alloc_size = 0;
}
- io_tlb_index = 0;
- no_iotlb_memory = false;
+ io_tlb_default_mem = mem;
if (verbose)
swiotlb_print_info();
-
- swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+ swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
return 0;
}
void __init
swiotlb_init(int verbose)
{
- size_t default_size = IO_TLB_DEFAULT_SIZE;
- unsigned char *vstart;
- unsigned long bytes;
-
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
-
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ size_t bytes = PAGE_ALIGN(default_nslabs << IO_TLB_SHIFT);
+ void *tlb;
- /* Get IO TLB memory from the low pages */
- vstart = memblock_alloc_low(PAGE_ALIGN(bytes), PAGE_SIZE);
- if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose))
+ if (swiotlb_force == SWIOTLB_NO_FORCE)
return;
- if (io_tlb_start) {
- memblock_free_early(io_tlb_start,
- PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
- io_tlb_start = 0;
- }
+ /* Get IO TLB memory from the low pages */
+ tlb = memblock_alloc_low(bytes, PAGE_SIZE);
+ if (!tlb)
+ goto fail;
+ if (swiotlb_init_with_tbl(tlb, default_nslabs, verbose))
+ goto fail_free_mem;
+ return;
+
+fail_free_mem:
+ memblock_free_early(__pa(tlb), bytes);
+fail:
pr_warn("Cannot allocate buffer");
- no_iotlb_memory = true;
}
/*
int
swiotlb_late_init_with_default_size(size_t default_size)
{
- unsigned long bytes, req_nslabs = io_tlb_nslabs;
+ unsigned long nslabs =
+ ALIGN(default_size >> IO_TLB_SHIFT, IO_TLB_SEGSIZE);
+ unsigned long bytes;
unsigned char *vstart = NULL;
unsigned int order;
int rc = 0;
- if (!io_tlb_nslabs) {
- io_tlb_nslabs = (default_size >> IO_TLB_SHIFT);
- io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
- }
+ if (swiotlb_force == SWIOTLB_NO_FORCE)
+ return 0;
/*
* Get IO TLB memory from the low pages
*/
- order = get_order(io_tlb_nslabs << IO_TLB_SHIFT);
- io_tlb_nslabs = SLABS_PER_PAGE << order;
- bytes = io_tlb_nslabs << IO_TLB_SHIFT;
+ order = get_order(nslabs << IO_TLB_SHIFT);
+ nslabs = SLABS_PER_PAGE << order;
+ bytes = nslabs << IO_TLB_SHIFT;
while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) {
vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN,
order--;
}
- if (!vstart) {
- io_tlb_nslabs = req_nslabs;
+ if (!vstart)
return -ENOMEM;
- }
+
if (order != get_order(bytes)) {
pr_warn("only able to allocate %ld MB\n",
(PAGE_SIZE << order) >> 20);
- io_tlb_nslabs = SLABS_PER_PAGE << order;
+ nslabs = SLABS_PER_PAGE << order;
}
- rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs);
+ rc = swiotlb_late_init_with_tbl(vstart, nslabs);
if (rc)
free_pages((unsigned long)vstart, order);
return rc;
}
-static void swiotlb_cleanup(void)
-{
- io_tlb_end = 0;
- io_tlb_start = 0;
- io_tlb_nslabs = 0;
- max_segment = 0;
-}
-
int
swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
{
- unsigned long i, bytes;
+ unsigned long bytes = nslabs << IO_TLB_SHIFT, i;
+ struct io_tlb_mem *mem;
- bytes = nslabs << IO_TLB_SHIFT;
+ if (swiotlb_force == SWIOTLB_NO_FORCE)
+ return 0;
- io_tlb_nslabs = nslabs;
- io_tlb_start = virt_to_phys(tlb);
- io_tlb_end = io_tlb_start + bytes;
+ /* protect against double initialization */
+ if (WARN_ON_ONCE(io_tlb_default_mem))
+ return -ENOMEM;
- set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
- memset(tlb, 0, bytes);
+ mem = (void *)__get_free_pages(GFP_KERNEL,
+ get_order(struct_size(mem, slots, nslabs)));
+ if (!mem)
+ return -ENOMEM;
- /*
- * Allocate and initialize the free list array. This array is used
- * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE
- * between io_tlb_start and io_tlb_end.
- */
- io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs * sizeof(int)));
- if (!io_tlb_list)
- goto cleanup3;
-
- io_tlb_orig_addr = (phys_addr_t *)
- __get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs *
- sizeof(phys_addr_t)));
- if (!io_tlb_orig_addr)
- goto cleanup4;
-
- io_tlb_orig_size = (size_t *)
- __get_free_pages(GFP_KERNEL,
- get_order(io_tlb_nslabs *
- sizeof(size_t)));
- if (!io_tlb_orig_size)
- goto cleanup5;
-
-
- for (i = 0; i < io_tlb_nslabs; i++) {
- io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
- io_tlb_orig_size[i] = 0;
+ mem->nslabs = nslabs;
+ mem->start = virt_to_phys(tlb);
+ mem->end = mem->start + bytes;
+ mem->index = 0;
+ mem->late_alloc = 1;
+ spin_lock_init(&mem->lock);
+ for (i = 0; i < mem->nslabs; i++) {
+ mem->slots[i].list = IO_TLB_SEGSIZE - io_tlb_offset(i);
+ mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
+ mem->slots[i].alloc_size = 0;
}
- io_tlb_index = 0;
- no_iotlb_memory = false;
-
- swiotlb_print_info();
- late_alloc = 1;
-
- swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT);
+ set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
+ memset(tlb, 0, bytes);
+ io_tlb_default_mem = mem;
+ swiotlb_print_info();
+ swiotlb_set_max_segment(mem->nslabs << IO_TLB_SHIFT);
return 0;
-
-cleanup5:
- free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
- sizeof(phys_addr_t)));
-
-cleanup4:
- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
- sizeof(int)));
- io_tlb_list = NULL;
-cleanup3:
- swiotlb_cleanup();
- return -ENOMEM;
}
void __init swiotlb_exit(void)
{
- if (!io_tlb_orig_addr)
+ struct io_tlb_mem *mem = io_tlb_default_mem;
+ size_t size;
+
+ if (!mem)
return;
- if (late_alloc) {
- free_pages((unsigned long)io_tlb_orig_size,
- get_order(io_tlb_nslabs * sizeof(size_t)));
- free_pages((unsigned long)io_tlb_orig_addr,
- get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
- free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
- sizeof(int)));
- free_pages((unsigned long)phys_to_virt(io_tlb_start),
- get_order(io_tlb_nslabs << IO_TLB_SHIFT));
- } else {
- memblock_free_late(__pa(io_tlb_orig_addr),
- PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
- memblock_free_late(__pa(io_tlb_orig_size),
- PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t)));
- memblock_free_late(__pa(io_tlb_list),
- PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
- memblock_free_late(io_tlb_start,
- PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT));
- }
- swiotlb_cleanup();
+ size = struct_size(mem, slots, mem->nslabs);
+ if (mem->late_alloc)
+ free_pages((unsigned long)mem, get_order(size));
+ else
+ memblock_free_late(__pa(mem), PAGE_ALIGN(size));
+ io_tlb_default_mem = NULL;
}
/*
* Bounce: copy the swiotlb buffer from or back to the original dma location
*/
-static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir)
+static void swiotlb_bounce(struct device *dev, phys_addr_t tlb_addr, size_t size,
+ enum dma_data_direction dir)
{
+ struct io_tlb_mem *mem = io_tlb_default_mem;
+ int index = (tlb_addr - mem->start) >> IO_TLB_SHIFT;
+ phys_addr_t orig_addr = mem->slots[index].orig_addr;
+ size_t alloc_size = mem->slots[index].alloc_size;
unsigned long pfn = PFN_DOWN(orig_addr);
unsigned char *vaddr = phys_to_virt(tlb_addr);
+ if (orig_addr == INVALID_PHYS_ADDR)
+ return;
+
+ if (size > alloc_size) {
+ dev_WARN_ONCE(dev, 1,
+ "Buffer overflow detected. Allocation size: %zu. Mapping size: %zu.\n",
+ alloc_size, size);
+ size = alloc_size;
+ }
+
if (PageHighMem(pfn_to_page(pfn))) {
/* The buffer does not have a mapping. Map it in and copy */
unsigned int offset = orig_addr & ~PAGE_MASK;
return nr_slots(boundary_mask + 1);
}
-static unsigned int wrap_index(unsigned int index)
+static unsigned int wrap_index(struct io_tlb_mem *mem, unsigned int index)
{
- if (index >= io_tlb_nslabs)
+ if (index >= mem->nslabs)
return 0;
return index;
}
static int find_slots(struct device *dev, phys_addr_t orig_addr,
size_t alloc_size)
{
+ struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned long boundary_mask = dma_get_seg_boundary(dev);
dma_addr_t tbl_dma_addr =
- phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
+ phys_to_dma_unencrypted(dev, mem->start) & boundary_mask;
unsigned long max_slots = get_max_slots(boundary_mask);
unsigned int iotlb_align_mask =
dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
if (alloc_size >= PAGE_SIZE)
stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
- spin_lock_irqsave(&io_tlb_lock, flags);
- if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
+ spin_lock_irqsave(&mem->lock, flags);
+ if (unlikely(nslots > mem->nslabs - mem->used))
goto not_found;
- index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
+ index = wrap = wrap_index(mem, ALIGN(mem->index, stride));
do {
if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
(orig_addr & iotlb_align_mask)) {
- index = wrap_index(index + 1);
+ index = wrap_index(mem, index + 1);
continue;
}
if (!iommu_is_span_boundary(index, nslots,
nr_slots(tbl_dma_addr),
max_slots)) {
- if (io_tlb_list[index] >= nslots)
+ if (mem->slots[index].list >= nslots)
goto found;
}
- index = wrap_index(index + stride);
+ index = wrap_index(mem, index + stride);
} while (index != wrap);
not_found:
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+ spin_unlock_irqrestore(&mem->lock, flags);
return -1;
found:
for (i = index; i < index + nslots; i++)
- io_tlb_list[i] = 0;
+ mem->slots[i].list = 0;
for (i = index - 1;
io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
- io_tlb_list[i]; i--)
- io_tlb_list[i] = ++count;
+ mem->slots[i].list; i--)
+ mem->slots[i].list = ++count;
/*
* Update the indices to avoid searching in the next round.
*/
- if (index + nslots < io_tlb_nslabs)
- io_tlb_index = index + nslots;
+ if (index + nslots < mem->nslabs)
+ mem->index = index + nslots;
else
- io_tlb_index = 0;
- io_tlb_used += nslots;
+ mem->index = 0;
+ mem->used += nslots;
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+ spin_unlock_irqrestore(&mem->lock, flags);
return index;
}
size_t mapping_size, size_t alloc_size,
enum dma_data_direction dir, unsigned long attrs)
{
+ struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned int offset = swiotlb_align_offset(dev, orig_addr);
- unsigned int index, i;
+ unsigned int i;
+ int index;
phys_addr_t tlb_addr;
- if (no_iotlb_memory)
+ if (!mem)
panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
if (mem_encrypt_active())
if (!(attrs & DMA_ATTR_NO_WARN))
dev_warn_ratelimited(dev,
"swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
- alloc_size, io_tlb_nslabs, io_tlb_used);
+ alloc_size, mem->nslabs, mem->used);
return (phys_addr_t)DMA_MAPPING_ERROR;
}
* needed.
*/
for (i = 0; i < nr_slots(alloc_size + offset); i++) {
- io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
- io_tlb_orig_size[index+i] = alloc_size - (i << IO_TLB_SHIFT);
+ mem->slots[index + i].orig_addr = slot_addr(orig_addr, i);
+ mem->slots[index + i].alloc_size =
+ alloc_size - (i << IO_TLB_SHIFT);
}
- tlb_addr = slot_addr(io_tlb_start, index) + offset;
+ tlb_addr = slot_addr(mem->start, index) + offset;
if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
+ swiotlb_bounce(dev, tlb_addr, mapping_size, DMA_TO_DEVICE);
return tlb_addr;
}
-static void validate_sync_size_and_truncate(struct device *hwdev, size_t orig_size, size_t *size)
-{
- if (*size > orig_size) {
- /* Warn and truncate mapping_size */
- dev_WARN_ONCE(hwdev, 1,
- "Attempt for buffer overflow. Original size: %zu. Mapping size: %zu.\n",
- orig_size, *size);
- *size = orig_size;
- }
-}
-
/*
* tlb_addr is the physical address of the bounce buffer to unmap.
*/
void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
- size_t mapping_size, size_t alloc_size,
- enum dma_data_direction dir, unsigned long attrs)
+ size_t mapping_size, enum dma_data_direction dir,
+ unsigned long attrs)
{
+ struct io_tlb_mem *mem = io_tlb_default_mem;
unsigned long flags;
unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
- int i, count, nslots = nr_slots(alloc_size + offset);
- int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
- phys_addr_t orig_addr = io_tlb_orig_addr[index];
-
- validate_sync_size_and_truncate(hwdev, io_tlb_orig_size[index], &mapping_size);
+ int index = (tlb_addr - offset - mem->start) >> IO_TLB_SHIFT;
+ int nslots = nr_slots(mem->slots[index].alloc_size + offset);
+ int count, i;
/*
* First, sync the memory before unmapping the entry
*/
- if (orig_addr != INVALID_PHYS_ADDR &&
- !(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
- ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL)))
- swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_FROM_DEVICE);
+ if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
+ (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
+ swiotlb_bounce(hwdev, tlb_addr, mapping_size, DMA_FROM_DEVICE);
/*
* Return the buffer to the free list by setting the corresponding
* While returning the entries to the free list, we merge the entries
* with slots below and above the pool being returned.
*/
- spin_lock_irqsave(&io_tlb_lock, flags);
+ spin_lock_irqsave(&mem->lock, flags);
if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
- count = io_tlb_list[index + nslots];
+ count = mem->slots[index + nslots].list;
else
count = 0;
* superceeding slots
*/
for (i = index + nslots - 1; i >= index; i--) {
- io_tlb_list[i] = ++count;
- io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
- io_tlb_orig_size[i] = 0;
+ mem->slots[i].list = ++count;
+ mem->slots[i].orig_addr = INVALID_PHYS_ADDR;
+ mem->slots[i].alloc_size = 0;
}
/*
* available (non zero)
*/
for (i = index - 1;
- io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
+ io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && mem->slots[i].list;
i--)
- io_tlb_list[i] = ++count;
- io_tlb_used -= nslots;
- spin_unlock_irqrestore(&io_tlb_lock, flags);
+ mem->slots[i].list = ++count;
+ mem->used -= nslots;
+ spin_unlock_irqrestore(&mem->lock, flags);
}
-void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
- size_t size, enum dma_data_direction dir,
- enum dma_sync_target target)
+void swiotlb_sync_single_for_device(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir)
{
- int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
- size_t orig_size = io_tlb_orig_size[index];
- phys_addr_t orig_addr = io_tlb_orig_addr[index];
-
- if (orig_addr == INVALID_PHYS_ADDR)
- return;
+ if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)
+ swiotlb_bounce(dev, tlb_addr, size, DMA_TO_DEVICE);
+ else
+ BUG_ON(dir != DMA_FROM_DEVICE);
+}
- validate_sync_size_and_truncate(hwdev, orig_size, &size);
-
- switch (target) {
- case SYNC_FOR_CPU:
- if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, tlb_addr,
- size, DMA_FROM_DEVICE);
- else
- BUG_ON(dir != DMA_TO_DEVICE);
- break;
- case SYNC_FOR_DEVICE:
- if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
- swiotlb_bounce(orig_addr, tlb_addr,
- size, DMA_TO_DEVICE);
- else
- BUG_ON(dir != DMA_FROM_DEVICE);
- break;
- default:
- BUG();
- }
+void swiotlb_sync_single_for_cpu(struct device *dev, phys_addr_t tlb_addr,
+ size_t size, enum dma_data_direction dir)
+{
+ if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
+ swiotlb_bounce(dev, tlb_addr, size, DMA_FROM_DEVICE);
+ else
+ BUG_ON(dir != DMA_TO_DEVICE);
}
/*
/* Ensure that the address returned is DMA'ble */
dma_addr = phys_to_dma_unencrypted(dev, swiotlb_addr);
if (unlikely(!dma_capable(dev, dma_addr, size, true))) {
- swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, size, dir,
+ swiotlb_tbl_unmap_single(dev, swiotlb_addr, size, dir,
attrs | DMA_ATTR_SKIP_CPU_SYNC);
dev_WARN_ONCE(dev, 1,
"swiotlb addr %pad+%zu overflow (mask %llx, bus limit %llx).\n",
bool is_swiotlb_active(void)
{
- /*
- * When SWIOTLB is initialized, even if io_tlb_start points to physical
- * address zero, io_tlb_end surely doesn't.
- */
- return io_tlb_end != 0;
+ return io_tlb_default_mem != NULL;
}
+EXPORT_SYMBOL_GPL(is_swiotlb_active);
#ifdef CONFIG_DEBUG_FS
static int __init swiotlb_create_debugfs(void)
{
- struct dentry *root;
+ struct io_tlb_mem *mem = io_tlb_default_mem;
- root = debugfs_create_dir("swiotlb", NULL);
- debugfs_create_ulong("io_tlb_nslabs", 0400, root, &io_tlb_nslabs);
- debugfs_create_ulong("io_tlb_used", 0400, root, &io_tlb_used);
+ if (!mem)
+ return 0;
+ mem->debugfs = debugfs_create_dir("swiotlb", NULL);
+ debugfs_create_ulong("io_tlb_nslabs", 0400, mem->debugfs, &mem->nslabs);
+ debugfs_create_ulong("io_tlb_used", 0400, mem->debugfs, &mem->used);
return 0;
}
TASK_INTERRUPTIBLE, p);
}
+static bool is_effectively_child(struct wait_opts *wo, bool ptrace,
+ struct task_struct *target)
+{
+ struct task_struct *parent =
+ !ptrace ? target->real_parent : target->parent;
+
+ return current == parent || (!(wo->wo_flags & __WNOTHREAD) &&
+ same_thread_group(current, parent));
+}
+
+/*
+ * Optimization for waiting on PIDTYPE_PID. No need to iterate through child
+ * and tracee lists to find the target task.
+ */
+static int do_wait_pid(struct wait_opts *wo)
+{
+ bool ptrace;
+ struct task_struct *target;
+ int retval;
+
+ ptrace = false;
+ target = pid_task(wo->wo_pid, PIDTYPE_TGID);
+ if (target && is_effectively_child(wo, ptrace, target)) {
+ retval = wait_consider_task(wo, ptrace, target);
+ if (retval)
+ return retval;
+ }
+
+ ptrace = true;
+ target = pid_task(wo->wo_pid, PIDTYPE_PID);
+ if (target && target->ptrace &&
+ is_effectively_child(wo, ptrace, target)) {
+ retval = wait_consider_task(wo, ptrace, target);
+ if (retval)
+ return retval;
+ }
+
+ return 0;
+}
+
static long do_wait(struct wait_opts *wo)
{
- struct task_struct *tsk;
int retval;
trace_sched_process_wait(wo->wo_pid);
set_current_state(TASK_INTERRUPTIBLE);
read_lock(&tasklist_lock);
- tsk = current;
- do {
- retval = do_wait_thread(wo, tsk);
- if (retval)
- goto end;
- retval = ptrace_do_wait(wo, tsk);
+ if (wo->wo_type == PIDTYPE_PID) {
+ retval = do_wait_pid(wo);
if (retval)
goto end;
+ } else {
+ struct task_struct *tsk = current;
+
+ do {
+ retval = do_wait_thread(wo, tsk);
+ if (retval)
+ goto end;
- if (wo->wo_flags & __WNOTHREAD)
- break;
- } while_each_thread(current, tsk);
+ retval = ptrace_do_wait(wo, tsk);
+ if (retval)
+ goto end;
+
+ if (wo->wo_flags & __WNOTHREAD)
+ break;
+ } while_each_thread(current, tsk);
+ }
read_unlock(&tasklist_lock);
notask:
* invocations: in mmput() nobody alive left, in execve task is single
* threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
* mm->exe_file, but does so without using set_mm_exe_file() in order
- * to do avoid the need for any locks.
+ * to avoid the need for any locks.
*/
void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
{
static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
{
struct mm_struct *mm, *oldmm;
- int retval;
tsk->min_flt = tsk->maj_flt = 0;
tsk->nvcsw = tsk->nivcsw = 0;
if (clone_flags & CLONE_VM) {
mmget(oldmm);
mm = oldmm;
- goto good_mm;
+ } else {
+ mm = dup_mm(tsk, current->mm);
+ if (!mm)
+ return -ENOMEM;
}
- retval = -ENOMEM;
- mm = dup_mm(tsk, current->mm);
- if (!mm)
- goto fail_nomem;
-
-good_mm:
tsk->mm = mm;
tsk->active_mm = mm;
return 0;
-
-fail_nomem:
- return retval;
}
static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
* /proc/<pid>/status where Pid and NSpid are always shown relative to
* the pid namespace of the procfs instance. The difference becomes
* obvious when sending around a pidfd between pid namespaces from a
- * different branch of the tree, i.e. where no ancestoral relation is
+ * different branch of the tree, i.e. where no ancestral relation is
* present between the pid namespaces:
* - create two new pid namespaces ns1 and ns2 in the initial pid
* namespace (also take care to create new mount namespaces in the
return false;
/*
- * - make the CLONE_DETACHED bit reuseable for clone3
- * - make the CSIGNAL bits reuseable for clone3
+ * - make the CLONE_DETACHED bit reusable for clone3
+ * - make the CSIGNAL bits reusable for clone3
*/
if (kargs->flags & (CLONE_DETACHED | CSIGNAL))
return false;
config GCOV_KERNEL
bool "Enable gcov-based kernel profiling"
depends on DEBUG_FS
+ depends on !CC_IS_CLANG || CLANG_VERSION >= 110000
select CONSTRUCTORS
default n
help
mutex_unlock(&gcov_lock);
}
+/**
+ * store_gcov_u32 - store 32 bit number in gcov format to buffer
+ * @buffer: target buffer or NULL
+ * @off: offset into the buffer
+ * @v: value to be stored
+ *
+ * Number format defined by gcc: numbers are recorded in the 32 bit
+ * unsigned binary form of the endianness of the machine generating the
+ * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't
+ * store anything.
+ */
+size_t store_gcov_u32(void *buffer, size_t off, u32 v)
+{
+ u32 *data;
+
+ if (buffer) {
+ data = buffer + off;
+ *data = v;
+ }
+
+ return sizeof(*data);
+}
+
+/**
+ * store_gcov_u64 - store 64 bit number in gcov format to buffer
+ * @buffer: target buffer or NULL
+ * @off: offset into the buffer
+ * @v: value to be stored
+ *
+ * Number format defined by gcc: numbers are recorded in the 32 bit
+ * unsigned binary form of the endianness of the machine generating the
+ * file. 64 bit numbers are stored as two 32 bit numbers, the low part
+ * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store
+ * anything.
+ */
+size_t store_gcov_u64(void *buffer, size_t off, u64 v)
+{
+ u32 *data;
+
+ if (buffer) {
+ data = buffer + off;
+
+ data[0] = (v & 0xffffffffUL);
+ data[1] = (v >> 32);
+ }
+
+ return sizeof(*data) * 2;
+}
+
#ifdef CONFIG_MODULES
/* Update list and generate events when modules are unloaded. */
static int gcov_module_notifier(struct notifier_block *nb, unsigned long event,
#include <linux/list.h>
#include <linux/printk.h>
#include <linux/ratelimit.h>
-#include <linux/seq_file.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include "gcov.h"
typedef void (*llvm_gcov_callback)(void);
u32 ident;
u32 checksum;
-#if CONFIG_CLANG_VERSION < 110000
- u8 use_extra_checksum;
-#endif
u32 cfg_checksum;
u32 num_counters;
u64 *counters;
-#if CONFIG_CLANG_VERSION < 110000
- const char *function_name;
-#endif
};
static struct gcov_info *current_info;
}
EXPORT_SYMBOL(llvm_gcov_init);
-#if CONFIG_CLANG_VERSION < 110000
-void llvm_gcda_start_file(const char *orig_filename, const char version[4],
- u32 checksum)
-{
- current_info->filename = orig_filename;
- memcpy(¤t_info->version, version, sizeof(current_info->version));
- current_info->checksum = checksum;
-}
-EXPORT_SYMBOL(llvm_gcda_start_file);
-#else
void llvm_gcda_start_file(const char *orig_filename, u32 version, u32 checksum)
{
current_info->filename = orig_filename;
current_info->checksum = checksum;
}
EXPORT_SYMBOL(llvm_gcda_start_file);
-#endif
-
-#if CONFIG_CLANG_VERSION < 110000
-void llvm_gcda_emit_function(u32 ident, const char *function_name,
- u32 func_checksum, u8 use_extra_checksum, u32 cfg_checksum)
-{
- struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
-
- if (!info)
- return;
-
- INIT_LIST_HEAD(&info->head);
- info->ident = ident;
- info->checksum = func_checksum;
- info->use_extra_checksum = use_extra_checksum;
- info->cfg_checksum = cfg_checksum;
- if (function_name)
- info->function_name = kstrdup(function_name, GFP_KERNEL);
- list_add_tail(&info->head, ¤t_info->functions);
-}
-#else
void llvm_gcda_emit_function(u32 ident, u32 func_checksum, u32 cfg_checksum)
{
struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
info->cfg_checksum = cfg_checksum;
list_add_tail(&info->head, ¤t_info->functions);
}
-#endif
EXPORT_SYMBOL(llvm_gcda_emit_function);
void llvm_gcda_emit_arcs(u32 num_counters, u64 *counters)
!list_is_last(&fn_ptr2->head, &info2->functions)) {
if (fn_ptr1->checksum != fn_ptr2->checksum)
return false;
-#if CONFIG_CLANG_VERSION < 110000
- if (fn_ptr1->use_extra_checksum != fn_ptr2->use_extra_checksum)
- return false;
- if (fn_ptr1->use_extra_checksum &&
- fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum)
- return false;
-#else
if (fn_ptr1->cfg_checksum != fn_ptr2->cfg_checksum)
return false;
-#endif
fn_ptr1 = list_next_entry(fn_ptr1, head);
fn_ptr2 = list_next_entry(fn_ptr2, head);
}
}
}
-#if CONFIG_CLANG_VERSION < 110000
-static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn)
-{
- size_t cv_size; /* counter values size */
- struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn),
- GFP_KERNEL);
- if (!fn_dup)
- return NULL;
- INIT_LIST_HEAD(&fn_dup->head);
-
- fn_dup->function_name = kstrdup(fn->function_name, GFP_KERNEL);
- if (!fn_dup->function_name)
- goto err_name;
-
- cv_size = fn->num_counters * sizeof(fn->counters[0]);
- fn_dup->counters = vmalloc(cv_size);
- if (!fn_dup->counters)
- goto err_counters;
- memcpy(fn_dup->counters, fn->counters, cv_size);
-
- return fn_dup;
-
-err_counters:
- kfree(fn_dup->function_name);
-err_name:
- kfree(fn_dup);
- return NULL;
-}
-#else
static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn)
{
size_t cv_size; /* counter values size */
INIT_LIST_HEAD(&fn_dup->head);
cv_size = fn->num_counters * sizeof(fn->counters[0]);
- fn_dup->counters = vmalloc(cv_size);
+ fn_dup->counters = kvmalloc(cv_size, GFP_KERNEL);
if (!fn_dup->counters) {
kfree(fn_dup);
return NULL;
return fn_dup;
}
-#endif
/**
* gcov_info_dup - duplicate profiling data set
* gcov_info_free - release memory for profiling data set duplicate
* @info: profiling data set duplicate to free
*/
-#if CONFIG_CLANG_VERSION < 110000
-void gcov_info_free(struct gcov_info *info)
-{
- struct gcov_fn_info *fn, *tmp;
-
- list_for_each_entry_safe(fn, tmp, &info->functions, head) {
- kfree(fn->function_name);
- vfree(fn->counters);
- list_del(&fn->head);
- kfree(fn);
- }
- kfree(info->filename);
- kfree(info);
-}
-#else
void gcov_info_free(struct gcov_info *info)
{
struct gcov_fn_info *fn, *tmp;
list_for_each_entry_safe(fn, tmp, &info->functions, head) {
- vfree(fn->counters);
+ kvfree(fn->counters);
list_del(&fn->head);
kfree(fn);
}
kfree(info->filename);
kfree(info);
}
-#endif
-
-#define ITER_STRIDE PAGE_SIZE
-
-/**
- * struct gcov_iterator - specifies current file position in logical records
- * @info: associated profiling data
- * @buffer: buffer containing file data
- * @size: size of buffer
- * @pos: current position in file
- */
-struct gcov_iterator {
- struct gcov_info *info;
- void *buffer;
- size_t size;
- loff_t pos;
-};
-
-/**
- * store_gcov_u32 - store 32 bit number in gcov format to buffer
- * @buffer: target buffer or NULL
- * @off: offset into the buffer
- * @v: value to be stored
- *
- * Number format defined by gcc: numbers are recorded in the 32 bit
- * unsigned binary form of the endianness of the machine generating the
- * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't
- * store anything.
- */
-static size_t store_gcov_u32(void *buffer, size_t off, u32 v)
-{
- u32 *data;
-
- if (buffer) {
- data = buffer + off;
- *data = v;
- }
-
- return sizeof(*data);
-}
-
-/**
- * store_gcov_u64 - store 64 bit number in gcov format to buffer
- * @buffer: target buffer or NULL
- * @off: offset into the buffer
- * @v: value to be stored
- *
- * Number format defined by gcc: numbers are recorded in the 32 bit
- * unsigned binary form of the endianness of the machine generating the
- * file. 64 bit numbers are stored as two 32 bit numbers, the low part
- * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store
- * anything.
- */
-static size_t store_gcov_u64(void *buffer, size_t off, u64 v)
-{
- u32 *data;
-
- if (buffer) {
- data = buffer + off;
-
- data[0] = (v & 0xffffffffUL);
- data[1] = (v >> 32);
- }
-
- return sizeof(*data) * 2;
-}
/**
* convert_to_gcda - convert profiling data set to gcda file format
*
* Returns the number of bytes that were/would have been stored into the buffer.
*/
-static size_t convert_to_gcda(char *buffer, struct gcov_info *info)
+size_t convert_to_gcda(char *buffer, struct gcov_info *info)
{
struct gcov_fn_info *fi_ptr;
size_t pos = 0;
u32 i;
pos += store_gcov_u32(buffer, pos, GCOV_TAG_FUNCTION);
-#if CONFIG_CLANG_VERSION < 110000
- pos += store_gcov_u32(buffer, pos,
- fi_ptr->use_extra_checksum ? 3 : 2);
-#else
pos += store_gcov_u32(buffer, pos, 3);
-#endif
pos += store_gcov_u32(buffer, pos, fi_ptr->ident);
pos += store_gcov_u32(buffer, pos, fi_ptr->checksum);
-#if CONFIG_CLANG_VERSION < 110000
- if (fi_ptr->use_extra_checksum)
- pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum);
-#else
pos += store_gcov_u32(buffer, pos, fi_ptr->cfg_checksum);
-#endif
-
pos += store_gcov_u32(buffer, pos, GCOV_TAG_COUNTER_BASE);
pos += store_gcov_u32(buffer, pos, fi_ptr->num_counters * 2);
for (i = 0; i < fi_ptr->num_counters; i++)
return pos;
}
-
-/**
- * gcov_iter_new - allocate and initialize profiling data iterator
- * @info: profiling data set to be iterated
- *
- * Return file iterator on success, %NULL otherwise.
- */
-struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
-{
- struct gcov_iterator *iter;
-
- iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL);
- if (!iter)
- goto err_free;
-
- iter->info = info;
- /* Dry-run to get the actual buffer size. */
- iter->size = convert_to_gcda(NULL, info);
- iter->buffer = vmalloc(iter->size);
- if (!iter->buffer)
- goto err_free;
-
- convert_to_gcda(iter->buffer, info);
-
- return iter;
-
-err_free:
- kfree(iter);
- return NULL;
-}
-
-
-/**
- * gcov_iter_get_info - return profiling data set for given file iterator
- * @iter: file iterator
- */
-void gcov_iter_free(struct gcov_iterator *iter)
-{
- vfree(iter->buffer);
- kfree(iter);
-}
-
-/**
- * gcov_iter_get_info - return profiling data set for given file iterator
- * @iter: file iterator
- */
-struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter)
-{
- return iter->info;
-}
-
-/**
- * gcov_iter_start - reset file iterator to starting position
- * @iter: file iterator
- */
-void gcov_iter_start(struct gcov_iterator *iter)
-{
- iter->pos = 0;
-}
-
-/**
- * gcov_iter_next - advance file iterator to next logical record
- * @iter: file iterator
- *
- * Return zero if new position is valid, non-zero if iterator has reached end.
- */
-int gcov_iter_next(struct gcov_iterator *iter)
-{
- if (iter->pos < iter->size)
- iter->pos += ITER_STRIDE;
-
- if (iter->pos >= iter->size)
- return -EINVAL;
-
- return 0;
-}
-
-/**
- * gcov_iter_write - write data for current pos to seq_file
- * @iter: file iterator
- * @seq: seq_file handle
- *
- * Return zero on success, non-zero otherwise.
- */
-int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
-{
- size_t len;
-
- if (iter->pos >= iter->size)
- return -EINVAL;
-
- len = ITER_STRIDE;
- if (iter->pos + len > iter->size)
- len = iter->size - iter->pos;
-
- seq_write(seq, iter->buffer + iter->pos, len);
-
- return 0;
-}
#include <linux/slab.h>
#include <linux/mutex.h>
#include <linux/seq_file.h>
+#include <linux/mm.h>
#include "gcov.h"
/**
}
__setup("gcov_persist=", gcov_persist_setup);
+#define ITER_STRIDE PAGE_SIZE
+
+/**
+ * struct gcov_iterator - specifies current file position in logical records
+ * @info: associated profiling data
+ * @buffer: buffer containing file data
+ * @size: size of buffer
+ * @pos: current position in file
+ */
+struct gcov_iterator {
+ struct gcov_info *info;
+ size_t size;
+ loff_t pos;
+ char buffer[];
+};
+
+/**
+ * gcov_iter_new - allocate and initialize profiling data iterator
+ * @info: profiling data set to be iterated
+ *
+ * Return file iterator on success, %NULL otherwise.
+ */
+static struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
+{
+ struct gcov_iterator *iter;
+ size_t size;
+
+ /* Dry-run to get the actual buffer size. */
+ size = convert_to_gcda(NULL, info);
+
+ iter = kvmalloc(struct_size(iter, buffer, size), GFP_KERNEL);
+ if (!iter)
+ return NULL;
+
+ iter->info = info;
+ iter->size = size;
+ convert_to_gcda(iter->buffer, info);
+
+ return iter;
+}
+
+
+/**
+ * gcov_iter_free - free iterator data
+ * @iter: file iterator
+ */
+static void gcov_iter_free(struct gcov_iterator *iter)
+{
+ kvfree(iter);
+}
+
+/**
+ * gcov_iter_get_info - return profiling data set for given file iterator
+ * @iter: file iterator
+ */
+static struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter)
+{
+ return iter->info;
+}
+
+/**
+ * gcov_iter_start - reset file iterator to starting position
+ * @iter: file iterator
+ */
+static void gcov_iter_start(struct gcov_iterator *iter)
+{
+ iter->pos = 0;
+}
+
+/**
+ * gcov_iter_next - advance file iterator to next logical record
+ * @iter: file iterator
+ *
+ * Return zero if new position is valid, non-zero if iterator has reached end.
+ */
+static int gcov_iter_next(struct gcov_iterator *iter)
+{
+ if (iter->pos < iter->size)
+ iter->pos += ITER_STRIDE;
+
+ if (iter->pos >= iter->size)
+ return -EINVAL;
+
+ return 0;
+}
+
+/**
+ * gcov_iter_write - write data for current pos to seq_file
+ * @iter: file iterator
+ * @seq: seq_file handle
+ *
+ * Return zero on success, non-zero otherwise.
+ */
+static int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
+{
+ size_t len;
+
+ if (iter->pos >= iter->size)
+ return -EINVAL;
+
+ len = ITER_STRIDE;
+ if (iter->pos + len > iter->size)
+ len = iter->size - iter->pos;
+
+ seq_write(seq, iter->buffer + iter->pos, len);
+
+ return 0;
+}
+
/*
* seq_file.start() implementation for gcov data files. Note that the
* gcov_iterator interface is designed to be more restrictive than seq_file
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/string.h>
-#include <linux/seq_file.h>
-#include <linux/vmalloc.h>
+#include <linux/mm.h>
#include "gcov.h"
#if (__GNUC__ >= 10)
cv_size = sizeof(gcov_type) * sci_ptr->num;
- dci_ptr->values = vmalloc(cv_size);
+ dci_ptr->values = kvmalloc(cv_size, GFP_KERNEL);
if (!dci_ptr->values)
goto err_free;
ci_ptr = info->functions[fi_idx]->ctrs;
for (ct_idx = 0; ct_idx < active; ct_idx++, ci_ptr++)
- vfree(ci_ptr->values);
+ kvfree(ci_ptr->values);
kfree(info->functions[fi_idx]);
}
kfree(info);
}
-#define ITER_STRIDE PAGE_SIZE
-
-/**
- * struct gcov_iterator - specifies current file position in logical records
- * @info: associated profiling data
- * @buffer: buffer containing file data
- * @size: size of buffer
- * @pos: current position in file
- */
-struct gcov_iterator {
- struct gcov_info *info;
- void *buffer;
- size_t size;
- loff_t pos;
-};
-
-/**
- * store_gcov_u32 - store 32 bit number in gcov format to buffer
- * @buffer: target buffer or NULL
- * @off: offset into the buffer
- * @v: value to be stored
- *
- * Number format defined by gcc: numbers are recorded in the 32 bit
- * unsigned binary form of the endianness of the machine generating the
- * file. Returns the number of bytes stored. If @buffer is %NULL, doesn't
- * store anything.
- */
-static size_t store_gcov_u32(void *buffer, size_t off, u32 v)
-{
- u32 *data;
-
- if (buffer) {
- data = buffer + off;
- *data = v;
- }
-
- return sizeof(*data);
-}
-
-/**
- * store_gcov_u64 - store 64 bit number in gcov format to buffer
- * @buffer: target buffer or NULL
- * @off: offset into the buffer
- * @v: value to be stored
- *
- * Number format defined by gcc: numbers are recorded in the 32 bit
- * unsigned binary form of the endianness of the machine generating the
- * file. 64 bit numbers are stored as two 32 bit numbers, the low part
- * first. Returns the number of bytes stored. If @buffer is %NULL, doesn't store
- * anything.
- */
-static size_t store_gcov_u64(void *buffer, size_t off, u64 v)
-{
- u32 *data;
-
- if (buffer) {
- data = buffer + off;
-
- data[0] = (v & 0xffffffffUL);
- data[1] = (v >> 32);
- }
-
- return sizeof(*data) * 2;
-}
-
/**
* convert_to_gcda - convert profiling data set to gcda file format
* @buffer: the buffer to store file data or %NULL if no data should be stored
*
* Returns the number of bytes that were/would have been stored into the buffer.
*/
-static size_t convert_to_gcda(char *buffer, struct gcov_info *info)
+size_t convert_to_gcda(char *buffer, struct gcov_info *info)
{
struct gcov_fn_info *fi_ptr;
struct gcov_ctr_info *ci_ptr;
return pos;
}
-
-/**
- * gcov_iter_new - allocate and initialize profiling data iterator
- * @info: profiling data set to be iterated
- *
- * Return file iterator on success, %NULL otherwise.
- */
-struct gcov_iterator *gcov_iter_new(struct gcov_info *info)
-{
- struct gcov_iterator *iter;
-
- iter = kzalloc(sizeof(struct gcov_iterator), GFP_KERNEL);
- if (!iter)
- goto err_free;
-
- iter->info = info;
- /* Dry-run to get the actual buffer size. */
- iter->size = convert_to_gcda(NULL, info);
- iter->buffer = vmalloc(iter->size);
- if (!iter->buffer)
- goto err_free;
-
- convert_to_gcda(iter->buffer, info);
-
- return iter;
-
-err_free:
- kfree(iter);
- return NULL;
-}
-
-
-/**
- * gcov_iter_get_info - return profiling data set for given file iterator
- * @iter: file iterator
- */
-void gcov_iter_free(struct gcov_iterator *iter)
-{
- vfree(iter->buffer);
- kfree(iter);
-}
-
-/**
- * gcov_iter_get_info - return profiling data set for given file iterator
- * @iter: file iterator
- */
-struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter)
-{
- return iter->info;
-}
-
-/**
- * gcov_iter_start - reset file iterator to starting position
- * @iter: file iterator
- */
-void gcov_iter_start(struct gcov_iterator *iter)
-{
- iter->pos = 0;
-}
-
-/**
- * gcov_iter_next - advance file iterator to next logical record
- * @iter: file iterator
- *
- * Return zero if new position is valid, non-zero if iterator has reached end.
- */
-int gcov_iter_next(struct gcov_iterator *iter)
-{
- if (iter->pos < iter->size)
- iter->pos += ITER_STRIDE;
-
- if (iter->pos >= iter->size)
- return -EINVAL;
-
- return 0;
-}
-
-/**
- * gcov_iter_write - write data for current pos to seq_file
- * @iter: file iterator
- * @seq: seq_file handle
- *
- * Return zero on success, non-zero otherwise.
- */
-int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq)
-{
- size_t len;
-
- if (iter->pos >= iter->size)
- return -EINVAL;
-
- len = ITER_STRIDE;
- if (iter->pos + len > iter->size)
- len = iter->size - iter->pos;
-
- seq_write(seq, iter->buffer + iter->pos, len);
-
- return 0;
-}
void gcov_info_link(struct gcov_info *info);
void gcov_info_unlink(struct gcov_info *prev, struct gcov_info *info);
bool gcov_info_within_module(struct gcov_info *info, struct module *mod);
+size_t convert_to_gcda(char *buffer, struct gcov_info *info);
/* Base interface. */
enum gcov_action {
void gcov_event(enum gcov_action action, struct gcov_info *info);
void gcov_enable_events(void);
-/* Iterator control. */
-struct seq_file;
-struct gcov_iterator;
-
-struct gcov_iterator *gcov_iter_new(struct gcov_info *info);
-void gcov_iter_free(struct gcov_iterator *iter);
-void gcov_iter_start(struct gcov_iterator *iter);
-int gcov_iter_next(struct gcov_iterator *iter);
-int gcov_iter_write(struct gcov_iterator *iter, struct seq_file *seq);
-struct gcov_info *gcov_iter_get_info(struct gcov_iterator *iter);
+/* writing helpers */
+size_t store_gcov_u32(void *buffer, size_t off, u32 v);
+size_t store_gcov_u64(void *buffer, size_t off, u64 v);
/* gcov_info control. */
void gcov_info_reset(struct gcov_info *info);
EXPORT_SYMBOL_GPL(irq_domain_update_bus_token);
/**
- * irq_domain_add_simple() - Register an irq_domain and optionally map a range of irqs
- * @of_node: pointer to interrupt controller's device tree node.
+ * irq_domain_create_simple() - Register an irq_domain and optionally map a range of irqs
+ * @fwnode: firmware node for the interrupt controller
* @size: total number of irqs in mapping
* @first_irq: first number of irq block assigned to the domain,
* pass zero to assign irqs on-the-fly. If first_irq is non-zero, then
* irqs get mapped dynamically on the fly. However, if the controller requires
* static virq assignments (non-DT boot) then it will set that up correctly.
*/
-struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
- unsigned int size,
- unsigned int first_irq,
- const struct irq_domain_ops *ops,
- void *host_data)
+struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode,
+ unsigned int size,
+ unsigned int first_irq,
+ const struct irq_domain_ops *ops,
+ void *host_data)
{
struct irq_domain *domain;
- domain = __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data);
+ domain = __irq_domain_add(fwnode, size, size, 0, ops, host_data);
if (!domain)
return NULL;
if (IS_ENABLED(CONFIG_SPARSE_IRQ)) {
/* attempt to allocated irq_descs */
int rc = irq_alloc_descs(first_irq, first_irq, size,
- of_node_to_nid(of_node));
+ of_node_to_nid(to_of_node(fwnode)));
if (rc < 0)
pr_info("Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
first_irq);
return domain;
}
-EXPORT_SYMBOL_GPL(irq_domain_add_simple);
+EXPORT_SYMBOL_GPL(irq_domain_create_simple);
/**
* irq_domain_add_legacy() - Allocate and register a legacy revmap irq_domain.
#include <linux/compiler.h>
#include <linux/hugetlb.h>
#include <linux/objtool.h>
+#include <linux/kmsg_dump.h>
#include <asm/page.h>
#include <asm/sections.h>
#endif
{
kexec_in_progress = true;
- kernel_restart_prepare(NULL);
+ kernel_restart_prepare("kexec reboot");
migrate_to_reboot_cpu();
/*
machine_shutdown();
}
+ kmsg_dump(KMSG_DUMP_SHUTDOWN);
machine_kexec(kexec_image);
#ifdef CONFIG_KEXEC_JUMP
sha_region_sz = KEXEC_SEGMENT_MAX * sizeof(struct kexec_sha_region);
sha_regions = vzalloc(sha_region_sz);
- if (!sha_regions)
+ if (!sha_regions) {
+ ret = -ENOMEM;
goto out_free_desc;
+ }
desc->tfm = tfm;
/*
modprobe_path is set via /proc/sys.
*/
-char modprobe_path[KMOD_PATH_LEN] = "/sbin/modprobe";
+char modprobe_path[KMOD_PATH_LEN] = CONFIG_MODPROBE_PATH;
static void free_modprobe_argv(struct subprocess_info *info)
{
static struct resource *bootmem_resource_free;
static DEFINE_SPINLOCK(bootmem_resource_lock);
-static struct resource *next_resource(struct resource *p, bool sibling_only)
+static struct resource *next_resource(struct resource *p)
{
- /* Caller wants to traverse through siblings only */
- if (sibling_only)
- return p->sibling;
-
if (p->child)
return p->child;
while (!p->sibling && p->parent)
{
struct resource *p = v;
(*pos)++;
- return (void *)next_resource(p, false);
+ return (void *)next_resource(p);
}
#ifdef CONFIG_PROC_FS
* of the resource that's within [@start..@end]; if none is found, returns
* -ENODEV. Returns -EINVAL for invalid parameters.
*
- * This function walks the whole tree and not just first level children
- * unless @first_lvl is true.
- *
* @start: start address of the resource searched for
* @end: end address of same resource
* @flags: flags which the resource must have
* @desc: descriptor the resource must have
- * @first_lvl: walk only the first level children, if set
* @res: return ptr, if resource found
*
* The caller must specify @start, @end, @flags, and @desc
*/
static int find_next_iomem_res(resource_size_t start, resource_size_t end,
unsigned long flags, unsigned long desc,
- bool first_lvl, struct resource *res)
+ struct resource *res)
{
- bool siblings_only = true;
struct resource *p;
if (!res)
read_lock(&resource_lock);
- for (p = iomem_resource.child; p; p = next_resource(p, siblings_only)) {
+ for (p = iomem_resource.child; p; p = next_resource(p)) {
/* If we passed the resource we are looking for, stop */
if (p->start > end) {
p = NULL;
if (p->end < start)
continue;
- /*
- * Now that we found a range that matches what we look for,
- * check the flags and the descriptor. If we were not asked to
- * use only the first level, start looking at children as well.
- */
- siblings_only = first_lvl;
-
if ((p->flags & flags) != flags)
continue;
if ((desc != IORES_DESC_NONE) && (desc != p->desc))
static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end,
unsigned long flags, unsigned long desc,
- bool first_lvl, void *arg,
+ void *arg,
int (*func)(struct resource *, void *))
{
struct resource res;
int ret = -EINVAL;
while (start < end &&
- !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) {
+ !find_next_iomem_res(start, end, flags, desc, &res)) {
ret = (*func)(&res, arg);
if (ret)
break;
* @arg: function argument for the callback @func
* @func: callback function that is called for each qualifying resource area
*
- * This walks through whole tree and not just first level children.
* All the memory ranges which overlap start,end and also match flags and
* desc are valid candidates.
*
int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start,
u64 end, void *arg, int (*func)(struct resource *, void *))
{
- return __walk_iomem_res_desc(start, end, flags, desc, false, arg, func);
+ return __walk_iomem_res_desc(start, end, flags, desc, arg, func);
}
EXPORT_SYMBOL_GPL(walk_iomem_res_desc);
{
unsigned long flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
- return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true,
- arg, func);
+ return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, arg,
+ func);
}
/*
{
unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY;
- return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, true,
- arg, func);
+ return __walk_iomem_res_desc(start, end, flags, IORES_DESC_NONE, arg,
+ func);
}
/*
* This function calls the @func callback against all memory ranges of type
* System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY.
* It is to be used only for System RAM.
- *
- * This will find System RAM ranges that are children of top-level resources
- * in addition to top-level System RAM resources.
*/
int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages,
void *arg, int (*func)(unsigned long, unsigned long, void *))
end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1;
flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
while (start < end &&
- !find_next_iomem_res(start, end, flags, IORES_DESC_NONE,
- false, &res)) {
+ !find_next_iomem_res(start, end, flags, IORES_DESC_NONE, &res)) {
pfn = PFN_UP(res.start);
end_pfn = PFN_DOWN(res.end + 1);
if (end_pfn > pfn)
}
EXPORT_SYMBOL_GPL(page_is_ram);
+static int __region_intersects(resource_size_t start, size_t size,
+ unsigned long flags, unsigned long desc)
+{
+ struct resource res;
+ int type = 0; int other = 0;
+ struct resource *p;
+
+ res.start = start;
+ res.end = start + size - 1;
+
+ for (p = iomem_resource.child; p ; p = p->sibling) {
+ bool is_type = (((p->flags & flags) == flags) &&
+ ((desc == IORES_DESC_NONE) ||
+ (desc == p->desc)));
+
+ if (resource_overlaps(p, &res))
+ is_type ? type++ : other++;
+ }
+
+ if (type == 0)
+ return REGION_DISJOINT;
+
+ if (other == 0)
+ return REGION_INTERSECTS;
+
+ return REGION_MIXED;
+}
+
/**
* region_intersects() - determine intersection of region with known resources
* @start: region start address
int region_intersects(resource_size_t start, size_t size, unsigned long flags,
unsigned long desc)
{
- struct resource res;
- int type = 0; int other = 0;
- struct resource *p;
-
- res.start = start;
- res.end = start + size - 1;
+ int ret;
read_lock(&resource_lock);
- for (p = iomem_resource.child; p ; p = p->sibling) {
- bool is_type = (((p->flags & flags) == flags) &&
- ((desc == IORES_DESC_NONE) ||
- (desc == p->desc)));
-
- if (resource_overlaps(p, &res))
- is_type ? type++ : other++;
- }
+ ret = __region_intersects(start, size, flags, desc);
read_unlock(&resource_lock);
- if (type == 0)
- return REGION_DISJOINT;
-
- if (other == 0)
- return REGION_INTERSECTS;
-
- return REGION_MIXED;
+ return ret;
}
EXPORT_SYMBOL_GPL(region_intersects);
return smp_load_acquire(&iomem_inode)->i_mapping;
}
-/**
- * __request_region - create a new busy resource region
- * @parent: parent resource descriptor
- * @start: resource start address
- * @n: resource region size
- * @name: reserving caller's ID string
- * @flags: IO resource flags
- */
-struct resource * __request_region(struct resource *parent,
+static int __request_region_locked(struct resource *res, struct resource *parent,
resource_size_t start, resource_size_t n,
const char *name, int flags)
{
DECLARE_WAITQUEUE(wait, current);
- struct resource *res = alloc_resource(GFP_KERNEL);
- struct resource *orig_parent = parent;
-
- if (!res)
- return NULL;
res->name = name;
res->start = start;
res->end = start + n - 1;
- write_lock(&resource_lock);
-
for (;;) {
struct resource *conflict;
continue;
}
/* Uhhuh, that didn't work out.. */
- free_resource(res);
- res = NULL;
- break;
+ return -EBUSY;
}
+
+ return 0;
+}
+
+/**
+ * __request_region - create a new busy resource region
+ * @parent: parent resource descriptor
+ * @start: resource start address
+ * @n: resource region size
+ * @name: reserving caller's ID string
+ * @flags: IO resource flags
+ */
+struct resource *__request_region(struct resource *parent,
+ resource_size_t start, resource_size_t n,
+ const char *name, int flags)
+{
+ struct resource *res = alloc_resource(GFP_KERNEL);
+ int ret;
+
+ if (!res)
+ return NULL;
+
+ write_lock(&resource_lock);
+ ret = __request_region_locked(res, parent, start, n, name, flags);
write_unlock(&resource_lock);
- if (res && orig_parent == &iomem_resource)
+ if (ret) {
+ free_resource(res);
+ return NULL;
+ }
+
+ if (parent == &iomem_resource)
revoke_iomem(res);
return res;
{
resource_size_t end, addr;
struct resource *res;
+ struct region_devres *dr = NULL;
size = ALIGN(size, 1UL << PA_SECTION_SHIFT);
end = min_t(unsigned long, base->end, (1UL << MAX_PHYSMEM_BITS) - 1);
addr = end - size + 1UL;
+ res = alloc_resource(GFP_KERNEL);
+ if (!res)
+ return ERR_PTR(-ENOMEM);
+
+ if (dev) {
+ dr = devres_alloc(devm_region_release,
+ sizeof(struct region_devres), GFP_KERNEL);
+ if (!dr) {
+ free_resource(res);
+ return ERR_PTR(-ENOMEM);
+ }
+ }
+
+ write_lock(&resource_lock);
for (; addr > size && addr >= base->start; addr -= size) {
- if (region_intersects(addr, size, 0, IORES_DESC_NONE) !=
+ if (__region_intersects(addr, size, 0, IORES_DESC_NONE) !=
REGION_DISJOINT)
continue;
- if (dev)
- res = devm_request_mem_region(dev, addr, size, name);
- else
- res = request_mem_region(addr, size, name);
- if (!res)
- return ERR_PTR(-ENOMEM);
+ if (!__request_region_locked(res, &iomem_resource, addr, size,
+ name, 0))
+ break;
+
+ if (dev) {
+ dr->parent = &iomem_resource;
+ dr->start = addr;
+ dr->n = size;
+ devres_add(dev, dr);
+ }
+
res->desc = IORES_DESC_DEVICE_PRIVATE_MEMORY;
+ write_unlock(&resource_lock);
+
+ /*
+ * A driver is claiming this region so revoke any mappings.
+ */
+ revoke_iomem(res);
return res;
}
+ write_unlock(&resource_lock);
+
+ free_resource(res);
+ if (dr)
+ devres_free(dr);
return ERR_PTR(-ERANGE);
}
/*
* RLIMIT_CPU handling. Arm the posix CPU timer if the limit is not
- * infite. In case of RLIM_INFINITY the posix CPU timer code
+ * infinite. In case of RLIM_INFINITY the posix CPU timer code
* ignores the rlimit.
*/
if (!retval && new_rlim && resource == RLIMIT_CPU &&
}
/*
- * arg_lock protects concurent updates but we still need mmap_lock for
+ * arg_lock protects concurrent updates but we still need mmap_lock for
* read to exclude races with sys_brk.
*/
mmap_read_lock(mm);
* output in procfs mostly, except
*
* - @start_brk/@brk which are used in do_brk_flags but kernel lookups
- * for VMAs when updating these memvers so anything wrong written
+ * for VMAs when updating these members so anything wrong written
* here cause kernel to swear at userspace program but won't lead
* to any problem in kernel itself
*/
error = -EINVAL;
/*
- * arg_lock protects concurent updates of arg boundaries, we need
+ * arg_lock protects concurrent updates of arg boundaries, we need
* mmap_lock for a) concurrent sys_brk, b) finding VMA for addr
* validation.
*/
* If command line arguments and environment
* are placed somewhere else on stack, we can
* set them up here, ARG_START/END to setup
- * command line argumets and ENV_START/END
+ * command line arguments and ENV_START/END
* for environment.
*/
case PR_SET_MM_START_STACK:
static int propagate_has_child_subreaper(struct task_struct *p, void *data)
{
/*
- * If task has has_child_subreaper - all its decendants
- * already have these flag too and new decendants will
+ * If task has has_child_subreaper - all its descendants
+ * already have these flag too and new descendants will
* inherit it on fork, skip them.
*
* If we've found child_reaper - skip descendants in
#ifdef CONFIG_COMPACTION
{
.procname = "compact_memory",
- .data = &sysctl_compact_memory,
+ .data = NULL,
.maxlen = sizeof(int),
.mode = 0200,
.proc_handler = sysctl_compaction_handler,
parser = &iter->parser;
if (trace_parser_loaded(parser)) {
- ftrace_match_records(iter->hash, parser->buffer, parser->idx);
+ int enable = !(iter->flags & FTRACE_ITER_NOTRACE);
+
+ ftrace_process_regex(iter, parser->buffer,
+ parser->idx, enable);
}
trace_parser_put(parser);
#include <linux/ptrace.h>
#include <linux/async.h>
#include <linux/uaccess.h>
+#include <linux/initrd.h>
#include <trace/events/module.h>
commit_creds(new);
+ wait_for_initramfs();
retval = kernel_execve(sub_info->path,
(const char *const *)sub_info->argv,
(const char *const *)sub_info->envp);
* @argv: arg vector for process
* @envp: environment for process
* @gfp_mask: gfp mask for memory allocation
- * @cleanup: a cleanup function
* @init: an init function
+ * @cleanup: a cleanup function
* @data: arbitrary context sensitive data
*
* Returns either %NULL on allocation failure, or a subprocess_info
* exec. A non-zero return code causes the process to error out, exit,
* and return the failure to the calling process
*
- * The cleanup function is just before ethe subprocess_info is about to
+ * The cleanup function is just before the subprocess_info is about to
* be freed. This can be used for freeing the argv and envp. The
* Function must be runnable in either a process context or the
* context in which call_usermodehelper_exec is called.
/**
* call_usermodehelper_exec - start a usermode application
- * @sub_info: information about the subprocessa
+ * @sub_info: information about the subprocess
* @wait: wait for the application to finish and return status.
* when UMH_NO_WAIT don't wait at all, but you get no useful error back
* when the program couldn't be exec'ed. This makes it safe to call
/*
* Preemption is disabled here to make sure the cond_func is called under the
- * same condtions in UP and SMP.
+ * same conditions in UP and SMP.
*/
void on_each_cpu_cond_mask(smp_cond_func_t cond_func, smp_call_func_t func,
void *info, bool wait, const struct cpumask *mask)
/*
* Verify that we can not violate the policy of which files
* may be accessed that is specified by the root directory,
- * by verifing that the root directory is at the root of the
+ * by verifying that the root directory is at the root of the
* mount namespace which allows all files to be accessed.
*/
ret = -EPERM;
goto out;
ret = -EINVAL;
}
- /* Be very certaint the new map actually exists */
+ /* Be very certain the new map actually exists */
if (new_map.nr_extents == 0)
goto out;
/* Allow the specified ids if we have the appropriate capability
* (CAP_SETUID or CAP_SETGID) over the parent user namespace.
- * And the opener of the id file also had the approprpiate capability.
+ * And the opener of the id file also has the appropriate capability.
*/
if (ns_capable(ns->parent, cap_setid) &&
file_ns_capable(file, ns->parent, cap_setid))
# SPDX-License-Identifier: GPL-2.0-only
-gen_crc32table
-gen_crc64table
-crc32table.h
-crc64table.h
-oid_registry_data.c
+/crc32table.h
+/crc64table.h
+/gen_crc32table
+/gen_crc64table
+/oid_registry_data.c
bool "KFENCE: low-overhead sampling-based memory safety error detector"
depends on HAVE_ARCH_KFENCE && (SLAB || SLUB)
select STACKTRACE
+ select IRQ_WORK
help
KFENCE is a low-overhead sampling-based detector of heap out-of-bounds
access, use-after-free, and invalid-free errors. KFENCE is designed
k = a_log(bch, a);
rows[0] = c;
- /* buid linear system to solve X^4+aX^2+bX+c = 0 */
+ /* build linear system to solve X^4+aX^2+bX+c = 0 */
for (i = 0; i < m; i++) {
rows[i+1] = bch->a_pow_tab[4*i]^
(a ? bch->a_pow_tab[mod_s(bch, k)] : 0)^
* lib/bitmap.c
* Helper functions for bitmap.h.
*/
-#include <linux/export.h>
-#include <linux/thread_info.h>
-#include <linux/ctype.h>
-#include <linux/errno.h>
+
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/bug.h>
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/export.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/string.h>
+#include <linux/thread_info.h>
#include <linux/uaccess.h>
#include <asm/page.h>
}
EXPORT_SYMBOL(bitmap_free);
+static void devm_bitmap_free(void *data)
+{
+ unsigned long *bitmap = data;
+
+ bitmap_free(bitmap);
+}
+
+unsigned long *devm_bitmap_alloc(struct device *dev,
+ unsigned int nbits, gfp_t flags)
+{
+ unsigned long *bitmap;
+ int ret;
+
+ bitmap = bitmap_alloc(nbits, flags);
+ if (!bitmap)
+ return NULL;
+
+ ret = devm_add_action_or_reset(dev, devm_bitmap_free, bitmap);
+ if (ret)
+ return NULL;
+
+ return bitmap;
+}
+EXPORT_SYMBOL_GPL(devm_bitmap_alloc);
+
+unsigned long *devm_bitmap_zalloc(struct device *dev,
+ unsigned int nbits, gfp_t flags)
+{
+ return devm_bitmap_alloc(dev, nbits, flags | __GFP_ZERO);
+}
+EXPORT_SYMBOL_GPL(devm_bitmap_zalloc);
+
#if BITS_PER_LONG == 64
/**
* bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap
/* Chew up trailing spaces. */
return skip_spaces(args);
}
+EXPORT_SYMBOL(next_arg);
* @nbytes: number of bytes in data buffer.
* @crc: previous returned crc8 value.
*/
-u8 crc8(const u8 table[CRC8_TABLE_SIZE], u8 *pdata, size_t nbytes, u8 crc)
+u8 crc8(const u8 table[CRC8_TABLE_SIZE], const u8 *pdata, size_t nbytes, u8 crc)
{
/* loop over the buffer data */
while (nbytes-- > 0)
static inline int INIT process_bit1(struct writer *wr, struct rc *rc,
struct cstate *cst, uint16_t *p,
int pos_state, uint16_t *prob) {
- int offset;
+ int offset;
uint16_t *prob_len;
int num_bits;
int len;
/* tail :$info is function or line-range */
fline = strchr(query->filename, ':');
if (!fline)
- break;
+ continue;
*fline++ = '\0';
if (isalpha(*fline) || *fline == '*' || *fline == '?') {
/* take as function name */
* searching it for one bits.
* - The optional "addr2", which is anded with "addr1" if present.
*/
-static unsigned long _find_next_bit(const unsigned long *addr1,
+unsigned long _find_next_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long nbits,
unsigned long start, unsigned long invert, unsigned long le)
{
return min(start + __ffs(tmp), nbits);
}
-#endif
-
-#ifndef find_next_bit
-/*
- * Find the next set bit in a memory region.
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
-}
-EXPORT_SYMBOL(find_next_bit);
-#endif
-
-#ifndef find_next_zero_bit
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
-}
-EXPORT_SYMBOL(find_next_zero_bit);
-#endif
-
-#if !defined(find_next_and_bit)
-unsigned long find_next_and_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long size,
- unsigned long offset)
-{
- return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
-}
-EXPORT_SYMBOL(find_next_and_bit);
+EXPORT_SYMBOL(_find_next_bit);
#endif
#ifndef find_first_bit
/*
* Find the first set bit in a memory region.
*/
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
{
unsigned long idx;
return size;
}
-EXPORT_SYMBOL(find_first_bit);
+EXPORT_SYMBOL(_find_first_bit);
#endif
#ifndef find_first_zero_bit
/*
* Find the first cleared bit in a memory region.
*/
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
unsigned long idx;
return size;
}
-EXPORT_SYMBOL(find_first_zero_bit);
+EXPORT_SYMBOL(_find_first_zero_bit);
#endif
#ifndef find_last_bit
-unsigned long find_last_bit(const unsigned long *addr, unsigned long size)
+unsigned long _find_last_bit(const unsigned long *addr, unsigned long size)
{
if (size) {
unsigned long val = BITMAP_LAST_WORD_MASK(size);
}
return size;
}
-EXPORT_SYMBOL(find_last_bit);
-#endif
-
-#ifdef __BIG_ENDIAN
-
-#ifndef find_next_zero_bit_le
-unsigned long find_next_zero_bit_le(const void *addr, unsigned
- long size, unsigned long offset)
-{
- return _find_next_bit(addr, NULL, size, offset, ~0UL, 1);
-}
-EXPORT_SYMBOL(find_next_zero_bit_le);
-#endif
-
-#ifndef find_next_bit_le
-unsigned long find_next_bit_le(const void *addr, unsigned
- long size, unsigned long offset)
-{
- return _find_next_bit(addr, NULL, size, offset, 0UL, 1);
-}
-EXPORT_SYMBOL(find_next_bit_le);
+EXPORT_SYMBOL(_find_last_bit);
#endif
-#endif /* __BIG_ENDIAN */
-
unsigned long find_next_clump8(unsigned long *clump, const unsigned long *addr,
unsigned long size, unsigned long offset)
{
* @nr: The number of zeroed bits we're looking for
* @data: additional data - unused
* @pool: pool to find the fit region memory from
+ * @start_addr: not used in this function
*/
unsigned long gen_pool_first_fit(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
* @nr: The number of zeroed bits we're looking for
* @data: data for alignment
* @pool: pool to get order from
+ * @start_addr: start addr of alloction chunk
*/
unsigned long gen_pool_first_fit_align(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
* @nr: The number of zeroed bits we're looking for
* @data: data for alignment
* @pool: pool to get order from
+ * @start_addr: not used in this function
*/
unsigned long gen_pool_fixed_alloc(unsigned long *map, unsigned long size,
unsigned long start, unsigned int nr, void *data,
* @nr: The number of zeroed bits we're looking for
* @data: additional data - unused
* @pool: pool to find the fit region memory from
+ * @start_addr: not used in this function
*/
unsigned long gen_pool_first_fit_order_align(unsigned long *map,
unsigned long size, unsigned long start,
/**
* gen_pool_best_fit - find the best fitting region of memory
- * macthing the size requirement (no alignment constraint)
+ * matching the size requirement (no alignment constraint)
* @map: The address to base the search on
* @size: The bitmap size in bits
* @start: The bitnumber to start searching at
* @nr: The number of zeroed bits we're looking for
* @data: additional data - unused
* @pool: pool to find the fit region memory from
+ * @start_addr: not used in this function
*
* Iterate over the bitmap to find the smallest free region
* which we can allocate the memory.
#include <linux/fault-inject-usercopy.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
+#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
}
EXPORT_SYMBOL(iov_iter_init);
-static void memzero_page(struct page *page, size_t offset, size_t len)
-{
- char *addr = kmap_atomic(page);
- memset(addr + offset, 0, len);
- kunmap_atomic(addr);
-}
-
static inline bool allocated(struct pipe_buffer *buf)
{
return buf->ops == &default_pipe_buf_ops;
*
*
* The merging is controlled by "count", the number of elements in the
- * pending lists. This is beautiully simple code, but rather subtle.
+ * pending lists. This is beautifully simple code, but rather subtle.
*
* Each time we increment "count", we set one bit (bit k) and clear
* bits k-1 .. 0. Each time this happens (except the very first time
int attrlen = nla_len(nla);
int d;
- if (attrlen > 0 && buf[attrlen - 1] == '\0')
+ while (attrlen > 0 && buf[attrlen - 1] == '\0')
attrlen--;
d = attrlen - len;
* locations.
*
* Description: Detects which if any of a set of token strings has been passed
- * to it. Tokens can include up to MAX_OPT_ARGS instances of basic c-style
+ * to it. Tokens can include up to %MAX_OPT_ARGS instances of basic c-style
* format identifiers which will be taken into account when matching the
* tokens, and whose locations will be returned in the @args array.
*/
* @base: base to use when converting string
*
* Description: Given a &substring_t and a base, attempts to parse the substring
- * as a number in that base. On success, sets @result to the integer represented
- * by the string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * as a number in that base.
+ *
+ * Return: On success, sets @result to the integer represented by the
+ * string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
static int match_number(substring_t *s, int *result, int base)
{
* @base: base to use when converting string
*
* Description: Given a &substring_t and a base, attempts to parse the substring
- * as a number in that base. On success, sets @result to the integer represented
- * by the string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * as a number in that base.
+ *
+ * Return: On success, sets @result to the integer represented by the
+ * string and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
static int match_u64int(substring_t *s, u64 *result, int base)
{
* @s: substring_t to be scanned
* @result: resulting integer on success
*
- * Description: Attempts to parse the &substring_t @s as a decimal integer. On
- * success, sets @result to the integer represented by the string and returns 0.
- * Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * Description: Attempts to parse the &substring_t @s as a decimal integer.
+ *
+ * Return: On success, sets @result to the integer represented by the string
+ * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
int match_int(substring_t *s, int *result)
{
}
EXPORT_SYMBOL(match_int);
-/*
+/**
* match_uint - scan a decimal representation of an integer from a substring_t
* @s: substring_t to be scanned
* @result: resulting integer on success
*
- * Description: Attempts to parse the &substring_t @s as a decimal integer. On
- * success, sets @result to the integer represented by the string and returns 0.
- * Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * Description: Attempts to parse the &substring_t @s as a decimal integer.
+ *
+ * Return: On success, sets @result to the integer represented by the string
+ * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
int match_uint(substring_t *s, unsigned int *result)
{
* @result: resulting unsigned long long on success
*
* Description: Attempts to parse the &substring_t @s as a long decimal
- * integer. On success, sets @result to the integer represented by the
- * string and returns 0.
- * Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * integer.
+ *
+ * Return: On success, sets @result to the integer represented by the string
+ * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
int match_u64(substring_t *s, u64 *result)
{
* @s: substring_t to be scanned
* @result: resulting integer on success
*
- * Description: Attempts to parse the &substring_t @s as an octal integer. On
- * success, sets @result to the integer represented by the string and returns
- * 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ * Description: Attempts to parse the &substring_t @s as an octal integer.
+ *
+ * Return: On success, sets @result to the integer represented by the string
+ * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
int match_octal(substring_t *s, int *result)
{
* @result: resulting integer on success
*
* Description: Attempts to parse the &substring_t @s as a hexadecimal integer.
- * On success, sets @result to the integer represented by the string and
- * returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
+ *
+ * Return: On success, sets @result to the integer represented by the string
+ * and returns 0. Returns -ENOMEM, -EINVAL, or -ERANGE on failure.
*/
int match_hex(substring_t *s, int *result)
{
* @str: the string to be parsed
*
* Description: Parse the string @str to check if matches wildcard
- * pattern @pattern. The pattern may contain two type wildcardes:
+ * pattern @pattern. The pattern may contain two types of wildcards:
* '*' - matches zero or more characters
* '?' - matches one character
- * If it's matched, return true, else return false.
+ *
+ * Return: If the @str matches the @pattern, return true, else return false.
*/
bool match_wildcard(const char *pattern, const char *str)
{
*
* Description: Copy the characters in &substring_t @src to the
* c-style string @dest. Copy no more than @size - 1 characters, plus
- * the terminating NUL. Return length of @src.
+ * the terminating NUL.
+ *
+ * Return: length of @src.
*/
size_t match_strlcpy(char *dest, const substring_t *src, size_t size)
{
* Description: Allocates and returns a string filled with the contents of
* the &substring_t @s. The caller is responsible for freeing the returned
* string with kfree().
+ *
+ * Return: the address of the newly allocated NUL-terminated string or
+ * %NULL on error.
*/
char *match_strdup(const substring_t *s)
{
}
EXPORT_SYMBOL(percpu_counter_set);
-/**
+/*
* This function is both preempt and irq safe. The former is due to explicit
* preemption disable. The latter is guaranteed by the fact that the slow path
* is explicitly protected by an irq-safe spinlock whereas the fast patch uses
static int depot_index;
static int next_slab_inited;
static size_t depot_offset;
-static DEFINE_SPINLOCK(depot_lock);
+static DEFINE_RAW_SPINLOCK(depot_lock);
static bool init_stack_slab(void **prealloc)
{
prealloc = page_address(page);
}
- spin_lock_irqsave(&depot_lock, flags);
+ raw_spin_lock_irqsave(&depot_lock, flags);
found = find_stack(*bucket, entries, nr_entries, hash);
if (!found) {
WARN_ON(!init_stack_slab(&prealloc));
}
- spin_unlock_irqrestore(&depot_lock, flags);
+ raw_spin_unlock_irqrestore(&depot_lock, flags);
exit:
if (prealloc) {
/* Nobody used this memory, ok to free it. */
config HAVE_BOOTMEM_INFO_NODE
def_bool n
+config ARCH_ENABLE_MEMORY_HOTPLUG
+ bool
+
# eventually, we can have this option just 'select SPARSEMEM'
config MEMORY_HOTPLUG
bool "Allow for memory hot-add"
Say N here if you want the default policy to keep all hot-plugged
memory blocks in 'offline' state.
+config ARCH_ENABLE_MEMORY_HOTREMOVE
+ bool
+
config MEMORY_HOTREMOVE
bool "Allow for memory hot remove"
select HAVE_BOOTMEM_INFO_NODE if (X86_64 || PPC64)
depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE
depends on MIGRATION
+config MHP_MEMMAP_ON_MEMORY
+ def_bool y
+ depends on MEMORY_HOTPLUG && SPARSEMEM_VMEMMAP
+ depends on ARCH_MHP_MEMMAP_ON_MEMORY_ENABLE
+
# Heavily threaded applications may benefit from splitting the mm-wide
# page_table_lock, so that faults on different parts of the user address
# space can be handled with less contention: split it at this NR_CPUS.
config ARCH_ENABLE_THP_MIGRATION
bool
+config HUGETLB_PAGE_SIZE_VARIABLE
+ def_bool n
+ help
+ Allows the pageblock_order value to be dynamic instead of just standard
+ HUGETLB_PAGE_ORDER when there are multiple HugeTLB page sizes available
+ on a platform.
+
config CONTIG_ALLOC
def_bool (MEMORY_ISOLATION && COMPACTION) || CMA
help
Turns on the DebugFS interface for CMA.
+config CMA_SYSFS
+ bool "CMA information through sysfs interface"
+ depends on CMA && SYSFS
+ help
+ This option exposes some sysfs attributes to get information
+ from CMA.
+
config CMA_AREAS
int "Maximum count of the CMA areas"
depends on CMA
See Documentation/admin-guide/mm/idle_page_tracking.rst for
more details.
+config ARCH_HAS_CACHE_LINE_SIZE
+ bool
+
config ARCH_HAS_PTE_DEVMAP
bool
page-alloc-y := page_alloc.o
page-alloc-$(CONFIG_SHUFFLE_PAGE_ALLOCATOR) += shuffle.o
+# Give 'memory_hotplug' its own module-parameter namespace
+memory-hotplug-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
+
obj-y += page-alloc.o
obj-y += init-mm.o
obj-y += memblock.o
+obj-y += $(memory-hotplug-y)
ifdef CONFIG_MMU
obj-$(CONFIG_ADVISE_SYSCALLS) += madvise.o
obj-$(CONFIG_KASAN) += kasan/
obj-$(CONFIG_KFENCE) += kfence/
obj-$(CONFIG_FAILSLAB) += failslab.o
-obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
obj-$(CONFIG_MEMTEST) += memtest.o
obj-$(CONFIG_MIGRATION) += migrate.o
obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o
obj-$(CONFIG_MEMORY_BALLOON) += balloon_compaction.o
obj-$(CONFIG_PAGE_EXTENSION) += page_ext.o
obj-$(CONFIG_CMA_DEBUGFS) += cma_debug.o
+obj-$(CONFIG_CMA_SYSFS) += cma_sysfs.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_IDLE_PAGE_TRACKING) += page_idle.o
obj-$(CONFIG_DEBUG_PAGE_REF) += debug_page_ref.o
/**
* balloon_page_list_dequeue() - removes pages from balloon's page list and
* returns a list of the pages.
- * @b_dev_info: balloon device decriptor where we will grab a page from.
+ * @b_dev_info: balloon device descriptor where we will grab a page from.
* @pages: pointer to the list of pages that would be returned to the caller.
* @n_req_pages: number of requested pages.
*
/*
* balloon_page_dequeue - removes a page from balloon's page list and returns
* its address to allow the driver to release the page.
- * @b_dev_info: balloon device decriptor where we will grab a page from.
+ * @b_dev_info: balloon device descriptor where we will grab a page from.
*
* Driver must call this function to properly dequeue a previously enqueued page
* before definitively releasing it back to the guest system.
#include <linux/memblock.h>
#include <linux/err.h>
#include <linux/mm.h>
-#include <linux/mutex.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/log2.h>
}
static void cma_clear_bitmap(struct cma *cma, unsigned long pfn,
- unsigned int count)
+ unsigned long count)
{
unsigned long bitmap_no, bitmap_count;
+ unsigned long flags;
bitmap_no = (pfn - cma->base_pfn) >> cma->order_per_bit;
bitmap_count = cma_bitmap_pages_to_bits(cma, count);
- mutex_lock(&cma->lock);
+ spin_lock_irqsave(&cma->lock, flags);
bitmap_clear(cma->bitmap, bitmap_no, bitmap_count);
- mutex_unlock(&cma->lock);
+ spin_unlock_irqrestore(&cma->lock, flags);
}
static void __init cma_activate_area(struct cma *cma)
pfn += pageblock_nr_pages)
init_cma_reserved_pageblock(pfn_to_page(pfn));
- mutex_init(&cma->lock);
+ spin_lock_init(&cma->lock);
#ifdef CONFIG_CMA_DEBUGFS
INIT_HLIST_HEAD(&cma->mem_head);
unsigned long nr_part, nr_total = 0;
unsigned long nbits = cma_bitmap_maxno(cma);
- mutex_lock(&cma->lock);
+ spin_lock_irq(&cma->lock);
pr_info("number of available pages: ");
for (;;) {
next_zero_bit = find_next_zero_bit(cma->bitmap, nbits, start);
start = next_zero_bit + nr_zero;
}
pr_cont("=> %lu free of %lu total pages\n", nr_total, cma->count);
- mutex_unlock(&cma->lock);
+ spin_unlock_irq(&cma->lock);
}
#else
static inline void cma_debug_show_areas(struct cma *cma) { }
* This function allocates part of contiguous memory on specific
* contiguous memory area.
*/
-struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
- bool no_warn)
+struct page *cma_alloc(struct cma *cma, unsigned long count,
+ unsigned int align, bool no_warn)
{
unsigned long mask, offset;
unsigned long pfn = -1;
unsigned long start = 0;
unsigned long bitmap_maxno, bitmap_no, bitmap_count;
- size_t i;
+ unsigned long i;
struct page *page = NULL;
int ret = -ENOMEM;
if (!cma || !cma->count || !cma->bitmap)
- return NULL;
+ goto out;
- pr_debug("%s(cma %p, count %zu, align %d)\n", __func__, (void *)cma,
+ pr_debug("%s(cma %p, count %lu, align %d)\n", __func__, (void *)cma,
count, align);
if (!count)
- return NULL;
+ goto out;
+
+ trace_cma_alloc_start(cma->name, count, align);
mask = cma_bitmap_aligned_mask(cma, align);
offset = cma_bitmap_aligned_offset(cma, align);
bitmap_count = cma_bitmap_pages_to_bits(cma, count);
if (bitmap_count > bitmap_maxno)
- return NULL;
+ goto out;
for (;;) {
- mutex_lock(&cma->lock);
+ spin_lock_irq(&cma->lock);
bitmap_no = bitmap_find_next_zero_area_off(cma->bitmap,
bitmap_maxno, start, bitmap_count, mask,
offset);
if (bitmap_no >= bitmap_maxno) {
- mutex_unlock(&cma->lock);
+ spin_unlock_irq(&cma->lock);
break;
}
bitmap_set(cma->bitmap, bitmap_no, bitmap_count);
* our exclusive use. If the migration fails we will take the
* lock again and unmark it.
*/
- mutex_unlock(&cma->lock);
+ spin_unlock_irq(&cma->lock);
pfn = cma->base_pfn + (bitmap_no << cma->order_per_bit);
ret = alloc_contig_range(pfn, pfn + count, MIGRATE_CMA,
pr_debug("%s(): memory range at %p is busy, retrying\n",
__func__, pfn_to_page(pfn));
+
+ trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
+ count, align);
/* try again with a bit different memory target */
start = bitmap_no + mask + 1;
}
- trace_cma_alloc(pfn, page, count, align);
+ trace_cma_alloc_finish(cma->name, pfn, page, count, align);
/*
* CMA can allocate multiple page blocks, which results in different
}
if (ret && !no_warn) {
- pr_err("%s: %s: alloc failed, req-size: %zu pages, ret: %d\n",
- __func__, cma->name, count, ret);
+ pr_err_ratelimited("%s: %s: alloc failed, req-size: %lu pages, ret: %d\n",
+ __func__, cma->name, count, ret);
cma_debug_show_areas(cma);
}
pr_debug("%s(): returned %p\n", __func__, page);
+out:
+ if (page) {
+ count_vm_event(CMA_ALLOC_SUCCESS);
+ cma_sysfs_account_success_pages(cma, count);
+ } else {
+ count_vm_event(CMA_ALLOC_FAIL);
+ if (cma)
+ cma_sysfs_account_fail_pages(cma, count);
+ }
+
return page;
}
* It returns false when provided pages do not belong to contiguous area and
* true otherwise.
*/
-bool cma_release(struct cma *cma, const struct page *pages, unsigned int count)
+bool cma_release(struct cma *cma, const struct page *pages,
+ unsigned long count)
{
unsigned long pfn;
if (!cma || !pages)
return false;
- pr_debug("%s(page %p, count %u)\n", __func__, (void *)pages, count);
+ pr_debug("%s(page %p, count %lu)\n", __func__, (void *)pages, count);
pfn = page_to_pfn(pages);
free_contig_range(pfn, count);
cma_clear_bitmap(cma, pfn, count);
- trace_cma_release(pfn, pages, count);
+ trace_cma_release(cma->name, pfn, pages, count);
return true;
}
#define __MM_CMA_H__
#include <linux/debugfs.h>
+#include <linux/kobject.h>
+
+struct cma_kobject {
+ struct kobject kobj;
+ struct cma *cma;
+};
struct cma {
unsigned long base_pfn;
unsigned long count;
unsigned long *bitmap;
unsigned int order_per_bit; /* Order of pages represented by one bit */
- struct mutex lock;
+ spinlock_t lock;
#ifdef CONFIG_CMA_DEBUGFS
struct hlist_head mem_head;
spinlock_t mem_head_lock;
struct debugfs_u32_array dfs_bitmap;
#endif
char name[CMA_MAX_NAME];
+#ifdef CONFIG_CMA_SYSFS
+ /* the number of CMA page successful allocations */
+ atomic64_t nr_pages_succeeded;
+ /* the number of CMA page allocation failures */
+ atomic64_t nr_pages_failed;
+ /* kobject requires dynamic object */
+ struct cma_kobject *cma_kobj;
+#endif
};
extern struct cma cma_areas[MAX_CMA_AREAS];
return cma->count >> cma->order_per_bit;
}
+#ifdef CONFIG_CMA_SYSFS
+void cma_sysfs_account_success_pages(struct cma *cma, unsigned long nr_pages);
+void cma_sysfs_account_fail_pages(struct cma *cma, unsigned long nr_pages);
+#else
+static inline void cma_sysfs_account_success_pages(struct cma *cma,
+ unsigned long nr_pages) {};
+static inline void cma_sysfs_account_fail_pages(struct cma *cma,
+ unsigned long nr_pages) {};
+#endif
#endif
struct cma *cma = data;
unsigned long used;
- mutex_lock(&cma->lock);
+ spin_lock_irq(&cma->lock);
/* pages counter is smaller than sizeof(int) */
used = bitmap_weight(cma->bitmap, (int)cma_bitmap_maxno(cma));
- mutex_unlock(&cma->lock);
+ spin_unlock_irq(&cma->lock);
*val = (u64)used << cma->order_per_bit;
return 0;
unsigned long start, end = 0;
unsigned long bitmap_maxno = cma_bitmap_maxno(cma);
- mutex_lock(&cma->lock);
+ spin_lock_irq(&cma->lock);
for (;;) {
start = find_next_zero_bit(cma->bitmap, bitmap_maxno, end);
if (start >= bitmap_maxno)
end = find_next_bit(cma->bitmap, bitmap_maxno, start);
maxchunk = max(end - start, maxchunk);
}
- mutex_unlock(&cma->lock);
+ spin_unlock_irq(&cma->lock);
*val = (u64)maxchunk << cma->order_per_bit;
return 0;
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * CMA SysFS Interface
+ *
+ * Copyright (c) 2021 Minchan Kim <minchan@kernel.org>
+ */
+
+#include <linux/cma.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+
+#include "cma.h"
+
+#define CMA_ATTR_RO(_name) \
+ static struct kobj_attribute _name##_attr = __ATTR_RO(_name)
+
+void cma_sysfs_account_success_pages(struct cma *cma, unsigned long nr_pages)
+{
+ atomic64_add(nr_pages, &cma->nr_pages_succeeded);
+}
+
+void cma_sysfs_account_fail_pages(struct cma *cma, unsigned long nr_pages)
+{
+ atomic64_add(nr_pages, &cma->nr_pages_failed);
+}
+
+static inline struct cma *cma_from_kobj(struct kobject *kobj)
+{
+ return container_of(kobj, struct cma_kobject, kobj)->cma;
+}
+
+static ssize_t alloc_pages_success_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct cma *cma = cma_from_kobj(kobj);
+
+ return sysfs_emit(buf, "%llu\n",
+ atomic64_read(&cma->nr_pages_succeeded));
+}
+CMA_ATTR_RO(alloc_pages_success);
+
+static ssize_t alloc_pages_fail_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct cma *cma = cma_from_kobj(kobj);
+
+ return sysfs_emit(buf, "%llu\n", atomic64_read(&cma->nr_pages_failed));
+}
+CMA_ATTR_RO(alloc_pages_fail);
+
+static void cma_kobj_release(struct kobject *kobj)
+{
+ struct cma *cma = cma_from_kobj(kobj);
+ struct cma_kobject *cma_kobj = cma->cma_kobj;
+
+ kfree(cma_kobj);
+ cma->cma_kobj = NULL;
+}
+
+static struct attribute *cma_attrs[] = {
+ &alloc_pages_success_attr.attr,
+ &alloc_pages_fail_attr.attr,
+ NULL,
+};
+ATTRIBUTE_GROUPS(cma);
+
+static struct kobj_type cma_ktype = {
+ .release = cma_kobj_release,
+ .sysfs_ops = &kobj_sysfs_ops,
+ .default_groups = cma_groups,
+};
+
+static int __init cma_sysfs_init(void)
+{
+ struct kobject *cma_kobj_root;
+ struct cma_kobject *cma_kobj;
+ struct cma *cma;
+ int i, err;
+
+ cma_kobj_root = kobject_create_and_add("cma", mm_kobj);
+ if (!cma_kobj_root)
+ return -ENOMEM;
+
+ for (i = 0; i < cma_area_count; i++) {
+ cma_kobj = kzalloc(sizeof(*cma_kobj), GFP_KERNEL);
+ if (!cma_kobj) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ cma = &cma_areas[i];
+ cma->cma_kobj = cma_kobj;
+ cma_kobj->cma = cma;
+ err = kobject_init_and_add(&cma_kobj->kobj, &cma_ktype,
+ cma_kobj_root, "%s", cma->name);
+ if (err) {
+ kobject_put(&cma_kobj->kobj);
+ goto out;
+ }
+ }
+
+ return 0;
+out:
+ while (--i >= 0) {
+ cma = &cma_areas[i];
+ kobject_put(&cma->cma_kobj->kobj);
+ }
+ kobject_put(cma_kobj_root);
+
+ return err;
+}
+subsys_initcall(cma_sysfs_init);
*
* Isolate all pages that can be migrated from the range specified by
* [low_pfn, end_pfn). The range is expected to be within same pageblock.
- * Returns zero if there is a fatal signal pending, otherwise PFN of the
- * first page that was not scanned (which may be both less, equal to or more
- * than end_pfn).
+ * Returns errno, like -EAGAIN or -EINTR in case e.g signal pending or congestion,
+ * -ENOMEM in case we could not allocate a page, or 0.
+ * cc->migrate_pfn will contain the next pfn to scan.
*
* The pages are isolated on cc->migratepages list (not required to be empty),
- * and cc->nr_migratepages is updated accordingly. The cc->migrate_pfn field
- * is neither read nor updated.
+ * and cc->nr_migratepages is updated accordingly.
*/
-static unsigned long
+static int
isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
unsigned long end_pfn, isolate_mode_t isolate_mode)
{
bool skip_on_failure = false;
unsigned long next_skip_pfn = 0;
bool skip_updated = false;
+ int ret = 0;
+
+ cc->migrate_pfn = low_pfn;
/*
* Ensure that there are not too many pages isolated from the LRU
while (unlikely(too_many_isolated(pgdat))) {
/* stop isolation if there are still pages not migrated */
if (cc->nr_migratepages)
- return 0;
+ return -EAGAIN;
/* async migration should just abort */
if (cc->mode == MIGRATE_ASYNC)
- return 0;
+ return -EAGAIN;
congestion_wait(BLK_RW_ASYNC, HZ/10);
if (fatal_signal_pending(current))
- return 0;
+ return -EINTR;
}
cond_resched();
if (fatal_signal_pending(current)) {
cc->contended = true;
+ ret = -EINTR;
- low_pfn = 0;
goto fatal_pending;
}
valid_page = page;
}
+ if (PageHuge(page) && cc->alloc_contig) {
+ ret = isolate_or_dissolve_huge_page(page, &cc->migratepages);
+
+ /*
+ * Fail isolation in case isolate_or_dissolve_huge_page()
+ * reports an error. In case of -ENOMEM, abort right away.
+ */
+ if (ret < 0) {
+ /* Do not report -EBUSY down the chain */
+ if (ret == -EBUSY)
+ ret = 0;
+ low_pfn += (1UL << compound_order(page)) - 1;
+ goto isolate_fail;
+ }
+
+ if (PageHuge(page)) {
+ /*
+ * Hugepage was successfully isolated and placed
+ * on the cc->migratepages list.
+ */
+ low_pfn += compound_nr(page) - 1;
+ goto isolate_success_no_list;
+ }
+
+ /*
+ * Ok, the hugepage was dissolved. Now these pages are
+ * Buddy and cannot be re-allocated because they are
+ * isolated. Fall-through as the check below handles
+ * Buddy pages.
+ */
+ }
+
/*
* Skip if free. We read page order here without zone lock
* which is generally unsafe, but the race window is small and
isolate_success:
list_add(&page->lru, &cc->migratepages);
+isolate_success_no_list:
cc->nr_migratepages += compound_nr(page);
nr_isolated += compound_nr(page);
put_page(page);
isolate_fail:
- if (!skip_on_failure)
+ if (!skip_on_failure && ret != -ENOMEM)
continue;
/*
*/
next_skip_pfn += 1UL << cc->order;
}
+
+ if (ret == -ENOMEM)
+ break;
}
/*
if (nr_isolated)
count_compact_events(COMPACTISOLATED, nr_isolated);
- return low_pfn;
+ cc->migrate_pfn = low_pfn;
+
+ return ret;
}
/**
* @start_pfn: The first PFN to start isolating.
* @end_pfn: The one-past-last PFN.
*
- * Returns zero if isolation fails fatally due to e.g. pending signal.
- * Otherwise, function returns one-past-the-last PFN of isolated page
- * (which may be greater than end_pfn if end fell in a middle of a THP page).
+ * Returns -EAGAIN when contented, -EINTR in case of a signal pending, -ENOMEM
+ * in case we could not allocate a page, or 0.
*/
-unsigned long
+int
isolate_migratepages_range(struct compact_control *cc, unsigned long start_pfn,
unsigned long end_pfn)
{
unsigned long pfn, block_start_pfn, block_end_pfn;
+ int ret = 0;
/* Scan block by block. First and last block may be incomplete */
pfn = start_pfn;
block_end_pfn, cc->zone))
continue;
- pfn = isolate_migratepages_block(cc, pfn, block_end_pfn,
- ISOLATE_UNEVICTABLE);
+ ret = isolate_migratepages_block(cc, pfn, block_end_pfn,
+ ISOLATE_UNEVICTABLE);
- if (!pfn)
+ if (ret)
break;
if (cc->nr_migratepages >= COMPACT_CLUSTER_MAX)
break;
}
- return pfn;
+ return ret;
}
#endif /* CONFIG_COMPACTION || CONFIG_CMA */
*/
for (; block_end_pfn <= cc->free_pfn;
fast_find_block = false,
- low_pfn = block_end_pfn,
+ cc->migrate_pfn = low_pfn = block_end_pfn,
block_start_pfn = block_end_pfn,
block_end_pfn += pageblock_nr_pages) {
}
/* Perform the isolation */
- low_pfn = isolate_migratepages_block(cc, low_pfn,
- block_end_pfn, isolate_mode);
-
- if (!low_pfn)
+ if (isolate_migratepages_block(cc, low_pfn, block_end_pfn,
+ isolate_mode))
return ISOLATE_ABORT;
/*
break;
}
- /* Record where migration scanner will be restarted. */
- cc->migrate_pfn = low_pfn;
-
return cc->nr_migratepages ? ISOLATE_SUCCESS : ISOLATE_NONE;
}
unsigned int wmark_low;
/*
- * Cap the low watermak to avoid excessive compaction
- * activity in case a user sets the proactivess tunable
+ * Cap the low watermark to avoid excessive compaction
+ * activity in case a user sets the proactiveness tunable
* close to 100 (maximum).
*/
wmark_low = max(100U - sysctl_compaction_proactiveness, 5U);
trace_mm_compaction_begin(start_pfn, cc->migrate_pfn,
cc->free_pfn, end_pfn, sync);
- migrate_prep_local();
+ /* lru_add_drain_all could be expensive with involving other CPUs */
+ lru_add_drain();
while ((ret = compact_finished(cc)) == COMPACT_CONTINUE) {
int err;
*/
WRITE_ONCE(current->capture_control, NULL);
*capture = READ_ONCE(capc.page);
+ /*
+ * Technically, it is also possible that compaction is skipped but
+ * the page is still captured out of luck(IRQ came and freed the page).
+ * Returning COMPACT_SUCCESS in such cases helps in properly accounting
+ * the COMPACT[STALL|FAIL] when compaction is skipped.
+ */
+ if (*capture)
+ ret = COMPACT_SUCCESS;
return ret;
}
compact_node(nid);
}
-/* The written value is actually unused, all memory is compacted */
-int sysctl_compact_memory;
-
/*
* Tunable for proactive compaction. It determines how
* aggressively the kernel should compact memory in the
*/
static int kcompactd(void *p)
{
- pg_data_t *pgdat = (pg_data_t*)p;
+ pg_data_t *pgdat = (pg_data_t *)p;
struct task_struct *tsk = current;
unsigned int proactive_defer = 0;
page->mapping = NULL;
/* Leave page->index set: truncation lookup relies upon it */
-
- if (shadow) {
- mapping->nrexceptional += nr;
- /*
- * Make sure the nrexceptional update is committed before
- * the nrpages update so that final truncate racing
- * with reclaim does not see both counters 0 at the
- * same time and miss a shadow entry.
- */
- smp_wmb();
- }
mapping->nrpages -= nr;
}
/* Returns true if writeback might be needed or already in progress. */
static bool mapping_needs_writeback(struct address_space *mapping)
{
- if (dax_mapping(mapping))
- return mapping->nrexceptional;
-
return mapping->nrpages;
}
if (xas_error(&xas))
goto unlock;
- if (old)
- mapping->nrexceptional--;
mapping->nrpages++;
/* hugetlb pages do not participate in page cache accounting */
* entirely memory-based such as tmpfs, and filesystems which support
* unwritten extents.
*
- * Return: The requested offset on successs, or -ENXIO if @whence specifies
+ * Return: The requested offset on success, or -ENXIO if @whence specifies
* SEEK_DATA and there is no data after @start. There is an implicit hole
* after @end - 1, so SEEK_HOLE returns @end if all the bytes between @start
* and @end contain data.
/* This is used for a general mmap of a disk file */
-int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
{
struct address_space *mapping = file->f_mapping;
{
return VM_FAULT_SIGBUS;
}
-int generic_file_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
{
return -ENOSYS;
}
-int generic_file_readonly_mmap(struct file * file, struct vm_area_struct * vma)
+int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
return -ENOSYS;
}
ssize_t __generic_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct address_space * mapping = file->f_mapping;
+ struct address_space *mapping = file->f_mapping;
struct inode *inode = mapping->host;
ssize_t written = 0;
ssize_t err;
static u64 frontswap_failed_stores;
static u64 frontswap_invalidates;
-static inline void inc_frontswap_loads(void) {
+static inline void inc_frontswap_loads(void)
+{
data_race(frontswap_loads++);
}
-static inline void inc_frontswap_succ_stores(void) {
+static inline void inc_frontswap_succ_stores(void)
+{
data_race(frontswap_succ_stores++);
}
-static inline void inc_frontswap_failed_stores(void) {
+static inline void inc_frontswap_failed_stores(void)
+{
data_race(frontswap_failed_stores++);
}
-static inline void inc_frontswap_invalidates(void) {
+static inline void inc_frontswap_invalidates(void)
+{
data_race(frontswap_invalidates++);
}
#else
int orig_refs = refs;
/*
- * Can't do FOLL_LONGTERM + FOLL_PIN with CMA in the gup fast
- * path, so fail and let the caller fall back to the slow path.
+ * Can't do FOLL_LONGTERM + FOLL_PIN gup fast path if not in a
+ * right zone, so fail and let the caller fall back to the slow
+ * path.
*/
- if (unlikely(flags & FOLL_LONGTERM) &&
- is_migrate_cma_page(page))
+ if (unlikely((flags & FOLL_LONGTERM) &&
+ !is_pinnable_page(page)))
return NULL;
/*
{
struct vm_area_struct *vma;
unsigned long vm_flags;
- int i;
+ long i;
/* calculate required read or write permissions.
* If FOLL_FORCE is set, we only require the "MAY" flags.
* Returns NULL on any kind of failure - a hole must then be inserted into
* the corefile, to preserve alignment with its headers; and also returns
* NULL wherever the ZERO_PAGE, or an anonymous pte_none, has been found -
- * allowing a hole to be left in the corefile to save diskspace.
+ * allowing a hole to be left in the corefile to save disk space.
*
* Called without mmap_lock (takes and releases the mmap_lock by itself).
*/
}
#endif /* CONFIG_ELF_CORE */
-#ifdef CONFIG_CMA
-static long check_and_migrate_cma_pages(struct mm_struct *mm,
- unsigned long start,
- unsigned long nr_pages,
- struct page **pages,
- struct vm_area_struct **vmas,
- unsigned int gup_flags)
+#ifdef CONFIG_MIGRATION
+/*
+ * Check whether all pages are pinnable, if so return number of pages. If some
+ * pages are not pinnable, migrate them, and unpin all pages. Return zero if
+ * pages were migrated, or if some pages were not successfully isolated.
+ * Return negative error if migration fails.
+ */
+static long check_and_migrate_movable_pages(unsigned long nr_pages,
+ struct page **pages,
+ unsigned int gup_flags)
{
unsigned long i;
- unsigned long step;
+ unsigned long isolation_error_count = 0;
bool drain_allow = true;
- bool migrate_allow = true;
- LIST_HEAD(cma_page_list);
- long ret = nr_pages;
+ LIST_HEAD(movable_page_list);
+ long ret = 0;
+ struct page *prev_head = NULL;
+ struct page *head;
struct migration_target_control mtc = {
.nid = NUMA_NO_NODE,
- .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_NOWARN,
+ .gfp_mask = GFP_USER | __GFP_NOWARN,
};
-check_again:
- for (i = 0; i < nr_pages;) {
-
- struct page *head = compound_head(pages[i]);
-
- /*
- * gup may start from a tail page. Advance step by the left
- * part.
- */
- step = compound_nr(head) - (pages[i] - head);
+ for (i = 0; i < nr_pages; i++) {
+ head = compound_head(pages[i]);
+ if (head == prev_head)
+ continue;
+ prev_head = head;
/*
- * If we get a page from the CMA zone, since we are going to
- * be pinning these entries, we might as well move them out
- * of the CMA zone if possible.
+ * If we get a movable page, since we are going to be pinning
+ * these entries, try to move them out if possible.
*/
- if (is_migrate_cma_page(head)) {
- if (PageHuge(head))
- isolate_huge_page(head, &cma_page_list);
- else {
+ if (!is_pinnable_page(head)) {
+ if (PageHuge(head)) {
+ if (!isolate_huge_page(head, &movable_page_list))
+ isolation_error_count++;
+ } else {
if (!PageLRU(head) && drain_allow) {
lru_add_drain_all();
drain_allow = false;
}
- if (!isolate_lru_page(head)) {
- list_add_tail(&head->lru, &cma_page_list);
- mod_node_page_state(page_pgdat(head),
- NR_ISOLATED_ANON +
- page_is_file_lru(head),
- thp_nr_pages(head));
+ if (isolate_lru_page(head)) {
+ isolation_error_count++;
+ continue;
}
+ list_add_tail(&head->lru, &movable_page_list);
+ mod_node_page_state(page_pgdat(head),
+ NR_ISOLATED_ANON +
+ page_is_file_lru(head),
+ thp_nr_pages(head));
}
}
-
- i += step;
}
- if (!list_empty(&cma_page_list)) {
- /*
- * drop the above get_user_pages reference.
- */
- if (gup_flags & FOLL_PIN)
- unpin_user_pages(pages, nr_pages);
- else
- for (i = 0; i < nr_pages; i++)
- put_page(pages[i]);
-
- if (migrate_pages(&cma_page_list, alloc_migration_target, NULL,
- (unsigned long)&mtc, MIGRATE_SYNC, MR_CONTIG_RANGE)) {
- /*
- * some of the pages failed migration. Do get_user_pages
- * without migration.
- */
- migrate_allow = false;
+ /*
+ * If list is empty, and no isolation errors, means that all pages are
+ * in the correct zone.
+ */
+ if (list_empty(&movable_page_list) && !isolation_error_count)
+ return nr_pages;
- if (!list_empty(&cma_page_list))
- putback_movable_pages(&cma_page_list);
- }
- /*
- * We did migrate all the pages, Try to get the page references
- * again migrating any new CMA pages which we failed to isolate
- * earlier.
- */
- ret = __get_user_pages_locked(mm, start, nr_pages,
- pages, vmas, NULL,
- gup_flags);
-
- if ((ret > 0) && migrate_allow) {
- nr_pages = ret;
- drain_allow = true;
- goto check_again;
- }
+ if (gup_flags & FOLL_PIN) {
+ unpin_user_pages(pages, nr_pages);
+ } else {
+ for (i = 0; i < nr_pages; i++)
+ put_page(pages[i]);
+ }
+ if (!list_empty(&movable_page_list)) {
+ ret = migrate_pages(&movable_page_list, alloc_migration_target,
+ NULL, (unsigned long)&mtc, MIGRATE_SYNC,
+ MR_LONGTERM_PIN);
+ if (ret && !list_empty(&movable_page_list))
+ putback_movable_pages(&movable_page_list);
}
- return ret;
+ return ret > 0 ? -ENOMEM : ret;
}
#else
-static long check_and_migrate_cma_pages(struct mm_struct *mm,
- unsigned long start,
- unsigned long nr_pages,
- struct page **pages,
- struct vm_area_struct **vmas,
- unsigned int gup_flags)
+static long check_and_migrate_movable_pages(unsigned long nr_pages,
+ struct page **pages,
+ unsigned int gup_flags)
{
return nr_pages;
}
-#endif /* CONFIG_CMA */
+#endif /* CONFIG_MIGRATION */
/*
* __gup_longterm_locked() is a wrapper for __get_user_pages_locked which
struct vm_area_struct **vmas,
unsigned int gup_flags)
{
- unsigned long flags = 0;
+ unsigned int flags;
long rc;
- if (gup_flags & FOLL_LONGTERM)
- flags = memalloc_nocma_save();
-
- rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas, NULL,
- gup_flags);
+ if (!(gup_flags & FOLL_LONGTERM))
+ return __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+ NULL, gup_flags);
+ flags = memalloc_pin_save();
+ do {
+ rc = __get_user_pages_locked(mm, start, nr_pages, pages, vmas,
+ NULL, gup_flags);
+ if (rc <= 0)
+ break;
+ rc = check_and_migrate_movable_pages(rc, pages, gup_flags);
+ } while (!rc);
+ memalloc_pin_restore(flags);
- if (gup_flags & FOLL_LONGTERM) {
- if (rc > 0)
- rc = check_and_migrate_cma_pages(mm, start, rc, pages,
- vmas, gup_flags);
- memalloc_nocma_restore(flags);
- }
return rc;
}
dump_page(page, "gup_test failure");
break;
+ } else if (cmd == PIN_LONGTERM_BENCHMARK &&
+ WARN(!is_pinnable_page(page),
+ "pages[%lu] is NOT pinnable but pinned\n",
+ i)) {
+ dump_page(page, "gup_test failure");
+ break;
}
}
break;
{
ktime_t start_time, end_time;
unsigned long i, nr_pages, addr, next;
- int nr;
+ long nr;
struct page **pages;
int ret = 0;
bool needs_mmap_lock =
nr = (next - addr) / PAGE_SIZE;
}
- /* Filter out most gup flags: only allow a tiny subset here: */
- gup->flags &= FOLL_WRITE;
-
switch (cmd) {
case GUP_FAST_BENCHMARK:
- nr = get_user_pages_fast(addr, nr, gup->flags,
+ nr = get_user_pages_fast(addr, nr, gup->gup_flags,
pages + i);
break;
case GUP_BASIC_TEST:
- nr = get_user_pages(addr, nr, gup->flags, pages + i,
+ nr = get_user_pages(addr, nr, gup->gup_flags, pages + i,
NULL);
break;
case PIN_FAST_BENCHMARK:
- nr = pin_user_pages_fast(addr, nr, gup->flags,
+ nr = pin_user_pages_fast(addr, nr, gup->gup_flags,
pages + i);
break;
case PIN_BASIC_TEST:
- nr = pin_user_pages(addr, nr, gup->flags, pages + i,
+ nr = pin_user_pages(addr, nr, gup->gup_flags, pages + i,
NULL);
break;
case PIN_LONGTERM_BENCHMARK:
nr = pin_user_pages(addr, nr,
- gup->flags | FOLL_LONGTERM,
+ gup->gup_flags | FOLL_LONGTERM,
pages + i, NULL);
break;
case DUMP_USER_PAGES_TEST:
- if (gup->flags & GUP_TEST_FLAG_DUMP_PAGES_USE_PIN)
- nr = pin_user_pages(addr, nr, gup->flags,
+ if (gup->test_flags & GUP_TEST_FLAG_DUMP_PAGES_USE_PIN)
+ nr = pin_user_pages(addr, nr, gup->gup_flags,
pages + i, NULL);
else
- nr = get_user_pages(addr, nr, gup->flags,
+ nr = get_user_pages(addr, nr, gup->gup_flags,
pages + i, NULL);
break;
default:
start_time = ktime_get();
- put_back_pages(cmd, pages, nr_pages, gup->flags);
+ put_back_pages(cmd, pages, nr_pages, gup->test_flags);
end_time = ktime_get();
gup->put_delta_usec = ktime_us_delta(end_time, start_time);
__u64 addr;
__u64 size;
__u32 nr_pages_per_call;
- __u32 flags;
+ __u32 gup_flags;
+ __u32 test_flags;
/*
* Each non-zero entry is the number of the page (1-based: first page is
* page 1, so that zero entries mean "do nothing") from the .addr base.
atomic_long_t _totalhigh_pages __read_mostly;
EXPORT_SYMBOL(_totalhigh_pages);
-unsigned int __nr_free_highpages (void)
+unsigned int __nr_free_highpages(void)
{
struct zone *zone;
unsigned int pages = 0;
static int pkmap_count[LAST_PKMAP];
static __cacheline_aligned_in_smp DEFINE_SPINLOCK(kmap_lock);
-pte_t * pkmap_page_table;
+pte_t *pkmap_page_table;
/*
* Most architectures have no use for kmap_high_get(), so let's abstract
if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) {
int i = PKMAP_NR(addr);
+
return pte_page(pkmap_page_table[i]);
}
pkmap_count[PKMAP_NR(vaddr)]++;
BUG_ON(pkmap_count[PKMAP_NR(vaddr)] < 2);
unlock_kmap();
- return (void*) vaddr;
+ return (void *) vaddr;
}
-
EXPORT_SYMBOL(kmap_high);
#ifdef ARCH_NEEDS_KMAP_HIGH_GET
pkmap_count[PKMAP_NR(vaddr)]++;
}
unlock_kmap_any(flags);
- return (void*) vaddr;
+ return (void *) vaddr;
}
#endif
/*
* Disable migration so resulting virtual address is stable
- * accross preemption.
+ * across preemption.
*/
migrate_disable();
preempt_disable();
spin_unlock_irqrestore(&pas->lock, flags);
return ret;
}
-
EXPORT_SYMBOL(page_address);
/**
#include <linux/mm.h>
#include <linux/sched.h>
+#include <linux/sched/mm.h>
#include <linux/sched/coredump.h>
#include <linux/sched/numa_balancing.h>
#include <linux/highmem.h>
return false;
}
-static struct page *get_huge_zero_page(void)
+static bool get_huge_zero_page(void)
{
struct page *zero_page;
retry:
if (likely(atomic_inc_not_zero(&huge_zero_refcount)))
- return READ_ONCE(huge_zero_page);
+ return true;
zero_page = alloc_pages((GFP_TRANSHUGE | __GFP_ZERO) & ~__GFP_MOVABLE,
HPAGE_PMD_ORDER);
if (!zero_page) {
count_vm_event(THP_ZERO_PAGE_ALLOC_FAILED);
- return NULL;
+ return false;
}
count_vm_event(THP_ZERO_PAGE_ALLOC);
preempt_disable();
/* We take additional reference here. It will be put back by shrinker */
atomic_set(&huge_zero_refcount, 2);
preempt_enable();
- return READ_ONCE(huge_zero_page);
+ return true;
}
static void put_huge_zero_page(void)
/* Deliver the page fault to userland */
if (userfaultfd_missing(vma)) {
- vm_fault_t ret2;
-
spin_unlock(vmf->ptl);
put_page(page);
pte_free(vma->vm_mm, pgtable);
- ret2 = handle_userfault(vmf, VM_UFFD_MISSING);
- VM_BUG_ON(ret2 & VM_FAULT_FALLBACK);
- return ret2;
+ ret = handle_userfault(vmf, VM_UFFD_MISSING);
+ VM_BUG_ON(ret & VM_FAULT_FALLBACK);
+ return ret;
}
entry = mk_huge_pmd(page, vma->vm_page_prot);
}
page = pmd_page(orig_pmd);
- VM_BUG_ON_PAGE(!PageCompound(page) || !PageHead(page), page);
+ VM_BUG_ON_PAGE(!PageHead(page), page);
/* Lock page for reuse_swap_page() */
if (!trylock_page(page)) {
*/
page_locked = trylock_page(page);
target_nid = mpol_misplaced(page, vma, haddr);
- if (target_nid == NUMA_NO_NODE) {
- /* If the page was locked, there are no parallel migrations */
- if (page_locked)
- goto clear_pmdnuma;
- }
-
/* Migration could have started since the pmd_trans_migrating check */
if (!page_locked) {
page_nid = NUMA_NO_NODE;
spin_unlock(vmf->ptl);
put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
goto out;
+ } else if (target_nid == NUMA_NO_NODE) {
+ /* There are no parallel migrations and page is in the right
+ * node. Clear the numa hinting info in this pmd.
+ */
+ goto clear_pmdnuma;
}
/*
VM_BUG_ON(!is_pmd_migration_entry(orig_pmd));
entry = pmd_to_swp_entry(orig_pmd);
- page = pfn_to_page(swp_offset(entry));
+ page = migration_entry_to_page(entry);
flush_needed = 0;
} else
WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
/*
* Returns
* - 0 if PMD could not be locked
- * - 1 if PMD was locked but protections unchange and TLB flush unnecessary
- * - HPAGE_PMD_NR is protections changed and TLB flush necessary
+ * - 1 if PMD was locked but protections unchanged and TLB flush unnecessary
+ * - HPAGE_PMD_NR if protections changed and TLB flush necessary
*/
int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
unsigned long addr, pgprot_t newprot, unsigned long cp_flags)
swp_entry_t entry;
entry = pmd_to_swp_entry(old_pmd);
- page = pfn_to_page(swp_offset(entry));
+ page = migration_entry_to_page(entry);
write = is_write_migration_entry(entry);
young = false;
soft_dirty = pmd_swp_soft_dirty(old_pmd);
__split_huge_pmd(vma, pmd, address, freeze, page);
}
+static inline void split_huge_pmd_if_needed(struct vm_area_struct *vma, unsigned long address)
+{
+ /*
+ * If the new address isn't hpage aligned and it could previously
+ * contain an hugepage: check if we need to split an huge pmd.
+ */
+ if (!IS_ALIGNED(address, HPAGE_PMD_SIZE) &&
+ range_in_vma(vma, ALIGN_DOWN(address, HPAGE_PMD_SIZE),
+ ALIGN(address, HPAGE_PMD_SIZE)))
+ split_huge_pmd_address(vma, address, false, NULL);
+}
+
void vma_adjust_trans_huge(struct vm_area_struct *vma,
unsigned long start,
unsigned long end,
long adjust_next)
{
- /*
- * If the new start address isn't hpage aligned and it could
- * previously contain an hugepage: check if we need to split
- * an huge pmd.
- */
- if (start & ~HPAGE_PMD_MASK &&
- (start & HPAGE_PMD_MASK) >= vma->vm_start &&
- (start & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
- split_huge_pmd_address(vma, start, false, NULL);
+ /* Check if we need to split start first. */
+ split_huge_pmd_if_needed(vma, start);
- /*
- * If the new end address isn't hpage aligned and it could
- * previously contain an hugepage: check if we need to split
- * an huge pmd.
- */
- if (end & ~HPAGE_PMD_MASK &&
- (end & HPAGE_PMD_MASK) >= vma->vm_start &&
- (end & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= vma->vm_end)
- split_huge_pmd_address(vma, end, false, NULL);
+ /* Check if we need to split end next. */
+ split_huge_pmd_if_needed(vma, end);
/*
- * If we're also updating the vma->vm_next->vm_start, if the new
- * vm_next->vm_start isn't hpage aligned and it could previously
- * contain an hugepage: check if we need to split an huge pmd.
+ * If we're also updating the vma->vm_next->vm_start,
+ * check if we need to split it.
*/
if (adjust_next > 0) {
struct vm_area_struct *next = vma->vm_next;
unsigned long nstart = next->vm_start;
nstart += adjust_next;
- if (nstart & ~HPAGE_PMD_MASK &&
- (nstart & HPAGE_PMD_MASK) >= next->vm_start &&
- (nstart & HPAGE_PMD_MASK) + HPAGE_PMD_SIZE <= next->vm_end)
- split_huge_pmd_address(next, nstart, false, NULL);
+ split_huge_pmd_if_needed(next, nstart);
}
}
xa_lock(&swap_cache->i_pages);
}
- /* lock lru list/PageCompound, ref freezed by page_ref_freeze */
+ /* lock lru list/PageCompound, ref frozen by page_ref_freeze */
lruvec = lock_page_lruvec(head);
for (i = nr - 1; i >= 1; i--) {
ds_queue->split_queue_len++;
#ifdef CONFIG_MEMCG
if (memcg)
- memcg_set_shrinker_bit(memcg, page_to_nid(page),
- deferred_split_shrinker.id);
+ set_shrinker_bit(memcg, page_to_nid(page),
+ deferred_split_shrinker.id);
#endif
}
spin_unlock_irqrestore(&ds_queue->split_queue_lock, flags);
};
#ifdef CONFIG_DEBUG_FS
-static int split_huge_pages_set(void *data, u64 val)
+static void split_huge_pages_all(void)
{
struct zone *zone;
struct page *page;
unsigned long pfn, max_zone_pfn;
unsigned long total = 0, split = 0;
- if (val != 1)
- return -EINVAL;
-
+ pr_debug("Split all THPs\n");
for_each_populated_zone(zone) {
max_zone_pfn = zone_end_pfn(zone);
for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) {
unlock_page(page);
next:
put_page(page);
+ cond_resched();
}
}
- pr_info("%lu of %lu THP split\n", split, total);
+ pr_debug("%lu of %lu THP split\n", split, total);
+}
- return 0;
+static inline bool vma_not_suitable_for_thp_split(struct vm_area_struct *vma)
+{
+ return vma_is_special_huge(vma) || (vma->vm_flags & VM_IO) ||
+ is_vm_hugetlb_page(vma);
+}
+
+static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
+ unsigned long vaddr_end)
+{
+ int ret = 0;
+ struct task_struct *task;
+ struct mm_struct *mm;
+ unsigned long total = 0, split = 0;
+ unsigned long addr;
+
+ vaddr_start &= PAGE_MASK;
+ vaddr_end &= PAGE_MASK;
+
+ /* Find the task_struct from pid */
+ rcu_read_lock();
+ task = find_task_by_vpid(pid);
+ if (!task) {
+ rcu_read_unlock();
+ ret = -ESRCH;
+ goto out;
+ }
+ get_task_struct(task);
+ rcu_read_unlock();
+
+ /* Find the mm_struct */
+ mm = get_task_mm(task);
+ put_task_struct(task);
+
+ if (!mm) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ pr_debug("Split huge pages in pid: %d, vaddr: [0x%lx - 0x%lx]\n",
+ pid, vaddr_start, vaddr_end);
+
+ mmap_read_lock(mm);
+ /*
+ * always increase addr by PAGE_SIZE, since we could have a PTE page
+ * table filled with PTE-mapped THPs, each of which is distinct.
+ */
+ for (addr = vaddr_start; addr < vaddr_end; addr += PAGE_SIZE) {
+ struct vm_area_struct *vma = find_vma(mm, addr);
+ unsigned int follflags;
+ struct page *page;
+
+ if (!vma || addr < vma->vm_start)
+ break;
+
+ /* skip special VMA and hugetlb VMA */
+ if (vma_not_suitable_for_thp_split(vma)) {
+ addr = vma->vm_end;
+ continue;
+ }
+
+ /* FOLL_DUMP to ignore special (like zero) pages */
+ follflags = FOLL_GET | FOLL_DUMP;
+ page = follow_page(vma, addr, follflags);
+
+ if (IS_ERR(page))
+ continue;
+ if (!page)
+ continue;
+
+ if (!is_transparent_hugepage(page))
+ goto next;
+
+ total++;
+ if (!can_split_huge_page(compound_head(page), NULL))
+ goto next;
+
+ if (!trylock_page(page))
+ goto next;
+
+ if (!split_huge_page(page))
+ split++;
+
+ unlock_page(page);
+next:
+ put_page(page);
+ cond_resched();
+ }
+ mmap_read_unlock(mm);
+ mmput(mm);
+
+ pr_debug("%lu of %lu THP split\n", split, total);
+
+out:
+ return ret;
+}
+
+static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
+ pgoff_t off_end)
+{
+ struct filename *file;
+ struct file *candidate;
+ struct address_space *mapping;
+ int ret = -EINVAL;
+ pgoff_t index;
+ int nr_pages = 1;
+ unsigned long total = 0, split = 0;
+
+ file = getname_kernel(file_path);
+ if (IS_ERR(file))
+ return ret;
+
+ candidate = file_open_name(file, O_RDONLY, 0);
+ if (IS_ERR(candidate))
+ goto out;
+
+ pr_debug("split file-backed THPs in file: %s, page offset: [0x%lx - 0x%lx]\n",
+ file_path, off_start, off_end);
+
+ mapping = candidate->f_mapping;
+
+ for (index = off_start; index < off_end; index += nr_pages) {
+ struct page *fpage = pagecache_get_page(mapping, index,
+ FGP_ENTRY | FGP_HEAD, 0);
+
+ nr_pages = 1;
+ if (xa_is_value(fpage) || !fpage)
+ continue;
+
+ if (!is_transparent_hugepage(fpage))
+ goto next;
+
+ total++;
+ nr_pages = thp_nr_pages(fpage);
+
+ if (!trylock_page(fpage))
+ goto next;
+
+ if (!split_huge_page(fpage))
+ split++;
+
+ unlock_page(fpage);
+next:
+ put_page(fpage);
+ cond_resched();
+ }
+
+ filp_close(candidate, NULL);
+ ret = 0;
+
+ pr_debug("%lu of %lu file-backed THP split\n", split, total);
+out:
+ putname(file);
+ return ret;
}
-DEFINE_DEBUGFS_ATTRIBUTE(split_huge_pages_fops, NULL, split_huge_pages_set,
- "%llu\n");
+
+#define MAX_INPUT_BUF_SZ 255
+
+static ssize_t split_huge_pages_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *ppops)
+{
+ static DEFINE_MUTEX(split_debug_mutex);
+ ssize_t ret;
+ /* hold pid, start_vaddr, end_vaddr or file_path, off_start, off_end */
+ char input_buf[MAX_INPUT_BUF_SZ];
+ int pid;
+ unsigned long vaddr_start, vaddr_end;
+
+ ret = mutex_lock_interruptible(&split_debug_mutex);
+ if (ret)
+ return ret;
+
+ ret = -EFAULT;
+
+ memset(input_buf, 0, MAX_INPUT_BUF_SZ);
+ if (copy_from_user(input_buf, buf, min_t(size_t, count, MAX_INPUT_BUF_SZ)))
+ goto out;
+
+ input_buf[MAX_INPUT_BUF_SZ - 1] = '\0';
+
+ if (input_buf[0] == '/') {
+ char *tok;
+ char *buf = input_buf;
+ char file_path[MAX_INPUT_BUF_SZ];
+ pgoff_t off_start = 0, off_end = 0;
+ size_t input_len = strlen(input_buf);
+
+ tok = strsep(&buf, ",");
+ if (tok) {
+ strncpy(file_path, tok, MAX_INPUT_BUF_SZ);
+ } else {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = sscanf(buf, "0x%lx,0x%lx", &off_start, &off_end);
+ if (ret != 2) {
+ ret = -EINVAL;
+ goto out;
+ }
+ ret = split_huge_pages_in_file(file_path, off_start, off_end);
+ if (!ret)
+ ret = input_len;
+
+ goto out;
+ }
+
+ ret = sscanf(input_buf, "%d,0x%lx,0x%lx", &pid, &vaddr_start, &vaddr_end);
+ if (ret == 1 && pid == 1) {
+ split_huge_pages_all();
+ ret = strlen(input_buf);
+ goto out;
+ } else if (ret != 3) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = split_huge_pages_pid(pid, vaddr_start, vaddr_end);
+ if (!ret)
+ ret = strlen(input_buf);
+out:
+ mutex_unlock(&split_debug_mutex);
+ return ret;
+
+}
+
+static const struct file_operations split_huge_pages_fops = {
+ .owner = THIS_MODULE,
+ .write = split_huge_pages_write,
+ .llseek = no_llseek,
+};
static int __init split_huge_pages_debugfs(void)
{
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/node.h>
-#include <linux/userfaultfd_k.h>
#include <linux/page_owner.h>
#include "internal.h"
return true;
}
-static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
+static inline void unlock_or_release_subpool(struct hugepage_subpool *spool,
+ unsigned long irq_flags)
{
- spin_unlock(&spool->lock);
+ spin_unlock_irqrestore(&spool->lock, irq_flags);
/* If no pages are used, and no other handles to the subpool
* remain, give up any reservations based on minimum size and
void hugepage_put_subpool(struct hugepage_subpool *spool)
{
- spin_lock(&spool->lock);
+ unsigned long flags;
+
+ spin_lock_irqsave(&spool->lock, flags);
BUG_ON(!spool->count);
spool->count--;
- unlock_or_release_subpool(spool);
+ unlock_or_release_subpool(spool, flags);
}
/*
if (!spool)
return ret;
- spin_lock(&spool->lock);
+ spin_lock_irq(&spool->lock);
if (spool->max_hpages != -1) { /* maximum size accounting */
if ((spool->used_hpages + delta) <= spool->max_hpages)
}
unlock_ret:
- spin_unlock(&spool->lock);
+ spin_unlock_irq(&spool->lock);
return ret;
}
long delta)
{
long ret = delta;
+ unsigned long flags;
if (!spool)
return delta;
- spin_lock(&spool->lock);
+ spin_lock_irqsave(&spool->lock, flags);
if (spool->max_hpages != -1) /* maximum size accounting */
spool->used_hpages -= delta;
* If hugetlbfs_put_super couldn't free spool due to an outstanding
* quota reference, free it now.
*/
- unlock_or_release_subpool(spool);
+ unlock_or_release_subpool(spool, flags);
return ret;
}
resv->region_cache_count;
/* At this point, we should have enough entries in the cache
- * for all the existings adds_in_progress. We should only be
+ * for all the existing adds_in_progress. We should only be
* needing to allocate for regions_needed.
*/
VM_BUG_ON(resv->region_cache_count < resv->adds_in_progress);
resv->adds_in_progress -= in_regions_needed;
spin_unlock(&resv->lock);
- VM_BUG_ON(add < 0);
return add;
}
{
struct hugepage_subpool *spool = subpool_inode(inode);
long rsv_adjust;
+ bool reserved = false;
rsv_adjust = hugepage_subpool_get_pages(spool, 1);
- if (rsv_adjust) {
+ if (rsv_adjust > 0) {
struct hstate *h = hstate_inode(inode);
- hugetlb_acct_memory(h, 1);
+ if (!hugetlb_acct_memory(h, 1))
+ reserved = true;
+ } else if (!rsv_adjust) {
+ reserved = true;
}
+
+ if (!reserved)
+ pr_warn("hugetlb: Huge Page Reserved count may go negative.\n");
}
/*
static void enqueue_huge_page(struct hstate *h, struct page *page)
{
int nid = page_to_nid(page);
+
+ lockdep_assert_held(&hugetlb_lock);
list_move(&page->lru, &h->hugepage_freelists[nid]);
h->free_huge_pages++;
h->free_huge_pages_node[nid]++;
static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
{
struct page *page;
- bool nocma = !!(current->flags & PF_MEMALLOC_NOCMA);
+ bool pin = !!(current->flags & PF_MEMALLOC_PIN);
+ lockdep_assert_held(&hugetlb_lock);
list_for_each_entry(page, &h->hugepage_freelists[nid], lru) {
- if (nocma && is_migrate_cma_page(page))
+ if (pin && !is_pinnable_page(page))
continue;
if (PageHWPoison(page))
}
/*
- * helper for free_pool_huge_page() - return the previously saved
+ * helper for remove_pool_huge_page() - return the previously saved
* node ["this node"] from which to free a huge page. Advance the
* next node id whether or not we find a free huge page to free so
* that the next attempt to free addresses the next node.
static struct page *alloc_gigantic_page(struct hstate *h, gfp_t gfp_mask,
int nid, nodemask_t *nodemask)
{
- unsigned long nr_pages = 1UL << huge_page_order(h);
+ unsigned long nr_pages = pages_per_huge_page(h);
if (nid == NUMA_NO_NODE)
nid = numa_mem_id();
unsigned int order) { }
#endif
+/*
+ * Remove hugetlb page from lists, and update dtor so that page appears
+ * as just a compound page. A reference is held on the page.
+ *
+ * Must be called with hugetlb lock held.
+ */
+static void remove_hugetlb_page(struct hstate *h, struct page *page,
+ bool adjust_surplus)
+{
+ int nid = page_to_nid(page);
+
+ VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
+ VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page);
+
+ lockdep_assert_held(&hugetlb_lock);
+ if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
+ return;
+
+ list_del(&page->lru);
+
+ if (HPageFreed(page)) {
+ h->free_huge_pages--;
+ h->free_huge_pages_node[nid]--;
+ }
+ if (adjust_surplus) {
+ h->surplus_huge_pages--;
+ h->surplus_huge_pages_node[nid]--;
+ }
+
+ set_page_refcounted(page);
+ set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
+
+ h->nr_huge_pages--;
+ h->nr_huge_pages_node[nid]--;
+}
+
static void update_and_free_page(struct hstate *h, struct page *page)
{
int i;
if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
return;
- h->nr_huge_pages--;
- h->nr_huge_pages_node[page_to_nid(page)]--;
for (i = 0; i < pages_per_huge_page(h);
i++, subpage = mem_map_next(subpage, page, i)) {
subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
1 << PG_active | 1 << PG_private |
1 << PG_writeback);
}
- VM_BUG_ON_PAGE(hugetlb_cgroup_from_page(page), page);
- VM_BUG_ON_PAGE(hugetlb_cgroup_from_page_rsvd(page), page);
- set_compound_page_dtor(page, NULL_COMPOUND_DTOR);
- set_page_refcounted(page);
if (hstate_is_gigantic(h)) {
- /*
- * Temporarily drop the hugetlb_lock, because
- * we might block in free_gigantic_page().
- */
- spin_unlock(&hugetlb_lock);
destroy_compound_gigantic_page(page, huge_page_order(h));
free_gigantic_page(page, huge_page_order(h));
- spin_lock(&hugetlb_lock);
} else {
__free_pages(page, huge_page_order(h));
}
}
+static void update_and_free_pages_bulk(struct hstate *h, struct list_head *list)
+{
+ struct page *page, *t_page;
+
+ list_for_each_entry_safe(page, t_page, list, lru) {
+ update_and_free_page(h, page);
+ cond_resched();
+ }
+}
+
struct hstate *size_to_hstate(unsigned long size)
{
struct hstate *h;
return NULL;
}
-static void __free_huge_page(struct page *page)
+void free_huge_page(struct page *page)
{
/*
* Can't pass hstate in here because it is called from the
int nid = page_to_nid(page);
struct hugepage_subpool *spool = hugetlb_page_subpool(page);
bool restore_reserve;
+ unsigned long flags;
VM_BUG_ON_PAGE(page_count(page), page);
VM_BUG_ON_PAGE(page_mapcount(page), page);
restore_reserve = true;
}
- spin_lock(&hugetlb_lock);
+ spin_lock_irqsave(&hugetlb_lock, flags);
ClearHPageMigratable(page);
hugetlb_cgroup_uncharge_page(hstate_index(h),
pages_per_huge_page(h), page);
h->resv_huge_pages++;
if (HPageTemporary(page)) {
- list_del(&page->lru);
- ClearHPageTemporary(page);
+ remove_hugetlb_page(h, page, false);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
update_and_free_page(h, page);
} else if (h->surplus_huge_pages_node[nid]) {
/* remove the page from active list */
- list_del(&page->lru);
+ remove_hugetlb_page(h, page, true);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
update_and_free_page(h, page);
- h->surplus_huge_pages--;
- h->surplus_huge_pages_node[nid]--;
} else {
arch_clear_hugepage_flags(page);
enqueue_huge_page(h, page);
+ spin_unlock_irqrestore(&hugetlb_lock, flags);
}
- spin_unlock(&hugetlb_lock);
}
/*
- * As free_huge_page() can be called from a non-task context, we have
- * to defer the actual freeing in a workqueue to prevent potential
- * hugetlb_lock deadlock.
- *
- * free_hpage_workfn() locklessly retrieves the linked list of pages to
- * be freed and frees them one-by-one. As the page->mapping pointer is
- * going to be cleared in __free_huge_page() anyway, it is reused as the
- * llist_node structure of a lockless linked list of huge pages to be freed.
+ * Must be called with the hugetlb lock held
*/
-static LLIST_HEAD(hpage_freelist);
-
-static void free_hpage_workfn(struct work_struct *work)
+static void __prep_account_new_huge_page(struct hstate *h, int nid)
{
- struct llist_node *node;
- struct page *page;
-
- node = llist_del_all(&hpage_freelist);
-
- while (node) {
- page = container_of((struct address_space **)node,
- struct page, mapping);
- node = node->next;
- __free_huge_page(page);
- }
-}
-static DECLARE_WORK(free_hpage_work, free_hpage_workfn);
-
-void free_huge_page(struct page *page)
-{
- /*
- * Defer freeing if in non-task context to avoid hugetlb_lock deadlock.
- */
- if (!in_task()) {
- /*
- * Only call schedule_work() if hpage_freelist is previously
- * empty. Otherwise, schedule_work() had been called but the
- * workfn hasn't retrieved the list yet.
- */
- if (llist_add((struct llist_node *)&page->mapping,
- &hpage_freelist))
- schedule_work(&free_hpage_work);
- return;
- }
-
- __free_huge_page(page);
+ lockdep_assert_held(&hugetlb_lock);
+ h->nr_huge_pages++;
+ h->nr_huge_pages_node[nid]++;
}
-static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+static void __prep_new_huge_page(struct page *page)
{
INIT_LIST_HEAD(&page->lru);
set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
hugetlb_set_page_subpool(page, NULL);
set_hugetlb_cgroup(page, NULL);
set_hugetlb_cgroup_rsvd(page, NULL);
- spin_lock(&hugetlb_lock);
- h->nr_huge_pages++;
- h->nr_huge_pages_node[nid]++;
- ClearHPageFreed(page);
- spin_unlock(&hugetlb_lock);
+}
+
+static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+{
+ __prep_new_huge_page(page);
+ spin_lock_irq(&hugetlb_lock);
+ __prep_account_new_huge_page(h, nid);
+ spin_unlock_irq(&hugetlb_lock);
}
static void prep_compound_gigantic_page(struct page *page, unsigned int order)
}
/*
- * Free huge page from pool from next node to free.
- * Attempt to keep persistent huge pages more or less
- * balanced over allowed nodes.
+ * Remove huge page from pool from next node to free. Attempt to keep
+ * persistent huge pages more or less balanced over allowed nodes.
+ * This routine only 'removes' the hugetlb page. The caller must make
+ * an additional call to free the page to low level allocators.
* Called with hugetlb_lock locked.
*/
-static int free_pool_huge_page(struct hstate *h, nodemask_t *nodes_allowed,
- bool acct_surplus)
+static struct page *remove_pool_huge_page(struct hstate *h,
+ nodemask_t *nodes_allowed,
+ bool acct_surplus)
{
int nr_nodes, node;
- int ret = 0;
+ struct page *page = NULL;
+ lockdep_assert_held(&hugetlb_lock);
for_each_node_mask_to_free(h, nr_nodes, node, nodes_allowed) {
/*
* If we're returning unused surplus pages, only examine
*/
if ((!acct_surplus || h->surplus_huge_pages_node[node]) &&
!list_empty(&h->hugepage_freelists[node])) {
- struct page *page =
- list_entry(h->hugepage_freelists[node].next,
+ page = list_entry(h->hugepage_freelists[node].next,
struct page, lru);
- list_del(&page->lru);
- h->free_huge_pages--;
- h->free_huge_pages_node[node]--;
- if (acct_surplus) {
- h->surplus_huge_pages--;
- h->surplus_huge_pages_node[node]--;
- }
- update_and_free_page(h, page);
- ret = 1;
+ remove_hugetlb_page(h, page, acct_surplus);
break;
}
}
- return ret;
+ return page;
}
/*
if (!PageHuge(page))
return 0;
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
if (!PageHuge(page)) {
rc = 0;
goto out;
if (!page_count(page)) {
struct page *head = compound_head(page);
struct hstate *h = page_hstate(head);
- int nid = page_to_nid(head);
if (h->free_huge_pages - h->resv_huge_pages == 0)
goto out;
* when it is dissolved.
*/
if (unlikely(!HPageFreed(head))) {
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
cond_resched();
/*
SetPageHWPoison(page);
ClearPageHWPoison(head);
}
- list_del(&head->lru);
- h->free_huge_pages--;
- h->free_huge_pages_node[nid]--;
+ remove_hugetlb_page(h, page, false);
h->max_huge_pages--;
+ spin_unlock_irq(&hugetlb_lock);
update_and_free_page(h, head);
- rc = 0;
+ return 0;
}
out:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
return rc;
}
if (hstate_is_gigantic(h))
return NULL;
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages)
goto out_unlock;
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
page = alloc_fresh_huge_page(h, gfp_mask, nid, nmask, NULL);
if (!page)
return NULL;
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
/*
* We could have raced with the pool size change.
* Double check that and simply deallocate the new page
*/
if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
SetHPageTemporary(page);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
put_page(page);
return NULL;
} else {
}
out_unlock:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
return page;
}
struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid,
nodemask_t *nmask, gfp_t gfp_mask)
{
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
if (h->free_huge_pages - h->resv_huge_pages > 0) {
struct page *page;
page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask);
if (page) {
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
return page;
}
}
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
return alloc_migrate_huge_page(h, gfp_mask, preferred_nid, nmask);
}
long needed, allocated;
bool alloc_ok = true;
+ lockdep_assert_held(&hugetlb_lock);
needed = (h->resv_huge_pages + delta) - h->free_huge_pages;
if (needed <= 0) {
h->resv_huge_pages += delta;
ret = -ENOMEM;
retry:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
for (i = 0; i < needed; i++) {
page = alloc_surplus_huge_page(h, htlb_alloc_mask(h),
NUMA_NO_NODE, NULL);
* After retaking hugetlb_lock, we need to recalculate 'needed'
* because either resv_huge_pages or free_huge_pages may have changed.
*/
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
needed = (h->resv_huge_pages + delta) -
(h->free_huge_pages + allocated);
if (needed > 0) {
enqueue_huge_page(h, page);
}
free:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
/* Free unnecessary surplus pages to the buddy allocator */
list_for_each_entry_safe(page, tmp, &surplus_list, lru)
put_page(page);
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
return ret;
}
* to the associated reservation map.
* 2) Free any unused surplus pages that may have been allocated to satisfy
* the reservation. As many as unused_resv_pages may be freed.
- *
- * Called with hugetlb_lock held. However, the lock could be dropped (and
- * reacquired) during calls to cond_resched_lock. Whenever dropping the lock,
- * we must make sure nobody else can claim pages we are in the process of
- * freeing. Do this by ensuring resv_huge_page always is greater than the
- * number of huge pages we plan to free when dropping the lock.
*/
static void return_unused_surplus_pages(struct hstate *h,
unsigned long unused_resv_pages)
{
unsigned long nr_pages;
+ struct page *page;
+ LIST_HEAD(page_list);
+
+ lockdep_assert_held(&hugetlb_lock);
+ /* Uncommit the reservation */
+ h->resv_huge_pages -= unused_resv_pages;
/* Cannot return gigantic pages currently */
if (hstate_is_gigantic(h))
* evenly across all nodes with memory. Iterate across these nodes
* until we can no longer free unreserved surplus pages. This occurs
* when the nodes with surplus pages have no free pages.
- * free_pool_huge_page() will balance the freed pages across the
+ * remove_pool_huge_page() will balance the freed pages across the
* on-line nodes with memory and will handle the hstate accounting.
- *
- * Note that we decrement resv_huge_pages as we free the pages. If
- * we drop the lock, resv_huge_pages will still be sufficiently large
- * to cover subsequent pages we may free.
*/
while (nr_pages--) {
- h->resv_huge_pages--;
- unused_resv_pages--;
- if (!free_pool_huge_page(h, &node_states[N_MEMORY], 1))
+ page = remove_pool_huge_page(h, &node_states[N_MEMORY], 1);
+ if (!page)
goto out;
- cond_resched_lock(&hugetlb_lock);
+
+ list_add(&page->lru, &page_list);
}
out:
- /* Fully uncommit the reservation */
- h->resv_huge_pages -= unused_resv_pages;
+ spin_unlock_irq(&hugetlb_lock);
+ update_and_free_pages_bulk(h, &page_list);
+ spin_lock_irq(&hugetlb_lock);
}
if (vma->vm_flags & VM_MAYSHARE)
return ret;
- else if (is_vma_resv_set(vma, HPAGE_RESV_OWNER) && ret >= 0) {
- /*
- * In most cases, reserves always exist for private mappings.
- * However, a file associated with mapping could have been
- * hole punched or truncated after reserves were consumed.
- * As subsequent fault on such a range will not use reserves.
- * Subtle - The reserve map for private mappings has the
- * opposite meaning than that of shared mappings. If NO
- * entry is in the reserve map, it means a reservation exists.
- * If an entry exists in the reserve map, it means the
- * reservation has already been consumed. As a result, the
- * return value of this routine is the opposite of the
- * value returned from reserve map manipulation routines above.
- */
- if (ret)
- return 0;
- else
- return 1;
- }
- else
- return ret < 0 ? ret : 0;
+ /*
+ * We know private mapping must have HPAGE_RESV_OWNER set.
+ *
+ * In most cases, reserves always exist for private mappings.
+ * However, a file associated with mapping could have been
+ * hole punched or truncated after reserves were consumed.
+ * As subsequent fault on such a range will not use reserves.
+ * Subtle - The reserve map for private mappings has the
+ * opposite meaning than that of shared mappings. If NO
+ * entry is in the reserve map, it means a reservation exists.
+ * If an entry exists in the reserve map, it means the
+ * reservation has already been consumed. As a result, the
+ * return value of this routine is the opposite of the
+ * value returned from reserve map manipulation routines above.
+ */
+ if (ret > 0)
+ return 0;
+ if (ret == 0)
+ return 1;
+ return ret;
}
static long vma_needs_reservation(struct hstate *h,
}
}
+/*
+ * alloc_and_dissolve_huge_page - Allocate a new page and dissolve the old one
+ * @h: struct hstate old page belongs to
+ * @old_page: Old page to dissolve
+ * @list: List to isolate the page in case we need to
+ * Returns 0 on success, otherwise negated error.
+ */
+static int alloc_and_dissolve_huge_page(struct hstate *h, struct page *old_page,
+ struct list_head *list)
+{
+ gfp_t gfp_mask = htlb_alloc_mask(h) | __GFP_THISNODE;
+ int nid = page_to_nid(old_page);
+ struct page *new_page;
+ int ret = 0;
+
+ /*
+ * Before dissolving the page, we need to allocate a new one for the
+ * pool to remain stable. Using alloc_buddy_huge_page() allows us to
+ * not having to deal with prep_new_huge_page() and avoids dealing of any
+ * counters. This simplifies and let us do the whole thing under the
+ * lock.
+ */
+ new_page = alloc_buddy_huge_page(h, gfp_mask, nid, NULL, NULL);
+ if (!new_page)
+ return -ENOMEM;
+
+retry:
+ spin_lock_irq(&hugetlb_lock);
+ if (!PageHuge(old_page)) {
+ /*
+ * Freed from under us. Drop new_page too.
+ */
+ goto free_new;
+ } else if (page_count(old_page)) {
+ /*
+ * Someone has grabbed the page, try to isolate it here.
+ * Fail with -EBUSY if not possible.
+ */
+ spin_unlock_irq(&hugetlb_lock);
+ if (!isolate_huge_page(old_page, list))
+ ret = -EBUSY;
+ spin_lock_irq(&hugetlb_lock);
+ goto free_new;
+ } else if (!HPageFreed(old_page)) {
+ /*
+ * Page's refcount is 0 but it has not been enqueued in the
+ * freelist yet. Race window is small, so we can succeed here if
+ * we retry.
+ */
+ spin_unlock_irq(&hugetlb_lock);
+ cond_resched();
+ goto retry;
+ } else {
+ /*
+ * Ok, old_page is still a genuine free hugepage. Remove it from
+ * the freelist and decrease the counters. These will be
+ * incremented again when calling __prep_account_new_huge_page()
+ * and enqueue_huge_page() for new_page. The counters will remain
+ * stable since this happens under the lock.
+ */
+ remove_hugetlb_page(h, old_page, false);
+
+ /*
+ * new_page needs to be initialized with the standard hugetlb
+ * state. This is normally done by prep_new_huge_page() but
+ * that takes hugetlb_lock which is already held so we need to
+ * open code it here.
+ * Reference count trick is needed because allocator gives us
+ * referenced page but the pool requires pages with 0 refcount.
+ */
+ __prep_new_huge_page(new_page);
+ __prep_account_new_huge_page(h, nid);
+ page_ref_dec(new_page);
+ enqueue_huge_page(h, new_page);
+
+ /*
+ * Pages have been replaced, we can safely free the old one.
+ */
+ spin_unlock_irq(&hugetlb_lock);
+ update_and_free_page(h, old_page);
+ }
+
+ return ret;
+
+free_new:
+ spin_unlock_irq(&hugetlb_lock);
+ __free_pages(new_page, huge_page_order(h));
+
+ return ret;
+}
+
+int isolate_or_dissolve_huge_page(struct page *page, struct list_head *list)
+{
+ struct hstate *h;
+ struct page *head;
+ int ret = -EBUSY;
+
+ /*
+ * The page might have been dissolved from under our feet, so make sure
+ * to carefully check the state under the lock.
+ * Return success when racing as if we dissolved the page ourselves.
+ */
+ spin_lock_irq(&hugetlb_lock);
+ if (PageHuge(page)) {
+ head = compound_head(page);
+ h = page_hstate(head);
+ } else {
+ spin_unlock_irq(&hugetlb_lock);
+ return 0;
+ }
+ spin_unlock_irq(&hugetlb_lock);
+
+ /*
+ * Fence off gigantic pages as there is a cyclic dependency between
+ * alloc_contig_range and them. Return -ENOMEM as this has the effect
+ * of bailing out right away without further retrying.
+ */
+ if (hstate_is_gigantic(h))
+ return -ENOMEM;
+
+ if (page_count(head) && isolate_huge_page(head, list))
+ ret = 0;
+ else if (!page_count(head))
+ ret = alloc_and_dissolve_huge_page(h, head, list);
+
+ return ret;
+}
+
struct page *alloc_huge_page(struct vm_area_struct *vma,
unsigned long addr, int avoid_reserve)
{
/* If this allocation is not consuming a reservation, charge it now.
*/
- deferred_reserve = map_chg || avoid_reserve || !vma_resv_map(vma);
+ deferred_reserve = map_chg || avoid_reserve;
if (deferred_reserve) {
ret = hugetlb_cgroup_charge_cgroup_rsvd(
idx, pages_per_huge_page(h), &h_cg);
if (ret)
goto out_uncharge_cgroup_reservation;
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
/*
* glb_chg is passed to indicate whether or not a page must be taken
* from the global free pool (global change). gbl_chg == 0 indicates
*/
page = dequeue_huge_page_vma(h, vma, addr, avoid_reserve, gbl_chg);
if (!page) {
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
page = alloc_buddy_huge_page_with_mpol(h, vma, addr);
if (!page)
goto out_uncharge_cgroup;
SetHPageRestoreReserve(page);
h->resv_huge_pages--;
}
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
list_add(&page->lru, &h->hugepage_activelist);
/* Fall through */
}
h_cg, page);
}
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
hugetlb_set_page_subpool(page, spool);
nodemask_t *nodes_allowed)
{
int i;
+ LIST_HEAD(page_list);
+ lockdep_assert_held(&hugetlb_lock);
if (hstate_is_gigantic(h))
return;
+ /*
+ * Collect pages to be freed on a list, and free after dropping lock
+ */
for_each_node_mask(i, *nodes_allowed) {
struct page *page, *next;
struct list_head *freel = &h->hugepage_freelists[i];
list_for_each_entry_safe(page, next, freel, lru) {
if (count >= h->nr_huge_pages)
- return;
+ goto out;
if (PageHighMem(page))
continue;
- list_del(&page->lru);
- update_and_free_page(h, page);
- h->free_huge_pages--;
- h->free_huge_pages_node[page_to_nid(page)]--;
+ remove_hugetlb_page(h, page, false);
+ list_add(&page->lru, &page_list);
}
}
+
+out:
+ spin_unlock_irq(&hugetlb_lock);
+ update_and_free_pages_bulk(h, &page_list);
+ spin_lock_irq(&hugetlb_lock);
}
#else
static inline void try_to_free_low(struct hstate *h, unsigned long count,
{
int nr_nodes, node;
+ lockdep_assert_held(&hugetlb_lock);
VM_BUG_ON(delta != -1 && delta != 1);
if (delta < 0) {
nodemask_t *nodes_allowed)
{
unsigned long min_count, ret;
+ struct page *page;
+ LIST_HEAD(page_list);
NODEMASK_ALLOC(nodemask_t, node_alloc_noretry, GFP_KERNEL);
/*
else
return -ENOMEM;
- spin_lock(&hugetlb_lock);
+ /*
+ * resize_lock mutex prevents concurrent adjustments to number of
+ * pages in hstate via the proc/sysfs interfaces.
+ */
+ mutex_lock(&h->resize_lock);
+ spin_lock_irq(&hugetlb_lock);
/*
* Check for a node specific request.
*/
if (hstate_is_gigantic(h) && !IS_ENABLED(CONFIG_CONTIG_ALLOC)) {
if (count > persistent_huge_pages(h)) {
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
+ mutex_unlock(&h->resize_lock);
NODEMASK_FREE(node_alloc_noretry);
return -EINVAL;
}
* page, free_huge_page will handle it by freeing the page
* and reducing the surplus.
*/
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
/* yield cpu to avoid soft lockup */
cond_resched();
ret = alloc_pool_huge_page(h, nodes_allowed,
node_alloc_noretry);
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
if (!ret)
goto out;
min_count = h->resv_huge_pages + h->nr_huge_pages - h->free_huge_pages;
min_count = max(count, min_count);
try_to_free_low(h, min_count, nodes_allowed);
+
+ /*
+ * Collect pages to be removed on list without dropping lock
+ */
while (min_count < persistent_huge_pages(h)) {
- if (!free_pool_huge_page(h, nodes_allowed, 0))
+ page = remove_pool_huge_page(h, nodes_allowed, 0);
+ if (!page)
break;
- cond_resched_lock(&hugetlb_lock);
+
+ list_add(&page->lru, &page_list);
}
+ /* free the pages after dropping lock */
+ spin_unlock_irq(&hugetlb_lock);
+ update_and_free_pages_bulk(h, &page_list);
+ spin_lock_irq(&hugetlb_lock);
+
while (count < persistent_huge_pages(h)) {
if (!adjust_pool_surplus(h, nodes_allowed, 1))
break;
}
out:
h->max_huge_pages = persistent_huge_pages(h);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
+ mutex_unlock(&h->resize_lock);
NODEMASK_FREE(node_alloc_noretry);
if (err)
return err;
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
h->nr_overcommit_huge_pages = input;
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
return count;
}
BUG_ON(hugetlb_max_hstate >= HUGE_MAX_HSTATE);
BUG_ON(order == 0);
h = &hstates[hugetlb_max_hstate++];
+ mutex_init(&h->resize_lock);
h->order = order;
h->mask = ~(huge_page_size(h) - 1);
for (i = 0; i < MAX_NUMNODES; ++i)
/*
* Global state is always initialized later in hugetlb_init.
- * But we need to allocate >= MAX_ORDER hstates here early to still
+ * But we need to allocate gigantic hstates here early to still
* use the bootmem allocator.
*/
- if (hugetlb_max_hstate && parsed_hstate->order >= MAX_ORDER)
+ if (hugetlb_max_hstate && hstate_is_gigantic(parsed_hstate))
hugetlb_hstate_alloc_pages(parsed_hstate);
last_mhp = mhp;
goto out;
if (write) {
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
h->nr_overcommit_huge_pages = tmp;
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
}
out:
return ret;
if (!delta)
return 0;
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
/*
* When cpuset is configured, it breaks the strict hugetlb page
* reservation as the accounting is done on a global variable. Such
return_unused_surplus_pages(h, (unsigned long) -delta);
out:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
return ret;
}
src_pte = huge_pte_offset(src, addr, sz);
if (!src_pte)
continue;
- dst_pte = huge_pte_alloc(dst, addr, sz);
+ dst_pte = huge_pte_alloc(dst, vma, addr, sz);
if (!dst_pte) {
ret = -ENOMEM;
break;
return 0;
}
+static inline vm_fault_t hugetlb_handle_userfault(struct vm_area_struct *vma,
+ struct address_space *mapping,
+ pgoff_t idx,
+ unsigned int flags,
+ unsigned long haddr,
+ unsigned long reason)
+{
+ vm_fault_t ret;
+ u32 hash;
+ struct vm_fault vmf = {
+ .vma = vma,
+ .address = haddr,
+ .flags = flags,
+
+ /*
+ * Hard to debug if it ends up being
+ * used by a callee that assumes
+ * something about the other
+ * uninitialized fields... same as in
+ * memory.c
+ */
+ };
+
+ /*
+ * hugetlb_fault_mutex and i_mmap_rwsem must be
+ * dropped before handling userfault. Reacquire
+ * after handling fault to make calling code simpler.
+ */
+ hash = hugetlb_fault_mutex_hash(mapping, idx);
+ mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+ i_mmap_unlock_read(mapping);
+ ret = handle_userfault(&vmf, reason);
+ i_mmap_lock_read(mapping);
+ mutex_lock(&hugetlb_fault_mutex_table[hash]);
+
+ return ret;
+}
+
static vm_fault_t hugetlb_no_page(struct mm_struct *mm,
struct vm_area_struct *vma,
struct address_space *mapping, pgoff_t idx,
retry:
page = find_lock_page(mapping, idx);
if (!page) {
- /*
- * Check for page in userfault range
- */
+ /* Check for page in userfault range */
if (userfaultfd_missing(vma)) {
- u32 hash;
- struct vm_fault vmf = {
- .vma = vma,
- .address = haddr,
- .flags = flags,
- /*
- * Hard to debug if it ends up being
- * used by a callee that assumes
- * something about the other
- * uninitialized fields... same as in
- * memory.c
- */
- };
-
- /*
- * hugetlb_fault_mutex and i_mmap_rwsem must be
- * dropped before handling userfault. Reacquire
- * after handling fault to make calling code simpler.
- */
- hash = hugetlb_fault_mutex_hash(mapping, idx);
- mutex_unlock(&hugetlb_fault_mutex_table[hash]);
- i_mmap_unlock_read(mapping);
- ret = handle_userfault(&vmf, VM_UFFD_MISSING);
- i_mmap_lock_read(mapping);
- mutex_lock(&hugetlb_fault_mutex_table[hash]);
+ ret = hugetlb_handle_userfault(vma, mapping, idx,
+ flags, haddr,
+ VM_UFFD_MISSING);
goto out;
}
* sure there really is no pte entry.
*/
ptl = huge_pte_lock(h, mm, ptep);
- if (!huge_pte_none(huge_ptep_get(ptep))) {
- ret = 0;
- spin_unlock(ptl);
- goto out;
- }
+ ret = 0;
+ if (huge_pte_none(huge_ptep_get(ptep)))
+ ret = vmf_error(PTR_ERR(page));
spin_unlock(ptl);
- ret = vmf_error(PTR_ERR(page));
goto out;
}
clear_huge_page(page, address, pages_per_huge_page(h));
VM_FAULT_SET_HINDEX(hstate_index(h));
goto backout_unlocked;
}
+
+ /* Check for page in userfault range. */
+ if (userfaultfd_minor(vma)) {
+ unlock_page(page);
+ put_page(page);
+ ret = hugetlb_handle_userfault(vma, mapping, idx,
+ flags, haddr,
+ VM_UFFD_MINOR);
+ goto out;
+ }
}
/*
*/
mapping = vma->vm_file->f_mapping;
i_mmap_lock_read(mapping);
- ptep = huge_pte_alloc(mm, haddr, huge_page_size(h));
+ ptep = huge_pte_alloc(mm, vma, haddr, huge_page_size(h));
if (!ptep) {
i_mmap_unlock_read(mapping);
return VM_FAULT_OOM;
return ret;
}
+#ifdef CONFIG_USERFAULTFD
/*
* Used by userfaultfd UFFDIO_COPY. Based on mcopy_atomic_pte with
* modifications for huge pages.
struct vm_area_struct *dst_vma,
unsigned long dst_addr,
unsigned long src_addr,
+ enum mcopy_atomic_mode mode,
struct page **pagep)
{
+ bool is_continue = (mode == MCOPY_ATOMIC_CONTINUE);
struct address_space *mapping;
pgoff_t idx;
unsigned long size;
spinlock_t *ptl;
int ret;
struct page *page;
+ int writable;
+
+ mapping = dst_vma->vm_file->f_mapping;
+ idx = vma_hugecache_offset(h, dst_vma, dst_addr);
- if (!*pagep) {
+ if (is_continue) {
+ ret = -EFAULT;
+ page = find_lock_page(mapping, idx);
+ if (!page)
+ goto out;
+ } else if (!*pagep) {
ret = -ENOMEM;
page = alloc_huge_page(dst_vma, dst_addr, 0);
if (IS_ERR(page))
*/
__SetPageUptodate(page);
- mapping = dst_vma->vm_file->f_mapping;
- idx = vma_hugecache_offset(h, dst_vma, dst_addr);
-
- /*
- * If shared, add to page cache
- */
- if (vm_shared) {
+ /* Add shared, newly allocated pages to the page cache. */
+ if (vm_shared && !is_continue) {
size = i_size_read(mapping->host) >> huge_page_shift(h);
ret = -EFAULT;
if (idx >= size)
hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
}
- _dst_pte = make_huge_pte(dst_vma, page, dst_vma->vm_flags & VM_WRITE);
- if (dst_vma->vm_flags & VM_WRITE)
+ /* For CONTINUE on a non-shared VMA, don't set VM_WRITE for CoW. */
+ if (is_continue && !vm_shared)
+ writable = 0;
+ else
+ writable = dst_vma->vm_flags & VM_WRITE;
+
+ _dst_pte = make_huge_pte(dst_vma, page, writable);
+ if (writable)
_dst_pte = huge_pte_mkdirty(_dst_pte);
_dst_pte = pte_mkyoung(_dst_pte);
update_mmu_cache(dst_vma, dst_addr, dst_pte);
spin_unlock(ptl);
- SetHPageMigratable(page);
- if (vm_shared)
+ if (!is_continue)
+ SetHPageMigratable(page);
+ if (vm_shared || is_continue)
unlock_page(page);
ret = 0;
out:
return ret;
out_release_unlock:
spin_unlock(ptl);
- if (vm_shared)
+ if (vm_shared || is_continue)
unlock_page(page);
out_release_nounlock:
put_page(page);
goto out;
}
+#endif /* CONFIG_USERFAULTFD */
static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
int refs, struct page **pages,
return i ? i : err;
}
-#ifndef __HAVE_ARCH_FLUSH_HUGETLB_TLB_RANGE
-/*
- * ARCHes with special requirements for evicting HUGETLB backing TLB entries can
- * implement this.
- */
-#define flush_hugetlb_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end)
-#endif
-
unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
unsigned long address, unsigned long end, pgprot_t newprot)
{
/*
* If the subpool has a minimum size, the number of global
* reservations to be released may be adjusted.
+ *
+ * Note that !resv_map implies freed == 0. So (chg - freed)
+ * won't go negative.
*/
gbl_reserve = hugepage_subpool_put_pages(spool, (chg - freed));
hugetlb_acct_memory(h, -gbl_reserve);
return false;
}
+bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
+{
+#ifdef CONFIG_USERFAULTFD
+ if (uffd_disable_huge_pmd_share(vma))
+ return false;
+#endif
+ return vma_shareable(vma, addr);
+}
+
/*
* Determine if start,end range within vma could be mapped by shared pmd.
* If yes, adjust start and end to cover range associated with possible
v_end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
/*
- * vma need span at least one aligned PUD size and the start,end range
- * must at least partialy within it.
+ * vma needs to span at least one aligned PUD size, and the range
+ * must be at least partially within in.
*/
if (!(vma->vm_flags & VM_MAYSHARE) || !(v_end > v_start) ||
(*end <= v_start) || (*start >= v_end))
* if !vma_shareable check at the beginning of the routine. i_mmap_rwsem is
* only required for subsequent processing.
*/
-pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud)
{
- struct vm_area_struct *vma = find_vma(mm, addr);
struct address_space *mapping = vma->vm_file->f_mapping;
pgoff_t idx = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
pte_t *pte;
spinlock_t *ptl;
- if (!vma_shareable(vma, addr))
- return (pte_t *)pmd_alloc(mm, pud, addr);
-
i_mmap_assert_locked(mapping);
vma_interval_tree_foreach(svma, &mapping->i_mmap, idx, idx) {
if (svma == vma)
*addr = ALIGN(*addr, HPAGE_SIZE * PTRS_PER_PTE) - HPAGE_SIZE;
return 1;
}
-#define want_pmd_share() (1)
+
#else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
-pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud)
+pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long addr, pud_t *pud)
{
return NULL;
}
unsigned long *start, unsigned long *end)
{
}
-#define want_pmd_share() (0)
+
+bool want_pmd_share(struct vm_area_struct *vma, unsigned long addr)
+{
+ return false;
+}
#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */
#ifdef CONFIG_ARCH_WANT_GENERAL_HUGETLB
-pte_t *huge_pte_alloc(struct mm_struct *mm,
+pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long addr, unsigned long sz)
{
pgd_t *pgd;
pte = (pte_t *)pud;
} else {
BUG_ON(sz != PMD_SIZE);
- if (want_pmd_share() && pud_none(*pud))
- pte = huge_pmd_share(mm, addr, pud);
+ if (want_pmd_share(vma, addr) && pud_none(*pud))
+ pte = huge_pmd_share(mm, vma, addr, pud);
else
pte = (pte_t *)pmd_alloc(mm, pud, addr);
}
{
bool ret = true;
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
if (!PageHeadHuge(page) ||
!HPageMigratable(page) ||
!get_page_unless_zero(page)) {
ClearHPageMigratable(page);
list_move_tail(&page->lru, list);
unlock:
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
return ret;
}
void putback_active_hugepage(struct page *page)
{
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
SetHPageMigratable(page);
list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
put_page(page);
}
SetHPageTemporary(oldpage);
ClearHPageTemporary(newpage);
- spin_lock(&hugetlb_lock);
+ /*
+ * There is no need to transfer the per-node surplus state
+ * when we do not cross the node.
+ */
+ if (new_nid == old_nid)
+ return;
+ spin_lock_irq(&hugetlb_lock);
if (h->surplus_huge_pages_node[old_nid]) {
h->surplus_huge_pages_node[old_nid]--;
h->surplus_huge_pages_node[new_nid]++;
}
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
+ }
+}
+
+/*
+ * This function will unconditionally remove all the shared pmd pgtable entries
+ * within the specific vma for a hugetlbfs memory range.
+ */
+void hugetlb_unshare_all_pmds(struct vm_area_struct *vma)
+{
+ struct hstate *h = hstate_vma(vma);
+ unsigned long sz = huge_page_size(h);
+ struct mm_struct *mm = vma->vm_mm;
+ struct mmu_notifier_range range;
+ unsigned long address, start, end;
+ spinlock_t *ptl;
+ pte_t *ptep;
+
+ if (!(vma->vm_flags & VM_MAYSHARE))
+ return;
+
+ start = ALIGN(vma->vm_start, PUD_SIZE);
+ end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
+
+ if (start >= end)
+ return;
+
+ /*
+ * No need to call adjust_range_if_pmd_sharing_possible(), because
+ * we have already done the PUD_SIZE alignment.
+ */
+ mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, mm,
+ start, end);
+ mmu_notifier_invalidate_range_start(&range);
+ i_mmap_lock_write(vma->vm_file->f_mapping);
+ for (address = start; address < end; address += PUD_SIZE) {
+ unsigned long tmp = address;
+
+ ptep = huge_pte_offset(mm, address, sz);
+ if (!ptep)
+ continue;
+ ptl = huge_pte_lock(h, mm, ptep);
+ /* We don't want 'address' to be changed */
+ huge_pmd_unshare(mm, vma, &tmp, ptep);
+ spin_unlock(ptl);
}
+ flush_hugetlb_tlb_range(vma, start, end);
+ i_mmap_unlock_write(vma->vm_file->f_mapping);
+ /*
+ * No need to call mmu_notifier_invalidate_range(), see
+ * Documentation/vm/mmu_notifier.rst.
+ */
+ mmu_notifier_invalidate_range_end(&range);
}
#ifdef CONFIG_CMA
do {
idx = 0;
for_each_hstate(h) {
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
list_for_each_entry(page, &h->hugepage_activelist, lru)
hugetlb_cgroup_move_parent(idx, h_cg, page);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
idx++;
}
cond_resched();
if (hugetlb_cgroup_disabled())
return;
- VM_BUG_ON_PAGE(!PageHuge(oldhpage), oldhpage);
- spin_lock(&hugetlb_lock);
+ spin_lock_irq(&hugetlb_lock);
h_cg = hugetlb_cgroup_from_page(oldhpage);
h_cg_rsvd = hugetlb_cgroup_from_page_rsvd(oldhpage);
set_hugetlb_cgroup(oldhpage, NULL);
set_hugetlb_cgroup(newhpage, h_cg);
set_hugetlb_cgroup_rsvd(newhpage, h_cg_rsvd);
list_move(&newhpage->lru, &h->hugepage_activelist);
- spin_unlock(&hugetlb_lock);
+ spin_unlock_irq(&hugetlb_lock);
return;
}
unsigned int nr_freepages; /* Number of isolated free pages */
unsigned int nr_migratepages; /* Number of pages to migrate */
unsigned long free_pfn; /* isolate_freepages search base */
- unsigned long migrate_pfn; /* isolate_migratepages search base */
+ /*
+ * Acts as an in/out parameter to page isolation for migration.
+ * isolate_migratepages uses it as a search base.
+ * isolate_migratepages_block will update the value to the next pfn
+ * after the last isolated one.
+ */
+ unsigned long migrate_pfn;
unsigned long fast_start_pfn; /* a pfn to start linear scan from */
struct zone *zone;
unsigned long total_migrate_scanned;
unsigned long
isolate_freepages_range(struct compact_control *cc,
unsigned long start_pfn, unsigned long end_pfn);
-unsigned long
+int
isolate_migratepages_range(struct compact_control *cc,
unsigned long low_pfn, unsigned long end_pfn);
int find_suitable_fallback(struct free_area *area, unsigned int order,
}
/*
- * Stack area - atomatically grows in one direction
+ * Stack area - automatically grows in one direction
*
* VM_GROWSUP / VM_GROWSDOWN VMAs are always private anonymous:
* do_mmap() forbids all other combinations.
#define KASAN_TAG_MAX 0xFD /* maximum value for random tags */
#ifdef CONFIG_KASAN_HW_TAGS
-#define KASAN_TAG_MIN 0xF0 /* mimimum value for random tags */
+#define KASAN_TAG_MIN 0xF0 /* minimum value for random tags */
#else
-#define KASAN_TAG_MIN 0x00 /* mimimum value for random tags */
+#define KASAN_TAG_MIN 0x00 /* minimum value for random tags */
#endif
#ifdef CONFIG_KASAN_GENERIC
#else /* CONFIG_KASAN_HW_TAGS */
/**
- * kasan_poison - mark the memory range as unaccessible
+ * kasan_poison - mark the memory range as inaccessible
* @addr - range start address, must be aligned to KASAN_GRANULE_SIZE
* @size - range size, must be aligned to KASAN_GRANULE_SIZE
* @value - value that's written to metadata for the range
/**
* kasan_poison_last_granule - mark the last granule of the memory range as
- * unaccessible
+ * inaccessible
* @addr - range start address, must be aligned to KASAN_GRANULE_SIZE
* @size - range size
*
/* Data structure and operations for quarantine queues. */
/*
- * Each queue is a signle-linked list, which also stores the total size of
+ * Each queue is a single-linked list, which also stores the total size of
* objects inside of it.
*/
struct qlist_head {
local_irq_save(flags);
/*
- * As the object now gets freed from the quaratine, assume that its
+ * As the object now gets freed from the quarantine, assume that its
* free track is no longer valid.
*/
*(u8 *)kasan_mem_to_shadow(object) = KASAN_KMALLOC_FREE;
* // rest of vmalloc process <data dependency>
* STORE p, a LOAD shadow(x+99)
*
- * If there is no barrier between the end of unpoisioning the shadow
+ * If there is no barrier between the end of unpoisoning the shadow
* and the store of the result to p, the stores could be committed
* in a different order by CPU#0, and CPU#1 could erroneously observe
* poison in the shadow.
* How does this work?
* -------------------
*
- * We have a region that is page aligned, labelled as A.
+ * We have a region that is page aligned, labeled as A.
* That might not map onto the shadow in a way that is page-aligned:
*
* start end
#include <linux/atomic.h>
#include <linux/bug.h>
#include <linux/debugfs.h>
+#include <linux/irq_work.h>
#include <linux/kcsan-checks.h>
#include <linux/kfence.h>
#include <linux/kmemleak.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <linux/rcupdate.h>
+#include <linux/sched/sysctl.h>
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
/* Restore page protection if there was an OOB access. */
if (meta->unprotected_page) {
+ memzero_explicit((void *)ALIGN_DOWN(meta->unprotected_page, PAGE_SIZE), PAGE_SIZE);
kfence_protect(meta->unprotected_page);
meta->unprotected_page = 0;
}
/* === Allocation Gate Timer ================================================ */
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+/* Wait queue to wake up allocation-gate timer task. */
+static DECLARE_WAIT_QUEUE_HEAD(allocation_wait);
+
+static void wake_up_kfence_timer(struct irq_work *work)
+{
+ wake_up(&allocation_wait);
+}
+static DEFINE_IRQ_WORK(wake_up_kfence_timer_work, wake_up_kfence_timer);
+#endif
+
/*
* Set up delayed work, which will enable and disable the static key. We need to
* use a work queue (rather than a simple timer), since enabling and disabling a
if (!READ_ONCE(kfence_enabled))
return;
- /* Enable static key, and await allocation to happen. */
atomic_set(&kfence_allocation_gate, 0);
#ifdef CONFIG_KFENCE_STATIC_KEYS
+ /* Enable static key, and await allocation to happen. */
static_branch_enable(&kfence_allocation_key);
- /*
- * Await an allocation. Timeout after 1 second, in case the kernel stops
- * doing allocations, to avoid stalling this worker task for too long.
- */
- {
- unsigned long end_wait = jiffies + HZ;
-
- do {
- set_current_state(TASK_UNINTERRUPTIBLE);
- if (atomic_read(&kfence_allocation_gate) != 0)
- break;
- schedule_timeout(1);
- } while (time_before(jiffies, end_wait));
- __set_current_state(TASK_RUNNING);
+
+ if (sysctl_hung_task_timeout_secs) {
+ /*
+ * During low activity with no allocations we might wait a
+ * while; let's avoid the hung task warning.
+ */
+ wait_event_timeout(allocation_wait, atomic_read(&kfence_allocation_gate),
+ sysctl_hung_task_timeout_secs * HZ / 2);
+ } else {
+ wait_event(allocation_wait, atomic_read(&kfence_allocation_gate));
}
+
/* Disable static key and reset timer. */
static_branch_disable(&kfence_allocation_key);
#endif
- schedule_delayed_work(&kfence_timer, msecs_to_jiffies(kfence_sample_interval));
+ queue_delayed_work(system_power_efficient_wq, &kfence_timer,
+ msecs_to_jiffies(kfence_sample_interval));
}
static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate);
}
WRITE_ONCE(kfence_enabled, true);
- schedule_delayed_work(&kfence_timer, 0);
+ queue_delayed_work(system_power_efficient_wq, &kfence_timer, 0);
pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
(void *)(__kfence_pool + KFENCE_POOL_SIZE));
*/
if (atomic_read(&kfence_allocation_gate) || atomic_inc_return(&kfence_allocation_gate) > 1)
return NULL;
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+ /*
+ * waitqueue_active() is fully ordered after the update of
+ * kfence_allocation_gate per atomic_inc_return().
+ */
+ if (waitqueue_active(&allocation_wait)) {
+ /*
+ * Calling wake_up() here may deadlock when allocations happen
+ * from within timer code. Use an irq_work to defer it.
+ */
+ irq_work_queue(&wake_up_kfence_timer_work);
+ }
+#endif
if (!READ_ONCE(kfence_enabled))
return NULL;
if (panic_on_warn)
panic("panic_on_warn set ...\n");
- /* We encountered a memory unsafety error, taint the kernel! */
+ /* We encountered a memory safety error, taint the kernel! */
add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK);
}
return -ENOMEM;
/* __khugepaged_exit() must not run from under us */
- VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm);
+ VM_BUG_ON_MM(khugepaged_test_exit(mm), mm);
if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
free_mm_slot(mm_slot);
return 0;
*
* The page table that maps the page has been already unlinked
* from the page table tree and this process cannot get
- * an additinal pin on the page.
+ * an additional pin on the page.
*
* New pins can come later if the page is shared across fork,
* but not from this process. The other process cannot write to
if (pte_write(pteval))
writable = true;
}
- if (likely(writable)) {
- if (likely(referenced)) {
- result = SCAN_SUCCEED;
- trace_mm_collapse_huge_page_isolate(page, none_or_zero,
- referenced, writable, result);
- return 1;
- }
- } else {
+
+ if (unlikely(!writable)) {
result = SCAN_PAGE_RO;
+ } else if (unlikely(!referenced)) {
+ result = SCAN_LACK_REFERENCED_PAGE;
+ } else {
+ result = SCAN_SUCCEED;
+ trace_mm_collapse_huge_page_isolate(page, none_or_zero,
+ referenced, writable, result);
+ return 1;
}
-
out:
release_pte_pages(pte, _pte, compound_pagelist);
trace_mm_collapse_huge_page_isolate(page, none_or_zero,
* If node_reclaim_mode is disabled, then no extra effort is made to
* allocate memory locally.
*/
- if (!node_reclaim_mode)
+ if (!node_reclaim_enabled())
return false;
/* If there is a count for this node already, it must be acceptable */
mmap_write_lock(mm);
result = hugepage_vma_revalidate(mm, address, &vma);
if (result)
- goto out;
+ goto out_up_write;
/* check if the pmd is still valid */
if (mm_find_pmd(mm, address) != pmd)
- goto out;
+ goto out_up_write;
anon_vma_lock_write(vma->anon_vma);
spin_unlock(pmd_ptl);
anon_vma_unlock_write(vma->anon_vma);
result = SCAN_FAIL;
- goto out;
+ goto out_up_write;
}
/*
__collapse_huge_page_copy(pte, new_page, vma, address, pte_ptl,
&compound_pagelist);
pte_unmap(pte);
+ /*
+ * spin_lock() below is not the equivalent of smp_wmb(), but
+ * the smp_wmb() inside __SetPageUptodate() can be reused to
+ * avoid the copy_huge_page writes to become visible after
+ * the set_pmd_at() write.
+ */
__SetPageUptodate(new_page);
pgtable = pmd_pgtable(_pmd);
_pmd = mk_huge_pmd(new_page, vma->vm_page_prot);
_pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma);
- /*
- * spin_lock() below is not the equivalent of smp_wmb(), so
- * this is needed to avoid the copy_huge_page writes to become
- * visible after the set_pmd_at() write.
- */
- smp_wmb();
-
spin_lock(pmd_ptl);
BUG_ON(!pmd_none(*pmd));
page_add_new_anon_rmap(new_page, vma, address, true);
mem_cgroup_uncharge(*hpage);
trace_mm_collapse_huge_page(mm, isolated, result);
return;
-out:
- goto out_up_write;
}
static int khugepaged_scan_pmd(struct mm_struct *mm,
goto out_unmap;
}
}
- if (!pte_present(pteval)) {
- result = SCAN_PTE_NON_PRESENT;
- goto out_unmap;
- }
if (pte_uffd_wp(pteval)) {
/*
* Don't collapse the page if any of the small
int i;
if (!vma || !vma->vm_file ||
- vma->vm_start > haddr || vma->vm_end < haddr + HPAGE_PMD_SIZE)
+ !range_in_vma(vma, haddr, haddr + HPAGE_PMD_SIZE))
return;
/*
goto drop_hpage;
}
-static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
{
struct mm_struct *mm = mm_slot->mm;
int i;
if (likely(mm_slot->nr_pte_mapped_thp == 0))
- return 0;
+ return;
if (!mmap_write_trylock(mm))
- return -EBUSY;
+ return;
if (unlikely(khugepaged_test_exit(mm)))
goto out;
out:
mm_slot->nr_pte_mapped_thp = 0;
mmap_write_unlock(mm);
- return 0;
}
static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff)
BUILD_BUG();
}
-static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
+static void khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot)
{
- return 0;
}
#endif
{
struct page *hpage = NULL;
unsigned int progress = 0, pass_through_head = 0;
- unsigned int pages = khugepaged_pages_to_scan;
+ unsigned int pages = READ_ONCE(khugepaged_pages_to_scan);
bool wait = true;
- barrier(); /* write khugepaged_pages_to_scan to local stack */
-
lru_add_drain_all();
while (progress < pages) {
#define SEQNR_MASK 0x0ff /* low bits of unstable tree seqnr */
#define UNSTABLE_FLAG 0x100 /* is a node of the unstable tree */
#define STABLE_FLAG 0x200 /* is listed from the stable tree */
-#define KSM_FLAG_MASK (SEQNR_MASK|UNSTABLE_FLAG|STABLE_FLAG)
- /* to mask all the flags */
/* The stable and unstable tree heads */
static struct rb_root one_stable_tree[1] = { RB_ROOT };
* but taking great care only to touch a ksm page, in a VM_MERGEABLE vma,
* in case the application has unmapped and remapped mm,addr meanwhile.
* Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP
- * mmap of /dev/mem or /dev/kmem, where we would not want to touch it.
+ * mmap of /dev/mem, where we would not want to touch it.
*
* FAULT_FLAG/FOLL_REMOTE are because we do this outside the context
* of the process that owns 'vma'. We also do not want to enforce
struct page *page;
stable_node = rmap_item->head;
- page = get_ksm_page(stable_node, GET_KSM_PAGE_LOCK);
+ page = get_ksm_page(stable_node, GET_KSM_PAGE_NOLOCK);
if (!page)
goto out;
hlist_del(&rmap_item->hlist);
- unlock_page(page);
put_page(page);
if (!hlist_empty(&stable_node->hlist))
stable_node->rmap_hlist_len--;
put_anon_vma(rmap_item->anon_vma);
+ rmap_item->head = NULL;
rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & UNSTABLE_FLAG) {
cond_resched(); /* we're called from many long loops */
}
-static void remove_trailing_rmap_items(struct mm_slot *mm_slot,
- struct rmap_item **rmap_list)
+static void remove_trailing_rmap_items(struct rmap_item **rmap_list)
{
while (*rmap_list) {
struct rmap_item *rmap_item = *rmap_list;
goto error;
}
- remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
+ remove_trailing_rmap_items(&mm_slot->rmap_list);
mmap_read_unlock(mm);
spin_lock(&ksm_mmlist_lock);
/*
* Ok this is tricky, when get_user_pages_fast() run it doesn't
* take any lock, therefore the check that we are going to make
- * with the pagecount against the mapcount is racey and
+ * with the pagecount against the mapcount is racy and
* O_DIRECT can happen right after the check.
* So we clear the pte and flush the tlb before the check
* this assure us that no O_DIRECT can happen after the check
*/
*_stable_node = found;
/*
- * Just for robustneess as stable_node is
+ * Just for robustness, as stable_node is
* otherwise left as a stable pointer, the
* compiler shall optimize it away at build
* time.
* stable_node_dup is the dup to replace.
*/
if (stable_node_dup == stable_node) {
- VM_BUG_ON(is_stable_node_chain(stable_node_dup));
VM_BUG_ON(is_stable_node_dup(stable_node_dup));
/* chain is missing so create it */
stable_node = alloc_stable_node_chain(stable_node_dup,
* of the current nid for this page
* content.
*/
- VM_BUG_ON(!is_stable_node_chain(stable_node));
VM_BUG_ON(!is_stable_node_dup(stable_node_dup));
VM_BUG_ON(page_node->head != &migrate_nodes);
list_del(&page_node->list);
* Nuke all the rmap_items that are above this current rmap:
* because there were no VM_MERGEABLE vmas with such addresses.
*/
- remove_trailing_rmap_items(slot, ksm_scan.rmap_list);
+ remove_trailing_rmap_items(ksm_scan.rmap_list);
spin_lock(&ksm_mmlist_lock);
ksm_scan.mm_slot = list_entry(slot->mm_list.next,
vma = vmac->vma;
/* Ignore the stable/unstable/sqnr flags */
- addr = rmap_item->address & ~KSM_FLAG_MASK;
+ addr = rmap_item->address & PAGE_MASK;
if (addr < vma->vm_start || addr >= vma->vm_end)
continue;
list_add_tail(item, &l->list);
/* Set shrinker bit if the first element was added */
if (!l->nr_items++)
- memcg_set_shrinker_bit(memcg, nid,
- lru_shrinker_id(lru));
+ set_shrinker_bit(memcg, nid,
+ lru_shrinker_id(lru));
nlru->nr_items++;
spin_unlock(&nlru->lock);
return true;
if (src->nr_items) {
dst->nr_items += src->nr_items;
- memcg_set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
+ set_shrinker_bit(dst_memcg, nid, lru_shrinker_id(lru));
src->nr_items = 0;
}
if (end > vma->vm_end) {
/*
* Don't fail if end > vma->vm_end. If the old
- * vma was splitted while the mmap_lock was
+ * vma was split while the mmap_lock was
* released the effect of the concurrent
* operation may not cause madvise() to
* have an undefined result. There may be an
* MADV_DODUMP - cancel MADV_DONTDUMP: no longer exclude from core dump.
* MADV_COLD - the application is not expected to use this memory soon,
* deactivate pages in this range so that they can be reclaimed
- * easily if memory pressure hanppens.
+ * easily if memory pressure happens.
* MADV_PAGEOUT - the application is not expected to use this memory soon,
* page out the pages in this range immediately.
*
#define MEMFILE_PRIVATE(x, val) ((x) << 16 | (val))
#define MEMFILE_TYPE(val) ((val) >> 16 & 0xffff)
#define MEMFILE_ATTR(val) ((val) & 0xffff)
-/* Used for OOM nofiier */
+/* Used for OOM notifier */
#define OOM_CONTROL (0)
/*
EXPORT_SYMBOL(memcg_kmem_enabled_key);
#endif
-static int memcg_shrinker_map_size;
-static DEFINE_MUTEX(memcg_shrinker_map_mutex);
-
-static void memcg_free_shrinker_map_rcu(struct rcu_head *head)
-{
- kvfree(container_of(head, struct memcg_shrinker_map, rcu));
-}
-
-static int memcg_expand_one_shrinker_map(struct mem_cgroup *memcg,
- int size, int old_size)
-{
- struct memcg_shrinker_map *new, *old;
- struct mem_cgroup_per_node *pn;
- int nid;
-
- lockdep_assert_held(&memcg_shrinker_map_mutex);
-
- for_each_node(nid) {
- pn = memcg->nodeinfo[nid];
- old = rcu_dereference_protected(pn->shrinker_map, true);
- /* Not yet online memcg */
- if (!old)
- return 0;
-
- new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid);
- if (!new)
- return -ENOMEM;
-
- /* Set all old bits, clear all new bits */
- memset(new->map, (int)0xff, old_size);
- memset((void *)new->map + old_size, 0, size - old_size);
-
- rcu_assign_pointer(pn->shrinker_map, new);
- call_rcu(&old->rcu, memcg_free_shrinker_map_rcu);
- }
-
- return 0;
-}
-
-static void memcg_free_shrinker_maps(struct mem_cgroup *memcg)
-{
- struct mem_cgroup_per_node *pn;
- struct memcg_shrinker_map *map;
- int nid;
-
- if (mem_cgroup_is_root(memcg))
- return;
-
- for_each_node(nid) {
- pn = memcg->nodeinfo[nid];
- map = rcu_dereference_protected(pn->shrinker_map, true);
- kvfree(map);
- rcu_assign_pointer(pn->shrinker_map, NULL);
- }
-}
-
-static int memcg_alloc_shrinker_maps(struct mem_cgroup *memcg)
-{
- struct memcg_shrinker_map *map;
- int nid, size, ret = 0;
-
- if (mem_cgroup_is_root(memcg))
- return 0;
-
- mutex_lock(&memcg_shrinker_map_mutex);
- size = memcg_shrinker_map_size;
- for_each_node(nid) {
- map = kvzalloc_node(sizeof(*map) + size, GFP_KERNEL, nid);
- if (!map) {
- memcg_free_shrinker_maps(memcg);
- ret = -ENOMEM;
- break;
- }
- rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_map, map);
- }
- mutex_unlock(&memcg_shrinker_map_mutex);
-
- return ret;
-}
-
-int memcg_expand_shrinker_maps(int new_id)
-{
- int size, old_size, ret = 0;
- struct mem_cgroup *memcg;
-
- size = DIV_ROUND_UP(new_id + 1, BITS_PER_LONG) * sizeof(unsigned long);
- old_size = memcg_shrinker_map_size;
- if (size <= old_size)
- return 0;
-
- mutex_lock(&memcg_shrinker_map_mutex);
- if (!root_mem_cgroup)
- goto unlock;
-
- for_each_mem_cgroup(memcg) {
- if (mem_cgroup_is_root(memcg))
- continue;
- ret = memcg_expand_one_shrinker_map(memcg, size, old_size);
- if (ret) {
- mem_cgroup_iter_break(NULL, memcg);
- goto unlock;
- }
- }
-unlock:
- if (!ret)
- memcg_shrinker_map_size = size;
- mutex_unlock(&memcg_shrinker_map_mutex);
- return ret;
-}
-
-void memcg_set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
-{
- if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
- struct memcg_shrinker_map *map;
-
- rcu_read_lock();
- map = rcu_dereference(memcg->nodeinfo[nid]->shrinker_map);
- /* Pairs with smp mb in shrink_slab() */
- smp_mb__before_atomic();
- set_bit(shrinker_id, map->map);
- rcu_read_unlock();
- }
-}
-
/**
* mem_cgroup_css_from_page - css of the memcg associated with a page
* @page: page of interest
* __count_memcg_events - account VM events in a cgroup
* @memcg: the memory cgroup
* @idx: the event item
- * @count: the number of events that occured
+ * @count: the number of events that occurred
*/
void __count_memcg_events(struct mem_cgroup *memcg, enum vm_event_item idx,
unsigned long count)
rcu_read_lock();
do {
/*
- * Page cache insertions can happen withou an
+ * Page cache insertions can happen without an
* actual mm context, e.g. during disk probing
* on boot, loopback IO, acct() writes etc.
*/
struct mem_cgroup *iter;
/*
- * Be careful about under_oom underflows becase a child memcg
+ * Be careful about under_oom underflows because a child memcg
* could have been added after mem_cgroup_mark_under_oom.
*/
spin_lock(&memcg_oom_lock);
/*
* There is no guarantee that an OOM-lock contender
* sees the wakeups triggered by the OOM kill
- * uncharges. Wake any sleepers explicitely.
+ * uncharges. Wake any sleepers explicitly.
*/
memcg_oom_recover(memcg);
}
* Foreign dirty flushing
*
* There's an inherent mismatch between memcg and writeback. The former
- * trackes ownership per-page while the latter per-inode. This was a
+ * tracks ownership per-page while the latter per-inode. This was a
* deliberate design decision because honoring per-page ownership in the
* writeback path is complicated, may lead to higher CPU and IO overheads
* and deemed unnecessary given that write-sharing an inode across
* triggering background writeback. A will be slowed down without a way to
* make writeback of the dirty pages happen.
*
- * Conditions like the above can lead to a cgroup getting repatedly and
+ * Conditions like the above can lead to a cgroup getting repeatedly and
* severely throttled after making some progress after each
- * dirty_expire_interval while the underyling IO device is almost
+ * dirty_expire_interval while the underlying IO device is almost
* completely idle.
*
* Solving this problem completely requires matching the ownership tracking
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
/*
- * A memcg must be visible for memcg_expand_shrinker_maps()
+ * A memcg must be visible for expand_shrinker_info()
* by the time the maps are allocated. So, we allocate maps
* here, when for_each_mem_cgroup() can't skip it.
*/
- if (memcg_alloc_shrinker_maps(memcg)) {
+ if (alloc_shrinker_info(memcg)) {
mem_cgroup_id_remove(memcg);
return -ENOMEM;
}
page_counter_set_low(&memcg->memory, 0);
memcg_offline_kmem(memcg);
+ reparent_shrinker_deferred(memcg);
wb_memcg_offline(memcg);
drain_all_stock(memcg);
vmpressure_cleanup(&memcg->vmpressure);
cancel_work_sync(&memcg->high_work);
mem_cgroup_remove_from_trees(memcg);
- memcg_free_shrinker_maps(memcg);
+ free_shrinker_info(memcg);
memcg_free_kmem(memcg);
mem_cgroup_free(memcg);
}
return 0;
/*
- * We are now commited to this value whatever it is. Changes in this
+ * We are now committed to this value whatever it is. Changes in this
* tunable will only affect upcoming migrations, not the current one.
* So we need to save it, and keep it going.
*/
if (dissolve_free_huge_page(page) || !take_page_off_buddy(page))
/*
* We could fail to take off the target page from buddy
- * for example due to racy page allocaiton, but that's
+ * for example due to racy page allocation, but that's
* acceptable because soft-offlined page is not broken
* and if someone really want to use it, they should
* take it.
}
- delayacct_set_flag(DELAYACCT_PF_SWAPIN);
+ delayacct_set_flag(current, DELAYACCT_PF_SWAPIN);
page = lookup_swap_cache(entry, vma, vmf->address);
swapcache = page;
vmf->address, &vmf->ptl);
if (likely(pte_same(*vmf->pte, vmf->orig_pte)))
ret = VM_FAULT_OOM;
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto unlock;
}
* owner processes (which may be unknown at hwpoison time)
*/
ret = VM_FAULT_HWPOISON;
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
goto out_release;
}
locked = lock_page_or_retry(page, vma->vm_mm, vmf->flags);
- delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
+ delayacct_clear_flag(current, DELAYACCT_PF_SWAPIN);
if (!locked) {
ret |= VM_FAULT_RETRY;
goto out_release;
return ret;
/*
- * Archs like ppc64 need additonal space to store information
+ * Archs like ppc64 need additional space to store information
* related to pte entry. Use the preallocated table for that.
*/
if (arch_needs_pgtable_deposit() && !vmf->prealloc_pte) {
}
/**
- * mm_account_fault - Do page fault accountings
+ * mm_account_fault - Do page fault accounting
*
* @regs: the pt_regs struct pointer. When set to NULL, will skip accounting
* of perf event counters, but we'll still do the per-task accounting to
* @flags: the fault flags.
* @ret: the fault retcode.
*
- * This will take care of most of the page fault accountings. Meanwhile, it
+ * This will take care of most of the page fault accounting. Meanwhile, it
* will also include the PERF_COUNT_SW_PAGE_FAULTS_[MAJ|MIN] perf counter
- * updates. However note that the handling of PERF_COUNT_SW_PAGE_FAULTS should
+ * updates. However, note that the handling of PERF_COUNT_SW_PAGE_FAULTS should
* still be in per-arch page fault handlers at the entry of page fault.
*/
static inline void mm_account_fault(struct pt_regs *regs,
/**
* generic_access_phys - generic implementation for iomem mmap access
* @vma: the vma to access
- * @addr: userspace addres, not relative offset within @vma
+ * @addr: userspace address, not relative offset within @vma
* @buf: buffer to read/write
* @len: length of transfer
* @write: set to FOLL_WRITE when writing, otherwise reading
#include "internal.h"
#include "shuffle.h"
+
+/*
+ * memory_hotplug.memmap_on_memory parameter
+ */
+static bool memmap_on_memory __ro_after_init;
+#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
+module_param(memmap_on_memory, bool, 0444);
+MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
+#endif
+
/*
* online_page_callback contains pointer to current page onlining function.
* Initially it is generic_online_page(). If it is required it could be
* decide to not expose all pages to the buddy (e.g., expose them
* later). We account all pages as being online and belonging to this
* zone ("present").
+ * When using memmap_on_memory, the range might not be aligned to
+ * MAX_ORDER_NR_PAGES - 1, but pageblock aligned. __ffs() will detect
+ * this and the first chunk to online will be pageblock_nr_pages.
*/
- for (pfn = start_pfn; pfn < end_pfn; pfn += MAX_ORDER_NR_PAGES)
- (*online_page_callback)(pfn_to_page(pfn), MAX_ORDER - 1);
+ for (pfn = start_pfn; pfn < end_pfn;) {
+ int order = min(MAX_ORDER - 1UL, __ffs(pfn));
+
+ (*online_page_callback)(pfn_to_page(pfn), order);
+ pfn += (1UL << order);
+ }
/* mark all involved sections as online */
online_mem_sections(start_pfn, end_pfn);
return movable_node_enabled ? movable_zone : kernel_zone;
}
-struct zone * zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
+struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
unsigned long nr_pages)
{
if (online_type == MMOP_ONLINE_KERNEL)
return default_zone_for_pfn(nid, start_pfn, nr_pages);
}
-int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
- int online_type, int nid)
+/*
+ * This function should only be called by memory_block_{online,offline},
+ * and {online,offline}_pages.
+ */
+void adjust_present_page_count(struct zone *zone, long nr_pages)
+{
+ unsigned long flags;
+
+ zone->present_pages += nr_pages;
+ pgdat_resize_lock(zone->zone_pgdat, &flags);
+ zone->zone_pgdat->node_present_pages += nr_pages;
+ pgdat_resize_unlock(zone->zone_pgdat, &flags);
+}
+
+int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
+ struct zone *zone)
+{
+ unsigned long end_pfn = pfn + nr_pages;
+ int ret;
+
+ ret = kasan_add_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
+ if (ret)
+ return ret;
+
+ move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);
+
+ /*
+ * It might be that the vmemmap_pages fully span sections. If that is
+ * the case, mark those sections online here as otherwise they will be
+ * left offline.
+ */
+ if (nr_pages >= PAGES_PER_SECTION)
+ online_mem_sections(pfn, ALIGN_DOWN(end_pfn, PAGES_PER_SECTION));
+
+ return ret;
+}
+
+void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages)
+{
+ unsigned long end_pfn = pfn + nr_pages;
+
+ /*
+ * It might be that the vmemmap_pages fully span sections. If that is
+ * the case, mark those sections offline here as otherwise they will be
+ * left online.
+ */
+ if (nr_pages >= PAGES_PER_SECTION)
+ offline_mem_sections(pfn, ALIGN_DOWN(end_pfn, PAGES_PER_SECTION));
+
+ /*
+ * The pages associated with this vmemmap have been offlined, so
+ * we can reset its state here.
+ */
+ remove_pfn_range_from_zone(page_zone(pfn_to_page(pfn)), pfn, nr_pages);
+ kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
+}
+
+int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone)
{
unsigned long flags;
- struct zone *zone;
int need_zonelists_rebuild = 0;
+ const int nid = zone_to_nid(zone);
int ret;
struct memory_notify arg;
- /* We can only online full sections (e.g., SECTION_IS_ONLINE) */
+ /*
+ * {on,off}lining is constrained to full memory sections (or more
+ * precisly to memory blocks from the user space POV).
+ * memmap_on_memory is an exception because it reserves initial part
+ * of the physical memory space for vmemmaps. That space is pageblock
+ * aligned.
+ */
if (WARN_ON_ONCE(!nr_pages ||
- !IS_ALIGNED(pfn | nr_pages, PAGES_PER_SECTION)))
+ !IS_ALIGNED(pfn, pageblock_nr_pages) ||
+ !IS_ALIGNED(pfn + nr_pages, PAGES_PER_SECTION)))
return -EINVAL;
mem_hotplug_begin();
/* associate pfn range with the zone */
- zone = zone_for_pfn_range(online_type, nid, pfn, nr_pages);
move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_ISOLATE);
arg.start_pfn = pfn;
}
online_pages_range(pfn, nr_pages);
- zone->present_pages += nr_pages;
-
- pgdat_resize_lock(zone->zone_pgdat, &flags);
- zone->zone_pgdat->node_present_pages += nr_pages;
- pgdat_resize_unlock(zone->zone_pgdat, &flags);
+ adjust_present_page_count(zone, nr_pages);
node_states_set_node(nid, &arg);
if (need_zonelists_rebuild)
return device_online(&mem->dev);
}
+bool mhp_supports_memmap_on_memory(unsigned long size)
+{
+ unsigned long nr_vmemmap_pages = size / PAGE_SIZE;
+ unsigned long vmemmap_size = nr_vmemmap_pages * sizeof(struct page);
+ unsigned long remaining_size = size - vmemmap_size;
+
+ /*
+ * Besides having arch support and the feature enabled at runtime, we
+ * need a few more assumptions to hold true:
+ *
+ * a) We span a single memory block: memory onlining/offlinin;g happens
+ * in memory block granularity. We don't want the vmemmap of online
+ * memory blocks to reside on offline memory blocks. In the future,
+ * we might want to support variable-sized memory blocks to make the
+ * feature more versatile.
+ *
+ * b) The vmemmap pages span complete PMDs: We don't want vmemmap code
+ * to populate memory from the altmap for unrelated parts (i.e.,
+ * other memory blocks)
+ *
+ * c) The vmemmap pages (and thereby the pages that will be exposed to
+ * the buddy) have to cover full pageblocks: memory onlining/offlining
+ * code requires applicable ranges to be page-aligned, for example, to
+ * set the migratetypes properly.
+ *
+ * TODO: Although we have a check here to make sure that vmemmap pages
+ * fully populate a PMD, it is not the right place to check for
+ * this. A much better solution involves improving vmemmap code
+ * to fallback to base pages when trying to populate vmemmap using
+ * altmap as an alternative source of memory, and we do not exactly
+ * populate a single PMD.
+ */
+ return memmap_on_memory &&
+ IS_ENABLED(CONFIG_MHP_MEMMAP_ON_MEMORY) &&
+ size == memory_block_size_bytes() &&
+ IS_ALIGNED(vmemmap_size, PMD_SIZE) &&
+ IS_ALIGNED(remaining_size, (pageblock_nr_pages << PAGE_SHIFT));
+}
+
/*
* NOTE: The caller must call lock_device_hotplug() to serialize hotplug
* and online/offline operations (triggered e.g. by sysfs).
int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
{
struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
+ struct vmem_altmap mhp_altmap = {};
u64 start, size;
bool new_node = false;
int ret;
goto error;
new_node = ret;
+ /*
+ * Self hosted memmap array
+ */
+ if (mhp_flags & MHP_MEMMAP_ON_MEMORY) {
+ if (!mhp_supports_memmap_on_memory(size)) {
+ ret = -EINVAL;
+ goto error;
+ }
+ mhp_altmap.free = PHYS_PFN(size);
+ mhp_altmap.base_pfn = PHYS_PFN(start);
+ params.altmap = &mhp_altmap;
+ }
+
/* call arch's memory hotadd */
ret = arch_add_memory(nid, start, size, ¶ms);
if (ret < 0)
goto error;
/* create memory block devices after memory was added */
- ret = create_memory_block_devices(start, size);
+ ret = create_memory_block_devices(start, size, mhp_altmap.alloc);
if (ret) {
arch_remove_memory(nid, start, size, NULL);
goto error;
int ret, node;
char *reason;
- /* We can only offline full sections (e.g., SECTION_IS_ONLINE) */
+ /*
+ * {on,off}lining is constrained to full memory sections (or more
+ * precisly to memory blocks from the user space POV).
+ * memmap_on_memory is an exception because it reserves initial part
+ * of the physical memory space for vmemmaps. That space is pageblock
+ * aligned.
+ */
if (WARN_ON_ONCE(!nr_pages ||
- !IS_ALIGNED(start_pfn | nr_pages, PAGES_PER_SECTION)))
+ !IS_ALIGNED(start_pfn, pageblock_nr_pages) ||
+ !IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION)))
return -EINVAL;
mem_hotplug_begin();
* in a way that pages from isolated pageblock are left on pcplists.
*/
zone_pcp_disable(zone);
+ lru_cache_disable();
/* set above range as isolated */
ret = start_isolate_page_range(start_pfn, end_pfn,
}
cond_resched();
- lru_add_drain_all();
ret = scan_movable_pages(pfn, end_pfn, &pfn);
if (!ret) {
zone->nr_isolate_pageblock -= nr_pages / pageblock_nr_pages;
spin_unlock_irqrestore(&zone->lock, flags);
+ lru_cache_enable();
zone_pcp_enable(zone);
/* removal success */
adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
- zone->present_pages -= nr_pages;
-
- pgdat_resize_lock(zone->zone_pgdat, &flags);
- zone->zone_pgdat->node_present_pages -= nr_pages;
- pgdat_resize_unlock(zone->zone_pgdat, &flags);
+ adjust_present_page_count(zone, -nr_pages);
init_per_zone_wmark_min();
return 0;
}
+static int get_nr_vmemmap_pages_cb(struct memory_block *mem, void *arg)
+{
+ /*
+ * If not set, continue with the next block.
+ */
+ return mem->nr_vmemmap_pages;
+}
+
static int check_cpu_on_node(pg_data_t *pgdat)
{
int cpu;
static int __ref try_remove_memory(int nid, u64 start, u64 size)
{
int rc = 0;
+ struct vmem_altmap mhp_altmap = {};
+ struct vmem_altmap *altmap = NULL;
+ unsigned long nr_vmemmap_pages;
BUG_ON(check_hotplug_memory_range(start, size));
if (rc)
return rc;
+ /*
+ * We only support removing memory added with MHP_MEMMAP_ON_MEMORY in
+ * the same granularity it was added - a single memory block.
+ */
+ if (memmap_on_memory) {
+ nr_vmemmap_pages = walk_memory_blocks(start, size, NULL,
+ get_nr_vmemmap_pages_cb);
+ if (nr_vmemmap_pages) {
+ if (size != memory_block_size_bytes()) {
+ pr_warn("Refuse to remove %#llx - %#llx,"
+ "wrong granularity\n",
+ start, start + size);
+ return -EINVAL;
+ }
+
+ /*
+ * Let remove_pmd_table->free_hugepage_table do the
+ * right thing if we used vmem_altmap when hot-adding
+ * the range.
+ */
+ mhp_altmap.alloc = nr_vmemmap_pages;
+ altmap = &mhp_altmap;
+ }
+ }
+
/* remove memmap entry */
firmware_map_remove(start, start + size, "System RAM");
mem_hotplug_begin();
- arch_remove_memory(nid, start, size, NULL);
+ arch_remove_memory(nid, start, size, altmap);
if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
memblock_free(start, size);
else if (pol->flags & MPOL_F_RELATIVE_NODES)
mpol_relative_nodemask(&tmp, &pol->w.user_nodemask, nodes);
else {
- nodes_remap(tmp, pol->v.nodes,pol->w.cpuset_mems_allowed,
+ nodes_remap(tmp, pol->v.nodes, pol->w.cpuset_mems_allowed,
*nodes);
pol->w.cpuset_mems_allowed = *nodes;
}
if (flags & MPOL_F_ADDR) {
/*
* Take a refcount on the mpol, lookup_node()
- * wil drop the mmap_lock, so after calling
+ * will drop the mmap_lock, so after calling
* lookup_node() only "pol" remains valid, "vma"
* is stale.
*/
int err = 0;
nodemask_t tmp;
- migrate_prep();
+ lru_cache_disable();
mmap_read_lock(mm);
tmp = *from;
while (!nodes_empty(tmp)) {
- int s,d;
+ int s, d;
int source = NUMA_NO_NODE;
int dest = 0;
break;
}
mmap_read_unlock(mm);
+
+ lru_cache_enable();
if (err < 0)
return err;
return busy;
if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) {
- migrate_prep();
+ lru_cache_disable();
}
{
NODEMASK_SCRATCH(scratch);
mmap_write_unlock(mm);
mpol_out:
mpol_put(new);
+ if (flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL))
+ lru_cache_enable();
return err;
}
* we apply policy when gfp_zone(gfp) = ZONE_MOVABLE only.
*
* policy->v.nodes is intersect with node_states[N_MEMORY].
- * so if the following test faile, it implies
+ * so if the following test fails, it implies
* policy->v.nodes has movable memory only.
*/
if (!nodes_intersects(policy->v.nodes, node_states[N_HIGH_MEMORY]))
*
* If tsk's mempolicy is "default" [NULL], return 'true' to indicate default
* policy. Otherwise, check for intersection between mask and the policy
- * nodemask for 'bind' or 'interleave' policy. For 'perferred' or 'local'
+ * nodemask for 'bind' or 'interleave' policy. For 'preferred' or 'local'
* policy, always return true since it may allocate elsewhere on fallback.
*
* Takes task_lock(tsk) to prevent freeing of its mempolicy.
mempool_t *mempool_create(int min_nr, mempool_alloc_t *alloc_fn,
mempool_free_t *free_fn, void *pool_data)
{
- return mempool_create_node(min_nr,alloc_fn,free_fn, pool_data,
+ return mempool_create_node(min_nr, alloc_fn, free_fn, pool_data,
GFP_KERNEL, NUMA_NO_NODE);
}
EXPORT_SYMBOL(mempool_create);
#include "internal.h"
-/*
- * migrate_prep() needs to be called before we start compiling a list of pages
- * to be migrated using isolate_lru_page(). If scheduling work on other CPUs is
- * undesirable, use migrate_prep_local()
- */
-void migrate_prep(void)
-{
- /*
- * Clear the LRU lists so pages can be isolated.
- * Note that pages may be moved off the LRU after we have
- * drained them. Those pages will fail to migrate like other
- * pages that may be busy.
- */
- lru_add_drain_all();
-}
-
-/* Do the necessary work of migrate_prep but not if it involves other CPUs */
-void migrate_prep_local(void)
-{
- lru_add_drain();
-}
-
int isolate_movable_page(struct page *page, isolate_mode_t mode)
{
struct address_space *mapping;
return -EBUSY;
}
-/* It should be called on page which is PG_movable */
-void putback_movable_page(struct page *page)
+static void putback_movable_page(struct page *page)
{
struct address_space *mapping;
- VM_BUG_ON_PAGE(!PageLocked(page), page);
- VM_BUG_ON_PAGE(!PageMovable(page), page);
- VM_BUG_ON_PAGE(!PageIsolated(page), page);
-
mapping = page_mapping(page);
mapping->a_ops->putback_page(page);
__ClearPageIsolated(page);
out:
if (rc == MIGRATEPAGE_SUCCESS)
putback_active_hugepage(hpage);
- else if (rc != -EAGAIN && rc != MIGRATEPAGE_SUCCESS)
+ else if (rc != -EAGAIN)
list_move_tail(&hpage->lru, ret);
/*
int rc, nr_subpages;
LIST_HEAD(ret_pages);
+ trace_mm_migrate_pages_start(mode, reason);
+
if (!swapwrite)
current->flags |= PF_SWAPWRITE;
int start, i;
int err = 0, err1;
- migrate_prep();
+ lru_cache_disable();
for (i = start = 0; i < nr_pages; i++) {
const void __user *p;
if (err >= 0)
err = err1;
out:
+ lru_cache_enable();
return err;
}
return PageLocked(page);
}
-static inline bool is_shared_exec_page(struct vm_area_struct *vma,
- struct page *page)
-{
- if (page_mapcount(page) != 1 &&
- (page_is_file_lru(page) || vma_is_shmem(vma)) &&
- (vma->vm_flags & VM_EXEC))
- return true;
-
- return false;
-}
-
/*
* Attempt to migrate a misplaced page to the specified destination
* node. Caller is expected to have an elevated reference count on
* Don't migrate file pages that are mapped in multiple processes
* with execute permissions as they are probably shared libraries.
*/
- if (is_shared_exec_page(vma, page))
+ if (page_mapcount(page) != 1 && page_is_file_lru(page) &&
+ (vma->vm_flags & VM_EXEC))
goto out;
/*
int page_lru = page_is_file_lru(page);
unsigned long start = address & HPAGE_PMD_MASK;
- if (is_shared_exec_page(vma, page))
- goto out;
-
new_page = alloc_pages_node(node,
(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
HPAGE_PMD_ORDER);
out_unlock:
unlock_page(page);
-out:
put_page(page);
return 0;
}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_DEVICE_PRIVATE
-static int migrate_vma_collect_hole(unsigned long start,
+static int migrate_vma_collect_skip(unsigned long start,
unsigned long end,
- __always_unused int depth,
struct mm_walk *walk)
{
struct migrate_vma *migrate = walk->private;
unsigned long addr;
- /* Only allow populating anonymous memory. */
- if (!vma_is_anonymous(walk->vma)) {
- for (addr = start; addr < end; addr += PAGE_SIZE) {
- migrate->src[migrate->npages] = 0;
- migrate->dst[migrate->npages] = 0;
- migrate->npages++;
- }
- return 0;
- }
-
for (addr = start; addr < end; addr += PAGE_SIZE) {
- migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
migrate->dst[migrate->npages] = 0;
- migrate->npages++;
- migrate->cpages++;
+ migrate->src[migrate->npages++] = 0;
}
return 0;
}
-static int migrate_vma_collect_skip(unsigned long start,
+static int migrate_vma_collect_hole(unsigned long start,
unsigned long end,
+ __always_unused int depth,
struct mm_walk *walk)
{
struct migrate_vma *migrate = walk->private;
unsigned long addr;
+ /* Only allow populating anonymous memory. */
+ if (!vma_is_anonymous(walk->vma))
+ return migrate_vma_collect_skip(start, end, walk);
+
for (addr = start; addr < end; addr += PAGE_SIZE) {
+ migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
migrate->dst[migrate->npages] = 0;
- migrate->src[migrate->npages++] = 0;
+ migrate->npages++;
+ migrate->cpages++;
}
return 0;
*
* For empty entries inside CPU page table (pte_none() or pmd_none() is true) we
* do set MIGRATE_PFN_MIGRATE flag inside the corresponding source array thus
- * allowing the caller to allocate device memory for those unback virtual
- * address. For this the caller simply has to allocate device memory and
+ * allowing the caller to allocate device memory for those unbacked virtual
+ * addresses. For this the caller simply has to allocate device memory and
* properly set the destination entry like for regular migration. Note that
- * this can still fails and thus inside the device driver must check if the
- * migration was successful for those entries after calling migrate_vma_pages()
+ * this can still fail, and thus inside the device driver you must check if the
+ * migration was successful for those entries after calling migrate_vma_pages(),
* just like for regular migration.
*
* After that, the callers must call migrate_vma_pages() to go over each entry
swp_entry = make_device_private_entry(page, vma->vm_flags & VM_WRITE);
entry = swp_entry_to_pte(swp_entry);
+ } else {
+ /*
+ * For now we only support migrating to un-addressable
+ * device memory.
+ */
+ pr_warn_once("Unsupported ZONE_DEVICE page type.\n");
+ goto abort;
}
} else {
entry = mk_pte(page, vma->vm_page_prot);
vm_flags_t flags)
{
unsigned long nstart, end, tmp;
- struct vm_area_struct * vma, * prev;
+ struct vm_area_struct *vma, *prev;
int error;
VM_BUG_ON(offset_in_page(start));
*/
static int apply_mlockall_flags(int flags)
{
- struct vm_area_struct * vma, * prev = NULL;
+ struct vm_area_struct *vma, *prev = NULL;
vm_flags_t to_add = 0;
current->mm->def_flags &= VM_LOCKED_CLEAR_MASK;
unsigned long nr_pages = 0;
struct vm_area_struct *vma;
- /* Find first overlaping mapping */
+ /* Find first overlapping mapping */
vma = find_vma_intersection(mm, addr, end);
if (!vma)
return 0;
if (unlikely(uf)) {
/*
* If userfaultfd_unmap_prep returns an error the vmas
- * will remain splitted, but userland will get a
+ * will remain split, but userland will get a
* highly unexpected error anyway. This is no
* different than the case where the first of the two
* __split_vma fails, but we don't undo the first
flags &= MAP_NONBLOCK;
flags |= MAP_SHARED | MAP_FIXED | MAP_POPULATE;
- if (vma->vm_flags & VM_LOCKED) {
- struct vm_area_struct *tmp;
+ if (vma->vm_flags & VM_LOCKED)
flags |= MAP_LOCKED;
- /* drop PG_Mlocked flag for over-mapped range */
- for (tmp = vma; tmp->vm_start >= start + size;
- tmp = tmp->vm_next) {
- /*
- * Split pmd and munlock page on the border
- * of the range.
- */
- vma_adjust_trans_huge(tmp, start, start + size, 0);
-
- munlock_vma_pages_range(tmp,
- max(tmp->vm_start, start),
- min(tmp->vm_end, start + size));
- }
- }
-
file = get_file(vma->vm_file);
ret = do_mmap(vma->vm_file, start, size,
prot, flags, pgoff, &populate, NULL);
mmap_write_unlock(current->mm);
/*
- * We could provie warnings or errors if any VMA still
+ * We could provide warnings or errors if any VMA still
* has the pkey set here.
*/
return ret;
* So, to avoid such scenario we can pre-compute if the whole
* operation has high chances to success map-wise.
* Worst-scenario case is when both vma's (new_addr and old_addr) get
- * split in 3 before unmaping it.
+ * split in 3 before unmapping it.
* That means 2 more maps (1 for each) to the ones we already hold.
* Check whether current map count plus 2 still leads us to 4 maps below
* the threshold, otherwise return -ENOMEM here to be more safe.
return count;
}
-long vwrite(char *buf, char *addr, unsigned long count)
-{
- /* Don't allow overflow */
- if ((unsigned long) addr + count < count)
- count = -(unsigned long) addr;
-
- memcpy(addr, buf, count);
- return count;
-}
-
/*
* vmalloc - allocate virtually contiguous memory
*
#ifdef CONFIG_NUMA
/**
- * oom_cpuset_eligible() - check task eligiblity for kill
+ * oom_cpuset_eligible() - check task eligibility for kill
* @start: task struct of which task to consider
* @oc: pointer to struct oom_control
*
if (oom_group) {
mem_cgroup_print_oom_group(oom_group);
mem_cgroup_scan_tasks(oom_group, oom_kill_memcg_member,
- (void*)message);
+ (void *)message);
mem_cgroup_put(oom_group);
}
}
break;
/*
- * In the case of an unresponding NFS server and the NFS dirty
+ * In the case of an unresponsive NFS server and the NFS dirty
* pages exceeds dirty_thresh, give the other good wb's a pipe
* to go through, so that tasks on them still remain responsive.
*
* Page truncated or invalidated. We can freely skip it
* then, even for data integrity operations: the page
* has disappeared concurrently, so there could be no
- * real expectation of this data interity operation
+ * real expectation of this data integrity operation
* even if there is now a new, dirty page at the same
* pagecache address.
*/
return false;
/*
- * Do not let lower order allocations polluate a movable pageblock.
+ * Do not let lower order allocations pollute a movable pageblock.
* This might let an unmovable request use a reclaimable pageblock
* and vice-versa but no more than normal fallback logic which can
* have trouble finding a high-order free page.
/*
* In page freeing path, migratetype change is racy so
* we can counter several free pages in a pageblock
- * in this loop althoug we changed the pageblock type
+ * in this loop although we changed the pageblock type
* from highatomic to ac->migratetype. So we should
* adjust the count once.
*/
* drain_all_pages doesn't use proper cpu hotplug protection so
* we can race with cpu offline when the WQ can move this from
* a cpu pinned worker to an unbound one. We can operate on a different
- * cpu which is allright but we also have to make sure to not move to
+ * cpu which is alright but we also have to make sure to not move to
* a different one.
*/
preempt_disable();
return alloc_flags;
}
-static inline unsigned int current_alloc_flags(gfp_t gfp_mask,
- unsigned int alloc_flags)
+/* Must be called after current_gfp_context() which can change gfp_mask */
+static inline unsigned int gfp_to_alloc_flags_cma(gfp_t gfp_mask,
+ unsigned int alloc_flags)
{
#ifdef CONFIG_CMA
- unsigned int pflags = current->flags;
-
- if (!(pflags & PF_MEMALLOC_NOCMA) &&
- gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+ if (gfp_migratetype(gfp_mask) == MIGRATE_MOVABLE)
alloc_flags |= ALLOC_CMA;
-
#endif
return alloc_flags;
}
if (alloc_flags & ALLOC_NO_WATERMARKS)
goto try_this_zone;
- if (node_reclaim_mode == 0 ||
+ if (!node_reclaim_enabled() ||
!zone_allows_reclaim(ac->preferred_zoneref->zone, zone))
continue;
}
/*
- * Maximum number of compaction retries wit a progress before OOM
+ * Maximum number of compaction retries with a progress before OOM
* killer is consider as the only way to move forward.
*/
#define MAX_COMPACT_RETRIES 16
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
+ if (*compact_result == COMPACT_SKIPPED)
+ return NULL;
/*
* At least in one zone compaction wasn't deferred or skipped, so let's
* count a compaction stall
} else if (unlikely(rt_task(current)) && !in_interrupt())
alloc_flags |= ALLOC_HARDER;
- alloc_flags = current_alloc_flags(gfp_mask, alloc_flags);
+ alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
return alloc_flags;
}
reserve_flags = __gfp_pfmemalloc_flags(gfp_mask);
if (reserve_flags)
- alloc_flags = current_alloc_flags(gfp_mask, reserve_flags);
+ alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, reserve_flags);
/*
* Reset the nodemask and zonelist iterators if memory policies can be
if (should_fail_alloc_page(gfp_mask, order))
return false;
- *alloc_flags = current_alloc_flags(gfp_mask, *alloc_flags);
+ *alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, *alloc_flags);
/* Dirty zone balancing only done in the fast path */
ac->spread_dirty_pages = (gfp_mask & __GFP_WRITE);
}
gfp &= gfp_allowed_mask;
+ /*
+ * Apply scoped allocation constraints. This is mainly about GFP_NOFS
+ * resp. GFP_NOIO which has to be inherited for all allocation requests
+ * from a particular context which has been marked by
+ * memalloc_no{fs,io}_{save,restore}. And PF_MEMALLOC_PIN which ensures
+ * movable zones are not used during allocation.
+ */
+ gfp = current_gfp_context(gfp);
alloc_gfp = gfp;
if (!prepare_alloc_pages(gfp, order, preferred_nid, nodemask, &ac,
&alloc_gfp, &alloc_flags))
if (likely(page))
goto out;
- /*
- * Apply scoped allocation constraints. This is mainly about GFP_NOFS
- * resp. GFP_NOIO which has to be inherited for all allocation requests
- * from a particular context which has been marked by
- * memalloc_no{fs,io}_{save,restore}.
- */
- alloc_gfp = current_gfp_context(gfp);
+ alloc_gfp = gfp;
ac.spread_dirty_pages = false;
/*
static int __parse_numa_zonelist_order(char *s)
{
/*
- * We used to support different zonlists modes but they turned
+ * We used to support different zonelists modes but they turned
* out to be just not useful. Let's keep the warning in place
* if somebody still use the cmd line parameter so that we do
* not fail it silently
}
/*
- * Some architecturs, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For
+ * Some architectures, e.g. ARC may have ZONE_HIGHMEM below ZONE_NORMAL. For
* such cases we allow max_zone_pfn sorted in the descending order
*/
bool __weak arch_has_descending_max_zone_pfns(void)
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
- migrate_prep();
+ lru_cache_disable();
while (pfn < end || !list_empty(&cc->migratepages)) {
if (fatal_signal_pending(current)) {
if (list_empty(&cc->migratepages)) {
cc->nr_migratepages = 0;
- pfn = isolate_migratepages_range(cc, pfn, end);
- if (!pfn) {
- ret = -EINTR;
+ ret = isolate_migratepages_range(cc, pfn, end);
+ if (ret && ret != -EAGAIN)
break;
- }
+ pfn = cc->migrate_pfn;
tries = 0;
} else if (++tries == 5) {
- ret = ret < 0 ? ret : -EBUSY;
+ ret = -EBUSY;
break;
}
ret = migrate_pages(&cc->migratepages, alloc_migration_target,
NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
+
+ /*
+ * On -ENOMEM, migrate_pages() bails out right away. It is pointless
+ * to retry again over this error, so do the same here.
+ */
+ if (ret == -ENOMEM)
+ break;
}
+
+ lru_cache_enable();
if (ret < 0) {
alloc_contig_dump_pages(&cc->migratepages);
putback_movable_pages(&cc->migratepages);
* alloc_contig_range() -- tries to allocate given range of pages
* @start: start PFN to allocate
* @end: one-past-the-last PFN to allocate
- * @migratetype: migratetype of the underlaying pageblocks (either
+ * @migratetype: migratetype of the underlying pageblocks (either
* #MIGRATE_MOVABLE or #MIGRATE_CMA). All pageblocks
* in range must have the same migratetype and it must
* be either of the two.
ret = __alloc_contig_migrate_range(&cc, start, end);
if (ret && ret != -EBUSY)
goto done;
- ret =0;
+ ret = 0;
/*
* Pages from [start, end) are within a MAX_ORDER_NR_PAGES
if (PageReserved(page))
return false;
-
- if (page_count(page) > 0)
- return false;
-
- if (PageHuge(page))
- return false;
}
return true;
}
}
#endif /* CONFIG_CONTIG_ALLOC */
-void free_contig_range(unsigned long pfn, unsigned int nr_pages)
+void free_contig_range(unsigned long pfn, unsigned long nr_pages)
{
- unsigned int count = 0;
+ unsigned long count = 0;
for (; nr_pages--; pfn++) {
struct page *page = pfn_to_page(pfn);
count += page_count(page) != 1;
__free_page(page);
}
- WARN(count != 0, "%d pages are still in use!\n", count);
+ WARN(count != 0, "%lu pages are still in use!\n", count);
}
EXPORT_SYMBOL(free_contig_range);
/*
* The zone indicated has a new number of managed_pages; batch sizes and percpu
- * page high values need to be recalulated.
+ * page high values need to be recalculated.
*/
void __meminit zone_pcp_update(struct zone *zone)
{
void zone_pcp_reset(struct zone *zone)
{
- unsigned long flags;
int cpu;
struct per_cpu_pageset *pset;
- /* avoid races with drain_pages() */
- local_irq_save(flags);
if (zone->pageset != &boot_pageset) {
for_each_online_cpu(cpu) {
pset = per_cpu_ptr(zone->pageset, cpu);
free_percpu(zone->pageset);
zone->pageset = &boot_pageset;
}
- local_irq_restore(flags);
}
#ifdef CONFIG_MEMORY_HOTREMOVE
/*
* We don't clear the bit on the oldpage as it's going to be freed
* after migration. Until then, the info can be useful in case of
- * a bug, and the overal stats will be off a bit only temporarily.
+ * a bug, and the overall stats will be off a bit only temporarily.
* Also, migrate_misplaced_transhuge_page() can still fail the
* migration and then we want the oldpage to retain the info. But
* in that case we also don't need to explicitly clear the info from
* regardless of which page table level the page is mapped at. @pvmw->pmd is
* NULL.
*
- * Retruns false if there are no more page table entries for the page in
+ * Returns false if there are no more page table entries for the page in
* the vma. @pvmw->ptl is unlocked and @pvmw->pte is unmapped.
*
* If you need to stop the walk before page_vma_mapped_walk() returned false,
u64 nr_max_alloc; /* max # of live allocations */
u32 nr_chunks; /* current # of live chunks */
u32 nr_max_chunks; /* max # of live chunks */
- size_t min_alloc_size; /* min allocaiton size */
+ size_t min_alloc_size; /* min allocation size */
size_t max_alloc_size; /* max allocation size */
};
pr_info("limit reached, disable warning\n");
}
if (is_atomic) {
- /* see the flag handling in pcpu_blance_workfn() */
+ /* see the flag handling in pcpu_balance_workfn() */
pcpu_atomic_alloc_failed = true;
pcpu_schedule_balance_work();
} else {
/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _LINUX_PGALLLC_TRACK_H
-#define _LINUX_PGALLLC_TRACK_H
+#ifndef _LINUX_PGALLOC_TRACK_H
+#define _LINUX_PGALLOC_TRACK_H
#if defined(CONFIG_MMU)
static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
(__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
NULL: pte_offset_kernel(pmd, address))
-#endif /* _LINUX_PGALLLC_TRACK_H */
+#endif /* _LINUX_PGALLOC_TRACK_H */
#include <linux/mm.h>
#include <linux/uio.h>
#include <linux/sched.h>
-#include <linux/compat.h>
#include <linux/sched/mm.h>
#include <linux/highmem.h>
#include <linux/ptrace.h>
* Attach the anon_vmas from src to dst.
* Returns 0 on success, -ENOMEM on failure.
*
- * anon_vma_clone() is called by __vma_split(), __split_vma(), copy_vma() and
+ * anon_vma_clone() is called by __vma_adjust(), __split_vma(), copy_vma() and
* anon_vma_fork(). The first three want an exact copy of src, while the last
* one, anon_vma_fork(), may try to reuse an existing anon_vma to prevent
* endless growth of anon_vma. Since dst->anon_vma is set to NULL before call,
}
}
if (*this_char) {
- char *value = strchr(this_char,'=');
+ char *value = strchr(this_char, '=');
size_t len = 0;
int err;
#define BATCHREFILL_LIMIT 16
/*
- * Optimization question: fewer reaps means less probability for unnessary
+ * Optimization question: fewer reaps means less probability for unnecessary
* cpucache drain/refill cycles.
*
* OTOH the cpuarrays can contain lots of objects,
* Because if it is the case, that means we defer the creation of
* the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
* And we eventually call down to __kmem_cache_create(), which
- * in turn looks up in the kmalloc_{dma,}_caches for the disired-size one.
+ * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
* This is a "chicken-and-egg" problem.
*
* So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
};
/*
- * Initialize the state based on the randomization methode available.
- * return true if the pre-computed list is available, false otherwize.
+ * Initialize the state based on the randomization method available.
+ * return true if the pre-computed list is available, false otherwise.
*/
static bool freelist_state_initialize(union freelist_init_state *state,
struct kmem_cache *cachep,
*/
/*
- * Mininum / Maximum order of slab pages. This influences locking overhead
+ * Minimum / Maximum order of slab pages. This influences locking overhead
* and slab fragmentation. A higher order reduces the number of partial slabs
* and increases the number of allocations possible without having to
* take the list_lock.
if (unlikely(!mem_section)) {
unsigned long size, align;
- size = sizeof(struct mem_section*) * NR_SECTION_ROOTS;
+ size = sizeof(struct mem_section *) * NR_SECTION_ROOTS;
align = 1 << (INTERNODE_CACHE_SHIFT);
mem_section = memblock_alloc(size, align);
if (!mem_section)
}
}
-#ifdef CONFIG_MEMORY_HOTREMOVE
/* Mark all memory sections within the pfn range as offline */
void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
{
ms->section_mem_map &= ~SECTION_IS_ONLINE;
}
}
-#endif
#ifdef CONFIG_SPARSEMEM_VMEMMAP
static struct page * __meminit populate_section_memmap(unsigned long pfn,
#include <linux/hugetlb.h>
#include <linux/page_idle.h>
#include <linux/local_lock.h>
+#include <linux/buffer_head.h>
#include "internal.h"
}
}
+/* return true if pagevec needs to drain */
+static bool pagevec_add_and_need_flush(struct pagevec *pvec, struct page *page)
+{
+ bool ret = false;
+
+ if (!pagevec_add(pvec, page) || PageCompound(page) ||
+ lru_cache_disabled())
+ ret = true;
+
+ return ret;
+}
+
/*
* Writeback is about to end against a page which has been marked for immediate
* reclaim. If it still appears to be reclaimable, move it to the tail of the
get_page(page);
local_lock_irqsave(&lru_rotate.lock, flags);
pvec = this_cpu_ptr(&lru_rotate.pvec);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, pagevec_move_tail_fn);
local_unlock_irqrestore(&lru_rotate.lock, flags);
}
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.activate_page);
get_page(page);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, __activate_page);
local_unlock(&lru_pvecs.lock);
}
get_page(page);
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_add);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
__pagevec_lru_add(pvec);
local_unlock(&lru_pvecs.lock);
}
if (unlikely(unevictable) && !TestSetPageMlocked(page)) {
int nr_pages = thp_nr_pages(page);
/*
- * We use the irq-unsafe __mod_zone_page_stat because this
+ * We use the irq-unsafe __mod_zone_page_state because this
* counter is not modified from interrupt context, and the pte
* lock is held(spinlock), which implies preemption disabled.
*/
pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
activate_page_drain(cpu);
+ invalidate_bh_lrus_cpu(cpu);
}
/**
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate_file);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_file_fn);
local_unlock(&lru_pvecs.lock);
}
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_deactivate);
get_page(page);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, lru_deactivate_fn);
local_unlock(&lru_pvecs.lock);
}
local_lock(&lru_pvecs.lock);
pvec = this_cpu_ptr(&lru_pvecs.lru_lazyfree);
get_page(page);
- if (!pagevec_add(pvec, page) || PageCompound(page))
+ if (pagevec_add_and_need_flush(pvec, page))
pagevec_lru_move_fn(pvec, lru_lazyfree_fn);
local_unlock(&lru_pvecs.lock);
}
* Calling this function with cpu hotplug locks held can actually lead
* to obscure indirect dependencies via WQ context.
*/
-void lru_add_drain_all(void)
+inline void __lru_add_drain_all(bool force_all_cpus)
{
/*
* lru_drain_gen - Global pages generation number
* (C) Exit the draining operation if a newer generation, from another
* lru_add_drain_all(), was already scheduled for draining. Check (A).
*/
- if (unlikely(this_gen != lru_drain_gen))
+ if (unlikely(this_gen != lru_drain_gen && !force_all_cpus))
goto done;
/*
* below which drains the page vectors.
*
* Let x, y, and z represent some system CPU numbers, where x < y < z.
- * Assume CPU #z is is in the middle of the for_each_online_cpu loop
+ * Assume CPU #z is in the middle of the for_each_online_cpu loop
* below and has already reached CPU #y's per-cpu data. CPU #x comes
* along, adds some pages to its per-cpu vectors, then calls
* lru_add_drain_all().
for_each_online_cpu(cpu) {
struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
- if (pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
+ if (force_all_cpus ||
+ pagevec_count(&per_cpu(lru_pvecs.lru_add, cpu)) ||
data_race(pagevec_count(&per_cpu(lru_rotate.pvec, cpu))) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate_file, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_deactivate, cpu)) ||
pagevec_count(&per_cpu(lru_pvecs.lru_lazyfree, cpu)) ||
- need_activate_page_drain(cpu)) {
+ need_activate_page_drain(cpu) ||
+ has_bh_in_lru(cpu, NULL)) {
INIT_WORK(work, lru_add_drain_per_cpu);
queue_work_on(cpu, mm_percpu_wq, work);
__cpumask_set_cpu(cpu, &has_work);
done:
mutex_unlock(&lock);
}
+
+void lru_add_drain_all(void)
+{
+ __lru_add_drain_all(false);
+}
#else
void lru_add_drain_all(void)
{
}
#endif /* CONFIG_SMP */
+atomic_t lru_disable_count = ATOMIC_INIT(0);
+
+/*
+ * lru_cache_disable() needs to be called before we start compiling
+ * a list of pages to be migrated using isolate_lru_page().
+ * It drains pages on LRU cache and then disable on all cpus until
+ * lru_cache_enable is called.
+ *
+ * Must be paired with a call to lru_cache_enable().
+ */
+void lru_cache_disable(void)
+{
+ atomic_inc(&lru_disable_count);
+#ifdef CONFIG_SMP
+ /*
+ * lru_add_drain_all in the force mode will schedule draining on
+ * all online CPUs so any calls of lru_cache_disabled wrapped by
+ * local_lock or preemption disabled would be ordered by that.
+ * The atomic operation doesn't need to have stronger ordering
+ * requirements because that is enforeced by the scheduling
+ * guarantees.
+ */
+ __lru_add_drain_all(true);
+#else
+ lru_add_drain();
+#endif
+}
+
/**
* release_pages - batched put_page()
* @pages: array of pages to release
* to local caches without needing to acquire swap_info
* lock. We do not reuse the returned slots directly but
* move them back to the global pool in a batch. This
- * allows the slots to coaellesce and reduce fragmentation.
+ * allows the slots to coalesce and reduce fragmentation.
*
* The swap entry allocated is marked with SWAP_HAS_CACHE
* flag in map_count that prevents it from being allocated
xas_store(&xas, page);
xas_next(&xas);
}
- address_space->nrexceptional -= nr_shadows;
address_space->nrpages += nr;
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
__mod_lruvec_page_state(page, NR_SWAPCACHE, nr);
xas_next(&xas);
}
ClearPageSwapCache(page);
- if (shadow)
- address_space->nrexceptional += nr;
address_space->nrpages -= nr;
__mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
__mod_lruvec_page_state(page, NR_SWAPCACHE, -nr);
xas_store(&xas, NULL);
nr_shadows++;
}
- address_space->nrexceptional -= nr_shadows;
xa_unlock_irq(&address_space->i_pages);
/* search the next swapcache until we meet end */
*
* Returns the struct page for entry and addr, after queueing swapin.
*
- * Primitive swap readahead code. We simply read in a few pages whoes
+ * Primitive swap readahead code. We simply read in a few pages whose
* virtual addresses are around the fault address in the same vma.
*
* Caller must hold read mmap_lock if vmf->vma is not NULL.
unsigned int bytes, inuse;
if (si == SEQ_START_TOKEN) {
- seq_puts(swap,"Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");
+ seq_puts(swap, "Filename\t\t\t\tType\t\tSize\t\tUsed\t\tPriority\n");
return 0;
}
sizeof(long),
GFP_KERNEL);
- if (p->bdev &&(swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
+ if (p->bdev && (swap_flags & SWAP_FLAG_DISCARD) && swap_discardable(p)) {
/*
* When discard is enabled for swap with no particular
* policy flagged, we set all swap discard flags here in
if (xas_load(&xas) != entry)
return;
xas_store(&xas, NULL);
- mapping->nrexceptional--;
}
static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
pgoff_t index;
int i;
- if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
+ if (mapping_empty(mapping))
goto out;
/* Offsets within partial pages */
*/
void truncate_inode_pages_final(struct address_space *mapping)
{
- unsigned long nrexceptional;
- unsigned long nrpages;
-
/*
* Page reclaim can not participate in regular inode lifetime
* management (can't call iput()) and thus can race with the
*/
mapping_set_exiting(mapping);
- /*
- * When reclaim installs eviction entries, it increases
- * nrexceptional first, then decreases nrpages. Make sure we see
- * this in the right order or we might miss an entry.
- */
- nrpages = mapping->nrpages;
- smp_rmb();
- nrexceptional = mapping->nrexceptional;
-
- if (nrpages || nrexceptional) {
+ if (!mapping_empty(mapping)) {
/*
* As truncation uses a lockless tree lookup, cycle
* the tree lock to make sure any ongoing tree
int ret2 = 0;
int did_range_unmap = 0;
- if (mapping->nrpages == 0 && mapping->nrexceptional == 0)
+ if (mapping_empty(mapping))
goto out;
pagevec_init(&pvec);
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
- bool zeropage)
+ enum mcopy_atomic_mode mode)
{
int vm_alloc_shared = dst_vma->vm_flags & VM_SHARED;
int vm_shared = dst_vma->vm_flags & VM_SHARED;
* by THP. Since we can not reliably insert a zero page, this
* feature is not supported.
*/
- if (zeropage) {
+ if (mode == MCOPY_ATOMIC_ZEROPAGE) {
mmap_read_unlock(dst_mm);
return -EINVAL;
}
}
while (src_addr < src_start + len) {
- pte_t dst_pteval;
-
BUG_ON(dst_addr >= dst_start + len);
/*
mutex_lock(&hugetlb_fault_mutex_table[hash]);
err = -ENOMEM;
- dst_pte = huge_pte_alloc(dst_mm, dst_addr, vma_hpagesize);
+ dst_pte = huge_pte_alloc(dst_mm, dst_vma, dst_addr, vma_hpagesize);
if (!dst_pte) {
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping);
goto out_unlock;
}
- err = -EEXIST;
- dst_pteval = huge_ptep_get(dst_pte);
- if (!huge_pte_none(dst_pteval)) {
+ if (mode != MCOPY_ATOMIC_CONTINUE &&
+ !huge_pte_none(huge_ptep_get(dst_pte))) {
+ err = -EEXIST;
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping);
goto out_unlock;
}
err = hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma,
- dst_addr, src_addr, &page);
+ dst_addr, src_addr, mode, &page);
mutex_unlock(&hugetlb_fault_mutex_table[hash]);
i_mmap_unlock_read(mapping);
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
- bool zeropage);
+ enum mcopy_atomic_mode mode);
#endif /* CONFIG_HUGETLB_PAGE */
static __always_inline ssize_t mfill_atomic_pte(struct mm_struct *dst_mm,
unsigned long dst_start,
unsigned long src_start,
unsigned long len,
- bool zeropage,
+ enum mcopy_atomic_mode mcopy_mode,
bool *mmap_changing,
__u64 mode)
{
long copied;
struct page *page;
bool wp_copy;
+ bool zeropage = (mcopy_mode == MCOPY_ATOMIC_ZEROPAGE);
/*
* Sanitize the command parameters:
*/
if (is_vm_hugetlb_page(dst_vma))
return __mcopy_atomic_hugetlb(dst_mm, dst_vma, dst_start,
- src_start, len, zeropage);
+ src_start, len, mcopy_mode);
if (!vma_is_anonymous(dst_vma) && !vma_is_shmem(dst_vma))
goto out_unlock;
+ if (mcopy_mode == MCOPY_ATOMIC_CONTINUE)
+ goto out_unlock;
/*
* Ensure the dst_vma has a anon_vma or this page
unsigned long src_start, unsigned long len,
bool *mmap_changing, __u64 mode)
{
- return __mcopy_atomic(dst_mm, dst_start, src_start, len, false,
- mmap_changing, mode);
+ return __mcopy_atomic(dst_mm, dst_start, src_start, len,
+ MCOPY_ATOMIC_NORMAL, mmap_changing, mode);
}
ssize_t mfill_zeropage(struct mm_struct *dst_mm, unsigned long start,
unsigned long len, bool *mmap_changing)
{
- return __mcopy_atomic(dst_mm, start, 0, len, true, mmap_changing, 0);
+ return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_ZEROPAGE,
+ mmap_changing, 0);
+}
+
+ssize_t mcopy_continue(struct mm_struct *dst_mm, unsigned long start,
+ unsigned long len, bool *mmap_changing)
+{
+ return __mcopy_atomic(dst_mm, start, 0, len, MCOPY_ATOMIC_CONTINUE,
+ mmap_changing, 0);
}
int mwriteprotect_range(struct mm_struct *dst_mm, unsigned long start,
* The deviation of sync_overcommit_as could be big with loose policy
* like OVERCOMMIT_ALWAYS/OVERCOMMIT_GUESS. When changing policy to
* strict OVERCOMMIT_NEVER, we need to reduce the deviation to comply
- * with the strict "NEVER", and to avoid possible race condtion (even
+ * with the strict "NEVER", and to avoid possible race condition (even
* though user usually won't too frequently do the switching to policy
* OVERCOMMIT_NEVER), the switch is done in the following order:
* 1. changing the batch
*/
void mem_dump_obj(void *object)
{
+ const char *type;
+
if (kmem_valid_obj(object)) {
kmem_dump_obj(object);
return;
}
+
if (vmalloc_dump_obj(object))
return;
- if (!virt_addr_valid(object)) {
- if (object == NULL)
- pr_cont(" NULL pointer.\n");
- else if (object == ZERO_SIZE_PTR)
- pr_cont(" zero-size pointer.\n");
- else
- pr_cont(" non-paged memory.\n");
- return;
- }
- pr_cont(" non-slab/vmalloc memory.\n");
+
+ if (virt_addr_valid(object))
+ type = "non-slab/vmalloc memory";
+ else if (object == NULL)
+ type = "NULL pointer";
+ else if (object == ZERO_SIZE_PTR)
+ type = "zero-size pointer";
+ else
+ type = "non-paged memory";
+
+ pr_cont(" %s\n", type);
}
EXPORT_SYMBOL_GPL(mem_dump_obj);
#endif
static atomic_long_t vmap_lazy_nr = ATOMIC_LONG_INIT(0);
/*
- * Serialize vmap purging. There is no actual criticial section protected
+ * Serialize vmap purging. There is no actual critical section protected
* by this look, but we want to avoid concurrent calls for performance
* reasons and to make the pcpu_get_vm_areas more deterministic.
*/
* May sleep if called *not* from interrupt context.
* Must not be called in NMI context (strictly speaking, it could be
* if we have CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG, but making the calling
- * conventions for vfree() arch-depenedent would be a really bad idea).
+ * conventions for vfree() arch-dependent would be a really bad idea).
*/
void vfree(const void *addr)
{
* 64b systems should always have either DMA or DMA32 zones. For others
* GFP_DMA32 should do the right thing and use the normal zone.
*/
-#define GFP_VMALLOC32 GFP_DMA32 | GFP_KERNEL
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
#endif
/**
/*
* To do safe access to this _mapped_ area, we need
* lock. But adding lock here means that we need to add
- * overhead of vmalloc()/vfree() calles for this _debug_
+ * overhead of vmalloc()/vfree() calls for this _debug_
* interface, rarely used. Instead of that, we'll use
* kmap() and get small overhead in this access function.
*/
if (p) {
- /*
- * we can expect USER0 is not used (see vread/vwrite's
- * function description)
- */
+ /* We can expect USER0 is not used -- see vread() */
void *map = kmap_atomic(p);
memcpy(buf, map + offset, length);
kunmap_atomic(map);
return copied;
}
-static int aligned_vwrite(char *buf, char *addr, unsigned long count)
-{
- struct page *p;
- int copied = 0;
-
- while (count) {
- unsigned long offset, length;
-
- offset = offset_in_page(addr);
- length = PAGE_SIZE - offset;
- if (length > count)
- length = count;
- p = vmalloc_to_page(addr);
- /*
- * To do safe access to this _mapped_ area, we need
- * lock. But adding lock here means that we need to add
- * overhead of vmalloc()/vfree() calles for this _debug_
- * interface, rarely used. Instead of that, we'll use
- * kmap() and get small overhead in this access function.
- */
- if (p) {
- /*
- * we can expect USER0 is not used (see vread/vwrite's
- * function description)
- */
- void *map = kmap_atomic(p);
- memcpy(map + offset, buf, length);
- kunmap_atomic(map);
- }
- addr += length;
- buf += length;
- copied += length;
- count -= length;
- }
- return copied;
-}
-
/**
* vread() - read vmalloc area in a safe way.
* @buf: buffer for reading data
* Note: In usual ops, vread() is never necessary because the caller
* should know vmalloc() area is valid and can use memcpy().
* This is for routines which have to access vmalloc area without
- * any information, as /dev/kmem.
+ * any information, as /proc/kcore.
*
* Return: number of bytes for which addr and buf should be increased
* (same number as @count) or %0 if [addr...addr+count) doesn't
return buflen;
}
-/**
- * vwrite() - write vmalloc area in a safe way.
- * @buf: buffer for source data
- * @addr: vm address.
- * @count: number of bytes to be read.
- *
- * This function checks that addr is a valid vmalloc'ed area, and
- * copy data from a buffer to the given addr. If specified range of
- * [addr...addr+count) includes some valid address, data is copied from
- * proper area of @buf. If there are memory holes, no copy to hole.
- * IOREMAP area is treated as memory hole and no copy is done.
- *
- * If [addr...addr+count) doesn't includes any intersects with alive
- * vm_struct area, returns 0. @buf should be kernel's buffer.
- *
- * Note: In usual ops, vwrite() is never necessary because the caller
- * should know vmalloc() area is valid and can use memcpy().
- * This is for routines which have to access vmalloc area without
- * any information, as /dev/kmem.
- *
- * Return: number of bytes for which addr and buf should be
- * increased (same number as @count) or %0 if [addr...addr+count)
- * doesn't include any intersection with valid vmalloc area
- */
-long vwrite(char *buf, char *addr, unsigned long count)
-{
- struct vmap_area *va;
- struct vm_struct *vm;
- char *vaddr;
- unsigned long n, buflen;
- int copied = 0;
-
- /* Don't allow overflow */
- if ((unsigned long) addr + count < count)
- count = -(unsigned long) addr;
- buflen = count;
-
- spin_lock(&vmap_area_lock);
- list_for_each_entry(va, &vmap_area_list, list) {
- if (!count)
- break;
-
- if (!va->vm)
- continue;
-
- vm = va->vm;
- vaddr = (char *) vm->addr;
- if (addr >= vaddr + get_vm_area_size(vm))
- continue;
- while (addr < vaddr) {
- if (count == 0)
- goto finished;
- buf++;
- addr++;
- count--;
- }
- n = vaddr + get_vm_area_size(vm) - addr;
- if (n > count)
- n = count;
- if (!(vm->flags & VM_IOREMAP)) {
- aligned_vwrite(buf, addr, n);
- copied++;
- }
- buf += n;
- addr += n;
- count -= n;
- }
-finished:
- spin_unlock(&vmap_area_lock);
- if (!copied)
- return 0;
- return buflen;
-}
-
/**
* remap_vmalloc_range_partial - map vmalloc pages to userspace
* @vma: vma to cover
static DECLARE_RWSEM(shrinker_rwsem);
#ifdef CONFIG_MEMCG
-/*
- * We allow subsystems to populate their shrinker-related
- * LRU lists before register_shrinker_prepared() is called
- * for the shrinker, since we don't want to impose
- * restrictions on their internal registration order.
- * In this case shrink_slab_memcg() may find corresponding
- * bit is set in the shrinkers map.
- *
- * This value is used by the function to detect registering
- * shrinkers and to skip do_shrink_slab() calls for them.
- */
-#define SHRINKER_REGISTERING ((struct shrinker *)~0UL)
+static int shrinker_nr_max;
+
+/* The shrinker_info is expanded in a batch of BITS_PER_LONG */
+static inline int shrinker_map_size(int nr_items)
+{
+ return (DIV_ROUND_UP(nr_items, BITS_PER_LONG) * sizeof(unsigned long));
+}
+
+static inline int shrinker_defer_size(int nr_items)
+{
+ return (round_up(nr_items, BITS_PER_LONG) * sizeof(atomic_long_t));
+}
+
+static struct shrinker_info *shrinker_info_protected(struct mem_cgroup *memcg,
+ int nid)
+{
+ return rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_info,
+ lockdep_is_held(&shrinker_rwsem));
+}
+
+static int expand_one_shrinker_info(struct mem_cgroup *memcg,
+ int map_size, int defer_size,
+ int old_map_size, int old_defer_size)
+{
+ struct shrinker_info *new, *old;
+ struct mem_cgroup_per_node *pn;
+ int nid;
+ int size = map_size + defer_size;
+
+ for_each_node(nid) {
+ pn = memcg->nodeinfo[nid];
+ old = shrinker_info_protected(memcg, nid);
+ /* Not yet online memcg */
+ if (!old)
+ return 0;
+
+ new = kvmalloc_node(sizeof(*new) + size, GFP_KERNEL, nid);
+ if (!new)
+ return -ENOMEM;
+
+ new->nr_deferred = (atomic_long_t *)(new + 1);
+ new->map = (void *)new->nr_deferred + defer_size;
+
+ /* map: set all old bits, clear all new bits */
+ memset(new->map, (int)0xff, old_map_size);
+ memset((void *)new->map + old_map_size, 0, map_size - old_map_size);
+ /* nr_deferred: copy old values, clear all new values */
+ memcpy(new->nr_deferred, old->nr_deferred, old_defer_size);
+ memset((void *)new->nr_deferred + old_defer_size, 0,
+ defer_size - old_defer_size);
+
+ rcu_assign_pointer(pn->shrinker_info, new);
+ kvfree_rcu(old, rcu);
+ }
+
+ return 0;
+}
+
+void free_shrinker_info(struct mem_cgroup *memcg)
+{
+ struct mem_cgroup_per_node *pn;
+ struct shrinker_info *info;
+ int nid;
+
+ for_each_node(nid) {
+ pn = memcg->nodeinfo[nid];
+ info = rcu_dereference_protected(pn->shrinker_info, true);
+ kvfree(info);
+ rcu_assign_pointer(pn->shrinker_info, NULL);
+ }
+}
+
+int alloc_shrinker_info(struct mem_cgroup *memcg)
+{
+ struct shrinker_info *info;
+ int nid, size, ret = 0;
+ int map_size, defer_size = 0;
+
+ down_write(&shrinker_rwsem);
+ map_size = shrinker_map_size(shrinker_nr_max);
+ defer_size = shrinker_defer_size(shrinker_nr_max);
+ size = map_size + defer_size;
+ for_each_node(nid) {
+ info = kvzalloc_node(sizeof(*info) + size, GFP_KERNEL, nid);
+ if (!info) {
+ free_shrinker_info(memcg);
+ ret = -ENOMEM;
+ break;
+ }
+ info->nr_deferred = (atomic_long_t *)(info + 1);
+ info->map = (void *)info->nr_deferred + defer_size;
+ rcu_assign_pointer(memcg->nodeinfo[nid]->shrinker_info, info);
+ }
+ up_write(&shrinker_rwsem);
+
+ return ret;
+}
+
+static inline bool need_expand(int nr_max)
+{
+ return round_up(nr_max, BITS_PER_LONG) >
+ round_up(shrinker_nr_max, BITS_PER_LONG);
+}
+
+static int expand_shrinker_info(int new_id)
+{
+ int ret = 0;
+ int new_nr_max = new_id + 1;
+ int map_size, defer_size = 0;
+ int old_map_size, old_defer_size = 0;
+ struct mem_cgroup *memcg;
+
+ if (!need_expand(new_nr_max))
+ goto out;
+
+ if (!root_mem_cgroup)
+ goto out;
+
+ lockdep_assert_held(&shrinker_rwsem);
+
+ map_size = shrinker_map_size(new_nr_max);
+ defer_size = shrinker_defer_size(new_nr_max);
+ old_map_size = shrinker_map_size(shrinker_nr_max);
+ old_defer_size = shrinker_defer_size(shrinker_nr_max);
+
+ memcg = mem_cgroup_iter(NULL, NULL, NULL);
+ do {
+ ret = expand_one_shrinker_info(memcg, map_size, defer_size,
+ old_map_size, old_defer_size);
+ if (ret) {
+ mem_cgroup_iter_break(NULL, memcg);
+ goto out;
+ }
+ } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL);
+out:
+ if (!ret)
+ shrinker_nr_max = new_nr_max;
+
+ return ret;
+}
+
+void set_shrinker_bit(struct mem_cgroup *memcg, int nid, int shrinker_id)
+{
+ if (shrinker_id >= 0 && memcg && !mem_cgroup_is_root(memcg)) {
+ struct shrinker_info *info;
+
+ rcu_read_lock();
+ info = rcu_dereference(memcg->nodeinfo[nid]->shrinker_info);
+ /* Pairs with smp mb in shrink_slab() */
+ smp_mb__before_atomic();
+ set_bit(shrinker_id, info->map);
+ rcu_read_unlock();
+ }
+}
static DEFINE_IDR(shrinker_idr);
-static int shrinker_nr_max;
static int prealloc_memcg_shrinker(struct shrinker *shrinker)
{
int id, ret = -ENOMEM;
+ if (mem_cgroup_disabled())
+ return -ENOSYS;
+
down_write(&shrinker_rwsem);
/* This may call shrinker, so it must use down_read_trylock() */
- id = idr_alloc(&shrinker_idr, SHRINKER_REGISTERING, 0, 0, GFP_KERNEL);
+ id = idr_alloc(&shrinker_idr, shrinker, 0, 0, GFP_KERNEL);
if (id < 0)
goto unlock;
if (id >= shrinker_nr_max) {
- if (memcg_expand_shrinker_maps(id)) {
+ if (expand_shrinker_info(id)) {
idr_remove(&shrinker_idr, id);
goto unlock;
}
-
- shrinker_nr_max = id + 1;
}
shrinker->id = id;
ret = 0;
BUG_ON(id < 0);
- down_write(&shrinker_rwsem);
+ lockdep_assert_held(&shrinker_rwsem);
+
idr_remove(&shrinker_idr, id);
- up_write(&shrinker_rwsem);
+}
+
+static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ struct shrinker_info *info;
+
+ info = shrinker_info_protected(memcg, nid);
+ return atomic_long_xchg(&info->nr_deferred[shrinker->id], 0);
+}
+
+static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ struct shrinker_info *info;
+
+ info = shrinker_info_protected(memcg, nid);
+ return atomic_long_add_return(nr, &info->nr_deferred[shrinker->id]);
+}
+
+void reparent_shrinker_deferred(struct mem_cgroup *memcg)
+{
+ int i, nid;
+ long nr;
+ struct mem_cgroup *parent;
+ struct shrinker_info *child_info, *parent_info;
+
+ parent = parent_mem_cgroup(memcg);
+ if (!parent)
+ parent = root_mem_cgroup;
+
+ /* Prevent from concurrent shrinker_info expand */
+ down_read(&shrinker_rwsem);
+ for_each_node(nid) {
+ child_info = shrinker_info_protected(memcg, nid);
+ parent_info = shrinker_info_protected(parent, nid);
+ for (i = 0; i < shrinker_nr_max; i++) {
+ nr = atomic_long_read(&child_info->nr_deferred[i]);
+ atomic_long_add(nr, &parent_info->nr_deferred[i]);
+ }
+ }
+ up_read(&shrinker_rwsem);
}
static bool cgroup_reclaim(struct scan_control *sc)
#else
static int prealloc_memcg_shrinker(struct shrinker *shrinker)
{
- return 0;
+ return -ENOSYS;
}
static void unregister_memcg_shrinker(struct shrinker *shrinker)
{
}
+static long xchg_nr_deferred_memcg(int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
+static long add_nr_deferred_memcg(long nr, int nid, struct shrinker *shrinker,
+ struct mem_cgroup *memcg)
+{
+ return 0;
+}
+
static bool cgroup_reclaim(struct scan_control *sc)
{
return false;
}
#endif
+static long xchg_nr_deferred(struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ int nid = sc->nid;
+
+ if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
+ nid = 0;
+
+ if (sc->memcg &&
+ (shrinker->flags & SHRINKER_MEMCG_AWARE))
+ return xchg_nr_deferred_memcg(nid, shrinker,
+ sc->memcg);
+
+ return atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+}
+
+
+static long add_nr_deferred(long nr, struct shrinker *shrinker,
+ struct shrink_control *sc)
+{
+ int nid = sc->nid;
+
+ if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
+ nid = 0;
+
+ if (sc->memcg &&
+ (shrinker->flags & SHRINKER_MEMCG_AWARE))
+ return add_nr_deferred_memcg(nr, nid, shrinker,
+ sc->memcg);
+
+ return atomic_long_add_return(nr, &shrinker->nr_deferred[nid]);
+}
+
/*
* This misses isolated pages which are not accounted for to save counters.
* As the data only determines if reclaim or compaction continues, it is
*/
int prealloc_shrinker(struct shrinker *shrinker)
{
- unsigned int size = sizeof(*shrinker->nr_deferred);
+ unsigned int size;
+ int err;
+
+ if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
+ err = prealloc_memcg_shrinker(shrinker);
+ if (err != -ENOSYS)
+ return err;
+
+ shrinker->flags &= ~SHRINKER_MEMCG_AWARE;
+ }
+ size = sizeof(*shrinker->nr_deferred);
if (shrinker->flags & SHRINKER_NUMA_AWARE)
size *= nr_node_ids;
if (!shrinker->nr_deferred)
return -ENOMEM;
- if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
- if (prealloc_memcg_shrinker(shrinker))
- goto free_deferred;
- }
-
return 0;
-
-free_deferred:
- kfree(shrinker->nr_deferred);
- shrinker->nr_deferred = NULL;
- return -ENOMEM;
}
void free_prealloced_shrinker(struct shrinker *shrinker)
{
- if (!shrinker->nr_deferred)
- return;
-
- if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+ if (shrinker->flags & SHRINKER_MEMCG_AWARE) {
+ down_write(&shrinker_rwsem);
unregister_memcg_shrinker(shrinker);
+ up_write(&shrinker_rwsem);
+ return;
+ }
kfree(shrinker->nr_deferred);
shrinker->nr_deferred = NULL;
{
down_write(&shrinker_rwsem);
list_add_tail(&shrinker->list, &shrinker_list);
-#ifdef CONFIG_MEMCG
- if (shrinker->flags & SHRINKER_MEMCG_AWARE)
- idr_replace(&shrinker_idr, shrinker, shrinker->id);
-#endif
+ shrinker->flags |= SHRINKER_REGISTERED;
up_write(&shrinker_rwsem);
}
*/
void unregister_shrinker(struct shrinker *shrinker)
{
- if (!shrinker->nr_deferred)
+ if (!(shrinker->flags & SHRINKER_REGISTERED))
return;
- if (shrinker->flags & SHRINKER_MEMCG_AWARE)
- unregister_memcg_shrinker(shrinker);
+
down_write(&shrinker_rwsem);
list_del(&shrinker->list);
+ shrinker->flags &= ~SHRINKER_REGISTERED;
+ if (shrinker->flags & SHRINKER_MEMCG_AWARE)
+ unregister_memcg_shrinker(shrinker);
up_write(&shrinker_rwsem);
+
kfree(shrinker->nr_deferred);
shrinker->nr_deferred = NULL;
}
long freeable;
long nr;
long new_nr;
- int nid = shrinkctl->nid;
long batch_size = shrinker->batch ? shrinker->batch
: SHRINK_BATCH;
long scanned = 0, next_deferred;
- if (!(shrinker->flags & SHRINKER_NUMA_AWARE))
- nid = 0;
-
freeable = shrinker->count_objects(shrinker, shrinkctl);
if (freeable == 0 || freeable == SHRINK_EMPTY)
return freeable;
* and zero it so that other concurrent shrinker invocations
* don't also do this scanning work.
*/
- nr = atomic_long_xchg(&shrinker->nr_deferred[nid], 0);
+ nr = xchg_nr_deferred(shrinker, shrinkctl);
- total_scan = nr;
if (shrinker->seeks) {
delta = freeable >> priority;
delta *= 4;
delta = freeable / 2;
}
+ total_scan = nr >> priority;
total_scan += delta;
- if (total_scan < 0) {
- pr_err("shrink_slab: %pS negative objects to delete nr=%ld\n",
- shrinker->scan_objects, total_scan);
- total_scan = freeable;
- next_deferred = nr;
- } else
- next_deferred = total_scan;
-
- /*
- * We need to avoid excessive windup on filesystem shrinkers
- * due to large numbers of GFP_NOFS allocations causing the
- * shrinkers to return -1 all the time. This results in a large
- * nr being built up so when a shrink that can do some work
- * comes along it empties the entire cache due to nr >>>
- * freeable. This is bad for sustaining a working set in
- * memory.
- *
- * Hence only allow the shrinker to scan the entire cache when
- * a large delta change is calculated directly.
- */
- if (delta < freeable / 4)
- total_scan = min(total_scan, freeable / 2);
-
- /*
- * Avoid risking looping forever due to too large nr value:
- * never try to free more than twice the estimate number of
- * freeable entries.
- */
- if (total_scan > freeable * 2)
- total_scan = freeable * 2;
+ total_scan = min(total_scan, (2 * freeable));
trace_mm_shrink_slab_start(shrinker, shrinkctl, nr,
freeable, delta, total_scan, priority);
cond_resched();
}
- if (next_deferred >= scanned)
- next_deferred -= scanned;
- else
- next_deferred = 0;
+ /*
+ * The deferred work is increased by any new work (delta) that wasn't
+ * done, decreased by old deferred work that was done now.
+ *
+ * And it is capped to two times of the freeable items.
+ */
+ next_deferred = max_t(long, (nr + delta - scanned), 0);
+ next_deferred = min(next_deferred, (2 * freeable));
+
/*
* move the unused scan count back into the shrinker in a
- * manner that handles concurrent updates. If we exhausted the
- * scan, there is no need to do an update.
+ * manner that handles concurrent updates.
*/
- if (next_deferred > 0)
- new_nr = atomic_long_add_return(next_deferred,
- &shrinker->nr_deferred[nid]);
- else
- new_nr = atomic_long_read(&shrinker->nr_deferred[nid]);
+ new_nr = add_nr_deferred(next_deferred, shrinker, shrinkctl);
- trace_mm_shrink_slab_end(shrinker, nid, freed, nr, new_nr, total_scan);
+ trace_mm_shrink_slab_end(shrinker, shrinkctl->nid, freed, nr, new_nr, total_scan);
return freed;
}
static unsigned long shrink_slab_memcg(gfp_t gfp_mask, int nid,
struct mem_cgroup *memcg, int priority)
{
- struct memcg_shrinker_map *map;
+ struct shrinker_info *info;
unsigned long ret, freed = 0;
int i;
if (!down_read_trylock(&shrinker_rwsem))
return 0;
- map = rcu_dereference_protected(memcg->nodeinfo[nid]->shrinker_map,
- true);
- if (unlikely(!map))
+ info = shrinker_info_protected(memcg, nid);
+ if (unlikely(!info))
goto unlock;
- for_each_set_bit(i, map->map, shrinker_nr_max) {
+ for_each_set_bit(i, info->map, shrinker_nr_max) {
struct shrink_control sc = {
.gfp_mask = gfp_mask,
.nid = nid,
struct shrinker *shrinker;
shrinker = idr_find(&shrinker_idr, i);
- if (unlikely(!shrinker || shrinker == SHRINKER_REGISTERING)) {
+ if (unlikely(!shrinker || !(shrinker->flags & SHRINKER_REGISTERED))) {
if (!shrinker)
- clear_bit(i, map->map);
+ clear_bit(i, info->map);
continue;
}
ret = do_shrink_slab(&sc, shrinker, priority);
if (ret == SHRINK_EMPTY) {
- clear_bit(i, map->map);
+ clear_bit(i, info->map);
/*
* After the shrinker reported that it had no objects to
* free, but before we cleared the corresponding bit in
* case, we invoke the shrinker one more time and reset
* the bit if it reports that it is not empty anymore.
* The memory barrier here pairs with the barrier in
- * memcg_set_shrinker_bit():
+ * set_shrinker_bit():
*
* list_lru_add() shrink_slab_memcg()
* list_add_tail() clear_bit()
if (ret == SHRINK_EMPTY)
ret = 0;
else
- memcg_set_shrinker_bit(memcg, nid, i);
+ set_shrinker_bit(memcg, nid, i);
}
freed += ret;
LIST_HEAD(clean_pages);
list_for_each_entry_safe(page, next, page_list, lru) {
- if (page_is_file_lru(page) && !PageDirty(page) &&
- !__PageMovable(page) && !PageUnevictable(page)) {
+ if (!PageHuge(page) && page_is_file_lru(page) &&
+ !PageDirty(page) && !__PageMovable(page) &&
+ !PageUnevictable(page)) {
ClearPageActive(page);
list_move(&page->lru, &clean_pages);
}
{
unsigned int alloc_order, reclaim_order;
unsigned int highest_zoneidx = MAX_NR_ZONES - 1;
- pg_data_t *pgdat = (pg_data_t*)p;
+ pg_data_t *pgdat = (pg_data_t *)p;
struct task_struct *tsk = current;
const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
*/
int node_reclaim_mode __read_mostly;
-/*
- * These bit locations are exposed in the vm.zone_reclaim_mode sysctl
- * ABI. New bits are OK, but existing bits can never change.
- */
-#define RECLAIM_ZONE (1<<0) /* Run shrink_inactive_list on the zone */
-#define RECLAIM_WRITE (1<<1) /* Writeout pages during reclaim */
-#define RECLAIM_UNMAP (1<<2) /* Unmap pages during reclaim */
-
/*
* Priority for NODE_RECLAIM. This determines the fraction of pages
* of a node considered for each zone_reclaim. 4 scans 1/16th of
/*
* this is only called if !populated_zone(zone), which implies no other users of
- * pset->vm_stat_diff[] exsist.
+ * pset->vm_stat_diff[] exist.
*/
void drain_zonestat(struct zone *zone, struct per_cpu_pageset *pset)
{
#ifdef CONFIG_HUGETLB_PAGE
"htlb_buddy_alloc_success",
"htlb_buddy_alloc_fail",
+#endif
+#ifdef CONFIG_CMA
+ "cma_alloc_success",
+ "cma_alloc_fail",
#endif
"unevictable_pgs_culled",
"unevictable_pgs_scanned",
"swap_ra",
"swap_ra_hit",
#endif
+#ifdef CONFIG_X86
+ "direct_map_level2_splits",
+ "direct_map_level3_splits",
+#endif
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
};
#endif /* CONFIG_PROC_FS || CONFIG_SYSFS || CONFIG_NUMA || CONFIG_MEMCG */
if (err)
return err;
for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) {
+ /*
+ * Skip checking stats known to go negative occasionally.
+ */
+ switch (i) {
+ case NR_ZONE_WRITE_PENDING:
+ case NR_FREE_CMA_PAGES:
+ continue;
+ }
val = atomic_long_read(&vm_zone_stat[i]);
if (val < 0) {
pr_warn("%s: %s %ld\n",
__func__, zone_stat_name(i), val);
- err = -EINVAL;
}
}
-#ifdef CONFIG_NUMA
- for (i = 0; i < NR_VM_NUMA_STAT_ITEMS; i++) {
- val = atomic_long_read(&vm_numa_stat[i]);
+ for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+ /*
+ * Skip checking stats known to go negative occasionally.
+ */
+ switch (i) {
+ case NR_WRITEBACK:
+ continue;
+ }
+ val = atomic_long_read(&vm_node_stat[i]);
if (val < 0) {
pr_warn("%s: %s %ld\n",
- __func__, numa_stat_name(i), val);
- err = -EINVAL;
+ __func__, node_stat_name(i), val);
}
}
-#endif
- if (err)
- return err;
if (write)
*ppos += *lenp;
else
goto out_invalid;
if (WARN_ON_ONCE(node->count != node->nr_values))
goto out_invalid;
- mapping->nrexceptional -= node->nr_values;
xa_delete_node(node, workingset_update_node);
__inc_lruvec_kmem_state(node, WORKINGSET_NODERECLAIM);
{
if (pool->inode)
iput(pool->inode);
- }
+}
/* Initializes the z3fold header of a newly allocated z3fold page */
static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
* This may hold locks, disable interrupts, and/or preemption,
* and the zpool_unmap_handle() must be called to undo those
* actions. The code that uses the mapped handle should complete
- * its operatons on the mapped handle memory quickly and unmap
+ * its operations on the mapped handle memory quickly and unmap
* as soon as possible. As the implementation may use per-cpu
* data, multiple handles should not be mapped concurrently on
* any cpu.
#define ZSPAGE_MAGIC 0x58
/*
- * This must be power of 2 and greater than of equal to sizeof(link_free).
+ * This must be power of 2 and greater than or equal to sizeof(link_free).
* These two conditions ensure that any 'struct link_free' itself doesn't
* span more than 1 page which avoids complex case of mapping 2 pages simply
* to restore link_free pointer values.
* class maintains a list of zspages where each zspage is divided
* into equal sized chunks. Each allocation falls into one of these
* classes depending on its size. This function returns index of the
- * size class which has chunk size big enough to hold the give size.
+ * size class which has chunk size big enough to hold the given size.
*/
static int get_size_class_index(int size)
{
* zs_map_object - get address of allocated object from handle.
* @pool: pool from which the object was allocated
* @handle: handle returned from zs_malloc
- * @mm: maping mode to use
+ * @mm: mapping mode to use
*
* Before using an object allocated from zs_malloc, it must be mapped using
* this function. When done with the object, it must be unmapped using
head = obj_to_head(page, addr);
if (head & OBJ_ALLOCATED_TAG) {
handle = head & ~OBJ_ALLOCATED_TAG;
- if (!testpin_tag(handle))
- BUG();
+ BUG_ON(!testpin_tag(handle));
old_obj = handle_to_obj(handle);
obj_to_location(old_obj, &dummy, &obj_idx);
head = obj_to_head(page, addr);
if (head & OBJ_ALLOCATED_TAG) {
handle = head & ~OBJ_ALLOCATED_TAG;
- if (!testpin_tag(handle))
- BUG();
+ BUG_ON(!testpin_tag(handle));
unpin_tag(handle);
}
}
}
pr_debug("using %s zpool\n", zpool_get_type(pool->zpool));
- strlcpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
+ strscpy(pool->tfm_name, compressor, sizeof(pool->tfm_name));
pool->acomp_ctx = alloc_percpu(*pool->acomp_ctx);
if (!pool->acomp_ctx) {
rcu_read_lock();
if (netif_is_bridge_port(dev)) {
- p = br_port_get_rcu(dev);
- vg = nbp_vlan_group_rcu(p);
+ p = br_port_get_check_rcu(dev);
+ if (p)
+ vg = nbp_vlan_group_rcu(p);
} else if (dev->priv_flags & IFF_EBRIDGE) {
br = netdev_priv(dev);
vg = br_vlan_group_rcu(br);
}
}
+static void set_global_id(struct ceph_auth_client *ac, u64 global_id)
+{
+ dout("%s global_id %llu\n", __func__, global_id);
+
+ if (!global_id)
+ pr_err("got zero global_id\n");
+
+ if (ac->global_id && global_id != ac->global_id)
+ pr_err("global_id changed from %llu to %llu\n", ac->global_id,
+ global_id);
+
+ ac->global_id = global_id;
+}
+
/*
* setup, teardown.
*/
payload_end = payload + payload_len;
- if (global_id && ac->global_id != global_id) {
- dout(" set global_id %lld -> %lld\n", ac->global_id, global_id);
- ac->global_id = global_id;
- }
-
if (ac->negotiating) {
/* server does not support our protocols? */
if (!protocol && result < 0) {
ret = ac->ops->handle_reply(ac, result, payload, payload_end,
NULL, NULL, NULL, NULL);
- if (ret == -EAGAIN)
+ if (ret == -EAGAIN) {
ret = build_request(ac, true, reply_buf, reply_len);
- else if (ret)
+ goto out;
+ } else if (ret) {
pr_err("auth protocol '%s' mauth authentication failed: %d\n",
ceph_auth_proto_name(ac->protocol), result);
+ goto out;
+ }
+
+ set_global_id(ac, global_id);
out:
mutex_unlock(&ac->mutex);
int ret;
mutex_lock(&ac->mutex);
- if (global_id && ac->global_id != global_id) {
- dout("%s global_id %llu -> %llu\n", __func__, ac->global_id,
- global_id);
- ac->global_id = global_id;
- }
-
ret = ac->ops->handle_reply(ac, 0, reply, reply + reply_len,
session_key, session_key_len,
con_secret, con_secret_len);
+ if (!ret)
+ set_global_id(ac, global_id);
mutex_unlock(&ac->mutex);
return ret;
}
if (ret < 0)
return ret;
- auth->struct_v = 2; /* nautilus+ */
+ auth->struct_v = 3; /* nautilus+ */
auth->key = 0;
for (u = (u64 *)enc_buf; u + 1 <= (u64 *)(enc_buf + ret); u++)
auth->key ^= *(__le64 *)u;
#include <linux/inet.h>
#include <linux/ceph/decode.h>
+#include <linux/ceph/messenger.h> /* for ceph_pr_addr() */
static int
ceph_decode_entity_addr_versioned(void **p, void *end,
}
ceph_decode_32_safe(p, end, addr_cnt, e_inval);
+ dout("%s addr_cnt %d\n", __func__, addr_cnt);
found = false;
for (i = 0; i < addr_cnt; i++) {
if (ret)
return ret;
+ dout("%s i %d addr %s\n", __func__, i, ceph_pr_addr(&tmp_addr));
if (tmp_addr.type == my_type) {
if (found) {
pr_err("another match of type %d in addrvec\n",
found = true;
}
}
- if (!found && addr_cnt != 0) {
- pr_err("no match of type %d in addrvec\n",
- le32_to_cpu(my_type));
- return -ENOENT;
- }
- return 0;
+ if (found)
+ return 0;
+
+ if (!addr_cnt)
+ return 0; /* normal -- e.g. unused OSD id/slot */
+
+ if (addr_cnt == 1 && !memchr_inv(&tmp_addr, 0, sizeof(tmp_addr)))
+ return 0; /* weird but effectively the same as !addr_cnt */
+
+ pr_err("no match of type %d in addrvec\n", le32_to_cpu(my_type));
+ return -ENOENT;
e_inval:
return -EINVAL;
int ret;
ehdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- ðtool_genl_family, 0, ctx->ops->reply_cmd);
+ ðtool_genl_family, NLM_F_MULTI,
+ ctx->ops->reply_cmd);
if (!ehdr)
return -EMSGSIZE;
struct ethhdr *ethhdr;
__be16 proto;
+ /* Check if skb contains hsr_ethhdr */
+ if (skb->mac_len < sizeof(struct hsr_ethhdr))
+ return -EINVAL;
+
memset(frame, 0, sizeof(*frame));
frame->is_supervision = is_supervision_frame(port->hsr, skb);
frame->node_src = hsr_get_node(port, &hsr->node_db, skb,
return ret;
}
-void arpt_unregister_table_pre_exit(struct net *net, const char *name,
- const struct nf_hook_ops *ops)
+void arpt_unregister_table_pre_exit(struct net *net, const char *name)
{
struct xt_table *table = xt_find_table(net, NFPROTO_ARP, name);
if (table)
- nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
+ nf_unregister_net_hooks(net, table->ops, hweight32(table->valid_hooks));
}
EXPORT_SYMBOL(arpt_unregister_table_pre_exit);
static void __net_exit arptable_filter_net_pre_exit(struct net *net)
{
- arpt_unregister_table_pre_exit(net, "filter", arpfilter_ops);
+ arpt_unregister_table_pre_exit(net, "filter");
}
static void __net_exit arptable_filter_net_exit(struct net *net)
(__kernel_size_t)zc->msg_controllen;
cmsg_dummy.msg_flags = in_compat_syscall()
? MSG_CMSG_COMPAT : 0;
+ cmsg_dummy.msg_control_is_user = true;
zc->msg_flags = 0;
if (zc->msg_control == msg_control_addr &&
zc->msg_controllen == cmsg_dummy.msg_controllen) {
ret = -ENOENT;
} else if (!bpf_try_module_get(ca, ca->owner)) {
ret = -EBUSY;
+ } else if (!net_eq(net, &init_net) &&
+ !(ca->flags & TCP_CONG_NON_RESTRICTED)) {
+ /* Only init netns can set default to a restricted algorithm */
+ ret = -EPERM;
} else {
prev = xchg(&net->ipv4.tcp_congestion_control, ca);
if (prev)
hinfo = seg6_hmac_info_lookup(net, hmackeyid);
if (!slen) {
- if (!hinfo)
- err = -ENOENT;
-
err = seg6_hmac_info_del(net, hmackeyid);
goto out_unlock;
int hdrlen;
};
+struct pcpu_seg6_local_counters {
+ u64_stats_t packets;
+ u64_stats_t bytes;
+ u64_stats_t errors;
+
+ struct u64_stats_sync syncp;
+};
+
+/* This struct groups all the SRv6 Behavior counters supported so far.
+ *
+ * put_nla_counters() makes use of this data structure to collect all counter
+ * values after the per-CPU counter evaluation has been performed.
+ * Finally, each counter value (in seg6_local_counters) is stored in the
+ * corresponding netlink attribute and sent to user space.
+ *
+ * NB: we don't want to expose this structure to user space!
+ */
+struct seg6_local_counters {
+ __u64 packets;
+ __u64 bytes;
+ __u64 errors;
+};
+
+#define seg6_local_alloc_pcpu_counters(__gfp) \
+ __netdev_alloc_pcpu_stats(struct pcpu_seg6_local_counters, \
+ ((__gfp) | __GFP_ZERO))
+
+#define SEG6_F_LOCAL_COUNTERS SEG6_F_ATTR(SEG6_LOCAL_COUNTERS)
+
struct seg6_local_lwt {
int action;
struct ipv6_sr_hdr *srh;
#ifdef CONFIG_NET_L3_MASTER_DEV
struct seg6_end_dt_info dt_info;
#endif
+ struct pcpu_seg6_local_counters __percpu *pcpu_counters;
int headroom;
struct seg6_action_desc *desc;
{
.action = SEG6_LOCAL_ACTION_END,
.attrs = 0,
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end,
},
{
.action = SEG6_LOCAL_ACTION_END_X,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_x,
},
{
.action = SEG6_LOCAL_ACTION_END_T,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_t,
},
{
.action = SEG6_LOCAL_ACTION_END_DX2,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_OIF),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_dx2,
},
{
.action = SEG6_LOCAL_ACTION_END_DX6,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH6),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_dx6,
},
{
.action = SEG6_LOCAL_ACTION_END_DX4,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_NH4),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_dx4,
},
{
.action = SEG6_LOCAL_ACTION_END_DT4,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
#ifdef CONFIG_NET_L3_MASTER_DEV
.input = input_action_end_dt4,
.slwt_ops = {
.action = SEG6_LOCAL_ACTION_END_DT6,
#ifdef CONFIG_NET_L3_MASTER_DEV
.attrs = 0,
- .optattrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
+ .optattrs = SEG6_F_LOCAL_COUNTERS |
+ SEG6_F_ATTR(SEG6_LOCAL_TABLE) |
SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE),
.slwt_ops = {
.build_state = seg6_end_dt6_build,
},
#else
.attrs = SEG6_F_ATTR(SEG6_LOCAL_TABLE),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
#endif
.input = input_action_end_dt6,
},
{
.action = SEG6_LOCAL_ACTION_END_B6,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_b6,
},
{
.action = SEG6_LOCAL_ACTION_END_B6_ENCAP,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_SRH),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_b6_encap,
.static_headroom = sizeof(struct ipv6hdr),
},
{
.action = SEG6_LOCAL_ACTION_END_BPF,
.attrs = SEG6_F_ATTR(SEG6_LOCAL_BPF),
+ .optattrs = SEG6_F_LOCAL_COUNTERS,
.input = input_action_end_bpf,
},
return NULL;
}
+static bool seg6_lwtunnel_counters_enabled(struct seg6_local_lwt *slwt)
+{
+ return slwt->parsed_optattrs & SEG6_F_LOCAL_COUNTERS;
+}
+
+static void seg6_local_update_counters(struct seg6_local_lwt *slwt,
+ unsigned int len, int err)
+{
+ struct pcpu_seg6_local_counters *pcounters;
+
+ pcounters = this_cpu_ptr(slwt->pcpu_counters);
+ u64_stats_update_begin(&pcounters->syncp);
+
+ if (likely(!err)) {
+ u64_stats_inc(&pcounters->packets);
+ u64_stats_add(&pcounters->bytes, len);
+ } else {
+ u64_stats_inc(&pcounters->errors);
+ }
+
+ u64_stats_update_end(&pcounters->syncp);
+}
+
static int seg6_local_input(struct sk_buff *skb)
{
struct dst_entry *orig_dst = skb_dst(skb);
struct seg6_action_desc *desc;
struct seg6_local_lwt *slwt;
+ unsigned int len = skb->len;
+ int rc;
if (skb->protocol != htons(ETH_P_IPV6)) {
kfree_skb(skb);
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
- return desc->input(skb, slwt);
+ rc = desc->input(skb, slwt);
+
+ if (!seg6_lwtunnel_counters_enabled(slwt))
+ return rc;
+
+ seg6_local_update_counters(slwt, len, rc);
+
+ return rc;
}
static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = {
[SEG6_LOCAL_IIF] = { .type = NLA_U32 },
[SEG6_LOCAL_OIF] = { .type = NLA_U32 },
[SEG6_LOCAL_BPF] = { .type = NLA_NESTED },
+ [SEG6_LOCAL_COUNTERS] = { .type = NLA_NESTED },
};
static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt)
bpf_prog_put(slwt->bpf.prog);
}
+static const struct
+nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = {
+ [SEG6_LOCAL_CNT_PACKETS] = { .type = NLA_U64 },
+ [SEG6_LOCAL_CNT_BYTES] = { .type = NLA_U64 },
+ [SEG6_LOCAL_CNT_ERRORS] = { .type = NLA_U64 },
+};
+
+static int parse_nla_counters(struct nlattr **attrs,
+ struct seg6_local_lwt *slwt)
+{
+ struct pcpu_seg6_local_counters __percpu *pcounters;
+ struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1];
+ int ret;
+
+ ret = nla_parse_nested_deprecated(tb, SEG6_LOCAL_CNT_MAX,
+ attrs[SEG6_LOCAL_COUNTERS],
+ seg6_local_counters_policy, NULL);
+ if (ret < 0)
+ return ret;
+
+ /* basic support for SRv6 Behavior counters requires at least:
+ * packets, bytes and errors.
+ */
+ if (!tb[SEG6_LOCAL_CNT_PACKETS] || !tb[SEG6_LOCAL_CNT_BYTES] ||
+ !tb[SEG6_LOCAL_CNT_ERRORS])
+ return -EINVAL;
+
+ /* counters are always zero initialized */
+ pcounters = seg6_local_alloc_pcpu_counters(GFP_KERNEL);
+ if (!pcounters)
+ return -ENOMEM;
+
+ slwt->pcpu_counters = pcounters;
+
+ return 0;
+}
+
+static int seg6_local_fill_nla_counters(struct sk_buff *skb,
+ struct seg6_local_counters *counters)
+{
+ if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_PACKETS, counters->packets,
+ SEG6_LOCAL_CNT_PAD))
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_BYTES, counters->bytes,
+ SEG6_LOCAL_CNT_PAD))
+ return -EMSGSIZE;
+
+ if (nla_put_u64_64bit(skb, SEG6_LOCAL_CNT_ERRORS, counters->errors,
+ SEG6_LOCAL_CNT_PAD))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+static int put_nla_counters(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct seg6_local_counters counters = { 0, 0, 0 };
+ struct nlattr *nest;
+ int rc, i;
+
+ nest = nla_nest_start(skb, SEG6_LOCAL_COUNTERS);
+ if (!nest)
+ return -EMSGSIZE;
+
+ for_each_possible_cpu(i) {
+ struct pcpu_seg6_local_counters *pcounters;
+ u64 packets, bytes, errors;
+ unsigned int start;
+
+ pcounters = per_cpu_ptr(slwt->pcpu_counters, i);
+ do {
+ start = u64_stats_fetch_begin_irq(&pcounters->syncp);
+
+ packets = u64_stats_read(&pcounters->packets);
+ bytes = u64_stats_read(&pcounters->bytes);
+ errors = u64_stats_read(&pcounters->errors);
+
+ } while (u64_stats_fetch_retry_irq(&pcounters->syncp, start));
+
+ counters.packets += packets;
+ counters.bytes += bytes;
+ counters.errors += errors;
+ }
+
+ rc = seg6_local_fill_nla_counters(skb, &counters);
+ if (rc < 0) {
+ nla_nest_cancel(skb, nest);
+ return rc;
+ }
+
+ return nla_nest_end(skb, nest);
+}
+
+static int cmp_nla_counters(struct seg6_local_lwt *a, struct seg6_local_lwt *b)
+{
+ /* a and b are equal if both have pcpu_counters set or not */
+ return (!!((unsigned long)a->pcpu_counters)) ^
+ (!!((unsigned long)b->pcpu_counters));
+}
+
+static void destroy_attr_counters(struct seg6_local_lwt *slwt)
+{
+ free_percpu(slwt->pcpu_counters);
+}
+
struct seg6_action_param {
int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt);
int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt);
.put = put_nla_vrftable,
.cmp = cmp_nla_vrftable },
+ [SEG6_LOCAL_COUNTERS] = { .parse = parse_nla_counters,
+ .put = put_nla_counters,
+ .cmp = cmp_nla_counters,
+ .destroy = destroy_attr_counters },
};
/* call the destroy() callback (if available) for each set attribute in
if (attrs & SEG6_F_ATTR(SEG6_LOCAL_VRFTABLE))
nlsize += nla_total_size(4);
+ if (attrs & SEG6_F_LOCAL_COUNTERS)
+ nlsize += nla_total_size(0) + /* nest SEG6_LOCAL_COUNTERS */
+ /* SEG6_LOCAL_CNT_PACKETS */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* SEG6_LOCAL_CNT_BYTES */
+ nla_total_size_64bit(sizeof(__u64)) +
+ /* SEG6_LOCAL_CNT_ERRORS */
+ nla_total_size_64bit(sizeof(__u64));
+
return nlsize;
}
* ESTABLISHED state and will not have the SOCK_DEAD flag.
* Both result in warnings from inet_sock_destruct.
*/
-
- if (sk->sk_state == TCP_ESTABLISHED) {
+ if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) {
sk->sk_state = TCP_CLOSE;
WARN_ON_ONCE(sk->sk_socket);
sock_orphan(sk);
spin_lock_bh(&nf_ftp_lock);
fb_ptr = skb_header_pointer(skb, dataoff, datalen, ftp_buffer);
- BUG_ON(fb_ptr == NULL);
+ if (!fb_ptr) {
+ spin_unlock_bh(&nf_ftp_lock);
+ return NF_ACCEPT;
+ }
ends_in_nl = (fb_ptr[datalen - 1] == '\n');
seq = ntohl(th->seq) + datalen;
/* Get first TPKT pointer */
tpkt = skb_header_pointer(skb, tcpdataoff, tcpdatalen,
h323_buffer);
- BUG_ON(tpkt == NULL);
+ if (!tpkt)
+ goto clear_out;
/* Validate TPKT identifier */
if (tcpdatalen < 4 || tpkt[0] != 0x03 || tpkt[1] != 0) {
spin_lock_bh(&irc_buffer_lock);
ib_ptr = skb_header_pointer(skb, dataoff, skb->len - dataoff,
irc_buffer);
- BUG_ON(ib_ptr == NULL);
+ if (!ib_ptr) {
+ spin_unlock_bh(&irc_buffer_lock);
+ return NF_ACCEPT;
+ }
data = ib_ptr;
data_limit = ib_ptr + skb->len - dataoff;
nexthdr_off = protoff;
tcph = skb_header_pointer(skb, nexthdr_off, sizeof(_tcph), &_tcph);
- BUG_ON(!tcph);
+ if (!tcph)
+ return NF_ACCEPT;
+
nexthdr_off += tcph->doff * 4;
datalen = tcplen - tcph->doff * 4;
ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
length, buff);
- BUG_ON(ptr == NULL);
+ if (!ptr)
+ return;
state->td_scale =
state->flags = 0;
ptr = skb_header_pointer(skb, dataoff + sizeof(struct tcphdr),
length, buff);
- BUG_ON(ptr == NULL);
+ if (!ptr)
+ return;
/* Fast path for timestamp-only option */
if (length == TCPOLEN_TSTAMP_ALIGNED
spin_lock_bh(&nf_sane_lock);
sb_ptr = skb_header_pointer(skb, dataoff, datalen, sane_buffer);
- BUG_ON(sb_ptr == NULL);
+ if (!sb_ptr) {
+ spin_unlock_bh(&nf_sane_lock);
+ return NF_ACCEPT;
+ }
if (dir == IP_CT_DIR_ORIGINAL) {
if (datalen != sizeof(struct sane_request))
unsigned char *udata;
struct nft_set *set;
struct nft_ctx ctx;
+ size_t alloc_size;
u64 timeout;
char *name;
int err, i;
size = 0;
if (ops->privsize != NULL)
size = ops->privsize(nla, &desc);
-
- set = kvzalloc(sizeof(*set) + size + udlen, GFP_KERNEL);
+ alloc_size = sizeof(*set) + size + udlen;
+ if (alloc_size < size)
+ return -ENOMEM;
+ set = kvzalloc(alloc_size, GFP_KERNEL);
if (!set)
return -ENOMEM;
INIT_LIST_HEAD(&obj->list);
return err;
err_trans:
- kfree(obj->key.name);
-err_userdata:
kfree(obj->udata);
+err_userdata:
+ kfree(obj->key.name);
err_strdup:
if (obj->ops->destroy)
obj->ops->destroy(&ctx, obj);
nfnl_unlock(subsys_id);
break;
default:
+ rcu_read_unlock();
err = -EINVAL;
break;
}
ctx->optp = skb_header_pointer(skb, ip_hdrlen(skb) +
sizeof(struct tcphdr), ctx->optsize, opts);
+ if (!ctx->optp)
+ return NULL;
}
return tcp;
(void *)set);
}
+/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
+#define NFT_MAX_BUCKETS (1U << 31)
+
static u32 nft_hash_buckets(u32 size)
{
- return roundup_pow_of_two(size * 4 / 3);
+ u64 val = div_u64((u64)size * 4, 3);
+
+ if (val >= NFT_MAX_BUCKETS)
+ return NFT_MAX_BUCKETS;
+
+ return roundup_pow_of_two(val);
}
static bool nft_rhash_estimate(const struct nft_set_desc *desc, u32 features,
const struct nft_set_desc *desc)
{
return sizeof(struct nft_hash) +
- nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head);
}
static int nft_hash_init(const struct nft_set *set,
return false;
est->size = sizeof(struct nft_hash) +
- nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
- desc->size * sizeof(struct nft_hash_elem);
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+ (u64)desc->size * sizeof(struct nft_hash_elem);
est->lookup = NFT_SET_CLASS_O_1;
est->space = NFT_SET_CLASS_O_N;
return false;
est->size = sizeof(struct nft_hash) +
- nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
- desc->size * sizeof(struct nft_hash_elem);
+ (u64)nft_hash_buckets(desc->size) * sizeof(struct hlist_head) +
+ (u64)desc->size * sizeof(struct nft_hash_elem);
est->lookup = NFT_SET_CLASS_O_1;
est->space = NFT_SET_CLASS_O_N;
static u8 mode;
static unsigned int
-secmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
+secmark_tg(struct sk_buff *skb, const struct xt_secmark_target_info_v1 *info)
{
u32 secmark = 0;
- const struct xt_secmark_target_info *info = par->targinfo;
switch (mode) {
case SECMARK_MODE_SEL:
return XT_CONTINUE;
}
-static int checkentry_lsm(struct xt_secmark_target_info *info)
+static int checkentry_lsm(struct xt_secmark_target_info_v1 *info)
{
int err;
return 0;
}
-static int secmark_tg_check(const struct xt_tgchk_param *par)
+static int
+secmark_tg_check(const char *table, struct xt_secmark_target_info_v1 *info)
{
- struct xt_secmark_target_info *info = par->targinfo;
int err;
- if (strcmp(par->table, "mangle") != 0 &&
- strcmp(par->table, "security") != 0) {
+ if (strcmp(table, "mangle") != 0 &&
+ strcmp(table, "security") != 0) {
pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n",
- par->table);
+ table);
return -EINVAL;
}
}
}
-static struct xt_target secmark_tg_reg __read_mostly = {
- .name = "SECMARK",
- .revision = 0,
- .family = NFPROTO_UNSPEC,
- .checkentry = secmark_tg_check,
- .destroy = secmark_tg_destroy,
- .target = secmark_tg,
- .targetsize = sizeof(struct xt_secmark_target_info),
- .me = THIS_MODULE,
+static int secmark_tg_check_v0(const struct xt_tgchk_param *par)
+{
+ struct xt_secmark_target_info *info = par->targinfo;
+ struct xt_secmark_target_info_v1 newinfo = {
+ .mode = info->mode,
+ };
+ int ret;
+
+ memcpy(newinfo.secctx, info->secctx, SECMARK_SECCTX_MAX);
+
+ ret = secmark_tg_check(par->table, &newinfo);
+ info->secid = newinfo.secid;
+
+ return ret;
+}
+
+static unsigned int
+secmark_tg_v0(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ const struct xt_secmark_target_info *info = par->targinfo;
+ struct xt_secmark_target_info_v1 newinfo = {
+ .secid = info->secid,
+ };
+
+ return secmark_tg(skb, &newinfo);
+}
+
+static int secmark_tg_check_v1(const struct xt_tgchk_param *par)
+{
+ return secmark_tg_check(par->table, par->targinfo);
+}
+
+static unsigned int
+secmark_tg_v1(struct sk_buff *skb, const struct xt_action_param *par)
+{
+ return secmark_tg(skb, par->targinfo);
+}
+
+static struct xt_target secmark_tg_reg[] __read_mostly = {
+ {
+ .name = "SECMARK",
+ .revision = 0,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check_v0,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v0,
+ .targetsize = sizeof(struct xt_secmark_target_info),
+ .me = THIS_MODULE,
+ },
+ {
+ .name = "SECMARK",
+ .revision = 1,
+ .family = NFPROTO_UNSPEC,
+ .checkentry = secmark_tg_check_v1,
+ .destroy = secmark_tg_destroy,
+ .target = secmark_tg_v1,
+ .targetsize = sizeof(struct xt_secmark_target_info_v1),
+ .usersize = offsetof(struct xt_secmark_target_info_v1, secid),
+ .me = THIS_MODULE,
+ },
};
static int __init secmark_tg_init(void)
{
- return xt_register_target(&secmark_tg_reg);
+ return xt_register_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
}
static void __exit secmark_tg_exit(void)
{
- xt_unregister_target(&secmark_tg_reg);
+ xt_unregister_targets(secmark_tg_reg, ARRAY_SIZE(secmark_tg_reg));
}
module_init(secmark_tg_init);
GFP_KERNEL);
if (!llcp_sock->service_name) {
nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
ret = -ENOMEM;
goto put_dev;
}
llcp_sock->ssap = nfc_llcp_get_sdp_ssap(local, llcp_sock);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
kfree(llcp_sock->service_name);
llcp_sock->service_name = NULL;
ret = -EADDRINUSE;
llcp_sock->ssap = nfc_llcp_get_local_ssap(local);
if (llcp_sock->ssap == LLCP_SAP_MAX) {
nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
ret = -ENOMEM;
goto put_dev;
}
sock_llcp_release:
nfc_llcp_put_ssap(local, llcp_sock->ssap);
nfc_llcp_local_put(llcp_sock->local);
+ llcp_sock->local = NULL;
put_dev:
nfc_put_device(dev);
}
if (key->eth.type == htons(ETH_P_IP)) {
- struct dst_entry ovs_dst;
+ struct rtable ovs_rt = { 0 };
unsigned long orig_dst;
prepare_frag(vport, skb, orig_network_offset,
ovs_key_mac_proto(key));
- dst_init(&ovs_dst, &ovs_dst_ops, NULL, 1,
+ dst_init(&ovs_rt.dst, &ovs_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
- ovs_dst.dev = vport->dev;
+ ovs_rt.dst.dev = vport->dev;
orig_dst = skb->_skb_refdst;
- skb_dst_set_noref(skb, &ovs_dst);
+ skb_dst_set_noref(skb, &ovs_rt.dst);
IPCB(skb)->frag_max_size = mru;
ip_do_fragment(net, skb->sk, skb, ovs_vport_output);
}
if (skb_protocol(skb, true) == htons(ETH_P_IP)) {
- struct dst_entry sch_frag_dst;
+ struct rtable sch_frag_rt = { 0 };
unsigned long orig_dst;
sch_frag_prepare_frag(skb, xmit);
- dst_init(&sch_frag_dst, &sch_frag_dst_ops, NULL, 1,
+ dst_init(&sch_frag_rt.dst, &sch_frag_dst_ops, NULL, 1,
DST_OBSOLETE_NONE, DST_NOCOUNT);
- sch_frag_dst.dev = skb->dev;
+ sch_frag_rt.dst.dev = skb->dev;
orig_dst = skb->_skb_refdst;
- skb_dst_set_noref(skb, &sch_frag_dst);
+ skb_dst_set_noref(skb, &sch_frag_rt.dst);
IPCB(skb)->frag_max_size = mru;
ret = ip_do_fragment(net, skb->sk, skb, sch_frag_xmit);
struct sctp_chunk *retval;
__u32 ctsn;
- if (chunk && chunk->asoc)
- ctsn = sctp_tsnmap_get_ctsn(&chunk->asoc->peer.tsn_map);
- else
- ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
-
+ ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map);
shut.cum_tsn_ack = htonl(ctsn);
retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0,
asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto;
}
-static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds,
- struct sctp_association *asoc,
- struct sctp_association *new)
-{
- struct net *net = asoc->base.net;
- struct sctp_chunk *abort;
-
- if (!sctp_assoc_update(asoc, new))
- return;
-
- abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
- if (abort) {
- sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
- sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
- }
- sctp_add_cmd_sf(cmds, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED));
- sctp_add_cmd_sf(cmds, SCTP_CMD_ASSOC_FAILED,
- SCTP_PERR(SCTP_ERROR_RSRC_LOW));
- SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
- SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
-}
-
/* Helper function to change the state of an association. */
static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds,
struct sctp_association *asoc,
sctp_endpoint_add_asoc(ep, asoc);
break;
- case SCTP_CMD_UPDATE_ASSOC:
- sctp_cmd_assoc_update(commands, asoc, cmd->obj.asoc);
- break;
-
case SCTP_CMD_PURGE_OUTQUEUE:
sctp_outq_teardown(&asoc->outqueue);
break;
return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands);
}
+static int sctp_sf_do_assoc_update(struct sctp_association *asoc,
+ struct sctp_association *new,
+ struct sctp_cmd_seq *cmds)
+{
+ struct net *net = asoc->base.net;
+ struct sctp_chunk *abort;
+
+ if (!sctp_assoc_update(asoc, new))
+ return 0;
+
+ abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr));
+ if (abort) {
+ sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0);
+ sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort));
+ }
+ sctp_add_cmd_sf(cmds, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNABORTED));
+ sctp_add_cmd_sf(cmds, SCTP_CMD_ASSOC_FAILED,
+ SCTP_PERR(SCTP_ERROR_RSRC_LOW));
+ SCTP_INC_STATS(net, SCTP_MIB_ABORTEDS);
+ SCTP_DEC_STATS(net, SCTP_MIB_CURRESTAB);
+
+ return -ENOMEM;
+}
+
/* Unexpected COOKIE-ECHO handler for peer restart (Table 2, action 'A')
*
* Section 5.2.4
SCTP_TO(SCTP_EVENT_TIMEOUT_T4_RTO));
sctp_add_cmd_sf(commands, SCTP_CMD_PURGE_ASCONF_QUEUE, SCTP_NULL());
- repl = sctp_make_cookie_ack(new_asoc, chunk);
+ /* Update the content of current association. */
+ if (sctp_sf_do_assoc_update((struct sctp_association *)asoc, new_asoc, commands))
+ goto nomem;
+
+ repl = sctp_make_cookie_ack(asoc, chunk);
if (!repl)
goto nomem;
/* Report association restart to upper layer. */
ev = sctp_ulpevent_make_assoc_change(asoc, 0, SCTP_RESTART, 0,
- new_asoc->c.sinit_num_ostreams,
- new_asoc->c.sinit_max_instreams,
+ asoc->c.sinit_num_ostreams,
+ asoc->c.sinit_max_instreams,
NULL, GFP_ATOMIC);
if (!ev)
goto nomem_ev;
- /* Update the content of current association. */
- sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev));
if ((sctp_state(asoc, SHUTDOWN_PENDING) ||
sctp_state(asoc, SHUTDOWN_SENT)) &&
if (!sctp_auth_chunk_verify(net, chunk, new_asoc))
return SCTP_DISPOSITION_DISCARD;
- /* Update the content of current association. */
- sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc));
sctp_add_cmd_sf(commands, SCTP_CMD_NEW_STATE,
SCTP_STATE(SCTP_STATE_ESTABLISHED));
- SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
+ if (asoc->state < SCTP_STATE_ESTABLISHED)
+ SCTP_INC_STATS(net, SCTP_MIB_CURRESTAB);
sctp_add_cmd_sf(commands, SCTP_CMD_HB_TIMERS_START, SCTP_NULL());
- repl = sctp_make_cookie_ack(new_asoc, chunk);
+ /* Update the content of current association. */
+ if (sctp_sf_do_assoc_update((struct sctp_association *)asoc, new_asoc, commands))
+ goto nomem;
+
+ repl = sctp_make_cookie_ack(asoc, chunk);
if (!repl)
goto nomem;
return af;
}
+static void sctp_auto_asconf_init(struct sctp_sock *sp)
+{
+ struct net *net = sock_net(&sp->inet.sk);
+
+ if (net->sctp.default_auto_asconf) {
+ spin_lock(&net->sctp.addr_wq_lock);
+ list_add_tail(&sp->auto_asconf_list, &net->sctp.auto_asconf_splist);
+ spin_unlock(&net->sctp.addr_wq_lock);
+ sp->do_auto_asconf = 1;
+ }
+}
+
/* Bind a local address either to an endpoint or to an association. */
static int sctp_do_bind(struct sock *sk, union sctp_addr *addr, int len)
{
return -EADDRINUSE;
/* Refresh ephemeral port. */
- if (!bp->port)
+ if (!bp->port) {
bp->port = inet_sk(sk)->inet_num;
+ sctp_auto_asconf_init(sp);
+ }
/* Add the address to the bind address list.
* Use GFP_ATOMIC since BHs will be disabled.
/* Supposedly, no process has access to the socket, but
* the net layers still may.
+ * Also, sctp_destroy_sock() needs to be called with addr_wq_lock
+ * held and that should be grabbed before socket lock.
*/
- local_bh_disable();
- bh_lock_sock(sk);
+ spin_lock_bh(&net->sctp.addr_wq_lock);
+ bh_lock_sock_nested(sk);
/* Hold the sock, since sk_common_release() will put sock_put()
* and we have just a little more cleanup.
sk_common_release(sk);
bh_unlock_sock(sk);
- local_bh_enable();
+ spin_unlock_bh(&net->sctp.addr_wq_lock);
sock_put(sk);
sk_sockets_allocated_inc(sk);
sock_prot_inuse_add(net, sk->sk_prot, 1);
- if (net->sctp.default_auto_asconf) {
- spin_lock(&sock_net(sk)->sctp.addr_wq_lock);
- list_add_tail(&sp->auto_asconf_list,
- &net->sctp.auto_asconf_splist);
- sp->do_auto_asconf = 1;
- spin_unlock(&sock_net(sk)->sctp.addr_wq_lock);
- } else {
- sp->do_auto_asconf = 0;
- }
-
local_bh_enable();
return 0;
if (sp->do_auto_asconf) {
sp->do_auto_asconf = 0;
- spin_lock_bh(&sock_net(sk)->sctp.addr_wq_lock);
list_del(&sp->auto_asconf_list);
- spin_unlock_bh(&sock_net(sk)->sctp.addr_wq_lock);
}
sctp_endpoint_free(sp->ep);
local_bh_disable();
return err;
}
+ sctp_auto_asconf_init(newsp);
+
/* Move any messages in the old socket's receive queue that are for the
* peeled off association to the new socket's receive queue.
*/
struct smc_sock *smc;
int val, rc;
+ if (level == SOL_TCP && optname == TCP_ULP)
+ return -EOPNOTSUPP;
+
smc = smc_sk(sk);
/* generic setsockopts reaching us here always apply to the
if (rc || smc->use_fallback)
goto out;
switch (optname) {
- case TCP_ULP:
case TCP_FASTOPEN:
case TCP_FASTOPEN_CONNECT:
case TCP_FASTOPEN_KEY:
status = xprt->ops->buf_alloc(task);
trace_rpc_buf_alloc(task, status);
- xprt_inject_disconnect(xprt);
if (status == 0)
return;
if (status != -ENOMEM) {
task->tk_flags &= ~RPC_CALL_MAJORSEEN;
}
- /*
- * Ensure that we see all writes made by xprt_complete_rqst()
- * before it changed req->rq_reply_bytes_recvd.
- */
- smp_rmb();
-
/*
* Did we ever call xprt_complete_rqst()? If not, we should assume
* the message is incomplete.
if (!req->rq_reply_bytes_recvd)
goto out;
+ /* Ensure that we see all writes made by xprt_complete_rqst()
+ * before it changed req->rq_reply_bytes_recvd.
+ */
+ smp_rmb();
+
req->rq_rcv_buf.len = req->rq_private_buf.len;
trace_rpc_xdr_recvfrom(task, &req->rq_rcv_buf);
const char *hostname,
struct sockaddr *srvaddr, size_t salen,
int proto, u32 version,
- const struct cred *cred)
+ const struct cred *cred,
+ const struct rpc_timeout *timeo)
{
struct rpc_create_args args = {
.net = net,
.protocol = proto,
.address = srvaddr,
.addrsize = salen,
+ .timeout = timeo,
.servername = hostname,
.nodename = nodename,
.program = &rpcb_program,
clnt->cl_nodename,
xprt->servername, sap, salen,
xprt->prot, bind_version,
- clnt->cl_cred);
+ clnt->cl_cred,
+ task->tk_client->cl_timeout);
if (IS_ERR(rpcb_clnt)) {
status = PTR_ERR(rpcb_clnt);
goto bailout_nofree;
svc_rqst_free(struct svc_rqst *rqstp)
{
svc_release_buffer(rqstp);
- put_page(rqstp->rq_scratch_page);
+ if (rqstp->rq_scratch_page)
+ put_page(rqstp->rq_scratch_page);
kfree(rqstp->rq_resp);
kfree(rqstp->rq_argp);
kfree(rqstp->rq_auth_data);
tcp_sock_set_cork(svsk->sk_sk, true);
err = svc_tcp_sendmsg(svsk->sk_sock, xdr, marker, &sent);
xdr_free_bvec(xdr);
- trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
+ trace_svcsock_tcp_send(xprt, err < 0 ? (long)err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))
goto out_close;
if (atomic_dec_and_test(&svsk->sk_sendqlen))
const struct rpc_timeout *to = req->rq_task->tk_client->cl_timeout;
int status = 0;
- if (time_before(jiffies, req->rq_minortimeo))
- return status;
if (time_before(jiffies, req->rq_majortimeo)) {
+ if (time_before(jiffies, req->rq_minortimeo))
+ return status;
if (to->to_exponential)
req->rq_timeout <<= 1;
else
list_add_tail(&req->rq_xmit, &xprt->xmit_queue);
INIT_LIST_HEAD(&req->rq_xmit2);
out:
+ atomic_long_inc(&xprt->xmit_queuelen);
set_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate);
spin_unlock(&xprt->queue_lock);
}
}
} else
list_del(&req->rq_xmit2);
+ atomic_long_dec(&req->rq_xprt->xmit_queuelen);
}
/**
struct rpc_xprt *xprt = req->rq_xprt;
if (!xprt_lock_write(xprt, task)) {
- trace_xprt_transmit_queued(xprt, task);
-
/* Race breaker: someone may have transmitted us */
if (!test_bit(RPC_TASK_NEED_XMIT, &task->tk_runstate))
rpc_wake_up_queued_task_set_status(&xprt->sending,
void xprt_end_transmit(struct rpc_task *task)
{
- xprt_release_write(task->tk_rqstp->rq_xprt, task);
+ struct rpc_xprt *xprt = task->tk_rqstp->rq_xprt;
+
+ xprt_inject_disconnect(xprt);
+ xprt_release_write(xprt, task);
}
/**
return status;
}
- if (is_retrans)
+ if (is_retrans) {
task->tk_client->cl_stats->rpcretrans++;
+ trace_xprt_retransmit(req);
+ }
xprt_inject_disconnect(xprt);
spin_unlock(&xprt->transport_lock);
if (req->rq_buffer)
xprt->ops->buf_free(task);
- xprt_inject_disconnect(xprt);
xdr_free_bvec(&req->rq_rcv_buf);
xdr_free_bvec(&req->rq_snd_buf);
if (req->rq_cred != NULL)
void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst)
{
struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+ struct rpcrdma_rep *rep = req->rl_reply;
struct rpc_xprt *xprt = rqst->rq_xprt;
+ struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
- rpcrdma_recv_buffer_put(req->rl_reply);
+ rpcrdma_rep_put(&r_xprt->rx_buf, rep);
req->rl_reply = NULL;
spin_lock(&xprt->bc_pa_lock);
# define RPCDBG_FACILITY RPCDBG_TRANS
#endif
-/**
- * frwr_release_mr - Destroy one MR
- * @mr: MR allocated by frwr_mr_init
- *
- */
-void frwr_release_mr(struct rpcrdma_mr *mr)
+static void frwr_cid_init(struct rpcrdma_ep *ep,
+ struct rpcrdma_mr *mr)
{
- int rc;
+ struct rpc_rdma_cid *cid = &mr->mr_cid;
- rc = ib_dereg_mr(mr->frwr.fr_mr);
- if (rc)
- trace_xprtrdma_frwr_dereg(mr, rc);
- kfree(mr->mr_sg);
- kfree(mr);
+ cid->ci_queue_id = ep->re_attr.send_cq->res.id;
+ cid->ci_completion_id = mr->mr_ibmr->res.id;
}
static void frwr_mr_unmap(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr)
}
}
-static void frwr_mr_recycle(struct rpcrdma_mr *mr)
+/**
+ * frwr_mr_release - Destroy one MR
+ * @mr: MR allocated by frwr_mr_init
+ *
+ */
+void frwr_mr_release(struct rpcrdma_mr *mr)
{
- struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
-
- trace_xprtrdma_mr_recycle(mr);
-
- frwr_mr_unmap(r_xprt, mr);
+ int rc;
- spin_lock(&r_xprt->rx_buf.rb_lock);
- list_del(&mr->mr_all);
- r_xprt->rx_stats.mrs_recycled++;
- spin_unlock(&r_xprt->rx_buf.rb_lock);
+ frwr_mr_unmap(mr->mr_xprt, mr);
- frwr_release_mr(mr);
+ rc = ib_dereg_mr(mr->mr_ibmr);
+ if (rc)
+ trace_xprtrdma_frwr_dereg(mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static void frwr_mr_put(struct rpcrdma_mr *mr)
goto out_list_err;
mr->mr_xprt = r_xprt;
- mr->frwr.fr_mr = frmr;
+ mr->mr_ibmr = frmr;
mr->mr_device = NULL;
INIT_LIST_HEAD(&mr->mr_list);
- init_completion(&mr->frwr.fr_linv_done);
+ init_completion(&mr->mr_linv_done);
+ frwr_cid_init(ep, mr);
sg_init_table(sg, depth);
mr->mr_sg = sg;
ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */
ep->re_attr.cap.max_recv_wr = ep->re_max_requests;
ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS;
+ ep->re_attr.cap.max_recv_wr += RPCRDMA_MAX_RECV_BATCH;
ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */
ep->re_max_rdma_segs =
goto out_dmamap_err;
mr->mr_device = ep->re_id->device;
- ibmr = mr->frwr.fr_mr;
+ ibmr = mr->mr_ibmr;
n = ib_map_mr_sg(ibmr, mr->mr_sg, dma_nents, NULL, PAGE_SIZE);
if (n != dma_nents)
goto out_mapmr_err;
key = (u8)(ibmr->rkey & 0x000000FF);
ib_update_fast_reg_key(ibmr, ++key);
- reg_wr = &mr->frwr.fr_regwr;
+ reg_wr = &mr->mr_regwr;
reg_wr->mr = ibmr;
reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
* @cq: completion queue
* @wc: WCE for a completed FastReg WR
*
+ * Each flushed MR gets destroyed after the QP has drained.
*/
static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_fastreg(wc, &frwr->fr_cid);
- /* The MR will get recycled when the associated req is retransmitted */
+ trace_xprtrdma_wc_fastreg(wc, &mr->mr_cid);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
-static void frwr_cid_init(struct rpcrdma_ep *ep,
- struct rpcrdma_frwr *frwr)
-{
- struct rpc_rdma_cid *cid = &frwr->fr_cid;
-
- cid->ci_queue_id = ep->re_attr.send_cq->res.id;
- cid->ci_completion_id = frwr->fr_mr->res.id;
-}
-
/**
* frwr_send - post Send WRs containing the RPC Call message
* @r_xprt: controlling transport instance
*/
int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
+ struct ib_send_wr *post_wr, *send_wr = &req->rl_wr;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
- struct ib_send_wr *post_wr;
struct rpcrdma_mr *mr;
+ unsigned int num_wrs;
- post_wr = &req->rl_wr;
+ num_wrs = 1;
+ post_wr = send_wr;
list_for_each_entry(mr, &req->rl_registered, mr_list) {
- struct rpcrdma_frwr *frwr;
-
- frwr = &mr->frwr;
-
- frwr->fr_cqe.done = frwr_wc_fastreg;
- frwr_cid_init(ep, frwr);
- frwr->fr_regwr.wr.next = post_wr;
- frwr->fr_regwr.wr.wr_cqe = &frwr->fr_cqe;
- frwr->fr_regwr.wr.num_sge = 0;
- frwr->fr_regwr.wr.opcode = IB_WR_REG_MR;
- frwr->fr_regwr.wr.send_flags = 0;
+ trace_xprtrdma_mr_fastreg(mr);
+
+ mr->mr_cqe.done = frwr_wc_fastreg;
+ mr->mr_regwr.wr.next = post_wr;
+ mr->mr_regwr.wr.wr_cqe = &mr->mr_cqe;
+ mr->mr_regwr.wr.num_sge = 0;
+ mr->mr_regwr.wr.opcode = IB_WR_REG_MR;
+ mr->mr_regwr.wr.send_flags = 0;
+ post_wr = &mr->mr_regwr.wr;
+ ++num_wrs;
+ }
- post_wr = &frwr->fr_regwr.wr;
+ if ((kref_read(&req->rl_kref) > 1) || num_wrs > ep->re_send_count) {
+ send_wr->send_flags |= IB_SEND_SIGNALED;
+ ep->re_send_count = min_t(unsigned int, ep->re_send_batch,
+ num_wrs - ep->re_send_count);
+ } else {
+ send_wr->send_flags &= ~IB_SEND_SIGNALED;
+ ep->re_send_count -= num_wrs;
}
+ trace_xprtrdma_post_send(req);
return ib_post_send(ep->re_id->qp, post_wr, NULL);
}
list_for_each_entry(mr, mrs, mr_list)
if (mr->mr_handle == rep->rr_inv_rkey) {
list_del_init(&mr->mr_list);
+ trace_xprtrdma_mr_reminv(mr);
frwr_mr_put(mr);
break; /* only one invalidated MR per RPC */
}
static void frwr_mr_done(struct ib_wc *wc, struct rpcrdma_mr *mr)
{
- if (wc->status != IB_WC_SUCCESS)
- frwr_mr_recycle(mr);
- else
+ if (likely(wc->status == IB_WC_SUCCESS))
frwr_mr_put(mr);
}
static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li(wc, &frwr->fr_cid);
+ trace_xprtrdma_wc_li(wc, &mr->mr_cid);
frwr_mr_done(wc, mr);
rpcrdma_flush_disconnect(cq->cq_context, wc);
static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_wake(wc, &frwr->fr_cid);
+ trace_xprtrdma_wc_li_wake(wc, &mr->mr_cid);
frwr_mr_done(wc, mr);
- complete(&frwr->fr_linv_done);
+ complete(&mr->mr_linv_done);
rpcrdma_flush_disconnect(cq->cq_context, wc);
}
struct ib_send_wr *first, **prev, *last;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
const struct ib_send_wr *bad_wr;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
int rc;
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- frwr = NULL;
prev = &first;
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
- frwr = &mr->frwr;
- frwr->fr_cqe.done = frwr_wc_localinv;
- frwr_cid_init(ep, frwr);
- last = &frwr->fr_invwr;
+ last = &mr->mr_invwr;
last->next = NULL;
- last->wr_cqe = &frwr->fr_cqe;
+ last->wr_cqe = &mr->mr_cqe;
last->sg_list = NULL;
last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
+ last->wr_cqe->done = frwr_wc_localinv;
+
*prev = last;
prev = &last->next;
}
+ mr = container_of(last, struct rpcrdma_mr, mr_invwr);
/* Strong send queue ordering guarantees that when the
* last WR in the chain completes, all WRs in the chain
* are complete.
*/
- frwr->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&frwr->fr_linv_done);
+ last->wr_cqe->done = frwr_wc_localinv_wake;
+ reinit_completion(&mr->mr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* not happen, so don't wait in that case.
*/
if (bad_wr != first)
- wait_for_completion(&frwr->fr_linv_done);
+ wait_for_completion(&mr->mr_linv_done);
if (!rc)
return;
- /* Recycle MRs in the LOCAL_INV chain that did not get posted.
- */
+ /* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
- while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr,
- fr_invwr);
- mr = container_of(frwr, struct rpcrdma_mr, frwr);
- bad_wr = bad_wr->next;
-
- list_del_init(&mr->mr_list);
- frwr_mr_recycle(mr);
- }
}
/**
static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
{
struct ib_cqe *cqe = wc->wr_cqe;
- struct rpcrdma_frwr *frwr =
- container_of(cqe, struct rpcrdma_frwr, fr_cqe);
- struct rpcrdma_mr *mr = container_of(frwr, struct rpcrdma_mr, frwr);
- struct rpcrdma_rep *rep = mr->mr_req->rl_reply;
+ struct rpcrdma_mr *mr = container_of(cqe, struct rpcrdma_mr, mr_cqe);
+ struct rpcrdma_rep *rep;
/* WARNING: Only wr_cqe and status are reliable at this point */
- trace_xprtrdma_wc_li_done(wc, &frwr->fr_cid);
- frwr_mr_done(wc, mr);
+ trace_xprtrdma_wc_li_done(wc, &mr->mr_cid);
- /* Ensure @rep is generated before frwr_mr_done */
+ /* Ensure that @rep is generated before the MR is released */
+ rep = mr->mr_req->rl_reply;
smp_rmb();
- rpcrdma_complete_rqst(rep);
- rpcrdma_flush_disconnect(cq->cq_context, wc);
+ if (wc->status != IB_WC_SUCCESS) {
+ if (rep)
+ rpcrdma_unpin_rqst(rep);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
+ return;
+ }
+ frwr_mr_put(mr);
+ rpcrdma_complete_rqst(rep);
}
/**
{
struct ib_send_wr *first, *last, **prev;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
- const struct ib_send_wr *bad_wr;
- struct rpcrdma_frwr *frwr;
struct rpcrdma_mr *mr;
int rc;
/* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- frwr = NULL;
prev = &first;
while ((mr = rpcrdma_mr_pop(&req->rl_registered))) {
trace_xprtrdma_mr_localinv(mr);
r_xprt->rx_stats.local_inv_needed++;
- frwr = &mr->frwr;
- frwr->fr_cqe.done = frwr_wc_localinv;
- frwr_cid_init(ep, frwr);
- last = &frwr->fr_invwr;
+ last = &mr->mr_invwr;
last->next = NULL;
- last->wr_cqe = &frwr->fr_cqe;
+ last->wr_cqe = &mr->mr_cqe;
last->sg_list = NULL;
last->num_sge = 0;
last->opcode = IB_WR_LOCAL_INV;
last->send_flags = IB_SEND_SIGNALED;
last->ex.invalidate_rkey = mr->mr_handle;
+ last->wr_cqe->done = frwr_wc_localinv;
+
*prev = last;
prev = &last->next;
}
* are complete. The last completion will wake up the
* RPC waiter.
*/
- frwr->fr_cqe.done = frwr_wc_localinv_done;
+ last->wr_cqe->done = frwr_wc_localinv_done;
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
* unless re_id->qp is a valid pointer.
*/
- bad_wr = NULL;
- rc = ib_post_send(ep->re_id->qp, first, &bad_wr);
+ rc = ib_post_send(ep->re_id->qp, first, NULL);
if (!rc)
return;
- /* Recycle MRs in the LOCAL_INV chain that did not get posted.
- */
+ /* On error, the MRs get destroyed once the QP has drained. */
trace_xprtrdma_post_linv_err(req, rc);
- while (bad_wr) {
- frwr = container_of(bad_wr, struct rpcrdma_frwr, fr_invwr);
- mr = container_of(frwr, struct rpcrdma_mr, frwr);
- bad_wr = bad_wr->next;
-
- frwr_mr_recycle(mr);
- }
/* The final LOCAL_INV WR in the chain is supposed to
- * do the wake. If it was never posted, the wake will
- * not happen, so wake here in that case.
+ * do the wake. If it was never posted, the wake does
+ * not happen. Unpin the rqst in preparation for its
+ * retransmission.
*/
- rpcrdma_complete_rqst(req->rl_reply);
+ rpcrdma_unpin_rqst(req->rl_reply);
}
return -EIO;
}
-/* Perform XID lookup, reconstruction of the RPC reply, and
- * RPC completion while holding the transport lock to ensure
- * the rep, rqst, and rq_task pointers remain stable.
+/**
+ * rpcrdma_unpin_rqst - Release rqst without completing it
+ * @rep: RPC/RDMA Receive context
+ *
+ * This is done when a connection is lost so that a Reply
+ * can be dropped and its matching Call can be subsequently
+ * retransmitted on a new connection.
+ */
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep)
+{
+ struct rpc_xprt *xprt = &rep->rr_rxprt->rx_xprt;
+ struct rpc_rqst *rqst = rep->rr_rqst;
+ struct rpcrdma_req *req = rpcr_to_rdmar(rqst);
+
+ req->rl_reply = NULL;
+ rep->rr_rqst = NULL;
+
+ spin_lock(&xprt->queue_lock);
+ xprt_unpin_rqst(rqst);
+ spin_unlock(&xprt->queue_lock);
+}
+
+/**
+ * rpcrdma_complete_rqst - Pass completed rqst back to RPC
+ * @rep: RPC/RDMA Receive context
+ *
+ * Reconstruct the RPC reply and complete the transaction
+ * while @rqst is still pinned to ensure the rep, rqst, and
+ * rq_task pointers remain stable.
*/
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep)
{
credits = 1; /* don't deadlock */
else if (credits > r_xprt->rx_ep->re_max_requests)
credits = r_xprt->rx_ep->re_max_requests;
+ rpcrdma_post_recvs(r_xprt, credits + (buf->rb_bc_srv_max_requests << 1),
+ false);
if (buf->rb_credits != credits)
rpcrdma_update_cwnd(r_xprt, credits);
- rpcrdma_post_recvs(r_xprt, false);
req = rpcr_to_rdmar(rqst);
if (unlikely(req->rl_reply))
- rpcrdma_recv_buffer_put(req->rl_reply);
+ rpcrdma_rep_put(buf, req->rl_reply);
req->rl_reply = rep;
rep->rr_rqst = rqst;
trace_xprtrdma_reply_short_err(rep);
out:
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_put(buf, rep);
}
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
__be32 *rdma_argp = rctxt->rc_recv_buf;
struct svc_rdma_send_ctxt *sctxt;
+ unsigned int rc_size;
__be32 *p;
int ret;
ret = -ENOTCONN;
if (svc_xprt_is_dead(xprt))
- goto err0;
+ goto drop_connection;
ret = -ENOMEM;
sctxt = svc_rdma_send_ctxt_get(rdma);
if (!sctxt)
- goto err0;
+ goto drop_connection;
+ ret = -EMSGSIZE;
p = xdr_reserve_space(&sctxt->sc_stream,
rpcrdma_fixed_maxsz * sizeof(*p));
if (!p)
- goto err0;
+ goto put_ctxt;
ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
if (ret < 0)
- goto err2;
+ goto reply_chunk;
+ rc_size = ret;
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
*p++ = rdma->sc_fc_credits;
*p = pcl_is_empty(&rctxt->rc_reply_pcl) ? rdma_msg : rdma_nomsg;
- if (svc_rdma_encode_read_list(sctxt) < 0)
- goto err0;
- if (svc_rdma_encode_write_list(rctxt, sctxt) < 0)
- goto err0;
- if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0)
- goto err0;
+ ret = svc_rdma_encode_read_list(sctxt);
+ if (ret < 0)
+ goto put_ctxt;
+ ret = svc_rdma_encode_write_list(rctxt, sctxt);
+ if (ret < 0)
+ goto put_ctxt;
+ ret = svc_rdma_encode_reply_chunk(rctxt, sctxt, rc_size);
+ if (ret < 0)
+ goto put_ctxt;
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
- goto err1;
+ goto put_ctxt;
/* Prevent svc_xprt_release() from releasing the page backing
* rq_res.head[0].iov_base. It's no longer being accessed by
rqstp->rq_respages++;
return 0;
- err2:
+reply_chunk:
if (ret != -E2BIG && ret != -EINVAL)
- goto err1;
+ goto put_ctxt;
svc_rdma_send_error_msg(rdma, sctxt, rctxt, ret);
return 0;
- err1:
+put_ctxt:
svc_rdma_send_ctxt_put(rdma, sctxt);
- err0:
+drop_connection:
trace_svcrdma_send_err(rqstp, ret);
svc_xprt_deferred_close(&rdma->sc_xprt);
return -ENOTCONN;
* xprt_rdma_inject_disconnect - inject a connection fault
* @xprt: transport context
*
- * If @xprt is connected, disconnect it to simulate spurious connection
- * loss.
+ * If @xprt is connected, disconnect it to simulate spurious
+ * connection loss. Caller must hold @xprt's send lock to
+ * ensure that data structures and hardware resources are
+ * stable during the rdma_disconnect() call.
*/
static void
xprt_rdma_inject_disconnect(struct rpc_xprt *xprt)
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct rdma_cm_id *id = ep->re_id;
+ /* Wait for rpcrdma_post_recvs() to leave its critical
+ * section.
+ */
+ if (atomic_inc_return(&ep->re_receiving) > 1)
+ wait_for_completion(&ep->re_done);
+
/* Flush Receives, then wait for deferred Reply work
* to complete.
*/
rpcrdma_ep_put(ep);
}
-/**
- * rpcrdma_qp_event_handler - Handle one QP event (error notification)
- * @event: details of the event
- * @context: ep that owns QP where event occurred
- *
- * Called from the RDMA provider (device driver) possibly in an interrupt
- * context. The QP is always destroyed before the ID, so the ID will be
- * reliably available when this handler is invoked.
- */
-static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
-{
- struct rpcrdma_ep *ep = context;
-
- trace_xprtrdma_qp_event(ep, event);
-}
-
/* Ensure xprt_force_disconnect() is invoked exactly once when a
* connection is closed or lost. (The important thing is it needs
* to be invoked "at least" once).
out_flushed:
rpcrdma_flush_disconnect(r_xprt, wc);
- rpcrdma_rep_destroy(rep);
+ rpcrdma_rep_put(&r_xprt->rx_buf, rep);
}
static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep,
__module_get(THIS_MODULE);
device = id->device;
ep->re_id = id;
+ reinit_completion(&ep->re_done);
ep->re_max_requests = r_xprt->rx_xprt.max_reqs;
ep->re_inline_send = xprt_rdma_max_inline_write;
r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests);
- ep->re_attr.event_handler = rpcrdma_qp_event_handler;
- ep->re_attr.qp_context = ep;
ep->re_attr.srq = NULL;
ep->re_attr.cap.max_inline_data = 0;
ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
* outstanding Receives.
*/
rpcrdma_ep_get(ep);
- rpcrdma_post_recvs(r_xprt, true);
+ rpcrdma_post_recvs(r_xprt, 1, true);
rc = rdma_connect(ep->re_id, &ep->re_remote_cma);
if (rc)
rpcrdma_req_reset(req);
}
-/* No locking needed here. This function is called only by the
- * Receive completion handler.
- */
static noinline
struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt,
bool temp)
{
+ struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_rep *rep;
rep = kzalloc(sizeof(*rep), GFP_KERNEL);
rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov;
rep->rr_recv_wr.num_sge = 1;
rep->rr_temp = temp;
- list_add(&rep->rr_all, &r_xprt->rx_buf.rb_all_reps);
+
+ spin_lock(&buf->rb_lock);
+ list_add(&rep->rr_all, &buf->rb_all_reps);
+ spin_unlock(&buf->rb_lock);
return rep;
out_free_regbuf:
return NULL;
}
-/* No locking needed here. This function is invoked only by the
- * Receive completion handler, or during transport shutdown.
- */
-static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+static void rpcrdma_rep_free(struct rpcrdma_rep *rep)
{
- list_del(&rep->rr_all);
rpcrdma_regbuf_free(rep->rr_rdmabuf);
kfree(rep);
}
+static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep)
+{
+ struct rpcrdma_buffer *buf = &rep->rr_rxprt->rx_buf;
+
+ spin_lock(&buf->rb_lock);
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_free(rep);
+}
+
static struct rpcrdma_rep *rpcrdma_rep_get_locked(struct rpcrdma_buffer *buf)
{
struct llist_node *node;
return llist_entry(node, struct rpcrdma_rep, rr_node);
}
-static void rpcrdma_rep_put(struct rpcrdma_buffer *buf,
- struct rpcrdma_rep *rep)
+/**
+ * rpcrdma_rep_put - Release rpcrdma_rep back to free list
+ * @buf: buffer pool
+ * @rep: rep to release
+ *
+ */
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep)
{
llist_add(&rep->rr_node, &buf->rb_free_reps);
}
+/* Caller must ensure the QP is quiescent (RQ is drained) before
+ * invoking this function, to guarantee rb_all_reps is not
+ * changing.
+ */
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
list_for_each_entry(rep, &buf->rb_all_reps, rr_all) {
rpcrdma_regbuf_dma_unmap(rep->rr_rdmabuf);
- rep->rr_temp = true;
+ rep->rr_temp = true; /* Mark this rep for destruction */
}
}
{
struct rpcrdma_rep *rep;
- while ((rep = rpcrdma_rep_get_locked(buf)) != NULL)
- rpcrdma_rep_destroy(rep);
+ spin_lock(&buf->rb_lock);
+ while ((rep = list_first_entry_or_null(&buf->rb_all_reps,
+ struct rpcrdma_rep,
+ rr_all)) != NULL) {
+ list_del(&rep->rr_all);
+ spin_unlock(&buf->rb_lock);
+
+ rpcrdma_rep_free(rep);
+
+ spin_lock(&buf->rb_lock);
+ }
+ spin_unlock(&buf->rb_lock);
}
/**
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
- frwr_release_mr(mr);
+ frwr_mr_release(mr);
}
rpcrdma_regbuf_free(req->rl_recvbuf);
list_del(&mr->mr_all);
spin_unlock(&buf->rb_lock);
- frwr_release_mr(mr);
+ frwr_mr_release(mr);
spin_lock(&buf->rb_lock);
}
spin_unlock(&buffers->rb_lock);
}
-/**
- * rpcrdma_recv_buffer_put - Release rpcrdma_rep back to free list
- * @rep: rep to release
- *
- * Used after error conditions.
- */
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *rep)
-{
- rpcrdma_rep_put(&rep->rr_rxprt->rx_buf, rep);
-}
-
/* Returns a pointer to a rpcrdma_regbuf object, or NULL.
*
* xprtrdma uses a regbuf for posting an outgoing RDMA SEND, or for
*/
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req)
{
- struct ib_send_wr *send_wr = &req->rl_wr;
- struct rpcrdma_ep *ep = r_xprt->rx_ep;
- int rc;
-
- if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) {
- send_wr->send_flags |= IB_SEND_SIGNALED;
- ep->re_send_count = ep->re_send_batch;
- } else {
- send_wr->send_flags &= ~IB_SEND_SIGNALED;
- --ep->re_send_count;
- }
-
- trace_xprtrdma_post_send(req);
- rc = frwr_send(r_xprt, req);
- if (rc)
+ if (frwr_send(r_xprt, req))
return -ENOTCONN;
return 0;
}
/**
* rpcrdma_post_recvs - Refill the Receive Queue
* @r_xprt: controlling transport instance
- * @temp: mark Receive buffers to be deleted after use
+ * @needed: current credit grant
+ * @temp: mark Receive buffers to be deleted after one use
*
*/
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp)
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp)
{
struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
struct rpcrdma_ep *ep = r_xprt->rx_ep;
struct ib_recv_wr *wr, *bad_wr;
struct rpcrdma_rep *rep;
- int needed, count, rc;
+ int count, rc;
rc = 0;
count = 0;
- needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1);
if (likely(ep->re_receive_count > needed))
goto out;
needed -= ep->re_receive_count;
if (!temp)
needed += RPCRDMA_MAX_RECV_BATCH;
+ if (atomic_inc_return(&ep->re_receiving) > 1)
+ goto out;
+
/* fast path: all needed reps can be found on the free list */
wr = NULL;
while (needed) {
rc = ib_post_recv(ep->re_id->qp, wr,
(const struct ib_recv_wr **)&bad_wr);
+ if (atomic_dec_return(&ep->re_receiving) > 0)
+ complete(&ep->re_done);
+
out:
trace_xprtrdma_post_recvs(r_xprt, count, rc);
if (rc) {
rep = container_of(wr, struct rpcrdma_rep, rr_recv_wr);
wr = wr->next;
- rpcrdma_recv_buffer_put(rep);
+ rpcrdma_rep_put(buf, rep);
--count;
}
}
unsigned int re_max_inline_recv;
int re_async_rc;
int re_connect_status;
+ atomic_t re_receiving;
atomic_t re_force_disconnect;
struct ib_qp_init_attr re_attr;
wait_queue_head_t re_connect_wait;
* An external memory region is any buffer or page that is registered
* on the fly (ie, not pre-registered).
*/
-struct rpcrdma_frwr {
- struct ib_mr *fr_mr;
- struct ib_cqe fr_cqe;
- struct rpc_rdma_cid fr_cid;
- struct completion fr_linv_done;
- union {
- struct ib_reg_wr fr_regwr;
- struct ib_send_wr fr_invwr;
- };
-};
-
struct rpcrdma_req;
struct rpcrdma_mr {
struct list_head mr_list;
struct rpcrdma_req *mr_req;
+
+ struct ib_mr *mr_ibmr;
struct ib_device *mr_device;
struct scatterlist *mr_sg;
int mr_nents;
enum dma_data_direction mr_dir;
- struct rpcrdma_frwr frwr;
+ struct ib_cqe mr_cqe;
+ struct completion mr_linv_done;
+ union {
+ struct ib_reg_wr mr_regwr;
+ struct ib_send_wr mr_invwr;
+ };
struct rpcrdma_xprt *mr_xprt;
u32 mr_handle;
u32 mr_length;
u64 mr_offset;
struct list_head mr_all;
+ struct rpc_rdma_cid mr_cid;
};
/*
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req);
-void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp);
+void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, int needed, bool temp);
/*
* Buffer calls - xprtrdma/verbs.c
struct rpcrdma_req *rpcrdma_buffer_get(struct rpcrdma_buffer *);
void rpcrdma_buffer_put(struct rpcrdma_buffer *buffers,
struct rpcrdma_req *req);
-void rpcrdma_recv_buffer_put(struct rpcrdma_rep *);
+void rpcrdma_rep_put(struct rpcrdma_buffer *buf, struct rpcrdma_rep *rep);
bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size,
gfp_t flags);
void frwr_reset(struct rpcrdma_req *req);
int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device);
int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr);
-void frwr_release_mr(struct rpcrdma_mr *mr);
+void frwr_mr_release(struct rpcrdma_mr *mr);
struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
struct rpcrdma_mr_seg *seg,
int nsegs, bool writing, __be32 xid,
void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep);
void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt);
void rpcrdma_complete_rqst(struct rpcrdma_rep *rep);
+void rpcrdma_unpin_rqst(struct rpcrdma_rep *rep);
void rpcrdma_reply_handler(struct rpcrdma_rep *rep);
static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len)
struct rpc_rqst *req;
ssize_t ret;
+ /* Is this transport associated with the backchannel? */
+ if (!xprt->bc_serv)
+ return -ESHUTDOWN;
+
/* Look up and lock the request corresponding to the given XID */
req = xprt_lookup_bc_request(xprt, transport->recv.xid);
if (!req) {
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
req->rq_xtime = ktime_get();
+ tcp_sock_set_cork(transport->inet, true);
while (1) {
status = xprt_sock_sendmsg(transport->sock, &msg, xdr,
transport->xmit.offset, rm, &sent);
if (likely(req->rq_bytes_sent >= msglen)) {
req->rq_xmit_bytes_sent += transport->xmit.offset;
transport->xmit.offset = 0;
+ if (atomic_long_read(&xprt->xmit_queuelen) == 1)
+ tcp_sock_set_cork(transport->inet, false);
return 0;
}
}
xs_tcp_set_socket_timeouts(xprt, sock);
+ tcp_sock_set_nodelay(sk);
write_lock_bh(&sk->sk_callback_lock);
/* socket options */
sock_reset_flag(sk, SOCK_LINGER);
- tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
xprt_clear_connected(xprt);
bool old_request = false;
bool old_pkt_proto = false;
- err = 0;
-
/* Because we are in the listen state, we could be receiving a packet
* for ourself or any previous connection requests that we received.
* If it's the latter, we try to find a socket in our list of pending
static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
struct xdp_desc *desc)
{
- u64 chunk, chunk_end;
+ u64 chunk;
- chunk = xp_aligned_extract_addr(pool, desc->addr);
- chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len);
- if (chunk != chunk_end)
+ if (desc->len > pool->chunk_size)
return false;
+ chunk = xp_aligned_extract_addr(pool, desc->addr);
if (chunk >= pool->addrs_cnt)
return false;
# SPDX-License-Identifier: GPL-2.0-only
-cfag12864b-example
+/cfag12864b-example
-binderfs_example
+# SPDX-License-Identifier: GPL-2.0
+/binderfs_example
// SPDX-License-Identifier: GPL-2.0-or-later
/*
- * vim: noexpandtab ts=8 sts=0 sw=8:
- *
* configfs_example_macros.c - This file is a demonstration module
* containing a number of configfs subsystems. It uses the helper
* macros defined by configfs.h
# SPDX-License-Identifier: GPL-2.0-only
-ucon
+/ucon
# SPDX-License-Identifier: GPL-2.0-only
-hid-example
+/hid-example
pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, cpsr = 0x%lx\n",
p->symbol_name, p->addr, (long)regs->ARM_pc, (long)regs->ARM_cpsr);
#endif
+#ifdef CONFIG_RISCV
+ pr_info("<%s> pre_handler: p->addr = 0x%p, pc = 0x%lx, status = 0x%lx\n",
+ p->symbol_name, p->addr, regs->epc, regs->status);
+#endif
#ifdef CONFIG_S390
pr_info("<%s> pre_handler: p->addr, 0x%p, ip = 0x%lx, flags = 0x%lx\n",
p->symbol_name, p->addr, regs->psw.addr, regs->flags);
pr_info("<%s> post_handler: p->addr = 0x%p, cpsr = 0x%lx\n",
p->symbol_name, p->addr, (long)regs->ARM_cpsr);
#endif
+#ifdef CONFIG_RISCV
+ pr_info("<%s> post_handler: p->addr = 0x%p, status = 0x%lx\n",
+ p->symbol_name, p->addr, regs->status);
+#endif
#ifdef CONFIG_S390
pr_info("<%s> pre_handler: p->addr, 0x%p, flags = 0x%lx\n",
p->symbol_name, p->addr, regs->flags);
# SPDX-License-Identifier: GPL-2.0-only
-mei-amt-version
+/mei-amt-version
# SPDX-License-Identifier: GPL-2.0
-ne_ioctl_sample
+/ne_ioctl_sample
# SPDX-License-Identifier: GPL-2.0-only
-pidfd-metadata
+/pidfd-metadata
# SPDX-License-Identifier: GPL-2.0-only
-bpf-direct
-bpf-fancy
-dropper
-user-trap
+/bpf-direct
+/bpf-fancy
+/dropper
+/user-trap
# SPDX-License-Identifier: GPL-2.0-only
-hpet_example
+/hpet_example
struct device *dev = mdev_dev(mdev);
struct mdev_state *mdev_state;
- if (!type)
- type = &mbochs_types[0];
if (type->mbytes + mbochs_used_mbytes > max_mbytes)
return -ENOMEM;
&mdpy_types[mtype_get_type_group_id(mtype)];
return sprintf(buf, "virtual display, %dx%d framebuffer\n",
- type ? type->width : 0,
- type ? type->height : 0);
+ type->width, type->height);
}
static MDEV_TYPE_ATTR_RO(description);
# SPDX-License-Identifier: GPL-2.0-only
-test-fsmount
-test-statx
+/test-fsmount
+/test-statx
-watch_test
+# SPDX-License-Identifier: GPL-2.0-only
+/watch_test
# SPDX-License-Identifier: GPL-2.0-only
-watchdog-simple
+/watchdog-simple
# SPDX-License-Identifier: GPL-2.0-only
-bin2c
-kallsyms
-unifdef
-recordmcount
-sorttable
-asn1_compiler
-extract-cert
-sign-file
-insert-sys-cert
+/asn1_compiler
+/bin2c
+/extract-cert
+/insert-sys-cert
+/kallsyms
/module.lds
+/recordmcount
+/sign-file
+/sorttable
+/unifdef
targets += $(filter-out $(subdir-builtin), $(real-obj-y))
targets += $(filter-out $(subdir-modorder), $(real-obj-m))
-targets += $(lib-y) $(always-y) $(MAKECMDGOALS)
+targets += $(real-dtb-y) $(lib-y) $(always-y) $(MAKECMDGOALS)
# Linker scripts preprocessor (.lds.S -> .lds)
# ---------------------------------------------------------------------------
obj-y := $(filter-out %/, $(obj-y))
endif
-# Expand $(foo-objs) $(foo-y) by calling $(call suffix-search,foo.o,-objs -y)
-suffix-search = $(strip $(foreach s, $2, $($(1:.o=$s))))
+# Expand $(foo-objs) $(foo-y) etc. by replacing their individuals
+suffix-search = $(strip $(foreach s, $3, $($(1:%$(strip $2)=%$s))))
+# List composite targets that are constructed by combining other targets
+multi-search = $(sort $(foreach m, $1, $(if $(call suffix-search, $m, $2, $3 -), $m)))
+# List primitive targets that are compiled from source files
+real-search = $(foreach m, $1, $(if $(call suffix-search, $m, $2, $3 -), $(call suffix-search, $m, $2, $3), $m))
+
# If $(foo-objs), $(foo-y), $(foo-m), or $(foo-) exists, foo.o is a composite object
-multi-search = $(sort $(foreach m, $1, $(if $(call suffix-search, $m, $2 -), $m)))
-multi-obj-y := $(call multi-search,$(obj-y),-objs -y)
-multi-obj-m := $(call multi-search,$(obj-m),-objs -y -m)
+multi-obj-y := $(call multi-search, $(obj-y), .o, -objs -y)
+multi-obj-m := $(call multi-search, $(obj-m), .o, -objs -y -m)
multi-obj-ym := $(multi-obj-y) $(multi-obj-m)
# Replace multi-part objects by their individual parts,
# including built-in.a from subdirectories
-real-search = $(foreach m, $1, $(if $(call suffix-search, $m, $2 -), $(call suffix-search, $m, $2), $m))
-real-obj-y := $(call real-search, $(obj-y),-objs -y)
-real-obj-m := $(call real-search, $(obj-m),-objs -y -m)
+real-obj-y := $(call real-search, $(obj-y), .o, -objs -y)
+real-obj-m := $(call real-search, $(obj-m), .o, -objs -y -m)
always-y += $(always-m)
# If CONFIG_OF_ALL_DTBS is enabled, all DT blobs are built
dtb-$(CONFIG_OF_ALL_DTBS) += $(dtb-)
-# List all dtbs to be generated by fdtoverlay
-overlay-y := $(foreach m,$(dtb-y), $(if $(strip $($(m:.dtb=-dtbs))),$(m),))
-
-# Generate symbols for the base files so overlays can be applied to them.
-$(foreach m,$(overlay-y), $(eval DTC_FLAGS_$(basename $(firstword $($(m:.dtb=-dtbs)))) += -@))
-
-# Add base dtb and overlay dtbo
-dtb-y += $(foreach m,$(overlay-y), $($(m:.dtb=-dtbs)))
+# Composite DTB (i.e. DTB constructed by overlay)
+multi-dtb-y := $(call multi-search, $(dtb-y), .dtb, -dtbs)
+# Primitive DTB compiled from *.dts
+real-dtb-y := $(call real-search, $(dtb-y), .dtb, -dtbs)
+# Base DTB that overlay is applied onto (each first word of $(*-dtbs) expansion)
+base-dtb-y := $(foreach m, $(multi-dtb-y), $(firstword $(call suffix-search, $m, .dtb, -dtbs)))
always-y += $(dtb-y)
ifneq ($(CHECK_DTBS),)
-# Don't run schema checks for dtbs created by fdtoverlay as they don't
-# have corresponding dts files.
-dt-yaml-y := $(filter-out $(overlay-y),$(dtb-y))
-
-always-y += $(patsubst %.dtb,%.dt.yaml, $(dt-yaml-y))
-always-y += $(patsubst %.dtbo,%.dt.yaml, $(dt-yaml-y))
+always-y += $(patsubst %.dtb,%.dt.yaml, $(real-dtb-y))
+always-y += $(patsubst %.dtbo,%.dt.yaml, $(real-dtb-y))
endif
# Add subdir path
real-obj-y := $(addprefix $(obj)/,$(real-obj-y))
real-obj-m := $(addprefix $(obj)/,$(real-obj-m))
multi-obj-m := $(addprefix $(obj)/, $(multi-obj-m))
+multi-dtb-y := $(addprefix $(obj)/, $(multi-dtb-y))
+real-dtb-y := $(addprefix $(obj)/, $(real-dtb-y))
subdir-ym := $(addprefix $(obj)/,$(subdir-ym))
# Finds the multi-part object the current object will be linked into.
# If the object belongs to two or more multi-part objects, list them all.
modname-multi = $(sort $(foreach m,$(multi-obj-ym),\
- $(if $(filter $*.o, $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m))),$(m:.o=))))
+ $(if $(filter $*.o, $(call suffix-search, $m, .o, -objs -y -m)),$(m:.o=))))
__modname = $(if $(modname-multi),$(modname-multi),$(basetarget))
# Shipped files
# ===========================================================================
+# 'cp' preserves permissions. If you use it to copy a file in read-only srctree,
+# the copy would be read-only as well, leading to an error when executing the
+# rule next time. Use 'cat' instead in order to generate a writable file.
quiet_cmd_shipped = SHIPPED $@
cmd_shipped = cat $< > $@
DTC_FLAGS += $(DTC_FLAGS_$(basetarget))
+# Set -@ if the target is a base DTB that overlay is applied onto
+DTC_FLAGS += $(if $(filter $(patsubst $(obj)/%,%,$@), $(base-dtb-y)), -@)
+
# Generate an assembly file to wrap the output of the device tree compiler
quiet_cmd_dt_S_dtb= DTB $@
cmd_dt_S_dtb= \
$(obj)/%.dtbo: $(src)/%.dts $(DTC) FORCE
$(call if_changed_dep,dtc)
-overlay-y := $(addprefix $(obj)/, $(overlay-y))
-
quiet_cmd_fdtoverlay = DTOVL $@
cmd_fdtoverlay = $(objtree)/scripts/dtc/fdtoverlay -o $@ -i $(real-prereqs)
-$(overlay-y): FORCE
+$(multi-dtb-y): FORCE
$(call if_changed,fdtoverlay)
-$(call multi_depend, $(overlay-y), .dtb, -dtbs)
+$(call multi_depend, $(multi-dtb-y), .dtb, -dtbs)
DT_CHECKER ?= dt-validate
DT_CHECKER_FLAGS ?= $(if $(DT_SCHEMA_FILES),,-m)
# SPDX-License-Identifier: GPL-2.0-only
-fixdep
+/fixdep
next if ($arg =~ /\.\.\./);
next if ($arg =~ /^type$/i);
my $tmp_stmt = $define_stmt;
- $tmp_stmt =~ s/\b(sizeof|typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
+ $tmp_stmt =~ s/\b(__must_be_array|offsetof|sizeof|sizeof_field|__stringify|typeof|__typeof__|__builtin\w+|typecheck\s*\(\s*$Type\s*,|\#+)\s*\(*\s*$arg\s*\)*\b//g;
$tmp_stmt =~ s/\#+\s*$arg\b//g;
$tmp_stmt =~ s/\b$arg\s*\#\#//g;
my $use_cnt = () = $tmp_stmt =~ /\b$arg\b/g;
}
# check for alloc argument mismatch
- if ($line =~ /\b(kcalloc|kmalloc_array)\s*\(\s*sizeof\b/) {
+ if ($line =~ /\b((?:devm_)?(?:kcalloc|kmalloc_array))\s*\(\s*sizeof\b/) {
WARN("ALLOC_ARRAY_ARGS",
"$1 uses number as first arg, sizeof is generally wrong\n" . $herecurr);
}
"Using $1 should generally have parentheses around the comparison\n" . $herecurr);
}
+# return sysfs_emit(foo, fmt, ...) fmt without newline
+ if ($line =~ /\breturn\s+sysfs_emit\s*\(\s*$FuncArg\s*,\s*($String)/ &&
+ substr($rawline, $-[6], $+[6] - $-[6]) !~ /\\n"$/) {
+ my $offset = $+[6] - 1;
+ if (WARN("SYSFS_EMIT",
+ "return sysfs_emit(...) formats should include a terminating newline\n" . $herecurr) &&
+ $fix) {
+ substr($fixed[$fixlinenr], $offset, 0) = '\\n';
+ }
+ }
+
# nested likely/unlikely calls
if ($line =~ /\b(?:(?:un)?likely)\s*\(\s*!?\s*(IS_ERR(?:_OR_NULL|_VALUE)?|WARN)/) {
WARN("LIKELY_MISUSE",
# SPDX-License-Identifier: GPL-2.0-only
-dtc
-fdtoverlay
+/dtc
+/fdtoverlay
# SPDX-License-Identifier: GPL-2.0-only
-randomize_layout_seed.h
+/randomize_layout_seed.h
from linux import tasks, utils
+task_type = utils.CachedType("struct task_struct")
+
+
MAX_CPUS = 4096
PerCpu()
+def get_current_task(cpu):
+ task_ptr_type = task_type.get_type().pointer()
+
+ if utils.is_target_arch("x86"):
+ var_ptr = gdb.parse_and_eval("¤t_task")
+ return per_cpu(var_ptr, cpu).dereference()
+ elif utils.is_target_arch("aarch64"):
+ current_task_addr = gdb.parse_and_eval("$SP_EL0")
+ if((current_task_addr >> 63) != 0):
+ current_task = current_task_addr.cast(task_ptr_type)
+ return current_task.dereference()
+ else:
+ raise gdb.GdbError("Sorry, obtaining the current task is not allowed "
+ "while running in userspace(EL0)")
+ else:
+ raise gdb.GdbError("Sorry, obtaining the current task is not yet "
+ "supported with this arch")
class LxCurrentFunc(gdb.Function):
"""Return current task.
super(LxCurrentFunc, self).__init__("lx_current")
def invoke(self, cpu=-1):
- var_ptr = gdb.parse_and_eval("¤t_task")
- return per_cpu(var_ptr, cpu).dereference()
+ return get_current_task(cpu)
LxCurrentFunc()
saved_state['breakpoint'].enabled = saved_state['enabled']
def invoke(self, arg, from_tty):
- self.module_paths = [os.path.expanduser(p) for p in arg.split()]
+ self.module_paths = [os.path.abspath(os.path.expanduser(p))
+ for p in arg.split()]
self.module_paths.append(os.getcwd())
# enforce update
# SPDX-License-Identifier: GPL-2.0-only
-genksyms
+/genksyms
endif
-# -I needed for generated C source (shipped source)
+# -I needed for generated C source to include headers in source tree
HOSTCFLAGS_parse.tab.o := -I $(srctree)/$(src)
HOSTCFLAGS_lex.lex.o := -I $(srctree)/$(src)
$prototype =~ s/^noinline +//;
$prototype =~ s/__init +//;
$prototype =~ s/__init_or_module +//;
+ $prototype =~ s/__deprecated +//;
$prototype =~ s/__flatten +//;
$prototype =~ s/__meminit +//;
$prototype =~ s/__must_check +//;
rm -f .vmlinux.d
}
-on_exit()
-{
- if [ $? -ne 0 ]; then
- cleanup
- fi
-}
-trap on_exit EXIT
-
-on_signals()
-{
- exit 1
-}
-trap on_signals HUP INT QUIT TERM
-
# Use "make V=1" to debug this script
case "${KBUILD_VERBOSE}" in
*1*)
# SPDX-License-Identifier: GPL-2.0-only
-elfconfig.h
-mk_elfconfig
-modpost
-devicetable-offsets.h
+/devicetable-offsets.h
+/elfconfig.h
+/mk_elfconfig
+/modpost
for source_file in $mod_source_files; do
sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp
offset=$(wc -l ${source_file}.tmp | awk '{print $1;}')
- cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp
+ cat $source_file | grep MODULE_IMPORT_NS | LC_ALL=C sort -u >> ${source_file}.tmp
tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp
if ! diff -q ${source_file} ${source_file}.tmp; then
mv ${source_file}.tmp ${source_file}
fi
done
;;
+ riscv)
+ for i in Image.bz2 Image.gz Image; do
+ if [ -f "${objtree}/arch/riscv/boot/${i}" ] ; then
+ cp -v -- "${objtree}/arch/riscv/boot/${i}" "${tmpdir}/boot/vmlinux-${KERNELRELEASE}"
+ break
+ fi
+ done
+ ;;
*)
[ -f "${KBUILD_IMAGE}" ] && cp -v -- "${KBUILD_IMAGE}" "${tmpdir}/boot/vmlinux-kbuild-${KERNELRELEASE}"
echo "" >&2
$mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$";
} elsif ($arch eq "riscv") {
$function_regex = "^([0-9a-fA-F]+)\\s+<([^.0-9][0-9a-zA-Z_\\.]+)>:";
- $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL\\s_mcount\$";
+ $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL(_PLT)?\\s_?mcount\$";
$type = ".quad";
$alignment = 2;
} elsif ($arch eq "nds32") {
#
# Step 2: find the sections and mcount call sites
#
-open(IN, "LANG=C $objdump -hdr $inputfile|") || die "error running $objdump";
+open(IN, "LC_ALL=C $objdump -hdr $inputfile|") || die "error running $objdump";
my $text;
--- /dev/null
+#!/bin/sh
+
+set -e
+
+# When you move, remove or rename generated files, you probably also update
+# .gitignore and cleaning rules in the Makefile. This is the right thing
+# to do. However, people usually do 'git pull', 'git bisect', etc. without
+# running 'make clean'. Then, the stale generated files are left over, often
+# causing build issues.
+#
+# Also, 'git status' shows such stale build artifacts as untracked files.
+# What is worse, some people send a wrong patch to get them back to .gitignore
+# without checking the commit history.
+#
+# So, when you (re)move generated files, please move the cleaning rules from
+# the Makefile to this script. This is run before Kbuild starts building
+# anything, so people will not be annoyed by such garbage files.
+#
+# This script is not intended to grow endlessly. Rather, it is a temporary scrap
+# yard. Stale files stay in this file for a while (for some release cycles?),
+# then will be really dead and removed from the code base entirely.
+
+# These were previously generated source files. When you are building the kernel
+# with O=, make sure to remove the stale files in the output tree. Otherwise,
+# the build system wrongly compiles the stale ones.
+if [ -n "${building_out_of_srctree}" ]; then
+ for f in fdt_rw.c fdt_ro.c fdt_wip.c fdt.c
+ do
+ rm -f arch/arm/boot/compressed/${f}
+ done
+fi
fi
# Check for svn and a svn repo.
- if rev=$(LANG= LC_ALL= LC_MESSAGES=C svn info 2>/dev/null | grep '^Last Changed Rev'); then
+ if rev=$(LC_ALL=C svn info 2>/dev/null | grep '^Last Changed Rev'); then
rev=$(echo $rev | awk '{print $NF}')
printf -- '-svn%s' "$rev"
deviece||device
devision||division
diable||disable
+diabled||disabled
dicline||decline
dictionnary||dictionary
didnt||didn't
overaall||overall
overhread||overhead
overlaping||overlapping
+overflw||overflow
+overlfow||overflow
overide||override
overrided||overridden
overriden||overridden
# Remove structure forward declarations.
if [ -n "$remove_structs" ]; then
- LANG=C sed -i -e '/^\([a-zA-Z_][a-zA-Z0-9_]*\)\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1
+ LC_ALL=C sed -i -e '/^\([a-zA-Z_][a-zA-Z0-9_]*\)\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1
fi
printversion("Net-tools", version("ifconfig --version"))
printversion("Kbd", version("loadkeys -V"))
printversion("Console-tools", version("loadkeys -V"))
- printversion("Oprofile", version("oprofiled --version"))
printversion("Sh-utils", version("expr --v"))
printversion("Udev", version("udevadm --version"))
printversion("Wireless-tools", version("iwconfig --version"))
pr_warn("Operation requires CAP_SETUID, which is not available to UID %u for operations besides approved set*uid transitions\n",
__kuid_val(cred->uid));
return -EPERM;
- break;
case CAP_SETGID:
/*
* If no policy applies to this task, allow the use of CAP_SETGID for
pr_warn("Operation requires CAP_SETGID, which is not available to GID %u for operations besides approved set*gid transitions\n",
__kuid_val(cred->uid));
return -EPERM;
- break;
default:
/* Error, the only capabilities were checking for is CAP_SETUID/GID */
return 0;
- break;
}
return 0;
}
*index = ch;
return "Headphone";
case AUTO_PIN_LINE_OUT:
- /* This deals with the case where we have two DACs and
- * one LO, one HP and one Speaker */
- if (!ch && cfg->speaker_outs && cfg->hp_outs) {
- bool hp_lo_shared = !path_has_mixer(codec, spec->hp_paths[0], ctl_type);
- bool spk_lo_shared = !path_has_mixer(codec, spec->speaker_paths[0], ctl_type);
+ /* This deals with the case where one HP or one Speaker or
+ * one HP + one Speaker need to share the DAC with LO
+ */
+ if (!ch) {
+ bool hp_lo_shared = false, spk_lo_shared = false;
+
+ if (cfg->speaker_outs)
+ spk_lo_shared = !path_has_mixer(codec,
+ spec->speaker_paths[0], ctl_type);
+ if (cfg->hp_outs)
+ hp_lo_shared = !path_has_mixer(codec, spec->hp_paths[0], ctl_type);
if (hp_lo_shared && spk_lo_shared)
return spec->vmaster_mute.hook ? "PCM" : "Master";
if (hp_lo_shared)
}
}
+/* toggle GPIO2 at each time stream is started; we use PREPARE state instead */
+static void alc274_hp_envy_pcm_hook(struct hda_pcm_stream *hinfo,
+ struct hda_codec *codec,
+ struct snd_pcm_substream *substream,
+ int action)
+{
+ switch (action) {
+ case HDA_GEN_PCM_ACT_PREPARE:
+ alc_update_gpio_data(codec, 0x04, true);
+ break;
+ case HDA_GEN_PCM_ACT_CLEANUP:
+ alc_update_gpio_data(codec, 0x04, false);
+ break;
+ }
+}
+
+static void alc274_fixup_hp_envy_gpio(struct hda_codec *codec,
+ const struct hda_fixup *fix,
+ int action)
+{
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PROBE) {
+ spec->gpio_mask |= 0x04;
+ spec->gpio_dir |= 0x04;
+ spec->gen.pcm_playback_hook = alc274_hp_envy_pcm_hook;
+ }
+}
+
static void alc_update_coef_led(struct hda_codec *codec,
struct alc_coef_led *led,
bool polarity, bool on)
spec->gen.preferred_dacs = preferred_pairs;
}
+static void alc295_fixup_asus_dacs(struct hda_codec *codec,
+ const struct hda_fixup *fix, int action)
+{
+ static const hda_nid_t preferred_pairs[] = {
+ 0x17, 0x02, 0x21, 0x03, 0
+ };
+ struct alc_spec *spec = codec->spec;
+
+ if (action == HDA_FIXUP_ACT_PRE_PROBE)
+ spec->gen.preferred_dacs = preferred_pairs;
+}
+
static void alc_shutup_dell_xps13(struct hda_codec *codec)
{
struct alc_spec *spec = codec->spec;
ALC255_FIXUP_XIAOMI_HEADSET_MIC,
ALC274_FIXUP_HP_MIC,
ALC274_FIXUP_HP_HEADSET_MIC,
+ ALC274_FIXUP_HP_ENVY_GPIO,
ALC256_FIXUP_ASUS_HPE,
ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
ALC287_FIXUP_HP_GPIO_LED,
ALC256_FIXUP_ACER_HEADSET_MIC,
ALC285_FIXUP_IDEAPAD_S740_COEF,
ALC285_FIXUP_HP_LIMIT_INT_MIC_BOOST,
+ ALC295_FIXUP_ASUS_DACS,
+ ALC295_FIXUP_HP_OMEN,
};
static const struct hda_fixup alc269_fixups[] = {
.chained = true,
.chain_id = ALC274_FIXUP_HP_MIC
},
+ [ALC274_FIXUP_HP_ENVY_GPIO] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc274_fixup_hp_envy_gpio,
+ },
[ALC256_FIXUP_ASUS_HPE] = {
.type = HDA_FIXUP_VERBS,
.v.verbs = (const struct hda_verb[]) {
.chained = true,
.chain_id = ALC285_FIXUP_HP_MUTE_LED,
},
+ [ALC295_FIXUP_ASUS_DACS] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = alc295_fixup_asus_dacs,
+ },
+ [ALC295_FIXUP_HP_OMEN] = {
+ .type = HDA_FIXUP_PINS,
+ .v.pins = (const struct hda_pintbl[]) {
+ { 0x12, 0xb7a60130 },
+ { 0x13, 0x40000000 },
+ { 0x14, 0x411111f0 },
+ { 0x16, 0x411111f0 },
+ { 0x17, 0x90170110 },
+ { 0x18, 0x411111f0 },
+ { 0x19, 0x02a11030 },
+ { 0x1a, 0x411111f0 },
+ { 0x1b, 0x04a19030 },
+ { 0x1d, 0x40600001 },
+ { 0x1e, 0x411111f0 },
+ { 0x21, 0x03211020 },
+ {}
+ },
+ .chained = true,
+ .chain_id = ALC269_FIXUP_HP_LINE1_MIC1_LED,
+ },
};
static const struct snd_pci_quirk alc269_fixup_tbl[] = {
SND_PCI_QUIRK(0x103c, 0x82c0, "HP G3 mini premium", ALC221_FIXUP_HP_MIC_NO_PRESENCE),
SND_PCI_QUIRK(0x103c, 0x83b9, "HP Spectre x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
SND_PCI_QUIRK(0x103c, 0x8497, "HP Envy x360", ALC269_FIXUP_HP_MUTE_LED_MIC3),
+ SND_PCI_QUIRK(0x103c, 0x84da, "HP OMEN dc0019-ur", ALC295_FIXUP_HP_OMEN),
SND_PCI_QUIRK(0x103c, 0x84e7, "HP Pavilion 15", ALC269_FIXUP_HP_MUTE_LED_MIC3),
SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
+ SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8729, "HP", ALC285_FIXUP_HP_GPIO_LED),
SND_PCI_QUIRK(0x103c, 0x8730, "HP ProBook 445 G7", ALC236_FIXUP_HP_MUTE_LED_MICMUTE_VREF),
SND_PCI_QUIRK(0x1043, 0x1427, "Asus Zenbook UX31E", ALC269VB_FIXUP_ASUS_ZENBOOK),
SND_PCI_QUIRK(0x1043, 0x1517, "Asus Zenbook UX31A", ALC269VB_FIXUP_ASUS_ZENBOOK_UX31A),
SND_PCI_QUIRK(0x1043, 0x16e3, "ASUS UX50", ALC269_FIXUP_STEREO_DMIC),
+ SND_PCI_QUIRK(0x1043, 0x1740, "ASUS UX430UA", ALC295_FIXUP_ASUS_DACS),
SND_PCI_QUIRK(0x1043, 0x17d1, "ASUS UX431FL", ALC294_FIXUP_ASUS_DUAL_SPK),
SND_PCI_QUIRK(0x1043, 0x1881, "ASUS Zephyrus S/M", ALC294_FIXUP_ASUS_GX502_PINS),
SND_PCI_QUIRK(0x1043, 0x18b1, "Asus MJ401TA", ALC256_FIXUP_ASUS_HEADSET_MIC),
{.id = ALC255_FIXUP_XIAOMI_HEADSET_MIC, .name = "alc255-xiaomi-headset"},
{.id = ALC274_FIXUP_HP_MIC, .name = "alc274-hp-mic-detect"},
{.id = ALC245_FIXUP_HP_X360_AMP, .name = "alc245-hp-x360-amp"},
+ {.id = ALC295_FIXUP_HP_OMEN, .name = "alc295-hp-omen"},
{}
};
#define ALC225_STANDARD_PINS \
{0x19, 0x03a11020},
{0x21, 0x0321101f}),
SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE,
+ {0x12, 0x90a60130},
+ {0x14, 0x90170110},
+ {0x19, 0x04a11040},
+ {0x21, 0x04211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_LENOVO_PC_BEEP_IN_NOISE,
+ {0x14, 0x90170110},
+ {0x19, 0x04a11040},
+ {0x1d, 0x40600001},
+ {0x21, 0x04211020}),
+ SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
{0x14, 0x90170110},
{0x19, 0x04a11040},
{0x21, 0x04211020}),
SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
{0x19, 0x40000000},
{0x1a, 0x40000000}),
- SND_HDA_PIN_QUIRK(0x10ec0285, 0x17aa, "Lenovo", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK,
- {0x14, 0x90170110},
- {0x19, 0x04a11040},
- {0x21, 0x04211020}),
{}
};
{ 0 } /* terminator */
};
+/* Sennheiser Communications Headset [PC 8], the dB value is reported as -6 negative maximum */
+static const struct usbmix_dB_map sennheiser_pc8_dB = {-9500, 0};
+static const struct usbmix_name_map sennheiser_pc8_map[] = {
+ { 9, NULL, .dB = &sennheiser_pc8_dB },
+ { 0 } /* terminator */
+};
+
/*
* Dell usb dock with ALC4020 codec had a firmware problem where it got
* screwed up when zero volume is passed; just skip it as a workaround
.id = USB_ID(0x17aa, 0x1046),
.map = lenovo_p620_rear_map,
},
+ {
+ /* Sennheiser Communications Headset [PC 8] */
+ .id = USB_ID(0x1395, 0x0025),
+ .map = sennheiser_pc8_map,
+ },
{ 0 } /* terminator */
};
#define CONSUMER "gpio-utils"
/**
- * doc: Operation of gpio
+ * DOC: Operation of gpio
*
* Provide the api of gpiochip for chardev interface. There are two
* types of api. The first one provide as same function as each
}
/**
- * gpiotools_set_values(): Set the value of gpio(s)
+ * gpiotools_set_values() - Set the value of gpio(s)
* @fd: The fd returned by
* gpiotools_request_line().
* @values: The array of values want to set.
}
/**
- * gpiotools_get_values(): Get the value of gpio(s)
+ * gpiotools_get_values() - Get the value of gpio(s)
* @fd: The fd returned by
* gpiotools_request_line().
* @values: The array of values get from hardware.
}
/**
- * gpiotools_release_line(): Release the line(s) of gpiochip
+ * gpiotools_release_line() - Release the line(s) of gpiochip
* @fd: The fd returned by
* gpiotools_request_line().
*
}
/**
- * gpiotools_get(): Get value from specific line
+ * gpiotools_get() - Get value from specific line
* @device_name: The name of gpiochip without prefix "/dev/",
* such as "gpiochip0"
* @line: number of line, such as 2.
/**
- * gpiotools_gets(): Get values from specific lines.
+ * gpiotools_gets() - Get values from specific lines.
* @device_name: The name of gpiochip without prefix "/dev/",
* such as "gpiochip0".
* @lines: An array desired lines, specified by offset
}
/**
- * gpiotools_set(): Set value to specific line
+ * gpiotools_set() - Set value to specific line
* @device_name: The name of gpiochip without prefix "/dev/",
* such as "gpiochip0"
* @line: number of line, such as 2.
}
/**
- * gpiotools_sets(): Set values to specific lines.
+ * gpiotools_sets() - Set values to specific lines.
* @device_name: The name of gpiochip without prefix "/dev/",
* such as "gpiochip0".
* @lines: An array desired lines, specified by offset
* index for the associated GPIO device.
* @num_lines: The number of lines to request.
- * @value: The array of values set to gpiochip, must be
+ * @values: The array of values set to gpiochip, must be
* 0(low) or 1(high).
*
* Return: On success return 0;
#ifndef _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
#define _TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_
+extern unsigned long _find_next_bit(const unsigned long *addr1,
+ const unsigned long *addr2, unsigned long nbits,
+ unsigned long start, unsigned long invert, unsigned long le);
+extern unsigned long _find_first_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size);
+extern unsigned long _find_last_bit(const unsigned long *addr, unsigned long size);
+
#ifndef find_next_bit
/**
* find_next_bit - find the next set bit in a memory region
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-extern unsigned long find_next_bit(const unsigned long *addr, unsigned long
- size, unsigned long offset);
+static inline
+unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, 0UL, 0);
+}
#endif
#ifndef find_next_and_bit
* Returns the bit number for the next set bit
* If no bits are set, returns @size.
*/
-extern unsigned long find_next_and_bit(const unsigned long *addr1,
+static inline
+unsigned long find_next_and_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long size,
- unsigned long offset);
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr1 & *addr2 & GENMASK(size - 1, offset);
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_next_bit(addr1, addr2, size, offset, 0UL, 0);
+}
#endif
#ifndef find_next_zero_bit
-
/**
* find_next_zero_bit - find the next cleared bit in a memory region
* @addr: The address to base the search on
* Returns the bit number of the next zero bit
* If no bits are zero, returns @size.
*/
+static inline
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset);
+ unsigned long offset)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val;
+
+ if (unlikely(offset >= size))
+ return size;
+
+ val = *addr | ~GENMASK(size - 1, offset);
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_next_bit(addr, NULL, size, offset, ~0UL, 0);
+}
#endif
#ifndef find_first_bit
* Returns the bit number of the first set bit.
* If no bits are set, returns @size.
*/
-extern unsigned long find_first_bit(const unsigned long *addr,
- unsigned long size);
+static inline
+unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr & GENMASK(size - 1, 0);
+
+ return val ? __ffs(val) : size;
+ }
+
+ return _find_first_bit(addr, size);
+}
#endif /* find_first_bit */
* Returns the bit number of the first cleared bit.
* If no bits are zero, returns @size.
*/
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size);
+static inline
+unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+{
+ if (small_const_nbits(size)) {
+ unsigned long val = *addr | ~GENMASK(size - 1, 0);
+
+ return val == ~0UL ? size : ffz(val);
+ }
+
+ return _find_first_zero_bit(addr, size);
+}
#endif
#endif /*_TOOLS_LINUX_ASM_GENERIC_BITOPS_FIND_H_ */
#define BITS_PER_LONG_LONG 64
#endif
+#define small_const_nbits(nbits) \
+ (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)
+
#endif /* __ASM_GENERIC_BITS_PER_LONG */
void bitmap_clear(unsigned long *map, unsigned int start, int len);
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
+#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))
-#define BITMAP_LAST_WORD_MASK(nbits) \
-( \
- ((nbits) % BITS_PER_LONG) ? \
- (1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
-)
-
-#define small_const_nbits(nbits) \
- (__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG)
-
-static inline void bitmap_zero(unsigned long *dst, int nbits)
+static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = 0UL;
return find_first_zero_bit(src, nbits) == nbits;
}
-static inline int bitmap_weight(const unsigned long *src, int nbits)
+static inline int bitmap_weight(const unsigned long *src, unsigned int nbits)
{
if (small_const_nbits(nbits))
return hweight_long(*src & BITMAP_LAST_WORD_MASK(nbits));
}
static inline void bitmap_or(unsigned long *dst, const unsigned long *src1,
- const unsigned long *src2, int nbits)
+ const unsigned long *src2, unsigned int nbits)
{
if (small_const_nbits(nbits))
*dst = *src1 | *src2;
* @buf: buffer to store output
* @size: size of @buf
*/
-size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
+size_t bitmap_scnprintf(unsigned long *bitmap, unsigned int nbits,
char *buf, size_t size);
/**
dst[k] = bitmap1[k] | bitmap2[k];
}
-size_t bitmap_scnprintf(unsigned long *bitmap, int nbits,
+size_t bitmap_scnprintf(unsigned long *bitmap, unsigned int nbits,
char *buf, size_t size)
{
/* current bit is 'cur', most recently seen range is [rbot, rtop] */
- int cur, rbot, rtop;
+ unsigned int cur, rbot, rtop;
bool first = true;
size_t ret = 0;
return (len + 7) / 8 * 8;
}
-static int ringbuf_process_ring(struct ring* r)
+static int64_t ringbuf_process_ring(struct ring* r)
{
- int *len_ptr, len, err, cnt = 0;
+ int *len_ptr, len, err;
+ /* 64-bit to avoid overflow in case of extreme application behavior */
+ int64_t cnt = 0;
unsigned long cons_pos, prod_pos;
bool got_new_data;
void *sample;
}
/* Consume available ring buffer(s) data without event polling.
- * Returns number of records consumed across all registered ring buffers, or
- * negative number if any of the callbacks return error.
+ * Returns number of records consumed across all registered ring buffers (or
+ * INT_MAX, whichever is less), or negative number if any of the callbacks
+ * return error.
*/
int ring_buffer__consume(struct ring_buffer *rb)
{
- int i, err, res = 0;
+ int64_t err, res = 0;
+ int i;
for (i = 0; i < rb->ring_cnt; i++) {
struct ring *ring = &rb->rings[i];
return err;
res += err;
}
+ if (res > INT_MAX)
+ return INT_MAX;
return res;
}
/* Poll for available data and consume records, if any are available.
- * Returns number of records consumed, or negative number, if any of the
- * registered callbacks returned error.
+ * Returns number of records consumed (or INT_MAX, whichever is less), or
+ * negative number, if any of the registered callbacks returned error.
*/
int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
{
- int i, cnt, err, res = 0;
+ int i, cnt;
+ int64_t err, res = 0;
cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
+ if (cnt < 0)
+ return -errno;
+
for (i = 0; i < cnt; i++) {
__u32 ring_id = rb->events[i].data.fd;
struct ring *ring = &rb->rings[ring_id];
return err;
res += err;
}
- return cnt < 0 ? -errno : res;
+ if (res > INT_MAX)
+ return INT_MAX;
+ return res;
}
/* Get an fd that can be used to sleep until data is available in the ring(s) */
* searching it for one bits.
* - The optional "addr2", which is anded with "addr1" if present.
*/
-static inline unsigned long _find_next_bit(const unsigned long *addr1,
+unsigned long _find_next_bit(const unsigned long *addr1,
const unsigned long *addr2, unsigned long nbits,
- unsigned long start, unsigned long invert)
+ unsigned long start, unsigned long invert, unsigned long le)
{
- unsigned long tmp;
+ unsigned long tmp, mask;
+ (void) le;
if (unlikely(start >= nbits))
return nbits;
tmp ^= invert;
/* Handle 1st word. */
- tmp &= BITMAP_FIRST_WORD_MASK(start);
+ mask = BITMAP_FIRST_WORD_MASK(start);
+
+ /*
+ * Due to the lack of swab() in tools, and the fact that it doesn't
+ * need little-endian support, just comment it out
+ */
+#if (0)
+ if (le)
+ mask = swab(mask);
+#endif
+
+ tmp &= mask;
+
start = round_down(start, BITS_PER_LONG);
while (!tmp) {
tmp ^= invert;
}
- return min(start + __ffs(tmp), nbits);
-}
+#if (0)
+ if (le)
+ tmp = swab(tmp);
#endif
-#ifndef find_next_bit
-/*
- * Find the next set bit in a memory region.
- */
-unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- return _find_next_bit(addr, NULL, size, offset, 0UL);
+ return min(start + __ffs(tmp), nbits);
}
#endif
/*
* Find the first set bit in a memory region.
*/
-unsigned long find_first_bit(const unsigned long *addr, unsigned long size)
+unsigned long _find_first_bit(const unsigned long *addr, unsigned long size)
{
unsigned long idx;
/*
* Find the first cleared bit in a memory region.
*/
-unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size)
+unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size)
{
unsigned long idx;
return size;
}
#endif
-
-#ifndef find_next_zero_bit
-unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
- unsigned long offset)
-{
- return _find_next_bit(addr, NULL, size, offset, ~0UL);
-}
-#endif
-
-#ifndef find_next_and_bit
-unsigned long find_next_and_bit(const unsigned long *addr1,
- const unsigned long *addr2, unsigned long size,
- unsigned long offset)
-{
- return _find_next_bit(addr1, addr2, size, offset, 0UL);
-}
-#endif
.PP
\fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44
.PP
-\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--hide sysfs" to hide the sysfs statistics columns as a group.
+\fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names.
.PP
\fB--enable column\fP show the specified built-in columns, which are otherwise disabled, by default. Currently the only built-in counters disabled by default are "usec", "Time_Of_Day_Seconds", "APIC" and "X2APIC".
The column name "all" can be used to enable all disabled-by-default built-in counters.
.PP
-\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. Use "--show sysfs" to show the sysfs statistics columns as a group.
+\fB--show column\fP show only the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names.
+.PP
+\fB--show CATEGORY --hide CATEGORY\fP Show and hide also accept a single CATEGORY of columns: "all", "topology", "idle", "frequency", "power", "sysfs", "other".
.PP
\fB--Dump\fP displays the raw counter values.
.PP
* turbostat -- show CPU frequency and C-state residency
* on modern Intel and AMD processors.
*
- * Copyright (c) 2013 Intel Corporation.
+ * Copyright (c) 2021 Intel Corporation.
* Len Brown <len.brown@intel.com>
*/
#include <sys/capability.h>
#include <errno.h>
#include <math.h>
+#include <linux/perf_event.h>
+#include <asm/unistd.h>
+#include <stdbool.h>
char *proc_stat = "/proc/stat";
FILE *outf;
int *fd_percpu;
-struct timeval interval_tv = {5, 0};
-struct timespec interval_ts = {5, 0};
+int *fd_instr_count_percpu;
+struct timeval interval_tv = { 5, 0 };
+struct timespec interval_ts = { 5, 0 };
+
+/* Save original CPU model */
+unsigned int model_orig;
+
unsigned int num_iterations;
unsigned int debug;
unsigned int quiet;
unsigned int do_rapl;
unsigned int do_dts;
unsigned int do_ptm;
-unsigned long long gfx_cur_rc6_ms;
+unsigned int do_ipc;
+unsigned long long gfx_cur_rc6_ms;
unsigned long long cpuidle_cur_cpu_lpi_us;
unsigned long long cpuidle_cur_sys_lpi_us;
unsigned int gfx_cur_mhz;
unsigned int gfx_act_mhz;
-unsigned int tcc_activation_temp;
-unsigned int tcc_activation_temp_override;
+unsigned int tj_max;
+unsigned int tj_max_override;
+int tcc_offset_bits;
double rapl_power_units, rapl_time_units;
double rapl_dram_energy_units, rapl_energy_units;
double rapl_joule_counter_range;
unsigned int do_core_perf_limit_reasons;
unsigned int has_automatic_cstate_conversion;
+unsigned int dis_cstate_prewake;
unsigned int do_gfx_perf_limit_reasons;
unsigned int do_ring_perf_limit_reasons;
unsigned int crystal_hz;
unsigned long long tsc_hz;
int base_cpu;
double discover_bclk(unsigned int family, unsigned int model);
-unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
+unsigned int has_hwp; /* IA32_PM_ENABLE, IA32_HWP_CAPABILITIES */
/* IA32_HWP_REQUEST, IA32_HWP_STATUS */
-unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
+unsigned int has_hwp_notify; /* IA32_HWP_INTERRUPT */
unsigned int has_hwp_activity_window; /* IA32_HWP_REQUEST[bits 41:32] */
-unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
-unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
+unsigned int has_hwp_epp; /* IA32_HWP_REQUEST[bits 31:24] */
+unsigned int has_hwp_pkg; /* IA32_HWP_REQUEST_PKG */
unsigned int has_misc_feature_control;
unsigned int first_counter_read = 1;
int ignore_stdin;
unsigned long long aperf;
unsigned long long mperf;
unsigned long long c1;
- unsigned long long irq_count;
+ unsigned long long instr_count;
+ unsigned long long irq_count;
unsigned int smi_count;
unsigned int cpu_id;
unsigned int apic_id;
unsigned int x2apic_id;
unsigned int flags;
+ bool is_atom;
#define CPU_IS_FIRST_THREAD_IN_CORE 0x2
#define CPU_IS_FIRST_CORE_IN_PACKAGE 0x4
unsigned long long counter[MAX_ADDED_THREAD_COUNTERS];
((node_no) * topo.cores_per_node) + \
(core_no))
-
#define GET_PKG(pkg_base, pkg_no) (pkg_base + pkg_no)
-enum counter_scope {SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE};
-enum counter_type {COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC};
-enum counter_format {FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT};
+enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE };
+enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC };
+enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT };
struct msr_counter {
unsigned int msr_num;
struct msr_sum_array {
/* get_msr_sum() = sum + (get_msr() - last) */
struct {
- /*The accumulated MSR value is updated by the timer*/
+ /*The accumulated MSR value is updated by the timer */
unsigned long long sum;
- /*The MSR footprint recorded in last timer*/
+ /*The MSR footprint recorded in last timer */
unsigned long long last;
} entries[IDX_COUNT];
};
/* The percpu MSR sum array.*/
struct msr_sum_array *per_cpu_msr_sum;
-int idx_to_offset(int idx)
+off_t idx_to_offset(int idx)
{
- int offset;
+ off_t offset;
switch (idx) {
case IDX_PKG_ENERGY:
- offset = MSR_PKG_ENERGY_STATUS;
+ if (do_rapl & RAPL_AMD_F17H)
+ offset = MSR_PKG_ENERGY_STAT;
+ else
+ offset = MSR_PKG_ENERGY_STATUS;
break;
case IDX_DRAM_ENERGY:
offset = MSR_DRAM_ENERGY_STATUS;
return offset;
}
-int offset_to_idx(int offset)
+int offset_to_idx(off_t offset)
{
int idx;
switch (offset) {
case MSR_PKG_ENERGY_STATUS:
+ case MSR_PKG_ENERGY_STAT:
idx = IDX_PKG_ENERGY;
break;
case MSR_DRAM_ENERGY_STATUS:
{
switch (idx) {
case IDX_PKG_ENERGY:
- return do_rapl & RAPL_PKG;
+ return do_rapl & (RAPL_PKG | RAPL_AMD_F17H);
case IDX_DRAM_ENERGY:
return do_rapl & RAPL_DRAM;
case IDX_PP0_ENERGY:
return 0;
}
}
+
struct sys_counters {
unsigned int added_thread_counters;
unsigned int added_core_counters;
int logical_node_id; /* 0-based count within the package */
int physical_core_id;
int thread_id;
- cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
+ cpu_set_t *put_ids; /* Processing Unit/Thread IDs */
} *cpus;
struct topo_params {
struct timeval tv_even, tv_odd, tv_delta;
-int *irq_column_2_cpu; /* /proc/interrupts column numbers */
+int *irq_column_2_cpu; /* /proc/interrupts column numbers */
int *irqs_per_cpu; /* indexed by cpu_num */
void setup_all_buffers(void);
{
return !CPU_ISSET_S(cpu, cpu_present_setsize, cpu_present_set);
}
+
/*
* run func(thread, core, package) in topology order
* skip non-present cpus
*/
-int for_all_cpus(int (func)(struct thread_data *, struct core_data *, struct pkg_data *),
- struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
+int for_all_cpus(int (func) (struct thread_data *, struct core_data *, struct pkg_data *),
+ struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base)
{
int retval, pkg_no, core_no, thread_no, node_no;
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
for (node_no = 0; node_no < topo.nodes_per_pkg; node_no++) {
for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
- for (thread_no = 0; thread_no <
- topo.threads_per_core; ++thread_no) {
+ for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
struct thread_data *t;
struct core_data *c;
struct pkg_data *p;
- t = GET_THREAD(thread_base, thread_no,
- core_no, node_no,
- pkg_no);
+ t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
if (cpu_is_not_present(t->cpu_id))
continue;
- c = GET_CORE(core_base, core_no,
- node_no, pkg_no);
+ c = GET_CORE(core_base, core_no, node_no, pkg_no);
p = GET_PKG(pkg_base, pkg_no);
retval = func(t, c, p);
else
return 0;
}
+
int get_msr_fd(int cpu)
{
char pathname[32];
return fd;
}
+static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu, int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, hw_event, pid, cpu, group_fd, flags);
+}
+
+static int perf_instr_count_open(int cpu_num)
+{
+ struct perf_event_attr pea;
+ int fd;
+
+ memset(&pea, 0, sizeof(struct perf_event_attr));
+ pea.type = PERF_TYPE_HARDWARE;
+ pea.size = sizeof(struct perf_event_attr);
+ pea.config = PERF_COUNT_HW_INSTRUCTIONS;
+
+ /* counter for cpu_num, including user + kernel and all processes */
+ fd = perf_event_open(&pea, -1, cpu_num, -1, 0);
+ if (fd == -1)
+ err(-1, "cpu%d: perf instruction counter\n", cpu_num);
+
+ return fd;
+}
+
+int get_instr_count_fd(int cpu)
+{
+ if (fd_instr_count_percpu[cpu])
+ return fd_instr_count_percpu[cpu];
+
+ fd_instr_count_percpu[cpu] = perf_instr_count_open(cpu);
+
+ return fd_instr_count_percpu[cpu];
+}
+
int get_msr(int cpu, off_t offset, unsigned long long *msr)
{
ssize_t retval;
{ 0x0, "Bzy_MHz" },
{ 0x0, "TSC_MHz" },
{ 0x0, "IRQ" },
- { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL},
+ { 0x0, "SMI", "", 32, 0, FORMAT_DELTA, NULL },
{ 0x0, "sysfs" },
{ 0x0, "CPU%c1" },
{ 0x0, "CPU%c3" },
{ 0x0, "X2APIC" },
{ 0x0, "Die" },
{ 0x0, "GFXAMHz" },
+ { 0x0, "IPC" },
};
#define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter))
#define BIC_X2APIC (1ULL << 49)
#define BIC_Die (1ULL << 50)
#define BIC_GFXACTMHz (1ULL << 51)
+#define BIC_IPC (1ULL << 52)
+
+#define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die )
+#define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__)
+#define BIC_FREQUENCY ( BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz )
+#define BIC_IDLE ( BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX)
+#define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC)
#define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC)
#define ENABLE_BIC(COUNTER_NAME) (bic_enabled |= COUNTER_NAME)
#define BIC_PRESENT(COUNTER_BIT) (bic_present |= COUNTER_BIT)
#define BIC_NOT_PRESENT(COUNTER_BIT) (bic_present &= ~COUNTER_BIT)
-
+#define BIC_IS_ENABLED(COUNTER_BIT) (bic_enabled & COUNTER_BIT)
#define MAX_DEFERRED 16
char *deferred_skip_names[MAX_DEFERRED];
void help(void)
{
fprintf(outf,
- "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
- "\n"
- "Turbostat forks the specified COMMAND and prints statistics\n"
- "when COMMAND completes.\n"
- "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
- "to print statistics, until interrupted.\n"
- " -a, --add add a counter\n"
- " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
- " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
- " {core | package | j,k,l..m,n-p }\n"
- " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
- " -D, --Dump displays the raw counter values\n"
- " -e, --enable [all | column]\n"
- " shows all or the specified disabled column\n"
- " -H, --hide [column|column,column,...]\n"
- " hide the specified column(s)\n"
- " -i, --interval sec.subsec\n"
- " Override default 5-second measurement interval\n"
- " -J, --Joules displays energy in Joules instead of Watts\n"
- " -l, --list list column headers only\n"
- " -n, --num_iterations num\n"
- " number of the measurement iterations\n"
- " -o, --out file\n"
- " create or truncate \"file\" for all output\n"
- " -q, --quiet skip decoding system configuration header\n"
- " -s, --show [column|column,column,...]\n"
- " show only the specified column(s)\n"
- " -S, --Summary\n"
- " limits output to 1-line system summary per interval\n"
- " -T, --TCC temperature\n"
- " sets the Thermal Control Circuit temperature in\n"
- " degrees Celsius\n"
- " -h, --help print this help message\n"
- " -v, --version print version information\n"
- "\n"
- "For more help, run \"man turbostat\"\n");
+ "Usage: turbostat [OPTIONS][(--interval seconds) | COMMAND ...]\n"
+ "\n"
+ "Turbostat forks the specified COMMAND and prints statistics\n"
+ "when COMMAND completes.\n"
+ "If no COMMAND is specified, turbostat wakes every 5-seconds\n"
+ "to print statistics, until interrupted.\n"
+ " -a, --add add a counter\n"
+ " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n"
+ " -c, --cpu cpu-set limit output to summary plus cpu-set:\n"
+ " {core | package | j,k,l..m,n-p }\n"
+ " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n"
+ " -D, --Dump displays the raw counter values\n"
+ " -e, --enable [all | column]\n"
+ " shows all or the specified disabled column\n"
+ " -H, --hide [column|column,column,...]\n"
+ " hide the specified column(s)\n"
+ " -i, --interval sec.subsec\n"
+ " Override default 5-second measurement interval\n"
+ " -J, --Joules displays energy in Joules instead of Watts\n"
+ " -l, --list list column headers only\n"
+ " -n, --num_iterations num\n"
+ " number of the measurement iterations\n"
+ " -o, --out file\n"
+ " create or truncate \"file\" for all output\n"
+ " -q, --quiet skip decoding system configuration header\n"
+ " -s, --show [column|column,column,...]\n"
+ " show only the specified column(s)\n"
+ " -S, --Summary\n"
+ " limits output to 1-line system summary per interval\n"
+ " -T, --TCC temperature\n"
+ " sets the Thermal Control Circuit temperature in\n"
+ " degrees Celsius\n"
+ " -h, --help print this help message\n"
+ " -v, --version print version information\n" "\n" "For more help, run \"man turbostat\"\n");
}
/*
if (!strcmp(name_list, "all"))
return ~0;
+ if (!strcmp(name_list, "topology"))
+ return BIC_TOPOLOGY;
+ if (!strcmp(name_list, "power"))
+ return BIC_THERMAL_PWR;
+ if (!strcmp(name_list, "idle"))
+ return BIC_IDLE;
+ if (!strcmp(name_list, "frequency"))
+ return BIC_FREQUENCY;
+ if (!strcmp(name_list, "other"))
+ return BIC_OTHER;
+ if (!strcmp(name_list, "all"))
+ return 0;
for (i = 0; i < MAX_BIC; ++i) {
if (!strcmp(name_list, bic[i].name)) {
return retval;
}
-
void print_header(char *delim)
{
struct msr_counter *mp;
if (DO_BIC(BIC_TSC_MHz))
outp += sprintf(outp, "%sTSC_MHz", (printed++ ? delim : ""));
+ if (DO_BIC(BIC_IPC))
+ outp += sprintf(outp, "%sIPC", (printed++ ? delim : ""));
+
if (DO_BIC(BIC_IRQ)) {
if (sums_need_wide_columns)
outp += sprintf(outp, "%s IRQ", (printed++ ? delim : ""));
outp += sprintf(outp, "\n");
}
-int dump_counters(struct thread_data *t, struct core_data *c,
- struct pkg_data *p)
+int dump_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
struct msr_counter *mp;
outp += sprintf(outp, "t %p, c %p, p %p\n", t, c, p);
if (t) {
- outp += sprintf(outp, "CPU: %d flags 0x%x\n",
- t->cpu_id, t->flags);
+ outp += sprintf(outp, "CPU: %d flags 0x%x\n", t->cpu_id, t->flags);
outp += sprintf(outp, "TSC: %016llX\n", t->tsc);
outp += sprintf(outp, "aperf: %016llX\n", t->aperf);
outp += sprintf(outp, "mperf: %016llX\n", t->mperf);
outp += sprintf(outp, "c1: %016llX\n", t->c1);
+ if (DO_BIC(BIC_IPC))
+ outp += sprintf(outp, "IPC: %lld\n", t->instr_count);
+
if (DO_BIC(BIC_IRQ))
outp += sprintf(outp, "IRQ: %lld\n", t->irq_count);
if (DO_BIC(BIC_SMI))
outp += sprintf(outp, "SMI: %d\n", t->smi_count);
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n",
- i, mp->msr_num, t->counter[i]);
+ outp += sprintf(outp, "tADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, t->counter[i]);
}
}
outp += sprintf(outp, "Joules: %0X\n", c->core_energy);
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n",
- i, mp->msr_num, c->counter[i]);
+ outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]);
}
outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us);
}
outp += sprintf(outp, "Joules COR: %0llX\n", p->energy_cores);
outp += sprintf(outp, "Joules GFX: %0llX\n", p->energy_gfx);
outp += sprintf(outp, "Joules RAM: %0llX\n", p->energy_dram);
- outp += sprintf(outp, "Throttle PKG: %0llX\n",
- p->rapl_pkg_perf_status);
- outp += sprintf(outp, "Throttle RAM: %0llX\n",
- p->rapl_dram_perf_status);
+ outp += sprintf(outp, "Throttle PKG: %0llX\n", p->rapl_pkg_perf_status);
+ outp += sprintf(outp, "Throttle RAM: %0llX\n", p->rapl_dram_perf_status);
outp += sprintf(outp, "PTM: %dC\n", p->pkg_temp_c);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
- outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n",
- i, mp->msr_num, p->counter[i]);
+ outp += sprintf(outp, "pADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, p->counter[i]);
}
}
/*
* column formatting convention & formats
*/
-int format_counters(struct thread_data *t, struct core_data *c,
- struct pkg_data *p)
+int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
double interval_float, tsc;
char *fmt8;
char *delim = "\t";
int printed = 0;
- /* if showing only 1st thread in core and this isn't one, bail out */
+ /* if showing only 1st thread in core and this isn't one, bail out */
if (show_core_only && !(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
return 0;
- /* if showing only 1st thread in pkg and this isn't one, bail out */
+ /* if showing only 1st thread in pkg and this isn't one, bail out */
if (show_pkg_only && !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
/*if not summary line and --cpu is used */
- if ((t != &average.threads) &&
- (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
+ if ((t != &average.threads) && (cpu_subset && !CPU_ISSET_S(t->cpu_id, cpu_subset_size, cpu_subset)))
return 0;
if (DO_BIC(BIC_USEC)) {
if (DO_BIC(BIC_TOD))
outp += sprintf(outp, "%10ld.%06ld\t", t->tv_end.tv_sec, t->tv_end.tv_usec);
- interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec/1000000.0;
+ interval_float = t->tv_delta.tv_sec + t->tv_delta.tv_usec / 1000000.0;
tsc = t->tsc * tsc_tweak;
if (DO_BIC(BIC_Node)) {
if (t)
outp += sprintf(outp, "%s%d",
- (printed++ ? delim : ""),
- cpus[t->cpu_id].physical_node_id);
+ (printed++ ? delim : ""), cpus[t->cpu_id].physical_node_id);
else
- outp += sprintf(outp, "%s-",
- (printed++ ? delim : ""));
+ outp += sprintf(outp, "%s-", (printed++ ? delim : ""));
}
if (DO_BIC(BIC_Core)) {
if (c)
}
if (DO_BIC(BIC_Avg_MHz))
- outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
- 1.0 / units * t->aperf / interval_float);
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 / units * t->aperf / interval_float);
if (DO_BIC(BIC_Busy))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->mperf / tsc);
if (DO_BIC(BIC_Bzy_MHz)) {
if (has_base_hz)
- outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
+ outp +=
+ sprintf(outp, "%s%.0f", (printed++ ? delim : ""), base_hz / units * t->aperf / t->mperf);
else
outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""),
- tsc / units * t->aperf / t->mperf / interval_float);
+ tsc / units * t->aperf / t->mperf / interval_float);
}
if (DO_BIC(BIC_TSC_MHz))
- outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc/units/interval_float);
+ outp += sprintf(outp, "%s%.0f", (printed++ ? delim : ""), 1.0 * t->tsc / units / interval_float);
+
+ if (DO_BIC(BIC_IPC))
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 1.0 * t->instr_count / t->aperf);
/* IRQ */
if (DO_BIC(BIC_IRQ)) {
for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
- outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) t->counter[i]);
+ outp +=
+ sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)t->counter[i]);
else
outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
if (mp->type == COUNTER_USEC)
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), t->counter[i]/interval_float/10000);
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
+ t->counter[i] / interval_float / 10000);
else
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i]/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->counter[i] / tsc);
}
}
/* C1 */
if (DO_BIC(BIC_CPU_c1))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1/tsc);
-
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc);
/* print per-core data only for 1st thread in core */
if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE))
goto done;
if (DO_BIC(BIC_CPU_c3))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c3 / tsc);
if (DO_BIC(BIC_CPU_c6))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c6 / tsc);
if (DO_BIC(BIC_CPU_c7))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->c7 / tsc);
/* Mod%c6 */
if (DO_BIC(BIC_Mod_c6))
for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
- outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) c->counter[i]);
+ outp +=
+ sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)c->counter[i]);
else
outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
else
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i]/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->counter[i] / tsc);
}
}
fmt8 = "%s%.2f";
if (DO_BIC(BIC_CorWatt) && (do_rapl & RAPL_PER_CORE_ENERGY))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
+ outp +=
+ sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units / interval_float);
if (DO_BIC(BIC_Cor_J) && (do_rapl & RAPL_PER_CORE_ENERGY))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), c->core_energy * rapl_energy_units);
outp += sprintf(outp, "%s**.**", (printed++ ? delim : ""));
} else {
outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""),
- p->gfx_rc6_ms / 10.0 / interval_float);
+ p->gfx_rc6_ms / 10.0 / interval_float);
}
}
/* Totl%C0, Any%C0 GFX%C0 CPUGFX% */
if (DO_BIC(BIC_Totl_c0))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_wtd_core_c0 / tsc);
if (DO_BIC(BIC_Any_c0))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_core_c0 / tsc);
if (DO_BIC(BIC_GFX_c0))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_any_gfxe_c0 / tsc);
if (DO_BIC(BIC_CPUGFX))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pkg_both_core_gfxe_c0 / tsc);
if (DO_BIC(BIC_Pkgpc2))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc2 / tsc);
if (DO_BIC(BIC_Pkgpc3))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc3 / tsc);
if (DO_BIC(BIC_Pkgpc6))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc6 / tsc);
if (DO_BIC(BIC_Pkgpc7))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc7 / tsc);
if (DO_BIC(BIC_Pkgpc8))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc8 / tsc);
if (DO_BIC(BIC_Pkgpc9))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc9 / tsc);
if (DO_BIC(BIC_Pkgpc10))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc);
if (DO_BIC(BIC_CPU_LPI))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->cpu_lpi / 1000000.0 / interval_float);
if (DO_BIC(BIC_SYS_LPI))
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
+ outp +=
+ sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->sys_lpi / 1000000.0 / interval_float);
if (DO_BIC(BIC_PkgWatt))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
+ outp +=
+ sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units / interval_float);
if (DO_BIC(BIC_CorWatt) && !(do_rapl & RAPL_PER_CORE_ENERGY))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
+ outp +=
+ sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_cores * rapl_energy_units / interval_float);
if (DO_BIC(BIC_GFXWatt))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
+ outp +=
+ sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_gfx * rapl_energy_units / interval_float);
if (DO_BIC(BIC_RAMWatt))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units / interval_float);
+ outp +=
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ p->energy_dram * rapl_dram_energy_units / interval_float);
if (DO_BIC(BIC_Pkg_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_pkg * rapl_energy_units);
if (DO_BIC(BIC_Cor_J) && !(do_rapl & RAPL_PER_CORE_ENERGY))
if (DO_BIC(BIC_RAM_J))
outp += sprintf(outp, fmt8, (printed++ ? delim : ""), p->energy_dram * rapl_dram_energy_units);
if (DO_BIC(BIC_PKG__))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
+ outp +=
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float);
if (DO_BIC(BIC_RAM__))
- outp += sprintf(outp, fmt8, (printed++ ? delim : ""), 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
+ outp +=
+ sprintf(outp, fmt8, (printed++ ? delim : ""),
+ 100.0 * p->rapl_dram_perf_status * rapl_time_units / interval_float);
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) {
if (mp->format == FORMAT_RAW) {
if (mp->width == 32)
- outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int) p->counter[i]);
+ outp +=
+ sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), (unsigned int)p->counter[i]);
else
outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->counter[i]);
} else if (mp->format == FORMAT_DELTA) {
else
outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->counter[i]);
} else if (mp->format == FORMAT_PERCENT) {
- outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i]/tsc);
+ outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->counter[i] / tsc);
}
}
outp = output_buffer;
}
+
void flush_output_stderr(void)
{
fputs(output_buffer, outf);
fflush(outf);
outp = output_buffer;
}
+
void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
static int printed;
#define DELTA_WRAP32(new, old) \
old = ((((unsigned long long)new << 32) - ((unsigned long long)old << 32)) >> 32);
-int
-delta_package(struct pkg_data *new, struct pkg_data *old)
+int delta_package(struct pkg_data *new, struct pkg_data *old)
{
int i;
struct msr_counter *mp;
-
if (DO_BIC(BIC_Totl_c0))
old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0;
if (DO_BIC(BIC_Any_c0))
old->pkg_temp_c = new->pkg_temp_c;
/* flag an error when rc6 counter resets/wraps */
- if (old->gfx_rc6_ms > new->gfx_rc6_ms)
+ if (old->gfx_rc6_ms > new->gfx_rc6_ms)
old->gfx_rc6_ms = -1;
else
old->gfx_rc6_ms = new->gfx_rc6_ms - old->gfx_rc6_ms;
return 0;
}
-void
-delta_core(struct core_data *new, struct core_data *old)
+void delta_core(struct core_data *new, struct core_data *old)
{
int i;
struct msr_counter *mp;
/*
* old = new - old
*/
-int
-delta_thread(struct thread_data *new, struct thread_data *old,
- struct core_data *core_delta)
+int delta_thread(struct thread_data *new, struct thread_data *old, struct core_data *core_delta)
{
int i;
struct msr_counter *mp;
old->c1 = new->c1 - old->c1;
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
- soft_c1_residency_display(BIC_Avg_MHz)) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
if ((new->aperf > old->aperf) && (new->mperf > old->mperf)) {
old->aperf = new->aperf - old->aperf;
old->mperf = new->mperf - old->mperf;
}
}
-
if (use_c1_residency_msr) {
/*
* Some models have a dedicated C1 residency MSR,
else {
/* normal case, derive c1 */
old->c1 = (old->tsc * tsc_tweak) - old->mperf - core_delta->c3
- - core_delta->c6 - core_delta->c7;
+ - core_delta->c6 - core_delta->c7;
}
}
old->mperf = 1; /* divide by 0 protection */
}
+ if (DO_BIC(BIC_IPC))
+ old->instr_count = new->instr_count - old->instr_count;
+
if (DO_BIC(BIC_IRQ))
old->irq_count = new->irq_count - old->irq_count;
}
int delta_cpu(struct thread_data *t, struct core_data *c,
- struct pkg_data *p, struct thread_data *t2,
- struct core_data *c2, struct pkg_data *p2)
+ struct pkg_data *p, struct thread_data *t2, struct core_data *c2, struct pkg_data *p2)
{
int retval = 0;
void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
- struct msr_counter *mp;
+ struct msr_counter *mp;
t->tv_begin.tv_sec = 0;
t->tv_begin.tv_usec = 0;
t->mperf = 0;
t->c1 = 0;
+ t->instr_count = 0;
+
t->irq_count = 0;
t->smi_count = 0;
for (i = 0, mp = sys.pp; mp; i++, mp = mp->next)
p->counter[i] = 0;
}
-int sum_counters(struct thread_data *t, struct core_data *c,
- struct pkg_data *p)
+
+int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
struct msr_counter *mp;
average.threads.mperf += t->mperf;
average.threads.c1 += t->c1;
+ average.threads.instr_count += t->instr_count;
+
average.threads.irq_count += t->irq_count;
average.threads.smi_count += t->smi_count;
}
return 0;
}
+
/*
* sum the counters for all cpus in the system
* compute the weighted average
*/
-void compute_average(struct thread_data *t, struct core_data *c,
- struct pkg_data *p)
+void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
int i;
struct msr_counter *mp;
average.threads.tsc /= topo.num_cpus;
average.threads.aperf /= topo.num_cpus;
average.threads.mperf /= topo.num_cpus;
+ average.threads.instr_count /= topo.num_cpus;
average.threads.c1 /= topo.num_cpus;
if (average.threads.irq_count > 9999999)
{
unsigned int low, high;
- asm volatile("rdtsc" : "=a" (low), "=d" (high));
+ asm volatile ("rdtsc":"=a" (low), "=d"(high));
return low | ((unsigned long long)high) << 32;
}
err(1, "%s: open failed", path);
return filep;
}
+
/*
* snapshot_sysfs_counter()
*
char path[128 + PATH_BYTES];
if (mp->flags & SYSFS_PERCPU) {
- sprintf(path, "/sys/devices/system/cpu/cpu%d/%s",
- cpu, mp->path);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/%s", cpu, mp->path);
*counterp = snapshot_sysfs_counter(path);
} else {
eax = ebx = ecx = edx = 0;
__cpuid(0x80000001, eax, ebx, ecx, edx);
- topology_extensions = ecx & (1 << 22);
+ topology_extensions = ecx & (1 << 22);
if (topology_extensions == 0)
return;
t->x2apic_id = edx;
if (debug && (t->apic_id != (t->x2apic_id & 0xff)))
- fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n",
- t->cpu_id, t->apic_id, t->x2apic_id);
+ fprintf(outf, "cpu%d: BIOS BUG: apic 0x%x x2apic 0x%x\n", t->cpu_id, t->apic_id, t->x2apic_id);
}
/*
retry:
t->tsc = rdtsc(); /* we are running on local CPU of interest */
- if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) ||
- soft_c1_residency_display(BIC_Avg_MHz)) {
+ if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || soft_c1_residency_display(BIC_Avg_MHz)) {
unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time;
/*
if (aperf_mperf_retry_count < 5)
goto retry;
else
- warnx("cpu%d jitter %lld %lld",
- cpu, aperf_time, mperf_time);
+ warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time);
}
aperf_mperf_retry_count = 0;
t->mperf = t->mperf * aperf_mperf_multiplier;
}
+ if (DO_BIC(BIC_IPC))
+ if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long))
+ return -4;
+
if (DO_BIC(BIC_IRQ))
t->irq_count = irqs_per_cpu[cpu];
if (DO_BIC(BIC_SMI)) {
return -7;
}
- if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7))
+ if (DO_BIC(BIC_CPU_c7) || soft_c1_residency_display(BIC_CPU_c7)) {
if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7))
return -8;
+ else if (t->is_atom) {
+ /*
+ * For Atom CPUs that has core cstate deeper than c6,
+ * MSR_CORE_C6_RESIDENCY returns residency of cc6 and deeper.
+ * Minus CC7 (and deeper cstates) residency to get
+ * accturate cc6 residency.
+ */
+ c->c6 -= c->c7;
+ }
+ }
if (DO_BIC(BIC_Mod_c6))
if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us))
if (DO_BIC(BIC_CoreTmp)) {
if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr))
return -9;
- c->core_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
+ c->core_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
if (do_rapl & RAPL_AMD_F17H) {
if (DO_BIC(BIC_PkgTmp)) {
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_STATUS, &msr))
return -17;
- p->pkg_temp_c = tcc_activation_temp - ((msr >> 16) & 0x7F);
+ p->pkg_temp_c = tj_max - ((msr >> 16) & 0x7F);
}
if (DO_BIC(BIC_GFX_rc6))
* (>= PCL__7) and to index pkg_cstate_limit_strings[].
*/
-#define PCLUKN 0 /* Unknown */
-#define PCLRSV 1 /* Reserved */
-#define PCL__0 2 /* PC0 */
-#define PCL__1 3 /* PC1 */
-#define PCL__2 4 /* PC2 */
-#define PCL__3 5 /* PC3 */
-#define PCL__4 6 /* PC4 */
-#define PCL__6 7 /* PC6 */
-#define PCL_6N 8 /* PC6 No Retention */
-#define PCL_6R 9 /* PC6 Retention */
-#define PCL__7 10 /* PC7 */
-#define PCL_7S 11 /* PC7 Shrink */
-#define PCL__8 12 /* PC8 */
-#define PCL__9 13 /* PC9 */
-#define PCL_10 14 /* PC10 */
-#define PCLUNL 15 /* Unlimited */
+#define PCLUKN 0 /* Unknown */
+#define PCLRSV 1 /* Reserved */
+#define PCL__0 2 /* PC0 */
+#define PCL__1 3 /* PC1 */
+#define PCL__2 4 /* PC2 */
+#define PCL__3 5 /* PC3 */
+#define PCL__4 6 /* PC4 */
+#define PCL__6 7 /* PC6 */
+#define PCL_6N 8 /* PC6 No Retention */
+#define PCL_6R 9 /* PC6 Retention */
+#define PCL__7 10 /* PC7 */
+#define PCL_7S 11 /* PC7 Shrink */
+#define PCL__8 12 /* PC8 */
+#define PCL__9 13 /* PC9 */
+#define PCL_10 14 /* PC10 */
+#define PCLUNL 15 /* Unlimited */
int pkg_cstate_limit = PCLUKN;
char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2",
- "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"};
+ "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "pc10", "unlimited"
+};
-int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7};
-int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int glm_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
-int skx_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV};
+int nhm_pkg_cstate_limits[16] =
+ { PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCLRSV, PCLRSV
+};
+int snb_pkg_cstate_limits[16] =
+ { PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCLRSV, PCLRSV
+};
-static void
-calculate_tsc_tweak()
+int hsw_pkg_cstate_limits[16] =
+ { PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCLRSV, PCLRSV
+};
+
+int slv_pkg_cstate_limits[16] =
+ { PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCL__6, PCL__7
+};
+
+int amt_pkg_cstate_limits[16] =
+ { PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCLRSV, PCLRSV
+};
+
+int phi_pkg_cstate_limits[16] =
+ { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCLRSV, PCLRSV
+};
+
+int glm_pkg_cstate_limits[16] =
+ { PCLUNL, PCL__1, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCL_10, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCLRSV, PCLRSV
+};
+
+int skx_pkg_cstate_limits[16] =
+ { PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCLRSV, PCLRSV
+};
+
+int icx_pkg_cstate_limits[16] =
+ { PCL__0, PCL__2, PCL__6, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV,
+ PCLRSV, PCLRSV
+};
+
+static void calculate_tsc_tweak()
{
tsc_tweak = base_hz / tsc_hz;
}
-static void
-dump_nhm_platform_info(void)
+void prewake_cstate_probe(unsigned int family, unsigned int model);
+
+static void dump_nhm_platform_info(void)
{
unsigned long long msr;
unsigned int ratio;
fprintf(outf, "cpu%d: MSR_PLATFORM_INFO: 0x%08llx\n", base_cpu, msr);
ratio = (msr >> 40) & 0xFF;
- fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
- fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr);
fprintf(outf, "cpu%d: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n",
base_cpu, msr, msr & 0x2 ? "EN" : "DIS");
+ /* C-state Pre-wake Disable (CSTATE_PREWAKE_DISABLE) */
+ if (dis_cstate_prewake)
+ fprintf(outf, "C-state Pre-wake: %sabled\n", msr & 0x40000000 ? "DIS" : "EN");
+
return;
}
-static void
-dump_hsw_turbo_ratio_limits(void)
+static void dump_hsw_turbo_ratio_limits(void)
{
unsigned long long msr;
unsigned int ratio;
ratio = (msr >> 8) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 18 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 17 active cores\n", ratio, bclk, ratio * bclk);
return;
}
-static void
-dump_ivt_turbo_ratio_limits(void)
+static void dump_ivt_turbo_ratio_limits(void)
{
unsigned long long msr;
unsigned int ratio;
ratio = (msr >> 56) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 16 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 48) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 15 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 40) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 14 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 32) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 13 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 24) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 12 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 16) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 11 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 10 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0xFF;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk);
return;
}
+
int has_turbo_ratio_group_limits(int family, int model)
{
switch (model) {
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_ICELAKE_X:
case INTEL_FAM6_ATOM_GOLDMONT_D:
case INTEL_FAM6_ATOM_TREMONT_D:
return 1;
return 0;
}
-static void
-dump_turbo_ratio_limits(int family, int model)
+static void dump_turbo_ratio_limits(int family, int model)
{
unsigned long long msr, core_counts;
unsigned int ratio, group_size;
return;
}
-static void
-dump_atom_turbo_ratio_limits(void)
+static void dump_atom_turbo_ratio_limits(void)
{
unsigned long long msr;
unsigned int ratio;
ratio = (msr >> 0) & 0x3F;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz minimum operating frequency\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0x3F;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz low frequency mode (LFM)\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 16) & 0x3F;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz base frequency\n", ratio, bclk, ratio * bclk);
get_msr(base_cpu, MSR_ATOM_CORE_TURBO_RATIOS, &msr);
fprintf(outf, "cpu%d: MSR_ATOM_CORE_TURBO_RATIOS: 0x%08llx\n", base_cpu, msr & 0xFFFFFFFF);
ratio = (msr >> 24) & 0x3F;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 4 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 16) & 0x3F;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 3 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 8) & 0x3F;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 2 active cores\n", ratio, bclk, ratio * bclk);
ratio = (msr >> 0) & 0x3F;
if (ratio)
- fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n",
- ratio, bclk, ratio * bclk);
+ fprintf(outf, "%d * %.1f = %.1f MHz max turbo 1 active core\n", ratio, bclk, ratio * bclk);
}
-static void
-dump_knl_turbo_ratio_limits(void)
+static void dump_knl_turbo_ratio_limits(void)
{
const unsigned int buckets_no = 7;
get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr);
- fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n",
- base_cpu, msr);
+ fprintf(outf, "cpu%d: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", base_cpu, msr);
/*
* Turbo encoding in KNL is as follows:
ratio[i], bclk, ratio[i] * bclk, cores[i]);
}
-static void
-dump_nhm_cst_cfg(void)
+static void dump_nhm_cst_cfg(void)
{
unsigned long long msr;
(msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "",
(msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "",
(msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "",
- (msr & (1 << 15)) ? "" : "UN",
- (unsigned int)msr & 0xF,
- pkg_cstate_limit_strings[pkg_cstate_limit]);
+ (msr & (1 << 15)) ? "" : "UN", (unsigned int)msr & 0xF, pkg_cstate_limit_strings[pkg_cstate_limit]);
#define AUTOMATIC_CSTATE_CONVERSION (1UL << 16)
if (has_automatic_cstate_conversion) {
- fprintf(outf, ", automatic c-state conversion=%s",
- (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
+ fprintf(outf, ", automatic c-state conversion=%s", (msr & AUTOMATIC_CSTATE_CONVERSION) ? "on" : "off");
}
fprintf(outf, ")\n");
return;
}
-static void
-dump_config_tdp(void)
+static void dump_config_tdp(void)
{
unsigned long long msr;
fprintf(outf, ")\n");
}
-unsigned int irtl_time_units[] = {1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
+unsigned int irtl_time_units[] = { 1, 32, 1024, 32768, 1048576, 33554432, 0, 0 };
void print_irtl(void)
{
(msr & 0x3FF) * irtl_time_units[(msr >> 10) & 0x3]);
}
+
void free_fd_percpu(void)
{
int i;
free(cpus);
}
-
/*
* Parse a file containing a single int.
* Return 0 if file can not be opened
* the logical_node_id
*/
for (cpux = cpu; cpux <= topo.max_cpu_num; cpux++) {
- if ((cpus[cpux].physical_package_id == pkg) &&
- (cpus[cpux].physical_node_id == node)) {
+ if ((cpus[cpux].physical_package_id == pkg) && (cpus[cpux].physical_node_id == node)) {
cpus[cpux].logical_node_id = lnode;
cpu_count++;
}
int cpu = thiscpu->logical_cpu_id;
for (i = 0; i <= topo.max_cpu_num; i++) {
- sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist",
- cpu, i);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/node%i/cpulist", cpu, i);
filep = fopen(path, "r");
if (!filep)
continue;
size = CPU_ALLOC_SIZE((topo.max_cpu_num + 1));
CPU_ZERO_S(size, thiscpu->put_ids);
- sprintf(path,
- "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", cpu);
filep = fopen(path, "r");
if (!filep) {
sib_core = get_core_id(so);
if (sib_core == thiscpu->physical_core_id) {
CPU_SET_S(so, size, thiscpu->put_ids);
- if ((so != cpu) &&
- (cpus[so].thread_id < 0))
- cpus[so].thread_id =
- thread_id++;
+ if ((so != cpu) && (cpus[so].thread_id < 0))
+ cpus[so].thread_id = thread_id++;
}
}
}
* skip non-present cpus
*/
-int for_all_cpus_2(int (func)(struct thread_data *, struct core_data *,
- struct pkg_data *, struct thread_data *, struct core_data *,
- struct pkg_data *), struct thread_data *thread_base,
- struct core_data *core_base, struct pkg_data *pkg_base,
- struct thread_data *thread_base2, struct core_data *core_base2,
- struct pkg_data *pkg_base2)
+int for_all_cpus_2(int (func) (struct thread_data *, struct core_data *,
+ struct pkg_data *, struct thread_data *, struct core_data *,
+ struct pkg_data *), struct thread_data *thread_base,
+ struct core_data *core_base, struct pkg_data *pkg_base,
+ struct thread_data *thread_base2, struct core_data *core_base2, struct pkg_data *pkg_base2)
{
int retval, pkg_no, node_no, core_no, thread_no;
for (pkg_no = 0; pkg_no < topo.num_packages; ++pkg_no) {
for (node_no = 0; node_no < topo.nodes_per_pkg; ++node_no) {
- for (core_no = 0; core_no < topo.cores_per_node;
- ++core_no) {
- for (thread_no = 0; thread_no <
- topo.threads_per_core; ++thread_no) {
+ for (core_no = 0; core_no < topo.cores_per_node; ++core_no) {
+ for (thread_no = 0; thread_no < topo.threads_per_core; ++thread_no) {
struct thread_data *t, *t2;
struct core_data *c, *c2;
struct pkg_data *p, *p2;
- t = GET_THREAD(thread_base, thread_no,
- core_no, node_no,
- pkg_no);
+ t = GET_THREAD(thread_base, thread_no, core_no, node_no, pkg_no);
if (cpu_is_not_present(t->cpu_id))
continue;
- t2 = GET_THREAD(thread_base2, thread_no,
- core_no, node_no,
- pkg_no);
+ t2 = GET_THREAD(thread_base2, thread_no, core_no, node_no, pkg_no);
- c = GET_CORE(core_base, core_no,
- node_no, pkg_no);
- c2 = GET_CORE(core_base2, core_no,
- node_no,
- pkg_no);
+ c = GET_CORE(core_base, core_no, node_no, pkg_no);
+ c2 = GET_CORE(core_base2, core_no, node_no, pkg_no);
p = GET_PKG(pkg_base, pkg_no);
p2 = GET_PKG(pkg_base2, pkg_no);
* run func(cpu) on every cpu in /proc/stat
* return max_cpu number
*/
-int for_all_proc_cpus(int (func)(int))
+int for_all_proc_cpus(int (func) (int))
{
FILE *fp;
int cpu_num;
retval = func(cpu_num);
if (retval) {
fclose(fp);
- return(retval);
+ return (retval);
}
}
fclose(fp);
base_cpu = sched_getcpu();
if (base_cpu < 0)
err(1, "cannot find calling cpu ID");
- sprintf(pathname,
- "/sys/devices/system/cpu/cpu%d/topology/thread_siblings",
- base_cpu);
+ sprintf(pathname, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings", base_cpu);
filep = fopen_or_die(pathname, "r");
topo.max_cpu_num = 0;
while (fscanf(filep, "%lx,", &dummy) == 1)
topo.max_cpu_num += BITMASK_SIZE;
fclose(filep);
- topo.max_cpu_num--; /* 0 based */
+ topo.max_cpu_num--; /* 0 based */
}
/*
topo.num_cpus++;
return 0;
}
+
int mark_cpu_present(int cpu)
{
CPU_SET_S(cpu, cpu_present_setsize, cpu_present_set);
}
- while (getc(fp) != '\n')
- ; /* flush interrupt description */
+ while (getc(fp) != '\n') ; /* flush interrupt description */
}
return 0;
}
+
/*
* snapshot_gfx_rc6_ms()
*
return 0;
}
+
/*
* snapshot_gfx_mhz()
*
return 0;
}
+
/*
* snapshot_sys_lpi()
*
return 0;
}
+
/*
* snapshot /proc and /sys files
*
int exit_requested;
-static void signal_handler (int signal)
+static void signal_handler(int signal)
{
switch (signal) {
case SIGINT:
for (i = IDX_PKG_ENERGY; i < IDX_COUNT; i++) {
unsigned long long msr_cur, msr_last;
- int offset;
+ off_t offset;
if (!idx_valid(i))
continue;
continue;
ret = get_msr(cpu, offset, &msr_cur);
if (ret) {
- fprintf(outf, "Can not update msr(0x%x)\n", offset);
+ fprintf(outf, "Can not update msr(0x%llx)\n", (unsigned long long)offset);
continue;
}
return 0;
}
-static void
-msr_record_handler(union sigval v)
+static void msr_record_handler(union sigval v)
{
for_all_cpus(update_msr_sum, EVEN_COUNTERS);
}
}
return;
- release_timer:
+release_timer:
timer_delete(timerid);
- release_msr:
+release_msr:
free(per_cpu_msr_sum);
}
+/*
+ * set_my_sched_priority(pri)
+ * return previous
+ */
+int set_my_sched_priority(int priority)
+{
+ int retval;
+ int original_priority;
+
+ errno = 0;
+ original_priority = getpriority(PRIO_PROCESS, 0);
+ if (errno && (original_priority == -1))
+ err(errno, "getpriority");
+
+ retval = setpriority(PRIO_PROCESS, 0, priority);
+ if (retval)
+ err(retval, "setpriority(%d)", priority);
+
+ errno = 0;
+ retval = getpriority(PRIO_PROCESS, 0);
+ if (retval != priority)
+ err(-1, "getpriority(%d) != setpriority(%d)", retval, priority);
+
+ return original_priority;
+}
+
void turbostat_loop()
{
int retval;
setup_signal_handler();
+ /*
+ * elevate own priority for interval mode
+ */
+ set_my_sched_priority(-20);
+
restart:
restarted++;
sprintf(pathname, "/dev/cpu/%d/msr", base_cpu);
if (stat(pathname, &sb))
- if (system("/sbin/modprobe msr > /dev/null 2>&1"))
+ if (system("/sbin/modprobe msr > /dev/null 2>&1"))
err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" ");
}
err(-6, "cap_get\n");
if (cap_flag_value != CAP_SET) {
- warnx("capget(CAP_SYS_RAWIO) failed,"
- " try \"# setcap cap_sys_rawio=ep %s\"", progname);
+ warnx("capget(CAP_SYS_RAWIO) failed," " try \"# setcap cap_sys_rawio=ep %s\"", progname);
return 1;
}
return 0;
}
+
void check_permissions(void)
{
int do_exit = 0;
pkg_cstate_limits = skx_pkg_cstate_limits;
has_misc_feature_control = 1;
break;
+ case INTEL_FAM6_ICELAKE_X: /* ICX */
+ pkg_cstate_limits = icx_pkg_cstate_limits;
+ has_misc_feature_control = 1;
+ break;
case INTEL_FAM6_ATOM_SILVERMONT: /* BYT */
no_MSR_MISC_PWR_MGMT = 1;
case INTEL_FAM6_ATOM_SILVERMONT_D: /* AVN */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
+ case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
pkg_cstate_limits = glm_pkg_cstate_limits;
break;
default:
has_base_hz = 1;
return 1;
}
+
/*
* SLV client has support for unique MSRs:
*
}
return 0;
}
+
int is_dnv(unsigned int family, unsigned int model)
{
}
return 0;
}
+
int is_bdx(unsigned int family, unsigned int model)
{
}
return 0;
}
+
int is_skx(unsigned int family, unsigned int model)
{
}
return 0;
}
+
+int is_icx(unsigned int family, unsigned int model)
+{
+
+ if (!genuine_intel)
+ return 0;
+
+ switch (model) {
+ case INTEL_FAM6_ICELAKE_X:
+ return 1;
+ }
+ return 0;
+}
+
int is_ehl(unsigned int family, unsigned int model)
{
if (!genuine_intel)
}
return 0;
}
+
int is_jvl(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
switch (model) {
- /* Nehalem compatible, but do not include turbo-ratio limit support */
+ /* Nehalem compatible, but do not include turbo-ratio limit support */
case INTEL_FAM6_NEHALEM_EX: /* Nehalem-EX Xeon - Beckton */
case INTEL_FAM6_XEON_PHI_KNL: /* PHI - Knights Landing (different MSR definition) */
return 0;
return 1;
}
}
+
int has_atom_turbo_ratio_limit(unsigned int family, unsigned int model)
{
if (has_slv_msrs(family, model))
return 0;
}
+
int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
}
}
+
int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model)
{
if (!genuine_intel)
return 0;
}
}
+
int has_glm_turbo_ratio_limit(unsigned int family, unsigned int model)
{
if (!genuine_intel)
switch (model) {
case INTEL_FAM6_ATOM_GOLDMONT:
case INTEL_FAM6_SKYLAKE_X:
+ case INTEL_FAM6_ICELAKE_X:
return 1;
default:
return 0;
}
}
+
int has_config_tdp(unsigned int family, unsigned int model)
{
if (!genuine_intel)
case INTEL_FAM6_SKYLAKE_L: /* SKL */
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ case INTEL_FAM6_ICELAKE_X: /* ICX */
case INTEL_FAM6_XEON_PHI_KNL: /* Knights Landing */
return 1;
}
}
-static void
-remove_underbar(char *s)
+/*
+ * tcc_offset_bits:
+ * 0: Tcc Offset not supported (Default)
+ * 6: Bit 29:24 of MSR_PLATFORM_INFO
+ * 4: Bit 27:24 of MSR_PLATFORM_INFO
+ */
+void check_tcc_offset(int model)
+{
+ unsigned long long msr;
+
+ if (!genuine_intel)
+ return;
+
+ switch (model) {
+ case INTEL_FAM6_SKYLAKE_L:
+ case INTEL_FAM6_SKYLAKE:
+ case INTEL_FAM6_KABYLAKE_L:
+ case INTEL_FAM6_KABYLAKE:
+ case INTEL_FAM6_ICELAKE_L:
+ case INTEL_FAM6_ICELAKE:
+ case INTEL_FAM6_TIGERLAKE_L:
+ case INTEL_FAM6_TIGERLAKE:
+ case INTEL_FAM6_COMETLAKE:
+ if (!get_msr(base_cpu, MSR_PLATFORM_INFO, &msr)) {
+ msr = (msr >> 30) & 1;
+ if (msr)
+ tcc_offset_bits = 6;
+ }
+ return;
+ default:
+ return;
+ }
+}
+
+static void remove_underbar(char *s)
{
char *to = s;
*to = 0;
}
-static void
-dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
+static void dump_cstate_pstate_config_info(unsigned int family, unsigned int model)
{
if (!do_nhm_platform_info)
return;
fprintf(outf, "%s: %s", strrchr(path, '/') + 1, cpuidle_buf);
}
-static void
-dump_sysfs_cstate_config(void)
+
+static void dump_sysfs_cstate_config(void)
{
char path[64];
char name_buf[16];
for (state = 0; state < 10; ++state) {
- sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
- base_cpu, state);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
input = fopen(path, "r");
if (input == NULL)
continue;
if (!fgets(name_buf, sizeof(name_buf), input))
err(1, "%s: failed to read file", path);
- /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
sp = strchr(name_buf, '-');
if (!sp)
sp = strchrnul(name_buf, '\n');
remove_underbar(name_buf);
- sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc",
- base_cpu, state);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/desc", base_cpu, state);
input = fopen(path, "r");
if (input == NULL)
continue;
fclose(input);
}
}
-static void
-dump_sysfs_pstate_config(void)
+
+static void dump_sysfs_pstate_config(void)
{
char path[64];
char driver_buf[64];
FILE *input;
int turbo;
- sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver",
- base_cpu);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_driver", base_cpu);
input = fopen(path, "r");
if (input == NULL) {
fprintf(outf, "NSFOD %s\n", path);
err(1, "%s: failed to read file", path);
fclose(input);
- sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor",
- base_cpu);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpufreq/scaling_governor", base_cpu);
input = fopen(path, "r");
if (input == NULL) {
fprintf(outf, "NSFOD %s\n", path);
}
}
-
/*
* print_epb()
* Decode the ENERGY_PERF_BIAS MSR
return 0;
}
+
/*
* print_hwp()
* Decode the MSR_HWP_CAPABILITIES
if (get_msr(cpu, MSR_PM_ENABLE, &msr))
return 0;
- fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n",
- cpu, msr, (msr & (1 << 0)) ? "" : "No-");
+ fprintf(outf, "cpu%d: MSR_PM_ENABLE: 0x%08llx (%sHWP)\n", cpu, msr, (msr & (1 << 0)) ? "" : "No-");
/* MSR_PM_ENABLE[1] == 1 if HWP is enabled and MSRs visible */
if ((msr & (1 << 0)) == 0)
return 0;
fprintf(outf, "cpu%d: MSR_HWP_CAPABILITIES: 0x%08llx "
- "(high %d guar %d eff %d low %d)\n",
- cpu, msr,
- (unsigned int)HWP_HIGHEST_PERF(msr),
- (unsigned int)HWP_GUARANTEED_PERF(msr),
- (unsigned int)HWP_MOSTEFFICIENT_PERF(msr),
- (unsigned int)HWP_LOWEST_PERF(msr));
+ "(high %d guar %d eff %d low %d)\n",
+ cpu, msr,
+ (unsigned int)HWP_HIGHEST_PERF(msr),
+ (unsigned int)HWP_GUARANTEED_PERF(msr),
+ (unsigned int)HWP_MOSTEFFICIENT_PERF(msr), (unsigned int)HWP_LOWEST_PERF(msr));
if (get_msr(cpu, MSR_HWP_REQUEST, &msr))
return 0;
fprintf(outf, "cpu%d: MSR_HWP_REQUEST: 0x%08llx "
- "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
- cpu, msr,
- (unsigned int)(((msr) >> 0) & 0xff),
- (unsigned int)(((msr) >> 8) & 0xff),
- (unsigned int)(((msr) >> 16) & 0xff),
- (unsigned int)(((msr) >> 24) & 0xff),
- (unsigned int)(((msr) >> 32) & 0xff3),
- (unsigned int)(((msr) >> 42) & 0x1));
+ "(min %d max %d des %d epp 0x%x window 0x%x pkg 0x%x)\n",
+ cpu, msr,
+ (unsigned int)(((msr) >> 0) & 0xff),
+ (unsigned int)(((msr) >> 8) & 0xff),
+ (unsigned int)(((msr) >> 16) & 0xff),
+ (unsigned int)(((msr) >> 24) & 0xff),
+ (unsigned int)(((msr) >> 32) & 0xff3), (unsigned int)(((msr) >> 42) & 0x1));
if (has_hwp_pkg) {
if (get_msr(cpu, MSR_HWP_REQUEST_PKG, &msr))
(unsigned int)(((msr) >> 0) & 0xff),
(unsigned int)(((msr) >> 8) & 0xff),
(unsigned int)(((msr) >> 16) & 0xff),
- (unsigned int)(((msr) >> 24) & 0xff),
- (unsigned int)(((msr) >> 32) & 0xff3));
+ (unsigned int)(((msr) >> 24) & 0xff), (unsigned int)(((msr) >> 32) & 0xff3));
}
if (has_hwp_notify) {
if (get_msr(cpu, MSR_HWP_INTERRUPT, &msr))
fprintf(outf, "cpu%d: MSR_HWP_INTERRUPT: 0x%08llx "
"(%s_Guaranteed_Perf_Change, %s_Excursion_Min)\n",
- cpu, msr,
- ((msr) & 0x1) ? "EN" : "Dis",
- ((msr) & 0x2) ? "EN" : "Dis");
+ cpu, msr, ((msr) & 0x1) ? "EN" : "Dis", ((msr) & 0x2) ? "EN" : "Dis");
}
if (get_msr(cpu, MSR_HWP_STATUS, &msr))
return 0;
fprintf(outf, "cpu%d: MSR_HWP_STATUS: 0x%08llx "
- "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
- cpu, msr,
- ((msr) & 0x1) ? "" : "No-",
- ((msr) & 0x2) ? "" : "No-");
+ "(%sGuaranteed_Perf_Change, %sExcursion_Min)\n",
+ cpu, msr, ((msr) & 0x1) ? "" : "No-", ((msr) & 0x2) ? "" : "No-");
return 0;
}
(msr & 1 << 5) ? "Auto-HWP, " : "",
(msr & 1 << 4) ? "Graphics, " : "",
(msr & 1 << 2) ? "bit2, " : "",
- (msr & 1 << 1) ? "ThermStatus, " : "",
- (msr & 1 << 0) ? "PROCHOT, " : "");
+ (msr & 1 << 1) ? "ThermStatus, " : "", (msr & 1 << 0) ? "PROCHOT, " : "");
fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n",
(msr & 1 << 31) ? "bit31, " : "",
(msr & 1 << 30) ? "bit30, " : "",
(msr & 1 << 21) ? "Auto-HWP, " : "",
(msr & 1 << 20) ? "Graphics, " : "",
(msr & 1 << 18) ? "bit18, " : "",
- (msr & 1 << 17) ? "ThermStatus, " : "",
- (msr & 1 << 16) ? "PROCHOT, " : "");
+ (msr & 1 << 17) ? "ThermStatus, " : "", (msr & 1 << 16) ? "PROCHOT, " : "");
}
if (do_gfx_perf_limit_reasons) {
(msr & 1 << 6) ? "VR-Therm, " : "",
(msr & 1 << 8) ? "Amps, " : "",
(msr & 1 << 9) ? "GFXPwr, " : "",
- (msr & 1 << 10) ? "PkgPwrL1, " : "",
- (msr & 1 << 11) ? "PkgPwrL2, " : "");
+ (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
fprintf(outf, " (Logged: %s%s%s%s%s%s%s%s)\n",
(msr & 1 << 16) ? "PROCHOT, " : "",
(msr & 1 << 17) ? "ThermStatus, " : "",
(msr & 1 << 22) ? "VR-Therm, " : "",
(msr & 1 << 24) ? "Amps, " : "",
(msr & 1 << 25) ? "GFXPwr, " : "",
- (msr & 1 << 26) ? "PkgPwrL1, " : "",
- (msr & 1 << 27) ? "PkgPwrL2, " : "");
+ (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
}
if (do_ring_perf_limit_reasons) {
get_msr(cpu, MSR_RING_PERF_LIMIT_REASONS, &msr);
(msr & 1 << 1) ? "ThermStatus, " : "",
(msr & 1 << 6) ? "VR-Therm, " : "",
(msr & 1 << 8) ? "Amps, " : "",
- (msr & 1 << 10) ? "PkgPwrL1, " : "",
- (msr & 1 << 11) ? "PkgPwrL2, " : "");
+ (msr & 1 << 10) ? "PkgPwrL1, " : "", (msr & 1 << 11) ? "PkgPwrL2, " : "");
fprintf(outf, " (Logged: %s%s%s%s%s%s)\n",
(msr & 1 << 16) ? "PROCHOT, " : "",
(msr & 1 << 17) ? "ThermStatus, " : "",
(msr & 1 << 22) ? "VR-Therm, " : "",
(msr & 1 << 24) ? "Amps, " : "",
- (msr & 1 << 26) ? "PkgPwrL1, " : "",
- (msr & 1 << 27) ? "PkgPwrL2, " : "");
+ (msr & 1 << 26) ? "PkgPwrL1, " : "", (msr & 1 << 27) ? "PkgPwrL2, " : "");
}
return 0;
}
#define RAPL_POWER_GRANULARITY 0x7FFF /* 15 bit power granularity */
-#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
+#define RAPL_TIME_GRANULARITY 0x3F /* 6 bit time granularity */
double get_tdp_intel(unsigned int model)
{
* rapl_dram_energy_units_probe()
* Energy units are either hard-coded, or come from RAPL Energy Unit MSR.
*/
-static double
-rapl_dram_energy_units_probe(int model, double rapl_energy_units)
+static double rapl_dram_energy_units_probe(int model, double rapl_energy_units)
{
/* only called for genuine_intel, family 6 */
switch (model) {
case INTEL_FAM6_HASWELL_X: /* HSX */
case INTEL_FAM6_BROADWELL_X: /* BDX */
+ case INTEL_FAM6_SKYLAKE_X: /* SKX */
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
return (rapl_dram_energy_units = 15.3 / 1000000);
default:
BIC_PRESENT(BIC_PkgWatt);
break;
case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
+ do_rapl =
+ RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
+ | RAPL_GFX | RAPL_PKG_POWER_INFO;
if (rapl_joules) {
BIC_PRESENT(BIC_Pkg_J);
BIC_PRESENT(BIC_Cor_J);
break;
case INTEL_FAM6_SKYLAKE_L: /* SKL */
case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_GFX | RAPL_PKG_POWER_INFO;
+ do_rapl =
+ RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS
+ | RAPL_GFX | RAPL_PKG_POWER_INFO;
BIC_PRESENT(BIC_PKG__);
BIC_PRESENT(BIC_RAM__);
if (rapl_joules) {
case INTEL_FAM6_HASWELL_X: /* HSX */
case INTEL_FAM6_BROADWELL_X: /* BDX */
case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ case INTEL_FAM6_ICELAKE_X: /* ICX */
case INTEL_FAM6_XEON_PHI_KNL: /* KNL */
- do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ do_rapl =
+ RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
+ RAPL_PKG_POWER_INFO;
BIC_PRESENT(BIC_PKG__);
BIC_PRESENT(BIC_RAM__);
if (rapl_joules) {
break;
case INTEL_FAM6_SANDYBRIDGE_X:
case INTEL_FAM6_IVYBRIDGE_X:
- do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
+ do_rapl =
+ RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS |
+ RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO;
BIC_PRESENT(BIC_PKG__);
BIC_PRESENT(BIC_RAM__);
if (rapl_joules) {
}
break;
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
+ do_rapl =
+ RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS |
+ RAPL_PKG_POWER_INFO | RAPL_CORES_ENERGY_STATUS;
BIC_PRESENT(BIC_PKG__);
BIC_PRESENT(BIC_RAM__);
if (rapl_joules) {
void automatic_cstate_conversion_probe(unsigned int family, unsigned int model)
{
- if (is_skx(family, model) || is_bdx(family, model))
+ if (is_skx(family, model) || is_bdx(family, model) || is_icx(family, model))
has_automatic_cstate_conversion = 1;
}
+void prewake_cstate_probe(unsigned int family, unsigned int model)
+{
+ if (is_icx(family, model))
+ dis_cstate_prewake = 1;
+}
+
int print_thermal(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
return 0;
dts = (msr >> 16) & 0x7F;
- fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n",
- cpu, msr, tcc_activation_temp - dts);
+ fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_STATUS: 0x%08llx (%d C)\n", cpu, msr, tj_max - dts);
if (get_msr(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, &msr))
return 0;
dts = (msr >> 16) & 0x7F;
dts2 = (msr >> 8) & 0x7F;
fprintf(outf, "cpu%d: MSR_IA32_PACKAGE_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
- cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
+ cpu, msr, tj_max - dts, tj_max - dts2);
}
-
if (do_dts && debug) {
unsigned int resolution;
dts = (msr >> 16) & 0x7F;
resolution = (msr >> 27) & 0xF;
fprintf(outf, "cpu%d: MSR_IA32_THERM_STATUS: 0x%08llx (%d C +/- %d)\n",
- cpu, msr, tcc_activation_temp - dts, resolution);
+ cpu, msr, tj_max - dts, resolution);
if (get_msr(cpu, MSR_IA32_THERM_INTERRUPT, &msr))
return 0;
dts = (msr >> 16) & 0x7F;
dts2 = (msr >> 8) & 0x7F;
fprintf(outf, "cpu%d: MSR_IA32_THERM_INTERRUPT: 0x%08llx (%d C, %d C)\n",
- cpu, msr, tcc_activation_temp - dts, tcc_activation_temp - dts2);
+ cpu, msr, tj_max - dts, tj_max - dts2);
}
return 0;
cpu, label,
((msr >> 15) & 1) ? "EN" : "DIS",
((msr >> 0) & 0x7FFF) * rapl_power_units,
- (1.0 + (((msr >> 22) & 0x3)/4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
+ (1.0 + (((msr >> 22) & 0x3) / 4.0)) * (1 << ((msr >> 17) & 0x1F)) * rapl_time_units,
(((msr >> 16) & 1) ? "EN" : "DIS"));
return;
if (do_rapl & RAPL_PKG_POWER_INFO) {
if (get_msr(cpu, MSR_PKG_POWER_INFO, &msr))
- return -5;
-
+ return -5;
fprintf(outf, "cpu%d: MSR_PKG_POWER_INFO: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
cpu, msr,
- ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+ ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
cpu,
((msr >> 47) & 1) ? "EN" : "DIS",
((msr >> 32) & 0x7FFF) * rapl_power_units,
- (1.0 + (((msr >> 54) & 0x3)/4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
+ (1.0 + (((msr >> 54) & 0x3) / 4.0)) * (1 << ((msr >> 49) & 0x1F)) * rapl_time_units,
((msr >> 48) & 1) ? "EN" : "DIS");
}
if (do_rapl & RAPL_DRAM_POWER_INFO) {
if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr))
- return -6;
+ return -6;
fprintf(outf, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n",
cpu, msr,
- ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
+ ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units,
((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units);
if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr))
return -9;
fprintf(outf, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "DRAM Limit");
}
if (get_msr(cpu, MSR_PP0_POWER_LIMIT, &msr))
return -9;
fprintf(outf, "cpu%d: MSR_PP0_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "Cores Limit");
}
if (do_rapl & RAPL_GFX) {
if (get_msr(cpu, MSR_PP1_POWER_LIMIT, &msr))
return -9;
fprintf(outf, "cpu%d: MSR_PP1_POWER_LIMIT: 0x%08llx (%slocked)\n",
- cpu, msr, (msr >> 31) & 1 ? "" : "UN");
+ cpu, msr, (msr >> 31) & 1 ? "" : "UN");
print_power_limit_msr(cpu, msr, "GFX Limit");
}
return 0;
switch (model) {
case INTEL_FAM6_SANDYBRIDGE:
case INTEL_FAM6_SANDYBRIDGE_X:
- case INTEL_FAM6_IVYBRIDGE: /* IVB */
- case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
- case INTEL_FAM6_HASWELL: /* HSW */
- case INTEL_FAM6_HASWELL_X: /* HSW */
- case INTEL_FAM6_HASWELL_L: /* HSW */
- case INTEL_FAM6_HASWELL_G: /* HSW */
- case INTEL_FAM6_BROADWELL: /* BDW */
- case INTEL_FAM6_BROADWELL_G: /* BDW */
- case INTEL_FAM6_BROADWELL_X: /* BDX */
- case INTEL_FAM6_SKYLAKE_L: /* SKL */
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
- case INTEL_FAM6_SKYLAKE_X: /* SKX */
- case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
+ case INTEL_FAM6_IVYBRIDGE: /* IVB */
+ case INTEL_FAM6_IVYBRIDGE_X: /* IVB Xeon */
+ case INTEL_FAM6_HASWELL: /* HSW */
+ case INTEL_FAM6_HASWELL_X: /* HSW */
+ case INTEL_FAM6_HASWELL_L: /* HSW */
+ case INTEL_FAM6_HASWELL_G: /* HSW */
+ case INTEL_FAM6_BROADWELL: /* BDW */
+ case INTEL_FAM6_BROADWELL_G: /* BDW */
+ case INTEL_FAM6_BROADWELL_X: /* BDX */
+ case INTEL_FAM6_SKYLAKE_L: /* SKL */
+ case INTEL_FAM6_CANNONLAKE_L: /* CNL */
+ case INTEL_FAM6_SKYLAKE_X: /* SKX */
+ case INTEL_FAM6_ICELAKE_X: /* ICX */
+ case INTEL_FAM6_ATOM_GOLDMONT: /* BXT */
case INTEL_FAM6_ATOM_GOLDMONT_PLUS:
case INTEL_FAM6_ATOM_GOLDMONT_D: /* DNV */
- case INTEL_FAM6_ATOM_TREMONT: /* EHL */
- case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
+ case INTEL_FAM6_ATOM_TREMONT: /* EHL */
+ case INTEL_FAM6_ATOM_TREMONT_D: /* JVL */
return 1;
}
return 0;
return 0;
switch (model) {
- case INTEL_FAM6_CANNONLAKE_L: /* CNL */
+ case INTEL_FAM6_CANNONLAKE_L: /* CNL */
return 1;
}
}
#define SLM_BCLK_FREQS 5
-double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0};
+double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0 };
double slm_bclk(void)
{
return 133.33;
}
+int get_cpu_type(struct thread_data *t, struct core_data *c, struct pkg_data *p)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ if (!genuine_intel)
+ return 0;
+
+ if (cpu_migrate(t->cpu_id)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", t->cpu_id);
+ return -1;
+ }
+
+ if (max_level < 0x1a)
+ return 0;
+
+ __cpuid(0x1a, eax, ebx, ecx, edx);
+ eax = (eax >> 24) & 0xFF;
+ if (eax == 0x20)
+ t->is_atom = true;
+ return 0;
+}
+
/*
* MSR_IA32_TEMPERATURE_TARGET indicates the temperature where
* the Thermal Control Circuit (TCC) activates.
* below this value, including the Digital Thermal Sensor (DTS),
* Package Thermal Management Sensor (PTM), and thermal event thresholds.
*/
-int read_tcc_activation_temp()
+int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
{
unsigned long long msr;
- unsigned int tcc, target_c, offset_c;
+ unsigned int tcc_default, tcc_offset;
+ int cpu;
- /* Temperature Target MSR is Nehalem and newer only */
- if (!do_nhm_platform_info)
+ /* tj_max is used only for dts or ptm */
+ if (!(do_dts || do_ptm))
return 0;
- if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
+ /* this is a per-package concept */
+ if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
return 0;
- target_c = (msr >> 16) & 0xFF;
+ cpu = t->cpu_id;
+ if (cpu_migrate(cpu)) {
+ fprintf(outf, "Could not migrate to CPU %d\n", cpu);
+ return -1;
+ }
- offset_c = (msr >> 24) & 0xF;
+ if (tj_max_override != 0) {
+ tj_max = tj_max_override;
+ fprintf(outf, "cpu%d: Using cmdline TCC Target (%d C)\n", cpu, tj_max);
+ return 0;
+ }
- tcc = target_c - offset_c;
+ /* Temperature Target MSR is Nehalem and newer only */
+ if (!do_nhm_platform_info)
+ goto guess;
- if (!quiet)
- fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
- base_cpu, msr, tcc, target_c, offset_c);
+ if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr))
+ goto guess;
- return tcc;
-}
+ tcc_default = (msr >> 16) & 0xFF;
-int set_temperature_target(struct thread_data *t, struct core_data *c, struct pkg_data *p)
-{
- /* tcc_activation_temp is used only for dts or ptm */
- if (!(do_dts || do_ptm))
- return 0;
+ if (!quiet) {
+ switch (tcc_offset_bits) {
+ case 4:
+ tcc_offset = (msr >> 24) & 0xF;
+ fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
+ cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
+ break;
+ case 6:
+ tcc_offset = (msr >> 24) & 0x3F;
+ fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C) (%d default - %d offset)\n",
+ cpu, msr, tcc_default - tcc_offset, tcc_default, tcc_offset);
+ break;
+ default:
+ fprintf(outf, "cpu%d: MSR_IA32_TEMPERATURE_TARGET: 0x%08llx (%d C)\n", cpu, msr, tcc_default);
+ break;
+ }
+ }
- /* this is a per-package concept */
- if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE) || !(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE))
- return 0;
+ if (!tcc_default)
+ goto guess;
- if (tcc_activation_temp_override != 0) {
- tcc_activation_temp = tcc_activation_temp_override;
- fprintf(outf, "Using cmdline TCC Target (%d C)\n", tcc_activation_temp);
- return 0;
- }
+ tj_max = tcc_default;
- tcc_activation_temp = read_tcc_activation_temp();
- if (tcc_activation_temp)
- return 0;
+ return 0;
- tcc_activation_temp = TJMAX_DEFAULT;
- fprintf(outf, "Guessing tjMax %d C, Please use -T to specify\n", tcc_activation_temp);
+guess:
+ tj_max = TJMAX_DEFAULT;
+ fprintf(outf, "cpu%d: Guessing tjMax %d C, Please use -T to specify\n", cpu, tj_max);
return 0;
}
if (!get_msr(base_cpu, MSR_IA32_FEAT_CTL, &msr))
fprintf(outf, "cpu%d: MSR_IA32_FEATURE_CONTROL: 0x%08llx (%sLocked %s)\n",
- base_cpu, msr,
- msr & FEAT_CTL_LOCKED ? "" : "UN-",
- msr & (1 << 18) ? "SGX" : "");
+ base_cpu, msr, msr & FEAT_CTL_LOCKED ? "" : "UN-", msr & (1 << 18) ? "SGX" : "");
}
void decode_misc_enable_msr(void)
return;
if (!get_msr(base_cpu, MSR_MISC_FEATURE_CONTROL, &msr))
- fprintf(outf, "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
- base_cpu, msr,
- msr & (0 << 0) ? "No-" : "",
- msr & (1 << 0) ? "No-" : "",
- msr & (2 << 0) ? "No-" : "",
- msr & (3 << 0) ? "No-" : "");
+ fprintf(outf,
+ "cpu%d: MSR_MISC_FEATURE_CONTROL: 0x%08llx (%sL2-Prefetch %sL2-Prefetch-pair %sL1-Prefetch %sL1-IP-Prefetch)\n",
+ base_cpu, msr, msr & (0 << 0) ? "No-" : "", msr & (1 << 0) ? "No-" : "",
+ msr & (2 << 0) ? "No-" : "", msr & (3 << 0) ? "No-" : "");
}
+
/*
* Decode MSR_MISC_PWR_MGMT
*
if (!get_msr(base_cpu, MSR_MISC_PWR_MGMT, &msr))
fprintf(outf, "cpu%d: MSR_MISC_PWR_MGMT: 0x%08llx (%sable-EIST_Coordination %sable-EPB %sable-OOB)\n",
base_cpu, msr,
- msr & (1 << 0) ? "DIS" : "EN",
- msr & (1 << 1) ? "EN" : "DIS",
- msr & (1 << 8) ? "EN" : "DIS");
+ msr & (1 << 0) ? "DIS" : "EN", msr & (1 << 1) ? "EN" : "DIS", msr & (1 << 8) ? "EN" : "DIS");
}
+
/*
* Decode MSR_CC6_DEMOTION_POLICY_CONFIG, MSR_MC6_DEMOTION_POLICY_CONFIG
*
unsigned int intel_model_duplicates(unsigned int model)
{
- switch(model) {
+ switch (model) {
case INTEL_FAM6_NEHALEM_EP: /* Core i7, Xeon 5500 series - Bloomfield, Gainstown NHM-EP */
case INTEL_FAM6_NEHALEM: /* Core i7 and i5 Processor - Clarksfield, Lynnfield, Jasper Forest */
- case 0x1F: /* Core i7 and i5 Processor - Nehalem */
+ case 0x1F: /* Core i7 and i5 Processor - Nehalem */
case INTEL_FAM6_WESTMERE: /* Westmere Client - Clarkdale, Arrandale */
case INTEL_FAM6_WESTMERE_EP: /* Westmere EP - Gulftown */
return INTEL_FAM6_NEHALEM;
case INTEL_FAM6_ROCKETLAKE:
case INTEL_FAM6_LAKEFIELD:
case INTEL_FAM6_ALDERLAKE:
+ case INTEL_FAM6_ALDERLAKE_L:
return INTEL_FAM6_CANNONLAKE_L;
case INTEL_FAM6_ATOM_TREMONT_L:
return INTEL_FAM6_ATOM_TREMONT;
- case INTEL_FAM6_ICELAKE_X:
+ case INTEL_FAM6_ICELAKE_D:
case INTEL_FAM6_SAPPHIRERAPIDS_X:
- return INTEL_FAM6_SKYLAKE_X;
+ return INTEL_FAM6_ICELAKE_X;
}
return model;
}
close(fd);
return;
}
- fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n",
- value, value == 2000000000 ? "default" : "constrained");
+ fprintf(outf, "/dev/cpu_dma_latency: %d usec (%s)\n", value, value == 2000000000 ? "default" : "constrained");
close(fd);
}
+/*
+ * Linux-perf manages the the HW instructions-retired counter
+ * by enabling when requested, and hiding rollover
+ */
+void linux_perf_init(void)
+{
+ if (!BIC_IS_ENABLED(BIC_IPC))
+ return;
+
+ if (access("/proc/sys/kernel/perf_event_paranoid", F_OK))
+ return;
+
+ fd_instr_count_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
+ if (fd_instr_count_percpu == NULL)
+ err(-1, "calloc fd_instr_count_percpu");
+
+ BIC_PRESENT(BIC_IPC);
+}
+
void process_cpuid()
{
unsigned int eax, ebx, ecx, edx;
unsigned int fms, family, model, stepping, ecx_flags, edx_flags;
unsigned int has_turbo;
+ unsigned long long ucode_patch = 0;
eax = ebx = ecx = edx = 0;
hygon_genuine = 1;
if (!quiet)
- fprintf(outf, "CPUID(0): %.4s%.4s%.4s ",
- (char *)&ebx, (char *)&edx, (char *)&ecx);
+ fprintf(outf, "CPUID(0): %.4s%.4s%.4s 0x%x CPUID levels\n",
+ (char *)&ebx, (char *)&edx, (char *)&ecx, max_level);
__cpuid(1, fms, ebx, ecx, edx);
family = (fms >> 8) & 0xf;
ecx_flags = ecx;
edx_flags = edx;
+ if (get_msr(sched_getcpu(), MSR_IA32_UCODE_REV, &ucode_patch))
+ warnx("get_msr(UCODE)\n");
+
/*
* check max extended function levels of CPUID.
* This is needed to check for invariant TSC.
__cpuid(0x80000000, max_extended_level, ebx, ecx, edx);
if (!quiet) {
- fprintf(outf, "0x%x CPUID levels; 0x%x xlevels; family:model:stepping 0x%x:%x:%x (%d:%d:%d)\n",
- max_level, max_extended_level, family, model, stepping, family, model, stepping);
+ fprintf(outf, "CPUID(1): family:model:stepping 0x%x:%x:%x (%d:%d:%d) microcode 0x%x\n",
+ family, model, stepping, family, model, stepping,
+ (unsigned int)((ucode_patch >> 32) & 0xFFFFFFFF));
+ fprintf(outf, "CPUID(0x80000000): max_extended_levels: 0x%x\n", max_extended_level);
fprintf(outf, "CPUID(1): %s %s %s %s %s %s %s %s %s %s\n",
ecx_flags & (1 << 0) ? "SSE3" : "-",
ecx_flags & (1 << 3) ? "MONITOR" : "-",
edx_flags & (1 << 4) ? "TSC" : "-",
edx_flags & (1 << 5) ? "MSR" : "-",
edx_flags & (1 << 22) ? "ACPI-TM" : "-",
- edx_flags & (1 << 28) ? "HT" : "-",
- edx_flags & (1 << 29) ? "TM" : "-");
+ edx_flags & (1 << 28) ? "HT" : "-", edx_flags & (1 << 29) ? "TM" : "-");
}
- if (genuine_intel)
+ if (genuine_intel) {
+ model_orig = model;
model = intel_model_duplicates(model);
+ }
if (!(edx_flags & (1 << 5)))
errx(1, "CPUID: no MSR");
has_hwp ? "" : "No-",
has_hwp_notify ? "" : "No-",
has_hwp_activity_window ? "" : "No-",
- has_hwp_epp ? "" : "No-",
- has_hwp_pkg ? "" : "No-",
- has_epb ? "" : "No-");
+ has_hwp_epp ? "" : "No-", has_hwp_pkg ? "" : "No-", has_epb ? "" : "No-");
if (!quiet)
decode_misc_enable_msr();
-
if (max_level >= 0x7 && !quiet) {
int has_sgx;
eax_crystal, ebx_tsc, crystal_hz);
if (crystal_hz == 0)
- switch(model) {
+ switch (model) {
case INTEL_FAM6_SKYLAKE_L: /* SKL */
crystal_hz = 24000000; /* 24.0 MHz */
break;
break;
default:
crystal_hz = 0;
- }
+ }
if (crystal_hz) {
- tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal;
+ tsc_hz = (unsigned long long)crystal_hz *ebx_tsc / eax_crystal;
if (!quiet)
fprintf(outf, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n",
- tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
+ tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal);
}
}
}
BIC_NOT_PRESENT(BIC_Pkgpc7);
use_c1_residency_msr = 1;
}
- if (is_skx(family, model)) {
+ if (is_skx(family, model) || is_icx(family, model)) {
BIC_NOT_PRESENT(BIC_CPU_c3);
BIC_NOT_PRESENT(BIC_Pkgpc3);
BIC_NOT_PRESENT(BIC_CPU_c7);
BIC_PRESENT(BIC_CPUGFX);
}
do_slm_cstates = is_slm(family, model);
- do_knl_cstates = is_knl(family, model);
+ do_knl_cstates = is_knl(family, model);
- if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) ||
- is_ehl(family, model))
+ if (do_slm_cstates || do_knl_cstates || is_cnl(family, model) || is_ehl(family, model))
BIC_NOT_PRESENT(BIC_CPU_c3);
if (!quiet)
perf_limit_reasons_probe(family, model);
automatic_cstate_conversion_probe(family, model);
+ check_tcc_offset(model_orig);
+
if (!quiet)
dump_cstate_pstate_config_info(family, model);
if (!quiet)
dump_sysfs_pstate_config();
- if (has_skl_msrs(family, model))
+ if (has_skl_msrs(family, model) || is_ehl(family, model))
calculate_tsc_tweak();
if (!access("/sys/class/drm/card0/power/rc6_residency_ms", R_OK))
if (debug > 1)
fprintf(outf, "num_cpus %d max_cpu_num %d\n", topo.num_cpus, topo.max_cpu_num);
- cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
+ cpus = calloc(1, (topo.max_cpu_num + 1) * sizeof(struct cpu_topology));
if (cpus == NULL)
err(1, "calloc cpus");
topo.cores_per_node = max_core_id + 1;
if (debug > 1)
- fprintf(outf, "max_core_id %d, sizing for %d cores per package\n",
- max_core_id, topo.cores_per_node);
+ fprintf(outf, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.cores_per_node);
if (!summary_only && topo.cores_per_node > 1)
BIC_PRESENT(BIC_Core);
topo.num_die = max_die_id + 1;
if (debug > 1)
- fprintf(outf, "max_die_id %d, sizing for %d die\n",
- max_die_id, topo.num_die);
+ fprintf(outf, "max_die_id %d, sizing for %d die\n", max_die_id, topo.num_die);
if (!summary_only && topo.num_die > 1)
BIC_PRESENT(BIC_Die);
topo.num_packages = max_package_id + 1;
if (debug > 1)
- fprintf(outf, "max_package_id %d, sizing for %d packages\n",
- max_package_id, topo.num_packages);
+ fprintf(outf, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages);
if (!summary_only && topo.num_packages > 1)
BIC_PRESENT(BIC_Package);
fprintf(outf,
"cpu %d pkg %d die %d node %d lnode %d core %d thread %d\n",
i, cpus[i].physical_package_id, cpus[i].die_id,
- cpus[i].physical_node_id,
- cpus[i].logical_node_id,
- cpus[i].physical_core_id,
- cpus[i].thread_id);
+ cpus[i].physical_node_id, cpus[i].logical_node_id, cpus[i].physical_core_id, cpus[i].thread_id);
}
}
-void
-allocate_counters(struct thread_data **t, struct core_data **c,
- struct pkg_data **p)
+void allocate_counters(struct thread_data **t, struct core_data **c, struct pkg_data **p)
{
int i;
- int num_cores = topo.cores_per_node * topo.nodes_per_pkg *
- topo.num_packages;
+ int num_cores = topo.cores_per_node * topo.nodes_per_pkg * topo.num_packages;
int num_threads = topo.threads_per_core * num_cores;
*t = calloc(num_threads, sizeof(struct thread_data));
error:
err(1, "calloc counters");
}
+
/*
* init_counter()
*
* set FIRST_THREAD_IN_CORE and FIRST_CORE_IN_PACKAGE
*/
-void init_counter(struct thread_data *thread_base, struct core_data *core_base,
- struct pkg_data *pkg_base, int cpu_id)
+void init_counter(struct thread_data *thread_base, struct core_data *core_base, struct pkg_data *pkg_base, int cpu_id)
{
int pkg_id = cpus[cpu_id].physical_package_id;
int node_id = cpus[cpu_id].logical_node_id;
struct core_data *c;
struct pkg_data *p;
-
/* Workaround for systems where physical_node_id==-1
* and logical_node_id==(-1 - topo.num_cpus)
*/
p->package_id = pkg_id;
}
-
int initialize_counters(int cpu_id)
{
init_counter(EVEN_COUNTERS, cpu_id);
if (outp == NULL)
err(-1, "calloc output buffer");
}
+
void allocate_fd_percpu(void)
{
fd_percpu = calloc(topo.max_cpu_num + 1, sizeof(int));
if (fd_percpu == NULL)
err(-1, "calloc fd_percpu");
}
+
void allocate_irq_buffers(void)
{
irq_column_2_cpu = calloc(topo.num_cpus, sizeof(int));
if (irqs_per_cpu == NULL)
err(-1, "calloc %d", topo.max_cpu_num + 1);
}
+
void setup_all_buffers(void)
{
topology_probe();
check_dev_msr();
check_permissions();
process_cpuid();
-
+ linux_perf_init();
if (!quiet)
for_all_cpus(print_hwp, ODD_COUNTERS);
for_all_cpus(set_temperature_target, ODD_COUNTERS);
+ for_all_cpus(get_cpu_type, ODD_COUNTERS);
+ for_all_cpus(get_cpu_type, EVEN_COUNTERS);
+
if (!quiet)
for_all_cpus(print_thermal, ODD_COUNTERS);
format_all_counters(EVEN_COUNTERS);
}
- fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec/1000000.0);
+ fprintf(outf, "%.6f sec\n", tv_delta.tv_sec + tv_delta.tv_usec / 1000000.0);
flush_output_stderr();
return status;
}
-void print_version() {
- fprintf(outf, "turbostat version 20.09.30"
- " - Len Brown <lenb@kernel.org>\n");
+void print_version()
+{
+ fprintf(outf, "turbostat version 21.05.04" " - Len Brown <lenb@kernel.org>\n");
}
int add_counter(unsigned int msr_num, char *path, char *name,
- unsigned int width, enum counter_scope scope,
- enum counter_type type, enum counter_format format, int flags)
+ unsigned int width, enum counter_scope scope,
+ enum counter_type type, enum counter_format format, int flags)
{
struct msr_counter *msrp;
sys.tp = msrp;
sys.added_thread_counters++;
if (sys.added_thread_counters > MAX_ADDED_THREAD_COUNTERS) {
- fprintf(stderr, "exceeded max %d added thread counters\n",
- MAX_ADDED_COUNTERS);
+ fprintf(stderr, "exceeded max %d added thread counters\n", MAX_ADDED_COUNTERS);
exit(-1);
}
break;
sys.cp = msrp;
sys.added_core_counters++;
if (sys.added_core_counters > MAX_ADDED_COUNTERS) {
- fprintf(stderr, "exceeded max %d added core counters\n",
- MAX_ADDED_COUNTERS);
+ fprintf(stderr, "exceeded max %d added core counters\n", MAX_ADDED_COUNTERS);
exit(-1);
}
break;
sys.pp = msrp;
sys.added_package_counters++;
if (sys.added_package_counters > MAX_ADDED_COUNTERS) {
- fprintf(stderr, "exceeded max %d added package counters\n",
- MAX_ADDED_COUNTERS);
+ fprintf(stderr, "exceeded max %d added package counters\n", MAX_ADDED_COUNTERS);
exit(-1);
}
break;
for (state = 10; state >= 0; --state) {
- sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
- base_cpu, state);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
input = fopen(path, "r");
if (input == NULL)
continue;
if (!fgets(name_buf, sizeof(name_buf), input))
err(1, "%s: failed to read file", path);
- /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
sp = strchr(name_buf, '-');
if (!sp)
sp = strchrnul(name_buf, '\n');
if (is_deferred_skip(name_buf))
continue;
- add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC,
- FORMAT_PERCENT, SYSFS_PERCPU);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_USEC, FORMAT_PERCENT, SYSFS_PERCPU);
}
for (state = 10; state >= 0; --state) {
- sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name",
- base_cpu, state);
+ sprintf(path, "/sys/devices/system/cpu/cpu%d/cpuidle/state%d/name", base_cpu, state);
input = fopen(path, "r");
if (input == NULL)
continue;
if (!fgets(name_buf, sizeof(name_buf), input))
err(1, "%s: failed to read file", path);
- /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
+ /* truncate "C1-HSW\n" to "C1", or truncate "C1\n" to "C1" */
sp = strchr(name_buf, '-');
if (!sp)
sp = strchrnul(name_buf, '\n');
if (is_deferred_skip(name_buf))
continue;
- add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS,
- FORMAT_DELTA, SYSFS_PERCPU);
+ add_counter(0, path, name_buf, 64, SCOPE_CPU, COUNTER_ITEMS, FORMAT_DELTA, SYSFS_PERCPU);
}
}
-
/*
* parse cpuset with following syntax
* 1,2,4..6,8-10 and set bits in cpu_subset
exit(-1);
}
-
void cmdline(int argc, char **argv)
{
int opt;
int option_index = 0;
static struct option long_options[] = {
- {"add", required_argument, 0, 'a'},
- {"cpu", required_argument, 0, 'c'},
- {"Dump", no_argument, 0, 'D'},
- {"debug", no_argument, 0, 'd'}, /* internal, not documented */
- {"enable", required_argument, 0, 'e'},
- {"interval", required_argument, 0, 'i'},
- {"num_iterations", required_argument, 0, 'n'},
- {"help", no_argument, 0, 'h'},
- {"hide", required_argument, 0, 'H'}, // meh, -h taken by --help
- {"Joules", no_argument, 0, 'J'},
- {"list", no_argument, 0, 'l'},
- {"out", required_argument, 0, 'o'},
- {"quiet", no_argument, 0, 'q'},
- {"show", required_argument, 0, 's'},
- {"Summary", no_argument, 0, 'S'},
- {"TCC", required_argument, 0, 'T'},
- {"version", no_argument, 0, 'v' },
- {0, 0, 0, 0 }
+ { "add", required_argument, 0, 'a' },
+ { "cpu", required_argument, 0, 'c' },
+ { "Dump", no_argument, 0, 'D' },
+ { "debug", no_argument, 0, 'd' }, /* internal, not documented */
+ { "enable", required_argument, 0, 'e' },
+ { "interval", required_argument, 0, 'i' },
+ { "IPC", no_argument, 0, 'I' },
+ { "num_iterations", required_argument, 0, 'n' },
+ { "help", no_argument, 0, 'h' },
+ { "hide", required_argument, 0, 'H' }, // meh, -h taken by --help
+ { "Joules", no_argument, 0, 'J' },
+ { "list", no_argument, 0, 'l' },
+ { "out", required_argument, 0, 'o' },
+ { "quiet", no_argument, 0, 'q' },
+ { "show", required_argument, 0, 's' },
+ { "Summary", no_argument, 0, 'S' },
+ { "TCC", required_argument, 0, 'T' },
+ { "version", no_argument, 0, 'v' },
+ { 0, 0, 0, 0 }
};
progname = argv[0];
- while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v",
- long_options, &option_index)) != -1) {
+ while ((opt = getopt_long_only(argc, argv, "+C:c:Dde:hi:Jn:o:qST:v", long_options, &option_index)) != -1) {
switch (opt) {
case 'a':
parse_add_command(optarg);
double interval = strtod(optarg, NULL);
if (interval < 0.001) {
- fprintf(outf, "interval %f seconds is too small\n",
- interval);
+ fprintf(outf, "interval %f seconds is too small\n", interval);
exit(2);
}
num_iterations = strtod(optarg, NULL);
if (num_iterations <= 0) {
- fprintf(outf, "iterations %d should be positive number\n",
- num_iterations);
+ fprintf(outf, "iterations %d should be positive number\n", num_iterations);
exit(2);
}
break;
summary_only++;
break;
case 'T':
- tcc_activation_temp_override = atoi(optarg);
+ tj_max_override = atoi(optarg);
break;
case 'v':
print_version();
EXTRA_WARNINGS += -Wundef
EXTRA_WARNINGS += -Wwrite-strings
EXTRA_WARNINGS += -Wformat
+EXTRA_WARNINGS += -Wno-type-limits
# Makefiles suck: This macro sets a default value of $(2) for the
# variable named by $(1), unless the variable has been set by
--- /dev/null
+#
+# This config is an example usage of ktest.pl with a vmware guest
+#
+# VMware Setup:
+# -------------
+# - Edit the Virtual Machine ("Edit virtual machine settings")
+# - Add a Serial Port
+# - You almost certainly want it set "Connect at power on"
+# - Select "Use socket (named pipe)"
+# - Select a name that you'll recognize, like 'ktestserialpipe'
+# - From: Server
+# - To: A Virtual Machine
+# - Save
+# - Make sure you note the name, it will be in the base directory of the
+# virtual machine (where the "disks" are stored. The default
+# is /var/lib/vmware/<virtual machine name>/<the name you entered above>
+#
+# - Make note of the path to the VM
+# </End VMware setup>
+#
+# The guest is called 'Guest' and this would be something that
+# could be run on the host to test a virtual machine target.
+
+MACHINE = Guest
+
+# Name of the serial pipe you set in the VMware settings
+VMWARE_SERIAL_NAME = <the name you entered above>
+
+# Define a variable of the name of the VM
+# Noting this needs to be the name of the kmx file, and usually, the
+# name of the directory that it's in. If the directory and name
+# differ change the VMWARE_VM_DIR accordingly.
+# Please ommit the .kmx extension
+VMWARE_VM_NAME = <virtual machine name>
+
+# VM dir name. This is usually the same as the virtual machine's name,
+# but not always the case. Change if they differ
+VMWARE_VM_DIR = ${VMWARE_VM_NAME}
+
+# Base directory that the Virtual machine is contained in
+# /var/lib/vmware is the default on Linux
+VMWARE_VM_BASE_DIR = /var/lib/vmware/${VMWARE_VM_DIR}
+
+# Use ncat to read the unix pipe. Anything that can read the Unix Pipe
+# and output it's contents to stdout will work
+CONSOLE = /usr/bin/ncat -U ${VMWARE_VM_BASE_DIR}/${VMWARE_SERIAL_NAME}
+
+# Define what version of Workstation you are using
+# This is used by vmrun to use the appropriate appripriate pieces to
+# test this. In all likelihood you want 'ws' or 'player'
+# Valid options:
+# ws - Workstation (Windows or Linux host)
+# fusion - Fusion (Mac host)
+# player - Using VMware Player (Windows or Linux host)
+# Note: vmrun has to run directly on the host machine
+VMWARE_HOST_TYPE = ws
+
+# VMware provides `vmrun` to allow you to do certain things to the virtual machine
+# This should hard reset the VM and force a boot
+VMWARE_POWER_CYCLE = /usr/bin/vmrun -T ${VMWARE_HOST_TYPE} reset ${VMWARE_VM_BASE_DIR}/${VMWARE_VM_NAME}.kmx nogui
+
+#*************************************#
+# This part is the same as test.conf #
+#*************************************#
+
+# The include files will set up the type of test to run. Just set TEST to
+# which test you want to run.
+#
+# TESTS = patchcheck, randconfig, boot, test, config-bisect, bisect, min-config
+#
+# See the include/*.conf files that define these tests
+#
+TEST := patchcheck
+
+# Some tests may have more than one test to run. Define MULTI := 1 to run
+# the extra tests.
+MULTI := 0
+
+# In case you want to differentiate which type of system you are testing
+BITS := 64
+
+# REBOOT = none, error, fail, empty
+# See include/defaults.conf
+REBOOT := empty
+
+
+# The defaults file will set up various settings that can be used by all
+# machine configs.
+INCLUDE include/defaults.conf
+
+
+#*************************************#
+# Now we are different from test.conf #
+#*************************************#
+
+
+# The example here assumes that Guest is running a Fedora release
+# that uses dracut for its initfs. The POST_INSTALL will be executed
+# after the install of the kernel and modules are complete.
+#
+POST_INSTALL = ${SSH} /sbin/dracut -f /boot/initramfs-test.img $KERNEL_VERSION
+
+# Guests sometimes get stuck on reboot. We wait 3 seconds after running
+# the reboot command and then do a full power-cycle of the guest.
+# This forces the guest to restart.
+#
+POWERCYCLE_AFTER_REBOOT = 3
+
+# We do the same after the halt command, but this time we wait 20 seconds.
+POWEROFF_AFTER_HALT = 20
+
+
+# As the defaults.conf file has a POWER_CYCLE option already defined,
+# and options can not be defined in the same section more than once
+# (all DEFAULTS sections are considered the same). We use the
+# DEFAULTS OVERRIDE to tell ktest.pl to ignore the previous defined
+# options, for the options set in the OVERRIDE section.
+#
+DEFAULTS OVERRIDE
+
+# Instead of using the default POWER_CYCLE option defined in
+# defaults.conf, we use virsh to cycle it. To do so, we destroy
+# the guest, wait 5 seconds, and then start it up again.
+# Crude, but effective.
+#
+POWER_CYCLE = ${VMWARE_POWER_CYCLE}
+
+
+DEFAULTS
+
+# The following files each handle a different test case.
+# Having them included allows you to set up more than one machine and share
+# the same tests.
+INCLUDE include/patchcheck.conf
+INCLUDE include/tests.conf
+INCLUDE include/bisect.conf
+INCLUDE include/min-config.conf
#default opts
my %default = (
- "MAILER" => "sendmail", # default mailer
+ "MAILER" => "sendmail", # default mailer
"EMAIL_ON_ERROR" => 1,
"EMAIL_WHEN_FINISHED" => 1,
"EMAIL_WHEN_CANCELED" => 0,
"CLOSE_CONSOLE_SIGNAL" => "INT",
"TIMEOUT" => 120,
"TMP_DIR" => "/tmp/ktest/\${MACHINE}",
- "SLEEP_TIME" => 60, # sleep time between tests
+ "SLEEP_TIME" => 60, # sleep time between tests
"BUILD_NOCLEAN" => 0,
"REBOOT_ON_ERROR" => 0,
"POWEROFF_ON_ERROR" => 0,
"REBOOT_ON_SUCCESS" => 1,
"POWEROFF_ON_SUCCESS" => 0,
"BUILD_OPTIONS" => "",
- "BISECT_SLEEP_TIME" => 60, # sleep time between bisects
- "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
+ "BISECT_SLEEP_TIME" => 60, # sleep time between bisects
+ "PATCHCHECK_SLEEP_TIME" => 60, # sleep time between patch checks
"CLEAR_LOG" => 0,
"BISECT_MANUAL" => 0,
"BISECT_SKIP" => 1,
EOF
;
+# used with process_expression()
+my $d = 0;
+
+# defined before get_test_name()
+my $in_die = 0;
+
+# defined before process_warning_line()
+my $check_build_re = ".*:.*(warning|error|Error):.*";
+my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})";
+
+# defined before child_finished()
+my $child_done;
+
+# config_ignore holds the configs that were set (or unset) for
+# a good config and we will ignore these configs for the rest
+# of a config bisect. These configs stay as they were.
+my %config_ignore;
+
+# config_set holds what all configs were set as.
+my %config_set;
+
+# config_off holds the set of configs that the bad config had disabled.
+# We need to record them and set them in the .config when running
+# olddefconfig, because olddefconfig keeps the defaults.
+my %config_off;
+
+# config_off_tmp holds a set of configs to turn off for now
+my @config_off_tmp;
+
+# config_list is the set of configs that are being tested
+my %config_list;
+my %null_config;
+
+my %dependency;
+
+# found above run_config_bisect()
+my $pass = 1;
+
+# found above add_dep()
+
+my %depends;
+my %depcount;
+my $iflevel = 0;
+my @ifdeps;
+
+# prevent recursion
+my %read_kconfigs;
+
+# found above test_this_config()
+my %min_configs;
+my %keep_configs;
+my %save_configs;
+my %processed_configs;
+my %nochange_config;
+
+#
+# These are first defined here, main function later on
+#
+sub run_command;
+sub start_monitor;
+sub end_monitor;
+sub wait_for_monitor;
+
sub _logit {
if (defined($opt{"LOG_FILE"})) {
print LOG @_;
my $ans;
for (;;) {
- if ($cancel) {
+ if ($cancel) {
print "$prompt [y/n/C] ";
} else {
print "$prompt [Y/n] ";
# remove the space added in the beginning
$retval =~ s/ //;
- return "$retval"
+ return "$retval";
}
sub set_value {
defined($opt{$2});
}
-my $d = 0;
sub process_expression {
my ($name, $val) = @_;
$override = 0;
if ($type eq "TEST_START") {
-
if ($num_tests_set) {
die "$name: $.: Can not specify both NUM_TESTS and TEST_START\n";
}
$test_num = $old_test_num;
$repeat = $old_repeat;
}
-
} elsif (/^\s*ELSE\b(.*)$/) {
if (!$if) {
die "$name: $.: ELSE found with out matching IF section\n$_";
}
}
}
-
+
if ( ! -r $file ) {
die "$name: $.: Can't read file $file\n$_";
}
}
sub get_test_case {
- print "What test case would you like to run?\n";
- print " (build, install or boot)\n";
- print " Other tests are available but require editing ktest.conf\n";
- print " (see tools/testing/ktest/sample.conf)\n";
- my $ans = <STDIN>;
- chomp $ans;
- $default{"TEST_TYPE"} = $ans;
+ print "What test case would you like to run?\n";
+ print " (build, install or boot)\n";
+ print " Other tests are available but require editing ktest.conf\n";
+ print " (see tools/testing/ktest/sample.conf)\n";
+ my $ans = <STDIN>;
+ chomp $ans;
+ $default{"TEST_TYPE"} = $ans;
}
sub read_config {
return $option;
}
-sub run_command;
-sub start_monitor;
-sub end_monitor;
-sub wait_for_monitor;
-
sub reboot {
my ($time) = @_;
my $powercycle = 0;
($test_type eq "config_bisect" && $opt{"CONFIG_BISECT_TYPE[$i]"} eq "build");
}
-my $in_die = 0;
-
sub get_test_name() {
my $name;
}
sub dodie {
-
# avoid recursion
return if ($in_die);
$in_die = 1;
doprint "CRITICAL FAILURE... [TEST $i] ", @_, "\n";
if ($reboot_on_error && !do_not_reboot) {
-
doprint "REBOOTING\n";
reboot_to_good;
-
} elsif ($poweroff_on_error && defined($power_off)) {
doprint "POWERING OFF\n";
`$power_off`;
close O;
close L;
}
- send_email("KTEST: critical failure for test $i [$name]",
- "Your test started at $script_start_time has failed with:\n@_\n", $log_file);
+
+ send_email("KTEST: critical failure for test $i [$name]",
+ "Your test started at $script_start_time has failed with:\n@_\n", $log_file);
}
if ($monitor_cnt) {
- # restore terminal settings
- system("stty $stty_orig");
+ # restore terminal settings
+ system("stty $stty_orig");
}
if (defined($post_test)) {
}
sub save_logs {
- my ($result, $basedir) = @_;
- my @t = localtime;
- my $date = sprintf "%04d%02d%02d%02d%02d%02d",
- 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
+ my ($result, $basedir) = @_;
+ my @t = localtime;
+ my $date = sprintf "%04d%02d%02d%02d%02d%02d",
+ 1900+$t[5],$t[4],$t[3],$t[2],$t[1],$t[0];
- my $type = $build_type;
- if ($type =~ /useconfig/) {
- $type = "useconfig";
- }
+ my $type = $build_type;
+ if ($type =~ /useconfig/) {
+ $type = "useconfig";
+ }
- my $dir = "$machine-$test_type-$type-$result-$date";
+ my $dir = "$machine-$test_type-$type-$result-$date";
- $dir = "$basedir/$dir";
+ $dir = "$basedir/$dir";
- if (!-d $dir) {
- mkpath($dir) or
- dodie "can't create $dir";
- }
+ if (!-d $dir) {
+ mkpath($dir) or
+ dodie "can't create $dir";
+ }
- my %files = (
- "config" => $output_config,
- "buildlog" => $buildlog,
- "dmesg" => $dmesg,
- "testlog" => $testlog,
- );
+ my %files = (
+ "config" => $output_config,
+ "buildlog" => $buildlog,
+ "dmesg" => $dmesg,
+ "testlog" => $testlog,
+ );
- while (my ($name, $source) = each(%files)) {
- if (-f "$source") {
- cp "$source", "$dir/$name" or
- dodie "failed to copy $source";
- }
+ while (my ($name, $source) = each(%files)) {
+ if (-f "$source") {
+ cp "$source", "$dir/$name" or
+ dodie "failed to copy $source";
}
+ }
- doprint "*** Saved info to $dir ***\n";
+ doprint "*** Saved info to $dir ***\n";
}
sub fail {
- if ($die_on_failure) {
- dodie @_;
- }
+ if ($die_on_failure) {
+ dodie @_;
+ }
- doprint "FAILED\n";
+ doprint "FAILED\n";
- my $i = $iteration;
+ my $i = $iteration;
- # no need to reboot for just building.
- if (!do_not_reboot) {
- doprint "REBOOTING\n";
- reboot_to_good $sleep_time;
- }
+ # no need to reboot for just building.
+ if (!do_not_reboot) {
+ doprint "REBOOTING\n";
+ reboot_to_good $sleep_time;
+ }
- my $name = "";
+ my $name = "";
- if (defined($test_name)) {
- $name = " ($test_name)";
- }
+ if (defined($test_name)) {
+ $name = " ($test_name)";
+ }
- print_times;
+ print_times;
- doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
- doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
- doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n";
- doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
- doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "KTEST RESULT: TEST $i$name Failed: ", @_, "\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
+ doprint "%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%\n";
- if (defined($store_failures)) {
- save_logs "fail", $store_failures;
- }
+ if (defined($store_failures)) {
+ save_logs "fail", $store_failures;
+ }
- if (defined($post_test)) {
- run_command $post_test;
- }
+ if (defined($post_test)) {
+ run_command $post_test;
+ }
- return 1;
+ return 1;
}
sub run_command {
my ($command, $target, $skip) = @_;
return if (defined($grub_number) && defined($last_grub_menu) &&
- $last_grub_menu eq $grub_menu && defined($last_machine) &&
- $last_machine eq $machine);
+ $last_grub_menu eq $grub_menu && defined($last_machine) &&
+ $last_machine eq $machine);
doprint "Find $reboot_type menu ... ";
$grub_number = -1;
my $ssh_grub = $ssh_exec;
$ssh_grub =~ s,\$SSH_COMMAND,$command,g;
- open(IN, "$ssh_grub |")
- or dodie "unable to execute $command";
+ open(IN, "$ssh_grub |") or
+ dodie "unable to execute $command";
my $found = 0;
$target = '^menuentry.*' . $grub_menu_qt;
$skip = '^menuentry\s|^submenu\s';
} elsif ($reboot_type eq "grub2bls") {
- $command = $grub_bls_get;
- $target = '^title=.*' . $grub_menu_qt;
- $skip = '^title=';
+ $command = $grub_bls_get;
+ $target = '^title=.*' . $grub_menu_qt;
+ $skip = '^title=';
} else {
return;
}
_get_grub_index($command, $target, $skip);
}
-sub wait_for_input
-{
+sub wait_for_input {
my ($fp, $time) = @_;
my $start_time;
my $rin;
my $version_found = 0;
while (!$done) {
-
if ($bug && defined($stop_after_failure) &&
$stop_after_failure >= 0) {
my $time = $stop_after_failure - (time - $failure_start);
return monitor;
}
-my $check_build_re = ".*:.*(warning|error|Error):.*";
-my $utf8_quote = "\\x{e2}\\x{80}(\\x{98}|\\x{99})";
-
sub process_warning_line {
my ($line) = @_;
while (<IN>) {
if (/$check_build_re/) {
my $warning = process_warning_line $_;
-
+
$warnings_list{$warning} = 1;
}
}
run_command "mv $outputdir/config_temp $output_config" or
dodie "moving config_temp";
}
-
} elsif (!$noclean) {
unlink "$output_config";
run_command "$make mrproper" or
# Run old config regardless, to enforce min configurations
make_oldconfig;
+ if (not defined($build_options)){
+ $build_options = "";
+ }
my $build_ret = run_command "$make $build_options", $buildlog;
if (defined($post_build)) {
print_times;
- doprint "\n\n*******************************************\n";
- doprint "*******************************************\n";
- doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n";
- doprint "*******************************************\n";
- doprint "*******************************************\n";
+ doprint "\n\n";
+ doprint "*******************************************\n";
+ doprint "*******************************************\n";
+ doprint "KTEST RESULT: TEST $i$name SUCCESS!!!! **\n";
+ doprint "*******************************************\n";
+ doprint "*******************************************\n";
if (defined($store_successes)) {
- save_logs "success", $store_successes;
+ save_logs "success", $store_successes;
}
if ($i != $opt{"NUM_TESTS"} && !do_not_reboot) {
exit $run_command_status;
}
-my $child_done;
-
sub child_finished {
$child_done = 1;
}
}
if ($do_check) {
-
# get current HEAD
my $head = get_sha1("HEAD");
run_command "git bisect replay $replay" or
dodie "failed to run replay";
} else {
-
run_command "git bisect good $good" or
dodie "could not set bisect good to $good";
run_git_bisect "git bisect bad $bad" or
dodie "could not set bisect bad to $bad";
-
}
if (defined($start)) {
success $i;
}
-# config_ignore holds the configs that were set (or unset) for
-# a good config and we will ignore these configs for the rest
-# of a config bisect. These configs stay as they were.
-my %config_ignore;
-
-# config_set holds what all configs were set as.
-my %config_set;
-
-# config_off holds the set of configs that the bad config had disabled.
-# We need to record them and set them in the .config when running
-# olddefconfig, because olddefconfig keeps the defaults.
-my %config_off;
-
-# config_off_tmp holds a set of configs to turn off for now
-my @config_off_tmp;
-
-# config_list is the set of configs that are being tested
-my %config_list;
-my %null_config;
-
-my %dependency;
-
sub assign_configs {
my ($hash, $config) = @_;
doprint "Reading configs from $config\n";
- open (IN, $config)
- or dodie "Failed to read $config";
+ open (IN, $config) or
+ dodie "Failed to read $config";
while (<IN>) {
chomp;
doprint "***************************************\n\n";
}
-my $pass = 1;
-
sub run_config_bisect {
my ($good, $bad, $last_result) = @_;
my $reset = "";
$ret = run_config_bisect_test $config_bisect_type;
if ($ret) {
- doprint "NEW GOOD CONFIG ($pass)\n";
+ doprint "NEW GOOD CONFIG ($pass)\n";
system("cp $output_config $tmpdir/good_config.tmp.$pass");
$pass++;
# Return 3 for good config
return 3;
} else {
- doprint "NEW BAD CONFIG ($pass)\n";
+ doprint "NEW BAD CONFIG ($pass)\n";
system("cp $output_config $tmpdir/bad_config.tmp.$pass");
$pass++;
# Return 4 for bad config
if (!defined($config_bisect_exec)) {
# First check the location that ktest.pl ran
- my @locations = ( "$pwd/config-bisect.pl",
- "$dirname/config-bisect.pl",
- "$builddir/tools/testing/ktest/config-bisect.pl",
- undef );
+ my @locations = (
+ "$pwd/config-bisect.pl",
+ "$dirname/config-bisect.pl",
+ "$builddir/tools/testing/ktest/config-bisect.pl",
+ undef );
foreach my $loc (@locations) {
doprint "loc = $loc\n";
$config_bisect_exec = $loc;
} while ($ret == 3 || $ret == 4);
if ($ret == 2) {
- config_bisect_end "$good_config.tmp", "$bad_config.tmp";
+ config_bisect_end "$good_config.tmp", "$bad_config.tmp";
}
return $ret if ($ret < 0);
return 1;
}
-my %depends;
-my %depcount;
-my $iflevel = 0;
-my @ifdeps;
-
-# prevent recursion
-my %read_kconfigs;
-
sub add_dep {
# $config depends on $dep
my ($config, $dep) = @_;
my $cont = 0;
my $line;
-
if (! -f $kconfig) {
doprint "file $kconfig does not exist, skipping\n";
return;
sub read_depends {
# find out which arch this is by the kconfig file
- open (IN, $output_config)
- or dodie "Failed to read $output_config";
+ open (IN, $output_config) or
+ dodie "Failed to read $output_config";
my $arch;
while (<IN>) {
if (m,Linux/(\S+)\s+\S+\s+Kernel Configuration,) {
if (! -f $kconfig && $arch =~ /\d$/) {
my $orig = $arch;
- # some subarchs have numbers, truncate them
+ # some subarchs have numbers, truncate them
$arch =~ s/\d*$//;
$kconfig = "$builddir/arch/$arch/Kconfig";
if (! -f $kconfig) {
my @configs;
while ($dep =~ /[$valid]/) {
-
if ($dep =~ /^[^$valid]*([$valid]+)/) {
my $conf = "CONFIG_" . $1;
return @configs;
}
-my %min_configs;
-my %keep_configs;
-my %save_configs;
-my %processed_configs;
-my %nochange_config;
-
sub test_this_config {
my ($config) = @_;
foreach my $config (@config_keys) {
my $kconfig = chomp_config $config;
if (!defined $depcount{$kconfig}) {
- $depcount{$kconfig} = 0;
+ $depcount{$kconfig} = 0;
}
}
my $take_two = 0;
while (!$done) {
-
my $config;
my $found;
# Sort keys by who is most dependent on
@test_configs = sort { $depcount{chomp_config($b)} <=> $depcount{chomp_config($a)} }
- @test_configs ;
+ @test_configs ;
# Put configs that did not modify the config at the end.
my $reset = 1;
my $failed = 0;
build "oldconfig" or $failed = 1;
if (!$failed) {
- start_monitor_and_install or $failed = 1;
+ start_monitor_and_install or $failed = 1;
- if ($type eq "test" && !$failed) {
- do_run_test or $failed = 1;
- }
+ if ($type eq "test" && !$failed) {
+ do_run_test or $failed = 1;
+ }
- end_monitor;
+ end_monitor;
}
$in_bisect = 0;
# update new ignore configs
if (defined($ignore_config)) {
- open (OUT, ">$temp_config")
- or dodie "Can't write to $temp_config";
+ open (OUT, ">$temp_config") or
+ dodie "Can't write to $temp_config";
foreach my $config (keys %save_configs) {
print OUT "$save_configs{$config}\n";
}
}
# Save off all the current mandatory configs
- open (OUT, ">$temp_config")
- or dodie "Can't write to $temp_config";
+ open (OUT, ">$temp_config") or
+ dodie "Can't write to $temp_config";
foreach my $config (keys %keep_configs) {
print OUT "$keep_configs{$config}\n";
}
open(IN, $buildlog) or dodie "Can't open $buildlog";
while (<IN>) {
-
# Some compilers use UTF-8 extended for quotes
# for distcc heterogeneous systems, this causes issues
s/$utf8_quote/'/g;
success $i;
}
-$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n";
-
-if ($#ARGV == 0) {
- $ktest_config = $ARGV[0];
- if (! -f $ktest_config) {
- print "$ktest_config does not exist.\n";
- if (!read_yn "Create it?") {
- exit 0;
- }
- }
-}
-
-if (! -f $ktest_config) {
- $newconfig = 1;
- get_test_case;
- open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
- print OUT << "EOF"
-# Generated by ktest.pl
-#
-
-# PWD is a ktest.pl variable that will result in the process working
-# directory that ktest.pl is executed in.
-
-# THIS_DIR is automatically assigned the PWD of the path that generated
-# the config file. It is best to use this variable when assigning other
-# directory paths within this directory. This allows you to easily
-# move the test cases to other locations or to other machines.
-#
-THIS_DIR := $variable{"PWD"}
-
-# Define each test with TEST_START
-# The config options below it will override the defaults
-TEST_START
-TEST_TYPE = $default{"TEST_TYPE"}
-
-DEFAULTS
-EOF
-;
- close(OUT);
-}
-read_config $ktest_config;
-
-if (defined($opt{"LOG_FILE"})) {
- $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1);
-}
-
-# Append any configs entered in manually to the config file.
-my @new_configs = keys %entered_configs;
-if ($#new_configs >= 0) {
- print "\nAppending entered in configs to $ktest_config\n";
- open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
- foreach my $config (@new_configs) {
- print OUT "$config = $entered_configs{$config}\n";
- $opt{$config} = process_variables($entered_configs{$config});
- }
-}
-
-if (defined($opt{"LOG_FILE"})) {
- if ($opt{"CLEAR_LOG"}) {
- unlink $opt{"LOG_FILE"};
- }
- open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
- LOG->autoflush(1);
-}
-
-doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
-
-for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
-
- if (!$i) {
- doprint "DEFAULT OPTIONS:\n";
- } else {
- doprint "\nTEST $i OPTIONS";
- if (defined($repeat_tests{$i})) {
- $repeat = $repeat_tests{$i};
- doprint " ITERATE $repeat";
- }
- doprint "\n";
- }
-
- foreach my $option (sort keys %opt) {
-
- if ($option =~ /\[(\d+)\]$/) {
- next if ($i != $1);
- } else {
- next if ($i);
- }
-
- doprint "$option = $opt{$option}\n";
- }
-}
-
sub option_defined {
my ($option) = @_;
}
sub send_email {
-
if (defined($mailto)) {
if (!defined($mailer)) {
doprint "No email sent: email or mailer not specified in config.\n";
sub cancel_test {
if ($email_when_canceled) {
my $name = get_test_name;
- send_email("KTEST: Your [$name] test was cancelled",
- "Your test started at $script_start_time was cancelled: sig int");
+ send_email("KTEST: Your [$name] test was cancelled",
+ "Your test started at $script_start_time was cancelled: sig int");
}
die "\nCaught Sig Int, test interrupted: $!\n"
}
+$#ARGV < 1 or die "ktest.pl version: $VERSION\n usage: ktest.pl [config-file]\n";
+
+if ($#ARGV == 0) {
+ $ktest_config = $ARGV[0];
+ if (! -f $ktest_config) {
+ print "$ktest_config does not exist.\n";
+ if (!read_yn "Create it?") {
+ exit 0;
+ }
+ }
+}
+
+if (! -f $ktest_config) {
+ $newconfig = 1;
+ get_test_case;
+ open(OUT, ">$ktest_config") or die "Can not create $ktest_config";
+ print OUT << "EOF"
+# Generated by ktest.pl
+#
+
+# PWD is a ktest.pl variable that will result in the process working
+# directory that ktest.pl is executed in.
+
+# THIS_DIR is automatically assigned the PWD of the path that generated
+# the config file. It is best to use this variable when assigning other
+# directory paths within this directory. This allows you to easily
+# move the test cases to other locations or to other machines.
+#
+THIS_DIR := $variable{"PWD"}
+
+# Define each test with TEST_START
+# The config options below it will override the defaults
+TEST_START
+TEST_TYPE = $default{"TEST_TYPE"}
+
+DEFAULTS
+EOF
+;
+ close(OUT);
+}
+read_config $ktest_config;
+
+if (defined($opt{"LOG_FILE"})) {
+ $opt{"LOG_FILE"} = eval_option("LOG_FILE", $opt{"LOG_FILE"}, -1);
+}
+
+# Append any configs entered in manually to the config file.
+my @new_configs = keys %entered_configs;
+if ($#new_configs >= 0) {
+ print "\nAppending entered in configs to $ktest_config\n";
+ open(OUT, ">>$ktest_config") or die "Can not append to $ktest_config";
+ foreach my $config (@new_configs) {
+ print OUT "$config = $entered_configs{$config}\n";
+ $opt{$config} = process_variables($entered_configs{$config});
+ }
+}
+
+if (defined($opt{"LOG_FILE"})) {
+ if ($opt{"CLEAR_LOG"}) {
+ unlink $opt{"LOG_FILE"};
+ }
+ open(LOG, ">> $opt{LOG_FILE}") or die "Can't write to $opt{LOG_FILE}";
+ LOG->autoflush(1);
+}
+
+doprint "\n\nSTARTING AUTOMATED TESTS\n\n";
+
+for (my $i = 0, my $repeat = 1; $i <= $opt{"NUM_TESTS"}; $i += $repeat) {
+
+ if (!$i) {
+ doprint "DEFAULT OPTIONS:\n";
+ } else {
+ doprint "\nTEST $i OPTIONS";
+ if (defined($repeat_tests{$i})) {
+ $repeat = $repeat_tests{$i};
+ doprint " ITERATE $repeat";
+ }
+ doprint "\n";
+ }
+
+ foreach my $option (sort keys %opt) {
+ if ($option =~ /\[(\d+)\]$/) {
+ next if ($i != $1);
+ } else {
+ next if ($i);
+ }
+
+ doprint "$option = $opt{$option}\n";
+ }
+}
+
$SIG{INT} = qw(cancel_test);
# First we need to do is the builds
# The first test may override the PRE_KTEST option
if ($i == 1) {
- if (defined($pre_ktest)) {
- doprint "\n";
- run_command $pre_ktest;
- }
- if ($email_when_started) {
+ if (defined($pre_ktest)) {
+ doprint "\n";
+ run_command $pre_ktest;
+ }
+ if ($email_when_started) {
my $name = get_test_name;
- send_email("KTEST: Your [$name] test was started",
- "Your test was started on $script_start_time");
- }
+ send_email("KTEST: Your [$name] test was started",
+ "Your test was started on $script_start_time");
+ }
}
# Any test can override the POST_KTEST option
my $ret = run_command $pre_test;
if (!$ret && defined($pre_test_die) &&
$pre_test_die) {
- dodie "failed to pre_test\n";
+ dodie "failed to pre_test\n";
}
}
run_command $switch_to_good;
}
-
doprint "\n $successes of $opt{NUM_TESTS} tests were successful\n\n";
if ($email_when_finished) {
send_email("KTEST: Your test has finished!",
- "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
+ "$successes of $opt{NUM_TESTS} tests started at $script_start_time were successful!");
}
if (defined($opt{"LOG_FILE"})) {
}
exit 0;
+
+##
+# The following are here to standardize tabs/spaces/etc across the most likely editors
+###
+
+# Local Variables:
+# mode: perl
+# End:
+# vim: softtabstop=4
if (!ASSERT_OK_PTR(skel, "skel_open"))
return;
+ skel->bss->pid = getpid();
+
if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach"))
goto cleanup;
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+__u32 pid = 0;
+
char num_out[64] = {};
long num_ret = 0;
static const char str1[] = "str1";
static const char longstr[] = "longstr";
+ if ((int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
/* Integer types */
num_ret = BPF_SNPRINTF(num_out, sizeof(num_out),
"%d %u %x %li %llu %lX",
// SPDX-License-Identifier: GPL-2.0-only
/*
- * Copyright (C) 2020 Hisilicon Limited.
+ * Copyright (C) 2020 HiSilicon Limited.
*/
#include <fcntl.h>
__u32 dma_bits; /* DMA addressing capability */
__u32 dma_dir; /* DMA data direction */
__u32 dma_trans_ns; /* time for DMA transmission in ns */
- __u8 expansion[80]; /* For future use */
+ __u32 granule; /* how many PAGE_SIZE will do map/unmap once a time */
+ __u8 expansion[76]; /* For future use */
};
int main(int argc, char **argv)
int threads = 1, seconds = 20, node = -1;
/* default dma mask 32bit, bidirectional DMA */
int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
+ /* default granule 1 PAGESIZE */
+ int granule = 1;
int cmd = DMA_MAP_BENCHMARK;
char *p;
- while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
+ while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
switch (opt) {
case 't':
threads = atoi(optarg);
case 'x':
xdelay = atoi(optarg);
break;
+ case 'g':
+ granule = atoi(optarg);
+ break;
default:
return -1;
}
exit(1);
}
+ if (granule < 1 || granule > 1024) {
+ fprintf(stderr, "invalid granule size\n");
+ exit(1);
+ }
+
fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
if (fd == -1) {
perror("open");
map.dma_bits = bits;
map.dma_dir = dir;
map.dma_trans_ns = xdelay;
+ map.granule = granule;
if (ioctl(fd, cmd, &map)) {
perror("ioctl");
exit(1);
}
- printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s\n",
- threads, seconds, node, dir[directions]);
+ printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
+ threads, seconds, node, dir[directions], granule);
printf("average map latency(us):%.1f standard deviation:%.1f\n",
map.avg_map_100ns/10.0, map.map_stddev/10.0);
printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
close(sfd[1]);
}
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll64)
+{
+ pthread_t waiter[2];
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ /*
+ * main will act as the emitter once both waiter threads are
+ * blocked and expects to both be awoken upon the ready event.
+ */
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&waiter[0], NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&waiter[1], NULL, waiter_entry1a, &ctx), 0);
+
+ usleep(100000);
+ ASSERT_EQ(write(ctx.sfd[1], "w", 1), 1);
+
+ ASSERT_EQ(pthread_join(waiter[0], NULL), 0);
+ ASSERT_EQ(pthread_join(waiter[1], NULL), 0);
+
+ EXPECT_EQ(ctx.count, 2);
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
TEST_HARNESS_MAIN
*/
#include <stdlib.h>
-#include <assert.h>
struct range {
sparsebit_idx_t first, last;
#include <sys/mman.h>
#include <string.h>
#include <fcntl.h>
-#include <string.h>
#include "../kselftest.h"
#include "../kselftest_harness.h"
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
local err=0
- LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+ LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
ip netns delete ${disabled_ns}
#include <sched.h>
#include <time.h>
#include <stdarg.h>
-#include <sched.h>
#include <pthread.h>
#include <signal.h>
#include <sys/prctl.h>
TEST_GEN_PROGS += proc-self-map-files-002
TEST_GEN_PROGS += proc-self-syscall
TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-subset-pid
TEST_GEN_PROGS += proc-uptime-001
TEST_GEN_PROGS += proc-uptime-002
TEST_GEN_PROGS += read
--- /dev/null
+/*
+ * Copyright (c) 2021 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that "mount -t proc -o subset=pid" hides everything but pids,
+ * /proc/self and /proc/thread-self.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <stdio.h>
+
+static inline bool streq(const char *a, const char *b)
+{
+ return strcmp(a, b) == 0;
+}
+
+static void make_private_proc(void)
+{
+ if (unshare(CLONE_NEWNS) == -1) {
+ if (errno == ENOSYS || errno == EPERM) {
+ exit(4);
+ }
+ exit(1);
+ }
+ if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
+ exit(1);
+ }
+ if (mount(NULL, "/proc", "proc", 0, "subset=pid") == -1) {
+ exit(1);
+ }
+}
+
+static bool string_is_pid(const char *s)
+{
+ while (1) {
+ switch (*s++) {
+ case '0':case '1':case '2':case '3':case '4':
+ case '5':case '6':case '7':case '8':case '9':
+ continue;
+
+ case '\0':
+ return true;
+
+ default:
+ return false;
+ }
+ }
+}
+
+int main(void)
+{
+ make_private_proc();
+
+ DIR *d = opendir("/proc");
+ assert(d);
+
+ struct dirent *de;
+
+ bool dot = false;
+ bool dot_dot = false;
+ bool self = false;
+ bool thread_self = false;
+
+ while ((de = readdir(d))) {
+ if (streq(de->d_name, ".")) {
+ assert(!dot);
+ dot = true;
+ assert(de->d_type == DT_DIR);
+ } else if (streq(de->d_name, "..")) {
+ assert(!dot_dot);
+ dot_dot = true;
+ assert(de->d_type == DT_DIR);
+ } else if (streq(de->d_name, "self")) {
+ assert(!self);
+ self = true;
+ assert(de->d_type == DT_LNK);
+ } else if (streq(de->d_name, "thread-self")) {
+ assert(!thread_self);
+ thread_self = true;
+ assert(de->d_type == DT_LNK);
+ } else {
+ if (!string_is_pid(de->d_name)) {
+ fprintf(stderr, "d_name '%s'\n", de->d_name);
+ assert(0);
+ }
+ assert(de->d_type == DT_DIR);
+ }
+ }
+
+ char c;
+ int rv = readlink("/proc/cpuinfo", &c, 1);
+ assert(rv == -1 && errno == ENOENT);
+
+ int fd = open("/proc/cpuinfo", O_RDONLY);
+ assert(fd == -1 && errno == ENOENT);
+
+ return 0;
+}
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Test
-// 1) read of every file in /proc
+// 1) read and lseek on every file in /proc
// 2) readlink of every symlink in /proc
// 3) recursively (1) + (2) for every directory in /proc
// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
if (fd == -1)
return;
+ /* struct proc_ops::proc_lseek is mandatory if file is seekable. */
+ (void)lseek(fd, 0, SEEK_SET);
rv = read(fd, buf, sizeof(buf));
assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
close(fd);
write_to_hugetlbfs
hmm-tests
local_config.*
+split_huge_page_test
TEST_GEN_FILES += thuge-gen
TEST_GEN_FILES += transhuge-stress
TEST_GEN_FILES += userfaultfd
+TEST_GEN_FILES += split_huge_page_test
ifeq ($(MACHINE),x86_64)
CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
/* Just the flags we need, copied from mm.h: */
#define FOLL_WRITE 0x01 /* check pte is writable */
+#define FOLL_TOUCH 0x02 /* mark page accessed */
static char *cmd_to_str(unsigned long cmd)
{
{
struct gup_test gup = { 0 };
unsigned long size = 128 * MB;
- int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
+ int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 1;
unsigned long cmd = GUP_FAST_BENCHMARK;
- int flags = MAP_PRIVATE;
+ int flags = MAP_PRIVATE, touch = 0;
char *file = "/dev/zero";
char *p;
- while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwSH")) != -1) {
+ while ((opt = getopt(argc, argv, "m:r:n:F:f:abctTLUuwWSHpz")) != -1) {
switch (opt) {
case 'a':
cmd = PIN_FAST_BENCHMARK;
*/
gup.which_pages[0] = 1;
break;
+ case 'p':
+ /* works only with DUMP_USER_PAGES_TEST */
+ gup.test_flags |= GUP_TEST_FLAG_DUMP_PAGES_USE_PIN;
+ break;
case 'F':
/* strtol, so you can pass flags in hex form */
- gup.flags = strtol(optarg, 0, 0);
+ gup.gup_flags = strtol(optarg, 0, 0);
break;
case 'm':
size = atoi(optarg) * MB;
case 'w':
write = 1;
break;
+ case 'W':
+ write = 0;
+ break;
case 'f':
file = optarg;
break;
case 'H':
flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
break;
+ case 'z':
+ /* fault pages in gup, do not fault in userland */
+ touch = 1;
+ break;
default:
return -1;
}
gup.nr_pages_per_call = nr_pages;
if (write)
- gup.flags |= FOLL_WRITE;
+ gup.gup_flags |= FOLL_WRITE;
fd = open("/sys/kernel/debug/gup_test", O_RDWR);
if (fd == -1) {
else if (thp == 0)
madvise(p, size, MADV_NOHUGEPAGE);
- for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
- p[0] = 0;
+ /*
+ * FOLL_TOUCH, in gup_test, is used as an either/or case: either
+ * fault pages in from the kernel via FOLL_TOUCH, or fault them
+ * in here, from user space. This allows comparison of performance
+ * between those two cases.
+ */
+ if (touch) {
+ gup.gup_flags |= FOLL_TOUCH;
+ } else {
+ for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
+ p[0] = 0;
+ }
/* Only report timing information on the *_BENCHMARK commands: */
if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
--- /dev/null
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual
+ * address range in a process via <debugfs>/split_huge_pages interface.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <malloc.h>
+#include <stdbool.h>
+
+uint64_t pagesize;
+unsigned int pageshift;
+uint64_t pmd_pagesize;
+
+#define PMD_SIZE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
+#define SMAP_PATH "/proc/self/smaps"
+#define INPUT_MAX 80
+
+#define PID_FMT "%d,0x%lx,0x%lx"
+#define PATH_FMT "%s,0x%lx,0x%lx"
+
+#define PFN_MASK ((1UL<<55)-1)
+#define KPF_THP (1UL<<22)
+
+int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
+{
+ uint64_t paddr;
+ uint64_t page_flags;
+
+ if (pagemap_file) {
+ pread(pagemap_file, &paddr, sizeof(paddr),
+ ((long)vaddr >> pageshift) * sizeof(paddr));
+
+ if (kpageflags_file) {
+ pread(kpageflags_file, &page_flags, sizeof(page_flags),
+ (paddr & PFN_MASK) * sizeof(page_flags));
+
+ return !!(page_flags & KPF_THP);
+ }
+ }
+ return 0;
+}
+
+
+static uint64_t read_pmd_pagesize(void)
+{
+ int fd;
+ char buf[20];
+ ssize_t num_read;
+
+ fd = open(PMD_SIZE_PATH, O_RDONLY);
+ if (fd == -1) {
+ perror("Open hpage_pmd_size failed");
+ exit(EXIT_FAILURE);
+ }
+ num_read = read(fd, buf, 19);
+ if (num_read < 1) {
+ close(fd);
+ perror("Read hpage_pmd_size failed");
+ exit(EXIT_FAILURE);
+ }
+ buf[num_read] = '\0';
+ close(fd);
+
+ return strtoul(buf, NULL, 10);
+}
+
+static int write_file(const char *path, const char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numwritten;
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ return 0;
+
+ numwritten = write(fd, buf, buflen - 1);
+ close(fd);
+ if (numwritten < 1)
+ return 0;
+
+ return (unsigned int) numwritten;
+}
+
+static void write_debugfs(const char *fmt, ...)
+{
+ char input[INPUT_MAX];
+ int ret;
+ va_list argp;
+
+ va_start(argp, fmt);
+ ret = vsnprintf(input, INPUT_MAX, fmt, argp);
+ va_end(argp);
+
+ if (ret >= INPUT_MAX) {
+ printf("%s: Debugfs input is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!write_file(SPLIT_DEBUGFS, input, ret + 1)) {
+ perror(SPLIT_DEBUGFS);
+ exit(EXIT_FAILURE);
+ }
+}
+
+#define MAX_LINE_LENGTH 500
+
+static bool check_for_pattern(FILE *fp, const char *pattern, char *buf)
+{
+ while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
+ if (!strncmp(buf, pattern, strlen(pattern)))
+ return true;
+ }
+ return false;
+}
+
+static uint64_t check_huge(void *addr)
+{
+ uint64_t thp = 0;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+
+ fp = fopen(SMAP_PATH, "r");
+ if (!fp) {
+ printf("%s: Failed to open file %s\n", __func__, SMAP_PATH);
+ exit(EXIT_FAILURE);
+ }
+ if (!check_for_pattern(fp, addr_pattern, buffer))
+ goto err_out;
+
+ /*
+ * Fetch the AnonHugePages: in the same block and check the number of
+ * hugepages.
+ */
+ if (!check_for_pattern(fp, "AnonHugePages:", buffer))
+ goto err_out;
+
+ if (sscanf(buffer, "AnonHugePages:%10ld kB", &thp) != 1) {
+ printf("Reading smap error\n");
+ exit(EXIT_FAILURE);
+ }
+
+err_out:
+ fclose(fp);
+ return thp;
+}
+
+void split_pmd_thp(void)
+{
+ char *one_page;
+ size_t len = 4 * pmd_pagesize;
+ uint64_t thp_size;
+ size_t i;
+
+ one_page = memalign(pmd_pagesize, len);
+
+ if (!one_page) {
+ printf("Fail to allocate memory\n");
+ exit(EXIT_FAILURE);
+ }
+
+ madvise(one_page, len, MADV_HUGEPAGE);
+
+ for (i = 0; i < len; i++)
+ one_page[i] = (char)i;
+
+ thp_size = check_huge(one_page);
+ if (!thp_size) {
+ printf("No THP is allocated\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* split all THPs */
+ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
+ (uint64_t)one_page + len);
+
+ for (i = 0; i < len; i++)
+ if (one_page[i] != (char)i) {
+ printf("%ld byte corrupted\n", i);
+ exit(EXIT_FAILURE);
+ }
+
+
+ thp_size = check_huge(one_page);
+ if (thp_size) {
+ printf("Still %ld kB AnonHugePages not split\n", thp_size);
+ exit(EXIT_FAILURE);
+ }
+
+ printf("Split huge pages successful\n");
+ free(one_page);
+}
+
+void split_pte_mapped_thp(void)
+{
+ char *one_page, *pte_mapped, *pte_mapped2;
+ size_t len = 4 * pmd_pagesize;
+ uint64_t thp_size;
+ size_t i;
+ const char *pagemap_template = "/proc/%d/pagemap";
+ const char *kpageflags_proc = "/proc/kpageflags";
+ char pagemap_proc[255];
+ int pagemap_fd;
+ int kpageflags_fd;
+
+ if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0) {
+ perror("get pagemap proc error");
+ exit(EXIT_FAILURE);
+ }
+ pagemap_fd = open(pagemap_proc, O_RDONLY);
+
+ if (pagemap_fd == -1) {
+ perror("read pagemap:");
+ exit(EXIT_FAILURE);
+ }
+
+ kpageflags_fd = open(kpageflags_proc, O_RDONLY);
+
+ if (kpageflags_fd == -1) {
+ perror("read kpageflags:");
+ exit(EXIT_FAILURE);
+ }
+
+ one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+
+ madvise(one_page, len, MADV_HUGEPAGE);
+
+ for (i = 0; i < len; i++)
+ one_page[i] = (char)i;
+
+ thp_size = check_huge(one_page);
+ if (!thp_size) {
+ printf("No THP is allocated\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* remap the first pagesize of first THP */
+ pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE);
+
+ /* remap the Nth pagesize of Nth THP */
+ for (i = 1; i < 4; i++) {
+ pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i,
+ pagesize, pagesize,
+ MREMAP_MAYMOVE|MREMAP_FIXED,
+ pte_mapped + pagesize * i);
+ if (pte_mapped2 == (char *)-1) {
+ perror("mremap failed");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ /* smap does not show THPs after mremap, use kpageflags instead */
+ thp_size = 0;
+ for (i = 0; i < pagesize * 4; i++)
+ if (i % pagesize == 0 &&
+ is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
+ thp_size++;
+
+ if (thp_size != 4) {
+ printf("Some THPs are missing during mremap\n");
+ exit(EXIT_FAILURE);
+ }
+
+ /* split all remapped THPs */
+ write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
+ (uint64_t)pte_mapped + pagesize * 4);
+
+ /* smap does not show THPs after mremap, use kpageflags instead */
+ thp_size = 0;
+ for (i = 0; i < pagesize * 4; i++) {
+ if (pte_mapped[i] != (char)i) {
+ printf("%ld byte corrupted\n", i);
+ exit(EXIT_FAILURE);
+ }
+ if (i % pagesize == 0 &&
+ is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
+ thp_size++;
+ }
+
+ if (thp_size) {
+ printf("Still %ld THPs not split\n", thp_size);
+ exit(EXIT_FAILURE);
+ }
+
+ printf("Split PTE-mapped huge pages successful\n");
+ munmap(one_page, len);
+ close(pagemap_fd);
+ close(kpageflags_fd);
+}
+
+void split_file_backed_thp(void)
+{
+ int status;
+ int fd;
+ ssize_t num_written;
+ char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
+ const char *tmpfs_loc = mkdtemp(tmpfs_template);
+ char testfile[INPUT_MAX];
+ uint64_t pgoff_start = 0, pgoff_end = 1024;
+
+ printf("Please enable pr_debug in split_huge_pages_in_file() if you need more info.\n");
+
+ status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
+
+ if (status) {
+ printf("Unable to create a tmpfs for testing\n");
+ exit(EXIT_FAILURE);
+ }
+
+ status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
+ if (status >= INPUT_MAX) {
+ printf("Fail to create file-backed THP split testing file\n");
+ goto cleanup;
+ }
+
+ fd = open(testfile, O_CREAT|O_WRONLY);
+ if (fd == -1) {
+ perror("Cannot open testing file\n");
+ goto cleanup;
+ }
+
+ /* write something to the file, so a file-backed THP can be allocated */
+ num_written = write(fd, tmpfs_loc, sizeof(tmpfs_loc));
+ close(fd);
+
+ if (num_written < 1) {
+ printf("Fail to write data to testing file\n");
+ goto cleanup;
+ }
+
+ /* split the file-backed THP */
+ write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end);
+
+ status = unlink(testfile);
+ if (status)
+ perror("Cannot remove testing file\n");
+
+cleanup:
+ status = umount(tmpfs_loc);
+ if (status) {
+ printf("Unable to umount %s\n", tmpfs_loc);
+ exit(EXIT_FAILURE);
+ }
+ status = rmdir(tmpfs_loc);
+ if (status) {
+ perror("cannot remove tmp dir");
+ exit(EXIT_FAILURE);
+ }
+
+ printf("file-backed THP split test done, please check dmesg for more information\n");
+}
+
+int main(int argc, char **argv)
+{
+ if (geteuid() != 0) {
+ printf("Please run the benchmark as root\n");
+ exit(EXIT_FAILURE);
+ }
+
+ pagesize = getpagesize();
+ pageshift = ffs(pagesize) - 1;
+ pmd_pagesize = read_pmd_pagesize();
+
+ split_pmd_thp();
+ split_pte_mapped_thp();
+ split_file_backed_thp();
+
+ return 0;
+}
static volatile bool test_uffdio_zeropage_eexist = true;
/* Whether to test uffd write-protection */
static bool test_uffdio_wp = false;
+/* Whether to test uffd minor faults */
+static bool test_uffdio_minor = false;
static bool map_shared;
static int huge_fd;
int cpu;
unsigned long missing_faults;
unsigned long wp_faults;
+ unsigned long minor_faults;
};
/* pthread_mutex_t starts at page offset 0 */
uffd_stats[i].cpu = i;
uffd_stats[i].missing_faults = 0;
uffd_stats[i].wp_faults = 0;
+ uffd_stats[i].minor_faults = 0;
}
}
static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
{
int i;
- unsigned long long miss_total = 0, wp_total = 0;
+ unsigned long long miss_total = 0, wp_total = 0, minor_total = 0;
for (i = 0; i < n_cpus; i++) {
miss_total += stats[i].missing_faults;
wp_total += stats[i].wp_faults;
+ minor_total += stats[i].minor_faults;
}
printf("userfaults: %llu missing (", miss_total);
printf("\b), %llu wp (", wp_total);
for (i = 0; i < n_cpus; i++)
printf("%lu+", stats[i].wp_faults);
+ printf("\b), %llu minor (", minor_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", stats[i].minor_faults);
printf("\b)\n");
}
};
static struct uffd_test_ops hugetlb_uffd_test_ops = {
- .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
+ .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC & ~(1 << _UFFDIO_CONTINUE),
.allocate_area = hugetlb_allocate_area,
.release_pages = hugetlb_release_pages,
.alias_mapping = hugetlb_alias_mapping,
}
}
+static void continue_range(int ufd, __u64 start, __u64 len)
+{
+ struct uffdio_continue req;
+
+ req.range.start = start;
+ req.range.len = len;
+ req.mode = 0;
+
+ if (ioctl(ufd, UFFDIO_CONTINUE, &req)) {
+ fprintf(stderr,
+ "UFFDIO_CONTINUE failed for address 0x%" PRIx64 "\n",
+ (uint64_t)start);
+ exit(1);
+ }
+}
+
static void *locking_thread(void *arg)
{
unsigned long cpu = (unsigned long) arg;
}
if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
+ /* Write protect page faults */
wp_range(uffd, msg->arg.pagefault.address, page_size, false);
stats->wp_faults++;
+ } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) {
+ uint8_t *area;
+ int b;
+
+ /*
+ * Minor page faults
+ *
+ * To prove we can modify the original range for testing
+ * purposes, we're going to bit flip this range before
+ * continuing.
+ *
+ * Note that this requires all minor page fault tests operate on
+ * area_dst (non-UFFD-registered) and area_dst_alias
+ * (UFFD-registered).
+ */
+
+ area = (uint8_t *)(area_dst +
+ ((char *)msg->arg.pagefault.address -
+ area_dst_alias));
+ for (b = 0; b < page_size; ++b)
+ area[b] = ~area[b];
+ continue_range(uffd, msg->arg.pagefault.address, page_size);
+ stats->minor_faults++;
} else {
/* Missing page faults */
if (bounces & BOUNCE_VERIFY &&
return 0;
}
-static int userfaultfd_open(int features)
+static int userfaultfd_open_ext(uint64_t *features)
{
struct uffdio_api uffdio_api;
uffd_flags = fcntl(uffd, F_GETFD, NULL);
uffdio_api.api = UFFD_API;
- uffdio_api.features = features;
+ uffdio_api.features = *features;
if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
fprintf(stderr, "UFFDIO_API failed.\nPlease make sure to "
"run with either root or ptrace capability.\n");
return 1;
}
+ *features = uffdio_api.features;
return 0;
}
+static int userfaultfd_open(uint64_t features)
+{
+ return userfaultfd_open_ext(&features);
+}
+
sigjmp_buf jbuf, *sigbuf;
static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
}
if (!pid)
- return faulting_process(0);
+ exit(faulting_process(0));
waitpid(pid, &err, 0);
if (err) {
return userfaults != 0;
}
+static int userfaultfd_minor_test(void)
+{
+ struct uffdio_register uffdio_register;
+ unsigned long expected_ioctls;
+ unsigned long p;
+ pthread_t uffd_mon;
+ uint8_t expected_byte;
+ void *expected_page;
+ char c;
+ struct uffd_stats stats = { 0 };
+ uint64_t features = UFFD_FEATURE_MINOR_HUGETLBFS;
+
+ if (!test_uffdio_minor)
+ return 0;
+
+ printf("testing minor faults: ");
+ fflush(stdout);
+
+ if (uffd_test_ops->release_pages(area_dst))
+ return 1;
+
+ if (userfaultfd_open_ext(&features))
+ return 1;
+ /* If kernel reports the feature isn't supported, skip the test. */
+ if (!(features & UFFD_FEATURE_MINOR_HUGETLBFS)) {
+ printf("skipping test due to lack of feature support\n");
+ fflush(stdout);
+ return 0;
+ }
+
+ uffdio_register.range.start = (unsigned long)area_dst_alias;
+ uffdio_register.range.len = nr_pages * page_size;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MINOR;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ fprintf(stderr, "register failure\n");
+ exit(1);
+ }
+
+ expected_ioctls = uffd_test_ops->expected_ioctls;
+ expected_ioctls |= 1 << _UFFDIO_CONTINUE;
+ if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+ fprintf(stderr, "unexpected missing ioctl(s)\n");
+ exit(1);
+ }
+
+ /*
+ * After registering with UFFD, populate the non-UFFD-registered side of
+ * the shared mapping. This should *not* trigger any UFFD minor faults.
+ */
+ for (p = 0; p < nr_pages; ++p) {
+ memset(area_dst + (p * page_size), p % ((uint8_t)-1),
+ page_size);
+ }
+
+ if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+ perror("uffd_poll_thread create");
+ exit(1);
+ }
+
+ /*
+ * Read each of the pages back using the UFFD-registered mapping. We
+ * expect that the first time we touch a page, it will result in a minor
+ * fault. uffd_poll_thread will resolve the fault by bit-flipping the
+ * page's contents, and then issuing a CONTINUE ioctl.
+ */
+
+ if (posix_memalign(&expected_page, page_size, page_size)) {
+ fprintf(stderr, "out of memory\n");
+ return 1;
+ }
+
+ for (p = 0; p < nr_pages; ++p) {
+ expected_byte = ~((uint8_t)(p % ((uint8_t)-1)));
+ memset(expected_page, expected_byte, page_size);
+ if (my_bcmp(expected_page, area_dst_alias + (p * page_size),
+ page_size)) {
+ fprintf(stderr,
+ "unexpected page contents after minor fault\n");
+ exit(1);
+ }
+ }
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+ perror("pipe write");
+ exit(1);
+ }
+ if (pthread_join(uffd_mon, NULL))
+ return 1;
+
+ close(uffd);
+
+ uffd_stats_report(&stats, 1);
+
+ return stats.missing_faults != 0 || stats.minor_faults != nr_pages;
+}
+
static int userfaultfd_stress(void)
{
void *area;
close(uffd);
return userfaultfd_zeropage_test() || userfaultfd_sig_test()
- || userfaultfd_events_test();
+ || userfaultfd_events_test() || userfaultfd_minor_test();
}
/*
map_shared = true;
test_type = TEST_HUGETLB;
uffd_test_ops = &hugetlb_uffd_test_ops;
+ /* Minor faults require shared hugetlb; only enable here. */
+ test_uffdio_minor = true;
} else if (!strcmp(type, "shmem")) {
map_shared = true;
test_type = TEST_SHMEM;
echo ''
done
done
-
-# vim: sw=4
# SPDX-License-Identifier: GPL-2.0-only
-gen_init_cpio
-initramfs_data.cpio
+/gen_init_cpio
+/initramfs_data.cpio
/initramfs_inc_data
header "$1"
srcdir=$(echo "$1" | sed -e 's://*:/:g')
- dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LANG=C sort)
+ dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LC_ALL=C sort)
# If $dirlist is only one line, then the directory is empty
if [ "$(echo "${dirlist}" | wc -l)" -gt 1 ]; then
# SPDX-License-Identifier: GPL-2.0-only
-*
-!.gitignore
-!Makefile
+/*/