Merge tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux...
authorLinus Torvalds <torvalds@linux-foundation.org>
Sun, 12 Sep 2021 23:18:15 +0000 (16:18 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Sun, 12 Sep 2021 23:18:15 +0000 (16:18 -0700)
Pull more perf tools updates from Arnaldo Carvalho de Melo:

 - Add missing fields and remove some duplicate fields when printing a
   perf_event_attr.

 - Fix hybrid config terms list corruption.

 - Update kernel header copies, some resulted in new kernel features
   being automagically added to 'perf trace' syscall/tracepoint argument
   id->string translators.

 - Add a file generated during the documentation build to .gitignore.

 - Add an option to build without libbfd, as some distros, like Debian
   consider its ABI unstable.

 - Add support to print a textual representation of IBS raw sample data
   in 'perf report'.

 - Fix bpf 'perf test' sample mismatch reporting

 - Fix passing arguments to stackcollapse report in a 'perf script'
   python script.

 - Allow build-id with trailing zeros.

 - Look for ImageBase in PE file to compute .text offset.

* tag 'perf-tools-for-v5.15-2021-09-11' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux: (25 commits)
  tools headers UAPI: Update tools's copy of drm.h headers
  tools headers UAPI: Sync drm/i915_drm.h with the kernel sources
  tools headers UAPI: Sync linux/fs.h with the kernel sources
  tools headers UAPI: Sync linux/in.h copy with the kernel sources
  perf tools: Add an option to build without libbfd
  perf tools: Allow build-id with trailing zeros
  perf tools: Fix hybrid config terms list corruption
  perf tools: Factor out copy_config_terms() and free_config_terms()
  perf tools: Fix perf_event_attr__fprintf() missing/dupl. fields
  perf tools: Ignore Documentation dependency file
  perf bpf: Provide a weak btf__load_from_kernel_by_id() for older libbpf versions
  tools include UAPI: Update linux/mount.h copy
  perf beauty: Cover more flags in the  move_mount syscall argument beautifier
  tools headers UAPI: Sync linux/prctl.h with the kernel sources
  tools include UAPI: Sync sound/asound.h copy with the kernel sources
  tools headers UAPI: Sync linux/kvm.h with the kernel sources
  tools headers UAPI: Sync x86's asm/kvm.h with the kernel sources
  perf report: Add support to print a textual representation of IBS raw sample data
  perf report: Add tools/arch/x86/include/asm/amd-ibs.h
  perf env: Add perf_env__cpuid, perf_env__{nr_}pmu_mappings
  ...

333 files changed:
Documentation/ABI/testing/debugfs-driver-habanalabs
Documentation/core-api/cpu_hotplug.rst
Documentation/core-api/kernel-api.rst
Documentation/devicetree/bindings/auxdisplay/hit,hd44780.yaml
Documentation/devicetree/bindings/display/msm/dsi-phy-7nm.yaml
Documentation/devicetree/bindings/gpio/gpio-virtio.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/i2c/i2c-virtio.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/allwinner,sun4i-a10-lradc-keys.yaml
Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt [deleted file]
Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/regulator-haptic.txt [deleted file]
Documentation/devicetree/bindings/input/regulator-haptic.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt [deleted file]
Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt [deleted file]
Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt [deleted file]
Documentation/devicetree/bindings/power/reset/qcom,pon.txt [deleted file]
Documentation/devicetree/bindings/power/reset/qcom,pon.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/power/reset/reboot-mode.yaml
Documentation/devicetree/bindings/pwm/pwm-rockchip.yaml
Documentation/devicetree/bindings/rtc/trivial-rtc.yaml
Documentation/devicetree/bindings/spi/omap-spi.yaml
Documentation/devicetree/bindings/spi/spi-xilinx.yaml
Documentation/devicetree/bindings/thermal/qcom-lmh.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/thermal/thermal-zones.yaml
Documentation/devicetree/bindings/virtio/mmio.yaml
Documentation/devicetree/bindings/virtio/virtio-device.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/watchdog/maxim,max63xx.yaml
Documentation/features/vm/ELF-ASLR/arch-support.txt
Documentation/filesystems/api-summary.rst
Documentation/gpu/drm-mm.rst
Documentation/power/energy-model.rst
Documentation/userspace-api/index.rst
Documentation/userspace-api/ioctl/ioctl-number.rst
Documentation/userspace-api/vduse.rst [new file with mode: 0644]
MAINTAINERS
arch/arm64/kernel/cacheinfo.c
arch/arm64/mm/init.c
arch/mips/kernel/cacheinfo.c
arch/parisc/Kconfig
arch/parisc/boot/compressed/Makefile
arch/parisc/include/asm/processor.h
arch/parisc/include/asm/rt_sigframe.h
arch/parisc/include/asm/thread_info.h
arch/parisc/include/asm/uaccess.h
arch/parisc/kernel/asm-offsets.c
arch/parisc/kernel/parisc_ksyms.c
arch/parisc/kernel/setup.c
arch/parisc/kernel/signal.c
arch/parisc/kernel/signal32.h
arch/parisc/kernel/time.c
arch/parisc/lib/lusercopy.S
arch/riscv/Kconfig
arch/riscv/Makefile
arch/riscv/boot/Makefile
arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts
arch/riscv/configs/defconfig
arch/riscv/include/asm/elf.h
arch/riscv/kernel/cacheinfo.c
arch/riscv/kernel/vmlinux-xip.lds.S
arch/riscv/kernel/vmlinux.lds.S
arch/x86/kernel/cpu/cacheinfo.c
block/Makefile
block/bdev.c [new file with mode: 0644]
block/blk-mq.c
block/blk-throttle.c
block/blk.h
block/fops.c [new file with mode: 0644]
block/genhd.c
drivers/acpi/cppc_acpi.c
drivers/acpi/prmt.c
drivers/acpi/scan.c
drivers/auxdisplay/cfag12864b.c
drivers/auxdisplay/charlcd.c
drivers/auxdisplay/hd44780.c
drivers/auxdisplay/ks0108.c
drivers/base/power/main.c
drivers/base/power/wakeirq.c
drivers/block/n64cart.c
drivers/block/virtio_blk.c
drivers/char/ipmi/ipmi_si_intf.c
drivers/clk/qcom/gcc-sm6350.c
drivers/cpufreq/intel_pstate.c
drivers/dma-buf/Kconfig
drivers/firewire/net.c
drivers/firmware/qcom_scm.c
drivers/firmware/qcom_scm.h
drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fdinfo.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ras_eeprom.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
drivers/gpu/drm/amd/amdgpu/vi.c
drivers/gpu/drm/amd/amdkfd/kfd_svm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
drivers/gpu/drm/amd/display/dc/dcn303/dcn303_resource.c
drivers/gpu/drm/amd/display/dc/dcn31/dcn31_panel_cntl.c
drivers/gpu/drm/amd/display/dc/dml/dcn20/display_mode_vba_20v2.c
drivers/gpu/drm/amd/pm/amdgpu_pm.c
drivers/gpu/drm/amd/pm/inc/amdgpu_smu.h
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/aldebaran_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu13/yellow_carp_ppt.c
drivers/gpu/drm/i915/gt/intel_gt_requests.h
drivers/gpu/drm/mgag200/mgag200_pll.c
drivers/gpu/drm/panfrost/panfrost_mmu.c
drivers/gpu/drm/panfrost/panfrost_regs.h
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_bo_util.c
drivers/gpu/drm/ttm/ttm_tt.c
drivers/gpu/drm/vc4/vc4_hdmi.c
drivers/input/joystick/analog.c
drivers/input/keyboard/Kconfig
drivers/input/keyboard/adc-keys.c
drivers/input/keyboard/adp5588-keys.c
drivers/input/keyboard/adp5589-keys.c
drivers/input/keyboard/ep93xx_keypad.c
drivers/input/misc/Kconfig
drivers/input/misc/Makefile
drivers/input/misc/ixp4xx-beeper.c [deleted file]
drivers/input/misc/pm8941-pwrkey.c
drivers/input/misc/sirfsoc-onkey.c [deleted file]
drivers/input/mouse/elan_i2c.h
drivers/input/mouse/elan_i2c_core.c
drivers/input/serio/parkbd.c
drivers/input/touchscreen/Kconfig
drivers/input/touchscreen/edt-ft5x06.c
drivers/input/touchscreen/mms114.c
drivers/iommu/Kconfig
drivers/iommu/amd/init.c
drivers/iommu/intel/svm.c
drivers/iommu/iova.c
drivers/misc/habanalabs/common/Makefile
drivers/misc/habanalabs/common/command_buffer.c
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/context.c
drivers/misc/habanalabs/common/debugfs.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_drv.c
drivers/misc/habanalabs/common/habanalabs_ioctl.c
drivers/misc/habanalabs/common/hw_queue.c
drivers/misc/habanalabs/common/memory.c
drivers/misc/habanalabs/common/mmu/mmu_v1.c
drivers/misc/habanalabs/common/pci/pci.c
drivers/misc/habanalabs/common/state_dump.c [new file with mode: 0644]
drivers/misc/habanalabs/common/sysfs.c
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/gaudi/gaudiP.h
drivers/misc/habanalabs/gaudi/gaudi_coresight.c
drivers/misc/habanalabs/gaudi/gaudi_security.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/include/common/cpucp_if.h
drivers/misc/habanalabs/include/common/hl_boot_if.h
drivers/misc/habanalabs/include/gaudi/asic_reg/gaudi_regs.h
drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
drivers/misc/habanalabs/include/gaudi/gaudi_reg_map.h
drivers/misc/lkdtm/core.c
drivers/misc/lkdtm/lkdtm.h
drivers/nvme/host/core.c
drivers/nvme/host/multipath.c
drivers/nvme/host/nvme.h
drivers/nvme/host/tcp.c
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/configfs.c
drivers/nvme/target/core.c
drivers/nvme/target/nvmet.h
drivers/nvme/target/passthru.c
drivers/of/property.c
drivers/parisc/dino.c
drivers/pwm/Kconfig
drivers/pwm/core.c
drivers/pwm/pwm-ab8500.c
drivers/pwm/pwm-atmel-hlcdc.c
drivers/pwm/pwm-atmel-tcb.c
drivers/pwm/pwm-atmel.c
drivers/pwm/pwm-bcm-kona.c
drivers/pwm/pwm-brcmstb.c
drivers/pwm/pwm-cros-ec.c
drivers/pwm/pwm-ep93xx.c
drivers/pwm/pwm-fsl-ftm.c
drivers/pwm/pwm-hibvt.c
drivers/pwm/pwm-img.c
drivers/pwm/pwm-imx-tpm.c
drivers/pwm/pwm-imx27.c
drivers/pwm/pwm-intel-lgm.c
drivers/pwm/pwm-iqs620a.c
drivers/pwm/pwm-jz4740.c
drivers/pwm/pwm-keembay.c
drivers/pwm/pwm-lp3943.c
drivers/pwm/pwm-lpc32xx.c
drivers/pwm/pwm-mediatek.c
drivers/pwm/pwm-mtk-disp.c
drivers/pwm/pwm-mxs.c
drivers/pwm/pwm-ntxec.c
drivers/pwm/pwm-omap-dmtimer.c
drivers/pwm/pwm-pca9685.c
drivers/pwm/pwm-pxa.c
drivers/pwm/pwm-raspberrypi-poe.c
drivers/pwm/pwm-rcar.c
drivers/pwm/pwm-renesas-tpu.c
drivers/pwm/pwm-rockchip.c
drivers/pwm/pwm-samsung.c
drivers/pwm/pwm-sifive.c
drivers/pwm/pwm-sl28cpld.c
drivers/pwm/pwm-stm32-lp.c
drivers/pwm/pwm-sun4i.c
drivers/pwm/pwm-tiecap.c
drivers/pwm/pwm-tiehrpwm.c
drivers/pwm/pwm-twl-led.c
drivers/pwm/pwm-twl.c
drivers/rtc/Kconfig
drivers/rtc/Makefile
drivers/rtc/lib.c
drivers/rtc/lib_test.c [new file with mode: 0644]
drivers/rtc/rtc-cmos.c
drivers/rtc/rtc-rx8025.c
drivers/rtc/rtc-s5m.c
drivers/rtc/rtc-tps65910.c
drivers/thermal/intel/int340x_thermal/int3400_thermal.c
drivers/thermal/intel/intel_powerclamp.c
drivers/thermal/intel/intel_tcc_cooling.c
drivers/thermal/qcom/Kconfig
drivers/thermal/qcom/Makefile
drivers/thermal/qcom/lmh.c [new file with mode: 0644]
drivers/thermal/qcom/qcom-spmi-adc-tm5.c
drivers/thermal/rcar_gen3_thermal.c
drivers/thermal/samsung/exynos_tmu.c
drivers/thermal/tegra/Kconfig
drivers/thermal/tegra/Makefile
drivers/thermal/tegra/soctherm.c
drivers/thermal/tegra/tegra30-tsensor.c [new file with mode: 0644]
drivers/vdpa/Kconfig
drivers/vdpa/Makefile
drivers/vdpa/ifcvf/ifcvf_base.c
drivers/vdpa/ifcvf/ifcvf_base.h
drivers/vdpa/ifcvf/ifcvf_main.c
drivers/vdpa/mlx5/core/mlx5_vdpa.h
drivers/vdpa/mlx5/core/mr.c
drivers/vdpa/mlx5/core/resources.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/vdpa.c
drivers/vdpa/vdpa_sim/vdpa_sim.c
drivers/vdpa/vdpa_user/Makefile [new file with mode: 0644]
drivers/vdpa/vdpa_user/iova_domain.c [new file with mode: 0644]
drivers/vdpa/vdpa_user/iova_domain.h [new file with mode: 0644]
drivers/vdpa/vdpa_user/vduse_dev.c [new file with mode: 0644]
drivers/vdpa/virtio_pci/vp_vdpa.c
drivers/vhost/iotlb.c
drivers/vhost/scsi.c
drivers/vhost/vdpa.c
drivers/vhost/vsock.c
drivers/video/fbdev/core/fbmem.c
drivers/virtio/virtio.c
drivers/virtio/virtio_balloon.c
fs/Kconfig
fs/Makefile
fs/block_dev.c [deleted file]
fs/cifs/cifsencrypt.c
fs/cifs/cifspdu.h
fs/cifs/smb2ops.c
fs/cifs/smbencrypt.c
fs/cifs/smbfsctl.h [deleted file]
fs/cifs_common/Makefile [deleted file]
fs/cifs_common/arc4.h [deleted file]
fs/cifs_common/cifs_arc4.c [deleted file]
fs/cifs_common/cifs_md4.c [deleted file]
fs/cifs_common/md4.h [deleted file]
fs/file.c
fs/fs_parser.c
fs/internal.h
fs/io-wq.c
fs/io_uring.c
fs/namei.c
fs/notify/mark.c
fs/smbfs_common/Makefile [new file with mode: 0644]
fs/smbfs_common/arc4.h [new file with mode: 0644]
fs/smbfs_common/cifs_arc4.c [new file with mode: 0644]
fs/smbfs_common/cifs_md4.c [new file with mode: 0644]
fs/smbfs_common/md4.h [new file with mode: 0644]
fs/smbfs_common/smbfsctl.h [new file with mode: 0644]
include/acpi/cppc_acpi.h
include/drm/ttm/ttm_tt.h
include/linux/bootconfig.h
include/linux/cacheinfo.h
include/linux/compiler-gcc.h
include/linux/compiler_attributes.h
include/linux/compiler_types.h
include/linux/cpu.h
include/linux/cpuhotplug.h
include/linux/energy_model.h
include/linux/file.h
include/linux/pwm.h
include/linux/qcom_scm.h
include/linux/rwsem.h
include/linux/thermal.h
include/linux/time64.h
include/linux/vdpa.h
include/linux/vhost_iotlb.h
include/uapi/linux/vduse.h [new file with mode: 0644]
include/uapi/linux/virtio_ids.h
include/uapi/linux/virtio_vsock.h
include/uapi/misc/habanalabs.h
kernel/futex.c
kernel/locking/rtmutex.c
kernel/locking/rwsem.c
kernel/sched/core.c
kernel/sched/idle.c
kernel/trace/trace_boot.c
lib/bootconfig.c
net/vmw_vsock/af_vsock.c
net/vmw_vsock/virtio_transport_common.c
scripts/coccinelle/api/kvmalloc.cocci
scripts/coccinelle/iterators/use_after_iter.cocci
scripts/sorttable.c
tools/testing/vsock/vsock_test.c
tools/thermal/tmon/Makefile

index a5c28f6..284e2df 100644 (file)
@@ -215,6 +215,17 @@ Description:    Sets the skip reset on timeout option for the device. Value of
                 "0" means device will be reset in case some CS has timed out,
                 otherwise it will not be reset.
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/state_dump
+Date:           Oct 2021
+KernelVersion:  5.15
+Contact:        ynudelman@habana.ai
+Description:    Gets the state dump occurring on a CS timeout or failure.
+                State dump is used for debug and is created each time in case of
+                a problem in a CS execution, before reset.
+                Reading from the node returns the newest state dump available.
+                Writing an integer X discards X state dumps, so that the
+                next read returns the (X+1)-th newest state dump.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
@@ -230,6 +241,14 @@ Description:    Displays a list with information about the currently user
                 pointers (user virtual addresses) that are pinned and mapped
                 to DMA addresses
 
+What:           /sys/kernel/debug/habanalabs/hl<n>/userptr_lookup
+Date:           Aug 2021
+KernelVersion:  5.15
+Contact:        ogabbay@kernel.org
+Description:    Allows searching for specific user pointers (user virtual
+                addresses) that are pinned and mapped to DMA addresses, and
+                seeing their resolution to the specific DMA address.
+
 What:           /sys/kernel/debug/habanalabs/hl<n>/vm
 Date:           Jan 2019
 KernelVersion:  5.1
index b66e3ca..c6f4ba2 100644 (file)
@@ -2,12 +2,13 @@
 CPU hotplug in the Kernel
 =========================
 
-:Date: December, 2016
+:Date: September, 2021
 :Author: Sebastian Andrzej Siewior <bigeasy@linutronix.de>,
-          Rusty Russell <rusty@rustcorp.com.au>,
-          Srivatsa Vaddagiri <vatsa@in.ibm.com>,
-          Ashok Raj <ashok.raj@intel.com>,
-          Joel Schopp <jschopp@austin.ibm.com>
+         Rusty Russell <rusty@rustcorp.com.au>,
+         Srivatsa Vaddagiri <vatsa@in.ibm.com>,
+         Ashok Raj <ashok.raj@intel.com>,
+         Joel Schopp <jschopp@austin.ibm.com>,
+         Thomas Gleixner <tglx@linutronix.de>
 
 Introduction
 ============
@@ -158,100 +159,480 @@ at state ``CPUHP_OFFLINE``. This includes:
 * Once all services are migrated, kernel calls an arch specific routine
   ``__cpu_disable()`` to perform arch specific cleanup.
 
-Using the hotplug API
----------------------
-
-It is possible to receive notifications once a CPU is offline or onlined. This
-might be important to certain drivers which need to perform some kind of setup
-or clean up functions based on the number of available CPUs::
-
-  #include <linux/cpuhotplug.h>
-
-  ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "X/Y:online",
-                          Y_online, Y_prepare_down);
-
-*X* is the subsystem and *Y* the particular driver. The *Y_online* callback
-will be invoked during registration on all online CPUs. If an error
-occurs during the online callback the *Y_prepare_down* callback will be
-invoked on all CPUs on which the online callback was previously invoked.
-After registration completed, the *Y_online* callback will be invoked
-once a CPU is brought online and *Y_prepare_down* will be invoked when a
-CPU is shutdown. All resources which were previously allocated in
-*Y_online* should be released in *Y_prepare_down*.
-The return value *ret* is negative if an error occurred during the
-registration process. Otherwise a positive value is returned which
-contains the allocated hotplug for dynamically allocated states
-(*CPUHP_AP_ONLINE_DYN*). It will return zero for predefined states.
-
-The callback can be remove by invoking ``cpuhp_remove_state()``. In case of a
-dynamically allocated state (*CPUHP_AP_ONLINE_DYN*) use the returned state.
-During the removal of a hotplug state the teardown callback will be invoked.
-
-Multiple instances
-~~~~~~~~~~~~~~~~~~
-
-If a driver has multiple instances and each instance needs to perform the
-callback independently then it is likely that a ''multi-state'' should be used.
-First a multi-state state needs to be registered::
-
-  ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, "X/Y:online,
-                                Y_online, Y_prepare_down);
-  Y_hp_online = ret;
-
-The ``cpuhp_setup_state_multi()`` behaves similar to ``cpuhp_setup_state()``
-except it prepares the callbacks for a multi state and does not invoke
-the callbacks. This is a one time setup.
-Once a new instance is allocated, you need to register this new instance::
-
-  ret = cpuhp_state_add_instance(Y_hp_online, &d->node);
-
-This function will add this instance to your previously allocated
-*Y_hp_online* state and invoke the previously registered callback
-(*Y_online*) on all online CPUs. The *node* element is a ``struct
-hlist_node`` member of your per-instance data structure.
-
-On removal of the instance::
-
-  cpuhp_state_remove_instance(Y_hp_online, &d->node)
-
-should be invoked which will invoke the teardown callback on all online
-CPUs.
-
-Manual setup
-~~~~~~~~~~~~
-
-Usually it is handy to invoke setup and teardown callbacks on registration or
-removal of a state because usually the operation needs to performed once a CPU
-goes online (offline) and during initial setup (shutdown) of the driver. However
-each registration and removal function is also available with a ``_nocalls``
-suffix which does not invoke the provided callbacks if the invocation of the
-callbacks is not desired. During the manual setup (or teardown) the functions
-``cpus_read_lock()`` and ``cpus_read_unlock()`` should be used to inhibit CPU
-hotplug operations.
-
-
-The ordering of the events
---------------------------
-
-The hotplug states are defined in ``include/linux/cpuhotplug.h``:
-
-* The states *CPUHP_OFFLINE* … *CPUHP_AP_OFFLINE* are invoked before the
-  CPU is up.
-* The states *CPUHP_AP_OFFLINE* … *CPUHP_AP_ONLINE* are invoked
-  just the after the CPU has been brought up. The interrupts are off and
-  the scheduler is not yet active on this CPU. Starting with *CPUHP_AP_OFFLINE*
-  the callbacks are invoked on the target CPU.
-* The states between *CPUHP_AP_ONLINE_DYN* and *CPUHP_AP_ONLINE_DYN_END* are
-  reserved for the dynamic allocation.
-* The states are invoked in the reverse order on CPU shutdown starting with
-  *CPUHP_ONLINE* and stopping at *CPUHP_OFFLINE*. Here the callbacks are
-  invoked on the CPU that will be shutdown until *CPUHP_AP_OFFLINE*.
-
-A dynamically allocated state via *CPUHP_AP_ONLINE_DYN* is often enough.
-However if an earlier invocation during the bring up or shutdown is required
-then an explicit state should be acquired. An explicit state might also be
-required if the hotplug event requires specific ordering in respect to
-another hotplug event.
+
+The CPU hotplug API
+===================
+
+CPU hotplug state machine
+-------------------------
+
+CPU hotplug uses a trivial state machine with a linear state space from
+CPUHP_OFFLINE to CPUHP_ONLINE. Each state has a startup and a teardown
+callback.
+
+When a CPU is onlined, the startup callbacks are invoked sequentially until
+the state CPUHP_ONLINE is reached. They can also be invoked when the
+callbacks of a state are set up or an instance is added to a multi-instance
+state.
+
+When a CPU is offlined the teardown callbacks are invoked in the reverse
+order sequentially until the state CPUHP_OFFLINE is reached. They can also
+be invoked when the callbacks of a state are removed or an instance is
+removed from a multi-instance state.
+
+If a usage site requires only a callback in one direction of the hotplug
+operations (CPU online or CPU offline) then the other not-required callback
+can be set to NULL when the state is set up.
+
+The state space is divided into three sections:
+
+* The PREPARE section
+
+  The PREPARE section covers the state space from CPUHP_OFFLINE to
+  CPUHP_BRINGUP_CPU.
+
+  The startup callbacks in this section are invoked before the CPU is
+  started during a CPU online operation. The teardown callbacks are invoked
+  after the CPU has become dysfunctional during a CPU offline operation.
+
+  The callbacks are invoked on a control CPU as they obviously can't run on
+  the hotplugged CPU which is either not yet started or has become
+  dysfunctional already.
+
+  The startup callbacks are used to set up resources which are required to
+  bring a CPU successfully online. The teardown callbacks are used to free
+  resources or to move pending work to an online CPU after the hotplugged
+  CPU became dysfunctional.
+
+  The startup callbacks are allowed to fail. If a callback fails, the CPU
+  online operation is aborted and the CPU is brought down to the previous
+  state (usually CPUHP_OFFLINE) again.
+
+  The teardown callbacks in this section are not allowed to fail.
+
+* The STARTING section
+
+  The STARTING section covers the state space between CPUHP_BRINGUP_CPU + 1
+  and CPUHP_AP_ONLINE.
+
+  The startup callbacks in this section are invoked on the hotplugged CPU
+  with interrupts disabled during a CPU online operation in the early CPU
+  setup code. The teardown callbacks are invoked with interrupts disabled
+  on the hotplugged CPU during a CPU offline operation shortly before the
+  CPU is completely shut down.
+
+  The callbacks in this section are not allowed to fail.
+
+  The callbacks are used for low level hardware initialization/shutdown and
+  for core subsystems.
+
+* The ONLINE section
+
+  The ONLINE section covers the state space between CPUHP_AP_ONLINE + 1 and
+  CPUHP_ONLINE.
+
+  The startup callbacks in this section are invoked on the hotplugged CPU
+  during a CPU online operation. The teardown callbacks are invoked on the
+  hotplugged CPU during a CPU offline operation.
+
+  The callbacks are invoked in the context of the per CPU hotplug thread,
+  which is pinned on the hotplugged CPU. The callbacks are invoked with
+  interrupts and preemption enabled.
+
+  The callbacks are allowed to fail. When a callback fails the hotplug
+  operation is aborted and the CPU is brought back to the previous state.
+
+CPU online/offline operations
+-----------------------------
+
+A successful online operation looks like this::
+
+  [CPUHP_OFFLINE]
+  [CPUHP_OFFLINE + 1]->startup()       -> success
+  [CPUHP_OFFLINE + 2]->startup()       -> success
+  [CPUHP_OFFLINE + 3]                  -> skipped because startup == NULL
+  ...
+  [CPUHP_BRINGUP_CPU]->startup()       -> success
+  === End of PREPARE section
+  [CPUHP_BRINGUP_CPU + 1]->startup()   -> success
+  ...
+  [CPUHP_AP_ONLINE]->startup()         -> success
+  === End of STARTING section
+  [CPUHP_AP_ONLINE + 1]->startup()     -> success
+  ...
+  [CPUHP_ONLINE - 1]->startup()        -> success
+  [CPUHP_ONLINE]
+
+A successful offline operation looks like this::
+
+  [CPUHP_ONLINE]
+  [CPUHP_ONLINE - 1]->teardown()       -> success
+  ...
+  [CPUHP_AP_ONLINE + 1]->teardown()    -> success
+  === Start of STARTING section
+  [CPUHP_AP_ONLINE]->teardown()        -> success
+  ...
+  [CPUHP_BRINGUP_CPU + 1]->teardown()
+  ...
+  === Start of PREPARE section
+  [CPUHP_BRINGUP_CPU]->teardown()
+  [CPUHP_OFFLINE + 3]->teardown()
+  [CPUHP_OFFLINE + 2]                  -> skipped because teardown == NULL
+  [CPUHP_OFFLINE + 1]->teardown()
+  [CPUHP_OFFLINE]
+
+A failed online operation looks like this::
+
+  [CPUHP_OFFLINE]
+  [CPUHP_OFFLINE + 1]->startup()       -> success
+  [CPUHP_OFFLINE + 2]->startup()       -> success
+  [CPUHP_OFFLINE + 3]                  -> skipped because startup == NULL
+  ...
+  [CPUHP_BRINGUP_CPU]->startup()       -> success
+  === End of PREPARE section
+  [CPUHP_BRINGUP_CPU + 1]->startup()   -> success
+  ...
+  [CPUHP_AP_ONLINE]->startup()         -> success
+  === End of STARTING section
+  [CPUHP_AP_ONLINE + 1]->startup()     -> success
+  ---
+  [CPUHP_AP_ONLINE + N]->startup()     -> fail
+  [CPUHP_AP_ONLINE + (N - 1)]->teardown()
+  ...
+  [CPUHP_AP_ONLINE + 1]->teardown()
+  === Start of STARTING section
+  [CPUHP_AP_ONLINE]->teardown()
+  ...
+  [CPUHP_BRINGUP_CPU + 1]->teardown()
+  ...
+  === Start of PREPARE section
+  [CPUHP_BRINGUP_CPU]->teardown()
+  [CPUHP_OFFLINE + 3]->teardown()
+  [CPUHP_OFFLINE + 2]                  -> skipped because teardown == NULL
+  [CPUHP_OFFLINE + 1]->teardown()
+  [CPUHP_OFFLINE]
+
+A failed offline operation looks like this::
+
+  [CPUHP_ONLINE]
+  [CPUHP_ONLINE - 1]->teardown()       -> success
+  ...
+  [CPUHP_ONLINE - N]->teardown()       -> fail
+  [CPUHP_ONLINE - (N - 1)]->startup()
+  ...
+  [CPUHP_ONLINE - 1]->startup()
+  [CPUHP_ONLINE]
+
+Recursive failures cannot be handled sensibly. Look at the following
+example of a recursive fail due to a failed offline operation: ::
+
+  [CPUHP_ONLINE]
+  [CPUHP_ONLINE - 1]->teardown()       -> success
+  ...
+  [CPUHP_ONLINE - N]->teardown()       -> fail
+  [CPUHP_ONLINE - (N - 1)]->startup()  -> success
+  [CPUHP_ONLINE - (N - 2)]->startup()  -> fail
+
+The CPU hotplug state machine stops right here and does not try to go back
+down again because that would likely result in an endless loop::
+
+  [CPUHP_ONLINE - (N - 1)]->teardown() -> success
+  [CPUHP_ONLINE - N]->teardown()       -> fail
+  [CPUHP_ONLINE - (N - 1)]->startup()  -> success
+  [CPUHP_ONLINE - (N - 2)]->startup()  -> fail
+  [CPUHP_ONLINE - (N - 1)]->teardown() -> success
+  [CPUHP_ONLINE - N]->teardown()       -> fail
+
+Lather, rinse and repeat. In this case the CPU is left in state::
+
+  [CPUHP_ONLINE - (N - 1)]
+
+which at least lets the system make progress and gives the user a chance to
+debug or even resolve the situation.
+
+Allocating a state
+------------------
+
+There are two ways to allocate a CPU hotplug state:
+
+* Static allocation
+
+  Static allocation has to be used when the subsystem or driver has
+  ordering requirements versus other CPU hotplug states. E.g. the PERF core
+  startup callback has to be invoked before the PERF driver startup
+  callbacks during a CPU online operation. During a CPU offline operation
+  the driver teardown callbacks have to be invoked before the core teardown
+  callback. The statically allocated states are described by constants in
+  the cpuhp_state enum which can be found in include/linux/cpuhotplug.h.
+
+  Insert the state into the enum at the proper place so the ordering
+  requirements are fulfilled. The state constant has to be used for state
+  setup and removal.
+
+  Static allocation is also required when the state callbacks are not set
+  up at runtime and are part of the initializer of the CPU hotplug state
+  array in kernel/cpu.c.
+
+* Dynamic allocation
+
+  When there are no ordering requirements for the state callbacks then
+  dynamic allocation is the preferred method. The state number is allocated
+  by the setup function and returned to the caller on success.
+
+  Only the PREPARE and ONLINE sections provide a dynamic allocation
+  range. The STARTING section does not as most of the callbacks in that
+  section have explicit ordering requirements.
+
+Setup of a CPU hotplug state
+----------------------------
+
+The core code provides the following functions to set up a state:
+
+* cpuhp_setup_state(state, name, startup, teardown)
+* cpuhp_setup_state_nocalls(state, name, startup, teardown)
+* cpuhp_setup_state_cpuslocked(state, name, startup, teardown)
+* cpuhp_setup_state_nocalls_cpuslocked(state, name, startup, teardown)
+
+For cases where a driver or a subsystem has multiple instances and the same
+CPU hotplug state callbacks need to be invoked for each instance, the CPU
+hotplug core provides multi-instance support. The advantage over driver
+specific instance lists is that the instance related functions are fully
+serialized against CPU hotplug operations and provide the automatic
+invocations of the state callbacks on add and removal. To set up such a
+multi-instance state the following function is available:
+
+* cpuhp_setup_state_multi(state, name, startup, teardown)
+
+The @state argument is either a statically allocated state or one of the
+constants for dynamically allocated states - CPUHP_BP_PREPARE_DYN,
+CPUHP_AP_ONLINE_DYN - depending on the state section (PREPARE, ONLINE) for
+which a dynamic state should be allocated.
+
+The @name argument is used for sysfs output and for instrumentation. The
+naming convention is "subsys:mode" or "subsys/driver:mode",
+e.g. "perf:mode" or "perf/x86:mode". The common mode names are:
+
+======== =======================================================
+prepare  For states in the PREPARE section
+
+dead     For states in the PREPARE section which do not provide
+         a startup callback
+
+starting For states in the STARTING section
+
+dying    For states in the STARTING section which do not provide
+         a startup callback
+
+online   For states in the ONLINE section
+
+offline  For states in the ONLINE section which do not provide
+         a startup callback
+======== =======================================================
+
+As the @name argument is only used for sysfs and instrumentation other mode
+descriptors can be used as well if they describe the nature of the state
+better than the common ones.
+
+Examples for @name arguments: "perf/online", "perf/x86:prepare",
+"RCU/tree:dying", "sched/waitempty"
+
+The @startup argument is a function pointer to the callback which should be
+invoked during a CPU online operation. If the usage site does not require a
+startup callback set the pointer to NULL.
+
+The @teardown argument is a function pointer to the callback which should
+be invoked during a CPU offline operation. If the usage site does not
+require a teardown callback set the pointer to NULL.
+
+The functions differ in the way how the installed callbacks are treated:
+
+  * cpuhp_setup_state_nocalls(), cpuhp_setup_state_nocalls_cpuslocked()
+    and cpuhp_setup_state_multi() only install the callbacks
+
+  * cpuhp_setup_state() and cpuhp_setup_state_cpuslocked() install the
+    callbacks and invoke the @startup callback (if not NULL) for all online
+    CPUs which have currently a state greater than the newly installed
+    state. Depending on the state section the callback is either invoked on
+    the current CPU (PREPARE section) or on each online CPU (ONLINE
+    section) in the context of the CPU's hotplug thread.
+
+    If a callback fails for CPU N then the teardown callback for CPU
+    0 .. N-1 is invoked to rollback the operation. The state setup fails,
+    the callbacks for the state are not installed and in case of dynamic
+    allocation the allocated state is freed.
+
+The state setup and the callback invocations are serialized against CPU
+hotplug operations. If the setup function has to be called from a CPU
+hotplug read locked region, then the _cpuslocked() variants have to be
+used. These functions cannot be used from within CPU hotplug callbacks.
+
+The function return values:
+  ======== ===================================================================
+  0        Statically allocated state was successfully set up
+
+  >0       Dynamically allocated state was successfully set up.
+
+           The returned number is the state number which was allocated. If
+           the state callbacks have to be removed later, e.g. module
+           removal, then this number has to be saved by the caller and used
+           as @state argument for the state remove function. For
+           multi-instance states the dynamically allocated state number is
+           also required as @state argument for the instance add/remove
+           operations.
+
+  <0       Operation failed
+  ======== ===================================================================
+
+Removal of a CPU hotplug state
+------------------------------
+
+To remove a previously set up state, the following functions are provided:
+
+* cpuhp_remove_state(state)
+* cpuhp_remove_state_nocalls(state)
+* cpuhp_remove_state_nocalls_cpuslocked(state)
+* cpuhp_remove_multi_state(state)
+
+The @state argument is either a statically allocated state or the state
+number which was allocated in the dynamic range by cpuhp_setup_state*(). If
+the state is in the dynamic range, then the state number is freed and
+available for dynamic allocation again.
+
+The functions differ in the way how the installed callbacks are treated:
+
+  * cpuhp_remove_state_nocalls(), cpuhp_remove_state_nocalls_cpuslocked()
+    and cpuhp_remove_multi_state() only remove the callbacks.
+
+  * cpuhp_remove_state() removes the callbacks and invokes the teardown
+    callback (if not NULL) for all online CPUs which have currently a state
+    greater than the removed state. Depending on the state section the
+    callback is either invoked on the current CPU (PREPARE section) or on
+    each online CPU (ONLINE section) in the context of the CPU's hotplug
+    thread.
+
+    In order to complete the removal, the teardown callback should not fail.
+
+The state removal and the callback invocations are serialized against CPU
+hotplug operations. If the remove function has to be called from a CPU
+hotplug read locked region, then the _cpuslocked() variants have to be
+used. These functions cannot be used from within CPU hotplug callbacks.
+
+If a multi-instance state is removed then the caller has to remove all
+instances first.
+
+Multi-Instance state instance management
+----------------------------------------
+
+Once the multi-instance state is set up, instances can be added to the
+state:
+
+  * cpuhp_state_add_instance(state, node)
+  * cpuhp_state_add_instance_nocalls(state, node)
+
+The @state argument is either a statically allocated state or the state
+number which was allocated in the dynamic range by cpuhp_setup_state_multi().
+
+The @node argument is a pointer to an hlist_node which is embedded in the
+instance's data structure. The pointer is handed to the multi-instance
+state callbacks and can be used by the callback to retrieve the instance
+via container_of().
+
+The functions differ in the way how the installed callbacks are treated:
+
+  * cpuhp_state_add_instance_nocalls() only adds the instance to the
+    multi-instance state's node list.
+
+  * cpuhp_state_add_instance() adds the instance and invokes the startup
+    callback (if not NULL) associated with @state for all online CPUs which
+    have currently a state greater than @state. The callback is only
+    invoked for the to be added instance. Depending on the state section
+    the callback is either invoked on the current CPU (PREPARE section) or
+    on each online CPU (ONLINE section) in the context of the CPU's hotplug
+    thread.
+
+    If a callback fails for CPU N then the teardown callback for CPU
+    0 .. N-1 is invoked to rollback the operation, the function fails and
+    the instance is not added to the node list of the multi-instance state.
+
+To remove an instance from the state's node list these functions are
+available:
+
+  * cpuhp_state_remove_instance(state, node)
+  * cpuhp_state_remove_instance_nocalls(state, node)
+
+The arguments are the same as for the cpuhp_state_add_instance*()
+variants above.
+
+The functions differ in the way how the installed callbacks are treated:
+
+  * cpuhp_state_remove_instance_nocalls() only removes the instance from the
+    state's node list.
+
+  * cpuhp_state_remove_instance() removes the instance and invokes the
+    teardown callback (if not NULL) associated with @state for all online
+    CPUs which have currently a state greater than @state.  The callback is
+    only invoked for the to be removed instance.  Depending on the state
+    section the callback is either invoked on the current CPU (PREPARE
+    section) or on each online CPU (ONLINE section) in the context of the
+    CPU's hotplug thread.
+
+    In order to complete the removal, the teardown callback should not fail.
+
+The node list add/remove operations and the callback invocations are
+serialized against CPU hotplug operations. These functions cannot be used
+from within CPU hotplug callbacks and CPU hotplug read locked regions.
+
+Examples
+--------
+
+Setup and teardown a statically allocated state in the STARTING section for
+notifications on online and offline operations::
+
+   ret = cpuhp_setup_state(CPUHP_SUBSYS_STARTING, "subsys:starting", subsys_cpu_starting, subsys_cpu_dying);
+   if (ret < 0)
+        return ret;
+   ....
+   cpuhp_remove_state(CPUHP_SUBSYS_STARTING);
+
+Setup and teardown a dynamically allocated state in the ONLINE section
+for notifications on offline operations::
+
+   state = cpuhp_setup_state(CPUHP_ONLINE_DYN, "subsys:offline", NULL, subsys_cpu_offline);
+   if (state < 0)
+       return state;
+   ....
+   cpuhp_remove_state(state);
+
+Setup and teardown a dynamically allocated state in the ONLINE section
+for notifications on online operations without invoking the callbacks::
+
+   state = cpuhp_setup_state_nocalls(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, NULL);
+   if (state < 0)
+       return state;
+   ....
+   cpuhp_remove_state_nocalls(state);
+
+Setup, use and teardown a dynamically allocated multi-instance state in the
+ONLINE section for notifications on online and offline operations::
+
+   state = cpuhp_setup_state_multi(CPUHP_ONLINE_DYN, "subsys:online", subsys_cpu_online, subsys_cpu_offline);
+   if (state < 0)
+       return state;
+   ....
+   ret = cpuhp_state_add_instance(state, &inst1->node);
+   if (ret)
+        return ret;
+   ....
+   ret = cpuhp_state_add_instance(state, &inst2->node);
+   if (ret)
+        return ret;
+   ....
+   cpuhp_state_remove_instance(state, &inst1->node);
+   ....
+   cpuhp_state_remove_instance(state, &inst2->node);
+   ....
+   cpuhp_remove_multi_state(state);
+
 
 Testing of hotplug states
 =========================
index 2a7444e..2e71868 100644 (file)
@@ -315,6 +315,9 @@ Block Devices
 .. kernel-doc:: block/genhd.c
    :export:
 
+.. kernel-doc:: block/bdev.c
+   :export:
+
 Char devices
 ============
 
index 9222b06..fde07e4 100644 (file)
@@ -12,7 +12,10 @@ maintainers:
 description:
   The Hitachi HD44780 Character LCD Controller is commonly used on character
   LCDs that can display one or more lines of text. It exposes an M6800 bus
-  interface, which can be used in either 4-bit or 8-bit mode.
+  interface, which can be used in either 4-bit or 8-bit mode. By using a
+  GPIO expander it is possible to use the driver with one of the popular I2C
+  expander boards based on the PCF8574 available for these displays. For
+  an example see below.
 
 properties:
   compatible:
@@ -94,3 +97,29 @@ examples:
             display-height-chars = <2>;
             display-width-chars = <16>;
     };
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    i2c {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            pcf8574: pcf8574@27 {
+                    compatible = "nxp,pcf8574";
+                    reg = <0x27>;
+                    gpio-controller;
+                    #gpio-cells = <2>;
+            };
+    };
+    hd44780 {
+            compatible = "hit,hd44780";
+            display-height-chars = <2>;
+            display-width-chars  = <16>;
+            data-gpios = <&pcf8574 4 0>,
+                         <&pcf8574 5 0>,
+                         <&pcf8574 6 0>,
+                         <&pcf8574 7 0>;
+            enable-gpios = <&pcf8574 2 0>;
+            rs-gpios = <&pcf8574 0 0>;
+            rw-gpios = <&pcf8574 1 0>;
+            backlight-gpios = <&pcf8574 3 0>;
+    };
index 4265399..c851770 100644 (file)
@@ -14,10 +14,10 @@ allOf:
 
 properties:
   compatible:
-    oneOf:
-      - const: qcom,dsi-phy-7nm
-      - const: qcom,dsi-phy-7nm-8150
-      - const: qcom,sc7280-dsi-phy-7nm
+    enum:
+      - qcom,dsi-phy-7nm
+      - qcom,dsi-phy-7nm-8150
+      - qcom,sc7280-dsi-phy-7nm
 
   reg:
     items:
diff --git a/Documentation/devicetree/bindings/gpio/gpio-virtio.yaml b/Documentation/devicetree/bindings/gpio/gpio-virtio.yaml
new file mode 100644 (file)
index 0000000..601d857
--- /dev/null
@@ -0,0 +1,59 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/gpio/gpio-virtio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio GPIO controller
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+allOf:
+  - $ref: /schemas/virtio/virtio-device.yaml#
+
+description:
+  Virtio GPIO controller, see /schemas/virtio/virtio-device.yaml for more
+  details.
+
+properties:
+  $nodename:
+    const: gpio
+
+  compatible:
+    const: virtio,device29
+
+  gpio-controller: true
+
+  "#gpio-cells":
+    const: 2
+
+  interrupt-controller: true
+
+  "#interrupt-cells":
+    const: 2
+
+required:
+  - compatible
+  - gpio-controller
+  - "#gpio-cells"
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    virtio@3000 {
+        compatible = "virtio,mmio";
+        reg = <0x3000 0x100>;
+        interrupts = <41>;
+
+        gpio {
+            compatible = "virtio,device29";
+            gpio-controller;
+            #gpio-cells = <2>;
+            interrupt-controller;
+            #interrupt-cells = <2>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/i2c/i2c-virtio.yaml b/Documentation/devicetree/bindings/i2c/i2c-virtio.yaml
new file mode 100644 (file)
index 0000000..7d87ed8
--- /dev/null
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/i2c/i2c-virtio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio I2C Adapter
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+allOf:
+  - $ref: /schemas/i2c/i2c-controller.yaml#
+  - $ref: /schemas/virtio/virtio-device.yaml#
+
+description:
+  Virtio I2C device, see /schemas/virtio/virtio-device.yaml for more details.
+
+properties:
+  $nodename:
+    const: i2c
+
+  compatible:
+    const: virtio,device22
+
+required:
+  - compatible
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    virtio@3000 {
+        compatible = "virtio,mmio";
+        reg = <0x3000 0x100>;
+        interrupts = <41>;
+
+        i2c {
+            compatible = "virtio,device22";
+
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            light-sensor@20 {
+                compatible = "dynaimage,al3320a";
+                reg = <0x20>;
+            };
+        };
+    };
+
+...
index cffd020..d74f200 100644 (file)
@@ -29,6 +29,8 @@ properties:
     description:
       Regulator for the LRADC reference voltage
 
+  wakeup-source: true
+
 patternProperties:
   "^button-[0-9]+$":
     type: object
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.txt
deleted file mode 100644 (file)
index 6cd08bc..0000000
+++ /dev/null
@@ -1,55 +0,0 @@
-Qualcomm PM8941 PMIC Power Key
-
-PROPERTIES
-
-- compatible:
-       Usage: required
-       Value type: <string>
-       Definition: must be one of:
-                   "qcom,pm8941-pwrkey"
-                   "qcom,pm8941-resin"
-                   "qcom,pmk8350-pwrkey"
-                   "qcom,pmk8350-resin"
-
-- reg:
-       Usage: required
-       Value type: <prop-encoded-array>
-       Definition: base address of registers for block
-
-- interrupts:
-       Usage: required
-       Value type: <prop-encoded-array>
-       Definition: key change interrupt; The format of the specifier is
-                   defined by the binding document describing the node's
-                   interrupt parent.
-
-- debounce:
-       Usage: optional
-       Value type: <u32>
-       Definition: time in microseconds that key must be pressed or released
-                   for state change interrupt to trigger.
-
-- bias-pull-up:
-       Usage: optional
-       Value type: <empty>
-       Definition: presence of this property indicates that the KPDPWR_N pin
-                   should be configured for pull up.
-
-- linux,code:
-       Usage: optional
-       Value type: <u32>
-       Definition: The input key-code associated with the power key.
-                   Use the linux event codes defined in
-                   include/dt-bindings/input/linux-event-codes.h
-                   When property is omitted KEY_POWER is assumed.
-
-EXAMPLE
-
-       pwrkey@800 {
-               compatible = "qcom,pm8941-pwrkey";
-               reg = <0x800>;
-               interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
-               debounce = <15625>;
-               bias-pull-up;
-               linux,code = <KEY_POWER>;
-       };
diff --git a/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml b/Documentation/devicetree/bindings/input/qcom,pm8941-pwrkey.yaml
new file mode 100644 (file)
index 0000000..62314a5
--- /dev/null
@@ -0,0 +1,51 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/qcom,pm8941-pwrkey.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PM8941 PMIC Power Key
+
+maintainers:
+  - Courtney Cavin <courtney.cavin@sonymobile.com>
+  - Vinod Koul <vkoul@kernel.org>
+
+allOf:
+  - $ref: input.yaml#
+
+properties:
+  compatible:
+    enum:
+      - qcom,pm8941-pwrkey
+      - qcom,pm8941-resin
+      - qcom,pmk8350-pwrkey
+      - qcom,pmk8350-resin
+
+  interrupts:
+    maxItems: 1
+
+  debounce:
+    description: |
+          Time in microseconds that key must be pressed or
+          released for state change interrupt to trigger.
+    $ref: /schemas/types.yaml#/definitions/uint32
+
+  bias-pull-up:
+    description: |
+           Presence of this property indicates that the KPDPWR_N
+           pin should be configured for pull up.
+    $ref: /schemas/types.yaml#/definitions/flag
+
+  linux,code:
+    description: |
+           The input key-code associated with the power key.
+           Use the linux event codes defined in
+           include/dt-bindings/input/linux-event-codes.h
+           When property is omitted KEY_POWER is assumed.
+
+required:
+  - compatible
+  - interrupts
+
+unevaluatedProperties: false
+...
diff --git a/Documentation/devicetree/bindings/input/regulator-haptic.txt b/Documentation/devicetree/bindings/input/regulator-haptic.txt
deleted file mode 100644 (file)
index 3ed1c7e..0000000
+++ /dev/null
@@ -1,21 +0,0 @@
-* Regulator Haptic Device Tree Bindings
-
-Required Properties:
- - compatible : Should be "regulator-haptic"
- - haptic-supply : Power supply to the haptic motor.
-       [*] refer Documentation/devicetree/bindings/regulator/regulator.txt
-
- - max-microvolt : The maximum voltage value supplied to the haptic motor.
-               [The unit of the voltage is a micro]
-
- - min-microvolt : The minimum voltage value supplied to the haptic motor.
-               [The unit of the voltage is a micro]
-
-Example:
-
-       haptics {
-               compatible = "regulator-haptic";
-               haptic-supply = <&motor_regulator>;
-               max-microvolt = <2700000>;
-               min-microvolt = <1100000>;
-       };
diff --git a/Documentation/devicetree/bindings/input/regulator-haptic.yaml b/Documentation/devicetree/bindings/input/regulator-haptic.yaml
new file mode 100644 (file)
index 0000000..b1ae72f
--- /dev/null
@@ -0,0 +1,43 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/input/regulator-haptic.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Regulator Haptic Device Tree Bindings
+
+maintainers:
+  - Jaewon Kim <jaewon02.kim@samsung.com>
+
+properties:
+  compatible:
+    const: regulator-haptic
+
+  haptic-supply:
+    description: >
+      Power supply to the haptic motor
+
+  max-microvolt:
+    description: >
+      The maximum voltage value supplied to the haptic motor
+
+  min-microvolt:
+    description: >
+      The minimum voltage value supplied to the haptic motor
+
+required:
+  - compatible
+  - haptic-supply
+  - max-microvolt
+  - min-microvolt
+
+additionalProperties: false
+
+examples:
+  - |
+    haptics {
+        compatible = "regulator-haptic";
+        haptic-supply = <&motor_regulator>;
+        max-microvolt = <2700000>;
+        min-microvolt = <1100000>;
+    };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml b/Documentation/devicetree/bindings/input/touchscreen/chipone,icn8318.yaml
new file mode 100644 (file)
index 0000000..9df685b
--- /dev/null
@@ -0,0 +1,62 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/chipone,icn8318.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: ChipOne ICN8318 Touchscreen Controller Device Tree Bindings
+
+maintainers:
+  - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+allOf:
+  - $ref: touchscreen.yaml#
+
+properties:
+  compatible:
+    const: chipone,icn8318
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  wake-gpios:
+    maxItems: 1
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - wake-gpios
+  - touchscreen-size-x
+  - touchscreen-size-y
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        touchscreen@40 {
+            compatible = "chipone,icn8318";
+            reg = <0x40>;
+            interrupt-parent = <&pio>;
+            interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */
+            pinctrl-names = "default";
+            pinctrl-0 = <&ts_wake_pin_p66>;
+            wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
+            touchscreen-size-x = <800>;
+            touchscreen-size-y = <480>;
+            touchscreen-inverted-x;
+            touchscreen-swapped-x-y;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt b/Documentation/devicetree/bindings/input/touchscreen/chipone_icn8318.txt
deleted file mode 100644 (file)
index 38b0603..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-* ChipOne icn8318 I2C touchscreen controller
-
-Required properties:
- - compatible            : "chipone,icn8318"
- - reg                   : I2C slave address of the chip (0x40)
- - interrupts            : interrupt specification for the icn8318 interrupt
- - wake-gpios            : GPIO specification for the WAKE input
- - touchscreen-size-x    : horizontal resolution of touchscreen (in pixels)
- - touchscreen-size-y    : vertical resolution of touchscreen (in pixels)
-
-Optional properties:
- - pinctrl-names         : should be "default"
- - pinctrl-0:            : a phandle pointing to the pin settings for the
-                           control gpios
- - touchscreen-fuzz-x    : horizontal noise value of the absolute input
-                           device (in pixels)
- - touchscreen-fuzz-y    : vertical noise value of the absolute input
-                           device (in pixels)
- - touchscreen-inverted-x : X axis is inverted (boolean)
- - touchscreen-inverted-y : Y axis is inverted (boolean)
- - touchscreen-swapped-x-y       : X and Y axis are swapped (boolean)
-                           Swapping is done after inverting the axis
-
-Example:
-
-i2c@00000000 {
-       /* ... */
-
-       chipone_icn8318@40 {
-               compatible = "chipone,icn8318";
-               reg = <0x40>;
-               interrupt-parent = <&pio>;
-               interrupts = <9 IRQ_TYPE_EDGE_FALLING>; /* EINT9 (PG9) */
-               pinctrl-names = "default";
-               pinctrl-0 = <&ts_wake_pin_p66>;
-               wake-gpios = <&pio 1 3 GPIO_ACTIVE_HIGH>; /* PB3 */
-               touchscreen-size-x = <800>;
-               touchscreen-size-y = <480>;
-               touchscreen-inverted-x;
-               touchscreen-swapped-x-y;
-       };
-
-       /* ... */
-};
diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml b/Documentation/devicetree/bindings/input/touchscreen/pixcir,pixcir_ts.yaml
new file mode 100644 (file)
index 0000000..f9998ed
--- /dev/null
@@ -0,0 +1,68 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/pixcir,pixcir_ts.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Pixcir Touchscreen Controller Device Tree Bindings
+
+maintainers:
+  - Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+allOf:
+  - $ref: touchscreen.yaml#
+
+properties:
+  compatible:
+    enum:
+      - pixcir,pixcir_ts
+      - pixcir,pixcir_tangoc
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  attb-gpio:
+    maxItems: 1
+
+  reset-gpios:
+    maxItems: 1
+
+  enable-gpios:
+    maxItems: 1
+
+  wake-gpios:
+    maxItems: 1
+
+unevaluatedProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - attb-gpio
+  - touchscreen-size-x
+  - touchscreen-size-y
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        touchscreen@5c {
+            compatible = "pixcir,pixcir_ts";
+            reg = <0x5c>;
+            interrupts = <2 0>;
+            attb-gpio = <&gpf 2 0 2>;
+            touchscreen-size-x = <800>;
+            touchscreen-size-y = <600>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt b/Documentation/devicetree/bindings/input/touchscreen/pixcir_i2c_ts.txt
deleted file mode 100644 (file)
index 697a3e7..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-* Pixcir I2C touchscreen controllers
-
-Required properties:
-- compatible: must be "pixcir,pixcir_ts" or "pixcir,pixcir_tangoc"
-- reg: I2C address of the chip
-- interrupts: interrupt to which the chip is connected
-- attb-gpio: GPIO connected to the ATTB line of the chip
-- touchscreen-size-x: horizontal resolution of touchscreen (in pixels)
-- touchscreen-size-y: vertical resolution of touchscreen (in pixels)
-
-Optional properties:
-- reset-gpios: GPIO connected to the RESET line of the chip
-- enable-gpios: GPIO connected to the ENABLE line of the chip
-- wake-gpios: GPIO connected to the WAKE line of the chip
-
-Example:
-
-       i2c@00000000 {
-               /* ... */
-
-               pixcir_ts@5c {
-                       compatible = "pixcir,pixcir_ts";
-                       reg = <0x5c>;
-                       interrupts = <2 0>;
-                       attb-gpio = <&gpf 2 0 2>;
-                       touchscreen-size-x = <800>;
-                       touchscreen-size-y = <600>;
-               };
-
-               /* ... */
-       };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml b/Documentation/devicetree/bindings/input/touchscreen/ti,tsc2005.yaml
new file mode 100644 (file)
index 0000000..938aab0
--- /dev/null
@@ -0,0 +1,128 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/touchscreen/ti,tsc2005.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Texas Instruments TSC2004 and TSC2005 touchscreen controller bindings
+
+maintainers:
+  - Marek Vasut <marex@denx.de>
+  - Michael Welling <mwelling@ieee.org>
+
+properties:
+  $nodename:
+    pattern: "^touchscreen(@.*)?$"
+
+  compatible:
+    enum:
+      - ti,tsc2004
+      - ti,tsc2005
+
+  reg:
+    maxItems: 1
+    description: |
+      I2C address when used on the I2C bus, or the SPI chip select index
+      when used on the SPI bus
+
+  interrupts:
+    maxItems: 1
+
+  reset-gpios:
+    maxItems: 1
+    description: GPIO specifier for the controller reset line
+
+  spi-max-frequency:
+    description: TSC2005 SPI bus clock frequency.
+    maximum: 25000000
+
+  ti,x-plate-ohms:
+    description: resistance of the touchscreen's X plates in ohm (defaults to 280)
+
+  ti,esd-recovery-timeout-ms:
+    description: |
+        if the touchscreen does not respond after the configured time
+        (in milliseconds), the driver will reset it. This is disabled
+        by default.
+
+  vio-supply:
+    description: Regulator specifier
+
+  touchscreen-fuzz-pressure: true
+  touchscreen-fuzz-x: true
+  touchscreen-fuzz-y: true
+  touchscreen-max-pressure: true
+  touchscreen-size-x: true
+  touchscreen-size-y: true
+
+allOf:
+  - $ref: touchscreen.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: ti,tsc2004
+    then:
+      properties:
+        spi-max-frequency: false
+
+additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - interrupts
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+    i2c {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        touchscreen@48 {
+            compatible = "ti,tsc2004";
+            reg = <0x48>;
+            vio-supply = <&vio>;
+
+            reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>;
+            interrupts-extended = <&gpio1 27 IRQ_TYPE_EDGE_RISING>;
+
+            touchscreen-fuzz-x = <4>;
+            touchscreen-fuzz-y = <7>;
+            touchscreen-fuzz-pressure = <2>;
+            touchscreen-size-x = <4096>;
+            touchscreen-size-y = <4096>;
+            touchscreen-max-pressure = <2048>;
+
+            ti,x-plate-ohms = <280>;
+            ti,esd-recovery-timeout-ms = <8000>;
+        };
+    };
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/gpio/gpio.h>
+    spi {
+        #address-cells = <1>;
+        #size-cells = <0>;
+        touchscreen@0 {
+            compatible = "ti,tsc2005";
+            spi-max-frequency = <6000000>;
+            reg = <0>;
+
+            vio-supply = <&vio>;
+
+            reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
+            interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
+
+            touchscreen-fuzz-x = <4>;
+            touchscreen-fuzz-y = <7>;
+            touchscreen-fuzz-pressure = <2>;
+            touchscreen-size-x = <4096>;
+            touchscreen-size-y = <4096>;
+            touchscreen-max-pressure = <2048>;
+
+            ti,x-plate-ohms = <280>;
+            ti,esd-recovery-timeout-ms = <8000>;
+        };
+    };
diff --git a/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt b/Documentation/devicetree/bindings/input/touchscreen/tsc2005.txt
deleted file mode 100644 (file)
index b80c04b..0000000
+++ /dev/null
@@ -1,64 +0,0 @@
-* Texas Instruments tsc2004 and tsc2005 touchscreen controllers
-
-Required properties:
- - compatible                : "ti,tsc2004" or "ti,tsc2005"
- - reg                       : Device address
- - interrupts                : IRQ specifier
- - spi-max-frequency         : Maximum SPI clocking speed of the device
-                               (for tsc2005)
-
-Optional properties:
- - vio-supply                : Regulator specifier
- - reset-gpios               : GPIO specifier for the controller reset line
- - ti,x-plate-ohms           : integer, resistance of the touchscreen's X plates
-                               in ohm (defaults to 280)
- - ti,esd-recovery-timeout-ms : integer, if the touchscreen does not respond after
-                               the configured time (in milli seconds), the driver
-                               will reset it. This is disabled by default.
- - properties defined in touchscreen.txt
-
-Example:
-
-&i2c3 {
-       tsc2004@48 {
-               compatible = "ti,tsc2004";
-               reg = <0x48>;
-               vio-supply = <&vio>;
-
-               reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>;
-               interrupts-extended = <&gpio1 27 IRQ_TYPE_EDGE_RISING>;
-
-               touchscreen-fuzz-x = <4>;
-               touchscreen-fuzz-y = <7>;
-               touchscreen-fuzz-pressure = <2>;
-               touchscreen-size-x = <4096>;
-               touchscreen-size-y = <4096>;
-               touchscreen-max-pressure = <2048>;
-
-               ti,x-plate-ohms = <280>;
-               ti,esd-recovery-timeout-ms = <8000>;
-       };
-}
-
-&mcspi1 {
-       tsc2005@0 {
-               compatible = "ti,tsc2005";
-               spi-max-frequency = <6000000>;
-               reg = <0>;
-
-               vio-supply = <&vio>;
-
-               reset-gpios = <&gpio4 8 GPIO_ACTIVE_HIGH>; /* 104 */
-               interrupts-extended = <&gpio4 4 IRQ_TYPE_EDGE_RISING>; /* 100 */
-
-               touchscreen-fuzz-x = <4>;
-               touchscreen-fuzz-y = <7>;
-               touchscreen-fuzz-pressure = <2>;
-               touchscreen-size-x = <4096>;
-               touchscreen-size-y = <4096>;
-               touchscreen-max-pressure = <2048>;
-
-               ti,x-plate-ohms = <280>;
-               ti,esd-recovery-timeout-ms = <8000>;
-       };
-}
diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.txt b/Documentation/devicetree/bindings/power/reset/qcom,pon.txt
deleted file mode 100644 (file)
index 0c0dc3a..0000000
+++ /dev/null
@@ -1,49 +0,0 @@
-Qualcomm PON Device
-
-The Power On device for Qualcomm PM8xxx is MFD supporting pwrkey
-and resin along with the Android reboot-mode.
-
-This DT node has pwrkey and resin as sub nodes.
-
-Required Properties:
--compatible: Must be one of:
-       "qcom,pm8916-pon"
-       "qcom,pms405-pon"
-       "qcom,pm8998-pon"
-
--reg: Specifies the physical address of the pon register
-
-Optional subnode:
--pwrkey: Specifies the subnode pwrkey and should follow the
- qcom,pm8941-pwrkey.txt description.
--resin: Specifies the subnode resin and should follow the
- qcom,pm8xxx-pwrkey.txt description.
-
-The rest of the properties should follow the generic reboot-mode description
-found in reboot-mode.txt
-
-Example:
-
-       pon@800 {
-               compatible = "qcom,pm8916-pon";
-
-               reg = <0x800>;
-               mode-bootloader = <0x2>;
-               mode-recovery = <0x1>;
-
-               pwrkey {
-                       compatible = "qcom,pm8941-pwrkey";
-                       interrupts = <0x0 0x8 0 IRQ_TYPE_EDGE_BOTH>;
-                       debounce = <15625>;
-                       bias-pull-up;
-                       linux,code = <KEY_POWER>;
-               };
-
-               resin {
-                       compatible = "qcom,pm8941-resin";
-                       interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
-                       debounce = <15625>;
-                       bias-pull-up;
-                       linux,code = <KEY_VOLUMEDOWN>;
-               };
-       };
diff --git a/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml b/Documentation/devicetree/bindings/power/reset/qcom,pon.yaml
new file mode 100644 (file)
index 0000000..353f155
--- /dev/null
@@ -0,0 +1,80 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/power/reset/qcom,pon.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm PON Device
+
+maintainers:
+  - Vinod Koul <vkoul@kernel.org>
+
+description: |
+  The Power On device for Qualcomm PM8xxx is MFD supporting pwrkey
+  and resin along with the Android reboot-mode.
+
+  This DT node has pwrkey and resin as sub nodes.
+
+allOf:
+  - $ref: reboot-mode.yaml#
+
+properties:
+  compatible:
+    enum:
+      - qcom,pm8916-pon
+      - qcom,pms405-pon
+      - qcom,pm8998-pon
+
+  reg:
+    maxItems: 1
+
+  pwrkey:
+    type: object
+    $ref: "../../input/qcom,pm8941-pwrkey.yaml#"
+
+  resin:
+    type: object
+    $ref: "../../input/qcom,pm8941-pwrkey.yaml#"
+
+required:
+  - compatible
+  - reg
+
+unevaluatedProperties: false
+
+examples:
+  - |
+   #include <dt-bindings/interrupt-controller/irq.h>
+   #include <dt-bindings/input/linux-event-codes.h>
+   #include <dt-bindings/spmi/spmi.h>
+   spmi_bus: spmi@c440000 {
+     reg = <0x0c440000 0x1100>;
+     #address-cells = <2>;
+     #size-cells = <0>;
+     pmk8350: pmic@0 {
+       reg = <0x0 SPMI_USID>;
+       #address-cells = <1>;
+       #size-cells = <0>;
+       pmk8350_pon: pon_hlos@1300 {
+         reg = <0x1300>;
+         compatible = "qcom,pm8998-pon";
+
+         pwrkey {
+            compatible = "qcom,pm8941-pwrkey";
+            interrupts = < 0x0 0x8 0 IRQ_TYPE_EDGE_BOTH >;
+            debounce = <15625>;
+            bias-pull-up;
+            linux,code = <KEY_POWER>;
+         };
+
+         resin {
+            compatible = "qcom,pm8941-resin";
+            interrupts = <0x0 0x8 1 IRQ_TYPE_EDGE_BOTH>;
+            debounce = <15625>;
+            bias-pull-up;
+            linux,code = <KEY_VOLUMEDOWN>;
+         };
+       };
+     };
+   };
+...
index 9c6fda6..ad0a0b9 100644 (file)
@@ -36,7 +36,7 @@ patternProperties:
   "^mode-.*$":
     $ref: /schemas/types.yaml#/definitions/uint32
 
-additionalProperties: false
+additionalProperties: true
 
 examples:
   - |
index 5596bee..81a54a4 100644 (file)
@@ -29,6 +29,7 @@ properties:
           - enum:
               - rockchip,px30-pwm
               - rockchip,rk3308-pwm
+              - rockchip,rk3568-pwm
           - const: rockchip,rk3328-pwm
 
   reg:
index 7548d87..13925bb 100644 (file)
@@ -32,6 +32,9 @@ properties:
       - dallas,ds3232
       # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
       - epson,rx8010
+      # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
+      - epson,rx8025
+      - epson,rx8035
       # I2C-BUS INTERFACE REAL TIME CLOCK MODULE with Battery Backed RAM
       - epson,rx8571
       # I2C-BUS INTERFACE REAL TIME CLOCK MODULE
index e555381..9952199 100644 (file)
@@ -84,9 +84,9 @@ unevaluatedProperties: false
 if:
   properties:
     compatible:
-      oneOf:
-        - const: ti,omap2-mcspi
-        - const: ti,omap4-mcspi
+      enum:
+        - ti,omap2-mcspi
+        - ti,omap4-mcspi
 
 then:
   properties:
index 593f769..03e5dca 100644 (file)
@@ -27,13 +27,11 @@ properties:
 
   xlnx,num-ss-bits:
     description: Number of chip selects used.
-    $ref: /schemas/types.yaml#/definitions/uint32
     minimum: 1
     maximum: 32
 
   xlnx,num-transfer-bits:
     description: Number of bits per transfer. This will be 8 if not specified.
-    $ref: /schemas/types.yaml#/definitions/uint32
     enum: [8, 16, 32]
     default: 8
 
diff --git a/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml b/Documentation/devicetree/bindings/thermal/qcom-lmh.yaml
new file mode 100644 (file)
index 0000000..289e9a8
--- /dev/null
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright 2021 Linaro Ltd.
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/thermal/qcom-lmh.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm Limits Management Hardware(LMh)
+
+maintainers:
+  - Thara Gopinath <thara.gopinath@linaro.org>
+
+description:
+  Limits Management Hardware(LMh) is a hardware infrastructure on some
+  Qualcomm SoCs that can enforce temperature and current limits as
+  programmed by software for certain IPs like CPU.
+
+properties:
+  compatible:
+    enum:
+      - qcom,sdm845-lmh
+
+  reg:
+    items:
+      - description: core registers
+
+  interrupts:
+    maxItems: 1
+
+  '#interrupt-cells':
+    const: 1
+
+  interrupt-controller: true
+
+  cpus:
+    description:
+      phandle of the first cpu in the LMh cluster
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  qcom,lmh-temp-arm-millicelsius:
+    description:
+      An integer expressing temperature threshold at which the LMh thermal
+      FSM is engaged.
+
+  qcom,lmh-temp-low-millicelsius:
+    description:
+      An integer expressing temperature threshold at which the state machine
+      will attempt to remove frequency throttling.
+
+  qcom,lmh-temp-high-millicelsius:
+    description:
+      An integer expressing temperature threshold at which the state machine
+      will attempt to throttle the frequency.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - '#interrupt-cells'
+  - interrupt-controller
+  - cpus
+  - qcom,lmh-temp-arm-millicelsius
+  - qcom,lmh-temp-low-millicelsius
+  - qcom,lmh-temp-high-millicelsius
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    lmh@17d70800 {
+      compatible = "qcom,sdm845-lmh";
+      reg = <0x17d70800 0x400>;
+      interrupts = <GIC_SPI 33 IRQ_TYPE_LEVEL_HIGH>;
+      cpus = <&CPU4>;
+      qcom,lmh-temp-arm-millicelsius = <65000>;
+      qcom,lmh-temp-low-millicelsius = <94500>;
+      qcom,lmh-temp-high-millicelsius = <95000>;
+      interrupt-controller;
+      #interrupt-cells = <1>;
+    };
index 164f715..a07de5e 100644 (file)
@@ -215,7 +215,7 @@ patternProperties:
       - polling-delay
       - polling-delay-passive
       - thermal-sensors
-      - trips
+
     additionalProperties: false
 
 additionalProperties: false
index d465970..4b7a027 100644 (file)
@@ -36,7 +36,8 @@ required:
   - reg
   - interrupts
 
-additionalProperties: false
+additionalProperties:
+  type: object
 
 examples:
   - |
diff --git a/Documentation/devicetree/bindings/virtio/virtio-device.yaml b/Documentation/devicetree/bindings/virtio/virtio-device.yaml
new file mode 100644 (file)
index 0000000..1778ea9
--- /dev/null
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/virtio/virtio-device.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Virtio device bindings
+
+maintainers:
+  - Viresh Kumar <viresh.kumar@linaro.org>
+
+description:
+  These bindings are applicable to virtio devices irrespective of the bus they
+  are bound to, like mmio or pci.
+
+# We need a select here so we don't match all nodes with 'virtio,mmio'
+properties:
+  compatible:
+    pattern: "^virtio,device[0-9a-f]{1,8}$"
+    description: Virtio device nodes.
+      "virtio,deviceID", where ID is the virtio device id. The textual
+      representation of ID shall be in lower case hexadecimal with leading
+      zeroes suppressed.
+
+required:
+  - compatible
+
+additionalProperties: true
+
+examples:
+  - |
+    virtio@3000 {
+        compatible = "virtio,mmio";
+        reg = <0x3000 0x100>;
+        interrupts = <43>;
+
+        i2c {
+            compatible = "virtio,device22";
+        };
+    };
+...
index f2105ee..ab9641e 100644 (file)
@@ -15,13 +15,13 @@ maintainers:
 
 properties:
   compatible:
-    oneOf:
-      - const: maxim,max6369
-      - const: maxim,max6370
-      - const: maxim,max6371
-      - const: maxim,max6372
-      - const: maxim,max6373
-      - const: maxim,max6374
+    enum:
+      - maxim,max6369
+      - maxim,max6370
+      - maxim,max6371
+      - maxim,max6372
+      - maxim,max6373
+      - maxim,max6374
 
   reg:
     description: This is a 1-byte memory-mapped address
index 99cb6d7..2949c99 100644 (file)
@@ -22,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
index 7e5c04c..98db2ea 100644 (file)
@@ -71,9 +71,6 @@ Other Functions
 .. kernel-doc:: fs/fs-writeback.c
    :export:
 
-.. kernel-doc:: fs/block_dev.c
-   :export:
-
 .. kernel-doc:: fs/anon_inodes.c
    :export:
 
index d5a73fa..8126bea 100644 (file)
@@ -37,7 +37,7 @@ TTM initialization
     This section is outdated.
 
 Drivers wishing to support TTM must pass a filled :c:type:`ttm_bo_driver
-<ttm_bo_driver>` structure to ttm_bo_device_init, together with an
+<ttm_bo_driver>` structure to ttm_device_init, together with an
 initialized global reference to the memory manager.  The ttm_bo_driver
 structure contains several fields with function pointers for
 initializing the TTM, allocating and freeing memory, waiting for command
index 60ac091..8a2788a 100644 (file)
@@ -101,8 +101,7 @@ subsystems which use EM might rely on this flag to check if all EM devices use
 the same scale. If there are different scales, these subsystems might decide
 to: return warning/error, stop working or panic.
 See Section 3. for an example of driver implementing this
-callback, and kernel/power/energy_model.c for further documentation on this
-API.
+callback, or Section 2.4 for further documentation on this API.
 
 
 2.3 Accessing performance domains
@@ -123,7 +122,17 @@ em_cpu_energy() API. The estimation is performed assuming that the schedutil
 CPUfreq governor is in use in case of CPU device. Currently this calculation is
 not provided for other type of devices.
 
-More details about the above APIs can be found in include/linux/energy_model.h.
+More details about the above APIs can be found in ``<linux/energy_model.h>``
+or in Section 2.4.
+
+
+2.4 Description details of this API
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+.. kernel-doc:: include/linux/energy_model.h
+   :internal:
+
+.. kernel-doc:: kernel/power/energy_model.c
+   :export:
 
 
 3. Example driver
index 0b5eefe..c432be0 100644 (file)
@@ -27,6 +27,7 @@ place where this information is gathered.
    iommu
    media/index
    sysfs-platform_profile
+   vduse
 
 .. only::  subproject and html
 
index b7070d7..2e81340 100644 (file)
@@ -299,6 +299,7 @@ Code  Seq#    Include File                                           Comments
 'z'   10-4F  drivers/s390/crypto/zcrypt_api.h                        conflict!
 '|'   00-7F  linux/media.h
 0x80  00-1F  linux/fb.h
+0x81  00-1F  linux/vduse.h
 0x89  00-06  arch/x86/include/asm/sockios.h
 0x89  0B-DF  linux/sockios.h
 0x89  E0-EF  linux/sockios.h                                         SIOCPROTOPRIVATE range
diff --git a/Documentation/userspace-api/vduse.rst b/Documentation/userspace-api/vduse.rst
new file mode 100644 (file)
index 0000000..42ef59e
--- /dev/null
@@ -0,0 +1,233 @@
+==================================
+VDUSE - "vDPA Device in Userspace"
+==================================
+
+vDPA (virtio data path acceleration) device is a device that uses a
+datapath which complies with the virtio specifications with vendor
+specific control path. vDPA devices can be both physically located on
+the hardware or emulated by software. VDUSE is a framework that makes it
+possible to implement software-emulated vDPA devices in userspace. And
+to make the device emulation more secure, the emulated vDPA device's
+control path is handled in the kernel and only the data path is
+implemented in the userspace.
+
+Note that only virtio block device is supported by VDUSE framework now,
+which can reduce security risks when the userspace process that implements
+the data path is run by an unprivileged user. The support for other device
+types can be added after the security issue of corresponding device driver
+is clarified or fixed in the future.
+
+Create/Destroy VDUSE devices
+----------------------------
+
+VDUSE devices are created as follows:
+
+1. Create a new VDUSE instance with ioctl(VDUSE_CREATE_DEV) on
+   /dev/vduse/control.
+
+2. Setup each virtqueue with ioctl(VDUSE_VQ_SETUP) on /dev/vduse/$NAME.
+
+3. Begin processing VDUSE messages from /dev/vduse/$NAME. The first
+   messages will arrive while attaching the VDUSE instance to vDPA bus.
+
+4. Send the VDPA_CMD_DEV_NEW netlink message to attach the VDUSE
+   instance to vDPA bus.
+
+VDUSE devices are destroyed as follows:
+
+1. Send the VDPA_CMD_DEV_DEL netlink message to detach the VDUSE
+   instance from vDPA bus.
+
+2. Close the file descriptor referring to /dev/vduse/$NAME.
+
+3. Destroy the VDUSE instance with ioctl(VDUSE_DESTROY_DEV) on
+   /dev/vduse/control.
+
+The netlink messages can be sent via the vdpa tool in iproute2 or with the
+sample code below:
+
+.. code-block:: c
+
+       static int netlink_add_vduse(const char *name, enum vdpa_command cmd)
+       {
+               struct nl_sock *nlsock;
+               struct nl_msg *msg;
+               int famid;
+
+               nlsock = nl_socket_alloc();
+               if (!nlsock)
+                       return -ENOMEM;
+
+               if (genl_connect(nlsock))
+                       goto free_sock;
+
+               famid = genl_ctrl_resolve(nlsock, VDPA_GENL_NAME);
+               if (famid < 0)
+                       goto close_sock;
+
+               msg = nlmsg_alloc();
+               if (!msg)
+                       goto close_sock;
+
+               if (!genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, famid, 0, 0, cmd, 0))
+                       goto nla_put_failure;
+
+               NLA_PUT_STRING(msg, VDPA_ATTR_DEV_NAME, name);
+               if (cmd == VDPA_CMD_DEV_NEW)
+                       NLA_PUT_STRING(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, "vduse");
+
+               if (nl_send_sync(nlsock, msg))
+                       goto close_sock;
+
+               nl_close(nlsock);
+               nl_socket_free(nlsock);
+
+               return 0;
+       nla_put_failure:
+               nlmsg_free(msg);
+       close_sock:
+               nl_close(nlsock);
+       free_sock:
+               nl_socket_free(nlsock);
+               return -1;
+       }
+
+How VDUSE works
+---------------
+
+As mentioned above, a VDUSE device is created by ioctl(VDUSE_CREATE_DEV) on
+/dev/vduse/control. With this ioctl, userspace can specify some basic configuration
+such as device name (uniquely identify a VDUSE device), virtio features, virtio
+configuration space, the number of virtqueues and so on for this emulated device.
+Then a char device interface (/dev/vduse/$NAME) is exported to userspace for device
+emulation. Userspace can use the VDUSE_VQ_SETUP ioctl on /dev/vduse/$NAME to
+add per-virtqueue configuration such as the max size of virtqueue to the device.
+
+After the initialization, the VDUSE device can be attached to vDPA bus via
+the VDPA_CMD_DEV_NEW netlink message. Userspace needs to read()/write() on
+/dev/vduse/$NAME to receive/reply some control messages from/to VDUSE kernel
+module as follows:
+
+.. code-block:: c
+
+       static int vduse_message_handler(int dev_fd)
+       {
+               int len;
+               struct vduse_dev_request req;
+               struct vduse_dev_response resp;
+
+               len = read(dev_fd, &req, sizeof(req));
+               if (len != sizeof(req))
+                       return -1;
+
+               resp.request_id = req.request_id;
+
+               switch (req.type) {
+
+               /* handle different types of messages */
+
+               }
+
+               len = write(dev_fd, &resp, sizeof(resp));
+               if (len != sizeof(resp))
+                       return -1;
+
+               return 0;
+       }
+
+There are now three types of messages introduced by VDUSE framework:
+
+- VDUSE_GET_VQ_STATE: Get the state for virtqueue, userspace should return
+  avail index for split virtqueue or the device/driver ring wrap counters and
+  the avail and used index for packed virtqueue.
+
+- VDUSE_SET_STATUS: Set the device status, userspace should follow
+  the virtio spec: https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.html
+  to process this message. For example, fail to set the FEATURES_OK device
+  status bit if the device cannot accept the negotiated virtio features
+  obtained from the VDUSE_DEV_GET_FEATURES ioctl.
+
+- VDUSE_UPDATE_IOTLB: Notify userspace to update the memory mapping for specified
+  IOVA range, userspace should firstly remove the old mapping, then setup the new
+  mapping via the VDUSE_IOTLB_GET_FD ioctl.
+
+After DRIVER_OK status bit is set via the VDUSE_SET_STATUS message, userspace is
+able to start the dataplane processing as follows:
+
+1. Get the specified virtqueue's information with the VDUSE_VQ_GET_INFO ioctl,
+   including the size, the IOVAs of descriptor table, available ring and used ring,
+   the state and the ready status.
+
+2. Pass the above IOVAs to the VDUSE_IOTLB_GET_FD ioctl so that those IOVA regions
+   can be mapped into userspace. Some sample code is shown below:
+
+.. code-block:: c
+
+       static int perm_to_prot(uint8_t perm)
+       {
+               int prot = 0;
+
+               switch (perm) {
+               case VDUSE_ACCESS_WO:
+                       prot |= PROT_WRITE;
+                       break;
+               case VDUSE_ACCESS_RO:
+                       prot |= PROT_READ;
+                       break;
+               case VDUSE_ACCESS_RW:
+                       prot |= PROT_READ | PROT_WRITE;
+                       break;
+               }
+
+               return prot;
+       }
+
+       static void *iova_to_va(int dev_fd, uint64_t iova, uint64_t *len)
+       {
+               int fd;
+               void *addr;
+               size_t size;
+               struct vduse_iotlb_entry entry;
+
+               entry.start = iova;
+               entry.last = iova;
+
+               /*
+                * Find the first IOVA region that overlaps with the specified
+                * range [start, last] and return the corresponding file descriptor.
+                */
+               fd = ioctl(dev_fd, VDUSE_IOTLB_GET_FD, &entry);
+               if (fd < 0)
+                       return NULL;
+
+               size = entry.last - entry.start + 1;
+               *len = entry.last - iova + 1;
+               addr = mmap(0, size, perm_to_prot(entry.perm), MAP_SHARED,
+                           fd, entry.offset);
+               close(fd);
+               if (addr == MAP_FAILED)
+                       return NULL;
+
+               /*
+                * Using some data structures such as linked list to store
+                * the iotlb mapping. The munmap(2) should be called for the
+                * cached mapping when the corresponding VDUSE_UPDATE_IOTLB
+                * message is received or the device is reset.
+                */
+
+               return addr + iova - entry.start;
+       }
+
+3. Setup the kick eventfd for the specified virtqueues with the VDUSE_VQ_SETUP_KICKFD
+   ioctl. The kick eventfd is used by VDUSE kernel module to notify userspace to
+   consume the available ring. This is optional since userspace can choose to poll the
+   available ring instead.
+
+4. Listen to the kick eventfd (optional) and consume the available ring. The buffer
+   described by the descriptors in the descriptor table should be also mapped into
+   userspace via the VDUSE_IOTLB_GET_FD ioctl before accessing.
+
+5. Inject an interrupt for specific virtqueue with the VDUSE_INJECT_VQ_IRQ ioctl
+   after the used ring is filled.
+
+For more details on the uAPI, please see include/uapi/linux/vduse.h.
index 5ec52be..eeb4c70 100644 (file)
@@ -333,7 +333,7 @@ S:  Maintained
 F:     drivers/platform/x86/acer-wmi.c
 
 ACPI
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Len Brown <lenb@kernel.org>
 L:     linux-acpi@vger.kernel.org
 S:     Supported
@@ -354,7 +354,7 @@ F:  include/linux/fwnode.h
 F:     tools/power/acpi/
 
 ACPI APEI
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Len Brown <lenb@kernel.org>
 R:     James Morse <james.morse@arm.com>
 R:     Tony Luck <tony.luck@intel.com>
@@ -364,7 +364,6 @@ F:  drivers/acpi/apei/
 
 ACPI COMPONENT ARCHITECTURE (ACPICA)
 M:     Robert Moore <robert.moore@intel.com>
-M:     Erik Kaneda <erik.kaneda@intel.com>
 M:     "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
 L:     linux-acpi@vger.kernel.org
 L:     devel@acpica.org
@@ -403,7 +402,7 @@ S:  Maintained
 F:     drivers/platform/x86/i2c-multi-instantiate.c
 
 ACPI PMIC DRIVERS
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Len Brown <lenb@kernel.org>
 R:     Andy Shevchenko <andy@kernel.org>
 R:     Mika Westerberg <mika.westerberg@linux.intel.com>
@@ -3314,7 +3313,6 @@ S:        Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 F:     block/
 F:     drivers/block/
-F:     fs/block_dev.c
 F:     include/linux/blk*
 F:     kernel/trace/blktrace.c
 F:     lib/sbitmap.c
@@ -4686,6 +4684,7 @@ F:        drivers/platform/x86/compal-laptop.c
 
 COMPILER ATTRIBUTES
 M:     Miguel Ojeda <ojeda@kernel.org>
+R:     Nick Desaulniers <ndesaulniers@google.com>
 S:     Maintained
 F:     include/linux/compiler_attributes.h
 
@@ -4827,7 +4826,7 @@ W:        http://www.arm.com/products/processors/technologies/biglittleprocessing.php
 F:     drivers/cpufreq/vexpress-spc-cpufreq.c
 
 CPU FREQUENCY SCALING FRAMEWORK
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Viresh Kumar <viresh.kumar@linaro.org>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
@@ -4845,7 +4844,7 @@ F:        kernel/sched/cpufreq*.c
 F:     tools/testing/selftests/cpufreq/
 
 CPU IDLE TIME MANAGEMENT FRAMEWORK
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Daniel Lezcano <daniel.lezcano@linaro.org>
 L:     linux-pm@vger.kernel.org
 S:     Maintained
@@ -7591,7 +7590,7 @@ W:        ftp://ftp.openlinux.org/pub/people/hch/vxfs
 F:     fs/freevxfs/
 
 FREEZER
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Pavel Machek <pavel@ucw.cz>
 L:     linux-pm@vger.kernel.org
 S:     Supported
@@ -7844,7 +7843,7 @@ S:        Supported
 F:     drivers/i2c/muxes/i2c-demux-pinctrl.c
 
 GENERIC PM DOMAINS
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Kevin Hilman <khilman@kernel.org>
 M:     Ulf Hansson <ulf.hansson@linaro.org>
 L:     linux-pm@vger.kernel.org
@@ -8310,7 +8309,7 @@ W:        http://drama.obuda.kando.hu/~fero/cgi-bin/hgafb.shtml
 F:     drivers/video/fbdev/hgafb.c
 
 HIBERNATION (aka Software Suspend, aka swsusp)
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Pavel Machek <pavel@ucw.cz>
 L:     linux-pm@vger.kernel.org
 S:     Supported
@@ -10623,10 +10622,10 @@ T:    git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
 F:     drivers/ata/sata_promise.*
 
 LIBATA SUBSYSTEM (Serial and Parallel ATA drivers)
-M:     Jens Axboe <axboe@kernel.dk>
+M:     Damien Le Moal <damien.lemoal@opensource.wdc.com>
 L:     linux-ide@vger.kernel.org
 S:     Maintained
-T:     git git://git.kernel.org/pub/scm/linux/kernel/git/axboe/linux-block.git
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/dlemoal/libata.git
 F:     Documentation/devicetree/bindings/ata/
 F:     drivers/ata/
 F:     include/linux/ata.h
@@ -13410,7 +13409,7 @@ F:      include/linux/nvme-fc.h
 NVM EXPRESS TARGET DRIVER
 M:     Christoph Hellwig <hch@lst.de>
 M:     Sagi Grimberg <sagi@grimberg.me>
-M:     Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
+M:     Chaitanya Kulkarni <kch@nvidia.com>
 L:     linux-nvme@lists.infradead.org
 S:     Supported
 W:     http://git.infradead.org/nvme.git
@@ -14969,7 +14968,7 @@ F:      kernel/time/*timer*
 F:     kernel/time/namespace.c
 
 POWER MANAGEMENT CORE
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 L:     linux-pm@vger.kernel.org
 S:     Supported
 B:     https://bugzilla.kernel.org
@@ -17947,7 +17946,7 @@ F:      arch/sh/
 F:     drivers/sh/
 
 SUSPEND TO RAM
-M:     "Rafael J. Wysocki" <rjw@rjwysocki.net>
+M:     "Rafael J. Wysocki" <rafael@kernel.org>
 M:     Len Brown <len.brown@intel.com>
 M:     Pavel Machek <pavel@ucw.cz>
 L:     linux-pm@vger.kernel.org
@@ -18567,6 +18566,7 @@ F:      drivers/thermal/
 F:     include/linux/cpu_cooling.h
 F:     include/linux/thermal.h
 F:     include/uapi/linux/thermal.h
+F:     tools/thermal/
 
 THERMAL DRIVER FOR AMLOGIC SOCS
 M:     Guillaume La Roque <glaroque@baylibre.com>
index 7fa6828..587543c 100644 (file)
@@ -43,7 +43,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
        this_leaf->type = type;
 }
 
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
 {
        unsigned int ctype, level, leaves, fw_level;
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -78,7 +78,7 @@ static int __init_cache_level(unsigned int cpu)
        return 0;
 }
 
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
 {
        unsigned int level, idx;
        enum cache_type type;
@@ -97,6 +97,3 @@ static int __populate_cache_leaves(unsigned int cpu)
        }
        return 0;
 }
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
index b16be52..37a8175 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/crash_dump.h>
 #include <linux/hugetlb.h>
 #include <linux/acpi_iort.h>
+#include <linux/kmemleak.h>
 
 #include <asm/boot.h>
 #include <asm/fixmap.h>
@@ -101,6 +102,11 @@ static void __init reserve_crashkernel(void)
        pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
                crash_base, crash_base + crash_size, crash_size >> 20);
 
+       /*
+        * The crashkernel memory will be removed from the kernel linear
+        * map. Inform kmemleak so that it won't try to access it.
+        */
+       kmemleak_ignore_phys(crash_base);
        crashk_res.start = crash_base;
        crashk_res.end = crash_base + crash_size - 1;
 }
@@ -222,7 +228,21 @@ early_param("mem", early_mem);
 
 void __init arm64_memblock_init(void)
 {
-       const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
+       s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
+
+       /*
+        * Corner case: 52-bit VA capable systems running KVM in nVHE mode may
+        * be limited in their ability to support a linear map that exceeds 51
+        * bits of VA space, depending on the placement of the ID map. Given
+        * that the placement of the ID map may be randomized, let's simply
+        * limit the kernel's linear map to 51 bits as well if we detect this
+        * configuration.
+        */
+       if (IS_ENABLED(CONFIG_KVM) && vabits_actual == 52 &&
+           is_hyp_mode_available() && !is_kernel_in_hyp_mode()) {
+               pr_info("Capping linear region to 51 bits for KVM in nVHE mode on LVA capable hardware.\n");
+               linear_region_size = min_t(u64, linear_region_size, BIT(51));
+       }
 
        /* Remove memory above our supported physical address size */
        memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX);
index 53d8ea7..495dd05 100644 (file)
@@ -17,7 +17,7 @@ do {                                                          \
        leaf++;                                                 \
 } while (0)
 
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
 {
        struct cpuinfo_mips *c = &current_cpu_data;
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -74,7 +74,7 @@ static void fill_cpumask_cluster(int cpu, cpumask_t *cpu_map)
                        cpumask_set_cpu(cpu1, cpu_map);
 }
 
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
 {
        struct cpuinfo_mips *c = &current_cpu_data;
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -114,6 +114,3 @@ static int __populate_cache_leaves(unsigned int cpu)
 
        return 0;
 }
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
index 3001a7d..4742b6f 100644 (file)
@@ -10,7 +10,6 @@ config PARISC
        select ARCH_HAS_ELF_RANDOMIZE
        select ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_HAS_UBSAN_SANITIZE_ALL
-       select ARCH_HAS_STRNLEN_USER
        select ARCH_NO_SG_CHAIN
        select ARCH_SUPPORTS_HUGETLBFS if PA20
        select ARCH_SUPPORTS_MEMORY_FAILURE
@@ -65,7 +64,6 @@ config PARISC
        select HAVE_KPROBES_ON_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_REGS
        select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
-       select SET_FS
        select TRACE_IRQFLAGS_SUPPORT
 
        help
index dff4536..9fe5487 100644 (file)
@@ -26,7 +26,7 @@ endif
 OBJECTS += $(obj)/head.o $(obj)/real2.o $(obj)/firmware.o $(obj)/misc.o $(obj)/piggy.o
 
 LDFLAGS_vmlinux := -X -e startup --as-needed -T
-$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC)
+$(obj)/vmlinux: $(obj)/vmlinux.lds $(OBJECTS) $(LIBGCC) FORCE
        $(call if_changed,ld)
 
 sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\)$$/\#define SZ\2 0x\1/p'
@@ -34,7 +34,7 @@ sed-sizes := -e 's/^\([0-9a-fA-F]*\) . \(__bss_start\|_end\|parisc_kernel_start\
 quiet_cmd_sizes = GEN $@
       cmd_sizes = $(NM) $< | sed -n $(sed-sizes) > $@
 
-$(obj)/sizes.h: vmlinux
+$(obj)/sizes.h: vmlinux FORCE
        $(call if_changed,sizes)
 
 AFLAGS_head.o += -I$(objtree)/$(obj) -DBOOTLOADER
@@ -70,19 +70,19 @@ suffix-$(CONFIG_KERNEL_LZMA)  := lzma
 suffix-$(CONFIG_KERNEL_LZO)  := lzo
 suffix-$(CONFIG_KERNEL_XZ)  := xz
 
-$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.gz: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,gzip)
-$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.bz2: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,bzip2)
-$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lz4: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lz4)
-$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzma: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lzma)
-$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.lzo: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,lzo)
-$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y)
+$(obj)/vmlinux.bin.xz: $(vmlinux.bin.all-y) FORCE
        $(call if_changed,xzkern)
 
 LDFLAGS_piggy.o := -r --format binary --oformat $(LD_BFD) -T
-$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y)
+$(obj)/piggy.o: $(obj)/vmlinux.scr $(obj)/vmlinux.bin.$(suffix-y) FORCE
        $(call if_changed,ld)
index b5fbcd2..eeb7da0 100644 (file)
@@ -101,10 +101,6 @@ DECLARE_PER_CPU(struct cpuinfo_parisc, cpu_data);
 
 #define CPU_HVERSION ((boot_cpu_data.hversion >> 4) & 0x0FFF)
 
-typedef struct {
-       int seg;  
-} mm_segment_t;
-
 #define ARCH_MIN_TASKALIGN     8
 
 struct thread_struct {
index 2b3010a..4b9e3d7 100644 (file)
@@ -2,7 +2,7 @@
 #ifndef _ASM_PARISC_RT_SIGFRAME_H
 #define _ASM_PARISC_RT_SIGFRAME_H
 
-#define SIGRETURN_TRAMP 4
+#define SIGRETURN_TRAMP 3
 #define SIGRESTARTBLOCK_TRAMP 5 
 #define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP)
 
index 0bd38a9..00ad50f 100644 (file)
@@ -11,7 +11,6 @@
 struct thread_info {
        struct task_struct *task;       /* main task structure */
        unsigned long flags;            /* thread_info flags (see TIF_*) */
-       mm_segment_t addr_limit;        /* user-level address space limit */
        __u32 cpu;                      /* current CPU */
        int preempt_count;              /* 0=premptable, <0=BUG; will also serve as bh-counter */
 };
@@ -21,7 +20,6 @@ struct thread_info {
        .task           = &tsk,                 \
        .flags          = 0,                    \
        .cpu            = 0,                    \
-       .addr_limit     = KERNEL_DS,            \
        .preempt_count  = INIT_PREEMPT_COUNT,   \
 }
 
index 7c13314..192ad9e 100644 (file)
 #include <linux/bug.h>
 #include <linux/string.h>
 
-#define KERNEL_DS      ((mm_segment_t){0})
-#define USER_DS        ((mm_segment_t){1})
-
-#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
-
-#define get_fs()       (current_thread_info()->addr_limit)
-#define set_fs(x)      (current_thread_info()->addr_limit = (x))
-
 /*
  * Note that since kernel addresses are in a separate address space on
  * parisc, we don't need to do anything for access_ok().
 #define get_user __get_user
 
 #if !defined(CONFIG_64BIT)
-#define LDD_USER(val, ptr)     __get_user_asm64(val, ptr)
-#define STD_USER(x, ptr)       __put_user_asm64(x, ptr)
+#define LDD_USER(sr, val, ptr) __get_user_asm64(sr, val, ptr)
+#define STD_USER(sr, x, ptr)   __put_user_asm64(sr, x, ptr)
 #else
-#define LDD_USER(val, ptr)     __get_user_asm(val, "ldd", ptr)
-#define STD_USER(x, ptr)       __put_user_asm("std", x, ptr)
+#define LDD_USER(sr, val, ptr) __get_user_asm(sr, val, "ldd", ptr)
+#define STD_USER(sr, x, ptr)   __put_user_asm(sr, "std", x, ptr)
 #endif
 
 /*
@@ -67,28 +59,15 @@ struct exception_table_entry {
 #define ASM_EXCEPTIONTABLE_ENTRY_EFAULT( fault_addr, except_addr )\
        ASM_EXCEPTIONTABLE_ENTRY( fault_addr, except_addr + 1)
 
-/*
- * load_sr2() preloads the space register %%sr2 - based on the value of
- * get_fs() - with either a value of 0 to access kernel space (KERNEL_DS which
- * is 0), or with the current value of %%sr3 to access user space (USER_DS)
- * memory. The following __get_user_asm() and __put_user_asm() functions have
- * %%sr2 hard-coded to access the requested memory.
- */
-#define load_sr2() \
-       __asm__(" or,=  %0,%%r0,%%r0\n\t"       \
-               " mfsp %%sr3,%0\n\t"            \
-               " mtsp %0,%%sr2\n\t"            \
-               : : "r"(get_fs()) : )
-
-#define __get_user_internal(val, ptr)                  \
+#define __get_user_internal(sr, val, ptr)              \
 ({                                                     \
        register long __gu_err __asm__ ("r8") = 0;      \
                                                        \
        switch (sizeof(*(ptr))) {                       \
-       case 1: __get_user_asm(val, "ldb", ptr); break; \
-       case 2: __get_user_asm(val, "ldh", ptr); break; \
-       case 4: __get_user_asm(val, "ldw", ptr); break; \
-       case 8: LDD_USER(val, ptr); break;              \
+       case 1: __get_user_asm(sr, val, "ldb", ptr); break; \
+       case 2: __get_user_asm(sr, val, "ldh", ptr); break; \
+       case 4: __get_user_asm(sr, val, "ldw", ptr); break; \
+       case 8: LDD_USER(sr, val, ptr); break;          \
        default: BUILD_BUG();                           \
        }                                               \
                                                        \
@@ -97,15 +76,14 @@ struct exception_table_entry {
 
 #define __get_user(val, ptr)                           \
 ({                                                     \
-       load_sr2();                                     \
-       __get_user_internal(val, ptr);                  \
+       __get_user_internal("%%sr3,", val, ptr);        \
 })
 
-#define __get_user_asm(val, ldx, ptr)                  \
+#define __get_user_asm(sr, val, ldx, ptr)              \
 {                                                      \
        register long __gu_val;                         \
                                                        \
-       __asm__("1: " ldx " 0(%%sr2,%2),%0\n"           \
+       __asm__("1: " ldx " 0(" sr "%2),%0\n"           \
                "9:\n"                                  \
                ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
                : "=r"(__gu_val), "=r"(__gu_err)        \
@@ -114,9 +92,22 @@ struct exception_table_entry {
        (val) = (__force __typeof__(*(ptr))) __gu_val;  \
 }
 
+#define HAVE_GET_KERNEL_NOFAULT
+#define __get_kernel_nofault(dst, src, type, err_label)        \
+{      /* read one 'type' from src through %sr0 (__get_user uses %sr3) */ \
+       type __z;               /* temporary: dst is written only on success */ \
+       long __err;                                     \
+       __err = __get_user_internal("%%sr0,", __z, (type *)(src)); \
+       if (unlikely(__err))    /* nonzero result: dst is left untouched */ \
+               goto err_label;                         \
+       else                                            \
+               *(type *)(dst) = __z;                   \
+}
+
+
 #if !defined(CONFIG_64BIT)
 
-#define __get_user_asm64(val, ptr)                     \
+#define __get_user_asm64(sr, val, ptr)                 \
 {                                                      \
        union {                                         \
                unsigned long long      l;              \
@@ -124,8 +115,8 @@ struct exception_table_entry {
        } __gu_tmp;                                     \
                                                        \
        __asm__("   copy %%r0,%R0\n"                    \
-               "1: ldw 0(%%sr2,%2),%0\n"               \
-               "2: ldw 4(%%sr2,%2),%R0\n"              \
+               "1: ldw 0(" sr "%2),%0\n"               \
+               "2: ldw 4(" sr "%2),%R0\n"              \
                "9:\n"                                  \
                ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \
                ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \
@@ -138,16 +129,16 @@ struct exception_table_entry {
 #endif /* !defined(CONFIG_64BIT) */
 
 
-#define __put_user_internal(x, ptr)                            \
+#define __put_user_internal(sr, x, ptr)                                \
 ({                                                             \
        register long __pu_err __asm__ ("r8") = 0;              \
         __typeof__(*(ptr)) __x = (__typeof__(*(ptr)))(x);      \
                                                                \
        switch (sizeof(*(ptr))) {                               \
-       case 1: __put_user_asm("stb", __x, ptr); break;         \
-       case 2: __put_user_asm("sth", __x, ptr); break;         \
-       case 4: __put_user_asm("stw", __x, ptr); break;         \
-       case 8: STD_USER(__x, ptr); break;                      \
+       case 1: __put_user_asm(sr, "stb", __x, ptr); break;     \
+       case 2: __put_user_asm(sr, "sth", __x, ptr); break;     \
+       case 4: __put_user_asm(sr, "stw", __x, ptr); break;     \
+       case 8: STD_USER(sr, __x, ptr); break;                  \
        default: BUILD_BUG();                                   \
        }                                                       \
                                                                \
@@ -156,10 +147,20 @@ struct exception_table_entry {
 
 #define __put_user(x, ptr)                                     \
 ({                                                             \
-       load_sr2();                                             \
-       __put_user_internal(x, ptr);                            \
+       __put_user_internal("%%sr3,", x, ptr);                  \
 })
 
+#define __put_kernel_nofault(dst, src, type, err_label)                \
+{      /* store one 'type' to dst through %sr0 (__put_user uses %sr3) */ \
+       type __z = *(type *)(src);      /* src is read with a plain dereference */ \
+       long __err;                                             \
+       __err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \
+       if (unlikely(__err))    /* nonzero result from the store */     \
+               goto err_label;                                 \
+}
+
+
+
 
 /*
  * The "__put_user/kernel_asm()" macros tell gcc they read from memory
@@ -170,26 +171,26 @@ struct exception_table_entry {
  * r8 is already listed as err.
  */
 
-#define __put_user_asm(stx, x, ptr)                         \
-       __asm__ __volatile__ (                              \
-               "1: " stx " %2,0(%%sr2,%1)\n"               \
-               "9:\n"                                      \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)     \
-               : "=r"(__pu_err)                            \
+#define __put_user_asm(sr, stx, x, ptr)                                \
+       __asm__ __volatile__ (                                  \
+               "1: " stx " %2,0(" sr "%1)\n"                   \
+               "9:\n"                                          \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)         \
+               : "=r"(__pu_err)                                \
                : "r"(ptr), "r"(x), "0"(__pu_err))
 
 
 #if !defined(CONFIG_64BIT)
 
-#define __put_user_asm64(__val, ptr) do {                  \
-       __asm__ __volatile__ (                              \
-               "1: stw %2,0(%%sr2,%1)\n"                   \
-               "2: stw %R2,4(%%sr2,%1)\n"                  \
-               "9:\n"                                      \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)     \
-               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)     \
-               : "=r"(__pu_err)                            \
-               : "r"(ptr), "r"(__val), "0"(__pu_err));     \
+#define __put_user_asm64(sr, __val, ptr) do {                  \
+       __asm__ __volatile__ (                                  \
+               "1: stw %2,0(" sr "%1)\n"                       \
+               "2: stw %R2,4(" sr "%1)\n"                      \
+               "9:\n"                                          \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b)         \
+               ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b)         \
+               : "=r"(__pu_err)                                \
+               : "r"(ptr), "r"(__val), "0"(__pu_err));         \
 } while (0)
 
 #endif /* !defined(CONFIG_64BIT) */
@@ -200,14 +201,12 @@ struct exception_table_entry {
  */
 
 extern long strncpy_from_user(char *, const char __user *, long);
-extern unsigned lclear_user(void __user *, unsigned long);
-extern long lstrnlen_user(const char __user *, long);
+extern __must_check unsigned lclear_user(void __user *, unsigned long);
+extern __must_check long strnlen_user(const char __user *src, long n);
 /*
  * Complex access routines -- macros
  */
-#define user_addr_max() (~0UL)
 
-#define strnlen_user lstrnlen_user
 #define clear_user lclear_user
 #define __clear_user lclear_user
 
index 33113ba..22924a3 100644 (file)
@@ -230,7 +230,6 @@ int main(void)
        DEFINE(TI_TASK, offsetof(struct thread_info, task));
        DEFINE(TI_FLAGS, offsetof(struct thread_info, flags));
        DEFINE(TI_CPU, offsetof(struct thread_info, cpu));
-       DEFINE(TI_SEGMENT, offsetof(struct thread_info, addr_limit));
        DEFINE(TI_PRE_COUNT, offsetof(struct thread_info, preempt_count));
        DEFINE(THREAD_SZ, sizeof(struct thread_info));
        /* THREAD_SZ_ALGN includes space for a stack frame. */
index e8a6a75..00297e8 100644 (file)
@@ -32,7 +32,6 @@ EXPORT_SYMBOL(__xchg64);
 
 #include <linux/uaccess.h>
 EXPORT_SYMBOL(lclear_user);
-EXPORT_SYMBOL(lstrnlen_user);
 
 #ifndef CONFIG_64BIT
 /* Needed so insmod can set dp value */
index 3fb86ee..cceb098 100644 (file)
@@ -150,8 +150,6 @@ void __init setup_arch(char **cmdline_p)
 #ifdef CONFIG_PA11
        dma_ops_init();
 #endif
-
-       clear_sched_clock_stable();
 }
 
 /*
index db1a47c..bbfe23c 100644 (file)
@@ -237,18 +237,22 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 #endif
        
        usp = (regs->gr[30] & ~(0x01UL));
+       sigframe_size = PARISC_RT_SIGFRAME_SIZE;
 #ifdef CONFIG_64BIT
        if (is_compat_task()) {
                /* The gcc alloca implementation leaves garbage in the upper 32 bits of sp */
                usp = (compat_uint_t)usp;
+               sigframe_size = PARISC_RT_SIGFRAME_SIZE32;
        }
 #endif
-       /*FIXME: frame_size parameter is unused, remove it. */
-       frame = get_sigframe(&ksig->ka, usp, sizeof(*frame));
+       frame = get_sigframe(&ksig->ka, usp, sigframe_size);
 
        DBG(1,"SETUP_RT_FRAME: START\n");
        DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info);
 
+       start = (unsigned long) frame;
+       if (start >= user_addr_max() - sigframe_size)
+               return -EFAULT;
        
 #ifdef CONFIG_64BIT
 
@@ -284,32 +288,21 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
           already in userspace. The first words of tramp are used to
           save the previous sigrestartblock trampoline that might be
           on the stack. We start the sigreturn trampoline at 
-          SIGRESTARTBLOCK_TRAMP+X. */
+          SIGRESTARTBLOCK_TRAMP. */
        err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0,
                        &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]);
-       err |= __put_user(INSN_LDI_R20, 
-                       &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
        err |= __put_user(INSN_BLE_SR2_R0, 
+                       &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]);
+       err |= __put_user(INSN_LDI_R20,
                        &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]);
-       err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]);
-
-#if DEBUG_SIG
-       /* Assert that we're flushing in the correct space... */
-       {
-               unsigned long sid;
-               asm ("mfsp %%sr3,%0" : "=r" (sid));
-               DBG(1,"setup_rt_frame: Flushing 64 bytes at space %#x offset %p\n",
-                      sid, frame->tramp);
-       }
-#endif
 
-       start = (unsigned long) &frame->tramp[0];
-       end = (unsigned long) &frame->tramp[TRAMP_SIZE];
+       start = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+0];
+       end = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP+3];
        flush_user_dcache_range_asm(start, end);
        flush_user_icache_range_asm(start, end);
 
        /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP
-        * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP
+        * TRAMP Words 5-7, Length 3 = SIGRETURN_TRAMP
         * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP
         */
        rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP];
@@ -353,11 +346,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs,
 
        /* The syscall return path will create IAOQ values from r31.
         */
-       sigframe_size = PARISC_RT_SIGFRAME_SIZE;
-#ifdef CONFIG_64BIT
-       if (is_compat_task())
-               sigframe_size = PARISC_RT_SIGFRAME_SIZE32;
-#endif
        if (in_syscall) {
                regs->gr[31] = haddr;
 #ifdef CONFIG_64BIT
@@ -501,7 +489,6 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka)
                DBG(1,"ERESTARTNOHAND: returning -EINTR\n");
                regs->gr[28] = -EINTR;
                break;
-
        case -ERESTARTSYS:
                if (!(ka->sa.sa_flags & SA_RESTART)) {
                        DBG(1,"ERESTARTSYS: putting -EINTR\n");
@@ -529,6 +516,10 @@ insert_restart_trampoline(struct pt_regs *regs)
                unsigned long end  = (unsigned long) &usp[5];
                long err = 0;
 
+               /* check that we don't exceed the stack */
+               if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int))
+                       return;
+
                /* Setup a trampoline to restart the syscall
                 * with __NR_restart_syscall
                 *
@@ -569,10 +560,6 @@ insert_restart_trampoline(struct pt_regs *regs)
 }
 
 /*
- * Note that 'init' is a special process: it doesn't get signals it doesn't
- * want to handle. Thus you cannot kill init even with a SIGKILL even by
- * mistake.
- *
  * We need to be able to restore the syscall arguments (r21-r26) to
  * restart syscalls.  Thus, the syscall path should save them in the
  * pt_regs structure (it's okay to do so since they are caller-save
index f166250..a5bdbb5 100644 (file)
@@ -36,7 +36,7 @@ struct compat_regfile {
         compat_int_t rf_sar;
 };
 
-#define COMPAT_SIGRETURN_TRAMP 4
+#define COMPAT_SIGRETURN_TRAMP 3
 #define COMPAT_SIGRESTARTBLOCK_TRAMP 5
 #define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \
                                COMPAT_SIGRESTARTBLOCK_TRAMP)
index 08e4d48..9fb1e79 100644 (file)
@@ -265,6 +265,9 @@ static int __init init_cr16_clocksource(void)
                            (cpu0_loc == per_cpu(cpu_data, cpu).cpu_loc))
                                continue;
 
+                       /* mark sched_clock unstable */
+                       clear_sched_clock_stable();
+
                        clocksource_cr16.name = "cr16_unstable";
                        clocksource_cr16.flags = CLOCK_SOURCE_UNSTABLE;
                        clocksource_cr16.rating = 0;
@@ -272,10 +275,6 @@ static int __init init_cr16_clocksource(void)
                }
        }
 
-       /* XXX: We may want to mark sched_clock stable here if cr16 clocks are
-        *      in sync:
-        *      (clocksource_cr16.flags == CLOCK_SOURCE_IS_CONTINUOUS) */
-
        /* register at clocksource framework */
        clocksource_register_hz(&clocksource_cr16,
                100 * PAGE0->mem_10msec);
index 36d6a86..b428d29 100644 (file)
 #include <linux/linkage.h>
 
        /*
-        * get_sr gets the appropriate space value into
-        * sr1 for kernel/user space access, depending
-        * on the flag stored in the task structure.
-        */
-
-       .macro  get_sr
-       mfctl       %cr30,%r1
-       ldw         TI_SEGMENT(%r1),%r22
-       mfsp        %sr3,%r1
-       or,<>       %r22,%r0,%r0
-       copy        %r0,%r1
-       mtsp        %r1,%sr1
-       .endm
-
-       /*
         * unsigned long lclear_user(void *to, unsigned long n)
         *
         * Returns 0 for success.
 
 ENTRY_CFI(lclear_user)
        comib,=,n   0,%r25,$lclu_done
-       get_sr
 $lclu_loop:
        addib,<>    -1,%r25,$lclu_loop
-1:      stbs,ma     %r0,1(%sr1,%r26)
+1:     stbs,ma     %r0,1(%sr3,%r26)
 
 $lclu_done:
        bv          %r0(%r2)
@@ -67,40 +51,6 @@ $lclu_done:
 ENDPROC_CFI(lclear_user)
 
 
-       /*
-        * long lstrnlen_user(char *s, long n)
-        *
-        * Returns 0 if exception before zero byte or reaching N,
-        *         N+1 if N would be exceeded,
-        *         else strlen + 1 (i.e. includes zero byte).
-        */
-
-ENTRY_CFI(lstrnlen_user)
-       comib,=     0,%r25,$lslen_nzero
-       copy        %r26,%r24
-       get_sr
-1:      ldbs,ma     1(%sr1,%r26),%r1
-$lslen_loop:
-       comib,=,n   0,%r1,$lslen_done
-       addib,<>    -1,%r25,$lslen_loop
-2:      ldbs,ma     1(%sr1,%r26),%r1
-$lslen_done:
-       bv          %r0(%r2)
-       sub         %r26,%r24,%r28
-
-$lslen_nzero:
-       b           $lslen_done
-       ldo         1(%r26),%r26 /* special case for N == 0 */
-
-3:      b          $lslen_done
-       copy        %r24,%r26    /* reset r26 so 0 is returned on fault */
-
-       ASM_EXCEPTIONTABLE_ENTRY(1b,3b)
-       ASM_EXCEPTIONTABLE_ENTRY(2b,3b)
-
-ENDPROC_CFI(lstrnlen_user)
-
-
 /*
  * unsigned long pa_memcpy(void *dstp, const void *srcp, unsigned long len)
  *
index c799556..c3f3fd5 100644 (file)
@@ -41,6 +41,7 @@ config RISCV
        select ARCH_WANT_FRAME_POINTERS
        select ARCH_WANT_HUGE_PMD_SHARE if 64BIT
        select BINFMT_FLAT_NO_DATA_START_OFFSET if !MMU
+       select BUILDTIME_TABLE_SORT if MMU
        select CLONE_BACKWARDS
        select CLINT_TIMER if !MMU
        select COMMON_CLK
index 01906a9..0eb4568 100644 (file)
@@ -132,8 +132,11 @@ $(BOOT_TARGETS): vmlinux
 Image.%: Image
        $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
 
-zinstall install:
-       $(Q)$(MAKE) $(build)=$(boot) $@
+install: install-image = Image
+zinstall: install-image = Image.gz
+install zinstall:
+       $(CONFIG_SHELL) $(srctree)/$(boot)/install.sh $(KERNELRELEASE) \
+       $(boot)/$(install-image) System.map "$(INSTALL_PATH)"
 
 archclean:
        $(Q)$(MAKE) $(clean)=$(boot)
index 6bf299f..becd062 100644 (file)
@@ -58,11 +58,3 @@ $(obj)/Image.lzo: $(obj)/Image FORCE
 
 $(obj)/loader.bin: $(obj)/loader FORCE
        $(call if_changed,objcopy)
-
-install:
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
-       $(obj)/Image System.map "$(INSTALL_PATH)"
-
-zinstall:
-       $(CONFIG_SHELL) $(srctree)/$(src)/install.sh $(KERNELRELEASE) \
-       $(obj)/Image.gz System.map "$(INSTALL_PATH)"
index baea7d2..b254c60 100644 (file)
 
        aliases {
                ethernet0 = &emac1;
+               serial0 = &serial0;
+               serial1 = &serial1;
+               serial2 = &serial2;
+               serial3 = &serial3;
        };
 
        chosen {
-               stdout-path = &serial0;
+               stdout-path = "serial0:115200n8";
        };
 
        cpus {
index bc68231..4ebc803 100644 (file)
@@ -39,10 +39,12 @@ CONFIG_PCI=y
 CONFIG_PCIEPORTBUS=y
 CONFIG_PCI_HOST_GENERIC=y
 CONFIG_PCIE_XILINX=y
+CONFIG_PCIE_FU740=y
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 CONFIG_BLK_DEV_LOOP=y
 CONFIG_VIRTIO_BLK=y
+CONFIG_BLK_DEV_NVME=m
 CONFIG_BLK_DEV_SD=y
 CONFIG_BLK_DEV_SR=y
 CONFIG_SCSI_VIRTIO=y
@@ -108,6 +110,8 @@ CONFIG_NFS_V4_1=y
 CONFIG_NFS_V4_2=y
 CONFIG_ROOT_NFS=y
 CONFIG_9P_FS=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_ISO8859_1=m
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_PRINTK_TIME=y
index f4b490c..f53c400 100644 (file)
@@ -42,6 +42,9 @@
  */
 #define ELF_ET_DYN_BASE                ((TASK_SIZE / 3) * 2)
 
+#ifdef CONFIG_64BIT
+#define STACK_RND_MASK         (0x3ffff >> (PAGE_SHIFT - 12))
+#endif
 /*
  * This yields a mask that user programs can use to figure out what
  * instruction set this CPU supports.  This could be done in user space,
index d867813..90deabf 100644 (file)
@@ -113,7 +113,7 @@ static void fill_cacheinfo(struct cacheinfo **this_leaf,
        }
 }
 
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
 {
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct device_node *np = of_cpu_device_node_get(cpu);
@@ -155,7 +155,7 @@ static int __init_cache_level(unsigned int cpu)
        return 0;
 }
 
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
 {
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
        struct cacheinfo *this_leaf = this_cpu_ci->info_list;
@@ -187,6 +187,3 @@ static int __populate_cache_leaves(unsigned int cpu)
 
        return 0;
 }
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
index af77655..9c9f350 100644 (file)
@@ -121,7 +121,6 @@ SECTIONS
        }
 
        BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0)
-       EXCEPTION_TABLE(0x10)
 
        .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) {
                *(.rel.dyn*)
index 502d082..5104f3a 100644 (file)
@@ -4,6 +4,8 @@
  * Copyright (C) 2017 SiFive
  */
 
+#define RO_EXCEPTION_TABLE_ALIGN       16
+
 #ifdef CONFIG_XIP_KERNEL
 #include "vmlinux-xip.lds.S"
 #else
@@ -112,8 +114,6 @@ SECTIONS
                *(.srodata*)
        }
 
-       EXCEPTION_TABLE(0x10)
-
        . = ALIGN(SECTION_ALIGN);
        _data = .;
 
index d66af29..b5e36bd 100644 (file)
@@ -985,7 +985,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf,
        this_leaf->priv = base->nb;
 }
 
-static int __init_cache_level(unsigned int cpu)
+int init_cache_level(unsigned int cpu)
 {
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
 
@@ -1014,7 +1014,7 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs)
        id4_regs->id = c->apicid >> index_msb;
 }
 
-static int __populate_cache_leaves(unsigned int cpu)
+int populate_cache_leaves(unsigned int cpu)
 {
        unsigned int idx, ret;
        struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
@@ -1033,6 +1033,3 @@ static int __populate_cache_leaves(unsigned int cpu)
 
        return 0;
 }
-
-DEFINE_SMP_CALL_CACHE_FUNCTION(init_cache_level)
-DEFINE_SMP_CALL_CACHE_FUNCTION(populate_cache_leaves)
index 6cf4027..41aa1ba 100644 (file)
@@ -3,7 +3,7 @@
 # Makefile for the kernel block layer
 #
 
-obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-sysfs.o \
+obj-$(CONFIG_BLOCK) := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \
                        blk-flush.o blk-settings.o blk-ioc.o blk-map.o \
                        blk-exec.o blk-merge.o blk-timeout.o \
                        blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \
diff --git a/block/bdev.c b/block/bdev.c
new file mode 100644 (file)
index 0000000..cf2780c
--- /dev/null
@@ -0,0 +1,1058 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
+ *  Copyright (C) 2016 - 2020 Christoph Hellwig
+ */
+
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/major.h>
+#include <linux/device_cgroup.h>
+#include <linux/blkdev.h>
+#include <linux/backing-dev.h>
+#include <linux/module.h>
+#include <linux/blkpg.h>
+#include <linux/magic.h>
+#include <linux/buffer_head.h>
+#include <linux/swap.h>
+#include <linux/writeback.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/uio.h>
+#include <linux/namei.h>
+#include <linux/cleancache.h>
+#include <linux/part_stat.h>
+#include <linux/uaccess.h>
+#include "../fs/internal.h"
+#include "blk.h"
+
+struct bdev_inode {
+       struct block_device bdev;
+       struct inode vfs_inode;
+};
+
+static inline struct bdev_inode *BDEV_I(struct inode *inode)
+{
+       return container_of(inode, struct bdev_inode, vfs_inode);
+}
+
+struct block_device *I_BDEV(struct inode *inode)
+{
+       return &BDEV_I(inode)->bdev;
+}
+EXPORT_SYMBOL(I_BDEV);
+
+static void bdev_write_inode(struct block_device *bdev)
+{
+       struct inode *inode = bdev->bd_inode;
+       int ret;
+
+       spin_lock(&inode->i_lock);
+       while (inode->i_state & I_DIRTY) {
+               spin_unlock(&inode->i_lock);
+               ret = write_inode_now(inode, true);
+               if (ret) {
+                       char name[BDEVNAME_SIZE];
+                       pr_warn_ratelimited("VFS: Dirty inode writeback failed "
+                                           "for block device %s (err=%d).\n",
+                                           bdevname(bdev, name), ret);
+               }
+               spin_lock(&inode->i_lock);
+       }
+       spin_unlock(&inode->i_lock);
+}
+
+/* Kill _all_ buffers and pagecache, dirty or not. */
+static void kill_bdev(struct block_device *bdev)
+{
+       struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+       if (mapping_empty(mapping))
+               return;
+
+       invalidate_bh_lrus();
+       truncate_inode_pages(mapping, 0);
+}
+
+/* Invalidate clean unused buffers and pagecache. */
+void invalidate_bdev(struct block_device *bdev)
+{
+       struct address_space *mapping = bdev->bd_inode->i_mapping;
+
+       if (mapping->nrpages) {
+               invalidate_bh_lrus();
+               lru_add_drain_all();    /* make sure all lru add caches are flushed */
+               invalidate_mapping_pages(mapping, 0, -1);
+       }
+       /* 99% of the time, we don't need to flush the cleancache on the bdev.
+        * But, for the strange corners, let's be cautious
+        */
+       cleancache_invalidate_inode(mapping);
+}
+EXPORT_SYMBOL(invalidate_bdev);
+
+/*
+ * Drop all buffers & page cache for given bdev range. This function bails
+ * with error if bdev has other exclusive owner (such as filesystem).
+ */
+int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
+                       loff_t lstart, loff_t lend)
+{
+       /*
+        * If we don't hold exclusive handle for the device, upgrade to it
+        * while we discard the buffer cache to avoid discarding buffers
+        * under live filesystem.
+        */
+       if (!(mode & FMODE_EXCL)) {
+               int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
+               if (err)
+                       goto invalidate;
+       }
+
+       truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
+       if (!(mode & FMODE_EXCL))
+               bd_abort_claiming(bdev, truncate_bdev_range);
+       return 0;
+
+invalidate:
+       /*
+        * Someone else has handle exclusively open. Try invalidating instead.
+        * The 'end' argument is inclusive so the rounding is safe.
+        */
+       return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
+                                            lstart >> PAGE_SHIFT,
+                                            lend >> PAGE_SHIFT);
+}
+
+static void set_init_blocksize(struct block_device *bdev)
+{
+       unsigned int bsize = bdev_logical_block_size(bdev);
+       loff_t size = i_size_read(bdev->bd_inode);
+
+       while (bsize < PAGE_SIZE) {
+               if (size & bsize)
+                       break;
+               bsize <<= 1;
+       }
+       bdev->bd_inode->i_blkbits = blksize_bits(bsize);
+}
+
+int set_blocksize(struct block_device *bdev, int size)
+{
+       /* Size must be a power of two, and between 512 and PAGE_SIZE */
+       if (size > PAGE_SIZE || size < 512 || !is_power_of_2(size))
+               return -EINVAL;
+
+       /* Size cannot be smaller than the size supported by the device */
+       if (size < bdev_logical_block_size(bdev))
+               return -EINVAL;
+
+       /* Don't change the size if it is same as current */
+       if (bdev->bd_inode->i_blkbits != blksize_bits(size)) {
+               sync_blockdev(bdev);
+               bdev->bd_inode->i_blkbits = blksize_bits(size);
+               kill_bdev(bdev);
+       }
+       return 0;
+}
+
+EXPORT_SYMBOL(set_blocksize);
+
+int sb_set_blocksize(struct super_block *sb, int size)
+{
+       if (set_blocksize(sb->s_bdev, size))
+               return 0;
+       /* If we get here, we know size is power of two
+        * and its value is between 512 and PAGE_SIZE */
+       sb->s_blocksize = size;
+       sb->s_blocksize_bits = blksize_bits(size);
+       return sb->s_blocksize;
+}
+
+EXPORT_SYMBOL(sb_set_blocksize);
+
+int sb_min_blocksize(struct super_block *sb, int size)
+{
+       int minsize = bdev_logical_block_size(sb->s_bdev);
+       if (size < minsize)
+               size = minsize;
+       return sb_set_blocksize(sb, size);
+}
+
+EXPORT_SYMBOL(sb_min_blocksize);
+
+int __sync_blockdev(struct block_device *bdev, int wait)
+{
+       if (!bdev)
+               return 0;
+       if (!wait)
+               return filemap_flush(bdev->bd_inode->i_mapping);
+       return filemap_write_and_wait(bdev->bd_inode->i_mapping);
+}
+
+/*
+ * Write out and wait upon all the dirty data associated with a block
+ * device via its mapping.  Does not take the superblock lock.
+ */
+int sync_blockdev(struct block_device *bdev)
+{
+       return __sync_blockdev(bdev, 1);
+}
+EXPORT_SYMBOL(sync_blockdev);
+
+/*
+ * Write out and wait upon all dirty data associated with this
+ * device.   Filesystem data as well as the underlying block
+ * device.  Takes the superblock lock.
+ */
+int fsync_bdev(struct block_device *bdev)
+{
+       struct super_block *sb = get_super(bdev);
+       if (sb) {
+               int res = sync_filesystem(sb);
+               drop_super(sb);
+               return res;
+       }
+       return sync_blockdev(bdev);
+}
+EXPORT_SYMBOL(fsync_bdev);
+
+/**
+ * freeze_bdev  --  lock a filesystem and force it into a consistent state
+ * @bdev:      blockdevice to lock
+ *
+ * If a superblock is found on this device, we take the s_umount semaphore
+ * on it to make sure nobody unmounts until the snapshot creation is done.
+ * The reference counter (bd_fsfreeze_count) guarantees that only the last
+ * thaw_bdev() call actually unfreezes the filesystem when multiple freeze
+ * requests have been made.  It is incremented in freeze_bdev() and
+ * decremented in thaw_bdev(); when it drops to 0, thaw_bdev() performs the
+ * actual unfreeze.
+ */
+int freeze_bdev(struct block_device *bdev)
+{
+       struct super_block *sb;
+       int error = 0;
+
+       mutex_lock(&bdev->bd_fsfreeze_mutex);
+       if (++bdev->bd_fsfreeze_count > 1)
+               goto done;
+
+       sb = get_active_super(bdev);
+       if (!sb)
+               goto sync;
+       if (sb->s_op->freeze_super)
+               error = sb->s_op->freeze_super(sb);
+       else
+               error = freeze_super(sb);
+       deactivate_super(sb);
+
+       if (error) {
+               bdev->bd_fsfreeze_count--;
+               goto done;
+       }
+       bdev->bd_fsfreeze_sb = sb;
+
+sync:
+       sync_blockdev(bdev);
+done:
+       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+       return error;
+}
+EXPORT_SYMBOL(freeze_bdev);
+
+/**
+ * thaw_bdev  -- unlock filesystem
+ * @bdev:      blockdevice to unlock
+ *
+ * Unlocks the filesystem and marks it writeable again after freeze_bdev().
+ */
+int thaw_bdev(struct block_device *bdev)
+{
+       struct super_block *sb;
+       int error = -EINVAL;
+
+       mutex_lock(&bdev->bd_fsfreeze_mutex);
+       if (!bdev->bd_fsfreeze_count)
+               goto out;
+
+       error = 0;
+       if (--bdev->bd_fsfreeze_count > 0)
+               goto out;
+
+       sb = bdev->bd_fsfreeze_sb;
+       if (!sb)
+               goto out;
+
+       if (sb->s_op->thaw_super)
+               error = sb->s_op->thaw_super(sb);
+       else
+               error = thaw_super(sb);
+       if (error)
+               bdev->bd_fsfreeze_count++;
+       else
+               bdev->bd_fsfreeze_sb = NULL;
+out:
+       mutex_unlock(&bdev->bd_fsfreeze_mutex);
+       return error;
+}
+EXPORT_SYMBOL(thaw_bdev);
+
+/**
+ * bdev_read_page() - Start reading a page from a block device
+ * @bdev: The device to read the page from
+ * @sector: The offset on the device to read the page from (need not be aligned)
+ * @page: The page to read
+ *
+ * On entry, the page should be locked.  It will be unlocked when the page
+ * has been read.  If the block driver implements rw_page synchronously,
+ * that will be true on exit from this function, but it need not be.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to read this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_read_page(struct block_device *bdev, sector_t sector,
+                       struct page *page)
+{
+       const struct block_device_operations *ops = bdev->bd_disk->fops;
+       int result = -EOPNOTSUPP;
+
+       if (!ops->rw_page || bdev_get_integrity(bdev))
+               return result;
+
+       result = blk_queue_enter(bdev->bd_disk->queue, 0);
+       if (result)
+               return result;
+       result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+                             REQ_OP_READ);
+       blk_queue_exit(bdev->bd_disk->queue);
+       return result;
+}
+
+/**
+ * bdev_write_page() - Start writing a page to a block device
+ * @bdev: The device to write the page to
+ * @sector: The offset on the device to write the page to (need not be aligned)
+ * @page: The page to write
+ * @wbc: The writeback_control for the write
+ *
+ * On entry, the page should be locked and not currently under writeback.
+ * On exit, if the write started successfully, the page will be unlocked and
+ * under writeback.  If the write failed already (eg the driver failed to
+ * queue the page to the device), the page will still be locked.  If the
+ * caller is a ->writepage implementation, it will need to unlock the page.
+ *
+ * Errors returned by this function are usually "soft", eg out of memory, or
+ * queue full; callers should try a different route to write this page rather
+ * than propagate an error back up the stack.
+ *
+ * Return: negative errno if an error occurs, 0 if submission was successful.
+ */
+int bdev_write_page(struct block_device *bdev, sector_t sector,
+                       struct page *page, struct writeback_control *wbc)
+{
+       int result;
+       const struct block_device_operations *ops = bdev->bd_disk->fops;
+
+       if (!ops->rw_page || bdev_get_integrity(bdev))
+               return -EOPNOTSUPP;
+       result = blk_queue_enter(bdev->bd_disk->queue, 0);
+       if (result)
+               return result;
+
+       set_page_writeback(page);
+       result = ops->rw_page(bdev, sector + get_start_sect(bdev), page,
+                             REQ_OP_WRITE);
+       if (result) {
+               end_page_writeback(page);
+       } else {
+               clean_page_buffers(page);
+               unlock_page(page);
+       }
+       blk_queue_exit(bdev->bd_disk->queue);
+       return result;
+}
+
+/*
+ * pseudo-fs
+ */
+
+static  __cacheline_aligned_in_smp DEFINE_SPINLOCK(bdev_lock);
+static struct kmem_cache * bdev_cachep __read_mostly;
+
+static struct inode *bdev_alloc_inode(struct super_block *sb)
+{
+       struct bdev_inode *ei = kmem_cache_alloc(bdev_cachep, GFP_KERNEL);
+
+       if (!ei)
+               return NULL;
+       memset(&ei->bdev, 0, sizeof(ei->bdev));
+       return &ei->vfs_inode;
+}
+
+static void bdev_free_inode(struct inode *inode)
+{
+       struct block_device *bdev = I_BDEV(inode);
+
+       free_percpu(bdev->bd_stats);
+       kfree(bdev->bd_meta_info);
+
+       if (!bdev_is_partition(bdev)) {
+               if (bdev->bd_disk && bdev->bd_disk->bdi)
+                       bdi_put(bdev->bd_disk->bdi);
+               kfree(bdev->bd_disk);
+       }
+
+       if (MAJOR(bdev->bd_dev) == BLOCK_EXT_MAJOR)
+               blk_free_ext_minor(MINOR(bdev->bd_dev));
+
+       kmem_cache_free(bdev_cachep, BDEV_I(inode));
+}
+
+static void init_once(void *data)
+{
+       struct bdev_inode *ei = data;
+
+       inode_init_once(&ei->vfs_inode);
+}
+
+static void bdev_evict_inode(struct inode *inode)
+{
+       truncate_inode_pages_final(&inode->i_data);
+       invalidate_inode_buffers(inode); /* is it needed here? */
+       clear_inode(inode);
+}
+
+static const struct super_operations bdev_sops = {
+       .statfs = simple_statfs,
+       .alloc_inode = bdev_alloc_inode,
+       .free_inode = bdev_free_inode,
+       .drop_inode = generic_delete_inode,
+       .evict_inode = bdev_evict_inode,
+};
+
+static int bd_init_fs_context(struct fs_context *fc)
+{
+       struct pseudo_fs_context *ctx = init_pseudo(fc, BDEVFS_MAGIC);
+       if (!ctx)
+               return -ENOMEM;
+       fc->s_iflags |= SB_I_CGROUPWB;
+       ctx->ops = &bdev_sops;
+       return 0;
+}
+
+static struct file_system_type bd_type = {
+       .name           = "bdev",
+       .init_fs_context = bd_init_fs_context,
+       .kill_sb        = kill_anon_super,
+};
+
+struct super_block *blockdev_superblock __read_mostly;
+EXPORT_SYMBOL_GPL(blockdev_superblock);
+
+void __init bdev_cache_init(void)
+{
+       int err;
+       static struct vfsmount *bd_mnt;
+
+       bdev_cachep = kmem_cache_create("bdev_cache", sizeof(struct bdev_inode),
+                       0, (SLAB_HWCACHE_ALIGN|SLAB_RECLAIM_ACCOUNT|
+                               SLAB_MEM_SPREAD|SLAB_ACCOUNT|SLAB_PANIC),
+                       init_once);
+       err = register_filesystem(&bd_type);
+       if (err)
+               panic("Cannot register bdev pseudo-fs");
+       bd_mnt = kern_mount(&bd_type);
+       if (IS_ERR(bd_mnt))
+               panic("Cannot create bdev pseudo-fs");
+       blockdev_superblock = bd_mnt->mnt_sb;   /* For writeback */
+}
+
+struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
+{
+       struct block_device *bdev;
+       struct inode *inode;
+
+       inode = new_inode(blockdev_superblock);
+       if (!inode)
+               return NULL;
+       inode->i_mode = S_IFBLK;
+       inode->i_rdev = 0;
+       inode->i_data.a_ops = &def_blk_aops;
+       mapping_set_gfp_mask(&inode->i_data, GFP_USER);
+
+       bdev = I_BDEV(inode);
+       mutex_init(&bdev->bd_fsfreeze_mutex);
+       spin_lock_init(&bdev->bd_size_lock);
+       bdev->bd_disk = disk;
+       bdev->bd_partno = partno;
+       bdev->bd_inode = inode;
+       bdev->bd_stats = alloc_percpu(struct disk_stats);
+       if (!bdev->bd_stats) {
+               iput(inode);
+               return NULL;
+       }
+       return bdev;
+}
+
+void bdev_add(struct block_device *bdev, dev_t dev)
+{
+       bdev->bd_dev = dev;
+       bdev->bd_inode->i_rdev = dev;
+       bdev->bd_inode->i_ino = dev;
+       insert_inode_hash(bdev->bd_inode);
+}
+
+long nr_blockdev_pages(void)
+{
+       struct inode *inode;
+       long ret = 0;
+
+       spin_lock(&blockdev_superblock->s_inode_list_lock);
+       list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list)
+               ret += inode->i_mapping->nrpages;
+       spin_unlock(&blockdev_superblock->s_inode_list_lock);
+
+       return ret;
+}
+
+/**
+ * bd_may_claim - test whether a block device can be claimed
+ * @bdev: block device of interest
+ * @whole: whole block device containing @bdev, may equal @bdev
+ * @holder: holder trying to claim @bdev
+ *
+ * Test whether @bdev can be claimed by @holder.
+ *
+ * CONTEXT:
+ * spin_lock(&bdev_lock).
+ *
+ * RETURNS:
+ * %true if @bdev can be claimed, %false otherwise.
+ */
+static bool bd_may_claim(struct block_device *bdev, struct block_device *whole,
+                        void *holder)
+{
+       if (bdev->bd_holder == holder)
+               return true;     /* already a holder */
+       else if (bdev->bd_holder != NULL)
+               return false;    /* held by someone else */
+       else if (whole == bdev)
+               return true;     /* is a whole device which isn't held */
+
+       else if (whole->bd_holder == bd_may_claim)
+               return true;     /* is a partition of a device that is being partitioned */
+       else if (whole->bd_holder != NULL)
+               return false;    /* is a partition of a held device */
+       else
+               return true;     /* is a partition of an un-held device */
+}
+
+/**
+ * bd_prepare_to_claim - claim a block device
+ * @bdev: block device of interest
+ * @holder: holder trying to claim @bdev
+ *
+ * Claim @bdev.  This function fails if @bdev is already claimed by another
+ * holder and waits if another claiming is in progress.  On successful return,
+ * the caller has ownership of bd_claiming and bd_holder[s].
+ *
+ * RETURNS:
+ * 0 if @bdev can be claimed, -EBUSY otherwise.
+ */
+int bd_prepare_to_claim(struct block_device *bdev, void *holder)
+{
+       struct block_device *whole = bdev_whole(bdev);
+
+       if (WARN_ON_ONCE(!holder))
+               return -EINVAL;
+retry:
+       spin_lock(&bdev_lock);
+       /* if someone else claimed, fail */
+       if (!bd_may_claim(bdev, whole, holder)) {
+               spin_unlock(&bdev_lock);
+               return -EBUSY;
+       }
+
+       /* if claiming is already in progress, wait for it to finish */
+       if (whole->bd_claiming) {
+               wait_queue_head_t *wq = bit_waitqueue(&whole->bd_claiming, 0);
+               DEFINE_WAIT(wait);
+
+               prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+               spin_unlock(&bdev_lock);
+               schedule();
+               finish_wait(wq, &wait);
+               goto retry;
+       }
+
+       /* yay, all mine */
+       whole->bd_claiming = holder;
+       spin_unlock(&bdev_lock);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(bd_prepare_to_claim); /* only for the loop driver */
+
+static void bd_clear_claiming(struct block_device *whole, void *holder)
+{
+       lockdep_assert_held(&bdev_lock);
+       /* tell others that we're done */
+       BUG_ON(whole->bd_claiming != holder);
+       whole->bd_claiming = NULL;
+       wake_up_bit(&whole->bd_claiming, 0);
+}
+
+/**
+ * bd_finish_claiming - finish claiming of a block device
+ * @bdev: block device of interest
+ * @holder: holder that has claimed @bdev
+ *
+ * Finish exclusive open of a block device. Mark the device as exclusively
+ * open by the holder and wake up all waiters for exclusive open to finish.
+ */
+static void bd_finish_claiming(struct block_device *bdev, void *holder)
+{
+       struct block_device *whole = bdev_whole(bdev);
+
+       spin_lock(&bdev_lock);
+       BUG_ON(!bd_may_claim(bdev, whole, holder));
+       /*
+        * Note that for a whole device bd_holders will be incremented twice,
+        * and bd_holder will be set to bd_may_claim before being set to holder
+        */
+       whole->bd_holders++;
+       whole->bd_holder = bd_may_claim;
+       bdev->bd_holders++;
+       bdev->bd_holder = holder;
+       bd_clear_claiming(whole, holder);
+       spin_unlock(&bdev_lock);
+}
+
+/**
+ * bd_abort_claiming - abort claiming of a block device
+ * @bdev: block device of interest
+ * @holder: holder that has claimed @bdev
+ *
+ * Abort claiming of a block device when the exclusive open failed. This can be
+ * also used when exclusive open is not actually desired and we just needed
+ * to block other exclusive openers for a while.
+ */
+void bd_abort_claiming(struct block_device *bdev, void *holder)
+{
+       spin_lock(&bdev_lock);
+       bd_clear_claiming(bdev_whole(bdev), holder);
+       spin_unlock(&bdev_lock);
+}
+EXPORT_SYMBOL(bd_abort_claiming);
+
+static void blkdev_flush_mapping(struct block_device *bdev)
+{
+       WARN_ON_ONCE(bdev->bd_holders);
+       sync_blockdev(bdev);
+       kill_bdev(bdev);
+       bdev_write_inode(bdev);
+}
+
+static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
+{
+       struct gendisk *disk = bdev->bd_disk;
+       int ret = 0;
+
+       if (disk->fops->open) { /* the driver's open hook is optional */
+               ret = disk->fops->open(bdev, mode);
+               if (ret) {
+                       /* avoid ghost partitions on a removed medium */
+                       if (ret == -ENOMEDIUM &&
+                            test_bit(GD_NEED_PART_SCAN, &disk->state))
+                               bdev_disk_changed(disk, true);
+                       return ret; /* pass the driver's error to the caller */
+               }
+       }
+
+       if (!bdev->bd_openers) /* no opener yet: pick the initial block size */
+               set_init_blocksize(bdev);
+       if (test_bit(GD_NEED_PART_SCAN, &disk->state))
+               bdev_disk_changed(disk, false);
+       bdev->bd_openers++; /* count this successful open */
+       return 0;
+}
+
+static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+{
+       if (!--bdev->bd_openers)
+               blkdev_flush_mapping(bdev);
+       if (bdev->bd_disk->fops->release)
+               bdev->bd_disk->fops->release(bdev->bd_disk, mode);
+}
+
+static int blkdev_get_part(struct block_device *part, fmode_t mode)
+{
+       struct gendisk *disk = part->bd_disk;
+       int ret;
+
+       if (part->bd_openers)
+               goto done;
+
+       ret = blkdev_get_whole(bdev_whole(part), mode);
+       if (ret)
+               return ret;
+
+       ret = -ENXIO;
+       if (!bdev_nr_sectors(part))
+               goto out_blkdev_put;
+
+       disk->open_partitions++;
+       set_init_blocksize(part);
+done:
+       part->bd_openers++;
+       return 0;
+
+out_blkdev_put:
+       blkdev_put_whole(bdev_whole(part), mode);
+       return ret;
+}
+
+static void blkdev_put_part(struct block_device *part, fmode_t mode)
+{
+       struct block_device *whole = bdev_whole(part);
+
+       if (--part->bd_openers)
+               return;
+       blkdev_flush_mapping(part);
+       whole->bd_disk->open_partitions--;
+       blkdev_put_whole(whole, mode);
+}
+
+struct block_device *blkdev_get_no_open(dev_t dev)
+{
+       struct block_device *bdev;
+       struct inode *inode;
+
+       inode = ilookup(blockdev_superblock, dev);
+       if (!inode) {
+               blk_request_module(dev);
+               inode = ilookup(blockdev_superblock, dev);
+               if (!inode)
+                       return NULL;
+       }
+
+       /* switch from the inode reference to a device model one: */
+       bdev = &BDEV_I(inode)->bdev;
+       if (!kobject_get_unless_zero(&bdev->bd_device.kobj))
+               bdev = NULL;
+       iput(inode);
+
+       if (!bdev)
+               return NULL;
+       if ((bdev->bd_disk->flags & GENHD_FL_HIDDEN) ||
+           !try_module_get(bdev->bd_disk->fops->owner)) {
+               put_device(&bdev->bd_device);
+               return NULL;
+       }
+
+       return bdev;
+}
+
+void blkdev_put_no_open(struct block_device *bdev)
+{
+       module_put(bdev->bd_disk->fops->owner);
+       put_device(&bdev->bd_device);
+}
+
+/**
+ * blkdev_get_by_dev - open a block device by device number
+ * @dev: device number of block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the block device described by device number @dev. If @mode includes
+ * %FMODE_EXCL, the block device is opened with exclusive access.  Specifying
+ * %FMODE_EXCL with a %NULL @holder is invalid.  Exclusive opens may nest for
+ * the same @holder.
+ *
+ * Use this interface ONLY if you really do not have anything better - i.e. when
+ * you are behind a truly sucky interface and all you are given is a device
+ * number.  Everything else should use blkdev_get_by_path().
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
+ */
+struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
+{
+       bool unblock_events = true;
+       struct block_device *bdev;
+       struct gendisk *disk;
+       int ret;
+
+       ret = devcgroup_check_permission(DEVCG_DEV_BLOCK,
+                       MAJOR(dev), MINOR(dev),
+                       ((mode & FMODE_READ) ? DEVCG_ACC_READ : 0) |
+                       ((mode & FMODE_WRITE) ? DEVCG_ACC_WRITE : 0));
+       if (ret)
+               return ERR_PTR(ret);
+
+       bdev = blkdev_get_no_open(dev);
+       if (!bdev)
+               return ERR_PTR(-ENXIO);
+       disk = bdev->bd_disk;
+
+       if (mode & FMODE_EXCL) {
+               ret = bd_prepare_to_claim(bdev, holder);
+               if (ret)
+                       goto put_blkdev;
+       }
+
+       disk_block_events(disk);
+
+       mutex_lock(&disk->open_mutex);
+       ret = -ENXIO;
+       if (!disk_live(disk))
+               goto abort_claiming;
+       if (bdev_is_partition(bdev))
+               ret = blkdev_get_part(bdev, mode);
+       else
+               ret = blkdev_get_whole(bdev, mode);
+       if (ret)
+               goto abort_claiming;
+       if (mode & FMODE_EXCL) {
+               bd_finish_claiming(bdev, holder);
+
+               /*
+                * Block event polling for write claims if requested.  Any write
+                * holder makes the write_holder state stick until all are
+                * released.  This is good enough and tracking individual
+                * writeable reference is too fragile given the way @mode is
+                * used in blkdev_get/put().
+                */
+               if ((mode & FMODE_WRITE) && !bdev->bd_write_holder &&
+                   (disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE)) {
+                       bdev->bd_write_holder = true;
+                       unblock_events = false;
+               }
+       }
+       mutex_unlock(&disk->open_mutex);
+
+       if (unblock_events)
+               disk_unblock_events(disk);
+       return bdev;
+
+abort_claiming:
+       if (mode & FMODE_EXCL)
+               bd_abort_claiming(bdev, holder);
+       mutex_unlock(&disk->open_mutex);
+       disk_unblock_events(disk);
+put_blkdev:
+       blkdev_put_no_open(bdev);
+       return ERR_PTR(ret);
+}
+EXPORT_SYMBOL(blkdev_get_by_dev);
+
+/**
+ * blkdev_get_by_path - open a block device by name
+ * @path: path to the block device to open
+ * @mode: FMODE_* mask
+ * @holder: exclusive holder identifier
+ *
+ * Open the block device described by the device file at @path.  If @mode
+ * includes %FMODE_EXCL, the block device is opened with exclusive access.
+ * Specifying %FMODE_EXCL with a %NULL @holder is invalid.  Exclusive opens may
+ * nest for the same @holder.
+ *
+ * CONTEXT:
+ * Might sleep.
+ *
+ * RETURNS:
+ * Reference to the block_device on success, ERR_PTR(-errno) on failure.
+ */
+struct block_device *blkdev_get_by_path(const char *path, fmode_t mode,
+                                       void *holder)
+{
+       struct block_device *bdev;
+       dev_t dev;
+       int error;
+
+       error = lookup_bdev(path, &dev);
+       if (error)
+               return ERR_PTR(error);
+
+       bdev = blkdev_get_by_dev(dev, mode, holder);
+       if (!IS_ERR(bdev) && (mode & FMODE_WRITE) && bdev_read_only(bdev)) {
+               blkdev_put(bdev, mode);
+               return ERR_PTR(-EACCES);
+       }
+
+       return bdev;
+}
+EXPORT_SYMBOL(blkdev_get_by_path);
+
+void blkdev_put(struct block_device *bdev, fmode_t mode)
+{
+       struct gendisk *disk = bdev->bd_disk;
+
+       /*
+        * Sync early if it looks like we're the last one.  If someone else
+        * opens the block device between now and the decrement of bd_openers
+        * then we did a sync that we didn't need to, but that's not the end
+        * of the world and we want to avoid long (could be several minute)
+        * syncs while holding the mutex.
+        */
+       if (bdev->bd_openers == 1)
+               sync_blockdev(bdev);
+
+       mutex_lock(&disk->open_mutex);
+       if (mode & FMODE_EXCL) {
+               struct block_device *whole = bdev_whole(bdev);
+               bool bdev_free;
+
+               /*
+                * Release a claim on the device.  The holder fields
+                * are protected with bdev_lock.  open_mutex is to
+                * synchronize disk_holder unlinking.
+                */
+               spin_lock(&bdev_lock);
+
+               WARN_ON_ONCE(--bdev->bd_holders < 0);
+               WARN_ON_ONCE(--whole->bd_holders < 0);
+
+               if ((bdev_free = !bdev->bd_holders))
+                       bdev->bd_holder = NULL;
+               if (!whole->bd_holders)
+                       whole->bd_holder = NULL;
+
+               spin_unlock(&bdev_lock);
+
+               /*
+                * If this was the last claim, remove holder link and
+                * unblock evpoll if it was a write holder.
+                */
+               if (bdev_free && bdev->bd_write_holder) {
+                       disk_unblock_events(disk);
+                       bdev->bd_write_holder = false;
+               }
+       }
+
+       /*
+        * Trigger event checking and tell drivers to flush MEDIA_CHANGE
+        * event.  This is to ensure detection of media removal commanded
+        * from userland - e.g. eject(1).
+        */
+       disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
+
+       if (bdev_is_partition(bdev))
+               blkdev_put_part(bdev, mode);
+       else
+               blkdev_put_whole(bdev, mode);
+       mutex_unlock(&disk->open_mutex);
+
+       blkdev_put_no_open(bdev);
+}
+EXPORT_SYMBOL(blkdev_put);
+
+/**
+ * lookup_bdev  - look up the device number of a block device by name
+ * @pathname:  special file representing the block device
+ * @dev:       return value of the block device's dev_t
+ *
+ * Look up the block device special file at @pathname in the current
+ * namespace and store its device number in @dev.  Return 0 on success
+ * or a negative errno otherwise.
+ */
+int lookup_bdev(const char *pathname, dev_t *dev)
+{
+       struct inode *inode;
+       struct path path;
+       int error;
+
+       if (!pathname || !*pathname)
+               return -EINVAL;
+
+       error = kern_path(pathname, LOOKUP_FOLLOW, &path);
+       if (error)
+               return error;
+
+       inode = d_backing_inode(path.dentry);
+       error = -ENOTBLK;
+       if (!S_ISBLK(inode->i_mode))
+               goto out_path_put;
+       error = -EACCES;
+       if (!may_open_dev(&path))
+               goto out_path_put;
+
+       *dev = inode->i_rdev;
+       error = 0;
+out_path_put:
+       path_put(&path);
+       return error;
+}
+EXPORT_SYMBOL(lookup_bdev);
+
+int __invalidate_device(struct block_device *bdev, bool kill_dirty)
+{
+       struct super_block *sb = get_super(bdev);
+       int res = 0;
+
+       if (sb) {
+               /*
+                * no need to lock the super, get_super holds the
+                * read mutex so the filesystem cannot go away
+                * under us (->put_super runs with the write lock
+                * held).
+                */
+               shrink_dcache_sb(sb);
+               res = invalidate_inodes(sb, kill_dirty);
+               drop_super(sb);
+       }
+       invalidate_bdev(bdev);
+       return res;
+}
+EXPORT_SYMBOL(__invalidate_device);
+
+void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
+{
+       struct inode *inode, *old_inode = NULL;
+
+       spin_lock(&blockdev_superblock->s_inode_list_lock);
+       list_for_each_entry(inode, &blockdev_superblock->s_inodes, i_sb_list) {
+               struct address_space *mapping = inode->i_mapping;
+               struct block_device *bdev;
+
+               spin_lock(&inode->i_lock);
+               if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW) ||
+                   mapping->nrpages == 0) {
+                       spin_unlock(&inode->i_lock);
+                       continue;
+               }
+               __iget(inode);
+               spin_unlock(&inode->i_lock);
+               spin_unlock(&blockdev_superblock->s_inode_list_lock);
+               /*
+                * We hold a reference to 'inode' so it couldn't have been
+                * removed from s_inodes list while we dropped the
+                * s_inode_list_lock.  We cannot iput the inode now as we can
+                * be holding the last reference and we cannot iput it under
+                * s_inode_list_lock. So we keep the reference and iput it
+                * later.
+                */
+               iput(old_inode);
+               old_inode = inode;
+               bdev = I_BDEV(inode);
+
+               mutex_lock(&bdev->bd_disk->open_mutex);
+               if (bdev->bd_openers)
+                       func(bdev, arg);
+               mutex_unlock(&bdev->bd_disk->open_mutex);
+
+               spin_lock(&blockdev_superblock->s_inode_list_lock);
+       }
+       spin_unlock(&blockdev_superblock->s_inode_list_lock);
+       iput(old_inode);
+}
index 65d3a63..108a352 100644 (file)
@@ -2135,6 +2135,18 @@ static void blk_add_rq_to_plug(struct blk_plug *plug, struct request *rq)
        }
 }
 
+/*
+ * Allow 4x BLK_MAX_REQUEST_COUNT requests on plug queue for multiple
+ * queues. This is important for md arrays to benefit from merging
+ * requests.
+ */
+static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
+{
+       if (plug->multiple_queues)
+               return BLK_MAX_REQUEST_COUNT * 4;
+       return BLK_MAX_REQUEST_COUNT;
+}
+
 /**
  * blk_mq_submit_bio - Create and send a request to block device.
  * @bio: Bio pointer.
@@ -2231,7 +2243,7 @@ blk_qc_t blk_mq_submit_bio(struct bio *bio)
                else
                        last = list_entry_rq(plug->mq_list.prev);
 
-               if (request_count >= BLK_MAX_REQUEST_COUNT || (last &&
+               if (request_count >= blk_plug_max_rq_count(plug) || (last &&
                    blk_rq_bytes(last) >= BLK_PLUG_FLUSH_SIZE)) {
                        blk_flush_plug_list(plug, false);
                        trace_block_plug(q);
index 55c4901..7c4e799 100644 (file)
@@ -2458,6 +2458,7 @@ int blk_throtl_init(struct request_queue *q)
 void blk_throtl_exit(struct request_queue *q)
 {
        BUG_ON(!q->td);
+       del_timer_sync(&q->td->service_queue.pending_timer);
        throtl_shutdown_wq(q);
        blkcg_deactivate_policy(q, &blkcg_policy_throtl);
        free_percpu(q->td->latency_buckets[READ]);
index 8c96b0c..7d2a0ba 100644 (file)
@@ -373,4 +373,6 @@ static inline void bio_clear_hipri(struct bio *bio)
        bio->bi_opf &= ~REQ_HIPRI;
 }
 
+extern const struct address_space_operations def_blk_aops;
+
 #endif /* BLK_INTERNAL_H */
diff --git a/block/fops.c b/block/fops.c
new file mode 100644 (file)
index 0000000..ffce6f6
--- /dev/null
@@ -0,0 +1,640 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 1991, 1992  Linus Torvalds
+ * Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
+ * Copyright (C) 2016 - 2020 Christoph Hellwig
+ */
+#include <linux/init.h>
+#include <linux/mm.h>
+#include <linux/blkdev.h>
+#include <linux/buffer_head.h>
+#include <linux/mpage.h>
+#include <linux/uio.h>
+#include <linux/namei.h>
+#include <linux/task_io_accounting_ops.h>
+#include <linux/falloc.h>
+#include <linux/suspend.h>
+#include "blk.h"
+
+static struct inode *bdev_file_inode(struct file *file)
+{
+       return file->f_mapping->host;
+}
+
+static int blkdev_get_block(struct inode *inode, sector_t iblock,
+               struct buffer_head *bh, int create)
+{
+       bh->b_bdev = I_BDEV(inode);
+       bh->b_blocknr = iblock;
+       set_buffer_mapped(bh);
+       return 0;
+}
+
+static unsigned int dio_bio_write_op(struct kiocb *iocb)
+{
+       unsigned int op = REQ_OP_WRITE | REQ_SYNC | REQ_IDLE;
+
+       /* avoid the need for an I/O completion work item */
+       if (iocb->ki_flags & IOCB_DSYNC)
+               op |= REQ_FUA;
+       return op;
+}
+
+#define DIO_INLINE_BIO_VECS 4
+
+static void blkdev_bio_end_io_simple(struct bio *bio)
+{
+       struct task_struct *waiter = bio->bi_private;
+
+       WRITE_ONCE(bio->bi_private, NULL);
+       blk_wake_io_task(waiter);
+}
+
+static ssize_t __blkdev_direct_IO_simple(struct kiocb *iocb,
+               struct iov_iter *iter, unsigned int nr_pages)
+{
+       struct file *file = iocb->ki_filp;
+       struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+       struct bio_vec inline_vecs[DIO_INLINE_BIO_VECS], *vecs;
+       loff_t pos = iocb->ki_pos;
+       bool should_dirty = false;
+       struct bio bio;
+       ssize_t ret;
+       blk_qc_t qc;
+
+       if ((pos | iov_iter_alignment(iter)) &
+           (bdev_logical_block_size(bdev) - 1))
+               return -EINVAL;
+
+       if (nr_pages <= DIO_INLINE_BIO_VECS)
+               vecs = inline_vecs;
+       else {
+               vecs = kmalloc_array(nr_pages, sizeof(struct bio_vec),
+                                    GFP_KERNEL);
+               if (!vecs)
+                       return -ENOMEM;
+       }
+
+       bio_init(&bio, vecs, nr_pages);
+       bio_set_dev(&bio, bdev);
+       bio.bi_iter.bi_sector = pos >> 9;
+       bio.bi_write_hint = iocb->ki_hint;
+       bio.bi_private = current;
+       bio.bi_end_io = blkdev_bio_end_io_simple;
+       bio.bi_ioprio = iocb->ki_ioprio;
+
+       ret = bio_iov_iter_get_pages(&bio, iter);
+       if (unlikely(ret))
+               goto out;
+       ret = bio.bi_iter.bi_size;
+
+       if (iov_iter_rw(iter) == READ) {
+               bio.bi_opf = REQ_OP_READ;
+               if (iter_is_iovec(iter))
+                       should_dirty = true;
+       } else {
+               bio.bi_opf = dio_bio_write_op(iocb);
+               task_io_account_write(ret);
+       }
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               bio.bi_opf |= REQ_NOWAIT;
+       if (iocb->ki_flags & IOCB_HIPRI)
+               bio_set_polled(&bio, iocb);
+
+       qc = submit_bio(&bio);
+       for (;;) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (!READ_ONCE(bio.bi_private))
+                       break;
+               if (!(iocb->ki_flags & IOCB_HIPRI) ||
+                   !blk_poll(bdev_get_queue(bdev), qc, true))
+                       blk_io_schedule();
+       }
+       __set_current_state(TASK_RUNNING);
+
+       bio_release_pages(&bio, should_dirty);
+       if (unlikely(bio.bi_status))
+               ret = blk_status_to_errno(bio.bi_status);
+
+out:
+       if (vecs != inline_vecs)
+               kfree(vecs);
+
+       bio_uninit(&bio);
+
+       return ret;
+}
+
+struct blkdev_dio {
+       union {
+               struct kiocb            *iocb;
+               struct task_struct      *waiter;
+       };
+       size_t                  size;
+       atomic_t                ref;
+       bool                    multi_bio : 1;
+       bool                    should_dirty : 1;
+       bool                    is_sync : 1;
+       struct bio              bio;
+};
+
+static struct bio_set blkdev_dio_pool;
+
+static int blkdev_iopoll(struct kiocb *kiocb, bool wait)
+{
+       struct block_device *bdev = I_BDEV(kiocb->ki_filp->f_mapping->host);
+       struct request_queue *q = bdev_get_queue(bdev);
+
+       return blk_poll(q, READ_ONCE(kiocb->ki_cookie), wait);
+}
+
+static void blkdev_bio_end_io(struct bio *bio)
+{
+       struct blkdev_dio *dio = bio->bi_private;
+       bool should_dirty = dio->should_dirty;
+
+       if (bio->bi_status && !dio->bio.bi_status)
+               dio->bio.bi_status = bio->bi_status;
+
+       if (!dio->multi_bio || atomic_dec_and_test(&dio->ref)) {
+               if (!dio->is_sync) {
+                       struct kiocb *iocb = dio->iocb;
+                       ssize_t ret;
+
+                       if (likely(!dio->bio.bi_status)) {
+                               ret = dio->size;
+                               iocb->ki_pos += ret;
+                       } else {
+                               ret = blk_status_to_errno(dio->bio.bi_status);
+                       }
+
+                       dio->iocb->ki_complete(iocb, ret, 0);
+                       if (dio->multi_bio)
+                               bio_put(&dio->bio);
+               } else {
+                       struct task_struct *waiter = dio->waiter;
+
+                       WRITE_ONCE(dio->waiter, NULL);
+                       blk_wake_io_task(waiter);
+               }
+       }
+
+       if (should_dirty) {
+               bio_check_pages_dirty(bio);
+       } else {
+               bio_release_pages(bio, false);
+               bio_put(bio);
+       }
+}
+
+static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+               unsigned int nr_pages)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = bdev_file_inode(file);
+       struct block_device *bdev = I_BDEV(inode);
+       struct blk_plug plug;
+       struct blkdev_dio *dio;
+       struct bio *bio;
+       bool is_poll = (iocb->ki_flags & IOCB_HIPRI) != 0;
+       bool is_read = (iov_iter_rw(iter) == READ), is_sync;
+       loff_t pos = iocb->ki_pos;
+       blk_qc_t qc = BLK_QC_T_NONE;
+       int ret = 0;
+
+       if ((pos | iov_iter_alignment(iter)) &
+           (bdev_logical_block_size(bdev) - 1))
+               return -EINVAL;
+
+       bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool);
+
+       dio = container_of(bio, struct blkdev_dio, bio);
+       dio->is_sync = is_sync = is_sync_kiocb(iocb);
+       if (dio->is_sync) {
+               dio->waiter = current;
+               bio_get(bio);
+       } else {
+               dio->iocb = iocb;
+       }
+
+       dio->size = 0;
+       dio->multi_bio = false;
+       dio->should_dirty = is_read && iter_is_iovec(iter);
+
+       /*
+        * Don't plug for HIPRI/polled IO, as those should go straight
+        * to issue
+        */
+       if (!is_poll)
+               blk_start_plug(&plug);
+
+       for (;;) {
+               bio_set_dev(bio, bdev);
+               bio->bi_iter.bi_sector = pos >> 9;
+               bio->bi_write_hint = iocb->ki_hint;
+               bio->bi_private = dio;
+               bio->bi_end_io = blkdev_bio_end_io;
+               bio->bi_ioprio = iocb->ki_ioprio;
+
+               ret = bio_iov_iter_get_pages(bio, iter);
+               if (unlikely(ret)) {
+                       bio->bi_status = BLK_STS_IOERR;
+                       bio_endio(bio);
+                       break;
+               }
+
+               if (is_read) {
+                       bio->bi_opf = REQ_OP_READ;
+                       if (dio->should_dirty)
+                               bio_set_pages_dirty(bio);
+               } else {
+                       bio->bi_opf = dio_bio_write_op(iocb);
+                       task_io_account_write(bio->bi_iter.bi_size);
+               }
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       bio->bi_opf |= REQ_NOWAIT;
+
+               dio->size += bio->bi_iter.bi_size;
+               pos += bio->bi_iter.bi_size;
+
+               nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
+               if (!nr_pages) {
+                       bool polled = false;
+
+                       if (iocb->ki_flags & IOCB_HIPRI) {
+                               bio_set_polled(bio, iocb);
+                               polled = true;
+                       }
+
+                       qc = submit_bio(bio);
+
+                       if (polled)
+                               WRITE_ONCE(iocb->ki_cookie, qc);
+                       break;
+               }
+
+               if (!dio->multi_bio) {
+                       /*
+                        * AIO needs an extra reference to ensure the dio
+                        * structure which is embedded into the first bio
+                        * stays around.
+                        */
+                       if (!is_sync)
+                               bio_get(bio);
+                       dio->multi_bio = true;
+                       atomic_set(&dio->ref, 2);
+               } else {
+                       atomic_inc(&dio->ref);
+               }
+
+               submit_bio(bio);
+               bio = bio_alloc(GFP_KERNEL, nr_pages);
+       }
+
+       if (!is_poll)
+               blk_finish_plug(&plug);
+
+       if (!is_sync)
+               return -EIOCBQUEUED;
+
+       for (;;) {
+               set_current_state(TASK_UNINTERRUPTIBLE);
+               if (!READ_ONCE(dio->waiter))
+                       break;
+
+               if (!(iocb->ki_flags & IOCB_HIPRI) ||
+                   !blk_poll(bdev_get_queue(bdev), qc, true))
+                       blk_io_schedule();
+       }
+       __set_current_state(TASK_RUNNING);
+
+       if (!ret)
+               ret = blk_status_to_errno(dio->bio.bi_status);
+       if (likely(!ret))
+               ret = dio->size;
+
+       bio_put(&dio->bio);
+       return ret;
+}
+
+static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+       unsigned int nr_pages;
+
+       if (!iov_iter_count(iter))
+               return 0;
+
+       nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
+       if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
+               return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
+
+       return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
+}
+
+static int blkdev_writepage(struct page *page, struct writeback_control *wbc)
+{
+       return block_write_full_page(page, blkdev_get_block, wbc);
+}
+
+static int blkdev_readpage(struct file * file, struct page * page)
+{
+       return block_read_full_page(page, blkdev_get_block);
+}
+
+static void blkdev_readahead(struct readahead_control *rac)
+{
+       mpage_readahead(rac, blkdev_get_block);
+}
+
+static int blkdev_write_begin(struct file *file, struct address_space *mapping,
+               loff_t pos, unsigned len, unsigned flags, struct page **pagep,
+               void **fsdata)
+{
+       return block_write_begin(mapping, pos, len, flags, pagep,
+                                blkdev_get_block);
+}
+
+static int blkdev_write_end(struct file *file, struct address_space *mapping,
+               loff_t pos, unsigned len, unsigned copied, struct page *page,
+               void *fsdata)
+{
+       int ret;
+       ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
+
+       unlock_page(page);
+       put_page(page);
+
+       return ret;
+}
+
+static int blkdev_writepages(struct address_space *mapping,
+                            struct writeback_control *wbc)
+{
+       return generic_writepages(mapping, wbc);
+}
+
+const struct address_space_operations def_blk_aops = {
+       .set_page_dirty = __set_page_dirty_buffers,
+       .readpage       = blkdev_readpage,
+       .readahead      = blkdev_readahead,
+       .writepage      = blkdev_writepage,
+       .write_begin    = blkdev_write_begin,
+       .write_end      = blkdev_write_end,
+       .writepages     = blkdev_writepages,
+       .direct_IO      = blkdev_direct_IO,
+       .migratepage    = buffer_migrate_page_norefs,
+       .is_dirty_writeback = buffer_check_dirty_writeback,
+};
+
+/*
+ * For a block special file, file_inode(file)->i_size is zero,
+ * so we compute the size by hand (just as in block_read/write above).
+ */
+static loff_t blkdev_llseek(struct file *file, loff_t offset, int whence)
+{
+       struct inode *bd_inode = bdev_file_inode(file);
+       loff_t retval;
+
+       inode_lock(bd_inode);
+       retval = fixed_size_llseek(file, offset, whence, i_size_read(bd_inode));
+       inode_unlock(bd_inode);
+       return retval;
+}
+
+static int blkdev_fsync(struct file *filp, loff_t start, loff_t end,
+               int datasync)
+{
+       struct inode *bd_inode = bdev_file_inode(filp);
+       struct block_device *bdev = I_BDEV(bd_inode);
+       int error;
+
+       error = file_write_and_wait_range(filp, start, end);
+       if (error)
+               return error;
+
+       /*
+        * There is no need to serialise calls to blkdev_issue_flush with
+        * i_mutex and doing so causes performance issues with concurrent
+        * O_SYNC writers to a block device.
+        */
+       error = blkdev_issue_flush(bdev);
+       if (error == -EOPNOTSUPP)
+               error = 0;
+
+       return error;
+}
+
+static int blkdev_open(struct inode *inode, struct file *filp)
+{
+       struct block_device *bdev;
+
+       /*
+        * Preserve backwards compatibility and allow large file access
+        * even if userspace doesn't ask for it explicitly. Some mkfs
+        * binary needs it. We might want to drop this workaround
+        * during an unstable branch.
+        */
+       filp->f_flags |= O_LARGEFILE;
+       filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
+
+       if (filp->f_flags & O_NDELAY)
+               filp->f_mode |= FMODE_NDELAY;
+       if (filp->f_flags & O_EXCL)
+               filp->f_mode |= FMODE_EXCL;
+       if ((filp->f_flags & O_ACCMODE) == 3)
+               filp->f_mode |= FMODE_WRITE_IOCTL;
+
+       bdev = blkdev_get_by_dev(inode->i_rdev, filp->f_mode, filp);
+       if (IS_ERR(bdev))
+               return PTR_ERR(bdev);
+       filp->f_mapping = bdev->bd_inode->i_mapping;
+       filp->f_wb_err = filemap_sample_wb_err(filp->f_mapping);
+       return 0;
+}
+
+static int blkdev_close(struct inode *inode, struct file *filp)
+{
+       struct block_device *bdev = I_BDEV(bdev_file_inode(filp));
+
+       blkdev_put(bdev, filp->f_mode);
+       return 0;
+}
+
+static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg)
+{
+       struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+       fmode_t mode = file->f_mode;
+
+       /*
+        * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have
+        * to update it before every ioctl.
+        */
+       if (file->f_flags & O_NDELAY)
+               mode |= FMODE_NDELAY;
+       else
+               mode &= ~FMODE_NDELAY;
+
+       return blkdev_ioctl(bdev, mode, cmd, arg);
+}
+
+/*
+ * Write data to the block device.  Only intended for the block device itself
+ * and the raw driver which basically is a fake block device.
+ *
+ * Does not take i_mutex for the write and thus is not for general purpose
+ * use.
+ */
+static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *bd_inode = bdev_file_inode(file);
+       loff_t size = i_size_read(bd_inode);
+       struct blk_plug plug;
+       size_t shorted = 0;
+       ssize_t ret;
+
+       if (bdev_read_only(I_BDEV(bd_inode)))
+               return -EPERM;
+
+       if (IS_SWAPFILE(bd_inode) && !is_hibernate_resume_dev(bd_inode->i_rdev))
+               return -ETXTBSY;
+
+       if (!iov_iter_count(from))
+               return 0;
+
+       if (iocb->ki_pos >= size)
+               return -ENOSPC;
+
+       if ((iocb->ki_flags & (IOCB_NOWAIT | IOCB_DIRECT)) == IOCB_NOWAIT)
+               return -EOPNOTSUPP;
+
+       size -= iocb->ki_pos;
+       if (iov_iter_count(from) > size) {
+               shorted = iov_iter_count(from) - size;
+               iov_iter_truncate(from, size);
+       }
+
+       blk_start_plug(&plug);
+       ret = __generic_file_write_iter(iocb, from);
+       if (ret > 0)
+               ret = generic_write_sync(iocb, ret);
+       iov_iter_reexpand(from, iov_iter_count(from) + shorted);
+       blk_finish_plug(&plug);
+       return ret;
+}
+
+static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *bd_inode = bdev_file_inode(file);
+       loff_t size = i_size_read(bd_inode);
+       loff_t pos = iocb->ki_pos;
+       size_t shorted = 0;
+       ssize_t ret;
+
+       if (pos >= size)
+               return 0;
+
+       size -= pos;
+       if (iov_iter_count(to) > size) {
+               shorted = iov_iter_count(to) - size;
+               iov_iter_truncate(to, size);
+       }
+
+       ret = generic_file_read_iter(iocb, to);
+       iov_iter_reexpand(to, iov_iter_count(to) + shorted);
+       return ret;
+}
+
+#define        BLKDEV_FALLOC_FL_SUPPORTED                                      \
+               (FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |           \
+                FALLOC_FL_ZERO_RANGE | FALLOC_FL_NO_HIDE_STALE)
+
+static long blkdev_fallocate(struct file *file, int mode, loff_t start,
+                            loff_t len)
+{
+       struct block_device *bdev = I_BDEV(bdev_file_inode(file));
+       loff_t end = start + len - 1;
+       loff_t isize;
+       int error;
+
+       /* Fail if we don't recognize the flags. */
+       if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
+               return -EOPNOTSUPP;
+
+       /* Don't go off the end of the device. */
+       isize = i_size_read(bdev->bd_inode);
+       if (start >= isize)
+               return -EINVAL;
+       if (end >= isize) {
+               if (mode & FALLOC_FL_KEEP_SIZE) {
+                       len = isize - start;
+                       end = start + len - 1;
+               } else
+                       return -EINVAL;
+       }
+
+       /*
+        * Don't allow IO that isn't aligned to logical block size.
+        */
+       if ((start | len) & (bdev_logical_block_size(bdev) - 1))
+               return -EINVAL;
+
+       /* Invalidate the page cache, including dirty pages. */
+       error = truncate_bdev_range(bdev, file->f_mode, start, end);
+       if (error)
+               return error;
+
+       switch (mode) {
+       case FALLOC_FL_ZERO_RANGE:
+       case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
+               error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
+                                           GFP_KERNEL, BLKDEV_ZERO_NOUNMAP);
+               break;
+       case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
+               error = blkdev_issue_zeroout(bdev, start >> 9, len >> 9,
+                                            GFP_KERNEL, BLKDEV_ZERO_NOFALLBACK);
+               break;
+       case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE | FALLOC_FL_NO_HIDE_STALE:
+               error = blkdev_issue_discard(bdev, start >> 9, len >> 9,
+                                            GFP_KERNEL, 0);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+       if (error)
+               return error;
+
+       /*
+        * Invalidate the page cache again; if someone wandered in and dirtied
+        * a page, we just discard it - userspace has no way of knowing whether
+        * the write happened before or after discard completing...
+        */
+       return truncate_bdev_range(bdev, file->f_mode, start, end);
+}
+
+const struct file_operations def_blk_fops = {
+       .open           = blkdev_open,
+       .release        = blkdev_close,
+       .llseek         = blkdev_llseek,
+       .read_iter      = blkdev_read_iter,
+       .write_iter     = blkdev_write_iter,
+       .iopoll         = blkdev_iopoll,
+       .mmap           = generic_file_mmap,
+       .fsync          = blkdev_fsync,
+       .unlocked_ioctl = block_ioctl,
+#ifdef CONFIG_COMPAT
+       .compat_ioctl   = compat_blkdev_ioctl,
+#endif
+       .splice_read    = generic_file_splice_read,
+       .splice_write   = iter_file_splice_write,
+       .fallocate      = blkdev_fallocate,
+};
+
+static __init int blkdev_init(void)
+{
+       return bioset_init(&blkdev_dio_pool, 4,
+                               offsetof(struct blkdev_dio, bio),
+                               BIOSET_NEED_BVECS|BIOSET_PERCPU_CACHE);
+}
+module_init(blkdev_init);
index 567549a..7b6e5e1 100644 (file)
@@ -183,6 +183,7 @@ static struct blk_major_name {
        void (*probe)(dev_t devt);
 } *major_names[BLKDEV_MAJOR_HASH_SIZE];
 static DEFINE_MUTEX(major_names_lock);
+static DEFINE_SPINLOCK(major_names_spinlock);
 
 /* index in the above - for now: assume no multimajor ranges */
 static inline int major_to_index(unsigned major)
@@ -195,11 +196,11 @@ void blkdev_show(struct seq_file *seqf, off_t offset)
 {
        struct blk_major_name *dp;
 
-       mutex_lock(&major_names_lock);
+       spin_lock(&major_names_spinlock);
        for (dp = major_names[major_to_index(offset)]; dp; dp = dp->next)
                if (dp->major == offset)
                        seq_printf(seqf, "%3d %s\n", dp->major, dp->name);
-       mutex_unlock(&major_names_lock);
+       spin_unlock(&major_names_spinlock);
 }
 #endif /* CONFIG_PROC_FS */
 
@@ -271,6 +272,7 @@ int __register_blkdev(unsigned int major, const char *name,
        p->next = NULL;
        index = major_to_index(major);
 
+       spin_lock(&major_names_spinlock);
        for (n = &major_names[index]; *n; n = &(*n)->next) {
                if ((*n)->major == major)
                        break;
@@ -279,6 +281,7 @@ int __register_blkdev(unsigned int major, const char *name,
                *n = p;
        else
                ret = -EBUSY;
+       spin_unlock(&major_names_spinlock);
 
        if (ret < 0) {
                printk("register_blkdev: cannot get major %u for %s\n",
@@ -298,6 +301,7 @@ void unregister_blkdev(unsigned int major, const char *name)
        int index = major_to_index(major);
 
        mutex_lock(&major_names_lock);
+       spin_lock(&major_names_spinlock);
        for (n = &major_names[index]; *n; n = &(*n)->next)
                if ((*n)->major == major)
                        break;
@@ -307,6 +311,7 @@ void unregister_blkdev(unsigned int major, const char *name)
                p = *n;
                *n = p->next;
        }
+       spin_unlock(&major_names_spinlock);
        mutex_unlock(&major_names_lock);
        kfree(p);
 }
index a4d4eeb..bd48210 100644 (file)
@@ -1008,23 +1008,14 @@ static int cpc_write(int cpu, struct cpc_register_resource *reg_res, u64 val)
        return ret_val;
 }
 
-/**
- * cppc_get_desired_perf - Get the value of desired performance register.
- * @cpunum: CPU from which to get desired performance.
- * @desired_perf: address of a variable to store the returned desired performance
- *
- * Return: 0 for success, -EIO otherwise.
- */
-int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+static int cppc_get_perf(int cpunum, enum cppc_regs reg_idx, u64 *perf)
 {
        struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
-       int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
-       struct cpc_register_resource *desired_reg;
-       struct cppc_pcc_data *pcc_ss_data = NULL;
-
-       desired_reg = &cpc_desc->cpc_regs[DESIRED_PERF];
+       struct cpc_register_resource *reg = &cpc_desc->cpc_regs[reg_idx];
 
-       if (CPC_IN_PCC(desired_reg)) {
+       if (CPC_IN_PCC(reg)) {
+               int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
+               struct cppc_pcc_data *pcc_ss_data = NULL;
                int ret = 0;
 
                if (pcc_ss_id < 0)
@@ -1035,7 +1026,7 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
                down_write(&pcc_ss_data->pcc_lock);
 
                if (send_pcc_cmd(pcc_ss_id, CMD_READ) >= 0)
-                       cpc_read(cpunum, desired_reg, desired_perf);
+                       cpc_read(cpunum, reg, perf);
                else
                        ret = -EIO;
 
@@ -1044,13 +1035,37 @@ int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
                return ret;
        }
 
-       cpc_read(cpunum, desired_reg, desired_perf);
+       cpc_read(cpunum, reg, perf);
 
        return 0;
 }
+
+/**
+ * cppc_get_desired_perf - Get the desired performance register value.
+ * @cpunum: CPU from which to get desired performance.
+ * @desired_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_desired_perf(int cpunum, u64 *desired_perf)
+{
+       return cppc_get_perf(cpunum, DESIRED_PERF, desired_perf);
+}
 EXPORT_SYMBOL_GPL(cppc_get_desired_perf);
 
 /**
+ * cppc_get_nominal_perf - Get the nominal performance register value.
+ * @cpunum: CPU from which to get nominal performance.
+ * @nominal_perf: Return address.
+ *
+ * Return: 0 for success, -EIO otherwise.
+ */
+int cppc_get_nominal_perf(int cpunum, u64 *nominal_perf)
+{
+       return cppc_get_perf(cpunum, NOMINAL_PERF, nominal_perf);
+}
+
+/**
  * cppc_get_perf_caps - Get a CPU's performance capabilities.
  * @cpunum: CPU from which to get capabilities info.
  * @perf_caps: ptr to cppc_perf_caps. See cppc_acpi.h
index 1f6007a..89c22bc 100644 (file)
@@ -288,10 +288,18 @@ invalid_guid:
 
 void __init init_prmt(void)
 {
+       struct acpi_table_header *tbl;
        acpi_status status;
-       int mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) +
+       int mc;
+
+       status = acpi_get_table(ACPI_SIG_PRMT, 0, &tbl);
+       if (ACPI_FAILURE(status))
+               return;
+
+       mc = acpi_table_parse_entries(ACPI_SIG_PRMT, sizeof(struct acpi_table_prmt) +
                                          sizeof (struct acpi_table_prmt_header),
                                          0, acpi_parse_prmt, 0);
+       acpi_put_table(tbl);
        /*
         * Return immediately if PRMT table is not present or no PRM module found.
         */
index b24513e..5b54c80 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/signal.h>
 #include <linux/kthread.h>
 #include <linux/dmi.h>
-#include <linux/nls.h>
 #include <linux/dma-map-ops.h>
 #include <linux/platform_data/x86/apple.h>
 #include <linux/pgtable.h>
index fd430e6..6526aa5 100644 (file)
@@ -33,7 +33,7 @@
  */
 
 static unsigned int cfag12864b_rate = CONFIG_CFAG12864B_RATE;
-module_param(cfag12864b_rate, uint, S_IRUGO);
+module_param(cfag12864b_rate, uint, 0444);
 MODULE_PARM_DESC(cfag12864b_rate,
        "Refresh rate (hertz)");
 
index 24fd6f3..304accd 100644 (file)
@@ -637,9 +637,7 @@ static int panel_notify_sys(struct notifier_block *this, unsigned long code,
 }
 
 static struct notifier_block panel_notifier = {
-       panel_notify_sys,
-       NULL,
-       0
+       .notifier_call = panel_notify_sys,
 };
 
 int charlcd_register(struct charlcd *lcd)
index 2e5e7c9..8b2a0eb 100644 (file)
@@ -323,8 +323,8 @@ static int hd44780_remove(struct platform_device *pdev)
 {
        struct charlcd *lcd = platform_get_drvdata(pdev);
 
-       kfree(lcd->drvdata);
        charlcd_unregister(lcd);
+       kfree(lcd->drvdata);
 
        kfree(lcd);
        return 0;
index 03c95ad..e871b94 100644 (file)
  */
 
 static unsigned int ks0108_port = CONFIG_KS0108_PORT;
-module_param(ks0108_port, uint, S_IRUGO);
+module_param(ks0108_port, uint, 0444);
 MODULE_PARM_DESC(ks0108_port, "Parallel port where the LCD is connected");
 
 static unsigned int ks0108_delay = CONFIG_KS0108_DELAY;
-module_param(ks0108_delay, uint, S_IRUGO);
+module_param(ks0108_delay, uint, 0444);
 MODULE_PARM_DESC(ks0108_delay, "Delay between each control writing (microseconds)");
 
 /*
@@ -167,19 +167,7 @@ static struct parport_driver ks0108_parport_driver = {
        .detach = ks0108_parport_detach,
        .devmodel = true,
 };
-
-static int __init ks0108_init(void)
-{
-       return parport_register_driver(&ks0108_parport_driver);
-}
-
-static void __exit ks0108_exit(void)
-{
-       parport_unregister_driver(&ks0108_parport_driver);
-}
-
-module_init(ks0108_init);
-module_exit(ks0108_exit);
+module_parport_driver(ks0108_parport_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
index d568772..cbea78e 100644 (file)
@@ -1642,7 +1642,7 @@ static int __device_suspend(struct device *dev, pm_message_t state, bool async)
        }
 
        dev->power.may_skip_resume = true;
-       dev->power.must_resume = false;
+       dev->power.must_resume = !dev_pm_test_driver_flags(dev, DPM_FLAG_MAY_SKIP_RESUME);
 
        dpm_watchdog_set(&wd, dev);
        device_lock(dev);
index 3bad326..b91a3a9 100644 (file)
 /**
  * dev_pm_attach_wake_irq - Attach device interrupt as a wake IRQ
  * @dev: Device entry
- * @irq: Device wake-up capable interrupt
  * @wirq: Wake irq specific data
  *
- * Internal function to attach either a device IO interrupt or a
- * dedicated wake-up interrupt as a wake IRQ.
+ * Internal function to attach a dedicated wake-up interrupt as a wake IRQ.
  */
-static int dev_pm_attach_wake_irq(struct device *dev, int irq,
-                                 struct wake_irq *wirq)
+static int dev_pm_attach_wake_irq(struct device *dev, struct wake_irq *wirq)
 {
        unsigned long flags;
 
@@ -65,7 +62,7 @@ int dev_pm_set_wake_irq(struct device *dev, int irq)
        wirq->dev = dev;
        wirq->irq = irq;
 
-       err = dev_pm_attach_wake_irq(dev, irq, wirq);
+       err = dev_pm_attach_wake_irq(dev, wirq);
        if (err)
                kfree(wirq);
 
@@ -196,7 +193,7 @@ int dev_pm_set_dedicated_wake_irq(struct device *dev, int irq)
        if (err)
                goto err_free_name;
 
-       err = dev_pm_attach_wake_irq(dev, irq, wirq);
+       err = dev_pm_attach_wake_irq(dev, wirq);
        if (err)
                goto err_free_irq;
 
index c84be00..26798da 100644 (file)
@@ -129,8 +129,8 @@ static int __init n64cart_probe(struct platform_device *pdev)
        }
 
        reg_base = devm_platform_ioremap_resource(pdev, 0);
-       if (!reg_base)
-               return -EINVAL;
+       if (IS_ERR(reg_base))
+               return PTR_ERR(reg_base);
 
        disk = blk_alloc_disk(NUMA_NO_NODE);
        if (!disk)
index 57c6ae7..9b3bd08 100644 (file)
@@ -762,7 +762,7 @@ static int virtblk_probe(struct virtio_device *vdev)
                goto out_free_vblk;
 
        /* Default queue sizing is to fill the ring. */
-       if (likely(!virtblk_queue_depth)) {
+       if (!virtblk_queue_depth) {
                queue_depth = vblk->vqs[0].vq->num_free;
                /* ... but without indirect descs, we use 2 descs per req */
                if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
@@ -836,7 +836,7 @@ static int virtblk_probe(struct virtio_device *vdev)
        else
                blk_size = queue_logical_block_size(q);
 
-       if (unlikely(blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE)) {
+       if (blk_size < SECTOR_SIZE || blk_size > PAGE_SIZE) {
                dev_err(&vdev->dev,
                        "block size is changed unexpectedly, now is %u\n",
                        blk_size);
index bb46698..6f3272b 100644 (file)
@@ -591,7 +591,7 @@ static void handle_transaction_done(struct smi_info *smi_info)
                smi_info->handlers->get_result(smi_info->si_sm, msg, 3);
                if (msg[2] != 0) {
                        /* Error clearing flags */
-                       dev_warn(smi_info->io.dev,
+                       dev_warn_ratelimited(smi_info->io.dev,
                                 "Error clearing flags: %2.2x\n", msg[2]);
                }
                smi_info->si_state = SI_NORMAL;
@@ -683,10 +683,10 @@ static void handle_transaction_done(struct smi_info *smi_info)
                /* We got the flags from the SMI, now handle them. */
                smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
                if (msg[2] != 0) {
-                       dev_warn(smi_info->io.dev,
-                                "Couldn't get irq info: %x.\n", msg[2]);
-                       dev_warn(smi_info->io.dev,
-                                "Maybe ok, but ipmi might run very slowly.\n");
+                       dev_warn_ratelimited(smi_info->io.dev,
+                               "Couldn't get irq info: %x,\n"
+                               "Maybe ok, but ipmi might run very slowly.\n",
+                               msg[2]);
                        smi_info->si_state = SI_NORMAL;
                        break;
                }
@@ -721,7 +721,7 @@ static void handle_transaction_done(struct smi_info *smi_info)
 
                smi_info->handlers->get_result(smi_info->si_sm, msg, 4);
                if (msg[2] != 0)
-                       dev_warn(smi_info->io.dev,
+                       dev_warn_ratelimited(smi_info->io.dev,
                                 "Could not set the global enables: 0x%x.\n",
                                 msg[2]);
 
@@ -1343,7 +1343,7 @@ retry:
 
                if (cc != IPMI_CC_NO_ERROR &&
                    ++retry_count <= GET_DEVICE_ID_MAX_RETRY) {
-                       dev_warn(smi_info->io.dev,
+                       dev_warn_ratelimited(smi_info->io.dev,
                            "BMC returned 0x%2.2x, retry get bmc device id\n",
                            cc);
                        goto retry;
@@ -1605,7 +1605,7 @@ static ssize_t name##_show(struct device *dev,                    \
                                                                        \
        return snprintf(buf, 10, "%u\n", smi_get_stat(smi_info, name)); \
 }                                                                      \
-static DEVICE_ATTR(name, 0444, name##_show, NULL)
+static DEVICE_ATTR_RO(name)
 
 static ssize_t type_show(struct device *dev,
                         struct device_attribute *attr,
@@ -1615,7 +1615,7 @@ static ssize_t type_show(struct device *dev,
 
        return snprintf(buf, 10, "%s\n", si_to_str[smi_info->io.si_type]);
 }
-static DEVICE_ATTR(type, 0444, type_show, NULL);
+static DEVICE_ATTR_RO(type);
 
 static ssize_t interrupts_enabled_show(struct device *dev,
                                       struct device_attribute *attr,
@@ -1626,8 +1626,7 @@ static ssize_t interrupts_enabled_show(struct device *dev,
 
        return snprintf(buf, 10, "%d\n", enabled);
 }
-static DEVICE_ATTR(interrupts_enabled, 0444,
-                  interrupts_enabled_show, NULL);
+static DEVICE_ATTR_RO(interrupts_enabled);
 
 IPMI_SI_ATTR(short_timeouts);
 IPMI_SI_ATTR(long_timeouts);
@@ -1658,7 +1657,7 @@ static ssize_t params_show(struct device *dev,
                        smi_info->io.irq,
                        smi_info->io.slave_addr);
 }
-static DEVICE_ATTR(params, 0444, params_show, NULL);
+static DEVICE_ATTR_RO(params);
 
 static struct attribute *ipmi_si_dev_attrs[] = {
        &dev_attr_type.attr,
index 053089f..3236706 100644 (file)
@@ -176,10 +176,6 @@ static const struct parent_map gcc_parent_map_2[] = {
        { P_GPLL0_OUT_ODD, 2 },
 };
 
-static const struct clk_parent_data gcc_parent_data_2[] = {
-       { .fw_name = "bi_tcxo" },
-       { .hw = &gpll0_out_odd.clkr.hw },
-};
 static const struct clk_parent_data gcc_parent_data_2_ao[] = {
        { .fw_name = "bi_tcxo_ao" },
        { .hw = &gpll0_out_odd.clkr.hw },
index 2d83a9f..1097f82 100644 (file)
@@ -268,6 +268,7 @@ static struct cpudata **all_cpu_data;
  * @get_min:           Callback to get minimum P state
  * @get_turbo:         Callback to get turbo P state
  * @get_scaling:       Callback to get frequency scaling factor
+ * @get_cpu_scaling:   Callback to get frequency scaling factor for a given CPU
  * @get_aperf_mperf_shift: Callback to get the APERF vs MPERF frequency difference
  * @get_val:           Callback to convert P state to actual MSR write value
  * @get_vid:           Callback to get VID data for Atom platforms
@@ -281,6 +282,7 @@ struct pstate_funcs {
        int (*get_min)(void);
        int (*get_turbo)(void);
        int (*get_scaling)(void);
+       int (*get_cpu_scaling)(int cpu);
        int (*get_aperf_mperf_shift)(void);
        u64 (*get_val)(struct cpudata*, int pstate);
        void (*get_vid)(struct cpudata *);
@@ -384,6 +386,15 @@ static int intel_pstate_get_cppc_guaranteed(int cpu)
        return cppc_perf.nominal_perf;
 }
 
+static u32 intel_pstate_cppc_nominal(int cpu)
+{
+       u64 nominal_perf;
+
+       if (cppc_get_nominal_perf(cpu, &nominal_perf))
+               return 0;
+
+       return nominal_perf;
+}
 #else /* CONFIG_ACPI_CPPC_LIB */
 static inline void intel_pstate_set_itmt_prio(int cpu)
 {
@@ -470,20 +481,6 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
 
        acpi_processor_unregister_performance(policy->cpu);
 }
-
-static bool intel_pstate_cppc_perf_valid(u32 perf, struct cppc_perf_caps *caps)
-{
-       return perf && perf <= caps->highest_perf && perf >= caps->lowest_perf;
-}
-
-static bool intel_pstate_cppc_perf_caps(struct cpudata *cpu,
-                                       struct cppc_perf_caps *caps)
-{
-       if (cppc_get_perf_caps(cpu->cpu, caps))
-               return false;
-
-       return caps->highest_perf && caps->lowest_perf <= caps->highest_perf;
-}
 #else /* CONFIG_ACPI */
 static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
 {
@@ -506,15 +503,8 @@ static inline int intel_pstate_get_cppc_guaranteed(int cpu)
 }
 #endif /* CONFIG_ACPI_CPPC_LIB */
 
-static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
-{
-       pr_debug("CPU%d: Using PERF_CTL scaling for HWP\n", cpu->cpu);
-
-       cpu->pstate.scaling = cpu->pstate.perf_ctl_scaling;
-}
-
 /**
- * intel_pstate_hybrid_hwp_calibrate - Calibrate HWP performance levels.
+ * intel_pstate_hybrid_hwp_adjust - Calibrate HWP performance levels.
  * @cpu: Target CPU.
  *
  * On hybrid processors, HWP may expose more performance levels than there are
@@ -522,115 +512,46 @@ static void intel_pstate_hybrid_hwp_perf_ctl_parity(struct cpudata *cpu)
  * scaling factor between HWP performance levels and CPU frequency will be less
  * than the scaling factor between P-state values and CPU frequency.
  *
- * In that case, the scaling factor between HWP performance levels and CPU
- * frequency needs to be determined which can be done with the help of the
- * observation that certain HWP performance levels should correspond to certain
- * P-states, like for example the HWP highest performance should correspond
- * to the maximum turbo P-state of the CPU.
+ * In that case, adjust the CPU parameters used in computations accordingly.
  */
-static void intel_pstate_hybrid_hwp_calibrate(struct cpudata *cpu)
+static void intel_pstate_hybrid_hwp_adjust(struct cpudata *cpu)
 {
        int perf_ctl_max_phys = cpu->pstate.max_pstate_physical;
        int perf_ctl_scaling = cpu->pstate.perf_ctl_scaling;
        int perf_ctl_turbo = pstate_funcs.get_turbo();
        int turbo_freq = perf_ctl_turbo * perf_ctl_scaling;
-       int perf_ctl_max = pstate_funcs.get_max();
-       int max_freq = perf_ctl_max * perf_ctl_scaling;
-       int scaling = INT_MAX;
-       int freq;
+       int scaling = cpu->pstate.scaling;
 
        pr_debug("CPU%d: perf_ctl_max_phys = %d\n", cpu->cpu, perf_ctl_max_phys);
-       pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, perf_ctl_max);
+       pr_debug("CPU%d: perf_ctl_max = %d\n", cpu->cpu, pstate_funcs.get_max());
        pr_debug("CPU%d: perf_ctl_turbo = %d\n", cpu->cpu, perf_ctl_turbo);
        pr_debug("CPU%d: perf_ctl_scaling = %d\n", cpu->cpu, perf_ctl_scaling);
-
        pr_debug("CPU%d: HWP_CAP guaranteed = %d\n", cpu->cpu, cpu->pstate.max_pstate);
        pr_debug("CPU%d: HWP_CAP highest = %d\n", cpu->cpu, cpu->pstate.turbo_pstate);
-
-#ifdef CONFIG_ACPI
-       if (IS_ENABLED(CONFIG_ACPI_CPPC_LIB)) {
-               struct cppc_perf_caps caps;
-
-               if (intel_pstate_cppc_perf_caps(cpu, &caps)) {
-                       if (intel_pstate_cppc_perf_valid(caps.nominal_perf, &caps)) {
-                               pr_debug("CPU%d: Using CPPC nominal\n", cpu->cpu);
-
-                               /*
-                                * If the CPPC nominal performance is valid, it
-                                * can be assumed to correspond to cpu_khz.
-                                */
-                               if (caps.nominal_perf == perf_ctl_max_phys) {
-                                       intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-                                       return;
-                               }
-                               scaling = DIV_ROUND_UP(cpu_khz, caps.nominal_perf);
-                       } else if (intel_pstate_cppc_perf_valid(caps.guaranteed_perf, &caps)) {
-                               pr_debug("CPU%d: Using CPPC guaranteed\n", cpu->cpu);
-
-                               /*
-                                * If the CPPC guaranteed performance is valid,
-                                * it can be assumed to correspond to max_freq.
-                                */
-                               if (caps.guaranteed_perf == perf_ctl_max) {
-                                       intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-                                       return;
-                               }
-                               scaling = DIV_ROUND_UP(max_freq, caps.guaranteed_perf);
-                       }
-               }
-       }
-#endif
-       /*
-        * If using the CPPC data to compute the HWP-to-frequency scaling factor
-        * doesn't work, use the HWP_CAP gauranteed perf for this purpose with
-        * the assumption that it corresponds to max_freq.
-        */
-       if (scaling > perf_ctl_scaling) {
-               pr_debug("CPU%d: Using HWP_CAP guaranteed\n", cpu->cpu);
-
-               if (cpu->pstate.max_pstate == perf_ctl_max) {
-                       intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-                       return;
-               }
-               scaling = DIV_ROUND_UP(max_freq, cpu->pstate.max_pstate);
-               if (scaling > perf_ctl_scaling) {
-                       /*
-                        * This should not happen, because it would mean that
-                        * the number of HWP perf levels was less than the
-                        * number of P-states, so use the PERF_CTL scaling in
-                        * that case.
-                        */
-                       pr_debug("CPU%d: scaling (%d) out of range\n", cpu->cpu,
-                               scaling);
-
-                       intel_pstate_hybrid_hwp_perf_ctl_parity(cpu);
-                       return;
-               }
-       }
+       pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
 
        /*
-        * If the product of the HWP performance scaling factor obtained above
-        * and the HWP_CAP highest performance is greater than the maximum turbo
-        * frequency corresponding to the pstate_funcs.get_turbo() return value,
-        * the scaling factor is too high, so recompute it so that the HWP_CAP
-        * highest performance corresponds to the maximum turbo frequency.
+        * If the product of the HWP performance scaling factor and the HWP_CAP
+        * highest performance is greater than the maximum turbo frequency
+        * corresponding to the pstate_funcs.get_turbo() return value, the
+        * scaling factor is too high, so recompute it to make the HWP_CAP
+        * highest performance correspond to the maximum turbo frequency.
         */
        if (turbo_freq < cpu->pstate.turbo_pstate * scaling) {
-               pr_debug("CPU%d: scaling too high (%d)\n", cpu->cpu, scaling);
-
                cpu->pstate.turbo_freq = turbo_freq;
                scaling = DIV_ROUND_UP(turbo_freq, cpu->pstate.turbo_pstate);
-       }
+               cpu->pstate.scaling = scaling;
 
-       cpu->pstate.scaling = scaling;
-
-       pr_debug("CPU%d: HWP-to-frequency scaling factor: %d\n", cpu->cpu, scaling);
+               pr_debug("CPU%d: refined HWP-to-frequency scaling factor: %d\n",
+                        cpu->cpu, scaling);
+       }
 
        cpu->pstate.max_freq = rounddown(cpu->pstate.max_pstate * scaling,
                                         perf_ctl_scaling);
 
-       freq = perf_ctl_max_phys * perf_ctl_scaling;
-       cpu->pstate.max_pstate_physical = DIV_ROUND_UP(freq, scaling);
+       cpu->pstate.max_pstate_physical =
+                       DIV_ROUND_UP(perf_ctl_max_phys * perf_ctl_scaling,
+                                    scaling);
 
        cpu->pstate.min_freq = cpu->pstate.min_pstate * perf_ctl_scaling;
        /*
@@ -1861,6 +1782,38 @@ static int knl_get_turbo_pstate(void)
        return ret;
 }
 
+#ifdef CONFIG_ACPI_CPPC_LIB
+static u32 hybrid_ref_perf;
+
+static int hybrid_get_cpu_scaling(int cpu)
+{
+       return DIV_ROUND_UP(core_get_scaling() * hybrid_ref_perf,
+                           intel_pstate_cppc_nominal(cpu));
+}
+
+static void intel_pstate_cppc_set_cpu_scaling(void)
+{
+       u32 min_nominal_perf = U32_MAX;
+       int cpu;
+
+       for_each_present_cpu(cpu) {
+               u32 nominal_perf = intel_pstate_cppc_nominal(cpu);
+
+               if (nominal_perf && nominal_perf < min_nominal_perf)
+                       min_nominal_perf = nominal_perf;
+       }
+
+       if (min_nominal_perf < U32_MAX) {
+               hybrid_ref_perf = min_nominal_perf;
+               pstate_funcs.get_cpu_scaling = hybrid_get_cpu_scaling;
+       }
+}
+#else
+static inline void intel_pstate_cppc_set_cpu_scaling(void)
+{
+}
+#endif /* CONFIG_ACPI_CPPC_LIB */
+
 static void intel_pstate_set_pstate(struct cpudata *cpu, int pstate)
 {
        trace_cpu_frequency(pstate * cpu->pstate.scaling, cpu->cpu);
@@ -1889,10 +1842,8 @@ static void intel_pstate_max_within_limits(struct cpudata *cpu)
 
 static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 {
-       bool hybrid_cpu = boot_cpu_has(X86_FEATURE_HYBRID_CPU);
        int perf_ctl_max_phys = pstate_funcs.get_max_physical();
-       int perf_ctl_scaling = hybrid_cpu ? cpu_khz / perf_ctl_max_phys :
-                                           pstate_funcs.get_scaling();
+       int perf_ctl_scaling = pstate_funcs.get_scaling();
 
        cpu->pstate.min_pstate = pstate_funcs.get_min();
        cpu->pstate.max_pstate_physical = perf_ctl_max_phys;
@@ -1901,10 +1852,13 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
        if (hwp_active && !hwp_mode_bdw) {
                __intel_pstate_get_hwp_cap(cpu);
 
-               if (hybrid_cpu)
-                       intel_pstate_hybrid_hwp_calibrate(cpu);
-               else
+               if (pstate_funcs.get_cpu_scaling) {
+                       cpu->pstate.scaling = pstate_funcs.get_cpu_scaling(cpu->cpu);
+                       if (cpu->pstate.scaling != perf_ctl_scaling)
+                               intel_pstate_hybrid_hwp_adjust(cpu);
+               } else {
                        cpu->pstate.scaling = perf_ctl_scaling;
+               }
        } else {
                cpu->pstate.scaling = perf_ctl_scaling;
                cpu->pstate.max_pstate = pstate_funcs.get_max();
@@ -3276,6 +3230,9 @@ static int __init intel_pstate_init(void)
                        if (!default_driver)
                                default_driver = &intel_pstate;
 
+                       if (boot_cpu_has(X86_FEATURE_HYBRID_CPU))
+                               intel_pstate_cppc_set_cpu_scaling();
+
                        goto hwp_cpu_matched;
                }
        } else {
index 9561e3d..541efe0 100644 (file)
@@ -42,6 +42,7 @@ config UDMABUF
 config DMABUF_MOVE_NOTIFY
        bool "Move notify between drivers (EXPERIMENTAL)"
        default n
+       depends on DMA_SHARED_BUFFER
        help
          Don't pin buffers if the dynamic DMA-buf interface is available on
          both the exporter as well as the importer. This fixes a security
@@ -52,6 +53,7 @@ config DMABUF_MOVE_NOTIFY
 
 config DMABUF_DEBUG
        bool "DMA-BUF debug checks"
+       depends on DMA_SHARED_BUFFER
        default y if DMA_API_DEBUG
        help
          This option enables additional checks for DMA-BUF importers and
@@ -74,7 +76,7 @@ menuconfig DMABUF_HEAPS
 
 menuconfig DMABUF_SYSFS_STATS
        bool "DMA-BUF sysfs statistics"
-       select DMA_SHARED_BUFFER
+       depends on DMA_SHARED_BUFFER
        help
           Choose this option to enable DMA-BUF sysfs statistics
           in location /sys/kernel/dmabuf/buffers.