Merge branch 'akpm' (patches from Andrew)
author    Linus Torvalds <torvalds@linux-foundation.org>
Wed, 8 Sep 2021 19:55:35 +0000 (12:55 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 8 Sep 2021 19:55:35 +0000 (12:55 -0700)
Merge more updates from Andrew Morton:
 "147 patches, based on 7d2a07b769330c34b4deabeed939325c77a7ec2f.

  Subsystems affected by this patch series: mm (memory-hotplug, rmap,
  ioremap, highmem, cleanups, secretmem, kfence, damon, and vmscan),
  alpha, percpu, procfs, misc, core-kernel, MAINTAINERS, lib,
  checkpatch, epoll, init, nilfs2, coredump, fork, pids, criu, kconfig,
  selftests, ipc, and scripts"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (94 commits)
  scripts: check_extable: fix typo in user error message
  mm/workingset: correct kernel-doc notations
  ipc: replace costly bailout check in sysvipc_find_ipc()
  selftests/memfd: remove unused variable
  Kconfig.debug: drop selecting non-existing HARDLOCKUP_DETECTOR_ARCH
  configs: remove the obsolete CONFIG_INPUT_POLLDEV
  prctl: allow to setup brk for et_dyn executables
  pid: cleanup the stale comment mentioning pidmap_init().
  kernel/fork.c: unexport get_{mm,task}_exe_file
  coredump: fix memleak in dump_vma_snapshot()
  fs/coredump.c: log if a core dump is aborted due to changed file permissions
  nilfs2: use refcount_dec_and_lock() to fix potential UAF
  nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group
  nilfs2: fix memory leak in nilfs_sysfs_create_snapshot_group
  nilfs2: fix memory leak in nilfs_sysfs_delete_##name##_group
  nilfs2: fix memory leak in nilfs_sysfs_create_##name##_group
  nilfs2: fix NULL pointer in nilfs_##name##_attr_release
  nilfs2: fix memory leak in nilfs_sysfs_create_device_group
  trap: cleanup trap_init()
  init: move usermodehelper_enable() to populate_rootfs()
  ...

35 files changed:
Documentation/kbuild/llvm.rst
MAINTAINERS
arch/Kconfig
arch/powerpc/kernel/traps.c
arch/powerpc/platforms/pseries/hotplug-memory.c
arch/riscv/Kconfig
arch/s390/mm/init.c
drivers/base/memory.c
drivers/base/node.c
drivers/mtd/nand/raw/intel-nand-controller.c
include/linux/memory.h
include/linux/mmzone.h
include/linux/page-flags.h
include/linux/pagemap.h
include/linux/units.h
include/trace/events/mmflags.h
init/main.c
kernel/fork.c
kernel/sys.c
lib/Kconfig.debug
lib/test_printf.c
lib/vsprintf.c
mm/compaction.c
mm/kfence/kfence_test.c
mm/kfence/report.c
mm/memory_hotplug.c
mm/page_alloc.c
mm/page_isolation.c
mm/percpu.c
mm/rmap.c
mm/vmalloc.c
tools/perf/builtin-c2c.c
tools/perf/builtin-record.c
tools/perf/util/header.c
tools/testing/selftests/kvm/dirty_log_perf_test.c

diff --combined Documentation/kbuild/llvm.rst
@@@ -38,7 -38,7 +38,7 @@@ Cross Compiling
  A single Clang compiler binary will typically contain all supported backends,
  which can help simplify cross compiling. ::
  
 -      ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- make CC=clang
 +      make ARCH=arm64 CC=clang CROSS_COMPILE=aarch64-linux-gnu-
  
  ``CROSS_COMPILE`` is not used to prefix the Clang compiler binary, instead
  ``CROSS_COMPILE`` is used to set a command line flag: ``--target=<triple>``. For
@@@ -60,27 -60,8 +60,27 @@@ They can be enabled individually. The f
          OBJCOPY=llvm-objcopy OBJDUMP=llvm-objdump READELF=llvm-readelf \
          HOSTCC=clang HOSTCXX=clang++ HOSTAR=llvm-ar HOSTLD=ld.lld
  
 -Currently, the integrated assembler is disabled by default. You can pass
 -``LLVM_IAS=1`` to enable it.
 +The integrated assembler is enabled by default. You can pass ``LLVM_IAS=0`` to
 +disable it.
 +
 +Omitting CROSS_COMPILE
 +----------------------
 +
 +As explained above, ``CROSS_COMPILE`` is used to set ``--target=<triple>``.
 +
 +If ``CROSS_COMPILE`` is not specified, the ``--target=<triple>`` is inferred
 +from ``ARCH``.
 +
 +That means if you use only LLVM tools, ``CROSS_COMPILE`` becomes unnecessary.
 +
 +For example, to cross-compile the arm64 kernel::
 +
 +      make ARCH=arm64 LLVM=1
 +
 +If ``LLVM_IAS=0`` is specified, ``CROSS_COMPILE`` is also used to derive
 +``--prefix=<path>`` to search for the GNU assembler and linker. ::
 +
 +      make ARCH=arm64 LLVM=1 LLVM_IAS=0 CROSS_COMPILE=aarch64-linux-gnu-
  
  Supported Architectures
  -----------------------
@@@ -130,9 -111,10 +130,10 @@@ Getting Help
  ------------
  
  - `Website <https://clangbuiltlinux.github.io/>`_
- - `Mailing List <https://groups.google.com/forum/#!forum/clang-built-linux>`_: <clang-built-linux@googlegroups.com>
+ - `Mailing List <https://lore.kernel.org/llvm/>`_: <llvm@lists.linux.dev>
+ - `Old Mailing List Archives <https://groups.google.com/g/clang-built-linux>`_
  - `Issue Tracker <https://github.com/ClangBuiltLinux/linux/issues>`_
- - IRC: #clangbuiltlinux on chat.freenode.net
+ - IRC: #clangbuiltlinux on irc.libera.chat
  - `Telegram <https://t.me/ClangBuiltLinux>`_: @ClangBuiltLinux
  - `Wiki <https://github.com/ClangBuiltLinux/linux/wiki>`_
  - `Beginner Bugs <https://github.com/ClangBuiltLinux/linux/issues?q=is%3Aopen+is%3Aissue+label%3A%22good+first+issue%22>`_
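
The net effect of the llvm.rst hunks above: the integrated assembler is now on by
default, so a pure-LLVM cross build needs no binutils prefix at all. Both commands
below are taken verbatim from the updated document; the second applies only to the
LLVM_IAS=0 fallback, where CROSS_COMPILE supplies --prefix=<path> for locating the
GNU assembler and linker rather than prefixing the compiler:

      make ARCH=arm64 LLVM=1
      make ARCH=arm64 LLVM=1 LLVM_IAS=0 CROSS_COMPILE=aarch64-linux-gnu-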
diff --combined MAINTAINERS
@@@ -459,12 -459,6 +459,12 @@@ S:       Maintained
  W:    https://parisc.wiki.kernel.org/index.php/AD1889
  F:    sound/pci/ad1889.*
  
 +AD5110 ANALOG DEVICES DIGITAL POTENTIOMETERS DRIVER
 +M:    Mugilraj Dhavachelvan <dmugil2000@gmail.com>
 +L:    linux-iio@vger.kernel.org
 +S:    Supported
 +F:    drivers/iio/potentiometer/ad5110.c
 +
  AD525X ANALOG DEVICES DIGITAL POTENTIOMETERS DRIVER
  M:    Michael Hennerich <michael.hennerich@analog.com>
  S:    Supported
@@@ -798,7 -792,7 +798,7 @@@ F: Documentation/devicetree/bindings/i2
  F:    drivers/i2c/busses/i2c-altera.c
  
  ALTERA MAILBOX DRIVER
 -M:    Ley Foon Tan <ley.foon.tan@intel.com>
 +M:    Joyce Ooi <joyce.ooi@intel.com>
  S:    Maintained
  F:    drivers/mailbox/mailbox-altera.c
  
@@@ -1268,13 -1262,6 +1268,13 @@@ L:    linux-input@vger.kernel.org
  S:    Odd fixes
  F:    drivers/input/mouse/bcm5974.c
  
 +APPLE DART IOMMU DRIVER
 +M:    Sven Peter <sven@svenpeter.dev>
 +L:    iommu@lists.linux-foundation.org
 +S:    Maintained
 +F:    Documentation/devicetree/bindings/iommu/apple,dart.yaml
 +F:    drivers/iommu/apple-dart.c
 +
  APPLE SMC DRIVER
  M:    Henrik Rydberg <rydberg@bitmath.org>
  L:    linux-hwmon@vger.kernel.org
@@@ -1329,13 -1316,6 +1329,13 @@@ L:    linux-media@vger.kernel.org
  S:    Maintained
  F:    drivers/media/i2c/aptina-pll.*
  
 +AQUACOMPUTER D5 NEXT PUMP SENSOR DRIVER
 +M:    Aleksa Savic <savicaleksa83@gmail.com>
 +L:    linux-hwmon@vger.kernel.org
 +S:    Maintained
 +F:    Documentation/hwmon/aquacomputer_d5next.rst
 +F:    drivers/hwmon/aquacomputer_d5next.c
 +
  AQUANTIA ETHERNET DRIVER (atlantic)
  M:    Igor Russkikh <irusskikh@marvell.com>
  L:    netdev@vger.kernel.org
@@@ -1402,7 -1382,7 +1402,7 @@@ F:      Documentation/devicetree/bindings/ar
  F:    Documentation/devicetree/bindings/arm/arm,realview.yaml
  F:    Documentation/devicetree/bindings/arm/arm,versatile.yaml
  F:    Documentation/devicetree/bindings/arm/arm,vexpress-juno.yaml
 -F:    Documentation/devicetree/bindings/auxdisplay/arm-charlcd.txt
 +F:    Documentation/devicetree/bindings/auxdisplay/arm,versatile-lcd.yaml
  F:    Documentation/devicetree/bindings/clock/arm,syscon-icst.yaml
  F:    Documentation/devicetree/bindings/i2c/i2c-versatile.txt
  F:    Documentation/devicetree/bindings/interrupt-controller/arm,versatile-fpga-irq.txt
@@@ -1496,7 -1476,7 +1496,7 @@@ F:      drivers/amba
  F:    include/linux/amba/bus.h
  
  ARM PRIMECELL PL35X NAND CONTROLLER DRIVER
 -M:    Miquel Raynal <miquel.raynal@bootlin.com@bootlin.com>
 +M:    Miquel Raynal <miquel.raynal@bootlin.com>
  M:    Naga Sureshkumar Relli <nagasure@xilinx.com>
  L:    linux-mtd@lists.infradead.org
  S:    Maintained
@@@ -1504,11 -1484,11 +1504,11 @@@ F:   Documentation/devicetree/bindings/mt
  F:    drivers/mtd/nand/raw/pl35x-nand-controller.c
  
  ARM PRIMECELL PL35X SMC DRIVER
 -M:    Miquel Raynal <miquel.raynal@bootlin.com@bootlin.com>
 +M:    Miquel Raynal <miquel.raynal@bootlin.com>
  M:    Naga Sureshkumar Relli <nagasure@xilinx.com>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 -F:    Documentation/devicetree/bindings/mtd/arm,pl353-smc.yaml
 +F:    Documentation/devicetree/bindings/memory-controllers/arm,pl353-smc.yaml
  F:    drivers/memory/pl353-smc.c
  
  ARM PRIMECELL CLCD PL110 DRIVER
@@@ -1710,7 -1690,7 +1710,7 @@@ L:      linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  W:    https://asahilinux.org
  B:    https://github.com/AsahiLinux/linux/issues
 -C:    irc://chat.freenode.net/asahi-dev
 +C:    irc://irc.oftc.net/asahi-dev
  T:    git https://github.com/AsahiLinux/linux.git
  F:    Documentation/devicetree/bindings/arm/apple.yaml
  F:    Documentation/devicetree/bindings/interrupt-controller/apple,aic.yaml
@@@ -2030,12 -2010,10 +2030,12 @@@ M:   Krzysztof Halasa <khalasa@piap.pl>
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
  F:    Documentation/devicetree/bindings/arm/intel-ixp4xx.yaml
 +F:    Documentation/devicetree/bindings/bus/intel,ixp4xx-expansion-bus-controller.yaml
  F:    Documentation/devicetree/bindings/gpio/intel,ixp4xx-gpio.txt
  F:    Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt.yaml
  F:    Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
  F:    arch/arm/mach-ixp4xx/
 +F:    drivers/bus/intel-ixp4xx-eb.c
  F:    drivers/clocksource/timer-ixp4xx.c
  F:    drivers/crypto/ixp4xx_crypto.c
  F:    drivers/gpio/gpio-ixp4xx.c
@@@ -2271,6 -2249,7 +2271,6 @@@ F:      drivers/iio/adc/ab8500-gpadc.
  F:    drivers/mfd/ab8500*
  F:    drivers/mfd/abx500*
  F:    drivers/mfd/db8500*
 -F:    drivers/mfd/dbx500*
  F:    drivers/pinctrl/nomadik/
  F:    drivers/rtc/rtc-ab8500.c
  F:    drivers/rtc/rtc-pl031.c
@@@ -2733,13 -2712,11 +2733,13 @@@ T:   git git://git.kernel.org/pub/scm/lin
  F:    Documentation/devicetree/bindings/arm/toshiba.yaml
  F:    Documentation/devicetree/bindings/net/toshiba,visconti-dwmac.yaml
  F:    Documentation/devicetree/bindings/gpio/toshiba,gpio-visconti.yaml
 +F:    Documentation/devicetree/bindings/pci/toshiba,visconti-pcie.yaml
  F:    Documentation/devicetree/bindings/pinctrl/toshiba,tmpv7700-pinctrl.yaml
  F:    Documentation/devicetree/bindings/watchdog/toshiba,visconti-wdt.yaml
  F:    arch/arm64/boot/dts/toshiba/
  F:    drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
  F:    drivers/gpio/gpio-visconti.c
 +F:    drivers/pci/controller/dwc/pcie-visconti.c
  F:    drivers/pinctrl/visconti/
  F:    drivers/watchdog/visconti_wdt.c
  N:    visconti
@@@ -2865,7 -2842,7 +2865,7 @@@ AS3645A LED FLASH CONTROLLER DRIVER
  M:    Sakari Ailus <sakari.ailus@iki.fi>
  L:    linux-leds@vger.kernel.org
  S:    Maintained
 -F:    drivers/leds/leds-as3645a.c
 +F:    drivers/leds/flash/leds-as3645a.c
  
  ASAHI KASEI AK7375 LENS VOICE COIL DRIVER
  M:    Tianshu Qiu <tian.shu.qiu@intel.com>
@@@ -3220,7 -3197,7 +3220,7 @@@ S:      Maintained
  W:    https://www.open-mesh.org/
  Q:    https://patchwork.open-mesh.org/project/batman/list/
  B:    https://www.open-mesh.org/projects/batman-adv/issues
 -C:    irc://chat.freenode.net/batman
 +C:    ircs://irc.hackint.org/batadv
  T:    git https://git.open-mesh.org/linux-merge.git
  F:    Documentation/networking/batman-adv.rst
  F:    include/uapi/linux/batadv_packet.h
@@@ -3432,6 -3409,7 +3432,6 @@@ F:      drivers/net/ethernet/netronome/nfp/b
  
  BPF JIT for POWERPC (32-BIT AND 64-BIT)
  M:    Naveen N. Rao <naveen.n.rao@linux.ibm.com>
 -M:    Sandipan Das <sandipan@linux.ibm.com>
  L:    netdev@vger.kernel.org
  L:    bpf@vger.kernel.org
  S:    Maintained
@@@ -3877,7 -3855,7 +3877,7 @@@ M:      Markus Mayer <mmayer@broadcom.com>
  M:    bcm-kernel-feedback-list@broadcom.com
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 -F:    Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.txt
 +F:    Documentation/devicetree/bindings/memory-controllers/brcm,dpfe-cpu.yaml
  F:    drivers/memory/brcmstb_dpfe.c
  
  BROADCOM STB NAND FLASH DRIVER
@@@ -4526,14 -4504,13 +4526,14 @@@ F:   .clang-format
  CLANG/LLVM BUILD SUPPORT
  M:    Nathan Chancellor <nathan@kernel.org>
  M:    Nick Desaulniers <ndesaulniers@google.com>
- L:    clang-built-linux@googlegroups.com
+ L:    llvm@lists.linux.dev
  S:    Supported
  W:    https://clangbuiltlinux.github.io/
  B:    https://github.com/ClangBuiltLinux/linux/issues
  C:    irc://irc.libera.chat/clangbuiltlinux
  F:    Documentation/kbuild/llvm.rst
  F:    include/linux/compiler-clang.h
 +F:    scripts/Makefile.clang
  F:    scripts/clang-tools/
  K:    \b(?i:clang|llvm)\b
  
@@@ -4542,7 -4519,7 +4542,7 @@@ M:      Sami Tolvanen <samitolvanen@google.com>
  M:    Kees Cook <keescook@chromium.org>
  R:    Nathan Chancellor <nathan@kernel.org>
  R:    Nick Desaulniers <ndesaulniers@google.com>
- L:    clang-built-linux@googlegroups.com
+ L:    llvm@lists.linux.dev
  S:    Supported
  B:    https://github.com/ClangBuiltLinux/linux/issues
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/kees/linux.git for-next/clang/features
@@@ -4643,7 -4620,7 +4643,7 @@@ F:      include/linux/clk
  F:    include/linux/of_clk.h
  X:    drivers/clk/clkdev.c
  
 -COMMON INTERNET FILE SYSTEM (CIFS)
 +COMMON INTERNET FILE SYSTEM CLIENT (CIFS)
  M:    Steve French <sfrench@samba.org>
  L:    linux-cifs@vger.kernel.org
  L:    samba-technical@lists.samba.org (moderated for non-subscribers)
@@@ -4652,7 -4629,6 +4652,7 @@@ W:      http://linux-cifs.samba.org
  T:    git git://git.samba.org/sfrench/cifs-2.6.git
  F:    Documentation/admin-guide/cifs/
  F:    fs/cifs/
 +F:    fs/cifs_common/
  
  COMPACTPCI HOTPLUG CORE
  M:    Scott Murray <scott@spiteful.org>
@@@ -5149,6 -5125,17 +5149,17 @@@ F:    net/ax25/ax25_out.
  F:    net/ax25/ax25_timer.c
  F:    net/ax25/sysctl_net_ax25.c
  
+ DATA ACCESS MONITOR
+ M:    SeongJae Park <sjpark@amazon.de>
+ L:    linux-mm@kvack.org
+ S:    Maintained
+ F:    Documentation/admin-guide/mm/damon/
+ F:    Documentation/vm/damon/
+ F:    include/linux/damon.h
+ F:    include/trace/events/damon.h
+ F:    mm/damon/
+ F:    tools/testing/selftests/damon/
+ 
  DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER
  L:    netdev@vger.kernel.org
  S:    Orphan
@@@ -5604,7 -5591,7 +5615,7 @@@ M:      Lukasz Luba <lukasz.luba@arm.com>
  L:    linux-pm@vger.kernel.org
  L:    linux-samsung-soc@vger.kernel.org
  S:    Maintained
 -F:    Documentation/devicetree/bindings/memory-controllers/exynos5422-dmc.txt
 +F:    Documentation/devicetree/bindings/memory-controllers/samsung,exynos5422-dmc.yaml
  F:    drivers/memory/samsung/exynos5422-dmc.c
  
  DME1737 HARDWARE MONITOR DRIVER
@@@ -5718,7 -5705,6 +5729,7 @@@ DPAA2 ETHERNET SWITCH DRIVER
  M:    Ioana Ciornei <ioana.ciornei@nxp.com>
  L:    netdev@vger.kernel.org
  S:    Maintained
 +F:    Documentation/networking/device_drivers/ethernet/freescale/dpaa2/switch-driver.rst
  F:    drivers/net/ethernet/freescale/dpaa2/dpaa2-switch*
  F:    drivers/net/ethernet/freescale/dpaa2/dpsw*
  
@@@ -5742,11 -5728,6 +5753,11 @@@ F:    Documentation/admin-guide/blockdev
  F:    drivers/block/drbd/
  F:    lib/lru_cache.c
  
 +DRIVER COMPONENT FRAMEWORK
 +L:    dri-devel@lists.freedesktop.org
 +F:    drivers/base/component.c
 +F:    include/linux/component.h
 +
  DRIVER CORE, KOBJECTS, DEBUGFS AND SYSFS
  M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
  R:    "Rafael J. Wysocki" <rafael@kernel.org>
@@@ -5810,7 -5791,7 +5821,7 @@@ M:      Gerd Hoffmann <kraxel@redhat.com>
  L:    virtualization@lists.linux-foundation.org
  S:    Maintained
  T:    git git://anongit.freedesktop.org/drm/drm-misc
 -F:    drivers/gpu/drm/bochs/
 +F:    drivers/gpu/drm/tiny/bochs.c
  
  DRM DRIVER FOR BOE HIMAX8279D PANELS
  M:    Jerry Han <hanxu5@huaqin.corp-partner.google.com>
@@@ -5995,13 -5976,6 +6006,13 @@@ S:    Maintained
  F:    Documentation/devicetree/bindings/display/panel/raydium,rm67191.yaml
  F:    drivers/gpu/drm/panel/panel-raydium-rm67191.c
  
 +DRM DRIVER FOR SAMSUNG DB7430 PANELS
 +M:    Linus Walleij <linus.walleij@linaro.org>
 +S:    Maintained
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
 +F:    Documentation/devicetree/bindings/display/panel/samsung,lms397kf04.yaml
 +F:    drivers/gpu/drm/panel/panel-samsung-db7430.c
 +
  DRM DRIVER FOR SITRONIX ST7703 PANELS
  M:    Guido Günther <agx@sigxcpu.org>
  R:    Purism Kernel Team <kernel@puri.sm>
@@@ -6100,27 -6074,21 +6111,27 @@@ F:   drivers/gpu/drm/vboxvideo
  
  DRM DRIVER FOR VMWARE VIRTUAL GPU
  M:    "VMware Graphics" <linux-graphics-maintainer@vmware.com>
 -M:    Roland Scheidegger <sroland@vmware.com>
  M:    Zack Rusin <zackr@vmware.com>
  L:    dri-devel@lists.freedesktop.org
  S:    Supported
 -T:    git git://people.freedesktop.org/~sroland/linux
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
  F:    drivers/gpu/drm/vmwgfx/
  F:    include/uapi/drm/vmwgfx_drm.h
  
 +DRM DRIVER FOR WIDECHIPS WS2401 PANELS
 +M:    Linus Walleij <linus.walleij@linaro.org>
 +S:    Maintained
 +T:    git git://anongit.freedesktop.org/drm/drm-misc
 +F:    Documentation/devicetree/bindings/display/panel/samsung,lms380kf01.yaml
 +F:    drivers/gpu/drm/panel/panel-widechips-ws2401.c
 +
  DRM DRIVERS
  M:    David Airlie <airlied@linux.ie>
  M:    Daniel Vetter <daniel@ffwll.ch>
  L:    dri-devel@lists.freedesktop.org
  S:    Maintained
  B:    https://gitlab.freedesktop.org/drm
 -C:    irc://chat.freenode.net/dri-devel
 +C:    irc://irc.oftc.net/dri-devel
  T:    git git://anongit.freedesktop.org/drm/drm
  F:    Documentation/devicetree/bindings/display/
  F:    Documentation/devicetree/bindings/gpu/
@@@ -6613,7 -6581,6 +6624,7 @@@ EDAC-ARMADA
  M:    Jan Luebbe <jlu@pengutronix.de>
  L:    linux-edac@vger.kernel.org
  S:    Maintained
 +F:    Documentation/devicetree/bindings/memory-controllers/marvell,mvebu-sdram-controller.yaml
  F:    drivers/edac/armada_xp_*
  
  EDAC-AST2500
@@@ -6855,6 -6822,7 +6866,6 @@@ F:      Documentation/admin-guide/media/em28
  F:    drivers/media/usb/em28xx/
  
  EMBEDDED LINUX
 -M:    Paul Gortmaker <paul.gortmaker@windriver.com>
  M:    Matt Mackall <mpm@selenic.com>
  M:    David Woodhouse <dwmw2@infradead.org>
  L:    linux-embedded@vger.kernel.org
@@@ -6957,12 -6925,6 +6968,12 @@@ M:    Mark Einon <mark.einon@gmail.com>
  S:    Odd Fixes
  F:    drivers/net/ethernet/agere/
  
 +ETAS ES58X CAN/USB DRIVER
 +M:    Vincent Mailhol <mailhol.vincent@wanadoo.fr>
 +L:    linux-can@vger.kernel.org
 +S:    Maintained
 +F:    drivers/net/can/usb/etas_es58x/
 +
  ETHERNET BRIDGE
  M:    Roopa Prabhu <roopa@nvidia.com>
  M:    Nikolay Aleksandrov <nikolay@nvidia.com>
@@@ -8454,7 -8416,7 +8465,7 @@@ F:      drivers/crypto/hisilicon/sgl.
  F:    drivers/crypto/hisilicon/zip/
  
  HISILICON ROCE DRIVER
 -M:    Lijun Ou <oulijun@huawei.com>
 +M:    Wenpeng Liang <liangwenpeng@huawei.com>
  M:    Weihang Li <liweihang@huawei.com>
  L:    linux-rdma@vger.kernel.org
  S:    Maintained
@@@ -8492,12 -8454,10 +8503,12 @@@ S:   Maintained
  F:    Documentation/devicetree/bindings/spmi/hisilicon,hisi-spmi-controller.yaml
  F:    drivers/spmi/hisi-spmi-controller.c
  
 -HISILICON STAGING DRIVERS FOR HIKEY 960/970
 +HISILICON SPMI PMIC DRIVER FOR HIKEY 6421v600
  M:    Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
 +L:    linux-kernel@vger.kernel.org
  S:    Maintained
 -F:    drivers/staging/hikey9xx/
 +F:    Documentation/devicetree/bindings/mfd/hisilicon,hi6421-spmi-pmic.yaml
 +F:    drivers/mfd/hi6421-spmi-pmic.c
  
  HISILICON TRUE RANDOM NUMBER GENERATOR V2 SUPPORT
  M:    Zaibo Xu <xuzaibo@huawei.com>
@@@ -8656,9 -8616,6 +8667,9 @@@ T:      git git://git.kernel.org/pub/scm/lin
  F:    Documentation/ABI/stable/sysfs-bus-vmbus
  F:    Documentation/ABI/testing/debugfs-hyperv
  F:    Documentation/networking/device_drivers/ethernet/microsoft/netvsc.rst
 +F:    arch/arm64/hyperv
 +F:    arch/arm64/include/asm/hyperv-tlfs.h
 +F:    arch/arm64/include/asm/mshyperv.h
  F:    arch/x86/hyperv
  F:    arch/x86/include/asm/hyperv-tlfs.h
  F:    arch/x86/include/asm/mshyperv.h
@@@ -9105,7 -9062,7 +9116,7 @@@ F:      drivers/usb/atm/ueagle-atm.
  IMGTEC ASCII LCD DRIVER
  M:    Paul Burton <paulburton@kernel.org>
  S:    Maintained
 -F:    Documentation/devicetree/bindings/auxdisplay/img-ascii-lcd.txt
 +F:    Documentation/devicetree/bindings/auxdisplay/img,ascii-lcd.yaml
  F:    drivers/auxdisplay/img-ascii-lcd.c
  
  IMGTEC IR DECODER DRIVER
@@@ -9277,20 -9234,13 +9288,20 @@@ INTEL ATOMISP2 DUMMY / POWER-MANAGEMENT DRIVER
  M:    Hans de Goede <hdegoede@redhat.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
 -F:    drivers/platform/x86/intel_atomisp2_pm.c
 +F:    drivers/platform/x86/intel/atomisp2/pm.c
  
  INTEL ATOMISP2 LED DRIVER
  M:    Hans de Goede <hdegoede@redhat.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
 -F:    drivers/platform/x86/intel_atomisp2_led.c
 +F:    drivers/platform/x86/intel/atomisp2/led.c
 +
 +INTEL BIOS SAR INT1092 DRIVER
 +M:    Shravan S <s.shravan@intel.com>
 +M:    Intel Corporation <linuxwwan@intel.com>
 +L:    platform-driver-x86@vger.kernel.org
 +S:    Maintained
 +F:    drivers/platform/x86/intel/int1092/
  
  INTEL BROXTON PMC DRIVER
  M:    Mika Westerberg <mika.westerberg@linux.intel.com>
@@@ -9322,7 -9272,7 +9333,7 @@@ S:      Supported
  W:    https://01.org/linuxgraphics/
  Q:    http://patchwork.freedesktop.org/project/intel-gfx/
  B:    https://gitlab.freedesktop.org/drm/intel/-/wikis/How-to-file-i915-bugs
 -C:    irc://chat.freenode.net/intel-gfx
 +C:    irc://irc.oftc.net/intel-gfx
  T:    git git://anongit.freedesktop.org/drm-intel
  F:    Documentation/gpu/i915.rst
  F:    drivers/gpu/drm/i915/
@@@ -9386,7 -9336,7 +9397,7 @@@ INTEL HID EVENT DRIVER
  M:    Alex Hung <alex.hung@canonical.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
 -F:    drivers/platform/x86/intel-hid.c
 +F:    drivers/platform/x86/intel/hid.c
  
  INTEL I/OAT DMA DRIVER
  M:    Dave Jiang <dave.jiang@intel.com>
@@@ -9530,17 -9480,17 +9541,17 @@@ F:   include/linux/mfd/intel-m10-bmc.
  
  INTEL MENLOW THERMAL DRIVER
  M:    Sujith Thomas <sujith.thomas@intel.com>
 -L:    platform-driver-x86@vger.kernel.org
 +L:    linux-pm@vger.kernel.org
  S:    Supported
  W:    https://01.org/linux-acpi
 -F:    drivers/platform/x86/intel_menlow.c
 +F:    drivers/thermal/intel/intel_menlow.c
  
  INTEL P-Unit IPC DRIVER
  M:    Zha Qipeng <qipeng.zha@intel.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
  F:    arch/x86/include/asm/intel_punit_ipc.h
 -F:    drivers/platform/x86/intel_punit_ipc.c
 +F:    drivers/platform/x86/intel/punit_ipc.c
  
  INTEL PMC CORE DRIVER
  M:    Rajneesh Bhardwaj <irenic.rajneesh@gmail.com>
@@@ -9548,7 -9498,7 +9559,7 @@@ M:      David E Box <david.e.box@intel.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
  F:    Documentation/ABI/testing/sysfs-platform-intel-pmc
 -F:    drivers/platform/x86/intel_pmc_core*
 +F:    drivers/platform/x86/intel/pmc/
  
  INTEL PMIC GPIO DRIVERS
  M:    Andy Shevchenko <andy@kernel.org>
@@@ -9566,7 -9516,7 +9577,7 @@@ INTEL PMT DRIVER
  M:    "David E. Box" <david.e.box@linux.intel.com>
  S:    Maintained
  F:    drivers/mfd/intel_pmt.c
 -F:    drivers/platform/x86/intel_pmt_*
 +F:    drivers/platform/x86/intel/pmt/
  
  INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
  M:    Stanislav Yakovlev <stas.yakovlev@gmail.com>
@@@ -9603,7 -9553,7 +9614,7 @@@ INTEL SPEED SELECT TECHNOLOGY
  M:    Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
 -F:    drivers/platform/x86/intel_speed_select_if/
 +F:    drivers/platform/x86/intel/speed_select_if/
  F:    include/uapi/linux/isst_if.h
  F:    tools/power/x86/intel-speed-select/
  
@@@ -9624,19 -9574,19 +9635,19 @@@ M:   "David E. Box" <david.e.box@linux.intel.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
  F:    arch/x86/include/asm/intel_telemetry.h
 -F:    drivers/platform/x86/intel_telemetry*
 +F:    drivers/platform/x86/intel/telemetry/
  
  INTEL UNCORE FREQUENCY CONTROL
  M:    Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
 -F:    drivers/platform/x86/intel-uncore-frequency.c
 +F:    drivers/platform/x86/intel/uncore-frequency.c
  
  INTEL VIRTUAL BUTTON DRIVER
  M:    AceLan Kao <acelan.kao@canonical.com>
  L:    platform-driver-x86@vger.kernel.org
  S:    Maintained
 -F:    drivers/platform/x86/intel-vbtn.c
 +F:    drivers/platform/x86/intel/vbtn.c
  
  INTEL WIRELESS 3945ABG/BG, 4965AGN (iwlegacy)
  M:    Stanislaw Gruszka <stf_xl@wp.pl>
@@@ -9657,12 -9607,12 +9668,12 @@@ M:   Jithu Joseph <jithu.joseph@intel.com>
  R:    Maurice Ma <maurice.ma@intel.com>
  S:    Maintained
  W:    https://slimbootloader.github.io/security/firmware-update.html
 -F:    drivers/platform/x86/intel-wmi-sbl-fw-update.c
 +F:    drivers/platform/x86/intel/wmi/sbl-fw-update.c
  
  INTEL WMI THUNDERBOLT FORCE POWER DRIVER
  L:    Dell.Client.Kernel@dell.com
  S:    Maintained
 -F:    drivers/platform/x86/intel-wmi-thunderbolt.c
 +F:    drivers/platform/x86/intel/wmi/thunderbolt.c
  
  INTEL WWAN IOSM DRIVER
  M:    M Chetan Kumar <m.chetan.kumar@intel.com>
@@@ -9820,6 -9770,11 +9831,6 @@@ M:     David Sterba <dsterba@suse.com>
  S:    Odd Fixes
  F:    drivers/tty/ipwireless/
  
 -IPX NETWORK LAYER
 -L:    netdev@vger.kernel.org
 -S:    Obsolete
 -F:    include/uapi/linux/ipx.h
 -
  IRQ DOMAINS (IRQ NUMBER MAPPING LIBRARY)
  M:    Marc Zyngier <maz@kernel.org>
  S:    Maintained
@@@ -10118,7 -10073,6 +10129,7 @@@ F:   fs/autofs
  KERNEL BUILD + files below scripts/ (unless maintained elsewhere)
  M:    Masahiro Yamada <masahiroy@kernel.org>
  M:    Michal Marek <michal.lkml@markovi.net>
 +R:    Nick Desaulniers <ndesaulniers@google.com>
  L:    linux-kbuild@vger.kernel.org
  S:    Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/masahiroy/linux-kbuild.git
@@@ -10170,17 -10124,6 +10181,17 @@@ T: git git://git.kernel.org/pub/scm/lin
  F:    Documentation/dev-tools/kselftest*
  F:    tools/testing/selftests/
  
 +KERNEL SMB3 SERVER (KSMBD)
 +M:    Namjae Jeon <linkinjeon@kernel.org>
 +M:    Sergey Senozhatsky <senozhatsky@chromium.org>
 +M:    Steve French <sfrench@samba.org>
 +M:    Hyunchul Lee <hyc.lee@gmail.com>
 +L:    linux-cifs@vger.kernel.org
 +S:    Maintained
 +T:    git git://git.samba.org/ksmbd.git
 +F:    fs/cifs_common/
 +F:    fs/ksmbd/
 +
  KERNEL UNIT TESTING FRAMEWORK (KUnit)
  M:    Brendan Higgins <brendanhiggins@google.com>
  L:    linux-kselftest@vger.kernel.org
@@@ -10466,7 -10409,6 +10477,7 @@@ F:   net/core/skmsg.
  F:    net/core/sock_map.c
  F:    net/ipv4/tcp_bpf.c
  F:    net/ipv4/udp_bpf.c
 +F:    net/unix/unix_bpf.c
  
  LANDLOCK SECURITY MODULE
  M:    Mickaël Salaün <mic@digikod.net>
@@@ -10688,6 -10630,15 +10699,6 @@@ F:  LICENSES
  F:    scripts/spdxcheck-test.sh
  F:    scripts/spdxcheck.py
  
 -LIGHTNVM PLATFORM SUPPORT
 -M:    Matias Bjorling <mb@lightnvm.io>
 -L:    linux-block@vger.kernel.org
 -S:    Maintained
 -W:    http://github/OpenChannelSSD
 -F:    drivers/lightnvm/
 -F:    include/linux/lightnvm.h
 -F:    include/uapi/linux/lightnvm.h
 -
  LINEAR RANGES HELPERS
  M:    Mark Brown <broonie@kernel.org>
  R:    Matti Vaittinen <matti.vaittinen@fi.rohmeurope.com>
@@@ -11100,18 -11051,6 +11111,18 @@@ F: drivers/mailbox/arm_mhuv2.
  F:    include/linux/mailbox/arm_mhuv2_message.h
  F:    Documentation/devicetree/bindings/mailbox/arm,mhuv2.yaml
  
 +MANAGEMENT COMPONENT TRANSPORT PROTOCOL (MCTP)
 +M:    Jeremy Kerr <jk@codeconstruct.com.au>
 +M:    Matt Johnston <matt@codeconstruct.com.au>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    Documentation/networking/mctp.rst
 +F:    drivers/net/mctp/
 +F:    include/net/mctp.h
 +F:    include/net/mctpdevice.h
 +F:    include/net/netns/mctp.h
 +F:    net/mctp/
 +
  MAN-PAGES: MANUAL PAGES FOR LINUX -- Sections 2, 3, 4, 5, and 7
  M:    Michael Kerrisk <mtk.manpages@gmail.com>
  L:    linux-man@vger.kernel.org
@@@ -11122,7 -11061,7 +11133,7 @@@ MARDUK (CREATOR CI40) DEVICE TREE SUPPORT
  M:    Rahul Bedarkar <rahulbedarkar89@gmail.com>
  L:    linux-mips@vger.kernel.org
  S:    Maintained
 -F:    arch/mips/boot/dts/img/pistachio_marduk.dts
 +F:    arch/mips/boot/dts/img/pistachio*
  
  MARVELL 88E6XXX ETHERNET SWITCH FABRIC DRIVER
  M:    Andrew Lunn <andrew@lunn.ch>
@@@ -11409,12 -11348,6 +11420,12 @@@ W: https://linuxtv.org
  T:    git git://linuxtv.org/media_tree.git
  F:    drivers/media/radio/radio-maxiradio*
  
 +MAXLINEAR ETHERNET PHY DRIVER
 +M:    Xu Liang <lxu@maxlinear.com>
 +L:    netdev@vger.kernel.org
 +S:    Supported
 +F:    drivers/net/phy/mxl-gpy.c
 +
  MCBA MICROCHIP CAN BUS ANALYZER TOOL DRIVER
  R:    Yasushi SHOJI <yashi@spacecubics.com>
  L:    linux-can@vger.kernel.org
@@@ -13341,15 -13274,6 +13352,15 @@@ T: git git://git.kernel.org/pub/scm/lin
  F:    Documentation/filesystems/ntfs.rst
  F:    fs/ntfs/
  
 +NTFS3 FILESYSTEM
 +M:    Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
 +L:    ntfs3@lists.linux.dev
 +S:    Supported
 +W:    http://www.paragon-software.com/
 +T:    git https://github.com/Paragon-Software-Group/linux-ntfs3.git
 +F:    Documentation/filesystems/ntfs3.rst
 +F:    fs/ntfs3/
 +
  NUBUS SUBSYSTEM
  M:    Finn Thain <fthain@linux-m68k.org>
  L:    linux-m68k@lists.linux-m68k.org
@@@ -13877,15 -13801,6 +13888,15 @@@ T: git git://linuxtv.org/media_tree.git
  F:    Documentation/devicetree/bindings/media/i2c/ov8856.yaml
  F:    drivers/media/i2c/ov8856.c
  
 +OMNIVISION OV9282 SENSOR DRIVER
 +M:    Paul J. Murphy <paul.j.murphy@intel.com>
 +M:    Daniele Alessandrelli <daniele.alessandrelli@intel.com>
 +L:    linux-media@vger.kernel.org
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/i2c/ovti,ov9282.yaml
 +F:    drivers/media/i2c/ov9282.c
 +
  OMNIVISION OV9640 SENSOR DRIVER
  M:    Petr Cvek <petrcvekcz@gmail.com>
  L:    linux-media@vger.kernel.org
@@@ -13976,12 -13891,6 +13987,12 @@@ F: Documentation/devicetree
  F:    arch/*/boot/dts/
  F:    include/dt-bindings/
  
 +OPENCOMPUTE PTP CLOCK DRIVER
 +M:    Jonathan Lemon <jonathan.lemon@gmail.com>
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    drivers/ptp/ptp_ocp.c
 +
  OPENCORES I2C BUS DRIVER
  M:    Peter Korsgaard <peter@korsgaard.com>
  M:    Andrew Lunn <andrew@lunn.ch>
@@@ -14307,7 -14216,7 +14318,7 @@@ M:   Lucas Stach <l.stach@pengutronix.de>
  L:    linux-pci@vger.kernel.org
  L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 -F:    Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.txt
 +F:    Documentation/devicetree/bindings/pci/fsl,imx6q-pcie.yaml
  F:    drivers/pci/controller/dwc/*imx6*
  
  PCI DRIVER FOR FU740
@@@ -14395,8 -14304,7 +14406,8 @@@ M:   Jingoo Han <jingoohan1@gmail.com>
  M:    Gustavo Pimentel <gustavo.pimentel@synopsys.com>
  L:    linux-pci@vger.kernel.org
  S:    Maintained
 -F:    Documentation/devicetree/bindings/pci/designware-pcie.txt
 +F:    Documentation/devicetree/bindings/pci/snps,dw-pcie.yaml
 +F:    Documentation/devicetree/bindings/pci/snps,dw-pcie-ep.yaml
  F:    drivers/pci/controller/dwc/*designware*
  
  PCI DRIVER FOR TI DRA7XX/J721E
@@@ -14533,7 -14441,7 +14544,7 @@@ M:   Xiaowei Song <songxiaowei@hisilicon.com>
  M:    Binghui Wang <wangbinghui@hisilicon.com>
  L:    linux-pci@vger.kernel.org
  S:    Maintained
 -F:    Documentation/devicetree/bindings/pci/kirin-pcie.txt
 +F:    Documentation/devicetree/bindings/pci/hisilicon,kirin-pcie.yaml
  F:    drivers/pci/controller/dwc/pcie-kirin.c
  
  PCIE DRIVER FOR HISILICON STB
@@@ -14543,13 -14451,6 +14554,13 @@@ S: Maintained
  F:    Documentation/devicetree/bindings/pci/hisilicon-histb-pcie.txt
  F:    drivers/pci/controller/dwc/pcie-histb.c
  
 +PCIE DRIVER FOR INTEL KEEM BAY
 +M:    Srikanth Thokala <srikanth.thokala@intel.com>
 +L:    linux-pci@vger.kernel.org
 +S:    Supported
 +F:    Documentation/devicetree/bindings/pci/intel,keembay-pcie*
 +F:    drivers/pci/controller/dwc/pcie-keembay.c
 +
  PCIE DRIVER FOR INTEL LGM GW SOC
  M:    Rahul Tanwar <rtanwar@maxlinear.com>
  L:    linux-pci@vger.kernel.org
@@@ -14752,12 -14653,6 +14763,12 @@@ F: Documentation/driver-api/pin-control
  F:    drivers/pinctrl/
  F:    include/linux/pinctrl/
  
 +PIN CONTROLLER - AMD
 +M:    Basavaraj Natikar <Basavaraj.Natikar@amd.com>
 +M:    Shyam Sundar S K <Shyam-sundar.S-k@amd.com>
 +S:    Maintained
 +F:    drivers/pinctrl/pinctrl-amd.c
 +
  PIN CONTROLLER - FREESCALE
  M:    Dong Aisheng <aisheng.dong@nxp.com>
  M:    Fabio Estevam <festevam@gmail.com>
@@@ -14776,19 -14671,12 +14787,19 @@@ S:        Maintained
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/pinctrl/intel.git
  F:    drivers/pinctrl/intel/
  
 +PIN CONTROLLER - KEEMBAY
 +M:    Lakshmi Sowjanya D <lakshmi.sowjanya.d@intel.com>
 +S:    Supported
 +F:    drivers/pinctrl/pinctrl-keembay*
 +
  PIN CONTROLLER - MEDIATEK
  M:    Sean Wang <sean.wang@kernel.org>
  L:    linux-mediatek@lists.infradead.org (moderated for non-subscribers)
  S:    Maintained
 -F:    Documentation/devicetree/bindings/pinctrl/pinctrl-mt65xx.txt
 -F:    Documentation/devicetree/bindings/pinctrl/pinctrl-mt7622.txt
 +F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt65xx-pinctrl.yaml
 +F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt6797-pinctrl.yaml
 +F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt7622-pinctrl.yaml
 +F:    Documentation/devicetree/bindings/pinctrl/mediatek,mt8183-pinctrl.yaml
  F:    drivers/pinctrl/mediatek/
  
  PIN CONTROLLER - MICROCHIP AT91
@@@ -14842,6 -14730,14 +14853,6 @@@ S:  Maintained
  W:    http://www.st.com/spear
  F:    drivers/pinctrl/spear/
  
 -PISTACHIO SOC SUPPORT
 -M:    James Hartley <james.hartley@sondrel.com>
 -L:    linux-mips@vger.kernel.org
 -S:    Odd Fixes
 -F:    arch/mips/boot/dts/img/pistachio*
 -F:    arch/mips/configs/pistachio*_defconfig
 -F:    arch/mips/pistachio/
 -
  PKTCDVD DRIVER
  M:    linux-block@vger.kernel.org
  S:    Orphan
@@@ -15058,10 -14954,12 +15069,10 @@@ S:        Maintained
  F:    include/linux/printk.h
  F:    kernel/printk/
  
 -PRISM54 WIRELESS DRIVER
 -M:    Luis Chamberlain <mcgrof@kernel.org>
 -L:    linux-wireless@vger.kernel.org
 -S:    Obsolete
 -W:    https://wireless.wiki.kernel.org/en/users/Drivers/p54
 -F:    drivers/net/wireless/intersil/prism54/
 +PRINTK INDEXING
 +R:    Chris Down <chris@chrisdown.name>
 +S:    Maintained
 +F:    kernel/printk/index.c
  
  PROC FILESYSTEM
  L:    linux-kernel@vger.kernel.org
@@@ -15978,14 -15876,6 +15989,14 @@@ L: linux-renesas-soc@vger.kernel.org
  S:    Maintained
  F:    drivers/phy/renesas/phy-rcar-gen3-usb*.c
  
 +RENESAS RZ/G2L A/D DRIVER
 +M:    Lad Prabhakar <prabhakar.mahadev-lad.rj@bp.renesas.com>
 +L:    linux-iio@vger.kernel.org
 +L:    linux-renesas-soc@vger.kernel.org
 +S:    Supported
 +F:    Documentation/devicetree/bindings/iio/adc/renesas,rzg2l-adc.yaml
 +F:    drivers/iio/adc/rzg2l_adc.c
 +
  RESET CONTROLLER FRAMEWORK
  M:    Philipp Zabel <p.zabel@pengutronix.de>
  S:    Maintained
@@@ -16456,7 -16346,7 +16467,7 @@@ SAMSUNG EXYNOS TRUE RANDOM NUMBER GENERATOR DRIVER
  M:    Łukasz Stelmach <l.stelmach@samsung.com>
  L:    linux-samsung-soc@vger.kernel.org
  S:    Maintained
 -F:    Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.txt
 +F:    Documentation/devicetree/bindings/rng/samsung,exynos5250-trng.yaml
  F:    drivers/char/hw_random/exynos-trng.c
  
  SAMSUNG FRAMEBUFFER DRIVER
@@@ -16549,14 -16439,10 +16560,14 @@@ L:        linux-samsung-soc@vger.kernel.org
  S:    Supported
  T:    git git://git.kernel.org/pub/scm/linux/kernel/git/snawrocki/clk.git
  F:    Documentation/devicetree/bindings/clock/exynos*.txt
 +F:    Documentation/devicetree/bindings/clock/samsung,*.yaml
  F:    Documentation/devicetree/bindings/clock/samsung,s3c*
  F:    Documentation/devicetree/bindings/clock/samsung,s5p*
  F:    drivers/clk/samsung/
  F:    include/dt-bindings/clock/exynos*.h
 +F:    include/dt-bindings/clock/s3c*.h
 +F:    include/dt-bindings/clock/s5p*.h
 +F:    include/dt-bindings/clock/samsung,*.h
  F:    include/linux/clk/samsung.h
  F:    include/linux/platform_data/clk-s3c2410.h
  
@@@ -16598,12 -16484,6 +16609,12 @@@ F: drivers/phy/samsung/phy-s5pv210-usb2
  F:    drivers/phy/samsung/phy-samsung-usb2.c
  F:    drivers/phy/samsung/phy-samsung-usb2.h
  
 +SANCLOUD BEAGLEBONE ENHANCED DEVICE TREE
 +M:    Paul Barker <paul.barker@sancloud.com>
 +R:    Marc Murphy <marc.murphy@sancloud.com>
 +S:    Supported
 +F:    arch/arm/boot/dts/am335x-sancloud*
 +
  SC1200 WDT DRIVER
  M:    Zwane Mwaikambo <zwanem@gmail.com>
  S:    Maintained
@@@ -16863,12 -16743,6 +16874,12 @@@ F: drivers/iio/chemical/scd30_core.
  F:    drivers/iio/chemical/scd30_i2c.c
  F:    drivers/iio/chemical/scd30_serial.c
  
 +SENSIRION SGP40 GAS SENSOR DRIVER
 +M:    Andreas Klinger <ak@it-klinger.de>
 +S:    Maintained
 +F:    Documentation/ABI/testing/sysfs-bus-iio-chemical-sgp40
 +F:    drivers/iio/chemical/sgp40.c
 +
  SENSIRION SPS30 AIR POLLUTION SENSOR DRIVER
  M:    Tomasz Duszynski <tduszyns@gmail.com>
  S:    Maintained
@@@ -17447,15 -17321,6 +17458,15 @@@ T: git git://linuxtv.org/media_tree.git
  F:    Documentation/devicetree/bindings/media/i2c/sony,imx334.yaml
  F:    drivers/media/i2c/imx334.c
  
 +SONY IMX335 SENSOR DRIVER
 +M:    Paul J. Murphy <paul.j.murphy@intel.com>
 +M:    Daniele Alessandrelli <daniele.alessandrelli@intel.com>
 +L:    linux-media@vger.kernel.org
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/i2c/sony,imx335.yaml
 +F:    drivers/media/i2c/imx335.c
 +
  SONY IMX355 SENSOR DRIVER
  M:    Tianshu Qiu <tian.shu.qiu@intel.com>
  L:    linux-media@vger.kernel.org
@@@ -17463,15 -17328,6 +17474,15 @@@ S: Maintained
  T:    git git://linuxtv.org/media_tree.git
  F:    drivers/media/i2c/imx355.c
  
 +SONY IMX412 SENSOR DRIVER
 +M:    Paul J. Murphy <paul.j.murphy@intel.com>
 +M:    Daniele Alessandrelli <daniele.alessandrelli@intel.com>
 +L:    linux-media@vger.kernel.org
 +S:    Maintained
 +T:    git git://linuxtv.org/media_tree.git
 +F:    Documentation/devicetree/bindings/media/i2c/sony,imx412.yaml
 +F:    drivers/media/i2c/imx412.c
 +
  SONY MEMORYSTICK SUBSYSTEM
  M:    Maxim Levitsky <maximlevitsky@gmail.com>
  M:    Alex Dubov <oakad@yahoo.com>
@@@ -17791,9 -17647,8 +17802,9 @@@ F:   drivers/staging/olpc_dcon
  
  STAGING - REALTEK RTL8188EU DRIVERS
  M:    Larry Finger <Larry.Finger@lwfinger.net>
 -S:    Odd Fixes
 -F:    drivers/staging/rtl8188eu/
 +M:    Phillip Potter <phil@philpotter.co.uk>
 +S:    Supported
 +F:    drivers/staging/r8188eu/
  
  STAGING - REALTEK RTL8712U DRIVERS
  M:    Larry Finger <Larry.Finger@lwfinger.net>
@@@ -18130,7 -17985,6 +18141,7 @@@ F:   drivers/regulator/scmi-regulator.
  F:    drivers/reset/reset-scmi.c
  F:    include/linux/sc[mp]i_protocol.h
  F:    include/trace/events/scmi.h
 +F:    include/uapi/linux/virtio_scmi.h
  
  SYSTEM RESET/SHUTDOWN DRIVERS
  M:    Sebastian Reichel <sre@kernel.org>
@@@ -18981,28 -18835,6 +18992,28 @@@ F: arch/x86/mm/testmmiotrace.
  F:    include/linux/mmiotrace.h
  F:    kernel/trace/trace_mmiotrace.c
  
 +TRACING OS NOISE / LATENCY TRACERS
 +M:    Steven Rostedt <rostedt@goodmis.org>
 +M:    Daniel Bristot de Oliveira <bristot@kernel.org>
 +S:    Maintained
 +F:    kernel/trace/trace_osnoise.c
 +F:    include/trace/events/osnoise.h
 +F:    kernel/trace/trace_hwlat.c
 +F:    kernel/trace/trace_irqsoff.c
 +F:    kernel/trace/trace_sched_wakeup.c
 +F:    Documentation/trace/osnoise-tracer.rst
 +F:    Documentation/trace/timerlat-tracer.rst
 +F:    Documentation/trace/hwlat_detector.rst
 +F:    arch/*/kernel/trace.c
 +
 +TRADITIONAL CHINESE DOCUMENTATION
 +M:    Hu Haowen <src.res@email.cn>
 +L:    linux-doc-tw-discuss@lists.sourceforge.net
 +S:    Maintained
 +W:    https://github.com/srcres258/linux-doc
 +T:    git git://github.com/srcres258/linux-doc.git doc-zh-tw
 +F:    Documentation/translations/zh_TW/
 +
  TRIVIAL PATCHES
  M:    Jiri Kosina <trivial@kernel.org>
  S:    Maintained
@@@ -19175,8 -19007,9 +19186,8 @@@ W:   http://dotat.at/prog/unifdef
  F:    scripts/unifdef.c
  
  UNIFORM CDROM DRIVER
 -M:    Jens Axboe <axboe@kernel.dk>
 +M:    Phillip Potter <phil@philpotter.co.uk>
  S:    Maintained
 -W:    http://www.kernel.dk
  F:    Documentation/cdrom/
  F:    drivers/cdrom/cdrom.c
  F:    include/linux/cdrom.h
@@@ -19661,7 -19494,6 +19672,7 @@@ T:   git git://github.com/awilliam/linux-
  F:    Documentation/driver-api/vfio.rst
  F:    drivers/vfio/
  F:    include/linux/vfio.h
 +F:    include/linux/vfio_pci_core.h
  F:    include/uapi/linux/vfio.h
  
  VFIO FSL-MC DRIVER
@@@ -19770,11 -19602,18 +19781,11 @@@ L:        kvm@vger.kernel.org
  L:    virtualization@lists.linux-foundation.org
  L:    netdev@vger.kernel.org
  S:    Maintained
 -F:    drivers/net/vsockmon.c
  F:    drivers/vhost/vsock.c
  F:    include/linux/virtio_vsock.h
  F:    include/uapi/linux/virtio_vsock.h
 -F:    include/uapi/linux/vm_sockets_diag.h
 -F:    include/uapi/linux/vsockmon.h
 -F:    net/vmw_vsock/af_vsock_tap.c
 -F:    net/vmw_vsock/diag.c
  F:    net/vmw_vsock/virtio_transport.c
  F:    net/vmw_vsock/virtio_transport_common.c
 -F:    net/vmw_vsock/vsock_loopback.c
 -F:    tools/testing/vsock/
  
  VIRTIO BLOCK AND SCSI DRIVERS
  M:    "Michael S. Tsirkin" <mst@redhat.com>
@@@ -19853,15 -19692,6 +19864,15 @@@ F: Documentation/filesystems/virtiofs.r
  F:    fs/fuse/virtio_fs.c
  F:    include/uapi/linux/virtio_fs.h
  
 +VIRTIO GPIO DRIVER
 +M:    Enrico Weigelt, metux IT consult <info@metux.net>
 +M:    Viresh Kumar <vireshk@kernel.org>
 +L:    linux-gpio@vger.kernel.org
 +L:    virtualization@lists.linux-foundation.org
 +S:    Maintained
 +F:    drivers/gpio/gpio-virtio.c
 +F:    include/uapi/linux/virtio_gpio.h
 +
  VIRTIO GPU DRIVER
  M:    David Airlie <airlied@linux.ie>
  M:    Gerd Hoffmann <kraxel@redhat.com>
@@@ -19914,15 -19744,6 +19925,15 @@@ S: Maintained
  F:    include/uapi/linux/virtio_snd.h
  F:    sound/virtio/*
  
 +VIRTIO I2C DRIVER
 +M:    Jie Deng <jie.deng@intel.com>
 +M:    Viresh Kumar <viresh.kumar@linaro.org>
 +L:    linux-i2c@vger.kernel.org
 +L:    virtualization@lists.linux-foundation.org
 +S:    Maintained
 +F:    drivers/i2c/busses/i2c-virtio.c
 +F:    include/uapi/linux/virtio_i2c.h
 +
  VIRTUAL BOX GUEST DEVICE DRIVER
  M:    Hans de Goede <hdegoede@redhat.com>
  M:    Arnd Bergmann <arnd@arndb.de>
@@@ -19979,19 -19800,6 +19990,19 @@@ F: drivers/staging/vme
  F:    drivers/vme/
  F:    include/linux/vme*
  
 +VM SOCKETS (AF_VSOCK)
 +M:    Stefano Garzarella <sgarzare@redhat.com>
 +L:    virtualization@lists.linux-foundation.org
 +L:    netdev@vger.kernel.org
 +S:    Maintained
 +F:    drivers/net/vsockmon.c
 +F:    include/net/af_vsock.h
 +F:    include/uapi/linux/vm_sockets.h
 +F:    include/uapi/linux/vm_sockets_diag.h
 +F:    include/uapi/linux/vsockmon.h
 +F:    net/vmw_vsock/
 +F:    tools/testing/vsock/
 +
  VMWARE BALLOON DRIVER
  M:    Nadav Amit <namit@vmware.com>
  M:    "VMware, Inc." <pv-drivers@vmware.com>
@@@ -20569,7 -20377,7 +20580,7 @@@ R:   Srinivas Neeli <srinivas.neeli@xilinx.com>
  R:    Michal Simek <michal.simek@xilinx.com>
  S:    Maintained
  F:    Documentation/devicetree/bindings/gpio/gpio-xilinx.txt
 -F:    Documentation/devicetree/bindings/gpio/gpio-zynq.txt
 +F:    Documentation/devicetree/bindings/gpio/gpio-zynq.yaml
  F:    drivers/gpio/gpio-xilinx.c
  F:    drivers/gpio/gpio-zynq.c
  
diff --combined arch/Kconfig
@@@ -197,9 -197,6 +197,9 @@@ config HAVE_FUNCTION_ERROR_INJECTION
  config HAVE_NMI
        bool
  
 +config TRACE_IRQFLAGS_SUPPORT
 +      bool
 +
  #
  # An arch should select this if it provides all these things:
  #
@@@ -889,7 -886,7 +889,7 @@@ config HAVE_SOFTIRQ_ON_OWN_STACK
        bool
        help
          Architecture provides a function to run __do_softirq() on a
-         seperate stack.
+         separate stack.
  
  config PGTABLE_LEVELS
        int
@@@ -1285,9 -1282,6 +1285,9 @@@ config ARCH_SPLIT_ARG64
  config ARCH_HAS_ELFCORE_COMPAT
        bool
  
 +config ARCH_HAS_PARANOID_L1D_FLUSH
 +      bool
 +
  source "kernel/gcov/Kconfig"
  
  source "scripts/gcc-plugins/Kconfig"
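
The new TRACE_IRQFLAGS_SUPPORT entry above turns a symbol that each architecture
previously declared itself into one central opt-in bool; the riscv hunks later in
this diff show the consumer side, dropping the local def_bool y and selecting the
shared symbol instead. Side by side, quoting this series:

      config TRACE_IRQFLAGS_SUPPORT       # arch/Kconfig: central definition
            bool

      select TRACE_IRQFLAGS_SUPPORT       # arch/riscv/Kconfig: per-arch opt-in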
diff --combined arch/powerpc/kernel/traps.c
  #include <linux/smp.h>
  #include <linux/console.h>
  #include <linux/kmsg_dump.h>
 +#include <linux/debugfs.h>
  
  #include <asm/emulated_ops.h>
  #include <linux/uaccess.h>
 -#include <asm/debugfs.h>
  #include <asm/interrupt.h>
  #include <asm/io.h>
  #include <asm/machdep.h>
@@@ -171,6 -171,7 +171,6 @@@ extern void panic_flush_kmsg_start(void)
  
  extern void panic_flush_kmsg_end(void)
  {
 -      printk_safe_flush_on_panic();
        kmsg_dump(KMSG_DUMP_PANIC);
        bust_spinlocks(0);
        debug_locks_off();
@@@ -427,7 -428,7 +427,7 @@@ void hv_nmi_check_nonrecoverable(struct pt_regs *regs)
        return;
  
  nonrecoverable:
 -      regs_set_return_msr(regs, regs->msr & ~MSR_RI);
 +      regs_set_unrecoverable(regs);
  #endif
  }
  DEFINE_INTERRUPT_HANDLER_NMI(system_reset_exception)
@@@ -497,7 -498,7 +497,7 @@@ out
                die("Unrecoverable nested System Reset", regs, SIGABRT);
  #endif
        /* Must die if the interrupt is not recoverable */
 -      if (!(regs->msr & MSR_RI)) {
 +      if (regs_is_unrecoverable(regs)) {
                /* For the reason explained in die_mce, nmi_exit before die */
                nmi_exit();
                die("Unrecoverable System Reset", regs, SIGABRT);
@@@ -549,7 -550,7 +549,7 @@@ static inline int check_io_access(struct pt_regs *regs)
                        printk(KERN_DEBUG "%s bad port %lx at %p\n",
                               (*nip & 0x100)? "OUT to": "IN from",
                               regs->gpr[rb] - _IO_BASE, nip);
 -                      regs_set_return_msr(regs, regs->msr | MSR_RI);
 +                      regs_set_recoverable(regs);
                        regs_set_return_ip(regs, extable_fixup(entry));
                        return 1;
                }
  #ifdef CONFIG_PPC_ADV_DEBUG_REGS
  /* On 4xx, the reason for the machine check or program exception
     is in the ESR. */
 -#define get_reason(regs)      ((regs)->dsisr)
 +#define get_reason(regs)      ((regs)->esr)
  #define REASON_FP             ESR_FP
  #define REASON_ILLEGAL                (ESR_PIL | ESR_PUO)
  #define REASON_PRIVILEGED     ESR_PPR
@@@ -839,7 -840,7 +839,7 @@@ DEFINE_INTERRUPT_HANDLER_NMI(machine_check_exception)
  
  bail:
        /* Must die if the interrupt is not recoverable */
 -      if (!(regs->msr & MSR_RI))
 +      if (regs_is_unrecoverable(regs))
                die_mce("Unrecoverable Machine check", regs, SIGBUS);
  
  #ifdef CONFIG_PPC_BOOK3S_64
@@@ -1481,13 -1482,8 +1481,13 @@@ static void do_program_check(struct pt_regs *regs)
  
                if (!(regs->msr & MSR_PR) &&  /* not user-mode */
                    report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) {
 -                      regs_add_return_ip(regs, 4);
 -                      return;
 +                      const struct exception_table_entry *entry;
 +
 +                      entry = search_exception_tables(bugaddr);
 +                      if (entry) {
 +                              regs_set_return_ip(regs, extable_fixup(entry) + regs->nip - bugaddr);
 +                              return;
 +                      }
                }
                _exception(SIGTRAP, regs, TRAP_BRKPT, regs->nip);
                return;
@@@ -2219,11 -2215,6 +2219,6 @@@ DEFINE_INTERRUPT_HANDLER(kernel_bad_stack)
        die("Bad kernel stack pointer", regs, SIGABRT);
  }
  
- void __init trap_init(void)
- {
- }
  #ifdef CONFIG_PPC_EMULATED_STATS
  
  #define WARN_EMULATED_SETUP(type)     .type = { .name = #type }
@@@ -2276,7 -2267,7 +2271,7 @@@ static int __init ppc_warn_emulated_init(void)
        struct ppc_emulated_entry *entries = (void *)&ppc_emulated;
  
        dir = debugfs_create_dir("emulated_instructions",
 -                               powerpc_debugfs_root);
 +                               arch_debugfs_dir);
  
        debugfs_create_u32("do_warn", 0644, dir, &ppc_warn_emulated);
  
diff --combined arch/powerpc/platforms/pseries/hotplug-memory.c
@@@ -180,8 -180,6 +180,8 @@@ static int update_lmb_associativity_index(struct drmem_lmb *lmb)
                return -ENODEV;
        }
  
 +      update_numa_distance(lmb_node);
 +
        dr_node = of_find_node_by_path("/ibm,dynamic-reconfiguration-memory");
        if (!dr_node) {
                dlpar_free_cc_nodes(lmb_node);
  static struct memory_block *lmb_to_memblock(struct drmem_lmb *lmb)
  {
        unsigned long section_nr;
 -      struct mem_section *mem_sect;
        struct memory_block *mem_block;
  
        section_nr = pfn_to_section_nr(PFN_DOWN(lmb->base_addr));
 -      mem_sect = __nr_to_section(section_nr);
  
 -      mem_block = find_memory_block(mem_sect);
 +      mem_block = find_memory_block(section_nr);
        return mem_block;
  }
  
@@@ -286,7 -286,7 +286,7 @@@ static int pseries_remove_memblock(unsi
  {
        unsigned long block_sz, start_pfn;
        int sections_per_block;
-       int i, nid;
+       int i;
  
        start_pfn = base >> PAGE_SHIFT;
  
  
        block_sz = pseries_memory_block_size();
        sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;
-       nid = memory_add_physaddr_to_nid(base);
  
        for (i = 0; i < sections_per_block; i++) {
-               __remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE);
+               __remove_memory(base, MIN_MEMORY_BLOCK_SIZE);
                base += MIN_MEMORY_BLOCK_SIZE;
        }
  
@@@ -387,7 -386,7 +386,7 @@@ static int dlpar_remove_lmb(struct drmem_lmb *lmb)
  
        block_sz = pseries_memory_block_size();
  
-       __remove_memory(mem_block->nid, lmb->base_addr, block_sz);
+       __remove_memory(lmb->base_addr, block_sz);
        put_device(&mem_block->dev);
  
        /* Update memory regions for memory remove */
@@@ -660,7 -659,7 +659,7 @@@ static int dlpar_add_lmb(struct drmem_lmb *lmb)
  
        rc = dlpar_online_lmb(lmb);
        if (rc) {
-               __remove_memory(nid, lmb->base_addr, block_sz);
+               __remove_memory(lmb->base_addr, block_sz);
                invalidate_lmb_associativity_index(lmb);
        } else {
                lmb->flags |= DRCONF_MEM_ASSIGNED;
@@@ -979,10 -978,6 +978,10 @@@ static int pseries_memory_notifier(stru
        case OF_RECONFIG_DETACH_NODE:
                err = pseries_remove_mem_node(rd->dn);
                break;
 +      case OF_RECONFIG_UPDATE_PROPERTY:
 +              if (!strcmp(rd->dn->name,
 +                          "ibm,dynamic-reconfiguration-memory"))
 +                      drmem_update_lmbs(rd->prop);
        }
        return notifier_from_errno(err);
  }
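
Two cross-tree API changes land in the pseries hunks above. Per the call sites
here and the definitions visible elsewhere in this diff, the prototypes now read
approximately:

      struct memory_block *find_memory_block(unsigned long section_nr);
      void __remove_memory(u64 start, u64 size);    /* nid parameter dropped */

Callers shed matching boilerplate: the __nr_to_section() conversion and the
memory_add_physaddr_to_nid() lookup removed above are no longer needed.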
diff --combined arch/riscv/Kconfig
@@@ -13,7 -13,9 +13,7 @@@ config 32BIT
  config RISCV
        def_bool y
        select ARCH_CLOCKSOURCE_INIT
 -      select ARCH_SUPPORTS_ATOMIC_RMW
 -      select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
 -      select ARCH_STACKWALK
 +      select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION
        select ARCH_HAS_BINFMT_FLAT
        select ARCH_HAS_DEBUG_VM_PGTABLE
        select ARCH_HAS_DEBUG_VIRTUAL if MMU
        select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL
        select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL
        select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST
 +      select ARCH_HAS_UBSAN_SANITIZE_ALL
        select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX
        select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT
 +      select ARCH_STACKWALK
 +      select ARCH_SUPPORTS_ATOMIC_RMW
 +      select ARCH_SUPPORTS_DEBUG_PAGEALLOC if MMU
        select ARCH_SUPPORTS_HUGETLBFS if MMU
        select ARCH_USE_MEMTEST
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU
        select GENERIC_CLOCKEVENTS_BROADCAST if SMP
        select GENERIC_EARLY_IOREMAP
        select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO
-       select GENERIC_IOREMAP
 +      select GENERIC_IDLE_POLL_SETUP
+       select GENERIC_IOREMAP if MMU
        select GENERIC_IRQ_MULTI_HANDLER
        select GENERIC_IRQ_SHOW
 +      select GENERIC_IRQ_SHOW_LEVEL
        select GENERIC_LIB_DEVMEM_IS_ALLOWED
        select GENERIC_PCI_IOMAP
        select GENERIC_PTDUMP if MMU
        select GENERIC_SCHED_CLOCK
        select GENERIC_SMP_IDLE_THREAD
 -      select GENERIC_STRNCPY_FROM_USER if MMU
 -      select GENERIC_STRNLEN_USER if MMU
        select GENERIC_TIME_VSYSCALL if MMU && 64BIT
        select HANDLE_DOMAIN_IRQ
        select HAVE_ARCH_AUDITSYSCALL
@@@ -74,7 -72,6 +74,7 @@@
        select HAVE_ARCH_SECCOMP_FILTER
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU
 +      select HAVE_ARCH_THREAD_STRUCT_WHITELIST
        select HAVE_ARCH_VMAP_STACK if MMU && 64BIT
        select HAVE_ASM_MODVERSIONS
        select HAVE_CONTEXT_TRACKING
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
        select IRQ_DOMAIN
 +      select IRQ_FORCED_THREADING
        select MODULES_USE_ELF_RELA if MODULES
        select MODULE_SECTIONS if MODULES
        select OF
        select SPARSE_IRQ
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
 +      select TRACE_IRQFLAGS_SUPPORT
        select UACCESS_MEMCPY if !MMU
        select ZONE_DMA32 if 64BIT
  
@@@ -183,6 -178,9 +183,6 @@@ config ARCH_SUPPORTS_UPROBES
  config STACKTRACE_SUPPORT
        def_bool y
  
 -config TRACE_IRQFLAGS_SUPPORT
 -      def_bool y
 -
  config GENERIC_BUG
        def_bool y
        depends on BUG
diff --combined arch/s390/mm/init.c
@@@ -34,7 -34,6 +34,7 @@@
  #include <asm/processor.h>
  #include <linux/uaccess.h>
  #include <asm/pgalloc.h>
 +#include <asm/kfence.h>
  #include <asm/ptdump.h>
  #include <asm/dma.h>
  #include <asm/lowcore.h>
@@@ -187,9 -186,9 +187,9 @@@ static void pv_init(void)
                return;
  
        /* make sure bounce buffers are shared */
 +      swiotlb_force = SWIOTLB_FORCE;
        swiotlb_init(1);
        swiotlb_update_mem_attributes();
 -      swiotlb_force = SWIOTLB_FORCE;
  }
  
  void __init mem_init(void)
          high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
  
        pv_init();
 -
 +      kfence_split_mapping();
        /* Setup guest page hinting */
        cmma_init();
  
@@@ -307,8 -306,7 +307,7 @@@ int arch_add_memory(int nid, u64 start
        return rc;
  }
  
- void arch_remove_memory(int nid, u64 start, u64 size,
-                       struct vmem_altmap *altmap)
+ void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap)
  {
        unsigned long start_pfn = start >> PAGE_SHIFT;
        unsigned long nr_pages = size >> PAGE_SHIFT;
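
As with __remove_memory() in the pseries hunks earlier, arch_remove_memory()
loses its nid argument in this series; the s390 definition above now reads:

      void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap);

As the visible body shows, only start, size and altmap are consumed, so the node
id was dead weight at the architecture level.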
diff --combined drivers/base/memory.c
@@@ -82,6 -82,12 +82,12 @@@ static struct bus_type memory_subsys = {
   */
  static DEFINE_XARRAY(memory_blocks);
  
+ /*
+  * Memory groups, indexed by memory group id (mgid).
+  */
+ static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC);
+ #define MEMORY_GROUP_MARK_DYNAMIC     XA_MARK_1
  static BLOCKING_NOTIFIER_HEAD(memory_chain);
  
  int register_memory_notifier(struct notifier_block *nb)
@@@ -177,7 -183,8 +183,8 @@@ static int memory_block_online(struct memory_block *mem)
        struct zone *zone;
        int ret;
  
-       zone = zone_for_pfn_range(mem->online_type, mem->nid, start_pfn, nr_pages);
+       zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
+                                 start_pfn, nr_pages);
  
        /*
         * Although vmemmap pages have a different lifecycle than the pages
        }
  
        ret = online_pages(start_pfn + nr_vmemmap_pages,
-                          nr_pages - nr_vmemmap_pages, zone);
+                          nr_pages - nr_vmemmap_pages, zone, mem->group);
        if (ret) {
                if (nr_vmemmap_pages)
                        mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
         * now already properly populated.
         */
        if (nr_vmemmap_pages)
-               adjust_present_page_count(zone, nr_vmemmap_pages);
+               adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
+                                         nr_vmemmap_pages);
  
        return ret;
  }
@@@ -215,24 -223,23 +223,23 @@@ static int memory_block_offline(struct memory_block *mem)
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
        unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
-       struct zone *zone;
        int ret;
  
        /*
         * Unaccount before offlining, such that unpopulated zone and kthreads
         * can properly be torn down in offline_pages().
         */
-       if (nr_vmemmap_pages) {
-               zone = page_zone(pfn_to_page(start_pfn));
-               adjust_present_page_count(zone, -nr_vmemmap_pages);
-       }
+       if (nr_vmemmap_pages)
+               adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
+                                         -nr_vmemmap_pages);
  
        ret = offline_pages(start_pfn + nr_vmemmap_pages,
-                           nr_pages - nr_vmemmap_pages);
+                           nr_pages - nr_vmemmap_pages, mem->group);
        if (ret) {
                /* offline_pages() failed. Account back. */
                if (nr_vmemmap_pages)
-                       adjust_present_page_count(zone, nr_vmemmap_pages);
+                       adjust_present_page_count(pfn_to_page(start_pfn),
+                                                 mem->group, nr_vmemmap_pages);
                return ret;
        }
  
@@@ -374,12 -381,13 +381,13 @@@ static ssize_t phys_device_show(struct 
  
  #ifdef CONFIG_MEMORY_HOTREMOVE
  static int print_allowed_zone(char *buf, int len, int nid,
+                             struct memory_group *group,
                              unsigned long start_pfn, unsigned long nr_pages,
                              int online_type, struct zone *default_zone)
  {
        struct zone *zone;
  
-       zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
+       zone = zone_for_pfn_range(online_type, nid, group, start_pfn, nr_pages);
        if (zone == default_zone)
                return 0;
  
@@@ -392,9 -400,10 +400,10 @@@ static ssize_t valid_zones_show(struct 
        struct memory_block *mem = to_memory_block(dev);
        unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
        unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
+       struct memory_group *group = mem->group;
        struct zone *default_zone;
+       int nid = mem->nid;
        int len = 0;
-       int nid;
  
        /*
         * Check the existing zone. Make sure that we do that only on the
                goto out;
        }
  
-       nid = mem->nid;
-       default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
-                                         nr_pages);
+       default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, group,
+                                         start_pfn, nr_pages);
  
        len += sysfs_emit_at(buf, len, "%s", default_zone->name);
-       len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
+       len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
                                  MMOP_ONLINE_KERNEL, default_zone);
-       len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
+       len += print_allowed_zone(buf, len, nid, group, start_pfn, nr_pages,
                                  MMOP_ONLINE_MOVABLE, default_zone);
  out:
        len += sysfs_emit_at(buf, len, "\n");
@@@ -578,9 -586,9 +586,9 @@@ static struct memory_block *find_memory
  /*
   * Called under device_hotplug_lock.
   */
 -struct memory_block *find_memory_block(struct mem_section *section)
 +struct memory_block *find_memory_block(unsigned long section_nr)
  {
 -      unsigned long block_id = memory_block_id(__section_nr(section));
 +      unsigned long block_id = memory_block_id(section_nr);
  
        return find_memory_block_by_id(block_id);
  }
@@@ -634,7 -642,8 +642,8 @@@ int register_memory(struct memory_bloc
  }
  
  static int init_memory_block(unsigned long block_id, unsigned long state,
-                            unsigned long nr_vmemmap_pages)
+                            unsigned long nr_vmemmap_pages,
+                            struct memory_group *group)
  {
        struct memory_block *mem;
        int ret = 0;
        mem->state = state;
        mem->nid = NUMA_NO_NODE;
        mem->nr_vmemmap_pages = nr_vmemmap_pages;
+       INIT_LIST_HEAD(&mem->group_next);
+       if (group) {
+               mem->group = group;
+               list_add(&mem->group_next, &group->memory_blocks);
+       }
  
        ret = register_memory(mem);
  
@@@ -671,7 -686,7 +686,7 @@@ static int add_memory_block(unsigned lo
        if (section_count == 0)
                return 0;
        return init_memory_block(memory_block_id(base_section_nr),
-                                MEM_ONLINE, 0);
+                                MEM_ONLINE, 0, NULL);
  }
  
  static void unregister_memory(struct memory_block *memory)
  
        WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);
  
+       if (memory->group) {
+               list_del(&memory->group_next);
+               memory->group = NULL;
+       }
        /* drop the ref. we got via find_memory_block() */
        put_device(&memory->dev);
        device_unregister(&memory->dev);
   * Called under device_hotplug_lock.
   */
  int create_memory_block_devices(unsigned long start, unsigned long size,
-                               unsigned long vmemmap_pages)
+                               unsigned long vmemmap_pages,
+                               struct memory_group *group)
  {
        const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
        unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
                return -EINVAL;
  
        for (block_id = start_block_id; block_id != end_block_id; block_id++) {
-               ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages);
+               ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages,
+                                       group);
                if (ret)
                        break;
        }
@@@ -891,3 -913,164 +913,164 @@@ int for_each_memory_block(void *arg, wa
        return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
                                for_each_memory_block_cb);
  }
+ /*
+  * This is an internal helper to unify allocation and initialization of
+  * memory groups. Note that the passed memory group will be copied to a
+  * dynamically allocated memory group. After this call, the passed
+  * memory group should no longer be used.
+  */
+ static int memory_group_register(struct memory_group group)
+ {
+       struct memory_group *new_group;
+       uint32_t mgid;
+       int ret;
+       if (!node_possible(group.nid))
+               return -EINVAL;
+       new_group = kzalloc(sizeof(group), GFP_KERNEL);
+       if (!new_group)
+               return -ENOMEM;
+       *new_group = group;
+       INIT_LIST_HEAD(&new_group->memory_blocks);
+       ret = xa_alloc(&memory_groups, &mgid, new_group, xa_limit_31b,
+                      GFP_KERNEL);
+       if (ret) {
+               kfree(new_group);
+               return ret;
+       } else if (group.is_dynamic) {
+               xa_set_mark(&memory_groups, mgid, MEMORY_GROUP_MARK_DYNAMIC);
+       }
+       return mgid;
+ }
+ /**
+  * memory_group_register_static() - Register a static memory group.
+  * @nid: The node id.
+  * @max_pages: The maximum number of pages we'll have in this static memory
+  *           group.
+  *
+  * Register a new static memory group and return the memory group id.
+  * All memory in the group belongs to a single unit, such as a DIMM. All
+  * memory belonging to a static memory group is added in one go to be removed
+  * in one go -- it's static.
+  *
+  * Returns an error if out of memory, if the node id is invalid, if no new
+  * memory groups can be registered, or if max_pages is invalid (0). Otherwise,
+  * returns the new memory group id.
+  */
+ int memory_group_register_static(int nid, unsigned long max_pages)
+ {
+       struct memory_group group = {
+               .nid = nid,
+               .s = {
+                       .max_pages = max_pages,
+               },
+       };
+       if (!max_pages)
+               return -EINVAL;
+       return memory_group_register(group);
+ }
+ EXPORT_SYMBOL_GPL(memory_group_register_static);
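
A usage sketch, hypothetical and not part of this diff (the function name and
parameters are illustrative): a DIMM driver would register one static group
per device at probe time and hand the returned mgid to the hotplug core.

	static int example_register_dimm_group(int nid, u64 dimm_size)
	{
		/*
		 * One static group per DIMM; every page of the device
		 * belongs to this group. A negative return is -EINVAL
		 * or -ENOMEM, per the kerneldoc above.
		 */
		return memory_group_register_static(nid, PHYS_PFN(dimm_size));
	}
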
+ /**
+  * memory_group_register_dynamic() - Register a dynamic memory group.
+  * @nid: The node id.
+  * @unit_pages: Unit in pages in which memory is added/removed in this dynamic
+  *            memory group.
+  *
+  * Register a new dynamic memory group and return the memory group id.
+  * Memory within a dynamic memory group is added/removed dynamically
+  * in unit_pages.
+  *
+  * Returns an error if out of memory, if the node id is invalid, if no new
+  * memory groups can be registered, or if unit_pages is invalid (0, not a
+  * power of two, or smaller than a single memory block). Otherwise, returns the
+  * new memory group id.
+  */
+ int memory_group_register_dynamic(int nid, unsigned long unit_pages)
+ {
+       struct memory_group group = {
+               .nid = nid,
+               .is_dynamic = true,
+               .d = {
+                       .unit_pages = unit_pages,
+               },
+       };
+       if (!unit_pages || !is_power_of_2(unit_pages) ||
+           unit_pages < PHYS_PFN(memory_block_size_bytes()))
+               return -EINVAL;
+       return memory_group_register(group);
+ }
+ EXPORT_SYMBOL_GPL(memory_group_register_dynamic);
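
The dynamic counterpart, again as a hedged sketch with a made-up name: a
virtio-mem-style driver that plugs memory in fixed-size units registers that
unit size up front.

	static int example_register_unit_group(int nid, unsigned long unit_pages)
	{
		/*
		 * unit_pages must be a power of two and cover at least one
		 * memory block, e.g. PHYS_PFN(memory_block_size_bytes()).
		 */
		return memory_group_register_dynamic(nid, unit_pages);
	}
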
+ /**
+  * memory_group_unregister() - Unregister a memory group.
+  * @mgid: the memory group id
+  *
+  * Unregister a memory group. If any memory block still belongs to this
+  * memory group, unregistering will fail.
+  *
+  * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some
+  * memory blocks still belong to this memory group and returns 0 if
+  * unregistering succeeded.
+  */
+ int memory_group_unregister(int mgid)
+ {
+       struct memory_group *group;
+       if (mgid < 0)
+               return -EINVAL;
+       group = xa_load(&memory_groups, mgid);
+       if (!group)
+               return -EINVAL;
+       if (!list_empty(&group->memory_blocks))
+               return -EBUSY;
+       xa_erase(&memory_groups, mgid);
+       kfree(group);
+       return 0;
+ }
+ EXPORT_SYMBOL_GPL(memory_group_unregister);
+ /*
+  * This is an internal helper only to be used in core memory hotplug code to
+  * lookup a memory group. We don't care about locking, as we don't expect a
+  * memory group to get unregistered while adding memory to it -- because
+  * the group and the memory are managed by the same driver.
+  */
+ struct memory_group *memory_group_find_by_id(int mgid)
+ {
+       return xa_load(&memory_groups, mgid);
+ }
+ /*
+  * This is an internal helper only to be used in core memory hotplug code to
+  * walk all dynamic memory groups excluding a given memory group, either
+  * belonging to a specific node, or belonging to any node.
+  */
+ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
+                              struct memory_group *excluded, void *arg)
+ {
+       struct memory_group *group;
+       unsigned long index;
+       int ret = 0;
+       xa_for_each_marked(&memory_groups, index, group,
+                          MEMORY_GROUP_MARK_DYNAMIC) {
+               if (group == excluded)
+                       continue;
+ #ifdef CONFIG_NUMA
+               if (nid != NUMA_NO_NODE && group->nid != nid)
+                       continue;
+ #endif /* CONFIG_NUMA */
+               ret = func(group, arg);
+               if (ret)
+                       break;
+       }
+       return ret;
+ }
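
To illustrate the walker contract (callback name and counter are made up):
the callback runs once per matching dynamic group, and a non-zero return
stops the walk early.

	static int example_count_cb(struct memory_group *group, void *arg)
	{
		unsigned long *count = arg;

		(*count)++;		/* e.g. tally dynamic groups on this node */
		return 0;		/* non-zero would abort the walk */
	}

	/* walk_dynamic_memory_groups(nid, example_count_cb, NULL, &count); */
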
diff --combined drivers/base/node.c
@@@ -26,47 -26,43 +26,47 @@@ static struct bus_type node_subsys = 
        .dev_name = "node",
  };
  
 -
 -static ssize_t node_read_cpumap(struct device *dev, bool list, char *buf)
 +static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj,
 +                                struct bin_attribute *attr, char *buf,
 +                                loff_t off, size_t count)
  {
 -      ssize_t n;
 -      cpumask_var_t mask;
 +      struct device *dev = kobj_to_dev(kobj);
        struct node *node_dev = to_node(dev);
 -
 -      /* 2008/04/07: buf currently PAGE_SIZE, need 9 chars per 32 bits. */
 -      BUILD_BUG_ON((NR_CPUS/32 * 9) > (PAGE_SIZE-1));
 +      cpumask_var_t mask;
 +      ssize_t n;
  
        if (!alloc_cpumask_var(&mask, GFP_KERNEL))
                return 0;
  
        cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
 -      n = cpumap_print_to_pagebuf(list, buf, mask);
 +      n = cpumap_print_bitmask_to_buf(buf, mask, off, count);
        free_cpumask_var(mask);
  
        return n;
  }
  
 -static inline ssize_t cpumap_show(struct device *dev,
 -                                struct device_attribute *attr,
 -                                char *buf)
 +static BIN_ATTR_RO(cpumap, 0);
 +
 +static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
 +                                 struct bin_attribute *attr, char *buf,
 +                                 loff_t off, size_t count)
  {
 -      return node_read_cpumap(dev, false, buf);
 -}
 +      struct device *dev = kobj_to_dev(kobj);
 +      struct node *node_dev = to_node(dev);
 +      cpumask_var_t mask;
 +      ssize_t n;
 +
 +      if (!alloc_cpumask_var(&mask, GFP_KERNEL))
 +              return 0;
  
 -static DEVICE_ATTR_RO(cpumap);
 +      cpumask_and(mask, cpumask_of_node(node_dev->dev.id), cpu_online_mask);
 +      n = cpumap_print_list_to_buf(buf, mask, off, count);
 +      free_cpumask_var(mask);
  
 -static inline ssize_t cpulist_show(struct device *dev,
 -                                 struct device_attribute *attr,
 -                                 char *buf)
 -{
 -      return node_read_cpumap(dev, true, buf);
 +      return n;
  }
  
 -static DEVICE_ATTR_RO(cpulist);
 +static BIN_ATTR_RO(cpulist, 0);
  
  /**
   * struct node_access_nodes - Access class device to hold user visible
@@@ -79,7 -75,7 +79,7 @@@
  struct node_access_nodes {
        struct device           dev;
        struct list_head        list_node;
 -      unsigned                access;
 +      unsigned int            access;
  #ifdef CONFIG_HMEM_REPORTING
        struct node_hmem_attrs  hmem_attrs;
  #endif
@@@ -126,7 -122,7 +126,7 @@@ static void node_access_release(struct 
  }
  
  static struct node_access_nodes *node_init_node_access(struct node *node,
 -                                                     unsigned access)
 +                                                     unsigned int access)
  {
        struct node_access_nodes *access_node;
        struct device *dev;
@@@ -191,7 -187,7 +191,7 @@@ static struct attribute *access_attrs[
   * @access: The access class for the given attributes
   */
  void node_set_perf_attrs(unsigned int nid, struct node_hmem_attrs *hmem_attrs,
 -                       unsigned access)
 +                       unsigned int access)
  {
        struct node_access_nodes *c;
        struct node *node;
@@@ -561,28 -557,15 +561,28 @@@ static ssize_t node_read_distance(struc
  static DEVICE_ATTR(distance, 0444, node_read_distance, NULL);
  
  static struct attribute *node_dev_attrs[] = {
 -      &dev_attr_cpumap.attr,
 -      &dev_attr_cpulist.attr,
        &dev_attr_meminfo.attr,
        &dev_attr_numastat.attr,
        &dev_attr_distance.attr,
        &dev_attr_vmstat.attr,
        NULL
  };
 -ATTRIBUTE_GROUPS(node_dev);
 +
 +static struct bin_attribute *node_dev_bin_attrs[] = {
 +      &bin_attr_cpumap,
 +      &bin_attr_cpulist,
 +      NULL
 +};
 +
 +static const struct attribute_group node_dev_group = {
 +      .attrs = node_dev_attrs,
 +      .bin_attrs = node_dev_bin_attrs
 +};
 +
 +static const struct attribute_group *node_dev_groups[] = {
 +      &node_dev_group,
 +      NULL
 +};
  
  #ifdef CONFIG_HUGETLBFS
  /*
@@@ -728,7 -711,7 +728,7 @@@ int register_cpu_under_node(unsigned in
   */
  int register_memory_node_under_compute_node(unsigned int mem_nid,
                                            unsigned int cpu_nid,
 -                                          unsigned access)
 +                                          unsigned int access)
  {
        struct node *init_node, *targ_node;
        struct node_access_nodes *initiator, *target;
@@@ -785,8 -768,6 +785,6 @@@ int unregister_cpu_under_node(unsigned 
  #ifdef CONFIG_MEMORY_HOTPLUG_SPARSE
  static int __ref get_nid_for_pfn(unsigned long pfn)
  {
-       if (!pfn_valid_within(pfn))
-               return -1;
  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
        if (system_state < SYSTEM_RUNNING)
                return early_pfn_to_nid(pfn);
diff --combined drivers/mtd/nand/raw/intel-nand-controller.c
@@@ -20,6 -20,7 +20,7 @@@
  #include <linux/sched.h>
  #include <linux/slab.h>
  #include <linux/types.h>
+ #include <linux/units.h>
  #include <asm/unaligned.h>
  
  #define EBU_CLC                       0x000
  
  #define MAX_CS        2
  
- #define HZ_PER_MHZ    1000000L
  #define USEC_PER_SEC  1000000L
  
  struct ebu_nand_cs {
@@@ -631,26 -631,19 +631,26 @@@ static int ebu_nand_probe(struct platfo
        ebu_host->clk_rate = clk_get_rate(ebu_host->clk);
  
        ebu_host->dma_tx = dma_request_chan(dev, "tx");
 -      if (IS_ERR(ebu_host->dma_tx))
 -              return dev_err_probe(dev, PTR_ERR(ebu_host->dma_tx),
 -                                   "failed to request DMA tx chan!.\n");
 +      if (IS_ERR(ebu_host->dma_tx)) {
 +              ret = dev_err_probe(dev, PTR_ERR(ebu_host->dma_tx),
 +                                  "failed to request DMA tx chan!.\n");
 +              goto err_disable_unprepare_clk;
 +      }
  
        ebu_host->dma_rx = dma_request_chan(dev, "rx");
 -      if (IS_ERR(ebu_host->dma_rx))
 -              return dev_err_probe(dev, PTR_ERR(ebu_host->dma_rx),
 -                                   "failed to request DMA rx chan!.\n");
 +      if (IS_ERR(ebu_host->dma_rx)) {
 +              ret = dev_err_probe(dev, PTR_ERR(ebu_host->dma_rx),
 +                                  "failed to request DMA rx chan!.\n");
 +              ebu_host->dma_rx = NULL;
 +              goto err_cleanup_dma;
 +      }
  
        resname = devm_kasprintf(dev, GFP_KERNEL, "addr_sel%d", cs);
        res = platform_get_resource_byname(pdev, IORESOURCE_MEM, resname);
 -      if (!res)
 -              return -EINVAL;
 +      if (!res) {
 +              ret = -EINVAL;
 +              goto err_cleanup_dma;
 +      }
        ebu_host->cs[cs].addr_sel = res->start;
        writel(ebu_host->cs[cs].addr_sel | EBU_ADDR_MASK(5) | EBU_ADDR_SEL_REGEN,
               ebu_host->ebu + EBU_ADDR_SEL(cs));
        mtd = nand_to_mtd(&ebu_host->chip);
        if (!mtd->name) {
                dev_err(ebu_host->dev, "NAND label property is mandatory\n");
 -              return -EINVAL;
 +              ret = -EINVAL;
 +              goto err_cleanup_dma;
        }
  
        mtd->dev.parent = dev;
@@@ -689,7 -681,6 +689,7 @@@ err_clean_nand
        nand_cleanup(&ebu_host->chip);
  err_cleanup_dma:
        ebu_dma_cleanup(ebu_host);
 +err_disable_unprepare_clk:
        clk_disable_unprepare(ebu_host->clk);
  
        return ret;
diff --combined include/linux/memory.h
  
  #define MIN_MEMORY_BLOCK_SIZE     (1UL << SECTION_SIZE_BITS)
  
+ /**
+  * struct memory_group - a logical group of memory blocks
+  * @nid: The node id for all memory blocks inside the memory group.
+  * @memory_blocks: List of all memory blocks belonging to this memory group.
+  * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this
+  *                      memory group.
+  * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this
+  *                       memory group.
+  * @is_dynamic: The memory group type: static vs. dynamic
+  * @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
+  *             number of pages we'll have in this static memory group.
+  * @d.unit_pages: Valid with &memory_group.is_dynamic == true. Unit in pages
+  *              in which memory is added/removed in this dynamic memory group.
+  *              This granularity defines the alignment of a unit in physical
+  *              address space; it has to be at least as big as a single
+  *              memory block.
+  *
+  * A memory group logically groups memory blocks; each memory block
+  * belongs to at most one memory group. A memory group corresponds to
+  * a memory device, such as a DIMM or a NUMA node, which spans multiple
+  * memory blocks and might even span multiple non-contiguous physical memory
+  * ranges.
+  *
+  * Modification of members after registration is serialized by memory
+  * hot(un)plug code.
+  */
+ struct memory_group {
+       int nid;
+       struct list_head memory_blocks;
+       unsigned long present_kernel_pages;
+       unsigned long present_movable_pages;
+       bool is_dynamic;
+       union {
+               struct {
+                       unsigned long max_pages;
+               } s;
+               struct {
+                       unsigned long unit_pages;
+               } d;
+       };
+ };
  struct memory_block {
        unsigned long start_section_nr;
        unsigned long state;            /* serialized by the dev->lock */
@@@ -34,6 -76,8 +76,8 @@@
         * lie at the beginning of the memory block.
         */
        unsigned long nr_vmemmap_pages;
+       struct memory_group *group;     /* group (if any) for this block */
+       struct list_head group_next;    /* next block inside memory group */
  };
  
  int arch_get_memory_phys_device(unsigned long start_pfn);
@@@ -86,16 -130,25 +130,25 @@@ static inline int memory_notify(unsigne
  extern int register_memory_notifier(struct notifier_block *nb);
  extern void unregister_memory_notifier(struct notifier_block *nb);
  int create_memory_block_devices(unsigned long start, unsigned long size,
-                               unsigned long vmemmap_pages);
+                               unsigned long vmemmap_pages,
+                               struct memory_group *group);
  void remove_memory_block_devices(unsigned long start, unsigned long size);
  extern void memory_dev_init(void);
  extern int memory_notify(unsigned long val, void *v);
 -extern struct memory_block *find_memory_block(struct mem_section *);
 +extern struct memory_block *find_memory_block(unsigned long section_nr);
  typedef int (*walk_memory_blocks_func_t)(struct memory_block *, void *);
  extern int walk_memory_blocks(unsigned long start, unsigned long size,
                              void *arg, walk_memory_blocks_func_t func);
  extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
  #define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
+ extern int memory_group_register_static(int nid, unsigned long max_pages);
+ extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);
+ extern int memory_group_unregister(int mgid);
+ struct memory_group *memory_group_find_by_id(int mgid);
+ typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *);
+ int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
+                              struct memory_group *excluded, void *arg);
  #endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
  
  #ifdef CONFIG_MEMORY_HOTPLUG
diff --combined include/linux/mmzone.h
@@@ -540,6 -540,10 +540,10 @@@ struct zone 
         * is calculated as:
         *      present_pages = spanned_pages - absent_pages(pages in holes);
         *
+        * present_early_pages is the number of present pages within the
+        * zone that reside on memory available since early boot, excluding
+        * hotplugged memory.
+        *
         * managed_pages is present pages managed by the buddy system, which
         * is calculated as (reserved_pages includes pages allocated by the
         * bootmem allocator):
        atomic_long_t           managed_pages;
        unsigned long           spanned_pages;
        unsigned long           present_pages;
+ #if defined(CONFIG_MEMORY_HOTPLUG)
+       unsigned long           present_early_pages;
+ #endif
  #ifdef CONFIG_CMA
        unsigned long           cma_pages;
  #endif
@@@ -846,7 -853,6 +853,7 @@@ typedef struct pglist_data 
        enum zone_type kcompactd_highest_zoneidx;
        wait_queue_head_t kcompactd_wait;
        struct task_struct *kcompactd;
 +      bool proactive_compact_trigger;
  #endif
        /*
         * This is a per-node reserve of pages that are not available
@@@ -1343,6 -1349,7 +1350,6 @@@ static inline struct mem_section *__nr_
                return NULL;
        return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
  }
 -extern unsigned long __section_nr(struct mem_section *ms);
  extern size_t mem_section_usage_size(void);
  
  /*
  #define SECTION_TAINT_ZONE_DEVICE     (1UL<<4)
  #define SECTION_MAP_LAST_BIT          (1UL<<5)
  #define SECTION_MAP_MASK              (~(SECTION_MAP_LAST_BIT-1))
 -#define SECTION_NID_SHIFT             3
 +#define SECTION_NID_SHIFT             6
  
  static inline struct page *__section_mem_map_addr(struct mem_section *section)
  {
@@@ -1525,18 -1532,6 +1532,6 @@@ void sparse_init(void)
  #define subsection_map_init(_pfn, _nr_pages) do {} while (0)
  #endif /* CONFIG_SPARSEMEM */
  
- /*
-  * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
-  * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
-  * pfn_valid_within() should be used in this case; we optimise this away
-  * when we have no holes within a MAX_ORDER_NR_PAGES block.
-  */
- #ifdef CONFIG_HOLES_IN_ZONE
- #define pfn_valid_within(pfn) pfn_valid(pfn)
- #else
- #define pfn_valid_within(pfn) (1)
- #endif
  #endif /* !__GENERATING_BOUNDS.H */
  #endif /* !__ASSEMBLY__ */
  #endif /* _LINUX_MMZONE_H */
diff --combined include/linux/page-flags.h
@@@ -131,7 -131,7 +131,7 @@@ enum pageflags 
  #ifdef CONFIG_MEMORY_FAILURE
        PG_hwpoison,            /* hardware poisoned page. Don't touch */
  #endif
- #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+ #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
        PG_young,
        PG_idle,
  #endif
        PG_reported = PG_uptodate,
  };
  
+ #define PAGEFLAGS_MASK                ((1UL << NR_PAGEFLAGS) - 1)
  #ifndef __GENERATING_BOUNDS_H
  
  static inline unsigned long _compound_head(const struct page *page)
@@@ -439,7 -441,7 +441,7 @@@ PAGEFLAG_FALSE(HWPoison
  #define __PG_HWPOISON 0
  #endif
  
- #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+ #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
  TESTPAGEFLAG(Young, young, PF_ANY)
  SETPAGEFLAG(Young, young, PF_ANY)
  TESTCLEARFLAG(Young, young, PF_ANY)
@@@ -632,6 -634,43 +634,6 @@@ static inline int PageTransCompound(str
        return PageCompound(page);
  }
  
 -/*
 - * PageTransCompoundMap is the same as PageTransCompound, but it also
 - * guarantees the primary MMU has the entire compound page mapped
 - * through pmd_trans_huge, which in turn guarantees the secondary MMUs
 - * can also map the entire compound page. This allows the secondary
 - * MMUs to call get_user_pages() only once for each compound page and
 - * to immediately map the entire compound page with a single secondary
 - * MMU fault. If there will be a pmd split later, the secondary MMUs
 - * will get an update through the MMU notifier invalidation through
 - * split_huge_pmd().
 - *
 - * Unlike PageTransCompound, this is safe to be called only while
 - * split_huge_pmd() cannot run from under us, like if protected by the
 - * MMU notifier, otherwise it may result in page->_mapcount check false
 - * positives.
 - *
 - * We have to treat page cache THP differently since every subpage of it
 - * would get _mapcount inc'ed once it is PMD mapped.  But, it may be PTE
 - * mapped in the current process so comparing subpage's _mapcount to
 - * compound_mapcount to filter out PTE mapped case.
 - */
 -static inline int PageTransCompoundMap(struct page *page)
 -{
 -      struct page *head;
 -
 -      if (!PageTransCompound(page))
 -              return 0;
 -
 -      if (PageAnon(page))
 -              return atomic_read(&page->_mapcount) < 0;
 -
 -      head = compound_head(page);
 -      /* File THP is PMD mapped and not PTE mapped */
 -      return atomic_read(&page->_mapcount) ==
 -             atomic_read(compound_mapcount_ptr(head));
 -}
 -
  /*
   * PageTransTail returns true for both transparent huge pages
   * and hugetlbfs pages, so it should only be called when it's known
@@@ -778,15 -817,6 +780,15 @@@ static inline int PageSlabPfmemalloc(st
        return PageActive(page);
  }
  
 +/*
 + * A version of PageSlabPfmemalloc() for opportunistic checks where the page
 + * might have been freed under us and not be a PageSlab anymore.
 + */
 +static inline int __PageSlabPfmemalloc(struct page *page)
 +{
 +      return PageActive(page);
 +}
 +
  static inline void SetPageSlabPfmemalloc(struct page *page)
  {
        VM_BUG_ON_PAGE(!PageSlab(page), page);
@@@ -831,7 -861,7 +833,7 @@@ static inline void ClearPageSlabPfmemal
   * alloc-free cycle to prevent from reusing the page.
   */
  #define PAGE_FLAGS_CHECK_AT_PREP      \
-       (((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
+       (PAGEFLAGS_MASK & ~__PG_HWPOISON)
  
  #define PAGE_FLAGS_PRIVATE                            \
        (1UL << PG_private | 1UL << PG_private_2)
diff --combined include/linux/pagemap.h
@@@ -521,18 -521,17 +521,17 @@@ static inline struct page *read_mapping
   */
  static inline pgoff_t page_to_index(struct page *page)
  {
-       pgoff_t pgoff;
+       struct page *head;
  
        if (likely(!PageTransTail(page)))
                return page->index;
  
+       head = compound_head(page);
        /*
         *  We don't initialize ->index for tail pages: calculate based on
         *  head page
         */
-       pgoff = compound_head(page)->index;
-       pgoff += page - compound_head(page);
-       return pgoff;
+       return head->index + page - head;
  }
  
  extern pgoff_t hugetlb_basepage_index(struct page *page);
@@@ -736,7 -735,7 +735,7 @@@ extern void add_page_wait_queue(struct 
  /*
   * Fault everything in given userspace address range in.
   */
 -static inline int fault_in_pages_writeable(char __user *uaddr, int size)
 +static inline int fault_in_pages_writeable(char __user *uaddr, size_t size)
  {
        char __user *end = uaddr + size - 1;
  
        return 0;
  }
  
 -static inline int fault_in_pages_readable(const char __user *uaddr, int size)
 +static inline int fault_in_pages_readable(const char __user *uaddr, size_t size)
  {
        volatile char c;
        const char __user *end = uaddr + size - 1;
diff --combined include/linux/units.h
@@@ -4,25 -4,13 +4,29 @@@
  
  #include <linux/math.h>
  
- #define MILLIWATT_PER_WATT    1000L
- #define MICROWATT_PER_MILLIWATT       1000L
- #define MICROWATT_PER_WATT    1000000L
 +/* Metric prefixes in accordance with Système international (d'unités) */
 +#define PETA  1000000000000000ULL
 +#define TERA  1000000000000ULL
 +#define GIGA  1000000000UL
 +#define MEGA  1000000UL
 +#define KILO  1000UL
 +#define HECTO 100UL
 +#define DECA  10UL
 +#define DECI  10UL
 +#define CENTI 100UL
 +#define MILLI 1000UL
 +#define MICRO 1000000UL
 +#define NANO  1000000000UL
 +#define PICO  1000000000000ULL
 +#define FEMTO 1000000000000000ULL
 +
+ #define HZ_PER_KHZ            1000UL
+ #define KHZ_PER_MHZ           1000UL
+ #define HZ_PER_MHZ            1000000UL
+ #define MILLIWATT_PER_WATT    1000UL
+ #define MICROWATT_PER_MILLIWATT       1000UL
+ #define MICROWATT_PER_WATT    1000000UL
  
  #define ABSOLUTE_ZERO_MILLICELSIUS -273150
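
Illustrative conversions using the new constants (the helper names are made
up; nothing here is in-tree):

	static inline unsigned long example_khz_to_hz(unsigned long khz)
	{
		return khz * HZ_PER_KHZ;	/* 100 kHz -> 100000 Hz */
	}

	static inline u64 example_watts_to_uw(unsigned long watts)
	{
		return (u64)watts * MICROWATT_PER_WATT;
	}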
  
diff --combined include/trace/events/mmflags.h
@@@ -75,7 -75,7 +75,7 @@@
  #define IF_HAVE_PG_HWPOISON(flag,string)
  #endif
  
- #if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+ #if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
  #define IF_HAVE_PG_IDLE(flag,string) ,{1UL << flag, string}
  #else
  #define IF_HAVE_PG_IDLE(flag,string)
@@@ -165,6 -165,7 +165,6 @@@ IF_HAVE_PG_SKIP_KASAN_POISON(PG_skip_ka
        {VM_UFFD_MISSING,               "uffd_missing"  },              \
  IF_HAVE_UFFD_MINOR(VM_UFFD_MINOR,     "uffd_minor"    )               \
        {VM_PFNMAP,                     "pfnmap"        },              \
 -      {VM_DENYWRITE,                  "denywrite"     },              \
        {VM_UFFD_WP,                    "uffd_wp"       },              \
        {VM_LOCKED,                     "locked"        },              \
        {VM_IO,                         "io"            },              \
diff --combined init/main.c
@@@ -777,6 -777,8 +777,8 @@@ void __init __weak poking_init(void) { 
  
  void __init __weak pgtable_cache_init(void) { }
  
+ void __init __weak trap_init(void) { }
  bool initcall_debug;
  core_param(initcall_debug, initcall_debug, bool, 0644);
  
@@@ -1226,7 -1228,7 +1228,7 @@@ trace_initcall_start_cb(void *data, ini
  {
        ktime_t *calltime = (ktime_t *)data;
  
 -      printk(KERN_DEBUG "calling  %pS @ %i\n", fn, task_pid_nr(current));
 +      printk(KERN_DEBUG "calling  %pS @ %i irqs_disabled() %d\n", fn, task_pid_nr(current), irqs_disabled());
        *calltime = ktime_get();
  }
  
@@@ -1240,8 -1242,8 +1242,8 @@@ trace_initcall_finish_cb(void *data, in
        rettime = ktime_get();
        delta = ktime_sub(rettime, *calltime);
        duration = (unsigned long long) ktime_to_ns(delta) >> 10;
 -      printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs\n",
 -               fn, ret, duration);
 +      printk(KERN_DEBUG "initcall %pS returned %d after %lld usecs, irqs_disabled() %d\n",
 +               fn, ret, duration, irqs_disabled());
  }
  
  static ktime_t initcall_calltime;
@@@ -1392,7 -1394,6 +1394,6 @@@ static void __init do_basic_setup(void
        driver_init();
        init_irq_proc();
        do_ctors();
-       usermodehelper_enable();
        do_initcalls();
  }
  
diff --combined kernel/fork.c
@@@ -446,7 -446,6 +446,7 @@@ void put_task_stack(struct task_struct 
  
  void free_task(struct task_struct *tsk)
  {
 +      release_user_cpus_ptr(tsk);
        scs_release(tsk);
  
  #ifndef CONFIG_THREAD_INFO_IN_TASK
  }
  EXPORT_SYMBOL(free_task);
  
 +static void dup_mm_exe_file(struct mm_struct *mm, struct mm_struct *oldmm)
 +{
 +      struct file *exe_file;
 +
 +      exe_file = get_mm_exe_file(oldmm);
 +      RCU_INIT_POINTER(mm->exe_file, exe_file);
 +      /*
 +       * We depend on the oldmm having properly denied write access to the
 +       * exe_file already.
 +       */
 +      if (exe_file && deny_write_access(exe_file))
 +              pr_warn_once("deny_write_access() failed in %s\n", __func__);
 +}
 +
  #ifdef CONFIG_MMU
  static __latent_entropy int dup_mmap(struct mm_struct *mm,
                                        struct mm_struct *oldmm)
        mmap_write_lock_nested(mm, SINGLE_DEPTH_NESTING);
  
        /* No ordering required: file already has been exposed. */
 -      RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
 +      dup_mm_exe_file(mm, oldmm);
  
        mm->total_vm = oldmm->total_vm;
        mm->data_vm = oldmm->data_vm;
                tmp->vm_flags &= ~(VM_LOCKED | VM_LOCKONFAULT);
                file = tmp->vm_file;
                if (file) {
 -                      struct inode *inode = file_inode(file);
                        struct address_space *mapping = file->f_mapping;
  
                        get_file(file);
 -                      if (tmp->vm_flags & VM_DENYWRITE)
 -                              put_write_access(inode);
                        i_mmap_lock_write(mapping);
                        if (tmp->vm_flags & VM_SHARED)
                                mapping_allow_writable(mapping);
@@@ -651,7 -639,7 +651,7 @@@ static inline void mm_free_pgd(struct m
  static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
  {
        mmap_write_lock(oldmm);
 -      RCU_INIT_POINTER(mm->exe_file, get_mm_exe_file(oldmm));
 +      dup_mm_exe_file(mm, oldmm);
        mmap_write_unlock(oldmm);
        return 0;
  }
@@@ -936,7 -924,6 +936,7 @@@ static struct task_struct *dup_task_str
  #endif
        if (orig->cpus_ptr == &orig->cpus_mask)
                tsk->cpus_ptr = &tsk->cpus_mask;
 +      dup_user_cpus_ptr(tsk, orig, node);
  
        /*
         * One for the user space visible state that goes away when reaped.
@@@ -1161,11 -1148,11 +1161,11 @@@ void mmput_async(struct mm_struct *mm
   *
   * Main users are mmput() and sys_execve(). Callers prevent concurrent
   * invocations: in mmput() nobody is left alive, in execve the task is single
 - * threaded. sys_prctl(PR_SET_MM_MAP/EXE_FILE) also needs to set the
 - * mm->exe_file, but does so without using set_mm_exe_file() in order
 - * to avoid the need for any locks.
 + * threaded.
 + *
 + * Can only fail if new_exe_file != NULL.
   */
 -void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 +int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
  {
        struct file *old_exe_file;
  
         */
        old_exe_file = rcu_dereference_raw(mm->exe_file);
  
 -      if (new_exe_file)
 +      if (new_exe_file) {
 +              /*
 +               * We expect the caller (i.e., sys_execve) to have already denied
 +               * write access, so this is unlikely to fail.
 +               */
 +              if (unlikely(deny_write_access(new_exe_file)))
 +                      return -EACCES;
                get_file(new_exe_file);
 +      }
        rcu_assign_pointer(mm->exe_file, new_exe_file);
 -      if (old_exe_file)
 +      if (old_exe_file) {
 +              allow_write_access(old_exe_file);
                fput(old_exe_file);
 +      }
 +      return 0;
 +}
 +
 +/**
 + * replace_mm_exe_file - replace a reference to the mm's executable file
 + *
 + * This changes mm's executable file (shown as symlink /proc/[pid]/exe),
 + * dealing with concurrent invocation and without grabbing the mmap lock in
 + * write mode.
 + *
 + * Main user is sys_prctl(PR_SET_MM_MAP/EXE_FILE).
 + */
 +int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
 +{
 +      struct vm_area_struct *vma;
 +      struct file *old_exe_file;
 +      int ret = 0;
 +
 +      /* Forbid mm->exe_file change if old file still mapped. */
 +      old_exe_file = get_mm_exe_file(mm);
 +      if (old_exe_file) {
 +              mmap_read_lock(mm);
 +              for (vma = mm->mmap; vma && !ret; vma = vma->vm_next) {
 +                      if (!vma->vm_file)
 +                              continue;
 +                      if (path_equal(&vma->vm_file->f_path,
 +                                     &old_exe_file->f_path))
 +                              ret = -EBUSY;
 +              }
 +              mmap_read_unlock(mm);
 +              fput(old_exe_file);
 +              if (ret)
 +                      return ret;
 +      }
 +
 +      /* set the new file, lockless */
 +      ret = deny_write_access(new_exe_file);
 +      if (ret)
 +              return -EACCES;
 +      get_file(new_exe_file);
 +
 +      old_exe_file = xchg(&mm->exe_file, new_exe_file);
 +      if (old_exe_file) {
 +              /*
 +               * Don't race with dup_mmap() getting the file and disallowing
 +               * write access while someone might open the file writable.
 +               */
 +              mmap_read_lock(mm);
 +              allow_write_access(old_exe_file);
 +              fput(old_exe_file);
 +              mmap_read_unlock(mm);
 +      }
 +      return 0;
  }
  
  /**
@@@ -1262,7 -1187,6 +1262,6 @@@ struct file *get_mm_exe_file(struct mm_
        rcu_read_unlock();
        return exe_file;
  }
- EXPORT_SYMBOL(get_mm_exe_file);
  
  /**
   * get_task_exe_file - acquire a reference to the task's executable file
@@@ -1285,7 -1209,6 +1284,6 @@@ struct file *get_task_exe_file(struct t
        task_unlock(task);
        return exe_file;
  }
- EXPORT_SYMBOL(get_task_exe_file);
  
  /**
   * get_task_mm - acquire a reference to the task's mm
@@@ -2158,7 -2081,6 +2156,7 @@@ static __latent_entropy struct task_str
  #endif
  #ifdef CONFIG_BPF_SYSCALL
        RCU_INIT_POINTER(p->bpf_storage, NULL);
 +      p->bpf_ctx = NULL;
  #endif
  
        /* Perform scheduler related setup. Assign this task to a CPU. */
diff --combined kernel/sys.c
@@@ -480,8 -480,7 +480,8 @@@ static int set_user(struct cred *new
         * failure to the execve() stage.
         */
        if (is_ucounts_overlimit(new->ucounts, UCOUNT_RLIMIT_NPROC, rlimit(RLIMIT_NPROC)) &&
 -                      new_user != INIT_USER)
 +                      new_user != INIT_USER &&
 +                      !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN))
                current->flags |= PF_NPROC_EXCEEDED;
        else
                current->flags &= ~PF_NPROC_EXCEEDED;
@@@ -1847,6 -1846,7 +1847,6 @@@ SYSCALL_DEFINE1(umask, int, mask
  static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
  {
        struct fd exe;
 -      struct file *old_exe, *exe_file;
        struct inode *inode;
        int err;
  
        if (err)
                goto exit;
  
 -      /*
 -       * Forbid mm->exe_file change if old file still mapped.
 -       */
 -      exe_file = get_mm_exe_file(mm);
 -      err = -EBUSY;
 -      if (exe_file) {
 -              struct vm_area_struct *vma;
 -
 -              mmap_read_lock(mm);
 -              for (vma = mm->mmap; vma; vma = vma->vm_next) {
 -                      if (!vma->vm_file)
 -                              continue;
 -                      if (path_equal(&vma->vm_file->f_path,
 -                                     &exe_file->f_path))
 -                              goto exit_err;
 -              }
 -
 -              mmap_read_unlock(mm);
 -              fput(exe_file);
 -      }
 -
 -      err = 0;
 -      /* set the new file, lockless */
 -      get_file(exe.file);
 -      old_exe = xchg(&mm->exe_file, exe.file);
 -      if (old_exe)
 -              fput(old_exe);
 +      err = replace_mm_exe_file(mm, exe.file);
  exit:
        fdput(exe);
        return err;
 -exit_err:
 -      mmap_read_unlock(mm);
 -      fput(exe_file);
 -      goto exit;
  }
  
  /*
@@@ -1929,13 -1959,6 +1929,6 @@@ static int validate_prctl_map_addr(stru
  
        error = -EINVAL;
  
-       /*
-        * @brk should be after @end_data in traditional maps.
-        */
-       if (prctl_map->start_brk <= prctl_map->end_data ||
-           prctl_map->brk <= prctl_map->end_data)
-               goto out;
        /*
         * We should not allow overriding limits if they are set.
         */
diff --combined lib/Kconfig.debug
@@@ -346,7 -346,7 +346,7 @@@ config FRAME_WAR
        int "Warn for stack frames larger than"
        range 0 8192
        default 2048 if GCC_PLUGIN_LATENT_ENTROPY
 -      default 1280 if (!64BIT && PARISC)
 +      default 1536 if (!64BIT && PARISC)
        default 1024 if (!64BIT && !PARISC)
        default 2048 if 64BIT
        help
@@@ -365,7 -365,6 +365,7 @@@ config STRIP_ASM_SYM
  config READABLE_ASM
        bool "Generate readable assembler code"
        depends on DEBUG_KERNEL
 +      depends on CC_IS_GCC
        help
          Disable some compiler optimizations that tend to generate human unreadable
          assembler output. This may make the kernel slightly slower, but it helps
@@@ -384,7 -383,6 +384,7 @@@ config HEADERS_INSTAL
  
  config DEBUG_SECTION_MISMATCH
        bool "Enable full Section mismatch analysis"
 +      depends on CC_IS_GCC
        help
          The section mismatch analysis checks if there are illegal
          references from one section to another section.
@@@ -1064,7 -1062,6 +1064,6 @@@ config HARDLOCKUP_DETECTO
        depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH
        select LOCKUP_DETECTOR
        select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF
-       select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH
        help
          Say Y here to enable the kernel to act as a watchdog to detect
          hard lockups.
@@@ -1237,7 -1234,7 +1236,7 @@@ config PROVE_LOCKIN
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
        select LOCKDEP
        select DEBUG_SPINLOCK
 -      select DEBUG_MUTEXES
 +      select DEBUG_MUTEXES if !PREEMPT_RT
        select DEBUG_RT_MUTEXES if RT_MUTEXES
        select DEBUG_RWSEMS
        select DEBUG_WW_MUTEX_SLOWPATH
@@@ -1301,7 -1298,7 +1300,7 @@@ config LOCK_STA
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
        select LOCKDEP
        select DEBUG_SPINLOCK
 -      select DEBUG_MUTEXES
 +      select DEBUG_MUTEXES if !PREEMPT_RT
        select DEBUG_RT_MUTEXES if RT_MUTEXES
        select DEBUG_LOCK_ALLOC
        default n
@@@ -1337,7 -1334,7 +1336,7 @@@ config DEBUG_SPINLOC
  
  config DEBUG_MUTEXES
        bool "Mutex debugging: basic checks"
 -      depends on DEBUG_KERNEL
 +      depends on DEBUG_KERNEL && !PREEMPT_RT
        help
         This feature allows mutex semantics violations to be detected and
         reported.
@@@ -1347,8 -1344,7 +1346,8 @@@ config DEBUG_WW_MUTEX_SLOWPAT
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
        select DEBUG_LOCK_ALLOC
        select DEBUG_SPINLOCK
 -      select DEBUG_MUTEXES
 +      select DEBUG_MUTEXES if !PREEMPT_RT
 +      select DEBUG_RT_MUTEXES if PREEMPT_RT
        help
         This feature enables slowpath testing for w/w mutex users by
         injecting additional -EDEADLK wound/backoff cases. Together with
@@@ -1371,7 -1367,7 +1370,7 @@@ config DEBUG_LOCK_ALLO
        bool "Lock debugging: detect incorrect freeing of live locks"
        depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
        select DEBUG_SPINLOCK
 -      select DEBUG_MUTEXES
 +      select DEBUG_MUTEXES if !PREEMPT_RT
        select DEBUG_RT_MUTEXES if RT_MUTEXES
        select LOCKDEP
        help
@@@ -1682,6 -1678,33 +1681,6 @@@ config DEBUG_WQ_FORCE_RR_CP
          feature by default.  When enabled, memory and cache locality will
          be impacted.
  
 -config DEBUG_BLOCK_EXT_DEVT
 -      bool "Force extended block device numbers and spread them"
 -      depends on DEBUG_KERNEL
 -      depends on BLOCK
 -      default n
 -      help
 -        BIG FAT WARNING: ENABLING THIS OPTION MIGHT BREAK BOOTING ON
 -        SOME DISTRIBUTIONS.  DO NOT ENABLE THIS UNLESS YOU KNOW WHAT
 -        YOU ARE DOING.  Distros, please enable this and fix whatever
 -        is broken.
 -
 -        Conventionally, block device numbers are allocated from
 -        predetermined contiguous area.  However, extended block area
 -        may introduce non-contiguous block device numbers.  This
 -        option forces most block device numbers to be allocated from
 -        the extended space and spreads them to discover kernel or
 -        userland code paths which assume predetermined contiguous
 -        device number allocation.
 -
 -        Note that turning on this debug option shuffles all the
 -        device numbers for all IDE and SCSI devices including libata
 -        ones, so root partition specified using device number
 -        directly (via rdev or root=MAJ:MIN) won't work anymore.
 -        Textual device names (root=/dev/sdXn) will continue to work.
 -
 -        Say N if you are unsure.
 -
  config CPU_HOTPLUG_STATE_CONTROL
        bool "Enable CPU hotplug state control"
        depends on DEBUG_KERNEL
@@@ -1947,13 -1970,6 +1946,13 @@@ config FAIL_MMC_REQUES
          and to test how the mmc host driver handles retries from
          the block device.
  
 +config FAIL_SUNRPC
 +      bool "Fault-injection capability for SunRPC"
 +      depends on FAULT_INJECTION_DEBUG_FS && SUNRPC_DEBUG
 +      help
 +        Provide fault-injection capability for SunRPC and
 +        its consumers.
 +
  config FAULT_INJECTION_STACKTRACE_FILTER
        bool "stacktrace filter for fault-injection capabilities"
        depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
@@@ -2061,8 -2077,9 +2060,9 @@@ config TEST_MIN_HEA
          If unsure, say N.
  
  config TEST_SORT
-       tristate "Array-based sort test"
-       depends on DEBUG_KERNEL || m
+       tristate "Array-based sort test" if !KUNIT_ALL_TESTS
+       depends on KUNIT
+       default KUNIT_ALL_TESTS
        help
          This option enables the self-test function of 'sort()' at boot,
          or at module load time.
@@@ -2443,8 -2460,7 +2443,7 @@@ config SLUB_KUNIT_TES
  
  config RATIONAL_KUNIT_TEST
        tristate "KUnit test for rational.c" if !KUNIT_ALL_TESTS
-       depends on KUNIT
-       select RATIONAL
+       depends on KUNIT && RATIONAL
        default KUNIT_ALL_TESTS
        help
          This builds the rational math unit test.
diff --combined lib/test_printf.c
@@@ -614,7 -614,7 +614,7 @@@ page_flags_test(int section, int node, 
        bool append = false;
        int i;
  
-       flags &= BIT(NR_PAGEFLAGS) - 1;
+       flags &= PAGEFLAGS_MASK;
        if (flags) {
                page_flags |= flags;
                snprintf(cmp_buf + size, BUF_SIZE - size, "%s", name);
@@@ -675,8 -675,9 +675,8 @@@ flags(void
                        "uptodate|dirty|lru|active|swapbacked",
                        cmp_buffer);
  
 -      flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC
 -                      | VM_DENYWRITE;
 -      test("read|exec|mayread|maywrite|mayexec|denywrite", "%pGv", &flags);
 +      flags = VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
 +      test("read|exec|mayread|maywrite|mayexec", "%pGv", &flags);
  
        gfp = GFP_TRANSHUGE;
        test("GFP_TRANSHUGE", "%pGg", &gfp);
diff --combined lib/vsprintf.c
@@@ -17,7 -17,7 +17,7 @@@
   * - scnprintf and vscnprintf
   */
  
 -#include <stdarg.h>
 +#include <linux/stdarg.h>
  #include <linux/build_bug.h>
  #include <linux/clk.h>
  #include <linux/clk-provider.h>
@@@ -2019,7 -2019,7 +2019,7 @@@ static const struct page_flags_fields p
  static
  char *format_page_flags(char *buf, char *end, unsigned long flags)
  {
-       unsigned long main_flags = flags & (BIT(NR_PAGEFLAGS) - 1);
+       unsigned long main_flags = flags & PAGEFLAGS_MASK;
        bool append = false;
        int i;
  
diff --combined mm/compaction.c
@@@ -306,16 -306,14 +306,14 @@@ __reset_isolation_pfn(struct zone *zone
         * is necessary for the block to be a migration source/target.
         */
        do {
-               if (pfn_valid_within(pfn)) {
-                       if (check_source && PageLRU(page)) {
-                               clear_pageblock_skip(page);
-                               return true;
-                       }
+               if (check_source && PageLRU(page)) {
+                       clear_pageblock_skip(page);
+                       return true;
+               }
  
-                       if (check_target && PageBuddy(page)) {
-                               clear_pageblock_skip(page);
-                               return true;
-                       }
+               if (check_target && PageBuddy(page)) {
+                       clear_pageblock_skip(page);
+                       return true;
                }
  
                page += (1 << PAGE_ALLOC_COSTLY_ORDER);
@@@ -585,8 -583,6 +583,6 @@@ static unsigned long isolate_freepages_
                        break;
  
                nr_scanned++;
-               if (!pfn_valid_within(blockpfn))
-                       goto isolate_fail;
  
                /*
                 * For compound pages such as THP and hugetlbfs, we can save
@@@ -885,8 -881,6 +881,6 @@@ isolate_migratepages_block(struct compa
                        cond_resched();
                }
  
-               if (!pfn_valid_within(low_pfn))
-                       goto isolate_fail;
                nr_scanned++;
  
                page = pfn_to_page(low_pfn);
@@@ -2398,7 -2392,7 +2392,7 @@@ compact_zone(struct compact_control *cc
  
                err = migrate_pages(&cc->migratepages, compaction_alloc,
                                compaction_free, (unsigned long)cc, cc->mode,
 -                              MR_COMPACTION);
 +                              MR_COMPACTION, NULL);
  
                trace_mm_compaction_migratepages(cc->nr_migratepages, err,
                                                        &cc->migratepages);
@@@ -2706,30 -2700,6 +2700,30 @@@ static void compact_nodes(void
   */
  unsigned int __read_mostly sysctl_compaction_proactiveness = 20;
  
 +int compaction_proactiveness_sysctl_handler(struct ctl_table *table, int write,
 +              void *buffer, size_t *length, loff_t *ppos)
 +{
 +      int rc, nid;
 +
 +      rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
 +      if (rc)
 +              return rc;
 +
 +      if (write && sysctl_compaction_proactiveness) {
 +              for_each_online_node(nid) {
 +                      pg_data_t *pgdat = NODE_DATA(nid);
 +
 +                      if (pgdat->proactive_compact_trigger)
 +                              continue;
 +
 +                      pgdat->proactive_compact_trigger = true;
 +                      wake_up_interruptible(&pgdat->kcompactd_wait);
 +              }
 +      }
 +
 +      return 0;
 +}
 +
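
For context, a hedged userspace sketch of how this handler gets exercised;
the /proc path is assumed from the sysctl name and may differ.

	/* Illustrative only: any non-zero write wakes kcompactd on every node. */
	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/compaction_proactiveness", "w");

		if (!f)
			return 1;
		fprintf(f, "30\n");
		return fclose(f) != 0;
	}
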
  /*
   * This is the entry point for compacting all nodes via
   * /proc/sys/vm/compact_memory
@@@ -2774,8 -2744,7 +2768,8 @@@ void compaction_unregister_node(struct 
  
  static inline bool kcompactd_work_requested(pg_data_t *pgdat)
  {
 -      return pgdat->kcompactd_max_order > 0 || kthread_should_stop();
 +      return pgdat->kcompactd_max_order > 0 || kthread_should_stop() ||
 +              pgdat->proactive_compact_trigger;
  }
  
  static bool kcompactd_node_suitable(pg_data_t *pgdat)
@@@ -2910,8 -2879,7 +2904,8 @@@ static int kcompactd(void *p
  {
        pg_data_t *pgdat = (pg_data_t *)p;
        struct task_struct *tsk = current;
 -      unsigned int proactive_defer = 0;
 +      long default_timeout = msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC);
 +      long timeout = default_timeout;
  
        const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
  
        while (!kthread_should_stop()) {
                unsigned long pflags;
  
 +              /*
 +               * Avoid the unnecessary wakeup for proactive compaction
 +               * when it is disabled.
 +               */
 +              if (!sysctl_compaction_proactiveness)
 +                      timeout = MAX_SCHEDULE_TIMEOUT;
                trace_mm_compaction_kcompactd_sleep(pgdat->node_id);
                if (wait_event_freezable_timeout(pgdat->kcompactd_wait,
 -                      kcompactd_work_requested(pgdat),
 -                      msecs_to_jiffies(HPAGE_FRAG_CHECK_INTERVAL_MSEC))) {
 +                      kcompactd_work_requested(pgdat), timeout) &&
 +                      !pgdat->proactive_compact_trigger) {
  
                        psi_memstall_enter(&pflags);
                        kcompactd_do_work(pgdat);
                        psi_memstall_leave(&pflags);
 +                      /*
 +                       * Reset the timeout value. The defer timeout from
 +                       * proactive compaction is lost here but that is fine
 +                       * as the condition of the zone changing substantionally
 +                       * then carrying on with the previous defer interval is
 +                       * not useful.
 +                       */
 +                      timeout = default_timeout;
                        continue;
                }
  
 -              /* kcompactd wait timeout */
 +              /*
 +               * Start the proactive work with default timeout. Based
 +               * on the fragmentation score, this timeout is updated.
 +               */
 +              timeout = default_timeout;
                if (should_proactive_compact_node(pgdat)) {
                        unsigned int prev_score, score;
  
 -                      if (proactive_defer) {
 -                              proactive_defer--;
 -                              continue;
 -                      }
                        prev_score = fragmentation_score_node(pgdat);
                        proactive_compact_node(pgdat);
                        score = fragmentation_score_node(pgdat);
                         * Defer proactive compaction if the fragmentation
                         * score did not go down i.e. no progress made.
                         */
 -                      proactive_defer = score < prev_score ?
 -                                      0 : 1 << COMPACT_MAX_DEFER_SHIFT;
 +                      if (unlikely(score >= prev_score))
 +                              timeout =
 +                                 default_timeout << COMPACT_MAX_DEFER_SHIFT;
                }
 +              if (unlikely(pgdat->proactive_compact_trigger))
 +                      pgdat->proactive_compact_trigger = false;
        }
  
        return 0;
diff --combined mm/kfence/kfence_test.c
  #include <linux/tracepoint.h>
  #include <trace/events/printk.h>
  
 +#include <asm/kfence.h>
 +
  #include "kfence.h"
  
 +/* May be overridden by <asm/kfence.h>. */
 +#ifndef arch_kfence_test_address
 +#define arch_kfence_test_address(addr) (addr)
 +#endif
 +
  /* Report as observed from console. */
  static struct {
        spinlock_t lock;
@@@ -89,7 -82,6 +89,7 @@@ static const char *get_access_type(cons
  /* Check observed report matches information in @r. */
  static bool report_matches(const struct expect_report *r)
  {
 +      unsigned long addr = (unsigned long)r->addr;
        bool ret = false;
        unsigned long flags;
        typeof(observed.lines) expect;
        switch (r->type) {
        case KFENCE_ERROR_OOB:
                cur += scnprintf(cur, end - cur, "Out-of-bounds %s at", get_access_type(r));
 +              addr = arch_kfence_test_address(addr);
                break;
        case KFENCE_ERROR_UAF:
                cur += scnprintf(cur, end - cur, "Use-after-free %s at", get_access_type(r));
 +              addr = arch_kfence_test_address(addr);
                break;
        case KFENCE_ERROR_CORRUPTION:
                cur += scnprintf(cur, end - cur, "Corrupted memory at");
                break;
        case KFENCE_ERROR_INVALID:
                cur += scnprintf(cur, end - cur, "Invalid %s at", get_access_type(r));
 +              addr = arch_kfence_test_address(addr);
                break;
        case KFENCE_ERROR_INVALID_FREE:
                cur += scnprintf(cur, end - cur, "Invalid free of");
                break;
        }
  
 -      cur += scnprintf(cur, end - cur, " 0x%p", (void *)r->addr);
 +      cur += scnprintf(cur, end - cur, " 0x%p", (void *)addr);
  
        spin_lock_irqsave(&observed.lock, flags);
        if (!report_available())
@@@ -800,6 -789,9 +800,9 @@@ static int test_init(struct kunit *test
        unsigned long flags;
        int i;
  
+       if (!__kfence_pool)
+               return -EINVAL;
        spin_lock_irqsave(&observed.lock, flags);
        for (i = 0; i < ARRAY_SIZE(observed.lines); i++)
                observed.lines[i][0] = '\0';
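
The new arch_kfence_test_address() hook lets the test normalize the expected address before matching it against the console report. A sketch of what an architecture override might look like, assuming a platform whose fault path only reports the page-aligned address (modelled on, but not copied from, the s390 variant):

/*
 * Hypothetical <asm/kfence.h> snippet for an architecture whose fault
 * reporting preserves only the page frame of the access; the test then
 * masks the expected address the same way before matching the report.
 */
#define arch_kfence_test_address(addr) ((addr) & PAGE_MASK)
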
diff --combined mm/kfence/report.c
@@@ -5,10 -5,11 +5,11 @@@
   * Copyright (C) 2020, Google LLC.
   */
  
 -#include <stdarg.h>
 +#include <linux/stdarg.h>
  
  #include <linux/kernel.h>
  #include <linux/lockdep.h>
+ #include <linux/math.h>
  #include <linux/printk.h>
  #include <linux/sched/debug.h>
  #include <linux/seq_file.h>
@@@ -100,6 -101,13 +101,13 @@@ static void kfence_print_stack(struct s
                               bool show_alloc)
  {
        const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track;
+       u64 ts_sec = track->ts_nsec;
+       unsigned long rem_nsec = do_div(ts_sec, NSEC_PER_SEC);
+       /* Timestamp matches printk timestamp format. */
+       seq_con_printf(seq, "%s by task %d on cpu %d at %lu.%06lus:\n",
+                      show_alloc ? "allocated" : "freed", track->pid,
+                      track->cpu, (unsigned long)ts_sec, rem_nsec / 1000);
  
        if (track->num_stack_entries) {
                /* Skip allocation/free internals stack. */
@@@ -126,15 -134,14 +134,14 @@@ void kfence_print_object(struct seq_fil
                return;
        }
  
-       seq_con_printf(seq,
-                      "kfence-#%td [0x%p-0x%p"
-                      ", size=%d, cache=%s] allocated by task %d:\n",
-                      meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size,
-                      (cache && cache->name) ? cache->name : "<destroyed>", meta->alloc_track.pid);
+       seq_con_printf(seq, "kfence-#%td: 0x%p-0x%p, size=%d, cache=%s\n\n",
+                      meta - kfence_metadata, (void *)start, (void *)(start + size - 1),
+                      size, (cache && cache->name) ? cache->name : "<destroyed>");
        kfence_print_stack(seq, meta, true);
  
        if (meta->state == KFENCE_OBJECT_FREED) {
-               seq_con_printf(seq, "\nfreed by task %d:\n", meta->free_track.pid);
+               seq_con_printf(seq, "\n");
                kfence_print_stack(seq, meta, false);
        }
  }
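
The report change prints an allocation/free timestamp using do_div(), which divides its first argument in place and returns the remainder. A freestanding sketch of the same second/microsecond split, with plain 64-bit division standing in for the kernel-only do_div():

#include <inttypes.h>
#include <stdio.h>

#define NSEC_PER_SEC 1000000000ULL

/* Split a nanosecond timestamp into printk-style "sec.usec". */
static void format_ts(uint64_t ts_nsec, char *buf, size_t len)
{
	uint64_t ts_sec = ts_nsec / NSEC_PER_SEC;		/* what do_div() leaves behind */
	unsigned long rem = (unsigned long)(ts_nsec % NSEC_PER_SEC);	/* what it returns */

	snprintf(buf, len, "%" PRIu64 ".%06lus", ts_sec, rem / 1000);
}

int main(void)
{
	char buf[32];

	format_ts(1630000123456789ULL, buf, sizeof(buf));
	puts(buf);	/* prints "1630000.123456s" */
	return 0;
}
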
diff --combined mm/memory_hotplug.c
@@@ -52,6 -52,73 +52,73 @@@ module_param(memmap_on_memory, bool, 04
  MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
  #endif
  
+ enum {
+       ONLINE_POLICY_CONTIG_ZONES = 0,
+       ONLINE_POLICY_AUTO_MOVABLE,
+ };
+ static const char * const online_policy_to_str[] = {
+       [ONLINE_POLICY_CONTIG_ZONES] = "contig-zones",
+       [ONLINE_POLICY_AUTO_MOVABLE] = "auto-movable",
+ };
+ static int set_online_policy(const char *val, const struct kernel_param *kp)
+ {
+       int ret = sysfs_match_string(online_policy_to_str, val);
+       if (ret < 0)
+               return ret;
+       *((int *)kp->arg) = ret;
+       return 0;
+ }
+ static int get_online_policy(char *buffer, const struct kernel_param *kp)
+ {
+       return sprintf(buffer, "%s\n", online_policy_to_str[*((int *)kp->arg)]);
+ }
+ /*
+  * memory_hotplug.online_policy: configure online behavior when onlining without
+  * specifying a zone (MMOP_ONLINE)
+  *
+  * "contig-zones": keep zone contiguous
+  * "auto-movable": online memory to ZONE_MOVABLE if the configuration
+  *                 (auto_movable_ratio, auto_movable_numa_aware) allows for it
+  */
+ static int online_policy __read_mostly = ONLINE_POLICY_CONTIG_ZONES;
+ static const struct kernel_param_ops online_policy_ops = {
+       .set = set_online_policy,
+       .get = get_online_policy,
+ };
+ module_param_cb(online_policy, &online_policy_ops, &online_policy, 0644);
+ MODULE_PARM_DESC(online_policy,
+               "Set the online policy (\"contig-zones\", \"auto-movable\") "
+               "Default: \"contig-zones\"");
+ /*
+  * memory_hotplug.auto_movable_ratio: specify maximum MOVABLE:KERNEL ratio
+  *
+  * The ratio represents an upper limit and the kernel might decide not to
+  * online some memory to ZONE_MOVABLE -- e.g., because hotplugged KERNEL memory
+  * doesn't allow for more MOVABLE memory.
+  */
+ static unsigned int auto_movable_ratio __read_mostly = 301;
+ module_param(auto_movable_ratio, uint, 0644);
+ MODULE_PARM_DESC(auto_movable_ratio,
+               "Set the maximum ratio of MOVABLE:KERNEL memory in the system "
+               "in percent for \"auto-movable\" online policy. Default: 301");
+ /*
+  * memory_hotplug.auto_movable_numa_aware: consider numa node stats
+  */
+ #ifdef CONFIG_NUMA
+ static bool auto_movable_numa_aware __read_mostly = true;
+ module_param(auto_movable_numa_aware, bool, 0644);
+ MODULE_PARM_DESC(auto_movable_numa_aware,
+               "Consider numa node stats in addition to global stats in "
+               "\"auto-movable\" online policy. Default: true");
+ #endif /* CONFIG_NUMA */
  /*
   * online_page_callback contains pointer to current page onlining function.
   * Initially it is generic_online_page(). If it is required it could be
@@@ -410,15 -477,13 +477,13 @@@ void __ref remove_pfn_range_from_zone(s
                                 sizeof(struct page) * cur_nr_pages);
        }
  
- #ifdef CONFIG_ZONE_DEVICE
        /*
         * Zone shrinking code cannot properly deal with ZONE_DEVICE. So
         * we will not try to shrink the zones - which is okay as
         * set_zone_contiguous() cannot deal with ZONE_DEVICE either way.
         */
-       if (zone_idx(zone) == ZONE_DEVICE)
+       if (zone_is_zone_device(zone))
                return;
- #endif
  
        clear_zone_contiguous(zone);
  
@@@ -663,6 -728,109 +728,109 @@@ void __ref move_pfn_range_to_zone(struc
        set_zone_contiguous(zone);
  }
  
+ struct auto_movable_stats {
+       unsigned long kernel_early_pages;
+       unsigned long movable_pages;
+ };
+ static void auto_movable_stats_account_zone(struct auto_movable_stats *stats,
+                                           struct zone *zone)
+ {
+       if (zone_idx(zone) == ZONE_MOVABLE) {
+               stats->movable_pages += zone->present_pages;
+       } else {
+               stats->kernel_early_pages += zone->present_early_pages;
+ #ifdef CONFIG_CMA
+               /*
+                * CMA pages (never on hotplugged memory) behave like
+                * ZONE_MOVABLE.
+                */
+               stats->movable_pages += zone->cma_pages;
+               stats->kernel_early_pages -= zone->cma_pages;
+ #endif /* CONFIG_CMA */
+       }
+ }
+ struct auto_movable_group_stats {
+       unsigned long movable_pages;
+       unsigned long req_kernel_early_pages;
+ };
+ static int auto_movable_stats_account_group(struct memory_group *group,
+                                          void *arg)
+ {
+       const int ratio = READ_ONCE(auto_movable_ratio);
+       struct auto_movable_group_stats *stats = arg;
+       long pages;
+       /*
+        * We don't support modifying the config while the auto-movable online
+        * policy is already enabled. Just avoid the division by zero below.
+        */
+       if (!ratio)
+               return 0;
+       /*
+        * Calculate how many early kernel pages this group requires to
+        * satisfy the configured zone ratio.
+        */
+       pages = group->present_movable_pages * 100 / ratio;
+       pages -= group->present_kernel_pages;
+       if (pages > 0)
+               stats->req_kernel_early_pages += pages;
+       stats->movable_pages += group->present_movable_pages;
+       return 0;
+ }
+ static bool auto_movable_can_online_movable(int nid, struct memory_group *group,
+                                           unsigned long nr_pages)
+ {
+       unsigned long kernel_early_pages, movable_pages;
+       struct auto_movable_group_stats group_stats = {};
+       struct auto_movable_stats stats = {};
+       pg_data_t *pgdat = NODE_DATA(nid);
+       struct zone *zone;
+       int i;
+       /* Walk all relevant zones and collect MOVABLE vs. KERNEL stats. */
+       if (nid == NUMA_NO_NODE) {
+               /* TODO: cache values */
+               for_each_populated_zone(zone)
+                       auto_movable_stats_account_zone(&stats, zone);
+       } else {
+               for (i = 0; i < MAX_NR_ZONES; i++) {
+                       zone = pgdat->node_zones + i;
+                       if (populated_zone(zone))
+                               auto_movable_stats_account_zone(&stats, zone);
+               }
+       }
+       kernel_early_pages = stats.kernel_early_pages;
+       movable_pages = stats.movable_pages;
+       /*
+        * Kernel memory inside dynamic memory group allows for more MOVABLE
+        * memory within the same group. Remove the effect of all but the
+        * current group from the stats.
+        */
+       walk_dynamic_memory_groups(nid, auto_movable_stats_account_group,
+                                  group, &group_stats);
+       if (kernel_early_pages <= group_stats.req_kernel_early_pages)
+               return false;
+       kernel_early_pages -= group_stats.req_kernel_early_pages;
+       movable_pages -= group_stats.movable_pages;
+       if (group && group->is_dynamic)
+               kernel_early_pages += group->present_kernel_pages;
+       /*
+        * Test if we could online the given number of pages to ZONE_MOVABLE
+        * and still stay in the configured ratio.
+        */
+       movable_pages += nr_pages;
+       return movable_pages <= (auto_movable_ratio * kernel_early_pages) / 100;
+ }
  /*
   * Returns a default kernel memory zone for the given pfn range.
   * If no kernel zone covers this pfn range it will automatically go
@@@ -684,6 -852,117 +852,117 @@@ static struct zone *default_kernel_zone
        return &pgdat->node_zones[ZONE_NORMAL];
  }
  
+ /*
+  * Determine to which zone to online memory dynamically based on user
+  * configuration and system stats. We care about the following ratio:
+  *
+  *   MOVABLE : KERNEL
+  *
+  * Whereby MOVABLE is memory in ZONE_MOVABLE and KERNEL is memory in
+  * one of the kernel zones. CMA pages inside one of the kernel zones really
+  * behave like ZONE_MOVABLE, so we treat them accordingly.
+  *
+  * We don't allow for hotplugged memory in a KERNEL zone to increase the
+  * amount of MOVABLE memory we can have, so we end up with:
+  *
+  *   MOVABLE : KERNEL_EARLY
+  *
+  * Whereby KERNEL_EARLY is memory in one of the kernel zones, available since
+  * boot. We base our calculation on KERNEL_EARLY internally, because:
+  *
+  * a) Hotplugged memory in one of the kernel zones can sometimes still get
+  *    hotunplugged, especially when hot(un)plugging individual memory blocks.
+  *    There is no coordination across memory devices, therefore "automatic"
+  *    hotunplugging, as implemented in hypervisors, could result in zone
+  *    imbalances.
+  * b) Early/boot memory in one of the kernel zones can usually not get
+  *    hotunplugged again (e.g., no firmware interface to unplug, fragmented
+  *    with unmovable allocations). While there are corner cases where it might
+  *    still work, it is barely relevant in practice.
+  *
+  * Exceptions are dynamic memory groups, which allow for more MOVABLE
+  * memory within the same memory group -- because in that case, there is
+  * coordination within the single memory device managed by a single driver.
+  *
+  * We rely on "present pages" instead of "managed pages", as the latter is
+  * highly unreliable and dynamic in virtualized environments, and does not
+  * consider boot time allocations. For example, memory ballooning adjusts the
+  * managed pages when inflating/deflating the balloon, and balloon compaction
+  * can even migrate inflated pages between zones.
+  *
+  * Using "present pages" is better but some things to keep in mind are:
+  *
+  * a) Some memblock allocations, such as for the crashkernel area, are
+  *    effectively unused by the kernel, yet they account to "present pages".
+  *    Fortunately, these allocations are comparatively small in relevant setups
+  *    (e.g., fraction of system memory).
+  * b) Some hotplugged memory blocks in virtualized environments, especially
+  *    hotplugged by virtio-mem, look like they are completely present, however,
+  *    only parts of the memory block are actually currently usable.
+  *    "present pages" is an upper limit that can get reached at runtime. As
+  *    we base our calculations on KERNEL_EARLY, this is not an issue.
+  */
+ static struct zone *auto_movable_zone_for_pfn(int nid,
+                                             struct memory_group *group,
+                                             unsigned long pfn,
+                                             unsigned long nr_pages)
+ {
+       unsigned long online_pages = 0, max_pages, end_pfn;
+       struct page *page;
+       if (!auto_movable_ratio)
+               goto kernel_zone;
+       if (group && !group->is_dynamic) {
+               max_pages = group->s.max_pages;
+               online_pages = group->present_movable_pages;
+       /* If anything is !MOVABLE, online the rest !MOVABLE. */
+               if (group->present_kernel_pages)
+                       goto kernel_zone;
+       } else if (!group || group->d.unit_pages == nr_pages) {
+               max_pages = nr_pages;
+       } else {
+               max_pages = group->d.unit_pages;
+               /*
+                * Take a look at all online sections in the current unit.
+                * We can safely assume that all pages within a section belong
+                * to the same zone, because dynamic memory groups only deal
+                * with hotplugged memory.
+                */
+               pfn = ALIGN_DOWN(pfn, group->d.unit_pages);
+               end_pfn = pfn + group->d.unit_pages;
+               for (; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
+                       page = pfn_to_online_page(pfn);
+                       if (!page)
+                               continue;
+                       /* If anything is !MOVABLE, online the rest !MOVABLE. */
+                       if (page_zonenum(page) != ZONE_MOVABLE)
+                               goto kernel_zone;
+                       online_pages += PAGES_PER_SECTION;
+               }
+       }
+       /*
+        * Online MOVABLE if we could *currently* online all remaining parts
+        * MOVABLE. We expect to (add+) online them immediately next, so if
+        * nobody interferes, all will be MOVABLE if possible.
+        */
+       nr_pages = max_pages - online_pages;
+       if (!auto_movable_can_online_movable(NUMA_NO_NODE, group, nr_pages))
+               goto kernel_zone;
+ #ifdef CONFIG_NUMA
+       if (auto_movable_numa_aware &&
+           !auto_movable_can_online_movable(nid, group, nr_pages))
+               goto kernel_zone;
+ #endif /* CONFIG_NUMA */
+       return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
+ kernel_zone:
+       return default_kernel_zone_for_pfn(nid, pfn, nr_pages);
+ }
  static inline struct zone *default_zone_for_pfn(int nid, unsigned long start_pfn,
                unsigned long nr_pages)
  {
        return movable_node_enabled ? movable_zone : kernel_zone;
  }
  
- struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
+ struct zone *zone_for_pfn_range(int online_type, int nid,
+               struct memory_group *group, unsigned long start_pfn,
                unsigned long nr_pages)
  {
        if (online_type == MMOP_ONLINE_KERNEL)
        if (online_type == MMOP_ONLINE_MOVABLE)
                return &NODE_DATA(nid)->node_zones[ZONE_MOVABLE];
  
+       if (online_policy == ONLINE_POLICY_AUTO_MOVABLE)
+               return auto_movable_zone_for_pfn(nid, group, start_pfn, nr_pages);
        return default_zone_for_pfn(nid, start_pfn, nr_pages);
  }
  
   * This function should only be called by memory_block_{online,offline},
   * and {online,offline}_pages.
   */
- void adjust_present_page_count(struct zone *zone, long nr_pages)
+ void adjust_present_page_count(struct page *page, struct memory_group *group,
+                              long nr_pages)
  {
+       struct zone *zone = page_zone(page);
+       const bool movable = zone_idx(zone) == ZONE_MOVABLE;
+       /*
+        * We only support onlining/offlining/adding/removing of complete
+        * memory blocks; therefore, all of it is either early or hotplugged.
+        */
+       if (early_section(__pfn_to_section(page_to_pfn(page))))
+               zone->present_early_pages += nr_pages;
        zone->present_pages += nr_pages;
        zone->zone_pgdat->node_present_pages += nr_pages;
+       if (group && movable)
+               group->present_movable_pages += nr_pages;
+       else if (group && !movable)
+               group->present_kernel_pages += nr_pages;
  }
  
  int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
@@@ -773,7 -1071,8 +1071,8 @@@ void mhp_deinit_memmap_on_memory(unsign
        kasan_remove_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
  }
  
- int __ref online_pages(unsigned long pfn, unsigned long nr_pages, struct zone *zone)
+ int __ref online_pages(unsigned long pfn, unsigned long nr_pages,
+                      struct zone *zone, struct memory_group *group)
  {
        unsigned long flags;
        int need_zonelists_rebuild = 0;
        }
  
        online_pages_range(pfn, nr_pages);
-       adjust_present_page_count(zone, nr_pages);
+       adjust_present_page_count(pfn_to_page(pfn), group, nr_pages);
  
        node_states_set_node(nid, &arg);
        if (need_zonelists_rebuild)
@@@ -1059,6 -1358,7 +1358,7 @@@ int __ref add_memory_resource(int nid, 
  {
        struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
        struct vmem_altmap mhp_altmap = {};
+       struct memory_group *group = NULL;
        u64 start, size;
        bool new_node = false;
        int ret;
        if (ret)
                return ret;
  
+       if (mhp_flags & MHP_NID_IS_MGID) {
+               group = memory_group_find_by_id(nid);
+               if (!group)
+                       return -EINVAL;
+               nid = group->nid;
+       }
        if (!node_possible(nid)) {
                WARN(1, "node %d was absent from the node_possible_map\n", nid);
                return -EINVAL;
                goto error;
  
        /* create memory block devices after memory was added */
-       ret = create_memory_block_devices(start, size, mhp_altmap.alloc);
+       ret = create_memory_block_devices(start, size, mhp_altmap.alloc,
+                                         group);
        if (ret) {
-               arch_remove_memory(nid, start, size, NULL);
+               arch_remove_memory(start, size, NULL);
                goto error;
        }
  
@@@ -1298,7 -1606,7 +1606,7 @@@ struct zone *test_pages_in_a_zone(unsig
        unsigned long pfn, sec_end_pfn;
        struct zone *zone = NULL;
        struct page *page;
-       int i;
        for (pfn = start_pfn, sec_end_pfn = SECTION_ALIGN_UP(start_pfn + 1);
             pfn < end_pfn;
             pfn = sec_end_pfn, sec_end_pfn += PAGES_PER_SECTION) {
                        continue;
                for (; pfn < sec_end_pfn && pfn < end_pfn;
                     pfn += MAX_ORDER_NR_PAGES) {
-                       i = 0;
-                       /* This is just a CONFIG_HOLES_IN_ZONE check.*/
-                       while ((i < MAX_ORDER_NR_PAGES) &&
-                               !pfn_valid_within(pfn + i))
-                               i++;
-                       if (i == MAX_ORDER_NR_PAGES || pfn + i >= end_pfn)
-                               continue;
                        /* Check if we got outside of the zone */
-                       if (zone && !zone_spans_pfn(zone, pfn + i))
+                       if (zone && !zone_spans_pfn(zone, pfn))
                                return NULL;
-                       page = pfn_to_page(pfn + i);
+                       page = pfn_to_page(pfn);
                        if (zone && page_zone(page) != zone)
                                return NULL;
                        zone = page_zone(page);
@@@ -1469,7 -1770,7 +1770,7 @@@ do_migrate_range(unsigned long start_pf
                if (nodes_empty(nmask))
                        node_set(mtc.nid, nmask);
                ret = migrate_pages(&source, alloc_migration_target, NULL,
 -                      (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG);
 +                      (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_HOTPLUG, NULL);
                if (ret) {
                        list_for_each_entry(page, &source, lru) {
                                if (__ratelimit(&migrate_rs)) {
@@@ -1568,7 -1869,8 +1869,8 @@@ static int count_system_ram_pages_cb(un
        return 0;
  }
  
- int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+ int __ref offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+                       struct memory_group *group)
  {
        const unsigned long end_pfn = start_pfn + nr_pages;
        unsigned long pfn, system_ram_pages = 0;
  
        /* removal success */
        adjust_managed_page_count(pfn_to_page(start_pfn), -nr_pages);
-       adjust_present_page_count(zone, -nr_pages);
+       adjust_present_page_count(pfn_to_page(start_pfn), group, -nr_pages);
  
        /* reinitialise watermarks and update pcp limits */
        init_per_zone_wmark_min();
@@@ -1746,7 -2048,9 +2048,9 @@@ failed_removal
  static int check_memblock_offlined_cb(struct memory_block *mem, void *arg)
  {
        int ret = !is_memblock_offlined(mem);
+       int *nid = arg;
  
+       *nid = mem->nid;
        if (unlikely(ret)) {
                phys_addr_t beginpa, endpa;
  
@@@ -1839,12 -2143,12 +2143,12 @@@ void try_offline_node(int nid
  }
  EXPORT_SYMBOL(try_offline_node);
  
- static int __ref try_remove_memory(int nid, u64 start, u64 size)
+ static int __ref try_remove_memory(u64 start, u64 size)
  {
-       int rc = 0;
        struct vmem_altmap mhp_altmap = {};
        struct vmem_altmap *altmap = NULL;
        unsigned long nr_vmemmap_pages;
+       int rc = 0, nid = NUMA_NO_NODE;
  
        BUG_ON(check_hotplug_memory_range(start, size));
  
         * All memory blocks must be offlined before removing memory.  Check
         * whether all memory blocks in question are offline and return error
         * if this is not the case.
+        *
+        * While at it, determine the nid. Note that if we'd have mixed nodes,
+        * we'd only try to offline the last determined one -- which is good
+        * enough for the cases we care about.
         */
-       rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb);
+       rc = walk_memory_blocks(start, size, &nid, check_memblock_offlined_cb);
        if (rc)
                return rc;
  
  
        mem_hotplug_begin();
  
-       arch_remove_memory(nid, start, size, altmap);
+       arch_remove_memory(start, size, altmap);
  
        if (IS_ENABLED(CONFIG_ARCH_KEEP_MEMBLOCK)) {
                memblock_free(start, size);
  
        release_mem_region_adjustable(start, size);
  
-       try_offline_node(nid);
+       if (nid != NUMA_NO_NODE)
+               try_offline_node(nid);
  
        mem_hotplug_done();
        return 0;
  
  /**
   * __remove_memory - Remove memory if every memory block is offline
-  * @nid: the node ID
   * @start: physical address of the region to remove
   * @size: size of the region to remove
   *
   * and online/offline operations before this call, as required by
   * try_offline_node().
   */
- void __remove_memory(int nid, u64 start, u64 size)
+ void __remove_memory(u64 start, u64 size)
  {
  
        /*
         * trigger BUG() if some memory is not offlined prior to calling this
         * function
         */
-       if (try_remove_memory(nid, start, size))
+       if (try_remove_memory(start, size))
                BUG();
  }
  
  * Remove memory if every memory block is offline, otherwise return -EBUSY if
   * some memory is not offline
   */
- int remove_memory(int nid, u64 start, u64 size)
+ int remove_memory(u64 start, u64 size)
  {
        int rc;
  
        lock_device_hotplug();
-       rc  = try_remove_memory(nid, start, size);
+       rc = try_remove_memory(start, size);
        unlock_device_hotplug();
  
        return rc;
@@@ -1998,7 -2306,7 +2306,7 @@@ static int try_reonline_memory_block(st
   * unplugged all memory (so it's no longer in use) and want to offline + remove
   * that memory.
   */
- int offline_and_remove_memory(int nid, u64 start, u64 size)
+ int offline_and_remove_memory(u64 start, u64 size)
  {
        const unsigned long mb_count = size / memory_block_size_bytes();
        uint8_t *online_types, *tmp;
         * This cannot fail as it cannot get onlined in the meantime.
         */
        if (!rc) {
-               rc = try_remove_memory(nid, start, size);
+               rc = try_remove_memory(start, size);
                if (rc)
                        pr_err("%s: Failed to remove memory: %d", __func__, rc);
        }
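
The auto-movable policy added above reduces to a single inequality: after onlining, MOVABLE pages must not exceed auto_movable_ratio percent of early KERNEL pages. A worked sketch of that check with hypothetical page counts, assuming 4 KiB pages:

#include <stdbool.h>
#include <stdio.h>

/* The MOVABLE : KERNEL_EARLY inequality from auto_movable_can_online_movable(). */
static bool can_online_movable(unsigned long kernel_early_pages,
			       unsigned long movable_pages,
			       unsigned long nr_pages,
			       unsigned int ratio)
{
	return movable_pages + nr_pages <= (ratio * kernel_early_pages) / 100;
}

int main(void)
{
	unsigned long kernel_early = 1UL << 20;	/* 4 GiB of boot RAM in 4 KiB pages */

	/* ratio=301 caps MOVABLE at ~3.01x KERNEL_EARLY, i.e. ~12 GiB here. */
	printf("%d\n", can_online_movable(kernel_early, 0, 3UL << 20, 301)); /* 1: 12 GiB fits */
	printf("%d\n", can_online_movable(kernel_early, 0, 7UL << 19, 301)); /* 0: 14 GiB does not */
	return 0;
}
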
diff --combined mm/page_alloc.c
@@@ -594,8 -594,6 +594,6 @@@ static int page_outside_zone_boundaries
  
  static int page_is_consistent(struct zone *zone, struct page *page)
  {
-       if (!pfn_valid_within(page_to_pfn(page)))
-               return 0;
        if (zone != page_zone(page))
                return 0;
  
@@@ -1025,16 -1023,12 +1023,12 @@@ buddy_merge_likely(unsigned long pfn, u
        if (order >= MAX_ORDER - 2)
                return false;
  
-       if (!pfn_valid_within(buddy_pfn))
-               return false;
        combined_pfn = buddy_pfn & pfn;
        higher_page = page + (combined_pfn - pfn);
        buddy_pfn = __find_buddy_pfn(combined_pfn, order + 1);
        higher_buddy = higher_page + (buddy_pfn - combined_pfn);
  
-       return pfn_valid_within(buddy_pfn) &&
-              page_is_buddy(higher_page, higher_buddy, order + 1);
+       return page_is_buddy(higher_page, higher_buddy, order + 1);
  }
  
  /*
@@@ -1095,8 -1089,6 +1089,6 @@@ continue_merging
                buddy_pfn = __find_buddy_pfn(pfn, order);
                buddy = page + (buddy_pfn - pfn);
  
-               if (!pfn_valid_within(buddy_pfn))
-                       goto done_merging;
                if (!page_is_buddy(page, buddy, order))
                        goto done_merging;
                /*
@@@ -1754,9 -1746,7 +1746,7 @@@ void __init memblock_free_pages(struct 
  /*
   * Check that the whole (or subset of) a pageblock given by the interval of
   * [start_pfn, end_pfn) is valid and within the same zone, before scanning it
-  * with the migration of free compaction scanner. The scanners then need to
-  * use only pfn_valid_within() check for arches that allow holes within
-  * pageblocks.
+  * with the migration or free compaction scanner.
   *
   * Return struct page pointer of start_pfn, or NULL if checks were not passed.
   *
@@@ -1872,8 -1862,6 +1862,6 @@@ static inline void __init pgdat_init_re
   */
  static inline bool __init deferred_pfn_valid(unsigned long pfn)
  {
-       if (!pfn_valid_within(pfn))
-               return false;
        if (!(pfn & (pageblock_nr_pages - 1)) && !pfn_valid(pfn))
                return false;
        return true;
@@@ -2520,11 -2508,6 +2508,6 @@@ static int move_freepages(struct zone *
        int pages_moved = 0;
  
        for (pfn = start_pfn; pfn <= end_pfn;) {
-               if (!pfn_valid_within(pfn)) {
-                       pfn++;
-                       continue;
-               }
                page = pfn_to_page(pfn);
                if (!PageBuddy(page)) {
                        /*
@@@ -4211,7 -4194,7 +4194,7 @@@ static void warn_alloc_show_mem(gfp_t g
                if (tsk_is_oom_victim(current) ||
                    (current->flags & (PF_MEMALLOC | PF_EXITING)))
                        filter &= ~SHOW_MEM_FILTER_NODES;
 -      if (in_interrupt() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
 +      if (!in_task() || !(gfp_mask & __GFP_DIRECT_RECLAIM))
                filter &= ~SHOW_MEM_FILTER_NODES;
  
        show_mem(filter, nodemask);
@@@ -4549,14 -4532,14 +4532,14 @@@ static bool __need_reclaim(gfp_t gfp_ma
        return true;
  }
  
 -void __fs_reclaim_acquire(void)
 +void __fs_reclaim_acquire(unsigned long ip)
  {
 -      lock_map_acquire(&__fs_reclaim_map);
 +      lock_acquire_exclusive(&__fs_reclaim_map, 0, 0, NULL, ip);
  }
  
 -void __fs_reclaim_release(void)
 +void __fs_reclaim_release(unsigned long ip)
  {
 -      lock_map_release(&__fs_reclaim_map);
 +      lock_release(&__fs_reclaim_map, ip);
  }
  
  void fs_reclaim_acquire(gfp_t gfp_mask)
  
        if (__need_reclaim(gfp_mask)) {
                if (gfp_mask & __GFP_FS)
 -                      __fs_reclaim_acquire();
 +                      __fs_reclaim_acquire(_RET_IP_);
  
  #ifdef CONFIG_MMU_NOTIFIER
                lock_map_acquire(&__mmu_notifier_invalidate_range_start_map);
@@@ -4582,7 -4565,7 +4565,7 @@@ void fs_reclaim_release(gfp_t gfp_mask
  
        if (__need_reclaim(gfp_mask)) {
                if (gfp_mask & __GFP_FS)
 -                      __fs_reclaim_release();
 +                      __fs_reclaim_release(_RET_IP_);
        }
  }
  EXPORT_SYMBOL_GPL(fs_reclaim_release);
@@@ -4697,7 -4680,7 +4680,7 @@@ gfp_to_alloc_flags(gfp_t gfp_mask
                 * comment for __cpuset_node_allowed().
                 */
                alloc_flags &= ~ALLOC_CPUSET;
 -      } else if (unlikely(rt_task(current)) && !in_interrupt())
 +      } else if (unlikely(rt_task(current)) && in_task())
                alloc_flags |= ALLOC_HARDER;
  
        alloc_flags = gfp_to_alloc_flags_cma(gfp_mask, alloc_flags);
@@@ -5157,7 -5140,7 +5140,7 @@@ static inline bool prepare_alloc_pages(
                 * When we are in the interrupt context, it is irrelevant
                 * to the current task context. It means that any node is ok.
                 */
 -              if (!in_interrupt() && !ac->nodemask)
 +              if (in_task() && !ac->nodemask)
                        ac->nodemask = &cpuset_current_mems_allowed;
                else
                        *alloc_flags |= ALLOC_CPUSET;
@@@ -5903,7 -5886,6 +5886,7 @@@ void show_free_areas(unsigned int filte
                " unevictable:%lu dirty:%lu writeback:%lu\n"
                " slab_reclaimable:%lu slab_unreclaimable:%lu\n"
                " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
 +              " kernel_misc_reclaimable:%lu\n"
                " free:%lu free_pcp:%lu free_cma:%lu\n",
                global_node_page_state(NR_ACTIVE_ANON),
                global_node_page_state(NR_INACTIVE_ANON),
                global_node_page_state(NR_SHMEM),
                global_node_page_state(NR_PAGETABLE),
                global_zone_page_state(NR_BOUNCE),
 +              global_node_page_state(NR_KERNEL_MISC_RECLAIMABLE),
                global_zone_page_state(NR_FREE_PAGES),
                free_pcp,
                global_zone_page_state(NR_FREE_CMA_PAGES));
@@@ -6157,7 -6138,7 +6140,7 @@@ static int node_load[MAX_NUMNODES]
   *
   * Return: node id of the found node or %NUMA_NO_NODE if no node is found.
   */
 -static int find_next_best_node(int node, nodemask_t *used_node_mask)
 +int find_next_best_node(int node, nodemask_t *used_node_mask)
  {
        int n, val;
        int min_val = INT_MAX;
@@@ -6642,6 -6623,7 +6625,6 @@@ static void __meminit zone_init_free_li
        }
  }
  
 -#if !defined(CONFIG_FLATMEM)
  /*
   * Only struct pages that correspond to ranges defined by memblock.memory
   * are zeroed and initialized by going through __init_single_page() during
@@@ -6686,6 -6668,13 +6669,6 @@@ static void __init init_unavailable_ran
                pr_info("On node %d, zone %s: %lld pages in unavailable ranges",
                        node, zone_names[zone], pgcnt);
  }
 -#else
 -static inline void init_unavailable_range(unsigned long spfn,
 -                                        unsigned long epfn,
 -                                        int zone, int node)
 -{
 -}
 -#endif
  
  static void __init memmap_init_zone_range(struct zone *zone,
                                          unsigned long start_pfn,
@@@ -6715,7 -6704,7 +6698,7 @@@ static void __init memmap_init(void
  {
        unsigned long start_pfn, end_pfn;
        unsigned long hole_pfn = 0;
 -      int i, j, zone_id, nid;
 +      int i, j, zone_id = 0, nid;
  
        for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
                struct pglist_data *node = NODE_DATA(nid);
                init_unavailable_range(hole_pfn, end_pfn, zone_id, nid);
  }
  
 +void __init *memmap_alloc(phys_addr_t size, phys_addr_t align,
 +                        phys_addr_t min_addr, int nid, bool exact_nid)
 +{
 +      void *ptr;
 +
 +      if (exact_nid)
 +              ptr = memblock_alloc_exact_nid_raw(size, align, min_addr,
 +                                                 MEMBLOCK_ALLOC_ACCESSIBLE,
 +                                                 nid);
 +      else
 +              ptr = memblock_alloc_try_nid_raw(size, align, min_addr,
 +                                               MEMBLOCK_ALLOC_ACCESSIBLE,
 +                                               nid);
 +
 +      if (ptr && size > 0)
 +              page_init_poison(ptr, size);
 +
 +      return ptr;
 +}
 +
  static int zone_batchsize(struct zone *zone)
  {
  #ifdef CONFIG_MMU
@@@ -7271,6 -7240,9 +7254,9 @@@ static void __init calculate_node_total
                        zone->zone_start_pfn = 0;
                zone->spanned_pages = size;
                zone->present_pages = real_size;
+ #if defined(CONFIG_MEMORY_HOTPLUG)
+               zone->present_early_pages = real_size;
+ #endif
  
                totalpages += size;
                realtotalpages += real_size;
@@@ -7515,7 -7487,7 +7501,7 @@@ static void __init free_area_init_core(
  }
  
  #ifdef CONFIG_FLATMEM
 -static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
 +static void __init alloc_node_mem_map(struct pglist_data *pgdat)
  {
        unsigned long __maybe_unused start = 0;
        unsigned long __maybe_unused offset = 0;
                end = pgdat_end_pfn(pgdat);
                end = ALIGN(end, MAX_ORDER_NR_PAGES);
                size =  (end - start) * sizeof(struct page);
 -              map = memblock_alloc_node(size, SMP_CACHE_BYTES,
 -                                        pgdat->node_id);
 +              map = memmap_alloc(size, SMP_CACHE_BYTES, MEMBLOCK_LOW_LIMIT,
 +                                 pgdat->node_id, false);
                if (!map)
                        panic("Failed to allocate %ld bytes for node %d memory map\n",
                              size, pgdat->node_id);
  #endif
  }
  #else
 -static void __ref alloc_node_mem_map(struct pglist_data *pgdat) { }
 +static inline void alloc_node_mem_map(struct pglist_data *pgdat) { }
  #endif /* CONFIG_FLATMEM */
  
  #ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
@@@ -8828,9 -8800,6 +8814,6 @@@ struct page *has_unmovable_pages(struc
        }
  
        for (; iter < pageblock_nr_pages - offset; iter++) {
-               if (!pfn_valid_within(pfn + iter))
-                       continue;
                page = pfn_to_page(pfn + iter);
  
                /*
@@@ -8990,7 -8959,7 +8973,7 @@@ static int __alloc_contig_migrate_range
                cc->nr_migratepages -= nr_reclaimed;
  
                ret = migrate_pages(&cc->migratepages, alloc_migration_target,
 -                              NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE);
 +                      NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
  
                /*
                 * On -ENOMEM, migrate_pages() bails out right away. It is pointless
diff --combined mm/page_isolation.c
@@@ -93,8 -93,7 +93,7 @@@ static void unset_migratetype_isolate(s
                        buddy_pfn = __find_buddy_pfn(pfn, order);
                        buddy = page + (buddy_pfn - pfn);
  
-                       if (pfn_valid_within(buddy_pfn) &&
-                           !is_migrate_isolate_page(buddy)) {
+                       if (!is_migrate_isolate_page(buddy)) {
                                __isolate_free_page(page, order);
                                isolated_page = true;
                        }
@@@ -250,10 -249,6 +249,6 @@@ __test_page_isolated_in_pageblock(unsig
        struct page *page;
  
        while (pfn < end_pfn) {
-               if (!pfn_valid_within(pfn)) {
-                       pfn++;
-                       continue;
-               }
                page = pfn_to_page(pfn);
                if (PageBuddy(page))
                        /*
@@@ -287,7 -282,6 +282,7 @@@ int test_pages_isolated(unsigned long s
        unsigned long pfn, flags;
        struct page *page;
        struct zone *zone;
 +      int ret;
  
        /*
         * Note: pageblock_nr_pages != MAX_ORDER. Then, chunks of free pages
                        break;
        }
        page = __first_valid_page(start_pfn, end_pfn - start_pfn);
 -      if ((pfn < end_pfn) || !page)
 -              return -EBUSY;
 +      if ((pfn < end_pfn) || !page) {
 +              ret = -EBUSY;
 +              goto out;
 +      }
 +
        /* Check all pages are free or marked as ISOLATED */
        zone = page_zone(page);
        spin_lock_irqsave(&zone->lock, flags);
        pfn = __test_page_isolated_in_pageblock(start_pfn, end_pfn, isol_flags);
        spin_unlock_irqrestore(&zone->lock, flags);
  
 +      ret = pfn < end_pfn ? -EBUSY : 0;
 +
 +out:
        trace_test_pages_isolated(start_pfn, end_pfn, pfn);
  
 -      return pfn < end_pfn ? -EBUSY : 0;
 +      return ret;
  }
diff --combined mm/percpu.c
@@@ -146,7 -146,6 +146,6 @@@ static unsigned int pcpu_high_unit_cpu 
  
  /* the address of the first chunk which starts with the kernel static area */
  void *pcpu_base_addr __ro_after_init;
- EXPORT_SYMBOL_GPL(pcpu_base_addr);
  
  static const int *pcpu_unit_map __ro_after_init;              /* cpu -> unit */
  const unsigned long *pcpu_unit_offsets __ro_after_init;       /* cpu -> unit offset */
@@@ -1520,6 -1519,9 +1519,6 @@@ static void pcpu_free_chunk(struct pcpu
   * Pages in [@page_start,@page_end) have been populated to @chunk.  Update
   * the bookkeeping information accordingly.  Must be called after each
   * successful population.
 - *
 - * If this is @for_alloc, do not increment pcpu_nr_empty_pop_pages because it
 - * is to serve an allocation in that area.
   */
  static void pcpu_chunk_populated(struct pcpu_chunk *chunk, int page_start,
                                 int page_end)
diff --combined mm/rmap.c
  /*
   * Lock ordering in mm:
   *
 - * inode->i_mutex     (while writing or truncating, not reading or faulting)
 + * inode->i_rwsem     (while writing or truncating, not reading or faulting)
   *   mm->mmap_lock
 - *     page->flags PG_locked (lock_page)   * (see huegtlbfs below)
 - *       hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
 - *         mapping->i_mmap_rwsem
 - *           hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 - *           anon_vma->rwsem
 - *             mm->page_table_lock or pte_lock
 - *               swap_lock (in swap_duplicate, swap_info_get)
 - *                 mmlist_lock (in mmput, drain_mmlist and others)
 - *                 mapping->private_lock (in __set_page_dirty_buffers)
 - *                   lock_page_memcg move_lock (in __set_page_dirty_buffers)
 - *                     i_pages lock (widely used)
 - *                       lruvec->lru_lock (in lock_page_lruvec_irq)
 - *                 inode->i_lock (in set_page_dirty's __mark_inode_dirty)
 - *                 bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
 - *                   sb_lock (within inode_lock in fs/fs-writeback.c)
 - *                   i_pages lock (widely used, in set_page_dirty,
 - *                             in arch-dependent flush_dcache_mmap_lock,
 - *                             within bdi.wb->list_lock in __sync_single_inode)
 + *     mapping->invalidate_lock (in filemap_fault)
 + *       page->flags PG_locked (lock_page)   * (see hugetlbfs below)
 + *         hugetlbfs_i_mmap_rwsem_key (in huge_pmd_share)
 + *           mapping->i_mmap_rwsem
 + *             hugetlb_fault_mutex (hugetlbfs specific page fault mutex)
 + *             anon_vma->rwsem
 + *               mm->page_table_lock or pte_lock
 + *                 swap_lock (in swap_duplicate, swap_info_get)
 + *                   mmlist_lock (in mmput, drain_mmlist and others)
 + *                   mapping->private_lock (in __set_page_dirty_buffers)
 + *                     lock_page_memcg move_lock (in __set_page_dirty_buffers)
 + *                       i_pages lock (widely used)
 + *                         lruvec->lru_lock (in lock_page_lruvec_irq)
 + *                   inode->i_lock (in set_page_dirty's __mark_inode_dirty)
 + *                   bdi.wb->list_lock (in set_page_dirty's __mark_inode_dirty)
 + *                     sb_lock (within inode_lock in fs/fs-writeback.c)
 + *                     i_pages lock (widely used, in set_page_dirty,
 + *                               in arch-dependent flush_dcache_mmap_lock,
 + *                               within bdi.wb->list_lock in __sync_single_inode)
   *
 - * anon_vma->rwsem,mapping->i_mutex      (memory_failure, collect_procs_anon)
 + * anon_vma->rwsem,mapping->i_mmap_rwsem   (memory_failure, collect_procs_anon)
   *   ->tasklist_lock
   *     pte map lock
   *
@@@ -1231,11 -1230,13 +1231,13 @@@ void page_add_file_rmap(struct page *pa
                                                nr_pages);
        } else {
                if (PageTransCompound(page) && page_mapping(page)) {
+                       struct page *head = compound_head(page);
                        VM_WARN_ON_ONCE(!PageLocked(page));
  
-                       SetPageDoubleMap(compound_head(page));
+                       SetPageDoubleMap(head);
                        if (PageMlocked(page))
-                               clear_page_mlock(compound_head(page));
+                               clear_page_mlock(head);
                }
                if (!atomic_inc_and_test(&page->_mapcount))
                        goto out;
diff --combined mm/vmalloc.c
  #include "internal.h"
  #include "pgalloc-track.h"
  
+ #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
+ static unsigned int __ro_after_init ioremap_max_page_shift = BITS_PER_LONG - 1;
+ static int __init set_nohugeiomap(char *str)
+ {
+       ioremap_max_page_shift = PAGE_SHIFT;
+       return 0;
+ }
+ early_param("nohugeiomap", set_nohugeiomap);
+ #else /* CONFIG_HAVE_ARCH_HUGE_VMAP */
+ static const unsigned int ioremap_max_page_shift = PAGE_SHIFT;
+ #endif        /* CONFIG_HAVE_ARCH_HUGE_VMAP */
  #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
  static bool __ro_after_init vmap_allow_huge = true;
  
@@@ -298,15 -311,14 +311,14 @@@ static int vmap_range_noflush(unsigned 
        return err;
  }
  
- int vmap_range(unsigned long addr, unsigned long end,
-                       phys_addr_t phys_addr, pgprot_t prot,
-                       unsigned int max_page_shift)
+ int ioremap_page_range(unsigned long addr, unsigned long end,
+               phys_addr_t phys_addr, pgprot_t prot)
  {
        int err;
  
-       err = vmap_range_noflush(addr, end, phys_addr, prot, max_page_shift);
+       err = vmap_range_noflush(addr, end, phys_addr, pgprot_nx(prot),
+                                ioremap_max_page_shift);
        flush_cache_vmap(addr, end);
        return err;
  }
  
@@@ -787,28 -799,6 +799,28 @@@ unsigned long vmalloc_nr_pages(void
        return atomic_long_read(&nr_vmalloc_pages);
  }
  
 +static struct vmap_area *find_vmap_area_exceed_addr(unsigned long addr)
 +{
 +      struct vmap_area *va = NULL;
 +      struct rb_node *n = vmap_area_root.rb_node;
 +
 +      while (n) {
 +              struct vmap_area *tmp;
 +
 +              tmp = rb_entry(n, struct vmap_area, rb_node);
 +              if (tmp->va_end > addr) {
 +                      va = tmp;
 +                      if (tmp->va_start <= addr)
 +                              break;
 +
 +                      n = n->rb_left;
 +              } else
 +                      n = n->rb_right;
 +      }
 +
 +      return va;
 +}
 +
  static struct vmap_area *__find_vmap_area(unsigned long addr)
  {
        struct rb_node *n = vmap_area_root.rb_node;
@@@ -1501,7 -1491,6 +1513,7 @@@ static struct vmap_area *alloc_vmap_are
                                int node, gfp_t gfp_mask)
  {
        struct vmap_area *va;
 +      unsigned long freed;
        unsigned long addr;
        int purged = 0;
        int ret;
@@@ -1565,12 -1554,13 +1577,12 @@@ overflow
                goto retry;
        }
  
 -      if (gfpflags_allow_blocking(gfp_mask)) {
 -              unsigned long freed = 0;
 -              blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
 -              if (freed > 0) {
 -                      purged = 0;
 -                      goto retry;
 -              }
 +      freed = 0;
 +      blocking_notifier_call_chain(&vmap_notify_list, 0, &freed);
 +
 +      if (freed > 0) {
 +              purged = 0;
 +              goto retry;
        }
  
        if (!(gfp_mask & __GFP_NOWARN) && printk_ratelimit())
@@@ -2801,7 -2791,7 +2813,7 @@@ EXPORT_SYMBOL_GPL(vmap_pfn)
  
  static inline unsigned int
  vm_area_alloc_pages(gfp_t gfp, int nid,
 -              unsigned int order, unsigned long nr_pages, struct page **pages)
 +              unsigned int order, unsigned int nr_pages, struct page **pages)
  {
        unsigned int nr_allocated = 0;
  
         * to failures, fall back to a single page allocator that is
         * more permissive.
         */
 -      if (!order)
 -              nr_allocated = alloc_pages_bulk_array_node(
 -                      gfp, nid, nr_pages, pages);
 -      else
 +      if (!order) {
 +              while (nr_allocated < nr_pages) {
 +                      unsigned int nr, nr_pages_request;
 +
 +                      /*
 +                       * The maximum allowed request is hard-coded to 100
 +                       * pages per call, in order to prevent a long
 +                       * preemption-off scenario in the bulk allocator,
 +                       * so the range is [1:100].
 +                       */
 +                      nr_pages_request = min(100U, nr_pages - nr_allocated);
 +
 +                      nr = alloc_pages_bulk_array_node(gfp, nid,
 +                              nr_pages_request, pages + nr_allocated);
 +
 +                      nr_allocated += nr;
 +                      cond_resched();
 +
 +                      /*
 +                       * If zero pages or only part of the request were
 +                       * obtained, fall back to a single page allocator.
 +                       */
 +                      if (nr != nr_pages_request)
 +                              break;
 +              }
 +      } else
                /*
                 * Compound pages are required for remap_vmalloc_page() with
                 * high-order pages.
                for (i = 0; i < (1U << order); i++)
                        pages[nr_allocated + i] = page + i;
  
 -              if (gfpflags_allow_blocking(gfp))
 -                      cond_resched();
 -
 +              cond_resched();
                nr_allocated += 1U << order;
        }
  
@@@ -3309,14 -3279,9 +3321,14 @@@ long vread(char *buf, char *addr, unsig
                count = -(unsigned long) addr;
  
        spin_lock(&vmap_area_lock);
 -      va = __find_vmap_area((unsigned long)addr);
 +      va = find_vmap_area_exceed_addr((unsigned long)addr);
        if (!va)
                goto finished;
 +
 +      /* no intersection with a live vmap_area */
 +      if ((unsigned long)addr + count <= va->va_start)
 +              goto finished;
 +
        list_for_each_entry_from(va, &vmap_area_list, list) {
                if (!count)
                        break;
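
The vmalloc change above batches bulk page allocation into requests of at most 100 pages to bound the time spent with preemption off, falling back to single-page allocation on a short fill. The same pattern in a self-contained sketch; alloc_bulk() is a hypothetical malloc-backed stand-in for alloc_pages_bulk_array_node():

#include <stdlib.h>

/* Hypothetical stand-in for alloc_pages_bulk_array_node(). */
static unsigned int alloc_bulk(unsigned int want, void **slots)
{
	unsigned int i;

	for (i = 0; i < want; i++) {
		slots[i] = malloc(4096);
		if (!slots[i])
			break;
	}
	return i;
}

/* Request at most 100 pages per bulk call; stop on a short fill so the
 * caller can fall back to a single-page allocator, as vmalloc does. */
static unsigned int alloc_batched(unsigned int nr_pages, void **pages)
{
	unsigned int nr_allocated = 0;

	while (nr_allocated < nr_pages) {
		unsigned int req = nr_pages - nr_allocated;
		unsigned int nr;

		if (req > 100)
			req = 100;

		nr = alloc_bulk(req, pages + nr_allocated);
		nr_allocated += nr;
		if (nr != req)
			break;
	}
	return nr_allocated;
}

int main(void)
{
	void *pages[250];

	return alloc_batched(250, pages) == 250 ? 0 : 1;
}
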
diff --combined tools/perf/builtin-c2c.c
@@@ -139,11 -139,11 +139,11 @@@ static void *c2c_he_zalloc(size_t size
        if (!c2c_he)
                return NULL;
  
-       c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt);
+       c2c_he->cpuset = bitmap_zalloc(c2c.cpus_cnt);
        if (!c2c_he->cpuset)
                return NULL;
  
-       c2c_he->nodeset = bitmap_alloc(c2c.nodes_cnt);
+       c2c_he->nodeset = bitmap_zalloc(c2c.nodes_cnt);
        if (!c2c_he->nodeset)
                return NULL;
  
@@@ -2047,7 -2047,7 +2047,7 @@@ static int setup_nodes(struct perf_sess
                struct perf_cpu_map *map = n[node].map;
                unsigned long *set;
  
-               set = bitmap_alloc(c2c.cpus_cnt);
+               set = bitmap_zalloc(c2c.cpus_cnt);
                if (!set)
                        return -ENOMEM;
  
@@@ -2790,7 -2790,7 +2790,7 @@@ static int perf_c2c__report(int argc, c
                goto out;
        }
  
 -      session = perf_session__new(&data, 0, &c2c.tool);
 +      session = perf_session__new(&data, &c2c.tool);
        if (IS_ERR(session)) {
                err = PTR_ERR(session);
                pr_debug("Error creating perf session\n");
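
The perf changes in this and the following files swap bitmap_alloc() plus explicit clearing for bitmap_zalloc(), which hands back an already-zeroed bitmap. A simplified sketch of the two helpers; these are stand-ins for, not copies of, the tools/lib implementations:

#include <limits.h>
#include <stdlib.h>

#define BITS_TO_LONGS(nr) \
	(((nr) + CHAR_BIT * sizeof(long) - 1) / (CHAR_BIT * sizeof(long)))

/* Simplified stand-in: uninitialized bitmap. */
static inline unsigned long *bitmap_alloc(unsigned int nbits)
{
	return malloc(BITS_TO_LONGS(nbits) * sizeof(unsigned long));
}

/* Simplified stand-in: calloc() zeroes the buffer, so no separate
 * bitmap_zero() pass is needed. */
static inline unsigned long *bitmap_zalloc(unsigned int nbits)
{
	return calloc(BITS_TO_LONGS(nbits), sizeof(unsigned long));
}
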
diff --combined tools/perf/builtin-record.c
@@@ -910,8 -910,7 +910,8 @@@ static int record__open(struct record *
                 * Enable the dummy event when the process is forked for
                 * initial_delay, immediately for system wide.
                 */
 -              if (opts->initial_delay && !pos->immediate)
 +              if (opts->initial_delay && !pos->immediate &&
 +                  !target__has_cpu(&opts->target))
                        pos->core.attr.enable_on_exec = 1;
                else
                        pos->immediate = 1;
@@@ -1388,6 -1387,7 +1388,6 @@@ static int record__synthesize(struct re
        struct perf_data *data = &rec->data;
        struct record_opts *opts = &rec->opts;
        struct perf_tool *tool = &rec->tool;
 -      int fd = perf_data__fd(data);
        int err = 0;
        event_op f = process_synthesized_event;
  
                return 0;
  
        if (data->is_pipe) {
 -              /*
 -               * We need to synthesize events first, because some
 -               * features works on top of them (on report side).
 -               */
 -              err = perf_event__synthesize_attrs(tool, rec->evlist,
 -                                                 process_synthesized_event);
 -              if (err < 0) {
 -                      pr_err("Couldn't synthesize attrs.\n");
 -                      goto out;
 -              }
 -
 -              err = perf_event__synthesize_features(tool, session, rec->evlist,
 +              err = perf_event__synthesize_for_pipe(tool, session, data,
                                                      process_synthesized_event);
 -              if (err < 0) {
 -                      pr_err("Couldn't synthesize features.\n");
 -                      return err;
 -              }
 +              if (err < 0)
 +                      goto out;
  
 -              if (have_tracepoints(&rec->evlist->core.entries)) {
 -                      /*
 -                       * FIXME err <= 0 here actually means that
 -                       * there were no tracepoints so its not really
 -                       * an error, just that we don't need to
 -                       * synthesize anything.  We really have to
 -                       * return this more properly and also
 -                       * propagate errors that now are calling die()
 -                       */
 -                      err = perf_event__synthesize_tracing_data(tool, fd, rec->evlist,
 -                                                                process_synthesized_event);
 -                      if (err <= 0) {
 -                              pr_err("Couldn't record tracing data.\n");
 -                              goto out;
 -                      }
 -                      rec->bytes_written += err;
 -              }
 +              rec->bytes_written += err;
        }
  
        err = perf_event__synth_time_conv(record__pick_pc(rec), tool,
@@@ -1652,7 -1681,7 +1652,7 @@@ static int __cmd_record(struct record *
                signal(SIGUSR2, SIG_IGN);
        }
  
 -      session = perf_session__new(data, false, tool);
 +      session = perf_session__new(data, tool);
        if (IS_ERR(session)) {
                pr_err("Perf session creation failed.\n");
                return PTR_ERR(session);
@@@ -2757,7 -2786,7 +2757,7 @@@ int cmd_record(int argc, const char **a
  
        if (rec->opts.affinity != PERF_AFFINITY_SYS) {
                rec->affinity_mask.nbits = cpu__max_cpu();
-               rec->affinity_mask.bits = bitmap_alloc(rec->affinity_mask.nbits);
+               rec->affinity_mask.bits = bitmap_zalloc(rec->affinity_mask.nbits);
                if (!rec->affinity_mask.bits) {
                        pr_err("Failed to allocate thread mask for %zd cpus\n", rec->affinity_mask.nbits);
                        err = -ENOMEM;
        /* Enable ignoring missing threads when -u/-p option is defined. */
        rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid;
  
 +      if (evlist__fix_hybrid_cpus(rec->evlist, rec->opts.target.cpu_list)) {
 +              pr_err("failed to use cpu list %s\n",
 +                     rec->opts.target.cpu_list);
 +              goto out;
 +      }
 +
 +      rec->opts.target.hybrid = perf_pmu__has_hybrid();
        err = -ENOMEM;
        if (evlist__create_maps(rec->evlist, &rec->opts.target) < 0)
                usage_with_options(record_usage, record_options);
diff --combined tools/perf/util/header.c
@@@ -278,7 -278,7 +278,7 @@@ static int do_read_bitmap(struct feat_f
        if (ret)
                return ret;
  
-       set = bitmap_alloc(size);
+       set = bitmap_zalloc(size);
        if (!set)
                return -ENOMEM;
  
@@@ -1284,7 -1284,7 +1284,7 @@@ static int memory_node__read(struct mem
  
        dir = opendir(path);
        if (!dir) {
 -              pr_warning("failed: cant' open memory sysfs data\n");
 +              pr_warning("failed: can't open memory sysfs data\n");
                return -1;
        }
  
  
        size++;
  
-       n->set = bitmap_alloc(size);
+       n->set = bitmap_zalloc(size);
        if (!n->set) {
                closedir(dir);
                return -ENOMEM;
@@@ -3865,10 -3865,10 +3865,10 @@@ static int perf_file_section__process(s
  static int perf_file_header__read_pipe(struct perf_pipe_file_header *header,
                                       struct perf_header *ph,
                                       struct perf_data* data,
 -                                     bool repipe)
 +                                     bool repipe, int repipe_fd)
  {
        struct feat_fd ff = {
 -              .fd = STDOUT_FILENO,
 +              .fd = repipe_fd,
                .ph = ph,
        };
        ssize_t ret;
        return 0;
  }
  
 -static int perf_header__read_pipe(struct perf_session *session)
 +static int perf_header__read_pipe(struct perf_session *session, int repipe_fd)
  {
        struct perf_header *header = &session->header;
        struct perf_pipe_file_header f_header;
  
        if (perf_file_header__read_pipe(&f_header, header, session->data,
 -                                      session->repipe) < 0) {
 +                                      session->repipe, repipe_fd) < 0) {
                pr_debug("incompatible file format\n");
                return -EINVAL;
        }
@@@ -3995,7 -3995,7 +3995,7 @@@ static int evlist__prepare_tracepoint_e
        return 0;
  }
  
 -int perf_session__read_header(struct perf_session *session)
 +int perf_session__read_header(struct perf_session *session, int repipe_fd)
  {
        struct perf_data *data = session->data;
        struct perf_header *header = &session->header;
         * We can read 'pipe' data event from regular file,
         * check for the pipe header regardless of source.
         */
 -      err = perf_header__read_pipe(session);
 +      err = perf_header__read_pipe(session, repipe_fd);
        if (!err || perf_data__is_pipe(data)) {
                data->is_pipe = true;
                return err;
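The header.c hunks above replace the hardcoded STDOUT_FILENO in perf_file_header__read_pipe() with a repipe_fd argument threaded down from perf_session__read_header(), so repiped data follows whatever descriptor the caller opened instead of always landing on stdout. A hedged sketch of the same pattern (struct writer and repipe() are hypothetical names, not the perf API):

    #include <unistd.h>

    struct writer {
            int fd;         /* previously assumed to be STDOUT_FILENO */
    };

    static ssize_t repipe(struct writer *w, const void *buf, size_t len)
    {
            return write(w->fd, buf, len);  /* honour the caller's fd */
    }

    int main(void)
    {
            /* caller picks the destination, e.g. a file it opened */
            struct writer w = { .fd = STDOUT_FILENO };

            return repipe(&w, "hello\n", 6) == 6 ? 0 : 1;
    }
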
diff --combined tools/testing/selftests/kvm/dirty_log_perf_test.c
@@@ -44,6 -44,7 +44,6 @@@ static void *vcpu_worker(void *data
        struct perf_test_vcpu_args *vcpu_args = (struct perf_test_vcpu_args *)data;
        int vcpu_id = vcpu_args->vcpu_id;
  
 -      vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
        run = vcpu_state(vm, vcpu_id);
  
        while (!READ_ONCE(host_quit)) {
@@@ -93,59 -94,8 +93,59 @@@ struct test_params 
        int wr_fract;
        bool partition_vcpu_memory_access;
        enum vm_mem_backing_src_type backing_src;
 +      int slots;
  };
  
 +static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
 +{
 +      int i;
 +
 +      for (i = 0; i < slots; i++) {
 +              int slot = PERF_TEST_MEM_SLOT_INDEX + i;
 +              int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
 +
 +              vm_mem_region_set_flags(vm, slot, flags);
 +      }
 +}
 +
 +static inline void enable_dirty_logging(struct kvm_vm *vm, int slots)
 +{
 +      toggle_dirty_logging(vm, slots, true);
 +}
 +
 +static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
 +{
 +      toggle_dirty_logging(vm, slots, false);
 +}
 +
 +static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
 +                        uint64_t nr_pages)
 +{
 +      uint64_t slot_pages = nr_pages / slots;
 +      int i;
 +
 +      for (i = 0; i < slots; i++) {
 +              int slot = PERF_TEST_MEM_SLOT_INDEX + i;
 +              unsigned long *slot_bitmap = bitmap + i * slot_pages;
 +
 +              kvm_vm_get_dirty_log(vm, slot, slot_bitmap);
 +      }
 +}
 +
 +static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
 +                          uint64_t nr_pages)
 +{
 +      uint64_t slot_pages = nr_pages / slots;
 +      int i;
 +
 +      for (i = 0; i < slots; i++) {
 +              int slot = PERF_TEST_MEM_SLOT_INDEX + i;
 +              unsigned long *slot_bitmap = bitmap + i * slot_pages;
 +
 +              kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages);
 +      }
 +}
 +
  static void run_test(enum vm_guest_mode mode, void *arg)
  {
        struct test_params *p = arg;
        struct timespec clear_dirty_log_total = (struct timespec){0};
  
        vm = perf_test_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
 -                               p->backing_src);
 +                               p->slots, p->backing_src);
  
        perf_test_args.wr_fract = p->wr_fract;
  
        guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
        guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
        host_num_pages = vm_num_host_pages(mode, guest_num_pages);
-       bmap = bitmap_alloc(host_num_pages);
+       bmap = bitmap_zalloc(host_num_pages);
  
        if (dirty_log_manual_caps) {
                cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
  
        /* Enable dirty logging */
        clock_gettime(CLOCK_MONOTONIC, &start);
 -      vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX,
 -                              KVM_MEM_LOG_DIRTY_PAGES);
 +      enable_dirty_logging(vm, p->slots);
        ts_diff = timespec_elapsed(start);
        pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
                ts_diff.tv_sec, ts_diff.tv_nsec);
                        iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
  
                clock_gettime(CLOCK_MONOTONIC, &start);
 -              kvm_vm_get_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap);
 -
 +              get_dirty_log(vm, p->slots, bmap, host_num_pages);
                ts_diff = timespec_elapsed(start);
                get_dirty_log_total = timespec_add(get_dirty_log_total,
                                                   ts_diff);
  
                if (dirty_log_manual_caps) {
                        clock_gettime(CLOCK_MONOTONIC, &start);
 -                      kvm_vm_clear_dirty_log(vm, PERF_TEST_MEM_SLOT_INDEX, bmap, 0,
 -                                             host_num_pages);
 -
 +                      clear_dirty_log(vm, p->slots, bmap, host_num_pages);
                        ts_diff = timespec_elapsed(start);
                        clear_dirty_log_total = timespec_add(clear_dirty_log_total,
                                                             ts_diff);
  
        /* Disable dirty logging */
        clock_gettime(CLOCK_MONOTONIC, &start);
 -      vm_mem_region_set_flags(vm, PERF_TEST_MEM_SLOT_INDEX, 0);
 +      disable_dirty_logging(vm, p->slots);
        ts_diff = timespec_elapsed(start);
        pr_info("Disabling dirty logging time: %ld.%.9lds\n",
                ts_diff.tv_sec, ts_diff.tv_nsec);
@@@ -290,8 -244,7 +290,8 @@@ static void help(char *name
  {
        puts("");
        printf("usage: %s [-h] [-i iterations] [-p offset] "
 -             "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]\n", name);
 +             "[-m mode] [-b vcpu bytes] [-v vcpus] [-o] [-s mem type]"
 +             "[-x memslots]\n", name);
        puts("");
        printf(" -i: specify iteration counts (default: %"PRIu64")\n",
               TEST_HOST_LOOP_N);
               "     them into a separate region of memory for each vCPU.\n");
        printf(" -s: specify the type of memory that should be used to\n"
               "     back the guest data region.\n\n");
 +      printf(" -x: Split the memory region into this number of memslots.\n"
 +             "     (default: 1)");
        backing_src_help();
        puts("");
        exit(0);
@@@ -325,7 -276,6 +325,7 @@@ int main(int argc, char *argv[]
                .wr_fract = 1,
                .partition_vcpu_memory_access = true,
                .backing_src = VM_MEM_SRC_ANONYMOUS,
 +              .slots = 1,
        };
        int opt;
  
  
        guest_modes_append_default();
  
 -      while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:")) != -1) {
 +      while ((opt = getopt(argc, argv, "hi:p:m:b:f:v:os:x:")) != -1) {
                switch (opt) {
                case 'i':
                        p.iterations = atoi(optarg);
                case 's':
                        p.backing_src = parse_backing_src_type(optarg);
                        break;
 +              case 'x':
 +                      p.slots = atoi(optarg);
 +                      break;
                case 'h':
                default:
                        help(argv[0]);
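
The dirty_log_perf_test.c diff above adds a -x option that splits the guest data region across several memslots: slot ids run from PERF_TEST_MEM_SLOT_INDEX upward, and the enable/get/clear dirty-log paths loop over each slot rather than touching a single region. A standalone sketch of that per-slot loop shape (BASE_SLOT and the printf stand in for PERF_TEST_MEM_SLOT_INDEX and vm_mem_region_set_flags(); this is illustrative, not the selftest harness):

    #include <stdbool.h>
    #include <stdio.h>

    #define BASE_SLOT 1     /* stand-in for PERF_TEST_MEM_SLOT_INDEX */

    static void set_slot_logging(int slot, bool enable)
    {
            /* stand-in for vm_mem_region_set_flags(vm, slot, flags) */
            printf("slot %d: dirty logging %s\n", slot, enable ? "on" : "off");
    }

    static void toggle_logging(int slots, bool enable)
    {
            for (int i = 0; i < slots; i++)
                    set_slot_logging(BASE_SLOT + i, enable);
    }

    int main(void)
    {
            toggle_logging(4, true);        /* as with -x 4 */
            toggle_logging(4, false);
            return 0;
    }

As in the get_dirty_log()/clear_dirty_log() helpers above, the per-slot bitmap windows assume nr_pages divides evenly by the slot count; an uneven split would leave the tail pages uncovered.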