Merge tag 'powerpc-5.5-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc...
author     Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 30 Nov 2019 22:35:43 +0000 (14:35 -0800)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Sat, 30 Nov 2019 22:35:43 +0000 (14:35 -0800)
Pull powerpc updates from Michael Ellerman:
 "Highlights:

   - Infrastructure for secure boot on some bare metal Power9 machines.
     The firmware support is still in development, so the code here
     won't actually activate secure boot on any existing systems.

   - A change to xmon (our crash handler / pseudo-debugger) to restrict
     it to read-only mode when the kernel is locked down, since otherwise
     it is trivial to drop into xmon and modify kernel data, such as the
     lockdown state itself (a minimal sketch follows this list).

   - Support for KASLR on 32-bit BookE machines (Freescale / NXP).

   - Fixes for our flush_icache_range() and __kernel_sync_dicache()
     (VDSO) to work with memory ranges >4GB.

   - Some reworks of the pseries CMM (Cooperative Memory Management)
     driver to make it behave more like other balloon drivers and enable
     some cleanups of generic mm code.

   - A series of fixes to our hardware breakpoint support to properly
     handle unaligned watchpoint addresses.

  Plus a bunch of other smaller improvements, fixes and cleanups.
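
  As an aside on the xmon change above, here is a minimal sketch of
  read-only gating driven by the lockdown state. It is illustrative
  only: security_locked_down() and LOCKDOWN_INTEGRITY_MAX exist in the
  lockdown LSM, but the helper below is not the actual xmon code.

      /* Illustrative sketch only -- not the exact xmon implementation. */
      #include <linux/security.h>

      static bool xmon_is_ro;    /* refuse memory-modifying commands? */

      static void xmon_setup_ro_mode(void)
      {
              /* A writable debugger defeats lockdown, so force
               * read-only mode whenever the kernel is locked down.
               * The reason code used here is a stand-in. */
              if (security_locked_down(LOCKDOWN_INTEGRITY_MAX))
                      xmon_is_ro = true;
      }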

  Thanks to: Alastair D'Silva, Andrew Donnellan, Aneesh Kumar K.V,
  Anthony Steinhauser, Cédric Le Goater, Chris Packham, Chris Smart,
  Christophe Leroy, Christopher M. Riedl, Christoph Hellwig, Claudio
  Carvalho, Daniel Axtens, David Hildenbrand, Deb McLemore, Diana
  Craciun, Eric Richter, Geert Uytterhoeven, Greg Kroah-Hartman, Greg
  Kurz, Gustavo L. F. Walbon, Hari Bathini, Harish, Jason Yan, Krzysztof
  Kozlowski, Leonardo Bras, Mathieu Malaterre, Mauro S. M. Rodrigues,
  Michal Suchanek, Mimi Zohar, Nathan Chancellor, Nathan Lynch, Nayna
  Jain, Nick Desaulniers, Oliver O'Halloran, Qian Cai, Rasmus Villemoes,
  Ravi Bangoria, Sam Bobroff, Santosh Sivaraj, Scott Wood, Thomas Huth,
  Tyrel Datwyler, Vaibhav Jain, Valentin Longchamp, YueHaibing"

* tag 'powerpc-5.5-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (144 commits)
  powerpc/fixmap: fix crash with HIGHMEM
  x86/efi: remove unused variables
  powerpc: Define arch_is_kernel_initmem_freed() for lockdep
  powerpc/prom_init: Use -ffreestanding to avoid a reference to bcmp
  powerpc: Avoid clang warnings around setjmp and longjmp
  powerpc: Don't add -mabi= flags when building with Clang
  powerpc: Fix Kconfig indentation
  powerpc/fixmap: don't clear fixmap area in paging_init()
  selftests/powerpc: spectre_v2 test must be built 64-bit
  powerpc/powernv: Disable native PCIe port management
  powerpc/kexec: Move kexec files into a dedicated subdir.
  powerpc/32: Split kexec low level code out of misc_32.S
  powerpc/sysdev: drop simple gpio
  powerpc/83xx: map IMMR with a BAT.
  powerpc/32s: automatically allocate BAT in setbat()
  powerpc/ioremap: warn on early use of ioremap()
  powerpc: Add support for GENERIC_EARLY_IOREMAP
  powerpc/fixmap: Use __fix_to_virt() instead of fix_to_virt()
  powerpc/8xx: use the fixmapped IMMR in cpm_reset()
  powerpc/8xx: add __init to cpm1 init functions
  ...

225 files changed:
Documentation/ABI/testing/ima_policy
Documentation/ABI/testing/sysfs-secvar [new file with mode: 0644]
Documentation/devicetree/bindings/board/fsl-board.txt
Documentation/powerpc/index.rst
Documentation/powerpc/kaslr-booke32.rst [new file with mode: 0644]
arch/powerpc/Kbuild
arch/powerpc/Kconfig
arch/powerpc/Kconfig.debug
arch/powerpc/Makefile
arch/powerpc/boot/dts/fsl/kmcent2.dts
arch/powerpc/configs/40x/acadia_defconfig
arch/powerpc/configs/40x/ep405_defconfig
arch/powerpc/configs/40x/kilauea_defconfig
arch/powerpc/configs/40x/makalu_defconfig
arch/powerpc/configs/40x/obs600_defconfig
arch/powerpc/configs/40x/walnut_defconfig
arch/powerpc/configs/44x/akebono_defconfig
arch/powerpc/configs/44x/arches_defconfig
arch/powerpc/configs/44x/bamboo_defconfig
arch/powerpc/configs/44x/canyonlands_defconfig
arch/powerpc/configs/44x/currituck_defconfig
arch/powerpc/configs/44x/ebony_defconfig
arch/powerpc/configs/44x/eiger_defconfig
arch/powerpc/configs/44x/fsp2_defconfig
arch/powerpc/configs/44x/icon_defconfig
arch/powerpc/configs/44x/iss476-smp_defconfig
arch/powerpc/configs/44x/katmai_defconfig
arch/powerpc/configs/44x/rainier_defconfig
arch/powerpc/configs/44x/redwood_defconfig
arch/powerpc/configs/44x/sam440ep_defconfig
arch/powerpc/configs/44x/sequoia_defconfig
arch/powerpc/configs/44x/taishan_defconfig
arch/powerpc/configs/52xx/pcm030_defconfig
arch/powerpc/configs/83xx/kmeter1_defconfig
arch/powerpc/configs/83xx/mpc837x_rdb_defconfig
arch/powerpc/configs/85xx/ge_imp3a_defconfig
arch/powerpc/configs/adder875_defconfig
arch/powerpc/configs/amigaone_defconfig
arch/powerpc/configs/cell_defconfig
arch/powerpc/configs/chrp32_defconfig
arch/powerpc/configs/corenet_base.config [new file with mode: 0644]
arch/powerpc/configs/corenet_basic_defconfig [deleted file]
arch/powerpc/configs/debug.config [new file with mode: 0644]
arch/powerpc/configs/ep88xc_defconfig
arch/powerpc/configs/gamecube_defconfig
arch/powerpc/configs/mpc512x_defconfig
arch/powerpc/configs/mpc5200_defconfig
arch/powerpc/configs/mpc85xx_base.config [new file with mode: 0644]
arch/powerpc/configs/mpc85xx_basic_defconfig [deleted file]
arch/powerpc/configs/mpc86xx_base.config [new file with mode: 0644]
arch/powerpc/configs/mpc86xx_basic_defconfig [deleted file]
arch/powerpc/configs/mpc885_ads_defconfig
arch/powerpc/configs/pmac32_defconfig
arch/powerpc/configs/powernv_defconfig
arch/powerpc/configs/ppc44x_defconfig
arch/powerpc/configs/ppc6xx_defconfig
arch/powerpc/configs/ps3_defconfig
arch/powerpc/configs/skiroot_defconfig
arch/powerpc/configs/storcenter_defconfig
arch/powerpc/configs/tqm8xx_defconfig
arch/powerpc/configs/wii_defconfig
arch/powerpc/crypto/crc-vpmsum_test.c
arch/powerpc/include/asm/Kbuild
arch/powerpc/include/asm/book3s/64/pgalloc.h
arch/powerpc/include/asm/book3s/64/tlbflush.h
arch/powerpc/include/asm/bug.h
arch/powerpc/include/asm/cache.h
arch/powerpc/include/asm/cacheflush.h
arch/powerpc/include/asm/dma-mapping.h [deleted file]
arch/powerpc/include/asm/fixmap.h
arch/powerpc/include/asm/hw_breakpoint.h
arch/powerpc/include/asm/hw_irq.h
arch/powerpc/include/asm/nohash/32/kup-8xx.h
arch/powerpc/include/asm/nohash/mmu-book3e.h
arch/powerpc/include/asm/opal-api.h
arch/powerpc/include/asm/opal.h
arch/powerpc/include/asm/page.h
arch/powerpc/include/asm/pgtable.h
arch/powerpc/include/asm/reg.h
arch/powerpc/include/asm/reg_8xx.h
arch/powerpc/include/asm/sections.h
arch/powerpc/include/asm/secure_boot.h [new file with mode: 0644]
arch/powerpc/include/asm/security_features.h
arch/powerpc/include/asm/secvar.h [new file with mode: 0644]
arch/powerpc/include/uapi/asm/spu_info.h
arch/powerpc/kernel/Makefile
arch/powerpc/kernel/cpu_setup_fsl_booke.S
arch/powerpc/kernel/crash.c [deleted file]
arch/powerpc/kernel/dawr.c
arch/powerpc/kernel/early_32.c
arch/powerpc/kernel/eeh_driver.c
arch/powerpc/kernel/eeh_sysfs.c
arch/powerpc/kernel/exceptions-64e.S
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/fadump.c
arch/powerpc/kernel/fsl_booke_entry_mapping.S
arch/powerpc/kernel/head_fsl_booke.S
arch/powerpc/kernel/hw_breakpoint.c
arch/powerpc/kernel/ima_arch.c [new file with mode: 0644]
arch/powerpc/kernel/ima_kexec.c [deleted file]
arch/powerpc/kernel/kexec_elf_64.c [deleted file]
arch/powerpc/kernel/machine_kexec.c [deleted file]
arch/powerpc/kernel/machine_kexec_32.c [deleted file]
arch/powerpc/kernel/machine_kexec_64.c [deleted file]
arch/powerpc/kernel/machine_kexec_file_64.c [deleted file]
arch/powerpc/kernel/misc_32.S
arch/powerpc/kernel/misc_64.S
arch/powerpc/kernel/process.c
arch/powerpc/kernel/prom_init.c
arch/powerpc/kernel/ptrace.c
arch/powerpc/kernel/secure_boot.c [new file with mode: 0644]
arch/powerpc/kernel/security.c
arch/powerpc/kernel/secvar-ops.c [new file with mode: 0644]
arch/powerpc/kernel/secvar-sysfs.c [new file with mode: 0644]
arch/powerpc/kernel/setup-common.c
arch/powerpc/kernel/setup_32.c
arch/powerpc/kernel/setup_64.c
arch/powerpc/kernel/time.c
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/udbg.c
arch/powerpc/kernel/vdso64/cacheflush.S
arch/powerpc/kexec/Makefile [new file with mode: 0644]
arch/powerpc/kexec/core.c [new file with mode: 0644]
arch/powerpc/kexec/core_32.c [new file with mode: 0644]
arch/powerpc/kexec/core_64.c [new file with mode: 0644]
arch/powerpc/kexec/crash.c [new file with mode: 0644]
arch/powerpc/kexec/elf_64.c [new file with mode: 0644]
arch/powerpc/kexec/file_load.c [new file with mode: 0644]
arch/powerpc/kexec/ima.c [new file with mode: 0644]
arch/powerpc/kexec/relocate_32.S [new file with mode: 0644]
arch/powerpc/mm/book3s32/mmu.c
arch/powerpc/mm/book3s64/hash_native.c
arch/powerpc/mm/book3s64/hash_utils.c
arch/powerpc/mm/book3s64/pkeys.c
arch/powerpc/mm/book3s64/radix_tlb.c
arch/powerpc/mm/fault.c
arch/powerpc/mm/init-common.c
arch/powerpc/mm/init_32.c
arch/powerpc/mm/init_64.c
arch/powerpc/mm/ioremap_32.c
arch/powerpc/mm/ioremap_64.c
arch/powerpc/mm/mem.c
arch/powerpc/mm/mmu_decl.h
arch/powerpc/mm/nohash/8xx.c
arch/powerpc/mm/nohash/Makefile
arch/powerpc/mm/nohash/fsl_booke.c
arch/powerpc/mm/nohash/kaslr_booke.c [new file with mode: 0644]
arch/powerpc/mm/pgtable_32.c
arch/powerpc/perf/callchain.c
arch/powerpc/platforms/83xx/misc.c
arch/powerpc/platforms/83xx/mpc836x_mds.c
arch/powerpc/platforms/85xx/common.c
arch/powerpc/platforms/85xx/corenet_generic.c
arch/powerpc/platforms/85xx/mpc85xx.h
arch/powerpc/platforms/85xx/mpc85xx_mds.c
arch/powerpc/platforms/85xx/mpc85xx_rdb.c
arch/powerpc/platforms/85xx/twr_p102x.c
arch/powerpc/platforms/86xx/mpc8610_hpcd.c
arch/powerpc/platforms/8xx/cpm1.c
arch/powerpc/platforms/8xx/pic.c
arch/powerpc/platforms/Kconfig
arch/powerpc/platforms/Kconfig.cputype
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/powernv/Makefile
arch/powerpc/platforms/powernv/opal-call.c
arch/powerpc/platforms/powernv/opal-powercap.c
arch/powerpc/platforms/powernv/opal-psr.c
arch/powerpc/platforms/powernv/opal-secvar.c [new file with mode: 0644]
arch/powerpc/platforms/powernv/opal-sensor-groups.c
arch/powerpc/platforms/powernv/opal.c
arch/powerpc/platforms/powernv/pci-ioda-tce.c
arch/powerpc/platforms/powernv/pci-ioda.c
arch/powerpc/platforms/powernv/pci.c
arch/powerpc/platforms/pseries/Kconfig
arch/powerpc/platforms/pseries/cmm.c
arch/powerpc/platforms/pseries/hotplug-cpu.c
arch/powerpc/platforms/pseries/hotplug-memory.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/of_helpers.c
arch/powerpc/platforms/pseries/papr_scm.c
arch/powerpc/platforms/pseries/pci_dlpar.c
arch/powerpc/platforms/pseries/pseries_energy.c
arch/powerpc/platforms/pseries/ras.c
arch/powerpc/sysdev/Makefile
arch/powerpc/sysdev/simple_gpio.c [deleted file]
arch/powerpc/sysdev/simple_gpio.h [deleted file]
arch/powerpc/sysdev/xive/common.c
arch/powerpc/tools/relocs_check.sh
arch/powerpc/tools/unrel_branch_check.sh
arch/powerpc/xmon/Makefile
arch/powerpc/xmon/xmon.c
certs/blacklist.c
drivers/macintosh/ans-lcd.c
drivers/pci/hotplug/rpaphp_core.c
include/asm-generic/tlb.h
include/keys/system_keyring.h
include/linux/ima.h
include/linux/security.h
include/uapi/linux/magic.h
security/integrity/Kconfig
security/integrity/Makefile
security/integrity/ima/ima.h
security/integrity/ima/ima_appraise.c
security/integrity/ima/ima_main.c
security/integrity/ima/ima_policy.c
security/integrity/integrity.h
security/integrity/platform_certs/keyring_handler.c [new file with mode: 0644]
security/integrity/platform_certs/keyring_handler.h [new file with mode: 0644]
security/integrity/platform_certs/load_powerpc.c [new file with mode: 0644]
security/integrity/platform_certs/load_uefi.c
security/lockdown/lockdown.c
tools/testing/selftests/powerpc/include/utils.h
tools/testing/selftests/powerpc/pmu/ebb/Makefile
tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
tools/testing/selftests/powerpc/security/Makefile
tools/testing/selftests/powerpc/security/branch_loops.S [new file with mode: 0644]
tools/testing/selftests/powerpc/security/spectre_v2.c [new file with mode: 0644]
tools/testing/selftests/powerpc/signal/sigfuz.c
tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
tools/testing/selftests/powerpc/utils.c

index 29ebe9a..29aaedf 100644 (file)
@@ -25,6 +25,7 @@ Description:
                        lsm:    [[subj_user=] [subj_role=] [subj_type=]
                                 [obj_user=] [obj_role=] [obj_type=]]
                        option: [[appraise_type=]] [template=] [permit_directio]
+                               [appraise_flag=]
                base:   func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
                                [FIRMWARE_CHECK]
                                [KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
@@ -38,6 +39,9 @@ Description:
                        fowner:= decimal value
                lsm:    are LSM specific
                option: appraise_type:= [imasig] [imasig|modsig]
+                       appraise_flag:= [check_blacklist]
+                       Currently, the blacklist check is applied only to
+                       files signed with an appended signature.
                        template:= name of a defined IMA template type
                        (eg, ima-ng). Only valid when action is "measure".
                        pcr:= decimal value
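
As an illustration of the new appraise_flag option documented above, a
policy rule using it might look like the following (an example rule,
not necessarily one shipped by any particular arch policy):

    appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig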
diff --git a/Documentation/ABI/testing/sysfs-secvar b/Documentation/ABI/testing/sysfs-secvar
new file mode 100644 (file)
index 0000000..feebb8c
--- /dev/null
@@ -0,0 +1,46 @@
+What:          /sys/firmware/secvar
+Date:          August 2019
+Contact:       Nayna Jain <nayna@linux.ibm.com>
+Description:   This directory is created if the POWER firmware supports OS
+               secure boot and, with it, secure variables. It exposes an
+               interface for reading/writing the secure variables.
+
+What:          /sys/firmware/secvar/vars
+Date:          August 2019
+Contact:       Nayna Jain <nayna@linux.ibm.com>
+Description:   This directory lists all the secure variables that are supported
+               by the firmware.
+
+What:          /sys/firmware/secvar/format
+Date:          August 2019
+Contact:       Nayna Jain <nayna@linux.ibm.com>
+Description:   A string indicating which backend is in use by the firmware.
+               This determines the format of the variable and the accepted
+               format of variable updates.
+
+What:          /sys/firmware/secvar/vars/<variable name>
+Date:          August 2019
+Contact:       Nayna Jain <nayna@linux.ibm.com>
+Description:   Each secure variable is represented as a directory named
+               <variable_name>. The variable name is unique and is in ASCII
+               representation. The data and size can be determined by reading
+               their respective attribute files.
+
+What:          /sys/firmware/secvar/vars/<variable_name>/size
+Date:          August 2019
+Contact:       Nayna Jain <nayna@linux.ibm.com>
+Description:   An integer representation of the size of the content of the
+               variable. In other words, it represents the size of the data.
+
+What:          /sys/firmware/secvar/vars/<variable_name>/data
+Date:          August 2019
+Contact:       Nayna Jain <nayna@linux.ibm.com>
+Description:   A read-only file containing the value of the variable. The size
+               of the file represents the maximum size of the variable data.
+
+What:          /sys/firmware/secvar/vars/<variable_name>/update
+Date:          August 2019
+Contact:       Nayna Jain <nayna@linux.ibm.com>
+Description:   A write-only file that is used to submit the new value for the
+               variable. The size of the file represents the maximum size of
+               the variable data that can be written.
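
As a usage illustration of the layout described above, a small
user-space reader might look like this. It is a sketch: the variable
name "db" is only an example, and the actual names depend on the
backend reported by the format file.

    #include <stdio.h>

    int main(void)
    {
            char fmt[64] = "";
            unsigned char buf[4096];
            size_t n;
            FILE *f = fopen("/sys/firmware/secvar/format", "r");

            if (f && fgets(fmt, sizeof(fmt), f))
                    fprintf(stderr, "backend: %s", fmt);
            if (f)
                    fclose(f);

            /* "db" is a hypothetical variable name for illustration */
            f = fopen("/sys/firmware/secvar/vars/db/data", "rb");
            if (!f) {
                    perror("open data");
                    return 1;
            }
            while ((n = fread(buf, 1, sizeof(buf), f)) > 0)
                    fwrite(buf, 1, n, stdout);    /* raw variable value */
            fclose(f);
            return 0;
    }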
index eb52f6b..9cde570 100644 (file)
@@ -47,36 +47,6 @@ Example (LS2080A-RDB):
                 reg = <0x3 0 0x10000>;
         };
 
-* Freescale BCSR GPIO banks
-
-Some BCSR registers act as simple GPIO controllers, each such
-register can be represented by the gpio-controller node.
-
-Required properities:
-- compatible : Should be "fsl,<board>-bcsr-gpio".
-- reg : Should contain the address and the length of the GPIO bank
-  register.
-- #gpio-cells : Should be two. The first cell is the pin number and the
-  second cell is used to specify optional parameters (currently unused).
-- gpio-controller : Marks the port as GPIO controller.
-
-Example:
-
-       bcsr@1,0 {
-               #address-cells = <1>;
-               #size-cells = <1>;
-               compatible = "fsl,mpc8360mds-bcsr";
-               reg = <1 0 0x8000>;
-               ranges = <0 1 0 0x8000>;
-
-               bcsr13: gpio-controller@d {
-                       #gpio-cells = <2>;
-                       compatible = "fsl,mpc8360mds-bcsr-gpio";
-                       reg = <0xd 1>;
-                       gpio-controller;
-               };
-       };
-
 * Freescale on-board FPGA connected on I2C bus
 
 Some Freescale boards like BSC9132QDS have on board FPGA connected on
index db7b6a8..ba5edb3 100644 (file)
@@ -19,6 +19,7 @@ powerpc
     firmware-assisted-dump
     hvcs
     isa-versions
+    kaslr-booke32
     mpc52xx
     pci_iov_resource_on_powernv
     pmu-ebb
diff --git a/Documentation/powerpc/kaslr-booke32.rst b/Documentation/powerpc/kaslr-booke32.rst
new file mode 100644 (file)
index 0000000..8b259fd
--- /dev/null
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+KASLR for Freescale BookE32
+===========================
+
+KASLR stands for Kernel Address Space Layout Randomization.
+
+This document explains the implementation of KASLR for Freescale
+BookE32. KASLR is a security feature that deters exploit attempts
+relying on knowledge of the location of kernel internals.
+
+Since CONFIG_RELOCATABLE is already supported, all we need to do is
+map or copy the kernel to a proper place and relocate it. Freescale
+Book-E parts expect lowmem to be mapped by fixed TLB entries (TLB1).
+These entries are unsuitable for mapping the kernel directly in a
+randomized region, so we copy the kernel elsewhere and restart to relocate.
+
+Entropy is derived from the banner and the timer base, which change on
+every build and boot. This is not very strong on its own, so the
+bootloader may additionally pass entropy via the /chosen/kaslr-seed
+node in the device tree.
+
+We use the first 512M of low memory to randomize the kernel image. The
+memory is split into 64M zones. The lower 8 bits of the entropy select
+the index of the 64M zone, and a 16K-aligned offset inside that zone is
+then chosen to place the kernel at::
+
+    KERNELBASE
+
+        |-->   64M   <--|
+        |               |
+        +---------------+    +----------------+---------------+
+        |               |....|    |kernel|    |               |
+        +---------------+    +----------------+---------------+
+        |                         |
+        |----->   offset    <-----|
+
+                              kernstart_virt_addr
+
+To enable KASLR, set CONFIG_RANDOMIZE_BASE=y. If KASLR is enabled and
+you want to disable it at runtime, add "nokaslr" to the kernel cmdline.
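
For illustration, the zone/offset selection described above reduces to
arithmetic like the following sketch (not the kernel's kaslr_booke.c;
in particular, deriving the in-zone offset from the remaining entropy
bits is an assumption made here for the example):

    #include <stdio.h>

    #define SZ_512M (512UL << 20)
    #define SZ_64M  (64UL << 20)
    #define SZ_16K  (16UL << 10)

    /* Pick a randomized kernel start offset below 512M: the low 8 bits
     * of the entropy choose one of the 64M zones, the remaining bits
     * choose a 16K-aligned slot inside that zone. */
    static unsigned long pick_kernstart(unsigned long entropy)
    {
            unsigned long zones = SZ_512M / SZ_64M;            /* 8 */
            unsigned long index = (entropy & 0xff) % zones;
            unsigned long slots = SZ_64M / SZ_16K;             /* 4096 */
            unsigned long offset = ((entropy >> 8) % slots) * SZ_16K;

            return index * SZ_64M + offset;
    }

    int main(void)
    {
            printf("0x%lx\n", pick_kernstart(0x123456789abcdefUL));
            return 0;
    }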
index 51e6908..5e2f9ea 100644 (file)
@@ -14,4 +14,5 @@ obj-$(CONFIG_XMON) += xmon/
 obj-$(CONFIG_KVM)  += kvm/
 
 obj-$(CONFIG_PERF_EVENTS) += perf/
+obj-$(CONFIG_KEXEC_CORE)  += kexec/
 obj-$(CONFIG_KEXEC_FILE)  += purgatory/
index 3e56c9c..e446bb5 100644 (file)
@@ -161,6 +161,7 @@ config PPC
        select GENERIC_CMOS_UPDATE
        select GENERIC_CPU_AUTOPROBE
        select GENERIC_CPU_VULNERABILITIES      if PPC_BARRIER_NOSPEC
+       select GENERIC_EARLY_IOREMAP
        select GENERIC_IRQ_SHOW
        select GENERIC_IRQ_SHOW_LEVEL
        select GENERIC_PCI_IOMAP                if PCI
@@ -551,6 +552,17 @@ config RELOCATABLE
          setting can still be useful to bootwrappers that need to know the
          load address of the kernel (eg. u-boot/mkimage).
 
+config RANDOMIZE_BASE
+       bool "Randomize the address of the kernel image"
+       depends on (FSL_BOOKE && FLATMEM && PPC32)
+       depends on RELOCATABLE
+       help
+         Randomizes the virtual address at which the kernel image is
+         loaded, as a security feature that deters exploit attempts
+         relying on knowledge of the location of kernel internals.
+
+         If unsure, say Y.
+
 config RELOCATABLE_TEST
        bool "Test relocatable kernel"
        depends on (PPC64 && RELOCATABLE)
@@ -874,15 +886,33 @@ config CMDLINE
          some command-line options at build time by entering them here.  In
          most cases you will need to specify the root device here.
 
+choice
+       prompt "Kernel command line type" if CMDLINE != ""
+       default CMDLINE_FROM_BOOTLOADER
+
+config CMDLINE_FROM_BOOTLOADER
+       bool "Use bootloader kernel arguments if available"
+       help
+         Uses the command-line options passed by the boot loader. If
+         the boot loader doesn't provide any, the default kernel command
+         string provided in CMDLINE will be used.
+
+config CMDLINE_EXTEND
+       bool "Extend bootloader kernel arguments"
+       help
+         The command-line arguments provided by the boot loader will be
+         appended to the default kernel command string.
+
 config CMDLINE_FORCE
        bool "Always use the default kernel command string"
-       depends on CMDLINE_BOOL
        help
          Always use the default kernel command string, even if the boot
          loader passes other arguments to the kernel.
          This is useful if you cannot or don't want to change the
          command-line options your boot loader passes to the kernel.
 
+endchoice
+
 config EXTRA_TARGETS
        string "Additional default image types"
        help
@@ -934,6 +964,28 @@ config PPC_MEM_KEYS
 
          If unsure, say y.
 
+config PPC_SECURE_BOOT
+       prompt "Enable secure boot support"
+       bool
+       depends on PPC_POWERNV
+       depends on IMA_ARCH_POLICY
+       help
+         Systems with firmware secure boot enabled need to define security
+         policies to extend secure boot to the OS. This config allows a user
+         to enable OS secure boot on systems that have firmware support for
+         it. If in doubt say N.
+
+config PPC_SECVAR_SYSFS
+       bool "Enable sysfs interface for POWER secure variables"
+       default y
+       depends on PPC_SECURE_BOOT
+       depends on SYSFS
+       help
+         POWER secure variables are managed and controlled by firmware.
+         These variables are exposed to userspace via sysfs to enable
+         read/write operations on these variables. Say Y if you have
+         secure boot enabled and want to expose variables to userspace.
+
 endmenu
 
 config ISA_DMA_API
index c599209..4e1d398 100644 (file)
@@ -122,8 +122,8 @@ config XMON_DEFAULT_RO_MODE
        depends on XMON
        default y
        help
-          Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
-          'xmon=ro' override this default.
+         Operate xmon in read-only mode. The cmdline options 'xmon=rw' and
+         'xmon=ro' override this default.
 
 config DEBUGGER
        bool
@@ -222,7 +222,7 @@ config PPC_EARLY_DEBUG_44x
        help
          Select this to enable early debugging for IBM 44x chips via the
          inbuilt serial port.  If you enable this, ensure you set
-          PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
+         PPC_EARLY_DEBUG_44x_PHYSLOW below to suit your target board.
 
 config PPC_EARLY_DEBUG_40x
        bool "Early serial debugging for IBM/AMCC 40x CPUs"
@@ -325,7 +325,7 @@ config PPC_EARLY_DEBUG_44x_PHYSLOW
        default "0x40000200"
        help
          You probably want 0x40000200 for ebony boards and
-          0x40000300 for taishan
+         0x40000300 for taishan
 
 config PPC_EARLY_DEBUG_44x_PHYSHIGH
        hex "EPRN of early debug UART physical address"
@@ -359,9 +359,9 @@ config FAIL_IOMMU
          If you are unsure, say N.
 
 config PPC_PTDUMP
-        bool "Export kernel pagetable layout to userspace via debugfs"
-        depends on DEBUG_KERNEL && DEBUG_FS
-        help
+       bool "Export kernel pagetable layout to userspace via debugfs"
+       depends on DEBUG_KERNEL && DEBUG_FS
+       help
          This option exports the state of the kernel pagetables to a
          debugfs file. This is only useful for kernel developers who are
          working in architecture specific areas of the kernel - probably
@@ -390,8 +390,8 @@ config PPC_DEBUG_WX
 
 config PPC_FAST_ENDIAN_SWITCH
        bool "Deprecated fast endian-switch syscall"
-        depends on DEBUG_KERNEL && PPC_BOOK3S_64
-        help
+       depends on DEBUG_KERNEL && PPC_BOOK3S_64
+       help
          If you're unsure what this is, say N.
 
 config KASAN_SHADOW_OFFSET
index 83522c9..f357305 100644 (file)
@@ -91,11 +91,13 @@ MULTIPLEWORD        := -mmultiple
 endif
 
 ifdef CONFIG_PPC64
+ifndef CONFIG_CC_IS_CLANG
 cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mabi=elfv1)
 cflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mcall-aixdesc)
 aflags-$(CONFIG_CPU_BIG_ENDIAN)                += $(call cc-option,-mabi=elfv1)
 aflags-$(CONFIG_CPU_LITTLE_ENDIAN)     += -mabi=elfv2
 endif
+endif
 
 ifndef CONFIG_CC_IS_CLANG
   cflags-$(CONFIG_CPU_LITTLE_ENDIAN)   += -mno-strict-align
@@ -141,6 +143,7 @@ endif
 endif
 
 CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no)
+ifndef CONFIG_CC_IS_CLANG
 ifdef CONFIG_CPU_LITTLE_ENDIAN
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc))
 AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2)
@@ -149,6 +152,7 @@ CFLAGS-$(CONFIG_PPC64)      += $(call cc-option,-mabi=elfv1)
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc)
 AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1)
 endif
+endif
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc))
 CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions)
 
@@ -330,32 +334,32 @@ powernv_be_defconfig:
 
 PHONY += mpc85xx_defconfig
 mpc85xx_defconfig:
-       $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
+       $(call merge_into_defconfig,mpc85xx_base.config,\
                85xx-32bit 85xx-hw fsl-emb-nonhw)
 
 PHONY += mpc85xx_smp_defconfig
 mpc85xx_smp_defconfig:
-       $(call merge_into_defconfig,mpc85xx_basic_defconfig,\
+       $(call merge_into_defconfig,mpc85xx_base.config,\
                85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw)
 
 PHONY += corenet32_smp_defconfig
 corenet32_smp_defconfig:
-       $(call merge_into_defconfig,corenet_basic_defconfig,\
+       $(call merge_into_defconfig,corenet_base.config,\
                85xx-32bit 85xx-smp 85xx-hw fsl-emb-nonhw dpaa)
 
 PHONY += corenet64_smp_defconfig
 corenet64_smp_defconfig:
-       $(call merge_into_defconfig,corenet_basic_defconfig,\
+       $(call merge_into_defconfig,corenet_base.config,\
                85xx-64bit 85xx-smp altivec 85xx-hw fsl-emb-nonhw dpaa)
 
 PHONY += mpc86xx_defconfig
 mpc86xx_defconfig:
-       $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
+       $(call merge_into_defconfig,mpc86xx_base.config,\
                86xx-hw fsl-emb-nonhw)
 
 PHONY += mpc86xx_smp_defconfig
 mpc86xx_smp_defconfig:
-       $(call merge_into_defconfig,mpc86xx_basic_defconfig,\
+       $(call merge_into_defconfig,mpc86xx_base.config,\
                86xx-smp 86xx-hw fsl-emb-nonhw)
 
 PHONY += ppc32_allmodconfig
index 48b7f97..8e7f082 100644 (file)
 
                fman@400000 {
                        ethernet@e0000 {
-                               fixed-link = <0 1 1000 0 0>;
-                               phy-connection-type = "sgmii";
+                               phy-mode = "sgmii";
+                               fixed-link {
+                                       speed = <1000>;
+                                       full-duplex;
+                               };
                        };
 
                        ethernet@e2000 {
-                               fixed-link = <1 1 1000 0 0>;
-                               phy-connection-type = "sgmii";
+                               phy-mode = "sgmii";
+                               fixed-link {
+                                       speed = <1000>;
+                                       full-duplex;
+                               };
                        };
 
                        ethernet@e4000 {
 
                        ethernet@e8000 {
                                phy-handle = <&front_phy>;
-                               phy-connection-type = "rgmii";
+                               phy-mode = "rgmii-id";
                        };
 
                        mdio0: mdio@fc000 {
 
        pci1: pcie@ffe250000 {
                status = "disabled";
+               reg = <0xf 0xfe250000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x10000000 0 0x10000000
+                         0x01000000 0 0 0xf 0xf8010000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
        };
 
        pci2: pcie@ffe260000 {
                status = "disabled";
+               reg = <0xf 0xfe260000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x20000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8020000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
        };
 
        pci3: pcie@ffe270000 {
                status = "disabled";
+               reg = <0xf 0xfe270000 0 0x10000>;
+               ranges = <0x02000000 0 0xe0000000 0xc 0x30000000 0 0x10000000
+                         0x01000000 0 0x00000000 0xf 0xf8030000 0 0x00010000>;
+               pcie@0 {
+                       ranges = <0x02000000 0 0xe0000000
+                                 0x02000000 0 0xe0000000
+                                 0 0x10000000
+
+                                 0x01000000 0 0x00000000
+                                 0x01000000 0 0x00000000
+                                 0 0x00010000>;
+               };
        };
 
        qe: qe@ffe140000 {
index 5a75e4f..db93c11 100644 (file)
@@ -18,9 +18,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index e2691c5..a3854cf 100644 (file)
@@ -17,9 +17,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 949989e..edc2246 100644 (file)
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 90b759b..188789b 100644 (file)
@@ -17,9 +17,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 881c300..5bf6af7 100644 (file)
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 0ed4670..9eaaf1a 100644 (file)
@@ -15,9 +15,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 2fa553e..f0c8a07 100644 (file)
@@ -29,9 +29,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
index 5a1b9ee..82c6f49 100644 (file)
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 22e1ef5..6792132 100644 (file)
@@ -18,9 +18,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_RAM=y
index 86f34ea..ccc14eb 100644 (file)
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index ce3ec5a..be76e06 100644 (file)
@@ -27,9 +27,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
index f67447c..93d2a4e 100644 (file)
@@ -16,9 +16,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 5dbd83a..1abaa63 100644 (file)
@@ -21,9 +21,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index e49114f..e67fc04 100644 (file)
@@ -39,9 +39,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_VLAN_8021Q=m
 CONFIG_DEVTMPFS=y
index fa5378a..7d7ff84 100644 (file)
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index aae879c..fb5c73a 100644 (file)
@@ -29,9 +29,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 56eddca..c6dc144 100644 (file)
@@ -18,9 +18,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 369bfd2..c83ad03 100644 (file)
@@ -19,9 +19,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 8be95f6..640fe1d 100644 (file)
@@ -21,9 +21,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 974a4f0..ed02f12 100644 (file)
@@ -23,9 +23,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_BLK_DEV_LOOP=y
index 10e517b..2c0973d 100644 (file)
@@ -20,9 +20,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index cd08f3d..a2d355c 100644 (file)
@@ -18,9 +18,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 303600f..fdb11da 100644 (file)
@@ -31,9 +31,6 @@ CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
index d21b5cb..648c6b3 100644 (file)
@@ -25,9 +25,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_TIPC=y
 CONFIG_BRIDGE=m
index dad53ef..cbcae2a 100644 (file)
@@ -22,9 +22,6 @@ CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_BLK_DEV_LOOP=y
index 920f373..f29c166 100644 (file)
@@ -60,7 +60,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_TUNNEL=m
index f7a803a..510f7fd 100644 (file)
@@ -22,9 +22,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
index cf94d28..f6d140f 100644 (file)
@@ -26,9 +26,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
index 2dd1b58..42fbc70 100644 (file)
@@ -51,11 +51,9 @@ CONFIG_IP_PNP_BOOTP=y
 CONFIG_IP_PNP_RARP=y
 CONFIG_NET_IPIP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
-# CONFIG_INET6_XFRM_MODE_BEET is not set
 # CONFIG_IPV6_SIT is not set
 CONFIG_IPV6_TUNNEL=m
 CONFIG_NETFILTER=y
index 9ff493d..502a75d 100644 (file)
@@ -27,9 +27,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
diff --git a/arch/powerpc/configs/corenet_base.config b/arch/powerpc/configs/corenet_base.config
new file mode 100644 (file)
index 0000000..b568d46
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_CORENET_GENERIC=y
diff --git a/arch/powerpc/configs/corenet_basic_defconfig b/arch/powerpc/configs/corenet_basic_defconfig
deleted file mode 100644 (file)
index b568d46..0000000
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_CORENET_GENERIC=y
diff --git a/arch/powerpc/configs/debug.config b/arch/powerpc/configs/debug.config
new file mode 100644 (file)
index 0000000..a14ae1f
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_SCOM_DEBUGFS=y
index b20bd0c..9c1bf60 100644 (file)
@@ -24,9 +24,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
index 85e73c3..24c0e0e 100644 (file)
@@ -29,9 +29,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
index 6203c10..1f3a045 100644 (file)
@@ -25,9 +25,6 @@ CONFIG_PACKET=y
 CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_CAN=y
index 6f87a5c..83d8013 100644 (file)
@@ -15,7 +15,6 @@ CONFIG_PPC_MEDIA5200=y
 CONFIG_PPC_MPC5200_BUGFIX=y
 CONFIG_PPC_MPC5200_LPBFIFO=m
 # CONFIG_PPC_PMAC is not set
-CONFIG_SIMPLE_GPIO=y
 CONFIG_NET=y
 CONFIG_PACKET=y
 CONFIG_UNIX=y
diff --git a/arch/powerpc/configs/mpc85xx_base.config b/arch/powerpc/configs/mpc85xx_base.config
new file mode 100644 (file)
index 0000000..b1593fe
--- /dev/null
@@ -0,0 +1,24 @@
+CONFIG_MATH_EMULATION=y
+CONFIG_MPC8536_DS=y
+CONFIG_MPC8540_ADS=y
+CONFIG_MPC8560_ADS=y
+CONFIG_MPC85xx_CDS=y
+CONFIG_MPC85xx_DS=y
+CONFIG_MPC85xx_MDS=y
+CONFIG_MPC85xx_RDB=y
+CONFIG_KSI8560=y
+CONFIG_MVME2500=y
+CONFIG_P1010_RDB=y
+CONFIG_P1022_DS=y
+CONFIG_P1022_RDK=y
+CONFIG_P1023_RDB=y
+CONFIG_TWR_P102x=y
+CONFIG_SBC8548=y
+CONFIG_SOCRATES=y
+CONFIG_STX_GP3=y
+CONFIG_TQM8540=y
+CONFIG_TQM8541=y
+CONFIG_TQM8548=y
+CONFIG_TQM8555=y
+CONFIG_TQM8560=y
+CONFIG_XES_MPC85xx=y
diff --git a/arch/powerpc/configs/mpc85xx_basic_defconfig b/arch/powerpc/configs/mpc85xx_basic_defconfig
deleted file mode 100644 (file)
index b1593fe..0000000
+++ /dev/null
@@ -1,24 +0,0 @@
-CONFIG_MATH_EMULATION=y
-CONFIG_MPC8536_DS=y
-CONFIG_MPC8540_ADS=y
-CONFIG_MPC8560_ADS=y
-CONFIG_MPC85xx_CDS=y
-CONFIG_MPC85xx_DS=y
-CONFIG_MPC85xx_MDS=y
-CONFIG_MPC85xx_RDB=y
-CONFIG_KSI8560=y
-CONFIG_MVME2500=y
-CONFIG_P1010_RDB=y
-CONFIG_P1022_DS=y
-CONFIG_P1022_RDK=y
-CONFIG_P1023_RDB=y
-CONFIG_TWR_P102x=y
-CONFIG_SBC8548=y
-CONFIG_SOCRATES=y
-CONFIG_STX_GP3=y
-CONFIG_TQM8540=y
-CONFIG_TQM8541=y
-CONFIG_TQM8548=y
-CONFIG_TQM8555=y
-CONFIG_TQM8560=y
-CONFIG_XES_MPC85xx=y
diff --git a/arch/powerpc/configs/mpc86xx_base.config b/arch/powerpc/configs/mpc86xx_base.config
new file mode 100644 (file)
index 0000000..67bd1fa
--- /dev/null
@@ -0,0 +1,11 @@
+CONFIG_PPC_86xx=y
+CONFIG_MPC8641_HPCN=y
+CONFIG_SBC8641D=y
+CONFIG_MPC8610_HPCD=y
+CONFIG_GEF_PPC9A=y
+CONFIG_GEF_SBC310=y
+CONFIG_GEF_SBC610=y
+CONFIG_MVME7100=y
+CONFIG_HIGHMEM=y
+CONFIG_KEXEC=y
+CONFIG_PROC_KCORE=y
diff --git a/arch/powerpc/configs/mpc86xx_basic_defconfig b/arch/powerpc/configs/mpc86xx_basic_defconfig
deleted file mode 100644 (file)
index 67bd1fa..0000000
+++ /dev/null
@@ -1,11 +0,0 @@
-CONFIG_PPC_86xx=y
-CONFIG_MPC8641_HPCN=y
-CONFIG_SBC8641D=y
-CONFIG_MPC8610_HPCD=y
-CONFIG_GEF_PPC9A=y
-CONFIG_GEF_SBC310=y
-CONFIG_GEF_SBC610=y
-CONFIG_MVME7100=y
-CONFIG_HIGHMEM=y
-CONFIG_KEXEC=y
-CONFIG_PROC_KCORE=y
index 285d506..0327a32 100644 (file)
@@ -23,9 +23,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_FW_LOADER is not set
 CONFIG_MTD=y
index 4e6e95f..f492e7d 100644 (file)
@@ -38,8 +38,6 @@ CONFIG_IP_MULTICAST=y
 CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=y
 CONFIG_INET_ESP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
 # CONFIG_IPV6 is not set
 CONFIG_NETFILTER=y
 CONFIG_NF_CONNTRACK=m
index 6658cce..3284145 100644 (file)
@@ -83,9 +83,6 @@ CONFIG_INET_IPCOMP=m
 CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
-CONFIG_INET6_XFRM_MODE_TRANSPORT=m
-CONFIG_INET6_XFRM_MODE_TUNNEL=m
-CONFIG_INET6_XFRM_MODE_BEET=m
 CONFIG_IPV6_SIT=m
 CONFIG_NETFILTER=y
 # CONFIG_NETFILTER_ADVANCED is not set
index 6795281..a41eedf 100644 (file)
@@ -32,9 +32,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_BOOTP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_BRIDGE=m
 CONFIG_CONNECTOR=y
 CONFIG_MTD=y
index 9dca4cf..7e28919 100644 (file)
@@ -109,9 +109,6 @@ CONFIG_SYN_COOKIES=y
 CONFIG_INET_AH=m
 CONFIG_INET_ESP=m
 CONFIG_INET_IPCOMP=m
-CONFIG_INET_XFRM_MODE_TRANSPORT=m
-CONFIG_INET_XFRM_MODE_TUNNEL=m
-CONFIG_INET_XFRM_MODE_BEET=m
 CONFIG_INET_DIAG=m
 CONFIG_TCP_CONG_ADVANCED=y
 CONFIG_TCP_CONG_HSTCP=m
@@ -129,7 +126,6 @@ CONFIG_INET6_AH=m
 CONFIG_INET6_ESP=m
 CONFIG_INET6_IPCOMP=m
 CONFIG_IPV6_MIP6=m
-CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
 CONFIG_IPV6_TUNNEL=m
 CONFIG_IPV6_MULTIPLE_TABLES=y
 CONFIG_IPV6_SUBTREES=y
index 314c639..4db5171 100644 (file)
@@ -47,9 +47,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 CONFIG_BT=m
 CONFIG_BT_RFCOMM=m
index 1253482..069f67f 100644 (file)
@@ -46,6 +46,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
 CONFIG_CPU_IDLE=y
 CONFIG_HZ_100=y
 CONFIG_KEXEC=y
+CONFIG_PRESERVE_FA_DUMP=y
 CONFIG_IRQ_ALL_CPUS=y
 CONFIG_NUMA=y
 # CONFIG_COMPACTION is not set
@@ -63,9 +64,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_NET_IPIP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 CONFIG_DNS_RESOLVER=y
 # CONFIG_WIRELESS is not set
 CONFIG_DEVTMPFS=y
index 6c39c52..29b19ec 100644 (file)
@@ -22,9 +22,6 @@ CONFIG_INET=y
 CONFIG_IP_MULTICAST=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 CONFIG_MTD=y
 CONFIG_MTD_CMDLINE_PARTS=y
index 7493f36..ffed2b4 100644 (file)
@@ -27,9 +27,6 @@ CONFIG_UNIX=y
 CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_SYN_COOKIES=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_IPV6 is not set
 # CONFIG_WIRELESS is not set
 # CONFIG_FW_LOADER is not set
index 5a04448..379c171 100644 (file)
@@ -29,9 +29,6 @@ CONFIG_INET=y
 CONFIG_IP_PNP=y
 CONFIG_IP_PNP_DHCP=y
 CONFIG_IP_PNP_RARP=y
-# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
-# CONFIG_INET_XFRM_MODE_TUNNEL is not set
-# CONFIG_INET_XFRM_MODE_BEET is not set
 # CONFIG_INET_DIAG is not set
 # CONFIG_IPV6 is not set
 CONFIG_BT=y
index 4798521..dce86e7 100644 (file)
@@ -103,6 +103,7 @@ static int __init crc_test_init(void)
                                       crc32, verify32, len);
                                break;
                        }
+               cond_resched();
                }
                pr_info("crc-vpmsum_test done, completed %lu iterations\n", i);
        } while (0);
index 64870c7..148bee2 100644 (file)
@@ -4,6 +4,7 @@ generated-y += syscall_table_64.h
 generated-y += syscall_table_c32.h
 generated-y += syscall_table_spu.h
 generic-y += div64.h
+generic-y += dma-mapping.h
 generic-y += export.h
 generic-y += irq_regs.h
 generic-y += local64.h
@@ -11,3 +12,4 @@ generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += vtime.h
 generic-y += msi.h
+generic-y += early_ioremap.h
index d5a4491..f6968c8 100644 (file)
@@ -122,11 +122,6 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
                                  unsigned long address)
 {
-       /*
-        * By now all the pud entries should be none entries. So go
-        * ahead and flush the page walk cache
-        */
-       flush_tlb_pgtable(tlb, address);
        pgtable_free_tlb(tlb, pud, PUD_INDEX);
 }
 
@@ -143,11 +138,6 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd)
 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd,
                                  unsigned long address)
 {
-       /*
-        * By now all the pud entries should be none entries. So go
-        * ahead and flush the page walk cache
-        */
-       flush_tlb_pgtable(tlb, address);
        return pgtable_free_tlb(tlb, pmd, PMD_INDEX);
 }
 
@@ -166,11 +156,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
 static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t table,
                                  unsigned long address)
 {
-       /*
-        * By now all the pud entries should be none entries. So go
-        * ahead and flush the page walk cache
-        */
-       flush_tlb_pgtable(tlb, address);
        pgtable_free_tlb(tlb, table, PTE_INDEX);
 }
 
index 7aa8195..dcb5c38 100644 (file)
@@ -147,22 +147,6 @@ static inline void flush_tlb_fix_spurious_fault(struct vm_area_struct *vma,
                flush_tlb_page(vma, address);
 }
 
-/*
- * flush the page walk cache for the address
- */
-static inline void flush_tlb_pgtable(struct mmu_gather *tlb, unsigned long address)
-{
-       /*
-        * Flush the page table walk cache on freeing a page table. We already
-        * have marked the upper/higher level page table entry none by now.
-        * So it is safe to flush PWC here.
-        */
-       if (!radix_enabled())
-               return;
-
-       radix__flush_tlb_pwc(tlb, address);
-}
-
 extern bool tlbie_capable;
 extern bool tlbie_enabled;
 
index f47e6ff..338f36c 100644 (file)
        ".previous\n"
 #endif
 
+#define BUG_ENTRY(insn, flags, ...)                    \
+       __asm__ __volatile__(                           \
+               "1:     " insn "\n"                     \
+               _EMIT_BUG_ENTRY                         \
+               : : "i" (__FILE__), "i" (__LINE__),     \
+                 "i" (flags),                          \
+                 "i" (sizeof(struct bug_entry)),       \
+                 ##__VA_ARGS__)
+
 /*
  * BUG_ON() and WARN_ON() do their best to cooperate with compile-time
  * optimisations. However depending on the complexity of the condition
  */
 
 #define BUG() do {                                             \
-       __asm__ __volatile__(                                   \
-               "1:     twi 31,0,0\n"                           \
-               _EMIT_BUG_ENTRY                                 \
-               : : "i" (__FILE__), "i" (__LINE__),             \
-                   "i" (0), "i"  (sizeof(struct bug_entry)));  \
+       BUG_ENTRY("twi 31, 0, 0", 0);                           \
        unreachable();                                          \
 } while (0)
 
                if (x)                                          \
                        BUG();                                  \
        } else {                                                \
-               __asm__ __volatile__(                           \
-               "1:     "PPC_TLNEI"     %4,0\n"                 \
-               _EMIT_BUG_ENTRY                                 \
-               : : "i" (__FILE__), "i" (__LINE__), "i" (0),    \
-                 "i" (sizeof(struct bug_entry)),               \
-                 "r" ((__force long)(x)));                     \
+               BUG_ENTRY(PPC_TLNEI " %4, 0", 0, "r" ((__force long)(x)));      \
        }                                                       \
 } while (0)
 
-#define __WARN_FLAGS(flags) do {                               \
-       __asm__ __volatile__(                                   \
-               "1:     twi 31,0,0\n"                           \
-               _EMIT_BUG_ENTRY                                 \
-               : : "i" (__FILE__), "i" (__LINE__),             \
-                 "i" (BUGFLAG_WARNING|(flags)),                \
-                 "i" (sizeof(struct bug_entry)));              \
-} while (0)
+#define __WARN_FLAGS(flags) BUG_ENTRY("twi 31, 0, 0", BUGFLAG_WARNING | (flags))
 
 #define WARN_ON(x) ({                                          \
        int __ret_warn_on = !!(x);                              \
                if (__ret_warn_on)                              \
                        __WARN();                               \
        } else {                                                \
-               __asm__ __volatile__(                           \
-               "1:     "PPC_TLNEI"     %4,0\n"                 \
-               _EMIT_BUG_ENTRY                                 \
-               : : "i" (__FILE__), "i" (__LINE__),             \
-                 "i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
-                 "i" (sizeof(struct bug_entry)),               \
-                 "r" (__ret_warn_on));                         \
+               BUG_ENTRY(PPC_TLNEI " %4, 0",                   \
+                         BUGFLAG_WARNING | BUGFLAG_TAINT(TAINT_WARN),  \
+                         "r" (__ret_warn_on)); \
        }                                                       \
        unlikely(__ret_warn_on);                                \
 })
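
For reference, the consolidated BUG_ENTRY macro above expands to the
same shape as the open-coded asm blocks it replaces; for example,
BUG_ENTRY("twi 31, 0, 0", 0) produces roughly:

    __asm__ __volatile__(
            "1:     twi 31, 0, 0\n"
            _EMIT_BUG_ENTRY
            : : "i" (__FILE__), "i" (__LINE__),
                "i" (0), "i" (sizeof(struct bug_entry)));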
index 45e3137..72b8101 100644 (file)
@@ -55,42 +55,48 @@ struct ppc64_caches {
 
 extern struct ppc64_caches ppc64_caches;
 
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
 {
        return ppc64_caches.l1d.log_block_size;
 }
 
-static inline u32 l1_cache_bytes(void)
+static inline u32 l1_dcache_bytes(void)
 {
        return ppc64_caches.l1d.block_size;
 }
+
+static inline u32 l1_icache_shift(void)
+{
+       return ppc64_caches.l1i.log_block_size;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+       return ppc64_caches.l1i.block_size;
+}
 #else
-static inline u32 l1_cache_shift(void)
+static inline u32 l1_dcache_shift(void)
 {
        return L1_CACHE_SHIFT;
 }
 
-static inline u32 l1_cache_bytes(void)
+static inline u32 l1_dcache_bytes(void)
 {
        return L1_CACHE_BYTES;
 }
+
+static inline u32 l1_icache_shift(void)
+{
+       return L1_CACHE_SHIFT;
+}
+
+static inline u32 l1_icache_bytes(void)
+{
+       return L1_CACHE_BYTES;
+}
+
 #endif
-#endif /* ! __ASSEMBLY__ */
-
-#if defined(__ASSEMBLY__)
-/*
- * For a snooping icache, we still need a dummy icbi to purge all the
- * prefetched instructions from the ifetch buffers. We also need a sync
- * before the icbi to order the the actual stores to memory that might
- * have modified instructions with the icbi.
- */
-#define PURGE_PREFETCHED_INS   \
-       sync;                   \
-       icbi    0,r3;           \
-       sync;                   \
-       isync
 
-#else
 #define __read_mostly __attribute__((__section__(".data..read_mostly")))
 
 #ifdef CONFIG_PPC_BOOK3S_32
@@ -124,6 +130,17 @@ static inline void dcbst(void *addr)
 {
        __asm__ __volatile__ ("dcbst 0, %0" : : "r"(addr) : "memory");
 }
+
+static inline void icbi(void *addr)
+{
+       asm volatile ("icbi 0, %0" : : "r"(addr) : "memory");
+}
+
+static inline void iccci(void *addr)
+{
+       asm volatile ("iccci 0, %0" : : "r"(addr) : "memory");
+}
+
 #endif /* !__ASSEMBLY__ */
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_CACHE_H */
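
With the dcache/icache accessors split and icbi() callable from C, the icache half of a range flush can be written without assembly. A minimal sketch (the name iflush_range and the barrier placement are illustrative, not the kernel's actual implementation):

static inline void iflush_range(unsigned long start, unsigned long stop)
{
	unsigned long shift = l1_icache_shift();
	unsigned long bytes = l1_icache_bytes();
	char *addr = (char *)(start & ~(bytes - 1));
	unsigned long size = stop - (unsigned long)addr + (bytes - 1);
	unsigned long i;

	for (i = 0; i < size >> shift; i++, addr += bytes)
		icbi(addr);
	/* order the icbi's, then discard prefetched instructions */
	asm volatile("sync; isync" : : : "memory");
}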
index eef388f..4a1c9f0 100644 (file)
@@ -42,29 +42,25 @@ extern void flush_dcache_page(struct page *page);
 #define flush_dcache_mmap_lock(mapping)                do { } while (0)
 #define flush_dcache_mmap_unlock(mapping)      do { } while (0)
 
-extern void flush_icache_range(unsigned long, unsigned long);
+void flush_icache_range(unsigned long start, unsigned long stop);
 extern void flush_icache_user_range(struct vm_area_struct *vma,
                                    struct page *page, unsigned long addr,
                                    int len);
-extern void __flush_dcache_icache(void *page_va);
 extern void flush_dcache_icache_page(struct page *page);
-#if defined(CONFIG_PPC32) && !defined(CONFIG_BOOKE)
-extern void __flush_dcache_icache_phys(unsigned long physaddr);
-#else
-static inline void __flush_dcache_icache_phys(unsigned long physaddr)
-{
-       BUG();
-}
-#endif
-
-/*
- * Write any modified data cache blocks out to memory and invalidate them.
- * Does not invalidate the corresponding instruction cache blocks.
+void __flush_dcache_icache(void *page);
+
+/**
+ * flush_dcache_range(): Write any modified data cache blocks out to memory and
+ * invalidate them. Does not invalidate the corresponding instruction cache
+ * blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
  */
 static inline void flush_dcache_range(unsigned long start, unsigned long stop)
 {
-       unsigned long shift = l1_cache_shift();
-       unsigned long bytes = l1_cache_bytes();
+       unsigned long shift = l1_dcache_shift();
+       unsigned long bytes = l1_dcache_bytes();
        void *addr = (void *)(start & ~(bytes - 1));
        unsigned long size = stop - (unsigned long)addr + (bytes - 1);
        unsigned long i;
@@ -89,8 +85,8 @@ static inline void flush_dcache_range(unsigned long start, unsigned long stop)
  */
 static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 {
-       unsigned long shift = l1_cache_shift();
-       unsigned long bytes = l1_cache_bytes();
+       unsigned long shift = l1_dcache_shift();
+       unsigned long bytes = l1_dcache_bytes();
        void *addr = (void *)(start & ~(bytes - 1));
        unsigned long size = stop - (unsigned long)addr + (bytes - 1);
        unsigned long i;
@@ -108,8 +104,8 @@ static inline void clean_dcache_range(unsigned long start, unsigned long stop)
 static inline void invalidate_dcache_range(unsigned long start,
                                           unsigned long stop)
 {
-       unsigned long shift = l1_cache_shift();
-       unsigned long bytes = l1_cache_bytes();
+       unsigned long shift = l1_dcache_shift();
+       unsigned long bytes = l1_dcache_bytes();
        void *addr = (void *)(start & ~(bytes - 1));
        unsigned long size = stop - (unsigned long)addr + (bytes - 1);
        unsigned long i;
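
Worked example of the rounding shared by flush/clean/invalidate_dcache_range(): with 128-byte blocks (shift = 7), start = 0x1005 and stop = 0x1105 give addr = 0x1000, size = 0x105 + 0x7f = 0x184, and size >> shift = 3, i.e. the three blocks at 0x1000, 0x1080 and 0x1100 that the half-open range touches.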
diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h
deleted file mode 100644 (file)
index 565d6f7..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2004 IBM
- */
-#ifndef _ASM_DMA_MAPPING_H
-#define _ASM_DMA_MAPPING_H
-
-static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus)
-{
-       /* We don't handle the NULL dev case for ISA for now. We could
-        * do it via an out of line call but it is not needed for now. The
-        * only ISA DMA device we support is the floppy and we have a hack
-        * in the floppy driver directly to get a device for us.
-        */
-       return NULL;
-}
-
-#endif /* _ASM_DMA_MAPPING_H */
index 0cfc365..2ef155a 100644 (file)
@@ -15,6 +15,7 @@
 #define _ASM_FIXMAP_H
 
 #ifndef __ASSEMBLY__
+#include <linux/sizes.h>
 #include <asm/page.h>
 #include <asm/pgtable.h>
 #ifdef CONFIG_HIGHMEM
@@ -62,8 +63,23 @@ enum fixed_addresses {
        FIX_IMMR_START,
        FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
                       FIX_IMMR_SIZE,
+#endif
+#ifdef CONFIG_PPC_83xx
+       /* For IMMR we need an aligned 2M area */
+#define FIX_IMMR_SIZE  (SZ_2M / PAGE_SIZE)
+       FIX_IMMR_START,
+       FIX_IMMR_BASE = __ALIGN_MASK(FIX_IMMR_START, FIX_IMMR_SIZE - 1) - 1 +
+                      FIX_IMMR_SIZE,
 #endif
        /* FIX_PCIE_MCFG, */
+       __end_of_permanent_fixed_addresses,
+
+#define NR_FIX_BTMAPS          (SZ_256K / PAGE_SIZE)
+#define FIX_BTMAPS_SLOTS       16
+#define TOTAL_FIX_BTMAPS       (NR_FIX_BTMAPS * FIX_BTMAPS_SLOTS)
+
+       FIX_BTMAP_END = __end_of_permanent_fixed_addresses,
+       FIX_BTMAP_BEGIN = FIX_BTMAP_END + TOTAL_FIX_BTMAPS - 1,
        __end_of_fixed_addresses
 };
 
@@ -71,14 +87,22 @@ enum fixed_addresses {
 #define FIXADDR_START          (FIXADDR_TOP - __FIXADDR_SIZE)
 
 #define FIXMAP_PAGE_NOCACHE PAGE_KERNEL_NCG
+#define FIXMAP_PAGE_IO PAGE_KERNEL_NCG
 
 #include <asm-generic/fixmap.h>
 
 static inline void __set_fixmap(enum fixed_addresses idx,
                                phys_addr_t phys, pgprot_t flags)
 {
-       map_kernel_page(fix_to_virt(idx), phys, flags);
+       if (__builtin_constant_p(idx))
+               BUILD_BUG_ON(idx >= __end_of_fixed_addresses);
+       else if (WARN_ON(idx >= __end_of_fixed_addresses))
+               return;
+
+       map_kernel_page(__fix_to_virt(idx), phys, flags);
 }
 
+#define __early_set_fixmap     __set_fixmap
+
 #endif /* !__ASSEMBLY__ */
 #endif
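
Assuming 4K pages (PAGE_SIZE is config-dependent), NR_FIX_BTMAPS is SZ_256K / SZ_4K = 64 pages per slot and TOTAL_FIX_BTMAPS = 64 * 16 = 1024 pages, so the new FIX_BTMAP_BEGIN/END window reserves 4MB of fixmap space for the GENERIC_EARLY_IOREMAP support added by this series.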
index 67e2da1..27ac6f5 100644 (file)
@@ -14,6 +14,7 @@ struct arch_hw_breakpoint {
        unsigned long   address;
        u16             type;
        u16             len; /* length of the target data symbol */
+       u16             hw_len; /* length programmed in hw */
 };
 
 /* Note: Don't change the first 6 bits below as they are in the same order
@@ -33,6 +34,11 @@ struct arch_hw_breakpoint {
 #define HW_BRK_TYPE_PRIV_ALL   (HW_BRK_TYPE_USER | HW_BRK_TYPE_KERNEL | \
                                 HW_BRK_TYPE_HYP)
 
+#define HW_BREAKPOINT_ALIGN 0x7
+
+#define DABR_MAX_LEN   8
+#define DAWR_MAX_LEN   512
+
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
 #include <linux/kdebug.h>
 #include <asm/reg.h>
@@ -44,8 +50,6 @@ struct pmu;
 struct perf_sample_data;
 struct task_struct;
 
-#define HW_BREAKPOINT_ALIGN 0x7
-
 extern int hw_breakpoint_slots(int type);
 extern int arch_bp_generic_fields(int type, int *gen_bp_type);
 extern int arch_check_bp_in_kernelspace(struct arch_hw_breakpoint *hw);
@@ -70,6 +74,7 @@ static inline void hw_breakpoint_disable(void)
        brk.address = 0;
        brk.type = 0;
        brk.len = 0;
+       brk.hw_len = 0;
        if (ppc_breakpoint_available())
                __set_breakpoint(&brk);
 }
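
The new hw_len field records the length actually programmed into the hardware, which can be wider than the user-requested len once it is doubleword-aligned; DABR_MAX_LEN and DAWR_MAX_LEN bound it at 8 and 512 bytes respectively. The widening itself is done by hw_breakpoint_validate_len() later in this diff.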
index 32a18f2..e3a905e 100644 (file)
@@ -226,8 +226,8 @@ static inline bool arch_irqs_disabled(void)
 #endif /* CONFIG_PPC_BOOK3S */
 
 #ifdef CONFIG_PPC_BOOK3E
-#define __hard_irq_enable()    asm volatile("wrteei 1" : : : "memory")
-#define __hard_irq_disable()   asm volatile("wrteei 0" : : : "memory")
+#define __hard_irq_enable()    wrtee(MSR_EE)
+#define __hard_irq_disable()   wrtee(0)
 #else
 #define __hard_irq_enable()    __mtmsrd(MSR_EE|MSR_RI, 1)
 #define __hard_irq_disable()   __mtmsrd(MSR_RI, 1)
@@ -280,8 +280,6 @@ extern void force_external_irq_replay(void);
 
 #else /* CONFIG_PPC64 */
 
-#define SET_MSR_EE(x)  mtmsr(x)
-
 static inline unsigned long arch_local_save_flags(void)
 {
        return mfmsr();
@@ -289,47 +287,44 @@ static inline unsigned long arch_local_save_flags(void)
 
 static inline void arch_local_irq_restore(unsigned long flags)
 {
-#if defined(CONFIG_BOOKE)
-       asm volatile("wrtee %0" : : "r" (flags) : "memory");
-#else
-       mtmsr(flags);
-#endif
+       if (IS_ENABLED(CONFIG_BOOKE))
+               wrtee(flags);
+       else
+               mtmsr(flags);
 }
 
 static inline unsigned long arch_local_irq_save(void)
 {
        unsigned long flags = arch_local_save_flags();
-#ifdef CONFIG_BOOKE
-       asm volatile("wrteei 0" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
-       wrtspr(SPRN_EID);
-#else
-       SET_MSR_EE(flags & ~MSR_EE);
-#endif
+
+       if (IS_ENABLED(CONFIG_BOOKE))
+               wrtee(0);
+       else if (IS_ENABLED(CONFIG_PPC_8xx))
+               wrtspr(SPRN_EID);
+       else
+               mtmsr(flags & ~MSR_EE);
+
        return flags;
 }
 
 static inline void arch_local_irq_disable(void)
 {
-#ifdef CONFIG_BOOKE
-       asm volatile("wrteei 0" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
-       wrtspr(SPRN_EID);
-#else
-       arch_local_irq_save();
-#endif
+       if (IS_ENABLED(CONFIG_BOOKE))
+               wrtee(0);
+       else if (IS_ENABLED(CONFIG_PPC_8xx))
+               wrtspr(SPRN_EID);
+       else
+               mtmsr(mfmsr() & ~MSR_EE);
 }
 
 static inline void arch_local_irq_enable(void)
 {
-#ifdef CONFIG_BOOKE
-       asm volatile("wrteei 1" : : : "memory");
-#elif defined(CONFIG_PPC_8xx)
-       wrtspr(SPRN_EIE);
-#else
-       unsigned long msr = mfmsr();
-       SET_MSR_EE(msr | MSR_EE);
-#endif
+       if (IS_ENABLED(CONFIG_BOOKE))
+               wrtee(MSR_EE);
+       else if (IS_ENABLED(CONFIG_PPC_8xx))
+               wrtspr(SPRN_EIE);
+       else
+               mtmsr(mfmsr() | MSR_EE);
 }
 
 static inline bool arch_irqs_disabled_flags(unsigned long flags)
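
The conversion from #ifdef blocks to IS_ENABLED() above is behavior-neutral: IS_ENABLED() expands to a compile-time constant, so the dead branch is folded away just as before, but both branches are now parsed and type-checked on every configuration, which is what lets the BOOKE, 8xx and default paths share one function body.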
index 1c3133b..1006a42 100644 (file)
@@ -3,6 +3,7 @@
 #define _ASM_POWERPC_KUP_8XX_H_
 
 #include <asm/bug.h>
+#include <asm/mmu.h>
 
 #ifdef CONFIG_PPC_KUAP
 
index 4c9777d..b410046 100644 (file)
@@ -75,7 +75,6 @@
 #define MAS2_E                 0x00000001
 #define MAS2_WIMGE_MASK                0x0000001f
 #define MAS2_EPN_MASK(size)            (~0 << (size + 10))
-#define MAS2_VAL(addr, size, flags)    ((addr) & MAS2_EPN_MASK(size) | (flags))
 
 #define MAS3_RPN               0xFFFFF000
 #define MAS3_U0                        0x00000200
 #define TLBILX_T_CLASS2                        6
 #define TLBILX_T_CLASS3                        7
 
+/*
+ * The mapping only needs to be cache-coherent on SMP, except on
+ * Freescale e500mc derivatives where it's also needed for coherent DMA.
+ */
+#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
+#define MAS2_M_IF_NEEDED       MAS2_M
+#else
+#define MAS2_M_IF_NEEDED       0
+#endif
+
 #ifndef __ASSEMBLY__
 #include <asm/bug.h>
 
index 378e399..c1f25a7 100644 (file)
 #define OPAL_MPIPL_UPDATE                      173
 #define OPAL_MPIPL_REGISTER_TAG                        174
 #define OPAL_MPIPL_QUERY_TAG                   175
-#define OPAL_LAST                              175
+#define OPAL_SECVAR_GET                                176
+#define OPAL_SECVAR_GET_NEXT                   177
+#define OPAL_SECVAR_ENQUEUE_UPDATE             178
+#define OPAL_LAST                              178
 
 #define QUIESCE_HOLD                   1 /* Spin all calls at entry */
 #define QUIESCE_REJECT                 2 /* Fail all calls with OPAL_BUSY */
index a0cf8fb..9986ac3 100644 (file)
@@ -298,6 +298,13 @@ int opal_sensor_group_clear(u32 group_hndl, int token);
 int opal_sensor_group_enable(u32 group_hndl, int token, bool enable);
 int opal_nx_coproc_init(uint32_t chip_id, uint32_t ct);
 
+int opal_secvar_get(const char *key, uint64_t key_len, u8 *data,
+                   uint64_t *data_size);
+int opal_secvar_get_next(const char *key, uint64_t *key_len,
+                        uint64_t key_buf_size);
+int opal_secvar_enqueue_update(const char *key, uint64_t key_len, u8 *data,
+                              uint64_t data_size);
+
 s64 opal_mpipl_update(enum opal_mpipl_ops op, u64 src, u64 dest, u64 size);
 s64 opal_mpipl_register_tag(enum opal_mpipl_tags tag, u64 addr);
 s64 opal_mpipl_query_tag(enum opal_mpipl_tags tag, u64 *addr);
index f6c562a..7f1fd41 100644 (file)
@@ -325,6 +325,13 @@ void arch_free_page(struct page *page, int order);
 
 struct vm_area_struct;
 
+extern unsigned long kernstart_virt_addr;
+
+static inline unsigned long kaslr_offset(void)
+{
+       return kernstart_virt_addr - KERNELBASE;
+}
+
 #include <asm-generic/memory_model.h>
 #endif /* __ASSEMBLY__ */
 #include <asm/slice.h>
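
Example: on a 32-bit BookE kernel with KERNELBASE at 0xc0000000, if KASLR relocated the kernel's virtual base to 0xc4000000 then kernstart_virt_addr holds 0xc4000000 and kaslr_offset() returns 0x4000000 (64MB).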
index 4053b2a..0e4ec8c 100644 (file)
@@ -157,13 +157,9 @@ static inline bool pgd_is_leaf(pgd_t pgd)
 #define is_ioremap_addr is_ioremap_addr
 static inline bool is_ioremap_addr(const void *x)
 {
-#ifdef CONFIG_MMU
        unsigned long addr = (unsigned long)x;
 
        return addr >= IOREMAP_BASE && addr < IOREMAP_END;
-#else
-       return false;
-#endif
 }
 #endif /* CONFIG_PPC64 */
 
index 75c7e95..1aa46df 100644 (file)
@@ -25,9 +25,7 @@
 #include <asm/reg_fsl_emb.h>
 #endif
 
-#ifdef CONFIG_PPC_8xx
 #include <asm/reg_8xx.h>
-#endif /* CONFIG_PPC_8xx */
 
 #define MSR_SF_LG      63              /* Enable 64 bit mode */
 #define MSR_ISF_LG     61              /* Interrupt 64b mode valid on 630 */
@@ -1382,6 +1380,14 @@ static inline void mtmsr_isync(unsigned long val)
 #define wrtspr(rn)     asm volatile("mtspr " __stringify(rn) ",0" : \
                                     : : "memory")
 
+static inline void wrtee(unsigned long val)
+{
+       if (__builtin_constant_p(val))
+               asm volatile("wrteei %0" : : "i" ((val & MSR_EE) ? 1 : 0) : "memory");
+       else
+               asm volatile("wrtee %0" : : "r" (val) : "memory");
+}
+
 extern unsigned long msr_check_and_set(unsigned long bits);
 extern bool strict_msr_control;
 extern void __msr_check_and_clear(unsigned long bits);
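
Because __builtin_constant_p() is evaluated at compile time, wrtee(MSR_EE) and wrtee(0) compile to the immediate forms wrteei 1 and wrteei 0, while a runtime value such as saved flags uses the register form wrtee. That is what allows the BookE paths in hw_irq.h above to drop their open-coded asm in favor of this one helper.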
@@ -1396,19 +1402,9 @@ static inline void msr_check_and_clear(unsigned long bits)
 #define mftb()         ({unsigned long rval;                           \
                        asm volatile(                                   \
                                "90:    mfspr %0, %2;\n"                \
-                               "97:    cmpwi %0,0;\n"                  \
-                               "       beq- 90b;\n"                    \
-                               "99:\n"                                 \
-                               ".section __ftr_fixup,\"a\"\n"          \
-                               ".align 3\n"                            \
-                               "98:\n"                                 \
-                               "       .8byte %1\n"                    \
-                               "       .8byte %1\n"                    \
-                               "       .8byte 97b-98b\n"               \
-                               "       .8byte 99b-98b\n"               \
-                               "       .8byte 0\n"                     \
-                               "       .8byte 0\n"                     \
-                               ".previous"                             \
+                               ASM_FTR_IFSET(                          \
+                                       "97:    cmpwi %0,0;\n"          \
+                                       "       beq- 90b;\n", "", %1)   \
                        : "=r" (rval) \
                        : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \
                        rval;})
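
ASM_FTR_IFSET() emits the same __ftr_fixup record that the removed .section block spelled out by hand, so the cmpwi/beq- loop is still patched in only on CPUs with CPU_FTR_CELL_TB_BUG; the macro just hides the fixup-table plumbing.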
index 7192eec..07df35e 100644 (file)
@@ -5,8 +5,6 @@
 #ifndef _ASM_POWERPC_REG_8xx_H
 #define _ASM_POWERPC_REG_8xx_H
 
-#include <asm/mmu.h>
-
 /* Cache control on the MPC8xx is provided through some additional
  * special purpose registers.
  */
@@ -38,7 +36,9 @@
 #define SPRN_CMPF      153
 #define SPRN_LCTRL1    156
 #define SPRN_LCTRL2    157
+#ifdef CONFIG_PPC_8xx
 #define SPRN_ICTRL     158
+#endif
 #define SPRN_BAR       159
 
 /* Commands.  Only the first few are available to the instruction cache.
index 5a9b6eb..d198717 100644 (file)
@@ -5,8 +5,22 @@
 
 #include <linux/elf.h>
 #include <linux/uaccess.h>
+
+#define arch_is_kernel_initmem_freed arch_is_kernel_initmem_freed
+
 #include <asm-generic/sections.h>
 
+extern bool init_mem_is_free;
+
+static inline int arch_is_kernel_initmem_freed(unsigned long addr)
+{
+       if (!init_mem_is_free)
+               return 0;
+
+       return addr >= (unsigned long)__init_begin &&
+               addr < (unsigned long)__init_end;
+}
+
 extern char __head_end[];
 
 #ifdef __powerpc64__
diff --git a/arch/powerpc/include/asm/secure_boot.h b/arch/powerpc/include/asm/secure_boot.h
new file mode 100644 (file)
index 0000000..a2ff556
--- /dev/null
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Secure boot definitions
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#ifndef _ASM_POWER_SECURE_BOOT_H
+#define _ASM_POWER_SECURE_BOOT_H
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+bool is_ppc_secureboot_enabled(void);
+bool is_ppc_trustedboot_enabled(void);
+
+#else
+
+static inline bool is_ppc_secureboot_enabled(void)
+{
+       return false;
+}
+
+static inline bool is_ppc_trustedboot_enabled(void)
+{
+       return false;
+}
+
+#endif
+#endif
index ccf44c1..7c05e95 100644 (file)
@@ -9,7 +9,7 @@
 #define _ASM_POWERPC_SECURITY_FEATURES_H
 
 
-extern unsigned long powerpc_security_features;
+extern u64 powerpc_security_features;
 extern bool rfi_flush;
 
 /* These are bit flags */
@@ -24,17 +24,17 @@ void setup_stf_barrier(void);
 void do_stf_barrier_fixups(enum stf_barrier_type types);
 void setup_count_cache_flush(void);
 
-static inline void security_ftr_set(unsigned long feature)
+static inline void security_ftr_set(u64 feature)
 {
        powerpc_security_features |= feature;
 }
 
-static inline void security_ftr_clear(unsigned long feature)
+static inline void security_ftr_clear(u64 feature)
 {
        powerpc_security_features &= ~feature;
 }
 
-static inline bool security_ftr_enabled(unsigned long feature)
+static inline bool security_ftr_enabled(u64 feature)
 {
        return !!(powerpc_security_features & feature);
 }
diff --git a/arch/powerpc/include/asm/secvar.h b/arch/powerpc/include/asm/secvar.h
new file mode 100644 (file)
index 0000000..4cc35b5
--- /dev/null
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * PowerPC secure variable operations.
+ */
+#ifndef SECVAR_OPS_H
+#define SECVAR_OPS_H
+
+#include <linux/types.h>
+#include <linux/errno.h>
+
+extern const struct secvar_operations *secvar_ops;
+
+struct secvar_operations {
+       int (*get)(const char *key, uint64_t key_len, u8 *data,
+                  uint64_t *data_size);
+       int (*get_next)(const char *key, uint64_t *key_len,
+                       uint64_t keybufsize);
+       int (*set)(const char *key, uint64_t key_len, u8 *data,
+                  uint64_t data_size);
+};
+
+#ifdef CONFIG_PPC_SECURE_BOOT
+
+extern void set_secvar_ops(const struct secvar_operations *ops);
+
+#else
+
+static inline void set_secvar_ops(const struct secvar_operations *ops) { }
+
+#endif
+
+#endif
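
A platform backend fills in a secvar_operations instance and registers it with set_secvar_ops(). A minimal sketch of the shape of such a backend (the toy_* names and error codes are purely illustrative):

static int toy_get(const char *key, uint64_t key_len, u8 *data,
		   uint64_t *data_size)
{
	return -ENOENT;		/* look the variable up in a platform store */
}

static int toy_get_next(const char *key, uint64_t *key_len,
			uint64_t keybufsize)
{
	return -ENOENT;		/* enumerate the next variable name */
}

static int toy_set(const char *key, uint64_t key_len, u8 *data,
		   uint64_t data_size)
{
	return -ENXIO;		/* queue an update for firmware */
}

static const struct secvar_operations toy_secvar_ops = {
	.get		= toy_get,
	.get_next	= toy_get_next,
	.set		= toy_set,
};

/* platform init code would then call: set_secvar_ops(&toy_secvar_ops); */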
index cabfcbb..45f9715 100644 (file)
@@ -5,20 +5,6 @@
  * (C) Copyright 2006 IBM Corp.
  *
  * Author: Dwayne Grant McConnell <decimal@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2, or (at your option)
- * any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.        See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #ifndef _UAPI_SPU_INFO_H
index a7ca8fe..157b014 100644 (file)
@@ -5,9 +5,6 @@
 
 CFLAGS_ptrace.o                += -DUTS_MACHINE='"$(UTS_MACHINE)"'
 
-# Disable clang warning for using setjmp without setjmp.h header
-CFLAGS_crash.o         += $(call cc-disable-warning, builtin-requires-header)
-
 ifdef CONFIG_PPC64
 CFLAGS_prom_init.o     += $(NO_MINIMAL_TOC)
 endif
@@ -22,6 +19,8 @@ CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
 
 CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
+CFLAGS_prom_init.o += -ffreestanding
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not trace early boot code
@@ -39,7 +38,6 @@ KASAN_SANITIZE_btext.o := n
 ifdef CONFIG_KASAN
 CFLAGS_early_32.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
-CFLAGS_prom_init.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
 endif
 
@@ -78,9 +76,8 @@ obj-$(CONFIG_EEH)              += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
                                  eeh_driver.o eeh_event.o eeh_sysfs.o
 obj-$(CONFIG_GENERIC_TBSYNC)   += smp-tbsync.o
 obj-$(CONFIG_CRASH_DUMP)       += crash_dump.o
-ifneq ($(CONFIG_FA_DUMP)$(CONFIG_PRESERVE_FA_DUMP),)
-obj-y                          += fadump.o
-endif
+obj-$(CONFIG_FA_DUMP)          += fadump.o
+obj-$(CONFIG_PRESERVE_FA_DUMP) += fadump.o
 ifdef CONFIG_PPC32
 obj-$(CONFIG_E500)             += idle_e500.o
 endif
@@ -126,14 +123,6 @@ pci64-$(CONFIG_PPC64)              += pci_dn.o pci-hotplug.o isa-bridge.o
 obj-$(CONFIG_PCI)              += pci_$(BITS).o $(pci64-y) \
                                   pci-common.o pci_of_scan.o
 obj-$(CONFIG_PCI_MSI)          += msi.o
-obj-$(CONFIG_KEXEC_CORE)       += machine_kexec.o crash.o \
-                                  machine_kexec_$(BITS).o
-obj-$(CONFIG_KEXEC_FILE)       += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o
-ifdef CONFIG_HAVE_IMA_KEXEC
-ifdef CONFIG_IMA
-obj-y                          += ima_kexec.o
-endif
-endif
 
 obj-$(CONFIG_AUDIT)            += audit.o
 obj64-$(CONFIG_AUDIT)          += compat_audit.o
@@ -161,16 +150,13 @@ ifneq ($(CONFIG_PPC_POWERNV)$(CONFIG_PPC_SVM),)
 obj-y                          += ucall.o
 endif
 
+obj-$(CONFIG_PPC_SECURE_BOOT)  += secure_boot.o ima_arch.o secvar-ops.o
+obj-$(CONFIG_PPC_SECVAR_SYSFS) += secvar-sysfs.o
+
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
 KCOV_INSTRUMENT_prom_init.o := n
 UBSAN_SANITIZE_prom_init.o := n
-GCOV_PROFILE_machine_kexec_64.o := n
-KCOV_INSTRUMENT_machine_kexec_64.o := n
-UBSAN_SANITIZE_machine_kexec_64.o := n
-GCOV_PROFILE_machine_kexec_32.o := n
-KCOV_INSTRUMENT_machine_kexec_32.o := n
-UBSAN_SANITIZE_machine_kexec_32.o := n
 GCOV_PROFILE_kprobes.o := n
 KCOV_INSTRUMENT_kprobes.o := n
 UBSAN_SANITIZE_kprobes.o := n
index 2b4f3ec..1d30878 100644 (file)
@@ -231,7 +231,7 @@ _GLOBAL(__setup_cpu_e5500)
        blr
 #endif
 
-/* flush L1 date cache, it can apply to e500v2, e500mc and e5500 */
+/* flush L1 data cache; applies to e500v2, e500mc and e5500 */
 _GLOBAL(flush_dcache_L1)
        mfmsr   r10
        wrteei  0
diff --git a/arch/powerpc/kernel/crash.c b/arch/powerpc/kernel/crash.c
deleted file mode 100644 (file)
index d488311..0000000
+++ /dev/null
@@ -1,374 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Architecture specific (PPC64) functions for kexec based crash dumps.
- *
- * Copyright (C) 2005, IBM Corp.
- *
- * Created by: Haren Myneni
- */
-
-#include <linux/kernel.h>
-#include <linux/smp.h>
-#include <linux/reboot.h>
-#include <linux/kexec.h>
-#include <linux/export.h>
-#include <linux/crash_dump.h>
-#include <linux/delay.h>
-#include <linux/irq.h>
-#include <linux/types.h>
-
-#include <asm/processor.h>
-#include <asm/machdep.h>
-#include <asm/kexec.h>
-#include <asm/prom.h>
-#include <asm/smp.h>
-#include <asm/setjmp.h>
-#include <asm/debug.h>
-
-/*
- * The primary CPU waits a while for all secondary CPUs to enter. This is to
- * avoid sending an IPI if the secondary CPUs are entering
- * crash_kexec_secondary on their own (eg via a system reset).
- *
- * The secondary timeout has to be longer than the primary. Both timeouts are
- * in milliseconds.
- */
-#define PRIMARY_TIMEOUT                500
-#define SECONDARY_TIMEOUT      1000
-
-#define IPI_TIMEOUT            10000
-#define REAL_MODE_TIMEOUT      10000
-
-static int time_to_dump;
-/*
- * crash_wake_offline should be set to 1 by platforms that intend to wake
- * up offline cpus prior to jumping to a kdump kernel. Currently powernv
- * sets it to 1, since we want to avoid things from happening when an
- * offline CPU wakes up due to something like an HMI (malfunction error),
- * which propagates to all threads.
- */
-int crash_wake_offline;
-
-#define CRASH_HANDLER_MAX 3
-/* List of shutdown handles */
-static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
-static DEFINE_SPINLOCK(crash_handlers_lock);
-
-static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
-static int crash_shutdown_cpu = -1;
-
-static int handle_fault(struct pt_regs *regs)
-{
-       if (crash_shutdown_cpu == smp_processor_id())
-               longjmp(crash_shutdown_buf, 1);
-       return 0;
-}
-
-#ifdef CONFIG_SMP
-
-static atomic_t cpus_in_crash;
-void crash_ipi_callback(struct pt_regs *regs)
-{
-       static cpumask_t cpus_state_saved = CPU_MASK_NONE;
-
-       int cpu = smp_processor_id();
-
-       hard_irq_disable();
-       if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
-               crash_save_cpu(regs, cpu);
-               cpumask_set_cpu(cpu, &cpus_state_saved);
-       }
-
-       atomic_inc(&cpus_in_crash);
-       smp_mb__after_atomic();
-
-       /*
-        * Starting the kdump boot.
-        * This barrier is needed to make sure that all CPUs are stopped.
-        */
-       while (!time_to_dump)
-               cpu_relax();
-
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(1, 1);
-
-#ifdef CONFIG_PPC64
-       kexec_smp_wait();
-#else
-       for (;;);       /* FIXME */
-#endif
-
-       /* NOTREACHED */
-}
-
-static void crash_kexec_prepare_cpus(int cpu)
-{
-       unsigned int msecs;
-       unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
-       int tries = 0;
-       int (*old_handler)(struct pt_regs *regs);
-
-       printk(KERN_EMERG "Sending IPI to other CPUs\n");
-
-       if (crash_wake_offline)
-               ncpus = num_present_cpus() - 1;
-
-       crash_send_ipi(crash_ipi_callback);
-       smp_wmb();
-
-again:
-       /*
-        * FIXME: Until we will have the way to stop other CPUs reliably,
-        * the crash CPU will send an IPI and wait for other CPUs to
-        * respond.
-        */
-       msecs = IPI_TIMEOUT;
-       while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
-               mdelay(1);
-
-       /* Would it be better to replace the trap vector here? */
-
-       if (atomic_read(&cpus_in_crash) >= ncpus) {
-               printk(KERN_EMERG "IPI complete\n");
-               return;
-       }
-
-       printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
-               ncpus - atomic_read(&cpus_in_crash));
-
-       /*
-        * If we have a panic timeout set then we can't wait indefinitely
-        * for someone to activate system reset. We also give up on the
-        * second time through if system reset fail to work.
-        */
-       if ((panic_timeout > 0) || (tries > 0))
-               return;
-
-       /*
-        * A system reset will cause all CPUs to take an 0x100 exception.
-        * The primary CPU returns here via setjmp, and the secondary
-        * CPUs reexecute the crash_kexec_secondary path.
-        */
-       old_handler = __debugger;
-       __debugger = handle_fault;
-       crash_shutdown_cpu = smp_processor_id();
-
-       if (setjmp(crash_shutdown_buf) == 0) {
-               printk(KERN_EMERG "Activate system reset (dumprestart) "
-                                 "to stop other cpu(s)\n");
-
-               /*
-                * A system reset will force all CPUs to execute the
-                * crash code again. We need to reset cpus_in_crash so we
-                * wait for everyone to do this.
-                */
-               atomic_set(&cpus_in_crash, 0);
-               smp_mb();
-
-               while (atomic_read(&cpus_in_crash) < ncpus)
-                       cpu_relax();
-       }
-
-       crash_shutdown_cpu = -1;
-       __debugger = old_handler;
-
-       tries++;
-       goto again;
-}
-
-/*
- * This function will be called by secondary cpus.
- */
-void crash_kexec_secondary(struct pt_regs *regs)
-{
-       unsigned long flags;
-       int msecs = SECONDARY_TIMEOUT;
-
-       local_irq_save(flags);
-
-       /* Wait for the primary crash CPU to signal its progress */
-       while (crashing_cpu < 0) {
-               if (--msecs < 0) {
-                       /* No response, kdump image may not have been loaded */
-                       local_irq_restore(flags);
-                       return;
-               }
-
-               mdelay(1);
-       }
-
-       crash_ipi_callback(regs);
-}
-
-#else  /* ! CONFIG_SMP */
-
-static void crash_kexec_prepare_cpus(int cpu)
-{
-       /*
-        * move the secondaries to us so that we can copy
-        * the new kernel 0-0x100 safely
-        *
-        * do this if kexec in setup.c ?
-        */
-#ifdef CONFIG_PPC64
-       smp_release_cpus();
-#else
-       /* FIXME */
-#endif
-}
-
-void crash_kexec_secondary(struct pt_regs *regs)
-{
-}
-#endif /* CONFIG_SMP */
-
-/* wait for all the CPUs to hit real mode but timeout if they don't come in */
-#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
-static void __maybe_unused crash_kexec_wait_realmode(int cpu)
-{
-       unsigned int msecs;
-       int i;
-
-       msecs = REAL_MODE_TIMEOUT;
-       for (i=0; i < nr_cpu_ids && msecs > 0; i++) {
-               if (i == cpu)
-                       continue;
-
-               while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
-                       barrier();
-                       if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
-                               break;
-                       msecs--;
-                       mdelay(1);
-               }
-       }
-       mb();
-}
-#else
-static inline void crash_kexec_wait_realmode(int cpu) {}
-#endif /* CONFIG_SMP && CONFIG_PPC64 */
-
-/*
- * Register a function to be called on shutdown.  Only use this if you
- * can't reset your device in the second kernel.
- */
-int crash_shutdown_register(crash_shutdown_t handler)
-{
-       unsigned int i, rc;
-
-       spin_lock(&crash_handlers_lock);
-       for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
-               if (!crash_shutdown_handles[i]) {
-                       /* Insert handle at first empty entry */
-                       crash_shutdown_handles[i] = handler;
-                       rc = 0;
-                       break;
-               }
-
-       if (i == CRASH_HANDLER_MAX) {
-               printk(KERN_ERR "Crash shutdown handles full, "
-                      "not registered.\n");
-               rc = 1;
-       }
-
-       spin_unlock(&crash_handlers_lock);
-       return rc;
-}
-EXPORT_SYMBOL(crash_shutdown_register);
-
-int crash_shutdown_unregister(crash_shutdown_t handler)
-{
-       unsigned int i, rc;
-
-       spin_lock(&crash_handlers_lock);
-       for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
-               if (crash_shutdown_handles[i] == handler)
-                       break;
-
-       if (i == CRASH_HANDLER_MAX) {
-               printk(KERN_ERR "Crash shutdown handle not found\n");
-               rc = 1;
-       } else {
-               /* Shift handles down */
-               for (; i < (CRASH_HANDLER_MAX - 1); i++)
-                       crash_shutdown_handles[i] =
-                               crash_shutdown_handles[i+1];
-               /*
-                * Reset last entry to NULL now that it has been shifted down,
-                * this will allow new handles to be added here.
-                */
-               crash_shutdown_handles[i] = NULL;
-               rc = 0;
-       }
-
-       spin_unlock(&crash_handlers_lock);
-       return rc;
-}
-EXPORT_SYMBOL(crash_shutdown_unregister);
-
-void default_machine_crash_shutdown(struct pt_regs *regs)
-{
-       unsigned int i;
-       int (*old_handler)(struct pt_regs *regs);
-
-       /*
-        * This function is only called after the system
-        * has panicked or is otherwise in a critical state.
-        * The minimum amount of code to allow a kexec'd kernel
-        * to run successfully needs to happen here.
-        *
-        * In practice this means stopping other cpus in
-        * an SMP system.
-        * The kernel is broken so disable interrupts.
-        */
-       hard_irq_disable();
-
-       /*
-        * Make a note of crashing cpu. Will be used in machine_kexec
-        * such that another IPI will not be sent.
-        */
-       crashing_cpu = smp_processor_id();
-
-       /*
-        * If we came in via system reset, wait a while for the secondary
-        * CPUs to enter.
-        */
-       if (TRAP(regs) == 0x100)
-               mdelay(PRIMARY_TIMEOUT);
-
-       crash_kexec_prepare_cpus(crashing_cpu);
-
-       crash_save_cpu(regs, crashing_cpu);
-
-       time_to_dump = 1;
-
-       crash_kexec_wait_realmode(crashing_cpu);
-
-       machine_kexec_mask_interrupts();
-
-       /*
-        * Call registered shutdown routines safely.  Swap out
-        * __debugger_fault_handler, and replace on exit.
-        */
-       old_handler = __debugger_fault_handler;
-       __debugger_fault_handler = handle_fault;
-       crash_shutdown_cpu = smp_processor_id();
-       for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
-               if (setjmp(crash_shutdown_buf) == 0) {
-                       /*
-                        * Insert syncs and delay to ensure
-                        * instructions in the dangerous region don't
-                        * leak away from this protected region.
-                        */
-                       asm volatile("sync; isync");
-                       /* dangerous region */
-                       crash_shutdown_handles[i]();
-                       asm volatile("sync; isync");
-               }
-       }
-       crash_shutdown_cpu = -1;
-       __debugger_fault_handler = old_handler;
-
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(1, 0);
-}
index 5f66b95..cc14aa6 100644 (file)
@@ -30,10 +30,10 @@ int set_dawr(struct arch_hw_breakpoint *brk)
         * DAWR length is stored in field MDR bits 48:53.  Matches range in
         * doublewords (64 bits) biased by -1, e.g. 0b000000=1DW and
         * 0b111111=64DW.
-        * brk->len is in bytes.
+        * brk->hw_len is in bytes.
         * This aligns up to double word size, shifts and does the bias.
         */
-       mrd = ((brk->len + 7) >> 3) - 1;
+       mrd = ((brk->hw_len + 7) >> 3) - 1;
        dawrx |= (mrd & 0x3f) << (63 - 53);
 
        if (ppc_md.set_dawr)
@@ -54,7 +54,7 @@ static ssize_t dawr_write_file_bool(struct file *file,
                                    const char __user *user_buf,
                                    size_t count, loff_t *ppos)
 {
-       struct arch_hw_breakpoint null_brk = {0, 0, 0};
+       struct arch_hw_breakpoint null_brk = {0};
        size_t rc;
 
        /* Send error to user if the hypervisor won't allow us to write DAWR */
index 3482118..ef2ad49 100644 (file)
  */
 notrace unsigned long __init early_init(unsigned long dt_ptr)
 {
-       unsigned long offset = reloc_offset();
+       unsigned long kva, offset = reloc_offset();
+
+       kva = *PTRRELOC(&kernstart_virt_addr);
 
        /* First zero the BSS */
-       memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
+       if (kva == KERNELBASE)
+               memset(PTRRELOC(&__bss_start), 0, __bss_stop - __bss_start);
 
        /*
         * Identify the CPU type and fix up code sections
@@ -32,5 +35,5 @@ notrace unsigned long __init early_init(unsigned long dt_ptr)
 
        apply_feature_fixups();
 
-       return KERNELBASE + offset;
+       return kva + offset;
 }
index d9279d0..3dd1a42 100644 (file)
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
  * Copyright IBM Corp. 2004 2005
  * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
  *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
  * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  */
 #include <linux/delay.h>
@@ -897,12 +881,12 @@ void eeh_handle_normal_event(struct eeh_pe *pe)
 
        /* Log the event */
        if (pe->type & EEH_PE_PHB) {
-               pr_err("EEH: PHB#%x failure detected, location: %s\n",
+               pr_err("EEH: Recovering PHB#%x, location: %s\n",
                        pe->phb->global_number, eeh_pe_loc_get(pe));
        } else {
                struct eeh_pe *phb_pe = eeh_phb_pe_get(pe->phb);
 
-               pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
+               pr_err("EEH: Recovering PHB#%x-PE#%x\n",
                       pe->phb->global_number, pe->addr);
                pr_err("EEH: PE location: %s, PHB location: %s\n",
                       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
index 3fa04dd..ab44d96 100644 (file)
@@ -1,25 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * Sysfs entries for PCI Error Recovery for PAPR-compliant platform.
  * Copyright IBM Corporation 2007
  * Copyright Linas Vepstas <linas@austin.ibm.com> 2007
  *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
- *
  * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
  */
 #include <linux/pci.h>
index 829950b..e4076e3 100644 (file)
@@ -1346,16 +1346,6 @@ skpinv:  addi    r6,r6,1                         /* Increment */
        sync
        isync
 
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED    MAS2_M
-#else
-#define M_IF_NEEDED    0
-#endif
-
 /* 6. Setup KERNELBASE mapping in TLB[0]
  *
  * r3 = MAS0 w/TLBSEL & ESEL for the entry we started in
@@ -1368,7 +1358,7 @@ skpinv:   addi    r6,r6,1                         /* Increment */
        ori     r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
        mtspr   SPRN_MAS1,r6
 
-       LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | M_IF_NEEDED)
+       LOAD_REG_IMMEDIATE(r6, PAGE_OFFSET | MAS2_M_IF_NEEDED)
        mtspr   SPRN_MAS2,r6
 
        rlwinm  r5,r5,0,0,25
index d0018dd..46508b1 100644 (file)
@@ -514,7 +514,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
  * If stack=0, then the stack is already set in r1, and r1 is saved in r10.
  * PPR save and CPU accounting is not done for the !stack case (XXX why not?)
  */
-.macro INT_COMMON vec, area, stack, kaup, reconcile, dar, dsisr
+.macro INT_COMMON vec, area, stack, kuap, reconcile, dar, dsisr
        .if \stack
        andi.   r10,r12,MSR_PR          /* See if coming from user      */
        mr      r10,r1                  /* Save r1                      */
@@ -533,7 +533,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
        std     r10,GPR1(r1)            /* save r1 in stackframe        */
 
        .if \stack
-       .if \kaup
+       .if \kuap
        kuap_save_amr_and_lock r9, r10, cr1, cr0
        .endif
        beq     101f                    /* if from kernel mode          */
@@ -541,7 +541,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
        SAVE_PPR(\area, r9)
 101:
        .else
-       .if \kaup
+       .if \kuap
        kuap_save_amr_and_lock r9, r10, cr1
        .endif
        .endif
index ed59855..ff0114a 100644 (file)
@@ -1466,16 +1466,15 @@ static void fadump_init_files(void)
  */
 int __init setup_fadump(void)
 {
-       if (!fw_dump.fadump_enabled)
-               return 0;
-
-       if (!fw_dump.fadump_supported) {
-               printk(KERN_ERR "Firmware-assisted dump is not supported on"
-                       " this hardware\n");
+       if (!fw_dump.fadump_supported)
                return 0;
-       }
 
+       fadump_init_files();
        fadump_show_config();
+
+       if (!fw_dump.fadump_enabled)
+               return 1;
+
        /*
         * If dump data is available then see if it is valid and prepare for
         * saving it to the disk.
@@ -1492,8 +1491,6 @@ int __init setup_fadump(void)
        else if (fw_dump.reserve_dump_area_size)
                fw_dump.ops->fadump_init_mem_struct(&fw_dump);
 
-       fadump_init_files();
-
        return 1;
 }
 subsys_initcall(setup_fadump);
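
Net effect of the reordering: when firmware-assisted dump is supported, fadump_init_files() now runs before the enabled check, so the sysfs files exist even on kernels where fadump is disabled, and the error printk for unsupported hardware is dropped.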
index ea06528..8bccce6 100644 (file)
@@ -153,35 +153,24 @@ skpinv:   addi    r6,r6,1                         /* Increment */
        tlbivax 0,r9
        TLBSYNC
 
-/*
- * The mapping only needs to be cache-coherent on SMP, except on
- * Freescale e500mc derivatives where it's also needed for coherent DMA.
- */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED    MAS2_M
-#else
-#define M_IF_NEEDED    0
-#endif
-
 #if defined(ENTRY_MAPPING_BOOT_SETUP)
 
-/* 6. Setup KERNELBASE mapping in TLB1[0] */
+/* 6. Setup kernstart_virt_addr mapping in TLB1[0] */
        lis     r6,0x1000               /* Set MAS0(TLBSEL) = TLB1(1), ESEL = 0 */
        mtspr   SPRN_MAS0,r6
        lis     r6,(MAS1_VALID|MAS1_IPROT)@h
        ori     r6,r6,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
        mtspr   SPRN_MAS1,r6
-       lis     r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@h
-       ori     r6,r6,MAS2_VAL(PAGE_OFFSET, BOOK3E_PAGESZ_64M, M_IF_NEEDED)@l
+       lis     r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+       ori     r6,r6,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+       and     r6,r6,r20
+       ori     r6,r6,MAS2_M_IF_NEEDED@l
        mtspr   SPRN_MAS2,r6
        mtspr   SPRN_MAS3,r8
        tlbwe
 
-/* 7. Jump to KERNELBASE mapping */
-       lis     r6,(KERNELBASE & ~0xfff)@h
-       ori     r6,r6,(KERNELBASE & ~0xfff)@l
-       rlwinm  r7,r25,0,0x03ffffff
-       add     r6,r7,r6
+/* 7. Jump to kernstart_virt_addr mapping */
+       mr      r6,r20
 
 #elif defined(ENTRY_MAPPING_KEXEC_SETUP)
 /*
index adf0505..838d9d4 100644 (file)
@@ -155,6 +155,8 @@ _ENTRY(_start);
  */
 
 _ENTRY(__early_start)
+       LOAD_REG_ADDR_PIC(r20, kernstart_virt_addr)
+       lwz     r20,0(r20)
 
 #define ENTRY_MAPPING_BOOT_SETUP
 #include "fsl_booke_entry_mapping.S"
@@ -277,8 +279,8 @@ set_ivor:
        ori     r6, r6, swapper_pg_dir@l
        lis     r5, abatron_pteptrs@h
        ori     r5, r5, abatron_pteptrs@l
-       lis     r4, KERNELBASE@h
-       ori     r4, r4, KERNELBASE@l
+       lis     r3, kernstart_virt_addr@ha
+       lwz     r4, kernstart_virt_addr@l(r3)
        stw     r5, 0(r4)       /* Save abatron_pteptrs at a fixed location */
        stw     r6, 0(r5)
 
@@ -1067,7 +1069,12 @@ __secondary_start:
        mr      r5,r25          /* phys kernel start */
        rlwinm  r5,r5,0,~0x3ffffff      /* aligned 64M */
        subf    r4,r5,r4        /* memstart_addr - phys kernel start */
-       li      r5,0            /* no device tree */
+       lis     r7,KERNELBASE@h
+       ori     r7,r7,KERNELBASE@l
+       cmpw    r20,r7          /* if kernstart_virt_addr != KERNELBASE, randomized */
+       beq     2f
+       li      r4,0
+2:     li      r5,0            /* no device tree */
        li      r6,0            /* not boot cpu */
        bl      restore_to_as0
 
@@ -1114,6 +1121,54 @@ __secondary_hold_acknowledge:
        .long   -1
 #endif
 
+/*
+ * Create a 64M TLB entry at the given entry index and address
+ * r3 - entry
+ * r4 - virtual address
+ * r5/r6 - physical address
+ */
+_GLOBAL(create_kaslr_tlb_entry)
+       lis     r7,0x1000               /* Set MAS0(TLBSEL) = 1 */
+       rlwimi  r7,r3,16,4,15           /* Setup MAS0 = TLBSEL | ESEL(r6) */
+       mtspr   SPRN_MAS0,r7            /* Write MAS0 */
+
+       lis     r3,(MAS1_VALID|MAS1_IPROT)@h
+       ori     r3,r3,(MAS1_TSIZE(BOOK3E_PAGESZ_64M))@l
+       mtspr   SPRN_MAS1,r3            /* Write MAS1 */
+
+       lis     r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@h
+       ori     r3,r3,MAS2_EPN_MASK(BOOK3E_PAGESZ_64M)@l
+       and     r3,r3,r4
+       ori     r3,r3,MAS2_M_IF_NEEDED@l
+       mtspr   SPRN_MAS2,r3            /* Write MAS2(EPN) */
+
+#ifdef CONFIG_PHYS_64BIT
+       ori     r8,r6,(MAS3_SW|MAS3_SR|MAS3_SX)
+       mtspr   SPRN_MAS3,r8            /* Write MAS3(RPN) */
+       mtspr   SPRN_MAS7,r5
+#else
+       ori     r8,r5,(MAS3_SW|MAS3_SR|MAS3_SX)
+       mtspr   SPRN_MAS3,r8            /* Write MAS3(RPN) */
+#endif
+
+       tlbwe                           /* Write TLB */
+       isync
+       sync
+       blr
+
+/*
+ * Return to the start of the relocated kernel and run again
+ * r3 - virtual address of fdt
+ * r4 - entry point of the kernel
+ */
+_GLOBAL(reloc_kernel_entry)
+       mfmsr   r7
+       rlwinm  r7, r7, 0, ~(MSR_IS | MSR_DS)
+
+       mtspr   SPRN_SRR0,r4
+       mtspr   SPRN_SRR1,r7
+       rfi
+
 /*
  * Create a tlb entry with the same effective and physical address as
  * the tlb entry used by the current running code. But set the TS to 1.
index 1007ec3..58ce3d3 100644 (file)
@@ -126,6 +126,49 @@ int arch_bp_generic_fields(int type, int *gen_bp_type)
        return 0;
 }
 
+/*
+ * The watchpoint match range is always doubleword (8 byte) aligned on
+ * powerpc. If the given range crosses a doubleword boundary, we need
+ * to increase the length so that the next doubleword is also covered.
+ * Ex,
+ *
+ *          address   len = 6 bytes
+ *                |=========.
+ *   |------------v--|------v--------|
+ *   | | | | | | | | | | | | | | | | |
+ *   |---------------|---------------|
+ *    <---8 bytes--->
+ *
+ * In this case, we should configure hw as:
+ *   start_addr = address & ~HW_BREAKPOINT_ALIGN
+ *   len = 16 bytes
+ *
+ * @start_addr and @end_addr are inclusive.
+ */
+static int hw_breakpoint_validate_len(struct arch_hw_breakpoint *hw)
+{
+       u16 max_len = DABR_MAX_LEN;
+       u16 hw_len;
+       unsigned long start_addr, end_addr;
+
+       start_addr = hw->address & ~HW_BREAKPOINT_ALIGN;
+       end_addr = (hw->address + hw->len - 1) | HW_BREAKPOINT_ALIGN;
+       hw_len = end_addr - start_addr + 1;
+
+       if (dawr_enabled()) {
+               max_len = DAWR_MAX_LEN;
+               /* DAWR region can't cross a 512-byte boundary */
+               if ((start_addr >> 9) != (end_addr >> 9))
+                       return -EINVAL;
+       }
+
+       if (hw_len > max_len)
+               return -EINVAL;
+
+       hw->hw_len = hw_len;
+       return 0;
+}
+
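Worked example matching the diagram: address = 0x1006, len = 6 watches bytes 0x1006 through 0x100b, which straddle the doubleword boundary at 0x1008. Then start_addr = 0x1000, end_addr = 0x100f and hw_len = 16, i.e. two doublewords programmed into the hardware.
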
 /*
  * Validate the arch-specific HW Breakpoint register settings
  */
@@ -133,9 +176,9 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
                             const struct perf_event_attr *attr,
                             struct arch_hw_breakpoint *hw)
 {
-       int ret = -EINVAL, length_max;
+       int ret = -EINVAL;
 
-       if (!bp)
+       if (!bp || !attr->bp_len)
                return ret;
 
        hw->type = HW_BRK_TYPE_TRANSLATE;
@@ -155,26 +198,10 @@ int hw_breakpoint_arch_parse(struct perf_event *bp,
        hw->address = attr->bp_addr;
        hw->len = attr->bp_len;
 
-       /*
-        * Since breakpoint length can be a maximum of HW_BREAKPOINT_LEN(8)
-        * and breakpoint addresses are aligned to nearest double-word
-        * HW_BREAKPOINT_ALIGN by rounding off to the lower address, the
-        * 'symbolsize' should satisfy the check below.
-        */
        if (!ppc_breakpoint_available())
                return -ENODEV;
-       length_max = 8; /* DABR */
-       if (dawr_enabled()) {
-               length_max = 512 ; /* 64 doublewords */
-               /* DAWR region can't cross 512 boundary */
-               if ((attr->bp_addr >> 9) !=
-                   ((attr->bp_addr + attr->bp_len - 1) >> 9))
-                       return -EINVAL;
-       }
-       if (hw->len >
-           (length_max - (hw->address & HW_BREAKPOINT_ALIGN)))
-               return -EINVAL;
-       return 0;
+
+       return hw_breakpoint_validate_len(hw);
 }
 
 /*
@@ -195,33 +222,49 @@ void thread_change_pc(struct task_struct *tsk, struct pt_regs *regs)
        tsk->thread.last_hit_ubp = NULL;
 }
 
-static bool is_larx_stcx_instr(struct pt_regs *regs, unsigned int instr)
+static bool dar_within_range(unsigned long dar, struct arch_hw_breakpoint *info)
 {
-       int ret, type;
-       struct instruction_op op;
+       return ((info->address <= dar) && (dar - info->address < info->len));
+}
 
-       ret = analyse_instr(&op, regs, instr);
-       type = GETTYPE(op.type);
-       return (!ret && (type == LARX || type == STCX));
+static bool
+dar_range_overlaps(unsigned long dar, int size, struct arch_hw_breakpoint *info)
+{
+       return ((dar <= info->address + info->len - 1) &&
+               (dar + size - 1 >= info->address));
 }
 
 /*
  * Handle debug exception notifications.
  */
 static bool stepping_handler(struct pt_regs *regs, struct perf_event *bp,
-                            unsigned long addr)
+                            struct arch_hw_breakpoint *info)
 {
        unsigned int instr = 0;
+       int ret, type, size;
+       struct instruction_op op;
+       unsigned long addr = info->address;
 
        if (__get_user_inatomic(instr, (unsigned int *)regs->nip))
                goto fail;
 
-       if (is_larx_stcx_instr(regs, instr)) {
+       ret = analyse_instr(&op, regs, instr);
+       type = GETTYPE(op.type);
+       size = GETSIZE(op.type);
+
+       if (!ret && (type == LARX || type == STCX)) {
                printk_ratelimited("Breakpoint hit on instruction that can't be emulated."
                                   " Breakpoint at 0x%lx will be disabled.\n", addr);
                goto disable;
        }
 
+       /*
+        * If it's an extraneous event, we still need to emulate/single-
+        * step the instruction, but we don't generate an event.
+        */
+       if (size && !dar_range_overlaps(regs->dar, size, info))
+               info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+
        /* Do not emulate user-space instructions, instead single-step them */
        if (user_mode(regs)) {
                current->thread.last_hit_ubp = bp;
@@ -253,7 +296,6 @@ int hw_breakpoint_handler(struct die_args *args)
        struct perf_event *bp;
        struct pt_regs *regs = args->regs;
        struct arch_hw_breakpoint *info;
-       unsigned long dar = regs->dar;
 
        /* Disable breakpoints during exception handling */
        hw_breakpoint_disable();
@@ -285,19 +327,14 @@ int hw_breakpoint_handler(struct die_args *args)
                goto out;
        }
 
-       /*
-        * Verify if dar lies within the address range occupied by the symbol
-        * being watched to filter extraneous exceptions.  If it doesn't,
-        * we still need to single-step the instruction, but we don't
-        * generate an event.
-        */
        info->type &= ~HW_BRK_TYPE_EXTRANEOUS_IRQ;
-       if (!((bp->attr.bp_addr <= dar) &&
-             (dar - bp->attr.bp_addr < bp->attr.bp_len)))
-               info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
-
-       if (!IS_ENABLED(CONFIG_PPC_8xx) && !stepping_handler(regs, bp, info->address))
-               goto out;
+       if (IS_ENABLED(CONFIG_PPC_8xx)) {
+               if (!dar_within_range(regs->dar, info))
+                       info->type |= HW_BRK_TYPE_EXTRANEOUS_IRQ;
+       } else {
+               if (!stepping_handler(regs, bp, info))
+                       goto out;
+       }
 
        /*
         * As a policy, the callback is invoked in a 'trigger-after-execute'
diff --git a/arch/powerpc/kernel/ima_arch.c b/arch/powerpc/kernel/ima_arch.c
new file mode 100644 (file)
index 0000000..e341162
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+
+#include <linux/ima.h>
+#include <asm/secure_boot.h>
+
+bool arch_ima_get_secureboot(void)
+{
+       return is_ppc_secureboot_enabled();
+}
+
+/*
+ * The "secure_rules" are enabled only on "secureboot" enabled systems.
+ * These rules verify the file signatures against known good values.
+ * The "appraise_type=imasig|modsig" option allows the known good signature
+ * to be stored as an xattr or as an appended signature.
+ *
+ * To avoid duplicate signature verification as much as possible, the IMA
+ * policy rule for module appraisal is added only if CONFIG_MODULE_SIG_FORCE
+ * is not enabled.
+ */
+static const char *const secure_rules[] = {
+       "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG_FORCE
+       "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#endif
+       NULL
+};
+
+/*
+ * The "trusted_rules" are enabled only on "trustedboot" enabled systems.
+ * These rules add the kexec kernel image and kernel modules file hashes to
+ * the IMA measurement list.
+ */
+static const char *const trusted_rules[] = {
+       "measure func=KEXEC_KERNEL_CHECK",
+       "measure func=MODULE_CHECK",
+       NULL
+};
+
+/*
+ * The "secure_and_trusted_rules" contains rules for both the secure boot and
+ * trusted boot. The "template=ima-modsig" option includes the appended
+ * signature, when available, in the IMA measurement list.
+ */
+static const char *const secure_and_trusted_rules[] = {
+       "measure func=KEXEC_KERNEL_CHECK template=ima-modsig",
+       "measure func=MODULE_CHECK template=ima-modsig",
+       "appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#ifndef CONFIG_MODULE_SIG_FORCE
+       "appraise func=MODULE_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig",
+#endif
+       NULL
+};
+
+/*
+ * Returns the relevant IMA arch-specific policies based on the system secure
+ * boot state.
+ */
+const char *const *arch_get_ima_policy(void)
+{
+       if (is_ppc_secureboot_enabled()) {
+               if (IS_ENABLED(CONFIG_MODULE_SIG))
+                       set_module_sig_enforced();
+
+               if (is_ppc_trustedboot_enabled())
+                       return secure_and_trusted_rules;
+               else
+                       return secure_rules;
+       } else if (is_ppc_trustedboot_enabled()) {
+               return trusted_rules;
+       }
+
+       return NULL;
+}
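
The resulting policy matrix: secure boot alone selects secure_rules, trusted boot alone selects trusted_rules, both together select secure_and_trusted_rules, and neither returns NULL so no arch-specific rules are loaded.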
diff --git a/arch/powerpc/kernel/ima_kexec.c b/arch/powerpc/kernel/ima_kexec.c
deleted file mode 100644 (file)
index 720e50e..0000000
+++ /dev/null
@@ -1,219 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (C) 2016 IBM Corporation
- *
- * Authors:
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
- */
-
-#include <linux/slab.h>
-#include <linux/kexec.h>
-#include <linux/of.h>
-#include <linux/memblock.h>
-#include <linux/libfdt.h>
-
-static int get_addr_size_cells(int *addr_cells, int *size_cells)
-{
-       struct device_node *root;
-
-       root = of_find_node_by_path("/");
-       if (!root)
-               return -EINVAL;
-
-       *addr_cells = of_n_addr_cells(root);
-       *size_cells = of_n_size_cells(root);
-
-       of_node_put(root);
-
-       return 0;
-}
-
-static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
-                              size_t *size)
-{
-       int ret, addr_cells, size_cells;
-
-       ret = get_addr_size_cells(&addr_cells, &size_cells);
-       if (ret)
-               return ret;
-
-       if (len < 4 * (addr_cells + size_cells))
-               return -ENOENT;
-
-       *addr = of_read_number(prop, addr_cells);
-       *size = of_read_number(prop + 4 * addr_cells, size_cells);
-
-       return 0;
-}
-
-/**
- * ima_get_kexec_buffer - get IMA buffer from the previous kernel
- * @addr:      On successful return, set to point to the buffer contents.
- * @size:      On successful return, set to the buffer size.
- *
- * Return: 0 on success, negative errno on error.
- */
-int ima_get_kexec_buffer(void **addr, size_t *size)
-{
-       int ret, len;
-       unsigned long tmp_addr;
-       size_t tmp_size;
-       const void *prop;
-
-       prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
-       if (!prop)
-               return -ENOENT;
-
-       ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
-       if (ret)
-               return ret;
-
-       *addr = __va(tmp_addr);
-       *size = tmp_size;
-
-       return 0;
-}
-
-/**
- * ima_free_kexec_buffer - free memory used by the IMA buffer
- */
-int ima_free_kexec_buffer(void)
-{
-       int ret;
-       unsigned long addr;
-       size_t size;
-       struct property *prop;
-
-       prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
-       if (!prop)
-               return -ENOENT;
-
-       ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
-       if (ret)
-               return ret;
-
-       ret = of_remove_property(of_chosen, prop);
-       if (ret)
-               return ret;
-
-       return memblock_free(addr, size);
-
-}
-
-/**
- * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
- *
- * The IMA measurement buffer is of no use to a subsequent kernel, so we always
- * remove it from the device tree.
- */
-void remove_ima_buffer(void *fdt, int chosen_node)
-{
-       int ret, len;
-       unsigned long addr;
-       size_t size;
-       const void *prop;
-
-       prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
-       if (!prop)
-               return;
-
-       ret = do_get_kexec_buffer(prop, len, &addr, &size);
-       fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
-       if (ret)
-               return;
-
-       ret = delete_fdt_mem_rsv(fdt, addr, size);
-       if (!ret)
-               pr_debug("Removed old IMA buffer reservation.\n");
-}
-
-#ifdef CONFIG_IMA_KEXEC
-/**
- * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
- *
- * Architectures should use this function to pass on the IMA buffer
- * information to the next kernel.
- *
- * Return: 0 on success, negative errno on error.
- */
-int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
-                             size_t size)
-{
-       image->arch.ima_buffer_addr = load_addr;
-       image->arch.ima_buffer_size = size;
-
-       return 0;
-}
-
-static int write_number(void *p, u64 value, int cells)
-{
-       if (cells == 1) {
-               u32 tmp;
-
-               if (value > U32_MAX)
-                       return -EINVAL;
-
-               tmp = cpu_to_be32(value);
-               memcpy(p, &tmp, sizeof(tmp));
-       } else if (cells == 2) {
-               u64 tmp;
-
-               tmp = cpu_to_be64(value);
-               memcpy(p, &tmp, sizeof(tmp));
-       } else
-               return -EINVAL;
-
-       return 0;
-}
-
-/**
- * setup_ima_buffer - add IMA buffer information to the fdt
- * @image:             kexec image being loaded.
- * @fdt:               Flattened device tree for the next kernel.
- * @chosen_node:       Offset to the chosen node.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
-{
-       int ret, addr_cells, size_cells, entry_size;
-       u8 value[16];
-
-       remove_ima_buffer(fdt, chosen_node);
-       if (!image->arch.ima_buffer_size)
-               return 0;
-
-       ret = get_addr_size_cells(&addr_cells, &size_cells);
-       if (ret)
-               return ret;
-
-       entry_size = 4 * (addr_cells + size_cells);
-
-       if (entry_size > sizeof(value))
-               return -EINVAL;
-
-       ret = write_number(value, image->arch.ima_buffer_addr, addr_cells);
-       if (ret)
-               return ret;
-
-       ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size,
-                          size_cells);
-       if (ret)
-               return ret;
-
-       ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value,
-                         entry_size);
-       if (ret < 0)
-               return -EINVAL;
-
-       ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr,
-                             image->arch.ima_buffer_size);
-       if (ret)
-               return -EINVAL;
-
-       pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n",
-                image->arch.ima_buffer_addr, image->arch.ima_buffer_size);
-
-       return 0;
-}
-#endif /* CONFIG_IMA_KEXEC */
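
do_get_kexec_buffer() above decodes the "linux,ima-kexec-buffer" property as addr_cells 32-bit big-endian cells of address followed by size_cells cells of size. A self-contained sketch of that decoding; the property bytes and cell counts below are assumed example values:

#include <stdio.h>
#include <stdint.h>

/* Read `cells` 32-bit big-endian cells into one value, like of_read_number(). */
static uint64_t read_cells(const uint8_t *p, int cells)
{
        uint64_t v = 0;
        int i;

        for (i = 0; i < cells * 4; i++)
                v = (v << 8) | p[i];
        return v;
}

int main(void)
{
        /* 2 address cells + 2 size cells: addr 0x20000000, size 0x4000 */
        const uint8_t prop[16] = {
                0, 0, 0, 0,  0x20, 0, 0, 0,
                0, 0, 0, 0,  0, 0, 0x40, 0,
        };
        int addr_cells = 2, size_cells = 2;

        printf("addr=0x%llx size=0x%llx\n",
               (unsigned long long)read_cells(prop, addr_cells),
               (unsigned long long)read_cells(prop + 4 * addr_cells, size_cells));
        return 0;
}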
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
deleted file mode 100644 (file)
index 3072fd6..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Load ELF vmlinux file for the kexec_file_load syscall.
- *
- * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
- * Copyright (C) 2004  IBM Corp.
- * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
- * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
- * Copyright (C) 2016  IBM Corporation
- *
- * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
- * Heavily modified for the kernel by
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- */
-
-#define pr_fmt(fmt)    "kexec_elf: " fmt
-
-#include <linux/elf.h>
-#include <linux/kexec.h>
-#include <linux/libfdt.h>
-#include <linux/module.h>
-#include <linux/of_fdt.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-
-static void *elf64_load(struct kimage *image, char *kernel_buf,
-                       unsigned long kernel_len, char *initrd,
-                       unsigned long initrd_len, char *cmdline,
-                       unsigned long cmdline_len)
-{
-       int ret;
-       unsigned int fdt_size;
-       unsigned long kernel_load_addr;
-       unsigned long initrd_load_addr = 0, fdt_load_addr;
-       void *fdt;
-       const void *slave_code;
-       struct elfhdr ehdr;
-       struct kexec_elf_info elf_info;
-       struct kexec_buf kbuf = { .image = image, .buf_min = 0,
-                                 .buf_max = ppc64_rma_size };
-       struct kexec_buf pbuf = { .image = image, .buf_min = 0,
-                                 .buf_max = ppc64_rma_size, .top_down = true,
-                                 .mem = KEXEC_BUF_MEM_UNKNOWN };
-
-       ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
-       if (ret)
-               goto out;
-
-       ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
-       if (ret)
-               goto out;
-
-       pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
-
-       ret = kexec_load_purgatory(image, &pbuf);
-       if (ret) {
-               pr_err("Loading purgatory failed.\n");
-               goto out;
-       }
-
-       pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
-
-       if (initrd != NULL) {
-               kbuf.buffer = initrd;
-               kbuf.bufsz = kbuf.memsz = initrd_len;
-               kbuf.buf_align = PAGE_SIZE;
-               kbuf.top_down = false;
-               kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
-               ret = kexec_add_buffer(&kbuf);
-               if (ret)
-                       goto out;
-               initrd_load_addr = kbuf.mem;
-
-               pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
-       }
-
-       fdt_size = fdt_totalsize(initial_boot_params) * 2;
-       fdt = kmalloc(fdt_size, GFP_KERNEL);
-       if (!fdt) {
-               pr_err("Not enough memory for the device tree.\n");
-               ret = -ENOMEM;
-               goto out;
-       }
-       ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
-       if (ret < 0) {
-               pr_err("Error setting up the new device tree.\n");
-               ret = -EINVAL;
-               goto out;
-       }
-
-       ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
-       if (ret)
-               goto out;
-
-       fdt_pack(fdt);
-
-       kbuf.buffer = fdt;
-       kbuf.bufsz = kbuf.memsz = fdt_size;
-       kbuf.buf_align = PAGE_SIZE;
-       kbuf.top_down = true;
-       kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
-       ret = kexec_add_buffer(&kbuf);
-       if (ret)
-               goto out;
-       fdt_load_addr = kbuf.mem;
-
-       pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
-
-       slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
-       ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
-                             fdt_load_addr);
-       if (ret)
-               pr_err("Error setting up the purgatory.\n");
-
-out:
-       kexec_free_elf_info(&elf_info);
-
-       /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
-       return ret ? ERR_PTR(ret) : fdt;
-}
-
-const struct kexec_file_ops kexec_elf64_ops = {
-       .probe = kexec_elf_probe,
-       .load = elf64_load,
-};
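
Note how elf64_load() expresses every placement through struct kexec_buf: each buffer gets a [buf_min, buf_max) window capped at ppc64_rma_size and a growth direction (purgatory and the fdt top-down, the initrd bottom-up). A simplified stand-in struct showing the same designated-initializer pattern; it is not the kernel's kexec_buf:

#include <stdio.h>
#include <stdbool.h>

struct placement {
        unsigned long buf_min;
        unsigned long buf_max;
        bool top_down;
};

int main(void)
{
        unsigned long rma_size = 0x30000000UL; /* assumed RMA size */
        struct placement kernel_buf = { .buf_min = 0, .buf_max = rma_size };
        struct placement purgatory  = { .buf_min = 0, .buf_max = rma_size,
                                        .top_down = true };

        printf("kernel:    [0,%#lx) bottom-up\n", kernel_buf.buf_max);
        printf("purgatory: [0,%#lx) %s\n", purgatory.buf_max,
               purgatory.top_down ? "top-down" : "bottom-up");
        return 0;
}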
diff --git a/arch/powerpc/kernel/machine_kexec.c b/arch/powerpc/kernel/machine_kexec.c
deleted file mode 100644 (file)
index c4ed328..0000000
+++ /dev/null
@@ -1,279 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Code to handle transition of Linux booting another kernel.
- *
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * Copyright (C) 2005 IBM Corporation.
- */
-
-#include <linux/kexec.h>
-#include <linux/reboot.h>
-#include <linux/threads.h>
-#include <linux/memblock.h>
-#include <linux/of.h>
-#include <linux/irq.h>
-#include <linux/ftrace.h>
-
-#include <asm/kdump.h>
-#include <asm/machdep.h>
-#include <asm/pgalloc.h>
-#include <asm/prom.h>
-#include <asm/sections.h>
-
-void machine_kexec_mask_interrupts(void) {
-       unsigned int i;
-       struct irq_desc *desc;
-
-       for_each_irq_desc(i, desc) {
-               struct irq_chip *chip;
-
-               chip = irq_desc_get_chip(desc);
-               if (!chip)
-                       continue;
-
-               if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
-                       chip->irq_eoi(&desc->irq_data);
-
-               if (chip->irq_mask)
-                       chip->irq_mask(&desc->irq_data);
-
-               if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
-                       chip->irq_disable(&desc->irq_data);
-       }
-}
-
-void machine_crash_shutdown(struct pt_regs *regs)
-{
-       default_machine_crash_shutdown(regs);
-}
-
-/*
- * Do whatever setup is needed on the image and the
- * reboot code buffer to allow us to avoid allocations
- * later.
- */
-int machine_kexec_prepare(struct kimage *image)
-{
-       if (ppc_md.machine_kexec_prepare)
-               return ppc_md.machine_kexec_prepare(image);
-       else
-               return default_machine_kexec_prepare(image);
-}
-
-void machine_kexec_cleanup(struct kimage *image)
-{
-}
-
-void arch_crash_save_vmcoreinfo(void)
-{
-
-#ifdef CONFIG_NEED_MULTIPLE_NODES
-       VMCOREINFO_SYMBOL(node_data);
-       VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
-#endif
-#ifndef CONFIG_NEED_MULTIPLE_NODES
-       VMCOREINFO_SYMBOL(contig_page_data);
-#endif
-#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
-       VMCOREINFO_SYMBOL(vmemmap_list);
-       VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
-       VMCOREINFO_SYMBOL(mmu_psize_defs);
-       VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
-       VMCOREINFO_OFFSET(vmemmap_backing, list);
-       VMCOREINFO_OFFSET(vmemmap_backing, phys);
-       VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
-       VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
-       VMCOREINFO_OFFSET(mmu_psize_def, shift);
-#endif
-}
-
-/*
- * Do not allocate memory (or fail in any way) in machine_kexec().
- * We are past the point of no return, committed to rebooting now.
- */
-void machine_kexec(struct kimage *image)
-{
-       int save_ftrace_enabled;
-
-       save_ftrace_enabled = __ftrace_enabled_save();
-       this_cpu_disable_ftrace();
-
-       if (ppc_md.machine_kexec)
-               ppc_md.machine_kexec(image);
-       else
-               default_machine_kexec(image);
-
-       this_cpu_enable_ftrace();
-       __ftrace_enabled_restore(save_ftrace_enabled);
-
-       /* Fall back to normal restart if we're still alive. */
-       machine_restart(NULL);
-       for(;;);
-}
-
-void __init reserve_crashkernel(void)
-{
-       unsigned long long crash_size, crash_base;
-       int ret;
-
-       /* use common parsing */
-       ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
-                       &crash_size, &crash_base);
-       if (ret == 0 && crash_size > 0) {
-               crashk_res.start = crash_base;
-               crashk_res.end = crash_base + crash_size - 1;
-       }
-
-       if (crashk_res.end == crashk_res.start) {
-               crashk_res.start = crashk_res.end = 0;
-               return;
-       }
-
-       /* We might have got these values via the command line or the
-        * device tree; either way, sanitise them now. */
-
-       crash_size = resource_size(&crashk_res);
-
-#ifndef CONFIG_NONSTATIC_KERNEL
-       if (crashk_res.start != KDUMP_KERNELBASE)
-               printk("Crash kernel location must be 0x%x\n",
-                               KDUMP_KERNELBASE);
-
-       crashk_res.start = KDUMP_KERNELBASE;
-#else
-       if (!crashk_res.start) {
-#ifdef CONFIG_PPC64
-               /*
-                * On 64bit we split the RMO in half but cap it at half of
-                * a small SLB (128MB) since the crash kernel needs to place
-                * itself and some stacks to be in the first segment.
-                */
-               crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
-#else
-               crashk_res.start = KDUMP_KERNELBASE;
-#endif
-       }
-
-       crash_base = PAGE_ALIGN(crashk_res.start);
-       if (crash_base != crashk_res.start) {
-               printk("Crash kernel base must be aligned to 0x%lx\n",
-                               PAGE_SIZE);
-               crashk_res.start = crash_base;
-       }
-
-#endif
-       crash_size = PAGE_ALIGN(crash_size);
-       crashk_res.end = crashk_res.start + crash_size - 1;
-
-       /* The crash region must not overlap the current kernel */
-       if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
-               printk(KERN_WARNING
-                       "Crash kernel can not overlap current kernel\n");
-               crashk_res.start = crashk_res.end = 0;
-               return;
-       }
-
-       /* Crash kernel trumps memory limit */
-       if (memory_limit && memory_limit <= crashk_res.end) {
-               memory_limit = crashk_res.end + 1;
-               printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
-                      memory_limit);
-       }
-
-       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
-                       "for crashkernel (System RAM: %ldMB)\n",
-                       (unsigned long)(crash_size >> 20),
-                       (unsigned long)(crashk_res.start >> 20),
-                       (unsigned long)(memblock_phys_mem_size() >> 20));
-
-       if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
-           memblock_reserve(crashk_res.start, crash_size)) {
-               pr_err("Failed to reserve memory for crashkernel!\n");
-               crashk_res.start = crashk_res.end = 0;
-               return;
-       }
-}
-
-int overlaps_crashkernel(unsigned long start, unsigned long size)
-{
-       return (start + size) > crashk_res.start && start <= crashk_res.end;
-}
-
-/* Values we need to export to the second kernel via the device tree. */
-static phys_addr_t kernel_end;
-static phys_addr_t crashk_base;
-static phys_addr_t crashk_size;
-static unsigned long long mem_limit;
-
-static struct property kernel_end_prop = {
-       .name = "linux,kernel-end",
-       .length = sizeof(phys_addr_t),
-       .value = &kernel_end,
-};
-
-static struct property crashk_base_prop = {
-       .name = "linux,crashkernel-base",
-       .length = sizeof(phys_addr_t),
-       .value = &crashk_base
-};
-
-static struct property crashk_size_prop = {
-       .name = "linux,crashkernel-size",
-       .length = sizeof(phys_addr_t),
-       .value = &crashk_size,
-};
-
-static struct property memory_limit_prop = {
-       .name = "linux,memory-limit",
-       .length = sizeof(unsigned long long),
-       .value = &mem_limit,
-};
-
-#define cpu_to_be_ulong        __PASTE(cpu_to_be, BITS_PER_LONG)
-
-static void __init export_crashk_values(struct device_node *node)
-{
-       /* There might be existing crash kernel properties, but we can't
-        * be sure what's in them, so remove them. */
-       of_remove_property(node, of_find_property(node,
-                               "linux,crashkernel-base", NULL));
-       of_remove_property(node, of_find_property(node,
-                               "linux,crashkernel-size", NULL));
-
-       if (crashk_res.start != 0) {
-               crashk_base = cpu_to_be_ulong(crashk_res.start);
-               of_add_property(node, &crashk_base_prop);
-               crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
-               of_add_property(node, &crashk_size_prop);
-       }
-
-       /*
-        * memory_limit is required by the kexec-tools to limit the
-        * crash regions to the actual memory used.
-        */
-       mem_limit = cpu_to_be_ulong(memory_limit);
-       of_update_property(node, &memory_limit_prop);
-}
-
-static int __init kexec_setup(void)
-{
-       struct device_node *node;
-
-       node = of_find_node_by_path("/chosen");
-       if (!node)
-               return -ENOENT;
-
-       /* remove any stale properties so ours can be found */
-       of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
-
-       /* information needed by userspace when using default_machine_kexec */
-       kernel_end = cpu_to_be_ulong(__pa(_end));
-       of_add_property(node, &kernel_end_prop);
-
-       export_crashk_values(node);
-
-       of_node_put(node);
-       return 0;
-}
-late_initcall(kexec_setup);
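
reserve_crashkernel() above boils down to: page-align the requested base and size, refuse a region that overlaps the running kernel, then reserve it in memblock. A userspace sketch of just the align-and-overlap checks, with made-up addresses (the kernel's overlaps_crashkernel() uses inclusive resource ends; this sketch uses half-open intervals):

#include <stdio.h>

#define PAGE_SIZE 0x1000UL
#define PAGE_ALIGN(x) (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

static int overlaps(unsigned long s1, unsigned long e1,
                    unsigned long s2, unsigned long e2)
{
        return s1 < e2 && s2 < e1;
}

int main(void)
{
        unsigned long crash_base = 0x2000123, crash_size = 0x8000000;
        unsigned long kstart = 0x0, kend = 0x1400000; /* assumed kernel span */

        crash_base = PAGE_ALIGN(crash_base);
        crash_size = PAGE_ALIGN(crash_size);

        if (overlaps(crash_base, crash_base + crash_size, kstart, kend))
                printf("crash kernel overlaps the running kernel\n");
        else
                printf("reserving [%#lx, %#lx)\n", crash_base,
                       crash_base + crash_size);
        return 0;
}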
diff --git a/arch/powerpc/kernel/machine_kexec_32.c b/arch/powerpc/kernel/machine_kexec_32.c
deleted file mode 100644 (file)
index bf9f1f9..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * PPC32 code to handle Linux booting another kernel.
- *
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * Copyright (C) 2005 IBM Corporation.
- */
-
-#include <linux/kexec.h>
-#include <linux/mm.h>
-#include <linux/string.h>
-#include <asm/cacheflush.h>
-#include <asm/hw_irq.h>
-#include <asm/io.h>
-
-typedef void (*relocate_new_kernel_t)(
-                               unsigned long indirection_page,
-                               unsigned long reboot_code_buffer,
-                               unsigned long start_address) __noreturn;
-
-/*
- * This is a generic machine_kexec function suitable at least for
- * non-OpenFirmware embedded platforms.
- * It merely copies the image relocation code to the control page and
- * jumps to it.
- * A platform specific function may just call this one.
- */
-void default_machine_kexec(struct kimage *image)
-{
-       extern const unsigned int relocate_new_kernel_size;
-       unsigned long page_list;
-       unsigned long reboot_code_buffer, reboot_code_buffer_phys;
-       relocate_new_kernel_t rnk;
-
-       /* Interrupts aren't acceptable while we reboot */
-       local_irq_disable();
-
-       /* mask each interrupt so we are in a more sane state for the
-        * kexec kernel */
-       machine_kexec_mask_interrupts();
-
-       page_list = image->head;
-
-       /* we need both effective and real address here */
-       reboot_code_buffer =
-                       (unsigned long)page_address(image->control_code_page);
-       reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer);
-
-       /* copy our kernel relocation code to the control code page */
-       memcpy((void *)reboot_code_buffer, relocate_new_kernel,
-                                               relocate_new_kernel_size);
-
-       flush_icache_range(reboot_code_buffer,
-                               reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
-       printk(KERN_INFO "Bye!\n");
-
-       if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
-               relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
-
-       /* now call it */
-       rnk = (relocate_new_kernel_t) reboot_code_buffer;
-       (*rnk)(page_list, reboot_code_buffer_phys, image->start);
-}
-
-int default_machine_kexec_prepare(struct kimage *image)
-{
-       return 0;
-}
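
The tail of default_machine_kexec() is the classic copy-then-cast step: the control code page is reinterpreted as a function pointer of relocate_new_kernel's type and called. A trivial userspace illustration of the cast alone, where an ordinary function stands in for the copied, position-independent stub:

#include <stdio.h>

typedef void (*relocate_fn_t)(unsigned long page_list,
                              unsigned long code_buffer,
                              unsigned long start_address);

/* Stand-in for the relocation code copied into the control page. */
static void fake_relocate(unsigned long page_list,
                          unsigned long code_buffer,
                          unsigned long start_address)
{
        printf("jumping: list=%#lx buf=%#lx start=%#lx\n",
               page_list, code_buffer, start_address);
}

int main(void)
{
        /* In the kernel this would be the copied control code page. */
        relocate_fn_t rnk = fake_relocate;

        (*rnk)(0x1000UL, 0x2000UL, 0x3000UL);
        return 0;
}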
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
deleted file mode 100644 (file)
index 04a7cba..0000000
+++ /dev/null
@@ -1,417 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * PPC64 code to handle Linux booting another kernel.
- *
- * Copyright (C) 2004-2005, IBM Corp.
- *
- * Created by: Milton D Miller II
- */
-
-
-#include <linux/kexec.h>
-#include <linux/smp.h>
-#include <linux/thread_info.h>
-#include <linux/init_task.h>
-#include <linux/errno.h>
-#include <linux/kernel.h>
-#include <linux/cpu.h>
-#include <linux/hardirq.h>
-
-#include <asm/page.h>
-#include <asm/current.h>
-#include <asm/machdep.h>
-#include <asm/cacheflush.h>
-#include <asm/firmware.h>
-#include <asm/paca.h>
-#include <asm/mmu.h>
-#include <asm/sections.h>      /* _end */
-#include <asm/prom.h>
-#include <asm/smp.h>
-#include <asm/hw_breakpoint.h>
-#include <asm/asm-prototypes.h>
-#include <asm/svm.h>
-#include <asm/ultravisor.h>
-
-int default_machine_kexec_prepare(struct kimage *image)
-{
-       int i;
-       unsigned long begin, end;       /* limits of segment */
-       unsigned long low, high;        /* limits of blocked memory range */
-       struct device_node *node;
-       const unsigned long *basep;
-       const unsigned int *sizep;
-
-       /*
-        * Since we use the kernel fault handlers and paging code to
-        * handle the virtual mode, we must make sure no destination
-        * overlaps kernel static data or bss.
-        */
-       for (i = 0; i < image->nr_segments; i++)
-               if (image->segment[i].mem < __pa(_end))
-                       return -ETXTBSY;
-
-       /* We also should not overwrite the tce tables */
-       for_each_node_by_type(node, "pci") {
-               basep = of_get_property(node, "linux,tce-base", NULL);
-               sizep = of_get_property(node, "linux,tce-size", NULL);
-               if (basep == NULL || sizep == NULL)
-                       continue;
-
-               low = *basep;
-               high = low + (*sizep);
-
-               for (i = 0; i < image->nr_segments; i++) {
-                       begin = image->segment[i].mem;
-                       end = begin + image->segment[i].memsz;
-
-                       if ((begin < high) && (end > low))
-                               return -ETXTBSY;
-               }
-       }
-
-       return 0;
-}
-
-static void copy_segments(unsigned long ind)
-{
-       unsigned long entry;
-       unsigned long *ptr;
-       void *dest;
-       void *addr;
-
-       /*
-        * We rely on kexec_load to create a list that properly
-        * initializes these pointers before they are used.
-        * We will still crash if the list is wrong, but at least
-        * the compiler will be quiet.
-        */
-       ptr = NULL;
-       dest = NULL;
-
-       for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
-               addr = __va(entry & PAGE_MASK);
-
-               switch (entry & IND_FLAGS) {
-               case IND_DESTINATION:
-                       dest = addr;
-                       break;
-               case IND_INDIRECTION:
-                       ptr = addr;
-                       break;
-               case IND_SOURCE:
-                       copy_page(dest, addr);
-                       dest += PAGE_SIZE;
-               }
-       }
-}
-
-void kexec_copy_flush(struct kimage *image)
-{
-       long i, nr_segments = image->nr_segments;
-       struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
-
-       /* save the ranges on the stack to efficiently flush the icache */
-       memcpy(ranges, image->segment, sizeof(ranges));
-
-       /*
-        * After this call we may not use anything allocated in dynamic
-        * memory, including *image.
-        *
-        * Only globals and the stack are allowed.
-        */
-       copy_segments(image->head);
-
-       /*
-        * we need to clear the icache for all dest pages sometime,
-        * including ones that were in place on the original copy
-        */
-       for (i = 0; i < nr_segments; i++)
-               flush_icache_range((unsigned long)__va(ranges[i].mem),
-                       (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
-}
-
-#ifdef CONFIG_SMP
-
-static int kexec_all_irq_disabled = 0;
-
-static void kexec_smp_down(void *arg)
-{
-       local_irq_disable();
-       hard_irq_disable();
-
-       mb(); /* make sure our irqs are disabled before we say they are */
-       get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
-       while(kexec_all_irq_disabled == 0)
-               cpu_relax();
-       mb(); /* make sure all irqs are disabled before this */
-       hw_breakpoint_disable();
-       /*
-        * Now every CPU has IRQs off, we can clear out any pending
-        * IPIs and be sure that no more will come in after this.
-        */
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(0, 1);
-
-       kexec_smp_wait();
-       /* NOTREACHED */
-}
-
-static void kexec_prepare_cpus_wait(int wait_state)
-{
-       int my_cpu, i, notified=-1;
-
-       hw_breakpoint_disable();
-       my_cpu = get_cpu();
-       /* Make sure each CPU has at least made it to the state we need.
-        *
-        * FIXME: There is a (slim) chance of a problem if not all of the CPUs
-        * are correctly onlined.  If somehow we start a CPU on boot with RTAS
-        * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
-        * time, the boot CPU will time out.  If it does eventually execute
-        * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
-        * written) and get into a peculiar state.
-        * If the platform supports smp_ops->take_timebase(), the secondary CPU
-        * will probably be spinning in there.  If not (i.e. pseries), the
-        * secondary will continue on and try to online itself/idle/etc. If it
-        * survives that, we need to find these
-        * possible-but-not-online-but-should-be CPUs and chaperone them into
-        * kexec_smp_wait().
-        */
-       for_each_online_cpu(i) {
-               if (i == my_cpu)
-                       continue;
-
-               while (paca_ptrs[i]->kexec_state < wait_state) {
-                       barrier();
-                       if (i != notified) {
-                               printk(KERN_INFO "kexec: waiting for cpu %d "
-                                      "(physical %d) to enter %i state\n",
-                                      i, paca_ptrs[i]->hw_cpu_id, wait_state);
-                               notified = i;
-                       }
-               }
-       }
-       mb();
-}
-
-/*
- * We need to make sure each present CPU is online.  The next kernel will scan
- * the device tree and assume primary threads are online and query secondary
- * threads via RTAS to online them if required.  If we don't online primary
- * threads, they will be stuck.  However, we also online secondary threads as we
- * may be using 'cede offline'.  In this case RTAS doesn't see the secondary
- * threads as offline -- and again, these CPUs will be stuck.
- *
- * So, we online all CPUs that should be running, including secondary threads.
- */
-static void wake_offline_cpus(void)
-{
-       int cpu = 0;
-
-       for_each_present_cpu(cpu) {
-               if (!cpu_online(cpu)) {
-                       printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
-                              cpu);
-                       WARN_ON(cpu_up(cpu));
-               }
-       }
-}
-
-static void kexec_prepare_cpus(void)
-{
-       wake_offline_cpus();
-       smp_call_function(kexec_smp_down, NULL, /* wait */0);
-       local_irq_disable();
-       hard_irq_disable();
-
-       mb(); /* make sure IRQs are disabled before we say they are */
-       get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
-
-       kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
-       /* we are sure every CPU has IRQs off at this point */
-       kexec_all_irq_disabled = 1;
-
-       /*
-        * Before removing MMU mappings make sure all CPUs have entered real
-        * mode:
-        */
-       kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
-
-       /* after we tell the others to go down */
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(0, 0);
-
-       put_cpu();
-}
-
-#else /* ! SMP */
-
-static void kexec_prepare_cpus(void)
-{
-       /*
-        * move the secondaries to us so that we can copy
-        * the new kernel 0-0x100 safely
-        *
-        * do this if kexec in setup.c ?
-        *
-        * We need to release the cpus if we are ever going from a
-        * UP to an SMP kernel.
-        */
-       smp_release_cpus();
-       if (ppc_md.kexec_cpu_down)
-               ppc_md.kexec_cpu_down(0, 0);
-       local_irq_disable();
-       hard_irq_disable();
-}
-
-#endif /* SMP */
-
-/*
- * kexec thread structure and stack.
- *
- * We need to make sure that this is 16384-byte aligned due to the
- * way process stacks are handled.  It also must be statically allocated
- * or allocated as part of the kimage, because everything else may be
- * overwritten when we copy the kexec image.  We piggyback on the
- * "init_task" linker section here to statically allocate a stack.
- *
- * We could use a smaller stack if we don't care about anything using
- * current, but that audit has not been performed.
- */
-static union thread_union kexec_stack __init_task_data =
-       { };
-
-/*
- * For similar reasons to the stack above, the kexecing CPU needs to be on a
- * static PACA; we switch to kexec_paca.
- */
-struct paca_struct kexec_paca;
-
-/* Our assembly helper, in misc_64.S */
-extern void kexec_sequence(void *newstack, unsigned long start,
-                          void *image, void *control,
-                          void (*clear_all)(void),
-                          bool copy_with_mmu_off) __noreturn;
-
-/* too late to fail here */
-void default_machine_kexec(struct kimage *image)
-{
-       bool copy_with_mmu_off;
-
-       /* prepare control code if any */
-
-       /*
-        * If the kexec boot is the normal one, need to shutdown other cpus
-        * into our wait loop and quiesce interrupts.
-        * Otherwise, in the case of crashed mode (crashing_cpu >= 0),
-        * stopping other CPUs and collecting their pt_regs is done before
-        * using debugger IPI.
-        */
-
-       if (!kdump_in_progress())
-               kexec_prepare_cpus();
-
-       printk("kexec: Starting switchover sequence.\n");
-
-       /* switch to a statically allocated stack.  Based on irq stack code.
-        * We set up preempt_count to avoid using VMX in memcpy.
-        * XXX: the task struct will likely be invalid once we do the copy!
-        */
-       current_thread_info()->flags = 0;
-       current_thread_info()->preempt_count = HARDIRQ_OFFSET;
-
-       /* We need a static PACA, too; copy this CPU's PACA over and switch to
-        * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
-        * non-static data.
-        */
-       memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
-       kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
-#ifdef CONFIG_PPC_PSERIES
-       kexec_paca.lppaca_ptr = NULL;
-#endif
-
-       if (is_secure_guest() && !(image->preserve_context ||
-                                  image->type == KEXEC_TYPE_CRASH)) {
-               uv_unshare_all_pages();
-               printk("kexec: Unshared all shared pages.\n");
-       }
-
-       paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
-
-       setup_paca(&kexec_paca);
-
-       /*
-        * The lppaca should be unregistered at this point so the HV won't
-        * touch it. In the case of a crash, none of the lppacas are
-        * unregistered so there is not much we can do about it here.
-        */
-
-       /*
-        * On Book3S, the copy must happen with the MMU off if we are either
-        * using Radix page tables or we are not in an LPAR since we can
-        * overwrite the page tables while copying.
-        *
-        * In an LPAR, we keep the MMU on otherwise we can't access beyond
-        * the RMA. On BookE there is no real MMU off mode, so we have to
-        * keep it enabled as well (but then we have bolted TLB entries).
-        */
-#ifdef CONFIG_PPC_BOOK3E
-       copy_with_mmu_off = false;
-#else
-       copy_with_mmu_off = radix_enabled() ||
-               !(firmware_has_feature(FW_FEATURE_LPAR) ||
-                 firmware_has_feature(FW_FEATURE_PS3_LV1));
-#endif
-
-       /* Some things are best done in assembly.  Finding globals with
-        * a toc is easier in C, so pass in what we can.
-        */
-       kexec_sequence(&kexec_stack, image->start, image,
-                      page_address(image->control_code_page),
-                      mmu_cleanup_all, copy_with_mmu_off);
-       /* NOTREACHED */
-}
-
-#ifdef CONFIG_PPC_BOOK3S_64
-/* Values we need to export to the second kernel via the device tree. */
-static unsigned long htab_base;
-static unsigned long htab_size;
-
-static struct property htab_base_prop = {
-       .name = "linux,htab-base",
-       .length = sizeof(unsigned long),
-       .value = &htab_base,
-};
-
-static struct property htab_size_prop = {
-       .name = "linux,htab-size",
-       .length = sizeof(unsigned long),
-       .value = &htab_size,
-};
-
-static int __init export_htab_values(void)
-{
-       struct device_node *node;
-
-       /* On machines with no htab htab_address is NULL */
-       if (!htab_address)
-               return -ENODEV;
-
-       node = of_find_node_by_path("/chosen");
-       if (!node)
-               return -ENODEV;
-
-       /* remove any stale properties so ours can be found */
-       of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
-       of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
-
-       htab_base = cpu_to_be64(__pa(htab_address));
-       of_add_property(node, &htab_base_prop);
-       htab_size = cpu_to_be64(htab_size_bytes);
-       of_add_property(node, &htab_size_prop);
-
-       of_node_put(node);
-       return 0;
-}
-late_initcall(export_htab_values);
-#endif /* CONFIG_PPC_BOOK3S_64 */
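
kexec_smp_down()/kexec_prepare_cpus_wait() above form a rendezvous: each secondary publishes KEXEC_STATE_IRQS_OFF in its paca and spins on kexec_all_irq_disabled, while the boot CPU waits for every CPU's state before setting that flag. A userspace sketch of the same handshake with threads and C11 atomics (compile with -pthread); all names here are stand-ins:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4
enum { STATE_RUNNING, STATE_IRQS_OFF };

static _Atomic int cpu_state[NCPUS];
static _Atomic int all_irq_disabled;

static void *cpu_down(void *arg)           /* plays kexec_smp_down() */
{
        int cpu = (int)(long)arg;

        atomic_store(&cpu_state[cpu], STATE_IRQS_OFF);
        while (!atomic_load(&all_irq_disabled))
                ; /* cpu_relax() in the kernel */
        return NULL;
}

int main(void)
{
        pthread_t t[NCPUS];
        int i;

        for (i = 0; i < NCPUS; i++)
                pthread_create(&t[i], NULL, cpu_down, (void *)(long)i);

        for (i = 0; i < NCPUS; i++)        /* kexec_prepare_cpus_wait() */
                while (atomic_load(&cpu_state[i]) < STATE_IRQS_OFF)
                        ;
        atomic_store(&all_irq_disabled, 1);

        for (i = 0; i < NCPUS; i++)
                pthread_join(t[i], NULL);
        printf("all %d cpus quiesced\n", NCPUS);
        return 0;
}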
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
deleted file mode 100644 (file)
index 143c917..0000000
+++ /dev/null
@@ -1,254 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ppc64 code to implement the kexec_file_load syscall
- *
- * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
- * Copyright (C) 2004  IBM Corp.
- * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
- * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
- * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
- * Copyright (C) 2016  IBM Corporation
- *
- * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
- * Heavily modified for the kernel by
- * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
- */
-
-#include <linux/slab.h>
-#include <linux/kexec.h>
-#include <linux/of_fdt.h>
-#include <linux/libfdt.h>
-#include <asm/ima.h>
-
-#define SLAVE_CODE_SIZE                256
-
-const struct kexec_file_ops * const kexec_file_loaders[] = {
-       &kexec_elf64_ops,
-       NULL
-};
-
-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
-                                 unsigned long buf_len)
-{
-       /* We don't support crash kernels yet. */
-       if (image->type == KEXEC_TYPE_CRASH)
-               return -EOPNOTSUPP;
-
-       return kexec_image_probe_default(image, buf, buf_len);
-}
-
-/**
- * setup_purgatory - initialize the purgatory's global variables
- * @image:             kexec image.
- * @slave_code:                Slave code for the purgatory.
- * @fdt:               Flattened device tree for the next kernel.
- * @kernel_load_addr:  Address where the kernel is loaded.
- * @fdt_load_addr:     Address where the flattened device tree is loaded.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_purgatory(struct kimage *image, const void *slave_code,
-                   const void *fdt, unsigned long kernel_load_addr,
-                   unsigned long fdt_load_addr)
-{
-       unsigned int *slave_code_buf, master_entry;
-       int ret;
-
-       slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
-       if (!slave_code_buf)
-               return -ENOMEM;
-
-       /* Get the slave code from the new kernel and put it in purgatory. */
-       ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
-                                            slave_code_buf, SLAVE_CODE_SIZE,
-                                            true);
-       if (ret) {
-               kfree(slave_code_buf);
-               return ret;
-       }
-
-       master_entry = slave_code_buf[0];
-       memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
-       slave_code_buf[0] = master_entry;
-       ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
-                                            slave_code_buf, SLAVE_CODE_SIZE,
-                                            false);
-       kfree(slave_code_buf);
-
-       ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
-                                            sizeof(kernel_load_addr), false);
-       if (ret)
-               return ret;
-       ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
-                                            sizeof(fdt_load_addr), false);
-       if (ret)
-               return ret;
-
-       return 0;
-}
-
-/**
- * delete_fdt_mem_rsv - delete memory reservation with given address and size
- *
- * Return: 0 on success, or negative errno on error.
- */
-int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
-{
-       int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
-
-       for (i = 0; i < num_rsvs; i++) {
-               uint64_t rsv_start, rsv_size;
-
-               ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
-               if (ret) {
-                       pr_err("Malformed device tree.\n");
-                       return -EINVAL;
-               }
-
-               if (rsv_start == start && rsv_size == size) {
-                       ret = fdt_del_mem_rsv(fdt, i);
-                       if (ret) {
-                               pr_err("Error deleting device tree reservation.\n");
-                               return -EINVAL;
-                       }
-
-                       return 0;
-               }
-       }
-
-       return -ENOENT;
-}
-
-/*
- * setup_new_fdt - modify /chosen and memory reservation for the next kernel
- * @image:             kexec image being loaded.
- * @fdt:               Flattened device tree for the next kernel.
- * @initrd_load_addr:  Address where the next initrd will be loaded.
- * @initrd_len:                Size of the next initrd, or 0 if there will be none.
- * @cmdline:           Command line for the next kernel, or NULL if there will
- *                     be none.
- *
- * Return: 0 on success, or negative errno on error.
- */
-int setup_new_fdt(const struct kimage *image, void *fdt,
-                 unsigned long initrd_load_addr, unsigned long initrd_len,
-                 const char *cmdline)
-{
-       int ret, chosen_node;
-       const void *prop;
-
-       /* Remove memory reservation for the current device tree. */
-       ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
-                                fdt_totalsize(initial_boot_params));
-       if (ret == 0)
-               pr_debug("Removed old device tree reservation.\n");
-       else if (ret != -ENOENT)
-               return ret;
-
-       chosen_node = fdt_path_offset(fdt, "/chosen");
-       if (chosen_node == -FDT_ERR_NOTFOUND) {
-               chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
-                                             "chosen");
-               if (chosen_node < 0) {
-                       pr_err("Error creating /chosen.\n");
-                       return -EINVAL;
-               }
-       } else if (chosen_node < 0) {
-               pr_err("Malformed device tree: error reading /chosen.\n");
-               return -EINVAL;
-       }
-
-       /* Did we boot using an initrd? */
-       prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
-       if (prop) {
-               uint64_t tmp_start, tmp_end, tmp_size;
-
-               tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
-
-               prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
-               if (!prop) {
-                       pr_err("Malformed device tree.\n");
-                       return -EINVAL;
-               }
-               tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
-
-               /*
-                * kexec reserves exact initrd size, while firmware may
-                * reserve a multiple of PAGE_SIZE, so check for both.
-                */
-               tmp_size = tmp_end - tmp_start;
-               ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
-               if (ret == -ENOENT)
-                       ret = delete_fdt_mem_rsv(fdt, tmp_start,
-                                                round_up(tmp_size, PAGE_SIZE));
-               if (ret == 0)
-                       pr_debug("Removed old initrd reservation.\n");
-               else if (ret != -ENOENT)
-                       return ret;
-
-               /* If there's no new initrd, delete the old initrd's info. */
-               if (initrd_len == 0) {
-                       ret = fdt_delprop(fdt, chosen_node,
-                                         "linux,initrd-start");
-                       if (ret) {
-                               pr_err("Error deleting linux,initrd-start.\n");
-                               return -EINVAL;
-                       }
-
-                       ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
-                       if (ret) {
-                               pr_err("Error deleting linux,initrd-end.\n");
-                               return -EINVAL;
-                       }
-               }
-       }
-
-       if (initrd_len) {
-               ret = fdt_setprop_u64(fdt, chosen_node,
-                                     "linux,initrd-start",
-                                     initrd_load_addr);
-               if (ret < 0)
-                       goto err;
-
-               /* initrd-end is the first address after the initrd image. */
-               ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
-                                     initrd_load_addr + initrd_len);
-               if (ret < 0)
-                       goto err;
-
-               ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
-               if (ret) {
-                       pr_err("Error reserving initrd memory: %s\n",
-                              fdt_strerror(ret));
-                       return -EINVAL;
-               }
-       }
-
-       if (cmdline != NULL) {
-               ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
-               if (ret < 0)
-                       goto err;
-       } else {
-               ret = fdt_delprop(fdt, chosen_node, "bootargs");
-               if (ret && ret != -FDT_ERR_NOTFOUND) {
-                       pr_err("Error deleting bootargs.\n");
-                       return -EINVAL;
-               }
-       }
-
-       ret = setup_ima_buffer(image, fdt, chosen_node);
-       if (ret) {
-               pr_err("Error setting up the new device tree.\n");
-               return ret;
-       }
-
-       ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
-       if (ret)
-               goto err;
-
-       return 0;
-
-err:
-       pr_err("Error setting up the new device tree.\n");
-       return -EINVAL;
-}
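
delete_fdt_mem_rsv() above scans the FDT memory-reservation block for an exact (start, size) match and removes it, returning -ENOENT when nothing matches. A self-contained sketch of that scan over a plain array; the real code walks the block via libfdt's fdt_get_mem_rsv()/fdt_del_mem_rsv(), and unlike this sketch preserves entry order:

#include <stdio.h>
#include <stdint.h>

struct mem_rsv { uint64_t start, size; };

static int delete_rsv(struct mem_rsv *rsvs, int *n,
                      uint64_t start, uint64_t size)
{
        int i;

        for (i = 0; i < *n; i++) {
                if (rsvs[i].start == start && rsvs[i].size == size) {
                        rsvs[i] = rsvs[--(*n)]; /* order not preserved here */
                        return 0;
                }
        }
        return -1; /* -ENOENT in the kernel */
}

int main(void)
{
        struct mem_rsv rsvs[] = { { 0x1000, 0x2000 }, { 0x8000, 0x1000 } };
        int n = 2;

        if (delete_rsv(rsvs, &n, 0x8000, 0x1000) == 0)
                printf("deleted, %d reservation(s) left\n", n);
        return 0;
}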
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 82df4b0..d80212b 100644 (file)
@@ -6,11 +6,6 @@
  * Largely rewritten by Cort Dougan (cort@cs.nmt.edu)
  * and Paul Mackerras.
  *
- * kexec bits:
- * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
- * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
- * PPC44x port. Copyright (C) 2011,  IBM Corporation
- *             Author: Suzuki Poulose <suzuki@in.ibm.com>
  */
 
 #include <linux/sys.h>
@@ -25,7 +20,6 @@
 #include <asm/thread_info.h>
 #include <asm/asm-offsets.h>
 #include <asm/processor.h>
-#include <asm/kexec.h>
 #include <asm/bug.h>
 #include <asm/ptrace.h>
 #include <asm/export.h>
@@ -316,126 +310,6 @@ _GLOBAL(flush_instruction_cache)
 EXPORT_SYMBOL(flush_instruction_cache)
 #endif /* CONFIG_PPC_8xx */
 
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- * This is a no-op on the 601.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- */
-_GLOBAL(flush_icache_range)
-#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
-       PURGE_PREFETCHED_INS
-       blr                             /* for 601 and e200, do nothing */
-#else
-       rlwinm  r3,r3,0,0,31 - L1_CACHE_SHIFT
-       subf    r4,r3,r4
-       addi    r4,r4,L1_CACHE_BYTES - 1
-       srwi.   r4,r4,L1_CACHE_SHIFT
-       beqlr
-       mtctr   r4
-       mr      r6,r3
-1:     dcbst   0,r3
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    1b
-       sync                            /* wait for dcbst's to get to ram */
-#ifndef CONFIG_44x
-       mtctr   r4
-2:     icbi    0,r6
-       addi    r6,r6,L1_CACHE_BYTES
-       bdnz    2b
-#else
-       /* Flash invalidate on 44x because we are passed kmapped addresses and
-          this doesn't work for userspace pages due to the virtually tagged
-          icache.  Sigh. */
-       iccci   0, r0
-#endif
-       sync                            /* additional sync needed on g4 */
-       isync
-       blr
-#endif
-_ASM_NOKPROBE_SYMBOL(flush_icache_range)
-EXPORT_SYMBOL(flush_icache_range)
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- * This is a no-op on the 601 and e200 which have a unified cache.
- *
- *     void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
-       PURGE_PREFETCHED_INS
-       blr
-#else
-       rlwinm  r3,r3,0,0,31-PAGE_SHIFT         /* Get page base address */
-       li      r4,PAGE_SIZE/L1_CACHE_BYTES     /* Number of lines in a page */
-       mtctr   r4
-       mr      r6,r3
-0:     dcbst   0,r3                            /* Write line to ram */
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    0b
-       sync
-#ifdef CONFIG_44x
-       /* We don't flush the icache on 44x. Those have a virtual icache
-        * and we don't have access to the virtual address here (it's
-        * not the page vaddr but where it's mapped in user space). The
-        * flushing of the icache on these is handled elsewhere, when
-        * a change in the address space occurs, before returning to
-        * user space
-        */
-BEGIN_MMU_FTR_SECTION
-       blr
-END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_44x)
-#endif /* CONFIG_44x */
-       mtctr   r4
-1:     icbi    0,r6
-       addi    r6,r6,L1_CACHE_BYTES
-       bdnz    1b
-       sync
-       isync
-       blr
-#endif
-
-#ifndef CONFIG_BOOKE
-/*
- * Flush a particular page from the data cache to RAM, identified
- * by its physical address.  We turn off the MMU so we can just use
- * the physical address (this may be a highmem page without a kernel
- * mapping).
- *
- *     void __flush_dcache_icache_phys(unsigned long physaddr)
- */
-_GLOBAL(__flush_dcache_icache_phys)
-#if defined(CONFIG_PPC_BOOK3S_601) || defined(CONFIG_E200)
-       PURGE_PREFETCHED_INS
-       blr                                     /* for 601 and e200, do nothing */
-#else
-       mfmsr   r10
-       rlwinm  r0,r10,0,28,26                  /* clear DR */
-       mtmsr   r0
-       isync
-       rlwinm  r3,r3,0,0,31-PAGE_SHIFT         /* Get page base address */
-       li      r4,PAGE_SIZE/L1_CACHE_BYTES     /* Number of lines in a page */
-       mtctr   r4
-       mr      r6,r3
-0:     dcbst   0,r3                            /* Write line to ram */
-       addi    r3,r3,L1_CACHE_BYTES
-       bdnz    0b
-       sync
-       mtctr   r4
-1:     icbi    0,r6
-       addi    r6,r6,L1_CACHE_BYTES
-       bdnz    1b
-       sync
-       mtmsr   r10                             /* restore DR */
-       isync
-       blr
-#endif
-#endif /* CONFIG_BOOKE */
-
 /*
  * Copy a whole page.  We use the dcbz instruction on the destination
  * to reduce memory traffic (it eliminates the unnecessary reads of
@@ -614,488 +488,3 @@ _GLOBAL(start_secondary_resume)
  */
 _GLOBAL(__main)
        blr
-
-#ifdef CONFIG_KEXEC_CORE
-       /*
-        * Must be relocatable PIC code callable as a C function.
-        */
-       .globl relocate_new_kernel
-relocate_new_kernel:
-       /* r3 = page_list   */
-       /* r4 = reboot_code_buffer */
-       /* r5 = start_address      */
-
-#ifdef CONFIG_FSL_BOOKE
-
-       mr      r29, r3
-       mr      r30, r4
-       mr      r31, r5
-
-#define ENTRY_MAPPING_KEXEC_SETUP
-#include "fsl_booke_entry_mapping.S"
-#undef ENTRY_MAPPING_KEXEC_SETUP
-
-       mr      r3, r29
-       mr      r4, r30
-       mr      r5, r31
-
-       li      r0, 0
-#elif defined(CONFIG_44x)
-
-       /* Save our parameters */
-       mr      r29, r3
-       mr      r30, r4
-       mr      r31, r5
-
-#ifdef CONFIG_PPC_47x
-       /* Check for 47x cores */
-       mfspr   r3,SPRN_PVR
-       srwi    r3,r3,16
-       cmplwi  cr0,r3,PVR_476FPE@h
-       beq     setup_map_47x
-       cmplwi  cr0,r3,PVR_476@h
-       beq     setup_map_47x
-       cmplwi  cr0,r3,PVR_476_ISS@h
-       beq     setup_map_47x
-#endif /* CONFIG_PPC_47x */
-       
-/*
- * Code for setting up 1:1 mapping for PPC440x for KEXEC
- *
- * We cannot switch off the MMU on PPC44x.
- * So we:
- * 1) Invalidate all the mappings except the one we are running from.
- * 2) Create a tmp mapping for our code in the other address space (TS) and
- *    jump to it. Invalidate the entry we started in.
- * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
- * 4) Jump to the 1:1 mapping in original TS.
- * 5) Invalidate the tmp mapping.
- *
- * - Based on the kexec support code for FSL BookE
- *
- */
-
-       /* 
-        * Load the PID with kernel PID (0).
-        * Also load our MSR_IS and TID to MMUCR for TLB search.
-        */
-       li      r3, 0
-       mtspr   SPRN_PID, r3
-       mfmsr   r4
-       andi.   r4,r4,MSR_IS@l
-       beq     wmmucr
-       oris    r3,r3,PPC44x_MMUCR_STS@h
-wmmucr:
-       mtspr   SPRN_MMUCR,r3
-       sync
-
-       /*
-        * Invalidate all the TLB entries except the current entry
-        * where we are running from
-        */
-       bl      0f                              /* Find our address */
-0:     mflr    r5                              /* Make it accessible */
-       tlbsx   r23,0,r5                        /* Find entry we are in */
-       li      r4,0                            /* Start at TLB entry 0 */
-       li      r3,0                            /* Set PAGEID inval value */
-1:     cmpw    r23,r4                          /* Is this our entry? */
-       beq     skip                            /* If so, skip the inval */
-       tlbwe   r3,r4,PPC44x_TLB_PAGEID         /* If not, inval the entry */
-skip:
-       addi    r4,r4,1                         /* Increment */
-       cmpwi   r4,64                           /* Are we done? */
-       bne     1b                              /* If not, repeat */
-       isync
-
-       /* Create a temp mapping and jump to it */
-       andi.   r6, r23, 1              /* Find the index to use */
-       addi    r24, r6, 1              /* r24 will contain 1 or 2 */
-
-       mfmsr   r9                      /* get the MSR */
-       rlwinm  r5, r9, 27, 31, 31      /* Extract the MSR[IS] */
-       xori    r7, r5, 1               /* Use the other address space */
-
-       /* Read the current mapping entries */
-       tlbre   r3, r23, PPC44x_TLB_PAGEID
-       tlbre   r4, r23, PPC44x_TLB_XLAT
-       tlbre   r5, r23, PPC44x_TLB_ATTRIB
-
-       /* Save our current XLAT entry */
-       mr      r25, r4
-
-       /* Extract the TLB PageSize */
-       li      r10, 1                  /* r10 will hold PageSize */
-       rlwinm  r11, r3, 0, 24, 27      /* bits 24-27 */
-
-       /* XXX: As of now we use 256M, 4K pages */
-       cmpwi   r11, PPC44x_TLB_256M
-       bne     tlb_4k
-       rotlwi  r10, r10, 28            /* r10 = 256M */
-       b       write_out
-tlb_4k:
-       cmpwi   r11, PPC44x_TLB_4K
-       bne     default
-       rotlwi  r10, r10, 12            /* r10 = 4K */
-       b       write_out
-default:
-       rotlwi  r10, r10, 10            /* r10 = 1K */
-
-write_out:
-       /*
-        * Write out the tmp 1:1 mapping for this code in other address space
-        * Fixup  EPN = RPN , TS=other address space
-        */
-       insrwi  r3, r7, 1, 23           /* Bit 23 is TS for PAGEID field */
-
-       /* Write out the tmp mapping entries */
-       tlbwe   r3, r24, PPC44x_TLB_PAGEID
-       tlbwe   r4, r24, PPC44x_TLB_XLAT
-       tlbwe   r5, r24, PPC44x_TLB_ATTRIB
-
-       subi    r11, r10, 1             /* PageOffset Mask = PageSize - 1 */
-       not     r10, r11                /* Mask for PageNum */
-
-       /* Switch to other address space in MSR */
-       insrwi  r9, r7, 1, 26           /* Set MSR[IS] = r7 */
-
-       bl      1f
-1:     mflr    r8
-       addi    r8, r8, (2f-1b)         /* Find the target offset */
-
-       /* Jump to the tmp mapping */
-       mtspr   SPRN_SRR0, r8
-       mtspr   SPRN_SRR1, r9
-       rfi
-
-2:
-       /* Invalidate the entry we were executing from */
-       li      r3, 0
-       tlbwe   r3, r23, PPC44x_TLB_PAGEID
-
-       /* attribute fields. rwx for SUPERVISOR mode */
-       li      r5, 0
-       ori     r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
-
-       /* Create 1:1 mapping in 256M pages */
-       xori    r7, r7, 1                       /* Revert back to Original TS */
-
-       li      r8, 0                           /* PageNumber */
-       li      r6, 3                           /* TLB Index, start at 3  */
-
-next_tlb:
-       rotlwi  r3, r8, 28                      /* Create EPN (bits 0-3) */
-       mr      r4, r3                          /* RPN = EPN  */
-       ori     r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
-       insrwi  r3, r7, 1, 23                   /* Set TS from r7 */
-
-       tlbwe   r3, r6, PPC44x_TLB_PAGEID       /* PageID field : EPN, V, SIZE */
-       tlbwe   r4, r6, PPC44x_TLB_XLAT         /* Address translation : RPN   */
-       tlbwe   r5, r6, PPC44x_TLB_ATTRIB       /* Attributes */
-
-       addi    r8, r8, 1                       /* Increment PN */
-       addi    r6, r6, 1                       /* Increment TLB Index */
-       cmpwi   r8, 8                           /* Are we done ? */
-       bne     next_tlb
-       isync
-
-       /* Jump to the new mapping 1:1 */
-       li      r9,0
-       insrwi  r9, r7, 1, 26                   /* Set MSR[IS] = r7 */
-
-       bl      1f
-1:     mflr    r8
-       and     r8, r8, r11                     /* Get our offset within page */
-       addi    r8, r8, (2f-1b)
-
-       and     r5, r25, r10                    /* Get our target PageNum */
-       or      r8, r8, r5                      /* Target jump address */
-
-       mtspr   SPRN_SRR0, r8
-       mtspr   SPRN_SRR1, r9
-       rfi
-2:
-       /* Invalidate the tmp entry we used */
-       li      r3, 0
-       tlbwe   r3, r24, PPC44x_TLB_PAGEID
-       sync
-       b       ppc44x_map_done
-
-#ifdef CONFIG_PPC_47x
-
-       /* 1:1 mapping for 47x */
-
-setup_map_47x:
-
-       /*
-        * Load the kernel pid (0) to PID and also to MMUCR[TID].
-        * Also set the MSR IS->MMUCR STS
-        */
-       li      r3, 0
-       mtspr   SPRN_PID, r3                    /* Set PID */
-       mfmsr   r4                              /* Get MSR */
-       andi.   r4, r4, MSR_IS@l                /* TS=1? */
-       beq     1f                              /* If not, leave STS=0 */
-       oris    r3, r3, PPC47x_MMUCR_STS@h      /* Set STS=1 */
-1:     mtspr   SPRN_MMUCR, r3                  /* Put MMUCR */
-       sync
-
-       /* Find the entry we are running from */
-       bl      2f
-2:     mflr    r23
-       tlbsx   r23, 0, r23
-       tlbre   r24, r23, 0                     /* TLB Word 0 */
-       tlbre   r25, r23, 1                     /* TLB Word 1 */
-       tlbre   r26, r23, 2                     /* TLB Word 2 */
-
-
-       /*
-        * Invalidates all the tlb entries by writing to 256 RPNs(r4)
-        * of 4k page size in all  4 ways (0-3 in r3).
-        * This would invalidate the entire UTLB including the one we are
-        * running from. However the shadow TLB entries would help us 
-        * to continue the execution, until we flush them (rfi/isync).
-        */
-       addis   r3, 0, 0x8000                   /* specify the way */
-       addi    r4, 0, 0                        /* TLB Word0 = (EPN=0, VALID = 0) */
-       addi    r5, 0, 0
-       b       clear_utlb_entry
-
-       /* Align the loop to speed things up. from head_44x.S */
-       .align  6
-
-clear_utlb_entry:
-
-       tlbwe   r4, r3, 0
-       tlbwe   r5, r3, 1
-       tlbwe   r5, r3, 2
-       addis   r3, r3, 0x2000                  /* Increment the way */
-       cmpwi   r3, 0
-       bne     clear_utlb_entry
-       addis   r3, 0, 0x8000
-       addis   r4, r4, 0x100                   /* Increment the EPN */
-       cmpwi   r4, 0
-       bne     clear_utlb_entry
-
-       /* Create the entries in the other address space */
-       mfmsr   r5
-       rlwinm  r7, r5, 27, 31, 31              /* Get the TS (Bit 26) from MSR */
-       xori    r7, r7, 1                       /* r7 = !TS */
-
-       insrwi  r24, r7, 1, 21                  /* Change the TS in the saved TLB word 0 */
-
-       /* 
-        * write out the TLB entries for the tmp mapping
-        * Use way '0' so that we could easily invalidate it later.
-        */
-       lis     r3, 0x8000                      /* Way '0' */ 
-
-       tlbwe   r24, r3, 0
-       tlbwe   r25, r3, 1
-       tlbwe   r26, r3, 2
-
-       /* Update the msr to the new TS */
-       insrwi  r5, r7, 1, 26
-
-       bl      1f
-1:     mflr    r6
-       addi    r6, r6, (2f-1b)
-
-       mtspr   SPRN_SRR0, r6
-       mtspr   SPRN_SRR1, r5
-       rfi
-
-       /* 
-        * Now we are in the tmp address space.
-        * Create a 1:1 mapping for 0-2GiB in the original TS.
-        */
-2:
-       li      r3, 0
-       li      r4, 0                           /* TLB Word 0 */
-       li      r5, 0                           /* TLB Word 1 */
-       li      r6, 0
-       ori     r6, r6, PPC47x_TLB2_S_RWX       /* TLB word 2 */
-
-       li      r8, 0                           /* PageIndex */
-
-       xori    r7, r7, 1                       /* revert back to original TS */
-
-write_utlb:
-       rotlwi  r5, r8, 28                      /* RPN = PageIndex * 256M */
-                                               /* ERPN = 0 as we don't use memory above 2G */
-
-       mr      r4, r5                          /* EPN = RPN */
-       ori     r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
-       insrwi  r4, r7, 1, 21                   /* Insert the TS to Word 0 */
-
-       tlbwe   r4, r3, 0                       /* Write out the entries */
-       tlbwe   r5, r3, 1
-       tlbwe   r6, r3, 2
-       addi    r8, r8, 1
-       cmpwi   r8, 8                           /* Have we completed ? */
-       bne     write_utlb
-
-       /* make sure we complete the TLB write up */
-       isync
-
-       /* 
-        * Prepare to jump to the 1:1 mapping.
-        * 1) Extract page size of the tmp mapping
-        *    DSIZ = TLB_Word0[22:27]
-        * 2) Calculate the physical address of the address
-        *    to jump to.
-        */
-       rlwinm  r10, r24, 0, 22, 27
-
-       cmpwi   r10, PPC47x_TLB0_4K
-       bne     0f
-       li      r10, 0x1000                     /* r10 = 4k */
-       bl      1f
-
-0:
-       /* Defaults to 256M */
-       lis     r10, 0x1000
-       
-       bl      1f
-1:     mflr    r4
-       addi    r4, r4, (2f-1b)                 /* virtual address  of 2f */
-
-       subi    r11, r10, 1                     /* offsetmask = Pagesize - 1 */
-       not     r10, r11                        /* Pagemask = ~(offsetmask) */
-
-       and     r5, r25, r10                    /* Physical page */
-       and     r6, r4, r11                     /* offset within the current page */
-
-       or      r5, r5, r6                      /* Physical address for 2f */
-
-       /* Switch the TS in MSR to the original one */
-       mfmsr   r8
-       insrwi  r8, r7, 1, 26
-
-       mtspr   SPRN_SRR1, r8
-       mtspr   SPRN_SRR0, r5
-       rfi
-
-2:
-       /* Invalidate the tmp mapping */
-       lis     r3, 0x8000                      /* Way '0' */
-
-       clrrwi  r24, r24, 12                    /* Clear the valid bit */
-       tlbwe   r24, r3, 0
-       tlbwe   r25, r3, 1
-       tlbwe   r26, r3, 2
-
-       /* Make sure we complete the TLB write and flush the shadow TLB */
-       isync
-
-#endif
-
-ppc44x_map_done:
-
-
-       /* Restore the parameters */
-       mr      r3, r29
-       mr      r4, r30
-       mr      r5, r31
-
-       li      r0, 0
-#else
-       li      r0, 0
-
-       /*
-        * Set Machine Status Register to a known status,
-        * switch the MMU off and jump to 1: in a single step.
-        */
-
-       mr      r8, r0
-       ori     r8, r8, MSR_RI|MSR_ME
-       mtspr   SPRN_SRR1, r8
-       addi    r8, r4, 1f - relocate_new_kernel
-       mtspr   SPRN_SRR0, r8
-       sync
-       rfi
-
-1:
-#endif
-       /* from this point address translation is turned off */
-       /* and interrupts are disabled */
-
-       /* set a new stack at the bottom of our page... */
-       /* (not really needed now) */
-       addi    r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
-       stw     r0, 0(r1)
-
-       /* Do the copies */
-       li      r6, 0 /* checksum */
-       mr      r0, r3
-       b       1f
-
-0:     /* top, read another word for the indirection page */
-       lwzu    r0, 4(r3)
-
-1:
-       /* is it a destination page? (r8) */
-       rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
-       beq     2f
-
-       rlwinm  r8, r0, 0, 0, 19 /* clear kexec flags, page align */
-       b       0b
-
-2:     /* is it an indirection page? (r3) */
-       rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
-       beq     2f
-
-       rlwinm  r3, r0, 0, 0, 19 /* clear kexec flags, page align */
-       subi    r3, r3, 4
-       b       0b
-
-2:     /* are we done? */
-       rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
-       beq     2f
-       b       3f
-
-2:     /* is it a source page? (r9) */
-       rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
-       beq     0b
-
-       rlwinm  r9, r0, 0, 0, 19 /* clear kexec flags, page align */
-
-       li      r7, PAGE_SIZE / 4
-       mtctr   r7
-       subi    r9, r9, 4
-       subi    r8, r8, 4
-9:
-       lwzu    r0, 4(r9)  /* do the copy */
-       xor     r6, r6, r0
-       stwu    r0, 4(r8)
-       dcbst   0, r8
-       sync
-       icbi    0, r8
-       bdnz    9b
-
-       addi    r9, r9, 4
-       addi    r8, r8, 4
-       b       0b
-
-3:
-
-       /* To be certain of avoiding problems with self-modifying code
-        * execute a serializing instruction here.
-        */
-       isync
-       sync
-
-       mfspr   r3, SPRN_PIR /* current core we are running on */
-       mr      r4, r5 /* load physical address of chunk called */
-
-       /* jump to the entry point, usually the setup routine */
-       mtlr    r5
-       blrl
-
-1:     b       1b
-
-relocate_new_kernel_end:
-
-       .globl relocate_new_kernel_size
-relocate_new_kernel_size:
-       .long relocate_new_kernel_end - relocate_new_kernel
-#endif
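
The next_tlb loop above programs eight 256MB TLB entries that identity-map (EPN == RPN) the first 2GB before control leaves the temporary mapping. A rough C rendering of the three words written per iteration, assuming the PPC44x_TLB_* constants from the 44x MMU header (ts is the translation-space bit the insrwi instructions patch in):

	/* Sketch only: TLB words for 1:1 entry i (0..7) using 256MB pages. */
	u32 epn   = i << 28;                          /* rotlwi: EPN = i * 256M  */
	u32 word0 = epn | PPC44x_TLB_VALID | PPC44x_TLB_256M |
		    (ts ? PPC44x_TLB_TS : 0);         /* PAGEID: EPN, V, SIZE, TS */
	u32 word1 = epn;                              /* XLAT: RPN = EPN (1:1)   */
	u32 word2 = PPC44x_TLB_SW | PPC44x_TLB_SR |   /* ATTRIB: supervisor rwx, */
		    PPC44x_TLB_SX | PPC44x_TLB_G;     /* guarded                 */
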
index b55a7b4..1864605 100644 (file)
@@ -49,108 +49,6 @@ _GLOBAL(call_do_irq)
        mtlr    r0
        blr
 
-       .section        ".toc","aw"
-PPC64_CACHES:
-       .tc             ppc64_caches[TC],ppc64_caches
-       .section        ".text"
-
-/*
- * Write any modified data cache blocks out to memory
- * and invalidate the corresponding instruction cache blocks.
- *
- * flush_icache_range(unsigned long start, unsigned long stop)
- *
- *   flush all bytes from start through stop-1 inclusive
- */
-
-_GLOBAL_TOC(flush_icache_range)
-BEGIN_FTR_SECTION
-       PURGE_PREFETCHED_INS
-       blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- * and in some cases i-cache and d-cache line sizes differ from
- * each other.
- */
-       ld      r10,PPC64_CACHES@toc(r2)
-       lwz     r7,DCACHEL1BLOCKSIZE(r10)/* Get cache block size */
-       addi    r5,r7,-1
-       andc    r6,r3,r5                /* round low to line bdy */
-       subf    r8,r6,r4                /* compute length */
-       add     r8,r8,r5                /* ensure we get enough */
-       lwz     r9,DCACHEL1LOGBLOCKSIZE(r10)    /* Get log-2 of cache block size */
-       srw.    r8,r8,r9                /* compute line count */
-       beqlr                           /* nothing to do? */
-       mtctr   r8
-1:     dcbst   0,r6
-       add     r6,r6,r7
-       bdnz    1b
-       sync
-
-/* Now invalidate the instruction cache */
-       
-       lwz     r7,ICACHEL1BLOCKSIZE(r10)       /* Get Icache block size */
-       addi    r5,r7,-1
-       andc    r6,r3,r5                /* round low to line bdy */
-       subf    r8,r6,r4                /* compute length */
-       add     r8,r8,r5
-       lwz     r9,ICACHEL1LOGBLOCKSIZE(r10)    /* Get log-2 of Icache block size */
-       srw.    r8,r8,r9                /* compute line count */
-       beqlr                           /* nothing to do? */
-       mtctr   r8
-2:     icbi    0,r6
-       add     r6,r6,r7
-       bdnz    2b
-       isync
-       blr
-_ASM_NOKPROBE_SYMBOL(flush_icache_range)
-EXPORT_SYMBOL(flush_icache_range)
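
The block-count arithmetic above (round start down to a block boundary, pad the length by blocksize - 1, shift right by log2 of the block size) is what the C helpers replacing this assembly elsewhere in the series have to reproduce. A minimal sketch, assuming the block size is a power of two:

	/* Number of cache blocks covering [start, stop), bsize = 1UL << lshift. */
	static inline unsigned long cache_block_count(unsigned long start,
						      unsigned long stop,
						      unsigned long bsize,
						      unsigned long lshift)
	{
		unsigned long low = start & ~(bsize - 1);  /* round low to line bdy */

		return (stop - low + bsize - 1) >> lshift; /* ceiling line count    */
	}
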
-
-/*
- * Flush a particular page from the data cache to RAM.
- * Note: this is necessary because the instruction cache does *not*
- * snoop from the data cache.
- *
- *     void __flush_dcache_icache(void *page)
- */
-_GLOBAL(__flush_dcache_icache)
-/*
- * Flush the data cache to memory 
- * 
- * Different systems have different cache line sizes
- */
-
-BEGIN_FTR_SECTION
-       PURGE_PREFETCHED_INS
-       blr
-END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE)
-
-/* Flush the dcache */
-       ld      r7,PPC64_CACHES@toc(r2)
-       clrrdi  r3,r3,PAGE_SHIFT                    /* Page align */
-       lwz     r4,DCACHEL1BLOCKSPERPAGE(r7)    /* Get # dcache blocks per page */
-       lwz     r5,DCACHEL1BLOCKSIZE(r7)        /* Get dcache block size */
-       mr      r6,r3
-       mtctr   r4
-0:     dcbst   0,r6
-       add     r6,r6,r5
-       bdnz    0b
-       sync
-
-/* Now invalidate the icache */        
-
-       lwz     r4,ICACHEL1BLOCKSPERPAGE(r7)    /* Get # icache blocks per page */
-       lwz     r5,ICACHEL1BLOCKSIZE(r7)        /* Get icache block size */
-       mtctr   r4
-1:     icbi    0,r3
-       add     r3,r3,r5
-       bdnz    1b
-       isync
-       blr
-
 _GLOBAL(__bswapdi2)
 EXPORT_SYMBOL(__bswapdi2)
        srdi    r8,r3,32
@@ -432,18 +330,13 @@ kexec_create_tlb:
        rlwimi  r9,r10,16,4,15          /* Setup MAS0 = TLBSEL | ESEL(r9) */
 
 /* Set up a temp identity mapping v:0 to p:0 and return to it. */
-#if defined(CONFIG_SMP) || defined(CONFIG_PPC_E500MC)
-#define M_IF_NEEDED    MAS2_M
-#else
-#define M_IF_NEEDED    0
-#endif
        mtspr   SPRN_MAS0,r9
 
        lis     r9,(MAS1_VALID|MAS1_IPROT)@h
        ori     r9,r9,(MAS1_TSIZE(BOOK3E_PAGESZ_1GB))@l
        mtspr   SPRN_MAS1,r9
 
-       LOAD_REG_IMMEDIATE(r9, 0x0 | M_IF_NEEDED)
+       LOAD_REG_IMMEDIATE(r9, 0x0 | MAS2_M_IF_NEEDED)
        mtspr   SPRN_MAS2,r9
 
        LOAD_REG_IMMEDIATE(r9, 0x0 | MAS3_SR | MAS3_SW | MAS3_SX)
index 639ceae..4df94b6 100644 (file)
@@ -715,6 +715,8 @@ static void set_debug_reg_defaults(struct thread_struct *thread)
 {
        thread->hw_brk.address = 0;
        thread->hw_brk.type = 0;
+       thread->hw_brk.len = 0;
+       thread->hw_brk.hw_len = 0;
        if (ppc_breakpoint_available())
                set_breakpoint(&thread->hw_brk);
 }
@@ -816,6 +818,7 @@ static inline bool hw_brk_match(struct arch_hw_breakpoint *a,
                return false;
        if (a->len != b->len)
                return false;
+       /* no need to check hw_len. it's calculated from address and len */
        return true;
 }
 
index 100f1b5..5773453 100644 (file)
@@ -303,16 +303,24 @@ static char __init *prom_strstr(const char *s1, const char *s2)
        return NULL;
 }
 
-static size_t __init prom_strlcpy(char *dest, const char *src, size_t size)
-{
-       size_t ret = prom_strlen(src);
+static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
+{
+       size_t dsize = prom_strlen(dest);
+       size_t len = prom_strlen(src);
+       size_t res = dsize + len;
+
+       /* This would be a bug */
+       if (dsize >= count)
+               return count;
+
+       dest += dsize;
+       count -= dsize;
+       if (len >= count)
+               len = count-1;
+       memcpy(dest, src, len);
+       dest[len] = 0;
+       return res;
-       if (size) {
-               size_t len = (ret >= size) ? size - 1 : ret;
-               memcpy(dest, src, len);
-               dest[len] = '\0';
-       }
-       return ret;
 }
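
prom_strlcat() follows the usual strlcat() contract: append at most count - strlen(dest) - 1 bytes, always NUL-terminate, and return the total length it tried to create so the caller can detect truncation. A hedged usage sketch (values illustrative):

	char cmd[16] = "root=/dev/sda";          /* 13 characters used */

	/* returns 13 + 6 = 19 >= sizeof(cmd), i.e. the append was truncated */
	if (prom_strlcat(cmd, " quiet", sizeof(cmd)) >= sizeof(cmd))
		prom_printf("command line truncated\n");
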
 
 #ifdef CONFIG_PPC_PSERIES
@@ -764,10 +772,14 @@ static void __init early_cmdline_parse(void)
 
        prom_cmd_line[0] = 0;
        p = prom_cmd_line;
-       if ((long)prom.chosen > 0)
+
+       if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && (long)prom.chosen > 0)
                l = prom_getprop(prom.chosen, "bootargs", p, COMMAND_LINE_SIZE-1);
-       if (IS_ENABLED(CONFIG_CMDLINE_BOOL) && (l <= 0 || p[0] == '\0')) /* dbl check */
-               prom_strlcpy(prom_cmd_line, CONFIG_CMDLINE, sizeof(prom_cmd_line));
+
+       if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0')
+               prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
+                            sizeof(prom_cmd_line));
+
        prom_printf("command line: %s\n", prom_cmd_line);
 
 #ifdef CONFIG_PPC64
@@ -1053,7 +1065,7 @@ static const struct ibm_arch_vec ibm_architecture_vec_template __initconst = {
                .reserved2 = 0,
                .reserved3 = 0,
                .subprocessors = 1,
-               .byte22 = OV5_FEAT(OV5_DRMEM_V2),
+               .byte22 = OV5_FEAT(OV5_DRMEM_V2) | OV5_FEAT(OV5_DRC_INFO),
                .intarch = 0,
                .mmu = 0,
                .hash_ext = 0,
index 8c92feb..25c0424 100644 (file)
@@ -2425,7 +2425,8 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
                return -EIO;
        hw_brk.address = data & (~HW_BRK_TYPE_DABR);
        hw_brk.type = (data & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
-       hw_brk.len = 8;
+       hw_brk.len = DABR_MAX_LEN;
+       hw_brk.hw_len = DABR_MAX_LEN;
        set_bp = (data) && (hw_brk.type & HW_BRK_TYPE_RDWR);
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
        bp = thread->ptrace_bps[0];
@@ -2439,6 +2440,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
        if (bp) {
                attr = bp->attr;
                attr.bp_addr = hw_brk.address;
+               attr.bp_len = DABR_MAX_LEN;
                arch_bp_generic_fields(hw_brk.type, &attr.bp_type);
 
                /* Enable breakpoint */
@@ -2456,7 +2458,7 @@ static int ptrace_set_debugreg(struct task_struct *task, unsigned long addr,
        /* Create a new breakpoint request if one doesn't exist already */
        hw_breakpoint_init(&attr);
        attr.bp_addr = hw_brk.address;
-       attr.bp_len = 8;
+       attr.bp_len = DABR_MAX_LEN;
        arch_bp_generic_fields(hw_brk.type,
                               &attr.bp_type);
 
@@ -2880,18 +2882,14 @@ static long ppc_set_hwdebug(struct task_struct *child,
        if ((unsigned long)bp_info->addr >= TASK_SIZE)
                return -EIO;
 
-       brk.address = bp_info->addr & ~7UL;
+       brk.address = bp_info->addr & ~HW_BREAKPOINT_ALIGN;
        brk.type = HW_BRK_TYPE_TRANSLATE;
-       brk.len = 8;
+       brk.len = DABR_MAX_LEN;
        if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_READ)
                brk.type |= HW_BRK_TYPE_READ;
        if (bp_info->trigger_type & PPC_BREAKPOINT_TRIGGER_WRITE)
                brk.type |= HW_BRK_TYPE_WRITE;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
-       /*
-        * Check if the request is for 'range' breakpoints. We can
-        * support it if range < 8 bytes.
-        */
        if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE)
                len = bp_info->addr2 - bp_info->addr;
        else if (bp_info->addr_mode == PPC_BREAKPOINT_MODE_EXACT)
@@ -2904,7 +2902,7 @@ static long ppc_set_hwdebug(struct task_struct *child,
 
        /* Create a new breakpoint request if one doesn't exist already */
        hw_breakpoint_init(&attr);
-       attr.bp_addr = (unsigned long)bp_info->addr & ~HW_BREAKPOINT_ALIGN;
+       attr.bp_addr = (unsigned long)bp_info->addr;
        attr.bp_len = len;
        arch_bp_generic_fields(brk.type, &attr.bp_type);
 
@@ -3361,6 +3359,12 @@ void do_syscall_trace_leave(struct pt_regs *regs)
        user_enter();
 }
 
+void __init pt_regs_check(void);
+
+/*
+ * Dummy function; its purpose is to break the build if struct pt_regs and
+ * struct user_pt_regs don't match.
+ */
 void __init pt_regs_check(void)
 {
        BUILD_BUG_ON(offsetof(struct pt_regs, gpr) !=
@@ -3398,4 +3402,67 @@ void __init pt_regs_check(void)
                     offsetof(struct user_pt_regs, result));
 
        BUILD_BUG_ON(sizeof(struct user_pt_regs) > sizeof(struct pt_regs));
+
+       // Now check that the pt_regs offsets match the uapi #defines
+       #define CHECK_REG(_pt, _reg) \
+               BUILD_BUG_ON(_pt != (offsetof(struct user_pt_regs, _reg) / \
+                                    sizeof(unsigned long)));
+
+       CHECK_REG(PT_R0,  gpr[0]);
+       CHECK_REG(PT_R1,  gpr[1]);
+       CHECK_REG(PT_R2,  gpr[2]);
+       CHECK_REG(PT_R3,  gpr[3]);
+       CHECK_REG(PT_R4,  gpr[4]);
+       CHECK_REG(PT_R5,  gpr[5]);
+       CHECK_REG(PT_R6,  gpr[6]);
+       CHECK_REG(PT_R7,  gpr[7]);
+       CHECK_REG(PT_R8,  gpr[8]);
+       CHECK_REG(PT_R9,  gpr[9]);
+       CHECK_REG(PT_R10, gpr[10]);
+       CHECK_REG(PT_R11, gpr[11]);
+       CHECK_REG(PT_R12, gpr[12]);
+       CHECK_REG(PT_R13, gpr[13]);
+       CHECK_REG(PT_R14, gpr[14]);
+       CHECK_REG(PT_R15, gpr[15]);
+       CHECK_REG(PT_R16, gpr[16]);
+       CHECK_REG(PT_R17, gpr[17]);
+       CHECK_REG(PT_R18, gpr[18]);
+       CHECK_REG(PT_R19, gpr[19]);
+       CHECK_REG(PT_R20, gpr[20]);
+       CHECK_REG(PT_R21, gpr[21]);
+       CHECK_REG(PT_R22, gpr[22]);
+       CHECK_REG(PT_R23, gpr[23]);
+       CHECK_REG(PT_R24, gpr[24]);
+       CHECK_REG(PT_R25, gpr[25]);
+       CHECK_REG(PT_R26, gpr[26]);
+       CHECK_REG(PT_R27, gpr[27]);
+       CHECK_REG(PT_R28, gpr[28]);
+       CHECK_REG(PT_R29, gpr[29]);
+       CHECK_REG(PT_R30, gpr[30]);
+       CHECK_REG(PT_R31, gpr[31]);
+       CHECK_REG(PT_NIP, nip);
+       CHECK_REG(PT_MSR, msr);
+       CHECK_REG(PT_ORIG_R3, orig_gpr3);
+       CHECK_REG(PT_CTR, ctr);
+       CHECK_REG(PT_LNK, link);
+       CHECK_REG(PT_XER, xer);
+       CHECK_REG(PT_CCR, ccr);
+#ifdef CONFIG_PPC64
+       CHECK_REG(PT_SOFTE, softe);
+#else
+       CHECK_REG(PT_MQ, mq);
+#endif
+       CHECK_REG(PT_TRAP, trap);
+       CHECK_REG(PT_DAR, dar);
+       CHECK_REG(PT_DSISR, dsisr);
+       CHECK_REG(PT_RESULT, result);
+       #undef CHECK_REG
+
+       BUILD_BUG_ON(PT_REGS_COUNT != sizeof(struct user_pt_regs) / sizeof(unsigned long));
+
+       /*
+        * PT_DSCR isn't a real reg, but it's important that it doesn't overlap the
+        * real registers.
+        */
+       BUILD_BUG_ON(PT_DSCR < sizeof(struct user_pt_regs) / sizeof(unsigned long));
 }
diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c
new file mode 100644 (file)
index 0000000..4b98232
--- /dev/null
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ */
+#include <linux/types.h>
+#include <linux/of.h>
+#include <asm/secure_boot.h>
+
+static struct device_node *get_ppc_fw_sb_node(void)
+{
+       static const struct of_device_id ids[] = {
+               { .compatible = "ibm,secureboot", },
+               { .compatible = "ibm,secureboot-v1", },
+               { .compatible = "ibm,secureboot-v2", },
+               {},
+       };
+
+       return of_find_matching_node(NULL, ids);
+}
+
+bool is_ppc_secureboot_enabled(void)
+{
+       struct device_node *node;
+       bool enabled = false;
+
+       node = get_ppc_fw_sb_node();
+       enabled = of_property_read_bool(node, "os-secureboot-enforcing");
+
+       of_node_put(node);
+
+       pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled");
+
+       return enabled;
+}
+
+bool is_ppc_trustedboot_enabled(void)
+{
+       struct device_node *node;
+       bool enabled = false;
+
+       node = get_ppc_fw_sb_node();
+       enabled = of_property_read_bool(node, "trusted-enabled");
+
+       of_node_put(node);
+
+       pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled");
+
+       return enabled;
+}
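
Both predicates are plain bools resolved once from the device tree, so consumers can sample them from an initcall. An illustrative sketch only; this consumer is hypothetical and not part of the patch:

	static int __init example_secureboot_check(void)
	{
		/* Hypothetical consumer: react to the firmware settings. */
		if (is_ppc_secureboot_enabled() || is_ppc_trustedboot_enabled())
			pr_info("example: applying stricter boot policy\n");
		return 0;
	}
	late_initcall(example_secureboot_check);
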
index bd91dce..bd70f5b 100644 (file)
@@ -16,7 +16,7 @@
 #include <asm/setup.h>
 
 
-unsigned long powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
+u64 powerpc_security_features __read_mostly = SEC_FTR_DEFAULT;
 
 enum count_cache_flush_type {
        COUNT_CACHE_FLUSH_NONE  = 0x1,
@@ -95,13 +95,14 @@ static int barrier_nospec_get(void *data, u64 *val)
        return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_barrier_nospec,
-                       barrier_nospec_get, barrier_nospec_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_barrier_nospec, barrier_nospec_get,
+                        barrier_nospec_set, "%llu\n");
 
 static __init int barrier_nospec_debugfs_init(void)
 {
-       debugfs_create_file("barrier_nospec", 0600, powerpc_debugfs_root, NULL,
-                           &fops_barrier_nospec);
+       debugfs_create_file_unsafe("barrier_nospec", 0600,
+                                  powerpc_debugfs_root, NULL,
+                                  &fops_barrier_nospec);
        return 0;
 }
 device_initcall(barrier_nospec_debugfs_init);
@@ -109,7 +110,7 @@ device_initcall(barrier_nospec_debugfs_init);
 static __init int security_feature_debugfs_init(void)
 {
        debugfs_create_x64("security_features", 0400, powerpc_debugfs_root,
-                          (u64 *)&powerpc_security_features);
+                          &powerpc_security_features);
        return 0;
 }
 device_initcall(security_feature_debugfs_init);
@@ -142,32 +143,33 @@ ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, cha
 
        thread_priv = security_ftr_enabled(SEC_FTR_L1D_THREAD_PRIV);
 
-       if (rfi_flush || thread_priv) {
+       if (rfi_flush) {
                struct seq_buf s;
                seq_buf_init(&s, buf, PAGE_SIZE - 1);
 
-               seq_buf_printf(&s, "Mitigation: ");
-
-               if (rfi_flush)
-                       seq_buf_printf(&s, "RFI Flush");
-
-               if (rfi_flush && thread_priv)
-                       seq_buf_printf(&s, ", ");
-
+               seq_buf_printf(&s, "Mitigation: RFI Flush");
                if (thread_priv)
-                       seq_buf_printf(&s, "L1D private per thread");
+                       seq_buf_printf(&s, ", L1D private per thread");
 
                seq_buf_printf(&s, "\n");
 
                return s.len;
        }
 
+       if (thread_priv)
+               return sprintf(buf, "Vulnerable: L1D private per thread\n");
+
        if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) &&
            !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR))
                return sprintf(buf, "Not affected\n");
 
        return sprintf(buf, "Vulnerable\n");
 }
+
+ssize_t cpu_show_l1tf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+       return cpu_show_meltdown(dev, attr, buf);
+}
 #endif
 
 ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf)
@@ -376,11 +378,13 @@ static int stf_barrier_get(void *data, u64 *val)
        return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set,
+                        "%llu\n");
 
 static __init int stf_barrier_debugfs_init(void)
 {
-       debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier);
+       debugfs_create_file_unsafe("stf_barrier", 0600, powerpc_debugfs_root,
+                                  NULL, &fops_stf_barrier);
        return 0;
 }
 device_initcall(stf_barrier_debugfs_init);
@@ -491,13 +495,14 @@ static int count_cache_flush_get(void *data, u64 *val)
        return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
-                       count_cache_flush_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(fops_count_cache_flush, count_cache_flush_get,
+                        count_cache_flush_set, "%llu\n");
 
 static __init int count_cache_flush_debugfs_init(void)
 {
-       debugfs_create_file("count_cache_flush", 0600, powerpc_debugfs_root,
-                           NULL, &fops_count_cache_flush);
+       debugfs_create_file_unsafe("count_cache_flush", 0600,
+                                  powerpc_debugfs_root, NULL,
+                                  &fops_count_cache_flush);
        return 0;
 }
 device_initcall(count_cache_flush_debugfs_init);
diff --git a/arch/powerpc/kernel/secvar-ops.c b/arch/powerpc/kernel/secvar-ops.c
new file mode 100644 (file)
index 0000000..6a29777
--- /dev/null
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ * This file initializes secvar operations for PowerPC Secureboot
+ */
+
+#include <linux/cache.h>
+#include <asm/secvar.h>
+
+const struct secvar_operations *secvar_ops __ro_after_init;
+
+void set_secvar_ops(const struct secvar_operations *ops)
+{
+       secvar_ops = ops;
+}
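
A firmware backend publishes its accessors through this hook. The sketch below is hypothetical; the secvar_operations signatures are inferred from how the sysfs consumer added below calls them (get with a NULL buffer reports the size, get_next enumerates names, set stages an update):

	/* Sketch of a stub backend; a real one would talk to firmware. */
	static int example_get(const char *key, uint64_t key_len,
			       u8 *data, uint64_t *data_size)
	{
		return -ENOENT;
	}

	static int example_get_next(const char *key, uint64_t *key_len,
				    uint64_t keybufsize)
	{
		return -ENOENT;		/* no variables in this stub */
	}

	static int example_set(const char *key, uint64_t key_len,
			       u8 *data, uint64_t data_size)
	{
		return -ENXIO;
	}

	static const struct secvar_operations example_secvar_ops = {
		.get      = example_get,
		.get_next = example_get_next,
		.set      = example_set,
	};

	static int __init example_secvar_init(void)
	{
		set_secvar_ops(&example_secvar_ops);
		return 0;
	}
	device_initcall(example_secvar_init);

Registering at device_initcall time (or earlier) matters: the sysfs front end below runs at late_initcall and bails out with -ENODEV if no ops have been set.
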
diff --git a/arch/powerpc/kernel/secvar-sysfs.c b/arch/powerpc/kernel/secvar-sysfs.c
new file mode 100644 (file)
index 0000000..a0a78ab
--- /dev/null
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019 IBM Corporation <nayna@linux.ibm.com>
+ *
+ * This code exposes secure variables to userspace via sysfs
+ */
+
+#define pr_fmt(fmt) "secvar-sysfs: "fmt
+
+#include <linux/slab.h>
+#include <linux/compat.h>
+#include <linux/string.h>
+#include <linux/of.h>
+#include <asm/secvar.h>
+
+#define NAME_MAX_SIZE     1024
+
+static struct kobject *secvar_kobj;
+static struct kset *secvar_kset;
+
+static ssize_t format_show(struct kobject *kobj, struct kobj_attribute *attr,
+                          char *buf)
+{
+       ssize_t rc = 0;
+       struct device_node *node;
+       const char *format;
+
+       node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+       if (!of_device_is_available(node))
+               return -ENODEV;
+
+       rc = of_property_read_string(node, "format", &format);
+       if (rc)
+               return rc;
+
+       rc = sprintf(buf, "%s\n", format);
+
+       of_node_put(node);
+
+       return rc;
+}
+
+static ssize_t size_show(struct kobject *kobj, struct kobj_attribute *attr,
+                        char *buf)
+{
+       uint64_t dsize;
+       int rc;
+
+       rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
+       if (rc) {
+               pr_err("Error retrieving %s variable size %d\n", kobj->name,
+                      rc);
+               return rc;
+       }
+
+       return sprintf(buf, "%llu\n", dsize);
+}
+
+static ssize_t data_read(struct file *filep, struct kobject *kobj,
+                        struct bin_attribute *attr, char *buf, loff_t off,
+                        size_t count)
+{
+       uint64_t dsize;
+       char *data;
+       int rc;
+
+       rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, NULL, &dsize);
+       if (rc) {
+               pr_err("Error getting %s variable size %d\n", kobj->name, rc);
+               return rc;
+       }
+       pr_debug("dsize is %llu\n", dsize);
+
+       data = kzalloc(dsize, GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       rc = secvar_ops->get(kobj->name, strlen(kobj->name) + 1, data, &dsize);
+       if (rc) {
+               pr_err("Error getting %s variable %d\n", kobj->name, rc);
+               goto data_fail;
+       }
+
+       rc = memory_read_from_buffer(buf, count, &off, data, dsize);
+
+data_fail:
+       kfree(data);
+       return rc;
+}
+
+static ssize_t update_write(struct file *filep, struct kobject *kobj,
+                           struct bin_attribute *attr, char *buf, loff_t off,
+                           size_t count)
+{
+       int rc;
+
+       pr_debug("count is %ld\n", count);
+       rc = secvar_ops->set(kobj->name, strlen(kobj->name) + 1, buf, count);
+       if (rc) {
+               pr_err("Error setting the %s variable %d\n", kobj->name, rc);
+               return rc;
+       }
+
+       return count;
+}
+
+static struct kobj_attribute format_attr = __ATTR_RO(format);
+
+static struct kobj_attribute size_attr = __ATTR_RO(size);
+
+static struct bin_attribute data_attr = __BIN_ATTR_RO(data, 0);
+
+static struct bin_attribute update_attr = __BIN_ATTR_WO(update, 0);
+
+static struct bin_attribute *secvar_bin_attrs[] = {
+       &data_attr,
+       &update_attr,
+       NULL,
+};
+
+static struct attribute *secvar_attrs[] = {
+       &size_attr.attr,
+       NULL,
+};
+
+static const struct attribute_group secvar_attr_group = {
+       .attrs = secvar_attrs,
+       .bin_attrs = secvar_bin_attrs,
+};
+__ATTRIBUTE_GROUPS(secvar_attr);
+
+static struct kobj_type secvar_ktype = {
+       .sysfs_ops      = &kobj_sysfs_ops,
+       .default_groups = secvar_attr_groups,
+};
+
+static int update_kobj_size(void)
+{
+       struct device_node *node;
+       u64 varsize;
+       int rc = 0;
+
+       node = of_find_compatible_node(NULL, NULL, "ibm,secvar-backend");
+       if (!of_device_is_available(node)) {
+               rc = -ENODEV;
+               goto out;
+       }
+
+       rc = of_property_read_u64(node, "max-var-size", &varsize);
+       if (rc)
+               goto out;
+
+       data_attr.size = varsize;
+       update_attr.size = varsize;
+
+out:
+       of_node_put(node);
+
+       return rc;
+}
+
+static int secvar_sysfs_load(void)
+{
+       char *name;
+       uint64_t namesize = 0;
+       struct kobject *kobj;
+       int rc;
+
+       name = kzalloc(NAME_MAX_SIZE, GFP_KERNEL);
+       if (!name)
+               return -ENOMEM;
+
+       do {
+               rc = secvar_ops->get_next(name, &namesize, NAME_MAX_SIZE);
+               if (rc) {
+                       if (rc != -ENOENT)
+                               pr_err("error getting secvar from firmware %d\n",
+                                      rc);
+                       break;
+               }
+
+               kobj = kzalloc(sizeof(*kobj), GFP_KERNEL);
+               if (!kobj) {
+                       rc = -ENOMEM;
+                       break;
+               }
+
+               kobject_init(kobj, &secvar_ktype);
+
+               rc = kobject_add(kobj, &secvar_kset->kobj, "%s", name);
+               if (rc) {
+                       pr_warn("kobject_add error %d for attribute: %s\n", rc,
+                               name);
+                       kobject_put(kobj);
+                       kobj = NULL;
+               }
+
+               if (kobj)
+                       kobject_uevent(kobj, KOBJ_ADD);
+
+       } while (!rc);
+
+       kfree(name);
+       return rc;
+}
+
+static int secvar_sysfs_init(void)
+{
+       int rc;
+
+       if (!secvar_ops) {
+               pr_warn("secvar: failed to retrieve secvar operations.\n");
+               return -ENODEV;
+       }
+
+       secvar_kobj = kobject_create_and_add("secvar", firmware_kobj);
+       if (!secvar_kobj) {
+               pr_err("secvar: Failed to create firmware kobj\n");
+               return -ENOMEM;
+       }
+
+       rc = sysfs_create_file(secvar_kobj, &format_attr.attr);
+       if (rc) {
+               kobject_put(secvar_kobj);
+               return -ENOMEM;
+       }
+
+       secvar_kset = kset_create_and_add("vars", NULL, secvar_kobj);
+       if (!secvar_kset) {
+               pr_err("secvar: sysfs kobject registration failed.\n");
+               kobject_put(secvar_kobj);
+               return -ENOMEM;
+       }
+
+       rc = update_kobj_size();
+       if (rc) {
+               pr_err("Cannot read the size of the attribute\n");
+               return rc;
+       }
+
+       secvar_sysfs_load();
+
+       return 0;
+}
+
+late_initcall(secvar_sysfs_init);
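
The net effect, once a backend has registered its ops: /sys/firmware/secvar/format reports the backend's variable format, and each variable the backend enumerates appears as /sys/firmware/secvar/vars/<name>/ with read-only size and data attributes plus a write-only update attribute, the binary attributes sized from the device tree's max-var-size property.
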
index 25aaa39..488f1ee 100644 (file)
@@ -715,8 +715,28 @@ static struct notifier_block ppc_panic_block = {
        .priority = INT_MIN /* may not return; must be done last */
 };
 
+/*
+ * Dump out kernel offset information on panic.
+ */
+static int dump_kernel_offset(struct notifier_block *self, unsigned long v,
+                             void *p)
+{
+       pr_emerg("Kernel Offset: 0x%lx from 0x%lx\n",
+                kaslr_offset(), KERNELBASE);
+
+       return 0;
+}
+
+static struct notifier_block kernel_offset_notifier = {
+       .notifier_call = dump_kernel_offset
+};
+
 void __init setup_panic(void)
 {
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE) && kaslr_offset() > 0)
+               atomic_notifier_chain_register(&panic_notifier_list,
+                                              &kernel_offset_notifier);
+
        /* PPC64 always does a hard irq disable in its panic handler */
        if (!IS_ENABLED(CONFIG_PPC64) && !ppc_md.panic)
                return;
index a7541ed..dcffe92 100644 (file)
@@ -44,6 +44,7 @@
 #include <asm/asm-prototypes.h>
 #include <asm/kdump.h>
 #include <asm/feature-fixups.h>
+#include <asm/early_ioremap.h>
 
 #include "setup.h"
 
@@ -80,6 +81,8 @@ notrace void __init machine_init(u64 dt_ptr)
        /* Configure static keys first, now that we're relocated. */
        setup_feature_keys();
 
+       early_ioremap_setup();
+
        /* Enable early debugging if any specified (see udbg.h) */
        udbg_early_init();
 
index 44b4c43..6104917 100644 (file)
 #include <asm/hw_irq.h>
 #include <asm/feature-fixups.h>
 #include <asm/kup.h>
+#include <asm/early_ioremap.h>
 
 #include "setup.h"
 
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
 int spinning_secondaries;
 u64 ppc64_pft_size;
 
@@ -305,7 +300,7 @@ void __init early_setup(unsigned long dt_ptr)
        /* Enable early debugging if any specified (see udbg.h) */
        udbg_early_init();
 
-       DBG(" -> early_setup(), dt_ptr: 0x%lx\n", dt_ptr);
+       udbg_printf(" -> %s(), dt_ptr: 0x%lx\n", __func__, dt_ptr);
 
        /*
         * Do early initialization using the flattened device
@@ -338,6 +333,8 @@ void __init early_setup(unsigned long dt_ptr)
        apply_feature_fixups();
        setup_feature_keys();
 
+       early_ioremap_setup();
+
        /* Initialize the hash table or TLB handling */
        early_init_mmu();
 
@@ -362,11 +359,11 @@ void __init early_setup(unsigned long dt_ptr)
         */
        this_cpu_enable_ftrace();
 
-       DBG(" <- early_setup()\n");
+       udbg_printf(" <- %s()\n", __func__);
 
 #ifdef CONFIG_PPC_EARLY_DEBUG_BOOTX
        /*
-        * This needs to be done *last* (after the above DBG() even)
+        * This needs to be done *last* (after the above udbg_printf() even)
         *
         * Right after we return from this function, we turn on the MMU
         * which means the real-mode access trick that btext does will
@@ -436,8 +433,6 @@ void smp_release_cpus(void)
        if (!use_spinloop())
                return;
 
-       DBG(" -> smp_release_cpus()\n");
-
        /* All secondary cpus are spinning on a common spinloop, release them
         * all now so they can start to spin on their individual paca
         * spinloops. For non SMP kernels, the secondary cpus never get out
@@ -456,9 +451,7 @@ void smp_release_cpus(void)
                        break;
                udelay(1);
        }
-       DBG("spinning_secondaries = %d\n", spinning_secondaries);
-
-       DBG(" <- smp_release_cpus()\n");
+       pr_debug("spinning_secondaries = %d\n", spinning_secondaries);
 }
 #endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
 
@@ -551,8 +544,6 @@ void __init initialize_cache_info(void)
        struct device_node *cpu = NULL, *l2, *l3 = NULL;
        u32 pvr;
 
-       DBG(" -> initialize_cache_info()\n");
-
        /*
         * All shipping POWER8 machines have a firmware bug that
         * puts incorrect information in the device-tree. This will
@@ -576,10 +567,10 @@ void __init initialize_cache_info(void)
         */
        if (cpu) {
                if (!parse_cache_info(cpu, false, &ppc64_caches.l1d))
-                       DBG("Argh, can't find dcache properties !\n");
+                       pr_warn("Argh, can't find dcache properties !\n");
 
                if (!parse_cache_info(cpu, true, &ppc64_caches.l1i))
-                       DBG("Argh, can't find icache properties !\n");
+                       pr_warn("Argh, can't find icache properties !\n");
 
                /*
                 * Try to find the L2 and L3 if any. Assume they are
@@ -604,8 +595,6 @@ void __init initialize_cache_info(void)
 
        cur_cpu_spec->dcache_bsize = dcache_bsize;
        cur_cpu_spec->icache_bsize = icache_bsize;
-
-       DBG(" <- initialize_cache_info()\n");
 }
 
 /*
index 84827da..c7f1ea3 100644 (file)
@@ -232,7 +232,7 @@ static u64 scan_dispatch_log(u64 stop_tb)
  * Accumulate stolen time by scanning the dispatch trace log.
  * Called on entry from user mode.
  */
-void accumulate_stolen_time(void)
+void notrace accumulate_stolen_time(void)
 {
        u64 sst, ust;
        unsigned long save_irq_soft_mask = irq_soft_mask_return();
index 82f4353..014ff07 100644 (file)
@@ -250,15 +250,22 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
 }
 NOKPROBE_SYMBOL(oops_end);
 
+static char *get_mmu_str(void)
+{
+       if (early_radix_enabled())
+               return " MMU=Radix";
+       if (early_mmu_has_feature(MMU_FTR_HPTE_TABLE))
+               return " MMU=Hash";
+       return "";
+}
+
 static int __die(const char *str, struct pt_regs *regs, long err)
 {
        printk("Oops: %s, sig: %ld [#%d]\n", str, err, ++die_counter);
 
-       printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s%s %s\n",
+       printk("%s PAGE_SIZE=%luK%s%s%s%s%s%s %s\n",
               IS_ENABLED(CONFIG_CPU_LITTLE_ENDIAN) ? "LE" : "BE",
-              PAGE_SIZE / 1024,
-              early_radix_enabled() ? " MMU=Radix" : "",
-              early_mmu_has_feature(MMU_FTR_HPTE_TABLE) ? " MMU=Hash" : "",
+              PAGE_SIZE / 1024, get_mmu_str(),
               IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "",
               IS_ENABLED(CONFIG_SMP) ? " SMP" : "",
               IS_ENABLED(CONFIG_SMP) ? (" NR_CPUS=" __stringify(NR_CPUS)) : "",
index a384e7c..01595e8 100644 (file)
@@ -120,13 +120,15 @@ int udbg_write(const char *s, int n)
 #define UDBG_BUFSIZE 256
 void udbg_printf(const char *fmt, ...)
 {
-       char buf[UDBG_BUFSIZE];
-       va_list args;
+       if (udbg_putc) {
+               char buf[UDBG_BUFSIZE];
+               va_list args;
 
-       va_start(args, fmt);
-       vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
-       udbg_puts(buf);
-       va_end(args);
+               va_start(args, fmt);
+               vsnprintf(buf, UDBG_BUFSIZE, fmt, args);
+               udbg_puts(buf);
+               va_end(args);
+       }
 }
 
 void __init udbg_progress(char *s, unsigned short hex)
index 3f92561..526f5ba 100644 (file)
@@ -35,7 +35,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
        subf    r8,r6,r4                /* compute length */
        add     r8,r8,r5                /* ensure we get enough */
        lwz     r9,CFG_DCACHE_LOGBLOCKSZ(r10)
-       srw.    r8,r8,r9                /* compute line count */
+       srd.    r8,r8,r9                /* compute line count */
        crclr   cr0*4+so
        beqlr                           /* nothing to do? */
        mtctr   r8
@@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_sync_dicache)
        subf    r8,r6,r4                /* compute length */
        add     r8,r8,r5
        lwz     r9,CFG_ICACHE_LOGBLOCKSZ(r10)
-       srw.    r8,r8,r9                /* compute line count */
+       srd.    r8,r8,r9                /* compute line count */
        crclr   cr0*4+so
        beqlr                           /* nothing to do? */
        mtctr   r8
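
The one-character fix matters because srw shifts only the low 32 bits of its source (the upper bits are treated as zero), so for a flush range larger than 4GB the line count could truncate to zero, beqlr would take the "nothing to do" exit, and no cache maintenance happened at all; srd performs the full 64-bit shift. In C terms, with 128-byte cache blocks:

	u64 len   = 5ULL << 32;         /* a flush range larger than 4GB     */
	u64 wrong = (u32)len >> 7;      /* srw-style: low word only, gives 0 */
	u64 right = len >> 7;           /* srd-style: the correct line count */
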
diff --git a/arch/powerpc/kexec/Makefile b/arch/powerpc/kexec/Makefile
new file mode 100644 (file)
index 0000000..378f610
--- /dev/null
@@ -0,0 +1,25 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for the linux kernel.
+#
+
+# Avoid clang warnings around longjmp/setjmp declarations
+CFLAGS_crash.o += -ffreestanding
+
+obj-y                          += core.o crash.o core_$(BITS).o
+
+obj-$(CONFIG_PPC32)            += relocate_32.o
+
+obj-$(CONFIG_KEXEC_FILE)       += file_load.o elf_$(BITS).o
+
+ifdef CONFIG_HAVE_IMA_KEXEC
+ifdef CONFIG_IMA
+obj-y                          += ima.o
+endif
+endif
+
+# Disable GCOV, KCOV & sanitizers in odd or sensitive code
+GCOV_PROFILE_core_$(BITS).o := n
+KCOV_INSTRUMENT_core_$(BITS).o := n
+UBSAN_SANITIZE_core_$(BITS).o := n
diff --git a/arch/powerpc/kexec/core.c b/arch/powerpc/kexec/core.c
new file mode 100644 (file)
index 0000000..078fe3d
--- /dev/null
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Code to handle transition of Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/reboot.h>
+#include <linux/threads.h>
+#include <linux/memblock.h>
+#include <linux/of.h>
+#include <linux/irq.h>
+#include <linux/ftrace.h>
+
+#include <asm/kdump.h>
+#include <asm/machdep.h>
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/sections.h>
+
+void machine_kexec_mask_interrupts(void)
+{
+       unsigned int i;
+       struct irq_desc *desc;
+
+       for_each_irq_desc(i, desc) {
+               struct irq_chip *chip;
+
+               chip = irq_desc_get_chip(desc);
+               if (!chip)
+                       continue;
+
+               if (chip->irq_eoi && irqd_irq_inprogress(&desc->irq_data))
+                       chip->irq_eoi(&desc->irq_data);
+
+               if (chip->irq_mask)
+                       chip->irq_mask(&desc->irq_data);
+
+               if (chip->irq_disable && !irqd_irq_disabled(&desc->irq_data))
+                       chip->irq_disable(&desc->irq_data);
+       }
+}
+
+void machine_crash_shutdown(struct pt_regs *regs)
+{
+       default_machine_crash_shutdown(regs);
+}
+
+/*
+ * Do whatever setup is needed on the image and the
+ * reboot code buffer to allow us to avoid allocations
+ * later.
+ */
+int machine_kexec_prepare(struct kimage *image)
+{
+       if (ppc_md.machine_kexec_prepare)
+               return ppc_md.machine_kexec_prepare(image);
+       else
+               return default_machine_kexec_prepare(image);
+}
+
+void machine_kexec_cleanup(struct kimage *image)
+{
+}
+
+void arch_crash_save_vmcoreinfo(void)
+{
+#ifdef CONFIG_NEED_MULTIPLE_NODES
+       VMCOREINFO_SYMBOL(node_data);
+       VMCOREINFO_LENGTH(node_data, MAX_NUMNODES);
+#endif
+#ifndef CONFIG_NEED_MULTIPLE_NODES
+       VMCOREINFO_SYMBOL(contig_page_data);
+#endif
+#if defined(CONFIG_PPC64) && defined(CONFIG_SPARSEMEM_VMEMMAP)
+       VMCOREINFO_SYMBOL(vmemmap_list);
+       VMCOREINFO_SYMBOL(mmu_vmemmap_psize);
+       VMCOREINFO_SYMBOL(mmu_psize_defs);
+       VMCOREINFO_STRUCT_SIZE(vmemmap_backing);
+       VMCOREINFO_OFFSET(vmemmap_backing, list);
+       VMCOREINFO_OFFSET(vmemmap_backing, phys);
+       VMCOREINFO_OFFSET(vmemmap_backing, virt_addr);
+       VMCOREINFO_STRUCT_SIZE(mmu_psize_def);
+       VMCOREINFO_OFFSET(mmu_psize_def, shift);
+#endif
+       vmcoreinfo_append_str("KERNELOFFSET=%lx\n", kaslr_offset());
+}
+
+/*
+ * Do not allocate memory (or fail in any way) in machine_kexec().
+ * We are past the point of no return, committed to rebooting now.
+ */
+void machine_kexec(struct kimage *image)
+{
+       int save_ftrace_enabled;
+
+       save_ftrace_enabled = __ftrace_enabled_save();
+       this_cpu_disable_ftrace();
+
+       if (ppc_md.machine_kexec)
+               ppc_md.machine_kexec(image);
+       else
+               default_machine_kexec(image);
+
+       this_cpu_enable_ftrace();
+       __ftrace_enabled_restore(save_ftrace_enabled);
+
+       /* Fall back to normal restart if we're still alive. */
+       machine_restart(NULL);
+       for(;;);
+}
+
+void __init reserve_crashkernel(void)
+{
+       unsigned long long crash_size, crash_base;
+       int ret;
+
+       /* use common parsing */
+       ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
+                       &crash_size, &crash_base);
+       if (ret == 0 && crash_size > 0) {
+               crashk_res.start = crash_base;
+               crashk_res.end = crash_base + crash_size - 1;
+       }
+
+       if (crashk_res.end == crashk_res.start) {
+               crashk_res.start = crashk_res.end = 0;
+               return;
+       }
+
+       /* We might have got these values via the command line or the
+        * device tree, either way sanitise them now. */
+
+       crash_size = resource_size(&crashk_res);
+
+#ifndef CONFIG_NONSTATIC_KERNEL
+       if (crashk_res.start != KDUMP_KERNELBASE)
+               printk("Crash kernel location must be 0x%x\n",
+                               KDUMP_KERNELBASE);
+
+       crashk_res.start = KDUMP_KERNELBASE;
+#else
+       if (!crashk_res.start) {
+#ifdef CONFIG_PPC64
+               /*
+                * On 64bit we split the RMO in half but cap it at half of
+                * a small SLB (128MB) since the crash kernel needs to place
+                * itself and some stacks to be in the first segment.
+                */
+               crashk_res.start = min(0x8000000ULL, (ppc64_rma_size / 2));
+#else
+               crashk_res.start = KDUMP_KERNELBASE;
+#endif
+       }
+
+       crash_base = PAGE_ALIGN(crashk_res.start);
+       if (crash_base != crashk_res.start) {
+               printk("Crash kernel base must be aligned to 0x%lx\n",
+                               PAGE_SIZE);
+               crashk_res.start = crash_base;
+       }
+
+#endif
+       crash_size = PAGE_ALIGN(crash_size);
+       crashk_res.end = crashk_res.start + crash_size - 1;
+
+       /* The crash region must not overlap the current kernel */
+       if (overlaps_crashkernel(__pa(_stext), _end - _stext)) {
+               printk(KERN_WARNING
+                       "Crash kernel can not overlap current kernel\n");
+               crashk_res.start = crashk_res.end = 0;
+               return;
+       }
+
+       /* Crash kernel trumps memory limit */
+       if (memory_limit && memory_limit <= crashk_res.end) {
+               memory_limit = crashk_res.end + 1;
+               printk("Adjusted memory limit for crashkernel, now 0x%llx\n",
+                      memory_limit);
+       }
+
+       printk(KERN_INFO "Reserving %ldMB of memory at %ldMB "
+                       "for crashkernel (System RAM: %ldMB)\n",
+                       (unsigned long)(crash_size >> 20),
+                       (unsigned long)(crashk_res.start >> 20),
+                       (unsigned long)(memblock_phys_mem_size() >> 20));
+
+       if (!memblock_is_region_memory(crashk_res.start, crash_size) ||
+           memblock_reserve(crashk_res.start, crash_size)) {
+               pr_err("Failed to reserve memory for crashkernel!\n");
+               crashk_res.start = crashk_res.end = 0;
+               return;
+       }
+}
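
Since this leans on the generic parse_crashkernel(), the usual crashkernel=size[@offset] command line syntax applies (for example crashkernel=256M@64M); whatever is requested is then page-aligned, range-checked against the running kernel, and on non-relocatable kernels forced to KDUMP_KERNELBASE.
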
+
+int overlaps_crashkernel(unsigned long start, unsigned long size)
+{
+       return (start + size) > crashk_res.start && start <= crashk_res.end;
+}
+
+/* Values we need to export to the second kernel via the device tree. */
+static phys_addr_t kernel_end;
+static phys_addr_t crashk_base;
+static phys_addr_t crashk_size;
+static unsigned long long mem_limit;
+
+static struct property kernel_end_prop = {
+       .name = "linux,kernel-end",
+       .length = sizeof(phys_addr_t),
+       .value = &kernel_end,
+};
+
+static struct property crashk_base_prop = {
+       .name = "linux,crashkernel-base",
+       .length = sizeof(phys_addr_t),
+       .value = &crashk_base
+};
+
+static struct property crashk_size_prop = {
+       .name = "linux,crashkernel-size",
+       .length = sizeof(phys_addr_t),
+       .value = &crashk_size,
+};
+
+static struct property memory_limit_prop = {
+       .name = "linux,memory-limit",
+       .length = sizeof(unsigned long long),
+       .value = &mem_limit,
+};
+
+#define cpu_to_be_ulong        __PASTE(cpu_to_be, BITS_PER_LONG)
+
+static void __init export_crashk_values(struct device_node *node)
+{
+       /* There might be existing crash kernel properties, but we can't
+        * be sure what's in them, so remove them. */
+       of_remove_property(node, of_find_property(node,
+                               "linux,crashkernel-base", NULL));
+       of_remove_property(node, of_find_property(node,
+                               "linux,crashkernel-size", NULL));
+
+       if (crashk_res.start != 0) {
+               crashk_base = cpu_to_be_ulong(crashk_res.start);
+               of_add_property(node, &crashk_base_prop);
+               crashk_size = cpu_to_be_ulong(resource_size(&crashk_res));
+               of_add_property(node, &crashk_size_prop);
+       }
+
+       /*
+        * memory_limit is required by the kexec-tools to limit the
+        * crash regions to the actual memory used.
+        */
+       mem_limit = cpu_to_be_ulong(memory_limit);
+       of_update_property(node, &memory_limit_prop);
+}
+
+static int __init kexec_setup(void)
+{
+       struct device_node *node;
+
+       node = of_find_node_by_path("/chosen");
+       if (!node)
+               return -ENOENT;
+
+       /* remove any stale properties so ours can be found */
+       of_remove_property(node, of_find_property(node, kernel_end_prop.name, NULL));
+
+       /* information needed by userspace when using default_machine_kexec */
+       kernel_end = cpu_to_be_ulong(__pa(_end));
+       of_add_property(node, &kernel_end_prop);
+
+       export_crashk_values(node);
+
+       of_node_put(node);
+       return 0;
+}
+late_initcall(kexec_setup);
diff --git a/arch/powerpc/kexec/core_32.c b/arch/powerpc/kexec/core_32.c
new file mode 100644 (file)
index 0000000..bf9f1f9
--- /dev/null
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PPC32 code to handle Linux booting another kernel.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * Copyright (C) 2005 IBM Corporation.
+ */
+
+#include <linux/kexec.h>
+#include <linux/mm.h>
+#include <linux/string.h>
+#include <asm/cacheflush.h>
+#include <asm/hw_irq.h>
+#include <asm/io.h>
+
+typedef void (*relocate_new_kernel_t)(
+                               unsigned long indirection_page,
+                               unsigned long reboot_code_buffer,
+                               unsigned long start_address) __noreturn;
+
+/*
+ * This is a generic machine_kexec function suitable at least for
+ * non-OpenFirmware embedded platforms.
+ * It merely copies the image relocation code to the control page and
+ * jumps to it.
+ * A platform-specific function may just call this one.
+ */
+void default_machine_kexec(struct kimage *image)
+{
+       extern const unsigned int relocate_new_kernel_size;
+       unsigned long page_list;
+       unsigned long reboot_code_buffer, reboot_code_buffer_phys;
+       relocate_new_kernel_t rnk;
+
+       /* Interrupts aren't acceptable while we reboot */
+       local_irq_disable();
+
+       /* mask each interrupt so we are in a more sane state for the
+        * kexec kernel */
+       machine_kexec_mask_interrupts();
+
+       page_list = image->head;
+
+       /* we need both effective and real address here */
+       reboot_code_buffer =
+                       (unsigned long)page_address(image->control_code_page);
+       reboot_code_buffer_phys = virt_to_phys((void *)reboot_code_buffer);
+
+       /* copy our kernel relocation code to the control code page */
+       memcpy((void *)reboot_code_buffer, relocate_new_kernel,
+                                               relocate_new_kernel_size);
+
+       flush_icache_range(reboot_code_buffer,
+                               reboot_code_buffer + KEXEC_CONTROL_PAGE_SIZE);
+       printk(KERN_INFO "Bye!\n");
+
+       if (!IS_ENABLED(CONFIG_FSL_BOOKE) && !IS_ENABLED(CONFIG_44x))
+               relocate_new_kernel(page_list, reboot_code_buffer_phys, image->start);
+
+       /* now call it */
+       rnk = (relocate_new_kernel_t) reboot_code_buffer;
+       (*rnk)(page_list, reboot_code_buffer_phys, image->start);
+}
+
+int default_machine_kexec_prepare(struct kimage *image)
+{
+       return 0;
+}
diff --git a/arch/powerpc/kexec/core_64.c b/arch/powerpc/kexec/core_64.c
new file mode 100644 (file)
index 0000000..04a7cba
--- /dev/null
@@ -0,0 +1,417 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PPC64 code to handle Linux booting another kernel.
+ *
+ * Copyright (C) 2004-2005, IBM Corp.
+ *
+ * Created by: Milton D Miller II
+ */
+
+
+#include <linux/kexec.h>
+#include <linux/smp.h>
+#include <linux/thread_info.h>
+#include <linux/init_task.h>
+#include <linux/errno.h>
+#include <linux/kernel.h>
+#include <linux/cpu.h>
+#include <linux/hardirq.h>
+
+#include <asm/page.h>
+#include <asm/current.h>
+#include <asm/machdep.h>
+#include <asm/cacheflush.h>
+#include <asm/firmware.h>
+#include <asm/paca.h>
+#include <asm/mmu.h>
+#include <asm/sections.h>      /* _end */
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/hw_breakpoint.h>
+#include <asm/asm-prototypes.h>
+#include <asm/svm.h>
+#include <asm/ultravisor.h>
+
+int default_machine_kexec_prepare(struct kimage *image)
+{
+       int i;
+       unsigned long begin, end;       /* limits of segment */
+       unsigned long low, high;        /* limits of blocked memory range */
+       struct device_node *node;
+       const unsigned long *basep;
+       const unsigned int *sizep;
+
+       /*
+        * Since we use the kernel fault handlers and paging code to
+        * handle the virtual mode, we must make sure no destination
+        * overlaps kernel static data or bss.
+        */
+       for (i = 0; i < image->nr_segments; i++)
+               if (image->segment[i].mem < __pa(_end))
+                       return -ETXTBSY;
+
+       /* We also should not overwrite the tce tables */
+       for_each_node_by_type(node, "pci") {
+               basep = of_get_property(node, "linux,tce-base", NULL);
+               sizep = of_get_property(node, "linux,tce-size", NULL);
+               if (basep == NULL || sizep == NULL)
+                       continue;
+
+               low = *basep;
+               high = low + (*sizep);
+
+               for (i = 0; i < image->nr_segments; i++) {
+                       begin = image->segment[i].mem;
+                       end = begin + image->segment[i].memsz;
+
+                       if ((begin < high) && (end > low))
+                               return -ETXTBSY;
+               }
+       }
+
+       return 0;
+}
+
+static void copy_segments(unsigned long ind)
+{
+       unsigned long entry;
+       unsigned long *ptr;
+       void *dest;
+       void *addr;
+
+       /*
+        * We rely on kexec_load to create a list that properly
+        * initializes these pointers before they are used.
+        * We will still crash if the list is wrong, but at least
+        * the compiler will be quiet.
+        */
+       ptr = NULL;
+       dest = NULL;
+
+       for (entry = ind; !(entry & IND_DONE); entry = *ptr++) {
+               addr = __va(entry & PAGE_MASK);
+
+               switch (entry & IND_FLAGS) {
+               case IND_DESTINATION:
+                       dest = addr;
+                       break;
+               case IND_INDIRECTION:
+                       ptr = addr;
+                       break;
+               case IND_SOURCE:
+                       copy_page(dest, addr);
+                       dest += PAGE_SIZE;
+               }
+       }
+}
+
+void kexec_copy_flush(struct kimage *image)
+{
+       long i, nr_segments = image->nr_segments;
+       struct  kexec_segment ranges[KEXEC_SEGMENT_MAX];
+
+       /* save the ranges on the stack to efficiently flush the icache */
+       memcpy(ranges, image->segment, sizeof(ranges));
+
+       /*
+        * After this call we may not use anything allocated in dynamic
+        * memory, including *image.
+        *
+        * Only globals and the stack are allowed.
+        */
+       copy_segments(image->head);
+
+       /*
+        * we need to flush the icache for all destination pages at some
+        * point, including ones that were already in place in the original copy
+        */
+       for (i = 0; i < nr_segments; i++)
+               flush_icache_range((unsigned long)__va(ranges[i].mem),
+                       (unsigned long)__va(ranges[i].mem + ranges[i].memsz));
+}
+
+#ifdef CONFIG_SMP
+
+static int kexec_all_irq_disabled = 0;
+
+static void kexec_smp_down(void *arg)
+{
+       local_irq_disable();
+       hard_irq_disable();
+
+       mb(); /* make sure our irqs are disabled before we say they are */
+       get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+       while (kexec_all_irq_disabled == 0)
+               cpu_relax();
+       mb(); /* make sure all irqs are disabled before this */
+       hw_breakpoint_disable();
+       /*
+        * Now every CPU has IRQs off, we can clear out any pending
+        * IPIs and be sure that no more will come in after this.
+        */
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(0, 1);
+
+       kexec_smp_wait();
+       /* NOTREACHED */
+}
+
+static void kexec_prepare_cpus_wait(int wait_state)
+{
+       int my_cpu, i, notified = -1;
+
+       hw_breakpoint_disable();
+       my_cpu = get_cpu();
+       /* Make sure each CPU has at least made it to the state we need.
+        *
+        * FIXME: There is a (slim) chance of a problem if not all of the CPUs
+        * are correctly onlined.  If somehow we start a CPU on boot with RTAS
+        * start-cpu, but somehow that CPU doesn't write callin_cpu_map[] in
+        * time, the boot CPU will time out.  If it does eventually execute
+        * stuff, the secondary will start up (paca_ptrs[]->cpu_start was
+        * written) and get into a peculiar state.
+        * If the platform supports smp_ops->take_timebase(), the secondary CPU
+        * will probably be spinning in there.  If not (i.e. pseries), the
+        * secondary will continue on and try to online itself/idle/etc. If it
+        * survives that, we need to find these
+        * possible-but-not-online-but-should-be CPUs and chaperone them into
+        * kexec_smp_wait().
+        */
+       for_each_online_cpu(i) {
+               if (i == my_cpu)
+                       continue;
+
+               while (paca_ptrs[i]->kexec_state < wait_state) {
+                       barrier();
+                       if (i != notified) {
+                               printk(KERN_INFO "kexec: waiting for cpu %d "
+                                      "(physical %d) to enter %i state\n",
+                                      i, paca_ptrs[i]->hw_cpu_id, wait_state);
+                               notified = i;
+                       }
+               }
+       }
+       mb();
+}
+
+/*
+ * We need to make sure each present CPU is online.  The next kernel will scan
+ * the device tree and assume primary threads are online and query secondary
+ * threads via RTAS to online them if required.  If we don't online primary
+ * threads, they will be stuck.  However, we also online secondary threads as we
+ * may be using 'cede offline'.  In this case RTAS doesn't see the secondary
+ * threads as offline -- and again, these CPUs will be stuck.
+ *
+ * So, we online all CPUs that should be running, including secondary threads.
+ */
+static void wake_offline_cpus(void)
+{
+       int cpu = 0;
+
+       for_each_present_cpu(cpu) {
+               if (!cpu_online(cpu)) {
+                       printk(KERN_INFO "kexec: Waking offline cpu %d.\n",
+                              cpu);
+                       WARN_ON(cpu_up(cpu));
+               }
+       }
+}
+
+static void kexec_prepare_cpus(void)
+{
+       wake_offline_cpus();
+       smp_call_function(kexec_smp_down, NULL, /* wait */0);
+       local_irq_disable();
+       hard_irq_disable();
+
+       mb(); /* make sure IRQs are disabled before we say they are */
+       get_paca()->kexec_state = KEXEC_STATE_IRQS_OFF;
+
+       kexec_prepare_cpus_wait(KEXEC_STATE_IRQS_OFF);
+       /* we are sure every CPU has IRQs off at this point */
+       kexec_all_irq_disabled = 1;
+
+       /*
+        * Before removing MMU mappings make sure all CPUs have entered real
+        * mode:
+        */
+       kexec_prepare_cpus_wait(KEXEC_STATE_REAL_MODE);
+
+       /* after we tell the others to go down */
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(0, 0);
+
+       put_cpu();
+}
+
+#else /* ! SMP */
+
+static void kexec_prepare_cpus(void)
+{
+       /*
+        * move the secondaries to us so that we can copy
+        * the new kernel 0-0x100 safely
+        *
+        * should this be done by kexec in setup.c?
+        *
+        * We need to release the cpus if we are ever going from a
+        * UP to an SMP kernel.
+        */
+       smp_release_cpus();
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(0, 0);
+       local_irq_disable();
+       hard_irq_disable();
+}
+
+#endif /* SMP */
+
+/*
+ * kexec thread structure and stack.
+ *
+ * We need to make sure that this is 16384-byte aligned due to the
+ * way process stacks are handled.  It also must be statically allocated
+ * or allocated as part of the kimage, because everything else may be
+ * overwritten when we copy the kexec image.  We piggyback on the
+ * "init_task" linker section here to statically allocate a stack.
+ *
+ * We could use a smaller stack if we don't care about anything using
+ * current, but that audit has not been performed.
+ */
+static union thread_union kexec_stack __init_task_data =
+       { };
+
+/*
+ * For similar reasons to the stack above, the kexecing CPU needs to be on a
+ * static PACA; we switch to kexec_paca.
+ */
+struct paca_struct kexec_paca;
+
+/* Our assembly helper, in misc_64.S */
+extern void kexec_sequence(void *newstack, unsigned long start,
+                          void *image, void *control,
+                          void (*clear_all)(void),
+                          bool copy_with_mmu_off) __noreturn;
+
+/* too late to fail here */
+void default_machine_kexec(struct kimage *image)
+{
+       bool copy_with_mmu_off;
+
+       /* prepare control code if any */
+
+       /*
+        * If this is a normal kexec boot, we need to shut the other cpus
+        * down into our wait loop and quiesce interrupts.
+        * Otherwise, in the crash case (crashing_cpu >= 0), the other
+        * CPUs were already stopped, and their pt_regs were collected
+        * via the debugger IPI before we got here.
+        */
+
+       if (!kdump_in_progress())
+               kexec_prepare_cpus();
+
+       printk("kexec: Starting switchover sequence.\n");
+
+       /* switch to a statically allocated stack.  Based on irq stack code.
+        * We set up preempt_count to avoid using VMX in memcpy.
+        * XXX: the task struct will likely be invalid once we do the copy!
+        */
+       current_thread_info()->flags = 0;
+       current_thread_info()->preempt_count = HARDIRQ_OFFSET;
+
+       /* We need a static PACA, too; copy this CPU's PACA over and switch to
+        * it. Also poison per_cpu_offset and NULL lppaca to catch anyone using
+        * non-static data.
+        */
+       memcpy(&kexec_paca, get_paca(), sizeof(struct paca_struct));
+       kexec_paca.data_offset = 0xedeaddeadeeeeeeeUL;
+#ifdef CONFIG_PPC_PSERIES
+       kexec_paca.lppaca_ptr = NULL;
+#endif
+
+       if (is_secure_guest() && !(image->preserve_context ||
+                                  image->type == KEXEC_TYPE_CRASH)) {
+               uv_unshare_all_pages();
+               printk("kexec: Unshared all shared pages.\n");
+       }
+
+       paca_ptrs[kexec_paca.paca_index] = &kexec_paca;
+
+       setup_paca(&kexec_paca);
+
+       /*
+        * The lppaca should be unregistered at this point so the HV won't
+        * touch it. In the case of a crash, none of the lppacas are
+        * unregistered so there is not much we can do about it here.
+        */
+
+       /*
+        * On Book3S, the copy must happen with the MMU off if we are either
+        * using Radix page tables or we are not in an LPAR since we can
+        * overwrite the page tables while copying.
+        *
+        * In an LPAR, we keep the MMU on otherwise we can't access beyond
+        * the RMA. On BookE there is no real MMU off mode, so we have to
+        * keep it enabled as well (but then we have bolted TLB entries).
+        */
+#ifdef CONFIG_PPC_BOOK3E
+       copy_with_mmu_off = false;
+#else
+       copy_with_mmu_off = radix_enabled() ||
+               !(firmware_has_feature(FW_FEATURE_LPAR) ||
+                 firmware_has_feature(FW_FEATURE_PS3_LV1));
+#endif
+
+       /* Some things are best done in assembly.  Finding globals with
+        * a toc is easier in C, so pass in what we can.
+        */
+       kexec_sequence(&kexec_stack, image->start, image,
+                      page_address(image->control_code_page),
+                      mmu_cleanup_all, copy_with_mmu_off);
+       /* NOTREACHED */
+}
+
+#ifdef CONFIG_PPC_BOOK3S_64
+/* Values we need to export to the second kernel via the device tree. */
+static unsigned long htab_base;
+static unsigned long htab_size;
+
+static struct property htab_base_prop = {
+       .name = "linux,htab-base",
+       .length = sizeof(unsigned long),
+       .value = &htab_base,
+};
+
+static struct property htab_size_prop = {
+       .name = "linux,htab-size",
+       .length = sizeof(unsigned long),
+       .value = &htab_size,
+};
+
+static int __init export_htab_values(void)
+{
+       struct device_node *node;
+
+       /* On machines with no htab, htab_address is NULL */
+       if (!htab_address)
+               return -ENODEV;
+
+       node = of_find_node_by_path("/chosen");
+       if (!node)
+               return -ENODEV;
+
+       /* remove any stale properties so ours can be found */
+       of_remove_property(node, of_find_property(node, htab_base_prop.name, NULL));
+       of_remove_property(node, of_find_property(node, htab_size_prop.name, NULL));
+
+       htab_base = cpu_to_be64(__pa(htab_address));
+       of_add_property(node, &htab_base_prop);
+       htab_size = cpu_to_be64(htab_size_bytes);
+       of_add_property(node, &htab_size_prop);
+
+       of_node_put(node);
+       return 0;
+}
+late_initcall(export_htab_values);
+#endif /* CONFIG_PPC_BOOK3S_64 */
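
kexec_prepare_cpus() above is a two-phase rendezvous: each secondary publishes its kexec_state and spins on kexec_all_irq_disabled, while the boot CPU polls every paca until all have checked in. A pthreads sketch of the same pattern (build with -pthread; the names are illustrative, and unlike the real secondaries, which park in kexec_smp_wait() and never return, these threads exit so the program can finish):

    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define NCPUS          4
    #define STATE_IRQS_OFF 1

    static atomic_int cpu_state[NCPUS]; /* stands in for paca_ptrs[i]->kexec_state */
    static atomic_int all_irq_disabled; /* stands in for kexec_all_irq_disabled */

    static void *secondary(void *arg)
    {
        int cpu = (int)(intptr_t)arg;

        /* publish our state, then spin, as kexec_smp_down() does */
        atomic_store(&cpu_state[cpu], STATE_IRQS_OFF);
        while (!atomic_load(&all_irq_disabled))
            ;
        return NULL;
    }

    int main(void)
    {
        pthread_t t[NCPUS];
        int i;

        for (i = 1; i < NCPUS; i++)
            pthread_create(&t[i], NULL, secondary, (void *)(intptr_t)i);

        /* kexec_prepare_cpus_wait(): poll each cpu's published state */
        for (i = 1; i < NCPUS; i++)
            while (atomic_load(&cpu_state[i]) < STATE_IRQS_OFF)
                ;

        atomic_store(&all_irq_disabled, 1); /* release the spinners */
        for (i = 1; i < NCPUS; i++)
            pthread_join(t[i], NULL);
        printf("%d secondary cpus quiesced\n", NCPUS - 1);
        return 0;
    }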
diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
new file mode 100644 (file)
index 0000000..d488311
--- /dev/null
@@ -0,0 +1,374 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Architecture specific (PPC64) functions for kexec based crash dumps.
+ *
+ * Copyright (C) 2005, IBM Corp.
+ *
+ * Created by: Haren Myneni
+ */
+
+#include <linux/kernel.h>
+#include <linux/smp.h>
+#include <linux/reboot.h>
+#include <linux/kexec.h>
+#include <linux/export.h>
+#include <linux/crash_dump.h>
+#include <linux/delay.h>
+#include <linux/irq.h>
+#include <linux/types.h>
+
+#include <asm/processor.h>
+#include <asm/machdep.h>
+#include <asm/kexec.h>
+#include <asm/prom.h>
+#include <asm/smp.h>
+#include <asm/setjmp.h>
+#include <asm/debug.h>
+
+/*
+ * The primary CPU waits a while for all secondary CPUs to enter. This is to
+ * avoid sending an IPI if the secondary CPUs are entering
+ * crash_kexec_secondary on their own (e.g. via a system reset).
+ *
+ * The secondary timeout has to be longer than the primary. Both timeouts are
+ * in milliseconds.
+ */
+#define PRIMARY_TIMEOUT                500
+#define SECONDARY_TIMEOUT      1000
+
+#define IPI_TIMEOUT            10000
+#define REAL_MODE_TIMEOUT      10000
+
+static int time_to_dump;
+/*
+ * crash_wake_offline should be set to 1 by platforms that intend to wake
+ * up offline cpus prior to jumping to a kdump kernel. Currently powernv
+ * sets it to 1, since otherwise an offline CPU may wake up unexpectedly,
+ * e.g. due to an HMI (malfunction error), which propagates to all
+ * threads.
+ */
+int crash_wake_offline;
+
+#define CRASH_HANDLER_MAX 3
+/* List of shutdown handles */
+static crash_shutdown_t crash_shutdown_handles[CRASH_HANDLER_MAX];
+static DEFINE_SPINLOCK(crash_handlers_lock);
+
+static unsigned long crash_shutdown_buf[JMP_BUF_LEN];
+static int crash_shutdown_cpu = -1;
+
+static int handle_fault(struct pt_regs *regs)
+{
+       if (crash_shutdown_cpu == smp_processor_id())
+               longjmp(crash_shutdown_buf, 1);
+       return 0;
+}
+
+#ifdef CONFIG_SMP
+
+static atomic_t cpus_in_crash;
+void crash_ipi_callback(struct pt_regs *regs)
+{
+       static cpumask_t cpus_state_saved = CPU_MASK_NONE;
+
+       int cpu = smp_processor_id();
+
+       hard_irq_disable();
+       if (!cpumask_test_cpu(cpu, &cpus_state_saved)) {
+               crash_save_cpu(regs, cpu);
+               cpumask_set_cpu(cpu, &cpus_state_saved);
+       }
+
+       atomic_inc(&cpus_in_crash);
+       smp_mb__after_atomic();
+
+       /*
+        * Wait for the kdump boot to start.
+        * Spinning here makes sure that all CPUs have stopped before the
+        * dump is taken.
+        */
+       while (!time_to_dump)
+               cpu_relax();
+
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(1, 1);
+
+#ifdef CONFIG_PPC64
+       kexec_smp_wait();
+#else
+       for (;;);       /* FIXME */
+#endif
+
+       /* NOTREACHED */
+}
+
+static void crash_kexec_prepare_cpus(int cpu)
+{
+       unsigned int msecs;
+       unsigned int ncpus = num_online_cpus() - 1; /* Excluding the panic cpu */
+       int tries = 0;
+       int (*old_handler)(struct pt_regs *regs);
+
+       printk(KERN_EMERG "Sending IPI to other CPUs\n");
+
+       if (crash_wake_offline)
+               ncpus = num_present_cpus() - 1;
+
+       crash_send_ipi(crash_ipi_callback);
+       smp_wmb();
+
+again:
+       /*
+        * FIXME: Until we have a way to stop other CPUs reliably, the
+        * crash CPU sends an IPI and waits for the other CPUs to
+        * respond.
+        */
+       msecs = IPI_TIMEOUT;
+       while ((atomic_read(&cpus_in_crash) < ncpus) && (--msecs > 0))
+               mdelay(1);
+
+       /* Would it be better to replace the trap vector here? */
+
+       if (atomic_read(&cpus_in_crash) >= ncpus) {
+               printk(KERN_EMERG "IPI complete\n");
+               return;
+       }
+
+       printk(KERN_EMERG "ERROR: %d cpu(s) not responding\n",
+               ncpus - atomic_read(&cpus_in_crash));
+
+       /*
+        * If we have a panic timeout set then we can't wait indefinitely
+        * for someone to activate a system reset. We also give up on the
+        * second time through if the system reset fails to work.
+        */
+       if ((panic_timeout > 0) || (tries > 0))
+               return;
+
+       /*
+        * A system reset will cause all CPUs to take an 0x100 exception.
+        * The primary CPU returns here via setjmp, and the secondary
+        * CPUs reexecute the crash_kexec_secondary path.
+        */
+       old_handler = __debugger;
+       __debugger = handle_fault;
+       crash_shutdown_cpu = smp_processor_id();
+
+       if (setjmp(crash_shutdown_buf) == 0) {
+               printk(KERN_EMERG "Activate system reset (dumprestart) "
+                                 "to stop other cpu(s)\n");
+
+               /*
+                * A system reset will force all CPUs to execute the
+                * crash code again. We need to reset cpus_in_crash so we
+                * wait for everyone to do this.
+                */
+               atomic_set(&cpus_in_crash, 0);
+               smp_mb();
+
+               while (atomic_read(&cpus_in_crash) < ncpus)
+                       cpu_relax();
+       }
+
+       crash_shutdown_cpu = -1;
+       __debugger = old_handler;
+
+       tries++;
+       goto again;
+}
+
+/*
+ * This function will be called by secondary cpus.
+ */
+void crash_kexec_secondary(struct pt_regs *regs)
+{
+       unsigned long flags;
+       int msecs = SECONDARY_TIMEOUT;
+
+       local_irq_save(flags);
+
+       /* Wait for the primary crash CPU to signal its progress */
+       while (crashing_cpu < 0) {
+               if (--msecs < 0) {
+                       /* No response, kdump image may not have been loaded */
+                       local_irq_restore(flags);
+                       return;
+               }
+
+               mdelay(1);
+       }
+
+       crash_ipi_callback(regs);
+}
+
+#else  /* ! CONFIG_SMP */
+
+static void crash_kexec_prepare_cpus(int cpu)
+{
+       /*
+        * move the secondaries to us so that we can copy
+        * the new kernel 0-0x100 safely
+        *
+        * do this if kexec in setup.c ?
+        */
+#ifdef CONFIG_PPC64
+       smp_release_cpus();
+#else
+       /* FIXME */
+#endif
+}
+
+void crash_kexec_secondary(struct pt_regs *regs)
+{
+}
+#endif /* CONFIG_SMP */
+
+/* wait for all the CPUs to hit real mode but timeout if they don't come in */
+#if defined(CONFIG_SMP) && defined(CONFIG_PPC64)
+static void __maybe_unused crash_kexec_wait_realmode(int cpu)
+{
+       unsigned int msecs;
+       int i;
+
+       msecs = REAL_MODE_TIMEOUT;
+       for (i = 0; i < nr_cpu_ids && msecs > 0; i++) {
+               if (i == cpu)
+                       continue;
+
+               while (paca_ptrs[i]->kexec_state < KEXEC_STATE_REAL_MODE) {
+                       barrier();
+                       if (!cpu_possible(i) || !cpu_online(i) || (msecs <= 0))
+                               break;
+                       msecs--;
+                       mdelay(1);
+               }
+       }
+       mb();
+}
+#else
+static inline void crash_kexec_wait_realmode(int cpu) {}
+#endif /* CONFIG_SMP && CONFIG_PPC64 */
+
+/*
+ * Register a function to be called on shutdown.  Only use this if you
+ * can't reset your device in the second kernel.
+ */
+int crash_shutdown_register(crash_shutdown_t handler)
+{
+       unsigned int i, rc;
+
+       spin_lock(&crash_handlers_lock);
+       for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
+               if (!crash_shutdown_handles[i]) {
+                       /* Insert handle at first empty entry */
+                       crash_shutdown_handles[i] = handler;
+                       rc = 0;
+                       break;
+               }
+
+       if (i == CRASH_HANDLER_MAX) {
+               printk(KERN_ERR "Crash shutdown handles full, "
+                      "not registered.\n");
+               rc = 1;
+       }
+
+       spin_unlock(&crash_handlers_lock);
+       return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_register);
+
+int crash_shutdown_unregister(crash_shutdown_t handler)
+{
+       unsigned int i, rc;
+
+       spin_lock(&crash_handlers_lock);
+       for (i = 0 ; i < CRASH_HANDLER_MAX; i++)
+               if (crash_shutdown_handles[i] == handler)
+                       break;
+
+       if (i == CRASH_HANDLER_MAX) {
+               printk(KERN_ERR "Crash shutdown handle not found\n");
+               rc = 1;
+       } else {
+               /* Shift handles down */
+               for (; i < (CRASH_HANDLER_MAX - 1); i++)
+                       crash_shutdown_handles[i] =
+                               crash_shutdown_handles[i+1];
+               /*
+                * Reset last entry to NULL now that it has been shifted down,
+                * this will allow new handles to be added here.
+                */
+               crash_shutdown_handles[i] = NULL;
+               rc = 0;
+       }
+
+       spin_unlock(&crash_handlers_lock);
+       return rc;
+}
+EXPORT_SYMBOL(crash_shutdown_unregister);
+
+void default_machine_crash_shutdown(struct pt_regs *regs)
+{
+       unsigned int i;
+       int (*old_handler)(struct pt_regs *regs);
+
+       /*
+        * This function is only called after the system
+        * has panicked or is otherwise in a critical state.
+        * The minimum amount of code to allow a kexec'd kernel
+        * to run successfully needs to happen here.
+        *
+        * In practice this means stopping other cpus in
+        * an SMP system.
+        * The kernel is broken so disable interrupts.
+        */
+       hard_irq_disable();
+
+       /*
+        * Make a note of the crashing cpu. It will be used in
+        * machine_kexec so that another IPI is not sent.
+        */
+       crashing_cpu = smp_processor_id();
+
+       /*
+        * If we came in via system reset, wait a while for the secondary
+        * CPUs to enter.
+        */
+       if (TRAP(regs) == 0x100)
+               mdelay(PRIMARY_TIMEOUT);
+
+       crash_kexec_prepare_cpus(crashing_cpu);
+
+       crash_save_cpu(regs, crashing_cpu);
+
+       time_to_dump = 1;
+
+       crash_kexec_wait_realmode(crashing_cpu);
+
+       machine_kexec_mask_interrupts();
+
+       /*
+        * Call registered shutdown routines safely.  Swap out
+        * __debugger_fault_handler, and replace on exit.
+        */
+       old_handler = __debugger_fault_handler;
+       __debugger_fault_handler = handle_fault;
+       crash_shutdown_cpu = smp_processor_id();
+       for (i = 0; i < CRASH_HANDLER_MAX && crash_shutdown_handles[i]; i++) {
+               if (setjmp(crash_shutdown_buf) == 0) {
+                       /*
+                        * Insert syncs and delay to ensure
+                        * instructions in the dangerous region don't
+                        * leak away from this protected region.
+                        */
+                       asm volatile("sync; isync");
+                       /* dangerous region */
+                       crash_shutdown_handles[i]();
+                       asm volatile("sync; isync");
+               }
+       }
+       crash_shutdown_cpu = -1;
+       __debugger_fault_handler = old_handler;
+
+       if (ppc_md.kexec_cpu_down)
+               ppc_md.kexec_cpu_down(1, 0);
+}
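
The setjmp()/longjmp() pair above is a plain fault fence: while a shutdown handler runs, __debugger_fault_handler points at handle_fault(), which longjmps back so a faulting handler is skipped instead of taking down the crash path. The same pattern in a userspace sketch (names here are illustrative):

    #include <setjmp.h>
    #include <stdio.h>

    static jmp_buf guard;

    static void faulting_handler(void)
    {
        longjmp(guard, 1); /* stands in for the trap -> handle_fault() path */
    }

    static void good_handler(void)
    {
        printf("handler ran\n");
    }

    int main(void)
    {
        void (*handlers[])(void) = { faulting_handler, good_handler };
        unsigned int i;

        for (i = 0; i < 2; i++) {
            if (setjmp(guard) == 0)
                handlers[i]();      /* the "dangerous region" */
            else
                printf("handler %u faulted, skipped\n", i);
        }
        return 0;
    }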
diff --git a/arch/powerpc/kexec/elf_64.c b/arch/powerpc/kexec/elf_64.c
new file mode 100644 (file)
index 0000000..3072fd6
--- /dev/null
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#define pr_fmt(fmt)    "kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/module.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+static void *elf64_load(struct kimage *image, char *kernel_buf,
+                       unsigned long kernel_len, char *initrd,
+                       unsigned long initrd_len, char *cmdline,
+                       unsigned long cmdline_len)
+{
+       int ret;
+       unsigned int fdt_size;
+       unsigned long kernel_load_addr;
+       unsigned long initrd_load_addr = 0, fdt_load_addr;
+       void *fdt;
+       const void *slave_code;
+       struct elfhdr ehdr;
+       struct kexec_elf_info elf_info;
+       struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+                                 .buf_max = ppc64_rma_size };
+       struct kexec_buf pbuf = { .image = image, .buf_min = 0,
+                                 .buf_max = ppc64_rma_size, .top_down = true,
+                                 .mem = KEXEC_BUF_MEM_UNKNOWN };
+
+       ret = kexec_build_elf_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+       if (ret)
+               goto out;
+
+       ret = kexec_elf_load(image, &ehdr, &elf_info, &kbuf, &kernel_load_addr);
+       if (ret)
+               goto out;
+
+       pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+
+       ret = kexec_load_purgatory(image, &pbuf);
+       if (ret) {
+               pr_err("Loading purgatory failed.\n");
+               goto out;
+       }
+
+       pr_debug("Loaded purgatory at 0x%lx\n", pbuf.mem);
+
+       if (initrd != NULL) {
+               kbuf.buffer = initrd;
+               kbuf.bufsz = kbuf.memsz = initrd_len;
+               kbuf.buf_align = PAGE_SIZE;
+               kbuf.top_down = false;
+               kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+               ret = kexec_add_buffer(&kbuf);
+               if (ret)
+                       goto out;
+               initrd_load_addr = kbuf.mem;
+
+               pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
+       }
+
+       fdt_size = fdt_totalsize(initial_boot_params) * 2;
+       fdt = kmalloc(fdt_size, GFP_KERNEL);
+       if (!fdt) {
+               pr_err("Not enough memory for the device tree.\n");
+               ret = -ENOMEM;
+               goto out;
+       }
+       ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
+       if (ret < 0) {
+               pr_err("Error setting up the new device tree.\n");
+               ret = -EINVAL;
+               goto out;
+       }
+
+       ret = setup_new_fdt(image, fdt, initrd_load_addr, initrd_len, cmdline);
+       if (ret)
+               goto out;
+
+       fdt_pack(fdt);
+
+       kbuf.buffer = fdt;
+       kbuf.bufsz = kbuf.memsz = fdt_size;
+       kbuf.buf_align = PAGE_SIZE;
+       kbuf.top_down = true;
+       kbuf.mem = KEXEC_BUF_MEM_UNKNOWN;
+       ret = kexec_add_buffer(&kbuf);
+       if (ret)
+               goto out;
+       fdt_load_addr = kbuf.mem;
+
+       pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
+
+       slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
+       ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+                             fdt_load_addr);
+       if (ret)
+               pr_err("Error setting up the purgatory.\n");
+
+out:
+       kexec_free_elf_info(&elf_info);
+
+       /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
+       return ret ? ERR_PTR(ret) : fdt;
+}
+
+const struct kexec_file_ops kexec_elf64_ops = {
+       .probe = kexec_elf_probe,
+       .load = elf64_load,
+};
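
elf64_load() is the kernel half of kexec_file_load(2); the fds and command line come from userspace, and the kernel stages the ELF, purgatory, initrd and device tree itself. A sketch of the calling side, assuming a libc that defines SYS_kexec_file_load; the paths are illustrative and CAP_SYS_BOOT is required:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    int main(void)
    {
        const char cmdline[] = "root=/dev/sda2 ro";
        int kernel_fd = open("/boot/vmlinux", O_RDONLY);
        int initrd_fd = open("/boot/initrd.img", O_RDONLY);

        if (kernel_fd < 0 || initrd_fd < 0)
            return 1;

        /* cmdline_len must count the terminating NUL */
        if (syscall(SYS_kexec_file_load, kernel_fd, initrd_fd,
                    sizeof(cmdline), cmdline, 0UL) != 0) {
            perror("kexec_file_load");
            return 1;
        }
        puts("image loaded; start it with reboot(LINUX_REBOOT_CMD_KEXEC)");
        return 0;
    }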
diff --git a/arch/powerpc/kexec/file_load.c b/arch/powerpc/kexec/file_load.c
new file mode 100644 (file)
index 0000000..143c917
--- /dev/null
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004  Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004  IBM Corp.
+ * Copyright (C) 2004,2005  Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005  R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006  Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016  IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+#include <asm/ima.h>
+
+#define SLAVE_CODE_SIZE                256
+
+const struct kexec_file_ops * const kexec_file_loaders[] = {
+       &kexec_elf64_ops,
+       NULL
+};
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+                                 unsigned long buf_len)
+{
+       /* We don't support crash kernels yet. */
+       if (image->type == KEXEC_TYPE_CRASH)
+               return -EOPNOTSUPP;
+
+       return kexec_image_probe_default(image, buf, buf_len);
+}
+
+/**
+ * setup_purgatory - initialize the purgatory's global variables
+ * @image:             kexec image.
+ * @slave_code:                Slave code for the purgatory.
+ * @fdt:               Flattened device tree for the next kernel.
+ * @kernel_load_addr:  Address where the kernel is loaded.
+ * @fdt_load_addr:     Address where the flattened device tree is loaded.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_purgatory(struct kimage *image, const void *slave_code,
+                   const void *fdt, unsigned long kernel_load_addr,
+                   unsigned long fdt_load_addr)
+{
+       unsigned int *slave_code_buf, master_entry;
+       int ret;
+
+       slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
+       if (!slave_code_buf)
+               return -ENOMEM;
+
+       /* Get the slave code from the new kernel and put it in purgatory. */
+       ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+                                            slave_code_buf, SLAVE_CODE_SIZE,
+                                            true);
+       if (ret) {
+               kfree(slave_code_buf);
+               return ret;
+       }
+
+       master_entry = slave_code_buf[0];
+       memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
+       slave_code_buf[0] = master_entry;
+       ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+                                            slave_code_buf, SLAVE_CODE_SIZE,
+                                            false);
+       kfree(slave_code_buf);
+       if (ret)
+               return ret;
+
+       ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
+                                            sizeof(kernel_load_addr), false);
+       if (ret)
+               return ret;
+       ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
+                                            sizeof(fdt_load_addr), false);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
+/**
+ * delete_fdt_mem_rsv - delete memory reservation with given address and size
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
+{
+       int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
+
+       for (i = 0; i < num_rsvs; i++) {
+               uint64_t rsv_start, rsv_size;
+
+               ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
+               if (ret) {
+                       pr_err("Malformed device tree.\n");
+                       return -EINVAL;
+               }
+
+               if (rsv_start == start && rsv_size == size) {
+                       ret = fdt_del_mem_rsv(fdt, i);
+                       if (ret) {
+                               pr_err("Error deleting device tree reservation.\n");
+                               return -EINVAL;
+                       }
+
+                       return 0;
+               }
+       }
+
+       return -ENOENT;
+}
+
+/**
+ * setup_new_fdt - modify /chosen and memory reservation for the next kernel
+ * @image:             kexec image being loaded.
+ * @fdt:               Flattened device tree for the next kernel.
+ * @initrd_load_addr:  Address where the next initrd will be loaded.
+ * @initrd_len:                Size of the next initrd, or 0 if there will be none.
+ * @cmdline:           Command line for the next kernel, or NULL if there will
+ *                     be none.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_new_fdt(const struct kimage *image, void *fdt,
+                 unsigned long initrd_load_addr, unsigned long initrd_len,
+                 const char *cmdline)
+{
+       int ret, chosen_node;
+       const void *prop;
+
+       /* Remove memory reservation for the current device tree. */
+       ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
+                                fdt_totalsize(initial_boot_params));
+       if (ret == 0)
+               pr_debug("Removed old device tree reservation.\n");
+       else if (ret != -ENOENT)
+               return ret;
+
+       chosen_node = fdt_path_offset(fdt, "/chosen");
+       if (chosen_node == -FDT_ERR_NOTFOUND) {
+               chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
+                                             "chosen");
+               if (chosen_node < 0) {
+                       pr_err("Error creating /chosen.\n");
+                       return -EINVAL;
+               }
+       } else if (chosen_node < 0) {
+               pr_err("Malformed device tree: error reading /chosen.\n");
+               return -EINVAL;
+       }
+
+       /* Did we boot using an initrd? */
+       prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
+       if (prop) {
+               uint64_t tmp_start, tmp_end, tmp_size;
+
+               tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
+
+               prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
+               if (!prop) {
+                       pr_err("Malformed device tree.\n");
+                       return -EINVAL;
+               }
+               tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
+
+               /*
+                * kexec reserves exact initrd size, while firmware may
+                * reserve a multiple of PAGE_SIZE, so check for both.
+                */
+               tmp_size = tmp_end - tmp_start;
+               ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
+               if (ret == -ENOENT)
+                       ret = delete_fdt_mem_rsv(fdt, tmp_start,
+                                                round_up(tmp_size, PAGE_SIZE));
+               if (ret == 0)
+                       pr_debug("Removed old initrd reservation.\n");
+               else if (ret != -ENOENT)
+                       return ret;
+
+               /* If there's no new initrd, delete the old initrd's info. */
+               if (initrd_len == 0) {
+                       ret = fdt_delprop(fdt, chosen_node,
+                                         "linux,initrd-start");
+                       if (ret) {
+                               pr_err("Error deleting linux,initrd-start.\n");
+                               return -EINVAL;
+                       }
+
+                       ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
+                       if (ret) {
+                               pr_err("Error deleting linux,initrd-end.\n");
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       if (initrd_len) {
+               ret = fdt_setprop_u64(fdt, chosen_node,
+                                     "linux,initrd-start",
+                                     initrd_load_addr);
+               if (ret < 0)
+                       goto err;
+
+               /* initrd-end is the first address after the initrd image. */
+               ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
+                                     initrd_load_addr + initrd_len);
+               if (ret < 0)
+                       goto err;
+
+               ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
+               if (ret) {
+                       pr_err("Error reserving initrd memory: %s\n",
+                              fdt_strerror(ret));
+                       return -EINVAL;
+               }
+       }
+
+       if (cmdline != NULL) {
+               ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
+               if (ret < 0)
+                       goto err;
+       } else {
+               ret = fdt_delprop(fdt, chosen_node, "bootargs");
+               if (ret && ret != -FDT_ERR_NOTFOUND) {
+                       pr_err("Error deleting bootargs.\n");
+                       return -EINVAL;
+               }
+       }
+
+       ret = setup_ima_buffer(image, fdt, chosen_node);
+       if (ret) {
+               pr_err("Error setting up the new device tree.\n");
+               return ret;
+       }
+
+       ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
+       if (ret)
+               goto err;
+
+       return 0;
+
+err:
+       pr_err("Error setting up the new device tree.\n");
+       return -EINVAL;
+}
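
setup_new_fdt() edits the flattened tree with libfdt calls that are equally available to userspace. A standalone sketch of the /chosen manipulation, assuming libfdt is installed (link with -lfdt); the addresses are illustrative:

    #include <libfdt.h>
    #include <stdio.h>

    int main(void)
    {
        static _Alignas(8) char buf[4096];
        int chosen;

        if (fdt_create_empty_tree(buf, sizeof(buf)))
            return 1;

        chosen = fdt_add_subnode(buf, 0, "chosen"); /* offset 0 is the root */
        if (chosen < 0)
            return 1;

        fdt_setprop_u64(buf, chosen, "linux,initrd-start", 0x2000000);
        fdt_setprop_u64(buf, chosen, "linux,initrd-end",   0x2400000);
        fdt_setprop(buf, chosen, "linux,booted-from-kexec", NULL, 0);
        fdt_add_mem_rsv(buf, 0x2000000, 0x400000); /* reserve the initrd */

        printf("fdt uses %d of %d bytes\n",
               (int)fdt_totalsize(buf), (int)sizeof(buf));
        return 0;
    }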
diff --git a/arch/powerpc/kexec/ima.c b/arch/powerpc/kexec/ima.c
new file mode 100644 (file)
index 0000000..720e50e
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Authors:
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/of.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+
+static int get_addr_size_cells(int *addr_cells, int *size_cells)
+{
+       struct device_node *root;
+
+       root = of_find_node_by_path("/");
+       if (!root)
+               return -EINVAL;
+
+       *addr_cells = of_n_addr_cells(root);
+       *size_cells = of_n_size_cells(root);
+
+       of_node_put(root);
+
+       return 0;
+}
+
+static int do_get_kexec_buffer(const void *prop, int len, unsigned long *addr,
+                              size_t *size)
+{
+       int ret, addr_cells, size_cells;
+
+       ret = get_addr_size_cells(&addr_cells, &size_cells);
+       if (ret)
+               return ret;
+
+       if (len < 4 * (addr_cells + size_cells))
+               return -ENOENT;
+
+       *addr = of_read_number(prop, addr_cells);
+       *size = of_read_number(prop + 4 * addr_cells, size_cells);
+
+       return 0;
+}
+
+/**
+ * ima_get_kexec_buffer - get IMA buffer from the previous kernel
+ * @addr:      On successful return, set to point to the buffer contents.
+ * @size:      On successful return, set to the buffer size.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int ima_get_kexec_buffer(void **addr, size_t *size)
+{
+       int ret, len;
+       unsigned long tmp_addr;
+       size_t tmp_size;
+       const void *prop;
+
+       prop = of_get_property(of_chosen, "linux,ima-kexec-buffer", &len);
+       if (!prop)
+               return -ENOENT;
+
+       ret = do_get_kexec_buffer(prop, len, &tmp_addr, &tmp_size);
+       if (ret)
+               return ret;
+
+       *addr = __va(tmp_addr);
+       *size = tmp_size;
+
+       return 0;
+}
+
+/**
+ * ima_free_kexec_buffer - free memory used by the IMA buffer
+ */
+int ima_free_kexec_buffer(void)
+{
+       int ret;
+       unsigned long addr;
+       size_t size;
+       struct property *prop;
+
+       prop = of_find_property(of_chosen, "linux,ima-kexec-buffer", NULL);
+       if (!prop)
+               return -ENOENT;
+
+       ret = do_get_kexec_buffer(prop->value, prop->length, &addr, &size);
+       if (ret)
+               return ret;
+
+       ret = of_remove_property(of_chosen, prop);
+       if (ret)
+               return ret;
+
+       return memblock_free(addr, size);
+}
+
+/**
+ * remove_ima_buffer - remove the IMA buffer property and reservation from @fdt
+ *
+ * The IMA measurement buffer is of no use to a subsequent kernel, so we always
+ * remove it from the device tree.
+ */
+void remove_ima_buffer(void *fdt, int chosen_node)
+{
+       int ret, len;
+       unsigned long addr;
+       size_t size;
+       const void *prop;
+
+       prop = fdt_getprop(fdt, chosen_node, "linux,ima-kexec-buffer", &len);
+       if (!prop)
+               return;
+
+       ret = do_get_kexec_buffer(prop, len, &addr, &size);
+       fdt_delprop(fdt, chosen_node, "linux,ima-kexec-buffer");
+       if (ret)
+               return;
+
+       ret = delete_fdt_mem_rsv(fdt, addr, size);
+       if (!ret)
+               pr_debug("Removed old IMA buffer reservation.\n");
+}
+
+#ifdef CONFIG_IMA_KEXEC
+/**
+ * arch_ima_add_kexec_buffer - do arch-specific steps to add the IMA buffer
+ *
+ * Architectures should use this function to pass on the IMA buffer
+ * information to the next kernel.
+ *
+ * Return: 0 on success, negative errno on error.
+ */
+int arch_ima_add_kexec_buffer(struct kimage *image, unsigned long load_addr,
+                             size_t size)
+{
+       image->arch.ima_buffer_addr = load_addr;
+       image->arch.ima_buffer_size = size;
+
+       return 0;
+}
+
+static int write_number(void *p, u64 value, int cells)
+{
+       if (cells == 1) {
+               u32 tmp;
+
+               if (value > U32_MAX)
+                       return -EINVAL;
+
+               tmp = cpu_to_be32(value);
+               memcpy(p, &tmp, sizeof(tmp));
+       } else if (cells == 2) {
+               u64 tmp;
+
+               tmp = cpu_to_be64(value);
+               memcpy(p, &tmp, sizeof(tmp));
+       } else
+               return -EINVAL;
+
+       return 0;
+}
+
+/**
+ * setup_ima_buffer - add IMA buffer information to the fdt
+ * @image:             kexec image being loaded.
+ * @fdt:               Flattened device tree for the next kernel.
+ * @chosen_node:       Offset to the chosen node.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_ima_buffer(const struct kimage *image, void *fdt, int chosen_node)
+{
+       int ret, addr_cells, size_cells, entry_size;
+       u8 value[16];
+
+       remove_ima_buffer(fdt, chosen_node);
+       if (!image->arch.ima_buffer_size)
+               return 0;
+
+       ret = get_addr_size_cells(&addr_cells, &size_cells);
+       if (ret)
+               return ret;
+
+       entry_size = 4 * (addr_cells + size_cells);
+
+       if (entry_size > sizeof(value))
+               return -EINVAL;
+
+       ret = write_number(value, image->arch.ima_buffer_addr, addr_cells);
+       if (ret)
+               return ret;
+
+       ret = write_number(value + 4 * addr_cells, image->arch.ima_buffer_size,
+                          size_cells);
+       if (ret)
+               return ret;
+
+       ret = fdt_setprop(fdt, chosen_node, "linux,ima-kexec-buffer", value,
+                         entry_size);
+       if (ret < 0)
+               return -EINVAL;
+
+       ret = fdt_add_mem_rsv(fdt, image->arch.ima_buffer_addr,
+                             image->arch.ima_buffer_size);
+       if (ret)
+               return -EINVAL;
+
+       pr_debug("IMA buffer at 0x%llx, size = 0x%zx\n",
+                image->arch.ima_buffer_addr, image->arch.ima_buffer_size);
+
+       return 0;
+}
+#endif /* CONFIG_IMA_KEXEC */
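
The "linux,ima-kexec-buffer" value is just #address-cells big-endian 32-bit words followed by #size-cells words. A userspace sketch of both halves, the encode that write_number() does and the decode that of_read_number() does on the next boot, using glibc's endian.h helpers:

    #include <endian.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static int write_cells(uint8_t *p, uint64_t value, int cells)
    {
        if (cells == 1) {
            uint32_t tmp;

            if (value > UINT32_MAX)
                return -1;
            tmp = htobe32((uint32_t)value);
            memcpy(p, &tmp, sizeof(tmp));
        } else if (cells == 2) {
            uint64_t tmp = htobe64(value);

            memcpy(p, &tmp, sizeof(tmp));
        } else {
            return -1;
        }
        return 0;
    }

    /* the of_read_number() walk: fold big-endian cells into a u64 */
    static uint64_t read_cells(const uint8_t *p, int cells)
    {
        uint64_t r = 0;

        while (cells--) {
            uint32_t cell;

            memcpy(&cell, p, sizeof(cell));
            r = (r << 32) | be32toh(cell);
            p += 4;
        }
        return r;
    }

    int main(void)
    {
        uint8_t prop[16]; /* 2 address cells + 2 size cells, as on ppc64 */

        write_cells(prop, 0x12345678abcdULL, 2);
        write_cells(prop + 8, 0x4000, 2);
        printf("addr=%#llx size=%#llx\n",
               (unsigned long long)read_cells(prop, 2),
               (unsigned long long)read_cells(prop + 8, 2));
        return 0;
    }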
diff --git a/arch/powerpc/kexec/relocate_32.S b/arch/powerpc/kexec/relocate_32.S
new file mode 100644 (file)
index 0000000..61946c1
--- /dev/null
@@ -0,0 +1,500 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This file contains kexec low-level functions.
+ *
+ * Copyright (C) 2002-2003 Eric Biederman  <ebiederm@xmission.com>
+ * GameCube/ppc32 port Copyright (C) 2004 Albert Herranz
+ * PPC44x port. Copyright (C) 2011,  IBM Corporation
+ *             Author: Suzuki Poulose <suzuki@in.ibm.com>
+ */
+
+#include <asm/reg.h>
+#include <asm/page.h>
+#include <asm/mmu.h>
+#include <asm/ppc_asm.h>
+#include <asm/kexec.h>
+
+       .text
+
+       /*
+        * Must be relocatable PIC code callable as a C function.
+        */
+       .globl relocate_new_kernel
+relocate_new_kernel:
+       /* r3 = page_list   */
+       /* r4 = reboot_code_buffer */
+       /* r5 = start_address      */
+
+#ifdef CONFIG_FSL_BOOKE
+
+       mr      r29, r3
+       mr      r30, r4
+       mr      r31, r5
+
+#define ENTRY_MAPPING_KEXEC_SETUP
+#include <kernel/fsl_booke_entry_mapping.S>
+#undef ENTRY_MAPPING_KEXEC_SETUP
+
+       mr      r3, r29
+       mr      r4, r30
+       mr      r5, r31
+
+       li      r0, 0
+#elif defined(CONFIG_44x)
+
+       /* Save our parameters */
+       mr      r29, r3
+       mr      r30, r4
+       mr      r31, r5
+
+#ifdef CONFIG_PPC_47x
+       /* Check for 47x cores */
+       mfspr   r3,SPRN_PVR
+       srwi    r3,r3,16
+       cmplwi  cr0,r3,PVR_476FPE@h
+       beq     setup_map_47x
+       cmplwi  cr0,r3,PVR_476@h
+       beq     setup_map_47x
+       cmplwi  cr0,r3,PVR_476_ISS@h
+       beq     setup_map_47x
+#endif /* CONFIG_PPC_47x */
+
+/*
+ * Code for setting up 1:1 mapping for PPC440x for KEXEC
+ *
+ * We cannot switch off the MMU on PPC44x.
+ * So we:
+ * 1) Invalidate all the mappings except the one we are running from.
+ * 2) Create a tmp mapping for our code in the other address space (TS) and
+ *    jump to it. Invalidate the entry we started in.
+ * 3) Create a 1:1 mapping for 0-2GiB in chunks of 256M in original TS.
+ * 4) Jump to the 1:1 mapping in original TS.
+ * 5) Invalidate the tmp mapping.
+ *
+ * - Based on the kexec support code for FSL BookE
+ *
+ */
+
+       /*
+        * Load the PID with kernel PID (0).
+        * Also load our MSR_IS and TID to MMUCR for TLB search.
+        */
+       li      r3, 0
+       mtspr   SPRN_PID, r3
+       mfmsr   r4
+       andi.   r4,r4,MSR_IS@l
+       beq     wmmucr
+       oris    r3,r3,PPC44x_MMUCR_STS@h
+wmmucr:
+       mtspr   SPRN_MMUCR,r3
+       sync
+
+       /*
+        * Invalidate all the TLB entries except the current entry
+        * where we are running from
+        */
+       bl      0f                              /* Find our address */
+0:     mflr    r5                              /* Make it accessible */
+       tlbsx   r23,0,r5                        /* Find entry we are in */
+       li      r4,0                            /* Start at TLB entry 0 */
+       li      r3,0                            /* Set PAGEID inval value */
+1:     cmpw    r23,r4                          /* Is this our entry? */
+       beq     skip                            /* If so, skip the inval */
+       tlbwe   r3,r4,PPC44x_TLB_PAGEID         /* If not, inval the entry */
+skip:
+       addi    r4,r4,1                         /* Increment */
+       cmpwi   r4,64                           /* Are we done? */
+       bne     1b                              /* If not, repeat */
+       isync
+
+       /* Create a temp mapping and jump to it */
+       andi.   r6, r23, 1              /* Find the index to use */
+       addi    r24, r6, 1              /* r24 will contain 1 or 2 */
+
+       mfmsr   r9                      /* get the MSR */
+       rlwinm  r5, r9, 27, 31, 31      /* Extract the MSR[IS] */
+       xori    r7, r5, 1               /* Use the other address space */
+
+       /* Read the current mapping entries */
+       tlbre   r3, r23, PPC44x_TLB_PAGEID
+       tlbre   r4, r23, PPC44x_TLB_XLAT
+       tlbre   r5, r23, PPC44x_TLB_ATTRIB
+
+       /* Save our current XLAT entry */
+       mr      r25, r4
+
+       /* Extract the TLB PageSize */
+       li      r10, 1                  /* r10 will hold PageSize */
+       rlwinm  r11, r3, 0, 24, 27      /* bits 24-27 */
+
+       /* XXX: As of now we use 256M, 4K pages */
+       cmpwi   r11, PPC44x_TLB_256M
+       bne     tlb_4k
+       rotlwi  r10, r10, 28            /* r10 = 256M */
+       b       write_out
+tlb_4k:
+       cmpwi   r11, PPC44x_TLB_4K
+       bne     default
+       rotlwi  r10, r10, 12            /* r10 = 4K */
+       b       write_out
+default:
+       rotlwi  r10, r10, 10            /* r10 = 1K */
+
+write_out:
+       /*
+        * Write out the tmp 1:1 mapping for this code in other address space
+        * Fixup  EPN = RPN , TS=other address space
+        */
+       insrwi  r3, r7, 1, 23           /* Bit 23 is TS for PAGEID field */
+
+       /* Write out the tmp mapping entries */
+       tlbwe   r3, r24, PPC44x_TLB_PAGEID
+       tlbwe   r4, r24, PPC44x_TLB_XLAT
+       tlbwe   r5, r24, PPC44x_TLB_ATTRIB
+
+       subi    r11, r10, 1             /* PageOffset Mask = PageSize - 1 */
+       not     r10, r11                /* Mask for PageNum */
+
+       /* Switch to other address space in MSR */
+       insrwi  r9, r7, 1, 26           /* Set MSR[IS] = r7 */
+
+       bl      1f
+1:     mflr    r8
+       addi    r8, r8, (2f-1b)         /* Find the target offset */
+
+       /* Jump to the tmp mapping */
+       mtspr   SPRN_SRR0, r8
+       mtspr   SPRN_SRR1, r9
+       rfi
+
+2:
+       /* Invalidate the entry we were executing from */
+       li      r3, 0
+       tlbwe   r3, r23, PPC44x_TLB_PAGEID
+
+       /* attribute fields. rwx for SUPERVISOR mode */
+       li      r5, 0
+       ori     r5, r5, (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+
+       /* Create 1:1 mapping in 256M pages */
+       xori    r7, r7, 1                       /* Revert back to Original TS */
+
+       li      r8, 0                           /* PageNumber */
+       li      r6, 3                           /* TLB Index, start at 3  */
+
+next_tlb:
+       rotlwi  r3, r8, 28                      /* Create EPN (bits 0-3) */
+       mr      r4, r3                          /* RPN = EPN  */
+       ori     r3, r3, (PPC44x_TLB_VALID | PPC44x_TLB_256M) /* SIZE = 256M, Valid */
+       insrwi  r3, r7, 1, 23                   /* Set TS from r7 */
+
+       tlbwe   r3, r6, PPC44x_TLB_PAGEID       /* PageID field : EPN, V, SIZE */
+       tlbwe   r4, r6, PPC44x_TLB_XLAT         /* Address translation : RPN   */
+       tlbwe   r5, r6, PPC44x_TLB_ATTRIB       /* Attributes */
+
+       addi    r8, r8, 1                       /* Increment PN */
+       addi    r6, r6, 1                       /* Increment TLB Index */
+       cmpwi   r8, 8                           /* Are we done? */
+       bne     next_tlb
+       isync
+
+       /* Jump to the new mapping 1:1 */
+       li      r9,0
+       insrwi  r9, r7, 1, 26                   /* Set MSR[IS] = r7 */
+
+       bl      1f
+1:     mflr    r8
+       and     r8, r8, r11                     /* Get our offset within page */
+       addi    r8, r8, (2f-1b)
+
+       and     r5, r25, r10                    /* Get our target PageNum */
+       or      r8, r8, r5                      /* Target jump address */
+
+       mtspr   SPRN_SRR0, r8
+       mtspr   SPRN_SRR1, r9
+       rfi
+2:
+       /* Invalidate the tmp entry we used */
+       li      r3, 0
+       tlbwe   r3, r24, PPC44x_TLB_PAGEID
+       sync
+       b       ppc44x_map_done
+
+#ifdef CONFIG_PPC_47x
+
+       /* 1:1 mapping for 47x */
+
+setup_map_47x:
+
+       /*
+        * Load the kernel pid (0) to PID and also to MMUCR[TID].
+        * Also set the MSR IS->MMUCR STS
+        */
+       li      r3, 0
+       mtspr   SPRN_PID, r3                    /* Set PID */
+       mfmsr   r4                              /* Get MSR */
+       andi.   r4, r4, MSR_IS@l                /* TS=1? */
+       beq     1f                              /* If not, leave STS=0 */
+       oris    r3, r3, PPC47x_MMUCR_STS@h      /* Set STS=1 */
+1:     mtspr   SPRN_MMUCR, r3                  /* Put MMUCR */
+       sync
+
+       /* Find the entry we are running from */
+       bl      2f
+2:     mflr    r23
+       tlbsx   r23, 0, r23
+       tlbre   r24, r23, 0                     /* TLB Word 0 */
+       tlbre   r25, r23, 1                     /* TLB Word 1 */
+       tlbre   r26, r23, 2                     /* TLB Word 2 */
+
+       /*
+        * Invalidate all the TLB entries by writing to 256 RPNs (r4)
+        * of 4k page size in all 4 ways (0-3 in r3).
+        * This would invalidate the entire UTLB including the one we are
+        * running from. However the shadow TLB entries would help us
+        * to continue the execution, until we flush them (rfi/isync).
+        */
+       addis   r3, 0, 0x8000                   /* specify the way */
+       addi    r4, 0, 0                        /* TLB Word0 = (EPN=0, VALID = 0) */
+       addi    r5, 0, 0
+       b       clear_utlb_entry
+
+       /* Align the loop to speed things up. from head_44x.S */
+       .align  6
+
+clear_utlb_entry:
+
+       tlbwe   r4, r3, 0
+       tlbwe   r5, r3, 1
+       tlbwe   r5, r3, 2
+       addis   r3, r3, 0x2000                  /* Increment the way */
+       cmpwi   r3, 0
+       bne     clear_utlb_entry
+       addis   r3, 0, 0x8000
+       addis   r4, r4, 0x100                   /* Increment the EPN */
+       cmpwi   r4, 0
+       bne     clear_utlb_entry
+
+       /* Create the entries in the other address space */
+       mfmsr   r5
+       rlwinm  r7, r5, 27, 31, 31              /* Get the TS (Bit 26) from MSR */
+       xori    r7, r7, 1                       /* r7 = !TS */
+
+       insrwi  r24, r7, 1, 21                  /* Change the TS in the saved TLB word 0 */
+
+       /*
+        * write out the TLB entries for the tmp mapping
+        * Use way '0' so that we could easily invalidate it later.
+        */
+       lis     r3, 0x8000                      /* Way '0' */
+
+       tlbwe   r24, r3, 0
+       tlbwe   r25, r3, 1
+       tlbwe   r26, r3, 2
+
+       /* Update the msr to the new TS */
+       insrwi  r5, r7, 1, 26
+
+       bl      1f
+1:     mflr    r6
+       addi    r6, r6, (2f-1b)
+
+       mtspr   SPRN_SRR0, r6
+       mtspr   SPRN_SRR1, r5
+       rfi
+
+       /*
+        * Now we are in the tmp address space.
+        * Create a 1:1 mapping for 0-2GiB in the original TS.
+        */
+2:
+       li      r3, 0
+       li      r4, 0                           /* TLB Word 0 */
+       li      r5, 0                           /* TLB Word 1 */
+       li      r6, 0
+       ori     r6, r6, PPC47x_TLB2_S_RWX       /* TLB word 2 */
+
+       li      r8, 0                           /* PageIndex */
+
+       xori    r7, r7, 1                       /* revert back to original TS */
+
+write_utlb:
+       rotlwi  r5, r8, 28                      /* RPN = PageIndex * 256M */
+                                               /* ERPN = 0 as we don't use memory above 2G */
+
+       mr      r4, r5                          /* EPN = RPN */
+       ori     r4, r4, (PPC47x_TLB0_VALID | PPC47x_TLB0_256M)
+       insrwi  r4, r7, 1, 21                   /* Insert the TS to Word 0 */
+
+       tlbwe   r4, r3, 0                       /* Write out the entries */
+       tlbwe   r5, r3, 1
+       tlbwe   r6, r3, 2
+       addi    r8, r8, 1
+       cmpwi   r8, 8                           /* Have we completed? */
+       bne     write_utlb
+
+       /* make sure we complete the TLB write up */
+       isync
+
+       /*
+        * Prepare to jump to the 1:1 mapping.
+        * 1) Extract page size of the tmp mapping
+        *    DSIZ = TLB_Word0[22:27]
+        * 2) Calculate the physical address of the address
+        *    to jump to.
+        */
+       rlwinm  r10, r24, 0, 22, 27
+
+       cmpwi   r10, PPC47x_TLB0_4K
+       bne     0f
+       li      r10, 0x1000                     /* r10 = 4k */
+       bl      1f
+
+0:
+       /* Defaults to 256M */
+       lis     r10, 0x1000
+
+       bl      1f
+1:     mflr    r4
+       addi    r4, r4, (2f-1b)                 /* virtual address  of 2f */
+
+       subi    r11, r10, 1                     /* offsetmask = Pagesize - 1 */
+       not     r10, r11                        /* Pagemask = ~(offsetmask) */
+
+       and     r5, r25, r10                    /* Physical page */
+       and     r6, r4, r11                     /* offset within the current page */
+
+       or      r5, r5, r6                      /* Physical address for 2f */
+
+       /* Switch the TS in MSR to the original one */
+       mfmsr   r8
+       insrwi  r8, r7, 1, 26
+
+       mtspr   SPRN_SRR1, r8
+       mtspr   SPRN_SRR0, r5
+       rfi
+
+2:
+       /* Invalidate the tmp mapping */
+       lis     r3, 0x8000                      /* Way '0' */
+
+       clrrwi  r24, r24, 12                    /* Clear the valid bit */
+       tlbwe   r24, r3, 0
+       tlbwe   r25, r3, 1
+       tlbwe   r26, r3, 2
+
+       /* Make sure we complete the TLB write and flush the shadow TLB */
+       isync
+
+#endif
+
+ppc44x_map_done:
+
+
+       /* Restore the parameters */
+       mr      r3, r29
+       mr      r4, r30
+       mr      r5, r31
+
+       li      r0, 0
+#else
+       li      r0, 0
+
+       /*
+        * Set the Machine State Register to a known state,
+        * switch the MMU off and jump to 1: in a single step.
+        */
+
+       mr      r8, r0
+       ori     r8, r8, MSR_RI|MSR_ME
+       mtspr   SPRN_SRR1, r8
+       addi    r8, r4, 1f - relocate_new_kernel
+       mtspr   SPRN_SRR0, r8
+       sync
+       rfi
+
+1:
+#endif
+       /* from this point address translation is turned off */
+       /* and interrupts are disabled */
+
+       /* set a new stack at the bottom of our page... */
+       /* (not really needed now) */
+       addi    r1, r4, KEXEC_CONTROL_PAGE_SIZE - 8 /* for LR Save+Back Chain */
+       stw     r0, 0(r1)
+
+       /* Do the copies */
+       li      r6, 0 /* checksum */
+       mr      r0, r3
+       b       1f
+
+0:     /* top, read another word for the indirection page */
+       lwzu    r0, 4(r3)
+
+1:
+       /* is it a destination page? (r8) */
+       rlwinm. r7, r0, 0, 31, 31 /* IND_DESTINATION (1<<0) */
+       beq     2f
+
+       rlwinm  r8, r0, 0, 0, 19 /* clear kexec flags, page align */
+       b       0b
+
+2:     /* is it an indirection page? (r3) */
+       rlwinm. r7, r0, 0, 30, 30 /* IND_INDIRECTION (1<<1) */
+       beq     2f
+
+       rlwinm  r3, r0, 0, 0, 19 /* clear kexec flags, page align */
+       subi    r3, r3, 4
+       b       0b
+
+2:     /* are we done? */
+       rlwinm. r7, r0, 0, 29, 29 /* IND_DONE (1<<2) */
+       beq     2f
+       b       3f
+
+2:     /* is it a source page? (r9) */
+       rlwinm. r7, r0, 0, 28, 28 /* IND_SOURCE (1<<3) */
+       beq     0b
+
+       rlwinm  r9, r0, 0, 0, 19 /* clear kexec flags, page align */
+
+       li      r7, PAGE_SIZE / 4
+       mtctr   r7
+       subi    r9, r9, 4
+       subi    r8, r8, 4
+9:
+       lwzu    r0, 4(r9)  /* do the copy */
+       xor     r6, r6, r0
+       stwu    r0, 4(r8)
+       dcbst   0, r8
+       sync
+       icbi    0, r8
+       bdnz    9b
+
+       addi    r9, r9, 4
+       addi    r8, r8, 4
+       b       0b
+
+3:
+
+       /* To be certain of avoiding problems with self-modifying code,
+        * execute a serializing instruction here.
+        */
+       isync
+       sync
+
+       mfspr   r3, SPRN_PIR /* current core we are running on */
+       mr      r4, r5 /* load physical address of chunk called */
+
+       /* jump to the entry point, usually the setup routine */
+       mtlr    r5
+       blrl
+
+1:     b       1b
+
+relocate_new_kernel_end:
+
+       .globl relocate_new_kernel_size
+relocate_new_kernel_size:
+       .long relocate_new_kernel_end - relocate_new_kernel
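For readers who do not speak PowerPC assembly: once translation is off, the copy loop above (labels 0:/1:/2:/3:) walks kexec's indirection list, and everything before it only builds the temporary mappings needed to run untranslated. A minimal C sketch of that walk, assuming the generic IND_* flag encoding from include/linux/kexec.h and that the list starts with an IND_INDIRECTION entry (as the kexec core arranges); the running checksum in r6 and the per-word dcbst/icbi maintenance are deliberately left out:

    #include <string.h>

    #define IND_DESTINATION 0x1   /* entry names the next copy target */
    #define IND_INDIRECTION 0x2   /* entry chains to another list page */
    #define IND_DONE        0x4   /* end of the list */
    #define IND_SOURCE      0x8   /* entry names a page to copy */

    static void relocate_pages(unsigned long entry, unsigned long page_size)
    {
            unsigned long *ind = NULL;
            char *dest = NULL;

            while (!(entry & IND_DONE)) {
                    char *page = (char *)(entry & ~(page_size - 1));

                    if (entry & IND_DESTINATION) {
                            dest = page;                     /* r8 above */
                    } else if (entry & IND_INDIRECTION) {
                            ind = (unsigned long *)page - 1; /* r3, biased for lwzu */
                    } else if (entry & IND_SOURCE) {
                            memcpy(dest, page, page_size);   /* the 9: loop */
                            dest += page_size;
                    }
                    entry = *++ind;                          /* lwzu r0, 4(r3) */
            }
    }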
index 84d5fab..69b2419 100644 (file)
@@ -251,9 +251,18 @@ void __init setbat(int index, unsigned long virt, phys_addr_t phys,
 {
        unsigned int bl;
        int wimgxpp;
-       struct ppc_bat *bat = BATS[index];
+       struct ppc_bat *bat;
        unsigned long flags = pgprot_val(prot);
 
+       if (index == -1)
+               index = find_free_bat();
+       if (index == -1) {
+               pr_err("%s: no BAT available for mapping 0x%llx\n", __func__,
+                      (unsigned long long)phys);
+               return;
+       }
+       bat = BATS[index];
+
        if ((flags & _PAGE_NO_CACHE) ||
            (cpu_has_feature(CPU_FTR_NEED_COHERENT) == 0))
                flags &= ~_PAGE_COHERENT;
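With this change a caller may pass index == -1 and let setbat() pick a free BAT itself. The mpc83xx hunk further down in this diff relies on exactly that to map the IMMR; abridged (the SZ_1M size is the unaligned-IMMR case there):

        setbat(-1, fix_to_virt(FIX_IMMR_BASE), get_immrbase(), SZ_1M,
               PAGE_KERNEL_NCG);
        update_bats();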
index 523e42e..d2d8237 100644 (file)
@@ -482,19 +482,12 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp,
        return ret;
 }
 
-static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+static long __native_hpte_find(unsigned long want_v, unsigned long slot)
 {
        struct hash_pte *hptep;
-       unsigned long hash;
+       unsigned long hpte_v;
        unsigned long i;
-       long slot;
-       unsigned long want_v, hpte_v;
 
-       hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
-       want_v = hpte_encode_avpn(vpn, psize, ssize);
-
-       /* Bolted mappings are only ever in the primary group */
-       slot = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        for (i = 0; i < HPTES_PER_GROUP; i++) {
 
                hptep = htab_address + slot;
@@ -508,6 +501,33 @@ static long native_hpte_find(unsigned long vpn, int psize, int ssize)
        return -1;
 }
 
+static long native_hpte_find(unsigned long vpn, int psize, int ssize)
+{
+       unsigned long hpte_group;
+       unsigned long want_v;
+       unsigned long hash;
+       long slot;
+
+       hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
+       want_v = hpte_encode_avpn(vpn, psize, ssize);
+
+       /*
+        * We try to always keep bolted entries in the primary hash group,
+        * but in some cases they can be found in the secondary group too.
+        */
+       hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
+       slot = __native_hpte_find(want_v, hpte_group);
+       if (slot < 0) {
+               /* Try in secondary */
+               hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+               slot = __native_hpte_find(want_v, hpte_group);
+               if (slot < 0)
+                       return -1;
+       }
+
+       return slot;
+}
+
 /*
  * Update the page protection bits. Intended to be used to create
  * guard pages for kernel data structures on pages which are bolted
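The fallback above derives the secondary bucket from the one's complement of the primary hash, so the two __native_hpte_find() probes together cover both groups an HPTE may legally occupy. A worked example with illustrative values (not taken from the patch), assuming HPTES_PER_GROUP == 8:

        /* hash = 0x123, htab_hash_mask = 0x7ff */
        unsigned long primary   = ( 0x123 & 0x7ff) * 8;   /* 0x918  */
        unsigned long secondary = (~0x123 & 0x7ff) * 8;   /* 0x36e0 */

The htab_bolt_mapping() hunk that follows applies the same ~hash fallback when an insert into the primary group cannot be made to fit.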
index 6c12376..b30435c 100644 (file)
@@ -263,6 +263,7 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
                unsigned long vsid = get_kernel_vsid(vaddr, ssize);
                unsigned long vpn  = hpt_vpn(vaddr, vsid, ssize);
                unsigned long tprot = prot;
+               bool secondary_hash = false;
 
                /*
                 * If we hit a bad address return error.
@@ -291,13 +292,31 @@ int htab_bolt_mapping(unsigned long vstart, unsigned long vend,
                hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
 
                BUG_ON(!mmu_hash_ops.hpte_insert);
+repeat:
                ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
                                               HPTE_V_BOLTED, psize, psize,
                                               ssize);
+               if (ret == -1) {
+                       /*
+                        * Try to keep bolted entries in the primary group:
+                        * evict a non-bolted entry and retry the insert.
+                        */
+                       ret = mmu_hash_ops.hpte_remove(hpteg);
+                       if (ret != -1)
+                               ret = mmu_hash_ops.hpte_insert(hpteg, vpn, paddr, tprot,
+                                                              HPTE_V_BOLTED, psize, psize,
+                                                              ssize);
+                       if (ret == -1 && !secondary_hash) {
+                               secondary_hash = true;
+                               hpteg = ((~hash & htab_hash_mask) * HPTES_PER_GROUP);
+                               goto repeat;
+                       }
+               }
 
                if (ret < 0)
                        break;
 
+               cond_resched();
 #ifdef CONFIG_DEBUG_PAGEALLOC
                if (debug_pagealloc_enabled() &&
                        (paddr >> PAGE_SHIFT) < linear_map_hash_count)
index ae7fca4..59e0ebb 100644 (file)
@@ -307,16 +307,6 @@ void thread_pkey_regs_init(struct thread_struct *thread)
        write_iamr(pkey_iamr_mask);
 }
 
-static inline bool pkey_allows_readwrite(int pkey)
-{
-       int pkey_shift = pkeyshift(pkey);
-
-       if (!is_pkey_enabled(pkey))
-               return true;
-
-       return !(read_amr() & ((AMR_RD_BIT|AMR_WR_BIT) << pkey_shift));
-}
-
 int __execute_only_pkey(struct mm_struct *mm)
 {
        return mm->context.execute_only_pkey;
index 67af871..a95175c 100644 (file)
@@ -732,18 +732,13 @@ local:
        }
        preempt_enable();
 }
+
 void radix__flush_all_mm(struct mm_struct *mm)
 {
        __flush_all_mm(mm, false);
 }
 EXPORT_SYMBOL(radix__flush_all_mm);
 
-void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr)
-{
-       tlb->need_flush_all = 1;
-}
-EXPORT_SYMBOL(radix__flush_tlb_pwc);
-
 void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr,
                                 int psize)
 {
@@ -832,8 +827,7 @@ static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
 static unsigned long tlb_local_single_page_flush_ceiling __read_mostly = POWER9_TLB_SETS_RADIX * 2;
 
 static inline void __radix__flush_tlb_range(struct mm_struct *mm,
-                                       unsigned long start, unsigned long end,
-                                       bool flush_all_sizes)
+                                           unsigned long start, unsigned long end)
 
 {
        unsigned long pid;
@@ -879,26 +873,16 @@ is_local:
                        }
                }
        } else {
-               bool hflush = flush_all_sizes;
-               bool gflush = flush_all_sizes;
+               bool hflush = false;
                unsigned long hstart, hend;
-               unsigned long gstart, gend;
 
-               if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
-                       hflush = true;
-
-               if (hflush) {
+               if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
                        hstart = (start + PMD_SIZE - 1) & PMD_MASK;
                        hend = end & PMD_MASK;
                        if (hstart == hend)
                                hflush = false;
-               }
-
-               if (gflush) {
-                       gstart = (start + PUD_SIZE - 1) & PUD_MASK;
-                       gend = end & PUD_MASK;
-                       if (gstart == gend)
-                               gflush = false;
+                       else
+                               hflush = true;
                }
 
                if (local) {
@@ -907,9 +891,6 @@ is_local:
                        if (hflush)
                                __tlbiel_va_range(hstart, hend, pid,
                                                PMD_SIZE, MMU_PAGE_2M);
-                       if (gflush)
-                               __tlbiel_va_range(gstart, gend, pid,
-                                               PUD_SIZE, MMU_PAGE_1G);
                        asm volatile("ptesync": : :"memory");
                } else if (cputlb_use_tlbie()) {
                        asm volatile("ptesync": : :"memory");
@@ -917,10 +898,6 @@ is_local:
                        if (hflush)
                                __tlbie_va_range(hstart, hend, pid,
                                                PMD_SIZE, MMU_PAGE_2M);
-                       if (gflush)
-                               __tlbie_va_range(gstart, gend, pid,
-                                               PUD_SIZE, MMU_PAGE_1G);
-
                        asm volatile("eieio; tlbsync; ptesync": : :"memory");
                } else {
                        _tlbiel_va_range_multicast(mm,
@@ -928,9 +905,6 @@ is_local:
                        if (hflush)
                                _tlbiel_va_range_multicast(mm,
                                        hstart, hend, pid, PMD_SIZE, MMU_PAGE_2M, false);
-                       if (gflush)
-                               _tlbiel_va_range_multicast(mm,
-                                       gstart, gend, pid, PUD_SIZE, MMU_PAGE_1G, false);
                }
        }
        preempt_enable();
@@ -945,7 +919,7 @@ void radix__flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
                return radix__flush_hugetlb_tlb_range(vma, start, end);
 #endif
 
-       __radix__flush_tlb_range(vma->vm_mm, start, end, false);
+       __radix__flush_tlb_range(vma->vm_mm, start, end);
 }
 EXPORT_SYMBOL(radix__flush_tlb_range);
 
@@ -1021,53 +995,19 @@ void radix__tlb_flush(struct mmu_gather *tlb)
         * that flushes the process table entry cache upon process teardown.
         * See the comment for radix in arch_exit_mmap().
         */
-       if (tlb->fullmm) {
+       if (tlb->fullmm || tlb->need_flush_all) {
                __flush_all_mm(mm, true);
-#if defined(CONFIG_TRANSPARENT_HUGEPAGE) || defined(CONFIG_HUGETLB_PAGE)
-       } else if (mm_tlb_flush_nested(mm)) {
-               /*
-                * If there is a concurrent invalidation that is clearing ptes,
-                * then it's possible this invalidation will miss one of those
-                * cleared ptes and miss flushing the TLB. If this invalidate
-                * returns before the other one flushes TLBs, that can result
-                * in it returning while there are still valid TLBs inside the
-                * range to be invalidated.
-                *
-                * See mm/memory.c:tlb_finish_mmu() for more details.
-                *
-                * The solution to this is ensure the entire range is always
-                * flushed here. The problem for powerpc is that the flushes
-                * are page size specific, so this "forced flush" would not
-                * do the right thing if there are a mix of page sizes in
-                * the range to be invalidated. So use __flush_tlb_range
-                * which invalidates all possible page sizes in the range.
-                *
-                * PWC flush probably is not be required because the core code
-                * shouldn't free page tables in this path, but accounting
-                * for the possibility makes us a bit more robust.
-                *
-                * need_flush_all is an uncommon case because page table
-                * teardown should be done with exclusive locks held (but
-                * after locks are dropped another invalidate could come
-                * in), it could be optimized further if necessary.
-                */
-               if (!tlb->need_flush_all)
-                       __radix__flush_tlb_range(mm, start, end, true);
-               else
-                       radix__flush_all_mm(mm);
-#endif
        } else if ( (psize = radix_get_mmu_psize(page_size)) == -1) {
-               if (!tlb->need_flush_all)
+               if (!tlb->freed_tables)
                        radix__flush_tlb_mm(mm);
                else
                        radix__flush_all_mm(mm);
        } else {
-               if (!tlb->need_flush_all)
+               if (!tlb->freed_tables)
                        radix__flush_tlb_range_psize(mm, start, end, psize);
                else
                        radix__flush_tlb_pwc_range_psize(mm, start, end, psize);
        }
-       tlb->need_flush_all = 0;
 }
 
 static __always_inline void __radix__flush_tlb_range_psize(struct mm_struct *mm,
index 8432c28..b5047f9 100644 (file)
@@ -645,6 +645,7 @@ NOKPROBE_SYMBOL(do_page_fault);
 void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
 {
        const struct exception_table_entry *entry;
+       int is_write = page_fault_is_write(regs->dsisr);
 
        /* Are we prepared to handle this fault?  */
        if ((entry = search_exception_tables(regs->nip)) != NULL) {
@@ -658,9 +659,10 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
        case 0x300:
        case 0x380:
        case 0xe00:
-               pr_alert("BUG: %s at 0x%08lx\n",
+               pr_alert("BUG: %s on %s at 0x%08lx\n",
                         regs->dar < PAGE_SIZE ? "Kernel NULL pointer dereference" :
-                        "Unable to handle kernel data access", regs->dar);
+                        "Unable to handle kernel data access",
+                        is_write ? "write" : "read", regs->dar);
                break;
        case 0x400:
        case 0x480:
index a84da92..42ef7a6 100644 (file)
 #include <asm/pgtable.h>
 #include <asm/kup.h>
 
+phys_addr_t memstart_addr __ro_after_init = (phys_addr_t)~0ull;
+EXPORT_SYMBOL_GPL(memstart_addr);
+phys_addr_t kernstart_addr __ro_after_init;
+EXPORT_SYMBOL_GPL(kernstart_addr);
+unsigned long kernstart_virt_addr __ro_after_init = KERNELBASE;
+EXPORT_SYMBOL_GPL(kernstart_virt_addr);
+
 static bool disable_kuep = !IS_ENABLED(CONFIG_PPC_KUEP);
 static bool disable_kuap = !IS_ENABLED(CONFIG_PPC_KUAP);
 
index b04896a..872df48 100644 (file)
 phys_addr_t total_memory;
 phys_addr_t total_lowmem;
 
-phys_addr_t memstart_addr = (phys_addr_t)~0ull;
-EXPORT_SYMBOL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL(kernstart_addr);
-
 #ifdef CONFIG_RELOCATABLE
 /* Used in __va()/__pa() */
 long long virt_phys_offset;
index 4e08246..4002ced 100644 (file)
 
 #include <mm/mmu_decl.h>
 
-phys_addr_t memstart_addr = ~0;
-EXPORT_SYMBOL_GPL(memstart_addr);
-phys_addr_t kernstart_addr;
-EXPORT_SYMBOL_GPL(kernstart_addr);
-
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
 /*
- * Given an address within the vmemmap, determine the pfn of the page that
- * represents the start of the section it is within.  Note that we have to
+ * Given an address within the vmemmap, determine the page that
+ * represents the start of the subsection it is within.  Note that we have to
  * do this by hand as the proffered address may not be correctly aligned.
  * Subtraction of non-aligned pointers produces undefined results.
  */
-static unsigned long __meminit vmemmap_section_start(unsigned long page)
+static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
 {
-       unsigned long offset = page - ((unsigned long)(vmemmap));
+       unsigned long start_pfn;
+       unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));
 
        /* Return the pfn of the start of the section. */
-       return (offset / sizeof(struct page)) & PAGE_SECTION_MASK;
+       start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
+       return pfn_to_page(start_pfn);
 }
 
 /*
- * Check if this vmemmap page is already initialised.  If any section
- * which overlaps this vmemmap page is initialised then this page is
- * initialised already.
+ * Since memory is added in sub-section chunks, before creating a new vmemmap
+ * mapping, the kernel should check whether there is an existing memmap mapping
+ * covering the new subsection being added. This is needed because the kernel
+ * can map the vmemmap area using 16MB pages, each of which covers a 16G range
+ * of memory. Such a range covers multiple 2M subsections.
+ *
+ * If any subsection in the 16G range mapped by the vmemmap is valid, we
+ * consider the vmemmap populated (a page table entry is already present). We
+ * can't do a page table lookup here because with hash translation we don't
+ * keep vmemmap details in the Linux page table.
  */
-static int __meminit vmemmap_populated(unsigned long start, int page_size)
+static int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
 {
-       unsigned long end = start + page_size;
-       start = (unsigned long)(pfn_to_page(vmemmap_section_start(start)));
+       struct page *start;
+       unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;
+       start = vmemmap_subsection_start(vmemmap_addr);
 
-       for (; start < end; start += (PAGES_PER_SECTION * sizeof(struct page)))
-               if (pfn_valid(page_to_pfn((struct page *)start)))
+       for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
+               /*
+                * The pfn_valid() check here really asks whether any
+                * subsection in this range has already been
+                * initialized.
+                */
+               if (pfn_valid(page_to_pfn(start)))
                        return 1;
 
        return 0;
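To make the comment's arithmetic concrete, assuming 64K base pages and a 64-byte struct page (illustrative figures, not stated in the patch): one 16M vmemmap page holds 16M / 64B = 256Ki struct pages; at 64K per page those describe 16G of memory, i.e. 8192 of the 2M subsections the loop above steps over.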
@@ -201,6 +211,12 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node,
                void *p = NULL;
                int rc;
 
+               /*
+                * This vmemmap range is backing different subsections. If any
+                * of those subsections is marked valid, that means we already
+                * have initialized a page table covering this range and hence
+                * the vmemmap range is populated.
+                */
                if (vmemmap_populated(start, page_size))
                        continue;
 
@@ -290,9 +306,10 @@ void __ref vmemmap_free(unsigned long start, unsigned long end,
                struct page *page;
 
                /*
-                * the section has already be marked as invalid, so
-                * vmemmap_populated() true means some other sections still
-                * in this page, so skip it.
+                * We have already marked the subsection we are trying to remove
+                * invalid. So if we want to remove the vmemmap range, we
+                * need to make sure there is no subsection marked valid
+                * in this range.
                 */
                if (vmemmap_populated(start, page_size))
                        continue;
index f36121f..743e113 100644 (file)
@@ -68,6 +68,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, pgprot_t prot, void *call
        /*
         * Should check if it is a candidate for a BAT mapping
         */
+       pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
 
        err = early_ioremap_range(ioremap_bot - size, p, size, prot);
        if (err)
index fd29e51..50a99d9 100644 (file)
@@ -81,6 +81,8 @@ void __iomem *__ioremap_caller(phys_addr_t addr, unsigned long size,
        if (slab_is_available())
                return do_ioremap(paligned, offset, size, prot, caller);
 
+       pr_warn("ioremap() called early from %pS. Use early_ioremap() instead\n", caller);
+
        err = early_ioremap_range(ioremap_bot, paligned, size, prot);
        if (err)
                return NULL;
index c95b7fe..ad299e7 100644 (file)
@@ -105,6 +105,27 @@ int __weak remove_section_mapping(unsigned long start, unsigned long end)
        return -ENODEV;
 }
 
+#define FLUSH_CHUNK_SIZE SZ_1G
+/**
+ * flush_dcache_range_chunked(): Write any modified data cache blocks out to
+ * memory and invalidate them, in chunks of up to FLUSH_CHUNK_SIZE
+ * Does not invalidate the corresponding instruction cache blocks.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ * @chunk: the max size of the chunks
+ */
+static void flush_dcache_range_chunked(unsigned long start, unsigned long stop,
+                                      unsigned long chunk)
+{
+       unsigned long i;
+
+       for (i = start; i < stop; i += chunk) {
+               flush_dcache_range(i, min(stop, i + chunk));
+               cond_resched();
+       }
+}
+
 int __ref arch_add_memory(int nid, u64 start, u64 size,
                        struct mhp_restrictions *restrictions)
 {
@@ -121,7 +142,6 @@ int __ref arch_add_memory(int nid, u64 start, u64 size,
                        start, start + size, rc);
                return -EFAULT;
        }
-       flush_dcache_range(start, start + size);
 
        return __add_pages(nid, start_pfn, nr_pages, restrictions);
 }
@@ -138,7 +158,8 @@ void __ref arch_remove_memory(int nid, u64 start, u64 size,
 
        /* Remove htab bolted mappings for this section of memory */
        start = (unsigned long)__va(start);
-       flush_dcache_range(start, start + size);
+       flush_dcache_range_chunked(start, start + size, FLUSH_CHUNK_SIZE);
+
        ret = remove_section_mapping(start, start + size);
        WARN_ON_ONCE(ret);
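A rough sense of why the chunking matters, assuming 128-byte L1 cache lines (an illustrative figure, not from the patch): flush_dcache_range() issues one cache operation per line, so each 1G chunk is about 8M operations, and the cond_resched() between chunks keeps very large hot-add/remove requests from monopolizing the CPU long enough to trip soft-lockup warnings.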
 
@@ -217,15 +238,13 @@ void __init paging_init(void)
        unsigned long long total_ram = memblock_phys_mem_size();
        phys_addr_t top_of_ram = memblock_end_of_DRAM();
 
-#ifdef CONFIG_PPC32
-       unsigned long v = __fix_to_virt(__end_of_fixed_addresses - 1);
-       unsigned long end = __fix_to_virt(FIX_HOLE);
+#ifdef CONFIG_HIGHMEM
+       unsigned long v = __fix_to_virt(FIX_KMAP_END);
+       unsigned long end = __fix_to_virt(FIX_KMAP_BEGIN);
 
        for (; v < end; v += PAGE_SIZE)
                map_kernel_page(v, 0, __pgprot(0)); /* XXX gross */
-#endif
 
-#ifdef CONFIG_HIGHMEM
        map_kernel_page(PKMAP_BASE, 0, __pgprot(0));    /* XXX gross */
        pkmap_page_table = virt_to_kpte(PKMAP_BASE);
 
@@ -328,6 +347,120 @@ void free_initmem(void)
        free_initmem_default(POISON_FREE_INITMEM);
 }
 
+/**
+ * flush_coherent_icache() - if a CPU has a coherent icache, flush it
+ * @addr: The base address to use (can be any valid address, the whole cache will be flushed)
+ * Return true if the cache was flushed, false otherwise
+ */
+static inline bool flush_coherent_icache(unsigned long addr)
+{
+       /*
+        * For a snooping icache, we still need a dummy icbi to purge all the
+        * prefetched instructions from the ifetch buffers. We also need a sync
+        * before the icbi to order the actual stores to memory that might
+        * have modified instructions with the icbi.
+        */
+       if (cpu_has_feature(CPU_FTR_COHERENT_ICACHE)) {
+               mb(); /* sync */
+               icbi((void *)addr);
+               mb(); /* sync */
+               isync();
+               return true;
+       }
+
+       return false;
+}
+
+/**
+ * invalidate_icache_range() - Flush the icache by issuing icbi across an address range
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+static void invalidate_icache_range(unsigned long start, unsigned long stop)
+{
+       unsigned long shift = l1_icache_shift();
+       unsigned long bytes = l1_icache_bytes();
+       char *addr = (char *)(start & ~(bytes - 1));
+       unsigned long size = stop - (unsigned long)addr + (bytes - 1);
+       unsigned long i;
+
+       for (i = 0; i < size >> shift; i++, addr += bytes)
+               icbi(addr);
+
+       mb(); /* sync */
+       isync();
+}
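A quick worked example of the rounding above, assuming 128-byte icache lines (shift = 7): for start = 0x1005 and stop = 0x1085, addr rounds down to 0x1000, size = 0x85 + 0x7f = 0x104, and size >> 7 = 2, so icbi is issued at 0x1000 and 0x1080, covering every line the range touches and nothing more.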
+
+/**
+ * flush_icache_range: Write any modified data cache blocks out to memory
+ * and invalidate the corresponding blocks in the instruction cache
+ *
+ * Generic code will call this after writing memory, before executing from it.
+ *
+ * @start: the start address
+ * @stop: the stop address (exclusive)
+ */
+void flush_icache_range(unsigned long start, unsigned long stop)
+{
+       if (flush_coherent_icache(start))
+               return;
+
+       clean_dcache_range(start, stop);
+
+       if (IS_ENABLED(CONFIG_44x)) {
+               /*
+                * Flash invalidate on 44x because we are passed kmapped
+                * addresses and this doesn't work for userspace pages due to
+                * the virtually tagged icache.
+                */
+               iccci((void *)start);
+               mb(); /* sync */
+               isync();
+       } else
+               invalidate_icache_range(start, stop);
+}
+EXPORT_SYMBOL(flush_icache_range);
+
+#if !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
+/**
+ * flush_dcache_icache_phys() - Flush a page by its physical address
+ * @physaddr: the physical address of the page
+ */
+static void flush_dcache_icache_phys(unsigned long physaddr)
+{
+       unsigned long bytes = l1_dcache_bytes();
+       unsigned long nb = PAGE_SIZE / bytes;
+       unsigned long addr = physaddr & PAGE_MASK;
+       unsigned long msr, msr0;
+       unsigned long loop1 = addr, loop2 = addr;
+
+       msr0 = mfmsr();
+       msr = msr0 & ~MSR_DR;
+       /*
+        * This must remain as ASM to prevent potential memory accesses
+        * while the data MMU is disabled
+        */
+       asm volatile(
+               "   mtctr %2;\n"
+               "   mtmsr %3;\n"
+               "   isync;\n"
+               "0: dcbst   0, %0;\n"
+               "   addi    %0, %0, %4;\n"
+               "   bdnz    0b;\n"
+               "   sync;\n"
+               "   mtctr %2;\n"
+               "1: icbi    0, %1;\n"
+               "   addi    %1, %1, %4;\n"
+               "   bdnz    1b;\n"
+               "   sync;\n"
+               "   mtmsr %5;\n"
+               "   isync;\n"
+               : "+&r" (loop1), "+&r" (loop2)
+               : "r" (nb), "r" (msr), "i" (bytes), "r" (msr0)
+               : "ctr", "memory");
+}
+#endif // !defined(CONFIG_PPC_8xx) && !defined(CONFIG_PPC64)
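The mtmsr pair in the asm above turns MSR_DR (data relocation) off on entry and restores the saved MSR on exit, so every dcbst and icbi in between operates on the physical address directly. That is also why the sequence has to stay in a single asm block: outside of it the compiler could schedule a stack or global access into the untranslated window.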
+
 /*
  * This is called when a page has been modified by the kernel.
  * It just marks the page as not i-cache clean.  We do the i-cache
@@ -360,12 +493,46 @@ void flush_dcache_icache_page(struct page *page)
                __flush_dcache_icache(start);
                kunmap_atomic(start);
        } else {
-               __flush_dcache_icache_phys(page_to_pfn(page) << PAGE_SHIFT);
+               unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
+
+               if (flush_coherent_icache(addr))
+                       return;
+               flush_dcache_icache_phys(addr);
        }
 #endif
 }
 EXPORT_SYMBOL(flush_dcache_icache_page);
 
+/**
+ * __flush_dcache_icache(): Flush a particular page from the data cache to RAM.
+ * Note: this is necessary because the instruction cache does *not*
+ * snoop from the data cache.
+ *
+ * @p: the address of the page to flush
+ */
+void __flush_dcache_icache(void *p)
+{
+       unsigned long addr = (unsigned long)p;
+
+       if (flush_coherent_icache(addr))
+               return;
+
+       clean_dcache_range(addr, addr + PAGE_SIZE);
+
+       /*
+        * We don't flush the icache on 44x. Those have a virtual icache and we
+        * don't have access to the virtual address here (it's not the page
+        * vaddr but where it's mapped in user space). The flushing of the
+        * icache on these is handled elsewhere, when a change in the address
+        * space occurs, before returning to user space.
+        */
+
+       if (cpu_has_feature(MMU_FTR_TYPE_44x))
+               return;
+
+       invalidate_icache_range(addr, addr + PAGE_SIZE);
+}
+
 void clear_user_page(void *page, unsigned long vaddr, struct page *pg)
 {
        clear_page(page);
index c750ac9..8e99649 100644 (file)
@@ -139,10 +139,21 @@ extern unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
 extern void adjust_total_lowmem(void);
 extern int switch_to_as1(void);
 extern void restore_to_as0(int esel, int offset, void *dt_ptr, int bootcpu);
+void create_kaslr_tlb_entry(int entry, unsigned long virt, phys_addr_t phys);
+void reloc_kernel_entry(void *fdt, int addr);
+extern int is_second_reloc;
 #endif
 extern void loadcam_entry(unsigned int index);
 extern void loadcam_multi(int first_idx, int num, int tmp_idx);
 
+#ifdef CONFIG_RANDOMIZE_BASE
+void kaslr_early_init(void *dt_ptr, phys_addr_t size);
+void kaslr_late_init(void);
+#else
+static inline void kaslr_early_init(void *dt_ptr, phys_addr_t size) {}
+static inline void kaslr_late_init(void) {}
+#endif
+
 struct tlbcam {
        u32     MAS0;
        u32     MAS1;
index 4a06cb3..090af2d 100644 (file)
@@ -103,6 +103,19 @@ static void mmu_patch_addis(s32 *site, long simm)
        patch_instruction_site(site, instr);
 }
 
+void __init mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, pgprot_t prot)
+{
+       unsigned long s = offset;
+       unsigned long v = PAGE_OFFSET + s;
+       phys_addr_t p = memstart_addr + s;
+
+       for (; s < top; s += PAGE_SIZE) {
+               map_kernel_page(v, p, prot);
+               v += PAGE_SIZE;
+               p += PAGE_SIZE;
+       }
+}
+
 unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
 {
        unsigned long mapped;
@@ -115,10 +128,20 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top)
                if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
                        mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, 0);
        } else {
+               unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+
                mapped = top & ~(LARGE_PAGE_SIZE_8M - 1);
                if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
-                       mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top,
-                                           _ALIGN(__pa(_einittext), 8 << 20));
+                       mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, einittext8);
+
+               /*
+                * Populate page tables to:
+                * - have them appear in /sys/kernel/debug/kernel_page_tables
+                * - allow the BDI to find the pages when they are not PINNED
+                */
+               mmu_mapin_ram_chunk(0, einittext8, PAGE_KERNEL_X);
+               mmu_mapin_ram_chunk(einittext8, mapped, PAGE_KERNEL);
+               mmu_mapin_immr();
        }
 
        mmu_patch_cmp_limit(&patch__dtlbmiss_linmem_top, mapped);
@@ -144,18 +167,41 @@ void mmu_mark_initmem_nx(void)
        if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX) && CONFIG_ETEXT_SHIFT < 23)
                mmu_patch_addis(&patch__itlbmiss_linmem_top8,
                                -((long)_etext & ~(LARGE_PAGE_SIZE_8M - 1)));
-       if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT))
+       if (!IS_ENABLED(CONFIG_PIN_TLB_TEXT)) {
+               unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M);
+               unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M);
+               unsigned long etext = __pa(_etext);
+
                mmu_patch_cmp_limit(&patch__itlbmiss_linmem_top, __pa(_etext));
+
+               /* Update page tables for PTDUMP and BDI */
+               mmu_mapin_ram_chunk(0, einittext8, __pgprot(0));
+               if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) {
+                       mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_TEXT);
+                       mmu_mapin_ram_chunk(etext, einittext8, PAGE_KERNEL);
+               } else {
+                       mmu_mapin_ram_chunk(0, etext8, PAGE_KERNEL_TEXT);
+                       mmu_mapin_ram_chunk(etext8, einittext8, PAGE_KERNEL);
+               }
+       }
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 void mmu_mark_rodata_ro(void)
 {
+       unsigned long sinittext = __pa(_sinittext);
+       unsigned long etext = __pa(_etext);
+
        if (CONFIG_DATA_SHIFT < 23)
                mmu_patch_addis(&patch__dtlbmiss_romem_top8,
                                -__pa(((unsigned long)_sinittext) &
                                      ~(LARGE_PAGE_SIZE_8M - 1)));
        mmu_patch_addis(&patch__dtlbmiss_romem_top, -__pa(_sinittext));
+
+       /* Update page tables for PTDUMP and BDI */
+       mmu_mapin_ram_chunk(0, sinittext, __pgprot(0));
+       mmu_mapin_ram_chunk(0, etext, PAGE_KERNEL_ROX);
+       mmu_mapin_ram_chunk(etext, sinittext, PAGE_KERNEL_RO);
 }
 #endif
 
index 33b6f6f..0424f6c 100644 (file)
@@ -8,6 +8,7 @@ obj-$(CONFIG_40x)               += 40x.o
 obj-$(CONFIG_44x)              += 44x.o
 obj-$(CONFIG_PPC_8xx)          += 8xx.o
 obj-$(CONFIG_PPC_FSL_BOOK3E)   += fsl_booke.o
+obj-$(CONFIG_RANDOMIZE_BASE)   += kaslr_booke.o
 ifdef CONFIG_HUGETLB_PAGE
 obj-$(CONFIG_PPC_FSL_BOOK3E)   += book3e_hugetlbpage.o
 endif
index 556e3cd..b4eb06c 100644 (file)
@@ -263,11 +263,13 @@ void setup_initial_memory_limit(phys_addr_t first_memblock_base,
 int __initdata is_second_reloc;
 notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
 {
-       unsigned long base = KERNELBASE;
+       unsigned long base = kernstart_virt_addr;
+       phys_addr_t size;
 
        kernstart_addr = start;
        if (is_second_reloc) {
                virt_phys_offset = PAGE_OFFSET - memstart_addr;
+               kaslr_late_init();
                return;
        }
 
@@ -291,7 +293,7 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
        start &= ~0x3ffffff;
        base &= ~0x3ffffff;
        virt_phys_offset = base - start;
-       early_get_first_memblock_info(__va(dt_ptr), NULL);
+       early_get_first_memblock_info(__va(dt_ptr), &size);
        /*
         * We now get the memstart_addr, then we should check if this
         * address is the same as what the PAGE_OFFSET map to now. If
@@ -316,6 +318,8 @@ notrace void __init relocate_init(u64 dt_ptr, phys_addr_t start)
                /* We should never reach here */
                panic("Relocation error");
        }
+
+       kaslr_early_init(__va(dt_ptr), size);
 }
 #endif
 #endif
diff --git a/arch/powerpc/mm/nohash/kaslr_booke.c b/arch/powerpc/mm/nohash/kaslr_booke.c
new file mode 100644 (file)
index 0000000..4a75f2d
--- /dev/null
@@ -0,0 +1,401 @@
+// SPDX-License-Identifier: GPL-2.0-only
+//
+// Copyright (C) 2019 Jason Yan <yanaijie@huawei.com>
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/stddef.h>
+#include <linux/init.h>
+#include <linux/delay.h>
+#include <linux/memblock.h>
+#include <linux/libfdt.h>
+#include <linux/crash_core.h>
+#include <asm/pgalloc.h>
+#include <asm/prom.h>
+#include <asm/kdump.h>
+#include <mm/mmu_decl.h>
+#include <generated/compile.h>
+#include <generated/utsrelease.h>
+
+struct regions {
+       unsigned long pa_start;
+       unsigned long pa_end;
+       unsigned long kernel_size;
+       unsigned long dtb_start;
+       unsigned long dtb_end;
+       unsigned long initrd_start;
+       unsigned long initrd_end;
+       unsigned long crash_start;
+       unsigned long crash_end;
+       int reserved_mem;
+       int reserved_mem_addr_cells;
+       int reserved_mem_size_cells;
+};
+
+/* Simplified build-specific string for starting entropy. */
+static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
+               LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
+
+struct regions __initdata regions;
+
+static __init void kaslr_get_cmdline(void *fdt)
+{
+       int node = fdt_path_offset(fdt, "/chosen");
+
+       early_init_dt_scan_chosen(node, "chosen", 1, boot_command_line);
+}
+
+static unsigned long __init rotate_xor(unsigned long hash, const void *area,
+                                      size_t size)
+{
+       size_t i;
+       const unsigned long *ptr = area;
+
+       for (i = 0; i < size / sizeof(hash); i++) {
+               /* Rotate by odd number of bits and XOR. */
+               hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
+               hash ^= ptr[i];
+       }
+
+       return hash;
+}
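rotate_xor() folds an arbitrary buffer into a single unsigned long: for each word it rotates the accumulator (the left rotate by sizeof(hash) * 8 - 7 is a right rotate by 7) and XORs the next word in. Cheap and word-size independent, which is all a boot-time seed mixer needs; it makes no cryptographic claims.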
+
+/* Attempt to create a simple starting entropy. This can make it different for
+ * every build but it is still not enough. Stronger entropy should
+ * be added to make it change for every boot.
+ */
+static unsigned long __init get_boot_seed(void *fdt)
+{
+       unsigned long hash = 0;
+
+       hash = rotate_xor(hash, build_str, sizeof(build_str));
+       hash = rotate_xor(hash, fdt, fdt_totalsize(fdt));
+
+       return hash;
+}
+
+static __init u64 get_kaslr_seed(void *fdt)
+{
+       int node, len;
+       fdt64_t *prop;
+       u64 ret;
+
+       node = fdt_path_offset(fdt, "/chosen");
+       if (node < 0)
+               return 0;
+
+       prop = fdt_getprop_w(fdt, node, "kaslr-seed", &len);
+       if (!prop || len != sizeof(u64))
+               return 0;
+
+       ret = fdt64_to_cpu(*prop);
+       *prop = 0;
+       return ret;
+}
+
+static __init bool regions_overlap(u32 s1, u32 e1, u32 s2, u32 e2)
+{
+       return e1 >= s2 && e2 >= s1;
+}
+
+static __init bool overlaps_reserved_region(const void *fdt, u32 start,
+                                           u32 end)
+{
+       int subnode, len, i;
+       u64 base, size;
+
+       /* check for overlap with /memreserve/ entries */
+       for (i = 0; i < fdt_num_mem_rsv(fdt); i++) {
+               if (fdt_get_mem_rsv(fdt, i, &base, &size) < 0)
+                       continue;
+               if (regions_overlap(start, end, base, base + size))
+                       return true;
+       }
+
+       if (regions.reserved_mem < 0)
+               return false;
+
+       /* check for overlap with static reservations in /reserved-memory */
+       for (subnode = fdt_first_subnode(fdt, regions.reserved_mem);
+            subnode >= 0;
+            subnode = fdt_next_subnode(fdt, subnode)) {
+               const fdt32_t *reg;
+               u64 rsv_end;
+
+               len = 0;
+               reg = fdt_getprop(fdt, subnode, "reg", &len);
+               while (len >= (regions.reserved_mem_addr_cells +
+                              regions.reserved_mem_size_cells)) {
+                       base = fdt32_to_cpu(reg[0]);
+                       if (regions.reserved_mem_addr_cells == 2)
+                               base = (base << 32) | fdt32_to_cpu(reg[1]);
+
+                       reg += regions.reserved_mem_addr_cells;
+                       len -= 4 * regions.reserved_mem_addr_cells;
+
+                       size = fdt32_to_cpu(reg[0]);
+                       if (regions.reserved_mem_size_cells == 2)
+                               size = (size << 32) | fdt32_to_cpu(reg[1]);
+
+                       reg += regions.reserved_mem_size_cells;
+                       len -= 4 * regions.reserved_mem_size_cells;
+
+                       if (base >= regions.pa_end)
+                               continue;
+
+                       rsv_end = min(base + size, (u64)U32_MAX);
+
+                       if (regions_overlap(start, end, base, rsv_end))
+                               return true;
+               }
+       }
+       return false;
+}
+
+static __init bool overlaps_region(const void *fdt, u32 start,
+                                  u32 end)
+{
+       if (regions_overlap(start, end, __pa(_stext), __pa(_end)))
+               return true;
+
+       if (regions_overlap(start, end, regions.dtb_start,
+                           regions.dtb_end))
+               return true;
+
+       if (regions_overlap(start, end, regions.initrd_start,
+                           regions.initrd_end))
+               return true;
+
+       if (regions_overlap(start, end, regions.crash_start,
+                           regions.crash_end))
+               return true;
+
+       return overlaps_reserved_region(fdt, start, end);
+}
+
+static void __init get_crash_kernel(void *fdt, unsigned long size)
+{
+#ifdef CONFIG_CRASH_CORE
+       unsigned long long crash_size, crash_base;
+       int ret;
+
+       ret = parse_crashkernel(boot_command_line, size, &crash_size,
+                               &crash_base);
+       if (ret != 0 || crash_size == 0)
+               return;
+       if (crash_base == 0)
+               crash_base = KDUMP_KERNELBASE;
+
+       regions.crash_start = (unsigned long)crash_base;
+       regions.crash_end = (unsigned long)(crash_base + crash_size);
+
+       pr_debug("crash_base=0x%llx crash_size=0x%llx\n", crash_base, crash_size);
+#endif
+}
+
+static void __init get_initrd_range(void *fdt)
+{
+       u64 start, end;
+       int node, len;
+       const __be32 *prop;
+
+       node = fdt_path_offset(fdt, "/chosen");
+       if (node < 0)
+               return;
+
+       prop = fdt_getprop(fdt, node, "linux,initrd-start", &len);
+       if (!prop)
+               return;
+       start = of_read_number(prop, len / 4);
+
+       prop = fdt_getprop(fdt, node, "linux,initrd-end", &len);
+       if (!prop)
+               return;
+       end = of_read_number(prop, len / 4);
+
+       regions.initrd_start = (unsigned long)start;
+       regions.initrd_end = (unsigned long)end;
+
+       pr_debug("initrd_start=0x%llx  initrd_end=0x%llx\n", start, end);
+}
+
+static __init unsigned long get_usable_address(const void *fdt,
+                                              unsigned long start,
+                                              unsigned long offset)
+{
+       unsigned long pa;
+       unsigned long pa_end;
+
+       for (pa = offset; (long)pa > (long)start; pa -= SZ_16K) {
+               pa_end = pa + regions.kernel_size;
+               if (overlaps_region(fdt, pa, pa_end))
+                       continue;
+
+               return pa;
+       }
+       return 0;
+}
+
+static __init void get_cell_sizes(const void *fdt, int node, int *addr_cells,
+                                 int *size_cells)
+{
+       const int *prop;
+       int len;
+
+       /*
+        * Retrieve the #address-cells and #size-cells properties
+        * from the 'node', or use the default if not provided.
+        */
+       *addr_cells = *size_cells = 1;
+
+       prop = fdt_getprop(fdt, node, "#address-cells", &len);
+       if (len == 4)
+               *addr_cells = fdt32_to_cpu(*prop);
+       prop = fdt_getprop(fdt, node, "#size-cells", &len);
+       if (len == 4)
+               *size_cells = fdt32_to_cpu(*prop);
+}
+
+static unsigned long __init kaslr_legal_offset(void *dt_ptr, unsigned long index,
+                                              unsigned long offset)
+{
+       unsigned long koffset = 0;
+       unsigned long start;
+
+       while ((long)index >= 0) {
+               offset = memstart_addr + index * SZ_64M + offset;
+               start = memstart_addr + index * SZ_64M;
+               koffset = get_usable_address(dt_ptr, start, offset);
+               if (koffset)
+                       break;
+               index--;
+       }
+
+       if (koffset != 0)
+               koffset -= memstart_addr;
+
+       return koffset;
+}
+
+static inline __init bool kaslr_disabled(void)
+{
+       return strstr(boot_command_line, "nokaslr") != NULL;
+}
+
+static unsigned long __init kaslr_choose_location(void *dt_ptr, phys_addr_t size,
+                                                 unsigned long kernel_sz)
+{
+       unsigned long offset, random;
+       unsigned long ram, linear_sz;
+       u64 seed;
+       unsigned long index;
+
+       kaslr_get_cmdline(dt_ptr);
+       if (kaslr_disabled())
+               return 0;
+
+       random = get_boot_seed(dt_ptr);
+
+       seed = get_tb() << 32;
+       seed ^= get_tb();
+       random = rotate_xor(random, &seed, sizeof(seed));
+
+       /*
+        * Retrieve (and wipe) the seed from the FDT
+        */
+       seed = get_kaslr_seed(dt_ptr);
+       if (seed)
+               random = rotate_xor(random, &seed, sizeof(seed));
+       else
+               pr_warn("KASLR: No safe seed for randomizing the kernel base.\n");
+
+       ram = min_t(phys_addr_t, __max_low_memory, size);
+       ram = map_mem_in_cams(ram, CONFIG_LOWMEM_CAM_NUM, true);
+       linear_sz = min_t(unsigned long, ram, SZ_512M);
+
+       /* If the linear size is smaller than 64M, do not randomize */
+       if (linear_sz < SZ_64M)
+               return 0;
+
+       /* check for a reserved-memory node and record its cell sizes */
+       regions.reserved_mem = fdt_path_offset(dt_ptr, "/reserved-memory");
+       if (regions.reserved_mem >= 0)
+               get_cell_sizes(dt_ptr, regions.reserved_mem,
+                              &regions.reserved_mem_addr_cells,
+                              &regions.reserved_mem_size_cells);
+
+       regions.pa_start = memstart_addr;
+       regions.pa_end = memstart_addr + linear_sz;
+       regions.dtb_start = __pa(dt_ptr);
+       regions.dtb_end = __pa(dt_ptr) + fdt_totalsize(dt_ptr);
+       regions.kernel_size = kernel_sz;
+
+       get_initrd_range(dt_ptr);
+       get_crash_kernel(dt_ptr, ram);
+
+       /*
+        * Decide which 64M region to start in.
+        * Only use the low 8 bits of the random seed.
+        */
+       index = random & 0xFF;
+       index %= linear_sz / SZ_64M;
+
+       /* Decide offset inside 64M */
+       offset = random % (SZ_64M - kernel_sz);
+       offset = round_down(offset, SZ_16K);
+
+       return kaslr_legal_offset(dt_ptr, index, offset);
+}
+
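A worked example of the slot selection above, with made-up inputs (illustrative only): linear_sz = 512M yields eight candidate 64M slots; with random = 0x12345678 and an 8M kernel:

        index  = (0x12345678 & 0xFF) % (SZ_512M / SZ_64M); /* 0x78 % 8 = 0 */
        offset = 0x12345678 % (SZ_64M - SZ_8M);            /* offset inside the slot */
        offset = round_down(offset, SZ_16K);               /* keep the 16K grain */

kaslr_legal_offset() then walks downwards from that candidate in 16K steps, via get_usable_address(), until the kernel image no longer overlaps the DTB, initrd, crashkernel window or any reserved region.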
+/*
+ * Decide whether we need to relocate the kernel to a random offset.
+ * void *dt_ptr - address of the device tree
+ * phys_addr_t size - size of the first memory block
+ */
+notrace void __init kaslr_early_init(void *dt_ptr, phys_addr_t size)
+{
+       unsigned long tlb_virt;
+       phys_addr_t tlb_phys;
+       unsigned long offset;
+       unsigned long kernel_sz;
+
+       kernel_sz = (unsigned long)_end - (unsigned long)_stext;
+
+       offset = kaslr_choose_location(dt_ptr, size, kernel_sz);
+       if (offset == 0)
+               return;
+
+       kernstart_virt_addr += offset;
+       kernstart_addr += offset;
+
+       is_second_reloc = 1;
+
+       if (offset >= SZ_64M) {
+               tlb_virt = round_down(kernstart_virt_addr, SZ_64M);
+               tlb_phys = round_down(kernstart_addr, SZ_64M);
+
+               /* Create kernel map to relocate in */
+               create_kaslr_tlb_entry(1, tlb_virt, tlb_phys);
+       }
+
+       /* Copy the kernel to its new location and run */
+       memcpy((void *)kernstart_virt_addr, (void *)_stext, kernel_sz);
+       flush_icache_range(kernstart_virt_addr, kernstart_virt_addr + kernel_sz);
+
+       reloc_kernel_entry(dt_ptr, kernstart_virt_addr);
+}
+
+void __init kaslr_late_init(void)
+{
+       /* If randomized, clear the original kernel */
+       if (kernstart_virt_addr != KERNELBASE) {
+               unsigned long kernel_sz;
+
+               kernel_sz = (unsigned long)_end - kernstart_virt_addr;
+               memzero_explicit((void *)KERNELBASE, kernel_sz);
+       }
+}
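Pieced together with the relocate_init() hunk earlier in this diff, the two-pass flow looks roughly like this (a reconstructed sketch of the call sequence, not authoritative):

        relocate_init()                  /* first pass */
          kaslr_early_init()             /* pick offset, copy kernel, set is_second_reloc */
            create_kaslr_tlb_entry()     /* only when offset >= 64M */
            reloc_kernel_entry()         /* re-enter head code at the new base */
        relocate_init()                  /* second pass, at the random base */
          kaslr_late_init()              /* memzero_explicit() the original image */

Booting with nokaslr on the kernel command line skips all of this, as checked in kaslr_disabled() above.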
index 8ec5dfb..73b8416 100644 (file)
@@ -117,10 +117,7 @@ void __init mapin_ram(void)
                if (base >= top)
                        continue;
                base = mmu_mapin_ram(base, top);
-               if (IS_ENABLED(CONFIG_BDI_SWITCH))
-                       __mapin_ram_chunk(reg->base, top);
-               else
-                       __mapin_ram_chunk(base, top);
+               __mapin_ram_chunk(base, top);
        }
 }
 
index c84bbd4..35d5425 100644 (file)
@@ -284,16 +284,6 @@ static void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry,
        }
 }
 
-static inline int current_is_64bit(void)
-{
-       /*
-        * We can't use test_thread_flag() here because we may be on an
-        * interrupt stack, and the thread flags don't get copied over
-        * from the thread_info on the main stack to the interrupt stack.
-        */
-       return !test_ti_thread_flag(task_thread_info(current), TIF_32BIT);
-}
-
 #else  /* CONFIG_PPC64 */
 /*
  * On 32-bit we just access the address and let hash_page create a
@@ -321,11 +311,6 @@ static inline void perf_callchain_user_64(struct perf_callchain_entry_ctx *entry
 {
 }
 
-static inline int current_is_64bit(void)
-{
-       return 0;
-}
-
 static inline int valid_user_sp(unsigned long sp, int is_64)
 {
        if (!sp || (sp & 7) || sp > TASK_SIZE - 32)
@@ -486,7 +471,7 @@ static void perf_callchain_user_32(struct perf_callchain_entry_ctx *entry,
 void
 perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs)
 {
-       if (current_is_64bit())
+       if (!is_32bit_task())
                perf_callchain_user_64(entry, regs);
        else
                perf_callchain_user_32(entry, regs);
index f46d7bf..6399865 100644 (file)
@@ -18,6 +18,8 @@
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
 
+#include <mm/mmu_decl.h>
+
 #include "mpc83xx.h"
 
 static __be32 __iomem *restart_reg_base;
@@ -145,6 +147,15 @@ void __init mpc83xx_setup_arch(void)
        if (ppc_md.progress)
                ppc_md.progress("mpc83xx_setup_arch()", 0);
 
+       if (!__map_without_bats) {
+               phys_addr_t immrbase = get_immrbase();
+               int immrsize = IS_ALIGNED(immrbase, SZ_2M) ? SZ_2M : SZ_1M;
+               unsigned long va = fix_to_virt(FIX_IMMR_BASE);
+
+               setbat(-1, va, immrbase, immrsize, PAGE_KERNEL_NCG);
+               update_bats();
+       }
+
        mpc83xx_setup_pci();
 }
 
index 4a4efa9..240a26d 100644 (file)
@@ -39,7 +39,6 @@
 #include <asm/udbg.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
-#include <sysdev/simple_gpio.h>
 #include <soc/fsl/qe/qe.h>
 #include <soc/fsl/qe/qe_ic.h>
 
@@ -181,12 +180,6 @@ static int __init mpc836x_usb_cfg(void)
                qe_usb_clock_set(QE_CLK21, 48000000);
        } else {
                setbits8(&bcsr[13], BCSR13_USBMODE);
-               /*
-                * The BCSR GPIOs are used to control power and
-                * speed of the USB transceiver. This is needed for
-                * the USB Host only.
-                */
-               simple_gpiochip_init("fsl,mpc8360mds-bcsr-gpio");
        }
 
        of_node_put(np);
index fe06064..a554b6d 100644 (file)
@@ -86,29 +86,6 @@ void __init mpc85xx_cpm2_pic_init(void)
 #endif
 
 #ifdef CONFIG_QUICC_ENGINE
-void __init mpc85xx_qe_init(void)
-{
-       struct device_node *np;
-
-       np = of_find_compatible_node(NULL, NULL, "fsl,qe");
-       if (!np) {
-               np = of_find_node_by_name(NULL, "qe");
-               if (!np) {
-                       pr_err("%s: Could not find Quicc Engine node\n",
-                                       __func__);
-                       return;
-               }
-       }
-
-       if (!of_device_is_available(np)) {
-               of_node_put(np);
-               return;
-       }
-
-       of_node_put(np);
-
-}
-
 void __init mpc85xx_qe_par_io_init(void)
 {
        struct device_node *np;
index 7ee2c66..a328a74 100644 (file)
@@ -66,8 +66,6 @@ void __init corenet_gen_setup_arch(void)
        swiotlb_detect_4g();
 
        pr_info("%s board\n", ppc_md.name);
-
-       mpc85xx_qe_init();
 }
 
 static const struct of_device_id of_device_ids[] = {
index fa23f9b..cb84c5c 100644 (file)
@@ -10,10 +10,8 @@ static inline void __init mpc85xx_cpm2_pic_init(void) {}
 #endif /* CONFIG_CPM2 */
 
 #ifdef CONFIG_QUICC_ENGINE
-extern void mpc85xx_qe_init(void);
 extern void mpc85xx_qe_par_io_init(void);
 #else
-static inline void __init mpc85xx_qe_init(void) {}
 static inline void __init mpc85xx_qe_par_io_init(void) {}
 #endif
 
index 5ca2542..381a6ac 100644 (file)
@@ -43,7 +43,6 @@
 #include <asm/udbg.h>
 #include <sysdev/fsl_soc.h>
 #include <sysdev/fsl_pci.h>
-#include <sysdev/simple_gpio.h>
 #include <soc/fsl/qe/qe.h>
 #include <soc/fsl/qe/qe_ic.h>
 #include <asm/mpic.h>
@@ -238,7 +237,6 @@ static void __init mpc85xx_mds_qe_init(void)
 {
        struct device_node *np;
 
-       mpc85xx_qe_init();
        mpc85xx_qe_par_io_init();
        mpc85xx_mds_reset_ucc_phys();
 
@@ -351,11 +349,6 @@ machine_arch_initcall(mpc8569_mds, board_fixups);
 
 static int __init mpc85xx_publish_devices(void)
 {
-       if (machine_is(mpc8568_mds))
-               simple_gpiochip_init("fsl,mpc8568mds-bcsr-gpio");
-       if (machine_is(mpc8569_mds))
-               simple_gpiochip_init("fsl,mpc8569mds-bcsr-gpio");
-
        return mpc85xx_common_publish_devices();
 }
 
index d3c540e..7f9a84f 100644 (file)
@@ -89,7 +89,6 @@ static void __init mpc85xx_rdb_setup_arch(void)
        fsl_pci_assign_primary();
 
 #ifdef CONFIG_QUICC_ENGINE
-       mpc85xx_qe_init();
        mpc85xx_qe_par_io_init();
 #if defined(CONFIG_UCC_GETH) || defined(CONFIG_SERIAL_QE)
        if (machine_is(p1025_rdb)) {
index 720b0c0..6c3c0cd 100644 (file)
@@ -72,7 +72,6 @@ static void __init twr_p1025_setup_arch(void)
        fsl_pci_assign_primary();
 
 #ifdef CONFIG_QUICC_ENGINE
-       mpc85xx_qe_init();
        mpc85xx_qe_par_io_init();
 
 #if IS_ENABLED(CONFIG_UCC_GETH) || IS_ENABLED(CONFIG_SERIAL_QE)
index 96b27f6..7733d06 100644 (file)
@@ -34,7 +34,6 @@
 #include <linux/of_platform.h>
 #include <sysdev/fsl_pci.h>
 #include <sysdev/fsl_soc.h>
-#include <sysdev/simple_gpio.h>
 
 #include "mpc86xx.h"
 
@@ -93,9 +92,6 @@ static const struct of_device_id mpc8610_ids[] __initconst = {
 
 static int __init mpc8610_declare_of_platform_devices(void)
 {
-       /* Firstly, register PIXIS GPIOs. */
-       simple_gpiochip_init("fsl,fpga-pixis-gpio-bank");
-
        /* Enable wakeup on PIXIS' event IRQ. */
        mpc8610_suspend_init();
 
index 0f65c51..a43ee7d 100644 (file)
@@ -51,7 +51,7 @@
 #define CPM_MAP_SIZE    (0x4000)
 
 cpm8xx_t __iomem *cpmp;  /* Pointer to comm processor space */
-immap_t __iomem *mpc8xx_immr;
+immap_t __iomem *mpc8xx_immr = (void __iomem *)VIRT_IMMR_BASE;
 static cpic8xx_t __iomem *cpic_reg;
 
 static struct irq_domain *cpm_pic_host;
@@ -130,7 +130,7 @@ static const struct irq_domain_ops cpm_pic_host_ops = {
        .map = cpm_pic_host_map,
 };
 
-unsigned int cpm_pic_init(void)
+unsigned int __init cpm_pic_init(void)
 {
        struct device_node *np = NULL;
        struct resource res;
@@ -201,12 +201,6 @@ void __init cpm_reset(void)
 {
        sysconf8xx_t __iomem *siu_conf;
 
-       mpc8xx_immr = ioremap(get_immrbase(), 0x4000);
-       if (!mpc8xx_immr) {
-               printk(KERN_CRIT "Could not map IMMR\n");
-               return;
-       }
-
        cpmp = &mpc8xx_immr->im_cpm;
 
 #ifndef CONFIG_PPC_EARLY_DEBUG_CPM
@@ -306,7 +300,7 @@ struct cpm_ioport32e {
        __be32 dir, par, sor, odr, dat;
 };
 
-static void cpm1_set_pin32(int port, int pin, int flags)
+static void __init cpm1_set_pin32(int port, int pin, int flags)
 {
        struct cpm_ioport32e __iomem *iop;
        pin = 1 << (31 - pin);
@@ -348,7 +342,7 @@ static void cpm1_set_pin32(int port, int pin, int flags)
        }
 }
 
-static void cpm1_set_pin16(int port, int pin, int flags)
+static void __init cpm1_set_pin16(int port, int pin, int flags)
 {
        struct cpm_ioport16 __iomem *iop =
                (struct cpm_ioport16 __iomem *)&mpc8xx_immr->im_ioport;
@@ -386,7 +380,7 @@ static void cpm1_set_pin16(int port, int pin, int flags)
        }
 }
 
-void cpm1_set_pin(enum cpm_port port, int pin, int flags)
+void __init cpm1_set_pin(enum cpm_port port, int pin, int flags)
 {
        if (port == CPM_PORTB || port == CPM_PORTE)
                cpm1_set_pin32(port, pin, flags);
@@ -394,7 +388,7 @@ void cpm1_set_pin(enum cpm_port port, int pin, int flags)
                cpm1_set_pin16(port, pin, flags);
 }
 
-int cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
+int __init cpm1_clk_setup(enum cpm_clk_target target, int clock, int mode)
 {
        int shift;
        int i, bits = 0;
index e9617d3..f2ba837 100644 (file)
@@ -125,7 +125,7 @@ static const struct irq_domain_ops mpc8xx_pic_host_ops = {
        .xlate = mpc8xx_pic_host_xlate,
 };
 
-int mpc8xx_pic_init(void)
+int __init mpc8xx_pic_init(void)
 {
        struct resource res;
        struct device_node *np;
index d82e366..e28df29 100644 (file)
@@ -303,16 +303,6 @@ config GEN_RTC
          replacing their get_rtc_time/set_rtc_time callbacks with
          a proper RTC device driver.
 
-config SIMPLE_GPIO
-       bool "Support for simple, memory-mapped GPIO controllers"
-       depends on PPC
-       select GPIOLIB
-       help
-         Say Y here to support simple, memory-mapped GPIO controllers.
-         These are usually BCSRs used to control board's switches, LEDs,
-         chip-selects, Ethernet/USB PHY's power and various other small
-         on-board peripherals.
-
 config MCU_MPC8349EMITX
        bool "MPC8349E-mITX MCU driver"
        depends on I2C=y && PPC_83xx
index 303752f..8d7f9c3 100644 (file)
@@ -415,13 +415,13 @@ config PPC_MM_SLICES
        bool
 
 config PPC_HAVE_PMU_SUPPORT
-       bool
+       bool
 
 config PPC_PERF_CTRS
-       def_bool y
-       depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
-       help
-         This enables the powerpc-specific perf_event back-end.
+       def_bool y
+       depends on PERF_EVENTS && PPC_HAVE_PMU_SUPPORT
+       help
+        This enables the powerpc-specific perf_event back-end.
 
 config FORCE_SMP
        # Allow platforms to force SMP=y by selecting this
index 2dd452a..9b1586b 100644 (file)
@@ -198,14 +198,12 @@ static int spufs_fill_dir(struct dentry *dir,
 
 static int spufs_dir_close(struct inode *inode, struct file *file)
 {
-       struct spu_context *ctx;
        struct inode *parent;
        struct dentry *dir;
        int ret;
 
        dir = file->f_path.dentry;
        parent = d_inode(dir->d_parent);
-       ctx = SPUFS_I(d_inode(dir))->i_ctx;
 
        inode_lock_nested(parent, I_MUTEX_PARENT);
        ret = spufs_rmdir(parent, dir);
index a3ac964..c0f8120 100644 (file)
@@ -20,3 +20,4 @@ obj-$(CONFIG_PPC_MEMTRACE)    += memtrace.o
 obj-$(CONFIG_PPC_VAS)  += vas.o vas-window.o vas-debug.o
 obj-$(CONFIG_OCXL_BASE)        += ocxl.o
 obj-$(CONFIG_SCOM_DEBUGFS) += opal-xscom.o
+obj-$(CONFIG_PPC_SECURE_BOOT) += opal-secvar.o
index a2aa5e4..5cd0f52 100644 (file)
@@ -290,3 +290,6 @@ OPAL_CALL(opal_nx_coproc_init,                      OPAL_NX_COPROC_INIT);
 OPAL_CALL(opal_mpipl_update,                   OPAL_MPIPL_UPDATE);
 OPAL_CALL(opal_mpipl_register_tag,             OPAL_MPIPL_REGISTER_TAG);
 OPAL_CALL(opal_mpipl_query_tag,                        OPAL_MPIPL_QUERY_TAG);
+OPAL_CALL(opal_secvar_get,                     OPAL_SECVAR_GET);
+OPAL_CALL(opal_secvar_get_next,                        OPAL_SECVAR_GET_NEXT);
+OPAL_CALL(opal_secvar_enqueue_update,          OPAL_SECVAR_ENQUEUE_UPDATE);
index dc599e7..c16d44f 100644 (file)
@@ -13,7 +13,7 @@
 
 #include <asm/opal.h>
 
-DEFINE_MUTEX(powercap_mutex);
+static DEFINE_MUTEX(powercap_mutex);
 
 static struct kobject *powercap_kobj;
 
index b6ccb30..69d7e75 100644 (file)
 
 #include <asm/opal.h>
 
-DEFINE_MUTEX(psr_mutex);
+static DEFINE_MUTEX(psr_mutex);
 
 static struct kobject *psr_kobj;
 
-struct psr_attr {
+static struct psr_attr {
        u32 handle;
        struct kobj_attribute attr;
 } *psr_attrs;
diff --git a/arch/powerpc/platforms/powernv/opal-secvar.c b/arch/powerpc/platforms/powernv/opal-secvar.c
new file mode 100644 (file)
index 0000000..14133e1
--- /dev/null
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PowerNV code for secure variables
+ *
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Claudio Carvalho
+ *         Nayna Jain
+ *
+ * APIs to access secure variables managed by OPAL.
+ */
+
+#define pr_fmt(fmt) "secvar: "fmt
+
+#include <linux/types.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <asm/opal.h>
+#include <asm/secvar.h>
+#include <asm/secure_boot.h>
+
+static int opal_status_to_err(int rc)
+{
+       int err;
+
+       switch (rc) {
+       case OPAL_SUCCESS:
+               err = 0;
+               break;
+       case OPAL_UNSUPPORTED:
+               err = -ENXIO;
+               break;
+       case OPAL_PARAMETER:
+               err = -EINVAL;
+               break;
+       case OPAL_RESOURCE:
+               err = -ENOSPC;
+               break;
+       case OPAL_HARDWARE:
+               err = -EIO;
+               break;
+       case OPAL_NO_MEM:
+               err = -ENOMEM;
+               break;
+       case OPAL_EMPTY:
+               err = -ENOENT;
+               break;
+       case OPAL_PARTIAL:
+               err = -EFBIG;
+               break;
+       default:
+               err = -EINVAL;
+       }
+
+       return err;
+}
+
+static int opal_get_variable(const char *key, uint64_t ksize,
+                            u8 *data, uint64_t *dsize)
+{
+       int rc;
+
+       if (!key || !dsize)
+               return -EINVAL;
+
+       *dsize = cpu_to_be64(*dsize);
+
+       rc = opal_secvar_get(key, ksize, data, dsize);
+
+       *dsize = be64_to_cpu(*dsize);
+
+       return opal_status_to_err(rc);
+}
+
+static int opal_get_next_variable(const char *key, uint64_t *keylen,
+                                 uint64_t keybufsize)
+{
+       int rc;
+
+       if (!key || !keylen)
+               return -EINVAL;
+
+       *keylen = cpu_to_be64(*keylen);
+
+       rc = opal_secvar_get_next(key, keylen, keybufsize);
+
+       *keylen = be64_to_cpu(*keylen);
+
+       return opal_status_to_err(rc);
+}
+
+static int opal_set_variable(const char *key, uint64_t ksize, u8 *data,
+                            uint64_t dsize)
+{
+       int rc;
+
+       if (!key || !data)
+               return -EINVAL;
+
+       rc = opal_secvar_enqueue_update(key, ksize, data, dsize);
+
+       return opal_status_to_err(rc);
+}
+
+static const struct secvar_operations opal_secvar_ops = {
+       .get = opal_get_variable,
+       .get_next = opal_get_next_variable,
+       .set = opal_set_variable,
+};
+
+static int opal_secvar_probe(struct platform_device *pdev)
+{
+       if (!opal_check_token(OPAL_SECVAR_GET)
+                       || !opal_check_token(OPAL_SECVAR_GET_NEXT)
+                       || !opal_check_token(OPAL_SECVAR_ENQUEUE_UPDATE)) {
+               pr_err("OPAL doesn't support secure variables\n");
+               return -ENODEV;
+       }
+
+       set_secvar_ops(&opal_secvar_ops);
+
+       return 0;
+}
+
+static const struct of_device_id opal_secvar_match[] = {
+       { .compatible = "ibm,secvar-backend",},
+       {},
+};
+
+static struct platform_driver opal_secvar_driver = {
+       .driver = {
+               .name = "secvar",
+               .of_match_table = opal_secvar_match,
+       },
+};
+
+static int __init opal_secvar_init(void)
+{
+       return platform_driver_probe(&opal_secvar_driver, opal_secvar_probe);
+}
+device_initcall(opal_secvar_init);
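For illustration, a minimal consumer sketch (not part of the patch), assuming the secvar_ops pointer and struct secvar_operations that the companion asm/secvar.h changes in this series provide; the variable name "db" and the caller are hypothetical:

#include <linux/string.h>
#include <asm/secvar.h>

static int example_read_secvar(u8 *buf, uint64_t *size)
{
	if (!secvar_ops)
		return -ENODEV;
	/* Key length includes the terminating NUL; the backend updates *size. */
	return secvar_ops->get("db", strlen("db") + 1, buf, size);
}

A too-small buffer is expected to surface as -EFBIG via the OPAL_PARTIAL mapping in opal_status_to_err() above, so a caller can resize and retry.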
index 31f13c1..f8ae1fb 100644 (file)
@@ -13,7 +13,7 @@
 
 #include <asm/opal.h>
 
-DEFINE_MUTEX(sg_mutex);
+static DEFINE_MUTEX(sg_mutex);
 
 static struct kobject *sg_kobj;
 
index 38e9027..a6ee080 100644 (file)
 
 #include "powernv.h"
 
+#define OPAL_MSG_QUEUE_MAX 16
+
+struct opal_msg_node {
+       struct list_head        list;
+       struct opal_msg         msg;
+};
+
+static DEFINE_SPINLOCK(msg_list_lock);
+static LIST_HEAD(msg_list);
+
 /* /sys/firmware/opal */
 struct kobject *opal_kobj;
 
@@ -50,6 +60,8 @@ struct mcheck_recoverable_range {
        u64 recover_addr;
 };
 
+static int msg_list_size;
+
 static struct mcheck_recoverable_range *mc_recoverable_range;
 static int mc_recoverable_range_len;
 
@@ -237,6 +249,43 @@ static int __init opal_register_exception_handlers(void)
 }
 machine_early_initcall(powernv, opal_register_exception_handlers);
 
+static void queue_replay_msg(void *msg)
+{
+       struct opal_msg_node *msg_node;
+
+       if (msg_list_size < OPAL_MSG_QUEUE_MAX) {
+               msg_node = kzalloc(sizeof(*msg_node), GFP_ATOMIC);
+               if (msg_node) {
+                       INIT_LIST_HEAD(&msg_node->list);
+                       memcpy(&msg_node->msg, msg, sizeof(struct opal_msg));
+                       list_add_tail(&msg_node->list, &msg_list);
+                       msg_list_size++;
+               } else
+                       pr_warn_once("message queue no memory\n");
+
+               if (msg_list_size >= OPAL_MSG_QUEUE_MAX)
+                       pr_warn_once("message queue full\n");
+       }
+}
+
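+/* Deliver and free all queued messages of @msg_type; msg_list_lock held. */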
+static void dequeue_replay_msg(enum opal_msg_type msg_type)
+{
+       struct opal_msg_node *msg_node, *tmp;
+
+       list_for_each_entry_safe(msg_node, tmp, &msg_list, list) {
+               if (be32_to_cpu(msg_node->msg.msg_type) != msg_type)
+                       continue;
+
+               atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
+                                       msg_type,
+                                       &msg_node->msg);
+
+               list_del(&msg_node->list);
+               kfree(msg_node);
+               msg_list_size--;
+       }
+}
+
 /*
  * Opal message notifier based on message type. Allow subscribers to get
  * notified for a specific message type.
@@ -244,14 +293,30 @@ machine_early_initcall(powernv, opal_register_exception_handlers);
 int opal_message_notifier_register(enum opal_msg_type msg_type,
                                        struct notifier_block *nb)
 {
+       int ret;
+       unsigned long flags;
+
        if (!nb || msg_type >= OPAL_MSG_TYPE_MAX) {
                pr_warn("%s: Invalid arguments, msg_type:%d\n",
                        __func__, msg_type);
                return -EINVAL;
        }
 
-       return atomic_notifier_chain_register(
-                               &opal_msg_notifier_head[msg_type], nb);
+       spin_lock_irqsave(&msg_list_lock, flags);
+       ret = atomic_notifier_chain_register(
+               &opal_msg_notifier_head[msg_type], nb);
+
+       /*
+        * If the registration succeeded, replay any queued messages that came
+        * in prior to the notifier chain registration. msg_list_lock is held
+        * here to ensure they are delivered prior to any subsequent messages.
+        */
+       if (ret == 0)
+               dequeue_replay_msg(msg_type);
+
+       spin_unlock_irqrestore(&msg_list_lock, flags);
+
+       return ret;
 }
 EXPORT_SYMBOL_GPL(opal_message_notifier_register);
 
@@ -265,6 +330,23 @@ EXPORT_SYMBOL_GPL(opal_message_notifier_unregister);
 
 static void opal_message_do_notify(uint32_t msg_type, void *msg)
 {
+       unsigned long flags;
+       bool queued = false;
+
+       spin_lock_irqsave(&msg_list_lock, flags);
+       if (opal_msg_notifier_head[msg_type].head == NULL) {
+               /*
+                * Queue up the msg since no notifiers have registered
+                * yet for this msg_type.
+                */
+               queue_replay_msg(msg);
+               queued = true;
+       }
+       spin_unlock_irqrestore(&msg_list_lock, flags);
+
+       if (queued)
+               return;
+
        /* notify subscribers */
        atomic_notifier_call_chain(&opal_msg_notifier_head[msg_type],
                                        msg_type, msg);
@@ -1002,6 +1084,9 @@ static int __init opal_init(void)
        /* Initialise OPAL Power control interface */
        opal_power_control_init();
 
+       /* Initialize OPAL secure variables */
+       opal_pdev_init("ibm,secvar-backend");
+
        return 0;
 }
 machine_subsys_initcall(powernv, opal_init);
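For illustration, a subscriber sketch (not part of the patch) showing what the replay queue above buys: a hypothetical notifier registered late still sees messages of its type that arrived before registration, since they are drained under msg_list_lock at registration time.

static int example_msg_cb(struct notifier_block *nb, unsigned long type,
			  void *msg)
{
	/* Fields of struct opal_msg arrive big-endian, as in the queue code. */
	return NOTIFY_OK;
}

static struct notifier_block example_msg_nb = {
	.notifier_call = example_msg_cb,
};

static int __init example_subscribe(void)
{
	/* Any queued OPAL_MSG_SHUTDOWN messages are replayed during this call. */
	return opal_message_notifier_register(OPAL_MSG_SHUTDOWN,
					      &example_msg_nb);
}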
index a0b9c0c..5dc6847 100644 (file)
@@ -340,14 +340,6 @@ free_tces_exit:
        return -ENOMEM;
 }
 
-static void pnv_iommu_table_group_link_free(struct rcu_head *head)
-{
-       struct iommu_table_group_link *tgl = container_of(head,
-                       struct iommu_table_group_link, rcu);
-
-       kfree(tgl);
-}
-
 void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
                struct iommu_table_group *table_group)
 {
@@ -363,7 +355,7 @@ void pnv_pci_unlink_table_and_group(struct iommu_table *tbl,
        list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) {
                if (tgl->table_group == table_group) {
                        list_del_rcu(&tgl->next);
-                       call_rcu(&tgl->rcu, pnv_iommu_table_group_link_free);
+                       kfree_rcu(tgl, rcu);
                        found = true;
                        break;
                }
index c28d0d9..da1068a 100644 (file)
@@ -3086,8 +3086,8 @@ static int pnv_pci_diag_data_set(void *data, u64 val)
        return 0;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(pnv_pci_diag_data_fops, NULL,
-                       pnv_pci_diag_data_set, "%llu\n");
+DEFINE_DEBUGFS_ATTRIBUTE(pnv_pci_diag_data_fops, NULL, pnv_pci_diag_data_set,
+                        "%llu\n");
 
 #endif /* CONFIG_DEBUG_FS */
 
@@ -3112,8 +3112,8 @@ static void pnv_pci_ioda_create_dbgfs(void)
                        continue;
                }
 
-               debugfs_create_file("dump_diag_regs", 0200, phb->dbgfs, hose,
-                                   &pnv_pci_diag_data_fops);
+               debugfs_create_file_unsafe("dump_diag_regs", 0200, phb->dbgfs,
+                                          hose, &pnv_pci_diag_data_fops);
        }
 #endif /* CONFIG_DEBUG_FS */
 }
index 2825d00..c0bea75 100644 (file)
@@ -945,6 +945,23 @@ void __init pnv_pci_init(void)
        if (!firmware_has_feature(FW_FEATURE_OPAL))
                return;
 
+#ifdef CONFIG_PCIEPORTBUS
+       /*
+        * On PowerNV, PCIe devices are (currently) managed in cooperation
+        * with firmware. This isn't *strictly* required, but there are enough
+        * assumptions baked into both firmware and the platform code that
+        * it's unwise to allow the portbus services to be used.
+        *
+        * We need to fix this eventually, but for now set this flag to disable
+        * the portbus driver. The AER service isn't required since AER
+        * events are handled via EEH. The pciehp hotplug driver can't work
+        * without kernel changes (and portbus binding breaks pnv_php). The
+        * other services also require some thinking about how we're going
+        * to integrate them.
+        */
+       pcie_ports_disabled = true;
+#endif
+
        /* Look for IODA IO-Hubs. */
        for_each_compatible_node(np, NULL, "ibm,ioda-hub") {
                pnv_pci_init_ioda_hub(np);
index 9e35cdd..595e9f8 100644 (file)
@@ -108,6 +108,7 @@ config PPC_SMLPAR
 config CMM
        tristate "Collaborative memory management"
        depends on PPC_SMLPAR
+       select MEMORY_BALLOON
        default y
        help
          Select this option, if you want to enable the kernel interface
index b33251d..9157184 100644 (file)
 #include <linux/stringify.h>
 #include <linux/swap.h>
 #include <linux/device.h>
+#include <linux/mount.h>
+#include <linux/pseudo_fs.h>
+#include <linux/magic.h>
+#include <linux/balloon_compaction.h>
 #include <asm/firmware.h>
 #include <asm/hvcall.h>
 #include <asm/mmu.h>
 #define CMM_MIN_MEM_MB         256
 #define KB2PAGES(_p)           ((_p)>>(PAGE_SHIFT-10))
 #define PAGES2KB(_p)           ((_p)<<(PAGE_SHIFT-10))
-/*
- * The priority level tries to ensure that this notifier is called as
- * late as possible to reduce thrashing in the shared memory pool.
- */
+
 #define CMM_MEM_HOTPLUG_PRI    1
-#define CMM_MEM_ISOLATE_PRI    15
 
 static unsigned int delay = CMM_DEFAULT_DELAY;
 static unsigned int hotplug_delay = CMM_HOTPLUG_DELAY;
@@ -51,6 +51,8 @@ static unsigned int oom_kb = CMM_OOM_KB;
 static unsigned int cmm_debug = CMM_DEBUG;
 static unsigned int cmm_disabled = CMM_DISABLE;
 static unsigned long min_mem_mb = CMM_MIN_MEM_MB;
+static bool __read_mostly simulate;
+static unsigned long simulate_loan_target_kb;
 static struct device cmm_dev;
 
 MODULE_AUTHOR("Brian King <brking@linux.vnet.ibm.com>");
@@ -74,35 +76,31 @@ MODULE_PARM_DESC(min_mem_mb, "Minimum amount of memory (in MB) to not balloon. "
 module_param_named(debug, cmm_debug, uint, 0644);
 MODULE_PARM_DESC(debug, "Enable module debugging logging. Set to 1 to enable. "
                 "[Default=" __stringify(CMM_DEBUG) "]");
-
-#define CMM_NR_PAGES ((PAGE_SIZE - sizeof(void *) - sizeof(unsigned long)) / sizeof(unsigned long))
+module_param_named(simulate, simulate, bool, 0444);
+MODULE_PARM_DESC(simulate, "Enable simulation mode (no communication with hw).");
 
 #define cmm_dbg(...) if (cmm_debug) { printk(KERN_INFO "cmm: "__VA_ARGS__); }
 
-struct cmm_page_array {
-       struct cmm_page_array *next;
-       unsigned long index;
-       unsigned long page[CMM_NR_PAGES];
-};
-
-static unsigned long loaned_pages;
+static atomic_long_t loaned_pages;
 static unsigned long loaned_pages_target;
 static unsigned long oom_freed_pages;
 
-static struct cmm_page_array *cmm_page_list;
-static DEFINE_SPINLOCK(cmm_lock);
-
 static DEFINE_MUTEX(hotplug_mutex);
 static int hotplug_occurred; /* protected by the hotplug mutex */
 
 static struct task_struct *cmm_thread_ptr;
+static struct balloon_dev_info b_dev_info;
 
-static long plpar_page_set_loaned(unsigned long vpa)
+static long plpar_page_set_loaned(struct page *page)
 {
+       const unsigned long vpa = page_to_phys(page);
        unsigned long cmo_page_sz = cmo_get_page_size();
        long rc = 0;
        int i;
 
+       if (unlikely(simulate))
+               return 0;
+
        for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
                rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_LOANED, vpa + i, 0);
 
@@ -113,12 +111,16 @@ static long plpar_page_set_loaned(unsigned long vpa)
        return rc;
 }
 
-static long plpar_page_set_active(unsigned long vpa)
+static long plpar_page_set_active(struct page *page)
 {
+       const unsigned long vpa = page_to_phys(page);
        unsigned long cmo_page_sz = cmo_get_page_size();
        long rc = 0;
        int i;
 
+       if (unlikely(simulate))
+               return 0;
+
        for (i = 0; !rc && i < PAGE_SIZE; i += cmo_page_sz)
                rc = plpar_hcall_norets(H_PAGE_INIT, H_PAGE_SET_ACTIVE, vpa + i, 0);
 
@@ -138,8 +140,7 @@ static long plpar_page_set_active(unsigned long vpa)
  **/
 static long cmm_alloc_pages(long nr)
 {
-       struct cmm_page_array *pa, *npa;
-       unsigned long addr;
+       struct page *page;
        long rc;
 
        cmm_dbg("Begin request for %ld pages\n", nr);
@@ -156,46 +157,19 @@ static long cmm_alloc_pages(long nr)
                        break;
                }
 
-               addr = __get_free_page(GFP_NOIO | __GFP_NOWARN |
-                                      __GFP_NORETRY | __GFP_NOMEMALLOC);
-               if (!addr)
+               page = balloon_page_alloc();
+               if (!page)
                        break;
-               spin_lock(&cmm_lock);
-               pa = cmm_page_list;
-               if (!pa || pa->index >= CMM_NR_PAGES) {
-                       /* Need a new page for the page list. */
-                       spin_unlock(&cmm_lock);
-                       npa = (struct cmm_page_array *)__get_free_page(
-                                       GFP_NOIO | __GFP_NOWARN |
-                                       __GFP_NORETRY | __GFP_NOMEMALLOC);
-                       if (!npa) {
-                               pr_info("%s: Can not allocate new page list\n", __func__);
-                               free_page(addr);
-                               break;
-                       }
-                       spin_lock(&cmm_lock);
-                       pa = cmm_page_list;
-
-                       if (!pa || pa->index >= CMM_NR_PAGES) {
-                               npa->next = pa;
-                               npa->index = 0;
-                               pa = npa;
-                               cmm_page_list = pa;
-                       } else
-                               free_page((unsigned long) npa);
-               }
-
-               if ((rc = plpar_page_set_loaned(__pa(addr)))) {
+               rc = plpar_page_set_loaned(page);
+               if (rc) {
                        pr_err("%s: Can not set page to loaned. rc=%ld\n", __func__, rc);
-                       spin_unlock(&cmm_lock);
-                       free_page(addr);
+                       __free_page(page);
                        break;
                }
 
-               pa->page[pa->index++] = addr;
-               loaned_pages++;
-               totalram_pages_dec();
-               spin_unlock(&cmm_lock);
+               balloon_page_enqueue(&b_dev_info, page);
+               atomic_long_inc(&loaned_pages);
+               adjust_managed_page_count(page, -1);
                nr--;
        }
 
@@ -212,30 +186,19 @@ static long cmm_alloc_pages(long nr)
  **/
 static long cmm_free_pages(long nr)
 {
-       struct cmm_page_array *pa;
-       unsigned long addr;
+       struct page *page;
 
        cmm_dbg("Begin free of %ld pages.\n", nr);
-       spin_lock(&cmm_lock);
-       pa = cmm_page_list;
        while (nr) {
-               if (!pa || pa->index <= 0)
+               page = balloon_page_dequeue(&b_dev_info);
+               if (!page)
                        break;
-               addr = pa->page[--pa->index];
-
-               if (pa->index == 0) {
-                       pa = pa->next;
-                       free_page((unsigned long) cmm_page_list);
-                       cmm_page_list = pa;
-               }
-
-               plpar_page_set_active(__pa(addr));
-               free_page(addr);
-               loaned_pages--;
+               plpar_page_set_active(page);
+               adjust_managed_page_count(page, 1);
+               __free_page(page);
+               atomic_long_dec(&loaned_pages);
                nr--;
-               totalram_pages_inc();
        }
-       spin_unlock(&cmm_lock);
        cmm_dbg("End request with %ld pages unfulfilled\n", nr);
        return nr;
 }
@@ -257,7 +220,7 @@ static int cmm_oom_notify(struct notifier_block *self,
 
        cmm_dbg("OOM processing started\n");
        nr = cmm_free_pages(nr);
-       loaned_pages_target = loaned_pages;
+       loaned_pages_target = atomic_long_read(&loaned_pages);
        *freed += KB2PAGES(oom_kb) - nr;
        oom_freed_pages += KB2PAGES(oom_kb) - nr;
        cmm_dbg("OOM processing complete\n");
@@ -274,19 +237,24 @@ static int cmm_oom_notify(struct notifier_block *self,
  **/
 static void cmm_get_mpp(void)
 {
+       const long __loaned_pages = atomic_long_read(&loaned_pages);
+       const long total_pages = totalram_pages() + __loaned_pages;
        int rc;
        struct hvcall_mpp_data mpp_data;
        signed long active_pages_target, page_loan_request, target;
-       signed long total_pages = totalram_pages() + loaned_pages;
        signed long min_mem_pages = (min_mem_mb * 1024 * 1024) / PAGE_SIZE;
 
-       rc = h_get_mpp(&mpp_data);
-
-       if (rc != H_SUCCESS)
-               return;
-
-       page_loan_request = div_s64((s64)mpp_data.loan_request, PAGE_SIZE);
-       target = page_loan_request + (signed long)loaned_pages;
+       if (likely(!simulate)) {
+               rc = h_get_mpp(&mpp_data);
+               if (rc != H_SUCCESS)
+                       return;
+               page_loan_request = div_s64((s64)mpp_data.loan_request,
+                                           PAGE_SIZE);
+               target = page_loan_request + __loaned_pages;
+       } else {
+               target = KB2PAGES(simulate_loan_target_kb);
+               page_loan_request = target - __loaned_pages;
+       }
 
        if (target < 0 || total_pages < min_mem_pages)
                target = 0;
@@ -307,7 +275,7 @@ static void cmm_get_mpp(void)
        loaned_pages_target = target;
 
        cmm_dbg("delta = %ld, loaned = %lu, target = %lu, oom = %lu, totalram = %lu\n",
-               page_loan_request, loaned_pages, loaned_pages_target,
+               page_loan_request, __loaned_pages, loaned_pages_target,
                oom_freed_pages, totalram_pages());
 }
 
@@ -325,6 +293,7 @@ static struct notifier_block cmm_oom_nb = {
 static int cmm_thread(void *dummy)
 {
        unsigned long timeleft;
+       long __loaned_pages;
 
        while (1) {
                timeleft = msleep_interruptible(delay * 1000);
@@ -355,11 +324,12 @@ static int cmm_thread(void *dummy)
 
                cmm_get_mpp();
 
-               if (loaned_pages_target > loaned_pages) {
-                       if (cmm_alloc_pages(loaned_pages_target - loaned_pages))
-                               loaned_pages_target = loaned_pages;
-               } else if (loaned_pages_target < loaned_pages)
-                       cmm_free_pages(loaned_pages - loaned_pages_target);
+               __loaned_pages = atomic_long_read(&loaned_pages);
+               if (loaned_pages_target > __loaned_pages) {
+                       if (cmm_alloc_pages(loaned_pages_target - __loaned_pages))
+                               loaned_pages_target = __loaned_pages;
+               } else if (loaned_pages_target < __loaned_pages)
+                       cmm_free_pages(__loaned_pages - loaned_pages_target);
        }
        return 0;
 }
@@ -373,7 +343,7 @@ static int cmm_thread(void *dummy)
        }                                                       \
        static DEVICE_ATTR(name, 0444, show_##name, NULL)
 
-CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(loaned_pages));
+CMM_SHOW(loaned_kb, "%lu\n", PAGES2KB(atomic_long_read(&loaned_pages)));
 CMM_SHOW(loaned_target_kb, "%lu\n", PAGES2KB(loaned_pages_target));
 
 static ssize_t show_oom_pages(struct device *dev,
@@ -406,11 +376,18 @@ static struct device_attribute *cmm_attrs[] = {
        &dev_attr_oom_freed_kb,
 };
 
+static DEVICE_ULONG_ATTR(simulate_loan_target_kb, 0644,
+                        simulate_loan_target_kb);
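+/*
+ * In simulate mode the loan target comes from user space: a kB value
+ * written to the attribute above feeds cmm_get_mpp()'s simulated path on
+ * the next cmm_thread iteration, in place of the h_get_mpp() result.
+ */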
+
 static struct bus_type cmm_subsys = {
        .name = "cmm",
        .dev_name = "cmm",
 };
 
+static void cmm_release_device(struct device *dev)
+{
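+       /* cmm_dev is static; nothing to free, but the core wants a release. */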
+}
+
 /**
  * cmm_sysfs_register - Register with sysfs
  *
@@ -426,6 +403,7 @@ static int cmm_sysfs_register(struct device *dev)
 
        dev->id = 0;
        dev->bus = &cmm_subsys;
+       dev->release = cmm_release_device;
 
        if ((rc = device_register(dev)))
                goto subsys_unregister;
@@ -435,6 +413,11 @@ static int cmm_sysfs_register(struct device *dev)
                        goto fail;
        }
 
+       if (!simulate)
+               return 0;
+       rc = device_create_file(dev, &dev_attr_simulate_loan_target_kb.attr);
+       if (rc)
+               goto fail;
        return 0;
 
 fail:
@@ -471,7 +454,7 @@ static int cmm_reboot_notifier(struct notifier_block *nb,
                if (cmm_thread_ptr)
                        kthread_stop(cmm_thread_ptr);
                cmm_thread_ptr = NULL;
-               cmm_free_pages(loaned_pages);
+               cmm_free_pages(atomic_long_read(&loaned_pages));
        }
        return NOTIFY_DONE;
 }
@@ -480,142 +463,6 @@ static struct notifier_block cmm_reboot_nb = {
        .notifier_call = cmm_reboot_notifier,
 };
 
-/**
- * cmm_count_pages - Count the number of pages loaned in a particular range.
- *
- * @arg: memory_isolate_notify structure with address range and count
- *
- * Return value:
- *      0 on success
- **/
-static unsigned long cmm_count_pages(void *arg)
-{
-       struct memory_isolate_notify *marg = arg;
-       struct cmm_page_array *pa;
-       unsigned long start = (unsigned long)pfn_to_kaddr(marg->start_pfn);
-       unsigned long end = start + (marg->nr_pages << PAGE_SHIFT);
-       unsigned long idx;
-
-       spin_lock(&cmm_lock);
-       pa = cmm_page_list;
-       while (pa) {
-               if ((unsigned long)pa >= start && (unsigned long)pa < end)
-                       marg->pages_found++;
-               for (idx = 0; idx < pa->index; idx++)
-                       if (pa->page[idx] >= start && pa->page[idx] < end)
-                               marg->pages_found++;
-               pa = pa->next;
-       }
-       spin_unlock(&cmm_lock);
-       return 0;
-}
-
-/**
- * cmm_memory_isolate_cb - Handle memory isolation notifier calls
- * @self:      notifier block struct
- * @action:    action to take
- * @arg:       struct memory_isolate_notify data for handler
- *
- * Return value:
- *     NOTIFY_OK or notifier error based on subfunction return value
- **/
-static int cmm_memory_isolate_cb(struct notifier_block *self,
-                                unsigned long action, void *arg)
-{
-       int ret = 0;
-
-       if (action == MEM_ISOLATE_COUNT)
-               ret = cmm_count_pages(arg);
-
-       return notifier_from_errno(ret);
-}
-
-static struct notifier_block cmm_mem_isolate_nb = {
-       .notifier_call = cmm_memory_isolate_cb,
-       .priority = CMM_MEM_ISOLATE_PRI
-};
-
-/**
- * cmm_mem_going_offline - Unloan pages where memory is to be removed
- * @arg: memory_notify structure with page range to be offlined
- *
- * Return value:
- *     0 on success
- **/
-static int cmm_mem_going_offline(void *arg)
-{
-       struct memory_notify *marg = arg;
-       unsigned long start_page = (unsigned long)pfn_to_kaddr(marg->start_pfn);
-       unsigned long end_page = start_page + (marg->nr_pages << PAGE_SHIFT);
-       struct cmm_page_array *pa_curr, *pa_last, *npa;
-       unsigned long idx;
-       unsigned long freed = 0;
-
-       cmm_dbg("Memory going offline, searching 0x%lx (%ld pages).\n",
-                       start_page, marg->nr_pages);
-       spin_lock(&cmm_lock);
-
-       /* Search the page list for pages in the range to be offlined */
-       pa_last = pa_curr = cmm_page_list;
-       while (pa_curr) {
-               for (idx = (pa_curr->index - 1); (idx + 1) > 0; idx--) {
-                       if ((pa_curr->page[idx] < start_page) ||
-                           (pa_curr->page[idx] >= end_page))
-                               continue;
-
-                       plpar_page_set_active(__pa(pa_curr->page[idx]));
-                       free_page(pa_curr->page[idx]);
-                       freed++;
-                       loaned_pages--;
-                       totalram_pages_inc();
-                       pa_curr->page[idx] = pa_last->page[--pa_last->index];
-                       if (pa_last->index == 0) {
-                               if (pa_curr == pa_last)
-                                       pa_curr = pa_last->next;
-                               pa_last = pa_last->next;
-                               free_page((unsigned long)cmm_page_list);
-                               cmm_page_list = pa_last;
-                       }
-               }
-               pa_curr = pa_curr->next;
-       }
-
-       /* Search for page list structures in the range to be offlined */
-       pa_last = NULL;
-       pa_curr = cmm_page_list;
-       while (pa_curr) {
-               if (((unsigned long)pa_curr >= start_page) &&
-                               ((unsigned long)pa_curr < end_page)) {
-                       npa = (struct cmm_page_array *)__get_free_page(
-                                       GFP_NOIO | __GFP_NOWARN |
-                                       __GFP_NORETRY | __GFP_NOMEMALLOC);
-                       if (!npa) {
-                               spin_unlock(&cmm_lock);
-                               cmm_dbg("Failed to allocate memory for list "
-                                               "management. Memory hotplug "
-                                               "failed.\n");
-                               return -ENOMEM;
-                       }
-                       memcpy(npa, pa_curr, PAGE_SIZE);
-                       if (pa_curr == cmm_page_list)
-                               cmm_page_list = npa;
-                       if (pa_last)
-                               pa_last->next = npa;
-                       free_page((unsigned long) pa_curr);
-                       freed++;
-                       pa_curr = npa;
-               }
-
-               pa_last = pa_curr;
-               pa_curr = pa_curr->next;
-       }
-
-       spin_unlock(&cmm_lock);
-       cmm_dbg("Released %ld pages in the search range.\n", freed);
-
-       return 0;
-}
-
 /**
  * cmm_memory_cb - Handle memory hotplug notifier calls
  * @self:      notifier block struct
@@ -635,7 +482,6 @@ static int cmm_memory_cb(struct notifier_block *self,
        case MEM_GOING_OFFLINE:
                mutex_lock(&hotplug_mutex);
                hotplug_occurred = 1;
-               ret = cmm_mem_going_offline(arg);
                break;
        case MEM_OFFLINE:
        case MEM_CANCEL_OFFLINE:
@@ -656,6 +502,106 @@ static struct notifier_block cmm_mem_nb = {
        .priority = CMM_MEM_HOTPLUG_PRI
 };
 
+#ifdef CONFIG_BALLOON_COMPACTION
+static struct vfsmount *balloon_mnt;
+
+static int cmm_init_fs_context(struct fs_context *fc)
+{
+       return init_pseudo(fc, PPC_CMM_MAGIC) ? 0 : -ENOMEM;
+}
+
+static struct file_system_type balloon_fs = {
+       .name = "ppc-cmm",
+       .init_fs_context = cmm_init_fs_context,
+       .kill_sb = kill_anon_super,
+};
+
+static int cmm_migratepage(struct balloon_dev_info *b_dev_info,
+                          struct page *newpage, struct page *page,
+                          enum migrate_mode mode)
+{
+       unsigned long flags;
+
+       /*
+        * loan/"inflate" the newpage first.
+        *
+        * We might race against the cmm_thread who might discover after our
+        * loan request that another page is to be unloaned. However, once
+        * the cmm_thread runs again later, this error will automatically
+        * be corrected.
+        */
+       if (plpar_page_set_loaned(newpage)) {
+               /* Unlikely, but possible. Tell the caller not to retry now. */
+               pr_err_ratelimited("%s: Cannot set page to loaned.", __func__);
+               return -EBUSY;
+       }
+
+       /* balloon page list reference */
+       get_page(newpage);
+
+       spin_lock_irqsave(&b_dev_info->pages_lock, flags);
+       balloon_page_insert(b_dev_info, newpage);
+       balloon_page_delete(page);
+       b_dev_info->isolated_pages--;
+       spin_unlock_irqrestore(&b_dev_info->pages_lock, flags);
+
+       /*
+        * activate/"deflate" the old page. We ignore any errors just like the
+        * other callers.
+        */
+       plpar_page_set_active(page);
+
+       /* balloon page list reference */
+       put_page(page);
+
+       return MIGRATEPAGE_SUCCESS;
+}
+
+static int cmm_balloon_compaction_init(void)
+{
+       int rc;
+
+       balloon_devinfo_init(&b_dev_info);
+       b_dev_info.migratepage = cmm_migratepage;
+
+       balloon_mnt = kern_mount(&balloon_fs);
+       if (IS_ERR(balloon_mnt)) {
+               rc = PTR_ERR(balloon_mnt);
+               balloon_mnt = NULL;
+               return rc;
+       }
+
+       b_dev_info.inode = alloc_anon_inode(balloon_mnt->mnt_sb);
+       if (IS_ERR(b_dev_info.inode)) {
+               rc = PTR_ERR(b_dev_info.inode);
+               b_dev_info.inode = NULL;
+               kern_unmount(balloon_mnt);
+               balloon_mnt = NULL;
+               return rc;
+       }
+
+       b_dev_info.inode->i_mapping->a_ops = &balloon_aops;
+       return 0;
+}
+
+static void cmm_balloon_compaction_deinit(void)
+{
+       if (b_dev_info.inode)
+               iput(b_dev_info.inode);
+       b_dev_info.inode = NULL;
+       kern_unmount(balloon_mnt);
+       balloon_mnt = NULL;
+}
+#else /* CONFIG_BALLOON_COMPACTION */
+static int cmm_balloon_compaction_init(void)
+{
+       return 0;
+}
+
+static void cmm_balloon_compaction_deinit(void)
+{
+}
+#endif /* CONFIG_BALLOON_COMPACTION */
+
 /**
  * cmm_init - Module initialization
  *
@@ -664,26 +610,31 @@ static struct notifier_block cmm_mem_nb = {
  **/
 static int cmm_init(void)
 {
-       int rc = -ENOMEM;
+       int rc;
 
-       if (!firmware_has_feature(FW_FEATURE_CMO))
+       if (!firmware_has_feature(FW_FEATURE_CMO) && !simulate)
                return -EOPNOTSUPP;
 
-       if ((rc = register_oom_notifier(&cmm_oom_nb)) < 0)
+       rc = cmm_balloon_compaction_init();
+       if (rc)
                return rc;
 
+       rc = register_oom_notifier(&cmm_oom_nb);
+       if (rc < 0)
+               goto out_balloon_compaction;
+
        if ((rc = register_reboot_notifier(&cmm_reboot_nb)))
                goto out_oom_notifier;
 
        if ((rc = cmm_sysfs_register(&cmm_dev)))
                goto out_reboot_notifier;
 
-       if (register_memory_notifier(&cmm_mem_nb) ||
-           register_memory_isolate_notifier(&cmm_mem_isolate_nb))
+       rc = register_memory_notifier(&cmm_mem_nb);
+       if (rc)
                goto out_unregister_notifier;
 
        if (cmm_disabled)
-               return rc;
+               return 0;
 
        cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
        if (IS_ERR(cmm_thread_ptr)) {
@@ -691,16 +642,16 @@ static int cmm_init(void)
                goto out_unregister_notifier;
        }
 
-       return rc;
-
+       return 0;
 out_unregister_notifier:
        unregister_memory_notifier(&cmm_mem_nb);
-       unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
        cmm_unregister_sysfs(&cmm_dev);
 out_reboot_notifier:
        unregister_reboot_notifier(&cmm_reboot_nb);
 out_oom_notifier:
        unregister_oom_notifier(&cmm_oom_nb);
+out_balloon_compaction:
+       cmm_balloon_compaction_deinit();
        return rc;
 }
 
@@ -717,9 +668,9 @@ static void cmm_exit(void)
        unregister_oom_notifier(&cmm_oom_nb);
        unregister_reboot_notifier(&cmm_reboot_nb);
        unregister_memory_notifier(&cmm_mem_nb);
-       unregister_memory_isolate_notifier(&cmm_mem_isolate_nb);
-       cmm_free_pages(loaned_pages);
+       cmm_free_pages(atomic_long_read(&loaned_pages));
        cmm_unregister_sysfs(&cmm_dev);
+       cmm_balloon_compaction_deinit();
 }
 
 /**
@@ -739,7 +690,7 @@ static int cmm_set_disable(const char *val, const struct kernel_param *kp)
                if (cmm_thread_ptr)
                        kthread_stop(cmm_thread_ptr);
                cmm_thread_ptr = NULL;
-               cmm_free_pages(loaned_pages);
+               cmm_free_pages(atomic_long_read(&loaned_pages));
        } else if (!disable && cmm_disabled) {
                cmm_thread_ptr = kthread_run(cmm_thread, NULL, "cmmthread");
                if (IS_ERR(cmm_thread_ptr))
index bbda646..3e8cbfe 100644 (file)
@@ -338,6 +338,62 @@ static void pseries_remove_processor(struct device_node *np)
        cpu_maps_update_done();
 }
 
+static int dlpar_offline_cpu(struct device_node *dn)
+{
+       int rc = 0;
+       unsigned int cpu;
+       int len, nthreads, i;
+       const __be32 *intserv;
+       u32 thread;
+
+       intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
+       if (!intserv)
+               return -EINVAL;
+
+       nthreads = len / sizeof(u32);
+
+       cpu_maps_update_begin();
+       for (i = 0; i < nthreads; i++) {
+               thread = be32_to_cpu(intserv[i]);
+               for_each_present_cpu(cpu) {
+                       if (get_hard_smp_processor_id(cpu) != thread)
+                               continue;
+
+                       if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
+                               break;
+
+                       if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
+                               set_preferred_offline_state(cpu,
+                                                           CPU_STATE_OFFLINE);
+                               cpu_maps_update_done();
+                               timed_topology_update(1);
+                               rc = device_offline(get_cpu_device(cpu));
+                               if (rc)
+                                       goto out;
+                               cpu_maps_update_begin();
+                               break;
+                       }
+
+                       /*
+                        * The cpu is in CPU_STATE_INACTIVE.
+                        * Upgrade its state to CPU_STATE_OFFLINE.
+                        */
+                       set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
+                       WARN_ON(plpar_hcall_norets(H_PROD, thread) != H_SUCCESS);
+                       __cpu_die(cpu);
+                       break;
+               }
+               if (cpu == num_possible_cpus()) {
+                       pr_warn("Could not find cpu to offline with physical id 0x%x\n",
+                               thread);
+               }
+       }
+       cpu_maps_update_done();
+
+out:
+       return rc;
+}
+
 static int dlpar_online_cpu(struct device_node *dn)
 {
        int rc = 0;
@@ -364,8 +420,10 @@ static int dlpar_online_cpu(struct device_node *dn)
                        timed_topology_update(1);
                        find_and_online_cpu_nid(cpu);
                        rc = device_online(get_cpu_device(cpu));
-                       if (rc)
+                       if (rc) {
+                               dlpar_offline_cpu(dn);
                                goto out;
+                       }
                        cpu_maps_update_begin();
 
                        break;
@@ -407,17 +465,67 @@ static bool dlpar_cpu_exists(struct device_node *parent, u32 drc_index)
        return found;
 }
 
+static bool drc_info_valid_index(struct device_node *parent, u32 drc_index)
+{
+       struct property *info;
+       struct of_drc_info drc;
+       const __be32 *value;
+       u32 index;
+       int count, i, j;
+
+       info = of_find_property(parent, "ibm,drc-info", NULL);
+       if (!info)
+               return false;
+
+       value = of_prop_next_u32(info, NULL, &count);
+
+       /* First value of ibm,drc-info is number of drc-info records */
+       if (value)
+               value++;
+       else
+               return false;
+
+       for (i = 0; i < count; i++) {
+               if (of_read_drc_info_cell(&info, &value, &drc))
+                       return false;
+
+               if (strncmp(drc.drc_type, "CPU", 3))
+                       break;
+
+               if (drc_index > drc.last_drc_index)
+                       continue;
+
+               index = drc.drc_index_start;
+               for (j = 0; j < drc.num_sequential_elems; j++) {
+                       if (drc_index == index)
+                               return true;
+
+                       index += drc.sequential_inc;
+               }
+       }
+
+       return false;
+}
+
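+/*
+ * Each ibm,drc-info record is decoded by of_read_drc_info_cell() as
+ * { drc-type, drc-name-prefix, drc_index_start, drc_name_suffix_start,
+ *   num_sequential_elems, sequential_inc, drc-power-domain }, with
+ * last_drc_index derived from the start, count and increment. Stepping
+ * through num_sequential_elems indexes spaced sequential_inc apart, as
+ * drc_info_valid_index() does above, visits every index a record covers.
+ */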
 static bool valid_cpu_drc_index(struct device_node *parent, u32 drc_index)
 {
        bool found = false;
        int rc, index;
 
-       index = 0;
+       if (of_find_property(parent, "ibm,drc-info", NULL))
+               return drc_info_valid_index(parent, drc_index);
+
+       /* Note that the format of the ibm,drc-indexes array is
+        * the number of entries in the array followed by the array
+        * of drc values so we start looking at index = 1.
+        */
+       index = 1;
        while (!found) {
                u32 drc;
 
                rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
                                                index++, &drc);
+
                if (rc)
                        break;
 
@@ -505,63 +613,6 @@ static ssize_t dlpar_cpu_add(u32 drc_index)
        return rc;
 }
 
-static int dlpar_offline_cpu(struct device_node *dn)
-{
-       int rc = 0;
-       unsigned int cpu;
-       int len, nthreads, i;
-       const __be32 *intserv;
-       u32 thread;
-
-       intserv = of_get_property(dn, "ibm,ppc-interrupt-server#s", &len);
-       if (!intserv)
-               return -EINVAL;
-
-       nthreads = len / sizeof(u32);
-
-       cpu_maps_update_begin();
-       for (i = 0; i < nthreads; i++) {
-               thread = be32_to_cpu(intserv[i]);
-               for_each_present_cpu(cpu) {
-                       if (get_hard_smp_processor_id(cpu) != thread)
-                               continue;
-
-                       if (get_cpu_current_state(cpu) == CPU_STATE_OFFLINE)
-                               break;
-
-                       if (get_cpu_current_state(cpu) == CPU_STATE_ONLINE) {
-                               set_preferred_offline_state(cpu,
-                                                           CPU_STATE_OFFLINE);
-                               cpu_maps_update_done();
-                               timed_topology_update(1);
-                               rc = device_offline(get_cpu_device(cpu));
-                               if (rc)
-                                       goto out;
-                               cpu_maps_update_begin();
-                               break;
-
-                       }
-
-                       /*
-                        * The cpu is in CPU_STATE_INACTIVE.
-                        * Upgrade it's state to CPU_STATE_OFFLINE.
-                        */
-                       set_preferred_offline_state(cpu, CPU_STATE_OFFLINE);
-                       BUG_ON(plpar_hcall_norets(H_PROD, thread)
-                                                               != H_SUCCESS);
-                       __cpu_die(cpu);
-                       break;
-               }
-               if (cpu == num_possible_cpus())
-                       printk(KERN_WARNING "Could not find cpu to offline with physical id 0x%x\n", thread);
-       }
-       cpu_maps_update_done();
-
-out:
-       return rc;
-
-}
-
 static ssize_t dlpar_cpu_remove(struct device_node *dn, u32 drc_index)
 {
        int rc;
@@ -717,19 +768,52 @@ static int dlpar_cpu_remove_by_count(u32 cpus_to_remove)
        return rc;
 }
 
-static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
+static int find_drc_info_cpus_to_add(struct device_node *cpus,
+                                    struct property *info,
+                                    u32 *cpu_drcs, u32 cpus_to_add)
 {
-       struct device_node *parent;
+       struct of_drc_info drc;
+       const __be32 *value;
+       u32 count, drc_index;
        int cpus_found = 0;
-       int index, rc;
+       int i, j;
 
-       parent = of_find_node_by_path("/cpus");
-       if (!parent) {
-               pr_warn("Could not find CPU root node in device tree\n");
-               kfree(cpu_drcs);
+       if (!info)
                return -1;
+
+       value = of_prop_next_u32(info, NULL, &count);
+       if (value)
+               value++;
+
+       for (i = 0; i < count; i++) {
+               of_read_drc_info_cell(&info, &value, &drc);
+               if (strncmp(drc.drc_type, "CPU", 3))
+                       break;
+
+               drc_index = drc.drc_index_start;
+               for (j = 0; j < drc.num_sequential_elems; j++) {
+                       if (dlpar_cpu_exists(cpus, drc_index))
+                               continue;
+
+                       cpu_drcs[cpus_found++] = drc_index;
+
+                       if (cpus_found == cpus_to_add)
+                               return cpus_found;
+
+                       drc_index += drc.sequential_inc;
+               }
        }
 
+       return cpus_found;
+}
+
+static int find_drc_index_cpus_to_add(struct device_node *cpus,
+                                     u32 *cpu_drcs, u32 cpus_to_add)
+{
+       int cpus_found = 0;
+       int index, rc;
+       u32 drc_index;
+
        /* Search the ibm,drc-indexes array for possible CPU drcs to
         * add. Note that the format of the ibm,drc-indexes array is
         * the number of entries in the array followed by the array
@@ -737,25 +821,25 @@ static int find_dlpar_cpus_to_add(u32 *cpu_drcs, u32 cpus_to_add)
         */
        index = 1;
        while (cpus_found < cpus_to_add) {
-               u32 drc;
+               rc = of_property_read_u32_index(cpus, "ibm,drc-indexes",
+                                               index++, &drc_index);
 
-               rc = of_property_read_u32_index(parent, "ibm,drc-indexes",
-                                               index++, &drc);
                if (rc)
                        break;
 
-               if (dlpar_cpu_exists(parent, drc))
+               if (dlpar_cpu_exists(cpus, drc_index))
                        continue;
 
-               cpu_drcs[cpus_found++] = drc;
+               cpu_drcs[cpus_found++] = drc_index;
        }
 
-       of_node_put(parent);
        return cpus_found;
 }
 
 static int dlpar_cpu_add_by_count(u32 cpus_to_add)
 {
+       struct device_node *parent;
+       struct property *info;
        u32 *cpu_drcs;
        int cpus_added = 0;
        int cpus_found;
@@ -767,7 +851,21 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
        if (!cpu_drcs)
                return -EINVAL;
 
-       cpus_found = find_dlpar_cpus_to_add(cpu_drcs, cpus_to_add);
+       parent = of_find_node_by_path("/cpus");
+       if (!parent) {
+               pr_warn("Could not find CPU root node in device tree\n");
+               kfree(cpu_drcs);
+               return -1;
+       }
+
+       info = of_find_property(parent, "ibm,drc-info", NULL);
+       if (info)
+               cpus_found = find_drc_info_cpus_to_add(parent, info, cpu_drcs, cpus_to_add);
+       else
+               cpus_found = find_drc_index_cpus_to_add(parent, cpu_drcs, cpus_to_add);
+
+       of_node_put(parent);
+
        if (cpus_found < cpus_to_add) {
                pr_warn("Failed to find enough CPUs (%d of %d) to add\n",
                        cpus_found, cpus_to_add);
index 8e70039..c126b94 100644 (file)
@@ -338,7 +338,7 @@ static int pseries_remove_mem_node(struct device_node *np)
 static bool lmb_is_removable(struct drmem_lmb *lmb)
 {
        int i, scns_per_block;
-       int rc = 1;
+       bool rc = true;
        unsigned long pfn, block_sz;
        u64 phys_addr;
 
@@ -363,11 +363,11 @@ static bool lmb_is_removable(struct drmem_lmb *lmb)
                if (!pfn_present(pfn))
                        continue;
 
-               rc &= is_mem_section_removable(pfn, PAGES_PER_SECTION);
+               rc = rc && is_mem_section_removable(pfn, PAGES_PER_SECTION);
                phys_addr += MIN_MEMORY_BLOCK_SIZE;
        }
 
-       return rc ? true : false;
+       return rc;
 }
 
 static int dlpar_add_lmb(struct drmem_lmb *);
index f9f57c5..60cb29a 100644 (file)
@@ -774,7 +774,7 @@ static long pSeries_lpar_hpte_remove(unsigned long hpte_group)
 
                /* don't remove a bolted entry */
                lpar_rc = plpar_pte_remove(H_ANDCOND, hpte_group + slot_offset,
-                                          (0x1UL << 4), &dummy1, &dummy2);
+                                          HPTE_V_BOLTED, &dummy1, &dummy2);
                if (lpar_rc == H_SUCCESS)
                        return i;
 
@@ -938,11 +938,19 @@ static long pSeries_lpar_hpte_find(unsigned long vpn, int psize, int ssize)
        hash = hpt_hash(vpn, mmu_psize_defs[psize].shift, ssize);
        want_v = hpte_encode_avpn(vpn, psize, ssize);
 
-       /* Bolted entries are always in the primary group */
+       /*
+        * We try to keep bolted entries always in the primary hash,
+        * but in some cases they can be found in the secondary too.
+        */
        hpte_group = (hash & htab_hash_mask) * HPTES_PER_GROUP;
        slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
-       if (slot < 0)
-               return -1;
+       if (slot < 0) {
+               /* Try in secondary */
+               hpte_group = (~hash & htab_hash_mask) * HPTES_PER_GROUP;
+               slot = __pSeries_lpar_hpte_find(want_v, hpte_group);
+               if (slot < 0)
+                       return -1;
+       }
        return hpte_group + slot;
 }
 
@@ -1992,7 +2000,7 @@ static int __init vpa_debugfs_init(void)
 {
        char name[16];
        long i;
-       static struct dentry *vpa_dir;
+       struct dentry *vpa_dir;
 
        if (!firmware_has_feature(FW_FEATURE_SPLPAR))
                return 0;
index 6df192f..66dfd82 100644 (file)
@@ -45,14 +45,14 @@ struct device_node *pseries_of_derive_parent(const char *path)
 int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
                        struct of_drc_info *data)
 {
-       const char *p;
+       const char *p = (char *)(*curval);
        const __be32 *p2;
 
        if (!data)
                return -EINVAL;
 
        /* Get drc-type:encode-string */
-       p = data->drc_type = (char*) (*curval);
+       data->drc_type = (char *)p;
        p = of_prop_next_string(*prop, p);
        if (!p)
                return -EINVAL;
@@ -65,9 +65,7 @@ int of_read_drc_info_cell(struct property **prop, const __be32 **curval,
 
        /* Get drc-index-start:encode-int */
        p2 = (const __be32 *)p;
-       p2 = of_prop_next_u32(*prop, p2, &data->drc_index_start);
-       if (!p2)
-               return -EINVAL;
+       data->drc_index_start = be32_to_cpu(*p2);
 
        /* Get drc-name-suffix-start:encode-int */
        p2 = of_prop_next_u32(*prop, p2, &data->drc_name_suffix_start);
index 6188329..f87b474 100644 (file)
@@ -152,7 +152,7 @@ static int papr_scm_meta_get(struct papr_scm_priv *p,
        int len, read;
        int64_t ret;
 
-       if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
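+       /* A read ending exactly at metadata_size is valid, hence '>' not '>='. */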
+       if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
                return -EINVAL;
 
        for (len = hdr->in_length; len; len -= read) {
@@ -206,7 +206,7 @@ static int papr_scm_meta_set(struct papr_scm_priv *p,
        __be64 data_be;
        int64_t ret;
 
-       if ((hdr->in_offset + hdr->in_length) >= p->metadata_size)
+       if ((hdr->in_offset + hdr->in_length) > p->metadata_size)
                return -EINVAL;
 
        for (len = hdr->in_length; len; len -= wrote) {
@@ -513,7 +513,6 @@ static struct platform_driver papr_scm_driver = {
        .remove = papr_scm_remove,
        .driver = {
                .name = "papr_scm",
-               .owner = THIS_MODULE,
                .of_match_table = papr_scm_match,
        },
 };
index 561917f..361986e 100644 (file)
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
 /*
  * PCI Dynamic LPAR, PCI Hot Plug and PCI EEH recovery code
  * for RPA-compliant PPC64 platform.
@@ -6,23 +7,6 @@
  *
  * Updates, 2005, John Rose <johnrose@austin.ibm.com>
  * Updates, 2005, Linas Vepstas <linas@austin.ibm.com>
- *
- * All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or (at
- * your option) any later version.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
- * NON INFRINGEMENT.  See the GNU General Public License for more
- * details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  */
 
 #include <linux/pci.h>
index a96874f..09e98d3 100644 (file)
@@ -36,6 +36,7 @@ static int sysfs_entries;
 static u32 cpu_to_drc_index(int cpu)
 {
        struct device_node *dn = NULL;
+       struct property *info;
        int thread_index;
        int rc = 1;
        u32 ret = 0;
@@ -47,20 +48,18 @@ static u32 cpu_to_drc_index(int cpu)
        /* Convert logical cpu number to core number */
        thread_index = cpu_core_index_of_thread(cpu);
 
-       if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
-               struct property *info = NULL;
+       info = of_find_property(dn, "ibm,drc-info", NULL);
+       if (info) {
                struct of_drc_info drc;
                int j;
                u32 num_set_entries;
                const __be32 *value;
 
-               info = of_find_property(dn, "ibm,drc-info", NULL);
-               if (info == NULL)
-                       goto err_of_node_put;
-
                value = of_prop_next_u32(info, NULL, &num_set_entries);
                if (!value)
                        goto err_of_node_put;
+               else
+                       value++;
 
                for (j = 0; j < num_set_entries; j++) {
 
@@ -110,6 +109,7 @@ err:
 static int drc_index_to_cpu(u32 drc_index)
 {
        struct device_node *dn = NULL;
+       struct property *info;
        const int *indexes;
        int thread_index = 0, cpu = 0;
        int rc = 1;
@@ -117,21 +117,18 @@ static int drc_index_to_cpu(u32 drc_index)
        dn = of_find_node_by_path("/cpus");
        if (dn == NULL)
                goto err;
-
-       if (firmware_has_feature(FW_FEATURE_DRC_INFO)) {
-               struct property *info = NULL;
+       info = of_find_property(dn, "ibm,drc-info", NULL);
+       if (info) {
                struct of_drc_info drc;
                int j;
                u32 num_set_entries;
                const __be32 *value;
 
-               info = of_find_property(dn, "ibm,drc-info", NULL);
-               if (info == NULL)
-                       goto err_of_node_put;
-
                value = of_prop_next_u32(info, NULL, &num_set_entries);
                if (!value)
                        goto err_of_node_put;
+               else
+                       value++;
 
                for (j = 0; j < num_set_entries; j++) {
 
index 3acdcc3..1d7f973 100644 (file)
@@ -255,7 +255,7 @@ static void rtas_parse_epow_errlog(struct rtas_error_log *log)
                break;
 
        case EPOW_SYSTEM_SHUTDOWN:
-               handle_system_shutdown(epow_log->event_modifier);
+               handle_system_shutdown(modifier);
                break;
 
        case EPOW_SYSTEM_HALT:
index 603b3c6..cb5a5bd 100644 (file)
@@ -24,7 +24,6 @@ obj-$(CONFIG_FSL_CORENET_RCPM)        += fsl_rcpm.o
 obj-$(CONFIG_FSL_LBC)          += fsl_lbc.o
 obj-$(CONFIG_FSL_GTM)          += fsl_gtm.o
 obj-$(CONFIG_FSL_85XX_CACHE_SRAM)      += fsl_85xx_l2ctlr.o fsl_85xx_cache_sram.o
-obj-$(CONFIG_SIMPLE_GPIO)      += simple_gpio.o
 obj-$(CONFIG_FSL_RIO)          += fsl_rio.o fsl_rmu.o
 obj-$(CONFIG_TSI108_BRIDGE)    += tsi108_pci.o tsi108_dev.o
 obj-$(CONFIG_RTC_DRV_CMOS)     += rtc_cmos_setup.o
diff --git a/arch/powerpc/sysdev/simple_gpio.c b/arch/powerpc/sysdev/simple_gpio.c
deleted file mode 100644 (file)
index dc1740c..0000000
+++ /dev/null
@@ -1,143 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Simple Memory-Mapped GPIOs
- *
- * Copyright (c) MontaVista Software, Inc. 2008.
- *
- * Author: Anton Vorontsov <avorontsov@ru.mvista.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/spinlock.h>
-#include <linux/types.h>
-#include <linux/ioport.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
-#include <linux/gpio/driver.h>
-#include <linux/slab.h>
-#include <asm/prom.h>
-#include "simple_gpio.h"
-
-struct u8_gpio_chip {
-       struct of_mm_gpio_chip mm_gc;
-       spinlock_t lock;
-
-       /* shadowed data register to clear/set bits safely */
-       u8 data;
-};
-
-static u8 u8_pin2mask(unsigned int pin)
-{
-       return 1 << (8 - 1 - pin);
-}
-
-static int u8_gpio_get(struct gpio_chip *gc, unsigned int gpio)
-{
-       struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-
-       return !!(in_8(mm_gc->regs) & u8_pin2mask(gpio));
-}
-
-static void u8_gpio_set(struct gpio_chip *gc, unsigned int gpio, int val)
-{
-       struct of_mm_gpio_chip *mm_gc = to_of_mm_gpio_chip(gc);
-       struct u8_gpio_chip *u8_gc = gpiochip_get_data(gc);
-       unsigned long flags;
-
-       spin_lock_irqsave(&u8_gc->lock, flags);
-
-       if (val)
-               u8_gc->data |= u8_pin2mask(gpio);
-       else
-               u8_gc->data &= ~u8_pin2mask(gpio);
-
-       out_8(mm_gc->regs, u8_gc->data);
-
-       spin_unlock_irqrestore(&u8_gc->lock, flags);
-}
-
-static int u8_gpio_dir_in(struct gpio_chip *gc, unsigned int gpio)
-{
-       return 0;
-}
-
-static int u8_gpio_dir_out(struct gpio_chip *gc, unsigned int gpio, int val)
-{
-       u8_gpio_set(gc, gpio, val);
-       return 0;
-}
-
-static void u8_gpio_save_regs(struct of_mm_gpio_chip *mm_gc)
-{
-       struct u8_gpio_chip *u8_gc =
-               container_of(mm_gc, struct u8_gpio_chip, mm_gc);
-
-       u8_gc->data = in_8(mm_gc->regs);
-}
-
-static int __init u8_simple_gpiochip_add(struct device_node *np)
-{
-       int ret;
-       struct u8_gpio_chip *u8_gc;
-       struct of_mm_gpio_chip *mm_gc;
-       struct gpio_chip *gc;
-
-       u8_gc = kzalloc(sizeof(*u8_gc), GFP_KERNEL);
-       if (!u8_gc)
-               return -ENOMEM;
-
-       spin_lock_init(&u8_gc->lock);
-
-       mm_gc = &u8_gc->mm_gc;
-       gc = &mm_gc->gc;
-
-       mm_gc->save_regs = u8_gpio_save_regs;
-       gc->ngpio = 8;
-       gc->direction_input = u8_gpio_dir_in;
-       gc->direction_output = u8_gpio_dir_out;
-       gc->get = u8_gpio_get;
-       gc->set = u8_gpio_set;
-
-       ret = of_mm_gpiochip_add_data(np, mm_gc, u8_gc);
-       if (ret)
-               goto err;
-       return 0;
-err:
-       kfree(u8_gc);
-       return ret;
-}
-
-void __init simple_gpiochip_init(const char *compatible)
-{
-       struct device_node *np;
-
-       for_each_compatible_node(np, NULL, compatible) {
-               int ret;
-               struct resource r;
-
-               ret = of_address_to_resource(np, 0, &r);
-               if (ret)
-                       goto err;
-
-               switch (resource_size(&r)) {
-               case 1:
-                       ret = u8_simple_gpiochip_add(np);
-                       if (ret)
-                               goto err;
-                       break;
-               default:
-                       /*
-                        * Whenever you need support for GPIO bank width > 1,
-                        * please just turn u8_ code into huge macros, and
-                        * construct needed uX_ code with it.
-                        */
-                       ret = -ENOSYS;
-                       goto err;
-               }
-               continue;
-err:
-               pr_err("%pOF: registration failed, status %d\n", np, ret);
-       }
-}
diff --git a/arch/powerpc/sysdev/simple_gpio.h b/arch/powerpc/sysdev/simple_gpio.h
deleted file mode 100644 (file)
index f3f3a20..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __SYSDEV_SIMPLE_GPIO_H
-#define __SYSDEV_SIMPLE_GPIO_H
-
-#include <linux/errno.h>
-
-#ifdef CONFIG_SIMPLE_GPIO
-extern void simple_gpiochip_init(const char *compatible);
-#else
-static inline void simple_gpiochip_init(const char *compatible) {}
-#endif /* CONFIG_SIMPLE_GPIO */
-
-#endif /* __SYSDEV_SIMPLE_GPIO_H */
index df832b0..f5fadbd 100644 (file)
@@ -1035,6 +1035,15 @@ static int xive_irq_alloc_data(unsigned int virq, irq_hw_number_t hw)
        xd->target = XIVE_INVALID_TARGET;
        irq_set_handler_data(virq, xd);
 
+       /*
+        * Turn OFF by default the interrupt being mapped. A side
+        * effect of this is that the ESB page of the interrupt gets
+        * mapped into the Linux address space. This prevents page
+        * faults in the crash handler, which masks all
+        * interrupts.
+        */
+       xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
+
        return 0;
 }
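For background on the load above: each XIVE source has an ESB (Event State Buffer) whose MMIO space exposes "special" pages, and a load from the SET_PQ_01 page atomically moves the source to the masked (PQ=01) state while returning the previous PQ bits. A simplified sketch of the pattern, with xd being the xive_irq_data set up above:

	/*
	 * The load both switches the source to the masked PQ=01 state
	 * and touches the ESB page, so the mapping is populated long
	 * before a crash. The return value is the old PQ state.
	 */
	u64 old_pq = xive_esb_read(xd, XIVE_ESB_SET_PQ_01);
	(void)old_pq;	/* unused at mapping time */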
 
index 2b4e959..7b9fe0a 100755 (executable)
@@ -20,7 +20,7 @@ objdump="$1"
 vmlinux="$2"
 
 bad_relocs=$(
-"$objdump" -R "$vmlinux" |
+$objdump -R "$vmlinux" |
        # Only look at relocation lines.
        grep -E '\<R_' |
        # These relocations are okay
index 1e972df..7711475 100755 (executable)
@@ -18,14 +18,14 @@ vmlinux="$2"
 #__end_interrupts should be located within the first 64K
 
 end_intr=0x$(
-"$objdump" -R "$vmlinux" -d --start-address=0xc000000000000000         \
+$objdump -R "$vmlinux" -d --start-address=0xc000000000000000           \
                 --stop-address=0xc000000000010000 |
 grep '\<__end_interrupts>:' |
 awk '{print $1}'
 )
 
 BRANCHES=$(
-"$objdump" -R "$vmlinux" -D --start-address=0xc000000000000000         \
+$objdump -R "$vmlinux" -D --start-address=0xc000000000000000           \
                --stop-address=${end_intr} |
 grep -e "^c[0-9a-f]*:[[:space:]]*\([0-9a-f][0-9a-f][[:space:]]\)\{4\}[[:space:]]*b" |
 grep -v '\<__start_initialization_multiplatform>' |
index f142570..c3842db 100644 (file)
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: GPL-2.0
 # Makefile for xmon
 
-# Disable clang warning for using setjmp without setjmp.h header
-subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header)
+# Avoid clang warnings around longjmp/setjmp declarations
+subdir-ccflags-y := -ffreestanding
 
 GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
index d83364e..a705604 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/nmi.h>
 #include <linux/ctype.h>
 #include <linux/highmem.h>
+#include <linux/security.h>
 
 #include <asm/debugfs.h>
 #include <asm/ptrace.h>
@@ -187,6 +188,8 @@ static void dump_tlb_44x(void);
 static void dump_tlb_book3e(void);
 #endif
 
+static void clear_all_bpt(void);
+
 #ifdef CONFIG_PPC64
 #define REG            "%.16lx"
 #else
@@ -283,10 +286,38 @@ Commands:\n\
 "  U   show uptime information\n"
 "  ?   help\n"
 "  # n limit output to n lines per page (for dp, dpa, dl)\n"
-"  zr  reboot\n\
-  zh   halt\n"
+"  zr  reboot\n"
+"  zh  halt\n"
 ;
 
+#ifdef CONFIG_SECURITY
+static bool xmon_is_locked_down(void)
+{
+       static bool lockdown;
+
+       if (!lockdown) {
+               lockdown = !!security_locked_down(LOCKDOWN_XMON_RW);
+               if (lockdown) {
+                       printf("xmon: Disabled due to kernel lockdown\n");
+                       xmon_is_ro = true;
+               }
+       }
+
+       if (!xmon_is_ro) {
+               xmon_is_ro = !!security_locked_down(LOCKDOWN_XMON_WR);
+               if (xmon_is_ro)
+                       printf("xmon: Read-only due to kernel lockdown\n");
+       }
+
+       return lockdown;
+}
+#else /* CONFIG_SECURITY */
+static inline bool xmon_is_locked_down(void)
+{
+       return false;
+}
+#endif
+
 static struct pt_regs *xmon_regs;
 
 static inline void sync(void)
@@ -438,7 +469,10 @@ static bool wait_for_other_cpus(int ncpus)
 
        return false;
 }
-#endif /* CONFIG_SMP */
+#else /* CONFIG_SMP */
+static inline void get_output_lock(void) {}
+static inline void release_output_lock(void) {}
+#endif
 
 static inline int unrecoverable_excp(struct pt_regs *regs)
 {
@@ -455,6 +489,7 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
        int cmd = 0;
        struct bpt *bp;
        long recurse_jmp[JMP_BUF_LEN];
+       bool locked_down;
        unsigned long offset;
        unsigned long flags;
 #ifdef CONFIG_SMP
@@ -465,6 +500,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
        local_irq_save(flags);
        hard_irq_disable();
 
+       locked_down = xmon_is_locked_down();
+
        if (!fromipi) {
                tracing_enabled = tracing_is_on();
                tracing_off();
@@ -518,7 +555,8 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
 
        if (!fromipi) {
                get_output_lock();
-               excprint(regs);
+               if (!locked_down)
+                       excprint(regs);
                if (bp) {
                        printf("cpu 0x%x stopped at breakpoint 0x%tx (",
                               cpu, BP_NUM(bp));
@@ -570,10 +608,14 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                }
                remove_bpts();
                disable_surveillance();
-               /* for breakpoint or single step, print the current instr. */
-               if (bp || TRAP(regs) == 0xd00)
-                       ppc_inst_dump(regs->nip, 1, 0);
-               printf("enter ? for help\n");
+
+               if (!locked_down) {
+                       /* for breakpoint or single step, print curr insn */
+                       if (bp || TRAP(regs) == 0xd00)
+                               ppc_inst_dump(regs->nip, 1, 0);
+                       printf("enter ? for help\n");
+               }
+
                mb();
                xmon_gate = 1;
                barrier();
@@ -597,8 +639,9 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                        spin_cpu_relax();
                        touch_nmi_watchdog();
                } else {
-                       cmd = cmds(regs);
-                       if (cmd != 0) {
+                       if (!locked_down)
+                               cmd = cmds(regs);
+                       if (locked_down || cmd != 0) {
                                /* exiting xmon */
                                insert_bpts();
                                xmon_gate = 0;
@@ -635,13 +678,16 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                               "can't continue\n");
                remove_bpts();
                disable_surveillance();
-               /* for breakpoint or single step, print the current instr. */
-               if (bp || TRAP(regs) == 0xd00)
-                       ppc_inst_dump(regs->nip, 1, 0);
-               printf("enter ? for help\n");
+               if (!locked_down) {
+                       /* for breakpoint or single step, print current insn */
+                       if (bp || TRAP(regs) == 0xd00)
+                               ppc_inst_dump(regs->nip, 1, 0);
+                       printf("enter ? for help\n");
+               }
        }
 
-       cmd = cmds(regs);
+       if (!locked_down)
+               cmd = cmds(regs);
 
        insert_bpts();
        in_xmon = 0;
@@ -670,7 +716,10 @@ static int xmon_core(struct pt_regs *regs, int fromipi)
                }
        }
 #endif
-       insert_cpu_bpts();
+       if (locked_down)
+               clear_all_bpt();
+       else
+               insert_cpu_bpts();
 
        touch_nmi_watchdog();
        local_irq_restore(flags);
@@ -884,7 +933,7 @@ static void insert_cpu_bpts(void)
        if (dabr.enabled) {
                brk.address = dabr.address;
                brk.type = (dabr.enabled & HW_BRK_TYPE_DABR) | HW_BRK_TYPE_PRIV_ALL;
-               brk.len = 8;
+               brk.len = DABR_MAX_LEN;
                __set_breakpoint(&brk);
        }
 
@@ -1047,10 +1096,6 @@ cmds(struct pt_regs *excp)
                        set_lpp_cmd();
                        break;
                case 'b':
-                       if (xmon_is_ro) {
-                               printf(xmon_ro_msg);
-                               break;
-                       }
                        bpt_cmds();
                        break;
                case 'C':
@@ -1319,11 +1364,16 @@ bpt_cmds(void)
        struct bpt *bp;
 
        cmd = inchar();
+
        switch (cmd) {
 #ifndef CONFIG_PPC_8xx
        static const char badaddr[] = "Only kernel addresses are permitted for breakpoints\n";
        int mode;
        case 'd':       /* bd - hardware data breakpoint */
+               if (xmon_is_ro) {
+                       printf(xmon_ro_msg);
+                       break;
+               }
                if (!ppc_breakpoint_available()) {
                        printf("Hardware data breakpoint not supported on this cpu\n");
                        break;
@@ -1351,6 +1401,10 @@ bpt_cmds(void)
                break;
 
        case 'i':       /* bi - hardware instr breakpoint */
+               if (xmon_is_ro) {
+                       printf(xmon_ro_msg);
+                       break;
+               }
                if (!cpu_has_feature(CPU_FTR_ARCH_207S)) {
                        printf("Hardware instruction breakpoint "
                               "not supported on this cpu\n");
@@ -1409,7 +1463,8 @@ bpt_cmds(void)
                        break;
                }
                termch = cmd;
-               if (!scanhex(&a)) {
+
+               if (xmon_is_ro || !scanhex(&a)) {
                        /* print all breakpoints */
                        printf("   type            address\n");
                        if (dabr.enabled) {
@@ -3762,6 +3817,11 @@ static void xmon_init(int enable)
 #ifdef CONFIG_MAGIC_SYSRQ
 static void sysrq_handle_xmon(int key)
 {
+       if (xmon_is_locked_down()) {
+               clear_all_bpt();
+               xmon_init(0);
+               return;
+       }
        /* ensure xmon is enabled */
        xmon_init(1);
        debugger(get_irq_regs());
@@ -3783,7 +3843,6 @@ static int __init setup_xmon_sysrq(void)
 device_initcall(setup_xmon_sysrq);
 #endif /* CONFIG_MAGIC_SYSRQ */
 
-#ifdef CONFIG_DEBUG_FS
 static void clear_all_bpt(void)
 {
        int i;
@@ -3801,18 +3860,22 @@ static void clear_all_bpt(void)
                iabr = NULL;
                dabr.enabled = 0;
        }
-
-       printf("xmon: All breakpoints cleared\n");
 }
 
+#ifdef CONFIG_DEBUG_FS
 static int xmon_dbgfs_set(void *data, u64 val)
 {
        xmon_on = !!val;
        xmon_init(xmon_on);
 
        /* make sure all breakpoints removed when disabling */
-       if (!xmon_on)
+       if (!xmon_on) {
                clear_all_bpt();
+               get_output_lock();
+               printf("xmon: All breakpoints cleared\n");
+               release_output_lock();
+       }
+
        return 0;
 }
 
@@ -3838,7 +3901,11 @@ static int xmon_early __initdata;
 
 static int __init early_parse_xmon(char *p)
 {
-       if (!p || strncmp(p, "early", 5) == 0) {
+       if (xmon_is_locked_down()) {
+               xmon_init(0);
+               xmon_early = 0;
+               xmon_on = 0;
+       } else if (!p || strncmp(p, "early", 5) == 0) {
                /* just "xmon" is equivalent to "xmon=early" */
                xmon_init(1);
                xmon_early = 1;
index ec00bf3..6514f9e 100644 (file)
@@ -135,6 +135,15 @@ int is_hash_blacklisted(const u8 *hash, size_t hash_len, const char *type)
 }
 EXPORT_SYMBOL_GPL(is_hash_blacklisted);
 
+int is_binary_blacklisted(const u8 *hash, size_t hash_len)
+{
+       if (is_hash_blacklisted(hash, hash_len, "bin") == -EKEYREJECTED)
+               return -EPERM;
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(is_binary_blacklisted);
+
 /*
  * Initialise the blacklist
  */
index 400960c..b1314d1 100644 (file)
@@ -147,7 +147,8 @@ static struct miscdevice anslcd_dev = {
        &anslcd_fops
 };
 
-const char anslcd_logo[] =     "********************"  /* Line #1 */
+static const char anslcd_logo[] __initconst =
+                               "********************"  /* Line #1 */
                                "*      LINUX!      *"  /* Line #3 */
                                "*    Welcome to    *"  /* Line #2 */
                                "********************"; /* Line #4 */
index 18627bb..951f7f2 100644 (file)
@@ -154,11 +154,11 @@ static enum pci_bus_speed get_max_bus_speed(struct slot *slot)
        return speed;
 }
 
-static int get_children_props(struct device_node *dn, const int **drc_indexes,
-               const int **drc_names, const int **drc_types,
-               const int **drc_power_domains)
+static int get_children_props(struct device_node *dn, const __be32 **drc_indexes,
+                             const __be32 **drc_names, const __be32 **drc_types,
+                             const __be32 **drc_power_domains)
 {
-       const int *indexes, *names, *types, *domains;
+       const __be32 *indexes, *names, *types, *domains;
 
        indexes = of_get_property(dn, "ibm,drc-indexes", NULL);
        names = of_get_property(dn, "ibm,drc-names", NULL);
@@ -194,8 +194,8 @@ static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
                                char *drc_type, unsigned int my_index)
 {
        char *name_tmp, *type_tmp;
-       const int *indexes, *names;
-       const int *types, *domains;
+       const __be32 *indexes, *names;
+       const __be32 *types, *domains;
        int i, rc;
 
        rc = get_children_props(dn->parent, &indexes, &names, &types, &domains);
@@ -208,7 +208,7 @@ static int rpaphp_check_drc_props_v1(struct device_node *dn, char *drc_name,
 
        /* Iterate through parent properties, looking for my-drc-index */
        for (i = 0; i < be32_to_cpu(indexes[0]); i++) {
-               if ((unsigned int) indexes[i + 1] == my_index)
+               if (be32_to_cpu(indexes[i + 1]) == my_index)
                        break;
 
                name_tmp += (strlen(name_tmp) + 1);
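This conversion matters on little-endian kernels: device-tree cells are always stored big-endian, so the old raw integer dereference only happened to work on big-endian hosts. A minimal sketch of the corrected access pattern (dn is an assumed struct device_node *, property name reused from the hunk above):

	const __be32 *cells = of_get_property(dn, "ibm,drc-indexes", NULL);
	u32 count, first;

	if (cells) {
		count = be32_to_cpu(cells[0]);	/* first cell: entry count */
		first = be32_to_cpu(cells[1]);	/* entries follow the count */
	}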
@@ -239,6 +239,8 @@ static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
        value = of_prop_next_u32(info, NULL, &entries);
        if (!value)
                return -EINVAL;
+       else
+               value++;
 
        for (j = 0; j < entries; j++) {
                of_read_drc_info_cell(&info, &value, &drc);
@@ -246,9 +248,10 @@ static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
                /* Should now know end of current entry */
 
                /* Found it */
-               if (my_index <= drc.last_drc_index) {
+               if (my_index >= drc.drc_index_start && my_index <= drc.last_drc_index) {
+                       int index = my_index - drc.drc_index_start;
                        sprintf(cell_drc_name, "%s%d", drc.drc_name_prefix,
-                               my_index);
+                               drc.drc_name_suffix_start + index);
                        break;
                }
        }
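The rewritten match also fixes the generated slot name: the suffix is drc_name_suffix_start plus the entry's offset into the index range, not the raw DRC index. A hypothetical worked example:

	/*
	 * drc_index_start       = 0x1000
	 * drc_name_suffix_start = 1
	 * drc_name_prefix       = "Slot"
	 * my_index              = 0x1003
	 *
	 *   index = 0x1003 - 0x1000 = 3
	 *   name  = "Slot" + (1 + 3)  ->  "Slot4"
	 *
	 * The old code appended my_index directly, yielding "Slot4099".
	 */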
@@ -265,7 +268,7 @@ static int rpaphp_check_drc_props_v2(struct device_node *dn, char *drc_name,
 int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
                        char *drc_type)
 {
-       const unsigned int *my_index;
+       const __be32 *my_index;
 
        my_index = of_get_property(dn, "ibm,my-drc-index", NULL);
        if (!my_index) {
@@ -273,12 +276,12 @@ int rpaphp_check_drc_props(struct device_node *dn, char *drc_name,
                return -EINVAL;
        }
 
-       if (firmware_has_feature(FW_FEATURE_DRC_INFO))
+       if (of_find_property(dn->parent, "ibm,drc-info", NULL))
                return rpaphp_check_drc_props_v2(dn, drc_name, drc_type,
-                                               *my_index);
+                                               be32_to_cpu(*my_index));
        else
                return rpaphp_check_drc_props_v1(dn, drc_name, drc_type,
-                                               *my_index);
+                                               be32_to_cpu(*my_index));
 }
 EXPORT_SYMBOL_GPL(rpaphp_check_drc_props);
 
@@ -309,10 +312,11 @@ static int is_php_type(char *drc_type)
  * for built-in pci slots (even when the built-in slots are
  * dlparable.)
  */
-static int is_php_dn(struct device_node *dn, const int **indexes,
-               const int **names, const int **types, const int **power_domains)
+static int is_php_dn(struct device_node *dn, const __be32 **indexes,
+                    const __be32 **names, const __be32 **types,
+                    const __be32 **power_domains)
 {
-       const int *drc_types;
+       const __be32 *drc_types;
        int rc;
 
        rc = get_children_props(dn, indexes, names, &drc_types, power_domains);
@@ -326,33 +330,55 @@ static int is_php_dn(struct device_node *dn, const int **indexes,
        return 1;
 }
 
-/**
- * rpaphp_add_slot -- declare a hotplug slot to the hotplug subsystem.
- * @dn: device node of slot
- *
- * This subroutine will register a hotpluggable slot with the
- * PCI hotplug infrastructure. This routine is typically called
- * during boot time, if the hotplug slots are present at boot time,
- * or is called later, by the dlpar add code, if the slot is
- * being dynamically added during runtime.
- *
- * If the device node points at an embedded (built-in) slot, this
- * routine will just return without doing anything, since embedded
- * slots cannot be hotplugged.
- *
- * To remove a slot, it suffices to call rpaphp_deregister_slot().
- */
-int rpaphp_add_slot(struct device_node *dn)
+static int rpaphp_drc_info_add_slot(struct device_node *dn)
 {
        struct slot *slot;
+       struct property *info;
+       struct of_drc_info drc;
+       char drc_name[MAX_DRC_NAME_LEN];
+       const __be32 *cur;
+       u32 count;
        int retval = 0;
-       int i;
-       const int *indexes, *names, *types, *power_domains;
-       char *name, *type;
 
-       if (!dn->name || strcmp(dn->name, "pci"))
+       info = of_find_property(dn, "ibm,drc-info", NULL);
+       if (!info)
+               return 0;
+
+       cur = of_prop_next_u32(info, NULL, &count);
+       if (cur)
+               cur++;
+       else
                return 0;
 
+       of_read_drc_info_cell(&info, &cur, &drc);
+       if (!is_php_type(drc.drc_type))
+               return 0;
+
+       sprintf(drc_name, "%s%d", drc.drc_name_prefix, drc.drc_name_suffix_start);
+
+       slot = alloc_slot_struct(dn, drc.drc_index_start, drc_name, drc.drc_power_domain);
+       if (!slot)
+               return -ENOMEM;
+
+       slot->type = simple_strtoul(drc.drc_type, NULL, 10);
+       retval = rpaphp_enable_slot(slot);
+       if (!retval)
+               retval = rpaphp_register_slot(slot);
+
+       if (retval)
+               dealloc_slot_struct(slot);
+
+       return retval;
+}
+
+static int rpaphp_drc_add_slot(struct device_node *dn)
+{
+       struct slot *slot;
+       int retval = 0;
+       int i;
+       const __be32 *indexes, *names, *types, *power_domains;
+       char *name, *type;
+
        /* If this is not a hotplug slot, return without doing anything. */
        if (!is_php_dn(dn, &indexes, &names, &types, &power_domains))
                return 0;
@@ -391,6 +417,33 @@ int rpaphp_add_slot(struct device_node *dn)
        /* XXX FIXME: reports a failure only if last entry in loop failed */
        return retval;
 }
+
+/**
+ * rpaphp_add_slot -- declare a hotplug slot to the hotplug subsystem.
+ * @dn: device node of slot
+ *
+ * This subroutine will register a hotpluggable slot with the
+ * PCI hotplug infrastructure. This routine is typically called
+ * during boot time, if the hotplug slots are present at boot time,
+ * or is called later, by the dlpar add code, if the slot is
+ * being dynamically added during runtime.
+ *
+ * If the device node points at an embedded (built-in) slot, this
+ * routine will just return without doing anything, since embedded
+ * slots cannot be hotplugged.
+ *
+ * To remove a slot, it suffices to call rpaphp_deregister_slot().
+ */
+int rpaphp_add_slot(struct device_node *dn)
+{
+       if (!dn->name || strcmp(dn->name, "pci"))
+               return 0;
+
+       if (of_find_property(dn, "ibm,drc-info", NULL))
+               return rpaphp_drc_info_add_slot(dn);
+       else
+               return rpaphp_drc_add_slot(dn);
+}
 EXPORT_SYMBOL_GPL(rpaphp_add_slot);
 
 static void __exit cleanup_slots(void)
index 04c0644..e649911 100644 (file)
@@ -428,7 +428,7 @@ static inline void tlb_change_page_size(struct mmu_gather *tlb,
 {
 #ifdef CONFIG_HAVE_MMU_GATHER_PAGE_SIZE
        if (tlb->page_size && tlb->page_size != page_size) {
-               if (!tlb->fullmm)
+               if (!tlb->fullmm && !tlb->need_flush_all)
                        tlb_flush_mmu(tlb);
        }
 
index c1a96fd..fb8b07d 100644 (file)
@@ -35,12 +35,18 @@ extern int restrict_link_by_builtin_and_secondary_trusted(
 extern int mark_hash_blacklisted(const char *hash);
 extern int is_hash_blacklisted(const u8 *hash, size_t hash_len,
                               const char *type);
+extern int is_binary_blacklisted(const u8 *hash, size_t hash_len);
 #else
 static inline int is_hash_blacklisted(const u8 *hash, size_t hash_len,
                                      const char *type)
 {
        return 0;
 }
+
+static inline int is_binary_blacklisted(const u8 *hash, size_t hash_len)
+{
+       return 0;
+}
 #endif
 
 #ifdef CONFIG_IMA_BLACKLIST_KEYRING
index 1c37f17..6d90475 100644 (file)
@@ -29,7 +29,8 @@ extern void ima_kexec_cmdline(const void *buf, int size);
 extern void ima_add_kexec_buffer(struct kimage *image);
 #endif
 
-#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390)
+#if (defined(CONFIG_X86) && defined(CONFIG_EFI)) || defined(CONFIG_S390) \
+       || defined(CONFIG_PPC_SECURE_BOOT)
 extern bool arch_ima_get_secureboot(void);
 extern const char * const *arch_get_ima_policy(void);
 #else
index 06ff668..3e8d4ba 100644 (file)
@@ -117,12 +117,14 @@ enum lockdown_reason {
        LOCKDOWN_MODULE_PARAMETERS,
        LOCKDOWN_MMIOTRACE,
        LOCKDOWN_DEBUGFS,
+       LOCKDOWN_XMON_WR,
        LOCKDOWN_INTEGRITY_MAX,
        LOCKDOWN_KCORE,
        LOCKDOWN_KPROBES,
        LOCKDOWN_BPF_READ,
        LOCKDOWN_PERF,
        LOCKDOWN_TRACEFS,
+       LOCKDOWN_XMON_RW,
        LOCKDOWN_CONFIDENTIALITY_MAX,
 };
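Placement within the enum is significant: lockdown reasons are ordered by increasing severity, so everything up to LOCKDOWN_INTEGRITY_MAX trips at lockdown=integrity and everything up to LOCKDOWN_CONFIDENTIALITY_MAX at lockdown=confidentiality. That is why the xmon changes above query the two levels separately; a sketch of the resulting behaviour:

	/*
	 * security_locked_down() returns -EPERM once the corresponding
	 * level is active, 0 otherwise:
	 */
	security_locked_down(LOCKDOWN_XMON_WR);	/* -EPERM at integrity */
	security_locked_down(LOCKDOWN_XMON_RW);	/* -EPERM at confidentiality */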
 
index 903cc2d..3ac4363 100644 (file)
@@ -94,5 +94,6 @@
 #define ZSMALLOC_MAGIC         0x58295829
 #define DMA_BUF_MAGIC          0x444d4142      /* "DMAB" */
 #define Z3FOLD_MAGIC           0x33
+#define PPC_CMM_MAGIC          0xc7571590
 
 #endif /* __LINUX_MAGIC_H__ */
index 0bae6ad..71f0177 100644 (file)
@@ -72,6 +72,15 @@ config LOAD_IPL_KEYS
        depends on S390
        def_bool y
 
+config LOAD_PPC_KEYS
+       bool "Enable loading of platform and blacklisted keys for POWER"
+       depends on INTEGRITY_PLATFORM_KEYRING
+       depends on PPC_SECURE_BOOT
+       default y
+       help
+         Enable loading of keys to the .platform keyring and blacklisted
+         hashes to the .blacklist keyring for powerpc based platforms.
+
 config INTEGRITY_AUDIT
        bool "Enables integrity auditing support "
        depends on AUDIT
index 35e6ca7..7ee39d6 100644 (file)
@@ -11,8 +11,11 @@ integrity-$(CONFIG_INTEGRITY_SIGNATURE) += digsig.o
 integrity-$(CONFIG_INTEGRITY_ASYMMETRIC_KEYS) += digsig_asymmetric.o
 integrity-$(CONFIG_INTEGRITY_PLATFORM_KEYRING) += platform_certs/platform_keyring.o
 integrity-$(CONFIG_LOAD_UEFI_KEYS) += platform_certs/efi_parser.o \
-                                       platform_certs/load_uefi.o
+                                     platform_certs/load_uefi.o \
+                                     platform_certs/keyring_handler.o
 integrity-$(CONFIG_LOAD_IPL_KEYS) += platform_certs/load_ipl_s390.o
-
+integrity-$(CONFIG_LOAD_PPC_KEYS) += platform_certs/efi_parser.o \
+                                     platform_certs/load_powerpc.o \
+                                     platform_certs/keyring_handler.o
 obj-$(CONFIG_IMA)                      += ima/
 obj-$(CONFIG_EVM)                      += evm/
index 3689081..df4ca48 100644 (file)
@@ -217,6 +217,9 @@ void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
                           struct evm_ima_xattr_data *xattr_value,
                           int xattr_len, const struct modsig *modsig, int pcr,
                           struct ima_template_desc *template_desc);
+void process_buffer_measurement(const void *buf, int size,
+                               const char *eventname, enum ima_hooks func,
+                               int pcr);
 void ima_audit_measurement(struct integrity_iint_cache *iint,
                           const unsigned char *filename);
 int ima_alloc_init_template(struct ima_event_data *event_data,
@@ -253,6 +256,8 @@ int ima_policy_show(struct seq_file *m, void *v);
 #define IMA_APPRAISE_KEXEC     0x40
 
 #ifdef CONFIG_IMA_APPRAISE
+int ima_check_blacklist(struct integrity_iint_cache *iint,
+                       const struct modsig *modsig, int pcr);
 int ima_appraise_measurement(enum ima_hooks func,
                             struct integrity_iint_cache *iint,
                             struct file *file, const unsigned char *filename,
@@ -268,6 +273,12 @@ int ima_read_xattr(struct dentry *dentry,
                   struct evm_ima_xattr_data **xattr_value);
 
 #else
+static inline int ima_check_blacklist(struct integrity_iint_cache *iint,
+                                     const struct modsig *modsig, int pcr)
+{
+       return 0;
+}
+
 static inline int ima_appraise_measurement(enum ima_hooks func,
                                           struct integrity_iint_cache *iint,
                                           struct file *file,
index 136ae4e..300c8d2 100644 (file)
@@ -12,6 +12,7 @@
 #include <linux/magic.h>
 #include <linux/ima.h>
 #include <linux/evm.h>
+#include <keys/system_keyring.h>
 
 #include "ima.h"
 
@@ -303,6 +304,38 @@ static int modsig_verify(enum ima_hooks func, const struct modsig *modsig,
        return rc;
 }
 
+/*
+ * ima_check_blacklist - determine if the binary is blacklisted.
+ *
+ * Add the hash of the blacklisted binary to the measurement list, based
+ * on policy.
+ *
+ * Returns -EPERM if the hash is blacklisted.
+ */
+int ima_check_blacklist(struct integrity_iint_cache *iint,
+                       const struct modsig *modsig, int pcr)
+{
+       enum hash_algo hash_algo;
+       const u8 *digest = NULL;
+       u32 digestsize = 0;
+       int rc = 0;
+
+       if (!(iint->flags & IMA_CHECK_BLACKLIST))
+               return 0;
+
+       if (iint->flags & IMA_MODSIG_ALLOWED && modsig) {
+               ima_get_modsig_digest(modsig, &hash_algo, &digest, &digestsize);
+
+               rc = is_binary_blacklisted(digest, digestsize);
+               if ((rc == -EPERM) && (iint->flags & IMA_MEASURE))
+                       process_buffer_measurement(digest, digestsize,
+                                                  "blacklisted-hash", NONE,
+                                                  pcr);
+       }
+
+       return rc;
+}
+
 /*
  * ima_appraise_measurement - appraise file measurement
  *
index 60027c6..d7e987b 100644 (file)
@@ -335,10 +335,14 @@ static int process_measurement(struct file *file, const struct cred *cred,
                                      xattr_value, xattr_len, modsig, pcr,
                                      template_desc);
        if (rc == 0 && (action & IMA_APPRAISE_SUBMASK)) {
-               inode_lock(inode);
-               rc = ima_appraise_measurement(func, iint, file, pathname,
-                                             xattr_value, xattr_len, modsig);
-               inode_unlock(inode);
+               rc = ima_check_blacklist(iint, modsig, pcr);
+               if (rc != -EPERM) {
+                       inode_lock(inode);
+                       rc = ima_appraise_measurement(func, iint, file,
+                                                     pathname, xattr_value,
+                                                     xattr_len, modsig);
+                       inode_unlock(inode);
+               }
                if (!rc)
                        rc = mmap_violation_check(func, file, &pathbuf,
                                                  &pathname, filename);
@@ -626,14 +630,14 @@ int ima_load_data(enum kernel_load_data_id id)
  * @buf: pointer to the buffer that needs to be added to the log.
  * @size: size of buffer(in bytes).
  * @eventname: event name to be used for the buffer entry.
- * @cred: a pointer to a credentials structure for user validation.
- * @secid: the secid of the task to be validated.
+ * @func: IMA hook
+ * @pcr: pcr to extend the measurement
  *
  * Based on policy, the buffer is measured into the ima log.
  */
-static void process_buffer_measurement(const void *buf, int size,
-                                      const char *eventname,
-                                      const struct cred *cred, u32 secid)
+void process_buffer_measurement(const void *buf, int size,
+                               const char *eventname, enum ima_hooks func,
+                               int pcr)
 {
        int ret = 0;
        struct ima_template_entry *entry = NULL;
@@ -642,19 +646,45 @@ static void process_buffer_measurement(const void *buf, int size,
                                            .filename = eventname,
                                            .buf = buf,
                                            .buf_len = size};
-       struct ima_template_desc *template_desc = NULL;
+       struct ima_template_desc *template = NULL;
        struct {
                struct ima_digest_data hdr;
                char digest[IMA_MAX_DIGEST_SIZE];
        } hash = {};
        int violation = 0;
-       int pcr = CONFIG_IMA_MEASURE_PCR_IDX;
        int action = 0;
+       u32 secid;
 
-       action = ima_get_action(NULL, cred, secid, 0, KEXEC_CMDLINE, &pcr,
-                               &template_desc);
-       if (!(action & IMA_MEASURE))
-               return;
+       /*
+        * Both LSM hook and auxiliary buffer measurements are
+        * policy based.  To avoid code duplication, differentiate
+        * between the LSM hook and auxiliary buffer measurements,
+        * retrieving the policy rule information only for the LSM
+        * hook buffer measurements.
+        */
+       if (func) {
+               security_task_getsecid(current, &secid);
+               action = ima_get_action(NULL, current_cred(), secid, 0, func,
+                                       &pcr, &template);
+               if (!(action & IMA_MEASURE))
+                       return;
+       }
+
+       if (!pcr)
+               pcr = CONFIG_IMA_MEASURE_PCR_IDX;
+
+       if (!template) {
+               template = lookup_template_desc("ima-buf");
+               ret = template_desc_init_fields(template->fmt,
+                                               &(template->fields),
+                                               &(template->num_fields));
+               if (ret < 0) {
+                       pr_err("template %s init failed, result: %d\n",
+                              (strlen(template->name) ?
+                               template->name : template->fmt), ret);
+                       return;
+               }
+       }
 
        iint.ima_hash = &hash.hdr;
        iint.ima_hash->algo = ima_hash_algo;
@@ -664,7 +694,7 @@ static void process_buffer_measurement(const void *buf, int size,
        if (ret < 0)
                goto out;
 
-       ret = ima_alloc_init_template(&event_data, &entry, template_desc);
+       ret = ima_alloc_init_template(&event_data, &entry, template);
        if (ret < 0)
                goto out;
 
@@ -686,13 +716,9 @@ out:
  */
 void ima_kexec_cmdline(const void *buf, int size)
 {
-       u32 secid;
-
-       if (buf && size != 0) {
-               security_task_getsecid(current, &secid);
+       if (buf && size != 0)
                process_buffer_measurement(buf, size, "kexec-cmdline",
-                                          current_cred(), secid);
-       }
+                                          KEXEC_CMDLINE, 0);
 }
 
 static int __init init_ima(void)
index 5380aca..f19a895 100644 (file)
@@ -765,8 +765,8 @@ enum {
        Opt_fsuuid, Opt_uid_eq, Opt_euid_eq, Opt_fowner_eq,
        Opt_uid_gt, Opt_euid_gt, Opt_fowner_gt,
        Opt_uid_lt, Opt_euid_lt, Opt_fowner_lt,
-       Opt_appraise_type, Opt_permit_directio,
-       Opt_pcr, Opt_template, Opt_err
+       Opt_appraise_type, Opt_appraise_flag,
+       Opt_permit_directio, Opt_pcr, Opt_template, Opt_err
 };
 
 static const match_table_t policy_tokens = {
@@ -798,6 +798,7 @@ static const match_table_t policy_tokens = {
        {Opt_euid_lt, "euid<%s"},
        {Opt_fowner_lt, "fowner<%s"},
        {Opt_appraise_type, "appraise_type=%s"},
+       {Opt_appraise_flag, "appraise_flag=%s"},
        {Opt_permit_directio, "permit_directio"},
        {Opt_pcr, "pcr=%s"},
        {Opt_template, "template=%s"},
@@ -1172,6 +1173,11 @@ static int ima_parse_rule(char *rule, struct ima_rule_entry *entry)
                        else
                                result = -EINVAL;
                        break;
+               case Opt_appraise_flag:
+                       ima_log_string(ab, "appraise_flag", args[0].from);
+                       if (strstr(args[0].from, "blacklist"))
+                               entry->flags |= IMA_CHECK_BLACKLIST;
+                       break;
                case Opt_permit_directio:
                        entry->flags |= IMA_PERMIT_DIRECTIO;
                        break;
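For completeness, a hypothetical policy rule exercising the new token; the parser above only checks for the substring "blacklist", and the spelling below matches what ima_policy_show() prints back, so it sets IMA_CHECK_BLACKLIST:

	appraise func=KEXEC_KERNEL_CHECK appraise_flag=check_blacklist appraise_type=imasig|modsig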
@@ -1500,6 +1506,8 @@ int ima_policy_show(struct seq_file *m, void *v)
                else
                        seq_puts(m, "appraise_type=imasig ");
        }
+       if (entry->flags & IMA_CHECK_BLACKLIST)
+               seq_puts(m, "appraise_flag=check_blacklist ");
        if (entry->flags & IMA_PERMIT_DIRECTIO)
                seq_puts(m, "permit_directio ");
        rcu_read_unlock();
index d9323d3..73fc286 100644 (file)
@@ -32,6 +32,7 @@
 #define EVM_IMMUTABLE_DIGSIG   0x08000000
 #define IMA_FAIL_UNVERIFIABLE_SIGS     0x10000000
 #define IMA_MODSIG_ALLOWED     0x20000000
+#define IMA_CHECK_BLACKLIST    0x40000000
 
 #define IMA_DO_MASK            (IMA_MEASURE | IMA_APPRAISE | IMA_AUDIT | \
                                 IMA_HASH | IMA_APPRAISE_SUBMASK)
diff --git a/security/integrity/platform_certs/keyring_handler.c b/security/integrity/platform_certs/keyring_handler.c
new file mode 100644 (file)
index 0000000..c5ba695
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/efi.h>
+#include <linux/slab.h>
+#include <keys/asymmetric-type.h>
+#include <keys/system_keyring.h>
+#include "../integrity.h"
+
+static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
+static efi_guid_t efi_cert_x509_sha256_guid __initdata =
+       EFI_CERT_X509_SHA256_GUID;
+static efi_guid_t efi_cert_sha256_guid __initdata = EFI_CERT_SHA256_GUID;
+
+/*
+ * Blacklist a hash.
+ */
+static __init void uefi_blacklist_hash(const char *source, const void *data,
+                                      size_t len, const char *type,
+                                      size_t type_len)
+{
+       char *hash, *p;
+
+       hash = kmalloc(type_len + len * 2 + 1, GFP_KERNEL);
+       if (!hash)
+               return;
+       p = memcpy(hash, type, type_len);
+       p += type_len;
+       bin2hex(p, data, len);
+       p += len * 2;
+       *p = 0;
+
+       mark_hash_blacklisted(hash);
+       kfree(hash);
+}
+
+/*
+ * Blacklist an X509 TBS hash.
+ */
+static __init void uefi_blacklist_x509_tbs(const char *source,
+                                          const void *data, size_t len)
+{
+       uefi_blacklist_hash(source, data, len, "tbs:", 4);
+}
+
+/*
+ * Blacklist the hash of an executable.
+ */
+static __init void uefi_blacklist_binary(const char *source,
+                                        const void *data, size_t len)
+{
+       uefi_blacklist_hash(source, data, len, "bin:", 4);
+}
+
+/*
+ * Return the appropriate handler for particular signature list types found in
+ * the UEFI db and MokListRT tables.
+ */
+__init efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type)
+{
+       if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
+               return add_to_platform_keyring;
+       return 0;
+}
+
+/*
+ * Return the appropriate handler for particular signature list types found in
+ * the UEFI dbx and MokListXRT tables.
+ */
+__init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type)
+{
+       if (efi_guidcmp(*sig_type, efi_cert_x509_sha256_guid) == 0)
+               return uefi_blacklist_x509_tbs;
+       if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0)
+               return uefi_blacklist_binary;
+       return 0;
+}
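The blacklist keyring entries created above are plain "<type>:<hex>" strings; a quick sketch of the buffer arithmetic in uefi_blacklist_hash() for a 32-byte SHA-256 digest:

	/*
	 * type = "bin:", type_len = 4, len = 32:
	 *   kmalloc(4 + 32 * 2 + 1)  ->  "bin:" + 64 hex chars + '\0'
	 * and the result is handed to mark_hash_blacklisted().
	 */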
diff --git a/security/integrity/platform_certs/keyring_handler.h b/security/integrity/platform_certs/keyring_handler.h
new file mode 100644 (file)
index 0000000..2462bfa
--- /dev/null
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef PLATFORM_CERTS_INTERNAL_H
+#define PLATFORM_CERTS_INTERNAL_H
+
+#include <linux/efi.h>
+
+void blacklist_hash(const char *source, const void *data,
+                   size_t len, const char *type,
+                   size_t type_len);
+
+/*
+ * Blacklist an X509 TBS hash.
+ */
+void blacklist_x509_tbs(const char *source, const void *data, size_t len);
+
+/*
+ * Blacklist the hash of an executable.
+ */
+void blacklist_binary(const char *source, const void *data, size_t len);
+
+/*
+ * Return the handler for particular signature list types found in the db.
+ */
+efi_element_handler_t get_handler_for_db(const efi_guid_t *sig_type);
+
+/*
+ * Return the handler for particular signature list types found in the dbx.
+ */
+efi_element_handler_t get_handler_for_dbx(const efi_guid_t *sig_type);
+
+#endif
diff --git a/security/integrity/platform_certs/load_powerpc.c b/security/integrity/platform_certs/load_powerpc.c
new file mode 100644 (file)
index 0000000..a2900cb
--- /dev/null
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 IBM Corporation
+ * Author: Nayna Jain
+ *
+ *      - loads keys and hashes stored and controlled by the firmware.
+ */
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/cred.h>
+#include <linux/err.h>
+#include <linux/slab.h>
+#include <linux/of.h>
+#include <asm/secure_boot.h>
+#include <asm/secvar.h>
+#include "keyring_handler.h"
+
+/*
+ * Get a certificate list blob from the named secure variable.
+ */
+static __init void *get_cert_list(u8 *key, unsigned long keylen, uint64_t *size)
+{
+       int rc;
+       void *db;
+
+       rc = secvar_ops->get(key, keylen, NULL, size);
+       if (rc) {
+               pr_err("Couldn't get size: %d\n", rc);
+               return NULL;
+       }
+
+       db = kmalloc(*size, GFP_KERNEL);
+       if (!db)
+               return NULL;
+
+       rc = secvar_ops->get(key, keylen, db, size);
+       if (rc) {
+               kfree(db);
+               pr_err("Error reading %s var: %d\n", key, rc);
+               return NULL;
+       }
+
+       return db;
+}
+
+/*
+ * Load the certs contained in the keys databases into the platform trusted
+ * keyring and the blacklisted X.509 cert SHA256 hashes into the blacklist
+ * keyring.
+ */
+static int __init load_powerpc_certs(void)
+{
+       void *db = NULL, *dbx = NULL;
+       uint64_t dbsize = 0, dbxsize = 0;
+       int rc = 0;
+       struct device_node *node;
+
+       if (!secvar_ops)
+               return -ENODEV;
+
+       /* The following only applies for the edk2-compat backend. */
+       node = of_find_compatible_node(NULL, NULL, "ibm,edk2-compat-v1");
+       if (!node)
+               return -ENODEV;
+
+       /*
+        * Get db and dbx. They might not exist, so it isn't an error if we
+        * can't get them.
+        */
+       db = get_cert_list("db", 3, &dbsize);
+       if (!db) {
+               pr_err("Couldn't get db list from firmware\n");
+       } else {
+               rc = parse_efi_signature_list("powerpc:db", db, dbsize,
+                                             get_handler_for_db);
+               if (rc)
+                       pr_err("Couldn't parse db signatures: %d\n", rc);
+               kfree(db);
+       }
+
+       dbx = get_cert_list("dbx", 4, &dbxsize);
+       if (!dbx) {
+               pr_info("Couldn't get dbx list from firmware\n");
+       } else {
+               rc = parse_efi_signature_list("powerpc:dbx", dbx, dbxsize,
+                                             get_handler_for_dbx);
+               if (rc)
+                       pr_err("Couldn't parse dbx signatures: %d\n", rc);
+               kfree(dbx);
+       }
+
+       of_node_put(node);
+
+       return rc;
+}
+late_initcall(load_powerpc_certs);
index 81b19c5..111898a 100644 (file)
@@ -9,11 +9,7 @@
 #include <keys/asymmetric-type.h>
 #include <keys/system_keyring.h>
 #include "../integrity.h"
-
-static efi_guid_t efi_cert_x509_guid __initdata = EFI_CERT_X509_GUID;
-static efi_guid_t efi_cert_x509_sha256_guid __initdata =
-       EFI_CERT_X509_SHA256_GUID;
-static efi_guid_t efi_cert_sha256_guid __initdata = EFI_CERT_SHA256_GUID;
+#include "keyring_handler.h"
 
 /*
  * Look to see if a UEFI variable called MokIgnoreDB exists and return true if
@@ -67,72 +63,6 @@ static __init void *get_cert_list(efi_char16_t *name, efi_guid_t *guid,
        return db;
 }
 
-/*
- * Blacklist a hash.
- */
-static __init void uefi_blacklist_hash(const char *source, const void *data,
-                                      size_t len, const char *type,
-                                      size_t type_len)
-{
-       char *hash, *p;
-
-       hash = kmalloc(type_len + len * 2 + 1, GFP_KERNEL);
-       if (!hash)
-               return;
-       p = memcpy(hash, type, type_len);
-       p += type_len;
-       bin2hex(p, data, len);
-       p += len * 2;
-       *p = 0;
-
-       mark_hash_blacklisted(hash);
-       kfree(hash);
-}
-
-/*
- * Blacklist an X509 TBS hash.
- */
-static __init void uefi_blacklist_x509_tbs(const char *source,
-                                          const void *data, size_t len)
-{
-       uefi_blacklist_hash(source, data, len, "tbs:", 4);
-}
-
-/*
- * Blacklist the hash of an executable.
- */
-static __init void uefi_blacklist_binary(const char *source,
-                                        const void *data, size_t len)
-{
-       uefi_blacklist_hash(source, data, len, "bin:", 4);
-}
-
-/*
- * Return the appropriate handler for particular signature list types found in
- * the UEFI db and MokListRT tables.
- */
-static __init efi_element_handler_t get_handler_for_db(const efi_guid_t *
-                                                      sig_type)
-{
-       if (efi_guidcmp(*sig_type, efi_cert_x509_guid) == 0)
-               return add_to_platform_keyring;
-       return 0;
-}
-
-/*
- * Return the appropriate handler for particular signature list types found in
- * the UEFI dbx and MokListXRT tables.
- */
-static __init efi_element_handler_t get_handler_for_dbx(const efi_guid_t *
-                                                       sig_type)
-{
-       if (efi_guidcmp(*sig_type, efi_cert_x509_sha256_guid) == 0)
-               return uefi_blacklist_x509_tbs;
-       if (efi_guidcmp(*sig_type, efi_cert_sha256_guid) == 0)
-               return uefi_blacklist_binary;
-       return 0;
-}
-
 /*
  * Load the certs contained in the UEFI databases into the platform trusted
  * keyring and the UEFI blacklisted X.509 cert SHA256 hashes into the blacklist
index 40b7905..b2f8701 100644 (file)
@@ -32,12 +32,14 @@ static const char *const lockdown_reasons[LOCKDOWN_CONFIDENTIALITY_MAX+1] = {
        [LOCKDOWN_MODULE_PARAMETERS] = "unsafe module parameters",
        [LOCKDOWN_MMIOTRACE] = "unsafe mmio",
        [LOCKDOWN_DEBUGFS] = "debugfs access",
+       [LOCKDOWN_XMON_WR] = "xmon write access",
        [LOCKDOWN_INTEGRITY_MAX] = "integrity",
        [LOCKDOWN_KCORE] = "/proc/kcore access",
        [LOCKDOWN_KPROBES] = "use of kprobes",
        [LOCKDOWN_BPF_READ] = "use of bpf to read kernel RAM",
        [LOCKDOWN_PERF] = "unsafe use of perf",
        [LOCKDOWN_TRACEFS] = "use of tracefs",
+       [LOCKDOWN_XMON_RW] = "xmon read and write access",
        [LOCKDOWN_CONFIDENTIALITY_MAX] = "confidentiality",
 };
 
index 0e2b2e6..e089a0c 100644 (file)
@@ -34,6 +34,7 @@ int pick_online_cpu(void);
 
 int read_debugfs_file(char *debugfs_file, int *result);
 int write_debugfs_file(char *debugfs_file, int result);
+int read_sysfs_file(char *debugfs_file, char *result, size_t result_size);
 void set_dscr(unsigned long val);
 int perf_event_open_counter(unsigned int type,
                            unsigned long config, int group_fd);
index 23f4caf..4173063 100644 (file)
@@ -1,4 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
+include ../../../../../../scripts/Kbuild.include
+
 noarg:
        $(MAKE) -C ../../
 
@@ -6,7 +8,10 @@ noarg:
 CFLAGS += -m64
 
 # Toolchains may build PIE by default which breaks the assembly
-LDFLAGS += -no-pie
+no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
+        $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
+
+LDFLAGS += $(no-pie-option)
 
 TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test    \
         cycles_with_freeze_test pmc56_overflow_test            \
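The change above replaces a hard-coded -no-pie with a probe: try-run compiles a throwaway program and only adds the flag when the toolchain accepts it. A simplified sketch of the helper's shape, per scripts/Kbuild.include:

	# $(call try-run, <command using $$TMP>, <if-ok>, <if-broken>)
	# The command runs with $$TMP as a scratch output file; the macro
	# expands to <if-ok> when it exits 0, otherwise to <if-broken>
	# (empty here, so unsupported toolchains get no extra flag).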
index 200337d..c1f324a 100644 (file)
@@ -148,6 +148,121 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
        return 0;
 }
 
+static int runtest_dar_outside(void)
+{
+       void *target;
+       volatile __u16 temp16;
+       volatile __u64 temp64;
+       struct perf_event_attr attr;
+       int break_fd;
+       unsigned long long breaks;
+       int fail = 0;
+       size_t res;
+
+       target = malloc(8);
+       if (!target) {
+               perror("malloc failed");
+               exit(EXIT_FAILURE);
+       }
+
+       /* setup counters */
+       memset(&attr, 0, sizeof(attr));
+       attr.disabled = 1;
+       attr.type = PERF_TYPE_BREAKPOINT;
+       attr.exclude_kernel = 1;
+       attr.exclude_hv = 1;
+       attr.exclude_guest = 1;
+       attr.bp_type = HW_BREAKPOINT_RW;
+       /* watch middle half of target array */
+       attr.bp_addr = (__u64)(target + 2);
+       attr.bp_len = 4;
+       break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+       if (break_fd < 0) {
+               free(target);
+               perror("sys_perf_event_open");
+               exit(EXIT_FAILURE);
+       }
+
+       /* Shouldn't hit. */
+       ioctl(break_fd, PERF_EVENT_IOC_RESET);
+       ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+       temp16 = *((__u16 *)target);
+       *((__u16 *)target) = temp16;
+       ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+       res = read(break_fd, &breaks, sizeof(unsigned long long));
+       assert(res == sizeof(unsigned long long));
+       if (breaks == 0) {
+               printf("TESTED: No overlap\n");
+       } else {
+               printf("FAILED: No overlap: %lld != 0\n", breaks);
+               fail = 1;
+       }
+
+       /* Hit */
+       ioctl(break_fd, PERF_EVENT_IOC_RESET);
+       ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+       temp16 = *((__u16 *)(target + 1));
+       *((__u16 *)(target + 1)) = temp16;
+       ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+       res = read(break_fd, &breaks, sizeof(unsigned long long));
+       assert(res == sizeof(unsigned long long));
+       if (breaks == 2) {
+               printf("TESTED: Partial overlap\n");
+       } else {
+               printf("FAILED: Partial overlap: %lld != 2\n", breaks);
+               fail = 1;
+       }
+
+       /* Hit */
+       ioctl(break_fd, PERF_EVENT_IOC_RESET);
+       ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+       temp16 = *((__u16 *)(target + 5));
+       *((__u16 *)(target + 5)) = temp16;
+       ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+       res = read(break_fd, &breaks, sizeof(unsigned long long));
+       assert(res == sizeof(unsigned long long));
+       if (breaks == 2) {
+               printf("TESTED: Partial overlap\n");
+       } else {
+               printf("FAILED: Partial overlap: %lld != 2\n", breaks);
+               fail = 1;
+       }
+
+       /* Shouldn't Hit */
+       ioctl(break_fd, PERF_EVENT_IOC_RESET);
+       ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+       temp16 = *((__u16 *)(target + 6));
+       *((__u16 *)(target + 6)) = temp16;
+       ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+       res = read(break_fd, &breaks, sizeof(unsigned long long));
+       assert(res == sizeof(unsigned long long));
+       if (breaks == 0) {
+               printf("TESTED: No overlap\n");
+       } else {
+               printf("FAILED: No overlap: %lld != 0\n", breaks);
+               fail = 1;
+       }
+
+       /* Hit */
+       ioctl(break_fd, PERF_EVENT_IOC_RESET);
+       ioctl(break_fd, PERF_EVENT_IOC_ENABLE);
+       temp64 = *((__u64 *)target);
+       *((__u64 *)target) = temp64;
+       ioctl(break_fd, PERF_EVENT_IOC_DISABLE);
+       res = read(break_fd, &breaks, sizeof(unsigned long long));
+       assert(res == sizeof(unsigned long long));
+       if (breaks == 2) {
+               printf("TESTED: Full overlap\n");
+       } else {
+               printf("FAILED: Full overlap: %lld != 2\n", breaks);
+               fail = 1;
+       }
+
+       free(target);
+       close(break_fd);
+       return fail;
+}
+
 static int runtest(void)
 {
        int rwflag;
@@ -172,7 +287,9 @@ static int runtest(void)
                                return ret;
                }
        }
-       return 0;
+
+       ret = runtest_dar_outside();
+       return ret;
 }
 
 
index 3066d31..7deedbc 100644 (file)
 #include <sys/wait.h>
 #include "ptrace.h"
 
-/* Breakpoint access modes */
-enum {
-       BP_X = 1,
-       BP_RW = 2,
-       BP_W = 4,
-};
-
-static pid_t child_pid;
-static struct ppc_debug_info dbginfo;
-
-static void get_dbginfo(void)
-{
-       int ret;
+#define SPRN_PVR       0x11F
+#define PVR_8xx                0x00500000
 
-       ret = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
-       if (ret) {
-               perror("Can't get breakpoint info\n");
-               exit(-1);
-       }
-}
+bool is_8xx;
 
-static bool hwbreak_present(void)
-{
-       return (dbginfo.num_data_bps != 0);
-}
+/*
+ * Use volatile on all global vars so that the compiler doesn't
+ * optimise their loads/stores. Otherwise the selftest can fail.
+ */
+static volatile __u64 glvar;
 
-static bool dawr_present(void)
-{
-       return !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
-}
+#define DAWR_MAX_LEN 512
+static volatile __u8 big_var[DAWR_MAX_LEN] __attribute__((aligned(512)));
 
-static void set_breakpoint_addr(void *addr)
-{
-       int ret;
+#define A_LEN 6
+#define B_LEN 6
+struct gstruct {
+       __u8 a[A_LEN]; /* double word aligned */
+       __u8 b[B_LEN]; /* double word unaligned */
+};
+static volatile struct gstruct gstruct __attribute__((aligned(512)));
 
-       ret = ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, addr);
-       if (ret) {
-               perror("Can't set breakpoint addr\n");
-               exit(-1);
-       }
-}
 
-static int set_hwbreakpoint_addr(void *addr, int range)
+static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
 {
-       int ret;
-
-       struct ppc_hw_breakpoint info;
-
-       info.version = 1;
-       info.trigger_type = PPC_BREAKPOINT_TRIGGER_RW;
-       info.addr_mode = PPC_BREAKPOINT_MODE_EXACT;
-       if (range > 0)
-               info.addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
-       info.condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
-       info.addr = (__u64)addr;
-       info.addr2 = (__u64)addr + range;
-       info.condition_value = 0;
-
-       ret = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, &info);
-       if (ret < 0) {
-               perror("Can't set breakpoint\n");
+       if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, dbginfo)) {
+               perror("Can't get breakpoint info");
                exit(-1);
        }
-       return ret;
 }
 
-static int del_hwbreakpoint_addr(int watchpoint_handle)
+static bool dawr_present(struct ppc_debug_info *dbginfo)
 {
-       int ret;
-
-       ret = ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, watchpoint_handle);
-       if (ret < 0) {
-               perror("Can't delete hw breakpoint\n");
-               exit(-1);
-       }
-       return ret;
+       return !!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_DAWR);
 }
 
-#define DAWR_LENGTH_MAX 512
-
-/* Dummy variables to test read/write accesses */
-static unsigned long long
-       dummy_array[DAWR_LENGTH_MAX / sizeof(unsigned long long)]
-       __attribute__((aligned(512)));
-static unsigned long long *dummy_var = dummy_array;
-
 static void write_var(int len)
 {
-       long long *plval;
-       char *pcval;
-       short *psval;
-       int *pival;
+       __u8 *pcvar;
+       __u16 *psvar;
+       __u32 *pivar;
+       __u64 *plvar;
 
        switch (len) {
        case 1:
-               pcval = (char *)dummy_var;
-               *pcval = 0xff;
+               pcvar = (__u8 *)&glvar;
+               *pcvar = 0xff;
                break;
        case 2:
-               psval = (short *)dummy_var;
-               *psval = 0xffff;
+               psvar = (__u16 *)&glvar;
+               *psvar = 0xffff;
                break;
        case 4:
-               pival = (int *)dummy_var;
-               *pival = 0xffffffff;
+               pivar = (__u32 *)&glvar;
+               *pivar = 0xffffffff;
                break;
        case 8:
-               plval = (long long *)dummy_var;
-               *plval = 0xffffffffffffffffLL;
+               plvar = (__u64 *)&glvar;
+               *plvar = 0xffffffffffffffffLL;
                break;
        }
 }
 
 static void read_var(int len)
 {
-       char cval __attribute__((unused));
-       short sval __attribute__((unused));
-       int ival __attribute__((unused));
-       long long lval __attribute__((unused));
+       __u8 cvar __attribute__((unused));
+       __u16 svar __attribute__((unused));
+       __u32 ivar __attribute__((unused));
+       __u64 lvar __attribute__((unused));
 
        switch (len) {
        case 1:
-               cval = *(char *)dummy_var;
+               cvar = (__u8)glvar;
                break;
        case 2:
-               sval = *(short *)dummy_var;
+               svar = (__u16)glvar;
                break;
        case 4:
-               ival = *(int *)dummy_var;
+               ivar = (__u32)glvar;
                break;
        case 8:
-               lval = *(long long *)dummy_var;
+               lvar = (__u64)glvar;
                break;
        }
 }
 
-/*
- * Do the r/w accesses to trigger the breakpoints. And run
- * the usual traps.
- */
-static void trigger_tests(void)
+static void test_workload(void)
 {
-       int len, ret;
+       __u8 cvar __attribute__((unused));
+       __u32 ivar __attribute__((unused));
+       int len = 0;
 
-       ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
-       if (ret) {
-               perror("Can't be traced?\n");
-               return;
+       if (ptrace(PTRACE_TRACEME, 0, NULL, 0)) {
+               perror("Child can't be traced?");
+               exit(-1);
        }
 
        /* Wake up the parent so that it sets up the first test */
        kill(getpid(), SIGUSR1);
 
-       /* Test write watchpoints */
-       for (len = 1; len <= sizeof(long); len <<= 1)
+       /* PTRACE_SET_DEBUGREG, WO test */
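+       /* 1, 2, 4 and 8 byte writes, matching the lengths the parent tests */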
+       for (len = 1; len <= sizeof(glvar); len <<= 1)
                write_var(len);
 
-       /* Test read/write watchpoints (on read accesses) */
-       for (len = 1; len <= sizeof(long); len <<= 1)
+       /* PTRACE_SET_DEBUGREG, RO test */
+       for (len = 1; len <= sizeof(glvar); len <<= 1)
                read_var(len);
 
-       /* Test when breakpoint is unset */
-
-       /* Test write watchpoints */
-       for (len = 1; len <= sizeof(long); len <<= 1)
-               write_var(len);
+       /* PTRACE_SET_DEBUGREG, RW test */
+       for (len = 1; len <= sizeof(glvar); len <<= 1) {
+               if (rand() % 2)
+                       read_var(len);
+               else
+                       write_var(len);
+       }
 
-       /* Test read/write watchpoints (on read accesses) */
-       for (len = 1; len <= sizeof(long); len <<= 1)
-               read_var(len);
+       /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
+       write_var(1);
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */
+       read_var(1);
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */
+       if (rand() % 2)
+               write_var(1);
+       else
+               read_var(1);
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
+       gstruct.a[rand() % A_LEN] = 'a';
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */
+       cvar = gstruct.a[rand() % A_LEN];
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */
+       if (rand() % 2)
+               gstruct.a[rand() % A_LEN] = 'a';
+       else
+               cvar = gstruct.a[rand() % A_LEN];
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */
+       gstruct.b[rand() % B_LEN] = 'b';
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */
+       cvar = gstruct.b[rand() % B_LEN];
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */
+       if (rand() % 2)
+               gstruct.b[rand() % B_LEN] = 'b';
+       else
+               cvar = gstruct.b[rand() % B_LEN];
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */
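+       /* the 4-byte access starts at gstruct.a + 4 (DAR outside gstruct.b) but its last two bytes overlap gstruct.b */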
+       if (rand() % 2)
+               *((int *)(gstruct.a + 4)) = 10;
+       else
+               ivar = *((int *)(gstruct.a + 4));
+
+       /* PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW test */
+       if (rand() % 2)
+               big_var[rand() % DAWR_MAX_LEN] = 'a';
+       else
+               cvar = big_var[rand() % DAWR_MAX_LEN];
 }
 
-static void check_success(const char *msg)
+static void check_success(pid_t child_pid, const char *name, const char *type,
+                         unsigned long saddr, int len)
 {
-       const char *msg2;
        int status;
+       siginfo_t siginfo;
+       unsigned long eaddr = (saddr + len - 1) | 0x7;
+
+       saddr &= ~0x7;
 
        /* Wait for the child to SIGTRAP */
        wait(&status);
 
-       msg2 = "Failed";
+       ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &siginfo);
 
-       if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
-               msg2 = "Child process hit the breakpoint";
+       if (!WIFSTOPPED(status) || WSTOPSIG(status) != SIGTRAP ||
+           (unsigned long)siginfo.si_addr < saddr ||
+           (unsigned long)siginfo.si_addr > eaddr) {
+               printf("%s, %s, len: %d: Fail\n", name, type, len);
+               exit(-1);
        }
 
-       printf("%s Result: [%s]\n", msg, msg2);
+       printf("%s, %s, len: %d: Ok\n", name, type, len);
+
+       if (!is_8xx) {
+               /*
+                * For a ptrace-registered watchpoint, the signal is
+                * generated before the load/store executes. Single-step
+                * the instruction and then continue the test.
+                */
+               ptrace(PTRACE_SINGLESTEP, child_pid, NULL, 0);
+               wait(NULL);
+       }
 }
 
-static void launch_watchpoints(char *buf, int mode, int len,
-                              struct ppc_debug_info *dbginfo, bool dawr)
+static void ptrace_set_debugreg(pid_t child_pid, unsigned long wp_addr)
 {
-       const char *mode_str;
-       unsigned long data = (unsigned long)(dummy_var);
-       int wh, range;
-
-       data &= ~0x7UL;
-
-       if (mode == BP_W) {
-               data |= (1UL << 1);
-               mode_str = "write";
-       } else {
-               data |= (1UL << 0);
-               data |= (1UL << 1);
-               mode_str = "read";
+       if (ptrace(PTRACE_SET_DEBUGREG, child_pid, 0, wp_addr)) {
+               perror("PTRACE_SET_DEBUGREG failed");
+               exit(-1);
        }
+}
 
-       /* Set DABR_TRANSLATION bit */
-       data |= (1UL << 2);
-
-       /* use PTRACE_SET_DEBUGREG breakpoints */
-       set_breakpoint_addr((void *)data);
-       ptrace(PTRACE_CONT, child_pid, NULL, 0);
-       sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
-       check_success(buf);
-       /* Unregister hw brkpoint */
-       set_breakpoint_addr(NULL);
+static int ptrace_sethwdebug(pid_t child_pid, struct ppc_hw_breakpoint *info)
+{
+       int wh = ptrace(PPC_PTRACE_SETHWDEBUG, child_pid, 0, info);
 
-       data = (data & ~7); /* remove dabr control bits */
+       if (wh <= 0) {
+               perror("PPC_PTRACE_SETHWDEBUG failed");
+               exit(-1);
+       }
+       return wh;
+}
 
-       /* use PPC_PTRACE_SETHWDEBUG breakpoint */
-       if (!(dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
-               return; /* not supported */
-       wh = set_hwbreakpoint_addr((void *)data, 0);
-       ptrace(PTRACE_CONT, child_pid, NULL, 0);
-       sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
-       check_success(buf);
-       /* Unregister hw brkpoint */
-       del_hwbreakpoint_addr(wh);
-
-       /* try a wider range */
-       range = 8;
-       if (dawr)
-               range = 512 - ((int)data & (DAWR_LENGTH_MAX - 1));
-       wh = set_hwbreakpoint_addr((void *)data, range);
-       ptrace(PTRACE_CONT, child_pid, NULL, 0);
-       sprintf(buf, "Test %s watchpoint with len: %d ", mode_str, len);
-       check_success(buf);
-       /* Unregister hw brkpoint */
-       del_hwbreakpoint_addr(wh);
+static void ptrace_delhwdebug(pid_t child_pid, int wh)
+{
+       if (ptrace(PPC_PTRACE_DELHWDEBUG, child_pid, 0, wh) < 0) {
+               perror("PPC_PTRACE_DELHWDEBUG failed");
+               exit(-1);
+       }
 }
 
-/* Set the breakpoints and check the child successfully trigger them */
-static int launch_tests(bool dawr)
+#define DABR_READ_SHIFT                0
+#define DABR_WRITE_SHIFT       1
+#define DABR_TRANSLATION_SHIFT 2
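+/* DABR layout: double-word-aligned address, low bits enable read, write and translation */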
+
+static int test_set_debugreg(pid_t child_pid)
 {
-       char buf[1024];
-       int len, i, status;
+       unsigned long wp_addr = (unsigned long)&glvar;
+       char *name = "PTRACE_SET_DEBUGREG";
+       int len;
+
+       /* PTRACE_SET_DEBUGREG, WO test */
+       wp_addr &= ~0x7UL;
+       wp_addr |= (1UL << DABR_WRITE_SHIFT);
+       wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+       for (len = 1; len <= sizeof(glvar); len <<= 1) {
+               ptrace_set_debugreg(child_pid, wp_addr);
+               ptrace(PTRACE_CONT, child_pid, NULL, 0);
+               check_success(child_pid, name, "WO", wp_addr, len);
+       }
 
-       struct ppc_debug_info dbginfo;
+       /* PTRACE_SET_DEBUGREG, RO test */
+       wp_addr &= ~0x7UL;
+       wp_addr |= (1UL << DABR_READ_SHIFT);
+       wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+       for (len = 1; len <= sizeof(glvar); len <<= 1) {
+               ptrace_set_debugreg(child_pid, wp_addr);
+               ptrace(PTRACE_CONT, child_pid, NULL, 0);
+               check_success(child_pid, name, "RO", wp_addr, len);
+       }
 
-       i = ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo);
-       if (i) {
-               perror("Can't set breakpoint info\n");
-               exit(-1);
+       /* PTRACE_SET_DEBUGREG, RW test */
+       wp_addr &= ~0x7UL;
+       wp_addr |= (1UL << DABR_READ_SHIFT);
+       wp_addr |= (1UL << DABR_WRITE_SHIFT);
+       wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+       for (len = 1; len <= sizeof(glvar); len <<= 1) {
+               ptrace_set_debugreg(child_pid, wp_addr);
+               ptrace(PTRACE_CONT, child_pid, NULL, 0);
+               check_success(child_pid, name, "RW", wp_addr, len);
        }
-       if (!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_RANGE))
-               printf("WARNING: Kernel doesn't support PPC_PTRACE_SETHWDEBUG\n");
 
-       /* Write watchpoint */
-       for (len = 1; len <= sizeof(long); len <<= 1)
-               launch_watchpoints(buf, BP_W, len, &dbginfo, dawr);
+       ptrace_set_debugreg(child_pid, 0);
+       return 0;
+}
 
-       /* Read-Write watchpoint */
-       for (len = 1; len <= sizeof(long); len <<= 1)
-               launch_watchpoints(buf, BP_RW, len, &dbginfo, dawr);
+static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
+                                 unsigned long addr, int len)
+{
+       info->version = 1;
+       info->trigger_type = type;
+       info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+       info->addr = (__u64)addr;
+       info->addr2 = (__u64)addr + len;
+       info->condition_value = 0;
+       if (!len)
+               info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+       else
+               info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+}
 
+static void test_sethwdebug_exact(pid_t child_pid)
+{
+       struct ppc_hw_breakpoint info;
+       unsigned long wp_addr = (unsigned long)&glvar;
+       char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+       int len = 1; /* hardcoded in kernel */
+       int wh;
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+       wh = ptrace_sethwdebug(child_pid, &info);
        ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "WO", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
 
-       /*
-        * Now we have unregistered the breakpoint, access by child
-        * should not cause SIGTRAP.
-        */
+       /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO test */
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, 0);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RO", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
 
-       wait(&status);
+       /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW test */
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, 0);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RW", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
+}
 
-       if (WIFSTOPPED(status) && WSTOPSIG(status) == SIGTRAP) {
-               printf("FAIL: Child process hit the breakpoint, which is not expected\n");
-               ptrace(PTRACE_CONT, child_pid, NULL, 0);
-               return TEST_FAIL;
-       }
+static void test_sethwdebug_range_aligned(pid_t child_pid)
+{
+       struct ppc_hw_breakpoint info;
+       unsigned long wp_addr;
+       char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED";
+       int len;
+       int wh;
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
+       wp_addr = (unsigned long)&gstruct.a;
+       len = A_LEN;
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "WO", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO test */
+       wp_addr = (unsigned long)&gstruct.a;
+       len = A_LEN;
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RO", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW test */
+       wp_addr = (unsigned long)&gstruct.a;
+       len = A_LEN;
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RW", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
+}
 
-       if (WIFEXITED(status))
-               printf("Child exited normally\n");
+static void test_sethwdebug_range_unaligned(pid_t child_pid)
+{
+       struct ppc_hw_breakpoint info;
+       unsigned long wp_addr;
+       char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED";
+       int len;
+       int wh;
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, WO test */
+       wp_addr = (unsigned long)&gstruct.b;
+       len = B_LEN;
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "WO", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RO test */
+       wp_addr = (unsigned long)&gstruct.b;
+       len = B_LEN;
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_READ, wp_addr, len);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RO", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, RW test */
+       wp_addr = (unsigned long)&gstruct.b;
+       len = B_LEN;
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RW", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
 
-       return TEST_PASS;
+}
+
+static void test_sethwdebug_range_unaligned_dar(pid_t child_pid)
+{
+       struct ppc_hw_breakpoint info;
+       unsigned long wp_addr;
+       char *name = "PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE";
+       int len;
+       int wh;
+
+       /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW UNALIGNED, DAR OUTSIDE, RW test */
+       wp_addr = (unsigned long)&gstruct.b;
+       len = B_LEN;
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, len);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RW", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
+}
+
+static void test_sethwdebug_dawr_max_range(pid_t child_pid)
+{
+       struct ppc_hw_breakpoint info;
+       unsigned long wp_addr;
+       char *name = "PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN";
+       int len;
+       int wh;
+
+       /* PPC_PTRACE_SETHWDEBUG, DAWR_MAX_LEN, RW test */
+       wp_addr = (unsigned long)big_var;
+       len = DAWR_MAX_LEN;
+       get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_RW, wp_addr, len);
+       wh = ptrace_sethwdebug(child_pid, &info);
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
+       check_success(child_pid, name, "RW", wp_addr, len);
+       ptrace_delhwdebug(child_pid, wh);
+}
+
+/* Set the breakpoints and check the child successfully trigger them */
+static void
+run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
+{
+       test_set_debugreg(child_pid);
+       if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
+               test_sethwdebug_exact(child_pid);
+
+               if (!is_8xx)
+                       test_sethwdebug_range_aligned(child_pid);
+               if (dawr && !is_8xx) {
+                       test_sethwdebug_range_unaligned(child_pid);
+                       test_sethwdebug_range_unaligned_dar(child_pid);
+                       test_sethwdebug_dawr_max_range(child_pid);
+               }
+       }
 }
 
 static int ptrace_hwbreak(void)
 {
-       pid_t pid;
-       int ret;
+       pid_t child_pid;
+       struct ppc_debug_info dbginfo;
        bool dawr;
 
-       pid = fork();
-       if (!pid) {
-               trigger_tests();
+       child_pid = fork();
+       if (!child_pid) {
+               test_workload();
                return 0;
        }
 
        wait(NULL);
 
-       child_pid = pid;
+       get_dbginfo(child_pid, &dbginfo);
+       SKIP_IF(dbginfo.num_data_bps == 0);
 
-       get_dbginfo();
-       SKIP_IF(!hwbreak_present());
-       dawr = dawr_present();
-
-       ret = launch_tests(dawr);
+       dawr = dawr_present(&dbginfo);
+       run_tests(child_pid, &dbginfo, dawr);
 
+       /* Let the child exit first. */
+       ptrace(PTRACE_CONT, child_pid, NULL, 0);
        wait(NULL);
 
-       return ret;
+       /*
+        * The testcase exits immediately with -1 on any failure. If
+        * we have reached here, all tests were successful.
+        */
+       return TEST_PASS;
 }
 
 int main(int argc, char **argv, char **envp)
 {
+       int pvr = 0;
+       asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR));
+       if (pvr == PVR_8xx)
+               is_8xx = true;
+
        return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
 }
index 25e23e7..2ecfa11 100644 (file)
@@ -73,7 +73,7 @@ trans:
                [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1),
                [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2),
                [tar_3]"i"(TAR_3), [dscr_3]"i"(DSCR_3)
-               : "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+               : "memory", "r0", "r3", "r4", "r5", "r6", "lr"
                );
 
        /* TM failed, analyse */
index f603fe5..6f7fb51 100644 (file)
@@ -74,8 +74,8 @@ trans:
                "3: ;"
                : [res] "=r" (result), [texasr] "=r" (texasr)
                : [sprn_texasr] "i"  (SPRN_TEXASR)
-               : "memory", "r0", "r1", "r3", "r4",
-               "r7", "r8", "r9", "r10", "r11"
+               : "memory", "r0", "r3", "r4",
+                 "r7", "r8", "r9", "r10", "r11", "lr"
                );
 
        if (result) {
index e0d37f0..46ef378 100644 (file)
@@ -62,7 +62,7 @@ trans:
                [sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR),
                [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2),
                [dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1])
-               : "memory", "r0", "r1", "r3", "r4", "r5", "r6"
+               : "memory", "r0", "r3", "r4", "r5", "r6"
                );
 
        /* TM failed, analyse */
index 8027457..70ca012 100644 (file)
@@ -62,8 +62,8 @@ trans:
                "3: ;"
                : [res] "=r" (result), [texasr] "=r" (texasr)
                : [sprn_texasr] "i"  (SPRN_TEXASR), [cptr1] "b" (&cptr[1])
-               : "memory", "r0", "r1", "r3", "r4",
-               "r7", "r8", "r9", "r10", "r11"
+               : "memory", "r0", "r3", "r4",
+                 "r7", "r8", "r9", "r10", "r11", "lr"
                );
 
        if (result) {
index 85861c4..eadbbff 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0+
 
-TEST_GEN_PROGS := rfi_flush
+TEST_GEN_PROGS := rfi_flush spectre_v2
 top_srcdir = ../../../../..
 
 CFLAGS += -I../../../../../usr/include
@@ -8,3 +8,6 @@ CFLAGS += -I../../../../../usr/include
 include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/spectre_v2: CFLAGS += -m64
+$(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
diff --git a/tools/testing/selftests/powerpc/security/branch_loops.S b/tools/testing/selftests/powerpc/security/branch_loops.S
new file mode 100644 (file)
index 0000000..22e9204
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2019, Michael Ellerman, IBM Corp.
+ */
+
+#include <ppc-asm.h>
+
+       .data
+
+jump_table:
+       .long   0x0
+       .long   (.Lstate_1 - .Lstate_0)
+       .long   (.Lstate_2 - .Lstate_0)
+       .long   (.Lstate_3 - .Lstate_0)
+       .long   (.Lstate_4 - .Lstate_0)
+       .long   (.Lstate_5 - .Lstate_0)
+       .long   (.Lstate_6 - .Lstate_0)
+       .long   (.Lstate_7 - .Lstate_0)
+
+       .text
+
+#define ITER_SHIFT     31
+
+.macro state number
+       .balign 32
+.Lstate_\number:
+       .if     \number==7
+       li      r3, 0
+       .else
+       li      r3, \number+1
+       .endif
+       b       .Lloop
+.endm
+
+FUNC_START(pattern_cache_loop)
+       li      r3, 0
+       li      r4, 1
+       sldi    r4, r4, ITER_SHIFT
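+       /* r4 = iteration count = 1 << ITER_SHIFT */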
+
+.Lloop:        cmpdi   r4, 0
+       beqlr
+
+       addi    r4, r4, -1
+
+       ld      r6, jump_table@got(%r2)
+       sldi    r5, r3, 2
+       lwax    r6, r5, r6
+       ld      r7, .Lstate_0@got(%r2)
+       add     r6, r6, r7
+       mtctr   r6
+       bctr
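+       /* computed indirect branch into one of the eight state blocks */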
+
+       state   0
+       state   1
+       state   2
+       state   3
+       state   4
+       state   5
+       state   6
+       state   7
+
+FUNC_END(pattern_cache_loop)
+
+
+FUNC_START(indirect_branch_loop)
+       li      r3, 1
+       sldi    r3, r3, ITER_SHIFT
+
+1:     cmpdi   r3, 0
+       beqlr
+
+       addi    r3, r3, -1
+
+       ld      r4, 2f@got(%r2)
+       mtctr   r4
+       bctr
+
+       .balign 32
+2:     b       1b
+
+FUNC_END(indirect_branch_loop)
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
new file mode 100644 (file)
index 0000000..8c6b982
--- /dev/null
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018-2019 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/prctl.h>
+#include "utils.h"
+
+#include "../pmu/event.h"
+
+
+extern void pattern_cache_loop(void);
+extern void indirect_branch_loop(void);
+
+static int do_count_loop(struct event *events, bool is_p9, s64 *miss_percent)
+{
+       u64 pred, mpred;
+
+       prctl(PR_TASK_PERF_EVENTS_ENABLE);
+
+       if (is_p9)
+               pattern_cache_loop();
+       else
+               indirect_branch_loop();
+
+       prctl(PR_TASK_PERF_EVENTS_DISABLE);
+
+       event_read(&events[0]);
+       event_read(&events[1]);
+
+       // We could scale all the events by running/enabled but we're lazy
+       // As long as the PMU is uncontended they should all run
+       FAIL_IF(events[0].result.running != events[0].result.enabled);
+       FAIL_IF(events[1].result.running != events[1].result.enabled);
+
+       pred =  events[0].result.value;
+       mpred = events[1].result.value;
+
+       if (is_p9) {
+               event_read(&events[2]);
+               event_read(&events[3]);
+               FAIL_IF(events[2].result.running != events[2].result.enabled);
+               FAIL_IF(events[3].result.running != events[3].result.enabled);
+
+               pred  += events[2].result.value;
+               mpred += events[3].result.value;
+       }
+
+       *miss_percent = 100 * mpred / pred;
+
+       return 0;
+}
+
+static void setup_event(struct event *e, u64 config, char *name)
+{
+       event_init_named(e, config, name);
+
+       e->attr.disabled = 1;
+       e->attr.exclude_kernel = 1;
+       e->attr.exclude_hv = 1;
+       e->attr.exclude_idle = 1;
+}
+
+enum spectre_v2_state {
+       VULNERABLE = 0,
+       UNKNOWN = 1,            // Works with FAIL_IF()
+       NOT_AFFECTED,
+       BRANCH_SERIALISATION,
+       COUNT_CACHE_DISABLED,
+       COUNT_CACHE_FLUSH_SW,
+       COUNT_CACHE_FLUSH_HW,
+       BTB_FLUSH,
+};
+
+static enum spectre_v2_state get_sysfs_state(void)
+{
+       enum spectre_v2_state state = UNKNOWN;
+       char buf[256];
+       int len;
+
+       memset(buf, 0, sizeof(buf));
+       FAIL_IF(read_sysfs_file("devices/system/cpu/vulnerabilities/spectre_v2", buf, sizeof(buf)));
+
+       // Make sure it's NUL-terminated
+       buf[sizeof(buf) - 1] = '\0';
+
+       // Trim the trailing newline
+       len = strlen(buf);
+       FAIL_IF(len < 1);
+       buf[len - 1] = '\0';
+
+       printf("sysfs reports: '%s'\n", buf);
+
+       // Order matters
+       if (strstr(buf, "Vulnerable"))
+               state = VULNERABLE;
+       else if (strstr(buf, "Not affected"))
+               state = NOT_AFFECTED;
+       else if (strstr(buf, "Indirect branch serialisation (kernel only)"))
+               state = BRANCH_SERIALISATION;
+       else if (strstr(buf, "Indirect branch cache disabled"))
+               state = COUNT_CACHE_DISABLED;
+       else if (strstr(buf, "Software count cache flush (hardware accelerated)"))
+               state = COUNT_CACHE_FLUSH_HW;
+       else if (strstr(buf, "Software count cache flush"))
+               state = COUNT_CACHE_FLUSH_SW;
+       else if (strstr(buf, "Branch predictor state flush"))
+               state = BTB_FLUSH;
+
+       return state;
+}
+
+#define PM_BR_PRED_CCACHE      0x040a4 // P8 + P9
+#define PM_BR_MPRED_CCACHE     0x040ac // P8 + P9
+#define PM_BR_PRED_PCACHE      0x048a0 // P9 only
+#define PM_BR_MPRED_PCACHE     0x048b0 // P9 only
+
+#define SPRN_PVR 287
+
+int spectre_v2_test(void)
+{
+       enum spectre_v2_state state;
+       struct event events[4];
+       s64 miss_percent;
+       bool is_p9;
+
+       state = get_sysfs_state();
+       if (state == UNKNOWN) {
+               printf("Error: couldn't determine spectre_v2 mitigation state?\n");
+               return -1;
+       }
+
+       memset(events, 0, sizeof(events));
+
+       setup_event(&events[0], PM_BR_PRED_CCACHE,  "PM_BR_PRED_CCACHE");
+       setup_event(&events[1], PM_BR_MPRED_CCACHE, "PM_BR_MPRED_CCACHE");
+       FAIL_IF(event_open(&events[0]));
+       FAIL_IF(event_open_with_group(&events[1], events[0].fd) == -1);
+
+       is_p9 = ((mfspr(SPRN_PVR) >>  16) & 0xFFFF) == 0x4e;
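+       // (the PVR version field 0x004e identifies POWER9)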
+
+       if (is_p9) {
+               // Count pattern cache too
+               setup_event(&events[2], PM_BR_PRED_PCACHE,  "PM_BR_PRED_PCACHE");
+               setup_event(&events[3], PM_BR_MPRED_PCACHE, "PM_BR_MPRED_PCACHE");
+
+               FAIL_IF(event_open_with_group(&events[2], events[0].fd) == -1);
+               FAIL_IF(event_open_with_group(&events[3], events[0].fd) == -1);
+       }
+
+       FAIL_IF(do_count_loop(events, is_p9, &miss_percent));
+
+       event_report_justified(&events[0], 18, 10);
+       event_report_justified(&events[1], 18, 10);
+       event_close(&events[0]);
+       event_close(&events[1]);
+
+       if (is_p9) {
+               event_report_justified(&events[2], 18, 10);
+               event_report_justified(&events[3], 18, 10);
+               event_close(&events[2]);
+               event_close(&events[3]);
+       }
+
+       printf("Miss percent %lld %%\n", miss_percent);
+
+       switch (state) {
+       case VULNERABLE:
+       case NOT_AFFECTED:
+       case COUNT_CACHE_FLUSH_SW:
+       case COUNT_CACHE_FLUSH_HW:
+               // These should all not affect userspace branch prediction
+               if (miss_percent > 15) {
+                       printf("Branch misses > 15%% unexpected in this configuration!\n");
+                       printf("Possible mis-match between reported & actual mitigation\n");
+                       return 1;
+               }
+               break;
+       case BRANCH_SERIALISATION:
+               // This seems to affect userspace branch prediction a bit?
+               if (miss_percent > 25) {
+                       printf("Branch misses > 25%% unexpected in this configuration!\n");
+                       printf("Possible mis-match between reported & actual mitigation\n");
+                       return 1;
+               }
+               break;
+       case COUNT_CACHE_DISABLED:
+               if (miss_percent < 95) {
+                       printf("Branch misses < 20%% unexpected in this configuration!\n");
+                       printf("Possible mis-match between reported & actual mitigation\n");
+                       return 1;
+               }
+               break;
+       case UNKNOWN:
+       case BTB_FLUSH:
+               printf("Not sure!\n");
+               return 1;
+       }
+
+       printf("OK - Measured branch prediction rates match reported spectre v2 mitigation.\n");
+
+       return 0;
+}
+
+int main(int argc, char *argv[])
+{
+       return test_harness(spectre_v2_test, "spectre_v2");
+}
index dade00c..08f9afe 100644 (file)
@@ -42,7 +42,7 @@
 #include "utils.h"
 
 /* Selftest defaults */
-#define COUNT_MAX      4000            /* Number of interactions */
+#define COUNT_MAX      600             /* Number of interactions */
 #define THREADS                16              /* Number of threads */
 
 /* Arguments options */
index 56fbf9f..07c3881 100644 (file)
  */
 
 #define _GNU_SOURCE
+#include <stdio.h>
 #include <stdlib.h>
 #include <signal.h>
 
 #include "utils.h"
+#include "tm.h"
 
 void trap_signal_handler(int signo, siginfo_t *si, void *uc)
 {
@@ -29,6 +31,8 @@ int tm_signal_sigreturn_nt(void)
 {
        struct sigaction trap_sa;
 
+       SKIP_IF(!have_htm());
+
        trap_sa.sa_flags = SA_SIGINFO;
        trap_sa.sa_sigaction = trap_signal_handler;
 
index c02d248..5ee0e98 100644 (file)
@@ -127,6 +127,26 @@ bool is_ppc64le(void)
        return strcmp(uts.machine, "ppc64le") == 0;
 }
 
+int read_sysfs_file(char *fpath, char *result, size_t result_size)
+{
+       char path[PATH_MAX] = "/sys/";
+       int rc = -1, fd;
+
+       strncat(path, fpath, PATH_MAX - strlen(path) - 1);
+
+       if ((fd = open(path, O_RDONLY)) < 0)
+               return rc;
+
+       rc = read(fd, result, result_size);
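+       /* note: result is not NUL-terminated here; callers must terminate it */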
+
+       close(fd);
+
+       if (rc < 0)
+               return rc;
+
+       return 0;
+}
+
 int read_debugfs_file(char *debugfs_file, int *result)
 {
        int rc = -1, fd;