Merge branch 'akpm' (patches from Andrew)

author Linus Torvalds <torvalds@linux-foundation.org>

Fri, 3 Sep 2021 17:08:28 +0000 (10:08 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 3 Sep 2021 17:08:28 +0000 (10:08 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Sep 2021 17:08:28 +0000 (10:08 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 3 Sep 2021 17:08:28 +0000 (10:08 -0700)
diff --combined arch/alpha/kernel/syscalls/syscall.tbl

index 7ac22e0,605645e..e4a041c
--- 1/arch/alpha/kernel/syscalls/syscall.tbl
--- 2/arch/alpha/kernel/syscalls/syscall.tbl
+++ b/arch/alpha/kernel/syscalls/syscall.tbl
@@@ -230,7 -230,7 +230,7 @@@
   259   common  osf_swapctl                     sys_ni_syscall
   260   common  osf_memcntl                     sys_ni_syscall
   261   common  osf_fdatasync                   sys_ni_syscall
- -300   common  bdflush                         sys_bdflush
+ +300   common  bdflush                         sys_ni_syscall
   301   common  sethae                          sys_sethae
   302   common  mount                           sys_mount
   303   common  old_adjtimex                    sys_old_adjtimex
@@@ -486,3 -486,5 +486,5 @@@
   554   common  landlock_create_ruleset         sys_landlock_create_ruleset
   555   common  landlock_add_rule               sys_landlock_add_rule
   556   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 557 reserved for memfd_secret
+ 558   common  process_mrelease                sys_process_mrelease
diff --combined arch/arm/tools/syscall.tbl

index f8a2d5a,2f32eb8..7e0a9b6
--- 1/arch/arm/tools/syscall.tbl
--- 2/arch/arm/tools/syscall.tbl
+++ b/arch/arm/tools/syscall.tbl
@@@ -147,7 -147,7 +147,7 @@@
   131   common  quotactl                sys_quotactl
   132   common  getpgid                 sys_getpgid
   133   common  fchdir                  sys_fchdir
- -134   common  bdflush                 sys_bdflush
+ +134   common  bdflush                 sys_ni_syscall
   135   common  sysfs                   sys_sysfs
   136   common  personality             sys_personality
   # 137 was sys_afs_syscall
@@@ -460,3 -460,5 +460,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined arch/arm64/include/asm/unistd32.h

index 03d4ca4,0f49cdb..4e99e4b
--- 1/arch/arm64/include/asm/unistd32.h
--- 2/arch/arm64/include/asm/unistd32.h
+++ b/arch/arm64/include/asm/unistd32.h
@@@ -279,7 -279,7 +279,7 @@@ __SYSCALL(__NR_getpgid, sys_getpgid
   #define __NR_fchdir 133
   __SYSCALL(__NR_fchdir, sys_fchdir)
   #define __NR_bdflush 134
- -__SYSCALL(__NR_bdflush, sys_bdflush)
+ +__SYSCALL(__NR_bdflush, sys_ni_syscall)
   #define __NR_sysfs 135
   __SYSCALL(__NR_sysfs, sys_sysfs)
   #define __NR_personality 136
@@@ -901,6 -901,8 +901,8 @@@ __SYSCALL(__NR_landlock_create_ruleset
   __SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
   #define __NR_landlock_restrict_self 446
   __SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
+ #define __NR_process_mrelease 448
+ __SYSCALL(__NR_process_mrelease, sys_process_mrelease)
   
   /*
    * Please add new compat syscalls above this comment and update
diff --combined arch/arm64/mm/init.c

index edc8e95,bf5b8a5..b16be52
--- 1/arch/arm64/mm/init.c
--- 2/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@@ -74,6 -74,7 +74,7 @@@ phys_addr_t arm64_dma_phys_limit __ro_a
   static void __init reserve_crashkernel(void)
   {
         unsigned long long crash_base, crash_size;
+       unsigned long long crash_max = arm64_dma_phys_limit;
         int ret;
   
         ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(),
@@@ -84,33 -85,18 +85,18 @@@
   
         crash_size = PAGE_ALIGN(crash_size);
   
-       if (crash_base == 0) {
-               /* Current arm64 boot protocol requires 2MB alignment */
-               crash_base = memblock_find_in_range(0, arm64_dma_phys_limit,
-                               crash_size, SZ_2M);
-               if (crash_base == 0) {
-                       pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
-                               crash_size);
-                       return;
-               }
-       } else {
-               /* User specifies base address explicitly. */
-               if (!memblock_is_region_memory(crash_base, crash_size)) {
-                       pr_warn("cannot reserve crashkernel: region is not memory\n");
-                       return;
-               }
+       /* User specifies base address explicitly. */
+       if (crash_base)
+               crash_max = crash_base + crash_size;
   
-               if (memblock_is_region_reserved(crash_base, crash_size)) {
-                       pr_warn("cannot reserve crashkernel: region overlaps reserved memory\n");
-                       return;
-               }
- 
-               if (!IS_ALIGNED(crash_base, SZ_2M)) {
-                       pr_warn("cannot reserve crashkernel: base address is not 2MB aligned\n");
-                       return;
-               }
+       /* Current arm64 boot protocol requires 2MB alignment */
+       crash_base = memblock_phys_alloc_range(crash_size, SZ_2M,
+                                              crash_base, crash_max);
+       if (!crash_base) {
+               pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
+                       crash_size);
+               return;
         }
-       memblock_reserve(crash_base, crash_size);
   
         pr_info("crashkernel reserved: 0x%016llx - 0x%016llx (%lld MB)\n",
                 crash_base, crash_base + crash_size, crash_size >> 20);
@@@ -124,6 -110,57 +110,6 @@@ static void __init reserve_crashkernel(
   }
   #endif /* CONFIG_KEXEC_CORE */
   
- -#ifdef CONFIG_CRASH_DUMP
- -static int __init early_init_dt_scan_elfcorehdr(unsigned long node,
- -              const char *uname, int depth, void *data)
- -{
- -      const __be32 *reg;
- -      int len;
- -
- -      if (depth != 1 || strcmp(uname, "chosen") != 0)
- -              return 0;
- -
- -      reg = of_get_flat_dt_prop(node, "linux,elfcorehdr", &len);
- -      if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
- -              return 1;
- -
- -      elfcorehdr_addr = dt_mem_next_cell(dt_root_addr_cells, &reg);
- -      elfcorehdr_size = dt_mem_next_cell(dt_root_size_cells, &reg);
- -
- -      return 1;
- -}
- -
- -/*
- - * reserve_elfcorehdr() - reserves memory for elf core header
- - *
- - * This function reserves the memory occupied by an elf core header
- - * described in the device tree. This region contains all the
- - * information about primary kernel's core image and is used by a dump
- - * capture kernel to access the system memory on primary kernel.
- - */
- -static void __init reserve_elfcorehdr(void)
- -{
- -      of_scan_flat_dt(early_init_dt_scan_elfcorehdr, NULL);
- -
- -      if (!elfcorehdr_size)
- -              return;
- -
- -      if (memblock_is_region_reserved(elfcorehdr_addr, elfcorehdr_size)) {
- -              pr_warn("elfcorehdr is overlapped\n");
- -              return;
- -      }
- -
- -      memblock_reserve(elfcorehdr_addr, elfcorehdr_size);
- -
- -      pr_info("Reserving %lldKB of memory at 0x%llx for elfcorehdr\n",
- -              elfcorehdr_size >> 10, elfcorehdr_addr);
- -}
- -#else
- -static void __init reserve_elfcorehdr(void)
- -{
- -}
- -#endif /* CONFIG_CRASH_DUMP */
- -
   /*
    * Return the maximum physical address for a zone accessible by the given bits
    * limit. If DRAM starts above 32-bit, expand the zone to the maximum
@@@ -234,10 -271,45 +220,10 @@@ static int __init early_mem(char *p
   }
   early_param("mem", early_mem);
   
- -static int __init early_init_dt_scan_usablemem(unsigned long node,
- -              const char *uname, int depth, void *data)
- -{
- -      struct memblock_region *usablemem = data;
- -      const __be32 *reg;
- -      int len;
- -
- -      if (depth != 1 || strcmp(uname, "chosen") != 0)
- -              return 0;
- -
- -      reg = of_get_flat_dt_prop(node, "linux,usable-memory-range", &len);
- -      if (!reg || (len < (dt_root_addr_cells + dt_root_size_cells)))
- -              return 1;
- -
- -      usablemem->base = dt_mem_next_cell(dt_root_addr_cells, &reg);
- -      usablemem->size = dt_mem_next_cell(dt_root_size_cells, &reg);
- -
- -      return 1;
- -}
- -
- -static void __init fdt_enforce_memory_region(void)
- -{
- -      struct memblock_region reg = {
- -              .size = 0,
- -      };
- -
- -      of_scan_flat_dt(early_init_dt_scan_usablemem, &reg);
- -
- -      if (reg.size)
- -              memblock_cap_memory_range(reg.base, reg.size);
- -}
- -
   void __init arm64_memblock_init(void)
   {
         const s64 linear_region_size = PAGE_END - _PAGE_OFFSET(vabits_actual);
   
- -      /* Handle linux,usable-memory-range property */
- -      fdt_enforce_memory_region();
- -
         /* Remove memory above our supported physical address size */
         memblock_remove(1ULL << PHYS_MASK_SHIFT, ULLONG_MAX);
   
@@@ -346,6 -418,8 +332,6 @@@
   
         early_init_fdt_scan_reserved_mem();
   
- -      reserve_elfcorehdr();
- -
         high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
   }
   
diff --combined arch/ia64/kernel/syscalls/syscall.tbl

index 4b20224,9bf45f2..6fea184
--- 1/arch/ia64/kernel/syscalls/syscall.tbl
--- 2/arch/ia64/kernel/syscalls/syscall.tbl
+++ b/arch/ia64/kernel/syscalls/syscall.tbl
@@@ -123,7 -123,7 +123,7 @@@
   # 1135 was get_kernel_syms
   # 1136 was query_module
   113   common  quotactl                        sys_quotactl
- -114   common  bdflush                         sys_bdflush
+ +114   common  bdflush                         sys_ni_syscall
   115   common  sysfs                           sys_sysfs
   116   common  personality                     sys_personality
   117   common  afs_syscall                     sys_ni_syscall
@@@ -367,3 -367,5 +367,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined arch/m68k/kernel/syscalls/syscall.tbl

index 3ec1291,f1f98ee..7976dff
--- 1/arch/m68k/kernel/syscalls/syscall.tbl
--- 2/arch/m68k/kernel/syscalls/syscall.tbl
+++ b/arch/m68k/kernel/syscalls/syscall.tbl
@@@ -141,7 -141,7 +141,7 @@@
   131   common  quotactl                        sys_quotactl
   132   common  getpgid                         sys_getpgid
   133   common  fchdir                          sys_fchdir
- -134   common  bdflush                         sys_bdflush
+ +134   common  bdflush                         sys_ni_syscall
   135   common  sysfs                           sys_sysfs
   136   common  personality                     sys_personality
   # 137 was afs_syscall
@@@ -446,3 -446,5 +446,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined arch/microblaze/kernel/syscalls/syscall.tbl

index 9be3ace,da49ddd..6b0e113
--- 1/arch/microblaze/kernel/syscalls/syscall.tbl
--- 2/arch/microblaze/kernel/syscalls/syscall.tbl
+++ b/arch/microblaze/kernel/syscalls/syscall.tbl
@@@ -141,7 -141,7 +141,7 @@@
   131   common  quotactl                        sys_quotactl
   132   common  getpgid                         sys_getpgid
   133   common  fchdir                          sys_fchdir
- -134   common  bdflush                         sys_bdflush
+ +134   common  bdflush                         sys_ni_syscall
   135   common  sysfs                           sys_sysfs
   136   common  personality                     sys_personality
   137   common  afs_syscall                     sys_ni_syscall
@@@ -452,3 -452,5 +452,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined arch/mips/kernel/syscalls/syscall_o32.tbl

index fae3588,fd3a9df..201237f
--- 1/arch/mips/kernel/syscalls/syscall_o32.tbl
--- 2/arch/mips/kernel/syscalls/syscall_o32.tbl
+++ b/arch/mips/kernel/syscalls/syscall_o32.tbl
@@@ -145,7 -145,7 +145,7 @@@
   131   o32     quotactl                        sys_quotactl
   132   o32     getpgid                         sys_getpgid
   133   o32     fchdir                          sys_fchdir
- -134   o32     bdflush                         sys_bdflush
+ +134   o32     bdflush                         sys_ni_syscall
   135   o32     sysfs                           sys_sysfs
   136   o32     personality                     sys_personality                 sys_32_personality
   137   o32     afs_syscall                     sys_ni_syscall
@@@ -434,3 -434,5 +434,5 @@@
   444   o32     landlock_create_ruleset         sys_landlock_create_ruleset
   445   o32     landlock_add_rule               sys_landlock_add_rule
   446   o32     landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   o32     process_mrelease                sys_process_mrelease
diff --combined arch/parisc/kernel/syscalls/syscall.tbl

index eaf0603,040df1b..0bf854b
--- 1/arch/parisc/kernel/syscalls/syscall.tbl
--- 2/arch/parisc/kernel/syscalls/syscall.tbl
+++ b/arch/parisc/kernel/syscalls/syscall.tbl
@@@ -147,7 -147,7 +147,7 @@@
   131   common  quotactl                sys_quotactl
   132   common  getpgid                 sys_getpgid
   133   common  fchdir                  sys_fchdir
- -134   common  bdflush                 sys_bdflush
+ +134   common  bdflush                 sys_ni_syscall
   135   common  sysfs                   sys_sysfs
   136   32      personality             parisc_personality
   136   64      personality             sys_personality
@@@ -444,3 -444,5 +444,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined arch/powerpc/kernel/syscalls/syscall.tbl

index 6f3953f,d8ebd7d..29b55e2
--- 1/arch/powerpc/kernel/syscalls/syscall.tbl
--- 2/arch/powerpc/kernel/syscalls/syscall.tbl
+++ b/arch/powerpc/kernel/syscalls/syscall.tbl
@@@ -176,7 -176,7 +176,7 @@@
   131   nospu   quotactl                        sys_quotactl
   132   common  getpgid                         sys_getpgid
   133   common  fchdir                          sys_fchdir
- -134   common  bdflush                         sys_bdflush
+ +134   common  bdflush                         sys_ni_syscall
   135   common  sysfs                           sys_sysfs
   136   32      personality                     sys_personality                 ppc64_personality
   136   64      personality                     ppc64_personality
@@@ -526,3 -526,5 +526,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined arch/riscv/mm/init.c

index 93720b0,e6cac49..fc818c8
--- 1/arch/riscv/mm/init.c
--- 2/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@@ -819,38 -819,22 +819,22 @@@ static void __init reserve_crashkernel(
   
         crash_size = PAGE_ALIGN(crash_size);
   
-       if (crash_base == 0) {
-               /*
-                * Current riscv boot protocol requires 2MB alignment for
-                * RV64 and 4MB alignment for RV32 (hugepage size)
-                */
-               crash_base = memblock_find_in_range(search_start, search_end,
-                                                   crash_size, PMD_SIZE);
- 
-               if (crash_base == 0) {
-                       pr_warn("crashkernel: couldn't allocate %lldKB\n",
-                               crash_size >> 10);
-                       return;
-               }
-       } else {
-               /* User specifies base address explicitly. */
-               if (!memblock_is_region_memory(crash_base, crash_size)) {
-                       pr_warn("crashkernel: requested region is not memory\n");
-                       return;
-               }
- 
-               if (memblock_is_region_reserved(crash_base, crash_size)) {
-                       pr_warn("crashkernel: requested region is reserved\n");
-                       return;
-               }
- 
+       if (crash_base) {
+               search_start = crash_base;
+               search_end = crash_base + crash_size;
+       }
   
-               if (!IS_ALIGNED(crash_base, PMD_SIZE)) {
-                       pr_warn("crashkernel: requested region is misaligned\n");
-                       return;
-               }
+       /*
+        * Current riscv boot protocol requires 2MB alignment for
+        * RV64 and 4MB alignment for RV32 (hugepage size)
+        */
+       crash_base = memblock_phys_alloc_range(crash_size, PMD_SIZE,
+                                              search_start, search_end);
+       if (crash_base == 0) {
+               pr_warn("crashkernel: couldn't allocate %lldKB\n",
+                       crash_size >> 10);
+               return;
         }
-       memblock_reserve(crash_base, crash_size);
   
         pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n",
                 crash_base, crash_base + crash_size, crash_size >> 20);
@@@ -860,6 -844,26 +844,6 @@@
   }
   #endif /* CONFIG_KEXEC_CORE */
   
- -#ifdef CONFIG_CRASH_DUMP
- -/*
- - * We keep track of the ELF core header of the crashed
- - * kernel with a reserved-memory region with compatible
- - * string "linux,elfcorehdr". Here we register a callback
- - * to populate elfcorehdr_addr/size when this region is
- - * present. Note that this region will be marked as
- - * reserved once we call early_init_fdt_scan_reserved_mem()
- - * later on.
- - */
- -static int __init elfcore_hdr_setup(struct reserved_mem *rmem)
- -{
- -      elfcorehdr_addr = rmem->base;
- -      elfcorehdr_size = rmem->size;
- -      return 0;
- -}
- -
- -RESERVEDMEM_OF_DECLARE(elfcorehdr, "linux,elfcorehdr", elfcore_hdr_setup);
- -#endif
- -
   void __init paging_init(void)
   {
         setup_bootmem();
diff --combined arch/s390/kernel/setup.c

index fe14beb,0bab57d..5a01872
--- 1/arch/s390/kernel/setup.c
--- 2/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@@ -89,71 -89,27 +89,71 @@@ EXPORT_SYMBOL(console_devno)
   unsigned int console_irq = -1;
   EXPORT_SYMBOL(console_irq);
   
- -unsigned long elf_hwcap __read_mostly = 0;
- -char elf_platform[ELF_PLATFORM_SIZE];
+ +/*
+ + * Some code and data needs to stay below 2 GB, even when the kernel would be
+ + * relocated above 2 GB, because it has to use 31 bit addresses.
+ + * Such code and data is part of the .amode31 section.
+ + */
+ +unsigned long __amode31_ref __samode31 = __pa(&_samode31);
+ +unsigned long __amode31_ref __eamode31 = __pa(&_eamode31);
+ +unsigned long __amode31_ref __stext_amode31 = __pa(&_stext_amode31);
+ +unsigned long __amode31_ref __etext_amode31 = __pa(&_etext_amode31);
+ +struct exception_table_entry __amode31_ref *__start_amode31_ex_table = _start_amode31_ex_table;
+ +struct exception_table_entry __amode31_ref *__stop_amode31_ex_table = _stop_amode31_ex_table;
+ +
+ +/*
+ + * Control registers CR2, CR5 and CR15 are initialized with addresses
+ + * of tables that must be placed below 2G which is handled by the AMODE31
+ + * sections.
+ + * Because the AMODE31 sections are relocated below 2G at startup,
+ + * the content of control registers CR2, CR5 and CR15 must be updated
+ + * with new addresses after the relocation. The initial initialization of
+ + * control registers occurs in head64.S and then gets updated again after AMODE31
+ + * relocation. We must access the relevant AMODE31 tables indirectly via
+ + * pointers placed in the .amode31.refs linker section. Those pointers get
+ + * updated automatically during AMODE31 relocation and always contain a valid
+ + * address within AMODE31 sections.
+ + */
+ +
+ +static __amode31_data u32 __ctl_duct_amode31[16] __aligned(64);
+ +
+ +static __amode31_data u64 __ctl_aste_amode31[8] __aligned(64) = {
+ +      [1] = 0xffffffffffffffff
+ +};
+ +
+ +static __amode31_data u32 __ctl_duald_amode31[32] __aligned(128) = {
+ +      0x80000000, 0, 0, 0,
+ +      0x80000000, 0, 0, 0,
+ +      0x80000000, 0, 0, 0,
+ +      0x80000000, 0, 0, 0,
+ +      0x80000000, 0, 0, 0,
+ +      0x80000000, 0, 0, 0,
+ +      0x80000000, 0, 0, 0,
+ +      0x80000000, 0, 0, 0
+ +};
+ +
+ +static __amode31_data u32 __ctl_linkage_stack_amode31[8] __aligned(64) = {
+ +      0, 0, 0x89000000, 0,
+ +      0, 0, 0x8a000000, 0
+ +};
   
- -unsigned long int_hwcap = 0;
+ +static u64 __amode31_ref *__ctl_aste = __ctl_aste_amode31;
+ +static u32 __amode31_ref *__ctl_duald = __ctl_duald_amode31;
+ +static u32 __amode31_ref *__ctl_linkage_stack = __ctl_linkage_stack_amode31;
+ +static u32 __amode31_ref *__ctl_duct = __ctl_duct_amode31;
   
   int __bootdata(noexec_disabled);
   unsigned long __bootdata(ident_map_size);
   struct mem_detect_info __bootdata(mem_detect);
+ +struct initrd_data __bootdata(initrd_data);
   
- -struct exception_table_entry *__bootdata_preserved(__start_dma_ex_table);
- -struct exception_table_entry *__bootdata_preserved(__stop_dma_ex_table);
- -unsigned long __bootdata_preserved(__stext_dma);
- -unsigned long __bootdata_preserved(__etext_dma);
- -unsigned long __bootdata_preserved(__sdma);
- -unsigned long __bootdata_preserved(__edma);
   unsigned long __bootdata_preserved(__kaslr_offset);
   unsigned int __bootdata_preserved(zlib_dfltcc_support);
   EXPORT_SYMBOL(zlib_dfltcc_support);
   u64 __bootdata_preserved(stfle_fac_list[16]);
   EXPORT_SYMBOL(stfle_fac_list);
   u64 __bootdata_preserved(alt_stfle_fac_list[16]);
+ +struct oldmem_data __bootdata_preserved(oldmem_data);
   
   unsigned long VMALLOC_START;
   EXPORT_SYMBOL(VMALLOC_START);
@@@ -298,7 -254,7 +298,7 @@@ static void __init setup_zfcpdump(void
   {
         if (!is_ipl_type_dump())
                 return;
- -      if (OLDMEM_BASE)
+ +      if (oldmem_data.start)
                 return;
         strcat(boot_command_line, " cio_ignore=all,!ipldev,!condev");
         console_loglevel = 2;
@@@ -465,7 -421,7 +465,7 @@@ static void __init setup_lowcore_dat_of
         lc->restart_stack = (unsigned long) restart_stack;
         lc->restart_fn = (unsigned long) do_restart;
         lc->restart_data = 0;
- -      lc->restart_source = -1UL;
+ +      lc->restart_source = -1U;
   
         mcck_stack = (unsigned long)memblock_alloc(THREAD_SIZE, THREAD_SIZE);
         if (!mcck_stack)
@@@ -494,19 -450,12 +494,19 @@@
   
   static void __init setup_lowcore_dat_on(void)
   {
+ +      struct lowcore *lc = lowcore_ptr[0];
+ +
         __ctl_clear_bit(0, 28);
         S390_lowcore.external_new_psw.mask |= PSW_MASK_DAT;
         S390_lowcore.svc_new_psw.mask |= PSW_MASK_DAT;
         S390_lowcore.program_new_psw.mask |= PSW_MASK_DAT;
         S390_lowcore.io_new_psw.mask |= PSW_MASK_DAT;
+ +      __ctl_store(S390_lowcore.cregs_save_area, 0, 15);
         __ctl_set_bit(0, 28);
+ +      mem_assign_absolute(S390_lowcore.restart_flags, RESTART_FLAG_CTLREGS);
+ +      mem_assign_absolute(S390_lowcore.program_new_psw, lc->program_new_psw);
+ +      memcpy_absolute(&S390_lowcore.cregs_save_area, lc->cregs_save_area,
+ +                      sizeof(S390_lowcore.cregs_save_area));
   }
   
   static struct resource code_resource = {
@@@ -661,9 -610,9 +661,9 @@@ static void __init reserve_crashkernel(
                 return;
         }
   
- -      low = crash_base ?: OLDMEM_BASE;
+ +      low = crash_base ?: oldmem_data.start;
         high = low + crash_size;
- -      if (low >= OLDMEM_BASE && high <= OLDMEM_BASE + OLDMEM_SIZE) {
+ +      if (low >= oldmem_data.start && high <= oldmem_data.start + oldmem_data.size) {
                 /* The crashkernel fits into OLDMEM, reuse OLDMEM */
                 crash_base = low;
         } else {
@@@ -677,8 -626,9 +677,9 @@@
                         return;
                 }
                 low = crash_base ?: low;
-               crash_base = memblock_find_in_range(low, high, crash_size,
-                                                   KEXEC_CRASH_MEM_ALIGN);
+               crash_base = memblock_phys_alloc_range(crash_size,
+                                                      KEXEC_CRASH_MEM_ALIGN,
+                                                      low, high);
         }
   
         if (!crash_base) {
@@@ -687,10 -637,12 +688,12 @@@
                 return;
         }
   
-       if (register_memory_notifier(&kdump_mem_nb))
+       if (register_memory_notifier(&kdump_mem_nb)) {
+               memblock_free(crash_base, crash_size);
                 return;
+       }
   
- -      if (!OLDMEM_BASE && MACHINE_IS_VM)
+ +      if (!oldmem_data.start && MACHINE_IS_VM)
                 diag10_range(PFN_DOWN(crash_base), PFN_DOWN(crash_size));
         crashk_res.start = crash_base;
         crashk_res.end = crash_base + crash_size - 1;
@@@ -709,11 -661,11 +712,11 @@@
   static void __init reserve_initrd(void)
   {
   #ifdef CONFIG_BLK_DEV_INITRD
- -      if (!INITRD_START || !INITRD_SIZE)
+ +      if (!initrd_data.start || !initrd_data.size)
                 return;
- -      initrd_start = INITRD_START;
- -      initrd_end = initrd_start + INITRD_SIZE;
- -      memblock_reserve(INITRD_START, INITRD_SIZE);
+ +      initrd_start = initrd_data.start;
+ +      initrd_end = initrd_start + initrd_data.size;
+ +      memblock_reserve(initrd_data.start, initrd_data.size);
   #endif
   }
   
@@@ -783,10 -735,10 +786,10 @@@ static void __init memblock_add_mem_det
   static void __init check_initrd(void)
   {
   #ifdef CONFIG_BLK_DEV_INITRD
- -      if (INITRD_START && INITRD_SIZE &&
- -          !memblock_is_region_memory(INITRD_START, INITRD_SIZE)) {
+ +      if (initrd_data.start && initrd_data.size &&
+ +          !memblock_is_region_memory(initrd_data.start, initrd_data.size)) {
                 pr_err("The initial RAM disk does not fit into the memory\n");
- -              memblock_free(INITRD_START, INITRD_SIZE);
+ +              memblock_free(initrd_data.start, initrd_data.size);
                 initrd_start = initrd_end = 0;
         }
   #endif
@@@ -799,10 -751,10 +802,10 @@@ static void __init reserve_kernel(void
   {
         unsigned long start_pfn = PFN_UP(__pa(_end));
   
- -      memblock_reserve(0, HEAD_END);
+ +      memblock_reserve(0, STARTUP_NORMAL_OFFSET);
+ +      memblock_reserve((unsigned long)sclp_early_sccb, EXT_SCCB_READ_SCP);
         memblock_reserve((unsigned long)_stext, PFN_PHYS(start_pfn)
                          - (unsigned long)_stext);
- -      memblock_reserve(__sdma, __edma - __sdma);
   }
   
   static void __init setup_memory(void)
@@@ -822,52 -774,152 +825,52 @@@
         memblock_enforce_memory_limit(memblock_end_of_DRAM());
   }
   
- -/*
- - * Setup hardware capabilities.
- - */
- -static int __init setup_hwcaps(void)
+ +static void __init relocate_amode31_section(void)
   {
- -      static const int stfl_bits[6] = { 0, 2, 7, 17, 19, 21 };
- -      struct cpuid cpu_id;
- -      int i;
- -
- -      /*
- -       * The store facility list bits numbers as found in the principles
- -       * of operation are numbered with bit 1UL<<31 as number 0 to
- -       * bit 1UL<<0 as number 31.
- -       *   Bit 0: instructions named N3, "backported" to esa-mode
- -       *   Bit 2: z/Architecture mode is active
- -       *   Bit 7: the store-facility-list-extended facility is installed
- -       *   Bit 17: the message-security assist is installed
- -       *   Bit 19: the long-displacement facility is installed
- -       *   Bit 21: the extended-immediate facility is installed
- -       *   Bit 22: extended-translation facility 3 is installed
- -       *   Bit 30: extended-translation facility 3 enhancement facility
- -       * These get translated to:
- -       *   HWCAP_S390_ESAN3 bit 0, HWCAP_S390_ZARCH bit 1,
- -       *   HWCAP_S390_STFLE bit 2, HWCAP_S390_MSA bit 3,
- -       *   HWCAP_S390_LDISP bit 4, HWCAP_S390_EIMM bit 5 and
- -       *   HWCAP_S390_ETF3EH bit 8 (22 && 30).
- -       */
- -      for (i = 0; i < 6; i++)
- -              if (test_facility(stfl_bits[i]))
- -                      elf_hwcap |= 1UL << i;
- -
- -      if (test_facility(22) && test_facility(30))
- -              elf_hwcap |= HWCAP_S390_ETF3EH;
- -
- -      /*
- -       * Check for additional facilities with store-facility-list-extended.
- -       * stfle stores doublewords (8 byte) with bit 1ULL<<63 as bit 0
- -       * and 1ULL<<0 as bit 63. Bits 0-31 contain the same information
- -       * as stored by stfl, bits 32-xxx contain additional facilities.
- -       * How many facility words are stored depends on the number of
- -       * doublewords passed to the instruction. The additional facilities
- -       * are:
- -       *   Bit 42: decimal floating point facility is installed
- -       *   Bit 44: perform floating point operation facility is installed
- -       * translated to:
- -       *   HWCAP_S390_DFP bit 6 (42 && 44).
- -       */
- -      if ((elf_hwcap & (1UL << 2)) && test_facility(42) && test_facility(44))
- -              elf_hwcap |= HWCAP_S390_DFP;
- -
- -      /*
- -       * Huge page support HWCAP_S390_HPAGE is bit 7.
- -       */
- -      if (MACHINE_HAS_EDAT1)
- -              elf_hwcap |= HWCAP_S390_HPAGE;
- -
- -      /*
- -       * 64-bit register support for 31-bit processes
- -       * HWCAP_S390_HIGH_GPRS is bit 9.
- -       */
- -      elf_hwcap |= HWCAP_S390_HIGH_GPRS;
- -
- -      /*
- -       * Transactional execution support HWCAP_S390_TE is bit 10.
- -       */
- -      if (MACHINE_HAS_TE)
- -              elf_hwcap |= HWCAP_S390_TE;
- -
- -      /*
- -       * Vector extension HWCAP_S390_VXRS is bit 11. The Vector extension
- -       * can be disabled with the "novx" parameter. Use MACHINE_HAS_VX
- -       * instead of facility bit 129.
- -       */
- -      if (MACHINE_HAS_VX) {
- -              elf_hwcap |= HWCAP_S390_VXRS;
- -              if (test_facility(134))
- -                      elf_hwcap |= HWCAP_S390_VXRS_BCD;
- -              if (test_facility(135))
- -                      elf_hwcap |= HWCAP_S390_VXRS_EXT;
- -              if (test_facility(148))
- -                      elf_hwcap |= HWCAP_S390_VXRS_EXT2;
- -              if (test_facility(152))
- -                      elf_hwcap |= HWCAP_S390_VXRS_PDE;
- -      }
- -      if (test_facility(150))
- -              elf_hwcap |= HWCAP_S390_SORT;
- -      if (test_facility(151))
- -              elf_hwcap |= HWCAP_S390_DFLT;
- -
- -      /*
- -       * Guarded storage support HWCAP_S390_GS is bit 12.
- -       */
- -      if (MACHINE_HAS_GS)
- -              elf_hwcap |= HWCAP_S390_GS;
- -
- -      get_cpu_id(&cpu_id);
- -      add_device_randomness(&cpu_id, sizeof(cpu_id));
- -      switch (cpu_id.machine) {
- -      case 0x2064:
- -      case 0x2066:
- -      default:        /* Use "z900" as default for 64 bit kernels. */
- -              strcpy(elf_platform, "z900");
- -              break;
- -      case 0x2084:
- -      case 0x2086:
- -              strcpy(elf_platform, "z990");
- -              break;
- -      case 0x2094:
- -      case 0x2096:
- -              strcpy(elf_platform, "z9-109");
- -              break;
- -      case 0x2097:
- -      case 0x2098:
- -              strcpy(elf_platform, "z10");
- -              break;
- -      case 0x2817:
- -      case 0x2818:
- -              strcpy(elf_platform, "z196");
- -              break;
- -      case 0x2827:
- -      case 0x2828:
- -              strcpy(elf_platform, "zEC12");
- -              break;
- -      case 0x2964:
- -      case 0x2965:
- -              strcpy(elf_platform, "z13");
- -              break;
- -      case 0x3906:
- -      case 0x3907:
- -              strcpy(elf_platform, "z14");
- -              break;
- -      case 0x8561:
- -      case 0x8562:
- -              strcpy(elf_platform, "z15");
- -              break;
- -      }
- -
- -      /*
- -       * Virtualization support HWCAP_INT_SIE is bit 0.
- -       */
- -      if (sclp.has_sief2)
- -              int_hwcap |= HWCAP_INT_SIE;
+ +      unsigned long amode31_addr, amode31_size;
+ +      long amode31_offset;
+ +      long *ptr;
+ +
+ +      /* Allocate a new AMODE31 capable memory region */
+ +      amode31_size = __eamode31 - __samode31;
+ +      pr_info("Relocating AMODE31 section of size 0x%08lx\n", amode31_size);
+ +      amode31_addr = (unsigned long)memblock_alloc_low(amode31_size, PAGE_SIZE);
+ +      if (!amode31_addr)
+ +              panic("Failed to allocate memory for AMODE31 section\n");
+ +      amode31_offset = amode31_addr - __samode31;
+ +
+ +      /* Move original AMODE31 section to the new one */
+ +      memmove((void *)amode31_addr, (void *)__samode31, amode31_size);
+ +      /* Zero out the old AMODE31 section to catch invalid accesses within it */
+ +      memset((void *)__samode31, 0, amode31_size);
+ +
+ +      /* Update all AMODE31 region references */
+ +      for (ptr = _start_amode31_refs; ptr != _end_amode31_refs; ptr++)
+ +              *ptr += amode31_offset;
+ +}
   
- -      return 0;
+ +/* This must be called after AMODE31 relocation */
+ +static void __init setup_cr(void)
+ +{
+ +      union ctlreg2 cr2;
+ +      union ctlreg5 cr5;
+ +      union ctlreg15 cr15;
+ +
+ +      __ctl_duct[1] = (unsigned long)__ctl_aste;
+ +      __ctl_duct[2] = (unsigned long)__ctl_aste;
+ +      __ctl_duct[4] = (unsigned long)__ctl_duald;
+ +
+ +      /* Update control registers CR2, CR5 and CR15 */
+ +      __ctl_store(cr2.val, 2, 2);
+ +      __ctl_store(cr5.val, 5, 5);
+ +      __ctl_store(cr15.val, 15, 15);
+ +      cr2.ducto = (unsigned long)__ctl_duct >> 6;
+ +      cr5.pasteo = (unsigned long)__ctl_duct >> 6;
+ +      cr15.lsea = (unsigned long)__ctl_linkage_stack >> 3;
+ +      __ctl_load(cr2.val, 2, 2);
+ +      __ctl_load(cr5.val, 5, 5);
+ +      __ctl_load(cr15.val, 15, 15);
   }
- -arch_initcall(setup_hwcaps);
   
   /*
    * Add system information as device randomness
@@@ -1010,9 -1062,6 +1013,9 @@@ void __init setup_arch(char **cmdline_p
   
         free_mem_detect_info();
   
+ +      relocate_amode31_section();
+ +      setup_cr();
+ +
         setup_uv();
         setup_memory_end();
         setup_memory();
diff --combined arch/s390/kernel/syscalls/syscall.tbl

index aa705e1,57233ac..aa9d68b
--- 1/arch/s390/kernel/syscalls/syscall.tbl
--- 2/arch/s390/kernel/syscalls/syscall.tbl
+++ b/arch/s390/kernel/syscalls/syscall.tbl
@@@ -122,7 -122,7 +122,7 @@@
   131  common   quotactl                sys_quotactl                    sys_quotactl
   132  common   getpgid                 sys_getpgid                     sys_getpgid
   133  common   fchdir                  sys_fchdir                      sys_fchdir
- -134  common   bdflush                 sys_bdflush                     sys_bdflush
+ +134  common   bdflush                 sys_ni_syscall                  sys_ni_syscall
   135  common   sysfs                   sys_sysfs                       sys_sysfs
   136  common   personality             sys_s390_personality            sys_s390_personality
   137  common   afs_syscall             -                               -
@@@ -449,3 -449,5 +449,5 @@@
   444  common   landlock_create_ruleset sys_landlock_create_ruleset     sys_landlock_create_ruleset
   445  common   landlock_add_rule       sys_landlock_add_rule           sys_landlock_add_rule
   446  common   landlock_restrict_self  sys_landlock_restrict_self      sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448  common   process_mrelease        sys_process_mrelease            sys_process_mrelease
diff --combined arch/s390/mm/fault.c

index 212632d,81d7607..a834e46
--- 1/arch/s390/mm/fault.c
--- 2/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@@ -31,7 -31,6 +31,7 @@@
   #include <linux/kprobes.h>
   #include <linux/uaccess.h>
   #include <linux/hugetlb.h>
+ +#include <linux/kfence.h>
   #include <asm/asm-offsets.h>
   #include <asm/diag.h>
   #include <asm/gmap.h>
@@@ -231,8 -230,8 +231,8 @@@ const struct exception_table_entry *s39
   {
         const struct exception_table_entry *fixup;
   
- -      fixup = search_extable(__start_dma_ex_table,
- -                             __stop_dma_ex_table - __start_dma_ex_table,
+ +      fixup = search_extable(__start_amode31_ex_table,
+ +                             __stop_amode31_ex_table - __start_amode31_ex_table,
                                addr);
         if (!fixup)
                 fixup = search_exception_tables(addr);
@@@ -357,7 -356,6 +357,7 @@@ static inline vm_fault_t do_exception(s
         unsigned long address;
         unsigned int flags;
         vm_fault_t fault;
+ +      bool is_write;
   
         tsk = current;
         /*
@@@ -371,8 -369,6 +371,8 @@@
   
         mm = tsk->mm;
         trans_exc_code = regs->int_parm_long;
+ +      address = trans_exc_code & __FAIL_ADDR_MASK;
+ +      is_write = (trans_exc_code & store_indication) == 0x400;
   
         /*
          * Verify that the fault happened in user space, that
@@@ -383,8 -379,6 +383,8 @@@
         type = get_fault_type(regs);
         switch (type) {
         case KERNEL_FAULT:
+ +              if (kfence_handle_page_fault(address, is_write, regs))
+ +                      return 0;
                 goto out;
         case USER_FAULT:
         case GMAP_FAULT:
@@@ -393,11 -387,12 +393,11 @@@
                 break;
         }
   
- -      address = trans_exc_code & __FAIL_ADDR_MASK;
         perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
         flags = FAULT_FLAG_DEFAULT;
         if (user_mode(regs))
                 flags |= FAULT_FLAG_USER;
- -      if (access == VM_WRITE || (trans_exc_code & store_indication) == 0x400)
+ +      if (access == VM_WRITE || is_write)
                 flags |= FAULT_FLAG_WRITE;
         mmap_read_lock(mm);
   
@@@ -822,7 -817,7 +822,7 @@@ void do_secure_storage_access(struct pt
                 break;
         case KERNEL_FAULT:
                 page = phys_to_page(addr);
-               if (unlikely(!try_get_page(page)))
+               if (unlikely(!try_get_compound_head(page, 1)))
                         break;
                 rc = arch_make_page_accessible(page);
                 put_page(page);
diff --combined arch/sh/kernel/syscalls/syscall.tbl

index 7bbd670,2f6e95e..208f131
--- 1/arch/sh/kernel/syscalls/syscall.tbl
--- 2/arch/sh/kernel/syscalls/syscall.tbl
+++ b/arch/sh/kernel/syscalls/syscall.tbl
@@@ -141,7 -141,7 +141,7 @@@
   131   common  quotactl                        sys_quotactl
   132   common  getpgid                         sys_getpgid
   133   common  fchdir                          sys_fchdir
- -134   common  bdflush                         sys_bdflush
+ +134   common  bdflush                         sys_ni_syscall
   135   common  sysfs                           sys_sysfs
   136   common  personality                     sys_personality
   # 137 was afs_syscall
@@@ -449,3 -449,5 +449,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined arch/sparc/kernel/syscalls/syscall.tbl

index f520e9c,42fc290..7893104
--- 1/arch/sparc/kernel/syscalls/syscall.tbl
--- 2/arch/sparc/kernel/syscalls/syscall.tbl
+++ b/arch/sparc/kernel/syscalls/syscall.tbl
@@@ -270,7 -270,7 +270,7 @@@
   222   common  delete_module           sys_delete_module
   223   common  get_kernel_syms         sys_ni_syscall
   224   common  getpgid                 sys_getpgid
- -225   common  bdflush                 sys_bdflush
+ +225   common  bdflush                 sys_ni_syscall
   226   common  sysfs                   sys_sysfs
   227   common  afs_syscall             sys_nis_syscall
   228   common  setfsuid                sys_setfsuid16
@@@ -492,3 -492,5 +492,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined arch/x86/entry/syscalls/syscall_32.tbl

index a5beae6,661a03b..61f18b7
--- 1/arch/x86/entry/syscalls/syscall_32.tbl
--- 2/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@@ -145,7 -145,7 +145,7 @@@
   131   i386    quotactl                sys_quotactl
   132   i386    getpgid                 sys_getpgid
   133   i386    fchdir                  sys_fchdir
- -134   i386    bdflush                 sys_bdflush
+ +134   i386    bdflush                 sys_ni_syscall
   135   i386    sysfs                   sys_sysfs
   136   i386    personality             sys_personality
   137   i386    afs_syscall
@@@ -452,3 -452,4 +452,4 @@@
   445   i386    landlock_add_rule       sys_landlock_add_rule
   446   i386    landlock_restrict_self  sys_landlock_restrict_self
   447   i386    memfd_secret            sys_memfd_secret
+ 448   i386    process_mrelease        sys_process_mrelease
diff --combined arch/xtensa/kernel/syscalls/syscall.tbl

index b3d1bc8,f438495..104b327
--- 1/arch/xtensa/kernel/syscalls/syscall.tbl
--- 2/arch/xtensa/kernel/syscalls/syscall.tbl
+++ b/arch/xtensa/kernel/syscalls/syscall.tbl
@@@ -223,7 -223,7 +223,7 @@@
   # 205 was old nfsservctl
   205   common  nfsservctl                      sys_ni_syscall
   206   common  _sysctl                         sys_ni_syscall
- -207   common  bdflush                         sys_bdflush
+ +207   common  bdflush                         sys_ni_syscall
   208   common  uname                           sys_newuname
   209   common  sysinfo                         sys_sysinfo
   210   common  init_module                     sys_init_module
@@@ -417,3 -417,5 +417,5 @@@
   444   common  landlock_create_ruleset         sys_landlock_create_ruleset
   445   common  landlock_add_rule               sys_landlock_add_rule
   446   common  landlock_restrict_self          sys_landlock_restrict_self
+ # 447 reserved for memfd_secret
+ 448   common  process_mrelease                sys_process_mrelease
diff --combined block/blk-map.c

index d1448aa,4639bc6..4526add
--- 1/block/blk-map.c
--- 2/block/blk-map.c
+++ b/block/blk-map.c
@@@ -309,7 -309,7 +309,7 @@@ static int bio_map_user_iov(struct requ
   
   static void bio_invalidate_vmalloc_pages(struct bio *bio)
   {
- #ifdef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+ #ifdef ARCH_IMPLEMENTS_FLUSH_KERNEL_VMAP_RANGE
         if (bio->bi_private && !op_is_write(bio_op(bio))) {
                 unsigned long i, len = 0;
   
@@@ -400,7 -400,7 +400,7 @@@ static void bio_copy_kern_endio_read(st
         struct bvec_iter_all iter_all;
   
         bio_for_each_segment_all(bvec, bio, iter_all) {
- -              memcpy(p, page_address(bvec->bv_page), bvec->bv_len);
+ +              memcpy_from_bvec(p, bvec);
                 p += bvec->bv_len;
         }
   
diff --combined drivers/mmc/host/mmc_spi.c

index a1bcde3,3d28a3d..f4c8e1a
--- 1/drivers/mmc/host/mmc_spi.c
--- 2/drivers/mmc/host/mmc_spi.c
+++ b/drivers/mmc/host/mmc_spi.c
@@@ -180,7 -180,7 +180,7 @@@ static int mmc_spi_skip(struct mmc_spi_
         u8 *cp = host->data->status;
         unsigned long start = jiffies;
   
- -      while (1) {
+ +      do {
                 int             status;
                 unsigned        i;
   
@@@ -193,9 -193,16 +193,9 @@@
                                 return cp[i];
                 }
   
- -              if (time_is_before_jiffies(start + timeout))
- -                      break;
- -
- -              /* If we need long timeouts, we may release the CPU.
- -               * We use jiffies here because we want to have a relation
- -               * between elapsed time and the blocking of the scheduler.
- -               */
- -              if (time_is_before_jiffies(start + 1))
- -                      schedule();
- -      }
+ +              /* If we need long timeouts, we may release the CPU */
+ +              cond_resched();
+ +      } while (time_is_after_jiffies(start + timeout));
         return -ETIMEDOUT;
   }
   
@@@ -941,7 -948,7 +941,7 @@@ mmc_spi_data_do(struct mmc_spi_host *ho
   
                 /* discard mappings */
                 if (direction == DMA_FROM_DEVICE)
-                       flush_kernel_dcache_page(sg_page(sg));
+                       flush_dcache_page(sg_page(sg));
                 kunmap(sg_page(sg));
                 if (dma_dev)
                         dma_unmap_page(dma_dev, dma_addr, PAGE_SIZE, dir);
diff --combined fs/exec.c

index 3b78b22,fd292e9..2dc489c
--- 1/fs/exec.c
--- 2/fs/exec.c
+++ b/fs/exec.c
@@@ -217,8 -217,10 +217,10 @@@ static struct page *get_arg_page(struc
          * We are doing an exec().  'current' is the process
          * doing the exec and bprm->mm is the new process's mm.
          */
+       mmap_read_lock(bprm->mm);
         ret = get_user_pages_remote(bprm->mm, pos, 1, gup_flags,
                         &page, NULL, NULL);
+       mmap_read_unlock(bprm->mm);
         if (ret <= 0)
                 return NULL;
   
@@@ -574,7 -576,7 +576,7 @@@ static int copy_strings(int argc, struc
                                 }
   
                                 if (kmapped_page) {
-                                       flush_kernel_dcache_page(kmapped_page);
+                                       flush_dcache_page(kmapped_page);
                                         kunmap(kmapped_page);
                                         put_arg_page(kmapped_page);
                                 }
@@@ -592,7 -594,7 +594,7 @@@
         ret = 0;
   out:
         if (kmapped_page) {
-               flush_kernel_dcache_page(kmapped_page);
+               flush_dcache_page(kmapped_page);
                 kunmap(kmapped_page);
                 put_arg_page(kmapped_page);
         }
@@@ -634,7 -636,7 +636,7 @@@ int copy_string_kernel(const char *arg
                 kaddr = kmap_atomic(page);
                 flush_arg_page(bprm, pos & PAGE_MASK, page);
                 memcpy(kaddr + offset_in_page(pos), arg, bytes_to_copy);
-               flush_kernel_dcache_page(page);
+               flush_dcache_page(page);
                 kunmap_atomic(kaddr);
                 put_arg_page(page);
         }
@@@ -2070,8 -2072,10 +2072,8 @@@ SYSCALL_DEFINE5(execveat
                 const char __user *const __user *, envp,
                 int, flags)
   {
- -      int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
- -
         return do_execveat(fd,
- -                         getname_flags(filename, lookup_flags, NULL),
+ +                         getname_uflags(filename, flags),
                            argv, envp, flags);
   }
   
@@@ -2089,8 -2093,10 +2091,8 @@@ COMPAT_SYSCALL_DEFINE5(execveat, int, f
                        const compat_uptr_t __user *, envp,
                        int,  flags)
   {
- -      int lookup_flags = (flags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
- -
         return compat_do_execveat(fd,
- -                                getname_flags(filename, lookup_flags, NULL),
+ +                                getname_uflags(filename, flags),
                                   argv, envp, flags);
   }
   #endif
diff --combined fs/fcntl.c

index 68added,714e7c9..9c6c6a3
--- 1/fs/fcntl.c
--- 2/fs/fcntl.c
+++ b/fs/fcntl.c
@@@ -150,8 -150,7 +150,8 @@@ void f_delown(struct file *filp
   pid_t f_getown(struct file *filp)
   {
         pid_t pid = 0;
- -      read_lock(&filp->f_owner.lock);
+ +
+ +      read_lock_irq(&filp->f_owner.lock);
         rcu_read_lock();
         if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
                 pid = pid_vnr(filp->f_owner.pid);
@@@ -159,7 -158,7 +159,7 @@@
                         pid = -pid;
         }
         rcu_read_unlock();
- -      read_unlock(&filp->f_owner.lock);
+ +      read_unlock_irq(&filp->f_owner.lock);
         return pid;
   }
   
@@@ -209,7 -208,7 +209,7 @@@ static int f_getown_ex(struct file *fil
         struct f_owner_ex owner = {};
         int ret = 0;
   
- -      read_lock(&filp->f_owner.lock);
+ +      read_lock_irq(&filp->f_owner.lock);
         rcu_read_lock();
         if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
                 owner.pid = pid_vnr(filp->f_owner.pid);
@@@ -232,7 -231,7 +232,7 @@@
                 ret = -EINVAL;
                 break;
         }
- -      read_unlock(&filp->f_owner.lock);
+ +      read_unlock_irq(&filp->f_owner.lock);
   
         if (!ret) {
                 ret = copy_to_user(owner_p, &owner, sizeof(owner));
@@@ -250,10 -249,10 +250,10 @@@ static int f_getowner_uids(struct file 
         uid_t src[2];
         int err;
   
- -      read_lock(&filp->f_owner.lock);
+ +      read_lock_irq(&filp->f_owner.lock);
         src[0] = from_kuid(user_ns, filp->f_owner.uid);
         src[1] = from_kuid(user_ns, filp->f_owner.euid);
- -      read_unlock(&filp->f_owner.lock);
+ +      read_unlock_irq(&filp->f_owner.lock);
   
         err  = put_user(src[0], &dst[0]);
         err |= put_user(src[1], &dst[1]);
@@@ -1004,14 -1003,13 +1004,14 @@@ static void kill_fasync_rcu(struct fasy
   {
         while (fa) {
                 struct fown_struct *fown;
+ +              unsigned long flags;
   
                 if (fa->magic != FASYNC_MAGIC) {
                         printk(KERN_ERR "kill_fasync: bad magic number in "
                                "fasync_struct!\n");
                         return;
                 }
- -              read_lock(&fa->fa_lock);
+ +              read_lock_irqsave(&fa->fa_lock, flags);
                 if (fa->fa_file) {
                         fown = &fa->fa_file->f_owner;
                         /* Don't send SIGURG to processes which have not set a
@@@ -1020,7 -1018,7 +1020,7 @@@
                         if (!(sig == SIGURG && fown->signum == 0))
                                 send_sigio(fown, fa->fa_fd, band);
                 }
- -              read_unlock(&fa->fa_lock);
+ +              read_unlock_irqrestore(&fa->fa_lock, flags);
                 fa = rcu_dereference(fa->fa_next);
         }
   }
@@@ -1051,7 -1049,8 +1051,8 @@@ static int __init fcntl_init(void
                         __FMODE_EXEC | __FMODE_NONOTIFY));
   
         fasync_cache = kmem_cache_create("fasync_cache",
-               sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
+                                        sizeof(struct fasync_struct), 0,
+                                        SLAB_PANIC | SLAB_ACCOUNT, NULL);
         return 0;
   }
   
diff --combined fs/fs-writeback.c

index eb57dad,35894a2..81ec192
--- 1/fs/fs-writeback.c
--- 2/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@@ -406,6 -406,11 +406,11 @@@ static bool inode_do_switch_wbs(struct 
                 inc_wb_stat(new_wb, WB_WRITEBACK);
         }
   
+       if (mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) {
+               atomic_dec(&old_wb->writeback_inodes);
+               atomic_inc(&new_wb->writeback_inodes);
+       }
+ 
         wb_get(new_wb);
   
         /*
@@@ -1034,20 -1039,20 +1039,20 @@@ restart
    * cgroup_writeback_by_id - initiate cgroup writeback from bdi and memcg IDs
    * @bdi_id: target bdi id
    * @memcg_id: target memcg css id
-  * @nr: number of pages to write, 0 for best-effort dirty flushing
    * @reason: reason why some writeback work initiated
    * @done: target wb_completion
    *
    * Initiate flush of the bdi_writeback identified by @bdi_id and @memcg_id
    * with the specified parameters.
    */
- int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr,
+ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
                            enum wb_reason reason, struct wb_completion *done)
   {
         struct backing_dev_info *bdi;
         struct cgroup_subsys_state *memcg_css;
         struct bdi_writeback *wb;
         struct wb_writeback_work *work;
+       unsigned long dirty;
         int ret;
   
         /* lookup bdi and memcg */
@@@ -1076,24 -1081,22 +1081,22 @@@
         }
   
         /*
-        * If @nr is zero, the caller is attempting to write out most of
+        * The caller is attempting to write out most of
          * the currently dirty pages.  Let's take the current dirty page
          * count and inflate it by 25% which should be large enough to
          * flush out most dirty pages while avoiding getting livelocked by
          * concurrent dirtiers.
+        *
+        * BTW the memcg stats are flushed periodically and this is best-effort
+        * estimation, so some potential error is ok.
          */
-       if (!nr) {
-               unsigned long filepages, headroom, dirty, writeback;
- 
-               mem_cgroup_wb_stats(wb, &filepages, &headroom, &dirty,
-                                     &writeback);
-               nr = dirty * 10 / 8;
-       }
+       dirty = memcg_page_state(mem_cgroup_from_css(memcg_css), NR_FILE_DIRTY);
+       dirty = dirty * 10 / 8;
   
         /* issue the writeback work */
         work = kzalloc(sizeof(*work), GFP_NOWAIT | __GFP_NOWARN);
         if (work) {
-               work->nr_pages = nr;
+               work->nr_pages = dirty;
                 work->sync_mode = WB_SYNC_NONE;
                 work->range_cyclic = 1;
                 work->reason = reason;
@@@ -1999,7 -2002,6 +2002,6 @@@ static long writeback_inodes_wb(struct 
   static long wb_writeback(struct bdi_writeback *wb,
                          struct wb_writeback_work *work)
   {
-       unsigned long wb_start = jiffies;
         long nr_pages = work->nr_pages;
         unsigned long dirtied_before = jiffies;
         struct inode *inode;
@@@ -2053,8 -2055,6 +2055,6 @@@
                         progress = __writeback_inodes_wb(wb, work);
                 trace_writeback_written(wb, work);
   
-               wb_update_bandwidth(wb, wb_start);
- 
                 /*
                  * Did we write something? Try for more
                  *
@@@ -2729,6 -2729,23 +2729,6 @@@ int write_inode_now(struct inode *inode
   }
   EXPORT_SYMBOL(write_inode_now);
   
- -/**
- - * sync_inode - write an inode and its pages to disk.
- - * @inode: the inode to sync
- - * @wbc: controls the writeback mode
- - *
- - * sync_inode() will write an inode and its pages to disk.  It will also
- - * correctly update the inode on its superblock's dirty inode lists and will
- - * update inode->i_state.
- - *
- - * The caller must have a ref on the inode.
- - */
- -int sync_inode(struct inode *inode, struct writeback_control *wbc)
- -{
- -      return writeback_single_inode(inode, wbc);
- -}
- -EXPORT_SYMBOL(sync_inode);
- -
   /**
    * sync_inode_metadata - write an inode to disk
    * @inode: the inode to sync
@@@ -2745,6 -2762,6 +2745,6 @@@ int sync_inode_metadata(struct inode *i
                 .nr_to_write = 0, /* metadata-only */
         };
   
- -      return sync_inode(inode, &wbc);
+ +      return writeback_single_inode(inode, &wbc);
   }
   EXPORT_SYMBOL(sync_inode_metadata);
diff --combined fs/inode.c

index 84c528c,8830a72..37710ca
--- 1/fs/inode.c
--- 2/fs/inode.c
+++ b/fs/inode.c
@@@ -190,8 -190,6 +190,8 @@@ int inode_init_always(struct super_bloc
         mapping_set_gfp_mask(mapping, GFP_HIGHUSER_MOVABLE);
         mapping->private_data = NULL;
         mapping->writeback_index = 0;
+ +      __init_rwsem(&mapping->invalidate_lock, "mapping.invalidate_lock",
+ +                   &sb->s_type->invalidate_lock_key);
         inode->i_private = NULL;
         inode->i_mapping = mapping;
         INIT_HLIST_HEAD(&inode->i_dentry);      /* buggered by rcu freeing */
@@@ -770,7 -768,7 +770,7 @@@ static enum lru_status inode_lru_isolat
                 return LRU_ROTATE;
         }
   
-       if (inode_has_buffers(inode) || inode->i_data.nrpages) {
+       if (inode_has_buffers(inode) || !mapping_empty(&inode->i_data)) {
                 __iget(inode);
                 spin_unlock(&inode->i_lock);
                 spin_unlock(lru_lock);
diff --combined fs/locks.c

index 3d6fb4a,1bc7ede..51a5b72
--- 1/fs/locks.c
--- 2/fs/locks.c
+++ b/fs/locks.c
@@@ -1397,6 -1397,103 +1397,6 @@@ static int posix_lock_inode_wait(struc
         return error;
   }
   
- -#ifdef CONFIG_MANDATORY_FILE_LOCKING
- -/**
- - * locks_mandatory_locked - Check for an active lock
- - * @file: the file to check
- - *
- - * Searches the inode's list of locks to find any POSIX locks which conflict.
- - * This function is called from locks_verify_locked() only.
- - */
- -int locks_mandatory_locked(struct file *file)
- -{
- -      int ret;
- -      struct inode *inode = locks_inode(file);
- -      struct file_lock_context *ctx;
- -      struct file_lock *fl;
- -
- -      ctx = smp_load_acquire(&inode->i_flctx);
- -      if (!ctx || list_empty_careful(&ctx->flc_posix))
- -              return 0;
- -
- -      /*
- -       * Search the lock list for this inode for any POSIX locks.
- -       */
- -      spin_lock(&ctx->flc_lock);
- -      ret = 0;
- -      list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
- -              if (fl->fl_owner != current->files &&
- -                  fl->fl_owner != file) {
- -                      ret = -EAGAIN;
- -                      break;
- -              }
- -      }
- -      spin_unlock(&ctx->flc_lock);
- -      return ret;
- -}
- -
- -/**
- - * locks_mandatory_area - Check for a conflicting lock
- - * @inode:    the file to check
- - * @filp:       how the file was opened (if it was)
- - * @start:    first byte in the file to check
- - * @end:      lastbyte in the file to check
- - * @type:     %F_WRLCK for a write lock, else %F_RDLCK
- - *
- - * Searches the inode's list of locks to find any POSIX locks which conflict.
- - */
- -int locks_mandatory_area(struct inode *inode, struct file *filp, loff_t start,
- -                       loff_t end, unsigned char type)
- -{
- -      struct file_lock fl;
- -      int error;
- -      bool sleep = false;
- -
- -      locks_init_lock(&fl);
- -      fl.fl_pid = current->tgid;
- -      fl.fl_file = filp;
- -      fl.fl_flags = FL_POSIX | FL_ACCESS;
- -      if (filp && !(filp->f_flags & O_NONBLOCK))
- -              sleep = true;
- -      fl.fl_type = type;
- -      fl.fl_start = start;
- -      fl.fl_end = end;
- -
- -      for (;;) {
- -              if (filp) {
- -                      fl.fl_owner = filp;
- -                      fl.fl_flags &= ~FL_SLEEP;
- -                      error = posix_lock_inode(inode, &fl, NULL);
- -                      if (!error)
- -                              break;
- -              }
- -
- -              if (sleep)
- -                      fl.fl_flags |= FL_SLEEP;
- -              fl.fl_owner = current->files;
- -              error = posix_lock_inode(inode, &fl, NULL);
- -              if (error != FILE_LOCK_DEFERRED)
- -                      break;
- -              error = wait_event_interruptible(fl.fl_wait,
- -                                      list_empty(&fl.fl_blocked_member));
- -              if (!error) {
- -                      /*
- -                       * If we've been sleeping someone might have
- -                       * changed the permissions behind our back.
- -                       */
- -                      if (__mandatory_lock(inode))
- -                              continue;
- -              }
- -
- -              break;
- -      }
- -      locks_delete_block(&fl);
- -
- -      return error;
- -}
- -EXPORT_SYMBOL(locks_mandatory_area);
- -#endif /* CONFIG_MANDATORY_FILE_LOCKING */
- -
   static void lease_clear_pending(struct file_lock *fl, int arg)
   {
         switch (arg) {
@@@ -2389,6 -2486,14 +2389,6 @@@ int fcntl_setlk(unsigned int fd, struc
         if (file_lock == NULL)
                 return -ENOLCK;
   
- -      /* Don't allow mandatory locks on files that may be memory mapped
- -       * and shared.
- -       */
- -      if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
- -              error = -EAGAIN;
- -              goto out;
- -      }
- -
         error = flock_to_posix_lock(filp, file_lock, flock);
         if (error)
                 goto out;
@@@ -2506,12 -2611,21 +2506,12 @@@ int fcntl_setlk64(unsigned int fd, stru
                 struct flock64 *flock)
   {
         struct file_lock *file_lock = locks_alloc_lock();
- -      struct inode *inode = locks_inode(filp);
         struct file *f;
         int error;
   
         if (file_lock == NULL)
                 return -ENOLCK;
   
- -      /* Don't allow mandatory locks on files that may be memory mapped
- -       * and shared.
- -       */
- -      if (mandatory_lock(inode) && mapping_writably_mapped(filp->f_mapping)) {
- -              error = -EAGAIN;
- -              goto out;
- -      }
- -
         error = flock64_to_posix_lock(filp, file_lock, flock);
         if (error)
                 goto out;
@@@ -2743,7 -2857,8 +2743,7 @@@ static void lock_get_status(struct seq_
                         seq_puts(f, "POSIX ");
   
                 seq_printf(f, " %s ",
- -                           (inode == NULL) ? "*NOINODE*" :
- -                           mandatory_lock(inode) ? "MANDATORY" : "ADVISORY ");
+ +                           (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
         } else if (IS_FLOCK(fl)) {
                 if (fl->fl_type & LOCK_MAND) {
                         seq_puts(f, "FLOCK  MSNFS     ");
@@@ -2941,10 -3056,12 +2941,12 @@@ static int __init filelock_init(void
         int i;
   
         flctx_cache = kmem_cache_create("file_lock_ctx",
-                       sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
+                       sizeof(struct file_lock_context), 0,
+                       SLAB_PANIC | SLAB_ACCOUNT, NULL);
   
         filelock_cache = kmem_cache_create("file_lock_cache",
-                       sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
+                       sizeof(struct file_lock), 0,
+                       SLAB_PANIC | SLAB_ACCOUNT, NULL);
   
         for_each_possible_cpu(i) {
                 struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
diff --combined fs/namei.c

index d049d39,ff866c0..95a881e
--- 1/fs/namei.c
--- 2/fs/namei.c
+++ b/fs/namei.c
@@@ -203,14 -203,6 +203,14 @@@ getname_flags(const char __user *filena
         return result;
   }
   
+ +struct filename *
+ +getname_uflags(const char __user *filename, int uflags)
+ +{
+ +      int flags = (uflags & AT_EMPTY_PATH) ? LOOKUP_EMPTY : 0;
+ +
+ +      return getname_flags(filename, flags, NULL);
+ +}
+ +
   struct filename *
   getname(const char __user * filename)
   {
@@@ -255,9 -247,6 +255,9 @@@ getname_kernel(const char * filename
   
   void putname(struct filename *name)
   {
+ +      if (IS_ERR_OR_NULL(name))
+ +              return;
+ +
         BUG_ON(name->refcnt <= 0);
   
         if (--name->refcnt > 0)
@@@ -2467,7 -2456,7 +2467,7 @@@ static int path_lookupat(struct nameida
         return err;
   }
   
- -int filename_lookup(int dfd, struct filename *name, unsigned flags,
+ +static int __filename_lookup(int dfd, struct filename *name, unsigned flags,
                     struct path *path, struct path *root)
   {
         int retval;
@@@ -2485,14 -2474,6 +2485,14 @@@
                 audit_inode(name, path->dentry,
                             flags & LOOKUP_MOUNTPOINT ? AUDIT_INODE_NOEVAL : 0);
         restore_nameidata();
+ +      return retval;
+ +}
+ +
+ +int filename_lookup(int dfd, struct filename *name, unsigned flags,
+ +                  struct path *path, struct path *root)
+ +{
+ +      int retval = __filename_lookup(dfd, name, flags, path, root);
+ +
         putname(name);
         return retval;
   }
@@@ -2514,7 -2495,7 +2514,7 @@@ static int path_parentat(struct nameida
         return err;
   }
   
- -static struct filename *filename_parentat(int dfd, struct filename *name,
+ +static int __filename_parentat(int dfd, struct filename *name,
                                 unsigned int flags, struct path *parent,
                                 struct qstr *last, int *type)
   {
@@@ -2522,7 -2503,7 +2522,7 @@@
         struct nameidata nd;
   
         if (IS_ERR(name))
- -              return name;
+ +              return PTR_ERR(name);
         set_nameidata(&nd, dfd, name, NULL);
         retval = path_parentat(&nd, flags | LOOKUP_RCU, parent);
         if (unlikely(retval == -ECHILD))
@@@ -2533,34 -2514,29 +2533,34 @@@
                 *last = nd.last;
                 *type = nd.last_type;
                 audit_inode(name, parent->dentry, AUDIT_INODE_PARENT);
- -      } else {
- -              putname(name);
- -              name = ERR_PTR(retval);
         }
         restore_nameidata();
- -      return name;
+ +      return retval;
+ +}
+ +
+ +static int filename_parentat(int dfd, struct filename *name,
+ +                              unsigned int flags, struct path *parent,
+ +                              struct qstr *last, int *type)
+ +{
+ +      int retval = __filename_parentat(dfd, name, flags, parent, last, type);
+ +
+ +      putname(name);
+ +      return retval;
   }
   
   /* does lookup, returns the object with parent locked */
   struct dentry *kern_path_locked(const char *name, struct path *path)
   {
- -      struct filename *filename;
         struct dentry *d;
         struct qstr last;
- -      int type;
+ +      int type, error;
   
- -      filename = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
+ +      error = filename_parentat(AT_FDCWD, getname_kernel(name), 0, path,
                                     &last, &type);
- -      if (IS_ERR(filename))
- -              return ERR_CAST(filename);
+ +      if (error)
+ +              return ERR_PTR(error);
         if (unlikely(type != LAST_NORM)) {
                 path_put(path);
- -              putname(filename);
                 return ERR_PTR(-EINVAL);
         }
         inode_lock_nested(path->dentry->d_inode, I_MUTEX_PARENT);
@@@ -2569,6 -2545,7 +2569,6 @@@
                 inode_unlock(path->dentry->d_inode);
                 path_put(path);
         }
- -      putname(filename);
         return d;
   }
   
@@@ -2598,9 -2575,8 +2598,9 @@@ int vfs_path_lookup(struct dentry *dent
   }
   EXPORT_SYMBOL(vfs_path_lookup);
   
- -static int lookup_one_len_common(const char *name, struct dentry *base,
- -                               int len, struct qstr *this)
+ +static int lookup_one_common(struct user_namespace *mnt_userns,
+ +                           const char *name, struct dentry *base, int len,
+ +                           struct qstr *this)
   {
         this->name = name;
         this->len = len;
@@@ -2628,7 -2604,7 +2628,7 @@@
                         return err;
         }
   
- -      return inode_permission(&init_user_ns, base->d_inode, MAY_EXEC);
+ +      return inode_permission(mnt_userns, base->d_inode, MAY_EXEC);
   }
   
   /**
@@@ -2652,7 -2628,7 +2652,7 @@@ struct dentry *try_lookup_one_len(cons
   
         WARN_ON_ONCE(!inode_is_locked(base->d_inode));
   
- -      err = lookup_one_len_common(name, base, len, &this);
+ +      err = lookup_one_common(&init_user_ns, name, base, len, &this);
         if (err)
                 return ERR_PTR(err);
   
@@@ -2679,7 -2655,7 +2679,7 @@@ struct dentry *lookup_one_len(const cha
   
         WARN_ON_ONCE(!inode_is_locked(base->d_inode));
   
- -      err = lookup_one_len_common(name, base, len, &this);
+ +      err = lookup_one_common(&init_user_ns, name, base, len, &this);
         if (err)
                 return ERR_PTR(err);
   
@@@ -2688,36 -2664,6 +2688,36 @@@
   }
   EXPORT_SYMBOL(lookup_one_len);
   
+ +/**
+ + * lookup_one - filesystem helper to lookup single pathname component
+ + * @mnt_userns:       user namespace of the mount the lookup is performed from
+ + * @name:     pathname component to lookup
+ + * @base:     base directory to lookup from
+ + * @len:      maximum length @len should be interpreted to
+ + *
+ + * Note that this routine is purely a helper for filesystem usage and should
+ + * not be called by generic code.
+ + *
+ + * The caller must hold base->i_mutex.
+ + */
+ +struct dentry *lookup_one(struct user_namespace *mnt_userns, const char *name,
+ +                        struct dentry *base, int len)
+ +{
+ +      struct dentry *dentry;
+ +      struct qstr this;
+ +      int err;
+ +
+ +      WARN_ON_ONCE(!inode_is_locked(base->d_inode));
+ +
+ +      err = lookup_one_common(mnt_userns, name, base, len, &this);
+ +      if (err)
+ +              return ERR_PTR(err);
+ +
+ +      dentry = lookup_dcache(&this, base, 0);
+ +      return dentry ? dentry : __lookup_slow(&this, base, 0);
+ +}
+ +EXPORT_SYMBOL(lookup_one);
+ +
   /**
    * lookup_one_len_unlocked - filesystem helper to lookup single pathname component
    * @name:     pathname component to lookup
@@@ -2737,7 -2683,7 +2737,7 @@@ struct dentry *lookup_one_len_unlocked(
         int err;
         struct dentry *ret;
   
- -      err = lookup_one_len_common(name, base, len, &this);
+ +      err = lookup_one_common(&init_user_ns, name, base, len, &this);
         if (err)
                 return ERR_PTR(err);
   
@@@ -3077,7 -3023,9 +3077,7 @@@ static int handle_truncate(struct user_
         /*
          * Refuse to truncate files with mandatory locks held on them.
          */
- -      error = locks_verify_locked(filp);
- -      if (!error)
- -              error = security_path_truncate(path);
+ +      error = security_path_truncate(path);
         if (!error) {
                 error = do_truncate(mnt_userns, path->dentry, 0,
                                     ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
@@@ -3618,7 -3566,7 +3618,7 @@@ struct file *do_file_open_root(const st
         return file;
   }
   
- -static struct dentry *filename_create(int dfd, struct filename *name,
+ +static struct dentry *__filename_create(int dfd, struct filename *name,
                                 struct path *path, unsigned int lookup_flags)
   {
         struct dentry *dentry = ERR_PTR(-EEXIST);
@@@ -3634,9 -3582,9 +3634,9 @@@
          */
         lookup_flags &= LOOKUP_REVAL;
   
- -      name = filename_parentat(dfd, name, lookup_flags, path, &last, &type);
- -      if (IS_ERR(name))
- -              return ERR_CAST(name);
+ +      error = __filename_parentat(dfd, name, lookup_flags, path, &last, &type);
+ +      if (error)
+ +              return ERR_PTR(error);
   
         /*
          * Yucky last component or no last component at all?
@@@ -3674,6 -3622,7 +3674,6 @@@
                 error = err2;
                 goto fail;
         }
- -      putname(name);
         return dentry;
   fail:
         dput(dentry);
@@@ -3684,18 -3633,10 +3684,18 @@@ unlock
                 mnt_drop_write(path->mnt);
   out:
         path_put(path);
- -      putname(name);
         return dentry;
   }
   
+ +static inline struct dentry *filename_create(int dfd, struct filename *name,
+ +                              struct path *path, unsigned int lookup_flags)
+ +{
+ +      struct dentry *res = __filename_create(dfd, name, path, lookup_flags);
+ +
+ +      putname(name);
+ +      return res;
+ +}
+ +
   struct dentry *kern_path_create(int dfd, const char *pathname,
                                 struct path *path, unsigned int lookup_flags)
   {
@@@ -3784,7 -3725,7 +3784,7 @@@ static int may_mknod(umode_t mode
         }
   }
   
- -static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
+ +static int do_mknodat(int dfd, struct filename *name, umode_t mode,
                 unsigned int dev)
   {
         struct user_namespace *mnt_userns;
@@@ -3795,18 -3736,17 +3795,18 @@@
   
         error = may_mknod(mode);
         if (error)
- -              return error;
+ +              goto out1;
   retry:
- -      dentry = user_path_create(dfd, filename, &path, lookup_flags);
+ +      dentry = __filename_create(dfd, name, &path, lookup_flags);
+ +      error = PTR_ERR(dentry);
         if (IS_ERR(dentry))
- -              return PTR_ERR(dentry);
+ +              goto out1;
   
         if (!IS_POSIXACL(path.dentry->d_inode))
                 mode &= ~current_umask();
         error = security_path_mknod(&path, dentry, mode, dev);
         if (error)
- -              goto out;
+ +              goto out2;
   
         mnt_userns = mnt_user_ns(path.mnt);
         switch (mode & S_IFMT) {
@@@ -3825,26 -3765,24 +3825,26 @@@
                                           dentry, mode, 0);
                         break;
         }
- -out:
+ +out2:
         done_path_create(&path, dentry);
         if (retry_estale(error, lookup_flags)) {
                 lookup_flags |= LOOKUP_REVAL;
                 goto retry;
         }
+ +out1:
+ +      putname(name);
         return error;
   }
   
   SYSCALL_DEFINE4(mknodat, int, dfd, const char __user *, filename, umode_t, mode,
                 unsigned int, dev)
   {
- -      return do_mknodat(dfd, filename, mode, dev);
+ +      return do_mknodat(dfd, getname(filename), mode, dev);
   }
   
   SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, dev)
   {
- -      return do_mknodat(AT_FDCWD, filename, mode, dev);
+ +      return do_mknodat(AT_FDCWD, getname(filename), mode, dev);
   }
   
   /**
@@@ -3889,7 -3827,7 +3889,7 @@@ int vfs_mkdir(struct user_namespace *mn
   }
   EXPORT_SYMBOL(vfs_mkdir);
   
- -static long do_mkdirat(int dfd, const char __user *pathname, umode_t mode)
+ +int do_mkdirat(int dfd, struct filename *name, umode_t mode)
   {
         struct dentry *dentry;
         struct path path;
@@@ -3897,10 -3835,9 +3897,10 @@@
         unsigned int lookup_flags = LOOKUP_DIRECTORY;
   
   retry:
- -      dentry = user_path_create(dfd, pathname, &path, lookup_flags);
+ +      dentry = __filename_create(dfd, name, &path, lookup_flags);
+ +      error = PTR_ERR(dentry);
         if (IS_ERR(dentry))
- -              return PTR_ERR(dentry);
+ +              goto out_putname;
   
         if (!IS_POSIXACL(path.dentry->d_inode))
                 mode &= ~current_umask();
@@@ -3916,19 -3853,17 +3916,19 @@@
                 lookup_flags |= LOOKUP_REVAL;
                 goto retry;
         }
+ +out_putname:
+ +      putname(name);
         return error;
   }
   
   SYSCALL_DEFINE3(mkdirat, int, dfd, const char __user *, pathname, umode_t, mode)
   {
- -      return do_mkdirat(dfd, pathname, mode);
+ +      return do_mkdirat(dfd, getname(pathname), mode);
   }
   
   SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
   {
- -      return do_mkdirat(AT_FDCWD, pathname, mode);
+ +      return do_mkdirat(AT_FDCWD, getname(pathname), mode);
   }
   
   /**
@@@ -3986,62 -3921,62 +3986,62 @@@ out
   }
   EXPORT_SYMBOL(vfs_rmdir);
   
- -long do_rmdir(int dfd, struct filename *name)
+ +int do_rmdir(int dfd, struct filename *name)
   {
         struct user_namespace *mnt_userns;
- -      int error = 0;
+ +      int error;
         struct dentry *dentry;
         struct path path;
         struct qstr last;
         int type;
         unsigned int lookup_flags = 0;
   retry:
- -      name = filename_parentat(dfd, name, lookup_flags,
- -                              &path, &last, &type);
- -      if (IS_ERR(name))
- -              return PTR_ERR(name);
+ +      error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ +      if (error)
+ +              goto exit1;
   
         switch (type) {
         case LAST_DOTDOT:
                 error = -ENOTEMPTY;
- -              goto exit1;
+ +              goto exit2;
         case LAST_DOT:
                 error = -EINVAL;
- -              goto exit1;
+ +              goto exit2;
         case LAST_ROOT:
                 error = -EBUSY;
- -              goto exit1;
+ +              goto exit2;
         }
   
         error = mnt_want_write(path.mnt);
         if (error)
- -              goto exit1;
+ +              goto exit2;
   
         inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
         dentry = __lookup_hash(&last, path.dentry, lookup_flags);
         error = PTR_ERR(dentry);
         if (IS_ERR(dentry))
- -              goto exit2;
+ +              goto exit3;
         if (!dentry->d_inode) {
                 error = -ENOENT;
- -              goto exit3;
+ +              goto exit4;
         }
         error = security_path_rmdir(&path, dentry);
         if (error)
- -              goto exit3;
+ +              goto exit4;
         mnt_userns = mnt_user_ns(path.mnt);
         error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry);
- -exit3:
+ +exit4:
         dput(dentry);
- -exit2:
+ +exit3:
         inode_unlock(path.dentry->d_inode);
         mnt_drop_write(path.mnt);
- -exit1:
+ +exit2:
         path_put(&path);
         if (retry_estale(error, lookup_flags)) {
                 lookup_flags |= LOOKUP_REVAL;
                 goto retry;
         }
+ +exit1:
         putname(name);
         return error;
   }
@@@ -4089,7 -4024,9 +4089,9 @@@ int vfs_unlink(struct user_namespace *m
                 return -EPERM;
   
         inode_lock(target);
-       if (is_local_mountpoint(dentry))
+       if (IS_SWAPFILE(target))
+               error = -EPERM;
+       else if (is_local_mountpoint(dentry))
                 error = -EBUSY;
         else {
                 error = security_inode_unlink(dir, dentry);
@@@ -4124,7 -4061,7 +4126,7 @@@ EXPORT_SYMBOL(vfs_unlink)
    * writeout happening, and we don't want to prevent access to the directory
    * while waiting on the I/O.
    */
- -long do_unlinkat(int dfd, struct filename *name)
+ +int do_unlinkat(int dfd, struct filename *name)
   {
         int error;
         struct dentry *dentry;
@@@ -4135,17 -4072,17 +4137,17 @@@
         struct inode *delegated_inode = NULL;
         unsigned int lookup_flags = 0;
   retry:
- -      name = filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
- -      if (IS_ERR(name))
- -              return PTR_ERR(name);
+ +      error = __filename_parentat(dfd, name, lookup_flags, &path, &last, &type);
+ +      if (error)
+ +              goto exit1;
   
         error = -EISDIR;
         if (type != LAST_NORM)
- -              goto exit1;
+ +              goto exit2;
   
         error = mnt_want_write(path.mnt);
         if (error)
- -              goto exit1;
+ +              goto exit2;
   retry_deleg:
         inode_lock_nested(path.dentry->d_inode, I_MUTEX_PARENT);
         dentry = __lookup_hash(&last, path.dentry, lookup_flags);
@@@ -4162,11 -4099,11 +4164,11 @@@
                 ihold(inode);
                 error = security_path_unlink(&path, dentry);
                 if (error)
- -                      goto exit2;
+ +                      goto exit3;
                 mnt_userns = mnt_user_ns(path.mnt);
                 error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry,
                                    &delegated_inode);
- -exit2:
+ +exit3:
                 dput(dentry);
         }
         inode_unlock(path.dentry->d_inode);
@@@ -4179,14 -4116,13 +4181,14 @@@
                         goto retry_deleg;
         }
         mnt_drop_write(path.mnt);
- -exit1:
+ +exit2:
         path_put(&path);
         if (retry_estale(error, lookup_flags)) {
                 lookup_flags |= LOOKUP_REVAL;
                 inode = NULL;
                 goto retry;
         }
+ +exit1:
         putname(name);
         return error;
   
@@@ -4197,7 -4133,7 +4199,7 @@@ slashes
                 error = -EISDIR;
         else
                 error = -ENOTDIR;
- -      goto exit2;
+ +      goto exit3;
   }
   
   SYSCALL_DEFINE3(unlinkat, int, dfd, const char __user *, pathname, int, flag)
@@@ -4252,22 -4188,23 +4254,22 @@@ int vfs_symlink(struct user_namespace *
   }
   EXPORT_SYMBOL(vfs_symlink);
   
- -static long do_symlinkat(const char __user *oldname, int newdfd,
- -                const char __user *newname)
+ +int do_symlinkat(struct filename *from, int newdfd, struct filename *to)
   {
         int error;
- -      struct filename *from;
         struct dentry *dentry;
         struct path path;
         unsigned int lookup_flags = 0;
   
- -      from = getname(oldname);
- -      if (IS_ERR(from))
- -              return PTR_ERR(from);
+ +      if (IS_ERR(from)) {
+ +              error = PTR_ERR(from);
+ +              goto out_putnames;
+ +      }
   retry:
- -      dentry = user_path_create(newdfd, newname, &path, lookup_flags);
+ +      dentry = __filename_create(newdfd, to, &path, lookup_flags);
         error = PTR_ERR(dentry);
         if (IS_ERR(dentry))
- -              goto out_putname;
+ +              goto out_putnames;
   
         error = security_path_symlink(&path, dentry, from->name);
         if (!error) {
@@@ -4282,8 -4219,7 +4284,8 @@@
                 lookup_flags |= LOOKUP_REVAL;
                 goto retry;
         }
- -out_putname:
+ +out_putnames:
+ +      putname(to);
         putname(from);
         return error;
   }
@@@ -4291,12 -4227,12 +4293,12 @@@
   SYSCALL_DEFINE3(symlinkat, const char __user *, oldname,
                 int, newdfd, const char __user *, newname)
   {
- -      return do_symlinkat(oldname, newdfd, newname);
+ +      return do_symlinkat(getname(oldname), newdfd, getname(newname));
   }
   
   SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newname)
   {
- -      return do_symlinkat(oldname, AT_FDCWD, newname);
+ +      return do_symlinkat(getname(oldname), AT_FDCWD, getname(newname));
   }
   
   /**
@@@ -4397,8 -4333,8 +4399,8 @@@ EXPORT_SYMBOL(vfs_link)
    * with linux 2.0, and to avoid hard-linking to directories
    * and other special files.  --ADM
    */
- -static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
- -            const char __user *newname, int flags)
+ +int do_linkat(int olddfd, struct filename *old, int newdfd,
+ +            struct filename *new, int flags)
   {
         struct user_namespace *mnt_userns;
         struct dentry *new_dentry;
@@@ -4407,32 -4343,31 +4409,32 @@@
         int how = 0;
         int error;
   
- -      if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0)
- -              return -EINVAL;
+ +      if ((flags & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH)) != 0) {
+ +              error = -EINVAL;
+ +              goto out_putnames;
+ +      }
         /*
          * To use null names we require CAP_DAC_READ_SEARCH
          * This ensures that not everyone will be able to create
          * handlink using the passed filedescriptor.
          */
- -      if (flags & AT_EMPTY_PATH) {
- -              if (!capable(CAP_DAC_READ_SEARCH))
- -                      return -ENOENT;
- -              how = LOOKUP_EMPTY;
+ +      if (flags & AT_EMPTY_PATH && !capable(CAP_DAC_READ_SEARCH)) {
+ +              error = -ENOENT;
+ +              goto out_putnames;
         }
   
         if (flags & AT_SYMLINK_FOLLOW)
                 how |= LOOKUP_FOLLOW;
   retry:
- -      error = user_path_at(olddfd, oldname, how, &old_path);
+ +      error = __filename_lookup(olddfd, old, how, &old_path, NULL);
         if (error)
- -              return error;
+ +              goto out_putnames;
   
- -      new_dentry = user_path_create(newdfd, newname, &new_path,
+ +      new_dentry = __filename_create(newdfd, new, &new_path,
                                         (how & LOOKUP_REVAL));
         error = PTR_ERR(new_dentry);
         if (IS_ERR(new_dentry))
- -              goto out;
+ +              goto out_putpath;
   
         error = -EXDEV;
         if (old_path.mnt != new_path.mnt)
@@@ -4460,11 -4395,8 +4462,11 @@@ out_dput
                 how |= LOOKUP_REVAL;
                 goto retry;
         }
- -out:
+ +out_putpath:
         path_put(&old_path);
+ +out_putnames:
+ +      putname(old);
+ +      putname(new);
   
         return error;
   }
@@@ -4472,13 -4404,12 +4474,13 @@@
   SYSCALL_DEFINE5(linkat, int, olddfd, const char __user *, oldname,
                 int, newdfd, const char __user *, newname, int, flags)
   {
- -      return do_linkat(olddfd, oldname, newdfd, newname, flags);
+ +      return do_linkat(olddfd, getname_uflags(oldname, flags),
+ +              newdfd, getname(newname), flags);
   }
   
   SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname)
   {
- -      return do_linkat(AT_FDCWD, oldname, AT_FDCWD, newname, 0);
+ +      return do_linkat(AT_FDCWD, getname(oldname), AT_FDCWD, getname(newname), 0);
   }
   
   /**
@@@ -4597,6 -4528,10 +4599,10 @@@ int vfs_rename(struct renamedata *rd
         else if (target)
                 inode_lock(target);
   
+       error = -EPERM;
+       if (IS_SWAPFILE(source) || (target && IS_SWAPFILE(target)))
+               goto out;
+ 
         error = -EBUSY;
         if (is_local_mountpoint(old_dentry) || is_local_mountpoint(new_dentry))
                 goto out;
@@@ -4673,25 -4608,29 +4679,25 @@@ int do_renameat2(int olddfd, struct fil
         int error = -EINVAL;
   
         if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
- -              goto put_both;
+ +              goto put_names;
   
         if ((flags & (RENAME_NOREPLACE | RENAME_WHITEOUT)) &&
             (flags & RENAME_EXCHANGE))
- -              goto put_both;
+ +              goto put_names;
   
         if (flags & RENAME_EXCHANGE)
                 target_flags = 0;
   
   retry:
- -      from = filename_parentat(olddfd, from, lookup_flags, &old_path,
+ +      error = __filename_parentat(olddfd, from, lookup_flags, &old_path,
                                         &old_last, &old_type);
- -      if (IS_ERR(from)) {
- -              error = PTR_ERR(from);
- -              goto put_new;
- -      }
+ +      if (error)
+ +              goto put_names;
   
- -      to = filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
+ +      error = __filename_parentat(newdfd, to, lookup_flags, &new_path, &new_last,
                                 &new_type);
- -      if (IS_ERR(to)) {
- -              error = PTR_ERR(to);
+ +      if (error)
                 goto exit1;
- -      }
   
         error = -EXDEV;
         if (old_path.mnt != new_path.mnt)
@@@ -4794,9 -4733,12 +4800,9 @@@ exit1
                 lookup_flags |= LOOKUP_REVAL;
                 goto retry;
         }
- -put_both:
- -      if (!IS_ERR(from))
- -              putname(from);
- -put_new:
- -      if (!IS_ERR(to))
- -              putname(to);
+ +put_names:
+ +      putname(from);
+ +      putname(to);
         return error;
   }
   
diff --combined fs/namespace.c

index 1285236,94a9817..659a8f3
--- 1/fs/namespace.c
--- 2/fs/namespace.c
+++ b/fs/namespace.c
@@@ -203,7 -203,8 +203,8 @@@ static struct mount *alloc_vfsmnt(cons
                         goto out_free_cache;
   
                 if (name) {
-                       mnt->mnt_devname = kstrdup_const(name, GFP_KERNEL);
+                       mnt->mnt_devname = kstrdup_const(name,
+                                                        GFP_KERNEL_ACCOUNT);
                         if (!mnt->mnt_devname)
                                 goto out_free_id;
                 }
@@@ -1715,14 -1716,22 +1716,14 @@@ static inline bool may_mount(void
         return ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN);
   }
   
- -#ifdef        CONFIG_MANDATORY_FILE_LOCKING
- -static bool may_mandlock(void)
+ +static void warn_mandlock(void)
   {
- -      pr_warn_once("======================================================\n"
- -                   "WARNING: the mand mount option is being deprecated and\n"
- -                   "         will be removed in v5.15!\n"
- -                   "======================================================\n");
- -      return capable(CAP_SYS_ADMIN);
+ +      pr_warn_once("=======================================================\n"
+ +                   "WARNING: The mand mount option has been deprecated and\n"
+ +                   "         and is ignored by this kernel. Remove the mand\n"
+ +                   "         option from the mount to silence this warning.\n"
+ +                   "=======================================================\n");
   }
- -#else
- -static inline bool may_mandlock(void)
- -{
- -      pr_warn("VFS: \"mand\" mount option not supported");
- -      return false;
- -}
- -#endif
   
   static int can_umount(const struct path *path, int flags)
   {
@@@ -2694,78 -2703,6 +2695,78 @@@ out
         return ret;
   }
   
+ +static int do_set_group(struct path *from_path, struct path *to_path)
+ +{
+ +      struct mount *from, *to;
+ +      int err;
+ +
+ +      from = real_mount(from_path->mnt);
+ +      to = real_mount(to_path->mnt);
+ +
+ +      namespace_lock();
+ +
+ +      err = -EINVAL;
+ +      /* To and From must be mounted */
+ +      if (!is_mounted(&from->mnt))
+ +              goto out;
+ +      if (!is_mounted(&to->mnt))
+ +              goto out;
+ +
+ +      err = -EPERM;
+ +      /* We should be allowed to modify mount namespaces of both mounts */
+ +      if (!ns_capable(from->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ +              goto out;
+ +      if (!ns_capable(to->mnt_ns->user_ns, CAP_SYS_ADMIN))
+ +              goto out;
+ +
+ +      err = -EINVAL;
+ +      /* To and From paths should be mount roots */
+ +      if (from_path->dentry != from_path->mnt->mnt_root)
+ +              goto out;
+ +      if (to_path->dentry != to_path->mnt->mnt_root)
+ +              goto out;
+ +
+ +      /* Setting sharing groups is only allowed across same superblock */
+ +      if (from->mnt.mnt_sb != to->mnt.mnt_sb)
+ +              goto out;
+ +
+ +      /* From mount root should be wider than To mount root */
+ +      if (!is_subdir(to->mnt.mnt_root, from->mnt.mnt_root))
+ +              goto out;
+ +
+ +      /* From mount should not have locked children in place of To's root */
+ +      if (has_locked_children(from, to->mnt.mnt_root))
+ +              goto out;
+ +
+ +      /* Setting sharing groups is only allowed on private mounts */
+ +      if (IS_MNT_SHARED(to) || IS_MNT_SLAVE(to))
+ +              goto out;
+ +
+ +      /* From should not be private */
+ +      if (!IS_MNT_SHARED(from) && !IS_MNT_SLAVE(from))
+ +              goto out;
+ +
+ +      if (IS_MNT_SLAVE(from)) {
+ +              struct mount *m = from->mnt_master;
+ +
+ +              list_add(&to->mnt_slave, &m->mnt_slave_list);
+ +              to->mnt_master = m;
+ +      }
+ +
+ +      if (IS_MNT_SHARED(from)) {
+ +              to->mnt_group_id = from->mnt_group_id;
+ +              list_add(&to->mnt_share, &from->mnt_share);
+ +              lock_mount_hash();
+ +              set_mnt_shared(to);
+ +              unlock_mount_hash();
+ +      }
+ +
+ +      err = 0;
+ +out:
+ +      namespace_unlock();
+ +      return err;
+ +}
+ +
   static int do_move_mount(struct path *old_path, struct path *new_path)
   {
         struct mnt_namespace *ns;
@@@ -3261,8 -3198,8 +3262,8 @@@ int path_mount(const char *dev_name, st
                 return ret;
         if (!may_mount())
                 return -EPERM;
- -      if ((flags & SB_MANDLOCK) && !may_mandlock())
- -              return -EPERM;
+ +      if (flags & SB_MANDLOCK)
+ +              warn_mandlock();
   
         /* Default to relatime unless overriden */
         if (!(flags & MS_NOATIME))
@@@ -3370,7 -3307,7 +3371,7 @@@ static struct mnt_namespace *alloc_mnt_
         if (!ucounts)
                 return ERR_PTR(-ENOSPC);
   
-       new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL);
+       new_ns = kzalloc(sizeof(struct mnt_namespace), GFP_KERNEL_ACCOUNT);
         if (!new_ns) {
                 dec_mnt_namespaces(ucounts);
                 return ERR_PTR(-ENOMEM);
@@@ -3645,8 -3582,9 +3646,8 @@@ SYSCALL_DEFINE3(fsmount, int, fs_fd, un
         if (fc->phase != FS_CONTEXT_AWAITING_MOUNT)
                 goto err_unlock;
   
- -      ret = -EPERM;
- -      if ((fc->sb_flags & SB_MANDLOCK) && !may_mandlock())
- -              goto err_unlock;
+ +      if (fc->sb_flags & SB_MANDLOCK)
+ +              warn_mandlock();
   
         newmount.mnt = vfs_create_mount(fc);
         if (IS_ERR(newmount.mnt)) {
@@@ -3750,10 -3688,7 +3751,10 @@@ SYSCALL_DEFINE5(move_mount
         if (ret < 0)
                 goto out_to;
   
- -      ret = do_move_mount(&from_path, &to_path);
+ +      if (flags & MOVE_MOUNT_SET_GROUP)
+ +              ret = do_set_group(&from_path, &to_path);
+ +      else
+ +              ret = do_move_mount(&from_path, &to_path);
   
   out_to:
         path_put(&to_path);
@@@ -4306,7 -4241,7 +4307,7 @@@ void __init mnt_init(void
         int err;
   
         mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct mount),
-                       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL);
+                       0, SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT, NULL);
   
         mount_hashtable = alloc_large_system_hash("Mount-cache",
                                 sizeof(struct hlist_head),
diff --combined include/linux/backing-dev.h

index 2953085,8a886bc..ac7f231
--- 1/include/linux/backing-dev.h
--- 2/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@@ -143,7 -143,7 +143,7 @@@ static inline struct backing_dev_info *
         sb = inode->i_sb;
   #ifdef CONFIG_BLOCK
         if (sb_is_blkdev_sb(sb))
- -              return I_BDEV(inode)->bd_bdi;
+ +              return I_BDEV(inode)->bd_disk->bdi;
   #endif
         return sb->s_bdi;
   }
@@@ -288,6 -288,17 +288,17 @@@ static inline struct bdi_writeback *ino
         return inode->i_wb;
   }
   
+ static inline struct bdi_writeback *inode_to_wb_wbc(
+                               struct inode *inode,
+                               struct writeback_control *wbc)
+ {
+       /*
+        * If wbc does not have inode attached, it means cgroup writeback was
+        * disabled when wbc started. Just use the default wb in that case.
+        */
+       return wbc->wb ? wbc->wb : &inode_to_bdi(inode)->wb;
+ }
+ 
   /**
    * unlocked_inode_to_wb_begin - begin unlocked inode wb access transaction
    * @inode: target inode
@@@ -366,6 -377,14 +377,14 @@@ static inline struct bdi_writeback *ino
         return &inode_to_bdi(inode)->wb;
   }
   
+ static inline struct bdi_writeback *inode_to_wb_wbc(
+                               struct inode *inode,
+                               struct writeback_control *wbc)
+ {
+       return inode_to_wb(inode);
+ }
+ 
+ 
   static inline struct bdi_writeback *
   unlocked_inode_to_wb_begin(struct inode *inode, struct wb_lock_cookie *cookie)
   {
diff --combined include/linux/memcontrol.h

index 20151c4,69e6c54..3096c9a
--- 1/include/linux/memcontrol.h
--- 2/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@@ -105,14 -105,6 +105,6 @@@ struct mem_cgroup_reclaim_iter 
         unsigned int generation;
   };
   
- struct lruvec_stat {
-       long count[NR_VM_NODE_STAT_ITEMS];
- };
- 
- struct batched_lruvec_stat {
-       s32 count[NR_VM_NODE_STAT_ITEMS];
- };
- 
   /*
    * Bitmap and deferred work of shrinker::id corresponding to memcg-aware
    * shrinkers, which have elements charged to this memcg.
@@@ -123,24 -115,30 +115,30 @@@ struct shrinker_info 
         unsigned long *map;
   };
   
+ struct lruvec_stats_percpu {
+       /* Local (CPU and cgroup) state */
+       long state[NR_VM_NODE_STAT_ITEMS];
+ 
+       /* Delta calculation for lockless upward propagation */
+       long state_prev[NR_VM_NODE_STAT_ITEMS];
+ };
+ 
+ struct lruvec_stats {
+       /* Aggregated (CPU and subtree) state */
+       long state[NR_VM_NODE_STAT_ITEMS];
+ 
+       /* Pending child counts during tree propagation */
+       long state_pending[NR_VM_NODE_STAT_ITEMS];
+ };
+ 
   /*
    * per-node information in memory controller.
    */
   struct mem_cgroup_per_node {
         struct lruvec           lruvec;
   
-       /*
-        * Legacy local VM stats. This should be struct lruvec_stat and
-        * cannot be optimized to struct batched_lruvec_stat. Because
-        * the threshold of the lruvec_stat_cpu can be as big as
-        * MEMCG_CHARGE_BATCH * PAGE_SIZE. It can fit into s32. But this
-        * filed has no upper limit.
-        */
-       struct lruvec_stat __percpu *lruvec_stat_local;
- 
-       /* Subtree VM stats (batched updates) */
-       struct batched_lruvec_stat __percpu *lruvec_stat_cpu;
-       atomic_long_t           lruvec_stat[NR_VM_NODE_STAT_ITEMS];
+       struct lruvec_stats_percpu __percpu     *lruvec_stats_percpu;
+       struct lruvec_stats                     lruvec_stats;
   
         unsigned long           lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
   
@@@ -595,13 -593,6 +593,6 @@@ static inline struct obj_cgroup **page_
   }
   #endif
   
- static __always_inline bool memcg_stat_item_in_bytes(int idx)
- {
-       if (idx == MEMCG_PERCPU_B)
-               return true;
-       return vmstat_item_in_bytes(idx);
- }
- 
   static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
   {
         return (memcg == root_mem_cgroup);
@@@ -693,13 -684,35 +684,35 @@@ static inline bool mem_cgroup_below_min
                 page_counter_read(&memcg->memory);
   }
   
- int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask);
+ int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+                       gfp_t gfp_mask);
+ static inline int mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+                                   gfp_t gfp_mask)
+ {
+       if (mem_cgroup_disabled())
+               return 0;
+       return __mem_cgroup_charge(page, mm, gfp_mask);
+ }
+ 
   int mem_cgroup_swapin_charge_page(struct page *page, struct mm_struct *mm,
                                   gfp_t gfp, swp_entry_t entry);
   void mem_cgroup_swapin_uncharge_swap(swp_entry_t entry);
   
- void mem_cgroup_uncharge(struct page *page);
- void mem_cgroup_uncharge_list(struct list_head *page_list);
+ void __mem_cgroup_uncharge(struct page *page);
+ static inline void mem_cgroup_uncharge(struct page *page)
+ {
+       if (mem_cgroup_disabled())
+               return;
+       __mem_cgroup_uncharge(page);
+ }
+ 
+ void __mem_cgroup_uncharge_list(struct list_head *page_list);
+ static inline void mem_cgroup_uncharge_list(struct list_head *page_list)
+ {
+       if (mem_cgroup_disabled())
+               return;
+       __mem_cgroup_uncharge_list(page_list);
+ }
   
   void mem_cgroup_migrate(struct page *oldpage, struct page *newpage);
   
@@@ -884,11 -897,6 +897,6 @@@ static inline bool mem_cgroup_online(st
         return !!(memcg->css.flags & CSS_ONLINE);
   }
   
- /*
-  * For memory reclaim.
-  */
- int mem_cgroup_select_victim_node(struct mem_cgroup *memcg);
- 
   void mem_cgroup_update_lru_size(struct lruvec *lruvec, enum lru_list lru,
                 int zid, int nr_pages);
   
@@@ -955,22 -963,21 +963,21 @@@ static inline void mod_memcg_state(stru
         local_irq_restore(flags);
   }
   
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+ {
+       return READ_ONCE(memcg->vmstats.state[idx]);
+ }
+ 
   static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                               enum node_stat_item idx)
   {
         struct mem_cgroup_per_node *pn;
-       long x;
   
         if (mem_cgroup_disabled())
                 return node_page_state(lruvec_pgdat(lruvec), idx);
   
         pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
-       x = atomic_long_read(&pn->lruvec_stat[idx]);
- #ifdef CONFIG_SMP
-       if (x < 0)
-               x = 0;
- #endif
-       return x;
+       return READ_ONCE(pn->lruvec_stats.state[idx]);
   }
   
   static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
@@@ -985,7 -992,7 +992,7 @@@
   
         pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
         for_each_possible_cpu(cpu)
-               x += per_cpu(pn->lruvec_stat_local->count[idx], cpu);
+               x += per_cpu(pn->lruvec_stats_percpu->state[idx], cpu);
   #ifdef CONFIG_SMP
         if (x < 0)
                 x = 0;
@@@ -993,6 -1000,8 +1000,8 @@@
         return x;
   }
   
+ void mem_cgroup_flush_stats(void);
+ 
   void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
                               int val);
   void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
@@@ -1391,6 -1400,11 +1400,11 @@@ static inline void mod_memcg_state(stru
   {
   }
   
+ static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
+ {
+       return 0;
+ }
+ 
   static inline unsigned long lruvec_page_state(struct lruvec *lruvec,
                                               enum node_stat_item idx)
   {
@@@ -1403,6 -1417,10 +1417,10 @@@ static inline unsigned long lruvec_page
         return node_page_state(lruvec_pgdat(lruvec), idx);
   }
   
+ static inline void mem_cgroup_flush_stats(void)
+ {
+ }
+ 
   static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
                                             enum node_stat_item idx, int val)
   {
@@@ -1582,8 -1600,7 +1600,8 @@@ static inline void mem_cgroup_flush_for
   #endif        /* CONFIG_CGROUP_WRITEBACK */
   
   struct sock;
- -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
+ +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
+ +                           gfp_t gfp_mask);
   void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages);
   #ifdef CONFIG_MEMCG
   extern struct static_key_false memcg_sockets_enabled_key;
diff --combined include/linux/mm.h

index e59646a,11c3855..ed2552c
--- 1/include/linux/mm.h
--- 2/include/linux/mm.h
+++ b/include/linux/mm.h
@@@ -829,8 -829,6 +829,8 @@@ static inline void *kvcalloc(size_t n, 
         return kvmalloc_array(n, size, flags | __GFP_ZERO);
   }
   
+ +extern void *kvrealloc(const void *p, size_t oldsize, size_t newsize,
+ +              gfp_t flags);
   extern void kvfree(const void *addr);
   extern void kvfree_sensitive(const void *addr, size_t len);
   
@@@ -1216,18 -1214,10 +1216,10 @@@ static inline void get_page(struct pag
   }
   
   bool __must_check try_grab_page(struct page *page, unsigned int flags);
- __maybe_unused struct page *try_grab_compound_head(struct page *page, int refs,
-                                                  unsigned int flags);
+ struct page *try_grab_compound_head(struct page *page, int refs,
+                                   unsigned int flags);
   
- 
- static inline __must_check bool try_get_page(struct page *page)
- {
-       page = compound_head(page);
-       if (WARN_ON_ONCE(page_ref_count(page) <= 0))
-               return false;
-       page_ref_inc(page);
-       return true;
- }
+ struct page *try_get_compound_head(struct page *page, int refs);
   
   static inline void put_page(struct page *page)
   {
@@@ -1849,7 -1839,6 +1841,6 @@@ int __account_locked_vm(struct mm_struc
   struct kvec;
   int get_kernel_pages(const struct kvec *iov, int nr_pages, int write,
                         struct page **pages);
- int get_kernel_page(unsigned long start, int write, struct page **pages);
   struct page *get_dump_page(unsigned long addr);
   
   extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
@@@ -3121,7 -3110,7 +3112,7 @@@ extern void memory_failure_queue_kick(i
   extern int unpoison_memory(unsigned long pfn);
   extern int sysctl_memory_failure_early_kill;
   extern int sysctl_memory_failure_recovery;
- extern void shake_page(struct page *p, int access);
+ extern void shake_page(struct page *p);
   extern atomic_long_t num_poisoned_pages __read_mostly;
   extern int soft_offline_page(unsigned long pfn, int flags);
   
diff --combined include/linux/syscalls.h

index 2b47584,00bc170..60a3ab0
--- 1/include/linux/syscalls.h
--- 2/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@@ -915,6 -915,7 +915,7 @@@ asmlinkage long sys_mincore(unsigned lo
   asmlinkage long sys_madvise(unsigned long start, size_t len, int behavior);
   asmlinkage long sys_process_madvise(int pidfd, const struct iovec __user *vec,
                         size_t vlen, int behavior, unsigned int flags);
+ asmlinkage long sys_process_mrelease(int pidfd, unsigned int flags);
   asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
                         unsigned long prot, unsigned long pgoff,
                         unsigned long flags);
@@@ -1158,6 -1159,7 +1159,6 @@@ asmlinkage long sys_ustat(unsigned dev
   asmlinkage long sys_vfork(void);
   asmlinkage long sys_recv(int, void __user *, size_t, unsigned);
   asmlinkage long sys_send(int, void __user *, size_t, unsigned);
- -asmlinkage long sys_bdflush(int func, long data);
   asmlinkage long sys_oldumount(char __user *name);
   asmlinkage long sys_uselib(const char __user *library);
   asmlinkage long sys_sysfs(int option,
diff --combined include/linux/writeback.h

index 270677d,aeda2c0..d1f65ad
--- 1/include/linux/writeback.h
--- 2/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@@ -218,7 -218,7 +218,7 @@@ void wbc_attach_and_unlock_inode(struc
   void wbc_detach_inode(struct writeback_control *wbc);
   void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
                               size_t bytes);
- int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
+ int cgroup_writeback_by_id(u64 bdi_id, int memcg_id,
                            enum wb_reason reason, struct wb_completion *done);
   void cgroup_writeback_umount(void);
   bool cleanup_offline_cgwb(struct bdi_writeback *wb);
@@@ -336,9 -336,14 +336,9 @@@ static inline void cgroup_writeback_umo
   /*
    * mm/page-writeback.c
    */
- -#ifdef CONFIG_BLOCK
   void laptop_io_completion(struct backing_dev_info *info);
   void laptop_sync_completion(void);
- -void laptop_mode_sync(struct work_struct *work);
   void laptop_mode_timer_fn(struct timer_list *t);
- -#else
- -static inline void laptop_sync_completion(void) { }
- -#endif
   bool node_dirty_ok(struct pglist_data *pgdat);
   int wb_domain_init(struct wb_domain *dom, gfp_t gfp);
   #ifdef CONFIG_CGROUP_WRITEBACK
@@@ -374,7 -379,7 +374,7 @@@ int dirty_writeback_centisecs_handler(s
   void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
   unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
   
- void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time);
+ void wb_update_bandwidth(struct bdi_writeback *wb);
   void balance_dirty_pages_ratelimited(struct address_space *mapping);
   bool wb_over_bg_thresh(struct bdi_writeback *wb);
   
diff --combined kernel/signal.c

index cf7e250,8921c4a..952741f
--- 1/kernel/signal.c
--- 2/kernel/signal.c
+++ b/kernel/signal.c
@@@ -54,7 -54,6 +54,7 @@@
   #include <asm/unistd.h>
   #include <asm/siginfo.h>
   #include <asm/cacheflush.h>
+ +#include <asm/syscall.h>      /* for syscall_get_* */
   
   /*
    * SLAB caches for signal bits.
@@@ -1214,7 -1213,7 +1214,7 @@@ static inline bool has_si_pid_and_uid(s
         case SIL_FAULT_MCEERR:
         case SIL_FAULT_BNDERR:
         case SIL_FAULT_PKUERR:
- -      case SIL_PERF_EVENT:
+ +      case SIL_FAULT_PERF_EVENT:
         case SIL_SYS:
                 ret = false;
                 break;
@@@ -1323,7 -1322,7 +1323,7 @@@ int do_send_sig_info(int sig, struct ke
    * that is why we also clear SIGNAL_UNKILLABLE.
    */
   static int
- -force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t)
+ +force_sig_info_to_task(struct kernel_siginfo *info, struct task_struct *t, bool sigdfl)
   {
         unsigned long int flags;
         int ret, blocked, ignored;
@@@ -1334,7 -1333,7 +1334,7 @@@
         action = &t->sighand->action[sig-1];
         ignored = action->sa.sa_handler == SIG_IGN;
         blocked = sigismember(&t->blocked, sig);
- -      if (blocked || ignored) {
+ +      if (blocked || ignored || sigdfl) {
                 action->sa.sa_handler = SIG_DFL;
                 if (blocked) {
                         sigdelset(&t->blocked, sig);
@@@ -1355,7 -1354,7 +1355,7 @@@
   
   int force_sig_info(struct kernel_siginfo *info)
   {
- -      return force_sig_info_to_task(info, current);
+ +      return force_sig_info_to_task(info, current, false);
   }
   
   /*
@@@ -1414,21 -1413,6 +1414,21 @@@ struct sighand_struct *__lock_task_sigh
         return sighand;
   }
   
+ +#ifdef CONFIG_LOCKDEP
+ +void lockdep_assert_task_sighand_held(struct task_struct *task)
+ +{
+ +      struct sighand_struct *sighand;
+ +
+ +      rcu_read_lock();
+ +      sighand = rcu_dereference(task->sighand);
+ +      if (sighand)
+ +              lockdep_assert_held(&sighand->siglock);
+ +      else
+ +              WARN_ON_ONCE(1);
+ +      rcu_read_unlock();
+ +}
+ +#endif
+ +
   /*
    * send signal info to all the members of a group
    */
@@@ -1682,6 -1666,7 +1682,6 @@@ void force_sigsegv(int sig
   }
   
   int force_sig_fault_to_task(int sig, int code, void __user *addr
- -      ___ARCH_SI_TRAPNO(int trapno)
         ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
         , struct task_struct *t)
   {
@@@ -1692,22 -1677,28 +1692,22 @@@
         info.si_errno = 0;
         info.si_code  = code;
         info.si_addr  = addr;
- -#ifdef __ARCH_SI_TRAPNO
- -      info.si_trapno = trapno;
- -#endif
   #ifdef __ia64__
         info.si_imm = imm;
         info.si_flags = flags;
         info.si_isr = isr;
   #endif
- -      return force_sig_info_to_task(&info, t);
+ +      return force_sig_info_to_task(&info, t, false);
   }
   
   int force_sig_fault(int sig, int code, void __user *addr
- -      ___ARCH_SI_TRAPNO(int trapno)
         ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr))
   {
         return force_sig_fault_to_task(sig, code, addr
- -                                     ___ARCH_SI_TRAPNO(trapno)
                                        ___ARCH_SI_IA64(imm, flags, isr), current);
   }
   
   int send_sig_fault(int sig, int code, void __user *addr
- -      ___ARCH_SI_TRAPNO(int trapno)
         ___ARCH_SI_IA64(int imm, unsigned int flags, unsigned long isr)
         , struct task_struct *t)
   {
@@@ -1718,6 -1709,9 +1718,6 @@@
         info.si_errno = 0;
         info.si_code  = code;
         info.si_addr  = addr;
- -#ifdef __ARCH_SI_TRAPNO
- -      info.si_trapno = trapno;
- -#endif
   #ifdef __ia64__
         info.si_imm = imm;
         info.si_flags = flags;
@@@ -1799,27 -1793,6 +1799,27 @@@ int force_sig_perf(void __user *addr, u
         return force_sig_info(&info);
   }
   
+ +/**
+ + * force_sig_seccomp - signals the task to allow in-process syscall emulation
+ + * @syscall: syscall number to send to userland
+ + * @reason: filter-supplied reason code to send to userland (via si_errno)
+ + *
+ + * Forces a SIGSYS with a code of SYS_SECCOMP and related sigsys info.
+ + */
+ +int force_sig_seccomp(int syscall, int reason, bool force_coredump)
+ +{
+ +      struct kernel_siginfo info;
+ +
+ +      clear_siginfo(&info);
+ +      info.si_signo = SIGSYS;
+ +      info.si_code = SYS_SECCOMP;
+ +      info.si_call_addr = (void __user *)KSTK_EIP(current);
+ +      info.si_errno = reason;
+ +      info.si_arch = syscall_get_arch(current);
+ +      info.si_syscall = syscall;
+ +      return force_sig_info_to_task(&info, current, force_coredump);
+ +}
+ +
   /* For the crazy architectures that include trap information in
    * the errno field, instead of an actual errno value.
    */
@@@ -1835,39 -1808,6 +1835,39 @@@ int force_sig_ptrace_errno_trap(int err
         return force_sig_info(&info);
   }
   
+ +/* For the rare architectures that include trap information using
+ + * si_trapno.
+ + */
+ +int force_sig_fault_trapno(int sig, int code, void __user *addr, int trapno)
+ +{
+ +      struct kernel_siginfo info;
+ +
+ +      clear_siginfo(&info);
+ +      info.si_signo = sig;
+ +      info.si_errno = 0;
+ +      info.si_code  = code;
+ +      info.si_addr  = addr;
+ +      info.si_trapno = trapno;
+ +      return force_sig_info(&info);
+ +}
+ +
+ +/* For the rare architectures that include trap information using
+ + * si_trapno.
+ + */
+ +int send_sig_fault_trapno(int sig, int code, void __user *addr, int trapno,
+ +                        struct task_struct *t)
+ +{
+ +      struct kernel_siginfo info;
+ +
+ +      clear_siginfo(&info);
+ +      info.si_signo = sig;
+ +      info.si_errno = 0;
+ +      info.si_code  = code;
+ +      info.si_addr  = addr;
+ +      info.si_trapno = trapno;
+ +      return send_sig_info(info.si_signo, &info, t);
+ +}
+ +
   int kill_pgrp(struct pid *pid, int sig, int priv)
   {
         int ret;
@@@ -2617,7 -2557,7 +2617,7 @@@ static void hide_si_addr_tag_bits(struc
         case SIL_FAULT_MCEERR:
         case SIL_FAULT_BNDERR:
         case SIL_FAULT_PKUERR:
- -      case SIL_PERF_EVENT:
+ +      case SIL_FAULT_PERF_EVENT:
                 ksig->info.si_addr = arch_untagged_si_addr(
                         ksig->info.si_addr, ksig->sig, ksig->info.si_code);
                 break;
@@@ -3302,14 -3242,11 +3302,14 @@@ enum siginfo_layout siginfo_layout(unsi
                                 layout = SIL_FAULT_PKUERR;
   #endif
                         else if ((sig == SIGTRAP) && (si_code == TRAP_PERF))
- -                              layout = SIL_PERF_EVENT;
- -#ifdef __ARCH_SI_TRAPNO
- -                      else if (layout == SIL_FAULT)
+ +                              layout = SIL_FAULT_PERF_EVENT;
+ +                      else if (IS_ENABLED(CONFIG_SPARC) &&
+ +                               (sig == SIGILL) && (si_code == ILL_ILLTRP))
+ +                              layout = SIL_FAULT_TRAPNO;
+ +                      else if (IS_ENABLED(CONFIG_ALPHA) &&
+ +                               ((sig == SIGFPE) ||
+ +                                ((sig == SIGTRAP) && (si_code == TRAP_UNK))))
                                 layout = SIL_FAULT_TRAPNO;
- -#endif
                 }
                 else if (si_code <= NSIGPOLL)
                         layout = SIL_POLL;
@@@ -3431,7 -3368,7 +3431,7 @@@ void copy_siginfo_to_external32(struct 
                 to->si_addr = ptr_to_compat(from->si_addr);
                 to->si_pkey = from->si_pkey;
                 break;
- -      case SIL_PERF_EVENT:
+ +      case SIL_FAULT_PERF_EVENT:
                 to->si_addr = ptr_to_compat(from->si_addr);
                 to->si_perf_data = from->si_perf_data;
                 to->si_perf_type = from->si_perf_type;
@@@ -3508,7 -3445,7 +3508,7 @@@ static int post_copy_siginfo_from_user3
                 to->si_addr = compat_ptr(from->si_addr);
                 to->si_pkey = from->si_pkey;
                 break;
- -      case SIL_PERF_EVENT:
+ +      case SIL_FAULT_PERF_EVENT:
                 to->si_addr = compat_ptr(from->si_addr);
                 to->si_perf_data = from->si_perf_data;
                 to->si_perf_type = from->si_perf_type;
@@@ -4726,7 -4663,7 +4726,7 @@@ void __init signals_init(void
   {
         siginfo_buildtime_checks();
   
-       sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC);
+       sigqueue_cachep = KMEM_CACHE(sigqueue, SLAB_PANIC | SLAB_ACCOUNT);
   }
   
   #ifdef CONFIG_KGDB_KDB
diff --combined kernel/sys_ni.c

index cb6f98f,18a9c2c..64578ad
--- 1/kernel/sys_ni.c
--- 2/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@@ -289,6 -289,7 +289,7 @@@ COND_SYSCALL(munlockall)
   COND_SYSCALL(mincore);
   COND_SYSCALL(madvise);
   COND_SYSCALL(process_madvise);
+ COND_SYSCALL(process_mrelease);
   COND_SYSCALL(remap_file_pages);
   COND_SYSCALL(mbind);
   COND_SYSCALL_COMPAT(mbind);
@@@ -416,6 -417,7 +417,6 @@@ COND_SYSCALL(epoll_wait)
   COND_SYSCALL(recv);
   COND_SYSCALL_COMPAT(recv);
   COND_SYSCALL(send);
- -COND_SYSCALL(bdflush);
   COND_SYSCALL(uselib);
   
   /* optional: time32 */
diff --combined kernel/sysctl.c

index 25e49b4,297f0b3..083be6a
--- 1/kernel/sysctl.c
--- 2/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@@ -536,21 -536,6 +536,21 @@@ static void proc_put_char(void **buf, s
         }
   }
   
+ +static int do_proc_dobool_conv(bool *negp, unsigned long *lvalp,
+ +                              int *valp,
+ +                              int write, void *data)
+ +{
+ +      if (write) {
+ +              *(bool *)valp = *lvalp;
+ +      } else {
+ +              int val = *(bool *)valp;
+ +
+ +              *lvalp = (unsigned long)val;
+ +              *negp = false;
+ +      }
+ +      return 0;
+ +}
+ +
   static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
                                  int *valp,
                                  int write, void *data)
@@@ -813,26 -798,6 +813,26 @@@ static int do_proc_douintvec(struct ctl
                                    buffer, lenp, ppos, conv, data);
   }
   
+ +/**
+ + * proc_dobool - read/write a bool
+ + * @table: the sysctl table
+ + * @write: %TRUE if this is a write to the sysctl file
+ + * @buffer: the user buffer
+ + * @lenp: the size of the user buffer
+ + * @ppos: file position
+ + *
+ + * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
+ + * values from/to the user buffer, treated as an ASCII string.
+ + *
+ + * Returns 0 on success.
+ + */
+ +int proc_dobool(struct ctl_table *table, int write, void *buffer,
+ +              size_t *lenp, loff_t *ppos)
+ +{
+ +      return do_proc_dointvec(table, write, buffer, lenp, ppos,
+ +                              do_proc_dobool_conv, NULL);
+ +}
+ +
   /**
    * proc_dointvec - read a vector of integers
    * @table: the sysctl table
@@@ -1665,12 -1630,6 +1665,12 @@@ int proc_dostring(struct ctl_table *tab
         return -ENOSYS;
   }
   
+ +int proc_dobool(struct ctl_table *table, int write,
+ +              void *buffer, size_t *lenp, loff_t *ppos)
+ +{
+ +      return -ENOSYS;
+ +}
+ +
   int proc_dointvec(struct ctl_table *table, int write,
                   void *buffer, size_t *lenp, loff_t *ppos)
   {
@@@ -2912,7 -2871,7 +2912,7 @@@ static struct ctl_table vm_table[] = 
                 .data           = &sysctl_compaction_proactiveness,
                 .maxlen         = sizeof(sysctl_compaction_proactiveness),
                 .mode           = 0644,
-               .proc_handler   = proc_dointvec_minmax,
+               .proc_handler   = compaction_proactiveness_sysctl_handler,
                 .extra1         = SYSCTL_ZERO,
                 .extra2         = &one_hundred,
         },
@@@ -3466,7 -3425,6 +3466,7 @@@ int __init sysctl_init(void
    * No sense putting this after each symbol definition, twice,
    * exception granted :-)
    */
+ +EXPORT_SYMBOL(proc_dobool);
   EXPORT_SYMBOL(proc_dointvec);
   EXPORT_SYMBOL(proc_douintvec);
   EXPORT_SYMBOL(proc_dointvec_jiffies);
diff --combined kernel/time/posix-timers.c

index 3913222,7363f81..1cd10b1
--- 1/kernel/time/posix-timers.c
--- 2/kernel/time/posix-timers.c
+++ b/kernel/time/posix-timers.c
@@@ -273,8 -273,8 +273,8 @@@ static int posix_get_hrtimer_res(clocki
   static __init int init_posix_timers(void)
   {
         posix_timers_cache = kmem_cache_create("posix_timers_cache",
-                                       sizeof (struct k_itimer), 0, SLAB_PANIC,
-                                       NULL);
+                                       sizeof(struct k_itimer), 0,
+                                       SLAB_PANIC | SLAB_ACCOUNT, NULL);
         return 0;
   }
   __initcall(init_posix_timers);
@@@ -336,7 -336,7 +336,7 @@@ void posixtimer_rearm(struct kernel_sig
   int posix_timer_event(struct k_itimer *timr, int si_private)
   {
         enum pid_type type;
- -      int ret = -1;
+ +      int ret;
         /*
          * FIXME: if ->sigq is queued we can race with
          * dequeue_signal()->posixtimer_rearm().
diff --combined lib/scatterlist.c

index f4b1ff7,627aa84..abb3432
--- 1/lib/scatterlist.c
--- 2/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@@ -182,7 -182,6 +182,7 @@@ static void sg_kfree(struct scatterlis
    * @nents_first_chunk: Number of entries int the (preallocated) first
    *    scatterlist chunk, 0 means no such preallocated first chunk
    * @free_fn:  Free function
+ + * @num_ents: Number of entries in the table
    *
    *  Description:
    *    Free an sg table previously allocated and setup with
@@@ -191,8 -190,7 +191,8 @@@
    *
    **/
   void __sg_free_table(struct sg_table *table, unsigned int max_ents,
- -                   unsigned int nents_first_chunk, sg_free_fn *free_fn)
+ +                   unsigned int nents_first_chunk, sg_free_fn *free_fn,
+ +                   unsigned int num_ents)
   {
         struct scatterlist *sgl, *next;
         unsigned curr_max_ents = nents_first_chunk ?: max_ents;
@@@ -201,8 -199,8 +201,8 @@@
                 return;
   
         sgl = table->sgl;
- -      while (table->orig_nents) {
- -              unsigned int alloc_size = table->orig_nents;
+ +      while (num_ents) {
+ +              unsigned int alloc_size = num_ents;
                 unsigned int sg_size;
   
                 /*
@@@ -220,7 -218,7 +220,7 @@@
                         next = NULL;
                 }
   
- -              table->orig_nents -= sg_size;
+ +              num_ents -= sg_size;
                 if (nents_first_chunk)
                         nents_first_chunk = 0;
                 else
@@@ -233,19 -231,6 +233,19 @@@
   }
   EXPORT_SYMBOL(__sg_free_table);
   
+ +/**
+ + * sg_free_append_table - Free a previously allocated append sg table.
+ + * @table:     The mapped sg append table header
+ + *
+ + **/
+ +void sg_free_append_table(struct sg_append_table *table)
+ +{
+ +      __sg_free_table(&table->sgt, SG_MAX_SINGLE_ALLOC, false, sg_kfree,
+ +                      table->total_nents);
+ +}
+ +EXPORT_SYMBOL(sg_free_append_table);
+ +
+ +
   /**
    * sg_free_table - Free a previously allocated sg table
    * @table:    The mapped sg table header
@@@ -253,8 -238,7 +253,8 @@@
    **/
   void sg_free_table(struct sg_table *table)
   {
- -      __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree);
+ +      __sg_free_table(table, SG_MAX_SINGLE_ALLOC, false, sg_kfree,
+ +                      table->orig_nents);
   }
   EXPORT_SYMBOL(sg_free_table);
   
@@@ -375,12 -359,13 +375,12 @@@ int sg_alloc_table(struct sg_table *tab
         ret = __sg_alloc_table(table, nents, SG_MAX_SINGLE_ALLOC,
                                NULL, 0, gfp_mask, sg_kmalloc);
         if (unlikely(ret))
- -              __sg_free_table(table, SG_MAX_SINGLE_ALLOC, 0, sg_kfree);
- -
+ +              sg_free_table(table);
         return ret;
   }
   EXPORT_SYMBOL(sg_alloc_table);
   
- -static struct scatterlist *get_next_sg(struct sg_table *table,
+ +static struct scatterlist *get_next_sg(struct sg_append_table *table,
                                        struct scatterlist *cur,
                                        unsigned long needed_sges,
                                        gfp_t gfp_mask)
@@@ -401,52 -386,54 +401,52 @@@
                 return ERR_PTR(-ENOMEM);
         sg_init_table(new_sg, alloc_size);
         if (cur) {
+ +              table->total_nents += alloc_size - 1;
                 __sg_chain(next_sg, new_sg);
- -              table->orig_nents += alloc_size - 1;
         } else {
- -              table->sgl = new_sg;
- -              table->orig_nents = alloc_size;
- -              table->nents = 0;
+ +              table->sgt.sgl = new_sg;
+ +              table->total_nents = alloc_size;
         }
         return new_sg;
   }
   
   /**
- - * __sg_alloc_table_from_pages - Allocate and initialize an sg table from
- - *                             an array of pages
- - * @sgt:       The sg table header to use
- - * @pages:     Pointer to an array of page pointers
- - * @n_pages:   Number of pages in the pages array
+ + * sg_alloc_append_table_from_pages - Allocate and initialize an append sg
+ + *                                    table from an array of pages
+ + * @sgt_append:  The sg append table to use
+ + * @pages:       Pointer to an array of page pointers
+ + * @n_pages:     Number of pages in the pages array
    * @offset:      Offset from start of the first page to the start of a buffer
    * @size:        Number of valid bytes in the buffer (after offset)
    * @max_segment: Maximum size of a scatterlist element in bytes
- - * @prv:       Last populated sge in sgt
    * @left_pages:  Left pages caller have to set after this call
    * @gfp_mask:  GFP allocation mask
    *
    * Description:
- - *    If @prv is NULL, allocate and initialize an sg table from a list of pages,
- - *    else reuse the scatterlist passed in at @prv.
- - *    Contiguous ranges of the pages are squashed into a single scatterlist
- - *    entry up to the maximum size specified in @max_segment.  A user may
- - *    provide an offset at a start and a size of valid data in a buffer
- - *    specified by the page array.
+ + *    On the first call, allocates and initializes an sg table from a list of
+ + *    pages, else reuses the scatterlist from sgt_append. Contiguous ranges of
+ + *    the pages are squashed into a single scatterlist entry up to the maximum
+ + *    size specified in @max_segment.  A user may provide an offset at a start
+ + *    and a size of valid data in a buffer specified by the page array. The
+ + *    returned sg table is released by sg_free_append_table().
    *
    * Returns:
- - *   Last SGE in sgt on success, PTR_ERR on otherwise.
- - *   The allocation in @sgt must be released by sg_free_table.
+ + *   0 on success, negative error on failure
    *
    * Notes:
    *   If this function returns non-0 (eg failure), the caller must call
- - *   sg_free_table() to cleanup any leftover allocations.
+ + *   sg_free_append_table() to cleanup any leftover allocations.
+ + *
+ + *   In the first call, sgt_append must be initialized.
    */
- -struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+ +int sg_alloc_append_table_from_pages(struct sg_append_table *sgt_append,
                 struct page **pages, unsigned int n_pages, unsigned int offset,
                 unsigned long size, unsigned int max_segment,
- -              struct scatterlist *prv, unsigned int left_pages,
- -              gfp_t gfp_mask)
+ +              unsigned int left_pages, gfp_t gfp_mask)
   {
         unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
         unsigned int added_nents = 0;
- -      struct scatterlist *s = prv;
+ +      struct scatterlist *s = sgt_append->prv;
   
         /*
          * The algorithm below requires max_segment to be aligned to PAGE_SIZE
@@@ -454,26 -441,25 +454,26 @@@
          */
         max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
         if (WARN_ON(max_segment < PAGE_SIZE))
- -              return ERR_PTR(-EINVAL);
+ +              return -EINVAL;
   
- -      if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
- -              return ERR_PTR(-EOPNOTSUPP);
+ +      if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && sgt_append->prv)
+ +              return -EOPNOTSUPP;
   
- -      if (prv) {
- -              unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
- -                                     prv->offset + prv->length) /
- -                                    PAGE_SIZE;
+ +      if (sgt_append->prv) {
+ +              unsigned long paddr =
+ +                      (page_to_pfn(sg_page(sgt_append->prv)) * PAGE_SIZE +
+ +                       sgt_append->prv->offset + sgt_append->prv->length) /
+ +                      PAGE_SIZE;
   
                 if (WARN_ON(offset))
- -                      return ERR_PTR(-EINVAL);
+ +                      return -EINVAL;
   
                 /* Merge contiguous pages into the last SG */
- -              prv_len = prv->length;
+ +              prv_len = sgt_append->prv->length;
                 while (n_pages && page_to_pfn(pages[0]) == paddr) {
- -                      if (prv->length + PAGE_SIZE > max_segment)
+ +                      if (sgt_append->prv->length + PAGE_SIZE > max_segment)
                                 break;
- -                      prv->length += PAGE_SIZE;
+ +                      sgt_append->prv->length += PAGE_SIZE;
                         paddr++;
                         pages++;
                         n_pages--;
@@@ -510,16 -496,15 +510,16 @@@
                 }
   
                 /* Pass how many chunks might be left */
- -              s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+ +              s = get_next_sg(sgt_append, s, chunks - i + left_pages,
+ +                              gfp_mask);
                 if (IS_ERR(s)) {
                         /*
                          * Adjust entry length to be as before function was
                          * called.
                          */
- -                      if (prv)
- -                              prv->length = prv_len;
- -                      return s;
+ +                      if (sgt_append->prv)
+ +                              sgt_append->prv->length = prv_len;
+ +                      return PTR_ERR(s);
                 }
                 chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
                 sg_set_page(s, pages[cur_page],
@@@ -529,58 -514,42 +529,58 @@@
                 offset = 0;
                 cur_page = j;
         }
- -      sgt->nents += added_nents;
+ +      sgt_append->sgt.nents += added_nents;
+ +      sgt_append->sgt.orig_nents = sgt_append->sgt.nents;
+ +      sgt_append->prv = s;
   out:
         if (!left_pages)
                 sg_mark_end(s);
- -      return s;
+ +      return 0;
   }
- -EXPORT_SYMBOL(__sg_alloc_table_from_pages);
+ +EXPORT_SYMBOL(sg_alloc_append_table_from_pages);
   
   /**
- - * sg_alloc_table_from_pages - Allocate and initialize an sg table from
- - *                           an array of pages
+ + * sg_alloc_table_from_pages_segment - Allocate and initialize an sg table from
+ + *                                     an array of pages and given maximum
+ + *                                     segment.
    * @sgt:       The sg table header to use
    * @pages:     Pointer to an array of page pointers
    * @n_pages:   Number of pages in the pages array
    * @offset:      Offset from start of the first page to the start of a buffer
    * @size:        Number of valid bytes in the buffer (after offset)
+ + * @max_segment: Maximum size of a scatterlist element in bytes
    * @gfp_mask:  GFP allocation mask
    *
    *  Description:
    *    Allocate and initialize an sg table from a list of pages. Contiguous
- - *    ranges of the pages are squashed into a single scatterlist node. A user
- - *    may provide an offset at a start and a size of valid data in a buffer
- - *    specified by the page array. The returned sg table is released by
- - *    sg_free_table.
+ + *    ranges of the pages are squashed into a single scatterlist node up to the
+ + *    maximum size specified in @max_segment. A user may provide an offset at a
+ + *    start and a size of valid data in a buffer specified by the page array.
    *
- - * Returns:
+ + *    The returned sg table is released by sg_free_table.
+ + *
+ + *  Returns:
    *   0 on success, negative error on failure
    */
- -int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
- -                            unsigned int n_pages, unsigned int offset,
- -                            unsigned long size, gfp_t gfp_mask)
+ +int sg_alloc_table_from_pages_segment(struct sg_table *sgt, struct page **pages,
+ +                              unsigned int n_pages, unsigned int offset,
+ +                              unsigned long size, unsigned int max_segment,
+ +                              gfp_t gfp_mask)
   {
- -      return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages,
- -                      offset, size, UINT_MAX, NULL, 0, gfp_mask));
+ +      struct sg_append_table append = {};
+ +      int err;
+ +
+ +      err = sg_alloc_append_table_from_pages(&append, pages, n_pages, offset,
+ +                                             size, max_segment, 0, gfp_mask);
+ +      if (err) {
+ +              sg_free_append_table(&append);
+ +              return err;
+ +      }
+ +      memcpy(sgt, &append.sgt, sizeof(*sgt));
+ +      WARN_ON(append.total_nents != sgt->orig_nents);
+ +      return 0;
   }
- -EXPORT_SYMBOL(sg_alloc_table_from_pages);
+ +EXPORT_SYMBOL(sg_alloc_table_from_pages_segment);
   
   #ifdef CONFIG_SGL_ALLOC
   
@@@ -918,9 -887,8 +918,8 @@@ void sg_miter_stop(struct sg_mapping_it
                 miter->__offset += miter->consumed;
                 miter->__remaining -= miter->consumed;
   
-               if ((miter->__flags & SG_MITER_TO_SG) &&
-                   !PageSlab(miter->page))
-                       flush_kernel_dcache_page(miter->page);
+               if (miter->__flags & SG_MITER_TO_SG)
+                       flush_dcache_page(miter->page);
   
                 if (miter->__flags & SG_MITER_ATOMIC) {
                         WARN_ON_ONCE(preemptible());
diff --combined lib/test_kasan.c

index 8be9d4b,30f2cde..8835e07
--- 1/lib/test_kasan.c
--- 2/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@@ -53,6 -53,7 +53,6 @@@ static int kasan_test_init(struct kuni
         }
   
         multishot = kasan_save_enable_multi_shot();
- -      kasan_set_tagging_report_once(false);
         fail_data.report_found = false;
         kunit_add_named_resource(test, NULL, NULL, &resource,
                                         "kasan_data", &fail_data);
@@@ -61,6 -62,7 +61,6 @@@
   
   static void kasan_test_exit(struct kunit *test)
   {
- -      kasan_set_tagging_report_once(true);
         kasan_restore_multi_shot(multishot);
         KUNIT_EXPECT_FALSE(test, fail_data.report_found);
   }
@@@ -120,12 -122,28 +120,28 @@@
   static void kmalloc_oob_right(struct kunit *test)
   {
         char *ptr;
-       size_t size = 123;
+       size_t size = 128 - KASAN_GRANULE_SIZE - 5;
   
         ptr = kmalloc(size, GFP_KERNEL);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 'x');
+       /*
+        * An unaligned access past the requested kmalloc size.
+        * Only generic KASAN can precisely detect these.
+        */
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+               KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 'x');
+ 
+       /*
+        * An aligned access into the first out-of-bounds granule that falls
+        * within the aligned kmalloc object.
+        */
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + 5] = 'y');
+ 
+       /* Out-of-bounds access past the aligned kmalloc object. */
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] =
+                                       ptr[size + KASAN_GRANULE_SIZE + 5]);
+ 
         kfree(ptr);
   }
   
@@@ -149,7 -167,7 +165,7 @@@ static void kmalloc_node_oob_right(stru
         ptr = kmalloc_node(size, GFP_KERNEL, 0);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]);
         kfree(ptr);
   }
   
@@@ -185,7 -203,7 +201,7 @@@ static void kmalloc_pagealloc_uaf(struc
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
         kfree(ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0);
+       KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
   }
   
   static void kmalloc_pagealloc_invalid_free(struct kunit *test)
@@@ -219,7 -237,7 +235,7 @@@ static void pagealloc_oob_right(struct 
         ptr = page_address(pages);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = ptr[size]);
         free_pages((unsigned long)ptr, order);
   }
   
@@@ -234,7 -252,7 +250,7 @@@ static void pagealloc_uaf(struct kunit 
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
         free_pages((unsigned long)ptr, order);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0);
+       KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
   }
   
   static void kmalloc_large_oob_right(struct kunit *test)
@@@ -410,64 -428,70 +426,70 @@@ static void kmalloc_uaf_16(struct kuni
         kfree(ptr1);
   }
   
+ /*
+  * Note: in the memset tests below, the written range touches both valid and
+  * invalid memory. This makes sure that the instrumentation does not only check
+  * the starting address but the whole range.
+  */
+ 
   static void kmalloc_oob_memset_2(struct kunit *test)
   {
         char *ptr;
-       size_t size = 8;
+       size_t size = 128 - KASAN_GRANULE_SIZE;
   
         ptr = kmalloc(size, GFP_KERNEL);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 7 + OOB_TAG_OFF, 0, 2));
+       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 1, 0, 2));
         kfree(ptr);
   }
   
   static void kmalloc_oob_memset_4(struct kunit *test)
   {
         char *ptr;
-       size_t size = 8;
+       size_t size = 128 - KASAN_GRANULE_SIZE;
   
         ptr = kmalloc(size, GFP_KERNEL);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 5 + OOB_TAG_OFF, 0, 4));
+       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 3, 0, 4));
         kfree(ptr);
   }
   
- 
   static void kmalloc_oob_memset_8(struct kunit *test)
   {
         char *ptr;
-       size_t size = 8;
+       size_t size = 128 - KASAN_GRANULE_SIZE;
   
         ptr = kmalloc(size, GFP_KERNEL);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 8));
+       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 7, 0, 8));
         kfree(ptr);
   }
   
   static void kmalloc_oob_memset_16(struct kunit *test)
   {
         char *ptr;
-       size_t size = 16;
+       size_t size = 128 - KASAN_GRANULE_SIZE;
   
         ptr = kmalloc(size, GFP_KERNEL);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 16));
+       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + size - 15, 0, 16));
         kfree(ptr);
   }
   
   static void kmalloc_oob_in_memset(struct kunit *test)
   {
         char *ptr;
-       size_t size = 666;
+       size_t size = 128 - KASAN_GRANULE_SIZE;
   
         ptr = kmalloc(size, GFP_KERNEL);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
-       KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + 5 + OOB_TAG_OFF));
+       KUNIT_EXPECT_KASAN_FAIL(test,
+                               memset(ptr, 0, size + KASAN_GRANULE_SIZE));
         kfree(ptr);
   }
   
@@@ -477,11 -501,17 +499,17 @@@ static void kmalloc_memmove_invalid_siz
         size_t size = 64;
         volatile size_t invalid_size = -2;
   
+       /*
+        * Hardware tag-based mode doesn't check memmove for negative size.
+        * As a result, this test introduces a side-effect memory corruption,
+        * which can result in a crash.
+        */
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_HW_TAGS);
+ 
         ptr = kmalloc(size, GFP_KERNEL);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
         memset((char *)ptr, 0, 64);
- 
         KUNIT_EXPECT_KASAN_FAIL(test,
                 memmove((char *)ptr, (char *)ptr + 4, invalid_size));
         kfree(ptr);
@@@ -496,7 -526,7 +524,7 @@@ static void kmalloc_uaf(struct kunit *t
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
         kfree(ptr);
-       KUNIT_EXPECT_KASAN_FAIL(test, *(ptr + 8) = 'x');
+       KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[8]);
   }
   
   static void kmalloc_uaf_memset(struct kunit *test)
@@@ -504,6 -534,12 +532,12 @@@
         char *ptr;
         size_t size = 33;
   
+       /*
+        * Only generic KASAN uses quarantine, which is required to avoid a
+        * kernel memory corruption this test causes.
+        */
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
+ 
         ptr = kmalloc(size, GFP_KERNEL);
         KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
   
@@@ -535,7 -571,7 +569,7 @@@ again
                 goto again;
         }
   
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr1[40] = 'x');
+       KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr1)[40]);
         KUNIT_EXPECT_PTR_NE(test, ptr1, ptr2);
   
         kfree(ptr2);
@@@ -682,7 -718,7 +716,7 @@@ static void ksize_unpoisons_memory(stru
         ptr[size] = 'x';
   
         /* This one must. */
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr[real_size] = 'y');
+       KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[real_size]);
   
         kfree(ptr);
   }
@@@ -701,8 -737,8 +735,8 @@@ static void ksize_uaf(struct kunit *tes
         kfree(ptr);
   
         KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr));
-       KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = *ptr);
-       KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = *(ptr + size));
+       KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[0]);
+       KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)ptr)[size]);
   }
   
   static void kasan_stack_oob(struct kunit *test)
diff --combined mm/backing-dev.c

index cd06dca,6122c78..4a9d4e2
--- 1/mm/backing-dev.c
--- 2/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@@ -271,6 -271,14 +271,14 @@@ void wb_wakeup_delayed(struct bdi_write
         spin_unlock_bh(&wb->work_lock);
   }
   
+ static void wb_update_bandwidth_workfn(struct work_struct *work)
+ {
+       struct bdi_writeback *wb = container_of(to_delayed_work(work),
+                                               struct bdi_writeback, bw_dwork);
+ 
+       wb_update_bandwidth(wb);
+ }
+ 
   /*
    * Initial write bandwidth: 100 MB/s
    */
@@@ -293,6 -301,7 +301,7 @@@ static int wb_init(struct bdi_writebac
         INIT_LIST_HEAD(&wb->b_dirty_time);
         spin_lock_init(&wb->list_lock);
   
+       atomic_set(&wb->writeback_inodes, 0);
         wb->bw_time_stamp = jiffies;
         wb->balanced_dirty_ratelimit = INIT_BW;
         wb->dirty_ratelimit = INIT_BW;
@@@ -302,6 -311,7 +311,7 @@@
         spin_lock_init(&wb->work_lock);
         INIT_LIST_HEAD(&wb->work_list);
         INIT_DELAYED_WORK(&wb->dwork, wb_workfn);
+       INIT_DELAYED_WORK(&wb->bw_dwork, wb_update_bandwidth_workfn);
         wb->dirty_sleep = jiffies;
   
         err = fprop_local_init_percpu(&wb->completions, gfp);
@@@ -350,6 -360,7 +360,7 @@@ static void wb_shutdown(struct bdi_writ
         mod_delayed_work(bdi_wq, &wb->dwork, 0);
         flush_delayed_work(&wb->dwork);
         WARN_ON(!list_empty(&wb->work_list));
+       flush_delayed_work(&wb->bw_dwork);
   }
   
   static void wb_exit(struct bdi_writeback *wb)
@@@ -807,7 -818,6 +818,7 @@@ struct backing_dev_info *bdi_alloc(int 
         bdi->capabilities = BDI_CAP_WRITEBACK | BDI_CAP_WRITEBACK_ACCT;
         bdi->ra_pages = VM_READAHEAD_PAGES;
         bdi->io_pages = VM_READAHEAD_PAGES;
+ +      timer_setup(&bdi->laptop_mode_wb_timer, laptop_mode_timer_fn, 0);
         return bdi;
   }
   EXPORT_SYMBOL(bdi_alloc);
@@@ -929,8 -939,6 +940,8 @@@ static void bdi_remove_from_list(struc
   
   void bdi_unregister(struct backing_dev_info *bdi)
   {
+ +      del_timer_sync(&bdi->laptop_mode_wb_timer);
+ +
         /* make sure nobody finds us on the bdi_list anymore */
         bdi_remove_from_list(bdi);
         wb_shutdown(&bdi->wb);
diff --combined mm/filemap.c

index 920e8dc,4926f16..dae4812
--- 1/mm/filemap.c
--- 2/mm/filemap.c
+++ b/mm/filemap.c
@@@ -76,9 -76,8 +76,9 @@@
    *      ->swap_lock           (exclusive_swap_page, others)
    *        ->i_pages lock
    *
- - *  ->i_mutex
- - *    ->i_mmap_rwsem          (truncate->unmap_mapping_range)
+ + *  ->i_rwsem
+ + *    ->invalidate_lock               (acquired by fs in truncate path)
+ + *      ->i_mmap_rwsem                (truncate->unmap_mapping_range)
    *
    *  ->mmap_lock
    *    ->i_mmap_rwsem
@@@ -86,10 -85,9 +86,10 @@@
    *        ->i_pages lock      (arch-dependent flush_dcache_mmap_lock)
    *
    *  ->mmap_lock
- - *    ->lock_page             (access_process_vm)
+ + *    ->invalidate_lock               (filemap_fault)
+ + *      ->lock_page           (filemap_fault, access_process_vm)
    *
- - *  ->i_mutex                 (generic_perform_write)
+ + *  ->i_rwsem                 (generic_perform_write)
    *    ->mmap_lock             (fault_in_pages_readable->do_page_fault)
    *
    *  bdi->wb.list_lock
@@@ -260,12 -258,11 +260,11 @@@ static void page_cache_free_page(struc
   void delete_from_page_cache(struct page *page)
   {
         struct address_space *mapping = page_mapping(page);
-       unsigned long flags;
   
         BUG_ON(!PageLocked(page));
-       xa_lock_irqsave(&mapping->i_pages, flags);
+       xa_lock_irq(&mapping->i_pages);
         __delete_from_page_cache(page, NULL);
-       xa_unlock_irqrestore(&mapping->i_pages, flags);
+       xa_unlock_irq(&mapping->i_pages);
   
         page_cache_free_page(mapping, page);
   }
@@@ -337,19 -334,18 +336,18 @@@ void delete_from_page_cache_batch(struc
                                   struct pagevec *pvec)
   {
         int i;
-       unsigned long flags;
   
         if (!pagevec_count(pvec))
                 return;
   
-       xa_lock_irqsave(&mapping->i_pages, flags);
+       xa_lock_irq(&mapping->i_pages);
         for (i = 0; i < pagevec_count(pvec); i++) {
                 trace_mm_filemap_delete_from_page_cache(pvec->pages[i]);
   
                 unaccount_page_cache_page(mapping, pvec->pages[i]);
         }
         page_cache_delete_batch(mapping, pvec);
-       xa_unlock_irqrestore(&mapping->i_pages, flags);
+       xa_unlock_irq(&mapping->i_pages);
   
         for (i = 0; i < pagevec_count(pvec); i++)
                 page_cache_free_page(mapping, pvec->pages[i]);
@@@ -379,32 -375,6 +377,32 @@@ static int filemap_check_and_keep_error
         return 0;
   }
   
+ +/**
+ + * filemap_fdatawrite_wbc - start writeback on mapping dirty pages in range
+ + * @mapping:  address space structure to write
+ + * @wbc:      the writeback_control controlling the writeout
+ + *
+ + * Call writepages on the mapping using the provided wbc to control the
+ + * writeout.
+ + *
+ + * Return: %0 on success, negative error code otherwise.
+ + */
+ +int filemap_fdatawrite_wbc(struct address_space *mapping,
+ +                         struct writeback_control *wbc)
+ +{
+ +      int ret;
+ +
+ +      if (!mapping_can_writeback(mapping) ||
+ +          !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
+ +              return 0;
+ +
+ +      wbc_attach_fdatawrite_inode(wbc, mapping->host);
+ +      ret = do_writepages(mapping, wbc);
+ +      wbc_detach_inode(wbc);
+ +      return ret;
+ +}
+ +EXPORT_SYMBOL(filemap_fdatawrite_wbc);
+ +
   /**
    * __filemap_fdatawrite_range - start writeback on mapping dirty pages in range
    * @mapping:  address space structure to write
@@@ -425,6 -395,7 +423,6 @@@
   int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start,
                                 loff_t end, int sync_mode)
   {
- -      int ret;
         struct writeback_control wbc = {
                 .sync_mode = sync_mode,
                 .nr_to_write = LONG_MAX,
@@@ -432,7 -403,14 +430,7 @@@
                 .range_end = end,
         };
   
- -      if (!mapping_can_writeback(mapping) ||
- -          !mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
- -              return 0;
- -
- -      wbc_attach_fdatawrite_inode(&wbc, mapping->host);
- -      ret = do_writepages(mapping, &wbc);
- -      wbc_detach_inode(&wbc);
- -      return ret;
+ +      return filemap_fdatawrite_wbc(mapping, &wbc);
   }
   
   static inline int __filemap_fdatawrite(struct address_space *mapping,
@@@ -841,7 -819,6 +839,6 @@@ void replace_page_cache_page(struct pag
         void (*freepage)(struct page *) = mapping->a_ops->freepage;
         pgoff_t offset = old->index;
         XA_STATE(xas, &mapping->i_pages, offset);
-       unsigned long flags;
   
         VM_BUG_ON_PAGE(!PageLocked(old), old);
         VM_BUG_ON_PAGE(!PageLocked(new), new);
@@@ -853,7 -830,7 +850,7 @@@
   
         mem_cgroup_migrate(old, new);
   
-       xas_lock_irqsave(&xas, flags);
+       xas_lock_irq(&xas);
         xas_store(&xas, new);
   
         old->mapping = NULL;
@@@ -866,7 -843,7 +863,7 @@@
                 __dec_lruvec_page_state(old, NR_SHMEM);
         if (PageSwapBacked(new))
                 __inc_lruvec_page_state(new, NR_SHMEM);
-       xas_unlock_irqrestore(&xas, flags);
+       xas_unlock_irq(&xas);
         if (freepage)
                 freepage(old);
         put_page(old);
@@@ -1027,44 -1004,6 +1024,44 @@@ struct page *__page_cache_alloc(gfp_t g
   EXPORT_SYMBOL(__page_cache_alloc);
   #endif
   
+ +/*
+ + * filemap_invalidate_lock_two - lock invalidate_lock for two mappings
+ + *
+ + * Lock exclusively invalidate_lock of any passed mapping that is not NULL.
+ + *
+ + * @mapping1: the first mapping to lock
+ + * @mapping2: the second mapping to lock
+ + */
+ +void filemap_invalidate_lock_two(struct address_space *mapping1,
+ +                               struct address_space *mapping2)
+ +{
+ +      if (mapping1 > mapping2)
+ +              swap(mapping1, mapping2);
+ +      if (mapping1)
+ +              down_write(&mapping1->invalidate_lock);
+ +      if (mapping2 && mapping1 != mapping2)
+ +              down_write_nested(&mapping2->invalidate_lock, 1);
+ +}
+ +EXPORT_SYMBOL(filemap_invalidate_lock_two);
+ +
+ +/*
+ + * filemap_invalidate_unlock_two - unlock invalidate_lock for two mappings
+ + *
+ + * Unlock exclusive invalidate_lock of any passed mapping that is not NULL.
+ + *
+ + * @mapping1: the first mapping to unlock
+ + * @mapping2: the second mapping to unlock
+ + */
+ +void filemap_invalidate_unlock_two(struct address_space *mapping1,
+ +                                 struct address_space *mapping2)
+ +{
+ +      if (mapping1)
+ +              up_write(&mapping1->invalidate_lock);
+ +      if (mapping2 && mapping1 != mapping2)
+ +              up_write(&mapping2->invalidate_lock);
+ +}
+ +EXPORT_SYMBOL(filemap_invalidate_unlock_two);
+ +
   /*
    * In order to wait for pages to become available there must be
    * waitqueues associated with pages. By using a hash table of
@@@ -2426,30 -2365,20 +2423,30 @@@ static int filemap_update_page(struct k
   {
         int error;
   
+ +      if (iocb->ki_flags & IOCB_NOWAIT) {
+ +              if (!filemap_invalidate_trylock_shared(mapping))
+ +                      return -EAGAIN;
+ +      } else {
+ +              filemap_invalidate_lock_shared(mapping);
+ +      }
+ +
         if (!trylock_page(page)) {
+ +              error = -EAGAIN;
                 if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
- -                      return -EAGAIN;
+ +                      goto unlock_mapping;
                 if (!(iocb->ki_flags & IOCB_WAITQ)) {
+ +                      filemap_invalidate_unlock_shared(mapping);
                         put_and_wait_on_page_locked(page, TASK_KILLABLE);
                         return AOP_TRUNCATED_PAGE;
                 }
                 error = __lock_page_async(page, iocb->ki_waitq);
                 if (error)
- -                      return error;
+ +                      goto unlock_mapping;
         }
   
+ +      error = AOP_TRUNCATED_PAGE;
         if (!page->mapping)
- -              goto truncated;
+ +              goto unlock;
   
         error = 0;
         if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page))
@@@ -2460,13 -2389,15 +2457,13 @@@
                 goto unlock;
   
         error = filemap_read_page(iocb->ki_filp, mapping, page);
- -      if (error == AOP_TRUNCATED_PAGE)
- -              put_page(page);
- -      return error;
- -truncated:
- -      unlock_page(page);
- -      put_page(page);
- -      return AOP_TRUNCATED_PAGE;
+ +      goto unlock_mapping;
   unlock:
         unlock_page(page);
+ +unlock_mapping:
+ +      filemap_invalidate_unlock_shared(mapping);
+ +      if (error == AOP_TRUNCATED_PAGE)
+ +              put_page(page);
         return error;
   }
   
@@@ -2481,19 -2412,6 +2478,19 @@@ static int filemap_create_page(struct f
         if (!page)
                 return -ENOMEM;
   
+ +      /*
+ +       * Protect against truncate / hole punch. Grabbing invalidate_lock here
+ +       * assures we cannot instantiate and bring uptodate new pagecache pages
+ +       * after evicting page cache during truncate and before actually
+ +       * freeing blocks.  Note that we could release invalidate_lock after
+ +       * inserting the page into page cache as the locked page would then be
+ +       * enough to synchronize with hole punching. But there are code paths
+ +       * such as filemap_update_page() filling in partially uptodate pages or
+ +       * ->readpages() that need to hold invalidate_lock while mapping blocks
+ +       * for IO so let's hold the lock here as well to keep locking rules
+ +       * simple.
+ +       */
+ +      filemap_invalidate_lock_shared(mapping);
         error = add_to_page_cache_lru(page, mapping, index,
                         mapping_gfp_constraint(mapping, GFP_KERNEL));
         if (error == -EEXIST)
@@@ -2505,11 -2423,9 +2502,11 @@@
         if (error)
                 goto error;
   
+ +      filemap_invalidate_unlock_shared(mapping);
         pagevec_add(pvec, page);
         return 0;
   error:
+ +      filemap_invalidate_unlock_shared(mapping);
         put_page(page);
         return error;
   }
@@@ -3048,7 -2964,6 +3045,7 @@@ vm_fault_t filemap_fault(struct vm_faul
         pgoff_t max_off;
         struct page *page;
         vm_fault_t ret = 0;
+ +      bool mapping_locked = false;
   
         max_off = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
         if (unlikely(offset >= max_off))
@@@ -3058,39 -2973,25 +3055,39 @@@
          * Do we have something in the page cache already?
          */
         page = find_get_page(mapping, offset);
- -      if (likely(page) && !(vmf->flags & FAULT_FLAG_TRIED)) {
+ +      if (likely(page)) {
                 /*
- -               * We found the page, so try async readahead before
- -               * waiting for the lock.
+ +               * We found the page, so try async readahead before waiting for
+ +               * the lock.
                  */
- -              fpin = do_async_mmap_readahead(vmf, page);
- -      } else if (!page) {
+ +              if (!(vmf->flags & FAULT_FLAG_TRIED))
+ +                      fpin = do_async_mmap_readahead(vmf, page);
+ +              if (unlikely(!PageUptodate(page))) {
+ +                      filemap_invalidate_lock_shared(mapping);
+ +                      mapping_locked = true;
+ +              }
+ +      } else {
                 /* No page in the page cache at all */
                 count_vm_event(PGMAJFAULT);
                 count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
                 ret = VM_FAULT_MAJOR;
                 fpin = do_sync_mmap_readahead(vmf);
   retry_find:
+ +              /*
+ +               * See comment in filemap_create_page() why we need
+ +               * invalidate_lock
+ +               */
+ +              if (!mapping_locked) {
+ +                      filemap_invalidate_lock_shared(mapping);
+ +                      mapping_locked = true;
+ +              }
                 page = pagecache_get_page(mapping, offset,
                                           FGP_CREAT|FGP_FOR_MMAP,
                                           vmf->gfp_mask);
                 if (!page) {
                         if (fpin)
                                 goto out_retry;
+ +                      filemap_invalidate_unlock_shared(mapping);
                         return VM_FAULT_OOM;
                 }
         }
@@@ -3110,20 -3011,8 +3107,20 @@@
          * We have a locked page in the page cache, now we need to check
          * that it's up-to-date. If not, it is going to be due to an error.
          */
- -      if (unlikely(!PageUptodate(page)))
+ +      if (unlikely(!PageUptodate(page))) {
+ +              /*
+ +               * The page was in cache and uptodate and now it is not.
+ +               * Strange but possible since we didn't hold the page lock all
+ +               * the time. Let's drop everything get the invalidate lock and
+ +               * try again.
+ +               */
+ +              if (!mapping_locked) {
+ +                      unlock_page(page);
+ +                      put_page(page);
+ +                      goto retry_find;
+ +              }
                 goto page_not_uptodate;
+ +      }
   
         /*
          * We've made it this far and we had to drop our mmap_lock, now is the
@@@ -3134,8 -3023,6 +3131,8 @@@
                 unlock_page(page);
                 goto out_retry;
         }
+ +      if (mapping_locked)
+ +              filemap_invalidate_unlock_shared(mapping);
   
         /*
          * Found the page and have a reference on it.
@@@ -3166,7 -3053,6 +3163,7 @@@ page_not_uptodate
   
         if (!error || error == AOP_TRUNCATED_PAGE)
                 goto retry_find;
+ +      filemap_invalidate_unlock_shared(mapping);
   
         return VM_FAULT_SIGBUS;
   
@@@ -3178,8 -3064,6 +3175,8 @@@ out_retry
          */
         if (page)
                 put_page(page);
+ +      if (mapping_locked)
+ +              filemap_invalidate_unlock_shared(mapping);
         if (fpin)
                 fput(fpin);
         return ret | VM_FAULT_RETRY;
@@@ -3550,8 -3434,6 +3547,8 @@@ out
    *
    * If the page does not get brought uptodate, return -EIO.
    *
+ + * The function expects mapping->invalidate_lock to be already held.
+ + *
    * Return: up to date page on success, ERR_PTR() on failure.
    */
   struct page *read_cache_page(struct address_space *mapping,
@@@ -3575,8 -3457,6 +3572,8 @@@ EXPORT_SYMBOL(read_cache_page)
    *
    * If the page does not get brought uptodate, return -EIO.
    *
+ + * The function expects mapping->invalidate_lock to be already held.
+ + *
    * Return: up to date page on success, ERR_PTR() on failure.
    */
   struct page *read_cache_page_gfp(struct address_space *mapping,
@@@ -3821,12 -3701,12 +3818,12 @@@ EXPORT_SYMBOL(generic_perform_write)
    * modification times and calls proper subroutines depending on whether we
    * do direct IO or a standard buffered write.
    *
- - * It expects i_mutex to be grabbed unless we work on a block device or similar
+ + * It expects i_rwsem to be grabbed unless we work on a block device or similar
    * object which does not need locking at all.
    *
    * This function does *not* take care of syncing data in case of O_SYNC write.
    * A caller has to handle it. This is mainly due to the fact that we want to
- - * avoid syncing under i_mutex.
+ + * avoid syncing under i_rwsem.
    *
    * Return:
    * * number of bytes written, even for truncated writes
@@@ -3914,7 -3794,7 +3911,7 @@@ EXPORT_SYMBOL(__generic_file_write_iter
    *
    * This is a wrapper around __generic_file_write_iter() to be used by most
    * filesystems. It takes care of syncing the file in case of O_SYNC file
- - * and acquires i_mutex as needed.
+ + * and acquires i_rwsem as needed.
    * Return:
    * * negative error code if no data has been written at all of
    *   vfs_fsync_range() failed for a synchronous write
diff --combined mm/kasan/hw_tags.c

index e4c16f6,5190363..05d1e94
--- 1/mm/kasan/hw_tags.c
--- 2/mm/kasan/hw_tags.c
+++ b/mm/kasan/hw_tags.c
@@@ -37,16 -37,9 +37,9 @@@ enum kasan_arg_stacktrace 
         KASAN_ARG_STACKTRACE_ON,
   };
   
- enum kasan_arg_fault {
-       KASAN_ARG_FAULT_DEFAULT,
-       KASAN_ARG_FAULT_REPORT,
-       KASAN_ARG_FAULT_PANIC,
- };
- 
   static enum kasan_arg kasan_arg __ro_after_init;
   static enum kasan_arg_mode kasan_arg_mode __ro_after_init;
   static enum kasan_arg_stacktrace kasan_arg_stacktrace __ro_after_init;
- static enum kasan_arg_fault kasan_arg_fault __ro_after_init;
   
   /* Whether KASAN is enabled at all. */
   DEFINE_STATIC_KEY_FALSE(kasan_flag_enabled);
@@@ -59,9 -52,6 +52,6 @@@ EXPORT_SYMBOL_GPL(kasan_flag_async)
   /* Whether to collect alloc/free stack traces. */
   DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
   
- /* Whether to panic or print a report and disable tag checking on fault. */
- bool kasan_flag_panic __ro_after_init;
- 
   /* kasan=off/on */
   static int __init early_kasan_flag(char *arg)
   {
@@@ -113,23 -103,6 +103,6 @@@ static int __init early_kasan_flag_stac
   }
   early_param("kasan.stacktrace", early_kasan_flag_stacktrace);
   
- /* kasan.fault=report/panic */
- static int __init early_kasan_fault(char *arg)
- {
-       if (!arg)
-               return -EINVAL;
- 
-       if (!strcmp(arg, "report"))
-               kasan_arg_fault = KASAN_ARG_FAULT_REPORT;
-       else if (!strcmp(arg, "panic"))
-               kasan_arg_fault = KASAN_ARG_FAULT_PANIC;
-       else
-               return -EINVAL;
- 
-       return 0;
- }
- early_param("kasan.fault", early_kasan_fault);
- 
   /* kasan_init_hw_tags_cpu() is called for each CPU. */
   void kasan_init_hw_tags_cpu(void)
   {
@@@ -142,6 -115,8 +115,6 @@@
         if (kasan_arg == KASAN_ARG_OFF)
                 return;
   
- -      hw_init_tags(KASAN_TAG_MAX);
- -
         /*
          * Enable async mode only when explicitly requested through
          * the command line.
@@@ -195,22 -170,6 +168,6 @@@ void __init kasan_init_hw_tags(void
                 break;
         }
   
-       switch (kasan_arg_fault) {
-       case KASAN_ARG_FAULT_DEFAULT:
-               /*
-                * Default to no panic on report.
-                * Do nothing, kasan_flag_panic keeps its default value.
-                */
-               break;
-       case KASAN_ARG_FAULT_REPORT:
-               /* Do nothing, kasan_flag_panic keeps its default value. */
-               break;
-       case KASAN_ARG_FAULT_PANIC:
-               /* Enable panic on report. */
-               kasan_flag_panic = true;
-               break;
-       }
- 
         pr_info("KernelAddressSanitizer initialized\n");
   }
   
@@@ -248,6 -207,12 +205,6 @@@ void kasan_free_pages(struct page *page
   
   #if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
   
- -void kasan_set_tagging_report_once(bool state)
- -{
- -      hw_set_tagging_report_once(state);
- -}
- -EXPORT_SYMBOL_GPL(kasan_set_tagging_report_once);
- -
   void kasan_enable_tagging_sync(void)
   {
         hw_enable_tagging_sync();
diff --combined mm/kasan/kasan.h

index fff93b0,fa02c88..8bf568a
--- 1/mm/kasan/kasan.h
--- 2/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@@ -3,7 -3,6 +3,7 @@@
   #define __MM_KASAN_KASAN_H
   
   #include <linux/kasan.h>
+ +#include <linux/kasan-tags.h>
   #include <linux/kfence.h>
   #include <linux/stackdepot.h>
   
@@@ -38,7 -37,6 +38,6 @@@ static inline bool kasan_async_mode_ena
   
   #endif
   
- extern bool kasan_flag_panic __ro_after_init;
   extern bool kasan_flag_async __ro_after_init;
   
   #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
@@@ -52,6 -50,16 +51,6 @@@
   
   #define KASAN_MEMORY_PER_SHADOW_PAGE  (KASAN_GRANULE_SIZE << PAGE_SHIFT)
   
- -#define KASAN_TAG_KERNEL      0xFF /* native kernel pointers tag */
- -#define KASAN_TAG_INVALID     0xFE /* inaccessible memory tag */
- -#define KASAN_TAG_MAX         0xFD /* maximum value for random tags */
- -
- -#ifdef CONFIG_KASAN_HW_TAGS
- -#define KASAN_TAG_MIN         0xF0 /* minimum value for random tags */
- -#else
- -#define KASAN_TAG_MIN         0x00 /* minimum value for random tags */
- -#endif
- -
   #ifdef CONFIG_KASAN_GENERIC
   #define KASAN_FREE_PAGE         0xFF  /* page was freed */
   #define KASAN_PAGE_REDZONE      0xFE  /* redzone for kmalloc_large allocations */
@@@ -290,6 -298,12 +289,6 @@@ static inline const void *arch_kasan_se
   #ifndef arch_enable_tagging_async
   #define arch_enable_tagging_async()
   #endif
- -#ifndef arch_init_tags
- -#define arch_init_tags(max_tag)
- -#endif
- -#ifndef arch_set_tagging_report_once
- -#define arch_set_tagging_report_once(state)
- -#endif
   #ifndef arch_force_async_tag_fault
   #define arch_force_async_tag_fault()
   #endif
@@@ -305,6 -319,8 +304,6 @@@
   
   #define hw_enable_tagging_sync()              arch_enable_tagging_sync()
   #define hw_enable_tagging_async()             arch_enable_tagging_async()
- -#define hw_init_tags(max_tag)                 arch_init_tags(max_tag)
- -#define hw_set_tagging_report_once(state)     arch_set_tagging_report_once(state)
   #define hw_force_async_tag_fault()            arch_force_async_tag_fault()
   #define hw_get_random_tag()                   arch_get_random_tag()
   #define hw_get_mem_tag(addr)                  arch_get_mem_tag(addr)
@@@ -315,16 -331,19 +314,16 @@@
   
   #define hw_enable_tagging_sync()
   #define hw_enable_tagging_async()
- -#define hw_set_tagging_report_once(state)
   
   #endif /* CONFIG_KASAN_HW_TAGS */
   
   #if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
   
- -void kasan_set_tagging_report_once(bool state);
   void kasan_enable_tagging_sync(void);
   void kasan_force_async_fault(void);
   
   #else /* CONFIG_KASAN_HW_TAGS || CONFIG_KASAN_KUNIT_TEST */
   
- -static inline void kasan_set_tagging_report_once(bool state) { }
   static inline void kasan_enable_tagging_sync(void) { }
   static inline void kasan_force_async_fault(void) { }
   
diff --combined mm/madvise.c

index 56324a3,4a15a83..0734db8
--- 1/mm/madvise.c
--- 2/mm/madvise.c
+++ b/mm/madvise.c
@@@ -912,7 -912,7 +912,7 @@@ static long madvise_remove(struct vm_ar
                         + ((loff_t)vma->vm_pgoff << PAGE_SHIFT);
   
         /*
- -       * Filesystem's fallocate may need to take i_mutex.  We need to
+ +       * Filesystem's fallocate may need to take i_rwsem.  We need to
          * explicitly grab a reference because the vma (and hence the
          * vma's reference to the file) can go away as soon as we drop
          * mmap_lock.
@@@ -1048,6 -1048,7 +1048,7 @@@ process_madvise_behavior_valid(int beha
         switch (behavior) {
         case MADV_COLD:
         case MADV_PAGEOUT:
+       case MADV_WILLNEED:
                 return true;
         default:
                 return false;
diff --combined mm/memblock.c

index e2ca8dd,e6b4654..0ab5a74
--- 1/mm/memblock.c
--- 2/mm/memblock.c
+++ b/mm/memblock.c
@@@ -315,7 -315,7 +315,7 @@@ static phys_addr_t __init_memblock memb
    * Return:
    * Found address on success, 0 on failure.
    */
- phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
+ static phys_addr_t __init_memblock memblock_find_in_range(phys_addr_t start,
                                         phys_addr_t end, phys_addr_t size,
                                         phys_addr_t align)
   {
@@@ -665,11 -665,6 +665,11 @@@ repeat
   int __init_memblock memblock_add_node(phys_addr_t base, phys_addr_t size,
                                        int nid)
   {
+ +      phys_addr_t end = base + size - 1;
+ +
+ +      memblock_dbg("%s: [%pa-%pa] nid=%d %pS\n", __func__,
+ +                   &base, &end, nid, (void *)_RET_IP_);
+ +
         return memblock_add_range(&memblock.memory, base, size, nid, 0);
   }
   
@@@ -1496,18 -1491,12 +1496,12 @@@ void * __init memblock_alloc_exact_nid_
                         phys_addr_t min_addr, phys_addr_t max_addr,
                         int nid)
   {
-       void *ptr;
- 
         memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
                      __func__, (u64)size, (u64)align, nid, &min_addr,
                      &max_addr, (void *)_RET_IP_);
   
-       ptr = memblock_alloc_internal(size, align,
-                                          min_addr, max_addr, nid, true);
-       if (ptr && size > 0)
-               page_init_poison(ptr, size);
- 
-       return ptr;
+       return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
+                                      true);
   }
   
   /**
@@@ -1534,18 -1523,12 +1528,12 @@@ void * __init memblock_alloc_try_nid_ra
                         phys_addr_t min_addr, phys_addr_t max_addr,
                         int nid)
   {
-       void *ptr;
- 
         memblock_dbg("%s: %llu bytes align=0x%llx nid=%d from=%pa max_addr=%pa %pS\n",
                      __func__, (u64)size, (u64)align, nid, &min_addr,
                      &max_addr, (void *)_RET_IP_);
   
-       ptr = memblock_alloc_internal(size, align,
-                                          min_addr, max_addr, nid, false);
-       if (ptr && size > 0)
-               page_init_poison(ptr, size);
- 
-       return ptr;
+       return memblock_alloc_internal(size, align, min_addr, max_addr, nid,
+                                      false);
   }
   
   /**
@@@ -1685,11 -1668,6 +1673,11 @@@ void __init memblock_cap_memory_range(p
         if (!size)
                 return;
   
+ +      if (memblock.memory.cnt <= 1) {
+ +              pr_warn("%s: No memory registered yet\n", __func__);
+ +              return;
+ +      }
+ +
         ret = memblock_isolate_range(&memblock.memory, base, size,
                                                 &start_rgn, &end_rgn);
         if (ret)
diff --combined mm/memcontrol.c

index 389b576,896f0f4..b762215
--- 1/mm/memcontrol.c
--- 2/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@@ -103,6 -103,14 +103,14 @@@ static bool do_memsw_account(void
         return !cgroup_subsys_on_dfl(memory_cgrp_subsys) && !cgroup_memory_noswap;
   }
   
+ /* memcg and lruvec stats flushing */
+ static void flush_memcg_stats_dwork(struct work_struct *w);
+ static DECLARE_DEFERRABLE_WORK(stats_flush_dwork, flush_memcg_stats_dwork);
+ static void flush_memcg_stats_work(struct work_struct *w);
+ static DECLARE_WORK(stats_flush_work, flush_memcg_stats_work);
+ static DEFINE_PER_CPU(unsigned int, stats_flush_threshold);
+ static DEFINE_SPINLOCK(stats_flush_lock);
+ 
   #define THRESHOLDS_EVENTS_TARGET 128
   #define SOFTLIMIT_EVENTS_TARGET 1024
   
@@@ -248,9 -256,9 +256,9 @@@ struct vmpressure *memcg_to_vmpressure(
         return &memcg->vmpressure;
   }
   
- struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
+ struct mem_cgroup *vmpressure_to_memcg(struct vmpressure *vmpr)
   {
-       return &container_of(vmpr, struct mem_cgroup, vmpressure)->css;
+       return container_of(vmpr, struct mem_cgroup, vmpressure);
   }
   
   #ifdef CONFIG_MEMCG_KMEM
@@@ -645,17 -653,6 +653,6 @@@ void __mod_memcg_state(struct mem_cgrou
         cgroup_rstat_updated(memcg->css.cgroup, smp_processor_id());
   }
   
- /* idx can be of type enum memcg_stat_item or node_stat_item. */
- static unsigned long memcg_page_state(struct mem_cgroup *memcg, int idx)
- {
-       long x = READ_ONCE(memcg->vmstats.state[idx]);
- #ifdef CONFIG_SMP
-       if (x < 0)
-               x = 0;
- #endif
-       return x;
- }
- 
   /* idx can be of type enum memcg_stat_item or node_stat_item. */
   static unsigned long memcg_page_state_local(struct mem_cgroup *memcg, int idx)
   {
@@@ -671,23 -668,11 +668,11 @@@
         return x;
   }
   
- static struct mem_cgroup_per_node *
- parent_nodeinfo(struct mem_cgroup_per_node *pn, int nid)
- {
-       struct mem_cgroup *parent;
- 
-       parent = parent_mem_cgroup(pn->memcg);
-       if (!parent)
-               return NULL;
-       return parent->nodeinfo[nid];
- }
- 
   void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
                               int val)
   {
         struct mem_cgroup_per_node *pn;
         struct mem_cgroup *memcg;
-       long x, threshold = MEMCG_CHARGE_BATCH;
   
         pn = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
         memcg = pn->memcg;
@@@ -696,21 -681,9 +681,9 @@@
         __mod_memcg_state(memcg, idx, val);
   
         /* Update lruvec */
-       __this_cpu_add(pn->lruvec_stat_local->count[idx], val);
- 
-       if (vmstat_item_in_bytes(idx))
-               threshold <<= PAGE_SHIFT;
- 
-       x = val + __this_cpu_read(pn->lruvec_stat_cpu->count[idx]);
-       if (unlikely(abs(x) > threshold)) {
-               pg_data_t *pgdat = lruvec_pgdat(lruvec);
-               struct mem_cgroup_per_node *pi;
- 
-               for (pi = pn; pi; pi = parent_nodeinfo(pi, pgdat->node_id))
-                       atomic_long_add(x, &pi->lruvec_stat[idx]);
-               x = 0;
-       }
-       __this_cpu_write(pn->lruvec_stat_cpu->count[idx], x);
+       __this_cpu_add(pn->lruvec_stats_percpu->state[idx], val);
+       if (!(__this_cpu_inc_return(stats_flush_threshold) % MEMCG_CHARGE_BATCH))
+               queue_work(system_unbound_wq, &stats_flush_work);
   }
   
   /**
@@@ -905,7 -878,7 +878,7 @@@ EXPORT_SYMBOL(mem_cgroup_from_task)
   
   static __always_inline struct mem_cgroup *active_memcg(void)
   {
-       if (in_interrupt())
+       if (!in_task())
                 return this_cpu_read(int_active_memcg);
         else
                 return current->active_memcg;
@@@ -968,7 -941,7 +941,7 @@@ static __always_inline bool memcg_kmem_
                 return false;
   
         /* Memcg to charge can't be determined. */
- -      if (in_interrupt() || !current->mm || (current->flags & PF_KTHREAD))
+ +      if (!in_task() || !current->mm || (current->flags & PF_KTHREAD))
                 return true;
   
         return false;
@@@ -2205,8 -2178,9 +2178,9 @@@ static void drain_local_stock(struct wo
         unsigned long flags;
   
         /*
-        * The only protection from memory hotplug vs. drain_stock races is
-        * that we always operate on local CPU stock here with IRQ disabled
+        * The only protection from cpu hotplug (memcg_hotplug_cpu_dead) vs.
+        * drain_stock races is that we always operate on local CPU stock
+        * here with IRQ disabled
          */
         local_irq_save(flags);
   
@@@ -2273,7 -2247,7 +2247,7 @@@ static void drain_all_stock(struct mem_
                 if (memcg && stock->nr_pages &&
                     mem_cgroup_is_descendant(memcg, root_memcg))
                         flush = true;
-               if (obj_stock_flush_required(stock, root_memcg))
+               else if (obj_stock_flush_required(stock, root_memcg))
                         flush = true;
                 rcu_read_unlock();
   
@@@ -2289,40 -2263,13 +2263,13 @@@
         mutex_unlock(&percpu_charge_mutex);
   }
   
- static void memcg_flush_lruvec_page_state(struct mem_cgroup *memcg, int cpu)
- {
-       int nid;
- 
-       for_each_node(nid) {
-               struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
-               unsigned long stat[NR_VM_NODE_STAT_ITEMS];
-               struct batched_lruvec_stat *lstatc;
-               int i;
- 
-               lstatc = per_cpu_ptr(pn->lruvec_stat_cpu, cpu);
-               for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
-                       stat[i] = lstatc->count[i];
-                       lstatc->count[i] = 0;
-               }
- 
-               do {
-                       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
-                               atomic_long_add(stat[i], &pn->lruvec_stat[i]);
-               } while ((pn = parent_nodeinfo(pn, nid)));
-       }
- }
- 
   static int memcg_hotplug_cpu_dead(unsigned int cpu)
   {
         struct memcg_stock_pcp *stock;
-       struct mem_cgroup *memcg;
   
         stock = &per_cpu(memcg_stock, cpu);
         drain_stock(stock);
   
-       for_each_mem_cgroup(memcg)
-               memcg_flush_lruvec_page_state(memcg, cpu);
- 
         return 0;
   }
   
@@@ -4116,7 -4063,7 +4063,7 @@@ static int mem_cgroup_swappiness_write(
   {
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
   
-       if (val > 100)
+       if (val > 200)
                 return -EINVAL;
   
         if (!mem_cgroup_is_root(memcg))
@@@ -4668,7 -4615,7 +4615,7 @@@ void mem_cgroup_flush_foreign(struct bd
                     atomic_read(&frn->done.cnt) == 1) {
                         frn->at = 0;
                         trace_flush_foreign(wb, frn->bdi_id, frn->memcg_id);
-                       cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id, 0,
+                       cgroup_writeback_by_id(frn->bdi_id, frn->memcg_id,
                                                WB_REASON_FOREIGN_FLUSH,
                                                &frn->done);
                 }
@@@ -4892,9 -4839,9 +4839,9 @@@ static ssize_t memcg_write_event_contro
   
         vfs_poll(efile.file, &event->pt);
   
-       spin_lock(&memcg->event_list_lock);
+       spin_lock_irq(&memcg->event_list_lock);
         list_add(&event->list, &memcg->event_list);
-       spin_unlock(&memcg->event_list_lock);
+       spin_unlock_irq(&memcg->event_list_lock);
   
         fdput(cfile);
         fdput(efile);
@@@ -5129,17 -5076,9 +5076,9 @@@ static int alloc_mem_cgroup_per_node_in
         if (!pn)
                 return 1;
   
-       pn->lruvec_stat_local = alloc_percpu_gfp(struct lruvec_stat,
-                                                GFP_KERNEL_ACCOUNT);
-       if (!pn->lruvec_stat_local) {
-               kfree(pn);
-               return 1;
-       }
- 
-       pn->lruvec_stat_cpu = alloc_percpu_gfp(struct batched_lruvec_stat,
-                                              GFP_KERNEL_ACCOUNT);
-       if (!pn->lruvec_stat_cpu) {
-               free_percpu(pn->lruvec_stat_local);
+       pn->lruvec_stats_percpu = alloc_percpu_gfp(struct lruvec_stats_percpu,
+                                                  GFP_KERNEL_ACCOUNT);
+       if (!pn->lruvec_stats_percpu) {
                 kfree(pn);
                 return 1;
         }
@@@ -5160,8 -5099,7 +5099,7 @@@ static void free_mem_cgroup_per_node_in
         if (!pn)
                 return;
   
-       free_percpu(pn->lruvec_stat_cpu);
-       free_percpu(pn->lruvec_stat_local);
+       free_percpu(pn->lruvec_stats_percpu);
         kfree(pn);
   }
   
@@@ -5177,15 -5115,7 +5115,7 @@@ static void __mem_cgroup_free(struct me
   
   static void mem_cgroup_free(struct mem_cgroup *memcg)
   {
-       int cpu;
- 
         memcg_wb_domain_exit(memcg);
-       /*
-        * Flush percpu lruvec stats to guarantee the value
-        * correctness on parent's and all ancestor levels.
-        */
-       for_each_online_cpu(cpu)
-               memcg_flush_lruvec_page_state(memcg, cpu);
         __mem_cgroup_free(memcg);
   }
   
@@@ -5321,6 -5251,10 +5251,10 @@@ static int mem_cgroup_css_online(struc
         /* Online state pins memcg ID, memcg ID pins CSS */
         refcount_set(&memcg->id.ref, 1);
         css_get(css);
+ 
+       if (unlikely(mem_cgroup_is_root(memcg)))
+               queue_delayed_work(system_unbound_wq, &stats_flush_dwork,
+                                  2UL*HZ);
         return 0;
   }
   
@@@ -5334,12 -5268,12 +5268,12 @@@ static void mem_cgroup_css_offline(stru
          * Notify userspace about cgroup removing only after rmdir of cgroup
          * directory to avoid race between userspace and kernelspace.
          */
-       spin_lock(&memcg->event_list_lock);
+       spin_lock_irq(&memcg->event_list_lock);
         list_for_each_entry_safe(event, tmp, &memcg->event_list, list) {
                 list_del_init(&event->list);
                 schedule_work(&event->remove);
         }
-       spin_unlock(&memcg->event_list_lock);
+       spin_unlock_irq(&memcg->event_list_lock);
   
         page_counter_set_min(&memcg->memory, 0);
         page_counter_set_low(&memcg->memory, 0);
@@@ -5412,13 -5346,33 +5346,33 @@@ static void mem_cgroup_css_reset(struc
         memcg_wb_domain_size_changed(memcg);
   }
   
+ void mem_cgroup_flush_stats(void)
+ {
+       if (!spin_trylock(&stats_flush_lock))
+               return;
+ 
+       cgroup_rstat_flush_irqsafe(root_mem_cgroup->css.cgroup);
+       spin_unlock(&stats_flush_lock);
+ }
+ 
+ static void flush_memcg_stats_dwork(struct work_struct *w)
+ {
+       mem_cgroup_flush_stats();
+       queue_delayed_work(system_unbound_wq, &stats_flush_dwork, 2UL*HZ);
+ }
+ 
+ static void flush_memcg_stats_work(struct work_struct *w)
+ {
+       mem_cgroup_flush_stats();
+ }
+ 
   static void mem_cgroup_css_rstat_flush(struct cgroup_subsys_state *css, int cpu)
   {
         struct mem_cgroup *memcg = mem_cgroup_from_css(css);
         struct mem_cgroup *parent = parent_mem_cgroup(memcg);
         struct memcg_vmstats_percpu *statc;
         long delta, v;
-       int i;
+       int i, nid;
   
         statc = per_cpu_ptr(memcg->vmstats_percpu, cpu);
   
@@@ -5466,6 -5420,36 +5420,36 @@@
                 if (parent)
                         parent->vmstats.events_pending[i] += delta;
         }
+ 
+       for_each_node_state(nid, N_MEMORY) {
+               struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+               struct mem_cgroup_per_node *ppn = NULL;
+               struct lruvec_stats_percpu *lstatc;
+ 
+               if (parent)
+                       ppn = parent->nodeinfo[nid];
+ 
+               lstatc = per_cpu_ptr(pn->lruvec_stats_percpu, cpu);
+ 
+               for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+                       delta = pn->lruvec_stats.state_pending[i];
+                       if (delta)
+                               pn->lruvec_stats.state_pending[i] = 0;
+ 
+                       v = READ_ONCE(lstatc->state[i]);
+                       if (v != lstatc->state_prev[i]) {
+                               delta += v - lstatc->state_prev[i];
+                               lstatc->state_prev[i] = v;
+                       }
+ 
+                       if (!delta)
+                               continue;
+ 
+                       pn->lruvec_stats.state[i] += delta;
+                       if (ppn)
+                               ppn->lruvec_stats.state_pending[i] += delta;
+               }
+       }
   }
   
   #ifdef CONFIG_MMU
@@@ -6399,6 -6383,8 +6383,8 @@@ static int memory_numa_stat_show(struc
         int i;
         struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
   
+       cgroup_rstat_flush(memcg->css.cgroup);
+ 
         for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
                 int nid;
   
@@@ -6704,8 -6690,7 +6690,7 @@@ void mem_cgroup_calculate_protection(st
                         atomic_long_read(&parent->memory.children_low_usage)));
   }
   
- static int __mem_cgroup_charge(struct page *page, struct mem_cgroup *memcg,
-                              gfp_t gfp)
+ static int charge_memcg(struct page *page, struct mem_cgroup *memcg, gfp_t gfp)
   {
         unsigned int nr_pages = thp_nr_pages(page);
         int ret;
@@@ -6726,7 -6711,7 +6711,7 @@@ out
   }
   
   /**
-  * mem_cgroup_charge - charge a newly allocated page to a cgroup
+  * __mem_cgroup_charge - charge a newly allocated page to a cgroup
    * @page: page to charge
    * @mm: mm context of the victim
    * @gfp_mask: reclaim mode
@@@ -6739,16 -6724,14 +6724,14 @@@
    *
    * Returns 0 on success. Otherwise, an error code is returned.
    */
- int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
+ int __mem_cgroup_charge(struct page *page, struct mm_struct *mm,
+                       gfp_t gfp_mask)
   {
         struct mem_cgroup *memcg;
         int ret;
   
-       if (mem_cgroup_disabled())
-               return 0;
- 
         memcg = get_mem_cgroup_from_mm(mm);
-       ret = __mem_cgroup_charge(page, memcg, gfp_mask);
+       ret = charge_memcg(page, memcg, gfp_mask);
         css_put(&memcg->css);
   
         return ret;
@@@ -6783,7 -6766,7 +6766,7 @@@ int mem_cgroup_swapin_charge_page(struc
                 memcg = get_mem_cgroup_from_mm(mm);
         rcu_read_unlock();
   
-       ret = __mem_cgroup_charge(page, memcg, gfp);
+       ret = charge_memcg(page, memcg, gfp);
   
         css_put(&memcg->css);
         return ret;
@@@ -6919,18 -6902,15 +6902,15 @@@ static void uncharge_page(struct page *
   }
   
   /**
-  * mem_cgroup_uncharge - uncharge a page
+  * __mem_cgroup_uncharge - uncharge a page
    * @page: page to uncharge
    *
-  * Uncharge a page previously charged with mem_cgroup_charge().
+  * Uncharge a page previously charged with __mem_cgroup_charge().
    */
- void mem_cgroup_uncharge(struct page *page)
+ void __mem_cgroup_uncharge(struct page *page)
   {
         struct uncharge_gather ug;
   
-       if (mem_cgroup_disabled())
-               return;
- 
         /* Don't touch page->lru of any random page, pre-check: */
         if (!page_memcg(page))
                 return;
@@@ -6941,20 -6921,17 +6921,17 @@@
   }
   
   /**
-  * mem_cgroup_uncharge_list - uncharge a list of page
+  * __mem_cgroup_uncharge_list - uncharge a list of page
    * @page_list: list of pages to uncharge
    *
    * Uncharge a list of pages previously charged with
-  * mem_cgroup_charge().
+  * __mem_cgroup_charge().
    */
- void mem_cgroup_uncharge_list(struct list_head *page_list)
+ void __mem_cgroup_uncharge_list(struct list_head *page_list)
   {
         struct uncharge_gather ug;
         struct page *page;
   
-       if (mem_cgroup_disabled())
-               return;
- 
         uncharge_gather_clear(&ug);
         list_for_each_entry(page, page_list, lru)
                 uncharge_page(page, &ug);
@@@ -7050,14 -7027,14 +7027,14 @@@ void mem_cgroup_sk_free(struct sock *sk
    * mem_cgroup_charge_skmem - charge socket memory
    * @memcg: memcg to charge
    * @nr_pages: number of pages to charge
+ + * @gfp_mask: reclaim mode
    *
    * Charges @nr_pages to @memcg. Returns %true if the charge fit within
- - * @memcg's configured limit, %false if the charge had to be forced.
+ + * @memcg's configured limit, %false if it doesn't.
    */
- -bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
+ +bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages,
+ +                           gfp_t gfp_mask)
   {
- -      gfp_t gfp_mask = GFP_KERNEL;
- -
         if (!cgroup_subsys_on_dfl(memory_cgrp_subsys)) {
                 struct page_counter *fail;
   
@@@ -7065,19 -7042,21 +7042,19 @@@
                         memcg->tcpmem_pressure = 0;
                         return true;
                 }
- -              page_counter_charge(&memcg->tcpmem, nr_pages);
                 memcg->tcpmem_pressure = 1;
+ +              if (gfp_mask & __GFP_NOFAIL) {
+ +                      page_counter_charge(&memcg->tcpmem, nr_pages);
+ +                      return true;
+ +              }
                 return false;
         }
   
- -      /* Don't block in the packet receive path */
- -      if (in_softirq())
- -              gfp_mask = GFP_NOWAIT;
- -
- -      mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
- -
- -      if (try_charge(memcg, gfp_mask, nr_pages) == 0)
+ +      if (try_charge(memcg, gfp_mask, nr_pages) == 0) {
+ +              mod_memcg_state(memcg, MEMCG_SOCK, nr_pages);
                 return true;
+ +      }
   
- -      try_charge(memcg, gfp_mask|__GFP_NOFAIL, nr_pages);
         return false;
   }
   
@@@ -7244,7 -7223,7 +7221,7 @@@ void mem_cgroup_swapout(struct page *pa
   }
   
   /**
-  * mem_cgroup_try_charge_swap - try charging swap space for a page
+  * __mem_cgroup_try_charge_swap - try charging swap space for a page
    * @page: page being added to swap
    * @entry: swap entry to charge
    *
@@@ -7252,16 -7231,13 +7229,13 @@@
    *
    * Returns 0 on success, -ENOMEM on failure.
    */
- int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+ int __mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
   {
         unsigned int nr_pages = thp_nr_pages(page);
         struct page_counter *counter;
         struct mem_cgroup *memcg;
         unsigned short oldid;
   
-       if (mem_cgroup_disabled())
-               return 0;
- 
         if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
                 return 0;
   
@@@ -7297,11 -7273,11 +7271,11 @@@
   }
   
   /**
-  * mem_cgroup_uncharge_swap - uncharge swap space
+  * __mem_cgroup_uncharge_swap - uncharge swap space
    * @entry: swap entry to uncharge
    * @nr_pages: the amount of swap space to uncharge
    */
- void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
+ void __mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_pages)
   {
         struct mem_cgroup *memcg;
         unsigned short id;
diff --combined mm/memory-failure.c

index e1f87cf,517789b..54879c3
--- 1/mm/memory-failure.c
--- 2/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@@ -68,7 -68,7 +68,7 @@@ atomic_long_t num_poisoned_pages __read
   
   static bool __page_handle_poison(struct page *page)
   {
-       bool ret;
+       int ret;
   
         zone_pcp_disable(page_zone(page));
         ret = dissolve_free_huge_page(page);
@@@ -76,7 -76,7 +76,7 @@@
                 ret = take_page_off_buddy(page);
         zone_pcp_enable(page_zone(page));
   
-       return ret;
+       return ret > 0;
   }
   
   static bool page_handle_poison(struct page *page, bool hugepage_or_freepage, bool release)
@@@ -282,9 -282,9 +282,9 @@@ static int kill_proc(struct to_kill *tk
   
   /*
    * Unknown page type encountered. Try to check whether it can turn PageLRU by
-  * lru_add_drain_all, or a free page by reclaiming slabs when possible.
+  * lru_add_drain_all.
    */
- void shake_page(struct page *p, int access)
+ void shake_page(struct page *p)
   {
         if (PageHuge(p))
                 return;
@@@ -296,11 -296,9 +296,9 @@@
         }
   
         /*
-        * Only call shrink_node_slabs here (which would also shrink
-        * other caches) if access is not potentially fatal.
+        * TODO: Could shrink slab caches here if a lightweight range-based
+        * shrinker will be available.
          */
-       if (access)
-               drop_slab_node(page_to_nid(p));
   }
   EXPORT_SYMBOL_GPL(shake_page);
   
@@@ -391,8 -389,8 +389,8 @@@ static void add_to_kill(struct task_str
   /*
    * Kill the processes that have been collected earlier.
    *
-  * Only do anything when DOIT is set, otherwise just free the list
-  * (this is used for clean pages which do not need killing)
+  * Only do anything when FORCEKILL is set, otherwise just free the
+  * list (this is used for clean pages which do not need killing)
    * Also when FAIL is set do a force kill because something went
    * wrong earlier.
    */
@@@ -632,7 -630,7 +630,7 @@@ static int hwpoison_pte_range(pmd_t *pm
   {
         struct hwp_walk *hwp = (struct hwp_walk *)walk->private;
         int ret = 0;
-       pte_t *ptep;
+       pte_t *ptep, *mapped_pte;
         spinlock_t *ptl;
   
         ptl = pmd_trans_huge_lock(pmdp, walk->vma);
@@@ -645,14 -643,15 +643,15 @@@
         if (pmd_trans_unstable(pmdp))
                 goto out;
   
-       ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp, addr, &ptl);
+       mapped_pte = ptep = pte_offset_map_lock(walk->vma->vm_mm, pmdp,
+                                               addr, &ptl);
         for (; addr != end; ptep++, addr += PAGE_SIZE) {
                 ret = check_hwpoisoned_entry(*ptep, addr, PAGE_SHIFT,
                                              hwp->pfn, &hwp->tk);
                 if (ret == 1)
                         break;
         }
-       pte_unmap_unlock(ptep - 1, ptl);
+       pte_unmap_unlock(mapped_pte, ptl);
   out:
         cond_resched();
         return ret;
@@@ -866,7 -865,7 +865,7 @@@ static int me_pagecache_clean(struct pa
         /*
          * Truncation is a bit tricky. Enable it per file system for now.
          *
- -       * Open: to take i_mutex or not for this? Right now we don't.
+ +       * Open: to take i_rwsem or not for this? Right now we don't.
          */
         ret = truncate_error_page(p, pfn, mapping);
   out:
@@@ -1204,7 -1203,7 +1203,7 @@@ try_again
                          * page, retry.
                          */
                         if (pass++ < 3) {
-                               shake_page(p, 1);
+                               shake_page(p);
                                 goto try_again;
                         }
                         ret = -EIO;
@@@ -1221,7 -1220,7 +1220,7 @@@
                  */
                 if (pass++ < 3) {
                         put_page(p);
-                       shake_page(p, 1);
+                       shake_page(p);
                         count_increased = false;
                         goto try_again;
                 }
@@@ -1229,6 -1228,9 +1228,9 @@@
                 ret = -EIO;
         }
   out:
+       if (ret == -EIO)
+               dump_page(p, "hwpoison: unhandlable page");
+ 
         return ret;
   }
   
@@@ -1270,14 -1272,13 +1272,13 @@@ static int get_hwpoison_page(struct pag
    * the pages and send SIGBUS to the processes if the data was dirty.
    */
   static bool hwpoison_user_mappings(struct page *p, unsigned long pfn,
-                                 int flags, struct page **hpagep)
+                                 int flags, struct page *hpage)
   {
         enum ttu_flags ttu = TTU_IGNORE_MLOCK | TTU_SYNC;
         struct address_space *mapping;
         LIST_HEAD(tokill);
         bool unmap_success;
         int kill = 1, forcekill;
-       struct page *hpage = *hpagep;
         bool mlocked = PageMlocked(hpage);
   
         /*
@@@ -1369,7 -1370,7 +1370,7 @@@
          * shake_page() again to ensure that it's flushed.
          */
         if (mlocked)
-               shake_page(hpage, 0);
+               shake_page(hpage);
   
         /*
          * Now that the dirty bit has been propagated to the
@@@ -1502,7 -1503,7 +1503,7 @@@ static int memory_failure_hugetlb(unsig
                 goto out;
         }
   
-       if (!hwpoison_user_mappings(p, pfn, flags, &head)) {
+       if (!hwpoison_user_mappings(p, pfn, flags, head)) {
                 action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
                 res = -EBUSY;
                 goto out;
@@@ -1518,7 -1519,6 +1519,6 @@@ static int memory_failure_dev_pagemap(u
                 struct dev_pagemap *pgmap)
   {
         struct page *page = pfn_to_page(pfn);
-       const bool unmap_success = true;
         unsigned long size = 0;
         struct to_kill *tk;
         LIST_HEAD(tokill);
@@@ -1590,7 -1590,7 +1590,7 @@@
                 start = (page->index << PAGE_SHIFT) & ~(size - 1);
                 unmap_mapping_range(page->mapping, start, size, 0);
         }
-       kill_procs(&tokill, flags & MF_MUST_KILL, !unmap_success, pfn, flags);
+       kill_procs(&tokill, flags & MF_MUST_KILL, false, pfn, flags);
         rc = 0;
   unlock:
         dax_unlock_page(page, cookie);
@@@ -1724,7 -1724,7 +1724,7 @@@ try_again
          * The check (unnecessarily) ignores LRU pages being isolated and
          * walked by the page reclaim code, however that's not a big loss.
          */
-       shake_page(p, 0);
+       shake_page(p);
   
         lock_page(p);
   
@@@ -1783,7 -1783,7 +1783,7 @@@
          * Now take care of user space mappings.
          * Abort on fail: __delete_from_page_cache() assumes unmapped page.
          */
-       if (!hwpoison_user_mappings(p, pfn, flags, &p)) {
+       if (!hwpoison_user_mappings(p, pfn, flags, p)) {
                 action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED);
                 res = -EBUSY;
                 goto unlock_page;
@@@ -2099,7 -2099,7 +2099,7 @@@ static int __soft_offline_page(struct p
   
         if (isolate_page(hpage, &pagelist)) {
                 ret = migrate_pages(&pagelist, alloc_migration_target, NULL,
-                       (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE);
+                       (unsigned long)&mtc, MIGRATE_SYNC, MR_MEMORY_FAILURE, NULL);
                 if (!ret) {
                         bool release = !huge;
   
@@@ -2208,9 -2208,6 +2208,6 @@@ retry
                         try_again = false;
                         goto retry;
                 }
-       } else if (ret == -EIO) {
-               pr_info("%s: %#lx: unknown page type: %lx (%pGp)\n",
-                        __func__, pfn, page->flags, &page->flags);
         }
   
         return ret;
diff --combined mm/mmap.c

index 181a113,52fed23..dce4610
--- 1/mm/mmap.c
--- 2/mm/mmap.c
+++ b/mm/mmap.c
@@@ -534,6 -534,7 +534,7 @@@ static int find_vma_links(struct mm_str
   {
         struct rb_node **__rb_link, *__rb_parent, *rb_prev;
   
+       mmap_assert_locked(mm);
         __rb_link = &mm->mm_rb.rb_node;
         rb_prev = __rb_parent = NULL;
   
@@@ -1517,6 -1518,12 +1518,6 @@@ unsigned long do_mmap(struct file *file
                         if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
                                 return -EACCES;
   
- -                      /*
- -                       * Make sure there are no mandatory locks on the file.
- -                       */
- -                      if (locks_verify_locked(file))
- -                              return -EAGAIN;
- -
                         vm_flags |= VM_SHARED | VM_MAYSHARE;
                         if (!(file->f_mode & FMODE_WRITE))
                                 vm_flags &= ~(VM_MAYWRITE | VM_SHARED);
@@@ -2297,6 -2304,7 +2298,7 @@@ struct vm_area_struct *find_vma(struct 
         struct rb_node *rb_node;
         struct vm_area_struct *vma;
   
+       mmap_assert_locked(mm);
         /* Check the cache first. */
         vma = vmacache_find(mm, addr);
         if (likely(vma))
@@@ -2986,14 -2994,11 +2988,11 @@@ SYSCALL_DEFINE5(remap_file_pages, unsig
         if (mmap_write_lock_killable(mm))
                 return -EINTR;
   
-       vma = find_vma(mm, start);
+       vma = vma_lookup(mm, start);
   
         if (!vma || !(vma->vm_flags & VM_SHARED))
                 goto out;
   
-       if (start < vma->vm_start)
-               goto out;
- 
         if (start + size > vma->vm_end) {
                 struct vm_area_struct *next;
   
diff --combined mm/page-writeback.c

index c12f67c,c3b00c6..4812a17
--- 1/mm/page-writeback.c
--- 2/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@@ -183,7 -183,7 +183,7 @@@ static struct fprop_local_percpu *wb_me
   static void wb_min_max_ratio(struct bdi_writeback *wb,
                              unsigned long *minp, unsigned long *maxp)
   {
-       unsigned long this_bw = wb->avg_write_bandwidth;
+       unsigned long this_bw = READ_ONCE(wb->avg_write_bandwidth);
         unsigned long tot_bw = atomic_long_read(&wb->bdi->tot_write_bandwidth);
         unsigned long long min = wb->bdi->min_ratio;
         unsigned long long max = wb->bdi->max_ratio;
@@@ -892,7 -892,7 +892,7 @@@ static long long pos_ratio_polynom(unsi
   static void wb_position_ratio(struct dirty_throttle_control *dtc)
   {
         struct bdi_writeback *wb = dtc->wb;
-       unsigned long write_bw = wb->avg_write_bandwidth;
+       unsigned long write_bw = READ_ONCE(wb->avg_write_bandwidth);
         unsigned long freerun = dirty_freerun_ceiling(dtc->thresh, dtc->bg_thresh);
         unsigned long limit = hard_dirty_limit(dtc_dom(dtc), dtc->thresh);
         unsigned long wb_thresh = dtc->wb_thresh;
@@@ -1115,7 -1115,7 +1115,7 @@@ out
                                         &wb->bdi->tot_write_bandwidth) <= 0);
         }
         wb->write_bandwidth = bw;
-       wb->avg_write_bandwidth = avg;
+       WRITE_ONCE(wb->avg_write_bandwidth, avg);
   }
   
   static void update_dirty_limit(struct dirty_throttle_control *dtc)
@@@ -1147,8 -1147,8 +1147,8 @@@ update
         dom->dirty_limit = limit;
   }
   
- static void domain_update_bandwidth(struct dirty_throttle_control *dtc,
-                                   unsigned long now)
+ static void domain_update_dirty_limit(struct dirty_throttle_control *dtc,
+                                     unsigned long now)
   {
         struct wb_domain *dom = dtc_dom(dtc);
   
@@@ -1324,7 -1324,7 +1324,7 @@@ static void wb_update_dirty_ratelimit(s
         else
                 dirty_ratelimit -= step;
   
-       wb->dirty_ratelimit = max(dirty_ratelimit, 1UL);
+       WRITE_ONCE(wb->dirty_ratelimit, max(dirty_ratelimit, 1UL));
         wb->balanced_dirty_ratelimit = balanced_dirty_ratelimit;
   
         trace_bdi_dirty_ratelimit(wb, dirty_rate, task_ratelimit);
@@@ -1332,35 -1332,28 +1332,28 @@@
   
   static void __wb_update_bandwidth(struct dirty_throttle_control *gdtc,
                                   struct dirty_throttle_control *mdtc,
-                                 unsigned long start_time,
                                   bool update_ratelimit)
   {
         struct bdi_writeback *wb = gdtc->wb;
         unsigned long now = jiffies;
-       unsigned long elapsed = now - wb->bw_time_stamp;
+       unsigned long elapsed;
         unsigned long dirtied;
         unsigned long written;
   
-       lockdep_assert_held(&wb->list_lock);
+       spin_lock(&wb->list_lock);
   
         /*
-        * rate-limit, only update once every 200ms.
+        * Lockless checks for elapsed time are racy and delayed update after
+        * IO completion doesn't do it at all (to make sure written pages are
+        * accounted reasonably quickly). Make sure elapsed >= 1 to avoid
+        * division errors.
          */
-       if (elapsed < BANDWIDTH_INTERVAL)
-               return;
- 
+       elapsed = max(now - wb->bw_time_stamp, 1UL);
         dirtied = percpu_counter_read(&wb->stat[WB_DIRTIED]);
         written = percpu_counter_read(&wb->stat[WB_WRITTEN]);
   
-       /*
-        * Skip quiet periods when disk bandwidth is under-utilized.
-        * (at least 1s idle time between two flusher runs)
-        */
-       if (elapsed > HZ && time_before(wb->bw_time_stamp, start_time))
-               goto snapshot;
- 
         if (update_ratelimit) {
-               domain_update_bandwidth(gdtc, now);
+               domain_update_dirty_limit(gdtc, now);
                 wb_update_dirty_ratelimit(gdtc, dirtied, elapsed);
   
                 /*
@@@ -1368,23 -1361,41 +1361,41 @@@
                  * compiler has no way to figure that out.  Help it.
                  */
                 if (IS_ENABLED(CONFIG_CGROUP_WRITEBACK) && mdtc) {
-                       domain_update_bandwidth(mdtc, now);
+                       domain_update_dirty_limit(mdtc, now);
                         wb_update_dirty_ratelimit(mdtc, dirtied, elapsed);
                 }
         }
         wb_update_write_bandwidth(wb, elapsed, written);
   
- snapshot:
         wb->dirtied_stamp = dirtied;
         wb->written_stamp = written;
-       wb->bw_time_stamp = now;
+       WRITE_ONCE(wb->bw_time_stamp, now);
+       spin_unlock(&wb->list_lock);
   }
   
- void wb_update_bandwidth(struct bdi_writeback *wb, unsigned long start_time)
+ void wb_update_bandwidth(struct bdi_writeback *wb)
   {
         struct dirty_throttle_control gdtc = { GDTC_INIT(wb) };
   
-       __wb_update_bandwidth(&gdtc, NULL, start_time, false);
+       __wb_update_bandwidth(&gdtc, NULL, false);
+ }
+ 
+ /* Interval after which we consider wb idle and don't estimate bandwidth */
+ #define WB_BANDWIDTH_IDLE_JIF (HZ)
+ 
+ static void wb_bandwidth_estimate_start(struct bdi_writeback *wb)
+ {
+       unsigned long now = jiffies;
+       unsigned long elapsed = now - READ_ONCE(wb->bw_time_stamp);
+ 
+       if (elapsed > WB_BANDWIDTH_IDLE_JIF &&
+           !atomic_read(&wb->writeback_inodes)) {
+               spin_lock(&wb->list_lock);
+               wb->dirtied_stamp = wb_stat(wb, WB_DIRTIED);
+               wb->written_stamp = wb_stat(wb, WB_WRITTEN);
+               WRITE_ONCE(wb->bw_time_stamp, now);
+               spin_unlock(&wb->list_lock);
+       }
   }
   
   /*
@@@ -1407,7 -1418,7 +1418,7 @@@ static unsigned long dirty_poll_interva
   static unsigned long wb_max_pause(struct bdi_writeback *wb,
                                   unsigned long wb_dirty)
   {
-       unsigned long bw = wb->avg_write_bandwidth;
+       unsigned long bw = READ_ONCE(wb->avg_write_bandwidth);
         unsigned long t;
   
         /*
@@@ -1429,8 -1440,8 +1440,8 @@@ static long wb_min_pause(struct bdi_wri
                          unsigned long dirty_ratelimit,
                          int *nr_dirtied_pause)
   {
-       long hi = ilog2(wb->avg_write_bandwidth);
-       long lo = ilog2(wb->dirty_ratelimit);
+       long hi = ilog2(READ_ONCE(wb->avg_write_bandwidth));
+       long lo = ilog2(READ_ONCE(wb->dirty_ratelimit));
         long t;         /* target pause */
         long pause;     /* estimated next pause */
         int pages;      /* target nr_dirtied_pause */
@@@ -1710,15 -1721,12 +1721,12 @@@ free_running
                 if (dirty_exceeded && !wb->dirty_exceeded)
                         wb->dirty_exceeded = 1;
   
-               if (time_is_before_jiffies(wb->bw_time_stamp +
-                                          BANDWIDTH_INTERVAL)) {
-                       spin_lock(&wb->list_lock);
-                       __wb_update_bandwidth(gdtc, mdtc, start_time, true);
-                       spin_unlock(&wb->list_lock);
-               }
+               if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
+                                          BANDWIDTH_INTERVAL))
+                       __wb_update_bandwidth(gdtc, mdtc, true);
   
                 /* throttle according to the chosen dtc */
-               dirty_ratelimit = wb->dirty_ratelimit;
+               dirty_ratelimit = READ_ONCE(wb->dirty_ratelimit);
                 task_ratelimit = ((u64)dirty_ratelimit * sdtc->pos_ratio) >>
                                                         RATELIMIT_CALC_SHIFT;
                 max_pause = wb_max_pause(wb, sdtc->wb_dirty);
@@@ -2010,6 -2018,7 +2018,6 @@@ int dirty_writeback_centisecs_handler(s
         return ret;
   }
   
- -#ifdef CONFIG_BLOCK
   void laptop_mode_timer_fn(struct timer_list *t)
   {
         struct backing_dev_info *backing_dev_info =
@@@ -2044,6 -2053,7 +2052,6 @@@ void laptop_sync_completion(void
   
         rcu_read_unlock();
   }
- -#endif
   
   /*
    * If ratelimit_pages is too high then we can get into dirty-data overload
@@@ -2345,9 -2355,12 +2353,12 @@@ EXPORT_SYMBOL(generic_writepages)
   int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
   {
         int ret;
+       struct bdi_writeback *wb;
   
         if (wbc->nr_to_write <= 0)
                 return 0;
+       wb = inode_to_wb_wbc(mapping->host, wbc);
+       wb_bandwidth_estimate_start(wb);
         while (1) {
                 if (mapping->a_ops->writepages)
                         ret = mapping->a_ops->writepages(mapping, wbc);
@@@ -2358,6 -2371,14 +2369,14 @@@
                 cond_resched();
                 congestion_wait(BLK_RW_ASYNC, HZ/50);
         }
+       /*
+        * Usually few pages are written by now from those we've just submitted
+        * but if there's constant writeback being submitted, this makes sure
+        * writeback bandwidth is updated once in a while.
+        */
+       if (time_is_before_jiffies(READ_ONCE(wb->bw_time_stamp) +
+                                  BANDWIDTH_INTERVAL))
+               wb_update_bandwidth(wb);
         return ret;
   }
   
@@@ -2729,6 -2750,24 +2748,24 @@@ int clear_page_dirty_for_io(struct pag
   }
   EXPORT_SYMBOL(clear_page_dirty_for_io);
   
+ static void wb_inode_writeback_start(struct bdi_writeback *wb)
+ {
+       atomic_inc(&wb->writeback_inodes);
+ }
+ 
+ static void wb_inode_writeback_end(struct bdi_writeback *wb)
+ {
+       atomic_dec(&wb->writeback_inodes);
+       /*
+        * Make sure estimate of writeback throughput gets updated after
+        * writeback completed. We delay the update by BANDWIDTH_INTERVAL
+        * (which is the interval other bandwidth updates use for batching) so
+        * that if multiple inodes end writeback at a similar time, they get
+        * batched into one bandwidth update.
+        */
+       queue_delayed_work(bdi_wq, &wb->bw_dwork, BANDWIDTH_INTERVAL);
+ }
+ 
   int test_clear_page_writeback(struct page *page)
   {
         struct address_space *mapping = page_mapping(page);
@@@ -2750,6 -2789,9 +2787,9 @@@
   
                                 dec_wb_stat(wb, WB_WRITEBACK);
                                 __wb_writeout_inc(wb);
+                               if (!mapping_tagged(mapping,
+                                                   PAGECACHE_TAG_WRITEBACK))
+                                       wb_inode_writeback_end(wb);
                         }
                 }
   
@@@ -2792,8 -2834,13 +2832,13 @@@ int __test_set_page_writeback(struct pa
                                                    PAGECACHE_TAG_WRITEBACK);
   
                         xas_set_mark(&xas, PAGECACHE_TAG_WRITEBACK);
-                       if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT)
-                               inc_wb_stat(inode_to_wb(inode), WB_WRITEBACK);
+                       if (bdi->capabilities & BDI_CAP_WRITEBACK_ACCT) {
+                               struct bdi_writeback *wb = inode_to_wb(inode);
+ 
+                               inc_wb_stat(wb, WB_WRITEBACK);
+                               if (!on_wblist)
+                                       wb_inode_writeback_start(wb);
+                       }
   
                         /*
                          * We can come through here when swapping anonymous
diff --combined mm/shmem.c

index 3107ace,21c29f7..8874295
--- 1/mm/shmem.c
--- 2/mm/shmem.c
+++ b/mm/shmem.c
@@@ -38,8 -38,7 +38,7 @@@
   #include <linux/hugetlb.h>
   #include <linux/frontswap.h>
   #include <linux/fs_parser.h>
- 
- #include <asm/tlbflush.h> /* for arch/microblaze update_mmu_cache() */
+ #include <linux/swapfile.h>
   
   static struct vfsmount *shm_mnt;
   
@@@ -96,7 -95,7 +95,7 @@@
   
   /*
    * shmem_fallocate communicates with shmem_fault or shmem_writepage via
- - * inode->i_private (with i_mutex making sure that it has only one user at
+ + * inode->i_private (with i_rwsem making sure that it has only one user at
    * a time): we would prefer not to enlarge the shmem inode just for that.
    */
   struct shmem_falloc {
@@@ -137,9 -136,6 +136,6 @@@ static unsigned long shmem_default_max_
   }
   #endif
   
- static bool shmem_should_replace_page(struct page *page, gfp_t gfp);
- static int shmem_replace_page(struct page **pagep, gfp_t gfp,
-                               struct shmem_inode_info *info, pgoff_t index);
   static int shmem_swapin_page(struct inode *inode, pgoff_t index,
                              struct page **pagep, enum sgp_type sgp,
                              gfp_t gfp, struct vm_area_struct *vma,
@@@ -278,10 -274,10 +274,10 @@@ static int shmem_reserve_inode(struct s
         ino_t ino;
   
         if (!(sb->s_flags & SB_KERNMOUNT)) {
-               spin_lock(&sbinfo->stat_lock);
+               raw_spin_lock(&sbinfo->stat_lock);
                 if (sbinfo->max_inodes) {
                         if (!sbinfo->free_inodes) {
-                               spin_unlock(&sbinfo->stat_lock);
+                               raw_spin_unlock(&sbinfo->stat_lock);
                                 return -ENOSPC;
                         }
                         sbinfo->free_inodes--;
@@@ -304,7 -300,7 +300,7 @@@
                         }
                         *inop = ino;
                 }
-               spin_unlock(&sbinfo->stat_lock);
+               raw_spin_unlock(&sbinfo->stat_lock);
         } else if (inop) {
                 /*
                  * __shmem_file_setup, one of our callers, is lock-free: it
@@@ -319,13 -315,14 +315,14 @@@
                  * to worry about things like glibc compatibility.
                  */
                 ino_t *next_ino;
+ 
                 next_ino = per_cpu_ptr(sbinfo->ino_batch, get_cpu());
                 ino = *next_ino;
                 if (unlikely(ino % SHMEM_INO_BATCH == 0)) {
-                       spin_lock(&sbinfo->stat_lock);
+                       raw_spin_lock(&sbinfo->stat_lock);
                         ino = sbinfo->next_ino;
                         sbinfo->next_ino += SHMEM_INO_BATCH;
-                       spin_unlock(&sbinfo->stat_lock);
+                       raw_spin_unlock(&sbinfo->stat_lock);
                         if (unlikely(is_zero_ino(ino)))
                                 ino++;
                 }
@@@ -341,9 -338,9 +338,9 @@@ static void shmem_free_inode(struct sup
   {
         struct shmem_sb_info *sbinfo = SHMEM_SB(sb);
         if (sbinfo->max_inodes) {
-               spin_lock(&sbinfo->stat_lock);
+               raw_spin_lock(&sbinfo->stat_lock);
                 sbinfo->free_inodes++;
-               spin_unlock(&sbinfo->stat_lock);
+               raw_spin_unlock(&sbinfo->stat_lock);
         }
   }
   
@@@ -474,7 -471,38 +471,38 @@@ static bool shmem_confirm_swap(struct a
   #ifdef CONFIG_TRANSPARENT_HUGEPAGE
   /* ifdef here to avoid bloating shmem.o when not necessary */
   
- static int shmem_huge __read_mostly;
+ static int shmem_huge __read_mostly = SHMEM_HUGE_NEVER;
+ 
+ bool shmem_is_huge(struct vm_area_struct *vma,
+                  struct inode *inode, pgoff_t index)
+ {
+       loff_t i_size;
+ 
+       if (shmem_huge == SHMEM_HUGE_DENY)
+               return false;
+       if (vma && ((vma->vm_flags & VM_NOHUGEPAGE) ||
+           test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags)))
+               return false;
+       if (shmem_huge == SHMEM_HUGE_FORCE)
+               return true;
+ 
+       switch (SHMEM_SB(inode->i_sb)->huge) {
+       case SHMEM_HUGE_ALWAYS:
+               return true;
+       case SHMEM_HUGE_WITHIN_SIZE:
+               index = round_up(index, HPAGE_PMD_NR);
+               i_size = round_up(i_size_read(inode), PAGE_SIZE);
+               if (i_size >= HPAGE_PMD_SIZE && (i_size >> PAGE_SHIFT) >= index)
+                       return true;
+               fallthrough;
+       case SHMEM_HUGE_ADVISE:
+               if (vma && (vma->vm_flags & VM_HUGEPAGE))
+                       return true;
+               fallthrough;
+       default:
+               return false;
+       }
+ }
   
   #if defined(CONFIG_SYSFS)
   static int shmem_parse_huge(const char *str)
@@@ -645,6 -673,12 +673,12 @@@ static long shmem_unused_huge_count(str
   
   #define shmem_huge SHMEM_HUGE_DENY
   
+ bool shmem_is_huge(struct vm_area_struct *vma,
+                  struct inode *inode, pgoff_t index)
+ {
+       return false;
+ }
+ 
   static unsigned long shmem_unused_huge_shrink(struct shmem_sb_info *sbinfo,
                 struct shrink_control *sc, unsigned long nr_to_split)
   {
@@@ -652,15 -686,6 +686,6 @@@
   }
   #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
   
- static inline bool is_huge_enabled(struct shmem_sb_info *sbinfo)
- {
-       if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) &&
-           (shmem_huge == SHMEM_HUGE_FORCE || sbinfo->huge) &&
-           shmem_huge != SHMEM_HUGE_DENY)
-               return true;
-       return false;
- }
- 
   /*
    * Like add_to_page_cache_locked, but error if expected item has gone.
    */
@@@ -774,7 -799,7 +799,7 @@@ static int shmem_free_swap(struct addre
    * Determine (in bytes) how many of the shmem object's pages mapped by the
    * given offsets are swapped out.
    *
- - * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
+ + * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
    * as long as the inode doesn't go away and racy results are not a problem.
    */
   unsigned long shmem_partial_swap_usage(struct address_space *mapping,
@@@ -806,7 -831,7 +831,7 @@@
    * Determine (in bytes) how many of the shmem object's pages mapped by the
    * given vma is swapped out.
    *
- - * This is safe to call without i_mutex or the i_pages lock thanks to RCU,
+ + * This is safe to call without i_rwsem or the i_pages lock thanks to RCU,
    * as long as the inode doesn't go away and racy results are not a problem.
    */
   unsigned long shmem_swap_usage(struct vm_area_struct *vma)
@@@ -905,6 -930,9 +930,9 @@@ static void shmem_undo_range(struct ino
         if (lend == -1)
                 end = -1;       /* unsigned, so actually very big */
   
+       if (info->fallocend > start && info->fallocend <= end && !unfalloc)
+               info->fallocend = start;
+ 
         pagevec_init(&pvec);
         index = start;
         while (index < end && find_lock_entries(mapping, index, end - 1,
@@@ -1038,7 -1066,6 +1066,6 @@@ static int shmem_getattr(struct user_na
   {
         struct inode *inode = path->dentry->d_inode;
         struct shmem_inode_info *info = SHMEM_I(inode);
-       struct shmem_sb_info *sb_info = SHMEM_SB(inode->i_sb);
   
         if (info->alloced - info->swapped != inode->i_mapping->nrpages) {
                 spin_lock_irq(&info->lock);
@@@ -1047,7 -1074,7 +1074,7 @@@
         }
         generic_fillattr(&init_user_ns, inode, stat);
   
-       if (is_huge_enabled(sb_info))
+       if (shmem_is_huge(NULL, inode, 0))
                 stat->blksize = HPAGE_PMD_SIZE;
   
         return 0;
@@@ -1058,7 -1085,6 +1085,6 @@@ static int shmem_setattr(struct user_na
   {
         struct inode *inode = d_inode(dentry);
         struct shmem_inode_info *info = SHMEM_I(inode);
-       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
         int error;
   
         error = setattr_prepare(&init_user_ns, dentry, attr);
@@@ -1069,7 -1095,7 +1095,7 @@@
                 loff_t oldsize = inode->i_size;
                 loff_t newsize = attr->ia_size;
   
- -              /* protected by i_mutex */
+ +              /* protected by i_rwsem */
                 if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
                     (newsize > oldsize && (info->seals & F_SEAL_GROW)))
                         return -EPERM;
@@@ -1094,24 -1120,6 +1120,6 @@@
                         if (oldsize > holebegin)
                                 unmap_mapping_range(inode->i_mapping,
                                                         holebegin, 0, 1);
- 
-                       /*
-                        * Part of the huge page can be beyond i_size: subject
-                        * to shrink under memory pressure.
-                        */
-                       if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
-                               spin_lock(&sbinfo->shrinklist_lock);
-                               /*
-                                * _careful to defend against unlocked access to
-                                * ->shrink_list in shmem_unused_huge_shrink()
-                                */
-                               if (list_empty_careful(&info->shrinklist)) {
-                                       list_add_tail(&info->shrinklist,
-                                                       &sbinfo->shrinklist);
-                                       sbinfo->shrinklist_len++;
-                               }
-                               spin_unlock(&sbinfo->shrinklist_lock);
-                       }
                 }
         }
   
@@@ -1156,8 -1164,6 +1164,6 @@@ static void shmem_evict_inode(struct in
         clear_inode(inode);
   }
   
- extern struct swap_info_struct *swap_info[];
- 
   static int shmem_find_swap_entries(struct address_space *mapping,
                                    pgoff_t start, unsigned int nr_entries,
                                    struct page **entries, pgoff_t *indices,
@@@ -1338,7 -1344,19 +1344,19 @@@ static int shmem_writepage(struct page 
         swp_entry_t swap;
         pgoff_t index;
   
-       VM_BUG_ON_PAGE(PageCompound(page), page);
+       /*
+        * If /sys/kernel/mm/transparent_hugepage/shmem_enabled is "always" or
+        * "force", drivers/gpu/drm/i915/gem/i915_gem_shmem.c gets huge pages,
+        * and its shmem_writeback() needs them to be split when swapping.
+        */
+       if (PageTransCompound(page)) {
+               /* Ensure the subpages are still dirty */
+               SetPageDirty(page);
+               if (split_huge_page(page) < 0)
+                       goto redirty;
+               ClearPageDirty(page);
+       }
+ 
         BUG_ON(!PageLocked(page));
         mapping = page->mapping;
         index = page->index;
@@@ -1453,10 -1471,10 +1471,10 @@@ static struct mempolicy *shmem_get_sbmp
   {
         struct mempolicy *mpol = NULL;
         if (sbinfo->mpol) {
-               spin_lock(&sbinfo->stat_lock);  /* prevent replace/use races */
+               raw_spin_lock(&sbinfo->stat_lock);      /* prevent replace/use races */
                 mpol = sbinfo->mpol;
                 mpol_get(mpol);
-               spin_unlock(&sbinfo->stat_lock);
+               raw_spin_unlock(&sbinfo->stat_lock);
         }
         return mpol;
   }
@@@ -1798,7 -1816,6 +1816,6 @@@ static int shmem_getpage_gfp(struct ino
         struct shmem_sb_info *sbinfo;
         struct mm_struct *charge_mm;
         struct page *page;
-       enum sgp_type sgp_huge = sgp;
         pgoff_t hindex = index;
         gfp_t huge_gfp;
         int error;
@@@ -1807,8 -1824,6 +1824,6 @@@
   
         if (index > (MAX_LFS_FILESIZE >> PAGE_SHIFT))
                 return -EFBIG;
-       if (sgp == SGP_NOHUGE || sgp == SGP_HUGE)
-               sgp = SGP_CACHE;
   repeat:
         if (sgp <= SGP_CACHE &&
             ((loff_t)index << PAGE_SHIFT) >= i_size_read(inode)) {
@@@ -1840,26 -1855,31 +1855,31 @@@
                 return error;
         }
   
-       if (page)
+       if (page) {
                 hindex = page->index;
-       if (page && sgp == SGP_WRITE)
-               mark_page_accessed(page);
- 
-       /* fallocated page? */
-       if (page && !PageUptodate(page)) {
+               if (sgp == SGP_WRITE)
+                       mark_page_accessed(page);
+               if (PageUptodate(page))
+                       goto out;
+               /* fallocated page */
                 if (sgp != SGP_READ)
                         goto clear;
                 unlock_page(page);
                 put_page(page);
-               page = NULL;
-               hindex = index;
         }
-       if (page || sgp == SGP_READ)
-               goto out;
   
         /*
-        * Fast cache lookup did not find it:
-        * bring it back from swap or allocate.
+        * SGP_READ: succeed on hole, with NULL page, letting caller zero.
+        * SGP_NOALLOC: fail on hole, with NULL page, letting caller fail.
+        */
+       *pagep = NULL;
+       if (sgp == SGP_READ)
+               return 0;
+       if (sgp == SGP_NOALLOC)
+               return -ENOENT;
+ 
+       /*
+        * Fast cache lookup and swap lookup did not find it: allocate.
          */
   
         if (vma && userfaultfd_missing(vma)) {
@@@ -1867,36 -1887,12 +1887,12 @@@
                 return 0;
         }
   
-       /* shmem_symlink() */
-       if (!shmem_mapping(mapping))
-               goto alloc_nohuge;
-       if (shmem_huge == SHMEM_HUGE_DENY || sgp_huge == SGP_NOHUGE)
-               goto alloc_nohuge;
-       if (shmem_huge == SHMEM_HUGE_FORCE)
-               goto alloc_huge;
-       switch (sbinfo->huge) {
-       case SHMEM_HUGE_NEVER:
+       /* Never use a huge page for shmem_symlink() */
+       if (S_ISLNK(inode->i_mode))
                 goto alloc_nohuge;
-       case SHMEM_HUGE_WITHIN_SIZE: {
-               loff_t i_size;
-               pgoff_t off;
- 
-               off = round_up(index, HPAGE_PMD_NR);
-               i_size = round_up(i_size_read(inode), PAGE_SIZE);
-               if (i_size >= HPAGE_PMD_SIZE &&
-                   i_size >> PAGE_SHIFT >= off)
-                       goto alloc_huge;
- 
-               fallthrough;
-       }
-       case SHMEM_HUGE_ADVISE:
-               if (sgp_huge == SGP_HUGE)
-                       goto alloc_huge;
-               /* TODO: implement fadvise() hints */
+       if (!shmem_is_huge(vma, inode, index))
                 goto alloc_nohuge;
-       }
   
- alloc_huge:
         huge_gfp = vma_thp_gfp_mask(vma);
         huge_gfp = limit_gfp_mask(huge_gfp, gfp);
         page = shmem_alloc_and_acct_page(huge_gfp, inode, index, true);
@@@ -2052,14 -2048,13 +2048,13 @@@ static vm_fault_t shmem_fault(struct vm
         struct vm_area_struct *vma = vmf->vma;
         struct inode *inode = file_inode(vma->vm_file);
         gfp_t gfp = mapping_gfp_mask(inode->i_mapping);
-       enum sgp_type sgp;
         int err;
         vm_fault_t ret = VM_FAULT_LOCKED;
   
         /*
          * Trinity finds that probing a hole which tmpfs is punching can
          * prevent the hole-punch from ever completing: which in turn
- -       * locks writers out with its hold on i_mutex.  So refrain from
+ +       * locks writers out with its hold on i_rwsem.  So refrain from
          * faulting pages into the hole while it's being punched.  Although
          * shmem_undo_range() does remove the additions, it may be unable to
          * keep up, as each new page needs its own unmap_mapping_range() call,
@@@ -2070,7 -2065,7 +2065,7 @@@
          * we just need to make racing faults a rare case.
          *
          * The implementation below would be much simpler if we just used a
- -       * standard mutex or completion: but we cannot take i_mutex in fault,
+ +       * standard mutex or completion: but we cannot take i_rwsem in fault,
          * and bloating every shmem inode for this unlikely case would be sad.
          */
         if (unlikely(inode->i_private)) {
@@@ -2115,15 -2110,7 +2110,7 @@@
                 spin_unlock(&inode->i_lock);
         }
   
-       sgp = SGP_CACHE;
- 
-       if ((vma->vm_flags & VM_NOHUGEPAGE) ||
-           test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
-               sgp = SGP_NOHUGE;
-       else if (vma->vm_flags & VM_HUGEPAGE)
-               sgp = SGP_HUGE;
- 
-       err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, sgp,
+       err = shmem_getpage_gfp(inode, vmf->pgoff, &vmf->page, SGP_CACHE,
                                   gfp, vma, vmf, &ret);
         if (err)
                 return vmf_error(err);
@@@ -2470,7 -2457,7 +2457,7 @@@ shmem_write_begin(struct file *file, st
         struct shmem_inode_info *info = SHMEM_I(inode);
         pgoff_t index = pos >> PAGE_SHIFT;
   
- -      /* i_mutex is held by caller */
+ +      /* i_rwsem is held by caller */
         if (unlikely(info->seals & (F_SEAL_GROW |
                                    F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))) {
                 if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE))
@@@ -2570,7 -2557,7 +2557,7 @@@ static ssize_t shmem_file_read_iter(str
   
                 /*
                  * We must evaluate after, since reads (unlike writes)
- -               * are called without i_mutex protection against truncate
+ +               * are called without i_rwsem protection against truncate
                  */
                 nr = PAGE_SIZE;
                 i_size = i_size_read(inode);
@@@ -2640,7 -2627,7 +2627,7 @@@ static loff_t shmem_file_llseek(struct 
                 return -ENXIO;
   
         inode_lock(inode);
- -      /* We're holding i_mutex so we can access i_size directly */
+ +      /* We're holding i_rwsem so we can access i_size directly */
         offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence);
         if (offset >= 0)
                 offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
@@@ -2655,7 -2642,7 +2642,7 @@@ static long shmem_fallocate(struct fil
         struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
         struct shmem_inode_info *info = SHMEM_I(inode);
         struct shmem_falloc shmem_falloc;
-       pgoff_t start, index, end;
+       pgoff_t start, index, end, undo_fallocend;
         int error;
   
         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
@@@ -2669,7 -2656,7 +2656,7 @@@
                 loff_t unmap_end = round_down(offset + len, PAGE_SIZE) - 1;
                 DECLARE_WAIT_QUEUE_HEAD_ONSTACK(shmem_falloc_waitq);
   
- -              /* protected by i_mutex */
+ +              /* protected by i_rwsem */
                 if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
                         error = -EPERM;
                         goto out;
@@@ -2724,7 -2711,16 +2711,16 @@@
         inode->i_private = &shmem_falloc;
         spin_unlock(&inode->i_lock);
   
-       for (index = start; index < end; index++) {
+       /*
+        * info->fallocend is only relevant when huge pages might be
+        * involved: to prevent split_huge_page() freeing fallocated
+        * pages when FALLOC_FL_KEEP_SIZE committed beyond i_size.
+        */
+       undo_fallocend = info->fallocend;
+       if (info->fallocend < end)
+               info->fallocend = end;
+ 
+       for (index = start; index < end; ) {
                 struct page *page;
   
                 /*
@@@ -2738,6 -2734,7 +2734,7 @@@
                 else
                         error = shmem_getpage(inode, index, &page, SGP_FALLOC);
                 if (error) {
+                       info->fallocend = undo_fallocend;
                         /* Remove the !PageUptodate pages we added */
                         if (index > start) {
                                 shmem_undo_range(inode,
@@@ -2747,13 -2744,26 +2744,26 @@@
                         goto undone;
                 }
   
+               index++;
+               /*
+                * Here is a more important optimization than it appears:
+                * a second SGP_FALLOC on the same huge page will clear it,
+                * making it PageUptodate and un-undoable if we fail later.
+                */
+               if (PageTransCompound(page)) {
+                       index = round_up(index, HPAGE_PMD_NR);
+                       /* Beware 32-bit wraparound */
+                       if (!index)
+                               index--;
+               }
+ 
                 /*
                  * Inform shmem_writepage() how far we have reached.
                  * No need for lock or barrier: we have the page lock.
                  */
-               shmem_falloc.next++;
                 if (!PageUptodate(page))
-                       shmem_falloc.nr_falloced++;
+                       shmem_falloc.nr_falloced += index - shmem_falloc.next;
+               shmem_falloc.next = index;
   
                 /*
                  * If !PageUptodate, leave it that way so that freeable pages
@@@ -3488,9 -3498,10 +3498,10 @@@ static int shmem_reconfigure(struct fs_
         struct shmem_options *ctx = fc->fs_private;
         struct shmem_sb_info *sbinfo = SHMEM_SB(fc->root->d_sb);
         unsigned long inodes;
+       struct mempolicy *mpol = NULL;
         const char *err;
   
-       spin_lock(&sbinfo->stat_lock);
+       raw_spin_lock(&sbinfo->stat_lock);
         inodes = sbinfo->max_inodes - sbinfo->free_inodes;
         if ((ctx->seen & SHMEM_SEEN_BLOCKS) && ctx->blocks) {
                 if (!sbinfo->max_blocks) {
@@@ -3535,14 -3546,15 +3546,15 @@@
          * Preserve previous mempolicy unless mpol remount option was specified.
          */
         if (ctx->mpol) {
-               mpol_put(sbinfo->mpol);
+               mpol = sbinfo->mpol;
                 sbinfo->mpol = ctx->mpol;       /* transfers initial ref */
                 ctx->mpol = NULL;
         }
-       spin_unlock(&sbinfo->stat_lock);
+       raw_spin_unlock(&sbinfo->stat_lock);
+       mpol_put(mpol);
         return 0;
   out:
-       spin_unlock(&sbinfo->stat_lock);
+       raw_spin_unlock(&sbinfo->stat_lock);
         return invalfc(fc, "%s", err);
   }
   
@@@ -3613,7 -3625,6 +3625,6 @@@ static int shmem_fill_super(struct supe
         struct shmem_options *ctx = fc->fs_private;
         struct inode *inode;
         struct shmem_sb_info *sbinfo;
-       int err = -ENOMEM;
   
         /* Round up to L1_CACHE_BYTES to resist false sharing */
         sbinfo = kzalloc(max((int)sizeof(struct shmem_sb_info),
@@@ -3659,7 -3670,7 +3670,7 @@@
         sbinfo->mpol = ctx->mpol;
         ctx->mpol = NULL;
   
-       spin_lock_init(&sbinfo->stat_lock);
+       raw_spin_lock_init(&sbinfo->stat_lock);
         if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL))
                 goto failed;
         spin_lock_init(&sbinfo->shrinklist_lock);
@@@ -3691,7 -3702,7 +3702,7 @@@
   
   failed:
         shmem_put_super(sb);
-       return err;
+       return -ENOMEM;
   }
   
   static int shmem_get_tree(struct fs_context *fc)
@@@ -3907,7 -3918,7 +3918,7 @@@ int __init shmem_init(void
         if (has_transparent_hugepage() && shmem_huge > SHMEM_HUGE_DENY)
                 SHMEM_SB(shm_mnt->mnt_sb)->huge = shmem_huge;
         else
-               shmem_huge = 0; /* just in case it was patched */
+               shmem_huge = SHMEM_HUGE_NEVER; /* just in case it was patched */
   #endif
         return 0;
   
@@@ -3976,42 -3987,6 +3987,6 @@@ struct kobj_attribute shmem_enabled_att
         __ATTR(shmem_enabled, 0644, shmem_enabled_show, shmem_enabled_store);
   #endif /* CONFIG_TRANSPARENT_HUGEPAGE && CONFIG_SYSFS */
   
- #ifdef CONFIG_TRANSPARENT_HUGEPAGE
- bool shmem_huge_enabled(struct vm_area_struct *vma)
- {
-       struct inode *inode = file_inode(vma->vm_file);
-       struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
-       loff_t i_size;
-       pgoff_t off;
- 
-       if (!transhuge_vma_enabled(vma, vma->vm_flags))
-               return false;
-       if (shmem_huge == SHMEM_HUGE_FORCE)
-               return true;
-       if (shmem_huge == SHMEM_HUGE_DENY)
-               return false;
-       switch (sbinfo->huge) {
-               case SHMEM_HUGE_NEVER:
-                       return false;
-               case SHMEM_HUGE_ALWAYS:
-                       return true;
-               case SHMEM_HUGE_WITHIN_SIZE:
-                       off = round_up(vma->vm_pgoff, HPAGE_PMD_NR);
-                       i_size = round_up(i_size_read(inode), PAGE_SIZE);
-                       if (i_size >= HPAGE_PMD_SIZE &&
-                                       i_size >> PAGE_SHIFT >= off)
-                               return true;
-                       fallthrough;
-               case SHMEM_HUGE_ADVISE:
-                       /* TODO: implement fadvise() hints */
-                       return (vma->vm_flags & VM_HUGEPAGE);
-               default:
-                       VM_BUG_ON(1);
-                       return false;
-       }
- }
- #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
- 
   #else /* !CONFIG_SHMEM */
   
   /*
diff --combined mm/truncate.c

index 44ad5e5,787b35f..714eaf1
--- 1/mm/truncate.c
--- 2/mm/truncate.c
+++ b/mm/truncate.c
@@@ -412,8 -412,7 +412,8 @@@ EXPORT_SYMBOL(truncate_inode_pages_rang
    * @mapping: mapping to truncate
    * @lstart: offset from which to truncate
    *
- - * Called under (and serialised by) inode->i_mutex.
+ + * Called under (and serialised by) inode->i_rwsem and
+ + * mapping->invalidate_lock.
    *
    * Note: When this function returns, there can be a page in the process of
    * deletion (inside __delete_from_page_cache()) in the specified range.  Thus
@@@ -430,7 -429,7 +430,7 @@@ EXPORT_SYMBOL(truncate_inode_pages)
    * truncate_inode_pages_final - truncate *all* pages before inode dies
    * @mapping: mapping to truncate
    *
- - * Called under (and serialized by) inode->i_mutex.
+ + * Called under (and serialized by) inode->i_rwsem.
    *
    * Filesystems have to use this in the .evict_inode path to inform the
    * VM that this is the final truncate and the inode is going away.
@@@ -484,8 -483,9 +484,9 @@@ static unsigned long __invalidate_mappi
                         index = indices[i];
   
                         if (xa_is_value(page)) {
-                               invalidate_exceptional_entry(mapping, index,
-                                                            page);
+                               count += invalidate_exceptional_entry(mapping,
+                                                                     index,
+                                                                     page);
                                 continue;
                         }
                         index += thp_nr_pages(page) - 1;
@@@ -513,19 -513,18 +514,18 @@@
   }
   
   /**
-  * invalidate_mapping_pages - Invalidate all the unlocked pages of one inode
-  * @mapping: the address_space which holds the pages to invalidate
+  * invalidate_mapping_pages - Invalidate all clean, unlocked cache of one inode
+  * @mapping: the address_space which holds the cache to invalidate
    * @start: the offset 'from' which to invalidate
    * @end: the offset 'to' which to invalidate (inclusive)
    *
-  * This function only removes the unlocked pages, if you want to
-  * remove all the pages of one inode, you must call truncate_inode_pages.
+  * This function removes pages that are clean, unmapped and unlocked,
+  * as well as shadow entries. It will not block on IO activity.
    *
-  * invalidate_mapping_pages() will not block on IO activity. It will not
-  * invalidate pages which are dirty, locked, under writeback or mapped into
-  * pagetables.
+  * If you want to remove all the pages of one inode, regardless of
+  * their use and writeback state, use truncate_inode_pages().
    *
-  * Return: the number of the pages that were invalidated
+  * Return: the number of the cache entries that were invalidated
    */
   unsigned long invalidate_mapping_pages(struct address_space *mapping,
                 pgoff_t start, pgoff_t end)
@@@ -561,21 -560,19 +561,19 @@@ void invalidate_mapping_pagevec(struct 
   static int
   invalidate_complete_page2(struct address_space *mapping, struct page *page)
   {
-       unsigned long flags;
- 
         if (page->mapping != mapping)
                 return 0;
   
         if (page_has_private(page) && !try_to_release_page(page, GFP_KERNEL))
                 return 0;
   
-       xa_lock_irqsave(&mapping->i_pages, flags);
+       xa_lock_irq(&mapping->i_pages);
         if (PageDirty(page))
                 goto failed;
   
         BUG_ON(page_has_private(page));
         __delete_from_page_cache(page, NULL);
-       xa_unlock_irqrestore(&mapping->i_pages, flags);
+       xa_unlock_irq(&mapping->i_pages);
   
         if (mapping->a_ops->freepage)
                 mapping->a_ops->freepage(page);
@@@ -583,7 -580,7 +581,7 @@@
         put_page(page); /* pagecache ref */
         return 1;
   failed:
-       xa_unlock_irqrestore(&mapping->i_pages, flags);
+       xa_unlock_irq(&mapping->i_pages);
         return 0;
   }
   
@@@ -749,7 -746,7 +747,7 @@@ EXPORT_SYMBOL(truncate_pagecache)
    * setattr function when ATTR_SIZE is passed in.
    *
    * Must be called with a lock serializing truncates and writes (generally
- - * i_mutex but e.g. xfs uses a different lock) and before all filesystem
+ + * i_rwsem but e.g. xfs uses a different lock) and before all filesystem
    * specific block truncation has been performed.
    */
   void truncate_setsize(struct inode *inode, loff_t newsize)
@@@ -778,7 -775,7 +776,7 @@@ EXPORT_SYMBOL(truncate_setsize)
    *
    * The function must be called after i_size is updated so that page fault
    * coming after we unlock the page will already see the new i_size.
- - * The function must be called while we still hold i_mutex - this not only
+ + * The function must be called while we still hold i_rwsem - this not only
    * makes sure i_size is stable but also that userspace cannot observe new
    * i_size value before we are prepared to store mmap writes at new inode size.
    */
diff --combined mm/vmstat.c

index a7ed56a,13ff25d..0885a34
--- 1/mm/vmstat.c
--- 2/mm/vmstat.c
+++ b/mm/vmstat.c
@@@ -129,9 -129,9 +129,9 @@@ static void sum_vm_events(unsigned lon
   */
   void all_vm_events(unsigned long *ret)
   {
- -      get_online_cpus();
+ +      cpus_read_lock();
         sum_vm_events(ret);
- -      put_online_cpus();
+ +      cpus_read_unlock();
   }
   EXPORT_SYMBOL_GPL(all_vm_events);
   
@@@ -204,7 -204,7 +204,7 @@@ int calculate_normal_threshold(struct z
          *
          * Some sample thresholds:
          *
-        * Threshold    Processors      (fls)   Zonesize        fls(mem+1)
+        * Threshold    Processors      (fls)   Zonesize        fls(mem)+1
          * ------------------------------------------------------------------
          * 8            1               1       0.9-1 GB        4
          * 16           2               2       0.9-1 GB        4
@@@ -1217,6 -1217,8 +1217,8 @@@ const char * const vmstat_text[] = 
         "pgreuse",
         "pgsteal_kswapd",
         "pgsteal_direct",
+       "pgdemote_kswapd",
+       "pgdemote_direct",
         "pgscan_kswapd",
         "pgscan_direct",
         "pgscan_direct_throttle",
@@@ -1452,7 -1454,7 +1454,7 @@@ static void pagetypeinfo_showfree_print
   }
   
   /* Print out the free pages at each order for each migratetype */
- static int pagetypeinfo_showfree(struct seq_file *m, void *arg)
+ static void pagetypeinfo_showfree(struct seq_file *m, void *arg)
   {
         int order;
         pg_data_t *pgdat = (pg_data_t *)arg;
@@@ -1464,8 -1466,6 +1466,6 @@@
         seq_putc(m, '\n');
   
         walk_zones_in_node(m, pgdat, true, false, pagetypeinfo_showfree_print);
- 
-       return 0;
   }
   
   static void pagetypeinfo_showblockcount_print(struct seq_file *m,
@@@ -1501,7 -1501,7 +1501,7 @@@
   }
   
   /* Print out the number of pageblocks for each migratetype */
- static int pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
+ static void pagetypeinfo_showblockcount(struct seq_file *m, void *arg)
   {
         int mtype;
         pg_data_t *pgdat = (pg_data_t *)arg;
@@@ -1512,8 -1512,6 +1512,6 @@@
         seq_putc(m, '\n');
         walk_zones_in_node(m, pgdat, true, false,
                 pagetypeinfo_showblockcount_print);
- 
-       return 0;
   }
   
   /*
@@@ -1873,11 -1871,6 +1871,6 @@@ static void vmstat_update(struct work_s
         }
   }
   
- /*
-  * Switch off vmstat processing and then fold all the remaining differentials
-  * until the diffs stay at zero. The function is used by NOHZ and can only be
-  * invoked when tick processing is not active.
-  */
   /*
    * Check if the diffs for a certain cpu indicate that
    * an update is needed.
@@@ -1894,17 -1887,15 +1887,15 @@@ static bool need_update(int cpu
                 /*
                  * The fast way of checking if there are any vmstat diffs.
                  */
-               if (memchr_inv(pzstats->vm_stat_diff, 0, NR_VM_ZONE_STAT_ITEMS *
-                              sizeof(pzstats->vm_stat_diff[0])))
+               if (memchr_inv(pzstats->vm_stat_diff, 0, sizeof(pzstats->vm_stat_diff)))
                         return true;
   
                 if (last_pgdat == zone->zone_pgdat)
                         continue;
                 last_pgdat = zone->zone_pgdat;
                 n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
-               if (memchr_inv(n->vm_node_stat_diff, 0, NR_VM_NODE_STAT_ITEMS *
-                              sizeof(n->vm_node_stat_diff[0])))
-                   return true;
+               if (memchr_inv(n->vm_node_stat_diff, 0, sizeof(n->vm_node_stat_diff)))
+                       return true;
         }
         return false;
   }
@@@ -1948,7 -1939,7 +1939,7 @@@ static void vmstat_shepherd(struct work
   {
         int cpu;
   
- -      get_online_cpus();
+ +      cpus_read_lock();
         /* Check processors whose vmstat worker threads have been disabled */
         for_each_online_cpu(cpu) {
                 struct delayed_work *dw = &per_cpu(vmstat_work, cpu);
@@@ -1958,7 -1949,7 +1949,7 @@@
   
                 cond_resched();
         }
- -      put_online_cpus();
+ +      cpus_read_unlock();
   
         schedule_delayed_work(&shepherd,
                 round_jiffies_relative(sysctl_stat_interval));
@@@ -2037,9 -2028,9 +2028,9 @@@ void __init init_mm_internals(void
         if (ret < 0)
                 pr_err("vmstat: failed to register 'online' hotplug state\n");
   
- -      get_online_cpus();
+ +      cpus_read_lock();
         init_cpu_node_state();
- -      put_online_cpus();
+ +      cpus_read_unlock();
   
         start_shepherd_timer();
   #endif
author	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 3 Sep 2021 17:08:28 +0000 (10:08 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 3 Sep 2021 17:08:28 +0000 (10:08 -0700)
		1	2
arch/alpha/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm/tools/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm64/include/asm/unistd32.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/arm64/mm/init.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/ia64/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/m68k/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/microblaze/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/mips/kernel/syscalls/syscall_o32.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/parisc/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/powerpc/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/riscv/mm/init.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kernel/setup.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/s390/mm/fault.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sh/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/sparc/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/entry/syscalls/syscall_32.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
arch/xtensa/kernel/syscalls/syscall.tbl	patch \|	diff1 \|	diff2 \|	blob \| history
block/blk-map.c	patch \|	diff1 \|	diff2 \|	blob \| history
drivers/mmc/host/mmc_spi.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/exec.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fcntl.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/fs-writeback.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/inode.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/locks.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namei.c	patch \|	diff1 \|	diff2 \|	blob \| history
fs/namespace.c	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/backing-dev.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/memcontrol.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/mm.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/syscalls.h	patch \|	diff1 \|	diff2 \|	blob \| history
include/linux/writeback.h	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/signal.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sys_ni.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/sysctl.c	patch \|	diff1 \|	diff2 \|	blob \| history
kernel/time/posix-timers.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/scatterlist.c	patch \|	diff1 \|	diff2 \|	blob \| history
lib/test_kasan.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/backing-dev.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/filemap.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/kasan/hw_tags.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/kasan/kasan.h	patch \|	diff1 \|	diff2 \|	blob \| history
mm/madvise.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/memblock.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/memcontrol.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/memory-failure.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/mmap.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/page-writeback.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/shmem.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/truncate.c	patch \|	diff1 \|	diff2 \|	blob \| history
mm/vmstat.c	patch \|	diff1 \|	diff2 \|	blob \| history