Merge tag 'pm-5.11-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
author Linus Torvalds <torvalds@linux-foundation.org>
Sat, 2 Jan 2021 19:53:05 +0000 (11:53 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Sat, 2 Jan 2021 19:53:05 +0000 (11:53 -0800)
Pull power management fixes from Rafael Wysocki:
 "These fix a crash in intel_pstate during resume from suspend-to-RAM
  that may occur after recent changes and two resource leaks in error
  paths in the operating performance points (OPP) framework, add a new
  C-states table to intel_idle and update the cpuidle MAINTAINERS entry
  to cover the governors too.

  Specifics:

   - Fix recently introduced crash in the intel_pstate driver that
     occurs if scale-invariance is disabled during resume from
     suspend-to-RAM due to inconsistent changes of APERF or MPERF MSR
     values made by the platform firmware (Rafael Wysocki).

   - Fix a memory leak and add a missing clk_put() in error paths in the
     OPP framework (Quanyang Wang, Viresh Kumar).

   - Add new C-states table for SnowRidge processors to the intel_idle
     driver (Artem Bityutskiy).

   - Update the MAINTAINERS entry for cpuidle to make it clear that the
     governors are covered by it too (Lukas Bulwahn)"

* tag 'pm-5.11-rc2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  intel_idle: add SnowRidge C-state table
  cpufreq: intel_pstate: Fix fast-switch fallback path
  opp: Call the missing clk_put() on error
  opp: fix memory leak in _allocate_opp_table
  MAINTAINERS: include governors into CPU IDLE TIME MANAGEMENT FRAMEWORK

75 files changed:
arch/alpha/include/asm/local64.h [deleted file]
arch/arc/include/asm/Kbuild
arch/arm/include/asm/Kbuild
arch/arm64/include/asm/Kbuild
arch/csky/include/asm/Kbuild
arch/h8300/include/asm/Kbuild
arch/hexagon/include/asm/Kbuild
arch/ia64/include/asm/local64.h [deleted file]
arch/ia64/mm/init.c
arch/m68k/include/asm/Kbuild
arch/microblaze/include/asm/Kbuild
arch/mips/include/asm/Kbuild
arch/nds32/include/asm/Kbuild
arch/openrisc/include/asm/Kbuild
arch/parisc/include/asm/Kbuild
arch/powerpc/include/asm/Kbuild
arch/riscv/include/asm/Kbuild
arch/s390/include/asm/Kbuild
arch/sh/include/asm/Kbuild
arch/sparc/include/asm/Kbuild
arch/x86/include/asm/local64.h [deleted file]
arch/xtensa/include/asm/Kbuild
block/blk-core.c
block/blk-mq-debugfs.c
block/blk-mq.c
block/blk-pm.c
block/blk-pm.h
drivers/ide/ide-atapi.c
drivers/ide/ide-io.c
drivers/ide/ide-pm.c
drivers/md/dm-crypt.c
drivers/scsi/cxgbi/cxgb4i/Kconfig
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/scsi_lib.c
drivers/scsi/scsi_transport_spi.c
drivers/scsi/ufs/ufs-mediatek-trace.h
drivers/scsi/ufs/ufs-mediatek.c
drivers/scsi/ufs/ufs-mediatek.h
drivers/scsi/ufs/ufs.h
drivers/scsi/ufs/ufshcd-pci.c
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h
fs/block_dev.c
fs/ceph/mds_client.c
fs/file.c
fs/io_uring.c
include/asm-generic/Kbuild
include/linux/blk-mq.h
include/linux/blkdev.h
include/linux/build_bug.h
include/linux/ceph/msgr.h
include/linux/kdev_t.h
include/linux/mm.h
include/linux/sizes.h
kernel/cgroup/cgroup-v1.c
kernel/cgroup/cgroup.c
kernel/exit.c
kernel/workqueue.c
lib/genalloc.c
lib/zlib_dfltcc/Makefile
lib/zlib_dfltcc/dfltcc.c
lib/zlib_dfltcc/dfltcc_deflate.c
lib/zlib_dfltcc/dfltcc_inflate.c
lib/zlib_dfltcc/dfltcc_syms.c [deleted file]
mm/hugetlb.c
mm/kasan/generic.c
mm/memory.c
mm/memory_hotplug.c
mm/mremap.c
mm/page_alloc.c
mm/slub.c
net/ceph/messenger_v2.c
scripts/checkpatch.pl
scripts/depmod.sh
tools/testing/selftests/vm/Makefile

diff --git a/arch/alpha/include/asm/local64.h b/arch/alpha/include/asm/local64.h
deleted file mode 100644 (file)
index 36c93b5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
index 81f4ede..3c1afa5 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += user.h
index 4a0848a..03657ff 100644 (file)
@@ -2,7 +2,6 @@
 generic-y += early_ioremap.h
 generic-y += extable.h
 generic-y += flat.h
-generic-y += local64.h
 generic-y += parport.h
 
 generated-y += mach-types.h
index ff9cbb6..07ac208 100644 (file)
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += early_ioremap.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
index 9337225..cc24bb8 100644 (file)
@@ -2,7 +2,6 @@
 generic-y += asm-offsets.h
 generic-y += gpio.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += qrwlock.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
index ddf04f3..60ee7f0 100644 (file)
@@ -2,7 +2,6 @@
 generic-y += asm-offsets.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += spinlock.h
index 373964b..3ece3c9 100644 (file)
@@ -2,5 +2,4 @@
 generic-y += extable.h
 generic-y += iomap.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/ia64/include/asm/local64.h b/arch/ia64/include/asm/local64.h
deleted file mode 100644 (file)
index 36c93b5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
index 9b5acf8..e76386a 100644 (file)
@@ -536,7 +536,7 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
 
        if (map_start < map_end)
                memmap_init_zone((unsigned long)(map_end - map_start),
-                                args->nid, args->zone, page_to_pfn(map_start),
+                                args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
                                 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
        return 0;
 }
@@ -546,7 +546,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
             unsigned long start_pfn)
 {
        if (!vmem_map) {
-               memmap_init_zone(size, nid, zone, start_pfn,
+               memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
                                 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
        } else {
                struct page *start;
index 1bff55a..0dbf9c5 100644 (file)
@@ -2,6 +2,5 @@
 generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += spinlock.h
index 63bce83..29b0e55 100644 (file)
@@ -2,7 +2,6 @@
 generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += syscalls.h
index 198b3ba..95b4fa7 100644 (file)
@@ -6,7 +6,6 @@ generated-y += syscall_table_64_n64.h
 generated-y += syscall_table_64_o32.h
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += qrwlock.h
index ff1e942..82a4453 100644 (file)
@@ -4,6 +4,5 @@ generic-y += cmpxchg.h
 generic-y += export.h
 generic-y += gpio.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += parport.h
 generic-y += user.h
index 442f3d3..ca5987e 100644 (file)
@@ -1,7 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += qspinlock_types.h
 generic-y += qspinlock.h
index f16c4db..4406475 100644 (file)
@@ -3,6 +3,5 @@ generated-y += syscall_table_32.h
 generated-y += syscall_table_64.h
 generated-y += syscall_table_c32.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += user.h
index 90cd5c5..e1f9b4e 100644 (file)
@@ -5,7 +5,6 @@ generated-y += syscall_table_c32.h
 generated-y += syscall_table_spu.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += qrwlock.h
 generic-y += vtime.h
index 59dd7be..445ccc9 100644 (file)
@@ -3,6 +3,5 @@ generic-y += early_ioremap.h
 generic-y += extable.h
 generic-y += flat.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += user.h
 generic-y += vmlinux.lds.h
index 319efa0..1a18d7b 100644 (file)
@@ -7,5 +7,4 @@ generated-y += unistd_nr.h
 generic-y += asm-offsets.h
 generic-y += export.h
 generic-y += kvm_types.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
index 7435182..fc44d9c 100644 (file)
@@ -1,6 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 generated-y += syscall_table.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += parport.h
index 5269a70..3688fda 100644 (file)
@@ -6,5 +6,4 @@ generated-y += syscall_table_64.h
 generated-y += syscall_table_c32.h
 generic-y += export.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/x86/include/asm/local64.h b/arch/x86/include/asm/local64.h
deleted file mode 100644 (file)
index 36c93b5..0000000
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/local64.h>
index 9718e95..854c5e0 100644 (file)
@@ -2,7 +2,6 @@
 generated-y += syscall_table.h
 generic-y += extable.h
 generic-y += kvm_para.h
-generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += param.h
 generic-y += qrwlock.h
index 96e5fcd..7663a9b 100644 (file)
@@ -18,6 +18,7 @@
 #include <linux/bio.h>
 #include <linux/blkdev.h>
 #include <linux/blk-mq.h>
+#include <linux/blk-pm.h>
 #include <linux/highmem.h>
 #include <linux/mm.h>
 #include <linux/pagemap.h>
@@ -424,11 +425,11 @@ EXPORT_SYMBOL(blk_cleanup_queue);
 /**
  * blk_queue_enter() - try to increase q->q_usage_counter
  * @q: request queue pointer
- * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PREEMPT
+ * @flags: BLK_MQ_REQ_NOWAIT and/or BLK_MQ_REQ_PM
  */
 int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 {
-       const bool pm = flags & BLK_MQ_REQ_PREEMPT;
+       const bool pm = flags & BLK_MQ_REQ_PM;
 
        while (true) {
                bool success = false;
@@ -440,7 +441,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
                         * responsible for ensuring that that counter is
                         * globally visible before the queue is unfrozen.
                         */
-                       if (pm || !blk_queue_pm_only(q)) {
+                       if ((pm && queue_rpm_status(q) != RPM_SUSPENDED) ||
+                           !blk_queue_pm_only(q)) {
                                success = true;
                        } else {
                                percpu_ref_put(&q->q_usage_counter);
@@ -465,8 +467,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
 
                wait_event(q->mq_freeze_wq,
                           (!q->mq_freeze_depth &&
-                           (pm || (blk_pm_request_resume(q),
-                                   !blk_queue_pm_only(q)))) ||
+                           blk_pm_resume_queue(pm, q)) ||
                           blk_queue_dying(q));
                if (blk_queue_dying(q))
                        return -ENODEV;
@@ -630,7 +631,7 @@ struct request *blk_get_request(struct request_queue *q, unsigned int op,
        struct request *req;
 
        WARN_ON_ONCE(op & REQ_NOWAIT);
-       WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PREEMPT));
+       WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM));
 
        req = blk_mq_alloc_request(q, op, flags);
        if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
index 3094542..4d6e83e 100644 (file)
@@ -129,6 +129,7 @@ static const char *const blk_queue_flag_name[] = {
        QUEUE_FLAG_NAME(PCI_P2PDMA),
        QUEUE_FLAG_NAME(ZONE_RESETALL),
        QUEUE_FLAG_NAME(RQ_ALLOC_TIME),
+       QUEUE_FLAG_NAME(NOWAIT),
 };
 #undef QUEUE_FLAG_NAME
 
@@ -297,7 +298,6 @@ static const char *const rqf_name[] = {
        RQF_NAME(MIXED_MERGE),
        RQF_NAME(MQ_INFLIGHT),
        RQF_NAME(DONTPREP),
-       RQF_NAME(PREEMPT),
        RQF_NAME(FAILED),
        RQF_NAME(QUIET),
        RQF_NAME(ELVPRIV),
index c338c9b..f285a91 100644 (file)
@@ -294,8 +294,8 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data,
        rq->mq_hctx = data->hctx;
        rq->rq_flags = 0;
        rq->cmd_flags = data->cmd_flags;
-       if (data->flags & BLK_MQ_REQ_PREEMPT)
-               rq->rq_flags |= RQF_PREEMPT;
+       if (data->flags & BLK_MQ_REQ_PM)
+               rq->rq_flags |= RQF_PM;
        if (blk_queue_io_stat(data->q))
                rq->rq_flags |= RQF_IO_STAT;
        INIT_LIST_HEAD(&rq->queuelist);
index b85234d..17bd020 100644 (file)
@@ -67,6 +67,10 @@ int blk_pre_runtime_suspend(struct request_queue *q)
 
        WARN_ON_ONCE(q->rpm_status != RPM_ACTIVE);
 
+       spin_lock_irq(&q->queue_lock);
+       q->rpm_status = RPM_SUSPENDING;
+       spin_unlock_irq(&q->queue_lock);
+
        /*
         * Increase the pm_only counter before checking whether any
         * non-PM blk_queue_enter() calls are in progress to avoid that any
@@ -89,15 +93,14 @@ int blk_pre_runtime_suspend(struct request_queue *q)
        /* Switch q_usage_counter back to per-cpu mode. */
        blk_mq_unfreeze_queue(q);
 
-       spin_lock_irq(&q->queue_lock);
-       if (ret < 0)
+       if (ret < 0) {
+               spin_lock_irq(&q->queue_lock);
+               q->rpm_status = RPM_ACTIVE;
                pm_runtime_mark_last_busy(q->dev);
-       else
-               q->rpm_status = RPM_SUSPENDING;
-       spin_unlock_irq(&q->queue_lock);
+               spin_unlock_irq(&q->queue_lock);
 
-       if (ret)
                blk_clear_pm_only(q);
+       }
 
        return ret;
 }
index ea5507d..a2283cc 100644 (file)
@@ -6,11 +6,14 @@
 #include <linux/pm_runtime.h>
 
 #ifdef CONFIG_PM
-static inline void blk_pm_request_resume(struct request_queue *q)
+static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
 {
-       if (q->dev && (q->rpm_status == RPM_SUSPENDED ||
-                      q->rpm_status == RPM_SUSPENDING))
-               pm_request_resume(q->dev);
+       if (!q->dev || !blk_queue_pm_only(q))
+               return 1;       /* Nothing to do */
+       if (pm && q->rpm_status != RPM_SUSPENDED)
+               return 1;       /* Request allowed */
+       pm_request_resume(q->dev);
+       return 0;
 }
 
 static inline void blk_pm_mark_last_busy(struct request *rq)
@@ -44,8 +47,9 @@ static inline void blk_pm_put_request(struct request *rq)
                --rq->q->nr_pending;
 }
 #else
-static inline void blk_pm_request_resume(struct request_queue *q)
+static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
 {
+       return 1;
 }
 
 static inline void blk_pm_mark_last_busy(struct request *rq)
index 2162bc8..013ad33 100644 (file)
@@ -223,7 +223,6 @@ void ide_prep_sense(ide_drive_t *drive, struct request *rq)
        sense_rq->rq_disk = rq->rq_disk;
        sense_rq->cmd_flags = REQ_OP_DRV_IN;
        ide_req(sense_rq)->type = ATA_PRIV_SENSE;
-       sense_rq->rq_flags |= RQF_PREEMPT;
 
        req->cmd[0] = GPCMD_REQUEST_SENSE;
        req->cmd[4] = cmd_len;
index 1a53c7a..4867b67 100644 (file)
@@ -515,15 +515,10 @@ repeat:
                 * above to return us whatever is in the queue. Since we call
                 * ide_do_request() ourselves, we end up taking requests while
                 * the queue is blocked...
-                * 
-                * We let requests forced at head of queue with ide-preempt
-                * though. I hope that doesn't happen too much, hopefully not
-                * unless the subdriver triggers such a thing in its own PM
-                * state machine.
                 */
                if ((drive->dev_flags & IDE_DFLAG_BLOCKED) &&
                    ata_pm_request(rq) == 0 &&
-                   (rq->rq_flags & RQF_PREEMPT) == 0) {
+                   (rq->rq_flags & RQF_PM) == 0) {
                        /* there should be no pending command at this point */
                        ide_unlock_port(hwif);
                        goto plug_device;
index 192e6c6..82ab308 100644 (file)
@@ -77,7 +77,7 @@ int generic_ide_resume(struct device *dev)
        }
 
        memset(&rqpm, 0, sizeof(rqpm));
-       rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PREEMPT);
+       rq = blk_get_request(drive->queue, REQ_OP_DRV_IN, BLK_MQ_REQ_PM);
        ide_req(rq)->type = ATA_PRIV_PM_RESUME;
        ide_req(rq)->special = &rqpm;
        rqpm.pm_step = IDE_PM_START_RESUME;
index 5f9f9b3..5379113 100644 (file)
@@ -3166,12 +3166,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
        }
 
        if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
-               cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+               cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
                                                  1, devname);
        else
-               cc->crypt_queue = alloc_workqueue("kcryptd-%s",
-                                                 WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
-                                                 WQ_UNBOUND | WQ_SYSFS,
+               cc->crypt_queue = alloc_workqueue("kcryptd/%s",
+                                                 WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
                                                  num_online_cpus(), devname);
        if (!cc->crypt_queue) {
                ti->error = "Couldn't create kcryptd queue";
index b206e26..8b0deec 100644 (file)
@@ -4,6 +4,7 @@ config SCSI_CXGB4_ISCSI
        depends on PCI && INET && (IPV6 || IPV6=n)
        depends on THERMAL || !THERMAL
        depends on ETHERNET
+       depends on TLS || TLS=n
        select NET_VENDOR_CHELSIO
        select CHELSIO_T4
        select CHELSIO_LIB
index 969baf4..6e23dc3 100644 (file)
@@ -5034,7 +5034,7 @@ _base_check_for_trigger_pages_support(struct MPT3SAS_ADAPTER *ioc)
 static void
 _base_get_diag_triggers(struct MPT3SAS_ADAPTER *ioc)
 {
-       u16 trigger_flags;
+       int trigger_flags;
 
        /*
         * Default setting of master trigger.
index 4848ae3..b3f14f0 100644 (file)
@@ -249,7 +249,8 @@ int __scsi_execute(struct scsi_device *sdev, const unsigned char *cmd,
 
        req = blk_get_request(sdev->request_queue,
                        data_direction == DMA_TO_DEVICE ?
-                       REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, BLK_MQ_REQ_PREEMPT);
+                       REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN,
+                       rq_flags & RQF_PM ? BLK_MQ_REQ_PM : 0);
        if (IS_ERR(req))
                return ret;
        rq = scsi_req(req);
@@ -1206,6 +1207,8 @@ static blk_status_t
 scsi_device_state_check(struct scsi_device *sdev, struct request *req)
 {
        switch (sdev->sdev_state) {
+       case SDEV_CREATED:
+               return BLK_STS_OK;
        case SDEV_OFFLINE:
        case SDEV_TRANSPORT_OFFLINE:
                /*
@@ -1232,18 +1235,18 @@ scsi_device_state_check(struct scsi_device *sdev, struct request *req)
                return BLK_STS_RESOURCE;
        case SDEV_QUIESCE:
                /*
-                * If the devices is blocked we defer normal commands.
+                * If the device is blocked we only accept power management
+                * commands.
                 */
-               if (req && !(req->rq_flags & RQF_PREEMPT))
+               if (req && WARN_ON_ONCE(!(req->rq_flags & RQF_PM)))
                        return BLK_STS_RESOURCE;
                return BLK_STS_OK;
        default:
                /*
                 * For any other not fully online state we only allow
-                * special commands.  In particular any user initiated
-                * command is not allowed.
+                * power management commands.
                 */
-               if (req && !(req->rq_flags & RQF_PREEMPT))
+               if (req && !(req->rq_flags & RQF_PM))
                        return BLK_STS_IOERR;
                return BLK_STS_OK;
        }
@@ -2516,15 +2519,13 @@ void sdev_evt_send_simple(struct scsi_device *sdev,
 EXPORT_SYMBOL_GPL(sdev_evt_send_simple);
 
 /**
- *     scsi_device_quiesce - Block user issued commands.
+ *     scsi_device_quiesce - Block all commands except power management.
  *     @sdev:  scsi device to quiesce.
  *
  *     This works by trying to transition to the SDEV_QUIESCE state
  *     (which must be a legal transition).  When the device is in this
- *     state, only special requests will be accepted, all others will
- *     be deferred.  Since special requests may also be requeued requests,
- *     a successful return doesn't guarantee the device will be
- *     totally quiescent.
+ *     state, only power management requests will be accepted, all others will
+ *     be deferred.
  *
  *     Must be called with user context, may sleep.
  *
@@ -2586,12 +2587,12 @@ void scsi_device_resume(struct scsi_device *sdev)
         * device deleted during suspend)
         */
        mutex_lock(&sdev->state_mutex);
+       if (sdev->sdev_state == SDEV_QUIESCE)
+               scsi_device_set_state(sdev, SDEV_RUNNING);
        if (sdev->quiesced_by) {
                sdev->quiesced_by = NULL;
                blk_clear_pm_only(sdev->request_queue);
        }
-       if (sdev->sdev_state == SDEV_QUIESCE)
-               scsi_device_set_state(sdev, SDEV_RUNNING);
        mutex_unlock(&sdev->state_mutex);
 }
 EXPORT_SYMBOL(scsi_device_resume);
index f3d5b1b..c37dd15 100644 (file)
@@ -117,12 +117,16 @@ static int spi_execute(struct scsi_device *sdev, const void *cmd,
                sshdr = &sshdr_tmp;
 
        for(i = 0; i < DV_RETRIES; i++) {
+               /*
+                * The purpose of the RQF_PM flag below is to bypass the
+                * SDEV_QUIESCE state.
+                */
                result = scsi_execute(sdev, cmd, dir, buffer, bufflen, sense,
                                      sshdr, DV_TIMEOUT, /* retries */ 1,
                                      REQ_FAILFAST_DEV |
                                      REQ_FAILFAST_TRANSPORT |
                                      REQ_FAILFAST_DRIVER,
-                                     0, NULL);
+                                     RQF_PM, NULL);
                if (driver_byte(result) != DRIVER_SENSE ||
                    sshdr->sense_key != UNIT_ATTENTION)
                        break;
@@ -1005,23 +1009,26 @@ spi_dv_device(struct scsi_device *sdev)
         */
        lock_system_sleep();
 
+       if (scsi_autopm_get_device(sdev))
+               goto unlock_system_sleep;
+
        if (unlikely(spi_dv_in_progress(starget)))
-               goto unlock;
+               goto put_autopm;
 
        if (unlikely(scsi_device_get(sdev)))
-               goto unlock;
+               goto put_autopm;
 
        spi_dv_in_progress(starget) = 1;
 
        buffer = kzalloc(len, GFP_KERNEL);
 
        if (unlikely(!buffer))
-               goto out_put;
+               goto put_sdev;
 
        /* We need to verify that the actual device will quiesce; the
         * later target quiesce is just a nice to have */
        if (unlikely(scsi_device_quiesce(sdev)))
-               goto out_free;
+               goto free_buffer;
 
        scsi_target_quiesce(starget);
 
@@ -1041,12 +1048,16 @@ spi_dv_device(struct scsi_device *sdev)
 
        spi_initial_dv(starget) = 1;
 
- out_free:
+free_buffer:
        kfree(buffer);
- out_put:
+
+put_sdev:
        spi_dv_in_progress(starget) = 0;
        scsi_device_put(sdev);
-unlock:
+put_autopm:
+       scsi_autopm_put_device(sdev);
+
+unlock_system_sleep:
        unlock_system_sleep();
 }
 EXPORT_SYMBOL(spi_dv_device);
index fd6f84c..895e82e 100644 (file)
@@ -31,6 +31,6 @@ TRACE_EVENT(ufs_mtk_event,
 
 #undef TRACE_INCLUDE_PATH
 #undef TRACE_INCLUDE_FILE
-#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_PATH ../../drivers/scsi/ufs/
 #define TRACE_INCLUDE_FILE ufs-mediatek-trace
 #include <trace/define_trace.h>
index 3522458..80618af 100644 (file)
@@ -70,6 +70,13 @@ static bool ufs_mtk_is_va09_supported(struct ufs_hba *hba)
        return !!(host->caps & UFS_MTK_CAP_VA09_PWR_CTRL);
 }
 
+static bool ufs_mtk_is_broken_vcc(struct ufs_hba *hba)
+{
+       struct ufs_mtk_host *host = ufshcd_get_variant(hba);
+
+       return !!(host->caps & UFS_MTK_CAP_BROKEN_VCC);
+}
+
 static void ufs_mtk_cfg_unipro_cg(struct ufs_hba *hba, bool enable)
 {
        u32 tmp;
@@ -514,6 +521,9 @@ static void ufs_mtk_init_host_caps(struct ufs_hba *hba)
        if (of_property_read_bool(np, "mediatek,ufs-disable-ah8"))
                host->caps |= UFS_MTK_CAP_DISABLE_AH8;
 
+       if (of_property_read_bool(np, "mediatek,ufs-broken-vcc"))
+               host->caps |= UFS_MTK_CAP_BROKEN_VCC;
+
        dev_info(hba->dev, "caps: 0x%x", host->caps);
 }
 
@@ -1003,6 +1013,17 @@ static int ufs_mtk_apply_dev_quirks(struct ufs_hba *hba)
 static void ufs_mtk_fixup_dev_quirks(struct ufs_hba *hba)
 {
        ufshcd_fixup_dev_quirks(hba, ufs_mtk_dev_fixups);
+
+       if (ufs_mtk_is_broken_vcc(hba) && hba->vreg_info.vcc &&
+           (hba->dev_quirks & UFS_DEVICE_QUIRK_DELAY_AFTER_LPM)) {
+               hba->vreg_info.vcc->always_on = true;
+               /*
+                * VCC will be kept always-on thus we don't
+                * need any delay during regulator operations
+                */
+               hba->dev_quirks &= ~(UFS_DEVICE_QUIRK_DELAY_BEFORE_LPM |
+                       UFS_DEVICE_QUIRK_DELAY_AFTER_LPM);
+       }
 }
 
 static void ufs_mtk_event_notify(struct ufs_hba *hba,
index 93d3509..3f0d3bb 100644 (file)
@@ -81,6 +81,7 @@ enum ufs_mtk_host_caps {
        UFS_MTK_CAP_BOOST_CRYPT_ENGINE         = 1 << 0,
        UFS_MTK_CAP_VA09_PWR_CTRL              = 1 << 1,
        UFS_MTK_CAP_DISABLE_AH8                = 1 << 2,
+       UFS_MTK_CAP_BROKEN_VCC                 = 1 << 3,
 };
 
 struct ufs_mtk_crypt_cfg {
index d593edb..14dfda7 100644 (file)
@@ -330,7 +330,6 @@ enum {
        UFS_DEV_WRITE_BOOSTER_SUP       = BIT(8),
 };
 
-#define POWER_DESC_MAX_SIZE                    0x62
 #define POWER_DESC_MAX_ACTV_ICC_LVLS           16
 
 /* Attribute  bActiveICCLevel parameter bit masks definitions */
@@ -513,6 +512,7 @@ struct ufs_query_res {
 struct ufs_vreg {
        struct regulator *reg;
        const char *name;
+       bool always_on;
        bool enabled;
        int min_uV;
        int max_uV;
index df3a564..fadd566 100644 (file)
@@ -148,6 +148,8 @@ static int ufs_intel_common_init(struct ufs_hba *hba)
 {
        struct intel_host *host;
 
+       hba->caps |= UFSHCD_CAP_RPM_AUTOSUSPEND;
+
        host = devm_kzalloc(hba->dev, sizeof(*host), GFP_KERNEL);
        if (!host)
                return -ENOMEM;
@@ -163,6 +165,41 @@ static void ufs_intel_common_exit(struct ufs_hba *hba)
        intel_ltr_hide(hba->dev);
 }
 
+static int ufs_intel_resume(struct ufs_hba *hba, enum ufs_pm_op op)
+{
+       /*
+        * To support S4 (suspend-to-disk) with spm_lvl other than 5, the base
+        * address registers must be restored because the restore kernel can
+        * have used different addresses.
+        */
+       ufshcd_writel(hba, lower_32_bits(hba->utrdl_dma_addr),
+                     REG_UTP_TRANSFER_REQ_LIST_BASE_L);
+       ufshcd_writel(hba, upper_32_bits(hba->utrdl_dma_addr),
+                     REG_UTP_TRANSFER_REQ_LIST_BASE_H);
+       ufshcd_writel(hba, lower_32_bits(hba->utmrdl_dma_addr),
+                     REG_UTP_TASK_REQ_LIST_BASE_L);
+       ufshcd_writel(hba, upper_32_bits(hba->utmrdl_dma_addr),
+                     REG_UTP_TASK_REQ_LIST_BASE_H);
+
+       if (ufshcd_is_link_hibern8(hba)) {
+               int ret = ufshcd_uic_hibern8_exit(hba);
+
+               if (!ret) {
+                       ufshcd_set_link_active(hba);
+               } else {
+                       dev_err(hba->dev, "%s: hibern8 exit failed %d\n",
+                               __func__, ret);
+                       /*
+                        * Force reset and restore. Any other actions can lead
+                        * to an unrecoverable state.
+                        */
+                       ufshcd_set_link_off(hba);
+               }
+       }
+
+       return 0;
+}
+
 static int ufs_intel_ehl_init(struct ufs_hba *hba)
 {
        hba->quirks |= UFSHCD_QUIRK_BROKEN_AUTO_HIBERN8;
@@ -174,6 +211,7 @@ static struct ufs_hba_variant_ops ufs_intel_cnl_hba_vops = {
        .init                   = ufs_intel_common_init,
        .exit                   = ufs_intel_common_exit,
        .link_startup_notify    = ufs_intel_link_startup_notify,
+       .resume                 = ufs_intel_resume,
 };
 
 static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = {
@@ -181,6 +219,7 @@ static struct ufs_hba_variant_ops ufs_intel_ehl_hba_vops = {
        .init                   = ufs_intel_ehl_init,
        .exit                   = ufs_intel_common_exit,
        .link_startup_notify    = ufs_intel_link_startup_notify,
+       .resume                 = ufs_intel_resume,
 };
 
 #ifdef CONFIG_PM_SLEEP
@@ -207,6 +246,30 @@ static int ufshcd_pci_resume(struct device *dev)
 {
        return ufshcd_system_resume(dev_get_drvdata(dev));
 }
+
+/**
+ * ufshcd_pci_poweroff - suspend-to-disk poweroff function
+ * @dev: pointer to PCI device handle
+ *
+ * Returns 0 if successful
+ * Returns non-zero otherwise
+ */
+static int ufshcd_pci_poweroff(struct device *dev)
+{
+       struct ufs_hba *hba = dev_get_drvdata(dev);
+       int spm_lvl = hba->spm_lvl;
+       int ret;
+
+       /*
+        * For poweroff we need to set the UFS device to PowerDown mode.
+        * Force spm_lvl to ensure that.
+        */
+       hba->spm_lvl = 5;
+       ret = ufshcd_system_suspend(hba);
+       hba->spm_lvl = spm_lvl;
+       return ret;
+}
+
 #endif /* !CONFIG_PM_SLEEP */
 
 #ifdef CONFIG_PM
@@ -302,8 +365,14 @@ ufshcd_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 }
 
 static const struct dev_pm_ops ufshcd_pci_pm_ops = {
-       SET_SYSTEM_SLEEP_PM_OPS(ufshcd_pci_suspend,
-                               ufshcd_pci_resume)
+#ifdef CONFIG_PM_SLEEP
+       .suspend        = ufshcd_pci_suspend,
+       .resume         = ufshcd_pci_resume,
+       .freeze         = ufshcd_pci_suspend,
+       .thaw           = ufshcd_pci_resume,
+       .poweroff       = ufshcd_pci_poweroff,
+       .restore        = ufshcd_pci_resume,
+#endif
        SET_RUNTIME_PM_OPS(ufshcd_pci_runtime_suspend,
                           ufshcd_pci_runtime_resume,
                           ufshcd_pci_runtime_idle)
index 9902b7e..82ad317 100644 (file)
@@ -225,6 +225,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba);
 static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd);
 static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
 static void ufshcd_hba_exit(struct ufs_hba *hba);
+static int ufshcd_clear_ua_wluns(struct ufs_hba *hba);
 static int ufshcd_probe_hba(struct ufs_hba *hba, bool async);
 static int ufshcd_setup_clocks(struct ufs_hba *hba, bool on);
 static int ufshcd_uic_hibern8_enter(struct ufs_hba *hba);
@@ -580,6 +581,23 @@ static void ufshcd_print_pwr_info(struct ufs_hba *hba)
                 hba->pwr_info.hs_rate);
 }
 
+static void ufshcd_device_reset(struct ufs_hba *hba)
+{
+       int err;
+
+       err = ufshcd_vops_device_reset(hba);
+
+       if (!err) {
+               ufshcd_set_ufs_dev_active(hba);
+               if (ufshcd_is_wb_allowed(hba)) {
+                       hba->wb_enabled = false;
+                       hba->wb_buf_flush_enabled = false;
+               }
+       }
+       if (err != -EOPNOTSUPP)
+               ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err);
+}
+
 void ufshcd_delay_us(unsigned long us, unsigned long tolerance)
 {
        if (!us)
@@ -3665,7 +3683,7 @@ static int ufshcd_dme_enable(struct ufs_hba *hba)
        ret = ufshcd_send_uic_cmd(hba, &uic_cmd);
        if (ret)
                dev_err(hba->dev,
-                       "dme-reset: error code %d\n", ret);
+                       "dme-enable: error code %d\n", ret);
 
        return ret;
 }
@@ -3964,7 +3982,7 @@ int ufshcd_link_recovery(struct ufs_hba *hba)
        spin_unlock_irqrestore(hba->host->host_lock, flags);
 
        /* Reset the attached device */
-       ufshcd_vops_device_reset(hba);
+       ufshcd_device_reset(hba);
 
        ret = ufshcd_host_reset_and_restore(hba);
 
@@ -6930,7 +6948,8 @@ static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
 
        /* Establish the link again and restore the device */
        err = ufshcd_probe_hba(hba, false);
-
+       if (!err)
+               ufshcd_clear_ua_wluns(hba);
 out:
        if (err)
                dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
@@ -6968,7 +6987,7 @@ static int ufshcd_reset_and_restore(struct ufs_hba *hba)
 
        do {
                /* Reset the attached device */
-               ufshcd_vops_device_reset(hba);
+               ufshcd_device_reset(hba);
 
                err = ufshcd_host_reset_and_restore(hba);
        } while (err && --retries);
@@ -8045,7 +8064,7 @@ static int ufshcd_disable_vreg(struct device *dev, struct ufs_vreg *vreg)
 {
        int ret = 0;
 
-       if (!vreg || !vreg->enabled)
+       if (!vreg || !vreg->enabled || vreg->always_on)
                goto out;
 
        ret = regulator_disable(vreg->reg);
@@ -8414,13 +8433,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
         * handling context.
         */
        hba->host->eh_noresume = 1;
-       if (hba->wlun_dev_clr_ua) {
-               ret = ufshcd_send_request_sense(hba, sdp);
-               if (ret)
-                       goto out;
-               /* Unit attention condition is cleared now */
-               hba->wlun_dev_clr_ua = false;
-       }
+       ufshcd_clear_ua_wluns(hba);
 
        cmd[4] = pwr_mode << 4;
 
@@ -8441,7 +8454,7 @@ static int ufshcd_set_dev_pwr_mode(struct ufs_hba *hba,
 
        if (!ret)
                hba->curr_dev_pwr_mode = pwr_mode;
-out:
+
        scsi_device_put(sdp);
        hba->host->eh_noresume = 0;
        return ret;
@@ -8747,7 +8760,7 @@ set_link_active:
         * further below.
         */
        if (ufshcd_is_ufs_dev_deepsleep(hba)) {
-               ufshcd_vops_device_reset(hba);
+               ufshcd_device_reset(hba);
                WARN_ON(!ufshcd_is_link_off(hba));
        }
        if (ufshcd_is_link_hibern8(hba) && !ufshcd_uic_hibern8_exit(hba))
@@ -8757,7 +8770,7 @@ set_link_active:
 set_dev_active:
        /* Can also get here needing to exit DeepSleep */
        if (ufshcd_is_ufs_dev_deepsleep(hba)) {
-               ufshcd_vops_device_reset(hba);
+               ufshcd_device_reset(hba);
                ufshcd_host_reset_and_restore(hba);
        }
        if (!ufshcd_set_dev_pwr_mode(hba, UFS_ACTIVE_PWR_MODE))
@@ -9353,7 +9366,7 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq)
        }
 
        /* Reset the attached device */
-       ufshcd_vops_device_reset(hba);
+       ufshcd_device_reset(hba);
 
        ufshcd_init_crypto(hba);
 
index f8c2467..aa9ea35 100644 (file)
@@ -1218,16 +1218,12 @@ static inline void ufshcd_vops_dbg_register_dump(struct ufs_hba *hba)
                hba->vops->dbg_register_dump(hba);
 }
 
-static inline void ufshcd_vops_device_reset(struct ufs_hba *hba)
+static inline int ufshcd_vops_device_reset(struct ufs_hba *hba)
 {
-       if (hba->vops && hba->vops->device_reset) {
-               int err = hba->vops->device_reset(hba);
-
-               if (!err)
-                       ufshcd_set_ufs_dev_active(hba);
-               if (err != -EOPNOTSUPP)
-                       ufshcd_update_evt_hist(hba, UFS_EVT_DEV_RESET, err);
-       }
+       if (hba->vops && hba->vops->device_reset)
+               return hba->vops->device_reset(hba);
+
+       return -EOPNOTSUPP;
 }
 
 static inline void ufshcd_vops_config_scaling_param(struct ufs_hba *hba,
index 9293045..3e5b02f 100644 (file)
@@ -1055,7 +1055,6 @@ static void bd_finish_claiming(struct block_device *bdev, void *holder)
 /**
  * bd_abort_claiming - abort claiming of a block device
  * @bdev: block device of interest
- * @whole: whole block device
  * @holder: holder that has claimed @bdev
  *
  * Abort claiming of a block device when the exclusive open failed. This can be
@@ -1828,6 +1827,7 @@ const struct file_operations def_blk_fops = {
 /**
  * lookup_bdev  - lookup a struct block_device by name
  * @pathname:  special file representing the block device
+ * @dev:       return value of the block device's dev_t
  *
  * Get a reference to the blockdevice at @pathname in the current
  * namespace if possible and return it.  Return ERR_PTR(error)
index 98c15ff..8405870 100644 (file)
@@ -2475,6 +2475,22 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
        return r;
 }
 
+static void encode_timestamp_and_gids(void **p,
+                                     const struct ceph_mds_request *req)
+{
+       struct ceph_timespec ts;
+       int i;
+
+       ceph_encode_timespec64(&ts, &req->r_stamp);
+       ceph_encode_copy(p, &ts, sizeof(ts));
+
+       /* gid_list */
+       ceph_encode_32(p, req->r_cred->group_info->ngroups);
+       for (i = 0; i < req->r_cred->group_info->ngroups; i++)
+               ceph_encode_64(p, from_kgid(&init_user_ns,
+                                           req->r_cred->group_info->gid[i]));
+}
+
 /*
  * called under mdsc->mutex
  */
@@ -2491,7 +2507,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
        u64 ino1 = 0, ino2 = 0;
        int pathlen1 = 0, pathlen2 = 0;
        bool freepath1 = false, freepath2 = false;
-       int len, i;
+       int len;
        u16 releases;
        void *p, *end;
        int ret;
@@ -2517,17 +2533,10 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
                goto out_free1;
        }
 
-       if (legacy) {
-               /* Old style */
-               len = sizeof(*head);
-       } else {
-               /* New style: add gid_list and any later fields */
-               len = sizeof(struct ceph_mds_request_head) + sizeof(u32) +
-                     (sizeof(u64) * req->r_cred->group_info->ngroups);
-       }
-
+       len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head);
        len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
                sizeof(struct ceph_timespec);
+       len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups);
 
        /* calculate (max) length for cap releases */
        len += sizeof(struct ceph_mds_request_release) *
@@ -2548,7 +2557,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
        msg->hdr.tid = cpu_to_le64(req->r_tid);
 
        /*
-        * The old ceph_mds_request_header didn't contain a version field, and
+        * The old ceph_mds_request_head didn't contain a version field, and
         * one was added when we moved the message version from 3->4.
         */
        if (legacy) {
@@ -2609,20 +2618,7 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 
        head->num_releases = cpu_to_le16(releases);
 
-       /* time stamp */
-       {
-               struct ceph_timespec ts;
-               ceph_encode_timespec64(&ts, &req->r_stamp);
-               ceph_encode_copy(&p, &ts, sizeof(ts));
-       }
-
-       /* gid list */
-       if (!legacy) {
-               ceph_encode_32(&p, req->r_cred->group_info->ngroups);
-               for (i = 0; i < req->r_cred->group_info->ngroups; i++)
-                       ceph_encode_64(&p, from_kgid(&init_user_ns,
-                                      req->r_cred->group_info->gid[i]));
-       }
+       encode_timestamp_and_gids(&p, req);
 
        if (WARN_ON_ONCE(p > end)) {
                ceph_msg_put(msg);
@@ -2730,13 +2726,8 @@ static int __prepare_send_request(struct ceph_mds_session *session,
                /* remove cap/dentry releases from message */
                rhead->num_releases = 0;
 
-               /* time stamp */
                p = msg->front.iov_base + req->r_request_release_offset;
-               {
-                       struct ceph_timespec ts;
-                       ceph_encode_timespec64(&ts, &req->r_stamp);
-                       ceph_encode_copy(&p, &ts, sizeof(ts));
-               }
+               encode_timestamp_and_gids(&p, req);
 
                msg->front.iov_len = p - msg->front.iov_base;
                msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
index c0b6096..dab120b 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -21,7 +21,6 @@
 #include <linux/rcupdate.h>
 #include <linux/close_range.h>
 #include <net/sock.h>
-#include <linux/io_uring.h>
 
 unsigned int sysctl_nr_open __read_mostly = 1024*1024;
 unsigned int sysctl_nr_open_min = BITS_PER_LONG;
@@ -428,7 +427,6 @@ void exit_files(struct task_struct *tsk)
        struct files_struct * files = tsk->files;
 
        if (files) {
-               io_uring_files_cancel(files);
                task_lock(tsk);
                tsk->files = NULL;
                task_unlock(tsk);
index 7e35283..ca46f31 100644 (file)
@@ -992,6 +992,10 @@ enum io_mem_account {
        ACCT_PINNED,
 };
 
+static void destroy_fixed_file_ref_node(struct fixed_file_ref_node *ref_node);
+static struct fixed_file_ref_node *alloc_fixed_file_ref_node(
+                       struct io_ring_ctx *ctx);
+
 static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
                             struct io_comp_state *cs);
 static void io_cqring_fill_event(struct io_kiocb *req, long res);
@@ -1501,6 +1505,13 @@ static bool io_grab_identity(struct io_kiocb *req)
                spin_unlock_irq(&ctx->inflight_lock);
                req->work.flags |= IO_WQ_WORK_FILES;
        }
+       if (!(req->work.flags & IO_WQ_WORK_MM) &&
+           (def->work_flags & IO_WQ_WORK_MM)) {
+               if (id->mm != current->mm)
+                       return false;
+               mmgrab(id->mm);
+               req->work.flags |= IO_WQ_WORK_MM;
+       }
 
        return true;
 }
@@ -1525,13 +1536,6 @@ static void io_prep_async_work(struct io_kiocb *req)
                        req->work.flags |= IO_WQ_WORK_UNBOUND;
        }
 
-       /* ->mm can never change on us */
-       if (!(req->work.flags & IO_WQ_WORK_MM) &&
-           (def->work_flags & IO_WQ_WORK_MM)) {
-               mmgrab(id->mm);
-               req->work.flags |= IO_WQ_WORK_MM;
-       }
-
        /* if we fail grabbing identity, we must COW, regrab, and retry */
        if (io_grab_identity(req))
                return;
@@ -7231,14 +7235,28 @@ static void io_file_ref_kill(struct percpu_ref *ref)
        complete(&data->done);
 }
 
+static void io_sqe_files_set_node(struct fixed_file_data *file_data,
+                                 struct fixed_file_ref_node *ref_node)
+{
+       spin_lock_bh(&file_data->lock);
+       file_data->node = ref_node;
+       list_add_tail(&ref_node->node, &file_data->ref_list);
+       spin_unlock_bh(&file_data->lock);
+       percpu_ref_get(&file_data->refs);
+}
+
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
        struct fixed_file_data *data = ctx->file_data;
-       struct fixed_file_ref_node *ref_node = NULL;
+       struct fixed_file_ref_node *backup_node, *ref_node = NULL;
        unsigned nr_tables, i;
+       int ret;
 
        if (!data)
                return -ENXIO;
+       backup_node = alloc_fixed_file_ref_node(ctx);
+       if (!backup_node)
+               return -ENOMEM;
 
        spin_lock_bh(&data->lock);
        ref_node = data->node;
@@ -7250,7 +7268,18 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 
        /* wait for all refs nodes to complete */
        flush_delayed_work(&ctx->file_put_work);
-       wait_for_completion(&data->done);
+       do {
+               ret = wait_for_completion_interruptible(&data->done);
+               if (!ret)
+                       break;
+               ret = io_run_task_work_sig();
+               if (ret < 0) {
+                       percpu_ref_resurrect(&data->refs);
+                       reinit_completion(&data->done);
+                       io_sqe_files_set_node(data, backup_node);
+                       return ret;
+               }
+       } while (1);
 
        __io_sqe_files_unregister(ctx);
        nr_tables = DIV_ROUND_UP(ctx->nr_user_files, IORING_MAX_FILES_TABLE);
@@ -7261,6 +7290,7 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
        kfree(data);
        ctx->file_data = NULL;
        ctx->nr_user_files = 0;
+       destroy_fixed_file_ref_node(backup_node);
        return 0;
 }
 
@@ -7758,11 +7788,7 @@ static int io_sqe_files_register(struct io_ring_ctx *ctx, void __user *arg,
                return PTR_ERR(ref_node);
        }
 
-       file_data->node = ref_node;
-       spin_lock_bh(&file_data->lock);
-       list_add_tail(&ref_node->node, &file_data->ref_list);
-       spin_unlock_bh(&file_data->lock);
-       percpu_ref_get(&file_data->refs);
+       io_sqe_files_set_node(file_data, ref_node);
        return ret;
 out_fput:
        for (i = 0; i < ctx->nr_user_files; i++) {
@@ -7918,11 +7944,7 @@ static int __io_sqe_files_update(struct io_ring_ctx *ctx,
 
        if (needs_switch) {
                percpu_ref_kill(&data->node->refs);
-               spin_lock_bh(&data->lock);
-               list_add_tail(&ref_node->node, &data->ref_list);
-               data->node = ref_node;
-               spin_unlock_bh(&data->lock);
-               percpu_ref_get(&ctx->file_data->refs);
+               io_sqe_files_set_node(data, ref_node);
        } else
                destroy_fixed_file_ref_node(ref_node);
 
index 4365b9a..267f6df 100644 (file)
@@ -34,6 +34,7 @@ mandatory-y += kmap_size.h
 mandatory-y += kprobes.h
 mandatory-y += linkage.h
 mandatory-y += local.h
+mandatory-y += local64.h
 mandatory-y += mm-arch-hooks.h
 mandatory-y += mmiowb.h
 mandatory-y += mmu.h
index 47b0219..d705b17 100644 (file)
@@ -447,8 +447,8 @@ enum {
        BLK_MQ_REQ_NOWAIT       = (__force blk_mq_req_flags_t)(1 << 0),
        /* allocate from reserved pool */
        BLK_MQ_REQ_RESERVED     = (__force blk_mq_req_flags_t)(1 << 1),
-       /* set RQF_PREEMPT */
-       BLK_MQ_REQ_PREEMPT      = (__force blk_mq_req_flags_t)(1 << 3),
+       /* set RQF_PM */
+       BLK_MQ_REQ_PM           = (__force blk_mq_req_flags_t)(1 << 2),
 };
 
 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
index 070de09..f94ee30 100644 (file)
@@ -79,9 +79,6 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_MQ_INFLIGHT                ((__force req_flags_t)(1 << 6))
 /* don't call prep for this one */
 #define RQF_DONTPREP           ((__force req_flags_t)(1 << 7))
-/* set for "ide_preempt" requests and also for requests for which the SCSI
-   "quiesce" state must be ignored. */
-#define RQF_PREEMPT            ((__force req_flags_t)(1 << 8))
 /* vaguely specified driver internal error.  Ignored by the block layer */
 #define RQF_FAILED             ((__force req_flags_t)(1 << 10))
 /* don't warn about errors */
@@ -430,8 +427,7 @@ struct request_queue {
        unsigned long           queue_flags;
        /*
         * Number of contexts that have called blk_set_pm_only(). If this
-        * counter is above zero then only RQF_PM and RQF_PREEMPT requests are
-        * processed.
+        * counter is above zero then only RQF_PM requests are processed.
         */
        atomic_t                pm_only;
 
@@ -696,6 +692,18 @@ static inline bool queue_is_mq(struct request_queue *q)
        return q->mq_ops;
 }
 
+#ifdef CONFIG_PM
+static inline enum rpm_status queue_rpm_status(struct request_queue *q)
+{
+       return q->rpm_status;
+}
+#else
+static inline enum rpm_status queue_rpm_status(struct request_queue *q)
+{
+       return RPM_ACTIVE;
+}
+#endif
+
 static inline enum blk_zoned_model
 blk_queue_zoned_model(struct request_queue *q)
 {
index 7bb66e1..e3a0be2 100644 (file)
@@ -77,9 +77,4 @@
 #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr)
 #define __static_assert(expr, msg, ...) _Static_assert(expr, msg)
 
-#ifdef __GENKSYMS__
-/* genksyms gets confused by _Static_assert */
-#define _Static_assert(expr, ...)
-#endif
-
 #endif /* _LINUX_BUILD_BUG_H */
index f5e02f6..3989dcb 100644 (file)
@@ -33,8 +33,8 @@
 #define CEPH_MSGR2_INCARNATION_1 (0ull)
 
 #define DEFINE_MSGR2_FEATURE(bit, incarnation, name)               \
-       static const uint64_t CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \
-       static const uint64_t CEPH_MSGR2_FEATUREMASK_##name =            \
+       static const uint64_t __maybe_unused CEPH_MSGR2_FEATURE_##name = (1ULL << bit); \
+       static const uint64_t __maybe_unused CEPH_MSGR2_FEATUREMASK_##name =            \
                        (1ULL << bit | CEPH_MSGR2_INCARNATION_##incarnation);
 
 #define HAVE_MSGR2_FEATURE(x, name) \
index 85b5151..4856706 100644 (file)
        })
 
 /* acceptable for old filesystems */
-static inline bool old_valid_dev(dev_t dev)
+static __always_inline bool old_valid_dev(dev_t dev)
 {
        return MAJOR(dev) < 256 && MINOR(dev) < 256;
 }
 
-static inline u16 old_encode_dev(dev_t dev)
+static __always_inline u16 old_encode_dev(dev_t dev)
 {
        return (MAJOR(dev) << 8) | MINOR(dev);
 }
 
-static inline dev_t old_decode_dev(u16 val)
+static __always_inline dev_t old_decode_dev(u16 val)
 {
        return MKDEV((val >> 8) & 255, val & 255);
 }
 
-static inline u32 new_encode_dev(dev_t dev)
+static __always_inline u32 new_encode_dev(dev_t dev)
 {
        unsigned major = MAJOR(dev);
        unsigned minor = MINOR(dev);
        return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12);
 }
 
-static inline dev_t new_decode_dev(u32 dev)
+static __always_inline dev_t new_decode_dev(u32 dev)
 {
        unsigned major = (dev & 0xfff00) >> 8;
        unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00);
        return MKDEV(major, minor);
 }
 
-static inline u64 huge_encode_dev(dev_t dev)
+static __always_inline u64 huge_encode_dev(dev_t dev)
 {
        return new_encode_dev(dev);
 }
 
-static inline dev_t huge_decode_dev(u64 dev)
+static __always_inline dev_t huge_decode_dev(u64 dev)
 {
        return new_decode_dev(dev);
 }
 
-static inline int sysv_valid_dev(dev_t dev)
+static __always_inline int sysv_valid_dev(dev_t dev)
 {
        return MAJOR(dev) < (1<<14) && MINOR(dev) < (1<<18);
 }
 
-static inline u32 sysv_encode_dev(dev_t dev)
+static __always_inline u32 sysv_encode_dev(dev_t dev)
 {
        return MINOR(dev) | (MAJOR(dev) << 18);
 }
 
-static inline unsigned sysv_major(u32 dev)
+static __always_inline unsigned sysv_major(u32 dev)
 {
        return (dev >> 18) & 0x3fff;
 }
 
-static inline unsigned sysv_minor(u32 dev)
+static __always_inline unsigned sysv_minor(u32 dev)
 {
        return dev & 0x3ffff;
 }
index 5299b90..ecdf8a8 100644 (file)
@@ -216,6 +216,13 @@ int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
                loff_t *);
 int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
                loff_t *);
+/*
+ * Any attempt to mark this function as static leads to build failure
+ * when CONFIG_DEBUG_INFO_BTF is enabled because __add_to_page_cache_locked()
+ * is referred to by BPF code. This must be visible for error injection.
+ */
+int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
+               pgoff_t index, gfp_t gfp, void **shadowp);
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
@@ -2432,8 +2439,9 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn);
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long,
-               enum meminit_context, struct vmem_altmap *, int migratetype);
+extern void memmap_init_zone(unsigned long, int, unsigned long,
+               unsigned long, unsigned long, enum meminit_context,
+               struct vmem_altmap *, int migratetype);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
index 9874f6f..1ac79bc 100644 (file)
@@ -44,6 +44,9 @@
 #define SZ_2G                          0x80000000
 
 #define SZ_4G                          _AC(0x100000000, ULL)
+#define SZ_8G                          _AC(0x200000000, ULL)
+#define SZ_16G                         _AC(0x400000000, ULL)
+#define SZ_32G                         _AC(0x800000000, ULL)
 #define SZ_64T                         _AC(0x400000000000, ULL)
 
 #endif /* __LINUX_SIZES_H__ */
index 191c329..32596fd 100644 (file)
@@ -908,6 +908,8 @@ int cgroup1_parse_param(struct fs_context *fc, struct fs_parameter *param)
        opt = fs_parse(fc, cgroup1_fs_parameters, param, &result);
        if (opt == -ENOPARAM) {
                if (strcmp(param->key, "source") == 0) {
+                       if (fc->source)
+                               return invalf(fc, "Multiple sources not supported");
                        fc->source = param->string;
                        param->string = NULL;
                        return 0;
index fefa219..6138457 100644 (file)
@@ -244,7 +244,7 @@ bool cgroup_ssid_enabled(int ssid)
  *
  * The default hierarchy is the v2 interface of cgroup and this function
  * can be used to test whether a cgroup is on the default hierarchy for
- * cases where a subsystem should behave differnetly depending on the
+ * cases where a subsystem should behave differently depending on the
  * interface version.
  *
  * List of changed behaviors:
@@ -262,7 +262,7 @@ bool cgroup_ssid_enabled(int ssid)
  *   "cgroup.procs" instead.
  *
  * - "cgroup.procs" is not sorted.  pids will be unique unless they got
- *   recycled inbetween reads.
+ *   recycled in-between reads.
  *
  * - "release_agent" and "notify_on_release" are removed.  Replacement
  *   notification mechanism will be implemented.
@@ -342,7 +342,7 @@ static bool cgroup_is_mixable(struct cgroup *cgrp)
        return !cgroup_parent(cgrp);
 }
 
-/* can @cgrp become a thread root? should always be true for a thread root */
+/* can @cgrp become a thread root? Should always be true for a thread root */
 static bool cgroup_can_be_thread_root(struct cgroup *cgrp)
 {
        /* mixables don't care */
@@ -527,7 +527,7 @@ static struct cgroup_subsys_state *cgroup_e_css_by_mask(struct cgroup *cgrp,
  * the root css is returned, so this function always returns a valid css.
  *
  * The returned css is not guaranteed to be online, and therefore it is the
- * callers responsiblity to tryget a reference for it.
+ * callers responsibility to try get a reference for it.
  */
 struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgrp,
                                         struct cgroup_subsys *ss)
@@ -699,7 +699,7 @@ EXPORT_SYMBOL_GPL(of_css);
                        ;                                               \
                else
 
-/* walk live descendants in preorder */
+/* walk live descendants in pre order */
 #define cgroup_for_each_live_descendant_pre(dsct, d_css, cgrp)         \
        css_for_each_descendant_pre((d_css), cgroup_css((cgrp), NULL))  \
                if (({ lockdep_assert_held(&cgroup_mutex);              \
@@ -933,7 +933,7 @@ void put_css_set_locked(struct css_set *cset)
 
        WARN_ON_ONCE(!list_empty(&cset->threaded_csets));
 
-       /* This css_set is dead. unlink it and release cgroup and css refs */
+       /* This css_set is dead. Unlink it and release cgroup and css refs */
        for_each_subsys(ss, ssid) {
                list_del(&cset->e_cset_node[ssid]);
                css_put(cset->subsys[ssid]);
@@ -1058,7 +1058,7 @@ static struct css_set *find_existing_css_set(struct css_set *old_cset,
 
        /*
         * Build the set of subsystem state objects that we want to see in the
-        * new css_set. while subsystems can change globally, the entries here
+        * new css_set. While subsystems can change globally, the entries here
         * won't change, so no need for locking.
         */
        for_each_subsys(ss, i) {
@@ -1148,7 +1148,7 @@ static void link_css_set(struct list_head *tmp_links, struct css_set *cset,
 
        /*
         * Always add links to the tail of the lists so that the lists are
-        * in choronological order.
+        * in chronological order.
         */
        list_move_tail(&link->cset_link, &cgrp->cset_links);
        list_add_tail(&link->cgrp_link, &cset->cgrp_links);
@@ -3654,7 +3654,7 @@ static ssize_t cgroup_freeze_write(struct kernfs_open_file *of,
 
 static int cgroup_file_open(struct kernfs_open_file *of)
 {
-       struct cftype *cft = of->kn->priv;
+       struct cftype *cft = of_cft(of);
 
        if (cft->open)
                return cft->open(of);
@@ -3663,7 +3663,7 @@ static int cgroup_file_open(struct kernfs_open_file *of)
 
 static void cgroup_file_release(struct kernfs_open_file *of)
 {
-       struct cftype *cft = of->kn->priv;
+       struct cftype *cft = of_cft(of);
 
        if (cft->release)
                cft->release(of);
@@ -3674,7 +3674,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 {
        struct cgroup_namespace *ns = current->nsproxy->cgroup_ns;
        struct cgroup *cgrp = of->kn->parent->priv;
-       struct cftype *cft = of->kn->priv;
+       struct cftype *cft = of_cft(of);
        struct cgroup_subsys_state *css;
        int ret;
 
@@ -3724,7 +3724,7 @@ static ssize_t cgroup_file_write(struct kernfs_open_file *of, char *buf,
 
 static __poll_t cgroup_file_poll(struct kernfs_open_file *of, poll_table *pt)
 {
-       struct cftype *cft = of->kn->priv;
+       struct cftype *cft = of_cft(of);
 
        if (cft->poll)
                return cft->poll(of, pt);
@@ -4134,7 +4134,7 @@ struct cgroup_subsys_state *css_next_child(struct cgroup_subsys_state *pos,
         * implies that if we observe !CSS_RELEASED on @pos in this RCU
         * critical section, the one pointed to by its next pointer is
         * guaranteed to not have finished its RCU grace period even if we
-        * have dropped rcu_read_lock() inbetween iterations.
+        * have dropped rcu_read_lock() in-between iterations.
         *
         * If @pos has CSS_RELEASED set, its next pointer can't be
         * dereferenced; however, as each css is given a monotonically
@@ -4382,7 +4382,7 @@ static struct css_set *css_task_iter_next_css_set(struct css_task_iter *it)
 }
 
 /**
- * css_task_iter_advance_css_set - advance a task itererator to the next css_set
+ * css_task_iter_advance_css_set - advance a task iterator to the next css_set
  * @it: the iterator to advance
  *
  * Advance @it to the next css_set to walk.
@@ -6308,7 +6308,7 @@ struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss)
  *
  * Find the cgroup at @path on the default hierarchy, increment its
  * reference count and return it.  Returns pointer to the found cgroup on
- * success, ERR_PTR(-ENOENT) if @path doens't exist and ERR_PTR(-ENOTDIR)
+ * success, ERR_PTR(-ENOENT) if @path doesn't exist and ERR_PTR(-ENOTDIR)
  * if @path points to a non-directory.
  */
 struct cgroup *cgroup_get_from_path(const char *path)
index 3594291..04029e3 100644 (file)
@@ -63,6 +63,7 @@
 #include <linux/random.h>
 #include <linux/rcuwait.h>
 #include <linux/compat.h>
+#include <linux/io_uring.h>
 
 #include <linux/uaccess.h>
 #include <asm/unistd.h>
@@ -776,6 +777,7 @@ void __noreturn do_exit(long code)
                schedule();
        }
 
+       io_uring_files_cancel(tsk->files);
        exit_signals(tsk);  /* sets PF_EXITING */
 
        /* sync mm's RSS info before statistics gathering */
index b5295a0..9880b6c 100644 (file)
@@ -3731,17 +3731,24 @@ static void pwq_adjust_max_active(struct pool_workqueue *pwq)
         * is updated and visible.
         */
        if (!freezable || !workqueue_freezing) {
+               bool kick = false;
+
                pwq->max_active = wq->saved_max_active;
 
                while (!list_empty(&pwq->delayed_works) &&
-                      pwq->nr_active < pwq->max_active)
+                      pwq->nr_active < pwq->max_active) {
                        pwq_activate_first_delayed(pwq);
+                       kick = true;
+               }
 
                /*
                 * Need to kick a worker after thawed or an unbound wq's
-                * max_active is bumped.  It's a slow path.  Do it always.
+                * max_active is bumped. In realtime scenarios, always kicking a
+                * worker will cause interference on the isolated cpu cores, so
+                * let's kick iff work items were activated.
                 */
-               wake_up_worker(pwq->pool);
+               if (kick)
+                       wake_up_worker(pwq->pool);
        } else {
                pwq->max_active = 0;
        }
index 7f1244b..dab97bb 100644 (file)
@@ -81,14 +81,14 @@ static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
  * users set the same bit, one user will return remain bits, otherwise
  * return 0.
  */
-static int bitmap_set_ll(unsigned long *map, int start, int nr)
+static int bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
 {
        unsigned long *p = map + BIT_WORD(start);
-       const int size = start + nr;
+       const unsigned long size = start + nr;
        int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG);
        unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start);
 
-       while (nr - bits_to_set >= 0) {
+       while (nr >= bits_to_set) {
                if (set_bits_ll(p, mask_to_set))
                        return nr;
                nr -= bits_to_set;
@@ -116,14 +116,15 @@ static int bitmap_set_ll(unsigned long *map, int start, int nr)
  * users clear the same bit, one user will return remain bits,
  * otherwise return 0.
  */
-static int bitmap_clear_ll(unsigned long *map, int start, int nr)
+static unsigned long
+bitmap_clear_ll(unsigned long *map, unsigned long start, unsigned long nr)
 {
        unsigned long *p = map + BIT_WORD(start);
-       const int size = start + nr;
+       const unsigned long size = start + nr;
        int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG);
        unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start);
 
-       while (nr - bits_to_clear >= 0) {
+       while (nr >= bits_to_clear) {
                if (clear_bits_ll(p, mask_to_clear))
                        return nr;
                nr -= bits_to_clear;
@@ -183,8 +184,8 @@ int gen_pool_add_owner(struct gen_pool *pool, unsigned long virt, phys_addr_t ph
                 size_t size, int nid, void *owner)
 {
        struct gen_pool_chunk *chunk;
-       int nbits = size >> pool->min_alloc_order;
-       int nbytes = sizeof(struct gen_pool_chunk) +
+       unsigned long nbits = size >> pool->min_alloc_order;
+       unsigned long nbytes = sizeof(struct gen_pool_chunk) +
                                BITS_TO_LONGS(nbits) * sizeof(long);
 
        chunk = vzalloc_node(nbytes, nid);
@@ -242,7 +243,7 @@ void gen_pool_destroy(struct gen_pool *pool)
        struct list_head *_chunk, *_next_chunk;
        struct gen_pool_chunk *chunk;
        int order = pool->min_alloc_order;
-       int bit, end_bit;
+       unsigned long bit, end_bit;
 
        list_for_each_safe(_chunk, _next_chunk, &pool->chunks) {
                chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk);
@@ -278,7 +279,7 @@ unsigned long gen_pool_alloc_algo_owner(struct gen_pool *pool, size_t size,
        struct gen_pool_chunk *chunk;
        unsigned long addr = 0;
        int order = pool->min_alloc_order;
-       int nbits, start_bit, end_bit, remain;
+       unsigned long nbits, start_bit, end_bit, remain;
 
 #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
        BUG_ON(in_nmi());
@@ -487,7 +488,7 @@ void gen_pool_free_owner(struct gen_pool *pool, unsigned long addr, size_t size,
 {
        struct gen_pool_chunk *chunk;
        int order = pool->min_alloc_order;
-       int start_bit, nbits, remain;
+       unsigned long start_bit, nbits, remain;
 
 #ifndef CONFIG_ARCH_HAVE_NMI_SAFE_CMPXCHG
        BUG_ON(in_nmi());
@@ -755,7 +756,7 @@ unsigned long gen_pool_best_fit(unsigned long *map, unsigned long size,
        index = bitmap_find_next_zero_area(map, size, start, nr, 0);
 
        while (index < size) {
-               int next_bit = find_next_bit(map, size, index + nr);
+               unsigned long next_bit = find_next_bit(map, size, index + nr);
                if ((next_bit - index) < len) {
                        len = next_bit - index;
                        start_bit = index;
index 8e4d5af..66e1c96 100644 (file)
@@ -8,4 +8,4 @@
 
 obj-$(CONFIG_ZLIB_DFLTCC) += zlib_dfltcc.o
 
-zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o dfltcc_syms.o
+zlib_dfltcc-objs := dfltcc.o dfltcc_deflate.o dfltcc_inflate.o
index c30de43..782f76e 100644 (file)
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: Zlib
 /* dfltcc.c - SystemZ DEFLATE CONVERSION CALL support. */
 
-#include <linux/zutil.h>
+#include <linux/export.h>
+#include <linux/module.h>
 #include "dfltcc_util.h"
 #include "dfltcc.h"
 
@@ -53,3 +54,6 @@ void dfltcc_reset(
     dfltcc_state->dht_threshold = DFLTCC_DHT_MIN_SAMPLE_SIZE;
     dfltcc_state->param.ribm = DFLTCC_RIBM;
 }
+EXPORT_SYMBOL(dfltcc_reset);
+
+MODULE_LICENSE("GPL");
index 00c1851..6c946e8 100644 (file)
@@ -4,6 +4,7 @@
 #include "dfltcc_util.h"
 #include "dfltcc.h"
 #include <asm/setup.h>
+#include <linux/export.h>
 #include <linux/zutil.h>
 
 /*
@@ -34,6 +35,7 @@ int dfltcc_can_deflate(
 
     return 1;
 }
+EXPORT_SYMBOL(dfltcc_can_deflate);
 
 static void dfltcc_gdht(
     z_streamp strm
@@ -277,3 +279,4 @@ again:
         goto again; /* deflate() must use all input or all output */
     return 1;
 }
+EXPORT_SYMBOL(dfltcc_deflate);
index db10701..fb60b5a 100644 (file)
@@ -125,7 +125,7 @@ dfltcc_inflate_action dfltcc_inflate(
     param->ho = (state->write - state->whave) & ((1 << HB_BITS) - 1);
     if (param->hl)
         param->nt = 0; /* Honor history for the first block */
-    param->cv = state->flags ? REVERSE(state->check) : state->check;
+    param->cv = state->check;
 
     /* Inflate */
     do {
@@ -138,7 +138,7 @@ dfltcc_inflate_action dfltcc_inflate(
     state->bits = param->sbb;
     state->whave = param->hl;
     state->write = (param->ho + param->hl) & ((1 << HB_BITS) - 1);
-    state->check = state->flags ? REVERSE(param->cv) : param->cv;
+    state->check = param->cv;
     if (cc == DFLTCC_CC_OP2_CORRUPT && param->oesc != 0) {
         /* Report an error if stream is corrupted */
         state->mode = BAD;
diff --git a/lib/zlib_dfltcc/dfltcc_syms.c b/lib/zlib_dfltcc/dfltcc_syms.c
deleted file mode 100644 (file)
index 6f23481..0000000
+++ /dev/null
@@ -1,17 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * linux/lib/zlib_dfltcc/dfltcc_syms.c
- *
- * Exported symbols for the s390 zlib dfltcc support.
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/zlib.h>
-#include "dfltcc.h"
-
-EXPORT_SYMBOL(dfltcc_can_deflate);
-EXPORT_SYMBOL(dfltcc_deflate);
-EXPORT_SYMBOL(dfltcc_reset);
-MODULE_LICENSE("GPL");
index cbf32d2..a260296 100644 (file)
@@ -4105,10 +4105,30 @@ retry_avoidcopy:
                 * may get SIGKILLed if it later faults.
                 */
                if (outside_reserve) {
+                       struct address_space *mapping = vma->vm_file->f_mapping;
+                       pgoff_t idx;
+                       u32 hash;
+
                        put_page(old_page);
                        BUG_ON(huge_pte_none(pte));
+                       /*
+                        * Drop hugetlb_fault_mutex and i_mmap_rwsem before
+                        * unmapping.  unmapping needs to hold i_mmap_rwsem
+                        * in write mode.  Dropping i_mmap_rwsem in read mode
+                        * here is OK as COW mappings do not interact with
+                        * PMD sharing.
+                        *
+                        * Reacquire both after unmap operation.
+                        */
+                       idx = vma_hugecache_offset(h, vma, haddr);
+                       hash = hugetlb_fault_mutex_hash(mapping, idx);
+                       mutex_unlock(&hugetlb_fault_mutex_table[hash]);
+                       i_mmap_unlock_read(mapping);
+
                        unmap_ref_private(mm, vma, old_page, haddr);
-                       BUG_ON(huge_pte_none(pte));
+
+                       i_mmap_lock_read(mapping);
+                       mutex_lock(&hugetlb_fault_mutex_table[hash]);
                        spin_lock(ptl);
                        ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
                        if (likely(ptep &&
index 1dd5a0f..5106b84 100644 (file)
@@ -337,6 +337,8 @@ void kasan_record_aux_stack(void *addr)
        cache = page->slab_cache;
        object = nearest_obj(cache, page, addr);
        alloc_meta = kasan_get_alloc_meta(cache, object);
+       if (!alloc_meta)
+               return;
 
        alloc_meta->aux_stack[1] = alloc_meta->aux_stack[0];
        alloc_meta->aux_stack[0] = kasan_save_stack(GFP_NOWAIT);
index 7d60876..feff48e 100644 (file)
@@ -2892,11 +2892,13 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                entry = mk_pte(new_page, vma->vm_page_prot);
                entry = pte_sw_mkyoung(entry);
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+
                /*
                 * Clear the pte entry and flush it first, before updating the
-                * pte with the new entry. This will avoid a race condition
-                * seen in the presence of one thread doing SMC and another
-                * thread doing COW.
+                * pte with the new entry, to keep TLBs on different CPUs in
+                * sync. This code used to set the new PTE then flush TLBs, but
+                * that left a window where the new PTE could be loaded into
+                * some TLBs while the old PTE remains in others.
                 */
                ptep_clear_flush_notify(vma, vmf->address, vmf->pte);
                page_add_new_anon_rmap(new_page, vma, vmf->address, false);
index af41fb9..f9d57b9 100644 (file)
@@ -713,7 +713,7 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
         * expects the zone spans the pfn range. All the pages in the range
         * are reserved so nobody should be touching them so we should be safe
         */
-       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn,
+       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
                         MEMINIT_HOTPLUG, altmap, migratetype);
 
        set_zone_contiguous(zone);
index c5590af..f554320 100644 (file)
@@ -358,7 +358,9 @@ static unsigned long get_extent(enum pgt_entry entry, unsigned long old_addr,
 
        next = (old_addr + size) & mask;
        /* even if next overflowed, extent below will be ok */
-       extent = (next > old_end) ? old_end - old_addr : next - old_addr;
+       extent = next - old_addr;
+       if (extent > old_end - old_addr)
+               extent = old_end - old_addr;
        next = (new_addr + size) & mask;
        if (extent > next - new_addr)
                extent = next - new_addr;
index 7a2c89b..bdbec4c 100644 (file)
@@ -423,6 +423,8 @@ defer_init(int nid, unsigned long pfn, unsigned long end_pfn)
        if (end_pfn < pgdat_end_pfn(NODE_DATA(nid)))
                return false;
 
+       if (NODE_DATA(nid)->first_deferred_pfn != ULONG_MAX)
+               return true;
        /*
         * We start only with one section of pages, more pages are added as
         * needed until the rest of deferred pages are initialized.
@@ -6116,7 +6118,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
  * zone stats (e.g., nr_isolate_pageblock) are touched.
  */
 void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-               unsigned long start_pfn,
+               unsigned long start_pfn, unsigned long zone_end_pfn,
                enum meminit_context context,
                struct vmem_altmap *altmap, int migratetype)
 {
@@ -6152,7 +6154,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
                if (context == MEMINIT_EARLY) {
                        if (overlap_memmap_init(zone, &pfn))
                                continue;
-                       if (defer_init(nid, pfn, end_pfn))
+                       if (defer_init(nid, pfn, zone_end_pfn))
                                break;
                }
 
@@ -6266,7 +6268,7 @@ void __meminit __weak memmap_init(unsigned long size, int nid,
 
                if (end_pfn > start_pfn) {
                        size = end_pfn - start_pfn;
-                       memmap_init_zone(size, nid, zone, start_pfn,
+                       memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
                                         MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
                }
        }
index 0c8b43a..dc5b42e 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1619,9 +1619,6 @@ static inline struct page *alloc_slab_page(struct kmem_cache *s,
        else
                page = __alloc_pages_node(node, flags, order);
 
-       if (page)
-               account_slab_page(page, order, s);
-
        return page;
 }
 
@@ -1774,6 +1771,8 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
        page->objects = oo_objects(oo);
 
+       account_slab_page(page, oo_order(oo), s);
+
        page->slab_cache = s;
        __SetPageSlab(page);
        if (page_is_pfmemalloc(page))
index c1ebb2a..c38d8de 100644 (file)
@@ -1333,7 +1333,8 @@ static int prepare_auth_signature(struct ceph_connection *con)
        void *buf;
        int ret;
 
-       buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE, false));
+       buf = alloc_conn_buf(con, head_onwire_len(SHA256_DIGEST_SIZE,
+                                                 con_secure(con)));
        if (!buf)
                return -ENOMEM;
 
@@ -2032,10 +2033,18 @@ bad:
        return -EINVAL;
 }
 
+/*
+ * Align session_key and con_secret to avoid GFP_ATOMIC allocation
+ * inside crypto_shash_setkey() and crypto_aead_setkey() called from
+ * setup_crypto().  __aligned(16) isn't guaranteed to work for stack
+ * objects, so do it by hand.
+ */
 static int process_auth_done(struct ceph_connection *con, void *p, void *end)
 {
-       u8 session_key[CEPH_KEY_LEN];
-       u8 con_secret[CEPH_MAX_CON_SECRET_LEN];
+       u8 session_key_buf[CEPH_KEY_LEN + 16];
+       u8 con_secret_buf[CEPH_MAX_CON_SECRET_LEN + 16];
+       u8 *session_key = PTR_ALIGN(&session_key_buf[0], 16);
+       u8 *con_secret = PTR_ALIGN(&con_secret_buf[0], 16);
        int session_key_len, con_secret_len;
        int payload_len;
        u64 global_id;
index 0008530..92e888e 100755 (executable)
@@ -6646,6 +6646,12 @@ sub process {
 #                      }
 #              }
 
+# strlcpy uses that should likely be strscpy
+               if ($line =~ /\bstrlcpy\s*\(/) {
+                       WARN("STRLCPY",
+                            "Prefer strscpy over strlcpy - see: https://lore.kernel.org/r/CAHk-=wgfRnXz0W3D37d01q3JFkr_i_uTL=V6A6G1oUZcprmknw\@mail.gmail.com/\n" . $herecurr);
+               }
+
 # typecasts on min/max could be min_t/max_t
                if ($perl_version_ok &&
                    defined $stat &&
index e083bca..3643b4f 100755 (executable)
@@ -15,6 +15,8 @@ if ! test -r System.map ; then
        exit 0
 fi
 
+# legacy behavior: "depmod" in /sbin, no /sbin in PATH
+PATH="$PATH:/sbin"
 if [ -z $(command -v $DEPMOD) ]; then
        echo "Warning: 'make modules_install' requires $DEPMOD. Please install it." >&2
        echo "This is probably in the kmod package." >&2
index 9a25307..d42115e 100644 (file)
@@ -4,7 +4,7 @@
 include local_config.mk
 
 uname_M := $(shell uname -m 2>/dev/null || echo not)
-MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
+MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
 
 # Without this, failed build products remain, with up-to-date timestamps,
 # thus tricking Make (and you!) into believing that All Is Well, in subsequent
@@ -43,7 +43,7 @@ TEST_GEN_FILES += thuge-gen
 TEST_GEN_FILES += transhuge-stress
 TEST_GEN_FILES += userfaultfd
 
-ifeq ($(ARCH),x86_64)
+ifeq ($(MACHINE),x86_64)
 CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
 CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
 CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
@@ -65,13 +65,13 @@ TEST_GEN_FILES += $(BINARIES_64)
 endif
 else
 
-ifneq (,$(findstring $(ARCH),powerpc))
+ifneq (,$(findstring $(MACHINE),ppc64))
 TEST_GEN_FILES += protection_keys
 endif
 
 endif
 
-ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
+ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sh64 sparc64 x86_64))
 TEST_GEN_FILES += va_128TBswitch
 TEST_GEN_FILES += virtual_address_range
 TEST_GEN_FILES += write_to_hugetlbfs
@@ -84,7 +84,7 @@ TEST_FILES := test_vmalloc.sh
 KSFT_KHDR_INSTALL := 1
 include ../lib.mk
 
-ifeq ($(ARCH),x86_64)
+ifeq ($(MACHINE),x86_64)
 BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
 BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))