Merge branch 'akpm' (patches from Andrew)
author     Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 12 Jun 2020 01:18:50 +0000 (18:18 -0700)
committer  Linus Torvalds <torvalds@linux-foundation.org>
           Fri, 12 Jun 2020 01:18:50 +0000 (18:18 -0700)
Pull updates from Andrew Morton:
 "A few fixes and stragglers.

  Subsystems affected by this patch series: mm/memory-failure, ocfs2,
  lib/lzo, misc"

* emailed patches from Andrew Morton <akpm@linux-foundation.org>:
  amdgpu: a NULL ->mm does not mean a thread is a kthread
  lib/lzo: fix ambiguous encoding bug in lzo-rle
  ocfs2: fix build failure when TCP/IP is disabled
  mm/memory-failure: send SIGBUS(BUS_MCEERR_AR) only to current thread
  mm/memory-failure: prioritize prctl(PR_MCE_KILL) over vm.memory_failure_early_kill

34 files changed:
arch/x86/include/asm/intel-family.h
arch/x86/include/asm/vdso/gettimeofday.h
arch/x86/kernel/apic/apic.c
arch/x86/kernel/cpu/bugs.c
arch/x86/kernel/cpu/intel.c
arch/x86/kernel/process.c
arch/x86/kernel/reboot.c
arch/x86/kernel/time.c
arch/x86/kernel/vmlinux.lds.S
block/bio-integrity.c
block/bio.c
block/blk-mq-tag.c
block/blk-mq-tag.h
block/blk-mq.c
block/blk-mq.h
block/blk.h
drivers/block/loop.c
drivers/block/pktcdvd.c
drivers/block/umem.c
drivers/nvme/host/core.c
drivers/nvme/host/fc.c
drivers/nvme/host/nvme.h
drivers/nvme/host/pci.c
drivers/nvme/host/tcp.c
drivers/nvme/target/core.c
drivers/nvme/target/tcp.c
fs/afs/write.c
fs/io-wq.c
fs/io-wq.h
fs/io_uring.c
include/trace/events/block.h
kernel/time/clocksource.c
kernel/trace/blktrace.c
lib/vdso/gettimeofday.c

diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
index 8f1e94f..a338a6d 100644
--- a/arch/x86/include/asm/intel-family.h
+++ b/arch/x86/include/asm/intel-family.h
@@ -89,6 +89,8 @@
 #define INTEL_FAM6_COMETLAKE           0xA5
 #define INTEL_FAM6_COMETLAKE_L         0xA6
 
+#define INTEL_FAM6_SAPPHIRERAPIDS_X    0x8F
+
 /* "Small Core" Processors (Atom) */
 
 #define INTEL_FAM6_ATOM_BONNELL                0x1C /* Diamondville, Pineview */
diff --git a/arch/x86/include/asm/vdso/gettimeofday.h b/arch/x86/include/asm/vdso/gettimeofday.h
index 9a6dc9b..fb81fea 100644
--- a/arch/x86/include/asm/vdso/gettimeofday.h
+++ b/arch/x86/include/asm/vdso/gettimeofday.h
@@ -271,6 +271,24 @@ static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
        return __vdso_data;
 }
 
+static inline bool arch_vdso_clocksource_ok(const struct vdso_data *vd)
+{
+       return true;
+}
+#define vdso_clocksource_ok arch_vdso_clocksource_ok
+
+/*
+ * Clocksource read value validation to handle PV and HyperV clocksources
+ * which can be invalidated asynchronously and indicate invalidation by
+ * returning U64_MAX, which can be effectively tested by checking for a
+ * negative value after casting it to s64.
+ */
+static inline bool arch_vdso_cycles_ok(u64 cycles)
+{
+       return (s64)cycles >= 0;
+}
+#define vdso_cycles_ok arch_vdso_cycles_ok
+
 /*
  * x86 specific delta calculation.
  *
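The arch_vdso_cycles_ok() helper added above hinges on one convention: an invalidated PV/Hyper-V clocksource returns U64_MAX, which is negative once reinterpreted as s64. A minimal userspace sketch of the same check (illustrative only, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* A reading of U64_MAX marks the clocksource as invalidated. */
    static int cycles_ok(uint64_t cycles)
    {
            /* U64_MAX becomes -1 when reinterpreted as a signed value. */
            return (int64_t)cycles >= 0;
    }

    int main(void)
    {
            printf("%d\n", cycles_ok(123456789ULL)); /* 1: valid reading */
            printf("%d\n", cycles_ok(UINT64_MAX));   /* 0: invalidated   */
            return 0;
    }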
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 4b1d31b..bf4acb0 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -2060,7 +2060,7 @@ void __init init_apic_mappings(void)
        unsigned int new_apicid;
 
        if (apic_validate_deadline_timer())
-               pr_debug("TSC deadline timer available\n");
+               pr_info("TSC deadline timer available\n");
 
        if (x2apic_mode) {
                boot_cpu_physical_apicid = read_apic_id();
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index b6f887b..0b71970 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -588,7 +588,9 @@ early_param("nospectre_v1", nospectre_v1_cmdline);
 static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init =
        SPECTRE_V2_NONE;
 
-static enum spectre_v2_user_mitigation spectre_v2_user __ro_after_init =
+static enum spectre_v2_user_mitigation spectre_v2_user_stibp __ro_after_init =
+       SPECTRE_V2_USER_NONE;
+static enum spectre_v2_user_mitigation spectre_v2_user_ibpb __ro_after_init =
        SPECTRE_V2_USER_NONE;
 
 #ifdef CONFIG_RETPOLINE
@@ -734,15 +736,6 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
                break;
        }
 
-       /*
-        * At this point, an STIBP mode other than "off" has been set.
-        * If STIBP support is not being forced, check if STIBP always-on
-        * is preferred.
-        */
-       if (mode != SPECTRE_V2_USER_STRICT &&
-           boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
-               mode = SPECTRE_V2_USER_STRICT_PREFERRED;
-
        /* Initialize Indirect Branch Prediction Barrier */
        if (boot_cpu_has(X86_FEATURE_IBPB)) {
                setup_force_cpu_cap(X86_FEATURE_USE_IBPB);
@@ -765,23 +758,36 @@ spectre_v2_user_select_mitigation(enum spectre_v2_mitigation_cmd v2_cmd)
                pr_info("mitigation: Enabling %s Indirect Branch Prediction Barrier\n",
                        static_key_enabled(&switch_mm_always_ibpb) ?
                        "always-on" : "conditional");
+
+               spectre_v2_user_ibpb = mode;
        }
 
-       /* If enhanced IBRS is enabled no STIBP required */
-       if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
+       /*
+        * If enhanced IBRS is enabled or SMT impossible, STIBP is not
+        * required.
+        */
+       if (!smt_possible || spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
                return;
 
        /*
-        * If SMT is not possible or STIBP is not available clear the STIBP
-        * mode.
+        * At this point, an STIBP mode other than "off" has been set.
+        * If STIBP support is not being forced, check if STIBP always-on
+        * is preferred.
         */
-       if (!smt_possible || !boot_cpu_has(X86_FEATURE_STIBP))
+       if (mode != SPECTRE_V2_USER_STRICT &&
+           boot_cpu_has(X86_FEATURE_AMD_STIBP_ALWAYS_ON))
+               mode = SPECTRE_V2_USER_STRICT_PREFERRED;
+
+       /*
+        * If STIBP is not available, clear the STIBP mode.
+        */
+       if (!boot_cpu_has(X86_FEATURE_STIBP))
                mode = SPECTRE_V2_USER_NONE;
+
+       spectre_v2_user_stibp = mode;
+
 set_mode:
-       spectre_v2_user = mode;
-       /* Only print the STIBP mode when SMT possible */
-       if (smt_possible)
-               pr_info("%s\n", spectre_v2_user_strings[mode]);
+       pr_info("%s\n", spectre_v2_user_strings[mode]);
 }
 
 static const char * const spectre_v2_strings[] = {
@@ -1014,7 +1020,7 @@ void cpu_bugs_smt_update(void)
 {
        mutex_lock(&spec_ctrl_mutex);
 
-       switch (spectre_v2_user) {
+       switch (spectre_v2_user_stibp) {
        case SPECTRE_V2_USER_NONE:
                break;
        case SPECTRE_V2_USER_STRICT:
@@ -1257,14 +1263,19 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
 {
        switch (ctrl) {
        case PR_SPEC_ENABLE:
-               if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+               if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
                        return 0;
                /*
                 * Indirect branch speculation is always disabled in strict
-                * mode.
+                * mode. Nor can it be enabled if it was force-disabled
+                * by a previous prctl() call.
+
                 */
-               if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
-                   spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+               if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED ||
+                   task_spec_ib_force_disable(task))
                        return -EPERM;
                task_clear_spec_ib_disable(task);
                task_update_spec_tif(task);
@@ -1275,10 +1286,12 @@ static int ib_prctl_set(struct task_struct *task, unsigned long ctrl)
                 * Indirect branch speculation is always allowed when
                 * mitigation is force disabled.
                 */
-               if (spectre_v2_user == SPECTRE_V2_USER_NONE)
+               if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
                        return -EPERM;
-               if (spectre_v2_user == SPECTRE_V2_USER_STRICT ||
-                   spectre_v2_user == SPECTRE_V2_USER_STRICT_PREFERRED)
+               if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+                   spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
                        return 0;
                task_set_spec_ib_disable(task);
                if (ctrl == PR_SPEC_FORCE_DISABLE)
@@ -1309,7 +1322,8 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
 {
        if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP)
                ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE);
-       if (spectre_v2_user == SPECTRE_V2_USER_SECCOMP)
+       if (spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP)
                ib_prctl_set(task, PR_SPEC_FORCE_DISABLE);
 }
 #endif
@@ -1340,22 +1354,24 @@ static int ib_prctl_get(struct task_struct *task)
        if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2))
                return PR_SPEC_NOT_AFFECTED;
 
-       switch (spectre_v2_user) {
-       case SPECTRE_V2_USER_NONE:
+       if (spectre_v2_user_ibpb == SPECTRE_V2_USER_NONE &&
+           spectre_v2_user_stibp == SPECTRE_V2_USER_NONE)
                return PR_SPEC_ENABLE;
-       case SPECTRE_V2_USER_PRCTL:
-       case SPECTRE_V2_USER_SECCOMP:
+       else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_STRICT ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_STRICT_PREFERRED)
+               return PR_SPEC_DISABLE;
+       else if (spectre_v2_user_ibpb == SPECTRE_V2_USER_PRCTL ||
+           spectre_v2_user_ibpb == SPECTRE_V2_USER_SECCOMP ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_PRCTL ||
+           spectre_v2_user_stibp == SPECTRE_V2_USER_SECCOMP) {
                if (task_spec_ib_force_disable(task))
                        return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE;
                if (task_spec_ib_disable(task))
                        return PR_SPEC_PRCTL | PR_SPEC_DISABLE;
                return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
-       case SPECTRE_V2_USER_STRICT:
-       case SPECTRE_V2_USER_STRICT_PREFERRED:
-               return PR_SPEC_DISABLE;
-       default:
+       } else
                return PR_SPEC_NOT_AFFECTED;
-       }
 }
 
 int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which)
@@ -1594,7 +1610,7 @@ static char *stibp_state(void)
        if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED)
                return "";
 
-       switch (spectre_v2_user) {
+       switch (spectre_v2_user_stibp) {
        case SPECTRE_V2_USER_NONE:
                return ", STIBP: disabled";
        case SPECTRE_V2_USER_STRICT:
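The ib_prctl_set()/ib_prctl_get() rework above keeps the userspace-visible PR_SPEC_INDIRECT_BRANCH interface unchanged while tracking the IBPB and STIBP modes separately. For reference, a small sketch of how a task queries and force-disables its own indirect-branch speculation (assumes kernel headers new enough to define PR_SPEC_INDIRECT_BRANCH; illustrative only):

    #include <stdio.h>
    #include <sys/prctl.h>

    int main(void)
    {
            /* Bitmask of PR_SPEC_PRCTL, PR_SPEC_ENABLE, PR_SPEC_DISABLE, ... */
            long state = prctl(PR_GET_SPECULATION_CTRL,
                               PR_SPEC_INDIRECT_BRANCH, 0, 0, 0);

            if (state < 0) {
                    perror("PR_GET_SPECULATION_CTRL");
                    return 1;
            }
            printf("indirect branch speculation state: 0x%lx\n", state);

            /* Opting out per task is only meaningful in prctl/seccomp mode. */
            if ((state & PR_SPEC_PRCTL) &&
                prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_INDIRECT_BRANCH,
                      PR_SPEC_FORCE_DISABLE, 0, 0))
                    perror("PR_SET_SPECULATION_CTRL");

            return 0;
    }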
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 63926c9..c25a67a 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -1142,9 +1142,12 @@ void switch_to_sld(unsigned long tifn)
 static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X,           0),
        X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L,           0),
+       X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D,           0),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT,        1),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_D,      1),
        X86_MATCH_INTEL_FAM6_MODEL(ATOM_TREMONT_L,      1),
+       X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE_L,         1),
+       X86_MATCH_INTEL_FAM6_MODEL(TIGERLAKE,           1),
        {}
 };
 
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 8e3d034..f362ce0 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -545,28 +545,20 @@ static __always_inline void __speculation_ctrl_update(unsigned long tifp,
 
        lockdep_assert_irqs_disabled();
 
-       /*
-        * If TIF_SSBD is different, select the proper mitigation
-        * method. Note that if SSBD mitigation is disabled or permanentely
-        * enabled this branch can't be taken because nothing can set
-        * TIF_SSBD.
-        */
-       if (tif_diff & _TIF_SSBD) {
-               if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+       /* Handle change of TIF_SSBD depending on the mitigation method. */
+       if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) {
+               if (tif_diff & _TIF_SSBD)
                        amd_set_ssb_virt_state(tifn);
-               } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+       } else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) {
+               if (tif_diff & _TIF_SSBD)
                        amd_set_core_ssb_state(tifn);
-               } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
-                          static_cpu_has(X86_FEATURE_AMD_SSBD)) {
-                       msr |= ssbd_tif_to_spec_ctrl(tifn);
-                       updmsr  = true;
-               }
+       } else if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD) ||
+                  static_cpu_has(X86_FEATURE_AMD_SSBD)) {
+               updmsr |= !!(tif_diff & _TIF_SSBD);
+               msr |= ssbd_tif_to_spec_ctrl(tifn);
        }
 
-       /*
-        * Only evaluate TIF_SPEC_IB if conditional STIBP is enabled,
-        * otherwise avoid the MSR write.
-        */
+       /* Only evaluate TIF_SPEC_IB if conditional STIBP is enabled. */
        if (IS_ENABLED(CONFIG_SMP) &&
            static_branch_unlikely(&switch_to_cond_stibp)) {
                updmsr |= !!(tif_diff & _TIF_SPEC_IB);
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e040ba6..0ec7ced 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -197,6 +197,14 @@ static const struct dmi_system_id reboot_dmi_table[] __initconst = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "MacBook5"),
                },
        },
+       {       /* Handle problems with rebooting on Apple MacBook6,1 */
+               .callback = set_pci_reboot,
+               .ident = "Apple MacBook6,1",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "Apple Inc."),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "MacBook6,1"),
+               },
+       },
        {       /* Handle problems with rebooting on Apple MacBookPro5 */
                .callback = set_pci_reboot,
                .ident = "Apple MacBookPro5",
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c
index 371a6b3..e42faa7 100644
--- a/arch/x86/kernel/time.c
+++ b/arch/x86/kernel/time.c
 #include <asm/hpet.h>
 #include <asm/time.h>
 
-#ifdef CONFIG_X86_64
-__visible volatile unsigned long jiffies __cacheline_aligned_in_smp = INITIAL_JIFFIES;
-#endif
-
 unsigned long profile_pc(struct pt_regs *regs)
 {
        unsigned long pc = instruction_pointer(regs);
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 1bf7e31..7c35556 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -40,13 +40,13 @@ OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT)
 #ifdef CONFIG_X86_32
 OUTPUT_ARCH(i386)
 ENTRY(phys_startup_32)
-jiffies = jiffies_64;
 #else
 OUTPUT_ARCH(i386:x86-64)
 ENTRY(phys_startup_64)
-jiffies_64 = jiffies;
 #endif
 
+jiffies = jiffies_64;
+
 #if defined(CONFIG_X86_64)
 /*
  * On 64-bit, align RODATA to 2MB so we retain large page mappings for
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 3579ac0..23632a3 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -281,7 +281,6 @@ bool bio_integrity_prep(struct bio *bio)
 
                if (ret == 0) {
                        printk(KERN_ERR "could not attach integrity payload\n");
-                       kfree(buf);
                        status = BLK_STS_RESOURCE;
                        goto err_end_io;
                }
diff --git a/block/bio.c b/block/bio.c
index 5235da6..a7366c0 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -1434,8 +1434,7 @@ again:
        }
 
        if (bio->bi_disk && bio_flagged(bio, BIO_TRACE_COMPLETION)) {
-               trace_block_bio_complete(bio->bi_disk->queue, bio,
-                                        blk_status_to_errno(bio->bi_status));
+               trace_block_bio_complete(bio->bi_disk->queue, bio);
                bio_clear_flag(bio, BIO_TRACE_COMPLETION);
        }
 
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 96a39d0..44f3d09 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -191,6 +191,33 @@ found_tag:
        return tag + tag_offset;
 }
 
+bool __blk_mq_get_driver_tag(struct request *rq)
+{
+       struct sbitmap_queue *bt = &rq->mq_hctx->tags->bitmap_tags;
+       unsigned int tag_offset = rq->mq_hctx->tags->nr_reserved_tags;
+       bool shared = blk_mq_tag_busy(rq->mq_hctx);
+       int tag;
+
+       if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
+               bt = &rq->mq_hctx->tags->breserved_tags;
+               tag_offset = 0;
+       }
+
+       if (!hctx_may_queue(rq->mq_hctx, bt))
+               return false;
+       tag = __sbitmap_queue_get(bt);
+       if (tag == BLK_MQ_NO_TAG)
+               return false;
+
+       rq->tag = tag + tag_offset;
+       if (shared) {
+               rq->rq_flags |= RQF_MQ_INFLIGHT;
+               atomic_inc(&rq->mq_hctx->nr_active);
+       }
+       rq->mq_hctx->tags->rqs[rq->tag] = rq;
+       return true;
+}
+
 void blk_mq_put_tag(struct blk_mq_tags *tags, struct blk_mq_ctx *ctx,
                    unsigned int tag)
 {
@@ -269,6 +296,7 @@ struct bt_tags_iter_data {
 
 #define BT_TAG_ITER_RESERVED           (1 << 0)
 #define BT_TAG_ITER_STARTED            (1 << 1)
+#define BT_TAG_ITER_STATIC_RQS         (1 << 2)
 
 static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 {
@@ -282,9 +310,12 @@ static bool bt_tags_iter(struct sbitmap *bitmap, unsigned int bitnr, void *data)
 
        /*
         * We can hit rq == NULL here, because the tagging functions
-        * test and set the bit before assining ->rqs[].
+        * test and set the bit before assigning ->rqs[].
         */
-       rq = tags->rqs[bitnr];
+       if (iter_data->flags & BT_TAG_ITER_STATIC_RQS)
+               rq = tags->static_rqs[bitnr];
+       else
+               rq = tags->rqs[bitnr];
        if (!rq)
                return true;
        if ((iter_data->flags & BT_TAG_ITER_STARTED) &&
@@ -339,11 +370,13 @@ static void __blk_mq_all_tag_iter(struct blk_mq_tags *tags,
  *             indicates whether or not @rq is a reserved request. Return
  *             true to continue iterating tags, false to stop.
  * @priv:      Will be passed as second argument to @fn.
+ *
+ * Caller has to pass the tag map from which requests are allocated.
  */
 void blk_mq_all_tag_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn,
                void *priv)
 {
-       return __blk_mq_all_tag_iter(tags, fn, priv, 0);
+       return __blk_mq_all_tag_iter(tags, fn, priv, BT_TAG_ITER_STATIC_RQS);
 }
 
 /**
diff --git a/block/blk-mq-tag.h b/block/blk-mq-tag.h
index d38e48f..2e4ef51 100644
--- a/block/blk-mq-tag.h
+++ b/block/blk-mq-tag.h
@@ -51,6 +51,14 @@ enum {
        BLK_MQ_TAG_MAX          = BLK_MQ_NO_TAG - 1,
 };
 
+bool __blk_mq_get_driver_tag(struct request *rq);
+static inline bool blk_mq_get_driver_tag(struct request *rq)
+{
+       if (rq->tag != BLK_MQ_NO_TAG)
+               return true;
+       return __blk_mq_get_driver_tag(rq);
+}
+
 extern bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
 extern void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 9a36ac1..4f57d27 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1052,35 +1052,6 @@ static inline unsigned int queued_to_index(unsigned int queued)
        return min(BLK_MQ_MAX_DISPATCH_ORDER - 1, ilog2(queued) + 1);
 }
 
-bool blk_mq_get_driver_tag(struct request *rq)
-{
-       struct blk_mq_alloc_data data = {
-               .q = rq->q,
-               .hctx = rq->mq_hctx,
-               .flags = BLK_MQ_REQ_NOWAIT,
-               .cmd_flags = rq->cmd_flags,
-       };
-       bool shared;
-
-       if (rq->tag != BLK_MQ_NO_TAG)
-               return true;
-
-       if (blk_mq_tag_is_reserved(data.hctx->sched_tags, rq->internal_tag))
-               data.flags |= BLK_MQ_REQ_RESERVED;
-
-       shared = blk_mq_tag_busy(data.hctx);
-       rq->tag = blk_mq_get_tag(&data);
-       if (rq->tag >= 0) {
-               if (shared) {
-                       rq->rq_flags |= RQF_MQ_INFLIGHT;
-                       atomic_inc(&data.hctx->nr_active);
-               }
-               data.hctx->tags->rqs[rq->tag] = rq;
-       }
-
-       return rq->tag != BLK_MQ_NO_TAG;
-}
-
 static int blk_mq_dispatch_wake(wait_queue_entry_t *wait, unsigned mode,
                                int flags, void *key)
 {
diff --git a/block/blk-mq.h b/block/blk-mq.h
index a139b06..b3ce0f3 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -44,7 +44,6 @@ bool blk_mq_dispatch_rq_list(struct request_queue *, struct list_head *, bool);
 void blk_mq_add_to_requeue_list(struct request *rq, bool at_head,
                                bool kick_requeue_list);
 void blk_mq_flush_busy_ctxs(struct blk_mq_hw_ctx *hctx, struct list_head *list);
-bool blk_mq_get_driver_tag(struct request *rq);
 struct request *blk_mq_dequeue_from_ctx(struct blk_mq_hw_ctx *hctx,
                                        struct blk_mq_ctx *start);
 
diff --git a/block/blk.h b/block/blk.h
index aa16e52..b5d1f0f 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -420,9 +420,11 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part)
 static inline void part_nr_sects_write(struct hd_struct *part, sector_t size)
 {
 #if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+       preempt_disable();
        write_seqcount_begin(&part->nr_sects_seq);
        part->nr_sects = size;
        write_seqcount_end(&part->nr_sects_seq);
+       preempt_enable();
 #elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPTION)
        preempt_disable();
        part->nr_sects = size;
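The blk.h hunk wraps the 32-bit seqcount write in preempt_disable()/preempt_enable() so that a reader preempting the writer on the same CPU cannot spin forever waiting for the sequence count to become even again. A minimal userspace sketch of the underlying sequence-counter protocol, using plain C11 atomics instead of the kernel's seqcount_t (illustrative only; the kernel version uses carefully tuned barriers):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    static atomic_uint seq;                   /* even: stable, odd: write in progress */
    static _Atomic uint32_t sect_lo, sect_hi; /* 64-bit sector count, split in halves */

    static void write_sectors(uint64_t v)
    {
            atomic_fetch_add(&seq, 1);        /* odd: update in progress */
            atomic_store(&sect_lo, (uint32_t)v);
            atomic_store(&sect_hi, (uint32_t)(v >> 32));
            atomic_fetch_add(&seq, 1);        /* even again: update complete */
    }

    static uint64_t read_sectors(void)
    {
            unsigned int start;
            uint32_t lo, hi;

            do {
                    do {                      /* wait for a stable (even) count */
                            start = atomic_load(&seq);
                    } while (start & 1);
                    lo = atomic_load(&sect_lo);
                    hi = atomic_load(&sect_hi);
            } while (atomic_load(&seq) != start); /* a writer slipped in: retry */

            return ((uint64_t)hi << 32) | lo;
    }

    int main(void)
    {
            write_sectors(0x123456789abcdef0ULL);
            printf("0x%llx\n", (unsigned long long)read_sectors());
            return 0;
    }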
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index 2e96d8b..c33bbbf 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -1390,7 +1390,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info)
                goto out_unfreeze;
 
        /* Mask out flags that can't be set using LOOP_SET_STATUS. */
-       lo->lo_flags &= ~LOOP_SET_STATUS_SETTABLE_FLAGS;
+       lo->lo_flags &= LOOP_SET_STATUS_SETTABLE_FLAGS;
        /* For those flags, use the previous values instead */
        lo->lo_flags |= prev_lo_flags & ~LOOP_SET_STATUS_SETTABLE_FLAGS;
        /* For flags that can't be cleared, use previous values too */
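The loop.c one-liner fixes an inverted mask: LOOP_SET_STATUS should keep only the settable bits of the caller-supplied flags and take every other bit from the previous state. A tiny sketch of the intended merge rule, using a made-up mask value rather than the real LO_FLAGS_* constants:

    #include <stdio.h>

    #define SETTABLE_FLAGS 0x06     /* hypothetical stand-in for the settable set */

    static unsigned int merge_flags(unsigned int prev, unsigned int requested)
    {
            unsigned int flags = requested & SETTABLE_FLAGS; /* bug was: & ~SETTABLE_FLAGS */

            flags |= prev & ~SETTABLE_FLAGS;                 /* non-settable bits keep old value */
            return flags;
    }

    int main(void)
    {
            printf("0x%x\n", merge_flags(0x11, 0x06));       /* -> 0x17 */
            return 0;
    }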
diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 0b944ac..27a33ad 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -1613,7 +1613,7 @@ static noinline_for_stack int pkt_get_last_written(struct pktcdvd_device *pd,
        disc_information di;
        track_information ti;
        __u32 last_track;
-       int ret = -1;
+       int ret;
 
        ret = pkt_get_disc_info(pd, &di);
        if (ret)
diff --git a/drivers/block/umem.c b/drivers/block/umem.c
index d84e8a8..1e2aa5a 100644
--- a/drivers/block/umem.c
+++ b/drivers/block/umem.c
@@ -784,7 +784,7 @@ static const struct block_device_operations mm_fops = {
 
 static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
 {
-       int ret = -ENODEV;
+       int ret;
        struct cardinfo *card = &cards[num_cards];
        unsigned char   mem_present;
        unsigned char   batt_status;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 0585efa..c2c5bc4 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -3669,7 +3669,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        ns->disk = disk;
 
        if (__nvme_revalidate_disk(disk, id))
-               goto out_free_disk;
+               goto out_put_disk;
 
        if ((ctrl->quirks & NVME_QUIRK_LIGHTNVM) && id->vs[0] == 0x1) {
                ret = nvme_nvm_register(ns, disk_name, node);
@@ -3696,8 +3696,6 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
        /* prevent double queue cleanup */
        ns->disk->queue = NULL;
        put_disk(ns->disk);
- out_free_disk:
-       del_gendisk(ns->disk);
  out_unlink_ns:
        mutex_lock(&ctrl->subsys->lock);
        list_del_rcu(&ns->siblings);
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index cb00075..e999a8c 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2634,10 +2634,11 @@ nvme_fc_start_fcp_op(struct nvme_fc_ctrl *ctrl, struct nvme_fc_queue *queue,
                opstate = atomic_xchg(&op->state, FCPOP_STATE_COMPLETE);
                __nvme_fc_fcpop_chk_teardowns(ctrl, op, opstate);
 
-               if (!(op->flags & FCOP_FLAGS_AEN))
+               if (!(op->flags & FCOP_FLAGS_AEN)) {
                        nvme_fc_unmap_data(ctrl, op->rq, op);
+                       nvme_cleanup_cmd(op->rq);
+               }
 
-               nvme_cleanup_cmd(op->rq);
                nvme_fc_ctrl_put(ctrl);
 
                if (ctrl->rport->remoteport.port_state == FC_OBJSTATE_ONLINE &&
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index fa5c755..c0f4226 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -599,8 +599,7 @@ static inline void nvme_trace_bio_complete(struct request *req,
        struct nvme_ns *ns = req->q->queuedata;
 
        if (req->cmd_flags & REQ_NVME_MPATH)
-               trace_block_bio_complete(ns->head->disk->queue,
-                                        req->bio, status);
+               trace_block_bio_complete(ns->head->disk->queue, req->bio);
 }
 
 extern struct device_attribute dev_attr_ana_grpid;
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index d690d55..e2bacd3 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -2950,9 +2950,15 @@ static int nvme_suspend(struct device *dev)
         * the PCI bus layer to put it into D3 in order to take the PCIe link
         * down, so as to allow the platform to achieve its minimum low-power
         * state (which may not be possible if the link is up).
+        *
+        * If a host memory buffer is enabled, shut down the device as the NVMe
+        * specification allows the device to access the host memory buffer in
+        * host DRAM from all power states, but hosts will fail access to DRAM
+        * during S3.
         */
        if (pm_suspend_via_firmware() || !ctrl->npss ||
            !pcie_aspm_enabled(pdev) ||
+           ndev->nr_host_mem_descs ||
            (ndev->ctrl.quirks & NVME_QUIRK_SIMPLE_SUSPEND))
                return nvme_disable_prepare_reset(ndev, true);
 
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 1843110..3345ec7 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -131,8 +131,8 @@ struct nvme_tcp_ctrl {
 static LIST_HEAD(nvme_tcp_ctrl_list);
 static DEFINE_MUTEX(nvme_tcp_ctrl_mutex);
 static struct workqueue_struct *nvme_tcp_wq;
-static struct blk_mq_ops nvme_tcp_mq_ops;
-static struct blk_mq_ops nvme_tcp_admin_mq_ops;
+static const struct blk_mq_ops nvme_tcp_mq_ops;
+static const struct blk_mq_ops nvme_tcp_admin_mq_ops;
 static int nvme_tcp_try_send(struct nvme_tcp_queue *queue);
 
 static inline struct nvme_tcp_ctrl *to_tcp_ctrl(struct nvme_ctrl *ctrl)
@@ -2301,7 +2301,7 @@ static int nvme_tcp_poll(struct blk_mq_hw_ctx *hctx)
        return queue->nr_cqe;
 }
 
-static struct blk_mq_ops nvme_tcp_mq_ops = {
+static const struct blk_mq_ops nvme_tcp_mq_ops = {
        .queue_rq       = nvme_tcp_queue_rq,
        .complete       = nvme_complete_rq,
        .init_request   = nvme_tcp_init_request,
@@ -2312,7 +2312,7 @@ static struct blk_mq_ops nvme_tcp_mq_ops = {
        .poll           = nvme_tcp_poll,
 };
 
-static struct blk_mq_ops nvme_tcp_admin_mq_ops = {
+static const struct blk_mq_ops nvme_tcp_admin_mq_ops = {
        .queue_rq       = nvme_tcp_queue_rq,
        .complete       = nvme_complete_rq,
        .init_request   = nvme_tcp_init_request,
diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c
index 6392bcd..6e2f623 100644
--- a/drivers/nvme/target/core.c
+++ b/drivers/nvme/target/core.c
@@ -129,7 +129,22 @@ static u32 nvmet_async_event_result(struct nvmet_async_event *aen)
        return aen->event_type | (aen->event_info << 8) | (aen->log_page << 16);
 }
 
-static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
+static void nvmet_async_events_failall(struct nvmet_ctrl *ctrl)
+{
+       u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
+       struct nvmet_req *req;
+
+       mutex_lock(&ctrl->lock);
+       while (ctrl->nr_async_event_cmds) {
+               req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
+               mutex_unlock(&ctrl->lock);
+               nvmet_req_complete(req, status);
+               mutex_lock(&ctrl->lock);
+       }
+       mutex_unlock(&ctrl->lock);
+}
+
+static void nvmet_async_events_process(struct nvmet_ctrl *ctrl)
 {
        struct nvmet_async_event *aen;
        struct nvmet_req *req;
@@ -139,15 +154,14 @@ static void nvmet_async_events_process(struct nvmet_ctrl *ctrl, u16 status)
                aen = list_first_entry(&ctrl->async_events,
                                       struct nvmet_async_event, entry);
                req = ctrl->async_event_cmds[--ctrl->nr_async_event_cmds];
-               if (status == 0)
-                       nvmet_set_result(req, nvmet_async_event_result(aen));
+               nvmet_set_result(req, nvmet_async_event_result(aen));
 
                list_del(&aen->entry);
                kfree(aen);
 
                mutex_unlock(&ctrl->lock);
                trace_nvmet_async_event(ctrl, req->cqe->result.u32);
-               nvmet_req_complete(req, status);
+               nvmet_req_complete(req, 0);
                mutex_lock(&ctrl->lock);
        }
        mutex_unlock(&ctrl->lock);
@@ -170,7 +184,7 @@ static void nvmet_async_event_work(struct work_struct *work)
        struct nvmet_ctrl *ctrl =
                container_of(work, struct nvmet_ctrl, async_event_work);
 
-       nvmet_async_events_process(ctrl, 0);
+       nvmet_async_events_process(ctrl);
 }
 
 void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type,
@@ -779,7 +793,6 @@ static void nvmet_confirm_sq(struct percpu_ref *ref)
 
 void nvmet_sq_destroy(struct nvmet_sq *sq)
 {
-       u16 status = NVME_SC_INTERNAL | NVME_SC_DNR;
        struct nvmet_ctrl *ctrl = sq->ctrl;
 
        /*
@@ -787,7 +800,7 @@ void nvmet_sq_destroy(struct nvmet_sq *sq)
         * queue doesn't have outstanding requests on it.
         */
        if (ctrl && ctrl->sqs && ctrl->sqs[0] == sq)
-               nvmet_async_events_process(ctrl, status);
+               nvmet_async_events_failall(ctrl);
        percpu_ref_kill_and_confirm(&sq->ref, nvmet_confirm_sq);
        wait_for_completion(&sq->confirm_done);
        wait_for_completion(&sq->free_done);
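nvmet_async_events_failall() above uses a common pattern when completing queued work: walk the list under ctrl->lock, but drop the lock around each nvmet_req_complete() call-out and reacquire it before looking at the list again. A small userspace sketch of that loop shape, with hypothetical names:

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
    static int pending[4] = { 11, 22, 33 };
    static int nr_pending = 3;

    static void complete(int cmd, int status)
    {
            /* May sleep or take other locks, so it is called unlocked. */
            printf("completed cmd %d with status %d\n", cmd, status);
    }

    static void fail_all(int status)
    {
            pthread_mutex_lock(&lock);
            while (nr_pending) {
                    int cmd = pending[--nr_pending];

                    pthread_mutex_unlock(&lock);  /* drop the lock across the call-out */
                    complete(cmd, status);
                    pthread_mutex_lock(&lock);    /* retake it before rechecking the list */
            }
            pthread_mutex_unlock(&lock);
    }

    int main(void)
    {
            fail_all(-1);
            return 0;
    }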
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 1669177..de9217c 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -153,7 +153,7 @@ static LIST_HEAD(nvmet_tcp_queue_list);
 static DEFINE_MUTEX(nvmet_tcp_queue_mutex);
 
 static struct workqueue_struct *nvmet_tcp_wq;
-static struct nvmet_fabrics_ops nvmet_tcp_ops;
+static const struct nvmet_fabrics_ops nvmet_tcp_ops;
 static void nvmet_tcp_free_cmd(struct nvmet_tcp_cmd *c);
 static void nvmet_tcp_finish_cmd(struct nvmet_tcp_cmd *cmd);
 
@@ -1713,7 +1713,7 @@ static void nvmet_tcp_disc_port_addr(struct nvmet_req *req,
        }
 }
 
-static struct nvmet_fabrics_ops nvmet_tcp_ops = {
+static const struct nvmet_fabrics_ops nvmet_tcp_ops = {
        .owner                  = THIS_MODULE,
        .type                   = NVMF_TRTYPE_TCP,
        .msdbd                  = 1,
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 97bccde..768497f 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -447,6 +447,7 @@ static int afs_store_data(struct address_space *mapping,
        op->store.last = last;
        op->store.first_offset = offset;
        op->store.last_to = to;
+       op->mtime = vnode->vfs_inode.i_mtime;
        op->ops = &afs_store_data_operation;
 
 try_next_key:
diff --git a/fs/io-wq.c b/fs/io-wq.c
index a5e90ac..0b65a91 100644
--- a/fs/io-wq.c
+++ b/fs/io-wq.c
@@ -111,6 +111,7 @@ struct io_wq {
        unsigned long state;
 
        free_work_fn *free_work;
+       io_wq_work_fn *do_work;
 
        struct task_struct *manager;
        struct user_struct *user;
@@ -523,7 +524,7 @@ get_next:
 
                        hash = io_get_work_hash(work);
                        linked = old_work = work;
-                       linked->func(&linked);
+                       wq->do_work(&linked);
                        linked = (old_work == linked) ? NULL : linked;
 
                        work = next_hashed;
@@ -780,7 +781,7 @@ static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
                struct io_wq_work *old_work = work;
 
                work->flags |= IO_WQ_WORK_CANCEL;
-               work->func(&work);
+               wq->do_work(&work);
                work = (work == old_work) ? NULL : work;
                wq->free_work(old_work);
        } while (work);
@@ -1018,7 +1019,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
        int ret = -ENOMEM, node;
        struct io_wq *wq;
 
-       if (WARN_ON_ONCE(!data->free_work))
+       if (WARN_ON_ONCE(!data->free_work || !data->do_work))
                return ERR_PTR(-EINVAL);
 
        wq = kzalloc(sizeof(*wq), GFP_KERNEL);
@@ -1032,6 +1033,7 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
        }
 
        wq->free_work = data->free_work;
+       wq->do_work = data->do_work;
 
        /* caller must already hold a reference to this */
        wq->user = data->user;
@@ -1088,7 +1090,7 @@ err:
 
 bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
 {
-       if (data->free_work != wq->free_work)
+       if (data->free_work != wq->free_work || data->do_work != wq->do_work)
                return false;
 
        return refcount_inc_not_zero(&wq->use_refs);
diff --git a/fs/io-wq.h b/fs/io-wq.h
index 5ba12de..8e138fa 100644
--- a/fs/io-wq.h
+++ b/fs/io-wq.h
@@ -85,7 +85,6 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
        struct io_wq_work_node list;
-       void (*func)(struct io_wq_work **);
        struct files_struct *files;
        struct mm_struct *mm;
        const struct cred *creds;
@@ -94,11 +93,6 @@ struct io_wq_work {
        pid_t task_pid;
 };
 
-#define INIT_IO_WORK(work, _func)                              \
-       do {                                                    \
-               *(work) = (struct io_wq_work){ .func = _func }; \
-       } while (0)                                             \
-
 static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 {
        if (!work->list.next)
@@ -108,10 +102,12 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 }
 
 typedef void (free_work_fn)(struct io_wq_work *);
+typedef void (io_wq_work_fn)(struct io_wq_work **);
 
 struct io_wq_data {
        struct user_struct *user;
 
+       io_wq_work_fn *do_work;
        free_work_fn *free_work;
 };
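Together, the io-wq.c and io-wq.h hunks move the work callback from each io_wq_work item into the io_wq itself: the queue stores a single do_work handler and every item shrinks by one function pointer. A generic sketch of that layout, with hypothetical names:

    #include <stdio.h>

    struct work_item {
            int id;                 /* payload only; no per-item function pointer */
    };

    typedef void (work_fn)(struct work_item *);

    struct work_queue {
            work_fn *do_work;       /* one handler for every item on this queue */
    };

    static void handle(struct work_item *w)
    {
            printf("processed work %d\n", w->id);
    }

    int main(void)
    {
            struct work_queue wq = { .do_work = handle };
            struct work_item a = { .id = 1 }, b = { .id = 2 };

            wq.do_work(&a);
            wq.do_work(&b);
            return 0;
    }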
 
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 26f7bc9..155f3d8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -528,7 +528,6 @@ enum {
        REQ_F_INFLIGHT_BIT,
        REQ_F_CUR_POS_BIT,
        REQ_F_NOWAIT_BIT,
-       REQ_F_IOPOLL_COMPLETED_BIT,
        REQ_F_LINK_TIMEOUT_BIT,
        REQ_F_TIMEOUT_BIT,
        REQ_F_ISREG_BIT,
@@ -540,6 +539,8 @@ enum {
        REQ_F_POLLED_BIT,
        REQ_F_BUFFER_SELECTED_BIT,
        REQ_F_NO_FILE_TABLE_BIT,
+       REQ_F_QUEUE_TIMEOUT_BIT,
+       REQ_F_WORK_INITIALIZED_BIT,
 
        /* not a real bit, just to check we're not overflowing the space */
        __REQ_F_LAST_BIT,
@@ -571,8 +572,6 @@ enum {
        REQ_F_CUR_POS           = BIT(REQ_F_CUR_POS_BIT),
        /* must not punt to workers */
        REQ_F_NOWAIT            = BIT(REQ_F_NOWAIT_BIT),
-       /* polled IO has completed */
-       REQ_F_IOPOLL_COMPLETED  = BIT(REQ_F_IOPOLL_COMPLETED_BIT),
        /* has linked timeout */
        REQ_F_LINK_TIMEOUT      = BIT(REQ_F_LINK_TIMEOUT_BIT),
        /* timeout request */
@@ -595,6 +594,10 @@ enum {
        REQ_F_BUFFER_SELECTED   = BIT(REQ_F_BUFFER_SELECTED_BIT),
        /* doesn't need file table for this request */
        REQ_F_NO_FILE_TABLE     = BIT(REQ_F_NO_FILE_TABLE_BIT),
+       /* needs to queue linked timeout */
+       REQ_F_QUEUE_TIMEOUT     = BIT(REQ_F_QUEUE_TIMEOUT_BIT),
+       /* io_wq_work is initialized */
+       REQ_F_WORK_INITIALIZED  = BIT(REQ_F_WORK_INITIALIZED_BIT),
 };
 
 struct async_poll {
@@ -633,6 +636,8 @@ struct io_kiocb {
        struct io_async_ctx             *io;
        int                             cflags;
        u8                              opcode;
+       /* polled IO has completed */
+       u8                              iopoll_completed;
 
        u16                             buf_index;
 
@@ -697,6 +702,8 @@ struct io_op_def {
        unsigned                needs_mm : 1;
        /* needs req->file assigned */
        unsigned                needs_file : 1;
+       /* don't fail if file grab fails */
+       unsigned                needs_file_no_error : 1;
        /* hash wq insertion if file is a regular file */
        unsigned                hash_reg_file : 1;
        /* unbound wq insertion if file is a non-regular file */
@@ -803,6 +810,8 @@ static const struct io_op_def io_op_defs[] = {
                .needs_fs               = 1,
        },
        [IORING_OP_CLOSE] = {
+               .needs_file             = 1,
+               .needs_file_no_error    = 1,
                .file_table             = 1,
        },
        [IORING_OP_FILES_UPDATE] = {
@@ -903,6 +912,19 @@ EXPORT_SYMBOL(io_uring_get_socket);
 
 static void io_file_put_work(struct work_struct *work);
 
+/*
+ * Note: io_req_init_async() must be called the first time any
+ * member of io_wq_work is touched.
+ */
+static inline void io_req_init_async(struct io_kiocb *req)
+{
+       if (req->flags & REQ_F_WORK_INITIALIZED)
+               return;
+
+       memset(&req->work, 0, sizeof(req->work));
+       req->flags |= REQ_F_WORK_INITIALIZED;
+}
+
 static inline bool io_async_submit(struct io_ring_ctx *ctx)
 {
        return ctx->flags & IORING_SETUP_SQPOLL;
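io_req_init_async() above is a lazy one-shot initializer: the memset of io_wq_work runs only the first time a request actually needs it, and REQ_F_WORK_INITIALIZED records that it already happened (io_req_work_drop_env() below checks the same bit before touching the fields). A generic sketch of the pattern, with hypothetical names:

    #include <stdio.h>
    #include <string.h>

    #define F_WORK_INITIALIZED (1u << 0)

    struct req {
            unsigned int flags;
            char work[32];          /* stand-in for struct io_wq_work */
    };

    static void req_init_work(struct req *r)
    {
            if (r->flags & F_WORK_INITIALIZED)
                    return;         /* already set up; work is safe to touch */

            memset(r->work, 0, sizeof(r->work));
            r->flags |= F_WORK_INITIALIZED;
    }

    int main(void)
    {
            struct req r = { .flags = 0 };

            req_init_work(&r);      /* first touch: zeroes the work area */
            req_init_work(&r);      /* no-op afterwards */
            printf("flags=%#x\n", r.flags);
            return 0;
    }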
@@ -1029,6 +1051,9 @@ static inline void io_req_work_grab_env(struct io_kiocb *req,
 
 static inline void io_req_work_drop_env(struct io_kiocb *req)
 {
+       if (!(req->flags & REQ_F_WORK_INITIALIZED))
+               return;
+
        if (req->work.mm) {
                mmdrop(req->work.mm);
                req->work.mm = NULL;
@@ -1575,16 +1600,6 @@ static void io_free_req(struct io_kiocb *req)
                io_queue_async_work(nxt);
 }
 
-static void io_link_work_cb(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-       struct io_kiocb *link;
-
-       link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
-       io_queue_linked_timeout(link);
-       io_wq_submit_work(workptr);
-}
-
 static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
 {
        struct io_kiocb *link;
@@ -1596,7 +1611,7 @@ static void io_wq_assign_next(struct io_wq_work **workptr, struct io_kiocb *nxt)
        *workptr = &nxt->work;
        link = io_prep_linked_timeout(nxt);
        if (link)
-               nxt->work.func = io_link_work_cb;
+               nxt->flags |= REQ_F_QUEUE_TIMEOUT;
 }
 
 /*
@@ -1781,7 +1796,7 @@ static int io_do_iopoll(struct io_ring_ctx *ctx, unsigned int *nr_events,
                 * If we find a request that requires polling, break out
                 * and complete those lists first, if we have entries there.
                 */
-               if (req->flags & REQ_F_IOPOLL_COMPLETED) {
+               if (READ_ONCE(req->iopoll_completed)) {
                        list_move_tail(&req->list, &done);
                        continue;
                }
@@ -1962,7 +1977,7 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
                req_set_fail_links(req);
        req->result = res;
        if (res != -EAGAIN)
-               req->flags |= REQ_F_IOPOLL_COMPLETED;
+               WRITE_ONCE(req->iopoll_completed, 1);
 }
 
 /*
@@ -1995,7 +2010,7 @@ static void io_iopoll_req_issued(struct io_kiocb *req)
         * For fast devices, IO may have already completed. If it has, add
         * it to the front so we find it first.
         */
-       if (req->flags & REQ_F_IOPOLL_COMPLETED)
+       if (READ_ONCE(req->iopoll_completed))
                list_add(&req->list, &ctx->poll_list);
        else
                list_add_tail(&req->list, &ctx->poll_list);
@@ -2063,6 +2078,10 @@ static bool io_file_supports_async(struct file *file, int rw)
        if (S_ISREG(mode) && file->f_op != &io_uring_fops)
                return true;
 
+       /* any ->read/write should understand O_NONBLOCK */
+       if (file->f_flags & O_NONBLOCK)
+               return true;
+
        if (!(file->f_mode & FMODE_NOWAIT))
                return false;
 
@@ -2105,8 +2124,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                kiocb->ki_ioprio = get_current_ioprio();
 
        /* don't allow async punt if RWF_NOWAIT was requested */
-       if ((kiocb->ki_flags & IOCB_NOWAIT) ||
-           (req->file->f_flags & O_NONBLOCK))
+       if (kiocb->ki_flags & IOCB_NOWAIT)
                req->flags |= REQ_F_NOWAIT;
 
        if (force_nonblock)
@@ -2120,6 +2138,7 @@ static int io_prep_rw(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                kiocb->ki_flags |= IOCB_HIPRI;
                kiocb->ki_complete = io_complete_rw_iopoll;
                req->result = 0;
+               req->iopoll_completed = 0;
        } else {
                if (kiocb->ki_flags & IOCB_HIPRI)
                        return -EINVAL;
@@ -2358,8 +2377,14 @@ static ssize_t __io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
 static ssize_t io_iov_buffer_select(struct io_kiocb *req, struct iovec *iov,
                                    bool needs_lock)
 {
-       if (req->flags & REQ_F_BUFFER_SELECTED)
+       if (req->flags & REQ_F_BUFFER_SELECTED) {
+               struct io_buffer *kbuf;
+
+               kbuf = (struct io_buffer *) (unsigned long) req->rw.addr;
+               iov[0].iov_base = u64_to_user_ptr(kbuf->addr);
+               iov[0].iov_len = kbuf->len;
                return 0;
+       }
        if (!req->rw.len)
                return 0;
        else if (req->rw.len > 1)
@@ -2741,7 +2766,8 @@ copy_iov:
                        if (ret)
                                goto out_free;
                        /* any defer here is final, must blocking retry */
-                       if (!file_can_poll(req->file))
+                       if (!(req->flags & REQ_F_NOWAIT) &&
+                           !file_can_poll(req->file))
                                req->flags |= REQ_F_MUST_PUNT;
                        return -EAGAIN;
                }
@@ -2761,6 +2787,8 @@ static int __io_splice_prep(struct io_kiocb *req,
 
        if (req->flags & REQ_F_NEED_CLEANUP)
                return 0;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        sp->file_in = NULL;
        sp->len = READ_ONCE(sqe->len);
@@ -2775,8 +2803,14 @@ static int __io_splice_prep(struct io_kiocb *req,
                return ret;
        req->flags |= REQ_F_NEED_CLEANUP;
 
-       if (!S_ISREG(file_inode(sp->file_in)->i_mode))
+       if (!S_ISREG(file_inode(sp->file_in)->i_mode)) {
+                * Splice operations will be punted async, and we need to
+                * modify io_wq_work.flags, so initialize io_wq_work first.
+                * modify io_wq_work.flags, so initialize io_wq_work firstly.
+                */
+               io_req_init_async(req);
                req->work.flags |= IO_WQ_WORK_UNBOUND;
+       }
 
        return 0;
 }
@@ -2885,23 +2919,15 @@ static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static bool io_req_cancelled(struct io_kiocb *req)
-{
-       if (req->work.flags & IO_WQ_WORK_CANCEL) {
-               req_set_fail_links(req);
-               io_cqring_add_event(req, -ECANCELED);
-               io_put_req(req);
-               return true;
-       }
-
-       return false;
-}
-
-static void __io_fsync(struct io_kiocb *req)
+static int io_fsync(struct io_kiocb *req, bool force_nonblock)
 {
        loff_t end = req->sync.off + req->sync.len;
        int ret;
 
+       /* fsync always requires a blocking context */
+       if (force_nonblock)
+               return -EAGAIN;
+
        ret = vfs_fsync_range(req->file, req->sync.off,
                                end > 0 ? end : LLONG_MAX,
                                req->sync.flags & IORING_FSYNC_DATASYNC);
@@ -2909,58 +2935,16 @@ static void __io_fsync(struct io_kiocb *req)
                req_set_fail_links(req);
        io_cqring_add_event(req, ret);
        io_put_req(req);
-}
-
-static void io_fsync_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
-       if (io_req_cancelled(req))
-               return;
-       __io_fsync(req);
-       io_steal_work(req, workptr);
-}
-
-static int io_fsync(struct io_kiocb *req, bool force_nonblock)
-{
-       /* fsync always requires a blocking context */
-       if (force_nonblock) {
-               req->work.func = io_fsync_finish;
-               return -EAGAIN;
-       }
-       __io_fsync(req);
        return 0;
 }
 
-static void __io_fallocate(struct io_kiocb *req)
-{
-       int ret;
-
-       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
-       ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
-                               req->sync.len);
-       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
-       if (ret < 0)
-               req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       io_put_req(req);
-}
-
-static void io_fallocate_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
-       if (io_req_cancelled(req))
-               return;
-       __io_fallocate(req);
-       io_steal_work(req, workptr);
-}
-
 static int io_fallocate_prep(struct io_kiocb *req,
                             const struct io_uring_sqe *sqe)
 {
        if (sqe->ioprio || sqe->buf_index || sqe->rw_flags)
                return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        req->sync.off = READ_ONCE(sqe->off);
        req->sync.len = READ_ONCE(sqe->addr);
@@ -2971,66 +2955,74 @@ static int io_fallocate_prep(struct io_kiocb *req,
 
 static int io_fallocate(struct io_kiocb *req, bool force_nonblock)
 {
+       int ret;
+
        /* fallocate always requiring blocking context */
-       if (force_nonblock) {
-               req->work.func = io_fallocate_finish;
+       if (force_nonblock)
                return -EAGAIN;
-       }
 
-       __io_fallocate(req);
+       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = req->fsize;
+       ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
+                               req->sync.len);
+       current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
+       if (ret < 0)
+               req_set_fail_links(req);
+       io_cqring_add_event(req, ret);
+       io_put_req(req);
        return 0;
 }
 
-static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int __io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        const char __user *fname;
        int ret;
 
-       if (sqe->ioprio || sqe->buf_index)
+       if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
                return -EINVAL;
-       if (req->flags & REQ_F_FIXED_FILE)
+       if (unlikely(sqe->ioprio || sqe->buf_index))
+               return -EINVAL;
+       if (unlikely(req->flags & REQ_F_FIXED_FILE))
                return -EBADF;
-       if (req->flags & REQ_F_NEED_CLEANUP)
-               return 0;
 
-       req->open.dfd = READ_ONCE(sqe->fd);
-       req->open.how.mode = READ_ONCE(sqe->len);
-       fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
-       req->open.how.flags = READ_ONCE(sqe->open_flags);
-       if (force_o_largefile())
+       /* open.how should be already initialised */
+       if (!(req->open.how.flags & O_PATH) && force_o_largefile())
                req->open.how.flags |= O_LARGEFILE;
 
+       req->open.dfd = READ_ONCE(sqe->fd);
+       fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
        req->open.filename = getname(fname);
        if (IS_ERR(req->open.filename)) {
                ret = PTR_ERR(req->open.filename);
                req->open.filename = NULL;
                return ret;
        }
-
        req->open.nofile = rlimit(RLIMIT_NOFILE);
        req->flags |= REQ_F_NEED_CLEANUP;
        return 0;
 }
 
+static int io_openat_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       u64 flags, mode;
+
+       if (req->flags & REQ_F_NEED_CLEANUP)
+               return 0;
+       mode = READ_ONCE(sqe->len);
+       flags = READ_ONCE(sqe->open_flags);
+       req->open.how = build_open_how(flags, mode);
+       return __io_openat_prep(req, sqe);
+}
+
 static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct open_how __user *how;
-       const char __user *fname;
        size_t len;
        int ret;
 
-       if (sqe->ioprio || sqe->buf_index)
-               return -EINVAL;
-       if (req->flags & REQ_F_FIXED_FILE)
-               return -EBADF;
        if (req->flags & REQ_F_NEED_CLEANUP)
                return 0;
-
-       req->open.dfd = READ_ONCE(sqe->fd);
-       fname = u64_to_user_ptr(READ_ONCE(sqe->addr));
        how = u64_to_user_ptr(READ_ONCE(sqe->addr2));
        len = READ_ONCE(sqe->len);
-
        if (len < OPEN_HOW_SIZE_VER0)
                return -EINVAL;
 
@@ -3039,19 +3031,7 @@ static int io_openat2_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (ret)
                return ret;
 
-       if (!(req->open.how.flags & O_PATH) && force_o_largefile())
-               req->open.how.flags |= O_LARGEFILE;
-
-       req->open.filename = getname(fname);
-       if (IS_ERR(req->open.filename)) {
-               ret = PTR_ERR(req->open.filename);
-               req->open.filename = NULL;
-               return ret;
-       }
-
-       req->open.nofile = rlimit(RLIMIT_NOFILE);
-       req->flags |= REQ_F_NEED_CLEANUP;
-       return 0;
+       return __io_openat_prep(req, sqe);
 }
 
 static int io_openat2(struct io_kiocb *req, bool force_nonblock)
@@ -3091,7 +3071,6 @@ err:
 
 static int io_openat(struct io_kiocb *req, bool force_nonblock)
 {
-       req->open.how = build_open_how(req->open.how.flags, req->open.how.mode);
        return io_openat2(req, force_nonblock);
 }
 
@@ -3180,7 +3159,7 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
        p->addr = READ_ONCE(sqe->addr);
        p->len = READ_ONCE(sqe->len);
 
-       if (!access_ok(u64_to_user_ptr(p->addr), p->len))
+       if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
                return -EFAULT;
 
        p->bgid = READ_ONCE(sqe->buf_group);
@@ -3258,6 +3237,8 @@ static int io_epoll_ctl_prep(struct io_kiocb *req,
 #if defined(CONFIG_EPOLL)
        if (sqe->ioprio || sqe->buf_index)
                return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        req->epoll.epfd = READ_ONCE(sqe->fd);
        req->epoll.op = READ_ONCE(sqe->len);
@@ -3302,6 +3283,8 @@ static int io_madvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 #if defined(CONFIG_ADVISE_SYSCALLS) && defined(CONFIG_MMU)
        if (sqe->ioprio || sqe->buf_index || sqe->off)
                return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        req->madvise.addr = READ_ONCE(sqe->addr);
        req->madvise.len = READ_ONCE(sqe->len);
@@ -3336,6 +3319,8 @@ static int io_fadvise_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        if (sqe->ioprio || sqe->buf_index || sqe->addr)
                return -EINVAL;
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
 
        req->fadvise.offset = READ_ONCE(sqe->off);
        req->fadvise.len = READ_ONCE(sqe->len);
@@ -3369,6 +3354,8 @@ static int io_fadvise(struct io_kiocb *req, bool force_nonblock)
 
 static int io_statx_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
        if (sqe->ioprio || sqe->buf_index)
                return -EINVAL;
        if (req->flags & REQ_F_FIXED_FILE)
@@ -3409,10 +3396,14 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        /*
         * If we queue this for async, it must not be cancellable. That would
-        * leave the 'file' in an undeterminate state.
+        * leave the 'file' in an indeterminate state, and we need to modify
+        * io_wq_work.flags here, so initialize io_wq_work first.
         */
+       io_req_init_async(req);
        req->work.flags |= IO_WQ_WORK_NO_CANCEL;
 
+       if (unlikely(req->ctx->flags & (IORING_SETUP_IOPOLL|IORING_SETUP_SQPOLL)))
+               return -EINVAL;
        if (sqe->ioprio || sqe->off || sqe->addr || sqe->len ||
            sqe->rw_flags || sqe->buf_index)
                return -EINVAL;
@@ -3420,53 +3411,41 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
                return -EBADF;
 
        req->close.fd = READ_ONCE(sqe->fd);
-       return 0;
-}
-
-/* only called when __close_fd_get_file() is done */
-static void __io_close_finish(struct io_kiocb *req)
-{
-       int ret;
-
-       ret = filp_close(req->close.put_file, req->work.files);
-       if (ret < 0)
-               req_set_fail_links(req);
-       io_cqring_add_event(req, ret);
-       fput(req->close.put_file);
-       io_put_req(req);
-}
-
-static void io_close_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
+       if ((req->file && req->file->f_op == &io_uring_fops) ||
+           req->close.fd == req->ctx->ring_fd)
+               return -EBADF;
 
-       /* not cancellable, don't do io_req_cancelled() */
-       __io_close_finish(req);
-       io_steal_work(req, workptr);
+       req->close.put_file = NULL;
+       return 0;
 }
 
 static int io_close(struct io_kiocb *req, bool force_nonblock)
 {
+       struct io_close *close = &req->close;
        int ret;
 
-       req->close.put_file = NULL;
-       ret = __close_fd_get_file(req->close.fd, &req->close.put_file);
-       if (ret < 0)
-               return (ret == -ENOENT) ? -EBADF : ret;
+       /* might be already done during nonblock submission */
+       if (!close->put_file) {
+               ret = __close_fd_get_file(close->fd, &close->put_file);
+               if (ret < 0)
+                       return (ret == -ENOENT) ? -EBADF : ret;
+       }
 
        /* if the file has a flush method, be safe and punt to async */
-       if (req->close.put_file->f_op->flush && force_nonblock) {
+       if (close->put_file->f_op->flush && force_nonblock) {
                /* avoid grabbing files - we don't need the files */
                req->flags |= REQ_F_NO_FILE_TABLE | REQ_F_MUST_PUNT;
-               req->work.func = io_close_finish;
                return -EAGAIN;
        }
 
-       /*
-        * No ->flush(), safely close from here and just punt the
-        * fput() to async context.
-        */
-       __io_close_finish(req);
+       /* No ->flush() or already async, safely close from here */
+       ret = filp_close(close->put_file, req->work.files);
+       if (ret < 0)
+               req_set_fail_links(req);
+       io_cqring_add_event(req, ret);
+       fput(close->put_file);
+       close->put_file = NULL;
+       io_put_req(req);
        return 0;
 }
 
@@ -3488,38 +3467,20 @@ static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static void __io_sync_file_range(struct io_kiocb *req)
+static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
 {
        int ret;
 
+       /* sync_file_range always requires a blocking context */
+       if (force_nonblock)
+               return -EAGAIN;
+
        ret = sync_file_range(req->file, req->sync.off, req->sync.len,
                                req->sync.flags);
        if (ret < 0)
                req_set_fail_links(req);
        io_cqring_add_event(req, ret);
        io_put_req(req);
-}
-
-
-static void io_sync_file_range_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
-       if (io_req_cancelled(req))
-               return;
-       __io_sync_file_range(req);
-       io_steal_work(req, workptr);
-}
-
-static int io_sync_file_range(struct io_kiocb *req, bool force_nonblock)
-{
-       /* sync_file_range always requires a blocking context */
-       if (force_nonblock) {
-               req->work.func = io_sync_file_range_finish;
-               return -EAGAIN;
-       }
-
-       __io_sync_file_range(req);
        return 0;
 }
 
@@ -3545,6 +3506,9 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        struct io_async_ctx *io = req->io;
        int ret;
 
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+
        sr->msg_flags = READ_ONCE(sqe->msg_flags);
        sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
@@ -3574,9 +3538,6 @@ static int io_sendmsg(struct io_kiocb *req, bool force_nonblock)
        struct socket *sock;
        int ret;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sock = sock_from_file(req->file, &ret);
        if (sock) {
                struct io_async_ctx io;
@@ -3630,9 +3591,6 @@ static int io_send(struct io_kiocb *req, bool force_nonblock)
        struct socket *sock;
        int ret;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sock = sock_from_file(req->file, &ret);
        if (sock) {
                struct io_sr_msg *sr = &req->sr_msg;
@@ -3785,6 +3743,9 @@ static int io_recvmsg_prep(struct io_kiocb *req,
        struct io_async_ctx *io = req->io;
        int ret;
 
+       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
+               return -EINVAL;
+
        sr->msg_flags = READ_ONCE(sqe->msg_flags);
        sr->msg = u64_to_user_ptr(READ_ONCE(sqe->addr));
        sr->len = READ_ONCE(sqe->len);
@@ -3813,9 +3774,6 @@ static int io_recvmsg(struct io_kiocb *req, bool force_nonblock)
        struct socket *sock;
        int ret, cflags = 0;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sock = sock_from_file(req->file, &ret);
        if (sock) {
                struct io_buffer *kbuf;
@@ -3877,9 +3835,6 @@ static int io_recv(struct io_kiocb *req, bool force_nonblock)
        struct socket *sock;
        int ret, cflags = 0;
 
-       if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
-               return -EINVAL;
-
        sock = sock_from_file(req->file, &ret);
        if (sock) {
                struct io_sr_msg *sr = &req->sr_msg;
@@ -3947,49 +3902,30 @@ static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return 0;
 }
 
-static int __io_accept(struct io_kiocb *req, bool force_nonblock)
+static int io_accept(struct io_kiocb *req, bool force_nonblock)
 {
        struct io_accept *accept = &req->accept;
-       unsigned file_flags;
+       unsigned int file_flags = force_nonblock ? O_NONBLOCK : 0;
        int ret;
 
-       file_flags = force_nonblock ? O_NONBLOCK : 0;
+       if (req->file->f_flags & O_NONBLOCK)
+               req->flags |= REQ_F_NOWAIT;
+
        ret = __sys_accept4_file(req->file, file_flags, accept->addr,
                                        accept->addr_len, accept->flags,
                                        accept->nofile);
        if (ret == -EAGAIN && force_nonblock)
                return -EAGAIN;
-       if (ret == -ERESTARTSYS)
-               ret = -EINTR;
-       if (ret < 0)
+       if (ret < 0) {
+               if (ret == -ERESTARTSYS)
+                       ret = -EINTR;
                req_set_fail_links(req);
+       }
        io_cqring_add_event(req, ret);
        io_put_req(req);
        return 0;
 }
 
-static void io_accept_finish(struct io_wq_work **workptr)
-{
-       struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
-
-       if (io_req_cancelled(req))
-               return;
-       __io_accept(req, false);
-       io_steal_work(req, workptr);
-}
-
-static int io_accept(struct io_kiocb *req, bool force_nonblock)
-{
-       int ret;
-
-       ret = __io_accept(req, force_nonblock);
-       if (ret == -EAGAIN && force_nonblock) {
-               req->work.func = io_accept_finish;
-               return -EAGAIN;
-       }
-       return 0;
-}
-
 static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_connect *conn = &req->connect;
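
io_accept() above now passes O_NONBLOCK through file_flags when force_nonblock is set and retries on -EAGAIN, and it marks the request REQ_F_NOWAIT when the listening file itself is non-blocking. A hedged userspace sketch of the same accept4()-with-EAGAIN behaviour (the loopback bind and backlog value are arbitrary choices for the demo):

        /* Sketch: accept4() on a non-blocking listener returns -1/EAGAIN when no
         * connection is pending - the condition io_accept() retries on.
         */
        #define _GNU_SOURCE
        #include <errno.h>
        #include <netinet/in.h>
        #include <stdio.h>
        #include <string.h>
        #include <sys/socket.h>
        #include <unistd.h>

        int main(void)
        {
                struct sockaddr_in addr;
                int lfd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);

                if (lfd < 0) {
                        perror("socket");
                        return 1;
                }
                memset(&addr, 0, sizeof(addr));
                addr.sin_family = AF_INET;
                addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
                addr.sin_port = 0;                      /* any free port */
                if (bind(lfd, (struct sockaddr *)&addr, sizeof(addr)) < 0 ||
                    listen(lfd, 8) < 0) {
                        perror("bind/listen");
                        return 1;
                }
                /* Nobody is connecting, so this returns immediately with EAGAIN
                 * (or EWOULDBLOCK) instead of sleeping - the userspace analogue
                 * of the force_nonblock path above. */
                if (accept4(lfd, NULL, NULL, SOCK_CLOEXEC) < 0 &&
                    (errno == EAGAIN || errno == EWOULDBLOCK))
                        printf("no pending connection, would retry later\n");
                close(lfd);
                return 0;
        }
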
@@ -4328,7 +4264,8 @@ static void io_async_task_func(struct callback_head *cb)
        spin_unlock_irq(&ctx->completion_lock);
 
        /* restore ->work in case we need to retry again */
-       memcpy(&req->work, &apoll->work, sizeof(req->work));
+       if (req->flags & REQ_F_WORK_INITIALIZED)
+               memcpy(&req->work, &apoll->work, sizeof(req->work));
        kfree(apoll);
 
        if (!canceled) {
@@ -4425,7 +4362,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
                return false;
 
        req->flags |= REQ_F_POLLED;
-       memcpy(&apoll->work, &req->work, sizeof(req->work));
+       if (req->flags & REQ_F_WORK_INITIALIZED)
+               memcpy(&apoll->work, &req->work, sizeof(req->work));
        had_io = req->io != NULL;
 
        get_task_struct(current);
@@ -4450,7 +4388,8 @@ static bool io_arm_poll_handler(struct io_kiocb *req)
                if (!had_io)
                        io_poll_remove_double(req);
                spin_unlock_irq(&ctx->completion_lock);
-               memcpy(&req->work, &apoll->work, sizeof(req->work));
+               if (req->flags & REQ_F_WORK_INITIALIZED)
+                       memcpy(&req->work, &apoll->work, sizeof(req->work));
                kfree(apoll);
                return false;
        }
@@ -4495,7 +4434,9 @@ static bool io_poll_remove_one(struct io_kiocb *req)
                         * io_req_work_drop_env below when dropping the
                         * final reference.
                         */
-                       memcpy(&req->work, &apoll->work, sizeof(req->work));
+                       if (req->flags & REQ_F_WORK_INITIALIZED)
+                               memcpy(&req->work, &apoll->work,
+                                      sizeof(req->work));
                        kfree(apoll);
                }
        }
@@ -4944,6 +4885,8 @@ static int io_req_defer_prep(struct io_kiocb *req,
        if (!sqe)
                return 0;
 
+       io_req_init_async(req);
+
        if (io_op_defs[req->opcode].file_table) {
                ret = io_grab_files(req);
                if (unlikely(ret))
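
Several hunks above guard the ->work copies with REQ_F_WORK_INITIALIZED, and io_req_defer_prep() now calls io_req_init_async() explicitly instead of relying on an unconditional INIT_IO_WORK at submission time. A small, hypothetical userspace sketch of that "initialize on first async use" pattern (struct and function names here are illustrative, not the kernel's):

        /* Sketch of lazy one-time initialization guarded by a flags bit: the
         * work descriptor is only set up, and later saved/restored, once a
         * request actually takes the async route.
         */
        #include <stdio.h>
        #include <string.h>

        #define FLAG_WORK_INITIALIZED   (1u << 0)       /* illustrative flag bit */

        struct demo_work {
                void (*fn)(struct demo_work *w);
                int payload;
        };

        struct demo_req {
                unsigned int flags;
                struct demo_work work;
        };

        static void demo_req_init_async(struct demo_req *req)
        {
                if (req->flags & FLAG_WORK_INITIALIZED)
                        return;                         /* already prepared once */
                memset(&req->work, 0, sizeof(req->work));
                req->flags |= FLAG_WORK_INITIALIZED;
                printf("work initialized lazily\n");
        }

        static void demo_save_work(const struct demo_req *req, struct demo_work *saved)
        {
                /* Only copy the work descriptor if it was ever initialized;
                 * copying uninitialized bytes would clobber real state later. */
                if (req->flags & FLAG_WORK_INITIALIZED)
                        *saved = req->work;
        }

        int main(void)
        {
                struct demo_req req = { .flags = 0 };
                struct demo_work saved = { 0 };

                demo_save_work(&req, &saved);           /* no-op: nothing to save */
                demo_req_init_async(&req);              /* first async use */
                demo_req_init_async(&req);              /* second call is a no-op */
                demo_save_work(&req, &saved);           /* now the copy is meaningful */
                return 0;
        }
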
@@ -5381,12 +5324,26 @@ static int io_issue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
        return 0;
 }
 
+static void io_arm_async_linked_timeout(struct io_kiocb *req)
+{
+       struct io_kiocb *link;
+
+       /* link head's timeout is queued in io_queue_async_work() */
+       if (!(req->flags & REQ_F_QUEUE_TIMEOUT))
+               return;
+
+       link = list_first_entry(&req->link_list, struct io_kiocb, link_list);
+       io_queue_linked_timeout(link);
+}
+
 static void io_wq_submit_work(struct io_wq_work **workptr)
 {
        struct io_wq_work *work = *workptr;
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
        int ret = 0;
 
+       io_arm_async_linked_timeout(req);
+
        /* if NO_CANCEL is set, we must still run the work */
        if ((work->flags & (IO_WQ_WORK_CANCEL|IO_WQ_WORK_NO_CANCEL)) ==
                                IO_WQ_WORK_CANCEL) {
@@ -5437,19 +5394,20 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req,
                        return -EBADF;
                fd = array_index_nospec(fd, ctx->nr_user_files);
                file = io_file_from_index(ctx, fd);
-               if (!file)
-                       return -EBADF;
-               req->fixed_file_refs = ctx->file_data->cur_refs;
-               percpu_ref_get(req->fixed_file_refs);
+               if (file) {
+                       req->fixed_file_refs = ctx->file_data->cur_refs;
+                       percpu_ref_get(req->fixed_file_refs);
+               }
        } else {
                trace_io_uring_file_get(ctx, fd);
                file = __io_file_get(state, fd);
-               if (unlikely(!file))
-                       return -EBADF;
        }
 
-       *out_file = file;
-       return 0;
+       if (file || io_op_defs[req->opcode].needs_file_no_error) {
+               *out_file = file;
+               return 0;
+       }
+       return -EBADF;
 }
 
 static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req,
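
io_file_get() above now tolerates a NULL file when the opcode's definition allows it (needs_file_no_error) instead of failing every lookup with -EBADF. A hypothetical userspace sketch of such a per-opcode capability table (the names mirror the shape of io_op_defs but are invented for the example):

        /* Sketch: a per-opcode table where some operations accept a missing
         * file and others must fail with EBADF.
         */
        #include <errno.h>
        #include <stddef.h>
        #include <stdio.h>

        struct demo_op_def {
                const char *name;
                int file_optional;      /* 1: a NULL "file" is acceptable */
        };

        static const struct demo_op_def demo_op_defs[] = {
                { "read",  0 },         /* must resolve to a real file */
                { "close", 1 },         /* may legitimately see no file */
        };

        static int demo_resolve_file(int opcode, void *file, void **out_file)
        {
                if (file || demo_op_defs[opcode].file_optional) {
                        *out_file = file;
                        return 0;
                }
                return -EBADF;
        }

        int main(void)
        {
                void *out = NULL;
                int dummy;

                printf("read, no file : %d\n", demo_resolve_file(0, NULL, &out));
                printf("close, no file: %d\n", demo_resolve_file(1, NULL, &out));
                printf("read, file    : %d\n", demo_resolve_file(0, &dummy, &out));
                return 0;
        }
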
@@ -5583,7 +5541,8 @@ static void __io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 again:
        linked_timeout = io_prep_linked_timeout(req);
 
-       if (req->work.creds && req->work.creds != current_cred()) {
+       if ((req->flags & REQ_F_WORK_INITIALIZED) && req->work.creds &&
+           req->work.creds != current_cred()) {
                if (old_creds)
                        revert_creds(old_creds);
                if (old_creds == req->work.creds)
@@ -5606,6 +5565,8 @@ again:
                        goto exit;
                }
 punt:
+               io_req_init_async(req);
+
                if (io_op_defs[req->opcode].file_table) {
                        ret = io_grab_files(req);
                        if (ret)
@@ -5858,7 +5819,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
        refcount_set(&req->refs, 2);
        req->task = NULL;
        req->result = 0;
-       INIT_IO_WORK(&req->work, io_wq_submit_work);
 
        if (unlikely(req->opcode >= IORING_OP_LAST))
                return -EINVAL;
@@ -5880,6 +5840,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
 
        id = READ_ONCE(sqe->personality);
        if (id) {
+               io_req_init_async(req);
                req->work.creds = idr_find(&ctx->personality_idr, id);
                if (unlikely(!req->work.creds))
                        return -EINVAL;
@@ -6874,6 +6835,7 @@ static int io_init_wq_offload(struct io_ring_ctx *ctx,
 
        data.user = ctx->user;
        data.free_work = io_free_work;
+       data.do_work = io_wq_submit_work;
 
        if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
                /* Do QD, or 4 * CPUS, whatever is smallest */
@@ -7155,8 +7117,8 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg,
 
                ret = 0;
                if (!pages || nr_pages > got_pages) {
-                       kfree(vmas);
-                       kfree(pages);
+                       kvfree(vmas);
+                       kvfree(pages);
                        pages = kvmalloc_array(nr_pages, sizeof(struct page *),
                                                GFP_KERNEL);
                        vmas = kvmalloc_array(nr_pages,
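
The last io_uring.c hunk swaps kfree() for kvfree() because the arrays come from kvmalloc_array(), which may fall back to vmalloc and therefore needs the matching release helper. As a loose userspace analogy only (helper names are invented), mixing an allocator with the wrong release function is the same class of bug:

        /* Loose analogy for the kvmalloc/kvfree pairing: an allocator that
         * falls back from malloc() to mmap() for large requests must be paired
         * with a release helper that remembers which path was taken.
         */
        #include <stdlib.h>
        #include <string.h>
        #include <sys/mman.h>

        #define BIG_THRESHOLD   (1u << 20)      /* arbitrary cut-over, 1 MiB */

        struct blob {
                void *ptr;
                size_t size;
                int mmapped;            /* which allocator was used */
        };

        static int blob_alloc(struct blob *b, size_t size)
        {
                b->size = size;
                if (size < BIG_THRESHOLD) {
                        b->ptr = malloc(size);
                        b->mmapped = 0;
                        return b->ptr ? 0 : -1;
                }
                b->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
                              MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
                b->mmapped = 1;
                return b->ptr == MAP_FAILED ? -1 : 0;
        }

        static void blob_free(struct blob *b)
        {
                /* Calling free() on an mmap()ed region (or munmap() on a
                 * malloc()ed one) corrupts state - the userspace cousin of
                 * kfree()ing memory that kvmalloc_array() got from vmalloc. */
                if (b->mmapped)
                        munmap(b->ptr, b->size);
                else
                        free(b->ptr);
                b->ptr = NULL;
        }

        int main(void)
        {
                struct blob small, big;

                if (blob_alloc(&small, 4096) || blob_alloc(&big, 4u << 20))
                        return 1;
                memset(small.ptr, 0, small.size);
                memset(big.ptr, 0, big.size);
                blob_free(&small);
                blob_free(&big);
                return 0;
        }
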
index 81b43f5..1257f26 100644 (file)
@@ -261,9 +261,9 @@ TRACE_EVENT(block_bio_bounce,
  */
 TRACE_EVENT(block_bio_complete,
 
-       TP_PROTO(struct request_queue *q, struct bio *bio, int error),
+       TP_PROTO(struct request_queue *q, struct bio *bio),
 
-       TP_ARGS(q, bio, error),
+       TP_ARGS(q, bio),
 
        TP_STRUCT__entry(
                __field( dev_t,         dev             )
@@ -277,7 +277,7 @@ TRACE_EVENT(block_bio_complete,
                __entry->dev            = bio_dev(bio);
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio_sectors(bio);
-               __entry->error          = error;
+               __entry->error          = blk_status_to_errno(bio->bi_status);
                blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
        ),
 
index 7cb09c4..02441ea 100644 (file)
@@ -928,14 +928,12 @@ int __clocksource_register_scale(struct clocksource *cs, u32 scale, u32 freq)
 
        clocksource_arch_init(cs);
 
-#ifdef CONFIG_GENERIC_VDSO_CLOCK_MODE
        if (cs->vdso_clock_mode < 0 ||
            cs->vdso_clock_mode >= VDSO_CLOCKMODE_MAX) {
                pr_warn("clocksource %s registered with invalid VDSO mode %d. Disabling VDSO support.\n",
                        cs->name, cs->vdso_clock_mode);
                cs->vdso_clock_mode = VDSO_CLOCKMODE_NONE;
        }
-#endif
 
        /* Initialize mult/shift and max_idle_ns */
        __clocksource_update_freq_scale(cs, scale, freq);
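
With the #ifdef removed, the VDSO clock-mode sanity check above runs for every clocksource registration: an out-of-range mode is reported and downgraded to "no VDSO support" rather than trusted. A tiny, hypothetical sketch of that validate-then-degrade pattern (the enum and names are illustrative, not the kernel's):

        /* Sketch: validate a driver-supplied mode against a known range and
         * fall back to a safe default instead of propagating garbage.
         */
        #include <stdio.h>

        enum demo_vdso_mode {
                DEMO_MODE_NONE = 0,
                DEMO_MODE_TSC,
                DEMO_MODE_MAX,          /* one past the last valid mode */
        };

        static int demo_register_clocksource(const char *name, int mode)
        {
                if (mode < 0 || mode >= DEMO_MODE_MAX) {
                        fprintf(stderr,
                                "clocksource %s: invalid mode %d, disabling fast path\n",
                                name, mode);
                        mode = DEMO_MODE_NONE;  /* degrade, don't reject */
                }
                printf("registered %s with mode %d\n", name, mode);
                return 0;
        }

        int main(void)
        {
                demo_register_clocksource("good-clk", DEMO_MODE_TSC);
                demo_register_clocksource("buggy-clk", 42);
                return 0;
        }
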
index ea47f20..5773f0b 100644 (file)
@@ -885,10 +885,10 @@ static void blk_add_trace_bio_bounce(void *ignore,
 }
 
 static void blk_add_trace_bio_complete(void *ignore,
-                                      struct request_queue *q, struct bio *bio,
-                                      int error)
+                                      struct request_queue *q, struct bio *bio)
 {
-       blk_add_trace_bio(q, bio, BLK_TA_COMPLETE, error);
+       blk_add_trace_bio(q, bio, BLK_TA_COMPLETE,
+                         blk_status_to_errno(bio->bi_status));
 }
 
 static void blk_add_trace_bio_backmerge(void *ignore,
@@ -995,8 +995,10 @@ static void blk_add_trace_split(void *ignore,
 
                __blk_add_trace(bt, bio->bi_iter.bi_sector,
                                bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf,
-                               BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu),
-                               &rpdu, blk_trace_bio_get_cgid(q, bio));
+                               BLK_TA_SPLIT,
+                               blk_status_to_errno(bio->bi_status),
+                               sizeof(rpdu), &rpdu,
+                               blk_trace_bio_get_cgid(q, bio));
        }
        rcu_read_unlock();
 }
@@ -1033,7 +1035,8 @@ static void blk_add_trace_bio_remap(void *ignore,
        r.sector_from = cpu_to_be64(from);
 
        __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size,
-                       bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status,
+                       bio_op(bio), bio->bi_opf, BLK_TA_REMAP,
+                       blk_status_to_errno(bio->bi_status),
                        sizeof(r), &r, blk_trace_bio_get_cgid(q, bio));
        rcu_read_unlock();
 }
@@ -1253,21 +1256,10 @@ static inline __u16 t_error(const struct trace_entry *ent)
 
 static __u64 get_pdu_int(const struct trace_entry *ent, bool has_cg)
 {
-       const __u64 *val = pdu_start(ent, has_cg);
+       const __be64 *val = pdu_start(ent, has_cg);
        return be64_to_cpu(*val);
 }
 
-static void get_pdu_remap(const struct trace_entry *ent,
-                         struct blk_io_trace_remap *r, bool has_cg)
-{
-       const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);
-       __u64 sector_from = __r->sector_from;
-
-       r->device_from = be32_to_cpu(__r->device_from);
-       r->device_to   = be32_to_cpu(__r->device_to);
-       r->sector_from = be64_to_cpu(sector_from);
-}
-
 typedef void (blk_log_action_t) (struct trace_iterator *iter, const char *act,
        bool has_cg);
 
@@ -1407,13 +1399,13 @@ static void blk_log_with_error(struct trace_seq *s,
 
 static void blk_log_remap(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
 {
-       struct blk_io_trace_remap r = { .device_from = 0, };
+       const struct blk_io_trace_remap *__r = pdu_start(ent, has_cg);
 
-       get_pdu_remap(ent, &r, has_cg);
        trace_seq_printf(s, "%llu + %u <- (%d,%d) %llu\n",
                         t_sector(ent), t_sec(ent),
-                        MAJOR(r.device_from), MINOR(r.device_from),
-                        (unsigned long long)r.sector_from);
+                        MAJOR(be32_to_cpu(__r->device_from)),
+                        MINOR(be32_to_cpu(__r->device_from)),
+                        be64_to_cpu(__r->sector_from));
 }
 
 static void blk_log_plug(struct trace_seq *s, const struct trace_entry *ent, bool has_cg)
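
The blktrace hunks above stop round-tripping the remap PDU through a host-endian temporary and read the big-endian fields directly, with the payload typed __be64 so sparse can check the conversions. A hedged userspace sketch of decoding big-endian fields with the glibc <endian.h> helpers (the record layout here is invented for illustration; only the byte-order handling mirrors the patch):

        /* Sketch: decode big-endian record fields the way blk_log_remap() now
         * does with be32_to_cpu()/be64_to_cpu(), without an intermediate copy.
         */
        #include <endian.h>
        #include <stdint.h>
        #include <stdio.h>

        struct demo_remap {                     /* illustrative layout only */
                uint32_t device_from;           /* stored big-endian */
                uint32_t device_to;             /* stored big-endian */
                uint64_t sector_from;           /* stored big-endian */
        };

        int main(void)
        {
                struct demo_remap rec;

                /* Pretend this came off the wire: fields stored big-endian. */
                rec.device_from = htobe32((8u << 20) | 3u);
                rec.device_to   = htobe32((8u << 20) | 16u);
                rec.sector_from = htobe64(123456789ull);

                /* Decode in place rather than filling a host-endian temporary. */
                printf("from dev 0x%x to dev 0x%x, sector %llu\n",
                       be32toh(rec.device_from), be32toh(rec.device_to),
                       (unsigned long long)be64toh(rec.sector_from));
                return 0;
        }
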
index a2909af..bcc9a98 100644 (file)
@@ -38,6 +38,13 @@ static inline bool vdso_clocksource_ok(const struct vdso_data *vd)
 }
 #endif
 
+#ifndef vdso_cycles_ok
+static inline bool vdso_cycles_ok(u64 cycles)
+{
+       return true;
+}
+#endif
+
 #ifdef CONFIG_TIME_NS
 static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
                          struct __kernel_timespec *ts)
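
The generic vDSO code above gains vdso_cycles_ok() with a trivial default, so an architecture that needs extra validation can override it by defining the macro before this fallback is seen. A minimal sketch of that #ifndef-default / per-arch-override idiom (the hook names and the "counter not ready" rule are made up for the example):

        /* Sketch of the "#ifndef hook: weak default, arch may #define its own"
         * pattern.  In a real build the override would live in an arch header
         * included ahead of the generic code.
         */
        #include <stdbool.h>
        #include <stdint.h>
        #include <stdio.h>

        /* --- pretend arch header: opts in to extra validation --------------- */
        static inline bool arch_demo_cycles_ok(uint64_t cycles)
        {
                return cycles != 0;     /* invented rule: 0 means "not ready" */
        }
        #define demo_cycles_ok arch_demo_cycles_ok

        /* --- pretend generic code: used only if no arch override exists ----- */
        #ifndef demo_cycles_ok
        static inline bool demo_cycles_ok(uint64_t cycles)
        {
                return true;            /* default: trust the counter */
        }
        #endif

        int main(void)
        {
                printf("ok(0)    = %d\n", demo_cycles_ok(0));
                printf("ok(1234) = %d\n", demo_cycles_ok(1234));
                return 0;
        }
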
@@ -62,6 +69,8 @@ static int do_hres_timens(const struct vdso_data *vdns, clockid_t clk,
                        return -1;
 
                cycles = __arch_get_hw_counter(vd->clock_mode);
+               if (unlikely(!vdso_cycles_ok(cycles)))
+                       return -1;
                ns = vdso_ts->nsec;
                last = vd->cycle_last;
                ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
@@ -130,6 +139,8 @@ static __always_inline int do_hres(const struct vdso_data *vd, clockid_t clk,
                        return -1;
 
                cycles = __arch_get_hw_counter(vd->clock_mode);
+               if (unlikely(!vdso_cycles_ok(cycles)))
+                       return -1;
                ns = vdso_ts->nsec;
                last = vd->cycle_last;
                ns += vdso_calc_delta(cycles, last, vd->mask, vd->mult);
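
Both do_hres() variants above now bail out with -1 when the counter read fails validation, which sends the caller down the regular clock_gettime() syscall path. A hedged userspace sketch of that "try the fast path, else fall back to the syscall" shape (the fake fast path here always fails, purely to show the fallback):

        /* Sketch: attempt a (fake) fast time source first, then fall back to
         * clock_gettime(), mirroring how a failed validity check pushes the
         * vDSO callers onto the syscall path.
         */
        #include <stdio.h>
        #include <time.h>

        static int demo_fast_gettime(struct timespec *ts)
        {
                (void)ts;
                return -1;      /* pretend the fast counter read was invalid */
        }

        static int demo_clock_gettime(clockid_t clk, struct timespec *ts)
        {
                if (demo_fast_gettime(ts) == 0)
                        return 0;                       /* fast path succeeded */
                return clock_gettime(clk, ts);          /* reliable fallback */
        }

        int main(void)
        {
                struct timespec ts;

                if (demo_clock_gettime(CLOCK_MONOTONIC, &ts))
                        return 1;
                printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
                return 0;
        }
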
@@ -210,7 +221,7 @@ static __always_inline int do_coarse(const struct vdso_data *vd, clockid_t clk,
        return 0;
 }
 
-static __maybe_unused int
+static __always_inline int
 __cvdso_clock_gettime_common(const struct vdso_data *vd, clockid_t clock,
                             struct __kernel_timespec *ts)
 {