dm writecache: make writeback pause configurable
author: Mikulas Patocka <mpatocka@redhat.com>
Mon, 28 Jun 2021 13:59:37 +0000 (09:59 -0400)
committer: Mike Snitzer <snitzer@redhat.com>
Mon, 28 Jun 2021 20:30:13 +0000 (16:30 -0400)
Commit 95b88f4d71cb953e02206be3c757083601391a0f ("dm writecache: pause
writeback if cache full and origin being written directly") introduced
code that pauses cache flushing if we are issuing writes directly to the
origin.

Improve that initial commit by making the timeout code configurable
(via the option "pause_writeback"). Also change the default from 1s to
3s because it performed better.

Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
Documentation/admin-guide/device-mapper/writecache.rst
drivers/md/dm-io-tracker.h
drivers/md/dm-writecache.c

index 977f82b..65427d8 100644 (file)
@@ -12,7 +12,6 @@ first sector should contain valid superblock from previous invocation.
 Constructor parameters:
 
 1. type of the cache device - "p" or "s"
-
        - p - persistent memory
        - s - SSD
 2. the underlying device that will be cached
@@ -21,7 +20,6 @@ Constructor parameters:
    size)
 5. the number of optional parameters (the parameters with an argument
    count as two)
-
        start_sector n          (default: 0)
                offset from the start of cache device in 512-byte sectors
        high_watermark n        (default: 50)
@@ -71,6 +69,9 @@ Constructor parameters:
        metadata_only
                only metadata is promoted to the cache. This option
                improves performance for heavier REQ_META workloads.
+       pause_writeback n       (default: 3000)
+               pause writeback if there was some write I/O redirected to
+               the origin volume in the last n milliseconds
 
 Status:
 1. error indicator - 0 if there was no error, otherwise error number
index 1dcf01f..bdcc627 100644 (file)
@@ -45,6 +45,18 @@ static inline bool dm_iot_idle_for(struct dm_io_tracker *iot, unsigned long j)
        return r;
 }
 
+static inline unsigned long dm_iot_idle_time(struct dm_io_tracker *iot)
+{
+       unsigned long r = 0;
+
+       spin_lock_irq(&iot->lock);
+       if (!iot->in_flight)
+               r = jiffies - iot->idle_time;
+       spin_unlock_irq(&iot->lock);
+
+       return r;
+}
+
 static inline void dm_iot_io_begin(struct dm_io_tracker *iot, sector_t len)
 {
        spin_lock_irq(&iot->lock);
index d70342c..e21e29e 100644 (file)
@@ -30,6 +30,7 @@
 #define AUTOCOMMIT_MSEC                        1000
 #define MAX_AGE_DIV                    16
 #define MAX_AGE_UNSPECIFIED            -1UL
+#define PAUSE_WRITEBACK                        (HZ * 3)
 
 #define BITMAP_GRANULARITY     65536
 #if BITMAP_GRANULARITY < PAGE_SIZE
@@ -125,6 +126,7 @@ struct dm_writecache {
        size_t freelist_high_watermark;
        size_t freelist_low_watermark;
        unsigned long max_age;
+       unsigned long pause;
 
        unsigned uncommitted_blocks;
        unsigned autocommit_blocks;
@@ -174,11 +176,13 @@ struct dm_writecache {
        bool cleaner:1;
        bool cleaner_set:1;
        bool metadata_only:1;
+       bool pause_set:1;
 
        unsigned high_wm_percent_value;
        unsigned low_wm_percent_value;
        unsigned autocommit_time_value;
        unsigned max_age_value;
+       unsigned pause_value;
 
        unsigned writeback_all;
        struct workqueue_struct *writeback_wq;
@@ -1470,9 +1474,11 @@ bio_copy:
        }
 
 unlock_remap_origin:
-       if (bio_data_dir(bio) != READ) {
-               dm_iot_io_begin(&wc->iot, 1);
-               bio->bi_private = (void *)2;
+       if (likely(wc->pause != 0)) {
+                if (bio_op(bio) == REQ_OP_WRITE) {
+                       dm_iot_io_begin(&wc->iot, 1);
+                       bio->bi_private = (void *)2;
+               }
        }
        bio_set_dev(bio, wc->dev->bdev);
        wc_unlock(wc);
@@ -1837,10 +1843,19 @@ static void writecache_writeback(struct work_struct *work)
                dm_kcopyd_client_flush(wc->dm_kcopyd);
        }
 
-       if (!wc->writeback_all && !dm_suspended(wc->ti)) {
-               while (!dm_iot_idle_for(&wc->iot, HZ)) {
-                       cond_resched();
-                       msleep(1000);
+       if (likely(wc->pause != 0)) {
+               while (1) {
+                       unsigned long idle;
+                       if (unlikely(wc->cleaner) || unlikely(wc->writeback_all) ||
+                           unlikely(dm_suspended(wc->ti)))
+                               break;
+                       idle = dm_iot_idle_time(&wc->iot);
+                       if (idle >= wc->pause)
+                               break;
+                       idle = wc->pause - idle;
+                       if (idle > HZ)
+                               idle = HZ;
+                       schedule_timeout_idle(idle);
                }
        }
 
@@ -2113,7 +2128,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
        struct wc_memory_superblock s;
 
        static struct dm_arg _args[] = {
-               {0, 17, "Invalid number of feature args"},
+               {0, 18, "Invalid number of feature args"},
        };
 
        as.argc = argc;
@@ -2206,6 +2221,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
                        goto bad;
                }
        } else {
+               wc->pause = PAUSE_WRITEBACK;
                r = mempool_init_kmalloc_pool(&wc->copy_pool, 1, sizeof(struct copy_struct));
                if (r) {
                        ti->error = "Could not allocate mempool";
@@ -2344,6 +2360,18 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
                        } else goto invalid_optional;
                } else if (!strcasecmp(string, "metadata_only")) {
                        wc->metadata_only = true;
+               } else if (!strcasecmp(string, "pause_writeback") && opt_params >= 1) {
+                       unsigned pause_msecs;
+                       if (WC_MODE_PMEM(wc))
+                               goto invalid_optional;
+                       string = dm_shift_arg(&as), opt_params--;
+                       if (sscanf(string, "%u%c", &pause_msecs, &dummy) != 1)
+                               goto invalid_optional;
+                       if (pause_msecs > 60000)
+                               goto invalid_optional;
+                       wc->pause = msecs_to_jiffies(pause_msecs);
+                       wc->pause_set = true;
+                       wc->pause_value = pause_msecs;
                } else {
 invalid_optional:
                        r = -EINVAL;
@@ -2569,6 +2597,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
                        extra_args++;
                if (wc->metadata_only)
                        extra_args++;
+               if (wc->pause_set)
+                       extra_args += 2;
 
                DMEMIT("%u", extra_args);
                if (wc->start_sector_set)
@@ -2591,6 +2621,8 @@ static void writecache_status(struct dm_target *ti, status_type_t type,
                        DMEMIT(" %sfua", wc->writeback_fua ? "" : "no");
                if (wc->metadata_only)
                        DMEMIT(" metadata_only");
+               if (wc->pause_set)
+                       DMEMIT(" pause_writeback %u", wc->pause_value);
                break;
        }
 }