fs/btrfs/discard.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 #include <linux/jiffies.h>
   4 #include <linux/kernel.h>
   5 #include <linux/ktime.h>
   6 #include <linux/list.h>
   7 #include <linux/math64.h>
   8 #include <linux/sizes.h>
   9 #include <linux/workqueue.h>
  10 #include "ctree.h"
  11 #include "block-group.h"
  12 #include "discard.h"
  13 #include "free-space-cache.h"
  14
  15 /*
  16  * This contains the logic to handle async discard.
  17  *
  18  * Async discard manages trimming of free space outside of transaction commit.
  19  * Discarding is done by managing the block_groups on a LRU list based on free
  20  * space recency.  Two passes are used to first prioritize discarding extents
  21  * and then allow for trimming in the bitmap the best opportunity to coalesce.
  22  * The block_groups are maintained on multiple lists to allow for multiple
  23  * passes with different discard filter requirements.  A delayed work item is
  24  * used to manage discarding with timeout determined by a max of the delay
  25  * incurred by the iops rate limit, the byte rate limit, and the max delay of
 * BTRFS_DISCARD_MAX_DELAY_MSEC.
  27  *
  28  * Note, this only keeps track of block_groups that are explicitly for data.
  29  * Mixed block_groups are not supported.
  30  *
  31  * The first list is special to manage discarding of fully free block groups.
  32  * This is necessary because we issue a final trim for a full free block group
  33  * after forgetting it.  When a block group becomes unused, instead of directly
  34  * being added to the unused_bgs list, we add it to this first list.  Then
  35  * from there, if it becomes fully discarded, we place it onto the unused_bgs
  36  * list.
  37  *
  38  * The in-memory free space cache serves as the backing state for discard.
  39  * Consequently this means there is no persistence.  We opt to load all the
  40  * block groups in as not discarded, so the mount case degenerates to the
  41  * crashing case.
  42  *
  43  * As the free space cache uses bitmaps, there exists a tradeoff between
  44  * ease/efficiency for find_free_extent() and the accuracy of discard state.
  45  * Here we opt to let untrimmed regions merge with everything while only letting
  46  * trimmed regions merge with other trimmed regions.  This can cause
  47  * overtrimming, but the coalescing benefit seems to be worth it.  Additionally,
  48  * bitmap state is tracked as a whole.  If we're able to fully trim a bitmap,
  49  * the trimmed flag is set on the bitmap.  Otherwise, if an allocation comes in,
  50  * this resets the state and we will retry trimming the whole bitmap.  This is a
  51  * tradeoff between discard state accuracy and the cost of accounting.
  52  */
  53
  54 /* This is an initial delay to give some chance for block reuse */
  55 #define BTRFS_DISCARD_DELAY             (120ULL * NSEC_PER_SEC)
  56 #define BTRFS_DISCARD_UNUSED_DELAY      (10ULL * NSEC_PER_SEC)
  57
  58 /* Target completion latency of discarding all discardable extents */
  59 #define BTRFS_DISCARD_TARGET_MSEC       (6 * 60 * 60UL * MSEC_PER_SEC)
  60 #define BTRFS_DISCARD_MIN_DELAY_MSEC    (1UL)
  61 #define BTRFS_DISCARD_MAX_DELAY_MSEC    (1000UL)
  62 #define BTRFS_DISCARD_MAX_IOPS          (10U)
  63
  64 /* Montonically decreasing minimum length filters after index 0 */
  65 static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
  66         0,
  67         BTRFS_ASYNC_DISCARD_MAX_FILTER,
  68         BTRFS_ASYNC_DISCARD_MIN_FILTER
  69 };
  70
  71 static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
  72                                           struct btrfs_block_group *block_group)
  73 {
  74         return &discard_ctl->discard_list[block_group->discard_index];
  75 }
  76
  77 static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
  78                                   struct btrfs_block_group *block_group)
  79 {
  80         if (!btrfs_run_discard_work(discard_ctl))
  81                 return;
  82
  83         if (list_empty(&block_group->discard_list) ||
  84             block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
  85                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
  86                         block_group->discard_index = BTRFS_DISCARD_INDEX_START;
  87                 block_group->discard_eligible_time = (ktime_get_ns() +
  88                                                       BTRFS_DISCARD_DELAY);
  89                 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
  90         }
  91
  92         list_move_tail(&block_group->discard_list,
  93                        get_discard_list(discard_ctl, block_group));
  94 }
  95
  96 static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
  97                                 struct btrfs_block_group *block_group)
  98 {
  99         if (!btrfs_is_block_group_data_only(block_group))
 100                 return;
 101
 102         spin_lock(&discard_ctl->lock);
 103         __add_to_discard_list(discard_ctl, block_group);
 104         spin_unlock(&discard_ctl->lock);
 105 }
 106
 107 static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
 108                                        struct btrfs_block_group *block_group)
 109 {
 110         spin_lock(&discard_ctl->lock);
 111
 112         if (!btrfs_run_discard_work(discard_ctl)) {
 113                 spin_unlock(&discard_ctl->lock);
 114                 return;
 115         }
 116
 117         list_del_init(&block_group->discard_list);
 118
 119         block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
 120         block_group->discard_eligible_time = (ktime_get_ns() +
 121                                               BTRFS_DISCARD_UNUSED_DELAY);
 122         block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
 123         list_add_tail(&block_group->discard_list,
 124                       &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
 125
 126         spin_unlock(&discard_ctl->lock);
 127 }
 128
 129 static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
 130                                      struct btrfs_block_group *block_group)
 131 {
 132         bool running = false;
 133
 134         spin_lock(&discard_ctl->lock);
 135
 136         if (block_group == discard_ctl->block_group) {
 137                 running = true;
 138                 discard_ctl->block_group = NULL;
 139         }
 140
 141         block_group->discard_eligible_time = 0;
 142         list_del_init(&block_group->discard_list);
 143
 144         spin_unlock(&discard_ctl->lock);
 145
 146         return running;
 147 }
 148
 149 /**
 150  * find_next_block_group - find block_group that's up next for discarding
 151  * @discard_ctl: discard control
 152  * @now: current time
 153  *
 154  * Iterate over the discard lists to find the next block_group up for
 155  * discarding checking the discard_eligible_time of block_group.
 156  */
 157 static struct btrfs_block_group *find_next_block_group(
 158                                         struct btrfs_discard_ctl *discard_ctl,
 159                                         u64 now)
 160 {
 161         struct btrfs_block_group *ret_block_group = NULL, *block_group;
 162         int i;
 163
 164         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
 165                 struct list_head *discard_list = &discard_ctl->discard_list[i];
 166
 167                 if (!list_empty(discard_list)) {
 168                         block_group = list_first_entry(discard_list,
 169                                                        struct btrfs_block_group,
 170                                                        discard_list);
 171
 172                         if (!ret_block_group)
 173                                 ret_block_group = block_group;
 174
 175                         if (ret_block_group->discard_eligible_time < now)
 176                                 break;
 177
 178                         if (ret_block_group->discard_eligible_time >
 179                             block_group->discard_eligible_time)
 180                                 ret_block_group = block_group;
 181                 }
 182         }
 183
 184         return ret_block_group;
 185 }
 186
 187 /**
 188  * peek_discard_list - wrap find_next_block_group()
 189  * @discard_ctl: discard control
 190  * @discard_state: the discard_state of the block_group after state management
 191  * @discard_index: the discard_index of the block_group after state management
 192  *
 193  * This wraps find_next_block_group() and sets the block_group to be in use.
 194  * discard_state's control flow is managed here.  Variables related to
 195  * discard_state are reset here as needed (eg discard_cursor).  @discard_state
 196  * and @discard_index are remembered as it may change while we're discarding,
 197  * but we want the discard to execute in the context determined here.
 198  */
 199 static struct btrfs_block_group *peek_discard_list(
 200                                         struct btrfs_discard_ctl *discard_ctl,
 201                                         enum btrfs_discard_state *discard_state,
 202                                         int *discard_index)
 203 {
 204         struct btrfs_block_group *block_group;
 205         const u64 now = ktime_get_ns();
 206
 207         spin_lock(&discard_ctl->lock);
 208 again:
 209         block_group = find_next_block_group(discard_ctl, now);
 210
 211         if (block_group && now > block_group->discard_eligible_time) {
 212                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
 213                     block_group->used != 0) {
 214                         if (btrfs_is_block_group_data_only(block_group))
 215                                 __add_to_discard_list(discard_ctl, block_group);
 216                         else
 217                                 list_del_init(&block_group->discard_list);
 218                         goto again;
 219                 }
 220                 if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
 221                         block_group->discard_cursor = block_group->start;
 222                         block_group->discard_state = BTRFS_DISCARD_EXTENTS;
 223                 }
 224                 discard_ctl->block_group = block_group;
 225                 *discard_state = block_group->discard_state;
 226                 *discard_index = block_group->discard_index;
 227         } else {
 228                 block_group = NULL;
 229         }
 230
 231         spin_unlock(&discard_ctl->lock);
 232
 233         return block_group;
 234 }
 235
 236 /**
 237  * btrfs_discard_check_filter - updates a block groups filters
 238  * @block_group: block group of interest
 239  * @bytes: recently freed region size after coalescing
 240  *
 241  * Async discard maintains multiple lists with progressively smaller filters
 242  * to prioritize discarding based on size.  Should a free space that matches
 243  * a larger filter be returned to the free_space_cache, prioritize that discard
 244  * by moving @block_group to the proper filter.
 245  */
 246 void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
 247                                 u64 bytes)
 248 {
 249         struct btrfs_discard_ctl *discard_ctl;
 250
 251         if (!block_group ||
 252             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 253                 return;
 254
 255         discard_ctl = &block_group->fs_info->discard_ctl;
 256
 257         if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
 258             bytes >= discard_minlen[block_group->discard_index - 1]) {
 259                 int i;
 260
 261                 remove_from_discard_list(discard_ctl, block_group);
 262
 263                 for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
 264                      i++) {
 265                         if (bytes >= discard_minlen[i]) {
 266                                 block_group->discard_index = i;
 267                                 add_to_discard_list(discard_ctl, block_group);
 268                                 break;
 269                         }
 270                 }
 271         }
 272 }
 273
 274 /**
 275  * btrfs_update_discard_index - moves a block group along the discard lists
 276  * @discard_ctl: discard control
 277  * @block_group: block_group of interest
 278  *
 279  * Increment @block_group's discard_index.  If it falls of the list, let it be.
 280  * Otherwise add it back to the appropriate list.
 281  */
 282 static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
 283                                        struct btrfs_block_group *block_group)
 284 {
 285         block_group->discard_index++;
 286         if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
 287                 block_group->discard_index = 1;
 288                 return;
 289         }
 290
 291         add_to_discard_list(discard_ctl, block_group);
 292 }
 293
 294 /**
 295  * btrfs_discard_cancel_work - remove a block_group from the discard lists
 296  * @discard_ctl: discard control
 297  * @block_group: block_group of interest
 298  *
 299  * This removes @block_group from the discard lists.  If necessary, it waits on
 300  * the current work and then reschedules the delayed work.
 301  */
 302 void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
 303                                struct btrfs_block_group *block_group)
 304 {
 305         if (remove_from_discard_list(discard_ctl, block_group)) {
 306                 cancel_delayed_work_sync(&discard_ctl->work);
 307                 btrfs_discard_schedule_work(discard_ctl, true);
 308         }
 309 }
 310
 311 /**
 312  * btrfs_discard_queue_work - handles queuing the block_groups
 313  * @discard_ctl: discard control
 314  * @block_group: block_group of interest
 315  *
 316  * This maintains the LRU order of the discard lists.
 317  */
 318 void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
 319                               struct btrfs_block_group *block_group)
 320 {
 321         if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 322                 return;
 323
 324         if (block_group->used == 0)
 325                 add_to_discard_unused_list(discard_ctl, block_group);
 326         else
 327                 add_to_discard_list(discard_ctl, block_group);
 328
 329         if (!delayed_work_pending(&discard_ctl->work))
 330                 btrfs_discard_schedule_work(discard_ctl, false);
 331 }
 332
 333 /**
 334  * btrfs_discard_schedule_work - responsible for scheduling the discard work
 335  * @discard_ctl: discard control
 336  * @override: override the current timer
 337  *
 338  * Discards are issued by a delayed workqueue item.  @override is used to
 339  * update the current delay as the baseline delay interval is reevaluated on
 340  * transaction commit.  This is also maxed with any other rate limit.
 341  */
 342 void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 343                                  bool override)
 344 {
 345         struct btrfs_block_group *block_group;
 346         const u64 now = ktime_get_ns();
 347
 348         spin_lock(&discard_ctl->lock);
 349
 350         if (!btrfs_run_discard_work(discard_ctl))
 351                 goto out;
 352
 353         if (!override && delayed_work_pending(&discard_ctl->work))
 354                 goto out;
 355
 356         block_group = find_next_block_group(discard_ctl, now);
 357         if (block_group) {
 358                 unsigned long delay = discard_ctl->delay;
 359                 u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
 360
 361                 /*
 362                  * A single delayed workqueue item is responsible for
 363                  * discarding, so we can manage the bytes rate limit by keeping
 364                  * track of the previous discard.
 365                  */
 366                 if (kbps_limit && discard_ctl->prev_discard) {
 367                         u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
 368                         u64 bps_delay = div64_u64(discard_ctl->prev_discard *
 369                                                   MSEC_PER_SEC, bps_limit);
 370
 371                         delay = max(delay, msecs_to_jiffies(bps_delay));
 372                 }
 373
 374                 /*
 375                  * This timeout is to hopefully prevent immediate discarding
 376                  * in a recently allocated block group.
 377                  */
 378                 if (now < block_group->discard_eligible_time) {
 379                         u64 bg_timeout = block_group->discard_eligible_time - now;
 380
 381                         delay = max(delay, nsecs_to_jiffies(bg_timeout));
 382                 }
 383
 384                 mod_delayed_work(discard_ctl->discard_workers,
 385                                  &discard_ctl->work, delay);
 386         }
 387 out:
 388         spin_unlock(&discard_ctl->lock);
 389 }
 390
 391 /**
 392  * btrfs_finish_discard_pass - determine next step of a block_group
 393  * @discard_ctl: discard control
 394  * @block_group: block_group of interest
 395  *
 396  * This determines the next step for a block group after it's finished going
 397  * through a pass on a discard list.  If it is unused and fully trimmed, we can
 398  * mark it unused and send it to the unused_bgs path.  Otherwise, pass it onto
 399  * the appropriate filter list or let it fall off.
 400  */
 401 static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
 402                                       struct btrfs_block_group *block_group)
 403 {
 404         remove_from_discard_list(discard_ctl, block_group);
 405
 406         if (block_group->used == 0) {
 407                 if (btrfs_is_free_space_trimmed(block_group))
 408                         btrfs_mark_bg_unused(block_group);
 409                 else
 410                         add_to_discard_unused_list(discard_ctl, block_group);
 411         } else {
 412                 btrfs_update_discard_index(discard_ctl, block_group);
 413         }
 414 }
 415
 416 /**
 417  * btrfs_discard_workfn - discard work function
 418  * @work: work
 419  *
 420  * This finds the next block_group to start discarding and then discards a
 421  * single region.  It does this in a two-pass fashion: first extents and second
 422  * bitmaps.  Completely discarded block groups are sent to the unused_bgs path.
 423  */
 424 static void btrfs_discard_workfn(struct work_struct *work)
 425 {
 426         struct btrfs_discard_ctl *discard_ctl;
 427         struct btrfs_block_group *block_group;
 428         enum btrfs_discard_state discard_state;
 429         int discard_index = 0;
 430         u64 trimmed = 0;
 431         u64 minlen = 0;
 432
 433         discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
 434
 435         block_group = peek_discard_list(discard_ctl, &discard_state,
 436                                         &discard_index);
 437         if (!block_group || !btrfs_run_discard_work(discard_ctl))
 438                 return;
 439
 440         /* Perform discarding */
 441         minlen = discard_minlen[discard_index];
 442
 443         if (discard_state == BTRFS_DISCARD_BITMAPS) {
 444                 u64 maxlen = 0;
 445
 446                 /*
 447                  * Use the previous levels minimum discard length as the max
 448                  * length filter.  In the case something is added to make a
 449                  * region go beyond the max filter, the entire bitmap is set
 450                  * back to BTRFS_TRIM_STATE_UNTRIMMED.
 451                  */
 452                 if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
 453                         maxlen = discard_minlen[discard_index - 1];
 454
 455                 btrfs_trim_block_group_bitmaps(block_group, &trimmed,
 456                                        block_group->discard_cursor,
 457                                        btrfs_block_group_end(block_group),
 458                                        minlen, maxlen, true);
 459                 discard_ctl->discard_bitmap_bytes += trimmed;
 460         } else {
 461                 btrfs_trim_block_group_extents(block_group, &trimmed,
 462                                        block_group->discard_cursor,
 463                                        btrfs_block_group_end(block_group),
 464                                        minlen, true);
 465                 discard_ctl->discard_extent_bytes += trimmed;
 466         }
 467
 468         discard_ctl->prev_discard = trimmed;
 469
 470         /* Determine next steps for a block_group */
 471         if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
 472                 if (discard_state == BTRFS_DISCARD_BITMAPS) {
 473                         btrfs_finish_discard_pass(discard_ctl, block_group);
 474                 } else {
 475                         block_group->discard_cursor = block_group->start;
 476                         spin_lock(&discard_ctl->lock);
 477                         if (block_group->discard_state !=
 478                             BTRFS_DISCARD_RESET_CURSOR)
 479                                 block_group->discard_state =
 480                                                         BTRFS_DISCARD_BITMAPS;
 481                         spin_unlock(&discard_ctl->lock);
 482                 }
 483         }
 484
 485         spin_lock(&discard_ctl->lock);
 486         discard_ctl->block_group = NULL;
 487         spin_unlock(&discard_ctl->lock);
 488
 489         btrfs_discard_schedule_work(discard_ctl, false);
 490 }
 491
 492 /**
 493  * btrfs_run_discard_work - determines if async discard should be running
 494  * @discard_ctl: discard control
 495  *
 496  * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 497  */
 498 bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
 499 {
 500         struct btrfs_fs_info *fs_info = container_of(discard_ctl,
 501                                                      struct btrfs_fs_info,
 502                                                      discard_ctl);
 503
 504         return (!(fs_info->sb->s_flags & SB_RDONLY) &&
 505                 test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
 506 }
 507
 508 /**
 509  * btrfs_discard_calc_delay - recalculate the base delay
 510  * @discard_ctl: discard control
 511  *
 512  * Recalculate the base delay which is based off the total number of
 513  * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
 514  * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 515  */
 516 void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
 517 {
 518         s32 discardable_extents;
 519         s64 discardable_bytes;
 520         u32 iops_limit;
 521         unsigned long delay;
 522         unsigned long lower_limit = BTRFS_DISCARD_MIN_DELAY_MSEC;
 523
 524         discardable_extents = atomic_read(&discard_ctl->discardable_extents);
 525         if (!discardable_extents)
 526                 return;
 527
 528         spin_lock(&discard_ctl->lock);
 529
 530         /*
 531          * The following is to fix a potential -1 discrepenancy that we're not
 532          * sure how to reproduce. But given that this is the only place that
 533          * utilizes these numbers and this is only called by from
 534          * btrfs_finish_extent_commit() which is synchronized, we can correct
 535          * here.
 536          */
 537         if (discardable_extents < 0)
 538                 atomic_add(-discardable_extents,
 539                            &discard_ctl->discardable_extents);
 540
 541         discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
 542         if (discardable_bytes < 0)
 543                 atomic64_add(-discardable_bytes,
 544                              &discard_ctl->discardable_bytes);
 545
 546         if (discardable_extents <= 0) {
 547                 spin_unlock(&discard_ctl->lock);
 548                 return;
 549         }
 550
 551         iops_limit = READ_ONCE(discard_ctl->iops_limit);
 552         if (iops_limit)
 553                 lower_limit = max_t(unsigned long, lower_limit,
 554                                     MSEC_PER_SEC / iops_limit);
 555
 556         delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
 557         delay = clamp(delay, lower_limit, BTRFS_DISCARD_MAX_DELAY_MSEC);
 558         discard_ctl->delay = msecs_to_jiffies(delay);
 559
 560         spin_unlock(&discard_ctl->lock);
 561 }
 562
 563 /**
 564  * btrfs_discard_update_discardable - propagate discard counters
 565  * @block_group: block_group of interest
 566  * @ctl: free_space_ctl of @block_group
 567  *
 568  * This propagates deltas of counters up to the discard_ctl.  It maintains a
 569  * current counter and a previous counter passing the delta up to the global
 570  * stat.  Then the current counter value becomes the previous counter value.
 571  */
 572 void btrfs_discard_update_discardable(struct btrfs_block_group *block_group,
 573                                       struct btrfs_free_space_ctl *ctl)
 574 {
 575         struct btrfs_discard_ctl *discard_ctl;
 576         s32 extents_delta;
 577         s64 bytes_delta;
 578
 579         if (!block_group ||
 580             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
 581             !btrfs_is_block_group_data_only(block_group))
 582                 return;
 583
 584         discard_ctl = &block_group->fs_info->discard_ctl;
 585
 586         extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
 587                         ctl->discardable_extents[BTRFS_STAT_PREV];
 588         if (extents_delta) {
 589                 atomic_add(extents_delta, &discard_ctl->discardable_extents);
 590                 ctl->discardable_extents[BTRFS_STAT_PREV] =
 591                         ctl->discardable_extents[BTRFS_STAT_CURR];
 592         }
 593
 594         bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
 595                       ctl->discardable_bytes[BTRFS_STAT_PREV];
 596         if (bytes_delta) {
 597                 atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
 598                 ctl->discardable_bytes[BTRFS_STAT_PREV] =
 599                         ctl->discardable_bytes[BTRFS_STAT_CURR];
 600         }
 601 }
 602
 603 /**
 604  * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
 605  * @fs_info: fs_info of interest
 606  *
 607  * The unused_bgs list needs to be punted to the discard lists because the
 608  * order of operations is changed.  In the normal sychronous discard path, the
 609  * block groups are trimmed via a single large trim in transaction commit.  This
 610  * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
 611  * it must be done before going down the unused_bgs path.
 612  */
 613 void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
 614 {
 615         struct btrfs_block_group *block_group, *next;
 616
 617         spin_lock(&fs_info->unused_bgs_lock);
 618         /* We enabled async discard, so punt all to the queue */
 619         list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
 620                                  bg_list) {
 621                 list_del_init(&block_group->bg_list);
 622                 btrfs_put_block_group(block_group);
 623                 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
 624         }
 625         spin_unlock(&fs_info->unused_bgs_lock);
 626 }
 627
 628 /**
 629  * btrfs_discard_purge_list - purge discard lists
 630  * @discard_ctl: discard control
 631  *
 632  * If we are disabling async discard, we may have intercepted block groups that
 633  * are completely free and ready for the unused_bgs path.  As discarding will
 634  * now happen in transaction commit or not at all, we can safely mark the
 635  * corresponding block groups as unused and they will be sent on their merry
 636  * way to the unused_bgs list.
 637  */
 638 static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
 639 {
 640         struct btrfs_block_group *block_group, *next;
 641         int i;
 642
 643         spin_lock(&discard_ctl->lock);
 644         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
 645                 list_for_each_entry_safe(block_group, next,
 646                                          &discard_ctl->discard_list[i],
 647                                          discard_list) {
 648                         list_del_init(&block_group->discard_list);
 649                         spin_unlock(&discard_ctl->lock);
 650                         if (block_group->used == 0)
 651                                 btrfs_mark_bg_unused(block_group);
 652                         spin_lock(&discard_ctl->lock);
 653                 }
 654         }
 655         spin_unlock(&discard_ctl->lock);
 656 }
 657
 658 void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
 659 {
 660         if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
 661                 btrfs_discard_cleanup(fs_info);
 662                 return;
 663         }
 664
 665         btrfs_discard_punt_unused_bgs_list(fs_info);
 666
 667         set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 668 }
 669
 670 void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
 671 {
 672         clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 673 }
 674
 675 void btrfs_discard_init(struct btrfs_fs_info *fs_info)
 676 {
 677         struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
 678         int i;
 679
 680         spin_lock_init(&discard_ctl->lock);
 681         INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
 682
 683         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
 684                 INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
 685
 686         discard_ctl->prev_discard = 0;
 687         atomic_set(&discard_ctl->discardable_extents, 0);
 688         atomic64_set(&discard_ctl->discardable_bytes, 0);
 689         discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
 690         discard_ctl->delay = BTRFS_DISCARD_MAX_DELAY_MSEC;
 691         discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
 692         discard_ctl->kbps_limit = 0;
 693         discard_ctl->discard_extent_bytes = 0;
 694         discard_ctl->discard_bitmap_bytes = 0;
 695         atomic64_set(&discard_ctl->discard_bytes_saved, 0);
 696 }
 697
 698 void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
 699 {
 700         btrfs_discard_stop(fs_info);
 701         cancel_delayed_work_sync(&fs_info->discard_ctl.work);
 702         btrfs_discard_purge_list(&fs_info->discard_ctl);
 703 }