fs/btrfs/discard.c

   1 // SPDX-License-Identifier: GPL-2.0
   2
   3 #include <linux/jiffies.h>
   4 #include <linux/kernel.h>
   5 #include <linux/ktime.h>
   6 #include <linux/list.h>
   7 #include <linux/math64.h>
   8 #include <linux/sizes.h>
   9 #include <linux/workqueue.h>
  10 #include "ctree.h"
  11 #include "block-group.h"
  12 #include "discard.h"
  13 #include "free-space-cache.h"
  14
  15 /*
  16  * This contains the logic to handle async discard.
  17  *
  18  * Async discard manages trimming of free space outside of transaction commit.
  19  * Discarding is done by managing the block_groups on a LRU list based on free
  20  * space recency.  Two passes are used to first prioritize discarding extents
  21  * and then allow for trimming in the bitmap the best opportunity to coalesce.
  22  * The block_groups are maintained on multiple lists to allow for multiple
  23  * passes with different discard filter requirements.  A delayed work item is
  24  * used to manage discarding with timeout determined by a max of the delay
  25  * incurred by the iops rate limit, the byte rate limit, and the max delay of
  26  * BTRFS_DISCARD_MAX_DELAY.
  27  *
  28  * Note, this only keeps track of block_groups that are explicitly for data.
  29  * Mixed block_groups are not supported.
  30  *
  31  * The first list is special to manage discarding of fully free block groups.
  32  * This is necessary because we issue a final trim for a full free block group
  33  * after forgetting it.  When a block group becomes unused, instead of directly
  34  * being added to the unused_bgs list, we add it to this first list.  Then
  35  * from there, if it becomes fully discarded, we place it onto the unused_bgs
  36  * list.
  37  *
  38  * The in-memory free space cache serves as the backing state for discard.
  39  * Consequently this means there is no persistence.  We opt to load all the
  40  * block groups in as not discarded, so the mount case degenerates to the
  41  * crashing case.
  42  *
  43  * As the free space cache uses bitmaps, there exists a tradeoff between
  44  * ease/efficiency for find_free_extent() and the accuracy of discard state.
  45  * Here we opt to let untrimmed regions merge with everything while only letting
  46  * trimmed regions merge with other trimmed regions.  This can cause
  47  * overtrimming, but the coalescing benefit seems to be worth it.  Additionally,
  48  * bitmap state is tracked as a whole.  If we're able to fully trim a bitmap,
  49  * the trimmed flag is set on the bitmap.  Otherwise, if an allocation comes in,
  50  * this resets the state and we will retry trimming the whole bitmap.  This is a
  51  * tradeoff between discard state accuracy and the cost of accounting.
  52  */
  53
/*
 * This is an initial delay to give some chance for block reuse.  Both values
 * are in nanoseconds and are added to ktime_get_ns() to form a block group's
 * discard_eligible_time: BTRFS_DISCARD_DELAY when queuing on a regular
 * discard list, BTRFS_DISCARD_UNUSED_DELAY when queuing on the unused list.
 */
#define BTRFS_DISCARD_DELAY             (120ULL * NSEC_PER_SEC)
#define BTRFS_DISCARD_UNUSED_DELAY      (10ULL * NSEC_PER_SEC)

/*
 * Target completion latency of discarding all discardable extents.
 *
 * BTRFS_DISCARD_TARGET_MSEC is divided by the number of discardable extents
 * in btrfs_discard_calc_delay() when no iops limit is set; the resulting base
 * delay is clamped to [BTRFS_DISCARD_MIN_DELAY_MSEC,
 * BTRFS_DISCARD_MAX_DELAY_MSEC].  BTRFS_DISCARD_MAX_IOPS is the initial value
 * of discard_ctl->iops_limit set by btrfs_discard_init().
 */
#define BTRFS_DISCARD_TARGET_MSEC       (6 * 60 * 60UL * MSEC_PER_SEC)
#define BTRFS_DISCARD_MIN_DELAY_MSEC    (1UL)
#define BTRFS_DISCARD_MAX_DELAY_MSEC    (1000UL)
#define BTRFS_DISCARD_MAX_IOPS          (10U)
  63
  64 /* Montonically decreasing minimum length filters after index 0 */
  65 static int discard_minlen[BTRFS_NR_DISCARD_LISTS] = {
  66         0,
  67         BTRFS_ASYNC_DISCARD_MAX_FILTER,
  68         BTRFS_ASYNC_DISCARD_MIN_FILTER
  69 };
  70
  71 static struct list_head *get_discard_list(struct btrfs_discard_ctl *discard_ctl,
  72                                           struct btrfs_block_group *block_group)
  73 {
  74         return &discard_ctl->discard_list[block_group->discard_index];
  75 }
  76
  77 static void __add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
  78                                   struct btrfs_block_group *block_group)
  79 {
  80         if (!btrfs_run_discard_work(discard_ctl))
  81                 return;
  82
  83         if (list_empty(&block_group->discard_list) ||
  84             block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED) {
  85                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED)
  86                         block_group->discard_index = BTRFS_DISCARD_INDEX_START;
  87                 block_group->discard_eligible_time = (ktime_get_ns() +
  88                                                       BTRFS_DISCARD_DELAY);
  89                 block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
  90         }
  91
  92         list_move_tail(&block_group->discard_list,
  93                        get_discard_list(discard_ctl, block_group));
  94 }
  95
  96 static void add_to_discard_list(struct btrfs_discard_ctl *discard_ctl,
  97                                 struct btrfs_block_group *block_group)
  98 {
  99         if (!btrfs_is_block_group_data_only(block_group))
 100                 return;
 101
 102         spin_lock(&discard_ctl->lock);
 103         __add_to_discard_list(discard_ctl, block_group);
 104         spin_unlock(&discard_ctl->lock);
 105 }
 106
 107 static void add_to_discard_unused_list(struct btrfs_discard_ctl *discard_ctl,
 108                                        struct btrfs_block_group *block_group)
 109 {
 110         spin_lock(&discard_ctl->lock);
 111
 112         if (!btrfs_run_discard_work(discard_ctl)) {
 113                 spin_unlock(&discard_ctl->lock);
 114                 return;
 115         }
 116
 117         list_del_init(&block_group->discard_list);
 118
 119         block_group->discard_index = BTRFS_DISCARD_INDEX_UNUSED;
 120         block_group->discard_eligible_time = (ktime_get_ns() +
 121                                               BTRFS_DISCARD_UNUSED_DELAY);
 122         block_group->discard_state = BTRFS_DISCARD_RESET_CURSOR;
 123         list_add_tail(&block_group->discard_list,
 124                       &discard_ctl->discard_list[BTRFS_DISCARD_INDEX_UNUSED]);
 125
 126         spin_unlock(&discard_ctl->lock);
 127 }
 128
 129 static bool remove_from_discard_list(struct btrfs_discard_ctl *discard_ctl,
 130                                      struct btrfs_block_group *block_group)
 131 {
 132         bool running = false;
 133
 134         spin_lock(&discard_ctl->lock);
 135
 136         if (block_group == discard_ctl->block_group) {
 137                 running = true;
 138                 discard_ctl->block_group = NULL;
 139         }
 140
 141         block_group->discard_eligible_time = 0;
 142         list_del_init(&block_group->discard_list);
 143
 144         spin_unlock(&discard_ctl->lock);
 145
 146         return running;
 147 }
 148
 149 /**
 150  * find_next_block_group - find block_group that's up next for discarding
 151  * @discard_ctl: discard control
 152  * @now: current time
 153  *
 154  * Iterate over the discard lists to find the next block_group up for
 155  * discarding checking the discard_eligible_time of block_group.
 156  */
 157 static struct btrfs_block_group *find_next_block_group(
 158                                         struct btrfs_discard_ctl *discard_ctl,
 159                                         u64 now)
 160 {
 161         struct btrfs_block_group *ret_block_group = NULL, *block_group;
 162         int i;
 163
 164         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
 165                 struct list_head *discard_list = &discard_ctl->discard_list[i];
 166
 167                 if (!list_empty(discard_list)) {
 168                         block_group = list_first_entry(discard_list,
 169                                                        struct btrfs_block_group,
 170                                                        discard_list);
 171
 172                         if (!ret_block_group)
 173                                 ret_block_group = block_group;
 174
 175                         if (ret_block_group->discard_eligible_time < now)
 176                                 break;
 177
 178                         if (ret_block_group->discard_eligible_time >
 179                             block_group->discard_eligible_time)
 180                                 ret_block_group = block_group;
 181                 }
 182         }
 183
 184         return ret_block_group;
 185 }
 186
 187 /**
 188  * peek_discard_list - wrap find_next_block_group()
 189  * @discard_ctl: discard control
 190  * @discard_state: the discard_state of the block_group after state management
 191  * @discard_index: the discard_index of the block_group after state management
 192  *
 193  * This wraps find_next_block_group() and sets the block_group to be in use.
 194  * discard_state's control flow is managed here.  Variables related to
 195  * discard_state are reset here as needed (eg discard_cursor).  @discard_state
 196  * and @discard_index are remembered as it may change while we're discarding,
 197  * but we want the discard to execute in the context determined here.
 198  */
 199 static struct btrfs_block_group *peek_discard_list(
 200                                         struct btrfs_discard_ctl *discard_ctl,
 201                                         enum btrfs_discard_state *discard_state,
 202                                         int *discard_index)
 203 {
 204         struct btrfs_block_group *block_group;
 205         const u64 now = ktime_get_ns();
 206
 207         spin_lock(&discard_ctl->lock);
 208 again:
 209         block_group = find_next_block_group(discard_ctl, now);
 210
 211         if (block_group && now > block_group->discard_eligible_time) {
 212                 if (block_group->discard_index == BTRFS_DISCARD_INDEX_UNUSED &&
 213                     block_group->used != 0) {
 214                         if (btrfs_is_block_group_data_only(block_group))
 215                                 __add_to_discard_list(discard_ctl, block_group);
 216                         else
 217                                 list_del_init(&block_group->discard_list);
 218                         goto again;
 219                 }
 220                 if (block_group->discard_state == BTRFS_DISCARD_RESET_CURSOR) {
 221                         block_group->discard_cursor = block_group->start;
 222                         block_group->discard_state = BTRFS_DISCARD_EXTENTS;
 223                 }
 224                 discard_ctl->block_group = block_group;
 225                 *discard_state = block_group->discard_state;
 226                 *discard_index = block_group->discard_index;
 227         } else {
 228                 block_group = NULL;
 229         }
 230
 231         spin_unlock(&discard_ctl->lock);
 232
 233         return block_group;
 234 }
 235
 236 /**
 237  * btrfs_discard_check_filter - updates a block groups filters
 238  * @block_group: block group of interest
 239  * @bytes: recently freed region size after coalescing
 240  *
 241  * Async discard maintains multiple lists with progressively smaller filters
 242  * to prioritize discarding based on size.  Should a free space that matches
 243  * a larger filter be returned to the free_space_cache, prioritize that discard
 244  * by moving @block_group to the proper filter.
 245  */
 246 void btrfs_discard_check_filter(struct btrfs_block_group *block_group,
 247                                 u64 bytes)
 248 {
 249         struct btrfs_discard_ctl *discard_ctl;
 250
 251         if (!block_group ||
 252             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 253                 return;
 254
 255         discard_ctl = &block_group->fs_info->discard_ctl;
 256
 257         if (block_group->discard_index > BTRFS_DISCARD_INDEX_START &&
 258             bytes >= discard_minlen[block_group->discard_index - 1]) {
 259                 int i;
 260
 261                 remove_from_discard_list(discard_ctl, block_group);
 262
 263                 for (i = BTRFS_DISCARD_INDEX_START; i < BTRFS_NR_DISCARD_LISTS;
 264                      i++) {
 265                         if (bytes >= discard_minlen[i]) {
 266                                 block_group->discard_index = i;
 267                                 add_to_discard_list(discard_ctl, block_group);
 268                                 break;
 269                         }
 270                 }
 271         }
 272 }
 273
 274 /**
 275  * btrfs_update_discard_index - moves a block group along the discard lists
 276  * @discard_ctl: discard control
 277  * @block_group: block_group of interest
 278  *
 279  * Increment @block_group's discard_index.  If it falls of the list, let it be.
 280  * Otherwise add it back to the appropriate list.
 281  */
 282 static void btrfs_update_discard_index(struct btrfs_discard_ctl *discard_ctl,
 283                                        struct btrfs_block_group *block_group)
 284 {
 285         block_group->discard_index++;
 286         if (block_group->discard_index == BTRFS_NR_DISCARD_LISTS) {
 287                 block_group->discard_index = 1;
 288                 return;
 289         }
 290
 291         add_to_discard_list(discard_ctl, block_group);
 292 }
 293
 294 /**
 295  * btrfs_discard_cancel_work - remove a block_group from the discard lists
 296  * @discard_ctl: discard control
 297  * @block_group: block_group of interest
 298  *
 299  * This removes @block_group from the discard lists.  If necessary, it waits on
 300  * the current work and then reschedules the delayed work.
 301  */
 302 void btrfs_discard_cancel_work(struct btrfs_discard_ctl *discard_ctl,
 303                                struct btrfs_block_group *block_group)
 304 {
 305         if (remove_from_discard_list(discard_ctl, block_group)) {
 306                 cancel_delayed_work_sync(&discard_ctl->work);
 307                 btrfs_discard_schedule_work(discard_ctl, true);
 308         }
 309 }
 310
 311 /**
 312  * btrfs_discard_queue_work - handles queuing the block_groups
 313  * @discard_ctl: discard control
 314  * @block_group: block_group of interest
 315  *
 316  * This maintains the LRU order of the discard lists.
 317  */
 318 void btrfs_discard_queue_work(struct btrfs_discard_ctl *discard_ctl,
 319                               struct btrfs_block_group *block_group)
 320 {
 321         if (!block_group || !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC))
 322                 return;
 323
 324         if (block_group->used == 0)
 325                 add_to_discard_unused_list(discard_ctl, block_group);
 326         else
 327                 add_to_discard_list(discard_ctl, block_group);
 328
 329         if (!delayed_work_pending(&discard_ctl->work))
 330                 btrfs_discard_schedule_work(discard_ctl, false);
 331 }
 332
 333 /**
 334  * btrfs_discard_schedule_work - responsible for scheduling the discard work
 335  * @discard_ctl: discard control
 336  * @override: override the current timer
 337  *
 338  * Discards are issued by a delayed workqueue item.  @override is used to
 339  * update the current delay as the baseline delay interval is reevaluated on
 340  * transaction commit.  This is also maxed with any other rate limit.
 341  */
 342 void btrfs_discard_schedule_work(struct btrfs_discard_ctl *discard_ctl,
 343                                  bool override)
 344 {
 345         struct btrfs_block_group *block_group;
 346         const u64 now = ktime_get_ns();
 347
 348         spin_lock(&discard_ctl->lock);
 349
 350         if (!btrfs_run_discard_work(discard_ctl))
 351                 goto out;
 352
 353         if (!override && delayed_work_pending(&discard_ctl->work))
 354                 goto out;
 355
 356         block_group = find_next_block_group(discard_ctl, now);
 357         if (block_group) {
 358                 u64 delay = discard_ctl->delay_ms * NSEC_PER_MSEC;
 359                 u32 kbps_limit = READ_ONCE(discard_ctl->kbps_limit);
 360
 361                 /*
 362                  * A single delayed workqueue item is responsible for
 363                  * discarding, so we can manage the bytes rate limit by keeping
 364                  * track of the previous discard.
 365                  */
 366                 if (kbps_limit && discard_ctl->prev_discard) {
 367                         u64 bps_limit = ((u64)kbps_limit) * SZ_1K;
 368                         u64 bps_delay = div64_u64(discard_ctl->prev_discard *
 369                                                   NSEC_PER_SEC, bps_limit);
 370
 371                         delay = max(delay, bps_delay);
 372                 }
 373
 374                 /*
 375                  * This timeout is to hopefully prevent immediate discarding
 376                  * in a recently allocated block group.
 377                  */
 378                 if (now < block_group->discard_eligible_time) {
 379                         u64 bg_timeout = block_group->discard_eligible_time - now;
 380
 381                         delay = max(delay, bg_timeout);
 382                 }
 383
 384                 if (override && discard_ctl->prev_discard) {
 385                         u64 elapsed = now - discard_ctl->prev_discard_time;
 386
 387                         if (delay > elapsed)
 388                                 delay -= elapsed;
 389                         else
 390                                 delay = 0;
 391                 }
 392
 393                 mod_delayed_work(discard_ctl->discard_workers,
 394                                  &discard_ctl->work, nsecs_to_jiffies(delay));
 395         }
 396 out:
 397         spin_unlock(&discard_ctl->lock);
 398 }
 399
 400 /**
 401  * btrfs_finish_discard_pass - determine next step of a block_group
 402  * @discard_ctl: discard control
 403  * @block_group: block_group of interest
 404  *
 405  * This determines the next step for a block group after it's finished going
 406  * through a pass on a discard list.  If it is unused and fully trimmed, we can
 407  * mark it unused and send it to the unused_bgs path.  Otherwise, pass it onto
 408  * the appropriate filter list or let it fall off.
 409  */
 410 static void btrfs_finish_discard_pass(struct btrfs_discard_ctl *discard_ctl,
 411                                       struct btrfs_block_group *block_group)
 412 {
 413         remove_from_discard_list(discard_ctl, block_group);
 414
 415         if (block_group->used == 0) {
 416                 if (btrfs_is_free_space_trimmed(block_group))
 417                         btrfs_mark_bg_unused(block_group);
 418                 else
 419                         add_to_discard_unused_list(discard_ctl, block_group);
 420         } else {
 421                 btrfs_update_discard_index(discard_ctl, block_group);
 422         }
 423 }
 424
 425 /**
 426  * btrfs_discard_workfn - discard work function
 427  * @work: work
 428  *
 429  * This finds the next block_group to start discarding and then discards a
 430  * single region.  It does this in a two-pass fashion: first extents and second
 431  * bitmaps.  Completely discarded block groups are sent to the unused_bgs path.
 432  */
 433 static void btrfs_discard_workfn(struct work_struct *work)
 434 {
 435         struct btrfs_discard_ctl *discard_ctl;
 436         struct btrfs_block_group *block_group;
 437         enum btrfs_discard_state discard_state;
 438         int discard_index = 0;
 439         u64 trimmed = 0;
 440         u64 minlen = 0;
 441
 442         discard_ctl = container_of(work, struct btrfs_discard_ctl, work.work);
 443
 444         block_group = peek_discard_list(discard_ctl, &discard_state,
 445                                         &discard_index);
 446         if (!block_group || !btrfs_run_discard_work(discard_ctl))
 447                 return;
 448
 449         /* Perform discarding */
 450         minlen = discard_minlen[discard_index];
 451
 452         if (discard_state == BTRFS_DISCARD_BITMAPS) {
 453                 u64 maxlen = 0;
 454
 455                 /*
 456                  * Use the previous levels minimum discard length as the max
 457                  * length filter.  In the case something is added to make a
 458                  * region go beyond the max filter, the entire bitmap is set
 459                  * back to BTRFS_TRIM_STATE_UNTRIMMED.
 460                  */
 461                 if (discard_index != BTRFS_DISCARD_INDEX_UNUSED)
 462                         maxlen = discard_minlen[discard_index - 1];
 463
 464                 btrfs_trim_block_group_bitmaps(block_group, &trimmed,
 465                                        block_group->discard_cursor,
 466                                        btrfs_block_group_end(block_group),
 467                                        minlen, maxlen, true);
 468                 discard_ctl->discard_bitmap_bytes += trimmed;
 469         } else {
 470                 btrfs_trim_block_group_extents(block_group, &trimmed,
 471                                        block_group->discard_cursor,
 472                                        btrfs_block_group_end(block_group),
 473                                        minlen, true);
 474                 discard_ctl->discard_extent_bytes += trimmed;
 475         }
 476
 477         /*
 478          * Updated without locks as this is inside the workfn and nothing else
 479          * is reading the values
 480          */
 481         discard_ctl->prev_discard = trimmed;
 482         discard_ctl->prev_discard_time = ktime_get_ns();
 483
 484         /* Determine next steps for a block_group */
 485         if (block_group->discard_cursor >= btrfs_block_group_end(block_group)) {
 486                 if (discard_state == BTRFS_DISCARD_BITMAPS) {
 487                         btrfs_finish_discard_pass(discard_ctl, block_group);
 488                 } else {
 489                         block_group->discard_cursor = block_group->start;
 490                         spin_lock(&discard_ctl->lock);
 491                         if (block_group->discard_state !=
 492                             BTRFS_DISCARD_RESET_CURSOR)
 493                                 block_group->discard_state =
 494                                                         BTRFS_DISCARD_BITMAPS;
 495                         spin_unlock(&discard_ctl->lock);
 496                 }
 497         }
 498
 499         spin_lock(&discard_ctl->lock);
 500         discard_ctl->block_group = NULL;
 501         spin_unlock(&discard_ctl->lock);
 502
 503         btrfs_discard_schedule_work(discard_ctl, false);
 504 }
 505
 506 /**
 507  * btrfs_run_discard_work - determines if async discard should be running
 508  * @discard_ctl: discard control
 509  *
 510  * Checks if the file system is writeable and BTRFS_FS_DISCARD_RUNNING is set.
 511  */
 512 bool btrfs_run_discard_work(struct btrfs_discard_ctl *discard_ctl)
 513 {
 514         struct btrfs_fs_info *fs_info = container_of(discard_ctl,
 515                                                      struct btrfs_fs_info,
 516                                                      discard_ctl);
 517
 518         return (!(fs_info->sb->s_flags & SB_RDONLY) &&
 519                 test_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags));
 520 }
 521
 522 /**
 523  * btrfs_discard_calc_delay - recalculate the base delay
 524  * @discard_ctl: discard control
 525  *
 526  * Recalculate the base delay which is based off the total number of
 527  * discardable_extents.  Clamp this between the lower_limit (iops_limit or 1ms)
 528  * and the upper_limit (BTRFS_DISCARD_MAX_DELAY_MSEC).
 529  */
 530 void btrfs_discard_calc_delay(struct btrfs_discard_ctl *discard_ctl)
 531 {
 532         s32 discardable_extents;
 533         s64 discardable_bytes;
 534         u32 iops_limit;
 535         unsigned long delay;
 536
 537         discardable_extents = atomic_read(&discard_ctl->discardable_extents);
 538         if (!discardable_extents)
 539                 return;
 540
 541         spin_lock(&discard_ctl->lock);
 542
 543         /*
 544          * The following is to fix a potential -1 discrepenancy that we're not
 545          * sure how to reproduce. But given that this is the only place that
 546          * utilizes these numbers and this is only called by from
 547          * btrfs_finish_extent_commit() which is synchronized, we can correct
 548          * here.
 549          */
 550         if (discardable_extents < 0)
 551                 atomic_add(-discardable_extents,
 552                            &discard_ctl->discardable_extents);
 553
 554         discardable_bytes = atomic64_read(&discard_ctl->discardable_bytes);
 555         if (discardable_bytes < 0)
 556                 atomic64_add(-discardable_bytes,
 557                              &discard_ctl->discardable_bytes);
 558
 559         if (discardable_extents <= 0) {
 560                 spin_unlock(&discard_ctl->lock);
 561                 return;
 562         }
 563
 564         iops_limit = READ_ONCE(discard_ctl->iops_limit);
 565         if (iops_limit)
 566                 delay = MSEC_PER_SEC / iops_limit;
 567         else
 568                 delay = BTRFS_DISCARD_TARGET_MSEC / discardable_extents;
 569
 570         delay = clamp(delay, BTRFS_DISCARD_MIN_DELAY_MSEC,
 571                       BTRFS_DISCARD_MAX_DELAY_MSEC);
 572         discard_ctl->delay_ms = delay;
 573
 574         spin_unlock(&discard_ctl->lock);
 575 }
 576
 577 /**
 578  * btrfs_discard_update_discardable - propagate discard counters
 579  * @block_group: block_group of interest
 580  *
 581  * This propagates deltas of counters up to the discard_ctl.  It maintains a
 582  * current counter and a previous counter passing the delta up to the global
 583  * stat.  Then the current counter value becomes the previous counter value.
 584  */
 585 void btrfs_discard_update_discardable(struct btrfs_block_group *block_group)
 586 {
 587         struct btrfs_free_space_ctl *ctl;
 588         struct btrfs_discard_ctl *discard_ctl;
 589         s32 extents_delta;
 590         s64 bytes_delta;
 591
 592         if (!block_group ||
 593             !btrfs_test_opt(block_group->fs_info, DISCARD_ASYNC) ||
 594             !btrfs_is_block_group_data_only(block_group))
 595                 return;
 596
 597         ctl = block_group->free_space_ctl;
 598         discard_ctl = &block_group->fs_info->discard_ctl;
 599
 600         lockdep_assert_held(&ctl->tree_lock);
 601         extents_delta = ctl->discardable_extents[BTRFS_STAT_CURR] -
 602                         ctl->discardable_extents[BTRFS_STAT_PREV];
 603         if (extents_delta) {
 604                 atomic_add(extents_delta, &discard_ctl->discardable_extents);
 605                 ctl->discardable_extents[BTRFS_STAT_PREV] =
 606                         ctl->discardable_extents[BTRFS_STAT_CURR];
 607         }
 608
 609         bytes_delta = ctl->discardable_bytes[BTRFS_STAT_CURR] -
 610                       ctl->discardable_bytes[BTRFS_STAT_PREV];
 611         if (bytes_delta) {
 612                 atomic64_add(bytes_delta, &discard_ctl->discardable_bytes);
 613                 ctl->discardable_bytes[BTRFS_STAT_PREV] =
 614                         ctl->discardable_bytes[BTRFS_STAT_CURR];
 615         }
 616 }
 617
 618 /**
 619  * btrfs_discard_punt_unused_bgs_list - punt unused_bgs list to discard lists
 620  * @fs_info: fs_info of interest
 621  *
 622  * The unused_bgs list needs to be punted to the discard lists because the
 623  * order of operations is changed.  In the normal sychronous discard path, the
 624  * block groups are trimmed via a single large trim in transaction commit.  This
 625  * is ultimately what we are trying to avoid with asynchronous discard.  Thus,
 626  * it must be done before going down the unused_bgs path.
 627  */
 628 void btrfs_discard_punt_unused_bgs_list(struct btrfs_fs_info *fs_info)
 629 {
 630         struct btrfs_block_group *block_group, *next;
 631
 632         spin_lock(&fs_info->unused_bgs_lock);
 633         /* We enabled async discard, so punt all to the queue */
 634         list_for_each_entry_safe(block_group, next, &fs_info->unused_bgs,
 635                                  bg_list) {
 636                 list_del_init(&block_group->bg_list);
 637                 btrfs_put_block_group(block_group);
 638                 btrfs_discard_queue_work(&fs_info->discard_ctl, block_group);
 639         }
 640         spin_unlock(&fs_info->unused_bgs_lock);
 641 }
 642
 643 /**
 644  * btrfs_discard_purge_list - purge discard lists
 645  * @discard_ctl: discard control
 646  *
 647  * If we are disabling async discard, we may have intercepted block groups that
 648  * are completely free and ready for the unused_bgs path.  As discarding will
 649  * now happen in transaction commit or not at all, we can safely mark the
 650  * corresponding block groups as unused and they will be sent on their merry
 651  * way to the unused_bgs list.
 652  */
 653 static void btrfs_discard_purge_list(struct btrfs_discard_ctl *discard_ctl)
 654 {
 655         struct btrfs_block_group *block_group, *next;
 656         int i;
 657
 658         spin_lock(&discard_ctl->lock);
 659         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++) {
 660                 list_for_each_entry_safe(block_group, next,
 661                                          &discard_ctl->discard_list[i],
 662                                          discard_list) {
 663                         list_del_init(&block_group->discard_list);
 664                         spin_unlock(&discard_ctl->lock);
 665                         if (block_group->used == 0)
 666                                 btrfs_mark_bg_unused(block_group);
 667                         spin_lock(&discard_ctl->lock);
 668                 }
 669         }
 670         spin_unlock(&discard_ctl->lock);
 671 }
 672
 673 void btrfs_discard_resume(struct btrfs_fs_info *fs_info)
 674 {
 675         if (!btrfs_test_opt(fs_info, DISCARD_ASYNC)) {
 676                 btrfs_discard_cleanup(fs_info);
 677                 return;
 678         }
 679
 680         btrfs_discard_punt_unused_bgs_list(fs_info);
 681
 682         set_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 683 }
 684
 685 void btrfs_discard_stop(struct btrfs_fs_info *fs_info)
 686 {
 687         clear_bit(BTRFS_FS_DISCARD_RUNNING, &fs_info->flags);
 688 }
 689
 690 void btrfs_discard_init(struct btrfs_fs_info *fs_info)
 691 {
 692         struct btrfs_discard_ctl *discard_ctl = &fs_info->discard_ctl;
 693         int i;
 694
 695         spin_lock_init(&discard_ctl->lock);
 696         INIT_DELAYED_WORK(&discard_ctl->work, btrfs_discard_workfn);
 697
 698         for (i = 0; i < BTRFS_NR_DISCARD_LISTS; i++)
 699                 INIT_LIST_HEAD(&discard_ctl->discard_list[i]);
 700
 701         discard_ctl->prev_discard = 0;
 702         discard_ctl->prev_discard_time = 0;
 703         atomic_set(&discard_ctl->discardable_extents, 0);
 704         atomic64_set(&discard_ctl->discardable_bytes, 0);
 705         discard_ctl->max_discard_size = BTRFS_ASYNC_DISCARD_DEFAULT_MAX_SIZE;
 706         discard_ctl->delay_ms = BTRFS_DISCARD_MAX_DELAY_MSEC;
 707         discard_ctl->iops_limit = BTRFS_DISCARD_MAX_IOPS;
 708         discard_ctl->kbps_limit = 0;
 709         discard_ctl->discard_extent_bytes = 0;
 710         discard_ctl->discard_bitmap_bytes = 0;
 711         atomic64_set(&discard_ctl->discard_bytes_saved, 0);
 712 }
 713
 714 void btrfs_discard_cleanup(struct btrfs_fs_info *fs_info)
 715 {
 716         btrfs_discard_stop(fs_info);
 717         cancel_delayed_work_sync(&fs_info->discard_ctl.work);
 718         btrfs_discard_purge_list(&fs_info->discard_ctl);
 719 }