bcachefs: Fix for buffered writes getting -ENOSPC
author Kent Overstreet <kent.overstreet@gmail.com>
Thu, 20 May 2021 19:49:23 +0000 (15:49 -0400)
committer Kent Overstreet <kent.overstreet@linux.dev>
Sun, 22 Oct 2023 21:09:04 +0000 (17:09 -0400)
Buffered writes may have to increase their disk reservation at btree
update time, because the effects of compression and erasure coding are
unpredictable. O_DIRECT writes should check for -ENOSPC at that point,
but buffered writes have already been accepted, so they should not.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
fs/bcachefs/btree_update_leaf.c
fs/bcachefs/fs-io.c
fs/bcachefs/io.c
fs/bcachefs/io.h
fs/bcachefs/reflink.c

index cc844ca..3c4bf13 100644 (file)
@@ -690,6 +690,7 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                }
                break;
        case BTREE_INSERT_ENOSPC:
+               BUG_ON(flags & BTREE_INSERT_NOFAIL);
                ret = -ENOSPC;
                break;
        case BTREE_INSERT_NEED_MARK_REPLICAS:
@@ -743,6 +744,8 @@ int bch2_trans_commit_error(struct btree_trans *trans,
                break;
        }
 
+       BUG_ON(ret == -ENOSPC && (flags & BTREE_INSERT_NOFAIL));
+
        return ret;
 }
 
index 162f0ee..45e58ba 100644 (file)
@@ -1910,6 +1910,7 @@ static long bch2_dio_write_loop(struct dio_write *dio)
                if ((req->ki_flags & IOCB_DSYNC) &&
                    !c->opts.journal_flush_disabled)
                        dio->op.flags |= BCH_WRITE_FLUSH;
+               dio->op.flags |= BCH_WRITE_CHECK_ENOSPC;
 
                ret = bch2_disk_reservation_get(c, &dio->op.res, bio_sectors(bio),
                                                dio->op.opts.data_replicas, 0);
@@ -2725,7 +2726,7 @@ static int __bchfs_fallocate(struct bch_inode_info *inode, int mode,
 
                ret = bch2_extent_update(&trans, iter, &reservation.k_i,
                                &disk_res, &inode->ei_journal_seq,
-                               0, &i_sectors_delta);
+                               0, &i_sectors_delta, true);
                i_sectors_acct(c, inode, &quota_res, i_sectors_delta);
 bkey_err:
                bch2_quota_reservation_put(c, inode, &quota_res);
index 5a45e73..eafefb6 100644 (file)
@@ -197,7 +197,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
                               struct btree_iter *extent_iter,
                               struct bkey_i *new,
                               bool *maybe_extending,
-                              bool *should_check_enospc,
+                              bool *usage_increasing,
                               s64 *i_sectors_delta,
                               s64 *disk_sectors_delta)
 {
@@ -209,7 +209,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
        int ret = 0;
 
        *maybe_extending        = true;
-       *should_check_enospc    = false;
+       *usage_increasing       = false;
        *i_sectors_delta        = 0;
        *disk_sectors_delta     = 0;
 
@@ -229,10 +229,10 @@ int bch2_sum_sector_overwrites(struct btree_trans *trans,
                        ? sectors * bch2_bkey_nr_ptrs_fully_allocated(old)
                        : 0;
 
-               if (!*should_check_enospc &&
+               if (!*usage_increasing &&
                    (new_replicas > bch2_bkey_replicas(c, old) ||
                     (!new_compressed && bch2_bkey_sectors_compressed(old))))
-                       *should_check_enospc = true;
+                       *usage_increasing = true;
 
                if (bkey_cmp(old.k->p, new->k.p) >= 0) {
                        /*
@@ -267,11 +267,12 @@ int bch2_extent_update(struct btree_trans *trans,
                       struct disk_reservation *disk_res,
                       u64 *journal_seq,
                       u64 new_i_size,
-                      s64 *i_sectors_delta_total)
+                      s64 *i_sectors_delta_total,
+                      bool check_enospc)
 {
        /* this must live until after bch2_trans_commit(): */
        struct bkey_inode_buf inode_p;
-       bool extending = false, should_check_enospc;
+       bool extending = false, usage_increasing;
        s64 i_sectors_delta = 0, disk_sectors_delta = 0;
        int ret;
 
@@ -281,17 +282,20 @@ int bch2_extent_update(struct btree_trans *trans,
 
        ret = bch2_sum_sector_overwrites(trans, iter, k,
                        &extending,
-                       &should_check_enospc,
+                       &usage_increasing,
                        &i_sectors_delta,
                        &disk_sectors_delta);
        if (ret)
                return ret;
 
+       if (!usage_increasing)
+               check_enospc = false;
+
        if (disk_res &&
            disk_sectors_delta > (s64) disk_res->sectors) {
                ret = bch2_disk_reservation_add(trans->c, disk_res,
                                        disk_sectors_delta - disk_res->sectors,
-                                       !should_check_enospc
+                                       !check_enospc
                                        ? BCH_DISK_RESERVATION_NOFAIL : 0);
                if (ret)
                        return ret;
@@ -346,6 +350,7 @@ int bch2_extent_update(struct btree_trans *trans,
                bch2_trans_commit(trans, disk_res, journal_seq,
                                BTREE_INSERT_NOCHECK_RW|
                                BTREE_INSERT_NOFAIL);
+       BUG_ON(ret == -ENOSPC);
        if (ret)
                return ret;
 
@@ -384,7 +389,7 @@ int bch2_fpunch_at(struct btree_trans *trans, struct btree_iter *iter,
 
                ret = bch2_extent_update(trans, iter, &delete,
                                &disk_res, journal_seq,
-                               0, i_sectors_delta);
+                               0, i_sectors_delta, false);
                bch2_disk_reservation_put(c, &disk_res);
 btree_err:
                if (ret == -EINTR) {
@@ -457,7 +462,8 @@ int bch2_write_index_default(struct bch_write_op *op)
 
                ret = bch2_extent_update(&trans, iter, sk.k,
                                         &op->res, op_journal_seq(op),
-                                        op->new_i_size, &op->i_sectors_delta);
+                                        op->new_i_size, &op->i_sectors_delta,
+                                        op->flags & BCH_WRITE_CHECK_ENOSPC);
                if (ret == -EINTR)
                        continue;
                if (ret)
index ccbd8c3..d1fd37e 100644 (file)
@@ -38,11 +38,12 @@ enum bch_write_flags {
        BCH_WRITE_ONLY_SPECIFIED_DEVS   = (1 << 6),
        BCH_WRITE_WROTE_DATA_INLINE     = (1 << 7),
        BCH_WRITE_FROM_INTERNAL         = (1 << 8),
+       BCH_WRITE_CHECK_ENOSPC          = (1 << 9),
 
        /* Internal: */
-       BCH_WRITE_JOURNAL_SEQ_PTR       = (1 << 9),
-       BCH_WRITE_SKIP_CLOSURE_PUT      = (1 << 10),
-       BCH_WRITE_DONE                  = (1 << 11),
+       BCH_WRITE_JOURNAL_SEQ_PTR       = (1 << 10),
+       BCH_WRITE_SKIP_CLOSURE_PUT      = (1 << 11),
+       BCH_WRITE_DONE                  = (1 << 12),
 };
 
 static inline u64 *op_journal_seq(struct bch_write_op *op)
@@ -68,7 +69,7 @@ int bch2_sum_sector_overwrites(struct btree_trans *, struct btree_iter *,
                               struct bkey_i *, bool *, bool *, s64 *, s64 *);
 int bch2_extent_update(struct btree_trans *, struct btree_iter *,
                       struct bkey_i *, struct disk_reservation *,
-                      u64 *, u64, s64 *);
+                      u64 *, u64, s64 *, bool);
 int bch2_fpunch_at(struct btree_trans *, struct btree_iter *,
                   struct bpos, u64 *, s64 *);
 int bch2_fpunch(struct bch_fs *c, u64, u64, u64, u64 *, s64 *);
index ec8532b..c624fab 100644 (file)
@@ -293,7 +293,8 @@ s64 bch2_remap_range(struct bch_fs *c,
 
                ret = bch2_extent_update(&trans, dst_iter, new_dst.k,
                                         &disk_res, journal_seq,
-                                        new_i_size, i_sectors_delta);
+                                        new_i_size, i_sectors_delta,
+                                        true);
                bch2_disk_reservation_put(c, &disk_res);
                if (ret)
                        continue;