fs/f2fs/data.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  * fs/f2fs/data.c
   4  *
   5  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
   6  *             http://www.samsung.com/
   7  */
   8 #include <linux/fs.h>
   9 #include <linux/f2fs_fs.h>
  10 #include <linux/buffer_head.h>
  11 #include <linux/sched/mm.h>
  12 #include <linux/mpage.h>
  13 #include <linux/writeback.h>
  14 #include <linux/pagevec.h>
  15 #include <linux/blkdev.h>
  16 #include <linux/bio.h>
  17 #include <linux/blk-crypto.h>
  18 #include <linux/swap.h>
  19 #include <linux/prefetch.h>
  20 #include <linux/uio.h>
  21 #include <linux/sched/signal.h>
  22 #include <linux/fiemap.h>
  23 #include <linux/iomap.h>
  24
  25 #include "f2fs.h"
  26 #include "node.h"
  27 #include "segment.h"
  28 #include "iostat.h"
  29 #include <trace/events/f2fs.h>
  30
  31 #define NUM_PREALLOC_POST_READ_CTXS     128
  32
  33 static struct kmem_cache *bio_post_read_ctx_cache;
  34 static struct kmem_cache *bio_entry_slab;
  35 static mempool_t *bio_post_read_ctx_pool;
  36 static struct bio_set f2fs_bioset;
  37
  38 #define F2FS_BIO_POOL_SIZE      NR_CURSEG_TYPE
  39
  40 int __init f2fs_init_bioset(void)
  41 {
  42         return bioset_init(&f2fs_bioset, F2FS_BIO_POOL_SIZE,
  43                                         0, BIOSET_NEED_BVECS);
  44 }
  45
  46 void f2fs_destroy_bioset(void)
  47 {
  48         bioset_exit(&f2fs_bioset);
  49 }
  50
  51 bool f2fs_is_cp_guaranteed(struct page *page)
  52 {
  53         struct address_space *mapping = page->mapping;
  54         struct inode *inode;
  55         struct f2fs_sb_info *sbi;
  56
  57         if (!mapping)
  58                 return false;
  59
  60         inode = mapping->host;
  61         sbi = F2FS_I_SB(inode);
  62
  63         if (inode->i_ino == F2FS_META_INO(sbi) ||
  64                         inode->i_ino == F2FS_NODE_INO(sbi) ||
  65                         S_ISDIR(inode->i_mode))
  66                 return true;
  67
  68         if ((S_ISREG(inode->i_mode) && IS_NOQUOTA(inode)) ||
  69                         page_private_gcing(page))
  70                 return true;
  71         return false;
  72 }
  73
  74 static enum count_type __read_io_type(struct page *page)
  75 {
  76         struct address_space *mapping = page_file_mapping(page);
  77
  78         if (mapping) {
  79                 struct inode *inode = mapping->host;
  80                 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  81
  82                 if (inode->i_ino == F2FS_META_INO(sbi))
  83                         return F2FS_RD_META;
  84
  85                 if (inode->i_ino == F2FS_NODE_INO(sbi))
  86                         return F2FS_RD_NODE;
  87         }
  88         return F2FS_RD_DATA;
  89 }
  90
  91 /* postprocessing steps for read bios */
  92 enum bio_post_read_step {
  93 #ifdef CONFIG_FS_ENCRYPTION
  94         STEP_DECRYPT    = BIT(0),
  95 #else
  96         STEP_DECRYPT    = 0,    /* compile out the decryption-related code */
  97 #endif
  98 #ifdef CONFIG_F2FS_FS_COMPRESSION
  99         STEP_DECOMPRESS = BIT(1),
 100 #else
 101         STEP_DECOMPRESS = 0,    /* compile out the decompression-related code */
 102 #endif
 103 #ifdef CONFIG_FS_VERITY
 104         STEP_VERITY     = BIT(2),
 105 #else
 106         STEP_VERITY     = 0,    /* compile out the verity-related code */
 107 #endif
 108 };
 109
 110 struct bio_post_read_ctx {
 111         struct bio *bio;
 112         struct f2fs_sb_info *sbi;
 113         struct work_struct work;
 114         unsigned int enabled_steps;
 115         /*
 116          * decompression_attempted keeps track of whether
 117          * f2fs_end_read_compressed_page() has been called on the pages in the
 118          * bio that belong to a compressed cluster yet.
 119          */
 120         bool decompression_attempted;
 121         block_t fs_blkaddr;
 122 };
 123
 124 /*
 125  * Update and unlock a bio's pages, and free the bio.
 126  *
 127  * This marks pages up-to-date only if there was no error in the bio (I/O error,
 128  * decryption error, or verity error), as indicated by bio->bi_status.
 129  *
 130  * "Compressed pages" (pagecache pages backed by a compressed cluster on-disk)
 131  * aren't marked up-to-date here, as decompression is done on a per-compression-
 132  * cluster basis rather than a per-bio basis.  Instead, we only must do two
 133  * things for each compressed page here: call f2fs_end_read_compressed_page()
 134  * with failed=true if an error occurred before it would have normally gotten
 135  * called (i.e., I/O error or decryption error, but *not* verity error), and
 136  * release the bio's reference to the decompress_io_ctx of the page's cluster.
 137  */
 138 static void f2fs_finish_read_bio(struct bio *bio, bool in_task)
 139 {
 140         struct bio_vec *bv;
 141         struct bvec_iter_all iter_all;
 142         struct bio_post_read_ctx *ctx = bio->bi_private;
 143
 144         bio_for_each_segment_all(bv, bio, iter_all) {
 145                 struct page *page = bv->bv_page;
 146
 147                 if (f2fs_is_compressed_page(page)) {
 148                         if (ctx && !ctx->decompression_attempted)
 149                                 f2fs_end_read_compressed_page(page, true, 0,
 150                                                         in_task);
 151                         f2fs_put_page_dic(page, in_task);
 152                         continue;
 153                 }
 154
 155                 if (bio->bi_status)
 156                         ClearPageUptodate(page);
 157                 else
 158                         SetPageUptodate(page);
 159                 dec_page_count(F2FS_P_SB(page), __read_io_type(page));
 160                 unlock_page(page);
 161         }
 162
 163         if (ctx)
 164                 mempool_free(ctx, bio_post_read_ctx_pool);
 165         bio_put(bio);
 166 }
 167
 168 static void f2fs_verify_bio(struct work_struct *work)
 169 {
 170         struct bio_post_read_ctx *ctx =
 171                 container_of(work, struct bio_post_read_ctx, work);
 172         struct bio *bio = ctx->bio;
 173         bool may_have_compressed_pages = (ctx->enabled_steps & STEP_DECOMPRESS);
 174
 175         /*
 176          * fsverity_verify_bio() may call readahead() again, and while verity
 177          * will be disabled for this, decryption and/or decompression may still
 178          * be needed, resulting in another bio_post_read_ctx being allocated.
 179          * So to prevent deadlocks we need to release the current ctx to the
 180          * mempool first.  This assumes that verity is the last post-read step.
 181          */
 182         mempool_free(ctx, bio_post_read_ctx_pool);
 183         bio->bi_private = NULL;
 184
 185         /*
 186          * Verify the bio's pages with fs-verity.  Exclude compressed pages,
 187          * as those were handled separately by f2fs_end_read_compressed_page().
 188          */
 189         if (may_have_compressed_pages) {
 190                 struct bio_vec *bv;
 191                 struct bvec_iter_all iter_all;
 192
 193                 bio_for_each_segment_all(bv, bio, iter_all) {
 194                         struct page *page = bv->bv_page;
 195
 196                         if (!f2fs_is_compressed_page(page) &&
 197                             !fsverity_verify_page(page)) {
 198                                 bio->bi_status = BLK_STS_IOERR;
 199                                 break;
 200                         }
 201                 }
 202         } else {
 203                 fsverity_verify_bio(bio);
 204         }
 205
 206         f2fs_finish_read_bio(bio, true);
 207 }
 208
 209 /*
 210  * If the bio's data needs to be verified with fs-verity, then enqueue the
 211  * verity work for the bio.  Otherwise finish the bio now.
 212  *
 213  * Note that to avoid deadlocks, the verity work can't be done on the
 214  * decryption/decompression workqueue.  This is because verifying the data pages
 215  * can involve reading verity metadata pages from the file, and these verity
 216  * metadata pages may be encrypted and/or compressed.
 217  */
 218 static void f2fs_verify_and_finish_bio(struct bio *bio, bool in_task)
 219 {
 220         struct bio_post_read_ctx *ctx = bio->bi_private;
 221
 222         if (ctx && (ctx->enabled_steps & STEP_VERITY)) {
 223                 INIT_WORK(&ctx->work, f2fs_verify_bio);
 224                 fsverity_enqueue_verify_work(&ctx->work);
 225         } else {
 226                 f2fs_finish_read_bio(bio, in_task);
 227         }
 228 }
 229
 230 /*
 231  * Handle STEP_DECOMPRESS by decompressing any compressed clusters whose last
 232  * remaining page was read by @ctx->bio.
 233  *
 234  * Note that a bio may span clusters (even a mix of compressed and uncompressed
 235  * clusters) or be for just part of a cluster.  STEP_DECOMPRESS just indicates
 236  * that the bio includes at least one compressed page.  The actual decompression
 237  * is done on a per-cluster basis, not a per-bio basis.
 238  */
 239 static void f2fs_handle_step_decompress(struct bio_post_read_ctx *ctx,
 240                 bool in_task)
 241 {
 242         struct bio_vec *bv;
 243         struct bvec_iter_all iter_all;
 244         bool all_compressed = true;
 245         block_t blkaddr = ctx->fs_blkaddr;
 246
 247         bio_for_each_segment_all(bv, ctx->bio, iter_all) {
 248                 struct page *page = bv->bv_page;
 249
 250                 if (f2fs_is_compressed_page(page))
 251                         f2fs_end_read_compressed_page(page, false, blkaddr,
 252                                                       in_task);
 253                 else
 254                         all_compressed = false;
 255
 256                 blkaddr++;
 257         }
 258
 259         ctx->decompression_attempted = true;
 260
 261         /*
 262          * Optimization: if all the bio's pages are compressed, then scheduling
 263          * the per-bio verity work is unnecessary, as verity will be fully
 264          * handled at the compression cluster level.
 265          */
 266         if (all_compressed)
 267                 ctx->enabled_steps &= ~STEP_VERITY;
 268 }
 269
 270 static void f2fs_post_read_work(struct work_struct *work)
 271 {
 272         struct bio_post_read_ctx *ctx =
 273                 container_of(work, struct bio_post_read_ctx, work);
 274         struct bio *bio = ctx->bio;
 275
 276         if ((ctx->enabled_steps & STEP_DECRYPT) && !fscrypt_decrypt_bio(bio)) {
 277                 f2fs_finish_read_bio(bio, true);
 278                 return;
 279         }
 280
 281         if (ctx->enabled_steps & STEP_DECOMPRESS)
 282                 f2fs_handle_step_decompress(ctx, true);
 283
 284         f2fs_verify_and_finish_bio(bio, true);
 285 }
 286
 287 static void f2fs_read_end_io(struct bio *bio)
 288 {
 289         struct f2fs_sb_info *sbi = F2FS_P_SB(bio_first_page_all(bio));
 290         struct bio_post_read_ctx *ctx;
 291         bool intask = in_task();
 292
 293         iostat_update_and_unbind_ctx(bio);
 294         ctx = bio->bi_private;
 295
 296         if (time_to_inject(sbi, FAULT_READ_IO))
 297                 bio->bi_status = BLK_STS_IOERR;
 298
 299         if (bio->bi_status) {
 300                 f2fs_finish_read_bio(bio, intask);
 301                 return;
 302         }
 303
 304         if (ctx) {
 305                 unsigned int enabled_steps = ctx->enabled_steps &
 306                                         (STEP_DECRYPT | STEP_DECOMPRESS);
 307
 308                 /*
 309                  * If we have only decompression step between decompression and
 310                  * decrypt, we don't need post processing for this.
 311                  */
 312                 if (enabled_steps == STEP_DECOMPRESS &&
 313                                 !f2fs_low_mem_mode(sbi)) {
 314                         f2fs_handle_step_decompress(ctx, intask);
 315                 } else if (enabled_steps) {
 316                         INIT_WORK(&ctx->work, f2fs_post_read_work);
 317                         queue_work(ctx->sbi->post_read_wq, &ctx->work);
 318                         return;
 319                 }
 320         }
 321
 322         f2fs_verify_and_finish_bio(bio, intask);
 323 }
 324
 325 static void f2fs_write_end_io(struct bio *bio)
 326 {
 327         struct f2fs_sb_info *sbi;
 328         struct bio_vec *bvec;
 329         struct bvec_iter_all iter_all;
 330
 331         iostat_update_and_unbind_ctx(bio);
 332         sbi = bio->bi_private;
 333
 334         if (time_to_inject(sbi, FAULT_WRITE_IO))
 335                 bio->bi_status = BLK_STS_IOERR;
 336
 337         bio_for_each_segment_all(bvec, bio, iter_all) {
 338                 struct page *page = bvec->bv_page;
 339                 enum count_type type = WB_DATA_TYPE(page, false);
 340
 341                 fscrypt_finalize_bounce_page(&page);
 342
 343 #ifdef CONFIG_F2FS_FS_COMPRESSION
 344                 if (f2fs_is_compressed_page(page)) {
 345                         f2fs_compress_write_end_io(bio, page);
 346                         continue;
 347                 }
 348 #endif
 349
 350                 if (unlikely(bio->bi_status)) {
 351                         mapping_set_error(page->mapping, -EIO);
 352                         if (type == F2FS_WB_CP_DATA)
 353                                 f2fs_stop_checkpoint(sbi, true,
 354                                                 STOP_CP_REASON_WRITE_FAIL);
 355                 }
 356
 357                 f2fs_bug_on(sbi, page->mapping == NODE_MAPPING(sbi) &&
 358                                         page->index != nid_of_node(page));
 359
 360                 dec_page_count(sbi, type);
 361                 if (f2fs_in_warm_node_list(sbi, page))
 362                         f2fs_del_fsync_node_entry(sbi, page);
 363                 clear_page_private_gcing(page);
 364                 end_page_writeback(page);
 365         }
 366         if (!get_pages(sbi, F2FS_WB_CP_DATA) &&
 367                                 wq_has_sleeper(&sbi->cp_wait))
 368                 wake_up(&sbi->cp_wait);
 369
 370         bio_put(bio);
 371 }
 372
 373 #ifdef CONFIG_BLK_DEV_ZONED
 374 static void f2fs_zone_write_end_io(struct bio *bio)
 375 {
 376         struct f2fs_bio_info *io = (struct f2fs_bio_info *)bio->bi_private;
 377
 378         bio->bi_private = io->bi_private;
 379         complete(&io->zone_wait);
 380         f2fs_write_end_io(bio);
 381 }
 382 #endif
 383
 384 struct block_device *f2fs_target_device(struct f2fs_sb_info *sbi,
 385                 block_t blk_addr, sector_t *sector)
 386 {
 387         struct block_device *bdev = sbi->sb->s_bdev;
 388         int i;
 389
 390         if (f2fs_is_multi_device(sbi)) {
 391                 for (i = 0; i < sbi->s_ndevs; i++) {
 392                         if (FDEV(i).start_blk <= blk_addr &&
 393                             FDEV(i).end_blk >= blk_addr) {
 394                                 blk_addr -= FDEV(i).start_blk;
 395                                 bdev = FDEV(i).bdev;
 396                                 break;
 397                         }
 398                 }
 399         }
 400
 401         if (sector)
 402                 *sector = SECTOR_FROM_BLOCK(blk_addr);
 403         return bdev;
 404 }
 405
 406 int f2fs_target_device_index(struct f2fs_sb_info *sbi, block_t blkaddr)
 407 {
 408         int i;
 409
 410         if (!f2fs_is_multi_device(sbi))
 411                 return 0;
 412
 413         for (i = 0; i < sbi->s_ndevs; i++)
 414                 if (FDEV(i).start_blk <= blkaddr && FDEV(i).end_blk >= blkaddr)
 415                         return i;
 416         return 0;
 417 }
 418
 419 static blk_opf_t f2fs_io_flags(struct f2fs_io_info *fio)
 420 {
 421         unsigned int temp_mask = GENMASK(NR_TEMP_TYPE - 1, 0);
 422         unsigned int fua_flag, meta_flag, io_flag;
 423         blk_opf_t op_flags = 0;
 424
 425         if (fio->op != REQ_OP_WRITE)
 426                 return 0;
 427         if (fio->type == DATA)
 428                 io_flag = fio->sbi->data_io_flag;
 429         else if (fio->type == NODE)
 430                 io_flag = fio->sbi->node_io_flag;
 431         else
 432                 return 0;
 433
 434         fua_flag = io_flag & temp_mask;
 435         meta_flag = (io_flag >> NR_TEMP_TYPE) & temp_mask;
 436
 437         /*
 438          * data/node io flag bits per temp:
 439          *      REQ_META     |      REQ_FUA      |
 440          *    5 |    4 |   3 |    2 |    1 |   0 |
 441          * Cold | Warm | Hot | Cold | Warm | Hot |
 442          */
 443         if (BIT(fio->temp) & meta_flag)
 444                 op_flags |= REQ_META;
 445         if (BIT(fio->temp) & fua_flag)
 446                 op_flags |= REQ_FUA;
 447         return op_flags;
 448 }
 449
 450 static struct bio *__bio_alloc(struct f2fs_io_info *fio, int npages)
 451 {
 452         struct f2fs_sb_info *sbi = fio->sbi;
 453         struct block_device *bdev;
 454         sector_t sector;
 455         struct bio *bio;
 456
 457         bdev = f2fs_target_device(sbi, fio->new_blkaddr, &sector);
 458         bio = bio_alloc_bioset(bdev, npages,
 459                                 fio->op | fio->op_flags | f2fs_io_flags(fio),
 460                                 GFP_NOIO, &f2fs_bioset);
 461         bio->bi_iter.bi_sector = sector;
 462         if (is_read_io(fio->op)) {
 463                 bio->bi_end_io = f2fs_read_end_io;
 464                 bio->bi_private = NULL;
 465         } else {
 466                 bio->bi_end_io = f2fs_write_end_io;
 467                 bio->bi_private = sbi;
 468         }
 469         iostat_alloc_and_bind_ctx(sbi, bio, NULL);
 470
 471         if (fio->io_wbc)
 472                 wbc_init_bio(fio->io_wbc, bio);
 473
 474         return bio;
 475 }
 476
 477 static void f2fs_set_bio_crypt_ctx(struct bio *bio, const struct inode *inode,
 478                                   pgoff_t first_idx,
 479                                   const struct f2fs_io_info *fio,
 480                                   gfp_t gfp_mask)
 481 {
 482         /*
 483          * The f2fs garbage collector sets ->encrypted_page when it wants to
 484          * read/write raw data without encryption.
 485          */
 486         if (!fio || !fio->encrypted_page)
 487                 fscrypt_set_bio_crypt_ctx(bio, inode, first_idx, gfp_mask);
 488 }
 489
 490 static bool f2fs_crypt_mergeable_bio(struct bio *bio, const struct inode *inode,
 491                                      pgoff_t next_idx,
 492                                      const struct f2fs_io_info *fio)
 493 {
 494         /*
 495          * The f2fs garbage collector sets ->encrypted_page when it wants to
 496          * read/write raw data without encryption.
 497          */
 498         if (fio && fio->encrypted_page)
 499                 return !bio_has_crypt_ctx(bio);
 500
 501         return fscrypt_mergeable_bio(bio, inode, next_idx);
 502 }
 503
 504 void f2fs_submit_read_bio(struct f2fs_sb_info *sbi, struct bio *bio,
 505                                  enum page_type type)
 506 {
 507         WARN_ON_ONCE(!is_read_io(bio_op(bio)));
 508         trace_f2fs_submit_read_bio(sbi->sb, type, bio);
 509
 510         iostat_update_submit_ctx(bio, type);
 511         submit_bio(bio);
 512 }
 513
 514 static void f2fs_submit_write_bio(struct f2fs_sb_info *sbi, struct bio *bio,
 515                                   enum page_type type)
 516 {
 517         WARN_ON_ONCE(is_read_io(bio_op(bio)));
 518
 519         if (f2fs_lfs_mode(sbi) && current->plug && PAGE_TYPE_ON_MAIN(type))
 520                 blk_finish_plug(current->plug);
 521
 522         trace_f2fs_submit_write_bio(sbi->sb, type, bio);
 523         iostat_update_submit_ctx(bio, type);
 524         submit_bio(bio);
 525 }
 526
 527 static void __submit_merged_bio(struct f2fs_bio_info *io)
 528 {
 529         struct f2fs_io_info *fio = &io->fio;
 530
 531         if (!io->bio)
 532                 return;
 533
 534         if (is_read_io(fio->op)) {
 535                 trace_f2fs_prepare_read_bio(io->sbi->sb, fio->type, io->bio);
 536                 f2fs_submit_read_bio(io->sbi, io->bio, fio->type);
 537         } else {
 538                 trace_f2fs_prepare_write_bio(io->sbi->sb, fio->type, io->bio);
 539                 f2fs_submit_write_bio(io->sbi, io->bio, fio->type);
 540         }
 541         io->bio = NULL;
 542 }
 543
 544 static bool __has_merged_page(struct bio *bio, struct inode *inode,
 545                                                 struct page *page, nid_t ino)
 546 {
 547         struct bio_vec *bvec;
 548         struct bvec_iter_all iter_all;
 549
 550         if (!bio)
 551                 return false;
 552
 553         if (!inode && !page && !ino)
 554                 return true;
 555
 556         bio_for_each_segment_all(bvec, bio, iter_all) {
 557                 struct page *target = bvec->bv_page;
 558
 559                 if (fscrypt_is_bounce_page(target)) {
 560                         target = fscrypt_pagecache_page(target);
 561                         if (IS_ERR(target))
 562                                 continue;
 563                 }
 564                 if (f2fs_is_compressed_page(target)) {
 565                         target = f2fs_compress_control_page(target);
 566                         if (IS_ERR(target))
 567                                 continue;
 568                 }
 569
 570                 if (inode && inode == target->mapping->host)
 571                         return true;
 572                 if (page && page == target)
 573                         return true;
 574                 if (ino && ino == ino_of_node(target))
 575                         return true;
 576         }
 577
 578         return false;
 579 }
 580
 581 int f2fs_init_write_merge_io(struct f2fs_sb_info *sbi)
 582 {
 583         int i;
 584
 585         for (i = 0; i < NR_PAGE_TYPE; i++) {
 586                 int n = (i == META) ? 1 : NR_TEMP_TYPE;
 587                 int j;
 588
 589                 sbi->write_io[i] = f2fs_kmalloc(sbi,
 590                                 array_size(n, sizeof(struct f2fs_bio_info)),
 591                                 GFP_KERNEL);
 592                 if (!sbi->write_io[i])
 593                         return -ENOMEM;
 594
 595                 for (j = HOT; j < n; j++) {
 596                         init_f2fs_rwsem(&sbi->write_io[i][j].io_rwsem);
 597                         sbi->write_io[i][j].sbi = sbi;
 598                         sbi->write_io[i][j].bio = NULL;
 599                         spin_lock_init(&sbi->write_io[i][j].io_lock);
 600                         INIT_LIST_HEAD(&sbi->write_io[i][j].io_list);
 601                         INIT_LIST_HEAD(&sbi->write_io[i][j].bio_list);
 602                         init_f2fs_rwsem(&sbi->write_io[i][j].bio_list_lock);
 603 #ifdef CONFIG_BLK_DEV_ZONED
 604                         init_completion(&sbi->write_io[i][j].zone_wait);
 605                         sbi->write_io[i][j].zone_pending_bio = NULL;
 606                         sbi->write_io[i][j].bi_private = NULL;
 607 #endif
 608                 }
 609         }
 610
 611         return 0;
 612 }
 613
 614 static void __f2fs_submit_merged_write(struct f2fs_sb_info *sbi,
 615                                 enum page_type type, enum temp_type temp)
 616 {
 617         enum page_type btype = PAGE_TYPE_OF_BIO(type);
 618         struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 619
 620         f2fs_down_write(&io->io_rwsem);
 621
 622         if (!io->bio)
 623                 goto unlock_out;
 624
 625         /* change META to META_FLUSH in the checkpoint procedure */
 626         if (type >= META_FLUSH) {
 627                 io->fio.type = META_FLUSH;
 628                 io->bio->bi_opf |= REQ_META | REQ_PRIO | REQ_SYNC;
 629                 if (!test_opt(sbi, NOBARRIER))
 630                         io->bio->bi_opf |= REQ_PREFLUSH | REQ_FUA;
 631         }
 632         __submit_merged_bio(io);
 633 unlock_out:
 634         f2fs_up_write(&io->io_rwsem);
 635 }
 636
 637 static void __submit_merged_write_cond(struct f2fs_sb_info *sbi,
 638                                 struct inode *inode, struct page *page,
 639                                 nid_t ino, enum page_type type, bool force)
 640 {
 641         enum temp_type temp;
 642         bool ret = true;
 643
 644         for (temp = HOT; temp < NR_TEMP_TYPE; temp++) {
 645                 if (!force)     {
 646                         enum page_type btype = PAGE_TYPE_OF_BIO(type);
 647                         struct f2fs_bio_info *io = sbi->write_io[btype] + temp;
 648
 649                         f2fs_down_read(&io->io_rwsem);
 650                         ret = __has_merged_page(io->bio, inode, page, ino);
 651                         f2fs_up_read(&io->io_rwsem);
 652                 }
 653                 if (ret)
 654                         __f2fs_submit_merged_write(sbi, type, temp);
 655
 656                 /* TODO: use HOT temp only for meta pages now. */
 657                 if (type >= META)
 658                         break;
 659         }
 660 }
 661
 662 void f2fs_submit_merged_write(struct f2fs_sb_info *sbi, enum page_type type)
 663 {
 664         __submit_merged_write_cond(sbi, NULL, NULL, 0, type, true);
 665 }
 666
 667 void f2fs_submit_merged_write_cond(struct f2fs_sb_info *sbi,
 668                                 struct inode *inode, struct page *page,
 669                                 nid_t ino, enum page_type type)
 670 {
 671         __submit_merged_write_cond(sbi, inode, page, ino, type, false);
 672 }
 673
 674 void f2fs_flush_merged_writes(struct f2fs_sb_info *sbi)
 675 {
 676         f2fs_submit_merged_write(sbi, DATA);
 677         f2fs_submit_merged_write(sbi, NODE);
 678         f2fs_submit_merged_write(sbi, META);
 679 }
 680
 681 /*
 682  * Fill the locked page with data located in the block address.
 683  * A caller needs to unlock the page on failure.
 684  */
 685 int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 686 {
 687         struct bio *bio;
 688         struct page *page = fio->encrypted_page ?
 689                         fio->encrypted_page : fio->page;
 690
 691         if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
 692                         fio->is_por ? META_POR : (__is_meta_io(fio) ?
 693                         META_GENERIC : DATA_GENERIC_ENHANCE)))
 694                 return -EFSCORRUPTED;
 695
 696         trace_f2fs_submit_page_bio(page, fio);
 697
 698         /* Allocate a new bio */
 699         bio = __bio_alloc(fio, 1);
 700
 701         f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
 702                                fio->page->index, fio, GFP_NOIO);
 703
 704         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
 705                 bio_put(bio);
 706                 return -EFAULT;
 707         }
 708
 709         if (fio->io_wbc && !is_read_io(fio->op))
 710                 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
 711
 712         inc_page_count(fio->sbi, is_read_io(fio->op) ?
 713                         __read_io_type(page) : WB_DATA_TYPE(fio->page, false));
 714
 715         if (is_read_io(bio_op(bio)))
 716                 f2fs_submit_read_bio(fio->sbi, bio, fio->type);
 717         else
 718                 f2fs_submit_write_bio(fio->sbi, bio, fio->type);
 719         return 0;
 720 }
 721
 722 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 723                                 block_t last_blkaddr, block_t cur_blkaddr)
 724 {
 725         if (unlikely(sbi->max_io_bytes &&
 726                         bio->bi_iter.bi_size >= sbi->max_io_bytes))
 727                 return false;
 728         if (last_blkaddr + 1 != cur_blkaddr)
 729                 return false;
 730         return bio->bi_bdev == f2fs_target_device(sbi, cur_blkaddr, NULL);
 731 }
 732
 733 static bool io_type_is_mergeable(struct f2fs_bio_info *io,
 734                                                 struct f2fs_io_info *fio)
 735 {
 736         if (io->fio.op != fio->op)
 737                 return false;
 738         return io->fio.op_flags == fio->op_flags;
 739 }
 740
 741 static bool io_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 742                                         struct f2fs_bio_info *io,
 743                                         struct f2fs_io_info *fio,
 744                                         block_t last_blkaddr,
 745                                         block_t cur_blkaddr)
 746 {
 747         if (!page_is_mergeable(sbi, bio, last_blkaddr, cur_blkaddr))
 748                 return false;
 749         return io_type_is_mergeable(io, fio);
 750 }
 751
 752 static void add_bio_entry(struct f2fs_sb_info *sbi, struct bio *bio,
 753                                 struct page *page, enum temp_type temp)
 754 {
 755         struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 756         struct bio_entry *be;
 757
 758         be = f2fs_kmem_cache_alloc(bio_entry_slab, GFP_NOFS, true, NULL);
 759         be->bio = bio;
 760         bio_get(bio);
 761
 762         if (bio_add_page(bio, page, PAGE_SIZE, 0) != PAGE_SIZE)
 763                 f2fs_bug_on(sbi, 1);
 764
 765         f2fs_down_write(&io->bio_list_lock);
 766         list_add_tail(&be->list, &io->bio_list);
 767         f2fs_up_write(&io->bio_list_lock);
 768 }
 769
 770 static void del_bio_entry(struct bio_entry *be)
 771 {
 772         list_del(&be->list);
 773         kmem_cache_free(bio_entry_slab, be);
 774 }
 775
 776 static int add_ipu_page(struct f2fs_io_info *fio, struct bio **bio,
 777                                                         struct page *page)
 778 {
 779         struct f2fs_sb_info *sbi = fio->sbi;
 780         enum temp_type temp;
 781         bool found = false;
 782         int ret = -EAGAIN;
 783
 784         for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
 785                 struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
 786                 struct list_head *head = &io->bio_list;
 787                 struct bio_entry *be;
 788
 789                 f2fs_down_write(&io->bio_list_lock);
 790                 list_for_each_entry(be, head, list) {
 791                         if (be->bio != *bio)
 792                                 continue;
 793
 794                         found = true;
 795
 796                         f2fs_bug_on(sbi, !page_is_mergeable(sbi, *bio,
 797                                                             *fio->last_block,
 798                                                             fio->new_blkaddr));
 799                         if (f2fs_crypt_mergeable_bio(*bio,
 800                                         fio->page->mapping->host,
 801                                         fio->page->index, fio) &&
 802                             bio_add_page(*bio, page, PAGE_SIZE, 0) ==
 803                                         PAGE_SIZE) {
 804                                 ret = 0;
 805                                 break;
 806                         }
 807
 808                         /* page can't be merged into bio; submit the bio */
 809                         del_bio_entry(be);
 810                         f2fs_submit_write_bio(sbi, *bio, DATA);
 811                         break;
 812                 }
 813                 f2fs_up_write(&io->bio_list_lock);
 814         }
 815
 816         if (ret) {
 817                 bio_put(*bio);
 818                 *bio = NULL;
 819         }
 820
 821         return ret;
 822 }
 823
/*
 * Find a cached in-place-update (IPU) DATA write bio on the per-temperature
 * bio lists, detach it from its list and submit it.
 *
 * @sbi:  filesystem instance whose write_io[DATA] bio lists are searched
 * @bio:  may be NULL; when *bio is set it identifies the bio to submit
 * @page: consulted only when there is no target bio; the bio to submit is
 *        then the first list entry for which __has_merged_page() is true
 *
 * A target bio or @page must be supplied (enforced via f2fs_bug_on()).
 * Whether or not anything was submitted, a non-NULL *bio is released with
 * bio_put() and reset to NULL before returning.
 */
void f2fs_submit_merged_ipu_write(struct f2fs_sb_info *sbi,
					struct bio **bio, struct page *page)
{
	enum temp_type temp;
	bool found = false;
	struct bio *target = bio ? *bio : NULL;

	f2fs_bug_on(sbi, !target && !page);

	/* stop walking the temperatures once an entry has been detached */
	for (temp = HOT; temp < NR_TEMP_TYPE && !found; temp++) {
		struct f2fs_bio_info *io = sbi->write_io[DATA] + temp;
		struct list_head *head = &io->bio_list;
		struct bio_entry *be;

		/* unlocked peek: skip temperatures with nothing queued */
		if (list_empty(head))
			continue;

		/* pass 1: look for a match while holding only the read lock */
		f2fs_down_read(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found)
				break;
		}
		f2fs_up_read(&io->bio_list_lock);

		if (!found)
			continue;

		/*
		 * pass 2: repeat the search under the write lock before
		 * unlinking. found is recomputed here, so nothing is
		 * submitted if the entry is no longer on the list.
		 */
		found = false;

		f2fs_down_write(&io->bio_list_lock);
		list_for_each_entry(be, head, list) {
			if (target)
				found = (target == be->bio);
			else
				found = __has_merged_page(be->bio, NULL,
								page, 0);
			if (found) {
				/* remember the bio before its entry is deleted */
				target = be->bio;
				del_bio_entry(be);
				break;
			}
		}
		f2fs_up_write(&io->bio_list_lock);
	}

	if (found)
		f2fs_submit_write_bio(sbi, target, DATA);
	/* drop the caller's reference and clear its cached pointer */
	if (bio && *bio) {
		bio_put(*bio);
		*bio = NULL;
	}
}
 881
 882 int f2fs_merge_page_bio(struct f2fs_io_info *fio)
 883 {
 884         struct bio *bio = *fio->bio;
 885         struct page *page = fio->encrypted_page ?
 886                         fio->encrypted_page : fio->page;
 887
 888         if (!f2fs_is_valid_blkaddr(fio->sbi, fio->new_blkaddr,
 889                         __is_meta_io(fio) ? META_GENERIC : DATA_GENERIC))
 890                 return -EFSCORRUPTED;
 891
 892         trace_f2fs_submit_page_bio(page, fio);
 893
 894         if (bio && !page_is_mergeable(fio->sbi, bio, *fio->last_block,
 895                                                 fio->new_blkaddr))
 896                 f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
 897 alloc_new:
 898         if (!bio) {
 899                 bio = __bio_alloc(fio, BIO_MAX_VECS);
 900                 f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
 901                                        fio->page->index, fio, GFP_NOIO);
 902
 903                 add_bio_entry(fio->sbi, bio, page, fio->temp);
 904         } else {
 905                 if (add_ipu_page(fio, &bio, page))
 906                         goto alloc_new;
 907         }
 908
 909         if (fio->io_wbc)
 910                 wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);
 911
 912         inc_page_count(fio->sbi, WB_DATA_TYPE(page, false));
 913
 914         *fio->last_block = fio->new_blkaddr;
 915         *fio->bio = bio;
 916
 917         return 0;
 918 }
 919
 920 #ifdef CONFIG_BLK_DEV_ZONED
 921 static bool is_end_zone_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr)
 922 {
 923         int devi = 0;
 924
 925         if (f2fs_is_multi_device(sbi)) {
 926                 devi = f2fs_target_device_index(sbi, blkaddr);
 927                 if (blkaddr < FDEV(devi).start_blk ||
 928                     blkaddr > FDEV(devi).end_blk) {
 929                         f2fs_err(sbi, "Invalid block %x", blkaddr);
 930                         return false;
 931                 }
 932                 blkaddr -= FDEV(devi).start_blk;
 933         }
 934         return bdev_is_zoned(FDEV(devi).bdev) &&
 935                 f2fs_blkz_is_seq(sbi, devi, blkaddr) &&
 936                 (blkaddr % sbi->blocks_per_blkz == sbi->blocks_per_blkz - 1);
 937 }
 938 #endif
 939
/*
 * Add the page(s) described by @fio to the merged write bio selected by the
 * fio's page type and temperature, submitting and re-allocating that bio
 * whenever the next page cannot be merged into it.
 *
 * When fio->in_list is set, @fio itself is not written; instead entries are
 * taken one by one from io->io_list until the list is empty.  The whole
 * operation runs with io->io_rwsem held for writing.  fio->submitted is set
 * to 1 on every fio that is processed.
 */
void f2fs_submit_page_write(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
	struct f2fs_bio_info *io = sbi->write_io[btype] + fio->temp;
	struct page *bio_page;
	enum count_type type;

	f2fs_bug_on(sbi, is_read_io(fio->op));

	f2fs_down_write(&io->io_rwsem);
next:
#ifdef CONFIG_BLK_DEV_ZONED
	/*
	 * A bio that ended a zone is still in flight: wait for its
	 * completion, then drop the extra reference taken at submit time.
	 */
	if (f2fs_sb_has_blkzoned(sbi) && btype < META && io->zone_pending_bio) {
		wait_for_completion_io(&io->zone_wait);
		bio_put(io->zone_pending_bio);
		io->zone_pending_bio = NULL;
		io->bi_private = NULL;
	}
#endif

	/* list mode: replace fio with the next queued entry, if any */
	if (fio->in_list) {
		spin_lock(&io->io_lock);
		if (list_empty(&io->io_list)) {
			spin_unlock(&io->io_lock);
			goto out;
		}
		fio = list_first_entry(&io->io_list,
						struct f2fs_io_info, list);
		list_del(&fio->list);
		spin_unlock(&io->io_lock);
	}

	verify_fio_blkaddr(fio);

	/* page that goes into the bio: encrypted, else compressed, else plain */
	if (fio->encrypted_page)
		bio_page = fio->encrypted_page;
	else if (fio->compressed_page)
		bio_page = fio->compressed_page;
	else
		bio_page = fio->page;

	/* set submitted = true as a return value */
	fio->submitted = 1;

	type = WB_DATA_TYPE(bio_page, fio->compressed_page);
	inc_page_count(sbi, type);

	/* flush the current bio if this block or its crypt context can't merge */
	if (io->bio &&
	    (!io_is_mergeable(sbi, io->bio, io, fio, io->last_block_in_bio,
			      fio->new_blkaddr) ||
	     !f2fs_crypt_mergeable_bio(io->bio, fio->page->mapping->host,
				       bio_page->index, fio)))
		__submit_merged_bio(io);
alloc_new:
	if (io->bio == NULL) {
		io->bio = __bio_alloc(fio, BIO_MAX_VECS);
		f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
				       bio_page->index, fio, GFP_NOIO);
		/* keep a copy of the fio that started this bio */
		io->fio = *fio;
	}

	/* bio could not take the page: submit it and retry with a new bio */
	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) < PAGE_SIZE) {
		__submit_merged_bio(io);
		goto alloc_new;
	}

	if (fio->io_wbc)
		wbc_account_cgroup_owner(fio->io_wbc, fio->page, PAGE_SIZE);

	io->last_block_in_bio = fio->new_blkaddr;

	trace_f2fs_submit_page_write(fio->page, fio);
#ifdef CONFIG_BLK_DEV_ZONED
	/*
	 * The block just added is the last one of a zone: take an extra
	 * reference, install the zone end_io handler (saving the previous
	 * bi_private in io), and submit right away.  The pending bio is
	 * waited for at the top of the next pass / next call.
	 */
	if (f2fs_sb_has_blkzoned(sbi) && btype < META &&
			is_end_zone_blkaddr(sbi, fio->new_blkaddr)) {
		bio_get(io->bio);
		reinit_completion(&io->zone_wait);
		io->bi_private = io->bio->bi_private;
		io->bio->bi_private = io;
		io->bio->bi_end_io = f2fs_zone_write_end_io;
		io->zone_pending_bio = io->bio;
		__submit_merged_bio(io);
	}
#endif
	/* list mode keeps draining io->io_list */
	if (fio->in_list)
		goto next;
out:
	/* do not leave a bio cached after shutdown or while checkpoint is not ready */
	if (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) ||
				!f2fs_is_checkpoint_ready(sbi))
		__submit_merged_bio(io);
	f2fs_up_write(&io->io_rwsem);
}
1033
1034 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
1035                                       unsigned nr_pages, blk_opf_t op_flag,
1036                                       pgoff_t first_idx, bool for_write)
1037 {
1038         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1039         struct bio *bio;
1040         struct bio_post_read_ctx *ctx = NULL;
1041         unsigned int post_read_steps = 0;
1042         sector_t sector;
1043         struct block_device *bdev = f2fs_target_device(sbi, blkaddr, &sector);
1044
1045         bio = bio_alloc_bioset(bdev, bio_max_segs(nr_pages),
1046                                REQ_OP_READ | op_flag,
1047                                for_write ? GFP_NOIO : GFP_KERNEL, &f2fs_bioset);
1048         if (!bio)
1049                 return ERR_PTR(-ENOMEM);
1050         bio->bi_iter.bi_sector = sector;
1051         f2fs_set_bio_crypt_ctx(bio, inode, first_idx, NULL, GFP_NOFS);
1052         bio->bi_end_io = f2fs_read_end_io;
1053
1054         if (fscrypt_inode_uses_fs_layer_crypto(inode))
1055                 post_read_steps |= STEP_DECRYPT;
1056
1057         if (f2fs_need_verity(inode, first_idx))
1058                 post_read_steps |= STEP_VERITY;
1059
1060         /*
1061          * STEP_DECOMPRESS is handled specially, since a compressed file might
1062          * contain both compressed and uncompressed clusters.  We'll allocate a
1063          * bio_post_read_ctx if the file is compressed, but the caller is
1064          * responsible for enabling STEP_DECOMPRESS if it's actually needed.
1065          */
1066
1067         if (post_read_steps || f2fs_compressed_file(inode)) {
1068                 /* Due to the mempool, this never fails. */
1069                 ctx = mempool_alloc(bio_post_read_ctx_pool, GFP_NOFS);
1070                 ctx->bio = bio;
1071                 ctx->sbi = sbi;
1072                 ctx->enabled_steps = post_read_steps;
1073                 ctx->fs_blkaddr = blkaddr;
1074                 ctx->decompression_attempted = false;
1075                 bio->bi_private = ctx;
1076         }
1077         iostat_alloc_and_bind_ctx(sbi, bio, ctx);
1078
1079         return bio;
1080 }
1081
1082 /* This can handle encryption stuffs */
1083 static int f2fs_submit_page_read(struct inode *inode, struct page *page,
1084                                  block_t blkaddr, blk_opf_t op_flags,
1085                                  bool for_write)
1086 {
1087         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1088         struct bio *bio;
1089
1090         bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
1091                                         page->index, for_write);
1092         if (IS_ERR(bio))
1093                 return PTR_ERR(bio);
1094
1095         /* wait for GCed page writeback via META_MAPPING */
1096         f2fs_wait_on_block_writeback(inode, blkaddr);
1097
1098         if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
1099                 iostat_update_and_unbind_ctx(bio);
1100                 if (bio->bi_private)
1101                         mempool_free(bio->bi_private, bio_post_read_ctx_pool);
1102                 bio_put(bio);
1103                 return -EFAULT;
1104         }
1105         inc_page_count(sbi, F2FS_RD_DATA);
1106         f2fs_update_iostat(sbi, NULL, FS_DATA_READ_IO, F2FS_BLKSIZE);
1107         f2fs_submit_read_bio(sbi, bio, DATA);
1108         return 0;
1109 }
1110
1111 static void __set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1112 {
1113         __le32 *addr = get_dnode_addr(dn->inode, dn->node_page);
1114
1115         dn->data_blkaddr = blkaddr;
1116         addr[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
1117 }
1118
1119 /*
1120  * Lock ordering for the change of data block address:
1121  * ->data_page
1122  *  ->node_page
1123  *    update block addresses in the node page
1124  */
1125 void f2fs_set_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
1126 {
1127         f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1128         __set_data_blkaddr(dn, blkaddr);
1129         if (set_page_dirty(dn->node_page))
1130                 dn->node_changed = true;
1131 }
1132
/*
 * Store @blkaddr for the slot described by @dn (see f2fs_set_data_blkaddr())
 * and then refresh the read extent cache from @dn.
 */
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	f2fs_set_data_blkaddr(dn, blkaddr);
	f2fs_update_read_extent_cache(dn);
}
1138
1139 /* dn->ofs_in_node will be returned with up-to-date last block pointer */
1140 int f2fs_reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
1141 {
1142         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1143         int err;
1144
1145         if (!count)
1146                 return 0;
1147
1148         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1149                 return -EPERM;
1150         err = inc_valid_block_count(sbi, dn->inode, &count, true);
1151         if (unlikely(err))
1152                 return err;
1153
1154         trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
1155                                                 dn->ofs_in_node, count);
1156
1157         f2fs_wait_on_page_writeback(dn->node_page, NODE, true, true);
1158
1159         for (; count > 0; dn->ofs_in_node++) {
1160                 block_t blkaddr = f2fs_data_blkaddr(dn);
1161
1162                 if (blkaddr == NULL_ADDR) {
1163                         __set_data_blkaddr(dn, NEW_ADDR);
1164                         count--;
1165                 }
1166         }
1167
1168         if (set_page_dirty(dn->node_page))
1169                 dn->node_changed = true;
1170         return 0;
1171 }
1172
1173 /* Should keep dn->ofs_in_node unchanged */
1174 int f2fs_reserve_new_block(struct dnode_of_data *dn)
1175 {
1176         unsigned int ofs_in_node = dn->ofs_in_node;
1177         int ret;
1178
1179         ret = f2fs_reserve_new_blocks(dn, 1);
1180         dn->ofs_in_node = ofs_in_node;
1181         return ret;
1182 }
1183
1184 int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
1185 {
1186         bool need_put = dn->inode_page ? false : true;
1187         int err;
1188
1189         err = f2fs_get_dnode_of_data(dn, index, ALLOC_NODE);
1190         if (err)
1191                 return err;
1192
1193         if (dn->data_blkaddr == NULL_ADDR)
1194                 err = f2fs_reserve_new_block(dn);
1195         if (err || need_put)
1196                 f2fs_put_dnode(dn);
1197         return err;
1198 }
1199
/*
 * Get the page cache page of @inode at @index and, unless it is already
 * up to date, start reading it from disk.
 *
 * @op_flags:   passed through to the read submission
 * @for_write:  passed to f2fs_grab_cache_page() and to the read submission
 * @next_pgofs: optional; when the lookup ends in -ENOENT it receives the
 *              next page offset worth trying
 *
 * Returns the page, or an ERR_PTR:
 *   -ENOMEM        the cache page could not be grabbed
 *   -ENOENT        there is no dnode for @index or the slot is NULL_ADDR
 *   -EFSCORRUPTED  the block address failed f2fs_is_valid_blkaddr()
 *   other          error from f2fs_get_dnode_of_data() or the read submission
 *
 * The page is unlocked here when it is already up to date or is zero-filled
 * for NEW_ADDR.  When a read is submitted, the page is returned without being
 * unlocked in this function; callers in this file then use lock_page() or
 * wait_on_page_locked() and re-check PageUptodate().
 */
struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index,
				     blk_opf_t op_flags, bool for_write,
				     pgoff_t *next_pgofs)
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
	int err;

	page = f2fs_grab_cache_page(mapping, index, for_write);
	if (!page)
		return ERR_PTR(-ENOMEM);

	/*
	 * Fast path: block address from the read extent cache.  Only
	 * dn.data_blkaddr is filled in on this path; the rest of dn stays
	 * uninitialised and is not used after got_it.
	 */
	if (f2fs_lookup_read_extent_cache_block(inode, index,
						&dn.data_blkaddr)) {
		if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), dn.data_blkaddr,
						DATA_GENERIC_ENHANCE_READ)) {
			err = -EFSCORRUPTED;
			goto put_err;
		}
		goto got_it;
	}

	/* slow path: look the address up in the node tree */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
	if (err) {
		if (err == -ENOENT && next_pgofs)
			*next_pgofs = f2fs_get_next_page_offset(&dn, index);
		goto put_err;
	}
	/* dn.data_blkaddr is still read below after the dnode is released */
	f2fs_put_dnode(&dn);

	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
		err = -ENOENT;
		if (next_pgofs)
			*next_pgofs = index + 1;
		goto put_err;
	}
	/* NEW_ADDR is handled below and is exempt from the validity check */
	if (dn.data_blkaddr != NEW_ADDR &&
			!f2fs_is_valid_blkaddr(F2FS_I_SB(inode),
						dn.data_blkaddr,
						DATA_GENERIC_ENHANCE)) {
		err = -EFSCORRUPTED;
		goto put_err;
	}
got_it:
	if (PageUptodate(page)) {
		unlock_page(page);
		return page;
	}

	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such a case, its blkaddr can remain NEW_ADDR.
	 * See f2fs_add_link -> f2fs_get_new_data_page ->
	 * f2fs_init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
		zero_user_segment(page, 0, PAGE_SIZE);
		if (!PageUptodate(page))
			SetPageUptodate(page);
		unlock_page(page);
		return page;
	}

	err = f2fs_submit_page_read(inode, page, dn.data_blkaddr,
						op_flags, for_write);
	if (err)
		goto put_err;
	return page;

put_err:
	/* NOTE(review): second argument 1 presumably also unlocks the page - confirm */
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}
1276
1277 struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index,
1278                                         pgoff_t *next_pgofs)
1279 {
1280         struct address_space *mapping = inode->i_mapping;
1281         struct page *page;
1282
1283         page = find_get_page(mapping, index);
1284         if (page && PageUptodate(page))
1285                 return page;
1286         f2fs_put_page(page, 0);
1287
1288         page = f2fs_get_read_data_page(inode, index, 0, false, next_pgofs);
1289         if (IS_ERR(page))
1290                 return page;
1291
1292         if (PageUptodate(page))
1293                 return page;
1294
1295         wait_on_page_locked(page);
1296         if (unlikely(!PageUptodate(page))) {
1297                 f2fs_put_page(page, 0);
1298                 return ERR_PTR(-EIO);
1299         }
1300         return page;
1301 }
1302
1303 /*
1304  * If it tries to access a hole, return an error.
1305  * Because, the callers, functions in dir.c and GC, should be able to know
1306  * whether this page exists or not.
1307  */
1308 struct page *f2fs_get_lock_data_page(struct inode *inode, pgoff_t index,
1309                                                         bool for_write)
1310 {
1311         struct address_space *mapping = inode->i_mapping;
1312         struct page *page;
1313
1314         page = f2fs_get_read_data_page(inode, index, 0, for_write, NULL);
1315         if (IS_ERR(page))
1316                 return page;
1317
1318         /* wait for read completion */
1319         lock_page(page);
1320         if (unlikely(page->mapping != mapping || !PageUptodate(page))) {
1321                 f2fs_put_page(page, 1);
1322                 return ERR_PTR(-EIO);
1323         }
1324         return page;
1325 }
1326
1327 /*
1328  * Caller ensures that this data page is never allocated.
1329  * A new zero-filled data page is allocated in the page cache.
1330  *
1331  * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
1332  * f2fs_unlock_op().
1333  * Note that, ipage is set only by make_empty_dir, and if any error occur,
1334  * ipage should be released by this function.
1335  */
1336 struct page *f2fs_get_new_data_page(struct inode *inode,
1337                 struct page *ipage, pgoff_t index, bool new_i_size)
1338 {
1339         struct address_space *mapping = inode->i_mapping;
1340         struct page *page;
1341         struct dnode_of_data dn;
1342         int err;
1343
1344         page = f2fs_grab_cache_page(mapping, index, true);
1345         if (!page) {
1346                 /*
1347                  * before exiting, we should make sure ipage will be released
1348                  * if any error occur.
1349                  */
1350                 f2fs_put_page(ipage, 1);
1351                 return ERR_PTR(-ENOMEM);
1352         }
1353
1354         set_new_dnode(&dn, inode, ipage, NULL, 0);
1355         err = f2fs_reserve_block(&dn, index);
1356         if (err) {
1357                 f2fs_put_page(page, 1);
1358                 return ERR_PTR(err);
1359         }
1360         if (!ipage)
1361                 f2fs_put_dnode(&dn);
1362
1363         if (PageUptodate(page))
1364                 goto got_it;
1365
1366         if (dn.data_blkaddr == NEW_ADDR) {
1367                 zero_user_segment(page, 0, PAGE_SIZE);
1368                 if (!PageUptodate(page))
1369                         SetPageUptodate(page);
1370         } else {
1371                 f2fs_put_page(page, 1);
1372
1373                 /* if ipage exists, blkaddr should be NEW_ADDR */
1374                 f2fs_bug_on(F2FS_I_SB(inode), ipage);
1375                 page = f2fs_get_lock_data_page(inode, index, true);
1376                 if (IS_ERR(page))
1377                         return page;
1378         }
1379 got_it:
1380         if (new_i_size && i_size_read(inode) <
1381                                 ((loff_t)(index + 1) << PAGE_SHIFT))
1382                 f2fs_i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
1383         return page;
1384 }
1385
1386 static int __allocate_data_block(struct dnode_of_data *dn, int seg_type)
1387 {
1388         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1389         struct f2fs_summary sum;
1390         struct node_info ni;
1391         block_t old_blkaddr;
1392         blkcnt_t count = 1;
1393         int err;
1394
1395         if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
1396                 return -EPERM;
1397
1398         err = f2fs_get_node_info(sbi, dn->nid, &ni, false);
1399         if (err)
1400                 return err;
1401
1402         dn->data_blkaddr = f2fs_data_blkaddr(dn);
1403         if (dn->data_blkaddr == NULL_ADDR) {
1404                 err = inc_valid_block_count(sbi, dn->inode, &count, true);
1405                 if (unlikely(err))
1406                         return err;
1407         }
1408
1409         set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
1410         old_blkaddr = dn->data_blkaddr;
1411         err = f2fs_allocate_data_block(sbi, NULL, old_blkaddr,
1412                                 &dn->data_blkaddr, &sum, seg_type, NULL);
1413         if (err)
1414                 return err;
1415
1416         if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
1417                 f2fs_invalidate_internal_cache(sbi, old_blkaddr);
1418
1419         f2fs_update_data_blkaddr(dn, dn->data_blkaddr);
1420         return 0;
1421 }
1422
1423 static void f2fs_map_lock(struct f2fs_sb_info *sbi, int flag)
1424 {
1425         if (flag == F2FS_GET_BLOCK_PRE_AIO)
1426                 f2fs_down_read(&sbi->node_change);
1427         else
1428                 f2fs_lock_op(sbi);
1429 }
1430
1431 static void f2fs_map_unlock(struct f2fs_sb_info *sbi, int flag)
1432 {
1433         if (flag == F2FS_GET_BLOCK_PRE_AIO)
1434                 f2fs_up_read(&sbi->node_change);
1435         else
1436                 f2fs_unlock_op(sbi);
1437 }
1438
1439 int f2fs_get_block_locked(struct dnode_of_data *dn, pgoff_t index)
1440 {
1441         struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
1442         int err = 0;
1443
1444         f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
1445         if (!f2fs_lookup_read_extent_cache_block(dn->inode, index,
1446                                                 &dn->data_blkaddr))
1447                 err = f2fs_reserve_block(dn, index);
1448         f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
1449
1450         return err;
1451 }
1452
1453 static int f2fs_map_no_dnode(struct inode *inode,
1454                 struct f2fs_map_blocks *map, struct dnode_of_data *dn,
1455                 pgoff_t pgoff)
1456 {
1457         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1458
1459         /*
1460          * There is one exceptional case that read_node_page() may return
1461          * -ENOENT due to filesystem has been shutdown or cp_error, return
1462          * -EIO in that case.
1463          */
1464         if (map->m_may_create &&
1465             (is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN) || f2fs_cp_error(sbi)))
1466                 return -EIO;
1467
1468         if (map->m_next_pgofs)
1469                 *map->m_next_pgofs = f2fs_get_next_page_offset(dn, pgoff);
1470         if (map->m_next_extent)
1471                 *map->m_next_extent = f2fs_get_next_page_offset(dn, pgoff);
1472         return 0;
1473 }
1474
1475 static bool f2fs_map_blocks_cached(struct inode *inode,
1476                 struct f2fs_map_blocks *map, int flag)
1477 {
1478         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1479         unsigned int maxblocks = map->m_len;
1480         pgoff_t pgoff = (pgoff_t)map->m_lblk;
1481         struct extent_info ei = {};
1482
1483         if (!f2fs_lookup_read_extent_cache(inode, pgoff, &ei))
1484                 return false;
1485
1486         map->m_pblk = ei.blk + pgoff - ei.fofs;
1487         map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgoff);
1488         map->m_flags = F2FS_MAP_MAPPED;
1489         if (map->m_next_extent)
1490                 *map->m_next_extent = pgoff + map->m_len;
1491
1492         /* for hardware encryption, but to avoid potential issue in future */
1493         if (flag == F2FS_GET_BLOCK_DIO)
1494                 f2fs_wait_on_block_writeback_range(inode,
1495                                         map->m_pblk, map->m_len);
1496
1497         if (f2fs_allow_multi_device_dio(sbi, flag)) {
1498                 int bidx = f2fs_target_device_index(sbi, map->m_pblk);
1499                 struct f2fs_dev_info *dev = &sbi->devs[bidx];
1500
1501                 map->m_bdev = dev->bdev;
1502                 map->m_pblk -= dev->start_blk;
1503                 map->m_len = min(map->m_len, dev->end_blk + 1 - map->m_pblk);
1504         } else {
1505                 map->m_bdev = inode->i_sb->s_bdev;
1506         }
1507         return true;
1508 }
1509
/*
 * f2fs_map_blocks() tries to find or build a mapping that maps contiguous
 * logical blocks to physical blocks, and returns that information via the
 * f2fs_map_blocks structure.
 */
1515 int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map, int flag)
1516 {
1517         unsigned int maxblocks = map->m_len;
1518         struct dnode_of_data dn;
1519         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1520         int mode = map->m_may_create ? ALLOC_NODE : LOOKUP_NODE;
1521         pgoff_t pgofs, end_offset, end;
1522         int err = 0, ofs = 1;
1523         unsigned int ofs_in_node, last_ofs_in_node;
1524         blkcnt_t prealloc;
1525         block_t blkaddr;
1526         unsigned int start_pgofs;
1527         int bidx = 0;
1528         bool is_hole;
1529
1530         if (!maxblocks)
1531                 return 0;
1532
1533         if (!map->m_may_create && f2fs_map_blocks_cached(inode, map, flag))
1534                 goto out;
1535
1536         map->m_bdev = inode->i_sb->s_bdev;
1537         map->m_multidev_dio =
1538                 f2fs_allow_multi_device_dio(F2FS_I_SB(inode), flag);
1539
1540         map->m_len = 0;
1541         map->m_flags = 0;
1542
1543         /* it only supports block size == page size */
1544         pgofs = (pgoff_t)map->m_lblk;
1545         end = pgofs + maxblocks;
1546
1547 next_dnode:
1548         if (map->m_may_create)
1549                 f2fs_map_lock(sbi, flag);
1550
1551         /* When reading holes, we need its node page */
1552         set_new_dnode(&dn, inode, NULL, NULL, 0);
1553         err = f2fs_get_dnode_of_data(&dn, pgofs, mode);
1554         if (err) {
1555                 if (flag == F2FS_GET_BLOCK_BMAP)
1556                         map->m_pblk = 0;
1557                 if (err == -ENOENT)
1558                         err = f2fs_map_no_dnode(inode, map, &dn, pgofs);
1559                 goto unlock_out;
1560         }
1561
1562         start_pgofs = pgofs;
1563         prealloc = 0;
1564         last_ofs_in_node = ofs_in_node = dn.ofs_in_node;
1565         end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
1566
1567 next_block:
1568         blkaddr = f2fs_data_blkaddr(&dn);
1569         is_hole = !__is_valid_data_blkaddr(blkaddr);
1570         if (!is_hole &&
1571             !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE)) {
1572                 err = -EFSCORRUPTED;
1573                 goto sync_out;
1574         }
1575
1576         /* use out-place-update for direct IO under LFS mode */
1577         if (map->m_may_create &&
1578             (is_hole || (f2fs_lfs_mode(sbi) && flag == F2FS_GET_BLOCK_DIO))) {
1579                 if (unlikely(f2fs_cp_error(sbi))) {
1580                         err = -EIO;
1581                         goto sync_out;
1582                 }
1583
1584                 switch (flag) {
1585                 case F2FS_GET_BLOCK_PRE_AIO:
1586                         if (blkaddr == NULL_ADDR) {
1587                                 prealloc++;
1588                                 last_ofs_in_node = dn.ofs_in_node;
1589                         }
1590                         break;
1591                 case F2FS_GET_BLOCK_PRE_DIO:
1592                 case F2FS_GET_BLOCK_DIO:
1593                         err = __allocate_data_block(&dn, map->m_seg_type);
1594                         if (err)
1595                                 goto sync_out;
1596                         if (flag == F2FS_GET_BLOCK_PRE_DIO)
1597                                 file_need_truncate(inode);
1598                         set_inode_flag(inode, FI_APPEND_WRITE);
1599                         break;
1600                 default:
1601                         WARN_ON_ONCE(1);
1602                         err = -EIO;
1603                         goto sync_out;
1604                 }
1605
1606                 blkaddr = dn.data_blkaddr;
1607                 if (is_hole)
1608                         map->m_flags |= F2FS_MAP_NEW;
1609         } else if (is_hole) {
1610                 if (f2fs_compressed_file(inode) &&
1611                     f2fs_sanity_check_cluster(&dn)) {
1612                         err = -EFSCORRUPTED;
1613                         f2fs_handle_error(sbi,
1614                                         ERROR_CORRUPTED_CLUSTER);
1615                         goto sync_out;
1616                 }
1617
1618                 switch (flag) {
1619                 case F2FS_GET_BLOCK_PRECACHE:
1620                         goto sync_out;
1621                 case F2FS_GET_BLOCK_BMAP:
1622                         map->m_pblk = 0;
1623                         goto sync_out;
1624                 case F2FS_GET_BLOCK_FIEMAP:
1625                         if (blkaddr == NULL_ADDR) {
1626                                 if (map->m_next_pgofs)
1627                                         *map->m_next_pgofs = pgofs + 1;
1628                                 goto sync_out;
1629                         }
1630                         break;
1631                 default:
1632                         /* for defragment case */
1633                         if (map->m_next_pgofs)
1634                                 *map->m_next_pgofs = pgofs + 1;
1635                         goto sync_out;
1636                 }
1637         }
1638
1639         if (flag == F2FS_GET_BLOCK_PRE_AIO)
1640                 goto skip;
1641
1642         if (map->m_multidev_dio)
1643                 bidx = f2fs_target_device_index(sbi, blkaddr);
1644
1645         if (map->m_len == 0) {
1646                 /* reserved delalloc block should be mapped for fiemap. */
1647                 if (blkaddr == NEW_ADDR)
1648                         map->m_flags |= F2FS_MAP_DELALLOC;
1649                 map->m_flags |= F2FS_MAP_MAPPED;
1650
1651                 map->m_pblk = blkaddr;
1652                 map->m_len = 1;
1653
1654                 if (map->m_multidev_dio)
1655                         map->m_bdev = FDEV(bidx).bdev;
1656         } else if ((map->m_pblk != NEW_ADDR &&
1657                         blkaddr == (map->m_pblk + ofs)) ||
1658                         (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
1659                         flag == F2FS_GET_BLOCK_PRE_DIO) {
1660                 if (map->m_multidev_dio && map->m_bdev != FDEV(bidx).bdev)
1661                         goto sync_out;
1662                 ofs++;
1663                 map->m_len++;
1664         } else {
1665                 goto sync_out;
1666         }
1667
1668 skip:
1669         dn.ofs_in_node++;
1670         pgofs++;
1671
1672         /* preallocate blocks in batch for one dnode page */
1673         if (flag == F2FS_GET_BLOCK_PRE_AIO &&
1674                         (pgofs == end || dn.ofs_in_node == end_offset)) {
1675
1676                 dn.ofs_in_node = ofs_in_node;
1677                 err = f2fs_reserve_new_blocks(&dn, prealloc);
1678                 if (err)
1679                         goto sync_out;
1680
1681                 map->m_len += dn.ofs_in_node - ofs_in_node;
1682                 if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
1683                         err = -ENOSPC;
1684                         goto sync_out;
1685                 }
1686                 dn.ofs_in_node = end_offset;
1687         }
1688
1689         if (pgofs >= end)
1690                 goto sync_out;
1691         else if (dn.ofs_in_node < end_offset)
1692                 goto next_block;
1693
1694         if (flag == F2FS_GET_BLOCK_PRECACHE) {
1695                 if (map->m_flags & F2FS_MAP_MAPPED) {
1696                         unsigned int ofs = start_pgofs - map->m_lblk;
1697
1698                         f2fs_update_read_extent_cache_range(&dn,
1699                                 start_pgofs, map->m_pblk + ofs,
1700                                 map->m_len - ofs);
1701                 }
1702         }
1703
1704         f2fs_put_dnode(&dn);
1705
1706         if (map->m_may_create) {
1707                 f2fs_map_unlock(sbi, flag);
1708                 f2fs_balance_fs(sbi, dn.node_changed);
1709         }
1710         goto next_dnode;
1711
1712 sync_out:
1713
1714         if (flag == F2FS_GET_BLOCK_DIO && map->m_flags & F2FS_MAP_MAPPED) {
1715                 /*
1716                  * for hardware encryption, but to avoid potential issue
1717                  * in future
1718                  */
1719                 f2fs_wait_on_block_writeback_range(inode,
1720                                                 map->m_pblk, map->m_len);
1721
1722                 if (map->m_multidev_dio) {
1723                         block_t blk_addr = map->m_pblk;
1724
1725                         bidx = f2fs_target_device_index(sbi, map->m_pblk);
1726
1727                         map->m_bdev = FDEV(bidx).bdev;
1728                         map->m_pblk -= FDEV(bidx).start_blk;
1729
1730                         if (map->m_may_create)
1731                                 f2fs_update_device_state(sbi, inode->i_ino,
1732                                                         blk_addr, map->m_len);
1733
1734                         f2fs_bug_on(sbi, blk_addr + map->m_len >
1735                                                 FDEV(bidx).end_blk + 1);
1736                 }
1737         }
1738
1739         if (flag == F2FS_GET_BLOCK_PRECACHE) {
1740                 if (map->m_flags & F2FS_MAP_MAPPED) {
1741                         unsigned int ofs = start_pgofs - map->m_lblk;
1742
1743                         f2fs_update_read_extent_cache_range(&dn,
1744                                 start_pgofs, map->m_pblk + ofs,
1745                                 map->m_len - ofs);
1746                 }
1747                 if (map->m_next_extent)
1748                         *map->m_next_extent = pgofs + 1;
1749         }
1750         f2fs_put_dnode(&dn);
1751 unlock_out:
1752         if (map->m_may_create) {
1753                 f2fs_map_unlock(sbi, flag);
1754                 f2fs_balance_fs(sbi, dn.node_changed);
1755         }
1756 out:
1757         trace_f2fs_map_blocks(inode, map, flag, err);
1758         return err;
1759 }
1760
1761 bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
1762 {
1763         struct f2fs_map_blocks map;
1764         block_t last_lblk;
1765         int err;
1766
1767         if (pos + len > i_size_read(inode))
1768                 return false;
1769
1770         map.m_lblk = F2FS_BYTES_TO_BLK(pos);
1771         map.m_next_pgofs = NULL;
1772         map.m_next_extent = NULL;
1773         map.m_seg_type = NO_CHECK_TYPE;
1774         map.m_may_create = false;
1775         last_lblk = F2FS_BLK_ALIGN(pos + len);
1776
1777         while (map.m_lblk < last_lblk) {
1778                 map.m_len = last_lblk - map.m_lblk;
1779                 err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DEFAULT);
1780                 if (err || map.m_len == 0)
1781                         return false;
1782                 map.m_lblk += map.m_len;
1783         }
1784         return true;
1785 }
1786
1787 static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
1788 {
1789         return (bytes >> inode->i_blkbits);
1790 }
1791
1792 static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
1793 {
1794         return (blks << inode->i_blkbits);
1795 }
1796
1797 static int f2fs_xattr_fiemap(struct inode *inode,
1798                                 struct fiemap_extent_info *fieinfo)
1799 {
1800         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1801         struct page *page;
1802         struct node_info ni;
1803         __u64 phys = 0, len;
1804         __u32 flags;
1805         nid_t xnid = F2FS_I(inode)->i_xattr_nid;
1806         int err = 0;
1807
1808         if (f2fs_has_inline_xattr(inode)) {
1809                 int offset;
1810
1811                 page = f2fs_grab_cache_page(NODE_MAPPING(sbi),
1812                                                 inode->i_ino, false);
1813                 if (!page)
1814                         return -ENOMEM;
1815
1816                 err = f2fs_get_node_info(sbi, inode->i_ino, &ni, false);
1817                 if (err) {
1818                         f2fs_put_page(page, 1);
1819                         return err;
1820                 }
1821
1822                 phys = blks_to_bytes(inode, ni.blk_addr);
1823                 offset = offsetof(struct f2fs_inode, i_addr) +
1824                                         sizeof(__le32) * (DEF_ADDRS_PER_INODE -
1825                                         get_inline_xattr_addrs(inode));
1826
1827                 phys += offset;
1828                 len = inline_xattr_size(inode);
1829
1830                 f2fs_put_page(page, 1);
1831
1832                 flags = FIEMAP_EXTENT_DATA_INLINE | FIEMAP_EXTENT_NOT_ALIGNED;
1833
1834                 if (!xnid)
1835                         flags |= FIEMAP_EXTENT_LAST;
1836
1837                 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1838                 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1839                 if (err)
1840                         return err;
1841         }
1842
1843         if (xnid) {
1844                 page = f2fs_grab_cache_page(NODE_MAPPING(sbi), xnid, false);
1845                 if (!page)
1846                         return -ENOMEM;
1847
1848                 err = f2fs_get_node_info(sbi, xnid, &ni, false);
1849                 if (err) {
1850                         f2fs_put_page(page, 1);
1851                         return err;
1852                 }
1853
1854                 phys = blks_to_bytes(inode, ni.blk_addr);
1855                 len = inode->i_sb->s_blocksize;
1856
1857                 f2fs_put_page(page, 1);
1858
1859                 flags = FIEMAP_EXTENT_LAST;
1860         }
1861
1862         if (phys) {
1863                 err = fiemap_fill_next_extent(fieinfo, 0, phys, len, flags);
1864                 trace_f2fs_fiemap(inode, 0, phys, len, flags, err);
1865         }
1866
1867         return (err < 0 ? err : 0);
1868 }
1869
1870 static loff_t max_inode_blocks(struct inode *inode)
1871 {
1872         loff_t result = ADDRS_PER_INODE(inode);
1873         loff_t leaf_count = ADDRS_PER_BLOCK(inode);
1874
1875         /* two direct node blocks */
1876         result += (leaf_count * 2);
1877
1878         /* two indirect node blocks */
1879         leaf_count *= NIDS_PER_BLOCK;
1880         result += (leaf_count * 2);
1881
1882         /* one double indirect node block */
1883         leaf_count *= NIDS_PER_BLOCK;
1884         result += leaf_count;
1885
1886         return result;
1887 }
1888
/*
 * f2fs_fiemap() - report the extent layout of @inode for a fiemap request
 * @inode:   inode being queried
 * @fieinfo: fiemap request state; extents are appended to it with
 *           fiemap_fill_next_extent()
 * @start:   first byte of the range to report
 * @len:     length of the range in bytes
 *
 * The data path walks the range with repeated f2fs_map_blocks() calls
 * (F2FS_GET_BLOCK_FIEMAP).  An extent discovered in one iteration is
 * stored in logical/phys/size/flags and only handed to
 * fiemap_fill_next_extent() at the start of a later iteration, so that
 * blocks of a compressed cluster can still be appended to it.
 *
 * Return: 0 on success (a return of 1 from a callee is folded to 0 at
 * the "out" label), -EFBIG if @start is beyond the maximum file size,
 * -EINTR if a fatal signal is pending, or another negative errno
 * propagated from a callee.
 */
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                u64 start, u64 len)
{
        struct f2fs_map_blocks map;
        sector_t start_blk, last_blk;
        pgoff_t next_pgofs;
        /* extent found so far but not yet reported (size == 0: none) */
        u64 logical = 0, phys = 0, size = 0;
        u32 flags = 0;
        int ret = 0;
        /* compr_cluster: currently walking the inside of a compressed cluster */
        bool compr_cluster = false, compr_appended;
        unsigned int cluster_size = F2FS_I(inode)->i_cluster_size;
        /* blocks of the current compressed cluster accounted for so far */
        unsigned int count_in_cluster = 0;
        loff_t maxbytes;

        /* Precaching is done before the inode lock is taken. */
        if (fieinfo->fi_flags & FIEMAP_FLAG_CACHE) {
                ret = f2fs_precache_extents(inode);
                if (ret)
                        return ret;
        }

        ret = fiemap_prep(inode, fieinfo, start, &len, FIEMAP_FLAG_XATTR);
        if (ret)
                return ret;

        /* Held (shared) until the "out" label. */
        inode_lock_shared(inode);

        maxbytes = max_file_blocks(inode) << F2FS_BLKSIZE_BITS;
        if (start > maxbytes) {
                ret = -EFBIG;
                goto out;
        }

        /* Clamp the range so that start + len does not exceed maxbytes. */
        if (len > maxbytes || (maxbytes - len) < start)
                len = maxbytes - start;

        /* An xattr request is answered entirely by f2fs_xattr_fiemap(). */
        if (fieinfo->fi_flags & FIEMAP_FLAG_XATTR) {
                ret = f2fs_xattr_fiemap(inode, fieinfo);
                goto out;
        }

        /*
         * Inline data/dentry: anything other than -EAGAIN is the final
         * answer; -EAGAIN falls through to the block-mapping walk below.
         */
        if (f2fs_has_inline_data(inode) || f2fs_has_inline_dentry(inode)) {
                ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
                if (ret != -EAGAIN)
                        goto out;
        }

        /* A range shorter than one block is widened to one block. */
        if (bytes_to_blks(inode, len) == 0)
                len = blks_to_bytes(inode, 1);

        start_blk = bytes_to_blks(inode, start);
        last_blk = bytes_to_blks(inode, start + len - 1);

next:
        memset(&map, 0, sizeof(map));
        map.m_lblk = start_blk;
        map.m_len = bytes_to_blks(inode, len);
        map.m_next_pgofs = &next_pgofs;
        map.m_seg_type = NO_CHECK_TYPE;

        /*
         * Inside a compressed cluster start_blk still points at the
         * cluster's first block, so look up from the block after it and
         * only for the blocks of the cluster not yet accounted for.
         */
        if (compr_cluster) {
                map.m_lblk += 1;
                map.m_len = cluster_size - count_in_cluster;
        }

        ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
        if (ret)
                goto out;

        /* HOLE */
        if (!compr_cluster && !(map.m_flags & F2FS_MAP_FLAGS)) {
                /* Resume from the offset f2fs_map_blocks() stored in next_pgofs. */
                start_blk = next_pgofs;

                if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
                                                max_inode_blocks(inode)))
                        goto prep_next;

                /* Nothing further can be mapped: the pending extent is the last. */
                flags |= FIEMAP_EXTENT_LAST;
        }

        compr_appended = false;
        /* In a case of compressed cluster, append this to the last extent */
        if (compr_cluster && ((map.m_flags & F2FS_MAP_DELALLOC) ||
                        !(map.m_flags & F2FS_MAP_FLAGS))) {
                compr_appended = true;
                goto skip_fill;
        }

        /* Report the extent remembered from an earlier iteration, if any. */
        if (size) {
                flags |= FIEMAP_EXTENT_MERGED;
                if (IS_ENCRYPTED(inode))
                        flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

                ret = fiemap_fill_next_extent(fieinfo, logical,
                                phys, size, flags);
                trace_f2fs_fiemap(inode, logical, phys, size, flags, ret);
                if (ret)
                        goto out;
                size = 0;
        }

        /* The walk has moved past the requested range. */
        if (start_blk > last_blk)
                goto out;

skip_fill:
        if (map.m_pblk == COMPRESS_ADDR) {
                /*
                 * First block of a compressed cluster: start tracking the
                 * cluster; start_blk is deliberately left unchanged here.
                 */
                compr_cluster = true;
                count_in_cluster = 1;
        } else if (compr_appended) {
                /*
                 * Rest of the cluster is delalloc or unmapped: extend the
                 * pending extent over the remaining blocks (the "+ 1"
                 * covers the cluster's first block that start_blk has not
                 * been advanced past) and leave the cluster.
                 */
                unsigned int appended_blks = cluster_size -
                                                count_in_cluster + 1;
                size += blks_to_bytes(inode, appended_blks);
                start_blk += appended_blks;
                compr_cluster = false;
        } else {
                /* Begin a new pending extent from this mapping. */
                logical = blks_to_bytes(inode, start_blk);
                phys = __is_valid_data_blkaddr(map.m_pblk) ?
                        blks_to_bytes(inode, map.m_pblk) : 0;
                size = blks_to_bytes(inode, map.m_len);
                flags = 0;

                if (compr_cluster) {
                        flags = FIEMAP_EXTENT_ENCODED;
                        count_in_cluster += map.m_len;
                        if (count_in_cluster == cluster_size) {
                                /*
                                 * Whole cluster seen: add one block for the
                                 * cluster's first block, which logical
                                 * already points at.
                                 */
                                compr_cluster = false;
                                size += blks_to_bytes(inode, 1);
                        }
                } else if (map.m_flags & F2FS_MAP_DELALLOC) {
                        flags = FIEMAP_EXTENT_UNWRITTEN;
                }

                start_blk += bytes_to_blks(inode, size);
        }

prep_next:
        cond_resched();
        if (fatal_signal_pending(current))
                ret = -EINTR;
        else
                goto next;
out:
        /* A callee returning 1 is not an error for our caller. */
        if (ret == 1)
                ret = 0;

        inode_unlock_shared(inode);
        return ret;
}
2036
2037 static inline loff_t f2fs_readpage_limit(struct inode *inode)
2038 {
2039         if (IS_ENABLED(CONFIG_FS_VERITY) && IS_VERITY(inode))
2040                 return inode->i_sb->s_maxbytes;
2041
2042         return i_size_read(inode);
2043 }
2044
/*
 * f2fs_read_single_page() - handle the read of one data page
 * @inode:             inode the page belongs to
 * @page:              page to fill
 * @nr_pages:          bounds how many blocks are mapped ahead of this page;
 *                     also passed to f2fs_grab_read_bio()
 * @map:               mapping result carried over between calls; reused when
 *                     it already covers this page's block
 * @bio_ret:           in/out: read bio being built; always written back
 * @last_block_in_bio: in/out: block address of the last page added to the bio
 * @is_readahead:      a newly allocated bio gets REQ_RAHEAD when true
 *
 * A page beyond the read limit, or one whose block is not mapped, is
 * zero-filled, marked uptodate and unlocked here.  A mapped page is added
 * to the bio; the pending bio is submitted first if the page cannot be
 * merged into it.
 *
 * Return: 0 on success or a negative errno.  None of the error paths in
 * this function unlock @page.
 */
static int f2fs_read_single_page(struct inode *inode, struct page *page,
                                        unsigned nr_pages,
                                        struct f2fs_map_blocks *map,
                                        struct bio **bio_ret,
                                        sector_t *last_block_in_bio,
                                        bool is_readahead)
{
        struct bio *bio = *bio_ret;
        const unsigned blocksize = blks_to_bytes(inode, 1);
        sector_t block_in_file;
        sector_t last_block;
        sector_t last_block_in_file;
        sector_t block_nr;
        int ret = 0;

        block_in_file = (sector_t)page_index(page);
        last_block = block_in_file + nr_pages;
        /* read limit rounded up to a whole number of blocks */
        last_block_in_file = bytes_to_blks(inode,
                        f2fs_readpage_limit(inode) + blocksize - 1);
        if (last_block > last_block_in_file)
                last_block = last_block_in_file;

        /* just zeroing out page which is beyond EOF */
        if (block_in_file >= last_block)
                goto zero_out;
        /*
         * Map blocks using the previous result first.
         * Note the strict '>' on the lower bound: a page sitting exactly
         * at map->m_lblk is looked up again.
         */
        if ((map->m_flags & F2FS_MAP_MAPPED) &&
                        block_in_file > map->m_lblk &&
                        block_in_file < (map->m_lblk + map->m_len))
                goto got_it;

        /*
         * Then do more f2fs_map_blocks() calls until we are
         * done with this page.
         */
        map->m_lblk = block_in_file;
        map->m_len = last_block - block_in_file;

        ret = f2fs_map_blocks(inode, map, F2FS_GET_BLOCK_DEFAULT);
        if (ret)
                goto out;
got_it:
        if ((map->m_flags & F2FS_MAP_MAPPED)) {
                /* physical block = start of the mapping + offset within it */
                block_nr = map->m_pblk + block_in_file - map->m_lblk;
                SetPageMappedToDisk(page);

                if (!f2fs_is_valid_blkaddr(F2FS_I_SB(inode), block_nr,
                                                DATA_GENERIC_ENHANCE_READ)) {
                        ret = -EFSCORRUPTED;
                        goto out;
                }
        } else {
zero_out:
                /* Reached for unmapped blocks and, via goto, for pages beyond EOF. */
                zero_user_segment(page, 0, PAGE_SIZE);
                if (f2fs_need_verity(inode, page->index) &&
                    !fsverity_verify_page(page)) {
                        ret = -EIO;
                        goto out;
                }
                if (!PageUptodate(page))
                        SetPageUptodate(page);
                unlock_page(page);
                goto out;
        }

        /*
         * This page will go to BIO.  Do we need to send this
         * BIO off first?
         */
        if (bio && (!page_is_mergeable(F2FS_I_SB(inode), bio,
                                       *last_block_in_bio, block_nr) ||
                    !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
submit_and_realloc:
                /* Also reached when bio_add_page() below could not take the page. */
                f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
                bio = NULL;
        }
        if (bio == NULL) {
                bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
                                is_readahead ? REQ_RAHEAD : 0, page->index,
                                false);
                if (IS_ERR(bio)) {
                        ret = PTR_ERR(bio);
                        /* never hand an ERR_PTR back through *bio_ret */
                        bio = NULL;
                        goto out;
                }
        }

        /*
         * If the page is under writeback, we need to wait for
         * its completion to see the correct decrypted data.
         */
        f2fs_wait_on_block_writeback(inode, block_nr);

        if (bio_add_page(bio, page, blocksize, 0) < blocksize)
                goto submit_and_realloc;

        inc_page_count(F2FS_I_SB(inode), F2FS_RD_DATA);
        f2fs_update_iostat(F2FS_I_SB(inode), NULL, FS_DATA_READ_IO,
                                                        F2FS_BLKSIZE);
        *last_block_in_bio = block_nr;
out:
        *bio_ret = bio;
        return ret;
}
2151
2152 #ifdef CONFIG_F2FS_FS_COMPRESSION
2153 int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
2154                                 unsigned nr_pages, sector_t *last_block_in_bio,
2155                                 bool is_readahead, bool for_write)
2156 {
2157         struct dnode_of_data dn;
2158         struct inode *inode = cc->inode;
2159         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2160         struct bio *bio = *bio_ret;
2161         unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
2162         sector_t last_block_in_file;
2163         const unsigned blocksize = blks_to_bytes(inode, 1);
2164         struct decompress_io_ctx *dic = NULL;
2165         struct extent_info ei = {};
2166         bool from_dnode = true;
2167         int i;
2168         int ret = 0;
2169
2170         f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
2171
2172         last_block_in_file = bytes_to_blks(inode,
2173                         f2fs_readpage_limit(inode) + blocksize - 1);
2174
2175         /* get rid of pages beyond EOF */
2176         for (i = 0; i < cc->cluster_size; i++) {
2177                 struct page *page = cc->rpages[i];
2178
2179                 if (!page)
2180                         continue;
2181                 if ((sector_t)page->index >= last_block_in_file) {
2182                         zero_user_segment(page, 0, PAGE_SIZE);
2183                         if (!PageUptodate(page))
2184                                 SetPageUptodate(page);
2185                 } else if (!PageUptodate(page)) {
2186                         continue;
2187                 }
2188                 unlock_page(page);
2189                 if (for_write)
2190                         put_page(page);
2191                 cc->rpages[i] = NULL;
2192                 cc->nr_rpages--;
2193         }
2194
2195         /* we are done since all pages are beyond EOF */
2196         if (f2fs_cluster_is_empty(cc))
2197                 goto out;
2198
2199         if (f2fs_lookup_read_extent_cache(inode, start_idx, &ei))
2200                 from_dnode = false;
2201
2202         if (!from_dnode)
2203                 goto skip_reading_dnode;
2204
2205         set_new_dnode(&dn, inode, NULL, NULL, 0);
2206         ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
2207         if (ret)
2208                 goto out;
2209
2210         if (unlikely(f2fs_cp_error(sbi))) {
2211                 ret = -EIO;
2212                 goto out_put_dnode;
2213         }
2214         f2fs_bug_on(sbi, dn.data_blkaddr != COMPRESS_ADDR);
2215
2216 skip_reading_dnode:
2217         for (i = 1; i < cc->cluster_size; i++) {
2218                 block_t blkaddr;
2219
2220                 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2221                                         dn.ofs_in_node + i) :
2222                                         ei.blk + i - 1;
2223
2224                 if (!__is_valid_data_blkaddr(blkaddr))
2225                         break;
2226
2227                 if (!f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC)) {
2228                         ret = -EFAULT;
2229                         goto out_put_dnode;
2230                 }
2231                 cc->nr_cpages++;
2232
2233                 if (!from_dnode && i >= ei.c_len)
2234                         break;
2235         }
2236
2237         /* nothing to decompress */
2238         if (cc->nr_cpages == 0) {
2239                 ret = 0;
2240                 goto out_put_dnode;
2241         }
2242
2243         dic = f2fs_alloc_dic(cc);
2244         if (IS_ERR(dic)) {
2245                 ret = PTR_ERR(dic);
2246                 goto out_put_dnode;
2247         }
2248
2249         for (i = 0; i < cc->nr_cpages; i++) {
2250                 struct page *page = dic->cpages[i];
2251                 block_t blkaddr;
2252                 struct bio_post_read_ctx *ctx;
2253
2254                 blkaddr = from_dnode ? data_blkaddr(dn.inode, dn.node_page,
2255                                         dn.ofs_in_node + i + 1) :
2256                                         ei.blk + i;
2257
2258                 f2fs_wait_on_block_writeback(inode, blkaddr);
2259
2260                 if (f2fs_load_compressed_page(sbi, page, blkaddr)) {
2261                         if (atomic_dec_and_test(&dic->remaining_pages)) {
2262                                 f2fs_decompress_cluster(dic, true);
2263                                 break;
2264                         }
2265                         continue;
2266                 }
2267
2268                 if (bio && (!page_is_mergeable(sbi, bio,
2269                                         *last_block_in_bio, blkaddr) ||
2270                     !f2fs_crypt_mergeable_bio(bio, inode, page->index, NULL))) {
2271 submit_and_realloc:
2272                         f2fs_submit_read_bio(sbi, bio, DATA);
2273                         bio = NULL;
2274                 }
2275
2276                 if (!bio) {
2277                         bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
2278                                         is_readahead ? REQ_RAHEAD : 0,
2279                                         page->index, for_write);
2280                         if (IS_ERR(bio)) {
2281                                 ret = PTR_ERR(bio);
2282                                 f2fs_decompress_end_io(dic, ret, true);
2283                                 f2fs_put_dnode(&dn);
2284                                 *bio_ret = NULL;
2285                                 return ret;
2286                         }
2287                 }
2288
2289                 if (bio_add_page(bio, page, blocksize, 0) < blocksize)
2290                         goto submit_and_realloc;
2291
2292                 ctx = get_post_read_ctx(bio);
2293                 ctx->enabled_steps |= STEP_DECOMPRESS;
2294                 refcount_inc(&dic->refcnt);
2295
2296                 inc_page_count(sbi, F2FS_RD_DATA);
2297                 f2fs_update_iostat(sbi, inode, FS_DATA_READ_IO, F2FS_BLKSIZE);
2298                 *last_block_in_bio = blkaddr;
2299         }
2300
2301         if (from_dnode)
2302                 f2fs_put_dnode(&dn);
2303
2304         *bio_ret = bio;
2305         return 0;
2306
2307 out_put_dnode:
2308         if (from_dnode)
2309                 f2fs_put_dnode(&dn);
2310 out:
2311         for (i = 0; i < cc->cluster_size; i++) {
2312                 if (cc->rpages[i]) {
2313                         ClearPageUptodate(cc->rpages[i]);
2314                         unlock_page(cc->rpages[i]);
2315                 }
2316         }
2317         *bio_ret = bio;
2318         return ret;
2319 }
2320 #endif
2321
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 *
 * @inode: inode being read
 * @rac:   readahead request, or NULL for a single-page read
 * @page:  the page to read when @rac is NULL; with a @rac the pages are
 *         taken from readahead_page() and this argument is overwritten
 *
 * Each page is either collected into a compressed-cluster context and read
 * through f2fs_read_multi_pages(), or read through f2fs_read_single_page().
 * Any bio still pending after the loop is submitted before returning.
 *
 * Returns the value left in ret by the last read helper that was called
 * (0 or a negative errno); a failure on an earlier page can therefore be
 * overwritten by a later one.
 */
static int f2fs_mpage_readpages(struct inode *inode,
                struct readahead_control *rac, struct page *page)
{
        struct bio *bio = NULL;
        sector_t last_block_in_bio = 0;
        struct f2fs_map_blocks map;
#ifdef CONFIG_F2FS_FS_COMPRESSION
        struct compress_ctx cc = {
                .inode = inode,
                .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
                .cluster_size = F2FS_I(inode)->i_cluster_size,
                .cluster_idx = NULL_CLUSTER,
                .rpages = NULL,
                .cpages = NULL,
                .nr_rpages = 0,
                .nr_cpages = 0,
        };
        /* index of the last cluster found to be non-compressed, if any */
        pgoff_t nc_cluster_idx = NULL_CLUSTER;
#endif
        unsigned nr_pages = rac ? readahead_count(rac) : 1;
        /* nr_pages counts down in the loop; the helpers get the original total */
        unsigned max_nr_pages = nr_pages;
        int ret = 0;

        /* Start with an empty mapping; f2fs_read_single_page() reuses it across pages. */
        map.m_pblk = 0;
        map.m_lblk = 0;
        map.m_len = 0;
        map.m_flags = 0;
        map.m_next_pgofs = NULL;
        map.m_next_extent = NULL;
        map.m_seg_type = NO_CHECK_TYPE;
        map.m_may_create = false;

        for (; nr_pages; nr_pages--) {
                if (rac) {
                        page = readahead_page(rac);
                        prefetchw(&page->flags);
                }

#ifdef CONFIG_F2FS_FS_COMPRESSION
                if (f2fs_compressed_file(inode)) {
                        /* there are remained compressed pages, submit them */
                        if (!f2fs_cluster_can_merge_page(&cc, page->index)) {
                                ret = f2fs_read_multi_pages(&cc, &bio,
                                                        max_nr_pages,
                                                        &last_block_in_bio,
                                                        rac != NULL, false);
                                f2fs_destroy_compress_ctx(&cc, false);
                                if (ret)
                                        goto set_error_page;
                        }
                        if (cc.cluster_idx == NULL_CLUSTER) {
                                /*
                                 * Same cluster as the one last found to be
                                 * non-compressed: skip the lookup.
                                 */
                                if (nc_cluster_idx ==
                                        page->index >> cc.log_cluster_size) {
                                        goto read_single_page;
                                }

                                ret = f2fs_is_compressed_cluster(inode, page->index);
                                if (ret < 0)
                                        goto set_error_page;
                                else if (!ret) {
                                        /* not compressed: remember the cluster index */
                                        nc_cluster_idx =
                                                page->index >> cc.log_cluster_size;
                                        goto read_single_page;
                                }

                                nc_cluster_idx = NULL_CLUSTER;
                        }
                        ret = f2fs_init_compress_ctx(&cc);
                        if (ret)
                                goto set_error_page;

                        /* The read itself is issued later by f2fs_read_multi_pages(). */
                        f2fs_compress_ctx_add_page(&cc, page);

                        goto next_page;
                }
read_single_page:
#endif

                ret = f2fs_read_single_page(inode, page, max_nr_pages, &map,
                                        &bio, &last_block_in_bio, rac);
                if (ret) {
#ifdef CONFIG_F2FS_FS_COMPRESSION
set_error_page:
#endif
                        /* On failure the page is zero-filled and unlocked here. */
                        zero_user_segment(page, 0, PAGE_SIZE);
                        unlock_page(page);
                }
#ifdef CONFIG_F2FS_FS_COMPRESSION
next_page:
#endif
                /* put_page() is only called for pages obtained from readahead_page(). */
                if (rac)
                        put_page(page);

#ifdef CONFIG_F2FS_FS_COMPRESSION
                if (f2fs_compressed_file(inode)) {
                        /* last page */
                        if (nr_pages == 1 && !f2fs_cluster_is_empty(&cc)) {
                                ret = f2fs_read_multi_pages(&cc, &bio,
                                                        max_nr_pages,
                                                        &last_block_in_bio,
                                                        rac != NULL, false);
                                f2fs_destroy_compress_ctx(&cc, false);
                        }
                }
#endif
        }
        if (bio)
                f2fs_submit_read_bio(F2FS_I_SB(inode), bio, DATA);
        return ret;
}
2436
2437 static int f2fs_read_data_folio(struct file *file, struct folio *folio)
2438 {
2439         struct page *page = &folio->page;
2440         struct inode *inode = page_file_mapping(page)->host;
2441         int ret = -EAGAIN;
2442
2443         trace_f2fs_readpage(page, DATA);
2444
2445         if (!f2fs_is_compress_backend_ready(inode)) {
2446                 unlock_page(page);
2447                 return -EOPNOTSUPP;
2448         }
2449
2450         /* If the file has inline data, try to read it directly */
2451         if (f2fs_has_inline_data(inode))
2452                 ret = f2fs_read_inline_data(inode, page);
2453         if (ret == -EAGAIN)
2454                 ret = f2fs_mpage_readpages(inode, NULL, page);
2455         return ret;
2456 }
2457
2458 static void f2fs_readahead(struct readahead_control *rac)
2459 {
2460         struct inode *inode = rac->mapping->host;
2461
2462         trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac));
2463
2464         if (!f2fs_is_compress_backend_ready(inode))
2465                 return;
2466
2467         /* If the file has inline data, skip readahead */
2468         if (f2fs_has_inline_data(inode))
2469                 return;
2470
2471         f2fs_mpage_readpages(inode, rac, NULL);
2472 }
2473
2474 int f2fs_encrypt_one_page(struct f2fs_io_info *fio)
2475 {
2476         struct inode *inode = fio->page->mapping->host;
2477         struct page *mpage, *page;
2478         gfp_t gfp_flags = GFP_NOFS;
2479
2480         if (!f2fs_encrypted_file(inode))
2481                 return 0;
2482
2483         page = fio->compressed_page ? fio->compressed_page : fio->page;
2484
2485         if (fscrypt_inode_uses_inline_crypto(inode))
2486                 return 0;
2487
2488 retry_encrypt:
2489         fio->encrypted_page = fscrypt_encrypt_pagecache_blocks(page,
2490                                         PAGE_SIZE, 0, gfp_flags);
2491         if (IS_ERR(fio->encrypted_page)) {
2492                 /* flush pending IOs and wait for a while in the ENOMEM case */
2493                 if (PTR_ERR(fio->encrypted_page) == -ENOMEM) {
2494                         f2fs_flush_merged_writes(fio->sbi);
2495                         memalloc_retry_wait(GFP_NOFS);
2496                         gfp_flags |= __GFP_NOFAIL;
2497                         goto retry_encrypt;
2498                 }
2499                 return PTR_ERR(fio->encrypted_page);
2500         }
2501
2502         mpage = find_lock_page(META_MAPPING(fio->sbi), fio->old_blkaddr);
2503         if (mpage) {
2504                 if (PageUptodate(mpage))
2505                         memcpy(page_address(mpage),
2506                                 page_address(fio->encrypted_page), PAGE_SIZE);
2507                 f2fs_put_page(mpage, 1);
2508         }
2509         return 0;
2510 }
2511
2512 static inline bool check_inplace_update_policy(struct inode *inode,
2513                                 struct f2fs_io_info *fio)
2514 {
2515         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2516
2517         if (IS_F2FS_IPU_HONOR_OPU_WRITE(sbi) &&
2518             is_inode_flag_set(inode, FI_OPU_WRITE))
2519                 return false;
2520         if (IS_F2FS_IPU_FORCE(sbi))
2521                 return true;
2522         if (IS_F2FS_IPU_SSR(sbi) && f2fs_need_SSR(sbi))
2523                 return true;
2524         if (IS_F2FS_IPU_UTIL(sbi) && utilization(sbi) > SM_I(sbi)->min_ipu_util)
2525                 return true;
2526         if (IS_F2FS_IPU_SSR_UTIL(sbi) && f2fs_need_SSR(sbi) &&
2527             utilization(sbi) > SM_I(sbi)->min_ipu_util)
2528                 return true;
2529
2530         /*
2531          * IPU for rewrite async pages
2532          */
2533         if (IS_F2FS_IPU_ASYNC(sbi) && fio && fio->op == REQ_OP_WRITE &&
2534             !(fio->op_flags & REQ_SYNC) && !IS_ENCRYPTED(inode))
2535                 return true;
2536
2537         /* this is only set during fdatasync */
2538         if (IS_F2FS_IPU_FSYNC(sbi) && is_inode_flag_set(inode, FI_NEED_IPU))
2539                 return true;
2540
2541         if (unlikely(fio && is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2542                         !f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2543                 return true;
2544
2545         return false;
2546 }
2547
2548 bool f2fs_should_update_inplace(struct inode *inode, struct f2fs_io_info *fio)
2549 {
2550         /* swap file is migrating in aligned write mode */
2551         if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2552                 return false;
2553
2554         if (f2fs_is_pinned_file(inode))
2555                 return true;
2556
2557         /* if this is cold file, we should overwrite to avoid fragmentation */
2558         if (file_is_cold(inode) && !is_inode_flag_set(inode, FI_OPU_WRITE))
2559                 return true;
2560
2561         return check_inplace_update_policy(inode, fio);
2562 }
2563
2564 bool f2fs_should_update_outplace(struct inode *inode, struct f2fs_io_info *fio)
2565 {
2566         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2567
2568         /* The below cases were checked when setting it. */
2569         if (f2fs_is_pinned_file(inode))
2570                 return false;
2571         if (fio && is_sbi_flag_set(sbi, SBI_NEED_FSCK))
2572                 return true;
2573         if (f2fs_lfs_mode(sbi))
2574                 return true;
2575         if (S_ISDIR(inode->i_mode))
2576                 return true;
2577         if (IS_NOQUOTA(inode))
2578                 return true;
2579         if (f2fs_is_atomic_file(inode))
2580                 return true;
2581         /* rewrite low ratio compress data w/ OPU mode to avoid fragmentation */
2582         if (f2fs_compressed_file(inode) &&
2583                 F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER &&
2584                 is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
2585                 return true;
2586
2587         /* swap file is migrating in aligned write mode */
2588         if (is_inode_flag_set(inode, FI_ALIGNED_WRITE))
2589                 return true;
2590
2591         if (is_inode_flag_set(inode, FI_OPU_WRITE))
2592                 return true;
2593
2594         if (fio) {
2595                 if (page_private_gcing(fio->page))
2596                         return true;
2597                 if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED) &&
2598                         f2fs_is_checkpointed_data(sbi, fio->old_blkaddr)))
2599                         return true;
2600         }
2601         return false;
2602 }
2603
2604 static inline bool need_inplace_update(struct f2fs_io_info *fio)
2605 {
2606         struct inode *inode = fio->page->mapping->host;
2607
2608         if (f2fs_should_update_outplace(inode, fio))
2609                 return false;
2610
2611         return f2fs_should_update_inplace(inode, fio);
2612 }
2613
/*
 * Write the data page described by @fio, either in place (IPU) over its
 * current block via f2fs_inplace_write_data(), or out of place (OPU) via
 * f2fs_outplace_write_data().
 *
 * fio->need_lock controls f2fs_lock_op handling:
 *   LOCK_REQ   - try-locked here right before the dnode lookup and released
 *                before returning (before the in-place write is issued on
 *                the IPU path);
 *   LOCK_RETRY - only try-locked once the OPU path has been chosen; on
 *                success need_lock becomes LOCK_REQ so the exit path unlocks;
 *   LOCK_DONE  - never locked or unlocked here.
 *
 * Returns 0 on success (also when the page turns out to be truncated, i.e.
 * its block address is NULL_ADDR), -EAGAIN when f2fs_trylock_op() fails,
 * -EFSCORRUPTED when the old block address fails f2fs_is_valid_blkaddr(),
 * or the error of one of the helpers called below.
 */
2614 int f2fs_do_write_data_page(struct f2fs_io_info *fio)
2615 {
2616         struct page *page = fio->page;
2617         struct inode *inode = page->mapping->host;
2618         struct dnode_of_data dn;
2619         struct node_info ni;
2620         bool ipu_force = false;
2621         int err = 0;
2622
2623         /* Use COW inode to make dnode_of_data for atomic write */
2624         if (f2fs_is_atomic_file(inode))
2625                 set_new_dnode(&dn, F2FS_I(inode)->cow_inode, NULL, NULL, 0);
2626         else
2627                 set_new_dnode(&dn, inode, NULL, NULL, 0);
2628
             /*
              * Fast path: IPU is wanted and the read extent cache already
              * supplies the block address, so skip both the lock and the
              * dnode lookup.  need_lock is forced to LOCK_DONE so that no
              * exit path tries to unlock.
              */
2629         if (need_inplace_update(fio) &&
2630             f2fs_lookup_read_extent_cache_block(inode, page->index,
2631                                                 &fio->old_blkaddr)) {
2632                 if (!f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2633                                                 DATA_GENERIC_ENHANCE))
2634                         return -EFSCORRUPTED;
2635
2636                 ipu_force = true;
2637                 fio->need_lock = LOCK_DONE;
2638                 goto got_it;
2639         }
2640
2641         /* Deadlock due to between page->lock and f2fs_lock_op */
2642         if (fio->need_lock == LOCK_REQ && !f2fs_trylock_op(fio->sbi))
2643                 return -EAGAIN;
2644
2645         err = f2fs_get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
2646         if (err)
2647                 goto out;
2648
2649         fio->old_blkaddr = dn.data_blkaddr;
2650
2651         /* This page is already truncated */
2652         if (fio->old_blkaddr == NULL_ADDR) {
2653                 ClearPageUptodate(page);
2654                 clear_page_private_gcing(page);
2655                 goto out_writepage;
2656         }
2657 got_it:
2658         if (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2659                 !f2fs_is_valid_blkaddr(fio->sbi, fio->old_blkaddr,
2660                                                 DATA_GENERIC_ENHANCE)) {
2661                 err = -EFSCORRUPTED;
2662                 goto out_writepage;
2663         }
2664
2665         /* wait for GCed page writeback via META_MAPPING */
2666         if (fio->post_read)
2667                 f2fs_wait_on_block_writeback(inode, fio->old_blkaddr);
2668
2669         /*
2670          * If current allocation needs SSR,
2671          * it had better in-place writes for updated data.
2672          */
2673         if (ipu_force ||
2674                 (__is_valid_data_blkaddr(fio->old_blkaddr) &&
2675                                         need_inplace_update(fio))) {
2676                 err = f2fs_encrypt_one_page(fio);
2677                 if (err)
2678                         goto out_writepage;
2679
                     /*
                      * The dnode reference and (for LOCK_REQ) the op lock
                      * are released before the in-place write is issued.
                      */
2680                 set_page_writeback(page);
2681                 f2fs_put_dnode(&dn);
2682                 if (fio->need_lock == LOCK_REQ)
2683                         f2fs_unlock_op(fio->sbi);
2684                 err = f2fs_inplace_write_data(fio);
2685                 if (err) {
                             /*
                              * Undo the setup above: finalize the bounce
                              * page when fs-layer crypto is in use and end
                              * the writeback state set on the page.
                              */
2686                         if (fscrypt_inode_uses_fs_layer_crypto(inode))
2687                                 fscrypt_finalize_bounce_page(&fio->encrypted_page);
2688                         if (PageWriteback(page))
2689                                 end_page_writeback(page);
2690                 } else {
2691                         set_inode_flag(inode, FI_UPDATE_WRITE);
2692                 }
2693                 trace_f2fs_do_write_data_page(fio->page, IPU);
2694                 return err;
2695         }
2696
             /*
              * Out-of-place path from here on.  A LOCK_RETRY caller gets one
              * trylock attempt; on failure -EAGAIN is returned, on success
              * need_lock becomes LOCK_REQ so the "out" label unlocks.
              */
2697         if (fio->need_lock == LOCK_RETRY) {
2698                 if (!f2fs_trylock_op(fio->sbi)) {
2699                         err = -EAGAIN;
2700                         goto out_writepage;
2701                 }
2702                 fio->need_lock = LOCK_REQ;
2703         }
2704
2705         err = f2fs_get_node_info(fio->sbi, dn.nid, &ni, false);
2706         if (err)
2707                 goto out_writepage;
2708
2709         fio->version = ni.version;
2710
2711         err = f2fs_encrypt_one_page(fio);
2712         if (err)
2713                 goto out_writepage;
2714
2715         set_page_writeback(page);
2716
             /*
              * NOTE(review): presumably adjusts the inode's compressed-block
              * accounting when the slot holding COMPRESS_ADDR is rewritten
              * as part of a compressed write; confirm against the helper.
              */
2717         if (fio->compr_blocks && fio->old_blkaddr == COMPRESS_ADDR)
2718                 f2fs_i_compr_blocks_update(inode, fio->compr_blocks - 1, false);
2719
2720         /* LFS mode write path */
2721         f2fs_outplace_write_data(&dn, fio);
2722         trace_f2fs_do_write_data_page(page, OPU);
2723         set_inode_flag(inode, FI_APPEND_WRITE);
2724 out_writepage:
2725         f2fs_put_dnode(&dn);
2726 out:
2727         if (fio->need_lock == LOCK_REQ)
2728                 f2fs_unlock_op(fio->sbi);
2729         return err;
2730 }
2731
/*
 * Write back a single data page of the inode that owns @page.
 *
 * @page:          page to write; it is unlocked here on every return path
 *                 except AOP_WRITEPAGE_ACTIVATE
 * @submitted:     if non-NULL, receives fio.submitted on the normal exit;
 *                 not written for reclaim writeback (wbc->for_reclaim) or
 *                 when a checkpoint error is seen at exit
 * @bio:           stored in the f2fs_io_info as .bio; when a checkpoint error
 *                 is seen at exit and *bio is set, it is submitted through
 *                 f2fs_submit_merged_ipu_write()
 * @last_block:    stored in the f2fs_io_info as .last_block
 * @wbc:           writeback control; supplies the write flags, the
 *                 for_reclaim hint and the redirty accounting
 * @io_type:       stored in the f2fs_io_info as .io_type
 * @compr_blocks:  when non-zero the i_size range check is bypassed and the
 *                 f2fs_io_info starts with LOCK_DONE instead of LOCK_RETRY
 * @allow_balance: gates the f2fs_balance_fs() call made after unlocking
 *
 * Return: 0 when the page was handled (written, or skipped as out of range
 * or dropped); AOP_WRITEPAGE_ACTIVATE when the page was redirtied and is
 * still locked; otherwise a negative error with the page redirtied and
 * unlocked.
 */
2732 int f2fs_write_single_data_page(struct page *page, int *submitted,
2733                                 struct bio **bio,
2734                                 sector_t *last_block,
2735                                 struct writeback_control *wbc,
2736                                 enum iostat_type io_type,
2737                                 int compr_blocks,
2738                                 bool allow_balance)
2739 {
2740         struct inode *inode = page->mapping->host;
2741         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2742         loff_t i_size = i_size_read(inode);
             /* index of the page that contains byte offset i_size */
2743         const pgoff_t end_index = ((unsigned long long)i_size)
2744                                                         >> PAGE_SHIFT;
             /* file offset just past the end of this page */
2745         loff_t psize = (loff_t)(page->index + 1) << PAGE_SHIFT;
2746         unsigned offset = 0;
2747         bool need_balance_fs = false;
2748         bool quota_inode = IS_NOQUOTA(inode);
2749         int err = 0;
2750         struct f2fs_io_info fio = {
2751                 .sbi = sbi,
2752                 .ino = inode->i_ino,
2753                 .type = DATA,
2754                 .op = REQ_OP_WRITE,
2755                 .op_flags = wbc_to_write_flags(wbc),
2756                 .old_blkaddr = NULL_ADDR,
2757                 .page = page,
2758                 .encrypted_page = NULL,
2759                 .submitted = 0,
2760                 .compr_blocks = compr_blocks,
2761                 .need_lock = compr_blocks ? LOCK_DONE : LOCK_RETRY,
2762                 .post_read = f2fs_post_read_required(inode) ? 1 : 0,
2763                 .io_type = io_type,
2764                 .io_wbc = wbc,
2765                 .bio = bio,
2766                 .last_block = last_block,
2767         };
2768
2769         trace_f2fs_writepage(page, DATA);
2770
2771         /* we should bypass data pages to proceed the kworker jobs */
2772         if (unlikely(f2fs_cp_error(sbi))) {
2773                 mapping_set_error(page->mapping, -EIO);
2774                 /*
2775                  * don't drop any dirty dentry pages for keeping latest
2776                  * directory structure.
2777                  */
2778                 if (S_ISDIR(inode->i_mode) &&
2779                                 !is_sbi_flag_set(sbi, SBI_IS_CLOSE))
2780                         goto redirty_out;
2781
2782                 /* keep data pages in remount-ro mode */
2783                 if (F2FS_OPTION(sbi).errors == MOUNT_ERRORS_READONLY)
2784                         goto redirty_out;
2785                 goto out;
2786         }
2787
2788         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
2789                 goto redirty_out;
2790
             /*
              * Pages entirely below i_size, pages of an inode with verity in
              * progress, and compressed-cluster writes skip the EOF handling.
              */
2791         if (page->index < end_index ||
2792                         f2fs_verity_in_progress(inode) ||
2793                         compr_blocks)
2794                 goto write;
2795
2796         /*
2797          * If the offset is out-of-range of file size,
2798          * this page does not have to be written to disk.
2799          */
2800         offset = i_size & (PAGE_SIZE - 1);
2801         if ((page->index >= end_index + 1) || !offset)
2802                 goto out;
2803
             /* the page straddles i_size: zero the part beyond it */
2804         zero_user_segment(page, offset, PAGE_SIZE);
2805 write:
2806         /* Dentry/quota blocks are controlled by checkpoint */
2807         if (S_ISDIR(inode->i_mode) || quota_inode) {
2808                 /*
2809                  * We need to wait for node_write to avoid block allocation during
2810                  * checkpoint. This can only happen to quota writes which can cause
2811                  * the below discard race condition.
2812                  */
2813                 if (quota_inode)
2814                         f2fs_down_read(&sbi->node_write);
2815
2816                 fio.need_lock = LOCK_DONE;
2817                 err = f2fs_do_write_data_page(&fio);
2818
2819                 if (quota_inode)
2820                         f2fs_up_read(&sbi->node_write);
2821
2822                 goto done;
2823         }
2824
             /*
              * Reclaim writeback: redirty when free sections are short,
              * otherwise mark the inode FI_HOT_DATA.  Non-reclaim writeback
              * requests a filesystem balance after the write instead.
              */
2825         if (!wbc->for_reclaim)
2826                 need_balance_fs = true;
2827         else if (has_not_enough_free_secs(sbi, 0, 0))
2828                 goto redirty_out;
2829         else
2830                 set_inode_flag(inode, FI_HOT_DATA);
2831
             /* -EAGAIN here means "not handled as inline data" */
2832         err = -EAGAIN;
2833         if (f2fs_has_inline_data(inode)) {
2834                 err = f2fs_write_inline_data(inode, page);
2835                 if (!err)
2836                         goto out;
2837         }
2838
2839         if (err == -EAGAIN) {
2840                 err = f2fs_do_write_data_page(&fio);
                     /*
                      * The first attempt only try-locks; if that failed,
                      * retry once with LOCK_REQ.  Not expected for
                      * compressed-cluster writes, which start with LOCK_DONE.
                      */
2841                 if (err == -EAGAIN) {
2842                         f2fs_bug_on(sbi, compr_blocks);
2843                         fio.need_lock = LOCK_REQ;
2844                         err = f2fs_do_write_data_page(&fio);
2845                 }
2846         }
2847
2848         if (err) {
2849                 file_set_keep_isize(inode);
2850         } else {
                     /* last_disk_size only ever grows, under i_size_lock */
2851                 spin_lock(&F2FS_I(inode)->i_size_lock);
2852                 if (F2FS_I(inode)->last_disk_size < psize)
2853                         F2FS_I(inode)->last_disk_size = psize;
2854                 spin_unlock(&F2FS_I(inode)->i_size_lock);
2855         }
2856
2857 done:
             /*
              * Any error except -ENOENT redirties the page.  -ENOENT falls
              * through to "out", where the page loses its uptodate state and
              * 0 is returned.
              */
2858         if (err && err != -ENOENT)
2859                 goto redirty_out;
2860
2861 out:
2862         inode_dec_dirty_pages(inode);
2863         if (err) {
2864                 ClearPageUptodate(page);
2865                 clear_page_private_gcing(page);
2866         }
2867
2868         if (wbc->for_reclaim) {
2869                 f2fs_submit_merged_write_cond(sbi, NULL, page, 0, DATA);
2870                 clear_inode_flag(inode, FI_HOT_DATA);
2871                 f2fs_remove_dirty_inode(inode);
                     /* do not report a submitted count to the caller */
2872                 submitted = NULL;
2873         }
2874         unlock_page(page);
2875         if (!S_ISDIR(inode->i_mode) && !IS_NOQUOTA(inode) &&
2876                         !F2FS_I(inode)->wb_task && allow_balance)
2877                 f2fs_balance_fs(sbi, need_balance_fs);
2878
             /* on checkpoint error, push out whatever is still merged/cached */
2879         if (unlikely(f2fs_cp_error(sbi))) {
2880                 f2fs_submit_merged_write(sbi, DATA);
2881                 if (bio && *bio)
2882                         f2fs_submit_merged_ipu_write(sbi, bio, NULL);
2883                 submitted = NULL;
2884         }
2885
2886         if (submitted)
2887                 *submitted = fio.submitted;
2888
2889         return 0;
2890
2891 redirty_out:
2892         redirty_page_for_writepage(wbc, page);
2893         /*
2894          * pageout() in MM translates EAGAIN, so calls handle_write_error()
2895          * -> mapping_set_error() -> set_bit(AS_EIO, ...).
2896          * file_write_and_wait_range() will see EIO error, which is critical
2897          * to return value of fsync() followed by atomic_write failure to user.
2898          */
             /* AOP_WRITEPAGE_ACTIVATE is returned with the page still locked */
2899         if (!err || wbc->for_reclaim)
2900                 return AOP_WRITEPAGE_ACTIVATE;
2901         unlock_page(page);
2902         return err;
2903 }
2904
2905 static int f2fs_write_data_page(struct page *page,
2906                                         struct writeback_control *wbc)
2907 {
2908 #ifdef CONFIG_F2FS_FS_COMPRESSION
2909         struct inode *inode = page->mapping->host;
2910
2911         if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
2912                 goto out;
2913
2914         if (f2fs_compressed_file(inode)) {
2915                 if (f2fs_is_compressed_cluster(inode, page->index)) {
2916                         redirty_page_for_writepage(wbc, page);
2917                         return AOP_WRITEPAGE_ACTIVATE;
2918                 }
2919         }
2920 out:
2921 #endif
2922
2923         return f2fs_write_single_data_page(page, NULL, NULL, NULL,
2924                                                 wbc, FS_DATA_IO, 0, true);
2925 }
2926
2927 /*
2928  * This function was copied from write_cache_pages from mm/page-writeback.c.
2929  * The major change is making write step of cold data page separately from
2930  * warm/hot data page.
      *
      * Each outer iteration has two phases: first up to max_pages tagged pages
      * are gathered into pages[] (each entry holding a folio reference), then
      * those pages are written one by one.  Without compression each page goes
      * through f2fs_write_single_data_page(); for compressed files pages are
      * queued in a compress context and written per cluster by
      * f2fs_write_multi_pages().
      *
      * wbc->nr_to_write is decreased by the submitted counts reported by the
      * write helpers.  Returns 0 or the last error stored in ret; -EAGAIN and
      * AOP_WRITEPAGE_ACTIVATE from the single-page writer are turned into 0.
2931  */
2932 static int f2fs_write_cache_pages(struct address_space *mapping,
2933                                         struct writeback_control *wbc,
2934                                         enum iostat_type io_type)
2935 {
2936         int ret = 0;
2937         int done = 0, retry = 0;
2938         struct page *pages_local[F2FS_ONSTACK_PAGES];
2939         struct page **pages = pages_local;
2940         struct folio_batch fbatch;
2941         struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
2942         struct bio *bio = NULL;
2943         sector_t last_block;
2944 #ifdef CONFIG_F2FS_FS_COMPRESSION
2945         struct inode *inode = mapping->host;
2946         struct compress_ctx cc = {
2947                 .inode = inode,
2948                 .log_cluster_size = F2FS_I(inode)->i_log_cluster_size,
2949                 .cluster_size = F2FS_I(inode)->i_cluster_size,
2950                 .cluster_idx = NULL_CLUSTER,
2951                 .rpages = NULL,
2952                 .nr_rpages = 0,
2953                 .cpages = NULL,
2954                 .valid_nr_cpages = 0,
2955                 .rbuf = NULL,
2956                 .cbuf = NULL,
2957                 .rlen = PAGE_SIZE * F2FS_I(inode)->i_cluster_size,
2958                 .private = NULL,
2959         };
2960 #endif
2961         int nr_folios, p, idx;
2962         int nr_pages;
2963         unsigned int max_pages = F2FS_ONSTACK_PAGES;
2964         pgoff_t index;
2965         pgoff_t end;            /* Inclusive */
2966         pgoff_t done_index;
2967         int range_whole = 0;
2968         xa_mark_t tag;
2969         int nwritten = 0;
2970         int submitted = 0;
2971         int i;
2972
2973 #ifdef CONFIG_F2FS_FS_COMPRESSION
             /*
              * A cluster with more pages than the on-stack array needs a
              * heap array sized for one whole cluster (freed at the end).
              */
2974         if (f2fs_compressed_file(inode) &&
2975                 1 << cc.log_cluster_size > F2FS_ONSTACK_PAGES) {
2976                 pages = f2fs_kzalloc(sbi, sizeof(struct page *) <<
2977                                 cc.log_cluster_size, GFP_NOFS | __GFP_NOFAIL);
2978                 max_pages = 1 << cc.log_cluster_size;
2979         }
2980 #endif
2981
2982         folio_batch_init(&fbatch);
2983
             /* inodes with at most min_hot_blocks dirty pages are marked hot */
2984         if (get_dirty_pages(mapping->host) <=
2985                                 SM_I(F2FS_M_SB(mapping))->min_hot_blocks)
2986                 set_inode_flag(mapping->host, FI_HOT_DATA);
2987         else
2988                 clear_inode_flag(mapping->host, FI_HOT_DATA);
2989
2990         if (wbc->range_cyclic) {
2991                 index = mapping->writeback_index; /* prev offset */
2992                 end = -1;
2993         } else {
2994                 index = wbc->range_start >> PAGE_SHIFT;
2995                 end = wbc->range_end >> PAGE_SHIFT;
2996                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
2997                         range_whole = 1;
2998         }
             /*
              * Sync and tagged writeback look up PAGECACHE_TAG_TOWRITE, which
              * tag_pages_for_writeback() applies below; other writeback looks
              * up PAGECACHE_TAG_DIRTY directly.
              */
2999         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3000                 tag = PAGECACHE_TAG_TOWRITE;
3001         else
3002                 tag = PAGECACHE_TAG_DIRTY;
3003 retry:
3004         retry = 0;
3005         if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
3006                 tag_pages_for_writeback(mapping, index, end);
3007         done_index = index;
3008         while (!done && !retry && (index <= end)) {
3009                 nr_pages = 0;
                     /*
                      * Gather phase: keep fetching tagged folios until
                      * pages[] is full or the range is exhausted.  Every
                      * pages[] entry takes its own folio reference, which
                      * release_pages() drops after the write phase.
                      */
3010 again:
3011                 nr_folios = filemap_get_folios_tag(mapping, &index, end,
3012                                 tag, &fbatch);
3013                 if (nr_folios == 0) {
3014                         if (nr_pages)
3015                                 goto write;
3016                         break;
3017                 }
3018
3019                 for (i = 0; i < nr_folios; i++) {
3020                         struct folio *folio = fbatch.folios[i];
3021
3022                         idx = 0;
3023                         p = folio_nr_pages(folio);
3024 add_more:
3025                         pages[nr_pages] = folio_page(folio, idx);
3026                         folio_get(folio);
3027                         if (++nr_pages == max_pages) {
                                     /*
                                      * Array full: resume the next lookup
                                      * right after the last page taken.
                                      */
3028                                 index = folio->index + idx + 1;
3029                                 folio_batch_release(&fbatch);
3030                                 goto write;
3031                         }
3032                         if (++idx < p)
3033                                 goto add_more;
3034                 }
3035                 folio_batch_release(&fbatch);
3036                 goto again;
3037 write:
3038                 for (i = 0; i < nr_pages; i++) {
3039                         struct page *page = pages[i];
3040                         struct folio *folio = page_folio(page);
3041                         bool need_readd;
                             /*
                              * "readd" is re-entered for the same page after
                              * the pending cluster was written successfully
                              * (need_readd), so the page is processed again
                              * with the cluster context flushed.
                              */
3042 readd:
3043                         need_readd = false;
3044 #ifdef CONFIG_F2FS_FS_COMPRESSION
3045                         if (f2fs_compressed_file(inode)) {
3046                                 void *fsdata = NULL;
3047                                 struct page *pagep;
3048                                 int ret2;
3049
3050                                 ret = f2fs_init_compress_ctx(&cc);
3051                                 if (ret) {
3052                                         done = 1;
3053                                         break;
3054                                 }
3055
                                     /*
                                      * This page cannot be merged into the
                                      * cluster context being built: write
                                      * that context out first.
                                      */
3056                                 if (!f2fs_cluster_can_merge_page(&cc,
3057                                                                 folio->index)) {
3058                                         ret = f2fs_write_multi_pages(&cc,
3059                                                 &submitted, wbc, io_type);
3060                                         if (!ret)
3061                                                 need_readd = true;
3062                                         goto result;
3063                                 }
3064
3065                                 if (unlikely(f2fs_cp_error(sbi)))
3066                                         goto lock_folio;
3067
3068                                 if (!f2fs_cluster_is_empty(&cc))
3069                                         goto lock_folio;
3070
3071                                 if (f2fs_all_cluster_page_ready(&cc,
3072                                         pages, i, nr_pages, true))
3073                                         goto lock_folio;
3074
                                     /*
                                      * NOTE(review): presumably prepares the
                                      * remaining pages of the cluster for an
                                      * overwrite; a negative result aborts,
                                      * and a positive one whose follow-up
                                      * checks fail restarts the scan (retry).
                                      */
3075                                 ret2 = f2fs_prepare_compress_overwrite(
3076                                                         inode, &pagep,
3077                                                         folio->index, &fsdata);
3078                                 if (ret2 < 0) {
3079                                         ret = ret2;
3080                                         done = 1;
3081                                         break;
3082                                 } else if (ret2 &&
3083                                         (!f2fs_compress_write_end(inode,
3084                                                 fsdata, folio->index, 1) ||
3085                                          !f2fs_all_cluster_page_ready(&cc,
3086                                                 pages, i, nr_pages,
3087                                                 false))) {
3088                                         retry = 1;
3089                                         break;
3090                                 }
3091                         }
3092 #endif
3093                         /* give a priority to WB_SYNC threads */
3094                         if (atomic_read(&sbi->wb_sync_req[DATA]) &&
3095                                         wbc->sync_mode == WB_SYNC_NONE) {
3096                                 done = 1;
3097                                 break;
3098                         }
3099 #ifdef CONFIG_F2FS_FS_COMPRESSION
3100 lock_folio:
3101 #endif
3102                         done_index = folio->index;
3103 retry_write:
3104                         folio_lock(folio);
3105
                             /* the folio no longer belongs to this mapping */
3106                         if (unlikely(folio->mapping != mapping)) {
3107 continue_unlock:
3108                                 folio_unlock(folio);
3109                                 continue;
3110                         }
3111
3112                         if (!folio_test_dirty(folio)) {
3113                                 /* someone wrote it for us */
3114                                 goto continue_unlock;
3115                         }
3116
                             /* only sync writeback waits for in-flight writeback */
3117                         if (folio_test_writeback(folio)) {
3118                                 if (wbc->sync_mode == WB_SYNC_NONE)
3119                                         goto continue_unlock;
3120                                 f2fs_wait_on_page_writeback(&folio->page, DATA, true, true);
3121                         }
3122
3123                         if (!folio_clear_dirty_for_io(folio))
3124                                 goto continue_unlock;
3125
3126 #ifdef CONFIG_F2FS_FS_COMPRESSION
                             /*
                              * Compressed files only queue the page here
                              * (with an extra folio reference); the write is
                              * issued later by f2fs_write_multi_pages().
                              */
3127                         if (f2fs_compressed_file(inode)) {
3128                                 folio_get(folio);
3129                                 f2fs_compress_ctx_add_page(&cc, &folio->page);
3130                                 continue;
3131                         }
3132 #endif
3133                         ret = f2fs_write_single_data_page(&folio->page,
3134                                         &submitted, &bio, &last_block,
3135                                         wbc, io_type, 0, true);
                             /* the writer leaves the page locked in this case */
3136                         if (ret == AOP_WRITEPAGE_ACTIVATE)
3137                                 folio_unlock(folio);
3138 #ifdef CONFIG_F2FS_FS_COMPRESSION
3139 result:
3140 #endif
3141                         nwritten += submitted;
3142                         wbc->nr_to_write -= submitted;
3143
3144                         if (unlikely(ret)) {
3145                                 /*
3146                                  * keep nr_to_write, since vfs uses this to
3147                                  * get # of written pages.
3148                                  */
3149                                 if (ret == AOP_WRITEPAGE_ACTIVATE) {
3150                                         ret = 0;
3151                                         goto next;
3152                                 } else if (ret == -EAGAIN) {
                                             /*
                                              * Sync writeback sleeps and
                                              * retries the same folio; other
                                              * writeback moves on.
                                              */
3153                                         ret = 0;
3154                                         if (wbc->sync_mode == WB_SYNC_ALL) {
3155                                                 f2fs_io_schedule_timeout(
3156                                                         DEFAULT_IO_TIMEOUT);
3157                                                 goto retry_write;
3158                                         }
3159                                         goto next;
3160                                 }
                                     /* hard error: stop, resuming after this folio */
3161                                 done_index = folio_next_index(folio);
3162                                 done = 1;
3163                                 break;
3164                         }
3165
3166                         if (wbc->nr_to_write <= 0 &&
3167                                         wbc->sync_mode == WB_SYNC_NONE) {
3168                                 done = 1;
3169                                 break;
3170                         }
3171 next:
3172                         if (need_readd)
3173                                 goto readd;
3174                 }
                     /* drop the references taken in the gather phase */
3175                 release_pages(pages, nr_pages);
3176                 cond_resched();
3177         }
3178 #ifdef CONFIG_F2FS_FS_COMPRESSION
3179         /* flush remained pages in compress cluster */
3180         if (f2fs_compressed_file(inode) && !f2fs_cluster_is_empty(&cc)) {
3181                 ret = f2fs_write_multi_pages(&cc, &submitted, wbc, io_type);
3182                 nwritten += submitted;
3183                 wbc->nr_to_write -= submitted;
3184                 if (ret) {
3185                         done = 1;
3186                         retry = 0;
3187                 }
3188         }
3189         if (f2fs_compressed_file(inode))
3190                 f2fs_destroy_compress_ctx(&cc, false);
3191 #endif
             /* a requested retry rescans the whole file from index 0 */
3192         if (retry) {
3193                 index = 0;
3194                 end = -1;
3195                 goto retry;
3196         }
             /*
              * A cyclic scan that ran to completion starts from index 0 next
              * time; otherwise remember where this pass stopped.
              */
3197         if (wbc->range_cyclic && !done)
3198                 done_index = 0;
3199         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
3200                 mapping->writeback_index = done_index;
3201
3202         if (nwritten)
3203                 f2fs_submit_merged_write_cond(F2FS_M_SB(mapping), mapping->host,
3204                                                                 NULL, 0, DATA);
3205         /* submit cached bio of IPU write */
3206         if (bio)
3207                 f2fs_submit_merged_ipu_write(sbi, &bio, NULL);
3208
3209 #ifdef CONFIG_F2FS_FS_COMPRESSION
3210         if (pages != pages_local)
3211                 kfree(pages);
3212 #endif
3213
3214         return ret;
3215 }
3216
3217 static inline bool __should_serialize_io(struct inode *inode,
3218                                         struct writeback_control *wbc)
3219 {
3220         /* to avoid deadlock in path of data flush */
3221         if (F2FS_I(inode)->wb_task)
3222                 return false;
3223
3224         if (!S_ISREG(inode->i_mode))
3225                 return false;
3226         if (IS_NOQUOTA(inode))
3227                 return false;
3228
3229         if (f2fs_need_compress_data(inode))
3230                 return true;
3231         if (wbc->sync_mode != WB_SYNC_ALL)
3232                 return true;
3233         if (get_dirty_pages(inode) >= SM_I(F2FS_I_SB(inode))->min_seq_blocks)
3234                 return true;
3235         return false;
3236 }
3237
3238 static int __f2fs_write_data_pages(struct address_space *mapping,
3239                                                 struct writeback_control *wbc,
3240                                                 enum iostat_type io_type)
3241 {
3242         struct inode *inode = mapping->host;
3243         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3244         struct blk_plug plug;
3245         int ret;
3246         bool locked = false;
3247
3248         /* deal with chardevs and other special file */
3249         if (!mapping->a_ops->writepage)
3250                 return 0;
3251
3252         /* skip writing if there is no dirty page in this inode */
3253         if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
3254                 return 0;
3255
3256         /* during POR, we don't need to trigger writepage at all. */
3257         if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
3258                 goto skip_write;
3259
3260         if ((S_ISDIR(inode->i_mode) || IS_NOQUOTA(inode)) &&
3261                         wbc->sync_mode == WB_SYNC_NONE &&
3262                         get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
3263                         f2fs_available_free_memory(sbi, DIRTY_DENTS))
3264                 goto skip_write;
3265
3266         /* skip writing in file defragment preparing stage */
3267         if (is_inode_flag_set(inode, FI_SKIP_WRITES))
3268                 goto skip_write;
3269
3270         trace_f2fs_writepages(mapping->host, wbc, DATA);
3271
3272         /* to avoid spliting IOs due to mixed WB_SYNC_ALL and WB_SYNC_NONE */
3273         if (wbc->sync_mode == WB_SYNC_ALL)
3274                 atomic_inc(&sbi->wb_sync_req[DATA]);
3275         else if (atomic_read(&sbi->wb_sync_req[DATA])) {
3276                 /* to avoid potential deadlock */
3277                 if (current->plug)
3278                         blk_finish_plug(current->plug);
3279                 goto skip_write;
3280         }
3281
3282         if (__should_serialize_io(inode, wbc)) {
3283                 mutex_lock(&sbi->writepages);
3284                 locked = true;
3285         }
3286
3287         blk_start_plug(&plug);
3288         ret = f2fs_write_cache_pages(mapping, wbc, io_type);
3289         blk_finish_plug(&plug);
3290
3291         if (locked)
3292                 mutex_unlock(&sbi->writepages);
3293
3294         if (wbc->sync_mode == WB_SYNC_ALL)
3295                 atomic_dec(&sbi->wb_sync_req[DATA]);
3296         /*
3297          * if some pages were truncated, we cannot guarantee its mapping->host
3298          * to detect pending bios.
3299          */
3300
3301         f2fs_remove_dirty_inode(inode);
3302         return ret;
3303
3304 skip_write:
3305         wbc->pages_skipped += get_dirty_pages(inode);
3306         trace_f2fs_writepages(mapping->host, wbc, DATA);
3307         return 0;
3308 }
3309
3310 static int f2fs_write_data_pages(struct address_space *mapping,
3311                             struct writeback_control *wbc)
3312 {
3313         struct inode *inode = mapping->host;
3314
3315         return __f2fs_write_data_pages(mapping, wbc,
3316                         F2FS_I(inode)->cp_task == current ?
3317                         FS_CP_DATA_IO : FS_DATA_IO);
3318 }
3319
3320 void f2fs_write_failed(struct inode *inode, loff_t to)
3321 {
3322         loff_t i_size = i_size_read(inode);
3323
3324         if (IS_NOQUOTA(inode))
3325                 return;
3326
3327         /* In the fs-verity case, f2fs_end_enable_verity() does the truncate */
3328         if (to > i_size && !f2fs_verity_in_progress(inode)) {
3329                 f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3330                 filemap_invalidate_lock(inode->i_mapping);
3331
3332                 truncate_pagecache(inode, i_size);
3333                 f2fs_truncate_blocks(inode, i_size, true);
3334
3335                 filemap_invalidate_unlock(inode->i_mapping);
3336                 f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3337         }
3338 }
3339
3340 static int prepare_write_begin(struct f2fs_sb_info *sbi,
3341                         struct page *page, loff_t pos, unsigned len,
3342                         block_t *blk_addr, bool *node_changed)
3343 {
3344         struct inode *inode = page->mapping->host;
3345         pgoff_t index = page->index;
3346         struct dnode_of_data dn;
3347         struct page *ipage;
3348         bool locked = false;
3349         int flag = F2FS_GET_BLOCK_PRE_AIO;
3350         int err = 0;
3351
3352         /*
3353          * If a whole page is being written and we already preallocated all the
3354          * blocks, then there is no need to get a block address now.
3355          */
3356         if (len == PAGE_SIZE && is_inode_flag_set(inode, FI_PREALLOCATED_ALL))
3357                 return 0;
3358
3359         /* f2fs_lock_op avoids race between write CP and convert_inline_page */
3360         if (f2fs_has_inline_data(inode)) {
3361                 if (pos + len > MAX_INLINE_DATA(inode))
3362                         flag = F2FS_GET_BLOCK_DEFAULT;
3363                 f2fs_map_lock(sbi, flag);
3364                 locked = true;
3365         } else if ((pos & PAGE_MASK) >= i_size_read(inode)) {
3366                 f2fs_map_lock(sbi, flag);
3367                 locked = true;
3368         }
3369
3370 restart:
3371         /* check inline_data */
3372         ipage = f2fs_get_node_page(sbi, inode->i_ino);
3373         if (IS_ERR(ipage)) {
3374                 err = PTR_ERR(ipage);
3375                 goto unlock_out;
3376         }
3377
3378         set_new_dnode(&dn, inode, ipage, ipage, 0);
3379
3380         if (f2fs_has_inline_data(inode)) {
3381                 if (pos + len <= MAX_INLINE_DATA(inode)) {
3382                         f2fs_do_read_inline_data(page, ipage);
3383                         set_inode_flag(inode, FI_DATA_EXIST);
3384                         if (inode->i_nlink)
3385                                 set_page_private_inline(ipage);
3386                         goto out;
3387                 }
3388                 err = f2fs_convert_inline_page(&dn, page);
3389                 if (err || dn.data_blkaddr != NULL_ADDR)
3390                         goto out;
3391         }
3392
3393         if (!f2fs_lookup_read_extent_cache_block(inode, index,
3394                                                  &dn.data_blkaddr)) {
3395                 if (locked) {
3396                         err = f2fs_reserve_block(&dn, index);
3397                         goto out;
3398                 }
3399
3400                 /* hole case */
3401                 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3402                 if (!err && dn.data_blkaddr != NULL_ADDR)
3403                         goto out;
3404                 f2fs_put_dnode(&dn);
3405                 f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3406                 WARN_ON(flag != F2FS_GET_BLOCK_PRE_AIO);
3407                 locked = true;
3408                 goto restart;
3409         }
3410 out:
3411         if (!err) {
3412                 /* convert_inline_page can make node_changed */
3413                 *blk_addr = dn.data_blkaddr;
3414                 *node_changed = dn.node_changed;
3415         }
3416         f2fs_put_dnode(&dn);
3417 unlock_out:
3418         if (locked)
3419                 f2fs_map_unlock(sbi, flag);
3420         return err;
3421 }
3422
3423 static int __find_data_block(struct inode *inode, pgoff_t index,
3424                                 block_t *blk_addr)
3425 {
3426         struct dnode_of_data dn;
3427         struct page *ipage;
3428         int err = 0;
3429
3430         ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
3431         if (IS_ERR(ipage))
3432                 return PTR_ERR(ipage);
3433
3434         set_new_dnode(&dn, inode, ipage, ipage, 0);
3435
3436         if (!f2fs_lookup_read_extent_cache_block(inode, index,
3437                                                  &dn.data_blkaddr)) {
3438                 /* hole case */
3439                 err = f2fs_get_dnode_of_data(&dn, index, LOOKUP_NODE);
3440                 if (err) {
3441                         dn.data_blkaddr = NULL_ADDR;
3442                         err = 0;
3443                 }
3444         }
3445         *blk_addr = dn.data_blkaddr;
3446         f2fs_put_dnode(&dn);
3447         return err;
3448 }
3449
3450 static int __reserve_data_block(struct inode *inode, pgoff_t index,
3451                                 block_t *blk_addr, bool *node_changed)
3452 {
3453         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3454         struct dnode_of_data dn;
3455         struct page *ipage;
3456         int err = 0;
3457
3458         f2fs_map_lock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3459
3460         ipage = f2fs_get_node_page(sbi, inode->i_ino);
3461         if (IS_ERR(ipage)) {
3462                 err = PTR_ERR(ipage);
3463                 goto unlock_out;
3464         }
3465         set_new_dnode(&dn, inode, ipage, ipage, 0);
3466
3467         if (!f2fs_lookup_read_extent_cache_block(dn.inode, index,
3468                                                 &dn.data_blkaddr))
3469                 err = f2fs_reserve_block(&dn, index);
3470
3471         *blk_addr = dn.data_blkaddr;
3472         *node_changed = dn.node_changed;
3473         f2fs_put_dnode(&dn);
3474
3475 unlock_out:
3476         f2fs_map_unlock(sbi, F2FS_GET_BLOCK_PRE_AIO);
3477         return err;
3478 }
3479
3480 static int prepare_atomic_write_begin(struct f2fs_sb_info *sbi,
3481                         struct page *page, loff_t pos, unsigned int len,
3482                         block_t *blk_addr, bool *node_changed, bool *use_cow)
3483 {
3484         struct inode *inode = page->mapping->host;
3485         struct inode *cow_inode = F2FS_I(inode)->cow_inode;
3486         pgoff_t index = page->index;
3487         int err = 0;
3488         block_t ori_blk_addr = NULL_ADDR;
3489
3490         /* If pos is beyond the end of file, reserve a new block in COW inode */
3491         if ((pos & PAGE_MASK) >= i_size_read(inode))
3492                 goto reserve_block;
3493
3494         /* Look for the block in COW inode first */
3495         err = __find_data_block(cow_inode, index, blk_addr);
3496         if (err) {
3497                 return err;
3498         } else if (*blk_addr != NULL_ADDR) {
3499                 *use_cow = true;
3500                 return 0;
3501         }
3502
3503         if (is_inode_flag_set(inode, FI_ATOMIC_REPLACE))
3504                 goto reserve_block;
3505
3506         /* Look for the block in the original inode */
3507         err = __find_data_block(inode, index, &ori_blk_addr);
3508         if (err)
3509                 return err;
3510
3511 reserve_block:
3512         /* Finally, we should reserve a new block in COW inode for the update */
3513         err = __reserve_data_block(cow_inode, index, blk_addr, node_changed);
3514         if (err)
3515                 return err;
3516         inc_atomic_write_cnt(inode);
3517
3518         if (ori_blk_addr != NULL_ADDR)
3519                 *blk_addr = ori_blk_addr;
3520         return 0;
3521 }
3522
3523 static int f2fs_write_begin(struct file *file, struct address_space *mapping,
3524                 loff_t pos, unsigned len, struct page **pagep, void **fsdata)
3525 {
3526         struct inode *inode = mapping->host;
3527         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3528         struct page *page = NULL;
3529         pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
3530         bool need_balance = false;
3531         bool use_cow = false;
3532         block_t blkaddr = NULL_ADDR;
3533         int err = 0;
3534
3535         trace_f2fs_write_begin(inode, pos, len);
3536
3537         if (!f2fs_is_checkpoint_ready(sbi)) {
3538                 err = -ENOSPC;
3539                 goto fail;
3540         }
3541
3542         /*
3543          * We should check this at this moment to avoid deadlock on inode page
3544          * and #0 page. The locking rule for inline_data conversion should be:
3545          * lock_page(page #0) -> lock_page(inode_page)
3546          */
3547         if (index != 0) {
3548                 err = f2fs_convert_inline_inode(inode);
3549                 if (err)
3550                         goto fail;
3551         }
3552
3553 #ifdef CONFIG_F2FS_FS_COMPRESSION
3554         if (f2fs_compressed_file(inode)) {
3555                 int ret;
3556
3557                 *fsdata = NULL;
3558
3559                 if (len == PAGE_SIZE && !(f2fs_is_atomic_file(inode)))
3560                         goto repeat;
3561
3562                 ret = f2fs_prepare_compress_overwrite(inode, pagep,
3563                                                         index, fsdata);
3564                 if (ret < 0) {
3565                         err = ret;
3566                         goto fail;
3567                 } else if (ret) {
3568                         return 0;
3569                 }
3570         }
3571 #endif
3572
3573 repeat:
3574         /*
3575          * Do not use grab_cache_page_write_begin() to avoid deadlock due to
3576          * wait_for_stable_page. Will wait that below with our IO control.
3577          */
3578         page = f2fs_pagecache_get_page(mapping, index,
3579                                 FGP_LOCK | FGP_WRITE | FGP_CREAT, GFP_NOFS);
3580         if (!page) {
3581                 err = -ENOMEM;
3582                 goto fail;
3583         }
3584
3585         /* TODO: cluster can be compressed due to race with .writepage */
3586
3587         *pagep = page;
3588
3589         if (f2fs_is_atomic_file(inode))
3590                 err = prepare_atomic_write_begin(sbi, page, pos, len,
3591                                         &blkaddr, &need_balance, &use_cow);
3592         else
3593                 err = prepare_write_begin(sbi, page, pos, len,
3594                                         &blkaddr, &need_balance);
3595         if (err)
3596                 goto fail;
3597
3598         if (need_balance && !IS_NOQUOTA(inode) &&
3599                         has_not_enough_free_secs(sbi, 0, 0)) {
3600                 unlock_page(page);
3601                 f2fs_balance_fs(sbi, true);
3602                 lock_page(page);
3603                 if (page->mapping != mapping) {
3604                         /* The page got truncated from under us */
3605                         f2fs_put_page(page, 1);
3606                         goto repeat;
3607                 }
3608         }
3609
3610         f2fs_wait_on_page_writeback(page, DATA, false, true);
3611
3612         if (len == PAGE_SIZE || PageUptodate(page))
3613                 return 0;
3614
3615         if (!(pos & (PAGE_SIZE - 1)) && (pos + len) >= i_size_read(inode) &&
3616             !f2fs_verity_in_progress(inode)) {
3617                 zero_user_segment(page, len, PAGE_SIZE);
3618                 return 0;
3619         }
3620
3621         if (blkaddr == NEW_ADDR) {
3622                 zero_user_segment(page, 0, PAGE_SIZE);
3623                 SetPageUptodate(page);
3624         } else {
3625                 if (!f2fs_is_valid_blkaddr(sbi, blkaddr,
3626                                 DATA_GENERIC_ENHANCE_READ)) {
3627                         err = -EFSCORRUPTED;
3628                         goto fail;
3629                 }
3630                 err = f2fs_submit_page_read(use_cow ?
3631                                 F2FS_I(inode)->cow_inode : inode, page,
3632                                 blkaddr, 0, true);
3633                 if (err)
3634                         goto fail;
3635
3636                 lock_page(page);
3637                 if (unlikely(page->mapping != mapping)) {
3638                         f2fs_put_page(page, 1);
3639                         goto repeat;
3640                 }
3641                 if (unlikely(!PageUptodate(page))) {
3642                         err = -EIO;
3643                         goto fail;
3644                 }
3645         }
3646         return 0;
3647
3648 fail:
3649         f2fs_put_page(page, 1);
3650         f2fs_write_failed(inode, pos + len);
3651         return err;
3652 }
3653
3654 static int f2fs_write_end(struct file *file,
3655                         struct address_space *mapping,
3656                         loff_t pos, unsigned len, unsigned copied,
3657                         struct page *page, void *fsdata)
3658 {
3659         struct inode *inode = page->mapping->host;
3660
3661         trace_f2fs_write_end(inode, pos, len, copied);
3662
3663         /*
3664          * This should be come from len == PAGE_SIZE, and we expect copied
3665          * should be PAGE_SIZE. Otherwise, we treat it with zero copied and
3666          * let generic_perform_write() try to copy data again through copied=0.
3667          */
3668         if (!PageUptodate(page)) {
3669                 if (unlikely(copied != len))
3670                         copied = 0;
3671                 else
3672                         SetPageUptodate(page);
3673         }
3674
3675 #ifdef CONFIG_F2FS_FS_COMPRESSION
3676         /* overwrite compressed file */
3677         if (f2fs_compressed_file(inode) && fsdata) {
3678                 f2fs_compress_write_end(inode, fsdata, page->index, copied);
3679                 f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3680
3681                 if (pos + copied > i_size_read(inode) &&
3682                                 !f2fs_verity_in_progress(inode))
3683                         f2fs_i_size_write(inode, pos + copied);
3684                 return copied;
3685         }
3686 #endif
3687
3688         if (!copied)
3689                 goto unlock_out;
3690
3691         set_page_dirty(page);
3692
3693         if (pos + copied > i_size_read(inode) &&
3694             !f2fs_verity_in_progress(inode)) {
3695                 f2fs_i_size_write(inode, pos + copied);
3696                 if (f2fs_is_atomic_file(inode))
3697                         f2fs_i_size_write(F2FS_I(inode)->cow_inode,
3698                                         pos + copied);
3699         }
3700 unlock_out:
3701         f2fs_put_page(page, 1);
3702         f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
3703         return copied;
3704 }
3705
3706 void f2fs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
3707 {
3708         struct inode *inode = folio->mapping->host;
3709         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3710
3711         if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
3712                                 (offset || length != folio_size(folio)))
3713                 return;
3714
3715         if (folio_test_dirty(folio)) {
3716                 if (inode->i_ino == F2FS_META_INO(sbi)) {
3717                         dec_page_count(sbi, F2FS_DIRTY_META);
3718                 } else if (inode->i_ino == F2FS_NODE_INO(sbi)) {
3719                         dec_page_count(sbi, F2FS_DIRTY_NODES);
3720                 } else {
3721                         inode_dec_dirty_pages(inode);
3722                         f2fs_remove_dirty_inode(inode);
3723                 }
3724         }
3725         clear_page_private_all(&folio->page);
3726 }
3727
3728 bool f2fs_release_folio(struct folio *folio, gfp_t wait)
3729 {
3730         /* If this is dirty folio, keep private data */
3731         if (folio_test_dirty(folio))
3732                 return false;
3733
3734         clear_page_private_all(&folio->page);
3735         return true;
3736 }
3737
3738 static bool f2fs_dirty_data_folio(struct address_space *mapping,
3739                 struct folio *folio)
3740 {
3741         struct inode *inode = mapping->host;
3742
3743         trace_f2fs_set_page_dirty(&folio->page, DATA);
3744
3745         if (!folio_test_uptodate(folio))
3746                 folio_mark_uptodate(folio);
3747         BUG_ON(folio_test_swapcache(folio));
3748
3749         if (filemap_dirty_folio(mapping, folio)) {
3750                 f2fs_update_dirty_folio(inode, folio);
3751                 return true;
3752         }
3753         return false;
3754 }
3755
3756
3757 static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
3758 {
3759 #ifdef CONFIG_F2FS_FS_COMPRESSION
3760         struct dnode_of_data dn;
3761         sector_t start_idx, blknr = 0;
3762         int ret;
3763
3764         start_idx = round_down(block, F2FS_I(inode)->i_cluster_size);
3765
3766         set_new_dnode(&dn, inode, NULL, NULL, 0);
3767         ret = f2fs_get_dnode_of_data(&dn, start_idx, LOOKUP_NODE);
3768         if (ret)
3769                 return 0;
3770
3771         if (dn.data_blkaddr != COMPRESS_ADDR) {
3772                 dn.ofs_in_node += block - start_idx;
3773                 blknr = f2fs_data_blkaddr(&dn);
3774                 if (!__is_valid_data_blkaddr(blknr))
3775                         blknr = 0;
3776         }
3777
3778         f2fs_put_dnode(&dn);
3779         return blknr;
3780 #else
3781         return 0;
3782 #endif
3783 }
3784
3785
3786 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
3787 {
3788         struct inode *inode = mapping->host;
3789         sector_t blknr = 0;
3790
3791         if (f2fs_has_inline_data(inode))
3792                 goto out;
3793
3794         /* make sure allocating whole blocks */
3795         if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
3796                 filemap_write_and_wait(mapping);
3797
3798         /* Block number less than F2FS MAX BLOCKS */
3799         if (unlikely(block >= max_file_blocks(inode)))
3800                 goto out;
3801
3802         if (f2fs_compressed_file(inode)) {
3803                 blknr = f2fs_bmap_compress(inode, block);
3804         } else {
3805                 struct f2fs_map_blocks map;
3806
3807                 memset(&map, 0, sizeof(map));
3808                 map.m_lblk = block;
3809                 map.m_len = 1;
3810                 map.m_next_pgofs = NULL;
3811                 map.m_seg_type = NO_CHECK_TYPE;
3812
3813                 if (!f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_BMAP))
3814                         blknr = map.m_pblk;
3815         }
3816 out:
3817         trace_f2fs_bmap(inode, block, blknr);
3818         return blknr;
3819 }
3820
3821 #ifdef CONFIG_SWAP
3822 static int f2fs_migrate_blocks(struct inode *inode, block_t start_blk,
3823                                                         unsigned int blkcnt)
3824 {
3825         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3826         unsigned int blkofs;
3827         unsigned int blk_per_sec = BLKS_PER_SEC(sbi);
3828         unsigned int end_blk = start_blk + blkcnt - 1;
3829         unsigned int secidx = start_blk / blk_per_sec;
3830         unsigned int end_sec;
3831         int ret = 0;
3832
3833         if (!blkcnt)
3834                 return 0;
3835         end_sec = end_blk / blk_per_sec;
3836
3837         f2fs_down_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3838         filemap_invalidate_lock(inode->i_mapping);
3839
3840         set_inode_flag(inode, FI_ALIGNED_WRITE);
3841         set_inode_flag(inode, FI_OPU_WRITE);
3842
3843         for (; secidx <= end_sec; secidx++) {
3844                 unsigned int blkofs_end = secidx == end_sec ?
3845                                 end_blk % blk_per_sec : blk_per_sec - 1;
3846
3847                 f2fs_down_write(&sbi->pin_sem);
3848
3849                 ret = f2fs_allocate_pinning_section(sbi);
3850                 if (ret) {
3851                         f2fs_up_write(&sbi->pin_sem);
3852                         break;
3853                 }
3854
3855                 set_inode_flag(inode, FI_SKIP_WRITES);
3856
3857                 for (blkofs = 0; blkofs <= blkofs_end; blkofs++) {
3858                         struct page *page;
3859                         unsigned int blkidx = secidx * blk_per_sec + blkofs;
3860
3861                         page = f2fs_get_lock_data_page(inode, blkidx, true);
3862                         if (IS_ERR(page)) {
3863                                 f2fs_up_write(&sbi->pin_sem);
3864                                 ret = PTR_ERR(page);
3865                                 goto done;
3866                         }
3867
3868                         set_page_dirty(page);
3869                         f2fs_put_page(page, 1);
3870                 }
3871
3872                 clear_inode_flag(inode, FI_SKIP_WRITES);
3873
3874                 ret = filemap_fdatawrite(inode->i_mapping);
3875
3876                 f2fs_up_write(&sbi->pin_sem);
3877
3878                 if (ret)
3879                         break;
3880         }
3881
3882 done:
3883         clear_inode_flag(inode, FI_SKIP_WRITES);
3884         clear_inode_flag(inode, FI_OPU_WRITE);
3885         clear_inode_flag(inode, FI_ALIGNED_WRITE);
3886
3887         filemap_invalidate_unlock(inode->i_mapping);
3888         f2fs_up_write(&F2FS_I(inode)->i_gc_rwsem[WRITE]);
3889
3890         return ret;
3891 }
3892
3893 static int check_swap_activate(struct swap_info_struct *sis,
3894                                 struct file *swap_file, sector_t *span)
3895 {
3896         struct address_space *mapping = swap_file->f_mapping;
3897         struct inode *inode = mapping->host;
3898         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3899         sector_t cur_lblock;
3900         sector_t last_lblock;
3901         sector_t pblock;
3902         sector_t lowest_pblock = -1;
3903         sector_t highest_pblock = 0;
3904         int nr_extents = 0;
3905         unsigned long nr_pblocks;
3906         unsigned int blks_per_sec = BLKS_PER_SEC(sbi);
3907         unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1;
3908         unsigned int not_aligned = 0;
3909         int ret = 0;
3910
3911         /*
3912          * Map all the blocks into the extent list.  This code doesn't try
3913          * to be very smart.
3914          */
3915         cur_lblock = 0;
3916         last_lblock = bytes_to_blks(inode, i_size_read(inode));
3917
3918         while (cur_lblock < last_lblock && cur_lblock < sis->max) {
3919                 struct f2fs_map_blocks map;
3920 retry:
3921                 cond_resched();
3922
3923                 memset(&map, 0, sizeof(map));
3924                 map.m_lblk = cur_lblock;
3925                 map.m_len = last_lblock - cur_lblock;
3926                 map.m_next_pgofs = NULL;
3927                 map.m_next_extent = NULL;
3928                 map.m_seg_type = NO_CHECK_TYPE;
3929                 map.m_may_create = false;
3930
3931                 ret = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_FIEMAP);
3932                 if (ret)
3933                         goto out;
3934
3935                 /* hole */
3936                 if (!(map.m_flags & F2FS_MAP_FLAGS)) {
3937                         f2fs_err(sbi, "Swapfile has holes");
3938                         ret = -EINVAL;
3939                         goto out;
3940                 }
3941
3942                 pblock = map.m_pblk;
3943                 nr_pblocks = map.m_len;
3944
3945                 if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask ||
3946                                 nr_pblocks & sec_blks_mask ||
3947                                 !f2fs_valid_pinned_area(sbi, pblock)) {
3948                         bool last_extent = false;
3949
3950                         not_aligned++;
3951
3952                         nr_pblocks = roundup(nr_pblocks, blks_per_sec);
3953                         if (cur_lblock + nr_pblocks > sis->max)
3954                                 nr_pblocks -= blks_per_sec;
3955
3956                         /* this extent is last one */
3957                         if (!nr_pblocks) {
3958                                 nr_pblocks = last_lblock - cur_lblock;
3959                                 last_extent = true;
3960                         }
3961
3962                         ret = f2fs_migrate_blocks(inode, cur_lblock,
3963                                                         nr_pblocks);
3964                         if (ret) {
3965                                 if (ret == -ENOENT)
3966                                         ret = -EINVAL;
3967                                 goto out;
3968                         }
3969
3970                         if (!last_extent)
3971                                 goto retry;
3972                 }
3973
3974                 if (cur_lblock + nr_pblocks >= sis->max)
3975                         nr_pblocks = sis->max - cur_lblock;
3976
3977                 if (cur_lblock) {       /* exclude the header page */
3978                         if (pblock < lowest_pblock)
3979                                 lowest_pblock = pblock;
3980                         if (pblock + nr_pblocks - 1 > highest_pblock)
3981                                 highest_pblock = pblock + nr_pblocks - 1;
3982                 }
3983
3984                 /*
3985                  * We found a PAGE_SIZE-length, PAGE_SIZE-aligned run of blocks
3986                  */
3987                 ret = add_swap_extent(sis, cur_lblock, nr_pblocks, pblock);
3988                 if (ret < 0)
3989                         goto out;
3990                 nr_extents += ret;
3991                 cur_lblock += nr_pblocks;
3992         }
3993         ret = nr_extents;
3994         *span = 1 + highest_pblock - lowest_pblock;
3995         if (cur_lblock == 0)
3996                 cur_lblock = 1; /* force Empty message */
3997         sis->max = cur_lblock;
3998         sis->pages = cur_lblock - 1;
3999         sis->highest_bit = cur_lblock - 1;
4000 out:
4001         if (not_aligned)
4002                 f2fs_warn(sbi, "Swapfile (%u) is not align to section: 1) creat(), 2) ioctl(F2FS_IOC_SET_PIN_FILE), 3) fallocate(%lu * N)",
4003                           not_aligned, blks_per_sec * F2FS_BLKSIZE);
4004         return ret;
4005 }
4006
4007 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4008                                 sector_t *span)
4009 {
4010         struct inode *inode = file_inode(file);
4011         struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
4012         int ret;
4013
4014         if (!S_ISREG(inode->i_mode))
4015                 return -EINVAL;
4016
4017         if (f2fs_readonly(sbi->sb))
4018                 return -EROFS;
4019
4020         if (f2fs_lfs_mode(sbi) && !f2fs_sb_has_blkzoned(sbi)) {
4021                 f2fs_err(sbi, "Swapfile not supported in LFS mode");
4022                 return -EINVAL;
4023         }
4024
4025         ret = f2fs_convert_inline_inode(inode);
4026         if (ret)
4027                 return ret;
4028
4029         if (!f2fs_disable_compressed_file(inode))
4030                 return -EINVAL;
4031
4032         ret = filemap_fdatawrite(inode->i_mapping);
4033         if (ret < 0)
4034                 return ret;
4035
4036         f2fs_precache_extents(inode);
4037
4038         ret = check_swap_activate(sis, file, span);
4039         if (ret < 0)
4040                 return ret;
4041
4042         stat_inc_swapfile_inode(inode);
4043         set_inode_flag(inode, FI_PIN_FILE);
4044         f2fs_update_time(sbi, REQ_TIME);
4045         return ret;
4046 }
4047
4048 static void f2fs_swap_deactivate(struct file *file)
4049 {
4050         struct inode *inode = file_inode(file);
4051
4052         stat_dec_swapfile_inode(inode);
4053         clear_inode_flag(inode, FI_PIN_FILE);
4054 }
4055 #else
4056 static int f2fs_swap_activate(struct swap_info_struct *sis, struct file *file,
4057                                 sector_t *span)
4058 {
4059         return -EOPNOTSUPP;
4060 }
4061
/*
 * f2fs_swap_deactivate - fallback ->swap_deactivate handler for the #else
 * branch; the matching activate stub never succeeds, so there is no state
 * to undo.
 */
static void f2fs_swap_deactivate(struct file *file)
{
	/* Intentionally empty. */
}
4065 #endif
4066
4067 const struct address_space_operations f2fs_dblock_aops = {
4068         .read_folio     = f2fs_read_data_folio,
4069         .readahead      = f2fs_readahead,
4070         .writepage      = f2fs_write_data_page,
4071         .writepages     = f2fs_write_data_pages,
4072         .write_begin    = f2fs_write_begin,
4073         .write_end      = f2fs_write_end,
4074         .dirty_folio    = f2fs_dirty_data_folio,
4075         .migrate_folio  = filemap_migrate_folio,
4076         .invalidate_folio = f2fs_invalidate_folio,
4077         .release_folio  = f2fs_release_folio,
4078         .bmap           = f2fs_bmap,
4079         .swap_activate  = f2fs_swap_activate,
4080         .swap_deactivate = f2fs_swap_deactivate,
4081 };
4082
4083 void f2fs_clear_page_cache_dirty_tag(struct page *page)
4084 {
4085         struct address_space *mapping = page_mapping(page);
4086         unsigned long flags;
4087
4088         xa_lock_irqsave(&mapping->i_pages, flags);
4089         __xa_clear_mark(&mapping->i_pages, page_index(page),
4090                                                 PAGECACHE_TAG_DIRTY);
4091         xa_unlock_irqrestore(&mapping->i_pages, flags);
4092 }
4093
4094 int __init f2fs_init_post_read_processing(void)
4095 {
4096         bio_post_read_ctx_cache =
4097                 kmem_cache_create("f2fs_bio_post_read_ctx",
4098                                   sizeof(struct bio_post_read_ctx), 0, 0, NULL);
4099         if (!bio_post_read_ctx_cache)
4100                 goto fail;
4101         bio_post_read_ctx_pool =
4102                 mempool_create_slab_pool(NUM_PREALLOC_POST_READ_CTXS,
4103                                          bio_post_read_ctx_cache);
4104         if (!bio_post_read_ctx_pool)
4105                 goto fail_free_cache;
4106         return 0;
4107
4108 fail_free_cache:
4109         kmem_cache_destroy(bio_post_read_ctx_cache);
4110 fail:
4111         return -ENOMEM;
4112 }
4113
4114 void f2fs_destroy_post_read_processing(void)
4115 {
4116         mempool_destroy(bio_post_read_ctx_pool);
4117         kmem_cache_destroy(bio_post_read_ctx_cache);
4118 }
4119
4120 int f2fs_init_post_read_wq(struct f2fs_sb_info *sbi)
4121 {
4122         if (!f2fs_sb_has_encrypt(sbi) &&
4123                 !f2fs_sb_has_verity(sbi) &&
4124                 !f2fs_sb_has_compression(sbi))
4125                 return 0;
4126
4127         sbi->post_read_wq = alloc_workqueue("f2fs_post_read_wq",
4128                                                  WQ_UNBOUND | WQ_HIGHPRI,
4129                                                  num_online_cpus());
4130         return sbi->post_read_wq ? 0 : -ENOMEM;
4131 }
4132
4133 void f2fs_destroy_post_read_wq(struct f2fs_sb_info *sbi)
4134 {
4135         if (sbi->post_read_wq)
4136                 destroy_workqueue(sbi->post_read_wq);
4137 }
4138
4139 int __init f2fs_init_bio_entry_cache(void)
4140 {
4141         bio_entry_slab = f2fs_kmem_cache_create("f2fs_bio_entry_slab",
4142                         sizeof(struct bio_entry));
4143         return bio_entry_slab ? 0 : -ENOMEM;
4144 }
4145
4146 void f2fs_destroy_bio_entry_cache(void)
4147 {
4148         kmem_cache_destroy(bio_entry_slab);
4149 }
4150
4151 static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
4152                             unsigned int flags, struct iomap *iomap,
4153                             struct iomap *srcmap)
4154 {
4155         struct f2fs_map_blocks map = {};
4156         pgoff_t next_pgofs = 0;
4157         int err;
4158
4159         map.m_lblk = bytes_to_blks(inode, offset);
4160         map.m_len = bytes_to_blks(inode, offset + length - 1) - map.m_lblk + 1;
4161         map.m_next_pgofs = &next_pgofs;
4162         map.m_seg_type = f2fs_rw_hint_to_seg_type(inode->i_write_hint);
4163         if (flags & IOMAP_WRITE)
4164                 map.m_may_create = true;
4165
4166         err = f2fs_map_blocks(inode, &map, F2FS_GET_BLOCK_DIO);
4167         if (err)
4168                 return err;
4169
4170         iomap->offset = blks_to_bytes(inode, map.m_lblk);
4171
4172         /*
4173          * When inline encryption is enabled, sometimes I/O to an encrypted file
4174          * has to be broken up to guarantee DUN contiguity.  Handle this by
4175          * limiting the length of the mapping returned.
4176          */
4177         map.m_len = fscrypt_limit_io_blocks(inode, map.m_lblk, map.m_len);
4178
4179         /*
4180          * We should never see delalloc or compressed extents here based on
4181          * prior flushing and checks.
4182          */
4183         if (WARN_ON_ONCE(map.m_pblk == NEW_ADDR))
4184                 return -EINVAL;
4185         if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR))
4186                 return -EINVAL;
4187
4188         if (map.m_pblk != NULL_ADDR) {
4189                 iomap->length = blks_to_bytes(inode, map.m_len);
4190                 iomap->type = IOMAP_MAPPED;
4191                 iomap->flags |= IOMAP_F_MERGED;
4192                 iomap->bdev = map.m_bdev;
4193                 iomap->addr = blks_to_bytes(inode, map.m_pblk);
4194         } else {
4195                 if (flags & IOMAP_WRITE)
4196                         return -ENOTBLK;
4197                 iomap->length = blks_to_bytes(inode, next_pgofs) -
4198                                 iomap->offset;
4199                 iomap->type = IOMAP_HOLE;
4200                 iomap->addr = IOMAP_NULL_ADDR;
4201         }
4202
4203         if (map.m_flags & F2FS_MAP_NEW)
4204                 iomap->flags |= IOMAP_F_NEW;
4205         if ((inode->i_state & I_DIRTY_DATASYNC) ||
4206             offset + length > i_size_read(inode))
4207                 iomap->flags |= IOMAP_F_DIRTY;
4208
4209         return 0;
4210 }
4211
4212 const struct iomap_ops f2fs_iomap_ops = {
4213         .iomap_begin    = f2fs_iomap_begin,
4214 };