include/linux/blk_types.h

   1 /* SPDX-License-Identifier: GPL-2.0 */
   2 /*
   3  * Block data types and constants.  Directly include this file only to
   4  * break include dependency loop.
   5  */
   6 #ifndef __LINUX_BLK_TYPES_H
   7 #define __LINUX_BLK_TYPES_H
   8
   9 #include <linux/types.h>
  10 #include <linux/bvec.h>
  11 #include <linux/ktime.h>
  12
   13 struct bio_set;                 /* forward declarations only: no definitions are pulled in here */
   14 struct bio;
   15 struct bio_integrity_payload;
   16 struct page;
   17 struct io_context;
   18 struct cgroup_subsys_state;
   19 typedef void (bio_end_io_t) (struct bio *);     /* function type; struct bio keeps a pointer to one in bi_end_io */
   20 struct bio_crypt_ctx;
  21
   22 struct block_device {
              /*
               * State kept for one block device.
               *
               * NOTE(review): the struct is tagged __randomize_layout (see the
               * closing brace), which presumably lets the build shuffle the
               * member order -- do not depend on the declaration order below.
               */
   23         sector_t                bd_start_sect;
   24         struct disk_stats __percpu *bd_stats;
   25         unsigned long           bd_stamp;
   26         dev_t                   bd_dev;
   27         int                     bd_openers;
   28         struct inode *          bd_inode;       /* will die */
   29         struct super_block *    bd_super;
   30         struct mutex            bd_mutex;       /* open/close mutex */
   31         void *                  bd_claiming;
   32         void *                  bd_holder;
   33         int                     bd_holders;
   34         bool                    bd_write_holder;
   35 #ifdef CONFIG_SYSFS
   36         struct list_head        bd_holder_disks;        /* member exists only with CONFIG_SYSFS */
   37 #endif
   38         struct kobject          *bd_holder_dir;         /* present regardless of CONFIG_SYSFS */
   39         u8                      bd_partno;
   40         struct hd_struct *      bd_part;
   41         /* number of times partitions within this device have been opened. */
   42         unsigned                bd_part_count;
   43
   44         spinlock_t              bd_size_lock; /* for bd_inode->i_size updates */
   45         struct gendisk *        bd_disk;
   46         struct backing_dev_info *bd_bdi;
   47
   48         /* The counter of freeze processes */
   49         int                     bd_fsfreeze_count;
   50         /* Mutex for freeze */
   51         struct mutex            bd_fsfreeze_mutex;
   52         struct super_block      *bd_fsfreeze_sb;
   53
   54         struct partition_meta_info *bd_meta_info;
   55 } __randomize_layout;
  56
   57 #define bdev_whole(_bdev) \
   58         ((_bdev)->bd_disk->part0.bdev)  /* the bdev recorded in part0 of _bdev's gendisk */
   59
   60 #define bdev_kobj(_bdev) \
   61         (&part_to_dev((_bdev)->bd_part)->kobj)  /* address of the kobj in part_to_dev(_bdev->bd_part) */
  62
   63 /*
   64  * Block error status values.  See blk_errors in block/blk-core.c for details.
   65  * Alpha without __alpha_bwx__ cannot write a byte atomically: use 32 bits there.
   66  */
   67 #if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
   68 typedef u32 __bitwise blk_status_t;
   69 #else
   70 typedef u8 __bitwise blk_status_t;
   71 #endif
   72 #define BLK_STS_OK 0    /* plain 0, unlike the __force-cast values below */
   73 #define BLK_STS_NOTSUPP         ((__force blk_status_t)1)
   74 #define BLK_STS_TIMEOUT         ((__force blk_status_t)2)
   75 #define BLK_STS_NOSPC           ((__force blk_status_t)3)
   76 #define BLK_STS_TRANSPORT       ((__force blk_status_t)4)
   77 #define BLK_STS_TARGET          ((__force blk_status_t)5)
   78 #define BLK_STS_NEXUS           ((__force blk_status_t)6)
   79 #define BLK_STS_MEDIUM          ((__force blk_status_t)7)
   80 #define BLK_STS_PROTECTION      ((__force blk_status_t)8)
   81 #define BLK_STS_RESOURCE        ((__force blk_status_t)9)
   82 #define BLK_STS_IOERR           ((__force blk_status_t)10)
   83
   84 /* hack for device mapper, don't use elsewhere: */
   85 #define BLK_STS_DM_REQUEUE    ((__force blk_status_t)11)
   86
   87 #define BLK_STS_AGAIN           ((__force blk_status_t)12)
  88
   89 /*
   90  * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
   91  * device related resources are unavailable, but the driver can guarantee
   92  * that the queue will be rerun in the future once resources become
   93  * available again. This is typically the case for device specific
   94  * resources that are consumed for IO. If the driver fails allocating these
   95  * resources, we know that inflight (or pending) IO will free these
   96  * resources upon completion.
   97  *
   98  * This is different from BLK_STS_RESOURCE in that it explicitly references
   99  * a device specific resource. For resources of wider scope, allocation
  100  * failure can happen without having pending IO. This means that we can't
  101  * rely on request completions freeing these resources, as IO may not be in
  102  * flight. Examples of that are kernel memory allocations, DMA mappings, or
  103  * any other system wide resources.
  104  */
  105 #define BLK_STS_DEV_RESOURCE    ((__force blk_status_t)13)
  106
  107 /*
  108  * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
  109  * related resources are unavailable, but the driver can guarantee the queue
  110  * will be rerun in the future once the resources become available again.
  111  *
  112  * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
  113  * a zone specific resource and IO to a different zone on the same device could
  114  * still be served. Examples of that are zones that are write-locked, but a read
  115  * to the same zone could be served.
  116  */
  117 #define BLK_STS_ZONE_RESOURCE   ((__force blk_status_t)14)
  118
  119 /*
  120  * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
  121  * path if the device returns a status indicating that too many zone resources
  122  * are currently open. The same command should be successful if resubmitted
  123  * after the number of open zones decreases below the device's limits, which is
  124  * reported in the request_queue's max_open_zones.
  125  */
  126 #define BLK_STS_ZONE_OPEN_RESOURCE      ((__force blk_status_t)15)
  127
  128 /*
  129  * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
  130  * path if the device returns a status indicating that too many zone resources
  131  * are currently active. The same command should be successful if resubmitted
  132  * after the number of active zones decreases below the device's limits, which
  133  * is reported in the request_queue's max_active_zones.
  134  */
  135 #define BLK_STS_ZONE_ACTIVE_RESOURCE    ((__force blk_status_t)16)
 136
 137 /**
 138  * blk_path_error - returns true if error may be path related
 139  * @error: status the request was completed with
 140  *
 141  * Description:
 142  *     This classifies block error status into non-retryable errors and ones
 143  *     that may be successful if retried on a failover path.
 144  *
 145  * Return:
 146  *     %false - retrying failover path will not help
 147  *     %true  - may succeed if retried
 148  */
 149 static inline bool blk_path_error(blk_status_t error)
 150 {
 151         switch (error) {
 152         case BLK_STS_NOTSUPP:
 153         case BLK_STS_NOSPC:
 154         case BLK_STS_TARGET:
 155         case BLK_STS_NEXUS:
 156         case BLK_STS_MEDIUM:
 157         case BLK_STS_PROTECTION:
 158                 return false;
 159         }
 160
 161         /* Anything else could be a path failure, so should be retried */
 162         return true;
 163 }
 164
  165 /*
  166  * Layout of bio_issue.value, from most significant bit:
  167  * 1 bit: reserved for other usage, see below (bit 63)
  168  * 12 bits: original size of bio (bits 62..51)
  169  * 51 bits: issue time of bio (bits 50..0)
  170  */
  171 #define BIO_ISSUE_RES_BITS      1
  172 #define BIO_ISSUE_SIZE_BITS     12
  173 #define BIO_ISSUE_RES_SHIFT     (64 - BIO_ISSUE_RES_BITS)       /* 63 */
  174 #define BIO_ISSUE_SIZE_SHIFT    (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)     /* 51 */
  175 #define BIO_ISSUE_TIME_MASK     ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)    /* every bit below the size field */
  176 #define BIO_ISSUE_SIZE_MASK     \
  177         (((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)   /* the 12 size bits, in place */
  178 #define BIO_ISSUE_RES_MASK      (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))  /* only the top (reserved) bit */
  179
  180 /* Reserved bit for blk-throtl */
  181 #define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)      /* same bit that BIO_ISSUE_RES_MASK selects */
  182
  183 struct bio_issue {
  184         u64 value;      /* packed reserved/size/time word, see the layout above */
  185 };
 186
 187 static inline u64 __bio_issue_time(u64 time)
 188 {
 189         return time & BIO_ISSUE_TIME_MASK;
 190 }
 191
 192 static inline u64 bio_issue_time(struct bio_issue *issue)
 193 {
 194         return __bio_issue_time(issue->value);
 195 }
 196
 197 static inline sector_t bio_issue_size(struct bio_issue *issue)
 198 {
 199         return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
 200 }
 201
 202 static inline void bio_issue_init(struct bio_issue *issue,
 203                                        sector_t size)
 204 {
 205         size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
 206         issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
 207                         (ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
 208                         ((u64)size << BIO_ISSUE_SIZE_SHIFT));
 209 }
 210
  211 /*
  212  * main unit of I/O for the block layer and lower layers (i.e. drivers and
  213  * stacking drivers)
  214  */
  215 struct bio {
  216         struct bio              *bi_next;       /* request queue link */
  217         struct gendisk          *bi_disk;
  218         unsigned int            bi_opf;         /* bottom bits REQ_OP,
  219                                                  * top bits req flags.
  220                                                  * Use accessors.
  221                                                  */
  222         unsigned short          bi_flags;       /* BIO_* status flags, plus bvec pool number in the top bits */
  223         unsigned short          bi_ioprio;
  224         unsigned short          bi_write_hint;
  225         blk_status_t            bi_status;
  226         u8                      bi_partno;
  227         atomic_t                __bi_remaining;
  228
  229         struct bvec_iter        bi_iter;
  230
  231         bio_end_io_t            *bi_end_io;
  232
  233         void                    *bi_private;
  234 #ifdef CONFIG_BLK_CGROUP
  235         /*
  236          * Represents the association of the css and request_queue for the bio.
  237          * If a bio goes direct to device, it will not have a blkg as it will
  238          * not have a request_queue associated with it.  The reference is put
  239          * on release of the bio.
  240          */
  241         struct blkcg_gq         *bi_blkg;
  242         struct bio_issue        bi_issue;
  243 #ifdef CONFIG_BLK_CGROUP_IOCOST
  244         u64                     bi_iocost_cost;
  245 #endif
  246 #endif
  247
  248 #ifdef CONFIG_BLK_INLINE_ENCRYPTION
  249         struct bio_crypt_ctx    *bi_crypt_context;
  250 #endif
  251
  252         union {
  253 #if defined(CONFIG_BLK_DEV_INTEGRITY)
  254                 struct bio_integrity_payload *bi_integrity; /* data integrity */
  255 #endif
  256         };
  257
  258         unsigned short          bi_vcnt;        /* how many bio_vec's */
  259
  260         /*
  261          * Everything starting with bi_max_vecs will be preserved by bio_reset()
  262          */
  263
  264         unsigned short          bi_max_vecs;    /* max bvl_vecs we can hold */
  265
  266         atomic_t                __bi_cnt;       /* pin count */
  267
  268         struct bio_vec          *bi_io_vec;     /* the actual vec list */
  269
  270         struct bio_set          *bi_pool;
  271
  272         /*
  273          * We can inline a number of vecs at the end of the bio, to avoid
  274          * double allocations for a small number of bio_vecs. This member
  275          * MUST obviously be kept at the very end of the bio.
  276          */
  277         struct bio_vec          bi_inline_vecs[];
  278 };
  279
  280 #define BIO_RESET_BYTES         offsetof(struct bio, bi_max_vecs)       /* size of the part before bi_max_vecs */
 281
  282 /*
  283  * bio flags (BIO_FLAG_LAST is not a flag: it counts the flags defined here)
  284  */
  285 enum {
  286         BIO_NO_PAGE_REF,        /* don't put release vec pages */
  287         BIO_CLONED,             /* doesn't own data */
  288         BIO_BOUNCED,            /* bio is a bounce bio */
  289         BIO_WORKINGSET,         /* contains userspace workingset pages */
  290         BIO_QUIET,              /* Make BIO Quiet */
  291         BIO_CHAIN,              /* chained bio, ->bi_remaining in effect */
  292         BIO_REFFED,             /* bio has elevated ->bi_cnt */
  293         BIO_THROTTLED,          /* This bio has already been subjected to
  294                                  * throttling rules. Don't do it again. */
  295         BIO_TRACE_COMPLETION,   /* bio_endio() should trace the final completion
  296                                  * of this bio. */
  297         BIO_CGROUP_ACCT,        /* has been accounted to a cgroup */
  298         BIO_TRACKED,            /* set if bio goes through the rq_qos path */
  299         BIO_FLAG_LAST
  300 };
  301
  302 /* See BVEC_POOL_OFFSET below before adding new flags */
 303
  304 /*
  305  * We support 6 different bvec pools, the last one is magic in that it
  306  * is backed by a mempool.
  307  */
  308 #define BVEC_POOL_NR            6
  309 #define BVEC_POOL_MAX           (BVEC_POOL_NR - 1)      /* highest pool index */
  310
  311 /*
  312  * Top 3 bits of bio flags indicate the pool the bvecs came from.  We add
  313  * 1 to the actual index so that 0 indicates that there are no bvecs to be
  314  * freed.
  315  */
  316 #define BVEC_POOL_BITS          (3)
  317 #define BVEC_POOL_OFFSET        (16 - BVEC_POOL_BITS)   /* 13: first bit of the pool field */
  318 #define BVEC_POOL_IDX(bio)      ((bio)->bi_flags >> BVEC_POOL_OFFSET)   /* stored value, i.e. pool index + 1, or 0 */
  319 #if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1)     /* the field must hold 0..BVEC_POOL_NR */
  320 # error "BVEC_POOL_BITS is too small"
  321 #endif
  322
  323 /*
  324  * Flags starting here get preserved by bio_reset() - this includes
  325  * only BVEC_POOL_IDX()
  326  */
  327 #define BIO_RESET_BITS  BVEC_POOL_OFFSET
 328
  329 typedef __u32 __bitwise blk_mq_req_flags_t;     /* 32-bit __bitwise flag type; no values are defined in this header */
 330
  331 /*
  332  * Operations and flags common to the bio and request structures.
  333  * We use 8 bits for encoding the operation, and the remaining 24 for flags.
  334  *
  335  * The least significant bit of the operation number indicates the data
  336  * transfer direction:
  337  *
  338  *   - if the least significant bit is set transfers are TO the device
  339  *   - if the least significant bit is not set transfers are FROM the device
  340  *
  341  * If an operation does not transfer data the least significant bit has no
  342  * meaning.
  343  */
  344 #define REQ_OP_BITS     8
  345 #define REQ_OP_MASK     ((1 << REQ_OP_BITS) - 1)        /* 0xff: selects the operation from the low bits */
  346 #define REQ_FLAG_BITS   24
 347
  348 enum req_opf {
  349         /* read sectors from the device */
  350         REQ_OP_READ             = 0,
  351         /* write sectors to the device */
  352         REQ_OP_WRITE            = 1,
  353         /* flush the volatile write cache */
  354         REQ_OP_FLUSH            = 2,
  355         /* discard sectors */
  356         REQ_OP_DISCARD          = 3,
  357         /* securely erase sectors */
  358         REQ_OP_SECURE_ERASE     = 5,
  359         /* write the same sector many times */
  360         REQ_OP_WRITE_SAME       = 7,
  361         /* write the zero filled sector many times */
  362         REQ_OP_WRITE_ZEROES     = 9,
  363         /* Open a zone */
  364         REQ_OP_ZONE_OPEN        = 10,
  365         /* Close a zone */
  366         REQ_OP_ZONE_CLOSE       = 11,
  367         /* Transition a zone to full */
  368         REQ_OP_ZONE_FINISH      = 12,
  369         /* write data at the current zone write pointer */
  370         REQ_OP_ZONE_APPEND      = 13,
  371         /* reset a zone write pointer */
  372         REQ_OP_ZONE_RESET       = 15,
  373         /* reset all the zones present on the device */
  374         REQ_OP_ZONE_RESET_ALL   = 17,
  375
  376         /* SCSI passthrough using struct scsi_request */
  377         REQ_OP_SCSI_IN          = 32,
  378         REQ_OP_SCSI_OUT         = 33,
  379         /* Driver private requests */
  380         REQ_OP_DRV_IN           = 34,
  381         REQ_OP_DRV_OUT          = 35,
  382
  383         REQ_OP_LAST,    /* one past the highest operation number (36 here) */
  384 };
 385
  386 enum req_flag_bits {
              /*
               * Bit numbers, not masks: the REQ_* macros shift 1ULL by these.
               * Numbering starts at REQ_OP_BITS, directly above the operation.
               */
  387         __REQ_FAILFAST_DEV =    /* no driver retries of device errors */
  388                 REQ_OP_BITS,
  389         __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
  390         __REQ_FAILFAST_DRIVER,  /* no driver retries of driver errors */
  391         __REQ_SYNC,             /* request is sync (sync write or read) */
  392         __REQ_META,             /* metadata io request */
  393         __REQ_PRIO,             /* boost priority in cfq */
  394         __REQ_NOMERGE,          /* don't touch this for merging */
  395         __REQ_IDLE,             /* anticipate more IO after this one */
  396         __REQ_INTEGRITY,        /* I/O includes block integrity payload */
  397         __REQ_FUA,              /* forced unit access */
  398         __REQ_PREFLUSH,         /* request for cache flush */
  399         __REQ_RAHEAD,           /* read ahead, can fail anytime */
  400         __REQ_BACKGROUND,       /* background IO */
  401         __REQ_NOWAIT,           /* Don't wait if request will block */
  402         /*
  403          * When a shared kthread needs to issue a bio for a cgroup, doing
  404          * so synchronously can lead to priority inversions as the kthread
  405          * can be trapped waiting for that cgroup.  CGROUP_PUNT flag makes
  406          * submit_bio() punt the actual issuing to a dedicated per-blkcg
  407          * work item to avoid such priority inversions.
  408          */
  409         __REQ_CGROUP_PUNT,
  410
  411         /* command specific flags for REQ_OP_WRITE_ZEROES: */
  412         __REQ_NOUNMAP,          /* do not free blocks when zeroing */
  413
  414         __REQ_HIPRI,
  415
  416         /* for driver use */
  417         __REQ_DRV,
  418         __REQ_SWAP,             /* swapping request. */
  419         __REQ_NR_BITS,          /* stops here */
  420 };
 421
  422 #define REQ_FAILFAST_DEV        (1ULL << __REQ_FAILFAST_DEV)    /* masks below: 1ULL shifted by the enum req_flag_bits bit number */
  423 #define REQ_FAILFAST_TRANSPORT  (1ULL << __REQ_FAILFAST_TRANSPORT)
  424 #define REQ_FAILFAST_DRIVER     (1ULL << __REQ_FAILFAST_DRIVER)
  425 #define REQ_SYNC                (1ULL << __REQ_SYNC)
  426 #define REQ_META                (1ULL << __REQ_META)
  427 #define REQ_PRIO                (1ULL << __REQ_PRIO)
  428 #define REQ_NOMERGE             (1ULL << __REQ_NOMERGE)
  429 #define REQ_IDLE                (1ULL << __REQ_IDLE)
  430 #define REQ_INTEGRITY           (1ULL << __REQ_INTEGRITY)
  431 #define REQ_FUA                 (1ULL << __REQ_FUA)
  432 #define REQ_PREFLUSH            (1ULL << __REQ_PREFLUSH)
  433 #define REQ_RAHEAD              (1ULL << __REQ_RAHEAD)
  434 #define REQ_BACKGROUND          (1ULL << __REQ_BACKGROUND)
  435 #define REQ_NOWAIT              (1ULL << __REQ_NOWAIT)
  436 #define REQ_CGROUP_PUNT         (1ULL << __REQ_CGROUP_PUNT)
  437
  438 #define REQ_NOUNMAP             (1ULL << __REQ_NOUNMAP)
  439 #define REQ_HIPRI               (1ULL << __REQ_HIPRI)
  440
  441 #define REQ_DRV                 (1ULL << __REQ_DRV)
  442 #define REQ_SWAP                (1ULL << __REQ_SWAP)
  443
  444 #define REQ_FAILFAST_MASK \
  445         (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)       /* all three failfast flags */
  446
  447 #define REQ_NOMERGE_FLAGS \
  448         (REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)  /* NOMERGE combined with the two flush-related flags */
 449
  450 enum stat_group {
  451         STAT_READ,      /* 0: what op_stat_group() yields for a non-discard op with bit 0 clear */
  452         STAT_WRITE,     /* 1: what op_stat_group() yields for a non-discard op with bit 0 set */
  453         STAT_DISCARD,
  454         STAT_FLUSH,     /* never returned by op_stat_group() in this header */
  455
  456         NR_STAT_GROUPS  /* number of groups above */
  457 };
 458
  459 #define bio_op(bio) \
  460         ((bio)->bi_opf & REQ_OP_MASK)   /* operation bits of a bio; the flag bits are masked off */
  461 #define req_op(req) \
  462         ((req)->cmd_flags & REQ_OP_MASK)        /* same extraction, applied to (req)->cmd_flags */
 463
 464 /* obsolete, don't use in new code */
 465 static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
 466                 unsigned op_flags)
 467 {
 468         bio->bi_opf = op | op_flags;
 469 }
 470
 471 static inline bool op_is_write(unsigned int op)
 472 {
 473         return (op & 1);
 474 }
 475
 476 /*
 477  * Check if the bio or request is one that needs special treatment in the
 478  * flush state machine.
 479  */
 480 static inline bool op_is_flush(unsigned int op)
 481 {
 482         return op & (REQ_FUA | REQ_PREFLUSH);
 483 }
 484
 485 /*
 486  * Reads are always treated as synchronous, as are requests with the FUA or
 487  * PREFLUSH flag.  Other operations may be marked as synchronous using the
 488  * REQ_SYNC flag.
 489  */
 490 static inline bool op_is_sync(unsigned int op)
 491 {
 492         return (op & REQ_OP_MASK) == REQ_OP_READ ||
 493                 (op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
 494 }
 495
 496 static inline bool op_is_discard(unsigned int op)
 497 {
 498         return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
 499 }
 500
 501 /*
 502  * Check if a bio or request operation is a zone management operation, with
 503  * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
 504  * due to its different handling in the block layer and device response in
 505  * case of command failure.
 506  */
 507 static inline bool op_is_zone_mgmt(enum req_opf op)
 508 {
 509         switch (op & REQ_OP_MASK) {
 510         case REQ_OP_ZONE_RESET:
 511         case REQ_OP_ZONE_OPEN:
 512         case REQ_OP_ZONE_CLOSE:
 513         case REQ_OP_ZONE_FINISH:
 514                 return true;
 515         default:
 516                 return false;
 517         }
 518 }
 519
 520 static inline int op_stat_group(unsigned int op)
 521 {
 522         if (op_is_discard(op))
 523                 return STAT_DISCARD;
 524         return op_is_write(op);
 525 }
 526
 527 typedef unsigned int blk_qc_t;
 528 #define BLK_QC_T_NONE           -1U
 529 #define BLK_QC_T_SHIFT          16
 530 #define BLK_QC_T_INTERNAL       (1U << 31)
 531
 532 static inline bool blk_qc_t_valid(blk_qc_t cookie)
 533 {
 534         return cookie != BLK_QC_T_NONE;
 535 }
 536
 537 static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
 538 {
 539         return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
 540 }
 541
 542 static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
 543 {
 544         return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
 545 }
 546
 547 static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
 548 {
 549         return (cookie & BLK_QC_T_INTERNAL) != 0;
 550 }
 551
  552 struct blk_rq_stat {    /* NOTE(review): nothing in this header uses it; field meanings are only suggested by their names */
  553         u64 mean;
  554         u64 min;
  555         u64 max;
  556         u32 nr_samples;
  557         u64 batch;
  558 };
 559
 560 #endif /* __LINUX_BLK_TYPES_H */