/* SPDX-License-Identifier: GPL-2.0 */
/*
 * Block data types and constants.  Directly include this file only to
 * break include dependency loop.
 */
#ifndef __LINUX_BLK_TYPES_H
#define __LINUX_BLK_TYPES_H

#include <linux/types.h>
#include <linux/bvec.h>
#include <linux/ktime.h>

struct bio_set;
struct bio;
struct bio_integrity_payload;
struct page;
struct io_context;
struct cgroup_subsys_state;
typedef void (bio_end_io_t) (struct bio *);
struct bio_crypt_ctx;

struct block_device {
	sector_t		bd_start_sect;
	struct disk_stats __percpu *bd_stats;
	unsigned long		bd_stamp;
	bool			bd_read_only;	/* read-only policy */
	dev_t			bd_dev;
	int			bd_openers;
	struct inode *		bd_inode;	/* will die */
	struct super_block *	bd_super;
	struct mutex		bd_mutex;	/* open/close mutex */
	void *			bd_claiming;
	void *			bd_holder;
	int			bd_holders;
	bool			bd_write_holder;
#ifdef CONFIG_SYSFS
	struct list_head	bd_holder_disks;
#endif
	struct kobject		*bd_holder_dir;
	u8			bd_partno;
	struct hd_struct *	bd_part;
	/* number of times partitions within this device have been opened. */
	unsigned		bd_part_count;

	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
	struct gendisk *	bd_disk;
	struct backing_dev_info *bd_bdi;

	/* The counter of freeze processes */
	int			bd_fsfreeze_count;
	/* Mutex for freeze */
	struct mutex		bd_fsfreeze_mutex;
	struct super_block	*bd_fsfreeze_sb;

	struct partition_meta_info *bd_meta_info;
#ifdef CONFIG_FAIL_MAKE_REQUEST
	bool			bd_make_it_fail;
#endif
} __randomize_layout;

#define bdev_whole(_bdev) \
	((_bdev)->bd_disk->part0)

#define bdev_kobj(_bdev) \
	(&part_to_dev((_bdev)->bd_part)->kobj)

/*
 * Block error status values.  See block/blk-core:blk_errors for the details.
 * Alpha cannot write a byte atomically, so we need to use 32-bit value.
 */
#if defined(CONFIG_ALPHA) && !defined(__alpha_bwx__)
typedef u32 __bitwise blk_status_t;
#else
typedef u8 __bitwise blk_status_t;
#endif
#define	BLK_STS_OK 0
#define BLK_STS_NOTSUPP		((__force blk_status_t)1)
#define BLK_STS_TIMEOUT		((__force blk_status_t)2)
#define BLK_STS_NOSPC		((__force blk_status_t)3)
#define BLK_STS_TRANSPORT	((__force blk_status_t)4)
#define BLK_STS_TARGET		((__force blk_status_t)5)
#define BLK_STS_NEXUS		((__force blk_status_t)6)
#define BLK_STS_MEDIUM		((__force blk_status_t)7)
#define BLK_STS_PROTECTION	((__force blk_status_t)8)
#define BLK_STS_RESOURCE	((__force blk_status_t)9)
#define BLK_STS_IOERR		((__force blk_status_t)10)

/* hack for device mapper, don't use elsewhere: */
#define BLK_STS_DM_REQUEUE	((__force blk_status_t)11)

#define BLK_STS_AGAIN		((__force blk_status_t)12)
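
/*
 * Illustrative note (sketch, not part of this header): drivers and file
 * systems normally convert between these codes and errnos with the helpers
 * declared in <linux/blkdev.h> rather than open-coding the mapping, e.g.:
 *
 *	blk_status_t sts = errno_to_blk_status(-ENOSPC);  // BLK_STS_NOSPC
 *	int err = blk_status_to_errno(BLK_STS_TIMEOUT);   // -ETIMEDOUT
 */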

/*
 * BLK_STS_DEV_RESOURCE is returned from the driver to the block layer if
 * device related resources are unavailable, but the driver can guarantee
 * that the queue will be rerun in the future once resources become
 * available again. This is typically the case for device specific
 * resources that are consumed for IO. If the driver fails allocating these
 * resources, we know that inflight (or pending) IO will free these
 * resources upon completion.
 *
 * This is different from BLK_STS_RESOURCE in that it explicitly references
 * a device specific resource. For resources of wider scope, allocation
 * failure can happen without having pending IO. This means that we can't
 * rely on request completions freeing these resources, as IO may not be in
 * flight. Examples of that are kernel memory allocations, DMA mappings, or
 * any other system wide resources.
 */
#define BLK_STS_DEV_RESOURCE	((__force blk_status_t)13)
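
/*
 * Illustrative sketch (hypothetical foo_* driver, not part of this header):
 * in a blk-mq ->queue_rq() handler the distinction matters because only
 * BLK_STS_DEV_RESOURCE lets the block layer rely on an in-flight completion
 * to rerun the queue; a system-wide shortage must use BLK_STS_RESOURCE so
 * the queue is rerun after a delay instead:
 *
 *	static blk_status_t foo_queue_rq(struct blk_mq_hw_ctx *hctx,
 *					 const struct blk_mq_queue_data *bd)
 *	{
 *		if (!foo_get_device_tag(hctx))		// device resource,
 *			return BLK_STS_DEV_RESOURCE;	// freed by in-flight IO
 *		if (!foo_map_dma(bd->rq))		// system-wide resource
 *			return BLK_STS_RESOURCE;
 *		return foo_issue(bd->rq);
 *	}
 */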

/*
 * BLK_STS_ZONE_RESOURCE is returned from the driver to the block layer if zone
 * related resources are unavailable, but the driver can guarantee the queue
 * will be rerun in the future once the resources become available again.
 *
 * This is different from BLK_STS_DEV_RESOURCE in that it explicitly references
 * a zone specific resource and IO to a different zone on the same device could
 * still be served. An example is a zone that is write-locked: writes must
 * wait, but a read to the same zone can still be served.
 */
#define BLK_STS_ZONE_RESOURCE	((__force blk_status_t)14)

/*
 * BLK_STS_ZONE_OPEN_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently open. The same command should be successful if resubmitted
 * after the number of open zones decreases below the device's limits, which is
 * reported in the request_queue's max_open_zones.
 */
#define BLK_STS_ZONE_OPEN_RESOURCE	((__force blk_status_t)15)

/*
 * BLK_STS_ZONE_ACTIVE_RESOURCE is returned from the driver in the completion
 * path if the device returns a status indicating that too many zone resources
 * are currently active. The same command should be successful if resubmitted
 * after the number of active zones decreases below the device's limits, which
 * is reported in the request_queue's max_active_zones.
 */
#define BLK_STS_ZONE_ACTIVE_RESOURCE	((__force blk_status_t)16)
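
/*
 * Illustrative sketch (hypothetical FOO_* device status, not part of this
 * header): a zoned driver's completion path can map a device-reported "too
 * many open/active zones" condition onto these codes, so the block layer
 * resubmits once the zone counts drop below the limits exposed via the
 * request_queue (see queue_max_open_zones() and queue_max_active_zones()
 * in <linux/blkdev.h>):
 *
 *	if (foo_status == FOO_STS_TOO_MANY_OPEN_ZONES)
 *		return BLK_STS_ZONE_OPEN_RESOURCE;
 *	if (foo_status == FOO_STS_TOO_MANY_ACTIVE_ZONES)
 *		return BLK_STS_ZONE_ACTIVE_RESOURCE;
 */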

/**
 * blk_path_error - returns true if error may be path related
 * @error: status the request was completed with
 *
 * Description:
 *     This classifies block error status into non-retryable errors and ones
 *     that may be successful if retried on a failover path.
 *
 * Return:
 *     %false - retrying failover path will not help
 *     %true - may succeed if retried
 */
static inline bool blk_path_error(blk_status_t error)
{
	switch (error) {
	case BLK_STS_NOTSUPP:
	case BLK_STS_NOSPC:
	case BLK_STS_TARGET:
	case BLK_STS_NEXUS:
	case BLK_STS_MEDIUM:
	case BLK_STS_PROTECTION:
		return false;
	}

	/* Anything else could be a path failure, so should be retried */
	return true;
}
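
/*
 * Illustrative use (sketch; multipath-aware drivers such as nvme use this
 * helper in their completion paths, the foo_* code below is hypothetical):
 *
 *	if (blk_path_error(status))
 *		foo_retry_on_other_path(rq);	// e.g. BLK_STS_TRANSPORT
 *	else
 *		foo_fail_request(rq, status);	// e.g. BLK_STS_MEDIUM: no
 *						// other path will help
 */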

/*
 * From most significant bit:
 * 1 bit: reserved for other usage, see below
 * 12 bits: original size of bio
 * 51 bits: issue time of bio
 */
#define BIO_ISSUE_RES_BITS      1
#define BIO_ISSUE_SIZE_BITS     12
#define BIO_ISSUE_RES_SHIFT     (64 - BIO_ISSUE_RES_BITS)
#define BIO_ISSUE_SIZE_SHIFT    (BIO_ISSUE_RES_SHIFT - BIO_ISSUE_SIZE_BITS)
#define BIO_ISSUE_TIME_MASK     ((1ULL << BIO_ISSUE_SIZE_SHIFT) - 1)
#define BIO_ISSUE_SIZE_MASK     \
	(((1ULL << BIO_ISSUE_SIZE_BITS) - 1) << BIO_ISSUE_SIZE_SHIFT)
#define BIO_ISSUE_RES_MASK      (~((1ULL << BIO_ISSUE_RES_SHIFT) - 1))

/* Reserved bit for blk-throtl */
#define BIO_ISSUE_THROTL_SKIP_LATENCY (1ULL << 63)

struct bio_issue {
	u64 value;
};

static inline u64 __bio_issue_time(u64 time)
{
	return time & BIO_ISSUE_TIME_MASK;
}

static inline u64 bio_issue_time(struct bio_issue *issue)
{
	return __bio_issue_time(issue->value);
}

static inline sector_t bio_issue_size(struct bio_issue *issue)
{
	return ((issue->value & BIO_ISSUE_SIZE_MASK) >> BIO_ISSUE_SIZE_SHIFT);
}

static inline void bio_issue_init(struct bio_issue *issue,
				  sector_t size)
{
	size &= (1ULL << BIO_ISSUE_SIZE_BITS) - 1;
	issue->value = ((issue->value & BIO_ISSUE_RES_MASK) |
			(ktime_get_ns() & BIO_ISSUE_TIME_MASK) |
			((u64)size << BIO_ISSUE_SIZE_SHIFT));
}
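
/*
 * Resulting layout of bio_issue.value (follows from the masks above):
 *
 *	bit  63     : reserved (BIO_ISSUE_THROTL_SKIP_LATENCY)
 *	bits 62..51 : size of the bio, truncated to BIO_ISSUE_SIZE_BITS
 *	bits 50..0  : issue time, the low 51 bits of ktime_get_ns()
 *
 * Round-trip example:
 *
 *	struct bio_issue issue = { 0 };
 *
 *	bio_issue_init(&issue, 8);		// 8 sectors, stamped "now"
 *	sector_t sz = bio_issue_size(&issue);	// == 8
 *	u64 when = bio_issue_time(&issue);	// truncated nanoseconds
 */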

/*
 * main unit of I/O for the block layer and lower layers (ie drivers and
 * stacking drivers)
 */
struct bio {
	struct bio		*bi_next;	/* request queue link */
	struct gendisk		*bi_disk;
	unsigned int		bi_opf;		/* bottom bits req flags,
						 * top bits REQ_OP. Use
						 * accessors.
						 */
	unsigned short		bi_flags;	/* status, etc and bvec pool number */
	unsigned short		bi_ioprio;
	unsigned short		bi_write_hint;
	blk_status_t		bi_status;
	u8			bi_partno;
	atomic_t		__bi_remaining;

	struct bvec_iter	bi_iter;

	bio_end_io_t		*bi_end_io;

	void			*bi_private;
#ifdef CONFIG_BLK_CGROUP
	/*
	 * Represents the association of the css and request_queue for the bio.
	 * If a bio goes direct to device, it will not have a blkg as it will
	 * not have a request_queue associated with it.  The reference is put
	 * on release of the bio.
	 */
	struct blkcg_gq		*bi_blkg;
	struct bio_issue	bi_issue;
#ifdef CONFIG_BLK_CGROUP_IOCOST
	u64			bi_iocost_cost;
#endif
#endif

#ifdef CONFIG_BLK_INLINE_ENCRYPTION
	struct bio_crypt_ctx	*bi_crypt_context;
#endif

	union {
#if defined(CONFIG_BLK_DEV_INTEGRITY)
		struct bio_integrity_payload *bi_integrity; /* data integrity */
#endif
	};

	unsigned short		bi_vcnt;	/* how many bio_vec's */

	/*
	 * Everything starting with bi_max_vecs will be preserved by bio_reset()
	 */

	unsigned short		bi_max_vecs;	/* max bvl_vecs we can hold */

	atomic_t		__bi_cnt;	/* pin count */

	struct bio_vec		*bi_io_vec;	/* the actual vec list */

	struct bio_set		*bi_pool;

	/*
	 * We can inline a number of vecs at the end of the bio, to avoid
	 * double allocations for a small number of bio_vecs. This member
	 * MUST obviously be kept at the very end of the bio.
	 */
	struct bio_vec		bi_inline_vecs[];
};
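
/*
 * Illustrative note (sketch, not part of this header): the data a bio
 * carries lives in bi_io_vec and is walked via bi_iter, normally through
 * the iterator helpers in <linux/bio.h> rather than by touching the fields
 * directly, e.g.:
 *
 *	struct bio_vec bv;
 *	struct bvec_iter iter;
 *
 *	bio_for_each_segment(bv, bio, iter)
 *		pr_debug("page %p offset %u len %u\n",
 *			 bv.bv_page, bv.bv_offset, bv.bv_len);
 */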

#define BIO_RESET_BYTES		offsetof(struct bio, bi_max_vecs)

/*
 * bio flags
 */
enum {
	BIO_NO_PAGE_REF,	/* don't put release vec pages */
	BIO_CLONED,		/* doesn't own data */
	BIO_BOUNCED,		/* bio is a bounce bio */
	BIO_WORKINGSET,		/* contains userspace workingset pages */
	BIO_QUIET,		/* Make BIO Quiet */
	BIO_CHAIN,		/* chained bio, ->bi_remaining in effect */
	BIO_REFFED,		/* bio has elevated ->bi_cnt */
	BIO_THROTTLED,		/* This bio has already been subjected to
				 * throttling rules. Don't do it again. */
	BIO_TRACE_COMPLETION,	/* bio_endio() should trace the final completion
				 * of this bio. */
	BIO_CGROUP_ACCT,	/* has been accounted to a cgroup */
	BIO_TRACKED,		/* set if bio goes through the rq_qos path */
	BIO_FLAG_LAST
};

/* See BVEC_POOL_OFFSET below before adding new flags */

/*
 * We support 6 different bvec pools, the last one is magic in that it
 * is backed by a mempool.
 */
#define BVEC_POOL_NR		6
#define BVEC_POOL_MAX		(BVEC_POOL_NR - 1)

/*
 * Top 3 bits of bio flags indicate the pool the bvecs came from.  We add
 * 1 to the actual index so that 0 indicates that there are no bvecs to be
 * freed.
 */
#define BVEC_POOL_BITS		(3)
#define BVEC_POOL_OFFSET	(16 - BVEC_POOL_BITS)
#define BVEC_POOL_IDX(bio)	((bio)->bi_flags >> BVEC_POOL_OFFSET)
#if (1 << BVEC_POOL_BITS) < (BVEC_POOL_NR + 1)
# error "BVEC_POOL_BITS is too small"
#endif

/*
 * Flags starting here get preserved by bio_reset() - this includes
 * only BVEC_POOL_IDX()
 */
#define BIO_RESET_BITS	BVEC_POOL_OFFSET
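
/*
 * Encoding sketch (derived from the macros above; foo_set_pool() is
 * hypothetical): a pool index idx in [0, BVEC_POOL_MAX] is stored biased
 * by one in the top BVEC_POOL_BITS of bi_flags, so that 0 means "no bvecs
 * to free":
 *
 *	static void foo_set_pool(struct bio *bio, unsigned int idx)
 *	{
 *		bio->bi_flags &= (1U << BVEC_POOL_OFFSET) - 1; // drop old idx
 *		bio->bi_flags |= (idx + 1) << BVEC_POOL_OFFSET;
 *	}
 *
 * BVEC_POOL_IDX(bio) then reads back idx + 1, and 0 when the bio has no
 * separately allocated bvecs.
 */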

typedef __u32 __bitwise blk_mq_req_flags_t;

/*
 * Operations and flags common to the bio and request structures.
 * We use 8 bits for encoding the operation, and the remaining 24 for flags.
 *
 * The least significant bit of the operation number indicates the data
 * transfer direction:
 *
 *   - if the least significant bit is set transfers are TO the device
 *   - if the least significant bit is not set transfers are FROM the device
 *
 * If an operation does not transfer data the least significant bit has no
 * meaning.
 */
#define REQ_OP_BITS	8
#define REQ_OP_MASK	((1 << REQ_OP_BITS) - 1)
#define REQ_FLAG_BITS	24
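
/*
 * Worked example (follows from the definitions above): with REQ_OP_BITS == 8
 * an opf word such as (REQ_OP_WRITE | REQ_SYNC) splits as
 *
 *	op    = opf & REQ_OP_MASK;	// low 8 bits: REQ_OP_WRITE (1)
 *	flags = opf & ~REQ_OP_MASK;	// high 24 bits: REQ_SYNC
 *
 * and the direction test is just the low bit of the operation: REQ_OP_WRITE
 * (1) transfers TO the device, REQ_OP_READ (0) FROM it (see op_is_write()
 * below).
 */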

enum req_opf {
	/* read sectors from the device */
	REQ_OP_READ		= 0,
	/* write sectors to the device */
	REQ_OP_WRITE		= 1,
	/* flush the volatile write cache */
	REQ_OP_FLUSH		= 2,
	/* discard sectors */
	REQ_OP_DISCARD		= 3,
	/* securely erase sectors */
	REQ_OP_SECURE_ERASE	= 5,
	/* write the same sector many times */
	REQ_OP_WRITE_SAME	= 7,
	/* write the zero filled sector many times */
	REQ_OP_WRITE_ZEROES	= 9,
	/* Open a zone */
	REQ_OP_ZONE_OPEN	= 10,
	/* Close a zone */
	REQ_OP_ZONE_CLOSE	= 11,
	/* Transition a zone to full */
	REQ_OP_ZONE_FINISH	= 12,
	/* write data at the current zone write pointer */
	REQ_OP_ZONE_APPEND	= 13,
	/* reset a zone write pointer */
	REQ_OP_ZONE_RESET	= 15,
	/* reset all the zones present on the device */
	REQ_OP_ZONE_RESET_ALL	= 17,

	/* SCSI passthrough using struct scsi_request */
	REQ_OP_SCSI_IN		= 32,
	REQ_OP_SCSI_OUT		= 33,
	/* Driver private requests */
	REQ_OP_DRV_IN		= 34,
	REQ_OP_DRV_OUT		= 35,

	REQ_OP_LAST,
};

enum req_flag_bits {
	__REQ_FAILFAST_DEV =	/* no driver retries of device errors */
		REQ_OP_BITS,
	__REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */
	__REQ_FAILFAST_DRIVER,	/* no driver retries of driver errors */
	__REQ_SYNC,		/* request is sync (sync write or read) */
	__REQ_META,		/* metadata io request */
	__REQ_PRIO,		/* boost priority in cfq */
	__REQ_NOMERGE,		/* don't touch this for merging */
	__REQ_IDLE,		/* anticipate more IO after this one */
	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
	__REQ_FUA,		/* forced unit access */
	__REQ_PREFLUSH,		/* request for cache flush */
	__REQ_RAHEAD,		/* read ahead, can fail anytime */
	__REQ_BACKGROUND,	/* background IO */
	__REQ_NOWAIT,		/* Don't wait if request will block */
	/*
	 * When a shared kthread needs to issue a bio for a cgroup, doing
	 * so synchronously can lead to priority inversions as the kthread
	 * can be trapped waiting for that cgroup.  CGROUP_PUNT flag makes
	 * submit_bio() punt the actual issuing to a dedicated per-blkcg
	 * work item to avoid such priority inversions.
	 */
	__REQ_CGROUP_PUNT,

	/* command specific flags for REQ_OP_WRITE_ZEROES: */
	__REQ_NOUNMAP,		/* do not free blocks when zeroing */

	__REQ_HIPRI,

	/* for driver use */
	__REQ_DRV,
	__REQ_SWAP,		/* swapping request. */
	__REQ_NR_BITS,		/* stops here */
};
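
/*
 * Composition example (follows from the enum above): an opf word is one
 * REQ_OP_* value OR-ed with any number of flag bits, e.g. a synchronous
 * write that must reach stable media:
 *
 *	unsigned int opf = REQ_OP_WRITE | REQ_SYNC | REQ_FUA;
 *
 * The flag bits start at REQ_OP_BITS, so they can never collide with the
 * operation encoded in the low byte.
 */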

#define REQ_FAILFAST_DEV	(1ULL << __REQ_FAILFAST_DEV)
#define REQ_FAILFAST_TRANSPORT	(1ULL << __REQ_FAILFAST_TRANSPORT)
#define REQ_FAILFAST_DRIVER	(1ULL << __REQ_FAILFAST_DRIVER)
#define REQ_SYNC		(1ULL << __REQ_SYNC)
#define REQ_META		(1ULL << __REQ_META)
#define REQ_PRIO		(1ULL << __REQ_PRIO)
#define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
#define REQ_IDLE		(1ULL << __REQ_IDLE)
#define REQ_INTEGRITY		(1ULL << __REQ_INTEGRITY)
#define REQ_FUA			(1ULL << __REQ_FUA)
#define REQ_PREFLUSH		(1ULL << __REQ_PREFLUSH)
#define REQ_RAHEAD		(1ULL << __REQ_RAHEAD)
#define REQ_BACKGROUND		(1ULL << __REQ_BACKGROUND)
#define REQ_NOWAIT		(1ULL << __REQ_NOWAIT)
#define REQ_CGROUP_PUNT		(1ULL << __REQ_CGROUP_PUNT)

#define REQ_NOUNMAP		(1ULL << __REQ_NOUNMAP)
#define REQ_HIPRI		(1ULL << __REQ_HIPRI)

#define REQ_DRV			(1ULL << __REQ_DRV)
#define REQ_SWAP		(1ULL << __REQ_SWAP)

#define REQ_FAILFAST_MASK \
	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)

#define REQ_NOMERGE_FLAGS \
	(REQ_NOMERGE | REQ_PREFLUSH | REQ_FUA)

enum stat_group {
	STAT_READ,
	STAT_WRITE,
	STAT_DISCARD,
	STAT_FLUSH,

	NR_STAT_GROUPS
};

#define bio_op(bio) \
	((bio)->bi_opf & REQ_OP_MASK)
#define req_op(req) \
	((req)->cmd_flags & REQ_OP_MASK)

/* obsolete, don't use in new code */
static inline void bio_set_op_attrs(struct bio *bio, unsigned op,
				    unsigned op_flags)
{
	bio->bi_opf = op | op_flags;
}

static inline bool op_is_write(unsigned int op)
{
	return (op & 1);
}

/*
 * Check if the bio or request is one that needs special treatment in the
 * flush state machine.
 */
static inline bool op_is_flush(unsigned int op)
{
	return op & (REQ_FUA | REQ_PREFLUSH);
}

/*
 * Reads are always treated as synchronous, as are requests with the FUA or
 * PREFLUSH flag.  Other operations may be marked as synchronous using the
 * REQ_SYNC flag.
 */
static inline bool op_is_sync(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_READ ||
		(op & (REQ_SYNC | REQ_FUA | REQ_PREFLUSH));
}
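
/*
 * Examples (follow directly from the definition above):
 *
 *	op_is_sync(REQ_OP_READ)			// true, reads are always sync
 *	op_is_sync(REQ_OP_WRITE)		// false
 *	op_is_sync(REQ_OP_WRITE | REQ_SYNC)	// true
 *	op_is_sync(REQ_OP_WRITE | REQ_FUA)	// true
 */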

static inline bool op_is_discard(unsigned int op)
{
	return (op & REQ_OP_MASK) == REQ_OP_DISCARD;
}

/*
 * Check if a bio or request operation is a zone management operation, with
 * the exception of REQ_OP_ZONE_RESET_ALL which is treated as a special case
 * due to its different handling in the block layer and device response in
 * case of command failure.
 */
static inline bool op_is_zone_mgmt(enum req_opf op)
{
	switch (op & REQ_OP_MASK) {
	case REQ_OP_ZONE_RESET:
	case REQ_OP_ZONE_OPEN:
	case REQ_OP_ZONE_CLOSE:
	case REQ_OP_ZONE_FINISH:
		return true;
	default:
		return false;
	}
}

static inline int op_stat_group(unsigned int op)
{
	if (op_is_discard(op))
		return STAT_DISCARD;
	return op_is_write(op);
}
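
/*
 * Note (follows from the definitions above): the return value indexes enum
 * stat_group and relies on STAT_READ == 0 and STAT_WRITE == 1 matching the
 * 0/1 result of op_is_write():
 *
 *	op_stat_group(REQ_OP_READ)	// STAT_READ
 *	op_stat_group(REQ_OP_WRITE)	// STAT_WRITE
 *	op_stat_group(REQ_OP_DISCARD)	// STAT_DISCARD
 */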

typedef unsigned int blk_qc_t;
#define BLK_QC_T_NONE		-1U
#define BLK_QC_T_SHIFT		16
#define BLK_QC_T_INTERNAL	(1U << 31)

static inline bool blk_qc_t_valid(blk_qc_t cookie)
{
	return cookie != BLK_QC_T_NONE;
}

static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie)
{
	return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT;
}

static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie)
{
	return cookie & ((1u << BLK_QC_T_SHIFT) - 1);
}

static inline bool blk_qc_t_is_internal(blk_qc_t cookie)
{
	return (cookie & BLK_QC_T_INTERNAL) != 0;
}
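
/*
 * Cookie layout sketch (derived from the helpers above): bit 31 marks an
 * internal (scheduler) tag, bits 30..16 hold the hardware queue number and
 * bits 15..0 the driver tag, so a cookie built as
 *
 *	blk_qc_t cookie = (hwq << BLK_QC_T_SHIFT) | tag;
 *
 * round-trips as blk_qc_t_to_queue_num(cookie) == hwq and
 * blk_qc_t_to_tag(cookie) == tag. Polling code uses the cookie returned by
 * submit_bio() to locate the hardware queue to poll.
 */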

struct blk_rq_stat {
	u64 mean;
	u64 min;
	u64 max;
	u32 nr_samples;
	u64 batch;
};

#endif /* __LINUX_BLK_TYPES_H */