/*
 * Zoned block device handling
 *
 * Copyright (c) 2015, Hannes Reinecke
 * Copyright (c) 2015, SUSE Linux GmbH
 *
 * Copyright (c) 2016, Damien Le Moal
 * Copyright (c) 2016, Western Digital
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/rbtree.h>
#include <linux/blkdev.h>

#include "blk.h"
static inline sector_t blk_zone_start(struct request_queue *q,
                                      sector_t sector)
{
        sector_t zone_mask = blk_queue_zone_sectors(q) - 1;

        return sector & ~zone_mask;
}
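
/*
 * Illustrative example (not part of the original code): the block layer
 * requires a power-of-two zone size, so the start of the zone containing
 * a sector can be computed with a mask instead of a division. With
 * 256 MiB zones (524288 sectors of 512 B):
 *
 *      blk_zone_start(q, 1000000) == 1000000 & ~(524288 - 1) == 524288
 */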
/*
 * Return true if a request is a write request that needs zone write locking.
 */
bool blk_req_needs_zone_write_lock(struct request *rq)
{
        if (!rq->q->seq_zones_wlock)
                return false;

        if (blk_rq_is_passthrough(rq))
                return false;

        switch (req_op(rq)) {
        case REQ_OP_WRITE_ZEROES:
        case REQ_OP_WRITE_SAME:
        case REQ_OP_WRITE:
                return blk_rq_zone_is_seq(rq);
        default:
                return false;
        }
}
EXPORT_SYMBOL_GPL(blk_req_needs_zone_write_lock);
void __blk_req_zone_write_lock(struct request *rq)
{
        if (WARN_ON_ONCE(test_and_set_bit(blk_rq_zone_no(rq),
                                          rq->q->seq_zones_wlock)))
                return;

        WARN_ON_ONCE(rq->rq_flags & RQF_ZONE_WRITE_LOCKED);
        rq->rq_flags |= RQF_ZONE_WRITE_LOCKED;
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_lock);
void __blk_req_zone_write_unlock(struct request *rq)
{
        rq->rq_flags &= ~RQF_ZONE_WRITE_LOCKED;
        if (rq->q->seq_zones_wlock)
                WARN_ON_ONCE(!test_and_clear_bit(blk_rq_zone_no(rq),
                                                 rq->q->seq_zones_wlock));
}
EXPORT_SYMBOL_GPL(__blk_req_zone_write_unlock);
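
/*
 * Usage sketch (illustrative, not part of the original code): a dispatch
 * path would pair these helpers around issuing a write to a sequential
 * zone, so that at most one write per zone is in flight:
 *
 *      if (blk_req_needs_zone_write_lock(rq))
 *              __blk_req_zone_write_lock(rq);
 *      ... dispatch the write request ...
 *      if (rq->rq_flags & RQF_ZONE_WRITE_LOCKED)
 *              __blk_req_zone_write_unlock(rq);
 */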
static inline unsigned int __blkdev_nr_zones(struct request_queue *q,
                                             sector_t nr_sectors)
{
        unsigned long zone_sectors = blk_queue_zone_sectors(q);

        return (nr_sectors + zone_sectors - 1) >> ilog2(zone_sectors);
}
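
/*
 * Example (illustrative): the round-up accounts for a possibly smaller
 * last zone. With zone_sectors = 524288 and nr_sectors = 1000000:
 *
 *      (1000000 + 524288 - 1) >> ilog2(524288) == 1524287 >> 19 == 2
 *
 * i.e. one full zone plus one partial last zone.
 */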
/**
 * blkdev_nr_zones - Get number of zones
 * @bdev:       Target block device
 *
 * Description:
 *    Return the total number of zones of a zoned block device.
 *    For a regular block device, the number of zones is always 0.
 */
unsigned int blkdev_nr_zones(struct block_device *bdev)
{
        struct request_queue *q = bdev_get_queue(bdev);

        if (!blk_queue_is_zoned(q))
                return 0;

        return __blkdev_nr_zones(q, bdev->bd_part->nr_sects);
}
EXPORT_SYMBOL_GPL(blkdev_nr_zones);
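
/*
 * Usage sketch (illustrative, not part of the original code): a caller
 * sizing a per-zone array before reporting zones:
 *
 *      unsigned int nr_zones = blkdev_nr_zones(bdev);
 *
 *      if (!nr_zones)
 *              return -ENODEV;
 *      zones = kvmalloc_array(nr_zones, sizeof(struct blk_zone),
 *                             GFP_KERNEL | __GFP_ZERO);
 */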
/*
 * Check that a zone report belongs to the partition.
 * If yes, fix its start sector and write pointer, copy it in the
 * zone information array and return true. Return false otherwise.
 */
static bool blkdev_report_zone(struct block_device *bdev,
                               struct blk_zone *rep,
                               struct blk_zone *zone)
{
        sector_t offset = get_start_sect(bdev);

        if (rep->start < offset)
                return false;

        rep->start -= offset;
        if (rep->start + rep->len > bdev->bd_part->nr_sects)
                return false;

        if (rep->type == BLK_ZONE_TYPE_CONVENTIONAL)
                rep->wp = rep->start + rep->len;
        else
                rep->wp -= offset;
        memcpy(zone, rep, sizeof(struct blk_zone));
        return true;
}
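
/*
 * Example (illustrative): for a partition starting at device sector
 * 524288, a zone reported by the device at sector 1048576 is returned
 * to the caller with its start (and write pointer) rebased to 524288,
 * i.e. relative to the partition rather than to the whole device.
 */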
/**
 * blkdev_report_zones - Get zones information
 * @bdev:       Target block device
 * @sector:     Sector from which to report zones
 * @zones:      Array of zone structures where to return the zones information
 * @nr_zones:   Number of zone structures in the zone array
 * @gfp_mask:   Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Get zone information starting from the zone containing @sector.
 *    The number of zone information reported may be less than the number
 *    requested by @nr_zones. The number of zones actually reported is
 *    returned in @nr_zones.
 */
int blkdev_report_zones(struct block_device *bdev,
                        sector_t sector,
                        struct blk_zone *zones,
                        unsigned int *nr_zones,
                        gfp_t gfp_mask)
{
        struct request_queue *q = bdev_get_queue(bdev);
        struct blk_zone_report_hdr *hdr;
        unsigned int nrz = *nr_zones;
        size_t rep_bytes;
        unsigned int nr_pages;
        struct bio *bio;
        struct bio_vec *bv;
        struct page *page;
        unsigned int i, n, nz;
        unsigned int nr_rep = 0;
        size_t ofst;
        void *addr;
        int ret;

        if (!blk_queue_is_zoned(q))
                return -EOPNOTSUPP;

        if (!nrz)
                return 0;

        if (sector > bdev->bd_part->nr_sects) {
                /* Out of range */
                *nr_zones = 0;
                return 0;
        }
        /*
         * The zone report has a header. So make room for it in the
         * payload. Also make sure that the report fits in a single BIO
         * that will not be split down the stack.
         */
        rep_bytes = sizeof(struct blk_zone_report_hdr) +
                sizeof(struct blk_zone) * nrz;
        rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK;
        if (rep_bytes > (queue_max_sectors(q) << 9))
                rep_bytes = queue_max_sectors(q) << 9;

        nr_pages = min_t(unsigned int, BIO_MAX_PAGES,
                         rep_bytes >> PAGE_SHIFT);
        nr_pages = min_t(unsigned int, nr_pages,
                         queue_max_segments(q));
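
        /*
         * Illustrative sizing (assuming 4 KiB pages and the usual 64-byte
         * struct blk_zone and report header): for nrz = 128, rep_bytes is
         * 64 + 128 * 64 = 8256 bytes, rounded up to 3 pages (12288 bytes),
         * so nr_pages = 3 unless BIO_MAX_PAGES or queue_max_segments()
         * is smaller.
         */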
        bio = bio_alloc(gfp_mask, nr_pages);
        if (!bio)
                return -ENOMEM;

        bio_set_dev(bio, bdev);
        bio->bi_iter.bi_sector = blk_zone_start(q, sector);
        bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0);

        for (i = 0; i < nr_pages; i++) {
                page = alloc_page(gfp_mask);
                if (!page) {
                        ret = -ENOMEM;
                        goto out;
                }
                if (!bio_add_page(bio, page, PAGE_SIZE, 0)) {
                        __free_page(page);
                        break;
                }
        }

        if (i == 0)
                ret = -ENOMEM;
        else
                ret = submit_bio_wait(bio);
        if (ret)
                goto out;
        /*
         * Process the report result: skip the header and go through the
         * reported zones to fix up the zone information for partitions.
         * At the same time, return the zone information into the zone
         * array.
         */
        n = 0;
        nz = 0;
        bio_for_each_segment_all(bv, bio, i) {
                if (!bv->bv_page)
                        break;

                addr = kmap_atomic(bv->bv_page);

                /* Get header in the first page */
                ofst = 0;
                if (!nr_rep) {
                        hdr = addr;
                        nr_rep = hdr->nr_zones;
                        ofst = sizeof(struct blk_zone_report_hdr);
                }

                /* Fixup and report zones */
                while (ofst < bv->bv_len &&
                       n < nr_rep && nz < nrz) {
                        if (blkdev_report_zone(bdev, addr + ofst, &zones[nz]))
                                nz++;
                        ofst += sizeof(struct blk_zone);
                        n++;
                }

                kunmap_atomic(addr);

                if (n >= nr_rep || nz >= nrz)
                        break;
        }

        *nr_zones = nz;
out:
        bio_for_each_segment_all(bv, bio, i)
                __free_page(bv->bv_page);
        bio_put(bio);

        return ret;
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
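
/*
 * Usage sketch (illustrative, not part of the original code): walking all
 * zones of a device in fixed-size batches, with "dev_sectors" standing
 * for the device capacity in sectors:
 *
 *      struct blk_zone zones[16];
 *      unsigned int nr;
 *      sector_t sector = 0;
 *
 *      while (sector < dev_sectors) {
 *              nr = ARRAY_SIZE(zones);
 *              if (blkdev_report_zones(bdev, sector, zones, &nr,
 *                                      GFP_KERNEL) || !nr)
 *                      break;
 *              sector = zones[nr - 1].start + zones[nr - 1].len;
 *      }
 */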
/**
 * blkdev_reset_zones - Reset zones write pointer
 * @bdev:       Target block device
 * @sector:     Start sector of the first zone to reset
 * @nr_sectors: Number of sectors, at least the length of one zone
 * @gfp_mask:   Memory allocation flags (for bio_alloc)
 *
 * Description:
 *    Reset the write pointer of the zones contained in the range
 *    @sector..@sector+@nr_sectors. Specifying the entire disk sector range
 *    is valid, but the specified range should not contain conventional zones.
 */
int blkdev_reset_zones(struct block_device *bdev,
                       sector_t sector, sector_t nr_sectors,
                       gfp_t gfp_mask)
{
        struct request_queue *q = bdev_get_queue(bdev);
        sector_t zone_sectors;
        sector_t end_sector = sector + nr_sectors;
        struct bio *bio = NULL;
        struct blk_plug plug;
        int ret;

        if (!blk_queue_is_zoned(q))
                return -EOPNOTSUPP;

        if (bdev_read_only(bdev))
                return -EPERM;

        if (!nr_sectors || end_sector > bdev->bd_part->nr_sects)
                /* Out of range */
                return -EINVAL;

        /* Check alignment (handle eventual smaller last zone) */
        zone_sectors = blk_queue_zone_sectors(q);
        if (sector & (zone_sectors - 1))
                return -EINVAL;

        if ((nr_sectors & (zone_sectors - 1)) &&
            end_sector != bdev->bd_part->nr_sects)
                return -EINVAL;
        blk_start_plug(&plug);
        while (sector < end_sector) {
                bio = blk_next_bio(bio, 0, gfp_mask);
                bio->bi_iter.bi_sector = sector;
                bio_set_dev(bio, bdev);
                bio_set_op_attrs(bio, REQ_OP_ZONE_RESET, 0);

                sector += zone_sectors;

                /* This may take a while, so be nice to others */
                cond_resched();
        }

        ret = submit_bio_wait(bio);
        bio_put(bio);

        blk_finish_plug(&plug);

        return ret;
}
EXPORT_SYMBOL_GPL(blkdev_reset_zones);
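
/*
 * Usage sketch (illustrative, not part of the original code): resetting a
 * single previously reported zone, or every zone on a device whose
 * capacity is "dev_sectors":
 *
 *      ret = blkdev_reset_zones(bdev, zone->start, zone->len, GFP_KERNEL);
 *      ret = blkdev_reset_zones(bdev, 0, dev_sectors, GFP_KERNEL);
 */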
/*
 * BLKREPORTZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
                              unsigned int cmd, unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        struct request_queue *q;
        struct blk_zone_report rep;
        struct blk_zone *zones;
        int ret;

        if (!argp)
                return -EINVAL;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!blk_queue_is_zoned(q))
                return -ENOTTY;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (copy_from_user(&rep, argp, sizeof(struct blk_zone_report)))
                return -EFAULT;

        if (!rep.nr_zones)
                return -EINVAL;

        rep.nr_zones = min(blkdev_nr_zones(bdev), rep.nr_zones);

        zones = kvmalloc_array(rep.nr_zones, sizeof(struct blk_zone),
                               GFP_KERNEL | __GFP_ZERO);
        if (!zones)
                return -ENOMEM;

        ret = blkdev_report_zones(bdev, rep.sector,
                                  zones, &rep.nr_zones,
                                  GFP_KERNEL);
        if (ret)
                goto out;

        if (copy_to_user(argp, &rep, sizeof(struct blk_zone_report))) {
                ret = -EFAULT;
                goto out;
        }

        if (rep.nr_zones) {
                if (copy_to_user(argp + sizeof(struct blk_zone_report), zones,
                                 sizeof(struct blk_zone) * rep.nr_zones))
                        ret = -EFAULT;
        }

out:
        kvfree(zones);

        return ret;
}
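
/*
 * Userspace view (illustrative, not part of the original code): the ioctl
 * argument is a struct blk_zone_report immediately followed by room for
 * nr_zones struct blk_zone entries (both defined in <linux/blkzoned.h>):
 *
 *      struct blk_zone_report *rep;
 *
 *      rep = malloc(sizeof(*rep) + 16 * sizeof(struct blk_zone));
 *      rep->sector = 0;
 *      rep->nr_zones = 16;
 *      if (!ioctl(fd, BLKREPORTZONE, rep))
 *              ... rep->nr_zones entries of rep->zones[] are valid ...
 */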
/*
 * BLKRESETZONE ioctl processing.
 * Called from blkdev_ioctl.
 */
int blkdev_reset_zones_ioctl(struct block_device *bdev, fmode_t mode,
                             unsigned int cmd, unsigned long arg)
{
        void __user *argp = (void __user *)arg;
        struct request_queue *q;
        struct blk_zone_range zrange;

        if (!argp)
                return -EINVAL;

        q = bdev_get_queue(bdev);
        if (!q)
                return -ENXIO;

        if (!blk_queue_is_zoned(q))
                return -ENOTTY;

        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;

        if (!(mode & FMODE_WRITE))
                return -EBADF;

        if (copy_from_user(&zrange, argp, sizeof(struct blk_zone_range)))
                return -EFAULT;

        return blkdev_reset_zones(bdev, zrange.sector, zrange.nr_sectors,
                                  GFP_KERNEL);
}
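
/*
 * Userspace view (illustrative, not part of the original code): resetting
 * one zone through the ioctl, with "zone_start" and "zone_len" standing
 * for a previously reported zone:
 *
 *      struct blk_zone_range zrange = {
 *              .sector         = zone_start,
 *              .nr_sectors     = zone_len,
 *      };
 *
 *      ret = ioctl(fd, BLKRESETZONE, &zrange);
 */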