// SPDX-License-Identifier: GPL-2.0
/*
 * block/blk-lib.c - functions related to generic block layer helpers
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/scatterlist.h>

#include "blk.h"

/*
 * Allocate a new bio with room for @nr_pages bio_vecs.  If @bio is non-NULL,
 * chain it to the new bio and submit it, so a caller can build an arbitrarily
 * long chain in a loop and only wait on the final bio returned.
 */
struct bio *blk_next_bio(struct bio *bio, unsigned int nr_pages, gfp_t gfp)
{
	struct bio *new = bio_alloc(gfp, nr_pages);

	if (bio) {
		bio_chain(bio, new);
		submit_bio(bio);
	}

	return new;
}
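
/*
 * Illustrative sketch, not taken from a real caller: the usual pattern for
 * blk_next_bio() builds a chain inside a loop and then waits on the final
 * bio only, which completes once every chained bio has completed.  Assumes
 * the caller already holds a reference to a block_device @bdev:
 *
 *	struct bio *bio = NULL;
 *	sector_t sector = 0;
 *	int i, err;
 *
 *	for (i = 0; i < 4; i++) {
 *		bio = blk_next_bio(bio, 0, GFP_KERNEL);
 *		bio->bi_iter.bi_sector = sector;
 *		bio->bi_iter.bi_size = SZ_1M;
 *		bio_set_dev(bio, bdev);
 *		bio->bi_opf = REQ_OP_DISCARD;
 *		sector += SZ_1M >> 9;
 *	}
 *	err = submit_bio_wait(bio);
 *	bio_put(bio);
 */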

int __blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, int flags,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	unsigned int op;
	sector_t bs_mask, part_offset = 0;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	if (flags & BLKDEV_DISCARD_SECURE) {
		if (!blk_queue_secure_erase(q))
			return -EOPNOTSUPP;
		op = REQ_OP_SECURE_ERASE;
	} else {
		if (!blk_queue_discard(q))
			return -EOPNOTSUPP;
		op = REQ_OP_DISCARD;
	}

	/* In case the discard granularity isn't set by a buggy device driver */
	if (WARN_ON_ONCE(!q->limits.discard_granularity)) {
		char dev_name[BDEVNAME_SIZE];

		bdevname(bdev, dev_name);
		pr_err_ratelimited("%s: Error: discard_granularity is 0.\n", dev_name);
		return -EOPNOTSUPP;
	}

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!nr_sects)
		return -EINVAL;

	/* In case the discard request is within a partition */
	if (bdev_is_partition(bdev))
		part_offset = bdev->bd_start_sect;

	while (nr_sects) {
		sector_t granularity_aligned_lba, req_sects;
		sector_t sector_mapped = sector + part_offset;

		granularity_aligned_lba = round_up(sector_mapped,
				q->limits.discard_granularity >> SECTOR_SHIFT);

		/*
		 * Check whether the discard bio starts at a discard_granularity
		 * aligned LBA:
		 * - If not: set (granularity_aligned_lba - sector_mapped) as
		 *   bi_size of the first split bio, so that the second bio
		 *   starts at a discard_granularity aligned LBA on the device.
		 * - If yes: use bio_aligned_discard_max_sectors() as the max
		 *   possible bi_size of the first split bio.  Then when this
		 *   bio is split by the device driver, the resulting bios are
		 *   very likely to be aligned to the discard_granularity of
		 *   the device's queue.
		 */
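		/*
		 * Worked example (illustrative): with a 4KB discard_granularity
		 * (8 sectors) and sector_mapped == 10, granularity_aligned_lba
		 * is round_up(10, 8) == 16, so the first bio is capped at
		 * 16 - 10 == 6 sectors and the next bio starts on an aligned
		 * LBA.
		 */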
		if (granularity_aligned_lba == sector_mapped)
			req_sects = min_t(sector_t, nr_sects,
					  bio_aligned_discard_max_sectors(q));
		else
			req_sects = min_t(sector_t, nr_sects,
					  granularity_aligned_lba - sector_mapped);

		WARN_ON_ONCE((req_sects << 9) > UINT_MAX);

		bio = blk_next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, op, 0);

		bio->bi_iter.bi_size = req_sects << 9;
		sector += req_sects;
		nr_sects -= req_sects;

		/*
		 * We can loop for a long time in here, if someone does
		 * full device discards (like mkfs). Be nice and allow
		 * us to schedule out to avoid softlocking if preempt
		 * is disabled.
		 */
		cond_resched();
	}

	*biop = bio;
	return 0;
}
EXPORT_SYMBOL(__blkdev_issue_discard);

/**
 * blkdev_issue_discard - queue a discard
 * @bdev:	blockdev to issue discard for
 * @sector:	start sector
 * @nr_sects:	number of sectors to discard
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	BLKDEV_DISCARD_* flags to control behaviour
 *
 * Description:
 *    Issue a discard request for the sectors in question.
 */
int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned long flags)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask, flags,
			&bio);
	if (!ret && bio) {
		ret = submit_bio_wait(bio);
		if (ret == -EOPNOTSUPP)
			ret = 0;
		bio_put(bio);
	}
	blk_finish_plug(&plug);

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_discard);
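
/*
 * Minimal usage sketch (illustrative, assuming the caller already holds a
 * reference to @bdev and the range is aligned to the logical block size):
 * discard the first 1GB of the device and treat missing discard support as
 * non-fatal.
 *
 *	int err = blkdev_issue_discard(bdev, 0, SZ_1G >> 9, GFP_KERNEL, 0);
 *
 *	if (err && err != -EOPNOTSUPP)
 *		pr_warn("discard failed: %d\n", err);
 */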

/**
 * __blkdev_issue_write_same - generate a number of bios with the same page
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data to write
 * @biop:	pointer to anchor bio
 *
 * Description:
 *  Generate and issue a number of bios (REQ_OP_WRITE_SAME) with the same page.
 */
static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct page *page,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_write_same_sectors;
	struct bio *bio = *biop;
	sector_t bs_mask;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	if (!bdev_write_same(bdev))
		return -EOPNOTSUPP;

	/* Ensure that max_write_same_sectors doesn't overflow bi_size */
	max_write_same_sectors = bio_allowed_max_sectors(q);

	while (nr_sects) {
		bio = blk_next_bio(bio, 1, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_vcnt = 1;
		bio->bi_io_vec->bv_page = page;
		bio->bi_io_vec->bv_offset = 0;
		bio->bi_io_vec->bv_len = bdev_logical_block_size(bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE_SAME, 0);

		if (nr_sects > max_write_same_sectors) {
			bio->bi_iter.bi_size = max_write_same_sectors << 9;
			nr_sects -= max_write_same_sectors;
			sector += max_write_same_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * blkdev_issue_write_same - queue a write same operation
 * @bdev:	target blockdev
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @page:	page containing data
 *
 * Description:
 *    Issue a write same request for the sectors in question.
 */
int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
				sector_t nr_sects, gfp_t gfp_mask,
				struct page *page)
{
	struct bio *bio = NULL;
	struct blk_plug plug;
	int ret;

	blk_start_plug(&plug);
	ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
			&bio);
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
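
/*
 * Minimal usage sketch (illustrative): replicate the first logical block of
 * @page across @nr_sects sectors starting at @sector.  @page must hold at
 * least one logical block of data at offset 0, since every generated bio
 * points at that single page.  -EOPNOTSUPP means the device has no WRITE
 * SAME support and the caller has to fall back to regular writes.
 *
 *	int err = blkdev_issue_write_same(bdev, sector, nr_sects,
 *					  GFP_KERNEL, page);
 */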

static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags)
{
	struct bio *bio = *biop;
	unsigned int max_write_zeroes_sectors;
	struct request_queue *q = bdev_get_queue(bdev);

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	/* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
	max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);

	if (max_write_zeroes_sectors == 0)
		return -EOPNOTSUPP;

	while (nr_sects) {
		bio = blk_next_bio(bio, 0, gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio->bi_opf = REQ_OP_WRITE_ZEROES;
		if (flags & BLKDEV_ZERO_NOUNMAP)
			bio->bi_opf |= REQ_NOUNMAP;

		if (nr_sects > max_write_zeroes_sectors) {
			bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
			nr_sects -= max_write_zeroes_sectors;
			sector += max_write_zeroes_sectors;
		} else {
			bio->bi_iter.bi_size = nr_sects << 9;
			nr_sects = 0;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/*
 * Convert a number of 512B sectors to a number of pages.
 * The result is limited to a number of pages that can fit into a BIO.
 * Also make sure that the result is always at least 1 (page) for the cases
 * where nr_sects is lower than the number of sectors in a page.
 */
static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
{
	sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);

	return min(pages, (sector_t)BIO_MAX_VECS);
}
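
/*
 * Worked example, assuming 4KB pages (PAGE_SIZE / 512 == 8) and
 * BIO_MAX_VECS == 256:
 *   nr_sects == 1       -> DIV_ROUND_UP(1, 8)       == 1 page
 *   nr_sects == 1 << 20 -> DIV_ROUND_UP(1 << 20, 8) == 131072, capped to 256
 */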

static int __blkdev_issue_zero_pages(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop)
{
	struct request_queue *q = bdev_get_queue(bdev);
	struct bio *bio = *biop;
	int bi_size = 0;
	unsigned int sz;

	if (!q)
		return -ENXIO;

	if (bdev_read_only(bdev))
		return -EPERM;

	while (nr_sects != 0) {
		bio = blk_next_bio(bio, __blkdev_sectors_to_bio_pages(nr_sects),
				   gfp_mask);
		bio->bi_iter.bi_sector = sector;
		bio_set_dev(bio, bdev);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);

		while (nr_sects != 0) {
			sz = min((sector_t) PAGE_SIZE, nr_sects << 9);
			bi_size = bio_add_page(bio, ZERO_PAGE(0), sz, 0);
			nr_sects -= bi_size >> 9;
			sector += bi_size >> 9;
			if (bi_size < sz)
				break;
		}
		cond_resched();
	}

	*biop = bio;
	return 0;
}

/**
 * __blkdev_issue_zeroout - generate a number of zero-filled write bios
 * @bdev:	blockdev to issue the request against
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @biop:	pointer to anchor bio
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.
 *
 *  If a device is using logical block provisioning, the underlying space will
 *  not be released if %flags contains BLKDEV_ZERO_NOUNMAP.
 *
 *  If %flags contains BLKDEV_ZERO_NOFALLBACK, the function will return
 *  -EOPNOTSUPP if no explicit hardware offload for zeroing is provided.
 */
int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
		unsigned flags)
{
	int ret;
	sector_t bs_mask;

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

	ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
			biop, flags);
	if (ret != -EOPNOTSUPP || (flags & BLKDEV_ZERO_NOFALLBACK))
		return ret;

	return __blkdev_issue_zero_pages(bdev, sector, nr_sects, gfp_mask,
					 biop);
}
EXPORT_SYMBOL(__blkdev_issue_zeroout);
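
/*
 * Usage sketch (illustrative): a caller that must not fall back to writing
 * zero pages (for example because it only wants the fast path when a
 * hardware zeroing offload exists) passes BLKDEV_ZERO_NOFALLBACK and
 * submits the resulting chain itself:
 *
 *	struct bio *bio = NULL;
 *	int err;
 *
 *	err = __blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_NOIO, &bio,
 *				     BLKDEV_ZERO_NOFALLBACK);
 *	if (!err && bio) {
 *		err = submit_bio_wait(bio);
 *		bio_put(bio);
 *	}
 */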

/**
 * blkdev_issue_zeroout - zero-fill a block range
 * @bdev:	blockdev to write
 * @sector:	start sector
 * @nr_sects:	number of sectors to write
 * @gfp_mask:	memory allocation flags (for bio_alloc)
 * @flags:	controls detailed behavior
 *
 * Description:
 *  Zero-fill a block range, either using hardware offload or by explicitly
 *  writing zeroes to the device.  See __blkdev_issue_zeroout() for the
 *  valid values for %flags.
 */
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
		sector_t nr_sects, gfp_t gfp_mask, unsigned flags)
{
	int ret = 0;
	sector_t bs_mask;
	struct bio *bio;
	struct blk_plug plug;
	bool try_write_zeroes = !!bdev_write_zeroes_sectors(bdev);

	bs_mask = (bdev_logical_block_size(bdev) >> 9) - 1;
	if ((sector | nr_sects) & bs_mask)
		return -EINVAL;

retry:
	bio = NULL;
	blk_start_plug(&plug);
	if (try_write_zeroes) {
		ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects,
						  gfp_mask, &bio, flags);
	} else if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
		ret = __blkdev_issue_zero_pages(bdev, sector, nr_sects,
						gfp_mask, &bio);
	} else {
		/* No zeroing offload support */
		ret = -EOPNOTSUPP;
	}
	if (ret == 0 && bio) {
		ret = submit_bio_wait(bio);
		bio_put(bio);
	}
	blk_finish_plug(&plug);
	if (ret && try_write_zeroes) {
		if (!(flags & BLKDEV_ZERO_NOFALLBACK)) {
			try_write_zeroes = false;
			goto retry;
		}
		if (!bdev_write_zeroes_sectors(bdev)) {
			/*
			 * Zeroing offload support was indicated, but the
			 * device reported ILLEGAL REQUEST (for some devices
			 * there is no non-destructive way to verify whether
			 * WRITE ZEROES is actually supported).
			 */
			ret = -EOPNOTSUPP;
		}
	}

	return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
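
/*
 * Usage sketch (illustrative): zero a range while keeping the blocks
 * provisioned on thinly provisioned devices by passing BLKDEV_ZERO_NOUNMAP.
 *
 *	int err = blkdev_issue_zeroout(bdev, sector, nr_sects, GFP_KERNEL,
 *				       BLKDEV_ZERO_NOUNMAP);
 *	if (err)
 *		pr_warn("zeroout failed: %d\n", err);
 */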