lib/iov_iter.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/slab.h>
9 #include <linux/vmalloc.h>
10 #include <linux/splice.h>
11 #include <linux/compat.h>
12 #include <net/checksum.h>
13 #include <linux/scatterlist.h>
14 #include <linux/instrumented.h>
15
16 #define PIPE_PARANOIA /* for now */
17
18 #define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
19         size_t left;                                    \
20         size_t wanted = n;                              \
21         __p = i->iov;                                   \
22         __v.iov_len = min(n, __p->iov_len - skip);      \
23         if (likely(__v.iov_len)) {                      \
24                 __v.iov_base = __p->iov_base + skip;    \
25                 left = (STEP);                          \
26                 __v.iov_len -= left;                    \
27                 skip += __v.iov_len;                    \
28                 n -= __v.iov_len;                       \
29         } else {                                        \
30                 left = 0;                               \
31         }                                               \
32         while (unlikely(!left && n)) {                  \
33                 __p++;                                  \
34                 __v.iov_len = min(n, __p->iov_len);     \
35                 if (unlikely(!__v.iov_len))             \
36                         continue;                       \
37                 __v.iov_base = __p->iov_base;           \
38                 left = (STEP);                          \
39                 __v.iov_len -= left;                    \
40                 skip = __v.iov_len;                     \
41                 n -= __v.iov_len;                       \
42         }                                               \
43         n = wanted - n;                                 \
44 }
45
46 #define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
47         size_t wanted = n;                              \
48         __p = i->kvec;                                  \
49         __v.iov_len = min(n, __p->iov_len - skip);      \
50         if (likely(__v.iov_len)) {                      \
51                 __v.iov_base = __p->iov_base + skip;    \
52                 (void)(STEP);                           \
53                 skip += __v.iov_len;                    \
54                 n -= __v.iov_len;                       \
55         }                                               \
56         while (unlikely(n)) {                           \
57                 __p++;                                  \
58                 __v.iov_len = min(n, __p->iov_len);     \
59                 if (unlikely(!__v.iov_len))             \
60                         continue;                       \
61                 __v.iov_base = __p->iov_base;           \
62                 (void)(STEP);                           \
63                 skip = __v.iov_len;                     \
64                 n -= __v.iov_len;                       \
65         }                                               \
66         n = wanted;                                     \
67 }
68
69 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
70         struct bvec_iter __start;                       \
71         __start.bi_size = n;                            \
72         __start.bi_bvec_done = skip;                    \
73         __start.bi_idx = 0;                             \
74         for_each_bvec(__v, i->bvec, __bi, __start) {    \
75                 (void)(STEP);                           \
76         }                                               \
77 }
78
79 #define iterate_xarray(i, n, __v, skip, STEP) {         \
80         struct page *head = NULL;                               \
81         size_t wanted = n, seg, offset;                         \
82         loff_t start = i->xarray_start + skip;                  \
83         pgoff_t index = start >> PAGE_SHIFT;                    \
84         int j;                                                  \
85                                                                 \
86         XA_STATE(xas, i->xarray, index);                        \
87                                                                 \
88         rcu_read_lock();                                                \
89         xas_for_each(&xas, head, ULONG_MAX) {                           \
90                 if (xas_retry(&xas, head))                              \
91                         continue;                                       \
92                 if (WARN_ON(xa_is_value(head)))                         \
93                         break;                                          \
94                 if (WARN_ON(PageHuge(head)))                            \
95                         break;                                          \
96                 for (j = (head->index < index) ? index - head->index : 0; \
97                      j < thp_nr_pages(head); j++) {                     \
98                         __v.bv_page = head + j;                         \
99                         offset = (i->xarray_start + skip) & ~PAGE_MASK; \
100                         seg = PAGE_SIZE - offset;                       \
101                         __v.bv_offset = offset;                         \
102                         __v.bv_len = min(n, seg);                       \
103                         (void)(STEP);                                   \
104                         n -= __v.bv_len;                                \
105                         skip += __v.bv_len;                             \
106                         if (n == 0)                                     \
107                                 break;                                  \
108                 }                                                       \
109                 if (n == 0)                                             \
110                         break;                                          \
111         }                                                       \
112         rcu_read_unlock();                                      \
113         n = wanted - n;                                         \
114 }
115
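/*
 * iterate_all_kinds() and iterate_and_advance() below dispatch on the
 * iterator type and evaluate one of the per-segment step expressions:
 * I for user-space iovecs, B for bio_vecs, K for kernel kvecs and X for
 * xarray-backed iterators (the X step also sees a bio_vec).  ITER_DISCARD
 * runs no step at all.  iterate_and_advance() additionally updates the
 * iterator position (count, iov_offset, nr_segs) as it consumes data.
 */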
116 #define iterate_all_kinds(i, n, v, I, B, K, X) {                \
117         if (likely(n)) {                                        \
118                 size_t skip = i->iov_offset;                    \
119                 if (unlikely(i->type & ITER_BVEC)) {            \
120                         struct bio_vec v;                       \
121                         struct bvec_iter __bi;                  \
122                         iterate_bvec(i, n, v, __bi, skip, (B))  \
123                 } else if (unlikely(i->type & ITER_KVEC)) {     \
124                         const struct kvec *kvec;                \
125                         struct kvec v;                          \
126                         iterate_kvec(i, n, v, kvec, skip, (K))  \
127                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
128                 } else if (unlikely(i->type & ITER_XARRAY)) {   \
129                         struct bio_vec v;                       \
130                         iterate_xarray(i, n, v, skip, (X));     \
131                 } else {                                        \
132                         const struct iovec *iov;                \
133                         struct iovec v;                         \
134                         iterate_iovec(i, n, v, iov, skip, (I))  \
135                 }                                               \
136         }                                                       \
137 }
138
139 #define iterate_and_advance(i, n, v, I, B, K, X) {              \
140         if (unlikely(i->count < n))                             \
141                 n = i->count;                                   \
142         if (i->count) {                                         \
143                 size_t skip = i->iov_offset;                    \
144                 if (unlikely(i->type & ITER_BVEC)) {            \
145                         const struct bio_vec *bvec = i->bvec;   \
146                         struct bio_vec v;                       \
147                         struct bvec_iter __bi;                  \
148                         iterate_bvec(i, n, v, __bi, skip, (B))  \
149                         i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
150                         i->nr_segs -= i->bvec - bvec;           \
151                         skip = __bi.bi_bvec_done;               \
152                 } else if (unlikely(i->type & ITER_KVEC)) {     \
153                         const struct kvec *kvec;                \
154                         struct kvec v;                          \
155                         iterate_kvec(i, n, v, kvec, skip, (K))  \
156                         if (skip == kvec->iov_len) {            \
157                                 kvec++;                         \
158                                 skip = 0;                       \
159                         }                                       \
160                         i->nr_segs -= kvec - i->kvec;           \
161                         i->kvec = kvec;                         \
162                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
163                         skip += n;                              \
164                 } else if (unlikely(i->type & ITER_XARRAY)) {   \
165                         struct bio_vec v;                       \
166                         iterate_xarray(i, n, v, skip, (X))      \
167                 } else {                                        \
168                         const struct iovec *iov;                \
169                         struct iovec v;                         \
170                         iterate_iovec(i, n, v, iov, skip, (I))  \
171                         if (skip == iov->iov_len) {             \
172                                 iov++;                          \
173                                 skip = 0;                       \
174                         }                                       \
175                         i->nr_segs -= iov - i->iov;             \
176                         i->iov = iov;                           \
177                 }                                               \
178                 i->count -= n;                                  \
179                 i->iov_offset = skip;                           \
180         }                                                       \
181 }
182
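/*
 * copyout()/copyin() are thin wrappers around raw_copy_to_user() and
 * raw_copy_from_user(): they honour usercopy fault injection, check
 * access_ok() and add the usercopy instrumentation hooks.  Like the raw
 * helpers, they return the number of bytes that could not be copied.
 */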
183 static int copyout(void __user *to, const void *from, size_t n)
184 {
185         if (should_fail_usercopy())
186                 return n;
187         if (access_ok(to, n)) {
188                 instrument_copy_to_user(to, from, n);
189                 n = raw_copy_to_user(to, from, n);
190         }
191         return n;
192 }
193
194 static int copyin(void *to, const void __user *from, size_t n)
195 {
196         if (should_fail_usercopy())
197                 return n;
198         if (access_ok(from, n)) {
199                 instrument_copy_from_user(to, from, n);
200                 n = raw_copy_from_user(to, from, n);
201         }
202         return n;
203 }
204
205 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
206                          struct iov_iter *i)
207 {
208         size_t skip, copy, left, wanted;
209         const struct iovec *iov;
210         char __user *buf;
211         void *kaddr, *from;
212
213         if (unlikely(bytes > i->count))
214                 bytes = i->count;
215
216         if (unlikely(!bytes))
217                 return 0;
218
219         might_fault();
220         wanted = bytes;
221         iov = i->iov;
222         skip = i->iov_offset;
223         buf = iov->iov_base + skip;
224         copy = min(bytes, iov->iov_len - skip);
225
226         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
227                 kaddr = kmap_atomic(page);
228                 from = kaddr + offset;
229
230                 /* first chunk, usually the only one */
231                 left = copyout(buf, from, copy);
232                 copy -= left;
233                 skip += copy;
234                 from += copy;
235                 bytes -= copy;
236
237                 while (unlikely(!left && bytes)) {
238                         iov++;
239                         buf = iov->iov_base;
240                         copy = min(bytes, iov->iov_len);
241                         left = copyout(buf, from, copy);
242                         copy -= left;
243                         skip = copy;
244                         from += copy;
245                         bytes -= copy;
246                 }
247                 if (likely(!bytes)) {
248                         kunmap_atomic(kaddr);
249                         goto done;
250                 }
251                 offset = from - kaddr;
252                 buf += copy;
253                 kunmap_atomic(kaddr);
254                 copy = min(bytes, iov->iov_len - skip);
255         }
256         /* Too bad - revert to non-atomic kmap */
257
258         kaddr = kmap(page);
259         from = kaddr + offset;
260         left = copyout(buf, from, copy);
261         copy -= left;
262         skip += copy;
263         from += copy;
264         bytes -= copy;
265         while (unlikely(!left && bytes)) {
266                 iov++;
267                 buf = iov->iov_base;
268                 copy = min(bytes, iov->iov_len);
269                 left = copyout(buf, from, copy);
270                 copy -= left;
271                 skip = copy;
272                 from += copy;
273                 bytes -= copy;
274         }
275         kunmap(page);
276
277 done:
278         if (skip == iov->iov_len) {
279                 iov++;
280                 skip = 0;
281         }
282         i->count -= wanted - bytes;
283         i->nr_segs -= iov - i->iov;
284         i->iov = iov;
285         i->iov_offset = skip;
286         return wanted - bytes;
287 }
288
289 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
290                          struct iov_iter *i)
291 {
292         size_t skip, copy, left, wanted;
293         const struct iovec *iov;
294         char __user *buf;
295         void *kaddr, *to;
296
297         if (unlikely(bytes > i->count))
298                 bytes = i->count;
299
300         if (unlikely(!bytes))
301                 return 0;
302
303         might_fault();
304         wanted = bytes;
305         iov = i->iov;
306         skip = i->iov_offset;
307         buf = iov->iov_base + skip;
308         copy = min(bytes, iov->iov_len - skip);
309
310         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
311                 kaddr = kmap_atomic(page);
312                 to = kaddr + offset;
313
314                 /* first chunk, usually the only one */
315                 left = copyin(to, buf, copy);
316                 copy -= left;
317                 skip += copy;
318                 to += copy;
319                 bytes -= copy;
320
321                 while (unlikely(!left && bytes)) {
322                         iov++;
323                         buf = iov->iov_base;
324                         copy = min(bytes, iov->iov_len);
325                         left = copyin(to, buf, copy);
326                         copy -= left;
327                         skip = copy;
328                         to += copy;
329                         bytes -= copy;
330                 }
331                 if (likely(!bytes)) {
332                         kunmap_atomic(kaddr);
333                         goto done;
334                 }
335                 offset = to - kaddr;
336                 buf += copy;
337                 kunmap_atomic(kaddr);
338                 copy = min(bytes, iov->iov_len - skip);
339         }
340         /* Too bad - revert to non-atomic kmap */
341
342         kaddr = kmap(page);
343         to = kaddr + offset;
344         left = copyin(to, buf, copy);
345         copy -= left;
346         skip += copy;
347         to += copy;
348         bytes -= copy;
349         while (unlikely(!left && bytes)) {
350                 iov++;
351                 buf = iov->iov_base;
352                 copy = min(bytes, iov->iov_len);
353                 left = copyin(to, buf, copy);
354                 copy -= left;
355                 skip = copy;
356                 to += copy;
357                 bytes -= copy;
358         }
359         kunmap(page);
360
361 done:
362         if (skip == iov->iov_len) {
363                 iov++;
364                 skip = 0;
365         }
366         i->count -= wanted - bytes;
367         i->nr_segs -= iov - i->iov;
368         i->iov = iov;
369         i->iov_offset = skip;
370         return wanted - bytes;
371 }
372
373 #ifdef PIPE_PARANOIA
374 static bool sanity(const struct iov_iter *i)
375 {
376         struct pipe_inode_info *pipe = i->pipe;
377         unsigned int p_head = pipe->head;
378         unsigned int p_tail = pipe->tail;
379         unsigned int p_mask = pipe->ring_size - 1;
380         unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
381         unsigned int i_head = i->head;
382         unsigned int idx;
383
384         if (i->iov_offset) {
385                 struct pipe_buffer *p;
386                 if (unlikely(p_occupancy == 0))
387                         goto Bad;       // pipe must be non-empty
388                 if (unlikely(i_head != p_head - 1))
389                         goto Bad;       // must be at the last buffer...
390
391                 p = &pipe->bufs[i_head & p_mask];
392                 if (unlikely(p->offset + p->len != i->iov_offset))
393                         goto Bad;       // ... at the end of segment
394         } else {
395                 if (i_head != p_head)
396                         goto Bad;       // must be right after the last buffer
397         }
398         return true;
399 Bad:
400         printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
401         printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
402                         p_head, p_tail, pipe->ring_size);
403         for (idx = 0; idx < pipe->ring_size; idx++)
404                 printk(KERN_ERR "[%p %p %d %d]\n",
405                         pipe->bufs[idx].ops,
406                         pipe->bufs[idx].page,
407                         pipe->bufs[idx].offset,
408                         pipe->bufs[idx].len);
409         WARN_ON(1);
410         return false;
411 }
412 #else
413 #define sanity(i) true
414 #endif
415
416 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
417                          struct iov_iter *i)
418 {
419         struct pipe_inode_info *pipe = i->pipe;
420         struct pipe_buffer *buf;
421         unsigned int p_tail = pipe->tail;
422         unsigned int p_mask = pipe->ring_size - 1;
423         unsigned int i_head = i->head;
424         size_t off;
425
426         if (unlikely(bytes > i->count))
427                 bytes = i->count;
428
429         if (unlikely(!bytes))
430                 return 0;
431
432         if (!sanity(i))
433                 return 0;
434
435         off = i->iov_offset;
436         buf = &pipe->bufs[i_head & p_mask];
437         if (off) {
438                 if (offset == off && buf->page == page) {
439                         /* merge with the last one */
440                         buf->len += bytes;
441                         i->iov_offset += bytes;
442                         goto out;
443                 }
444                 i_head++;
445                 buf = &pipe->bufs[i_head & p_mask];
446         }
447         if (pipe_full(i_head, p_tail, pipe->max_usage))
448                 return 0;
449
450         buf->ops = &page_cache_pipe_buf_ops;
451         get_page(page);
452         buf->page = page;
453         buf->offset = offset;
454         buf->len = bytes;
455
456         pipe->head = i_head + 1;
457         i->iov_offset = offset + bytes;
458         i->head = i_head;
459 out:
460         i->count -= bytes;
461         return bytes;
462 }
463
464 /*
465  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
466  * bytes.  For each iovec, fault in each page that constitutes the iovec.
467  *
468  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
469  * because it is an invalid address).
470  */
471 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
472 {
473         size_t skip = i->iov_offset;
474         const struct iovec *iov;
475         int err;
476         struct iovec v;
477
478         if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
479                 iterate_iovec(i, bytes, v, iov, skip, ({
480                         err = fault_in_pages_readable(v.iov_base, v.iov_len);
481                         if (unlikely(err))
482                                 return err;
483                         0;}))
484         }
485         return 0;
486 }
487 EXPORT_SYMBOL(iov_iter_fault_in_readable);
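/*
 * Illustrative sketch (not part of this file): the classic buffered-write
 * pattern pre-faults the source pages before copying under a page lock and
 * simply retries when the atomic copy comes up short; see
 * generic_perform_write() for the real thing.  Local names here are
 * hypothetical.
 *
 *	while (iov_iter_count(i)) {
 *		size_t bytes = min_t(size_t, iov_iter_count(i), PAGE_SIZE);
 *
 *		if (iov_iter_fault_in_readable(i, bytes)) {
 *			status = -EFAULT;
 *			break;
 *		}
 *		... lock and prepare the destination page, then ...
 *		copied = iov_iter_copy_from_user_atomic(page, i, offset, bytes);
 *		iov_iter_advance(i, copied);
 *		... a short copy just loops around and faults in again ...
 *	}
 */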
488
489 void iov_iter_init(struct iov_iter *i, unsigned int direction,
490                         const struct iovec *iov, unsigned long nr_segs,
491                         size_t count)
492 {
493         WARN_ON(direction & ~(READ | WRITE));
494         direction &= READ | WRITE;
495
496         /* It will get better.  Eventually... */
497         if (uaccess_kernel()) {
498                 i->type = ITER_KVEC | direction;
499                 i->kvec = (struct kvec *)iov;
500         } else {
501                 i->type = ITER_IOVEC | direction;
502                 i->iov = iov;
503         }
504         i->nr_segs = nr_segs;
505         i->iov_offset = 0;
506         i->count = count;
507 }
508 EXPORT_SYMBOL(iov_iter_init);
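/*
 * Illustrative sketch (not part of this file): a read(2)-style caller wraps
 * the user buffer in a single iovec, initialises the iterator as the READ
 * destination and lets copy_to_iter() fan the data out.  Local names are
 * hypothetical.
 *
 *	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, READ, &iov, 1, len);
 *	copied = copy_to_iter(kernel_buf, len, &iter);
 */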
509
510 static void memzero_page(struct page *page, size_t offset, size_t len)
511 {
512         char *addr = kmap_atomic(page);
513         memset(addr + offset, 0, len);
514         kunmap_atomic(addr);
515 }
516
517 static inline bool allocated(struct pipe_buffer *buf)
518 {
519         return buf->ops == &default_pipe_buf_ops;
520 }
521
522 static inline void data_start(const struct iov_iter *i,
523                               unsigned int *iter_headp, size_t *offp)
524 {
525         unsigned int p_mask = i->pipe->ring_size - 1;
526         unsigned int iter_head = i->head;
527         size_t off = i->iov_offset;
528
529         if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
530                     off == PAGE_SIZE)) {
531                 iter_head++;
532                 off = 0;
533         }
534         *iter_headp = iter_head;
535         *offp = off;
536 }
537
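/*
 * push_pipe() makes room in the pipe for up to @size bytes: it tops up a
 * partially filled last buffer (if any) and allocates fresh pages for the
 * rest, advancing pipe->head as it goes.  It returns how much space was
 * actually secured, which may be short if the pipe fills up or an
 * allocation fails; *iter_headp and *offp tell the caller where to start
 * writing.
 */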
538 static size_t push_pipe(struct iov_iter *i, size_t size,
539                         int *iter_headp, size_t *offp)
540 {
541         struct pipe_inode_info *pipe = i->pipe;
542         unsigned int p_tail = pipe->tail;
543         unsigned int p_mask = pipe->ring_size - 1;
544         unsigned int iter_head;
545         size_t off;
546         ssize_t left;
547
548         if (unlikely(size > i->count))
549                 size = i->count;
550         if (unlikely(!size))
551                 return 0;
552
553         left = size;
554         data_start(i, &iter_head, &off);
555         *iter_headp = iter_head;
556         *offp = off;
557         if (off) {
558                 left -= PAGE_SIZE - off;
559                 if (left <= 0) {
560                         pipe->bufs[iter_head & p_mask].len += size;
561                         return size;
562                 }
563                 pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
564                 iter_head++;
565         }
566         while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
567                 struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
568                 struct page *page = alloc_page(GFP_USER);
569                 if (!page)
570                         break;
571
572                 buf->ops = &default_pipe_buf_ops;
573                 buf->page = page;
574                 buf->offset = 0;
575                 buf->len = min_t(ssize_t, left, PAGE_SIZE);
576                 left -= buf->len;
577                 iter_head++;
578                 pipe->head = iter_head;
579
580                 if (left == 0)
581                         return size;
582         }
583         return size - left;
584 }
585
586 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
587                                 struct iov_iter *i)
588 {
589         struct pipe_inode_info *pipe = i->pipe;
590         unsigned int p_mask = pipe->ring_size - 1;
591         unsigned int i_head;
592         size_t n, off;
593
594         if (!sanity(i))
595                 return 0;
596
597         bytes = n = push_pipe(i, bytes, &i_head, &off);
598         if (unlikely(!n))
599                 return 0;
600         do {
601                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
602                 memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
603                 i->head = i_head;
604                 i->iov_offset = off + chunk;
605                 n -= chunk;
606                 addr += chunk;
607                 off = 0;
608                 i_head++;
609         } while (n);
610         i->count -= bytes;
611         return bytes;
612 }
613
614 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
615                               __wsum sum, size_t off)
616 {
617         __wsum next = csum_partial_copy_nocheck(from, to, len);
618         return csum_block_add(sum, next, off);
619 }
620
621 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
622                                          struct csum_state *csstate,
623                                          struct iov_iter *i)
624 {
625         struct pipe_inode_info *pipe = i->pipe;
626         unsigned int p_mask = pipe->ring_size - 1;
627         __wsum sum = csstate->csum;
628         size_t off = csstate->off;
629         unsigned int i_head;
630         size_t n, r;
631
632         if (!sanity(i))
633                 return 0;
634
635         bytes = n = push_pipe(i, bytes, &i_head, &r);
636         if (unlikely(!n))
637                 return 0;
638         do {
639                 size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
640                 char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
641                 sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
642                 kunmap_atomic(p);
643                 i->head = i_head;
644                 i->iov_offset = r + chunk;
645                 n -= chunk;
646                 off += chunk;
647                 addr += chunk;
648                 r = 0;
649                 i_head++;
650         } while (n);
651         i->count -= bytes;
652         csstate->csum = sum;
653         csstate->off = off;
654         return bytes;
655 }
656
657 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
658 {
659         const char *from = addr;
660         if (unlikely(iov_iter_is_pipe(i)))
661                 return copy_pipe_to_iter(addr, bytes, i);
662         if (iter_is_iovec(i))
663                 might_fault();
664         iterate_and_advance(i, bytes, v,
665                 copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
666                 memcpy_to_page(v.bv_page, v.bv_offset,
667                                (from += v.bv_len) - v.bv_len, v.bv_len),
668                 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
669                 memcpy_to_page(v.bv_page, v.bv_offset,
670                                (from += v.bv_len) - v.bv_len, v.bv_len)
671         )
672
673         return bytes;
674 }
675 EXPORT_SYMBOL(_copy_to_iter);
676
677 #ifdef CONFIG_ARCH_HAS_COPY_MC
678 static int copyout_mc(void __user *to, const void *from, size_t n)
679 {
680         if (access_ok(to, n)) {
681                 instrument_copy_to_user(to, from, n);
682                 n = copy_mc_to_user((__force void *) to, from, n);
683         }
684         return n;
685 }
686
687 static unsigned long copy_mc_to_page(struct page *page, size_t offset,
688                 const char *from, size_t len)
689 {
690         unsigned long ret;
691         char *to;
692
693         to = kmap_atomic(page);
694         ret = copy_mc_to_kernel(to + offset, from, len);
695         kunmap_atomic(to);
696
697         return ret;
698 }
699
700 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
701                                 struct iov_iter *i)
702 {
703         struct pipe_inode_info *pipe = i->pipe;
704         unsigned int p_mask = pipe->ring_size - 1;
705         unsigned int i_head;
706         size_t n, off, xfer = 0;
707
708         if (!sanity(i))
709                 return 0;
710
711         bytes = n = push_pipe(i, bytes, &i_head, &off);
712         if (unlikely(!n))
713                 return 0;
714         do {
715                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
716                 unsigned long rem;
717
718                 rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
719                                             off, addr, chunk);
720                 i->head = i_head;
721                 i->iov_offset = off + chunk - rem;
722                 xfer += chunk - rem;
723                 if (rem)
724                         break;
725                 n -= chunk;
726                 addr += chunk;
727                 off = 0;
728                 i_head++;
729         } while (n);
730         i->count -= xfer;
731         return xfer;
732 }
733
734 /**
735  * _copy_mc_to_iter - copy to iter with source memory error exception handling
736  * @addr: source kernel address
737  * @bytes: total transfer length
738  * @i: destination iterator
739  *
740  * The pmem driver deploys this for the dax operation
741  * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
742  * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
743  * successfully copied.
744  *
745  * The main differences between this and typical _copy_to_iter() are:
746  *
747  * * Typical tail/residue handling after a fault retries the copy
748  *   byte-by-byte until the fault happens again. Re-triggering machine
749  *   checks is potentially fatal so the implementation uses source
750  *   alignment and poison alignment assumptions to avoid re-triggering
751  *   hardware exceptions.
752  *
753  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
754  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
755  *   a short copy.
756  */
757 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
758 {
759         const char *from = addr;
760         unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
761
762         if (unlikely(iov_iter_is_pipe(i)))
763                 return copy_mc_pipe_to_iter(addr, bytes, i);
764         if (iter_is_iovec(i))
765                 might_fault();
766         iterate_and_advance(i, bytes, v,
767                 copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
768                            v.iov_len),
769                 ({
770                 rem = copy_mc_to_page(v.bv_page, v.bv_offset,
771                                       (from += v.bv_len) - v.bv_len, v.bv_len);
772                 if (rem) {
773                         curr_addr = (unsigned long) from;
774                         bytes = curr_addr - s_addr - rem;
775                         return bytes;
776                 }
777                 }),
778                 ({
779                 rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
780                                         - v.iov_len, v.iov_len);
781                 if (rem) {
782                         curr_addr = (unsigned long) from;
783                         bytes = curr_addr - s_addr - rem;
784                         return bytes;
785                 }
786                 }),
787                 ({
788                 rem = copy_mc_to_page(v.bv_page, v.bv_offset,
789                                       (from += v.bv_len) - v.bv_len, v.bv_len);
790                 if (rem) {
791                         curr_addr = (unsigned long) from;
792                         bytes = curr_addr - s_addr - rem;
793                         rcu_read_unlock();
794                         i->iov_offset += bytes;
795                         i->count -= bytes;
796                         return bytes;
797                 }
798                 })
799         )
800
801         return bytes;
802 }
803 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
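/*
 * Illustrative sketch (not part of this file): a dax-style read path treats
 * a short return from _copy_mc_to_iter() as poisoned source memory and
 * fails the rest of the request.  Names are hypothetical.
 *
 *	copied = _copy_mc_to_iter(kaddr, len, iter);
 *	if (copied != len)
 *		return copied ? copied : -EIO;
 */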
804 #endif /* CONFIG_ARCH_HAS_COPY_MC */
805
806 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
807 {
808         char *to = addr;
809         if (unlikely(iov_iter_is_pipe(i))) {
810                 WARN_ON(1);
811                 return 0;
812         }
813         if (iter_is_iovec(i))
814                 might_fault();
815         iterate_and_advance(i, bytes, v,
816                 copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
817                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
818                                  v.bv_offset, v.bv_len),
819                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
820                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
821                                  v.bv_offset, v.bv_len)
822         )
823
824         return bytes;
825 }
826 EXPORT_SYMBOL(_copy_from_iter);
827
828 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
829 {
830         char *to = addr;
831         if (unlikely(iov_iter_is_pipe(i))) {
832                 WARN_ON(1);
833                 return false;
834         }
835         if (unlikely(i->count < bytes))
836                 return false;
837
838         if (iter_is_iovec(i))
839                 might_fault();
840         iterate_all_kinds(i, bytes, v, ({
841                 if (copyin((to += v.iov_len) - v.iov_len,
842                                       v.iov_base, v.iov_len))
843                         return false;
844                 0;}),
845                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
846                                  v.bv_offset, v.bv_len),
847                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
848                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
849                                  v.bv_offset, v.bv_len)
850         )
851
852         iov_iter_advance(i, bytes);
853         return true;
854 }
855 EXPORT_SYMBOL(_copy_from_iter_full);
856
857 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
858 {
859         char *to = addr;
860         if (unlikely(iov_iter_is_pipe(i))) {
861                 WARN_ON(1);
862                 return 0;
863         }
864         iterate_and_advance(i, bytes, v,
865                 __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
866                                          v.iov_base, v.iov_len),
867                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
868                                  v.bv_offset, v.bv_len),
869                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
870                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
871                                  v.bv_offset, v.bv_len)
872         )
873
874         return bytes;
875 }
876 EXPORT_SYMBOL(_copy_from_iter_nocache);
877
878 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
879 /**
880  * _copy_from_iter_flushcache - write destination through cpu cache
881  * @addr: destination kernel address
882  * @bytes: total transfer length
883  * @i: source iterator
884  *
885  * The pmem driver arranges for filesystem-dax to use this facility via
886  * dax_copy_from_iter() for ensuring that writes to persistent memory
887  * are flushed through the CPU cache. It differs from
888  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
889  * all iterator types, whereas _copy_from_iter_nocache() only attempts to
890  * bypass the cache for the ITER_IOVEC case, and on some archs may use
891  * instructions that strand dirty data in the cache.
892  */
893 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
894 {
895         char *to = addr;
896         if (unlikely(iov_iter_is_pipe(i))) {
897                 WARN_ON(1);
898                 return 0;
899         }
900         iterate_and_advance(i, bytes, v,
901                 __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
902                                          v.iov_base, v.iov_len),
903                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
904                                  v.bv_offset, v.bv_len),
905                 memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
906                         v.iov_len),
907                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
908                                  v.bv_offset, v.bv_len)
909         )
910
911         return bytes;
912 }
913 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
914 #endif
915
916 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
917 {
918         char *to = addr;
919         if (unlikely(iov_iter_is_pipe(i))) {
920                 WARN_ON(1);
921                 return false;
922         }
923         if (unlikely(i->count < bytes))
924                 return false;
925         iterate_all_kinds(i, bytes, v, ({
926                 if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
927                                              v.iov_base, v.iov_len))
928                         return false;
929                 0;}),
930                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
931                                  v.bv_offset, v.bv_len),
932                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
933                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
934                                  v.bv_offset, v.bv_len)
935         )
936
937         iov_iter_advance(i, bytes);
938         return true;
939 }
940 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
941
942 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
943 {
944         struct page *head;
945         size_t v = n + offset;
946
947         /*
948          * The general case needs to access the page order in order
949          * to compute the page size.
950          * However, we mostly deal with order-0 pages and thus can
951          * avoid a possible cache line miss for requests that fit all
952          * page orders.
953          */
954         if (n <= v && v <= PAGE_SIZE)
955                 return true;
956
957         head = compound_head(page);
958         v += (page - head) << PAGE_SHIFT;
959
960         if (likely(n <= v && v <= (page_size(head))))
961                 return true;
962         WARN_ON(1);
963         return false;
964 }
965
966 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
967                          struct iov_iter *i)
968 {
969         if (unlikely(!page_copy_sane(page, offset, bytes)))
970                 return 0;
971         if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
972                 void *kaddr = kmap_atomic(page);
973                 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
974                 kunmap_atomic(kaddr);
975                 return wanted;
976         } else if (unlikely(iov_iter_is_discard(i)))
977                 return bytes;
978         else if (likely(!iov_iter_is_pipe(i)))
979                 return copy_page_to_iter_iovec(page, offset, bytes, i);
980         else
981                 return copy_page_to_iter_pipe(page, offset, bytes, i);
982 }
983 EXPORT_SYMBOL(copy_page_to_iter);
984
985 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
986                          struct iov_iter *i)
987 {
988         if (unlikely(!page_copy_sane(page, offset, bytes)))
989                 return 0;
990         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
991                 WARN_ON(1);
992                 return 0;
993         }
994         if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
995                 void *kaddr = kmap_atomic(page);
996                 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
997                 kunmap_atomic(kaddr);
998                 return wanted;
999         } else
1000                 return copy_page_from_iter_iovec(page, offset, bytes, i);
1001 }
1002 EXPORT_SYMBOL(copy_page_from_iter);
1003
1004 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
1005 {
1006         struct pipe_inode_info *pipe = i->pipe;
1007         unsigned int p_mask = pipe->ring_size - 1;
1008         unsigned int i_head;
1009         size_t n, off;
1010
1011         if (!sanity(i))
1012                 return 0;
1013
1014         bytes = n = push_pipe(i, bytes, &i_head, &off);
1015         if (unlikely(!n))
1016                 return 0;
1017
1018         do {
1019                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
1020                 memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
1021                 i->head = i_head;
1022                 i->iov_offset = off + chunk;
1023                 n -= chunk;
1024                 off = 0;
1025                 i_head++;
1026         } while (n);
1027         i->count -= bytes;
1028         return bytes;
1029 }
1030
1031 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
1032 {
1033         if (unlikely(iov_iter_is_pipe(i)))
1034                 return pipe_zero(bytes, i);
1035         iterate_and_advance(i, bytes, v,
1036                 clear_user(v.iov_base, v.iov_len),
1037                 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
1038                 memset(v.iov_base, 0, v.iov_len),
1039                 memzero_page(v.bv_page, v.bv_offset, v.bv_len)
1040         )
1041
1042         return bytes;
1043 }
1044 EXPORT_SYMBOL(iov_iter_zero);
1045
1046 size_t iov_iter_copy_from_user_atomic(struct page *page,
1047                 struct iov_iter *i, unsigned long offset, size_t bytes)
1048 {
1049         char *kaddr = kmap_atomic(page), *p = kaddr + offset;
1050         if (unlikely(!page_copy_sane(page, offset, bytes))) {
1051                 kunmap_atomic(kaddr);
1052                 return 0;
1053         }
1054         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1055                 kunmap_atomic(kaddr);
1056                 WARN_ON(1);
1057                 return 0;
1058         }
1059         iterate_all_kinds(i, bytes, v,
1060                 copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1061                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1062                                  v.bv_offset, v.bv_len),
1063                 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1064                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1065                                  v.bv_offset, v.bv_len)
1066         )
1067         kunmap_atomic(kaddr);
1068         return bytes;
1069 }
1070 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1071
1072 static inline void pipe_truncate(struct iov_iter *i)
1073 {
1074         struct pipe_inode_info *pipe = i->pipe;
1075         unsigned int p_tail = pipe->tail;
1076         unsigned int p_head = pipe->head;
1077         unsigned int p_mask = pipe->ring_size - 1;
1078
1079         if (!pipe_empty(p_head, p_tail)) {
1080                 struct pipe_buffer *buf;
1081                 unsigned int i_head = i->head;
1082                 size_t off = i->iov_offset;
1083
1084                 if (off) {
1085                         buf = &pipe->bufs[i_head & p_mask];
1086                         buf->len = off - buf->offset;
1087                         i_head++;
1088                 }
1089                 while (p_head != i_head) {
1090                         p_head--;
1091                         pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
1092                 }
1093
1094                 pipe->head = p_head;
1095         }
1096 }
1097
1098 static void pipe_advance(struct iov_iter *i, size_t size)
1099 {
1100         struct pipe_inode_info *pipe = i->pipe;
1101         if (unlikely(i->count < size))
1102                 size = i->count;
1103         if (size) {
1104                 struct pipe_buffer *buf;
1105                 unsigned int p_mask = pipe->ring_size - 1;
1106                 unsigned int i_head = i->head;
1107                 size_t off = i->iov_offset, left = size;
1108
1109                 if (off) /* make it relative to the beginning of buffer */
1110                         left += off - pipe->bufs[i_head & p_mask].offset;
1111                 while (1) {
1112                         buf = &pipe->bufs[i_head & p_mask];
1113                         if (left <= buf->len)
1114                                 break;
1115                         left -= buf->len;
1116                         i_head++;
1117                 }
1118                 i->head = i_head;
1119                 i->iov_offset = buf->offset + left;
1120         }
1121         i->count -= size;
1122         /* ... and discard everything past that point */
1123         pipe_truncate(i);
1124 }
1125
1126 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1127 {
1128         struct bvec_iter bi;
1129
1130         bi.bi_size = i->count;
1131         bi.bi_bvec_done = i->iov_offset;
1132         bi.bi_idx = 0;
1133         bvec_iter_advance(i->bvec, &bi, size);
1134
1135         i->bvec += bi.bi_idx;
1136         i->nr_segs -= bi.bi_idx;
1137         i->count = bi.bi_size;
1138         i->iov_offset = bi.bi_bvec_done;
1139 }
1140
1141 void iov_iter_advance(struct iov_iter *i, size_t size)
1142 {
1143         if (unlikely(iov_iter_is_pipe(i))) {
1144                 pipe_advance(i, size);
1145                 return;
1146         }
1147         if (unlikely(iov_iter_is_discard(i))) {
1148                 i->count -= size;
1149                 return;
1150         }
1151         if (unlikely(iov_iter_is_xarray(i))) {
1152                 size = min(size, i->count);
1153                 i->iov_offset += size;
1154                 i->count -= size;
1155                 return;
1156         }
1157         if (iov_iter_is_bvec(i)) {
1158                 iov_iter_bvec_advance(i, size);
1159                 return;
1160         }
1161         iterate_and_advance(i, size, v, 0, 0, 0, 0)
1162 }
1163 EXPORT_SYMBOL(iov_iter_advance);
1164
1165 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1166 {
1167         if (!unroll)
1168                 return;
1169         if (WARN_ON(unroll > MAX_RW_COUNT))
1170                 return;
1171         i->count += unroll;
1172         if (unlikely(iov_iter_is_pipe(i))) {
1173                 struct pipe_inode_info *pipe = i->pipe;
1174                 unsigned int p_mask = pipe->ring_size - 1;
1175                 unsigned int i_head = i->head;
1176                 size_t off = i->iov_offset;
1177                 while (1) {
1178                         struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1179                         size_t n = off - b->offset;
1180                         if (unroll < n) {
1181                                 off -= unroll;
1182                                 break;
1183                         }
1184                         unroll -= n;
1185                         if (!unroll && i_head == i->start_head) {
1186                                 off = 0;
1187                                 break;
1188                         }
1189                         i_head--;
1190                         b = &pipe->bufs[i_head & p_mask];
1191                         off = b->offset + b->len;
1192                 }
1193                 i->iov_offset = off;
1194                 i->head = i_head;
1195                 pipe_truncate(i);
1196                 return;
1197         }
1198         if (unlikely(iov_iter_is_discard(i)))
1199                 return;
1200         if (unroll <= i->iov_offset) {
1201                 i->iov_offset -= unroll;
1202                 return;
1203         }
1204         unroll -= i->iov_offset;
1205         if (iov_iter_is_xarray(i)) {
1206                 BUG(); /* We should never go beyond the start of the specified
1207                         * range since we might then be straying into pages that
1208                         * aren't pinned.
1209                         */
1210         } else if (iov_iter_is_bvec(i)) {
1211                 const struct bio_vec *bvec = i->bvec;
1212                 while (1) {
1213                         size_t n = (--bvec)->bv_len;
1214                         i->nr_segs++;
1215                         if (unroll <= n) {
1216                                 i->bvec = bvec;
1217                                 i->iov_offset = n - unroll;
1218                                 return;
1219                         }
1220                         unroll -= n;
1221                 }
1222         } else { /* same logic for iovec and kvec */
1223                 const struct iovec *iov = i->iov;
1224                 while (1) {
1225                         size_t n = (--iov)->iov_len;
1226                         i->nr_segs++;
1227                         if (unroll <= n) {
1228                                 i->iov = iov;
1229                                 i->iov_offset = n - unroll;
1230                                 return;
1231                         }
1232                         unroll -= n;
1233                 }
1234         }
1235 }
1236 EXPORT_SYMBOL(iov_iter_revert);
1237
1238 /*
1239  * Return the count of just the current iov_iter segment.
1240  */
1241 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1242 {
1243         if (unlikely(iov_iter_is_pipe(i)))
1244                 return i->count;        // it is a silly place, anyway
1245         if (i->nr_segs == 1)
1246                 return i->count;
1247         if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))
1248                 return i->count;
1249         if (iov_iter_is_bvec(i))
1250                 return min(i->count, i->bvec->bv_len - i->iov_offset);
1251         else
1252                 return min(i->count, i->iov->iov_len - i->iov_offset);
1253 }
1254 EXPORT_SYMBOL(iov_iter_single_seg_count);
1255
1256 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1257                         const struct kvec *kvec, unsigned long nr_segs,
1258                         size_t count)
1259 {
1260         WARN_ON(direction & ~(READ | WRITE));
1261         i->type = ITER_KVEC | (direction & (READ | WRITE));
1262         i->kvec = kvec;
1263         i->nr_segs = nr_segs;
1264         i->iov_offset = 0;
1265         i->count = count;
1266 }
1267 EXPORT_SYMBOL(iov_iter_kvec);
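/*
 * Illustrative sketch (not part of this file): wrapping a kernel buffer in
 * a kvec iterator, as kernel_read()-style helpers do.  Local names are
 * hypothetical.
 *
 *	struct kvec kv = { .iov_base = kbuf, .iov_len = len };
 *	struct iov_iter iter;
 *
 *	iov_iter_kvec(&iter, READ, &kv, 1, len);
 *	... pass &iter to ->read_iter() or fill it with copy_to_iter() ...
 */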
1268
1269 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1270                         const struct bio_vec *bvec, unsigned long nr_segs,
1271                         size_t count)
1272 {
1273         WARN_ON(direction & ~(READ | WRITE));
1274         i->type = ITER_BVEC | (direction & (READ | WRITE));
1275         i->bvec = bvec;
1276         i->nr_segs = nr_segs;
1277         i->iov_offset = 0;
1278         i->count = count;
1279 }
1280 EXPORT_SYMBOL(iov_iter_bvec);
1281
1282 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1283                         struct pipe_inode_info *pipe,
1284                         size_t count)
1285 {
1286         BUG_ON(direction != READ);
1287         WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1288         i->type = ITER_PIPE | READ;
1289         i->pipe = pipe;
1290         i->head = pipe->head;
1291         i->iov_offset = 0;
1292         i->count = count;
1293         i->start_head = i->head;
1294 }
1295 EXPORT_SYMBOL(iov_iter_pipe);
1296
1297 /**
1298  * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1299  * @i: The iterator to initialise.
1300  * @direction: The direction of the transfer.
1301  * @xarray: The xarray to access.
1302  * @start: The start file position.
1303  * @count: The size of the I/O buffer in bytes.
1304  *
1305  * Set up an I/O iterator to either draw data out of the pages attached to an
1306  * inode or to inject data into those pages.  The caller *must* prevent the
1307  * pages from going away, either by taking a ref on them or by locking
1308  * them.
1309  */
1310 void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1311                      struct xarray *xarray, loff_t start, size_t count)
1312 {
1313         BUG_ON(direction & ~1);
1314         i->type = ITER_XARRAY | (direction & (READ | WRITE));
1315         i->xarray = xarray;
1316         i->xarray_start = start;
1317         i->count = count;
1318         i->iov_offset = 0;
1319 }
1320 EXPORT_SYMBOL(iov_iter_xarray);
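/*
 * Illustrative sketch (not part of this file): a network-filesystem read
 * helper can point an iterator straight at the inode's page cache and let
 * the transport copy received data into the locked or ref'd pages.  Names
 * are hypothetical.
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, len);
 *	... fill it with copy_to_iter() / copy_page_to_iter() ...
 */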
1321
1322 /**
1323  * iov_iter_discard - Initialise an I/O iterator that discards data
1324  * @i: The iterator to initialise.
1325  * @direction: The direction of the transfer.
1326  * @count: The size of the I/O buffer in bytes.
1327  *
1328  * Set up an I/O iterator that just discards everything that's written to it.
1329  * It's only available as a READ iterator.
1330  */
1331 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1332 {
1333         BUG_ON(direction != READ);
1334         i->type = ITER_DISCARD | READ;
1335         i->count = count;
1336         i->iov_offset = 0;
1337 }
1338 EXPORT_SYMBOL(iov_iter_discard);
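/*
 * Illustrative sketch (not part of this file): a discard iterator is handy
 * for draining data a caller wants to skip; copying into it throws the
 * bytes away but still advances the count.  Names are hypothetical.
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, bytes_to_skip);
 *	copy_to_iter(src, bytes_to_skip, &iter);
 */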
1339
1340 unsigned long iov_iter_alignment(const struct iov_iter *i)
1341 {
1342         unsigned long res = 0;
1343         size_t size = i->count;
1344
1345         if (unlikely(iov_iter_is_pipe(i))) {
1346                 unsigned int p_mask = i->pipe->ring_size - 1;
1347
1348                 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1349                         return size | i->iov_offset;
1350                 return size;
1351         }
1352         if (unlikely(iov_iter_is_xarray(i)))
1353                 return (i->xarray_start + i->iov_offset) | i->count;
1354         iterate_all_kinds(i, size, v,
1355                 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
1356                 res |= v.bv_offset | v.bv_len,
1357                 res |= (unsigned long)v.iov_base | v.iov_len,
1358                 res |= v.bv_offset | v.bv_len
1359         )
1360         return res;
1361 }
1362 EXPORT_SYMBOL(iov_iter_alignment);
1363
1364 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1365 {
1366         unsigned long res = 0;
1367         size_t size = i->count;
1368
1369         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1370                 WARN_ON(1);
1371                 return ~0U;
1372         }
1373
1374         iterate_all_kinds(i, size, v,
1375                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1376                         (size != v.iov_len ? size : 0), 0),
1377                 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1378                         (size != v.bv_len ? size : 0)),
1379                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1380                         (size != v.iov_len ? size : 0)),
1381                 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1382                         (size != v.bv_len ? size : 0))
1383                 );
1384         return res;
1385 }
1386 EXPORT_SYMBOL(iov_iter_gap_alignment);
1387
1388 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1389                                 size_t maxsize,
1390                                 struct page **pages,
1391                                 int iter_head,
1392                                 size_t *start)
1393 {
1394         struct pipe_inode_info *pipe = i->pipe;
1395         unsigned int p_mask = pipe->ring_size - 1;
1396         ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1397         if (!n)
1398                 return -EFAULT;
1399
1400         maxsize = n;
1401         n += *start;
1402         while (n > 0) {
1403                 get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1404                 iter_head++;
1405                 n -= PAGE_SIZE;
1406         }
1407
1408         return maxsize;
1409 }
1410
1411 static ssize_t pipe_get_pages(struct iov_iter *i,
1412                    struct page **pages, size_t maxsize, unsigned maxpages,
1413                    size_t *start)
1414 {
1415         unsigned int iter_head, npages;
1416         size_t capacity;
1417
1418         if (!maxsize)
1419                 return 0;
1420
1421         if (!sanity(i))
1422                 return -EFAULT;
1423
1424         data_start(i, &iter_head, start);
1425         /* Amount of free space: some of this one + all after this one */
1426         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1427         capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1428
1429         return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1430 }
1431
1432 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1433                                           pgoff_t index, unsigned int nr_pages)
1434 {
1435         XA_STATE(xas, xa, index);
1436         struct page *page;
1437         unsigned int ret = 0;
1438
1439         rcu_read_lock();
1440         for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1441                 if (xas_retry(&xas, page))
1442                         continue;
1443
1444                 /* Has the page moved or been split? */
1445                 if (unlikely(page != xas_reload(&xas))) {
1446                         xas_reset(&xas);
1447                         continue;
1448                 }
1449
1450                 pages[ret] = find_subpage(page, xas.xa_index);
1451                 get_page(pages[ret]);
1452                 if (++ret == nr_pages)
1453                         break;
1454         }
1455         rcu_read_unlock();
1456         return ret;
1457 }
1458
1459 static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1460                                      struct page **pages, size_t maxsize,
1461                                      unsigned maxpages, size_t *_start_offset)
1462 {
1463         unsigned nr, offset;
1464         pgoff_t index, count;
1465         size_t size = maxsize, actual;
1466         loff_t pos;
1467
1468         if (!size || !maxpages)
1469                 return 0;
1470
1471         pos = i->xarray_start + i->iov_offset;
1472         index = pos >> PAGE_SHIFT;
1473         offset = pos & ~PAGE_MASK;
1474         *_start_offset = offset;
1475
1476         count = 1;
1477         if (size > PAGE_SIZE - offset) {
1478                 size -= PAGE_SIZE - offset;
1479                 count += size >> PAGE_SHIFT;
1480                 size &= ~PAGE_MASK;
1481                 if (size)
1482                         count++;
1483         }
1484
1485         if (count > maxpages)
1486                 count = maxpages;
1487
1488         nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1489         if (nr == 0)
1490                 return 0;
1491
1492         actual = PAGE_SIZE * nr;
1493         actual -= offset;
1494         if (nr == count && size > 0) {
1495                 unsigned last_offset = (nr > 1) ? 0 : offset;
1496                 actual -= PAGE_SIZE - (last_offset + size);
1497         }
1498         return actual;
1499 }
1500
1501 ssize_t iov_iter_get_pages(struct iov_iter *i,
1502                    struct page **pages, size_t maxsize, unsigned maxpages,
1503                    size_t *start)
1504 {
1505         if (maxsize > i->count)
1506                 maxsize = i->count;
1507
1508         if (unlikely(iov_iter_is_pipe(i)))
1509                 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1510         if (unlikely(iov_iter_is_xarray(i)))
1511                 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1512         if (unlikely(iov_iter_is_discard(i)))
1513                 return -EFAULT;
1514
1515         iterate_all_kinds(i, maxsize, v, ({
1516                 unsigned long addr = (unsigned long)v.iov_base;
1517                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1518                 int n;
1519                 int res;
1520
1521                 if (len > maxpages * PAGE_SIZE)
1522                         len = maxpages * PAGE_SIZE;
1523                 addr &= ~(PAGE_SIZE - 1);
1524                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1525                 res = get_user_pages_fast(addr, n,
1526                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1527                                 pages);
1528                 if (unlikely(res < 0))
1529                         return res;
1530                 return (res == n ? len : res * PAGE_SIZE) - *start;
1531         0;}),({
1532                 /* can't be more than PAGE_SIZE */
1533                 *start = v.bv_offset;
1534                 get_page(*pages = v.bv_page);
1535                 return v.bv_len;
1536         }),({
1537                 return -EFAULT;
1538         }),
1539         0
1540         )
1541         return 0;
1542 }
1543 EXPORT_SYMBOL(iov_iter_get_pages);
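
/*
 * Editor's note: a usage sketch, not part of this file, of the usual calling
 * convention: the returned byte count begins at *start within pages[0], the
 * iterator is not advanced, and the caller must drop the page references.
 * The surrounding function is hypothetical.
 *
 *	static ssize_t example_pin_segment(struct iov_iter *iter)
 *	{
 *		struct page *pages[16];
 *		size_t off;
 *		ssize_t bytes;
 *		int i, npages;
 *
 *		bytes = iov_iter_get_pages(iter, pages, 16 * PAGE_SIZE,
 *					   ARRAY_SIZE(pages), &off);
 *		if (bytes <= 0)
 *			return bytes;
 *		npages = DIV_ROUND_UP(off + bytes, PAGE_SIZE);
 *		// ... do I/O to pages[0..npages-1]; data starts at off ...
 *		for (i = 0; i < npages; i++)
 *			put_page(pages[i]);
 *		iov_iter_advance(iter, bytes);
 *		return bytes;
 *	}
 */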
1544
1545 static struct page **get_pages_array(size_t n)
1546 {
1547         return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1548 }
1549
1550 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1551                    struct page ***pages, size_t maxsize,
1552                    size_t *start)
1553 {
1554         struct page **p;
1555         unsigned int iter_head, npages;
1556         ssize_t n;
1557
1558         if (!maxsize)
1559                 return 0;
1560
1561         if (!sanity(i))
1562                 return -EFAULT;
1563
1564         data_start(i, &iter_head, start);
1565         /* Amount of free space: some of this one + all after this one */
1566         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1567         n = npages * PAGE_SIZE - *start;
1568         if (maxsize > n)
1569                 maxsize = n;
1570         else
1571                 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1572         p = get_pages_array(npages);
1573         if (!p)
1574                 return -ENOMEM;
1575         n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1576         if (n > 0)
1577                 *pages = p;
1578         else
1579                 kvfree(p);
1580         return n;
1581 }
1582
1583 static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1584                                            struct page ***pages, size_t maxsize,
1585                                            size_t *_start_offset)
1586 {
1587         struct page **p;
1588         unsigned nr, offset;
1589         pgoff_t index, count;
1590         size_t size = maxsize, actual;
1591         loff_t pos;
1592
1593         if (!size)
1594                 return 0;
1595
1596         pos = i->xarray_start + i->iov_offset;
1597         index = pos >> PAGE_SHIFT;
1598         offset = pos & ~PAGE_MASK;
1599         *_start_offset = offset;
1600
1601         count = 1;
1602         if (size > PAGE_SIZE - offset) {
1603                 size -= PAGE_SIZE - offset;
1604                 count += size >> PAGE_SHIFT;
1605                 size &= ~PAGE_MASK;
1606                 if (size)
1607                         count++;
1608         }
1609
1610         p = get_pages_array(count);
1611         if (!p)
1612                 return -ENOMEM;
1613         *pages = p;
1614
1615         nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1616         if (nr == 0)
1617                 return 0;
1618
1619         actual = PAGE_SIZE * nr;
1620         actual -= offset;
1621         if (nr == count && size > 0) {
1622                 unsigned last_offset = (nr > 1) ? 0 : offset;
1623                 actual -= PAGE_SIZE - (last_offset + size);
1624         }
1625         return actual;
1626 }
1627
1628 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1629                    struct page ***pages, size_t maxsize,
1630                    size_t *start)
1631 {
1632         struct page **p;
1633
1634         if (maxsize > i->count)
1635                 maxsize = i->count;
1636
1637         if (unlikely(iov_iter_is_pipe(i)))
1638                 return pipe_get_pages_alloc(i, pages, maxsize, start);
1639         if (unlikely(iov_iter_is_xarray(i)))
1640                 return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1641         if (unlikely(iov_iter_is_discard(i)))
1642                 return -EFAULT;
1643
1644         iterate_all_kinds(i, maxsize, v, ({
1645                 unsigned long addr = (unsigned long)v.iov_base;
1646                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1647                 int n;
1648                 int res;
1649
1650                 addr &= ~(PAGE_SIZE - 1);
1651                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1652                 p = get_pages_array(n);
1653                 if (!p)
1654                         return -ENOMEM;
1655                 res = get_user_pages_fast(addr, n,
1656                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1657                 if (unlikely(res < 0)) {
1658                         kvfree(p);
1659                         return res;
1660                 }
1661                 *pages = p;
1662                 return (res == n ? len : res * PAGE_SIZE) - *start;
1663         0;}),({
1664                 /* can't be more than PAGE_SIZE */
1665                 *start = v.bv_offset;
1666                 *pages = p = get_pages_array(1);
1667                 if (!p)
1668                         return -ENOMEM;
1669                 get_page(*p = v.bv_page);
1670                 return v.bv_len;
1671         }),({
1672                 return -EFAULT;
1673         }), 0
1674         )
1675         return 0;
1676 }
1677 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
1678
1679 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1680                                struct iov_iter *i)
1681 {
1682         char *to = addr;
1683         __wsum sum, next;
1684         size_t off = 0;
1685         sum = *csum;
1686         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1687                 WARN_ON(1);
1688                 return 0;
1689         }
1690         iterate_and_advance(i, bytes, v, ({
1691                 next = csum_and_copy_from_user(v.iov_base,
1692                                                (to += v.iov_len) - v.iov_len,
1693                                                v.iov_len);
1694                 if (next) {
1695                         sum = csum_block_add(sum, next, off);
1696                         off += v.iov_len;
1697                 }
1698                 next ? 0 : v.iov_len;
1699         }), ({
1700                 char *p = kmap_atomic(v.bv_page);
1701                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1702                                       p + v.bv_offset, v.bv_len,
1703                                       sum, off);
1704                 kunmap_atomic(p);
1705                 off += v.bv_len;
1706         }),({
1707                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1708                                       v.iov_base, v.iov_len,
1709                                       sum, off);
1710                 off += v.iov_len;
1711         }), ({
1712                 char *p = kmap_atomic(v.bv_page);
1713                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1714                                       p + v.bv_offset, v.bv_len,
1715                                       sum, off);
1716                 kunmap_atomic(p);
1717                 off += v.bv_len;
1718         })
1719         )
1720         *csum = sum;
1721         return bytes;
1722 }
1723 EXPORT_SYMBOL(csum_and_copy_from_iter);
1724
1725 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1726                                struct iov_iter *i)
1727 {
1728         char *to = addr;
1729         __wsum sum, next;
1730         size_t off = 0;
1731         sum = *csum;
1732         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1733                 WARN_ON(1);
1734                 return false;
1735         }
1736         if (unlikely(i->count < bytes))
1737                 return false;
1738         iterate_all_kinds(i, bytes, v, ({
1739                 next = csum_and_copy_from_user(v.iov_base,
1740                                                (to += v.iov_len) - v.iov_len,
1741                                                v.iov_len);
1742                 if (!next)
1743                         return false;
1744                 sum = csum_block_add(sum, next, off);
1745                 off += v.iov_len;
1746                 0;
1747         }), ({
1748                 char *p = kmap_atomic(v.bv_page);
1749                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1750                                       p + v.bv_offset, v.bv_len,
1751                                       sum, off);
1752                 kunmap_atomic(p);
1753                 off += v.bv_len;
1754         }),({
1755                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1756                                       v.iov_base, v.iov_len,
1757                                       sum, off);
1758                 off += v.iov_len;
1759         }), ({
1760                 char *p = kmap_atomic(v.bv_page);
1761                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1762                                       p + v.bv_offset, v.bv_len,
1763                                       sum, off);
1764                 kunmap_atomic(p);
1765                 off += v.bv_len;
1766         })
1767         )
1768         *csum = sum;
1769         iov_iter_advance(i, bytes);
1770         return true;
1771 }
1772 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1773
1774 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1775                              struct iov_iter *i)
1776 {
1777         struct csum_state *csstate = _csstate;
1778         const char *from = addr;
1779         __wsum sum, next;
1780         size_t off;
1781
1782         if (unlikely(iov_iter_is_pipe(i)))
1783                 return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1784
1785         sum = csstate->csum;
1786         off = csstate->off;
1787         if (unlikely(iov_iter_is_discard(i))) {
1788                 WARN_ON(1);     /* for now */
1789                 return 0;
1790         }
1791         iterate_and_advance(i, bytes, v, ({
1792                 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1793                                              v.iov_base,
1794                                              v.iov_len);
1795                 if (next) {
1796                         sum = csum_block_add(sum, next, off);
1797                         off += v.iov_len;
1798                 }
1799                 next ? 0 : v.iov_len;
1800         }), ({
1801                 char *p = kmap_atomic(v.bv_page);
1802                 sum = csum_and_memcpy(p + v.bv_offset,
1803                                       (from += v.bv_len) - v.bv_len,
1804                                       v.bv_len, sum, off);
1805                 kunmap_atomic(p);
1806                 off += v.bv_len;
1807         }),({
1808                 sum = csum_and_memcpy(v.iov_base,
1809                                      (from += v.iov_len) - v.iov_len,
1810                                      v.iov_len, sum, off);
1811                 off += v.iov_len;
1812         }), ({
1813                 char *p = kmap_atomic(v.bv_page);
1814                 sum = csum_and_memcpy(p + v.bv_offset,
1815                                       (from += v.bv_len) - v.bv_len,
1816                                       v.bv_len, sum, off);
1817                 kunmap_atomic(p);
1818                 off += v.bv_len;
1819         })
1820         )
1821         csstate->csum = sum;
1822         csstate->off = off;
1823         return bytes;
1824 }
1825 EXPORT_SYMBOL(csum_and_copy_to_iter);
1826
1827 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1828                 struct iov_iter *i)
1829 {
1830 #ifdef CONFIG_CRYPTO_HASH
1831         struct ahash_request *hash = hashp;
1832         struct scatterlist sg;
1833         size_t copied;
1834
1835         copied = copy_to_iter(addr, bytes, i);
1836         sg_init_one(&sg, addr, copied);
1837         ahash_request_set_crypt(hash, &sg, NULL, copied);
1838         crypto_ahash_update(hash);
1839         return copied;
1840 #else
1841         return 0;
1842 #endif
1843 }
1844 EXPORT_SYMBOL(hash_and_copy_to_iter);
1845
1846 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1847 {
1848         size_t size = i->count;
1849         int npages = 0;
1850
1851         if (!size)
1852                 return 0;
1853         if (unlikely(iov_iter_is_discard(i)))
1854                 return 0;
1855
1856         if (unlikely(iov_iter_is_pipe(i))) {
1857                 struct pipe_inode_info *pipe = i->pipe;
1858                 unsigned int iter_head;
1859                 size_t off;
1860
1861                 if (!sanity(i))
1862                         return 0;
1863
1864                 data_start(i, &iter_head, &off);
1865                 /* some of this one + all after this one */
1866                 npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1867                 if (npages >= maxpages)
1868                         return maxpages;
1869         } else if (unlikely(iov_iter_is_xarray(i))) {
1870                 unsigned offset;
1871
1872                 offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
1873
1874                 npages = 1;
1875                 if (size > PAGE_SIZE - offset) {
1876                         size -= PAGE_SIZE - offset;
1877                         npages += size >> PAGE_SHIFT;
1878                         size &= ~PAGE_MASK;
1879                         if (size)
1880                                 npages++;
1881                 }
1882                 if (npages >= maxpages)
1883                         return maxpages;
1884         } else iterate_all_kinds(i, size, v, ({
1885                 unsigned long p = (unsigned long)v.iov_base;
1886                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1887                         - p / PAGE_SIZE;
1888                 if (npages >= maxpages)
1889                         return maxpages;
1890         0;}),({
1891                 npages++;
1892                 if (npages >= maxpages)
1893                         return maxpages;
1894         }),({
1895                 unsigned long p = (unsigned long)v.iov_base;
1896                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1897                         - p / PAGE_SIZE;
1898                 if (npages >= maxpages)
1899                         return maxpages;
1900         }),
1901         0
1902         )
1903         return npages;
1904 }
1905 EXPORT_SYMBOL(iov_iter_npages);
1906
1907 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1908 {
1909         *new = *old;
1910         if (unlikely(iov_iter_is_pipe(new))) {
1911                 WARN_ON(1);
1912                 return NULL;
1913         }
1914         if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
1915                 return NULL;
1916         if (iov_iter_is_bvec(new))
1917                 return new->bvec = kmemdup(new->bvec,
1918                                     new->nr_segs * sizeof(struct bio_vec),
1919                                     flags);
1920         else
1921                 /* iovec and kvec have identical layout */
1922                 return new->iov = kmemdup(new->iov,
1923                                    new->nr_segs * sizeof(struct iovec),
1924                                    flags);
1925 }
1926 EXPORT_SYMBOL(dup_iter);
1927
1928 static int copy_compat_iovec_from_user(struct iovec *iov,
1929                 const struct iovec __user *uvec, unsigned long nr_segs)
1930 {
1931         const struct compat_iovec __user *uiov =
1932                 (const struct compat_iovec __user *)uvec;
1933         int ret = -EFAULT, i;
1934
1935         if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1936                 return -EFAULT;
1937
1938         for (i = 0; i < nr_segs; i++) {
1939                 compat_uptr_t buf;
1940                 compat_ssize_t len;
1941
1942                 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1943                 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1944
1945                 /* check for compat_size_t not fitting in compat_ssize_t .. */
1946                 if (len < 0) {
1947                         ret = -EINVAL;
1948                         goto uaccess_end;
1949                 }
1950                 iov[i].iov_base = compat_ptr(buf);
1951                 iov[i].iov_len = len;
1952         }
1953
1954         ret = 0;
1955 uaccess_end:
1956         user_access_end();
1957         return ret;
1958 }
1959
1960 static int copy_iovec_from_user(struct iovec *iov,
1961                 const struct iovec __user *uvec, unsigned long nr_segs)
1962 {
1963         unsigned long seg;
1964
1965         if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1966                 return -EFAULT;
1967         for (seg = 0; seg < nr_segs; seg++) {
1968                 if ((ssize_t)iov[seg].iov_len < 0)
1969                         return -EINVAL;
1970         }
1971
1972         return 0;
1973 }
1974
1975 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1976                 unsigned long nr_segs, unsigned long fast_segs,
1977                 struct iovec *fast_iov, bool compat)
1978 {
1979         struct iovec *iov = fast_iov;
1980         int ret;
1981
1982         /*
1983          * SuS says "The readv() function *may* fail if the iovcnt argument was
1984          * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1985          * traditionally returned zero for zero segments, so...
1986          */
1987         if (nr_segs == 0)
1988                 return iov;
1989         if (nr_segs > UIO_MAXIOV)
1990                 return ERR_PTR(-EINVAL);
1991         if (nr_segs > fast_segs) {
1992                 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1993                 if (!iov)
1994                         return ERR_PTR(-ENOMEM);
1995         }
1996
1997         if (compat)
1998                 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1999         else
2000                 ret = copy_iovec_from_user(iov, uvec, nr_segs);
2001         if (ret) {
2002                 if (iov != fast_iov)
2003                         kfree(iov);
2004                 return ERR_PTR(ret);
2005         }
2006
2007         return iov;
2008 }
2009
2010 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
2011                  unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
2012                  struct iov_iter *i, bool compat)
2013 {
2014         ssize_t total_len = 0;
2015         unsigned long seg;
2016         struct iovec *iov;
2017
2018         iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
2019         if (IS_ERR(iov)) {
2020                 *iovp = NULL;
2021                 return PTR_ERR(iov);
2022         }
2023
2024         /*
2025          * According to the Single Unix Specification we should return EINVAL if
2026          * an element length is < 0 when cast to ssize_t or if the total length
2027          * would overflow the ssize_t return value of the system call.
2028          *
2029          * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
2030          * overflow case.
2031          */
2032         for (seg = 0; seg < nr_segs; seg++) {
2033                 ssize_t len = (ssize_t)iov[seg].iov_len;
2034
2035                 if (!access_ok(iov[seg].iov_base, len)) {
2036                         if (iov != *iovp)
2037                                 kfree(iov);
2038                         *iovp = NULL;
2039                         return -EFAULT;
2040                 }
2041
2042                 if (len > MAX_RW_COUNT - total_len) {
2043                         len = MAX_RW_COUNT - total_len;
2044                         iov[seg].iov_len = len;
2045                 }
2046                 total_len += len;
2047         }
2048
2049         iov_iter_init(i, type, iov, nr_segs, total_len);
2050         if (iov == *iovp)
2051                 *iovp = NULL;
2052         else
2053                 *iovp = iov;
2054         return total_len;
2055 }
2056
2057 /**
2058  * import_iovec() - Copy an array of &struct iovec from userspace
2059  *     into the kernel, check that it is valid, and initialize a new
2060  *     &struct iov_iter iterator to access it.
2061  *
2062  * @type: One of %READ or %WRITE.
2063  * @uvec: Pointer to the userspace array.
2064  * @nr_segs: Number of elements in userspace array.
2065  * @fast_segs: Number of elements in the array pointed to by *@iovp.
2066  * @iovp: (input and output parameter) Pointer to pointer to (usually small
2067  *     on-stack) kernel array.
2068  * @i: Pointer to iterator that will be initialized on success.
2069  *
2070  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
2071  * then this function places %NULL in *@iovp on return. Otherwise, a new
2072  * array will be allocated and the result placed in *@iovp. This means that
2073  * the caller may call kfree() on *@iovp regardless of whether the small
2074  * on-stack array was used or not (and regardless of whether this function
2075  * returns an error or not).
2076  *
2077  * Return: Negative error code on error, bytes imported on success
2078  */
2079 ssize_t import_iovec(int type, const struct iovec __user *uvec,
2080                  unsigned nr_segs, unsigned fast_segs,
2081                  struct iovec **iovp, struct iov_iter *i)
2082 {
2083         return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2084                               in_compat_syscall());
2085 }
2086 EXPORT_SYMBOL(import_iovec);
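
/*
 * Editor's note: a usage sketch, not part of this file, of the normal
 * readv()-style calling pattern.  do_something_with() is hypothetical; the
 * unconditional kfree() is safe because *iovp is set to %NULL whenever the
 * on-stack array was used.
 *
 *	static ssize_t example_readv(const struct iovec __user *uvec,
 *				     unsigned long nr_segs)
 *	{
 *		struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *		struct iov_iter iter;
 *		ssize_t ret;
 *
 *		ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *				   &iov, &iter);
 *		if (ret < 0)
 *			return ret;
 *		ret = do_something_with(&iter);	// e.g. call_read_iter()
 *		kfree(iov);
 *		return ret;
 *	}
 */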
2087
2088 int import_single_range(int rw, void __user *buf, size_t len,
2089                  struct iovec *iov, struct iov_iter *i)
2090 {
2091         if (len > MAX_RW_COUNT)
2092                 len = MAX_RW_COUNT;
2093         if (unlikely(!access_ok(buf, len)))
2094                 return -EFAULT;
2095
2096         iov->iov_base = buf;
2097         iov->iov_len = len;
2098         iov_iter_init(i, rw, iov, 1, len);
2099         return 0;
2100 }
2101 EXPORT_SYMBOL(import_single_range);
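
/*
 * Editor's note: a usage sketch, not part of this file, for the single-buffer
 * counterpart of import_iovec().  The on-stack iovec must stay alive for as
 * long as the iterator is used; ubuf and len come from the hypothetical caller.
 *
 *	struct iovec iov;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_single_range(WRITE, ubuf, len, &iov, &iter);
 *	if (ret)
 *		return ret;
 *	// iter now covers ubuf, with len clamped to MAX_RW_COUNT
 */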
2102
2103 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
2104                             int (*f)(struct kvec *vec, void *context),
2105                             void *context)
2106 {
2107         struct kvec w;
2108         int err = -EINVAL;
2109         if (!bytes)
2110                 return 0;
2111
2112         iterate_all_kinds(i, bytes, v, -EINVAL, ({
2113                 w.iov_base = kmap(v.bv_page) + v.bv_offset;
2114                 w.iov_len = v.bv_len;
2115                 err = f(&w, context);
2116                 kunmap(v.bv_page);
2117                 err;}), ({
2118                 w = v;
2119                 err = f(&w, context);}), ({
2120                 w.iov_base = kmap(v.bv_page) + v.bv_offset;
2121                 w.iov_len = v.bv_len;
2122                 err = f(&w, context);
2123                 kunmap(v.bv_page);
2124                 err;})
2125         )
2126         return err;
2127 }
2128 EXPORT_SYMBOL(iov_iter_for_each_range);