iov_iter: Add ITER_XARRAY
lib/iov_iter.c [linux-2.6-microblaze.git]
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/slab.h>
9 #include <linux/vmalloc.h>
10 #include <linux/splice.h>
11 #include <linux/compat.h>
12 #include <net/checksum.h>
13 #include <linux/scatterlist.h>
14 #include <linux/instrumented.h>
15
16 #define PIPE_PARANOIA /* for now */
17
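/*
 * iterate_iovec(), iterate_kvec() and iterate_bvec() run a caller-supplied
 * expression (STEP) over up to n bytes of the corresponding segment array,
 * starting skip bytes into the first segment.  For user iovecs, STEP yields
 * the number of bytes it failed to process and a short step ends the walk
 * early; kvec and bvec steps are expected to consume their whole segment.
 */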
18 #define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
19         size_t left;                                    \
20         size_t wanted = n;                              \
21         __p = i->iov;                                   \
22         __v.iov_len = min(n, __p->iov_len - skip);      \
23         if (likely(__v.iov_len)) {                      \
24                 __v.iov_base = __p->iov_base + skip;    \
25                 left = (STEP);                          \
26                 __v.iov_len -= left;                    \
27                 skip += __v.iov_len;                    \
28                 n -= __v.iov_len;                       \
29         } else {                                        \
30                 left = 0;                               \
31         }                                               \
32         while (unlikely(!left && n)) {                  \
33                 __p++;                                  \
34                 __v.iov_len = min(n, __p->iov_len);     \
35                 if (unlikely(!__v.iov_len))             \
36                         continue;                       \
37                 __v.iov_base = __p->iov_base;           \
38                 left = (STEP);                          \
39                 __v.iov_len -= left;                    \
40                 skip = __v.iov_len;                     \
41                 n -= __v.iov_len;                       \
42         }                                               \
43         n = wanted - n;                                 \
44 }
45
46 #define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
47         size_t wanted = n;                              \
48         __p = i->kvec;                                  \
49         __v.iov_len = min(n, __p->iov_len - skip);      \
50         if (likely(__v.iov_len)) {                      \
51                 __v.iov_base = __p->iov_base + skip;    \
52                 (void)(STEP);                           \
53                 skip += __v.iov_len;                    \
54                 n -= __v.iov_len;                       \
55         }                                               \
56         while (unlikely(n)) {                           \
57                 __p++;                                  \
58                 __v.iov_len = min(n, __p->iov_len);     \
59                 if (unlikely(!__v.iov_len))             \
60                         continue;                       \
61                 __v.iov_base = __p->iov_base;           \
62                 (void)(STEP);                           \
63                 skip = __v.iov_len;                     \
64                 n -= __v.iov_len;                       \
65         }                                               \
66         n = wanted;                                     \
67 }
68
69 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
70         struct bvec_iter __start;                       \
71         __start.bi_size = n;                            \
72         __start.bi_bvec_done = skip;                    \
73         __start.bi_idx = 0;                             \
74         for_each_bvec(__v, i->bvec, __bi, __start) {    \
75                 (void)(STEP);                           \
76         }                                               \
77 }
78
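/*
 * iterate_xarray() walks the pages cached in i->xarray, starting at byte
 * position i->xarray_start + skip, and runs STEP on successive
 * (page, offset, length) segments until n bytes have been covered.  Compound
 * pages are handed over one subpage at a time; retry entries are skipped,
 * while value entries and hugetlbfs pages end the walk.  The walk is done
 * under the RCU read lock, so the pages themselves must be held by the
 * caller.
 */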
79 #define iterate_xarray(i, n, __v, skip, STEP) {         \
80         struct page *head = NULL;                               \
81         size_t wanted = n, seg, offset;                         \
82         loff_t start = i->xarray_start + skip;                  \
83         pgoff_t index = start >> PAGE_SHIFT;                    \
84         int j;                                                  \
85                                                                 \
86         XA_STATE(xas, i->xarray, index);                        \
87                                                                 \
88         rcu_read_lock();                                                \
89         xas_for_each(&xas, head, ULONG_MAX) {                           \
90                 if (xas_retry(&xas, head))                              \
91                         continue;                                       \
92                 if (WARN_ON(xa_is_value(head)))                         \
93                         break;                                          \
94                 if (WARN_ON(PageHuge(head)))                            \
95                         break;                                          \
96                 for (j = (head->index < index) ? index - head->index : 0; \
97                      j < thp_nr_pages(head); j++) {                     \
98                         __v.bv_page = head + j;                         \
99                         offset = (i->xarray_start + skip) & ~PAGE_MASK; \
100                         seg = PAGE_SIZE - offset;                       \
101                         __v.bv_offset = offset;                         \
102                         __v.bv_len = min(n, seg);                       \
103                         (void)(STEP);                                   \
104                         n -= __v.bv_len;                                \
105                         skip += __v.bv_len;                             \
106                         if (n == 0)                                     \
107                                 break;                                  \
108                 }                                                       \
109                 if (n == 0)                                             \
110                         break;                                          \
111         }                                                       \
112         rcu_read_unlock();                                      \
113         n = wanted - n;                                         \
114 }
115
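/*
 * iterate_all_kinds() dispatches on the iterator type and applies the
 * matching expression (I for user iovecs, B for bio_vecs, K for kvecs,
 * X for xarray segments) to up to n bytes without modifying the iterator;
 * ITER_DISCARD is a no-op.  iterate_and_advance() does the same but also
 * consumes what was processed, updating count, iov_offset and the segment
 * bookkeeping.
 */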
116 #define iterate_all_kinds(i, n, v, I, B, K, X) {                \
117         if (likely(n)) {                                        \
118                 size_t skip = i->iov_offset;                    \
119                 if (unlikely(i->type & ITER_BVEC)) {            \
120                         struct bio_vec v;                       \
121                         struct bvec_iter __bi;                  \
122                         iterate_bvec(i, n, v, __bi, skip, (B))  \
123                 } else if (unlikely(i->type & ITER_KVEC)) {     \
124                         const struct kvec *kvec;                \
125                         struct kvec v;                          \
126                         iterate_kvec(i, n, v, kvec, skip, (K))  \
127                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
128                 } else if (unlikely(i->type & ITER_XARRAY)) {   \
129                         struct bio_vec v;                       \
130                         iterate_xarray(i, n, v, skip, (X));     \
131                 } else {                                        \
132                         const struct iovec *iov;                \
133                         struct iovec v;                         \
134                         iterate_iovec(i, n, v, iov, skip, (I))  \
135                 }                                               \
136         }                                                       \
137 }
138
139 #define iterate_and_advance(i, n, v, I, B, K, X) {              \
140         if (unlikely(i->count < n))                             \
141                 n = i->count;                                   \
142         if (i->count) {                                         \
143                 size_t skip = i->iov_offset;                    \
144                 if (unlikely(i->type & ITER_BVEC)) {            \
145                         const struct bio_vec *bvec = i->bvec;   \
146                         struct bio_vec v;                       \
147                         struct bvec_iter __bi;                  \
148                         iterate_bvec(i, n, v, __bi, skip, (B))  \
149                         i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
150                         i->nr_segs -= i->bvec - bvec;           \
151                         skip = __bi.bi_bvec_done;               \
152                 } else if (unlikely(i->type & ITER_KVEC)) {     \
153                         const struct kvec *kvec;                \
154                         struct kvec v;                          \
155                         iterate_kvec(i, n, v, kvec, skip, (K))  \
156                         if (skip == kvec->iov_len) {            \
157                                 kvec++;                         \
158                                 skip = 0;                       \
159                         }                                       \
160                         i->nr_segs -= kvec - i->kvec;           \
161                         i->kvec = kvec;                         \
162                 } else if (unlikely(i->type & ITER_DISCARD)) {  \
163                         skip += n;                              \
164                 } else if (unlikely(i->type & ITER_XARRAY)) {   \
165                         struct bio_vec v;                       \
166                         iterate_xarray(i, n, v, skip, (X))      \
167                 } else {                                        \
168                         const struct iovec *iov;                \
169                         struct iovec v;                         \
170                         iterate_iovec(i, n, v, iov, skip, (I))  \
171                         if (skip == iov->iov_len) {             \
172                                 iov++;                          \
173                                 skip = 0;                       \
174                         }                                       \
175                         i->nr_segs -= iov - i->iov;             \
176                         i->iov = iov;                           \
177                 }                                               \
178                 i->count -= n;                                  \
179                 i->iov_offset = skip;                           \
180         }                                                       \
181 }
182
183 static int copyout(void __user *to, const void *from, size_t n)
184 {
185         if (should_fail_usercopy())
186                 return n;
187         if (access_ok(to, n)) {
188                 instrument_copy_to_user(to, from, n);
189                 n = raw_copy_to_user(to, from, n);
190         }
191         return n;
192 }
193
194 static int copyin(void *to, const void __user *from, size_t n)
195 {
196         if (should_fail_usercopy())
197                 return n;
198         if (access_ok(from, n)) {
199                 instrument_copy_from_user(to, from, n);
200                 n = raw_copy_from_user(to, from, n);
201         }
202         return n;
203 }
204
205 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
206                          struct iov_iter *i)
207 {
208         size_t skip, copy, left, wanted;
209         const struct iovec *iov;
210         char __user *buf;
211         void *kaddr, *from;
212
213         if (unlikely(bytes > i->count))
214                 bytes = i->count;
215
216         if (unlikely(!bytes))
217                 return 0;
218
219         might_fault();
220         wanted = bytes;
221         iov = i->iov;
222         skip = i->iov_offset;
223         buf = iov->iov_base + skip;
224         copy = min(bytes, iov->iov_len - skip);
225
226         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
227                 kaddr = kmap_atomic(page);
228                 from = kaddr + offset;
229
230                 /* first chunk, usually the only one */
231                 left = copyout(buf, from, copy);
232                 copy -= left;
233                 skip += copy;
234                 from += copy;
235                 bytes -= copy;
236
237                 while (unlikely(!left && bytes)) {
238                         iov++;
239                         buf = iov->iov_base;
240                         copy = min(bytes, iov->iov_len);
241                         left = copyout(buf, from, copy);
242                         copy -= left;
243                         skip = copy;
244                         from += copy;
245                         bytes -= copy;
246                 }
247                 if (likely(!bytes)) {
248                         kunmap_atomic(kaddr);
249                         goto done;
250                 }
251                 offset = from - kaddr;
252                 buf += copy;
253                 kunmap_atomic(kaddr);
254                 copy = min(bytes, iov->iov_len - skip);
255         }
256         /* Too bad - revert to non-atomic kmap */
257
258         kaddr = kmap(page);
259         from = kaddr + offset;
260         left = copyout(buf, from, copy);
261         copy -= left;
262         skip += copy;
263         from += copy;
264         bytes -= copy;
265         while (unlikely(!left && bytes)) {
266                 iov++;
267                 buf = iov->iov_base;
268                 copy = min(bytes, iov->iov_len);
269                 left = copyout(buf, from, copy);
270                 copy -= left;
271                 skip = copy;
272                 from += copy;
273                 bytes -= copy;
274         }
275         kunmap(page);
276
277 done:
278         if (skip == iov->iov_len) {
279                 iov++;
280                 skip = 0;
281         }
282         i->count -= wanted - bytes;
283         i->nr_segs -= iov - i->iov;
284         i->iov = iov;
285         i->iov_offset = skip;
286         return wanted - bytes;
287 }
288
289 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
290                          struct iov_iter *i)
291 {
292         size_t skip, copy, left, wanted;
293         const struct iovec *iov;
294         char __user *buf;
295         void *kaddr, *to;
296
297         if (unlikely(bytes > i->count))
298                 bytes = i->count;
299
300         if (unlikely(!bytes))
301                 return 0;
302
303         might_fault();
304         wanted = bytes;
305         iov = i->iov;
306         skip = i->iov_offset;
307         buf = iov->iov_base + skip;
308         copy = min(bytes, iov->iov_len - skip);
309
310         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
311                 kaddr = kmap_atomic(page);
312                 to = kaddr + offset;
313
314                 /* first chunk, usually the only one */
315                 left = copyin(to, buf, copy);
316                 copy -= left;
317                 skip += copy;
318                 to += copy;
319                 bytes -= copy;
320
321                 while (unlikely(!left && bytes)) {
322                         iov++;
323                         buf = iov->iov_base;
324                         copy = min(bytes, iov->iov_len);
325                         left = copyin(to, buf, copy);
326                         copy -= left;
327                         skip = copy;
328                         to += copy;
329                         bytes -= copy;
330                 }
331                 if (likely(!bytes)) {
332                         kunmap_atomic(kaddr);
333                         goto done;
334                 }
335                 offset = to - kaddr;
336                 buf += copy;
337                 kunmap_atomic(kaddr);
338                 copy = min(bytes, iov->iov_len - skip);
339         }
340         /* Too bad - revert to non-atomic kmap */
341
342         kaddr = kmap(page);
343         to = kaddr + offset;
344         left = copyin(to, buf, copy);
345         copy -= left;
346         skip += copy;
347         to += copy;
348         bytes -= copy;
349         while (unlikely(!left && bytes)) {
350                 iov++;
351                 buf = iov->iov_base;
352                 copy = min(bytes, iov->iov_len);
353                 left = copyin(to, buf, copy);
354                 copy -= left;
355                 skip = copy;
356                 to += copy;
357                 bytes -= copy;
358         }
359         kunmap(page);
360
361 done:
362         if (skip == iov->iov_len) {
363                 iov++;
364                 skip = 0;
365         }
366         i->count -= wanted - bytes;
367         i->nr_segs -= iov - i->iov;
368         i->iov = iov;
369         i->iov_offset = skip;
370         return wanted - bytes;
371 }
372
373 #ifdef PIPE_PARANOIA
374 static bool sanity(const struct iov_iter *i)
375 {
376         struct pipe_inode_info *pipe = i->pipe;
377         unsigned int p_head = pipe->head;
378         unsigned int p_tail = pipe->tail;
379         unsigned int p_mask = pipe->ring_size - 1;
380         unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
381         unsigned int i_head = i->head;
382         unsigned int idx;
383
384         if (i->iov_offset) {
385                 struct pipe_buffer *p;
386                 if (unlikely(p_occupancy == 0))
387                         goto Bad;       // pipe must be non-empty
388                 if (unlikely(i_head != p_head - 1))
389                         goto Bad;       // must be at the last buffer...
390
391                 p = &pipe->bufs[i_head & p_mask];
392                 if (unlikely(p->offset + p->len != i->iov_offset))
393                         goto Bad;       // ... at the end of segment
394         } else {
395                 if (i_head != p_head)
396                         goto Bad;       // must be right after the last buffer
397         }
398         return true;
399 Bad:
400         printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
401         printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
402                         p_head, p_tail, pipe->ring_size);
403         for (idx = 0; idx < pipe->ring_size; idx++)
404                 printk(KERN_ERR "[%p %p %d %d]\n",
405                         pipe->bufs[idx].ops,
406                         pipe->bufs[idx].page,
407                         pipe->bufs[idx].offset,
408                         pipe->bufs[idx].len);
409         WARN_ON(1);
410         return false;
411 }
412 #else
413 #define sanity(i) true
414 #endif
415
416 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
417                          struct iov_iter *i)
418 {
419         struct pipe_inode_info *pipe = i->pipe;
420         struct pipe_buffer *buf;
421         unsigned int p_tail = pipe->tail;
422         unsigned int p_mask = pipe->ring_size - 1;
423         unsigned int i_head = i->head;
424         size_t off;
425
426         if (unlikely(bytes > i->count))
427                 bytes = i->count;
428
429         if (unlikely(!bytes))
430                 return 0;
431
432         if (!sanity(i))
433                 return 0;
434
435         off = i->iov_offset;
436         buf = &pipe->bufs[i_head & p_mask];
437         if (off) {
438                 if (offset == off && buf->page == page) {
439                         /* merge with the last one */
440                         buf->len += bytes;
441                         i->iov_offset += bytes;
442                         goto out;
443                 }
444                 i_head++;
445                 buf = &pipe->bufs[i_head & p_mask];
446         }
447         if (pipe_full(i_head, p_tail, pipe->max_usage))
448                 return 0;
449
450         buf->ops = &page_cache_pipe_buf_ops;
451         get_page(page);
452         buf->page = page;
453         buf->offset = offset;
454         buf->len = bytes;
455
456         pipe->head = i_head + 1;
457         i->iov_offset = offset + bytes;
458         i->head = i_head;
459 out:
460         i->count -= bytes;
461         return bytes;
462 }
463
464 /*
465  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
466  * bytes.  For each iovec, fault in each page that constitutes the iovec.
467  *
468  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
469  * because it is an invalid address).
470  */
471 int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
472 {
473         size_t skip = i->iov_offset;
474         const struct iovec *iov;
475         int err;
476         struct iovec v;
477
478         if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
479                 iterate_iovec(i, bytes, v, iov, skip, ({
480                         err = fault_in_pages_readable(v.iov_base, v.iov_len);
481                         if (unlikely(err))
482                         return err;
483                 0;}))
484         }
485         return 0;
486 }
487 EXPORT_SYMBOL(iov_iter_fault_in_readable);
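/*
 * Illustrative sketch (hypothetical caller, not part of this file): buffered
 * write paths typically pre-fault the source pages before taking page or
 * mapping locks, along the lines of:
 *
 *	if (unlikely(iov_iter_fault_in_readable(i, bytes))) {
 *		status = -EFAULT;
 *		break;
 *	}
 */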
488
489 void iov_iter_init(struct iov_iter *i, unsigned int direction,
490                         const struct iovec *iov, unsigned long nr_segs,
491                         size_t count)
492 {
493         WARN_ON(direction & ~(READ | WRITE));
494         direction &= READ | WRITE;
495
496         /* It will get better.  Eventually... */
497         if (uaccess_kernel()) {
498                 i->type = ITER_KVEC | direction;
499                 i->kvec = (struct kvec *)iov;
500         } else {
501                 i->type = ITER_IOVEC | direction;
502                 i->iov = iov;
503         }
504         i->nr_segs = nr_segs;
505         i->iov_offset = 0;
506         i->count = count;
507 }
508 EXPORT_SYMBOL(iov_iter_init);
509
510 static void memzero_page(struct page *page, size_t offset, size_t len)
511 {
512         char *addr = kmap_atomic(page);
513         memset(addr + offset, 0, len);
514         kunmap_atomic(addr);
515 }
516
517 static inline bool allocated(struct pipe_buffer *buf)
518 {
519         return buf->ops == &default_pipe_buf_ops;
520 }
521
522 static inline void data_start(const struct iov_iter *i,
523                               unsigned int *iter_headp, size_t *offp)
524 {
525         unsigned int p_mask = i->pipe->ring_size - 1;
526         unsigned int iter_head = i->head;
527         size_t off = i->iov_offset;
528
529         if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
530                     off == PAGE_SIZE)) {
531                 iter_head++;
532                 off = 0;
533         }
534         *iter_headp = iter_head;
535         *offp = off;
536 }
537
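/*
 * push_pipe() makes room in the pipe for up to size bytes: it tops up the
 * partially filled buffer at the current position (if any) and then appends
 * freshly allocated default pipe buffers until the request is covered or the
 * pipe is full.  The number of bytes of space actually provided is returned,
 * and *iter_headp/*offp say where copying should start.
 */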
538 static size_t push_pipe(struct iov_iter *i, size_t size,
539                         int *iter_headp, size_t *offp)
540 {
541         struct pipe_inode_info *pipe = i->pipe;
542         unsigned int p_tail = pipe->tail;
543         unsigned int p_mask = pipe->ring_size - 1;
544         unsigned int iter_head;
545         size_t off;
546         ssize_t left;
547
548         if (unlikely(size > i->count))
549                 size = i->count;
550         if (unlikely(!size))
551                 return 0;
552
553         left = size;
554         data_start(i, &iter_head, &off);
555         *iter_headp = iter_head;
556         *offp = off;
557         if (off) {
558                 left -= PAGE_SIZE - off;
559                 if (left <= 0) {
560                         pipe->bufs[iter_head & p_mask].len += size;
561                         return size;
562                 }
563                 pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
564                 iter_head++;
565         }
566         while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
567                 struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
568                 struct page *page = alloc_page(GFP_USER);
569                 if (!page)
570                         break;
571
572                 buf->ops = &default_pipe_buf_ops;
573                 buf->page = page;
574                 buf->offset = 0;
575                 buf->len = min_t(ssize_t, left, PAGE_SIZE);
576                 left -= buf->len;
577                 iter_head++;
578                 pipe->head = iter_head;
579
580                 if (left == 0)
581                         return size;
582         }
583         return size - left;
584 }
585
586 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
587                                 struct iov_iter *i)
588 {
589         struct pipe_inode_info *pipe = i->pipe;
590         unsigned int p_mask = pipe->ring_size - 1;
591         unsigned int i_head;
592         size_t n, off;
593
594         if (!sanity(i))
595                 return 0;
596
597         bytes = n = push_pipe(i, bytes, &i_head, &off);
598         if (unlikely(!n))
599                 return 0;
600         do {
601                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
602                 memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
603                 i->head = i_head;
604                 i->iov_offset = off + chunk;
605                 n -= chunk;
606                 addr += chunk;
607                 off = 0;
608                 i_head++;
609         } while (n);
610         i->count -= bytes;
611         return bytes;
612 }
613
614 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
615                               __wsum sum, size_t off)
616 {
617         __wsum next = csum_partial_copy_nocheck(from, to, len);
618         return csum_block_add(sum, next, off);
619 }
620
621 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
622                                          struct csum_state *csstate,
623                                          struct iov_iter *i)
624 {
625         struct pipe_inode_info *pipe = i->pipe;
626         unsigned int p_mask = pipe->ring_size - 1;
627         __wsum sum = csstate->csum;
628         size_t off = csstate->off;
629         unsigned int i_head;
630         size_t n, r;
631
632         if (!sanity(i))
633                 return 0;
634
635         bytes = n = push_pipe(i, bytes, &i_head, &r);
636         if (unlikely(!n))
637                 return 0;
638         do {
639                 size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
640                 char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
641                 sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
642                 kunmap_atomic(p);
643                 i->head = i_head;
644                 i->iov_offset = r + chunk;
645                 n -= chunk;
646                 off += chunk;
647                 addr += chunk;
648                 r = 0;
649                 i_head++;
650         } while (n);
651         i->count -= bytes;
652         csstate->csum = sum;
653         csstate->off = off;
654         return bytes;
655 }
656
657 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
658 {
659         const char *from = addr;
660         if (unlikely(iov_iter_is_pipe(i)))
661                 return copy_pipe_to_iter(addr, bytes, i);
662         if (iter_is_iovec(i))
663                 might_fault();
664         iterate_and_advance(i, bytes, v,
665                 copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
666                 memcpy_to_page(v.bv_page, v.bv_offset,
667                                (from += v.bv_len) - v.bv_len, v.bv_len),
668                 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
669                 memcpy_to_page(v.bv_page, v.bv_offset,
670                                (from += v.bv_len) - v.bv_len, v.bv_len)
671         )
672
673         return bytes;
674 }
675 EXPORT_SYMBOL(_copy_to_iter);
676
677 #ifdef CONFIG_ARCH_HAS_COPY_MC
678 static int copyout_mc(void __user *to, const void *from, size_t n)
679 {
680         if (access_ok(to, n)) {
681                 instrument_copy_to_user(to, from, n);
682                 n = copy_mc_to_user((__force void *) to, from, n);
683         }
684         return n;
685 }
686
687 static unsigned long copy_mc_to_page(struct page *page, size_t offset,
688                 const char *from, size_t len)
689 {
690         unsigned long ret;
691         char *to;
692
693         to = kmap_atomic(page);
694         ret = copy_mc_to_kernel(to + offset, from, len);
695         kunmap_atomic(to);
696
697         return ret;
698 }
699
700 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
701                                 struct iov_iter *i)
702 {
703         struct pipe_inode_info *pipe = i->pipe;
704         unsigned int p_mask = pipe->ring_size - 1;
705         unsigned int i_head;
706         size_t n, off, xfer = 0;
707
708         if (!sanity(i))
709                 return 0;
710
711         bytes = n = push_pipe(i, bytes, &i_head, &off);
712         if (unlikely(!n))
713                 return 0;
714         do {
715                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
716                 unsigned long rem;
717
718                 rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
719                                             off, addr, chunk);
720                 i->head = i_head;
721                 i->iov_offset = off + chunk - rem;
722                 xfer += chunk - rem;
723                 if (rem)
724                         break;
725                 n -= chunk;
726                 addr += chunk;
727                 off = 0;
728                 i_head++;
729         } while (n);
730         i->count -= xfer;
731         return xfer;
732 }
733
734 /**
735  * _copy_mc_to_iter - copy to iter with source memory error exception handling
736  * @addr: source kernel address
737  * @bytes: total transfer length
738  * @i: destination iterator
739  *
740  * The pmem driver deploys this for the dax operation
741  * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
742  * block-layer). Upon #MC, read(2) aborts and returns EIO or the bytes
743  * successfully copied.
744  *
745  * The main differences between this and typical _copy_to_iter() are:
746  *
747  * * Typical tail/residue handling after a fault retries the copy
748  *   byte-by-byte until the fault happens again. Re-triggering machine
749  *   checks is potentially fatal so the implementation uses source
750  *   alignment and poison alignment assumptions to avoid re-triggering
751  *   hardware exceptions.
752  *
753  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
754  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
755  *   a short copy.
756  */
757 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
758 {
759         const char *from = addr;
760         unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
761
762         if (unlikely(iov_iter_is_pipe(i)))
763                 return copy_mc_pipe_to_iter(addr, bytes, i);
764         if (iter_is_iovec(i))
765                 might_fault();
766         iterate_and_advance(i, bytes, v,
767                 copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
768                            v.iov_len),
769                 ({
770                 rem = copy_mc_to_page(v.bv_page, v.bv_offset,
771                                       (from += v.bv_len) - v.bv_len, v.bv_len);
772                 if (rem) {
773                         curr_addr = (unsigned long) from;
774                         bytes = curr_addr - s_addr - rem;
775                         return bytes;
776                 }
777                 }),
778                 ({
779                 rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
780                                         - v.iov_len, v.iov_len);
781                 if (rem) {
782                         curr_addr = (unsigned long) from;
783                         bytes = curr_addr - s_addr - rem;
784                         return bytes;
785                 }
786                 }),
787                 ({
788                 rem = copy_mc_to_page(v.bv_page, v.bv_offset,
789                                       (from += v.bv_len) - v.bv_len, v.bv_len);
790                 if (rem) {
791                         curr_addr = (unsigned long) from;
792                         bytes = curr_addr - s_addr - rem;
793                         rcu_read_unlock();
794                         return bytes;
795                 }
796                 })
797         )
798
799         return bytes;
800 }
801 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
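/*
 * Illustrative sketch (hypothetical caller, not part of this file): users of
 * _copy_mc_to_iter() treat a short return as a copy truncated by a machine
 * check, for example:
 *
 *	size_t copied = _copy_mc_to_iter(kaddr, len, iter);
 *
 *	if (copied != len)
 *		return copied ? copied : -EIO;
 */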
802 #endif /* CONFIG_ARCH_HAS_COPY_MC */
803
804 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
805 {
806         char *to = addr;
807         if (unlikely(iov_iter_is_pipe(i))) {
808                 WARN_ON(1);
809                 return 0;
810         }
811         if (iter_is_iovec(i))
812                 might_fault();
813         iterate_and_advance(i, bytes, v,
814                 copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
815                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
816                                  v.bv_offset, v.bv_len),
817                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
818                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
819                                  v.bv_offset, v.bv_len)
820         )
821
822         return bytes;
823 }
824 EXPORT_SYMBOL(_copy_from_iter);
825
826 bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
827 {
828         char *to = addr;
829         if (unlikely(iov_iter_is_pipe(i))) {
830                 WARN_ON(1);
831                 return false;
832         }
833         if (unlikely(i->count < bytes))
834                 return false;
835
836         if (iter_is_iovec(i))
837                 might_fault();
838         iterate_all_kinds(i, bytes, v, ({
839                 if (copyin((to += v.iov_len) - v.iov_len,
840                                       v.iov_base, v.iov_len))
841                         return false;
842                 0;}),
843                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
844                                  v.bv_offset, v.bv_len),
845                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
846                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
847                                  v.bv_offset, v.bv_len)
848         )
849
850         iov_iter_advance(i, bytes);
851         return true;
852 }
853 EXPORT_SYMBOL(_copy_from_iter_full);
854
855 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
856 {
857         char *to = addr;
858         if (unlikely(iov_iter_is_pipe(i))) {
859                 WARN_ON(1);
860                 return 0;
861         }
862         iterate_and_advance(i, bytes, v,
863                 __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
864                                          v.iov_base, v.iov_len),
865                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
866                                  v.bv_offset, v.bv_len),
867                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
868                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
869                                  v.bv_offset, v.bv_len)
870         )
871
872         return bytes;
873 }
874 EXPORT_SYMBOL(_copy_from_iter_nocache);
875
876 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
877 /**
878  * _copy_from_iter_flushcache - write destination through cpu cache
879  * @addr: destination kernel address
880  * @bytes: total transfer length
881  * @iter: source iterator
882  * @i: source iterator
883  * The pmem driver arranges for filesystem-dax to use this facility via
884  * dax_copy_from_iter() for ensuring that writes to persistent memory
885  * are flushed through the CPU cache. It is differentiated from
886  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
887  * all iterator types. _copy_from_iter_nocache() only attempts to
888  * bypass the cache for the ITER_IOVEC case, and on some archs may use
889  * instructions that strand dirty-data in the cache.
890  */
891 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
892 {
893         char *to = addr;
894         if (unlikely(iov_iter_is_pipe(i))) {
895                 WARN_ON(1);
896                 return 0;
897         }
898         iterate_and_advance(i, bytes, v,
899                 __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
900                                          v.iov_base, v.iov_len),
901                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
902                                  v.bv_offset, v.bv_len),
903                 memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
904                         v.iov_len),
905                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
906                                  v.bv_offset, v.bv_len)
907         )
908
909         return bytes;
910 }
911 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
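/*
 * Illustrative sketch (hypothetical caller, not part of this file): a dax
 * write path would copy from the source iterator straight through the CPU
 * cache into persistent memory with something like:
 *
 *	size_t copied = _copy_from_iter_flushcache(pmem_addr, len, i);
 */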
912 #endif
913
914 bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
915 {
916         char *to = addr;
917         if (unlikely(iov_iter_is_pipe(i))) {
918                 WARN_ON(1);
919                 return false;
920         }
921         if (unlikely(i->count < bytes))
922                 return false;
923         iterate_all_kinds(i, bytes, v, ({
924                 if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
925                                              v.iov_base, v.iov_len))
926                         return false;
927                 0;}),
928                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
929                                  v.bv_offset, v.bv_len),
930                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
931                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
932                                  v.bv_offset, v.bv_len)
933         )
934
935         iov_iter_advance(i, bytes);
936         return true;
937 }
938 EXPORT_SYMBOL(_copy_from_iter_full_nocache);
939
940 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
941 {
942         struct page *head;
943         size_t v = n + offset;
944
945         /*
946          * The general case needs to access the page order in order
947          * to compute the page size.
948          * However, we mostly deal with order-0 pages and thus can
949          * avoid a possible cache line miss for requests that fit all
950          * page orders.
951          */
952         if (n <= v && v <= PAGE_SIZE)
953                 return true;
954
955         head = compound_head(page);
956         v += (page - head) << PAGE_SHIFT;
957
958         if (likely(n <= v && v <= (page_size(head))))
959                 return true;
960         WARN_ON(1);
961         return false;
962 }
963
964 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
965                          struct iov_iter *i)
966 {
967         if (unlikely(!page_copy_sane(page, offset, bytes)))
968                 return 0;
969         if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
970                 void *kaddr = kmap_atomic(page);
971                 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
972                 kunmap_atomic(kaddr);
973                 return wanted;
974         } else if (unlikely(iov_iter_is_discard(i)))
975                 return bytes;
976         else if (likely(!iov_iter_is_pipe(i)))
977                 return copy_page_to_iter_iovec(page, offset, bytes, i);
978         else
979                 return copy_page_to_iter_pipe(page, offset, bytes, i);
980 }
981 EXPORT_SYMBOL(copy_page_to_iter);
982
983 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
984                          struct iov_iter *i)
985 {
986         if (unlikely(!page_copy_sane(page, offset, bytes)))
987                 return 0;
988         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
989                 WARN_ON(1);
990                 return 0;
991         }
992         if (i->type & (ITER_BVEC | ITER_KVEC | ITER_XARRAY)) {
993                 void *kaddr = kmap_atomic(page);
994                 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
995                 kunmap_atomic(kaddr);
996                 return wanted;
997         } else
998                 return copy_page_from_iter_iovec(page, offset, bytes, i);
999 }
1000 EXPORT_SYMBOL(copy_page_from_iter);
1001
1002 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
1003 {
1004         struct pipe_inode_info *pipe = i->pipe;
1005         unsigned int p_mask = pipe->ring_size - 1;
1006         unsigned int i_head;
1007         size_t n, off;
1008
1009         if (!sanity(i))
1010                 return 0;
1011
1012         bytes = n = push_pipe(i, bytes, &i_head, &off);
1013         if (unlikely(!n))
1014                 return 0;
1015
1016         do {
1017                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
1018                 memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
1019                 i->head = i_head;
1020                 i->iov_offset = off + chunk;
1021                 n -= chunk;
1022                 off = 0;
1023                 i_head++;
1024         } while (n);
1025         i->count -= bytes;
1026         return bytes;
1027 }
1028
1029 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
1030 {
1031         if (unlikely(iov_iter_is_pipe(i)))
1032                 return pipe_zero(bytes, i);
1033         iterate_and_advance(i, bytes, v,
1034                 clear_user(v.iov_base, v.iov_len),
1035                 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
1036                 memset(v.iov_base, 0, v.iov_len),
1037                 memzero_page(v.bv_page, v.bv_offset, v.bv_len)
1038         )
1039
1040         return bytes;
1041 }
1042 EXPORT_SYMBOL(iov_iter_zero);
1043
1044 size_t iov_iter_copy_from_user_atomic(struct page *page,
1045                 struct iov_iter *i, unsigned long offset, size_t bytes)
1046 {
1047         char *kaddr = kmap_atomic(page), *p = kaddr + offset;
1048         if (unlikely(!page_copy_sane(page, offset, bytes))) {
1049                 kunmap_atomic(kaddr);
1050                 return 0;
1051         }
1052         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1053                 kunmap_atomic(kaddr);
1054                 WARN_ON(1);
1055                 return 0;
1056         }
1057         iterate_all_kinds(i, bytes, v,
1058                 copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1059                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1060                                  v.bv_offset, v.bv_len),
1061                 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1062                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1063                                  v.bv_offset, v.bv_len)
1064         )
1065         kunmap_atomic(kaddr);
1066         return bytes;
1067 }
1068 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1069
1070 static inline void pipe_truncate(struct iov_iter *i)
1071 {
1072         struct pipe_inode_info *pipe = i->pipe;
1073         unsigned int p_tail = pipe->tail;
1074         unsigned int p_head = pipe->head;
1075         unsigned int p_mask = pipe->ring_size - 1;
1076
1077         if (!pipe_empty(p_head, p_tail)) {
1078                 struct pipe_buffer *buf;
1079                 unsigned int i_head = i->head;
1080                 size_t off = i->iov_offset;
1081
1082                 if (off) {
1083                         buf = &pipe->bufs[i_head & p_mask];
1084                         buf->len = off - buf->offset;
1085                         i_head++;
1086                 }
1087                 while (p_head != i_head) {
1088                         p_head--;
1089                         pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
1090                 }
1091
1092                 pipe->head = p_head;
1093         }
1094 }
1095
1096 static void pipe_advance(struct iov_iter *i, size_t size)
1097 {
1098         struct pipe_inode_info *pipe = i->pipe;
1099         if (unlikely(i->count < size))
1100                 size = i->count;
1101         if (size) {
1102                 struct pipe_buffer *buf;
1103                 unsigned int p_mask = pipe->ring_size - 1;
1104                 unsigned int i_head = i->head;
1105                 size_t off = i->iov_offset, left = size;
1106
1107                 if (off) /* make it relative to the beginning of buffer */
1108                         left += off - pipe->bufs[i_head & p_mask].offset;
1109                 while (1) {
1110                         buf = &pipe->bufs[i_head & p_mask];
1111                         if (left <= buf->len)
1112                                 break;
1113                         left -= buf->len;
1114                         i_head++;
1115                 }
1116                 i->head = i_head;
1117                 i->iov_offset = buf->offset + left;
1118         }
1119         i->count -= size;
1120         /* ... and discard everything past that point */
1121         pipe_truncate(i);
1122 }
1123
1124 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1125 {
1126         struct bvec_iter bi;
1127
1128         bi.bi_size = i->count;
1129         bi.bi_bvec_done = i->iov_offset;
1130         bi.bi_idx = 0;
1131         bvec_iter_advance(i->bvec, &bi, size);
1132
1133         i->bvec += bi.bi_idx;
1134         i->nr_segs -= bi.bi_idx;
1135         i->count = bi.bi_size;
1136         i->iov_offset = bi.bi_bvec_done;
1137 }
1138
1139 void iov_iter_advance(struct iov_iter *i, size_t size)
1140 {
1141         if (unlikely(iov_iter_is_pipe(i))) {
1142                 pipe_advance(i, size);
1143                 return;
1144         }
1145         if (unlikely(iov_iter_is_discard(i))) {
1146                 i->count -= size;
1147                 return;
1148         }
1149         if (unlikely(iov_iter_is_xarray(i))) {
1150                 i->iov_offset += size;
1151                 i->count -= size;
1152                 return;
1153         }
1154         if (iov_iter_is_bvec(i)) {
1155                 iov_iter_bvec_advance(i, size);
1156                 return;
1157         }
1158         iterate_and_advance(i, size, v, 0, 0, 0, 0)
1159 }
1160 EXPORT_SYMBOL(iov_iter_advance);
1161
1162 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1163 {
1164         if (!unroll)
1165                 return;
1166         if (WARN_ON(unroll > MAX_RW_COUNT))
1167                 return;
1168         i->count += unroll;
1169         if (unlikely(iov_iter_is_pipe(i))) {
1170                 struct pipe_inode_info *pipe = i->pipe;
1171                 unsigned int p_mask = pipe->ring_size - 1;
1172                 unsigned int i_head = i->head;
1173                 size_t off = i->iov_offset;
1174                 while (1) {
1175                         struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1176                         size_t n = off - b->offset;
1177                         if (unroll < n) {
1178                                 off -= unroll;
1179                                 break;
1180                         }
1181                         unroll -= n;
1182                         if (!unroll && i_head == i->start_head) {
1183                                 off = 0;
1184                                 break;
1185                         }
1186                         i_head--;
1187                         b = &pipe->bufs[i_head & p_mask];
1188                         off = b->offset + b->len;
1189                 }
1190                 i->iov_offset = off;
1191                 i->head = i_head;
1192                 pipe_truncate(i);
1193                 return;
1194         }
1195         if (unlikely(iov_iter_is_discard(i)))
1196                 return;
1197         if (unroll <= i->iov_offset) {
1198                 i->iov_offset -= unroll;
1199                 return;
1200         }
1201         unroll -= i->iov_offset;
1202         if (iov_iter_is_xarray(i)) {
1203                 BUG(); /* We should never go beyond the start of the specified
1204                         * range since we might then be straying into pages that
1205                         * aren't pinned.
1206                         */
1207         } else if (iov_iter_is_bvec(i)) {
1208                 const struct bio_vec *bvec = i->bvec;
1209                 while (1) {
1210                         size_t n = (--bvec)->bv_len;
1211                         i->nr_segs++;
1212                         if (unroll <= n) {
1213                                 i->bvec = bvec;
1214                                 i->iov_offset = n - unroll;
1215                                 return;
1216                         }
1217                         unroll -= n;
1218                 }
1219         } else { /* same logics for iovec and kvec */
1220                 const struct iovec *iov = i->iov;
1221                 while (1) {
1222                         size_t n = (--iov)->iov_len;
1223                         i->nr_segs++;
1224                         if (unroll <= n) {
1225                                 i->iov = iov;
1226                                 i->iov_offset = n - unroll;
1227                                 return;
1228                         }
1229                         unroll -= n;
1230                 }
1231         }
1232 }
1233 EXPORT_SYMBOL(iov_iter_revert);
1234
1235 /*
1236  * Return the count of just the current iov_iter segment.
1237  */
1238 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1239 {
1240         if (unlikely(iov_iter_is_pipe(i)))
1241                 return i->count;        // it is a silly place, anyway
1242         if (i->nr_segs == 1)
1243                 return i->count;
1244         if (unlikely(iov_iter_is_discard(i) || iov_iter_is_xarray(i)))
1245                 return i->count;
1246         if (iov_iter_is_bvec(i))
1247                 return min(i->count, i->bvec->bv_len - i->iov_offset);
1248         else
1249                 return min(i->count, i->iov->iov_len - i->iov_offset);
1250 }
1251 EXPORT_SYMBOL(iov_iter_single_seg_count);
1252
1253 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1254                         const struct kvec *kvec, unsigned long nr_segs,
1255                         size_t count)
1256 {
1257         WARN_ON(direction & ~(READ | WRITE));
1258         i->type = ITER_KVEC | (direction & (READ | WRITE));
1259         i->kvec = kvec;
1260         i->nr_segs = nr_segs;
1261         i->iov_offset = 0;
1262         i->count = count;
1263 }
1264 EXPORT_SYMBOL(iov_iter_kvec);
1265
1266 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1267                         const struct bio_vec *bvec, unsigned long nr_segs,
1268                         size_t count)
1269 {
1270         WARN_ON(direction & ~(READ | WRITE));
1271         i->type = ITER_BVEC | (direction & (READ | WRITE));
1272         i->bvec = bvec;
1273         i->nr_segs = nr_segs;
1274         i->iov_offset = 0;
1275         i->count = count;
1276 }
1277 EXPORT_SYMBOL(iov_iter_bvec);
1278
1279 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1280                         struct pipe_inode_info *pipe,
1281                         size_t count)
1282 {
1283         BUG_ON(direction != READ);
1284         WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1285         i->type = ITER_PIPE | READ;
1286         i->pipe = pipe;
1287         i->head = pipe->head;
1288         i->iov_offset = 0;
1289         i->count = count;
1290         i->start_head = i->head;
1291 }
1292 EXPORT_SYMBOL(iov_iter_pipe);
1293
1294 /**
1295  * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1296  * @i: The iterator to initialise.
1297  * @direction: The direction of the transfer.
1298  * @xarray: The xarray to access.
1299  * @start: The start file position.
1300  * @count: The size of the I/O buffer in bytes.
1301  *
1302  * Set up an I/O iterator to either draw data out of the pages attached to an
1303  * inode or to inject data into those pages.  The caller *must* prevent the
1304  * pages from evaporating (e.g. being reclaimed) while the iterator is in
1305  * use, either by taking a ref on them or by keeping them locked.
1306  */
1307 void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1308                      struct xarray *xarray, loff_t start, size_t count)
1309 {
1310         BUG_ON(direction & ~1);
1311         i->type = ITER_XARRAY | (direction & (READ | WRITE));
1312         i->xarray = xarray;
1313         i->xarray_start = start;
1314         i->count = count;
1315         i->iov_offset = 0;
1316 }
1317 EXPORT_SYMBOL(iov_iter_xarray);
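/*
 * Illustrative sketch (hypothetical helper, not part of this file): a network
 * filesystem that wants received data to land directly in an inode's page
 * cache could describe the target pages with an ITER_XARRAY iterator and use
 * the ordinary copy helpers.  The pages covering the range are assumed to be
 * present and held as required above.
 */
#if 0	/* example only, never compiled */
static size_t example_receive_into_pagecache(struct address_space *mapping,
					     loff_t pos, size_t len,
					     const void *buf)
{
	struct iov_iter iter;

	/* READ: the iterator is the destination of the transfer. */
	iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, len);
	return copy_to_iter(buf, len, &iter);
}
#endif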
1318
1319 /**
1320  * iov_iter_discard - Initialise an I/O iterator that discards data
1321  * @i: The iterator to initialise.
1322  * @direction: The direction of the transfer.
1323  * @count: The size of the I/O buffer in bytes.
1324  *
1325  * Set up an I/O iterator that just discards everything that's written to it.
1326  * It's only available as a READ iterator.
1327  */
1328 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1329 {
1330         BUG_ON(direction != READ);
1331         i->type = ITER_DISCARD | READ;
1332         i->count = count;
1333         i->iov_offset = 0;
1334 }
1335 EXPORT_SYMBOL(iov_iter_discard);
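/*
 * Illustrative sketch (hypothetical caller, not part of this file): a discard
 * iterator lets a consumer drain data it does not want to keep, e.g.:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_discard(&iter, READ, to_skip);
 *	copy_to_iter(src, to_skip, &iter);
 *
 * The copy just advances the count and the data is dropped.
 */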
1336
1337 unsigned long iov_iter_alignment(const struct iov_iter *i)
1338 {
1339         unsigned long res = 0;
1340         size_t size = i->count;
1341
1342         if (unlikely(iov_iter_is_pipe(i))) {
1343                 unsigned int p_mask = i->pipe->ring_size - 1;
1344
1345                 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1346                         return size | i->iov_offset;
1347                 return size;
1348         }
1349         iterate_all_kinds(i, size, v,
1350                 (res |= (unsigned long)v.iov_base | v.iov_len, 0),
1351                 res |= v.bv_offset | v.bv_len,
1352                 res |= (unsigned long)v.iov_base | v.iov_len,
1353                 res |= v.bv_offset | v.bv_len
1354         )
1355         return res;
1356 }
1357 EXPORT_SYMBOL(iov_iter_alignment);
1358
1359 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1360 {
1361         unsigned long res = 0;
1362         size_t size = i->count;
1363
1364         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1365                 WARN_ON(1);
1366                 return ~0U;
1367         }
1368
1369         iterate_all_kinds(i, size, v,
1370                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1371                         (size != v.iov_len ? size : 0), 0),
1372                 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1373                         (size != v.bv_len ? size : 0)),
1374                 (res |= (!res ? 0 : (unsigned long)v.iov_base) |
1375                         (size != v.iov_len ? size : 0)),
1376                 (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
1377                         (size != v.bv_len ? size : 0))
1378                 );
1379         return res;
1380 }
1381 EXPORT_SYMBOL(iov_iter_gap_alignment);
1382
1383 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1384                                 size_t maxsize,
1385                                 struct page **pages,
1386                                 int iter_head,
1387                                 size_t *start)
1388 {
1389         struct pipe_inode_info *pipe = i->pipe;
1390         unsigned int p_mask = pipe->ring_size - 1;
1391         ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1392         if (!n)
1393                 return -EFAULT;
1394
1395         maxsize = n;
1396         n += *start;
1397         while (n > 0) {
1398                 get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1399                 iter_head++;
1400                 n -= PAGE_SIZE;
1401         }
1402
1403         return maxsize;
1404 }
1405
1406 static ssize_t pipe_get_pages(struct iov_iter *i,
1407                    struct page **pages, size_t maxsize, unsigned maxpages,
1408                    size_t *start)
1409 {
1410         unsigned int iter_head, npages;
1411         size_t capacity;
1412
1413         if (!maxsize)
1414                 return 0;
1415
1416         if (!sanity(i))
1417                 return -EFAULT;
1418
1419         data_start(i, &iter_head, start);
1420         /* Amount of free space: some of this one + all after this one */
1421         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1422         capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1423
1424         return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1425 }
1426
1427 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1428                                           pgoff_t index, unsigned int nr_pages)
1429 {
1430         XA_STATE(xas, xa, index);
1431         struct page *page;
1432         unsigned int ret = 0;
1433
1434         rcu_read_lock();
1435         for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1436                 if (xas_retry(&xas, page))
1437                         continue;
1438
1439                 /* Has the page moved or been split? */
1440                 if (unlikely(page != xas_reload(&xas))) {
1441                         xas_reset(&xas);
1442                         continue;
1443                 }
1444
1445                 pages[ret] = find_subpage(page, xas.xa_index);
1446                 get_page(pages[ret]);
1447                 if (++ret == nr_pages)
1448                         break;
1449         }
1450         rcu_read_unlock();
1451         return ret;
1452 }
1453
1454 static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1455                                      struct page **pages, size_t maxsize,
1456                                      unsigned maxpages, size_t *_start_offset)
1457 {
1458         unsigned nr, offset;
1459         pgoff_t index, count;
1460         size_t size = maxsize, actual;
1461         loff_t pos;
1462
1463         if (!size || !maxpages)
1464                 return 0;
1465
1466         pos = i->xarray_start + i->iov_offset;
1467         index = pos >> PAGE_SHIFT;
1468         offset = pos & ~PAGE_MASK;
1469         *_start_offset = offset;
1470
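	/*
	 * Work out how many pages the range [offset, offset + size) spans:
	 * the partial first page, any whole middle pages, plus a partial
	 * tail page if one is left over.
	 */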
1471         count = 1;
1472         if (size > PAGE_SIZE - offset) {
1473                 size -= PAGE_SIZE - offset;
1474                 count += size >> PAGE_SHIFT;
1475                 size &= ~PAGE_MASK;
1476                 if (size)
1477                         count++;
1478         }
1479
1480         if (count > maxpages)
1481                 count = maxpages;
1482
1483         nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1484         if (nr == 0)
1485                 return 0;
1486
1487         actual = PAGE_SIZE * nr;
1488         actual -= offset;
1489         if (nr == count && size > 0) {
1490                 unsigned last_offset = (nr > 1) ? 0 : offset;
1491                 actual -= PAGE_SIZE - (last_offset + size);
1492         }
1493         return actual;
1494 }
1495
1496 ssize_t iov_iter_get_pages(struct iov_iter *i,
1497                    struct page **pages, size_t maxsize, unsigned maxpages,
1498                    size_t *start)
1499 {
1500         if (maxsize > i->count)
1501                 maxsize = i->count;
1502
1503         if (unlikely(iov_iter_is_pipe(i)))
1504                 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1505         if (unlikely(iov_iter_is_xarray(i)))
1506                 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1507         if (unlikely(iov_iter_is_discard(i)))
1508                 return -EFAULT;
1509
1510         iterate_all_kinds(i, maxsize, v, ({
1511                 unsigned long addr = (unsigned long)v.iov_base;
1512                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1513                 int n;
1514                 int res;
1515
1516                 if (len > maxpages * PAGE_SIZE)
1517                         len = maxpages * PAGE_SIZE;
1518                 addr &= ~(PAGE_SIZE - 1);
1519                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1520                 res = get_user_pages_fast(addr, n,
1521                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1522                                 pages);
1523                 if (unlikely(res < 0))
1524                         return res;
1525                 return (res == n ? len : res * PAGE_SIZE) - *start;
1526         0;}),({
1527                 /* can't be more than PAGE_SIZE */
1528                 *start = v.bv_offset;
1529                 get_page(*pages = v.bv_page);
1530                 return v.bv_len;
1531         }),({
1532                 return -EFAULT;
1533         }),
1534         0
1535         )
1536         return 0;
1537 }
1538 EXPORT_SYMBOL(iov_iter_get_pages);
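/*
 * Usage sketch (illustrative only): pin the first few pages backing an
 * iterator and drop the references again.  The helper name and the fixed
 * array size are arbitrary.  Note that iov_iter_get_pages() does not
 * advance the iterator; callers normally iov_iter_advance() by however
 * many bytes they actually consume.
 */
#if 0
static ssize_t example_pin_front(struct iov_iter *iter)
{
	struct page *pages[16];
	size_t offset;
	ssize_t bytes;
	int n, k;

	bytes = iov_iter_get_pages(iter, pages, 16 * PAGE_SIZE, 16, &offset);
	if (bytes <= 0)
		return bytes;

	/* The data begins @offset bytes into pages[0]. */
	n = DIV_ROUND_UP(offset + bytes, PAGE_SIZE);
	for (k = 0; k < n; k++)
		put_page(pages[k]);	/* drop the refs taken above */
	return bytes;
}
#endif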
1539
1540 static struct page **get_pages_array(size_t n)
1541 {
1542         return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1543 }
1544
1545 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1546                    struct page ***pages, size_t maxsize,
1547                    size_t *start)
1548 {
1549         struct page **p;
1550         unsigned int iter_head, npages;
1551         ssize_t n;
1552
1553         if (!maxsize)
1554                 return 0;
1555
1556         if (!sanity(i))
1557                 return -EFAULT;
1558
1559         data_start(i, &iter_head, start);
1560         /* Amount of free space: some of this one + all after this one */
1561         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1562         n = npages * PAGE_SIZE - *start;
1563         if (maxsize > n)
1564                 maxsize = n;
1565         else
1566                 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1567         p = get_pages_array(npages);
1568         if (!p)
1569                 return -ENOMEM;
1570         n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1571         if (n > 0)
1572                 *pages = p;
1573         else
1574                 kvfree(p);
1575         return n;
1576 }
1577
1578 static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1579                                            struct page ***pages, size_t maxsize,
1580                                            size_t *_start_offset)
1581 {
1582         struct page **p;
1583         unsigned nr, offset;
1584         pgoff_t index, count;
1585         size_t size = maxsize, actual;
1586         loff_t pos;
1587
1588         if (!size)
1589                 return 0;
1590
1591         pos = i->xarray_start + i->iov_offset;
1592         index = pos >> PAGE_SHIFT;
1593         offset = pos & ~PAGE_MASK;
1594         *_start_offset = offset;
1595
1596         count = 1;
1597         if (size > PAGE_SIZE - offset) {
1598                 size -= PAGE_SIZE - offset;
1599                 count += size >> PAGE_SHIFT;
1600                 size &= ~PAGE_MASK;
1601                 if (size)
1602                         count++;
1603         }
1604
1605         p = get_pages_array(count);
1606         if (!p)
1607                 return -ENOMEM;
1608         *pages = p;
1609
1610         nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1611         if (nr == 0)
1612                 return 0;
1613
1614         actual = PAGE_SIZE * nr;
1615         actual -= offset;
1616         if (nr == count && size > 0) {
1617                 unsigned last_offset = (nr > 1) ? 0 : offset;
1618                 actual -= PAGE_SIZE - (last_offset + size);
1619         }
1620         return actual;
1621 }
1622
1623 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1624                    struct page ***pages, size_t maxsize,
1625                    size_t *start)
1626 {
1627         struct page **p;
1628
1629         if (maxsize > i->count)
1630                 maxsize = i->count;
1631
1632         if (unlikely(iov_iter_is_pipe(i)))
1633                 return pipe_get_pages_alloc(i, pages, maxsize, start);
1634         if (unlikely(iov_iter_is_xarray(i)))
1635                 return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1636         if (unlikely(iov_iter_is_discard(i)))
1637                 return -EFAULT;
1638
1639         iterate_all_kinds(i, maxsize, v, ({
1640                 unsigned long addr = (unsigned long)v.iov_base;
1641                 size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
1642                 int n;
1643                 int res;
1644
1645                 addr &= ~(PAGE_SIZE - 1);
1646                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1647                 p = get_pages_array(n);
1648                 if (!p)
1649                         return -ENOMEM;
1650                 res = get_user_pages_fast(addr, n,
1651                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1652                 if (unlikely(res < 0)) {
1653                         kvfree(p);
1654                         return res;
1655                 }
1656                 *pages = p;
1657                 return (res == n ? len : res * PAGE_SIZE) - *start;
1658         0;}),({
1659                 /* can't be more than PAGE_SIZE */
1660                 *start = v.bv_offset;
1661                 *pages = p = get_pages_array(1);
1662                 if (!p)
1663                         return -ENOMEM;
1664                 get_page(*p = v.bv_page);
1665                 return v.bv_len;
1666         }),({
1667                 return -EFAULT;
1668         }), 0
1669         )
1670         return 0;
1671 }
1672 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
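/*
 * Usage sketch (illustrative only): same as the pinning example above, but
 * letting the helper allocate the page array.  The array comes from
 * kvmalloc_array() and so must be released with kvfree(); the helper name
 * is hypothetical.
 */
#if 0
static ssize_t example_pin_alloc(struct iov_iter *iter, size_t maxsize)
{
	struct page **pages;
	size_t offset;
	ssize_t bytes;
	int n, k;

	bytes = iov_iter_get_pages_alloc(iter, &pages, maxsize, &offset);
	if (bytes <= 0)
		return bytes;

	n = DIV_ROUND_UP(offset + bytes, PAGE_SIZE);
	for (k = 0; k < n; k++)
		put_page(pages[k]);
	kvfree(pages);
	return bytes;
}
#endif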
1673
1674 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1675                                struct iov_iter *i)
1676 {
1677         char *to = addr;
1678         __wsum sum, next;
1679         size_t off = 0;
1680         sum = *csum;
1681         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1682                 WARN_ON(1);
1683                 return 0;
1684         }
1685         iterate_and_advance(i, bytes, v, ({
1686                 next = csum_and_copy_from_user(v.iov_base,
1687                                                (to += v.iov_len) - v.iov_len,
1688                                                v.iov_len);
1689                 if (next) {
1690                         sum = csum_block_add(sum, next, off);
1691                         off += v.iov_len;
1692                 }
1693                 next ? 0 : v.iov_len;
1694         }), ({
1695                 char *p = kmap_atomic(v.bv_page);
1696                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1697                                       p + v.bv_offset, v.bv_len,
1698                                       sum, off);
1699                 kunmap_atomic(p);
1700                 off += v.bv_len;
1701         }),({
1702                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1703                                       v.iov_base, v.iov_len,
1704                                       sum, off);
1705                 off += v.iov_len;
1706         }), ({
1707                 char *p = kmap_atomic(v.bv_page);
1708                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1709                                       p + v.bv_offset, v.bv_len,
1710                                       sum, off);
1711                 kunmap_atomic(p);
1712                 off += v.bv_len;
1713         })
1714         )
1715         *csum = sum;
1716         return bytes;
1717 }
1718 EXPORT_SYMBOL(csum_and_copy_from_iter);
1719
1720 bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
1721                                struct iov_iter *i)
1722 {
1723         char *to = addr;
1724         __wsum sum, next;
1725         size_t off = 0;
1726         sum = *csum;
1727         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1728                 WARN_ON(1);
1729                 return false;
1730         }
1731         if (unlikely(i->count < bytes))
1732                 return false;
1733         iterate_all_kinds(i, bytes, v, ({
1734                 next = csum_and_copy_from_user(v.iov_base,
1735                                                (to += v.iov_len) - v.iov_len,
1736                                                v.iov_len);
1737                 if (!next)
1738                         return false;
1739                 sum = csum_block_add(sum, next, off);
1740                 off += v.iov_len;
1741                 0;
1742         }), ({
1743                 char *p = kmap_atomic(v.bv_page);
1744                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1745                                       p + v.bv_offset, v.bv_len,
1746                                       sum, off);
1747                 kunmap_atomic(p);
1748                 off += v.bv_len;
1749         }),({
1750                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1751                                       v.iov_base, v.iov_len,
1752                                       sum, off);
1753                 off += v.iov_len;
1754         }), ({
1755                 char *p = kmap_atomic(v.bv_page);
1756                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1757                                       p + v.bv_offset, v.bv_len,
1758                                       sum, off);
1759                 kunmap_atomic(p);
1760                 off += v.bv_len;
1761         })
1762         )
1763         *csum = sum;
1764         iov_iter_advance(i, bytes);
1765         return true;
1766 }
1767 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
1768
1769 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1770                              struct iov_iter *i)
1771 {
1772         struct csum_state *csstate = _csstate;
1773         const char *from = addr;
1774         __wsum sum, next;
1775         size_t off;
1776
1777         if (unlikely(iov_iter_is_pipe(i)))
1778                 return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1779
1780         sum = csstate->csum;
1781         off = csstate->off;
1782         if (unlikely(iov_iter_is_discard(i))) {
1783                 WARN_ON(1);     /* for now */
1784                 return 0;
1785         }
1786         iterate_and_advance(i, bytes, v, ({
1787                 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1788                                              v.iov_base,
1789                                              v.iov_len);
1790                 if (next) {
1791                         sum = csum_block_add(sum, next, off);
1792                         off += v.iov_len;
1793                 }
1794                 next ? 0 : v.iov_len;
1795         }), ({
1796                 char *p = kmap_atomic(v.bv_page);
1797                 sum = csum_and_memcpy(p + v.bv_offset,
1798                                       (from += v.bv_len) - v.bv_len,
1799                                       v.bv_len, sum, off);
1800                 kunmap_atomic(p);
1801                 off += v.bv_len;
1802         }),({
1803                 sum = csum_and_memcpy(v.iov_base,
1804                                      (from += v.iov_len) - v.iov_len,
1805                                      v.iov_len, sum, off);
1806                 off += v.iov_len;
1807         }), ({
1808                 char *p = kmap_atomic(v.bv_page);
1809                 sum = csum_and_memcpy(p + v.bv_offset,
1810                                       (from += v.bv_len) - v.bv_len,
1811                                       v.bv_len, sum, off);
1812                 kunmap_atomic(p);
1813                 off += v.bv_len;
1814         })
1815         )
1816         csstate->csum = sum;
1817         csstate->off = off;
1818         return bytes;
1819 }
1820 EXPORT_SYMBOL(csum_and_copy_to_iter);
1821
1822 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1823                 struct iov_iter *i)
1824 {
1825 #ifdef CONFIG_CRYPTO_HASH
1826         struct ahash_request *hash = hashp;
1827         struct scatterlist sg;
1828         size_t copied;
1829
1830         copied = copy_to_iter(addr, bytes, i);
1831         sg_init_one(&sg, addr, copied);
1832         ahash_request_set_crypt(hash, &sg, NULL, copied);
1833         crypto_ahash_update(hash);
1834         return copied;
1835 #else
1836         return 0;
1837 #endif
1838 }
1839 EXPORT_SYMBOL(hash_and_copy_to_iter);
1840
1841 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1842 {
1843         size_t size = i->count;
1844         int npages = 0;
1845
1846         if (!size)
1847                 return 0;
1848         if (unlikely(iov_iter_is_discard(i)))
1849                 return 0;
1850
1851         if (unlikely(iov_iter_is_pipe(i))) {
1852                 struct pipe_inode_info *pipe = i->pipe;
1853                 unsigned int iter_head;
1854                 size_t off;
1855
1856                 if (!sanity(i))
1857                         return 0;
1858
1859                 data_start(i, &iter_head, &off);
1860                 /* some of this one + all after this one */
1861                 npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1862                 if (npages >= maxpages)
1863                         return maxpages;
1864         } else if (unlikely(iov_iter_is_xarray(i))) {
1865                 unsigned offset;
1866
1867                 offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
1868
1869                 npages = 1;
1870                 if (size > PAGE_SIZE - offset) {
1871                         size -= PAGE_SIZE - offset;
1872                         npages += size >> PAGE_SHIFT;
1873                         size &= ~PAGE_MASK;
1874                         if (size)
1875                                 npages++;
1876                 }
1877                 if (npages >= maxpages)
1878                         return maxpages;
1879         } else iterate_all_kinds(i, size, v, ({
1880                 unsigned long p = (unsigned long)v.iov_base;
1881                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1882                         - p / PAGE_SIZE;
1883                 if (npages >= maxpages)
1884                         return maxpages;
1885         0;}),({
1886                 npages++;
1887                 if (npages >= maxpages)
1888                         return maxpages;
1889         }),({
1890                 unsigned long p = (unsigned long)v.iov_base;
1891                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1892                         - p / PAGE_SIZE;
1893                 if (npages >= maxpages)
1894                         return maxpages;
1895         }),
1896         0
1897         )
1898         return npages;
1899 }
1900 EXPORT_SYMBOL(iov_iter_npages);
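/*
 * Usage sketch (illustrative only): size a page array before pinning.  The
 * cap of 256 pages and the helper name are arbitrary choices, not anything
 * mandated by this API.
 */
#if 0
static struct page **example_alloc_page_array(struct iov_iter *iter, int *npages)
{
	*npages = iov_iter_npages(iter, 256);
	return kcalloc(*npages, sizeof(struct page *), GFP_KERNEL);
}
#endif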
1901
1902 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1903 {
1904         *new = *old;
1905         if (unlikely(iov_iter_is_pipe(new))) {
1906                 WARN_ON(1);
1907                 return NULL;
1908         }
1909         if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
1910                 return NULL;
1911         if (iov_iter_is_bvec(new))
1912                 return new->bvec = kmemdup(new->bvec,
1913                                     new->nr_segs * sizeof(struct bio_vec),
1914                                     flags);
1915         else
1916                 /* iovec and kvec have identical layout */
1917                 return new->iov = kmemdup(new->iov,
1918                                    new->nr_segs * sizeof(struct iovec),
1919                                    flags);
1920 }
1921 EXPORT_SYMBOL(dup_iter);
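/*
 * Usage sketch (illustrative only): keeping an iovec-backed iterator alive
 * beyond the caller's stack frame, as asynchronous I/O paths do.  The
 * state struct and helper name are hypothetical.  The pointer dup_iter()
 * returns is the duplicated segment array and must eventually be kfree()d;
 * NULL means either allocation failure or an iterator type (pipe, discard,
 * xarray) that cannot be duplicated this way.
 */
#if 0
struct example_async_op {
	struct iov_iter iter;
	const void *seg_array;		/* what dup_iter() handed back */
};

static int example_save_iter(struct example_async_op *op, struct iov_iter *src)
{
	op->seg_array = dup_iter(&op->iter, src, GFP_KERNEL);
	return op->seg_array ? 0 : -ENOMEM;
}
#endif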
1922
1923 static int copy_compat_iovec_from_user(struct iovec *iov,
1924                 const struct iovec __user *uvec, unsigned long nr_segs)
1925 {
1926         const struct compat_iovec __user *uiov =
1927                 (const struct compat_iovec __user *)uvec;
1928         int ret = -EFAULT, i;
1929
1930         if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1931                 return -EFAULT;
1932
1933         for (i = 0; i < nr_segs; i++) {
1934                 compat_uptr_t buf;
1935                 compat_ssize_t len;
1936
1937                 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1938                 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1939
1940                 /* check for compat_size_t not fitting in compat_ssize_t .. */
1941                 if (len < 0) {
1942                         ret = -EINVAL;
1943                         goto uaccess_end;
1944                 }
1945                 iov[i].iov_base = compat_ptr(buf);
1946                 iov[i].iov_len = len;
1947         }
1948
1949         ret = 0;
1950 uaccess_end:
1951         user_access_end();
1952         return ret;
1953 }
1954
1955 static int copy_iovec_from_user(struct iovec *iov,
1956                 const struct iovec __user *uvec, unsigned long nr_segs)
1957 {
1958         unsigned long seg;
1959
1960         if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1961                 return -EFAULT;
1962         for (seg = 0; seg < nr_segs; seg++) {
1963                 if ((ssize_t)iov[seg].iov_len < 0)
1964                         return -EINVAL;
1965         }
1966
1967         return 0;
1968 }
1969
1970 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1971                 unsigned long nr_segs, unsigned long fast_segs,
1972                 struct iovec *fast_iov, bool compat)
1973 {
1974         struct iovec *iov = fast_iov;
1975         int ret;
1976
1977         /*
1978          * SuS says "The readv() function *may* fail if the iovcnt argument was
1979          * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1980          * traditionally returned zero for zero segments, so...
1981          */
1982         if (nr_segs == 0)
1983                 return iov;
1984         if (nr_segs > UIO_MAXIOV)
1985                 return ERR_PTR(-EINVAL);
1986         if (nr_segs > fast_segs) {
1987                 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1988                 if (!iov)
1989                         return ERR_PTR(-ENOMEM);
1990         }
1991
1992         if (compat)
1993                 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1994         else
1995                 ret = copy_iovec_from_user(iov, uvec, nr_segs);
1996         if (ret) {
1997                 if (iov != fast_iov)
1998                         kfree(iov);
1999                 return ERR_PTR(ret);
2000         }
2001
2002         return iov;
2003 }
2004
2005 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
2006                  unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
2007                  struct iov_iter *i, bool compat)
2008 {
2009         ssize_t total_len = 0;
2010         unsigned long seg;
2011         struct iovec *iov;
2012
2013         iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
2014         if (IS_ERR(iov)) {
2015                 *iovp = NULL;
2016                 return PTR_ERR(iov);
2017         }
2018
2019         /*
2020          * According to the Single Unix Specification we should return EINVAL if
2021          * an element length is < 0 when cast to ssize_t or if the total length
2022          * would overflow the ssize_t return value of the system call.
2023          *
2024          * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
2025          * overflow case.
2026          */
2027         for (seg = 0; seg < nr_segs; seg++) {
2028                 ssize_t len = (ssize_t)iov[seg].iov_len;
2029
2030                 if (!access_ok(iov[seg].iov_base, len)) {
2031                         if (iov != *iovp)
2032                                 kfree(iov);
2033                         *iovp = NULL;
2034                         return -EFAULT;
2035                 }
2036
2037                 if (len > MAX_RW_COUNT - total_len) {
2038                         len = MAX_RW_COUNT - total_len;
2039                         iov[seg].iov_len = len;
2040                 }
2041                 total_len += len;
2042         }
2043
2044         iov_iter_init(i, type, iov, nr_segs, total_len);
2045         if (iov == *iovp)
2046                 *iovp = NULL;
2047         else
2048                 *iovp = iov;
2049         return total_len;
2050 }
2051
2052 /**
2053  * import_iovec() - Copy an array of &struct iovec from userspace
2054  *     into the kernel, check that it is valid, and initialize a new
2055  *     &struct iov_iter iterator to access it.
2056  *
2057  * @type: One of %READ or %WRITE.
2058  * @uvec: Pointer to the userspace array.
2059  * @nr_segs: Number of elements in userspace array.
2060  * @fast_segs: Number of elements in *@iovp.
2061  * @iovp: (input and output parameter) Pointer to pointer to (usually small
2062  *     on-stack) kernel array.
2063  * @i: Pointer to iterator that will be initialized on success.
2064  *
2065  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
2066  * then this function places %NULL in *@iovp on return. Otherwise, a new
2067  * array will be allocated and the result placed in *@iovp. This means that
2068  * the caller may call kfree() on *@iovp regardless of whether the small
2069  * on-stack array was used or not (and regardless of whether this function
2070  * returns an error or not).
2071  *
2072  * Return: Negative error code on error; the total number of bytes imported on success.
2073  */
2074 ssize_t import_iovec(int type, const struct iovec __user *uvec,
2075                  unsigned nr_segs, unsigned fast_segs,
2076                  struct iovec **iovp, struct iov_iter *i)
2077 {
2078         return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2079                               in_compat_syscall());
2080 }
2081 EXPORT_SYMBOL(import_iovec);
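/*
 * Usage sketch (illustrative only): the usual readv()-style pattern.  The
 * helper name is hypothetical; kfree(iov) is safe whether or not the
 * on-stack fast array ended up being used, as documented above.
 */
#if 0
static ssize_t example_readv(const struct iovec __user *uvec, unsigned long nr_segs)
{
	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret < 0)
		return ret;

	/* ... perform the transfer against &iter here ... */

	kfree(iov);
	return ret;
}
#endif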
2082
2083 int import_single_range(int rw, void __user *buf, size_t len,
2084                  struct iovec *iov, struct iov_iter *i)
2085 {
2086         if (len > MAX_RW_COUNT)
2087                 len = MAX_RW_COUNT;
2088         if (unlikely(!access_ok(buf, len)))
2089                 return -EFAULT;
2090
2091         iov->iov_base = buf;
2092         iov->iov_len = len;
2093         iov_iter_init(i, rw, iov, 1, len);
2094         return 0;
2095 }
2096 EXPORT_SYMBOL(import_single_range);
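/*
 * Usage sketch (illustrative only): wrapping a single user buffer for a
 * write-style operation.  The helper name is hypothetical; note that the
 * length is silently clamped to MAX_RW_COUNT.
 */
#if 0
static ssize_t example_single(void __user *buf, size_t len)
{
	struct iovec iov;
	struct iov_iter iter;
	int ret;

	ret = import_single_range(WRITE, buf, len, &iov, &iter);
	if (ret)
		return ret;
	return iov_iter_count(&iter);	/* what the iterator will cover */
}
#endif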
2097
2098 int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
2099                             int (*f)(struct kvec *vec, void *context),
2100                             void *context)
2101 {
2102         struct kvec w;
2103         int err = -EINVAL;
2104         if (!bytes)
2105                 return 0;
2106
2107         iterate_all_kinds(i, bytes, v, -EINVAL, ({
2108                 w.iov_base = kmap(v.bv_page) + v.bv_offset;
2109                 w.iov_len = v.bv_len;
2110                 err = f(&w, context);
2111                 kunmap(v.bv_page);
2112                 err;}), ({
2113                 w = v;
2114                 err = f(&w, context);}), ({
2115                 w.iov_base = kmap(v.bv_page) + v.bv_offset;
2116                 w.iov_len = v.bv_len;
2117                 err = f(&w, context);
2118                 kunmap(v.bv_page);
2119                 err;})
2120         )
2121         return err;
2122 }
2123 EXPORT_SYMBOL(iov_iter_for_each_range);
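/*
 * Usage sketch (illustrative only): walking kernel-backed segments with a
 * callback.  The function names are hypothetical.  As wired above, the
 * walk handles bvec, kvec and xarray iterators; a plain user-space iovec
 * iterator is not handled and the initial -EINVAL is returned.  The
 * iterator itself is not advanced by the walk.
 */
#if 0
static int example_xor_segment(struct kvec *vec, void *context)
{
	u8 *acc = context;
	size_t n;

	for (n = 0; n < vec->iov_len; n++)
		*acc ^= ((const u8 *)vec->iov_base)[n];
	return 0;	/* the final callback's return value is propagated */
}

static int example_xor_iter(struct iov_iter *iter, size_t bytes, u8 *acc)
{
	return iov_iter_for_each_range(iter, bytes, example_xor_segment, acc);
}
#endif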