get rid of iterate_all_kinds() in iov_iter_get_pages()/iov_iter_get_pages_alloc()
[linux-2.6-microblaze.git] / lib/iov_iter.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/highmem.h>
9 #include <linux/slab.h>
10 #include <linux/vmalloc.h>
11 #include <linux/splice.h>
12 #include <linux/compat.h>
13 #include <net/checksum.h>
14 #include <linux/scatterlist.h>
15 #include <linux/instrumented.h>
16
17 #define PIPE_PARANOIA /* for now */
18
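/*
 * Per-flavour iteration helpers.  Each macro walks the segments of an
 * iov_iter of one flavour, exposing the current segment as __v and running
 * STEP on it.  For iterate_iovec(), STEP is expected to return the number of
 * bytes it failed to process (a user copy may fault part-way) and the walk
 * stops early on a short step; kvec, bvec and xarray steps cannot fail, so
 * their result is ignored.  On exit, n reflects the number of bytes covered.
 */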
19 #define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
20         size_t left;                                    \
21         size_t wanted = n;                              \
22         __p = i->iov;                                   \
23         __v.iov_len = min(n, __p->iov_len - skip);      \
24         if (likely(__v.iov_len)) {                      \
25                 __v.iov_base = __p->iov_base + skip;    \
26                 left = (STEP);                          \
27                 __v.iov_len -= left;                    \
28                 skip += __v.iov_len;                    \
29                 n -= __v.iov_len;                       \
30         } else {                                        \
31                 left = 0;                               \
32         }                                               \
33         while (unlikely(!left && n)) {                  \
34                 __p++;                                  \
35                 __v.iov_len = min(n, __p->iov_len);     \
36                 if (unlikely(!__v.iov_len))             \
37                         continue;                       \
38                 __v.iov_base = __p->iov_base;           \
39                 left = (STEP);                          \
40                 __v.iov_len -= left;                    \
41                 skip = __v.iov_len;                     \
42                 n -= __v.iov_len;                       \
43         }                                               \
44         n = wanted - n;                                 \
45 }
46
47 #define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
48         size_t wanted = n;                              \
49         __p = i->kvec;                                  \
50         __v.iov_len = min(n, __p->iov_len - skip);      \
51         if (likely(__v.iov_len)) {                      \
52                 __v.iov_base = __p->iov_base + skip;    \
53                 (void)(STEP);                           \
54                 skip += __v.iov_len;                    \
55                 n -= __v.iov_len;                       \
56         }                                               \
57         while (unlikely(n)) {                           \
58                 __p++;                                  \
59                 __v.iov_len = min(n, __p->iov_len);     \
60                 if (unlikely(!__v.iov_len))             \
61                         continue;                       \
62                 __v.iov_base = __p->iov_base;           \
63                 (void)(STEP);                           \
64                 skip = __v.iov_len;                     \
65                 n -= __v.iov_len;                       \
66         }                                               \
67         n = wanted;                                     \
68 }
69
70 #define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
71         struct bvec_iter __start;                       \
72         __start.bi_size = n;                            \
73         __start.bi_bvec_done = skip;                    \
74         __start.bi_idx = 0;                             \
75         for_each_bvec(__v, i->bvec, __bi, __start) {    \
76                 (void)(STEP);                           \
77         }                                               \
78 }
79
80 #define iterate_xarray(i, n, __v, skip, STEP) {         \
81         struct page *head = NULL;                               \
82         size_t wanted = n, seg, offset;                         \
83         loff_t start = i->xarray_start + skip;                  \
84         pgoff_t index = start >> PAGE_SHIFT;                    \
85         int j;                                                  \
86                                                                 \
87         XA_STATE(xas, i->xarray, index);                        \
88                                                                 \
89         rcu_read_lock();                                                \
90         xas_for_each(&xas, head, ULONG_MAX) {                           \
91                 if (xas_retry(&xas, head))                              \
92                         continue;                                       \
93                 if (WARN_ON(xa_is_value(head)))                         \
94                         break;                                          \
95                 if (WARN_ON(PageHuge(head)))                            \
96                         break;                                          \
97                 for (j = (head->index < index) ? index - head->index : 0; \
98                      j < thp_nr_pages(head); j++) {                     \
99                         __v.bv_page = head + j;                         \
100                         offset = (i->xarray_start + skip) & ~PAGE_MASK; \
101                         seg = PAGE_SIZE - offset;                       \
102                         __v.bv_offset = offset;                         \
103                         __v.bv_len = min(n, seg);                       \
104                         (void)(STEP);                                   \
105                         n -= __v.bv_len;                                \
106                         skip += __v.bv_len;                             \
107                         if (n == 0)                                     \
108                                 break;                                  \
109                 }                                                       \
110                 if (n == 0)                                             \
111                         break;                                          \
112         }                                                       \
113         rcu_read_unlock();                                      \
114         n = wanted - n;                                         \
115 }
116
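/*
 * iterate_all_kinds() dispatches on the iterator flavour and runs the
 * matching step expression: I for user-space iovecs, B for bio_vecs, K for
 * kvecs and X for xarray-backed pages.  It walks at most n bytes from the
 * current position but does not advance the iterator; ITER_PIPE and
 * ITER_DISCARD are not handled here and must be filtered out by the caller.
 */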
117 #define iterate_all_kinds(i, n, v, I, B, K, X) {                \
118         if (likely(n)) {                                        \
119                 size_t skip = i->iov_offset;                    \
120                 if (likely(iter_is_iovec(i))) {                 \
121                         const struct iovec *iov;                \
122                         struct iovec v;                         \
123                         iterate_iovec(i, n, v, iov, skip, (I))  \
124                 } else if (iov_iter_is_bvec(i)) {               \
125                         struct bio_vec v;                       \
126                         struct bvec_iter __bi;                  \
127                         iterate_bvec(i, n, v, __bi, skip, (B))  \
128                 } else if (iov_iter_is_kvec(i)) {               \
129                         const struct kvec *kvec;                \
130                         struct kvec v;                          \
131                         iterate_kvec(i, n, v, kvec, skip, (K))  \
132                 } else if (iov_iter_is_xarray(i)) {             \
133                         struct bio_vec v;                       \
134                         iterate_xarray(i, n, v, skip, (X));     \
135                 }                                               \
136         }                                                       \
137 }
138
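/*
 * iterate_and_advance() is the advancing counterpart of iterate_all_kinds():
 * n is first clamped to i->count, the per-flavour step is run over the data,
 * and the iterator state (iov/kvec/bvec pointer, nr_segs, iov_offset, count)
 * is then updated to reflect what was consumed.
 */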
139 #define iterate_and_advance(i, n, v, I, B, K, X) {              \
140         if (unlikely(i->count < n))                             \
141                 n = i->count;                                   \
142         if (i->count) {                                         \
143                 size_t skip = i->iov_offset;                    \
144                 if (likely(iter_is_iovec(i))) {                 \
145                         const struct iovec *iov;                \
146                         struct iovec v;                         \
147                         iterate_iovec(i, n, v, iov, skip, (I))  \
148                         if (skip == iov->iov_len) {             \
149                                 iov++;                          \
150                                 skip = 0;                       \
151                         }                                       \
152                         i->nr_segs -= iov - i->iov;             \
153                         i->iov = iov;                           \
154                 } else if (iov_iter_is_bvec(i)) {               \
155                         const struct bio_vec *bvec = i->bvec;   \
156                         struct bio_vec v;                       \
157                         struct bvec_iter __bi;                  \
158                         iterate_bvec(i, n, v, __bi, skip, (B))  \
159                         i->bvec = __bvec_iter_bvec(i->bvec, __bi);      \
160                         i->nr_segs -= i->bvec - bvec;           \
161                         skip = __bi.bi_bvec_done;               \
162                 } else if (iov_iter_is_kvec(i)) {               \
163                         const struct kvec *kvec;                \
164                         struct kvec v;                          \
165                         iterate_kvec(i, n, v, kvec, skip, (K))  \
166                         if (skip == kvec->iov_len) {            \
167                                 kvec++;                         \
168                                 skip = 0;                       \
169                         }                                       \
170                         i->nr_segs -= kvec - i->kvec;           \
171                         i->kvec = kvec;                         \
172                 } else if (iov_iter_is_xarray(i)) {             \
173                         struct bio_vec v;                       \
174                         iterate_xarray(i, n, v, skip, (X))      \
175                 }                                               \
176                 i->count -= n;                                  \
177                 i->iov_offset = skip;                           \
178         }                                                       \
179 }
180
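/*
 * Raw user-copy helpers.  Both return the number of bytes that could NOT be
 * copied (0 on success), honour the usercopy fault-injection hooks and skip
 * the copy entirely if the user address range fails access_ok().
 */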
181 static int copyout(void __user *to, const void *from, size_t n)
182 {
183         if (should_fail_usercopy())
184                 return n;
185         if (access_ok(to, n)) {
186                 instrument_copy_to_user(to, from, n);
187                 n = raw_copy_to_user(to, from, n);
188         }
189         return n;
190 }
191
192 static int copyin(void *to, const void __user *from, size_t n)
193 {
194         if (should_fail_usercopy())
195                 return n;
196         if (access_ok(from, n)) {
197                 instrument_copy_from_user(to, from, n);
198                 n = raw_copy_from_user(to, from, n);
199         }
200         return n;
201 }
202
203 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
204                          struct iov_iter *i)
205 {
206         size_t skip, copy, left, wanted;
207         const struct iovec *iov;
208         char __user *buf;
209         void *kaddr, *from;
210
211         if (unlikely(bytes > i->count))
212                 bytes = i->count;
213
214         if (unlikely(!bytes))
215                 return 0;
216
217         might_fault();
218         wanted = bytes;
219         iov = i->iov;
220         skip = i->iov_offset;
221         buf = iov->iov_base + skip;
222         copy = min(bytes, iov->iov_len - skip);
223
224         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
225                 kaddr = kmap_atomic(page);
226                 from = kaddr + offset;
227
228                 /* first chunk, usually the only one */
229                 left = copyout(buf, from, copy);
230                 copy -= left;
231                 skip += copy;
232                 from += copy;
233                 bytes -= copy;
234
235                 while (unlikely(!left && bytes)) {
236                         iov++;
237                         buf = iov->iov_base;
238                         copy = min(bytes, iov->iov_len);
239                         left = copyout(buf, from, copy);
240                         copy -= left;
241                         skip = copy;
242                         from += copy;
243                         bytes -= copy;
244                 }
245                 if (likely(!bytes)) {
246                         kunmap_atomic(kaddr);
247                         goto done;
248                 }
249                 offset = from - kaddr;
250                 buf += copy;
251                 kunmap_atomic(kaddr);
252                 copy = min(bytes, iov->iov_len - skip);
253         }
254         /* Too bad - revert to non-atomic kmap */
255
256         kaddr = kmap(page);
257         from = kaddr + offset;
258         left = copyout(buf, from, copy);
259         copy -= left;
260         skip += copy;
261         from += copy;
262         bytes -= copy;
263         while (unlikely(!left && bytes)) {
264                 iov++;
265                 buf = iov->iov_base;
266                 copy = min(bytes, iov->iov_len);
267                 left = copyout(buf, from, copy);
268                 copy -= left;
269                 skip = copy;
270                 from += copy;
271                 bytes -= copy;
272         }
273         kunmap(page);
274
275 done:
276         if (skip == iov->iov_len) {
277                 iov++;
278                 skip = 0;
279         }
280         i->count -= wanted - bytes;
281         i->nr_segs -= iov - i->iov;
282         i->iov = iov;
283         i->iov_offset = skip;
284         return wanted - bytes;
285 }
286
287 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
288                          struct iov_iter *i)
289 {
290         size_t skip, copy, left, wanted;
291         const struct iovec *iov;
292         char __user *buf;
293         void *kaddr, *to;
294
295         if (unlikely(bytes > i->count))
296                 bytes = i->count;
297
298         if (unlikely(!bytes))
299                 return 0;
300
301         might_fault();
302         wanted = bytes;
303         iov = i->iov;
304         skip = i->iov_offset;
305         buf = iov->iov_base + skip;
306         copy = min(bytes, iov->iov_len - skip);
307
308         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
309                 kaddr = kmap_atomic(page);
310                 to = kaddr + offset;
311
312                 /* first chunk, usually the only one */
313                 left = copyin(to, buf, copy);
314                 copy -= left;
315                 skip += copy;
316                 to += copy;
317                 bytes -= copy;
318
319                 while (unlikely(!left && bytes)) {
320                         iov++;
321                         buf = iov->iov_base;
322                         copy = min(bytes, iov->iov_len);
323                         left = copyin(to, buf, copy);
324                         copy -= left;
325                         skip = copy;
326                         to += copy;
327                         bytes -= copy;
328                 }
329                 if (likely(!bytes)) {
330                         kunmap_atomic(kaddr);
331                         goto done;
332                 }
333                 offset = to - kaddr;
334                 buf += copy;
335                 kunmap_atomic(kaddr);
336                 copy = min(bytes, iov->iov_len - skip);
337         }
338         /* Too bad - revert to non-atomic kmap */
339
340         kaddr = kmap(page);
341         to = kaddr + offset;
342         left = copyin(to, buf, copy);
343         copy -= left;
344         skip += copy;
345         to += copy;
346         bytes -= copy;
347         while (unlikely(!left && bytes)) {
348                 iov++;
349                 buf = iov->iov_base;
350                 copy = min(bytes, iov->iov_len);
351                 left = copyin(to, buf, copy);
352                 copy -= left;
353                 skip = copy;
354                 to += copy;
355                 bytes -= copy;
356         }
357         kunmap(page);
358
359 done:
360         if (skip == iov->iov_len) {
361                 iov++;
362                 skip = 0;
363         }
364         i->count -= wanted - bytes;
365         i->nr_segs -= iov - i->iov;
366         i->iov = iov;
367         i->iov_offset = skip;
368         return wanted - bytes;
369 }
370
371 #ifdef PIPE_PARANOIA
372 static bool sanity(const struct iov_iter *i)
373 {
374         struct pipe_inode_info *pipe = i->pipe;
375         unsigned int p_head = pipe->head;
376         unsigned int p_tail = pipe->tail;
377         unsigned int p_mask = pipe->ring_size - 1;
378         unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
379         unsigned int i_head = i->head;
380         unsigned int idx;
381
382         if (i->iov_offset) {
383                 struct pipe_buffer *p;
384                 if (unlikely(p_occupancy == 0))
385                         goto Bad;       // pipe must be non-empty
386                 if (unlikely(i_head != p_head - 1))
387                         goto Bad;       // must be at the last buffer...
388
389                 p = &pipe->bufs[i_head & p_mask];
390                 if (unlikely(p->offset + p->len != i->iov_offset))
391                         goto Bad;       // ... at the end of segment
392         } else {
393                 if (i_head != p_head)
394                         goto Bad;       // must be right after the last buffer
395         }
396         return true;
397 Bad:
398         printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
399         printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
400                         p_head, p_tail, pipe->ring_size);
401         for (idx = 0; idx < pipe->ring_size; idx++)
402                 printk(KERN_ERR "[%p %p %d %d]\n",
403                         pipe->bufs[idx].ops,
404                         pipe->bufs[idx].page,
405                         pipe->bufs[idx].offset,
406                         pipe->bufs[idx].len);
407         WARN_ON(1);
408         return false;
409 }
410 #else
411 #define sanity(i) true
412 #endif
413
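/*
 * Splice a page reference into an ITER_PIPE iterator: instead of copying the
 * data, take a reference on @page and attach it to the next pipe buffer (or
 * extend the previous buffer if the new data continues it directly in the
 * same page).  Returns the number of bytes accounted, or 0 if the pipe is
 * already full.
 */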
414 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
415                          struct iov_iter *i)
416 {
417         struct pipe_inode_info *pipe = i->pipe;
418         struct pipe_buffer *buf;
419         unsigned int p_tail = pipe->tail;
420         unsigned int p_mask = pipe->ring_size - 1;
421         unsigned int i_head = i->head;
422         size_t off;
423
424         if (unlikely(bytes > i->count))
425                 bytes = i->count;
426
427         if (unlikely(!bytes))
428                 return 0;
429
430         if (!sanity(i))
431                 return 0;
432
433         off = i->iov_offset;
434         buf = &pipe->bufs[i_head & p_mask];
435         if (off) {
436                 if (offset == off && buf->page == page) {
437                         /* merge with the last one */
438                         buf->len += bytes;
439                         i->iov_offset += bytes;
440                         goto out;
441                 }
442                 i_head++;
443                 buf = &pipe->bufs[i_head & p_mask];
444         }
445         if (pipe_full(i_head, p_tail, pipe->max_usage))
446                 return 0;
447
448         buf->ops = &page_cache_pipe_buf_ops;
449         get_page(page);
450         buf->page = page;
451         buf->offset = offset;
452         buf->len = bytes;
453
454         pipe->head = i_head + 1;
455         i->iov_offset = offset + bytes;
456         i->head = i_head;
457 out:
458         i->count -= bytes;
459         return bytes;
460 }
461
462 /*
463  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
464  * @bytes.  For each iovec, fault in each page that constitutes the iovec.
465  *
466  * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
467  * because it is an invalid address).
468  */
469 int iov_iter_fault_in_readable(const struct iov_iter *i, size_t bytes)
470 {
471         if (iter_is_iovec(i)) {
472                 const struct iovec *p;
473                 size_t skip;
474
475                 if (bytes > i->count)
476                         bytes = i->count;
477                 for (p = i->iov, skip = i->iov_offset; bytes; p++, skip = 0) {
478                         size_t len = min(bytes, p->iov_len - skip);
479                         int err;
480
481                         if (unlikely(!len))
482                                 continue;
483                         err = fault_in_pages_readable(p->iov_base + skip, len);
484                         if (unlikely(err))
485                                 return err;
486                         bytes -= len;
487                 }
488         }
489         return 0;
490 }
491 EXPORT_SYMBOL(iov_iter_fault_in_readable);
492
493 void iov_iter_init(struct iov_iter *i, unsigned int direction,
494                         const struct iovec *iov, unsigned long nr_segs,
495                         size_t count)
496 {
497         WARN_ON(direction & ~(READ | WRITE));
498         WARN_ON_ONCE(uaccess_kernel());
499         *i = (struct iov_iter) {
500                 .iter_type = ITER_IOVEC,
501                 .data_source = direction,
502                 .iov = iov,
503                 .nr_segs = nr_segs,
504                 .iov_offset = 0,
505                 .count = count
506         };
507 }
508 EXPORT_SYMBOL(iov_iter_init);
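
/*
 * Usage sketch (illustrative, not part of the original file): a minimal
 * example of setting up an ITER_IOVEC iterator over a single user buffer and
 * copying kernel data into it.  The function and parameter names below are
 * hypothetical.
 */
static size_t __maybe_unused example_copy_to_user_iter(void __user *ubuf,
							const void *kbuf,
							size_t len)
{
	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
	struct iov_iter iter;

	/* READ: data flows from the kernel into the iterator's buffers. */
	iov_iter_init(&iter, READ, &iov, 1, len);

	/* Returns the number of bytes actually copied; may be short on fault. */
	return copy_to_iter(kbuf, len, &iter);
}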
509
510 static inline bool allocated(struct pipe_buffer *buf)
511 {
512         return buf->ops == &default_pipe_buf_ops;
513 }
514
515 static inline void data_start(const struct iov_iter *i,
516                               unsigned int *iter_headp, size_t *offp)
517 {
518         unsigned int p_mask = i->pipe->ring_size - 1;
519         unsigned int iter_head = i->head;
520         size_t off = i->iov_offset;
521
522         if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
523                     off == PAGE_SIZE)) {
524                 iter_head++;
525                 off = 0;
526         }
527         *iter_headp = iter_head;
528         *offp = off;
529 }
530
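/*
 * Make room in the pipe for up to @size bytes: top up the partially filled
 * last buffer (if any) and allocate fresh pages for the rest, advancing
 * pipe->head as buffers are added.  Returns how many bytes of space were
 * actually set up (possibly short if the pipe fills or an allocation fails),
 * with *iter_headp/*offp pointing at where the caller should start writing.
 */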
531 static size_t push_pipe(struct iov_iter *i, size_t size,
532                         int *iter_headp, size_t *offp)
533 {
534         struct pipe_inode_info *pipe = i->pipe;
535         unsigned int p_tail = pipe->tail;
536         unsigned int p_mask = pipe->ring_size - 1;
537         unsigned int iter_head;
538         size_t off;
539         ssize_t left;
540
541         if (unlikely(size > i->count))
542                 size = i->count;
543         if (unlikely(!size))
544                 return 0;
545
546         left = size;
547         data_start(i, &iter_head, &off);
548         *iter_headp = iter_head;
549         *offp = off;
550         if (off) {
551                 left -= PAGE_SIZE - off;
552                 if (left <= 0) {
553                         pipe->bufs[iter_head & p_mask].len += size;
554                         return size;
555                 }
556                 pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
557                 iter_head++;
558         }
559         while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
560                 struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
561                 struct page *page = alloc_page(GFP_USER);
562                 if (!page)
563                         break;
564
565                 buf->ops = &default_pipe_buf_ops;
566                 buf->page = page;
567                 buf->offset = 0;
568                 buf->len = min_t(ssize_t, left, PAGE_SIZE);
569                 left -= buf->len;
570                 iter_head++;
571                 pipe->head = iter_head;
572
573                 if (left == 0)
574                         return size;
575         }
576         return size - left;
577 }
578
579 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
580                                 struct iov_iter *i)
581 {
582         struct pipe_inode_info *pipe = i->pipe;
583         unsigned int p_mask = pipe->ring_size - 1;
584         unsigned int i_head;
585         size_t n, off;
586
587         if (!sanity(i))
588                 return 0;
589
590         bytes = n = push_pipe(i, bytes, &i_head, &off);
591         if (unlikely(!n))
592                 return 0;
593         do {
594                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
595                 memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
596                 i->head = i_head;
597                 i->iov_offset = off + chunk;
598                 n -= chunk;
599                 addr += chunk;
600                 off = 0;
601                 i_head++;
602         } while (n);
603         i->count -= bytes;
604         return bytes;
605 }
606
607 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
608                               __wsum sum, size_t off)
609 {
610         __wsum next = csum_partial_copy_nocheck(from, to, len);
611         return csum_block_add(sum, next, off);
612 }
613
614 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
615                                          struct csum_state *csstate,
616                                          struct iov_iter *i)
617 {
618         struct pipe_inode_info *pipe = i->pipe;
619         unsigned int p_mask = pipe->ring_size - 1;
620         __wsum sum = csstate->csum;
621         size_t off = csstate->off;
622         unsigned int i_head;
623         size_t n, r;
624
625         if (!sanity(i))
626                 return 0;
627
628         bytes = n = push_pipe(i, bytes, &i_head, &r);
629         if (unlikely(!n))
630                 return 0;
631         do {
632                 size_t chunk = min_t(size_t, n, PAGE_SIZE - r);
633                 char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page);
634                 sum = csum_and_memcpy(p + r, addr, chunk, sum, off);
635                 kunmap_atomic(p);
636                 i->head = i_head;
637                 i->iov_offset = r + chunk;
638                 n -= chunk;
639                 off += chunk;
640                 addr += chunk;
641                 r = 0;
642                 i_head++;
643         } while (n);
644         i->count -= bytes;
645         csstate->csum = sum;
646         csstate->off = off;
647         return bytes;
648 }
649
650 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
651 {
652         const char *from = addr;
653         if (unlikely(iov_iter_is_pipe(i)))
654                 return copy_pipe_to_iter(addr, bytes, i);
655         if (iter_is_iovec(i))
656                 might_fault();
657         iterate_and_advance(i, bytes, v,
658                 copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
659                 memcpy_to_page(v.bv_page, v.bv_offset,
660                                (from += v.bv_len) - v.bv_len, v.bv_len),
661                 memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
662                 memcpy_to_page(v.bv_page, v.bv_offset,
663                                (from += v.bv_len) - v.bv_len, v.bv_len)
664         )
665
666         return bytes;
667 }
668 EXPORT_SYMBOL(_copy_to_iter);
669
670 #ifdef CONFIG_ARCH_HAS_COPY_MC
671 static int copyout_mc(void __user *to, const void *from, size_t n)
672 {
673         if (access_ok(to, n)) {
674                 instrument_copy_to_user(to, from, n);
675                 n = copy_mc_to_user((__force void *) to, from, n);
676         }
677         return n;
678 }
679
680 static unsigned long copy_mc_to_page(struct page *page, size_t offset,
681                 const char *from, size_t len)
682 {
683         unsigned long ret;
684         char *to;
685
686         to = kmap_atomic(page);
687         ret = copy_mc_to_kernel(to + offset, from, len);
688         kunmap_atomic(to);
689
690         return ret;
691 }
692
693 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
694                                 struct iov_iter *i)
695 {
696         struct pipe_inode_info *pipe = i->pipe;
697         unsigned int p_mask = pipe->ring_size - 1;
698         unsigned int i_head;
699         size_t n, off, xfer = 0;
700
701         if (!sanity(i))
702                 return 0;
703
704         bytes = n = push_pipe(i, bytes, &i_head, &off);
705         if (unlikely(!n))
706                 return 0;
707         do {
708                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
709                 unsigned long rem;
710
711                 rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
712                                             off, addr, chunk);
713                 i->head = i_head;
714                 i->iov_offset = off + chunk - rem;
715                 xfer += chunk - rem;
716                 if (rem)
717                         break;
718                 n -= chunk;
719                 addr += chunk;
720                 off = 0;
721                 i_head++;
722         } while (n);
723         i->count -= xfer;
724         return xfer;
725 }
726
727 /**
728  * _copy_mc_to_iter - copy to iter with source memory error exception handling
729  * @addr: source kernel address
730  * @bytes: total transfer length
731  * @i: destination iterator
732  *
733  * The pmem driver deploys this for the dax operation
734  * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
735  * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
736  * successfully copied.
737  *
738  * The main differences between this and the typical _copy_to_iter() are:
739  *
740  * * Typical tail/residue handling after a fault retries the copy
741  *   byte-by-byte until the fault happens again. Re-triggering machine
742  *   checks is potentially fatal so the implementation uses source
743  *   alignment and poison alignment assumptions to avoid re-triggering
744  *   hardware exceptions.
745  *
746  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
747  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
748  *   a short copy.
749  */
750 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
751 {
752         const char *from = addr;
753         unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
754
755         if (unlikely(iov_iter_is_pipe(i)))
756                 return copy_mc_pipe_to_iter(addr, bytes, i);
757         if (iter_is_iovec(i))
758                 might_fault();
759         iterate_and_advance(i, bytes, v,
760                 copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
761                            v.iov_len),
762                 ({
763                 rem = copy_mc_to_page(v.bv_page, v.bv_offset,
764                                       (from += v.bv_len) - v.bv_len, v.bv_len);
765                 if (rem) {
766                         curr_addr = (unsigned long) from;
767                         bytes = curr_addr - s_addr - rem;
768                         return bytes;
769                 }
770                 }),
771                 ({
772                 rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
773                                         - v.iov_len, v.iov_len);
774                 if (rem) {
775                         curr_addr = (unsigned long) from;
776                         bytes = curr_addr - s_addr - rem;
777                         return bytes;
778                 }
779                 }),
780                 ({
781                 rem = copy_mc_to_page(v.bv_page, v.bv_offset,
782                                       (from += v.bv_len) - v.bv_len, v.bv_len);
783                 if (rem) {
784                         curr_addr = (unsigned long) from;
785                         bytes = curr_addr - s_addr - rem;
786                         rcu_read_unlock();
787                         i->iov_offset += bytes;
788                         i->count -= bytes;
789                         return bytes;
790                 }
791                 })
792         )
793
794         return bytes;
795 }
796 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
797 #endif /* CONFIG_ARCH_HAS_COPY_MC */
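
/*
 * Usage note (illustrative, not part of the original file): a dax/pmem-style
 * caller is expected to treat a short return as a poisoned source, along the
 * lines of the hypothetical sketch below.
 *
 *	copied = _copy_mc_to_iter(kaddr, len, iter);
 *	if (copied != len)
 *		return copied ? copied : -EIO;
 */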
798
799 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
800 {
801         char *to = addr;
802         if (unlikely(iov_iter_is_pipe(i))) {
803                 WARN_ON(1);
804                 return 0;
805         }
806         if (iter_is_iovec(i))
807                 might_fault();
808         iterate_and_advance(i, bytes, v,
809                 copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
810                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
811                                  v.bv_offset, v.bv_len),
812                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
813                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
814                                  v.bv_offset, v.bv_len)
815         )
816
817         return bytes;
818 }
819 EXPORT_SYMBOL(_copy_from_iter);
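
/*
 * Usage sketch (illustrative, not part of the original file): pulling data
 * out of a kernel-memory iterator into a flat buffer.  The function and
 * parameter names are hypothetical.
 */
static size_t __maybe_unused example_copy_from_kvec(void *dst, void *src,
						    size_t len)
{
	struct kvec kv = { .iov_base = src, .iov_len = len };
	struct iov_iter iter;

	/* WRITE: the iterator is the source of the data being consumed. */
	iov_iter_kvec(&iter, WRITE, &kv, 1, len);

	/* For ITER_KVEC the copy cannot fault, so it is never short. */
	return _copy_from_iter(dst, len, &iter);
}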
820
821 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
822 {
823         char *to = addr;
824         if (unlikely(iov_iter_is_pipe(i))) {
825                 WARN_ON(1);
826                 return 0;
827         }
828         iterate_and_advance(i, bytes, v,
829                 __copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
830                                          v.iov_base, v.iov_len),
831                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
832                                  v.bv_offset, v.bv_len),
833                 memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
834                 memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
835                                  v.bv_offset, v.bv_len)
836         )
837
838         return bytes;
839 }
840 EXPORT_SYMBOL(_copy_from_iter_nocache);
841
842 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
843 /**
844  * _copy_from_iter_flushcache - write destination through cpu cache
845  * @addr: destination kernel address
846  * @bytes: total transfer length
847  * @i: source iterator
848  *
849  * The pmem driver arranges for filesystem-dax to use this facility via
850  * dax_copy_from_iter() for ensuring that writes to persistent memory
851  * are flushed through the CPU cache. It is differentiated from
852  * _copy_from_iter_nocache() in that it guarantees all data is flushed for
853  * all iterator types. _copy_from_iter_nocache() only attempts to
854  * bypass the cache for the ITER_IOVEC case, and on some archs may use
855  * instructions that strand dirty-data in the cache.
856  */
857 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
858 {
859         char *to = addr;
860         if (unlikely(iov_iter_is_pipe(i))) {
861                 WARN_ON(1);
862                 return 0;
863         }
864         iterate_and_advance(i, bytes, v,
865                 __copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
866                                          v.iov_base, v.iov_len),
867                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
868                                  v.bv_offset, v.bv_len),
869                 memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
870                         v.iov_len),
871                 memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
872                                  v.bv_offset, v.bv_len)
873         )
874
875         return bytes;
876 }
877 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
878 #endif
879
880 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
881 {
882         struct page *head;
883         size_t v = n + offset;
884
885         /*
886          * The general case needs to access the page order in order
887          * to compute the page size.
888          * However, we mostly deal with order-0 pages and thus can
889          * avoid a possible cache line miss for requests that fit all
890          * page orders.
891          */
892         if (n <= v && v <= PAGE_SIZE)
893                 return true;
894
895         head = compound_head(page);
896         v += (page - head) << PAGE_SHIFT;
897
898         if (likely(n <= v && v <= (page_size(head))))
899                 return true;
900         WARN_ON(1);
901         return false;
902 }
903
904 static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
905                          struct iov_iter *i)
906 {
907         if (likely(iter_is_iovec(i)))
908                 return copy_page_to_iter_iovec(page, offset, bytes, i);
909         if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
910                 void *kaddr = kmap_atomic(page);
911                 size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
912                 kunmap_atomic(kaddr);
913                 return wanted;
914         }
915         if (iov_iter_is_pipe(i))
916                 return copy_page_to_iter_pipe(page, offset, bytes, i);
917         if (unlikely(iov_iter_is_discard(i))) {
918                 if (unlikely(i->count < bytes))
919                         bytes = i->count;
920                 i->count -= bytes;
921                 return bytes;
922         }
923         WARN_ON(1);
924         return 0;
925 }
926
927 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
928                          struct iov_iter *i)
929 {
930         size_t res = 0;
931         if (unlikely(!page_copy_sane(page, offset, bytes)))
932                 return 0;
933         page += offset / PAGE_SIZE; // first subpage
934         offset %= PAGE_SIZE;
935         while (1) {
936                 size_t n = __copy_page_to_iter(page, offset,
937                                 min(bytes, (size_t)PAGE_SIZE - offset), i);
938                 res += n;
939                 bytes -= n;
940                 if (!bytes || !n)
941                         break;
942                 offset += n;
943                 if (offset == PAGE_SIZE) {
944                         page++;
945                         offset = 0;
946                 }
947         }
948         return res;
949 }
950 EXPORT_SYMBOL(copy_page_to_iter);
951
952 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
953                          struct iov_iter *i)
954 {
955         if (unlikely(!page_copy_sane(page, offset, bytes)))
956                 return 0;
957         if (likely(iter_is_iovec(i)))
958                 return copy_page_from_iter_iovec(page, offset, bytes, i);
959         if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
960                 void *kaddr = kmap_atomic(page);
961                 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
962                 kunmap_atomic(kaddr);
963                 return wanted;
964         }
965         WARN_ON(1);
966         return 0;
967 }
968 EXPORT_SYMBOL(copy_page_from_iter);
969
970 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
971 {
972         struct pipe_inode_info *pipe = i->pipe;
973         unsigned int p_mask = pipe->ring_size - 1;
974         unsigned int i_head;
975         size_t n, off;
976
977         if (!sanity(i))
978                 return 0;
979
980         bytes = n = push_pipe(i, bytes, &i_head, &off);
981         if (unlikely(!n))
982                 return 0;
983
984         do {
985                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
986                 memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk);
987                 i->head = i_head;
988                 i->iov_offset = off + chunk;
989                 n -= chunk;
990                 off = 0;
991                 i_head++;
992         } while (n);
993         i->count -= bytes;
994         return bytes;
995 }
996
997 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
998 {
999         if (unlikely(iov_iter_is_pipe(i)))
1000                 return pipe_zero(bytes, i);
1001         iterate_and_advance(i, bytes, v,
1002                 clear_user(v.iov_base, v.iov_len),
1003                 memzero_page(v.bv_page, v.bv_offset, v.bv_len),
1004                 memset(v.iov_base, 0, v.iov_len),
1005                 memzero_page(v.bv_page, v.bv_offset, v.bv_len)
1006         )
1007
1008         return bytes;
1009 }
1010 EXPORT_SYMBOL(iov_iter_zero);
1011
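/*
 * Copy up to @bytes from the iterator into @page at @offset while the caller
 * is in atomic context (typically with page faults disabled).  The iterator
 * is deliberately not advanced; the caller advances it by however many bytes
 * this returns, which may be short if the user pages are not resident.
 */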
1012 size_t iov_iter_copy_from_user_atomic(struct page *page,
1013                 struct iov_iter *i, unsigned long offset, size_t bytes)
1014 {
1015         char *kaddr = kmap_atomic(page), *p = kaddr + offset;
1016         if (unlikely(!page_copy_sane(page, offset, bytes))) {
1017                 kunmap_atomic(kaddr);
1018                 return 0;
1019         }
1020         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1021                 kunmap_atomic(kaddr);
1022                 WARN_ON(1);
1023                 return 0;
1024         }
1025         iterate_all_kinds(i, bytes, v,
1026                 copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1027                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1028                                  v.bv_offset, v.bv_len),
1029                 memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
1030                 memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
1031                                  v.bv_offset, v.bv_len)
1032         )
1033         kunmap_atomic(kaddr);
1034         return bytes;
1035 }
1036 EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
1037
1038 static inline void pipe_truncate(struct iov_iter *i)
1039 {
1040         struct pipe_inode_info *pipe = i->pipe;
1041         unsigned int p_tail = pipe->tail;
1042         unsigned int p_head = pipe->head;
1043         unsigned int p_mask = pipe->ring_size - 1;
1044
1045         if (!pipe_empty(p_head, p_tail)) {
1046                 struct pipe_buffer *buf;
1047                 unsigned int i_head = i->head;
1048                 size_t off = i->iov_offset;
1049
1050                 if (off) {
1051                         buf = &pipe->bufs[i_head & p_mask];
1052                         buf->len = off - buf->offset;
1053                         i_head++;
1054                 }
1055                 while (p_head != i_head) {
1056                         p_head--;
1057                         pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
1058                 }
1059
1060                 pipe->head = p_head;
1061         }
1062 }
1063
1064 static void pipe_advance(struct iov_iter *i, size_t size)
1065 {
1066         struct pipe_inode_info *pipe = i->pipe;
1067         if (size) {
1068                 struct pipe_buffer *buf;
1069                 unsigned int p_mask = pipe->ring_size - 1;
1070                 unsigned int i_head = i->head;
1071                 size_t off = i->iov_offset, left = size;
1072
1073                 if (off) /* make it relative to the beginning of buffer */
1074                         left += off - pipe->bufs[i_head & p_mask].offset;
1075                 while (1) {
1076                         buf = &pipe->bufs[i_head & p_mask];
1077                         if (left <= buf->len)
1078                                 break;
1079                         left -= buf->len;
1080                         i_head++;
1081                 }
1082                 i->head = i_head;
1083                 i->iov_offset = buf->offset + left;
1084         }
1085         i->count -= size;
1086         /* ... and discard everything past that point */
1087         pipe_truncate(i);
1088 }
1089
1090 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1091 {
1092         struct bvec_iter bi;
1093
1094         bi.bi_size = i->count;
1095         bi.bi_bvec_done = i->iov_offset;
1096         bi.bi_idx = 0;
1097         bvec_iter_advance(i->bvec, &bi, size);
1098
1099         i->bvec += bi.bi_idx;
1100         i->nr_segs -= bi.bi_idx;
1101         i->count = bi.bi_size;
1102         i->iov_offset = bi.bi_bvec_done;
1103 }
1104
1105 static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
1106 {
1107         const struct iovec *iov, *end;
1108
1109         if (!i->count)
1110                 return;
1111         i->count -= size;
1112
1113         size += i->iov_offset; // from beginning of current segment
1114         for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
1115                 if (likely(size < iov->iov_len))
1116                         break;
1117                 size -= iov->iov_len;
1118         }
1119         i->iov_offset = size;
1120         i->nr_segs -= iov - i->iov;
1121         i->iov = iov;
1122 }
1123
1124 void iov_iter_advance(struct iov_iter *i, size_t size)
1125 {
1126         if (unlikely(i->count < size))
1127                 size = i->count;
1128         if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
1129                 /* iovec and kvec have identical layouts */
1130                 iov_iter_iovec_advance(i, size);
1131         } else if (iov_iter_is_bvec(i)) {
1132                 iov_iter_bvec_advance(i, size);
1133         } else if (iov_iter_is_pipe(i)) {
1134                 pipe_advance(i, size);
1135         } else if (unlikely(iov_iter_is_xarray(i))) {
1136                 i->iov_offset += size;
1137                 i->count -= size;
1138         } else if (iov_iter_is_discard(i)) {
1139                 i->count -= size;
1140         }
1141 }
1142 EXPORT_SYMBOL(iov_iter_advance);
1143
1144 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1145 {
1146         if (!unroll)
1147                 return;
1148         if (WARN_ON(unroll > MAX_RW_COUNT))
1149                 return;
1150         i->count += unroll;
1151         if (unlikely(iov_iter_is_pipe(i))) {
1152                 struct pipe_inode_info *pipe = i->pipe;
1153                 unsigned int p_mask = pipe->ring_size - 1;
1154                 unsigned int i_head = i->head;
1155                 size_t off = i->iov_offset;
1156                 while (1) {
1157                         struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1158                         size_t n = off - b->offset;
1159                         if (unroll < n) {
1160                                 off -= unroll;
1161                                 break;
1162                         }
1163                         unroll -= n;
1164                         if (!unroll && i_head == i->start_head) {
1165                                 off = 0;
1166                                 break;
1167                         }
1168                         i_head--;
1169                         b = &pipe->bufs[i_head & p_mask];
1170                         off = b->offset + b->len;
1171                 }
1172                 i->iov_offset = off;
1173                 i->head = i_head;
1174                 pipe_truncate(i);
1175                 return;
1176         }
1177         if (unlikely(iov_iter_is_discard(i)))
1178                 return;
1179         if (unroll <= i->iov_offset) {
1180                 i->iov_offset -= unroll;
1181                 return;
1182         }
1183         unroll -= i->iov_offset;
1184         if (iov_iter_is_xarray(i)) {
1185                 BUG(); /* We should never go beyond the start of the specified
1186                         * range since we might then be straying into pages that
1187                         * aren't pinned.
1188                         */
1189         } else if (iov_iter_is_bvec(i)) {
1190                 const struct bio_vec *bvec = i->bvec;
1191                 while (1) {
1192                         size_t n = (--bvec)->bv_len;
1193                         i->nr_segs++;
1194                         if (unroll <= n) {
1195                                 i->bvec = bvec;
1196                                 i->iov_offset = n - unroll;
1197                                 return;
1198                         }
1199                         unroll -= n;
1200                 }
1201         } else { /* same logics for iovec and kvec */
1202                 const struct iovec *iov = i->iov;
1203                 while (1) {
1204                         size_t n = (--iov)->iov_len;
1205                         i->nr_segs++;
1206                         if (unroll <= n) {
1207                                 i->iov = iov;
1208                                 i->iov_offset = n - unroll;
1209                                 return;
1210                         }
1211                         unroll -= n;
1212                 }
1213         }
1214 }
1215 EXPORT_SYMBOL(iov_iter_revert);
1216
1217 /*
1218  * Return the count of just the current iov_iter segment.
1219  */
1220 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1221 {
1222         if (i->nr_segs > 1) {
1223                 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1224                         return min(i->count, i->iov->iov_len - i->iov_offset);
1225                 if (iov_iter_is_bvec(i))
1226                         return min(i->count, i->bvec->bv_len - i->iov_offset);
1227         }
1228         return i->count;
1229 }
1230 EXPORT_SYMBOL(iov_iter_single_seg_count);
1231
1232 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1233                         const struct kvec *kvec, unsigned long nr_segs,
1234                         size_t count)
1235 {
1236         WARN_ON(direction & ~(READ | WRITE));
1237         *i = (struct iov_iter){
1238                 .iter_type = ITER_KVEC,
1239                 .data_source = direction,
1240                 .kvec = kvec,
1241                 .nr_segs = nr_segs,
1242                 .iov_offset = 0,
1243                 .count = count
1244         };
1245 }
1246 EXPORT_SYMBOL(iov_iter_kvec);
1247
1248 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1249                         const struct bio_vec *bvec, unsigned long nr_segs,
1250                         size_t count)
1251 {
1252         WARN_ON(direction & ~(READ | WRITE));
1253         *i = (struct iov_iter){
1254                 .iter_type = ITER_BVEC,
1255                 .data_source = direction,
1256                 .bvec = bvec,
1257                 .nr_segs = nr_segs,
1258                 .iov_offset = 0,
1259                 .count = count
1260         };
1261 }
1262 EXPORT_SYMBOL(iov_iter_bvec);
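
/*
 * Usage sketch (illustrative, not part of the original file): wrapping a
 * single page fragment in an ITER_BVEC iterator and zeroing it through the
 * iov_iter API.  The function and parameter names are hypothetical.
 */
static size_t __maybe_unused example_zero_page_range(struct page *page,
						     unsigned int offset,
						     unsigned int len)
{
	struct bio_vec bv = {
		.bv_page	= page,
		.bv_offset	= offset,
		.bv_len		= len,
	};
	struct iov_iter iter;

	/* READ: the iterator is the destination of the (zero) data. */
	iov_iter_bvec(&iter, READ, &bv, 1, len);

	return iov_iter_zero(len, &iter);
}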
1263
1264 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1265                         struct pipe_inode_info *pipe,
1266                         size_t count)
1267 {
1268         BUG_ON(direction != READ);
1269         WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1270         *i = (struct iov_iter){
1271                 .iter_type = ITER_PIPE,
1272                 .data_source = false,
1273                 .pipe = pipe,
1274                 .head = pipe->head,
1275                 .start_head = pipe->head,
1276                 .iov_offset = 0,
1277                 .count = count
1278         };
1279 }
1280 EXPORT_SYMBOL(iov_iter_pipe);
1281
1282 /**
1283  * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1284  * @i: The iterator to initialise.
1285  * @direction: The direction of the transfer.
1286  * @xarray: The xarray to access.
1287  * @start: The start file position.
1288  * @count: The size of the I/O buffer in bytes.
1289  *
1290  * Set up an I/O iterator to either draw data out of the pages attached to an
1291  * inode or to inject data into those pages.  The pages *must* be prevented
1292  * from evaporation, either by taking a ref on them or locking them by the
1293  * caller.
1294  */
1295 void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1296                      struct xarray *xarray, loff_t start, size_t count)
1297 {
1298         BUG_ON(direction & ~1);
1299         *i = (struct iov_iter) {
1300                 .iter_type = ITER_XARRAY,
1301                 .data_source = direction,
1302                 .xarray = xarray,
1303                 .xarray_start = start,
1304                 .count = count,
1305                 .iov_offset = 0
1306         };
1307 }
1308 EXPORT_SYMBOL(iov_iter_xarray);
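
/*
 * Usage sketch (illustrative, not part of the original file): a filesystem
 * reading into its own page cache might wrap the relevant mapping as
 *
 *	iov_iter_xarray(&iter, READ, &mapping->i_pages, pos, len);
 *
 * with the pages already pinned or locked as described above.
 */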
1309
1310 /**
1311  * iov_iter_discard - Initialise an I/O iterator that discards data
1312  * @i: The iterator to initialise.
1313  * @direction: The direction of the transfer.
1314  * @count: The size of the I/O buffer in bytes.
1315  *
1316  * Set up an I/O iterator that just discards everything that's written to it.
1317  * It's only available as a READ iterator.
1318  */
1319 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1320 {
1321         BUG_ON(direction != READ);
1322         *i = (struct iov_iter){
1323                 .iter_type = ITER_DISCARD,
1324                 .data_source = false,
1325                 .count = count,
1326                 .iov_offset = 0
1327         };
1328 }
1329 EXPORT_SYMBOL(iov_iter_discard);
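
/*
 * Usage sketch (illustrative, not part of the original file): to drop @len
 * bytes from a source without storing them, a caller can do
 *
 *	iov_iter_discard(&iter, READ, len);
 *
 * and then feed the iterator to the usual copy_page_to_iter()/read paths.
 */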
1330
1331 static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
1332 {
1333         unsigned long res = 0;
1334         size_t size = i->count;
1335         size_t skip = i->iov_offset;
1336         unsigned k;
1337
1338         for (k = 0; k < i->nr_segs; k++, skip = 0) {
1339                 size_t len = i->iov[k].iov_len - skip;
1340                 if (len) {
1341                         res |= (unsigned long)i->iov[k].iov_base + skip;
1342                         if (len > size)
1343                                 len = size;
1344                         res |= len;
1345                         size -= len;
1346                         if (!size)
1347                                 break;
1348                 }
1349         }
1350         return res;
1351 }
1352
1353 static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
1354 {
1355         unsigned res = 0;
1356         size_t size = i->count;
1357         unsigned skip = i->iov_offset;
1358         unsigned k;
1359
1360         for (k = 0; k < i->nr_segs; k++, skip = 0) {
1361                 size_t len = i->bvec[k].bv_len - skip;
1362                 res |= (unsigned long)i->bvec[k].bv_offset + skip;
1363                 if (len > size)
1364                         len = size;
1365                 res |= len;
1366                 size -= len;
1367                 if (!size)
1368                         break;
1369         }
1370         return res;
1371 }
1372
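/*
 * Return the OR of every segment's start address/offset and length (clamped
 * to the remaining count).  Callers test the result against a block-size
 * mask to decide whether the iterator is sufficiently aligned, e.g. for
 * direct I/O.
 */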
1373 unsigned long iov_iter_alignment(const struct iov_iter *i)
1374 {
1375         /* iovec and kvec have identical layouts */
1376         if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1377                 return iov_iter_alignment_iovec(i);
1378
1379         if (iov_iter_is_bvec(i))
1380                 return iov_iter_alignment_bvec(i);
1381
1382         if (iov_iter_is_pipe(i)) {
1383                 unsigned int p_mask = i->pipe->ring_size - 1;
1384                 size_t size = i->count;
1385
1386                 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1387                         return size | i->iov_offset;
1388                 return size;
1389         }
1390
1391         if (iov_iter_is_xarray(i))
1392                 return (i->xarray_start + i->iov_offset) | i->count;
1393
1394         return 0;
1395 }
1396 EXPORT_SYMBOL(iov_iter_alignment);
1397
1398 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1399 {
1400         unsigned long res = 0;
1401         unsigned long v = 0;
1402         size_t size = i->count;
1403         unsigned k;
1404
1405         if (WARN_ON(!iter_is_iovec(i)))
1406                 return ~0U;
1407
1408         for (k = 0; k < i->nr_segs; k++) {
1409                 if (i->iov[k].iov_len) {
1410                         unsigned long base = (unsigned long)i->iov[k].iov_base;
1411                         if (v) // if not the first one
1412                                 res |= base | v; // this start | previous end
1413                         v = base + i->iov[k].iov_len;
1414                         if (size <= i->iov[k].iov_len)
1415                                 break;
1416                         size -= i->iov[k].iov_len;
1417                 }
1418         }
1419         return res;
1420 }
1421 EXPORT_SYMBOL(iov_iter_gap_alignment);
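
/*
 * Illustrative sketch (not part of this file): how a block-layer style caller
 * might test the result against a device constraint.  @boundary_mask is a
 * hypothetical virt-boundary mask; a set bit in (this segment's start |
 * previous segment's end) means the segments straddle a forbidden gap.
 */
static bool __maybe_unused example_iter_has_gaps(const struct iov_iter *i,
                                                 unsigned long boundary_mask)
{
        return (iov_iter_gap_alignment(i) & boundary_mask) != 0;
}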
1422
1423 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1424                                 size_t maxsize,
1425                                 struct page **pages,
1426                                 int iter_head,
1427                                 size_t *start)
1428 {
1429         struct pipe_inode_info *pipe = i->pipe;
1430         unsigned int p_mask = pipe->ring_size - 1;
1431         ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1432         if (!n)
1433                 return -EFAULT;
1434
1435         maxsize = n;
1436         n += *start;
1437         while (n > 0) {
1438                 get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1439                 iter_head++;
1440                 n -= PAGE_SIZE;
1441         }
1442
1443         return maxsize;
1444 }
1445
1446 static ssize_t pipe_get_pages(struct iov_iter *i,
1447                    struct page **pages, size_t maxsize, unsigned maxpages,
1448                    size_t *start)
1449 {
1450         unsigned int iter_head, npages;
1451         size_t capacity;
1452
1453         if (!sanity(i))
1454                 return -EFAULT;
1455
1456         data_start(i, &iter_head, start);
1457         /* Amount of free space: some of this one + all after this one */
1458         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1459         capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1460
1461         return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1462 }
1463
1464 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1465                                           pgoff_t index, unsigned int nr_pages)
1466 {
1467         XA_STATE(xas, xa, index);
1468         struct page *page;
1469         unsigned int ret = 0;
1470
1471         rcu_read_lock();
1472         for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1473                 if (xas_retry(&xas, page))
1474                         continue;
1475
1476                 /* Has the page moved or been split? */
1477                 if (unlikely(page != xas_reload(&xas))) {
1478                         xas_reset(&xas);
1479                         continue;
1480                 }
1481
1482                 pages[ret] = find_subpage(page, xas.xa_index);
1483                 get_page(pages[ret]);
1484                 if (++ret == nr_pages)
1485                         break;
1486         }
1487         rcu_read_unlock();
1488         return ret;
1489 }
1490
1491 static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1492                                      struct page **pages, size_t maxsize,
1493                                      unsigned maxpages, size_t *_start_offset)
1494 {
1495         unsigned nr, offset;
1496         pgoff_t index, count;
1497         size_t size = maxsize, actual;
1498         loff_t pos;
1499
1500         if (!size || !maxpages)
1501                 return 0;
1502
1503         pos = i->xarray_start + i->iov_offset;
1504         index = pos >> PAGE_SHIFT;
1505         offset = pos & ~PAGE_MASK;
1506         *_start_offset = offset;
1507
1508         count = 1;
1509         if (size > PAGE_SIZE - offset) {
1510                 size -= PAGE_SIZE - offset;
1511                 count += size >> PAGE_SHIFT;
1512                 size &= ~PAGE_MASK;
1513                 if (size)
1514                         count++;
1515         }
1516
1517         if (count > maxpages)
1518                 count = maxpages;
1519
1520         nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1521         if (nr == 0)
1522                 return 0;
1523
1524         actual = PAGE_SIZE * nr;
1525         actual -= offset;
1526         if (nr == count && size > 0) {
1527                 unsigned last_offset = (nr > 1) ? 0 : offset;
1528                 actual -= PAGE_SIZE - (last_offset + size);
1529         }
1530         return actual;
1531 }
1532
1533 /* must only be called on a non-empty ITER_IOVEC iterator */
1534 static unsigned long first_iovec_segment(const struct iov_iter *i,
1535                                          size_t *size, size_t *start,
1536                                          size_t maxsize, unsigned maxpages)
1537 {
1538         size_t skip;
1539         long k;
1540
1541         for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
1542                 unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
1543                 size_t len = i->iov[k].iov_len - skip;
1544
1545                 if (unlikely(!len))
1546                         continue;
1547                 if (len > maxsize)
1548                         len = maxsize;
1549                 len += (*start = addr % PAGE_SIZE);
1550                 if (len > maxpages * PAGE_SIZE)
1551                         len = maxpages * PAGE_SIZE;
1552                 *size = len;
1553                 return addr & PAGE_MASK;
1554         }
1555         BUG(); // if it had been empty, we wouldn't get called
1556 }
1557
1558 /* must only be called on a non-empty ITER_BVEC iterator */
1559 static struct page *first_bvec_segment(const struct iov_iter *i,
1560                                        size_t *size, size_t *start,
1561                                        size_t maxsize, unsigned maxpages)
1562 {
1563         struct page *page;
1564         size_t skip = i->iov_offset, len;
1565
1566         len = i->bvec->bv_len - skip;
1567         if (len > maxsize)
1568                 len = maxsize;
1569         skip += i->bvec->bv_offset;
1570         page = i->bvec->bv_page + skip / PAGE_SIZE;
1571         len += (*start = skip % PAGE_SIZE);
1572         if (len > maxpages * PAGE_SIZE)
1573                 len = maxpages * PAGE_SIZE;
1574         *size = len;
1575         return page;
1576 }
1577
1578 ssize_t iov_iter_get_pages(struct iov_iter *i,
1579                    struct page **pages, size_t maxsize, unsigned maxpages,
1580                    size_t *start)
1581 {
1582         size_t len;
1583         int n, res;
1584
1585         if (maxsize > i->count)
1586                 maxsize = i->count;
1587         if (!maxsize)
1588                 return 0;
1589
1590         if (likely(iter_is_iovec(i))) {
1591                 unsigned long addr;
1592
1593                 addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
1594                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1595                 res = get_user_pages_fast(addr, n,
1596                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0,
1597                                 pages);
1598                 if (unlikely(res < 0))
1599                         return res;
1600                 return (res == n ? len : res * PAGE_SIZE) - *start;
1601         }
1602         if (iov_iter_is_bvec(i)) {
1603                 struct page *page;
1604
1605                 page = first_bvec_segment(i, &len, start, maxsize, maxpages);
1606                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1607                 while (n--)
1608                         get_page(*pages++ = page++);
1609                 return len - *start;
1610         }
1611         if (iov_iter_is_pipe(i))
1612                 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1613         if (iov_iter_is_xarray(i))
1614                 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1615         return -EFAULT;
1616 }
1617 EXPORT_SYMBOL(iov_iter_get_pages);
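
/*
 * Illustrative sketch (not part of this file): pinning a small batch of pages
 * and releasing them again.  The batch size of 8 is arbitrary; on success the
 * data starts @start bytes into pages[0] and the return value is the number
 * of bytes the pinned pages cover.  Note that the iterator itself is not
 * advanced here.
 */
static ssize_t __maybe_unused example_pin_batch(struct iov_iter *i)
{
        struct page *pages[8];
        size_t start;
        ssize_t bytes;
        int n;

        bytes = iov_iter_get_pages(i, pages, iov_iter_count(i),
                                   ARRAY_SIZE(pages), &start);
        if (bytes <= 0)
                return bytes;

        /* ... the caller would consume the data here ... */

        n = DIV_ROUND_UP(bytes + start, PAGE_SIZE);
        while (n--)
                put_page(pages[n]);
        return bytes;
}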
1618
1619 static struct page **get_pages_array(size_t n)
1620 {
1621         return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1622 }
1623
1624 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1625                    struct page ***pages, size_t maxsize,
1626                    size_t *start)
1627 {
1628         struct page **p;
1629         unsigned int iter_head, npages;
1630         ssize_t n;
1631
1632         if (!sanity(i))
1633                 return -EFAULT;
1634
1635         data_start(i, &iter_head, start);
1636         /* Amount of free space: some of this one + all after this one */
1637         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1638         n = npages * PAGE_SIZE - *start;
1639         if (maxsize > n)
1640                 maxsize = n;
1641         else
1642                 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1643         p = get_pages_array(npages);
1644         if (!p)
1645                 return -ENOMEM;
1646         n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1647         if (n > 0)
1648                 *pages = p;
1649         else
1650                 kvfree(p);
1651         return n;
1652 }
1653
1654 static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1655                                            struct page ***pages, size_t maxsize,
1656                                            size_t *_start_offset)
1657 {
1658         struct page **p;
1659         unsigned nr, offset;
1660         pgoff_t index, count;
1661         size_t size = maxsize, actual;
1662         loff_t pos;
1663
1664         if (!size)
1665                 return 0;
1666
1667         pos = i->xarray_start + i->iov_offset;
1668         index = pos >> PAGE_SHIFT;
1669         offset = pos & ~PAGE_MASK;
1670         *_start_offset = offset;
1671
1672         count = 1;
1673         if (size > PAGE_SIZE - offset) {
1674                 size -= PAGE_SIZE - offset;
1675                 count += size >> PAGE_SHIFT;
1676                 size &= ~PAGE_MASK;
1677                 if (size)
1678                         count++;
1679         }
1680
1681         p = get_pages_array(count);
1682         if (!p)
1683                 return -ENOMEM;
1684         *pages = p;
1685
1686         nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1687         if (nr == 0)
1688                 return 0;
1689
1690         actual = PAGE_SIZE * nr;
1691         actual -= offset;
1692         if (nr == count && size > 0) {
1693                 unsigned last_offset = (nr > 1) ? 0 : offset;
1694                 actual -= PAGE_SIZE - (last_offset + size);
1695         }
1696         return actual;
1697 }
1698
1699 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1700                    struct page ***pages, size_t maxsize,
1701                    size_t *start)
1702 {
1703         struct page **p;
1704         size_t len;
1705         int n, res;
1706
1707         if (maxsize > i->count)
1708                 maxsize = i->count;
1709         if (!maxsize)
1710                 return 0;
1711
1712         if (likely(iter_is_iovec(i))) {
1713                 unsigned long addr;
1714
1715                 addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
1716                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1717                 p = get_pages_array(n);
1718                 if (!p)
1719                         return -ENOMEM;
1720                 res = get_user_pages_fast(addr, n,
1721                                 iov_iter_rw(i) != WRITE ?  FOLL_WRITE : 0, p);
1722                 if (unlikely(res < 0)) {
1723                         kvfree(p);
1724                         return res;
1725                 }
1726                 *pages = p;
1727                 return (res == n ? len : res * PAGE_SIZE) - *start;
1728         }
1729         if (iov_iter_is_bvec(i)) {
1730                 struct page *page;
1731
1732                 page = first_bvec_segment(i, &len, start, maxsize, ~0U);
1733                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1734                 *pages = p = get_pages_array(n);
1735                 if (!p)
1736                         return -ENOMEM;
1737                 while (n--)
1738                         get_page(*p++ = page++);
1739                 return len - *start;
1740         }
1741         if (iov_iter_is_pipe(i))
1742                 return pipe_get_pages_alloc(i, pages, maxsize, start);
1743         if (iov_iter_is_xarray(i))
1744                 return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1745         return -EFAULT;
1746 }
1747 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
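
/*
 * Illustrative sketch (not part of this file): the allocating variant.  On
 * success the caller owns both the page references and the page array, so it
 * must put_page() every page and then kvfree() the array (it was allocated
 * with kvmalloc_array() above).
 */
static ssize_t __maybe_unused example_pin_all(struct iov_iter *i)
{
        struct page **pages;
        size_t start;
        ssize_t bytes;
        int n;

        bytes = iov_iter_get_pages_alloc(i, &pages, iov_iter_count(i), &start);
        if (bytes <= 0)
                return bytes;

        /* ... the caller would consume the data here ... */

        n = DIV_ROUND_UP(bytes + start, PAGE_SIZE);
        while (n--)
                put_page(pages[n]);
        kvfree(pages);
        return bytes;
}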
1748
1749 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1750                                struct iov_iter *i)
1751 {
1752         char *to = addr;
1753         __wsum sum, next;
1754         size_t off = 0;
1755         sum = *csum;
1756         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1757                 WARN_ON(1);
1758                 return 0;
1759         }
1760         iterate_and_advance(i, bytes, v, ({
1761                 next = csum_and_copy_from_user(v.iov_base,
1762                                                (to += v.iov_len) - v.iov_len,
1763                                                v.iov_len);
1764                 if (next) {
1765                         sum = csum_block_add(sum, next, off);
1766                         off += v.iov_len;
1767                 }
1768                 next ? 0 : v.iov_len;
1769         }), ({
1770                 char *p = kmap_atomic(v.bv_page);
1771                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1772                                       p + v.bv_offset, v.bv_len,
1773                                       sum, off);
1774                 kunmap_atomic(p);
1775                 off += v.bv_len;
1776         }),({
1777                 sum = csum_and_memcpy((to += v.iov_len) - v.iov_len,
1778                                       v.iov_base, v.iov_len,
1779                                       sum, off);
1780                 off += v.iov_len;
1781         }), ({
1782                 char *p = kmap_atomic(v.bv_page);
1783                 sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
1784                                       p + v.bv_offset, v.bv_len,
1785                                       sum, off);
1786                 kunmap_atomic(p);
1787                 off += v.bv_len;
1788         })
1789         )
1790         *csum = sum;
1791         return bytes;
1792 }
1793 EXPORT_SYMBOL(csum_and_copy_from_iter);
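
/*
 * Illustrative sketch (not part of this file): a networking-style caller that
 * copies @len bytes out of @i into a flat buffer and accumulates their
 * checksum on top of *@running.  The wrapper and its parameters are
 * hypothetical; a short copy is treated as failure.
 */
static bool __maybe_unused example_copy_and_csum(void *to, size_t len,
                                                 __wsum *running,
                                                 struct iov_iter *i)
{
        __wsum csum = *running;

        if (csum_and_copy_from_iter(to, len, &csum, i) != len)
                return false;
        *running = csum;
        return true;
}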
1794
1795 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1796                              struct iov_iter *i)
1797 {
1798         struct csum_state *csstate = _csstate;
1799         const char *from = addr;
1800         __wsum sum, next;
1801         size_t off;
1802
1803         if (unlikely(iov_iter_is_pipe(i)))
1804                 return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
1805
1806         sum = csstate->csum;
1807         off = csstate->off;
1808         if (unlikely(iov_iter_is_discard(i))) {
1809                 WARN_ON(1);     /* for now */
1810                 return 0;
1811         }
1812         iterate_and_advance(i, bytes, v, ({
1813                 next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
1814                                              v.iov_base,
1815                                              v.iov_len);
1816                 if (next) {
1817                         sum = csum_block_add(sum, next, off);
1818                         off += v.iov_len;
1819                 }
1820                 next ? 0 : v.iov_len;
1821         }), ({
1822                 char *p = kmap_atomic(v.bv_page);
1823                 sum = csum_and_memcpy(p + v.bv_offset,
1824                                       (from += v.bv_len) - v.bv_len,
1825                                       v.bv_len, sum, off);
1826                 kunmap_atomic(p);
1827                 off += v.bv_len;
1828         }),({
1829                 sum = csum_and_memcpy(v.iov_base,
1830                                      (from += v.iov_len) - v.iov_len,
1831                                      v.iov_len, sum, off);
1832                 off += v.iov_len;
1833         }), ({
1834                 char *p = kmap_atomic(v.bv_page);
1835                 sum = csum_and_memcpy(p + v.bv_offset,
1836                                       (from += v.bv_len) - v.bv_len,
1837                                       v.bv_len, sum, off);
1838                 kunmap_atomic(p);
1839                 off += v.bv_len;
1840         })
1841         )
1842         csstate->csum = sum;
1843         csstate->off = off;
1844         return bytes;
1845 }
1846 EXPORT_SYMBOL(csum_and_copy_to_iter);
1847
1848 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1849                 struct iov_iter *i)
1850 {
1851 #ifdef CONFIG_CRYPTO_HASH
1852         struct ahash_request *hash = hashp;
1853         struct scatterlist sg;
1854         size_t copied;
1855
1856         copied = copy_to_iter(addr, bytes, i);
1857         sg_init_one(&sg, addr, copied);
1858         ahash_request_set_crypt(hash, &sg, NULL, copied);
1859         crypto_ahash_update(hash);
1860         return copied;
1861 #else
1862         return 0;
1863 #endif
1864 }
1865 EXPORT_SYMBOL(hash_and_copy_to_iter);
1866
1867 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1868 {
1869         size_t size = i->count;
1870         int npages = 0;
1871
1872         if (!size)
1873                 return 0;
1874         if (unlikely(iov_iter_is_discard(i)))
1875                 return 0;
1876
1877         if (unlikely(iov_iter_is_pipe(i))) {
1878                 struct pipe_inode_info *pipe = i->pipe;
1879                 unsigned int iter_head;
1880                 size_t off;
1881
1882                 if (!sanity(i))
1883                         return 0;
1884
1885                 data_start(i, &iter_head, &off);
1886                 /* some of this one + all after this one */
1887                 npages = pipe_space_for_user(iter_head, pipe->tail, pipe);
1888                 if (npages >= maxpages)
1889                         return maxpages;
1890         } else if (unlikely(iov_iter_is_xarray(i))) {
1891                 unsigned offset;
1892
1893                 offset = (i->xarray_start + i->iov_offset) & ~PAGE_MASK;
1894
1895                 npages = 1;
1896                 if (size > PAGE_SIZE - offset) {
1897                         size -= PAGE_SIZE - offset;
1898                         npages += size >> PAGE_SHIFT;
1899                         size &= ~PAGE_MASK;
1900                         if (size)
1901                                 npages++;
1902                 }
1903                 if (npages >= maxpages)
1904                         return maxpages;
1905         } else iterate_all_kinds(i, size, v, ({
1906                 unsigned long p = (unsigned long)v.iov_base;
1907                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1908                         - p / PAGE_SIZE;
1909                 if (npages >= maxpages)
1910                         return maxpages;
1911         0;}),({
1912                 npages++;
1913                 if (npages >= maxpages)
1914                         return maxpages;
1915         }),({
1916                 unsigned long p = (unsigned long)v.iov_base;
1917                 npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
1918                         - p / PAGE_SIZE;
1919                 if (npages >= maxpages)
1920                         return maxpages;
1921         }),
1922         0
1923         )
1924         return npages;
1925 }
1926 EXPORT_SYMBOL(iov_iter_npages);
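
/*
 * Illustrative sketch (not part of this file): sizing an allocation from the
 * page count.  The cap of 256 pages is an arbitrary example limit; because
 * iov_iter_npages() never returns more than @maxpages, the result can be
 * used directly as an array length.
 */
static __maybe_unused struct page **example_alloc_page_array(const struct iov_iter *i)
{
        int npages = iov_iter_npages(i, 256);

        if (!npages)
                return NULL;
        return kcalloc(npages, sizeof(struct page *), GFP_KERNEL);
}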
1927
1928 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1929 {
1930         *new = *old;
1931         if (unlikely(iov_iter_is_pipe(new))) {
1932                 WARN_ON(1);
1933                 return NULL;
1934         }
1935         if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
1936                 return NULL;
1937         if (iov_iter_is_bvec(new))
1938                 return new->bvec = kmemdup(new->bvec,
1939                                     new->nr_segs * sizeof(struct bio_vec),
1940                                     flags);
1941         else
1942                 /* iovec and kvec have identical layout */
1943                 return new->iov = kmemdup(new->iov,
1944                                    new->nr_segs * sizeof(struct iovec),
1945                                    flags);
1946 }
1947 EXPORT_SYMBOL(dup_iter);
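
/*
 * Illustrative sketch (not part of this file): keeping a private copy of an
 * iterator beyond the lifetime of the caller's segment array, as asynchronous
 * submission paths tend to do.  struct example_saved_iter and the helper are
 * hypothetical; the sketch assumes an iovec-, kvec- or bvec-backed iterator,
 * the only kinds dup_iter() duplicates.
 */
struct example_saved_iter {
        struct iov_iter iter;
        const void *segs;       /* dup_iter()'s allocation, freed with kfree() */
};

static int __maybe_unused example_save_iter(struct example_saved_iter *s,
                                            struct iov_iter *src)
{
        s->segs = dup_iter(&s->iter, src, GFP_KERNEL);
        return s->segs ? 0 : -ENOMEM;
}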
1948
1949 static int copy_compat_iovec_from_user(struct iovec *iov,
1950                 const struct iovec __user *uvec, unsigned long nr_segs)
1951 {
1952         const struct compat_iovec __user *uiov =
1953                 (const struct compat_iovec __user *)uvec;
1954         int ret = -EFAULT, i;
1955
1956         if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1957                 return -EFAULT;
1958
1959         for (i = 0; i < nr_segs; i++) {
1960                 compat_uptr_t buf;
1961                 compat_ssize_t len;
1962
1963                 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1964                 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1965
1966                 /* check for compat_size_t not fitting in compat_ssize_t (i.e. negative) */
1967                 if (len < 0) {
1968                         ret = -EINVAL;
1969                         goto uaccess_end;
1970                 }
1971                 iov[i].iov_base = compat_ptr(buf);
1972                 iov[i].iov_len = len;
1973         }
1974
1975         ret = 0;
1976 uaccess_end:
1977         user_access_end();
1978         return ret;
1979 }
1980
1981 static int copy_iovec_from_user(struct iovec *iov,
1982                 const struct iovec __user *uvec, unsigned long nr_segs)
1983 {
1984         unsigned long seg;
1985
1986         if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1987                 return -EFAULT;
1988         for (seg = 0; seg < nr_segs; seg++) {
1989                 if ((ssize_t)iov[seg].iov_len < 0)
1990                         return -EINVAL;
1991         }
1992
1993         return 0;
1994 }
1995
1996 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1997                 unsigned long nr_segs, unsigned long fast_segs,
1998                 struct iovec *fast_iov, bool compat)
1999 {
2000         struct iovec *iov = fast_iov;
2001         int ret;
2002
2003         /*
2004          * SuS says "The readv() function *may* fail if the iovcnt argument was
2005          * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
2006          * traditionally returned zero for zero segments, so...
2007          */
2008         if (nr_segs == 0)
2009                 return iov;
2010         if (nr_segs > UIO_MAXIOV)
2011                 return ERR_PTR(-EINVAL);
2012         if (nr_segs > fast_segs) {
2013                 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
2014                 if (!iov)
2015                         return ERR_PTR(-ENOMEM);
2016         }
2017
2018         if (compat)
2019                 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
2020         else
2021                 ret = copy_iovec_from_user(iov, uvec, nr_segs);
2022         if (ret) {
2023                 if (iov != fast_iov)
2024                         kfree(iov);
2025                 return ERR_PTR(ret);
2026         }
2027
2028         return iov;
2029 }
2030
2031 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
2032                  unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
2033                  struct iov_iter *i, bool compat)
2034 {
2035         ssize_t total_len = 0;
2036         unsigned long seg;
2037         struct iovec *iov;
2038
2039         iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
2040         if (IS_ERR(iov)) {
2041                 *iovp = NULL;
2042                 return PTR_ERR(iov);
2043         }
2044
2045         /*
2046          * According to the Single Unix Specification we should return EINVAL if
2047          * an element length is < 0 when cast to ssize_t or if the total length
2048          * would overflow the ssize_t return value of the system call.
2049          *
2050          * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
2051          * overflow case.
2052          */
2053         for (seg = 0; seg < nr_segs; seg++) {
2054                 ssize_t len = (ssize_t)iov[seg].iov_len;
2055
2056                 if (!access_ok(iov[seg].iov_base, len)) {
2057                         if (iov != *iovp)
2058                                 kfree(iov);
2059                         *iovp = NULL;
2060                         return -EFAULT;
2061                 }
2062
2063                 if (len > MAX_RW_COUNT - total_len) {
2064                         len = MAX_RW_COUNT - total_len;
2065                         iov[seg].iov_len = len;
2066                 }
2067                 total_len += len;
2068         }
2069
2070         iov_iter_init(i, type, iov, nr_segs, total_len);
2071         if (iov == *iovp)
2072                 *iovp = NULL;
2073         else
2074                 *iovp = iov;
2075         return total_len;
2076 }
2077
2078 /**
2079  * import_iovec() - Copy an array of &struct iovec from userspace
2080  *     into the kernel, check that it is valid, and initialize a new
2081  *     &struct iov_iter iterator to access it.
2082  *
2083  * @type: One of %READ or %WRITE.
2084  * @uvec: Pointer to the userspace array.
2085  * @nr_segs: Number of elements in userspace array.
2086  * @fast_segs: Number of elements in the caller-supplied array at *@iovp.
2087  * @iovp: (input and output parameter) Pointer to pointer to (usually small
2088  *     on-stack) kernel array.
2089  * @i: Pointer to iterator that will be initialized on success.
2090  *
2091  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
2092  * then this function places %NULL in *@iovp on return. Otherwise, a new
2093  * array will be allocated and the result placed in *@iovp. This means that
2094  * the caller may call kfree() on *@iovp regardless of whether the small
2095  * on-stack array was used or not (and regardless of whether this function
2096  * returns an error or not).
2097  *
2098  * Return: Negative error code on error, bytes imported on success
2099  */
2100 ssize_t import_iovec(int type, const struct iovec __user *uvec,
2101                  unsigned nr_segs, unsigned fast_segs,
2102                  struct iovec **iovp, struct iov_iter *i)
2103 {
2104         return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2105                               in_compat_syscall());
2106 }
2107 EXPORT_SYMBOL(import_iovec);
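
/*
 * Illustrative sketch (not part of this file): the usual readv()-style calling
 * convention.  The consuming step is elided; what matters is that @iov starts
 * out pointing at the on-stack array and that kfree(iov) afterwards is safe
 * whether or not import_iovec() switched to a heap allocation.
 */
static ssize_t __maybe_unused example_readv(const struct iovec __user *uvec,
                                            unsigned int nr_segs)
{
        struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
        struct iov_iter iter;
        ssize_t ret;

        ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
                           &iov, &iter);
        if (ret < 0)
                return ret;

        /* ... pass &iter to the actual I/O here; ret is the total byte count ... */

        kfree(iov);
        return ret;
}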
2108
2109 int import_single_range(int rw, void __user *buf, size_t len,
2110                  struct iovec *iov, struct iov_iter *i)
2111 {
2112         if (len > MAX_RW_COUNT)
2113                 len = MAX_RW_COUNT;
2114         if (unlikely(!access_ok(buf, len)))
2115                 return -EFAULT;
2116
2117         iov->iov_base = buf;
2118         iov->iov_len = len;
2119         iov_iter_init(i, rw, iov, 1, len);
2120         return 0;
2121 }
2122 EXPORT_SYMBOL(import_single_range);
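
/*
 * Illustrative sketch (not part of this file): the single-buffer counterpart
 * used by plain read()/write()-style paths.  The struct iovec lives on the
 * caller's stack and must stay in scope for as long as the iterator is used;
 * the consuming step is elided.
 */
static ssize_t __maybe_unused example_read_single(void __user *buf, size_t len)
{
        struct iovec iov;
        struct iov_iter iter;
        int ret;

        ret = import_single_range(READ, buf, len, &iov, &iter);
        if (ret)
                return ret;

        /* ... pass &iter to whatever does the actual copy, e.g. copy_to_iter() ... */

        return iov_iter_count(&iter);
}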