lib/iov_iter.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <crypto/hash.h>
3 #include <linux/export.h>
4 #include <linux/bvec.h>
5 #include <linux/fault-inject-usercopy.h>
6 #include <linux/uio.h>
7 #include <linux/pagemap.h>
8 #include <linux/highmem.h>
9 #include <linux/slab.h>
10 #include <linux/vmalloc.h>
11 #include <linux/splice.h>
12 #include <linux/compat.h>
13 #include <net/checksum.h>
14 #include <linux/scatterlist.h>
15 #include <linux/instrumented.h>
16
17 #define PIPE_PARANOIA /* for now */
18
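/*
 * The iterate_* macros below walk an iterator one contiguous chunk at a
 * time and hand each chunk to the caller-supplied STEP expression.  Inside
 * a STEP, "base" is the chunk's starting address and "len" its length; the
 * STEP must evaluate to the number of bytes it did *not* process (0 on
 * full success), which is how a partial user copy terminates the walk
 * early.  On exit, "n" is rewritten to the number of bytes actually
 * advanced and i->iov_offset is updated to match.  __iterate_and_advance()
 * dispatches on the iterator type, using the I step for user addresses
 * (ITER_IOVEC) and the K step for kernel addresses (kvec/bvec/xarray),
 * and then shrinks i->count by the amount advanced.
 */
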
19 /* covers iovec and kvec alike */
20 #define iterate_iovec(i, n, base, len, off, __p, STEP) {        \
21         size_t off = 0;                                         \
22         size_t skip = i->iov_offset;                            \
23         do {                                                    \
24                 len = min(n, __p->iov_len - skip);              \
25                 if (likely(len)) {                              \
26                         base = __p->iov_base + skip;            \
27                         len -= (STEP);                          \
28                         off += len;                             \
29                         skip += len;                            \
30                         n -= len;                               \
31                         if (skip < __p->iov_len)                \
32                                 break;                          \
33                 }                                               \
34                 __p++;                                          \
35                 skip = 0;                                       \
36         } while (n);                                            \
37         i->iov_offset = skip;                                   \
38         n = off;                                                \
39 }
40
41 #define iterate_bvec(i, n, base, len, off, p, STEP) {           \
42         size_t off = 0;                                         \
43         unsigned skip = i->iov_offset;                          \
44         while (n) {                                             \
45                 unsigned offset = p->bv_offset + skip;          \
46                 unsigned left;                                  \
47                 void *kaddr = kmap_local_page(p->bv_page +      \
48                                         offset / PAGE_SIZE);    \
49                 base = kaddr + offset % PAGE_SIZE;              \
50                 len = min(min(n, (size_t)(p->bv_len - skip)),   \
51                      (size_t)(PAGE_SIZE - offset % PAGE_SIZE)); \
52                 left = (STEP);                                  \
53                 kunmap_local(kaddr);                            \
54                 len -= left;                                    \
55                 off += len;                                     \
56                 skip += len;                                    \
57                 if (skip == p->bv_len) {                        \
58                         skip = 0;                               \
59                         p++;                                    \
60                 }                                               \
61                 n -= len;                                       \
62                 if (left)                                       \
63                         break;                                  \
64         }                                                       \
65         i->iov_offset = skip;                                   \
66         n = off;                                                \
67 }
68
69 #define iterate_xarray(i, n, base, len, __off, STEP) {          \
70         __label__ __out;                                        \
71         size_t __off = 0;                                       \
72         struct folio *folio;                                    \
73         loff_t start = i->xarray_start + i->iov_offset;         \
74         pgoff_t index = start / PAGE_SIZE;                      \
75         XA_STATE(xas, i->xarray, index);                        \
76                                                                 \
77         len = PAGE_SIZE - offset_in_page(start);                \
78         rcu_read_lock();                                        \
79         xas_for_each(&xas, folio, ULONG_MAX) {                  \
80                 unsigned left;                                  \
81                 size_t offset;                                  \
82                 if (xas_retry(&xas, folio))                     \
83                         continue;                               \
84                 if (WARN_ON(xa_is_value(folio)))                \
85                         break;                                  \
86                 if (WARN_ON(folio_test_hugetlb(folio)))         \
87                         break;                                  \
88                 offset = offset_in_folio(folio, start + __off); \
89                 while (offset < folio_size(folio)) {            \
90                         base = kmap_local_folio(folio, offset); \
91                         len = min(n, len);                      \
92                         left = (STEP);                          \
93                         kunmap_local(base);                     \
94                         len -= left;                            \
95                         __off += len;                           \
96                         n -= len;                               \
97                         if (left || n == 0)                     \
98                                 goto __out;                     \
99                         offset += len;                          \
100                         len = PAGE_SIZE;                        \
101                 }                                               \
102         }                                                       \
103 __out:                                                          \
104         rcu_read_unlock();                                      \
105         i->iov_offset += __off;                                 \
106         n = __off;                                              \
107 }
108
109 #define __iterate_and_advance(i, n, base, len, off, I, K) {     \
110         if (unlikely(i->count < n))                             \
111                 n = i->count;                                   \
112         if (likely(n)) {                                        \
113                 if (likely(iter_is_iovec(i))) {                 \
114                         const struct iovec *iov = i->iov;       \
115                         void __user *base;                      \
116                         size_t len;                             \
117                         iterate_iovec(i, n, base, len, off,     \
118                                                 iov, (I))       \
119                         i->nr_segs -= iov - i->iov;             \
120                         i->iov = iov;                           \
121                 } else if (iov_iter_is_bvec(i)) {               \
122                         const struct bio_vec *bvec = i->bvec;   \
123                         void *base;                             \
124                         size_t len;                             \
125                         iterate_bvec(i, n, base, len, off,      \
126                                                 bvec, (K))      \
127                         i->nr_segs -= bvec - i->bvec;           \
128                         i->bvec = bvec;                         \
129                 } else if (iov_iter_is_kvec(i)) {               \
130                         const struct kvec *kvec = i->kvec;      \
131                         void *base;                             \
132                         size_t len;                             \
133                         iterate_iovec(i, n, base, len, off,     \
134                                                 kvec, (K))      \
135                         i->nr_segs -= kvec - i->kvec;           \
136                         i->kvec = kvec;                         \
137                 } else if (iov_iter_is_xarray(i)) {             \
138                         void *base;                             \
139                         size_t len;                             \
140                         iterate_xarray(i, n, base, len, off,    \
141                                                         (K))    \
142                 }                                               \
143                 i->count -= n;                                  \
144         }                                                       \
145 }
146 #define iterate_and_advance(i, n, base, len, off, I, K) \
147         __iterate_and_advance(i, n, base, len, off, I, ((void)(K),0))
148
149 static int copyout(void __user *to, const void *from, size_t n)
150 {
151         if (should_fail_usercopy())
152                 return n;
153         if (access_ok(to, n)) {
154                 instrument_copy_to_user(to, from, n);
155                 n = raw_copy_to_user(to, from, n);
156         }
157         return n;
158 }
159
160 static int copyin(void *to, const void __user *from, size_t n)
161 {
162         if (should_fail_usercopy())
163                 return n;
164         if (access_ok(from, n)) {
165                 instrument_copy_from_user(to, from, n);
166                 n = raw_copy_from_user(to, from, n);
167         }
168         return n;
169 }
170
171 static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
172                          struct iov_iter *i)
173 {
174         size_t skip, copy, left, wanted;
175         const struct iovec *iov;
176         char __user *buf;
177         void *kaddr, *from;
178
179         if (unlikely(bytes > i->count))
180                 bytes = i->count;
181
182         if (unlikely(!bytes))
183                 return 0;
184
185         might_fault();
186         wanted = bytes;
187         iov = i->iov;
188         skip = i->iov_offset;
189         buf = iov->iov_base + skip;
190         copy = min(bytes, iov->iov_len - skip);
191
192         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_writeable(buf, copy)) {
193                 kaddr = kmap_atomic(page);
194                 from = kaddr + offset;
195
196                 /* first chunk, usually the only one */
197                 left = copyout(buf, from, copy);
198                 copy -= left;
199                 skip += copy;
200                 from += copy;
201                 bytes -= copy;
202
203                 while (unlikely(!left && bytes)) {
204                         iov++;
205                         buf = iov->iov_base;
206                         copy = min(bytes, iov->iov_len);
207                         left = copyout(buf, from, copy);
208                         copy -= left;
209                         skip = copy;
210                         from += copy;
211                         bytes -= copy;
212                 }
213                 if (likely(!bytes)) {
214                         kunmap_atomic(kaddr);
215                         goto done;
216                 }
217                 offset = from - kaddr;
218                 buf += copy;
219                 kunmap_atomic(kaddr);
220                 copy = min(bytes, iov->iov_len - skip);
221         }
222         /* Too bad - revert to non-atomic kmap */
223
224         kaddr = kmap(page);
225         from = kaddr + offset;
226         left = copyout(buf, from, copy);
227         copy -= left;
228         skip += copy;
229         from += copy;
230         bytes -= copy;
231         while (unlikely(!left && bytes)) {
232                 iov++;
233                 buf = iov->iov_base;
234                 copy = min(bytes, iov->iov_len);
235                 left = copyout(buf, from, copy);
236                 copy -= left;
237                 skip = copy;
238                 from += copy;
239                 bytes -= copy;
240         }
241         kunmap(page);
242
243 done:
244         if (skip == iov->iov_len) {
245                 iov++;
246                 skip = 0;
247         }
248         i->count -= wanted - bytes;
249         i->nr_segs -= iov - i->iov;
250         i->iov = iov;
251         i->iov_offset = skip;
252         return wanted - bytes;
253 }
254
255 static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
256                          struct iov_iter *i)
257 {
258         size_t skip, copy, left, wanted;
259         const struct iovec *iov;
260         char __user *buf;
261         void *kaddr, *to;
262
263         if (unlikely(bytes > i->count))
264                 bytes = i->count;
265
266         if (unlikely(!bytes))
267                 return 0;
268
269         might_fault();
270         wanted = bytes;
271         iov = i->iov;
272         skip = i->iov_offset;
273         buf = iov->iov_base + skip;
274         copy = min(bytes, iov->iov_len - skip);
275
276         if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_readable(buf, copy)) {
277                 kaddr = kmap_atomic(page);
278                 to = kaddr + offset;
279
280                 /* first chunk, usually the only one */
281                 left = copyin(to, buf, copy);
282                 copy -= left;
283                 skip += copy;
284                 to += copy;
285                 bytes -= copy;
286
287                 while (unlikely(!left && bytes)) {
288                         iov++;
289                         buf = iov->iov_base;
290                         copy = min(bytes, iov->iov_len);
291                         left = copyin(to, buf, copy);
292                         copy -= left;
293                         skip = copy;
294                         to += copy;
295                         bytes -= copy;
296                 }
297                 if (likely(!bytes)) {
298                         kunmap_atomic(kaddr);
299                         goto done;
300                 }
301                 offset = to - kaddr;
302                 buf += copy;
303                 kunmap_atomic(kaddr);
304                 copy = min(bytes, iov->iov_len - skip);
305         }
306         /* Too bad - revert to non-atomic kmap */
307
308         kaddr = kmap(page);
309         to = kaddr + offset;
310         left = copyin(to, buf, copy);
311         copy -= left;
312         skip += copy;
313         to += copy;
314         bytes -= copy;
315         while (unlikely(!left && bytes)) {
316                 iov++;
317                 buf = iov->iov_base;
318                 copy = min(bytes, iov->iov_len);
319                 left = copyin(to, buf, copy);
320                 copy -= left;
321                 skip = copy;
322                 to += copy;
323                 bytes -= copy;
324         }
325         kunmap(page);
326
327 done:
328         if (skip == iov->iov_len) {
329                 iov++;
330                 skip = 0;
331         }
332         i->count -= wanted - bytes;
333         i->nr_segs -= iov - i->iov;
334         i->iov = iov;
335         i->iov_offset = skip;
336         return wanted - bytes;
337 }
338
339 #ifdef PIPE_PARANOIA
340 static bool sanity(const struct iov_iter *i)
341 {
342         struct pipe_inode_info *pipe = i->pipe;
343         unsigned int p_head = pipe->head;
344         unsigned int p_tail = pipe->tail;
345         unsigned int p_mask = pipe->ring_size - 1;
346         unsigned int p_occupancy = pipe_occupancy(p_head, p_tail);
347         unsigned int i_head = i->head;
348         unsigned int idx;
349
350         if (i->iov_offset) {
351                 struct pipe_buffer *p;
352                 if (unlikely(p_occupancy == 0))
353                         goto Bad;       // pipe must be non-empty
354                 if (unlikely(i_head != p_head - 1))
355                         goto Bad;       // must be at the last buffer...
356
357                 p = &pipe->bufs[i_head & p_mask];
358                 if (unlikely(p->offset + p->len != i->iov_offset))
359                         goto Bad;       // ... at the end of segment
360         } else {
361                 if (i_head != p_head)
362                         goto Bad;       // must be right after the last buffer
363         }
364         return true;
365 Bad:
366         printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset);
367         printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n",
368                         p_head, p_tail, pipe->ring_size);
369         for (idx = 0; idx < pipe->ring_size; idx++)
370                 printk(KERN_ERR "[%p %p %d %d]\n",
371                         pipe->bufs[idx].ops,
372                         pipe->bufs[idx].page,
373                         pipe->bufs[idx].offset,
374                         pipe->bufs[idx].len);
375         WARN_ON(1);
376         return false;
377 }
378 #else
379 #define sanity(i) true
380 #endif
381
382 static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
383                          struct iov_iter *i)
384 {
385         struct pipe_inode_info *pipe = i->pipe;
386         struct pipe_buffer *buf;
387         unsigned int p_tail = pipe->tail;
388         unsigned int p_mask = pipe->ring_size - 1;
389         unsigned int i_head = i->head;
390         size_t off;
391
392         if (unlikely(bytes > i->count))
393                 bytes = i->count;
394
395         if (unlikely(!bytes))
396                 return 0;
397
398         if (!sanity(i))
399                 return 0;
400
401         off = i->iov_offset;
402         buf = &pipe->bufs[i_head & p_mask];
403         if (off) {
404                 if (offset == off && buf->page == page) {
405                         /* merge with the last one */
406                         buf->len += bytes;
407                         i->iov_offset += bytes;
408                         goto out;
409                 }
410                 i_head++;
411                 buf = &pipe->bufs[i_head & p_mask];
412         }
413         if (pipe_full(i_head, p_tail, pipe->max_usage))
414                 return 0;
415
416         buf->ops = &page_cache_pipe_buf_ops;
417         get_page(page);
418         buf->page = page;
419         buf->offset = offset;
420         buf->len = bytes;
421
422         pipe->head = i_head + 1;
423         i->iov_offset = offset + bytes;
424         i->head = i_head;
425 out:
426         i->count -= bytes;
427         return bytes;
428 }
429
430 /*
431  * fault_in_iov_iter_readable - fault in iov iterator for reading
432  * @i: iterator
433  * @size: maximum length
434  *
435  * Fault in one or more iovecs of the given iov_iter, to a maximum length of
436  * @size.  For each iovec, fault in each page that constitutes the iovec.
437  *
438  * Returns the number of bytes not faulted in (like copy_to_user() and
439  * copy_from_user()).
440  *
441  * Always returns 0 for non-userspace iterators.
442  */
443 size_t fault_in_iov_iter_readable(const struct iov_iter *i, size_t size)
444 {
445         if (iter_is_iovec(i)) {
446                 size_t count = min(size, iov_iter_count(i));
447                 const struct iovec *p;
448                 size_t skip;
449
450                 size -= count;
451                 for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
452                         size_t len = min(count, p->iov_len - skip);
453                         size_t ret;
454
455                         if (unlikely(!len))
456                                 continue;
457                         ret = fault_in_readable(p->iov_base + skip, len);
458                         count -= len - ret;
459                         if (ret)
460                                 break;
461                 }
462                 return count + size;
463         }
464         return 0;
465 }
466 EXPORT_SYMBOL(fault_in_iov_iter_readable);
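
/*
 * Illustrative sketch (not part of the original file), loosely modelled on
 * the buffered write path in generic_perform_write(): before copying from
 * a user-backed iterator under a page lock, fault the source pages in and
 * give up if not even one byte could be made accessible.  The function
 * name below is hypothetical.
 */
static inline int example_prefault_write_source(struct iov_iter *from, size_t bytes)
{
	/*
	 * fault_in_iov_iter_readable() returns the number of bytes that
	 * could not be faulted in; if that is everything we asked for, a
	 * subsequent copy_from_iter() could make no progress at all.
	 */
	if (unlikely(fault_in_iov_iter_readable(from, bytes) == bytes))
		return -EFAULT;
	return 0;
}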
467
468 /*
469  * fault_in_iov_iter_writeable - fault in iov iterator for writing
470  * @i: iterator
471  * @size: maximum length
472  *
473  * Faults in the iterator using get_user_pages(), i.e., without triggering
474  * hardware page faults.  This is primarily useful when we already know that
475  * some or all of the pages in @i aren't in memory.
476  *
477  * Returns the number of bytes not faulted in, like copy_to_user() and
478  * copy_from_user().
479  *
480  * Always returns 0 for non-user-space iterators.
481  */
482 size_t fault_in_iov_iter_writeable(const struct iov_iter *i, size_t size)
483 {
484         if (iter_is_iovec(i)) {
485                 size_t count = min(size, iov_iter_count(i));
486                 const struct iovec *p;
487                 size_t skip;
488
489                 size -= count;
490                 for (p = i->iov, skip = i->iov_offset; count; p++, skip = 0) {
491                         size_t len = min(count, p->iov_len - skip);
492                         size_t ret;
493
494                         if (unlikely(!len))
495                                 continue;
496                         ret = fault_in_safe_writeable(p->iov_base + skip, len);
497                         count -= len - ret;
498                         if (ret)
499                                 break;
500                 }
501                 return count + size;
502         }
503         return 0;
504 }
505 EXPORT_SYMBOL(fault_in_iov_iter_writeable);
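
/*
 * Illustrative sketch (not part of the original file): a direct-I/O read
 * path can use fault_in_iov_iter_writeable() to bring the user destination
 * pages in via get_user_pages() (i.e. without taking hardware page faults)
 * before retrying a transfer that came up short.  The function name below
 * is hypothetical.
 */
static inline bool example_prefault_read_dest(struct iov_iter *to, size_t size)
{
	/* true if at least part of the destination is now faulted in */
	return fault_in_iov_iter_writeable(to, size) != size;
}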
506
507 void iov_iter_init(struct iov_iter *i, unsigned int direction,
508                         const struct iovec *iov, unsigned long nr_segs,
509                         size_t count)
510 {
511         WARN_ON(direction & ~(READ | WRITE));
512         *i = (struct iov_iter) {
513                 .iter_type = ITER_IOVEC,
514                 .nofault = false,
515                 .data_source = direction,
516                 .iov = iov,
517                 .nr_segs = nr_segs,
518                 .iov_offset = 0,
519                 .count = count
520         };
521 }
522 EXPORT_SYMBOL(iov_iter_init);
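
/*
 * Illustrative sketch (not part of the original file), mirroring the way
 * new_sync_read() wraps a plain user buffer: build a single-segment
 * ITER_IOVEC iterator and copy kernel data out to it.  The function name
 * below is hypothetical.
 */
static inline size_t example_copy_to_user_buf(void __user *ubuf, size_t len,
					      const void *kbuf)
{
	struct iovec iov = { .iov_base = ubuf, .iov_len = len };
	struct iov_iter iter;

	/* READ: data flows from the kernel into the (user-backed) iterator */
	iov_iter_init(&iter, READ, &iov, 1, len);
	return copy_to_iter(kbuf, len, &iter);
}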
523
524 static inline bool allocated(struct pipe_buffer *buf)
525 {
526         return buf->ops == &default_pipe_buf_ops;
527 }
528
529 static inline void data_start(const struct iov_iter *i,
530                               unsigned int *iter_headp, size_t *offp)
531 {
532         unsigned int p_mask = i->pipe->ring_size - 1;
533         unsigned int iter_head = i->head;
534         size_t off = i->iov_offset;
535
536         if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) ||
537                     off == PAGE_SIZE)) {
538                 iter_head++;
539                 off = 0;
540         }
541         *iter_headp = iter_head;
542         *offp = off;
543 }
544
545 static size_t push_pipe(struct iov_iter *i, size_t size,
546                         int *iter_headp, size_t *offp)
547 {
548         struct pipe_inode_info *pipe = i->pipe;
549         unsigned int p_tail = pipe->tail;
550         unsigned int p_mask = pipe->ring_size - 1;
551         unsigned int iter_head;
552         size_t off;
553         ssize_t left;
554
555         if (unlikely(size > i->count))
556                 size = i->count;
557         if (unlikely(!size))
558                 return 0;
559
560         left = size;
561         data_start(i, &iter_head, &off);
562         *iter_headp = iter_head;
563         *offp = off;
564         if (off) {
565                 left -= PAGE_SIZE - off;
566                 if (left <= 0) {
567                         pipe->bufs[iter_head & p_mask].len += size;
568                         return size;
569                 }
570                 pipe->bufs[iter_head & p_mask].len = PAGE_SIZE;
571                 iter_head++;
572         }
573         while (!pipe_full(iter_head, p_tail, pipe->max_usage)) {
574                 struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask];
575                 struct page *page = alloc_page(GFP_USER);
576                 if (!page)
577                         break;
578
579                 buf->ops = &default_pipe_buf_ops;
580                 buf->page = page;
581                 buf->offset = 0;
582                 buf->len = min_t(ssize_t, left, PAGE_SIZE);
583                 left -= buf->len;
584                 iter_head++;
585                 pipe->head = iter_head;
586
587                 if (left == 0)
588                         return size;
589         }
590         return size - left;
591 }
592
593 static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
594                                 struct iov_iter *i)
595 {
596         struct pipe_inode_info *pipe = i->pipe;
597         unsigned int p_mask = pipe->ring_size - 1;
598         unsigned int i_head;
599         size_t n, off;
600
601         if (!sanity(i))
602                 return 0;
603
604         bytes = n = push_pipe(i, bytes, &i_head, &off);
605         if (unlikely(!n))
606                 return 0;
607         do {
608                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
609                 memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk);
610                 i->head = i_head;
611                 i->iov_offset = off + chunk;
612                 n -= chunk;
613                 addr += chunk;
614                 off = 0;
615                 i_head++;
616         } while (n);
617         i->count -= bytes;
618         return bytes;
619 }
620
621 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
622                               __wsum sum, size_t off)
623 {
624         __wsum next = csum_partial_copy_nocheck(from, to, len);
625         return csum_block_add(sum, next, off);
626 }
627
628 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
629                                          struct iov_iter *i, __wsum *sump)
630 {
631         struct pipe_inode_info *pipe = i->pipe;
632         unsigned int p_mask = pipe->ring_size - 1;
633         __wsum sum = *sump;
634         size_t off = 0;
635         unsigned int i_head;
636         size_t r;
637
638         if (!sanity(i))
639                 return 0;
640
641         bytes = push_pipe(i, bytes, &i_head, &r);
642         while (bytes) {
643                 size_t chunk = min_t(size_t, bytes, PAGE_SIZE - r);
644                 char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
645                 sum = csum_and_memcpy(p + r, addr + off, chunk, sum, off);
646                 kunmap_local(p);
647                 i->head = i_head;
648                 i->iov_offset = r + chunk;
649                 bytes -= chunk;
650                 off += chunk;
651                 r = 0;
652                 i_head++;
653         }
654         *sump = sum;
655         i->count -= off;
656         return off;
657 }
658
659 size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
660 {
661         if (unlikely(iov_iter_is_pipe(i)))
662                 return copy_pipe_to_iter(addr, bytes, i);
663         if (iter_is_iovec(i))
664                 might_fault();
665         iterate_and_advance(i, bytes, base, len, off,
666                 copyout(base, addr + off, len),
667                 memcpy(base, addr + off, len)
668         )
669
670         return bytes;
671 }
672 EXPORT_SYMBOL(_copy_to_iter);
673
674 #ifdef CONFIG_ARCH_HAS_COPY_MC
675 static int copyout_mc(void __user *to, const void *from, size_t n)
676 {
677         if (access_ok(to, n)) {
678                 instrument_copy_to_user(to, from, n);
679                 n = copy_mc_to_user((__force void *) to, from, n);
680         }
681         return n;
682 }
683
684 static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
685                                 struct iov_iter *i)
686 {
687         struct pipe_inode_info *pipe = i->pipe;
688         unsigned int p_mask = pipe->ring_size - 1;
689         unsigned int i_head;
690         size_t n, off, xfer = 0;
691
692         if (!sanity(i))
693                 return 0;
694
695         n = push_pipe(i, bytes, &i_head, &off);
696         while (n) {
697                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
698                 char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
699                 unsigned long rem;
700                 rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
701                 chunk -= rem;
702                 kunmap_local(p);
703                 i->head = i_head;
704                 i->iov_offset = off + chunk;
705                 xfer += chunk;
706                 if (rem)
707                         break;
708                 n -= chunk;
709                 off = 0;
710                 i_head++;
711         }
712         i->count -= xfer;
713         return xfer;
714 }
715
716 /**
717  * _copy_mc_to_iter - copy to iter with source memory error exception handling
718  * @addr: source kernel address
719  * @bytes: total transfer length
720  * @i: destination iterator
721  *
722  * The pmem driver deploys this for the dax operation
723  * The pmem driver deploys this for the dax operation
724  * (dax_copy_to_iter()) for dax reads (bypassing the page cache and the
725  * block layer). Upon a machine check (#MC), read(2) aborts and returns
726  * EIO or the number of bytes successfully copied.
727  * The main differences between this and typical _copy_to_iter().
728  * The main differences between this and typical _copy_to_iter() are:
729  * * Typical tail/residue handling after a fault retries the copy
730  *   byte-by-byte until the fault happens again. Re-triggering machine
731  *   checks is potentially fatal so the implementation uses source
732  *   alignment and poison alignment assumptions to avoid re-triggering
733  *   hardware exceptions.
734  *
735  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
736  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
737  *   a short copy.
738  *
739  * Return: number of bytes copied (may be %0)
740  */
741 size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
742 {
743         if (unlikely(iov_iter_is_pipe(i)))
744                 return copy_mc_pipe_to_iter(addr, bytes, i);
745         if (iter_is_iovec(i))
746                 might_fault();
747         __iterate_and_advance(i, bytes, base, len, off,
748                 copyout_mc(base, addr + off, len),
749                 copy_mc_to_kernel(base, addr + off, len)
750         )
751
752         return bytes;
753 }
754 EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
755 #endif /* CONFIG_ARCH_HAS_COPY_MC */
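
/*
 * Illustrative sketch (not part of the original file): as the comment above
 * notes, the pmem driver's dax read path (dax_copy_to_iter()) ends up
 * handing the device address to _copy_mc_to_iter(), so a poisoned cacheline
 * surfaces as a short copy rather than a fatal machine check.  The function
 * name below is hypothetical.
 */
#ifdef CONFIG_ARCH_HAS_COPY_MC
static inline size_t example_dax_read_mc(void *dax_addr, size_t bytes,
					 struct iov_iter *i)
{
	return _copy_mc_to_iter(dax_addr, bytes, i);
}
#endif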
756
757 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
758 {
759         if (unlikely(iov_iter_is_pipe(i))) {
760                 WARN_ON(1);
761                 return 0;
762         }
763         if (iter_is_iovec(i))
764                 might_fault();
765         iterate_and_advance(i, bytes, base, len, off,
766                 copyin(addr + off, base, len),
767                 memcpy(addr + off, base, len)
768         )
769
770         return bytes;
771 }
772 EXPORT_SYMBOL(_copy_from_iter);
773
774 size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
775 {
776         if (unlikely(iov_iter_is_pipe(i))) {
777                 WARN_ON(1);
778                 return 0;
779         }
780         iterate_and_advance(i, bytes, base, len, off,
781                 __copy_from_user_inatomic_nocache(addr + off, base, len),
782                 memcpy(addr + off, base, len)
783         )
784
785         return bytes;
786 }
787 EXPORT_SYMBOL(_copy_from_iter_nocache);
788
789 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
790 /**
791  * _copy_from_iter_flushcache - write destination through cpu cache
792  * @addr: destination kernel address
793  * @bytes: total transfer length
794  * @i: source iterator
795  *
796  * The pmem driver arranges for filesystem-dax to use this facility via
797  * dax_copy_from_iter() to ensure that writes to persistent memory
798  * are flushed through the CPU cache. It is differentiated from
799  * _copy_from_iter_nocache() in that it guarantees all data is flushed
800  * for all iterator types. _copy_from_iter_nocache() only attempts to
801  * bypass the cache for the ITER_IOVEC case, and on some archs may use
802  * instructions that strand dirty data in the cache.
803  *
804  * Return: number of bytes copied (may be %0)
805  */
806 size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
807 {
808         if (unlikely(iov_iter_is_pipe(i))) {
809                 WARN_ON(1);
810                 return 0;
811         }
812         iterate_and_advance(i, bytes, base, len, off,
813                 __copy_from_user_flushcache(addr + off, base, len),
814                 memcpy_flushcache(addr + off, base, len)
815         )
816
817         return bytes;
818 }
819 EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
820 #endif
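
/*
 * Illustrative sketch (not part of the original file): the corresponding
 * pmem write path (dax_copy_from_iter()) uses _copy_from_iter_flushcache()
 * so that data written to persistent memory is pushed out of the CPU cache
 * as it is copied.  The function name below is hypothetical.
 */
#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
static inline size_t example_dax_write_flushcache(void *dax_addr, size_t bytes,
						  struct iov_iter *i)
{
	return _copy_from_iter_flushcache(dax_addr, bytes, i);
}
#endif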
821
822 static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
823 {
824         struct page *head;
825         size_t v = n + offset;
826
827         /*
828          * The general case needs to access the page order to
829          * compute the page size.
830          * However, we mostly deal with order-0 pages and thus can
831          * avoid a possible cache line miss for requests that fit all
832          * page orders.
833          */
834         if (n <= v && v <= PAGE_SIZE)
835                 return true;
836
837         head = compound_head(page);
838         v += (page - head) << PAGE_SHIFT;
839
840         if (likely(n <= v && v <= (page_size(head))))
841                 return true;
842         WARN_ON(1);
843         return false;
844 }
845
846 static size_t __copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
847                          struct iov_iter *i)
848 {
849         if (likely(iter_is_iovec(i)))
850                 return copy_page_to_iter_iovec(page, offset, bytes, i);
851         if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
852                 void *kaddr = kmap_local_page(page);
853                 size_t wanted = _copy_to_iter(kaddr + offset, bytes, i);
854                 kunmap_local(kaddr);
855                 return wanted;
856         }
857         if (iov_iter_is_pipe(i))
858                 return copy_page_to_iter_pipe(page, offset, bytes, i);
859         if (unlikely(iov_iter_is_discard(i))) {
860                 if (unlikely(i->count < bytes))
861                         bytes = i->count;
862                 i->count -= bytes;
863                 return bytes;
864         }
865         WARN_ON(1);
866         return 0;
867 }
868
869 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
870                          struct iov_iter *i)
871 {
872         size_t res = 0;
873         if (unlikely(!page_copy_sane(page, offset, bytes)))
874                 return 0;
875         page += offset / PAGE_SIZE; // first subpage
876         offset %= PAGE_SIZE;
877         while (1) {
878                 size_t n = __copy_page_to_iter(page, offset,
879                                 min(bytes, (size_t)PAGE_SIZE - offset), i);
880                 res += n;
881                 bytes -= n;
882                 if (!bytes || !n)
883                         break;
884                 offset += n;
885                 if (offset == PAGE_SIZE) {
886                         page++;
887                         offset = 0;
888                 }
889         }
890         return res;
891 }
892 EXPORT_SYMBOL(copy_page_to_iter);
893
894 size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
895                          struct iov_iter *i)
896 {
897         if (unlikely(!page_copy_sane(page, offset, bytes)))
898                 return 0;
899         if (likely(iter_is_iovec(i)))
900                 return copy_page_from_iter_iovec(page, offset, bytes, i);
901         if (iov_iter_is_bvec(i) || iov_iter_is_kvec(i) || iov_iter_is_xarray(i)) {
902                 void *kaddr = kmap_local_page(page);
903                 size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
904                 kunmap_local(kaddr);
905                 return wanted;
906         }
907         WARN_ON(1);
908         return 0;
909 }
910 EXPORT_SYMBOL(copy_page_from_iter);
911
912 static size_t pipe_zero(size_t bytes, struct iov_iter *i)
913 {
914         struct pipe_inode_info *pipe = i->pipe;
915         unsigned int p_mask = pipe->ring_size - 1;
916         unsigned int i_head;
917         size_t n, off;
918
919         if (!sanity(i))
920                 return 0;
921
922         bytes = n = push_pipe(i, bytes, &i_head, &off);
923         if (unlikely(!n))
924                 return 0;
925
926         do {
927                 size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
928                 char *p = kmap_local_page(pipe->bufs[i_head & p_mask].page);
929                 memset(p + off, 0, chunk);
930                 kunmap_local(p);
931                 i->head = i_head;
932                 i->iov_offset = off + chunk;
933                 n -= chunk;
934                 off = 0;
935                 i_head++;
936         } while (n);
937         i->count -= bytes;
938         return bytes;
939 }
940
941 size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
942 {
943         if (unlikely(iov_iter_is_pipe(i)))
944                 return pipe_zero(bytes, i);
945         iterate_and_advance(i, bytes, base, len, count,
946                 clear_user(base, len),
947                 memset(base, 0, len)
948         )
949
950         return bytes;
951 }
952 EXPORT_SYMBOL(iov_iter_zero);
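
/*
 * Illustrative sketch (not part of the original file): read paths use
 * iov_iter_zero() to pad out the part of a request they have no data for,
 * such as a hole or the range beyond EOF.  The function name below is
 * hypothetical.
 */
static inline size_t example_fill_hole(struct iov_iter *to, size_t length)
{
	/* returns how many bytes of the iterator were actually zeroed */
	return iov_iter_zero(length, to);
}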
953
954 size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, size_t bytes,
955                                   struct iov_iter *i)
956 {
957         char *kaddr = kmap_atomic(page), *p = kaddr + offset;
958         if (unlikely(!page_copy_sane(page, offset, bytes))) {
959                 kunmap_atomic(kaddr);
960                 return 0;
961         }
962         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
963                 kunmap_atomic(kaddr);
964                 WARN_ON(1);
965                 return 0;
966         }
967         iterate_and_advance(i, bytes, base, len, off,
968                 copyin(p + off, base, len),
969                 memcpy(p + off, base, len)
970         )
971         kunmap_atomic(kaddr);
972         return bytes;
973 }
974 EXPORT_SYMBOL(copy_page_from_iter_atomic);
975
976 static inline void pipe_truncate(struct iov_iter *i)
977 {
978         struct pipe_inode_info *pipe = i->pipe;
979         unsigned int p_tail = pipe->tail;
980         unsigned int p_head = pipe->head;
981         unsigned int p_mask = pipe->ring_size - 1;
982
983         if (!pipe_empty(p_head, p_tail)) {
984                 struct pipe_buffer *buf;
985                 unsigned int i_head = i->head;
986                 size_t off = i->iov_offset;
987
988                 if (off) {
989                         buf = &pipe->bufs[i_head & p_mask];
990                         buf->len = off - buf->offset;
991                         i_head++;
992                 }
993                 while (p_head != i_head) {
994                         p_head--;
995                         pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]);
996                 }
997
998                 pipe->head = p_head;
999         }
1000 }
1001
1002 static void pipe_advance(struct iov_iter *i, size_t size)
1003 {
1004         struct pipe_inode_info *pipe = i->pipe;
1005         if (size) {
1006                 struct pipe_buffer *buf;
1007                 unsigned int p_mask = pipe->ring_size - 1;
1008                 unsigned int i_head = i->head;
1009                 size_t off = i->iov_offset, left = size;
1010
1011                 if (off) /* make it relative to the beginning of buffer */
1012                         left += off - pipe->bufs[i_head & p_mask].offset;
1013                 while (1) {
1014                         buf = &pipe->bufs[i_head & p_mask];
1015                         if (left <= buf->len)
1016                                 break;
1017                         left -= buf->len;
1018                         i_head++;
1019                 }
1020                 i->head = i_head;
1021                 i->iov_offset = buf->offset + left;
1022         }
1023         i->count -= size;
1024         /* ... and discard everything past that point */
1025         pipe_truncate(i);
1026 }
1027
1028 static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
1029 {
1030         struct bvec_iter bi;
1031
1032         bi.bi_size = i->count;
1033         bi.bi_bvec_done = i->iov_offset;
1034         bi.bi_idx = 0;
1035         bvec_iter_advance(i->bvec, &bi, size);
1036
1037         i->bvec += bi.bi_idx;
1038         i->nr_segs -= bi.bi_idx;
1039         i->count = bi.bi_size;
1040         i->iov_offset = bi.bi_bvec_done;
1041 }
1042
1043 static void iov_iter_iovec_advance(struct iov_iter *i, size_t size)
1044 {
1045         const struct iovec *iov, *end;
1046
1047         if (!i->count)
1048                 return;
1049         i->count -= size;
1050
1051         size += i->iov_offset; // from beginning of current segment
1052         for (iov = i->iov, end = iov + i->nr_segs; iov < end; iov++) {
1053                 if (likely(size < iov->iov_len))
1054                         break;
1055                 size -= iov->iov_len;
1056         }
1057         i->iov_offset = size;
1058         i->nr_segs -= iov - i->iov;
1059         i->iov = iov;
1060 }
1061
1062 void iov_iter_advance(struct iov_iter *i, size_t size)
1063 {
1064         if (unlikely(i->count < size))
1065                 size = i->count;
1066         if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i))) {
1067                 /* iovec and kvec have identical layouts */
1068                 iov_iter_iovec_advance(i, size);
1069         } else if (iov_iter_is_bvec(i)) {
1070                 iov_iter_bvec_advance(i, size);
1071         } else if (iov_iter_is_pipe(i)) {
1072                 pipe_advance(i, size);
1073         } else if (unlikely(iov_iter_is_xarray(i))) {
1074                 i->iov_offset += size;
1075                 i->count -= size;
1076         } else if (iov_iter_is_discard(i)) {
1077                 i->count -= size;
1078         }
1079 }
1080 EXPORT_SYMBOL(iov_iter_advance);
1081
1082 void iov_iter_revert(struct iov_iter *i, size_t unroll)
1083 {
1084         if (!unroll)
1085                 return;
1086         if (WARN_ON(unroll > MAX_RW_COUNT))
1087                 return;
1088         i->count += unroll;
1089         if (unlikely(iov_iter_is_pipe(i))) {
1090                 struct pipe_inode_info *pipe = i->pipe;
1091                 unsigned int p_mask = pipe->ring_size - 1;
1092                 unsigned int i_head = i->head;
1093                 size_t off = i->iov_offset;
1094                 while (1) {
1095                         struct pipe_buffer *b = &pipe->bufs[i_head & p_mask];
1096                         size_t n = off - b->offset;
1097                         if (unroll < n) {
1098                                 off -= unroll;
1099                                 break;
1100                         }
1101                         unroll -= n;
1102                         if (!unroll && i_head == i->start_head) {
1103                                 off = 0;
1104                                 break;
1105                         }
1106                         i_head--;
1107                         b = &pipe->bufs[i_head & p_mask];
1108                         off = b->offset + b->len;
1109                 }
1110                 i->iov_offset = off;
1111                 i->head = i_head;
1112                 pipe_truncate(i);
1113                 return;
1114         }
1115         if (unlikely(iov_iter_is_discard(i)))
1116                 return;
1117         if (unroll <= i->iov_offset) {
1118                 i->iov_offset -= unroll;
1119                 return;
1120         }
1121         unroll -= i->iov_offset;
1122         if (iov_iter_is_xarray(i)) {
1123                 BUG(); /* We should never go beyond the start of the specified
1124                         * range since we might then be straying into pages that
1125                         * aren't pinned.
1126                         */
1127         } else if (iov_iter_is_bvec(i)) {
1128                 const struct bio_vec *bvec = i->bvec;
1129                 while (1) {
1130                         size_t n = (--bvec)->bv_len;
1131                         i->nr_segs++;
1132                         if (unroll <= n) {
1133                                 i->bvec = bvec;
1134                                 i->iov_offset = n - unroll;
1135                                 return;
1136                         }
1137                         unroll -= n;
1138                 }
1139         } else { /* same logic for iovec and kvec */
1140                 const struct iovec *iov = i->iov;
1141                 while (1) {
1142                         size_t n = (--iov)->iov_len;
1143                         i->nr_segs++;
1144                         if (unroll <= n) {
1145                                 i->iov = iov;
1146                                 i->iov_offset = n - unroll;
1147                                 return;
1148                         }
1149                         unroll -= n;
1150                 }
1151         }
1152 }
1153 EXPORT_SYMBOL(iov_iter_revert);
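
/*
 * Illustrative sketch (not part of the original file): the copy helpers
 * above advance the iterator as they go, so a caller whose own work fails
 * afterwards must hand the consumed bytes back with iov_iter_revert()
 * before reporting the error.  The function names below are hypothetical.
 */
static inline ssize_t example_copy_then_commit(void *dst, size_t len,
					       struct iov_iter *from,
					       int (*commit)(void *, size_t))
{
	size_t copied = copy_from_iter(dst, len, from);	/* advances @from */
	int err = commit(dst, copied);

	if (err) {
		iov_iter_revert(from, copied);	/* undo the advance */
		return err;
	}
	return copied;
}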
1154
1155 /*
1156  * Return the count of just the current iov_iter segment.
1157  */
1158 size_t iov_iter_single_seg_count(const struct iov_iter *i)
1159 {
1160         if (i->nr_segs > 1) {
1161                 if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1162                         return min(i->count, i->iov->iov_len - i->iov_offset);
1163                 if (iov_iter_is_bvec(i))
1164                         return min(i->count, i->bvec->bv_len - i->iov_offset);
1165         }
1166         return i->count;
1167 }
1168 EXPORT_SYMBOL(iov_iter_single_seg_count);
1169
1170 void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
1171                         const struct kvec *kvec, unsigned long nr_segs,
1172                         size_t count)
1173 {
1174         WARN_ON(direction & ~(READ | WRITE));
1175         *i = (struct iov_iter){
1176                 .iter_type = ITER_KVEC,
1177                 .data_source = direction,
1178                 .kvec = kvec,
1179                 .nr_segs = nr_segs,
1180                 .iov_offset = 0,
1181                 .count = count
1182         };
1183 }
1184 EXPORT_SYMBOL(iov_iter_kvec);
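
/*
 * Illustrative sketch (not part of the original file): wrapping a kernel
 * buffer in a single-segment ITER_KVEC iterator so it can be handed to an
 * interface that only accepts an iov_iter.  The kvec must stay live for as
 * long as the iterator is in use.  The function name below is hypothetical.
 */
static inline void example_kvec_source(struct iov_iter *iter, struct kvec *kv,
				       void *buf, size_t len)
{
	kv->iov_base = buf;
	kv->iov_len = len;
	/* WRITE: the kvec is the data source for a later copy_from_iter() */
	iov_iter_kvec(iter, WRITE, kv, 1, len);
}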
1185
1186 void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
1187                         const struct bio_vec *bvec, unsigned long nr_segs,
1188                         size_t count)
1189 {
1190         WARN_ON(direction & ~(READ | WRITE));
1191         *i = (struct iov_iter){
1192                 .iter_type = ITER_BVEC,
1193                 .data_source = direction,
1194                 .bvec = bvec,
1195                 .nr_segs = nr_segs,
1196                 .iov_offset = 0,
1197                 .count = count
1198         };
1199 }
1200 EXPORT_SYMBOL(iov_iter_bvec);
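
/*
 * Illustrative sketch (not part of the original file): describing one page
 * as a single-segment ITER_BVEC iterator, the form used for page-backed
 * kernel I/O.  The bio_vec must stay live for as long as the iterator is
 * in use.  The function name below is hypothetical.
 */
static inline void example_bvec_dest(struct iov_iter *iter, struct bio_vec *bv,
				     struct page *page, unsigned int len)
{
	bv->bv_page = page;
	bv->bv_offset = 0;
	bv->bv_len = len;
	/* READ: data will be copied into the page, e.g. via copy_to_iter() */
	iov_iter_bvec(iter, READ, bv, 1, len);
}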
1201
1202 void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
1203                         struct pipe_inode_info *pipe,
1204                         size_t count)
1205 {
1206         BUG_ON(direction != READ);
1207         WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size));
1208         *i = (struct iov_iter){
1209                 .iter_type = ITER_PIPE,
1210                 .data_source = false,
1211                 .pipe = pipe,
1212                 .head = pipe->head,
1213                 .start_head = pipe->head,
1214                 .iov_offset = 0,
1215                 .count = count
1216         };
1217 }
1218 EXPORT_SYMBOL(iov_iter_pipe);
1219
1220 /**
1221  * iov_iter_xarray - Initialise an I/O iterator to use the pages in an xarray
1222  * @i: The iterator to initialise.
1223  * @direction: The direction of the transfer.
1224  * @xarray: The xarray to access.
1225  * @start: The start file position.
1226  * @count: The size of the I/O buffer in bytes.
1227  *
1228  * Set up an I/O iterator to either draw data out of the pages attached to an
1229  * inode or to inject data into those pages.  The pages *must* be prevented
1230  * from evaporation, either by taking a ref on them or locking them by the
1231  * from being released while in use, either by the caller taking a ref on
1232  * them or locking them.
1233 void iov_iter_xarray(struct iov_iter *i, unsigned int direction,
1234                      struct xarray *xarray, loff_t start, size_t count)
1235 {
1236         BUG_ON(direction & ~1);
1237         *i = (struct iov_iter) {
1238                 .iter_type = ITER_XARRAY,
1239                 .data_source = direction,
1240                 .xarray = xarray,
1241                 .xarray_start = start,
1242                 .count = count,
1243                 .iov_offset = 0
1244         };
1245 }
1246 EXPORT_SYMBOL(iov_iter_xarray);
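
/*
 * Illustrative sketch (not part of the original file): network filesystems
 * point an ITER_XARRAY iterator at an inode's page cache so a read reply
 * can be copied straight into the cached pages, which the caller must
 * already have locked or taken references on.  The function name below is
 * hypothetical.
 */
static inline void example_xarray_dest(struct iov_iter *iter,
				       struct address_space *mapping,
				       loff_t start, size_t len)
{
	/* READ: received data will be written into the xarray's pages */
	iov_iter_xarray(iter, READ, &mapping->i_pages, start, len);
}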
1247
1248 /**
1249  * iov_iter_discard - Initialise an I/O iterator that discards data
1250  * @i: The iterator to initialise.
1251  * @direction: The direction of the transfer.
1252  * @count: The size of the I/O buffer in bytes.
1253  *
1254  * Set up an I/O iterator that just discards everything that's written to it.
1255  * It's only available as a READ iterator.
1256  */
1257 void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count)
1258 {
1259         BUG_ON(direction != READ);
1260         *i = (struct iov_iter){
1261                 .iter_type = ITER_DISCARD,
1262                 .data_source = false,
1263                 .count = count,
1264                 .iov_offset = 0
1265         };
1266 }
1267 EXPORT_SYMBOL(iov_iter_discard);
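
/*
 * Illustrative sketch (not part of the original file): a receive path that
 * has to consume bytes it does not care about can copy them into an
 * ITER_DISCARD iterator, which just counts them down and throws the data
 * away.  The function name below is hypothetical.
 */
static inline size_t example_skip_bytes(const void *src, size_t count)
{
	struct iov_iter iter;

	iov_iter_discard(&iter, READ, count);
	/* copying into a discard iterator drops the data on the floor */
	return copy_to_iter(src, count, &iter);
}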
1268
1269 static unsigned long iov_iter_alignment_iovec(const struct iov_iter *i)
1270 {
1271         unsigned long res = 0;
1272         size_t size = i->count;
1273         size_t skip = i->iov_offset;
1274         unsigned k;
1275
1276         for (k = 0; k < i->nr_segs; k++, skip = 0) {
1277                 size_t len = i->iov[k].iov_len - skip;
1278                 if (len) {
1279                         res |= (unsigned long)i->iov[k].iov_base + skip;
1280                         if (len > size)
1281                                 len = size;
1282                         res |= len;
1283                         size -= len;
1284                         if (!size)
1285                                 break;
1286                 }
1287         }
1288         return res;
1289 }
1290
1291 static unsigned long iov_iter_alignment_bvec(const struct iov_iter *i)
1292 {
1293         unsigned res = 0;
1294         size_t size = i->count;
1295         unsigned skip = i->iov_offset;
1296         unsigned k;
1297
1298         for (k = 0; k < i->nr_segs; k++, skip = 0) {
1299                 size_t len = i->bvec[k].bv_len - skip;
1300                 res |= (unsigned long)i->bvec[k].bv_offset + skip;
1301                 if (len > size)
1302                         len = size;
1303                 res |= len;
1304                 size -= len;
1305                 if (!size)
1306                         break;
1307         }
1308         return res;
1309 }
1310
1311 unsigned long iov_iter_alignment(const struct iov_iter *i)
1312 {
1313         /* iovec and kvec have identical layouts */
1314         if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1315                 return iov_iter_alignment_iovec(i);
1316
1317         if (iov_iter_is_bvec(i))
1318                 return iov_iter_alignment_bvec(i);
1319
1320         if (iov_iter_is_pipe(i)) {
1321                 unsigned int p_mask = i->pipe->ring_size - 1;
1322                 size_t size = i->count;
1323
1324                 if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask]))
1325                         return size | i->iov_offset;
1326                 return size;
1327         }
1328
1329         if (iov_iter_is_xarray(i))
1330                 return (i->xarray_start + i->iov_offset) | i->count;
1331
1332         return 0;
1333 }
1334 EXPORT_SYMBOL(iov_iter_alignment);
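
/*
 * Illustrative sketch (not part of the original file): direct-I/O code
 * commonly uses iov_iter_alignment() to reject requests whose buffer
 * addresses or segment lengths are not aligned to the device's logical
 * block size.  "blkmask" below is a hypothetical (block size - 1) value.
 */
static inline bool example_dio_misaligned(const struct iov_iter *iter,
					  loff_t pos, unsigned int blkmask)
{
	return (pos | iov_iter_alignment(iter)) & blkmask;
}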
1335
1336 unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
1337 {
1338         unsigned long res = 0;
1339         unsigned long v = 0;
1340         size_t size = i->count;
1341         unsigned k;
1342
1343         if (WARN_ON(!iter_is_iovec(i)))
1344                 return ~0U;
1345
1346         for (k = 0; k < i->nr_segs; k++) {
1347                 if (i->iov[k].iov_len) {
1348                         unsigned long base = (unsigned long)i->iov[k].iov_base;
1349                         if (v) // if not the first one
1350                                 res |= base | v; // this start | previous end
1351                         v = base + i->iov[k].iov_len;
1352                         if (size <= i->iov[k].iov_len)
1353                                 break;
1354                         size -= i->iov[k].iov_len;
1355                 }
1356         }
1357         return res;
1358 }
1359 EXPORT_SYMBOL(iov_iter_gap_alignment);
1360
1361 static inline ssize_t __pipe_get_pages(struct iov_iter *i,
1362                                 size_t maxsize,
1363                                 struct page **pages,
1364                                 int iter_head,
1365                                 size_t *start)
1366 {
1367         struct pipe_inode_info *pipe = i->pipe;
1368         unsigned int p_mask = pipe->ring_size - 1;
1369         ssize_t n = push_pipe(i, maxsize, &iter_head, start);
1370         if (!n)
1371                 return -EFAULT;
1372
1373         maxsize = n;
1374         n += *start;
1375         while (n > 0) {
1376                 get_page(*pages++ = pipe->bufs[iter_head & p_mask].page);
1377                 iter_head++;
1378                 n -= PAGE_SIZE;
1379         }
1380
1381         return maxsize;
1382 }
1383
1384 static ssize_t pipe_get_pages(struct iov_iter *i,
1385                    struct page **pages, size_t maxsize, unsigned maxpages,
1386                    size_t *start)
1387 {
1388         unsigned int iter_head, npages;
1389         size_t capacity;
1390
1391         if (!sanity(i))
1392                 return -EFAULT;
1393
1394         data_start(i, &iter_head, start);
1395         /* Amount of free space: some of this one + all after this one */
1396         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1397         capacity = min(npages, maxpages) * PAGE_SIZE - *start;
1398
1399         return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start);
1400 }
1401
1402 static ssize_t iter_xarray_populate_pages(struct page **pages, struct xarray *xa,
1403                                           pgoff_t index, unsigned int nr_pages)
1404 {
1405         XA_STATE(xas, xa, index);
1406         struct page *page;
1407         unsigned int ret = 0;
1408
1409         rcu_read_lock();
1410         for (page = xas_load(&xas); page; page = xas_next(&xas)) {
1411                 if (xas_retry(&xas, page))
1412                         continue;
1413
1414                 /* Has the page moved or been split? */
1415                 if (unlikely(page != xas_reload(&xas))) {
1416                         xas_reset(&xas);
1417                         continue;
1418                 }
1419
1420                 pages[ret] = find_subpage(page, xas.xa_index);
1421                 get_page(pages[ret]);
1422                 if (++ret == nr_pages)
1423                         break;
1424         }
1425         rcu_read_unlock();
1426         return ret;
1427 }
1428
1429 static ssize_t iter_xarray_get_pages(struct iov_iter *i,
1430                                      struct page **pages, size_t maxsize,
1431                                      unsigned maxpages, size_t *_start_offset)
1432 {
1433         unsigned nr, offset;
1434         pgoff_t index, count;
1435         size_t size = maxsize, actual;
1436         loff_t pos;
1437
1438         if (!size || !maxpages)
1439                 return 0;
1440
1441         pos = i->xarray_start + i->iov_offset;
1442         index = pos >> PAGE_SHIFT;
1443         offset = pos & ~PAGE_MASK;
1444         *_start_offset = offset;
1445
1446         count = 1;
1447         if (size > PAGE_SIZE - offset) {
1448                 size -= PAGE_SIZE - offset;
1449                 count += size >> PAGE_SHIFT;
1450                 size &= ~PAGE_MASK;
1451                 if (size)
1452                         count++;
1453         }
1454
1455         if (count > maxpages)
1456                 count = maxpages;
1457
1458         nr = iter_xarray_populate_pages(pages, i->xarray, index, count);
1459         if (nr == 0)
1460                 return 0;
1461
1462         actual = PAGE_SIZE * nr;
1463         actual -= offset;
1464         if (nr == count && size > 0) {
1465                 unsigned last_offset = (nr > 1) ? 0 : offset;
1466                 actual -= PAGE_SIZE - (last_offset + size);
1467         }
1468         return actual;
1469 }
1470
1471 /* must only be called on a non-empty ITER_IOVEC iterator */
1472 static unsigned long first_iovec_segment(const struct iov_iter *i,
1473                                          size_t *size, size_t *start,
1474                                          size_t maxsize, unsigned maxpages)
1475 {
1476         size_t skip;
1477         long k;
1478
1479         for (k = 0, skip = i->iov_offset; k < i->nr_segs; k++, skip = 0) {
1480                 unsigned long addr = (unsigned long)i->iov[k].iov_base + skip;
1481                 size_t len = i->iov[k].iov_len - skip;
1482
1483                 if (unlikely(!len))
1484                         continue;
1485                 if (len > maxsize)
1486                         len = maxsize;
1487                 len += (*start = addr % PAGE_SIZE);
1488                 if (len > maxpages * PAGE_SIZE)
1489                         len = maxpages * PAGE_SIZE;
1490                 *size = len;
1491                 return addr & PAGE_MASK;
1492         }
1493         BUG(); // if it had been empty, we wouldn't get called
1494 }
1495
1496 /* must only be called on a non-empty ITER_BVEC iterator */
1497 static struct page *first_bvec_segment(const struct iov_iter *i,
1498                                        size_t *size, size_t *start,
1499                                        size_t maxsize, unsigned maxpages)
1500 {
1501         struct page *page;
1502         size_t skip = i->iov_offset, len;
1503
1504         len = i->bvec->bv_len - skip;
1505         if (len > maxsize)
1506                 len = maxsize;
1507         skip += i->bvec->bv_offset;
1508         page = i->bvec->bv_page + skip / PAGE_SIZE;
1509         len += (*start = skip % PAGE_SIZE);
1510         if (len > maxpages * PAGE_SIZE)
1511                 len = maxpages * PAGE_SIZE;
1512         *size = len;
1513         return page;
1514 }
1515
1516 ssize_t iov_iter_get_pages(struct iov_iter *i,
1517                    struct page **pages, size_t maxsize, unsigned maxpages,
1518                    size_t *start)
1519 {
1520         size_t len;
1521         int n, res;
1522
1523         if (maxsize > i->count)
1524                 maxsize = i->count;
1525         if (!maxsize)
1526                 return 0;
1527
1528         if (likely(iter_is_iovec(i))) {
1529                 unsigned int gup_flags = 0;
1530                 unsigned long addr;
1531
1532                 if (iov_iter_rw(i) != WRITE)
1533                         gup_flags |= FOLL_WRITE;
1534                 if (i->nofault)
1535                         gup_flags |= FOLL_NOFAULT;
1536
1537                 addr = first_iovec_segment(i, &len, start, maxsize, maxpages);
1538                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1539                 res = get_user_pages_fast(addr, n, gup_flags, pages);
1540                 if (unlikely(res <= 0))
1541                         return res;
1542                 return (res == n ? len : res * PAGE_SIZE) - *start;
1543         }
1544         if (iov_iter_is_bvec(i)) {
1545                 struct page *page;
1546
1547                 page = first_bvec_segment(i, &len, start, maxsize, maxpages);
1548                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1549                 while (n--)
1550                         get_page(*pages++ = page++);
1551                 return len - *start;
1552         }
1553         if (iov_iter_is_pipe(i))
1554                 return pipe_get_pages(i, pages, maxsize, maxpages, start);
1555         if (iov_iter_is_xarray(i))
1556                 return iter_xarray_get_pages(i, pages, maxsize, maxpages, start);
1557         return -EFAULT;
1558 }
1559 EXPORT_SYMBOL(iov_iter_get_pages);
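
/*
 * Example usage (illustrative sketch; the caller, array size and error
 * handling are hypothetical): pin up to 16 pages backing the front of
 * @iter, hand them to a device, then drop the references.  @start receives
 * the offset into the first page; the iterator itself is not advanced, so
 * the caller does that once the bytes have been consumed.
 *
 *	struct page *pages[16];
 *	size_t start;
 *	ssize_t bytes;
 *	int k, npages;
 *
 *	bytes = iov_iter_get_pages(iter, pages, SIZE_MAX, ARRAY_SIZE(pages),
 *				   &start);
 *	if (bytes <= 0)
 *		return bytes;
 *	npages = DIV_ROUND_UP(start + bytes, PAGE_SIZE);
 *	// ... use pages[0..npages-1], data starts at offset "start" ...
 *	for (k = 0; k < npages; k++)
 *		put_page(pages[k]);
 *	iov_iter_advance(iter, bytes);
 */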
1560
1561 static struct page **get_pages_array(size_t n)
1562 {
1563         return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
1564 }
1565
1566 static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
1567                    struct page ***pages, size_t maxsize,
1568                    size_t *start)
1569 {
1570         struct page **p;
1571         unsigned int iter_head, npages;
1572         ssize_t n;
1573
1574         if (!sanity(i))
1575                 return -EFAULT;
1576
1577         data_start(i, &iter_head, start);
1578         /* Amount of free space: some of this one + all after this one */
1579         npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1580         n = npages * PAGE_SIZE - *start;
1581         if (maxsize > n)
1582                 maxsize = n;
1583         else
1584                 npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
1585         p = get_pages_array(npages);
1586         if (!p)
1587                 return -ENOMEM;
1588         n = __pipe_get_pages(i, maxsize, p, iter_head, start);
1589         if (n > 0)
1590                 *pages = p;
1591         else
1592                 kvfree(p);
1593         return n;
1594 }
1595
1596 static ssize_t iter_xarray_get_pages_alloc(struct iov_iter *i,
1597                                            struct page ***pages, size_t maxsize,
1598                                            size_t *_start_offset)
1599 {
1600         struct page **p;
1601         unsigned nr, offset;
1602         pgoff_t index, count;
1603         size_t size = maxsize, actual;
1604         loff_t pos;
1605
1606         if (!size)
1607                 return 0;
1608
1609         pos = i->xarray_start + i->iov_offset;
1610         index = pos >> PAGE_SHIFT;
1611         offset = pos & ~PAGE_MASK;
1612         *_start_offset = offset;
1613
1614         count = 1;
1615         if (size > PAGE_SIZE - offset) {
1616                 size -= PAGE_SIZE - offset;
1617                 count += size >> PAGE_SHIFT;
1618                 size &= ~PAGE_MASK;
1619                 if (size)
1620                         count++;
1621         }
1622
1623         p = get_pages_array(count);
1624         if (!p)
1625                 return -ENOMEM;
1626         *pages = p;
1627
1628         nr = iter_xarray_populate_pages(p, i->xarray, index, count);
1629         if (nr == 0)
1630                 return 0;
1631
1632         actual = PAGE_SIZE * nr;
1633         actual -= offset;
1634         if (nr == count && size > 0) {
1635                 unsigned last_offset = (nr > 1) ? 0 : offset;
1636                 actual -= PAGE_SIZE - (last_offset + size);
1637         }
1638         return actual;
1639 }
1640
1641 ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
1642                    struct page ***pages, size_t maxsize,
1643                    size_t *start)
1644 {
1645         struct page **p;
1646         size_t len;
1647         int n, res;
1648
1649         if (maxsize > i->count)
1650                 maxsize = i->count;
1651         if (!maxsize)
1652                 return 0;
1653
1654         if (likely(iter_is_iovec(i))) {
1655                 unsigned int gup_flags = 0;
1656                 unsigned long addr;
1657
1658                 if (iov_iter_rw(i) != WRITE)
1659                         gup_flags |= FOLL_WRITE;
1660                 if (i->nofault)
1661                         gup_flags |= FOLL_NOFAULT;
1662
1663                 addr = first_iovec_segment(i, &len, start, maxsize, ~0U);
1664                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1665                 p = get_pages_array(n);
1666                 if (!p)
1667                         return -ENOMEM;
1668                 res = get_user_pages_fast(addr, n, gup_flags, p);
1669                 if (unlikely(res <= 0)) {
1670                         kvfree(p);
1671                         *pages = NULL;
1672                         return res;
1673                 }
1674                 *pages = p;
1675                 return (res == n ? len : res * PAGE_SIZE) - *start;
1676         }
1677         if (iov_iter_is_bvec(i)) {
1678                 struct page *page;
1679
1680                 page = first_bvec_segment(i, &len, start, maxsize, ~0U);
1681                 n = DIV_ROUND_UP(len, PAGE_SIZE);
1682                 *pages = p = get_pages_array(n);
1683                 if (!p)
1684                         return -ENOMEM;
1685                 while (n--)
1686                         get_page(*p++ = page++);
1687                 return len - *start;
1688         }
1689         if (iov_iter_is_pipe(i))
1690                 return pipe_get_pages_alloc(i, pages, maxsize, start);
1691         if (iov_iter_is_xarray(i))
1692                 return iter_xarray_get_pages_alloc(i, pages, maxsize, start);
1693         return -EFAULT;
1694 }
1695 EXPORT_SYMBOL(iov_iter_get_pages_alloc);
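
/*
 * Example usage (illustrative sketch): same as iov_iter_get_pages(), except
 * that the page-pointer array is allocated by the helper and must be
 * released with kvfree() once the individual page references are dropped.
 *
 *	struct page **pages;
 *	size_t start;
 *	ssize_t bytes;
 *	int k, npages;
 *
 *	bytes = iov_iter_get_pages_alloc(iter, &pages, SIZE_MAX, &start);
 *	if (bytes <= 0)
 *		return bytes;
 *	npages = DIV_ROUND_UP(start + bytes, PAGE_SIZE);
 *	// ... use the pinned pages ...
 *	for (k = 0; k < npages; k++)
 *		put_page(pages[k]);
 *	kvfree(pages);
 */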
1696
1697 size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
1698                                struct iov_iter *i)
1699 {
1700         __wsum sum, next;
1701         sum = *csum;
1702         if (unlikely(iov_iter_is_pipe(i) || iov_iter_is_discard(i))) {
1703                 WARN_ON(1);
1704                 return 0;
1705         }
1706         iterate_and_advance(i, bytes, base, len, off, ({
1707                 next = csum_and_copy_from_user(base, addr + off, len);
1708                 sum = csum_block_add(sum, next, off);
1709                 next ? 0 : len;
1710         }), ({
1711                 sum = csum_and_memcpy(addr + off, base, len, sum, off);
1712         })
1713         )
1714         *csum = sum;
1715         return bytes;
1716 }
1717 EXPORT_SYMBOL(csum_and_copy_from_iter);
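
/*
 * Example usage (illustrative sketch): copy @len bytes out of a user-backed
 * iterator into a kernel buffer @to while accumulating them into a running
 * checksum, roughly what the datagram send paths do.  A short return means
 * the source faulted part way through.
 *
 *	__wsum csum = 0;
 *
 *	if (csum_and_copy_from_iter(to, len, &csum, from) != len)
 *		return -EFAULT;
 */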
1718
1719 size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
1720                              struct iov_iter *i)
1721 {
1722         struct csum_state *csstate = _csstate;
1723         __wsum sum, next;
1724
1725         if (unlikely(iov_iter_is_discard(i))) {
1726                 WARN_ON(1);     /* for now */
1727                 return 0;
1728         }
1729
1730         sum = csum_shift(csstate->csum, csstate->off);
1731         if (unlikely(iov_iter_is_pipe(i)))
1732                 bytes = csum_and_copy_to_pipe_iter(addr, bytes, i, &sum);
1733         else iterate_and_advance(i, bytes, base, len, off, ({
1734                 next = csum_and_copy_to_user(addr + off, base, len);
1735                 sum = csum_block_add(sum, next, off);
1736                 next ? 0 : len;
1737         }), ({
1738                 sum = csum_and_memcpy(base, addr + off, len, sum, off);
1739         })
1740         )
1741         csstate->csum = csum_shift(sum, csstate->off);
1742         csstate->off += bytes;
1743         return bytes;
1744 }
1745 EXPORT_SYMBOL(csum_and_copy_to_iter);
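
/*
 * Example usage (illustrative sketch): copy a kernel buffer to the
 * destination iterator while accumulating its checksum.  struct csum_state
 * carries the running sum plus the offset of these bytes within the larger
 * message, so repeated calls checksum a multi-chunk payload correctly.
 *
 *	struct csum_state csdata = { .csum = 0, .off = 0 };
 *
 *	if (csum_and_copy_to_iter(kbuf, len, &csdata, to) != len)
 *		return -EFAULT;		// destination faulted mid-copy
 *	// csdata.csum now covers the copied bytes, csdata.off advanced by len
 */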
1746
1747 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
1748                 struct iov_iter *i)
1749 {
1750 #ifdef CONFIG_CRYPTO_HASH
1751         struct ahash_request *hash = hashp;
1752         struct scatterlist sg;
1753         size_t copied;
1754
1755         copied = copy_to_iter(addr, bytes, i);
1756         sg_init_one(&sg, addr, copied);
1757         ahash_request_set_crypt(hash, &sg, NULL, copied);
1758         crypto_ahash_update(hash);
1759         return copied;
1760 #else
1761         return 0;
1762 #endif
1763 }
1764 EXPORT_SYMBOL(hash_and_copy_to_iter);
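
/*
 * Example usage (illustrative sketch): @req is assumed to be an ahash
 * request that has already been set up and initialised by the caller; the
 * helper copies @buf to the iterator and feeds the same bytes into the
 * hash in one pass.  With CONFIG_CRYPTO_HASH=n it copies nothing and
 * returns 0, so a short return must be handled.
 *
 *	if (hash_and_copy_to_iter(buf, len, req, to) != len)
 *		return -EFAULT;
 */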
1765
1766 static int iov_npages(const struct iov_iter *i, int maxpages)
1767 {
1768         size_t skip = i->iov_offset, size = i->count;
1769         const struct iovec *p;
1770         int npages = 0;
1771
1772         for (p = i->iov; size; skip = 0, p++) {
1773                 unsigned offs = offset_in_page(p->iov_base + skip);
1774                 size_t len = min(p->iov_len - skip, size);
1775
1776                 if (len) {
1777                         size -= len;
1778                         npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1779                         if (unlikely(npages > maxpages))
1780                                 return maxpages;
1781                 }
1782         }
1783         return npages;
1784 }
1785
1786 static int bvec_npages(const struct iov_iter *i, int maxpages)
1787 {
1788         size_t skip = i->iov_offset, size = i->count;
1789         const struct bio_vec *p;
1790         int npages = 0;
1791
1792         for (p = i->bvec; size; skip = 0, p++) {
1793                 unsigned offs = (p->bv_offset + skip) % PAGE_SIZE;
1794                 size_t len = min(p->bv_len - skip, size);
1795
1796                 size -= len;
1797                 npages += DIV_ROUND_UP(offs + len, PAGE_SIZE);
1798                 if (unlikely(npages > maxpages))
1799                         return maxpages;
1800         }
1801         return npages;
1802 }
1803
1804 int iov_iter_npages(const struct iov_iter *i, int maxpages)
1805 {
1806         if (unlikely(!i->count))
1807                 return 0;
1808         /* iovec and kvec have identical layouts */
1809         if (likely(iter_is_iovec(i) || iov_iter_is_kvec(i)))
1810                 return iov_npages(i, maxpages);
1811         if (iov_iter_is_bvec(i))
1812                 return bvec_npages(i, maxpages);
1813         if (iov_iter_is_pipe(i)) {
1814                 unsigned int iter_head;
1815                 int npages;
1816                 size_t off;
1817
1818                 if (!sanity(i))
1819                         return 0;
1820
1821                 data_start(i, &iter_head, &off);
1822                 /* some of this one + all after this one */
1823                 npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe);
1824                 return min(npages, maxpages);
1825         }
1826         if (iov_iter_is_xarray(i)) {
1827                 unsigned offset = (i->xarray_start + i->iov_offset) % PAGE_SIZE;
1828                 int npages = DIV_ROUND_UP(offset + i->count, PAGE_SIZE);
1829                 return min(npages, maxpages);
1830         }
1831         return 0;
1832 }
1833 EXPORT_SYMBOL(iov_iter_npages);
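
/*
 * Example usage (illustrative sketch): bound the size of a temporary
 * page-pointer array before pinning, capping it at a per-request limit.
 *
 *	int npages = iov_iter_npages(iter, BIO_MAX_VECS);
 *	struct page **pages = kcalloc(npages, sizeof(*pages), GFP_KERNEL);
 *
 *	if (!pages)
 *		return -ENOMEM;
 */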
1834
1835 const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
1836 {
1837         *new = *old;
1838         if (unlikely(iov_iter_is_pipe(new))) {
1839                 WARN_ON(1);
1840                 return NULL;
1841         }
1842         if (unlikely(iov_iter_is_discard(new) || iov_iter_is_xarray(new)))
1843                 return NULL;
1844         if (iov_iter_is_bvec(new))
1845                 return new->bvec = kmemdup(new->bvec,
1846                                     new->nr_segs * sizeof(struct bio_vec),
1847                                     flags);
1848         else
1849                 /* iovec and kvec have identical layout */
1850                 return new->iov = kmemdup(new->iov,
1851                                    new->nr_segs * sizeof(struct iovec),
1852                                    flags);
1853 }
1854 EXPORT_SYMBOL(dup_iter);
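
/*
 * Example usage (illustrative sketch): keep a private copy of the caller's
 * iterator so the request can be re-driven later.  The returned segment
 * array must be kfree()d once the copy is no longer needed; NULL also means
 * the iterator type cannot be duplicated.
 *
 *	struct iov_iter saved;
 *	const void *segs = dup_iter(&saved, iter, GFP_KERNEL);
 *
 *	if (!segs)
 *		return -ENOMEM;
 *	// ... later, when the retry has completed ...
 *	kfree(segs);
 */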
1855
1856 static int copy_compat_iovec_from_user(struct iovec *iov,
1857                 const struct iovec __user *uvec, unsigned long nr_segs)
1858 {
1859         const struct compat_iovec __user *uiov =
1860                 (const struct compat_iovec __user *)uvec;
1861         int ret = -EFAULT, i;
1862
1863         if (!user_access_begin(uiov, nr_segs * sizeof(*uiov)))
1864                 return -EFAULT;
1865
1866         for (i = 0; i < nr_segs; i++) {
1867                 compat_uptr_t buf;
1868                 compat_ssize_t len;
1869
1870                 unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
1871                 unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
1872
1873                 /* check for compat_size_t not fitting in compat_ssize_t .. */
1874                 if (len < 0) {
1875                         ret = -EINVAL;
1876                         goto uaccess_end;
1877                 }
1878                 iov[i].iov_base = compat_ptr(buf);
1879                 iov[i].iov_len = len;
1880         }
1881
1882         ret = 0;
1883 uaccess_end:
1884         user_access_end();
1885         return ret;
1886 }
1887
1888 static int copy_iovec_from_user(struct iovec *iov,
1889                 const struct iovec __user *uvec, unsigned long nr_segs)
1890 {
1891         unsigned long seg;
1892
1893         if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
1894                 return -EFAULT;
1895         for (seg = 0; seg < nr_segs; seg++) {
1896                 if ((ssize_t)iov[seg].iov_len < 0)
1897                         return -EINVAL;
1898         }
1899
1900         return 0;
1901 }
1902
1903 struct iovec *iovec_from_user(const struct iovec __user *uvec,
1904                 unsigned long nr_segs, unsigned long fast_segs,
1905                 struct iovec *fast_iov, bool compat)
1906 {
1907         struct iovec *iov = fast_iov;
1908         int ret;
1909
1910         /*
1911          * SuS says "The readv() function *may* fail if the iovcnt argument was
1912          * less than or equal to 0, or greater than {IOV_MAX}."  Linux has
1913          * traditionally returned zero for zero segments, so...
1914          */
1915         if (nr_segs == 0)
1916                 return iov;
1917         if (nr_segs > UIO_MAXIOV)
1918                 return ERR_PTR(-EINVAL);
1919         if (nr_segs > fast_segs) {
1920                 iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
1921                 if (!iov)
1922                         return ERR_PTR(-ENOMEM);
1923         }
1924
1925         if (compat)
1926                 ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
1927         else
1928                 ret = copy_iovec_from_user(iov, uvec, nr_segs);
1929         if (ret) {
1930                 if (iov != fast_iov)
1931                         kfree(iov);
1932                 return ERR_PTR(ret);
1933         }
1934
1935         return iov;
1936 }
1937
1938 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
1939                  unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
1940                  struct iov_iter *i, bool compat)
1941 {
1942         ssize_t total_len = 0;
1943         unsigned long seg;
1944         struct iovec *iov;
1945
1946         iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
1947         if (IS_ERR(iov)) {
1948                 *iovp = NULL;
1949                 return PTR_ERR(iov);
1950         }
1951
1952         /*
1953          * According to the Single Unix Specification we should return EINVAL if
1954          * an element length is < 0 when cast to ssize_t or if the total length
1955          * would overflow the ssize_t return value of the system call.
1956          *
1957          * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
1958          * overflow case.
1959          */
1960         for (seg = 0; seg < nr_segs; seg++) {
1961                 ssize_t len = (ssize_t)iov[seg].iov_len;
1962
1963                 if (!access_ok(iov[seg].iov_base, len)) {
1964                         if (iov != *iovp)
1965                                 kfree(iov);
1966                         *iovp = NULL;
1967                         return -EFAULT;
1968                 }
1969
1970                 if (len > MAX_RW_COUNT - total_len) {
1971                         len = MAX_RW_COUNT - total_len;
1972                         iov[seg].iov_len = len;
1973                 }
1974                 total_len += len;
1975         }
1976
1977         iov_iter_init(i, type, iov, nr_segs, total_len);
1978         if (iov == *iovp)
1979                 *iovp = NULL;
1980         else
1981                 *iovp = iov;
1982         return total_len;
1983 }
1984
1985 /**
1986  * import_iovec() - Copy an array of &struct iovec from userspace
1987  *     into the kernel, check that it is valid, and initialize a new
1988  *     &struct iov_iter iterator to access it.
1989  *
1990  * @type: One of %READ or %WRITE.
1991  * @uvec: Pointer to the userspace array.
1992  * @nr_segs: Number of elements in userspace array.
1993  * @fast_segs: Number of elements in the array pointed to by @iovp.
1994  * @iovp: (input and output parameter) Pointer to pointer to (usually small
1995  *     on-stack) kernel array.
1996  * @i: Pointer to iterator that will be initialized on success.
1997  *
1998  * If the array pointed to by *@iovp is large enough to hold all @nr_segs,
1999  * then this function places %NULL in *@iovp on return. Otherwise, a new
2000  * array will be allocated and the result placed in *@iovp. This means that
2001  * the caller may call kfree() on *@iovp regardless of whether the small
2002  * on-stack array was used or not (and regardless of whether this function
2003  * returns an error or not).
2004  *
2005  * Return: Negative error code on error, bytes imported on success
2006  */
2007 ssize_t import_iovec(int type, const struct iovec __user *uvec,
2008                  unsigned nr_segs, unsigned fast_segs,
2009                  struct iovec **iovp, struct iov_iter *i)
2010 {
2011         return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
2012                               in_compat_syscall());
2013 }
2014 EXPORT_SYMBOL(import_iovec);
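
/*
 * Example usage (illustrative sketch of a readv()-style path): import the
 * user vector, do the I/O through the iterator, then free whatever
 * import_iovec() left in @iov - safe whether or not the on-stack array was
 * used.  do_the_read() is a hypothetical helper that consumes the iterator.
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	ssize_t ret;
 *
 *	ret = import_iovec(READ, uvec, nr_segs, ARRAY_SIZE(iovstack),
 *			   &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	ret = do_the_read(file, &iter);
 *	kfree(iov);
 *	return ret;
 */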
2015
2016 int import_single_range(int rw, void __user *buf, size_t len,
2017                  struct iovec *iov, struct iov_iter *i)
2018 {
2019         if (len > MAX_RW_COUNT)
2020                 len = MAX_RW_COUNT;
2021         if (unlikely(!access_ok(buf, len)))
2022                 return -EFAULT;
2023
2024         iov->iov_base = buf;
2025         iov->iov_len = len;
2026         iov_iter_init(i, rw, iov, 1, len);
2027         return 0;
2028 }
2029 EXPORT_SYMBOL(import_single_range);
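
/*
 * Example usage (illustrative sketch of a plain write() path); do_the_write()
 * is a hypothetical helper that consumes the iterator.
 *
 *	struct iovec iov;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_single_range(WRITE, ubuf, len, &iov, &iter);
 *	if (unlikely(ret))
 *		return ret;
 *	return do_the_write(file, &iter);
 */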
2030
2031 /**
2032  * iov_iter_restore() - Restore a &struct iov_iter to the same state as when
2033  *     iov_iter_save_state() was called.
2034  *
2035  * @i: &struct iov_iter to restore
2036  * @state: state to restore from
2037  *
2038  * Used after iov_iter_save_state() to restore @i, if operations may
2039  * have advanced it.
2040  *
2041  * Note: only works on ITER_IOVEC, ITER_BVEC, and ITER_KVEC
2042  */
2043 void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state)
2044 {
2045         if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) &&
2046                          !iov_iter_is_kvec(i)))
2047                 return;
2048         i->iov_offset = state->iov_offset;
2049         i->count = state->count;
2050         /*
2051          * For the *vec iters, nr_segs + iov is constant - if we increment
2052          * the vec, then we also decrement the nr_segs count. Hence we don't
2053          * need to track both of these; just one is enough, and we can derive
2054          * the other from it. ITER_KVEC and ITER_IOVEC are the same struct
2055          * size, so we can just increment the iov pointer as they are unionized.
2056          * ITER_BVEC _may_ be the same size on some archs, but on others it is
2057          * not. Be safe and handle it separately.
2058          */
2059         BUILD_BUG_ON(sizeof(struct iovec) != sizeof(struct kvec));
2060         if (iov_iter_is_bvec(i))
2061                 i->bvec -= state->nr_segs - i->nr_segs;
2062         else
2063                 i->iov -= state->nr_segs - i->nr_segs;
2064         i->nr_segs = state->nr_segs;
2065 }
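
/*
 * Example usage (illustrative sketch, in the spirit of how io_uring retries
 * a request): snapshot the iterator before issuing the I/O and roll it back
 * if the attempt has to be repeated.  do_the_io() is a hypothetical helper
 * that may advance the iterator before failing with -EAGAIN.
 *
 *	struct iov_iter_state state;
 *	ssize_t ret;
 *
 *	iov_iter_save_state(iter, &state);
 *	ret = do_the_io(file, iter);
 *	if (ret == -EAGAIN) {
 *		iov_iter_restore(iter, &state);
 *		// re-queue with the original, un-advanced iterator
 *	}
 */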