1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* kiocb-using read/write
4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5 * Written by David Howells (dhowells@redhat.com)
8 #include <linux/mount.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/uio.h>
12 #include <linux/falloc.h>
13 #include <linux/sched/mm.h>
14 #include <trace/events/fscache.h>
/*
 * Per-request state for an I/O submitted to the backing file.  A record is
 * allocated by cachefiles_read() and cachefiles_write(), recovered in the
 * completion handlers from the embedded kiocb with container_of(), and
 * released through cachefiles_put_kiocb().
 *
 * NOTE(review): only some members are visible in this excerpt.  Members that
 * the rest of the file references (iocb, ki_refcnt, skipped, start,
 * term_func_priv, was_async, b_writing) are declared on lines not shown here.
 */
17 struct cachefiles_kiocb {
/* Cache object the request is for; its ioreq ref is dropped in cachefiles_put_kiocb() */
25 struct cachefiles_object *object;
/* Termination callback, invoked as term_func(term_func_priv, ret, was_async) */
26 netfs_io_terminated_t term_func;
29 unsigned int inval_counter; /* Copy of cookie->inval_counter */
/*
 * Drop a reference on an I/O tracking record.
 *
 * When the count in ki->ki_refcnt reaches zero, the ioreq reference on the
 * cache object and the reference on the backing file held through
 * ki->iocb.ki_filp are released.
 *
 * NOTE(review): the remainder of the teardown falls on lines that are not
 * visible in this excerpt.
 */
33 static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
35 if (refcount_dec_and_test(&ki->ki_refcnt)) {
/* Counterpart of cachefiles_grab_object(..., cachefiles_obj_get_ioreq) at submission */
36 cachefiles_put_object(ki->object, cachefiles_obj_put_ioreq);
/* Counterpart of get_file() at submission */
37 fput(ki->iocb.ki_filp);
43 * Handle completion of a read from the cache.
/*
 * Completion handler for a cache read.
 *
 * Installed as ki->iocb.ki_complete by cachefiles_read(), which also calls
 * it directly after clearing ki->was_async.
 *
 * @iocb: the kiocb embedded in the struct cachefiles_kiocb for this request
 * @ret:  result of the read, passed on to the termination callback
 *
 * The handler can emit a cachefiles_trace_read_error trace event, compares
 * the cookie's current inval_counter with the copy taken at submission,
 * hands the result to ki->term_func and finally drops one reference on the
 * tracking record.
 *
 * NOTE(review): the conditions guarding the trace event and the statements
 * controlled by the inval_counter comparison are on lines not visible here.
 */
45 static void cachefiles_read_complete(struct kiocb *iocb, long ret)
/* Recover the tracking record from its embedded kiocb */
47 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
48 struct inode *inode = file_inode(ki->iocb.ki_filp);
53 trace_cachefiles_io_error(ki->object, inode, ret,
54 cachefiles_trace_read_error);
/* Has the cookie's invalidation counter stayed the same since submission? */
58 if (ki->object->cookie->inval_counter == ki->inval_counter)
64 ki->term_func(ki->term_func_priv, ret, ki->was_async);
/* Release one of the two refs set up in cachefiles_read() */
67 cachefiles_put_kiocb(ki);
71 * Initiate a read from the cache.
/*
 * Start a direct-I/O read from the backing file into @iter.
 *
 * @cres:      cache resources; supply the object, the backing file and the
 *             inval_counter value copied into the request
 * @iter:      destination buffer; its remaining count is the read length
 * @read_hole: unless NETFS_READ_HOLE_IGNORE, a SEEK_DATA probe is done first
 * @term_func: termination callback
 *
 * NOTE(review): start_pos and term_func_priv are used below but their
 * parameter declarations, like the labels, switch head and return
 * statements, are on lines not visible in this excerpt.
 *
 * Outline of the visible logic:
 *  - Wait for the cache operation with FSCACHE_WANT_READ; on failure leave
 *    via presubmission_error with ret preset to -ENOBUFS.
 *  - If a hole probe was requested, seek for data from start_pos.  A seek
 *    error other than -ENXIO aborts.  If there is no data before the end of
 *    the requested region, NETFS_READ_HOLE_FAIL aborts, otherwise the whole
 *    buffer is zeroed and the function leaves via presubmission_error
 *    without submitting any I/O.  If data starts part-way in, the leading
 *    gap is zeroed and counted in skipped.
 *  - Allocate the tracking record with two refs (one dropped by the
 *    completion handler, one by this function), set it up for IOCB_DIRECT
 *    at start_pos + skipped, and take refs on the file and the object.
 *  - Submit with vfs_iocb_iter_read() inside a memalloc_nofs section.
 *  - The restart error codes listed below are failed with EINTR, as the
 *    inline comment explains; completion is then invoked directly with
 *    was_async cleared.
 *  - The last visible statement reports ret (if negative) or the number of
 *    skipped bytes to term_func with was_async false; presumably this is the
 *    presubmission_error path - confirm against the full source.
 */
73 static int cachefiles_read(struct netfs_cache_resources *cres,
75 struct iov_iter *iter,
76 enum netfs_read_from_hole read_hole,
77 netfs_io_terminated_t term_func,
80 struct cachefiles_object *object;
81 struct cachefiles_kiocb *ki;
83 unsigned int old_nofs;
84 ssize_t ret = -ENOBUFS;
85 size_t len = iov_iter_count(iter), skipped = 0;
87 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
88 goto presubmission_error;
91 object = cachefiles_cres_object(cres);
92 file = cachefiles_cres_file(cres);
94 _enter("%pD,%li,%llx,%zx/%llx",
95 file, file_inode(file)->i_ino, start_pos, len,
96 i_size_read(file_inode(file)));
98 /* If the caller asked us to seek for data before doing the read, then
99 * we should do that now. If we find a gap, we fill it with zeros.
101 if (read_hole != NETFS_READ_HOLE_IGNORE) {
102 loff_t off = start_pos, off2;
104 off2 = cachefiles_inject_read_error();
106 off2 = vfs_llseek(file, off, SEEK_DATA);
107 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
110 goto presubmission_error;
113 if (off2 == -ENXIO || off2 >= start_pos + len) {
114 /* The region is beyond the EOF or there's no more data
115 * in the region, so clear the rest of the buffer and
119 if (read_hole == NETFS_READ_HOLE_FAIL)
120 goto presubmission_error;
122 iov_iter_zero(len, iter);
125 goto presubmission_error;
128 skipped = off2 - off;
129 iov_iter_zero(skipped, iter);
133 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
135 goto presubmission_error;
137 refcount_set(&ki->ki_refcnt, 2);
138 ki->iocb.ki_filp = file;
139 ki->iocb.ki_pos = start_pos + skipped;
140 ki->iocb.ki_flags = IOCB_DIRECT;
141 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
142 ki->iocb.ki_ioprio = get_current_ioprio();
143 ki->skipped = skipped;
145 ki->inval_counter = cres->inval_counter;
146 ki->term_func = term_func;
147 ki->term_func_priv = term_func_priv;
148 ki->was_async = true;
151 ki->iocb.ki_complete = cachefiles_read_complete;
153 get_file(ki->iocb.ki_filp);
154 cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
156 trace_cachefiles_read(object, file_inode(file), ki->iocb.ki_pos, len - skipped);
157 old_nofs = memalloc_nofs_save();
158 ret = cachefiles_inject_read_error();
160 ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
161 memalloc_nofs_restore(old_nofs);
167 case -ERESTARTNOINTR:
168 case -ERESTARTNOHAND:
169 case -ERESTART_RESTARTBLOCK:
170 /* There's no easy way to restart the syscall since other AIO's
171 * may be already running. Just fail this IO with EINTR.
176 ki->was_async = false;
177 cachefiles_read_complete(&ki->iocb, ret);
184 cachefiles_put_kiocb(ki);
185 _leave(" = %zd", ret);
190 term_func(term_func_priv, ret < 0 ? ret : skipped, false);
195 * Handle completion of a write to the cache.
/*
 * Completion handler for a cache write.
 *
 * Installed as ki->iocb.ki_complete by cachefiles_write(), which also calls
 * it directly after clearing ki->was_async.
 *
 * @iocb: the kiocb embedded in the struct cachefiles_kiocb for this request
 * @ret:  result of the write, passed on to the termination callback
 *
 * NOTE(review): the condition guarding the error trace event is on lines
 * not visible in this excerpt.
 */
197 static void cachefiles_write_complete(struct kiocb *iocb, long ret)
/* Recover the tracking record from its embedded kiocb */
199 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
200 struct cachefiles_object *object = ki->object;
201 struct inode *inode = file_inode(ki->iocb.ki_filp);
205 /* Tell lockdep we inherited freeze protection from submission thread */
206 __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
/* Ends the write section opened with __sb_start_write() in cachefiles_write() */
207 __sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
210 trace_cachefiles_io_error(object, inode, ret,
211 cachefiles_trace_write_error);
/* Undo the atomic_long_add() done on the cache's b_writing counter at submission */
213 atomic_long_sub(ki->b_writing, &object->volume->cache->b_writing);
/* Flag on the cookie that the object now has data */
214 set_bit(FSCACHE_COOKIE_HAVE_DATA, &object->cookie->flags);
216 ki->term_func(ki->term_func_priv, ret, ki->was_async);
/* Release one of the two refs set up in cachefiles_write() */
217 cachefiles_put_kiocb(ki);
221 * Initiate a write to the cache.
/*
 * Start a direct-I/O write of @iter to the backing file.
 *
 * @cres:           cache resources; supply the object, the backing file and
 *                  the inval_counter value copied into the request
 * @iter:           source buffer; its remaining count is the write length
 * @term_func:      termination callback
 * @term_func_priv: opaque value passed back to @term_func
 *
 * NOTE(review): start_pos is used below but its parameter declaration, like
 * the labels, switch head and return statements, is on lines not visible in
 * this excerpt.
 *
 * Outline of the visible logic:
 *  - Wait for the cache operation with FSCACHE_WANT_WRITE; on failure leave
 *    via presubmission_error with ret preset to -ENOBUFS.
 *  - Allocate the tracking record with two refs (one dropped by the
 *    completion handler, one by this function) and set it up for
 *    IOCB_DIRECT | IOCB_WRITE at start_pos.
 *  - Account the write in cache->b_writing, in units of 1 << cache->bshift
 *    bytes with the length rounded up; cachefiles_write_complete() subtracts
 *    the same amount.
 *  - Take freeze protection by hand and tell lockdep it was released, as the
 *    inline comment explains; the completion handler does the reverse.
 *  - Take refs on the file and the object, then submit with
 *    vfs_iocb_iter_write() inside a memalloc_nofs section.
 *  - The restart error codes listed below are failed with EINTR, as the
 *    inline comment explains; completion is then invoked directly with
 *    was_async cleared.
 *  - The last visible statement reports ret to term_func with was_async
 *    false; presumably this is the presubmission_error path - confirm
 *    against the full source.
 */
223 static int cachefiles_write(struct netfs_cache_resources *cres,
225 struct iov_iter *iter,
226 netfs_io_terminated_t term_func,
227 void *term_func_priv)
229 struct cachefiles_object *object;
230 struct cachefiles_cache *cache;
231 struct cachefiles_kiocb *ki;
234 unsigned int old_nofs;
235 ssize_t ret = -ENOBUFS;
236 size_t len = iov_iter_count(iter);
238 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
239 goto presubmission_error;
240 fscache_count_write();
241 object = cachefiles_cres_object(cres);
242 cache = object->volume->cache;
243 file = cachefiles_cres_file(cres);
245 _enter("%pD,%li,%llx,%zx/%llx",
246 file, file_inode(file)->i_ino, start_pos, len,
247 i_size_read(file_inode(file)));
250 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
252 goto presubmission_error;
254 refcount_set(&ki->ki_refcnt, 2);
255 ki->iocb.ki_filp = file;
256 ki->iocb.ki_pos = start_pos;
257 ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE;
258 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file));
259 ki->iocb.ki_ioprio = get_current_ioprio();
261 ki->inval_counter = cres->inval_counter;
262 ki->start = start_pos;
264 ki->term_func = term_func;
265 ki->term_func_priv = term_func_priv;
266 ki->was_async = true;
267 ki->b_writing = (len + (1 << cache->bshift) - 1) >> cache->bshift;
270 ki->iocb.ki_complete = cachefiles_write_complete;
271 atomic_long_add(ki->b_writing, &cache->b_writing);
273 /* Open-code file_start_write here to grab freeze protection, which
274 * will be released by another thread in aio_complete_rw(). Fool
275 * lockdep by telling it the lock got released so that it doesn't
276 * complain about the held lock when we return to userspace.
278 inode = file_inode(file);
279 __sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
280 __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
282 get_file(ki->iocb.ki_filp);
283 cachefiles_grab_object(object, cachefiles_obj_get_ioreq);
285 trace_cachefiles_write(object, inode, ki->iocb.ki_pos, len);
286 old_nofs = memalloc_nofs_save();
287 ret = cachefiles_inject_write_error();
289 ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
290 memalloc_nofs_restore(old_nofs);
296 case -ERESTARTNOINTR:
297 case -ERESTARTNOHAND:
298 case -ERESTART_RESTARTBLOCK:
299 /* There's no easy way to restart the syscall since other AIO's
300 * may be already running. Just fail this IO with EINTR.
305 ki->was_async = false;
306 cachefiles_write_complete(&ki->iocb, ret);
313 cachefiles_put_kiocb(ki);
314 _leave(" = %zd", ret);
319 term_func(term_func_priv, ret, false);
324 * Prepare a read operation, shortening it to a cached/uncached
325 * boundary as appropriate.
/*
 * Decide where a read subrequest should be satisfied from, possibly
 * shortening subreq->len so that it stops at a data/hole boundary in the
 * backing file.
 *
 * @subreq: the subrequest; ->start and ->len describe the region, ->len and
 *          ->flags may be modified here
 *
 * The result starts out as NETFS_DOWNLOAD_FROM_SERVER and is changed to
 * NETFS_FILL_WITH_ZEROES when the region starts at or beyond i_size, or to
 * NETFS_READ_FROM_CACHE when the backing file has data at subreq->start.
 * A cachefiles_prep_read trace event records the decision and its reason.
 *
 * NOTE(review): i_size, off and to are used below but declared on lines not
 * visible in this excerpt, as are the goto labels, some branch bodies and
 * the return statement.
 */
327 static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq,
330 enum cachefiles_prepare_read_trace why;
331 struct netfs_read_request *rreq = subreq->rreq;
332 struct netfs_cache_resources *cres = &rreq->cache_resources;
333 struct cachefiles_object *object;
334 struct cachefiles_cache *cache;
335 struct fscache_cookie *cookie = fscache_cres_cookie(cres);
336 const struct cred *saved_cred;
337 struct file *file = cachefiles_cres_file(cres);
338 enum netfs_read_source ret = NETFS_DOWNLOAD_FROM_SERVER;
/* Inode number is only used for tracing; 0 while no backing file is attached */
340 ino_t ino = file ? file_inode(file)->i_ino : 0;
342 _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
/* A region starting at or past i_size is filled with zeroes */
344 if (subreq->start >= i_size) {
345 ret = NETFS_FILL_WITH_ZEROES;
346 why = cachefiles_trace_read_after_eof;
/* Cookie flagged as having nothing to read: mark the subrequest for writing to the cache */
350 if (test_bit(FSCACHE_COOKIE_NO_DATA_TO_READ, &cookie->flags)) {
351 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
352 why = cachefiles_trace_read_no_data;
356 /* The object and the file may be being created in the background. */
358 why = cachefiles_trace_read_no_file;
359 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_READ))
/* Re-fetch the file after waiting and refresh the traced inode number */
361 file = cachefiles_cres_file(cres);
364 ino = file_inode(file)->i_ino;
367 object = cachefiles_cres_object(cres);
368 cache = object->volume->cache;
/* The seeks below run between cachefiles_begin_secure() and cachefiles_end_secure() */
369 cachefiles_begin_secure(cache, &saved_cred);
371 off = cachefiles_inject_read_error();
/* Find the first data at or after the start of the region */
373 off = vfs_llseek(file, subreq->start, SEEK_DATA);
374 if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
/* -ENXIO from SEEK_DATA is not traced as an I/O error */
375 if (off == (loff_t)-ENXIO) {
376 why = cachefiles_trace_read_seek_nxio;
377 goto download_and_store;
379 trace_cachefiles_io_error(object, file_inode(file), off,
380 cachefiles_trace_seek_error);
381 why = cachefiles_trace_read_seek_error;
/* First data lies at or beyond the end of the region: the region is a hole */
385 if (off >= subreq->start + subreq->len) {
386 why = cachefiles_trace_read_found_hole;
387 goto download_and_store;
/* Region begins in a hole: shorten it to end at the data, rounded up to the cache block size */
390 if (off > subreq->start) {
391 off = round_up(off, cache->bsize);
392 subreq->len = off - subreq->start;
393 why = cachefiles_trace_read_found_part;
394 goto download_and_store;
397 to = cachefiles_inject_read_error();
/* Data is present at the start; find where it ends */
399 to = vfs_llseek(file, subreq->start, SEEK_HOLE);
400 if (to < 0 && to >= (loff_t)-MAX_ERRNO) {
401 trace_cachefiles_io_error(object, file_inode(file), to,
402 cachefiles_trace_seek_error);
403 why = cachefiles_trace_read_seek_error;
/*
 * Data ends inside the region: trim the region to a block boundary.
 * NOTE(review): the round_down() below presumably sits under an else that
 * is not visible in this excerpt - confirm against the full source.
 */
407 if (to < subreq->start + subreq->len) {
408 if (subreq->start + subreq->len >= i_size)
409 to = round_up(to, cache->bsize);
411 to = round_down(to, cache->bsize);
412 subreq->len = to - subreq->start;
415 why = cachefiles_trace_read_have_data;
416 ret = NETFS_READ_FROM_CACHE;
/* Presumably the download_and_store target: mark the subrequest for writing to the cache */
420 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
422 cachefiles_end_secure(cache, saved_cred);
424 trace_cachefiles_prep_read(subreq, ret, why, ino);
429 * Prepare for a write to occur.
/*
 * Work out whether a write to the backing file may go ahead, expanding the
 * region to page granularity first.
 *
 * @cres:   cache resources; supply the object and the backing file
 * @_start: in/out - start of the region, rounded down to PAGE_SIZE
 * @_len:   in/out - length of the region, grown so the rounded region ends
 *          on a PAGE_SIZE boundary
 * @i_size: not referenced on any line visible in this excerpt
 * @no_space_allocated_yet: tested before the seeks; the statement it
 *          controls is not visible (presumably a jump to check_space)
 *
 * Outline of the visible logic:
 *  - SEEK_DATA from the rounded start.  On the branch commented
 *    "Unallocated tail", or when the first data lies at or beyond the end
 *    of the region, go to check_space.
 *  - Otherwise the region is at least partly allocated.  If
 *    cachefiles_has_space() with cachefiles_has_space_check returns 0, the
 *    block can simply be overwritten and 0 is returned.
 *  - Otherwise SEEK_HOLE from the rounded start; if no hole lies inside the
 *    region it is fully allocated and 0 is returned.
 *  - Otherwise count the event with fscache_count_no_write_space() and
 *    punch the region out with vfs_fallocate(FALLOC_FL_PUNCH_HOLE |
 *    FALLOC_FL_KEEP_SIZE); a failure is traced and reported through
 *    cachefiles_io_error_obj().
 *  - The last visible statement returns the result of cachefiles_has_space()
 *    with cachefiles_has_space_for_write; presumably this is the check_space
 *    path - confirm against the full source.
 *
 * NOTE(review): ret is used below but declared on a line not visible here,
 * as are the labels and the error-return statements.
 */
431 static int __cachefiles_prepare_write(struct netfs_cache_resources *cres,
432 loff_t *_start, size_t *_len, loff_t i_size,
433 bool no_space_allocated_yet)
435 struct cachefiles_object *object = cachefiles_cres_object(cres);
436 struct cachefiles_cache *cache = object->volume->cache;
437 struct file *file = cachefiles_cres_file(cres);
438 loff_t start = *_start, pos;
439 size_t len = *_len, down;
442 /* Round to DIO size */
443 down = start - round_down(start, PAGE_SIZE);
444 *_start = start - down;
445 *_len = round_up(down + len, PAGE_SIZE);
447 /* We need to work out whether there's sufficient disk space to perform
448 * the write - but we can skip that check if we have space already
451 if (no_space_allocated_yet)
454 pos = cachefiles_inject_read_error();
456 pos = vfs_llseek(file, *_start, SEEK_DATA);
457 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
459 goto check_space; /* Unallocated tail */
460 trace_cachefiles_io_error(object, file_inode(file), pos,
461 cachefiles_trace_seek_error);
464 if ((u64)pos >= (u64)*_start + *_len)
465 goto check_space; /* Unallocated region */
467 /* We have a block that's at least partially filled - if we're low on
468 * space, we need to see if it's fully allocated. If it's not, we may
471 if (cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
472 cachefiles_has_space_check) == 0)
473 return 0; /* Enough space to simply overwrite the whole block */
475 pos = cachefiles_inject_read_error();
477 pos = vfs_llseek(file, *_start, SEEK_HOLE);
478 if (pos < 0 && pos >= (loff_t)-MAX_ERRNO) {
479 trace_cachefiles_io_error(object, file_inode(file), pos,
480 cachefiles_trace_seek_error);
483 if ((u64)pos >= (u64)*_start + *_len)
484 return 0; /* Fully allocated */
486 /* Partially allocated, but insufficient space: cull. */
487 fscache_count_no_write_space();
488 ret = cachefiles_inject_remove_error();
490 ret = vfs_fallocate(file, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
493 trace_cachefiles_io_error(object, file_inode(file), ret,
494 cachefiles_trace_fallocate_error);
495 cachefiles_io_error_obj(object,
496 "CacheFiles: fallocate failed (%d)\n", ret);
503 return cachefiles_has_space(cache, 0, *_len / PAGE_SIZE,
504 cachefiles_has_space_for_write);
/*
 * Prepare for a write to the cache: make sure a backing file is attached,
 * then run __cachefiles_prepare_write() between cachefiles_begin_secure()
 * and cachefiles_end_secure().
 *
 * All parameters are passed straight through to
 * __cachefiles_prepare_write(); @_start and @_len may be adjusted there.
 *
 * NOTE(review): ret is used below but declared on a line not visible in
 * this excerpt, as are the statements taken when waiting fails or no file
 * is available, and the return statement.
 */
507 static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
508 loff_t *_start, size_t *_len, loff_t i_size,
509 bool no_space_allocated_yet)
511 struct cachefiles_object *object = cachefiles_cres_object(cres);
512 struct cachefiles_cache *cache = object->volume->cache;
513 const struct cred *saved_cred;
/* No backing file yet: wait for the operation, then look for the file again */
516 if (!cachefiles_cres_file(cres)) {
517 if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
519 if (!cachefiles_cres_file(cres))
523 cachefiles_begin_secure(cache, &saved_cred);
524 ret = __cachefiles_prepare_write(cres, _start, _len, i_size,
525 no_space_allocated_yet);
526 cachefiles_end_secure(cache, saved_cred);
531 * Clean up an operation.
/*
 * Finish a cache operation: end the cookie access that was held for I/O.
 *
 * @cres: cache resources of the operation being cleaned up
 *
 * NOTE(review): the backing file is fetched below, but the statement that
 * uses it is on lines not visible in this excerpt; presumably the file
 * reference taken in cachefiles_begin_operation() is dropped - confirm.
 */
533 static void cachefiles_end_operation(struct netfs_cache_resources *cres)
535 struct file *file = cachefiles_cres_file(cres);
539 fscache_end_cookie_access(fscache_cres_cookie(cres), fscache_access_io_end);
/*
 * Cache operation table; cachefiles_begin_operation() installs it in
 * cres->ops.  Each slot points at the function of the matching name defined
 * in this file.
 */
542 static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
543 .end_operation = cachefiles_end_operation,
544 .read = cachefiles_read,
545 .write = cachefiles_write,
546 .prepare_read = cachefiles_prepare_read,
547 .prepare_write = cachefiles_prepare_write,
551 * Open the cache file when beginning a cache operation.
553 bool cachefiles_begin_operation(struct netfs_cache_resources *cres,
554 enum fscache_want_state want_state)
556 struct cachefiles_object *object = cachefiles_cres_object(cres);
558 if (!cachefiles_cres_file(cres)) {
559 cres->ops = &cachefiles_netfs_cache_ops;
561 spin_lock(&object->lock);
562 if (!cres->cache_priv2 && object->file)
563 cres->cache_priv2 = get_file(object->file);
564 spin_unlock(&object->lock);
568 if (!cachefiles_cres_file(cres) && want_state != FSCACHE_WANT_PARAMS) {
569 pr_err("failed to get cres->file\n");