fuse: fix bad inode
[linux-2.6-microblaze.git] / fs / fuse / readdir.c
1 /*
2   FUSE: Filesystem in Userspace
3   Copyright (C) 2001-2018  Miklos Szeredi <miklos@szeredi.hu>
4
5   This program can be distributed under the terms of the GNU GPL.
6   See the file COPYING.
7 */
8
9
10 #include "fuse_i.h"
11 #include <linux/iversion.h>
12 #include <linux/posix_acl.h>
13 #include <linux/pagemap.h>
14 #include <linux/highmem.h>
15
16 static bool fuse_use_readdirplus(struct inode *dir, struct dir_context *ctx)
17 {
18         struct fuse_conn *fc = get_fuse_conn(dir);
19         struct fuse_inode *fi = get_fuse_inode(dir);
20
21         if (!fc->do_readdirplus)
22                 return false;
23         if (!fc->readdirplus_auto)
24                 return true;
25         if (test_and_clear_bit(FUSE_I_ADVISE_RDPLUS, &fi->state))
26                 return true;
27         if (ctx->pos == 0)
28                 return true;
29         return false;
30 }
31
32 static void fuse_add_dirent_to_cache(struct file *file,
33                                      struct fuse_dirent *dirent, loff_t pos)
34 {
35         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
36         size_t reclen = FUSE_DIRENT_SIZE(dirent);
37         pgoff_t index;
38         struct page *page;
39         loff_t size;
40         u64 version;
41         unsigned int offset;
42         void *addr;
43
44         spin_lock(&fi->rdc.lock);
45         /*
46          * Is cache already completed?  Or this entry does not go at the end of
47          * cache?
48          */
49         if (fi->rdc.cached || pos != fi->rdc.pos) {
50                 spin_unlock(&fi->rdc.lock);
51                 return;
52         }
53         version = fi->rdc.version;
54         size = fi->rdc.size;
55         offset = size & ~PAGE_MASK;
56         index = size >> PAGE_SHIFT;
57         /* Dirent doesn't fit in current page?  Jump to next page. */
58         if (offset + reclen > PAGE_SIZE) {
59                 index++;
60                 offset = 0;
61         }
62         spin_unlock(&fi->rdc.lock);
63
64         if (offset) {
65                 page = find_lock_page(file->f_mapping, index);
66         } else {
67                 page = find_or_create_page(file->f_mapping, index,
68                                            mapping_gfp_mask(file->f_mapping));
69         }
70         if (!page)
71                 return;
72
73         spin_lock(&fi->rdc.lock);
74         /* Raced with another readdir */
75         if (fi->rdc.version != version || fi->rdc.size != size ||
76             WARN_ON(fi->rdc.pos != pos))
77                 goto unlock;
78
79         addr = kmap_atomic(page);
80         if (!offset)
81                 clear_page(addr);
82         memcpy(addr + offset, dirent, reclen);
83         kunmap_atomic(addr);
84         fi->rdc.size = (index << PAGE_SHIFT) + offset + reclen;
85         fi->rdc.pos = dirent->off;
86 unlock:
87         spin_unlock(&fi->rdc.lock);
88         unlock_page(page);
89         put_page(page);
90 }
91
92 static void fuse_readdir_cache_end(struct file *file, loff_t pos)
93 {
94         struct fuse_inode *fi = get_fuse_inode(file_inode(file));
95         loff_t end;
96
97         spin_lock(&fi->rdc.lock);
98         /* does cache end position match current position? */
99         if (fi->rdc.pos != pos) {
100                 spin_unlock(&fi->rdc.lock);
101                 return;
102         }
103
104         fi->rdc.cached = true;
105         end = ALIGN(fi->rdc.size, PAGE_SIZE);
106         spin_unlock(&fi->rdc.lock);
107
108         /* truncate unused tail of cache */
109         truncate_inode_pages(file->f_mapping, end);
110 }
111
112 static bool fuse_emit(struct file *file, struct dir_context *ctx,
113                       struct fuse_dirent *dirent)
114 {
115         struct fuse_file *ff = file->private_data;
116
117         if (ff->open_flags & FOPEN_CACHE_DIR)
118                 fuse_add_dirent_to_cache(file, dirent, ctx->pos);
119
120         return dir_emit(ctx, dirent->name, dirent->namelen, dirent->ino,
121                         dirent->type);
122 }
123
124 static int parse_dirfile(char *buf, size_t nbytes, struct file *file,
125                          struct dir_context *ctx)
126 {
127         while (nbytes >= FUSE_NAME_OFFSET) {
128                 struct fuse_dirent *dirent = (struct fuse_dirent *) buf;
129                 size_t reclen = FUSE_DIRENT_SIZE(dirent);
130                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
131                         return -EIO;
132                 if (reclen > nbytes)
133                         break;
134                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
135                         return -EIO;
136
137                 if (!fuse_emit(file, ctx, dirent))
138                         break;
139
140                 buf += reclen;
141                 nbytes -= reclen;
142                 ctx->pos = dirent->off;
143         }
144
145         return 0;
146 }
147
148 static int fuse_direntplus_link(struct file *file,
149                                 struct fuse_direntplus *direntplus,
150                                 u64 attr_version)
151 {
152         struct fuse_entry_out *o = &direntplus->entry_out;
153         struct fuse_dirent *dirent = &direntplus->dirent;
154         struct dentry *parent = file->f_path.dentry;
155         struct qstr name = QSTR_INIT(dirent->name, dirent->namelen);
156         struct dentry *dentry;
157         struct dentry *alias;
158         struct inode *dir = d_inode(parent);
159         struct fuse_conn *fc;
160         struct inode *inode;
161         DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
162
163         if (!o->nodeid) {
164                 /*
165                  * Unlike in the case of fuse_lookup, zero nodeid does not mean
166                  * ENOENT. Instead, it only means the userspace filesystem did
167                  * not want to return attributes/handle for this entry.
168                  *
169                  * So do nothing.
170                  */
171                 return 0;
172         }
173
174         if (name.name[0] == '.') {
175                 /*
176                  * We could potentially refresh the attributes of the directory
177                  * and its parent?
178                  */
179                 if (name.len == 1)
180                         return 0;
181                 if (name.name[1] == '.' && name.len == 2)
182                         return 0;
183         }
184
185         if (invalid_nodeid(o->nodeid))
186                 return -EIO;
187         if (fuse_invalid_attr(&o->attr))
188                 return -EIO;
189
190         fc = get_fuse_conn(dir);
191
192         name.hash = full_name_hash(parent, name.name, name.len);
193         dentry = d_lookup(parent, &name);
194         if (!dentry) {
195 retry:
196                 dentry = d_alloc_parallel(parent, &name, &wq);
197                 if (IS_ERR(dentry))
198                         return PTR_ERR(dentry);
199         }
200         if (!d_in_lookup(dentry)) {
201                 struct fuse_inode *fi;
202                 inode = d_inode(dentry);
203                 if (!inode ||
204                     get_node_id(inode) != o->nodeid ||
205                     ((o->attr.mode ^ inode->i_mode) & S_IFMT)) {
206                         d_invalidate(dentry);
207                         dput(dentry);
208                         goto retry;
209                 }
210                 if (fuse_is_bad(inode)) {
211                         dput(dentry);
212                         return -EIO;
213                 }
214
215                 fi = get_fuse_inode(inode);
216                 spin_lock(&fi->lock);
217                 fi->nlookup++;
218                 spin_unlock(&fi->lock);
219
220                 forget_all_cached_acls(inode);
221                 fuse_change_attributes(inode, &o->attr,
222                                        entry_attr_timeout(o),
223                                        attr_version);
224                 /*
225                  * The other branch comes via fuse_iget()
226                  * which bumps nlookup inside
227                  */
228         } else {
229                 inode = fuse_iget(dir->i_sb, o->nodeid, o->generation,
230                                   &o->attr, entry_attr_timeout(o),
231                                   attr_version);
232                 if (!inode)
233                         inode = ERR_PTR(-ENOMEM);
234
235                 alias = d_splice_alias(inode, dentry);
236                 d_lookup_done(dentry);
237                 if (alias) {
238                         dput(dentry);
239                         dentry = alias;
240                 }
241                 if (IS_ERR(dentry))
242                         return PTR_ERR(dentry);
243         }
244         if (fc->readdirplus_auto)
245                 set_bit(FUSE_I_INIT_RDPLUS, &get_fuse_inode(inode)->state);
246         fuse_change_entry_timeout(dentry, o);
247
248         dput(dentry);
249         return 0;
250 }
251
252 static void fuse_force_forget(struct file *file, u64 nodeid)
253 {
254         struct inode *inode = file_inode(file);
255         struct fuse_mount *fm = get_fuse_mount(inode);
256         struct fuse_forget_in inarg;
257         FUSE_ARGS(args);
258
259         memset(&inarg, 0, sizeof(inarg));
260         inarg.nlookup = 1;
261         args.opcode = FUSE_FORGET;
262         args.nodeid = nodeid;
263         args.in_numargs = 1;
264         args.in_args[0].size = sizeof(inarg);
265         args.in_args[0].value = &inarg;
266         args.force = true;
267         args.noreply = true;
268
269         fuse_simple_request(fm, &args);
270         /* ignore errors */
271 }
272
273 static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file,
274                              struct dir_context *ctx, u64 attr_version)
275 {
276         struct fuse_direntplus *direntplus;
277         struct fuse_dirent *dirent;
278         size_t reclen;
279         int over = 0;
280         int ret;
281
282         while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) {
283                 direntplus = (struct fuse_direntplus *) buf;
284                 dirent = &direntplus->dirent;
285                 reclen = FUSE_DIRENTPLUS_SIZE(direntplus);
286
287                 if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX)
288                         return -EIO;
289                 if (reclen > nbytes)
290                         break;
291                 if (memchr(dirent->name, '/', dirent->namelen) != NULL)
292                         return -EIO;
293
294                 if (!over) {
295                         /* We fill entries into dstbuf only as much as
296                            it can hold. But we still continue iterating
297                            over remaining entries to link them. If not,
298                            we need to send a FORGET for each of those
299                            which we did not link.
300                         */
301                         over = !fuse_emit(file, ctx, dirent);
302                         if (!over)
303                                 ctx->pos = dirent->off;
304                 }
305
306                 buf += reclen;
307                 nbytes -= reclen;
308
309                 ret = fuse_direntplus_link(file, direntplus, attr_version);
310                 if (ret)
311                         fuse_force_forget(file, direntplus->entry_out.nodeid);
312         }
313
314         return 0;
315 }
316
317 static int fuse_readdir_uncached(struct file *file, struct dir_context *ctx)
318 {
319         int plus;
320         ssize_t res;
321         struct page *page;
322         struct inode *inode = file_inode(file);
323         struct fuse_mount *fm = get_fuse_mount(inode);
324         struct fuse_io_args ia = {};
325         struct fuse_args_pages *ap = &ia.ap;
326         struct fuse_page_desc desc = { .length = PAGE_SIZE };
327         u64 attr_version = 0;
328         bool locked;
329
330         page = alloc_page(GFP_KERNEL);
331         if (!page)
332                 return -ENOMEM;
333
334         plus = fuse_use_readdirplus(inode, ctx);
335         ap->args.out_pages = true;
336         ap->num_pages = 1;
337         ap->pages = &page;
338         ap->descs = &desc;
339         if (plus) {
340                 attr_version = fuse_get_attr_version(fm->fc);
341                 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
342                                     FUSE_READDIRPLUS);
343         } else {
344                 fuse_read_args_fill(&ia, file, ctx->pos, PAGE_SIZE,
345                                     FUSE_READDIR);
346         }
347         locked = fuse_lock_inode(inode);
348         res = fuse_simple_request(fm, &ap->args);
349         fuse_unlock_inode(inode, locked);
350         if (res >= 0) {
351                 if (!res) {
352                         struct fuse_file *ff = file->private_data;
353
354                         if (ff->open_flags & FOPEN_CACHE_DIR)
355                                 fuse_readdir_cache_end(file, ctx->pos);
356                 } else if (plus) {
357                         res = parse_dirplusfile(page_address(page), res,
358                                                 file, ctx, attr_version);
359                 } else {
360                         res = parse_dirfile(page_address(page), res, file,
361                                             ctx);
362                 }
363         }
364
365         __free_page(page);
366         fuse_invalidate_atime(inode);
367         return res;
368 }
369
/* Result of scanning one page of the readdir cache in fuse_parse_cache(). */
enum fuse_parse_result {
	FOUND_ERR  = -1,	/* a cached entry failed validation */
	FOUND_NONE = 0,		/* no entry at the requested position was seen */
	FOUND_SOME = 1,		/* at least one entry was passed to dir_emit() */
	FOUND_ALL  = 2,		/* dir_emit() refused an entry */
};
376
377 static enum fuse_parse_result fuse_parse_cache(struct fuse_file *ff,
378                                                void *addr, unsigned int size,
379                                                struct dir_context *ctx)
380 {
381         unsigned int offset = ff->readdir.cache_off & ~PAGE_MASK;
382         enum fuse_parse_result res = FOUND_NONE;
383
384         WARN_ON(offset >= size);
385
386         for (;;) {
387                 struct fuse_dirent *dirent = addr + offset;
388                 unsigned int nbytes = size - offset;
389                 size_t reclen;
390
391                 if (nbytes < FUSE_NAME_OFFSET || !dirent->namelen)
392                         break;
393
394                 reclen = FUSE_DIRENT_SIZE(dirent); /* derefs ->namelen */
395
396                 if (WARN_ON(dirent->namelen > FUSE_NAME_MAX))
397                         return FOUND_ERR;
398                 if (WARN_ON(reclen > nbytes))
399                         return FOUND_ERR;
400                 if (WARN_ON(memchr(dirent->name, '/', dirent->namelen) != NULL))
401                         return FOUND_ERR;
402
403                 if (ff->readdir.pos == ctx->pos) {
404                         res = FOUND_SOME;
405                         if (!dir_emit(ctx, dirent->name, dirent->namelen,
406                                       dirent->ino, dirent->type))
407                                 return FOUND_ALL;
408                         ctx->pos = dirent->off;
409                 }
410                 ff->readdir.pos = dirent->off;
411                 ff->readdir.cache_off += reclen;
412
413                 offset += reclen;
414         }
415
416         return res;
417 }
418
419 static void fuse_rdc_reset(struct inode *inode)
420 {
421         struct fuse_inode *fi = get_fuse_inode(inode);
422
423         fi->rdc.cached = false;
424         fi->rdc.version++;
425         fi->rdc.size = 0;
426         fi->rdc.pos = 0;
427 }
428
429 #define UNCACHED 1
430
431 static int fuse_readdir_cached(struct file *file, struct dir_context *ctx)
432 {
433         struct fuse_file *ff = file->private_data;
434         struct inode *inode = file_inode(file);
435         struct fuse_conn *fc = get_fuse_conn(inode);
436         struct fuse_inode *fi = get_fuse_inode(inode);
437         enum fuse_parse_result res;
438         pgoff_t index;
439         unsigned int size;
440         struct page *page;
441         void *addr;
442
443         /* Seeked?  If so, reset the cache stream */
444         if (ff->readdir.pos != ctx->pos) {
445                 ff->readdir.pos = 0;
446                 ff->readdir.cache_off = 0;
447         }
448
449         /*
450          * We're just about to start reading into the cache or reading the
451          * cache; both cases require an up-to-date mtime value.
452          */
453         if (!ctx->pos && fc->auto_inval_data) {
454                 int err = fuse_update_attributes(inode, file);
455
456                 if (err)
457                         return err;
458         }
459
460 retry:
461         spin_lock(&fi->rdc.lock);
462 retry_locked:
463         if (!fi->rdc.cached) {
464                 /* Starting cache? Set cache mtime. */
465                 if (!ctx->pos && !fi->rdc.size) {
466                         fi->rdc.mtime = inode->i_mtime;
467                         fi->rdc.iversion = inode_query_iversion(inode);
468                 }
469                 spin_unlock(&fi->rdc.lock);
470                 return UNCACHED;
471         }
472         /*
473          * When at the beginning of the directory (i.e. just after opendir(3) or
474          * rewinddir(3)), then need to check whether directory contents have
475          * changed, and reset the cache if so.
476          */
477         if (!ctx->pos) {
478                 if (inode_peek_iversion(inode) != fi->rdc.iversion ||
479                     !timespec64_equal(&fi->rdc.mtime, &inode->i_mtime)) {
480                         fuse_rdc_reset(inode);
481                         goto retry_locked;
482                 }
483         }
484
485         /*
486          * If cache version changed since the last getdents() call, then reset
487          * the cache stream.
488          */
489         if (ff->readdir.version != fi->rdc.version) {
490                 ff->readdir.pos = 0;
491                 ff->readdir.cache_off = 0;
492         }
493         /*
494          * If at the beginning of the cache, than reset version to
495          * current.
496          */
497         if (ff->readdir.pos == 0)
498                 ff->readdir.version = fi->rdc.version;
499
500         WARN_ON(fi->rdc.size < ff->readdir.cache_off);
501
502         index = ff->readdir.cache_off >> PAGE_SHIFT;
503
504         if (index == (fi->rdc.size >> PAGE_SHIFT))
505                 size = fi->rdc.size & ~PAGE_MASK;
506         else
507                 size = PAGE_SIZE;
508         spin_unlock(&fi->rdc.lock);
509
510         /* EOF? */
511         if ((ff->readdir.cache_off & ~PAGE_MASK) == size)
512                 return 0;
513
514         page = find_get_page_flags(file->f_mapping, index,
515                                    FGP_ACCESSED | FGP_LOCK);
516         spin_lock(&fi->rdc.lock);
517         if (!page) {
518                 /*
519                  * Uh-oh: page gone missing, cache is useless
520                  */
521                 if (fi->rdc.version == ff->readdir.version)
522                         fuse_rdc_reset(inode);
523                 goto retry_locked;
524         }
525
526         /* Make sure it's still the same version after getting the page. */
527         if (ff->readdir.version != fi->rdc.version) {
528                 spin_unlock(&fi->rdc.lock);
529                 unlock_page(page);
530                 put_page(page);
531                 goto retry;
532         }
533         spin_unlock(&fi->rdc.lock);
534
535         /*
536          * Contents of the page are now protected against changing by holding
537          * the page lock.
538          */
539         addr = kmap(page);
540         res = fuse_parse_cache(ff, addr, size, ctx);
541         kunmap(page);
542         unlock_page(page);
543         put_page(page);
544
545         if (res == FOUND_ERR)
546                 return -EIO;
547
548         if (res == FOUND_ALL)
549                 return 0;
550
551         if (size == PAGE_SIZE) {
552                 /* We hit end of page: skip to next page. */
553                 ff->readdir.cache_off = ALIGN(ff->readdir.cache_off, PAGE_SIZE);
554                 goto retry;
555         }
556
557         /*
558          * End of cache reached.  If found position, then we are done, otherwise
559          * need to fall back to uncached, since the position we were looking for
560          * wasn't in the cache.
561          */
562         return res == FOUND_SOME ? 0 : UNCACHED;
563 }
564
565 int fuse_readdir(struct file *file, struct dir_context *ctx)
566 {
567         struct fuse_file *ff = file->private_data;
568         struct inode *inode = file_inode(file);
569         int err;
570
571         if (fuse_is_bad(inode))
572                 return -EIO;
573
574         mutex_lock(&ff->readdir.lock);
575
576         err = UNCACHED;
577         if (ff->open_flags & FOPEN_CACHE_DIR)
578                 err = fuse_readdir_cached(file, ctx);
579         if (err == UNCACHED)
580                 err = fuse_readdir_uncached(file, ctx);
581
582         mutex_unlock(&ff->readdir.lock);
583
584         return err;
585 }