fs: add ksys_sync_file_range helper(); remove in-kernel calls to syscall
[linux-2.6-microblaze.git] / fs / orangefs / orangefs-utils.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * (C) 2001 Clemson University and The University of Chicago
4  *
5  * See COPYING in top-level directory.
6  */
7 #include <linux/kernel.h>
8 #include "protocol.h"
9 #include "orangefs-kernel.h"
10 #include "orangefs-dev-proto.h"
11 #include "orangefs-bufmap.h"
12
13 __s32 fsid_of_op(struct orangefs_kernel_op_s *op)
14 {
15         __s32 fsid = ORANGEFS_FS_ID_NULL;
16
17         if (op) {
18                 switch (op->upcall.type) {
19                 case ORANGEFS_VFS_OP_FILE_IO:
20                         fsid = op->upcall.req.io.refn.fs_id;
21                         break;
22                 case ORANGEFS_VFS_OP_LOOKUP:
23                         fsid = op->upcall.req.lookup.parent_refn.fs_id;
24                         break;
25                 case ORANGEFS_VFS_OP_CREATE:
26                         fsid = op->upcall.req.create.parent_refn.fs_id;
27                         break;
28                 case ORANGEFS_VFS_OP_GETATTR:
29                         fsid = op->upcall.req.getattr.refn.fs_id;
30                         break;
31                 case ORANGEFS_VFS_OP_REMOVE:
32                         fsid = op->upcall.req.remove.parent_refn.fs_id;
33                         break;
34                 case ORANGEFS_VFS_OP_MKDIR:
35                         fsid = op->upcall.req.mkdir.parent_refn.fs_id;
36                         break;
37                 case ORANGEFS_VFS_OP_READDIR:
38                         fsid = op->upcall.req.readdir.refn.fs_id;
39                         break;
40                 case ORANGEFS_VFS_OP_SETATTR:
41                         fsid = op->upcall.req.setattr.refn.fs_id;
42                         break;
43                 case ORANGEFS_VFS_OP_SYMLINK:
44                         fsid = op->upcall.req.sym.parent_refn.fs_id;
45                         break;
46                 case ORANGEFS_VFS_OP_RENAME:
47                         fsid = op->upcall.req.rename.old_parent_refn.fs_id;
48                         break;
49                 case ORANGEFS_VFS_OP_STATFS:
50                         fsid = op->upcall.req.statfs.fs_id;
51                         break;
52                 case ORANGEFS_VFS_OP_TRUNCATE:
53                         fsid = op->upcall.req.truncate.refn.fs_id;
54                         break;
55                 case ORANGEFS_VFS_OP_RA_FLUSH:
56                         fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
57                         break;
58                 case ORANGEFS_VFS_OP_FS_UMOUNT:
59                         fsid = op->upcall.req.fs_umount.fs_id;
60                         break;
61                 case ORANGEFS_VFS_OP_GETXATTR:
62                         fsid = op->upcall.req.getxattr.refn.fs_id;
63                         break;
64                 case ORANGEFS_VFS_OP_SETXATTR:
65                         fsid = op->upcall.req.setxattr.refn.fs_id;
66                         break;
67                 case ORANGEFS_VFS_OP_LISTXATTR:
68                         fsid = op->upcall.req.listxattr.refn.fs_id;
69                         break;
70                 case ORANGEFS_VFS_OP_REMOVEXATTR:
71                         fsid = op->upcall.req.removexattr.refn.fs_id;
72                         break;
73                 case ORANGEFS_VFS_OP_FSYNC:
74                         fsid = op->upcall.req.fsync.refn.fs_id;
75                         break;
76                 default:
77                         break;
78                 }
79         }
80         return fsid;
81 }
82
83 static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
84 {
85         int flags = 0;
86         if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
87                 flags |= S_IMMUTABLE;
88         else
89                 flags &= ~S_IMMUTABLE;
90         if (attrs->flags & ORANGEFS_APPEND_FL)
91                 flags |= S_APPEND;
92         else
93                 flags &= ~S_APPEND;
94         if (attrs->flags & ORANGEFS_NOATIME_FL)
95                 flags |= S_NOATIME;
96         else
97                 flags &= ~S_NOATIME;
98         return flags;
99 }
100
101 static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
102 {
103         int perm_mode = 0;
104
105         if (attrs->perms & ORANGEFS_O_EXECUTE)
106                 perm_mode |= S_IXOTH;
107         if (attrs->perms & ORANGEFS_O_WRITE)
108                 perm_mode |= S_IWOTH;
109         if (attrs->perms & ORANGEFS_O_READ)
110                 perm_mode |= S_IROTH;
111
112         if (attrs->perms & ORANGEFS_G_EXECUTE)
113                 perm_mode |= S_IXGRP;
114         if (attrs->perms & ORANGEFS_G_WRITE)
115                 perm_mode |= S_IWGRP;
116         if (attrs->perms & ORANGEFS_G_READ)
117                 perm_mode |= S_IRGRP;
118
119         if (attrs->perms & ORANGEFS_U_EXECUTE)
120                 perm_mode |= S_IXUSR;
121         if (attrs->perms & ORANGEFS_U_WRITE)
122                 perm_mode |= S_IWUSR;
123         if (attrs->perms & ORANGEFS_U_READ)
124                 perm_mode |= S_IRUSR;
125
126         if (attrs->perms & ORANGEFS_G_SGID)
127                 perm_mode |= S_ISGID;
128         if (attrs->perms & ORANGEFS_U_SUID)
129                 perm_mode |= S_ISUID;
130
131         return perm_mode;
132 }
133
134 /*
135  * NOTE: in kernel land, we never use the sys_attr->link_target for
136  * anything, so don't bother copying it into the sys_attr object here.
137  */
138 static inline int copy_attributes_from_inode(struct inode *inode,
139                                              struct ORANGEFS_sys_attr_s *attrs,
140                                              struct iattr *iattr)
141 {
142         umode_t tmp_mode;
143
144         if (!iattr || !inode || !attrs) {
145                 gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
146                            "in copy_attributes_from_inode!\n",
147                            iattr,
148                            inode,
149                            attrs);
150                 return -EINVAL;
151         }
152         /*
153          * We need to be careful to only copy the attributes out of the
154          * iattr object that we know are valid.
155          */
156         attrs->mask = 0;
157         if (iattr->ia_valid & ATTR_UID) {
158                 attrs->owner = from_kuid(&init_user_ns, iattr->ia_uid);
159                 attrs->mask |= ORANGEFS_ATTR_SYS_UID;
160                 gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
161         }
162         if (iattr->ia_valid & ATTR_GID) {
163                 attrs->group = from_kgid(&init_user_ns, iattr->ia_gid);
164                 attrs->mask |= ORANGEFS_ATTR_SYS_GID;
165                 gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
166         }
167
168         if (iattr->ia_valid & ATTR_ATIME) {
169                 attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
170                 if (iattr->ia_valid & ATTR_ATIME_SET) {
171                         attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
172                         attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
173                 }
174         }
175         if (iattr->ia_valid & ATTR_MTIME) {
176                 attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
177                 if (iattr->ia_valid & ATTR_MTIME_SET) {
178                         attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
179                         attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
180                 }
181         }
182         if (iattr->ia_valid & ATTR_CTIME)
183                 attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
184
185         /*
186          * ORANGEFS cannot set size with a setattr operation.  Probably not likely
187          * to be requested through the VFS, but just in case, don't worry about
188          * ATTR_SIZE
189          */
190
191         if (iattr->ia_valid & ATTR_MODE) {
192                 tmp_mode = iattr->ia_mode;
193                 if (tmp_mode & (S_ISVTX)) {
194                         if (is_root_handle(inode)) {
195                                 /*
196                                  * allow sticky bit to be set on root (since
197                                  * it shows up that way by default anyhow),
198                                  * but don't show it to the server
199                                  */
200                                 tmp_mode -= S_ISVTX;
201                         } else {
202                                 gossip_debug(GOSSIP_UTILS_DEBUG,
203                                              "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
204                                 return -EINVAL;
205                         }
206                 }
207
208                 if (tmp_mode & (S_ISUID)) {
209                         gossip_debug(GOSSIP_UTILS_DEBUG,
210                                      "Attempting to set setuid bit (not supported); returning EINVAL.\n");
211                         return -EINVAL;
212                 }
213
214                 attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
215                 attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
216         }
217
218         return 0;
219 }
220
221 static int orangefs_inode_type(enum orangefs_ds_type objtype)
222 {
223         if (objtype == ORANGEFS_TYPE_METAFILE)
224                 return S_IFREG;
225         else if (objtype == ORANGEFS_TYPE_DIRECTORY)
226                 return S_IFDIR;
227         else if (objtype == ORANGEFS_TYPE_SYMLINK)
228                 return S_IFLNK;
229         else
230                 return -1;
231 }
232
233 static void orangefs_make_bad_inode(struct inode *inode)
234 {
235         if (is_root_handle(inode)) {
236                 /*
237                  * if this occurs, the pvfs2-client-core was killed but we
238                  * can't afford to lose the inode operations and such
239                  * associated with the root handle in any case.
240                  */
241                 gossip_debug(GOSSIP_UTILS_DEBUG,
242                              "*** NOT making bad root inode %pU\n",
243                              get_khandle_from_ino(inode));
244         } else {
245                 gossip_debug(GOSSIP_UTILS_DEBUG,
246                              "*** making bad inode %pU\n",
247                              get_khandle_from_ino(inode));
248                 make_bad_inode(inode);
249         }
250 }
251
252 static int orangefs_inode_is_stale(struct inode *inode,
253     struct ORANGEFS_sys_attr_s *attrs, char *link_target)
254 {
255         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
256         int type = orangefs_inode_type(attrs->objtype);
257         /*
258          * If the inode type or symlink target have changed then this
259          * inode is stale.
260          */
261         if (type == -1 || !(inode->i_mode & type)) {
262                 orangefs_make_bad_inode(inode);
263                 return 1;
264         }
265         if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
266             link_target, ORANGEFS_NAME_MAX)) {
267                 orangefs_make_bad_inode(inode);
268                 return 1;
269         }
270         return 0;
271 }
272
273 int orangefs_inode_getattr(struct inode *inode, int new, int bypass,
274     u32 request_mask)
275 {
276         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
277         struct orangefs_kernel_op_s *new_op;
278         loff_t inode_size, rounded_up_size;
279         int ret, type;
280
281         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
282             get_khandle_from_ino(inode));
283
284         if (!new && !bypass) {
285                 /*
286                  * Must have all the attributes in the mask and be within cache
287                  * time.
288                  */
289                 if ((request_mask & orangefs_inode->getattr_mask) ==
290                     request_mask &&
291                     time_before(jiffies, orangefs_inode->getattr_time))
292                         return 0;
293         }
294
295         new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
296         if (!new_op)
297                 return -ENOMEM;
298         new_op->upcall.req.getattr.refn = orangefs_inode->refn;
299         /*
300          * Size is the hardest attribute to get.  The incremental cost of any
301          * other attribute is essentially zero.
302          */
303         if (request_mask & STATX_SIZE || new)
304                 new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_ALL_NOHINT;
305         else
306                 new_op->upcall.req.getattr.mask =
307                     ORANGEFS_ATTR_SYS_ALL_NOHINT & ~ORANGEFS_ATTR_SYS_SIZE;
308
309         ret = service_operation(new_op, __func__,
310             get_interruptible_flag(inode));
311         if (ret != 0)
312                 goto out;
313
314         if (!new) {
315                 ret = orangefs_inode_is_stale(inode,
316                     &new_op->downcall.resp.getattr.attributes,
317                     new_op->downcall.resp.getattr.link_target);
318                 if (ret) {
319                         ret = -ESTALE;
320                         goto out;
321                 }
322         }
323
324         type = orangefs_inode_type(new_op->
325             downcall.resp.getattr.attributes.objtype);
326         switch (type) {
327         case S_IFREG:
328                 inode->i_flags = orangefs_inode_flags(&new_op->
329                     downcall.resp.getattr.attributes);
330                 if (request_mask & STATX_SIZE || new) {
331                         inode_size = (loff_t)new_op->
332                             downcall.resp.getattr.attributes.size;
333                         rounded_up_size =
334                             (inode_size + (4096 - (inode_size % 4096)));
335                         inode->i_size = inode_size;
336                         orangefs_inode->blksize =
337                             new_op->downcall.resp.getattr.attributes.blksize;
338                         spin_lock(&inode->i_lock);
339                         inode->i_bytes = inode_size;
340                         inode->i_blocks =
341                             (unsigned long)(rounded_up_size / 512);
342                         spin_unlock(&inode->i_lock);
343                 }
344                 break;
345         case S_IFDIR:
346                 if (request_mask & STATX_SIZE || new) {
347                         inode->i_size = PAGE_SIZE;
348                         orangefs_inode->blksize = i_blocksize(inode);
349                         spin_lock(&inode->i_lock);
350                         inode_set_bytes(inode, inode->i_size);
351                         spin_unlock(&inode->i_lock);
352                 }
353                 set_nlink(inode, 1);
354                 break;
355         case S_IFLNK:
356                 if (new) {
357                         inode->i_size = (loff_t)strlen(new_op->
358                             downcall.resp.getattr.link_target);
359                         orangefs_inode->blksize = i_blocksize(inode);
360                         ret = strscpy(orangefs_inode->link_target,
361                             new_op->downcall.resp.getattr.link_target,
362                             ORANGEFS_NAME_MAX);
363                         if (ret == -E2BIG) {
364                                 ret = -EIO;
365                                 goto out;
366                         }
367                         inode->i_link = orangefs_inode->link_target;
368                 }
369                 break;
370         /* i.e. -1 */
371         default:
372                 /* XXX: ESTALE?  This is what is done if it is not new. */
373                 orangefs_make_bad_inode(inode);
374                 ret = -ESTALE;
375                 goto out;
376         }
377
378         inode->i_uid = make_kuid(&init_user_ns, new_op->
379             downcall.resp.getattr.attributes.owner);
380         inode->i_gid = make_kgid(&init_user_ns, new_op->
381             downcall.resp.getattr.attributes.group);
382         inode->i_atime.tv_sec = (time64_t)new_op->
383             downcall.resp.getattr.attributes.atime;
384         inode->i_mtime.tv_sec = (time64_t)new_op->
385             downcall.resp.getattr.attributes.mtime;
386         inode->i_ctime.tv_sec = (time64_t)new_op->
387             downcall.resp.getattr.attributes.ctime;
388         inode->i_atime.tv_nsec = 0;
389         inode->i_mtime.tv_nsec = 0;
390         inode->i_ctime.tv_nsec = 0;
391
392         /* special case: mark the root inode as sticky */
393         inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
394             orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
395
396         orangefs_inode->getattr_time = jiffies +
397             orangefs_getattr_timeout_msecs*HZ/1000;
398         if (request_mask & STATX_SIZE || new)
399                 orangefs_inode->getattr_mask = STATX_BASIC_STATS;
400         else
401                 orangefs_inode->getattr_mask = STATX_BASIC_STATS & ~STATX_SIZE;
402         ret = 0;
403 out:
404         op_release(new_op);
405         return ret;
406 }
407
408 int orangefs_inode_check_changed(struct inode *inode)
409 {
410         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
411         struct orangefs_kernel_op_s *new_op;
412         int ret;
413
414         gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
415             get_khandle_from_ino(inode));
416
417         new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
418         if (!new_op)
419                 return -ENOMEM;
420         new_op->upcall.req.getattr.refn = orangefs_inode->refn;
421         new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
422             ORANGEFS_ATTR_SYS_LNK_TARGET;
423
424         ret = service_operation(new_op, __func__,
425             get_interruptible_flag(inode));
426         if (ret != 0)
427                 goto out;
428
429         ret = orangefs_inode_is_stale(inode,
430             &new_op->downcall.resp.getattr.attributes,
431             new_op->downcall.resp.getattr.link_target);
432 out:
433         op_release(new_op);
434         return ret;
435 }
436
437 /*
438  * issues a orangefs setattr request to make sure the new attribute values
439  * take effect if successful.  returns 0 on success; -errno otherwise
440  */
441 int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
442 {
443         struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
444         struct orangefs_kernel_op_s *new_op;
445         int ret;
446
447         new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
448         if (!new_op)
449                 return -ENOMEM;
450
451         new_op->upcall.req.setattr.refn = orangefs_inode->refn;
452         ret = copy_attributes_from_inode(inode,
453                        &new_op->upcall.req.setattr.attributes,
454                        iattr);
455         if (ret >= 0) {
456                 ret = service_operation(new_op, __func__,
457                                 get_interruptible_flag(inode));
458
459                 gossip_debug(GOSSIP_UTILS_DEBUG,
460                              "orangefs_inode_setattr: returning %d\n",
461                              ret);
462         }
463
464         op_release(new_op);
465
466         if (ret == 0)
467                 orangefs_inode->getattr_time = jiffies - 1;
468
469         return ret;
470 }
471
472 /*
473  * The following is a very dirty hack that is now a permanent part of the
474  * ORANGEFS protocol. See protocol.h for more error definitions.
475  */
476
/*
 * Lookup table from ORANGEFS error number to errno value.
 *
 * The order matches include/orangefs-types.h in the OrangeFS source, so
 * entries must never be reordered.  The table is only ever read, hence
 * const.
 */
static const int PINT_errno_mapping[] = {
	0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
	EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
	EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
	ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
	EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
	EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
	ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
	EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
	ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
	EACCES, ECONNRESET, ERANGE
};
490
491 int orangefs_normalize_to_errno(__s32 error_code)
492 {
493         __u32 i;
494
495         /* Success */
496         if (error_code == 0) {
497                 return 0;
498         /*
499          * This shouldn't ever happen. If it does it should be fixed on the
500          * server.
501          */
502         } else if (error_code > 0) {
503                 gossip_err("orangefs: error status receieved.\n");
504                 gossip_err("orangefs: assuming error code is inverted.\n");
505                 error_code = -error_code;
506         }
507
508         /*
509          * XXX: This is very bad since error codes from ORANGEFS may not be
510          * suitable for return into userspace.
511          */
512
513         /*
514          * Convert ORANGEFS error values into errno values suitable for return
515          * from the kernel.
516          */
517         if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
518                 if (((-error_code) &
519                     (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
520                     ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
521                         /*
522                          * cancellation error codes generally correspond to
523                          * a timeout from the client's perspective
524                          */
525                         error_code = -ETIMEDOUT;
526                 } else {
527                         /* assume a default error code */
528                         gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
529                         error_code = -EINVAL;
530                 }
531
532         /* Convert ORANGEFS encoded errno values into regular errno values. */
533         } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
534                 i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
535                 if (i < ARRAY_SIZE(PINT_errno_mapping))
536                         error_code = -PINT_errno_mapping[i];
537                 else
538                         error_code = -EINVAL;
539
540         /*
541          * Only ORANGEFS protocol error codes should ever come here. Otherwise
542          * there is a bug somewhere.
543          */
544         } else {
545                 gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
546                 error_code = -EINVAL;
547         }
548         return error_code;
549 }
550
551 #define NUM_MODES 11
552 __s32 ORANGEFS_util_translate_mode(int mode)
553 {
554         int ret = 0;
555         int i = 0;
556         static int modes[NUM_MODES] = {
557                 S_IXOTH, S_IWOTH, S_IROTH,
558                 S_IXGRP, S_IWGRP, S_IRGRP,
559                 S_IXUSR, S_IWUSR, S_IRUSR,
560                 S_ISGID, S_ISUID
561         };
562         static int orangefs_modes[NUM_MODES] = {
563                 ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
564                 ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
565                 ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
566                 ORANGEFS_G_SGID, ORANGEFS_U_SUID
567         };
568
569         for (i = 0; i < NUM_MODES; i++)
570                 if (mode & modes[i])
571                         ret |= orangefs_modes[i];
572
573         return ret;
574 }
575 #undef NUM_MODES