fs/fcntl.c

   1 // SPDX-License-Identifier: GPL-2.0
   2 /*
   3  *  linux/fs/fcntl.c
   4  *
   5  *  Copyright (C) 1991, 1992  Linus Torvalds
   6  */
   7
   8 #include <linux/syscalls.h>
   9 #include <linux/init.h>
  10 #include <linux/mm.h>
  11 #include <linux/sched/task.h>
  12 #include <linux/fs.h>
  13 #include <linux/file.h>
  14 #include <linux/fdtable.h>
  15 #include <linux/capability.h>
  16 #include <linux/dnotify.h>
  17 #include <linux/slab.h>
  18 #include <linux/module.h>
  19 #include <linux/pipe_fs_i.h>
  20 #include <linux/security.h>
  21 #include <linux/ptrace.h>
  22 #include <linux/signal.h>
  23 #include <linux/rcupdate.h>
  24 #include <linux/pid_namespace.h>
  25 #include <linux/user_namespace.h>
  26 #include <linux/memfd.h>
  27 #include <linux/compat.h>
  28
  29 #include <linux/poll.h>
  30 #include <asm/siginfo.h>
  31 #include <linux/uaccess.h>
  32
  33 #define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
  34
  35 static int setfl(int fd, struct file * filp, unsigned long arg)
  36 {
  37         struct inode * inode = file_inode(filp);
  38         int error = 0;
  39
  40         /*
  41          * O_APPEND cannot be cleared if the file is marked as append-only
  42          * and the file is open for write.
  43          */
  44         if (((arg ^ filp->f_flags) & O_APPEND) && IS_APPEND(inode))
  45                 return -EPERM;
  46
  47         /* O_NOATIME can only be set by the owner or superuser */
  48         if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
  49                 if (!inode_owner_or_capable(inode))
  50                         return -EPERM;
  51
  52         /* required for strict SunOS emulation */
  53         if (O_NONBLOCK != O_NDELAY)
  54                if (arg & O_NDELAY)
  55                    arg |= O_NONBLOCK;
  56
  57         /* Pipe packetized mode is controlled by O_DIRECT flag */
  58         if (!S_ISFIFO(inode->i_mode) && (arg & O_DIRECT)) {
  59                 if (!filp->f_mapping || !filp->f_mapping->a_ops ||
  60                         !filp->f_mapping->a_ops->direct_IO)
  61                                 return -EINVAL;
  62         }
  63
  64         if (filp->f_op->check_flags)
  65                 error = filp->f_op->check_flags(arg);
  66         if (error)
  67                 return error;
  68
  69         /*
  70          * ->fasync() is responsible for setting the FASYNC bit.
  71          */
  72         if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op->fasync) {
  73                 error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
  74                 if (error < 0)
  75                         goto out;
  76                 if (error > 0)
  77                         error = 0;
  78         }
  79         spin_lock(&filp->f_lock);
  80         filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
  81         spin_unlock(&filp->f_lock);
  82
  83  out:
  84         return error;
  85 }
  86
  87 static void f_modown(struct file *filp, struct pid *pid, enum pid_type type,
  88                      int force)
  89 {
  90         write_lock_irq(&filp->f_owner.lock);
  91         if (force || !filp->f_owner.pid) {
  92                 put_pid(filp->f_owner.pid);
  93                 filp->f_owner.pid = get_pid(pid);
  94                 filp->f_owner.pid_type = type;
  95
  96                 if (pid) {
  97                         const struct cred *cred = current_cred();
  98                         filp->f_owner.uid = cred->uid;
  99                         filp->f_owner.euid = cred->euid;
 100                 }
 101         }
 102         write_unlock_irq(&filp->f_owner.lock);
 103 }
 104
 105 void __f_setown(struct file *filp, struct pid *pid, enum pid_type type,
 106                 int force)
 107 {
 108         security_file_set_fowner(filp);
 109         f_modown(filp, pid, type, force);
 110 }
 111 EXPORT_SYMBOL(__f_setown);
 112
 113 int f_setown(struct file *filp, unsigned long arg, int force)
 114 {
 115         enum pid_type type;
 116         struct pid *pid = NULL;
 117         int who = arg, ret = 0;
 118
 119         type = PIDTYPE_TGID;
 120         if (who < 0) {
 121                 /* avoid overflow below */
 122                 if (who == INT_MIN)
 123                         return -EINVAL;
 124
 125                 type = PIDTYPE_PGID;
 126                 who = -who;
 127         }
 128
 129         rcu_read_lock();
 130         if (who) {
 131                 pid = find_vpid(who);
 132                 if (!pid)
 133                         ret = -ESRCH;
 134         }
 135
 136         if (!ret)
 137                 __f_setown(filp, pid, type, force);
 138         rcu_read_unlock();
 139
 140         return ret;
 141 }
 142 EXPORT_SYMBOL(f_setown);
 143
 144 void f_delown(struct file *filp)
 145 {
 146         f_modown(filp, NULL, PIDTYPE_TGID, 1);
 147 }
 148
 149 pid_t f_getown(struct file *filp)
 150 {
 151         pid_t pid = 0;
 152         read_lock(&filp->f_owner.lock);
 153         rcu_read_lock();
 154         if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type)) {
 155                 pid = pid_vnr(filp->f_owner.pid);
 156                 if (filp->f_owner.pid_type == PIDTYPE_PGID)
 157                         pid = -pid;
 158         }
 159         rcu_read_unlock();
 160         read_unlock(&filp->f_owner.lock);
 161         return pid;
 162 }
 163
 164 static int f_setown_ex(struct file *filp, unsigned long arg)
 165 {
 166         struct f_owner_ex __user *owner_p = (void __user *)arg;
 167         struct f_owner_ex owner;
 168         struct pid *pid;
 169         int type;
 170         int ret;
 171
 172         ret = copy_from_user(&owner, owner_p, sizeof(owner));
 173         if (ret)
 174                 return -EFAULT;
 175
 176         switch (owner.type) {
 177         case F_OWNER_TID:
 178                 type = PIDTYPE_PID;
 179                 break;
 180
 181         case F_OWNER_PID:
 182                 type = PIDTYPE_TGID;
 183                 break;
 184
 185         case F_OWNER_PGRP:
 186                 type = PIDTYPE_PGID;
 187                 break;
 188
 189         default:
 190                 return -EINVAL;
 191         }
 192
 193         rcu_read_lock();
 194         pid = find_vpid(owner.pid);
 195         if (owner.pid && !pid)
 196                 ret = -ESRCH;
 197         else
 198                  __f_setown(filp, pid, type, 1);
 199         rcu_read_unlock();
 200
 201         return ret;
 202 }
 203
 204 static int f_getown_ex(struct file *filp, unsigned long arg)
 205 {
 206         struct f_owner_ex __user *owner_p = (void __user *)arg;
 207         struct f_owner_ex owner = {};
 208         int ret = 0;
 209
 210         read_lock(&filp->f_owner.lock);
 211         rcu_read_lock();
 212         if (pid_task(filp->f_owner.pid, filp->f_owner.pid_type))
 213                 owner.pid = pid_vnr(filp->f_owner.pid);
 214         rcu_read_unlock();
 215         switch (filp->f_owner.pid_type) {
 216         case PIDTYPE_PID:
 217                 owner.type = F_OWNER_TID;
 218                 break;
 219
 220         case PIDTYPE_TGID:
 221                 owner.type = F_OWNER_PID;
 222                 break;
 223
 224         case PIDTYPE_PGID:
 225                 owner.type = F_OWNER_PGRP;
 226                 break;
 227
 228         default:
 229                 WARN_ON(1);
 230                 ret = -EINVAL;
 231                 break;
 232         }
 233         read_unlock(&filp->f_owner.lock);
 234
 235         if (!ret) {
 236                 ret = copy_to_user(owner_p, &owner, sizeof(owner));
 237                 if (ret)
 238                         ret = -EFAULT;
 239         }
 240         return ret;
 241 }
 242
 243 #ifdef CONFIG_CHECKPOINT_RESTORE
 244 static int f_getowner_uids(struct file *filp, unsigned long arg)
 245 {
 246         struct user_namespace *user_ns = current_user_ns();
 247         uid_t __user *dst = (void __user *)arg;
 248         uid_t src[2];
 249         int err;
 250
 251         read_lock(&filp->f_owner.lock);
 252         src[0] = from_kuid(user_ns, filp->f_owner.uid);
 253         src[1] = from_kuid(user_ns, filp->f_owner.euid);
 254         read_unlock(&filp->f_owner.lock);
 255
 256         err  = put_user(src[0], &dst[0]);
 257         err |= put_user(src[1], &dst[1]);
 258
 259         return err;
 260 }
 261 #else
 262 static int f_getowner_uids(struct file *filp, unsigned long arg)
 263 {
 264         return -EINVAL;
 265 }
 266 #endif
 267
 268 static bool rw_hint_valid(enum rw_hint hint)
 269 {
 270         switch (hint) {
 271         case RWH_WRITE_LIFE_NOT_SET:
 272         case RWH_WRITE_LIFE_NONE:
 273         case RWH_WRITE_LIFE_SHORT:
 274         case RWH_WRITE_LIFE_MEDIUM:
 275         case RWH_WRITE_LIFE_LONG:
 276         case RWH_WRITE_LIFE_EXTREME:
 277                 return true;
 278         default:
 279                 return false;
 280         }
 281 }
 282
 283 static long fcntl_rw_hint(struct file *file, unsigned int cmd,
 284                           unsigned long arg)
 285 {
 286         struct inode *inode = file_inode(file);
 287         u64 __user *argp = (u64 __user *)arg;
 288         enum rw_hint hint;
 289         u64 h;
 290
 291         switch (cmd) {
 292         case F_GET_FILE_RW_HINT:
 293                 h = file_write_hint(file);
 294                 if (copy_to_user(argp, &h, sizeof(*argp)))
 295                         return -EFAULT;
 296                 return 0;
 297         case F_SET_FILE_RW_HINT:
 298                 if (copy_from_user(&h, argp, sizeof(h)))
 299                         return -EFAULT;
 300                 hint = (enum rw_hint) h;
 301                 if (!rw_hint_valid(hint))
 302                         return -EINVAL;
 303
 304                 spin_lock(&file->f_lock);
 305                 file->f_write_hint = hint;
 306                 spin_unlock(&file->f_lock);
 307                 return 0;
 308         case F_GET_RW_HINT:
 309                 h = inode->i_write_hint;
 310                 if (copy_to_user(argp, &h, sizeof(*argp)))
 311                         return -EFAULT;
 312                 return 0;
 313         case F_SET_RW_HINT:
 314                 if (copy_from_user(&h, argp, sizeof(h)))
 315                         return -EFAULT;
 316                 hint = (enum rw_hint) h;
 317                 if (!rw_hint_valid(hint))
 318                         return -EINVAL;
 319
 320                 inode_lock(inode);
 321                 inode->i_write_hint = hint;
 322                 inode_unlock(inode);
 323                 return 0;
 324         default:
 325                 return -EINVAL;
 326         }
 327 }
 328
 329 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 330                 struct file *filp)
 331 {
 332         void __user *argp = (void __user *)arg;
 333         struct flock flock;
 334         long err = -EINVAL;
 335
 336         switch (cmd) {
 337         case F_DUPFD:
 338                 err = f_dupfd(arg, filp, 0);
 339                 break;
 340         case F_DUPFD_CLOEXEC:
 341                 err = f_dupfd(arg, filp, O_CLOEXEC);
 342                 break;
 343         case F_GETFD:
 344                 err = get_close_on_exec(fd) ? FD_CLOEXEC : 0;
 345                 break;
 346         case F_SETFD:
 347                 err = 0;
 348                 set_close_on_exec(fd, arg & FD_CLOEXEC);
 349                 break;
 350         case F_GETFL:
 351                 err = filp->f_flags;
 352                 break;
 353         case F_SETFL:
 354                 err = setfl(fd, filp, arg);
 355                 break;
 356 #if BITS_PER_LONG != 32
 357         /* 32-bit arches must use fcntl64() */
 358         case F_OFD_GETLK:
 359 #endif
 360         case F_GETLK:
 361                 if (copy_from_user(&flock, argp, sizeof(flock)))
 362                         return -EFAULT;
 363                 err = fcntl_getlk(filp, cmd, &flock);
 364                 if (!err && copy_to_user(argp, &flock, sizeof(flock)))
 365                         return -EFAULT;
 366                 break;
 367 #if BITS_PER_LONG != 32
 368         /* 32-bit arches must use fcntl64() */
 369         case F_OFD_SETLK:
 370         case F_OFD_SETLKW:
 371 #endif
 372                 fallthrough;
 373         case F_SETLK:
 374         case F_SETLKW:
 375                 if (copy_from_user(&flock, argp, sizeof(flock)))
 376                         return -EFAULT;
 377                 err = fcntl_setlk(fd, filp, cmd, &flock);
 378                 break;
 379         case F_GETOWN:
 380                 /*
 381                  * XXX If f_owner is a process group, the
 382                  * negative return value will get converted
 383                  * into an error.  Oops.  If we keep the
 384                  * current syscall conventions, the only way
 385                  * to fix this will be in libc.
 386                  */
 387                 err = f_getown(filp);
 388                 force_successful_syscall_return();
 389                 break;
 390         case F_SETOWN:
 391                 err = f_setown(filp, arg, 1);
 392                 break;
 393         case F_GETOWN_EX:
 394                 err = f_getown_ex(filp, arg);
 395                 break;
 396         case F_SETOWN_EX:
 397                 err = f_setown_ex(filp, arg);
 398                 break;
 399         case F_GETOWNER_UIDS:
 400                 err = f_getowner_uids(filp, arg);
 401                 break;
 402         case F_GETSIG:
 403                 err = filp->f_owner.signum;
 404                 break;
 405         case F_SETSIG:
 406                 /* arg == 0 restores default behaviour. */
 407                 if (!valid_signal(arg)) {
 408                         break;
 409                 }
 410                 err = 0;
 411                 filp->f_owner.signum = arg;
 412                 break;
 413         case F_GETLEASE:
 414                 err = fcntl_getlease(filp);
 415                 break;
 416         case F_SETLEASE:
 417                 err = fcntl_setlease(fd, filp, arg);
 418                 break;
 419         case F_NOTIFY:
 420                 err = fcntl_dirnotify(fd, filp, arg);
 421                 break;
 422         case F_SETPIPE_SZ:
 423         case F_GETPIPE_SZ:
 424                 err = pipe_fcntl(filp, cmd, arg);
 425                 break;
 426         case F_ADD_SEALS:
 427         case F_GET_SEALS:
 428                 err = memfd_fcntl(filp, cmd, arg);
 429                 break;
 430         case F_GET_RW_HINT:
 431         case F_SET_RW_HINT:
 432         case F_GET_FILE_RW_HINT:
 433         case F_SET_FILE_RW_HINT:
 434                 err = fcntl_rw_hint(filp, cmd, arg);
 435                 break;
 436         default:
 437                 break;
 438         }
 439         return err;
 440 }
 441
 442 static int check_fcntl_cmd(unsigned cmd)
 443 {
 444         switch (cmd) {
 445         case F_DUPFD:
 446         case F_DUPFD_CLOEXEC:
 447         case F_GETFD:
 448         case F_SETFD:
 449         case F_GETFL:
 450                 return 1;
 451         }
 452         return 0;
 453 }
 454
 455 SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, unsigned long, arg)
 456 {
 457         struct fd f = fdget_raw(fd);
 458         long err = -EBADF;
 459
 460         if (!f.file)
 461                 goto out;
 462
 463         if (unlikely(f.file->f_mode & FMODE_PATH)) {
 464                 if (!check_fcntl_cmd(cmd))
 465                         goto out1;
 466         }
 467
 468         err = security_file_fcntl(f.file, cmd, arg);
 469         if (!err)
 470                 err = do_fcntl(fd, cmd, arg, f.file);
 471
 472 out1:
 473         fdput(f);
 474 out:
 475         return err;
 476 }
 477
 478 #if BITS_PER_LONG == 32
 479 SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 480                 unsigned long, arg)
 481 {
 482         void __user *argp = (void __user *)arg;
 483         struct fd f = fdget_raw(fd);
 484         struct flock64 flock;
 485         long err = -EBADF;
 486
 487         if (!f.file)
 488                 goto out;
 489
 490         if (unlikely(f.file->f_mode & FMODE_PATH)) {
 491                 if (!check_fcntl_cmd(cmd))
 492                         goto out1;
 493         }
 494
 495         err = security_file_fcntl(f.file, cmd, arg);
 496         if (err)
 497                 goto out1;
 498
 499         switch (cmd) {
 500         case F_GETLK64:
 501         case F_OFD_GETLK:
 502                 err = -EFAULT;
 503                 if (copy_from_user(&flock, argp, sizeof(flock)))
 504                         break;
 505                 err = fcntl_getlk64(f.file, cmd, &flock);
 506                 if (!err && copy_to_user(argp, &flock, sizeof(flock)))
 507                         err = -EFAULT;
 508                 break;
 509         case F_SETLK64:
 510         case F_SETLKW64:
 511         case F_OFD_SETLK:
 512         case F_OFD_SETLKW:
 513                 err = -EFAULT;
 514                 if (copy_from_user(&flock, argp, sizeof(flock)))
 515                         break;
 516                 err = fcntl_setlk64(fd, f.file, cmd, &flock);
 517                 break;
 518         default:
 519                 err = do_fcntl(fd, cmd, arg, f.file);
 520                 break;
 521         }
 522 out1:
 523         fdput(f);
 524 out:
 525         return err;
 526 }
 527 #endif
 528
 529 #ifdef CONFIG_COMPAT
 530 /* careful - don't use anywhere else */
 531 #define copy_flock_fields(dst, src)             \
 532         (dst)->l_type = (src)->l_type;          \
 533         (dst)->l_whence = (src)->l_whence;      \
 534         (dst)->l_start = (src)->l_start;        \
 535         (dst)->l_len = (src)->l_len;            \
 536         (dst)->l_pid = (src)->l_pid;
 537
 538 static int get_compat_flock(struct flock *kfl, const struct compat_flock __user *ufl)
 539 {
 540         struct compat_flock fl;
 541
 542         if (copy_from_user(&fl, ufl, sizeof(struct compat_flock)))
 543                 return -EFAULT;
 544         copy_flock_fields(kfl, &fl);
 545         return 0;
 546 }
 547
 548 static int get_compat_flock64(struct flock *kfl, const struct compat_flock64 __user *ufl)
 549 {
 550         struct compat_flock64 fl;
 551
 552         if (copy_from_user(&fl, ufl, sizeof(struct compat_flock64)))
 553                 return -EFAULT;
 554         copy_flock_fields(kfl, &fl);
 555         return 0;
 556 }
 557
 558 static int put_compat_flock(const struct flock *kfl, struct compat_flock __user *ufl)
 559 {
 560         struct compat_flock fl;
 561
 562         memset(&fl, 0, sizeof(struct compat_flock));
 563         copy_flock_fields(&fl, kfl);
 564         if (copy_to_user(ufl, &fl, sizeof(struct compat_flock)))
 565                 return -EFAULT;
 566         return 0;
 567 }
 568
 569 static int put_compat_flock64(const struct flock *kfl, struct compat_flock64 __user *ufl)
 570 {
 571         struct compat_flock64 fl;
 572
 573         BUILD_BUG_ON(sizeof(kfl->l_start) > sizeof(ufl->l_start));
 574         BUILD_BUG_ON(sizeof(kfl->l_len) > sizeof(ufl->l_len));
 575
 576         memset(&fl, 0, sizeof(struct compat_flock64));
 577         copy_flock_fields(&fl, kfl);
 578         if (copy_to_user(ufl, &fl, sizeof(struct compat_flock64)))
 579                 return -EFAULT;
 580         return 0;
 581 }
 582 #undef copy_flock_fields
 583
 584 static unsigned int
 585 convert_fcntl_cmd(unsigned int cmd)
 586 {
 587         switch (cmd) {
 588         case F_GETLK64:
 589                 return F_GETLK;
 590         case F_SETLK64:
 591                 return F_SETLK;
 592         case F_SETLKW64:
 593                 return F_SETLKW;
 594         }
 595
 596         return cmd;
 597 }
 598
 599 /*
 600  * GETLK was successful and we need to return the data, but it needs to fit in
 601  * the compat structure.
 602  * l_start shouldn't be too big, unless the original start + end is greater than
 603  * COMPAT_OFF_T_MAX, in which case the app was asking for trouble, so we return
 604  * -EOVERFLOW in that case.  l_len could be too big, in which case we just
 605  * truncate it, and only allow the app to see that part of the conflicting lock
 606  * that might make sense to it anyway
 607  */
 608 static int fixup_compat_flock(struct flock *flock)
 609 {
 610         if (flock->l_start > COMPAT_OFF_T_MAX)
 611                 return -EOVERFLOW;
 612         if (flock->l_len > COMPAT_OFF_T_MAX)
 613                 flock->l_len = COMPAT_OFF_T_MAX;
 614         return 0;
 615 }
 616
 617 static long do_compat_fcntl64(unsigned int fd, unsigned int cmd,
 618                              compat_ulong_t arg)
 619 {
 620         struct fd f = fdget_raw(fd);
 621         struct flock flock;
 622         long err = -EBADF;
 623
 624         if (!f.file)
 625                 return err;
 626
 627         if (unlikely(f.file->f_mode & FMODE_PATH)) {
 628                 if (!check_fcntl_cmd(cmd))
 629                         goto out_put;
 630         }
 631
 632         err = security_file_fcntl(f.file, cmd, arg);
 633         if (err)
 634                 goto out_put;
 635
 636         switch (cmd) {
 637         case F_GETLK:
 638                 err = get_compat_flock(&flock, compat_ptr(arg));
 639                 if (err)
 640                         break;
 641                 err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
 642                 if (err)
 643                         break;
 644                 err = fixup_compat_flock(&flock);
 645                 if (!err)
 646                         err = put_compat_flock(&flock, compat_ptr(arg));
 647                 break;
 648         case F_GETLK64:
 649         case F_OFD_GETLK:
 650                 err = get_compat_flock64(&flock, compat_ptr(arg));
 651                 if (err)
 652                         break;
 653                 err = fcntl_getlk(f.file, convert_fcntl_cmd(cmd), &flock);
 654                 if (!err)
 655                         err = put_compat_flock64(&flock, compat_ptr(arg));
 656                 break;
 657         case F_SETLK:
 658         case F_SETLKW:
 659                 err = get_compat_flock(&flock, compat_ptr(arg));
 660                 if (err)
 661                         break;
 662                 err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
 663                 break;
 664         case F_SETLK64:
 665         case F_SETLKW64:
 666         case F_OFD_SETLK:
 667         case F_OFD_SETLKW:
 668                 err = get_compat_flock64(&flock, compat_ptr(arg));
 669                 if (err)
 670                         break;
 671                 err = fcntl_setlk(fd, f.file, convert_fcntl_cmd(cmd), &flock);
 672                 break;
 673         default:
 674                 err = do_fcntl(fd, cmd, arg, f.file);
 675                 break;
 676         }
 677 out_put:
 678         fdput(f);
 679         return err;
 680 }
 681
 682 COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd,
 683                        compat_ulong_t, arg)
 684 {
 685         return do_compat_fcntl64(fd, cmd, arg);
 686 }
 687
 688 COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd,
 689                        compat_ulong_t, arg)
 690 {
 691         switch (cmd) {
 692         case F_GETLK64:
 693         case F_SETLK64:
 694         case F_SETLKW64:
 695         case F_OFD_GETLK:
 696         case F_OFD_SETLK:
 697         case F_OFD_SETLKW:
 698                 return -EINVAL;
 699         }
 700         return do_compat_fcntl64(fd, cmd, arg);
 701 }
 702 #endif
 703
 704 /* Table to convert sigio signal codes into poll band bitmaps */
 705
 706 static const __poll_t band_table[NSIGPOLL] = {
 707         EPOLLIN | EPOLLRDNORM,                  /* POLL_IN */
 708         EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND,   /* POLL_OUT */
 709         EPOLLIN | EPOLLRDNORM | EPOLLMSG,               /* POLL_MSG */
 710         EPOLLERR,                               /* POLL_ERR */
 711         EPOLLPRI | EPOLLRDBAND,                 /* POLL_PRI */
 712         EPOLLHUP | EPOLLERR                     /* POLL_HUP */
 713 };
 714
 715 static inline int sigio_perm(struct task_struct *p,
 716                              struct fown_struct *fown, int sig)
 717 {
 718         const struct cred *cred;
 719         int ret;
 720
 721         rcu_read_lock();
 722         cred = __task_cred(p);
 723         ret = ((uid_eq(fown->euid, GLOBAL_ROOT_UID) ||
 724                 uid_eq(fown->euid, cred->suid) || uid_eq(fown->euid, cred->uid) ||
 725                 uid_eq(fown->uid,  cred->suid) || uid_eq(fown->uid,  cred->uid)) &&
 726                !security_file_send_sigiotask(p, fown, sig));
 727         rcu_read_unlock();
 728         return ret;
 729 }
 730
 731 static void send_sigio_to_task(struct task_struct *p,
 732                                struct fown_struct *fown,
 733                                int fd, int reason, enum pid_type type)
 734 {
 735         /*
 736          * F_SETSIG can change ->signum lockless in parallel, make
 737          * sure we read it once and use the same value throughout.
 738          */
 739         int signum = READ_ONCE(fown->signum);
 740
 741         if (!sigio_perm(p, fown, signum))
 742                 return;
 743
 744         switch (signum) {
 745                 default: {
 746                         kernel_siginfo_t si;
 747
 748                         /* Queue a rt signal with the appropriate fd as its
 749                            value.  We use SI_SIGIO as the source, not
 750                            SI_KERNEL, since kernel signals always get
 751                            delivered even if we can't queue.  Failure to
 752                            queue in this case _should_ be reported; we fall
 753                            back to SIGIO in that case. --sct */
 754                         clear_siginfo(&si);
 755                         si.si_signo = signum;
 756                         si.si_errno = 0;
 757                         si.si_code  = reason;
 758                         /*
 759                          * Posix definies POLL_IN and friends to be signal
 760                          * specific si_codes for SIG_POLL.  Linux extended
 761                          * these si_codes to other signals in a way that is
 762                          * ambiguous if other signals also have signal
 763                          * specific si_codes.  In that case use SI_SIGIO instead
 764                          * to remove the ambiguity.
 765                          */
 766                         if ((signum != SIGPOLL) && sig_specific_sicodes(signum))
 767                                 si.si_code = SI_SIGIO;
 768
 769                         /* Make sure we are called with one of the POLL_*
 770                            reasons, otherwise we could leak kernel stack into
 771                            userspace.  */
 772                         BUG_ON((reason < POLL_IN) || ((reason - POLL_IN) >= NSIGPOLL));
 773                         if (reason - POLL_IN >= NSIGPOLL)
 774                                 si.si_band  = ~0L;
 775                         else
 776                                 si.si_band = mangle_poll(band_table[reason - POLL_IN]);
 777                         si.si_fd    = fd;
 778                         if (!do_send_sig_info(signum, &si, p, type))
 779                                 break;
 780                 }
 781                         fallthrough;    /* fall back on the old plain SIGIO signal */
 782                 case 0:
 783                         do_send_sig_info(SIGIO, SEND_SIG_PRIV, p, type);
 784         }
 785 }
 786
 787 void send_sigio(struct fown_struct *fown, int fd, int band)
 788 {
 789         struct task_struct *p;
 790         enum pid_type type;
 791         unsigned long flags;
 792         struct pid *pid;
 793
 794         read_lock_irqsave(&fown->lock, flags);
 795
 796         type = fown->pid_type;
 797         pid = fown->pid;
 798         if (!pid)
 799                 goto out_unlock_fown;
 800
 801         if (type <= PIDTYPE_TGID) {
 802                 rcu_read_lock();
 803                 p = pid_task(pid, PIDTYPE_PID);
 804                 if (p)
 805                         send_sigio_to_task(p, fown, fd, band, type);
 806                 rcu_read_unlock();
 807         } else {
 808                 read_lock(&tasklist_lock);
 809                 do_each_pid_task(pid, type, p) {
 810                         send_sigio_to_task(p, fown, fd, band, type);
 811                 } while_each_pid_task(pid, type, p);
 812                 read_unlock(&tasklist_lock);
 813         }
 814  out_unlock_fown:
 815         read_unlock_irqrestore(&fown->lock, flags);
 816 }
 817
 818 static void send_sigurg_to_task(struct task_struct *p,
 819                                 struct fown_struct *fown, enum pid_type type)
 820 {
 821         if (sigio_perm(p, fown, SIGURG))
 822                 do_send_sig_info(SIGURG, SEND_SIG_PRIV, p, type);
 823 }
 824
 825 int send_sigurg(struct fown_struct *fown)
 826 {
 827         struct task_struct *p;
 828         enum pid_type type;
 829         struct pid *pid;
 830         unsigned long flags;
 831         int ret = 0;
 832
 833         read_lock_irqsave(&fown->lock, flags);
 834
 835         type = fown->pid_type;
 836         pid = fown->pid;
 837         if (!pid)
 838                 goto out_unlock_fown;
 839
 840         ret = 1;
 841
 842         if (type <= PIDTYPE_TGID) {
 843                 rcu_read_lock();
 844                 p = pid_task(pid, PIDTYPE_PID);
 845                 if (p)
 846                         send_sigurg_to_task(p, fown, type);
 847                 rcu_read_unlock();
 848         } else {
 849                 read_lock(&tasklist_lock);
 850                 do_each_pid_task(pid, type, p) {
 851                         send_sigurg_to_task(p, fown, type);
 852                 } while_each_pid_task(pid, type, p);
 853                 read_unlock(&tasklist_lock);
 854         }
 855  out_unlock_fown:
 856         read_unlock_irqrestore(&fown->lock, flags);
 857         return ret;
 858 }
 859
 860 static DEFINE_SPINLOCK(fasync_lock);
 861 static struct kmem_cache *fasync_cache __read_mostly;
 862
 863 static void fasync_free_rcu(struct rcu_head *head)
 864 {
 865         kmem_cache_free(fasync_cache,
 866                         container_of(head, struct fasync_struct, fa_rcu));
 867 }
 868
 869 /*
 870  * Remove a fasync entry. If successfully removed, return
 871  * positive and clear the FASYNC flag. If no entry exists,
 872  * do nothing and return 0.
 873  *
 874  * NOTE! It is very important that the FASYNC flag always
 875  * match the state "is the filp on a fasync list".
 876  *
 877  */
 878 int fasync_remove_entry(struct file *filp, struct fasync_struct **fapp)
 879 {
 880         struct fasync_struct *fa, **fp;
 881         int result = 0;
 882
 883         spin_lock(&filp->f_lock);
 884         spin_lock(&fasync_lock);
 885         for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 886                 if (fa->fa_file != filp)
 887                         continue;
 888
 889                 write_lock_irq(&fa->fa_lock);
 890                 fa->fa_file = NULL;
 891                 write_unlock_irq(&fa->fa_lock);
 892
 893                 *fp = fa->fa_next;
 894                 call_rcu(&fa->fa_rcu, fasync_free_rcu);
 895                 filp->f_flags &= ~FASYNC;
 896                 result = 1;
 897                 break;
 898         }
 899         spin_unlock(&fasync_lock);
 900         spin_unlock(&filp->f_lock);
 901         return result;
 902 }
 903
 904 struct fasync_struct *fasync_alloc(void)
 905 {
 906         return kmem_cache_alloc(fasync_cache, GFP_KERNEL);
 907 }
 908
 909 /*
 910  * NOTE! This can be used only for unused fasync entries:
 911  * entries that actually got inserted on the fasync list
 912  * need to be released by rcu - see fasync_remove_entry.
 913  */
 914 void fasync_free(struct fasync_struct *new)
 915 {
 916         kmem_cache_free(fasync_cache, new);
 917 }
 918
 919 /*
 920  * Insert a new entry into the fasync list.  Return the pointer to the
 921  * old one if we didn't use the new one.
 922  *
 923  * NOTE! It is very important that the FASYNC flag always
 924  * match the state "is the filp on a fasync list".
 925  */
 926 struct fasync_struct *fasync_insert_entry(int fd, struct file *filp, struct fasync_struct **fapp, struct fasync_struct *new)
 927 {
 928         struct fasync_struct *fa, **fp;
 929
 930         spin_lock(&filp->f_lock);
 931         spin_lock(&fasync_lock);
 932         for (fp = fapp; (fa = *fp) != NULL; fp = &fa->fa_next) {
 933                 if (fa->fa_file != filp)
 934                         continue;
 935
 936                 write_lock_irq(&fa->fa_lock);
 937                 fa->fa_fd = fd;
 938                 write_unlock_irq(&fa->fa_lock);
 939                 goto out;
 940         }
 941
 942         rwlock_init(&new->fa_lock);
 943         new->magic = FASYNC_MAGIC;
 944         new->fa_file = filp;
 945         new->fa_fd = fd;
 946         new->fa_next = *fapp;
 947         rcu_assign_pointer(*fapp, new);
 948         filp->f_flags |= FASYNC;
 949
 950 out:
 951         spin_unlock(&fasync_lock);
 952         spin_unlock(&filp->f_lock);
 953         return fa;
 954 }
 955
 956 /*
 957  * Add a fasync entry. Return negative on error, positive if
 958  * added, and zero if did nothing but change an existing one.
 959  */
 960 static int fasync_add_entry(int fd, struct file *filp, struct fasync_struct **fapp)
 961 {
 962         struct fasync_struct *new;
 963
 964         new = fasync_alloc();
 965         if (!new)
 966                 return -ENOMEM;
 967
 968         /*
 969          * fasync_insert_entry() returns the old (update) entry if
 970          * it existed.
 971          *
 972          * So free the (unused) new entry and return 0 to let the
 973          * caller know that we didn't add any new fasync entries.
 974          */
 975         if (fasync_insert_entry(fd, filp, fapp, new)) {
 976                 fasync_free(new);
 977                 return 0;
 978         }
 979
 980         return 1;
 981 }
 982
 983 /*
 984  * fasync_helper() is used by almost all character device drivers
 985  * to set up the fasync queue, and for regular files by the file
 986  * lease code. It returns negative on error, 0 if it did no changes
 987  * and positive if it added/deleted the entry.
 988  */
 989 int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fapp)
 990 {
 991         if (!on)
 992                 return fasync_remove_entry(filp, fapp);
 993         return fasync_add_entry(fd, filp, fapp);
 994 }
 995
 996 EXPORT_SYMBOL(fasync_helper);
 997
 998 /*
 999  * rcu_read_lock() is held
1000  */
1001 static void kill_fasync_rcu(struct fasync_struct *fa, int sig, int band)
1002 {
1003         while (fa) {
1004                 struct fown_struct *fown;
1005
1006                 if (fa->magic != FASYNC_MAGIC) {
1007                         printk(KERN_ERR "kill_fasync: bad magic number in "
1008                                "fasync_struct!\n");
1009                         return;
1010                 }
1011                 read_lock(&fa->fa_lock);
1012                 if (fa->fa_file) {
1013                         fown = &fa->fa_file->f_owner;
1014                         /* Don't send SIGURG to processes which have not set a
1015                            queued signum: SIGURG has its own default signalling
1016                            mechanism. */
1017                         if (!(sig == SIGURG && fown->signum == 0))
1018                                 send_sigio(fown, fa->fa_fd, band);
1019                 }
1020                 read_unlock(&fa->fa_lock);
1021                 fa = rcu_dereference(fa->fa_next);
1022         }
1023 }
1024
/*
 * Notify every owner registered on the fasync list at @fp.
 * @sig and @band are passed through to kill_fasync_rcu().
 */
void kill_fasync(struct fasync_struct **fp, int sig, int band)
{
	/*
	 * Quick unlocked peek first: the list is usually empty, in
	 * which case there is nothing to do.
	 */
	if (!*fp)
		return;

	rcu_read_lock();
	kill_fasync_rcu(rcu_dereference(*fp), sig, band);
	rcu_read_unlock();
}
EXPORT_SYMBOL(kill_fasync);
1037
1038 static int __init fcntl_init(void)
1039 {
1040         /*
1041          * Please add new bits here to ensure allocation uniqueness.
1042          * Exceptions: O_NONBLOCK is a two bit define on parisc; O_NDELAY
1043          * is defined as O_NONBLOCK on some platforms and not on others.
1044          */
1045         BUILD_BUG_ON(21 - 1 /* for O_RDONLY being 0 */ !=
1046                 HWEIGHT32(
1047                         (VALID_OPEN_FLAGS & ~(O_NONBLOCK | O_NDELAY)) |
1048                         __FMODE_EXEC | __FMODE_NONOTIFY));
1049
1050         fasync_cache = kmem_cache_create("fasync_cache",
1051                 sizeof(struct fasync_struct), 0, SLAB_PANIC, NULL);
1052         return 0;
1053 }
1054
1055 module_init(fcntl_init)