ipc: compute kern_ipc_perm.id under the ipc lock
[linux-2.6-microblaze.git] / ipc / shm.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * linux/ipc/shm.c
4  * Copyright (C) 1992, 1993 Krishna Balasubramanian
5  *       Many improvements/fixes by Bruno Haible.
6  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
7  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
8  *
9  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
10  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
11  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
12  * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
13  * Make shmmax, shmall, shmmni sysctl'able, Christoph Rohland <cr@sap.com>
14  * Shared /dev/zero support, Kanoj Sarcar <kanoj@sgi.com>
15  * Move the mm functionality over to mm/shmem.c, Christoph Rohland <cr@sap.com>
16  *
17  * support for audit of ipc object properties and permission changes
18  * Dustin Kirkland <dustin.kirkland@us.ibm.com>
19  *
20  * namespaces support
21  * OpenVZ, SWsoft Inc.
22  * Pavel Emelianov <xemul@openvz.org>
23  *
24  * Better ipc lock (kern_ipc_perm.lock) handling
25  * Davidlohr Bueso <davidlohr.bueso@hp.com>, June 2013.
26  */
27
28 #include <linux/slab.h>
29 #include <linux/mm.h>
30 #include <linux/hugetlb.h>
31 #include <linux/shm.h>
32 #include <linux/init.h>
33 #include <linux/file.h>
34 #include <linux/mman.h>
35 #include <linux/shmem_fs.h>
36 #include <linux/security.h>
37 #include <linux/syscalls.h>
38 #include <linux/audit.h>
39 #include <linux/capability.h>
40 #include <linux/ptrace.h>
41 #include <linux/seq_file.h>
42 #include <linux/rwsem.h>
43 #include <linux/nsproxy.h>
44 #include <linux/mount.h>
45 #include <linux/ipc_namespace.h>
46 #include <linux/rhashtable.h>
47
48 #include <linux/uaccess.h>
49
50 #include "util.h"
51
52 struct shmid_kernel /* private to the kernel */
53 {
54         struct kern_ipc_perm    shm_perm;
55         struct file             *shm_file;
56         unsigned long           shm_nattch;
57         unsigned long           shm_segsz;
58         time64_t                shm_atim;
59         time64_t                shm_dtim;
60         time64_t                shm_ctim;
61         struct pid              *shm_cprid;
62         struct pid              *shm_lprid;
63         struct user_struct      *mlock_user;
64
65         /* The task created the shm object.  NULL if the task is dead. */
66         struct task_struct      *shm_creator;
67         struct list_head        shm_clist;      /* list by creator */
68 } __randomize_layout;
69
/* Flags stored in shm_perm.mode above the permission bits. */
#define SHM_DEST	01000	/* destroy the segment on last detach */
#define SHM_LOCKED	02000	/* the segment must not be swapped */
73
/*
 * Per-attach state hung off ->private_data of the file handed to mmap.
 */
struct shm_file_data {
	int id;					/* ipc id used to look the segment up */
	struct ipc_namespace *ns;		/* namespace the id lives in */
	struct file *file;			/* backing file the calls are forwarded to */
	const struct vm_operations_struct *vm_ops;	/* vm_ops saved from the backing mmap */
};

/* Typed lvalue view of file->private_data. */
#define shm_file_data(file) (*((struct shm_file_data **)&(file)->private_data))
82
83 static const struct file_operations shm_file_operations;
84 static const struct vm_operations_struct shm_vm_ops;
85
86 #define shm_ids(ns)     ((ns)->ids[IPC_SHM_IDS])
87
88 #define shm_unlock(shp)                 \
89         ipc_unlock(&(shp)->shm_perm)
90
91 static int newseg(struct ipc_namespace *, struct ipc_params *);
92 static void shm_open(struct vm_area_struct *vma);
93 static void shm_close(struct vm_area_struct *vma);
94 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp);
95 #ifdef CONFIG_PROC_FS
96 static int sysvipc_shm_proc_show(struct seq_file *s, void *it);
97 #endif
98
99 int shm_init_ns(struct ipc_namespace *ns)
100 {
101         ns->shm_ctlmax = SHMMAX;
102         ns->shm_ctlall = SHMALL;
103         ns->shm_ctlmni = SHMMNI;
104         ns->shm_rmid_forced = 0;
105         ns->shm_tot = 0;
106         return ipc_init_ids(&shm_ids(ns));
107 }
108
109 /*
110  * Called with shm_ids.rwsem (writer) and the shp structure locked.
111  * Only shm_ids.rwsem remains locked on exit.
112  */
113 static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)
114 {
115         struct shmid_kernel *shp;
116
117         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
118
119         if (shp->shm_nattch) {
120                 shp->shm_perm.mode |= SHM_DEST;
121                 /* Do not find it any more */
122                 ipc_set_key_private(&shm_ids(ns), &shp->shm_perm);
123                 shm_unlock(shp);
124         } else
125                 shm_destroy(ns, shp);
126 }
127
#ifdef CONFIG_IPC_NS
/*
 * shm_exit_ns - release every shm object of a namespace
 *
 * Runs do_shm_rmid() on each remaining segment via free_ipcs(), then
 * destroys the idr and the key hash table of the shm id set.
 */
void shm_exit_ns(struct ipc_namespace *ns)
{
	free_ipcs(ns, &shm_ids(ns), do_shm_rmid);
	idr_destroy(&shm_ids(ns).ipcs_idr);
	rhashtable_destroy(&shm_ids(ns).key_ht);
}
#endif
136
137 static int __init ipc_ns_init(void)
138 {
139         const int err = shm_init_ns(&init_ipc_ns);
140         WARN(err, "ipc: sysv shm_init_ns failed: %d\n", err);
141         return err;
142 }
143
144 pure_initcall(ipc_ns_init);
145
146 void __init shm_init(void)
147 {
148         ipc_init_proc_interface("sysvipc/shm",
149 #if BITS_PER_LONG <= 32
150                                 "       key      shmid perms       size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime        rss       swap\n",
151 #else
152                                 "       key      shmid perms                  size  cpid  lpid nattch   uid   gid  cuid  cgid      atime      dtime      ctime                   rss                  swap\n",
153 #endif
154                                 IPC_SHM_IDS, sysvipc_shm_proc_show);
155 }
156
157 static inline struct shmid_kernel *shm_obtain_object(struct ipc_namespace *ns, int id)
158 {
159         struct kern_ipc_perm *ipcp = ipc_obtain_object_idr(&shm_ids(ns), id);
160
161         if (IS_ERR(ipcp))
162                 return ERR_CAST(ipcp);
163
164         return container_of(ipcp, struct shmid_kernel, shm_perm);
165 }
166
167 static inline struct shmid_kernel *shm_obtain_object_check(struct ipc_namespace *ns, int id)
168 {
169         struct kern_ipc_perm *ipcp = ipc_obtain_object_check(&shm_ids(ns), id);
170
171         if (IS_ERR(ipcp))
172                 return ERR_CAST(ipcp);
173
174         return container_of(ipcp, struct shmid_kernel, shm_perm);
175 }
176
177 /*
178  * shm_lock_(check_) routines are called in the paths where the rwsem
179  * is not necessarily held.
180  */
181 static inline struct shmid_kernel *shm_lock(struct ipc_namespace *ns, int id)
182 {
183         struct kern_ipc_perm *ipcp = ipc_lock(&shm_ids(ns), id);
184
185         /*
186          * Callers of shm_lock() must validate the status of the returned ipc
187          * object pointer (as returned by ipc_lock()), and error out as
188          * appropriate.
189          */
190         if (IS_ERR(ipcp))
191                 return (void *)ipcp;
192         return container_of(ipcp, struct shmid_kernel, shm_perm);
193 }
194
195 static inline void shm_lock_by_ptr(struct shmid_kernel *ipcp)
196 {
197         rcu_read_lock();
198         ipc_lock_object(&ipcp->shm_perm);
199 }
200
201 static void shm_rcu_free(struct rcu_head *head)
202 {
203         struct kern_ipc_perm *ptr = container_of(head, struct kern_ipc_perm,
204                                                         rcu);
205         struct shmid_kernel *shp = container_of(ptr, struct shmid_kernel,
206                                                         shm_perm);
207         security_shm_free(&shp->shm_perm);
208         kvfree(shp);
209 }
210
211 static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s)
212 {
213         list_del(&s->shm_clist);
214         ipc_rmid(&shm_ids(ns), &s->shm_perm);
215 }
216
217
218 static int __shm_open(struct vm_area_struct *vma)
219 {
220         struct file *file = vma->vm_file;
221         struct shm_file_data *sfd = shm_file_data(file);
222         struct shmid_kernel *shp;
223
224         shp = shm_lock(sfd->ns, sfd->id);
225
226         if (IS_ERR(shp))
227                 return PTR_ERR(shp);
228
229         if (shp->shm_file != sfd->file) {
230                 /* ID was reused */
231                 shm_unlock(shp);
232                 return -EINVAL;
233         }
234
235         shp->shm_atim = ktime_get_real_seconds();
236         ipc_update_pid(&shp->shm_lprid, task_tgid(current));
237         shp->shm_nattch++;
238         shm_unlock(shp);
239         return 0;
240 }
241
/* This is called by fork, once for every shm attach. */
static void shm_open(struct vm_area_struct *vma)
{
	/*
	 * A failure here means we raced in the idr lookup or with
	 * shm_destroy().  Either way, the ID is busted; warn once.
	 */
	WARN_ON_ONCE(__shm_open(vma));
}
252
253 /*
254  * shm_destroy - free the struct shmid_kernel
255  *
256  * @ns: namespace
257  * @shp: struct to free
258  *
259  * It has to be called with shp and shm_ids.rwsem (writer) locked,
260  * but returns with shp unlocked and freed.
261  */
262 static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
263 {
264         struct file *shm_file;
265
266         shm_file = shp->shm_file;
267         shp->shm_file = NULL;
268         ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT;
269         shm_rmid(ns, shp);
270         shm_unlock(shp);
271         if (!is_file_hugepages(shm_file))
272                 shmem_lock(shm_file, 0, shp->mlock_user);
273         else if (shp->mlock_user)
274                 user_shm_unlock(i_size_read(file_inode(shm_file)),
275                                 shp->mlock_user);
276         fput(shm_file);
277         ipc_update_pid(&shp->shm_cprid, NULL);
278         ipc_update_pid(&shp->shm_lprid, NULL);
279         ipc_rcu_putref(&shp->shm_perm, shm_rcu_free);
280 }
281
282 /*
283  * shm_may_destroy - identifies whether shm segment should be destroyed now
284  *
285  * Returns true if and only if there are no active users of the segment and
286  * one of the following is true:
287  *
288  * 1) shmctl(id, IPC_RMID, NULL) was called for this shp
289  *
290  * 2) sysctl kernel.shm_rmid_forced is set to 1.
291  */
292 static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)
293 {
294         return (shp->shm_nattch == 0) &&
295                (ns->shm_rmid_forced ||
296                 (shp->shm_perm.mode & SHM_DEST));
297 }
298
299 /*
300  * remove the attach descriptor vma.
301  * free memory for segment if it is marked destroyed.
302  * The descriptor has already been removed from the current->mm->mmap list
303  * and will later be kfree()d.
304  */
305 static void shm_close(struct vm_area_struct *vma)
306 {
307         struct file *file = vma->vm_file;
308         struct shm_file_data *sfd = shm_file_data(file);
309         struct shmid_kernel *shp;
310         struct ipc_namespace *ns = sfd->ns;
311
312         down_write(&shm_ids(ns).rwsem);
313         /* remove from the list of attaches of the shm segment */
314         shp = shm_lock(ns, sfd->id);
315
316         /*
317          * We raced in the idr lookup or with shm_destroy().
318          * Either way, the ID is busted.
319          */
320         if (WARN_ON_ONCE(IS_ERR(shp)))
321                 goto done; /* no-op */
322
323         ipc_update_pid(&shp->shm_lprid, task_tgid(current));
324         shp->shm_dtim = ktime_get_real_seconds();
325         shp->shm_nattch--;
326         if (shm_may_destroy(ns, shp))
327                 shm_destroy(ns, shp);
328         else
329                 shm_unlock(shp);
330 done:
331         up_write(&shm_ids(ns).rwsem);
332 }
333
334 /* Called with ns->shm_ids(ns).rwsem locked */
335 static int shm_try_destroy_orphaned(int id, void *p, void *data)
336 {
337         struct ipc_namespace *ns = data;
338         struct kern_ipc_perm *ipcp = p;
339         struct shmid_kernel *shp = container_of(ipcp, struct shmid_kernel, shm_perm);
340
341         /*
342          * We want to destroy segments without users and with already
343          * exit'ed originating process.
344          *
345          * As shp->* are changed under rwsem, it's safe to skip shp locking.
346          */
347         if (shp->shm_creator != NULL)
348                 return 0;
349
350         if (shm_may_destroy(ns, shp)) {
351                 shm_lock_by_ptr(shp);
352                 shm_destroy(ns, shp);
353         }
354         return 0;
355 }
356
357 void shm_destroy_orphaned(struct ipc_namespace *ns)
358 {
359         down_write(&shm_ids(ns).rwsem);
360         if (shm_ids(ns).in_use)
361                 idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns);
362         up_write(&shm_ids(ns).rwsem);
363 }
364
365 /* Locking assumes this will only be called with task == current */
366 void exit_shm(struct task_struct *task)
367 {
368         struct ipc_namespace *ns = task->nsproxy->ipc_ns;
369         struct shmid_kernel *shp, *n;
370
371         if (list_empty(&task->sysvshm.shm_clist))
372                 return;
373
374         /*
375          * If kernel.shm_rmid_forced is not set then only keep track of
376          * which shmids are orphaned, so that a later set of the sysctl
377          * can clean them up.
378          */
379         if (!ns->shm_rmid_forced) {
380                 down_read(&shm_ids(ns).rwsem);
381                 list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist)
382                         shp->shm_creator = NULL;
383                 /*
384                  * Only under read lock but we are only called on current
385                  * so no entry on the list will be shared.
386                  */
387                 list_del(&task->sysvshm.shm_clist);
388                 up_read(&shm_ids(ns).rwsem);
389                 return;
390         }
391
392         /*
393          * Destroy all already created segments, that were not yet mapped,
394          * and mark any mapped as orphan to cover the sysctl toggling.
395          * Destroy is skipped if shm_may_destroy() returns false.
396          */
397         down_write(&shm_ids(ns).rwsem);
398         list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) {
399                 shp->shm_creator = NULL;
400
401                 if (shm_may_destroy(ns, shp)) {
402                         shm_lock_by_ptr(shp);
403                         shm_destroy(ns, shp);
404                 }
405         }
406
407         /* Remove the list head from any segments still attached. */
408         list_del(&task->sysvshm.shm_clist);
409         up_write(&shm_ids(ns).rwsem);
410 }
411
412 static vm_fault_t shm_fault(struct vm_fault *vmf)
413 {
414         struct file *file = vmf->vma->vm_file;
415         struct shm_file_data *sfd = shm_file_data(file);
416
417         return sfd->vm_ops->fault(vmf);
418 }
419
420 static int shm_split(struct vm_area_struct *vma, unsigned long addr)
421 {
422         struct file *file = vma->vm_file;
423         struct shm_file_data *sfd = shm_file_data(file);
424
425         if (sfd->vm_ops->split)
426                 return sfd->vm_ops->split(vma, addr);
427
428         return 0;
429 }
430
431 static unsigned long shm_pagesize(struct vm_area_struct *vma)
432 {
433         struct file *file = vma->vm_file;
434         struct shm_file_data *sfd = shm_file_data(file);
435
436         if (sfd->vm_ops->pagesize)
437                 return sfd->vm_ops->pagesize(vma);
438
439         return PAGE_SIZE;
440 }
441
442 #ifdef CONFIG_NUMA
443 static int shm_set_policy(struct vm_area_struct *vma, struct mempolicy *new)
444 {
445         struct file *file = vma->vm_file;
446         struct shm_file_data *sfd = shm_file_data(file);
447         int err = 0;
448
449         if (sfd->vm_ops->set_policy)
450                 err = sfd->vm_ops->set_policy(vma, new);
451         return err;
452 }
453
454 static struct mempolicy *shm_get_policy(struct vm_area_struct *vma,
455                                         unsigned long addr)
456 {
457         struct file *file = vma->vm_file;
458         struct shm_file_data *sfd = shm_file_data(file);
459         struct mempolicy *pol = NULL;
460
461         if (sfd->vm_ops->get_policy)
462                 pol = sfd->vm_ops->get_policy(vma, addr);
463         else if (vma->vm_policy)
464                 pol = vma->vm_policy;
465
466         return pol;
467 }
468 #endif
469
470 static int shm_mmap(struct file *file, struct vm_area_struct *vma)
471 {
472         struct shm_file_data *sfd = shm_file_data(file);
473         int ret;
474
475         /*
476          * In case of remap_file_pages() emulation, the file can represent an
477          * IPC ID that was removed, and possibly even reused by another shm
478          * segment already.  Propagate this case as an error to caller.
479          */
480         ret = __shm_open(vma);
481         if (ret)
482                 return ret;
483
484         ret = call_mmap(sfd->file, vma);
485         if (ret) {
486                 shm_close(vma);
487                 return ret;
488         }
489         sfd->vm_ops = vma->vm_ops;
490 #ifdef CONFIG_MMU
491         WARN_ON(!sfd->vm_ops->fault);
492 #endif
493         vma->vm_ops = &shm_vm_ops;
494         return 0;
495 }
496
497 static int shm_release(struct inode *ino, struct file *file)
498 {
499         struct shm_file_data *sfd = shm_file_data(file);
500
501         put_ipc_ns(sfd->ns);
502         fput(sfd->file);
503         shm_file_data(file) = NULL;
504         kfree(sfd);
505         return 0;
506 }
507
508 static int shm_fsync(struct file *file, loff_t start, loff_t end, int datasync)
509 {
510         struct shm_file_data *sfd = shm_file_data(file);
511
512         if (!sfd->file->f_op->fsync)
513                 return -EINVAL;
514         return sfd->file->f_op->fsync(sfd->file, start, end, datasync);
515 }
516
517 static long shm_fallocate(struct file *file, int mode, loff_t offset,
518                           loff_t len)
519 {
520         struct shm_file_data *sfd = shm_file_data(file);
521
522         if (!sfd->file->f_op->fallocate)
523                 return -EOPNOTSUPP;
524         return sfd->file->f_op->fallocate(file, mode, offset, len);
525 }
526
527 static unsigned long shm_get_unmapped_area(struct file *file,
528         unsigned long addr, unsigned long len, unsigned long pgoff,
529         unsigned long flags)
530 {
531         struct shm_file_data *sfd = shm_file_data(file);
532
533         return sfd->file->f_op->get_unmapped_area(sfd->file, addr, len,
534                                                 pgoff, flags);
535 }
536
537 static const struct file_operations shm_file_operations = {
538         .mmap           = shm_mmap,
539         .fsync          = shm_fsync,
540         .release        = shm_release,
541         .get_unmapped_area      = shm_get_unmapped_area,
542         .llseek         = noop_llseek,
543         .fallocate      = shm_fallocate,
544 };
545
546 /*
547  * shm_file_operations_huge is now identical to shm_file_operations,
548  * but we keep it distinct for the sake of is_file_shm_hugepages().
549  */
550 static const struct file_operations shm_file_operations_huge = {
551         .mmap           = shm_mmap,
552         .fsync          = shm_fsync,
553         .release        = shm_release,
554         .get_unmapped_area      = shm_get_unmapped_area,
555         .llseek         = noop_llseek,
556         .fallocate      = shm_fallocate,
557 };
558
559 bool is_file_shm_hugepages(struct file *file)
560 {
561         return file->f_op == &shm_file_operations_huge;
562 }
563
564 static const struct vm_operations_struct shm_vm_ops = {
565         .open   = shm_open,     /* callback for a new vm-area open */
566         .close  = shm_close,    /* callback for when the vm-area is released */
567         .fault  = shm_fault,
568         .split  = shm_split,
569         .pagesize = shm_pagesize,
570 #if defined(CONFIG_NUMA)
571         .set_policy = shm_set_policy,
572         .get_policy = shm_get_policy,
573 #endif
574 };
575
576 /**
577  * newseg - Create a new shared memory segment
578  * @ns: namespace
579  * @params: ptr to the structure that contains key, size and shmflg
580  *
581  * Called with shm_ids.rwsem held as a writer.
582  */
583 static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
584 {
585         key_t key = params->key;
586         int shmflg = params->flg;
587         size_t size = params->u.size;
588         int error;
589         struct shmid_kernel *shp;
590         size_t numpages = (size + PAGE_SIZE - 1) >> PAGE_SHIFT;
591         struct file *file;
592         char name[13];
593         vm_flags_t acctflag = 0;
594
595         if (size < SHMMIN || size > ns->shm_ctlmax)
596                 return -EINVAL;
597
598         if (numpages << PAGE_SHIFT < size)
599                 return -ENOSPC;
600
601         if (ns->shm_tot + numpages < ns->shm_tot ||
602                         ns->shm_tot + numpages > ns->shm_ctlall)
603                 return -ENOSPC;
604
605         shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
606         if (unlikely(!shp))
607                 return -ENOMEM;
608
609         shp->shm_perm.key = key;
610         shp->shm_perm.mode = (shmflg & S_IRWXUGO);
611         shp->mlock_user = NULL;
612
613         shp->shm_perm.security = NULL;
614         error = security_shm_alloc(&shp->shm_perm);
615         if (error) {
616                 kvfree(shp);
617                 return error;
618         }
619
620         sprintf(name, "SYSV%08x", key);
621         if (shmflg & SHM_HUGETLB) {
622                 struct hstate *hs;
623                 size_t hugesize;
624
625                 hs = hstate_sizelog((shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
626                 if (!hs) {
627                         error = -EINVAL;
628                         goto no_file;
629                 }
630                 hugesize = ALIGN(size, huge_page_size(hs));
631
632                 /* hugetlb_file_setup applies strict accounting */
633                 if (shmflg & SHM_NORESERVE)
634                         acctflag = VM_NORESERVE;
635                 file = hugetlb_file_setup(name, hugesize, acctflag,
636                                   &shp->mlock_user, HUGETLB_SHMFS_INODE,
637                                 (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK);
638         } else {
639                 /*
640                  * Do not allow no accounting for OVERCOMMIT_NEVER, even
641                  * if it's asked for.
642                  */
643                 if  ((shmflg & SHM_NORESERVE) &&
644                                 sysctl_overcommit_memory != OVERCOMMIT_NEVER)
645                         acctflag = VM_NORESERVE;
646                 file = shmem_kernel_file_setup(name, size, acctflag);
647         }
648         error = PTR_ERR(file);
649         if (IS_ERR(file))
650                 goto no_file;
651
652         shp->shm_cprid = get_pid(task_tgid(current));
653         shp->shm_lprid = NULL;
654         shp->shm_atim = shp->shm_dtim = 0;
655         shp->shm_ctim = ktime_get_real_seconds();
656         shp->shm_segsz = size;
657         shp->shm_nattch = 0;
658         shp->shm_file = file;
659         shp->shm_creator = current;
660
661         /* ipc_addid() locks shp upon success. */
662         error = ipc_addid(&shm_ids(ns), &shp->shm_perm, ns->shm_ctlmni);
663         if (error < 0)
664                 goto no_id;
665
666         list_add(&shp->shm_clist, &current->sysvshm.shm_clist);
667
668         /*
669          * shmid gets reported as "inode#" in /proc/pid/maps.
670          * proc-ps tools use this. Changing this will break them.
671          */
672         file_inode(file)->i_ino = shp->shm_perm.id;
673
674         ns->shm_tot += numpages;
675         error = shp->shm_perm.id;
676
677         ipc_unlock_object(&shp->shm_perm);
678         rcu_read_unlock();
679         return error;
680
681 no_id:
682         ipc_update_pid(&shp->shm_cprid, NULL);
683         ipc_update_pid(&shp->shm_lprid, NULL);
684         if (is_file_hugepages(file) && shp->mlock_user)
685                 user_shm_unlock(size, shp->mlock_user);
686         fput(file);
687 no_file:
688         call_rcu(&shp->shm_perm.rcu, shm_rcu_free);
689         return error;
690 }
691
692 /*
693  * Called with shm_ids.rwsem and ipcp locked.
694  */
695 static inline int shm_more_checks(struct kern_ipc_perm *ipcp,
696                                 struct ipc_params *params)
697 {
698         struct shmid_kernel *shp;
699
700         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
701         if (shp->shm_segsz < params->u.size)
702                 return -EINVAL;
703
704         return 0;
705 }
706
707 long ksys_shmget(key_t key, size_t size, int shmflg)
708 {
709         struct ipc_namespace *ns;
710         static const struct ipc_ops shm_ops = {
711                 .getnew = newseg,
712                 .associate = security_shm_associate,
713                 .more_checks = shm_more_checks,
714         };
715         struct ipc_params shm_params;
716
717         ns = current->nsproxy->ipc_ns;
718
719         shm_params.key = key;
720         shm_params.flg = shmflg;
721         shm_params.u.size = size;
722
723         return ipcget(ns, &shm_ids(ns), &shm_ops, &shm_params);
724 }
725
726 SYSCALL_DEFINE3(shmget, key_t, key, size_t, size, int, shmflg)
727 {
728         return ksys_shmget(key, size, shmflg);
729 }
730
731 static inline unsigned long copy_shmid_to_user(void __user *buf, struct shmid64_ds *in, int version)
732 {
733         switch (version) {
734         case IPC_64:
735                 return copy_to_user(buf, in, sizeof(*in));
736         case IPC_OLD:
737             {
738                 struct shmid_ds out;
739
740                 memset(&out, 0, sizeof(out));
741                 ipc64_perm_to_ipc_perm(&in->shm_perm, &out.shm_perm);
742                 out.shm_segsz   = in->shm_segsz;
743                 out.shm_atime   = in->shm_atime;
744                 out.shm_dtime   = in->shm_dtime;
745                 out.shm_ctime   = in->shm_ctime;
746                 out.shm_cpid    = in->shm_cpid;
747                 out.shm_lpid    = in->shm_lpid;
748                 out.shm_nattch  = in->shm_nattch;
749
750                 return copy_to_user(buf, &out, sizeof(out));
751             }
752         default:
753                 return -EINVAL;
754         }
755 }
756
757 static inline unsigned long
758 copy_shmid_from_user(struct shmid64_ds *out, void __user *buf, int version)
759 {
760         switch (version) {
761         case IPC_64:
762                 if (copy_from_user(out, buf, sizeof(*out)))
763                         return -EFAULT;
764                 return 0;
765         case IPC_OLD:
766             {
767                 struct shmid_ds tbuf_old;
768
769                 if (copy_from_user(&tbuf_old, buf, sizeof(tbuf_old)))
770                         return -EFAULT;
771
772                 out->shm_perm.uid       = tbuf_old.shm_perm.uid;
773                 out->shm_perm.gid       = tbuf_old.shm_perm.gid;
774                 out->shm_perm.mode      = tbuf_old.shm_perm.mode;
775
776                 return 0;
777             }
778         default:
779                 return -EINVAL;
780         }
781 }
782
783 static inline unsigned long copy_shminfo_to_user(void __user *buf, struct shminfo64 *in, int version)
784 {
785         switch (version) {
786         case IPC_64:
787                 return copy_to_user(buf, in, sizeof(*in));
788         case IPC_OLD:
789             {
790                 struct shminfo out;
791
792                 if (in->shmmax > INT_MAX)
793                         out.shmmax = INT_MAX;
794                 else
795                         out.shmmax = (int)in->shmmax;
796
797                 out.shmmin      = in->shmmin;
798                 out.shmmni      = in->shmmni;
799                 out.shmseg      = in->shmseg;
800                 out.shmall      = in->shmall;
801
802                 return copy_to_user(buf, &out, sizeof(out));
803             }
804         default:
805                 return -EINVAL;
806         }
807 }
808
809 /*
810  * Calculate and add used RSS and swap pages of a shm.
811  * Called with shm_ids.rwsem held as a reader
812  */
813 static void shm_add_rss_swap(struct shmid_kernel *shp,
814         unsigned long *rss_add, unsigned long *swp_add)
815 {
816         struct inode *inode;
817
818         inode = file_inode(shp->shm_file);
819
820         if (is_file_hugepages(shp->shm_file)) {
821                 struct address_space *mapping = inode->i_mapping;
822                 struct hstate *h = hstate_file(shp->shm_file);
823                 *rss_add += pages_per_huge_page(h) * mapping->nrpages;
824         } else {
825 #ifdef CONFIG_SHMEM
826                 struct shmem_inode_info *info = SHMEM_I(inode);
827
828                 spin_lock_irq(&info->lock);
829                 *rss_add += inode->i_mapping->nrpages;
830                 *swp_add += info->swapped;
831                 spin_unlock_irq(&info->lock);
832 #else
833                 *rss_add += inode->i_mapping->nrpages;
834 #endif
835         }
836 }
837
838 /*
839  * Called with shm_ids.rwsem held as a reader
840  */
841 static void shm_get_stat(struct ipc_namespace *ns, unsigned long *rss,
842                 unsigned long *swp)
843 {
844         int next_id;
845         int total, in_use;
846
847         *rss = 0;
848         *swp = 0;
849
850         in_use = shm_ids(ns).in_use;
851
852         for (total = 0, next_id = 0; total < in_use; next_id++) {
853                 struct kern_ipc_perm *ipc;
854                 struct shmid_kernel *shp;
855
856                 ipc = idr_find(&shm_ids(ns).ipcs_idr, next_id);
857                 if (ipc == NULL)
858                         continue;
859                 shp = container_of(ipc, struct shmid_kernel, shm_perm);
860
861                 shm_add_rss_swap(shp, rss, swp);
862
863                 total++;
864         }
865 }
866
867 /*
868  * This function handles some shmctl commands which require the rwsem
869  * to be held in write mode.
870  * NOTE: no locks must be held, the rwsem is taken inside this function.
871  */
872 static int shmctl_down(struct ipc_namespace *ns, int shmid, int cmd,
873                        struct shmid64_ds *shmid64)
874 {
875         struct kern_ipc_perm *ipcp;
876         struct shmid_kernel *shp;
877         int err;
878
879         down_write(&shm_ids(ns).rwsem);
880         rcu_read_lock();
881
882         ipcp = ipcctl_pre_down_nolock(ns, &shm_ids(ns), shmid, cmd,
883                                       &shmid64->shm_perm, 0);
884         if (IS_ERR(ipcp)) {
885                 err = PTR_ERR(ipcp);
886                 goto out_unlock1;
887         }
888
889         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
890
891         err = security_shm_shmctl(&shp->shm_perm, cmd);
892         if (err)
893                 goto out_unlock1;
894
895         switch (cmd) {
896         case IPC_RMID:
897                 ipc_lock_object(&shp->shm_perm);
898                 /* do_shm_rmid unlocks the ipc object and rcu */
899                 do_shm_rmid(ns, ipcp);
900                 goto out_up;
901         case IPC_SET:
902                 ipc_lock_object(&shp->shm_perm);
903                 err = ipc_update_perm(&shmid64->shm_perm, ipcp);
904                 if (err)
905                         goto out_unlock0;
906                 shp->shm_ctim = ktime_get_real_seconds();
907                 break;
908         default:
909                 err = -EINVAL;
910                 goto out_unlock1;
911         }
912
913 out_unlock0:
914         ipc_unlock_object(&shp->shm_perm);
915 out_unlock1:
916         rcu_read_unlock();
917 out_up:
918         up_write(&shm_ids(ns).rwsem);
919         return err;
920 }
921
922 static int shmctl_ipc_info(struct ipc_namespace *ns,
923                            struct shminfo64 *shminfo)
924 {
925         int err = security_shm_shmctl(NULL, IPC_INFO);
926         if (!err) {
927                 memset(shminfo, 0, sizeof(*shminfo));
928                 shminfo->shmmni = shminfo->shmseg = ns->shm_ctlmni;
929                 shminfo->shmmax = ns->shm_ctlmax;
930                 shminfo->shmall = ns->shm_ctlall;
931                 shminfo->shmmin = SHMMIN;
932                 down_read(&shm_ids(ns).rwsem);
933                 err = ipc_get_maxid(&shm_ids(ns));
934                 up_read(&shm_ids(ns).rwsem);
935                 if (err < 0)
936                         err = 0;
937         }
938         return err;
939 }
940
941 static int shmctl_shm_info(struct ipc_namespace *ns,
942                            struct shm_info *shm_info)
943 {
944         int err = security_shm_shmctl(NULL, SHM_INFO);
945         if (!err) {
946                 memset(shm_info, 0, sizeof(*shm_info));
947                 down_read(&shm_ids(ns).rwsem);
948                 shm_info->used_ids = shm_ids(ns).in_use;
949                 shm_get_stat(ns, &shm_info->shm_rss, &shm_info->shm_swp);
950                 shm_info->shm_tot = ns->shm_tot;
951                 shm_info->swap_attempts = 0;
952                 shm_info->swap_successes = 0;
953                 err = ipc_get_maxid(&shm_ids(ns));
954                 up_read(&shm_ids(ns).rwsem);
955                 if (err < 0)
956                         err = 0;
957         }
958         return err;
959 }
960
961 static int shmctl_stat(struct ipc_namespace *ns, int shmid,
962                         int cmd, struct shmid64_ds *tbuf)
963 {
964         struct shmid_kernel *shp;
965         int err;
966
967         memset(tbuf, 0, sizeof(*tbuf));
968
969         rcu_read_lock();
970         if (cmd == SHM_STAT || cmd == SHM_STAT_ANY) {
971                 shp = shm_obtain_object(ns, shmid);
972                 if (IS_ERR(shp)) {
973                         err = PTR_ERR(shp);
974                         goto out_unlock;
975                 }
976         } else { /* IPC_STAT */
977                 shp = shm_obtain_object_check(ns, shmid);
978                 if (IS_ERR(shp)) {
979                         err = PTR_ERR(shp);
980                         goto out_unlock;
981                 }
982         }
983
984         /*
985          * Semantically SHM_STAT_ANY ought to be identical to
986          * that functionality provided by the /proc/sysvipc/
987          * interface. As such, only audit these calls and
988          * do not do traditional S_IRUGO permission checks on
989          * the ipc object.
990          */
991         if (cmd == SHM_STAT_ANY)
992                 audit_ipc_obj(&shp->shm_perm);
993         else {
994                 err = -EACCES;
995                 if (ipcperms(ns, &shp->shm_perm, S_IRUGO))
996                         goto out_unlock;
997         }
998
999         err = security_shm_shmctl(&shp->shm_perm, cmd);
1000         if (err)
1001                 goto out_unlock;
1002
1003         ipc_lock_object(&shp->shm_perm);
1004
1005         if (!ipc_valid_object(&shp->shm_perm)) {
1006                 ipc_unlock_object(&shp->shm_perm);
1007                 err = -EIDRM;
1008                 goto out_unlock;
1009         }
1010
1011         kernel_to_ipc64_perm(&shp->shm_perm, &tbuf->shm_perm);
1012         tbuf->shm_segsz = shp->shm_segsz;
1013         tbuf->shm_atime = shp->shm_atim;
1014         tbuf->shm_dtime = shp->shm_dtim;
1015         tbuf->shm_ctime = shp->shm_ctim;
1016 #ifndef CONFIG_64BIT
1017         tbuf->shm_atime_high = shp->shm_atim >> 32;
1018         tbuf->shm_dtime_high = shp->shm_dtim >> 32;
1019         tbuf->shm_ctime_high = shp->shm_ctim >> 32;
1020 #endif
1021         tbuf->shm_cpid  = pid_vnr(shp->shm_cprid);
1022         tbuf->shm_lpid  = pid_vnr(shp->shm_lprid);
1023         tbuf->shm_nattch = shp->shm_nattch;
1024
1025         if (cmd == IPC_STAT) {
1026                 /*
1027                  * As defined in SUS:
1028                  * Return 0 on success
1029                  */
1030                 err = 0;
1031         } else {
1032                 /*
1033                  * SHM_STAT and SHM_STAT_ANY (both Linux specific)
1034                  * Return the full id, including the sequence number
1035                  */
1036                 err = shp->shm_perm.id;
1037         }
1038
1039         ipc_unlock_object(&shp->shm_perm);
1040 out_unlock:
1041         rcu_read_unlock();
1042         return err;
1043 }
1044
1045 static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd)
1046 {
1047         struct shmid_kernel *shp;
1048         struct file *shm_file;
1049         int err;
1050
1051         rcu_read_lock();
1052         shp = shm_obtain_object_check(ns, shmid);
1053         if (IS_ERR(shp)) {
1054                 err = PTR_ERR(shp);
1055                 goto out_unlock1;
1056         }
1057
1058         audit_ipc_obj(&(shp->shm_perm));
1059         err = security_shm_shmctl(&shp->shm_perm, cmd);
1060         if (err)
1061                 goto out_unlock1;
1062
1063         ipc_lock_object(&shp->shm_perm);
1064
1065         /* check if shm_destroy() is tearing down shp */
1066         if (!ipc_valid_object(&shp->shm_perm)) {
1067                 err = -EIDRM;
1068                 goto out_unlock0;
1069         }
1070
1071         if (!ns_capable(ns->user_ns, CAP_IPC_LOCK)) {
1072                 kuid_t euid = current_euid();
1073
1074                 if (!uid_eq(euid, shp->shm_perm.uid) &&
1075                     !uid_eq(euid, shp->shm_perm.cuid)) {
1076                         err = -EPERM;
1077                         goto out_unlock0;
1078                 }
1079                 if (cmd == SHM_LOCK && !rlimit(RLIMIT_MEMLOCK)) {
1080                         err = -EPERM;
1081                         goto out_unlock0;
1082                 }
1083         }
1084
1085         shm_file = shp->shm_file;
1086         if (is_file_hugepages(shm_file))
1087                 goto out_unlock0;
1088
1089         if (cmd == SHM_LOCK) {
1090                 struct user_struct *user = current_user();
1091
1092                 err = shmem_lock(shm_file, 1, user);
1093                 if (!err && !(shp->shm_perm.mode & SHM_LOCKED)) {
1094                         shp->shm_perm.mode |= SHM_LOCKED;
1095                         shp->mlock_user = user;
1096                 }
1097                 goto out_unlock0;
1098         }
1099
1100         /* SHM_UNLOCK */
1101         if (!(shp->shm_perm.mode & SHM_LOCKED))
1102                 goto out_unlock0;
1103         shmem_lock(shm_file, 0, shp->mlock_user);
1104         shp->shm_perm.mode &= ~SHM_LOCKED;
1105         shp->mlock_user = NULL;
1106         get_file(shm_file);
1107         ipc_unlock_object(&shp->shm_perm);
1108         rcu_read_unlock();
1109         shmem_unlock_mapping(shm_file->f_mapping);
1110
1111         fput(shm_file);
1112         return err;
1113
1114 out_unlock0:
1115         ipc_unlock_object(&shp->shm_perm);
1116 out_unlock1:
1117         rcu_read_unlock();
1118         return err;
1119 }
1120
1121 long ksys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf)
1122 {
1123         int err, version;
1124         struct ipc_namespace *ns;
1125         struct shmid64_ds sem64;
1126
1127         if (cmd < 0 || shmid < 0)
1128                 return -EINVAL;
1129
1130         version = ipc_parse_version(&cmd);
1131         ns = current->nsproxy->ipc_ns;
1132
1133         switch (cmd) {
1134         case IPC_INFO: {
1135                 struct shminfo64 shminfo;
1136                 err = shmctl_ipc_info(ns, &shminfo);
1137                 if (err < 0)
1138                         return err;
1139                 if (copy_shminfo_to_user(buf, &shminfo, version))
1140                         err = -EFAULT;
1141                 return err;
1142         }
1143         case SHM_INFO: {
1144                 struct shm_info shm_info;
1145                 err = shmctl_shm_info(ns, &shm_info);
1146                 if (err < 0)
1147                         return err;
1148                 if (copy_to_user(buf, &shm_info, sizeof(shm_info)))
1149                         err = -EFAULT;
1150                 return err;
1151         }
1152         case SHM_STAT:
1153         case SHM_STAT_ANY:
1154         case IPC_STAT: {
1155                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1156                 if (err < 0)
1157                         return err;
1158                 if (copy_shmid_to_user(buf, &sem64, version))
1159                         err = -EFAULT;
1160                 return err;
1161         }
1162         case IPC_SET:
1163                 if (copy_shmid_from_user(&sem64, buf, version))
1164                         return -EFAULT;
1165                 /* fallthru */
1166         case IPC_RMID:
1167                 return shmctl_down(ns, shmid, cmd, &sem64);
1168         case SHM_LOCK:
1169         case SHM_UNLOCK:
1170                 return shmctl_do_lock(ns, shmid, cmd);
1171         default:
1172                 return -EINVAL;
1173         }
1174 }
1175
/* shmctl system call entry point; all the work is done by ksys_shmctl(). */
SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, struct shmid_ds __user *, buf)
{
        return ksys_shmctl(shmid, cmd, buf);
}
1180
1181 #ifdef CONFIG_COMPAT
1182
/*
 * Compat layout of the segment descriptor exchanged with user space by
 * copy_compat_shmid_to_user() and copy_compat_shmid_from_user() when the
 * request's version is not IPC_64.
 */
struct compat_shmid_ds {
        struct compat_ipc_perm shm_perm;
        int shm_segsz;
        compat_time_t shm_atime;
        compat_time_t shm_dtime;
        compat_time_t shm_ctime;
        compat_ipc_pid_t shm_cpid;
        compat_ipc_pid_t shm_lpid;
        unsigned short shm_nattch;
        unsigned short shm_unused;
        compat_uptr_t shm_unused2;
        compat_uptr_t shm_unused3;
};
1196
/*
 * Compat layout of the shm limits written to user space by
 * copy_compat_shminfo_to_user() when the request's version is IPC_64.
 * The __unused fields are never assigned there and stay zero.
 */
struct compat_shminfo64 {
        compat_ulong_t shmmax;
        compat_ulong_t shmmin;
        compat_ulong_t shmmni;
        compat_ulong_t shmseg;
        compat_ulong_t shmall;
        compat_ulong_t __unused1;
        compat_ulong_t __unused2;
        compat_ulong_t __unused3;
        compat_ulong_t __unused4;
};
1208
/*
 * Compat layout of the SHM_INFO statistics; filled from a struct shm_info
 * and copied to user space by put_compat_shm_info().
 */
struct compat_shm_info {
        compat_int_t used_ids;
        compat_ulong_t shm_tot, shm_rss, shm_swp;
        compat_ulong_t swap_attempts, swap_successes;
};
1214
1215 static int copy_compat_shminfo_to_user(void __user *buf, struct shminfo64 *in,
1216                                         int version)
1217 {
1218         if (in->shmmax > INT_MAX)
1219                 in->shmmax = INT_MAX;
1220         if (version == IPC_64) {
1221                 struct compat_shminfo64 info;
1222                 memset(&info, 0, sizeof(info));
1223                 info.shmmax = in->shmmax;
1224                 info.shmmin = in->shmmin;
1225                 info.shmmni = in->shmmni;
1226                 info.shmseg = in->shmseg;
1227                 info.shmall = in->shmall;
1228                 return copy_to_user(buf, &info, sizeof(info));
1229         } else {
1230                 struct shminfo info;
1231                 memset(&info, 0, sizeof(info));
1232                 info.shmmax = in->shmmax;
1233                 info.shmmin = in->shmmin;
1234                 info.shmmni = in->shmmni;
1235                 info.shmseg = in->shmseg;
1236                 info.shmall = in->shmall;
1237                 return copy_to_user(buf, &info, sizeof(info));
1238         }
1239 }
1240
1241 static int put_compat_shm_info(struct shm_info *ip,
1242                                 struct compat_shm_info __user *uip)
1243 {
1244         struct compat_shm_info info;
1245
1246         memset(&info, 0, sizeof(info));
1247         info.used_ids = ip->used_ids;
1248         info.shm_tot = ip->shm_tot;
1249         info.shm_rss = ip->shm_rss;
1250         info.shm_swp = ip->shm_swp;
1251         info.swap_attempts = ip->swap_attempts;
1252         info.swap_successes = ip->swap_successes;
1253         return copy_to_user(uip, &info, sizeof(info));
1254 }
1255
1256 static int copy_compat_shmid_to_user(void __user *buf, struct shmid64_ds *in,
1257                                         int version)
1258 {
1259         if (version == IPC_64) {
1260                 struct compat_shmid64_ds v;
1261                 memset(&v, 0, sizeof(v));
1262                 to_compat_ipc64_perm(&v.shm_perm, &in->shm_perm);
1263                 v.shm_atime      = lower_32_bits(in->shm_atime);
1264                 v.shm_atime_high = upper_32_bits(in->shm_atime);
1265                 v.shm_dtime      = lower_32_bits(in->shm_dtime);
1266                 v.shm_dtime_high = upper_32_bits(in->shm_dtime);
1267                 v.shm_ctime      = lower_32_bits(in->shm_ctime);
1268                 v.shm_ctime_high = upper_32_bits(in->shm_ctime);
1269                 v.shm_segsz = in->shm_segsz;
1270                 v.shm_nattch = in->shm_nattch;
1271                 v.shm_cpid = in->shm_cpid;
1272                 v.shm_lpid = in->shm_lpid;
1273                 return copy_to_user(buf, &v, sizeof(v));
1274         } else {
1275                 struct compat_shmid_ds v;
1276                 memset(&v, 0, sizeof(v));
1277                 to_compat_ipc_perm(&v.shm_perm, &in->shm_perm);
1278                 v.shm_perm.key = in->shm_perm.key;
1279                 v.shm_atime = in->shm_atime;
1280                 v.shm_dtime = in->shm_dtime;
1281                 v.shm_ctime = in->shm_ctime;
1282                 v.shm_segsz = in->shm_segsz;
1283                 v.shm_nattch = in->shm_nattch;
1284                 v.shm_cpid = in->shm_cpid;
1285                 v.shm_lpid = in->shm_lpid;
1286                 return copy_to_user(buf, &v, sizeof(v));
1287         }
1288 }
1289
1290 static int copy_compat_shmid_from_user(struct shmid64_ds *out, void __user *buf,
1291                                         int version)
1292 {
1293         memset(out, 0, sizeof(*out));
1294         if (version == IPC_64) {
1295                 struct compat_shmid64_ds __user *p = buf;
1296                 return get_compat_ipc64_perm(&out->shm_perm, &p->shm_perm);
1297         } else {
1298                 struct compat_shmid_ds __user *p = buf;
1299                 return get_compat_ipc_perm(&out->shm_perm, &p->shm_perm);
1300         }
1301 }
1302
1303 long compat_ksys_shmctl(int shmid, int cmd, void __user *uptr)
1304 {
1305         struct ipc_namespace *ns;
1306         struct shmid64_ds sem64;
1307         int version = compat_ipc_parse_version(&cmd);
1308         int err;
1309
1310         ns = current->nsproxy->ipc_ns;
1311
1312         if (cmd < 0 || shmid < 0)
1313                 return -EINVAL;
1314
1315         switch (cmd) {
1316         case IPC_INFO: {
1317                 struct shminfo64 shminfo;
1318                 err = shmctl_ipc_info(ns, &shminfo);
1319                 if (err < 0)
1320                         return err;
1321                 if (copy_compat_shminfo_to_user(uptr, &shminfo, version))
1322                         err = -EFAULT;
1323                 return err;
1324         }
1325         case SHM_INFO: {
1326                 struct shm_info shm_info;
1327                 err = shmctl_shm_info(ns, &shm_info);
1328                 if (err < 0)
1329                         return err;
1330                 if (put_compat_shm_info(&shm_info, uptr))
1331                         err = -EFAULT;
1332                 return err;
1333         }
1334         case IPC_STAT:
1335         case SHM_STAT_ANY:
1336         case SHM_STAT:
1337                 err = shmctl_stat(ns, shmid, cmd, &sem64);
1338                 if (err < 0)
1339                         return err;
1340                 if (copy_compat_shmid_to_user(uptr, &sem64, version))
1341                         err = -EFAULT;
1342                 return err;
1343
1344         case IPC_SET:
1345                 if (copy_compat_shmid_from_user(&sem64, uptr, version))
1346                         return -EFAULT;
1347                 /* fallthru */
1348         case IPC_RMID:
1349                 return shmctl_down(ns, shmid, cmd, &sem64);
1350         case SHM_LOCK:
1351         case SHM_UNLOCK:
1352                 return shmctl_do_lock(ns, shmid, cmd);
1353                 break;
1354         default:
1355                 return -EINVAL;
1356         }
1357         return err;
1358 }
1359
/* Compat shmctl system call entry point; defers to compat_ksys_shmctl(). */
COMPAT_SYSCALL_DEFINE3(shmctl, int, shmid, int, cmd, void __user *, uptr)
{
        return compat_ksys_shmctl(shmid, cmd, uptr);
}
1364 #endif
1365
/*
 * Fix shmaddr, allocate descriptor, map shm, add attach descriptor to lists.
 *
 * NOTE! Despite the name, this is NOT a direct system call entrypoint. The
 * "raddr" thing points to kernel space, and there has to be a wrapper around
 * this.
 *
 * @shmid:   id of the segment to attach; negative values give -EINVAL.
 * @shmaddr: requested attach address, or NULL to let the mapping code
 *           choose one.
 * @shmflg:  SHM_RND, SHM_REMAP, SHM_RDONLY and SHM_EXEC are interpreted
 *           here.
 * @raddr:   receives the result of the mapping call; only meaningful when
 *           0 is returned.
 * @shmlba:  alignment required of @shmaddr (used as a mask via shmlba - 1,
 *           so presumably a power of two -- confirm against callers).
 *
 * Returns 0 on success or a negative errno.
 */
long do_shmat(int shmid, char __user *shmaddr, int shmflg,
              ulong *raddr, unsigned long shmlba)
{
        struct shmid_kernel *shp;
        unsigned long addr = (unsigned long)shmaddr;
        unsigned long size;
        struct file *file, *base;
        int    err;
        unsigned long flags = MAP_SHARED;
        unsigned long prot;
        int acc_mode;
        struct ipc_namespace *ns;
        struct shm_file_data *sfd;
        int f_flags;
        unsigned long populate = 0;

        err = -EINVAL;
        if (shmid < 0)
                goto out;

        /*
         * Validate the requested address. A misaligned address is rounded
         * down when SHM_RND is set; otherwise it is rejected, except that
         * without __ARCH_FORCE_SHMLBA a page-aligned address is accepted.
         * SHM_REMAP is rejected when there is no address to remap.
         */
        if (addr) {
                if (addr & (shmlba - 1)) {
                        if (shmflg & SHM_RND) {
                                addr &= ~(shmlba - 1);  /* round down */

                                /*
                                 * Ensure that the round-down is non-nil
                                 * when remapping. This can happen for
                                 * cases when addr < shmlba.
                                 */
                                if (!addr && (shmflg & SHM_REMAP))
                                        goto out;
                        } else
#ifndef __ARCH_FORCE_SHMLBA
                                if (addr & ~PAGE_MASK)
#endif
                                        goto out;
                }

                flags |= MAP_FIXED;
        } else if ((shmflg & SHM_REMAP))
                goto out;

        /* Derive mapping protection, permission mask and file flags. */
        if (shmflg & SHM_RDONLY) {
                prot = PROT_READ;
                acc_mode = S_IRUGO;
                f_flags = O_RDONLY;
        } else {
                prot = PROT_READ | PROT_WRITE;
                acc_mode = S_IRUGO | S_IWUGO;
                f_flags = O_RDWR;
        }
        if (shmflg & SHM_EXEC) {
                prot |= PROT_EXEC;
                acc_mode |= S_IXUGO;
        }

        /*
         * We cannot rely on the fs check since SYSV IPC does have an
         * additional creator id...
         */
        ns = current->nsproxy->ipc_ns;
        rcu_read_lock();
        shp = shm_obtain_object_check(ns, shmid);
        if (IS_ERR(shp)) {
                err = PTR_ERR(shp);
                goto out_unlock;
        }

        err = -EACCES;
        if (ipcperms(ns, &shp->shm_perm, acc_mode))
                goto out_unlock;

        err = security_shm_shmat(&shp->shm_perm, shmaddr, shmflg);
        if (err)
                goto out_unlock;

        ipc_lock_object(&shp->shm_perm);

        /* check if shm_destroy() is tearing down shp */
        if (!ipc_valid_object(&shp->shm_perm)) {
                ipc_unlock_object(&shp->shm_perm);
                err = -EIDRM;
                goto out_unlock;
        }

        /*
         * We need to take a reference to the real shm file to prevent the
         * pointer from becoming stale in cases where the lifetime of the outer
         * file extends beyond that of the shm segment.  It's not usually
         * possible, but it can happen during remap_file_pages() emulation as
         * that unmaps the memory, then does ->mmap() via file reference only.
         * We'll deny the ->mmap() if the shm segment was since removed, but to
         * detect shm ID reuse we need to compare the file pointers.
         */
        base = get_file(shp->shm_file);
        /*
         * Temporary attach count taken under the object lock; it is dropped
         * again at out_nattch on every path from here on.
         */
        shp->shm_nattch++;
        size = i_size_read(file_inode(base));
        ipc_unlock_object(&shp->shm_perm);
        rcu_read_unlock();

        err = -ENOMEM;
        sfd = kzalloc(sizeof(*sfd), GFP_KERNEL);
        if (!sfd) {
                fput(base);
                goto out_nattch;
        }

        /* Per-attach file cloned from the segment's file. */
        file = alloc_file_clone(base, f_flags,
                          is_file_hugepages(base) ?
                                &shm_file_operations_huge :
                                &shm_file_operations);
        err = PTR_ERR(file);
        if (IS_ERR(file)) {
                kfree(sfd);
                fput(base);
                goto out_nattch;
        }

        /*
         * From here on sfd (and through it the reference on base) hangs off
         * file->private_data, so the error paths below only fput(file).
         * NOTE(review): presumably the file's release handler frees sfd and
         * drops base -- not visible in this chunk.
         */
        sfd->id = shp->shm_perm.id;
        sfd->ns = get_ipc_ns(ns);
        sfd->file = base;
        sfd->vm_ops = NULL;
        file->private_data = sfd;

        err = security_mmap_file(file, prot, flags);
        if (err)
                goto out_fput;

        if (down_write_killable(&current->mm->mmap_sem)) {
                err = -EINTR;
                goto out_fput;
        }

        /*
         * Without SHM_REMAP an explicit address must not wrap around and
         * must not intersect an existing mapping.
         */
        if (addr && !(shmflg & SHM_REMAP)) {
                err = -EINVAL;
                if (addr + size < addr)
                        goto invalid;

                if (find_vma_intersection(current->mm, addr, addr + size))
                        goto invalid;
        }

        addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL);
        *raddr = addr;
        err = 0;
        if (IS_ERR_VALUE(addr))
                err = (long)addr;
invalid:
        up_write(&current->mm->mmap_sem);
        /* populate is still 0 when we arrived here via the invalid label */
        if (populate)
                mm_populate(addr, populate);

out_fput:
        fput(file);

out_nattch:
        /*
         * Reached on success as well as on failure: drop the temporary
         * attach count taken above and destroy the segment if
         * shm_may_destroy() now allows it.
         * NOTE(review): the shm_lock() result is used unchecked; presumably
         * the attach count held above keeps the lookup from failing --
         * confirm.
         */
        down_write(&shm_ids(ns).rwsem);
        shp = shm_lock(ns, shmid);
        shp->shm_nattch--;
        if (shm_may_destroy(ns, shp))
                shm_destroy(ns, shp);
        else
                shm_unlock(shp);
        up_write(&shm_ids(ns).rwsem);
        return err;

out_unlock:
        rcu_read_unlock();
out:
        return err;
}
1545
1546 SYSCALL_DEFINE3(shmat, int, shmid, char __user *, shmaddr, int, shmflg)
1547 {
1548         unsigned long ret;
1549         long err;
1550
1551         err = do_shmat(shmid, shmaddr, shmflg, &ret, SHMLBA);
1552         if (err)
1553                 return err;
1554         force_successful_syscall_return();
1555         return (long)ret;
1556 }
1557
1558 #ifdef CONFIG_COMPAT
1559
/* Fall back to the native SHMLBA when no compat-specific value is defined. */
#ifndef COMPAT_SHMLBA
#define COMPAT_SHMLBA   SHMLBA
#endif
1563
1564 COMPAT_SYSCALL_DEFINE3(shmat, int, shmid, compat_uptr_t, shmaddr, int, shmflg)
1565 {
1566         unsigned long ret;
1567         long err;
1568
1569         err = do_shmat(shmid, compat_ptr(shmaddr), shmflg, &ret, COMPAT_SHMLBA);
1570         if (err)
1571                 return err;
1572         force_successful_syscall_return();
1573         return (long)ret;
1574 }
1575 #endif
1576
/*
 * detach and kill segment if marked destroyed.
 * The work is done in shm_close.
 *
 * @shmaddr: address the segment was attached at; must be page aligned.
 *
 * Returns 0 if a matching shm mapping was found and unmapped, -EINVAL if
 * @shmaddr is misaligned or no matching mapping exists, and -EINTR if the
 * wait for mmap_sem was interrupted by a fatal signal.
 */
long ksys_shmdt(char __user *shmaddr)
{
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *vma;
        unsigned long addr = (unsigned long)shmaddr;
        int retval = -EINVAL;
#ifdef CONFIG_MMU
        loff_t size = 0;
        struct file *file;
        struct vm_area_struct *next;
#endif

        if (addr & ~PAGE_MASK)
                return retval;

        if (down_write_killable(&mm->mmap_sem))
                return -EINTR;

        /*
         * This function tries to be smart and unmap shm segments that
         * were modified by partial mlock or munmap calls:
         * - It first determines the size of the shm segment that should be
         *   unmapped: It searches for a vma that is backed by shm and that
         *   started at address shmaddr. It records its size and then unmaps
         *   it.
         * - Then it unmaps all shm vmas that started at shmaddr and that
         *   are within the initially determined size and that are from the
         *   same shm segment from which we determined the size.
         * Errors from do_munmap are ignored: the function only fails if
         * it's called with invalid parameters or if it's called to unmap
         * a part of a vma. Both calls in this function are for full vmas,
         * the parameters are directly copied from the vma itself and always
         * valid - therefore do_munmap cannot fail. (famous last words?)
         */
        /*
         * If it had been mremap()'d, the starting address would not
         * match the usual checks anyway. So assume all vma's are
         * above the starting address given.
         */
        vma = find_vma(mm, addr);

#ifdef CONFIG_MMU
        while (vma) {
                /* fetched up front because vma may be unmapped below */
                next = vma->vm_next;

                /*
                 * Check if the starting address would match, i.e. it's
                 * a fragment created by mprotect() and/or munmap(), or
                 * otherwise it starts at this address with no hassles.
                 */
                if ((vma->vm_ops == &shm_vm_ops) &&
                        (vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) {

                        /*
                         * Record the file of the shm segment being
                         * unmapped.  With mremap(), someone could place
                         * pages from another segment but with equal offsets
                         * in the range we are unmapping.
                         */
                        file = vma->vm_file;
                        size = i_size_read(file_inode(vma->vm_file));
                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                        /*
                         * We discovered the size of the shm segment, so
                         * break out of here and fall through to the next
                         * loop that uses the size information to stop
                         * searching for matching vma's.
                         */
                        retval = 0;
                        vma = next;
                        break;
                }
                vma = next;
        }

        /*
         * We need look no further than the maximum address a fragment
         * could possibly have landed at. Also cast things to loff_t to
         * prevent overflows and make comparisons vs. equal-width types.
         *
         * If the first loop found no match it ran until vma became NULL,
         * so this loop is skipped and the unset 'file' is never read.
         */
        size = PAGE_ALIGN(size);
        while (vma && (loff_t)(vma->vm_end - addr) <= size) {
                next = vma->vm_next;

                /* finding a matching vma now does not alter retval */
                if ((vma->vm_ops == &shm_vm_ops) &&
                    ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) &&
                    (vma->vm_file == file))
                        do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                vma = next;
        }

#else   /* CONFIG_MMU */
        /* under NOMMU conditions, the exact address to be destroyed must be
         * given
         */
        if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) {
                do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL);
                retval = 0;
        }

#endif

        up_write(&mm->mmap_sem);
        return retval;
}
1687
/* shmdt system call entry point; all the work is done by ksys_shmdt(). */
SYSCALL_DEFINE1(shmdt, char __user *, shmaddr)
{
        return ksys_shmdt(shmaddr);
}
1692
1693 #ifdef CONFIG_PROC_FS
1694 static int sysvipc_shm_proc_show(struct seq_file *s, void *it)
1695 {
1696         struct pid_namespace *pid_ns = ipc_seq_pid_ns(s);
1697         struct user_namespace *user_ns = seq_user_ns(s);
1698         struct kern_ipc_perm *ipcp = it;
1699         struct shmid_kernel *shp;
1700         unsigned long rss = 0, swp = 0;
1701
1702         shp = container_of(ipcp, struct shmid_kernel, shm_perm);
1703         shm_add_rss_swap(shp, &rss, &swp);
1704
1705 #if BITS_PER_LONG <= 32
1706 #define SIZE_SPEC "%10lu"
1707 #else
1708 #define SIZE_SPEC "%21lu"
1709 #endif
1710
1711         seq_printf(s,
1712                    "%10d %10d  %4o " SIZE_SPEC " %5u %5u  "
1713                    "%5lu %5u %5u %5u %5u %10llu %10llu %10llu "
1714                    SIZE_SPEC " " SIZE_SPEC "\n",
1715                    shp->shm_perm.key,
1716                    shp->shm_perm.id,
1717                    shp->shm_perm.mode,
1718                    shp->shm_segsz,
1719                    pid_nr_ns(shp->shm_cprid, pid_ns),
1720                    pid_nr_ns(shp->shm_lprid, pid_ns),
1721                    shp->shm_nattch,
1722                    from_kuid_munged(user_ns, shp->shm_perm.uid),
1723                    from_kgid_munged(user_ns, shp->shm_perm.gid),
1724                    from_kuid_munged(user_ns, shp->shm_perm.cuid),
1725                    from_kgid_munged(user_ns, shp->shm_perm.cgid),
1726                    shp->shm_atim,
1727                    shp->shm_dtim,
1728                    shp->shm_ctim,
1729                    rss * PAGE_SIZE,
1730                    swp * PAGE_SIZE);
1731
1732         return 0;
1733 }
1734 #endif