/*
 * linux/fs/binfmt_elf.c
 *
 * These are the functions used to load ELF format executables as used
 * on SVr4 machines.  Information on the format may be found in the book
 * "UNIX SYSTEM V RELEASE 4 Programmers Guide: Ansi C and Programming Support
 * Tools".
 *
 * Copyright 1993, 1994: Eric Youngdale (ericy@cais.com).
 */

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/mman.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/binfmts.h>
#include <linux/string.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/personality.h>
#include <linux/elfcore.h>
#include <linux/init.h>
#include <linux/highuid.h>
#include <linux/compiler.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/security.h>
#include <linux/random.h>
#include <linux/elf.h>
#include <linux/utsname.h>
#include <linux/coredump.h>
#include <asm/uaccess.h>
#include <asm/param.h>
#include <asm/page.h>
#include <asm/exec.h>

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs);
static int load_elf_library(struct file *);
static unsigned long elf_map(struct file *, unsigned long, struct elf_phdr *,
                                int, int, unsigned long);

/*
 * If we don't support core dumping, then supply a NULL so we
 * don't even try.
 */
#ifdef CONFIG_ELF_CORE
static int elf_core_dump(struct coredump_params *cprm);
#else
#define elf_core_dump   NULL
#endif

#if ELF_EXEC_PAGESIZE > PAGE_SIZE
#define ELF_MIN_ALIGN   ELF_EXEC_PAGESIZE
#else
#define ELF_MIN_ALIGN   PAGE_SIZE
#endif

#ifndef ELF_CORE_EFLAGS
#define ELF_CORE_EFLAGS 0
#endif

#define ELF_PAGESTART(_v) ((_v) & ~(unsigned long)(ELF_MIN_ALIGN-1))
#define ELF_PAGEOFFSET(_v) ((_v) & (ELF_MIN_ALIGN-1))
#define ELF_PAGEALIGN(_v) (((_v) + ELF_MIN_ALIGN - 1) & ~(ELF_MIN_ALIGN - 1))

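/*
 * A quick worked example of the rounding macros above (values assume
 * ELF_MIN_ALIGN == 0x1000; illustrative only):
 *
 *      ELF_PAGESTART(0x12345)  == 0x12000   (round down to page start)
 *      ELF_PAGEOFFSET(0x12345) == 0x345     (offset within the page)
 *      ELF_PAGEALIGN(0x12345)  == 0x13000   (round up to the next page)
 */
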
static struct linux_binfmt elf_format = {
        .module         = THIS_MODULE,
        .load_binary    = load_elf_binary,
        .load_shlib     = load_elf_library,
        .core_dump      = elf_core_dump,
        .min_coredump   = ELF_EXEC_PAGESIZE,
};

#define BAD_ADDR(x) ((unsigned long)(x) >= TASK_SIZE)

static int set_brk(unsigned long start, unsigned long end)
{
        start = ELF_PAGEALIGN(start);
        end = ELF_PAGEALIGN(end);
        if (end > start) {
                unsigned long addr;
                addr = vm_brk(start, end - start);
                if (BAD_ADDR(addr))
                        return addr;
        }
        current->mm->start_brk = current->mm->brk = end;
        return 0;
}

/* We need to explicitly zero any fractional pages
   after the data section (i.e. bss).  This would
   contain the junk from the file that should not
   be in memory
 */
static int padzero(unsigned long elf_bss)
{
        unsigned long nbyte;

        nbyte = ELF_PAGEOFFSET(elf_bss);
        if (nbyte) {
                nbyte = ELF_MIN_ALIGN - nbyte;
                if (clear_user((void __user *) elf_bss, nbyte))
                        return -EFAULT;
        }
        return 0;
}

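/*
 * Worked example for padzero() (addresses are illustrative): if file
 * data ends at elf_bss == 0x0804a123 and ELF_MIN_ALIGN == 0x1000, then
 * ELF_PAGEOFFSET(elf_bss) == 0x123, so the remaining
 * 0x1000 - 0x123 == 0xedd bytes up to the page boundary are cleared.
 */
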
/* Let's use some macros to make this stack manipulation a little clearer */
#ifdef CONFIG_STACK_GROWSUP
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) + (items))
#define STACK_ROUND(sp, items) \
        ((15 + (unsigned long) ((sp) + (items))) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ \
        elf_addr_t __user *old_sp = (elf_addr_t __user *)sp; sp += len; \
        old_sp; })
#else
#define STACK_ADD(sp, items) ((elf_addr_t __user *)(sp) - (items))
#define STACK_ROUND(sp, items) \
        (((unsigned long) (sp - items)) &~ 15UL)
#define STACK_ALLOC(sp, len) ({ sp -= len ; sp; })
#endif

#ifndef ELF_BASE_PLATFORM
/*
 * AT_BASE_PLATFORM indicates the "real" hardware/microarchitecture.
 * If the arch defines ELF_BASE_PLATFORM (in asm/elf.h), the value
 * will be copied to the user stack in the same manner as AT_PLATFORM.
 */
#define ELF_BASE_PLATFORM NULL
#endif

static int
create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec,
                unsigned long load_addr, unsigned long interp_load_addr)
{
        unsigned long p = bprm->p;
        int argc = bprm->argc;
        int envc = bprm->envc;
        elf_addr_t __user *argv;
        elf_addr_t __user *envp;
        elf_addr_t __user *sp;
        elf_addr_t __user *u_platform;
        elf_addr_t __user *u_base_platform;
        elf_addr_t __user *u_rand_bytes;
        const char *k_platform = ELF_PLATFORM;
        const char *k_base_platform = ELF_BASE_PLATFORM;
        unsigned char k_rand_bytes[16];
        int items;
        elf_addr_t *elf_info;
        int ei_index = 0;
        const struct cred *cred = current_cred();
        struct vm_area_struct *vma;

        /*
         * In some cases (e.g. Hyper-Threading), we want to avoid L1
         * evictions by the processes running on the same package. One
         * thing we can do is to shuffle the initial stack for them.
         */

        p = arch_align_stack(p);

        /*
         * If this architecture has a platform capability string, copy it
         * to userspace.  In some cases (Sparc), this info is impossible
         * for userspace to get any other way, in others (i386) it is
         * merely difficult.
         */
        u_platform = NULL;
        if (k_platform) {
                size_t len = strlen(k_platform) + 1;

                u_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_platform, k_platform, len))
                        return -EFAULT;
        }

        /*
         * If this architecture has a "base" platform capability
         * string, copy it to userspace.
         */
        u_base_platform = NULL;
        if (k_base_platform) {
                size_t len = strlen(k_base_platform) + 1;

                u_base_platform = (elf_addr_t __user *)STACK_ALLOC(p, len);
                if (__copy_to_user(u_base_platform, k_base_platform, len))
                        return -EFAULT;
        }

        /*
         * Generate 16 random bytes for userspace PRNG seeding.
         */
        get_random_bytes(k_rand_bytes, sizeof(k_rand_bytes));
        u_rand_bytes = (elf_addr_t __user *)
                       STACK_ALLOC(p, sizeof(k_rand_bytes));
        if (__copy_to_user(u_rand_bytes, k_rand_bytes, sizeof(k_rand_bytes)))
                return -EFAULT;

        /* Create the ELF interpreter info */
        elf_info = (elf_addr_t *)current->mm->saved_auxv;
        /* update AT_VECTOR_SIZE_BASE if the number of NEW_AUX_ENT() changes */
#define NEW_AUX_ENT(id, val) \
        do { \
                elf_info[ei_index++] = id; \
                elf_info[ei_index++] = val; \
        } while (0)

#ifdef ARCH_DLINFO
        /*
         * ARCH_DLINFO must come first so PPC can do its special alignment of
         * AUXV.
         * update AT_VECTOR_SIZE_ARCH if the number of NEW_AUX_ENT() in
         * ARCH_DLINFO changes
         */
        ARCH_DLINFO;
#endif
        NEW_AUX_ENT(AT_HWCAP, ELF_HWCAP);
        NEW_AUX_ENT(AT_PAGESZ, ELF_EXEC_PAGESIZE);
        NEW_AUX_ENT(AT_CLKTCK, CLOCKS_PER_SEC);
        NEW_AUX_ENT(AT_PHDR, load_addr + exec->e_phoff);
        NEW_AUX_ENT(AT_PHENT, sizeof(struct elf_phdr));
        NEW_AUX_ENT(AT_PHNUM, exec->e_phnum);
        NEW_AUX_ENT(AT_BASE, interp_load_addr);
        NEW_AUX_ENT(AT_FLAGS, 0);
        NEW_AUX_ENT(AT_ENTRY, exec->e_entry);
        NEW_AUX_ENT(AT_UID, from_kuid_munged(cred->user_ns, cred->uid));
        NEW_AUX_ENT(AT_EUID, from_kuid_munged(cred->user_ns, cred->euid));
        NEW_AUX_ENT(AT_GID, from_kgid_munged(cred->user_ns, cred->gid));
        NEW_AUX_ENT(AT_EGID, from_kgid_munged(cred->user_ns, cred->egid));
        NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm));
        NEW_AUX_ENT(AT_RANDOM, (elf_addr_t)(unsigned long)u_rand_bytes);
        NEW_AUX_ENT(AT_EXECFN, bprm->exec);
        if (k_platform) {
                NEW_AUX_ENT(AT_PLATFORM,
                            (elf_addr_t)(unsigned long)u_platform);
        }
        if (k_base_platform) {
                NEW_AUX_ENT(AT_BASE_PLATFORM,
                            (elf_addr_t)(unsigned long)u_base_platform);
        }
        if (bprm->interp_flags & BINPRM_FLAGS_EXECFD) {
                NEW_AUX_ENT(AT_EXECFD, bprm->interp_data);
        }
#undef NEW_AUX_ENT
        /* AT_NULL is zero; clear the rest too */
        memset(&elf_info[ei_index], 0,
               sizeof current->mm->saved_auxv - ei_index * sizeof elf_info[0]);

        /* And advance past the AT_NULL entry.  */
        ei_index += 2;

        sp = STACK_ADD(p, ei_index);

        items = (argc + 1) + (envc + 1) + 1;
        bprm->p = STACK_ROUND(sp, items);

        /* Point sp at the lowest address on the stack */
#ifdef CONFIG_STACK_GROWSUP
        sp = (elf_addr_t __user *)bprm->p - items - ei_index;
        bprm->exec = (unsigned long)sp; /* XXX: PARISC HACK */
#else
        sp = (elf_addr_t __user *)bprm->p;
#endif


        /*
         * Grow the stack manually; some architectures have a limit on how
         * far ahead a user-space access may be in order to grow the stack.
         */
        vma = find_extend_vma(current->mm, bprm->p);
        if (!vma)
                return -EFAULT;

        /* Now, let's put argc (and argv, envp if appropriate) on the stack */
        if (__put_user(argc, sp++))
                return -EFAULT;
        argv = sp;
        envp = argv + argc + 1;

        /* Populate argv and envp */
        p = current->mm->arg_end = current->mm->arg_start;
        while (argc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, argv++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, argv))
                return -EFAULT;
        current->mm->arg_end = current->mm->env_start = p;
        while (envc-- > 0) {
                size_t len;
                if (__put_user((elf_addr_t)p, envp++))
                        return -EFAULT;
                len = strnlen_user((void __user *)p, MAX_ARG_STRLEN);
                if (!len || len > MAX_ARG_STRLEN)
                        return -EINVAL;
                p += len;
        }
        if (__put_user(0, envp))
                return -EFAULT;
        current->mm->env_end = p;

        /* Put the elf_info on the stack in the right place.  */
        sp = (elf_addr_t __user *)envp + 1;
        if (copy_to_user(sp, elf_info, ei_index * sizeof(elf_addr_t)))
                return -EFAULT;
        return 0;
}
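
/*
 * Illustrative userspace sketch (not kernel code, and not part of this
 * file's build): how a process can read back some of the aux vector
 * entries that create_elf_tables() lays out above, using the standard
 * glibc getauxval() API.  Compile it standalone to try it.
 */
#if 0
#include <elf.h>
#include <stdio.h>
#include <sys/auxv.h>

int main(void)
{
        /* AT_PAGESZ and AT_RANDOM match the NEW_AUX_ENT()s above */
        unsigned long pagesz = getauxval(AT_PAGESZ);
        unsigned char *rand_bytes = (unsigned char *)getauxval(AT_RANDOM);

        printf("page size: %lu\n", pagesz);
        /* AT_RANDOM points at the 16 bytes copied from k_rand_bytes */
        if (rand_bytes)
                printf("first random byte: %u\n", rand_bytes[0]);
        return 0;
}
#endif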

static unsigned long elf_map(struct file *filep, unsigned long addr,
                struct elf_phdr *eppnt, int prot, int type,
                unsigned long total_size)
{
        unsigned long map_addr;
        unsigned long size = eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr);
        unsigned long off = eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr);
        addr = ELF_PAGESTART(addr);
        size = ELF_PAGEALIGN(size);

        /* mmap() will return -EINVAL if given a zero size, but a
         * segment with zero filesize is perfectly valid */
        if (!size)
                return addr;

        /*
         * total_size is the size of the ELF (interpreter) image.
         * The _first_ mmap needs to know the full size, otherwise
         * randomization might put this image into an overlapping
         * position with the ELF binary image. (since size < total_size)
         * So we first map the 'big' image - and unmap the remainder at
         * the end. (which unmap is needed for ELF images with holes.)
         */
        if (total_size) {
                total_size = ELF_PAGEALIGN(total_size);
                map_addr = vm_mmap(filep, addr, total_size, prot, type, off);
                if (!BAD_ADDR(map_addr))
                        vm_munmap(map_addr+size, total_size-size);
        } else
                map_addr = vm_mmap(filep, addr, size, prot, type, off);

        return map_addr;
}
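
/*
 * Illustrative userspace sketch (not kernel code): the same "map the
 * full span first, then trim the tail" idea elf_map() uses above, shown
 * with anonymous memory.  Reserving the whole span up front guarantees
 * later segments of the image cannot be randomized on top of it.
 */
#if 0
#include <stdio.h>
#include <sys/mman.h>

int main(void)
{
        size_t total_size = 16 * 4096;  /* span of the whole image */
        size_t size = 4 * 4096;         /* first segment only */
        void *map = mmap(NULL, total_size, PROT_READ,
                         MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

        if (map == MAP_FAILED)
                return 1;
        /* keep the first segment, give the remainder back */
        munmap((char *)map + size, total_size - size);
        printf("first segment reserved at %p\n", map);
        return 0;
}
#endif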

static unsigned long total_mapping_size(struct elf_phdr *cmds, int nr)
{
        int i, first_idx = -1, last_idx = -1;

        for (i = 0; i < nr; i++) {
                if (cmds[i].p_type == PT_LOAD) {
                        last_idx = i;
                        if (first_idx == -1)
                                first_idx = i;
                }
        }
        if (first_idx == -1)
                return 0;

        return cmds[last_idx].p_vaddr + cmds[last_idx].p_memsz -
                                ELF_PAGESTART(cmds[first_idx].p_vaddr);
}
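
/*
 * Worked example for total_mapping_size() (values are illustrative):
 * with two PT_LOADs, the first at p_vaddr 0x400000 and the last at
 * p_vaddr 0x600000 with p_memsz 0x800, the image spans
 *
 *      0x600000 + 0x800 - ELF_PAGESTART(0x400000) == 0x200800 bytes.
 */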

/* This is much more generalized than the library routine read function,
   so we keep this separate.  Technically the library read function
   is only provided so that we can read a.out libraries that have
   an ELF header */

static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
                struct file *interpreter, unsigned long *interp_map_addr,
                unsigned long no_base)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long load_addr = 0;
        int load_addr_set = 0;
        unsigned long last_bss = 0, elf_bss = 0;
        unsigned long error = ~0UL;
        unsigned long total_size;
        int retval, i, size;

        /* First of all, some simple consistency checks */
        if (interp_elf_ex->e_type != ET_EXEC &&
            interp_elf_ex->e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(interp_elf_ex))
                goto out;
        if (!interpreter->f_op || !interpreter->f_op->mmap)
                goto out;

        /*
         * If the size of this structure has changed, then punt, since
         * we will be doing the wrong thing.
         */
        if (interp_elf_ex->e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (interp_elf_ex->e_phnum < 1 ||
                interp_elf_ex->e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;

        /* Now read in all of the header information */
        size = sizeof(struct elf_phdr) * interp_elf_ex->e_phnum;
        if (size > ELF_MIN_ALIGN)
                goto out;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(interpreter, interp_elf_ex->e_phoff,
                             (char *)elf_phdata, size);
        error = -EIO;
        if (retval != size) {
                if (retval < 0)
                        error = retval;
                goto out_close;
        }

        total_size = total_mapping_size(elf_phdata, interp_elf_ex->e_phnum);
        if (!total_size) {
                error = -EINVAL;
                goto out_close;
        }

        eppnt = elf_phdata;
        for (i = 0; i < interp_elf_ex->e_phnum; i++, eppnt++) {
                if (eppnt->p_type == PT_LOAD) {
                        int elf_type = MAP_PRIVATE | MAP_DENYWRITE;
                        int elf_prot = 0;
                        unsigned long vaddr = 0;
                        unsigned long k, map_addr;

                        if (eppnt->p_flags & PF_R)
                                elf_prot = PROT_READ;
                        if (eppnt->p_flags & PF_W)
                                elf_prot |= PROT_WRITE;
                        if (eppnt->p_flags & PF_X)
                                elf_prot |= PROT_EXEC;
                        vaddr = eppnt->p_vaddr;
                        if (interp_elf_ex->e_type == ET_EXEC || load_addr_set)
                                elf_type |= MAP_FIXED;
                        else if (no_base && interp_elf_ex->e_type == ET_DYN)
                                load_addr = -vaddr;

                        map_addr = elf_map(interpreter, load_addr + vaddr,
                                        eppnt, elf_prot, elf_type, total_size);
                        total_size = 0;
                        if (!*interp_map_addr)
                                *interp_map_addr = map_addr;
                        error = map_addr;
                        if (BAD_ADDR(map_addr))
                                goto out_close;

                        if (!load_addr_set &&
                            interp_elf_ex->e_type == ET_DYN) {
                                load_addr = map_addr - ELF_PAGESTART(vaddr);
                                load_addr_set = 1;
                        }

                        /*
                         * Check to see if the section's size will overflow the
                         * allowed task size. Note that p_filesz must always be
                         * <= p_memsz so it's only necessary to check p_memsz.
                         */
                        k = load_addr + eppnt->p_vaddr;
                        if (BAD_ADDR(k) ||
                            eppnt->p_filesz > eppnt->p_memsz ||
                            eppnt->p_memsz > TASK_SIZE ||
                            TASK_SIZE - eppnt->p_memsz < k) {
                                error = -ENOMEM;
                                goto out_close;
                        }

                        /*
                         * Find the end of the file mapping for this phdr, and
                         * keep track of the largest address we see for this.
                         */
                        k = load_addr + eppnt->p_vaddr + eppnt->p_filesz;
                        if (k > elf_bss)
                                elf_bss = k;

                        /*
                         * Do the same thing for the memory mapping - between
                         * elf_bss and last_bss is the bss section.
                         */
                        k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
                        if (k > last_bss)
                                last_bss = k;
                }
        }

        if (last_bss > elf_bss) {
                /*
                 * Now fill out the bss section.  First pad the last page up
                 * to the page boundary, and then perform a mmap to make sure
                 * that there are zero-mapped pages up to and including the
                 * last bss page.
                 */
                if (padzero(elf_bss)) {
                        error = -EFAULT;
                        goto out_close;
                }

                /* What we have mapped so far */
                elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);

                /* Map the last of the bss segment */
                error = vm_brk(elf_bss, last_bss - elf_bss);
                if (BAD_ADDR(error))
                        goto out_close;
        }

        error = load_addr;

out_close:
        kfree(elf_phdata);
out:
        return error;
}

/*
 * These are the functions used to load ELF style executables and shared
 * libraries.  There is no binary dependent code anywhere else.
 */

#define INTERPRETER_NONE 0
#define INTERPRETER_ELF 2

#ifndef STACK_RND_MASK
#define STACK_RND_MASK (0x7ff >> (PAGE_SHIFT - 12))     /* 8MB of VA */
#endif

static unsigned long randomize_stack_top(unsigned long stack_top)
{
        unsigned int random_variable = 0;

        if ((current->flags & PF_RANDOMIZE) &&
                !(current->personality & ADDR_NO_RANDOMIZE)) {
                random_variable = get_random_int() & STACK_RND_MASK;
                random_variable <<= PAGE_SHIFT;
        }
#ifdef CONFIG_STACK_GROWSUP
        return PAGE_ALIGN(stack_top) + random_variable;
#else
        return PAGE_ALIGN(stack_top) - random_variable;
#endif
}
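
/*
 * Worked example for randomize_stack_top() (assuming the default
 * STACK_RND_MASK and 4K pages): the mask is 0x7ff, so up to 0x7ff
 * pages -- (0x7ff << 12) bytes, just under 8MB -- of random offset is
 * applied below (or above, with CONFIG_STACK_GROWSUP) the page-aligned
 * stack top.
 */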

static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
{
        struct file *interpreter = NULL; /* to shut gcc up */
        unsigned long load_addr = 0, load_bias = 0;
        int load_addr_set = 0;
        char * elf_interpreter = NULL;
        unsigned long error;
        struct elf_phdr *elf_ppnt, *elf_phdata;
        unsigned long elf_bss, elf_brk;
        int retval, i;
        unsigned int size;
        unsigned long elf_entry;
        unsigned long interp_load_addr = 0;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long reloc_func_desc __maybe_unused = 0;
        int executable_stack = EXSTACK_DEFAULT;
        unsigned long def_flags = 0;
        struct {
                struct elfhdr elf_ex;
                struct elfhdr interp_elf_ex;
        } *loc;

        loc = kmalloc(sizeof(*loc), GFP_KERNEL);
        if (!loc) {
                retval = -ENOMEM;
                goto out_ret;
        }

        /* Get the exec-header */
        loc->elf_ex = *((struct elfhdr *)bprm->buf);

        retval = -ENOEXEC;
        /* First of all, some simple consistency checks */
        if (memcmp(loc->elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        if (loc->elf_ex.e_type != ET_EXEC && loc->elf_ex.e_type != ET_DYN)
                goto out;
        if (!elf_check_arch(&loc->elf_ex))
                goto out;
        if (!bprm->file->f_op || !bprm->file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */
        if (loc->elf_ex.e_phentsize != sizeof(struct elf_phdr))
                goto out;
        if (loc->elf_ex.e_phnum < 1 ||
                loc->elf_ex.e_phnum > 65536U / sizeof(struct elf_phdr))
                goto out;
        size = loc->elf_ex.e_phnum * sizeof(struct elf_phdr);
        retval = -ENOMEM;
        elf_phdata = kmalloc(size, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        retval = kernel_read(bprm->file, loc->elf_ex.e_phoff,
                             (char *)elf_phdata, size);
        if (retval != size) {
                if (retval >= 0)
                        retval = -EIO;
                goto out_free_ph;
        }

        elf_ppnt = elf_phdata;
        elf_bss = 0;
        elf_brk = 0;

        start_code = ~0UL;
        end_code = 0;
        start_data = 0;
        end_data = 0;

        for (i = 0; i < loc->elf_ex.e_phnum; i++) {
                if (elf_ppnt->p_type == PT_INTERP) {
                        /* This is the program interpreter used for
                         * shared libraries - for now assume that this
                         * is an a.out format binary
                         */
                        retval = -ENOEXEC;
                        if (elf_ppnt->p_filesz > PATH_MAX ||
                            elf_ppnt->p_filesz < 2)
                                goto out_free_ph;

                        retval = -ENOMEM;
                        elf_interpreter = kmalloc(elf_ppnt->p_filesz,
                                                  GFP_KERNEL);
                        if (!elf_interpreter)
                                goto out_free_ph;

                        retval = kernel_read(bprm->file, elf_ppnt->p_offset,
                                             elf_interpreter,
                                             elf_ppnt->p_filesz);
                        if (retval != elf_ppnt->p_filesz) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_interp;
                        }
                        /* make sure path is NULL terminated */
                        retval = -ENOEXEC;
                        if (elf_interpreter[elf_ppnt->p_filesz - 1] != '\0')
                                goto out_free_interp;

                        interpreter = open_exec(elf_interpreter);
                        retval = PTR_ERR(interpreter);
                        if (IS_ERR(interpreter))
                                goto out_free_interp;

                        /*
                         * If the binary is not readable then enforce
                         * mm->dumpable = 0 regardless of the interpreter's
                         * permissions.
                         */
                        would_dump(bprm, interpreter);

                        retval = kernel_read(interpreter, 0, bprm->buf,
                                             BINPRM_BUF_SIZE);
                        if (retval != BINPRM_BUF_SIZE) {
                                if (retval >= 0)
                                        retval = -EIO;
                                goto out_free_dentry;
                        }

                        /* Get the exec headers */
                        loc->interp_elf_ex = *((struct elfhdr *)bprm->buf);
                        break;
                }
                elf_ppnt++;
        }

        elf_ppnt = elf_phdata;
        for (i = 0; i < loc->elf_ex.e_phnum; i++, elf_ppnt++)
                if (elf_ppnt->p_type == PT_GNU_STACK) {
                        if (elf_ppnt->p_flags & PF_X)
                                executable_stack = EXSTACK_ENABLE_X;
                        else
                                executable_stack = EXSTACK_DISABLE_X;
                        break;
                }

        /* Some simple consistency checks for the interpreter */
        if (elf_interpreter) {
                retval = -ELIBBAD;
                /* Not an ELF interpreter */
                if (memcmp(loc->interp_elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                        goto out_free_dentry;
                /* Verify the interpreter has a valid arch */
                if (!elf_check_arch(&loc->interp_elf_ex))
                        goto out_free_dentry;
        }

        /* Flush all traces of the currently running executable */
        retval = flush_old_exec(bprm);
        if (retval)
                goto out_free_dentry;

        /* OK, This is the point of no return */
        current->mm->def_flags = def_flags;

        /* Do this immediately, since STACK_TOP as used in setup_arg_pages
           may depend on the personality.  */
        SET_PERSONALITY(loc->elf_ex);
        if (elf_read_implies_exec(loc->elf_ex, executable_stack))
                current->personality |= READ_IMPLIES_EXEC;

        if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
                current->flags |= PF_RANDOMIZE;

        setup_new_exec(bprm);

        /* Do this so that we can load the interpreter, if need be.  We will
           change some of these later */
        current->mm->free_area_cache = current->mm->mmap_base;
        current->mm->cached_hole_size = 0;
        retval = setup_arg_pages(bprm, randomize_stack_top(STACK_TOP),
                                 executable_stack);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }

        current->mm->start_stack = bprm->p;

        /* Now we do a little grungy work by mmapping the ELF image into
           the correct location in memory. */
        for(i = 0, elf_ppnt = elf_phdata;
            i < loc->elf_ex.e_phnum; i++, elf_ppnt++) {
                int elf_prot = 0, elf_flags;
                unsigned long k, vaddr;

                if (elf_ppnt->p_type != PT_LOAD)
                        continue;

                if (unlikely(elf_brk > elf_bss)) {
                        unsigned long nbyte;

                        /* There was a PT_LOAD segment with p_memsz > p_filesz
                           before this one. Map anonymous pages, if needed,
                           and clear the area.  */
                        retval = set_brk(elf_bss + load_bias,
                                         elf_brk + load_bias);
                        if (retval) {
                                send_sig(SIGKILL, current, 0);
                                goto out_free_dentry;
                        }
                        nbyte = ELF_PAGEOFFSET(elf_bss);
                        if (nbyte) {
                                nbyte = ELF_MIN_ALIGN - nbyte;
                                if (nbyte > elf_brk - elf_bss)
                                        nbyte = elf_brk - elf_bss;
                                if (clear_user((void __user *)elf_bss +
                                                        load_bias, nbyte)) {
                                        /*
                                         * This bss-zeroing can fail if the ELF
                                         * file specifies odd protections. So
                                         * we don't check the return value
                                         */
                                }
                        }
                }

                if (elf_ppnt->p_flags & PF_R)
                        elf_prot |= PROT_READ;
                if (elf_ppnt->p_flags & PF_W)
                        elf_prot |= PROT_WRITE;
                if (elf_ppnt->p_flags & PF_X)
                        elf_prot |= PROT_EXEC;

                elf_flags = MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE;

                vaddr = elf_ppnt->p_vaddr;
                if (loc->elf_ex.e_type == ET_EXEC || load_addr_set) {
                        elf_flags |= MAP_FIXED;
                } else if (loc->elf_ex.e_type == ET_DYN) {
                        /* Try and get dynamic programs out of the way of the
                         * default mmap base, as well as whatever program they
                         * might try to exec.  This is because the brk will
                         * follow the loader, and is not movable.  */
#ifdef CONFIG_ARCH_BINFMT_ELF_RANDOMIZE_PIE
                        /* Memory randomization might have been switched off
                         * at runtime via sysctl.
                         * If that is the case, retain the original non-zero
                         * load_bias value in order to establish proper
                         * non-randomized mappings.
                         */
                        if (current->flags & PF_RANDOMIZE)
                                load_bias = 0;
                        else
                                load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#else
                        load_bias = ELF_PAGESTART(ELF_ET_DYN_BASE - vaddr);
#endif
                }

                error = elf_map(bprm->file, load_bias + vaddr, elf_ppnt,
                                elf_prot, elf_flags, 0);
                if (BAD_ADDR(error)) {
                        send_sig(SIGKILL, current, 0);
                        retval = IS_ERR((void *)error) ?
                                PTR_ERR((void*)error) : -EINVAL;
                        goto out_free_dentry;
                }

                if (!load_addr_set) {
                        load_addr_set = 1;
                        load_addr = (elf_ppnt->p_vaddr - elf_ppnt->p_offset);
                        if (loc->elf_ex.e_type == ET_DYN) {
                                load_bias += error -
                                             ELF_PAGESTART(load_bias + vaddr);
                                load_addr += load_bias;
                                reloc_func_desc = load_bias;
                        }
                }
                k = elf_ppnt->p_vaddr;
                if (k < start_code)
                        start_code = k;
                if (start_data < k)
                        start_data = k;

                /*
                 * Check to see if the section's size will overflow the
                 * allowed task size. Note that p_filesz must always be
                 * <= p_memsz so it is only necessary to check p_memsz.
                 */
                if (BAD_ADDR(k) || elf_ppnt->p_filesz > elf_ppnt->p_memsz ||
                    elf_ppnt->p_memsz > TASK_SIZE ||
                    TASK_SIZE - elf_ppnt->p_memsz < k) {
                        /* set_brk can never work. Avoid overflows. */
                        send_sig(SIGKILL, current, 0);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }

                k = elf_ppnt->p_vaddr + elf_ppnt->p_filesz;

                if (k > elf_bss)
                        elf_bss = k;
                if ((elf_ppnt->p_flags & PF_X) && end_code < k)
                        end_code = k;
                if (end_data < k)
                        end_data = k;
                k = elf_ppnt->p_vaddr + elf_ppnt->p_memsz;
                if (k > elf_brk)
                        elf_brk = k;
        }

        loc->elf_ex.e_entry += load_bias;
        elf_bss += load_bias;
        elf_brk += load_bias;
        start_code += load_bias;
        end_code += load_bias;
        start_data += load_bias;
        end_data += load_bias;

        /* Calling set_brk effectively mmaps the pages that we need
         * for the bss and break sections.  We must do this before
         * mapping in the interpreter, to make sure it doesn't wind
         * up getting placed where the bss needs to go.
         */
        retval = set_brk(elf_bss, elf_brk);
        if (retval) {
                send_sig(SIGKILL, current, 0);
                goto out_free_dentry;
        }
        if (likely(elf_bss != elf_brk) && unlikely(padzero(elf_bss))) {
                send_sig(SIGSEGV, current, 0);
                retval = -EFAULT; /* Nobody gets to see this, but.. */
                goto out_free_dentry;
        }

        if (elf_interpreter) {
                unsigned long uninitialized_var(interp_map_addr);

                elf_entry = load_elf_interp(&loc->interp_elf_ex,
                                            interpreter,
                                            &interp_map_addr,
                                            load_bias);
                if (!IS_ERR((void *)elf_entry)) {
                        /*
                         * load_elf_interp() returns relocation
                         * adjustment
                         */
                        interp_load_addr = elf_entry;
                        elf_entry += loc->interp_elf_ex.e_entry;
                }
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = IS_ERR((void *)elf_entry) ?
                                        (int)elf_entry : -EINVAL;
                        goto out_free_dentry;
                }
                reloc_func_desc = interp_load_addr;

                allow_write_access(interpreter);
                fput(interpreter);
                kfree(elf_interpreter);
        } else {
                elf_entry = loc->elf_ex.e_entry;
                if (BAD_ADDR(elf_entry)) {
                        force_sig(SIGSEGV, current);
                        retval = -EINVAL;
                        goto out_free_dentry;
                }
        }

        kfree(elf_phdata);

        set_binfmt(&elf_format);

#ifdef ARCH_HAS_SETUP_ADDITIONAL_PAGES
        retval = arch_setup_additional_pages(bprm, !!elf_interpreter);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
#endif /* ARCH_HAS_SETUP_ADDITIONAL_PAGES */

        install_exec_creds(bprm);
        retval = create_elf_tables(bprm, &loc->elf_ex,
                          load_addr, interp_load_addr);
        if (retval < 0) {
                send_sig(SIGKILL, current, 0);
                goto out;
        }
        /* N.B. passed_fileno might not be initialized? */
        current->mm->end_code = end_code;
        current->mm->start_code = start_code;
        current->mm->start_data = start_data;
        current->mm->end_data = end_data;
        current->mm->start_stack = bprm->p;

#ifdef arch_randomize_brk
        if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) {
                current->mm->brk = current->mm->start_brk =
                        arch_randomize_brk(current->mm);
#ifdef CONFIG_COMPAT_BRK
                current->brk_randomized = 1;
#endif
        }
#endif

        if (current->personality & MMAP_PAGE_ZERO) {
                /* Why this, you ask???  Well SVr4 maps page 0 as read-only,
                   and some applications "depend" upon this behavior.
                   Since we do not have the power to recompile these, we
                   emulate the SVr4 behavior. Sigh. */
                error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC,
                                MAP_FIXED | MAP_PRIVATE, 0);
        }

#ifdef ELF_PLAT_INIT
        /*
         * The ABI may specify that certain registers be set up in special
         * ways (on i386 %edx is the address of a DT_FINI function, for
         * example).  In addition, it may also specify (e.g., PowerPC64 ELF)
         * that the e_entry field is the address of the function descriptor
         * for the startup routine, rather than the address of the startup
         * routine itself.  This macro performs whatever initialization to
         * the regs structure is required as well as any relocations to the
         * function descriptor entries when executing dynamically linked apps.
         */
        ELF_PLAT_INIT(regs, reloc_func_desc);
#endif

        start_thread(regs, elf_entry, bprm->p);
        retval = 0;
out:
        kfree(loc);
out_ret:
        return retval;

        /* error cleanup */
out_free_dentry:
        allow_write_access(interpreter);
        if (interpreter)
                fput(interpreter);
out_free_interp:
        kfree(elf_interpreter);
out_free_ph:
        kfree(elf_phdata);
        goto out;
}
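
/*
 * Illustrative userspace sketch (not kernel code): the same program
 * header walk load_elf_binary() performs, done with <elf.h> against a
 * file of your choice -- read the ELF header, then scan the program
 * headers for PT_LOAD and PT_INTERP entries.  Assumes a 64-bit ELF.
 */
#if 0
#include <elf.h>
#include <stdio.h>
#include <string.h>

int main(int argc, char **argv)
{
        Elf64_Ehdr ehdr;
        Elf64_Phdr phdr;
        int i;
        FILE *f = fopen(argc > 1 ? argv[1] : "/bin/true", "rb");

        if (!f || fread(&ehdr, sizeof(ehdr), 1, f) != 1)
                return 1;
        /* same magic check as load_elf_binary() above */
        if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0)
                return 1;
        if (fseek(f, ehdr.e_phoff, SEEK_SET) != 0)
                return 1;
        for (i = 0; i < ehdr.e_phnum; i++) {
                if (fread(&phdr, sizeof(phdr), 1, f) != 1)
                        return 1;
                if (phdr.p_type == PT_LOAD)
                        printf("PT_LOAD   vaddr 0x%llx filesz 0x%llx memsz 0x%llx\n",
                               (unsigned long long)phdr.p_vaddr,
                               (unsigned long long)phdr.p_filesz,
                               (unsigned long long)phdr.p_memsz);
                else if (phdr.p_type == PT_INTERP)
                        printf("PT_INTERP at offset 0x%llx\n",
                               (unsigned long long)phdr.p_offset);
        }
        fclose(f);
        return 0;
}
#endif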

/* This is really simpleminded and specialized - we are loading an
   a.out library that is given an ELF header. */
static int load_elf_library(struct file *file)
{
        struct elf_phdr *elf_phdata;
        struct elf_phdr *eppnt;
        unsigned long elf_bss, bss, len;
        int retval, error, i, j;
        struct elfhdr elf_ex;

        error = -ENOEXEC;
        retval = kernel_read(file, 0, (char *)&elf_ex, sizeof(elf_ex));
        if (retval != sizeof(elf_ex))
                goto out;

        if (memcmp(elf_ex.e_ident, ELFMAG, SELFMAG) != 0)
                goto out;

        /* First of all, some simple consistency checks */
        if (elf_ex.e_type != ET_EXEC || elf_ex.e_phnum > 2 ||
            !elf_check_arch(&elf_ex) || !file->f_op || !file->f_op->mmap)
                goto out;

        /* Now read in all of the header information */

        j = sizeof(struct elf_phdr) * elf_ex.e_phnum;
        /* j < ELF_MIN_ALIGN because elf_ex.e_phnum <= 2 */

        error = -ENOMEM;
        elf_phdata = kmalloc(j, GFP_KERNEL);
        if (!elf_phdata)
                goto out;

        eppnt = elf_phdata;
        error = -ENOEXEC;
        retval = kernel_read(file, elf_ex.e_phoff, (char *)eppnt, j);
        if (retval != j)
                goto out_free_ph;

        for (j = 0, i = 0; i < elf_ex.e_phnum; i++)
                if ((eppnt + i)->p_type == PT_LOAD)
                        j++;
        if (j != 1)
                goto out_free_ph;

        while (eppnt->p_type != PT_LOAD)
                eppnt++;

        /* Now use mmap to map the library into memory. */
        error = vm_mmap(file,
                        ELF_PAGESTART(eppnt->p_vaddr),
                        (eppnt->p_filesz +
                         ELF_PAGEOFFSET(eppnt->p_vaddr)),
                        PROT_READ | PROT_WRITE | PROT_EXEC,
                        MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE,
                        (eppnt->p_offset -
                         ELF_PAGEOFFSET(eppnt->p_vaddr)));
        if (error != ELF_PAGESTART(eppnt->p_vaddr))
                goto out_free_ph;

        elf_bss = eppnt->p_vaddr + eppnt->p_filesz;
        if (padzero(elf_bss)) {
                error = -EFAULT;
                goto out_free_ph;
        }

        len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
                            ELF_MIN_ALIGN - 1);
        bss = eppnt->p_memsz + eppnt->p_vaddr;
        if (bss > len)
                vm_brk(len, bss - len);
        error = 0;

out_free_ph:
        kfree(elf_phdata);
out:
        return error;
}

#ifdef CONFIG_ELF_CORE
/*
 * ELF core dumper
 *
 * Modelled on fs/exec.c:aout_core_dump()
 * Jeremy Fitzhardinge <jeremy@sw.oz.au>
 */

/*
 * The purpose of always_dump_vma() is to make sure that special kernel mappings
 * that are useful for post-mortem analysis are included in every core dump.
 * In that way we ensure that the core dump is fully interpretable later
 * without matching up the same kernel and hardware config to see what PC values
 * meant. These special mappings include the vDSO, vsyscall, and other
 * architecture-specific mappings.
 */
static bool always_dump_vma(struct vm_area_struct *vma)
{
        /* Any vsyscall mappings? */
        if (vma == get_gate_vma(vma->vm_mm))
                return true;
        /*
         * arch_vma_name() returns non-NULL for special architecture mappings,
         * such as vDSO sections.
         */
        if (arch_vma_name(vma))
                return true;

        return false;
}

/*
 * Decide what to dump of a segment: part, all, or none.
 */
static unsigned long vma_dump_size(struct vm_area_struct *vma,
                                   unsigned long mm_flags)
{
#define FILTER(type)    (mm_flags & (1UL << MMF_DUMP_##type))

        /* always dump the vdso and vsyscall sections */
        if (always_dump_vma(vma))
                goto whole;

        if (vma->vm_flags & VM_NODUMP)
                return 0;

        /* Hugetlb memory check */
        if (vma->vm_flags & VM_HUGETLB) {
                if ((vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_SHARED))
                        goto whole;
                if (!(vma->vm_flags & VM_SHARED) && FILTER(HUGETLB_PRIVATE))
                        goto whole;
        }

        /* Do not dump I/O mapped devices or special mappings */
        if (vma->vm_flags & (VM_IO | VM_RESERVED))
                return 0;

        /* By default, dump shared memory if mapped from an anonymous file. */
        if (vma->vm_flags & VM_SHARED) {
                if (vma->vm_file->f_path.dentry->d_inode->i_nlink == 0 ?
                    FILTER(ANON_SHARED) : FILTER(MAPPED_SHARED))
                        goto whole;
                return 0;
        }

        /* Dump segments that have been written to.  */
        if (vma->anon_vma && FILTER(ANON_PRIVATE))
                goto whole;
        if (vma->vm_file == NULL)
                return 0;

        if (FILTER(MAPPED_PRIVATE))
                goto whole;

        /*
         * If this looks like the beginning of a DSO or executable mapping,
         * check for an ELF header.  If we find one, dump the first page to
         * aid in determining what was mapped here.
         */
        if (FILTER(ELF_HEADERS) &&
            vma->vm_pgoff == 0 && (vma->vm_flags & VM_READ)) {
                u32 __user *header = (u32 __user *) vma->vm_start;
                u32 word;
                mm_segment_t fs = get_fs();
                /*
                 * Doing it this way gets the constant folded by GCC.
                 */
                union {
                        u32 cmp;
                        char elfmag[SELFMAG];
                } magic;
                BUILD_BUG_ON(SELFMAG != sizeof word);
                magic.elfmag[EI_MAG0] = ELFMAG0;
                magic.elfmag[EI_MAG1] = ELFMAG1;
                magic.elfmag[EI_MAG2] = ELFMAG2;
                magic.elfmag[EI_MAG3] = ELFMAG3;
                /*
                 * Switch to the user "segment" for get_user(),
                 * then put back what elf_core_dump() had in place.
                 */
                set_fs(USER_DS);
                if (unlikely(get_user(word, header)))
                        word = 0;
                set_fs(fs);
                if (word == magic.cmp)
                        return PAGE_SIZE;
        }

#undef  FILTER

        return 0;

whole:
        return vma->vm_end - vma->vm_start;
}
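
/*
 * Worked example for the magic-number probe above: on a little-endian
 * machine the union packs "\177ELF" into magic.cmp == 0x464c457f, so a
 * single 32-bit compare against the first word of the mapping stands in
 * for a four-byte memcmp() against ELFMAG.
 */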

/* An ELF note in memory */
struct memelfnote
{
        const char *name;
        int type;
        unsigned int datasz;
        void *data;
};

static int notesize(struct memelfnote *en)
{
        int sz;

        sz = sizeof(struct elf_note);
        sz += roundup(strlen(en->name) + 1, 4);
        sz += roundup(en->datasz, 4);

        return sz;
}

#define DUMP_WRITE(addr, nr, foffset)   \
        do { if (!dump_write(file, (addr), (nr))) return 0; *foffset += (nr); } while(0)

static int alignfile(struct file *file, loff_t *foffset)
{
        static const char buf[4] = { 0, };
        DUMP_WRITE(buf, roundup(*foffset, 4) - *foffset, foffset);
        return 1;
}

static int writenote(struct memelfnote *men, struct file *file,
                        loff_t *foffset)
{
        struct elf_note en;
        en.n_namesz = strlen(men->name) + 1;
        en.n_descsz = men->datasz;
        en.n_type = men->type;

        DUMP_WRITE(&en, sizeof(en), foffset);
        DUMP_WRITE(men->name, en.n_namesz, foffset);
        if (!alignfile(file, foffset))
                return 0;
        DUMP_WRITE(men->data, men->datasz, foffset);
        if (!alignfile(file, foffset))
                return 0;

        return 1;
}
#undef DUMP_WRITE
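
/*
 * Worked example for writenote() (sizes assume x86-64, where
 * sizeof(struct elf_prstatus) is 336): a "CORE"/NT_PRSTATUS note is
 * laid out as
 *
 *      12 bytes   struct elf_note (n_namesz=5, n_descsz=336, n_type=1)
 *       5 bytes   "CORE\0", padded to 8 by alignfile()
 *      336 bytes  descriptor (already a multiple of 4, no pad needed)
 *
 * which matches notesize(): 12 + roundup(5, 4) + roundup(336, 4) == 356.
 */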

static void fill_elf_header(struct elfhdr *elf, int segs,
                            u16 machine, u32 flags, u8 osabi)
{
        memset(elf, 0, sizeof(*elf));

        memcpy(elf->e_ident, ELFMAG, SELFMAG);
        elf->e_ident[EI_CLASS] = ELF_CLASS;
        elf->e_ident[EI_DATA] = ELF_DATA;
        elf->e_ident[EI_VERSION] = EV_CURRENT;
        elf->e_ident[EI_OSABI] = ELF_OSABI;

        elf->e_type = ET_CORE;
        elf->e_machine = machine;
        elf->e_version = EV_CURRENT;
        elf->e_phoff = sizeof(struct elfhdr);
        elf->e_flags = flags;
        elf->e_ehsize = sizeof(struct elfhdr);
        elf->e_phentsize = sizeof(struct elf_phdr);
        elf->e_phnum = segs;

        return;
}

static void fill_elf_note_phdr(struct elf_phdr *phdr, int sz, loff_t offset)
{
        phdr->p_type = PT_NOTE;
        phdr->p_offset = offset;
        phdr->p_vaddr = 0;
        phdr->p_paddr = 0;
        phdr->p_filesz = sz;
        phdr->p_memsz = 0;
        phdr->p_flags = 0;
        phdr->p_align = 0;
        return;
}

static void fill_note(struct memelfnote *note, const char *name, int type,
                unsigned int sz, void *data)
{
        note->name = name;
        note->type = type;
        note->datasz = sz;
        note->data = data;
        return;
}

/*
 * fill up all the fields in prstatus from the given task struct, except
 * registers which need to be filled up separately.
 */
static void fill_prstatus(struct elf_prstatus *prstatus,
                struct task_struct *p, long signr)
{
        prstatus->pr_info.si_signo = prstatus->pr_cursig = signr;
        prstatus->pr_sigpend = p->pending.signal.sig[0];
        prstatus->pr_sighold = p->blocked.sig[0];
        rcu_read_lock();
        prstatus->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        prstatus->pr_pid = task_pid_vnr(p);
        prstatus->pr_pgrp = task_pgrp_vnr(p);
        prstatus->pr_sid = task_session_vnr(p);
        if (thread_group_leader(p)) {
                struct task_cputime cputime;

                /*
                 * This is the record for the group leader.  It shows the
                 * group-wide total, not its individual thread total.
                 */
                thread_group_cputime(p, &cputime);
                cputime_to_timeval(cputime.utime, &prstatus->pr_utime);
                cputime_to_timeval(cputime.stime, &prstatus->pr_stime);
        } else {
                cputime_to_timeval(p->utime, &prstatus->pr_utime);
                cputime_to_timeval(p->stime, &prstatus->pr_stime);
        }
        cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime);
        cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime);
}

static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p,
                       struct mm_struct *mm)
{
        const struct cred *cred;
        unsigned int i, len;

        /* first copy the parameters from user space */
        memset(psinfo, 0, sizeof(struct elf_prpsinfo));

        len = mm->arg_end - mm->arg_start;
        if (len >= ELF_PRARGSZ)
                len = ELF_PRARGSZ-1;
        if (copy_from_user(&psinfo->pr_psargs,
                           (const char __user *)mm->arg_start, len))
                return -EFAULT;
        for (i = 0; i < len; i++)
                if (psinfo->pr_psargs[i] == 0)
                        psinfo->pr_psargs[i] = ' ';
        psinfo->pr_psargs[len] = 0;

        rcu_read_lock();
        psinfo->pr_ppid = task_pid_vnr(rcu_dereference(p->real_parent));
        rcu_read_unlock();
        psinfo->pr_pid = task_pid_vnr(p);
        psinfo->pr_pgrp = task_pgrp_vnr(p);
        psinfo->pr_sid = task_session_vnr(p);

        i = p->state ? ffz(~p->state) + 1 : 0;
        psinfo->pr_state = i;
        psinfo->pr_sname = (i > 5) ? '.' : "RSDTZW"[i];
        psinfo->pr_zomb = psinfo->pr_sname == 'Z';
        psinfo->pr_nice = task_nice(p);
        psinfo->pr_flag = p->flags;
        rcu_read_lock();
        cred = __task_cred(p);
        SET_UID(psinfo->pr_uid, from_kuid_munged(cred->user_ns, cred->uid));
        SET_GID(psinfo->pr_gid, from_kgid_munged(cred->user_ns, cred->gid));
        rcu_read_unlock();
        strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname));

        return 0;
}

static void fill_auxv_note(struct memelfnote *note, struct mm_struct *mm)
{
        elf_addr_t *auxv = (elf_addr_t *) mm->saved_auxv;
        int i = 0;
        do
                i += 2;
        while (auxv[i - 2] != AT_NULL);
        fill_note(note, "CORE", NT_AUXV, i * sizeof(elf_addr_t), auxv);
}
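
/*
 * Illustrative userspace sketch (not kernel code): the same
 * AT_NULL-terminated walk fill_auxv_note() does above, performed over
 * /proc/self/auxv, which exposes the saved_auxv array being dumped.
 */
#if 0
#include <elf.h>
#include <stdio.h>

int main(void)
{
        unsigned long pair[2];
        FILE *f = fopen("/proc/self/auxv", "rb");

        if (!f)
                return 1;
        /* entries are (id, value) pairs, terminated by AT_NULL */
        while (fread(pair, sizeof(pair), 1, f) == 1 && pair[0] != AT_NULL)
                printf("id %-3lu val 0x%lx\n", pair[0], pair[1]);
        fclose(f);
        return 0;
}
#endif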
1374
1375 #ifdef CORE_DUMP_USE_REGSET
1376 #include <linux/regset.h>
1377
1378 struct elf_thread_core_info {
1379         struct elf_thread_core_info *next;
1380         struct task_struct *task;
1381         struct elf_prstatus prstatus;
1382         struct memelfnote notes[0];
1383 };
1384
1385 struct elf_note_info {
1386         struct elf_thread_core_info *thread;
1387         struct memelfnote psinfo;
1388         struct memelfnote auxv;
1389         size_t size;
1390         int thread_notes;
1391 };
1392
1393 /*
1394  * When a regset has a writeback hook, we call it on each thread before
1395  * dumping user memory.  On register window machines, this makes sure the
1396  * user memory backing the register data is up to date before we read it.
1397  */
1398 static void do_thread_regset_writeback(struct task_struct *task,
1399                                        const struct user_regset *regset)
1400 {
1401         if (regset->writeback)
1402                 regset->writeback(task, regset, 1);
1403 }
1404
1405 #ifndef PR_REG_SIZE
1406 #define PR_REG_SIZE(S) sizeof(S)
1407 #endif
1408
1409 #ifndef PRSTATUS_SIZE
1410 #define PRSTATUS_SIZE(S) sizeof(S)
1411 #endif
1412
1413 #ifndef PR_REG_PTR
1414 #define PR_REG_PTR(S) (&((S)->pr_reg))
1415 #endif
1416
1417 #ifndef SET_PR_FPVALID
1418 #define SET_PR_FPVALID(S, V) ((S)->pr_fpvalid = (V))
1419 #endif
1420
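/*
 * The four macros above supply the generic defaults; an architecture
 * whose dumped prstatus layout differs from its native structure (for
 * compat tasks, say) can pre-define them to substitute its own sizes
 * and register-block pointer.
 */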
1421 static int fill_thread_core_info(struct elf_thread_core_info *t,
1422                                  const struct user_regset_view *view,
1423                                  long signr, size_t *total)
1424 {
1425         unsigned int i;
1426
1427         /*
1428          * NT_PRSTATUS is the one special case, because the regset data
1429          * goes into the pr_reg field inside the note contents, rather
1430          * than being the whole note contents.  We fill the rest in here.
1431          * We assume that regset 0 is NT_PRSTATUS.
1432          */
1433         fill_prstatus(&t->prstatus, t->task, signr);
1434         (void) view->regsets[0].get(t->task, &view->regsets[0],
1435                                     0, PR_REG_SIZE(t->prstatus.pr_reg),
1436                                     PR_REG_PTR(&t->prstatus), NULL);
1437
1438         fill_note(&t->notes[0], "CORE", NT_PRSTATUS,
1439                   PRSTATUS_SIZE(t->prstatus), &t->prstatus);
1440         *total += notesize(&t->notes[0]);
1441
1442         do_thread_regset_writeback(t->task, &view->regsets[0]);
1443
1444         /*
1445          * Each other regset might generate a note too.  For each regset
1446          * that has no core_note_type or is inactive, we leave t->notes[i]
1447          * all zero and we'll know to skip writing it later.
1448          */
1449         for (i = 1; i < view->n; ++i) {
1450                 const struct user_regset *regset = &view->regsets[i];
1451                 do_thread_regset_writeback(t->task, regset);
1452                 if (regset->core_note_type && regset->get &&
1453                     (!regset->active || regset->active(t->task, regset))) {
1454                         int ret;
1455                         size_t size = regset->n * regset->size;
1456                         void *data = kmalloc(size, GFP_KERNEL);
1457                         if (unlikely(!data))
1458                                 return 0;
1459                         ret = regset->get(t->task, regset,
1460                                           0, size, data, NULL);
1461                         if (unlikely(ret))
1462                                 kfree(data);
1463                         else {
1464                                 if (regset->core_note_type != NT_PRFPREG)
1465                                         fill_note(&t->notes[i], "LINUX",
1466                                                   regset->core_note_type,
1467                                                   size, data);
1468                                 else {
1469                                         SET_PR_FPVALID(&t->prstatus, 1);
1470                                         fill_note(&t->notes[i], "CORE",
1471                                                   NT_PRFPREG, size, data);
1472                                 }
1473                                 *total += notesize(&t->notes[i]);
1474                         }
1475                 }
1476         }
1477
1478         return 1;
1479 }
1480
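/*
 * Ownership note: t->notes[0].data points at t->prstatus itself, while
 * the data of every later note is a separate kmalloc() buffer.
 * free_note_info() below relies on exactly that split, freeing
 * notes[1..thread_notes-1] but never notes[0].
 */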
1481 static int fill_note_info(struct elfhdr *elf, int phdrs,
1482                           struct elf_note_info *info,
1483                           long signr, struct pt_regs *regs)
1484 {
1485         struct task_struct *dump_task = current;
1486         const struct user_regset_view *view = task_user_regset_view(dump_task);
1487         struct elf_thread_core_info *t;
1488         struct elf_prpsinfo *psinfo;
1489         struct core_thread *ct;
1490         unsigned int i;
1491
1492         info->size = 0;
1493         info->thread = NULL;
1494
1495         psinfo = kmalloc(sizeof(*psinfo), GFP_KERNEL);
1496         if (psinfo == NULL)
1497                 return 0;
1498
1499         fill_note(&info->psinfo, "CORE", NT_PRPSINFO, sizeof(*psinfo), psinfo);
1500
1501         /*
1502          * Figure out how many notes we're going to need for each thread.
1503          */
1504         info->thread_notes = 0;
1505         for (i = 0; i < view->n; ++i)
1506                 if (view->regsets[i].core_note_type != 0)
1507                         ++info->thread_notes;
1508
1509         /*
1510          * Sanity check.  We rely on regset 0 being NT_PRSTATUS,
1511          * since it is our one special case.
1512          */
1513         if (unlikely(info->thread_notes == 0) ||
1514             unlikely(view->regsets[0].core_note_type != NT_PRSTATUS)) {
1515                 WARN_ON(1);
1516                 return 0;
1517         }
1518
1519         /*
1520          * Initialize the ELF file header.
1521          */
1522         fill_elf_header(elf, phdrs,
1523                         view->e_machine, view->e_flags, view->ei_osabi);
1524
1525         /*
1526          * Allocate a structure for each thread.
1527          */
1528         for (ct = &dump_task->mm->core_state->dumper; ct; ct = ct->next) {
1529                 t = kzalloc(offsetof(struct elf_thread_core_info,
1530                                      notes[info->thread_notes]),
1531                             GFP_KERNEL);
1532                 if (unlikely(!t))
1533                         return 0;
1534
1535                 t->task = ct->task;
1536                 if (ct->task == dump_task || !info->thread) {
1537                         t->next = info->thread;
1538                         info->thread = t;
1539                 } else {
1540                         /*
1541                          * Make sure to keep the original task at
1542                          * the head of the list.
1543                          */
1544                         t->next = info->thread->next;
1545                         info->thread->next = t;
1546                 }
1547         }
1548
1549         /*
1550          * Now fill in each thread's information.
1551          */
1552         for (t = info->thread; t != NULL; t = t->next)
1553                 if (!fill_thread_core_info(t, view, signr, &info->size))
1554                         return 0;
1555
1556         /*
1557          * Fill in the two process-wide notes.
1558          */
1559         fill_psinfo(psinfo, dump_task->group_leader, dump_task->mm);
1560         info->size += notesize(&info->psinfo);
1561
1562         fill_auxv_note(&info->auxv, current->mm);
1563         info->size += notesize(&info->auxv);
1564
1565         return 1;
1566 }
1567
1568 static size_t get_note_info_size(struct elf_note_info *info)
1569 {
1570         return info->size;
1571 }
1572
1573 /*
1574  * Write all the notes for each thread.  When writing the first thread, the
1575  * process-wide notes are interleaved after the first thread-specific note.
1576  */
1577 static int write_note_info(struct elf_note_info *info,
1578                            struct file *file, loff_t *foffset)
1579 {
1580         bool first = true;
1581         struct elf_thread_core_info *t = info->thread;
1582
1583         do {
1584                 int i;
1585
1586                 if (!writenote(&t->notes[0], file, foffset))
1587                         return 0;
1588
1589                 if (first && !writenote(&info->psinfo, file, foffset))
1590                         return 0;
1591                 if (first && !writenote(&info->auxv, file, foffset))
1592                         return 0;
1593
1594                 for (i = 1; i < info->thread_notes; ++i)
1595                         if (t->notes[i].data &&
1596                             !writenote(&t->notes[i], file, foffset))
1597                                 return 0;
1598
1599                 first = false;
1600                 t = t->next;
1601         } while (t);
1602
1603         return 1;
1604 }
1605
1606 static void free_note_info(struct elf_note_info *info)
1607 {
1608         struct elf_thread_core_info *threads = info->thread;
1609         while (threads) {
1610                 unsigned int i;
1611                 struct elf_thread_core_info *t = threads;
1612                 threads = t->next;
1613                 WARN_ON(t->notes[0].data && t->notes[0].data != &t->prstatus);
1614                 for (i = 1; i < info->thread_notes; ++i)
1615                         kfree(t->notes[i].data);
1616                 kfree(t);
1617         }
1618         kfree(info->psinfo.data);
1619 }
1620
1621 #else
1622
1623 /* Here is the structure in which the status of each thread is captured. */
1624 struct elf_thread_status
1625 {
1626         struct list_head list;
1627         struct elf_prstatus prstatus;   /* NT_PRSTATUS */
1628         elf_fpregset_t fpu;             /* NT_PRFPREG */
1629         struct task_struct *thread;
1630 #ifdef ELF_CORE_COPY_XFPREGS
1631         elf_fpxregset_t xfpu;           /* ELF_CORE_XFPREG_TYPE */
1632 #endif
1633         struct memelfnote notes[3];
1634         int num_notes;
1635 };
1636
1637 /*
1638  * In order to add the specific thread information for the ELF file format,
1639  * we need to keep a linked list of every thread's pr_status and then create
1640  * a single section for them in the final core file.
1641  */
1642 static int elf_dump_thread_status(long signr, struct elf_thread_status *t)
1643 {
1644         int sz = 0;
1645         struct task_struct *p = t->thread;
1646         t->num_notes = 0;
1647
1648         fill_prstatus(&t->prstatus, p, signr);
1649         elf_core_copy_task_regs(p, &t->prstatus.pr_reg);
1650
1651         fill_note(&t->notes[0], "CORE", NT_PRSTATUS, sizeof(t->prstatus),
1652                   &(t->prstatus));
1653         t->num_notes++;
1654         sz += notesize(&t->notes[0]);
1655
1656         if ((t->prstatus.pr_fpvalid = elf_core_copy_task_fpregs(p, NULL,
1657                                                                 &t->fpu))) {
1658                 fill_note(&t->notes[1], "CORE", NT_PRFPREG, sizeof(t->fpu),
1659                           &(t->fpu));
1660                 t->num_notes++;
1661                 sz += notesize(&t->notes[1]);
1662         }
1663
1664 #ifdef ELF_CORE_COPY_XFPREGS
1665         if (elf_core_copy_task_xfpregs(p, &t->xfpu)) {
1666                 fill_note(&t->notes[2], "LINUX", ELF_CORE_XFPREG_TYPE,
1667                           sizeof(t->xfpu), &t->xfpu);
1668                 t->num_notes++;
1669                 sz += notesize(&t->notes[2]);
1670         }
1671 #endif
1672         return sz;
1673 }
1674
1675 struct elf_note_info {
1676         struct memelfnote *notes;
1677         struct elf_prstatus *prstatus;  /* NT_PRSTATUS */
1678         struct elf_prpsinfo *psinfo;    /* NT_PRPSINFO */
1679         struct list_head thread_list;
1680         elf_fpregset_t *fpu;
1681 #ifdef ELF_CORE_COPY_XFPREGS
1682         elf_fpxregset_t *xfpu;
1683 #endif
1684         int thread_status_size;
1685         int numnote;
1686 };
1687
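/*
 * elf_note_info_init() sizes the fixed notes array at six entries:
 * NT_PRSTATUS, NT_PRPSINFO, NT_AUXV, NT_PRFPREG and, where the
 * architecture provides one, the extended FP regset, which still
 * leaves at least one slot spare.
 */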
1688 static int elf_note_info_init(struct elf_note_info *info)
1689 {
1690         memset(info, 0, sizeof(*info));
1691         INIT_LIST_HEAD(&info->thread_list);
1692
1693         /* Allocate space for six ELF notes */
1694         info->notes = kmalloc(6 * sizeof(struct memelfnote), GFP_KERNEL);
1695         if (!info->notes)
1696                 return 0;
1697         info->psinfo = kmalloc(sizeof(*info->psinfo), GFP_KERNEL);
1698         if (!info->psinfo)
1699                 goto notes_free;
1700         info->prstatus = kmalloc(sizeof(*info->prstatus), GFP_KERNEL);
1701         if (!info->prstatus)
1702                 goto psinfo_free;
1703         info->fpu = kmalloc(sizeof(*info->fpu), GFP_KERNEL);
1704         if (!info->fpu)
1705                 goto prstatus_free;
1706 #ifdef ELF_CORE_COPY_XFPREGS
1707         info->xfpu = kmalloc(sizeof(*info->xfpu), GFP_KERNEL);
1708         if (!info->xfpu)
1709                 goto fpu_free;
1710 #endif
1711         return 1;
1712 #ifdef ELF_CORE_COPY_XFPREGS
1713  fpu_free:
1714         kfree(info->fpu);
1715 #endif
1716  prstatus_free:
1717         kfree(info->prstatus);
1718  psinfo_free:
1719         kfree(info->psinfo);
1720  notes_free:
1721         kfree(info->notes);
1722         return 0;
1723 }
1724
1725 static int fill_note_info(struct elfhdr *elf, int phdrs,
1726                           struct elf_note_info *info,
1727                           long signr, struct pt_regs *regs)
1728 {
1729         struct list_head *t;
1730
1731         if (!elf_note_info_init(info))
1732                 return 0;
1733
1734         if (signr) {
1735                 struct core_thread *ct;
1736                 struct elf_thread_status *ets;
1737
1738                 for (ct = current->mm->core_state->dumper.next;
1739                                                 ct; ct = ct->next) {
1740                         ets = kzalloc(sizeof(*ets), GFP_KERNEL);
1741                         if (!ets)
1742                                 return 0;
1743
1744                         ets->thread = ct->task;
1745                         list_add(&ets->list, &info->thread_list);
1746                 }
1747
1748                 list_for_each(t, &info->thread_list) {
1749                         int sz;
1750
1751                         ets = list_entry(t, struct elf_thread_status, list);
1752                         sz = elf_dump_thread_status(signr, ets);
1753                         info->thread_status_size += sz;
1754                 }
1755         }
1756         /* now collect the dump for the current task */
1757         memset(info->prstatus, 0, sizeof(*info->prstatus));
1758         fill_prstatus(info->prstatus, current, signr);
1759         elf_core_copy_regs(&info->prstatus->pr_reg, regs);
1760
1761         /* Set up header */
1762         fill_elf_header(elf, phdrs, ELF_ARCH, ELF_CORE_EFLAGS, ELF_OSABI);
1763
1764         /*
1765          * Set up the notes in similar form to SVR4 core dumps made
1766          * with info from their /proc.
1767          */
1768
1769         fill_note(info->notes + 0, "CORE", NT_PRSTATUS,
1770                   sizeof(*info->prstatus), info->prstatus);
1771         fill_psinfo(info->psinfo, current->group_leader, current->mm);
1772         fill_note(info->notes + 1, "CORE", NT_PRPSINFO,
1773                   sizeof(*info->psinfo), info->psinfo);
1774
1775         info->numnote = 2;
1776
1777         fill_auxv_note(&info->notes[info->numnote++], current->mm);
1778
1779         /* Try to dump the FPU. */
1780         info->prstatus->pr_fpvalid = elf_core_copy_task_fpregs(current, regs,
1781                                                                info->fpu);
1782         if (info->prstatus->pr_fpvalid)
1783                 fill_note(info->notes + info->numnote++,
1784                           "CORE", NT_PRFPREG, sizeof(*info->fpu), info->fpu);
1785 #ifdef ELF_CORE_COPY_XFPREGS
1786         if (elf_core_copy_task_xfpregs(current, info->xfpu))
1787                 fill_note(info->notes + info->numnote++,
1788                           "LINUX", ELF_CORE_XFPREG_TYPE,
1789                           sizeof(*info->xfpu), info->xfpu);
1790 #endif
1791
1792         return 1;
1793 }
1794
1795 static size_t get_note_info_size(struct elf_note_info *info)
1796 {
1797         int sz = 0;
1798         int i;
1799
1800         for (i = 0; i < info->numnote; i++)
1801                 sz += notesize(info->notes + i);
1802
1803         sz += info->thread_status_size;
1804
1805         return sz;
1806 }
1807
1808 static int write_note_info(struct elf_note_info *info,
1809                            struct file *file, loff_t *foffset)
1810 {
1811         int i;
1812         struct list_head *t;
1813
1814         for (i = 0; i < info->numnote; i++)
1815                 if (!writenote(info->notes + i, file, foffset))
1816                         return 0;
1817
1818         /* write out the thread status notes section */
1819         list_for_each(t, &info->thread_list) {
1820                 struct elf_thread_status *tmp =
1821                                 list_entry(t, struct elf_thread_status, list);
1822
1823                 for (i = 0; i < tmp->num_notes; i++)
1824                         if (!writenote(&tmp->notes[i], file, foffset))
1825                                 return 0;
1826         }
1827
1828         return 1;
1829 }
1830
1831 static void free_note_info(struct elf_note_info *info)
1832 {
1833         while (!list_empty(&info->thread_list)) {
1834                 struct list_head *tmp = info->thread_list.next;
1835                 list_del(tmp);
1836                 kfree(list_entry(tmp, struct elf_thread_status, list));
1837         }
1838
1839         kfree(info->prstatus);
1840         kfree(info->psinfo);
1841         kfree(info->notes);
1842         kfree(info->fpu);
1843 #ifdef ELF_CORE_COPY_XFPREGS
1844         kfree(info->xfpu);
1845 #endif
1846 }
1847
1848 #endif
1849
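/*
 * The "gate" vma (the x86 vsyscall page, for example) is not linked
 * into the mm->mmap list, so the two helpers below splice it in as the
 * final element of the vma iteration.
 */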
1850 static struct vm_area_struct *first_vma(struct task_struct *tsk,
1851                                         struct vm_area_struct *gate_vma)
1852 {
1853         struct vm_area_struct *ret = tsk->mm->mmap;
1854
1855         if (ret)
1856                 return ret;
1857         return gate_vma;
1858 }
1859 /*
1860  * Helper function for iterating across a vma list.  It ensures that the caller
1861  * will visit `gate_vma' prior to terminating the search.
1862  */
1863 static struct vm_area_struct *next_vma(struct vm_area_struct *this_vma,
1864                                         struct vm_area_struct *gate_vma)
1865 {
1866         struct vm_area_struct *ret;
1867
1868         ret = this_vma->vm_next;
1869         if (ret)
1870                 return ret;
1871         if (this_vma == gate_vma)
1872                 return NULL;
1873         return gate_vma;
1874 }
1875
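/*
 * Extended numbering: when the true segment count does not fit in the
 * 16-bit e_phnum field, e_phnum is set to PN_XNUM and the real count
 * is carried in the sh_info member of section header 0, which is the
 * placeholder section this function constructs.
 */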
1876 static void fill_extnum_info(struct elfhdr *elf, struct elf_shdr *shdr4extnum,
1877                              elf_addr_t e_shoff, int segs)
1878 {
1879         elf->e_shoff = e_shoff;
1880         elf->e_shentsize = sizeof(*shdr4extnum);
1881         elf->e_shnum = 1;
1882         elf->e_shstrndx = SHN_UNDEF;
1883
1884         memset(shdr4extnum, 0, sizeof(*shdr4extnum));
1885
1886         shdr4extnum->sh_type = SHT_NULL;
1887         shdr4extnum->sh_size = elf->e_shnum;
1888         shdr4extnum->sh_link = elf->e_shstrndx;
1889         shdr4extnum->sh_info = segs;
1890 }
1891
1892 static size_t elf_core_vma_data_size(struct vm_area_struct *gate_vma,
1893                                      unsigned long mm_flags)
1894 {
1895         struct vm_area_struct *vma;
1896         size_t size = 0;
1897
1898         for (vma = first_vma(current, gate_vma); vma != NULL;
1899              vma = next_vma(vma, gate_vma))
1900                 size += vma_dump_size(vma, mm_flags);
1901         return size;
1902 }
1903
1904 /*
1905  * Actual dumper
1906  *
1907  * This is a two-pass process; first we find the offsets of the bits,
1908  * and then they are actually written out.  If we run out of core limit
1909  * we just truncate.
1910  */
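/*
 * The resulting core file is laid out, in file order, as:
 *
 *   ELF header
 *   program headers (the PT_NOTE entry first, then one PT_LOAD per
 *   vma, then any extra architecture phdrs)
 *   note data
 *   page-aligned segment contents
 *   extra architecture data and, when e_phnum == PN_XNUM, the
 *   extended-numbering section header
 */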
1911 static int elf_core_dump(struct coredump_params *cprm)
1912 {
1913         int has_dumped = 0;
1914         mm_segment_t fs;
1915         int segs;
1916         size_t size = 0;
1917         struct vm_area_struct *vma, *gate_vma;
1918         struct elfhdr *elf = NULL;
1919         loff_t offset = 0, dataoff, foffset;
1920         struct elf_note_info info;
1921         struct elf_phdr *phdr4note = NULL;
1922         struct elf_shdr *shdr4extnum = NULL;
1923         Elf_Half e_phnum;
1924         elf_addr_t e_shoff;
1925
1926         /*
1927          * We no longer stop all VM operations.
1928          * 
1929          * This is because any processes that could possibly change the
1930          * map_count or the mmap / vma pages are now blocked in do_exit
1931          * until current finishes this core dump.
1932          *
1933          * Only ptrace can touch these memory addresses, but it doesn't change
1934          * the map_count or the pages allocated. So no possibility of crashing
1935          * exists while dumping the mm->vm_next areas to the core file.
1936          */
1937
1938         /* alloc memory for large data structures: too large to be on stack */
1939         elf = kmalloc(sizeof(*elf), GFP_KERNEL);
1940         if (!elf)
1941                 goto out;
1942         /*
1943          * The number of segs is recorded in the ELF header as a 16-bit value.
1944          * Please check the DEFAULT_MAX_MAP_COUNT definition when modifying here.
1945          */
1946         segs = current->mm->map_count;
1947         segs += elf_core_extra_phdrs();
1948
1949         gate_vma = get_gate_vma(current->mm);
1950         if (gate_vma != NULL)
1951                 segs++;
1952
1953         /* for notes section */
1954         segs++;
1955
1956         /* If segs > PN_XNUM(0xffff), then e_phnum overflows. To avoid
1957          * this, the kernel supports extended numbering. Have a look at
1958          * include/linux/elf.h for further information. */
1959         e_phnum = segs > PN_XNUM ? PN_XNUM : segs;
1960
1961         /*
1962          * Collect all the non-memory information about the process for the
1963          * notes.  This also sets up the file header.
1964          */
1965         if (!fill_note_info(elf, e_phnum, &info, cprm->signr, cprm->regs))
1966                 goto cleanup;
1967
1968         has_dumped = 1;
1969         current->flags |= PF_DUMPCORE;
1970
1971         fs = get_fs();
1972         set_fs(KERNEL_DS);
1973
1974         offset += sizeof(*elf);                         /* Elf header */
1975         offset += segs * sizeof(struct elf_phdr);       /* Program headers */
1976         foffset = offset;
1977
1978         /* Write notes phdr entry */
1979         {
1980                 size_t sz = get_note_info_size(&info);
1981
1982                 sz += elf_coredump_extra_notes_size();
1983
1984                 phdr4note = kmalloc(sizeof(*phdr4note), GFP_KERNEL);
1985                 if (!phdr4note)
1986                         goto end_coredump;
1987
1988                 fill_elf_note_phdr(phdr4note, sz, offset);
1989                 offset += sz;
1990         }
1991
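        /*
         * Page-align the start of the segment data so each PT_LOAD's
         * file offset and virtual address are congruent modulo the
         * page size, as required for mappable ELF segments.
         */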
1992         dataoff = offset = roundup(offset, ELF_EXEC_PAGESIZE);
1993
1994         offset += elf_core_vma_data_size(gate_vma, cprm->mm_flags);
1995         offset += elf_core_extra_data_size();
1996         e_shoff = offset;
1997
1998         if (e_phnum == PN_XNUM) {
1999                 shdr4extnum = kmalloc(sizeof(*shdr4extnum), GFP_KERNEL);
2000                 if (!shdr4extnum)
2001                         goto end_coredump;
2002                 fill_extnum_info(elf, shdr4extnum, e_shoff, segs);
2003         }
2004
2005         offset = dataoff;
2006
2007         size += sizeof(*elf);
2008         if (size > cprm->limit || !dump_write(cprm->file, elf, sizeof(*elf)))
2009                 goto end_coredump;
2010
2011         size += sizeof(*phdr4note);
2012         if (size > cprm->limit
2013             || !dump_write(cprm->file, phdr4note, sizeof(*phdr4note)))
2014                 goto end_coredump;
2015
2016         /* Write program headers for segments dump */
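        /*
         * Note that p_filesz below is what vma_dump_size() says will
         * actually be written after coredump_filter trimming, while
         * p_memsz is the full extent of the mapping; readers recreate
         * the difference as zero-filled memory.
         */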
2017         for (vma = first_vma(current, gate_vma); vma != NULL;
2018                         vma = next_vma(vma, gate_vma)) {
2019                 struct elf_phdr phdr;
2020
2021                 phdr.p_type = PT_LOAD;
2022                 phdr.p_offset = offset;
2023                 phdr.p_vaddr = vma->vm_start;
2024                 phdr.p_paddr = 0;
2025                 phdr.p_filesz = vma_dump_size(vma, cprm->mm_flags);
2026                 phdr.p_memsz = vma->vm_end - vma->vm_start;
2027                 offset += phdr.p_filesz;
2028                 phdr.p_flags = vma->vm_flags & VM_READ ? PF_R : 0;
2029                 if (vma->vm_flags & VM_WRITE)
2030                         phdr.p_flags |= PF_W;
2031                 if (vma->vm_flags & VM_EXEC)
2032                         phdr.p_flags |= PF_X;
2033                 phdr.p_align = ELF_EXEC_PAGESIZE;
2034
2035                 size += sizeof(phdr);
2036                 if (size > cprm->limit
2037                     || !dump_write(cprm->file, &phdr, sizeof(phdr)))
2038                         goto end_coredump;
2039         }
2040
2041         if (!elf_core_write_extra_phdrs(cprm->file, offset, &size, cprm->limit))
2042                 goto end_coredump;
2043
2044         /* write out the notes section */
2045         if (!write_note_info(&info, cprm->file, &foffset))
2046                 goto end_coredump;
2047
2048         if (elf_coredump_extra_notes_write(cprm->file, &foffset))
2049                 goto end_coredump;
2050
2051         /* Align to page */
2052         if (!dump_seek(cprm->file, dataoff - foffset))
2053                 goto end_coredump;
2054
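        /*
         * Walk every segment a page at a time.  get_dump_page() returns
         * NULL for pages not worth copying (holes and never-instantiated
         * anonymous pages); those are skipped with dump_seek() rather
         * than written out.
         */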
2055         for (vma = first_vma(current, gate_vma); vma != NULL;
2056                         vma = next_vma(vma, gate_vma)) {
2057                 unsigned long addr;
2058                 unsigned long end;
2059
2060                 end = vma->vm_start + vma_dump_size(vma, cprm->mm_flags);
2061
2062                 for (addr = vma->vm_start; addr < end; addr += PAGE_SIZE) {
2063                         struct page *page;
2064                         int stop;
2065
2066                         page = get_dump_page(addr);
2067                         if (page) {
2068                                 void *kaddr = kmap(page);
2069                                 stop = ((size += PAGE_SIZE) > cprm->limit) ||
2070                                         !dump_write(cprm->file, kaddr,
2071                                                     PAGE_SIZE);
2072                                 kunmap(page);
2073                                 page_cache_release(page);
2074                         } else
2075                                 stop = !dump_seek(cprm->file, PAGE_SIZE);
2076                         if (stop)
2077                                 goto end_coredump;
2078                 }
2079         }
2080
2081         if (!elf_core_write_extra_data(cprm->file, &size, cprm->limit))
2082                 goto end_coredump;
2083
2084         if (e_phnum == PN_XNUM) {
2085                 size += sizeof(*shdr4extnum);
2086                 if (size > cprm->limit
2087                     || !dump_write(cprm->file, shdr4extnum,
2088                                    sizeof(*shdr4extnum)))
2089                         goto end_coredump;
2090         }
2091
2092 end_coredump:
2093         set_fs(fs);
2094
2095 cleanup:
2096         free_note_info(&info);
2097         kfree(shdr4extnum);
2098         kfree(phdr4note);
2099         kfree(elf);
2100 out:
2101         return has_dumped;
2102 }
2103
2104 #endif          /* CONFIG_ELF_CORE */
2105
2106 static int __init init_elf_binfmt(void)
2107 {
2108         register_binfmt(&elf_format);
2109         return 0;
2110 }
2111
2112 static void __exit exit_elf_binfmt(void)
2113 {
2114         /* Remove the ELF loader. */
2115         unregister_binfmt(&elf_format);
2116 }
2117
2118 core_initcall(init_elf_binfmt);
2119 module_exit(exit_elf_binfmt);
2120 MODULE_LICENSE("GPL");