1 // SPDX-License-Identifier: GPL-2.0
5 * Copyright IBM Corp. 2008
6 * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
9 #include <linux/binfmts.h>
10 #include <linux/compat.h>
11 #include <linux/elf.h>
12 #include <linux/errno.h>
13 #include <linux/init.h>
14 #include <linux/kernel.h>
16 #include <linux/slab.h>
17 #include <linux/smp.h>
18 #include <linux/time_namespace.h>
19 #include <vdso/datapage.h>
/*
 * vdso64_start/vdso64_end are defined outside this file. vdso_init() uses
 * their difference as the size of the vDSO and walks the range page by page.
 */
22 extern char vdso64_start[], vdso64_end[];
/* Page count of the vdso64 range; assigned in vdso_init(). */
23 static unsigned int vdso_pages;
/*
 * Tentative declaration so that code placed above the initialised
 * definition of vvar_mapping (e.g. vdso_join_timens()) can take its address.
 */
25 static struct vm_special_mapping vvar_mapping;
/*
 * Storage for the vDSO data: an array of CS_BASES struct vdso_data entries
 * inside an object annotated with __page_aligned_data.
 * NOTE(review): the opening line of this aggregate and any further members
 * are missing from this listing (embedded numbering skips 26-27 and 29);
 * restore them from the authoritative file before building.
 */
28 struct vdso_data data[CS_BASES];
30 } vdso_data_store __page_aligned_data;
/*
 * Global pointer to the first vdso_data entry. vvar_fault() inserts the page
 * frame this pointer refers to into the faulting VMA.
 */
32 struct vdso_data *vdso_data = vdso_data_store.data;
/*
 * Page indices inside the vvar mapping. vvar_fault() switches on vmf->pgoff
 * using these values.
 * NOTE(review): the enclosing declaration (presumably an enum that also
 * defines VVAR_NR_PAGES, which arch_setup_additional_pages() uses) is not
 * visible in this listing - confirm against the authoritative file.
 */
35 VVAR_DATA_PAGE_OFFSET,
36 VVAR_TIMENS_PAGE_OFFSET,
/*
 * Global vDSO switch, enabled by default. arch_setup_additional_pages()
 * tests it before it takes the mmap lock or installs any mapping.
 */
40 unsigned int __read_mostly vdso_enabled = 1;
/*
 * vdso_setup() - handler for the vdso= kernel command line parameter.
 * @str: text following vdso= on the command line.
 *
 * Parses @str with kstrtobool() and copies the result into vdso_enabled only
 * when kstrtobool() returned 0 (success by kernel convention); otherwise the
 * current setting is kept.
 *
 * NOTE(review): the braces, the declaration of the local enabled and the
 * return statement are missing from this listing (embedded numbering jumps
 * 42 -> 46 and 47 -> 50); restore them from the authoritative file.
 */
42 static int __init vdso_setup(char *str)
46 if (!kstrtobool(str, &enabled))
47 vdso_enabled = enabled;
/* Register vdso_setup() as the handler for the vdso= parameter. */
50 __setup("vdso=", vdso_setup);
/*
 * arch_get_vdso_data() - view a vvar page as vDSO data.
 * @vvar_page: address of the page.
 *
 * Return: @vvar_page cast to struct vdso_data *; no offset is applied.
 *
 * NOTE(review): braces are missing from this listing, and the embedded
 * numbering (50 -> 53) suggests a dropped line in front of the function,
 * possibly a preprocessor guard - confirm.
 */
53 struct vdso_data *arch_get_vdso_data(void *vvar_page)
55 return (struct vdso_data *)(vvar_page);
/*
 * find_timens_vvar_page() - look up the time namespace vvar page for a VMA.
 * @vma: VMA being faulted.
 *
 * When @vma belongs to the mm of the current task, returns the vvar_page of
 * the time namespace reached through current->nsproxy. Any other caller is
 * unexpected (see the rationale text below) and triggers a WARN.
 *
 * NOTE(review): the comment delimiters around the rationale text, the
 * braces and the return statement after WARN() are missing from this
 * listing; presumably the function returns NULL on that path - confirm.
 */
58 static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
/* Expected case: the fault is taken on behalf of the current task. */
60 if (likely(vma->vm_mm == current->mm))
61 return current->nsproxy->time_ns->vvar_page;
63 * VM_PFNMAP | VM_IO protect .fault() handler from being called
64 * through interfaces like /proc/$pid/mem or
65 * process_vm_{readv,writev}() as long as there's no .access()
66 * in special_mapping_vmops().
67 * For more details check_vma_flags() and __access_remote_vm()
69 WARN(1, "vvar_page accessed remotely");
74 * The VVAR page layout depends on whether a task belongs to the root or
75 * non-root time namespace. Whenever a task changes its namespace, the VVAR
76 * page tables are cleared and then they will be re-faulted with a
77 * corresponding layout.
78 * See also the comment near timens_setup_vdso_data() for details.
/*
 * vdso_join_timens() - zap the vvar page table entries of a task.
 * @task: task whose mm is walked.
 * @ns:   not referenced by the lines visible in this listing.
 *
 * Iterates over the VMA list of task->mm (mm->mmap / vm_next) and calls
 * zap_page_range() over the whole extent of a VMA, with a
 * vma_is_special_mapping() test against vvar_mapping in front of it.
 *
 * NOTE(review): embedded lines 84-85, 88, 90 and 92-96 are missing from this
 * listing. Locking around the walk, the statement controlled by the test
 * below, the end of the loop and the return value cannot be seen here and
 * must be restored from the authoritative file.
 */
80 int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
82 struct mm_struct *mm = task->mm;
83 struct vm_area_struct *vma;
86 for (vma = mm->mmap; vma; vma = vma->vm_next) {
/* Full length of this VMA, used as the zap length. */
87 unsigned long size = vma->vm_end - vma->vm_start;
/*
 * NOTE(review): the statement controlled by this test is not visible;
 * presumably VMAs other than the vvar mapping are skipped - confirm.
 */
89 if (!vma_is_special_mapping(vma, &vvar_mapping))
91 zap_page_range(vma, vma->vm_start, size);
/*
 * Second definition of find_timens_vvar_page() with the same signature.
 * NOTE(review): its body and the surrounding preprocessor lines are missing
 * from this listing. Presumably this is the stub used when CONFIG_TIME_NS
 * is disabled - confirm against the authoritative file.
 */
98 static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
/*
 * vvar_fault() - resolve a page fault inside the vvar mapping.
 * @sm:  special mapping descriptor; not referenced in the visible lines.
 * @vma: VMA that took the fault.
 * @vmf: fault description; pgoff selects the page, address is where the
 *       pfn gets inserted.
 *
 * Picks a pfn according to vmf->pgoff and inserts it with vmf_insert_pfn().
 * Returns VM_FAULT_SIGBUS on the paths marked below, otherwise the result of
 * the final vmf_insert_pfn() call.
 *
 * NOTE(review): presumably installed as the .fault callback of vvar_mapping;
 * the initialiser body is not visible in this listing. Several lines of this
 * function are missing as well (braces, the declaration of err, the guards
 * in front of both SIGBUS returns and in front of the time namespace
 * handling, break statements). Restore them from the authoritative file;
 * the notes below mark the gaps.
 */
104 static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
105 struct vm_area_struct *vma, struct vm_fault *vmf)
/* Time namespace vvar page for this VMA as reported by the helper. */
107 struct page *timens_page = find_timens_vvar_page(vma);
108 unsigned long addr, pfn;
111 switch (vmf->pgoff) {
112 case VVAR_DATA_PAGE_OFFSET:
/* Start with the page frame that holds the global vdso_data. */
113 pfn = virt_to_pfn(vdso_data);
/*
 * NOTE(review): embedded lines 114-115 are missing; presumably the next
 * statements only run when timens_page is set. They insert the global data
 * pfn VVAR_TIMENS_PAGE_OFFSET pages above the faulting address and then
 * switch pfn to the time namespace page, which the final vmf_insert_pfn()
 * places at vmf->address.
 */
116 * Fault in VVAR page too, since it will be accessed
117 * to get clock data anyway.
119 addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
120 err = vmf_insert_pfn(vma, addr, pfn);
/*
 * NOTE(review): the controlled statement is not visible (embedded line
 * 122); presumably err is returned to the caller.
 */
121 if (unlikely(err & VM_FAULT_ERROR))
123 pfn = page_to_pfn(timens_page);
126 #ifdef CONFIG_TIME_NS
127 case VVAR_TIMENS_PAGE_OFFSET:
129 * If a task belongs to a time namespace then a namespace
130 * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
131 * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
133 * See also the comment near timens_setup_vdso_data().
/*
 * NOTE(review): the guard for this return is not visible (embedded lines
 * 134-135); presumably it is taken when there is no time namespace page.
 */
136 return VM_FAULT_SIGBUS;
137 pfn = virt_to_pfn(vdso_data);
139 #endif /* CONFIG_TIME_NS */
/*
 * NOTE(review): presumably a default label precedes this return (embedded
 * line 140 is missing), rejecting every other page offset.
 */
141 return VM_FAULT_SIGBUS;
/* Common exit: map the selected pfn at the faulting address. */
143 return vmf_insert_pfn(vma, vmf->address, pfn);
/*
 * vdso_mremap() - .mremap callback of vdso_mapping.
 * @sm:  special mapping descriptor; not referenced in the visible lines.
 * @vma: VMA describing the mapping; its vm_start is recorded.
 *
 * Stores vma->vm_start in the vdso_base field of the mm context of the
 * current task.
 *
 * NOTE(review): braces and the return statement are missing from this
 * listing.
 */
146 static int vdso_mremap(const struct vm_special_mapping *sm,
147 struct vm_area_struct *vma)
149 current->mm->context.vdso_base = vma->vm_start;
/*
 * Descriptor of the vvar special mapping. Its address is passed to
 * vma_is_special_mapping() in vdso_join_timens().
 * NOTE(review): the initialiser members and the closing brace are missing
 * from this listing (embedded lines 154-156); restore them from the
 * authoritative file.
 */
153 static struct vm_special_mapping vvar_mapping = {
/*
 * Descriptor of the vDSO text special mapping. vdso_init() fills in the
 * pages member at boot.
 * NOTE(review): at least one initialiser member (embedded line 159) and the
 * closing brace are missing from this listing.
 */
158 static struct vm_special_mapping vdso_mapping = {
/* Keep the vdso_base of the mm context in sync when the mapping moves. */
160 .mremap = vdso_mremap,
/*
 * vdso_getcpu_init() - pass the id of the executing CPU to
 * set_tod_programmable_field(). Registered as an early initcall.
 *
 * NOTE(review): braces and the return statement are missing from this
 * listing.
 */
163 int vdso_getcpu_init(void)
165 set_tod_programmable_field(smp_processor_id());
168 early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
/*
 * arch_setup_additional_pages() - map the vvar pages and the vDSO text into
 * the mm of the current task.
 * @bprm:        not referenced in the visible lines.
 * @uses_interp: not referenced in the visible lines.
 *
 * Layout: VVAR_NR_PAGES pages of vvar data at vvar_start, immediately
 * followed by vdso_pages pages of vDSO text. Both are placed inside one
 * range obtained from get_unmapped_area() after mmap_write_lock_killable()
 * was called, and the lock is released before returning. The start of the
 * text is stored in the vdso_base field of the mm context.
 *
 * NOTE(review): this listing has lost lines (braces, the declaration of the
 * return code, the statements controlled by the if tests, the remaining
 * arguments of both _install_special_mapping() calls and the final return).
 * Restore them from the authoritative file; only the visible text is
 * documented here.
 */
170 int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
172 unsigned long vdso_text_len, vdso_mapping_len;
173 unsigned long vvar_start, vdso_text_start;
174 struct mm_struct *mm = current->mm;
175 struct vm_area_struct *vma;
/* The local page count must agree with __VVAR_PAGES. */
178 BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
/* Nothing is mapped when the vDSO is disabled or for compat tasks. */
179 if (!vdso_enabled || is_compat_task())
181 if (mmap_write_lock_killable(mm))
183 vdso_text_len = vdso_pages << PAGE_SHIFT;
184 vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
/* One contiguous range for vvar pages plus text. */
185 vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
187 if (IS_ERR_VALUE(vvar_start))
189 vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
190 VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
/* The text starts right behind the vvar pages. */
196 vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
197 /* VM_MAYWRITE for COW so gdb can set breakpoints */
198 vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
200 VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
/*
 * Roll back the vvar mapping. It was installed with a length of
 * VVAR_NR_PAGES * PAGE_SIZE, so unmap that same length; unmapping a single
 * PAGE_SIZE would leave the remaining vvar page(s) mapped.
 * NOTE(review): the test guarding this call is not visible in this listing
 * (presumably IS_ERR(vma) for the text mapping) - confirm.
 */
203 do_munmap(mm, vvar_start, VVAR_NR_PAGES * PAGE_SIZE, NULL);
/* Record where the vDSO text lives for this mm. */
206 current->mm->context.vdso_base = vdso_text_start;
210 mmap_write_unlock(mm);
/*
 * vdso_init() - build the page array of the vDSO text mapping at boot.
 *
 * Computes vdso_pages from the distance between vdso64_start and vdso64_end,
 * allocates an array of vdso_pages + 1 page pointers with kcalloc(), fills
 * it with the struct page of every PAGE_SIZE step of the range, terminates
 * it with NULL and publishes it through vdso_mapping.pages. Registered as an
 * arch initcall.
 *
 * NOTE(review): the declarations of pages and i, whatever handling follows
 * the kcalloc() call (embedded lines 221-224) and the return statement are
 * missing from this listing; restore them from the authoritative file.
 */
214 static int __init vdso_init(void)
219 vdso_pages = (vdso64_end - vdso64_start) >> PAGE_SHIFT;
/* One extra slot for the NULL terminator written below. */
220 pages = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL);
225 for (i = 0; i < vdso_pages; i++)
226 pages[i] = virt_to_page(vdso64_start + i * PAGE_SIZE);
227 pages[vdso_pages] = NULL;
228 vdso_mapping.pages = pages;
231 arch_initcall(vdso_init);