Merge tag 'audit-pr-20231030' of git://git.kernel.org/pub/scm/linux/kernel/git/pcmoor...
[linux-2.6-microblaze.git] / kernel / bpf / task_iter.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2020 Facebook */
3
4 #include <linux/init.h>
5 #include <linux/namei.h>
6 #include <linux/pid_namespace.h>
7 #include <linux/fs.h>
8 #include <linux/fdtable.h>
9 #include <linux/filter.h>
10 #include <linux/btf_ids.h>
11 #include "mmap_unlock_work.h"
12
/* Printable names for the iterator filter types. The table is indexed
 * with aux->task.type when the link's fdinfo is printed.
 */
static const char * const iter_task_type_names[] = {
	"ALL", "TID", "PID",
};
18
19 struct bpf_iter_seq_task_common {
20         struct pid_namespace *ns;
21         enum bpf_iter_task_type type;
22         u32 pid;
23         u32 pid_visiting;
24 };
25
26 struct bpf_iter_seq_task_info {
27         /* The first field must be struct bpf_iter_seq_task_common.
28          * this is assumed by {init, fini}_seq_pidns() callback functions.
29          */
30         struct bpf_iter_seq_task_common common;
31         u32 tid;
32 };
33
34 static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
35                                                    u32 *tid,
36                                                    bool skip_if_dup_files)
37 {
38         struct task_struct *task, *next_task;
39         struct pid *pid;
40         u32 saved_tid;
41
42         if (!*tid) {
43                 /* The first time, the iterator calls this function. */
44                 pid = find_pid_ns(common->pid, common->ns);
45                 if (!pid)
46                         return NULL;
47
48                 task = get_pid_task(pid, PIDTYPE_TGID);
49                 if (!task)
50                         return NULL;
51
52                 *tid = common->pid;
53                 common->pid_visiting = common->pid;
54
55                 return task;
56         }
57
58         /* If the control returns to user space and comes back to the
59          * kernel again, *tid and common->pid_visiting should be the
60          * same for task_seq_start() to pick up the correct task.
61          */
62         if (*tid == common->pid_visiting) {
63                 pid = find_pid_ns(common->pid_visiting, common->ns);
64                 task = get_pid_task(pid, PIDTYPE_PID);
65
66                 return task;
67         }
68
69         pid = find_pid_ns(common->pid_visiting, common->ns);
70         if (!pid)
71                 return NULL;
72
73         task = get_pid_task(pid, PIDTYPE_PID);
74         if (!task)
75                 return NULL;
76
77 retry:
78         if (!pid_alive(task)) {
79                 put_task_struct(task);
80                 return NULL;
81         }
82
83         next_task = next_thread(task);
84         put_task_struct(task);
85         if (!next_task)
86                 return NULL;
87
88         saved_tid = *tid;
89         *tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
90         if (!*tid || *tid == common->pid) {
91                 /* Run out of tasks of a process.  The tasks of a
92                  * thread_group are linked as circular linked list.
93                  */
94                 *tid = saved_tid;
95                 return NULL;
96         }
97
98         get_task_struct(next_task);
99         common->pid_visiting = *tid;
100
101         if (skip_if_dup_files && task->files == task->group_leader->files) {
102                 task = next_task;
103                 goto retry;
104         }
105
106         return next_task;
107 }
108
109 static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
110                                              u32 *tid,
111                                              bool skip_if_dup_files)
112 {
113         struct task_struct *task = NULL;
114         struct pid *pid;
115
116         if (common->type == BPF_TASK_ITER_TID) {
117                 if (*tid && *tid != common->pid)
118                         return NULL;
119                 rcu_read_lock();
120                 pid = find_pid_ns(common->pid, common->ns);
121                 if (pid) {
122                         task = get_pid_task(pid, PIDTYPE_TGID);
123                         *tid = common->pid;
124                 }
125                 rcu_read_unlock();
126
127                 return task;
128         }
129
130         if (common->type == BPF_TASK_ITER_TGID) {
131                 rcu_read_lock();
132                 task = task_group_seq_get_next(common, tid, skip_if_dup_files);
133                 rcu_read_unlock();
134
135                 return task;
136         }
137
138         rcu_read_lock();
139 retry:
140         pid = find_ge_pid(*tid, common->ns);
141         if (pid) {
142                 *tid = pid_nr_ns(pid, common->ns);
143                 task = get_pid_task(pid, PIDTYPE_PID);
144                 if (!task) {
145                         ++*tid;
146                         goto retry;
147                 } else if (skip_if_dup_files && !thread_group_leader(task) &&
148                            task->files == task->group_leader->files) {
149                         put_task_struct(task);
150                         task = NULL;
151                         ++*tid;
152                         goto retry;
153                 }
154         }
155         rcu_read_unlock();
156
157         return task;
158 }
159
160 static void *task_seq_start(struct seq_file *seq, loff_t *pos)
161 {
162         struct bpf_iter_seq_task_info *info = seq->private;
163         struct task_struct *task;
164
165         task = task_seq_get_next(&info->common, &info->tid, false);
166         if (!task)
167                 return NULL;
168
169         if (*pos == 0)
170                 ++*pos;
171         return task;
172 }
173
174 static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
175 {
176         struct bpf_iter_seq_task_info *info = seq->private;
177         struct task_struct *task;
178
179         ++*pos;
180         ++info->tid;
181         put_task_struct((struct task_struct *)v);
182         task = task_seq_get_next(&info->common, &info->tid, false);
183         if (!task)
184                 return NULL;
185
186         return task;
187 }
188
189 struct bpf_iter__task {
190         __bpf_md_ptr(struct bpf_iter_meta *, meta);
191         __bpf_md_ptr(struct task_struct *, task);
192 };
193
194 DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)
195
196 static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
197                            bool in_stop)
198 {
199         struct bpf_iter_meta meta;
200         struct bpf_iter__task ctx;
201         struct bpf_prog *prog;
202
203         meta.seq = seq;
204         prog = bpf_iter_get_info(&meta, in_stop);
205         if (!prog)
206                 return 0;
207
208         ctx.meta = &meta;
209         ctx.task = task;
210         return bpf_iter_run_prog(prog, &ctx);
211 }
212
213 static int task_seq_show(struct seq_file *seq, void *v)
214 {
215         return __task_seq_show(seq, v, false);
216 }
217
218 static void task_seq_stop(struct seq_file *seq, void *v)
219 {
220         if (!v)
221                 (void)__task_seq_show(seq, v, true);
222         else
223                 put_task_struct((struct task_struct *)v);
224 }
225
226 static int bpf_iter_attach_task(struct bpf_prog *prog,
227                                 union bpf_iter_link_info *linfo,
228                                 struct bpf_iter_aux_info *aux)
229 {
230         unsigned int flags;
231         struct pid *pid;
232         pid_t tgid;
233
234         if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
235                 return -EINVAL;
236
237         aux->task.type = BPF_TASK_ITER_ALL;
238         if (linfo->task.tid != 0) {
239                 aux->task.type = BPF_TASK_ITER_TID;
240                 aux->task.pid = linfo->task.tid;
241         }
242         if (linfo->task.pid != 0) {
243                 aux->task.type = BPF_TASK_ITER_TGID;
244                 aux->task.pid = linfo->task.pid;
245         }
246         if (linfo->task.pid_fd != 0) {
247                 aux->task.type = BPF_TASK_ITER_TGID;
248
249                 pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
250                 if (IS_ERR(pid))
251                         return PTR_ERR(pid);
252
253                 tgid = pid_nr_ns(pid, task_active_pid_ns(current));
254                 aux->task.pid = tgid;
255                 put_pid(pid);
256         }
257
258         return 0;
259 }
260
261 static const struct seq_operations task_seq_ops = {
262         .start  = task_seq_start,
263         .next   = task_seq_next,
264         .stop   = task_seq_stop,
265         .show   = task_seq_show,
266 };
267
268 struct bpf_iter_seq_task_file_info {
269         /* The first field must be struct bpf_iter_seq_task_common.
270          * this is assumed by {init, fini}_seq_pidns() callback functions.
271          */
272         struct bpf_iter_seq_task_common common;
273         struct task_struct *task;
274         u32 tid;
275         u32 fd;
276 };
277
278 static struct file *
279 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
280 {
281         u32 saved_tid = info->tid;
282         struct task_struct *curr_task;
283         unsigned int curr_fd = info->fd;
284
285         /* If this function returns a non-NULL file object,
286          * it held a reference to the task/file.
287          * Otherwise, it does not hold any reference.
288          */
289 again:
290         if (info->task) {
291                 curr_task = info->task;
292                 curr_fd = info->fd;
293         } else {
294                 curr_task = task_seq_get_next(&info->common, &info->tid, true);
295                 if (!curr_task) {
296                         info->task = NULL;
297                         return NULL;
298                 }
299
300                 /* set info->task */
301                 info->task = curr_task;
302                 if (saved_tid == info->tid)
303                         curr_fd = info->fd;
304                 else
305                         curr_fd = 0;
306         }
307
308         rcu_read_lock();
309         for (;; curr_fd++) {
310                 struct file *f;
311                 f = task_lookup_next_fdget_rcu(curr_task, &curr_fd);
312                 if (!f)
313                         break;
314
315                 /* set info->fd */
316                 info->fd = curr_fd;
317                 rcu_read_unlock();
318                 return f;
319         }
320
321         /* the current task is done, go to the next task */
322         rcu_read_unlock();
323         put_task_struct(curr_task);
324
325         if (info->common.type == BPF_TASK_ITER_TID) {
326                 info->task = NULL;
327                 return NULL;
328         }
329
330         info->task = NULL;
331         info->fd = 0;
332         saved_tid = ++(info->tid);
333         goto again;
334 }
335
336 static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
337 {
338         struct bpf_iter_seq_task_file_info *info = seq->private;
339         struct file *file;
340
341         info->task = NULL;
342         file = task_file_seq_get_next(info);
343         if (file && *pos == 0)
344                 ++*pos;
345
346         return file;
347 }
348
349 static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
350 {
351         struct bpf_iter_seq_task_file_info *info = seq->private;
352
353         ++*pos;
354         ++info->fd;
355         fput((struct file *)v);
356         return task_file_seq_get_next(info);
357 }
358
359 struct bpf_iter__task_file {
360         __bpf_md_ptr(struct bpf_iter_meta *, meta);
361         __bpf_md_ptr(struct task_struct *, task);
362         u32 fd __aligned(8);
363         __bpf_md_ptr(struct file *, file);
364 };
365
366 DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
367                      struct task_struct *task, u32 fd,
368                      struct file *file)
369
370 static int __task_file_seq_show(struct seq_file *seq, struct file *file,
371                                 bool in_stop)
372 {
373         struct bpf_iter_seq_task_file_info *info = seq->private;
374         struct bpf_iter__task_file ctx;
375         struct bpf_iter_meta meta;
376         struct bpf_prog *prog;
377
378         meta.seq = seq;
379         prog = bpf_iter_get_info(&meta, in_stop);
380         if (!prog)
381                 return 0;
382
383         ctx.meta = &meta;
384         ctx.task = info->task;
385         ctx.fd = info->fd;
386         ctx.file = file;
387         return bpf_iter_run_prog(prog, &ctx);
388 }
389
390 static int task_file_seq_show(struct seq_file *seq, void *v)
391 {
392         return __task_file_seq_show(seq, v, false);
393 }
394
395 static void task_file_seq_stop(struct seq_file *seq, void *v)
396 {
397         struct bpf_iter_seq_task_file_info *info = seq->private;
398
399         if (!v) {
400                 (void)__task_file_seq_show(seq, v, true);
401         } else {
402                 fput((struct file *)v);
403                 put_task_struct(info->task);
404                 info->task = NULL;
405         }
406 }
407
408 static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
409 {
410         struct bpf_iter_seq_task_common *common = priv_data;
411
412         common->ns = get_pid_ns(task_active_pid_ns(current));
413         common->type = aux->task.type;
414         common->pid = aux->task.pid;
415
416         return 0;
417 }
418
419 static void fini_seq_pidns(void *priv_data)
420 {
421         struct bpf_iter_seq_task_common *common = priv_data;
422
423         put_pid_ns(common->ns);
424 }
425
426 static const struct seq_operations task_file_seq_ops = {
427         .start  = task_file_seq_start,
428         .next   = task_file_seq_next,
429         .stop   = task_file_seq_stop,
430         .show   = task_file_seq_show,
431 };
432
433 struct bpf_iter_seq_task_vma_info {
434         /* The first field must be struct bpf_iter_seq_task_common.
435          * this is assumed by {init, fini}_seq_pidns() callback functions.
436          */
437         struct bpf_iter_seq_task_common common;
438         struct task_struct *task;
439         struct mm_struct *mm;
440         struct vm_area_struct *vma;
441         u32 tid;
442         unsigned long prev_vm_start;
443         unsigned long prev_vm_end;
444 };
445
/* How task_vma_seq_get_next() locates the vma to return. */
enum bpf_task_vma_iter_find_op {
	/* new task: find_vma() at address 0 */
	task_vma_iter_first_vma,
	/* lock was kept: find_vma() at the end of the current vma */
	task_vma_iter_next_vma,
	/* lock was dropped: find_vma() from the saved prev_vm_end */
	task_vma_iter_find_vma,
};
451
452 static struct vm_area_struct *
453 task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
454 {
455         enum bpf_task_vma_iter_find_op op;
456         struct vm_area_struct *curr_vma;
457         struct task_struct *curr_task;
458         struct mm_struct *curr_mm;
459         u32 saved_tid = info->tid;
460
461         /* If this function returns a non-NULL vma, it holds a reference to
462          * the task_struct, holds a refcount on mm->mm_users, and holds
463          * read lock on vma->mm->mmap_lock.
464          * If this function returns NULL, it does not hold any reference or
465          * lock.
466          */
467         if (info->task) {
468                 curr_task = info->task;
469                 curr_vma = info->vma;
470                 curr_mm = info->mm;
471                 /* In case of lock contention, drop mmap_lock to unblock
472                  * the writer.
473                  *
474                  * After relock, call find(mm, prev_vm_end - 1) to find
475                  * new vma to process.
476                  *
477                  *   +------+------+-----------+
478                  *   | VMA1 | VMA2 | VMA3      |
479                  *   +------+------+-----------+
480                  *   |      |      |           |
481                  *  4k     8k     16k         400k
482                  *
483                  * For example, curr_vma == VMA2. Before unlock, we set
484                  *
485                  *    prev_vm_start = 8k
486                  *    prev_vm_end   = 16k
487                  *
488                  * There are a few cases:
489                  *
490                  * 1) VMA2 is freed, but VMA3 exists.
491                  *
492                  *    find_vma() will return VMA3, just process VMA3.
493                  *
494                  * 2) VMA2 still exists.
495                  *
496                  *    find_vma() will return VMA2, process VMA2->next.
497                  *
498                  * 3) no more vma in this mm.
499                  *
500                  *    Process the next task.
501                  *
502                  * 4) find_vma() returns a different vma, VMA2'.
503                  *
504                  *    4.1) If VMA2 covers same range as VMA2', skip VMA2',
505                  *         because we already covered the range;
506                  *    4.2) VMA2 and VMA2' covers different ranges, process
507                  *         VMA2'.
508                  */
509                 if (mmap_lock_is_contended(curr_mm)) {
510                         info->prev_vm_start = curr_vma->vm_start;
511                         info->prev_vm_end = curr_vma->vm_end;
512                         op = task_vma_iter_find_vma;
513                         mmap_read_unlock(curr_mm);
514                         if (mmap_read_lock_killable(curr_mm)) {
515                                 mmput(curr_mm);
516                                 goto finish;
517                         }
518                 } else {
519                         op = task_vma_iter_next_vma;
520                 }
521         } else {
522 again:
523                 curr_task = task_seq_get_next(&info->common, &info->tid, true);
524                 if (!curr_task) {
525                         info->tid++;
526                         goto finish;
527                 }
528
529                 if (saved_tid != info->tid) {
530                         /* new task, process the first vma */
531                         op = task_vma_iter_first_vma;
532                 } else {
533                         /* Found the same tid, which means the user space
534                          * finished data in previous buffer and read more.
535                          * We dropped mmap_lock before returning to user
536                          * space, so it is necessary to use find_vma() to
537                          * find the next vma to process.
538                          */
539                         op = task_vma_iter_find_vma;
540                 }
541
542                 curr_mm = get_task_mm(curr_task);
543                 if (!curr_mm)
544                         goto next_task;
545
546                 if (mmap_read_lock_killable(curr_mm)) {
547                         mmput(curr_mm);
548                         goto finish;
549                 }
550         }
551
552         switch (op) {
553         case task_vma_iter_first_vma:
554                 curr_vma = find_vma(curr_mm, 0);
555                 break;
556         case task_vma_iter_next_vma:
557                 curr_vma = find_vma(curr_mm, curr_vma->vm_end);
558                 break;
559         case task_vma_iter_find_vma:
560                 /* We dropped mmap_lock so it is necessary to use find_vma
561                  * to find the next vma. This is similar to the  mechanism
562                  * in show_smaps_rollup().
563                  */
564                 curr_vma = find_vma(curr_mm, info->prev_vm_end - 1);
565                 /* case 1) and 4.2) above just use curr_vma */
566
567                 /* check for case 2) or case 4.1) above */
568                 if (curr_vma &&
569                     curr_vma->vm_start == info->prev_vm_start &&
570                     curr_vma->vm_end == info->prev_vm_end)
571                         curr_vma = find_vma(curr_mm, curr_vma->vm_end);
572                 break;
573         }
574         if (!curr_vma) {
575                 /* case 3) above, or case 2) 4.1) with vma->next == NULL */
576                 mmap_read_unlock(curr_mm);
577                 mmput(curr_mm);
578                 goto next_task;
579         }
580         info->task = curr_task;
581         info->vma = curr_vma;
582         info->mm = curr_mm;
583         return curr_vma;
584
585 next_task:
586         if (info->common.type == BPF_TASK_ITER_TID)
587                 goto finish;
588
589         put_task_struct(curr_task);
590         info->task = NULL;
591         info->mm = NULL;
592         info->tid++;
593         goto again;
594
595 finish:
596         if (curr_task)
597                 put_task_struct(curr_task);
598         info->task = NULL;
599         info->vma = NULL;
600         info->mm = NULL;
601         return NULL;
602 }
603
604 static void *task_vma_seq_start(struct seq_file *seq, loff_t *pos)
605 {
606         struct bpf_iter_seq_task_vma_info *info = seq->private;
607         struct vm_area_struct *vma;
608
609         vma = task_vma_seq_get_next(info);
610         if (vma && *pos == 0)
611                 ++*pos;
612
613         return vma;
614 }
615
616 static void *task_vma_seq_next(struct seq_file *seq, void *v, loff_t *pos)
617 {
618         struct bpf_iter_seq_task_vma_info *info = seq->private;
619
620         ++*pos;
621         return task_vma_seq_get_next(info);
622 }
623
624 struct bpf_iter__task_vma {
625         __bpf_md_ptr(struct bpf_iter_meta *, meta);
626         __bpf_md_ptr(struct task_struct *, task);
627         __bpf_md_ptr(struct vm_area_struct *, vma);
628 };
629
630 DEFINE_BPF_ITER_FUNC(task_vma, struct bpf_iter_meta *meta,
631                      struct task_struct *task, struct vm_area_struct *vma)
632
633 static int __task_vma_seq_show(struct seq_file *seq, bool in_stop)
634 {
635         struct bpf_iter_seq_task_vma_info *info = seq->private;
636         struct bpf_iter__task_vma ctx;
637         struct bpf_iter_meta meta;
638         struct bpf_prog *prog;
639
640         meta.seq = seq;
641         prog = bpf_iter_get_info(&meta, in_stop);
642         if (!prog)
643                 return 0;
644
645         ctx.meta = &meta;
646         ctx.task = info->task;
647         ctx.vma = info->vma;
648         return bpf_iter_run_prog(prog, &ctx);
649 }
650
651 static int task_vma_seq_show(struct seq_file *seq, void *v)
652 {
653         return __task_vma_seq_show(seq, false);
654 }
655
656 static void task_vma_seq_stop(struct seq_file *seq, void *v)
657 {
658         struct bpf_iter_seq_task_vma_info *info = seq->private;
659
660         if (!v) {
661                 (void)__task_vma_seq_show(seq, true);
662         } else {
663                 /* info->vma has not been seen by the BPF program. If the
664                  * user space reads more, task_vma_seq_get_next should
665                  * return this vma again. Set prev_vm_start to ~0UL,
666                  * so that we don't skip the vma returned by the next
667                  * find_vma() (case task_vma_iter_find_vma in
668                  * task_vma_seq_get_next()).
669                  */
670                 info->prev_vm_start = ~0UL;
671                 info->prev_vm_end = info->vma->vm_end;
672                 mmap_read_unlock(info->mm);
673                 mmput(info->mm);
674                 info->mm = NULL;
675                 put_task_struct(info->task);
676                 info->task = NULL;
677         }
678 }
679
680 static const struct seq_operations task_vma_seq_ops = {
681         .start  = task_vma_seq_start,
682         .next   = task_vma_seq_next,
683         .stop   = task_vma_seq_stop,
684         .show   = task_vma_seq_show,
685 };
686
687 static const struct bpf_iter_seq_info task_seq_info = {
688         .seq_ops                = &task_seq_ops,
689         .init_seq_private       = init_seq_pidns,
690         .fini_seq_private       = fini_seq_pidns,
691         .seq_priv_size          = sizeof(struct bpf_iter_seq_task_info),
692 };
693
694 static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info)
695 {
696         switch (aux->task.type) {
697         case BPF_TASK_ITER_TID:
698                 info->iter.task.tid = aux->task.pid;
699                 break;
700         case BPF_TASK_ITER_TGID:
701                 info->iter.task.pid = aux->task.pid;
702                 break;
703         default:
704                 break;
705         }
706         return 0;
707 }
708
709 static void bpf_iter_task_show_fdinfo(const struct bpf_iter_aux_info *aux, struct seq_file *seq)
710 {
711         seq_printf(seq, "task_type:\t%s\n", iter_task_type_names[aux->task.type]);
712         if (aux->task.type == BPF_TASK_ITER_TID)
713                 seq_printf(seq, "tid:\t%u\n", aux->task.pid);
714         else if (aux->task.type == BPF_TASK_ITER_TGID)
715                 seq_printf(seq, "pid:\t%u\n", aux->task.pid);
716 }
717
718 static struct bpf_iter_reg task_reg_info = {
719         .target                 = "task",
720         .attach_target          = bpf_iter_attach_task,
721         .feature                = BPF_ITER_RESCHED,
722         .ctx_arg_info_size      = 1,
723         .ctx_arg_info           = {
724                 { offsetof(struct bpf_iter__task, task),
725                   PTR_TO_BTF_ID_OR_NULL },
726         },
727         .seq_info               = &task_seq_info,
728         .fill_link_info         = bpf_iter_fill_link_info,
729         .show_fdinfo            = bpf_iter_task_show_fdinfo,
730 };
731
732 static const struct bpf_iter_seq_info task_file_seq_info = {
733         .seq_ops                = &task_file_seq_ops,
734         .init_seq_private       = init_seq_pidns,
735         .fini_seq_private       = fini_seq_pidns,
736         .seq_priv_size          = sizeof(struct bpf_iter_seq_task_file_info),
737 };
738
739 static struct bpf_iter_reg task_file_reg_info = {
740         .target                 = "task_file",
741         .attach_target          = bpf_iter_attach_task,
742         .feature                = BPF_ITER_RESCHED,
743         .ctx_arg_info_size      = 2,
744         .ctx_arg_info           = {
745                 { offsetof(struct bpf_iter__task_file, task),
746                   PTR_TO_BTF_ID_OR_NULL },
747                 { offsetof(struct bpf_iter__task_file, file),
748                   PTR_TO_BTF_ID_OR_NULL },
749         },
750         .seq_info               = &task_file_seq_info,
751         .fill_link_info         = bpf_iter_fill_link_info,
752         .show_fdinfo            = bpf_iter_task_show_fdinfo,
753 };
754
755 static const struct bpf_iter_seq_info task_vma_seq_info = {
756         .seq_ops                = &task_vma_seq_ops,
757         .init_seq_private       = init_seq_pidns,
758         .fini_seq_private       = fini_seq_pidns,
759         .seq_priv_size          = sizeof(struct bpf_iter_seq_task_vma_info),
760 };
761
762 static struct bpf_iter_reg task_vma_reg_info = {
763         .target                 = "task_vma",
764         .attach_target          = bpf_iter_attach_task,
765         .feature                = BPF_ITER_RESCHED,
766         .ctx_arg_info_size      = 2,
767         .ctx_arg_info           = {
768                 { offsetof(struct bpf_iter__task_vma, task),
769                   PTR_TO_BTF_ID_OR_NULL },
770                 { offsetof(struct bpf_iter__task_vma, vma),
771                   PTR_TO_BTF_ID_OR_NULL },
772         },
773         .seq_info               = &task_vma_seq_info,
774         .fill_link_info         = bpf_iter_fill_link_info,
775         .show_fdinfo            = bpf_iter_task_show_fdinfo,
776 };
777
778 BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
779            bpf_callback_t, callback_fn, void *, callback_ctx, u64, flags)
780 {
781         struct mmap_unlock_irq_work *work = NULL;
782         struct vm_area_struct *vma;
783         bool irq_work_busy = false;
784         struct mm_struct *mm;
785         int ret = -ENOENT;
786
787         if (flags)
788                 return -EINVAL;
789
790         if (!task)
791                 return -ENOENT;
792
793         mm = task->mm;
794         if (!mm)
795                 return -ENOENT;
796
797         irq_work_busy = bpf_mmap_unlock_get_irq_work(&work);
798
799         if (irq_work_busy || !mmap_read_trylock(mm))
800                 return -EBUSY;
801
802         vma = find_vma(mm, start);
803
804         if (vma && vma->vm_start <= start && vma->vm_end > start) {
805                 callback_fn((u64)(long)task, (u64)(long)vma,
806                             (u64)(long)callback_ctx, 0, 0);
807                 ret = 0;
808         }
809         bpf_mmap_unlock_mm(work, mm);
810         return ret;
811 }
812
813 const struct bpf_func_proto bpf_find_vma_proto = {
814         .func           = bpf_find_vma,
815         .ret_type       = RET_INTEGER,
816         .arg1_type      = ARG_PTR_TO_BTF_ID,
817         .arg1_btf_id    = &btf_tracing_ids[BTF_TRACING_TYPE_TASK],
818         .arg2_type      = ARG_ANYTHING,
819         .arg3_type      = ARG_PTR_TO_FUNC,
820         .arg4_type      = ARG_PTR_TO_STACK_OR_NULL,
821         .arg5_type      = ARG_ANYTHING,
822 };
823
824 DEFINE_PER_CPU(struct mmap_unlock_irq_work, mmap_unlock_work);
825
826 static void do_mmap_read_unlock(struct irq_work *entry)
827 {
828         struct mmap_unlock_irq_work *work;
829
830         if (WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_RT)))
831                 return;
832
833         work = container_of(entry, struct mmap_unlock_irq_work, irq_work);
834         mmap_read_unlock_non_owner(work->mm);
835 }
836
837 static int __init task_iter_init(void)
838 {
839         struct mmap_unlock_irq_work *work;
840         int ret, cpu;
841
842         for_each_possible_cpu(cpu) {
843                 work = per_cpu_ptr(&mmap_unlock_work, cpu);
844                 init_irq_work(&work->irq_work, do_mmap_read_unlock);
845         }
846
847         task_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
848         ret = bpf_iter_reg_target(&task_reg_info);
849         if (ret)
850                 return ret;
851
852         task_file_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
853         task_file_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_FILE];
854         ret =  bpf_iter_reg_target(&task_file_reg_info);
855         if (ret)
856                 return ret;
857
858         task_vma_reg_info.ctx_arg_info[0].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_TASK];
859         task_vma_reg_info.ctx_arg_info[1].btf_id = btf_tracing_ids[BTF_TRACING_TYPE_VMA];
860         return bpf_iter_reg_target(&task_vma_reg_info);
861 }
862 late_initcall(task_iter_init);