bpf: Use BTF_ID to resolve bpf_ctx_convert struct
[linux-2.6-microblaze.git] / kernel / bpf / bpf_iter.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2020 Facebook */
3
4 #include <linux/fs.h>
5 #include <linux/anon_inodes.h>
6 #include <linux/filter.h>
7 #include <linux/bpf.h>
8
/* One registered iterator target; lives on the global 'targets' list
 * from bpf_iter_reg_target() until bpf_iter_unreg_target().
 */
struct bpf_iter_target_info {
	struct list_head list;
	const struct bpf_iter_reg *reg_info;
	u32 btf_id;	/* cached attach btf_id; 0 until cache_btf_id() runs */
};
14
/* bpf_link subtype tying an attached bpf program to an iterator target. */
struct bpf_iter_link {
	struct bpf_link link;
	struct bpf_iter_target_info *tinfo;
};
19
/* Header placed in front of the target's seq_file private area.
 * seq->private points at target_private; the helpers below recover
 * this header with container_of().
 */
struct bpf_iter_priv_data {
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u64 session_id;		/* unique per opened seq_file */
	u64 seq_num;		/* index of the object being shown */
	bool done_stop;		/* stop() completed without overflow */
	u8 target_private[] __aligned(8);
};
28
29 static struct list_head targets = LIST_HEAD_INIT(targets);
30 static DEFINE_MUTEX(targets_mutex);
31
32 /* protect bpf_iter_link changes */
33 static DEFINE_MUTEX(link_mutex);
34
35 /* incremented on every opened seq_file */
36 static atomic64_t session_id;
37
38 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
39
40 static void bpf_iter_inc_seq_num(struct seq_file *seq)
41 {
42         struct bpf_iter_priv_data *iter_priv;
43
44         iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
45                                  target_private);
46         iter_priv->seq_num++;
47 }
48
49 static void bpf_iter_dec_seq_num(struct seq_file *seq)
50 {
51         struct bpf_iter_priv_data *iter_priv;
52
53         iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
54                                  target_private);
55         iter_priv->seq_num--;
56 }
57
58 static void bpf_iter_done_stop(struct seq_file *seq)
59 {
60         struct bpf_iter_priv_data *iter_priv;
61
62         iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
63                                  target_private);
64         iter_priv->done_stop = true;
65 }
66
/* bpf_seq_read, a customized and simpler version for bpf iterator.
 * no_llseek is assumed for this file.
 * The following are differences from seq_read():
 *  . fixed buffer size (PAGE_SIZE)
 *  . assuming no_llseek
 *  . stop() may call bpf program, handling potential overflow there
 */
static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
			    loff_t *ppos)
{
	struct seq_file *seq = file->private_data;
	size_t n, offs, copied = 0;
	int err = 0;
	void *p;

	mutex_lock(&seq->lock);

	/* lazily allocate the fixed-size output buffer on first read */
	if (!seq->buf) {
		seq->size = PAGE_SIZE;
		seq->buf = kmalloc(seq->size, GFP_KERNEL);
		if (!seq->buf) {
			err = -ENOMEM;
			goto done;
		}
	}

	/* leftover bytes from a previous read(): drain them first */
	if (seq->count) {
		n = min(seq->count, size);
		err = copy_to_user(buf, seq->buf + seq->from, n);
		if (err) {
			err = -EFAULT;
			goto done;
		}
		seq->count -= n;
		seq->from += n;
		copied = n;
		goto done;
	}

	seq->from = 0;
	p = seq->op->start(seq, &seq->index);
	if (!p)
		goto stop;	/* iteration done; stop() may run the prog */
	if (IS_ERR(p)) {
		err = PTR_ERR(p);
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	err = seq->op->show(seq, p);
	if (err > 0) {
		/* object is skipped, decrease seq_num, so next
		 * valid object can reuse the same seq_num.
		 */
		bpf_iter_dec_seq_num(seq);
		seq->count = 0;
	} else if (err < 0 || seq_has_overflowed(seq)) {
		/* very first object overflowed a whole page: give up */
		if (!err)
			err = -E2BIG;
		seq->op->stop(seq, p);
		seq->count = 0;
		goto done;
	}

	/* emit objects until the buffer holds at least 'size' bytes */
	while (1) {
		loff_t pos = seq->index;

		offs = seq->count;
		p = seq->op->next(seq, p, &seq->index);
		if (pos == seq->index) {
			pr_info_ratelimited("buggy seq_file .next function %ps "
				"did not updated position index\n",
				seq->op->next);
			seq->index++;
		}

		if (IS_ERR_OR_NULL(p))
			break;

		/* got a valid next object, increase seq_num */
		bpf_iter_inc_seq_num(seq);

		if (seq->count >= size)
			break;

		err = seq->op->show(seq, p);
		if (err > 0) {
			/* skipped object: roll back its bytes and seq_num */
			bpf_iter_dec_seq_num(seq);
			seq->count = offs;
		} else if (err < 0 || seq_has_overflowed(seq)) {
			seq->count = offs;
			if (offs == 0) {
				/* single object larger than the buffer */
				if (!err)
					err = -E2BIG;
				seq->op->stop(seq, p);
				goto done;
			}
			/* keep what fits; retry this object on next read */
			break;
		}
	}
stop:
	offs = seq->count;
	/* bpf program called if !p */
	seq->op->stop(seq, p);
	if (!p) {
		if (!seq_has_overflowed(seq)) {
			/* stop() output fit: don't run the prog again */
			bpf_iter_done_stop(seq);
		} else {
			seq->count = offs;
			if (offs == 0) {
				err = -E2BIG;
				goto done;
			}
			/* drop stop() output; it will be retried later */
		}
	}

	n = min(seq->count, size);
	err = copy_to_user(buf, seq->buf, n);
	if (err) {
		err = -EFAULT;
		goto done;
	}
	copied = n;
	seq->count -= n;
	seq->from = n;
done:
	/* return bytes copied if any, otherwise the error code */
	if (!copied)
		copied = err;
	else
		*ppos += copied;
	mutex_unlock(&seq->lock);
	return copied;
}
201
202 static int iter_open(struct inode *inode, struct file *file)
203 {
204         struct bpf_iter_link *link = inode->i_private;
205
206         return prepare_seq_file(file, link);
207 }
208
209 static int iter_release(struct inode *inode, struct file *file)
210 {
211         struct bpf_iter_priv_data *iter_priv;
212         struct seq_file *seq;
213
214         seq = file->private_data;
215         if (!seq)
216                 return 0;
217
218         iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
219                                  target_private);
220
221         if (iter_priv->tinfo->reg_info->fini_seq_private)
222                 iter_priv->tinfo->reg_info->fini_seq_private(seq->private);
223
224         bpf_prog_put(iter_priv->prog);
225         seq->private = iter_priv;
226
227         return seq_release_private(inode, file);
228 }
229
/* file_operations for the anon inode created by bpf_iter_new_fd(). */
const struct file_operations bpf_iter_fops = {
	.open		= iter_open,
	.llseek		= no_llseek,
	.read		= bpf_seq_read,
	.release	= iter_release,
};
236
237 /* The argument reg_info will be cached in bpf_iter_target_info.
238  * The common practice is to declare target reg_info as
239  * a const static variable and passed as an argument to
240  * bpf_iter_reg_target().
241  */
242 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
243 {
244         struct bpf_iter_target_info *tinfo;
245
246         tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
247         if (!tinfo)
248                 return -ENOMEM;
249
250         tinfo->reg_info = reg_info;
251         INIT_LIST_HEAD(&tinfo->list);
252
253         mutex_lock(&targets_mutex);
254         list_add(&tinfo->list, &targets);
255         mutex_unlock(&targets_mutex);
256
257         return 0;
258 }
259
260 void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
261 {
262         struct bpf_iter_target_info *tinfo;
263         bool found = false;
264
265         mutex_lock(&targets_mutex);
266         list_for_each_entry(tinfo, &targets, list) {
267                 if (reg_info == tinfo->reg_info) {
268                         list_del(&tinfo->list);
269                         kfree(tinfo);
270                         found = true;
271                         break;
272                 }
273         }
274         mutex_unlock(&targets_mutex);
275
276         WARN_ON(found == false);
277 }
278
/* Cache the program's attach btf_id in the target info so later
 * lookups can compare ids instead of doing string matches.
 */
static void cache_btf_id(struct bpf_iter_target_info *tinfo,
			 struct bpf_prog *prog)
{
	tinfo->btf_id = prog->aux->attach_btf_id;
}
284
/* Return true if 'prog' attaches to a registered iterator target.
 * The attach function name must be BPF_ITER_FUNC_PREFIX followed by
 * the target name.  On success, the target's ctx_arg_info is copied
 * into the prog aux for the verifier.
 */
bool bpf_iter_prog_supported(struct bpf_prog *prog)
{
	const char *attach_fname = prog->aux->attach_func_name;
	u32 prog_btf_id = prog->aux->attach_btf_id;
	const char *prefix = BPF_ITER_FUNC_PREFIX;
	struct bpf_iter_target_info *tinfo;
	int prefix_len = strlen(prefix);
	bool supported = false;

	if (strncmp(attach_fname, prefix, prefix_len))
		return false;

	mutex_lock(&targets_mutex);
	list_for_each_entry(tinfo, &targets, list) {
		/* fast path: btf_id already cached for this target */
		if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
			supported = true;
			break;
		}
		/* slow path: match by name, then cache the btf_id */
		if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
			cache_btf_id(tinfo, prog);
			supported = true;
			break;
		}
	}
	mutex_unlock(&targets_mutex);

	if (supported) {
		/* NOTE(review): tinfo is dereferenced after dropping
		 * targets_mutex; presumably targets are not unregistered
		 * while progs may attach -- confirm against
		 * bpf_iter_unreg_target() callers.
		 */
		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
	}

	return supported;
}
318
/* Intentionally empty: an iterator link owns nothing to detach at
 * release time; all cleanup happens in bpf_iter_link_dealloc().
 */
static void bpf_iter_link_release(struct bpf_link *link)
{
}
322
323 static void bpf_iter_link_dealloc(struct bpf_link *link)
324 {
325         struct bpf_iter_link *iter_link =
326                 container_of(link, struct bpf_iter_link, link);
327
328         kfree(iter_link);
329 }
330
331 static int bpf_iter_link_replace(struct bpf_link *link,
332                                  struct bpf_prog *new_prog,
333                                  struct bpf_prog *old_prog)
334 {
335         int ret = 0;
336
337         mutex_lock(&link_mutex);
338         if (old_prog && link->prog != old_prog) {
339                 ret = -EPERM;
340                 goto out_unlock;
341         }
342
343         if (link->prog->type != new_prog->type ||
344             link->prog->expected_attach_type != new_prog->expected_attach_type ||
345             link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
346                 ret = -EINVAL;
347                 goto out_unlock;
348         }
349
350         old_prog = xchg(&link->prog, new_prog);
351         bpf_prog_put(old_prog);
352
353 out_unlock:
354         mutex_unlock(&link_mutex);
355         return ret;
356 }
357
/* bpf_link callbacks for iterator links. */
static const struct bpf_link_ops bpf_iter_link_lops = {
	.release = bpf_iter_link_release,
	.dealloc = bpf_iter_link_dealloc,
	.update_prog = bpf_iter_link_replace,
};
363
/* Return true if 'link' is an iterator link (identified by its ops). */
bool bpf_link_is_iter(struct bpf_link *link)
{
	return link->ops == &bpf_iter_link_lops;
}
368
369 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
370 {
371         struct bpf_link_primer link_primer;
372         struct bpf_iter_target_info *tinfo;
373         struct bpf_iter_link *link;
374         bool existed = false;
375         u32 prog_btf_id;
376         int err;
377
378         if (attr->link_create.target_fd || attr->link_create.flags)
379                 return -EINVAL;
380
381         prog_btf_id = prog->aux->attach_btf_id;
382         mutex_lock(&targets_mutex);
383         list_for_each_entry(tinfo, &targets, list) {
384                 if (tinfo->btf_id == prog_btf_id) {
385                         existed = true;
386                         break;
387                 }
388         }
389         mutex_unlock(&targets_mutex);
390         if (!existed)
391                 return -ENOENT;
392
393         link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
394         if (!link)
395                 return -ENOMEM;
396
397         bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
398         link->tinfo = tinfo;
399
400         err  = bpf_link_prime(&link->link, &link_primer);
401         if (err) {
402                 kfree(link);
403                 return err;
404         }
405
406         return bpf_link_settle(&link_primer);
407 }
408
409 static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
410                           struct bpf_iter_target_info *tinfo,
411                           struct bpf_prog *prog)
412 {
413         priv_data->tinfo = tinfo;
414         priv_data->prog = prog;
415         priv_data->session_id = atomic64_inc_return(&session_id);
416         priv_data->seq_num = 0;
417         priv_data->done_stop = false;
418 }
419
/* Allocate and initialize the seq_file backing 'file' for iterator
 * 'link'.  On success seq->private points at the target's private
 * area (just past the bpf_iter_priv_data header), matching what the
 * container_of() helpers above expect.
 */
static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
{
	struct bpf_iter_priv_data *priv_data;
	struct bpf_iter_target_info *tinfo;
	struct bpf_prog *prog;
	u32 total_priv_dsize;
	struct seq_file *seq;
	int err = 0;

	/* take a prog reference under link_mutex so a concurrent
	 * link update cannot swap/put the prog underneath us
	 */
	mutex_lock(&link_mutex);
	prog = link->link.prog;
	bpf_prog_inc(prog);
	mutex_unlock(&link_mutex);

	tinfo = link->tinfo;
	/* one allocation: metadata header + target private area */
	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
			   tinfo->reg_info->seq_priv_size;
	priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops,
				       total_priv_dsize);
	if (!priv_data) {
		err = -ENOMEM;
		goto release_prog;
	}

	if (tinfo->reg_info->init_seq_private) {
		err = tinfo->reg_info->init_seq_private(priv_data->target_private);
		if (err)
			goto release_seq_file;
	}

	init_seq_meta(priv_data, tinfo, prog);
	seq = file->private_data;
	/* expose only the target's area; iter_release() restores this */
	seq->private = priv_data->target_private;

	return 0;

release_seq_file:
	seq_release_private(file->f_inode, file);
	file->private_data = NULL;
release_prog:
	bpf_prog_put(prog);
	return err;
}
463
464 int bpf_iter_new_fd(struct bpf_link *link)
465 {
466         struct file *file;
467         unsigned int flags;
468         int err, fd;
469
470         if (link->ops != &bpf_iter_link_lops)
471                 return -EINVAL;
472
473         flags = O_RDONLY | O_CLOEXEC;
474         fd = get_unused_fd_flags(flags);
475         if (fd < 0)
476                 return fd;
477
478         file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
479         if (IS_ERR(file)) {
480                 err = PTR_ERR(file);
481                 goto free_fd;
482         }
483
484         err = prepare_seq_file(file,
485                                container_of(link, struct bpf_iter_link, link));
486         if (err)
487                 goto free_file;
488
489         fd_install(fd, file);
490         return fd;
491
492 free_file:
493         fput(file);
494 free_fd:
495         put_unused_fd(fd);
496         return err;
497 }
498
499 struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
500 {
501         struct bpf_iter_priv_data *iter_priv;
502         struct seq_file *seq;
503         void *seq_priv;
504
505         seq = meta->seq;
506         if (seq->file->f_op != &bpf_iter_fops)
507                 return NULL;
508
509         seq_priv = seq->private;
510         iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
511                                  target_private);
512
513         if (in_stop && iter_priv->done_stop)
514                 return NULL;
515
516         meta->session_id = iter_priv->session_id;
517         meta->seq_num = iter_priv->seq_num;
518
519         return iter_priv->prog;
520 }
521
522 int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
523 {
524         int ret;
525
526         rcu_read_lock();
527         migrate_disable();
528         ret = BPF_PROG_RUN(prog, ctx);
529         migrate_enable();
530         rcu_read_unlock();
531
532         /* bpf program can only return 0 or 1:
533          *  0 : okay
534          *  1 : retry the same object
535          * The bpf_iter_run_prog() return value
536          * will be seq_ops->show() return value.
537          */
538         return ret == 0 ? 0 : -EAGAIN;
539 }