net/bpf/test_run.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /* Copyright (c) 2017 Facebook
   3  */
   4 #include <linux/bpf.h>
   5 #include <linux/btf_ids.h>
   6 #include <linux/slab.h>
   7 #include <linux/vmalloc.h>
   8 #include <linux/etherdevice.h>
   9 #include <linux/filter.h>
  10 #include <linux/sched/signal.h>
  11 #include <net/bpf_sk_storage.h>
  12 #include <net/sock.h>
  13 #include <net/tcp.h>
  14 #include <net/net_namespace.h>
  15 #include <linux/error-injection.h>
  16 #include <linux/smp.h>
  17 #include <linux/sock_diag.h>
  18 #include <net/xdp.h>
  19
  20 #define CREATE_TRACE_POINTS
  21 #include <trace/events/bpf_test_run.h>
  22
  23 struct bpf_test_timer {
  24         enum { NO_PREEMPT, NO_MIGRATE } mode;
  25         u32 i;
  26         u64 time_start, time_spent;
  27 };
  28
  29 static void bpf_test_timer_enter(struct bpf_test_timer *t)
  30         __acquires(rcu)
  31 {
  32         rcu_read_lock();
  33         if (t->mode == NO_PREEMPT)
  34                 preempt_disable();
  35         else
  36                 migrate_disable();
  37
  38         t->time_start = ktime_get_ns();
  39 }
  40
  41 static void bpf_test_timer_leave(struct bpf_test_timer *t)
  42         __releases(rcu)
  43 {
  44         t->time_start = 0;
  45
  46         if (t->mode == NO_PREEMPT)
  47                 preempt_enable();
  48         else
  49                 migrate_enable();
  50         rcu_read_unlock();
  51 }
  52
  53 static bool bpf_test_timer_continue(struct bpf_test_timer *t, u32 repeat, int *err, u32 *duration)
  54         __must_hold(rcu)
  55 {
  56         t->i++;
  57         if (t->i >= repeat) {
  58                 /* We're done. */
  59                 t->time_spent += ktime_get_ns() - t->time_start;
  60                 do_div(t->time_spent, t->i);
  61                 *duration = t->time_spent > U32_MAX ? U32_MAX : (u32)t->time_spent;
  62                 *err = 0;
  63                 goto reset;
  64         }
  65
  66         if (signal_pending(current)) {
  67                 /* During iteration: we've been cancelled, abort. */
  68                 *err = -EINTR;
  69                 goto reset;
  70         }
  71
  72         if (need_resched()) {
  73                 /* During iteration: we need to reschedule between runs. */
  74                 t->time_spent += ktime_get_ns() - t->time_start;
  75                 bpf_test_timer_leave(t);
  76                 cond_resched();
  77                 bpf_test_timer_enter(t);
  78         }
  79
  80         /* Do another round. */
  81         return true;
  82
  83 reset:
  84         t->i = 0;
  85         return false;
  86 }
  87
  88 static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
  89                         u32 *retval, u32 *time, bool xdp)
  90 {
  91         struct bpf_prog_array_item item = {.prog = prog};
  92         struct bpf_run_ctx *old_ctx;
  93         struct bpf_cg_run_ctx run_ctx;
  94         struct bpf_test_timer t = { NO_MIGRATE };
  95         enum bpf_cgroup_storage_type stype;
  96         int ret;
  97
  98         for_each_cgroup_storage_type(stype) {
  99                 item.cgroup_storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
 100                 if (IS_ERR(item.cgroup_storage[stype])) {
 101                         item.cgroup_storage[stype] = NULL;
 102                         for_each_cgroup_storage_type(stype)
 103                                 bpf_cgroup_storage_free(item.cgroup_storage[stype]);
 104                         return -ENOMEM;
 105                 }
 106         }
 107
 108         if (!repeat)
 109                 repeat = 1;
 110
 111         bpf_test_timer_enter(&t);
 112         old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
 113         do {
 114                 run_ctx.prog_item = &item;
 115                 if (xdp)
 116                         *retval = bpf_prog_run_xdp(prog, ctx);
 117                 else
 118                         *retval = BPF_PROG_RUN(prog, ctx);
 119         } while (bpf_test_timer_continue(&t, repeat, &ret, time));
 120         bpf_reset_run_ctx(old_ctx);
 121         bpf_test_timer_leave(&t);
 122
 123         for_each_cgroup_storage_type(stype)
 124                 bpf_cgroup_storage_free(item.cgroup_storage[stype]);
 125
 126         return ret;
 127 }
 128
 129 static int bpf_test_finish(const union bpf_attr *kattr,
 130                            union bpf_attr __user *uattr, const void *data,
 131                            u32 size, u32 retval, u32 duration)
 132 {
 133         void __user *data_out = u64_to_user_ptr(kattr->test.data_out);
 134         int err = -EFAULT;
 135         u32 copy_size = size;
 136
 137         /* Clamp copy if the user has provided a size hint, but copy the full
 138          * buffer if not to retain old behaviour.
 139          */
 140         if (kattr->test.data_size_out &&
 141             copy_size > kattr->test.data_size_out) {
 142                 copy_size = kattr->test.data_size_out;
 143                 err = -ENOSPC;
 144         }
 145
 146         if (data_out && copy_to_user(data_out, data, copy_size))
 147                 goto out;
 148         if (copy_to_user(&uattr->test.data_size_out, &size, sizeof(size)))
 149                 goto out;
 150         if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
 151                 goto out;
 152         if (copy_to_user(&uattr->test.duration, &duration, sizeof(duration)))
 153                 goto out;
 154         if (err != -ENOSPC)
 155                 err = 0;
 156 out:
 157         trace_bpf_test_finish(&err);
 158         return err;
 159 }
 160
/* Integer types of various sizes and pointer combinations cover a variety
 * of architecture-dependent calling conventions. 7+ arguments can be
 * supported in the future.
 *
 * The functions below are deliberately global and noinline; the missing
 * prototype warning is silenced for the whole group.
 */
__diag_push();
__diag_ignore(GCC, 8, "-Wmissing-prototypes",
              "Global functions as their definitions will be in vmlinux BTF");
/* One int argument. */
int noinline bpf_fentry_test1(int a)
{
        return a + 1;
}

/* int + u64; the sum is truncated to int on return. */
int noinline bpf_fentry_test2(int a, u64 b)
{
        return a + b;
}

/* char + int + u64. */
int noinline bpf_fentry_test3(char a, int b, u64 c)
{
        return a + b + c;
}

/* Pointer (used only as an integer value) + char + int + u64. */
int noinline bpf_fentry_test4(void *a, char b, int c, u64 d)
{
        return (long)a + b + c + d;
}

/* Five arguments mixing u64, pointer, short and int. */
int noinline bpf_fentry_test5(u64 a, void *b, short c, int d, u64 e)
{
        return a + (long)b + c + d + e;
}

/* Six arguments, two of them pointers used only as integer values. */
int noinline bpf_fentry_test6(u64 a, void *b, short c, int d, void *e, u64 f)
{
        return a + (long)b + c + d + (long)e + f;
}

/* Self-referential struct used as a typed pointer argument. */
struct bpf_fentry_test_t {
        struct bpf_fentry_test_t *a;
};

/* Returns the pointer value itself; @arg is never dereferenced. */
int noinline bpf_fentry_test7(struct bpf_fentry_test_t *arg)
{
        return (long)arg;
}

/* Dereferences @arg, so the caller must pass a valid pointer. */
int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
{
        return (long)arg->a;
}

/* Increments *b (an observable side effect) and returns a + the new *b. */
int noinline bpf_modify_return_test(int a, int *b)
{
        *b += 1;
        return a + *b;
}

/* Sums the four scalar arguments; @sk is not used. */
u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
{
        return a + b + c + d;
}

/* Sums the two scalar arguments; @sk is not used. */
int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
{
        return a + b;
}

/* Hands back the socket pointer it was given. */
struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
{
        return sk;
}

__diag_pop();
 234
/* Register bpf_modify_return_test() with the error-injection machinery
 * using the ERRNO kind.
 */
ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);

/* BTF ids of the test kfuncs; looked up by
 * bpf_prog_test_check_kfunc_call().
 */
BTF_SET_START(test_sk_kfunc_ids)
BTF_ID(func, bpf_kfunc_call_test1)
BTF_ID(func, bpf_kfunc_call_test2)
BTF_ID(func, bpf_kfunc_call_test3)
BTF_SET_END(test_sk_kfunc_ids)
 242
 243 bool bpf_prog_test_check_kfunc_call(u32 kfunc_id)
 244 {
 245         return btf_id_set_contains(&test_sk_kfunc_ids, kfunc_id);
 246 }
 247
 248 static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
 249                            u32 headroom, u32 tailroom)
 250 {
 251         void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
 252         u32 user_size = kattr->test.data_size_in;
 253         void *data;
 254
 255         if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
 256                 return ERR_PTR(-EINVAL);
 257
 258         if (user_size > size)
 259                 return ERR_PTR(-EMSGSIZE);
 260
 261         data = kzalloc(size + headroom + tailroom, GFP_USER);
 262         if (!data)
 263                 return ERR_PTR(-ENOMEM);
 264
 265         if (copy_from_user(data + headroom, data_in, user_size)) {
 266                 kfree(data);
 267                 return ERR_PTR(-EFAULT);
 268         }
 269
 270         return data;
 271 }
 272
 273 int bpf_prog_test_run_tracing(struct bpf_prog *prog,
 274                               const union bpf_attr *kattr,
 275                               union bpf_attr __user *uattr)
 276 {
 277         struct bpf_fentry_test_t arg = {};
 278         u16 side_effect = 0, ret = 0;
 279         int b = 2, err = -EFAULT;
 280         u32 retval = 0;
 281
 282         if (kattr->test.flags || kattr->test.cpu)
 283                 return -EINVAL;
 284
 285         switch (prog->expected_attach_type) {
 286         case BPF_TRACE_FENTRY:
 287         case BPF_TRACE_FEXIT:
 288                 if (bpf_fentry_test1(1) != 2 ||
 289                     bpf_fentry_test2(2, 3) != 5 ||
 290                     bpf_fentry_test3(4, 5, 6) != 15 ||
 291                     bpf_fentry_test4((void *)7, 8, 9, 10) != 34 ||
 292                     bpf_fentry_test5(11, (void *)12, 13, 14, 15) != 65 ||
 293                     bpf_fentry_test6(16, (void *)17, 18, 19, (void *)20, 21) != 111 ||
 294                     bpf_fentry_test7((struct bpf_fentry_test_t *)0) != 0 ||
 295                     bpf_fentry_test8(&arg) != 0)
 296                         goto out;
 297                 break;
 298         case BPF_MODIFY_RETURN:
 299                 ret = bpf_modify_return_test(1, &b);
 300                 if (b != 2)
 301                         side_effect = 1;
 302                 break;
 303         default:
 304                 goto out;
 305         }
 306
 307         retval = ((u32)side_effect << 16) | ret;
 308         if (copy_to_user(&uattr->test.retval, &retval, sizeof(retval)))
 309                 goto out;
 310
 311         err = 0;
 312 out:
 313         trace_bpf_test_finish(&err);
 314         return err;
 315 }
 316
/* Argument bundle passed to __bpf_prog_test_run_raw_tp(). */
struct bpf_raw_tp_test_run_info {
        /* program to run */
        struct bpf_prog *prog;
        /* context handed to the program; may be NULL */
        void *ctx;
        /* filled in with the program's return value */
        u32 retval;
};
 322
 323 static void
 324 __bpf_prog_test_run_raw_tp(void *data)
 325 {
 326         struct bpf_raw_tp_test_run_info *info = data;
 327
 328         rcu_read_lock();
 329         info->retval = BPF_PROG_RUN(info->prog, info->ctx);
 330         rcu_read_unlock();
 331 }
 332
 333 int bpf_prog_test_run_raw_tp(struct bpf_prog *prog,
 334                              const union bpf_attr *kattr,
 335                              union bpf_attr __user *uattr)
 336 {
 337         void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
 338         __u32 ctx_size_in = kattr->test.ctx_size_in;
 339         struct bpf_raw_tp_test_run_info info;
 340         int cpu = kattr->test.cpu, err = 0;
 341         int current_cpu;
 342
 343         /* doesn't support data_in/out, ctx_out, duration, or repeat */
 344         if (kattr->test.data_in || kattr->test.data_out ||
 345             kattr->test.ctx_out || kattr->test.duration ||
 346             kattr->test.repeat)
 347                 return -EINVAL;
 348
 349         if (ctx_size_in < prog->aux->max_ctx_offset ||
 350             ctx_size_in > MAX_BPF_FUNC_ARGS * sizeof(u64))
 351                 return -EINVAL;
 352
 353         if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 && cpu != 0)
 354                 return -EINVAL;
 355
 356         if (ctx_size_in) {
 357                 info.ctx = kzalloc(ctx_size_in, GFP_USER);
 358                 if (!info.ctx)
 359                         return -ENOMEM;
 360                 if (copy_from_user(info.ctx, ctx_in, ctx_size_in)) {
 361                         err = -EFAULT;
 362                         goto out;
 363                 }
 364         } else {
 365                 info.ctx = NULL;
 366         }
 367
 368         info.prog = prog;
 369
 370         current_cpu = get_cpu();
 371         if ((kattr->test.flags & BPF_F_TEST_RUN_ON_CPU) == 0 ||
 372             cpu == current_cpu) {
 373                 __bpf_prog_test_run_raw_tp(&info);
 374         } else if (cpu >= nr_cpu_ids || !cpu_online(cpu)) {
 375                 /* smp_call_function_single() also checks cpu_online()
 376                  * after csd_lock(). However, since cpu is from user
 377                  * space, let's do an extra quick check to filter out
 378                  * invalid value before smp_call_function_single().
 379                  */
 380                 err = -ENXIO;
 381         } else {
 382                 err = smp_call_function_single(cpu, __bpf_prog_test_run_raw_tp,
 383                                                &info, 1);
 384         }
 385         put_cpu();
 386
 387         if (!err &&
 388             copy_to_user(&uattr->test.retval, &info.retval, sizeof(u32)))
 389                 err = -EFAULT;
 390
 391 out:
 392         kfree(info.ctx);
 393         return err;
 394 }
 395
 396 static void *bpf_ctx_init(const union bpf_attr *kattr, u32 max_size)
 397 {
 398         void __user *data_in = u64_to_user_ptr(kattr->test.ctx_in);
 399         void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
 400         u32 size = kattr->test.ctx_size_in;
 401         void *data;
 402         int err;
 403
 404         if (!data_in && !data_out)
 405                 return NULL;
 406
 407         data = kzalloc(max_size, GFP_USER);
 408         if (!data)
 409                 return ERR_PTR(-ENOMEM);
 410
 411         if (data_in) {
 412                 err = bpf_check_uarg_tail_zero(USER_BPFPTR(data_in), max_size, size);
 413                 if (err) {
 414                         kfree(data);
 415                         return ERR_PTR(err);
 416                 }
 417
 418                 size = min_t(u32, max_size, size);
 419                 if (copy_from_user(data, data_in, size)) {
 420                         kfree(data);
 421                         return ERR_PTR(-EFAULT);
 422                 }
 423         }
 424         return data;
 425 }
 426
 427 static int bpf_ctx_finish(const union bpf_attr *kattr,
 428                           union bpf_attr __user *uattr, const void *data,
 429                           u32 size)
 430 {
 431         void __user *data_out = u64_to_user_ptr(kattr->test.ctx_out);
 432         int err = -EFAULT;
 433         u32 copy_size = size;
 434
 435         if (!data || !data_out)
 436                 return 0;
 437
 438         if (copy_size > kattr->test.ctx_size_out) {
 439                 copy_size = kattr->test.ctx_size_out;
 440                 err = -ENOSPC;
 441         }
 442
 443         if (copy_to_user(data_out, data, copy_size))
 444                 goto out;
 445         if (copy_to_user(&uattr->test.ctx_size_out, &size, sizeof(size)))
 446                 goto out;
 447         if (err != -ENOSPC)
 448                 err = 0;
 449 out:
 450         return err;
 451 }
 452
 453 /**
 454  * range_is_zero - test whether buffer is initialized
 455  * @buf: buffer to check
 456  * @from: check from this position
 457  * @to: check up until (excluding) this position
 458  *
 459  * This function returns true if the there is a non-zero byte
 460  * in the buf in the range [from,to).
 461  */
 462 static inline bool range_is_zero(void *buf, size_t from, size_t to)
 463 {
 464         return !memchr_inv((u8 *)buf + from, 0, to - from);
 465 }
 466
 467 static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb)
 468 {
 469         struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
 470
 471         if (!__skb)
 472                 return 0;
 473
 474         /* make sure the fields we don't use are zeroed */
 475         if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark)))
 476                 return -EINVAL;
 477
 478         /* mark is allowed */
 479
 480         if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark),
 481                            offsetof(struct __sk_buff, priority)))
 482                 return -EINVAL;
 483
 484         /* priority is allowed */
 485
 486         if (!range_is_zero(__skb, offsetofend(struct __sk_buff, priority),
 487                            offsetof(struct __sk_buff, ifindex)))
 488                 return -EINVAL;
 489
 490         /* ifindex is allowed */
 491
 492         if (!range_is_zero(__skb, offsetofend(struct __sk_buff, ifindex),
 493                            offsetof(struct __sk_buff, cb)))
 494                 return -EINVAL;
 495
 496         /* cb is allowed */
 497
 498         if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb),
 499                            offsetof(struct __sk_buff, tstamp)))
 500                 return -EINVAL;
 501
 502         /* tstamp is allowed */
 503         /* wire_len is allowed */
 504         /* gso_segs is allowed */
 505
 506         if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs),
 507                            offsetof(struct __sk_buff, gso_size)))
 508                 return -EINVAL;
 509
 510         /* gso_size is allowed */
 511
 512         if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_size),
 513                            sizeof(struct __sk_buff)))
 514                 return -EINVAL;
 515
 516         skb->mark = __skb->mark;
 517         skb->priority = __skb->priority;
 518         skb->tstamp = __skb->tstamp;
 519         memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN);
 520
 521         if (__skb->wire_len == 0) {
 522                 cb->pkt_len = skb->len;
 523         } else {
 524                 if (__skb->wire_len < skb->len ||
 525                     __skb->wire_len > GSO_MAX_SIZE)
 526                         return -EINVAL;
 527                 cb->pkt_len = __skb->wire_len;
 528         }
 529
 530         if (__skb->gso_segs > GSO_MAX_SEGS)
 531                 return -EINVAL;
 532         skb_shinfo(skb)->gso_segs = __skb->gso_segs;
 533         skb_shinfo(skb)->gso_size = __skb->gso_size;
 534
 535         return 0;
 536 }
 537
 538 static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb)
 539 {
 540         struct qdisc_skb_cb *cb = (struct qdisc_skb_cb *)skb->cb;
 541
 542         if (!__skb)
 543                 return;
 544
 545         __skb->mark = skb->mark;
 546         __skb->priority = skb->priority;
 547         __skb->ifindex = skb->dev->ifindex;
 548         __skb->tstamp = skb->tstamp;
 549         memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN);
 550         __skb->wire_len = cb->pkt_len;
 551         __skb->gso_segs = skb_shinfo(skb)->gso_segs;
 552 }
 553
/* Test-run handler for program types that take an sk_buff context.
 *
 * Builds an skb around a copy of the user's packet data, attaches a
 * zero-initialised socket to it, applies the optional user __sk_buff
 * context, runs @prog through bpf_test_run() and copies packet, return
 * value, duration and context back to user space.
 *
 * Returns 0 on success or a negative errno (-EINVAL for unsupported
 * attributes, plus whatever the helpers called below report).
 */
int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr,
                          union bpf_attr __user *uattr)
{
        bool is_l2 = false, is_direct_pkt_access = false;
        struct net *net = current->nsproxy->net_ns;
        struct net_device *dev = net->loopback_dev;
        u32 size = kattr->test.data_size_in;
        u32 repeat = kattr->test.repeat;
        struct __sk_buff *ctx = NULL;
        u32 retval, duration;
        int hh_len = ETH_HLEN;
        struct sk_buff *skb;
        struct sock *sk;
        void *data;
        int ret;

        if (kattr->test.flags || kattr->test.cpu)
                return -EINVAL;

        /* Packet buffer with room for the skb headroom in front and
         * struct skb_shared_info behind the data.
         */
        data = bpf_test_init(kattr, size, NET_SKB_PAD + NET_IP_ALIGN,
                             SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
        if (IS_ERR(data))
                return PTR_ERR(data);

        /* ctx stays NULL when the user passed no ctx_in/ctx_out. */
        ctx = bpf_ctx_init(kattr, sizeof(struct __sk_buff));
        if (IS_ERR(ctx)) {
                kfree(data);
                return PTR_ERR(ctx);
        }

        /* TC programs see the packet from the link-layer header;
         * TC and LWT programs get direct packet access pointers.
         */
        switch (prog->type) {
        case BPF_PROG_TYPE_SCHED_CLS:
        case BPF_PROG_TYPE_SCHED_ACT:
                is_l2 = true;
                fallthrough;
        case BPF_PROG_TYPE_LWT_IN:
        case BPF_PROG_TYPE_LWT_OUT:
        case BPF_PROG_TYPE_LWT_XMIT:
                is_direct_pkt_access = true;
                break;
        default:
                break;
        }

        sk = kzalloc(sizeof(struct sock), GFP_USER);
        if (!sk) {
                kfree(data);
                kfree(ctx);
                return -ENOMEM;
        }
        sock_net_set(sk, net);
        sock_init_data(NULL, sk);

        skb = build_skb(data, 0);
        if (!skb) {
                kfree(data);
                kfree(ctx);
                kfree(sk);
                return -ENOMEM;
        }
        /* From here on data is released through kfree_skb(skb) only. */
        skb->sk = sk;

        skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN);
        __skb_put(skb, size);
        /* An ifindex above 1 selects a device other than the default
         * loopback device; the reference taken here is dropped at out.
         */
        if (ctx && ctx->ifindex > 1) {
                dev = dev_get_by_index(net, ctx->ifindex);
                if (!dev) {
                        ret = -ENODEV;
                        goto out;
                }
        }
        skb->protocol = eth_type_trans(skb, dev);
        skb_reset_network_header(skb);

        /* Seed the dummy socket's family and addresses from the IP
         * header, provided the header fits in the linear data.
         */
        switch (skb->protocol) {
        case htons(ETH_P_IP):
                sk->sk_family = AF_INET;
                if (sizeof(struct iphdr) <= skb_headlen(skb)) {
                        sk->sk_rcv_saddr = ip_hdr(skb)->saddr;
                        sk->sk_daddr = ip_hdr(skb)->daddr;
                }
                break;
#if IS_ENABLED(CONFIG_IPV6)
        case htons(ETH_P_IPV6):
                sk->sk_family = AF_INET6;
                if (sizeof(struct ipv6hdr) <= skb_headlen(skb)) {
                        sk->sk_v6_rcv_saddr = ipv6_hdr(skb)->saddr;
                        sk->sk_v6_daddr = ipv6_hdr(skb)->daddr;
                }
                break;
#endif
        default:
                break;
        }

        /* Make the Ethernet header visible again for L2 programs. */
        if (is_l2)
                __skb_push(skb, hh_len);
        if (is_direct_pkt_access)
                bpf_compute_data_pointers(skb);
        ret = convert___skb_to_skb(skb, ctx);
        if (ret)
                goto out;
        ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false);
        if (ret)
                goto out;
        /* For non-L2 programs, prepend a zeroed Ethernet header so the
         * data returned to user space starts at the link layer, growing
         * the headroom first if the program consumed it.
         */
        if (!is_l2) {
                if (skb_headroom(skb) < hh_len) {
                        int nhead = HH_DATA_ALIGN(hh_len - skb_headroom(skb));

                        if (pskb_expand_head(skb, nhead, 0, GFP_USER)) {
                                ret = -ENOMEM;
                                goto out;
                        }
                }
                memset(__skb_push(skb, hh_len), 0, hh_len);
        }
        convert_skb_to___skb(skb, ctx);

        size = skb->len;
        /* bpf program can never convert linear skb to non-linear */
        if (WARN_ON_ONCE(skb_is_nonlinear(skb)))
                size = skb_headlen(skb);
        ret = bpf_test_finish(kattr, uattr, skb->data, size, retval, duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct __sk_buff));
out:
        /* Only a device obtained via dev_get_by_index() holds a reference. */
        if (dev && dev != net->loopback_dev)
                dev_put(dev);
        kfree_skb(skb);
        bpf_sk_storage_free(sk);
        kfree(sk);
        kfree(ctx);
        return ret;
}
 689
 690 static int xdp_convert_md_to_buff(struct xdp_md *xdp_md, struct xdp_buff *xdp)
 691 {
 692         unsigned int ingress_ifindex, rx_queue_index;
 693         struct netdev_rx_queue *rxqueue;
 694         struct net_device *device;
 695
 696         if (!xdp_md)
 697                 return 0;
 698
 699         if (xdp_md->egress_ifindex != 0)
 700                 return -EINVAL;
 701
 702         ingress_ifindex = xdp_md->ingress_ifindex;
 703         rx_queue_index = xdp_md->rx_queue_index;
 704
 705         if (!ingress_ifindex && rx_queue_index)
 706                 return -EINVAL;
 707
 708         if (ingress_ifindex) {
 709                 device = dev_get_by_index(current->nsproxy->net_ns,
 710                                           ingress_ifindex);
 711                 if (!device)
 712                         return -ENODEV;
 713
 714                 if (rx_queue_index >= device->real_num_rx_queues)
 715                         goto free_dev;
 716
 717                 rxqueue = __netif_get_rx_queue(device, rx_queue_index);
 718
 719                 if (!xdp_rxq_info_is_reg(&rxqueue->xdp_rxq))
 720                         goto free_dev;
 721
 722                 xdp->rxq = &rxqueue->xdp_rxq;
 723                 /* The device is now tracked in the xdp->rxq for later
 724                  * dev_put()
 725                  */
 726         }
 727
 728         xdp->data = xdp->data_meta + xdp_md->data;
 729         return 0;
 730
 731 free_dev:
 732         dev_put(device);
 733         return -EINVAL;
 734 }
 735
 736 static void xdp_convert_buff_to_md(struct xdp_buff *xdp, struct xdp_md *xdp_md)
 737 {
 738         if (!xdp_md)
 739                 return;
 740
 741         xdp_md->data = xdp->data - xdp->data_meta;
 742         xdp_md->data_end = xdp->data_end - xdp->data_meta;
 743
 744         if (xdp_md->ingress_ifindex)
 745                 dev_put(xdp->rxq->dev);
 746 }
 747
/* Test-run handler for XDP programs.
 *
 * Copies the user's packet (and optional xdp_md context) into a kernel
 * buffer, wraps it in an xdp_buff bound to the loopback device's rx
 * queue 0 (or to the queue named in the context), runs @prog through
 * bpf_test_run() and copies packet, return value, duration and context
 * back to user space.
 *
 * Returns 0 on success or a negative errno.
 */
int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
                          union bpf_attr __user *uattr)
{
        u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
        u32 headroom = XDP_PACKET_HEADROOM;
        u32 size = kattr->test.data_size_in;
        u32 repeat = kattr->test.repeat;
        struct netdev_rx_queue *rxqueue;
        struct xdp_buff xdp = {};
        u32 retval, duration;
        struct xdp_md *ctx;
        u32 max_data_sz;
        void *data;
        int ret = -EINVAL;

        /* ctx is NULL when the user passed no ctx_in/ctx_out. */
        ctx = bpf_ctx_init(kattr, sizeof(struct xdp_md));
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);

        if (ctx) {
                /* There can't be user provided data before the meta data */
                if (ctx->data_meta || ctx->data_end != size ||
                    ctx->data > ctx->data_end ||
                    unlikely(xdp_metalen_invalid(ctx->data)))
                        goto free_ctx;
                /* Meta data is allocated from the headroom */
                headroom -= ctx->data;
        }

        /* XDP have extra tailroom as (most) drivers use full page */
        max_data_sz = 4096 - headroom - tailroom;

        data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
        if (IS_ERR(data)) {
                ret = PTR_ERR(data);
                goto free_ctx;
        }

        /* Default to the loopback device's first rx queue;
         * xdp_convert_md_to_buff() may replace xdp.rxq below.
         */
        rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
        xdp_init_buff(&xdp, headroom + max_data_sz + tailroom,
                      &rxqueue->xdp_rxq);
        xdp_prepare_buff(&xdp, data, headroom, size, true);

        ret = xdp_convert_md_to_buff(ctx, &xdp);
        if (ret)
                goto free_data;

        bpf_prog_change_xdp(NULL, prog);
        ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
        /* We convert the xdp_buff back to an xdp_md before checking the return
         * code so the reference count of any held netdevice will be decremented
         * even if the test run failed.
         */
        xdp_convert_buff_to_md(&xdp, ctx);
        if (ret)
                goto out;

        /* The program may have moved the packet boundaries; report the
         * resulting length from data_meta to data_end.
         */
        if (xdp.data_meta != data + headroom ||
            xdp.data_end != xdp.data_meta + size)
                size = xdp.data_end - xdp.data_meta;

        ret = bpf_test_finish(kattr, uattr, xdp.data_meta, size, retval,
                              duration);
        if (!ret)
                ret = bpf_ctx_finish(kattr, uattr, ctx,
                                     sizeof(struct xdp_md));

out:
        /* Pairs with bpf_prog_change_xdp(NULL, prog) above. */
        bpf_prog_change_xdp(prog, NULL);
free_data:
        kfree(data);
free_ctx:
        kfree(ctx);
        return ret;
}
 823
 824 static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx)
 825 {
 826         /* make sure the fields we don't use are zeroed */
 827         if (!range_is_zero(ctx, 0, offsetof(struct bpf_flow_keys, flags)))
 828                 return -EINVAL;
 829
 830         /* flags is allowed */
 831
 832         if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags),
 833                            sizeof(struct bpf_flow_keys)))
 834                 return -EINVAL;
 835
 836         return 0;
 837 }
 838
 839 int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 840                                      const union bpf_attr *kattr,
 841                                      union bpf_attr __user *uattr)
 842 {
 843         struct bpf_test_timer t = { NO_PREEMPT };
 844         u32 size = kattr->test.data_size_in;
 845         struct bpf_flow_dissector ctx = {};
 846         u32 repeat = kattr->test.repeat;
 847         struct bpf_flow_keys *user_ctx;
 848         struct bpf_flow_keys flow_keys;
 849         const struct ethhdr *eth;
 850         unsigned int flags = 0;
 851         u32 retval, duration;
 852         void *data;
 853         int ret;
 854
 855         if (prog->type != BPF_PROG_TYPE_FLOW_DISSECTOR)
 856                 return -EINVAL;
 857
 858         if (kattr->test.flags || kattr->test.cpu)
 859                 return -EINVAL;
 860
 861         if (size < ETH_HLEN)
 862                 return -EINVAL;
 863
 864         data = bpf_test_init(kattr, size, 0, 0);
 865         if (IS_ERR(data))
 866                 return PTR_ERR(data);
 867
 868         eth = (struct ethhdr *)data;
 869
 870         if (!repeat)
 871                 repeat = 1;
 872
 873         user_ctx = bpf_ctx_init(kattr, sizeof(struct bpf_flow_keys));
 874         if (IS_ERR(user_ctx)) {
 875                 kfree(data);
 876                 return PTR_ERR(user_ctx);
 877         }
 878         if (user_ctx) {
 879                 ret = verify_user_bpf_flow_keys(user_ctx);
 880                 if (ret)
 881                         goto out;
 882                 flags = user_ctx->flags;
 883         }
 884
 885         ctx.flow_keys = &flow_keys;
 886         ctx.data = data;
 887         ctx.data_end = (__u8 *)data + size;
 888
 889         bpf_test_timer_enter(&t);
 890         do {
 891                 retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
 892                                           size, flags);
 893         } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
 894         bpf_test_timer_leave(&t);
 895
 896         if (ret < 0)
 897                 goto out;
 898
 899         ret = bpf_test_finish(kattr, uattr, &flow_keys, sizeof(flow_keys),
 900                               retval, duration);
 901         if (!ret)
 902                 ret = bpf_ctx_finish(kattr, uattr, user_ctx,
 903                                      sizeof(struct bpf_flow_keys));
 904
 905 out:
 906         kfree(user_ctx);
 907         kfree(data);
 908         return ret;
 909 }
 910
 911 int bpf_prog_test_run_sk_lookup(struct bpf_prog *prog, const union bpf_attr *kattr,
 912                                 union bpf_attr __user *uattr)
 913 {
 914         struct bpf_test_timer t = { NO_PREEMPT };
 915         struct bpf_prog_array *progs = NULL;
 916         struct bpf_sk_lookup_kern ctx = {};
 917         u32 repeat = kattr->test.repeat;
 918         struct bpf_sk_lookup *user_ctx;
 919         u32 retval, duration;
 920         int ret = -EINVAL;
 921
 922         if (prog->type != BPF_PROG_TYPE_SK_LOOKUP)
 923                 return -EINVAL;
 924
 925         if (kattr->test.flags || kattr->test.cpu)
 926                 return -EINVAL;
 927
 928         if (kattr->test.data_in || kattr->test.data_size_in || kattr->test.data_out ||
 929             kattr->test.data_size_out)
 930                 return -EINVAL;
 931
 932         if (!repeat)
 933                 repeat = 1;
 934
 935         user_ctx = bpf_ctx_init(kattr, sizeof(*user_ctx));
 936         if (IS_ERR(user_ctx))
 937                 return PTR_ERR(user_ctx);
 938
 939         if (!user_ctx)
 940                 return -EINVAL;
 941
 942         if (user_ctx->sk)
 943                 goto out;
 944
 945         if (!range_is_zero(user_ctx, offsetofend(typeof(*user_ctx), local_port), sizeof(*user_ctx)))
 946                 goto out;
 947
 948         if (user_ctx->local_port > U16_MAX || user_ctx->remote_port > U16_MAX) {
 949                 ret = -ERANGE;
 950                 goto out;
 951         }
 952
 953         ctx.family = (u16)user_ctx->family;
 954         ctx.protocol = (u16)user_ctx->protocol;
 955         ctx.dport = (u16)user_ctx->local_port;
 956         ctx.sport = (__force __be16)user_ctx->remote_port;
 957
 958         switch (ctx.family) {
 959         case AF_INET:
 960                 ctx.v4.daddr = (__force __be32)user_ctx->local_ip4;
 961                 ctx.v4.saddr = (__force __be32)user_ctx->remote_ip4;
 962                 break;
 963
 964 #if IS_ENABLED(CONFIG_IPV6)
 965         case AF_INET6:
 966                 ctx.v6.daddr = (struct in6_addr *)user_ctx->local_ip6;
 967                 ctx.v6.saddr = (struct in6_addr *)user_ctx->remote_ip6;
 968                 break;
 969 #endif
 970
 971         default:
 972                 ret = -EAFNOSUPPORT;
 973                 goto out;
 974         }
 975
 976         progs = bpf_prog_array_alloc(1, GFP_KERNEL);
 977         if (!progs) {
 978                 ret = -ENOMEM;
 979                 goto out;
 980         }
 981
 982         progs->items[0].prog = prog;
 983
 984         bpf_test_timer_enter(&t);
 985         do {
 986                 ctx.selected_sk = NULL;
 987                 retval = BPF_PROG_SK_LOOKUP_RUN_ARRAY(progs, ctx, BPF_PROG_RUN);
 988         } while (bpf_test_timer_continue(&t, repeat, &ret, &duration));
 989         bpf_test_timer_leave(&t);
 990
 991         if (ret < 0)
 992                 goto out;
 993
 994         user_ctx->cookie = 0;
 995         if (ctx.selected_sk) {
 996                 if (ctx.selected_sk->sk_reuseport && !ctx.no_reuseport) {
 997                         ret = -EOPNOTSUPP;
 998                         goto out;
 999                 }
1000
1001                 user_ctx->cookie = sock_gen_cookie(ctx.selected_sk);
1002         }
1003
1004         ret = bpf_test_finish(kattr, uattr, NULL, 0, retval, duration);
1005         if (!ret)
1006                 ret = bpf_ctx_finish(kattr, uattr, user_ctx, sizeof(*user_ctx));
1007
1008 out:
1009         bpf_prog_array_free(progs);
1010         kfree(user_ctx);
1011         return ret;
1012 }
1013
1014 int bpf_prog_test_run_syscall(struct bpf_prog *prog,
1015                               const union bpf_attr *kattr,
1016                               union bpf_attr __user *uattr)
1017 {
1018         void __user *ctx_in = u64_to_user_ptr(kattr->test.ctx_in);
1019         __u32 ctx_size_in = kattr->test.ctx_size_in;
1020         void *ctx = NULL;
1021         u32 retval;
1022         int err = 0;
1023
1024         /* doesn't support data_in/out, ctx_out, duration, or repeat or flags */
1025         if (kattr->test.data_in || kattr->test.data_out ||
1026             kattr->test.ctx_out || kattr->test.duration ||
1027             kattr->test.repeat || kattr->test.flags)
1028                 return -EINVAL;
1029
1030         if (ctx_size_in < prog->aux->max_ctx_offset ||
1031             ctx_size_in > U16_MAX)
1032                 return -EINVAL;
1033
1034         if (ctx_size_in) {
1035                 ctx = kzalloc(ctx_size_in, GFP_USER);
1036                 if (!ctx)
1037                         return -ENOMEM;
1038                 if (copy_from_user(ctx, ctx_in, ctx_size_in)) {
1039                         err = -EFAULT;
1040                         goto out;
1041                 }
1042         }
1043         retval = bpf_prog_run_pin_on_cpu(prog, ctx);
1044
1045         if (copy_to_user(&uattr->test.retval, &retval, sizeof(u32))) {
1046                 err = -EFAULT;
1047                 goto out;
1048         }
1049         if (ctx_size_in)
1050                 if (copy_to_user(ctx_in, ctx, ctx_size_in))
1051                         err = -EFAULT;
1052 out:
1053         kfree(ctx);
1054         return err;
1055 }