0 - disable JIT kallsyms export (default value)
1 - enable JIT kallsyms export for privileged users only
+bpf_jit_limit
+-------------
+
+This enforces a global limit for memory allocations to the BPF JIT
+compiler in order to reject unprivileged JIT requests once it has
+been surpassed. bpf_jit_limit contains the value of the global limit
+in bytes.
+
dev_weight
--------------
struct net_device *real_dev = macsec->real_dev;
int err;
- if (!(real_dev->flags & IFF_UP))
- return -ENETDOWN;
-
err = dev_uc_add(real_dev, dev->dev_addr);
if (err < 0)
return err;
if (err < 0)
goto del_dev;
+ netif_stacked_transfer_operstate(real_dev, dev);
+ linkwatch_fire_event(dev);
+
macsec_generation++;
return 0;
return NOTIFY_DONE;
switch (event) {
+ case NETDEV_DOWN:
+ case NETDEV_UP:
+ case NETDEV_CHANGE: {
+ struct macsec_dev *m, *n;
+ struct macsec_rxh_data *rxd;
+
+ rxd = macsec_data_rtnl(real_dev);
+ list_for_each_entry_safe(m, n, &rxd->secys, secys) {
+ struct net_device *dev = m->secy.netdev;
+
+ netif_stacked_transfer_operstate(real_dev, dev);
+ }
+ break;
+ }
case NETDEV_UNREGISTER: {
struct macsec_dev *m, *n;
struct macsec_rxh_data *rxd;
init_waitqueue_head(&ptp->tsev_wq);
if (ptp->info->do_aux_work) {
- char *worker_name = kasprintf(GFP_KERNEL, "ptp%d", ptp->index);
-
kthread_init_delayed_work(&ptp->aux_work, ptp_aux_kworker);
- ptp->kworker = kthread_create_worker(0, worker_name ?
- worker_name : info->name);
- kfree(worker_name);
+ ptp->kworker = kthread_create_worker(0, "ptp%d", ptp->index);
if (IS_ERR(ptp->kworker)) {
err = PTR_ERR(ptp->kworker);
pr_err("failed to create ptp aux_worker %d\n", err);
extern int bpf_jit_enable;
extern int bpf_jit_harden;
extern int bpf_jit_kallsyms;
+extern int bpf_jit_limit;
typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size);
return 0;
}
-static int btf_parse_hdr(struct btf_verifier_env *env, void __user *btf_data,
- u32 btf_data_size)
+static int btf_parse_hdr(struct btf_verifier_env *env)
{
+ u32 hdr_len, hdr_copy, btf_data_size;
const struct btf_header *hdr;
- u32 hdr_len, hdr_copy;
- /*
- * Minimal part of the "struct btf_header" that
- * contains the hdr_len.
- */
- struct btf_min_header {
- u16 magic;
- u8 version;
- u8 flags;
- u32 hdr_len;
- } __user *min_hdr;
struct btf *btf;
int err;
btf = env->btf;
- min_hdr = btf_data;
+ btf_data_size = btf->data_size;
- if (btf_data_size < sizeof(*min_hdr)) {
+ if (btf_data_size <
+ offsetof(struct btf_header, hdr_len) + sizeof(hdr->hdr_len)) {
btf_verifier_log(env, "hdr_len not found");
return -EINVAL;
}
- if (get_user(hdr_len, &min_hdr->hdr_len))
- return -EFAULT;
-
+ hdr = btf->data;
+ hdr_len = hdr->hdr_len;
if (btf_data_size < hdr_len) {
btf_verifier_log(env, "btf_header not found");
return -EINVAL;
}
- err = bpf_check_uarg_tail_zero(btf_data, sizeof(btf->hdr), hdr_len);
- if (err) {
- if (err == -E2BIG)
- btf_verifier_log(env, "Unsupported btf_header");
- return err;
+ /* Ensure the unsupported header fields are zero */
+ if (hdr_len > sizeof(btf->hdr)) {
+ u8 *expected_zero = btf->data + sizeof(btf->hdr);
+ u8 *end = btf->data + hdr_len;
+
+ for (; expected_zero < end; expected_zero++) {
+ if (*expected_zero) {
+ btf_verifier_log(env, "Unsupported btf_header");
+ return -E2BIG;
+ }
+ }
}
hdr_copy = min_t(u32, hdr_len, sizeof(btf->hdr));
- if (copy_from_user(&btf->hdr, btf_data, hdr_copy))
- return -EFAULT;
+ memcpy(&btf->hdr, btf->data, hdr_copy);
hdr = &btf->hdr;
- if (hdr->hdr_len != hdr_len)
- return -EINVAL;
-
btf_verifier_log_hdr(env, btf_data_size);
if (hdr->magic != BTF_MAGIC) {
}
env->btf = btf;
- err = btf_parse_hdr(env, btf_data, btf_data_size);
- if (err)
- goto errout;
-
data = kvmalloc(btf_data_size, GFP_KERNEL | __GFP_NOWARN);
if (!data) {
err = -ENOMEM;
btf->data = data;
btf->data_size = btf_data_size;
- btf->nohdr_data = btf->data + btf->hdr.hdr_len;
if (copy_from_user(data, btf_data, btf_data_size)) {
err = -EFAULT;
goto errout;
}
+ err = btf_parse_hdr(env);
+ if (err)
+ goto errout;
+
+ btf->nohdr_data = btf->data + btf->hdr.hdr_len;
+
err = btf_parse_str_sec(env);
if (err)
goto errout;
}
#ifdef CONFIG_BPF_JIT
+# define BPF_JIT_LIMIT_DEFAULT (PAGE_SIZE * 40000)
+
/* All BPF JIT sysctl knobs here. */
int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON);
int bpf_jit_harden __read_mostly;
int bpf_jit_kallsyms __read_mostly;
+int bpf_jit_limit __read_mostly = BPF_JIT_LIMIT_DEFAULT;
static __always_inline void
bpf_get_prog_addr_region(const struct bpf_prog *prog,
return ret;
}
+static atomic_long_t bpf_jit_current;
+
+#if defined(MODULES_VADDR)
+static int __init bpf_jit_charge_init(void)
+{
+ /* Only used as heuristic here to derive limit. */
+ bpf_jit_limit = min_t(u64, round_up((MODULES_END - MODULES_VADDR) >> 2,
+ PAGE_SIZE), INT_MAX);
+ return 0;
+}
+pure_initcall(bpf_jit_charge_init);
+#endif
+
+static int bpf_jit_charge_modmem(u32 pages)
+{
+ if (atomic_long_add_return(pages, &bpf_jit_current) >
+ (bpf_jit_limit >> PAGE_SHIFT)) {
+ if (!capable(CAP_SYS_ADMIN)) {
+ atomic_long_sub(pages, &bpf_jit_current);
+ return -EPERM;
+ }
+ }
+
+ return 0;
+}
+
+static void bpf_jit_uncharge_modmem(u32 pages)
+{
+ atomic_long_sub(pages, &bpf_jit_current);
+}
+
struct bpf_binary_header *
bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr,
unsigned int alignment,
bpf_jit_fill_hole_t bpf_fill_ill_insns)
{
struct bpf_binary_header *hdr;
- unsigned int size, hole, start;
+ u32 size, hole, start, pages;
/* Most of BPF filters are really small, but if some of them
* fill a page, allow at least 128 extra bytes to insert a
* random section of illegal instructions.
*/
size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE);
+ pages = size / PAGE_SIZE;
+
+ if (bpf_jit_charge_modmem(pages))
+ return NULL;
hdr = module_alloc(size);
- if (hdr == NULL)
+ if (!hdr) {
+ bpf_jit_uncharge_modmem(pages);
return NULL;
+ }
/* Fill space with illegal/arch-dep instructions. */
bpf_fill_ill_insns(hdr, size);
- hdr->pages = size / PAGE_SIZE;
+ hdr->pages = pages;
hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)),
PAGE_SIZE - sizeof(*hdr));
start = (get_random_int() % hole) & ~(alignment - 1);
void bpf_jit_binary_free(struct bpf_binary_header *hdr)
{
+ u32 pages = hdr->pages;
+
module_memfree(hdr);
+ bpf_jit_uncharge_modmem(pages);
}
/* This symbol is only overridden by archs that have different
struct bpf_dtab_netdev *dev, *odev;
dev = READ_ONCE(dtab->netdev_map[i]);
- if (!dev ||
- dev->dev->ifindex != netdev->ifindex)
+ if (!dev || netdev != dev->dev)
continue;
odev = cmpxchg(&dtab->netdev_map[i], dev, NULL);
if (dev == odev)
const struct bpf_func_proto bpf_map_pop_elem_proto = {
.func = bpf_map_pop_elem,
.gpl_only = false,
- .pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
const struct bpf_func_proto bpf_map_peek_elem_proto = {
.func = bpf_map_pop_elem,
.gpl_only = false,
- .pkt_access = true,
.ret_type = RET_INTEGER,
.arg1_type = ARG_CONST_MAP_PTR,
.arg2_type = ARG_PTR_TO_UNINIT_MAP_VALUE,
raw_spin_lock_irqsave(&qs->lock, flags);
if (queue_stack_map_is_empty(qs)) {
+ memset(value, 0, qs->map.value_size);
err = -ENOENT;
goto out;
}
raw_spin_lock_irqsave(&qs->lock, flags);
if (queue_stack_map_is_empty(qs)) {
+ memset(value, 0, qs->map.value_size);
err = -ENOENT;
goto out;
}
enum bpf_access_type t)
{
switch (env->prog->type) {
+ /* Program types only with direct read access go here! */
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
case BPF_PROG_TYPE_LWT_SEG6LOCAL:
case BPF_PROG_TYPE_SK_REUSEPORT:
- /* dst_input() and dst_output() can't write for now */
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_CGROUP_SKB:
if (t == BPF_WRITE)
return false;
/* fallthrough */
+
+ /* Program types with direct read + write access go here! */
case BPF_PROG_TYPE_SCHED_CLS:
case BPF_PROG_TYPE_SCHED_ACT:
case BPF_PROG_TYPE_XDP:
case BPF_PROG_TYPE_LWT_XMIT:
case BPF_PROG_TYPE_SK_SKB:
case BPF_PROG_TYPE_SK_MSG:
- case BPF_PROG_TYPE_FLOW_DISSECTOR:
if (meta)
return meta->pkt_access;
bool is_narrower_load;
u32 target_size;
- if (ops->gen_prologue) {
+ if (ops->gen_prologue || env->seen_direct_write) {
+ if (!ops->gen_prologue) {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
cnt = ops->gen_prologue(insn_buf, env->seen_direct_write,
env->prog);
if (cnt >= ARRAY_SIZE(insn_buf)) {
* is 0.0.0.0 should not be added to router port list.
*/
if ((saddr->proto == htons(ETH_P_IP) && saddr->u.ip4) ||
- (saddr->proto == htons(ETH_P_IPV6) &&
- !ipv6_addr_any(&saddr->u.ip6)))
+ saddr->proto == htons(ETH_P_IPV6))
br_multicast_mark_router(br, port);
}
/* Do not adjust napi->gro_hash[].count, caller is adding a new
* SKB to the chain.
*/
- list_del(&oldest->list);
+ skb_list_del_init(oldest);
napi_gro_complete(oldest);
}
return &bpf_msg_pull_data_proto;
case BPF_FUNC_msg_push_data:
return &bpf_msg_push_data_proto;
- case BPF_FUNC_get_local_storage:
- return &bpf_get_local_storage_proto;
default:
return bpf_base_func_proto(func_id);
}
return &bpf_sk_redirect_map_proto;
case BPF_FUNC_sk_redirect_hash:
return &bpf_sk_redirect_hash_proto;
- case BPF_FUNC_get_local_storage:
- return &bpf_get_local_storage_proto;
#ifdef CONFIG_INET
case BPF_FUNC_sk_lookup_tcp:
return &bpf_sk_lookup_tcp_proto;
case bpf_ctx_range(struct __sk_buff, data_meta):
case bpf_ctx_range(struct __sk_buff, flow_keys):
return false;
+ case bpf_ctx_range(struct __sk_buff, data):
+ case bpf_ctx_range(struct __sk_buff, data_end):
+ if (!capable(CAP_SYS_ADMIN))
+ return false;
+ break;
}
+
if (type == BPF_WRITE) {
switch (off) {
case bpf_ctx_range(struct __sk_buff, mark):
prog->expected_attach_type);
}
+static int bpf_noop_prologue(struct bpf_insn *insn_buf, bool direct_write,
+ const struct bpf_prog *prog)
+{
+ /* Neither direct read nor direct write requires any preliminary
+ * action.
+ */
+ return 0;
+}
+
static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write,
const struct bpf_prog *prog, int drop_verdict)
{
.get_func_proto = xdp_func_proto,
.is_valid_access = xdp_is_valid_access,
.convert_ctx_access = xdp_convert_ctx_access,
+ .gen_prologue = bpf_noop_prologue,
};
const struct bpf_prog_ops xdp_prog_ops = {
.get_func_proto = sk_msg_func_proto,
.is_valid_access = sk_msg_is_valid_access,
.convert_ctx_access = sk_msg_convert_ctx_access,
+ .gen_prologue = bpf_noop_prologue,
};
const struct bpf_prog_ops sk_msg_prog_ops = {
return ret;
}
-# ifdef CONFIG_HAVE_EBPF_JIT
static int
proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp,
return proc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
-# endif
#endif
static struct ctl_table net_core_table[] = {
.extra2 = &one,
},
# endif
+ {
+ .procname = "bpf_jit_limit",
+ .data = &bpf_jit_limit,
+ .maxlen = sizeof(int),
+ .mode = 0600,
+ .proc_handler = proc_dointvec_minmax_bpf_restricted,
+ .extra1 = &one,
+ },
#endif
{
.procname = "netdev_tstamp_prequeue",
rcu_read_lock();
if (req->sdiag_family == AF_INET)
+ /* src and dst are swapped for historical reasons */
sk = __udp4_lib_lookup(net,
req->id.idiag_src[0], req->id.idiag_sport,
req->id.idiag_dst[0], req->id.idiag_dport,
if (tb[TCA_GRED_PARMS] == NULL && tb[TCA_GRED_STAB] == NULL) {
if (tb[TCA_GRED_LIMIT] != NULL)
sch->limit = nla_get_u32(tb[TCA_GRED_LIMIT]);
- return gred_change_table_def(sch, opt);
+ return gred_change_table_def(sch, tb[TCA_GRED_DPS]);
}
if (tb[TCA_GRED_PARMS] == NULL ||
CONFIG_GENEVE=y
CONFIG_NET_CLS_FLOWER=m
CONFIG_LWTUNNEL=y
+CONFIG_BPF_STREAM_PARSER=y
+CONFIG_XDP_SOCKETS=y
BPF_EXIT_INSN(),
},
.result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "invalid bpf_context access off=76 size=4",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
.fixup_cgroup_storage = { 1 },
.result = REJECT,
.errstr = "get_local_storage() doesn't support non-zero flags",
+ .errstr_unpriv = "R2 leaks addr into helper function",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
.fixup_percpu_cgroup_storage = { 1 },
.result = REJECT,
.errstr = "get_local_storage() doesn't support non-zero flags",
+ .errstr_unpriv = "R2 leaks addr into helper function",
.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
},
{
fclose(fd);
}
+static bool test_as_unpriv(struct bpf_test *test)
+{
+ return !test->prog_type ||
+ test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
+ test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
+}
+
static int do_test(bool unpriv, unsigned int from, unsigned int to)
{
int i, passes = 0, errors = 0, skips = 0;
/* Program types that are not supported by non-root we
* skip right away.
*/
- if (!test->prog_type && unpriv_disabled) {
+ if (test_as_unpriv(test) && unpriv_disabled) {
printf("#%d/u %s SKIP\n", i, test->descr);
skips++;
- } else if (!test->prog_type) {
+ } else if (test_as_unpriv(test)) {
if (!unpriv)
set_admin(false);
printf("#%d/u %s ", i, test->descr);