net/netfilter/nfnetlink_queue.c
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * This is a module which is used for queueing packets and communicating with
4  * userspace via nfnetlink.
5  *
6  * (C) 2005 by Harald Welte <laforge@netfilter.org>
7  * (C) 2007 by Patrick McHardy <kaber@trash.net>
8  *
9  * Based on the old ipv4-only ip_queue.c:
10  * (C) 2000-2002 James Morris <jmorris@intercode.com.au>
11  * (C) 2003-2005 Netfilter Core Team <coreteam@netfilter.org>
12  */
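/*
 * Editor's note: a minimal userspace consumer of this interface, sketched
 * with libnetfilter_queue (queue number 0, the 0xffff copy range and the
 * iptables rule below are illustrative; error handling is omitted):
 *
 *	// iptables -A INPUT -j NFQUEUE --queue-num 0
 *	#include <arpa/inet.h>
 *	#include <linux/netfilter.h>		// NF_ACCEPT
 *	#include <libnetfilter_queue/libnetfilter_queue.h>
 *
 *	static int cb(struct nfq_q_handle *qh, struct nfgenmsg *nfmsg,
 *		      struct nfq_data *nfa, void *data)
 *	{
 *		struct nfqnl_msg_packet_hdr *ph = nfq_get_msg_packet_hdr(nfa);
 *		// accept everything; a real filter would inspect the payload
 *		return nfq_set_verdict(qh, ntohl(ph->packet_id), NF_ACCEPT, 0, NULL);
 *	}
 *
 *	struct nfq_handle *h = nfq_open();
 *	struct nfq_q_handle *qh = nfq_create_queue(h, 0, &cb, NULL);
 *	nfq_set_mode(qh, NFQNL_COPY_PACKET, 0xffff);
 *	// then read from nfq_fd(h) and pass each buffer to nfq_handle_packet()
 */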
13
14 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
15
16 #include <linux/module.h>
17 #include <linux/skbuff.h>
18 #include <linux/init.h>
19 #include <linux/spinlock.h>
20 #include <linux/slab.h>
21 #include <linux/notifier.h>
22 #include <linux/netdevice.h>
23 #include <linux/netfilter.h>
24 #include <linux/proc_fs.h>
25 #include <linux/netfilter_ipv4.h>
26 #include <linux/netfilter_ipv6.h>
27 #include <linux/netfilter_bridge.h>
28 #include <linux/netfilter/nfnetlink.h>
29 #include <linux/netfilter/nfnetlink_queue.h>
30 #include <linux/netfilter/nf_conntrack_common.h>
31 #include <linux/list.h>
32 #include <linux/cgroup-defs.h>
33 #include <net/sock.h>
34 #include <net/tcp_states.h>
35 #include <net/netfilter/nf_queue.h>
36 #include <net/netns/generic.h>
37
38 #include <linux/atomic.h>
39
40 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
41 #include "../bridge/br_private.h"
42 #endif
43
44 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
45 #include <net/netfilter/nf_conntrack.h>
46 #endif
47
48 #define NFQNL_QMAX_DEFAULT 1024
49
50 /* We're using struct nlattr, which has a 16-bit nla_len. Note that nla_len
51  * includes the header length. Thus, the maximum packet length that we
52  * support is 65531 bytes. We send truncated packets if the specified length
53  * is larger than that.  Userspace can check for presence of NFQA_CAP_LEN
54  * attribute to detect truncation.
55  */
56 #define NFQNL_MAX_COPY_RANGE (0xffff - NLA_HDRLEN)
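/* With struct nlattr being 4 bytes and NLA_HDRLEN = NLA_ALIGN(sizeof(struct
 * nlattr)) = 4, this works out to 0xffff - 4 = 65531 bytes, the limit
 * described in the comment above.
 */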
57
58 struct nfqnl_instance {
59         struct hlist_node hlist;                /* global list of queues */
60         struct rcu_head rcu;
61
62         u32 peer_portid;
63         unsigned int queue_maxlen;
64         unsigned int copy_range;
65         unsigned int queue_dropped;
66         unsigned int queue_user_dropped;
67
68
69         u_int16_t queue_num;                    /* number of this queue */
70         u_int8_t copy_mode;
71         u_int32_t flags;                        /* Set using NFQA_CFG_FLAGS */
72 /*
73  * The following fields are dirtied for each queued packet;
74  * keep them in the same cache line if possible.
75  */
76         spinlock_t      lock    ____cacheline_aligned_in_smp;
77         unsigned int    queue_total;
78         unsigned int    id_sequence;            /* 'sequence' of pkt ids */
79         struct list_head queue_list;            /* packets in queue */
80 };
81
82 typedef int (*nfqnl_cmpfn)(struct nf_queue_entry *, unsigned long);
83
84 static unsigned int nfnl_queue_net_id __read_mostly;
85
86 #define INSTANCE_BUCKETS        16
87 struct nfnl_queue_net {
88         spinlock_t instances_lock;
89         struct hlist_head instance_table[INSTANCE_BUCKETS];
90 };
91
92 static struct nfnl_queue_net *nfnl_queue_pernet(struct net *net)
93 {
94         return net_generic(net, nfnl_queue_net_id);
95 }
96
97 static inline u_int8_t instance_hashfn(u_int16_t queue_num)
98 {
99         return ((queue_num >> 8) ^ queue_num) % INSTANCE_BUCKETS;
100 }
101
102 static struct nfqnl_instance *
103 instance_lookup(struct nfnl_queue_net *q, u_int16_t queue_num)
104 {
105         struct hlist_head *head;
106         struct nfqnl_instance *inst;
107
108         head = &q->instance_table[instance_hashfn(queue_num)];
109         hlist_for_each_entry_rcu(inst, head, hlist) {
110                 if (inst->queue_num == queue_num)
111                         return inst;
112         }
113         return NULL;
114 }
115
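/* Allocate and publish a new queue instance bound to netlink port @portid.
 * Takes instances_lock itself; returns ERR_PTR(-EEXIST) if @queue_num is
 * already bound.
 */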
116 static struct nfqnl_instance *
117 instance_create(struct nfnl_queue_net *q, u_int16_t queue_num, u32 portid)
118 {
119         struct nfqnl_instance *inst;
120         unsigned int h;
121         int err;
122
123         spin_lock(&q->instances_lock);
124         if (instance_lookup(q, queue_num)) {
125                 err = -EEXIST;
126                 goto out_unlock;
127         }
128
129         inst = kzalloc(sizeof(*inst), GFP_ATOMIC);
130         if (!inst) {
131                 err = -ENOMEM;
132                 goto out_unlock;
133         }
134
135         inst->queue_num = queue_num;
136         inst->peer_portid = portid;
137         inst->queue_maxlen = NFQNL_QMAX_DEFAULT;
138         inst->copy_range = NFQNL_MAX_COPY_RANGE;
139         inst->copy_mode = NFQNL_COPY_NONE;
140         spin_lock_init(&inst->lock);
141         INIT_LIST_HEAD(&inst->queue_list);
142
143         if (!try_module_get(THIS_MODULE)) {
144                 err = -EAGAIN;
145                 goto out_free;
146         }
147
148         h = instance_hashfn(queue_num);
149         hlist_add_head_rcu(&inst->hlist, &q->instance_table[h]);
150
151         spin_unlock(&q->instances_lock);
152
153         return inst;
154
155 out_free:
156         kfree(inst);
157 out_unlock:
158         spin_unlock(&q->instances_lock);
159         return ERR_PTR(err);
160 }
161
162 static void nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn,
163                         unsigned long data);
164
165 static void
166 instance_destroy_rcu(struct rcu_head *head)
167 {
168         struct nfqnl_instance *inst = container_of(head, struct nfqnl_instance,
169                                                    rcu);
170
171         nfqnl_flush(inst, NULL, 0);
172         kfree(inst);
173         module_put(THIS_MODULE);
174 }
175
176 static void
177 __instance_destroy(struct nfqnl_instance *inst)
178 {
179         hlist_del_rcu(&inst->hlist);
180         call_rcu(&inst->rcu, instance_destroy_rcu);
181 }
182
183 static void
184 instance_destroy(struct nfnl_queue_net *q, struct nfqnl_instance *inst)
185 {
186         spin_lock(&q->instances_lock);
187         __instance_destroy(inst);
188         spin_unlock(&q->instances_lock);
189 }
190
191 static inline void
192 __enqueue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
193 {
194        list_add_tail(&entry->list, &queue->queue_list);
195        queue->queue_total++;
196 }
197
198 static void
199 __dequeue_entry(struct nfqnl_instance *queue, struct nf_queue_entry *entry)
200 {
201         list_del(&entry->list);
202         queue->queue_total--;
203 }
204
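/* Find the pending entry with packet id @id, unlink it from the queue list
 * under the queue lock, and return it; NULL if no such id is queued.
 */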
205 static struct nf_queue_entry *
206 find_dequeue_entry(struct nfqnl_instance *queue, unsigned int id)
207 {
208         struct nf_queue_entry *entry = NULL, *i;
209
210         spin_lock_bh(&queue->lock);
211
212         list_for_each_entry(i, &queue->queue_list, list) {
213                 if (i->id == id) {
214                         entry = i;
215                         break;
216                 }
217         }
218
219         if (entry)
220                 __dequeue_entry(queue, entry);
221
222         spin_unlock_bh(&queue->lock);
223
224         return entry;
225 }
226
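/* Hand @entry back to the netfilter core with @verdict. For accept-like
 * verdicts (NF_ACCEPT, NF_REPEAT, NF_STOP), give conntrack a chance to update
 * the entry first; if that fails the packet is dropped instead.
 */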
227 static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
228 {
229         const struct nf_ct_hook *ct_hook;
230         int err;
231
232         if (verdict == NF_ACCEPT ||
233             verdict == NF_REPEAT ||
234             verdict == NF_STOP) {
235                 rcu_read_lock();
236                 ct_hook = rcu_dereference(nf_ct_hook);
237                 if (ct_hook) {
238                         err = ct_hook->update(entry->state.net, entry->skb);
239                         if (err < 0)
240                                 verdict = NF_DROP;
241                 }
242                 rcu_read_unlock();
243         }
244         nf_reinject(entry, verdict);
245 }
246
247 static void
248 nfqnl_flush(struct nfqnl_instance *queue, nfqnl_cmpfn cmpfn, unsigned long data)
249 {
250         struct nf_queue_entry *entry, *next;
251
252         spin_lock_bh(&queue->lock);
253         list_for_each_entry_safe(entry, next, &queue->queue_list, list) {
254                 if (!cmpfn || cmpfn(entry, data)) {
255                         list_del(&entry->list);
256                         queue->queue_total--;
257                         nfqnl_reinject(entry, NF_DROP);
258                 }
259         }
260         spin_unlock_bh(&queue->lock);
261 }
262
263 static int
264 nfqnl_put_packet_info(struct sk_buff *nlskb, struct sk_buff *packet,
265                       bool csum_verify)
266 {
267         __u32 flags = 0;
268
269         if (packet->ip_summed == CHECKSUM_PARTIAL)
270                 flags = NFQA_SKB_CSUMNOTREADY;
271         else if (csum_verify)
272                 flags = NFQA_SKB_CSUM_NOTVERIFIED;
273
274         if (skb_is_gso(packet))
275                 flags |= NFQA_SKB_GSO;
276
277         return flags ? nla_put_be32(nlskb, NFQA_SKB_INFO, htonl(flags)) : 0;
278 }
279
280 static int nfqnl_put_sk_uidgid(struct sk_buff *skb, struct sock *sk)
281 {
282         const struct cred *cred;
283
284         if (!sk_fullsock(sk))
285                 return 0;
286
287         read_lock_bh(&sk->sk_callback_lock);
288         if (sk->sk_socket && sk->sk_socket->file) {
289                 cred = sk->sk_socket->file->f_cred;
290                 if (nla_put_be32(skb, NFQA_UID,
291                     htonl(from_kuid_munged(&init_user_ns, cred->fsuid))))
292                         goto nla_put_failure;
293                 if (nla_put_be32(skb, NFQA_GID,
294                     htonl(from_kgid_munged(&init_user_ns, cred->fsgid))))
295                         goto nla_put_failure;
296         }
297         read_unlock_bh(&sk->sk_callback_lock);
298         return 0;
299
300 nla_put_failure:
301         read_unlock_bh(&sk->sk_callback_lock);
302         return -1;
303 }
304
305 static int nfqnl_put_sk_classid(struct sk_buff *skb, struct sock *sk)
306 {
307 #if IS_ENABLED(CONFIG_CGROUP_NET_CLASSID)
308         if (sk && sk_fullsock(sk)) {
309                 u32 classid = sock_cgroup_classid(&sk->sk_cgrp_data);
310
311                 if (classid && nla_put_be32(skb, NFQA_CGROUP_CLASSID, htonl(classid)))
312                         return -1;
313         }
314 #endif
315         return 0;
316 }
317
318 static u32 nfqnl_get_sk_secctx(struct sk_buff *skb, char **secdata)
319 {
320         u32 seclen = 0;
321 #if IS_ENABLED(CONFIG_NETWORK_SECMARK)
322         if (!skb || !sk_fullsock(skb->sk))
323                 return 0;
324
325         read_lock_bh(&skb->sk->sk_callback_lock);
326
327         if (skb->secmark)
328                 security_secid_to_secctx(skb->secmark, secdata, &seclen);
329
330         read_unlock_bh(&skb->sk->sk_callback_lock);
331 #endif
332         return seclen;
333 }
334
335 static u32 nfqnl_get_bridge_size(struct nf_queue_entry *entry)
336 {
337         struct sk_buff *entskb = entry->skb;
338         u32 nlalen = 0;
339
340         if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
341                 return 0;
342
343         if (skb_vlan_tag_present(entskb))
344                 nlalen += nla_total_size(nla_total_size(sizeof(__be16)) +
345                                          nla_total_size(sizeof(__be16)));
346
347         if (entskb->network_header > entskb->mac_header)
348                 nlalen += nla_total_size((entskb->network_header -
349                                           entskb->mac_header));
350
351         return nlalen;
352 }
353
354 static int nfqnl_put_bridge(struct nf_queue_entry *entry, struct sk_buff *skb)
355 {
356         struct sk_buff *entskb = entry->skb;
357
358         if (entry->state.pf != PF_BRIDGE || !skb_mac_header_was_set(entskb))
359                 return 0;
360
361         if (skb_vlan_tag_present(entskb)) {
362                 struct nlattr *nest;
363
364                 nest = nla_nest_start(skb, NFQA_VLAN);
365                 if (!nest)
366                         goto nla_put_failure;
367
368                 if (nla_put_be16(skb, NFQA_VLAN_TCI, htons(entskb->vlan_tci)) ||
369                     nla_put_be16(skb, NFQA_VLAN_PROTO, entskb->vlan_proto))
370                         goto nla_put_failure;
371
372                 nla_nest_end(skb, nest);
373         }
374
375         if (entskb->mac_header < entskb->network_header) {
376                 int len = (int)(entskb->network_header - entskb->mac_header);
377
378                 if (nla_put(skb, NFQA_L2HDR, len, skb_mac_header(entskb)))
379                         goto nla_put_failure;
380         }
381
382         return 0;
383
384 nla_put_failure:
385         return -1;
386 }
387
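/* Build the NFQNL_MSG_PACKET netlink message describing @entry for delivery
 * to userspace: metadata attributes (hook, ifindexes, mark, timestamp, ...)
 * plus up to copy_range bytes of payload, depending on the copy mode.
 */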
388 static struct sk_buff *
389 nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue,
390                            struct nf_queue_entry *entry,
391                            __be32 **packet_id_ptr)
392 {
393         size_t size;
394         size_t data_len = 0, cap_len = 0;
395         unsigned int hlen = 0;
396         struct sk_buff *skb;
397         struct nlattr *nla;
398         struct nfqnl_msg_packet_hdr *pmsg;
399         struct nlmsghdr *nlh;
400         struct sk_buff *entskb = entry->skb;
401         struct net_device *indev;
402         struct net_device *outdev;
403         struct nf_conn *ct = NULL;
404         enum ip_conntrack_info ctinfo = 0;
405         const struct nfnl_ct_hook *nfnl_ct;
406         bool csum_verify;
407         char *secdata = NULL;
408         u32 seclen = 0;
409         ktime_t tstamp;
410
411         size = nlmsg_total_size(sizeof(struct nfgenmsg))
412                 + nla_total_size(sizeof(struct nfqnl_msg_packet_hdr))
413                 + nla_total_size(sizeof(u_int32_t))     /* ifindex */
414                 + nla_total_size(sizeof(u_int32_t))     /* ifindex */
415 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
416                 + nla_total_size(sizeof(u_int32_t))     /* ifindex */
417                 + nla_total_size(sizeof(u_int32_t))     /* ifindex */
418 #endif
419                 + nla_total_size(sizeof(u_int32_t))     /* mark */
420                 + nla_total_size(sizeof(u_int32_t))     /* priority */
421                 + nla_total_size(sizeof(struct nfqnl_msg_packet_hw))
422                 + nla_total_size(sizeof(u_int32_t))     /* skbinfo */
423 #if IS_ENABLED(CONFIG_CGROUP_NET_CLASSID)
424                 + nla_total_size(sizeof(u_int32_t))     /* classid */
425 #endif
426                 + nla_total_size(sizeof(u_int32_t));    /* cap_len */
427
428         tstamp = skb_tstamp_cond(entskb, false);
429         if (tstamp)
430                 size += nla_total_size(sizeof(struct nfqnl_msg_packet_timestamp));
431
432         size += nfqnl_get_bridge_size(entry);
433
434         if (entry->state.hook <= NF_INET_FORWARD ||
435            (entry->state.hook == NF_INET_POST_ROUTING && entskb->sk == NULL))
436                 csum_verify = !skb_csum_unnecessary(entskb);
437         else
438                 csum_verify = false;
439
440         outdev = entry->state.out;
441
442         switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) {
443         case NFQNL_COPY_META:
444         case NFQNL_COPY_NONE:
445                 break;
446
447         case NFQNL_COPY_PACKET:
448                 if (!(queue->flags & NFQA_CFG_F_GSO) &&
449                     entskb->ip_summed == CHECKSUM_PARTIAL &&
450                     skb_checksum_help(entskb))
451                         return NULL;
452
453                 data_len = READ_ONCE(queue->copy_range);
454                 if (data_len > entskb->len)
455                         data_len = entskb->len;
456
457                 hlen = skb_zerocopy_headlen(entskb);
458                 hlen = min_t(unsigned int, hlen, data_len);
459                 size += sizeof(struct nlattr) + hlen;
460                 cap_len = entskb->len;
461                 break;
462         }
463
464         nfnl_ct = rcu_dereference(nfnl_ct_hook);
465
466 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
467         if (queue->flags & NFQA_CFG_F_CONNTRACK) {
468                 if (nfnl_ct != NULL) {
469                         ct = nf_ct_get(entskb, &ctinfo);
470                         if (ct != NULL)
471                                 size += nfnl_ct->build_size(ct);
472                 }
473         }
474 #endif
475
476         if (queue->flags & NFQA_CFG_F_UID_GID) {
477                 size += (nla_total_size(sizeof(u_int32_t))      /* uid */
478                         + nla_total_size(sizeof(u_int32_t)));   /* gid */
479         }
480
481         if ((queue->flags & NFQA_CFG_F_SECCTX) && entskb->sk) {
482                 seclen = nfqnl_get_sk_secctx(entskb, &secdata);
483                 if (seclen)
484                         size += nla_total_size(seclen);
485         }
486
487         skb = alloc_skb(size, GFP_ATOMIC);
488         if (!skb) {
489                 skb_tx_error(entskb);
490                 goto nlmsg_failure;
491         }
492
493         nlh = nfnl_msg_put(skb, 0, 0,
494                            nfnl_msg_type(NFNL_SUBSYS_QUEUE, NFQNL_MSG_PACKET),
495                            0, entry->state.pf, NFNETLINK_V0,
496                            htons(queue->queue_num));
497         if (!nlh) {
498                 skb_tx_error(entskb);
499                 kfree_skb(skb);
500                 goto nlmsg_failure;
501         }
502
503         nla = __nla_reserve(skb, NFQA_PACKET_HDR, sizeof(*pmsg));
504         pmsg = nla_data(nla);
505         pmsg->hw_protocol       = entskb->protocol;
506         pmsg->hook              = entry->state.hook;
507         *packet_id_ptr          = &pmsg->packet_id;
508
509         indev = entry->state.in;
510         if (indev) {
511 #if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
512                 if (nla_put_be32(skb, NFQA_IFINDEX_INDEV, htonl(indev->ifindex)))
513                         goto nla_put_failure;
514 #else
515                 if (entry->state.pf == PF_BRIDGE) {
516                         /* Case 1: indev is physical input device, we need to
517                          * look for bridge group (when called from
518                          * netfilter_bridge) */
519                         if (nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
520                                          htonl(indev->ifindex)) ||
521                         /* this is the bridge group "brX" */
522                         /* rcu_read_lock()ed by __nf_queue */
523                             nla_put_be32(skb, NFQA_IFINDEX_INDEV,
524                                          htonl(br_port_get_rcu(indev)->br->dev->ifindex)))
525                                 goto nla_put_failure;
526                 } else {
527                         int physinif;
528
529                         /* Case 2: indev is bridge group, we need to look for
530                          * physical device (when called from ipv4) */
531                         if (nla_put_be32(skb, NFQA_IFINDEX_INDEV,
532                                          htonl(indev->ifindex)))
533                                 goto nla_put_failure;
534
535                         physinif = nf_bridge_get_physinif(entskb);
536                         if (physinif &&
537                             nla_put_be32(skb, NFQA_IFINDEX_PHYSINDEV,
538                                          htonl(physinif)))
539                                 goto nla_put_failure;
540                 }
541 #endif
542         }
543
544         if (outdev) {
545 #if !IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
546                 if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV, htonl(outdev->ifindex)))
547                         goto nla_put_failure;
548 #else
549                 if (entry->state.pf == PF_BRIDGE) {
550                         /* Case 1: outdev is physical output device, we need to
551                          * look for bridge group (when called from
552                          * netfilter_bridge) */
553                         if (nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
554                                          htonl(outdev->ifindex)) ||
555                         /* this is the bridge group "brX" */
556                         /* rcu_read_lock()ed by __nf_queue */
557                             nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
558                                          htonl(br_port_get_rcu(outdev)->br->dev->ifindex)))
559                                 goto nla_put_failure;
560                 } else {
561                         int physoutif;
562
563                         /* Case 2: outdev is bridge group, we need to look for
564                          * physical output device (when called from ipv4) */
565                         if (nla_put_be32(skb, NFQA_IFINDEX_OUTDEV,
566                                          htonl(outdev->ifindex)))
567                                 goto nla_put_failure;
568
569                         physoutif = nf_bridge_get_physoutif(entskb);
570                         if (physoutif &&
571                             nla_put_be32(skb, NFQA_IFINDEX_PHYSOUTDEV,
572                                          htonl(physoutif)))
573                                 goto nla_put_failure;
574                 }
575 #endif
576         }
577
578         if (entskb->mark &&
579             nla_put_be32(skb, NFQA_MARK, htonl(entskb->mark)))
580                 goto nla_put_failure;
581
582         if (entskb->priority &&
583             nla_put_be32(skb, NFQA_PRIORITY, htonl(entskb->priority)))
584                 goto nla_put_failure;
585
586         if (indev && entskb->dev &&
587             skb_mac_header_was_set(entskb) &&
588             skb_mac_header_len(entskb) != 0) {
589                 struct nfqnl_msg_packet_hw phw;
590                 int len;
591
592                 memset(&phw, 0, sizeof(phw));
593                 len = dev_parse_header(entskb, phw.hw_addr);
594                 if (len) {
595                         phw.hw_addrlen = htons(len);
596                         if (nla_put(skb, NFQA_HWADDR, sizeof(phw), &phw))
597                                 goto nla_put_failure;
598                 }
599         }
600
601         if (nfqnl_put_bridge(entry, skb) < 0)
602                 goto nla_put_failure;
603
604         if (entry->state.hook <= NF_INET_FORWARD && tstamp) {
605                 struct nfqnl_msg_packet_timestamp ts;
606                 struct timespec64 kts = ktime_to_timespec64(tstamp);
607
608                 ts.sec = cpu_to_be64(kts.tv_sec);
609                 ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC);
610
611                 if (nla_put(skb, NFQA_TIMESTAMP, sizeof(ts), &ts))
612                         goto nla_put_failure;
613         }
614
615         if ((queue->flags & NFQA_CFG_F_UID_GID) && entskb->sk &&
616             nfqnl_put_sk_uidgid(skb, entskb->sk) < 0)
617                 goto nla_put_failure;
618
619         if (nfqnl_put_sk_classid(skb, entskb->sk) < 0)
620                 goto nla_put_failure;
621
622         if (seclen && nla_put(skb, NFQA_SECCTX, seclen, secdata))
623                 goto nla_put_failure;
624
625         if (ct && nfnl_ct->build(skb, ct, ctinfo, NFQA_CT, NFQA_CT_INFO) < 0)
626                 goto nla_put_failure;
627
628         if (cap_len > data_len &&
629             nla_put_be32(skb, NFQA_CAP_LEN, htonl(cap_len)))
630                 goto nla_put_failure;
631
632         if (nfqnl_put_packet_info(skb, entskb, csum_verify))
633                 goto nla_put_failure;
634
635         if (data_len) {
636                 struct nlattr *nla;
637
638                 if (skb_tailroom(skb) < sizeof(*nla) + hlen)
639                         goto nla_put_failure;
640
641                 nla = skb_put(skb, sizeof(*nla));
642                 nla->nla_type = NFQA_PAYLOAD;
643                 nla->nla_len = nla_attr_size(data_len);
644
645                 if (skb_zerocopy(skb, entskb, data_len, hlen))
646                         goto nla_put_failure;
647         }
648
649         nlh->nlmsg_len = skb->len;
650         if (seclen)
651                 security_release_secctx(secdata, seclen);
652         return skb;
653
654 nla_put_failure:
655         skb_tx_error(entskb);
656         kfree_skb(skb);
657         net_err_ratelimited("nf_queue: error creating packet message\n");
658 nlmsg_failure:
659         if (seclen)
660                 security_release_secctx(secdata, seclen);
661         return NULL;
662 }
663
664 static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry)
665 {
666 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
667         static const unsigned long flags = IPS_CONFIRMED | IPS_DYING;
668         const struct nf_conn *ct = (void *)skb_nfct(entry->skb);
669
670         if (ct && ((ct->status & flags) == IPS_DYING))
671                 return true;
672 #endif
673         return false;
674 }
675
676 static int
677 __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue,
678                         struct nf_queue_entry *entry)
679 {
680         struct sk_buff *nskb;
681         int err = -ENOBUFS;
682         __be32 *packet_id_ptr;
683         int failopen = 0;
684
685         nskb = nfqnl_build_packet_message(net, queue, entry, &packet_id_ptr);
686         if (nskb == NULL) {
687                 err = -ENOMEM;
688                 goto err_out;
689         }
690         spin_lock_bh(&queue->lock);
691
692         if (nf_ct_drop_unconfirmed(entry))
693                 goto err_out_free_nskb;
694
695         if (queue->queue_total >= queue->queue_maxlen) {
696                 if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
697                         failopen = 1;
698                         err = 0;
699                 } else {
700                         queue->queue_dropped++;
701                         net_warn_ratelimited("nf_queue: full at %d entries, dropping packet(s)\n",
702                                              queue->queue_total);
703                 }
704                 goto err_out_free_nskb;
705         }
706         entry->id = ++queue->id_sequence;
707         *packet_id_ptr = htonl(entry->id);
708
709         /* nfnetlink_unicast will either free the nskb or add it to a socket */
710         err = nfnetlink_unicast(nskb, net, queue->peer_portid);
711         if (err < 0) {
712                 if (queue->flags & NFQA_CFG_F_FAIL_OPEN) {
713                         failopen = 1;
714                         err = 0;
715                 } else {
716                         queue->queue_user_dropped++;
717                 }
718                 goto err_out_unlock;
719         }
720
721         __enqueue_entry(queue, entry);
722
723         spin_unlock_bh(&queue->lock);
724         return 0;
725
726 err_out_free_nskb:
727         kfree_skb(nskb);
728 err_out_unlock:
729         spin_unlock_bh(&queue->lock);
730         if (failopen)
731                 nfqnl_reinject(entry, NF_ACCEPT);
732 err_out:
733         return err;
734 }
735
736 static struct nf_queue_entry *
737 nf_queue_entry_dup(struct nf_queue_entry *e)
738 {
739         struct nf_queue_entry *entry = kmemdup(e, e->size, GFP_ATOMIC);
740
741         if (!entry)
742                 return NULL;
743
744         if (nf_queue_entry_get_refs(entry))
745                 return entry;
746
747         kfree(entry);
748         return NULL;
749 }
750
751 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
752 /* When called from bridge netfilter, skb->data must point to MAC header
753  * before calling skb_gso_segment(). Else, original MAC header is lost
754  * and segmented skbs will be sent to wrong destination.
755  */
756 static void nf_bridge_adjust_skb_data(struct sk_buff *skb)
757 {
758         if (nf_bridge_info_get(skb))
759                 __skb_push(skb, skb->network_header - skb->mac_header);
760 }
761
762 static void nf_bridge_adjust_segmented_data(struct sk_buff *skb)
763 {
764         if (nf_bridge_info_get(skb))
765                 __skb_pull(skb, skb->network_header - skb->mac_header);
766 }
767 #else
768 #define nf_bridge_adjust_skb_data(s) do {} while (0)
769 #define nf_bridge_adjust_segmented_data(s) do {} while (0)
770 #endif
771
772 static int
773 __nfqnl_enqueue_packet_gso(struct net *net, struct nfqnl_instance *queue,
774                            struct sk_buff *skb, struct nf_queue_entry *entry)
775 {
776         int ret = -ENOMEM;
777         struct nf_queue_entry *entry_seg;
778
779         nf_bridge_adjust_segmented_data(skb);
780
781         if (skb->next == NULL) { /* last packet, no need to copy entry */
782                 struct sk_buff *gso_skb = entry->skb;
783                 entry->skb = skb;
784                 ret = __nfqnl_enqueue_packet(net, queue, entry);
785                 if (ret)
786                         entry->skb = gso_skb;
787                 return ret;
788         }
789
790         skb_mark_not_on_list(skb);
791
792         entry_seg = nf_queue_entry_dup(entry);
793         if (entry_seg) {
794                 entry_seg->skb = skb;
795                 ret = __nfqnl_enqueue_packet(net, queue, entry_seg);
796                 if (ret)
797                         nf_queue_entry_free(entry_seg);
798         }
799         return ret;
800 }
801
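/* nf_queue handler entry point: enqueue @entry on queue @queuenum. GSO skbs
 * are software-segmented first unless the queue was configured with
 * NFQA_CFG_F_GSO, in which case the aggregate skb is queued as-is.
 */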
802 static int
803 nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum)
804 {
805         unsigned int queued;
806         struct nfqnl_instance *queue;
807         struct sk_buff *skb, *segs, *nskb;
808         int err = -ENOBUFS;
809         struct net *net = entry->state.net;
810         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
811
812         /* rcu_read_lock()ed by nf_hook_thresh */
813         queue = instance_lookup(q, queuenum);
814         if (!queue)
815                 return -ESRCH;
816
817         if (queue->copy_mode == NFQNL_COPY_NONE)
818                 return -EINVAL;
819
820         skb = entry->skb;
821
822         switch (entry->state.pf) {
823         case NFPROTO_IPV4:
824                 skb->protocol = htons(ETH_P_IP);
825                 break;
826         case NFPROTO_IPV6:
827                 skb->protocol = htons(ETH_P_IPV6);
828                 break;
829         }
830
831         if ((queue->flags & NFQA_CFG_F_GSO) || !skb_is_gso(skb))
832                 return __nfqnl_enqueue_packet(net, queue, entry);
833
834         nf_bridge_adjust_skb_data(skb);
835         segs = skb_gso_segment(skb, 0);
836         /* Does not use PTR_ERR to limit the number of error codes that can be
837          * returned by nf_queue.  For instance, callers rely on -ESRCH to
838          * mean 'ignore this hook'.
839          */
840         if (IS_ERR_OR_NULL(segs))
841                 goto out_err;
842         queued = 0;
843         err = 0;
844         skb_list_walk_safe(segs, segs, nskb) {
845                 if (err == 0)
846                         err = __nfqnl_enqueue_packet_gso(net, queue,
847                                                         segs, entry);
848                 if (err == 0)
849                         queued++;
850                 else
851                         kfree_skb(segs);
852         }
853
854         if (queued) {
855                 if (err) /* some segments are already queued */
856                         nf_queue_entry_free(entry);
857                 kfree_skb(skb);
858                 return 0;
859         }
860  out_err:
861         nf_bridge_adjust_segmented_data(skb);
862         return err;
863 }
864
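/* Replace the payload of the queued skb with @data_len bytes supplied by the
 * userspace verdict, shrinking or growing the skb by @diff bytes as needed.
 */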
865 static int
866 nfqnl_mangle(void *data, unsigned int data_len, struct nf_queue_entry *e, int diff)
867 {
868         struct sk_buff *nskb;
869
870         if (diff < 0) {
871                 unsigned int min_len = skb_transport_offset(e->skb);
872
873                 if (data_len < min_len)
874                         return -EINVAL;
875
876                 if (pskb_trim(e->skb, data_len))
877                         return -ENOMEM;
878         } else if (diff > 0) {
879                 if (data_len > 0xFFFF)
880                         return -EINVAL;
881                 if (diff > skb_tailroom(e->skb)) {
882                         nskb = skb_copy_expand(e->skb, skb_headroom(e->skb),
883                                                diff, GFP_ATOMIC);
884                         if (!nskb)
885                                 return -ENOMEM;
886                         kfree_skb(e->skb);
887                         e->skb = nskb;
888                 }
889                 skb_put(e->skb, diff);
890         }
891         if (skb_ensure_writable(e->skb, data_len))
892                 return -ENOMEM;
893         skb_copy_to_linear_data(e->skb, data, data_len);
894         e->skb->ip_summed = CHECKSUM_NONE;
895         return 0;
896 }
897
898 static int
899 nfqnl_set_mode(struct nfqnl_instance *queue,
900                unsigned char mode, unsigned int range)
901 {
902         int status = 0;
903
904         spin_lock_bh(&queue->lock);
905         switch (mode) {
906         case NFQNL_COPY_NONE:
907         case NFQNL_COPY_META:
908                 queue->copy_mode = mode;
909                 queue->copy_range = 0;
910                 break;
911
912         case NFQNL_COPY_PACKET:
913                 queue->copy_mode = mode;
914                 if (range == 0 || range > NFQNL_MAX_COPY_RANGE)
915                         queue->copy_range = NFQNL_MAX_COPY_RANGE;
916                 else
917                         queue->copy_range = range;
918                 break;
919
920         default:
921                 status = -EINVAL;
922
923         }
924         spin_unlock_bh(&queue->lock);
925
926         return status;
927 }
928
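/* Return 1 if @entry entered or will leave through @ifindex, including the
 * bridge physical in/out devices when bridge netfilter is enabled.
 */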
929 static int
930 dev_cmp(struct nf_queue_entry *entry, unsigned long ifindex)
931 {
932 #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
933         int physinif, physoutif;
934
935         physinif = nf_bridge_get_physinif(entry->skb);
936         physoutif = nf_bridge_get_physoutif(entry->skb);
937
938         if (physinif == ifindex || physoutif == ifindex)
939                 return 1;
940 #endif
941         if (entry->state.in)
942                 if (entry->state.in->ifindex == ifindex)
943                         return 1;
944         if (entry->state.out)
945                 if (entry->state.out->ifindex == ifindex)
946                         return 1;
947
948         return 0;
949 }
950
951 /* drop all packets with either indev or outdev == ifindex from all queue
952  * instances */
953 static void
954 nfqnl_dev_drop(struct net *net, int ifindex)
955 {
956         int i;
957         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
958
959         rcu_read_lock();
960
961         for (i = 0; i < INSTANCE_BUCKETS; i++) {
962                 struct nfqnl_instance *inst;
963                 struct hlist_head *head = &q->instance_table[i];
964
965                 hlist_for_each_entry_rcu(inst, head, hlist)
966                         nfqnl_flush(inst, dev_cmp, ifindex);
967         }
968
969         rcu_read_unlock();
970 }
971
972 static int
973 nfqnl_rcv_dev_event(struct notifier_block *this,
974                     unsigned long event, void *ptr)
975 {
976         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
977
978         /* Drop any packets associated with the downed device */
979         if (event == NETDEV_DOWN)
980                 nfqnl_dev_drop(dev_net(dev), dev->ifindex);
981         return NOTIFY_DONE;
982 }
983
984 static struct notifier_block nfqnl_dev_notifier = {
985         .notifier_call  = nfqnl_rcv_dev_event,
986 };
987
988 static void nfqnl_nf_hook_drop(struct net *net)
989 {
990         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
991         int i;
992
993         /* This function is also called on net namespace error unwind,
994          * when pernet_ops->init() failed and ->exit() functions of the
995          * previous pernet_ops gets called.
996          *
997          * This may result in a call to nfqnl_nf_hook_drop() before
998          * struct nfnl_queue_net was allocated.
999          */
1000         if (!q)
1001                 return;
1002
1003         for (i = 0; i < INSTANCE_BUCKETS; i++) {
1004                 struct nfqnl_instance *inst;
1005                 struct hlist_head *head = &q->instance_table[i];
1006
1007                 hlist_for_each_entry_rcu(inst, head, hlist)
1008                         nfqnl_flush(inst, NULL, 0);
1009         }
1010 }
1011
1012 static int
1013 nfqnl_rcv_nl_event(struct notifier_block *this,
1014                    unsigned long event, void *ptr)
1015 {
1016         struct netlink_notify *n = ptr;
1017         struct nfnl_queue_net *q = nfnl_queue_pernet(n->net);
1018
1019         if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) {
1020                 int i;
1021
1022                 /* destroy all instances for this portid */
1023                 spin_lock(&q->instances_lock);
1024                 for (i = 0; i < INSTANCE_BUCKETS; i++) {
1025                         struct hlist_node *t2;
1026                         struct nfqnl_instance *inst;
1027                         struct hlist_head *head = &q->instance_table[i];
1028
1029                         hlist_for_each_entry_safe(inst, t2, head, hlist) {
1030                                 if (n->portid == inst->peer_portid)
1031                                         __instance_destroy(inst);
1032                         }
1033                 }
1034                 spin_unlock(&q->instances_lock);
1035         }
1036         return NOTIFY_DONE;
1037 }
1038
1039 static struct notifier_block nfqnl_rtnl_notifier = {
1040         .notifier_call  = nfqnl_rcv_nl_event,
1041 };
1042
1043 static const struct nla_policy nfqa_vlan_policy[NFQA_VLAN_MAX + 1] = {
1044         [NFQA_VLAN_TCI]         = { .type = NLA_U16},
1045         [NFQA_VLAN_PROTO]       = { .type = NLA_U16},
1046 };
1047
1048 static const struct nla_policy nfqa_verdict_policy[NFQA_MAX+1] = {
1049         [NFQA_VERDICT_HDR]      = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
1050         [NFQA_MARK]             = { .type = NLA_U32 },
1051         [NFQA_PAYLOAD]          = { .type = NLA_UNSPEC },
1052         [NFQA_CT]               = { .type = NLA_UNSPEC },
1053         [NFQA_EXP]              = { .type = NLA_UNSPEC },
1054         [NFQA_VLAN]             = { .type = NLA_NESTED },
1055         [NFQA_PRIORITY]         = { .type = NLA_U32 },
1056 };
1057
1058 static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = {
1059         [NFQA_VERDICT_HDR]      = { .len = sizeof(struct nfqnl_msg_verdict_hdr) },
1060         [NFQA_MARK]             = { .type = NLA_U32 },
1061         [NFQA_PRIORITY]         = { .type = NLA_U32 },
1062 };
1063
1064 static struct nfqnl_instance *
1065 verdict_instance_lookup(struct nfnl_queue_net *q, u16 queue_num, u32 nlportid)
1066 {
1067         struct nfqnl_instance *queue;
1068
1069         queue = instance_lookup(q, queue_num);
1070         if (!queue)
1071                 return ERR_PTR(-ENODEV);
1072
1073         if (queue->peer_portid != nlportid)
1074                 return ERR_PTR(-EPERM);
1075
1076         return queue;
1077 }
1078
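/* Fetch and sanity-check the NFQA_VERDICT_HDR attribute; returns NULL if it
 * is absent or carries a verdict userspace may not issue (e.g. NF_STOLEN).
 */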
1079 static struct nfqnl_msg_verdict_hdr*
1080 verdicthdr_get(const struct nlattr * const nfqa[])
1081 {
1082         struct nfqnl_msg_verdict_hdr *vhdr;
1083         unsigned int verdict;
1084
1085         if (!nfqa[NFQA_VERDICT_HDR])
1086                 return NULL;
1087
1088         vhdr = nla_data(nfqa[NFQA_VERDICT_HDR]);
1089         verdict = ntohl(vhdr->verdict) & NF_VERDICT_MASK;
1090         if (verdict > NF_MAX_VERDICT || verdict == NF_STOLEN)
1091                 return NULL;
1092         return vhdr;
1093 }
1094
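/* Serial-number style comparison: true if @id is logically after @max, which
 * stays correct when the 32-bit packet id sequence wraps around.
 */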
1095 static int nfq_id_after(unsigned int id, unsigned int max)
1096 {
1097         return (int)(id - max) > 0;
1098 }
1099
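/* NFQNL_MSG_VERDICT_BATCH handler: apply one verdict (and optional mark /
 * priority) to every queued entry whose id is not after the id given in the
 * verdict header.
 */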
1100 static int nfqnl_recv_verdict_batch(struct sk_buff *skb,
1101                                     const struct nfnl_info *info,
1102                                     const struct nlattr * const nfqa[])
1103 {
1104         struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
1105         u16 queue_num = ntohs(info->nfmsg->res_id);
1106         struct nf_queue_entry *entry, *tmp;
1107         struct nfqnl_msg_verdict_hdr *vhdr;
1108         struct nfqnl_instance *queue;
1109         unsigned int verdict, maxid;
1110         LIST_HEAD(batch_list);
1111
1112         queue = verdict_instance_lookup(q, queue_num,
1113                                         NETLINK_CB(skb).portid);
1114         if (IS_ERR(queue))
1115                 return PTR_ERR(queue);
1116
1117         vhdr = verdicthdr_get(nfqa);
1118         if (!vhdr)
1119                 return -EINVAL;
1120
1121         verdict = ntohl(vhdr->verdict);
1122         maxid = ntohl(vhdr->id);
1123
1124         spin_lock_bh(&queue->lock);
1125
1126         list_for_each_entry_safe(entry, tmp, &queue->queue_list, list) {
1127                 if (nfq_id_after(entry->id, maxid))
1128                         break;
1129                 __dequeue_entry(queue, entry);
1130                 list_add_tail(&entry->list, &batch_list);
1131         }
1132
1133         spin_unlock_bh(&queue->lock);
1134
1135         if (list_empty(&batch_list))
1136                 return -ENOENT;
1137
1138         list_for_each_entry_safe(entry, tmp, &batch_list, list) {
1139                 if (nfqa[NFQA_MARK])
1140                         entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
1141
1142                 if (nfqa[NFQA_PRIORITY])
1143                         entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
1144
1145                 nfqnl_reinject(entry, verdict);
1146         }
1147         return 0;
1148 }
1149
1150 static struct nf_conn *nfqnl_ct_parse(const struct nfnl_ct_hook *nfnl_ct,
1151                                       const struct nlmsghdr *nlh,
1152                                       const struct nlattr * const nfqa[],
1153                                       struct nf_queue_entry *entry,
1154                                       enum ip_conntrack_info *ctinfo)
1155 {
1156 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
1157         struct nf_conn *ct;
1158
1159         ct = nf_ct_get(entry->skb, ctinfo);
1160         if (ct == NULL)
1161                 return NULL;
1162
1163         if (nfnl_ct->parse(nfqa[NFQA_CT], ct) < 0)
1164                 return NULL;
1165
1166         if (nfqa[NFQA_EXP])
1167                 nfnl_ct->attach_expect(nfqa[NFQA_EXP], ct,
1168                                       NETLINK_CB(entry->skb).portid,
1169                                       nlmsg_report(nlh));
1170         return ct;
1171 #else
1172         return NULL;
1173 #endif
1174 }
1175
1176 static int nfqa_parse_bridge(struct nf_queue_entry *entry,
1177                              const struct nlattr * const nfqa[])
1178 {
1179         if (nfqa[NFQA_VLAN]) {
1180                 struct nlattr *tb[NFQA_VLAN_MAX + 1];
1181                 int err;
1182
1183                 err = nla_parse_nested_deprecated(tb, NFQA_VLAN_MAX,
1184                                                   nfqa[NFQA_VLAN],
1185                                                   nfqa_vlan_policy, NULL);
1186                 if (err < 0)
1187                         return err;
1188
1189                 if (!tb[NFQA_VLAN_TCI] || !tb[NFQA_VLAN_PROTO])
1190                         return -EINVAL;
1191
1192                 __vlan_hwaccel_put_tag(entry->skb,
1193                         nla_get_be16(tb[NFQA_VLAN_PROTO]),
1194                         ntohs(nla_get_be16(tb[NFQA_VLAN_TCI])));
1195         }
1196
1197         if (nfqa[NFQA_L2HDR]) {
1198                 int mac_header_len = entry->skb->network_header -
1199                         entry->skb->mac_header;
1200
1201                 if (mac_header_len != nla_len(nfqa[NFQA_L2HDR]))
1202                         return -EINVAL;
1203                 else if (mac_header_len > 0)
1204                         memcpy(skb_mac_header(entry->skb),
1205                                nla_data(nfqa[NFQA_L2HDR]),
1206                                mac_header_len);
1207         }
1208
1209         return 0;
1210 }
1211
1212 static int nfqnl_recv_verdict(struct sk_buff *skb, const struct nfnl_info *info,
1213                               const struct nlattr * const nfqa[])
1214 {
1215         struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
1216         u_int16_t queue_num = ntohs(info->nfmsg->res_id);
1217         const struct nfnl_ct_hook *nfnl_ct;
1218         struct nfqnl_msg_verdict_hdr *vhdr;
1219         enum ip_conntrack_info ctinfo;
1220         struct nfqnl_instance *queue;
1221         struct nf_queue_entry *entry;
1222         struct nf_conn *ct = NULL;
1223         unsigned int verdict;
1224         int err;
1225
1226         queue = verdict_instance_lookup(q, queue_num,
1227                                         NETLINK_CB(skb).portid);
1228         if (IS_ERR(queue))
1229                 return PTR_ERR(queue);
1230
1231         vhdr = verdicthdr_get(nfqa);
1232         if (!vhdr)
1233                 return -EINVAL;
1234
1235         verdict = ntohl(vhdr->verdict);
1236
1237         entry = find_dequeue_entry(queue, ntohl(vhdr->id));
1238         if (entry == NULL)
1239                 return -ENOENT;
1240
1241         /* rcu lock already held from nfnl->call_rcu. */
1242         nfnl_ct = rcu_dereference(nfnl_ct_hook);
1243
1244         if (nfqa[NFQA_CT]) {
1245                 if (nfnl_ct != NULL)
1246                         ct = nfqnl_ct_parse(nfnl_ct, info->nlh, nfqa, entry,
1247                                             &ctinfo);
1248         }
1249
1250         if (entry->state.pf == PF_BRIDGE) {
1251                 err = nfqa_parse_bridge(entry, nfqa);
1252                 if (err < 0)
1253                         return err;
1254         }
1255
1256         if (nfqa[NFQA_PAYLOAD]) {
1257                 u16 payload_len = nla_len(nfqa[NFQA_PAYLOAD]);
1258                 int diff = payload_len - entry->skb->len;
1259
1260                 if (nfqnl_mangle(nla_data(nfqa[NFQA_PAYLOAD]),
1261                                  payload_len, entry, diff) < 0)
1262                         verdict = NF_DROP;
1263
1264                 if (ct && diff)
1265                         nfnl_ct->seq_adjust(entry->skb, ct, ctinfo, diff);
1266         }
1267
1268         if (nfqa[NFQA_MARK])
1269                 entry->skb->mark = ntohl(nla_get_be32(nfqa[NFQA_MARK]));
1270
1271         if (nfqa[NFQA_PRIORITY])
1272                 entry->skb->priority = ntohl(nla_get_be32(nfqa[NFQA_PRIORITY]));
1273
1274         nfqnl_reinject(entry, verdict);
1275         return 0;
1276 }
1277
1278 static int nfqnl_recv_unsupp(struct sk_buff *skb, const struct nfnl_info *info,
1279                              const struct nlattr * const cda[])
1280 {
1281         return -ENOTSUPP;
1282 }
1283
1284 static const struct nla_policy nfqa_cfg_policy[NFQA_CFG_MAX+1] = {
1285         [NFQA_CFG_CMD]          = { .len = sizeof(struct nfqnl_msg_config_cmd) },
1286         [NFQA_CFG_PARAMS]       = { .len = sizeof(struct nfqnl_msg_config_params) },
1287         [NFQA_CFG_QUEUE_MAXLEN] = { .type = NLA_U32 },
1288         [NFQA_CFG_MASK]         = { .type = NLA_U32 },
1289         [NFQA_CFG_FLAGS]        = { .type = NLA_U32 },
1290 };
1291
1292 static const struct nf_queue_handler nfqh = {
1293         .outfn          = nfqnl_enqueue_packet,
1294         .nf_hook_drop   = nfqnl_nf_hook_drop,
1295 };
1296
1297 static int nfqnl_recv_config(struct sk_buff *skb, const struct nfnl_info *info,
1298                              const struct nlattr * const nfqa[])
1299 {
1300         struct nfnl_queue_net *q = nfnl_queue_pernet(info->net);
1301         u_int16_t queue_num = ntohs(info->nfmsg->res_id);
1302         struct nfqnl_msg_config_cmd *cmd = NULL;
1303         struct nfqnl_instance *queue;
1304         __u32 flags = 0, mask = 0;
1305         int ret = 0;
1306
1307         if (nfqa[NFQA_CFG_CMD]) {
1308                 cmd = nla_data(nfqa[NFQA_CFG_CMD]);
1309
1310                 /* Obsolete commands without queue context */
1311                 switch (cmd->command) {
1312                 case NFQNL_CFG_CMD_PF_BIND: return 0;
1313                 case NFQNL_CFG_CMD_PF_UNBIND: return 0;
1314                 }
1315         }
1316
1317         /* Check whether we support these flags in the first place; their
1318          * dependencies must also be present so that atomicity is not broken.
1319          */
1320         if (nfqa[NFQA_CFG_FLAGS]) {
1321                 if (!nfqa[NFQA_CFG_MASK]) {
1322                         /* A mask is needed to specify which flags are being
1323                          * changed.
1324                          */
1325                         return -EINVAL;
1326                 }
1327
1328                 flags = ntohl(nla_get_be32(nfqa[NFQA_CFG_FLAGS]));
1329                 mask = ntohl(nla_get_be32(nfqa[NFQA_CFG_MASK]));
1330
1331                 if (flags >= NFQA_CFG_F_MAX)
1332                         return -EOPNOTSUPP;
1333
1334 #if !IS_ENABLED(CONFIG_NETWORK_SECMARK)
1335                 if (flags & mask & NFQA_CFG_F_SECCTX)
1336                         return -EOPNOTSUPP;
1337 #endif
1338                 if ((flags & mask & NFQA_CFG_F_CONNTRACK) &&
1339                     !rcu_access_pointer(nfnl_ct_hook)) {
1340 #ifdef CONFIG_MODULES
1341                         nfnl_unlock(NFNL_SUBSYS_QUEUE);
1342                         request_module("ip_conntrack_netlink");
1343                         nfnl_lock(NFNL_SUBSYS_QUEUE);
1344                         if (rcu_access_pointer(nfnl_ct_hook))
1345                                 return -EAGAIN;
1346 #endif
1347                         return -EOPNOTSUPP;
1348                 }
1349         }
1350
1351         rcu_read_lock();
1352         queue = instance_lookup(q, queue_num);
1353         if (queue && queue->peer_portid != NETLINK_CB(skb).portid) {
1354                 ret = -EPERM;
1355                 goto err_out_unlock;
1356         }
1357
1358         if (cmd != NULL) {
1359                 switch (cmd->command) {
1360                 case NFQNL_CFG_CMD_BIND:
1361                         if (queue) {
1362                                 ret = -EBUSY;
1363                                 goto err_out_unlock;
1364                         }
1365                         queue = instance_create(q, queue_num,
1366                                                 NETLINK_CB(skb).portid);
1367                         if (IS_ERR(queue)) {
1368                                 ret = PTR_ERR(queue);
1369                                 goto err_out_unlock;
1370                         }
1371                         break;
1372                 case NFQNL_CFG_CMD_UNBIND:
1373                         if (!queue) {
1374                                 ret = -ENODEV;
1375                                 goto err_out_unlock;
1376                         }
1377                         instance_destroy(q, queue);
1378                         goto err_out_unlock;
1379                 case NFQNL_CFG_CMD_PF_BIND:
1380                 case NFQNL_CFG_CMD_PF_UNBIND:
1381                         break;
1382                 default:
1383                         ret = -ENOTSUPP;
1384                         goto err_out_unlock;
1385                 }
1386         }
1387
1388         if (!queue) {
1389                 ret = -ENODEV;
1390                 goto err_out_unlock;
1391         }
1392
1393         if (nfqa[NFQA_CFG_PARAMS]) {
1394                 struct nfqnl_msg_config_params *params =
1395                         nla_data(nfqa[NFQA_CFG_PARAMS]);
1396
1397                 nfqnl_set_mode(queue, params->copy_mode,
1398                                 ntohl(params->copy_range));
1399         }
1400
1401         if (nfqa[NFQA_CFG_QUEUE_MAXLEN]) {
1402                 __be32 *queue_maxlen = nla_data(nfqa[NFQA_CFG_QUEUE_MAXLEN]);
1403
1404                 spin_lock_bh(&queue->lock);
1405                 queue->queue_maxlen = ntohl(*queue_maxlen);
1406                 spin_unlock_bh(&queue->lock);
1407         }
1408
1409         if (nfqa[NFQA_CFG_FLAGS]) {
1410                 spin_lock_bh(&queue->lock);
1411                 queue->flags &= ~mask;
1412                 queue->flags |= flags & mask;
1413                 spin_unlock_bh(&queue->lock);
1414         }
1415
1416 err_out_unlock:
1417         rcu_read_unlock();
1418         return ret;
1419 }
1420
1421 static const struct nfnl_callback nfqnl_cb[NFQNL_MSG_MAX] = {
1422         [NFQNL_MSG_PACKET]      = {
1423                 .call           = nfqnl_recv_unsupp,
1424                 .type           = NFNL_CB_RCU,
1425                 .attr_count     = NFQA_MAX,
1426         },
1427         [NFQNL_MSG_VERDICT]     = {
1428                 .call           = nfqnl_recv_verdict,
1429                 .type           = NFNL_CB_RCU,
1430                 .attr_count     = NFQA_MAX,
1431                 .policy         = nfqa_verdict_policy
1432         },
1433         [NFQNL_MSG_CONFIG]      = {
1434                 .call           = nfqnl_recv_config,
1435                 .type           = NFNL_CB_MUTEX,
1436                 .attr_count     = NFQA_CFG_MAX,
1437                 .policy         = nfqa_cfg_policy
1438         },
1439         [NFQNL_MSG_VERDICT_BATCH] = {
1440                 .call           = nfqnl_recv_verdict_batch,
1441                 .type           = NFNL_CB_RCU,
1442                 .attr_count     = NFQA_MAX,
1443                 .policy         = nfqa_verdict_batch_policy
1444         },
1445 };
1446
1447 static const struct nfnetlink_subsystem nfqnl_subsys = {
1448         .name           = "nf_queue",
1449         .subsys_id      = NFNL_SUBSYS_QUEUE,
1450         .cb_count       = NFQNL_MSG_MAX,
1451         .cb             = nfqnl_cb,
1452 };
1453
1454 #ifdef CONFIG_PROC_FS
1455 struct iter_state {
1456         struct seq_net_private p;
1457         unsigned int bucket;
1458 };
1459
1460 static struct hlist_node *get_first(struct seq_file *seq)
1461 {
1462         struct iter_state *st = seq->private;
1463         struct net *net;
1464         struct nfnl_queue_net *q;
1465
1466         if (!st)
1467                 return NULL;
1468
1469         net = seq_file_net(seq);
1470         q = nfnl_queue_pernet(net);
1471         for (st->bucket = 0; st->bucket < INSTANCE_BUCKETS; st->bucket++) {
1472                 if (!hlist_empty(&q->instance_table[st->bucket]))
1473                         return q->instance_table[st->bucket].first;
1474         }
1475         return NULL;
1476 }
1477
1478 static struct hlist_node *get_next(struct seq_file *seq, struct hlist_node *h)
1479 {
1480         struct iter_state *st = seq->private;
1481         struct net *net = seq_file_net(seq);
1482
1483         h = h->next;
1484         while (!h) {
1485                 struct nfnl_queue_net *q;
1486
1487                 if (++st->bucket >= INSTANCE_BUCKETS)
1488                         return NULL;
1489
1490                 q = nfnl_queue_pernet(net);
1491                 h = q->instance_table[st->bucket].first;
1492         }
1493         return h;
1494 }
1495
1496 static struct hlist_node *get_idx(struct seq_file *seq, loff_t pos)
1497 {
1498         struct hlist_node *head;
1499         head = get_first(seq);
1500
1501         if (head)
1502                 while (pos && (head = get_next(seq, head)))
1503                         pos--;
1504         return pos ? NULL : head;
1505 }
1506
1507 static void *seq_start(struct seq_file *s, loff_t *pos)
1508         __acquires(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
1509 {
1510         spin_lock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
1511         return get_idx(s, *pos);
1512 }
1513
1514 static void *seq_next(struct seq_file *s, void *v, loff_t *pos)
1515 {
1516         (*pos)++;
1517         return get_next(s, v);
1518 }
1519
1520 static void seq_stop(struct seq_file *s, void *v)
1521         __releases(nfnl_queue_pernet(seq_file_net(s))->instances_lock)
1522 {
1523         spin_unlock(&nfnl_queue_pernet(seq_file_net(s))->instances_lock);
1524 }
1525
1526 static int seq_show(struct seq_file *s, void *v)
1527 {
1528         const struct nfqnl_instance *inst = v;
1529
1530         seq_printf(s, "%5u %6u %5u %1u %5u %5u %5u %8u %2d\n",
1531                    inst->queue_num,
1532                    inst->peer_portid, inst->queue_total,
1533                    inst->copy_mode, inst->copy_range,
1534                    inst->queue_dropped, inst->queue_user_dropped,
1535                    inst->id_sequence, 1);
1536         return 0;
1537 }
1538
1539 static const struct seq_operations nfqnl_seq_ops = {
1540         .start  = seq_start,
1541         .next   = seq_next,
1542         .stop   = seq_stop,
1543         .show   = seq_show,
1544 };
1545 #endif /* PROC_FS */
1546
1547 static int __net_init nfnl_queue_net_init(struct net *net)
1548 {
1549         unsigned int i;
1550         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1551
1552         for (i = 0; i < INSTANCE_BUCKETS; i++)
1553                 INIT_HLIST_HEAD(&q->instance_table[i]);
1554
1555         spin_lock_init(&q->instances_lock);
1556
1557 #ifdef CONFIG_PROC_FS
1558         if (!proc_create_net("nfnetlink_queue", 0440, net->nf.proc_netfilter,
1559                         &nfqnl_seq_ops, sizeof(struct iter_state)))
1560                 return -ENOMEM;
1561 #endif
1562         return 0;
1563 }
1564
1565 static void __net_exit nfnl_queue_net_exit(struct net *net)
1566 {
1567         struct nfnl_queue_net *q = nfnl_queue_pernet(net);
1568         unsigned int i;
1569
1570 #ifdef CONFIG_PROC_FS
1571         remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter);
1572 #endif
1573         for (i = 0; i < INSTANCE_BUCKETS; i++)
1574                 WARN_ON_ONCE(!hlist_empty(&q->instance_table[i]));
1575 }
1576
1577 static struct pernet_operations nfnl_queue_net_ops = {
1578         .init           = nfnl_queue_net_init,
1579         .exit           = nfnl_queue_net_exit,
1580         .id             = &nfnl_queue_net_id,
1581         .size           = sizeof(struct nfnl_queue_net),
1582 };
1583
1584 static int __init nfnetlink_queue_init(void)
1585 {
1586         int status;
1587
1588         status = register_pernet_subsys(&nfnl_queue_net_ops);
1589         if (status < 0) {
1590                 pr_err("failed to register pernet ops\n");
1591                 goto out;
1592         }
1593
1594         netlink_register_notifier(&nfqnl_rtnl_notifier);
1595         status = nfnetlink_subsys_register(&nfqnl_subsys);
1596         if (status < 0) {
1597                 pr_err("failed to create netlink socket\n");
1598                 goto cleanup_netlink_notifier;
1599         }
1600
1601         status = register_netdevice_notifier(&nfqnl_dev_notifier);
1602         if (status < 0) {
1603                 pr_err("failed to register netdevice notifier\n");
1604                 goto cleanup_netlink_subsys;
1605         }
1606
1607         nf_register_queue_handler(&nfqh);
1608
1609         return status;
1610
1611 cleanup_netlink_subsys:
1612         nfnetlink_subsys_unregister(&nfqnl_subsys);
1613 cleanup_netlink_notifier:
1614         netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1615         unregister_pernet_subsys(&nfnl_queue_net_ops);
1616 out:
1617         return status;
1618 }
1619
1620 static void __exit nfnetlink_queue_fini(void)
1621 {
1622         nf_unregister_queue_handler();
1623         unregister_netdevice_notifier(&nfqnl_dev_notifier);
1624         nfnetlink_subsys_unregister(&nfqnl_subsys);
1625         netlink_unregister_notifier(&nfqnl_rtnl_notifier);
1626         unregister_pernet_subsys(&nfnl_queue_net_ops);
1627
1628         rcu_barrier(); /* Wait for completion of call_rcu()'s */
1629 }
1630
1631 MODULE_DESCRIPTION("netfilter packet queue handler");
1632 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
1633 MODULE_LICENSE("GPL");
1634 MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_QUEUE);
1635
1636 module_init(nfnetlink_queue_init);
1637 module_exit(nfnetlink_queue_fini);