nds32: fix build error "relocation truncated to fit: R_NDS32_25_PCREL_RELA" when
[linux-2.6-microblaze.git] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  * Copyright (C) 2006-2010 Patrick McHardy <kaber@trash.net>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 #include <linux/cache.h>
14 #include <linux/capability.h>
15 #include <linux/skbuff.h>
16 #include <linux/kmod.h>
17 #include <linux/vmalloc.h>
18 #include <linux/netdevice.h>
19 #include <linux/module.h>
20 #include <linux/icmp.h>
21 #include <net/ip.h>
22 #include <net/compat.h>
23 #include <linux/uaccess.h>
24 #include <linux/mutex.h>
25 #include <linux/proc_fs.h>
26 #include <linux/err.h>
27 #include <linux/cpumask.h>
28
29 #include <linux/netfilter/x_tables.h>
30 #include <linux/netfilter_ipv4/ip_tables.h>
31 #include <net/netfilter/nf_log.h>
32 #include "../../netfilter/xt_repldata.h"
33
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
36 MODULE_DESCRIPTION("IPv4 packet filter");
37 MODULE_ALIAS("ipt_icmp");
38
39 void *ipt_alloc_initial_table(const struct xt_table *info)
40 {
41         return xt_alloc_initial_table(ipt, IPT);
42 }
43 EXPORT_SYMBOL_GPL(ipt_alloc_initial_table);
44
45 /* Returns whether matches rule or not. */
46 /* Performance critical - called for every packet */
47 static inline bool
48 ip_packet_match(const struct iphdr *ip,
49                 const char *indev,
50                 const char *outdev,
51                 const struct ipt_ip *ipinfo,
52                 int isfrag)
53 {
54         unsigned long ret;
55
56         if (NF_INVF(ipinfo, IPT_INV_SRCIP,
57                     (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr) ||
58             NF_INVF(ipinfo, IPT_INV_DSTIP,
59                     (ip->daddr & ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr))
60                 return false;
61
62         ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
63
64         if (NF_INVF(ipinfo, IPT_INV_VIA_IN, ret != 0))
65                 return false;
66
67         ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
68
69         if (NF_INVF(ipinfo, IPT_INV_VIA_OUT, ret != 0))
70                 return false;
71
72         /* Check specific protocol */
73         if (ipinfo->proto &&
74             NF_INVF(ipinfo, IPT_INV_PROTO, ip->protocol != ipinfo->proto))
75                 return false;
76
77         /* If we have a fragment rule but the packet is not a fragment
78          * then we return zero */
79         if (NF_INVF(ipinfo, IPT_INV_FRAG,
80                     (ipinfo->flags & IPT_F_FRAG) && !isfrag))
81                 return false;
82
83         return true;
84 }
85
86 static bool
87 ip_checkentry(const struct ipt_ip *ip)
88 {
89         if (ip->flags & ~IPT_F_MASK)
90                 return false;
91         if (ip->invflags & ~IPT_INV_MASK)
92                 return false;
93         return true;
94 }
95
96 static unsigned int
97 ipt_error(struct sk_buff *skb, const struct xt_action_param *par)
98 {
99         net_info_ratelimited("error: `%s'\n", (const char *)par->targinfo);
100
101         return NF_DROP;
102 }
103
/*
 * Translate a byte @offset relative to @base into a rule pointer.
 * No bounds or alignment checking is done here.
 *
 * Performance critical.
 */
static inline struct ipt_entry *
get_entry(const void *base, unsigned int offset)
{
	const char *first_byte = base;

	return (struct ipt_entry *)(first_byte + offset);
}
110
111 /* All zeroes == unconditional rule. */
112 /* Mildly perf critical (only if packet tracing is on) */
113 static inline bool unconditional(const struct ipt_entry *e)
114 {
115         static const struct ipt_ip uncond;
116
117         return e->target_offset == sizeof(struct ipt_entry) &&
118                memcmp(&e->ip, &uncond, sizeof(uncond)) == 0;
119 }
120
/*
 * Const-correct front end for ipt_get_target(): accepts a const rule
 * and hands the target back as a pointer-to-const.
 */
static inline const struct xt_entry_target *
ipt_get_target_c(const struct ipt_entry *e)
{
	struct ipt_entry *writable = (struct ipt_entry *)e;

	return ipt_get_target(writable);
}
127
128 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
129 static const char *const hooknames[] = {
130         [NF_INET_PRE_ROUTING]           = "PREROUTING",
131         [NF_INET_LOCAL_IN]              = "INPUT",
132         [NF_INET_FORWARD]               = "FORWARD",
133         [NF_INET_LOCAL_OUT]             = "OUTPUT",
134         [NF_INET_POST_ROUTING]          = "POSTROUTING",
135 };
136
137 enum nf_ip_trace_comments {
138         NF_IP_TRACE_COMMENT_RULE,
139         NF_IP_TRACE_COMMENT_RETURN,
140         NF_IP_TRACE_COMMENT_POLICY,
141 };
142
143 static const char *const comments[] = {
144         [NF_IP_TRACE_COMMENT_RULE]      = "rule",
145         [NF_IP_TRACE_COMMENT_RETURN]    = "return",
146         [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
147 };
148
149 static const struct nf_loginfo trace_loginfo = {
150         .type = NF_LOG_TYPE_LOG,
151         .u = {
152                 .log = {
153                         .level = 4,
154                         .logflags = NF_LOG_DEFAULT_MASK,
155                 },
156         },
157 };
158
159 /* Mildly perf critical (only if packet tracing is on) */
160 static inline int
161 get_chainname_rulenum(const struct ipt_entry *s, const struct ipt_entry *e,
162                       const char *hookname, const char **chainname,
163                       const char **comment, unsigned int *rulenum)
164 {
165         const struct xt_standard_target *t = (void *)ipt_get_target_c(s);
166
167         if (strcmp(t->target.u.kernel.target->name, XT_ERROR_TARGET) == 0) {
168                 /* Head of user chain: ERROR target with chainname */
169                 *chainname = t->target.data;
170                 (*rulenum) = 0;
171         } else if (s == e) {
172                 (*rulenum)++;
173
174                 if (unconditional(s) &&
175                     strcmp(t->target.u.kernel.target->name,
176                            XT_STANDARD_TARGET) == 0 &&
177                    t->verdict < 0) {
178                         /* Tail of chains: STANDARD target (return/policy) */
179                         *comment = *chainname == hookname
180                                 ? comments[NF_IP_TRACE_COMMENT_POLICY]
181                                 : comments[NF_IP_TRACE_COMMENT_RETURN];
182                 }
183                 return 1;
184         } else
185                 (*rulenum)++;
186
187         return 0;
188 }
189
190 static void trace_packet(struct net *net,
191                          const struct sk_buff *skb,
192                          unsigned int hook,
193                          const struct net_device *in,
194                          const struct net_device *out,
195                          const char *tablename,
196                          const struct xt_table_info *private,
197                          const struct ipt_entry *e)
198 {
199         const struct ipt_entry *root;
200         const char *hookname, *chainname, *comment;
201         const struct ipt_entry *iter;
202         unsigned int rulenum = 0;
203
204         root = get_entry(private->entries, private->hook_entry[hook]);
205
206         hookname = chainname = hooknames[hook];
207         comment = comments[NF_IP_TRACE_COMMENT_RULE];
208
209         xt_entry_foreach(iter, root, private->size - private->hook_entry[hook])
210                 if (get_chainname_rulenum(iter, e, hookname,
211                     &chainname, &comment, &rulenum) != 0)
212                         break;
213
214         nf_log_trace(net, AF_INET, hook, skb, in, out, &trace_loginfo,
215                      "TRACE: %s:%s:%s:%u ",
216                      tablename, chainname, comment, rulenum);
217 }
218 #endif
219
220 static inline
221 struct ipt_entry *ipt_next_entry(const struct ipt_entry *entry)
222 {
223         return (void *)entry + entry->next_offset;
224 }
225
226 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
227 unsigned int
228 ipt_do_table(struct sk_buff *skb,
229              const struct nf_hook_state *state,
230              struct xt_table *table)
231 {
232         unsigned int hook = state->hook;
233         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
234         const struct iphdr *ip;
235         /* Initializing verdict to NF_DROP keeps gcc happy. */
236         unsigned int verdict = NF_DROP;
237         const char *indev, *outdev;
238         const void *table_base;
239         struct ipt_entry *e, **jumpstack;
240         unsigned int stackidx, cpu;
241         const struct xt_table_info *private;
242         struct xt_action_param acpar;
243         unsigned int addend;
244
245         /* Initialization */
246         stackidx = 0;
247         ip = ip_hdr(skb);
248         indev = state->in ? state->in->name : nulldevname;
249         outdev = state->out ? state->out->name : nulldevname;
250         /* We handle fragments by dealing with the first fragment as
251          * if it was a normal packet.  All other fragments are treated
252          * normally, except that they will NEVER match rules that ask
253          * things we don't know, ie. tcp syn flag or ports).  If the
254          * rule is also a fragment-specific rule, non-fragments won't
255          * match it. */
256         acpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
257         acpar.thoff   = ip_hdrlen(skb);
258         acpar.hotdrop = false;
259         acpar.state   = state;
260
261         WARN_ON(!(table->valid_hooks & (1 << hook)));
262         local_bh_disable();
263         addend = xt_write_recseq_begin();
264         private = READ_ONCE(table->private); /* Address dependency. */
265         cpu        = smp_processor_id();
266         table_base = private->entries;
267         jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
268
269         /* Switch to alternate jumpstack if we're being invoked via TEE.
270          * TEE issues XT_CONTINUE verdict on original skb so we must not
271          * clobber the jumpstack.
272          *
273          * For recursion via REJECT or SYNPROXY the stack will be clobbered
274          * but it is no problem since absolute verdict is issued by these.
275          */
276         if (static_key_false(&xt_tee_enabled))
277                 jumpstack += private->stacksize * __this_cpu_read(nf_skb_duplicated);
278
279         e = get_entry(table_base, private->hook_entry[hook]);
280
281         do {
282                 const struct xt_entry_target *t;
283                 const struct xt_entry_match *ematch;
284                 struct xt_counters *counter;
285
286                 WARN_ON(!e);
287                 if (!ip_packet_match(ip, indev, outdev,
288                     &e->ip, acpar.fragoff)) {
289  no_match:
290                         e = ipt_next_entry(e);
291                         continue;
292                 }
293
294                 xt_ematch_foreach(ematch, e) {
295                         acpar.match     = ematch->u.kernel.match;
296                         acpar.matchinfo = ematch->data;
297                         if (!acpar.match->match(skb, &acpar))
298                                 goto no_match;
299                 }
300
301                 counter = xt_get_this_cpu_counter(&e->counters);
302                 ADD_COUNTER(*counter, skb->len, 1);
303
304                 t = ipt_get_target_c(e);
305                 WARN_ON(!t->u.kernel.target);
306
307 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
308                 /* The packet is traced: log it */
309                 if (unlikely(skb->nf_trace))
310                         trace_packet(state->net, skb, hook, state->in,
311                                      state->out, table->name, private, e);
312 #endif
313                 /* Standard target? */
314                 if (!t->u.kernel.target->target) {
315                         int v;
316
317                         v = ((struct xt_standard_target *)t)->verdict;
318                         if (v < 0) {
319                                 /* Pop from stack? */
320                                 if (v != XT_RETURN) {
321                                         verdict = (unsigned int)(-v) - 1;
322                                         break;
323                                 }
324                                 if (stackidx == 0) {
325                                         e = get_entry(table_base,
326                                             private->underflow[hook]);
327                                 } else {
328                                         e = jumpstack[--stackidx];
329                                         e = ipt_next_entry(e);
330                                 }
331                                 continue;
332                         }
333                         if (table_base + v != ipt_next_entry(e) &&
334                             !(e->ip.flags & IPT_F_GOTO)) {
335                                 if (unlikely(stackidx >= private->stacksize)) {
336                                         verdict = NF_DROP;
337                                         break;
338                                 }
339                                 jumpstack[stackidx++] = e;
340                         }
341
342                         e = get_entry(table_base, v);
343                         continue;
344                 }
345
346                 acpar.target   = t->u.kernel.target;
347                 acpar.targinfo = t->data;
348
349                 verdict = t->u.kernel.target->target(skb, &acpar);
350                 if (verdict == XT_CONTINUE) {
351                         /* Target might have changed stuff. */
352                         ip = ip_hdr(skb);
353                         e = ipt_next_entry(e);
354                 } else {
355                         /* Verdict */
356                         break;
357                 }
358         } while (!acpar.hotdrop);
359
360         xt_write_recseq_end(addend);
361         local_bh_enable();
362
363         if (acpar.hotdrop)
364                 return NF_DROP;
365         else return verdict;
366 }
367
368 /* Figures out from what hook each rule can be called: returns 0 if
369    there are loops.  Puts hook bitmask in comefrom. */
370 static int
371 mark_source_chains(const struct xt_table_info *newinfo,
372                    unsigned int valid_hooks, void *entry0,
373                    unsigned int *offsets)
374 {
375         unsigned int hook;
376
377         /* No recursion; use packet counter to save back ptrs (reset
378            to 0 as we leave), and comefrom to save source hook bitmask */
379         for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
380                 unsigned int pos = newinfo->hook_entry[hook];
381                 struct ipt_entry *e = entry0 + pos;
382
383                 if (!(valid_hooks & (1 << hook)))
384                         continue;
385
386                 /* Set initial back pointer. */
387                 e->counters.pcnt = pos;
388
389                 for (;;) {
390                         const struct xt_standard_target *t
391                                 = (void *)ipt_get_target_c(e);
392                         int visited = e->comefrom & (1 << hook);
393
394                         if (e->comefrom & (1 << NF_INET_NUMHOOKS))
395                                 return 0;
396
397                         e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
398
399                         /* Unconditional return/END. */
400                         if ((unconditional(e) &&
401                              (strcmp(t->target.u.user.name,
402                                      XT_STANDARD_TARGET) == 0) &&
403                              t->verdict < 0) || visited) {
404                                 unsigned int oldpos, size;
405
406                                 /* Return: backtrack through the last
407                                    big jump. */
408                                 do {
409                                         e->comefrom ^= (1<<NF_INET_NUMHOOKS);
410                                         oldpos = pos;
411                                         pos = e->counters.pcnt;
412                                         e->counters.pcnt = 0;
413
414                                         /* We're at the start. */
415                                         if (pos == oldpos)
416                                                 goto next;
417
418                                         e = entry0 + pos;
419                                 } while (oldpos == pos + e->next_offset);
420
421                                 /* Move along one */
422                                 size = e->next_offset;
423                                 e = entry0 + pos + size;
424                                 if (pos + size >= newinfo->size)
425                                         return 0;
426                                 e->counters.pcnt = pos;
427                                 pos += size;
428                         } else {
429                                 int newpos = t->verdict;
430
431                                 if (strcmp(t->target.u.user.name,
432                                            XT_STANDARD_TARGET) == 0 &&
433                                     newpos >= 0) {
434                                         /* This a jump; chase it. */
435                                         if (!xt_find_jump_offset(offsets, newpos,
436                                                                  newinfo->number))
437                                                 return 0;
438                                 } else {
439                                         /* ... this is a fallthru */
440                                         newpos = pos + e->next_offset;
441                                         if (newpos >= newinfo->size)
442                                                 return 0;
443                                 }
444                                 e = entry0 + newpos;
445                                 e->counters.pcnt = pos;
446                                 pos = newpos;
447                         }
448                 }
449 next:           ;
450         }
451         return 1;
452 }
453
454 static void cleanup_match(struct xt_entry_match *m, struct net *net)
455 {
456         struct xt_mtdtor_param par;
457
458         par.net       = net;
459         par.match     = m->u.kernel.match;
460         par.matchinfo = m->data;
461         par.family    = NFPROTO_IPV4;
462         if (par.match->destroy != NULL)
463                 par.match->destroy(&par);
464         module_put(par.match->me);
465 }
466
467 static int
468 check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
469 {
470         const struct ipt_ip *ip = par->entryinfo;
471
472         par->match     = m->u.kernel.match;
473         par->matchinfo = m->data;
474
475         return xt_check_match(par, m->u.match_size - sizeof(*m),
476                               ip->proto, ip->invflags & IPT_INV_PROTO);
477 }
478
479 static int
480 find_check_match(struct xt_entry_match *m, struct xt_mtchk_param *par)
481 {
482         struct xt_match *match;
483         int ret;
484
485         match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
486                                       m->u.user.revision);
487         if (IS_ERR(match))
488                 return PTR_ERR(match);
489         m->u.kernel.match = match;
490
491         ret = check_match(m, par);
492         if (ret)
493                 goto err;
494
495         return 0;
496 err:
497         module_put(m->u.kernel.match->me);
498         return ret;
499 }
500
501 static int check_target(struct ipt_entry *e, struct net *net, const char *name)
502 {
503         struct xt_entry_target *t = ipt_get_target(e);
504         struct xt_tgchk_param par = {
505                 .net       = net,
506                 .table     = name,
507                 .entryinfo = e,
508                 .target    = t->u.kernel.target,
509                 .targinfo  = t->data,
510                 .hook_mask = e->comefrom,
511                 .family    = NFPROTO_IPV4,
512         };
513
514         return xt_check_target(&par, t->u.target_size - sizeof(*t),
515                                e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
516 }
517
518 static int
519 find_check_entry(struct ipt_entry *e, struct net *net, const char *name,
520                  unsigned int size,
521                  struct xt_percpu_counter_alloc_state *alloc_state)
522 {
523         struct xt_entry_target *t;
524         struct xt_target *target;
525         int ret;
526         unsigned int j;
527         struct xt_mtchk_param mtpar;
528         struct xt_entry_match *ematch;
529
530         if (!xt_percpu_counter_alloc(alloc_state, &e->counters))
531                 return -ENOMEM;
532
533         j = 0;
534         memset(&mtpar, 0, sizeof(mtpar));
535         mtpar.net       = net;
536         mtpar.table     = name;
537         mtpar.entryinfo = &e->ip;
538         mtpar.hook_mask = e->comefrom;
539         mtpar.family    = NFPROTO_IPV4;
540         xt_ematch_foreach(ematch, e) {
541                 ret = find_check_match(ematch, &mtpar);
542                 if (ret != 0)
543                         goto cleanup_matches;
544                 ++j;
545         }
546
547         t = ipt_get_target(e);
548         target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
549                                         t->u.user.revision);
550         if (IS_ERR(target)) {
551                 ret = PTR_ERR(target);
552                 goto cleanup_matches;
553         }
554         t->u.kernel.target = target;
555
556         ret = check_target(e, net, name);
557         if (ret)
558                 goto err;
559
560         return 0;
561  err:
562         module_put(t->u.kernel.target->me);
563  cleanup_matches:
564         xt_ematch_foreach(ematch, e) {
565                 if (j-- == 0)
566                         break;
567                 cleanup_match(ematch, net);
568         }
569
570         xt_percpu_counter_free(&e->counters);
571
572         return ret;
573 }
574
575 static bool check_underflow(const struct ipt_entry *e)
576 {
577         const struct xt_entry_target *t;
578         unsigned int verdict;
579
580         if (!unconditional(e))
581                 return false;
582         t = ipt_get_target_c(e);
583         if (strcmp(t->u.user.name, XT_STANDARD_TARGET) != 0)
584                 return false;
585         verdict = ((struct xt_standard_target *)t)->verdict;
586         verdict = -verdict - 1;
587         return verdict == NF_DROP || verdict == NF_ACCEPT;
588 }
589
590 static int
591 check_entry_size_and_hooks(struct ipt_entry *e,
592                            struct xt_table_info *newinfo,
593                            const unsigned char *base,
594                            const unsigned char *limit,
595                            const unsigned int *hook_entries,
596                            const unsigned int *underflows,
597                            unsigned int valid_hooks)
598 {
599         unsigned int h;
600         int err;
601
602         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0 ||
603             (unsigned char *)e + sizeof(struct ipt_entry) >= limit ||
604             (unsigned char *)e + e->next_offset > limit)
605                 return -EINVAL;
606
607         if (e->next_offset
608             < sizeof(struct ipt_entry) + sizeof(struct xt_entry_target))
609                 return -EINVAL;
610
611         if (!ip_checkentry(&e->ip))
612                 return -EINVAL;
613
614         err = xt_check_entry_offsets(e, e->elems, e->target_offset,
615                                      e->next_offset);
616         if (err)
617                 return err;
618
619         /* Check hooks & underflows */
620         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
621                 if (!(valid_hooks & (1 << h)))
622                         continue;
623                 if ((unsigned char *)e - base == hook_entries[h])
624                         newinfo->hook_entry[h] = hook_entries[h];
625                 if ((unsigned char *)e - base == underflows[h]) {
626                         if (!check_underflow(e))
627                                 return -EINVAL;
628
629                         newinfo->underflow[h] = underflows[h];
630                 }
631         }
632
633         /* Clear counters and comefrom */
634         e->counters = ((struct xt_counters) { 0, 0 });
635         e->comefrom = 0;
636         return 0;
637 }
638
639 static void
640 cleanup_entry(struct ipt_entry *e, struct net *net)
641 {
642         struct xt_tgdtor_param par;
643         struct xt_entry_target *t;
644         struct xt_entry_match *ematch;
645
646         /* Cleanup all matches */
647         xt_ematch_foreach(ematch, e)
648                 cleanup_match(ematch, net);
649         t = ipt_get_target(e);
650
651         par.net      = net;
652         par.target   = t->u.kernel.target;
653         par.targinfo = t->data;
654         par.family   = NFPROTO_IPV4;
655         if (par.target->destroy != NULL)
656                 par.target->destroy(&par);
657         module_put(par.target->me);
658         xt_percpu_counter_free(&e->counters);
659 }
660
661 /* Checks and translates the user-supplied table segment (held in
662    newinfo) */
663 static int
664 translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0,
665                 const struct ipt_replace *repl)
666 {
667         struct xt_percpu_counter_alloc_state alloc_state = { 0 };
668         struct ipt_entry *iter;
669         unsigned int *offsets;
670         unsigned int i;
671         int ret = 0;
672
673         newinfo->size = repl->size;
674         newinfo->number = repl->num_entries;
675
676         /* Init all hooks to impossible value. */
677         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
678                 newinfo->hook_entry[i] = 0xFFFFFFFF;
679                 newinfo->underflow[i] = 0xFFFFFFFF;
680         }
681
682         offsets = xt_alloc_entry_offsets(newinfo->number);
683         if (!offsets)
684                 return -ENOMEM;
685         i = 0;
686         /* Walk through entries, checking offsets. */
687         xt_entry_foreach(iter, entry0, newinfo->size) {
688                 ret = check_entry_size_and_hooks(iter, newinfo, entry0,
689                                                  entry0 + repl->size,
690                                                  repl->hook_entry,
691                                                  repl->underflow,
692                                                  repl->valid_hooks);
693                 if (ret != 0)
694                         goto out_free;
695                 if (i < repl->num_entries)
696                         offsets[i] = (void *)iter - entry0;
697                 ++i;
698                 if (strcmp(ipt_get_target(iter)->u.user.name,
699                     XT_ERROR_TARGET) == 0)
700                         ++newinfo->stacksize;
701         }
702
703         ret = -EINVAL;
704         if (i != repl->num_entries)
705                 goto out_free;
706
707         ret = xt_check_table_hooks(newinfo, repl->valid_hooks);
708         if (ret)
709                 goto out_free;
710
711         if (!mark_source_chains(newinfo, repl->valid_hooks, entry0, offsets)) {
712                 ret = -ELOOP;
713                 goto out_free;
714         }
715         kvfree(offsets);
716
717         /* Finally, each sanity check must pass */
718         i = 0;
719         xt_entry_foreach(iter, entry0, newinfo->size) {
720                 ret = find_check_entry(iter, net, repl->name, repl->size,
721                                        &alloc_state);
722                 if (ret != 0)
723                         break;
724                 ++i;
725         }
726
727         if (ret != 0) {
728                 xt_entry_foreach(iter, entry0, newinfo->size) {
729                         if (i-- == 0)
730                                 break;
731                         cleanup_entry(iter, net);
732                 }
733                 return ret;
734         }
735
736         return ret;
737  out_free:
738         kvfree(offsets);
739         return ret;
740 }
741
742 static void
743 get_counters(const struct xt_table_info *t,
744              struct xt_counters counters[])
745 {
746         struct ipt_entry *iter;
747         unsigned int cpu;
748         unsigned int i;
749
750         for_each_possible_cpu(cpu) {
751                 seqcount_t *s = &per_cpu(xt_recseq, cpu);
752
753                 i = 0;
754                 xt_entry_foreach(iter, t->entries, t->size) {
755                         struct xt_counters *tmp;
756                         u64 bcnt, pcnt;
757                         unsigned int start;
758
759                         tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
760                         do {
761                                 start = read_seqcount_begin(s);
762                                 bcnt = tmp->bcnt;
763                                 pcnt = tmp->pcnt;
764                         } while (read_seqcount_retry(s, start));
765
766                         ADD_COUNTER(counters[i], bcnt, pcnt);
767                         ++i; /* macro does multi eval of i */
768                         cond_resched();
769                 }
770         }
771 }
772
773 static void get_old_counters(const struct xt_table_info *t,
774                              struct xt_counters counters[])
775 {
776         struct ipt_entry *iter;
777         unsigned int cpu, i;
778
779         for_each_possible_cpu(cpu) {
780                 i = 0;
781                 xt_entry_foreach(iter, t->entries, t->size) {
782                         const struct xt_counters *tmp;
783
784                         tmp = xt_get_per_cpu_counter(&iter->counters, cpu);
785                         ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt);
786                         ++i; /* macro does multi eval of i */
787                 }
788
789                 cond_resched();
790         }
791 }
792
793 static struct xt_counters *alloc_counters(const struct xt_table *table)
794 {
795         unsigned int countersize;
796         struct xt_counters *counters;
797         const struct xt_table_info *private = table->private;
798
799         /* We need atomic snapshot of counters: rest doesn't change
800            (other than comefrom, which userspace doesn't care
801            about). */
802         countersize = sizeof(struct xt_counters) * private->number;
803         counters = vzalloc(countersize);
804
805         if (counters == NULL)
806                 return ERR_PTR(-ENOMEM);
807
808         get_counters(private, counters);
809
810         return counters;
811 }
812
813 static int
814 copy_entries_to_user(unsigned int total_size,
815                      const struct xt_table *table,
816                      void __user *userptr)
817 {
818         unsigned int off, num;
819         const struct ipt_entry *e;
820         struct xt_counters *counters;
821         const struct xt_table_info *private = table->private;
822         int ret = 0;
823         const void *loc_cpu_entry;
824
825         counters = alloc_counters(table);
826         if (IS_ERR(counters))
827                 return PTR_ERR(counters);
828
829         loc_cpu_entry = private->entries;
830
831         /* FIXME: use iterator macros --RR */
832         /* ... then go back and fix counters and names */
833         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
834                 unsigned int i;
835                 const struct xt_entry_match *m;
836                 const struct xt_entry_target *t;
837
838                 e = loc_cpu_entry + off;
839                 if (copy_to_user(userptr + off, e, sizeof(*e))) {
840                         ret = -EFAULT;
841                         goto free_counters;
842                 }
843                 if (copy_to_user(userptr + off
844                                  + offsetof(struct ipt_entry, counters),
845                                  &counters[num],
846                                  sizeof(counters[num])) != 0) {
847                         ret = -EFAULT;
848                         goto free_counters;
849                 }
850
851                 for (i = sizeof(struct ipt_entry);
852                      i < e->target_offset;
853                      i += m->u.match_size) {
854                         m = (void *)e + i;
855
856                         if (xt_match_to_user(m, userptr + off + i)) {
857                                 ret = -EFAULT;
858                                 goto free_counters;
859                         }
860                 }
861
862                 t = ipt_get_target_c(e);
863                 if (xt_target_to_user(t, userptr + off + e->target_offset)) {
864                         ret = -EFAULT;
865                         goto free_counters;
866                 }
867         }
868
869  free_counters:
870         vfree(counters);
871         return ret;
872 }
873
874 #ifdef CONFIG_COMPAT
875 static void compat_standard_from_user(void *dst, const void *src)
876 {
877         int v = *(compat_int_t *)src;
878
879         if (v > 0)
880                 v += xt_compat_calc_jump(AF_INET, v);
881         memcpy(dst, &v, sizeof(v));
882 }
883
884 static int compat_standard_to_user(void __user *dst, const void *src)
885 {
886         compat_int_t cv = *(int *)src;
887
888         if (cv > 0)
889                 cv -= xt_compat_calc_jump(AF_INET, cv);
890         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
891 }
892
893 static int compat_calc_entry(const struct ipt_entry *e,
894                              const struct xt_table_info *info,
895                              const void *base, struct xt_table_info *newinfo)
896 {
897         const struct xt_entry_match *ematch;
898         const struct xt_entry_target *t;
899         unsigned int entry_offset;
900         int off, i, ret;
901
902         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
903         entry_offset = (void *)e - base;
904         xt_ematch_foreach(ematch, e)
905                 off += xt_compat_match_offset(ematch->u.kernel.match);
906         t = ipt_get_target_c(e);
907         off += xt_compat_target_offset(t->u.kernel.target);
908         newinfo->size -= off;
909         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
910         if (ret)
911                 return ret;
912
913         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
914                 if (info->hook_entry[i] &&
915                     (e < (struct ipt_entry *)(base + info->hook_entry[i])))
916                         newinfo->hook_entry[i] -= off;
917                 if (info->underflow[i] &&
918                     (e < (struct ipt_entry *)(base + info->underflow[i])))
919                         newinfo->underflow[i] -= off;
920         }
921         return 0;
922 }
923
924 static int compat_table_info(const struct xt_table_info *info,
925                              struct xt_table_info *newinfo)
926 {
927         struct ipt_entry *iter;
928         const void *loc_cpu_entry;
929         int ret;
930
931         if (!newinfo || !info)
932                 return -EINVAL;
933
934         /* we dont care about newinfo->entries */
935         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
936         newinfo->initial_entries = 0;
937         loc_cpu_entry = info->entries;
938         ret = xt_compat_init_offsets(AF_INET, info->number);
939         if (ret)
940                 return ret;
941         xt_entry_foreach(iter, loc_cpu_entry, info->size) {
942                 ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo);
943                 if (ret != 0)
944                         return ret;
945         }
946         return 0;
947 }
948 #endif
949
950 static int get_info(struct net *net, void __user *user,
951                     const int *len, int compat)
952 {
953         char name[XT_TABLE_MAXNAMELEN];
954         struct xt_table *t;
955         int ret;
956
957         if (*len != sizeof(struct ipt_getinfo))
958                 return -EINVAL;
959
960         if (copy_from_user(name, user, sizeof(name)) != 0)
961                 return -EFAULT;
962
963         name[XT_TABLE_MAXNAMELEN-1] = '\0';
964 #ifdef CONFIG_COMPAT
965         if (compat)
966                 xt_compat_lock(AF_INET);
967 #endif
968         t = xt_request_find_table_lock(net, AF_INET, name);
969         if (!IS_ERR(t)) {
970                 struct ipt_getinfo info;
971                 const struct xt_table_info *private = t->private;
972 #ifdef CONFIG_COMPAT
973                 struct xt_table_info tmp;
974
975                 if (compat) {
976                         ret = compat_table_info(private, &tmp);
977                         xt_compat_flush_offsets(AF_INET);
978                         private = &tmp;
979                 }
980 #endif
981                 memset(&info, 0, sizeof(info));
982                 info.valid_hooks = t->valid_hooks;
983                 memcpy(info.hook_entry, private->hook_entry,
984                        sizeof(info.hook_entry));
985                 memcpy(info.underflow, private->underflow,
986                        sizeof(info.underflow));
987                 info.num_entries = private->number;
988                 info.size = private->size;
989                 strcpy(info.name, name);
990
991                 if (copy_to_user(user, &info, *len) != 0)
992                         ret = -EFAULT;
993                 else
994                         ret = 0;
995
996                 xt_table_unlock(t);
997                 module_put(t->me);
998         } else
999                 ret = PTR_ERR(t);
1000 #ifdef CONFIG_COMPAT
1001         if (compat)
1002                 xt_compat_unlock(AF_INET);
1003 #endif
1004         return ret;
1005 }
1006
1007 static int
1008 get_entries(struct net *net, struct ipt_get_entries __user *uptr,
1009             const int *len)
1010 {
1011         int ret;
1012         struct ipt_get_entries get;
1013         struct xt_table *t;
1014
1015         if (*len < sizeof(get))
1016                 return -EINVAL;
1017         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1018                 return -EFAULT;
1019         if (*len != sizeof(struct ipt_get_entries) + get.size)
1020                 return -EINVAL;
1021         get.name[sizeof(get.name) - 1] = '\0';
1022
1023         t = xt_find_table_lock(net, AF_INET, get.name);
1024         if (!IS_ERR(t)) {
1025                 const struct xt_table_info *private = t->private;
1026                 if (get.size == private->size)
1027                         ret = copy_entries_to_user(private->size,
1028                                                    t, uptr->entrytable);
1029                 else
1030                         ret = -EAGAIN;
1031
1032                 module_put(t->me);
1033                 xt_table_unlock(t);
1034         } else
1035                 ret = PTR_ERR(t);
1036
1037         return ret;
1038 }
1039
1040 static int
1041 __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1042              struct xt_table_info *newinfo, unsigned int num_counters,
1043              void __user *counters_ptr)
1044 {
1045         int ret;
1046         struct xt_table *t;
1047         struct xt_table_info *oldinfo;
1048         struct xt_counters *counters;
1049         struct ipt_entry *iter;
1050
1051         ret = 0;
1052         counters = xt_counters_alloc(num_counters);
1053         if (!counters) {
1054                 ret = -ENOMEM;
1055                 goto out;
1056         }
1057
1058         t = xt_request_find_table_lock(net, AF_INET, name);
1059         if (IS_ERR(t)) {
1060                 ret = PTR_ERR(t);
1061                 goto free_newinfo_counters_untrans;
1062         }
1063
1064         /* You lied! */
1065         if (valid_hooks != t->valid_hooks) {
1066                 ret = -EINVAL;
1067                 goto put_module;
1068         }
1069
1070         oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1071         if (!oldinfo)
1072                 goto put_module;
1073
1074         /* Update module usage count based on number of rules */
1075         if ((oldinfo->number > oldinfo->initial_entries) ||
1076             (newinfo->number <= oldinfo->initial_entries))
1077                 module_put(t->me);
1078         if ((oldinfo->number > oldinfo->initial_entries) &&
1079             (newinfo->number <= oldinfo->initial_entries))
1080                 module_put(t->me);
1081
1082         xt_table_unlock(t);
1083
1084         get_old_counters(oldinfo, counters);
1085
1086         /* Decrease module usage counts and free resource */
1087         xt_entry_foreach(iter, oldinfo->entries, oldinfo->size)
1088                 cleanup_entry(iter, net);
1089
1090         xt_free_table_info(oldinfo);
1091         if (copy_to_user(counters_ptr, counters,
1092                          sizeof(struct xt_counters) * num_counters) != 0) {
1093                 /* Silent error, can't fail, new table is already in place */
1094                 net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
1095         }
1096         vfree(counters);
1097         return ret;
1098
1099  put_module:
1100         module_put(t->me);
1101         xt_table_unlock(t);
1102  free_newinfo_counters_untrans:
1103         vfree(counters);
1104  out:
1105         return ret;
1106 }
1107
1108 static int
1109 do_replace(struct net *net, const void __user *user, unsigned int len)
1110 {
1111         int ret;
1112         struct ipt_replace tmp;
1113         struct xt_table_info *newinfo;
1114         void *loc_cpu_entry;
1115         struct ipt_entry *iter;
1116
1117         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1118                 return -EFAULT;
1119
1120         /* overflow check */
1121         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1122                 return -ENOMEM;
1123         if (tmp.num_counters == 0)
1124                 return -EINVAL;
1125
1126         tmp.name[sizeof(tmp.name)-1] = 0;
1127
1128         newinfo = xt_alloc_table_info(tmp.size);
1129         if (!newinfo)
1130                 return -ENOMEM;
1131
1132         loc_cpu_entry = newinfo->entries;
1133         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1134                            tmp.size) != 0) {
1135                 ret = -EFAULT;
1136                 goto free_newinfo;
1137         }
1138
1139         ret = translate_table(net, newinfo, loc_cpu_entry, &tmp);
1140         if (ret != 0)
1141                 goto free_newinfo;
1142
1143         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1144                            tmp.num_counters, tmp.counters);
1145         if (ret)
1146                 goto free_newinfo_untrans;
1147         return 0;
1148
1149  free_newinfo_untrans:
1150         xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1151                 cleanup_entry(iter, net);
1152  free_newinfo:
1153         xt_free_table_info(newinfo);
1154         return ret;
1155 }
1156
1157 static int
1158 do_add_counters(struct net *net, const void __user *user,
1159                 unsigned int len, int compat)
1160 {
1161         unsigned int i;
1162         struct xt_counters_info tmp;
1163         struct xt_counters *paddc;
1164         struct xt_table *t;
1165         const struct xt_table_info *private;
1166         int ret = 0;
1167         struct ipt_entry *iter;
1168         unsigned int addend;
1169
1170         paddc = xt_copy_counters_from_user(user, len, &tmp, compat);
1171         if (IS_ERR(paddc))
1172                 return PTR_ERR(paddc);
1173
1174         t = xt_find_table_lock(net, AF_INET, tmp.name);
1175         if (IS_ERR(t)) {
1176                 ret = PTR_ERR(t);
1177                 goto free;
1178         }
1179
1180         local_bh_disable();
1181         private = t->private;
1182         if (private->number != tmp.num_counters) {
1183                 ret = -EINVAL;
1184                 goto unlock_up_free;
1185         }
1186
1187         i = 0;
1188         addend = xt_write_recseq_begin();
1189         xt_entry_foreach(iter, private->entries, private->size) {
1190                 struct xt_counters *tmp;
1191
1192                 tmp = xt_get_this_cpu_counter(&iter->counters);
1193                 ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt);
1194                 ++i;
1195         }
1196         xt_write_recseq_end(addend);
1197  unlock_up_free:
1198         local_bh_enable();
1199         xt_table_unlock(t);
1200         module_put(t->me);
1201  free:
1202         vfree(paddc);
1203
1204         return ret;
1205 }
1206
1207 #ifdef CONFIG_COMPAT
1208 struct compat_ipt_replace {
1209         char                    name[XT_TABLE_MAXNAMELEN];
1210         u32                     valid_hooks;
1211         u32                     num_entries;
1212         u32                     size;
1213         u32                     hook_entry[NF_INET_NUMHOOKS];
1214         u32                     underflow[NF_INET_NUMHOOKS];
1215         u32                     num_counters;
1216         compat_uptr_t           counters;       /* struct xt_counters * */
1217         struct compat_ipt_entry entries[0];
1218 };
1219
1220 static int
1221 compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1222                           unsigned int *size, struct xt_counters *counters,
1223                           unsigned int i)
1224 {
1225         struct xt_entry_target *t;
1226         struct compat_ipt_entry __user *ce;
1227         u_int16_t target_offset, next_offset;
1228         compat_uint_t origsize;
1229         const struct xt_entry_match *ematch;
1230         int ret = 0;
1231
1232         origsize = *size;
1233         ce = *dstptr;
1234         if (copy_to_user(ce, e, sizeof(struct ipt_entry)) != 0 ||
1235             copy_to_user(&ce->counters, &counters[i],
1236             sizeof(counters[i])) != 0)
1237                 return -EFAULT;
1238
1239         *dstptr += sizeof(struct compat_ipt_entry);
1240         *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1241
1242         xt_ematch_foreach(ematch, e) {
1243                 ret = xt_compat_match_to_user(ematch, dstptr, size);
1244                 if (ret != 0)
1245                         return ret;
1246         }
1247         target_offset = e->target_offset - (origsize - *size);
1248         t = ipt_get_target(e);
1249         ret = xt_compat_target_to_user(t, dstptr, size);
1250         if (ret)
1251                 return ret;
1252         next_offset = e->next_offset - (origsize - *size);
1253         if (put_user(target_offset, &ce->target_offset) != 0 ||
1254             put_user(next_offset, &ce->next_offset) != 0)
1255                 return -EFAULT;
1256         return 0;
1257 }
1258
1259 static int
1260 compat_find_calc_match(struct xt_entry_match *m,
1261                        const struct ipt_ip *ip,
1262                        int *size)
1263 {
1264         struct xt_match *match;
1265
1266         match = xt_request_find_match(NFPROTO_IPV4, m->u.user.name,
1267                                       m->u.user.revision);
1268         if (IS_ERR(match))
1269                 return PTR_ERR(match);
1270
1271         m->u.kernel.match = match;
1272         *size += xt_compat_match_offset(match);
1273         return 0;
1274 }
1275
1276 static void compat_release_entry(struct compat_ipt_entry *e)
1277 {
1278         struct xt_entry_target *t;
1279         struct xt_entry_match *ematch;
1280
1281         /* Cleanup all matches */
1282         xt_ematch_foreach(ematch, e)
1283                 module_put(ematch->u.kernel.match->me);
1284         t = compat_ipt_get_target(e);
1285         module_put(t->u.kernel.target->me);
1286 }
1287
1288 static int
1289 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1290                                   struct xt_table_info *newinfo,
1291                                   unsigned int *size,
1292                                   const unsigned char *base,
1293                                   const unsigned char *limit)
1294 {
1295         struct xt_entry_match *ematch;
1296         struct xt_entry_target *t;
1297         struct xt_target *target;
1298         unsigned int entry_offset;
1299         unsigned int j;
1300         int ret, off;
1301
1302         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0 ||
1303             (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit ||
1304             (unsigned char *)e + e->next_offset > limit)
1305                 return -EINVAL;
1306
1307         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1308                              sizeof(struct compat_xt_entry_target))
1309                 return -EINVAL;
1310
1311         if (!ip_checkentry(&e->ip))
1312                 return -EINVAL;
1313
1314         ret = xt_compat_check_entry_offsets(e, e->elems,
1315                                             e->target_offset, e->next_offset);
1316         if (ret)
1317                 return ret;
1318
1319         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1320         entry_offset = (void *)e - (void *)base;
1321         j = 0;
1322         xt_ematch_foreach(ematch, e) {
1323                 ret = compat_find_calc_match(ematch, &e->ip, &off);
1324                 if (ret != 0)
1325                         goto release_matches;
1326                 ++j;
1327         }
1328
1329         t = compat_ipt_get_target(e);
1330         target = xt_request_find_target(NFPROTO_IPV4, t->u.user.name,
1331                                         t->u.user.revision);
1332         if (IS_ERR(target)) {
1333                 ret = PTR_ERR(target);
1334                 goto release_matches;
1335         }
1336         t->u.kernel.target = target;
1337
1338         off += xt_compat_target_offset(target);
1339         *size += off;
1340         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1341         if (ret)
1342                 goto out;
1343
1344         return 0;
1345
1346 out:
1347         module_put(t->u.kernel.target->me);
1348 release_matches:
1349         xt_ematch_foreach(ematch, e) {
1350                 if (j-- == 0)
1351                         break;
1352                 module_put(ematch->u.kernel.match->me);
1353         }
1354         return ret;
1355 }
1356
1357 static void
1358 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1359                             unsigned int *size,
1360                             struct xt_table_info *newinfo, unsigned char *base)
1361 {
1362         struct xt_entry_target *t;
1363         struct ipt_entry *de;
1364         unsigned int origsize;
1365         int h;
1366         struct xt_entry_match *ematch;
1367
1368         origsize = *size;
1369         de = *dstptr;
1370         memcpy(de, e, sizeof(struct ipt_entry));
1371         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1372
1373         *dstptr += sizeof(struct ipt_entry);
1374         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1375
1376         xt_ematch_foreach(ematch, e)
1377                 xt_compat_match_from_user(ematch, dstptr, size);
1378
1379         de->target_offset = e->target_offset - (origsize - *size);
1380         t = compat_ipt_get_target(e);
1381         xt_compat_target_from_user(t, dstptr, size);
1382
1383         de->next_offset = e->next_offset - (origsize - *size);
1384
1385         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1386                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1387                         newinfo->hook_entry[h] -= origsize - *size;
1388                 if ((unsigned char *)de - base < newinfo->underflow[h])
1389                         newinfo->underflow[h] -= origsize - *size;
1390         }
1391 }
1392
1393 static int
1394 translate_compat_table(struct net *net,
1395                        struct xt_table_info **pinfo,
1396                        void **pentry0,
1397                        const struct compat_ipt_replace *compatr)
1398 {
1399         unsigned int i, j;
1400         struct xt_table_info *newinfo, *info;
1401         void *pos, *entry0, *entry1;
1402         struct compat_ipt_entry *iter0;
1403         struct ipt_replace repl;
1404         unsigned int size;
1405         int ret;
1406
1407         info = *pinfo;
1408         entry0 = *pentry0;
1409         size = compatr->size;
1410         info->number = compatr->num_entries;
1411
1412         j = 0;
1413         xt_compat_lock(AF_INET);
1414         ret = xt_compat_init_offsets(AF_INET, compatr->num_entries);
1415         if (ret)
1416                 goto out_unlock;
1417         /* Walk through entries, checking offsets. */
1418         xt_entry_foreach(iter0, entry0, compatr->size) {
1419                 ret = check_compat_entry_size_and_hooks(iter0, info, &size,
1420                                                         entry0,
1421                                                         entry0 + compatr->size);
1422                 if (ret != 0)
1423                         goto out_unlock;
1424                 ++j;
1425         }
1426
1427         ret = -EINVAL;
1428         if (j != compatr->num_entries)
1429                 goto out_unlock;
1430
1431         ret = -ENOMEM;
1432         newinfo = xt_alloc_table_info(size);
1433         if (!newinfo)
1434                 goto out_unlock;
1435
1436         newinfo->number = compatr->num_entries;
1437         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1438                 newinfo->hook_entry[i] = compatr->hook_entry[i];
1439                 newinfo->underflow[i] = compatr->underflow[i];
1440         }
1441         entry1 = newinfo->entries;
1442         pos = entry1;
1443         size = compatr->size;
1444         xt_entry_foreach(iter0, entry0, compatr->size)
1445                 compat_copy_entry_from_user(iter0, &pos, &size,
1446                                             newinfo, entry1);
1447
1448         /* all module references in entry0 are now gone.
1449          * entry1/newinfo contains a 64bit ruleset that looks exactly as
1450          * generated by 64bit userspace.
1451          *
1452          * Call standard translate_table() to validate all hook_entrys,
1453          * underflows, check for loops, etc.
1454          */
1455         xt_compat_flush_offsets(AF_INET);
1456         xt_compat_unlock(AF_INET);
1457
1458         memcpy(&repl, compatr, sizeof(*compatr));
1459
1460         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1461                 repl.hook_entry[i] = newinfo->hook_entry[i];
1462                 repl.underflow[i] = newinfo->underflow[i];
1463         }
1464
1465         repl.num_counters = 0;
1466         repl.counters = NULL;
1467         repl.size = newinfo->size;
1468         ret = translate_table(net, newinfo, entry1, &repl);
1469         if (ret)
1470                 goto free_newinfo;
1471
1472         *pinfo = newinfo;
1473         *pentry0 = entry1;
1474         xt_free_table_info(info);
1475         return 0;
1476
1477 free_newinfo:
1478         xt_free_table_info(newinfo);
1479         return ret;
1480 out_unlock:
1481         xt_compat_flush_offsets(AF_INET);
1482         xt_compat_unlock(AF_INET);
1483         xt_entry_foreach(iter0, entry0, compatr->size) {
1484                 if (j-- == 0)
1485                         break;
1486                 compat_release_entry(iter0);
1487         }
1488         return ret;
1489 }
1490
1491 static int
1492 compat_do_replace(struct net *net, void __user *user, unsigned int len)
1493 {
1494         int ret;
1495         struct compat_ipt_replace tmp;
1496         struct xt_table_info *newinfo;
1497         void *loc_cpu_entry;
1498         struct ipt_entry *iter;
1499
1500         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1501                 return -EFAULT;
1502
1503         /* overflow check */
1504         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1505                 return -ENOMEM;
1506         if (tmp.num_counters == 0)
1507                 return -EINVAL;
1508
1509         tmp.name[sizeof(tmp.name)-1] = 0;
1510
1511         newinfo = xt_alloc_table_info(tmp.size);
1512         if (!newinfo)
1513                 return -ENOMEM;
1514
1515         loc_cpu_entry = newinfo->entries;
1516         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1517                            tmp.size) != 0) {
1518                 ret = -EFAULT;
1519                 goto free_newinfo;
1520         }
1521
1522         ret = translate_compat_table(net, &newinfo, &loc_cpu_entry, &tmp);
1523         if (ret != 0)
1524                 goto free_newinfo;
1525
1526         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1527                            tmp.num_counters, compat_ptr(tmp.counters));
1528         if (ret)
1529                 goto free_newinfo_untrans;
1530         return 0;
1531
1532  free_newinfo_untrans:
1533         xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
1534                 cleanup_entry(iter, net);
1535  free_newinfo:
1536         xt_free_table_info(newinfo);
1537         return ret;
1538 }
1539
1540 static int
1541 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1542                       unsigned int len)
1543 {
1544         int ret;
1545
1546         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1547                 return -EPERM;
1548
1549         switch (cmd) {
1550         case IPT_SO_SET_REPLACE:
1551                 ret = compat_do_replace(sock_net(sk), user, len);
1552                 break;
1553
1554         case IPT_SO_SET_ADD_COUNTERS:
1555                 ret = do_add_counters(sock_net(sk), user, len, 1);
1556                 break;
1557
1558         default:
1559                 ret = -EINVAL;
1560         }
1561
1562         return ret;
1563 }
1564
1565 struct compat_ipt_get_entries {
1566         char name[XT_TABLE_MAXNAMELEN];
1567         compat_uint_t size;
1568         struct compat_ipt_entry entrytable[0];
1569 };
1570
1571 static int
1572 compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1573                             void __user *userptr)
1574 {
1575         struct xt_counters *counters;
1576         const struct xt_table_info *private = table->private;
1577         void __user *pos;
1578         unsigned int size;
1579         int ret = 0;
1580         unsigned int i = 0;
1581         struct ipt_entry *iter;
1582
1583         counters = alloc_counters(table);
1584         if (IS_ERR(counters))
1585                 return PTR_ERR(counters);
1586
1587         pos = userptr;
1588         size = total_size;
1589         xt_entry_foreach(iter, private->entries, total_size) {
1590                 ret = compat_copy_entry_to_user(iter, &pos,
1591                                                 &size, counters, i++);
1592                 if (ret != 0)
1593                         break;
1594         }
1595
1596         vfree(counters);
1597         return ret;
1598 }
1599
1600 static int
1601 compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1602                    int *len)
1603 {
1604         int ret;
1605         struct compat_ipt_get_entries get;
1606         struct xt_table *t;
1607
1608         if (*len < sizeof(get))
1609                 return -EINVAL;
1610
1611         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1612                 return -EFAULT;
1613
1614         if (*len != sizeof(struct compat_ipt_get_entries) + get.size)
1615                 return -EINVAL;
1616
1617         get.name[sizeof(get.name) - 1] = '\0';
1618
1619         xt_compat_lock(AF_INET);
1620         t = xt_find_table_lock(net, AF_INET, get.name);
1621         if (!IS_ERR(t)) {
1622                 const struct xt_table_info *private = t->private;
1623                 struct xt_table_info info;
1624                 ret = compat_table_info(private, &info);
1625                 if (!ret && get.size == info.size)
1626                         ret = compat_copy_entries_to_user(private->size,
1627                                                           t, uptr->entrytable);
1628                 else if (!ret)
1629                         ret = -EAGAIN;
1630
1631                 xt_compat_flush_offsets(AF_INET);
1632                 module_put(t->me);
1633                 xt_table_unlock(t);
1634         } else
1635                 ret = PTR_ERR(t);
1636
1637         xt_compat_unlock(AF_INET);
1638         return ret;
1639 }
1640
1641 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1642
1643 static int
1644 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1645 {
1646         int ret;
1647
1648         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1649                 return -EPERM;
1650
1651         switch (cmd) {
1652         case IPT_SO_GET_INFO:
1653                 ret = get_info(sock_net(sk), user, len, 1);
1654                 break;
1655         case IPT_SO_GET_ENTRIES:
1656                 ret = compat_get_entries(sock_net(sk), user, len);
1657                 break;
1658         default:
1659                 ret = do_ipt_get_ctl(sk, cmd, user, len);
1660         }
1661         return ret;
1662 }
1663 #endif
1664
1665 static int
1666 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1667 {
1668         int ret;
1669
1670         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1671                 return -EPERM;
1672
1673         switch (cmd) {
1674         case IPT_SO_SET_REPLACE:
1675                 ret = do_replace(sock_net(sk), user, len);
1676                 break;
1677
1678         case IPT_SO_SET_ADD_COUNTERS:
1679                 ret = do_add_counters(sock_net(sk), user, len, 0);
1680                 break;
1681
1682         default:
1683                 ret = -EINVAL;
1684         }
1685
1686         return ret;
1687 }
1688
1689 static int
1690 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1691 {
1692         int ret;
1693
1694         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
1695                 return -EPERM;
1696
1697         switch (cmd) {
1698         case IPT_SO_GET_INFO:
1699                 ret = get_info(sock_net(sk), user, len, 0);
1700                 break;
1701
1702         case IPT_SO_GET_ENTRIES:
1703                 ret = get_entries(sock_net(sk), user, len);
1704                 break;
1705
1706         case IPT_SO_GET_REVISION_MATCH:
1707         case IPT_SO_GET_REVISION_TARGET: {
1708                 struct xt_get_revision rev;
1709                 int target;
1710
1711                 if (*len != sizeof(rev)) {
1712                         ret = -EINVAL;
1713                         break;
1714                 }
1715                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1716                         ret = -EFAULT;
1717                         break;
1718                 }
1719                 rev.name[sizeof(rev.name)-1] = 0;
1720
1721                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1722                         target = 1;
1723                 else
1724                         target = 0;
1725
1726                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
1727                                                          rev.revision,
1728                                                          target, &ret),
1729                                         "ipt_%s", rev.name);
1730                 break;
1731         }
1732
1733         default:
1734                 ret = -EINVAL;
1735         }
1736
1737         return ret;
1738 }
1739
1740 static void __ipt_unregister_table(struct net *net, struct xt_table *table)
1741 {
1742         struct xt_table_info *private;
1743         void *loc_cpu_entry;
1744         struct module *table_owner = table->me;
1745         struct ipt_entry *iter;
1746
1747         private = xt_unregister_table(table);
1748
1749         /* Decrease module usage counts and free resources */
1750         loc_cpu_entry = private->entries;
1751         xt_entry_foreach(iter, loc_cpu_entry, private->size)
1752                 cleanup_entry(iter, net);
1753         if (private->number > private->initial_entries)
1754                 module_put(table_owner);
1755         xt_free_table_info(private);
1756 }
1757
1758 int ipt_register_table(struct net *net, const struct xt_table *table,
1759                        const struct ipt_replace *repl,
1760                        const struct nf_hook_ops *ops, struct xt_table **res)
1761 {
1762         int ret;
1763         struct xt_table_info *newinfo;
1764         struct xt_table_info bootstrap = {0};
1765         void *loc_cpu_entry;
1766         struct xt_table *new_table;
1767
1768         newinfo = xt_alloc_table_info(repl->size);
1769         if (!newinfo)
1770                 return -ENOMEM;
1771
1772         loc_cpu_entry = newinfo->entries;
1773         memcpy(loc_cpu_entry, repl->entries, repl->size);
1774
1775         ret = translate_table(net, newinfo, loc_cpu_entry, repl);
1776         if (ret != 0)
1777                 goto out_free;
1778
1779         new_table = xt_register_table(net, table, &bootstrap, newinfo);
1780         if (IS_ERR(new_table)) {
1781                 ret = PTR_ERR(new_table);
1782                 goto out_free;
1783         }
1784
1785         /* set res now, will see skbs right after nf_register_net_hooks */
1786         WRITE_ONCE(*res, new_table);
1787         if (!ops)
1788                 return 0;
1789
1790         ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks));
1791         if (ret != 0) {
1792                 __ipt_unregister_table(net, new_table);
1793                 *res = NULL;
1794         }
1795
1796         return ret;
1797
1798 out_free:
1799         xt_free_table_info(newinfo);
1800         return ret;
1801 }
1802
1803 void ipt_unregister_table(struct net *net, struct xt_table *table,
1804                           const struct nf_hook_ops *ops)
1805 {
1806         if (ops)
1807                 nf_unregister_net_hooks(net, ops, hweight32(table->valid_hooks));
1808         __ipt_unregister_table(net, table);
1809 }
1810
/*
 * Decide whether an ICMP (type, code) pair satisfies a rule.
 *
 * The pair matches when @test_type is the wildcard 0xFF, or when @type
 * equals @test_type and @code lies in the inclusive range
 * [@min_code, @max_code].  The outcome is flipped when @invert is set.
 */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
                     u_int8_t type, u_int8_t code,
                     bool invert)
{
        bool hit;

        if (test_type == 0xFF)
                hit = true;
        else
                hit = (type == test_type) &&
                      (code >= min_code) && (code <= max_code);

        return hit ^ invert;
}
1821
1822 static bool
1823 icmp_match(const struct sk_buff *skb, struct xt_action_param *par)
1824 {
1825         const struct icmphdr *ic;
1826         struct icmphdr _icmph;
1827         const struct ipt_icmp *icmpinfo = par->matchinfo;
1828
1829         /* Must not be a fragment. */
1830         if (par->fragoff != 0)
1831                 return false;
1832
1833         ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
1834         if (ic == NULL) {
1835                 /* We've been asked to examine this packet, and we
1836                  * can't.  Hence, no choice but to drop.
1837                  */
1838                 par->hotdrop = true;
1839                 return false;
1840         }
1841
1842         return icmp_type_code_match(icmpinfo->type,
1843                                     icmpinfo->code[0],
1844                                     icmpinfo->code[1],
1845                                     ic->type, ic->code,
1846                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1847 }
1848
1849 static int icmp_checkentry(const struct xt_mtchk_param *par)
1850 {
1851         const struct ipt_icmp *icmpinfo = par->matchinfo;
1852
1853         /* Must specify no unknown invflags */
1854         return (icmpinfo->invflags & ~IPT_ICMP_INV) ? -EINVAL : 0;
1855 }
1856
1857 static struct xt_target ipt_builtin_tg[] __read_mostly = {
1858         {
1859                 .name             = XT_STANDARD_TARGET,
1860                 .targetsize       = sizeof(int),
1861                 .family           = NFPROTO_IPV4,
1862 #ifdef CONFIG_COMPAT
1863                 .compatsize       = sizeof(compat_int_t),
1864                 .compat_from_user = compat_standard_from_user,
1865                 .compat_to_user   = compat_standard_to_user,
1866 #endif
1867         },
1868         {
1869                 .name             = XT_ERROR_TARGET,
1870                 .target           = ipt_error,
1871                 .targetsize       = XT_FUNCTION_MAXNAMELEN,
1872                 .family           = NFPROTO_IPV4,
1873         },
1874 };
1875
1876 static struct nf_sockopt_ops ipt_sockopts = {
1877         .pf             = PF_INET,
1878         .set_optmin     = IPT_BASE_CTL,
1879         .set_optmax     = IPT_SO_SET_MAX+1,
1880         .set            = do_ipt_set_ctl,
1881 #ifdef CONFIG_COMPAT
1882         .compat_set     = compat_do_ipt_set_ctl,
1883 #endif
1884         .get_optmin     = IPT_BASE_CTL,
1885         .get_optmax     = IPT_SO_GET_MAX+1,
1886         .get            = do_ipt_get_ctl,
1887 #ifdef CONFIG_COMPAT
1888         .compat_get     = compat_do_ipt_get_ctl,
1889 #endif
1890         .owner          = THIS_MODULE,
1891 };
1892
1893 static struct xt_match ipt_builtin_mt[] __read_mostly = {
1894         {
1895                 .name       = "icmp",
1896                 .match      = icmp_match,
1897                 .matchsize  = sizeof(struct ipt_icmp),
1898                 .checkentry = icmp_checkentry,
1899                 .proto      = IPPROTO_ICMP,
1900                 .family     = NFPROTO_IPV4,
1901         },
1902 };
1903
1904 static int __net_init ip_tables_net_init(struct net *net)
1905 {
1906         return xt_proto_init(net, NFPROTO_IPV4);
1907 }
1908
1909 static void __net_exit ip_tables_net_exit(struct net *net)
1910 {
1911         xt_proto_fini(net, NFPROTO_IPV4);
1912 }
1913
1914 static struct pernet_operations ip_tables_net_ops = {
1915         .init = ip_tables_net_init,
1916         .exit = ip_tables_net_exit,
1917 };
1918
1919 static int __init ip_tables_init(void)
1920 {
1921         int ret;
1922
1923         ret = register_pernet_subsys(&ip_tables_net_ops);
1924         if (ret < 0)
1925                 goto err1;
1926
1927         /* No one else will be downing sem now, so we won't sleep */
1928         ret = xt_register_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
1929         if (ret < 0)
1930                 goto err2;
1931         ret = xt_register_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
1932         if (ret < 0)
1933                 goto err4;
1934
1935         /* Register setsockopt */
1936         ret = nf_register_sockopt(&ipt_sockopts);
1937         if (ret < 0)
1938                 goto err5;
1939
1940         return 0;
1941
1942 err5:
1943         xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
1944 err4:
1945         xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
1946 err2:
1947         unregister_pernet_subsys(&ip_tables_net_ops);
1948 err1:
1949         return ret;
1950 }
1951
1952 static void __exit ip_tables_fini(void)
1953 {
1954         nf_unregister_sockopt(&ipt_sockopts);
1955
1956         xt_unregister_matches(ipt_builtin_mt, ARRAY_SIZE(ipt_builtin_mt));
1957         xt_unregister_targets(ipt_builtin_tg, ARRAY_SIZE(ipt_builtin_tg));
1958         unregister_pernet_subsys(&ip_tables_net_ops);
1959 }
1960
/* Symbols exported to other modules. */
EXPORT_SYMBOL(ipt_do_table);
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);

/* Module entry and exit points. */
module_init(ip_tables_init);
module_exit(ip_tables_fini);