// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2007-2014 Nicira, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/if_arp.h>
#include <linux/if_vlan.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/jhash.h>
#include <linux/delay.h>
#include <linux/time.h>
#include <linux/etherdevice.h>
#include <linux/genetlink.h>
#include <linux/kernel.h>
#include <linux/kthread.h>
#include <linux/mutex.h>
#include <linux/percpu.h>
#include <linux/rcupdate.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <linux/ethtool.h>
#include <linux/wait.h>
#include <asm/div64.h>
#include <linux/highmem.h>
#include <linux/netfilter_bridge.h>
#include <linux/netfilter_ipv4.h>
#include <linux/inetdevice.h>
#include <linux/list.h>
#include <linux/openvswitch.h>
#include <linux/rculist.h>
#include <linux/dmi.h>
#include <net/genetlink.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
#include <net/pkt_cls.h>

#include "datapath.h"
#include "flow.h"
#include "flow_table.h"
#include "flow_netlink.h"
#include "meter.h"
#include "openvswitch_trace.h"
#include "vport-internal_dev.h"
#include "vport-netdev.h"

unsigned int ovs_net_id __read_mostly;

static struct genl_family dp_packet_genl_family;
static struct genl_family dp_flow_genl_family;
static struct genl_family dp_datapath_genl_family;

static const struct nla_policy flow_policy[];

static const struct genl_multicast_group ovs_dp_flow_multicast_group = {
        .name = OVS_FLOW_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_datapath_multicast_group = {
        .name = OVS_DATAPATH_MCGROUP,
};

static const struct genl_multicast_group ovs_dp_vport_multicast_group = {
        .name = OVS_VPORT_MCGROUP,
};

/* Check whether we need to build a reply message.
 * OVS userspace sets the NLM_F_ECHO flag if it needs the reply.
 */
static bool ovs_must_notify(struct genl_family *family, struct genl_info *info,
                            unsigned int group)
{
        return info->nlhdr->nlmsg_flags & NLM_F_ECHO ||
               genl_has_listeners(family, genl_info_net(info), group);
}

static void ovs_notify(struct genl_family *family,
                       struct sk_buff *skb, struct genl_info *info)
{
        genl_notify(family, skb, info, 0, GFP_KERNEL);
}

/**
 * DOC: Locking:
 *
 * All writes to device state (add/remove datapath or port, set operations
 * on vports, etc.) and writes to other state (flow table modifications,
 * miscellaneous datapath parameters, etc.) are protected by ovs_lock.
 *
 * Reads are protected by RCU.
 *
 * There are a few special cases (mostly stats) that have their own
 * synchronization but they nest under all of the above and don't interact
 * with each other.
 *
 * The RTNL lock nests inside ovs_mutex.
 */

static DEFINE_MUTEX(ovs_mutex);

void ovs_lock(void)
{
        mutex_lock(&ovs_mutex);
}

void ovs_unlock(void)
{
        mutex_unlock(&ovs_mutex);
}

#ifdef CONFIG_LOCKDEP
int lockdep_ovsl_is_held(void)
{
        if (debug_locks)
                return lockdep_is_held(&ovs_mutex);
        else
                return 1;
}
#endif

static struct vport *new_vport(const struct vport_parms *);
static int queue_gso_packets(struct datapath *dp, struct sk_buff *,
                             const struct sw_flow_key *,
                             const struct dp_upcall_info *,
                             uint32_t cutlen);
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *,
                                  const struct sw_flow_key *,
                                  const struct dp_upcall_info *,
                                  uint32_t cutlen);

static void ovs_dp_masks_rebalance(struct work_struct *work);

static int ovs_dp_set_upcall_portids(struct datapath *, const struct nlattr *);

/* Must be called with rcu_read_lock or ovs_mutex. */
const char *ovs_dp_name(const struct datapath *dp)
{
        struct vport *vport = ovs_vport_ovsl_rcu(dp, OVSP_LOCAL);
        return ovs_vport_name(vport);
}

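/* Return the ifindex of the datapath's local (OVSP_LOCAL) port's netdev,
 * or 0 if the datapath has no local port.  Takes the RCU read lock
 * internally.
 */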
static int get_dpifindex(const struct datapath *dp)
{
        struct vport *local;
        int ifindex;

        rcu_read_lock();

        local = ovs_vport_rcu(dp, OVSP_LOCAL);
        if (local)
                ifindex = local->dev->ifindex;
        else
                ifindex = 0;

        rcu_read_unlock();

        return ifindex;
}

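/* RCU callback that frees a datapath's flow table, stats, ports, meters
 * and upcall portids once all outstanding RCU readers are done with it.
 */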
static void destroy_dp_rcu(struct rcu_head *rcu)
{
        struct datapath *dp = container_of(rcu, struct datapath, rcu);

        ovs_flow_tbl_destroy(&dp->table);
        free_percpu(dp->stats_percpu);
        kfree(dp->ports);
        ovs_meters_exit(dp);
        kfree(rcu_dereference_raw(dp->upcall_portids));
        kfree(dp);
}

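/* Map a port number to its bucket in the datapath's vport hash table. */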
static struct hlist_head *vport_hash_bucket(const struct datapath *dp,
                                            u16 port_no)
{
        return &dp->ports[port_no & (DP_VPORT_HASH_BUCKETS - 1)];
}

/* Called with ovs_mutex or RCU read lock. */
struct vport *ovs_lookup_vport(const struct datapath *dp, u16 port_no)
{
        struct vport *vport;
        struct hlist_head *head;

        head = vport_hash_bucket(dp, port_no);
        hlist_for_each_entry_rcu(vport, head, dp_hash_node,
                                 lockdep_ovsl_is_held()) {
                if (vport->port_no == port_no)
                        return vport;
        }
        return NULL;
}

/* Called with ovs_mutex. */
static struct vport *new_vport(const struct vport_parms *parms)
{
        struct vport *vport;

        vport = ovs_vport_add(parms);
        if (!IS_ERR(vport)) {
                struct datapath *dp = parms->dp;
                struct hlist_head *head = vport_hash_bucket(dp, vport->port_no);

                hlist_add_head_rcu(&vport->dp_hash_node, head);
        }
        return vport;
}

void ovs_dp_detach_port(struct vport *p)
{
        ASSERT_OVSL();

        /* First drop references to device. */
        hlist_del_rcu(&p->dp_hash_node);

        /* Then destroy it. */
        ovs_vport_del(p);
}

/* Fast path: look up the flow for a received packet and execute its
 * actions, or make an upcall to userspace on a miss.  Must be called
 * with rcu_read_lock.
 */
void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key)
{
        const struct vport *p = OVS_CB(skb)->input_vport;
        struct datapath *dp = p->dp;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct dp_stats_percpu *stats;
        u64 *stats_counter;
        u32 n_mask_hit;
        u32 n_cache_hit;
        int error;

        stats = this_cpu_ptr(dp->stats_percpu);

        /* Look up flow. */
        flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
                                         &n_mask_hit, &n_cache_hit);
        if (unlikely(!flow)) {
                struct dp_upcall_info upcall;

                memset(&upcall, 0, sizeof(upcall));
                upcall.cmd = OVS_PACKET_CMD_MISS;

                if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU)
                        upcall.portid =
                            ovs_dp_get_upcall_portid(dp, smp_processor_id());
                else
                        upcall.portid = ovs_vport_find_upcall_portid(p, skb);

                upcall.mru = OVS_CB(skb)->mru;
                error = ovs_dp_upcall(dp, skb, key, &upcall, 0);
                if (unlikely(error))
                        kfree_skb(skb);
                else
                        consume_skb(skb);
                stats_counter = &stats->n_missed;
                goto out;
        }

        ovs_flow_stats_update(flow, key->tp.flags, skb);
        sf_acts = rcu_dereference(flow->sf_acts);
        error = ovs_execute_actions(dp, skb, sf_acts, key);
        if (unlikely(error))
                net_dbg_ratelimited("ovs: action execution error on datapath %s: %d\n",
                                    ovs_dp_name(dp), error);

        stats_counter = &stats->n_hit;

out:
        /* Update datapath statistics. */
        u64_stats_update_begin(&stats->syncp);
        (*stats_counter)++;
        stats->n_mask_hit += n_mask_hit;
        stats->n_cache_hit += n_cache_hit;
        u64_stats_update_end(&stats->syncp);
}

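/* Send 'skb' to userspace on the Netlink port named by 'upcall_info',
 * segmenting it first if it is a GSO packet.  A portid of 0 means no
 * listener is attached; in that and any other failure case the per-CPU
 * n_lost counter is bumped.  The caller retains ownership of 'skb'.
 */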
int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb,
                  const struct sw_flow_key *key,
                  const struct dp_upcall_info *upcall_info,
                  uint32_t cutlen)
{
        struct dp_stats_percpu *stats;
        int err;

        if (trace_ovs_dp_upcall_enabled())
                trace_ovs_dp_upcall(dp, skb, key, upcall_info);

        if (upcall_info->portid == 0) {
                err = -ENOTCONN;
                goto err;
        }

        if (!skb_is_gso(skb))
                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
        else
                err = queue_gso_packets(dp, skb, key, upcall_info, cutlen);
        if (err)
                goto err;

        return 0;

err:
        stats = this_cpu_ptr(dp->stats_percpu);

        u64_stats_update_begin(&stats->syncp);
        stats->n_lost++;
        u64_stats_update_end(&stats->syncp);

        return err;
}

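/* Segment a GSO packet and queue each segment to userspace as a separate
 * upcall.  For SKB_GSO_UDP packets, segments after the first are re-keyed
 * as later IP fragments before being queued.
 */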
static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
                             const struct sw_flow_key *key,
                             const struct dp_upcall_info *upcall_info,
                             uint32_t cutlen)
{
        unsigned int gso_type = skb_shinfo(skb)->gso_type;
        struct sw_flow_key later_key;
        struct sk_buff *segs, *nskb;
        int err;

        BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET);
        segs = __skb_gso_segment(skb, NETIF_F_SG, false);
        if (IS_ERR(segs))
                return PTR_ERR(segs);
        if (segs == NULL)
                return -EINVAL;

        if (gso_type & SKB_GSO_UDP) {
                /* The initial flow key extracted by ovs_flow_key_extract()
                 * in this case is for a first fragment, so we need to
                 * properly mark later fragments.
                 */
                later_key = *key;
                later_key.ip.frag = OVS_FRAG_TYPE_LATER;
        }

        /* Queue all of the segments. */
        skb_list_walk_safe(segs, skb, nskb) {
                if (gso_type & SKB_GSO_UDP && skb != segs)
                        key = &later_key;

                err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen);
                if (err)
                        break;
        }

        /* Free all of the segments. */
        skb_list_walk_safe(segs, skb, nskb) {
                if (err)
                        kfree_skb(skb);
                else
                        consume_skb(skb);
        }
        return err;
}

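/* Upper bound on the Netlink message size needed for an upcall, counting
 * each optional OVS_PACKET_ATTR_* attribute only when it will be sent.
 */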
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
                              unsigned int hdrlen, int actions_attrlen)
{
        size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
                + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
                + nla_total_size(ovs_key_attr_size()) /* OVS_PACKET_ATTR_KEY */
                + nla_total_size(sizeof(unsigned int)) /* OVS_PACKET_ATTR_LEN */
                + nla_total_size(sizeof(u64)); /* OVS_PACKET_ATTR_HASH */

        /* OVS_PACKET_ATTR_USERDATA */
        if (upcall_info->userdata)
                size += NLA_ALIGN(upcall_info->userdata->nla_len);

        /* OVS_PACKET_ATTR_EGRESS_TUN_KEY */
        if (upcall_info->egress_tun_info)
                size += nla_total_size(ovs_tun_key_attr_size());

        /* OVS_PACKET_ATTR_ACTIONS */
        if (upcall_info->actions_len)
                size += nla_total_size(actions_attrlen);

        /* OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru)
                size += nla_total_size(sizeof(upcall_info->mru));

        return size;
}

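/* Zero-pad OVS_PACKET_ATTR_PACKET up to NLA_ALIGNTO unless userspace has
 * opted in to unaligned Netlink attributes via OVS_DP_F_UNALIGNED.
 */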
static void pad_packet(struct datapath *dp, struct sk_buff *skb)
{
        if (!(dp->user_features & OVS_DP_F_UNALIGNED)) {
                size_t plen = NLA_ALIGN(skb->len) - skb->len;

                if (plen > 0)
                        skb_put_zero(skb, plen);
        }
}

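/* Build a Netlink message for the command in 'upcall_info' around 'skb',
 * copying the packet data with skb_zerocopy() where userspace allows it,
 * and unicast the result to the upcall portid.
 */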
static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
                                  const struct sw_flow_key *key,
                                  const struct dp_upcall_info *upcall_info,
                                  uint32_t cutlen)
{
        struct ovs_header *upcall;
        struct sk_buff *nskb = NULL;
        struct sk_buff *user_skb = NULL; /* to be queued to userspace */
        struct nlattr *nla;
        size_t len;
        unsigned int hlen;
        int err, dp_ifindex;
        u64 hash;

        dp_ifindex = get_dpifindex(dp);
        if (!dp_ifindex)
                return -ENODEV;

        if (skb_vlan_tag_present(skb)) {
                nskb = skb_clone(skb, GFP_ATOMIC);
                if (!nskb)
                        return -ENOMEM;

                nskb = __vlan_hwaccel_push_inside(nskb);
                if (!nskb)
                        return -ENOMEM;

                skb = nskb;
        }

        if (nla_attr_size(skb->len) > USHRT_MAX) {
                err = -EFBIG;
                goto out;
        }

        /* Complete checksum if needed */
        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_csum_hwoffload_help(skb, 0)))
                goto out;

        /* Older versions of OVS user space enforce alignment of the last
         * Netlink attribute to NLA_ALIGNTO which would require extensive
         * padding logic. Only perform zerocopy if padding is not required.
         */
        if (dp->user_features & OVS_DP_F_UNALIGNED)
                hlen = skb_zerocopy_headlen(skb);
        else
                hlen = skb->len;

        len = upcall_msg_size(upcall_info, hlen - cutlen,
                              OVS_CB(skb)->acts_origlen);
        user_skb = genlmsg_new(len, GFP_ATOMIC);
        if (!user_skb) {
                err = -ENOMEM;
                goto out;
        }

        upcall = genlmsg_put(user_skb, 0, 0, &dp_packet_genl_family,
                             0, upcall_info->cmd);
        if (!upcall) {
                err = -EINVAL;
                goto out;
        }
        upcall->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_key(key, key, OVS_PACKET_ATTR_KEY, false, user_skb);
        if (err)
                goto out;

        if (upcall_info->userdata)
                __nla_put(user_skb, OVS_PACKET_ATTR_USERDATA,
                          nla_len(upcall_info->userdata),
                          nla_data(upcall_info->userdata));

        if (upcall_info->egress_tun_info) {
                nla = nla_nest_start_noflag(user_skb,
                                            OVS_PACKET_ATTR_EGRESS_TUN_KEY);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_tunnel_info(user_skb,
                                              upcall_info->egress_tun_info);
                if (err)
                        goto out;

                nla_nest_end(user_skb, nla);
        }

        if (upcall_info->actions_len) {
                nla = nla_nest_start_noflag(user_skb, OVS_PACKET_ATTR_ACTIONS);
                if (!nla) {
                        err = -EMSGSIZE;
                        goto out;
                }
                err = ovs_nla_put_actions(upcall_info->actions,
                                          upcall_info->actions_len,
                                          user_skb);
                if (!err)
                        nla_nest_end(user_skb, nla);
                else
                        nla_nest_cancel(user_skb, nla);
        }

        /* Add OVS_PACKET_ATTR_MRU */
        if (upcall_info->mru &&
            nla_put_u16(user_skb, OVS_PACKET_ATTR_MRU, upcall_info->mru)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Add OVS_PACKET_ATTR_LEN when packet is truncated */
        if (cutlen > 0 &&
            nla_put_u32(user_skb, OVS_PACKET_ATTR_LEN, skb->len)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Add OVS_PACKET_ATTR_HASH */
        hash = skb_get_hash_raw(skb);
        if (skb->sw_hash)
                hash |= OVS_PACKET_HASH_SW_BIT;

        if (skb->l4_hash)
                hash |= OVS_PACKET_HASH_L4_BIT;

        if (nla_put(user_skb, OVS_PACKET_ATTR_HASH, sizeof(u64), &hash)) {
                err = -ENOBUFS;
                goto out;
        }

        /* Only reserve room for attribute header, packet data is added
         * in skb_zerocopy() */
        if (!(nla = nla_reserve(user_skb, OVS_PACKET_ATTR_PACKET, 0))) {
                err = -ENOBUFS;
                goto out;
        }
        nla->nla_len = nla_attr_size(skb->len - cutlen);

        err = skb_zerocopy(user_skb, skb, skb->len - cutlen, hlen);
        if (err)
                goto out;

        /* Pad OVS_PACKET_ATTR_PACKET if linear copy was performed */
        pad_packet(dp, user_skb);

        ((struct nlmsghdr *) user_skb->data)->nlmsg_len = user_skb->len;

        err = genlmsg_unicast(ovs_dp_get_net(dp), user_skb, upcall_info->portid);
        user_skb = NULL;
out:
        if (err)
                skb_tx_error(skb);
        kfree_skb(user_skb);
        kfree_skb(nskb);
        return err;
}

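/* Handler for OVS_PACKET_CMD_EXECUTE: reconstruct a packet supplied by
 * userspace, wrap it in a temporary flow built from the given key and
 * actions, and execute those actions as if the packet had arrived on
 * the specified input vport.
 */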
static int ovs_packet_cmd_execute(struct sk_buff *skb, struct genl_info *info)
{
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct sw_flow_actions *acts;
        struct sk_buff *packet;
        struct sw_flow *flow;
        struct sw_flow_actions *sf_acts;
        struct datapath *dp;
        struct vport *input_vport;
        u16 mru = 0;
        u64 hash;
        int len;
        int err;
        bool log = !a[OVS_PACKET_ATTR_PROBE];

        err = -EINVAL;
        if (!a[OVS_PACKET_ATTR_PACKET] || !a[OVS_PACKET_ATTR_KEY] ||
            !a[OVS_PACKET_ATTR_ACTIONS])
                goto err;

        len = nla_len(a[OVS_PACKET_ATTR_PACKET]);
        packet = __dev_alloc_skb(NET_IP_ALIGN + len, GFP_KERNEL);
        err = -ENOMEM;
        if (!packet)
                goto err;
        skb_reserve(packet, NET_IP_ALIGN);

        nla_memcpy(__skb_put(packet, len), a[OVS_PACKET_ATTR_PACKET], len);

        /* Set packet's mru */
        if (a[OVS_PACKET_ATTR_MRU]) {
                mru = nla_get_u16(a[OVS_PACKET_ATTR_MRU]);
                packet->ignore_df = 1;
        }
        OVS_CB(packet)->mru = mru;

        if (a[OVS_PACKET_ATTR_HASH]) {
                hash = nla_get_u64(a[OVS_PACKET_ATTR_HASH]);

                __skb_set_hash(packet, hash & 0xFFFFFFFFULL,
                               !!(hash & OVS_PACKET_HASH_SW_BIT),
                               !!(hash & OVS_PACKET_HASH_L4_BIT));
        }

        /* Build an sw_flow for sending this packet. */
        flow = ovs_flow_alloc();
        err = PTR_ERR(flow);
        if (IS_ERR(flow))
                goto err_kfree_skb;

        err = ovs_flow_key_extract_userspace(net, a[OVS_PACKET_ATTR_KEY],
                                             packet, &flow->key, log);
        if (err)
                goto err_flow_free;

        err = ovs_nla_copy_actions(net, a[OVS_PACKET_ATTR_ACTIONS],
                                   &flow->key, &acts, log);
        if (err)
                goto err_flow_free;

        rcu_assign_pointer(flow->sf_acts, acts);
        packet->priority = flow->key.phy.priority;
        packet->mark = flow->key.phy.skb_mark;

        rcu_read_lock();
        dp = get_dp_rcu(net, ovs_header->dp_ifindex);
        err = -ENODEV;
        if (!dp)
                goto err_unlock;

        input_vport = ovs_vport_rcu(dp, flow->key.phy.in_port);
        if (!input_vport)
                input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);

        if (!input_vport)
                goto err_unlock;

        packet->dev = input_vport->dev;
        OVS_CB(packet)->input_vport = input_vport;
        sf_acts = rcu_dereference(flow->sf_acts);

        local_bh_disable();
        err = ovs_execute_actions(dp, packet, sf_acts, &flow->key);
        local_bh_enable();
        rcu_read_unlock();

        ovs_flow_free(flow, false);
        return err;

err_unlock:
        rcu_read_unlock();
err_flow_free:
        ovs_flow_free(flow, false);
err_kfree_skb:
        kfree_skb(packet);
err:
        return err;
}

static const struct nla_policy packet_policy[OVS_PACKET_ATTR_MAX + 1] = {
        [OVS_PACKET_ATTR_PACKET] = { .len = ETH_HLEN },
        [OVS_PACKET_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_PACKET_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_PACKET_ATTR_MRU] = { .type = NLA_U16 },
        [OVS_PACKET_ATTR_HASH] = { .type = NLA_U64 },
};

static const struct genl_small_ops dp_packet_genl_ops[] = {
        { .cmd = OVS_PACKET_CMD_EXECUTE,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_packet_cmd_execute
        }
};

static struct genl_family dp_packet_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_PACKET_FAMILY,
        .version = OVS_PACKET_VERSION,
        .maxattr = OVS_PACKET_ATTR_MAX,
        .policy = packet_policy,
        .netnsok = true,
        .parallel_ops = true,
        .small_ops = dp_packet_genl_ops,
        .n_small_ops = ARRAY_SIZE(dp_packet_genl_ops),
        .module = THIS_MODULE,
};

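/* Aggregate the per-CPU datapath counters into 'stats' and 'mega_stats',
 * using the u64_stats seqcount to obtain a consistent snapshot per CPU.
 */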
static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats,
                         struct ovs_dp_megaflow_stats *mega_stats)
{
        int i;

        memset(mega_stats, 0, sizeof(*mega_stats));

        stats->n_flows = ovs_flow_tbl_count(&dp->table);
        mega_stats->n_masks = ovs_flow_tbl_num_masks(&dp->table);

        stats->n_hit = stats->n_missed = stats->n_lost = 0;

        for_each_possible_cpu(i) {
                const struct dp_stats_percpu *percpu_stats;
                struct dp_stats_percpu local_stats;
                unsigned int start;

                percpu_stats = per_cpu_ptr(dp->stats_percpu, i);

                do {
                        start = u64_stats_fetch_begin_irq(&percpu_stats->syncp);
                        local_stats = *percpu_stats;
                } while (u64_stats_fetch_retry_irq(&percpu_stats->syncp, start));

                stats->n_hit += local_stats.n_hit;
                stats->n_missed += local_stats.n_missed;
                stats->n_lost += local_stats.n_lost;
                mega_stats->n_mask_hit += local_stats.n_mask_hit;
                mega_stats->n_cache_hit += local_stats.n_cache_hit;
        }
}

static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags)
{
        return ovs_identifier_is_ufid(sfid) &&
               !(ufid_flags & OVS_UFID_F_OMIT_KEY);
}

static bool should_fill_mask(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_MASK);
}

static bool should_fill_actions(uint32_t ufid_flags)
{
        return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS);
}

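/* Estimate the Netlink message size needed to describe one flow, taking
 * the OVS_UFID_F_OMIT_* flags into account.
 */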
static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts,
                                    const struct sw_flow_id *sfid,
                                    uint32_t ufid_flags)
{
        size_t len = NLMSG_ALIGN(sizeof(struct ovs_header));

        /* OVS_FLOW_ATTR_UFID, or unmasked flow key as fallback;
         * see ovs_nla_put_identifier()
         */
        if (sfid && ovs_identifier_is_ufid(sfid))
                len += nla_total_size(sfid->ufid_len);
        else
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_KEY */
        if (!sfid || should_fill_key(sfid, ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_MASK */
        if (should_fill_mask(ufid_flags))
                len += nla_total_size(ovs_key_attr_size());

        /* OVS_FLOW_ATTR_ACTIONS */
        if (should_fill_actions(ufid_flags))
                len += nla_total_size(acts->orig_len);

        return len
                + nla_total_size_64bit(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */
                + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */
                + nla_total_size_64bit(8); /* OVS_FLOW_ATTR_USED */
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_stats(const struct sw_flow *flow,
                                   struct sk_buff *skb)
{
        struct ovs_flow_stats stats;
        __be16 tcp_flags;
        unsigned long used;

        ovs_flow_stats_get(flow, &stats, &used, &tcp_flags);

        if (used &&
            nla_put_u64_64bit(skb, OVS_FLOW_ATTR_USED, ovs_flow_used_time(used),
                              OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if (stats.n_packets &&
            nla_put_64bit(skb, OVS_FLOW_ATTR_STATS,
                          sizeof(struct ovs_flow_stats), &stats,
                          OVS_FLOW_ATTR_PAD))
                return -EMSGSIZE;

        if ((u8)ntohs(tcp_flags) &&
             nla_put_u8(skb, OVS_FLOW_ATTR_TCP_FLAGS, (u8)ntohs(tcp_flags)))
                return -EMSGSIZE;

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow,
                                     struct sk_buff *skb, int skb_orig_len)
{
        struct nlattr *start;
        int err;

        /* If OVS_FLOW_ATTR_ACTIONS doesn't fit, skip dumping the actions if
         * this is the first flow to be dumped into 'skb'.  This is unusual for
         * Netlink but individual action lists can be longer than
         * NLMSG_GOODSIZE and thus entirely undumpable if we didn't do this.
         * The userspace caller can always fetch the actions separately if it
         * really wants them.  (Most userspace callers in fact don't care.)
         *
         * This can only fail for dump operations because the skb is always
         * properly sized for single flows.
         */
        start = nla_nest_start_noflag(skb, OVS_FLOW_ATTR_ACTIONS);
        if (start) {
                const struct sw_flow_actions *sf_acts;

                sf_acts = rcu_dereference_ovsl(flow->sf_acts);
                err = ovs_nla_put_actions(sf_acts->actions,
                                          sf_acts->actions_len, skb);

                if (!err)
                        nla_nest_end(skb, start);
                else {
                        if (skb_orig_len)
                                return err;

                        nla_nest_cancel(skb, start);
                }
        } else if (skb_orig_len) {
                return -EMSGSIZE;
        }

        return 0;
}

/* Called with ovs_mutex or RCU read lock. */
static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex,
                                  struct sk_buff *skb, u32 portid,
                                  u32 seq, u32 flags, u8 cmd, u32 ufid_flags)
{
        const int skb_orig_len = skb->len;
        struct ovs_header *ovs_header;
        int err;

        ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family,
                                 flags, cmd);
        if (!ovs_header)
                return -EMSGSIZE;

        ovs_header->dp_ifindex = dp_ifindex;

        err = ovs_nla_put_identifier(flow, skb);
        if (err)
                goto error;

        if (should_fill_key(&flow->id, ufid_flags)) {
                err = ovs_nla_put_masked_key(flow, skb);
                if (err)
                        goto error;
        }

        if (should_fill_mask(ufid_flags)) {
                err = ovs_nla_put_mask(flow, skb);
                if (err)
                        goto error;
        }

        err = ovs_flow_cmd_fill_stats(flow, skb);
        if (err)
                goto error;

        if (should_fill_actions(ufid_flags)) {
                err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len);
                if (err)
                        goto error;
        }

        genlmsg_end(skb, ovs_header);
        return 0;

error:
        genlmsg_cancel(skb, ovs_header);
        return err;
}

/* May not be called with RCU read lock. */
static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts,
                                               const struct sw_flow_id *sfid,
                                               struct genl_info *info,
                                               bool always,
                                               uint32_t ufid_flags)
{
        struct sk_buff *skb;
        size_t len;

        if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0))
                return NULL;

        len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags);
        skb = genlmsg_new(len, GFP_KERNEL);
        if (!skb)
                return ERR_PTR(-ENOMEM);

        return skb;
}

/* Called with ovs_mutex. */
static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow,
                                               int dp_ifindex,
                                               struct genl_info *info, u8 cmd,
                                               bool always, u32 ufid_flags)
{
        struct sk_buff *skb;
        int retval;

        skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts),
                                      &flow->id, info, always, ufid_flags);
        if (IS_ERR_OR_NULL(skb))
                return skb;

        retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb,
                                        info->snd_portid, info->snd_seq, 0,
                                        cmd, ufid_flags);
        if (WARN_ON_ONCE(retval < 0)) {
                kfree_skb(skb);
                skb = ERR_PTR(retval);
        }
        return skb;
}

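/* Handler for OVS_FLOW_CMD_NEW: insert a new flow, or, if an equivalent
 * flow already exists and the request does not insist on exclusive
 * creation, replace that flow's actions.
 */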
static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow *flow = NULL, *new_flow;
        struct sw_flow_mask mask;
        struct sk_buff *reply;
        struct datapath *dp;
        struct sw_flow_actions *acts;
        struct sw_flow_match match;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error;
        bool log = !a[OVS_FLOW_ATTR_PROBE];

        /* Must have key and actions. */
        error = -EINVAL;
        if (!a[OVS_FLOW_ATTR_KEY]) {
                OVS_NLERR(log, "Flow key attr not present in new flow.");
                goto error;
        }
        if (!a[OVS_FLOW_ATTR_ACTIONS]) {
                OVS_NLERR(log, "Flow actions attr not present in new flow.");
                goto error;
        }

        /* Most of the time we need to allocate a new flow, so do it before
         * locking.
         */
        new_flow = ovs_flow_alloc();
        if (IS_ERR(new_flow)) {
                error = PTR_ERR(new_flow);
                goto error;
        }

        /* Extract key. */
        ovs_match_init(&match, &new_flow->key, false, &mask);
        error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                  a[OVS_FLOW_ATTR_MASK], log);
        if (error)
                goto err_kfree_flow;

        /* Extract flow identifier. */
        error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
                                       &new_flow->key, log);
        if (error)
                goto err_kfree_flow;

        /* unmasked key is needed to match when ufid is not used. */
        if (ovs_identifier_is_key(&new_flow->id))
                match.key = new_flow->id.unmasked_key;

        ovs_flow_mask_key(&new_flow->key, &new_flow->key, true, &mask);

        /* Validate actions. */
        error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
                                     &new_flow->key, &acts, log);
        if (error) {
                OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
                goto err_kfree_flow;
        }

        reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
                                        ufid_flags);
        if (IS_ERR(reply)) {
                error = PTR_ERR(reply);
                goto err_kfree_acts;
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }

        /* Check if this is a duplicate flow */
        if (ovs_identifier_is_ufid(&new_flow->id))
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id);
        if (!flow)
                flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->key);
        if (likely(!flow)) {
                rcu_assign_pointer(new_flow->sf_acts, acts);

                /* Put flow in bucket. */
                error = ovs_flow_tbl_insert(&dp->table, new_flow, &mask);
                if (unlikely(error)) {
                        acts = NULL;
                        goto err_unlock_ovs;
                }

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(new_flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();
        } else {
                struct sw_flow_actions *old_acts;

                /* Bail out if we're not allowed to modify an existing flow.
                 * We accept NLM_F_CREATE in place of the intended NLM_F_EXCL
                 * because Generic Netlink treats the latter as a dump
                 * request.  We also accept NLM_F_EXCL in case that bug ever
                 * gets fixed.
                 */
                if (unlikely(info->nlhdr->nlmsg_flags & (NLM_F_CREATE
                                                         | NLM_F_EXCL))) {
                        error = -EEXIST;
                        goto err_unlock_ovs;
                }
                /* The flow identifier has to be the same for flow updates.
                 * Look for any overlapping flow.
                 */
                if (unlikely(!ovs_flow_cmp(flow, &match))) {
                        if (ovs_identifier_is_key(&flow->id))
                                flow = ovs_flow_tbl_lookup_exact(&dp->table,
                                                                 &match);
                        else /* UFID matches but key is different */
                                flow = NULL;
                        if (!flow) {
                                error = -ENOENT;
                                goto err_unlock_ovs;
                        }
                }
                /* Update actions. */
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_NEW,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
                ovs_unlock();

                ovs_nla_free_flow_actions_rcu(old_acts);
                ovs_flow_free(new_flow, false);
        }

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
err_kfree_flow:
        ovs_flow_free(new_flow, false);
error:
        return error;
}

/* Factor out action copy to avoid "Wframe-larger-than=1024" warning. */
static noinline_for_stack
struct sw_flow_actions *get_flow_actions(struct net *net,
                                         const struct nlattr *a,
                                         const struct sw_flow_key *key,
                                         const struct sw_flow_mask *mask,
                                         bool log)
{
        struct sw_flow_actions *acts;
        struct sw_flow_key masked_key;
        int error;

        ovs_flow_mask_key(&masked_key, key, true, mask);
        error = ovs_nla_copy_actions(net, a, &masked_key, &acts, log);
        if (error) {
                OVS_NLERR(log,
                          "Actions may not be safe on all matching packets");
                return ERR_PTR(error);
        }

        return acts;
}

/* Factor out match-init and action-copy to avoid
 * "Wframe-larger-than=1024" warning.  Because the mask is only used to
 * get the actions, a separate function saves some stack space.
 *
 * If neither a key nor an actions attribute is present, we return 0
 * directly; in that case the caller will not use the match either.  If
 * an actions attribute is present, we try to copy the actions and save
 * them in *acts.  Before returning, we reset the match->mask pointer,
 * because we must not return a match object with a dangling reference
 * to the on-stack mask.
 */
static noinline_for_stack int
ovs_nla_init_match_and_action(struct net *net,
                              struct sw_flow_match *match,
                              struct sw_flow_key *key,
                              struct nlattr **a,
                              struct sw_flow_actions **acts,
                              bool log)
{
        struct sw_flow_mask mask;
        int error = 0;

        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(match, key, true, &mask);
                error = ovs_nla_get_match(net, match, a[OVS_FLOW_ATTR_KEY],
                                          a[OVS_FLOW_ATTR_MASK], log);
                if (error)
                        goto error;
        }

        if (a[OVS_FLOW_ATTR_ACTIONS]) {
                if (!a[OVS_FLOW_ATTR_KEY]) {
                        OVS_NLERR(log,
                                  "Flow key attribute not present in set flow.");
                        error = -EINVAL;
                        goto error;
                }

                *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key,
                                         &mask, log);
                if (IS_ERR(*acts)) {
                        error = PTR_ERR(*acts);
                        goto error;
                }
        }

        /* On success, error is 0. */
error:
        match->mask = NULL;
        return error;
}

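/* Handler for OVS_FLOW_CMD_SET: update an existing flow's actions and/or
 * clear its statistics.
 */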
static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info)
{
        struct net *net = sock_net(skb->sk);
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct sw_flow_key key;
        struct sw_flow *flow;
        struct sk_buff *reply = NULL;
        struct datapath *dp;
        struct sw_flow_actions *old_acts = NULL, *acts = NULL;
        struct sw_flow_match match;
        struct sw_flow_id sfid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int error = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log);
        if (!a[OVS_FLOW_ATTR_KEY] && !ufid_present) {
                OVS_NLERR(log,
                          "Flow set message rejected, Key attribute missing.");
                return -EINVAL;
        }

        error = ovs_nla_init_match_and_action(net, &match, &key, a,
                                              &acts, log);
        if (error)
                goto error;

        if (acts) {
                /* Can allocate before locking if have acts. */
                reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false,
                                                ufid_flags);
                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_kfree_acts;
                }
        }

        ovs_lock();
        dp = get_dp(net, ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                error = -ENODEV;
                goto err_unlock_ovs;
        }
        /* Check that the flow exists. */
        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                error = -ENOENT;
                goto err_unlock_ovs;
        }

        /* Update actions, if present. */
        if (likely(acts)) {
                old_acts = ovsl_dereference(flow->sf_acts);
                rcu_assign_pointer(flow->sf_acts, acts);

                if (unlikely(reply)) {
                        error = ovs_flow_cmd_fill_info(flow,
                                                       ovs_header->dp_ifindex,
                                                       reply, info->snd_portid,
                                                       info->snd_seq, 0,
                                                       OVS_FLOW_CMD_SET,
                                                       ufid_flags);
                        BUG_ON(error < 0);
                }
        } else {
                /* Could not alloc without acts before locking. */
                reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex,
                                                info, OVS_FLOW_CMD_SET, false,
                                                ufid_flags);

                if (IS_ERR(reply)) {
                        error = PTR_ERR(reply);
                        goto err_unlock_ovs;
                }
        }

        /* Clear stats. */
        if (a[OVS_FLOW_ATTR_CLEAR])
                ovs_flow_stats_clear(flow);
        ovs_unlock();

        if (reply)
                ovs_notify(&dp_flow_genl_family, reply, info);
        if (old_acts)
                ovs_nla_free_flow_actions_rcu(old_acts);

        return 0;

err_unlock_ovs:
        ovs_unlock();
        kfree_skb(reply);
err_kfree_acts:
        ovs_nla_free_flow_actions(acts);
error:
        return error;
}

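/* Handler for OVS_FLOW_CMD_GET: look up one flow by UFID or unmasked key
 * and reply with its full description.
 */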
static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err = 0;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, true, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY], NULL,
                                        log);
        } else if (!ufid_present) {
                OVS_NLERR(log,
                          "Flow get message rejected, Key attribute missing.");
                err = -EINVAL;
        }
        if (err)
                return err;

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                err = -ENODEV;
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (!flow) {
                err = -ENOENT;
                goto unlock;
        }

        reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info,
                                        OVS_FLOW_CMD_GET, true, ufid_flags);
        if (IS_ERR(reply)) {
                err = PTR_ERR(reply);
                goto unlock;
        }

        ovs_unlock();
        return genlmsg_reply(reply, info);
unlock:
        ovs_unlock();
        return err;
}

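/* Handler for OVS_FLOW_CMD_DEL: remove one flow identified by UFID or
 * unmasked key, or flush the whole flow table if neither is given.
 */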
static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info)
{
        struct nlattr **a = info->attrs;
        struct ovs_header *ovs_header = info->userhdr;
        struct net *net = sock_net(skb->sk);
        struct sw_flow_key key;
        struct sk_buff *reply;
        struct sw_flow *flow = NULL;
        struct datapath *dp;
        struct sw_flow_match match;
        struct sw_flow_id ufid;
        u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);
        int err;
        bool log = !a[OVS_FLOW_ATTR_PROBE];
        bool ufid_present;

        ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log);
        if (a[OVS_FLOW_ATTR_KEY]) {
                ovs_match_init(&match, &key, true, NULL);
                err = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
                                        NULL, log);
                if (unlikely(err))
                        return err;
        }

        ovs_lock();
        dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (unlikely(!dp)) {
                err = -ENODEV;
                goto unlock;
        }

        if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) {
                err = ovs_flow_tbl_flush(&dp->table);
                goto unlock;
        }

        if (ufid_present)
                flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid);
        else
                flow = ovs_flow_tbl_lookup_exact(&dp->table, &match);
        if (unlikely(!flow)) {
                err = -ENOENT;
                goto unlock;
        }

        ovs_flow_tbl_remove(&dp->table, flow);
        ovs_unlock();

        reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions __force *) flow->sf_acts,
                                        &flow->id, info, false, ufid_flags);
        if (likely(reply)) {
                if (!IS_ERR(reply)) {
                        rcu_read_lock();        /* To keep RCU checker happy. */
                        err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex,
                                                     reply, info->snd_portid,
                                                     info->snd_seq, 0,
                                                     OVS_FLOW_CMD_DEL,
                                                     ufid_flags);
                        rcu_read_unlock();
                        if (WARN_ON_ONCE(err < 0)) {
                                kfree_skb(reply);
                                goto out_free;
                        }

                        ovs_notify(&dp_flow_genl_family, reply, info);
                } else {
                        netlink_set_err(sock_net(skb->sk)->genl_sock, 0, 0,
                                        PTR_ERR(reply));
                }
        }

out_free:
        ovs_flow_free(flow, true);
        return 0;
unlock:
        ovs_unlock();
        return err;
}

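/* Dump handler for OVS_FLOW_CMD_GET: walk the flow table, resuming from
 * the bucket/object position saved in cb->args[] between dump calls.
 */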
static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct nlattr *a[__OVS_FLOW_ATTR_MAX];
        struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
        struct table_instance *ti;
        struct datapath *dp;
        u32 ufid_flags;
        int err;

        err = genlmsg_parse_deprecated(cb->nlh, &dp_flow_genl_family, a,
                                       OVS_FLOW_ATTR_MAX, flow_policy, NULL);
        if (err)
                return err;
        ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]);

        rcu_read_lock();
        dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
        if (!dp) {
                rcu_read_unlock();
                return -ENODEV;
        }

        ti = rcu_dereference(dp->table.ti);
        for (;;) {
                struct sw_flow *flow;
                u32 bucket, obj;

                bucket = cb->args[0];
                obj = cb->args[1];
                flow = ovs_flow_tbl_dump_next(ti, &bucket, &obj);
                if (!flow)
                        break;

                if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb,
                                           NETLINK_CB(cb->skb).portid,
                                           cb->nlh->nlmsg_seq, NLM_F_MULTI,
                                           OVS_FLOW_CMD_GET, ufid_flags) < 0)
                        break;

                cb->args[0] = bucket;
                cb->args[1] = obj;
        }
        rcu_read_unlock();
        return skb->len;
}

static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = {
        [OVS_FLOW_ATTR_KEY] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_MASK] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED },
        [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG },
        [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 },
        [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 },
};

static const struct genl_small_ops dp_flow_genl_ops[] = {
        { .cmd = OVS_FLOW_CMD_NEW,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_new
        },
        { .cmd = OVS_FLOW_CMD_DEL,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_del
        },
        { .cmd = OVS_FLOW_CMD_GET,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = 0,               /* OK for unprivileged users. */
          .doit = ovs_flow_cmd_get,
          .dumpit = ovs_flow_cmd_dump
        },
        { .cmd = OVS_FLOW_CMD_SET,
          .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
          .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
          .doit = ovs_flow_cmd_set,
        },
};

static struct genl_family dp_flow_genl_family __ro_after_init = {
        .hdrsize = sizeof(struct ovs_header),
        .name = OVS_FLOW_FAMILY,
        .version = OVS_FLOW_VERSION,
        .maxattr = OVS_FLOW_ATTR_MAX,
        .policy = flow_policy,
        .netnsok = true,
        .parallel_ops = true,
        .small_ops = dp_flow_genl_ops,
        .n_small_ops = ARRAY_SIZE(dp_flow_genl_ops),
        .mcgrps = &ovs_dp_flow_multicast_group,
        .n_mcgrps = 1,
        .module = THIS_MODULE,
};

1509 static size_t ovs_dp_cmd_msg_size(void)
1510 {
1511         size_t msgsize = NLMSG_ALIGN(sizeof(struct ovs_header));
1512
1513         msgsize += nla_total_size(IFNAMSIZ);
1514         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_stats));
1515         msgsize += nla_total_size_64bit(sizeof(struct ovs_dp_megaflow_stats));
1516         msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_USER_FEATURES */
1517         msgsize += nla_total_size(sizeof(u32)); /* OVS_DP_ATTR_MASKS_CACHE_SIZE */
1518
1519         return msgsize;
1520 }
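/* For reference: nla_total_size(n) is NLA_HDRLEN (4 bytes) plus n
 * rounded up to a 4-byte boundary, so e.g. each u32 attribute above
 * accounts for 8 bytes of message space.
 */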
1521
1522 /* Called with ovs_mutex. */
1523 static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb,
1524                                 u32 portid, u32 seq, u32 flags, u8 cmd)
1525 {
1526         struct ovs_header *ovs_header;
1527         struct ovs_dp_stats dp_stats;
1528         struct ovs_dp_megaflow_stats dp_megaflow_stats;
1529         int err;
1530
1531         ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family,
1532                                  flags, cmd);
1533         if (!ovs_header)
1534                 goto error;
1535
1536         ovs_header->dp_ifindex = get_dpifindex(dp);
1537
1538         err = nla_put_string(skb, OVS_DP_ATTR_NAME, ovs_dp_name(dp));
1539         if (err)
1540                 goto nla_put_failure;
1541
1542         get_dp_stats(dp, &dp_stats, &dp_megaflow_stats);
1543         if (nla_put_64bit(skb, OVS_DP_ATTR_STATS, sizeof(struct ovs_dp_stats),
1544                           &dp_stats, OVS_DP_ATTR_PAD))
1545                 goto nla_put_failure;
1546
1547         if (nla_put_64bit(skb, OVS_DP_ATTR_MEGAFLOW_STATS,
1548                           sizeof(struct ovs_dp_megaflow_stats),
1549                           &dp_megaflow_stats, OVS_DP_ATTR_PAD))
1550                 goto nla_put_failure;
1551
1552         if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features))
1553                 goto nla_put_failure;
1554
1555         if (nla_put_u32(skb, OVS_DP_ATTR_MASKS_CACHE_SIZE,
1556                         ovs_flow_tbl_masks_cache_size(&dp->table)))
1557                 goto nla_put_failure;
1558
1559         genlmsg_end(skb, ovs_header);
1560         return 0;
1561
1562 nla_put_failure:
1563         genlmsg_cancel(skb, ovs_header);
1564 error:
1565         return -EMSGSIZE;
1566 }
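/* Any attribute added to ovs_dp_cmd_fill_info() must also be counted in
 * ovs_dp_cmd_msg_size() above; otherwise the skb allocated by
 * ovs_dp_cmd_alloc_info() below may be too small and the
 * BUG_ON(err < 0) checks in the callers can trigger.
 */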
1567
1568 static struct sk_buff *ovs_dp_cmd_alloc_info(void)
1569 {
1570         return genlmsg_new(ovs_dp_cmd_msg_size(), GFP_KERNEL);
1571 }
1572
1573 /* Called with rcu_read_lock or ovs_mutex. */
1574 static struct datapath *lookup_datapath(struct net *net,
1575                                         const struct ovs_header *ovs_header,
1576                                         struct nlattr *a[OVS_DP_ATTR_MAX + 1])
1577 {
1578         struct datapath *dp;
1579
1580         if (!a[OVS_DP_ATTR_NAME])
1581                 dp = get_dp(net, ovs_header->dp_ifindex);
1582         else {
1583                 struct vport *vport;
1584
1585                 vport = ovs_vport_locate(net, nla_data(a[OVS_DP_ATTR_NAME]));
1586                 dp = vport && vport->port_no == OVSP_LOCAL ? vport->dp : NULL;
1587         }
1588         return dp ? dp : ERR_PTR(-ENODEV);
1589 }
1590
1591 static void ovs_dp_reset_user_features(struct sk_buff *skb,
1592                                        struct genl_info *info)
1593 {
1594         struct datapath *dp;
1595
1596         dp = lookup_datapath(sock_net(skb->sk), info->userhdr,
1597                              info->attrs);
1598         if (IS_ERR(dp))
1599                 return;
1600
1601         WARN(dp->user_features, "Dropping previously announced user features\n");
1602         dp->user_features = 0;
1603 }
1604
1605 static int ovs_dp_set_upcall_portids(struct datapath *dp,
1606                               const struct nlattr *ids)
1607 {
1608         struct dp_nlsk_pids *old, *dp_nlsk_pids;
1609
1610         if (!nla_len(ids) || nla_len(ids) % sizeof(u32))
1611                 return -EINVAL;
1612
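        /* Snapshot the currently published array under ovs_mutex; it is
         * freed below only after the replacement is visible to readers.
         */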
1613         old = ovsl_dereference(dp->upcall_portids);
1614
1615         dp_nlsk_pids = kmalloc(sizeof(*dp_nlsk_pids) + nla_len(ids),
1616                                GFP_KERNEL);
1617         if (!dp_nlsk_pids)
1618                 return -ENOMEM;
1619
1620         dp_nlsk_pids->n_pids = nla_len(ids) / sizeof(u32);
1621         nla_memcpy(dp_nlsk_pids->pids, ids, nla_len(ids));
1622
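        /* Publish the new array first, then defer freeing the old one
         * past an RCU grace period so concurrent readers in
         * ovs_dp_get_upcall_portid() never see freed memory.
         */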
1623         rcu_assign_pointer(dp->upcall_portids, dp_nlsk_pids);
1624
1625         kfree_rcu(old, rcu);
1626
1627         return 0;
1628 }
1629
1630 u32 ovs_dp_get_upcall_portid(const struct datapath *dp, uint32_t cpu_id)
1631 {
1632         struct dp_nlsk_pids *dp_nlsk_pids;
1633
1634         dp_nlsk_pids = rcu_dereference(dp->upcall_portids);
1635
1636         if (dp_nlsk_pids) {
1637                 if (cpu_id < dp_nlsk_pids->n_pids) {
1638                         return dp_nlsk_pids->pids[cpu_id];
1639                 } else if (dp_nlsk_pids->n_pids > 0 &&
1640                            cpu_id >= dp_nlsk_pids->n_pids) {
1641                         /* If the number of netlink PIDs is mismatched with
1642                          * the number of CPUs as seen by the kernel, log this
1643                          * and pick a socket by taking cpu_id modulo the
1644                          * number of PIDs, in order not to drop packets.
1645                          */
1646                         pr_info_ratelimited("cpu_id mismatch with handler threads\n");
1647                         return dp_nlsk_pids->pids[cpu_id %
1648                                                   dp_nlsk_pids->n_pids];
1649                 } else {
1650                         return 0;
1651                 }
1652         } else {
1653                 return 0;
1654         }
1655 }
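/* Worked example of the fallback above: with n_pids == 4 and
 * cpu_id == 6, the upcall goes to pids[6 % 4] == pids[2], so a PID
 * array shorter than the number of CPUs still receives every upcall.
 */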
1656
1657 static int ovs_dp_change(struct datapath *dp, struct nlattr *a[])
1658 {
1659         u32 user_features = 0, old_features = dp->user_features;
1660         int err;
1661
1662         if (a[OVS_DP_ATTR_USER_FEATURES]) {
1663                 user_features = nla_get_u32(a[OVS_DP_ATTR_USER_FEATURES]);
1664
1665                 if (user_features & ~(OVS_DP_F_VPORT_PIDS |
1666                                       OVS_DP_F_UNALIGNED |
1667                                       OVS_DP_F_TC_RECIRC_SHARING |
1668                                       OVS_DP_F_DISPATCH_UPCALL_PER_CPU))
1669                         return -EOPNOTSUPP;
1670
1671 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
1672                 if (user_features & OVS_DP_F_TC_RECIRC_SHARING)
1673                         return -EOPNOTSUPP;
1674 #endif
1675         }
1676
1677         if (a[OVS_DP_ATTR_MASKS_CACHE_SIZE]) {
1678                 u32 cache_size;
1679
1681                 cache_size = nla_get_u32(a[OVS_DP_ATTR_MASKS_CACHE_SIZE]);
1682                 err = ovs_flow_tbl_masks_cache_resize(&dp->table, cache_size);
1683                 if (err)
1684                         return err;
1685         }
1686
1687         dp->user_features = user_features;
1688
1689         if (dp->user_features & OVS_DP_F_DISPATCH_UPCALL_PER_CPU &&
1690             a[OVS_DP_ATTR_PER_CPU_PIDS]) {
1691                 /* Upcall Netlink Port IDs have been updated */
1692                 err = ovs_dp_set_upcall_portids(dp,
1693                                                 a[OVS_DP_ATTR_PER_CPU_PIDS]);
1694                 if (err)
1695                         return err;
1696         }
1697
1698         if ((dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1699             !(old_features & OVS_DP_F_TC_RECIRC_SHARING))
1700                 tc_skb_ext_tc_enable();
1701         else if (!(dp->user_features & OVS_DP_F_TC_RECIRC_SHARING) &&
1702                  (old_features & OVS_DP_F_TC_RECIRC_SHARING))
1703                 tc_skb_ext_tc_disable();
1704
1705         return 0;
1706 }
1707
1708 static int ovs_dp_stats_init(struct datapath *dp)
1709 {
1710         dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
1711         if (!dp->stats_percpu)
1712                 return -ENOMEM;
1713
1714         return 0;
1715 }
1716
1717 static int ovs_dp_vport_init(struct datapath *dp)
1718 {
1719         int i;
1720
1721         dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
1722                                   sizeof(struct hlist_head),
1723                                   GFP_KERNEL);
1724         if (!dp->ports)
1725                 return -ENOMEM;
1726
1727         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
1728                 INIT_HLIST_HEAD(&dp->ports[i]);
1729
1730         return 0;
1731 }
1732
1733 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
1734 {
1735         struct nlattr **a = info->attrs;
1736         struct vport_parms parms;
1737         struct sk_buff *reply;
1738         struct datapath *dp;
1739         struct vport *vport;
1740         struct ovs_net *ovs_net;
1741         int err;
1742
1743         err = -EINVAL;
1744         if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
1745                 goto err;
1746
1747         reply = ovs_dp_cmd_alloc_info();
1748         if (!reply)
1749                 return -ENOMEM;
1750
1751         err = -ENOMEM;
1752         dp = kzalloc(sizeof(*dp), GFP_KERNEL);
1753         if (dp == NULL)
1754                 goto err_destroy_reply;
1755
1756         ovs_dp_set_net(dp, sock_net(skb->sk));
1757
1758         /* Allocate table. */
1759         err = ovs_flow_tbl_init(&dp->table);
1760         if (err)
1761                 goto err_destroy_dp;
1762
1763         err = ovs_dp_stats_init(dp);
1764         if (err)
1765                 goto err_destroy_table;
1766
1767         err = ovs_dp_vport_init(dp);
1768         if (err)
1769                 goto err_destroy_stats;
1770
1771         err = ovs_meters_init(dp);
1772         if (err)
1773                 goto err_destroy_ports;
1774
1775         /* Set up our datapath device. */
1776         parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
1777         parms.type = OVS_VPORT_TYPE_INTERNAL;
1778         parms.options = NULL;
1779         parms.dp = dp;
1780         parms.port_no = OVSP_LOCAL;
1781         parms.upcall_portids = a[OVS_DP_ATTR_UPCALL_PID];
1782
1783         /* So far only local changes have been made, now need the lock. */
1784         ovs_lock();
1785
1786         err = ovs_dp_change(dp, a);
1787         if (err)
1788                 goto err_unlock_and_destroy_meters;
1789
1790         vport = new_vport(&parms);
1791         if (IS_ERR(vport)) {
1792                 err = PTR_ERR(vport);
1793                 if (err == -EBUSY)
1794                         err = -EEXIST;
1795
1796                 if (err == -EEXIST) {
1797                         /* An outdated user space instance that does not understand
1798                          * the concept of user_features has attempted to create a new
1799                          * datapath and is likely to reuse it. Drop all user features.
1800                          */
1801                         if (info->genlhdr->version < OVS_DP_VER_FEATURES)
1802                                 ovs_dp_reset_user_features(skb, info);
1803                 }
1804
1805                 goto err_unlock_and_destroy_meters;
1806         }
1807
1808         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1809                                    info->snd_seq, 0, OVS_DP_CMD_NEW);
1810         BUG_ON(err < 0);
1811
1812         ovs_net = net_generic(ovs_dp_get_net(dp), ovs_net_id);
1813         list_add_tail_rcu(&dp->list_node, &ovs_net->dps);
1814
1815         ovs_unlock();
1816
1817         ovs_notify(&dp_datapath_genl_family, reply, info);
1818         return 0;
1819
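/* Error unwind: release everything acquired above, in reverse order. */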
1820 err_unlock_and_destroy_meters:
1821         ovs_unlock();
1822         ovs_meters_exit(dp);
1823 err_destroy_ports:
1824         kfree(dp->ports);
1825 err_destroy_stats:
1826         free_percpu(dp->stats_percpu);
1827 err_destroy_table:
1828         ovs_flow_tbl_destroy(&dp->table);
1829 err_destroy_dp:
1830         kfree(dp);
1831 err_destroy_reply:
1832         kfree_skb(reply);
1833 err:
1834         return err;
1835 }
1836
1837 /* Called with ovs_mutex. */
1838 static void __dp_destroy(struct datapath *dp)
1839 {
1840         struct flow_table *table = &dp->table;
1841         int i;
1842
1843         if (dp->user_features & OVS_DP_F_TC_RECIRC_SHARING)
1844                 tc_skb_ext_tc_disable();
1845
1846         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
1847                 struct vport *vport;
1848                 struct hlist_node *n;
1849
1850                 hlist_for_each_entry_safe(vport, n, &dp->ports[i], dp_hash_node)
1851                         if (vport->port_no != OVSP_LOCAL)
1852                                 ovs_dp_detach_port(vport);
1853         }
1854
1855         list_del_rcu(&dp->list_node);
1856
1857         /* OVSP_LOCAL is the datapath's internal port. All other ports in
1858          * the datapath must be destroyed before the datapath itself is freed.
1859          */
1860         ovs_dp_detach_port(ovs_vport_ovsl(dp, OVSP_LOCAL));
1861
1862         /* Flush the sw_flows from the tables here so that the RCU
1863          * callback only has to release resources such as the dp, ports
1864          * and tables, which avoids RCU usage warnings.
1865          */
1866         table_instance_flow_flush(table, ovsl_dereference(table->ti),
1867                                   ovsl_dereference(table->ufid_ti));
1868
1869         /* RCU destroy the ports, meters and flow tables. */
1870         call_rcu(&dp->rcu, destroy_dp_rcu);
1871 }
1872
1873 static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info)
1874 {
1875         struct sk_buff *reply;
1876         struct datapath *dp;
1877         int err;
1878
1879         reply = ovs_dp_cmd_alloc_info();
1880         if (!reply)
1881                 return -ENOMEM;
1882
1883         ovs_lock();
1884         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1885         err = PTR_ERR(dp);
1886         if (IS_ERR(dp))
1887                 goto err_unlock_free;
1888
1889         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1890                                    info->snd_seq, 0, OVS_DP_CMD_DEL);
1891         BUG_ON(err < 0);
1892
1893         __dp_destroy(dp);
1894         ovs_unlock();
1895
1896         ovs_notify(&dp_datapath_genl_family, reply, info);
1897
1898         return 0;
1899
1900 err_unlock_free:
1901         ovs_unlock();
1902         kfree_skb(reply);
1903         return err;
1904 }
1905
1906 static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info)
1907 {
1908         struct sk_buff *reply;
1909         struct datapath *dp;
1910         int err;
1911
1912         reply = ovs_dp_cmd_alloc_info();
1913         if (!reply)
1914                 return -ENOMEM;
1915
1916         ovs_lock();
1917         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1918         err = PTR_ERR(dp);
1919         if (IS_ERR(dp))
1920                 goto err_unlock_free;
1921
1922         err = ovs_dp_change(dp, info->attrs);
1923         if (err)
1924                 goto err_unlock_free;
1925
1926         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1927                                    info->snd_seq, 0, OVS_DP_CMD_SET);
1928         BUG_ON(err < 0);
1929
1930         ovs_unlock();
1931         ovs_notify(&dp_datapath_genl_family, reply, info);
1932
1933         return 0;
1934
1935 err_unlock_free:
1936         ovs_unlock();
1937         kfree_skb(reply);
1938         return err;
1939 }
1940
1941 static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info)
1942 {
1943         struct sk_buff *reply;
1944         struct datapath *dp;
1945         int err;
1946
1947         reply = ovs_dp_cmd_alloc_info();
1948         if (!reply)
1949                 return -ENOMEM;
1950
1951         ovs_lock();
1952         dp = lookup_datapath(sock_net(skb->sk), info->userhdr, info->attrs);
1953         if (IS_ERR(dp)) {
1954                 err = PTR_ERR(dp);
1955                 goto err_unlock_free;
1956         }
1957         err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
1958                                    info->snd_seq, 0, OVS_DP_CMD_GET);
1959         BUG_ON(err < 0);
1960         ovs_unlock();
1961
1962         return genlmsg_reply(reply, info);
1963
1964 err_unlock_free:
1965         ovs_unlock();
1966         kfree_skb(reply);
1967         return err;
1968 }
1969
1970 static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
1971 {
1972         struct ovs_net *ovs_net = net_generic(sock_net(skb->sk), ovs_net_id);
1973         struct datapath *dp;
1974         int skip = cb->args[0];
1975         int i = 0;
1976
1977         ovs_lock();
1978         list_for_each_entry(dp, &ovs_net->dps, list_node) {
1979                 if (i >= skip &&
1980                     ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid,
1981                                          cb->nlh->nlmsg_seq, NLM_F_MULTI,
1982                                          OVS_DP_CMD_GET) < 0)
1983                         break;
1984                 i++;
1985         }
1986         ovs_unlock();
1987
1988         cb->args[0] = i;
1989
1990         return skb->len;
1991 }
1992
1993 static const struct nla_policy datapath_policy[OVS_DP_ATTR_MAX + 1] = {
1994         [OVS_DP_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
1995         [OVS_DP_ATTR_UPCALL_PID] = { .type = NLA_U32 },
1996         [OVS_DP_ATTR_USER_FEATURES] = { .type = NLA_U32 },
1997         [OVS_DP_ATTR_MASKS_CACHE_SIZE] =  NLA_POLICY_RANGE(NLA_U32, 0,
1998                 PCPU_MIN_UNIT_SIZE / sizeof(struct mask_cache_entry)),
1999 };
2000
2001 static const struct genl_small_ops dp_datapath_genl_ops[] = {
2002         { .cmd = OVS_DP_CMD_NEW,
2003           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2004           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2005           .doit = ovs_dp_cmd_new
2006         },
2007         { .cmd = OVS_DP_CMD_DEL,
2008           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2009           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2010           .doit = ovs_dp_cmd_del
2011         },
2012         { .cmd = OVS_DP_CMD_GET,
2013           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2014           .flags = 0,               /* OK for unprivileged users. */
2015           .doit = ovs_dp_cmd_get,
2016           .dumpit = ovs_dp_cmd_dump
2017         },
2018         { .cmd = OVS_DP_CMD_SET,
2019           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2020           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2021           .doit = ovs_dp_cmd_set,
2022         },
2023 };
2024
2025 static struct genl_family dp_datapath_genl_family __ro_after_init = {
2026         .hdrsize = sizeof(struct ovs_header),
2027         .name = OVS_DATAPATH_FAMILY,
2028         .version = OVS_DATAPATH_VERSION,
2029         .maxattr = OVS_DP_ATTR_MAX,
2030         .policy = datapath_policy,
2031         .netnsok = true,
2032         .parallel_ops = true,
2033         .small_ops = dp_datapath_genl_ops,
2034         .n_small_ops = ARRAY_SIZE(dp_datapath_genl_ops),
2035         .mcgrps = &ovs_dp_datapath_multicast_group,
2036         .n_mcgrps = 1,
2037         .module = THIS_MODULE,
2038 };
2039
2040 /* Called with ovs_mutex or RCU read lock. */
2041 static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb,
2042                                    struct net *net, u32 portid, u32 seq,
2043                                    u32 flags, u8 cmd, gfp_t gfp)
2044 {
2045         struct ovs_header *ovs_header;
2046         struct ovs_vport_stats vport_stats;
2047         int err;
2048
2049         ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family,
2050                                  flags, cmd);
2051         if (!ovs_header)
2052                 return -EMSGSIZE;
2053
2054         ovs_header->dp_ifindex = get_dpifindex(vport->dp);
2055
2056         if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) ||
2057             nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) ||
2058             nla_put_string(skb, OVS_VPORT_ATTR_NAME,
2059                            ovs_vport_name(vport)) ||
2060             nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex))
2061                 goto nla_put_failure;
2062
2063         if (!net_eq(net, dev_net(vport->dev))) {
2064                 int id = peernet2id_alloc(net, dev_net(vport->dev), gfp);
2065
2066                 if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id))
2067                         goto nla_put_failure;
2068         }
2069
2070         ovs_vport_get_stats(vport, &vport_stats);
2071         if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS,
2072                           sizeof(struct ovs_vport_stats), &vport_stats,
2073                           OVS_VPORT_ATTR_PAD))
2074                 goto nla_put_failure;
2075
2076         if (ovs_vport_get_upcall_portids(vport, skb))
2077                 goto nla_put_failure;
2078
2079         err = ovs_vport_get_options(vport, skb);
2080         if (err == -EMSGSIZE)
2081                 goto error;
2082
2083         genlmsg_end(skb, ovs_header);
2084         return 0;
2085
2086 nla_put_failure:
2087         err = -EMSGSIZE;
2088 error:
2089         genlmsg_cancel(skb, ovs_header);
2090         return err;
2091 }
2092
2093 static struct sk_buff *ovs_vport_cmd_alloc_info(void)
2094 {
2095         return nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2096 }
2097
2098 /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */
2099 struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net,
2100                                          u32 portid, u32 seq, u8 cmd)
2101 {
2102         struct sk_buff *skb;
2103         int retval;
2104
2105         skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
2106         if (!skb)
2107                 return ERR_PTR(-ENOMEM);
2108
2109         retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd,
2110                                          GFP_KERNEL);
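        /* A failure here would mean a single vport message no longer
         * fits in NLMSG_DEFAULT_SIZE: a programming error, not a runtime
         * condition, hence the BUG_ON() below.
         */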
2111         BUG_ON(retval < 0);
2112
2113         return skb;
2114 }
2115
2116 /* Called with ovs_mutex or RCU read lock. */
2117 static struct vport *lookup_vport(struct net *net,
2118                                   const struct ovs_header *ovs_header,
2119                                   struct nlattr *a[OVS_VPORT_ATTR_MAX + 1])
2120 {
2121         struct datapath *dp;
2122         struct vport *vport;
2123
2124         if (a[OVS_VPORT_ATTR_IFINDEX])
2125                 return ERR_PTR(-EOPNOTSUPP);
2126         if (a[OVS_VPORT_ATTR_NAME]) {
2127                 vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME]));
2128                 if (!vport)
2129                         return ERR_PTR(-ENODEV);
2130                 if (ovs_header->dp_ifindex &&
2131                     ovs_header->dp_ifindex != get_dpifindex(vport->dp))
2132                         return ERR_PTR(-ENODEV);
2133                 return vport;
2134         } else if (a[OVS_VPORT_ATTR_PORT_NO]) {
2135                 u32 port_no = nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]);
2136
2137                 if (port_no >= DP_MAX_PORTS)
2138                         return ERR_PTR(-EFBIG);
2139
2140                 dp = get_dp(net, ovs_header->dp_ifindex);
2141                 if (!dp)
2142                         return ERR_PTR(-ENODEV);
2143
2144                 vport = ovs_vport_ovsl_rcu(dp, port_no);
2145                 if (!vport)
2146                         return ERR_PTR(-ENODEV);
2147                 return vport;
2148         } else {
2149                 return ERR_PTR(-EINVAL);
2150         }
2151 }
2152
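/* Return the largest forwarding headroom requested by any vport device
 * in the datapath. The lockdep annotation in the loop below permits
 * callers to hold either ovs_mutex or the RCU read lock.
 */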
2153 static unsigned int ovs_get_max_headroom(struct datapath *dp)
2154 {
2155         unsigned int dev_headroom, max_headroom = 0;
2156         struct net_device *dev;
2157         struct vport *vport;
2158         int i;
2159
2160         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2161                 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2162                                          lockdep_ovsl_is_held()) {
2163                         dev = vport->dev;
2164                         dev_headroom = netdev_get_fwd_headroom(dev);
2165                         if (dev_headroom > max_headroom)
2166                                 max_headroom = dev_headroom;
2167                 }
2168         }
2169
2170         return max_headroom;
2171 }
2172
2173 /* Called with ovs_mutex. */
2174 static void ovs_update_headroom(struct datapath *dp, unsigned int new_headroom)
2175 {
2176         struct vport *vport;
2177         int i;
2178
2179         dp->max_headroom = new_headroom;
2180         for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2181                 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node,
2182                                          lockdep_ovsl_is_held())
2183                         netdev_set_rx_headroom(vport->dev, new_headroom);
2184         }
2185 }
2186
2187 static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info)
2188 {
2189         struct nlattr **a = info->attrs;
2190         struct ovs_header *ovs_header = info->userhdr;
2191         struct vport_parms parms;
2192         struct sk_buff *reply;
2193         struct vport *vport;
2194         struct datapath *dp;
2195         unsigned int new_headroom;
2196         u32 port_no;
2197         int err;
2198
2199         if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] ||
2200             !a[OVS_VPORT_ATTR_UPCALL_PID])
2201                 return -EINVAL;
2202         if (a[OVS_VPORT_ATTR_IFINDEX])
2203                 return -EOPNOTSUPP;
2204
2205         port_no = a[OVS_VPORT_ATTR_PORT_NO]
2206                 ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0;
2207         if (port_no >= DP_MAX_PORTS)
2208                 return -EFBIG;
2209
2210         reply = ovs_vport_cmd_alloc_info();
2211         if (!reply)
2212                 return -ENOMEM;
2213
2214         ovs_lock();
2215 restart:
2216         dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
2217         err = -ENODEV;
2218         if (!dp)
2219                 goto exit_unlock_free;
2220
2221         if (port_no) {
2222                 vport = ovs_vport_ovsl(dp, port_no);
2223                 err = -EBUSY;
2224                 if (vport)
2225                         goto exit_unlock_free;
2226         } else {
2227                 for (port_no = 1; ; port_no++) {
2228                         if (port_no >= DP_MAX_PORTS) {
2229                                 err = -EFBIG;
2230                                 goto exit_unlock_free;
2231                         }
2232                         vport = ovs_vport_ovsl(dp, port_no);
2233                         if (!vport)
2234                                 break;
2235                 }
2236         }
2237
2238         parms.name = nla_data(a[OVS_VPORT_ATTR_NAME]);
2239         parms.type = nla_get_u32(a[OVS_VPORT_ATTR_TYPE]);
2240         parms.options = a[OVS_VPORT_ATTR_OPTIONS];
2241         parms.dp = dp;
2242         parms.port_no = port_no;
2243         parms.upcall_portids = a[OVS_VPORT_ATTR_UPCALL_PID];
2244
2245         vport = new_vport(&parms);
2246         err = PTR_ERR(vport);
2247         if (IS_ERR(vport)) {
2248                 if (err == -EAGAIN)
2249                         goto restart;
2250                 goto exit_unlock_free;
2251         }
2252
2253         err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2254                                       info->snd_portid, info->snd_seq, 0,
2255                                       OVS_VPORT_CMD_NEW, GFP_KERNEL);
2256
2257         new_headroom = netdev_get_fwd_headroom(vport->dev);
2258
2259         if (new_headroom > dp->max_headroom)
2260                 ovs_update_headroom(dp, new_headroom);
2261         else
2262                 netdev_set_rx_headroom(vport->dev, dp->max_headroom);
2263
2264         BUG_ON(err < 0);
2265         ovs_unlock();
2266
2267         ovs_notify(&dp_vport_genl_family, reply, info);
2268         return 0;
2269
2270 exit_unlock_free:
2271         ovs_unlock();
2272         kfree_skb(reply);
2273         return err;
2274 }
2275
2276 static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info)
2277 {
2278         struct nlattr **a = info->attrs;
2279         struct sk_buff *reply;
2280         struct vport *vport;
2281         int err;
2282
2283         reply = ovs_vport_cmd_alloc_info();
2284         if (!reply)
2285                 return -ENOMEM;
2286
2287         ovs_lock();
2288         vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2289         err = PTR_ERR(vport);
2290         if (IS_ERR(vport))
2291                 goto exit_unlock_free;
2292
2293         if (a[OVS_VPORT_ATTR_TYPE] &&
2294             nla_get_u32(a[OVS_VPORT_ATTR_TYPE]) != vport->ops->type) {
2295                 err = -EINVAL;
2296                 goto exit_unlock_free;
2297         }
2298
2299         if (a[OVS_VPORT_ATTR_OPTIONS]) {
2300                 err = ovs_vport_set_options(vport, a[OVS_VPORT_ATTR_OPTIONS]);
2301                 if (err)
2302                         goto exit_unlock_free;
2303         }
2304
2306         if (a[OVS_VPORT_ATTR_UPCALL_PID]) {
2307                 struct nlattr *ids = a[OVS_VPORT_ATTR_UPCALL_PID];
2308
2309                 err = ovs_vport_set_upcall_portids(vport, ids);
2310                 if (err)
2311                         goto exit_unlock_free;
2312         }
2313
2314         err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2315                                       info->snd_portid, info->snd_seq, 0,
2316                                       OVS_VPORT_CMD_SET, GFP_KERNEL);
2317         BUG_ON(err < 0);
2318
2319         ovs_unlock();
2320         ovs_notify(&dp_vport_genl_family, reply, info);
2321         return 0;
2322
2323 exit_unlock_free:
2324         ovs_unlock();
2325         kfree_skb(reply);
2326         return err;
2327 }
2328
2329 static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info)
2330 {
2331         bool update_headroom = false;
2332         struct nlattr **a = info->attrs;
2333         struct sk_buff *reply;
2334         struct datapath *dp;
2335         struct vport *vport;
2336         unsigned int new_headroom;
2337         int err;
2338
2339         reply = ovs_vport_cmd_alloc_info();
2340         if (!reply)
2341                 return -ENOMEM;
2342
2343         ovs_lock();
2344         vport = lookup_vport(sock_net(skb->sk), info->userhdr, a);
2345         err = PTR_ERR(vport);
2346         if (IS_ERR(vport))
2347                 goto exit_unlock_free;
2348
2349         if (vport->port_no == OVSP_LOCAL) {
2350                 err = -EINVAL;
2351                 goto exit_unlock_free;
2352         }
2353
2354         err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2355                                       info->snd_portid, info->snd_seq, 0,
2356                                       OVS_VPORT_CMD_DEL, GFP_KERNEL);
2357         BUG_ON(err < 0);
2358
2359         /* The vport deletion may trigger a dp headroom update. */
2360         dp = vport->dp;
2361         if (netdev_get_fwd_headroom(vport->dev) == dp->max_headroom)
2362                 update_headroom = true;
2363
2364         netdev_reset_rx_headroom(vport->dev);
2365         ovs_dp_detach_port(vport);
2366
2367         if (update_headroom) {
2368                 new_headroom = ovs_get_max_headroom(dp);
2369
2370                 if (new_headroom < dp->max_headroom)
2371                         ovs_update_headroom(dp, new_headroom);
2372         }
2373         ovs_unlock();
2374
2375         ovs_notify(&dp_vport_genl_family, reply, info);
2376         return 0;
2377
2378 exit_unlock_free:
2379         ovs_unlock();
2380         kfree_skb(reply);
2381         return err;
2382 }
2383
2384 static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info)
2385 {
2386         struct nlattr **a = info->attrs;
2387         struct ovs_header *ovs_header = info->userhdr;
2388         struct sk_buff *reply;
2389         struct vport *vport;
2390         int err;
2391
2392         reply = ovs_vport_cmd_alloc_info();
2393         if (!reply)
2394                 return -ENOMEM;
2395
2396         rcu_read_lock();
2397         vport = lookup_vport(sock_net(skb->sk), ovs_header, a);
2398         err = PTR_ERR(vport);
2399         if (IS_ERR(vport))
2400                 goto exit_unlock_free;
2401         err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
2402                                       info->snd_portid, info->snd_seq, 0,
2403                                       OVS_VPORT_CMD_GET, GFP_ATOMIC);
2404         BUG_ON(err < 0);
2405         rcu_read_unlock();
2406
2407         return genlmsg_reply(reply, info);
2408
2409 exit_unlock_free:
2410         rcu_read_unlock();
2411         kfree_skb(reply);
2412         return err;
2413 }
2414
2415 static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb)
2416 {
2417         struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh));
2418         struct datapath *dp;
2419         int bucket = cb->args[0], skip = cb->args[1];
2420         int i, j = 0;
2421
2422         rcu_read_lock();
2423         dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex);
2424         if (!dp) {
2425                 rcu_read_unlock();
2426                 return -ENODEV;
2427         }
2428         for (i = bucket; i < DP_VPORT_HASH_BUCKETS; i++) {
2429                 struct vport *vport;
2430
2431                 j = 0;
2432                 hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) {
2433                         if (j >= skip &&
2434                             ovs_vport_cmd_fill_info(vport, skb,
2435                                                     sock_net(skb->sk),
2436                                                     NETLINK_CB(cb->skb).portid,
2437                                                     cb->nlh->nlmsg_seq,
2438                                                     NLM_F_MULTI,
2439                                                     OVS_VPORT_CMD_GET,
2440                                                     GFP_ATOMIC) < 0)
2441                                 goto out;
2442
2443                         j++;
2444                 }
2445                 skip = 0;
2446         }
2447 out:
2448         rcu_read_unlock();
2449
2450         cb->args[0] = i;
2451         cb->args[1] = j;
2452
2453         return skb->len;
2454 }
2455
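/* Self-rescheduling worker: rebalances every datapath's mask cache each
 * DP_MASKS_REBALANCE_INTERVAL milliseconds until ovs_exit_net() cancels
 * the delayed work.
 */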
2456 static void ovs_dp_masks_rebalance(struct work_struct *work)
2457 {
2458         struct ovs_net *ovs_net = container_of(work, struct ovs_net,
2459                                                masks_rebalance.work);
2460         struct datapath *dp;
2461
2462         ovs_lock();
2463
2464         list_for_each_entry(dp, &ovs_net->dps, list_node)
2465                 ovs_flow_masks_rebalance(&dp->table);
2466
2467         ovs_unlock();
2468
2469         schedule_delayed_work(&ovs_net->masks_rebalance,
2470                               msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2471 }
2472
2473 static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = {
2474         [OVS_VPORT_ATTR_NAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ - 1 },
2475         [OVS_VPORT_ATTR_STATS] = { .len = sizeof(struct ovs_vport_stats) },
2476         [OVS_VPORT_ATTR_PORT_NO] = { .type = NLA_U32 },
2477         [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 },
2478         [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_UNSPEC },
2479         [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED },
2480         [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 },
2481         [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 },
2482 };
2483
2484 static const struct genl_small_ops dp_vport_genl_ops[] = {
2485         { .cmd = OVS_VPORT_CMD_NEW,
2486           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2487           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2488           .doit = ovs_vport_cmd_new
2489         },
2490         { .cmd = OVS_VPORT_CMD_DEL,
2491           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2492           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2493           .doit = ovs_vport_cmd_del
2494         },
2495         { .cmd = OVS_VPORT_CMD_GET,
2496           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2497           .flags = 0,               /* OK for unprivileged users. */
2498           .doit = ovs_vport_cmd_get,
2499           .dumpit = ovs_vport_cmd_dump
2500         },
2501         { .cmd = OVS_VPORT_CMD_SET,
2502           .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
2503           .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN privilege. */
2504           .doit = ovs_vport_cmd_set,
2505         },
2506 };
2507
2508 struct genl_family dp_vport_genl_family __ro_after_init = {
2509         .hdrsize = sizeof(struct ovs_header),
2510         .name = OVS_VPORT_FAMILY,
2511         .version = OVS_VPORT_VERSION,
2512         .maxattr = OVS_VPORT_ATTR_MAX,
2513         .policy = vport_policy,
2514         .netnsok = true,
2515         .parallel_ops = true,
2516         .small_ops = dp_vport_genl_ops,
2517         .n_small_ops = ARRAY_SIZE(dp_vport_genl_ops),
2518         .mcgrps = &ovs_dp_vport_multicast_group,
2519         .n_mcgrps = 1,
2520         .module = THIS_MODULE,
2521 };
2522
2523 static struct genl_family * const dp_genl_families[] = {
2524         &dp_datapath_genl_family,
2525         &dp_vport_genl_family,
2526         &dp_flow_genl_family,
2527         &dp_packet_genl_family,
2528         &dp_meter_genl_family,
2529 #if     IS_ENABLED(CONFIG_NETFILTER_CONNCOUNT)
2530         &dp_ct_limit_genl_family,
2531 #endif
2532 };
2533
2534 static void dp_unregister_genl(int n_families)
2535 {
2536         int i;
2537
2538         for (i = 0; i < n_families; i++)
2539                 genl_unregister_family(dp_genl_families[i]);
2540 }
2541
2542 static int __init dp_register_genl(void)
2543 {
2544         int err;
2545         int i;
2546
2547         for (i = 0; i < ARRAY_SIZE(dp_genl_families); i++) {
2548                 err = genl_register_family(dp_genl_families[i]);
2550                 if (err)
2551                         goto error;
2552         }
2553
2554         return 0;
2555
2556 error:
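        /* Unregister only the i families that were registered above. */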
2557         dp_unregister_genl(i);
2558         return err;
2559 }
2560
2561 static int __net_init ovs_init_net(struct net *net)
2562 {
2563         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2564         int err;
2565
2566         INIT_LIST_HEAD(&ovs_net->dps);
2567         INIT_WORK(&ovs_net->dp_notify_work, ovs_dp_notify_wq);
2568         INIT_DELAYED_WORK(&ovs_net->masks_rebalance, ovs_dp_masks_rebalance);
2569
2570         err = ovs_ct_init(net);
2571         if (err)
2572                 return err;
2573
2574         schedule_delayed_work(&ovs_net->masks_rebalance,
2575                               msecs_to_jiffies(DP_MASKS_REBALANCE_INTERVAL));
2576         return 0;
2577 }
2578
2579 static void __net_exit list_vports_from_net(struct net *net, struct net *dnet,
2580                                             struct list_head *head)
2581 {
2582         struct ovs_net *ovs_net = net_generic(net, ovs_net_id);
2583         struct datapath *dp;
2584
2585         list_for_each_entry(dp, &ovs_net->dps, list_node) {
2586                 int i;
2587
2588                 for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) {
2589                         struct vport *vport;
2590
2591                         hlist_for_each_entry(vport, &dp->ports[i], dp_hash_node) {
2592                                 if (vport->ops->type != OVS_VPORT_TYPE_INTERNAL)
2593                                         continue;
2594
2595                                 if (dev_net(vport->dev) == dnet)
2596                                         list_add(&vport->detach_list, head);
2597                         }
2598                 }
2599         }
2600 }
2601
2602 static void __net_exit ovs_exit_net(struct net *dnet)
2603 {
2604         struct datapath *dp, *dp_next;
2605         struct ovs_net *ovs_net = net_generic(dnet, ovs_net_id);
2606         struct vport *vport, *vport_next;
2607         struct net *net;
2608         LIST_HEAD(head);
2609
2610         ovs_lock();
2611
2612         ovs_ct_exit(dnet);
2613
2614         list_for_each_entry_safe(dp, dp_next, &ovs_net->dps, list_node)
2615                 __dp_destroy(dp);
2616
2617         down_read(&net_rwsem);
2618         for_each_net(net)
2619                 list_vports_from_net(net, dnet, &head);
2620         up_read(&net_rwsem);
2621
2622         /* Detach all vports from given namespace. */
2623         list_for_each_entry_safe(vport, vport_next, &head, detach_list) {
2624                 list_del(&vport->detach_list);
2625                 ovs_dp_detach_port(vport);
2626         }
2627
2628         ovs_unlock();
2629
2630         cancel_delayed_work_sync(&ovs_net->masks_rebalance);
2631         cancel_work_sync(&ovs_net->dp_notify_work);
2632 }
2633
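/* Each net namespace gets its own ovs_net (datapath list, notifier work
 * and masks-rebalance worker), sized and indexed via ovs_net_id.
 */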
2634 static struct pernet_operations ovs_net_ops = {
2635         .init = ovs_init_net,
2636         .exit = ovs_exit_net,
2637         .id   = &ovs_net_id,
2638         .size = sizeof(struct ovs_net),
2639 };
2640
2641 static int __init dp_init(void)
2642 {
2643         int err;
2644
2645         BUILD_BUG_ON(sizeof(struct ovs_skb_cb) >
2646                      sizeof_field(struct sk_buff, cb));
2647
2648         pr_info("Open vSwitch switching datapath\n");
2649
2650         err = action_fifos_init();
2651         if (err)
2652                 goto error;
2653
2654         err = ovs_internal_dev_rtnl_link_register();
2655         if (err)
2656                 goto error_action_fifos_exit;
2657
2658         err = ovs_flow_init();
2659         if (err)
2660                 goto error_unreg_rtnl_link;
2661
2662         err = ovs_vport_init();
2663         if (err)
2664                 goto error_flow_exit;
2665
2666         err = register_pernet_device(&ovs_net_ops);
2667         if (err)
2668                 goto error_vport_exit;
2669
2670         err = register_netdevice_notifier(&ovs_dp_device_notifier);
2671         if (err)
2672                 goto error_netns_exit;
2673
2674         err = ovs_netdev_init();
2675         if (err)
2676                 goto error_unreg_notifier;
2677
2678         err = dp_register_genl();
2679         if (err < 0)
2680                 goto error_unreg_netdev;
2681
2682         return 0;
2683
2684 error_unreg_netdev:
2685         ovs_netdev_exit();
2686 error_unreg_notifier:
2687         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2688 error_netns_exit:
2689         unregister_pernet_device(&ovs_net_ops);
2690 error_vport_exit:
2691         ovs_vport_exit();
2692 error_flow_exit:
2693         ovs_flow_exit();
2694 error_unreg_rtnl_link:
2695         ovs_internal_dev_rtnl_link_unregister();
2696 error_action_fifos_exit:
2697         action_fifos_exit();
2698 error:
2699         return err;
2700 }
2701
2702 static void dp_cleanup(void)
2703 {
2704         dp_unregister_genl(ARRAY_SIZE(dp_genl_families));
2705         ovs_netdev_exit();
2706         unregister_netdevice_notifier(&ovs_dp_device_notifier);
2707         unregister_pernet_device(&ovs_net_ops);
2708         rcu_barrier();
2709         ovs_vport_exit();
2710         ovs_flow_exit();
2711         ovs_internal_dev_rtnl_link_unregister();
2712         action_fifos_exit();
2713 }
2714
2715 module_init(dp_init);
2716 module_exit(dp_cleanup);
2717
2718 MODULE_DESCRIPTION("Open vSwitch switching datapath");
2719 MODULE_LICENSE("GPL");
2720 MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY);
2721 MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY);
2722 MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY);
2723 MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY);
2724 MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY);
2725 MODULE_ALIAS_GENL_FAMILY(OVS_CT_LIMIT_FAMILY);