drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
3
4 #include <net/fib_notifier.h>
5 #include "tc_tun_encap.h"
6 #include "en_tc.h"
7 #include "tc_tun.h"
8 #include "rep/tc.h"
9 #include "diag/en_tc_tracepoint.h"
10
11 enum {
12         MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
13 };
14
15 struct mlx5e_route_key {
16         int ip_version;
17         union {
18                 __be32 v4;
19                 struct in6_addr v6;
20         } endpoint_ip;
21 };
22
23 struct mlx5e_route_entry {
24         struct mlx5e_route_key key;
25         struct list_head encap_entries;
26         struct list_head decap_flows;
27         u32 flags;
28         struct hlist_node hlist;
29         refcount_t refcnt;
30         int tunnel_dev_index;
31         struct rcu_head rcu;
32 };
33
34 struct mlx5e_tc_tun_encap {
35         struct mlx5e_priv *priv;
36         struct notifier_block fib_nb;
37         spinlock_t route_lock; /* protects route_tbl */
38         unsigned long route_tbl_last_update;
39         DECLARE_HASHTABLE(route_tbl, 8);
40 };
41
42 static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
43 {
44         return r->flags & MLX5E_ROUTE_ENTRY_VALID;
45 }
46
47 int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
48                              struct mlx5_flow_spec *spec)
49 {
50         struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
51         struct mlx5_rx_tun_attr *tun_attr;
52         void *daddr, *saddr;
53         u8 ip_version;
54
55         tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
56         if (!tun_attr)
57                 return -ENOMEM;
58
59         esw_attr->rx_tun_attr = tun_attr;
60         ip_version = mlx5e_tc_get_ip_version(spec, true);
61
62         if (ip_version == 4) {
63                 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
64                                      outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
65                 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
66                                      outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
67                 tun_attr->dst_ip.v4 = *(__be32 *)daddr;
68                 tun_attr->src_ip.v4 = *(__be32 *)saddr;
69                 if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
70                         return 0;
71         }
72 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
73         else if (ip_version == 6) {
74                 int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
75                 struct in6_addr zerov6 = {};
76
77                 daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
78                                      outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
79                 saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
80                                      outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
81                 memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
82                 memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
83                 if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
84                     !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
85                         return 0;
86         }
87 #endif
88         /* Only set the flag if both src and dst ip addresses exist. They are
89          * required to establish routing.
90          */
91         flow_flag_set(flow, TUN_RX);
92         flow->attr->tun_ip_version = ip_version;
93         return 0;
94 }
95
96 static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
97 {
98         bool all_flow_encaps_valid = true;
99         int i;
100
101         /* Flow can be associated with multiple encap entries.
102          * Before offloading the flow verify that all of them have
103          * a valid neighbour.
104          */
105         for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
106                 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
107                         continue;
108                 if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
109                         all_flow_encaps_valid = false;
110                         break;
111                 }
112         }
113
114         return all_flow_encaps_valid;
115 }
116
117 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
118                               struct mlx5e_encap_entry *e,
119                               struct list_head *flow_list)
120 {
121         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
122         struct mlx5_esw_flow_attr *esw_attr;
123         struct mlx5_flow_handle *rule;
124         struct mlx5_flow_attr *attr;
125         struct mlx5_flow_spec *spec;
126         struct mlx5e_tc_flow *flow;
127         int err;
128
129         if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
130                 return;
131
132         e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
133                                                      e->reformat_type,
134                                                      e->encap_size, e->encap_header,
135                                                      MLX5_FLOW_NAMESPACE_FDB);
136         if (IS_ERR(e->pkt_reformat)) {
137                 mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
138                                PTR_ERR(e->pkt_reformat));
139                 return;
140         }
141         e->flags |= MLX5_ENCAP_ENTRY_VALID;
142         mlx5e_rep_queue_neigh_stats_work(priv);
143
144         list_for_each_entry(flow, flow_list, tmp_list) {
145                 if (!mlx5e_is_offloaded_flow(flow))
146                         continue;
147                 attr = flow->attr;
148                 esw_attr = attr->esw_attr;
149                 spec = &attr->parse_attr->spec;
150
151                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
152                 esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
153
154                 /* Do not offload flows with unresolved neighbors */
155                 if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
156                         continue;
157                 /* update from slow path rule to encap rule */
158                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
159                 if (IS_ERR(rule)) {
160                         err = PTR_ERR(rule);
161                         mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
162                                        err);
163                         continue;
164                 }
165
166                 mlx5e_tc_unoffload_from_slow_path(esw, flow);
167                 flow->rule[0] = rule;
168                 /* was unset when slow path rule removed */
169                 flow_flag_set(flow, OFFLOADED);
170         }
171 }
172
173 void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
174                               struct mlx5e_encap_entry *e,
175                               struct list_head *flow_list)
176 {
177         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
178         struct mlx5_esw_flow_attr *esw_attr;
179         struct mlx5_flow_handle *rule;
180         struct mlx5_flow_attr *attr;
181         struct mlx5_flow_spec *spec;
182         struct mlx5e_tc_flow *flow;
183         int err;
184
185         list_for_each_entry(flow, flow_list, tmp_list) {
186                 if (!mlx5e_is_offloaded_flow(flow))
187                         continue;
188                 attr = flow->attr;
189                 esw_attr = attr->esw_attr;
190                 spec = &attr->parse_attr->spec;
191
192                 /* update from encap rule to slow path rule */
193                 rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
194                 /* mark the flow's encap dest as non-valid */
195                 esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;
196
197                 if (IS_ERR(rule)) {
198                         err = PTR_ERR(rule);
199                         mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
200                                        err);
201                         continue;
202                 }
203
204                 mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
205                 flow->rule[0] = rule;
206                 /* was unset when fast path rule removed */
207                 flow_flag_set(flow, OFFLOADED);
208         }
209
210         /* the encap was valid, so its packet reformat was allocated; clear the flag and release it */
211         e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
212         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
213 }
214
215 static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
216                                 struct list_head *flow_list,
217                                 int index)
218 {
219         if (IS_ERR(mlx5e_flow_get(flow)))
220                 return;
221         wait_for_completion(&flow->init_done);
222
223         flow->tmp_entry_index = index;
224         list_add(&flow->tmp_list, flow_list);
225 }
226
227 /* Takes reference to all flows attached to encap and adds the flows to
228  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
229  */
230 void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
231 {
232         struct encap_flow_item *efi;
233         struct mlx5e_tc_flow *flow;
234
235         list_for_each_entry(efi, &e->flows, list) {
236                 flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
237                 mlx5e_take_tmp_flow(flow, flow_list, efi->index);
238         }
239 }
240
241 /* Takes reference to all flows attached to route and adds the flows to
242  * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
243  */
244 static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
245                                              struct list_head *flow_list)
246 {
247         struct mlx5e_tc_flow *flow;
248
249         list_for_each_entry(flow, &r->decap_flows, decap_routes)
250                 mlx5e_take_tmp_flow(flow, flow_list, 0);
251 }
252
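/* Walk the neigh hash entry's encap list under RCU, starting after @e (or from
 * the list head when @e is NULL). Take a reference on the first entry whose
 * refcount is still non-zero, wait for its initialization to complete and, if
 * it did not reach the MLX5_ENCAP_ENTRY_VALID state, keep searching. The
 * reference held on the starting entry @e is released before returning; the
 * caller is responsible for putting the returned entry.
 */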
253 static struct mlx5e_encap_entry *
254 mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
255                            struct mlx5e_encap_entry *e)
256 {
257         struct mlx5e_encap_entry *next = NULL;
258
259 retry:
260         rcu_read_lock();
261
262         /* find encap with non-zero reference counter value */
263         for (next = e ?
264                      list_next_or_null_rcu(&nhe->encap_list,
265                                            &e->encap_list,
266                                            struct mlx5e_encap_entry,
267                                            encap_list) :
268                      list_first_or_null_rcu(&nhe->encap_list,
269                                             struct mlx5e_encap_entry,
270                                             encap_list);
271              next;
272              next = list_next_or_null_rcu(&nhe->encap_list,
273                                           &next->encap_list,
274                                           struct mlx5e_encap_entry,
275                                           encap_list))
276                 if (mlx5e_encap_take(next))
277                         break;
278
279         rcu_read_unlock();
280
281         /* release starting encap */
282         if (e)
283                 mlx5e_encap_put(netdev_priv(e->out_dev), e);
284         if (!next)
285                 return next;
286
287         /* wait for encap to be fully initialized */
288         wait_for_completion(&next->res_ready);
289         /* continue searching if encap entry is not in valid state after completion */
290         if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
291                 e = next;
292                 goto retry;
293         }
294
295         return next;
296 }
297
298 void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
299 {
300         struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
301         struct mlx5e_encap_entry *e = NULL;
302         struct mlx5e_tc_flow *flow;
303         struct mlx5_fc *counter;
304         struct neigh_table *tbl;
305         bool neigh_used = false;
306         struct neighbour *n;
307         u64 lastuse;
308
309         if (m_neigh->family == AF_INET)
310                 tbl = &arp_tbl;
311 #if IS_ENABLED(CONFIG_IPV6)
312         else if (m_neigh->family == AF_INET6)
313                 tbl = ipv6_stub->nd_tbl;
314 #endif
315         else
316                 return;
317
318         /* mlx5e_get_next_valid_encap() releases previous encap before returning
319          * next one.
320          */
321         while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
322                 struct mlx5e_priv *priv = netdev_priv(e->out_dev);
323                 struct encap_flow_item *efi, *tmp;
324                 struct mlx5_eswitch *esw;
325                 LIST_HEAD(flow_list);
326
327                 esw = priv->mdev->priv.eswitch;
328                 mutex_lock(&esw->offloads.encap_tbl_lock);
329                 list_for_each_entry_safe(efi, tmp, &e->flows, list) {
330                         flow = container_of(efi, struct mlx5e_tc_flow,
331                                             encaps[efi->index]);
332                         if (IS_ERR(mlx5e_flow_get(flow)))
333                                 continue;
334                         list_add(&flow->tmp_list, &flow_list);
335
336                         if (mlx5e_is_offloaded_flow(flow)) {
337                                 counter = mlx5e_tc_get_counter(flow);
338                                 lastuse = mlx5_fc_query_lastuse(counter);
339                                 if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
340                                         neigh_used = true;
341                                         break;
342                                 }
343                         }
344                 }
345                 mutex_unlock(&esw->offloads.encap_tbl_lock);
346
347                 mlx5e_put_flow_list(priv, &flow_list);
348                 if (neigh_used) {
349                         /* release current encap before breaking the loop */
350                         mlx5e_encap_put(priv, e);
351                         break;
352                 }
353         }
354
355         trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);
356
357         if (neigh_used) {
358                 nhe->reported_lastuse = jiffies;
359
360                 /* find the relevant neigh according to the cached device and
361                  * dst ip pair
362                  */
363                 n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
364                 if (!n)
365                         return;
366
367                 neigh_event_send(n, NULL);
368                 neigh_release(n);
369         }
370 }
371
372 static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
373 {
374         WARN_ON(!list_empty(&e->flows));
375
376         if (e->compl_result > 0) {
377                 mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);
378
379                 if (e->flags & MLX5_ENCAP_ENTRY_VALID)
380                         mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
381         }
382
383         kfree(e->tun_info);
384         kfree(e->encap_header);
385         kfree_rcu(e, rcu);
386 }
387
388 static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
389                                 struct mlx5e_decap_entry *d)
390 {
391         WARN_ON(!list_empty(&d->flows));
392
393         if (!d->compl_result)
394                 mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
395
396         kfree_rcu(d, rcu);
397 }
398
399 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
400 {
401         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
402
403         if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
404                 return;
405         list_del(&e->route_list);
406         hash_del_rcu(&e->encap_hlist);
407         mutex_unlock(&esw->offloads.encap_tbl_lock);
408
409         mlx5e_encap_dealloc(priv, e);
410 }
411
412 static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
413 {
414         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
415
416         if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
417                 return;
418         hash_del_rcu(&d->hlist);
419         mutex_unlock(&esw->offloads.decap_tbl_lock);
420
421         mlx5e_decap_dealloc(priv, d);
422 }
423
424 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
425                                      struct mlx5e_tc_flow *flow,
426                                      int out_index);
427
428 void mlx5e_detach_encap(struct mlx5e_priv *priv,
429                         struct mlx5e_tc_flow *flow, int out_index)
430 {
431         struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
432         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
433
434         if (flow->attr->esw_attr->dests[out_index].flags &
435             MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
436                 mlx5e_detach_encap_route(priv, flow, out_index);
437
438         /* flow wasn't fully initialized */
439         if (!e)
440                 return;
441
442         mutex_lock(&esw->offloads.encap_tbl_lock);
443         list_del(&flow->encaps[out_index].list);
444         flow->encaps[out_index].e = NULL;
445         if (!refcount_dec_and_test(&e->refcnt)) {
446                 mutex_unlock(&esw->offloads.encap_tbl_lock);
447                 return;
448         }
449         list_del(&e->route_list);
450         hash_del_rcu(&e->encap_hlist);
451         mutex_unlock(&esw->offloads.encap_tbl_lock);
452
453         mlx5e_encap_dealloc(priv, e);
454 }
455
456 void mlx5e_detach_decap(struct mlx5e_priv *priv,
457                         struct mlx5e_tc_flow *flow)
458 {
459         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
460         struct mlx5e_decap_entry *d = flow->decap_reformat;
461
462         if (!d)
463                 return;
464
465         mutex_lock(&esw->offloads.decap_tbl_lock);
466         list_del(&flow->l3_to_l2_reformat);
467         flow->decap_reformat = NULL;
468
469         if (!refcount_dec_and_test(&d->refcnt)) {
470                 mutex_unlock(&esw->offloads.decap_tbl_lock);
471                 return;
472         }
473         hash_del_rcu(&d->hlist);
474         mutex_unlock(&esw->offloads.decap_tbl_lock);
475
476         mlx5e_decap_dealloc(priv, d);
477 }
478
479 bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
480                                            struct mlx5e_encap_key *b)
481 {
482         return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
483                 a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
484 }
485
486 static int cmp_decap_info(struct mlx5e_decap_key *a,
487                           struct mlx5e_decap_key *b)
488 {
489         return memcmp(&a->key, &b->key, sizeof(b->key));
490 }
491
492 static int hash_encap_info(struct mlx5e_encap_key *key)
493 {
494         return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
495                      key->tc_tunnel->tunnel_type);
496 }
497
498 static int hash_decap_info(struct mlx5e_decap_key *key)
499 {
500         return jhash(&key->key, sizeof(key->key), 0);
501 }
502
503 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
504 {
505         return refcount_inc_not_zero(&e->refcnt);
506 }
507
508 static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
509 {
510         return refcount_inc_not_zero(&e->refcnt);
511 }
512
513 static struct mlx5e_encap_entry *
514 mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
515                 uintptr_t hash_key)
516 {
517         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
518         struct mlx5e_encap_key e_key;
519         struct mlx5e_encap_entry *e;
520
521         hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
522                                    encap_hlist, hash_key) {
523                 e_key.ip_tun_key = &e->tun_info->key;
524                 e_key.tc_tunnel = e->tunnel;
525                 if (e->tunnel->encap_info_equal(&e_key, key) &&
526                     mlx5e_encap_take(e))
527                         return e;
528         }
529
530         return NULL;
531 }
532
533 static struct mlx5e_decap_entry *
534 mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
535                 uintptr_t hash_key)
536 {
537         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
538         struct mlx5e_decap_key r_key;
539         struct mlx5e_decap_entry *e;
540
541         hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
542                                    hlist, hash_key) {
543                 r_key = e->key;
544                 if (!cmp_decap_info(&r_key, key) &&
545                     mlx5e_decap_take(e))
546                         return e;
547         }
548         return NULL;
549 }
550
551 struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
552 {
553         size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
554
555         return kmemdup(tun_info, tun_size, GFP_KERNEL);
556 }
557
558 static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
559                                       struct mlx5e_tc_flow *flow,
560                                       int out_index,
561                                       struct mlx5e_encap_entry *e,
562                                       struct netlink_ext_ack *extack)
563 {
564         int i;
565
566         for (i = 0; i < out_index; i++) {
567                 if (flow->encaps[i].e != e)
568                         continue;
569                 NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
570                 netdev_err(priv->netdev, "can't duplicate encap action\n");
571                 return true;
572         }
573
574         return false;
575 }
576
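/* When the route device for the tunnel is itself an mlx5e netdev on the same
 * eswitch (a VF tunnel endpoint), add a modify-header action that writes the
 * route vport's metadata into the VPORT_TO_REG register and mark the
 * destination with MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE. The id of the
 * modify-header action is stored so it can be rewritten on route changes.
 */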
577 static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
578                                struct mlx5_flow_attr *attr,
579                                struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
580                                struct net_device *out_dev,
581                                int route_dev_ifindex,
582                                int out_index)
583 {
584         struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
585         struct net_device *route_dev;
586         u16 vport_num;
587         int err = 0;
588         u32 data;
589
590         route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
591
592         if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
593             !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
594                 goto out;
595
596         err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
597         if (err)
598                 goto out;
599
600         attr->dest_chain = 0;
601         attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
602         esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
603         data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
604                                                        vport_num);
605         err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
606                                                    MLX5_FLOW_NAMESPACE_FDB,
607                                                    VPORT_TO_REG, data);
608         if (err >= 0) {
609                 esw_attr->dests[out_index].src_port_rewrite_act_id = err;
610                 err = 0;
611         }
612
613 out:
614         if (route_dev)
615                 dev_put(route_dev);
616         return err;
617 }
618
619 static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
620                                   struct mlx5_esw_flow_attr *attr,
621                                   struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
622                                   struct net_device *out_dev,
623                                   int route_dev_ifindex,
624                                   int out_index)
625 {
626         int act_id = attr->dests[out_index].src_port_rewrite_act_id;
627         struct net_device *route_dev;
628         u16 vport_num;
629         int err = 0;
630         u32 data;
631
632         route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);
633
634         if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
635             !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
636                 err = -ENODEV;
637                 goto out;
638         }
639
640         err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
641         if (err)
642                 goto out;
643
644         data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
645                                                        vport_num);
646         mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);
647
648 out:
649         if (route_dev)
650                 dev_put(route_dev);
651         return err;
652 }
653
654 static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
655 {
656         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
657         struct mlx5_rep_uplink_priv *uplink_priv;
658         struct mlx5e_rep_priv *uplink_rpriv;
659         struct mlx5e_tc_tun_encap *encap;
660         unsigned int ret;
661
662         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
663         uplink_priv = &uplink_rpriv->uplink_priv;
664         encap = uplink_priv->encap;
665
666         spin_lock_bh(&encap->route_lock);
667         ret = encap->route_tbl_last_update;
668         spin_unlock_bh(&encap->route_lock);
669         return ret;
670 }
671
672 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
673                                     struct mlx5e_tc_flow *flow,
674                                     struct mlx5e_encap_entry *e,
675                                     bool new_encap_entry,
676                                     unsigned long tbl_time_before,
677                                     int out_index);
678
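/* Look up an encap entry matching the flow's tunnel info or create a new one.
 * For a new entry the tunnel header is resolved through an IPv4/IPv6 route and
 * neighbour lookup outside encap_tbl_lock; concurrent users wait on res_ready.
 * On return *encap_valid reports whether the neighbour is resolved and the
 * packet reformat can be used; otherwise the caller is expected to install the
 * flow via the slow path until the encap entry becomes valid.
 */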
679 int mlx5e_attach_encap(struct mlx5e_priv *priv,
680                        struct mlx5e_tc_flow *flow,
681                        struct net_device *mirred_dev,
682                        int out_index,
683                        struct netlink_ext_ack *extack,
684                        struct net_device **encap_dev,
685                        bool *encap_valid)
686 {
687         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
688         struct mlx5e_tc_flow_parse_attr *parse_attr;
689         struct mlx5_flow_attr *attr = flow->attr;
690         const struct ip_tunnel_info *tun_info;
691         unsigned long tbl_time_before = 0;
692         struct mlx5e_encap_entry *e;
693         struct mlx5e_encap_key key;
694         bool entry_created = false;
695         unsigned short family;
696         uintptr_t hash_key;
697         int err = 0;
698
699         parse_attr = attr->parse_attr;
700         tun_info = parse_attr->tun_info[out_index];
701         family = ip_tunnel_info_af(tun_info);
702         key.ip_tun_key = &tun_info->key;
703         key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
704         if (!key.tc_tunnel) {
705                 NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
706                 return -EOPNOTSUPP;
707         }
708
709         hash_key = hash_encap_info(&key);
710
711         mutex_lock(&esw->offloads.encap_tbl_lock);
712         e = mlx5e_encap_get(priv, &key, hash_key);
713
714         /* must verify if encap is valid or not */
715         if (e) {
716                 /* Check that entry was not already attached to this flow */
717                 if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
718                         err = -EOPNOTSUPP;
719                         goto out_err;
720                 }
721
722                 mutex_unlock(&esw->offloads.encap_tbl_lock);
723                 wait_for_completion(&e->res_ready);
724
725                 /* Protect against concurrent neigh update. */
726                 mutex_lock(&esw->offloads.encap_tbl_lock);
727                 if (e->compl_result < 0) {
728                         err = -EREMOTEIO;
729                         goto out_err;
730                 }
731                 goto attach_flow;
732         }
733
734         e = kzalloc(sizeof(*e), GFP_KERNEL);
735         if (!e) {
736                 err = -ENOMEM;
737                 goto out_err;
738         }
739
740         refcount_set(&e->refcnt, 1);
741         init_completion(&e->res_ready);
742         entry_created = true;
743         INIT_LIST_HEAD(&e->route_list);
744
745         tun_info = mlx5e_dup_tun_info(tun_info);
746         if (!tun_info) {
747                 err = -ENOMEM;
748                 goto out_err_init;
749         }
750         e->tun_info = tun_info;
751         err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
752         if (err)
753                 goto out_err_init;
754
755         INIT_LIST_HEAD(&e->flows);
756         hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
757         tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
758         mutex_unlock(&esw->offloads.encap_tbl_lock);
759
760         if (family == AF_INET)
761                 err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
762         else if (family == AF_INET6)
763                 err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);
764
765         /* Protect against concurrent neigh update. */
766         mutex_lock(&esw->offloads.encap_tbl_lock);
767         complete_all(&e->res_ready);
768         if (err) {
769                 e->compl_result = err;
770                 goto out_err;
771         }
772         e->compl_result = 1;
773
774 attach_flow:
775         err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
776                                        out_index);
777         if (err)
778                 goto out_err;
779
780         flow->encaps[out_index].e = e;
781         list_add(&flow->encaps[out_index].list, &e->flows);
782         flow->encaps[out_index].index = out_index;
783         *encap_dev = e->out_dev;
784         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
785                 attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
786                 attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
787                 *encap_valid = true;
788         } else {
789                 *encap_valid = false;
790         }
791         mutex_unlock(&esw->offloads.encap_tbl_lock);
792
793         return err;
794
795 out_err:
796         mutex_unlock(&esw->offloads.encap_tbl_lock);
797         if (e)
798                 mlx5e_encap_put(priv, e);
799         return err;
800
801 out_err_init:
802         mutex_unlock(&esw->offloads.encap_tbl_lock);
803         kfree(tun_info);
804         kfree(e);
805         return err;
806 }
807
808 int mlx5e_attach_decap(struct mlx5e_priv *priv,
809                        struct mlx5e_tc_flow *flow,
810                        struct netlink_ext_ack *extack)
811 {
812         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
813         struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
814         struct mlx5e_tc_flow_parse_attr *parse_attr;
815         struct mlx5e_decap_entry *d;
816         struct mlx5e_decap_key key;
817         uintptr_t hash_key;
818         int err = 0;
819
820         parse_attr = flow->attr->parse_attr;
821         if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
822                 NL_SET_ERR_MSG_MOD(extack,
823                                    "encap header larger than max supported");
824                 return -EOPNOTSUPP;
825         }
826
827         key.key = parse_attr->eth;
828         hash_key = hash_decap_info(&key);
829         mutex_lock(&esw->offloads.decap_tbl_lock);
830         d = mlx5e_decap_get(priv, &key, hash_key);
831         if (d) {
832                 mutex_unlock(&esw->offloads.decap_tbl_lock);
833                 wait_for_completion(&d->res_ready);
834                 mutex_lock(&esw->offloads.decap_tbl_lock);
835                 if (d->compl_result) {
836                         err = -EREMOTEIO;
837                         goto out_free;
838                 }
839                 goto found;
840         }
841
842         d = kzalloc(sizeof(*d), GFP_KERNEL);
843         if (!d) {
844                 err = -ENOMEM;
845                 goto out_err;
846         }
847
848         d->key = key;
849         refcount_set(&d->refcnt, 1);
850         init_completion(&d->res_ready);
851         INIT_LIST_HEAD(&d->flows);
852         hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
853         mutex_unlock(&esw->offloads.decap_tbl_lock);
854
855         d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
856                                                      MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
857                                                      sizeof(parse_attr->eth),
858                                                      &parse_attr->eth,
859                                                      MLX5_FLOW_NAMESPACE_FDB);
860         if (IS_ERR(d->pkt_reformat)) {
861                 err = PTR_ERR(d->pkt_reformat);
862                 d->compl_result = err;
863         }
864         mutex_lock(&esw->offloads.decap_tbl_lock);
865         complete_all(&d->res_ready);
866         if (err)
867                 goto out_free;
868
869 found:
870         flow->decap_reformat = d;
871         attr->decap_pkt_reformat = d->pkt_reformat;
872         list_add(&flow->l3_to_l2_reformat, &d->flows);
873         mutex_unlock(&esw->offloads.decap_tbl_lock);
874         return 0;
875
876 out_free:
877         mutex_unlock(&esw->offloads.decap_tbl_lock);
878         mlx5e_decap_put(priv, d);
879         return err;
880
881 out_err:
882         mutex_unlock(&esw->offloads.decap_tbl_lock);
883         return err;
884 }
885
886 static int cmp_route_info(struct mlx5e_route_key *a,
887                           struct mlx5e_route_key *b)
888 {
889         if (a->ip_version == 4 && b->ip_version == 4)
890                 return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
891                               sizeof(a->endpoint_ip.v4));
892         else if (a->ip_version == 6 && b->ip_version == 6)
893                 return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
894                               sizeof(a->endpoint_ip.v6));
895         return 1;
896 }
897
898 static u32 hash_route_info(struct mlx5e_route_key *key)
899 {
900         if (key->ip_version == 4)
901                 return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
902         return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
903 }
904
905 static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
906                                 struct mlx5e_route_entry *r)
907 {
908         WARN_ON(!list_empty(&r->decap_flows));
909         WARN_ON(!list_empty(&r->encap_entries));
910
911         kfree_rcu(r, rcu);
912 }
913
914 static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
915 {
916         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
917
918         if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
919                 return;
920
921         hash_del_rcu(&r->hlist);
922         mutex_unlock(&esw->offloads.encap_tbl_lock);
923
924         mlx5e_route_dealloc(priv, r);
925 }
926
927 static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
928 {
929         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
930
931         lockdep_assert_held(&esw->offloads.encap_tbl_lock);
932
933         if (!refcount_dec_and_test(&r->refcnt))
934                 return;
935         hash_del_rcu(&r->hlist);
936         mlx5e_route_dealloc(priv, r);
937 }
938
939 static struct mlx5e_route_entry *
940 mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
941                 u32 hash_key)
942 {
943         struct mlx5e_route_key r_key;
944         struct mlx5e_route_entry *r;
945
946         hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
947                 r_key = r->key;
948                 if (!cmp_route_info(&r_key, key) &&
949                     refcount_inc_not_zero(&r->refcnt))
950                         return r;
951         }
952         return NULL;
953 }
954
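/* Find the route entry for @key in the uplink's route table or allocate a new
 * one for @tunnel_dev_index. An existing entry that was invalidated by a FIB
 * event is not reused (-EINVAL). When a new entry is inserted, the table's
 * last-update timestamp is sampled under route_lock and returned in
 * @route_tbl_change_time so the caller can detect a FIB update that raced with
 * the insertion.
 */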
955 static struct mlx5e_route_entry *
956 mlx5e_route_get_create(struct mlx5e_priv *priv,
957                        struct mlx5e_route_key *key,
958                        int tunnel_dev_index,
959                        unsigned long *route_tbl_change_time)
960 {
961         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
962         struct mlx5_rep_uplink_priv *uplink_priv;
963         struct mlx5e_rep_priv *uplink_rpriv;
964         struct mlx5e_tc_tun_encap *encap;
965         struct mlx5e_route_entry *r;
966         u32 hash_key;
967
968         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
969         uplink_priv = &uplink_rpriv->uplink_priv;
970         encap = uplink_priv->encap;
971
972         hash_key = hash_route_info(key);
973         spin_lock_bh(&encap->route_lock);
974         r = mlx5e_route_get(encap, key, hash_key);
975         spin_unlock_bh(&encap->route_lock);
976         if (r) {
977                 if (!mlx5e_route_entry_valid(r)) {
978                         mlx5e_route_put_locked(priv, r);
979                         return ERR_PTR(-EINVAL);
980                 }
981                 return r;
982         }
983
984         r = kzalloc(sizeof(*r), GFP_KERNEL);
985         if (!r)
986                 return ERR_PTR(-ENOMEM);
987
988         r->key = *key;
989         r->flags |= MLX5E_ROUTE_ENTRY_VALID;
990         r->tunnel_dev_index = tunnel_dev_index;
991         refcount_set(&r->refcnt, 1);
992         INIT_LIST_HEAD(&r->decap_flows);
993         INIT_LIST_HEAD(&r->encap_entries);
994
995         spin_lock_bh(&encap->route_lock);
996         *route_tbl_change_time = encap->route_tbl_last_update;
997         hash_add(encap->route_tbl, &r->hlist, hash_key);
998         spin_unlock_bh(&encap->route_lock);
999
1000         return r;
1001 }
1002
1003 static struct mlx5e_route_entry *
1004 mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
1005 {
1006         u32 hash_key = hash_route_info(key);
1007         struct mlx5e_route_entry *r;
1008
1009         spin_lock_bh(&encap->route_lock);
1010         encap->route_tbl_last_update = jiffies;
1011         r = mlx5e_route_get(encap, key, hash_key);
1012         spin_unlock_bh(&encap->route_lock);
1013
1014         return r;
1015 }
1016
1017 struct mlx5e_tc_fib_event_data {
1018         struct work_struct work;
1019         unsigned long event;
1020         struct mlx5e_route_entry *r;
1021         struct net_device *ul_dev;
1022 };
1023
1024 static void mlx5e_tc_fib_event_work(struct work_struct *work);
1025 static struct mlx5e_tc_fib_event_data *
1026 mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
1027 {
1028         struct mlx5e_tc_fib_event_data *fib_work;
1029
1030         fib_work = kzalloc(sizeof(*fib_work), flags);
1031         if (WARN_ON(!fib_work))
1032                 return NULL;
1033
1034         INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
1035         fib_work->event = event;
1036         fib_work->ul_dev = ul_dev;
1037
1038         return fib_work;
1039 }
1040
1041 static int
1042 mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
1043                            struct mlx5e_route_entry *r,
1044                            unsigned long event)
1045 {
1046         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1047         struct mlx5e_tc_fib_event_data *fib_work;
1048         struct mlx5e_rep_priv *uplink_rpriv;
1049         struct net_device *ul_dev;
1050
1051         uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
1052         ul_dev = uplink_rpriv->netdev;
1053
1054         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
1055         if (!fib_work)
1056                 return -ENOMEM;
1057
1058         dev_hold(ul_dev);
1059         refcount_inc(&r->refcnt);
1060         fib_work->r = r;
1061         queue_work(priv->wq, &fib_work->work);
1062
1063         return 0;
1064 }
1065
1066 int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
1067                              struct mlx5e_tc_flow *flow)
1068 {
1069         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1070         unsigned long tbl_time_before, tbl_time_after;
1071         struct mlx5e_tc_flow_parse_attr *parse_attr;
1072         struct mlx5_flow_attr *attr = flow->attr;
1073         struct mlx5_esw_flow_attr *esw_attr;
1074         struct mlx5e_route_entry *r;
1075         struct mlx5e_route_key key;
1076         int err = 0;
1077
1078         esw_attr = attr->esw_attr;
1079         parse_attr = attr->parse_attr;
1080         mutex_lock(&esw->offloads.encap_tbl_lock);
1081         if (!esw_attr->rx_tun_attr)
1082                 goto out;
1083
1084         tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
1085         tbl_time_after = tbl_time_before;
1086         err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
1087         if (err || !esw_attr->rx_tun_attr->decap_vport)
1088                 goto out;
1089
1090         key.ip_version = attr->tun_ip_version;
1091         if (key.ip_version == 4)
1092                 key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
1093         else
1094                 key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;
1095
1096         r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
1097                                    &tbl_time_after);
1098         if (IS_ERR(r)) {
1099                 err = PTR_ERR(r);
1100                 goto out;
1101         }
1102         /* Routing changed concurrently. FIB event handler might have missed new
1103          * entry, schedule update.
1104          */
1105         if (tbl_time_before != tbl_time_after) {
1106                 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1107                 if (err) {
1108                         mlx5e_route_put_locked(priv, r);
1109                         goto out;
1110                 }
1111         }
1112
1113         flow->decap_route = r;
1114         list_add(&flow->decap_routes, &r->decap_flows);
1115         mutex_unlock(&esw->offloads.encap_tbl_lock);
1116         return 0;
1117
1118 out:
1119         mutex_unlock(&esw->offloads.encap_tbl_lock);
1120         return err;
1121 }
1122
1123 static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
1124                                     struct mlx5e_tc_flow *flow,
1125                                     struct mlx5e_encap_entry *e,
1126                                     bool new_encap_entry,
1127                                     unsigned long tbl_time_before,
1128                                     int out_index)
1129 {
1130         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1131         unsigned long tbl_time_after = tbl_time_before;
1132         struct mlx5e_tc_flow_parse_attr *parse_attr;
1133         struct mlx5_flow_attr *attr = flow->attr;
1134         const struct ip_tunnel_info *tun_info;
1135         struct mlx5_esw_flow_attr *esw_attr;
1136         struct mlx5e_route_entry *r;
1137         struct mlx5e_route_key key;
1138         unsigned short family;
1139         int err = 0;
1140
1141         esw_attr = attr->esw_attr;
1142         parse_attr = attr->parse_attr;
1143         tun_info = parse_attr->tun_info[out_index];
1144         family = ip_tunnel_info_af(tun_info);
1145
1146         if (family == AF_INET) {
1147                 key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
1148                 key.ip_version = 4;
1149         } else if (family == AF_INET6) {
1150                 key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
1151                 key.ip_version = 6;
1152         }
1153
1154         err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
1155                                   e->route_dev_ifindex, out_index);
1156         if (err || !(esw_attr->dests[out_index].flags &
1157                      MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
1158                 return err;
1159
1160         r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
1161                                    &tbl_time_after);
1162         if (IS_ERR(r))
1163                 return PTR_ERR(r);
1164         /* Routing changed concurrently. FIB event handler might have missed new
1165          * entry, schedule update.
1166          */
1167         if (tbl_time_before != tbl_time_after) {
1168                 err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
1169                 if (err) {
1170                         mlx5e_route_put_locked(priv, r);
1171                         return err;
1172                 }
1173         }
1174
1175         flow->encap_routes[out_index].r = r;
1176         if (new_encap_entry)
1177                 list_add(&e->route_list, &r->encap_entries);
1178         flow->encap_routes[out_index].index = out_index;
1179         return 0;
1180 }
1181
1182 void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
1183                               struct mlx5e_tc_flow *flow)
1184 {
1185         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1186         struct mlx5e_route_entry *r = flow->decap_route;
1187
1188         if (!r)
1189                 return;
1190
1191         mutex_lock(&esw->offloads.encap_tbl_lock);
1192         list_del(&flow->decap_routes);
1193         flow->decap_route = NULL;
1194
1195         if (!refcount_dec_and_test(&r->refcnt)) {
1196                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1197                 return;
1198         }
1199         hash_del_rcu(&r->hlist);
1200         mutex_unlock(&esw->offloads.encap_tbl_lock);
1201
1202         mlx5e_route_dealloc(priv, r);
1203 }
1204
1205 static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
1206                                      struct mlx5e_tc_flow *flow,
1207                                      int out_index)
1208 {
1209         struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
1210         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1211         struct mlx5e_encap_entry *e, *tmp;
1212
1213         if (!r)
1214                 return;
1215
1216         mutex_lock(&esw->offloads.encap_tbl_lock);
1217         flow->encap_routes[out_index].r = NULL;
1218
1219         if (!refcount_dec_and_test(&r->refcnt)) {
1220                 mutex_unlock(&esw->offloads.encap_tbl_lock);
1221                 return;
1222         }
1223         list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
1224                 list_del_init(&e->route_list);
1225         hash_del_rcu(&r->hlist);
1226         mutex_unlock(&esw->offloads.encap_tbl_lock);
1227
1228         mlx5e_route_dealloc(priv, r);
1229 }
1230
1231 static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
1232                                    struct mlx5e_encap_entry *e,
1233                                    struct list_head *encap_flows)
1234 {
1235         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1236         struct mlx5e_tc_flow *flow;
1237
1238         list_for_each_entry(flow, encap_flows, tmp_list) {
1239                 struct mlx5_flow_attr *attr = flow->attr;
1240                 struct mlx5_esw_flow_attr *esw_attr;
1241
1242                 if (!mlx5e_is_offloaded_flow(flow))
1243                         continue;
1244                 esw_attr = attr->esw_attr;
1245
1246                 if (flow_flag_test(flow, SLOW))
1247                         mlx5e_tc_unoffload_from_slow_path(esw, flow);
1248                 else
1249                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1250                 mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
1251                 attr->modify_hdr = NULL;
1252
1253                 esw_attr->dests[flow->tmp_entry_index].flags &=
1254                         ~MLX5_ESW_DEST_ENCAP_VALID;
1255                 esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
1256         }
1257
1258         e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
1259         if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1260                 e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
1261                 mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
1262                 e->pkt_reformat = NULL;
1263         }
1264 }
1265
1266 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1267                                   struct net_device *tunnel_dev,
1268                                   struct mlx5e_encap_entry *e,
1269                                   struct list_head *encap_flows)
1270 {
1271         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1272         struct mlx5e_tc_flow *flow;
1273         int err;
1274
1275         err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1276                 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1277                 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1278         if (err)
1279                 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1280         e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1281
1282         list_for_each_entry(flow, encap_flows, tmp_list) {
1283                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1284                 struct mlx5_flow_attr *attr = flow->attr;
1285                 struct mlx5_esw_flow_attr *esw_attr;
1286                 struct mlx5_flow_handle *rule;
1287                 struct mlx5_flow_spec *spec;
1288
1289                 if (flow_flag_test(flow, FAILED))
1290                         continue;
1291
1292                 esw_attr = attr->esw_attr;
1293                 parse_attr = attr->parse_attr;
1294                 spec = &parse_attr->spec;
1295
1296                 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1297                                              e->out_dev, e->route_dev_ifindex,
1298                                              flow->tmp_entry_index);
1299                 if (err) {
1300                         mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1301                         continue;
1302                 }
1303
1304                 err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
1305                 if (err) {
1306                         mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1307                                        err);
1308                         continue;
1309                 }
1310
1311                 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1312                         esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1313                         esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1314                         if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1315                                 goto offload_to_slow_path;
1316                         /* update from slow path rule to encap rule */
1317                         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1318                         if (IS_ERR(rule)) {
1319                                 err = PTR_ERR(rule);
1320                                 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1321                                                err);
1322                         } else {
1323                                 flow->rule[0] = rule;
1324                         }
1325                 } else {
1326 offload_to_slow_path:
1327                         rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1328                         /* mark the flow's encap dest as non-valid */
1329                         esw_attr->dests[flow->tmp_entry_index].flags &=
1330                                 ~MLX5_ESW_DEST_ENCAP_VALID;
1331
1332                         if (IS_ERR(rule)) {
1333                                 err = PTR_ERR(rule);
1334                                 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1335                                                err);
1336                         } else {
1337                                 flow->rule[0] = rule;
1338                         }
1339                 }
1340                 flow_flag_set(flow, OFFLOADED);
1341         }
1342 }
1343
1344 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1345                                      struct mlx5e_route_entry *r,
1346                                      struct list_head *flow_list,
1347                                      bool replace)
1348 {
1349         struct net_device *tunnel_dev;
1350         struct mlx5e_encap_entry *e;
1351
1352         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1353         if (!tunnel_dev)
1354                 return -ENODEV;
1355
1356         list_for_each_entry(e, &r->encap_entries, route_list) {
1357                 LIST_HEAD(encap_flows);
1358
1359                 mlx5e_take_all_encap_flows(e, &encap_flows);
1360                 if (list_empty(&encap_flows))
1361                         continue;
1362
1363                 if (mlx5e_route_entry_valid(r))
1364                         mlx5e_invalidate_encap(priv, e, &encap_flows);
1365
1366                 if (!replace) {
1367                         list_splice(&encap_flows, flow_list);
1368                         continue;
1369                 }
1370
1371                 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1372                 list_splice(&encap_flows, flow_list);
1373         }
1374
1375         return 0;
1376 }
1377
1378 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1379                                       struct list_head *flow_list)
1380 {
1381         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1382         struct mlx5e_tc_flow *flow;
1383
1384         list_for_each_entry(flow, flow_list, tmp_list)
1385                 if (mlx5e_is_offloaded_flow(flow))
1386                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1387 }
1388
1389 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1390                                   struct list_head *decap_flows)
1391 {
1392         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1393         struct mlx5e_tc_flow *flow;
1394
1395         list_for_each_entry(flow, decap_flows, tmp_list) {
1396                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1397                 struct mlx5_flow_attr *attr = flow->attr;
1398                 struct mlx5_flow_handle *rule;
1399                 struct mlx5_flow_spec *spec;
1400                 int err;
1401
1402                 if (flow_flag_test(flow, FAILED))
1403                         continue;
1404
1405                 parse_attr = attr->parse_attr;
1406                 spec = &parse_attr->spec;
1407                 err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
1408                 if (err) {
1409                         mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1410                                        err);
1411                         continue;
1412                 }
1413
1414                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1415                 if (IS_ERR(rule)) {
1416                         err = PTR_ERR(rule);
1417                         mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1418                                        err);
1419                 } else {
1420                         flow->rule[0] = rule;
1421                         flow_flag_set(flow, OFFLOADED);
1422                 }
1423         }
1424 }
1425
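     /* Handle the decap flows attached to route entry @r after a FIB change:
      * unoffload them if the route was valid, re-offload them on route
      * replace, and hand them to the caller on @flow_list.
      */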
1426 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1427                                           struct mlx5e_route_entry *r,
1428                                           struct list_head *flow_list,
1429                                           bool replace)
1430 {
1431         struct net_device *tunnel_dev;
1432         LIST_HEAD(decap_flows);
1433
1434         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1435         if (!tunnel_dev)
1436                 return -ENODEV;
1437
1438         mlx5e_take_all_route_decap_flows(r, &decap_flows);
1439         if (mlx5e_route_entry_valid(r))
1440                 mlx5e_unoffload_flow_list(priv, &decap_flows);
1441         if (replace)
1442                 mlx5e_reoffload_decap(priv, &decap_flows);
1443
1444         list_splice(&decap_flows, flow_list);
1445
1446         return 0;
1447 }
1448
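     /* Deferred handler for FIB replace/delete events. Runs from the driver
      * workqueue so it can take RTNL and the encap table mutex, updates the
      * encap and decap flows attached to the affected route entry, and drops
      * the references taken when the work was queued.
      */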
1449 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1450 {
1451         struct mlx5e_tc_fib_event_data *event_data =
1452                 container_of(work, struct mlx5e_tc_fib_event_data, work);
1453         struct net_device *ul_dev = event_data->ul_dev;
1454         struct mlx5e_priv *priv = netdev_priv(ul_dev);
1455         struct mlx5e_route_entry *r = event_data->r;
1456         struct mlx5_eswitch *esw;
1457         LIST_HEAD(flow_list);
1458         bool replace;
1459         int err;
1460
1461         /* sync with concurrent neigh updates */
1462         rtnl_lock();
1463         esw = priv->mdev->priv.eswitch;
1464         mutex_lock(&esw->offloads.encap_tbl_lock);
1465         replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1466
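             /* A delete event for a route entry that was never marked valid
              * has nothing to tear down.
              */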
1467         if (!mlx5e_route_entry_valid(r) && !replace)
1468                 goto out;
1469
1470         err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1471         if (err)
1472                 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1473                                err);
1474
1475         err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1476         if (err)
1477                 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1478                                err);
1479
1480         if (replace)
1481                 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1482 out:
1483         mutex_unlock(&esw->offloads.encap_tbl_lock);
1484         rtnl_unlock();
1485
1486         mlx5e_put_flow_list(priv, &flow_list);
1487         mlx5e_route_put(priv, event_data->r);
1488         dev_put(event_data->ul_dev);
1489         kfree(event_data);
1490 }
1491
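     /* Build the deferred work item for an IPv4 FIB replace/delete event.
      * Called from the FIB notifier in atomic context. Returns NULL when the
      * event is not relevant (nexthop is not an mlx5 netdev, prefix is not
      * /32, or no cached route entry matches) and ERR_PTR(-ENOMEM) if the
      * work item cannot be allocated. References taken on the route entry and
      * on @ul_dev are released by the work handler.
      */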
1492 static struct mlx5e_tc_fib_event_data *
1493 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1494                          struct net_device *ul_dev,
1495                          struct mlx5e_tc_tun_encap *encap,
1496                          unsigned long event,
1497                          struct fib_notifier_info *info)
1498 {
1499         struct fib_entry_notifier_info *fen_info;
1500         struct mlx5e_tc_fib_event_data *fib_work;
1501         struct mlx5e_route_entry *r;
1502         struct mlx5e_route_key key;
1503         struct net_device *fib_dev;
1504
1505         fen_info = container_of(info, struct fib_entry_notifier_info, info);
1506         fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1507         if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1508             fen_info->dst_len != 32)
1509                 return NULL;
1510
1511         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1512         if (!fib_work)
1513                 return ERR_PTR(-ENOMEM);
1514
1515         key.endpoint_ip.v4 = htonl(fen_info->dst);
1516         key.ip_version = 4;
1517
1518         /* Can't fail after this point because releasing the reference to r
1519          * requires obtaining a sleeping mutex, which we can't do in atomic
1520          * context.
1521          */
1522         r = mlx5e_route_lookup_for_update(encap, &key);
1523         if (!r)
1524                 goto out;
1525         fib_work->r = r;
1526         dev_hold(ul_dev);
1527
1528         return fib_work;
1529
1530 out:
1531         kfree(fib_work);
1532         return NULL;
1533 }
1534
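     /* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only /128 routes whose
      * nexthop is an mlx5 netdev and which match a cached route entry result
      * in a queued work item.
      */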
1535 static struct mlx5e_tc_fib_event_data *
1536 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1537                          struct net_device *ul_dev,
1538                          struct mlx5e_tc_tun_encap *encap,
1539                          unsigned long event,
1540                          struct fib_notifier_info *info)
1541 {
1542         struct fib6_entry_notifier_info *fen_info;
1543         struct mlx5e_tc_fib_event_data *fib_work;
1544         struct mlx5e_route_entry *r;
1545         struct mlx5e_route_key key;
1546         struct net_device *fib_dev;
1547
1548         fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1549         fib_dev = fib6_info_nh_dev(fen_info->rt);
1550         if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1551             fen_info->rt->fib6_dst.plen != 128)
1552                 return NULL;
1553
1554         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1555         if (!fib_work)
1556                 return ERR_PTR(-ENOMEM);
1557
1558         memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1559                sizeof(fen_info->rt->fib6_dst.addr));
1560         key.ip_version = 6;
1561
1562         /* Can't fail after this point because releasing the reference to r
1563          * requires obtaining a sleeping mutex, which we can't do in atomic
1564          * context.
1565          */
1566         r = mlx5e_route_lookup_for_update(encap, &key);
1567         if (!r)
1568                 goto out;
1569         fib_work->r = r;
1570         dev_hold(ul_dev);
1571
1572         return fib_work;
1573
1574 out:
1575         kfree(fib_work);
1576         return NULL;
1577 }
1578
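     /* FIB notifier callback. Runs in atomic context, so the actual flow
      * update is deferred to a work item on the driver workqueue; only
      * ENTRY_REPLACE and ENTRY_DEL events for IPv4/IPv6 are handled.
      */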
1579 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1580 {
1581         struct mlx5e_tc_fib_event_data *fib_work;
1582         struct fib_notifier_info *info = ptr;
1583         struct mlx5e_tc_tun_encap *encap;
1584         struct net_device *ul_dev;
1585         struct mlx5e_priv *priv;
1586
1587         encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1588         priv = encap->priv;
1589         ul_dev = priv->netdev;
1590         priv = netdev_priv(ul_dev);
1591
1592         switch (event) {
1593         case FIB_EVENT_ENTRY_REPLACE:
1594         case FIB_EVENT_ENTRY_DEL:
1595                 if (info->family == AF_INET)
1596                         fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1597                 else if (info->family == AF_INET6)
1598                         fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1599                 else
1600                         return NOTIFY_DONE;
1601
1602                 if (!IS_ERR_OR_NULL(fib_work)) {
1603                         queue_work(priv->wq, &fib_work->work);
1604                 } else if (IS_ERR(fib_work)) {
1605                         NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1606                         mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1607                                        PTR_ERR(fib_work));
1608                 }
1609
1610                 break;
1611         default:
1612                 return NOTIFY_DONE;
1613         }
1614
1615         return NOTIFY_DONE;
1616 }
1617
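     /* Allocate the tunnel encap context for @priv and register its FIB
      * notifier. Returns the new context or an ERR_PTR on failure.
      */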
1618 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1619 {
1620         struct mlx5e_tc_tun_encap *encap;
1621         int err;
1622
1623         encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1624         if (!encap)
1625                 return ERR_PTR(-ENOMEM);
1626
1627         encap->priv = priv;
1628         encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1629         spin_lock_init(&encap->route_lock);
1630         hash_init(encap->route_tbl);
1631         err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1632                                     NULL, NULL);
1633         if (err) {
1634                 kvfree(encap);
1635                 return ERR_PTR(err);
1636         }
1637
1638         return encap;
1639 }
1640
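     /* Unregister the FIB notifier and free the tunnel encap context. Pending
      * FIB event work is flushed before the context memory is released.
      */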
1641 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1642 {
1643         if (!encap)
1644                 return;
1645
1646         unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1647         flush_workqueue(encap->priv->wq); /* flush pending fib event work items */
1648         kvfree(encap);
1649 }