drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
        MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
};

struct mlx5e_route_key {
        int ip_version;
        union {
                __be32 v4;
                struct in6_addr v6;
        } endpoint_ip;
};

struct mlx5e_route_entry {
        struct mlx5e_route_key key;
        struct list_head encap_entries;
        struct list_head decap_flows;
        u32 flags;
        struct hlist_node hlist;
        refcount_t refcnt;
        int tunnel_dev_index;
        struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
        struct mlx5e_priv *priv;
        struct notifier_block fib_nb;
        spinlock_t route_lock; /* protects route_tbl */
        unsigned long route_tbl_last_update;
        DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
        return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

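/* Copy the outer source/destination IP addresses from the flow spec into
 * esw_attr->rx_tun_attr. The TUN_RX flag is only set when both addresses are
 * present, since both are required to establish routing for the decap flow.
 */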
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
                             struct mlx5_flow_spec *spec)
{
        struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
        struct mlx5_rx_tun_attr *tun_attr;
        void *daddr, *saddr;
        u8 ip_version;

        tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
        if (!tun_attr)
                return -ENOMEM;

        esw_attr->rx_tun_attr = tun_attr;
        ip_version = mlx5e_tc_get_ip_version(spec, true);

        if (ip_version == 4) {
                daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
                saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
                tun_attr->dst_ip.v4 = *(__be32 *)daddr;
                tun_attr->src_ip.v4 = *(__be32 *)saddr;
                if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
                        return 0;
        }
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
        else if (ip_version == 6) {
                int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
                struct in6_addr zerov6 = {};

                daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
                saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
                memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
                memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
                if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
                    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
                        return 0;
        }
#endif
        /* Only set the flag if both src and dst ip addresses exist. They are
         * required to establish routing.
         */
        flow_flag_set(flow, TUN_RX);
        flow->attr->tun_ip_version = ip_version;
        return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
        bool all_flow_encaps_valid = true;
        int i;

        /* Flow can be associated with multiple encap entries.
         * Before offloading the flow verify that all of them have
         * a valid neighbour.
         */
        for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
                if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
                        continue;
                if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
                        all_flow_encaps_valid = false;
                        break;
                }
        }

        return all_flow_encaps_valid;
}

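/* The tunnel neighbour became valid: offload the cached encapsulation header
 * and switch every offloaded flow in @flow_list from its slow path rule to
 * the encap rule, provided all of the flow's encap destinations are valid.
 */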
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e,
                              struct list_head *flow_list)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_attr *attr;
        struct mlx5_flow_spec *spec;
        struct mlx5e_tc_flow *flow;
        int err;

        if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
                return;

        e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
                                                     e->reformat_type,
                                                     e->encap_size, e->encap_header,
                                                     MLX5_FLOW_NAMESPACE_FDB);
        if (IS_ERR(e->pkt_reformat)) {
                mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
                               PTR_ERR(e->pkt_reformat));
                return;
        }
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(priv);

        list_for_each_entry(flow, flow_list, tmp_list) {
                if (!mlx5e_is_offloaded_flow(flow))
                        continue;
                attr = flow->attr;
                esw_attr = attr->esw_attr;
                spec = &attr->parse_attr->spec;

                esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
                esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

                /* Do not offload flows with unresolved neighbors */
                if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
                        continue;
                /* update from slow path rule to encap rule */
                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
                                       err);
                        continue;
                }

                mlx5e_tc_unoffload_from_slow_path(esw, flow);
                flow->rule[0] = rule;
                /* was unset when slow path rule removed */
                flow_flag_set(flow, OFFLOADED);
        }
}

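/* Counterpart of mlx5e_tc_encap_flows_add(): move the offloaded flows in
 * @flow_list back to the slow path, mark their encap destination as invalid
 * and release the encap entry's packet reformat.
 */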
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e,
                              struct list_head *flow_list)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_attr *attr;
        struct mlx5_flow_spec *spec;
        struct mlx5e_tc_flow *flow;
        int err;

        list_for_each_entry(flow, flow_list, tmp_list) {
                if (!mlx5e_is_offloaded_flow(flow))
                        continue;
                attr = flow->attr;
                esw_attr = attr->esw_attr;
                spec = &attr->parse_attr->spec;

                /* update from encap rule to slow path rule */
                rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
                /* mark the flow's encap dest as non-valid */
                esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
                                       err);
                        continue;
                }

                mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
                flow->rule[0] = rule;
                /* was unset when fast path rule removed */
                flow_flag_set(flow, OFFLOADED);
        }

        /* we know that the encap is valid */
        e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
                                struct list_head *flow_list,
                                int index)
{
        if (IS_ERR(mlx5e_flow_get(flow)))
                return;
        wait_for_completion(&flow->init_done);

        flow->tmp_entry_index = index;
        list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
        struct encap_flow_item *efi;
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(efi, &e->flows, list) {
                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
                mlx5e_take_tmp_flow(flow, flow_list, efi->index);
        }
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
                                             struct list_head *flow_list)
{
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(flow, &r->decap_flows, decap_routes)
                mlx5e_take_tmp_flow(flow, flow_list, 0);
}

typedef bool (match_cb)(struct mlx5e_encap_entry *);

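/* Walk nhe->encap_list under RCU and return the next encap entry after @e
 * (or the first one when @e is NULL) that can be referenced and satisfies
 * @match once its initialization completes. The reference held on @e is
 * released before moving on.
 */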
static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
                              struct mlx5e_encap_entry *e,
                              match_cb match)
{
        struct mlx5e_encap_entry *next = NULL;

retry:
        rcu_read_lock();

        /* find encap with non-zero reference counter value */
        for (next = e ?
                     list_next_or_null_rcu(&nhe->encap_list,
                                           &e->encap_list,
                                           struct mlx5e_encap_entry,
                                           encap_list) :
                     list_first_or_null_rcu(&nhe->encap_list,
                                            struct mlx5e_encap_entry,
                                            encap_list);
             next;
             next = list_next_or_null_rcu(&nhe->encap_list,
                                          &next->encap_list,
                                          struct mlx5e_encap_entry,
                                          encap_list))
                if (mlx5e_encap_take(next))
                        break;

        rcu_read_unlock();

        /* release starting encap */
        if (e)
                mlx5e_encap_put(netdev_priv(e->out_dev), e);
        if (!next)
                return next;

        /* wait for encap to be fully initialized */
        wait_for_completion(&next->res_ready);
        /* continue searching if encap entry is not in valid state after completion */
        if (!match(next)) {
                e = next;
                goto retry;
        }

        return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
        return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
                           struct mlx5e_encap_entry *e)
{
        return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
        return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
                          struct mlx5e_encap_entry *e)
{
        return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}

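/* Check whether any offloaded flow that encapsulates towards this neighbour
 * saw traffic since the last report and, if so, send a neigh event so the
 * kernel neighbour entry is kept alive while hardware still uses it.
 */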
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
        struct mlx5e_encap_entry *e = NULL;
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;
        struct neigh_table *tbl;
        bool neigh_used = false;
        struct neighbour *n;
        u64 lastuse;

        if (m_neigh->family == AF_INET)
                tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
        else if (m_neigh->family == AF_INET6)
                tbl = ipv6_stub->nd_tbl;
#endif
        else
                return;

        /* mlx5e_get_next_valid_encap() releases previous encap before returning
         * next one.
         */
        while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
                struct mlx5e_priv *priv = netdev_priv(e->out_dev);
                struct encap_flow_item *efi, *tmp;
                struct mlx5_eswitch *esw;
                LIST_HEAD(flow_list);

                esw = priv->mdev->priv.eswitch;
                mutex_lock(&esw->offloads.encap_tbl_lock);
                list_for_each_entry_safe(efi, tmp, &e->flows, list) {
                        flow = container_of(efi, struct mlx5e_tc_flow,
                                            encaps[efi->index]);
                        if (IS_ERR(mlx5e_flow_get(flow)))
                                continue;
                        list_add(&flow->tmp_list, &flow_list);

                        if (mlx5e_is_offloaded_flow(flow)) {
                                counter = mlx5e_tc_get_counter(flow);
                                lastuse = mlx5_fc_query_lastuse(counter);
                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
                                        neigh_used = true;
                                        break;
                                }
                        }
                }
                mutex_unlock(&esw->offloads.encap_tbl_lock);

                mlx5e_put_flow_list(priv, &flow_list);
                if (neigh_used) {
                        /* release current encap before breaking the loop */
                        mlx5e_encap_put(priv, e);
                        break;
                }
        }

        trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

        if (neigh_used) {
                nhe->reported_lastuse = jiffies;

                /* find the relevant neigh according to the cached device and
                 * dst ip pair
                 */
                n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
                if (!n)
                        return;

                neigh_event_send(n, NULL);
                neigh_release(n);
        }
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
        WARN_ON(!list_empty(&e->flows));

        if (e->compl_result > 0) {
                mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

                if (e->flags & MLX5_ENCAP_ENTRY_VALID)
                        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
        }

        kfree(e->tun_info);
        kfree(e->encap_header);
        kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
                                struct mlx5e_decap_entry *d)
{
        WARN_ON(!list_empty(&d->flows));

        if (!d->compl_result)
                mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

        kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
                return;
        list_del(&e->route_list);
        hash_del_rcu(&e->encap_hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
                return;
        hash_del_rcu(&d->hlist);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
                                     struct mlx5e_tc_flow *flow,
                                     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
                        struct mlx5e_tc_flow *flow, int out_index)
{
        struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (flow->attr->esw_attr->dests[out_index].flags &
            MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
                mlx5e_detach_encap_route(priv, flow, out_index);

        /* flow wasn't fully initialized */
        if (!e)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        list_del(&flow->encaps[out_index].list);
        flow->encaps[out_index].e = NULL;
        if (!refcount_dec_and_test(&e->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        list_del(&e->route_list);
        hash_del_rcu(&e->encap_hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
                        struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_decap_entry *d = flow->decap_reformat;

        if (!d)
                return;

        mutex_lock(&esw->offloads.decap_tbl_lock);
        list_del(&flow->l3_to_l2_reformat);
        flow->decap_reformat = NULL;

        if (!refcount_dec_and_test(&d->refcnt)) {
                mutex_unlock(&esw->offloads.decap_tbl_lock);
                return;
        }
        hash_del_rcu(&d->hlist);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
                                           struct mlx5e_encap_key *b)
{
        return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
                a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
                          struct mlx5e_decap_key *b)
{
        return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
        return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
                     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
        return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
        return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
        return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
                uintptr_t hash_key)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_encap_key e_key;
        struct mlx5e_encap_entry *e;

        hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
                                   encap_hlist, hash_key) {
                e_key.ip_tun_key = &e->tun_info->key;
                e_key.tc_tunnel = e->tunnel;
                if (e->tunnel->encap_info_equal(&e_key, key) &&
                    mlx5e_encap_take(e))
                        return e;
        }

        return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
                uintptr_t hash_key)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_decap_key r_key;
        struct mlx5e_decap_entry *e;

        hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
                                   hlist, hash_key) {
                r_key = e->key;
                if (!cmp_decap_info(&r_key, key) &&
                    mlx5e_decap_take(e))
                        return e;
        }
        return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
        size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

        return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
                                      struct mlx5e_tc_flow *flow,
                                      int out_index,
                                      struct mlx5e_encap_entry *e,
                                      struct netlink_ext_ack *extack)
{
        int i;

        for (i = 0; i < out_index; i++) {
                if (flow->encaps[i].e != e)
                        continue;
                NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
                netdev_err(priv->netdev, "can't duplicate encap action\n");
                return true;
        }

        return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
                               struct mlx5_flow_attr *attr,
                               struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                               struct net_device *out_dev,
                               int route_dev_ifindex,
                               int out_index)
{
        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
        struct net_device *route_dev;
        u16 vport_num;
        int err = 0;
        u32 data;

        route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

        if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
            !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
                goto out;

        err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
        if (err)
                goto out;

        attr->dest_chain = 0;
        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
        data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
                                                       vport_num);
        err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
                                                   MLX5_FLOW_NAMESPACE_FDB,
                                                   VPORT_TO_REG, data);
        if (err >= 0) {
                esw_attr->dests[out_index].src_port_rewrite_act_id = err;
                err = 0;
        }

out:
        if (route_dev)
                dev_put(route_dev);
        return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
                                  struct mlx5_esw_flow_attr *attr,
                                  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                  struct net_device *out_dev,
                                  int route_dev_ifindex,
                                  int out_index)
{
        int act_id = attr->dests[out_index].src_port_rewrite_act_id;
        struct net_device *route_dev;
        u16 vport_num;
        int err = 0;
        u32 data;

        route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

        if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
            !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
                err = -ENODEV;
                goto out;
        }

        err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
        if (err)
                goto out;

        data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
                                                       vport_num);
        mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
        if (route_dev)
                dev_put(route_dev);
        return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5e_tc_tun_encap *encap;
        unsigned int ret;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &uplink_rpriv->uplink_priv;
        encap = uplink_priv->encap;

        spin_lock_bh(&encap->route_lock);
        ret = encap->route_tbl_last_update;
        spin_unlock_bh(&encap->route_lock);
        return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct mlx5e_encap_entry *e,
                                    bool new_encap_entry,
                                    unsigned long tbl_time_before,
                                    int out_index);

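/* Find or create the encap entry matching the flow's tunnel info, build the
 * tunnel headers for a newly created entry and attach the flow to it.
 * @encap_valid reports whether the entry already has a resolved neighbour,
 * i.e. a usable packet reformat.
 */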
int mlx5e_attach_encap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
                       struct net_device *mirred_dev,
                       int out_index,
                       struct netlink_ext_ack *extack,
                       struct net_device **encap_dev,
                       bool *encap_valid)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        const struct ip_tunnel_info *tun_info;
        unsigned long tbl_time_before = 0;
        struct mlx5e_encap_entry *e;
        struct mlx5e_encap_key key;
        bool entry_created = false;
        unsigned short family;
        uintptr_t hash_key;
        int err = 0;

        parse_attr = attr->parse_attr;
        tun_info = parse_attr->tun_info[out_index];
        family = ip_tunnel_info_af(tun_info);
        key.ip_tun_key = &tun_info->key;
        key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
        if (!key.tc_tunnel) {
                NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
                return -EOPNOTSUPP;
        }

        hash_key = hash_encap_info(&key);

        mutex_lock(&esw->offloads.encap_tbl_lock);
        e = mlx5e_encap_get(priv, &key, hash_key);

        /* must verify if encap is valid or not */
        if (e) {
                /* Check that entry was not already attached to this flow */
                if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
                        err = -EOPNOTSUPP;
                        goto out_err;
                }

                mutex_unlock(&esw->offloads.encap_tbl_lock);
                wait_for_completion(&e->res_ready);

                /* Protect against concurrent neigh update. */
                mutex_lock(&esw->offloads.encap_tbl_lock);
                if (e->compl_result < 0) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        e = kzalloc(sizeof(*e), GFP_KERNEL);
        if (!e) {
                err = -ENOMEM;
                goto out_err;
        }

        refcount_set(&e->refcnt, 1);
        init_completion(&e->res_ready);
        entry_created = true;
        INIT_LIST_HEAD(&e->route_list);

        tun_info = mlx5e_dup_tun_info(tun_info);
        if (!tun_info) {
                err = -ENOMEM;
                goto out_err_init;
        }
        e->tun_info = tun_info;
        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
        if (err)
                goto out_err_init;

        INIT_LIST_HEAD(&e->flows);
        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
        tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        if (family == AF_INET)
                err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
        else if (family == AF_INET6)
                err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

        /* Protect against concurrent neigh update. */
        mutex_lock(&esw->offloads.encap_tbl_lock);
        complete_all(&e->res_ready);
        if (err) {
                e->compl_result = err;
                goto out_err;
        }
        e->compl_result = 1;

attach_flow:
        err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
                                       out_index);
        if (err)
                goto out_err;

        flow->encaps[out_index].e = e;
        list_add(&flow->encaps[out_index].list, &e->flows);
        flow->encaps[out_index].index = out_index;
        *encap_dev = e->out_dev;
        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
                attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
                attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
                *encap_valid = true;
        } else {
                *encap_valid = false;
        }
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        return err;

out_err:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        if (e)
                mlx5e_encap_put(priv, e);
        return err;

out_err_init:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        kfree(tun_info);
        kfree(e);
        return err;
}

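/* Find or create the L3-to-L2 decap entry for the flow's inner Ethernet
 * header and attach the flow to it; the packet reformat is allocated once
 * per unique header and shared by all flows using it.
 */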
int mlx5e_attach_decap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
                       struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_decap_entry *d;
        struct mlx5e_decap_key key;
        uintptr_t hash_key;
        int err = 0;

        parse_attr = flow->attr->parse_attr;
        if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "encap header larger than max supported");
                return -EOPNOTSUPP;
        }

        key.key = parse_attr->eth;
        hash_key = hash_decap_info(&key);
        mutex_lock(&esw->offloads.decap_tbl_lock);
        d = mlx5e_decap_get(priv, &key, hash_key);
        if (d) {
                mutex_unlock(&esw->offloads.decap_tbl_lock);
                wait_for_completion(&d->res_ready);
                mutex_lock(&esw->offloads.decap_tbl_lock);
                if (d->compl_result) {
                        err = -EREMOTEIO;
                        goto out_free;
                }
                goto found;
        }

        d = kzalloc(sizeof(*d), GFP_KERNEL);
        if (!d) {
                err = -ENOMEM;
                goto out_err;
        }

        d->key = key;
        refcount_set(&d->refcnt, 1);
        init_completion(&d->res_ready);
        INIT_LIST_HEAD(&d->flows);
        hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
                                                     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
                                                     sizeof(parse_attr->eth),
                                                     &parse_attr->eth,
                                                     MLX5_FLOW_NAMESPACE_FDB);
        if (IS_ERR(d->pkt_reformat)) {
                err = PTR_ERR(d->pkt_reformat);
                d->compl_result = err;
        }
        mutex_lock(&esw->offloads.decap_tbl_lock);
        complete_all(&d->res_ready);
        if (err)
                goto out_free;

found:
        flow->decap_reformat = d;
        attr->decap_pkt_reformat = d->pkt_reformat;
        list_add(&flow->l3_to_l2_reformat, &d->flows);
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        return 0;

out_free:
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        mlx5e_decap_put(priv, d);
        return err;

out_err:
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        return err;
}

static int cmp_route_info(struct mlx5e_route_key *a,
                          struct mlx5e_route_key *b)
{
        if (a->ip_version == 4 && b->ip_version == 4)
                return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
                              sizeof(a->endpoint_ip.v4));
        else if (a->ip_version == 6 && b->ip_version == 6)
                return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
                              sizeof(a->endpoint_ip.v6));
        return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
        if (key->ip_version == 4)
                return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
        return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
                                struct mlx5e_route_entry *r)
{
        WARN_ON(!list_empty(&r->decap_flows));
        WARN_ON(!list_empty(&r->encap_entries));

        kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
                return;

        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        lockdep_assert_held(&esw->offloads.encap_tbl_lock);

        if (!refcount_dec_and_test(&r->refcnt))
                return;
        hash_del_rcu(&r->hlist);
        mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
                u32 hash_key)
{
        struct mlx5e_route_key r_key;
        struct mlx5e_route_entry *r;

        hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
                r_key = r->key;
                if (!cmp_route_info(&r_key, key) &&
                    refcount_inc_not_zero(&r->refcnt))
                        return r;
        }
        return NULL;
}

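/* Look up the route entry for the given tunnel endpoint or allocate and hash
 * a new one. For newly created entries the route table update time is
 * returned through @route_tbl_change_time so callers can detect FIB events
 * that raced with the creation.
 */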
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
                       struct mlx5e_route_key *key,
                       int tunnel_dev_index,
                       unsigned long *route_tbl_change_time)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5e_tc_tun_encap *encap;
        struct mlx5e_route_entry *r;
        u32 hash_key;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &uplink_rpriv->uplink_priv;
        encap = uplink_priv->encap;

        hash_key = hash_route_info(key);
        spin_lock_bh(&encap->route_lock);
        r = mlx5e_route_get(encap, key, hash_key);
        spin_unlock_bh(&encap->route_lock);
        if (r) {
                if (!mlx5e_route_entry_valid(r)) {
                        mlx5e_route_put_locked(priv, r);
                        return ERR_PTR(-EINVAL);
                }
                return r;
        }

        r = kzalloc(sizeof(*r), GFP_KERNEL);
        if (!r)
                return ERR_PTR(-ENOMEM);

        r->key = *key;
        r->flags |= MLX5E_ROUTE_ENTRY_VALID;
        r->tunnel_dev_index = tunnel_dev_index;
        refcount_set(&r->refcnt, 1);
        INIT_LIST_HEAD(&r->decap_flows);
        INIT_LIST_HEAD(&r->encap_entries);

        spin_lock_bh(&encap->route_lock);
        *route_tbl_change_time = encap->route_tbl_last_update;
        hash_add(encap->route_tbl, &r->hlist, hash_key);
        spin_unlock_bh(&encap->route_lock);

        return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
        u32 hash_key = hash_route_info(key);
        struct mlx5e_route_entry *r;

        spin_lock_bh(&encap->route_lock);
        encap->route_tbl_last_update = jiffies;
        r = mlx5e_route_get(encap, key, hash_key);
        spin_unlock_bh(&encap->route_lock);

        return r;
}

struct mlx5e_tc_fib_event_data {
        struct work_struct work;
        unsigned long event;
        struct mlx5e_route_entry *r;
        struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
        struct mlx5e_tc_fib_event_data *fib_work;

        fib_work = kzalloc(sizeof(*fib_work), flags);
        if (WARN_ON(!fib_work))
                return NULL;

        INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
        fib_work->event = event;
        fib_work->ul_dev = ul_dev;

        return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
                           struct mlx5e_route_entry *r,
                           unsigned long event)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_fib_event_data *fib_work;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct net_device *ul_dev;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        ul_dev = uplink_rpriv->netdev;

        fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
        if (!fib_work)
                return -ENOMEM;

        dev_hold(ul_dev);
        refcount_inc(&r->refcnt);
        fib_work->r = r;
        queue_work(priv->wq, &fib_work->work);

        return 0;
}

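/* Resolve the route for a tunnel decap flow and attach the flow to the
 * matching route entry so it is updated on FIB changes. If the routing table
 * changed while the entry was being created, schedule a route update.
 */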
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
                             struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        unsigned long tbl_time_before, tbl_time_after;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5e_route_entry *r;
        struct mlx5e_route_key key;
        int err = 0;

        esw_attr = attr->esw_attr;
        parse_attr = attr->parse_attr;
        mutex_lock(&esw->offloads.encap_tbl_lock);
        if (!esw_attr->rx_tun_attr)
                goto out;

        tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
        tbl_time_after = tbl_time_before;
        err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
        if (err || !esw_attr->rx_tun_attr->decap_vport)
                goto out;

        key.ip_version = attr->tun_ip_version;
        if (key.ip_version == 4)
                key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
        else
                key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

        r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
                                   &tbl_time_after);
        if (IS_ERR(r)) {
                err = PTR_ERR(r);
                goto out;
        }
        /* Routing changed concurrently. FIB event handler might have missed new
         * entry, schedule update.
         */
        if (tbl_time_before != tbl_time_after) {
                err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
                if (err) {
                        mlx5e_route_put_locked(priv, r);
                        goto out;
                }
        }

        flow->decap_route = r;
        list_add(&flow->decap_routes, &r->decap_flows);
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        return 0;

out:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct mlx5e_encap_entry *e,
                                    bool new_encap_entry,
                                    unsigned long tbl_time_before,
                                    int out_index)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        unsigned long tbl_time_after = tbl_time_before;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        const struct ip_tunnel_info *tun_info;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5e_route_entry *r;
        struct mlx5e_route_key key;
        unsigned short family;
        int err = 0;

        esw_attr = attr->esw_attr;
        parse_attr = attr->parse_attr;
        tun_info = parse_attr->tun_info[out_index];
        family = ip_tunnel_info_af(tun_info);

        if (family == AF_INET) {
                key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
                key.ip_version = 4;
        } else if (family == AF_INET6) {
                key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
                key.ip_version = 6;
        }

        err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
                                  e->route_dev_ifindex, out_index);
        if (err || !(esw_attr->dests[out_index].flags &
                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
                return err;

        r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
                                   &tbl_time_after);
        if (IS_ERR(r))
                return PTR_ERR(r);
        /* Routing changed concurrently. FIB event handler might have missed new
         * entry, schedule update.
         */
        if (tbl_time_before != tbl_time_after) {
                err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
                if (err) {
                        mlx5e_route_put_locked(priv, r);
                        return err;
                }
        }

        flow->encap_routes[out_index].r = r;
        if (new_encap_entry)
                list_add(&e->route_list, &r->encap_entries);
        flow->encap_routes[out_index].index = out_index;
        return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_route_entry *r = flow->decap_route;

        if (!r)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        list_del(&flow->decap_routes);
        flow->decap_route = NULL;

        if (!refcount_dec_and_test(&r->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
                                     struct mlx5e_tc_flow *flow,
                                     int out_index)
{
        struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_encap_entry *e, *tmp;

        if (!r)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        flow->encap_routes[out_index].r = NULL;

        if (!refcount_dec_and_test(&r->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
                list_del_init(&e->route_list);
        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

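/* The route the encap entry was built on is no longer usable: unoffload every
 * flow in @encap_flows, dealloc their mod_hdr actions, clear their encap
 * destinations and mark the entry as having no route.
 */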
static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
                                   struct mlx5e_encap_entry *e,
                                   struct list_head *encap_flows)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(flow, encap_flows, tmp_list) {
                struct mlx5_flow_attr *attr = flow->attr;
                struct mlx5_esw_flow_attr *esw_attr;

                if (!mlx5e_is_offloaded_flow(flow))
                        continue;
                esw_attr = attr->esw_attr;

                if (flow_flag_test(flow, SLOW))
                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
                mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
                attr->modify_hdr = NULL;

                esw_attr->dests[flow->tmp_entry_index].flags &=
                        ~MLX5_ESW_DEST_ENCAP_VALID;
                esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
        }

        e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
                e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
                mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
                e->pkt_reformat = NULL;
        }
}

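/* A new route towards the encap destination was installed: rebuild the tunnel
 * header on the new route device and re-offload the flows in @encap_flows,
 * falling back to the slow path while the header is still unresolved.
 */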
1294 static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
1295                                   struct net_device *tunnel_dev,
1296                                   struct mlx5e_encap_entry *e,
1297                                   struct list_head *encap_flows)
1298 {
1299         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1300         struct mlx5e_tc_flow *flow;
1301         int err;
1302
1303         err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
1304                 mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
1305                 mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
1306         if (err)
1307                 mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
1308         e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;
1309
1310         list_for_each_entry(flow, encap_flows, tmp_list) {
1311                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1312                 struct mlx5_flow_attr *attr = flow->attr;
1313                 struct mlx5_esw_flow_attr *esw_attr;
1314                 struct mlx5_flow_handle *rule;
1315                 struct mlx5_flow_spec *spec;
1316
1317                 if (flow_flag_test(flow, FAILED))
1318                         continue;
1319
1320                 esw_attr = attr->esw_attr;
1321                 parse_attr = attr->parse_attr;
1322                 spec = &parse_attr->spec;
1323
1324                 err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
1325                                              e->out_dev, e->route_dev_ifindex,
1326                                              flow->tmp_entry_index);
1327                 if (err) {
1328                         mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
1329                         continue;
1330                 }
1331
1332                 err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
1333                 if (err) {
1334                         mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
1335                                        err);
1336                         continue;
1337                 }
1338
1339                 if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
1340                         esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
1341                         esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
1342                         if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
1343                                 goto offload_to_slow_path;
1344                         /* update from slow path rule to encap rule */
1345                         rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1346                         if (IS_ERR(rule)) {
1347                                 err = PTR_ERR(rule);
1348                                 mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
1349                                                err);
1350                         } else {
1351                                 flow->rule[0] = rule;
1352                         }
1353                 } else {
1354 offload_to_slow_path:
1355                         rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
1356                         /* mark the flow's encap dest as non-valid */
1357                         esw_attr->dests[flow->tmp_entry_index].flags &=
1358                                 ~MLX5_ESW_DEST_ENCAP_VALID;
1359
1360                         if (IS_ERR(rule)) {
1361                                 err = PTR_ERR(rule);
1362                                 mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
1363                                                err);
1364                         } else {
1365                                 flow->rule[0] = rule;
1366                         }
1367                 }
1368                 flow_flag_set(flow, OFFLOADED);
1369         }
1370 }
1371
1372 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1373                                      struct mlx5e_route_entry *r,
1374                                      struct list_head *flow_list,
1375                                      bool replace)
1376 {
1377         struct net_device *tunnel_dev;
1378         struct mlx5e_encap_entry *e;
1379
1380         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1381         if (!tunnel_dev)
1382                 return -ENODEV;
1383
1384         list_for_each_entry(e, &r->encap_entries, route_list) {
1385                 LIST_HEAD(encap_flows);
1386
1387                 mlx5e_take_all_encap_flows(e, &encap_flows);
1388                 if (list_empty(&encap_flows))
1389                         continue;
1390
1391                 if (mlx5e_route_entry_valid(r))
1392                         mlx5e_invalidate_encap(priv, e, &encap_flows);
1393
1394                 if (!replace) {
1395                         list_splice(&encap_flows, flow_list);
1396                         continue;
1397                 }
1398
1399                 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1400                 list_splice(&encap_flows, flow_list);
1401         }
1402
1403         return 0;
1404 }
1405
1406 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1407                                       struct list_head *flow_list)
1408 {
1409         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1410         struct mlx5e_tc_flow *flow;
1411
1412         list_for_each_entry(flow, flow_list, tmp_list)
1413                 if (mlx5e_is_offloaded_flow(flow))
1414                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1415 }
1416
1417 static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
1418                                   struct list_head *decap_flows)
1419 {
1420         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1421         struct mlx5e_tc_flow *flow;
1422
1423         list_for_each_entry(flow, decap_flows, tmp_list) {
1424                 struct mlx5e_tc_flow_parse_attr *parse_attr;
1425                 struct mlx5_flow_attr *attr = flow->attr;
1426                 struct mlx5_flow_handle *rule;
1427                 struct mlx5_flow_spec *spec;
1428                 int err;
1429
1430                 if (flow_flag_test(flow, FAILED))
1431                         continue;
1432
1433                 parse_attr = attr->parse_attr;
1434                 spec = &parse_attr->spec;
1435                 err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
1436                 if (err) {
1437                         mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
1438                                        err);
1439                         continue;
1440                 }
1441
1442                 rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
1443                 if (IS_ERR(rule)) {
1444                         err = PTR_ERR(rule);
1445                         mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
1446                                        err);
1447                 } else {
1448                         flow->rule[0] = rule;
1449                         flow_flag_set(flow, OFFLOADED);
1450                 }
1451         }
1452 }
1453
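/* Update the decap flows attached to route entry @r after a FIB event:
 * take them onto a temporary list, unoffload them if the route entry was
 * previously valid, re-offload them on a FIB replace event, and finally
 * splice them onto @flow_list for the caller to release.
 */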
1454 static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
1455                                           struct mlx5e_route_entry *r,
1456                                           struct list_head *flow_list,
1457                                           bool replace)
1458 {
1459         struct net_device *tunnel_dev;
1460         LIST_HEAD(decap_flows);
1461
1462         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1463         if (!tunnel_dev)
1464                 return -ENODEV;
1465
1466         mlx5e_take_all_route_decap_flows(r, &decap_flows);
1467         if (mlx5e_route_entry_valid(r))
1468                 mlx5e_unoffload_flow_list(priv, &decap_flows);
1469         if (replace)
1470                 mlx5e_reoffload_decap(priv, &decap_flows);
1471
1472         list_splice(&decap_flows, flow_list);
1473
1474         return 0;
1475 }
1476
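/* Process-context handler for the FIB ENTRY_REPLACE/DEL events queued by
 * mlx5e_tc_tun_fib_event(). Under rtnl and the eswitch encap_tbl_lock it
 * updates all encap and decap flows attached to the affected route entry
 * and, on a replace event, marks the entry valid again. The route entry and
 * underlay device references taken in the notifier, as well as the flow
 * references collected while updating, are released before the work item
 * is freed.
 */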
1477 static void mlx5e_tc_fib_event_work(struct work_struct *work)
1478 {
1479         struct mlx5e_tc_fib_event_data *event_data =
1480                 container_of(work, struct mlx5e_tc_fib_event_data, work);
1481         struct net_device *ul_dev = event_data->ul_dev;
1482         struct mlx5e_priv *priv = netdev_priv(ul_dev);
1483         struct mlx5e_route_entry *r = event_data->r;
1484         struct mlx5_eswitch *esw;
1485         LIST_HEAD(flow_list);
1486         bool replace;
1487         int err;
1488
1489         /* sync with concurrent neigh updates */
1490         rtnl_lock();
1491         esw = priv->mdev->priv.eswitch;
1492         mutex_lock(&esw->offloads.encap_tbl_lock);
1493         replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;
1494
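        /* Route entry was never offloaded and is being deleted; nothing to
         * update.
         */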
1495         if (!mlx5e_route_entry_valid(r) && !replace)
1496                 goto out;
1497
1498         err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
1499         if (err)
1500                 mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
1501                                err);
1502
1503         err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
1504         if (err)
1505                 mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
1506                                err);
1507
1508         if (replace)
1509                 r->flags |= MLX5E_ROUTE_ENTRY_VALID;
1510 out:
1511         mutex_unlock(&esw->offloads.encap_tbl_lock);
1512         rtnl_unlock();
1513
1514         mlx5e_put_flow_list(priv, &flow_list);
1515         mlx5e_route_put(priv, event_data->r);
1516         dev_put(event_data->ul_dev);
1517         kfree(event_data);
1518 }
1519
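/* Atomic-context helper for the FIB notifier: ignore IPv4 routes that are
 * not /32 host routes through an mlx5e netdev, otherwise allocate the
 * deferred work item and look up the matching route entry. Returns NULL
 * when the event is irrelevant or no route entry matches, an ERR_PTR on
 * allocation failure, and the initialized work item otherwise.
 */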
1520 static struct mlx5e_tc_fib_event_data *
1521 mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
1522                          struct net_device *ul_dev,
1523                          struct mlx5e_tc_tun_encap *encap,
1524                          unsigned long event,
1525                          struct fib_notifier_info *info)
1526 {
1527         struct fib_entry_notifier_info *fen_info;
1528         struct mlx5e_tc_fib_event_data *fib_work;
1529         struct mlx5e_route_entry *r;
1530         struct mlx5e_route_key key;
1531         struct net_device *fib_dev;
1532
1533         fen_info = container_of(info, struct fib_entry_notifier_info, info);
1534         fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
1535         if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1536             fen_info->dst_len != 32)
1537                 return NULL;
1538
1539         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1540         if (!fib_work)
1541                 return ERR_PTR(-ENOMEM);
1542
1543         key.endpoint_ip.v4 = htonl(fen_info->dst);
1544         key.ip_version = 4;
1545
1546         /* Can't fail after this point because releasing the reference to r
1547          * requires obtaining a sleeping mutex, which we can't do in atomic
1548          * context.
1549          */
1550         r = mlx5e_route_lookup_for_update(encap, &key);
1551         if (!r)
1552                 goto out;
1553         fib_work->r = r;
1554         dev_hold(ul_dev);
1555
1556         return fib_work;
1557
1558 out:
1559         kfree(fib_work);
1560         return NULL;
1561 }
1562
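/* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only /128 host routes
 * through an mlx5e netdev are considered.
 */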
1563 static struct mlx5e_tc_fib_event_data *
1564 mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
1565                          struct net_device *ul_dev,
1566                          struct mlx5e_tc_tun_encap *encap,
1567                          unsigned long event,
1568                          struct fib_notifier_info *info)
1569 {
1570         struct fib6_entry_notifier_info *fen_info;
1571         struct mlx5e_tc_fib_event_data *fib_work;
1572         struct mlx5e_route_entry *r;
1573         struct mlx5e_route_key key;
1574         struct net_device *fib_dev;
1575
1576         fen_info = container_of(info, struct fib6_entry_notifier_info, info);
1577         fib_dev = fib6_info_nh_dev(fen_info->rt);
1578         if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
1579             fen_info->rt->fib6_dst.plen != 128)
1580                 return NULL;
1581
1582         fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
1583         if (!fib_work)
1584                 return ERR_PTR(-ENOMEM);
1585
1586         memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
1587                sizeof(fen_info->rt->fib6_dst.addr));
1588         key.ip_version = 6;
1589
1590         /* Can't fail after this point because releasing the reference to r
1591          * requires obtaining a sleeping mutex, which we can't do in atomic
1592          * context.
1593          */
1594         r = mlx5e_route_lookup_for_update(encap, &key);
1595         if (!r)
1596                 goto out;
1597         fib_work->r = r;
1598         dev_hold(ul_dev);
1599
1600         return fib_work;
1601
1602 out:
1603         kfree(fib_work);
1604         return NULL;
1605 }
1606
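/* FIB notifier callback, called in atomic context. For ENTRY_REPLACE and
 * ENTRY_DEL events a work item is prepared and queued on the driver
 * workqueue so the flow updates can run in process context under the
 * required locks. Always returns NOTIFY_DONE.
 */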
1607 static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
1608 {
1609         struct mlx5e_tc_fib_event_data *fib_work;
1610         struct fib_notifier_info *info = ptr;
1611         struct mlx5e_tc_tun_encap *encap;
1612         struct net_device *ul_dev;
1613         struct mlx5e_priv *priv;
1614
1615         encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
1616         priv = encap->priv;
1617         ul_dev = priv->netdev;
1618         priv = netdev_priv(ul_dev);
1619
1620         switch (event) {
1621         case FIB_EVENT_ENTRY_REPLACE:
1622         case FIB_EVENT_ENTRY_DEL:
1623                 if (info->family == AF_INET)
1624                         fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
1625                 else if (info->family == AF_INET6)
1626                         fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
1627                 else
1628                         return NOTIFY_DONE;
1629
1630                 if (!IS_ERR_OR_NULL(fib_work)) {
1631                         queue_work(priv->wq, &fib_work->work);
1632                 } else if (IS_ERR(fib_work)) {
1633                         NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
1634                         mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
1635                                        PTR_ERR(fib_work));
1636                 }
1637
1638                 break;
1639         default:
1640                 return NOTIFY_DONE;
1641         }
1642
1643         return NOTIFY_DONE;
1644 }
1645
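/* Allocate the tunnel encap context for @priv, initialize the route table
 * and its lock, and register the FIB notifier in the netdev's network
 * namespace. Returns the new context or an ERR_PTR on failure.
 */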
1646 struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
1647 {
1648         struct mlx5e_tc_tun_encap *encap;
1649         int err;
1650
1651         encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
1652         if (!encap)
1653                 return ERR_PTR(-ENOMEM);
1654
1655         encap->priv = priv;
1656         encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
1657         spin_lock_init(&encap->route_lock);
1658         hash_init(encap->route_tbl);
1659         err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
1660                                     NULL, NULL);
1661         if (err) {
1662                 kvfree(encap);
1663                 return ERR_PTR(err);
1664         }
1665
1666         return encap;
1667 }
1668
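/* Unregister the FIB notifier, wait for any pending FIB event work to
 * finish, and free the context. Safe to call with a NULL @encap.
 */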
1669 void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
1670 {
1671         if (!encap)
1672                 return;
1673
1674         unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
1675         flush_workqueue(encap->priv->wq); /* flush pending fib event work items */
1676         kvfree(encap);
1677 }