// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
        MLX5E_ROUTE_ENTRY_VALID     = BIT(0),
};

struct mlx5e_route_key {
        int ip_version;
        union {
                __be32 v4;
                struct in6_addr v6;
        } endpoint_ip;
};

struct mlx5e_route_entry {
        struct mlx5e_route_key key;
        struct list_head encap_entries;
        struct list_head decap_flows;
        u32 flags;
        struct hlist_node hlist;
        refcount_t refcnt;
        int tunnel_dev_index;
        struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
        struct mlx5e_priv *priv;
        struct notifier_block fib_nb;
        spinlock_t route_lock; /* protects route_tbl */
        unsigned long route_tbl_last_update;
        DECLARE_HASHTABLE(route_tbl, 8);
};
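
/* Locking and lifetime overview:
 *
 * - encap->route_lock protects route_tbl and the route_tbl_last_update
 *   timestamp; it is taken with spin_lock_bh() so it is safe against
 *   softirq context.
 * - esw->offloads.encap_tbl_lock (a mutex) serializes encap and route
 *   entry lifetime against concurrent neighbour and FIB updates.
 * - Entries are refcounted; lookup helpers only return an entry after
 *   refcount_inc_not_zero() succeeds, and entries are freed with
 *   kfree_rcu() so RCU readers never see them recycled underneath.
 * - route_tbl uses 8 hash bits (256 buckets), keyed on the tunnel
 *   endpoint IP address.
 */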

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
        return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
                             struct mlx5_flow_spec *spec)
{
        struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
        struct mlx5_rx_tun_attr *tun_attr;
        void *daddr, *saddr;
        u8 ip_version;

        tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
        if (!tun_attr)
                return -ENOMEM;

        esw_attr->rx_tun_attr = tun_attr;
        ip_version = mlx5e_tc_get_ip_version(spec, true);

        if (ip_version == 4) {
                daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
                saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
                tun_attr->dst_ip.v4 = *(__be32 *)daddr;
                tun_attr->src_ip.v4 = *(__be32 *)saddr;
                if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
                        return 0;
        }
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
        else if (ip_version == 6) {
                int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
                struct in6_addr zerov6 = {};

                daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
                saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
                memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
                memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
                if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
                    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
                        return 0;
        }
#endif
        /* Only set the flag if both src and dst ip addresses exist. They are
         * required to establish routing.
         */
        flow_flag_set(flow, TUN_RX);
        flow->attr->tun_ip_version = ip_version;
        return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
        bool all_flow_encaps_valid = true;
        int i;

        /* Flow can be associated with multiple encap entries.
         * Before offloading the flow verify that all of them have
         * a valid neighbour.
         */
        for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
                if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
                        continue;
                if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
                        all_flow_encaps_valid = false;
                        break;
                }
        }

        return all_flow_encaps_valid;
}

void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e,
                              struct list_head *flow_list)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_attr *attr;
        struct mlx5_flow_spec *spec;
        struct mlx5e_tc_flow *flow;
        int err;

        if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
                return;

        e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
                                                     e->reformat_type,
                                                     e->encap_size, e->encap_header,
                                                     MLX5_FLOW_NAMESPACE_FDB);
        if (IS_ERR(e->pkt_reformat)) {
                mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %ld\n",
                               PTR_ERR(e->pkt_reformat));
                return;
        }
        e->flags |= MLX5_ENCAP_ENTRY_VALID;
        mlx5e_rep_queue_neigh_stats_work(priv);

        list_for_each_entry(flow, flow_list, tmp_list) {
                if (!mlx5e_is_offloaded_flow(flow))
                        continue;
                attr = flow->attr;
                esw_attr = attr->esw_attr;
                spec = &attr->parse_attr->spec;

                esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
                esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

                /* Do not offload flows with unresolved neighbors */
                if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
                        continue;
                /* update from slow path rule to encap rule */
                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
                                       err);
                        continue;
                }

                mlx5e_tc_unoffload_from_slow_path(esw, flow);
                flow->rule[0] = rule;
                /* was unset when slow path rule removed */
                flow_flag_set(flow, OFFLOADED);
        }
}
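
/* Note the make-before-break ordering above: the encap rule is installed
 * first and the slow path rule is removed only afterwards, so traffic is
 * never left without a matching rule during the transition.
 */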

void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
                              struct mlx5e_encap_entry *e,
                              struct list_head *flow_list)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_attr *attr;
        struct mlx5_flow_spec *spec;
        struct mlx5e_tc_flow *flow;
        int err;

        list_for_each_entry(flow, flow_list, tmp_list) {
                if (!mlx5e_is_offloaded_flow(flow))
                        continue;
                attr = flow->attr;
                esw_attr = attr->esw_attr;
                spec = &attr->parse_attr->spec;

                /* update from encap rule to slow path rule */
                rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
                /* mark the flow's encap dest as non-valid */
                esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
                                       err);
                        continue;
                }

                mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
                flow->rule[0] = rule;
                /* was unset when fast path rule removed */
                flow_flag_set(flow, OFFLOADED);
        }

        /* the encap entry was valid until now; clear the flag and release
         * the packet reformat object that backed it
         */
        e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
                                struct list_head *flow_list,
                                int index)
{
        if (IS_ERR(mlx5e_flow_get(flow)))
                return;
        wait_for_completion(&flow->init_done);

        flow->tmp_entry_index = index;
        list_add(&flow->tmp_list, flow_list);
}
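
/* mlx5e_flow_get() fails for a flow that is already being released, so
 * such flows are silently skipped. Waiting on init_done ensures the
 * flow's attributes are fully initialized before the caller operates on
 * it.
 */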

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
        struct encap_flow_item *efi;
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(efi, &e->flows, list) {
                flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
                mlx5e_take_tmp_flow(flow, flow_list, efi->index);
        }
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
                                             struct list_head *flow_list)
{
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(flow, &r->decap_flows, decap_routes)
                mlx5e_take_tmp_flow(flow, flow_list, 0);
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
                           struct mlx5e_encap_entry *e)
{
        struct mlx5e_encap_entry *next = NULL;

retry:
        rcu_read_lock();

        /* find encap with non-zero reference counter value */
        for (next = e ?
                     list_next_or_null_rcu(&nhe->encap_list,
                                           &e->encap_list,
                                           struct mlx5e_encap_entry,
                                           encap_list) :
                     list_first_or_null_rcu(&nhe->encap_list,
                                            struct mlx5e_encap_entry,
                                            encap_list);
             next;
             next = list_next_or_null_rcu(&nhe->encap_list,
                                          &next->encap_list,
                                          struct mlx5e_encap_entry,
                                          encap_list))
                if (mlx5e_encap_take(next))
                        break;

        rcu_read_unlock();

        /* release starting encap */
        if (e)
                mlx5e_encap_put(netdev_priv(e->out_dev), e);
        if (!next)
                return next;

        /* wait for encap to be fully initialized */
        wait_for_completion(&next->res_ready);
        /* continue searching if encap entry is not in valid state after completion */
        if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
                e = next;
                goto retry;
        }

        return next;
}
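
/* The loop above is the usual RCU cursor pattern: walk an RCU-protected
 * list and pin the first entry whose refcount can still be raised. A
 * minimal sketch of the core step, for illustration only:
 *
 *	rcu_read_lock();
 *	list_for_each_entry_rcu(next, &nhe->encap_list, encap_list)
 *		if (refcount_inc_not_zero(&next->refcnt))
 *			break;	// pinned; safe to use after unlock
 *	rcu_read_unlock();
 *
 * The real helper additionally resumes from a previous cursor entry,
 * drops the reference it was handed, and retries until it finds an entry
 * that is both pinned and MLX5_ENCAP_ENTRY_VALID.
 */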

void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
        struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
        struct mlx5e_encap_entry *e = NULL;
        struct mlx5e_tc_flow *flow;
        struct mlx5_fc *counter;
        struct neigh_table *tbl;
        bool neigh_used = false;
        struct neighbour *n;
        u64 lastuse;

        if (m_neigh->family == AF_INET)
                tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
        else if (m_neigh->family == AF_INET6)
                tbl = ipv6_stub->nd_tbl;
#endif
        else
                return;

        /* mlx5e_get_next_valid_encap() releases previous encap before returning
         * next one.
         */
        while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
                struct mlx5e_priv *priv = netdev_priv(e->out_dev);
                struct encap_flow_item *efi, *tmp;
                struct mlx5_eswitch *esw;
                LIST_HEAD(flow_list);

                esw = priv->mdev->priv.eswitch;
                mutex_lock(&esw->offloads.encap_tbl_lock);
                list_for_each_entry_safe(efi, tmp, &e->flows, list) {
                        flow = container_of(efi, struct mlx5e_tc_flow,
                                            encaps[efi->index]);
                        if (IS_ERR(mlx5e_flow_get(flow)))
                                continue;
                        list_add(&flow->tmp_list, &flow_list);

                        if (mlx5e_is_offloaded_flow(flow)) {
                                counter = mlx5e_tc_get_counter(flow);
                                lastuse = mlx5_fc_query_lastuse(counter);
                                if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
                                        neigh_used = true;
                                        break;
                                }
                        }
                }
                mutex_unlock(&esw->offloads.encap_tbl_lock);

                mlx5e_put_flow_list(priv, &flow_list);
                if (neigh_used) {
                        /* release current encap before breaking the loop */
                        mlx5e_encap_put(priv, e);
                        break;
                }
        }

        trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

        if (neigh_used) {
                nhe->reported_lastuse = jiffies;

                /* find the relevant neigh according to the cached device and
                 * dst ip pair
                 */
                n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
                if (!n)
                        return;

                neigh_event_send(n, NULL);
                neigh_release(n);
        }
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
        WARN_ON(!list_empty(&e->flows));

        if (e->compl_result > 0) {
                mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

                if (e->flags & MLX5_ENCAP_ENTRY_VALID)
                        mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
        }

        kfree(e->tun_info);
        kfree(e->encap_header);
        kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
                                struct mlx5e_decap_entry *d)
{
        WARN_ON(!list_empty(&d->flows));

        if (!d->compl_result)
                mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

        kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
                return;
        list_del(&e->route_list);
        hash_del_rcu(&e->encap_hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
                return;
        hash_del_rcu(&d->hlist);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
                                     struct mlx5e_tc_flow *flow,
                                     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
                        struct mlx5e_tc_flow *flow, int out_index)
{
        struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (flow->attr->esw_attr->dests[out_index].flags &
            MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
                mlx5e_detach_encap_route(priv, flow, out_index);

        /* flow wasn't fully initialized */
        if (!e)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        list_del(&flow->encaps[out_index].list);
        flow->encaps[out_index].e = NULL;
        if (!refcount_dec_and_test(&e->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        list_del(&e->route_list);
        hash_del_rcu(&e->encap_hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
                        struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_decap_entry *d = flow->decap_reformat;

        if (!d)
                return;

        mutex_lock(&esw->offloads.decap_tbl_lock);
        list_del(&flow->l3_to_l2_reformat);
        flow->decap_reformat = NULL;

        if (!refcount_dec_and_test(&d->refcnt)) {
                mutex_unlock(&esw->offloads.decap_tbl_lock);
                return;
        }
        hash_del_rcu(&d->hlist);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
                                           struct mlx5e_encap_key *b)
{
        return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
                a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
                          struct mlx5e_decap_key *b)
{
        return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
        return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
                     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
        return jhash(&key->key, sizeof(key->key), 0);
}
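
/* Both hashes run jhash over the raw key bytes; the encap hash seeds it
 * with the tunnel type, so identical ip_tunnel_key contents used by
 * different tunnel implementations (e.g. VXLAN vs. GENEVE) hash
 * differently.
 */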

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
        return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
        return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
                uintptr_t hash_key)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_encap_key e_key;
        struct mlx5e_encap_entry *e;

        hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
                                   encap_hlist, hash_key) {
                e_key.ip_tun_key = &e->tun_info->key;
                e_key.tc_tunnel = e->tunnel;
                if (e->tunnel->encap_info_equal(&e_key, key) &&
                    mlx5e_encap_take(e))
                        return e;
        }

        return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
                uintptr_t hash_key)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_decap_key r_key;
        struct mlx5e_decap_entry *e;

        hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
                                   hlist, hash_key) {
                r_key = e->key;
                if (!cmp_decap_info(&r_key, key) &&
                    mlx5e_decap_take(e))
                        return e;
        }
        return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
        size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

        return kmemdup(tun_info, tun_size, GFP_KERNEL);
}
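
/* struct ip_tunnel_info carries its variable-length tunnel options inline,
 * directly after the fixed part, so a single kmemdup() of
 * sizeof(*tun_info) + options_len duplicates both in one allocation.
 */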

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
                                      struct mlx5e_tc_flow *flow,
                                      int out_index,
                                      struct mlx5e_encap_entry *e,
                                      struct netlink_ext_ack *extack)
{
        int i;

        for (i = 0; i < out_index; i++) {
                if (flow->encaps[i].e != e)
                        continue;
                NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
                netdev_err(priv->netdev, "can't duplicate encap action\n");
                return true;
        }

        return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
                               struct mlx5_flow_attr *attr,
                               struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                               struct net_device *out_dev,
                               int route_dev_ifindex,
                               int out_index)
{
        struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
        struct net_device *route_dev;
        u16 vport_num;
        int err = 0;
        u32 data;

        route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

        if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
            !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
                goto out;

        err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
        if (err)
                goto out;

        attr->dest_chain = 0;
        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
        data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
                                                       vport_num);
        err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
                                                   MLX5_FLOW_NAMESPACE_FDB,
                                                   VPORT_TO_REG, data);
        if (err >= 0) {
                esw_attr->dests[out_index].src_port_rewrite_act_id = err;
                err = 0;
        }

out:
        if (route_dev)
                dev_put(route_dev);
        return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
                                  struct mlx5_esw_flow_attr *attr,
                                  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                  struct net_device *out_dev,
                                  int route_dev_ifindex,
                                  int out_index)
{
        int act_id = attr->dests[out_index].src_port_rewrite_act_id;
        struct net_device *route_dev;
        u16 vport_num;
        int err = 0;
        u32 data;

        route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

        if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
            !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
                err = -ENODEV;
                goto out;
        }

        err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
        if (err)
                goto out;

        data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
                                                       vport_num);
        mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
        if (route_dev)
                dev_put(route_dev);
        return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5e_tc_tun_encap *encap;
        unsigned int ret;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &uplink_rpriv->uplink_priv;
        encap = uplink_priv->encap;

        spin_lock_bh(&encap->route_lock);
        ret = encap->route_tbl_last_update;
        spin_unlock_bh(&encap->route_lock);
        return ret;
}
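
/* The timestamp read here implements a simple optimistic protocol against
 * concurrent FIB updates: callers sample route_tbl_last_update before
 * publishing a route entry, then compare it with the value captured while
 * inserting (see mlx5e_route_get_create()). A mismatch means a FIB event
 * ran in between and may have missed the new entry, so an update work
 * item is scheduled, e.g.:
 *
 *	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
 *	...
 *	r = mlx5e_route_get_create(priv, &key, ifindex, &tbl_time_after);
 *	if (tbl_time_before != tbl_time_after)
 *		mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
 */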

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct mlx5e_encap_entry *e,
                                    bool new_encap_entry,
                                    unsigned long tbl_time_before,
                                    int out_index);

int mlx5e_attach_encap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
                       struct net_device *mirred_dev,
                       int out_index,
                       struct netlink_ext_ack *extack,
                       struct net_device **encap_dev,
                       bool *encap_valid)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        const struct ip_tunnel_info *tun_info;
        unsigned long tbl_time_before = 0;
        struct mlx5e_encap_entry *e;
        struct mlx5e_encap_key key;
        bool entry_created = false;
        unsigned short family;
        uintptr_t hash_key;
        int err = 0;

        parse_attr = attr->parse_attr;
        tun_info = parse_attr->tun_info[out_index];
        family = ip_tunnel_info_af(tun_info);
        key.ip_tun_key = &tun_info->key;
        key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
        if (!key.tc_tunnel) {
                NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
                return -EOPNOTSUPP;
        }

        hash_key = hash_encap_info(&key);

        mutex_lock(&esw->offloads.encap_tbl_lock);
        e = mlx5e_encap_get(priv, &key, hash_key);

        /* if an entry already exists, verify that it finished initializing
         * successfully before reusing it
         */
        if (e) {
                /* Check that entry was not already attached to this flow */
                if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
                        err = -EOPNOTSUPP;
                        goto out_err;
                }

                mutex_unlock(&esw->offloads.encap_tbl_lock);
                wait_for_completion(&e->res_ready);

                /* Protect against concurrent neigh update. */
                mutex_lock(&esw->offloads.encap_tbl_lock);
                if (e->compl_result < 0) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        e = kzalloc(sizeof(*e), GFP_KERNEL);
        if (!e) {
                err = -ENOMEM;
                goto out_err;
        }

        refcount_set(&e->refcnt, 1);
        init_completion(&e->res_ready);
        entry_created = true;
        INIT_LIST_HEAD(&e->route_list);

        tun_info = mlx5e_dup_tun_info(tun_info);
        if (!tun_info) {
                err = -ENOMEM;
                goto out_err_init;
        }
        e->tun_info = tun_info;
        err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
        if (err)
                goto out_err_init;

        INIT_LIST_HEAD(&e->flows);
        hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
        tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        if (family == AF_INET)
                err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
        else if (family == AF_INET6)
                err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

        /* Protect against concurrent neigh update. */
        mutex_lock(&esw->offloads.encap_tbl_lock);
        complete_all(&e->res_ready);
        if (err) {
                e->compl_result = err;
                goto out_err;
        }
        e->compl_result = 1;

attach_flow:
        err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
                                       out_index);
        if (err)
                goto out_err;

        flow->encaps[out_index].e = e;
        list_add(&flow->encaps[out_index].list, &e->flows);
        flow->encaps[out_index].index = out_index;
        *encap_dev = e->out_dev;
        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
                attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
                attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
                *encap_valid = true;
        } else {
                *encap_valid = false;
        }
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        return err;

out_err:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        if (e)
                mlx5e_encap_put(priv, e);
        return err;

out_err_init:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        kfree(tun_info);
        kfree(e);
        return err;
}
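
/* The attach path publishes a half-initialized entry in the hash table
 * and uses the res_ready completion so concurrent attachers can wait for
 * the (sleeping) header creation outside encap_tbl_lock. The shape of the
 * handshake, reduced to its essentials:
 *
 *	// creator				// concurrent attacher
 *	hash_add_rcu(tbl, &e->hlist, key);
 *	mutex_unlock(&lock);			e = mlx5e_encap_get(...);
 *	err = create_header(e);			mutex_unlock(&lock);
 *	mutex_lock(&lock);			wait_for_completion(&e->res_ready);
 *	complete_all(&e->res_ready);		mutex_lock(&lock);
 *	e->compl_result = err ?: 1;		if (e->compl_result < 0) -EREMOTEIO
 *
 * compl_result thus encodes three states: 0 (in progress), >0 (usable),
 * <0 (initialization failed).
 */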

int mlx5e_attach_decap(struct mlx5e_priv *priv,
                       struct mlx5e_tc_flow *flow,
                       struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5e_decap_entry *d;
        struct mlx5e_decap_key key;
        uintptr_t hash_key;
        int err = 0;

        parse_attr = flow->attr->parse_attr;
        if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "encap header larger than max supported");
                return -EOPNOTSUPP;
        }

        key.key = parse_attr->eth;
        hash_key = hash_decap_info(&key);
        mutex_lock(&esw->offloads.decap_tbl_lock);
        d = mlx5e_decap_get(priv, &key, hash_key);
        if (d) {
                mutex_unlock(&esw->offloads.decap_tbl_lock);
                wait_for_completion(&d->res_ready);
                mutex_lock(&esw->offloads.decap_tbl_lock);
                if (d->compl_result) {
                        err = -EREMOTEIO;
                        goto out_free;
                }
                goto found;
        }

        d = kzalloc(sizeof(*d), GFP_KERNEL);
        if (!d) {
                err = -ENOMEM;
                goto out_err;
        }

        d->key = key;
        refcount_set(&d->refcnt, 1);
        init_completion(&d->res_ready);
        INIT_LIST_HEAD(&d->flows);
        hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
        mutex_unlock(&esw->offloads.decap_tbl_lock);

        d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
                                                     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
                                                     sizeof(parse_attr->eth),
                                                     &parse_attr->eth,
                                                     MLX5_FLOW_NAMESPACE_FDB);
        if (IS_ERR(d->pkt_reformat)) {
                err = PTR_ERR(d->pkt_reformat);
                d->compl_result = err;
        }
        mutex_lock(&esw->offloads.decap_tbl_lock);
        complete_all(&d->res_ready);
        if (err)
                goto out_free;

found:
        flow->decap_reformat = d;
        attr->decap_pkt_reformat = d->pkt_reformat;
        list_add(&flow->l3_to_l2_reformat, &d->flows);
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        return 0;

out_free:
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        mlx5e_decap_put(priv, d);
        return err;

out_err:
        mutex_unlock(&esw->offloads.decap_tbl_lock);
        return err;
}

static int cmp_route_info(struct mlx5e_route_key *a,
                          struct mlx5e_route_key *b)
{
        if (a->ip_version == 4 && b->ip_version == 4)
                return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
                              sizeof(a->endpoint_ip.v4));
        else if (a->ip_version == 6 && b->ip_version == 6)
                return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
                              sizeof(a->endpoint_ip.v6));
        return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
        if (key->ip_version == 4)
                return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
        return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
                                struct mlx5e_route_entry *r)
{
        WARN_ON(!list_empty(&r->decap_flows));
        WARN_ON(!list_empty(&r->encap_entries));

        kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
                return;

        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        lockdep_assert_held(&esw->offloads.encap_tbl_lock);

        if (!refcount_dec_and_test(&r->refcnt))
                return;
        hash_del_rcu(&r->hlist);
        mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
                u32 hash_key)
{
        struct mlx5e_route_key r_key;
        struct mlx5e_route_entry *r;

        hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
                r_key = r->key;
                if (!cmp_route_info(&r_key, key) &&
                    refcount_inc_not_zero(&r->refcnt))
                        return r;
        }
        return NULL;
}

static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
                       struct mlx5e_route_key *key,
                       int tunnel_dev_index,
                       unsigned long *route_tbl_change_time)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5e_tc_tun_encap *encap;
        struct mlx5e_route_entry *r;
        u32 hash_key;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &uplink_rpriv->uplink_priv;
        encap = uplink_priv->encap;

        hash_key = hash_route_info(key);
        spin_lock_bh(&encap->route_lock);
        r = mlx5e_route_get(encap, key, hash_key);
        spin_unlock_bh(&encap->route_lock);
        if (r) {
                if (!mlx5e_route_entry_valid(r)) {
                        mlx5e_route_put_locked(priv, r);
                        return ERR_PTR(-EINVAL);
                }
                return r;
        }

        r = kzalloc(sizeof(*r), GFP_KERNEL);
        if (!r)
                return ERR_PTR(-ENOMEM);

        r->key = *key;
        r->flags |= MLX5E_ROUTE_ENTRY_VALID;
        r->tunnel_dev_index = tunnel_dev_index;
        refcount_set(&r->refcnt, 1);
        INIT_LIST_HEAD(&r->decap_flows);
        INIT_LIST_HEAD(&r->encap_entries);

        spin_lock_bh(&encap->route_lock);
        *route_tbl_change_time = encap->route_tbl_last_update;
        hash_add(encap->route_tbl, &r->hlist, hash_key);
        spin_unlock_bh(&encap->route_lock);

        return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
        u32 hash_key = hash_route_info(key);
        struct mlx5e_route_entry *r;

        spin_lock_bh(&encap->route_lock);
        encap->route_tbl_last_update = jiffies;
        r = mlx5e_route_get(encap, key, hash_key);
        spin_unlock_bh(&encap->route_lock);

        return r;
}
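
/* This is the FIB-event side of the timestamp protocol: bumping
 * route_tbl_last_update under route_lock before the lookup guarantees
 * that any attacher that sampled the old value, but had not yet inserted
 * its entry, will see the mismatch and enqueue its own update.
 */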

struct mlx5e_tc_fib_event_data {
        struct work_struct work;
        unsigned long event;
        struct mlx5e_route_entry *r;
        struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
        struct mlx5e_tc_fib_event_data *fib_work;

        fib_work = kzalloc(sizeof(*fib_work), flags);
        if (WARN_ON(!fib_work))
                return NULL;

        INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
        fib_work->event = event;
        fib_work->ul_dev = ul_dev;

        return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
                           struct mlx5e_route_entry *r,
                           unsigned long event)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_fib_event_data *fib_work;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct net_device *ul_dev;

        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        ul_dev = uplink_rpriv->netdev;

        fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
        if (!fib_work)
                return -ENOMEM;

        dev_hold(ul_dev);
        refcount_inc(&r->refcnt);
        fib_work->r = r;
        queue_work(priv->wq, &fib_work->work);

        return 0;
}
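
/* The work item holds two references across the queue_work(): one on the
 * uplink netdev (dev_hold()) and one on the route entry (refcount_inc());
 * both are dropped at the end of mlx5e_tc_fib_event_work(). The gfp_t
 * parameter of mlx5e_tc_init_fib_work() exists because, presumably, the
 * FIB notifier callers (not shown in this excerpt) run in atomic context
 * and must pass GFP_ATOMIC, while this process-context path can use
 * GFP_KERNEL.
 */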

int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
                             struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        unsigned long tbl_time_before, tbl_time_after;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5e_route_entry *r;
        struct mlx5e_route_key key;
        int err = 0;

        esw_attr = attr->esw_attr;
        parse_attr = attr->parse_attr;
        mutex_lock(&esw->offloads.encap_tbl_lock);
        if (!esw_attr->rx_tun_attr)
                goto out;

        tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
        tbl_time_after = tbl_time_before;
        err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
        if (err || !esw_attr->rx_tun_attr->decap_vport)
                goto out;

        key.ip_version = attr->tun_ip_version;
        if (key.ip_version == 4)
                key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
        else
                key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

        r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
                                   &tbl_time_after);
        if (IS_ERR(r)) {
                err = PTR_ERR(r);
                goto out;
        }
        /* Routing changed concurrently. The FIB event handler might have
         * missed the new entry, so schedule an update.
         */
        if (tbl_time_before != tbl_time_after) {
                err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
                if (err) {
                        mlx5e_route_put_locked(priv, r);
                        goto out;
                }
        }

        flow->decap_route = r;
        list_add(&flow->decap_routes, &r->decap_flows);
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        return 0;

out:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct mlx5e_encap_entry *e,
                                    bool new_encap_entry,
                                    unsigned long tbl_time_before,
                                    int out_index)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        unsigned long tbl_time_after = tbl_time_before;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        const struct ip_tunnel_info *tun_info;
        struct mlx5_esw_flow_attr *esw_attr;
        struct mlx5e_route_entry *r;
        struct mlx5e_route_key key;
        unsigned short family;
        int err = 0;

        esw_attr = attr->esw_attr;
        parse_attr = attr->parse_attr;
        tun_info = parse_attr->tun_info[out_index];
        family = ip_tunnel_info_af(tun_info);

        if (family == AF_INET) {
                key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
                key.ip_version = 4;
        } else if (family == AF_INET6) {
                key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
                key.ip_version = 6;
        }

        err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
                                  e->route_dev_ifindex, out_index);
        if (err || !(esw_attr->dests[out_index].flags &
                     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
                return err;

        r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
                                   &tbl_time_after);
        if (IS_ERR(r))
                return PTR_ERR(r);
        /* Routing changed concurrently. The FIB event handler might have
         * missed the new entry, so schedule an update.
         */
        if (tbl_time_before != tbl_time_after) {
                err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
                if (err) {
                        mlx5e_route_put_locked(priv, r);
                        return err;
                }
        }

        flow->encap_routes[out_index].r = r;
        if (new_encap_entry)
                list_add(&e->route_list, &r->encap_entries);
        flow->encap_routes[out_index].index = out_index;
        return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_route_entry *r = flow->decap_route;

        if (!r)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        list_del(&flow->decap_routes);
        flow->decap_route = NULL;

        if (!refcount_dec_and_test(&r->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
                                     struct mlx5e_tc_flow *flow,
                                     int out_index)
{
        struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_encap_entry *e, *tmp;

        if (!r)
                return;

        mutex_lock(&esw->offloads.encap_tbl_lock);
        flow->encap_routes[out_index].r = NULL;

        if (!refcount_dec_and_test(&r->refcnt)) {
                mutex_unlock(&esw->offloads.encap_tbl_lock);
                return;
        }
        list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
                list_del_init(&e->route_list);
        hash_del_rcu(&r->hlist);
        mutex_unlock(&esw->offloads.encap_tbl_lock);

        mlx5e_route_dealloc(priv, r);
}

static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
                                   struct mlx5e_encap_entry *e,
                                   struct list_head *encap_flows)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(flow, encap_flows, tmp_list) {
                struct mlx5_flow_attr *attr = flow->attr;
                struct mlx5_esw_flow_attr *esw_attr;

                if (!mlx5e_is_offloaded_flow(flow))
                        continue;
                esw_attr = attr->esw_attr;

                if (flow_flag_test(flow, SLOW))
                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
                mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
                attr->modify_hdr = NULL;

                esw_attr->dests[flow->tmp_entry_index].flags &=
                        ~MLX5_ESW_DEST_ENCAP_VALID;
                esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
        }

        e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
        if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
                e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
                mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
                e->pkt_reformat = NULL;
        }
}

static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
                                  struct net_device *tunnel_dev,
                                  struct mlx5e_encap_entry *e,
                                  struct list_head *encap_flows)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow *flow;
        int err;

        err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
                mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
                mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
        if (err)
                mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
        e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

        list_for_each_entry(flow, encap_flows, tmp_list) {
                struct mlx5e_tc_flow_parse_attr *parse_attr;
                struct mlx5_flow_attr *attr = flow->attr;
                struct mlx5_esw_flow_attr *esw_attr;
                struct mlx5_flow_handle *rule;
                struct mlx5_flow_spec *spec;

                if (flow_flag_test(flow, FAILED))
                        continue;

                esw_attr = attr->esw_attr;
                parse_attr = attr->parse_attr;
                spec = &parse_attr->spec;

                err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
                                             e->out_dev, e->route_dev_ifindex,
                                             flow->tmp_entry_index);
                if (err) {
                        mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
                        continue;
                }

                err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
                if (err) {
                        mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
                                       err);
                        continue;
                }

                if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
                        esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
                        esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
                        if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
                                goto offload_to_slow_path;
                        /* update from slow path rule to encap rule */
                        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
                        if (IS_ERR(rule)) {
                                err = PTR_ERR(rule);
                                mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
                                               err);
                        } else {
                                flow->rule[0] = rule;
                        }
                } else {
offload_to_slow_path:
                        rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
                        /* mark the flow's encap dest as non-valid */
                        esw_attr->dests[flow->tmp_entry_index].flags &=
                                ~MLX5_ESW_DEST_ENCAP_VALID;

                        if (IS_ERR(rule)) {
                                err = PTR_ERR(rule);
                                mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
                                               err);
                        } else {
                                flow->rule[0] = rule;
                        }
                }
                flow_flag_set(flow, OFFLOADED);
        }
}
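
/* If any destination of a flow still lacks a valid encap after the header
 * update, the flow falls back to the slow path rule (see the
 * offload_to_slow_path label) instead of being dropped, and is promoted
 * again by mlx5e_tc_encap_flows_add() once all of its encap entries
 * become valid.
 */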
1344
1345 static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
1346                                      struct mlx5e_route_entry *r,
1347                                      struct list_head *flow_list,
1348                                      bool replace)
1349 {
1350         struct net_device *tunnel_dev;
1351         struct mlx5e_encap_entry *e;
1352
1353         tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
1354         if (!tunnel_dev)
1355                 return -ENODEV;
1356
1357         list_for_each_entry(e, &r->encap_entries, route_list) {
1358                 LIST_HEAD(encap_flows);
1359
1360                 mlx5e_take_all_encap_flows(e, &encap_flows);
1361                 if (list_empty(&encap_flows))
1362                         continue;
1363
1364                 if (mlx5e_route_entry_valid(r))
1365                         mlx5e_invalidate_encap(priv, e, &encap_flows);
1366
1367                 if (!replace) {
1368                         list_splice(&encap_flows, flow_list);
1369                         continue;
1370                 }
1371
1372                 mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
1373                 list_splice(&encap_flows, flow_list);
1374         }
1375
1376         return 0;
1377 }
1378
1379 static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
1380                                       struct list_head *flow_list)
1381 {
1382         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
1383         struct mlx5e_tc_flow *flow;
1384
1385         list_for_each_entry(flow, flow_list, tmp_list)
1386                 if (mlx5e_is_offloaded_flow(flow))
1387                         mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
1388 }
1389
static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
                                  struct list_head *decap_flows)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow *flow;

        list_for_each_entry(flow, decap_flows, tmp_list) {
                struct mlx5e_tc_flow_parse_attr *parse_attr;
                struct mlx5_flow_attr *attr = flow->attr;
                struct mlx5_flow_handle *rule;
                struct mlx5_flow_spec *spec;
                int err;

                if (flow_flag_test(flow, FAILED))
                        continue;

                parse_attr = attr->parse_attr;
                spec = &parse_attr->spec;
                err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
                if (err) {
                        mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
                                       err);
                        continue;
                }

                rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
                if (IS_ERR(rule)) {
                        err = PTR_ERR(rule);
                        mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
                                       err);
                } else {
                        flow->rule[0] = rule;
                        flow_flag_set(flow, OFFLOADED);
                }
        }
}

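/* Update all decap flows attached to route entry 'r': unoffload them if the
 * route was valid and, on a FIB replace event, re-offload them against the
 * new route. The flows are spliced onto 'flow_list' for the caller to put.
 */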
static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
                                          struct mlx5e_route_entry *r,
                                          struct list_head *flow_list,
                                          bool replace)
{
        struct net_device *tunnel_dev;
        LIST_HEAD(decap_flows);

        tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
        if (!tunnel_dev)
                return -ENODEV;

        mlx5e_take_all_route_decap_flows(r, &decap_flows);
        if (mlx5e_route_entry_valid(r))
                mlx5e_unoffload_flow_list(priv, &decap_flows);
        if (replace)
                mlx5e_reoffload_decap(priv, &decap_flows);

        list_splice(&decap_flows, flow_list);

        return 0;
}

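/* Work handler for FIB events queued by mlx5e_tc_tun_fib_event(). Runs in
 * process context where it can take the RTNL and encap table locks, updates
 * all encap and decap flows attached to the route entry, and marks the entry
 * valid on a replace event. The references taken in atomic context by the
 * notifier are released here.
 */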
static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
        struct mlx5e_tc_fib_event_data *event_data =
                container_of(work, struct mlx5e_tc_fib_event_data, work);
        struct net_device *ul_dev = event_data->ul_dev;
        struct mlx5e_priv *priv = netdev_priv(ul_dev);
        struct mlx5e_route_entry *r = event_data->r;
        struct mlx5_eswitch *esw;
        LIST_HEAD(flow_list);
        bool replace;
        int err;

        /* sync with concurrent neigh updates */
        rtnl_lock();
        esw = priv->mdev->priv.eswitch;
        mutex_lock(&esw->offloads.encap_tbl_lock);
        replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

        if (!mlx5e_route_entry_valid(r) && !replace)
                goto out;

        err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
        if (err)
                mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
                               err);

        err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
        if (err)
                mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
                               err);

        if (replace)
                r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
        mutex_unlock(&esw->offloads.encap_tbl_lock);
        rtnl_unlock();

        mlx5e_put_flow_list(priv, &flow_list);
        mlx5e_route_put(priv, event_data->r);
        dev_put(event_data->ul_dev);
        kfree(event_data);
}

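/* Called from the FIB notifier in atomic context. Allocate and initialize a
 * work item for an IPv4 route event, but only for /32 host routes whose
 * nexthop device is an mlx5e netdev. Returns NULL when the event is not
 * relevant, an ERR_PTR on allocation failure, or the initialized work item
 * holding references on the route entry and the uplink device.
 */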
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
                         struct net_device *ul_dev,
                         struct mlx5e_tc_tun_encap *encap,
                         unsigned long event,
                         struct fib_notifier_info *info)
{
        struct fib_entry_notifier_info *fen_info;
        struct mlx5e_tc_fib_event_data *fib_work;
        struct mlx5e_route_entry *r;
        struct mlx5e_route_key key;
        struct net_device *fib_dev;

        fen_info = container_of(info, struct fib_entry_notifier_info, info);
        fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
        if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
            fen_info->dst_len != 32)
                return NULL;

        fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
        if (!fib_work)
                return ERR_PTR(-ENOMEM);

        key.endpoint_ip.v4 = htonl(fen_info->dst);
        key.ip_version = 4;

        /* Can't fail after this point because releasing reference to r
         * requires obtaining sleeping mutex which we can't do in atomic
         * context.
         */
        r = mlx5e_route_lookup_for_update(encap, &key);
        if (!r)
                goto out;
        fib_work->r = r;
        dev_hold(ul_dev);

        return fib_work;

out:
        kfree(fib_work);
        return NULL;
}

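/* IPv6 counterpart of mlx5e_init_fib_work_ipv4(): only /128 host routes
 * whose nexthop device is an mlx5e netdev are considered.
 */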
static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
                         struct net_device *ul_dev,
                         struct mlx5e_tc_tun_encap *encap,
                         unsigned long event,
                         struct fib_notifier_info *info)
{
        struct fib6_entry_notifier_info *fen_info;
        struct mlx5e_tc_fib_event_data *fib_work;
        struct mlx5e_route_entry *r;
        struct mlx5e_route_key key;
        struct net_device *fib_dev;

        fen_info = container_of(info, struct fib6_entry_notifier_info, info);
        fib_dev = fib6_info_nh_dev(fen_info->rt);
        if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
            fen_info->rt->fib6_dst.plen != 128)
                return NULL;

        fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
        if (!fib_work)
                return ERR_PTR(-ENOMEM);

        memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
               sizeof(fen_info->rt->fib6_dst.addr));
        key.ip_version = 6;

        /* Can't fail after this point because releasing reference to r
         * requires obtaining sleeping mutex which we can't do in atomic
         * context.
         */
        r = mlx5e_route_lookup_for_update(encap, &key);
        if (!r)
                goto out;
        fib_work->r = r;
        dev_hold(ul_dev);

        return fib_work;

out:
        kfree(fib_work);
        return NULL;
}

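/* FIB notifier callback. Runs in atomic context, so it only filters the
 * event, allocates the work item with GFP_ATOMIC and queues it; the actual
 * flow update happens in mlx5e_tc_fib_event_work().
 */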
static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
        struct mlx5e_tc_fib_event_data *fib_work;
        struct fib_notifier_info *info = ptr;
        struct mlx5e_tc_tun_encap *encap;
        struct net_device *ul_dev;
        struct mlx5e_priv *priv;

        encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
        priv = encap->priv;
        ul_dev = priv->netdev;
        priv = netdev_priv(ul_dev);

        switch (event) {
        case FIB_EVENT_ENTRY_REPLACE:
        case FIB_EVENT_ENTRY_DEL:
                if (info->family == AF_INET)
                        fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
                else if (info->family == AF_INET6)
                        fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
                else
                        return NOTIFY_DONE;

                if (!IS_ERR_OR_NULL(fib_work)) {
                        queue_work(priv->wq, &fib_work->work);
                } else if (IS_ERR(fib_work)) {
                        NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
                        mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
                                       PTR_ERR(fib_work));
                }

                break;
        default:
                return NOTIFY_DONE;
        }

        return NOTIFY_DONE;
}

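/* Allocate the tunnel encap context for 'priv' and register its FIB
 * notifier. Returns the context on success or an ERR_PTR on failure.
 */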
struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
        struct mlx5e_tc_tun_encap *encap;
        int err;

        encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
        if (!encap)
                return ERR_PTR(-ENOMEM);

        encap->priv = priv;
        encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
        spin_lock_init(&encap->route_lock);
        hash_init(encap->route_tbl);
        err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
                                    NULL, NULL);
        if (err) {
                kvfree(encap);
                return ERR_PTR(err);
        }

        return encap;
}

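/* Tear down the tunnel encap context: unregister the FIB notifier, flush any
 * pending fib event works and free the context. Safe to call with NULL.
 */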
void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
        if (!encap)
                return;

        unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
        flush_workqueue(encap->priv->wq); /* flush fib event works */
        kvfree(encap);
}