// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include <net/nexthop.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	int tunnel_dev_index;
	struct rcu_head rcu;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}
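
/* Extract the tunnel source/destination IP addresses from the flow spec and
 * cache them in the flow attribute for later route resolution. TUN_RX is set
 * only when both addresses are present, as both are required for routing.
 */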
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
		struct in6_addr zerov6 = {};

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}
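
/* Called when a neighbour becomes valid: offload the cached encapsulation
 * header and move all flows attached to this encap entry from the slow path
 * to encap rules.
 */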
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     e->reformat_type,
						     e->encap_size, e->encap_header,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}
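
/* Called when a neighbour becomes invalid: move all flows attached to this
 * encap entry back to the slow path and release the cached encapsulation
 * header.
 */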
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow)))
		return;
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}
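
/* Iterate nhe->encap_list under RCU, returning the next encap entry that both
 * has a non-zero reference count and satisfies the match callback. The
 * reference to the starting entry 'e' is released before returning.
 */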
typedef bool (match_cb)(struct mlx5e_encap_entry *);

static struct mlx5e_encap_entry *
mlx5e_get_next_matching_encap(struct mlx5e_neigh_hash_entry *nhe,
			      struct mlx5e_encap_entry *e,
			      match_cb match)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!match(next)) {
		e = next;
		goto retry;
	}

	return next;
}

static bool mlx5e_encap_valid(struct mlx5e_encap_entry *e)
{
	return e->flags & MLX5_ENCAP_ENTRY_VALID;
}

static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_valid);
}

static bool mlx5e_encap_initialized(struct mlx5e_encap_entry *e)
{
	return e->compl_result >= 0;
}

struct mlx5e_encap_entry *
mlx5e_get_next_init_encap(struct mlx5e_neigh_hash_entry *nhe,
			  struct mlx5e_encap_entry *e)
{
	return mlx5e_get_next_matching_encap(nhe, e, mlx5e_encap_initialized);
}
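
/* Poll the flow counters of all offloaded flows attached to the neigh hash
 * entry and, if any flow was recently used, notify the kernel neighbour so
 * that the neighbour entry is kept alive.
 */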
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * the next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (flow->attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
	       a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}
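
/* Lookup helpers for the eswitch encap/decap hash tables. A matching entry is
 * returned only if its reference count could be incremented (non-zero).
 */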
static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}

	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}
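
/* For tunnels routed through a VF (rather than the uplink), rewrite the
 * source port metadata register so the packet appears to originate from the
 * route device's vport, and record the mod_hdr action id on the destination
 * entry so it can be updated later.
 */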
static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);
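
/* Find or create the encap entry for the tunnel info at the given output
 * index, waiting for a concurrent initializer if necessary, and attach the
 * flow to it. On success *encap_valid reflects whether a valid neighbour is
 * already resolved for the entry.
 */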
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev,
		       bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
				       out_index);
	if (err)
		goto out_err;

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}
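
/* Find or create the decap entry matching the inner ethernet header of the
 * flow and allocate the L3-to-L2 packet reformat object for it.
 */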
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = flow->attr->parse_attr;
	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = parse_attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
						     sizeof(parse_attr->eth),
						     &parse_attr->eth,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}
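
/* Lookup a route entry in the uplink route table, taking a reference, or
 * allocate and insert a new one. Also reports the route table version so
 * callers can detect FIB updates that raced with the lookup.
 */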
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);
static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}
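
/* Resolve the decap route for a tunnel RX flow and attach the flow to the
 * matching route entry so it is updated on FIB changes.
 */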
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}
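
/* Route is gone: unoffload all flows using this encap entry, release the
 * cached packet reformat and mark the entry as having no route.
 */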
static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}
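
/* Route is (re)created: refresh the cached encap header and re-offload all
 * flows on this encap entry, falling back to the slow path for flows whose
 * neighbours are still unresolved.
 */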
static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
	      mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
	      mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;
			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}

static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
		} else {
			flow->rule[0] = rule;
			flow_flag_set(flow, OFFLOADED);
		}
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}
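
/* Work item handling a FIB replace/delete event for a cached route entry:
 * under the encap table lock, re-offload or unoffload all encap and decap
 * flows attached to the route and update its validity flag.
 */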
static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (!fib_dev || fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}
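
/* Allocate the tunnel encap context for the uplink representor and register
 * the FIB notifier that tracks route changes for cached tunnel routes.
 */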
struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}