// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2021 Mellanox Technologies. */

#include <net/fib_notifier.h>
#include "tc_tun_encap.h"
#include "en_tc.h"
#include "tc_tun.h"
#include "rep/tc.h"
#include "diag/en_tc_tracepoint.h"

enum {
	MLX5E_ROUTE_ENTRY_VALID = BIT(0),
};

struct mlx5e_route_key {
	int ip_version;
	union {
		__be32 v4;
		struct in6_addr v6;
	} endpoint_ip;
};

struct mlx5e_route_entry {
	struct mlx5e_route_key key;
	struct list_head encap_entries;
	struct list_head decap_flows;
	u32 flags;
	struct hlist_node hlist;
	refcount_t refcnt;
	struct rcu_head rcu;
	int tunnel_dev_index;
};

struct mlx5e_tc_tun_encap {
	struct mlx5e_priv *priv;
	struct notifier_block fib_nb;
	spinlock_t route_lock; /* protects route_tbl */
	unsigned long route_tbl_last_update;
	DECLARE_HASHTABLE(route_tbl, 8);
};

static bool mlx5e_route_entry_valid(struct mlx5e_route_entry *r)
{
	return r->flags & MLX5E_ROUTE_ENTRY_VALID;
}

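/* Parse the outer IP source/destination addresses from the flow spec into
 * esw_attr->rx_tun_attr. The flow is only marked TUN_RX when both addresses
 * are present, since both are required to establish routing.
 */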
int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
			     struct mlx5_flow_spec *spec)
{
	struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
	struct mlx5_rx_tun_attr *tun_attr;
	void *daddr, *saddr;
	u8 ip_version;

	tun_attr = kvzalloc(sizeof(*tun_attr), GFP_KERNEL);
	if (!tun_attr)
		return -ENOMEM;

	esw_attr->rx_tun_attr = tun_attr;
	ip_version = mlx5e_tc_get_ip_version(spec, true);

	if (ip_version == 4) {
		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv4_layout.ipv4);
		tun_attr->dst_ip.v4 = *(__be32 *)daddr;
		tun_attr->src_ip.v4 = *(__be32 *)saddr;
		if (!tun_attr->dst_ip.v4 || !tun_attr->src_ip.v4)
			return 0;
	}
#if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
	else if (ip_version == 6) {
		int ipv6_size = MLX5_FLD_SZ_BYTES(ipv6_layout, ipv6);
		struct in6_addr zerov6 = {};

		daddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.dst_ipv4_dst_ipv6.ipv6_layout.ipv6);
		saddr = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				     outer_headers.src_ipv4_src_ipv6.ipv6_layout.ipv6);
		memcpy(&tun_attr->dst_ip.v6, daddr, ipv6_size);
		memcpy(&tun_attr->src_ip.v6, saddr, ipv6_size);
		if (!memcmp(&tun_attr->dst_ip.v6, &zerov6, sizeof(zerov6)) ||
		    !memcmp(&tun_attr->src_ip.v6, &zerov6, sizeof(zerov6)))
			return 0;
	}
#endif
	/* Only set the flag if both src and dst ip addresses exist. They are
	 * required to establish routing.
	 */
	flow_flag_set(flow, TUN_RX);
	flow->attr->tun_ip_version = ip_version;
	return 0;
}

static bool mlx5e_tc_flow_all_encaps_valid(struct mlx5_esw_flow_attr *esw_attr)
{
	bool all_flow_encaps_valid = true;
	int i;

	/* Flow can be associated with multiple encap entries.
	 * Before offloading the flow verify that all of them have
	 * a valid neighbour.
	 */
	for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP))
			continue;
		if (!(esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP_VALID)) {
			all_flow_encaps_valid = false;
			break;
		}
	}

	return all_flow_encaps_valid;
}

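/* Called when a neighbour becomes valid: offload the cached encapsulation
 * header and move each flow in flow_list from its slow path rule to an
 * encap rule, provided all of the flow's encap dests are resolved.
 */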
void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	if (e->flags & MLX5_ENCAP_ENTRY_NO_ROUTE)
		return;

	e->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     e->reformat_type,
						     e->encap_size, e->encap_header,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(e->pkt_reformat)) {
		mlx5_core_warn(priv->mdev, "Failed to offload cached encapsulation header, %lu\n",
			       PTR_ERR(e->pkt_reformat));
		return;
	}
	e->flags |= MLX5_ENCAP_ENTRY_VALID;
	mlx5e_rep_queue_neigh_stats_work(priv);

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
		esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;

		/* Do not offload flows with unresolved neighbors */
		if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
			continue;
		/* update from slow path rule to encap rule */
		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_from_slow_path(esw, flow);
		flow->rule[0] = rule;
		/* was unset when slow path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}
}

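/* Called when a neighbour becomes invalid: re-install the flows in flow_list
 * as slow path rules and release the cached packet reformat object.
 */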
void mlx5e_tc_encap_flows_del(struct mlx5e_priv *priv,
			      struct mlx5e_encap_entry *e,
			      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_attr *attr;
	struct mlx5_flow_spec *spec;
	struct mlx5e_tc_flow *flow;
	int err;

	list_for_each_entry(flow, flow_list, tmp_list) {
		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		attr = flow->attr;
		esw_attr = attr->esw_attr;
		spec = &attr->parse_attr->spec;

		/* update from encap rule to slow path rule */
		rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
		/* mark the flow's encap dest as non-valid */
		esw_attr->dests[flow->tmp_entry_index].flags &= ~MLX5_ESW_DEST_ENCAP_VALID;

		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
				       err);
			continue;
		}

		mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
		flow->rule[0] = rule;
		/* was unset when fast path rule removed */
		flow_flag_set(flow, OFFLOADED);
	}

	/* we know that the encap is valid */
	e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
	mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
}

static void mlx5e_take_tmp_flow(struct mlx5e_tc_flow *flow,
				struct list_head *flow_list,
				int index)
{
	if (IS_ERR(mlx5e_flow_get(flow)))
		return;
	wait_for_completion(&flow->init_done);

	flow->tmp_entry_index = index;
	list_add(&flow->tmp_list, flow_list);
}

/* Takes reference to all flows attached to encap and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
void mlx5e_take_all_encap_flows(struct mlx5e_encap_entry *e, struct list_head *flow_list)
{
	struct encap_flow_item *efi;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(efi, &e->flows, list) {
		flow = container_of(efi, struct mlx5e_tc_flow, encaps[efi->index]);
		mlx5e_take_tmp_flow(flow, flow_list, efi->index);
	}
}

/* Takes reference to all flows attached to route and adds the flows to
 * flow_list using 'tmp_list' list_head in mlx5e_tc_flow.
 */
static void mlx5e_take_all_route_decap_flows(struct mlx5e_route_entry *r,
					     struct list_head *flow_list)
{
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, &r->decap_flows, decap_routes)
		mlx5e_take_tmp_flow(flow, flow_list, 0);
}

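/* Walk nhe->encap_list under RCU and return the next encap entry whose
 * reference count could be taken, releasing the starting entry. Entries that
 * are not in a valid state once initialization completes are skipped.
 */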
static struct mlx5e_encap_entry *
mlx5e_get_next_valid_encap(struct mlx5e_neigh_hash_entry *nhe,
			   struct mlx5e_encap_entry *e)
{
	struct mlx5e_encap_entry *next = NULL;

retry:
	rcu_read_lock();

	/* find encap with non-zero reference counter value */
	for (next = e ?
		     list_next_or_null_rcu(&nhe->encap_list,
					   &e->encap_list,
					   struct mlx5e_encap_entry,
					   encap_list) :
		     list_first_or_null_rcu(&nhe->encap_list,
					    struct mlx5e_encap_entry,
					    encap_list);
	     next;
	     next = list_next_or_null_rcu(&nhe->encap_list,
					  &next->encap_list,
					  struct mlx5e_encap_entry,
					  encap_list))
		if (mlx5e_encap_take(next))
			break;

	rcu_read_unlock();

	/* release starting encap */
	if (e)
		mlx5e_encap_put(netdev_priv(e->out_dev), e);
	if (!next)
		return next;

	/* wait for encap to be fully initialized */
	wait_for_completion(&next->res_ready);
	/* continue searching if encap entry is not in valid state after completion */
	if (!(next->flags & MLX5_ENCAP_ENTRY_VALID)) {
		e = next;
		goto retry;
	}

	return next;
}

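/* Query the flow counters of all flows attached to the nhe's encap entries to
 * determine whether the neighbour was used since the last report; if so, send
 * a neigh event so the kernel keeps the neighbour alive for offloaded flows.
 */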
void mlx5e_tc_update_neigh_used_value(struct mlx5e_neigh_hash_entry *nhe)
{
	struct mlx5e_neigh *m_neigh = &nhe->m_neigh;
	struct mlx5e_encap_entry *e = NULL;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	struct neigh_table *tbl;
	bool neigh_used = false;
	struct neighbour *n;
	u64 lastuse;

	if (m_neigh->family == AF_INET)
		tbl = &arp_tbl;
#if IS_ENABLED(CONFIG_IPV6)
	else if (m_neigh->family == AF_INET6)
		tbl = ipv6_stub->nd_tbl;
#endif
	else
		return;

	/* mlx5e_get_next_valid_encap() releases previous encap before returning
	 * next one.
	 */
	while ((e = mlx5e_get_next_valid_encap(nhe, e)) != NULL) {
		struct mlx5e_priv *priv = netdev_priv(e->out_dev);
		struct encap_flow_item *efi, *tmp;
		struct mlx5_eswitch *esw;
		LIST_HEAD(flow_list);

		esw = priv->mdev->priv.eswitch;
		mutex_lock(&esw->offloads.encap_tbl_lock);
		list_for_each_entry_safe(efi, tmp, &e->flows, list) {
			flow = container_of(efi, struct mlx5e_tc_flow,
					    encaps[efi->index]);
			if (IS_ERR(mlx5e_flow_get(flow)))
				continue;
			list_add(&flow->tmp_list, &flow_list);

			if (mlx5e_is_offloaded_flow(flow)) {
				counter = mlx5e_tc_get_counter(flow);
				lastuse = mlx5_fc_query_lastuse(counter);
				if (time_after((unsigned long)lastuse, nhe->reported_lastuse)) {
					neigh_used = true;
					break;
				}
			}
		}
		mutex_unlock(&esw->offloads.encap_tbl_lock);

		mlx5e_put_flow_list(priv, &flow_list);
		if (neigh_used) {
			/* release current encap before breaking the loop */
			mlx5e_encap_put(priv, e);
			break;
		}
	}

	trace_mlx5e_tc_update_neigh_used_value(nhe, neigh_used);

	if (neigh_used) {
		nhe->reported_lastuse = jiffies;

		/* find the relevant neigh according to the cached device and
		 * dst ip pair
		 */
		n = neigh_lookup(tbl, &m_neigh->dst_ip, READ_ONCE(nhe->neigh_dev));
		if (!n)
			return;

		neigh_event_send(n, NULL);
		neigh_release(n);
	}
}

static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	WARN_ON(!list_empty(&e->flows));

	if (e->compl_result > 0) {
		mlx5e_rep_encap_entry_detach(netdev_priv(e->out_dev), e);

		if (e->flags & MLX5_ENCAP_ENTRY_VALID)
			mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
	}

	kfree(e->tun_info);
	kfree(e->encap_header);
	kfree_rcu(e, rcu);
}

static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_decap_entry *d)
{
	WARN_ON(!list_empty(&d->flows));

	if (!d->compl_result)
		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);

	kfree_rcu(d, rcu);
}

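/* Release a reference to the encap entry; the last reference removes it from
 * the encap table under encap_tbl_lock and frees it after unlocking.
 */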
void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&e->refcnt, &esw->offloads.encap_tbl_lock))
		return;
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
		return;
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index);

void mlx5e_detach_encap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow, int out_index)
{
	struct mlx5e_encap_entry *e = flow->encaps[out_index].e;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (flow->attr->esw_attr->dests[out_index].flags &
	    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE)
		mlx5e_detach_encap_route(priv, flow, out_index);

	/* flow wasn't fully initialized */
	if (!e)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->encaps[out_index].list);
	flow->encaps[out_index].e = NULL;
	if (!refcount_dec_and_test(&e->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_del(&e->route_list);
	hash_del_rcu(&e->encap_hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_encap_dealloc(priv, e);
}

void mlx5e_detach_decap(struct mlx5e_priv *priv,
			struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_entry *d = flow->decap_reformat;

	if (!d)
		return;

	mutex_lock(&esw->offloads.decap_tbl_lock);
	list_del(&flow->l3_to_l2_reformat);
	flow->decap_reformat = NULL;

	if (!refcount_dec_and_test(&d->refcnt)) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		return;
	}
	hash_del_rcu(&d->hlist);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	mlx5e_decap_dealloc(priv, d);
}

bool mlx5e_tc_tun_encap_info_equal_generic(struct mlx5e_encap_key *a,
					   struct mlx5e_encap_key *b)
{
	return memcmp(a->ip_tun_key, b->ip_tun_key, sizeof(*a->ip_tun_key)) == 0 &&
	       a->tc_tunnel->tunnel_type == b->tc_tunnel->tunnel_type;
}

static int cmp_decap_info(struct mlx5e_decap_key *a,
			  struct mlx5e_decap_key *b)
{
	return memcmp(&a->key, &b->key, sizeof(b->key));
}

static int hash_encap_info(struct mlx5e_encap_key *key)
{
	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
		     key->tc_tunnel->tunnel_type);
}

static int hash_decap_info(struct mlx5e_decap_key *key)
{
	return jhash(&key->key, sizeof(key->key), 0);
}

bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
{
	return refcount_inc_not_zero(&e->refcnt);
}

static struct mlx5e_encap_entry *
mlx5e_encap_get(struct mlx5e_priv *priv, struct mlx5e_encap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_key e_key;
	struct mlx5e_encap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.encap_tbl, e,
				   encap_hlist, hash_key) {
		e_key.ip_tun_key = &e->tun_info->key;
		e_key.tc_tunnel = e->tunnel;
		if (e->tunnel->encap_info_equal(&e_key, key) &&
		    mlx5e_encap_take(e))
			return e;
	}

	return NULL;
}

static struct mlx5e_decap_entry *
mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
		uintptr_t hash_key)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_decap_key r_key;
	struct mlx5e_decap_entry *e;

	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
				   hlist, hash_key) {
		r_key = e->key;
		if (!cmp_decap_info(&r_key, key) &&
		    mlx5e_decap_take(e))
			return e;
	}
	return NULL;
}

struct ip_tunnel_info *mlx5e_dup_tun_info(const struct ip_tunnel_info *tun_info)
{
	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;

	return kmemdup(tun_info, tun_size, GFP_KERNEL);
}

static bool is_duplicated_encap_entry(struct mlx5e_priv *priv,
				      struct mlx5e_tc_flow *flow,
				      int out_index,
				      struct mlx5e_encap_entry *e,
				      struct netlink_ext_ack *extack)
{
	int i;

	for (i = 0; i < out_index; i++) {
		if (flow->encaps[i].e != e)
			continue;
		NL_SET_ERR_MSG_MOD(extack, "can't duplicate encap action");
		netdev_err(priv->netdev, "can't duplicate encap action\n");
		return true;
	}

	return false;
}

static int mlx5e_set_vf_tunnel(struct mlx5_eswitch *esw,
			       struct mlx5_flow_attr *attr,
			       struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
			       struct net_device *out_dev,
			       int route_dev_ifindex,
			       int out_index)
{
	struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev))
		goto out;

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	attr->dest_chain = 0;
	attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
	data = mlx5_eswitch_get_vport_metadata_for_set(esw_attr->in_mdev->priv.eswitch,
						       vport_num);
	err = mlx5e_tc_match_to_reg_set_and_get_id(esw->dev, mod_hdr_acts,
						   MLX5_FLOW_NAMESPACE_FDB,
						   VPORT_TO_REG, data);
	if (err >= 0) {
		esw_attr->dests[out_index].src_port_rewrite_act_id = err;
		err = 0;
	}

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static int mlx5e_update_vf_tunnel(struct mlx5_eswitch *esw,
				  struct mlx5_esw_flow_attr *attr,
				  struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
				  struct net_device *out_dev,
				  int route_dev_ifindex,
				  int out_index)
{
	int act_id = attr->dests[out_index].src_port_rewrite_act_id;
	struct net_device *route_dev;
	u16 vport_num;
	int err = 0;
	u32 data;

	route_dev = dev_get_by_index(dev_net(out_dev), route_dev_ifindex);

	if (!route_dev || route_dev->netdev_ops != &mlx5e_netdev_ops ||
	    !mlx5e_tc_is_vf_tunnel(out_dev, route_dev)) {
		err = -ENODEV;
		goto out;
	}

	err = mlx5e_tc_query_route_vport(out_dev, route_dev, &vport_num);
	if (err)
		goto out;

	data = mlx5_eswitch_get_vport_metadata_for_set(attr->in_mdev->priv.eswitch,
						       vport_num);
	mlx5e_tc_match_to_reg_mod_hdr_change(esw->dev, mod_hdr_acts, VPORT_TO_REG, act_id, data);

out:
	if (route_dev)
		dev_put(route_dev);
	return err;
}

static unsigned int mlx5e_route_tbl_get_last_update(struct mlx5e_priv *priv)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	unsigned int ret;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	spin_lock_bh(&encap->route_lock);
	ret = encap->route_tbl_last_update;
	spin_unlock_bh(&encap->route_lock);
	return ret;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index);

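/* Look up (or allocate and initialize) the encap entry matching the tunnel
 * info of dest out_index and attach the flow to it. Header creation happens
 * outside encap_tbl_lock; concurrent users block on e->res_ready and then
 * check e->compl_result under the lock.
 */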
int mlx5e_attach_encap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct net_device *mirred_dev,
		       int out_index,
		       struct netlink_ext_ack *extack,
		       struct net_device **encap_dev,
		       bool *encap_valid)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	unsigned long tbl_time_before = 0;
	struct mlx5e_encap_entry *e;
	struct mlx5e_encap_key key;
	bool entry_created = false;
	unsigned short family;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);
	key.ip_tun_key = &tun_info->key;
	key.tc_tunnel = mlx5e_get_tc_tun(mirred_dev);
	if (!key.tc_tunnel) {
		NL_SET_ERR_MSG_MOD(extack, "Unsupported tunnel");
		return -EOPNOTSUPP;
	}

	hash_key = hash_encap_info(&key);

	mutex_lock(&esw->offloads.encap_tbl_lock);
	e = mlx5e_encap_get(priv, &key, hash_key);

	/* must verify if encap is valid or not */
	if (e) {
		/* Check that entry was not already attached to this flow */
		if (is_duplicated_encap_entry(priv, flow, out_index, e, extack)) {
			err = -EOPNOTSUPP;
			goto out_err;
		}

		mutex_unlock(&esw->offloads.encap_tbl_lock);
		wait_for_completion(&e->res_ready);

		/* Protect against concurrent neigh update. */
		mutex_lock(&esw->offloads.encap_tbl_lock);
		if (e->compl_result < 0) {
			err = -EREMOTEIO;
			goto out_err;
		}
		goto attach_flow;
	}

	e = kzalloc(sizeof(*e), GFP_KERNEL);
	if (!e) {
		err = -ENOMEM;
		goto out_err;
	}

	refcount_set(&e->refcnt, 1);
	init_completion(&e->res_ready);
	entry_created = true;
	INIT_LIST_HEAD(&e->route_list);

	tun_info = mlx5e_dup_tun_info(tun_info);
	if (!tun_info) {
		err = -ENOMEM;
		goto out_err_init;
	}
	e->tun_info = tun_info;
	err = mlx5e_tc_tun_init_encap_attr(mirred_dev, priv, e, extack);
	if (err)
		goto out_err_init;

	INIT_LIST_HEAD(&e->flows);
	hash_add_rcu(esw->offloads.encap_tbl, &e->encap_hlist, hash_key);
	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	if (family == AF_INET)
		err = mlx5e_tc_tun_create_header_ipv4(priv, mirred_dev, e);
	else if (family == AF_INET6)
		err = mlx5e_tc_tun_create_header_ipv6(priv, mirred_dev, e);

	/* Protect against concurrent neigh update. */
	mutex_lock(&esw->offloads.encap_tbl_lock);
	complete_all(&e->res_ready);
	if (err) {
		e->compl_result = err;
		goto out_err;
	}
	e->compl_result = 1;

attach_flow:
	err = mlx5e_attach_encap_route(priv, flow, e, entry_created, tbl_time_before,
				       out_index);
	if (err)
		goto out_err;

	flow->encaps[out_index].e = e;
	list_add(&flow->encaps[out_index].list, &e->flows);
	flow->encaps[out_index].index = out_index;
	*encap_dev = e->out_dev;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		attr->esw_attr->dests[out_index].pkt_reformat = e->pkt_reformat;
		attr->esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
		*encap_valid = true;
	} else {
		*encap_valid = false;
	}
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	return err;

out_err:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	if (e)
		mlx5e_encap_put(priv, e);
	return err;

out_err_init:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	kfree(tun_info);
	kfree(e);
	return err;
}

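/* Look up (or allocate) the decap entry that restores the original L2 header
 * after an L3-tunnel-to-L2 decap, and attach the flow to it.
 */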
int mlx5e_attach_decap(struct mlx5e_priv *priv,
		       struct mlx5e_tc_flow *flow,
		       struct netlink_ext_ack *extack)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5e_decap_entry *d;
	struct mlx5e_decap_key key;
	uintptr_t hash_key;
	int err = 0;

	parse_attr = flow->attr->parse_attr;
	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "encap header larger than max supported");
		return -EOPNOTSUPP;
	}

	key.key = parse_attr->eth;
	hash_key = hash_decap_info(&key);
	mutex_lock(&esw->offloads.decap_tbl_lock);
	d = mlx5e_decap_get(priv, &key, hash_key);
	if (d) {
		mutex_unlock(&esw->offloads.decap_tbl_lock);
		wait_for_completion(&d->res_ready);
		mutex_lock(&esw->offloads.decap_tbl_lock);
		if (d->compl_result) {
			err = -EREMOTEIO;
			goto out_free;
		}
		goto found;
	}

	d = kzalloc(sizeof(*d), GFP_KERNEL);
	if (!d) {
		err = -ENOMEM;
		goto out_err;
	}

	d->key = key;
	refcount_set(&d->refcnt, 1);
	init_completion(&d->res_ready);
	INIT_LIST_HEAD(&d->flows);
	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
	mutex_unlock(&esw->offloads.decap_tbl_lock);

	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
						     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
						     sizeof(parse_attr->eth),
						     &parse_attr->eth,
						     MLX5_FLOW_NAMESPACE_FDB);
	if (IS_ERR(d->pkt_reformat)) {
		err = PTR_ERR(d->pkt_reformat);
		d->compl_result = err;
	}
	mutex_lock(&esw->offloads.decap_tbl_lock);
	complete_all(&d->res_ready);
	if (err)
		goto out_free;

found:
	flow->decap_reformat = d;
	attr->decap_pkt_reformat = d->pkt_reformat;
	list_add(&flow->l3_to_l2_reformat, &d->flows);
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return 0;

out_free:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	mlx5e_decap_put(priv, d);
	return err;

out_err:
	mutex_unlock(&esw->offloads.decap_tbl_lock);
	return err;
}

static int cmp_route_info(struct mlx5e_route_key *a,
			  struct mlx5e_route_key *b)
{
	if (a->ip_version == 4 && b->ip_version == 4)
		return memcmp(&a->endpoint_ip.v4, &b->endpoint_ip.v4,
			      sizeof(a->endpoint_ip.v4));
	else if (a->ip_version == 6 && b->ip_version == 6)
		return memcmp(&a->endpoint_ip.v6, &b->endpoint_ip.v6,
			      sizeof(a->endpoint_ip.v6));
	return 1;
}

static u32 hash_route_info(struct mlx5e_route_key *key)
{
	if (key->ip_version == 4)
		return jhash(&key->endpoint_ip.v4, sizeof(key->endpoint_ip.v4), 0);
	return jhash(&key->endpoint_ip.v6, sizeof(key->endpoint_ip.v6), 0);
}

static void mlx5e_route_dealloc(struct mlx5e_priv *priv,
				struct mlx5e_route_entry *r)
{
	WARN_ON(!list_empty(&r->decap_flows));
	WARN_ON(!list_empty(&r->encap_entries));

	kfree_rcu(r, rcu);
}

static void mlx5e_route_put(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	if (!refcount_dec_and_mutex_lock(&r->refcnt, &esw->offloads.encap_tbl_lock))
		return;

	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_route_put_locked(struct mlx5e_priv *priv, struct mlx5e_route_entry *r)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

	lockdep_assert_held(&esw->offloads.encap_tbl_lock);

	if (!refcount_dec_and_test(&r->refcnt))
		return;
	hash_del_rcu(&r->hlist);
	mlx5e_route_dealloc(priv, r);
}

static struct mlx5e_route_entry *
mlx5e_route_get(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key,
		u32 hash_key)
{
	struct mlx5e_route_key r_key;
	struct mlx5e_route_entry *r;

	hash_for_each_possible(encap->route_tbl, r, hlist, hash_key) {
		r_key = r->key;
		if (!cmp_route_info(&r_key, key) &&
		    refcount_inc_not_zero(&r->refcnt))
			return r;
	}
	return NULL;
}

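/* Look up a route entry by tunnel endpoint IP, taking a reference if one
 * exists; otherwise allocate a new valid entry and record the route table
 * timestamp so the caller can detect concurrently handled FIB updates.
 */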
static struct mlx5e_route_entry *
mlx5e_route_get_create(struct mlx5e_priv *priv,
		       struct mlx5e_route_key *key,
		       int tunnel_dev_index,
		       unsigned long *route_tbl_change_time)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5_rep_uplink_priv *uplink_priv;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct mlx5e_tc_tun_encap *encap;
	struct mlx5e_route_entry *r;
	u32 hash_key;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	uplink_priv = &uplink_rpriv->uplink_priv;
	encap = uplink_priv->encap;

	hash_key = hash_route_info(key);
	spin_lock_bh(&encap->route_lock);
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);
	if (r) {
		if (!mlx5e_route_entry_valid(r)) {
			mlx5e_route_put_locked(priv, r);
			return ERR_PTR(-EINVAL);
		}
		return r;
	}

	r = kzalloc(sizeof(*r), GFP_KERNEL);
	if (!r)
		return ERR_PTR(-ENOMEM);

	r->key = *key;
	r->flags |= MLX5E_ROUTE_ENTRY_VALID;
	r->tunnel_dev_index = tunnel_dev_index;
	refcount_set(&r->refcnt, 1);
	INIT_LIST_HEAD(&r->decap_flows);
	INIT_LIST_HEAD(&r->encap_entries);

	spin_lock_bh(&encap->route_lock);
	*route_tbl_change_time = encap->route_tbl_last_update;
	hash_add(encap->route_tbl, &r->hlist, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

static struct mlx5e_route_entry *
mlx5e_route_lookup_for_update(struct mlx5e_tc_tun_encap *encap, struct mlx5e_route_key *key)
{
	u32 hash_key = hash_route_info(key);
	struct mlx5e_route_entry *r;

	spin_lock_bh(&encap->route_lock);
	encap->route_tbl_last_update = jiffies;
	r = mlx5e_route_get(encap, key, hash_key);
	spin_unlock_bh(&encap->route_lock);

	return r;
}

struct mlx5e_tc_fib_event_data {
	struct work_struct work;
	unsigned long event;
	struct mlx5e_route_entry *r;
	struct net_device *ul_dev;
};

static void mlx5e_tc_fib_event_work(struct work_struct *work);

static struct mlx5e_tc_fib_event_data *
mlx5e_tc_init_fib_work(unsigned long event, struct net_device *ul_dev, gfp_t flags)
{
	struct mlx5e_tc_fib_event_data *fib_work;

	fib_work = kzalloc(sizeof(*fib_work), flags);
	if (WARN_ON(!fib_work))
		return NULL;

	INIT_WORK(&fib_work->work, mlx5e_tc_fib_event_work);
	fib_work->event = event;
	fib_work->ul_dev = ul_dev;

	return fib_work;
}

static int
mlx5e_route_enqueue_update(struct mlx5e_priv *priv,
			   struct mlx5e_route_entry *r,
			   unsigned long event)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_rep_priv *uplink_rpriv;
	struct net_device *ul_dev;

	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
	ul_dev = uplink_rpriv->netdev;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_KERNEL);
	if (!fib_work)
		return -ENOMEM;

	dev_hold(ul_dev);
	refcount_inc(&r->refcnt);
	fib_work->r = r;
	queue_work(priv->wq, &fib_work->work);

	return 0;
}

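/* Resolve the decap vport for a TUN_RX flow and attach the flow to the route
 * entry of its tunnel endpoint so later FIB events can update it.
 */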
int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
			     struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_before, tbl_time_after;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	if (!esw_attr->rx_tun_attr)
		goto out;

	tbl_time_before = mlx5e_route_tbl_get_last_update(priv);
	tbl_time_after = tbl_time_before;
	err = mlx5e_tc_tun_route_lookup(priv, &parse_attr->spec, attr);
	if (err || !esw_attr->rx_tun_attr->decap_vport)
		goto out;

	key.ip_version = attr->tun_ip_version;
	if (key.ip_version == 4)
		key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
	else
		key.endpoint_ip.v6 = esw_attr->rx_tun_attr->dst_ip.v6;

	r = mlx5e_route_get_create(priv, &key, parse_attr->filter_dev->ifindex,
				   &tbl_time_after);
	if (IS_ERR(r)) {
		err = PTR_ERR(r);
		goto out;
	}
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			goto out;
		}
	}

	flow->decap_route = r;
	list_add(&flow->decap_routes, &r->decap_flows);
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return 0;

out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	return err;
}

static int mlx5e_attach_encap_route(struct mlx5e_priv *priv,
				    struct mlx5e_tc_flow *flow,
				    struct mlx5e_encap_entry *e,
				    bool new_encap_entry,
				    unsigned long tbl_time_before,
				    int out_index)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	unsigned long tbl_time_after = tbl_time_before;
	struct mlx5e_tc_flow_parse_attr *parse_attr;
	struct mlx5_flow_attr *attr = flow->attr;
	const struct ip_tunnel_info *tun_info;
	struct mlx5_esw_flow_attr *esw_attr;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	unsigned short family;
	int err = 0;

	esw_attr = attr->esw_attr;
	parse_attr = attr->parse_attr;
	tun_info = parse_attr->tun_info[out_index];
	family = ip_tunnel_info_af(tun_info);

	if (family == AF_INET) {
		key.endpoint_ip.v4 = tun_info->key.u.ipv4.src;
		key.ip_version = 4;
	} else if (family == AF_INET6) {
		key.endpoint_ip.v6 = tun_info->key.u.ipv6.src;
		key.ip_version = 6;
	}

	err = mlx5e_set_vf_tunnel(esw, attr, &parse_attr->mod_hdr_acts, e->out_dev,
				  e->route_dev_ifindex, out_index);
	if (err || !(esw_attr->dests[out_index].flags &
		     MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE))
		return err;

	r = mlx5e_route_get_create(priv, &key, parse_attr->mirred_ifindex[out_index],
				   &tbl_time_after);
	if (IS_ERR(r))
		return PTR_ERR(r);
	/* Routing changed concurrently. FIB event handler might have missed new
	 * entry, schedule update.
	 */
	if (tbl_time_before != tbl_time_after) {
		err = mlx5e_route_enqueue_update(priv, r, FIB_EVENT_ENTRY_REPLACE);
		if (err) {
			mlx5e_route_put_locked(priv, r);
			return err;
		}
	}

	flow->encap_routes[out_index].r = r;
	if (new_encap_entry)
		list_add(&e->route_list, &r->encap_entries);
	flow->encap_routes[out_index].index = out_index;
	return 0;
}

void mlx5e_detach_decap_route(struct mlx5e_priv *priv,
			      struct mlx5e_tc_flow *flow)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_route_entry *r = flow->decap_route;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	list_del(&flow->decap_routes);
	flow->decap_route = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_detach_encap_route(struct mlx5e_priv *priv,
				     struct mlx5e_tc_flow *flow,
				     int out_index)
{
	struct mlx5e_route_entry *r = flow->encap_routes[out_index].r;
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_encap_entry *e, *tmp;

	if (!r)
		return;

	mutex_lock(&esw->offloads.encap_tbl_lock);
	flow->encap_routes[out_index].r = NULL;

	if (!refcount_dec_and_test(&r->refcnt)) {
		mutex_unlock(&esw->offloads.encap_tbl_lock);
		return;
	}
	list_for_each_entry_safe(e, tmp, &r->encap_entries, route_list)
		list_del_init(&e->route_list);
	hash_del_rcu(&r->hlist);
	mutex_unlock(&esw->offloads.encap_tbl_lock);

	mlx5e_route_dealloc(priv, r);
}

static void mlx5e_invalidate_encap(struct mlx5e_priv *priv,
				   struct mlx5e_encap_entry *e,
				   struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;

		if (!mlx5e_is_offloaded_flow(flow))
			continue;
		esw_attr = attr->esw_attr;

		if (flow_flag_test(flow, SLOW))
			mlx5e_tc_unoffload_from_slow_path(esw, flow);
		else
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
		mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
		attr->modify_hdr = NULL;

		esw_attr->dests[flow->tmp_entry_index].flags &=
			~MLX5_ESW_DEST_ENCAP_VALID;
		esw_attr->dests[flow->tmp_entry_index].pkt_reformat = NULL;
	}

	e->flags |= MLX5_ENCAP_ENTRY_NO_ROUTE;
	if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
		e->flags &= ~MLX5_ENCAP_ENTRY_VALID;
		mlx5_packet_reformat_dealloc(priv->mdev, e->pkt_reformat);
		e->pkt_reformat = NULL;
	}
}

static void mlx5e_reoffload_encap(struct mlx5e_priv *priv,
				  struct net_device *tunnel_dev,
				  struct mlx5e_encap_entry *e,
				  struct list_head *encap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;
	int err;

	err = ip_tunnel_info_af(e->tun_info) == AF_INET ?
		mlx5e_tc_tun_update_header_ipv4(priv, tunnel_dev, e) :
		mlx5e_tc_tun_update_header_ipv6(priv, tunnel_dev, e);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update encap header, %d", err);
	e->flags &= ~MLX5_ENCAP_ENTRY_NO_ROUTE;

	list_for_each_entry(flow, encap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_esw_flow_attr *esw_attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;

		if (flow_flag_test(flow, FAILED))
			continue;

		esw_attr = attr->esw_attr;
		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;

		err = mlx5e_update_vf_tunnel(esw, esw_attr, &parse_attr->mod_hdr_acts,
					     e->out_dev, e->route_dev_ifindex,
					     flow->tmp_entry_index);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update VF tunnel err=%d", err);
			continue;
		}

		err = mlx5e_tc_add_flow_mod_hdr(priv, parse_attr, flow);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to update flow mod_hdr err=%d",
				       err);
			continue;
		}

		if (e->flags & MLX5_ENCAP_ENTRY_VALID) {
			esw_attr->dests[flow->tmp_entry_index].pkt_reformat = e->pkt_reformat;
			esw_attr->dests[flow->tmp_entry_index].flags |= MLX5_ESW_DEST_ENCAP_VALID;
			if (!mlx5e_tc_flow_all_encaps_valid(esw_attr))
				goto offload_to_slow_path;
			/* update from slow path rule to encap rule */
			rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update cached encapsulation flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		} else {
offload_to_slow_path:
			rule = mlx5e_tc_offload_to_slow_path(esw, flow, spec);
			/* mark the flow's encap dest as non-valid */
			esw_attr->dests[flow->tmp_entry_index].flags &=
				~MLX5_ESW_DEST_ENCAP_VALID;

			if (IS_ERR(rule)) {
				err = PTR_ERR(rule);
				mlx5_core_warn(priv->mdev, "Failed to update slow path (encap) flow, %d\n",
					       err);
			} else {
				flow->rule[0] = rule;
			}
		}
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_encaps(struct mlx5e_priv *priv,
				     struct mlx5e_route_entry *r,
				     struct list_head *flow_list,
				     bool replace)
{
	struct net_device *tunnel_dev;
	struct mlx5e_encap_entry *e;

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	list_for_each_entry(e, &r->encap_entries, route_list) {
		LIST_HEAD(encap_flows);

		mlx5e_take_all_encap_flows(e, &encap_flows);
		if (list_empty(&encap_flows))
			continue;

		if (mlx5e_route_entry_valid(r))
			mlx5e_invalidate_encap(priv, e, &encap_flows);

		if (!replace) {
			list_splice(&encap_flows, flow_list);
			continue;
		}

		mlx5e_reoffload_encap(priv, tunnel_dev, e, &encap_flows);
		list_splice(&encap_flows, flow_list);
	}

	return 0;
}

static void mlx5e_unoffload_flow_list(struct mlx5e_priv *priv,
				      struct list_head *flow_list)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, flow_list, tmp_list)
		if (mlx5e_is_offloaded_flow(flow))
			mlx5e_tc_unoffload_fdb_rules(esw, flow, flow->attr);
}

static void mlx5e_reoffload_decap(struct mlx5e_priv *priv,
				  struct list_head *decap_flows)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	struct mlx5e_tc_flow *flow;

	list_for_each_entry(flow, decap_flows, tmp_list) {
		struct mlx5e_tc_flow_parse_attr *parse_attr;
		struct mlx5_flow_attr *attr = flow->attr;
		struct mlx5_flow_handle *rule;
		struct mlx5_flow_spec *spec;
		int err;

		if (flow_flag_test(flow, FAILED))
			continue;

		parse_attr = attr->parse_attr;
		spec = &parse_attr->spec;
		err = mlx5e_tc_tun_route_lookup(priv, spec, attr);
		if (err) {
			mlx5_core_warn(priv->mdev, "Failed to lookup route for flow, %d\n",
				       err);
			continue;
		}

		rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, attr);
		if (IS_ERR(rule)) {
			err = PTR_ERR(rule);
			mlx5_core_warn(priv->mdev, "Failed to update cached decap flow, %d\n",
				       err);
			continue;
		}

		flow->rule[0] = rule;
		flow_flag_set(flow, OFFLOADED);
	}
}

static int mlx5e_update_route_decap_flows(struct mlx5e_priv *priv,
					  struct mlx5e_route_entry *r,
					  struct list_head *flow_list,
					  bool replace)
{
	struct net_device *tunnel_dev;
	LIST_HEAD(decap_flows);

	tunnel_dev = __dev_get_by_index(dev_net(priv->netdev), r->tunnel_dev_index);
	if (!tunnel_dev)
		return -ENODEV;

	mlx5e_take_all_route_decap_flows(r, &decap_flows);
	if (mlx5e_route_entry_valid(r))
		mlx5e_unoffload_flow_list(priv, &decap_flows);
	if (replace)
		mlx5e_reoffload_decap(priv, &decap_flows);

	list_splice(&decap_flows, flow_list);

	return 0;
}

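/* Work item scheduled from the FIB notifier: under RTNL and encap_tbl_lock,
 * re-offload or unoffload all encap entries and decap flows attached to the
 * affected route entry, marking the entry valid again on a route replace.
 */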
static void mlx5e_tc_fib_event_work(struct work_struct *work)
{
	struct mlx5e_tc_fib_event_data *event_data =
		container_of(work, struct mlx5e_tc_fib_event_data, work);
	struct net_device *ul_dev = event_data->ul_dev;
	struct mlx5e_priv *priv = netdev_priv(ul_dev);
	struct mlx5e_route_entry *r = event_data->r;
	struct mlx5_eswitch *esw;
	LIST_HEAD(flow_list);
	bool replace;
	int err;

	/* sync with concurrent neigh updates */
	rtnl_lock();
	esw = priv->mdev->priv.eswitch;
	mutex_lock(&esw->offloads.encap_tbl_lock);
	replace = event_data->event == FIB_EVENT_ENTRY_REPLACE;

	if (!mlx5e_route_entry_valid(r) && !replace)
		goto out;

	err = mlx5e_update_route_encaps(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route encaps, %d\n",
			       err);

	err = mlx5e_update_route_decap_flows(priv, r, &flow_list, replace);
	if (err)
		mlx5_core_warn(priv->mdev, "Failed to update route decap flows, %d\n",
			       err);

	if (replace)
		r->flags |= MLX5E_ROUTE_ENTRY_VALID;
out:
	mutex_unlock(&esw->offloads.encap_tbl_lock);
	rtnl_unlock();

	mlx5e_put_flow_list(priv, &flow_list);
	mlx5e_route_put(priv, event_data->r);
	dev_put(event_data->ul_dev);
	kfree(event_data);
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv4(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib_entry_notifier_info, info);
	fib_dev = fib_info_nh(fen_info->fi, 0)->fib_nh_dev;
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->dst_len != 32)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	key.endpoint_ip.v4 = htonl(fen_info->dst);
	key.ip_version = 4;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

static struct mlx5e_tc_fib_event_data *
mlx5e_init_fib_work_ipv6(struct mlx5e_priv *priv,
			 struct net_device *ul_dev,
			 struct mlx5e_tc_tun_encap *encap,
			 unsigned long event,
			 struct fib_notifier_info *info)
{
	struct fib6_entry_notifier_info *fen_info;
	struct mlx5e_tc_fib_event_data *fib_work;
	struct mlx5e_route_entry *r;
	struct mlx5e_route_key key;
	struct net_device *fib_dev;

	fen_info = container_of(info, struct fib6_entry_notifier_info, info);
	fib_dev = fib6_info_nh_dev(fen_info->rt);
	if (fib_dev->netdev_ops != &mlx5e_netdev_ops ||
	    fen_info->rt->fib6_dst.plen != 128)
		return NULL;

	fib_work = mlx5e_tc_init_fib_work(event, ul_dev, GFP_ATOMIC);
	if (!fib_work)
		return ERR_PTR(-ENOMEM);

	memcpy(&key.endpoint_ip.v6, &fen_info->rt->fib6_dst.addr,
	       sizeof(fen_info->rt->fib6_dst.addr));
	key.ip_version = 6;

	/* Can't fail after this point because releasing reference to r
	 * requires obtaining sleeping mutex which we can't do in atomic
	 * context.
	 */
	r = mlx5e_route_lookup_for_update(encap, &key);
	if (!r)
		goto out;
	fib_work->r = r;
	dev_hold(ul_dev);

	return fib_work;

out:
	kfree(fib_work);
	return NULL;
}

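/* FIB notifier callback; runs in atomic context, so it only validates the
 * event and queues mlx5e_tc_fib_event_work to perform the actual update.
 */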
static int mlx5e_tc_tun_fib_event(struct notifier_block *nb, unsigned long event, void *ptr)
{
	struct mlx5e_tc_fib_event_data *fib_work;
	struct fib_notifier_info *info = ptr;
	struct mlx5e_tc_tun_encap *encap;
	struct net_device *ul_dev;
	struct mlx5e_priv *priv;

	encap = container_of(nb, struct mlx5e_tc_tun_encap, fib_nb);
	priv = encap->priv;
	ul_dev = priv->netdev;
	priv = netdev_priv(ul_dev);

	switch (event) {
	case FIB_EVENT_ENTRY_REPLACE:
	case FIB_EVENT_ENTRY_DEL:
		if (info->family == AF_INET)
			fib_work = mlx5e_init_fib_work_ipv4(priv, ul_dev, encap, event, info);
		else if (info->family == AF_INET6)
			fib_work = mlx5e_init_fib_work_ipv6(priv, ul_dev, encap, event, info);
		else
			return NOTIFY_DONE;

		if (!IS_ERR_OR_NULL(fib_work)) {
			queue_work(priv->wq, &fib_work->work);
		} else if (IS_ERR(fib_work)) {
			NL_SET_ERR_MSG_MOD(info->extack, "Failed to init fib work");
			mlx5_core_warn(priv->mdev, "Failed to init fib work, %ld\n",
				       PTR_ERR(fib_work));
		}

		break;
	default:
		return NOTIFY_DONE;
	}

	return NOTIFY_DONE;
}

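/* Allocate the per-uplink tunnel encap context and register the FIB notifier
 * used to track route changes for offloaded tunnels.
 */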
struct mlx5e_tc_tun_encap *mlx5e_tc_tun_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_tun_encap *encap;
	int err;

	encap = kvzalloc(sizeof(*encap), GFP_KERNEL);
	if (!encap)
		return ERR_PTR(-ENOMEM);

	encap->priv = priv;
	encap->fib_nb.notifier_call = mlx5e_tc_tun_fib_event;
	spin_lock_init(&encap->route_lock);
	hash_init(encap->route_tbl);
	err = register_fib_notifier(dev_net(priv->netdev), &encap->fib_nb,
				    NULL, NULL);
	if (err) {
		kvfree(encap);
		return ERR_PTR(err);
	}

	return encap;
}

void mlx5e_tc_tun_cleanup(struct mlx5e_tc_tun_encap *encap)
{
	if (!encap)
		return;

	unregister_fib_notifier(dev_net(encap->priv->netdev), &encap->fib_nb);
	flush_workqueue(encap->priv->wq); /* flush fib event works */
	kvfree(encap);
}