/*
 * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#include <net/flow_dissector.h>
#include <net/flow_offload.h>
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
#include <linux/mlx5/fs.h>
#include <linux/mlx5/device.h>
#include <linux/rhashtable.h>
#include <linux/refcount.h>
#include <linux/completion.h>
#include <net/arp.h>
#include <net/ipv6_stubs.h>
#include <net/bareudp.h>
#include <net/bonding.h>
#include "en.h"
#include "en/tc/post_act.h"
#include "en_rep.h"
#include "en/rep/tc.h"
#include "en/rep/neigh.h"
#include "en_tc.h"
#include "eswitch.h"
#include "fs_core.h"
#include "en/port.h"
#include "en/tc_tun.h"
#include "en/mapping.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/tc_tun_encap.h"
#include "en/tc/sample.h"
#include "en/tc/act/act.h"
#include "lib/devcom.h"
#include "lib/geneve.h"
#include "lib/fs_chains.h"
#include "diag/en_tc_tracepoint.h"
#include <asm/div64.h>
#include "lag/lag.h"
#include "lag/mp.h"
#define MLX5E_TC_TABLE_NUM_GROUPS 4
#define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(18)
struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
        [CHAIN_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
                .moffset = 0,
                .mlen = 16,
        },
        [VPORT_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
                .moffset = 16,
                .mlen = 16,
        },
        [TUNNEL_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_1,
                .moffset = 8,
                .mlen = ESW_TUN_OPTS_BITS + ESW_TUN_ID_BITS,
                .soffset = MLX5_BYTE_OFF(fte_match_param,
                                         misc_parameters_2.metadata_reg_c_1),
        },
        [ZONE_TO_REG] = zone_to_reg_ct,
        [ZONE_RESTORE_TO_REG] = zone_restore_to_reg_ct,
        [CTSTATE_TO_REG] = ctstate_to_reg_ct,
        [MARK_TO_REG] = mark_to_reg_ct,
        [LABELS_TO_REG] = labels_to_reg_ct,
        [FTEID_TO_REG] = fteid_to_reg_ct,
        /* For NIC rules we store the restore metadata directly
         * into reg_b that is passed to SW since we don't
         * jump between steering domains.
         */
        [NIC_CHAIN_TO_REG] = {
                .mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_B,
                .moffset = 0,
                .mlen = 16,
        },
        [NIC_ZONE_RESTORE_TO_REG] = nic_zone_restore_to_reg_ct,
};
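
/* Note on the layout above: reg_c_0 is shared, with the chain id in its low
 * 16 bits (CHAIN_TO_REG) and the source vport metadata in its high 16 bits
 * (VPORT_TO_REG), while reg_c_1 packs the tunnel mapping id together with the
 * tunnel options mapping id.
 */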
/* Assign tc_ht its own lock class to avoid a false lockdep dependency
 * warning: when the last flow of a group is deleted and the group itself is
 * then deleted, del_sw_flow_group() calls rhashtable_destroy() on
 * fg->ftes_hash, taking that table's ht->mutex, which is a different mutex
 * than the one protecting tc_ht here.
 */
static struct lock_class_key tc_ht_lock_key;

static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow);
static void free_flow_post_acts(struct mlx5e_tc_flow *flow);
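/* Merge a (data, mask) pair for the given mapping into the reg_c match of
 * @spec, shifted to the mapping's bit offset. Illustrative example (not from
 * the original source): for an 8-bit field at moffset 8, data 0x3 with mask
 * 0xff is packed as value 0x300 / mask 0xff00 within the 32-bit register.
 */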
void
mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
                            enum mlx5e_tc_attr_to_reg type,
                            u32 data,
                            u32 mask)
{
        void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        u32 max_mask = GENMASK(match_len - 1, 0);
        __be32 curr_mask_be, curr_val_be;
        u32 curr_mask, curr_val;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        memcpy(&curr_mask_be, fmask, 4);
        memcpy(&curr_val_be, fval, 4);

        curr_mask = be32_to_cpu(curr_mask_be);
        curr_val = be32_to_cpu(curr_val_be);

        /* shift to the field's offset within the register */
        WARN_ON(mask > max_mask);
        mask <<= moffset;
        data <<= moffset;
        max_mask <<= moffset;

        /* clear the field's previous value and mask */
        curr_mask &= ~max_mask;
        curr_val &= ~max_mask;

        /* merge in the new value and mask */
        curr_mask |= mask;
        curr_val |= data;

        /* back to be32 and write */
        curr_mask_be = cpu_to_be32(curr_mask);
        curr_val_be = cpu_to_be32(curr_val);

        memcpy(fmask, &curr_mask_be, 4);
        memcpy(fval, &curr_val_be, 4);

        spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
}
void
mlx5e_tc_match_to_reg_get_match(struct mlx5_flow_spec *spec,
                                enum mlx5e_tc_attr_to_reg type,
                                u32 *val,
                                u32 *mask)
{
        void *headers_c = spec->match_criteria, *headers_v = spec->match_value, *fmask, *fval;
        int soffset = mlx5e_tc_attr_to_reg_mappings[type].soffset;
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int match_len = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        u32 max_mask = GENMASK(match_len - 1, 0);
        __be32 curr_mask_be, curr_val_be;
        u32 curr_mask, curr_val;

        fmask = headers_c + soffset;
        fval = headers_v + soffset;

        memcpy(&curr_mask_be, fmask, 4);
        memcpy(&curr_val_be, fval, 4);

        curr_mask = be32_to_cpu(curr_mask_be);
        curr_val = be32_to_cpu(curr_val_be);

        *mask = (curr_mask >> moffset) & max_mask;
        *val = (curr_val >> moffset) & max_mask;
}
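
/* Append a SET action on the mapped register to @mod_hdr_acts and return the
 * new action's index; mlx5e_tc_match_to_reg_mod_hdr_change() can use that
 * index later to rewrite the action's data in place.
 */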
int
mlx5e_tc_match_to_reg_set_and_get_id(struct mlx5_core_dev *mdev,
                                     struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                     enum mlx5_flow_namespace_type ns,
                                     enum mlx5e_tc_attr_to_reg type,
                                     u32 data)
{
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
        int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        char *modact;
        int err;

        modact = mlx5e_mod_hdr_alloc(mdev, ns, mod_hdr_acts);
        if (IS_ERR(modact))
                return PTR_ERR(modact);

        /* Firmware has 5bit length field and 0 means 32bits */
        if (mlen == 32)
                mlen = 0;

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, field, mfield);
        MLX5_SET(set_action_in, modact, offset, moffset);
        MLX5_SET(set_action_in, modact, length, mlen);
        MLX5_SET(set_action_in, modact, data, data);
        err = mod_hdr_acts->num_actions;
        mod_hdr_acts->num_actions++;

        return err;
}
struct mlx5e_tc_int_port_priv *
mlx5e_get_int_port_priv(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->int_port_priv;
        }

        return NULL;
}
static struct mlx5_tc_ct_priv *
get_ct_priv(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->ct_priv;
        }

        return priv->fs.tc.ct;
}
static struct mlx5e_tc_psample *
get_sample_priv(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->tc_psample;
        }

        return NULL;
}
static struct mlx5e_post_act *
get_post_action(struct mlx5e_priv *priv)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
                uplink_priv = &uplink_rpriv->uplink_priv;

                return uplink_priv->post_act;
        }

        return priv->fs.tc.post_act;
}
struct mlx5_flow_handle *
mlx5_tc_rule_insert(struct mlx5e_priv *priv,
                    struct mlx5_flow_spec *spec,
                    struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (is_mdev_switchdev_mode(priv->mdev))
                return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

        return mlx5e_add_offloaded_nic_rule(priv, spec, attr);
}
void
mlx5_tc_rule_delete(struct mlx5e_priv *priv,
                    struct mlx5_flow_handle *rule,
                    struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (is_mdev_switchdev_mode(priv->mdev)) {
                mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
                return;
        }

        mlx5e_del_offloaded_nic_rule(priv, rule, attr);
}
struct mlx5_flow_handle *
mlx5e_tc_rule_offload(struct mlx5e_priv *priv,
                      struct mlx5_flow_spec *spec,
                      struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (attr->flags & MLX5_ATTR_FLAG_CT) {
                struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts =
                        &attr->parse_attr->mod_hdr_acts;

                return mlx5_tc_ct_flow_offload(get_ct_priv(priv),
                                               spec, attr,
                                               mod_hdr_acts);
        }

        if (!is_mdev_switchdev_mode(priv->mdev))
                return mlx5e_add_offloaded_nic_rule(priv, spec, attr);

        if (attr->flags & MLX5_ATTR_FLAG_SAMPLE)
                return mlx5e_tc_sample_offload(get_sample_priv(priv), spec, attr);

        return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
}
void
mlx5e_tc_rule_unoffload(struct mlx5e_priv *priv,
                        struct mlx5_flow_handle *rule,
                        struct mlx5_flow_attr *attr)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        if (attr->flags & MLX5_ATTR_FLAG_CT) {
                mlx5_tc_ct_delete_flow(get_ct_priv(priv), attr);
                return;
        }

        if (!is_mdev_switchdev_mode(priv->mdev)) {
                mlx5e_del_offloaded_nic_rule(priv, rule, attr);
                return;
        }

        if (attr->flags & MLX5_ATTR_FLAG_SAMPLE) {
                mlx5e_tc_sample_unoffload(get_sample_priv(priv), rule, attr);
                return;
        }

        mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
}
int
mlx5e_tc_match_to_reg_set(struct mlx5_core_dev *mdev,
                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                          enum mlx5_flow_namespace_type ns,
                          enum mlx5e_tc_attr_to_reg type,
                          u32 data)
{
        int ret = mlx5e_tc_match_to_reg_set_and_get_id(mdev, mod_hdr_acts, ns, type, data);

        return ret < 0 ? ret : 0;
}
void mlx5e_tc_match_to_reg_mod_hdr_change(struct mlx5_core_dev *mdev,
                                          struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts,
                                          enum mlx5e_tc_attr_to_reg type,
                                          int act_id, u32 data)
{
        int moffset = mlx5e_tc_attr_to_reg_mappings[type].moffset;
        int mfield = mlx5e_tc_attr_to_reg_mappings[type].mfield;
        int mlen = mlx5e_tc_attr_to_reg_mappings[type].mlen;
        char *modact;

        modact = mlx5e_mod_hdr_get_item(mod_hdr_acts, act_id);

        /* Firmware has 5bit length field and 0 means 32bits */
        if (mlen == 32)
                mlen = 0;

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, field, mfield);
        MLX5_SET(set_action_in, modact, offset, moffset);
        MLX5_SET(set_action_in, modact, length, mlen);
        MLX5_SET(set_action_in, modact, data, data);
}
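
/* A hairpin forwards packets arriving on one function's RQ directly to
 * another function's SQ on the same NIC, so NIC-local traffic never has to
 * pass through host memory or software.
 */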
struct mlx5e_hairpin {
        struct mlx5_hairpin *pair;

        struct mlx5_core_dev *func_mdev;
        struct mlx5e_priv *func_priv;
        u32 tdn;
        struct mlx5e_tir direct_tir;

        int num_channels;
        struct mlx5e_rqt indir_rqt;
        struct mlx5e_tir indir_tir[MLX5E_NUM_INDIR_TIRS];
        struct mlx5_ttc_table *ttc;
};
struct mlx5e_hairpin_entry {
        /* a node of a hash table which keeps all the  hairpin entries */
        struct hlist_node hairpin_hlist;

        /* protects flows list */
        spinlock_t flows_lock;
        /* flows sharing the same hairpin */
        struct list_head flows;
        /* hpe's that were not fully initialized when dead peer update event
         * function traversed them.
         */
        struct list_head dead_peer_wait_list;

        u16 peer_vhca_id;
        u8 prio;
        struct mlx5e_hairpin *hp;
        refcount_t refcnt;
        struct completion res_ready;
};
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow);
struct mlx5e_tc_flow *mlx5e_flow_get(struct mlx5e_tc_flow *flow)
{
        if (!flow || !refcount_inc_not_zero(&flow->refcnt))
                return ERR_PTR(-EINVAL);

        return flow;
}

void mlx5e_flow_put(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
        if (refcount_dec_and_test(&flow->refcnt)) {
                mlx5e_tc_del_flow(priv, flow);
                kfree_rcu(flow, rcu_head);
        }
}
bool mlx5e_is_eswitch_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, ESWITCH);
}

bool mlx5e_is_ft_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, FT);
}

bool mlx5e_is_offloaded_flow(struct mlx5e_tc_flow *flow)
{
        return flow_flag_test(flow, OFFLOADED);
}

int mlx5e_get_flow_namespace(struct mlx5e_tc_flow *flow)
{
        return mlx5e_is_eswitch_flow(flow) ?
                MLX5_FLOW_NAMESPACE_FDB : MLX5_FLOW_NAMESPACE_KERNEL;
}
static struct mod_hdr_tbl *
get_mod_hdr_table(struct mlx5e_priv *priv, struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;

        return mlx5e_get_flow_namespace(flow) == MLX5_FLOW_NAMESPACE_FDB ?
                &esw->offloads.mod_hdr :
                &priv->fs.tc.mod_hdr;
}
static int mlx5e_attach_mod_hdr(struct mlx5e_priv *priv,
                                struct mlx5e_tc_flow *flow,
                                struct mlx5e_tc_flow_parse_attr *parse_attr)
{
        struct mlx5_modify_hdr *modify_hdr;
        struct mlx5e_mod_hdr_handle *mh;

        mh = mlx5e_mod_hdr_attach(priv->mdev, get_mod_hdr_table(priv, flow),
                                  mlx5e_get_flow_namespace(flow),
                                  &parse_attr->mod_hdr_acts);
        if (IS_ERR(mh))
                return PTR_ERR(mh);

        modify_hdr = mlx5e_mod_hdr_get(mh);
        flow->attr->modify_hdr = modify_hdr;
        flow->mh = mh;

        return 0;
}
static void mlx5e_detach_mod_hdr(struct mlx5e_priv *priv,
                                 struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->mh)
                return;

        mlx5e_mod_hdr_detach(priv->mdev, get_mod_hdr_table(priv, flow),
                             flow->mh);
        flow->mh = NULL;
}
static
struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
{
        struct mlx5_core_dev *mdev;
        struct net_device *netdev;
        struct mlx5e_priv *priv;

        netdev = dev_get_by_index(net, ifindex);
        if (!netdev)
                return ERR_PTR(-ENODEV);

        priv = netdev_priv(netdev);
        mdev = priv->mdev;
        dev_put(netdev);

        /* Mirred tc action holds a refcount on the ifindex net_device (see
         * net/sched/act_mirred.c:tcf_mirred_get_dev). So, it's okay to continue using mdev
         * after dev_put(netdev), while we're in the context of adding a tc flow.
         *
         * The mdev pointer corresponds to the peer/out net_device of a hairpin. It is then
         * stored in a hairpin object, which exists until all flows, that refer to it, get
         * removed.
         *
         * On the other hand, after a hairpin object has been created, the peer net_device may
         * be removed/unbound while there are still some hairpin flows that are using it. This
         * case is handled by mlx5e_tc_hairpin_update_dead_peer, which is hooked to
         * NETDEV_UNREGISTER event of the peer net_device.
         */
        return mdev;
}
static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
{
        struct mlx5e_tir_builder *builder;
        int err;

        builder = mlx5e_tir_builder_alloc(false);
        if (!builder)
                return -ENOMEM;

        err = mlx5_core_alloc_transport_domain(hp->func_mdev, &hp->tdn);
        if (err)
                goto out;

        mlx5e_tir_builder_build_inline(builder, hp->tdn, hp->pair->rqn[0]);
        err = mlx5e_tir_init(&hp->direct_tir, builder, hp->func_mdev, false);
        if (err)
                goto create_tir_err;

out:
        mlx5e_tir_builder_free(builder);
        return err;

create_tir_err:
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);

        goto out;
}
static void mlx5e_hairpin_destroy_transport(struct mlx5e_hairpin *hp)
{
        mlx5e_tir_destroy(&hp->direct_tir);
        mlx5_core_dealloc_transport_domain(hp->func_mdev, hp->tdn);
}
static int mlx5e_hairpin_create_indirect_rqt(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5e_rss_params_indir *indir;
        int err;

        indir = kvmalloc(sizeof(*indir), GFP_KERNEL);
        if (!indir)
                return -ENOMEM;

        mlx5e_rss_params_indir_init_uniform(indir, hp->num_channels);
        err = mlx5e_rqt_init_indir(&hp->indir_rqt, mdev, hp->pair->rqn, hp->num_channels,
                                   mlx5e_rx_res_get_current_hash(priv->rx_res).hfunc,
                                   indir);

        kvfree(indir);
        return err;
}
static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct mlx5e_rss_params_hash rss_hash;
        enum mlx5_traffic_types tt, max_tt;
        struct mlx5e_tir_builder *builder;
        int err = 0;

        builder = mlx5e_tir_builder_alloc(false);
        if (!builder)
                return -ENOMEM;

        rss_hash = mlx5e_rx_res_get_current_hash(priv->rx_res);

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
                struct mlx5e_rss_params_traffic_type rss_tt;

                rss_tt = mlx5e_rss_get_default_tt_config(tt);

                mlx5e_tir_builder_build_rqt(builder, hp->tdn,
                                            mlx5e_rqt_get_rqtn(&hp->indir_rqt),
                                            false);
                mlx5e_tir_builder_build_rss(builder, &rss_hash, &rss_tt, false);

                err = mlx5e_tir_init(&hp->indir_tir[tt], builder, hp->func_mdev, false);
                if (err) {
                        mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
                        goto err_destroy_tirs;
                }

                mlx5e_tir_builder_clear(builder);
        }

out:
        mlx5e_tir_builder_free(builder);
        return err;

err_destroy_tirs:
        max_tt = tt;
        for (tt = 0; tt < max_tt; tt++)
                mlx5e_tir_destroy(&hp->indir_tir[tt]);

        goto out;
}
static void mlx5e_hairpin_destroy_indirect_tirs(struct mlx5e_hairpin *hp)
{
        int tt;

        for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++)
                mlx5e_tir_destroy(&hp->indir_tir[tt]);
}
static void mlx5e_hairpin_set_ttc_params(struct mlx5e_hairpin *hp,
                                         struct ttc_params *ttc_params)
{
        struct mlx5_flow_table_attr *ft_attr = &ttc_params->ft_attr;
        int tt;

        memset(ttc_params, 0, sizeof(*ttc_params));

        ttc_params->ns = mlx5_get_flow_namespace(hp->func_mdev,
                                                 MLX5_FLOW_NAMESPACE_KERNEL);
        for (tt = 0; tt < MLX5_NUM_TT; tt++) {
                ttc_params->dests[tt].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                ttc_params->dests[tt].tir_num =
                        tt == MLX5_TT_ANY ?
                                mlx5e_tir_get_tirn(&hp->direct_tir) :
                                mlx5e_tir_get_tirn(&hp->indir_tir[tt]);
        }

        ft_attr->level = MLX5E_TC_TTC_FT_LEVEL;
        ft_attr->prio = MLX5E_TC_PRIO;
}
static int mlx5e_hairpin_rss_init(struct mlx5e_hairpin *hp)
{
        struct mlx5e_priv *priv = hp->func_priv;
        struct ttc_params ttc_params;
        int err;

        err = mlx5e_hairpin_create_indirect_rqt(hp);
        if (err)
                return err;

        err = mlx5e_hairpin_create_indirect_tirs(hp);
        if (err)
                goto err_create_indirect_tirs;

        mlx5e_hairpin_set_ttc_params(hp, &ttc_params);
        hp->ttc = mlx5_create_ttc_table(priv->mdev, &ttc_params);
        if (IS_ERR(hp->ttc)) {
                err = PTR_ERR(hp->ttc);
                goto err_create_ttc_table;
        }

        netdev_dbg(priv->netdev, "add hairpin: using %d channels rss ttc table id %x\n",
                   hp->num_channels,
                   mlx5_get_ttc_flow_table(priv->fs.ttc)->id);

        return 0;

err_create_ttc_table:
        mlx5e_hairpin_destroy_indirect_tirs(hp);
err_create_indirect_tirs:
        mlx5e_rqt_destroy(&hp->indir_rqt);

        return err;
}

static void mlx5e_hairpin_rss_cleanup(struct mlx5e_hairpin *hp)
{
        mlx5_destroy_ttc_table(hp->ttc);
        mlx5e_hairpin_destroy_indirect_tirs(hp);
        mlx5e_rqt_destroy(&hp->indir_rqt);
}
static struct mlx5e_hairpin *
mlx5e_hairpin_create(struct mlx5e_priv *priv, struct mlx5_hairpin_params *params,
                     int peer_ifindex)
{
        struct mlx5_core_dev *func_mdev, *peer_mdev;
        struct mlx5e_hairpin *hp;
        struct mlx5_hairpin *pair;
        int err;

        hp = kzalloc(sizeof(*hp), GFP_KERNEL);
        if (!hp)
                return ERR_PTR(-ENOMEM);

        func_mdev = priv->mdev;
        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (IS_ERR(peer_mdev)) {
                err = PTR_ERR(peer_mdev);
                goto create_pair_err;
        }

        pair = mlx5_core_hairpin_create(func_mdev, peer_mdev, params);
        if (IS_ERR(pair)) {
                err = PTR_ERR(pair);
                goto create_pair_err;
        }
        hp->pair = pair;
        hp->func_mdev = func_mdev;
        hp->func_priv = priv;
        hp->num_channels = params->num_channels;

        err = mlx5e_hairpin_create_transport(hp);
        if (err)
                goto create_transport_err;

        if (hp->num_channels > 1) {
                err = mlx5e_hairpin_rss_init(hp);
                if (err)
                        goto rss_init_err;
        }

        return hp;

rss_init_err:
        mlx5e_hairpin_destroy_transport(hp);
create_transport_err:
        mlx5_core_hairpin_destroy(hp->pair);
create_pair_err:
        kfree(hp);
        return ERR_PTR(err);
}

static void mlx5e_hairpin_destroy(struct mlx5e_hairpin *hp)
{
        if (hp->num_channels > 1)
                mlx5e_hairpin_rss_cleanup(hp);
        mlx5e_hairpin_destroy_transport(hp);
        mlx5_core_hairpin_destroy(hp->pair);
}
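
/* Hairpin entries are keyed by (peer vhca_id, matched VLAN prio); e.g. a
 * peer_vhca_id of 0x25 with prio 3 yields the hash key 0x250003
 * (illustrative values, not from the original source).
 */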
static inline u32 hash_hairpin_info(u16 peer_vhca_id, u8 prio)
{
        return (peer_vhca_id << 16 | prio);
}
static struct mlx5e_hairpin_entry *mlx5e_hairpin_get(struct mlx5e_priv *priv,
                                                     u16 peer_vhca_id, u8 prio)
{
        struct mlx5e_hairpin_entry *hpe;
        u32 hash_key = hash_hairpin_info(peer_vhca_id, prio);

        hash_for_each_possible(priv->fs.tc.hairpin_tbl, hpe,
                               hairpin_hlist, hash_key) {
                if (hpe->peer_vhca_id == peer_vhca_id && hpe->prio == prio) {
                        refcount_inc(&hpe->refcnt);
                        return hpe;
                }
        }

        return NULL;
}
static void mlx5e_hairpin_put(struct mlx5e_priv *priv,
                              struct mlx5e_hairpin_entry *hpe)
{
        /* no more hairpin flows for us, release the hairpin pair */
        if (!refcount_dec_and_mutex_lock(&hpe->refcnt, &priv->fs.tc.hairpin_tbl_lock))
                return;
        hash_del(&hpe->hairpin_hlist);
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        if (!IS_ERR_OR_NULL(hpe->hp)) {
                netdev_dbg(priv->netdev, "del hairpin: peer %s\n",
                           dev_name(hpe->hp->pair->peer_mdev->device));

                mlx5e_hairpin_destroy(hpe->hp);
        }

        WARN_ON(!list_empty(&hpe->flows));
        kfree(hpe);
}
#define UNKNOWN_MATCH_PRIO 8
static int mlx5e_hairpin_get_prio(struct mlx5e_priv *priv,
                                  struct mlx5_flow_spec *spec, u8 *match_prio,
                                  struct netlink_ext_ack *extack)
{
        void *headers_c, *headers_v;
        u8 prio_val, prio_mask = 0;
        bool vlan_present;

#ifdef CONFIG_MLX5_CORE_EN_DCB
        if (priv->dcbx_dp.trust_state != MLX5_QPTS_TRUST_PCP) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only PCP trust state supported for hairpin");
                return -EOPNOTSUPP;
        }
#endif
        headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, outer_headers);
        headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);

        vlan_present = MLX5_GET(fte_match_set_lyr_2_4, headers_v, cvlan_tag);
        if (vlan_present) {
                prio_mask = MLX5_GET(fte_match_set_lyr_2_4, headers_c, first_prio);
                prio_val = MLX5_GET(fte_match_set_lyr_2_4, headers_v, first_prio);
        }

        if (!vlan_present || !prio_mask) {
                prio_val = UNKNOWN_MATCH_PRIO;
        } else if (prio_mask != 0x7) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "masked priority match not supported for hairpin");
                return -EOPNOTSUPP;
        }

        *match_prio = prio_val;
        return 0;
}
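
/* Set up (or look up) the hairpin pair for a NIC flow whose mirred target is
 * another mlx5 netdev on the same NIC. A redirect along these lines would end
 * up here (device names are illustrative, not from the original source):
 *
 *   tc filter add dev mlx5_p0 protocol ip ingress flower \
 *           action mirred egress redirect dev mlx5_p1
 */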
static int mlx5e_hairpin_flow_add(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5e_tc_flow_parse_attr *parse_attr,
                                  struct netlink_ext_ack *extack)
{
        int peer_ifindex = parse_attr->mirred_ifindex[0];
        struct mlx5_hairpin_params params;
        struct mlx5_core_dev *peer_mdev;
        struct mlx5e_hairpin_entry *hpe;
        struct mlx5e_hairpin *hp;
        u64 link_speed64;
        u32 link_speed;
        u8 match_prio;
        u16 peer_id;
        int err;

        peer_mdev = mlx5e_hairpin_get_mdev(dev_net(priv->netdev), peer_ifindex);
        if (IS_ERR(peer_mdev)) {
                NL_SET_ERR_MSG_MOD(extack, "invalid ifindex of mirred device");
                return PTR_ERR(peer_mdev);
        }

        if (!MLX5_CAP_GEN(priv->mdev, hairpin) || !MLX5_CAP_GEN(peer_mdev, hairpin)) {
                NL_SET_ERR_MSG_MOD(extack, "hairpin is not supported");
                return -EOPNOTSUPP;
        }

        peer_id = MLX5_CAP_GEN(peer_mdev, vhca_id);
        err = mlx5e_hairpin_get_prio(priv, &parse_attr->spec, &match_prio,
                                     extack);
        if (err)
                return err;

        mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
        hpe = mlx5e_hairpin_get(priv, peer_id, match_prio);
        if (hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                wait_for_completion(&hpe->res_ready);

                if (IS_ERR(hpe->hp)) {
                        err = -EREMOTEIO;
                        goto out_err;
                }
                goto attach_flow;
        }

        hpe = kzalloc(sizeof(*hpe), GFP_KERNEL);
        if (!hpe) {
                mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);
                return -ENOMEM;
        }

        spin_lock_init(&hpe->flows_lock);
        INIT_LIST_HEAD(&hpe->flows);
        INIT_LIST_HEAD(&hpe->dead_peer_wait_list);
        hpe->peer_vhca_id = peer_id;
        hpe->prio = match_prio;
        refcount_set(&hpe->refcnt, 1);
        init_completion(&hpe->res_ready);

        hash_add(priv->fs.tc.hairpin_tbl, &hpe->hairpin_hlist,
                 hash_hairpin_info(peer_id, match_prio));
        mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

        params.log_data_size = 16;
        params.log_data_size = min_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_max_hairpin_wq_data_sz));
        params.log_data_size = max_t(u8, params.log_data_size,
                                     MLX5_CAP_GEN(priv->mdev, log_min_hairpin_wq_data_sz));

        params.log_num_packets = params.log_data_size -
                                 MLX5_MPWRQ_MIN_LOG_STRIDE_SZ(priv->mdev);
        params.log_num_packets = min_t(u8, params.log_num_packets,
                                       MLX5_CAP_GEN(priv->mdev, log_max_hairpin_num_packets));

        params.q_counter = priv->q_counter;
        /* set hairpin pair per each 50Gbs share of the link */
        mlx5e_port_max_linkspeed(priv->mdev, &link_speed);
        link_speed = max_t(u32, link_speed, 50000);
        link_speed64 = link_speed;
        do_div(link_speed64, 50000);
        params.num_channels = link_speed64;
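
        /* Example: on a 100 Gbps port, link_speed64 = 100000 / 50000 = 2, so
         * the pair below gets two channels and RSS spreads the hairpinned
         * traffic across them.
         */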
        hp = mlx5e_hairpin_create(priv, &params, peer_ifindex);
        hpe->hp = hp;
        complete_all(&hpe->res_ready);
        if (IS_ERR(hp)) {
                err = PTR_ERR(hp);
                goto out_err;
        }

        netdev_dbg(priv->netdev, "add hairpin: tirn %x rqn %x peer %s sqn %x prio %d (log) data %d packets %d\n",
                   mlx5e_tir_get_tirn(&hp->direct_tir), hp->pair->rqn[0],
                   dev_name(hp->pair->peer_mdev->device),
                   hp->pair->sqn[0], match_prio, params.log_data_size, params.log_num_packets);

attach_flow:
        if (hpe->hp->num_channels > 1) {
                flow_flag_set(flow, HAIRPIN_RSS);
                flow->attr->nic_attr->hairpin_ft =
                        mlx5_get_ttc_flow_table(hpe->hp->ttc);
        } else {
                flow->attr->nic_attr->hairpin_tirn = mlx5e_tir_get_tirn(&hpe->hp->direct_tir);
        }

        flow->hpe = hpe;
        spin_lock(&hpe->flows_lock);
        list_add(&flow->hairpin, &hpe->flows);
        spin_unlock(&hpe->flows_lock);

        return 0;

out_err:
        mlx5e_hairpin_put(priv, hpe);
        return err;
}
static void mlx5e_hairpin_flow_del(struct mlx5e_priv *priv,
                                   struct mlx5e_tc_flow *flow)
{
        /* flow wasn't fully initialized */
        if (!flow->hpe)
                return;

        spin_lock(&flow->hpe->flows_lock);
        list_del(&flow->hairpin);
        spin_unlock(&flow->hpe->flows_lock);

        mlx5e_hairpin_put(priv, flow->hpe);
        flow->hpe = NULL;
}
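
/* dest[] below has two slots: slot 0 holds the forward destination (a chain
 * table, the hairpin table/TIR, or the vlan table) and slot 1, when counting
 * is enabled, the flow counter.
 */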
struct mlx5_flow_handle *
mlx5e_add_offloaded_nic_rule(struct mlx5e_priv *priv,
                             struct mlx5_flow_spec *spec,
                             struct mlx5_flow_attr *attr)
{
        struct mlx5_flow_context *flow_context = &spec->flow_context;
        struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv);
        struct mlx5_nic_flow_attr *nic_attr = attr->nic_attr;
        struct mlx5e_tc_table *tc = &priv->fs.tc;
        struct mlx5_flow_destination dest[2] = {};
        struct mlx5_flow_act flow_act = {
                .action = attr->action,
                .flags = FLOW_ACT_NO_APPEND,
        };
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_table *ft;
        int dest_ix = 0;

        flow_context->flags |= FLOW_CONTEXT_HAS_TAG;
        flow_context->flow_tag = nic_attr->flow_tag;

        if (attr->dest_ft) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = attr->dest_ft;
                dest_ix++;
        } else if (nic_attr->hairpin_ft) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                dest[dest_ix].ft = nic_attr->hairpin_ft;
                dest_ix++;
        } else if (nic_attr->hairpin_tirn) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_TIR;
                dest[dest_ix].tir_num = nic_attr->hairpin_tirn;
                dest_ix++;
        } else if (attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
                if (attr->dest_chain) {
                        dest[dest_ix].ft = mlx5_chains_get_table(nic_chains,
                                                                 attr->dest_chain, 1,
                                                                 MLX5E_TC_FT_LEVEL);
                        if (IS_ERR(dest[dest_ix].ft))
                                return ERR_CAST(dest[dest_ix].ft);
                } else {
                        dest[dest_ix].ft = mlx5e_vlan_get_flowtable(priv->fs.vlan);
                }
                dest_ix++;
        }

        if (dest[0].type == MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE &&
            MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
                flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;

        if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                dest[dest_ix].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
                dest[dest_ix].counter_id = mlx5_fc_id(attr->counter);
                dest_ix++;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                flow_act.modify_hdr = attr->modify_hdr;

        mutex_lock(&tc->t_lock);
        if (IS_ERR_OR_NULL(tc->t)) {
                /* Create the root table here if doesn't exist yet */
                tc->t =
                        mlx5_chains_get_table(nic_chains, 0, 1, MLX5E_TC_FT_LEVEL);

                if (IS_ERR(tc->t)) {
                        mutex_unlock(&tc->t_lock);
                        netdev_err(priv->netdev,
                                   "Failed to create tc offload table\n");
                        rule = ERR_CAST(priv->fs.tc.t);
                        goto err_ft_get;
                }
        }
        mutex_unlock(&tc->t_lock);

        if (attr->chain || attr->prio)
                ft = mlx5_chains_get_table(nic_chains,
                                           attr->chain, attr->prio,
                                           MLX5E_TC_FT_LEVEL);
        else
                ft = attr->ft;

        if (IS_ERR(ft)) {
                rule = ERR_CAST(ft);
                goto err_ft_get;
        }

        if (attr->outer_match_level != MLX5_MATCH_NONE)
                spec->match_criteria_enable |= MLX5_MATCH_OUTER_HEADERS;

        rule = mlx5_add_flow_rules(ft, spec,
                                   &flow_act, dest, dest_ix);
        if (IS_ERR(rule))
                goto err_rule;

        return rule;

err_rule:
        if (attr->chain || attr->prio)
                mlx5_chains_put_table(nic_chains,
                                      attr->chain, attr->prio,
                                      MLX5E_TC_FT_LEVEL);
err_ft_get:
        if (attr->dest_chain)
                mlx5_chains_put_table(nic_chains,
                                      attr->dest_chain, 1,
                                      MLX5E_TC_FT_LEVEL);

        return ERR_CAST(rule);
}
static int
alloc_flow_attr_counter(struct mlx5_core_dev *counter_dev,
                        struct mlx5_flow_attr *attr)

{
        struct mlx5_fc *counter;

        counter = mlx5_fc_create(counter_dev, true);
        if (IS_ERR(counter))
                return PTR_ERR(counter);

        attr->counter = counter;
        return 0;
}
static int
mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_core_dev *dev = priv->mdev;
        int err;

        parse_attr = attr->parse_attr;

        if (flow_flag_test(flow, HAIRPIN)) {
                err = mlx5e_hairpin_flow_add(priv, flow, parse_attr, extack);
                if (err)
                        return err;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                err = alloc_flow_attr_counter(dev, attr);
                if (err)
                        return err;
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
                if (err)
                        return err;
        }

        if (attr->flags & MLX5_ATTR_FLAG_CT)
                flow->rule[0] = mlx5_tc_ct_flow_offload(get_ct_priv(priv), &parse_attr->spec,
                                                        attr, &parse_attr->mod_hdr_acts);
        else
                flow->rule[0] = mlx5e_add_offloaded_nic_rule(priv, &parse_attr->spec,
                                                             attr);

        return PTR_ERR_OR_ZERO(flow->rule[0]);
}
void mlx5e_del_offloaded_nic_rule(struct mlx5e_priv *priv,
                                  struct mlx5_flow_handle *rule,
                                  struct mlx5_flow_attr *attr)
{
        struct mlx5_fs_chains *nic_chains = mlx5e_nic_chains(priv);

        mlx5_del_flow_rules(rule);

        if (attr->chain || attr->prio)
                mlx5_chains_put_table(nic_chains, attr->chain, attr->prio,
                                      MLX5E_TC_FT_LEVEL);

        if (attr->dest_chain)
                mlx5_chains_put_table(nic_chains, attr->dest_chain, 1,
                                      MLX5E_TC_FT_LEVEL);
}
static void mlx5e_tc_del_nic_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5e_tc_table *tc = &priv->fs.tc;

        flow_flag_clear(flow, OFFLOADED);

        if (attr->flags & MLX5_ATTR_FLAG_CT)
                mlx5_tc_ct_delete_flow(get_ct_priv(flow->priv), attr);
        else if (!IS_ERR_OR_NULL(flow->rule[0]))
                mlx5e_del_offloaded_nic_rule(priv, flow->rule[0], attr);

        /* Remove root table if no rules are left to avoid
         * extra steering hops.
         */
        mutex_lock(&priv->fs.tc.t_lock);
        if (!mlx5e_tc_num_filters(priv, MLX5_TC_FLAG(NIC_OFFLOAD)) &&
            !IS_ERR_OR_NULL(tc->t)) {
                mlx5_chains_put_table(mlx5e_nic_chains(priv), 0, 1, MLX5E_TC_FT_LEVEL);
                priv->fs.tc.t = NULL;
        }
        mutex_unlock(&priv->fs.tc.t_lock);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)
                mlx5e_detach_mod_hdr(priv, flow);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
                mlx5_fc_destroy(priv->mdev, attr->counter);

        if (flow_flag_test(flow, HAIRPIN))
                mlx5e_hairpin_flow_del(priv, flow);

        free_flow_post_acts(flow);

        kvfree(attr->parse_attr);
        kfree(flow->attr);
}
struct mlx5_flow_handle *
mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
                           struct mlx5e_tc_flow *flow,
                           struct mlx5_flow_spec *spec,
                           struct mlx5_flow_attr *attr)
{
        struct mlx5_flow_handle *rule;

        if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
                return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);

        rule = mlx5e_tc_rule_offload(flow->priv, spec, attr);
        if (IS_ERR(rule))
                return rule;

        if (attr->esw_attr->split_count) {
                flow->rule[1] = mlx5_eswitch_add_fwd_rule(esw, spec, attr);
                if (IS_ERR(flow->rule[1]))
                        goto err_rule1;
        }

        return rule;

err_rule1:
        mlx5e_tc_rule_unoffload(flow->priv, rule, attr);
        return flow->rule[1];
}
void mlx5e_tc_unoffload_fdb_rules(struct mlx5_eswitch *esw,
                                  struct mlx5e_tc_flow *flow,
                                  struct mlx5_flow_attr *attr)
{
        flow_flag_clear(flow, OFFLOADED);

        if (attr->flags & MLX5_ATTR_FLAG_SLOW_PATH)
                return mlx5_eswitch_del_offloaded_rule(esw, flow->rule[0], attr);

        if (attr->esw_attr->split_count)
                mlx5_eswitch_del_fwd_rule(esw, flow->rule[1], attr);

        mlx5e_tc_rule_unoffload(flow->priv, flow->rule[0], attr);
}
struct mlx5_flow_handle *
mlx5e_tc_offload_to_slow_path(struct mlx5_eswitch *esw,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_spec *spec)
{
        struct mlx5_flow_attr *slow_attr;
        struct mlx5_flow_handle *rule;

        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
        if (!slow_attr)
                return ERR_PTR(-ENOMEM);

        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
        slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;

        rule = mlx5e_tc_offload_fdb_rules(esw, flow, spec, slow_attr);
        if (!IS_ERR(rule))
                flow_flag_set(flow, SLOW);

        kfree(slow_attr);

        return rule;
}
void mlx5e_tc_unoffload_from_slow_path(struct mlx5_eswitch *esw,
                                       struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_attr *slow_attr;

        slow_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
        if (!slow_attr) {
                mlx5_core_warn(flow->priv->mdev, "Unable to alloc attr to unoffload slow path rule\n");
                return;
        }

        memcpy(slow_attr, flow->attr, ESW_FLOW_ATTR_SZ);
        slow_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
        slow_attr->esw_attr->split_count = 0;
        slow_attr->flags |= MLX5_ATTR_FLAG_SLOW_PATH;
        mlx5e_tc_unoffload_fdb_rules(esw, flow, slow_attr);
        flow_flag_clear(flow, SLOW);
        kfree(slow_attr);
}
/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_add(struct mlx5e_tc_flow *flow,
                             struct list_head *unready_flows)
{
        flow_flag_set(flow, NOT_READY);
        list_add_tail(&flow->unready, unready_flows);
}

/* Caller must obtain uplink_priv->unready_flows_lock mutex before calling this
 * function.
 */
static void unready_flow_del(struct mlx5e_tc_flow *flow)
{
        list_del(&flow->unready);
        flow_flag_clear(flow, NOT_READY);
}

static void add_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_add(flow, &uplink_priv->unready_flows);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}

static void remove_unready_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &rpriv->uplink_priv;

        mutex_lock(&uplink_priv->unready_flows_lock);
        unready_flow_del(flow);
        mutex_unlock(&uplink_priv->unready_flows_lock);
}
bool mlx5e_tc_is_vf_tunnel(struct net_device *out_dev, struct net_device *route_dev)
{
        struct mlx5_core_dev *out_mdev, *route_mdev;
        struct mlx5e_priv *out_priv, *route_priv;

        out_priv = netdev_priv(out_dev);
        out_mdev = out_priv->mdev;
        route_priv = netdev_priv(route_dev);
        route_mdev = route_priv->mdev;

        if (out_mdev->coredev_type != MLX5_COREDEV_PF ||
            route_mdev->coredev_type != MLX5_COREDEV_VF)
                return false;

        return mlx5e_same_hw_devs(out_priv, route_priv);
}
int mlx5e_tc_query_route_vport(struct net_device *out_dev, struct net_device *route_dev, u16 *vport)
{
        struct mlx5e_priv *out_priv, *route_priv;
        struct mlx5_devcom *devcom = NULL;
        struct mlx5_core_dev *route_mdev;
        struct mlx5_eswitch *esw;
        u16 vhca_id;
        int err;

        out_priv = netdev_priv(out_dev);
        esw = out_priv->mdev->priv.eswitch;
        route_priv = netdev_priv(route_dev);
        route_mdev = route_priv->mdev;

        vhca_id = MLX5_CAP_GEN(route_mdev, vhca_id);
        if (mlx5_lag_is_active(out_priv->mdev)) {
                /* In lag case we may get devices from different eswitch instances.
                 * If we failed to get the vport num, it most likely means that we
                 * are on the wrong eswitch.
                 */
                err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
                if (err != -ENOENT)
                        return err;

                devcom = out_priv->mdev->priv.devcom;
                esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
                if (!esw)
                        return -ENODEV;
        }

        err = mlx5_eswitch_vhca_id_to_vport(esw, vhca_id, vport);
        if (devcom)
                mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        return err;
}
int mlx5e_tc_add_flow_mod_hdr(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow,
                              struct mlx5_flow_attr *attr)
{
        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
        struct mlx5_modify_hdr *mod_hdr;

        mod_hdr = mlx5_modify_header_alloc(priv->mdev,
                                           mlx5e_get_flow_namespace(flow),
                                           mod_hdr_acts->num_actions,
                                           mod_hdr_acts->actions);
        if (IS_ERR(mod_hdr))
                return PTR_ERR(mod_hdr);

        WARN_ON(attr->modify_hdr);
        attr->modify_hdr = mod_hdr;

        return 0;
}
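
/* Resolve each encap destination of an eswitch flow: attach the encap entry,
 * record the destination rep and mdev, and report through vf_tun whether any
 * destination routes through a VF tunnel, which rules out mirroring below.
 */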
static int
set_encap_dests(struct mlx5e_priv *priv,
                struct mlx5e_tc_flow *flow,
                struct mlx5_flow_attr *attr,
                struct netlink_ext_ack *extack,
                bool *encap_valid,
                bool *vf_tun)
{
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_esw_flow_attr *esw_attr;
        struct net_device *encap_dev = NULL;
        struct mlx5e_rep_priv *rpriv;
        struct mlx5e_priv *out_priv;
        int out_index;
        int err = 0;

        if (!mlx5e_is_eswitch_flow(flow))
                return 0;

        parse_attr = attr->parse_attr;
        esw_attr = attr->esw_attr;
        *vf_tun = false;
        *encap_valid = true;

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                struct net_device *out_dev;
                int mirred_ifindex;

                if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                mirred_ifindex = parse_attr->mirred_ifindex[out_index];
                out_dev = dev_get_by_index(dev_net(priv->netdev), mirred_ifindex);
                if (!out_dev) {
                        NL_SET_ERR_MSG_MOD(extack, "Requested mirred device not found");
                        err = -ENODEV;
                        goto out;
                }
                err = mlx5e_attach_encap(priv, flow, attr, out_dev, out_index,
                                         extack, &encap_dev, encap_valid);
                dev_put(out_dev);
                if (err)
                        goto out;

                if (esw_attr->dests[out_index].flags &
                    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
                    !esw_attr->dest_int_port)
                        *vf_tun = true;

                out_priv = netdev_priv(encap_dev);
                rpriv = out_priv->ppriv;
                esw_attr->dests[out_index].rep = rpriv->rep;
                esw_attr->dests[out_index].mdev = out_priv->mdev;
        }

        if (*vf_tun && esw_attr->out_count > 1) {
                NL_SET_ERR_MSG_MOD(extack, "VF tunnel encap with mirroring is not supported");
                err = -EOPNOTSUPP;
                goto out;
        }

out:
        return err;
}
static void
clean_encap_dests(struct mlx5e_priv *priv,
                  struct mlx5e_tc_flow *flow,
                  struct mlx5_flow_attr *attr,
                  bool *vf_tun)
{
        struct mlx5_esw_flow_attr *esw_attr;
        int out_index;

        if (!mlx5e_is_eswitch_flow(flow))
                return;

        esw_attr = attr->esw_attr;
        *vf_tun = false;

        for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
                if (!(esw_attr->dests[out_index].flags & MLX5_ESW_DEST_ENCAP))
                        continue;

                if (esw_attr->dests[out_index].flags &
                    MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE &&
                    !esw_attr->dest_int_port)
                        *vf_tun = true;

                mlx5e_detach_encap(priv, flow, attr, out_index);
                kfree(attr->parse_attr->tun_info[out_index]);
        }
}
static int
mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
                      struct mlx5e_tc_flow *flow,
                      struct netlink_ext_ack *extack)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5e_tc_flow_parse_attr *parse_attr;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;
        bool vf_tun, encap_valid;
        u32 max_prio, max_chain;
        int err = 0;

        parse_attr = attr->parse_attr;
        esw_attr = attr->esw_attr;

        /* We check chain range only for tc flows.
         * For ft flows, we checked attr->chain was originally 0 and set it to
         * FDB_FT_CHAIN which is outside tc range.
         * See mlx5e_rep_setup_ft_cb().
         */
        max_chain = mlx5_chains_get_chain_range(esw_chains(esw));
        if (!mlx5e_is_ft_flow(flow) && attr->chain > max_chain) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Requested chain is out of supported range");
                err = -EOPNOTSUPP;
                goto err_out;
        }

        max_prio = mlx5_chains_get_prio_range(esw_chains(esw));
        if (attr->prio > max_prio) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Requested priority is out of supported range");
                err = -EOPNOTSUPP;
                goto err_out;
        }

        if (flow_flag_test(flow, TUN_RX)) {
                err = mlx5e_attach_decap_route(priv, flow);
                if (err)
                        goto err_out;

                if (!attr->chain && esw_attr->int_port &&
                    attr->action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) {
                        /* If decap route device is internal port, change the
                         * source vport value in reg_c0 back to uplink just in
                         * case the rule performs goto chain > 0. If we have a miss
                         * on chain > 0 we want the metadata regs to hold the
                         * chain id so SW will resume handling of this packet
                         * from the proper chain.
                         */
                        u32 metadata = mlx5_eswitch_get_vport_metadata_for_set(esw,
                                                                        esw_attr->in_rep->vport);

                        err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
                                                        MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
                                                        metadata);
                        if (err)
                                goto err_out;

                        attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
                }
        }

        if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
                err = mlx5e_attach_decap(priv, flow, extack);
                if (err)
                        goto err_out;
        }

        if (netif_is_ovs_master(parse_attr->filter_dev)) {
                struct mlx5e_tc_int_port *int_port;

                if (attr->chain) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Internal port rule is only supported on chain 0");
                        err = -EOPNOTSUPP;
                        goto err_out;
                }

                if (attr->dest_chain) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Internal port rule offload doesn't support goto action");
                        err = -EOPNOTSUPP;
                        goto err_out;
                }

                int_port = mlx5e_tc_int_port_get(mlx5e_get_int_port_priv(priv),
                                                 parse_attr->filter_dev->ifindex,
                                                 flow_flag_test(flow, EGRESS) ?
                                                 MLX5E_TC_INT_PORT_EGRESS :
                                                 MLX5E_TC_INT_PORT_INGRESS);
                if (IS_ERR(int_port)) {
                        err = PTR_ERR(int_port);
                        goto err_out;
                }

                esw_attr->int_port = int_port;
        }

        err = set_encap_dests(priv, flow, attr, extack, &encap_valid, &vf_tun);
        if (err)
                goto err_out;

        err = mlx5_eswitch_add_vlan_action(esw, attr);
        if (err)
                goto err_out;

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                if (vf_tun) {
                        err = mlx5e_tc_add_flow_mod_hdr(priv, flow, attr);
                        if (err)
                                goto err_out;
                } else {
                        err = mlx5e_attach_mod_hdr(priv, flow, parse_attr);
                        if (err)
                                goto err_out;
                }
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
                err = alloc_flow_attr_counter(esw_attr->counter_dev, attr);
                if (err)
                        goto err_out;
        }

        /* we get here if one of the following takes place:
         * (1) there's no error
         * (2) there's an encap action and we don't have valid neigh
         */
        if (!encap_valid || flow_flag_test(flow, SLOW))
                flow->rule[0] = mlx5e_tc_offload_to_slow_path(esw, flow, &parse_attr->spec);
        else
                flow->rule[0] = mlx5e_tc_offload_fdb_rules(esw, flow, &parse_attr->spec, attr);

        if (IS_ERR(flow->rule[0])) {
                err = PTR_ERR(flow->rule[0]);
                goto err_out;
        }
        flow_flag_set(flow, OFFLOADED);

        return 0;

err_out:
        flow_flag_set(flow, FAILED);
        return err;
}
static bool mlx5_flow_has_geneve_opt(struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_spec *spec = &flow->attr->parse_attr->spec;
        void *headers_v = MLX5_ADDR_OF(fte_match_param,
                                       spec->match_value,
                                       misc_parameters_3);
        u32 geneve_tlv_opt_0_data = MLX5_GET(fte_match_set_misc3,
                                             headers_v,
                                             geneve_tlv_option_0_data);

        return !!geneve_tlv_opt_0_data;
}
static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
                                  struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5_esw_flow_attr *esw_attr;
        bool vf_tun;

        esw_attr = attr->esw_attr;
        mlx5e_put_flow_tunnel_id(flow);

        if (flow_flag_test(flow, NOT_READY))
                remove_unready_flow(flow);

        if (mlx5e_is_offloaded_flow(flow)) {
                if (flow_flag_test(flow, SLOW))
                        mlx5e_tc_unoffload_from_slow_path(esw, flow);
                else
                        mlx5e_tc_unoffload_fdb_rules(esw, flow, attr);
        }
        complete_all(&flow->del_hw_done);

        if (mlx5_flow_has_geneve_opt(flow))
                mlx5_geneve_tlv_option_del(priv->mdev->geneve);

        mlx5_eswitch_del_vlan_action(esw, attr);

        if (flow->decap_route)
                mlx5e_detach_decap_route(priv, flow);

        clean_encap_dests(priv, flow, attr, &vf_tun);

        mlx5_tc_ct_match_del(get_ct_priv(priv), &flow->attr->ct_attr);

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
                mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
                if (vf_tun && attr->modify_hdr)
                        mlx5_modify_header_dealloc(priv->mdev, attr->modify_hdr);
                else
                        mlx5e_detach_mod_hdr(priv, flow);
        }

        if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
                mlx5_fc_destroy(esw_attr->counter_dev, attr->counter);

        if (esw_attr->int_port)
                mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->int_port);

        if (esw_attr->dest_int_port)
                mlx5e_tc_int_port_put(mlx5e_get_int_port_priv(priv), esw_attr->dest_int_port);

        if (flow_flag_test(flow, L3_TO_L2_DECAP))
                mlx5e_detach_decap(priv, flow);

        free_flow_post_acts(flow);

        if (flow->attr->lag.count)
                mlx5_lag_del_mpesw_rule(esw->dev);

        kvfree(attr->esw_attr->rx_tun_attr);
        kvfree(attr->parse_attr);
        kfree(flow->attr);
}
struct mlx5_fc *mlx5e_tc_get_counter(struct mlx5e_tc_flow *flow)
{
        struct mlx5_flow_attr *attr;

        attr = list_first_entry(&flow->attrs, struct mlx5_flow_attr, list);
        return attr->counter;
}

/* Iterate over tmp_list of flows attached to flow_list head. */
void mlx5e_put_flow_list(struct mlx5e_priv *priv, struct list_head *flow_list)
{
        struct mlx5e_tc_flow *flow, *tmp;

        list_for_each_entry_safe(flow, tmp, flow_list, tmp_list)
                mlx5e_flow_put(priv, flow);
}
static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;

        if (!flow_flag_test(flow, ESWITCH) ||
            !flow_flag_test(flow, DUP))
                return;

        mutex_lock(&esw->offloads.peer_mutex);
        list_del(&flow->peer);
        mutex_unlock(&esw->offloads.peer_mutex);

        flow_flag_clear(flow, DUP);

        if (refcount_dec_and_test(&flow->peer_flow->refcnt)) {
                mlx5e_tc_del_fdb_flow(flow->peer_flow->priv, flow->peer_flow);
                kfree(flow->peer_flow);
        }

        flow->peer_flow = NULL;
}
static void mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
{
        struct mlx5_core_dev *dev = flow->priv->mdev;
        struct mlx5_devcom *devcom = dev->priv.devcom;
        struct mlx5_eswitch *peer_esw;

        peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
        if (!peer_esw)
                return;

        __mlx5e_tc_del_fdb_peer_flow(flow);
        mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
}
static void mlx5e_tc_del_flow(struct mlx5e_priv *priv,
                              struct mlx5e_tc_flow *flow)
{
        if (mlx5e_is_eswitch_flow(flow)) {
                mlx5e_tc_del_fdb_peer_flow(flow);
                mlx5e_tc_del_fdb_flow(priv, flow);
        } else {
                mlx5e_tc_del_nic_flow(priv, flow);
        }
}
static bool flow_requires_tunnel_mapping(u32 chain, struct flow_cls_offload *f)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct flow_action *flow_action = &rule->action;
        const struct flow_action_entry *act;
        int i;

        if (chain)
                return false;

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_GOTO:
                        return true;
                case FLOW_ACTION_SAMPLE:
                        return true;
                default:
                        continue;
                }
        }

        return false;
}
static int
enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
                                    struct flow_dissector_key_enc_opts *opts,
                                    struct netlink_ext_ack *extack,
                                    bool *dont_care)
{
        struct geneve_opt *opt;
        int off = 0;

        *dont_care = true;

        while (opts->len > off) {
                opt = (struct geneve_opt *)&opts->data[off];

                if (!(*dont_care) || opt->opt_class || opt->type ||
                    memchr_inv(opt->opt_data, 0, opt->length * 4)) {
                        *dont_care = false;

                        if (opt->opt_class != htons(U16_MAX) ||
                            opt->type != U8_MAX) {
                                NL_SET_ERR_MSG_MOD(extack,
                                                   "Partial match of tunnel options in chain > 0 isn't supported");
                                netdev_warn(priv->netdev,
                                            "Partial match of tunnel options in chain > 0 isn't supported");
                                return -EOPNOTSUPP;
                        }
                }

                off += sizeof(struct geneve_opt) + opt->length * 4;
        }

        return 0;
}
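
/* COPY_DISSECTOR copies one dissector key from @rule into @dst. It is a
 * statement expression so that typeof(dst) keeps the copy size tied to the
 * destination's type.
 */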
#define COPY_DISSECTOR(rule, diss_key, dst)\
({ \
        struct flow_rule *__rule = (rule);\
        typeof(dst) __dst = dst;\
\
        memcpy(__dst,\
               skb_flow_dissector_target(__rule->match.dissector,\
                                         diss_key,\
                                         __rule->match.key),\
               sizeof(*__dst));\
})
static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
                                    struct mlx5e_tc_flow *flow,
                                    struct flow_cls_offload *f,
                                    struct net_device *filter_dev)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct netlink_ext_ack *extack = f->common.extack;
        struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
        struct flow_match_enc_opts enc_opts_match;
        struct tunnel_match_enc_opts tun_enc_opts;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5_flow_attr *attr = flow->attr;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct tunnel_match_key tunnel_key;
        bool enc_opts_is_dont_care = true;
        u32 tun_id, enc_opts_id = 0;
        struct mlx5_eswitch *esw;
        u32 value, mask;
        int err;

        esw = priv->mdev->priv.eswitch;
        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &uplink_rpriv->uplink_priv;

        memset(&tunnel_key, 0, sizeof(tunnel_key));
        COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL,
                       &tunnel_key.enc_control);
        if (tunnel_key.enc_control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS)
                COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS,
                               &tunnel_key.enc_ipv4);
        else
                COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS,
                               &tunnel_key.enc_ipv6);
        COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_IP, &tunnel_key.enc_ip);
        COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_PORTS,
                       &tunnel_key.enc_tp);
        COPY_DISSECTOR(rule, FLOW_DISSECTOR_KEY_ENC_KEYID,
                       &tunnel_key.enc_key_id);
        tunnel_key.filter_ifindex = filter_dev->ifindex;

        err = mapping_add(uplink_priv->tunnel_mapping, &tunnel_key, &tun_id);
        if (err)
                return err;

        flow_rule_match_enc_opts(rule, &enc_opts_match);
        err = enc_opts_is_dont_care_or_full_match(priv,
                                                  enc_opts_match.mask,
                                                  extack,
                                                  &enc_opts_is_dont_care);
        if (err)
                goto err_enc_opts;

        if (!enc_opts_is_dont_care) {
                memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
                memcpy(&tun_enc_opts.key, enc_opts_match.key,
                       sizeof(*enc_opts_match.key));
                memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
                       sizeof(*enc_opts_match.mask));

                err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
                                  &tun_enc_opts, &enc_opts_id);
                if (err)
                        goto err_enc_opts;
        }

        value = tun_id << ENC_OPTS_BITS | enc_opts_id;
        mask = enc_opts_id ? TUNNEL_ID_MASK :
                             (TUNNEL_ID_MASK & ~ENC_OPTS_BITS_MASK);
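
        /* The register value packs the tunnel mapping id above the options
         * mapping id (ENC_OPTS_BITS wide). When no options are matched, the
         * options bits are masked out so any enc_opts value hits the rule.
         */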
        if (attr->chain) {
                mlx5e_tc_match_to_reg_match(&attr->parse_attr->spec,
                                            TUNNEL_TO_REG, value, mask);
        } else {
                mod_hdr_acts = &attr->parse_attr->mod_hdr_acts;
                err = mlx5e_tc_match_to_reg_set(priv->mdev,
                                                mod_hdr_acts, MLX5_FLOW_NAMESPACE_FDB,
                                                TUNNEL_TO_REG, value);
                if (err)
                        goto err_set;

                attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        }

        flow->attr->tunnel_id = value;
        return 0;

err_set:
        if (enc_opts_id)
                mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
                               enc_opts_id);
err_enc_opts:
        mapping_remove(uplink_priv->tunnel_mapping, tun_id);
        return err;
}
static void mlx5e_put_flow_tunnel_id(struct mlx5e_tc_flow *flow)
{
        u32 enc_opts_id = flow->attr->tunnel_id & ENC_OPTS_BITS_MASK;
        u32 tun_id = flow->attr->tunnel_id >> ENC_OPTS_BITS;
        struct mlx5_rep_uplink_priv *uplink_priv;
        struct mlx5e_rep_priv *uplink_rpriv;
        struct mlx5_eswitch *esw;

        esw = flow->priv->mdev->priv.eswitch;
        uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
        uplink_priv = &uplink_rpriv->uplink_priv;

        if (tun_id)
                mapping_remove(uplink_priv->tunnel_mapping, tun_id);
        if (enc_opts_id)
                mapping_remove(uplink_priv->tunnel_enc_opts_mapping,
                               enc_opts_id);
}
void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
                            struct flow_match_basic *match, bool outer,
                            void *headers_c, void *headers_v)
{
        bool ip_version_cap;

        ip_version_cap = outer ?
                MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                                          ft_field_support.outer_ip_version) :
                MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
                                          ft_field_support.inner_ip_version);

        if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
            (match->key->n_proto == htons(ETH_P_IP) ||
             match->key->n_proto == htons(ETH_P_IPV6))) {
                MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
                         match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
        } else {
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
                         ntohs(match->mask->n_proto));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
                         ntohs(match->key->n_proto));
        }
}
u8 mlx5e_tc_get_ip_version(struct mlx5_flow_spec *spec, bool outer)
{
        void *headers_v;
        u16 ethertype;
        u8 ip_version;

        if (outer)
                headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, outer_headers);
        else
                headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value, inner_headers);

        ip_version = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_version);
        /* If ip_version is not present in the match, derive it from the ethertype */
        if (!ip_version) {
                ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
                if (ethertype == ETH_P_IP || ethertype == ETH_P_ARP)
                        ip_version = 4;
                else if (ethertype == ETH_P_IPV6)
                        ip_version = 6;
        }
        return ip_version;
}
/* Tunnel device follows RFC 6040, see include/net/inet_ecn.h.
 * It changes the inner ip_ecn depending on the inner and outer ip_ecn as follows:
 * +---------+----------------------------------------+
 * |Arriving |         Arriving Outer Header          |
 * |   Inner +---------+---------+---------+----------+
 * |  Header | Not-ECT | ECT(0)  | ECT(1)  |    CE    |
 * +---------+---------+---------+---------+----------+
 * | Not-ECT | Not-ECT | Not-ECT | Not-ECT |  <drop>  |
 * |  ECT(0) |  ECT(0) | ECT(0)  | ECT(1)  |   CE*    |
 * |  ECT(1) |  ECT(1) | ECT(1)  | ECT(1)* |   CE*    |
 * |    CE   |   CE    |   CE    |   CE    |   CE     |
 * +---------+---------+---------+---------+----------+
 *
 * Tc matches on the inner header after decapsulation on the tunnel device, but
 * hw offload matches the inner ip_ecn value before the hardware decap action.
 *
 * The cells marked with * are changed from the original inner packet ip_ecn
 * value during decap, so matching those values on inner ip_ecn before decap
 * will fail.
 *
 * The following helper allows offload when the inner ip_ecn won't be changed by
 * the outer ip_ecn, except for outer ip_ecn = CE, where the inner ip_ecn always
 * becomes CE as well, so the inner ip_ecn = CE match can simply be dropped.
 */
static int mlx5e_tc_verify_tunnel_ecn(struct mlx5e_priv *priv,
                                      struct flow_cls_offload *f,
                                      bool *match_inner_ecn)
{
        u8 outer_ecn_mask = 0, outer_ecn_key = 0, inner_ecn_mask = 0, inner_ecn_key = 0;
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        struct netlink_ext_ack *extack = f->common.extack;
        struct flow_match_ip match;

        *match_inner_ecn = true;

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) {
                flow_rule_match_enc_ip(rule, &match);
                outer_ecn_key = match.key->tos & INET_ECN_MASK;
                outer_ecn_mask = match.mask->tos & INET_ECN_MASK;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
                flow_rule_match_ip(rule, &match);
                inner_ecn_key = match.key->tos & INET_ECN_MASK;
                inner_ecn_mask = match.mask->tos & INET_ECN_MASK;
        }

        if (outer_ecn_mask != 0 && outer_ecn_mask != INET_ECN_MASK) {
                NL_SET_ERR_MSG_MOD(extack, "Partial match on enc_tos ecn bits isn't supported");
                netdev_warn(priv->netdev, "Partial match on enc_tos ecn bits isn't supported");
                return -EOPNOTSUPP;
        }

        if (!outer_ecn_mask) {
                if (!inner_ecn_mask)
                        return 0;

                NL_SET_ERR_MSG_MOD(extack,
                                   "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
                netdev_warn(priv->netdev,
                            "Matching on tos ecn bits without also matching enc_tos ecn bits isn't supported");
                return -EOPNOTSUPP;
        }

        if (inner_ecn_mask && inner_ecn_mask != INET_ECN_MASK) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
                netdev_warn(priv->netdev,
                            "Partial match on tos ecn bits with match on enc_tos ecn bits isn't supported");
                return -EOPNOTSUPP;
        }

        if (!inner_ecn_mask)
                return 0;

        /* Both inner and outer have full mask on ecn */

        if (outer_ecn_key == INET_ECN_ECT_1) {
                /* inner ecn might change by DECAP action */
                NL_SET_ERR_MSG_MOD(extack, "Match on enc_tos ecn = ECT(1) isn't supported");
                netdev_warn(priv->netdev, "Match on enc_tos ecn = ECT(1) isn't supported");
                return -EOPNOTSUPP;
        }

        if (outer_ecn_key != INET_ECN_CE)
                return 0;

        if (inner_ecn_key != INET_ECN_CE) {
                /* Can't happen in software, as packet ecn will be changed to CE after decap */
                NL_SET_ERR_MSG_MOD(extack,
                                   "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
                netdev_warn(priv->netdev,
                            "Match on tos enc_tos ecn = CE while match on tos ecn != CE isn't supported");
                return -EOPNOTSUPP;
        }

        /* outer ecn = CE, inner ecn = CE; since decap changes inner ecn to CE
         * in any case, drop the match on inner ecn
         */
        *match_inner_ecn = false;

        return 0;
}
2153 static int parse_tunnel_attr(struct mlx5e_priv *priv,
2154 struct mlx5e_tc_flow *flow,
2155 struct mlx5_flow_spec *spec,
2156 struct flow_cls_offload *f,
2157 struct net_device *filter_dev,
2161 struct mlx5e_tc_tunnel *tunnel = mlx5e_get_tc_tun(filter_dev);
2162 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2163 struct netlink_ext_ack *extack = f->common.extack;
2164 bool needs_mapping, sets_mapping;
2167 if (!mlx5e_is_eswitch_flow(flow)) {
2168 NL_SET_ERR_MSG_MOD(extack, "Match on tunnel is not supported");
2172 needs_mapping = !!flow->attr->chain;
2173 sets_mapping = flow_requires_tunnel_mapping(flow->attr->chain, f);
2174 *match_inner = !needs_mapping;
2176 if ((needs_mapping || sets_mapping) &&
2177 !mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2178 NL_SET_ERR_MSG_MOD(extack,
2179 "Chains on tunnel devices isn't supported without register loopback support");
2180 netdev_warn(priv->netdev,
2181 "Chains on tunnel devices isn't supported without register loopback support");
2185 if (!flow->attr->chain) {
2186 err = mlx5e_tc_tun_parse(filter_dev, priv, spec, f,
2189 NL_SET_ERR_MSG_MOD(extack,
2190 "Failed to parse tunnel attributes");
2191 netdev_warn(priv->netdev,
2192 "Failed to parse tunnel attributes");
2196 /* With mpls over udp we decapsulate using packet reformat
2197 * object
2198 */
2199 if (!netif_is_bareudp(filter_dev))
2200 flow->attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
2201 err = mlx5e_tc_set_attr_rx_tun(flow, spec);
2204 } else if (tunnel && tunnel->tunnel_type == MLX5E_TC_TUNNEL_TYPE_VXLAN) {
2205 struct mlx5_flow_spec *tmp_spec;
2207 tmp_spec = kvzalloc(sizeof(*tmp_spec), GFP_KERNEL);
2209 NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory for vxlan tmp spec");
2210 netdev_warn(priv->netdev, "Failed to allocate memory for vxlan tmp spec");
2213 memcpy(tmp_spec, spec, sizeof(*tmp_spec));
2215 err = mlx5e_tc_tun_parse(filter_dev, priv, tmp_spec, f, match_level);
2218 NL_SET_ERR_MSG_MOD(extack, "Failed to parse tunnel attributes");
2219 netdev_warn(priv->netdev, "Failed to parse tunnel attributes");
2222 err = mlx5e_tc_set_attr_rx_tun(flow, tmp_spec);
2228 if (!needs_mapping && !sets_mapping)
2231 return mlx5e_get_flow_tunnel_id(priv, flow, f, filter_dev);
2234 static void *get_match_inner_headers_criteria(struct mlx5_flow_spec *spec)
2236 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2240 static void *get_match_inner_headers_value(struct mlx5_flow_spec *spec)
2242 return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2246 static void *get_match_outer_headers_criteria(struct mlx5_flow_spec *spec)
2248 return MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2252 static void *get_match_outer_headers_value(struct mlx5_flow_spec *spec)
2254 return MLX5_ADDR_OF(fte_match_param, spec->match_value,
2258 void *mlx5e_get_match_headers_value(u32 flags, struct mlx5_flow_spec *spec)
2260 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2261 get_match_inner_headers_value(spec) :
2262 get_match_outer_headers_value(spec);
2265 void *mlx5e_get_match_headers_criteria(u32 flags, struct mlx5_flow_spec *spec)
2267 return (flags & MLX5_FLOW_CONTEXT_ACTION_DECAP) ?
2268 get_match_inner_headers_criteria(spec) :
2269 get_match_outer_headers_criteria(spec);
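/* Minimal usage sketch for the two helpers above (hypothetical caller):
 * when the rule carries a DECAP action, matching continues on the inner
 * headers, otherwise on the outer ones. 'attr' and 'spec' are assumed to
 * belong to an already-parsed flow:
 *
 *   void *headers_v = mlx5e_get_match_headers_value(attr->action, &spec);
 *   u8 proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
 */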
2272 static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
2273 struct flow_cls_offload *f)
2275 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2276 struct netlink_ext_ack *extack = f->common.extack;
2277 struct net_device *ingress_dev;
2278 struct flow_match_meta match;
2280 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META))
2283 flow_rule_match_meta(rule, &match);
2284 if (!match.mask->ingress_ifindex)
2287 if (match.mask->ingress_ifindex != 0xFFFFFFFF) {
2288 NL_SET_ERR_MSG_MOD(extack, "Unsupported ingress ifindex mask");
2292 ingress_dev = __dev_get_by_index(dev_net(filter_dev),
2293 match.key->ingress_ifindex);
2295 NL_SET_ERR_MSG_MOD(extack,
2296 "Can't find the ingress port to match on");
2300 if (ingress_dev != filter_dev) {
2301 NL_SET_ERR_MSG_MOD(extack,
2302 "Can't match on the ingress filter port");
2309 static bool skip_key_basic(struct net_device *filter_dev,
2310 struct flow_cls_offload *f)
2312 /* When doing mpls over udp decap, the user needs to provide
2313 * MPLS_UC as the protocol in order to be able to match on mpls
2314 * label fields. However, the actual ethertype is IP so we want to
2315 * avoid matching on this, otherwise we'll fail the match.
2317 if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
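/* Illustrative command for the case handled here (names are assumptions):
 * decapping MPLS over UDP on a bareudp device requires 'protocol mpls_uc'
 * so that the mpls fields can be matched, even though the decapped
 * ethertype is IP and must therefore be skipped:
 *
 *   tc filter add dev bareudp0 ingress protocol mpls_uc flower \
 *       mpls_label 100 action mpls pop protocol ip pipe \
 *       action mirred egress redirect dev $VF_REP
 */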
2323 static int __parse_cls_flower(struct mlx5e_priv *priv,
2324 struct mlx5e_tc_flow *flow,
2325 struct mlx5_flow_spec *spec,
2326 struct flow_cls_offload *f,
2327 struct net_device *filter_dev,
2328 u8 *inner_match_level, u8 *outer_match_level)
2330 struct netlink_ext_ack *extack = f->common.extack;
2331 void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2333 void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2335 void *misc_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2337 void *misc_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2339 void *misc_c_3 = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
2341 void *misc_v_3 = MLX5_ADDR_OF(fte_match_param, spec->match_value,
2343 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
2344 struct flow_dissector *dissector = rule->match.dissector;
2345 enum fs_flow_table_type fs_type;
2346 bool match_inner_ecn = true;
2352 fs_type = mlx5e_is_eswitch_flow(flow) ? FS_FT_FDB : FS_FT_NIC_RX;
2353 match_level = outer_match_level;
2355 if (dissector->used_keys &
2356 ~(BIT(FLOW_DISSECTOR_KEY_META) |
2357 BIT(FLOW_DISSECTOR_KEY_CONTROL) |
2358 BIT(FLOW_DISSECTOR_KEY_BASIC) |
2359 BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) |
2360 BIT(FLOW_DISSECTOR_KEY_VLAN) |
2361 BIT(FLOW_DISSECTOR_KEY_CVLAN) |
2362 BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) |
2363 BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) |
2364 BIT(FLOW_DISSECTOR_KEY_PORTS) |
2365 BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) |
2366 BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) |
2367 BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) |
2368 BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) |
2369 BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) |
2370 BIT(FLOW_DISSECTOR_KEY_TCP) |
2371 BIT(FLOW_DISSECTOR_KEY_IP) |
2372 BIT(FLOW_DISSECTOR_KEY_CT) |
2373 BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
2374 BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
2375 BIT(FLOW_DISSECTOR_KEY_ICMP) |
2376 BIT(FLOW_DISSECTOR_KEY_MPLS))) {
2377 NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
2378 netdev_dbg(priv->netdev, "Unsupported key used: 0x%x\n",
2379 dissector->used_keys);
2383 if (mlx5e_get_tc_tun(filter_dev)) {
2384 bool match_inner = false;
2386 err = parse_tunnel_attr(priv, flow, spec, f, filter_dev,
2387 outer_match_level, &match_inner);
2392 /* header pointers should point to the inner headers
2393 * if the packet was decapsulated already.
2394 * outer headers are set by parse_tunnel_attr.
2396 match_level = inner_match_level;
2397 headers_c = get_match_inner_headers_criteria(spec);
2398 headers_v = get_match_inner_headers_value(spec);
2401 err = mlx5e_tc_verify_tunnel_ecn(priv, f, &match_inner_ecn);
2406 err = mlx5e_flower_parse_meta(filter_dev, f);
2410 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
2411 !skip_key_basic(filter_dev, f)) {
2412 struct flow_match_basic match;
2414 flow_rule_match_basic(rule, &match);
2415 mlx5e_tc_set_ethertype(priv->mdev, &match,
2416 match_level == outer_match_level,
2417 headers_c, headers_v);
2419 if (match.mask->n_proto)
2420 *match_level = MLX5_MATCH_L2;
2422 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) ||
2423 is_vlan_dev(filter_dev)) {
2424 struct flow_dissector_key_vlan filter_dev_mask;
2425 struct flow_dissector_key_vlan filter_dev_key;
2426 struct flow_match_vlan match;
2428 if (is_vlan_dev(filter_dev)) {
2429 match.key = &filter_dev_key;
2430 match.key->vlan_id = vlan_dev_vlan_id(filter_dev);
2431 match.key->vlan_tpid = vlan_dev_vlan_proto(filter_dev);
2432 match.key->vlan_priority = 0;
2433 match.mask = &filter_dev_mask;
2434 memset(match.mask, 0xff, sizeof(*match.mask));
2435 match.mask->vlan_priority = 0;
2437 flow_rule_match_vlan(rule, &match);
2439 if (match.mask->vlan_id ||
2440 match.mask->vlan_priority ||
2441 match.mask->vlan_tpid) {
2442 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2443 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2445 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2448 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2450 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2454 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_vid,
2455 match.mask->vlan_id);
2456 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_vid,
2457 match.key->vlan_id);
2459 MLX5_SET(fte_match_set_lyr_2_4, headers_c, first_prio,
2460 match.mask->vlan_priority);
2461 MLX5_SET(fte_match_set_lyr_2_4, headers_v, first_prio,
2462 match.key->vlan_priority);
2464 *match_level = MLX5_MATCH_L2;
2466 if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN) &&
2467 match.mask->vlan_eth_type &&
2468 MLX5_CAP_FLOWTABLE_TYPE(priv->mdev,
2469 ft_field_support.outer_second_vid,
2471 MLX5_SET(fte_match_set_misc, misc_c,
2472 outer_second_cvlan_tag, 1);
2473 spec->match_criteria_enable |=
2474 MLX5_MATCH_MISC_PARAMETERS;
2477 } else if (*match_level != MLX5_MATCH_NONE) {
2478 /* cvlan_tag enabled in match criteria and
2479 * disabled in match value means both S & C tags
2480 * don't exist (untagged for both)
2481 */
2482 MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);
2483 *match_level = MLX5_MATCH_L2;
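/* The untagged case above relies on the cvlan_tag criteria/value pair:
 * mask = 1 with value = 0 means "no C-tag (and no S-tag) present". A sketch
 * of the equivalent explicit form; the value line is implicit in the code
 * above because match_value is zero-initialized:
 *
 *   MLX5_SET(fte_match_set_lyr_2_4, headers_c, cvlan_tag, 1);  (examine tag)
 *   MLX5_SET(fte_match_set_lyr_2_4, headers_v, cvlan_tag, 0);  (require none)
 */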
2486 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CVLAN)) {
2487 struct flow_match_vlan match;
2489 flow_rule_match_cvlan(rule, &match);
2490 if (match.mask->vlan_id ||
2491 match.mask->vlan_priority ||
2492 match.mask->vlan_tpid) {
2493 if (!MLX5_CAP_FLOWTABLE_TYPE(priv->mdev, ft_field_support.outer_second_vid,
2495 NL_SET_ERR_MSG_MOD(extack,
2496 "Matching on CVLAN is not supported");
2500 if (match.key->vlan_tpid == htons(ETH_P_8021AD)) {
2501 MLX5_SET(fte_match_set_misc, misc_c,
2502 outer_second_svlan_tag, 1);
2503 MLX5_SET(fte_match_set_misc, misc_v,
2504 outer_second_svlan_tag, 1);
2506 MLX5_SET(fte_match_set_misc, misc_c,
2507 outer_second_cvlan_tag, 1);
2508 MLX5_SET(fte_match_set_misc, misc_v,
2509 outer_second_cvlan_tag, 1);
2512 MLX5_SET(fte_match_set_misc, misc_c, outer_second_vid,
2513 match.mask->vlan_id);
2514 MLX5_SET(fte_match_set_misc, misc_v, outer_second_vid,
2515 match.key->vlan_id);
2516 MLX5_SET(fte_match_set_misc, misc_c, outer_second_prio,
2517 match.mask->vlan_priority);
2518 MLX5_SET(fte_match_set_misc, misc_v, outer_second_prio,
2519 match.key->vlan_priority);
2521 *match_level = MLX5_MATCH_L2;
2522 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS;
2526 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
2527 struct flow_match_eth_addrs match;
2529 flow_rule_match_eth_addrs(rule, &match);
2530 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2533 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2537 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2540 ether_addr_copy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2544 if (!is_zero_ether_addr(match.mask->src) ||
2545 !is_zero_ether_addr(match.mask->dst))
2546 *match_level = MLX5_MATCH_L2;
2549 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
2550 struct flow_match_control match;
2552 flow_rule_match_control(rule, &match);
2553 addr_type = match.key->addr_type;
2555 /* the HW doesn't support frag first/later */
2556 if (match.mask->flags & FLOW_DIS_FIRST_FRAG) {
2557 NL_SET_ERR_MSG_MOD(extack, "Match on frag first/later is not supported");
2561 if (match.mask->flags & FLOW_DIS_IS_FRAGMENT) {
2562 MLX5_SET(fte_match_set_lyr_2_4, headers_c, frag, 1);
2563 MLX5_SET(fte_match_set_lyr_2_4, headers_v, frag,
2564 match.key->flags & FLOW_DIS_IS_FRAGMENT);
2566 /* the HW doesn't need L3 inline to match on frag=no */
2567 if (!(match.key->flags & FLOW_DIS_IS_FRAGMENT))
2568 *match_level = MLX5_MATCH_L2;
2569 /* *** L2 attributes parsing up to here *** */
2571 *match_level = MLX5_MATCH_L3;
2575 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
2576 struct flow_match_basic match;
2578 flow_rule_match_basic(rule, &match);
2579 ip_proto = match.key->ip_proto;
2581 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
2582 match.mask->ip_proto);
2583 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
2584 match.key->ip_proto);
2586 if (match.mask->ip_proto)
2587 *match_level = MLX5_MATCH_L3;
2590 if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
2591 struct flow_match_ipv4_addrs match;
2593 flow_rule_match_ipv4_addrs(rule, &match);
2594 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2595 src_ipv4_src_ipv6.ipv4_layout.ipv4),
2596 &match.mask->src, sizeof(match.mask->src));
2597 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2598 src_ipv4_src_ipv6.ipv4_layout.ipv4),
2599 &match.key->src, sizeof(match.key->src));
2600 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2601 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2602 &match.mask->dst, sizeof(match.mask->dst));
2603 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2604 dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2605 &match.key->dst, sizeof(match.key->dst));
2607 if (match.mask->src || match.mask->dst)
2608 *match_level = MLX5_MATCH_L3;
2611 if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
2612 struct flow_match_ipv6_addrs match;
2614 flow_rule_match_ipv6_addrs(rule, &match);
2615 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2616 src_ipv4_src_ipv6.ipv6_layout.ipv6),
2617 &match.mask->src, sizeof(match.mask->src));
2618 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2619 src_ipv4_src_ipv6.ipv6_layout.ipv6),
2620 &match.key->src, sizeof(match.key->src));
2622 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
2623 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2624 &match.mask->dst, sizeof(match.mask->dst));
2625 memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
2626 dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
2627 &match.key->dst, sizeof(match.key->dst));
2629 if (ipv6_addr_type(&match.mask->src) != IPV6_ADDR_ANY ||
2630 ipv6_addr_type(&match.mask->dst) != IPV6_ADDR_ANY)
2631 *match_level = MLX5_MATCH_L3;
2634 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IP)) {
2635 struct flow_match_ip match;
2637 flow_rule_match_ip(rule, &match);
2638 if (match_inner_ecn) {
2639 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_ecn,
2640 match.mask->tos & 0x3);
2641 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_ecn,
2642 match.key->tos & 0x3);
2645 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_dscp,
2646 match.mask->tos >> 2);
2647 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_dscp,
2648 match.key->tos >> 2);
2650 MLX5_SET(fte_match_set_lyr_2_4, headers_c, ttl_hoplimit,
2652 MLX5_SET(fte_match_set_lyr_2_4, headers_v, ttl_hoplimit,
2655 if (match.mask->ttl &&
2656 !MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
2657 ft_field_support.outer_ipv4_ttl)) {
2658 NL_SET_ERR_MSG_MOD(extack,
2659 "Matching on TTL is not supported");
2663 if (match.mask->tos || match.mask->ttl)
2664 *match_level = MLX5_MATCH_L3;
2667 /* *** L3 attributes parsing up to here *** */
2669 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
2670 struct flow_match_ports match;
2672 flow_rule_match_ports(rule, &match);
2675 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2676 tcp_sport, ntohs(match.mask->src));
2677 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2678 tcp_sport, ntohs(match.key->src));
2680 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2681 tcp_dport, ntohs(match.mask->dst));
2682 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2683 tcp_dport, ntohs(match.key->dst));
2687 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2688 udp_sport, ntohs(match.mask->src));
2689 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2690 udp_sport, ntohs(match.key->src));
2692 MLX5_SET(fte_match_set_lyr_2_4, headers_c,
2693 udp_dport, ntohs(match.mask->dst));
2694 MLX5_SET(fte_match_set_lyr_2_4, headers_v,
2695 udp_dport, ntohs(match.key->dst));
2698 NL_SET_ERR_MSG_MOD(extack,
2699 "Only UDP and TCP transports are supported for L4 matching");
2700 netdev_err(priv->netdev,
2701 "Only UDP and TCP transports are supported\n");
2705 if (match.mask->src || match.mask->dst)
2706 *match_level = MLX5_MATCH_L4;
2709 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
2710 struct flow_match_tcp match;
2712 flow_rule_match_tcp(rule, &match);
2713 MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
2714 ntohs(match.mask->flags));
2715 MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
2716 ntohs(match.key->flags));
2718 if (match.mask->flags)
2719 *match_level = MLX5_MATCH_L4;
2721 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ICMP)) {
2722 struct flow_match_icmp match;
2724 flow_rule_match_icmp(rule, &match);
2727 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2728 MLX5_FLEX_PROTO_ICMP)) {
2729 NL_SET_ERR_MSG_MOD(extack,
2730 "Match on Flex protocols for ICMP is not supported");
2733 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_type,
2735 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_type,
2737 MLX5_SET(fte_match_set_misc3, misc_c_3, icmp_code,
2739 MLX5_SET(fte_match_set_misc3, misc_v_3, icmp_code,
2742 case IPPROTO_ICMPV6:
2743 if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
2744 MLX5_FLEX_PROTO_ICMPV6)) {
2745 NL_SET_ERR_MSG_MOD(extack,
2746 "Match on Flex protocols for ICMPV6 is not supported");
2749 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_type,
2751 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_type,
2753 MLX5_SET(fte_match_set_misc3, misc_c_3, icmpv6_code,
2755 MLX5_SET(fte_match_set_misc3, misc_v_3, icmpv6_code,
2759 NL_SET_ERR_MSG_MOD(extack,
2760 "Code and type matching only with ICMP and ICMPv6");
2761 netdev_err(priv->netdev,
2762 "Code and type matching only with ICMP and ICMPv6\n");
2765 if (match.mask->code || match.mask->type) {
2766 *match_level = MLX5_MATCH_L4;
2767 spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_3;
2770 /* Currently supported only for MPLS over UDP */
2771 if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
2772 !netif_is_bareudp(filter_dev)) {
2773 NL_SET_ERR_MSG_MOD(extack,
2774 "Matching on MPLS is supported only for MPLS over UDP");
2775 netdev_err(priv->netdev,
2776 "Matching on MPLS is supported only for MPLS over UDP\n");
2783 static int parse_cls_flower(struct mlx5e_priv *priv,
2784 struct mlx5e_tc_flow *flow,
2785 struct mlx5_flow_spec *spec,
2786 struct flow_cls_offload *f,
2787 struct net_device *filter_dev)
2789 u8 inner_match_level, outer_match_level, non_tunnel_match_level;
2790 struct netlink_ext_ack *extack = f->common.extack;
2791 struct mlx5_core_dev *dev = priv->mdev;
2792 struct mlx5_eswitch *esw = dev->priv.eswitch;
2793 struct mlx5e_rep_priv *rpriv = priv->ppriv;
2794 struct mlx5_eswitch_rep *rep;
2795 bool is_eswitch_flow;
2798 inner_match_level = MLX5_MATCH_NONE;
2799 outer_match_level = MLX5_MATCH_NONE;
2801 err = __parse_cls_flower(priv, flow, spec, f, filter_dev,
2802 &inner_match_level, &outer_match_level);
2803 non_tunnel_match_level = (inner_match_level == MLX5_MATCH_NONE) ?
2804 outer_match_level : inner_match_level;
2806 is_eswitch_flow = mlx5e_is_eswitch_flow(flow);
2807 if (!err && is_eswitch_flow) {
2809 if (rep->vport != MLX5_VPORT_UPLINK &&
2810 (esw->offloads.inline_mode != MLX5_INLINE_MODE_NONE &&
2811 esw->offloads.inline_mode < non_tunnel_match_level)) {
2812 NL_SET_ERR_MSG_MOD(extack,
2813 "Flow is not offloaded due to min inline setting");
2814 netdev_warn(priv->netdev,
2815 "Flow is not offloaded due to min inline setting, required %d actual %d\n",
2816 non_tunnel_match_level, esw->offloads.inline_mode);
2821 flow->attr->inner_match_level = inner_match_level;
2822 flow->attr->outer_match_level = outer_match_level;
2828 struct mlx5_fields {
2836 #define OFFLOAD(fw_field, field_bsize, field_mask, field, off, match_field) \
2837 {MLX5_ACTION_IN_FIELD_OUT_ ## fw_field, field_bsize, field_mask, \
2838 offsetof(struct pedit_headers, field) + (off), \
2839 MLX5_BYTE_OFF(fte_match_set_lyr_2_4, match_field)}
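/* Hand-expanded example of one OFFLOAD() entry, for reference only:
 *
 *   OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit)
 *     => { MLX5_ACTION_IN_FIELD_OUT_IP_TTL, 8, U8_MAX,
 *          offsetof(struct pedit_headers, ip4.ttl) + 0,
 *          MLX5_BYTE_OFF(fte_match_set_lyr_2_4, ttl_hoplimit) }
 *
 * i.e. a HW field id, the field's bit size and mask, the offset of the
 * shadow pedit value, and the byte offset of the matching header field.
 */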
2841 /* masked values are the same and there are no rewrites that do not have a
2842 * corresponding match
2843 */
2844 #define SAME_VAL_MASK(type, valp, maskp, matchvalp, matchmaskp) ({ \
2845 type matchmaskx = *(type *)(matchmaskp); \
2846 type matchvalx = *(type *)(matchvalp); \
2847 type maskx = *(type *)(maskp); \
2848 type valx = *(type *)(valp); \
2850 (valx & maskx) == (matchvalx & matchmaskx) && !(maskx & (maskx ^ \
2851 matchmaskx)); \
2852 })
2854 static bool cmp_val_mask(void *valp, void *maskp, void *matchvalp,
2855 void *matchmaskp, u8 bsize)
2861 same = SAME_VAL_MASK(u8, valp, maskp, matchvalp, matchmaskp);
2864 same = SAME_VAL_MASK(u16, valp, maskp, matchvalp, matchmaskp);
2867 same = SAME_VAL_MASK(u32, valp, maskp, matchvalp, matchmaskp);
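/* Illustrative use of cmp_val_mask(): if a rule both matches on
 * dst_mac aa:bb:cc:dd:ee:ff and pedits dst_mac to that same value, the
 * rewrite is redundant; offload_pedit_fields() below uses this helper to
 * detect that and silently skip the field.
 */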
2874 static struct mlx5_fields fields[] = {
2875 OFFLOAD(DMAC_47_16, 32, U32_MAX, eth.h_dest[0], 0, dmac_47_16),
2876 OFFLOAD(DMAC_15_0, 16, U16_MAX, eth.h_dest[4], 0, dmac_15_0),
2877 OFFLOAD(SMAC_47_16, 32, U32_MAX, eth.h_source[0], 0, smac_47_16),
2878 OFFLOAD(SMAC_15_0, 16, U16_MAX, eth.h_source[4], 0, smac_15_0),
2879 OFFLOAD(ETHERTYPE, 16, U16_MAX, eth.h_proto, 0, ethertype),
2880 OFFLOAD(FIRST_VID, 16, U16_MAX, vlan.h_vlan_TCI, 0, first_vid),
2882 OFFLOAD(IP_DSCP, 8, 0xfc, ip4.tos, 0, ip_dscp),
2883 OFFLOAD(IP_TTL, 8, U8_MAX, ip4.ttl, 0, ttl_hoplimit),
2884 OFFLOAD(SIPV4, 32, U32_MAX, ip4.saddr, 0, src_ipv4_src_ipv6.ipv4_layout.ipv4),
2885 OFFLOAD(DIPV4, 32, U32_MAX, ip4.daddr, 0, dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
2887 OFFLOAD(SIPV6_127_96, 32, U32_MAX, ip6.saddr.s6_addr32[0], 0,
2888 src_ipv4_src_ipv6.ipv6_layout.ipv6[0]),
2889 OFFLOAD(SIPV6_95_64, 32, U32_MAX, ip6.saddr.s6_addr32[1], 0,
2890 src_ipv4_src_ipv6.ipv6_layout.ipv6[4]),
2891 OFFLOAD(SIPV6_63_32, 32, U32_MAX, ip6.saddr.s6_addr32[2], 0,
2892 src_ipv4_src_ipv6.ipv6_layout.ipv6[8]),
2893 OFFLOAD(SIPV6_31_0, 32, U32_MAX, ip6.saddr.s6_addr32[3], 0,
2894 src_ipv4_src_ipv6.ipv6_layout.ipv6[12]),
2895 OFFLOAD(DIPV6_127_96, 32, U32_MAX, ip6.daddr.s6_addr32[0], 0,
2896 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[0]),
2897 OFFLOAD(DIPV6_95_64, 32, U32_MAX, ip6.daddr.s6_addr32[1], 0,
2898 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[4]),
2899 OFFLOAD(DIPV6_63_32, 32, U32_MAX, ip6.daddr.s6_addr32[2], 0,
2900 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[8]),
2901 OFFLOAD(DIPV6_31_0, 32, U32_MAX, ip6.daddr.s6_addr32[3], 0,
2902 dst_ipv4_dst_ipv6.ipv6_layout.ipv6[12]),
2903 OFFLOAD(IPV6_HOPLIMIT, 8, U8_MAX, ip6.hop_limit, 0, ttl_hoplimit),
2904 OFFLOAD(IP_DSCP, 16, 0xc00f, ip6, 0, ip_dscp),
2906 OFFLOAD(TCP_SPORT, 16, U16_MAX, tcp.source, 0, tcp_sport),
2907 OFFLOAD(TCP_DPORT, 16, U16_MAX, tcp.dest, 0, tcp_dport),
2908 /* in the linux tcphdr, the tcp flags field is 8 bits long */
2909 OFFLOAD(TCP_FLAGS, 8, U8_MAX, tcp.ack_seq, 5, tcp_flags),
2911 OFFLOAD(UDP_SPORT, 16, U16_MAX, udp.source, 0, udp_sport),
2912 OFFLOAD(UDP_DPORT, 16, U16_MAX, udp.dest, 0, udp_dport),
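/* Each entry above ties a pedit-visible header field to a HW modify-header
 * field id. An illustrative filter (device names are assumptions) that
 * lands in the IP_TTL entry and is offloaded as one MLX5_ACTION_TYPE_SET
 * on MLX5_ACTION_IN_FIELD_OUT_IP_TTL:
 *
 *   tc filter add dev $DEV ingress flower ip_proto tcp \
 *       action pedit ex munge ip ttl set 63 pipe \
 *       action mirred egress redirect dev $OUT
 */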
2915 static unsigned long mask_to_le(unsigned long mask, int size)
2921 mask_be32 = (__force __be32)(mask);
2922 mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
2923 } else if (size == 16) {
2924 mask_be32 = (__force __be32)(mask);
2925 mask_be16 = *(__be16 *)&mask_be32;
2926 mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
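/* Worked example (assuming a little-endian host): pedit masks arrive in
 * network byte order, so for a 16-bit field such as tcp_sport the set bits
 * sit in byte positions the generic bitops below would misreport;
 * mask_to_le() converts the mask so that find_first_bit()/find_last_bit()
 * return offsets consistent with the HW's view of the field.
 */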
2932 static int offload_pedit_fields(struct mlx5e_priv *priv,
2934 struct mlx5e_tc_flow_parse_attr *parse_attr,
2936 struct netlink_ext_ack *extack)
2938 struct pedit_headers *set_masks, *add_masks, *set_vals, *add_vals;
2939 struct pedit_headers_action *hdrs = parse_attr->hdrs;
2940 void *headers_c, *headers_v, *action, *vals_p;
2941 u32 *s_masks_p, *a_masks_p, s_mask, a_mask;
2942 struct mlx5e_tc_mod_hdr_acts *mod_acts;
2943 unsigned long mask, field_mask;
2944 int i, first, last, next_z;
2945 struct mlx5_fields *f;
2948 mod_acts = &parse_attr->mod_hdr_acts;
2949 headers_c = mlx5e_get_match_headers_criteria(*action_flags, &parse_attr->spec);
2950 headers_v = mlx5e_get_match_headers_value(*action_flags, &parse_attr->spec);
2952 set_masks = &hdrs[0].masks;
2953 add_masks = &hdrs[1].masks;
2954 set_vals = &hdrs[0].vals;
2955 add_vals = &hdrs[1].vals;
2957 for (i = 0; i < ARRAY_SIZE(fields); i++) {
2961 /* avoid seeing bits set from previous iterations */
2965 s_masks_p = (void *)set_masks + f->offset;
2966 a_masks_p = (void *)add_masks + f->offset;
2968 s_mask = *s_masks_p & f->field_mask;
2969 a_mask = *a_masks_p & f->field_mask;
2971 if (!s_mask && !a_mask) /* nothing to offload here */
2974 if (s_mask && a_mask) {
2975 NL_SET_ERR_MSG_MOD(extack,
2976 "can't set and add to the same HW field");
2977 netdev_warn(priv->netdev,
2978 "mlx5: can't set and add to the same HW field (%x)\n",
2985 void *match_mask = headers_c + f->match_offset;
2986 void *match_val = headers_v + f->match_offset;
2988 cmd = MLX5_ACTION_TYPE_SET;
2990 vals_p = (void *)set_vals + f->offset;
2991 /* don't rewrite if we have a match on the same value */
2992 if (cmp_val_mask(vals_p, s_masks_p, match_val,
2993 match_mask, f->field_bsize))
2995 /* clear to denote we consumed this field */
2996 *s_masks_p &= ~f->field_mask;
2998 cmd = MLX5_ACTION_TYPE_ADD;
3000 vals_p = (void *)add_vals + f->offset;
3001 /* adding 0 is a no-op, skip */
3002 if ((*(u32 *)vals_p & f->field_mask) == 0)
3004 /* clear to denote we consumed this field */
3005 *a_masks_p &= ~f->field_mask;
3010 mask = mask_to_le(mask, f->field_bsize);
3012 first = find_first_bit(&mask, f->field_bsize);
3013 next_z = find_next_zero_bit(&mask, f->field_bsize, first);
3014 last = find_last_bit(&mask, f->field_bsize);
3015 if (first < next_z && next_z < last) {
3016 NL_SET_ERR_MSG_MOD(extack,
3017 "rewrite of non-contiguous sub-fields isn't supported");
3018 netdev_warn(priv->netdev,
3019 "mlx5: rewrite of non-contiguous sub-fields (mask %lx) isn't offloaded\n",
3024 action = mlx5e_mod_hdr_alloc(priv->mdev, namespace, mod_acts);
3025 if (IS_ERR(action)) {
3026 NL_SET_ERR_MSG_MOD(extack,
3027 "too many pedit actions, can't offload");
3028 mlx5_core_warn(priv->mdev,
3029 "mlx5: parsed %d pedit actions, can't do more\n",
3030 mod_acts->num_actions);
3031 return PTR_ERR(action);
3034 MLX5_SET(set_action_in, action, action_type, cmd);
3035 MLX5_SET(set_action_in, action, field, f->field);
3037 if (cmd == MLX5_ACTION_TYPE_SET) {
3040 field_mask = mask_to_le(f->field_mask, f->field_bsize);
3042 /* a bit-sized field may start at a bit other than the first one */
3043 start = find_first_bit(&field_mask, f->field_bsize);
3045 MLX5_SET(set_action_in, action, offset, first - start);
3046 /* length is num of bits to be written, zero means length of 32 */
3047 MLX5_SET(set_action_in, action, length, (last - first + 1));
3050 if (f->field_bsize == 32)
3051 MLX5_SET(set_action_in, action, data, ntohl(*(__be32 *)vals_p) >> first);
3052 else if (f->field_bsize == 16)
3053 MLX5_SET(set_action_in, action, data, ntohs(*(__be16 *)vals_p) >> first);
3054 else if (f->field_bsize == 8)
3055 MLX5_SET(set_action_in, action, data, *(u8 *)vals_p >> first);
3057 ++mod_acts->num_actions;
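/* Example of the HW action emitted by one loop iteration (illustrative
 * values): a pedit that sets ip4.ttl to 63 produces roughly:
 *
 *   MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
 *   MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_OUT_IP_TTL);
 *   MLX5_SET(set_action_in, action, offset, 0);
 *   MLX5_SET(set_action_in, action, length, 8);
 *   MLX5_SET(set_action_in, action, data, 63);
 */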
3063 static const struct pedit_headers zero_masks = {};
3065 static int verify_offload_pedit_fields(struct mlx5e_priv *priv,
3066 struct mlx5e_tc_flow_parse_attr *parse_attr,
3067 struct netlink_ext_ack *extack)
3069 struct pedit_headers *cmd_masks;
3072 for (cmd = 0; cmd < __PEDIT_CMD_MAX; cmd++) {
3073 cmd_masks = &parse_attr->hdrs[cmd].masks;
3074 if (memcmp(cmd_masks, &zero_masks, sizeof(zero_masks))) {
3075 NL_SET_ERR_MSG_MOD(extack, "attempt to offload an unsupported field");
3076 netdev_warn(priv->netdev, "attempt to offload an unsupported field (cmd %d)\n", cmd);
3077 print_hex_dump(KERN_WARNING, "mask: ", DUMP_PREFIX_ADDRESS,
3078 16, 1, cmd_masks, sizeof(zero_masks), true);
3086 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
3087 struct mlx5e_tc_flow_parse_attr *parse_attr,
3089 struct netlink_ext_ack *extack)
3093 err = offload_pedit_fields(priv, namespace, parse_attr, action_flags, extack);
3094 if (err)
3095 goto out_dealloc_parsed_actions;
3097 err = verify_offload_pedit_fields(priv, parse_attr, extack);
3098 if (err)
3099 goto out_dealloc_parsed_actions;
3103 out_dealloc_parsed_actions:
3104 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3108 struct ip_ttl_word {
3114 struct ipv6_hoplimit_word {
3121 is_action_keys_supported(const struct flow_action_entry *act, bool ct_flow,
3122 bool *modify_ip_header, bool *modify_tuple,
3123 struct netlink_ext_ack *extack)
3128 htype = act->mangle.htype;
3129 offset = act->mangle.offset;
3130 mask = ~act->mangle.mask;
3131 /* For the IPv4 & IPv6 headers, check the whole 4-byte word
3132 * to determine whether the modified fields
3133 * are ttl & hop_limit only, or more than that.
3134 */
3135 if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP4) {
3136 struct ip_ttl_word *ttl_word =
3137 (struct ip_ttl_word *)&mask;
3139 if (offset != offsetof(struct iphdr, ttl) ||
3140 ttl_word->protocol ||
3142 *modify_ip_header = true;
3145 if (offset >= offsetof(struct iphdr, saddr))
3146 *modify_tuple = true;
3148 if (ct_flow && *modify_tuple) {
3149 NL_SET_ERR_MSG_MOD(extack,
3150 "can't offload re-write of ipv4 address with action ct");
3153 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_IP6) {
3154 struct ipv6_hoplimit_word *hoplimit_word =
3155 (struct ipv6_hoplimit_word *)&mask;
3157 if (offset != offsetof(struct ipv6hdr, payload_len) ||
3158 hoplimit_word->payload_len ||
3159 hoplimit_word->nexthdr) {
3160 *modify_ip_header = true;
3163 if (ct_flow && offset >= offsetof(struct ipv6hdr, saddr))
3164 *modify_tuple = true;
3166 if (ct_flow && *modify_tuple) {
3167 NL_SET_ERR_MSG_MOD(extack,
3168 "can't offload re-write of ipv6 address with action ct");
3171 } else if (htype == FLOW_ACT_MANGLE_HDR_TYPE_TCP ||
3172 htype == FLOW_ACT_MANGLE_HDR_TYPE_UDP) {
3173 *modify_tuple = true;
3175 NL_SET_ERR_MSG_MOD(extack,
3176 "can't offload re-write of transport header ports with action ct");
3184 static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
3185 bool ct_flow, struct netlink_ext_ack *extack,
3186 struct mlx5e_priv *priv,
3187 struct mlx5_flow_spec *spec)
3189 if (!modify_tuple || ct_clear)
3193 NL_SET_ERR_MSG_MOD(extack,
3194 "can't offload tuple modification with non-clear ct()");
3195 netdev_info(priv->netdev,
3196 "can't offload tuple modification with non-clear ct()");
3200 /* Add a ct_state=-trk match so the rule will be offloaded for non-ct flows
3201 * (or after a clear action); otherwise, since the tuple is changed,
3202 * we can't restore the ct state
3203 */
3204 if (mlx5_tc_ct_add_no_trk_match(spec)) {
3205 NL_SET_ERR_MSG_MOD(extack,
3206 "can't offload tuple modification with ct matches and no ct(clear) action");
3207 netdev_info(priv->netdev,
3208 "can't offload tuple modification with ct matches and no ct(clear) action");
3215 static bool modify_header_match_supported(struct mlx5e_priv *priv,
3216 struct mlx5_flow_spec *spec,
3217 struct flow_action *flow_action,
3218 u32 actions, bool ct_flow,
3220 struct netlink_ext_ack *extack)
3222 const struct flow_action_entry *act;
3223 bool modify_ip_header, modify_tuple;
3230 headers_c = mlx5e_get_match_headers_criteria(actions, spec);
3231 headers_v = mlx5e_get_match_headers_value(actions, spec);
3232 ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
3234 /* for non-IP we only re-write MACs, so we're okay */
3235 if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
3236 ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
3239 modify_ip_header = false;
3240 modify_tuple = false;
3241 flow_action_for_each(i, act, flow_action) {
3242 if (act->id != FLOW_ACTION_MANGLE &&
3243 act->id != FLOW_ACTION_ADD)
3246 if (!is_action_keys_supported(act, ct_flow,
3248 &modify_tuple, extack))
3252 if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
3256 ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
3257 if (modify_ip_header && ip_proto != IPPROTO_TCP &&
3258 ip_proto != IPPROTO_UDP && ip_proto != IPPROTO_ICMP) {
3259 NL_SET_ERR_MSG_MOD(extack,
3260 "can't offload re-write of non TCP/UDP");
3261 netdev_info(priv->netdev, "can't offload re-write of ip proto %d\n",
3271 actions_match_supported_fdb(struct mlx5e_priv *priv,
3272 struct mlx5e_tc_flow_parse_attr *parse_attr,
3273 struct mlx5e_tc_flow *flow,
3274 struct netlink_ext_ack *extack)
3276 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3277 bool ct_flow, ct_clear;
3279 ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3280 ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3282 if (esw_attr->split_count && ct_flow &&
3283 !MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve)) {
3284 /* All registers used by ct are cleared when using
3285 * split rules.
3286 */
3287 NL_SET_ERR_MSG_MOD(extack, "Can't offload mirroring with action ct");
3291 if (esw_attr->split_count > 0 && !mlx5_esw_has_fwd_fdb(priv->mdev)) {
3292 NL_SET_ERR_MSG_MOD(extack,
3293 "current firmware doesn't support split rule for port mirroring");
3294 netdev_warn_once(priv->netdev,
3295 "current firmware doesn't support split rule for port mirroring\n");
3303 actions_match_supported(struct mlx5e_priv *priv,
3304 struct flow_action *flow_action,
3306 struct mlx5e_tc_flow_parse_attr *parse_attr,
3307 struct mlx5e_tc_flow *flow,
3308 struct netlink_ext_ack *extack)
3310 bool ct_flow, ct_clear;
3312 ct_clear = flow->attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
3313 ct_flow = flow_flag_test(flow, CT) && !ct_clear;
3316 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3317 NL_SET_ERR_MSG_MOD(extack, "Rule must have at least one forward/drop action");
3322 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3323 NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
3327 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3328 actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
3329 NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
3334 (MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_DROP))) {
3335 NL_SET_ERR_MSG_MOD(extack, "Rule cannot support forward+drop action");
3339 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3340 actions & MLX5_FLOW_CONTEXT_ACTION_DROP) {
3341 NL_SET_ERR_MSG_MOD(extack, "Drop with modify header action is not supported");
3345 if (actions & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR &&
3346 !modify_header_match_supported(priv, &parse_attr->spec, flow_action,
3347 actions, ct_flow, ct_clear, extack))
3350 if (mlx5e_is_eswitch_flow(flow) &&
3351 !actions_match_supported_fdb(priv, parse_attr, flow, extack))
3357 static bool same_port_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3359 return priv->mdev == peer_priv->mdev;
3362 bool mlx5e_same_hw_devs(struct mlx5e_priv *priv, struct mlx5e_priv *peer_priv)
3364 struct mlx5_core_dev *fmdev, *pmdev;
3365 u64 fsystem_guid, psystem_guid;
3368 pmdev = peer_priv->mdev;
3370 fsystem_guid = mlx5_query_nic_system_image_guid(fmdev);
3371 psystem_guid = mlx5_query_nic_system_image_guid(pmdev);
3373 return (fsystem_guid == psystem_guid);
3377 actions_prepare_mod_hdr_actions(struct mlx5e_priv *priv,
3378 struct mlx5e_tc_flow *flow,
3379 struct mlx5_flow_attr *attr,
3380 struct netlink_ext_ack *extack)
3382 struct mlx5e_tc_flow_parse_attr *parse_attr = attr->parse_attr;
3383 struct pedit_headers_action *hdrs = parse_attr->hdrs;
3384 enum mlx5_flow_namespace_type ns_type;
3387 if (!hdrs[TCA_PEDIT_KEY_EX_CMD_SET].pedits &&
3388 !hdrs[TCA_PEDIT_KEY_EX_CMD_ADD].pedits)
3391 ns_type = mlx5e_get_flow_namespace(flow);
3393 err = alloc_tc_pedit_action(priv, ns_type, parse_attr, &attr->action, extack);
3397 if (parse_attr->mod_hdr_acts.num_actions > 0)
3400 /* In case all pedit actions are skipped, remove the MOD_HDR flag. */
3401 attr->action &= ~MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3402 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
3404 if (ns_type != MLX5_FLOW_NAMESPACE_FDB)
3407 if (!((attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_POP) ||
3408 (attr->action & MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH)))
3409 attr->esw_attr->split_count = 0;
3414 static struct mlx5_flow_attr*
3415 mlx5e_clone_flow_attr_for_post_act(struct mlx5_flow_attr *attr,
3416 enum mlx5_flow_namespace_type ns_type)
3418 struct mlx5e_tc_flow_parse_attr *parse_attr;
3419 u32 attr_sz = ns_to_attr_sz(ns_type);
3420 struct mlx5_flow_attr *attr2;
3422 attr2 = mlx5_alloc_flow_attr(ns_type);
3423 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3424 if (!attr2 || !parse_attr) {
3430 memcpy(attr2, attr, attr_sz);
3431 INIT_LIST_HEAD(&attr2->list);
3432 parse_attr->filter_dev = attr->parse_attr->filter_dev;
3435 attr2->parse_attr = parse_attr;
3439 static struct mlx5_core_dev *
3440 get_flow_counter_dev(struct mlx5e_tc_flow *flow)
3442 return mlx5e_is_eswitch_flow(flow) ? flow->attr->esw_attr->counter_dev : flow->priv->mdev;
3445 struct mlx5_flow_attr *
3446 mlx5e_tc_get_encap_attr(struct mlx5e_tc_flow *flow)
3448 struct mlx5_esw_flow_attr *esw_attr;
3449 struct mlx5_flow_attr *attr;
3452 list_for_each_entry(attr, &flow->attrs, list) {
3453 esw_attr = attr->esw_attr;
3454 for (i = 0; i < MLX5_MAX_FLOW_FWD_VPORTS; i++) {
3455 if (esw_attr->dests[i].flags & MLX5_ESW_DEST_ENCAP)
3464 mlx5e_tc_unoffload_flow_post_acts(struct mlx5e_tc_flow *flow)
3466 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3467 struct mlx5_flow_attr *attr;
3469 list_for_each_entry(attr, &flow->attrs, list) {
3470 if (list_is_last(&attr->list, &flow->attrs))
3473 mlx5e_tc_post_act_unoffload(post_act, attr->post_act_handle);
3478 free_flow_post_acts(struct mlx5e_tc_flow *flow)
3480 struct mlx5_core_dev *counter_dev = get_flow_counter_dev(flow);
3481 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3482 struct mlx5_flow_attr *attr, *tmp;
3485 list_for_each_entry_safe(attr, tmp, &flow->attrs, list) {
3486 if (list_is_last(&attr->list, &flow->attrs))
3489 if (attr->post_act_handle)
3490 mlx5e_tc_post_act_del(post_act, attr->post_act_handle);
3492 clean_encap_dests(flow->priv, flow, attr, &vf_tun);
3494 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
3495 mlx5_fc_destroy(counter_dev, attr->counter);
3497 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
3498 mlx5e_mod_hdr_dealloc(&attr->parse_attr->mod_hdr_acts);
3499 if (attr->modify_hdr)
3500 mlx5_modify_header_dealloc(flow->priv->mdev, attr->modify_hdr);
3503 list_del(&attr->list);
3504 kvfree(attr->parse_attr);
3510 mlx5e_tc_offload_flow_post_acts(struct mlx5e_tc_flow *flow)
3512 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3513 struct mlx5_flow_attr *attr;
3516 list_for_each_entry(attr, &flow->attrs, list) {
3517 if (list_is_last(&attr->list, &flow->attrs))
3520 err = mlx5e_tc_post_act_offload(post_act, attr->post_act_handle);
3528 /* TC filter rule HW translation:
3529 *
3530 * +---------------------+
3531 * + ft prio (tc chain)  +
3532 * + original match      +
3533 * +---------------------+
3534 *           |
3535 *           | if multi table action
3536 *           |
3537 *           v
3538 * +---------------------+
3539 * + post act ft         |<----.
3540 * + match fte id        |     | split on multi table action
3541 * + do actions          |-----'
3542 * +---------------------+
3543 *           |
3544 *           |
3545 *           v
3546 * Do rest of the actions after last multi table action.
3547 */
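/* Illustrative rule that takes the split path sketched above (names are
 * assumptions): ct is a multi table action, so the redirect that follows
 * it is installed in the post-action table:
 *
 *   tc filter add dev $REP ingress prio 1 chain 0 proto ip flower \
 *       ct_state -trk action ct pipe \
 *       action mirred egress redirect dev $OUT_REP
 */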
3549 alloc_flow_post_acts(struct mlx5e_tc_flow *flow, struct netlink_ext_ack *extack)
3551 struct mlx5e_post_act *post_act = get_post_action(flow->priv);
3552 struct mlx5_flow_attr *attr, *next_attr = NULL;
3553 struct mlx5e_post_act_handle *handle;
3554 bool vf_tun, encap_valid = true;
3557 /* This list is walked in reverse order, as needed:
3558 * the first entry is the last attribute.
3559 */
3560 list_for_each_entry(attr, &flow->attrs, list) {
3562 /* Set counter action on last post act rule. */
3563 attr->action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
3565 err = mlx5e_tc_act_set_next_post_act(flow, attr, next_attr);
3570 /* Don't add post_act rule for first attr (last in the list).
3571 * It's being handled by the caller.
3573 if (list_is_last(&attr->list, &flow->attrs))
3576 err = set_encap_dests(flow->priv, flow, attr, extack, &encap_valid, &vf_tun);
3581 flow_flag_set(flow, SLOW);
3583 err = actions_prepare_mod_hdr_actions(flow->priv, flow, attr, extack);
3587 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_MOD_HDR) {
3588 err = mlx5e_tc_add_flow_mod_hdr(flow->priv, flow, attr);
3593 if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
3594 err = alloc_flow_attr_counter(get_flow_counter_dev(flow), attr);
3599 handle = mlx5e_tc_post_act_add(post_act, attr);
3600 if (IS_ERR(handle)) {
3601 err = PTR_ERR(handle);
3605 attr->post_act_handle = handle;
3609 if (flow_flag_test(flow, SLOW))
3612 err = mlx5e_tc_offload_flow_post_acts(flow);
3620 free_flow_post_acts(flow);
3625 parse_tc_actions(struct mlx5e_tc_act_parse_state *parse_state,
3626 struct flow_action *flow_action)
3628 struct netlink_ext_ack *extack = parse_state->extack;
3629 struct mlx5e_tc_flow_action flow_action_reorder;
3630 struct mlx5e_tc_flow *flow = parse_state->flow;
3631 struct mlx5_flow_attr *attr = flow->attr;
3632 enum mlx5_flow_namespace_type ns_type;
3633 struct mlx5e_priv *priv = flow->priv;
3634 struct flow_action_entry *act, **_act;
3635 struct mlx5e_tc_act *tc_act;
3638 flow_action_reorder.num_entries = flow_action->num_entries;
3639 flow_action_reorder.entries = kcalloc(flow_action->num_entries,
3640 sizeof(*flow_action_reorder.entries), GFP_KERNEL);
3641 if (!flow_action_reorder.entries)
3644 mlx5e_tc_act_reorder_flow_actions(flow_action, &flow_action_reorder);
3646 ns_type = mlx5e_get_flow_namespace(flow);
3647 list_add(&attr->list, &flow->attrs);
3649 flow_action_for_each(i, _act, &flow_action_reorder) {
3651 tc_act = mlx5e_tc_act_get(act->id, ns_type);
3653 NL_SET_ERR_MSG_MOD(extack, "Not implemented offload action");
3658 if (!tc_act->can_offload(parse_state, act, i, attr)) {
3663 err = tc_act->parse_action(parse_state, act, priv, attr);
3667 parse_state->actions |= attr->action;
3669 /* Split attr for multi table act if not the last act. */
3670 if (tc_act->is_multi_table_act &&
3671 tc_act->is_multi_table_act(priv, act, attr) &&
3672 i < flow_action_reorder.num_entries - 1) {
3673 err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3677 attr = mlx5e_clone_flow_attr_for_post_act(flow->attr, ns_type);
3683 list_add(&attr->list, &flow->attrs);
3687 kfree(flow_action_reorder.entries);
3689 err = mlx5e_tc_act_post_parse(parse_state, flow_action, attr, ns_type);
3691 goto out_free_post_acts;
3693 err = alloc_flow_post_acts(flow, extack);
3695 goto out_free_post_acts;
3700 kfree(flow_action_reorder.entries);
3702 free_flow_post_acts(flow);
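/* Summary of the flow above: parse_tc_actions() walks the reordered action
 * list once; every action that declares itself a multi table act (e.g. ct)
 * closes the current attr and opens a fresh clone, which
 * alloc_flow_post_acts() then turns into post-action table rules.
 */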
3708 flow_action_supported(struct flow_action *flow_action,
3709 struct netlink_ext_ack *extack)
3711 if (!flow_action_has_entries(flow_action)) {
3712 NL_SET_ERR_MSG_MOD(extack, "Flow action doesn't have any entries");
3716 if (!flow_action_hw_stats_check(flow_action, extack,
3717 FLOW_ACTION_HW_STATS_DELAYED_BIT)) {
3718 NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
3726 parse_tc_nic_actions(struct mlx5e_priv *priv,
3727 struct flow_action *flow_action,
3728 struct mlx5e_tc_flow *flow,
3729 struct netlink_ext_ack *extack)
3731 struct mlx5e_tc_act_parse_state *parse_state;
3732 struct mlx5e_tc_flow_parse_attr *parse_attr;
3733 struct mlx5_flow_attr *attr = flow->attr;
3736 err = flow_action_supported(flow_action, extack);
3740 attr->nic_attr->flow_tag = MLX5_FS_DEFAULT_FLOW_TAG;
3741 parse_attr = attr->parse_attr;
3742 parse_state = &parse_attr->parse_state;
3743 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
3744 parse_state->ct_priv = get_ct_priv(priv);
3746 err = parse_tc_actions(parse_state, flow_action);
3750 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
3754 if (!actions_match_supported(priv, flow_action, parse_state->actions,
3755 parse_attr, flow, extack))
3761 static bool is_merged_eswitch_vfs(struct mlx5e_priv *priv,
3762 struct net_device *peer_netdev)
3764 struct mlx5e_priv *peer_priv;
3766 peer_priv = netdev_priv(peer_netdev);
3768 return (MLX5_CAP_ESW(priv->mdev, merged_eswitch) &&
3769 mlx5e_eswitch_vf_rep(priv->netdev) &&
3770 mlx5e_eswitch_vf_rep(peer_netdev) &&
3771 mlx5e_same_hw_devs(priv, peer_priv));
3774 static bool same_hw_reps(struct mlx5e_priv *priv,
3775 struct net_device *peer_netdev)
3777 struct mlx5e_priv *peer_priv;
3779 peer_priv = netdev_priv(peer_netdev);
3781 return mlx5e_eswitch_rep(priv->netdev) &&
3782 mlx5e_eswitch_rep(peer_netdev) &&
3783 mlx5e_same_hw_devs(priv, peer_priv);
3786 static bool is_lag_dev(struct mlx5e_priv *priv,
3787 struct net_device *peer_netdev)
3789 return ((mlx5_lag_is_sriov(priv->mdev) ||
3790 mlx5_lag_is_multipath(priv->mdev)) &&
3791 same_hw_reps(priv, peer_netdev));
3794 static bool is_multiport_eligible(struct mlx5e_priv *priv, struct net_device *out_dev)
3796 if (mlx5e_eswitch_uplink_rep(out_dev) &&
3797 MLX5_CAP_PORT_SELECTION(priv->mdev, port_select_flow_table) &&
3798 MLX5_CAP_GEN(priv->mdev, create_lag_when_not_master_up))
3804 bool mlx5e_is_valid_eswitch_fwd_dev(struct mlx5e_priv *priv,
3805 struct net_device *out_dev)
3807 if (is_merged_eswitch_vfs(priv, out_dev))
3810 if (is_multiport_eligible(priv, out_dev))
3813 if (is_lag_dev(priv, out_dev))
3816 return mlx5e_eswitch_rep(out_dev) &&
3817 same_port_devs(priv, netdev_priv(out_dev));
3820 int mlx5e_set_fwd_to_int_port_actions(struct mlx5e_priv *priv,
3821 struct mlx5_flow_attr *attr,
3823 enum mlx5e_tc_int_port_type type,
3827 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
3828 struct mlx5e_tc_int_port_priv *int_port_priv;
3829 struct mlx5e_tc_flow_parse_attr *parse_attr;
3830 struct mlx5e_tc_int_port *dest_int_port;
3833 parse_attr = attr->parse_attr;
3834 int_port_priv = mlx5e_get_int_port_priv(priv);
3836 dest_int_port = mlx5e_tc_int_port_get(int_port_priv, ifindex, type);
3837 if (IS_ERR(dest_int_port))
3838 return PTR_ERR(dest_int_port);
3840 err = mlx5e_tc_match_to_reg_set(priv->mdev, &parse_attr->mod_hdr_acts,
3841 MLX5_FLOW_NAMESPACE_FDB, VPORT_TO_REG,
3842 mlx5e_tc_int_port_get_metadata(dest_int_port));
3844 mlx5e_tc_int_port_put(int_port_priv, dest_int_port);
3848 *action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
3850 esw_attr->dest_int_port = dest_int_port;
3851 esw_attr->dests[out_index].flags |= MLX5_ESW_DEST_CHAIN_WITH_SRC_PORT_CHANGE;
3853 /* Forward to root fdb for matching against the new source vport */
3854 attr->dest_chain = 0;
3860 parse_tc_fdb_actions(struct mlx5e_priv *priv,
3861 struct flow_action *flow_action,
3862 struct mlx5e_tc_flow *flow,
3863 struct netlink_ext_ack *extack)
3865 struct mlx5e_tc_act_parse_state *parse_state;
3866 struct mlx5e_tc_flow_parse_attr *parse_attr;
3867 struct mlx5_flow_attr *attr = flow->attr;
3868 struct mlx5_esw_flow_attr *esw_attr;
3871 err = flow_action_supported(flow_action, extack);
3875 esw_attr = attr->esw_attr;
3876 parse_attr = attr->parse_attr;
3877 parse_state = &parse_attr->parse_state;
3878 mlx5e_tc_act_init_parse_state(parse_state, flow, flow_action, extack);
3879 parse_state->ct_priv = get_ct_priv(priv);
3881 err = parse_tc_actions(parse_state, flow_action);
3885 /* Forwarding to/from an internal port can only have one destination */
3886 if ((netif_is_ovs_master(parse_attr->filter_dev) || esw_attr->dest_int_port) &&
3887 esw_attr->out_count > 1) {
3888 NL_SET_ERR_MSG_MOD(extack,
3889 "Rules with internal port can have only one destination");
3893 err = actions_prepare_mod_hdr_actions(priv, flow, attr, extack);
3897 if (!actions_match_supported(priv, flow_action, parse_state->actions,
3898 parse_attr, flow, extack))
3904 static void get_flags(int flags, unsigned long *flow_flags)
3906 unsigned long __flow_flags = 0;
3908 if (flags & MLX5_TC_FLAG(INGRESS))
3909 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_INGRESS);
3910 if (flags & MLX5_TC_FLAG(EGRESS))
3911 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_EGRESS);
3913 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD))
3914 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
3915 if (flags & MLX5_TC_FLAG(NIC_OFFLOAD))
3916 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
3917 if (flags & MLX5_TC_FLAG(FT_OFFLOAD))
3918 __flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_FT);
3920 *flow_flags = __flow_flags;
3923 static const struct rhashtable_params tc_ht_params = {
3924 .head_offset = offsetof(struct mlx5e_tc_flow, node),
3925 .key_offset = offsetof(struct mlx5e_tc_flow, cookie),
3926 .key_len = sizeof(((struct mlx5e_tc_flow *)0)->cookie),
3927 .automatic_shrinking = true,
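/* The table is keyed by the flower cookie (the value handed to us in
 * struct flow_cls_offload), so lookups elsewhere in this file are simply:
 *
 *   flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
 */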
3930 static struct rhashtable *get_tc_ht(struct mlx5e_priv *priv,
3931 unsigned long flags)
3933 struct mlx5e_rep_priv *rpriv;
3935 if (flags & MLX5_TC_FLAG(ESW_OFFLOAD)) {
3936 rpriv = priv->ppriv;
3937 return &rpriv->tc_ht;
3938 } else /* NIC offload */
3939 return &priv->fs.tc.ht;
3942 static bool is_peer_flow_needed(struct mlx5e_tc_flow *flow)
3944 struct mlx5_esw_flow_attr *esw_attr = flow->attr->esw_attr;
3945 struct mlx5_flow_attr *attr = flow->attr;
3946 bool is_rep_ingress = esw_attr->in_rep->vport != MLX5_VPORT_UPLINK &&
3947 flow_flag_test(flow, INGRESS);
3948 bool act_is_encap = !!(attr->action &
3949 MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT);
3950 bool esw_paired = mlx5_devcom_is_paired(esw_attr->in_mdev->priv.devcom,
3951 MLX5_DEVCOM_ESW_OFFLOADS);
3956 if ((mlx5_lag_is_sriov(esw_attr->in_mdev) ||
3957 mlx5_lag_is_multipath(esw_attr->in_mdev)) &&
3958 (is_rep_ingress || act_is_encap))
3964 struct mlx5_flow_attr *
3965 mlx5_alloc_flow_attr(enum mlx5_flow_namespace_type type)
3967 u32 ex_attr_size = (type == MLX5_FLOW_NAMESPACE_FDB) ?
3968 sizeof(struct mlx5_esw_flow_attr) :
3969 sizeof(struct mlx5_nic_flow_attr);
3970 struct mlx5_flow_attr *attr;
3972 attr = kzalloc(sizeof(*attr) + ex_attr_size, GFP_KERNEL);
3976 INIT_LIST_HEAD(&attr->list);
3981 mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
3982 struct flow_cls_offload *f, unsigned long flow_flags,
3983 struct mlx5e_tc_flow_parse_attr **__parse_attr,
3984 struct mlx5e_tc_flow **__flow)
3986 struct mlx5e_tc_flow_parse_attr *parse_attr;
3987 struct mlx5_flow_attr *attr;
3988 struct mlx5e_tc_flow *flow;
3992 flow = kzalloc(sizeof(*flow), GFP_KERNEL);
3993 parse_attr = kvzalloc(sizeof(*parse_attr), GFP_KERNEL);
3994 if (!parse_attr || !flow)
3997 flow->flags = flow_flags;
3998 flow->cookie = f->cookie;
4001 attr = mlx5_alloc_flow_attr(mlx5e_get_flow_namespace(flow));
4007 for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++)
4008 INIT_LIST_HEAD(&flow->encaps[out_index].list);
4009 INIT_LIST_HEAD(&flow->hairpin);
4010 INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
4011 INIT_LIST_HEAD(&flow->attrs);
4012 refcount_set(&flow->refcnt, 1);
4013 init_completion(&flow->init_done);
4014 init_completion(&flow->del_hw_done);
4017 *__parse_attr = parse_attr;
4028 mlx5e_flow_attr_init(struct mlx5_flow_attr *attr,
4029 struct mlx5e_tc_flow_parse_attr *parse_attr,
4030 struct flow_cls_offload *f)
4032 attr->parse_attr = parse_attr;
4033 attr->chain = f->common.chain_index;
4034 attr->prio = f->common.prio;
4038 mlx5e_flow_esw_attr_init(struct mlx5_flow_attr *attr,
4039 struct mlx5e_priv *priv,
4040 struct mlx5e_tc_flow_parse_attr *parse_attr,
4041 struct flow_cls_offload *f,
4042 struct mlx5_eswitch_rep *in_rep,
4043 struct mlx5_core_dev *in_mdev)
4045 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4046 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
4048 mlx5e_flow_attr_init(attr, parse_attr, f);
4050 esw_attr->in_rep = in_rep;
4051 esw_attr->in_mdev = in_mdev;
4053 if (MLX5_CAP_ESW(esw->dev, counter_eswitch_affinity) ==
4054 MLX5_COUNTER_SOURCE_ESWITCH)
4055 esw_attr->counter_dev = in_mdev;
4057 esw_attr->counter_dev = priv->mdev;
4060 static struct mlx5e_tc_flow *
4061 __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4062 struct flow_cls_offload *f,
4063 unsigned long flow_flags,
4064 struct net_device *filter_dev,
4065 struct mlx5_eswitch_rep *in_rep,
4066 struct mlx5_core_dev *in_mdev)
4068 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4069 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4070 struct netlink_ext_ack *extack = f->common.extack;
4071 struct mlx5e_tc_flow_parse_attr *parse_attr;
4072 struct mlx5e_tc_flow *flow;
4075 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_ESWITCH);
4076 attr_size = sizeof(struct mlx5_esw_flow_attr);
4077 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4078 &parse_attr, &flow);
4082 parse_attr->filter_dev = filter_dev;
4083 mlx5e_flow_esw_attr_init(flow->attr,
4085 f, in_rep, in_mdev);
4087 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4092 /* actions validation depends on parsing the ct matches first */
4093 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4094 &flow->attr->ct_attr, extack);
4098 /* always set IP version for indirect table handling */
4099 flow->attr->ip_version = mlx5e_tc_get_ip_version(&parse_attr->spec, true);
4101 err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
4105 if (flow->attr->lag.count) {
4106 err = mlx5_lag_add_mpesw_rule(esw->dev);
4111 err = mlx5e_tc_add_fdb_flow(priv, flow, extack);
4112 complete_all(&flow->init_done);
4114 if (!(err == -ENETUNREACH && mlx5_lag_is_multipath(in_mdev)))
4117 add_unready_flow(flow);
4123 if (flow->attr->lag.count)
4124 mlx5_lag_del_mpesw_rule(esw->dev);
4126 mlx5e_flow_put(priv, flow);
4128 return ERR_PTR(err);
4131 static int mlx5e_tc_add_fdb_peer_flow(struct flow_cls_offload *f,
4132 struct mlx5e_tc_flow *flow,
4133 unsigned long flow_flags)
4135 struct mlx5e_priv *priv = flow->priv, *peer_priv;
4136 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch, *peer_esw;
4137 struct mlx5_esw_flow_attr *attr = flow->attr->esw_attr;
4138 struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
4139 struct mlx5e_tc_flow_parse_attr *parse_attr;
4140 struct mlx5e_rep_priv *peer_urpriv;
4141 struct mlx5e_tc_flow *peer_flow;
4142 struct mlx5_core_dev *in_mdev;
4145 peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
4149 peer_urpriv = mlx5_eswitch_get_uplink_priv(peer_esw, REP_ETH);
4150 peer_priv = netdev_priv(peer_urpriv->netdev);
4152 /* in_mdev is the mdev the packet originated from.
4153 * So packets redirected to the uplink use the same mdev as the
4154 * original flow, and packets redirected from the uplink use the
4155 * peer mdev.
4156 */
4157 if (attr->in_rep->vport == MLX5_VPORT_UPLINK)
4158 in_mdev = peer_priv->mdev;
4160 in_mdev = priv->mdev;
4162 parse_attr = flow->attr->parse_attr;
4163 peer_flow = __mlx5e_add_fdb_flow(peer_priv, f, flow_flags,
4164 parse_attr->filter_dev,
4165 attr->in_rep, in_mdev);
4166 if (IS_ERR(peer_flow)) {
4167 err = PTR_ERR(peer_flow);
4171 flow->peer_flow = peer_flow;
4172 flow_flag_set(flow, DUP);
4173 mutex_lock(&esw->offloads.peer_mutex);
4174 list_add_tail(&flow->peer, &esw->offloads.peer_flows);
4175 mutex_unlock(&esw->offloads.peer_mutex);
4178 mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
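/* Note on peer flows: under a paired eswitch (VF LAG / multipath) the same
 * filter is instantiated on both devices; is_peer_flow_needed() gates this,
 * and mlx5e_stats_flower() later sums the counters of the flow and its
 * peer_flow.
 */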
4183 mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
4184 struct flow_cls_offload *f,
4185 unsigned long flow_flags,
4186 struct net_device *filter_dev,
4187 struct mlx5e_tc_flow **__flow)
4189 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4190 struct mlx5_eswitch_rep *in_rep = rpriv->rep;
4191 struct mlx5_core_dev *in_mdev = priv->mdev;
4192 struct mlx5e_tc_flow *flow;
4195 flow = __mlx5e_add_fdb_flow(priv, f, flow_flags, filter_dev, in_rep,
4198 return PTR_ERR(flow);
4200 if (is_peer_flow_needed(flow)) {
4201 err = mlx5e_tc_add_fdb_peer_flow(f, flow, flow_flags);
4203 mlx5e_tc_del_fdb_flow(priv, flow);
4217 mlx5e_add_nic_flow(struct mlx5e_priv *priv,
4218 struct flow_cls_offload *f,
4219 unsigned long flow_flags,
4220 struct net_device *filter_dev,
4221 struct mlx5e_tc_flow **__flow)
4223 struct flow_rule *rule = flow_cls_offload_flow_rule(f);
4224 struct netlink_ext_ack *extack = f->common.extack;
4225 struct mlx5e_tc_flow_parse_attr *parse_attr;
4226 struct mlx5e_tc_flow *flow;
4229 if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
4230 if (!tc_cls_can_offload_and_chain0(priv->netdev, &f->common))
4232 } else if (!tc_can_offload_extack(priv->netdev, f->common.extack)) {
4236 flow_flags |= BIT(MLX5E_TC_FLOW_FLAG_NIC);
4237 attr_size = sizeof(struct mlx5_nic_flow_attr);
4238 err = mlx5e_alloc_flow(priv, attr_size, f, flow_flags,
4239 &parse_attr, &flow);
4243 parse_attr->filter_dev = filter_dev;
4244 mlx5e_flow_attr_init(flow->attr, parse_attr, f);
4246 err = parse_cls_flower(flow->priv, flow, &parse_attr->spec,
4251 err = mlx5_tc_ct_match_add(get_ct_priv(priv), &parse_attr->spec, f,
4252 &flow->attr->ct_attr, extack);
4256 err = parse_tc_nic_actions(priv, &rule->action, flow, extack);
4260 err = mlx5e_tc_add_nic_flow(priv, flow, extack);
4264 flow_flag_set(flow, OFFLOADED);
4270 flow_flag_set(flow, FAILED);
4271 mlx5e_mod_hdr_dealloc(&parse_attr->mod_hdr_acts);
4272 mlx5e_flow_put(priv, flow);
4278 mlx5e_tc_add_flow(struct mlx5e_priv *priv,
4279 struct flow_cls_offload *f,
4280 unsigned long flags,
4281 struct net_device *filter_dev,
4282 struct mlx5e_tc_flow **flow)
4284 struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
4285 unsigned long flow_flags;
4288 get_flags(flags, &flow_flags);
4290 if (!tc_can_offload_extack(priv->netdev, f->common.extack))
4293 if (esw && esw->mode == MLX5_ESWITCH_OFFLOADS)
4294 err = mlx5e_add_fdb_flow(priv, f, flow_flags,
4297 err = mlx5e_add_nic_flow(priv, f, flow_flags,
4303 static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
4304 struct mlx5e_rep_priv *rpriv)
4306 /* An offloaded flow rule is allowed to duplicate on a non-uplink representor
4307 * sharing a tc block with other slaves of a lag device. rpriv can be NULL if this
4308 * function is called from NIC mode.
4309 */
4310 return netif_is_lag_port(dev) && rpriv && rpriv->rep->vport != MLX5_VPORT_UPLINK;
4313 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
4314 struct flow_cls_offload *f, unsigned long flags)
4316 struct netlink_ext_ack *extack = f->common.extack;
4317 struct rhashtable *tc_ht = get_tc_ht(priv, flags);
4318 struct mlx5e_rep_priv *rpriv = priv->ppriv;
4319 struct mlx5e_tc_flow *flow;
4322 if (!mlx5_esw_hold(priv->mdev))
4325 mlx5_esw_get(priv->mdev);
4328 flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
4330 /* Same flow rule offloaded to non-uplink representor sharing tc block,
4331 * don't return an error.
4332 */
4333 if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
4336 NL_SET_ERR_MSG_MOD(extack,
4337 "flow cookie already exists, ignoring");
4338 netdev_warn_once(priv->netdev,
4339 "flow cookie %lx already exists, ignoring\n",
4349 trace_mlx5e_configure_flower(f);
4350 err = mlx5e_tc_add_flow(priv, f, flags, dev, &flow);
4354 /* Flow rule offloaded to non-uplink representor sharing tc block,
4355 * set the flow's owner dev.
4357 if (is_flow_rule_duplicate_allowed(dev, rpriv))
4358 flow->orig_dev = dev;
4360 err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
4364 mlx5_esw_release(priv->mdev);
4368 mlx5e_flow_put(priv, flow);
4370 mlx5_esw_put(priv->mdev);
4371 mlx5_esw_release(priv->mdev);

static bool same_flow_direction(struct mlx5e_tc_flow *flow, int flags)
{
	bool dir_ingress = !!(flags & MLX5_TC_FLAG(INGRESS));
	bool dir_egress = !!(flags & MLX5_TC_FLAG(EGRESS));

	return flow_flag_test(flow, INGRESS) == dir_ingress &&
		flow_flag_test(flow, EGRESS) == dir_egress;
}

int mlx5e_delete_flower(struct net_device *dev, struct mlx5e_priv *priv,
			struct flow_cls_offload *f, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5e_tc_flow *flow;
	int err;

	rcu_read_lock();
	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
	if (!flow || !same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	/* Only delete the flow if it doesn't have MLX5E_TC_FLOW_DELETED flag
	 * set.
	 */
	if (flow_flag_test_and_set(flow, DELETED)) {
		err = -EINVAL;
		goto errout;
	}
	rhashtable_remove_fast(tc_ht, &flow->node, tc_ht_params);
	rcu_read_unlock();

	trace_mlx5e_delete_flower(f);
	mlx5e_flow_put(priv, flow);

	mlx5_esw_put(priv->mdev);
	return 0;

errout:
	rcu_read_unlock();
	return err;
}

int mlx5e_stats_flower(struct net_device *dev, struct mlx5e_priv *priv,
		       struct flow_cls_offload *f, unsigned long flags)
{
	struct mlx5_devcom *devcom = priv->mdev->priv.devcom;
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
	struct mlx5_eswitch *peer_esw;
	struct mlx5e_tc_flow *flow;
	struct mlx5_fc *counter;
	u64 lastuse = 0;
	u64 packets = 0;
	u64 bytes = 0;
	int err = 0;

	rcu_read_lock();
	flow = mlx5e_flow_get(rhashtable_lookup(tc_ht, &f->cookie,
						tc_ht_params));
	rcu_read_unlock();
	if (IS_ERR(flow))
		return PTR_ERR(flow);

	if (!same_flow_direction(flow, flags)) {
		err = -EINVAL;
		goto errout;
	}

	if (mlx5e_is_offloaded_flow(flow) || flow_flag_test(flow, CT)) {
		counter = mlx5e_tc_get_counter(flow);
		if (!counter)
			goto errout;

		mlx5_fc_query_cached(counter, &bytes, &packets, &lastuse);
	}

	/* Under multipath it's possible for one rule to be currently
	 * un-offloaded while the other rule is offloaded.
	 */
	peer_esw = mlx5_devcom_get_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
	if (!peer_esw)
		goto out;

	if (flow_flag_test(flow, DUP) &&
	    flow_flag_test(flow->peer_flow, OFFLOADED)) {
		u64 bytes2;
		u64 packets2;
		u64 lastuse2;

		counter = mlx5e_tc_get_counter(flow->peer_flow);
		if (!counter)
			goto no_peer_counter;
		mlx5_fc_query_cached(counter, &bytes2, &packets2, &lastuse2);

		bytes += bytes2;
		packets += packets2;
		lastuse = max_t(u64, lastuse, lastuse2);
	}

no_peer_counter:
	mlx5_devcom_release_peer_data(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
out:
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);
	trace_mlx5e_stats_flower(f);
errout:
	mlx5e_flow_put(priv, flow);
	return err;
}

static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
			       struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct mlx5_eswitch *esw;
	u32 rate_mbps = 0;
	u16 vport_num;
	int err;

	vport_num = rpriv->rep->vport;
	if (vport_num >= MLX5_VPORT_ECPF) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Ingress rate limit is supported only for Eswitch ports connected to VFs");
		return -EOPNOTSUPP;
	}

	esw = priv->mdev->priv.eswitch;
	/* rate is given in bytes/sec.
	 * First convert to bits/sec and then round to the nearest mbit/secs.
	 * mbit means million bits.
	 * Moreover, if rate is non-zero we choose to configure to a minimum of
	 * 1 mbit/sec.
	 */
	if (rate) {
		rate = (rate * BITS_PER_BYTE) + 500000;
		do_div(rate, 1000000);
		rate_mbps = max_t(u32, rate, 1);
	}

	err = mlx5_esw_qos_modify_vport_rate(esw, vport_num, rate_mbps);
	if (err)
		NL_SET_ERR_MSG_MOD(extack, "failed applying action to hardware");

	return err;
}
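
/* Worked example for the conversion above (illustrative numbers only):
 * rate = 187500 bytes/sec -> 187500 * 8 = 1500000 bits/sec,
 * (1500000 + 500000) / 1000000 = 2, i.e. 1.5 Mbit/sec rounds up to
 * rate_mbps = 2. The +500000 bias implements round-to-nearest, and
 * max_t() enforces the 1 Mbit/sec floor for any non-zero rate.
 */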

static int mlx5e_policer_validate(const struct flow_action *action,
				  const struct flow_action_entry *act,
				  struct netlink_ext_ack *extack)
{
	if (act->police.exceed.act_id != FLOW_ACTION_DROP) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when exceed action is not drop");
		return -EOPNOTSUPP;
	}

	if (act->police.notexceed.act_id != FLOW_ACTION_PIPE &&
	    act->police.notexceed.act_id != FLOW_ACTION_ACCEPT) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when conform action is not pipe or ok");
		return -EOPNOTSUPP;
	}

	if (act->police.notexceed.act_id == FLOW_ACTION_ACCEPT &&
	    !flow_action_is_last_entry(action, act)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when conform action is ok, but action is not last");
		return -EOPNOTSUPP;
	}

	if (act->police.peakrate_bytes_ps ||
	    act->police.avrate || act->police.overhead) {
		NL_SET_ERR_MSG_MOD(extack,
				   "Offload not supported when peakrate/avrate/overhead is configured");
		return -EOPNOTSUPP;
	}

	if (act->police.rate_pkt_ps) {
		NL_SET_ERR_MSG_MOD(extack,
				   "QoS offload does not support packets per second");
		return -EOPNOTSUPP;
	}

	return 0;
}
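
/* Example of a police action shape that passes the checks above
 * (hypothetical command, not part of the driver):
 *
 *   tc filter add dev $REP ingress matchall \
 *           action police rate 100mbit burst 64k conform-exceed drop/pipe
 *
 * i.e. the exceed action is drop, the conform action is pipe, the rate is
 * byte-based, and no peakrate/avrate/overhead is configured.
 */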

static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv,
					struct flow_action *flow_action,
					struct netlink_ext_ack *extack)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	const struct flow_action_entry *act;
	int err;
	int i;

	if (!flow_action_has_entries(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall called with no action");
		return -EINVAL;
	}

	if (!flow_offload_has_one_action(flow_action)) {
		NL_SET_ERR_MSG_MOD(extack, "matchall policing supports only a single action");
		return -EOPNOTSUPP;
	}

	if (!flow_action_basic_hw_stats_check(flow_action, extack)) {
		NL_SET_ERR_MSG_MOD(extack, "Flow action HW stats type is not supported");
		return -EOPNOTSUPP;
	}

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_POLICE:
			err = mlx5e_policer_validate(flow_action, act, extack);
			if (err)
				return err;

			err = apply_police_params(priv, act->police.rate_bytes_ps, extack);
			if (err)
				return err;

			rpriv->prev_vf_vport_stats = priv->stats.vf_vport;
			break;
		default:
			NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall");
			return -EOPNOTSUPP;
		}
	}

	return 0;
}

int mlx5e_tc_configure_matchall(struct mlx5e_priv *priv,
				struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	if (ma->common.prio != 1) {
		NL_SET_ERR_MSG_MOD(extack, "only priority 1 is supported");
		return -EINVAL;
	}

	return scan_tc_matchall_fdb_actions(priv, &ma->rule->action, extack);
}

int mlx5e_tc_delete_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct netlink_ext_ack *extack = ma->common.extack;

	return apply_police_params(priv, 0, extack);
}

void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv,
			     struct tc_cls_matchall_offload *ma)
{
	struct mlx5e_rep_priv *rpriv = priv->ppriv;
	struct rtnl_link_stats64 cur_stats;
	u64 dbytes;
	u64 dpkts;

	cur_stats = priv->stats.vf_vport;
	dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets;
	dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes;
	rpriv->prev_vf_vport_stats = cur_stats;
	flow_stats_update(&ma->stats, dbytes, dpkts, 0, jiffies,
			  FLOW_ACTION_HW_STATS_DELAYED);
}
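
/* The deltas reported above are relative to the snapshot taken in
 * scan_tc_matchall_fdb_actions() and refreshed on every dump: e.g. if
 * rx_packets moved from 1000 to 1500 between two stats requests, dpkts
 * for this interval is 500 and the baseline advances to 1500.
 */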

static void mlx5e_tc_hairpin_update_dead_peer(struct mlx5e_priv *priv,
					      struct mlx5e_priv *peer_priv)
{
	struct mlx5_core_dev *peer_mdev = peer_priv->mdev;
	struct mlx5e_hairpin_entry *hpe, *tmp;
	LIST_HEAD(init_wait_list);
	u16 peer_vhca_id;
	int bkt;

	if (!mlx5e_same_hw_devs(priv, peer_priv))
		return;

	peer_vhca_id = MLX5_CAP_GEN(peer_mdev, vhca_id);

	mutex_lock(&priv->fs.tc.hairpin_tbl_lock);
	hash_for_each(priv->fs.tc.hairpin_tbl, bkt, hpe, hairpin_hlist)
		if (refcount_inc_not_zero(&hpe->refcnt))
			list_add(&hpe->dead_peer_wait_list, &init_wait_list);
	mutex_unlock(&priv->fs.tc.hairpin_tbl_lock);

	list_for_each_entry_safe(hpe, tmp, &init_wait_list, dead_peer_wait_list) {
		wait_for_completion(&hpe->res_ready);
		if (!IS_ERR_OR_NULL(hpe->hp) && hpe->peer_vhca_id == peer_vhca_id)
			mlx5_core_hairpin_clear_dead_peer(hpe->hp->pair);

		mlx5e_hairpin_put(priv, hpe);
	}
}

static int mlx5e_tc_netdev_event(struct notifier_block *this,
				 unsigned long event, void *ptr)
{
	struct net_device *ndev = netdev_notifier_info_to_dev(ptr);
	struct mlx5e_flow_steering *fs;
	struct mlx5e_priv *peer_priv;
	struct mlx5e_tc_table *tc;
	struct mlx5e_priv *priv;

	if (ndev->netdev_ops != &mlx5e_netdev_ops ||
	    event != NETDEV_UNREGISTER ||
	    ndev->reg_state == NETREG_REGISTERED)
		return NOTIFY_DONE;

	tc = container_of(this, struct mlx5e_tc_table, netdevice_nb);
	fs = container_of(tc, struct mlx5e_flow_steering, tc);
	priv = container_of(fs, struct mlx5e_priv, fs);
	peer_priv = netdev_priv(ndev);
	if (priv == peer_priv ||
	    !(priv->netdev->features & NETIF_F_HW_TC))
		return NOTIFY_DONE;

	mlx5e_tc_hairpin_update_dead_peer(priv, peer_priv);

	return NOTIFY_DONE;
}

static int mlx5e_tc_nic_get_ft_size(struct mlx5_core_dev *dev)
{
	int tc_grp_size, tc_tbl_size;
	u32 max_flow_counter;

	max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
			    MLX5_CAP_GEN(dev, max_flow_counter_15_0);

	tc_grp_size = min_t(int, max_flow_counter, MLX5E_TC_TABLE_MAX_GROUP_SIZE);

	tc_tbl_size = min_t(int, tc_grp_size * MLX5E_TC_TABLE_NUM_GROUPS,
			    BIT(MLX5_CAP_FLOWTABLE_NIC_RX(dev, log_max_ft_size)));

	return tc_tbl_size;
}
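
/* Worked example (illustrative capability values): a device reporting
 * max_flow_counter = 65536 and log_max_ft_size = 20 yields
 * tc_grp_size = min(65536, BIT(18)) = 65536 and
 * tc_tbl_size = min(65536 * 4, BIT(20)) = 262144 entries.
 */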

static int mlx5e_tc_nic_create_miss_table(struct mlx5e_priv *priv)
{
	struct mlx5_flow_table **ft = &priv->fs.tc.miss_t;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	int err = 0;

	ft_attr.max_fte = 1;
	ft_attr.autogroup.max_num_groups = 1;
	ft_attr.level = MLX5E_TC_MISS_LEVEL;

	ns = mlx5_get_flow_namespace(priv->mdev, MLX5_FLOW_NAMESPACE_KERNEL);

	*ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
	if (IS_ERR(*ft)) {
		err = PTR_ERR(*ft);
		netdev_err(priv->netdev, "failed to create tc nic miss table err=%d\n", err);
	}

	return err;
}

static void mlx5e_tc_nic_destroy_miss_table(struct mlx5e_priv *priv)
{
	mlx5_destroy_flow_table(priv->fs.tc.miss_t);
}

int mlx5e_tc_nic_init(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5_core_dev *dev = priv->mdev;
	struct mapping_ctx *chains_mapping;
	struct mlx5_chains_attr attr = {};
	u64 mapping_id;
	int err;

	mlx5e_mod_hdr_tbl_init(&tc->mod_hdr);
	mutex_init(&tc->t_lock);
	mutex_init(&tc->hairpin_tbl_lock);
	hash_init(tc->hairpin_tbl);

	err = rhashtable_init(&tc->ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc->ht.mutex, &tc_ht_lock_key);

	mapping_id = mlx5_query_nic_system_image_guid(dev);

	chains_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_CHAIN,
					       sizeof(struct mlx5_mapped_obj),
					       MLX5E_TC_TABLE_CHAIN_TAG_MASK, true);

	if (IS_ERR(chains_mapping)) {
		err = PTR_ERR(chains_mapping);
		goto err_mapping;
	}
	tc->mapping = chains_mapping;

	err = mlx5e_tc_nic_create_miss_table(priv);
	if (err)
		goto err_chains;

	if (MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level))
		attr.flags = MLX5_CHAINS_AND_PRIOS_SUPPORTED |
			MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
	attr.ns = MLX5_FLOW_NAMESPACE_KERNEL;
	attr.max_ft_sz = mlx5e_tc_nic_get_ft_size(dev);
	attr.max_grp_num = MLX5E_TC_TABLE_NUM_GROUPS;
	attr.default_ft = priv->fs.tc.miss_t;
	attr.mapping = chains_mapping;

	tc->chains = mlx5_chains_create(dev, &attr);
	if (IS_ERR(tc->chains)) {
		err = PTR_ERR(tc->chains);
		goto err_miss;
	}

	tc->post_act = mlx5e_tc_post_act_init(priv, tc->chains, MLX5_FLOW_NAMESPACE_KERNEL);
	tc->ct = mlx5_tc_ct_init(priv, tc->chains, &priv->fs.tc.mod_hdr,
				 MLX5_FLOW_NAMESPACE_KERNEL, tc->post_act);

	tc->netdevice_nb.notifier_call = mlx5e_tc_netdev_event;
	err = register_netdevice_notifier_dev_net(priv->netdev,
						  &tc->netdevice_nb,
						  &tc->netdevice_nn);
	if (err) {
		tc->netdevice_nb.notifier_call = NULL;
		mlx5_core_warn(priv->mdev, "Failed to register netdev notifier\n");
		goto err_reg;
	}

	return 0;

err_reg:
	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mlx5_chains_destroy(tc->chains);
err_miss:
	mlx5e_tc_nic_destroy_miss_table(priv);
err_chains:
	mapping_destroy(chains_mapping);
err_mapping:
	rhashtable_destroy(&tc->ht);
	return err;
}

static void _mlx5e_tc_del_flow(void *ptr, void *arg)
{
	struct mlx5e_tc_flow *flow = ptr;
	struct mlx5e_priv *priv = flow->priv;

	mlx5e_tc_del_flow(priv, flow);
	kfree(flow);
}

void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
{
	struct mlx5e_tc_table *tc = &priv->fs.tc;

	if (tc->netdevice_nb.notifier_call)
		unregister_netdevice_notifier_dev_net(priv->netdev,
						      &tc->netdevice_nb,
						      &tc->netdevice_nn);

	mlx5e_mod_hdr_tbl_destroy(&tc->mod_hdr);
	mutex_destroy(&tc->hairpin_tbl_lock);

	rhashtable_free_and_destroy(&tc->ht, _mlx5e_tc_del_flow, NULL);

	if (!IS_ERR_OR_NULL(tc->t)) {
		mlx5_chains_put_table(tc->chains, 0, 1, MLX5E_TC_FT_LEVEL);
		tc->t = NULL;
	}
	mutex_destroy(&tc->t_lock);

	mlx5_tc_ct_clean(tc->ct);
	mlx5e_tc_post_act_destroy(tc->post_act);
	mapping_destroy(tc->mapping);
	mlx5_chains_destroy(tc->chains);
	mlx5e_tc_nic_destroy_miss_table(priv);
}

int mlx5e_tc_ht_init(struct rhashtable *tc_ht)
{
	int err;

	err = rhashtable_init(tc_ht, &tc_ht_params);
	if (err)
		return err;

	lockdep_set_class(&tc_ht->mutex, &tc_ht_lock_key);

	return 0;
}

void mlx5e_tc_ht_cleanup(struct rhashtable *tc_ht)
{
	rhashtable_free_and_destroy(tc_ht, _mlx5e_tc_del_flow, NULL);
}

int mlx5e_tc_esw_init(struct mlx5_rep_uplink_priv *uplink_priv)
{
	const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
	struct mlx5e_rep_priv *rpriv;
	struct mapping_ctx *mapping;
	struct mlx5_eswitch *esw;
	struct mlx5e_priv *priv;
	u64 mapping_id;
	int err = 0;

	rpriv = container_of(uplink_priv, struct mlx5e_rep_priv, uplink_priv);
	priv = netdev_priv(rpriv->netdev);
	esw = priv->mdev->priv.eswitch;

	uplink_priv->post_act = mlx5e_tc_post_act_init(priv, esw_chains(esw),
						       MLX5_FLOW_NAMESPACE_FDB);
	uplink_priv->ct_priv = mlx5_tc_ct_init(netdev_priv(priv->netdev),
					       esw_chains(esw),
					       &esw->offloads.mod_hdr,
					       MLX5_FLOW_NAMESPACE_FDB,
					       uplink_priv->post_act);

	uplink_priv->int_port_priv = mlx5e_tc_int_port_init(netdev_priv(priv->netdev));

	uplink_priv->tc_psample = mlx5e_tc_sample_init(esw, uplink_priv->post_act);

	mapping_id = mlx5_query_nic_system_image_guid(esw->dev);

	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL,
					sizeof(struct tunnel_match_key),
					TUNNEL_INFO_BITS_MASK, true);

	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_tun_mapping;
	}
	uplink_priv->tunnel_mapping = mapping;

	/* The last two values are reserved for the stack devices' slow path
	 * table mark and the bridge ingress push mark.
	 */
	mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_TUNNEL_ENC_OPTS,
					sz_enc_opts, ENC_OPTS_BITS_MASK - 2, true);
	if (IS_ERR(mapping)) {
		err = PTR_ERR(mapping);
		goto err_enc_opts_mapping;
	}
	uplink_priv->tunnel_enc_opts_mapping = mapping;

	uplink_priv->encap = mlx5e_tc_tun_init(priv);
	if (IS_ERR(uplink_priv->encap)) {
		err = PTR_ERR(uplink_priv->encap);
		goto err_register_fib_notifier;
	}

	return 0;

err_register_fib_notifier:
	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
err_enc_opts_mapping:
	mapping_destroy(uplink_priv->tunnel_mapping);
err_tun_mapping:
	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	netdev_warn(priv->netdev,
		    "Failed to initialize tc (eswitch), err: %d", err);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
	return err;
}

void mlx5e_tc_esw_cleanup(struct mlx5_rep_uplink_priv *uplink_priv)
{
	mlx5e_tc_tun_cleanup(uplink_priv->encap);

	mapping_destroy(uplink_priv->tunnel_enc_opts_mapping);
	mapping_destroy(uplink_priv->tunnel_mapping);

	mlx5e_tc_sample_cleanup(uplink_priv->tc_psample);
	mlx5e_tc_int_port_cleanup(uplink_priv->int_port_priv);
	mlx5_tc_ct_clean(uplink_priv->ct_priv);
	mlx5e_tc_post_act_destroy(uplink_priv->post_act);
}

int mlx5e_tc_num_filters(struct mlx5e_priv *priv, unsigned long flags)
{
	struct rhashtable *tc_ht = get_tc_ht(priv, flags);

	return atomic_read(&tc_ht->nelems);
}

void mlx5e_tc_clean_fdb_peer_flows(struct mlx5_eswitch *esw)
{
	struct mlx5e_tc_flow *flow, *tmp;

	list_for_each_entry_safe(flow, tmp, &esw->offloads.peer_flows, peer)
		__mlx5e_tc_del_fdb_peer_flow(flow);
}

void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
{
	struct mlx5_rep_uplink_priv *rpriv =
		container_of(work, struct mlx5_rep_uplink_priv,
			     reoffload_flows_work);
	struct mlx5e_tc_flow *flow, *tmp;

	mutex_lock(&rpriv->unready_flows_lock);
	list_for_each_entry_safe(flow, tmp, &rpriv->unready_flows, unready) {
		if (!mlx5e_tc_add_fdb_flow(flow->priv, flow, NULL))
			unready_flow_del(flow);
	}
	mutex_unlock(&rpriv->unready_flows_lock);
}

static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
				     struct flow_cls_offload *cls_flower,
				     unsigned long flags)
{
	switch (cls_flower->command) {
	case FLOW_CLS_REPLACE:
		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
					      flags);
	case FLOW_CLS_DESTROY:
		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
					   flags);
	case FLOW_CLS_STATS:
		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
					  flags);
	default:
		return -EOPNOTSUPP;
	}
}

int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
			    void *cb_priv)
{
	unsigned long flags = MLX5_TC_FLAG(INGRESS);
	struct mlx5e_priv *priv = cb_priv;

	if (!priv->netdev || !netif_device_present(priv->netdev))
		return -EOPNOTSUPP;

	if (mlx5e_is_uplink_rep(priv))
		flags |= MLX5_TC_FLAG(ESW_OFFLOAD);
	else
		flags |= MLX5_TC_FLAG(NIC_OFFLOAD);

	switch (type) {
	case TC_SETUP_CLSFLOWER:
		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
	default:
		return -EOPNOTSUPP;
	}
}

bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe,
			 struct sk_buff *skb)
{
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	u32 chain = 0, chain_tag, reg_b, zone_restore_id;
	struct mlx5e_priv *priv = netdev_priv(skb->dev);
	struct mlx5e_tc_table *tc = &priv->fs.tc;
	struct mlx5_mapped_obj mapped_obj;
	struct tc_skb_ext *tc_skb_ext;
	int err;

	reg_b = be32_to_cpu(cqe->ft_metadata);

	chain_tag = reg_b & MLX5E_TC_TABLE_CHAIN_TAG_MASK;

	err = mapping_find(tc->mapping, chain_tag, &mapped_obj);
	if (err) {
		netdev_dbg(priv->netdev,
			   "Couldn't find chain for chain tag: %d, err: %d\n",
			   chain_tag, err);
		return false;
	}

	if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) {
		chain = mapped_obj.chain;
		tc_skb_ext = tc_skb_ext_alloc(skb);
		if (WARN_ON(!tc_skb_ext))
			return false;

		tc_skb_ext->chain = chain;

		zone_restore_id = (reg_b >> REG_MAPPING_MOFFSET(NIC_ZONE_RESTORE_TO_REG)) &
			ESW_ZONE_ID_MASK;

		if (!mlx5e_tc_ct_restore_flow(tc->ct, skb,
					      zone_restore_id))
			return false;
	} else {
		netdev_dbg(priv->netdev, "Invalid mapped object type: %d\n", mapped_obj.type);
		return false;
	}
#endif /* CONFIG_NET_TC_SKB_EXT */

	return true;
}
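
/* Sketch of the reg_b layout decoded above, assuming the NIC mappings in
 * mlx5e_tc_attr_to_reg_mappings (16-bit chain tag in the low bits, 8-bit
 * CT zone restore id above it): reg_b = 0x00030005 yields chain_tag = 5
 * and zone_restore_id = 3.
 */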