1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 // Copyright (c) 2020 Mellanox Technologies.
4 #include <linux/mlx5/driver.h>
5 #include <linux/mlx5/mlx5_ifc.h>
6 #include <linux/mlx5/fs.h>
8 #include "lib/fs_chains.h"
9 #include "fs_ft_pool.h"
10 #include "en/mapping.h"
/* Shorthand accessors for struct mlx5_fs_chains members. */
#define chains_lock(chains) ((chains)->lock)
#define chains_ht(chains) ((chains)->chains_ht)
#define prios_ht(chains) ((chains)->prios_ht)
#define tc_default_ft(chains) ((chains)->tc_default_ft)
#define tc_end_ft(chains) ((chains)->tc_end_ft)
/* Map a flow namespace to the fs_core prio that hosts unmanaged chain tables. */
#define ns_to_chains_fs_prio(ns) ((ns) == MLX5_FLOW_NAMESPACE_FDB ? \
				  FDB_TC_OFFLOAD : MLX5E_TC_PRIO)
/* Fixed size used for the "not found" (nf) chain's flow table. */
#define FT_TBL_SZ (64 * 1024)
23 struct mlx5_fs_chains {
24 struct mlx5_core_dev *dev;
26 struct rhashtable chains_ht;
27 struct rhashtable prios_ht;
28 /* Protects above chains_ht and prios_ht */
31 struct mlx5_flow_table *tc_default_ft;
32 struct mlx5_flow_table *tc_end_ft;
33 struct mapping_ctx *chains_mapping;
35 enum mlx5_flow_namespace_type ns;
41 struct rhash_head node;
48 struct mlx5_fs_chains *chains;
49 struct list_head prios_list;
50 struct mlx5_flow_handle *restore_rule;
51 struct mlx5_modify_hdr *miss_modify_hdr;
61 struct rhash_head node;
62 struct list_head list;
68 struct fs_chain *chain;
69 struct mlx5_flow_table *ft;
70 struct mlx5_flow_table *next_ft;
71 struct mlx5_flow_group *miss_group;
72 struct mlx5_flow_handle *miss_rule;
75 static const struct rhashtable_params chain_params = {
76 .head_offset = offsetof(struct fs_chain, node),
77 .key_offset = offsetof(struct fs_chain, chain),
78 .key_len = sizeof_field(struct fs_chain, chain),
79 .automatic_shrinking = true,
82 static const struct rhashtable_params prio_params = {
83 .head_offset = offsetof(struct prio, node),
84 .key_offset = offsetof(struct prio, key),
85 .key_len = sizeof_field(struct prio, key),
86 .automatic_shrinking = true,
89 bool mlx5_chains_prios_supported(struct mlx5_fs_chains *chains)
91 return chains->flags & MLX5_CHAINS_AND_PRIOS_SUPPORTED;
94 bool mlx5_chains_ignore_flow_level_supported(struct mlx5_fs_chains *chains)
96 return chains->flags & MLX5_CHAINS_IGNORE_FLOW_LEVEL_SUPPORTED;
99 bool mlx5_chains_backwards_supported(struct mlx5_fs_chains *chains)
101 return mlx5_chains_prios_supported(chains) &&
102 mlx5_chains_ignore_flow_level_supported(chains);
105 u32 mlx5_chains_get_chain_range(struct mlx5_fs_chains *chains)
107 if (!mlx5_chains_prios_supported(chains))
110 if (mlx5_chains_ignore_flow_level_supported(chains))
113 /* We should get here only for eswitch case */
114 return FDB_TC_MAX_CHAIN;
117 u32 mlx5_chains_get_nf_ft_chain(struct mlx5_fs_chains *chains)
119 return mlx5_chains_get_chain_range(chains) + 1;
122 u32 mlx5_chains_get_prio_range(struct mlx5_fs_chains *chains)
124 if (mlx5_chains_ignore_flow_level_supported(chains))
127 if (!chains->dev->priv.eswitch ||
128 chains->dev->priv.eswitch->mode != MLX5_ESWITCH_OFFLOADS)
131 /* We should get here only for eswitch case */
132 return FDB_TC_MAX_PRIO;
135 static unsigned int mlx5_chains_get_level_range(struct mlx5_fs_chains *chains)
137 if (mlx5_chains_ignore_flow_level_supported(chains))
140 /* Same value for FDB and NIC RX tables */
141 return FDB_TC_LEVELS_PER_PRIO;
145 mlx5_chains_set_end_ft(struct mlx5_fs_chains *chains,
146 struct mlx5_flow_table *ft)
148 tc_end_ft(chains) = ft;
151 static struct mlx5_flow_table *
152 mlx5_chains_create_table(struct mlx5_fs_chains *chains,
153 u32 chain, u32 prio, u32 level)
155 struct mlx5_flow_table_attr ft_attr = {};
156 struct mlx5_flow_namespace *ns;
157 struct mlx5_flow_table *ft;
160 if (chains->flags & MLX5_CHAINS_FT_TUNNEL_SUPPORTED)
161 ft_attr.flags |= (MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT |
162 MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
164 sz = (chain == mlx5_chains_get_nf_ft_chain(chains)) ? FT_TBL_SZ : POOL_NEXT_SIZE;
165 ft_attr.max_fte = sz;
167 /* We use tc_default_ft(chains) as the table's next_ft till
168 * ignore_flow_level is allowed on FT creation and not just for FTEs.
169 * Instead caller should add an explicit miss rule if needed.
171 ft_attr.next_ft = tc_default_ft(chains);
173 /* The root table(chain 0, prio 1, level 0) is required to be
174 * connected to the previous fs_core managed prio.
175 * We always create it, as a managed table, in order to align with
178 if (!mlx5_chains_ignore_flow_level_supported(chains) ||
179 (chain == 0 && prio == 1 && level == 0)) {
180 ft_attr.level = level;
181 ft_attr.prio = prio - 1;
182 ns = (chains->ns == MLX5_FLOW_NAMESPACE_FDB) ?
183 mlx5_get_fdb_sub_ns(chains->dev, chain) :
184 mlx5_get_flow_namespace(chains->dev, chains->ns);
186 ft_attr.flags |= MLX5_FLOW_TABLE_UNMANAGED;
187 ft_attr.prio = ns_to_chains_fs_prio(chains->ns);
188 /* Firmware doesn't allow us to create another level 0 table,
189 * so we create all unmanaged tables as level 1.
191 * To connect them, we use explicit miss rules with
192 * ignore_flow_level. Caller is responsible to create
193 * these rules (if needed).
196 ns = mlx5_get_flow_namespace(chains->dev, chains->ns);
199 ft_attr.autogroup.num_reserved_entries = 2;
200 ft_attr.autogroup.max_num_groups = chains->group_num;
201 ft = mlx5_create_auto_grouped_flow_table(ns, &ft_attr);
203 mlx5_core_warn(chains->dev, "Failed to create chains table err %d (chain: %d, prio: %d, level: %d, size: %d)\n",
204 (int)PTR_ERR(ft), chain, prio, level, sz);
212 create_chain_restore(struct fs_chain *chain)
214 struct mlx5_eswitch *esw = chain->chains->dev->priv.eswitch;
215 u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
216 struct mlx5_fs_chains *chains = chain->chains;
217 enum mlx5e_tc_attr_to_reg chain_to_reg;
218 struct mlx5_modify_hdr *mod_hdr;
222 if (chain->chain == mlx5_chains_get_nf_ft_chain(chains) ||
223 !mlx5_chains_prios_supported(chains))
226 err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
229 if (index == MLX5_FS_DEFAULT_FLOW_TAG) {
230 /* we got the special default flow tag id, so we won't know
231 * if we actually marked the packet with the restore rule
234 * This case isn't possible with MLX5_FS_DEFAULT_FLOW_TAG = 0.
236 err = mlx5_chains_get_chain_mapping(chains, chain->chain, &index);
237 mapping_remove(chains->chains_mapping, MLX5_FS_DEFAULT_FLOW_TAG);
244 if (chains->ns == MLX5_FLOW_NAMESPACE_FDB) {
245 chain_to_reg = CHAIN_TO_REG;
246 chain->restore_rule = esw_add_restore_rule(esw, chain->id);
247 if (IS_ERR(chain->restore_rule)) {
248 err = PTR_ERR(chain->restore_rule);
251 } else if (chains->ns == MLX5_FLOW_NAMESPACE_KERNEL) {
252 /* For NIC RX we don't need a restore rule
253 * since we write the metadata to reg_b
254 * that is passed to SW directly.
256 chain_to_reg = NIC_CHAIN_TO_REG;
262 MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
263 MLX5_SET(set_action_in, modact, field,
264 mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mfield);
265 MLX5_SET(set_action_in, modact, offset,
266 mlx5e_tc_attr_to_reg_mappings[chain_to_reg].moffset);
267 MLX5_SET(set_action_in, modact, length,
268 mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen == 32 ?
269 0 : mlx5e_tc_attr_to_reg_mappings[chain_to_reg].mlen);
270 MLX5_SET(set_action_in, modact, data, chain->id);
271 mod_hdr = mlx5_modify_header_alloc(chains->dev, chains->ns,
273 if (IS_ERR(mod_hdr)) {
274 err = PTR_ERR(mod_hdr);
277 chain->miss_modify_hdr = mod_hdr;
282 if (!IS_ERR_OR_NULL(chain->restore_rule))
283 mlx5_del_flow_rules(chain->restore_rule);
285 /* Datapath can't find this mapping, so we can safely remove it */
286 mapping_remove(chains->chains_mapping, chain->id);
290 static void destroy_chain_restore(struct fs_chain *chain)
292 struct mlx5_fs_chains *chains = chain->chains;
294 if (!chain->miss_modify_hdr)
297 if (chain->restore_rule)
298 mlx5_del_flow_rules(chain->restore_rule);
300 mlx5_modify_header_dealloc(chains->dev, chain->miss_modify_hdr);
301 mapping_remove(chains->chains_mapping, chain->id);
304 static struct fs_chain *
305 mlx5_chains_create_chain(struct mlx5_fs_chains *chains, u32 chain)
307 struct fs_chain *chain_s = NULL;
310 chain_s = kvzalloc(sizeof(*chain_s), GFP_KERNEL);
312 return ERR_PTR(-ENOMEM);
314 chain_s->chains = chains;
315 chain_s->chain = chain;
316 INIT_LIST_HEAD(&chain_s->prios_list);
318 err = create_chain_restore(chain_s);
322 err = rhashtable_insert_fast(&chains_ht(chains), &chain_s->node,
330 destroy_chain_restore(chain_s);
337 mlx5_chains_destroy_chain(struct fs_chain *chain)
339 struct mlx5_fs_chains *chains = chain->chains;
341 rhashtable_remove_fast(&chains_ht(chains), &chain->node,
344 destroy_chain_restore(chain);
348 static struct fs_chain *
349 mlx5_chains_get_chain(struct mlx5_fs_chains *chains, u32 chain)
351 struct fs_chain *chain_s;
353 chain_s = rhashtable_lookup_fast(&chains_ht(chains), &chain,
356 chain_s = mlx5_chains_create_chain(chains, chain);
366 static struct mlx5_flow_handle *
367 mlx5_chains_add_miss_rule(struct fs_chain *chain,
368 struct mlx5_flow_table *ft,
369 struct mlx5_flow_table *next_ft)
371 struct mlx5_fs_chains *chains = chain->chains;
372 struct mlx5_flow_destination dest = {};
373 struct mlx5_flow_act act = {};
375 act.flags = FLOW_ACT_NO_APPEND;
376 if (mlx5_chains_ignore_flow_level_supported(chain->chains))
377 act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
379 act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
380 dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
383 if (next_ft == tc_end_ft(chains) &&
384 chain->chain != mlx5_chains_get_nf_ft_chain(chains) &&
385 mlx5_chains_prios_supported(chains)) {
386 act.modify_hdr = chain->miss_modify_hdr;
387 act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
390 return mlx5_add_flow_rules(ft, NULL, &act, &dest, 1);
394 mlx5_chains_update_prio_prevs(struct prio *prio,
395 struct mlx5_flow_table *next_ft)
397 struct mlx5_flow_handle *miss_rules[FDB_TC_LEVELS_PER_PRIO + 1] = {};
398 struct fs_chain *chain = prio->chain;
405 /* Iterate in reverse order until reaching the level 0 rule of
406 * the previous priority, adding all the miss rules first, so we can
407 * revert them if any of them fails.
410 list_for_each_entry_continue_reverse(pos,
413 miss_rules[n] = mlx5_chains_add_miss_rule(chain,
416 if (IS_ERR(miss_rules[n])) {
417 err = PTR_ERR(miss_rules[n]);
426 /* Success, delete old miss rules, and update the pointers. */
429 list_for_each_entry_continue_reverse(pos,
432 mlx5_del_flow_rules(pos->miss_rule);
434 pos->miss_rule = miss_rules[n];
435 pos->next_ft = next_ft;
446 mlx5_del_flow_rules(miss_rules[n]);
452 mlx5_chains_put_chain(struct fs_chain *chain)
454 if (--chain->ref == 0)
455 mlx5_chains_destroy_chain(chain);
459 mlx5_chains_create_prio(struct mlx5_fs_chains *chains,
460 u32 chain, u32 prio, u32 level)
462 int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
463 struct mlx5_flow_handle *miss_rule;
464 struct mlx5_flow_group *miss_group;
465 struct mlx5_flow_table *next_ft;
466 struct mlx5_flow_table *ft;
467 struct fs_chain *chain_s;
468 struct list_head *pos;
473 chain_s = mlx5_chains_get_chain(chains, chain);
475 return ERR_CAST(chain_s);
477 prio_s = kvzalloc(sizeof(*prio_s), GFP_KERNEL);
478 flow_group_in = kvzalloc(inlen, GFP_KERNEL);
479 if (!prio_s || !flow_group_in) {
484 /* Chain's prio list is sorted by prio and level.
485 * And all levels of some prio point to the next prio's level 0.
486 * Example list (prio, level):
487 * (3,0)->(3,1)->(5,0)->(5,1)->(6,1)->(7,0)
488 * In hardware, we will we have the following pointers:
489 * (3,0) -> (5,0) -> (7,0) -> Slow path
495 /* Default miss for each chain: */
496 next_ft = (chain == mlx5_chains_get_nf_ft_chain(chains)) ?
497 tc_default_ft(chains) :
499 list_for_each(pos, &chain_s->prios_list) {
500 struct prio *p = list_entry(pos, struct prio, list);
502 /* exit on first pos that is larger */
503 if (prio < p->key.prio || (prio == p->key.prio &&
504 level < p->key.level)) {
505 /* Get next level 0 table */
506 next_ft = p->key.level == 0 ? p->ft : p->next_ft;
511 ft = mlx5_chains_create_table(chains, chain, prio, level);
517 MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index,
519 MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index,
521 miss_group = mlx5_create_flow_group(ft, flow_group_in);
522 if (IS_ERR(miss_group)) {
523 err = PTR_ERR(miss_group);
527 /* Add miss rule to next_ft */
528 miss_rule = mlx5_chains_add_miss_rule(chain_s, ft, next_ft);
529 if (IS_ERR(miss_rule)) {
530 err = PTR_ERR(miss_rule);
534 prio_s->miss_group = miss_group;
535 prio_s->miss_rule = miss_rule;
536 prio_s->next_ft = next_ft;
537 prio_s->chain = chain_s;
538 prio_s->key.chain = chain;
539 prio_s->key.prio = prio;
540 prio_s->key.level = level;
543 err = rhashtable_insert_fast(&prios_ht(chains), &prio_s->node,
548 list_add(&prio_s->list, pos->prev);
550 /* Table is ready, connect it */
551 err = mlx5_chains_update_prio_prevs(prio_s, ft);
555 kvfree(flow_group_in);
559 list_del(&prio_s->list);
560 rhashtable_remove_fast(&prios_ht(chains), &prio_s->node,
563 mlx5_del_flow_rules(miss_rule);
565 mlx5_destroy_flow_group(miss_group);
567 mlx5_destroy_flow_table(ft);
571 kvfree(flow_group_in);
572 mlx5_chains_put_chain(chain_s);
577 mlx5_chains_destroy_prio(struct mlx5_fs_chains *chains,
580 struct fs_chain *chain = prio->chain;
582 WARN_ON(mlx5_chains_update_prio_prevs(prio,
585 list_del(&prio->list);
586 rhashtable_remove_fast(&prios_ht(chains), &prio->node,
588 mlx5_del_flow_rules(prio->miss_rule);
589 mlx5_destroy_flow_group(prio->miss_group);
590 mlx5_destroy_flow_table(prio->ft);
591 mlx5_chains_put_chain(chain);
595 struct mlx5_flow_table *
596 mlx5_chains_get_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
599 struct mlx5_flow_table *prev_fts;
604 if ((chain > mlx5_chains_get_chain_range(chains) &&
605 chain != mlx5_chains_get_nf_ft_chain(chains)) ||
606 prio > mlx5_chains_get_prio_range(chains) ||
607 level > mlx5_chains_get_level_range(chains))
608 return ERR_PTR(-EOPNOTSUPP);
610 /* create earlier levels for correct fs_core lookup when
613 for (l = 0; l < level; l++) {
614 prev_fts = mlx5_chains_get_table(chains, chain, prio, l);
615 if (IS_ERR(prev_fts)) {
616 prio_s = ERR_CAST(prev_fts);
625 mutex_lock(&chains_lock(chains));
626 prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
629 prio_s = mlx5_chains_create_prio(chains, chain,
632 goto err_create_prio;
636 mutex_unlock(&chains_lock(chains));
641 mutex_unlock(&chains_lock(chains));
644 mlx5_chains_put_table(chains, chain, prio, l);
645 return ERR_CAST(prio_s);
649 mlx5_chains_put_table(struct mlx5_fs_chains *chains, u32 chain, u32 prio,
659 mutex_lock(&chains_lock(chains));
660 prio_s = rhashtable_lookup_fast(&prios_ht(chains), &key,
665 if (--prio_s->ref == 0)
666 mlx5_chains_destroy_prio(chains, prio_s);
667 mutex_unlock(&chains_lock(chains));
670 mlx5_chains_put_table(chains, chain, prio, level);
675 mutex_unlock(&chains_lock(chains));
677 "Couldn't find table: (chain: %d prio: %d level: %d)",
/* Return the table terminating the chains pipeline. */
struct mlx5_flow_table *
mlx5_chains_get_tc_end_ft(struct mlx5_fs_chains *chains)
{
	return tc_end_ft(chains);
}
687 struct mlx5_flow_table *
688 mlx5_chains_create_global_table(struct mlx5_fs_chains *chains)
690 u32 chain, prio, level;
693 if (!mlx5_chains_ignore_flow_level_supported(chains)) {
696 mlx5_core_warn(chains->dev,
697 "Couldn't create global flow table, ignore_flow_level not supported.");
701 chain = mlx5_chains_get_chain_range(chains),
702 prio = mlx5_chains_get_prio_range(chains);
703 level = mlx5_chains_get_level_range(chains);
705 return mlx5_chains_create_table(chains, chain, prio, level);
/* Destroy a table created by mlx5_chains_create_global_table(). */
void
mlx5_chains_destroy_global_table(struct mlx5_fs_chains *chains,
				 struct mlx5_flow_table *ft)
{
	mlx5_destroy_flow_table(ft);
}
718 static struct mlx5_fs_chains *
719 mlx5_chains_init(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
721 struct mlx5_fs_chains *chains_priv;
722 u32 max_flow_counter;
725 chains_priv = kzalloc(sizeof(*chains_priv), GFP_KERNEL);
727 return ERR_PTR(-ENOMEM);
729 max_flow_counter = (MLX5_CAP_GEN(dev, max_flow_counter_31_16) << 16) |
730 MLX5_CAP_GEN(dev, max_flow_counter_15_0);
733 "Init flow table chains, max counters(%d), groups(%d), max flow table size(%d)\n",
734 max_flow_counter, attr->max_grp_num, attr->max_ft_sz);
736 chains_priv->dev = dev;
737 chains_priv->flags = attr->flags;
738 chains_priv->ns = attr->ns;
739 chains_priv->group_num = attr->max_grp_num;
740 chains_priv->chains_mapping = attr->mapping;
741 tc_default_ft(chains_priv) = tc_end_ft(chains_priv) = attr->default_ft;
743 mlx5_core_info(dev, "Supported tc offload range - chains: %u, prios: %u\n",
744 mlx5_chains_get_chain_range(chains_priv),
745 mlx5_chains_get_prio_range(chains_priv));
747 err = rhashtable_init(&chains_ht(chains_priv), &chain_params);
749 goto init_chains_ht_err;
751 err = rhashtable_init(&prios_ht(chains_priv), &prio_params);
753 goto init_prios_ht_err;
755 mutex_init(&chains_lock(chains_priv));
760 rhashtable_destroy(&chains_ht(chains_priv));
767 mlx5_chains_cleanup(struct mlx5_fs_chains *chains)
769 mutex_destroy(&chains_lock(chains));
770 rhashtable_destroy(&prios_ht(chains));
771 rhashtable_destroy(&chains_ht(chains));
/* Public constructor: thin wrapper over mlx5_chains_init(). */
struct mlx5_fs_chains *
mlx5_chains_create(struct mlx5_core_dev *dev, struct mlx5_chains_attr *attr)
{
	struct mlx5_fs_chains *chains;

	chains = mlx5_chains_init(dev, attr);

	return chains;
}
/* Public destructor: thin wrapper over mlx5_chains_cleanup(). */
void
mlx5_chains_destroy(struct mlx5_fs_chains *chains)
{
	mlx5_chains_cleanup(chains);
}
793 mlx5_chains_get_chain_mapping(struct mlx5_fs_chains *chains, u32 chain,
796 struct mapping_ctx *ctx = chains->chains_mapping;
797 struct mlx5_mapped_obj mapped_obj = {};
799 mapped_obj.type = MLX5_MAPPED_OBJ_CHAIN;
800 mapped_obj.chain = chain;
801 return mapping_add(ctx, &mapped_obj, chain_mapping);
805 mlx5_chains_put_chain_mapping(struct mlx5_fs_chains *chains, u32 chain_mapping)
807 struct mapping_ctx *ctx = chains->chains_mapping;
809 return mapping_remove(ctx, chain_mapping);