1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2021 Mellanox Technologies. */
4 #include <linux/skbuff.h>
5 #include <net/psample.h>
6 #include "en/mapping.h"
7 #include "esw/sample.h"
12 #define MLX5_ESW_VPORT_TBL_SIZE_SAMPLE (64 * 1024)
/* Flow-table namespace attributes for the per-vport default tables used by
 * sample offload: bounded FTE count, default group count, and tunnel
 * reformat/decap enabled so encap'ed traffic can still be matched there.
 */
14 static const struct esw_vport_tbl_namespace mlx5_esw_vport_tbl_sample_ns = {
15 .max_fte = MLX5_ESW_VPORT_TBL_SIZE_SAMPLE,
16 .max_num_groups = 0, /* default num of groups */
17 .flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT | MLX5_FLOW_TABLE_TUNNEL_EN_DECAP,
/* Per-eswitch sample-offload context, created by mlx5_esw_sample_init().
 * Owns the shared termination table plus two refcounted caches: one of FW
 * sampler objects and one of reg_c0 restore contexts.
 */
20 struct mlx5_esw_psample {
21 struct mlx5e_priv *priv;
22 struct mlx5_flow_table *termtbl; /* forwards sampled copies to the eswitch manager vport */
23 struct mlx5_flow_handle *termtbl_rule; /* the single catch-all rule in termtbl */
24 DECLARE_HASHTABLE(hashtbl, 8); /* mlx5_sampler cache, keyed by (sample_ratio, default_table_id) */
25 struct mutex ht_lock; /* protect hashtbl */
26 DECLARE_HASHTABLE(restore_hashtbl, 8); /* mlx5_sample_restore cache, keyed by mapping obj_id */
27 struct mutex restore_lock; /* protect restore_hashtbl */
31 struct hlist_node hlist;
/* All HW objects that make up one offloaded tc flow with a sample action:
 * the (shared, refcounted) sampler and restore context, plus the two rules —
 * "pre" rule in the original table pointing at the sampler, and the rule
 * carrying the original match/actions in the per-vport default table.
 */
39 struct mlx5_sample_flow {
40 struct mlx5_sampler *sampler;
41 struct mlx5_sample_restore *restore;
42 struct mlx5_flow_attr *pre_attr; /* attr of pre_rule; freed on unoffload */
43 struct mlx5_flow_handle *pre_rule;
44 struct mlx5_flow_handle *rule;
/* Refcounted restore context, hashed by mapping obj_id: a modify-header that
 * writes the obj_id into reg_c0 and the slow-path restore rule that matches
 * it so sampled packets can be identified on miss.
 * (Additional fields, e.g. the obj_id key and refcount, are on lines not
 * shown in this listing.)
 */
47 struct mlx5_sample_restore {
48 struct hlist_node hlist;
49 struct mlx5_modify_hdr *modify_hdr;
50 struct mlx5_flow_handle *rule;
/* Create the shared termination table and its single rule that forwards all
 * sampled packets to the eswitch manager vport (where they are picked up and
 * reported via psample). Requires the FW termination_table capability.
 */
56 sampler_termtbl_create(struct mlx5_esw_psample *esw_psample)
58 struct mlx5_core_dev *dev = esw_psample->priv->mdev;
59 struct mlx5_eswitch *esw = dev->priv.eswitch;
60 struct mlx5_flow_table_attr ft_attr = {};
61 struct mlx5_flow_destination dest = {};
62 struct mlx5_flow_namespace *root_ns;
63 struct mlx5_flow_act act = {};
/* Bail out early if FW cannot do termination tables. */
66 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(dev, termination_table)) {
67 mlx5_core_warn(dev, "termination table is not supported\n");
71 root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
73 mlx5_core_warn(dev, "failed to get FDB flow namespace\n");
/* UNMANAGED: this table sits outside the normal chain/prio management. */
77 ft_attr.flags = MLX5_FLOW_TABLE_TERMINATION | MLX5_FLOW_TABLE_UNMANAGED;
78 ft_attr.autogroup.max_num_groups = 1;
79 ft_attr.prio = FDB_SLOW_PATH;
82 esw_psample->termtbl = mlx5_create_auto_grouped_flow_table(root_ns, &ft_attr);
83 if (IS_ERR(esw_psample->termtbl)) {
84 err = PTR_ERR(esw_psample->termtbl);
85 mlx5_core_warn(dev, "failed to create termtbl, err: %d\n", err);
/* Catch-all rule (NULL spec): forward everything to the manager vport. */
89 act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
90 dest.vport.num = esw->manager_vport;
91 esw_psample->termtbl_rule = mlx5_add_flow_rules(esw_psample->termtbl, NULL, &act, &dest, 1);
92 if (IS_ERR(esw_psample->termtbl_rule)) {
93 err = PTR_ERR(esw_psample->termtbl_rule);
94 mlx5_core_warn(dev, "failed to create termtbl rule, err: %d\n", err);
95 mlx5_destroy_flow_table(esw_psample->termtbl); /* undo table creation on rule failure */
/* Tear down the termination table: rule first, then the table itself. */
103 sampler_termtbl_destroy(struct mlx5_esw_psample *esw_psample)
105 mlx5_del_flow_rules(esw_psample->termtbl_rule);
106 mlx5_destroy_flow_table(esw_psample->termtbl);
/* Create a FW flow-sampler general object for the FDB, programmed with the
 * sample ratio and the two destination table ids (sample table / default
 * table). On success the new object id is stored in sampler->sampler_id.
 * Requires both the sampler general-object and ignore_flow_level caps.
 */
110 sampler_obj_create(struct mlx5_core_dev *mdev, struct mlx5_sampler *sampler)
112 u32 in[MLX5_ST_SZ_DW(create_sampler_obj_in)] = {};
113 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
114 u64 general_obj_types;
118 general_obj_types = MLX5_CAP_GEN_64(mdev, general_obj_types);
119 if (!(general_obj_types & MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_SAMPLER))
121 if (!MLX5_CAP_ESW_FLOWTABLE_FDB(mdev, ignore_flow_level))
124 obj = MLX5_ADDR_OF(create_sampler_obj_in, in, sampler_object);
125 MLX5_SET(sampler_obj, obj, table_type, FS_FT_FDB);
/* ignore_flow_level lets the sampler jump to tables at any level. */
126 MLX5_SET(sampler_obj, obj, ignore_flow_level, 1);
127 MLX5_SET(sampler_obj, obj, level, 1);
128 MLX5_SET(sampler_obj, obj, sample_ratio, sampler->sample_ratio);
129 MLX5_SET(sampler_obj, obj, sample_table_id, sampler->sample_table_id);
130 MLX5_SET(sampler_obj, obj, default_table_id, sampler->default_table_id);
131 MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_CREATE_GENERAL_OBJECT);
132 MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
134 err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
136 sampler->sampler_id = MLX5_GET(general_obj_out_cmd_hdr, out, obj_id);
/* Destroy a FW flow-sampler object by id (DESTROY_GENERAL_OBJECT command). */
142 sampler_obj_destroy(struct mlx5_core_dev *mdev, u32 sampler_id)
144 u32 in[MLX5_ST_SZ_DW(general_obj_in_cmd_hdr)] = {};
145 u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)];
147 MLX5_SET(general_obj_in_cmd_hdr, in, opcode, MLX5_CMD_OP_DESTROY_GENERAL_OBJECT);
148 MLX5_SET(general_obj_in_cmd_hdr, in, obj_type, MLX5_GENERAL_OBJECT_TYPES_SAMPLER);
149 MLX5_SET(general_obj_in_cmd_hdr, in, obj_id, sampler_id);
/* Best-effort teardown: command status is not propagated to the caller. */
151 mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
/* Hash key for the sampler cache: jhash of the two identifying fields. */
155 sampler_hash(u32 sample_ratio, u32 default_table_id)
157 return jhash_2words(sample_ratio, default_table_id, 0);
/* Compare two sampler identities; returns 0 (false) when they match, so it
 * follows the memcmp-style "non-zero means different" convention.
 */
161 sampler_cmp(u32 sample_ratio1, u32 default_table_id1, u32 sample_ratio2, u32 default_table_id2)
163 return sample_ratio1 != sample_ratio2 || default_table_id1 != default_table_id2;
/* Look up (or create) a refcounted sampler for the given ratio/default-table
 * pair. Lookup, creation and insertion all happen under ht_lock, so there is
 * a single sampler per identity. The sample destination is always the shared
 * termination table.
 */
166 static struct mlx5_sampler *
167 sampler_get(struct mlx5_esw_psample *esw_psample, u32 sample_ratio, u32 default_table_id)
169 struct mlx5_sampler *sampler;
173 mutex_lock(&esw_psample->ht_lock);
174 hash_key = sampler_hash(sample_ratio, default_table_id);
/* Fast path: reuse a cached sampler with the same identity. */
175 hash_for_each_possible(esw_psample->hashtbl, sampler, hlist, hash_key)
176 if (!sampler_cmp(sampler->sample_ratio, sampler->default_table_id,
177 sample_ratio, default_table_id))
/* Not found: allocate and program a new FW sampler object. */
180 sampler = kzalloc(sizeof(*sampler), GFP_KERNEL);
186 sampler->sample_table_id = esw_psample->termtbl->id;
187 sampler->default_table_id = default_table_id;
188 sampler->sample_ratio = sample_ratio;
190 err = sampler_obj_create(esw_psample->priv->mdev, sampler);
194 hash_add(esw_psample->hashtbl, &sampler->hlist, hash_key);
198 mutex_unlock(&esw_psample->ht_lock);
/* Error path unlock (labels elided in this listing). */
204 mutex_unlock(&esw_psample->ht_lock);
/* Drop one reference on a cached sampler; on the last put, unhash it and
 * destroy the FW object. Refcount is protected by ht_lock.
 */
209 sampler_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sampler *sampler)
211 mutex_lock(&esw_psample->ht_lock);
212 if (--sampler->count == 0) {
213 hash_del(&sampler->hlist);
214 sampler_obj_destroy(esw_psample->priv->mdev, sampler->sampler_id);
217 mutex_unlock(&esw_psample->ht_lock);
/* Allocate a modify-header that writes obj_id into the CHAIN_TO_REG register
 * (reg_c0), so the sample context can be recovered from a packet on miss.
 * The temporary mod-header action list is freed on both success and failure.
 */
220 static struct mlx5_modify_hdr *
221 sample_metadata_rule_get(struct mlx5_core_dev *mdev, u32 obj_id)
223 struct mlx5e_tc_mod_hdr_acts mod_acts = {};
224 struct mlx5_modify_hdr *modify_hdr;
227 err = mlx5e_tc_match_to_reg_set(mdev, &mod_acts, MLX5_FLOW_NAMESPACE_FDB,
228 CHAIN_TO_REG, obj_id);
232 modify_hdr = mlx5_modify_header_alloc(mdev, MLX5_FLOW_NAMESPACE_FDB,
233 mod_acts.num_actions,
235 if (IS_ERR(modify_hdr)) {
236 err = PTR_ERR(modify_hdr);
/* Success path: the staging action buffer is no longer needed. */
240 dealloc_mod_hdr_actions(&mod_acts);
/* Error path cleanup (label elided in this listing). */
244 dealloc_mod_hdr_actions(&mod_acts);
/* Look up (or create) a refcounted restore context for obj_id under
 * restore_lock: a reg_c0 modify-header plus the eswitch restore rule that
 * matches it. Cached in restore_hashtbl keyed directly by obj_id.
 */
249 static struct mlx5_sample_restore *
250 sample_restore_get(struct mlx5_esw_psample *esw_psample, u32 obj_id)
252 struct mlx5_core_dev *mdev = esw_psample->priv->mdev;
253 struct mlx5_eswitch *esw = mdev->priv.eswitch;
254 struct mlx5_sample_restore *restore;
255 struct mlx5_modify_hdr *modify_hdr;
258 mutex_lock(&esw_psample->restore_lock);
/* Fast path: reuse an existing context for this obj_id. */
259 hash_for_each_possible(esw_psample->restore_hashtbl, restore, hlist, obj_id)
260 if (restore->obj_id == obj_id)
263 restore = kzalloc(sizeof(*restore), GFP_KERNEL);
268 restore->obj_id = obj_id;
270 modify_hdr = sample_metadata_rule_get(mdev, obj_id);
271 if (IS_ERR(modify_hdr)) {
272 err = PTR_ERR(modify_hdr);
275 restore->modify_hdr = modify_hdr;
277 restore->rule = esw_add_restore_rule(esw, obj_id);
278 if (IS_ERR(restore->rule)) {
279 err = PTR_ERR(restore->rule);
283 hash_add(esw_psample->restore_hashtbl, &restore->hlist, obj_id);
286 mutex_unlock(&esw_psample->restore_lock);
/* Error path: free the modify-header, then unlock (labels elided). */
290 mlx5_modify_header_dealloc(mdev, restore->modify_hdr);
294 mutex_unlock(&esw_psample->restore_lock);
/* Drop one reference on a restore context; the last put unhashes it and then
 * frees the restore rule and modify-header. Note the HW teardown happens
 * after restore_lock is released — safe because the entry is already
 * unhashed, so no other thread can find it.
 */
299 sample_restore_put(struct mlx5_esw_psample *esw_psample, struct mlx5_sample_restore *restore)
301 mutex_lock(&esw_psample->restore_lock);
302 if (--restore->count == 0)
303 hash_del(&restore->hlist);
304 mutex_unlock(&esw_psample->restore_lock);
306 if (!restore->count) {
307 mlx5_del_flow_rules(restore->rule);
308 mlx5_modify_header_dealloc(esw_psample->priv->mdev, restore->modify_hdr);
/* Report a sampled skb to userspace via the psample netlink channel, using
 * the group id / rate / truncation size recovered from the reg_c0 mapping.
 */
313 void mlx5_esw_sample_skb(struct sk_buff *skb, struct mlx5_mapped_obj *mapped_obj)
315 u32 trunc_size = mapped_obj->sample.trunc_size;
316 struct psample_group psample_group = {};
317 struct psample_metadata md = {};
/* trunc_size == 0 means "no truncation": report the whole packet. */
319 md.trunc_size = trunc_size ? min(trunc_size, skb->len) : skb->len;
320 md.in_ifindex = skb->dev->ifindex;
321 psample_group.group_num = mapped_obj->sample.group_id;
322 psample_group.net = &init_net;
/* Re-expose the MAC header so psample reports the full L2 frame. */
323 skb_push(skb, skb->mac_len);
325 psample_sample_packet(&psample_group, skb, mapped_obj->sample.rate, &md);
328 /* For the following typical flow table:
330 * +-------------------------------+
331 * + original flow table +
332 * +-------------------------------+
334 * +-------------------------------+
335 * + sample action + other actions +
336 * +-------------------------------+
338 * We translate the tc filter with sample action to the following HW model:
340 * +---------------------+
341 * + original flow table +
342 * +---------------------+
344 * +---------------------+
347 * +------------------------------------------------+
348 * + Flow Sampler Object +
349 * +------------------------------------------------+
351 * +------------------------------------------------+
352 * + sample table id | default table id +
353 * +------------------------------------------------+
356 * +-----------------------------+ +----------------------------------------+
357 * + sample table + + default table per <vport, chain, prio> +
358 * +-----------------------------+ +----------------------------------------+
359 * + forward to management vport + + original match +
360 * +-----------------------------+ +----------------------------------------+
362 * +----------------------------------------+
/* Offload a tc flow that carries a sample action, per the HW model described
 * above: (1) a per-vport default table holding the original match + remaining
 * actions, (2) a FW sampler object steering to termtbl/default table, (3) a
 * reg_c0 restore context, and (4) a "pre" rule in the original table whose
 * destination is the sampler. Returns the default-table rule handle.
 * On the slow path (invalid encap neigh) the flow is offloaded as-is without
 * sampling.
 */
364 struct mlx5_flow_handle *
365 mlx5_esw_sample_offload(struct mlx5_esw_psample *esw_psample,
366 struct mlx5_flow_spec *spec,
367 struct mlx5_flow_attr *attr)
369 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
370 struct mlx5_vport_tbl_attr per_vport_tbl_attr;
371 struct mlx5_esw_flow_attr *pre_esw_attr;
372 struct mlx5_mapped_obj restore_obj = {};
373 struct mlx5_sample_flow *sample_flow;
374 struct mlx5_sample_attr *sample_attr;
375 struct mlx5_flow_table *default_tbl;
376 struct mlx5_flow_attr *pre_attr;
377 struct mlx5_eswitch *esw;
381 if (IS_ERR_OR_NULL(esw_psample))
382 return ERR_PTR(-EOPNOTSUPP);
384 /* If slow path flag is set, eg. when the neigh is invalid for encap,
385 * don't offload sample action.
387 esw = esw_psample->priv->mdev->priv.eswitch;
388 if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH)
389 return mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
391 sample_flow = kzalloc(sizeof(*sample_flow), GFP_KERNEL);
393 return ERR_PTR(-ENOMEM);
394 esw_attr->sample->sample_flow = sample_flow;
396 /* Allocate default table per vport, chain and prio. Otherwise, there is
397 * only one default table for the same sampler object. Rules with different
398 * prio and chain may overlap. For CT sample action, per vport default
399 * table is needed to restore the metadata.
401 per_vport_tbl_attr.chain = attr->chain;
402 per_vport_tbl_attr.prio = attr->prio;
403 per_vport_tbl_attr.vport = esw_attr->in_rep->vport;
404 per_vport_tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
405 default_tbl = mlx5_esw_vporttbl_get(esw, &per_vport_tbl_attr);
406 if (IS_ERR(default_tbl)) {
407 err = PTR_ERR(default_tbl);
408 goto err_default_tbl;
411 /* Perform the original matches on the default table.
412 * Offload all actions except the sample action.
414 esw_attr->sample->sample_default_tbl = default_tbl;
415 /* When offloading sample and encap action, if there is no valid
416 * neigh data struct, a slow path rule is offloaded first. Source
417 * port metadata match is set at that time. A per vport table is
418 * already allocated. No need to match it again. So clear the source
419 * port metadata match.
421 mlx5_eswitch_clear_rule_source_port(esw, spec);
422 sample_flow->rule = mlx5_eswitch_add_offloaded_rule(esw, spec, attr);
423 if (IS_ERR(sample_flow->rule)) {
424 err = PTR_ERR(sample_flow->rule);
425 goto err_offload_rule;
428 /* Create sampler object. */
429 sample_flow->sampler = sampler_get(esw_psample, esw_attr->sample->rate, default_tbl->id);
430 if (IS_ERR(sample_flow->sampler)) {
431 err = PTR_ERR(sample_flow->sampler);
435 /* Create an id mapping reg_c0 value to sample object. */
436 restore_obj.type = MLX5_MAPPED_OBJ_SAMPLE;
437 restore_obj.sample.group_id = esw_attr->sample->group_num;
438 restore_obj.sample.rate = esw_attr->sample->rate;
439 restore_obj.sample.trunc_size = esw_attr->sample->trunc_size;
440 err = mapping_add(esw->offloads.reg_c0_obj_pool, &restore_obj, &obj_id);
443 esw_attr->sample->restore_obj_id = obj_id;
445 /* Create sample restore context. */
446 sample_flow->restore = sample_restore_get(esw_psample, obj_id);
447 if (IS_ERR(sample_flow->restore)) {
448 err = PTR_ERR(sample_flow->restore);
449 goto err_sample_restore;
452 /* Perform the original matches on the original table. Offload the
453 * sample action. The destination is the sampler object.
455 pre_attr = mlx5_alloc_flow_attr(MLX5_FLOW_NAMESPACE_FDB);
458 goto err_alloc_flow_attr;
460 sample_attr = kzalloc(sizeof(*sample_attr), GFP_KERNEL);
463 goto err_alloc_sample_attr;
465 pre_esw_attr = pre_attr->esw_attr;
/* Pre rule: MOD_HDR writes obj_id to reg_c0, FWD sends to the sampler. */
466 pre_attr->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST | MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
467 pre_attr->modify_hdr = sample_flow->restore->modify_hdr;
468 pre_attr->flags = MLX5_ESW_ATTR_FLAG_SAMPLE;
469 pre_attr->chain = attr->chain;
470 pre_attr->prio = attr->prio;
471 pre_esw_attr->sample = sample_attr;
472 pre_esw_attr->sample->sampler_id = sample_flow->sampler->sampler_id;
473 pre_esw_attr->in_mdev = esw_attr->in_mdev;
474 pre_esw_attr->in_rep = esw_attr->in_rep;
475 sample_flow->pre_rule = mlx5_eswitch_add_offloaded_rule(esw, spec, pre_attr);
476 if (IS_ERR(sample_flow->pre_rule)) {
477 err = PTR_ERR(sample_flow->pre_rule);
478 goto err_pre_offload_rule;
480 sample_flow->pre_attr = pre_attr;
/* Callers track the default-table rule; pre_rule is internal state. */
482 return sample_flow->rule;
/* Error unwind: release resources in reverse order of acquisition
 * (some labels/statements elided in this listing).
 */
484 err_pre_offload_rule:
486 err_alloc_sample_attr:
489 sample_restore_put(esw_psample, sample_flow->restore);
491 mapping_remove(esw->offloads.reg_c0_obj_pool, obj_id);
493 sampler_put(esw_psample, sample_flow->sampler);
495 /* For sample offload, rule is added in default_tbl. No need to call
496 * mlx5_esw_chains_put_table()
500 mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
502 mlx5_esw_vporttbl_put(esw, &per_vport_tbl_attr);
/* Tear down a sample-offloaded flow: delete both rules, then release the
 * restore context, the reg_c0 mapping, the sampler and the per-vport default
 * table, and free the pre-rule attr. Mirrors mlx5_esw_sample_offload().
 */
509 mlx5_esw_sample_unoffload(struct mlx5_esw_psample *esw_psample,
510 struct mlx5_flow_handle *rule,
511 struct mlx5_flow_attr *attr)
513 struct mlx5_esw_flow_attr *esw_attr = attr->esw_attr;
514 struct mlx5_sample_flow *sample_flow;
515 struct mlx5_vport_tbl_attr tbl_attr;
516 struct mlx5_flow_attr *pre_attr;
517 struct mlx5_eswitch *esw;
519 if (IS_ERR_OR_NULL(esw_psample))
522 /* If slow path flag is set, sample action is not offloaded.
523 * No need to delete sample rule.
525 esw = esw_psample->priv->mdev->priv.eswitch;
526 if (attr->flags & MLX5_ESW_ATTR_FLAG_SLOW_PATH) {
527 mlx5_eswitch_del_offloaded_rule(esw, rule, attr);
531 sample_flow = esw_attr->sample->sample_flow;
532 pre_attr = sample_flow->pre_attr;
/* NOTE(review): pre_attr is zeroed before deleting pre_rule — presumably
 * deliberate so the delete path skips attr-driven cleanup; confirm against
 * mlx5_eswitch_del_offloaded_rule() semantics.
 */
533 memset(pre_attr, 0, sizeof(*pre_attr));
/* NOTE(review): redundant — esw was already assigned identically above. */
534 esw = esw_psample->priv->mdev->priv.eswitch;
535 mlx5_eswitch_del_offloaded_rule(esw, sample_flow->pre_rule, pre_attr);
536 mlx5_eswitch_del_offloaded_rule(esw, sample_flow->rule, attr);
538 sample_restore_put(esw_psample, sample_flow->restore);
539 mapping_remove(esw->offloads.reg_c0_obj_pool, esw_attr->sample->restore_obj_id);
540 sampler_put(esw_psample, sample_flow->sampler);
541 tbl_attr.chain = attr->chain;
542 tbl_attr.prio = attr->prio;
543 tbl_attr.vport = esw_attr->in_rep->vport;
544 tbl_attr.vport_ns = &mlx5_esw_vport_tbl_sample_ns;
545 mlx5_esw_vporttbl_put(esw, &tbl_attr);
547 kfree(pre_attr->esw_attr->sample);
/* Allocate the per-eswitch sample context, create the shared termination
 * table and initialize the cache locks. Returns ERR_PTR on failure.
 */
552 struct mlx5_esw_psample *
553 mlx5_esw_sample_init(struct mlx5e_priv *priv)
555 struct mlx5_esw_psample *esw_psample;
558 esw_psample = kzalloc(sizeof(*esw_psample), GFP_KERNEL);
560 return ERR_PTR(-ENOMEM);
561 esw_psample->priv = priv;
562 err = sampler_termtbl_create(esw_psample);
566 mutex_init(&esw_psample->ht_lock);
567 mutex_init(&esw_psample->restore_lock);
/* Counterpart of mlx5_esw_sample_init(): destroy locks and the termination
 * table. Tolerates an ERR_PTR/NULL context so callers can clean up
 * unconditionally.
 */
577 mlx5_esw_sample_cleanup(struct mlx5_esw_psample *esw_psample)
579 if (IS_ERR_OR_NULL(esw_psample))
582 mutex_destroy(&esw_psample->restore_lock);
583 mutex_destroy(&esw_psample->ht_lock);
584 sampler_termtbl_destroy(esw_psample);