// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>
#include <linux/if_macvlan.h>
#include <linux/debugfs.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/tc/ct_fs.h"
#include "en/tc_priv.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en/tc/post_act.h"
#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_CT_STATE_REPLY_BIT BIT(4)
#define MLX5_CT_STATE_RELATED_BIT BIT(5)
#define MLX5_CT_STATE_INVALID_BIT BIT(6)

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
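
/* Conntrack labels are 128 bits wide, but only MLX5_CT_LABELS_BITS bits of
 * register space are available for matching on them. The full 128-bit value
 * is therefore mapped to a smaller id (see mlx5_get_label_mapping() below),
 * and it is that id which is written to and matched on in hardware.
 */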
/* Statically allocate modify actions for
 * ipv6 and port nat (5) + tuple fields (4) + nic mode zone restore (1) = 10.
 * This will be increased dynamically if needed (for the ipv6 snat + dnat).
 */
#define MLX5_CT_MIN_MOD_ACTS 10
#define ct_dbg(fmt, args...)\
	netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)
struct mlx5_tc_ct_debugfs {
	struct {
		atomic_t offloaded;
		atomic_t rx_dropped;
	} stats;

	struct dentry *root;
};
struct mlx5_tc_ct_priv {
	struct mlx5_core_dev *dev;
	const struct net_device *netdev;
	struct mod_hdr_tbl *mod_hdr_tbl;
	struct xarray tuple_ids;
	struct rhashtable zone_ht;
	struct rhashtable ct_tuples_ht;
	struct rhashtable ct_tuples_nat_ht;
	struct mlx5_flow_table *ct;
	struct mlx5_flow_table *ct_nat;
	struct mlx5e_post_act *post_act;
	struct mutex control_lock; /* guards parallel adds/dels */
	struct mapping_ctx *zone_mapping;
	struct mapping_ctx *labels_mapping;
	enum mlx5_flow_namespace_type ns_type;
	struct mlx5_fs_chains *chains;
	struct mlx5_ct_fs *fs;
	struct mlx5_ct_fs_ops *fs_ops;
	spinlock_t ht_lock; /* protects ft entries */
	struct workqueue_struct *wq;

	struct mlx5_tc_ct_debugfs debugfs;
};
struct mlx5_ct_flow {
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_flow_handle *pre_ct_rule;
	struct mlx5_ct_ft *ft;
	u32 chain_mapping;
};
struct mlx5_ct_zone_rule {
	struct mlx5_ct_fs_rule *rule;
	struct mlx5e_mod_hdr_handle *mh;
	struct mlx5_flow_attr *attr;
	bool nat;
};
struct mlx5_tc_ct_pre {
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *flow_grp;
	struct mlx5_flow_group *miss_grp;
	struct mlx5_flow_handle *flow_rule;
	struct mlx5_flow_handle *miss_rule;
	struct mlx5_modify_hdr *modify_hdr;
};
struct mlx5_ct_ft {
	struct rhash_head node;
	u16 zone;
	u32 zone_restore_id;
	refcount_t refcount;
	struct nf_flowtable *nf_ft;
	struct mlx5_tc_ct_priv *ct_priv;
	struct rhashtable ct_entries_ht;
	struct mlx5_tc_ct_pre pre_ct;
	struct mlx5_tc_ct_pre pre_ct_nat;
};
struct mlx5_ct_tuple {
	u16 addr_type;
	__be16 n_proto;
	u8 ip_proto;
	struct {
		union {
			__be32 src_v4;
			struct in6_addr src_v6;
		};
		union {
			__be32 dst_v4;
			struct in6_addr dst_v6;
		};
	} ip;
	struct {
		__be16 src;
		__be16 dst;
	} port;

	u16 zone;
};
struct mlx5_ct_counter {
	struct mlx5_fc *counter;
	refcount_t refcount;
	bool is_shared;
};

enum {
	MLX5_CT_ENTRY_FLAG_VALID,
};
struct mlx5_ct_entry {
	struct rhash_head node;
	struct rhash_head tuple_node;
	struct rhash_head tuple_nat_node;
	struct mlx5_ct_counter *counter;
	unsigned long cookie;
	unsigned long restore_cookie;
	struct mlx5_ct_tuple tuple;
	struct mlx5_ct_tuple tuple_nat;
	struct mlx5_ct_zone_rule zone_rules[2];

	struct mlx5_tc_ct_priv *ct_priv;
	struct work_struct work;

	refcount_t refcnt;
	unsigned long flags;
};
static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				 struct mlx5_flow_attr *attr,
				 struct mlx5e_mod_hdr_handle *mh);
static const struct rhashtable_params cts_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, node),
	.key_offset = offsetof(struct mlx5_ct_entry, cookie),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
	.head_offset = offsetof(struct mlx5_ct_ft, node),
	.key_offset = offsetof(struct mlx5_ct_ft, zone),
	.key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
	.automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
	.head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
	.key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
	.key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
	.automatic_shrinking = true,
	.min_size = 16 * 1024,
};
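
/* An entry is inserted into ct_tuples_nat_ht only when its NAT tuple differs
 * from its original tuple, so a linked tuple_nat_node is what marks the
 * entry as having NAT.
 */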
static bool
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
{
	return !!(entry->tuple_nat_node.next);
}
static int
mlx5_get_label_mapping(struct mlx5_tc_ct_priv *ct_priv,
		       u32 *labels, u32 *id)
{
	if (!memchr_inv(labels, 0, sizeof(u32) * 4)) {
		*id = 0;
		return 0;
	}

	if (mapping_add(ct_priv->labels_mapping, labels, id))
		return -EOPNOTSUPP;

	return 0;
}

static void
mlx5_put_label_mapping(struct mlx5_tc_ct_priv *ct_priv, u32 id)
{
	if (id)
		mapping_remove(ct_priv->labels_mapping, id);
}
static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
	struct flow_match_control control;
	struct flow_match_basic basic;

	flow_rule_match_basic(rule, &basic);
	flow_rule_match_control(rule, &control);

	tuple->n_proto = basic.key->n_proto;
	tuple->ip_proto = basic.key->ip_proto;
	tuple->addr_type = control.key->addr_type;

	if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		tuple->ip.src_v4 = match.key->src;
		tuple->ip.dst_v4 = match.key->dst;
	} else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		tuple->ip.src_v6 = match.key->src;
		tuple->ip.dst_v6 = match.key->dst;
	} else {
		return -EOPNOTSUPP;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (tuple->ip_proto) {
		case IPPROTO_TCP:
		case IPPROTO_UDP:
			tuple->port.src = match.key->src;
			tuple->port.dst = match.key->dst;
			break;
		default:
			return -EOPNOTSUPP;
		}
	} else {
		if (tuple->ip_proto != IPPROTO_GRE)
			return -EOPNOTSUPP;
	}

	return 0;
}
static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
			     struct flow_rule *rule)
{
	struct flow_action *flow_action = &rule->action;
	struct flow_action_entry *act;
	u32 offset, val, ip6_offset;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id != FLOW_ACTION_MANGLE)
			continue;

		offset = act->mangle.offset;
		val = act->mangle.val;
		switch (act->mangle.htype) {
		case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
			if (offset == offsetof(struct iphdr, saddr))
				tuple->ip.src_v4 = cpu_to_be32(val);
			else if (offset == offsetof(struct iphdr, daddr))
				tuple->ip.dst_v4 = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
			ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
			ip6_offset /= 4;
			if (ip6_offset < 4)
				tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
			else if (ip6_offset < 8)
				tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
			if (offset == offsetof(struct tcphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct tcphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
			if (offset == offsetof(struct udphdr, source))
				tuple->port.src = cpu_to_be16(val);
			else if (offset == offsetof(struct udphdr, dest))
				tuple->port.dst = cpu_to_be16(val);
			else
				return -EOPNOTSUPP;
			break;

		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}
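
/* Walk stacked devices (vlan, macvlan) down to the real lower device to
 * classify where packets of this tuple ingress from, so that the rule's
 * flow_source hint reflects the actual source vport.
 */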
static int
mlx5_tc_ct_get_flow_source_match(struct mlx5_tc_ct_priv *ct_priv,
				 struct net_device *ndev)
{
	struct mlx5e_priv *other_priv = netdev_priv(ndev);
	struct mlx5_core_dev *mdev = ct_priv->dev;
	bool vf_rep, uplink_rep;

	vf_rep = mlx5e_eswitch_vf_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);
	uplink_rep = mlx5e_eswitch_uplink_rep(ndev) && mlx5_same_hw_devs(mdev, other_priv->mdev);

	if (vf_rep)
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_LOCAL_VPORT;
	if (uplink_rep)
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;
	if (is_vlan_dev(ndev))
		return mlx5_tc_ct_get_flow_source_match(ct_priv, vlan_dev_real_dev(ndev));
	if (netif_is_macvlan(ndev))
		return mlx5_tc_ct_get_flow_source_match(ct_priv, macvlan_dev_real_dev(ndev));
	if (mlx5e_get_tc_tun(ndev) || netif_is_lag_master(ndev))
		return MLX5_FLOW_CONTEXT_FLOW_SOURCE_UPLINK;

	return MLX5_FLOW_CONTEXT_FLOW_SOURCE_ANY_VPORT;
}
static int
mlx5_tc_ct_set_tuple_match(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_flow_spec *spec,
			   struct flow_rule *rule)
{
	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
				       outer_headers);
	void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
				       outer_headers);
	u16 addr_type = 0;
	u8 ip_proto = 0;

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
		struct flow_match_basic match;

		flow_rule_match_basic(rule, &match);

		mlx5e_tc_set_ethertype(ct_priv->dev, &match, true, headers_c, headers_v);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
			 match.mask->ip_proto);
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
			 match.key->ip_proto);

		ip_proto = match.key->ip_proto;
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
		struct flow_match_control match;

		flow_rule_match_control(rule, &match);
		addr_type = match.key->addr_type;
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		struct flow_match_ipv4_addrs match;

		flow_rule_match_ipv4_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv4_layout.ipv4),
		       &match.key->src, sizeof(match.key->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct flow_match_ipv6_addrs match;

		flow_rule_match_ipv6_addrs(rule, &match);
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.mask->src, sizeof(match.mask->src));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
		       &match.key->src, sizeof(match.key->src));

		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.mask->dst, sizeof(match.mask->dst));
		memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
		       &match.key->dst, sizeof(match.key->dst));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
		struct flow_match_ports match;

		flow_rule_match_ports(rule, &match);
		switch (ip_proto) {
		case IPPROTO_TCP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 tcp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 tcp_dport, ntohs(match.key->dst));
			break;

		case IPPROTO_UDP:
			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_sport, ntohs(match.mask->src));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_sport, ntohs(match.key->src));

			MLX5_SET(fte_match_set_lyr_2_4, headers_c,
				 udp_dport, ntohs(match.mask->dst));
			MLX5_SET(fte_match_set_lyr_2_4, headers_v,
				 udp_dport, ntohs(match.key->dst));
			break;
		default:
			break;
		}
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
		struct flow_match_tcp match;

		flow_rule_match_tcp(rule, &match);
		MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
			 ntohs(match.mask->flags));
		MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
			 ntohs(match.key->flags));
	}

	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_META)) {
		struct flow_match_meta match;

		flow_rule_match_meta(rule, &match);

		if (match.key->ingress_ifindex & match.mask->ingress_ifindex) {
			struct net_device *dev;

			dev = dev_get_by_index(&init_net, match.key->ingress_ifindex);
			if (dev && MLX5_CAP_ESW_FLOWTABLE(ct_priv->dev, flow_source))
				spec->flow_context.flow_source =
					mlx5_tc_ct_get_flow_source_match(ct_priv, dev);
			dev_put(dev);
		}
	}

	return 0;
}
static void
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
	if (entry->counter->is_shared &&
	    !refcount_dec_and_test(&entry->counter->refcount))
		return;

	mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
	kfree(entry->counter);
}
static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_ct_entry *entry,
			  bool nat)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5_flow_attr *attr = zone_rule->attr;

	ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

	ct_priv->fs_ops->ct_rule_del(ct_priv->fs, zone_rule->rule);
	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
	kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct mlx5_ct_entry *entry)
{
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);

	atomic_dec(&ct_priv->debugfs.stats.offloaded);
}
static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct flow_action_entry *act;
	int i;

	flow_action_for_each(i, act, flow_action) {
		if (act->id == FLOW_ACTION_CT_METADATA)
			return act;
	}

	return NULL;
}
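
/* Program the mod header actions that store the conntrack metadata of an
 * offloaded entry in registers: ct_state, mark, the labels mapping id and
 * the zone restore id (used to restore the zone after a hardware miss).
 */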
static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
			       struct mlx5e_tc_mod_hdr_acts *mod_acts,
			       u8 ct_state,
			       u32 mark,
			       u32 labels_id,
			       u8 zone_restore_id)
{
	enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
	struct mlx5_core_dev *dev = ct_priv->dev;
	int err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					CTSTATE_TO_REG, ct_state);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					MARK_TO_REG, mark);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					LABELS_TO_REG, labels_id);
	if (err)
		return err;

	err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
					ZONE_RESTORE_TO_REG, zone_restore_id);
	if (err)
		return err;

	/* Make another copy of zone id in reg_b for
	 * NIC rx flows since we don't copy reg_c1 to
	 * reg_b upon miss.
	 */
	if (ns != MLX5_FLOW_NAMESPACE_FDB) {
		err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
						NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
		if (err)
			return err;
	}
	return 0;
}

int mlx5_tc_ct_set_ct_clear_regs(struct mlx5_tc_ct_priv *priv,
				 struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	return mlx5_tc_ct_entry_set_registers(priv, mod_acts, 0, 0, 0, 0);
}
static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
				   char *modact)
{
	u32 offset = act->mangle.offset, field;

	switch (act->mangle.htype) {
	case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct iphdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
		else if (offset == offsetof(struct iphdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
		MLX5_SET(set_action_in, modact, length, 0);
		if (offset == offsetof(struct ipv6hdr, saddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, saddr))
			field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
		else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
		else if (offset == offsetof(struct ipv6hdr, daddr))
			field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct tcphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
		else if (offset == offsetof(struct tcphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
		MLX5_SET(set_action_in, modact, length, 16);
		if (offset == offsetof(struct udphdr, source))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
		else if (offset == offsetof(struct udphdr, dest))
			field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
		else
			return -EOPNOTSUPP;
		break;

	default:
		return -EOPNOTSUPP;
	}

	MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
	MLX5_SET(set_action_in, modact, offset, 0);
	MLX5_SET(set_action_in, modact, field, field);
	MLX5_SET(set_action_in, modact, data, act->mangle.val);

	return 0;
}
static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
			    struct flow_rule *flow_rule,
			    struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
	struct flow_action *flow_action = &flow_rule->action;
	struct mlx5_core_dev *mdev = ct_priv->dev;
	struct flow_action_entry *act;
	char *modact;
	int err, i;

	flow_action_for_each(i, act, flow_action) {
		switch (act->id) {
		case FLOW_ACTION_MANGLE: {
			modact = mlx5e_mod_hdr_alloc(mdev, ct_priv->ns_type, mod_acts);
			if (IS_ERR(modact))
				return PTR_ERR(modact);

			err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
			if (err)
				return err;

			mod_acts->num_actions++;
		}
		break;

		case FLOW_ACTION_CT_METADATA:
			/* Handled earlier */
			continue;
		default:
			return -EOPNOTSUPP;
		}
	}

	return 0;
}
static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				struct mlx5_flow_attr *attr,
				struct flow_rule *flow_rule,
				struct mlx5e_mod_hdr_handle **mh,
				u8 zone_restore_id, bool nat_table, bool has_nat)
{
	DECLARE_MOD_HDR_ACTS_ACTIONS(actions_arr, MLX5_CT_MIN_MOD_ACTS);
	DECLARE_MOD_HDR_ACTS(mod_acts, actions_arr);
	struct flow_action_entry *meta;
	u16 ct_state = 0;
	int err;

	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta)
		return -EOPNOTSUPP;

	err = mlx5_get_label_mapping(ct_priv, meta->ct_metadata.labels,
				     &attr->ct_attr.ct_labels_id);
	if (err)
		return -EOPNOTSUPP;

	if (nat_table) {
		if (has_nat) {
			err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule, &mod_acts);
			if (err)
				goto err_mapping;
		}

		ct_state |= MLX5_CT_STATE_NAT_BIT;
	}

	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
	ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
					     ct_state,
					     meta->ct_metadata.mark,
					     attr->ct_attr.ct_labels_id,
					     zone_restore_id);
	if (err)
		goto err_mapping;

	if (nat_table && has_nat) {
		attr->modify_hdr = mlx5_modify_header_alloc(ct_priv->dev, ct_priv->ns_type,
							    mod_acts.num_actions,
							    mod_acts.actions);
		if (IS_ERR(attr->modify_hdr)) {
			err = PTR_ERR(attr->modify_hdr);
			goto err_mapping;
		}

		*mh = NULL;
	} else {
		*mh = mlx5e_mod_hdr_attach(ct_priv->dev,
					   ct_priv->mod_hdr_tbl,
					   ct_priv->ns_type,
					   &mod_acts);
		if (IS_ERR(*mh)) {
			err = PTR_ERR(*mh);
			goto err_mapping;
		}
		attr->modify_hdr = mlx5e_mod_hdr_get(*mh);
	}

	mlx5e_mod_hdr_dealloc(&mod_acts);
	return 0;

err_mapping:
	mlx5e_mod_hdr_dealloc(&mod_acts);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
	return err;
}
static void
mlx5_tc_ct_entry_destroy_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
				 struct mlx5_flow_attr *attr,
				 struct mlx5e_mod_hdr_handle *mh)
{
	if (mh)
		mlx5e_mod_hdr_detach(ct_priv->dev, ct_priv->mod_hdr_tbl, mh);
	else
		mlx5_modify_header_dealloc(ct_priv->dev, attr->modify_hdr);
}
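
/* Each offloaded conntrack entry is programmed twice: via zone_rules[0] in
 * the plain ct table and via zone_rules[1] in the ct_nat table, so lookups
 * coming from both the NAT and the non-NAT pre_ct tables can hit it.
 */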
static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
			  struct flow_rule *flow_rule,
			  struct mlx5_ct_entry *entry,
			  bool nat, u8 zone_restore_id)
{
	struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	struct mlx5_flow_spec *spec = NULL;
	struct mlx5_flow_attr *attr;
	int err;

	zone_rule->nat = nat;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!attr) {
		err = -ENOMEM;
		goto err_attr;
	}

	err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
					      &zone_rule->mh,
					      zone_restore_id,
					      nat,
					      mlx5_tc_ct_entry_has_nat(entry));
	if (err) {
		ct_dbg("Failed to create ct entry mod hdr");
		goto err_mod_hdr;
	}

	attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
		       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
		       MLX5_FLOW_CONTEXT_ACTION_COUNT;
	attr->dest_chain = 0;
	attr->dest_ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	if (entry->tuple.ip_proto == IPPROTO_TCP ||
	    entry->tuple.ip_proto == IPPROTO_UDP)
		attr->outer_match_level = MLX5_MATCH_L4;
	else
		attr->outer_match_level = MLX5_MATCH_L3;
	attr->counter = entry->counter->counter;
	attr->flags |= MLX5_ATTR_FLAG_NO_IN_PORT;
	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
		attr->esw_attr->in_mdev = priv->mdev;

	mlx5_tc_ct_set_tuple_match(ct_priv, spec, flow_rule);
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);

	zone_rule->rule = ct_priv->fs_ops->ct_rule_add(ct_priv->fs, spec, attr, flow_rule);
	if (IS_ERR(zone_rule->rule)) {
		err = PTR_ERR(zone_rule->rule);
		ct_dbg("Failed to add ct entry rule, nat: %d", nat);
		goto err_rule;
	}

	zone_rule->attr = attr;

	kvfree(spec);
	ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

	return 0;

err_rule:
	mlx5_tc_ct_entry_destroy_mod_hdr(ct_priv, zone_rule->attr, zone_rule->mh);
	mlx5_put_label_mapping(ct_priv, attr->ct_attr.ct_labels_id);
err_mod_hdr:
	kfree(attr);
err_attr:
	kvfree(spec);
	return err;
}
static bool
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
{
	return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
}

static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
	struct mlx5_ct_entry *entry;

	entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
				       tuples_ht_params);
	if (entry && mlx5_tc_ct_entry_valid(entry) &&
	    refcount_inc_not_zero(&entry->refcnt)) {
		return entry;
	} else if (!entry) {
		entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
					       tuple, tuples_nat_ht_params);
		if (entry && mlx5_tc_ct_entry_valid(entry) &&
		    refcount_inc_not_zero(&entry->refcnt))
			return entry;
	}

	return entry ? ERR_PTR(-EINVAL) : NULL;
}
static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

	rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
			       &entry->tuple_nat_node,
			       tuples_nat_ht_params);
	rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
			       tuples_ht_params);
}

static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
	struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

	mlx5_tc_ct_entry_del_rules(ct_priv, entry);

	spin_lock_bh(&ct_priv->ht_lock);
	mlx5_tc_ct_entry_remove_from_tuples(entry);
	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_tc_ct_counter_put(ct_priv, entry);
	kfree(entry);
}
static void
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
	if (!refcount_dec_and_test(&entry->refcnt))
		return;

	mlx5_tc_ct_entry_del(entry);
}

static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
{
	struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);

	mlx5_tc_ct_entry_del(entry);
}

static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
	if (!refcount_dec_and_test(&entry->refcnt))
		return;

	INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
	queue_work(entry->ct_priv->wq, &entry->work);
}
static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_ct_counter *counter;
	int ret;

	counter = kzalloc(sizeof(*counter), GFP_KERNEL);
	if (!counter)
		return ERR_PTR(-ENOMEM);

	counter->is_shared = false;
	counter->counter = mlx5_fc_create_ex(ct_priv->dev, true);
	if (IS_ERR(counter->counter)) {
		ct_dbg("Failed to create counter for ct entry");
		ret = PTR_ERR(counter->counter);
		kfree(counter);
		return ERR_PTR(ret);
	}

	return counter;
}
static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
			      struct mlx5_ct_entry *entry)
{
	struct mlx5_ct_tuple rev_tuple = entry->tuple;
	struct mlx5_ct_counter *shared_counter;
	struct mlx5_ct_entry *rev_entry;

	/* get the reversed tuple */
	swap(rev_tuple.port.src, rev_tuple.port.dst);

	if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
		__be32 tmp_addr = rev_tuple.ip.src_v4;

		rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
		rev_tuple.ip.dst_v4 = tmp_addr;
	} else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
		struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

		rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
		rev_tuple.ip.dst_v6 = tmp_addr;
	} else {
		return ERR_PTR(-EOPNOTSUPP);
	}

	/* Use the same counter as the reverse direction */
	spin_lock_bh(&ct_priv->ht_lock);
	rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);

	if (IS_ERR(rev_entry)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		goto create_counter;
	}

	if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
		ct_dbg("Using shared counter entry=0x%p rev=0x%p", entry, rev_entry);
		shared_counter = rev_entry->counter;
		spin_unlock_bh(&ct_priv->ht_lock);

		mlx5_tc_ct_entry_put(rev_entry);
		return shared_counter;
	}

	spin_unlock_bh(&ct_priv->ht_lock);

create_counter:

	shared_counter = mlx5_tc_ct_counter_create(ct_priv);
	if (IS_ERR(shared_counter))
		return shared_counter;

	shared_counter->is_shared = true;
	refcount_set(&shared_counter->refcount, 1);
	return shared_counter;
}
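
/* When netfilter accounting is enabled, each direction needs its own
 * counter; otherwise both directions of a connection share one counter:
 * the reverse tuple is looked up and, when found, its counter is reused.
 */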
static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
			   struct flow_rule *flow_rule,
			   struct mlx5_ct_entry *entry,
			   u8 zone_restore_id)
{
	int err;

	if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
		entry->counter = mlx5_tc_ct_counter_create(ct_priv);
	else
		entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

	if (IS_ERR(entry->counter)) {
		err = PTR_ERR(entry->counter);
		return err;
	}

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
					zone_restore_id);
	if (err)
		goto err_orig;

	err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
					zone_restore_id);
	if (err)
		goto err_nat;

	atomic_inc(&ct_priv->debugfs.stats.offloaded);
	return 0;

err_nat:
	mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
	mlx5_tc_ct_counter_put(ct_priv, entry);
	return err;
}
static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	struct flow_action_entry *meta_action;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;
	int err;

	meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
	if (!meta_action)
		return -EOPNOTSUPP;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (entry && refcount_inc_not_zero(&entry->refcnt)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		mlx5_tc_ct_entry_put(entry);
		return -EEXIST;
	}
	spin_unlock_bh(&ct_priv->ht_lock);

	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
	if (!entry)
		return -ENOMEM;

	entry->tuple.zone = ft->zone;
	entry->cookie = flow->cookie;
	entry->restore_cookie = meta_action->ct_metadata.cookie;
	refcount_set(&entry->refcnt, 2);
	entry->ct_priv = ct_priv;

	err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
	if (err)
		goto err_set;

	memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
	err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
	if (err)
		goto err_set;

	spin_lock_bh(&ct_priv->ht_lock);

	err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
					    cts_ht_params);
	if (err)
		goto err_entries;

	err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
					    &entry->tuple_node,
					    tuples_ht_params);
	if (err)
		goto err_tuple;

	if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
		err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
						    &entry->tuple_nat_node,
						    tuples_nat_ht_params);
		if (err)
			goto err_tuple_nat;
	}
	spin_unlock_bh(&ct_priv->ht_lock);

	err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
					 ft->zone_restore_id);
	if (err)
		goto err_rules;

	set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
	mlx5_tc_ct_entry_put(entry); /* this function reference */

	return 0;

err_rules:
	spin_lock_bh(&ct_priv->ht_lock);
	if (mlx5_tc_ct_entry_has_nat(entry))
		rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
				       &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
	rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
			       &entry->tuple_node,
			       tuples_ht_params);
err_tuple:
	rhashtable_remove_fast(&ft->ct_entries_ht,
			       &entry->node,
			       cts_ht_params);
err_entries:
	spin_unlock_bh(&ct_priv->ht_lock);
err_set:
	kfree(entry);
	if (err != -EEXIST)
		netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
	return err;
}
static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
				  struct flow_cls_offload *flow)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = flow->cookie;
	struct mlx5_ct_entry *entry;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (!entry) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -EINVAL;
	}

	rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_tc_ct_entry_put(entry);

	return 0;
}
static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
				    struct flow_cls_offload *f)
{
	struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
	unsigned long cookie = f->cookie;
	struct mlx5_ct_entry *entry;
	u64 lastuse, packets, bytes;

	spin_lock_bh(&ct_priv->ht_lock);
	entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
	if (!entry) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -ENOENT;
	}

	if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
		spin_unlock_bh(&ct_priv->ht_lock);
		return -EINVAL;
	}

	spin_unlock_bh(&ct_priv->ht_lock);

	mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
	flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
			  FLOW_ACTION_HW_STATS_DELAYED);

	mlx5_tc_ct_entry_put(entry);
	return 0;
}
static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
			      void *cb_priv)
{
	struct flow_cls_offload *f = type_data;
	struct mlx5_ct_ft *ft = cb_priv;

	if (type != TC_SETUP_CLSFLOWER)
		return -EOPNOTSUPP;

	switch (f->command) {
	case FLOW_CLS_REPLACE:
		return mlx5_tc_ct_block_flow_offload_add(ft, f);
	case FLOW_CLS_DESTROY:
		return mlx5_tc_ct_block_flow_offload_del(ft, f);
	case FLOW_CLS_STATS:
		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
	default:
		break;
	}

	return -EOPNOTSUPP;
}
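
/* The restore path below runs in software: when a packet misses in hardware
 * after going through CT, the skb is dissected back into a tuple so the
 * offloaded entry can be found and its conntrack metadata restored.
 */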
static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
			u16 zone)
{
	struct flow_keys flow_keys;

	skb_reset_network_header(skb);
	skb_flow_dissect_flow_keys(skb, &flow_keys, FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);

	tuple->zone = zone;

	if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
	    flow_keys.basic.ip_proto != IPPROTO_UDP &&
	    flow_keys.basic.ip_proto != IPPROTO_GRE)
		return false;

	if (flow_keys.basic.ip_proto == IPPROTO_TCP ||
	    flow_keys.basic.ip_proto == IPPROTO_UDP) {
		tuple->port.src = flow_keys.ports.src;
		tuple->port.dst = flow_keys.ports.dst;
	}
	tuple->n_proto = flow_keys.basic.n_proto;
	tuple->ip_proto = flow_keys.basic.ip_proto;

	switch (flow_keys.basic.n_proto) {
	case htons(ETH_P_IP):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
		tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
		tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
		break;

	case htons(ETH_P_IPV6):
		tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
		tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
		tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
		break;
	default:
		return false;
	}

	return true;
}
int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
	u32 ctstate = 0, ctstate_mask = 0;

	mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
					&ctstate, &ctstate_mask);

	if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
		return -EOPNOTSUPP;

	ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
				    ctstate, ctstate_mask);

	return 0;
}
void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
	if (!priv || !ct_attr->ct_labels_id)
		return;

	mlx5_put_label_mapping(priv, ct_attr->ct_labels_id);
}
int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
		     struct mlx5_flow_spec *spec,
		     struct flow_cls_offload *f,
		     struct mlx5_ct_attr *ct_attr,
		     struct netlink_ext_ack *extack)
{
	bool trk, est, untrk, unest, new, rpl, unrpl, rel, unrel, inv, uninv;
	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
	struct flow_dissector_key_ct *mask, *key;
	u32 ctstate = 0, ctstate_mask = 0;
	u16 ct_state_on, ct_state_off;
	u16 ct_state, ct_state_mask;
	struct flow_match_ct match;
	u32 ct_labels[4];

	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
		return 0;

	if (!priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct matching isn't available");
		return -EOPNOTSUPP;
	}

	flow_rule_match_ct(rule, &match);

	key = match.key;
	mask = match.mask;

	ct_state = key->ct_state;
	ct_state_mask = mask->ct_state;

	if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
			      TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
			      TCA_FLOWER_KEY_CT_FLAGS_NEW |
			      TCA_FLOWER_KEY_CT_FLAGS_REPLY |
			      TCA_FLOWER_KEY_CT_FLAGS_RELATED |
			      TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
		NL_SET_ERR_MSG_MOD(extack,
				   "only ct_state trk, est, new and rpl are supported for offload");
		return -EOPNOTSUPP;
	}

	ct_state_on = ct_state & ct_state_mask;
	ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
	trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
	est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
	rel = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
	inv = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_INVALID;
	untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
	unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
	unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
	unrel = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_RELATED;
	uninv = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_INVALID;

	ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
	ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
	ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
	ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;
	ctstate_mask |= unrel ? MLX5_CT_STATE_RELATED_BIT : 0;
	ctstate_mask |= uninv ? MLX5_CT_STATE_INVALID_BIT : 0;

	if (rel) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +rel isn't supported");
		return -EOPNOTSUPP;
	}

	if (inv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +inv isn't supported");
		return -EOPNOTSUPP;
	}

	if (new) {
		NL_SET_ERR_MSG_MOD(extack,
				   "matching on ct_state +new isn't supported");
		return -EOPNOTSUPP;
	}

	if (mask->ct_zone)
		mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
					    key->ct_zone, MLX5_CT_ZONE_MASK);
	if (ctstate_mask)
		mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
					    ctstate, ctstate_mask);
	if (mask->ct_mark)
		mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
					    key->ct_mark, mask->ct_mark);
	if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
	    mask->ct_labels[3]) {
		ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
		ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
		ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
		ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
		if (mlx5_get_label_mapping(priv, ct_labels, &ct_attr->ct_labels_id))
			return -EOPNOTSUPP;
		mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
					    MLX5_CT_LABELS_MASK);
	}

	return 0;
}
int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
			struct mlx5_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_acts,
			const struct flow_action_entry *act,
			struct netlink_ext_ack *extack)
{
	if (!priv) {
		NL_SET_ERR_MSG_MOD(extack,
				   "offload of ct action isn't available");
		return -EOPNOTSUPP;
	}

	attr->ct_attr.zone = act->ct.zone;
	attr->ct_attr.ct_action = act->ct.action;
	attr->ct_attr.nf_ft = act->ct.flow_table;

	return 0;
}
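
/* Each zone gets a pre_ct and a pre_ct_nat table, each holding two rules:
 * a "flow" rule that matches packets already tracked in this zone (and, for
 * pre_ct_nat, already NATed) and sends them straight to post_act, and a
 * miss rule that sends everything else to the ct (or ct_nat) table for
 * tuple lookup.
 */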
static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
				  struct mlx5_tc_ct_pre *pre_ct,
				  bool nat)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
	struct mlx5_core_dev *dev = ct_priv->dev;
	struct mlx5_flow_table *ft = pre_ct->ft;
	struct mlx5_flow_destination dest = {};
	struct mlx5_flow_act flow_act = {};
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_flow_handle *rule;
	struct mlx5_flow_spec *spec;
	u32 ctstate;
	u16 zone;
	int err;

	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
	if (!spec)
		return -ENOMEM;

	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
					ZONE_TO_REG, zone);
	if (err) {
		ct_dbg("Failed to set zone register mapping");
		goto err_mapping;
	}

	mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
					   pre_mod_acts.num_actions,
					   pre_mod_acts.actions);

	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct->modify_hdr = mod_hdr;

	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
	flow_act.modify_hdr = mod_hdr;
	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

	/* add flow rule */
	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
				    zone, MLX5_CT_ZONE_MASK);
	ctstate = MLX5_CT_STATE_TRK_BIT;
	if (nat)
		ctstate |= MLX5_CT_STATE_NAT_BIT;
	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

	dest.ft = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
		goto err_flow_rule;
	}
	pre_ct->flow_rule = rule;

	/* add miss rule */
	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
	rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
	if (IS_ERR(rule)) {
		err = PTR_ERR(rule);
		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
		goto err_miss_rule;
	}
	pre_ct->miss_rule = rule;

	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
	kvfree(spec);
	return 0;

err_miss_rule:
	mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
	mlx5e_mod_hdr_dealloc(&pre_mod_acts);
	kvfree(spec);
	return err;
}
static void
tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
		       struct mlx5_tc_ct_pre *pre_ct)
{
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->dev;

	mlx5_del_flow_rules(pre_ct->flow_rule);
	mlx5_del_flow_rules(pre_ct->miss_rule);
	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
}
static int
mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
			struct mlx5_tc_ct_pre *pre_ct,
			bool nat)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
	struct mlx5_core_dev *dev = ct_priv->dev;
	struct mlx5_flow_table_attr ft_attr = {};
	struct mlx5_flow_namespace *ns;
	struct mlx5_flow_table *ft;
	struct mlx5_flow_group *g;
	u32 metadata_reg_c_2_mask;
	u32 *flow_group_in;
	void *misc;
	int err;

	ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
	if (!ns) {
		err = -EOPNOTSUPP;
		ct_dbg("Failed to get flow namespace");
		return err;
	}

	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
	if (!flow_group_in)
		return -ENOMEM;

	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
	ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
		       FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
	ft_attr.max_fte = 2;
	ft_attr.level = 1;
	ft = mlx5_create_flow_table(ns, &ft_attr);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to create pre ct table");
		goto out_free;
	}
	pre_ct->ft = ft;

	/* create flow group */
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
		 MLX5_MATCH_MISC_PARAMETERS_2);

	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
			    match_criteria.misc_parameters_2);

	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
	if (nat)
		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);

	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
		 metadata_reg_c_2_mask);

	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct group");
		goto err_flow_grp;
	}
	pre_ct->flow_grp = g;

	/* create miss group */
	memset(flow_group_in, 0, inlen);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
	g = mlx5_create_flow_group(ft, flow_group_in);
	if (IS_ERR(g)) {
		err = PTR_ERR(g);
		ct_dbg("Failed to create pre ct miss group");
		goto err_miss_grp;
	}
	pre_ct->miss_grp = g;

	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
	if (err)
		goto err_add_rules;

	kvfree(flow_group_in);
	return 0;

err_add_rules:
	mlx5_destroy_flow_group(pre_ct->miss_grp);
err_miss_grp:
	mlx5_destroy_flow_group(pre_ct->flow_grp);
err_flow_grp:
	mlx5_destroy_flow_table(ft);
out_free:
	kvfree(flow_group_in);
	return err;
}
static void
mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
		       struct mlx5_tc_ct_pre *pre_ct)
{
	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
	mlx5_destroy_flow_group(pre_ct->miss_grp);
	mlx5_destroy_flow_group(pre_ct->flow_grp);
	mlx5_destroy_flow_table(pre_ct->ft);
}

static int
mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	int err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
	if (err)
		return err;

	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
	if (err)
		goto err_pre_ct_nat;

	return 0;

err_pre_ct_nat:
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
	return err;
}

static void
mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
{
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
}
/* To avoid a false lock dependency warning, give the ct_entries_ht lock a
 * class different than that of the ht being used when deleting the last flow
 * from a group and then deleting the group: there we get into
 * del_sw_flow_group(), which calls rhashtable_destroy() on fg->ftes_hash.
 * That takes ht->mutex, but it is a different ht->mutex than the one here.
 */
static struct lock_class_key ct_entries_ht_lock_key;
static struct mlx5_ct_ft *
mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
		     struct nf_flowtable *nf_ft)
{
	struct mlx5_ct_ft *ft;
	int err;

	ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
	if (ft) {
		refcount_inc(&ft->refcount);
		return ft;
	}

	ft = kzalloc(sizeof(*ft), GFP_KERNEL);
	if (!ft)
		return ERR_PTR(-ENOMEM);

	err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
	if (err)
		goto err_mapping;

	ft->zone = zone;
	ft->nf_ft = nf_ft;
	ft->ct_priv = ct_priv;
	refcount_set(&ft->refcount, 1);

	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
	if (err)
		goto err_alloc_pre_ct;

	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
	if (err)
		goto err_init;

	lockdep_set_class(&ft->ct_entries_ht.mutex, &ct_entries_ht_lock_key);

	err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
				     zone_params);
	if (err)
		goto err_insert;

	err = nf_flow_table_offload_add_cb(ft->nf_ft,
					   mlx5_tc_ct_block_flow_offload, ft);
	if (err)
		goto err_add_cb;

	return ft;

err_add_cb:
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
err_insert:
	rhashtable_destroy(&ft->ct_entries_ht);
err_init:
	mlx5_tc_ct_free_pre_ct_tables(ft);
err_alloc_pre_ct:
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
err_mapping:
	kfree(ft);
	return ERR_PTR(err);
}
static void
mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
{
	struct mlx5_ct_entry *entry = ptr;

	mlx5_tc_ct_entry_put(entry); /* dec ft entry ref */
}

static void
mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
{
	if (!refcount_dec_and_test(&ft->refcount))
		return;

	flush_workqueue(ct_priv->wq);
	nf_flow_table_offload_del_cb(ft->nf_ft,
				     mlx5_tc_ct_block_flow_offload, ft);
	rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
	rhashtable_free_and_destroy(&ft->ct_entries_ht,
				    mlx5_tc_ct_flush_ft_entry,
				    ct_priv);
	mlx5_tc_ct_free_pre_ct_tables(ft);
	mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
	kfree(ft);
}
/* We translate the tc filter with CT action to the following HW model:
 *
 * +---------------------+
 * + ft prio (tc chain)  +
 * + original match      +
 * +---------------------+
 *      | set chain miss mapping
 *      | set fte_id
 *      | set tunnel_id
 *      | do decap
 *      v
 * +---------------------+
 * + pre_ct/pre_ct_nat   +  if matches     +-------------------------+
 * + zone+nat match      +---------------->+ post_act (see below)    +
 * +---------------------+  set zone       +-------------------------+
 *      | set zone
 *      v
 * +--------------------+
 * + CT (nat or no nat) +
 * + tuple + zone match +
 * +--------------------+
 *      | set mark
 *      | set labels_id
 *      | set established
 *      | set zone_restore
 *      | do nat (if needed)
 *      v
 * +--------------+
 * + post_act     + original filter actions
 * + fte_id match +------------------------>
 * +--------------+
 */
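
/* Illustrative only (not taken from this file): a typical pair of tc rules
 * that exercises this model could look like
 *
 *   tc filter add dev $REP ingress chain 0 proto ip flower \
 *       ct_state -trk action ct zone 1 pipe action goto chain 1
 *   tc filter add dev $REP ingress chain 1 proto ip flower \
 *       ct_state +trk+est action mirred egress redirect dev $REP2
 *
 * The chain 0 rule's match becomes the pre_ct rule, conntrack entries
 * offloaded from zone 1 populate the CT tables, and the actions following
 * the ct action (here, the goto) execute from post_act.
 */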
static struct mlx5_flow_handle *
__mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
			  struct mlx5_flow_spec *orig_spec,
			  struct mlx5_flow_attr *attr)
{
	bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
	struct mlx5e_tc_mod_hdr_acts *pre_mod_acts;
	u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
	struct mlx5_flow_attr *pre_ct_attr;
	struct mlx5_modify_hdr *mod_hdr;
	struct mlx5_ct_flow *ct_flow;
	int chain_mapping = 0, err;
	struct mlx5_ct_ft *ft;

	ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
	if (!ct_flow)
		return ERR_PTR(-ENOMEM);

	/* Register for CT established events */
	ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
				  attr->ct_attr.nf_ft);
	if (IS_ERR(ft)) {
		err = PTR_ERR(ft);
		ct_dbg("Failed to register to ft callback");
		goto err_ft;
	}
	ct_flow->ft = ft;

	/* Base flow attributes of both rules on original rule attribute */
	ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
	if (!ct_flow->pre_ct_attr) {
		err = -ENOMEM;
		goto err_alloc_pre;
	}

	pre_ct_attr = ct_flow->pre_ct_attr;
	memcpy(pre_ct_attr, attr, attr_sz);
	pre_mod_acts = &pre_ct_attr->parse_attr->mod_hdr_acts;

	/* Modify the original rule's action to fwd and modify, leave decap */
	pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
	pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
			       MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;

	/* Write chain miss tag for miss in ct table as we
	 * don't go through all prios of this chain as normal tc rules
	 * miss.
	 */
	err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
					    &chain_mapping);
	if (err) {
		ct_dbg("Failed to get chain register mapping for chain");
		goto err_get_chain;
	}
	ct_flow->chain_mapping = chain_mapping;

	err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts, ct_priv->ns_type,
					CHAIN_TO_REG, chain_mapping);
	if (err) {
		ct_dbg("Failed to set chain register mapping");
		goto err_mapping;
	}

	/* If original flow is decap, we do it before going into ct table
	 * so add a rewrite for the tunnel match_id.
	 */
	if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
	    attr->chain == 0) {
		err = mlx5e_tc_match_to_reg_set(priv->mdev, pre_mod_acts,
						ct_priv->ns_type,
						TUNNEL_TO_REG,
						attr->tunnel_id);
		if (err) {
			ct_dbg("Failed to set tunnel register mapping");
			goto err_mapping;
		}
	}

	mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
					   pre_mod_acts->num_actions,
					   pre_mod_acts->actions);
	if (IS_ERR(mod_hdr)) {
		err = PTR_ERR(mod_hdr);
		ct_dbg("Failed to create pre ct mod hdr");
		goto err_mapping;
	}
	pre_ct_attr->modify_hdr = mod_hdr;

	/* Change original rule point to ct table */
	pre_ct_attr->dest_chain = 0;
	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
	ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
						   pre_ct_attr);
	if (IS_ERR(ct_flow->pre_ct_rule)) {
		err = PTR_ERR(ct_flow->pre_ct_rule);
		ct_dbg("Failed to add pre ct rule");
		goto err_insert_orig;
	}

	attr->ct_attr.ct_flow = ct_flow;
	mlx5e_mod_hdr_dealloc(pre_mod_acts);

	return ct_flow->pre_ct_rule;

err_insert_orig:
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
err_mapping:
	mlx5e_mod_hdr_dealloc(pre_mod_acts);
	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
err_get_chain:
	kfree(ct_flow->pre_ct_attr);
err_alloc_pre:
	mlx5_tc_ct_del_ft_cb(ct_priv, ft);
err_ft:
	kfree(ct_flow);
	netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
	return ERR_PTR(err);
}
struct mlx5_flow_handle *
mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
			struct mlx5_flow_spec *spec,
			struct mlx5_flow_attr *attr,
			struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
{
	struct mlx5_flow_handle *rule;

	if (!priv)
		return ERR_PTR(-EOPNOTSUPP);

	mutex_lock(&priv->control_lock);
	rule = __mlx5_tc_ct_flow_offload(priv, spec, attr);
	mutex_unlock(&priv->control_lock);

	return rule;
}
static void
__mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct mlx5_ct_flow *ct_flow,
			 struct mlx5_flow_attr *attr)
{
	struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
	struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);

	mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule, pre_ct_attr);
	mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);

	mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
	mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);

	kfree(ct_flow->pre_ct_attr);
	kfree(ct_flow);
}

void
mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
		       struct mlx5_flow_attr *attr)
{
	struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;

	/* We are called on error to clean up stuff from parsing
	 * but we don't have anything for now
	 */
	if (!ct_flow)
		return;

	mutex_lock(&priv->control_lock);
	__mlx5_tc_ct_delete_flow(priv, ct_flow, attr);
	mutex_unlock(&priv->control_lock);
}
static int
mlx5_tc_ct_fs_init(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_flow_table *post_ct = mlx5e_tc_post_act_get_ft(ct_priv->post_act);
	struct mlx5_ct_fs_ops *fs_ops = mlx5_ct_fs_dmfs_ops_get();
	int err;

	if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB &&
	    ct_priv->dev->priv.steering->mode == MLX5_FLOW_STEERING_MODE_SMFS) {
		ct_dbg("Using SMFS ct flow steering provider");
		fs_ops = mlx5_ct_fs_smfs_ops_get();
	}

	ct_priv->fs = kzalloc(sizeof(*ct_priv->fs) + fs_ops->priv_size, GFP_KERNEL);
	if (!ct_priv->fs)
		return -ENOMEM;

	ct_priv->fs->netdev = ct_priv->netdev;
	ct_priv->fs->dev = ct_priv->dev;
	ct_priv->fs_ops = fs_ops;

	err = ct_priv->fs_ops->init(ct_priv->fs, ct_priv->ct, ct_priv->ct_nat, post_ct);
	if (err)
		goto err_init;

	return 0;

err_init:
	kfree(ct_priv->fs);
	return err;
}
static int
mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
				  const char **err_msg)
{
	if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
		/* vlan workaround should be avoided for multi chain rules.
		 * This is just a sanity check as pop vlan action should
		 * be supported by any FW that supports ignore_flow_level
		 */
		*err_msg = "firmware vlan actions support is missing";
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
				    fdb_modify_header_fwd_to_table)) {
		/* CT always writes to registers which are mod header actions.
		 * Therefore, mod header and goto are required.
		 */
		*err_msg = "firmware fwd and modify support is missing";
		return -EOPNOTSUPP;
	}

	if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
		*err_msg = "register loopback isn't supported";
		return -EOPNOTSUPP;
	}

	return 0;
}
static int
mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
			      enum mlx5_flow_namespace_type ns_type,
			      struct mlx5e_post_act *post_act)
{
	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
	const char *err_msg = NULL;
	int err = 0;

#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
	/* cannot restore chain ID on HW miss */

	err_msg = "tc skb extension missing";
	err = -EOPNOTSUPP;
	goto out_err;
#endif
	if (IS_ERR_OR_NULL(post_act)) {
		/* ignore_flow_level is not supported by default for VFs, so
		 * post_act won't be available. Skip showing the error msg.
		 */
		if (priv->mdev->coredev_type != MLX5_COREDEV_VF)
			err_msg = "post action is missing";
		err = -EOPNOTSUPP;
		goto out_err;
	}

	if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
		err = mlx5_tc_ct_init_check_esw_support(esw, &err_msg);

out_err:
	if (err && err_msg)
		netdev_dbg(priv->netdev, "tc ct offload not supported, %s\n", err_msg);
	return err;
}
static void
mlx5_ct_tc_create_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
	bool is_fdb = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB;
	struct mlx5_tc_ct_debugfs *ct_dbgfs = &ct_priv->debugfs;
	char dirname[16] = {};

	if (snprintf(dirname, sizeof(dirname), "ct_%s", is_fdb ? "fdb" : "nic") < 0)
		return;

	ct_dbgfs->root = debugfs_create_dir(dirname, mlx5_debugfs_get_dev_root(ct_priv->dev));
	debugfs_create_atomic_t("offloaded", 0400, ct_dbgfs->root,
				&ct_dbgfs->stats.offloaded);
	debugfs_create_atomic_t("rx_dropped", 0400, ct_dbgfs->root,
				&ct_dbgfs->stats.rx_dropped);
}

static void
mlx5_ct_tc_remove_dbgfs(struct mlx5_tc_ct_priv *ct_priv)
{
	debugfs_remove_recursive(ct_priv->debugfs.root);
}
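
/* The debugfs counters exposed above: "offloaded" tracks the number of
 * conntrack entries currently offloaded (incremented/decremented as entry
 * rules are added/deleted), and "rx_dropped" counts packets whose conntrack
 * metadata could not be restored after a hardware miss.
 */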
#define INIT_ERR_PREFIX "tc ct offload init failed"
struct mlx5_tc_ct_priv *
mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
		struct mod_hdr_tbl *mod_hdr,
		enum mlx5_flow_namespace_type ns_type,
		struct mlx5e_post_act *post_act)
{
	struct mlx5_tc_ct_priv *ct_priv;
	struct mlx5_core_dev *dev;
	u64 mapping_id;
	int err;

	dev = priv->mdev;
	err = mlx5_tc_ct_init_check_support(priv, ns_type, post_act);
	if (err)
		goto err_support;

	ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
	if (!ct_priv)
		goto err_alloc;

	mapping_id = mlx5_query_nic_system_image_guid(dev);

	ct_priv->zone_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_ZONE,
						      sizeof(u16), 0, true);
	if (IS_ERR(ct_priv->zone_mapping)) {
		err = PTR_ERR(ct_priv->zone_mapping);
		goto err_mapping_zone;
	}

	ct_priv->labels_mapping = mapping_create_for_id(mapping_id, MAPPING_TYPE_LABELS,
							sizeof(u32) * 4, 0, true);
	if (IS_ERR(ct_priv->labels_mapping)) {
		err = PTR_ERR(ct_priv->labels_mapping);
		goto err_mapping_labels;
	}

	spin_lock_init(&ct_priv->ht_lock);
	ct_priv->ns_type = ns_type;
	ct_priv->chains = chains;
	ct_priv->netdev = priv->netdev;
	ct_priv->dev = priv->mdev;
	ct_priv->mod_hdr_tbl = mod_hdr;
	ct_priv->ct = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct)) {
		err = PTR_ERR(ct_priv->ct);
		mlx5_core_warn(dev,
			       "%s, failed to create ct table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_tbl;
	}

	ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
	if (IS_ERR(ct_priv->ct_nat)) {
		err = PTR_ERR(ct_priv->ct_nat);
		mlx5_core_warn(dev,
			       "%s, failed to create ct nat table err: %d\n",
			       INIT_ERR_PREFIX, err);
		goto err_ct_nat_tbl;
	}

	ct_priv->post_act = post_act;
	mutex_init(&ct_priv->control_lock);
	if (rhashtable_init(&ct_priv->zone_ht, &zone_params))
		goto err_ct_zone_ht;
	if (rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params))
		goto err_ct_tuples_ht;
	if (rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params))
		goto err_ct_tuples_nat_ht;

	ct_priv->wq = alloc_ordered_workqueue("mlx5e_ct_priv_wq", 0);
	if (!ct_priv->wq) {
		err = -ENOMEM;
		goto err_wq;
	}

	err = mlx5_tc_ct_fs_init(ct_priv);
	if (err)
		goto err_init_fs;

	mlx5_ct_tc_create_dbgfs(ct_priv);
	return ct_priv;

err_init_fs:
	destroy_workqueue(ct_priv->wq);
err_wq:
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
err_ct_tuples_nat_ht:
	rhashtable_destroy(&ct_priv->ct_tuples_ht);
err_ct_tuples_ht:
	rhashtable_destroy(&ct_priv->zone_ht);
err_ct_zone_ht:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
err_ct_nat_tbl:
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
err_ct_tbl:
	mapping_destroy(ct_priv->labels_mapping);
err_mapping_labels:
	mapping_destroy(ct_priv->zone_mapping);
err_mapping_zone:
	kfree(ct_priv);
err_alloc:
err_support:

	return NULL;
}
void
mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
{
	struct mlx5_fs_chains *chains;

	if (!ct_priv)
		return;

	destroy_workqueue(ct_priv->wq);
	mlx5_ct_tc_remove_dbgfs(ct_priv);
	chains = ct_priv->chains;

	ct_priv->fs_ops->destroy(ct_priv->fs);
	kfree(ct_priv->fs);

	mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
	mlx5_chains_destroy_global_table(chains, ct_priv->ct);
	mapping_destroy(ct_priv->zone_mapping);
	mapping_destroy(ct_priv->labels_mapping);

	rhashtable_destroy(&ct_priv->ct_tuples_ht);
	rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
	rhashtable_destroy(&ct_priv->zone_ht);
	mutex_destroy(&ct_priv->control_lock);
	kfree(ct_priv);
}
bool
mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
			 struct sk_buff *skb, u8 zone_restore_id)
{
	struct mlx5_ct_tuple tuple = {};
	struct mlx5_ct_entry *entry;
	u16 zone;

	if (!ct_priv || !zone_restore_id)
		return true;

	if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
		goto out_inc_drop;

	if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
		goto out_inc_drop;

	spin_lock(&ct_priv->ht_lock);

	entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
	if (!entry) {
		spin_unlock(&ct_priv->ht_lock);
		goto out_inc_drop;
	}

	if (IS_ERR(entry)) {
		spin_unlock(&ct_priv->ht_lock);
		goto out_inc_drop;
	}
	spin_unlock(&ct_priv->ht_lock);

	tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
	__mlx5_tc_ct_entry_put(entry);

	return true;

out_inc_drop:
	atomic_inc(&ct_priv->debugfs.stats.rx_dropped);
	return false;
}