// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/* Copyright (c) 2019 Mellanox Technologies. */

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_zones.h>
#include <net/netfilter/nf_conntrack_labels.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <uapi/linux/tc_act/tc_pedit.h>
#include <net/tc_act/tc_ct.h>
#include <net/flow_offload.h>
#include <net/netfilter/nf_flow_table.h>
#include <linux/workqueue.h>
#include <linux/refcount.h>
#include <linux/xarray.h>

#include "lib/fs_chains.h"
#include "en/tc_ct.h"
#include "en/mod_hdr.h"
#include "en/mapping.h"
#include "en.h"
#include "en_tc.h"
#include "en_rep.h"

#define MLX5_CT_ZONE_BITS (mlx5e_tc_attr_to_reg_mappings[ZONE_TO_REG].mlen * 8)
#define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
#define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
#define MLX5_CT_STATE_TRK_BIT BIT(2)
#define MLX5_CT_STATE_NAT_BIT BIT(3)
#define MLX5_CT_STATE_REPLY_BIT BIT(4)

#define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
#define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
#define MLX5_FTE_ID_MASK MLX5_FTE_ID_MAX

#define MLX5_CT_LABELS_BITS (mlx5e_tc_attr_to_reg_mappings[LABELS_TO_REG].mlen * 8)
#define MLX5_CT_LABELS_MASK GENMASK(MLX5_CT_LABELS_BITS - 1, 0)
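
/*
 * The zone and the MLX5_CT_STATE_* bits share one metadata register:
 * the zone mapping occupies the low bits covered by MLX5_CT_ZONE_MASK
 * and the state bits are matched in the upper 16 bits (see the
 * reg_c_2 mask assembled in mlx5_tc_ct_alloc_pre_ct() below).
 */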

#define ct_dbg(fmt, args...)\
        netdev_dbg(ct_priv->netdev, "ct_debug: " fmt "\n", ##args)

struct mlx5_tc_ct_priv {
        struct mlx5_core_dev *dev;
        const struct net_device *netdev;
        struct mod_hdr_tbl *mod_hdr_tbl;
        struct idr fte_ids;
        struct xarray tuple_ids;
        struct rhashtable zone_ht;
        struct rhashtable ct_tuples_ht;
        struct rhashtable ct_tuples_nat_ht;
        struct mlx5_flow_table *ct;
        struct mlx5_flow_table *ct_nat;
        struct mlx5_flow_table *post_ct;
        struct mutex control_lock; /* guards parallel adds/dels */
        struct mapping_ctx *zone_mapping;
        struct mapping_ctx *labels_mapping;
        enum mlx5_flow_namespace_type ns_type;
        struct mlx5_fs_chains *chains;
        spinlock_t ht_lock; /* protects ft entries */
};

struct mlx5_ct_flow {
        struct mlx5_flow_attr *pre_ct_attr;
        struct mlx5_flow_attr *post_ct_attr;
        struct mlx5_flow_handle *pre_ct_rule;
        struct mlx5_flow_handle *post_ct_rule;
        struct mlx5_ct_ft *ft;
        u32 fte_id;
        u32 chain_mapping;
};

struct mlx5_ct_zone_rule {
        struct mlx5_flow_handle *rule;
        struct mlx5e_mod_hdr_handle *mh;
        struct mlx5_flow_attr *attr;
        bool nat;
};

struct mlx5_tc_ct_pre {
        struct mlx5_flow_table *ft;
        struct mlx5_flow_group *flow_grp;
        struct mlx5_flow_group *miss_grp;
        struct mlx5_flow_handle *flow_rule;
        struct mlx5_flow_handle *miss_rule;
        struct mlx5_modify_hdr *modify_hdr;
};

struct mlx5_ct_ft {
        struct rhash_head node;
        u16 zone;
        u32 zone_restore_id;
        refcount_t refcount;
        struct nf_flowtable *nf_ft;
        struct mlx5_tc_ct_priv *ct_priv;
        struct rhashtable ct_entries_ht;
        struct mlx5_tc_ct_pre pre_ct;
        struct mlx5_tc_ct_pre pre_ct_nat;
};

struct mlx5_ct_tuple {
        u16 addr_type;
        __be16 n_proto;
        u8 ip_proto;
        struct {
                union {
                        __be32 src_v4;
                        struct in6_addr src_v6;
                };
                union {
                        __be32 dst_v4;
                        struct in6_addr dst_v6;
                };
        } ip;
        struct {
                __be16 src;
                __be16 dst;
        } port;

        u16 zone;
};

struct mlx5_ct_counter {
        struct mlx5_fc *counter;
        refcount_t refcount;
        bool is_shared;
};

enum {
        MLX5_CT_ENTRY_FLAG_VALID,
};

struct mlx5_ct_entry {
        struct rhash_head node;
        struct rhash_head tuple_node;
        struct rhash_head tuple_nat_node;
        struct mlx5_ct_counter *counter;
        unsigned long cookie;
        unsigned long restore_cookie;
        struct mlx5_ct_tuple tuple;
        struct mlx5_ct_tuple tuple_nat;
        struct mlx5_ct_zone_rule zone_rules[2];

        struct mlx5_tc_ct_priv *ct_priv;
        struct work_struct work;

        refcount_t refcnt;
        unsigned long flags;
};

static const struct rhashtable_params cts_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, node),
        .key_offset = offsetof(struct mlx5_ct_entry, cookie),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->cookie),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params zone_params = {
        .head_offset = offsetof(struct mlx5_ct_ft, node),
        .key_offset = offsetof(struct mlx5_ct_ft, zone),
        .key_len = sizeof(((struct mlx5_ct_ft *)0)->zone),
        .automatic_shrinking = true,
};

static const struct rhashtable_params tuples_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static const struct rhashtable_params tuples_nat_ht_params = {
        .head_offset = offsetof(struct mlx5_ct_entry, tuple_nat_node),
        .key_offset = offsetof(struct mlx5_ct_entry, tuple_nat),
        .key_len = sizeof(((struct mlx5_ct_entry *)0)->tuple_nat),
        .automatic_shrinking = true,
        .min_size = 16 * 1024,
};

static bool
mlx5_tc_ct_entry_has_nat(struct mlx5_ct_entry *entry)
{
        return !!(entry->tuple_nat_node.next);
}
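
/*
 * An entry is only linked into ct_tuples_nat_ht when its NAT tuple
 * differs from the original one (see
 * mlx5_tc_ct_block_flow_offload_add()), so a non-NULL rhash node is
 * enough to tell whether there is a NAT-side hashtable entry to
 * remove.
 */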

static int
mlx5_tc_ct_rule_to_tuple(struct mlx5_ct_tuple *tuple, struct flow_rule *rule)
{
        struct flow_match_control control;
        struct flow_match_basic basic;

        flow_rule_match_basic(rule, &basic);
        flow_rule_match_control(rule, &control);

        tuple->n_proto = basic.key->n_proto;
        tuple->ip_proto = basic.key->ip_proto;
        tuple->addr_type = control.key->addr_type;

        if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                tuple->ip.src_v4 = match.key->src;
                tuple->ip.dst_v4 = match.key->dst;
        } else if (tuple->addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                tuple->ip.src_v6 = match.key->src;
                tuple->ip.dst_v6 = match.key->dst;
        } else {
                return -EOPNOTSUPP;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (tuple->ip_proto) {
                case IPPROTO_TCP:
                case IPPROTO_UDP:
                        tuple->port.src = match.key->src;
                        tuple->port.dst = match.key->dst;
                        break;
                default:
                        return -EOPNOTSUPP;
                }
        } else {
                return -EOPNOTSUPP;
        }

        return 0;
}
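
/*
 * Only TCP and UDP over IPv4/IPv6 can be expressed as an offloadable
 * 5-tuple here; anything else (e.g. ICMP, or a rule without a ports
 * match) returns -EOPNOTSUPP so the entry is never offloaded.
 */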

static int
mlx5_tc_ct_rule_to_tuple_nat(struct mlx5_ct_tuple *tuple,
                             struct flow_rule *rule)
{
        struct flow_action *flow_action = &rule->action;
        struct flow_action_entry *act;
        u32 offset, val, ip6_offset;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id != FLOW_ACTION_MANGLE)
                        continue;

                offset = act->mangle.offset;
                val = act->mangle.val;
                switch (act->mangle.htype) {
                case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                        if (offset == offsetof(struct iphdr, saddr))
                                tuple->ip.src_v4 = cpu_to_be32(val);
                        else if (offset == offsetof(struct iphdr, daddr))
                                tuple->ip.dst_v4 = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                        ip6_offset = (offset - offsetof(struct ipv6hdr, saddr));
                        ip6_offset /= 4;
                        if (ip6_offset < 4)
                                tuple->ip.src_v6.s6_addr32[ip6_offset] = cpu_to_be32(val);
                        else if (ip6_offset < 8)
                                tuple->ip.dst_v6.s6_addr32[ip6_offset - 4] = cpu_to_be32(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                        if (offset == offsetof(struct tcphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct tcphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                        if (offset == offsetof(struct udphdr, source))
                                tuple->port.src = cpu_to_be16(val);
                        else if (offset == offsetof(struct udphdr, dest))
                                tuple->port.dst = cpu_to_be16(val);
                        else
                                return -EOPNOTSUPP;
                        break;

                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}
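
/*
 * NAT reaches us as FLOW_ACTION_MANGLE rewrites layered on top of the
 * original tuple. IPv6 addresses arrive as four 32-bit mangles per
 * address; the (offset - offsetof(saddr)) / 4 arithmetic above maps
 * each mangle onto the matching s6_addr32 word of the source (words
 * 0-3) or destination (words 4-7) address.
 */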

static int
mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
                           struct flow_rule *rule)
{
        void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
                                       outer_headers);
        void *headers_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
                                       outer_headers);
        u16 addr_type = 0;
        u8 ip_proto = 0;

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
                struct flow_match_basic match;

                flow_rule_match_basic(rule, &match);

                mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
                                       headers_v);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
                         match.mask->ip_proto);
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
                         match.key->ip_proto);

                ip_proto = match.key->ip_proto;
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) {
                struct flow_match_control match;

                flow_rule_match_control(rule, &match);
                addr_type = match.key->addr_type;
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                struct flow_match_ipv4_addrs match;

                flow_rule_match_ipv4_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv4_layout.ipv4),
                       &match.key->src, sizeof(match.key->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv4_layout.ipv4),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct flow_match_ipv6_addrs match;

                flow_rule_match_ipv6_addrs(rule, &match);
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.mask->src, sizeof(match.mask->src));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    src_ipv4_src_ipv6.ipv6_layout.ipv6),
                       &match.key->src, sizeof(match.key->src));

                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_c,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.mask->dst, sizeof(match.mask->dst));
                memcpy(MLX5_ADDR_OF(fte_match_set_lyr_2_4, headers_v,
                                    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
                       &match.key->dst, sizeof(match.key->dst));
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) {
                struct flow_match_ports match;

                flow_rule_match_ports(rule, &match);
                switch (ip_proto) {
                case IPPROTO_TCP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 tcp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 tcp_dport, ntohs(match.key->dst));
                        break;

                case IPPROTO_UDP:
                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_sport, ntohs(match.mask->src));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_sport, ntohs(match.key->src));

                        MLX5_SET(fte_match_set_lyr_2_4, headers_c,
                                 udp_dport, ntohs(match.mask->dst));
                        MLX5_SET(fte_match_set_lyr_2_4, headers_v,
                                 udp_dport, ntohs(match.key->dst));
                        break;
                default:
                        break;
                }
        }

        if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_TCP)) {
                struct flow_match_tcp match;

                flow_rule_match_tcp(rule, &match);
                MLX5_SET(fte_match_set_lyr_2_4, headers_c, tcp_flags,
                         ntohs(match.mask->flags));
                MLX5_SET(fte_match_set_lyr_2_4, headers_v, tcp_flags,
                         ntohs(match.key->flags));
        }

        return 0;
}

static void
mlx5_tc_ct_counter_put(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_entry *entry)
{
        if (entry->counter->is_shared &&
            !refcount_dec_and_test(&entry->counter->refcount))
                return;

        mlx5_fc_destroy(ct_priv->dev, entry->counter->counter);
        kfree(entry->counter);
}
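
/*
 * Shared counters are refcounted because both directions of a
 * connection can point at the same hardware counter; only the last
 * put frees it. Per-entry counters (used when conntrack accounting
 * is enabled) are freed unconditionally.
 */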

static void
mlx5_tc_ct_entry_del_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct mlx5_ct_entry *entry,
                          bool nat)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5_flow_attr *attr = zone_rule->attr;

        ct_dbg("Deleting ct entry rule in zone %d", entry->tuple.zone);

        mlx5_tc_rule_delete(netdev_priv(ct_priv->netdev), zone_rule->rule, attr);
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
        kfree(attr);
}

static void
mlx5_tc_ct_entry_del_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct mlx5_ct_entry *entry)
{
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, true);
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
}

static struct flow_action_entry *
mlx5_tc_ct_get_ct_metadata_action(struct flow_rule *flow_rule)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct flow_action_entry *act;
        int i;

        flow_action_for_each(i, act, flow_action) {
                if (act->id == FLOW_ACTION_CT_METADATA)
                        return act;
        }

        return NULL;
}

static int
mlx5_tc_ct_entry_set_registers(struct mlx5_tc_ct_priv *ct_priv,
                               struct mlx5e_tc_mod_hdr_acts *mod_acts,
                               u8 ct_state,
                               u32 mark,
                               u32 labels_id,
                               u8 zone_restore_id)
{
        enum mlx5_flow_namespace_type ns = ct_priv->ns_type;
        struct mlx5_core_dev *dev = ct_priv->dev;
        int err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        CTSTATE_TO_REG, ct_state);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        MARK_TO_REG, mark);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        LABELS_TO_REG, labels_id);
        if (err)
                return err;

        err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                        ZONE_RESTORE_TO_REG, zone_restore_id);
        if (err)
                return err;

        /* Make another copy of zone id in reg_b for
         * NIC rx flows since we don't copy reg_c1 to
         * reg_b upon miss.
         */
        if (ns != MLX5_FLOW_NAMESPACE_FDB) {
                err = mlx5e_tc_match_to_reg_set(dev, mod_acts, ns,
                                                NIC_ZONE_RESTORE_TO_REG, zone_restore_id);
                if (err)
                        return err;
        }
        return 0;
}
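
/*
 * These register writes are the payload of an offloaded ct entry:
 * a packet that hits the entry carries its ct_state bits, conntrack
 * mark, mapped label id and zone restore id forward in metadata
 * registers. NIC rx flows additionally mirror the zone restore id
 * into reg_b, since reg_c1 is not copied to reg_b on a miss.
 */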

static int
mlx5_tc_ct_parse_mangle_to_mod_act(struct flow_action_entry *act,
                                   char *modact)
{
        u32 offset = act->mangle.offset, field;

        switch (act->mangle.htype) {
        case FLOW_ACT_MANGLE_HDR_TYPE_IP4:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct iphdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV4;
                else if (offset == offsetof(struct iphdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV4;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_IP6:
                MLX5_SET(set_action_in, modact, length, 0);
                if (offset == offsetof(struct ipv6hdr, saddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, saddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, saddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 12)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 8)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32;
                else if (offset == offsetof(struct ipv6hdr, daddr) + 4)
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64;
                else if (offset == offsetof(struct ipv6hdr, daddr))
                        field = MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_TCP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct tcphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT;
                else if (offset == offsetof(struct tcphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        case FLOW_ACT_MANGLE_HDR_TYPE_UDP:
                MLX5_SET(set_action_in, modact, length, 16);
                if (offset == offsetof(struct udphdr, source))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT;
                else if (offset == offsetof(struct udphdr, dest))
                        field = MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT;
                else
                        return -EOPNOTSUPP;
                break;

        default:
                return -EOPNOTSUPP;
        }

        MLX5_SET(set_action_in, modact, action_type, MLX5_ACTION_TYPE_SET);
        MLX5_SET(set_action_in, modact, offset, 0);
        MLX5_SET(set_action_in, modact, field, field);
        MLX5_SET(set_action_in, modact, data, act->mangle.val);

        return 0;
}
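
/*
 * Translates a single pedit mangle into a device set_action_in. Note
 * the length convention: 0 selects the full 32-bit field (the IPv4
 * and per-word IPv6 address cases), while the 16-bit port rewrites
 * set length to 16 explicitly.
 */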

static int
mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
                            struct flow_rule *flow_rule,
                            struct mlx5e_tc_mod_hdr_acts *mod_acts)
{
        struct flow_action *flow_action = &flow_rule->action;
        struct mlx5_core_dev *mdev = ct_priv->dev;
        struct flow_action_entry *act;
        size_t action_size;
        char *modact;
        int err, i;

        action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);

        flow_action_for_each(i, act, flow_action) {
                switch (act->id) {
                case FLOW_ACTION_MANGLE: {
                        err = alloc_mod_hdr_actions(mdev, ct_priv->ns_type,
                                                    mod_acts);
                        if (err)
                                return err;

                        modact = mod_acts->actions +
                                 mod_acts->num_actions * action_size;

                        err = mlx5_tc_ct_parse_mangle_to_mod_act(act, modact);
                        if (err)
                                return err;

                        mod_acts->num_actions++;
                }
                break;

                case FLOW_ACTION_CT_METADATA:
                        /* Handled earlier */
                        continue;
                default:
                        return -EOPNOTSUPP;
                }
        }

        return 0;
}

static int
mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
                                struct mlx5_flow_attr *attr,
                                struct flow_rule *flow_rule,
                                struct mlx5e_mod_hdr_handle **mh,
                                u8 zone_restore_id, bool nat)
{
        struct mlx5e_tc_mod_hdr_acts mod_acts = {};
        struct flow_action_entry *meta;
        u16 ct_state = 0;
        int err;

        meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta)
                return -EOPNOTSUPP;

        err = mapping_add(ct_priv->labels_mapping, meta->ct_metadata.labels,
                          &attr->ct_attr.ct_labels_id);
        if (err)
                return -EOPNOTSUPP;
        if (nat) {
                err = mlx5_tc_ct_entry_create_nat(ct_priv, flow_rule,
                                                  &mod_acts);
                if (err)
                        goto err_mapping;

                ct_state |= MLX5_CT_STATE_NAT_BIT;
        }

        ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
        ct_state |= meta->ct_metadata.orig_dir ? 0 : MLX5_CT_STATE_REPLY_BIT;
        err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
                                             ct_state,
                                             meta->ct_metadata.mark,
                                             attr->ct_attr.ct_labels_id,
                                             zone_restore_id);
        if (err)
                goto err_mapping;

        *mh = mlx5e_mod_hdr_attach(ct_priv->dev,
                                   ct_priv->mod_hdr_tbl,
                                   ct_priv->ns_type,
                                   &mod_acts);
        if (IS_ERR(*mh)) {
                err = PTR_ERR(*mh);
                goto err_mapping;
        }
        attr->modify_hdr = mlx5e_mod_hdr_get(*mh);

        dealloc_mod_hdr_actions(&mod_acts);
        return 0;

err_mapping:
        dealloc_mod_hdr_actions(&mod_acts);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
        return err;
}

static int
mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
                          struct flow_rule *flow_rule,
                          struct mlx5_ct_entry *entry,
                          bool nat, u8 zone_restore_id)
{
        struct mlx5_ct_zone_rule *zone_rule = &entry->zone_rules[nat];
        struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
        struct mlx5_flow_spec *spec = NULL;
        struct mlx5_flow_attr *attr;
        int err;

        zone_rule->nat = nat;

        spec = kzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
        if (!attr) {
                err = -ENOMEM;
                goto err_attr;
        }

        err = mlx5_tc_ct_entry_create_mod_hdr(ct_priv, attr, flow_rule,
                                              &zone_rule->mh,
                                              zone_restore_id, nat);
        if (err) {
                ct_dbg("Failed to create ct entry mod hdr");
                goto err_mod_hdr;
        }

        attr->action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
                       MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                       MLX5_FLOW_CONTEXT_ACTION_COUNT;
        attr->dest_chain = 0;
        attr->dest_ft = ct_priv->post_ct;
        attr->ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        attr->outer_match_level = MLX5_MATCH_L4;
        attr->counter = entry->counter->counter;
        attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
        if (ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB)
                attr->esw_attr->in_mdev = priv->mdev;

        mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG, entry->tuple.zone, MLX5_CT_ZONE_MASK);

        zone_rule->rule = mlx5_tc_rule_insert(priv, spec, attr);
        if (IS_ERR(zone_rule->rule)) {
                err = PTR_ERR(zone_rule->rule);
                ct_dbg("Failed to add ct entry rule, nat: %d", nat);
                goto err_rule;
        }

        zone_rule->attr = attr;

        kfree(spec);
        ct_dbg("Offloaded ct entry rule in zone %d", entry->tuple.zone);

        return 0;

err_rule:
        mlx5e_mod_hdr_detach(ct_priv->dev,
                             ct_priv->mod_hdr_tbl, zone_rule->mh);
        mapping_remove(ct_priv->labels_mapping, attr->ct_attr.ct_labels_id);
err_mod_hdr:
        kfree(attr);
err_attr:
        kfree(spec);
        return err;
}
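
/*
 * Each ct entry is installed twice, once in the ct table and once in
 * ct_nat (see mlx5_tc_ct_entry_add_rules() below). Both rules match
 * the tuple plus the zone register and, on a hit, apply the restore
 * modify-header and forward straight to the post_ct table.
 */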

static bool
mlx5_tc_ct_entry_valid(struct mlx5_ct_entry *entry)
{
        return test_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
}

static struct mlx5_ct_entry *
mlx5_tc_ct_entry_get(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_tuple *tuple)
{
        struct mlx5_ct_entry *entry;

        entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_ht, tuple,
                                       tuples_ht_params);
        if (entry && mlx5_tc_ct_entry_valid(entry) &&
            refcount_inc_not_zero(&entry->refcnt)) {
                return entry;
        } else if (!entry) {
                entry = rhashtable_lookup_fast(&ct_priv->ct_tuples_nat_ht,
                                               tuple, tuples_nat_ht_params);
                if (entry && mlx5_tc_ct_entry_valid(entry) &&
                    refcount_inc_not_zero(&entry->refcnt))
                        return entry;
        }

        return entry ? ERR_PTR(-EINVAL) : NULL;
}
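
/*
 * Lookup order: the plain tuple table first, the NAT table only on a
 * complete miss. An entry that is found but not yet valid, or whose
 * refcount already dropped to zero, is reported as ERR_PTR(-EINVAL)
 * so callers can tell "racing with setup/teardown" apart from "no
 * such tuple".
 */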

static void mlx5_tc_ct_entry_remove_from_tuples(struct mlx5_ct_entry *entry)
{
        struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

        rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                               &entry->tuple_nat_node,
                               tuples_nat_ht_params);
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht, &entry->tuple_node,
                               tuples_ht_params);
}

static void mlx5_tc_ct_entry_del(struct mlx5_ct_entry *entry)
{
        struct mlx5_tc_ct_priv *ct_priv = entry->ct_priv;

        mlx5_tc_ct_entry_del_rules(ct_priv, entry);

        spin_lock_bh(&ct_priv->ht_lock);
        mlx5_tc_ct_entry_remove_from_tuples(entry);
        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_tc_ct_counter_put(ct_priv, entry);
        kfree(entry);
}

static void
mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
        if (!refcount_dec_and_test(&entry->refcnt))
                return;

        mlx5_tc_ct_entry_del(entry);
}

static void mlx5_tc_ct_entry_del_work(struct work_struct *work)
{
        struct mlx5_ct_entry *entry = container_of(work, struct mlx5_ct_entry, work);

        mlx5_tc_ct_entry_del(entry);
}

static void
__mlx5_tc_ct_entry_put(struct mlx5_ct_entry *entry)
{
        struct mlx5e_priv *priv;

        if (!refcount_dec_and_test(&entry->refcnt))
                return;

        priv = netdev_priv(entry->ct_priv->netdev);
        INIT_WORK(&entry->work, mlx5_tc_ct_entry_del_work);
        queue_work(priv->wq, &entry->work);
}
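
/*
 * Deferred-put variant: teardown ultimately calls
 * mlx5_tc_ct_entry_del_rules(), which may sleep, so when the final
 * reference can be dropped from a context that must not sleep the
 * deletion is punted to the driver workqueue instead.
 */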

static struct mlx5_ct_counter *
mlx5_tc_ct_counter_create(struct mlx5_tc_ct_priv *ct_priv)
{
        struct mlx5_ct_counter *counter;
        int ret;

        counter = kzalloc(sizeof(*counter), GFP_KERNEL);
        if (!counter)
                return ERR_PTR(-ENOMEM);

        counter->is_shared = false;
        counter->counter = mlx5_fc_create(ct_priv->dev, true);
        if (IS_ERR(counter->counter)) {
                ct_dbg("Failed to create counter for ct entry");
                ret = PTR_ERR(counter->counter);
                kfree(counter);
                return ERR_PTR(ret);
        }

        return counter;
}

static struct mlx5_ct_counter *
mlx5_tc_ct_shared_counter_get(struct mlx5_tc_ct_priv *ct_priv,
                              struct mlx5_ct_entry *entry)
{
        struct mlx5_ct_tuple rev_tuple = entry->tuple;
        struct mlx5_ct_counter *shared_counter;
        struct mlx5_ct_entry *rev_entry;
        __be16 tmp_port;

        /* get the reversed tuple */
        tmp_port = rev_tuple.port.src;
        rev_tuple.port.src = rev_tuple.port.dst;
        rev_tuple.port.dst = tmp_port;

        if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) {
                __be32 tmp_addr = rev_tuple.ip.src_v4;

                rev_tuple.ip.src_v4 = rev_tuple.ip.dst_v4;
                rev_tuple.ip.dst_v4 = tmp_addr;
        } else if (rev_tuple.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
                struct in6_addr tmp_addr = rev_tuple.ip.src_v6;

                rev_tuple.ip.src_v6 = rev_tuple.ip.dst_v6;
                rev_tuple.ip.dst_v6 = tmp_addr;
        } else {
                return ERR_PTR(-EOPNOTSUPP);
        }

        /* Use the same counter as the reverse direction */
        spin_lock_bh(&ct_priv->ht_lock);
        rev_entry = mlx5_tc_ct_entry_get(ct_priv, &rev_tuple);

        if (IS_ERR(rev_entry)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                goto create_counter;
        }

        if (rev_entry && refcount_inc_not_zero(&rev_entry->counter->refcount)) {
                ct_dbg("Using shared counter entry=0x%p rev=0x%p\n", entry, rev_entry);
                shared_counter = rev_entry->counter;
                spin_unlock_bh(&ct_priv->ht_lock);

                mlx5_tc_ct_entry_put(rev_entry);
                return shared_counter;
        }

        spin_unlock_bh(&ct_priv->ht_lock);

create_counter:

        shared_counter = mlx5_tc_ct_counter_create(ct_priv);
        if (IS_ERR(shared_counter))
                return shared_counter;

        shared_counter->is_shared = true;
        refcount_set(&shared_counter->refcount, 1);
        return shared_counter;
}
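
/*
 * Conntrack offloads the two directions of a connection as separate
 * flow_cls requests. When accounting is off, the second direction
 * reuses the first one's hardware counter by looking up the reversed
 * tuple; if no reverse entry exists (or it is going away) a fresh
 * shared counter is created and primed with one reference.
 */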

static int
mlx5_tc_ct_entry_add_rules(struct mlx5_tc_ct_priv *ct_priv,
                           struct flow_rule *flow_rule,
                           struct mlx5_ct_entry *entry,
                           u8 zone_restore_id)
{
        int err;

        if (nf_ct_acct_enabled(dev_net(ct_priv->netdev)))
                entry->counter = mlx5_tc_ct_counter_create(ct_priv);
        else
                entry->counter = mlx5_tc_ct_shared_counter_get(ct_priv, entry);

        if (IS_ERR(entry->counter)) {
                err = PTR_ERR(entry->counter);
                return err;
        }

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, false,
                                        zone_restore_id);
        if (err)
                goto err_orig;

        err = mlx5_tc_ct_entry_add_rule(ct_priv, flow_rule, entry, true,
                                        zone_restore_id);
        if (err)
                goto err_nat;

        return 0;

err_nat:
        mlx5_tc_ct_entry_del_rule(ct_priv, entry, false);
err_orig:
        mlx5_tc_ct_counter_put(ct_priv, entry);
        return err;
}

static int
mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct flow_rule *flow_rule = flow_cls_offload_flow_rule(flow);
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        struct flow_action_entry *meta_action;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;
        int err;

        meta_action = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
        if (!meta_action)
                return -EOPNOTSUPP;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (entry && refcount_inc_not_zero(&entry->refcnt)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                mlx5_tc_ct_entry_put(entry);
                return -EEXIST;
        }
        spin_unlock_bh(&ct_priv->ht_lock);

        entry = kzalloc(sizeof(*entry), GFP_KERNEL);
        if (!entry)
                return -ENOMEM;

        entry->tuple.zone = ft->zone;
        entry->cookie = flow->cookie;
        entry->restore_cookie = meta_action->ct_metadata.cookie;
        refcount_set(&entry->refcnt, 2);
        entry->ct_priv = ct_priv;

        err = mlx5_tc_ct_rule_to_tuple(&entry->tuple, flow_rule);
        if (err)
                goto err_set;

        memcpy(&entry->tuple_nat, &entry->tuple, sizeof(entry->tuple));
        err = mlx5_tc_ct_rule_to_tuple_nat(&entry->tuple_nat, flow_rule);
        if (err)
                goto err_set;

        spin_lock_bh(&ct_priv->ht_lock);

        err = rhashtable_lookup_insert_fast(&ft->ct_entries_ht, &entry->node,
                                            cts_ht_params);
        if (err)
                goto err_entries;

        err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_ht,
                                            &entry->tuple_node,
                                            tuples_ht_params);
        if (err)
                goto err_tuple;

        if (memcmp(&entry->tuple, &entry->tuple_nat, sizeof(entry->tuple))) {
                err = rhashtable_lookup_insert_fast(&ct_priv->ct_tuples_nat_ht,
                                                    &entry->tuple_nat_node,
                                                    tuples_nat_ht_params);
                if (err)
                        goto err_tuple_nat;
        }
        spin_unlock_bh(&ct_priv->ht_lock);

        err = mlx5_tc_ct_entry_add_rules(ct_priv, flow_rule, entry,
                                         ft->zone_restore_id);
        if (err)
                goto err_rules;

        set_bit(MLX5_CT_ENTRY_FLAG_VALID, &entry->flags);
        mlx5_tc_ct_entry_put(entry); /* this function reference */

        return 0;

err_rules:
        spin_lock_bh(&ct_priv->ht_lock);
        if (mlx5_tc_ct_entry_has_nat(entry))
                rhashtable_remove_fast(&ct_priv->ct_tuples_nat_ht,
                                       &entry->tuple_nat_node, tuples_nat_ht_params);
err_tuple_nat:
        rhashtable_remove_fast(&ct_priv->ct_tuples_ht,
                               &entry->tuple_node,
                               tuples_ht_params);
err_tuple:
        rhashtable_remove_fast(&ft->ct_entries_ht,
                               &entry->node,
                               cts_ht_params);
err_entries:
        spin_unlock_bh(&ct_priv->ht_lock);
err_set:
        kfree(entry);
        if (err != -EEXIST)
                netdev_warn(ct_priv->netdev, "Failed to offload ct entry, err: %d\n", err);
        return err;
}
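
/*
 * The entry starts with a refcount of 2: one reference owned by the
 * hashtables and one held across rule installation, dropped above
 * once MLX5_CT_ENTRY_FLAG_VALID is set. The tuple tables are
 * populated before the hardware rules exist, which is why readers
 * must check mlx5_tc_ct_entry_valid() before trusting an entry.
 */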

static int
mlx5_tc_ct_block_flow_offload_del(struct mlx5_ct_ft *ft,
                                  struct flow_cls_offload *flow)
{
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        unsigned long cookie = flow->cookie;
        struct mlx5_ct_entry *entry;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (!entry) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -ENOENT;
        }

        if (!mlx5_tc_ct_entry_valid(entry)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -EINVAL;
        }

        rhashtable_remove_fast(&ft->ct_entries_ht, &entry->node, cts_ht_params);
        mlx5_tc_ct_entry_remove_from_tuples(entry);
        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_tc_ct_entry_put(entry);

        return 0;
}

static int
mlx5_tc_ct_block_flow_offload_stats(struct mlx5_ct_ft *ft,
                                    struct flow_cls_offload *f)
{
        struct mlx5_tc_ct_priv *ct_priv = ft->ct_priv;
        unsigned long cookie = f->cookie;
        struct mlx5_ct_entry *entry;
        u64 lastuse, packets, bytes;

        spin_lock_bh(&ct_priv->ht_lock);
        entry = rhashtable_lookup_fast(&ft->ct_entries_ht, &cookie, cts_ht_params);
        if (!entry) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -ENOENT;
        }

        if (!mlx5_tc_ct_entry_valid(entry) || !refcount_inc_not_zero(&entry->refcnt)) {
                spin_unlock_bh(&ct_priv->ht_lock);
                return -EINVAL;
        }

        spin_unlock_bh(&ct_priv->ht_lock);

        mlx5_fc_query_cached(entry->counter->counter, &bytes, &packets, &lastuse);
        flow_stats_update(&f->stats, bytes, packets, 0, lastuse,
                          FLOW_ACTION_HW_STATS_DELAYED);

        mlx5_tc_ct_entry_put(entry);
        return 0;
}

static int
mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
                              void *cb_priv)
{
        struct flow_cls_offload *f = type_data;
        struct mlx5_ct_ft *ft = cb_priv;

        if (type != TC_SETUP_CLSFLOWER)
                return -EOPNOTSUPP;

        switch (f->command) {
        case FLOW_CLS_REPLACE:
                return mlx5_tc_ct_block_flow_offload_add(ft, f);
        case FLOW_CLS_DESTROY:
                return mlx5_tc_ct_block_flow_offload_del(ft, f);
        case FLOW_CLS_STATS:
                return mlx5_tc_ct_block_flow_offload_stats(ft, f);
        default:
                break;
        }

        return -EOPNOTSUPP;
}

static bool
mlx5_tc_ct_skb_to_tuple(struct sk_buff *skb, struct mlx5_ct_tuple *tuple,
                        u16 zone)
{
        struct flow_keys flow_keys;

        skb_reset_network_header(skb);
        skb_flow_dissect_flow_keys(skb, &flow_keys, 0);

        tuple->zone = zone;

        if (flow_keys.basic.ip_proto != IPPROTO_TCP &&
            flow_keys.basic.ip_proto != IPPROTO_UDP)
                return false;

        tuple->port.src = flow_keys.ports.src;
        tuple->port.dst = flow_keys.ports.dst;
        tuple->n_proto = flow_keys.basic.n_proto;
        tuple->ip_proto = flow_keys.basic.ip_proto;

        switch (flow_keys.basic.n_proto) {
        case htons(ETH_P_IP):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS;
                tuple->ip.src_v4 = flow_keys.addrs.v4addrs.src;
                tuple->ip.dst_v4 = flow_keys.addrs.v4addrs.dst;
                break;

        case htons(ETH_P_IPV6):
                tuple->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
                tuple->ip.src_v6 = flow_keys.addrs.v6addrs.src;
                tuple->ip.dst_v6 = flow_keys.addrs.v6addrs.dst;
                break;
        default:
                goto out;
        }

        return true;

out:
        return false;
}

int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
{
        u32 ctstate = 0, ctstate_mask = 0;

        mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
                                        &ctstate, &ctstate_mask);
        if (ctstate_mask)
                return -EOPNOTSUPP;

        ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                    ctstate, ctstate_mask);

        return 0;
}

void mlx5_tc_ct_match_del(struct mlx5_tc_ct_priv *priv, struct mlx5_ct_attr *ct_attr)
{
        if (!priv || !ct_attr->ct_labels_id)
                return;

        mapping_remove(priv->labels_mapping, ct_attr->ct_labels_id);
}

int
mlx5_tc_ct_match_add(struct mlx5_tc_ct_priv *priv,
                     struct mlx5_flow_spec *spec,
                     struct flow_cls_offload *f,
                     struct mlx5_ct_attr *ct_attr,
                     struct netlink_ext_ack *extack)
{
        struct flow_rule *rule = flow_cls_offload_flow_rule(f);
        bool trk, est, untrk, unest, new, rpl, unrpl;
        struct flow_dissector_key_ct *mask, *key;
        u32 ctstate = 0, ctstate_mask = 0;
        u16 ct_state_on, ct_state_off;
        u16 ct_state, ct_state_mask;
        struct flow_match_ct match;
        u32 ct_labels[4];

        if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CT))
                return 0;

        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct matching isn't available");
                return -EOPNOTSUPP;
        }

        flow_rule_match_ct(rule, &match);

        key = match.key;
        mask = match.mask;

        ct_state = key->ct_state;
        ct_state_mask = mask->ct_state;

        if (ct_state_mask & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
                              TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED |
                              TCA_FLOWER_KEY_CT_FLAGS_NEW |
                              TCA_FLOWER_KEY_CT_FLAGS_REPLY)) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "only ct_state trk, est, new and rpl are supported for offload");
                return -EOPNOTSUPP;
        }

        ct_state_on = ct_state & ct_state_mask;
        ct_state_off = (ct_state & ct_state_mask) ^ ct_state_mask;
        trk = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        new = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_NEW;
        est = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        rpl = ct_state_on & TCA_FLOWER_KEY_CT_FLAGS_REPLY;
        untrk = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_TRACKED;
        unest = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_ESTABLISHED;
        unrpl = ct_state_off & TCA_FLOWER_KEY_CT_FLAGS_REPLY;

        ctstate |= trk ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate |= est ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate |= rpl ? MLX5_CT_STATE_REPLY_BIT : 0;
        ctstate_mask |= (untrk || trk) ? MLX5_CT_STATE_TRK_BIT : 0;
        ctstate_mask |= (unest || est) ? MLX5_CT_STATE_ESTABLISHED_BIT : 0;
        ctstate_mask |= (unrpl || rpl) ? MLX5_CT_STATE_REPLY_BIT : 0;

        if (new) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "matching on ct_state +new isn't supported");
                return -EOPNOTSUPP;
        }

        if (mask->ct_zone)
                mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                            key->ct_zone, MLX5_CT_ZONE_MASK);
        if (ctstate_mask)
                mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG,
                                            ctstate, ctstate_mask);
        if (mask->ct_mark)
                mlx5e_tc_match_to_reg_match(spec, MARK_TO_REG,
                                            key->ct_mark, mask->ct_mark);
        if (mask->ct_labels[0] || mask->ct_labels[1] || mask->ct_labels[2] ||
            mask->ct_labels[3]) {
                ct_labels[0] = key->ct_labels[0] & mask->ct_labels[0];
                ct_labels[1] = key->ct_labels[1] & mask->ct_labels[1];
                ct_labels[2] = key->ct_labels[2] & mask->ct_labels[2];
                ct_labels[3] = key->ct_labels[3] & mask->ct_labels[3];
                if (mapping_add(priv->labels_mapping, ct_labels, &ct_attr->ct_labels_id))
                        return -EOPNOTSUPP;
                mlx5e_tc_match_to_reg_match(spec, LABELS_TO_REG, ct_attr->ct_labels_id,
                                            MLX5_CT_LABELS_MASK);
        }

        return 0;
}
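
/*
 * ct_state matching piggybacks on the bits programmed by
 * mlx5_tc_ct_entry_set_registers(): +trk/+est/+rpl map onto register
 * bits, their -negations contribute only to the mask, and +new is
 * rejected outright since a new connection's first packet always
 * goes to software conntrack and is never offloaded.
 */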

int
mlx5_tc_ct_parse_action(struct mlx5_tc_ct_priv *priv,
                        struct mlx5_flow_attr *attr,
                        const struct flow_action_entry *act,
                        struct netlink_ext_ack *extack)
{
        if (!priv) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "offload of ct action isn't available");
                return -EOPNOTSUPP;
        }

        attr->ct_attr.zone = act->ct.zone;
        attr->ct_attr.ct_action = act->ct.action;
        attr->ct_attr.nf_ft = act->ct.flow_table;

        return 0;
}

static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
                                  struct mlx5_tc_ct_pre *pre_ct,
                                  bool nat)
{
        struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
        struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
        struct mlx5_core_dev *dev = ct_priv->dev;
        struct mlx5_flow_table *ft = pre_ct->ft;
        struct mlx5_flow_destination dest = {};
        struct mlx5_flow_act flow_act = {};
        struct mlx5_modify_hdr *mod_hdr;
        struct mlx5_flow_handle *rule;
        struct mlx5_flow_spec *spec;
        u32 ctstate;
        u16 zone;
        int err;

        spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
        if (!spec)
                return -ENOMEM;

        zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
        err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ct_priv->ns_type,
                                        ZONE_TO_REG, zone);
        if (err) {
                ct_dbg("Failed to set zone register mapping");
                goto err_mapping;
        }

        mod_hdr = mlx5_modify_header_alloc(dev, ct_priv->ns_type,
                                           pre_mod_acts.num_actions,
                                           pre_mod_acts.actions);

        if (IS_ERR(mod_hdr)) {
                err = PTR_ERR(mod_hdr);
                ct_dbg("Failed to create pre ct mod hdr");
                goto err_mapping;
        }
        pre_ct->modify_hdr = mod_hdr;

        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
                          MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
        flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
        flow_act.modify_hdr = mod_hdr;
        dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;

        /* add flow rule */
        mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
                                    zone, MLX5_CT_ZONE_MASK);
        ctstate = MLX5_CT_STATE_TRK_BIT;
        if (nat)
                ctstate |= MLX5_CT_STATE_NAT_BIT;
        mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);

        dest.ft = ct_priv->post_ct;
        rule = mlx5_add_flow_rules(ft, spec, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add pre ct flow rule zone %d", zone);
                goto err_flow_rule;
        }
        pre_ct->flow_rule = rule;

        /* add miss rule */
        dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
        rule = mlx5_add_flow_rules(ft, NULL, &flow_act, &dest, 1);
        if (IS_ERR(rule)) {
                err = PTR_ERR(rule);
                ct_dbg("Failed to add pre ct miss rule zone %d", zone);
                goto err_miss_rule;
        }
        pre_ct->miss_rule = rule;

        dealloc_mod_hdr_actions(&pre_mod_acts);
        kvfree(spec);
        return 0;

err_miss_rule:
        mlx5_del_flow_rules(pre_ct->flow_rule);
err_flow_rule:
        mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
err_mapping:
        dealloc_mod_hdr_actions(&pre_mod_acts);
        kvfree(spec);
        return err;
}
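
/*
 * The per-zone pre_ct table therefore holds exactly two entries:
 * FTE 0 matches packets already marked tracked (and NATed, for the
 * nat variant) in this zone and jumps straight to post_ct, while the
 * catch-all miss rule sends everything else to the ct/ct_nat tuple
 * tables for a lookup.
 */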
1396
1397 static void
1398 tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
1399                        struct mlx5_tc_ct_pre *pre_ct)
1400 {
1401         struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1402         struct mlx5_core_dev *dev = ct_priv->dev;
1403
1404         mlx5_del_flow_rules(pre_ct->flow_rule);
1405         mlx5_del_flow_rules(pre_ct->miss_rule);
1406         mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
1407 }
1408
1409 static int
1410 mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
1411                         struct mlx5_tc_ct_pre *pre_ct,
1412                         bool nat)
1413 {
1414         int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
1415         struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
1416         struct mlx5_core_dev *dev = ct_priv->dev;
1417         struct mlx5_flow_table_attr ft_attr = {};
1418         struct mlx5_flow_namespace *ns;
1419         struct mlx5_flow_table *ft;
1420         struct mlx5_flow_group *g;
1421         u32 metadata_reg_c_2_mask;
1422         u32 *flow_group_in;
1423         void *misc;
1424         int err;
1425
1426         ns = mlx5_get_flow_namespace(dev, ct_priv->ns_type);
1427         if (!ns) {
1428                 err = -EOPNOTSUPP;
1429                 ct_dbg("Failed to get flow namespace");
1430                 return err;
1431         }
1432
1433         flow_group_in = kvzalloc(inlen, GFP_KERNEL);
1434         if (!flow_group_in)
1435                 return -ENOMEM;
1436
1437         ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
1438         ft_attr.prio = ct_priv->ns_type == MLX5_FLOW_NAMESPACE_FDB ?
1439                         FDB_TC_OFFLOAD : MLX5E_TC_PRIO;
1440         ft_attr.max_fte = 2;
1441         ft_attr.level = 1;
1442         ft = mlx5_create_flow_table(ns, &ft_attr);
1443         if (IS_ERR(ft)) {
1444                 err = PTR_ERR(ft);
1445                 ct_dbg("Failed to create pre ct table");
1446                 goto out_free;
1447         }
1448         pre_ct->ft = ft;
1449
1450         /* create flow group */
1451         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
1452         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
1453         MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
1454                  MLX5_MATCH_MISC_PARAMETERS_2);
1455
1456         misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
1457                             match_criteria.misc_parameters_2);
1458
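        /* reg_c_2 packs the zone into its lower 16 bits and the ct state
         * bits into its upper 16 bits; mask the zone plus trk (and nat,
         * for the NAT table) so the group covers the pre ct flow rule
         * match written by tc_ct_pre_ct_add_rules().
         */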
1459         metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
1460         metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
1461         if (nat)
1462                 metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
1463
1464         MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
1465                  metadata_reg_c_2_mask);
1466
1467         g = mlx5_create_flow_group(ft, flow_group_in);
1468         if (IS_ERR(g)) {
1469                 err = PTR_ERR(g);
1470                 ct_dbg("Failed to create pre ct group");
1471                 goto err_flow_grp;
1472         }
1473         pre_ct->flow_grp = g;
1474
1475         /* create miss group */
1476         memset(flow_group_in, 0, inlen);
1477         MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
1478         MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
1479         g = mlx5_create_flow_group(ft, flow_group_in);
1480         if (IS_ERR(g)) {
1481                 err = PTR_ERR(g);
1482                 ct_dbg("Failed to create pre ct miss group");
1483                 goto err_miss_grp;
1484         }
1485         pre_ct->miss_grp = g;
1486
1487         err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
1488         if (err)
1489                 goto err_add_rules;
1490
1491         kvfree(flow_group_in);
1492         return 0;
1493
1494 err_add_rules:
1495         mlx5_destroy_flow_group(pre_ct->miss_grp);
1496 err_miss_grp:
1497         mlx5_destroy_flow_group(pre_ct->flow_grp);
1498 err_flow_grp:
1499         mlx5_destroy_flow_table(ft);
1500 out_free:
1501         kvfree(flow_group_in);
1502         return err;
1503 }
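
/* The resulting pre_ct/pre_ct_nat table holds exactly two FTEs:
 *
 *   index 0 (flow_grp): reg_c_2 matches zone + trk (+ nat) -> fwd to post_ct
 *   index 1 (miss_grp): match-all miss                     -> fwd to ct/ct_nat
 */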
1504
1505 static void
1506 mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
1507                        struct mlx5_tc_ct_pre *pre_ct)
1508 {
1509         tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
1510         mlx5_destroy_flow_group(pre_ct->miss_grp);
1511         mlx5_destroy_flow_group(pre_ct->flow_grp);
1512         mlx5_destroy_flow_table(pre_ct->ft);
1513 }
1514
1515 static int
1516 mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
1517 {
1518         int err;
1519
1520         err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
1521         if (err)
1522                 return err;
1523
1524         err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
1525         if (err)
1526                 goto err_pre_ct_nat;
1527
1528         return 0;
1529
1530 err_pre_ct_nat:
1531         mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1532         return err;
1533 }
1534
1535 static void
1536 mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
1537 {
1538         mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
1539         mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
1540 }
1541
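/* Look up or create the per-zone ct flow table state. Instances are
 * refcounted, so all filters that share a zone share one struct mlx5_ct_ft
 * and a single offload callback registered on the nf flowtable.
 */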
1542 static struct mlx5_ct_ft *
1543 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
1544                      struct nf_flowtable *nf_ft)
1545 {
1546         struct mlx5_ct_ft *ft;
1547         int err;
1548
1549         ft = rhashtable_lookup_fast(&ct_priv->zone_ht, &zone, zone_params);
1550         if (ft) {
1551                 refcount_inc(&ft->refcount);
1552                 return ft;
1553         }
1554
1555         ft = kzalloc(sizeof(*ft), GFP_KERNEL);
1556         if (!ft)
1557                 return ERR_PTR(-ENOMEM);
1558
1559         err = mapping_add(ct_priv->zone_mapping, &zone, &ft->zone_restore_id);
1560         if (err)
1561                 goto err_mapping;
1562
1563         ft->zone = zone;
1564         ft->nf_ft = nf_ft;
1565         ft->ct_priv = ct_priv;
1566         refcount_set(&ft->refcount, 1);
1567
1568         err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
1569         if (err)
1570                 goto err_alloc_pre_ct;
1571
1572         err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
1573         if (err)
1574                 goto err_init;
1575
1576         err = rhashtable_insert_fast(&ct_priv->zone_ht, &ft->node,
1577                                      zone_params);
1578         if (err)
1579                 goto err_insert;
1580
1581         err = nf_flow_table_offload_add_cb(ft->nf_ft,
1582                                            mlx5_tc_ct_block_flow_offload, ft);
1583         if (err)
1584                 goto err_add_cb;
1585
1586         return ft;
1587
1588 err_add_cb:
1589         rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1590 err_insert:
1591         rhashtable_destroy(&ft->ct_entries_ht);
1592 err_init:
1593         mlx5_tc_ct_free_pre_ct_tables(ft);
1594 err_alloc_pre_ct:
1595         mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1596 err_mapping:
1597         kfree(ft);
1598         return ERR_PTR(err);
1599 }
1600
1601 static void
1602 mlx5_tc_ct_flush_ft_entry(void *ptr, void *arg)
1603 {
1604         struct mlx5_ct_entry *entry = ptr;
1605
1606         mlx5_tc_ct_entry_put(entry);
1607 }
1608
1609 static void
1610 mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
1611 {
1612         if (!refcount_dec_and_test(&ft->refcount))
1613                 return;
1614
1615         nf_flow_table_offload_del_cb(ft->nf_ft,
1616                                      mlx5_tc_ct_block_flow_offload, ft);
1617         rhashtable_remove_fast(&ct_priv->zone_ht, &ft->node, zone_params);
1618         rhashtable_free_and_destroy(&ft->ct_entries_ht,
1619                                     mlx5_tc_ct_flush_ft_entry,
1620                                     ct_priv);
1621         mlx5_tc_ct_free_pre_ct_tables(ft);
1622         mapping_remove(ct_priv->zone_mapping, ft->zone_restore_id);
1623         kfree(ft);
1624 }
1625
1626 /* We translate the tc filter with CT action to the following HW model:
1627  *
1628  * +---------------------+
1629  * + ft prio (tc chain)  +
1630  * + original match      +
1631  * +---------------------+
1632  *      | set chain miss mapping
1633  *      | set fte_id
1634  *      | set tunnel_id
1635  *      | do decap
1636  *      v
1637  * +---------------------+
1638  * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
1639  * + zone+nat match      +---------------->+ post_ct (see below) +
1640  * +---------------------+  set zone       +---------------------+
1641  *      | set zone
1642  *      v
1643  * +--------------------+
1644  * + CT (nat or no nat) +
1645  * + tuple + zone match +
1646  * +--------------------+
1647  *      | set mark
1648  *      | set labels_id
1649  *      | set established
1650  *      | set zone_restore
1651  *      | do nat (if needed)
1652  *      v
1653  * +--------------+
1654  * + post_ct      + original filter actions
1655  * + fte_id match +------------------------>
1656  * +--------------+
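 *
 * For reference, a filter of roughly this shape (the zone and chain
 * numbers are hypothetical) is what the model above implements:
 *
 *   tc filter add dev $DEV ingress chain 0 proto ip flower \
 *       ct_state -trk \
 *       action ct zone 5 pipe \
 *       action goto chain 1
 *
 * The pre_ct rule keeps the original match, the CT table holds the
 * per-tuple rules programmed from conntrack events, and the post_ct
 * rule runs the remaining actions (here, the goto).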
1657  */
1658 static struct mlx5_flow_handle *
1659 __mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *ct_priv,
1660                           struct mlx5e_tc_flow *flow,
1661                           struct mlx5_flow_spec *orig_spec,
1662                           struct mlx5_flow_attr *attr)
1663 {
1664         bool nat = attr->ct_attr.ct_action & TCA_CT_ACT_NAT;
1665         struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1666         struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
1667         u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1668         struct mlx5_flow_spec *post_ct_spec = NULL;
1669         struct mlx5_flow_attr *pre_ct_attr;
1670         struct mlx5_modify_hdr *mod_hdr;
1671         struct mlx5_flow_handle *rule;
1672         struct mlx5_ct_flow *ct_flow;
1673         int chain_mapping = 0, err;
1674         struct mlx5_ct_ft *ft;
1675         u32 fte_id = 1;
1676
1677         post_ct_spec = kvzalloc(sizeof(*post_ct_spec), GFP_KERNEL);
1678         ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1679         if (!post_ct_spec || !ct_flow) {
1680                 kvfree(post_ct_spec);
1681                 kfree(ct_flow);
1682                 return ERR_PTR(-ENOMEM);
1683         }
1684
1685         /* Register for CT established events */
1686         ft = mlx5_tc_ct_add_ft_cb(ct_priv, attr->ct_attr.zone,
1687                                   attr->ct_attr.nf_ft);
1688         if (IS_ERR(ft)) {
1689                 err = PTR_ERR(ft);
1690                 ct_dbg("Failed to register to ft callback");
1691                 goto err_ft;
1692         }
1693         ct_flow->ft = ft;
1694
1695         err = idr_alloc_u32(&ct_priv->fte_ids, ct_flow, &fte_id,
1696                             MLX5_FTE_ID_MAX, GFP_KERNEL);
1697         if (err) {
1698                 netdev_warn(priv->netdev,
1699                             "Failed to allocate fte id, err: %d\n", err);
1700                 goto err_idr;
1701         }
1702         ct_flow->fte_id = fte_id;
1703
1704         /* Base flow attributes of both rules on original rule attribute */
1705         ct_flow->pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1706         if (!ct_flow->pre_ct_attr) {
1707                 err = -ENOMEM;
1708                 goto err_alloc_pre;
1709         }
1710
1711         ct_flow->post_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1712         if (!ct_flow->post_ct_attr) {
1713                 err = -ENOMEM;
1714                 goto err_alloc_post;
1715         }
1716
1717         pre_ct_attr = ct_flow->pre_ct_attr;
1718         memcpy(pre_ct_attr, attr, attr_sz);
1719         memcpy(ct_flow->post_ct_attr, attr, attr_sz);
1720
1721         /* Modify the original rule's action to fwd and modify, leave decap */
1722         pre_ct_attr->action = attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP;
1723         pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
1724                                MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1725
1726         /* Write the chain miss tag for misses in the ct table, as we
1727          * don't go through all the prios of this chain the way normal
1728          * tc rule misses do.
1729          */
1730         err = mlx5_chains_get_chain_mapping(ct_priv->chains, attr->chain,
1731                                             &chain_mapping);
1732         if (err) {
1733                 ct_dbg("Failed to get chain register mapping for chain");
1734                 goto err_get_chain;
1735         }
1736         ct_flow->chain_mapping = chain_mapping;
1737
1738         err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1739                                         CHAIN_TO_REG, chain_mapping);
1740         if (err) {
1741                 ct_dbg("Failed to set chain register mapping");
1742                 goto err_mapping;
1743         }
1744
1745         err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts, ct_priv->ns_type,
1746                                         FTEID_TO_REG, fte_id);
1747         if (err) {
1748                 ct_dbg("Failed to set fte_id register mapping");
1749                 goto err_mapping;
1750         }
1751
1752         /* If original flow is decap, we do it before going into ct table
1753          * so add a rewrite for the tunnel match_id.
1754          */
1755         if ((pre_ct_attr->action & MLX5_FLOW_CONTEXT_ACTION_DECAP) &&
1756             attr->chain == 0) {
1757                 u32 tun_id = mlx5e_tc_get_flow_tun_id(flow);
1758
1759                 err = mlx5e_tc_match_to_reg_set(priv->mdev, &pre_mod_acts,
1760                                                 ct_priv->ns_type,
1761                                                 TUNNEL_TO_REG,
1762                                                 tun_id);
1763                 if (err) {
1764                         ct_dbg("Failed to set tunnel register mapping");
1765                         goto err_mapping;
1766                 }
1767         }
1768
1769         mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1770                                            pre_mod_acts.num_actions,
1771                                            pre_mod_acts.actions);
1772         if (IS_ERR(mod_hdr)) {
1773                 err = PTR_ERR(mod_hdr);
1774                 ct_dbg("Failed to create pre ct mod hdr");
1775                 goto err_mapping;
1776         }
1777         pre_ct_attr->modify_hdr = mod_hdr;
1778
1779         /* The post ct rule matches on the fte_id and executes the
1780          * original rule's actions.
1781          */
1782         mlx5e_tc_match_to_reg_match(post_ct_spec, FTEID_TO_REG,
1783                                     fte_id, MLX5_FTE_ID_MASK);
1784
1785         /* Put post_ct rule on post_ct flow table */
1786         ct_flow->post_ct_attr->chain = 0;
1787         ct_flow->post_ct_attr->prio = 0;
1788         ct_flow->post_ct_attr->ft = ct_priv->post_ct;
1789
1790         ct_flow->post_ct_attr->inner_match_level = MLX5_MATCH_NONE;
1791         ct_flow->post_ct_attr->outer_match_level = MLX5_MATCH_NONE;
1792         ct_flow->post_ct_attr->action &= ~(MLX5_FLOW_CONTEXT_ACTION_DECAP);
1793         rule = mlx5_tc_rule_insert(priv, post_ct_spec,
1794                                    ct_flow->post_ct_attr);
1795         ct_flow->post_ct_rule = rule;
1796         if (IS_ERR(ct_flow->post_ct_rule)) {
1797                 err = PTR_ERR(ct_flow->post_ct_rule);
1798                 ct_dbg("Failed to add post ct rule");
1799                 goto err_insert_post_ct;
1800         }
1801
1802         /* Change the original rule to point to the ct table */
1803         pre_ct_attr->dest_chain = 0;
1804         pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.ft : ft->pre_ct.ft;
1805         ct_flow->pre_ct_rule = mlx5_tc_rule_insert(priv, orig_spec,
1806                                                    pre_ct_attr);
1807         if (IS_ERR(ct_flow->pre_ct_rule)) {
1808                 err = PTR_ERR(ct_flow->pre_ct_rule);
1809                 ct_dbg("Failed to add pre ct rule");
1810                 goto err_insert_orig;
1811         }
1812
1813         attr->ct_attr.ct_flow = ct_flow;
1814         dealloc_mod_hdr_actions(&pre_mod_acts);
1815         kvfree(post_ct_spec);
1816
1817         return rule;
1818
1819 err_insert_orig:
1820         mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1821                             ct_flow->post_ct_attr);
1822 err_insert_post_ct:
1823         mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1824 err_mapping:
1825         dealloc_mod_hdr_actions(&pre_mod_acts);
1826         mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1827 err_get_chain:
1828         kfree(ct_flow->post_ct_attr);
1829 err_alloc_post:
1830         kfree(ct_flow->pre_ct_attr);
1831 err_alloc_pre:
1832         idr_remove(&ct_priv->fte_ids, fte_id);
1833 err_idr:
1834         mlx5_tc_ct_del_ft_cb(ct_priv, ft);
1835 err_ft:
1836         kvfree(post_ct_spec);
1837         kfree(ct_flow);
1838         netdev_warn(priv->netdev, "Failed to offload ct flow, err %d\n", err);
1839         return ERR_PTR(err);
1840 }
1841
1842 static struct mlx5_flow_handle *
1843 __mlx5_tc_ct_flow_offload_clear(struct mlx5_tc_ct_priv *ct_priv,
1844                                 struct mlx5_flow_spec *orig_spec,
1845                                 struct mlx5_flow_attr *attr,
1846                                 struct mlx5e_tc_mod_hdr_acts *mod_acts)
1847 {
1848         struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1849         u32 attr_sz = ns_to_attr_sz(ct_priv->ns_type);
1850         struct mlx5_flow_attr *pre_ct_attr;
1851         struct mlx5_modify_hdr *mod_hdr;
1852         struct mlx5_flow_handle *rule;
1853         struct mlx5_ct_flow *ct_flow;
1854         int err;
1855
1856         ct_flow = kzalloc(sizeof(*ct_flow), GFP_KERNEL);
1857         if (!ct_flow)
1858                 return ERR_PTR(-ENOMEM);
1859
1860         /* Base the flow attributes on the original rule's attribute */
1861         pre_ct_attr = mlx5_alloc_flow_attr(ct_priv->ns_type);
1862         if (!pre_ct_attr) {
1863                 err = -ENOMEM;
1864                 goto err_attr;
1865         }
1866
1867         memcpy(pre_ct_attr, attr, attr_sz);
1868
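        /* Zero the ct state, mark, labels and zone restore registers so a
         * cleared packet continues through tc as untracked.
         */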
1869         err = mlx5_tc_ct_entry_set_registers(ct_priv, mod_acts, 0, 0, 0, 0);
1870         if (err) {
1871                 ct_dbg("Failed to set register for ct clear");
1872                 goto err_set_registers;
1873         }
1874
1875         mod_hdr = mlx5_modify_header_alloc(priv->mdev, ct_priv->ns_type,
1876                                            mod_acts->num_actions,
1877                                            mod_acts->actions);
1878         if (IS_ERR(mod_hdr)) {
1879                 err = PTR_ERR(mod_hdr);
1880                 ct_dbg("Failed to add create ct clear mod hdr");
1881                 goto err_set_registers;
1882         }
1883
1884         pre_ct_attr->modify_hdr = mod_hdr;
1885         pre_ct_attr->action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
1886
1887         rule = mlx5_tc_rule_insert(priv, orig_spec, pre_ct_attr);
1888         if (IS_ERR(rule)) {
1889                 err = PTR_ERR(rule);
1890                 ct_dbg("Failed to add ct clear rule");
1891                 goto err_insert;
1892         }
1893
1894         attr->ct_attr.ct_flow = ct_flow;
1895         ct_flow->pre_ct_attr = pre_ct_attr;
1896         ct_flow->pre_ct_rule = rule;
1897         return rule;
1898
1899 err_insert:
1900         mlx5_modify_header_dealloc(priv->mdev, mod_hdr);
1901 err_set_registers:
1902         netdev_warn(priv->netdev,
1903                     "Failed to offload ct clear flow, err %d\n", err);
1904         kfree(pre_ct_attr);
1905 err_attr:
1906         kfree(ct_flow);
1907
1908         return ERR_PTR(err);
1909 }
1910
1911 struct mlx5_flow_handle *
1912 mlx5_tc_ct_flow_offload(struct mlx5_tc_ct_priv *priv,
1913                         struct mlx5e_tc_flow *flow,
1914                         struct mlx5_flow_spec *spec,
1915                         struct mlx5_flow_attr *attr,
1916                         struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
1917 {
1918         bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
1919         struct mlx5_flow_handle *rule;
1920
1921         if (!priv)
1922                 return ERR_PTR(-EOPNOTSUPP);
1923
1924         mutex_lock(&priv->control_lock);
1925
1926         if (clear_action)
1927                 rule = __mlx5_tc_ct_flow_offload_clear(priv, spec, attr, mod_hdr_acts);
1928         else
1929                 rule = __mlx5_tc_ct_flow_offload(priv, flow, spec, attr);
1930         mutex_unlock(&priv->control_lock);
1931
1932         return rule;
1933 }
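
/* Caller sketch (hypothetical; the real dispatch lives outside this file):
 * roughly how the tc core would route a flow with a CT action into this
 * entry point. get_ct_priv() and parse_attr stand in for the caller's own
 * plumbing.
 *
 *      if (flow_flag_test(flow, CT))
 *              return mlx5_tc_ct_flow_offload(get_ct_priv(flow->priv),
 *                                             flow, &parse_attr->spec, attr,
 *                                             &parse_attr->mod_hdr_acts);
 */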
1934
1935 static void
1936 __mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *ct_priv,
1937                          struct mlx5e_tc_flow *flow,
1938                          struct mlx5_ct_flow *ct_flow)
1939 {
1940         struct mlx5_flow_attr *pre_ct_attr = ct_flow->pre_ct_attr;
1941         struct mlx5e_priv *priv = netdev_priv(ct_priv->netdev);
1942
1943         mlx5_tc_rule_delete(priv, ct_flow->pre_ct_rule,
1944                             pre_ct_attr);
1945         mlx5_modify_header_dealloc(priv->mdev, pre_ct_attr->modify_hdr);
1946
1947         if (ct_flow->post_ct_rule) {
1948                 mlx5_tc_rule_delete(priv, ct_flow->post_ct_rule,
1949                                     ct_flow->post_ct_attr);
1950                 mlx5_chains_put_chain_mapping(ct_priv->chains, ct_flow->chain_mapping);
1951                 idr_remove(&ct_priv->fte_ids, ct_flow->fte_id);
1952                 mlx5_tc_ct_del_ft_cb(ct_priv, ct_flow->ft);
1953         }
1954
1955         kfree(ct_flow->pre_ct_attr);
1956         kfree(ct_flow->post_ct_attr);
1957         kfree(ct_flow);
1958 }
1959
1960 void
1961 mlx5_tc_ct_delete_flow(struct mlx5_tc_ct_priv *priv,
1962                        struct mlx5e_tc_flow *flow,
1963                        struct mlx5_flow_attr *attr)
1964 {
1965         struct mlx5_ct_flow *ct_flow = attr->ct_attr.ct_flow;
1966
1967         /* We may also be called on an error path during parsing, before
1968          * any offload state was created; there is nothing to clean up then.
1969          */
1970         if (!ct_flow)
1971                 return;
1972
1973         mutex_lock(&priv->control_lock);
1974         __mlx5_tc_ct_delete_flow(priv, flow, ct_flow);
1975         mutex_unlock(&priv->control_lock);
1976 }
1977
1978 static int
1979 mlx5_tc_ct_init_check_esw_support(struct mlx5_eswitch *esw,
1980                                   const char **err_msg)
1981 {
1982         if (!MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level)) {
1983                 *err_msg = "firmware level support is missing";
1984                 return -EOPNOTSUPP;
1985         }
1986
1987         if (!mlx5_eswitch_vlan_actions_supported(esw->dev, 1)) {
1988                 /* The vlan workaround must be avoided for multi-chain rules.
1989                  * This is just a sanity check, as the pop vlan action should
1990                  * be supported by any FW that supports ignore_flow_level.
1991                  */
1992
1993                 *err_msg = "firmware vlan actions support is missing";
1994                 return -EOPNOTSUPP;
1995         }
1996
1997         if (!MLX5_CAP_ESW_FLOWTABLE(esw->dev,
1998                                     fdb_modify_header_fwd_to_table)) {
1999                 /* CT always writes to registers via mod header actions, so
2000                  * both mod header and goto (fwd to table) are required.
2001                  */
2002
2003                 *err_msg = "firmware fwd and modify support is missing";
2004                 return -EOPNOTSUPP;
2005         }
2006
2007         if (!mlx5_eswitch_reg_c1_loopback_enabled(esw)) {
2008                 *err_msg = "register loopback isn't supported";
2009                 return -EOPNOTSUPP;
2010         }
2011
2012         return 0;
2013 }
2014
2015 static int
2016 mlx5_tc_ct_init_check_nic_support(struct mlx5e_priv *priv,
2017                                   const char **err_msg)
2018 {
2019         if (!MLX5_CAP_FLOWTABLE_NIC_RX(priv->mdev, ignore_flow_level)) {
2020                 *err_msg = "firmware level support is missing";
2021                 return -EOPNOTSUPP;
2022         }
2023
2024         return 0;
2025 }
2026
2027 static int
2028 mlx5_tc_ct_init_check_support(struct mlx5e_priv *priv,
2029                               enum mlx5_flow_namespace_type ns_type,
2030                               const char **err_msg)
2031 {
2032         struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
2033
2034 #if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
2035         /* cannot restore chain ID on HW miss */
2036
2037         *err_msg = "tc skb extension missing";
2038         return -EOPNOTSUPP;
2039 #endif
2040         if (ns_type == MLX5_FLOW_NAMESPACE_FDB)
2041                 return mlx5_tc_ct_init_check_esw_support(esw, err_msg);
2042         else
2043                 return mlx5_tc_ct_init_check_nic_support(priv, err_msg);
2044 }
2045
2046 #define INIT_ERR_PREFIX "tc ct offload init failed"
2047
2048 struct mlx5_tc_ct_priv *
2049 mlx5_tc_ct_init(struct mlx5e_priv *priv, struct mlx5_fs_chains *chains,
2050                 struct mod_hdr_tbl *mod_hdr,
2051                 enum mlx5_flow_namespace_type ns_type)
2052 {
2053         struct mlx5_tc_ct_priv *ct_priv;
2054         struct mlx5_core_dev *dev;
2055         const char *msg;
2056         int err;
2057
2058         dev = priv->mdev;
2059         err = mlx5_tc_ct_init_check_support(priv, ns_type, &msg);
2060         if (err) {
2061                 mlx5_core_warn(dev,
2062                                "tc ct offload not supported, %s\n",
2063                                msg);
2064                 goto err_support;
2065         }
2066
2067         ct_priv = kzalloc(sizeof(*ct_priv), GFP_KERNEL);
2068         if (!ct_priv)
2069                 goto err_alloc;
2070
2071         ct_priv->zone_mapping = mapping_create(sizeof(u16), 0, true);
2072         if (IS_ERR(ct_priv->zone_mapping)) {
2073                 err = PTR_ERR(ct_priv->zone_mapping);
2074                 goto err_mapping_zone;
2075         }
2076
2077         ct_priv->labels_mapping = mapping_create(sizeof(u32) * 4, 0, true);
2078         if (IS_ERR(ct_priv->labels_mapping)) {
2079                 err = PTR_ERR(ct_priv->labels_mapping);
2080                 goto err_mapping_labels;
2081         }
2082
2083         spin_lock_init(&ct_priv->ht_lock);
2084         ct_priv->ns_type = ns_type;
2085         ct_priv->chains = chains;
2086         ct_priv->netdev = priv->netdev;
2087         ct_priv->dev = priv->mdev;
2088         ct_priv->mod_hdr_tbl = mod_hdr;
2089         ct_priv->ct = mlx5_chains_create_global_table(chains);
2090         if (IS_ERR(ct_priv->ct)) {
2091                 err = PTR_ERR(ct_priv->ct);
2092                 mlx5_core_warn(dev,
2093                                "%s, failed to create ct table err: %d\n",
2094                                INIT_ERR_PREFIX, err);
2095                 goto err_ct_tbl;
2096         }
2097
2098         ct_priv->ct_nat = mlx5_chains_create_global_table(chains);
2099         if (IS_ERR(ct_priv->ct_nat)) {
2100                 err = PTR_ERR(ct_priv->ct_nat);
2101                 mlx5_core_warn(dev,
2102                                "%s, failed to create ct nat table err: %d\n",
2103                                INIT_ERR_PREFIX, err);
2104                 goto err_ct_nat_tbl;
2105         }
2106
2107         ct_priv->post_ct = mlx5_chains_create_global_table(chains);
2108         if (IS_ERR(ct_priv->post_ct)) {
2109                 err = PTR_ERR(ct_priv->post_ct);
2110                 mlx5_core_warn(dev,
2111                                "%s, failed to create post ct table err: %d\n",
2112                                INIT_ERR_PREFIX, err);
2113                 goto err_post_ct_tbl;
2114         }
2115
2116         idr_init(&ct_priv->fte_ids);
2117         mutex_init(&ct_priv->control_lock);
2118         rhashtable_init(&ct_priv->zone_ht, &zone_params);
2119         rhashtable_init(&ct_priv->ct_tuples_ht, &tuples_ht_params);
2120         rhashtable_init(&ct_priv->ct_tuples_nat_ht, &tuples_nat_ht_params);
2121
2122         return ct_priv;
2123
2124 err_post_ct_tbl:
2125         mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2126 err_ct_nat_tbl:
2127         mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2128 err_ct_tbl:
2129         mapping_destroy(ct_priv->labels_mapping);
2130 err_mapping_labels:
2131         mapping_destroy(ct_priv->zone_mapping);
2132 err_mapping_zone:
2133         kfree(ct_priv);
2134 err_alloc:
2135 err_support:
2136
2137         return NULL;
2138 }
2139
2140 void
2141 mlx5_tc_ct_clean(struct mlx5_tc_ct_priv *ct_priv)
2142 {
2143         struct mlx5_fs_chains *chains;
2144
2145         if (!ct_priv)
2146                 return;
2147
2148         chains = ct_priv->chains;
2149
2150         mlx5_chains_destroy_global_table(chains, ct_priv->post_ct);
2151         mlx5_chains_destroy_global_table(chains, ct_priv->ct_nat);
2152         mlx5_chains_destroy_global_table(chains, ct_priv->ct);
2153         mapping_destroy(ct_priv->zone_mapping);
2154         mapping_destroy(ct_priv->labels_mapping);
2155
2156         rhashtable_destroy(&ct_priv->ct_tuples_ht);
2157         rhashtable_destroy(&ct_priv->ct_tuples_nat_ht);
2158         rhashtable_destroy(&ct_priv->zone_ht);
2159         mutex_destroy(&ct_priv->control_lock);
2160         idr_destroy(&ct_priv->fte_ids);
2161         kfree(ct_priv);
2162 }
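
/* Usage sketch (hypothetical caller): mlx5_tc_ct_init() pairs with
 * mlx5_tc_ct_clean() on teardown. A NULL return simply disables CT
 * offload, which the offload, restore and clean entry points all
 * check for.
 *
 *      ct_priv = mlx5_tc_ct_init(priv, chains, mod_hdr_tbl,
 *                                MLX5_FLOW_NAMESPACE_FDB);
 *      ...
 *      mlx5_tc_ct_clean(ct_priv);
 */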
2163
2164 bool
2165 mlx5e_tc_ct_restore_flow(struct mlx5_tc_ct_priv *ct_priv,
2166                          struct sk_buff *skb, u8 zone_restore_id)
2167 {
2168         struct mlx5_ct_tuple tuple = {};
2169         struct mlx5_ct_entry *entry;
2170         u16 zone;
2171
2172         if (!ct_priv || !zone_restore_id)
2173                 return true;
2174
2175         if (mapping_find(ct_priv->zone_mapping, zone_restore_id, &zone))
2176                 return false;
2177
2178         if (!mlx5_tc_ct_skb_to_tuple(skb, &tuple, zone))
2179                 return false;
2180
2181         spin_lock(&ct_priv->ht_lock);
2182
2183         entry = mlx5_tc_ct_entry_get(ct_priv, &tuple);
2184         if (IS_ERR_OR_NULL(entry)) {
2185                 spin_unlock(&ct_priv->ht_lock);
2186                 return false;
2187         }
2188         spin_unlock(&ct_priv->ht_lock);
2194
2195         tcf_ct_flow_table_restore_skb(skb, entry->restore_cookie);
2196         __mlx5_tc_ct_entry_put(entry);
2197
2198         return true;
2199 }
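
/* RX sketch (hypothetical caller): on a HW miss the datapath recovers the
 * zone restore id from the mapped register and asks this helper to
 * re-attach the conntrack state before handing the skb back to tc:
 *
 *      if (!mlx5e_tc_ct_restore_flow(ct_priv, skb, zone_restore_id))
 *              ... the caller typically drops the skb ...
 */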